direct-io.hg

view xen/arch/x86/mm.c @ 14134:9e5e94942045

x86: Clean up {alloc,free}_xen_pagetable() interface to avoid use of
frame_table variable before initialisation. This wasn't a bug, but was
confusing.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Feb 26 16:25:17 2007 +0000 (2007-02-26)
parents b010e556fe2c
children 0070b18869f7
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame need not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
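/*
 * Editorial sketch (not part of the original source): how a paravirtualised
 * guest might batch the (ptr, val) pairs described above through the
 * mmu_update hypercall. The guest-side helpers shown here, such as
 * virt_to_machine() and HYPERVISOR_mmu_update(), live in the guest OS rather
 * than in this file, and 'pl1e'/'new_pte' are hypothetical names:
 *
 *     mmu_update_t req;
 *     int done = 0;
 *
 *     req.ptr = virt_to_machine(pl1e);   // machine address of the PTE slot
 *     req.val = new_pte;                 // requested effect: *ptr = val
 *     if ( HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF) < 0 )
 *         BUG();                         // Xen refused the update
 */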
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/domain.h>
93 #include <xen/sched.h>
94 #include <xen/errno.h>
95 #include <xen/perfc.h>
96 #include <xen/irq.h>
97 #include <xen/softirq.h>
98 #include <xen/domain_page.h>
99 #include <xen/event.h>
100 #include <xen/iocap.h>
101 #include <xen/guest_access.h>
102 #include <asm/paging.h>
103 #include <asm/shadow.h>
104 #include <asm/page.h>
105 #include <asm/flushtlb.h>
106 #include <asm/io.h>
107 #include <asm/ldt.h>
108 #include <asm/x86_emulate.h>
109 #include <asm/e820.h>
110 #include <asm/hypercall.h>
111 #include <public/memory.h>
113 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
115 /*
116 * PTE updates can be done with ordinary writes except:
117 * 1. Debug builds get extra checking by using CMPXCHG[8B].
118 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
119 */
120 #if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
121 #define PTE_UPDATE_WITH_CMPXCHG
122 #endif
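/*
 * Editorial note (not part of the original source): a PAE PTE is 64 bits
 * wide, but a plain 32-bit store is the widest naturally-atomic write on a
 * 32-bit CPU, so rewriting a live PTE as two halves could let the hardware
 * page walker observe a mix of old and new halves. A minimal sketch of the
 * idea behind the CMPXCHG8B path (update_intpte() below is the real code):
 *
 *     intpte_t t = old;
 *     for ( ; ; )
 *     {
 *         intpte_t seen = cmpxchg(p, t, new);  // LOCK CMPXCHG8B: all 64 bits
 *                                              // flip to 'new' in one step
 *         if ( seen == t )
 *             break;       // update landed
 *         t = seen;        // hardware set Accessed/Dirty meanwhile; retry
 *     }
 */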
124 /* Used to defer flushing of memory structures. */
125 struct percpu_mm_info {
126 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
127 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
128 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
129 unsigned int deferred_ops;
130 /* If non-NULL, specifies a foreign subject domain for some operations. */
131 struct domain *foreign;
132 };
133 static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
135 /*
136 * Returns the current foreign domain; defaults to the currently-executing
137 * domain if a foreign override hasn't been specified.
138 */
139 #define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
141 /* Private domain structs for DOMID_XEN and DOMID_IO. */
142 static struct domain *dom_xen, *dom_io;
144 /* Frame table and its size in pages. */
145 struct page_info *frame_table;
146 unsigned long max_page;
147 unsigned long total_pages;
149 #ifdef CONFIG_COMPAT
150 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
151 #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
152 L3_DISALLOW_MASK : \
153 COMPAT_L3_DISALLOW_MASK)
154 #else
155 #define l3_disallow_mask(d) L3_DISALLOW_MASK
156 #endif
158 static void queue_deferred_ops(struct domain *d, unsigned int ops)
159 {
160 if ( d == current->domain )
161 this_cpu(percpu_mm_info).deferred_ops |= ops;
162 else
163 BUG_ON(!test_bit(_DOMF_paused, &d->domain_flags) ||
164 !cpus_empty(d->domain_dirty_cpumask));
165 }
167 void __init init_frametable(void)
168 {
169 unsigned long nr_pages, page_step, i, mfn;
171 frame_table = (struct page_info *)FRAMETABLE_VIRT_START;
173 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
174 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
176 for ( i = 0; i < nr_pages; i += page_step )
177 {
178 mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
179 if ( mfn == 0 )
180 panic("Not enough memory for frame table\n");
181 map_pages_to_xen(
182 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
183 mfn, page_step, PAGE_HYPERVISOR);
184 }
186 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
187 }
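/*
 * Editorial worked example (not part of the original source), assuming
 * sizeof(struct page_info) == 32 purely for illustration: with 4GB of RAM,
 * max_page = 0x100000 frames, so nr_pages = PFN_UP(0x100000 * 32) = 8192
 * pages (32MB) of frame table, mapped above in chunks of
 * page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT frames, i.e. 512
 * pages (2MB superpage-sized steps) on a PAE or 64-bit build.
 */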
189 void arch_init_memory(void)
190 {
191 extern void subarch_init_memory(void);
193 unsigned long i, pfn, rstart_pfn, rend_pfn;
195 /*
196 * Initialise our DOMID_XEN domain.
197 * Any Xen-heap pages that we will allow to be mapped will have
198 * their domain field set to dom_xen.
199 */
200 dom_xen = alloc_domain(DOMID_XEN);
201 BUG_ON(dom_xen == NULL);
203 /*
204 * Initialise our DOMID_IO domain.
205 * This domain owns I/O pages that are within the range of the page_info
206 * array. Mappings occur at the privilege level of the caller.
207 */
208 dom_io = alloc_domain(DOMID_IO);
209 BUG_ON(dom_io == NULL);
211 /* First 1MB of RAM is historically marked as I/O. */
212 for ( i = 0; i < 0x100; i++ )
213 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
215 /* Any areas not specified as RAM by the e820 map are considered I/O. */
216 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
217 {
218 if ( e820.map[i].type != E820_RAM )
219 continue;
220 /* Every page from cursor to start of next RAM region is I/O. */
221 rstart_pfn = PFN_UP(e820.map[i].addr);
222 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
223 for ( ; pfn < rstart_pfn; pfn++ )
224 {
225 BUG_ON(!mfn_valid(pfn));
226 share_xen_page_with_guest(
227 mfn_to_page(pfn), dom_io, XENSHARE_writable);
228 }
229 /* Skip the RAM region. */
230 pfn = rend_pfn;
231 }
232 BUG_ON(pfn != max_page);
234 subarch_init_memory();
235 }
237 int memory_is_conventional_ram(paddr_t p)
238 {
239 int i;
241 for ( i = 0; i < e820.nr_map; i++ )
242 {
243 if ( (e820.map[i].type == E820_RAM) &&
244 (e820.map[i].addr <= p) &&
245 ((e820.map[i].addr + e820.map[i].size) > p) )
246 return 1;
247 }
249 return 0;
250 }
252 void share_xen_page_with_guest(
253 struct page_info *page, struct domain *d, int readonly)
254 {
255 if ( page_get_owner(page) == d )
256 return;
258 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
260 spin_lock(&d->page_alloc_lock);
262 /* The incremented type count pins as writable or read-only. */
263 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
264 page->u.inuse.type_info |= PGT_validated | 1;
266 page_set_owner(page, d);
267 wmb(); /* install valid domain ptr before updating refcnt. */
268 ASSERT(page->count_info == 0);
269 page->count_info |= PGC_allocated | 1;
271 if ( unlikely(d->xenheap_pages++ == 0) )
272 get_knownalive_domain(d);
273 list_add_tail(&page->list, &d->xenpage_list);
275 spin_unlock(&d->page_alloc_lock);
276 }
278 void share_xen_page_with_privileged_guests(
279 struct page_info *page, int readonly)
280 {
281 share_xen_page_with_guest(page, dom_xen, readonly);
282 }
284 #if defined(CONFIG_X86_PAE)
286 #ifdef NDEBUG
287 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
288 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
289 #else
290 /*
291 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
292 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
293 * (detected by lack of an owning domain). As required for correctness, we
294 * always shadow PDPTs above 4GB.
295 */
296 #define l3tab_needs_shadow(mfn) \
297 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
298 (page_get_owner(mfn_to_page(mfn)) != NULL) && \
299 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
300 ((mfn) >= 0x100000))
301 #endif
303 static l1_pgentry_t *fix_pae_highmem_pl1e;
305 /* Cache the address of PAE high-memory fixmap page tables. */
306 static int __init cache_pae_fixmap_address(void)
307 {
308 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
309 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
310 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
311 return 0;
312 }
313 __initcall(cache_pae_fixmap_address);
315 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
317 void make_cr3(struct vcpu *v, unsigned long mfn)
318 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
319 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
320 {
321 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
322 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
323 unsigned int cpu = smp_processor_id();
325 /* Fast path: does this mfn need a shadow at all? */
326 if ( !l3tab_needs_shadow(mfn) )
327 {
328 v->arch.cr3 = mfn << PAGE_SHIFT;
329 /* Cache is no longer in use or valid */
330 cache->high_mfn = 0;
331 return;
332 }
334 /* Caching logic is not interrupt safe. */
335 ASSERT(!in_irq());
337 /* Protects against pae_flush_pgd(). */
338 spin_lock(&cache->lock);
340 cache->inuse_idx ^= 1;
341 cache->high_mfn = mfn;
343 /* Map the guest L3 table and copy to the chosen low-memory cache. */
344 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
345 /* First check the previous high mapping can't be in the TLB.
346 * (i.e. have we loaded CR3 since we last did this?) */
347 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
348 local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
349 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
350 lowmem_l3tab = cache->table[cache->inuse_idx];
351 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
352 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
353 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
355 v->arch.cr3 = __pa(lowmem_l3tab);
357 spin_unlock(&cache->lock);
358 }
360 #else /* !CONFIG_X86_PAE */
362 void make_cr3(struct vcpu *v, unsigned long mfn)
363 {
364 v->arch.cr3 = mfn << PAGE_SHIFT;
365 }
367 #endif /* !CONFIG_X86_PAE */
369 void write_ptbase(struct vcpu *v)
370 {
371 write_cr3(v->arch.cr3);
372 }
374 /* Should be called after CR3 is updated.
375 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
376 *
377 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
378 * for HVM guests, arch.monitor_table and hvm's guest CR3.
379 *
380 * Update ref counts to shadow tables appropriately.
381 */
382 void update_cr3(struct vcpu *v)
383 {
384 unsigned long cr3_mfn=0;
386 if ( paging_mode_enabled(v->domain) )
387 {
388 paging_update_cr3(v);
389 return;
390 }
392 #if CONFIG_PAGING_LEVELS == 4
393 if ( !(v->arch.flags & TF_kernel_mode) )
394 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
395 else
396 #endif
397 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
399 make_cr3(v, cr3_mfn);
400 }
403 void invalidate_shadow_ldt(struct vcpu *v)
404 {
405 int i;
406 unsigned long pfn;
407 struct page_info *page;
409 if ( v->arch.shadow_ldt_mapcnt == 0 )
410 return;
412 v->arch.shadow_ldt_mapcnt = 0;
414 for ( i = 16; i < 32; i++ )
415 {
416 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
417 if ( pfn == 0 ) continue;
418 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
419 page = mfn_to_page(pfn);
420 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
421 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
422 put_page_and_type(page);
423 }
425 /* Dispose of the (now possibly invalid) mappings from the TLB. */
426 queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
427 }
430 static int alloc_segdesc_page(struct page_info *page)
431 {
432 struct desc_struct *descs;
433 int i;
435 descs = map_domain_page(page_to_mfn(page));
437 for ( i = 0; i < 512; i++ )
438 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
439 goto fail;
441 unmap_domain_page(descs);
442 return 1;
444 fail:
445 unmap_domain_page(descs);
446 return 0;
447 }
450 /* Map shadow page at offset @off. */
451 int map_ldt_shadow_page(unsigned int off)
452 {
453 struct vcpu *v = current;
454 struct domain *d = v->domain;
455 unsigned long gmfn, mfn;
456 l1_pgentry_t l1e, nl1e;
457 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
458 int okay;
460 BUG_ON(unlikely(in_irq()));
462 guest_get_eff_kern_l1e(v, gva, &l1e);
463 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
464 return 0;
466 gmfn = l1e_get_pfn(l1e);
467 mfn = gmfn_to_mfn(d, gmfn);
468 if ( unlikely(!mfn_valid(mfn)) )
469 return 0;
471 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
472 if ( unlikely(!okay) )
473 return 0;
475 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
477 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
478 v->arch.shadow_ldt_mapcnt++;
480 return 1;
481 }
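/*
 * Editorial example (not part of the original source): the shadow LDT covers
 * the 16 per-domain PTE slots 16..31, so @off ranges over 0..15. A fault on
 * the third LDT page (guest address ldt_base + 0x2000, i.e. off == 2) would
 * install the shadow mapping into v->arch.perdomain_ptes[18].
 */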
484 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
485 {
486 struct page_info *page = mfn_to_page(page_nr);
488 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
489 {
490 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
491 return 0;
492 }
494 return 1;
495 }
498 static int get_page_and_type_from_pagenr(unsigned long page_nr,
499 unsigned long type,
500 struct domain *d)
501 {
502 struct page_info *page = mfn_to_page(page_nr);
504 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
505 return 0;
507 if ( unlikely(!get_page_type(page, type)) )
508 {
509 put_page(page);
510 return 0;
511 }
513 return 1;
514 }
516 #ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */
517 /*
518 * We allow root tables to map each other (a.k.a. linear page tables). It
519 * needs some special care with reference counts and access permissions:
520 * 1. The mapping entry must be read-only, or the guest may get write access
521 * to its own PTEs.
522 * 2. We must only bump the reference counts for an *already validated*
523 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
524 * on a validation that is required to complete that validation.
525 * 3. We only need to increment the reference counts for the mapped page
526 * frame if it is mapped by a different root table. This is sufficient and
527 * also necessary to allow validation of a root table mapping itself.
528 */
529 static int
530 get_linear_pagetable(
531 root_pgentry_t re, unsigned long re_pfn, struct domain *d)
532 {
533 unsigned long x, y;
534 struct page_info *page;
535 unsigned long pfn;
537 if ( (root_get_flags(re) & _PAGE_RW) )
538 {
539 MEM_LOG("Attempt to create linear p.t. with write perms");
540 return 0;
541 }
543 if ( (pfn = root_get_pfn(re)) != re_pfn )
544 {
545 /* Make sure the mapped frame belongs to the correct domain. */
546 if ( unlikely(!get_page_from_pagenr(pfn, d)) )
547 return 0;
549 /*
550 * Make sure that the mapped frame is an already-validated L2 table.
551 * If so, atomically increment the count (checking for overflow).
552 */
553 page = mfn_to_page(pfn);
554 y = page->u.inuse.type_info;
555 do {
556 x = y;
557 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
558 unlikely((x & (PGT_type_mask|PGT_validated)) !=
559 (PGT_root_page_table|PGT_validated)) )
560 {
561 put_page(page);
562 return 0;
563 }
564 }
565 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );
566 }
568 return 1;
569 }
570 #endif /* !CONFIG_X86_PAE */
572 int
573 get_page_from_l1e(
574 l1_pgentry_t l1e, struct domain *d)
575 {
576 unsigned long mfn = l1e_get_pfn(l1e);
577 struct page_info *page = mfn_to_page(mfn);
578 int okay;
580 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
581 return 1;
583 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
584 {
585 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
586 return 0;
587 }
589 if ( unlikely(!mfn_valid(mfn)) ||
590 unlikely(page_get_owner(page) == dom_io) )
591 {
592 /* DOMID_IO reverts to caller for privilege checks. */
593 if ( d == dom_io )
594 d = current->domain;
596 if ( !iomem_access_permitted(d, mfn, mfn) )
597 {
598 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
599 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
600 d->domain_id, mfn);
601 return 0;
602 }
604 /* No reference counting for out-of-range I/O pages. */
605 if ( !mfn_valid(mfn) )
606 return 1;
608 d = dom_io;
609 }
611 /* Foreign mappings into guests in shadow external mode don't
612 * contribute to writeable mapping refcounts. (This allows the
613 * qemu-dm helper process in dom0 to map the domain's memory without
614 * messing up the count of "real" writable mappings.) */
615 okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
616 !(unlikely(paging_mode_external(d) && (d != current->domain))))
617 ? get_page_and_type(page, d, PGT_writable_page)
618 : get_page(page, d));
619 if ( !okay )
620 {
621 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
622 " for dom%d",
623 mfn, get_gpfn_from_mfn(mfn),
624 l1e_get_intpte(l1e), d->domain_id);
625 }
627 return okay;
628 }
631 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
632 static int
633 get_page_from_l2e(
634 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
635 {
636 int rc;
638 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
639 return 1;
641 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
642 {
643 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
644 return 0;
645 }
647 rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
648 #if CONFIG_PAGING_LEVELS == 2
649 if ( unlikely(!rc) )
650 rc = get_linear_pagetable(l2e, pfn, d);
651 #endif
652 return rc;
653 }
656 #if CONFIG_PAGING_LEVELS >= 3
657 static int
658 get_page_from_l3e(
659 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
660 {
661 int rc;
663 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
664 return 1;
666 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
667 {
668 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
669 return 0;
670 }
672 rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
673 return rc;
674 }
675 #endif /* 3 level */
677 #if CONFIG_PAGING_LEVELS >= 4
678 static int
679 get_page_from_l4e(
680 l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
681 {
682 int rc;
684 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
685 return 1;
687 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
688 {
689 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
690 return 0;
691 }
693 rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
695 if ( unlikely(!rc) )
696 rc = get_linear_pagetable(l4e, pfn, d);
698 return rc;
699 }
700 #endif /* 4 level */
702 #ifdef __x86_64__
704 #ifdef USER_MAPPINGS_ARE_GLOBAL
705 #define adjust_guest_l1e(pl1e, d) \
706 do { \
707 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
708 likely(!IS_COMPAT(d)) ) \
709 { \
710 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
711 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
712 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
713 MEM_LOG("Global bit is set to kernel page %lx", \
714 l1e_get_pfn((pl1e))); \
715 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
716 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
717 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
718 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
719 } \
720 } while ( 0 )
721 #else
722 #define adjust_guest_l1e(pl1e, d) \
723 do { \
724 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
725 likely(!IS_COMPAT(d)) ) \
726 l1e_add_flags((pl1e), _PAGE_USER); \
727 } while ( 0 )
728 #endif
730 #define adjust_guest_l2e(pl2e, d) \
731 do { \
732 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
733 likely(!IS_COMPAT(d)) ) \
734 l2e_add_flags((pl2e), _PAGE_USER); \
735 } while ( 0 )
737 #define adjust_guest_l3e(pl3e, d) \
738 do { \
739 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
740 l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
741 _PAGE_USER : \
742 _PAGE_USER|_PAGE_RW); \
743 } while ( 0 )
745 #define adjust_guest_l4e(pl4e, d) \
746 do { \
747 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
748 likely(!IS_COMPAT(d)) ) \
749 l4e_add_flags((pl4e), _PAGE_USER); \
750 } while ( 0 )
752 #else /* !defined(__x86_64__) */
754 #define adjust_guest_l1e(_p, _d) ((void)(_d))
755 #define adjust_guest_l2e(_p, _d) ((void)(_d))
756 #define adjust_guest_l3e(_p, _d) ((void)(_d))
758 #endif
760 #ifdef CONFIG_COMPAT
761 #define unadjust_guest_l3e(pl3e, d) \
762 do { \
763 if ( unlikely(IS_COMPAT(d)) && \
764 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
765 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
766 } while ( 0 )
767 #else
768 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
769 #endif
771 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
772 {
773 unsigned long pfn = l1e_get_pfn(l1e);
774 struct page_info *page = mfn_to_page(pfn);
775 struct domain *e;
776 struct vcpu *v;
778 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
779 return;
781 e = page_get_owner(page);
783 /*
784 * Check if this is a mapping that was established via a grant reference.
785 * If it was then we should not be here: we require that such mappings are
786 * explicitly destroyed via the grant-table interface.
787 *
788 * The upshot of this is that the guest can end up with active grants that
789 * it cannot destroy (because it no longer has a PTE to present to the
790 * grant-table interface). This can lead to subtle hard-to-catch bugs,
791 * hence a special grant PTE flag can be enabled to catch the bug early.
792 *
793 * (Note that the undestroyable active grants are not a security hole in
794 * Xen. All active grants can safely be cleaned up when the domain dies.)
795 */
796 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
797 !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
798 {
799 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
800 l1e_get_intpte(l1e));
801 domain_crash(d);
802 }
804 /* Remember we didn't take a type-count of foreign writable mappings
805 * to paging-external domains */
806 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
807 !(unlikely((e != d) && paging_mode_external(e))) )
808 {
809 put_page_and_type(page);
810 }
811 else
812 {
813 /* We expect this is rare so we blow the entire shadow LDT. */
814 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
815 PGT_ldt_page)) &&
816 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
817 (d == e) )
818 {
819 for_each_vcpu ( d, v )
820 invalidate_shadow_ldt(v);
821 }
822 put_page(page);
823 }
824 }
827 /*
828 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
829 * Note also that this automatically deals correctly with linear p.t.'s.
830 */
831 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
832 {
833 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
834 (l2e_get_pfn(l2e) != pfn) )
835 put_page_and_type(l2e_get_page(l2e));
836 }
839 #if CONFIG_PAGING_LEVELS >= 3
840 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
841 {
842 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
843 (l3e_get_pfn(l3e) != pfn) )
844 put_page_and_type(l3e_get_page(l3e));
845 }
846 #endif
848 #if CONFIG_PAGING_LEVELS >= 4
849 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
850 {
851 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
852 (l4e_get_pfn(l4e) != pfn) )
853 put_page_and_type(l4e_get_page(l4e));
854 }
855 #endif
857 static int alloc_l1_table(struct page_info *page)
858 {
859 struct domain *d = page_get_owner(page);
860 unsigned long pfn = page_to_mfn(page);
861 l1_pgentry_t *pl1e;
862 int i;
864 pl1e = map_domain_page(pfn);
866 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
867 {
868 if ( is_guest_l1_slot(i) &&
869 unlikely(!get_page_from_l1e(pl1e[i], d)) )
870 goto fail;
872 adjust_guest_l1e(pl1e[i], d);
873 }
875 unmap_domain_page(pl1e);
876 return 1;
878 fail:
879 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
880 while ( i-- > 0 )
881 if ( is_guest_l1_slot(i) )
882 put_page_from_l1e(pl1e[i], d);
884 unmap_domain_page(pl1e);
885 return 0;
886 }
888 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
889 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
890 {
891 struct page_info *page;
892 l2_pgentry_t *pl2e;
893 l3_pgentry_t l3e3;
894 #ifndef CONFIG_COMPAT
895 l2_pgentry_t l2e;
896 int i;
897 #else
899 if ( !IS_COMPAT(d) )
900 return 1;
901 #endif
903 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
905 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
906 l3e3 = pl3e[3];
907 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
908 {
909 MEM_LOG("PAE L3 3rd slot is empty");
910 return 0;
911 }
913 /*
914 * The Xen-private mappings include linear mappings. The L2 thus cannot
915 * be shared by multiple L3 tables. The test here is adequate because:
916 * 1. Cannot appear in slots != 3 because get_page_type() checks the
917 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
918 * 2. Cannot appear in another page table's L3:
919 * a. alloc_l3_table() calls this function and this check will fail
920 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
921 */
922 page = l3e_get_page(l3e3);
923 BUG_ON(page->u.inuse.type_info & PGT_pinned);
924 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
925 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
926 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
927 {
928 MEM_LOG("PAE L3 3rd slot is shared");
929 return 0;
930 }
932 /* Xen private mappings. */
933 pl2e = map_domain_page(l3e_get_pfn(l3e3));
934 #ifndef CONFIG_COMPAT
935 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
936 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
937 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
938 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
939 {
940 l2e = l2e_from_page(
941 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
942 __PAGE_HYPERVISOR);
943 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
944 }
945 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
946 {
947 l2e = l2e_empty();
948 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
949 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
950 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
951 }
952 #else
953 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
954 &compat_idle_pg_table_l2[
955 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
956 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
957 #endif
958 unmap_domain_page(pl2e);
960 return 1;
961 }
962 #else
963 # define create_pae_xen_mappings(d, pl3e) (1)
964 #endif
966 #ifdef CONFIG_X86_PAE
967 /* Flush a pgdir update into low-memory caches. */
968 static void pae_flush_pgd(
969 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
970 {
971 struct domain *d = page_get_owner(mfn_to_page(mfn));
972 struct vcpu *v;
973 intpte_t _ol3e, _nl3e, _pl3e;
974 l3_pgentry_t *l3tab_ptr;
975 struct pae_l3_cache *cache;
977 if ( unlikely(shadow_mode_enabled(d)) )
978 {
979 cpumask_t m = CPU_MASK_NONE;
980 /* Re-shadow this l3 table on any vcpus that are using it */
981 for_each_vcpu ( d, v )
982 if ( pagetable_get_pfn(v->arch.guest_table) == mfn )
983 {
984 paging_update_cr3(v);
985 cpus_or(m, m, v->vcpu_dirty_cpumask);
986 }
987 flush_tlb_mask(m);
988 }
990 /* If below 4GB then the pgdir is not shadowed in low memory. */
991 if ( !l3tab_needs_shadow(mfn) )
992 return;
994 for_each_vcpu ( d, v )
995 {
996 cache = &v->arch.pae_l3_cache;
998 spin_lock(&cache->lock);
1000 if ( cache->high_mfn == mfn )
1001 {
1002 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
1003 _ol3e = l3e_get_intpte(*l3tab_ptr);
1004 _nl3e = l3e_get_intpte(nl3e);
1005 _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
1006 BUG_ON(_pl3e != _ol3e);
1007 }
1009 spin_unlock(&cache->lock);
1010 }
1012 flush_tlb_mask(d->domain_dirty_cpumask);
1013 }
1014 #else
1015 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
1016 #endif
1018 static int alloc_l2_table(struct page_info *page, unsigned long type)
1019 {
1020 struct domain *d = page_get_owner(page);
1021 unsigned long pfn = page_to_mfn(page);
1022 l2_pgentry_t *pl2e;
1023 int i;
1025 pl2e = map_domain_page(pfn);
1027 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1028 {
1029 if ( is_guest_l2_slot(d, type, i) &&
1030 unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
1031 goto fail;
1033 adjust_guest_l2e(pl2e[i], d);
1034 }
1036 #if CONFIG_PAGING_LEVELS == 2
1037 /* Xen private mappings. */
1038 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1039 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1040 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1041 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1042 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
1043 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1044 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1045 l2e_from_page(
1046 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
1047 __PAGE_HYPERVISOR);
1048 #endif
1050 unmap_domain_page(pl2e);
1051 return 1;
1053 fail:
1054 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
1055 while ( i-- > 0 )
1056 if ( is_guest_l2_slot(d, type, i) )
1057 put_page_from_l2e(pl2e[i], pfn);
1059 unmap_domain_page(pl2e);
1060 return 0;
1061 }
1064 #if CONFIG_PAGING_LEVELS >= 3
1065 static int alloc_l3_table(struct page_info *page)
1066 {
1067 struct domain *d = page_get_owner(page);
1068 unsigned long pfn = page_to_mfn(page);
1069 l3_pgentry_t *pl3e;
1070 int i;
1072 #ifdef CONFIG_X86_PAE
1073 /*
1074 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
1075 * the weird 'extended cr3' format for dealing with high-order address
1076 * bits. We cut some slack for control tools (before vcpu0 is initialised).
1077 */
1078 if ( (pfn >= 0x100000) &&
1079 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
1080 d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
1081 {
1082 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
1083 return 0;
1084 }
1085 #endif
1087 pl3e = map_domain_page(pfn);
1089 /*
1090 * PAE guests allocate full pages, but aren't required to initialize
1091 * more than the first four entries; when running in compatibility
1092 * mode, however, the full page is visible to the MMU, and hence all
1093 * 512 entries must be valid/verified, which is most easily achieved
1094 * by clearing them out.
1095 */
1096 if ( IS_COMPAT(d) )
1097 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
1099 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1100 {
1101 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1102 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
1103 {
1104 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
1105 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
1106 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
1107 PGT_l2_page_table |
1108 PGT_pae_xen_l2,
1109 d) )
1110 goto fail;
1111 }
1112 else
1113 #endif
1114 if ( is_guest_l3_slot(i) &&
1115 unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
1116 goto fail;
1118 adjust_guest_l3e(pl3e[i], d);
1119 }
1121 if ( !create_pae_xen_mappings(d, pl3e) )
1122 goto fail;
1124 unmap_domain_page(pl3e);
1125 return 1;
1127 fail:
1128 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
1129 while ( i-- > 0 )
1130 if ( is_guest_l3_slot(i) )
1131 put_page_from_l3e(pl3e[i], pfn);
1133 unmap_domain_page(pl3e);
1134 return 0;
1135 }
1136 #else
1137 #define alloc_l3_table(page) (0)
1138 #endif
1140 #if CONFIG_PAGING_LEVELS >= 4
1141 static int alloc_l4_table(struct page_info *page)
1142 {
1143 struct domain *d = page_get_owner(page);
1144 unsigned long pfn = page_to_mfn(page);
1145 l4_pgentry_t *pl4e = page_to_virt(page);
1146 int i;
1148 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1149 {
1150 if ( is_guest_l4_slot(d, i) &&
1151 unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
1152 goto fail;
1154 adjust_guest_l4e(pl4e[i], d);
1155 }
1157 /* Xen private mappings. */
1158 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1159 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1160 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1161 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
1162 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
1163 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
1164 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
1165 __PAGE_HYPERVISOR);
1166 if ( IS_COMPAT(d) )
1167 pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1168 l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
1169 __PAGE_HYPERVISOR);
1171 return 1;
1173 fail:
1174 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
1175 while ( i-- > 0 )
1176 if ( is_guest_l4_slot(d, i) )
1177 put_page_from_l4e(pl4e[i], pfn);
1179 return 0;
1180 }
1181 #else
1182 #define alloc_l4_table(page) (0)
1183 #endif
1186 static void free_l1_table(struct page_info *page)
1187 {
1188 struct domain *d = page_get_owner(page);
1189 unsigned long pfn = page_to_mfn(page);
1190 l1_pgentry_t *pl1e;
1191 int i;
1193 pl1e = map_domain_page(pfn);
1195 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1196 if ( is_guest_l1_slot(i) )
1197 put_page_from_l1e(pl1e[i], d);
1199 unmap_domain_page(pl1e);
1200 }
1203 static void free_l2_table(struct page_info *page)
1204 {
1205 #ifdef CONFIG_COMPAT
1206 struct domain *d = page_get_owner(page);
1207 #endif
1208 unsigned long pfn = page_to_mfn(page);
1209 l2_pgentry_t *pl2e;
1210 int i;
1212 pl2e = map_domain_page(pfn);
1214 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1215 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
1216 put_page_from_l2e(pl2e[i], pfn);
1218 unmap_domain_page(pl2e);
1220 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
1221 }
1224 #if CONFIG_PAGING_LEVELS >= 3
1226 static void free_l3_table(struct page_info *page)
1227 {
1228 struct domain *d = page_get_owner(page);
1229 unsigned long pfn = page_to_mfn(page);
1230 l3_pgentry_t *pl3e;
1231 int i;
1233 pl3e = map_domain_page(pfn);
1235 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1236 if ( is_guest_l3_slot(i) )
1237 {
1238 put_page_from_l3e(pl3e[i], pfn);
1239 unadjust_guest_l3e(pl3e[i], d);
1240 }
1242 unmap_domain_page(pl3e);
1243 }
1245 #endif
1247 #if CONFIG_PAGING_LEVELS >= 4
1249 static void free_l4_table(struct page_info *page)
1250 {
1251 struct domain *d = page_get_owner(page);
1252 unsigned long pfn = page_to_mfn(page);
1253 l4_pgentry_t *pl4e = page_to_virt(page);
1254 int i;
1256 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1257 if ( is_guest_l4_slot(d, i) )
1258 put_page_from_l4e(pl4e[i], pfn);
1259 }
1261 #endif
1264 /* How to write an entry to the guest pagetables.
1265 * Returns 0 for failure (pointer not valid), 1 for success. */
1266 static inline int update_intpte(intpte_t *p,
1267 intpte_t old,
1268 intpte_t new,
1269 unsigned long mfn,
1270 struct vcpu *v)
1271 {
1272 int rv = 1;
1273 #ifndef PTE_UPDATE_WITH_CMPXCHG
1274 rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
1275 #else
1276 {
1277 intpte_t t = old;
1278 for ( ; ; )
1279 {
1280 rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
1281 if ( unlikely(rv == 0) )
1282 {
1283 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1284 ": saw %" PRIpte, old, new, t);
1285 break;
1286 }
1288 if ( t == old )
1289 break;
1291 /* Allowed to change in Accessed/Dirty flags only. */
1292 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
1294 old = t;
1295 }
1296 }
1297 #endif
1298 return rv;
1299 }
1301 /* Macro that wraps the appropriate type-changes around update_intpte().
1302 * Arguments are: type, ptr, old, new, mfn, vcpu */
1303 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
1304 update_intpte((intpte_t *)(_p), \
1305 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
1306 (_m), (_v))
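/*
 * Editorial illustration (not part of the original source): for the L1 case
 * the macro above token-pastes 'l1' onto the entry accessors, so
 *
 *     UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)
 *
 * expands to
 *
 *     update_intpte((intpte_t *)(pl1e),
 *                   l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
 *                   (gl1mfn), (current))
 */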
1308 /* Update the L1 entry at pl1e to new value nl1e. */
1309 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
1310 unsigned long gl1mfn)
1311 {
1312 l1_pgentry_t ol1e;
1313 struct domain *d = current->domain;
1315 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1316 return 0;
1318 if ( unlikely(paging_mode_refcounts(d)) )
1319 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1321 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1322 {
1323 /* Translate foreign guest addresses. */
1324 nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)),
1325 l1e_get_flags(nl1e));
1327 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1328 {
1329 MEM_LOG("Bad L1 flags %x",
1330 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1331 return 0;
1332 }
1334 adjust_guest_l1e(nl1e, d);
1336 /* Fast path for identical mapping, r/w and presence. */
1337 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
1338 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1340 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1341 return 0;
1343 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1344 {
1345 put_page_from_l1e(nl1e, d);
1346 return 0;
1347 }
1348 }
1349 else
1350 {
1351 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1352 return 0;
1353 }
1355 put_page_from_l1e(ol1e, d);
1356 return 1;
1357 }
1360 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1361 static int mod_l2_entry(l2_pgentry_t *pl2e,
1362 l2_pgentry_t nl2e,
1363 unsigned long pfn,
1364 unsigned long type)
1365 {
1366 l2_pgentry_t ol2e;
1367 struct domain *d = current->domain;
1369 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
1370 {
1371 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1372 return 0;
1373 }
1375 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1376 return 0;
1378 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1379 {
1380 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1381 {
1382 MEM_LOG("Bad L2 flags %x",
1383 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1384 return 0;
1385 }
1387 adjust_guest_l2e(nl2e, d);
1389 /* Fast path for identical mapping and presence. */
1390 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1391 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
1393 if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
1394 return 0;
1396 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1397 {
1398 put_page_from_l2e(nl2e, pfn);
1399 return 0;
1400 }
1401 }
1402 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1403 {
1404 return 0;
1405 }
1407 put_page_from_l2e(ol2e, pfn);
1408 return 1;
1409 }
1411 #if CONFIG_PAGING_LEVELS >= 3
1413 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1414 static int mod_l3_entry(l3_pgentry_t *pl3e,
1415 l3_pgentry_t nl3e,
1416 unsigned long pfn)
1417 {
1418 l3_pgentry_t ol3e;
1419 struct domain *d = current->domain;
1420 int okay;
1422 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1423 {
1424 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1425 return 0;
1426 }
1428 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1429 /*
1430 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1431 * would be a pain to ensure they remain continuously valid throughout.
1432 */
1433 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
1434 pgentry_ptr_to_slot(pl3e) >= 3 )
1435 return 0;
1436 #endif
1438 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1439 return 0;
1441 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1442 {
1443 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
1444 {
1445 MEM_LOG("Bad L3 flags %x",
1446 l3e_get_flags(nl3e) & l3_disallow_mask(d));
1447 return 0;
1448 }
1450 adjust_guest_l3e(nl3e, d);
1452 /* Fast path for identical mapping and presence. */
1453 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1454 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
1456 if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
1457 return 0;
1459 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1460 {
1461 put_page_from_l3e(nl3e, pfn);
1462 return 0;
1463 }
1464 }
1465 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1466 {
1467 return 0;
1468 }
1470 okay = create_pae_xen_mappings(d, pl3e);
1471 BUG_ON(!okay);
1473 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
1475 put_page_from_l3e(ol3e, pfn);
1476 return 1;
1477 }
1479 #endif
1481 #if CONFIG_PAGING_LEVELS >= 4
1483 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1484 static int mod_l4_entry(struct domain *d,
1485 l4_pgentry_t *pl4e,
1486 l4_pgentry_t nl4e,
1487 unsigned long pfn)
1488 {
1489 l4_pgentry_t ol4e;
1491 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
1492 {
1493 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1494 return 0;
1495 }
1497 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1498 return 0;
1500 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1501 {
1502 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1503 {
1504 MEM_LOG("Bad L4 flags %x",
1505 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1506 return 0;
1507 }
1509 adjust_guest_l4e(nl4e, current->domain);
1511 /* Fast path for identical mapping and presence. */
1512 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1513 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
1515 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
1516 return 0;
1518 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1519 {
1520 put_page_from_l4e(nl4e, pfn);
1521 return 0;
1522 }
1523 }
1524 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1525 {
1526 return 0;
1527 }
1529 put_page_from_l4e(ol4e, pfn);
1530 return 1;
1531 }
1533 #endif
1535 int alloc_page_type(struct page_info *page, unsigned long type)
1536 {
1537 struct domain *owner = page_get_owner(page);
1539 /* A page table is dirtied when its type count becomes non-zero. */
1540 if ( likely(owner != NULL) )
1541 mark_dirty(owner, page_to_mfn(page));
1543 switch ( type & PGT_type_mask )
1544 {
1545 case PGT_l1_page_table:
1546 return alloc_l1_table(page);
1547 case PGT_l2_page_table:
1548 return alloc_l2_table(page, type);
1549 case PGT_l3_page_table:
1550 return alloc_l3_table(page);
1551 case PGT_l4_page_table:
1552 return alloc_l4_table(page);
1553 case PGT_gdt_page:
1554 case PGT_ldt_page:
1555 return alloc_segdesc_page(page);
1556 default:
1557 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1558 type, page->u.inuse.type_info,
1559 page->count_info);
1560 BUG();
1561 }
1563 return 0;
1564 }
1567 void free_page_type(struct page_info *page, unsigned long type)
1568 {
1569 struct domain *owner = page_get_owner(page);
1570 unsigned long gmfn;
1572 if ( likely(owner != NULL) )
1573 {
1574 /*
1575 * We have to flush before the next use of the linear mapping
1576 * (e.g., update_va_mapping()) or we could end up modifying a page
1577 * that is no longer a page table (and hence screw up ref counts).
1578 */
1579 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
1581 if ( unlikely(paging_mode_enabled(owner)) )
1582 {
1583 /* A page table is dirtied when its type count becomes zero. */
1584 mark_dirty(owner, page_to_mfn(page));
1586 if ( shadow_mode_refcounts(owner) )
1587 return;
1589 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
1590 ASSERT(VALID_M2P(gmfn));
1591 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
1592 }
1593 }
1595 switch ( type & PGT_type_mask )
1596 {
1597 case PGT_l1_page_table:
1598 free_l1_table(page);
1599 break;
1601 case PGT_l2_page_table:
1602 free_l2_table(page);
1603 break;
1605 #if CONFIG_PAGING_LEVELS >= 3
1606 case PGT_l3_page_table:
1607 free_l3_table(page);
1608 break;
1609 #endif
1611 #if CONFIG_PAGING_LEVELS >= 4
1612 case PGT_l4_page_table:
1613 free_l4_table(page);
1614 break;
1615 #endif
1617 default:
1618 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1619 type, page_to_mfn(page));
1620 BUG();
1621 }
1622 }
1625 void put_page_type(struct page_info *page)
1626 {
1627 unsigned long nx, x, y = page->u.inuse.type_info;
1629 again:
1630 do {
1631 x = y;
1632 nx = x - 1;
1634 ASSERT((x & PGT_count_mask) != 0);
1636 if ( unlikely((nx & PGT_count_mask) == 0) )
1637 {
1638 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1639 likely(nx & PGT_validated) )
1640 {
1641 /*
1642 * Page-table pages must be unvalidated when count is zero. The
1643 * 'free' is safe because the refcnt is non-zero and validated
1644 * bit is clear => other ops will spin or fail.
1645 */
1646 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1647 x & ~PGT_validated)) != x) )
1648 goto again;
1649 /* We cleared the 'valid bit' so we do the clean up. */
1650 free_page_type(page, x);
1651 /* Carry on, but with the 'valid bit' now clear. */
1652 x &= ~PGT_validated;
1653 nx &= ~PGT_validated;
1654 }
1656 /*
1657 * Record TLB information for flush later. We do not stamp page
1658 * tables when running in shadow mode:
1659 * 1. Pointless, since it's the shadow pt's which must be tracked.
1660 * 2. Shadow mode reuses this field for shadowed page tables to
1661 * store flags info -- we don't want to conflict with that.
1662 */
1663 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
1664 (page->count_info & PGC_page_table)) )
1665 page->tlbflush_timestamp = tlbflush_current_time();
1666 }
1667 }
1668 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1669 }
1672 int get_page_type(struct page_info *page, unsigned long type)
1673 {
1674 unsigned long nx, x, y = page->u.inuse.type_info;
1676 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
1678 again:
1679 do {
1680 x = y;
1681 nx = x + 1;
1682 if ( unlikely((nx & PGT_count_mask) == 0) )
1683 {
1684 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1685 return 0;
1686 }
1687 else if ( unlikely((x & PGT_count_mask) == 0) )
1688 {
1689 struct domain *d = page_get_owner(page);
1691 /* Never allow a shadowed frame to go from type count 0 to 1 */
1692 if ( d && shadow_mode_enabled(d) )
1693 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
1695 ASSERT(!(x & PGT_pae_xen_l2));
1696 if ( (x & PGT_type_mask) != type )
1697 {
1698 /*
1699 * On type change we check to flush stale TLB entries. This
1700 * may be unnecessary (e.g., page was GDT/LDT) but those
1701 * circumstances should be very rare.
1702 */
1703 cpumask_t mask = d->domain_dirty_cpumask;
1705 /* Don't flush if the timestamp is old enough */
1706 tlbflush_filter(mask, page->tlbflush_timestamp);
1708 if ( unlikely(!cpus_empty(mask)) &&
1709 /* Shadow mode: track only writable pages. */
1710 (!shadow_mode_enabled(page_get_owner(page)) ||
1711 ((nx & PGT_type_mask) == PGT_writable_page)) )
1712 {
1713 perfc_incrc(need_flush_tlb_flush);
1714 flush_tlb_mask(mask);
1715 }
1717 /* We lose existing type, back pointer, and validity. */
1718 nx &= ~(PGT_type_mask | PGT_validated);
1719 nx |= type;
1721 /* No special validation needed for writable pages. */
1722 /* Page tables and GDT/LDT need to be scanned for validity. */
1723 if ( type == PGT_writable_page )
1724 nx |= PGT_validated;
1725 }
1726 }
1727 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
1728 {
1729 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1730 (type != PGT_l1_page_table) )
1731 MEM_LOG("Bad type (saw %" PRtype_info
1732 " != exp %" PRtype_info ") "
1733 "for mfn %lx (pfn %lx)",
1734 x, type, page_to_mfn(page),
1735 get_gpfn_from_mfn(page_to_mfn(page)));
1736 return 0;
1737 }
1738 else if ( unlikely(!(x & PGT_validated)) )
1739 {
1740 /* Someone else is updating validation of this page. Wait... */
1741 while ( (y = page->u.inuse.type_info) == x )
1742 cpu_relax();
1743 goto again;
1744 }
1745 }
1746 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1748 if ( unlikely(!(nx & PGT_validated)) )
1749 {
1750 /* Try to validate page type; drop the new reference on failure. */
1751 if ( unlikely(!alloc_page_type(page, type)) )
1752 {
1753 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1754 PRtype_info ": caf=%08x taf=%" PRtype_info,
1755 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1756 type, page->count_info, page->u.inuse.type_info);
1757 /* No one else can get a reference. We hold the only ref. */
1758 page->u.inuse.type_info = 0;
1759 return 0;
1760 }
1762 /* No one else is updating simultaneously. */
1763 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1764 }
1766 return 1;
1767 }
1770 int new_guest_cr3(unsigned long mfn)
1771 {
1772 struct vcpu *v = current;
1773 struct domain *d = v->domain;
1774 int okay;
1775 unsigned long old_base_mfn;
1777 #ifdef CONFIG_COMPAT
1778 if ( IS_COMPAT(d) )
1779 {
1780 okay = paging_mode_refcounts(d)
1781 ? 0 /* Old code was broken, but what should it be? */
1782 : mod_l4_entry(
1783 d,
1784 __va(pagetable_get_paddr(v->arch.guest_table)),
1785 l4e_from_pfn(
1786 mfn,
1787 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
1788 pagetable_get_pfn(v->arch.guest_table));
1789 if ( unlikely(!okay) )
1790 {
1791 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
1792 return 0;
1793 }
1795 invalidate_shadow_ldt(v);
1796 write_ptbase(v);
1798 return 1;
1799 }
1800 #endif
1801 okay = paging_mode_refcounts(d)
1802 ? get_page_from_pagenr(mfn, d)
1803 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1804 if ( unlikely(!okay) )
1805 {
1806 MEM_LOG("Error while installing new baseptr %lx", mfn);
1807 return 0;
1808 }
1810 invalidate_shadow_ldt(v);
1812 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1814 v->arch.guest_table = pagetable_from_pfn(mfn);
1815 update_cr3(v);
1817 write_ptbase(v);
1819 if ( likely(old_base_mfn != 0) )
1820 {
1821 if ( paging_mode_refcounts(d) )
1822 put_page(mfn_to_page(old_base_mfn));
1823 else
1824 put_page_and_type(mfn_to_page(old_base_mfn));
1825 }
1827 return 1;
1828 }
1830 static void process_deferred_ops(void)
1831 {
1832 unsigned int deferred_ops;
1833 struct domain *d = current->domain;
1834 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1836 deferred_ops = info->deferred_ops;
1837 info->deferred_ops = 0;
1839 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1840 {
1841 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1842 flush_tlb_mask(d->domain_dirty_cpumask);
1843 else
1844 local_flush_tlb();
1845 }
1847 if ( deferred_ops & DOP_RELOAD_LDT )
1848 (void)map_ldt_shadow_page(0);
1850 if ( unlikely(info->foreign != NULL) )
1851 {
1852 put_domain(info->foreign);
1853 info->foreign = NULL;
1854 }
1855 }
1857 static int set_foreigndom(domid_t domid)
1858 {
1859 struct domain *e, *d = current->domain;
1860 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1861 int okay = 1;
1863 ASSERT(info->foreign == NULL);
1865 if ( likely(domid == DOMID_SELF) )
1866 goto out;
1868 if ( unlikely(domid == d->domain_id) )
1869 {
1870 MEM_LOG("Dom %u tried to specify itself as foreign domain",
1871 d->domain_id);
1872 okay = 0;
1873 }
1874 else if ( unlikely(paging_mode_translate(d)) )
1875 {
1876 MEM_LOG("Cannot mix foreign mappings with translated domains");
1877 okay = 0;
1878 }
1879 else if ( !IS_PRIV(d) )
1880 {
1881 switch ( domid )
1882 {
1883 case DOMID_IO:
1884 get_knownalive_domain(dom_io);
1885 info->foreign = dom_io;
1886 break;
1887 default:
1888 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1889 okay = 0;
1890 break;
1891 }
1892 }
1893 else
1894 {
1895 info->foreign = e = get_domain_by_id(domid);
1896 if ( e == NULL )
1897 {
1898 switch ( domid )
1899 {
1900 case DOMID_XEN:
1901 get_knownalive_domain(dom_xen);
1902 info->foreign = dom_xen;
1903 break;
1904 case DOMID_IO:
1905 get_knownalive_domain(dom_io);
1906 info->foreign = dom_io;
1907 break;
1908 default:
1909 MEM_LOG("Unknown domain '%u'", domid);
1910 okay = 0;
1911 break;
1912 }
1913 }
1914 }
1916 out:
1917 return okay;
1918 }
1920 static inline cpumask_t vcpumask_to_pcpumask(
1921 struct domain *d, unsigned long vmask)
1922 {
1923 unsigned int vcpu_id;
1924 cpumask_t pmask = CPU_MASK_NONE;
1925 struct vcpu *v;
1927 while ( vmask != 0 )
1928 {
1929 vcpu_id = find_first_set_bit(vmask);
1930 vmask &= ~(1UL << vcpu_id);
1931 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1932 ((v = d->vcpu[vcpu_id]) != NULL) )
1933 cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
1934 }
1936 return pmask;
1937 }
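/*
 * Editorial example (not part of the original source): for vmask = 0x5 the
 * loop above selects vcpu 0 and vcpu 2 and returns the union of their
 * vcpu_dirty_cpumask values, i.e. the set of physical CPUs whose TLBs may
 * still hold entries installed on behalf of those vcpus.
 */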
1939 int do_mmuext_op(
1940 XEN_GUEST_HANDLE(mmuext_op_t) uops,
1941 unsigned int count,
1942 XEN_GUEST_HANDLE(uint) pdone,
1943 unsigned int foreigndom)
1944 {
1945 struct mmuext_op op;
1946 int rc = 0, i = 0, okay;
1947 unsigned long mfn = 0, gmfn = 0, type;
1948 unsigned int done = 0;
1949 struct page_info *page;
1950 struct vcpu *v = current;
1951 struct domain *d = v->domain;
1953 LOCK_BIGLOCK(d);
1955 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1956 {
1957 count &= ~MMU_UPDATE_PREEMPTED;
1958 if ( unlikely(!guest_handle_is_null(pdone)) )
1959 (void)copy_from_guest(&done, pdone, 1);
1960 }
1962 if ( !set_foreigndom(foreigndom) )
1963 {
1964 rc = -ESRCH;
1965 goto out;
1966 }
1968 if ( unlikely(!guest_handle_okay(uops, count)) )
1969 {
1970 rc = -EFAULT;
1971 goto out;
1972 }
1974 for ( i = 0; i < count; i++ )
1975 {
1976 if ( hypercall_preempt_check() )
1977 {
1978 rc = hypercall_create_continuation(
1979 __HYPERVISOR_mmuext_op, "hihi",
1980 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1981 break;
1982 }
1984 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
1985 {
1986 MEM_LOG("Bad __copy_from_guest");
1987 rc = -EFAULT;
1988 break;
1989 }
1991 okay = 1;
1992 gmfn = op.arg1.mfn;
1993 mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
1994 page = mfn_to_page(mfn);
1996 switch ( op.cmd )
1997 {
1998 case MMUEXT_PIN_L1_TABLE:
1999 type = PGT_l1_page_table;
2000 goto pin_page;
2002 case MMUEXT_PIN_L2_TABLE:
2003 type = PGT_l2_page_table;
2004 goto pin_page;
2006 case MMUEXT_PIN_L3_TABLE:
2007 type = PGT_l3_page_table;
2008 goto pin_page;
2010 case MMUEXT_PIN_L4_TABLE:
2011 if ( IS_COMPAT(FOREIGNDOM) )
2012 break;
2013 type = PGT_l4_page_table;
2015 pin_page:
2016 /* Ignore pinning of invalid paging levels. */
2017 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
2018 break;
2020 if ( paging_mode_refcounts(FOREIGNDOM) )
2021 break;
2023 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
2024 if ( unlikely(!okay) )
2025 {
2026 MEM_LOG("Error while pinning mfn %lx", mfn);
2027 break;
2028 }
2030 if ( unlikely(test_and_set_bit(_PGT_pinned,
2031 &page->u.inuse.type_info)) )
2032 {
2033 MEM_LOG("Mfn %lx already pinned", mfn);
2034 put_page_and_type(page);
2035 okay = 0;
2036 break;
2037 }
2039 /* A page is dirtied when its pin status is set. */
2040 mark_dirty(d, mfn);
2042 break;
2044 case MMUEXT_UNPIN_TABLE:
2045 if ( paging_mode_refcounts(d) )
2046 break;
2048 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
2049 {
2050 MEM_LOG("Mfn %lx bad domain (dom=%p)",
2051 mfn, page_get_owner(page));
2053 else if ( likely(test_and_clear_bit(_PGT_pinned,
2054 &page->u.inuse.type_info)) )
2056 put_page_and_type(page);
2057 put_page(page);
2058 /* A page is dirtied when its pin status is cleared. */
2059 mark_dirty(d, mfn);
2061 else
2063 okay = 0;
2064 put_page(page);
2065 MEM_LOG("Mfn %lx not pinned", mfn);
2067 break;
2069 case MMUEXT_NEW_BASEPTR:
2070 okay = new_guest_cr3(mfn);
2071 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
2072 break;
2074 #ifdef __x86_64__
2075 case MMUEXT_NEW_USER_BASEPTR:
2076 if ( IS_COMPAT(FOREIGNDOM) )
2078 okay = 0;
2079 break;
2081 if (likely(mfn != 0))
2083 if ( paging_mode_refcounts(d) )
2084 okay = get_page_from_pagenr(mfn, d);
2085 else
2086 okay = get_page_and_type_from_pagenr(
2087 mfn, PGT_root_page_table, d);
2089 if ( unlikely(!okay) )
2091 MEM_LOG("Error while installing new mfn %lx", mfn);
2093 else
2095 unsigned long old_mfn =
2096 pagetable_get_pfn(v->arch.guest_table_user);
2097 v->arch.guest_table_user = pagetable_from_pfn(mfn);
2098 if ( old_mfn != 0 )
2100 if ( paging_mode_refcounts(d) )
2101 put_page(mfn_to_page(old_mfn));
2102 else
2103 put_page_and_type(mfn_to_page(old_mfn));
2106 break;
2107 #endif
2109 case MMUEXT_TLB_FLUSH_LOCAL:
2110 this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
2111 break;
2113 case MMUEXT_INVLPG_LOCAL:
2114 if ( !paging_mode_enabled(d)
2115 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
2116 local_flush_tlb_one(op.arg1.linear_addr);
2117 break;
2119 case MMUEXT_TLB_FLUSH_MULTI:
2120 case MMUEXT_INVLPG_MULTI:
2122 unsigned long vmask;
2123 cpumask_t pmask;
2124 if ( unlikely(copy_from_guest(&vmask, op.arg2.vcpumask, 1)) )
2126 okay = 0;
2127 break;
2129 pmask = vcpumask_to_pcpumask(d, vmask);
2130 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
2131 flush_tlb_mask(pmask);
2132 else
2133 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
2134 break;
2137 case MMUEXT_TLB_FLUSH_ALL:
2138 flush_tlb_mask(d->domain_dirty_cpumask);
2139 break;
2141 case MMUEXT_INVLPG_ALL:
2142 flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
2143 break;
2145 case MMUEXT_FLUSH_CACHE:
2146 if ( unlikely(!cache_flush_permitted(d)) )
2148 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
2149 okay = 0;
2151 else
2153 wbinvd();
2155 break;
2157 case MMUEXT_SET_LDT:
2159 unsigned long ptr = op.arg1.linear_addr;
2160 unsigned long ents = op.arg2.nr_ents;
2162 if ( paging_mode_external(d) )
2164 MEM_LOG("ignoring SET_LDT hypercall from external "
2165 "domain %u", d->domain_id);
2166 okay = 0;
2168 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
2169 (ents > 8192) ||
2170 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
2172 okay = 0;
2173 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
2175 else if ( (v->arch.guest_context.ldt_ents != ents) ||
2176 (v->arch.guest_context.ldt_base != ptr) )
2178 invalidate_shadow_ldt(v);
2179 v->arch.guest_context.ldt_base = ptr;
2180 v->arch.guest_context.ldt_ents = ents;
2181 load_LDT(v);
2182 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
2183 if ( ents != 0 )
2184 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
2186 break;
2189 default:
2190 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
2191 rc = -ENOSYS;
2192 okay = 0;
2193 break;
2196 if ( unlikely(!okay) )
2198 rc = rc ? rc : -EINVAL;
2199 break;
2202 guest_handle_add_offset(uops, 1);
2205 out:
2206 process_deferred_ops();
2208 /* Add incremental work we have done to the @done output parameter. */
2209 if ( unlikely(!guest_handle_is_null(pdone)) )
2211 done += i;
2212 copy_to_guest(pdone, &done, 1);
2215 UNLOCK_BIGLOCK(d);
2216 return rc;
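/*
 * Illustrative guest-side usage (not part of this file): a PV guest pins a
 * frame as an L1 page table before installing it in a higher-level entry.
 * A minimal sketch assuming the public mmuext_op interface and a
 * HYPERVISOR_mmuext_op() wrapper as found in Linux guest code; wrapper
 * names vary by guest OS and 'l1_mfn' is a placeholder.
 *
 *     struct mmuext_op op;
 *     op.cmd      = MMUEXT_PIN_L1_TABLE;
 *     op.arg1.mfn = l1_mfn;                        // machine frame to pin
 *     if ( HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) )
 *         BUG();                                   // pin request failed
 */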
2219 int do_mmu_update(
2220 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
2221 unsigned int count,
2222 XEN_GUEST_HANDLE(uint) pdone,
2223 unsigned int foreigndom)
2225 struct mmu_update req;
2226 void *va;
2227 unsigned long gpfn, gmfn, mfn;
2228 struct page_info *page;
2229 int rc = 0, okay = 1, i = 0;
2230 unsigned int cmd, done = 0;
2231 struct vcpu *v = current;
2232 struct domain *d = v->domain;
2233 unsigned long type_info;
2234 struct domain_mmap_cache mapcache, sh_mapcache;
2236 LOCK_BIGLOCK(d);
2238 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
2240 count &= ~MMU_UPDATE_PREEMPTED;
2241 if ( unlikely(!guest_handle_is_null(pdone)) )
2242 (void)copy_from_guest(&done, pdone, 1);
2245 domain_mmap_cache_init(&mapcache);
2246 domain_mmap_cache_init(&sh_mapcache);
2248 if ( !set_foreigndom(foreigndom) )
2250 rc = -ESRCH;
2251 goto out;
2254 perfc_incrc(calls_to_mmu_update);
2255 perfc_addc(num_page_updates, count);
2257 if ( unlikely(!guest_handle_okay(ureqs, count)) )
2259 rc = -EFAULT;
2260 goto out;
2263 for ( i = 0; i < count; i++ )
2265 if ( hypercall_preempt_check() )
2267 rc = hypercall_create_continuation(
2268 __HYPERVISOR_mmu_update, "hihi",
2269 ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2270 break;
2273 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
2275 MEM_LOG("Bad __copy_from_guest");
2276 rc = -EFAULT;
2277 break;
2280 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2281 okay = 0;
2283 switch ( cmd )
2285 /*
2286 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2287 */
2288 case MMU_NORMAL_PT_UPDATE:
2290 gmfn = req.ptr >> PAGE_SHIFT;
2291 mfn = gmfn_to_mfn(d, gmfn);
2293 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2295 MEM_LOG("Could not get page for normal update");
2296 break;
2299 va = map_domain_page_with_cache(mfn, &mapcache);
2300 va = (void *)((unsigned long)va +
2301 (unsigned long)(req.ptr & ~PAGE_MASK));
2302 page = mfn_to_page(mfn);
2304 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2306 case PGT_l1_page_table:
2307 case PGT_l2_page_table:
2308 case PGT_l3_page_table:
2309 case PGT_l4_page_table:
2311 if ( paging_mode_refcounts(d) )
2313 MEM_LOG("mmu update on auto-refcounted domain!");
2314 break;
2317 if ( unlikely(!get_page_type(
2318 page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
2319 goto not_a_pt;
2321 switch ( type_info & PGT_type_mask )
2323 case PGT_l1_page_table:
2325 l1_pgentry_t l1e = l1e_from_intpte(req.val);
2326 okay = mod_l1_entry(va, l1e, mfn);
2328 break;
2329 case PGT_l2_page_table:
2331 l2_pgentry_t l2e = l2e_from_intpte(req.val);
2332 okay = mod_l2_entry(va, l2e, mfn, type_info);
2334 break;
2335 #if CONFIG_PAGING_LEVELS >= 3
2336 case PGT_l3_page_table:
2338 l3_pgentry_t l3e = l3e_from_intpte(req.val);
2339 okay = mod_l3_entry(va, l3e, mfn);
2341 break;
2342 #endif
2343 #if CONFIG_PAGING_LEVELS >= 4
2344 case PGT_l4_page_table:
2345 if ( !IS_COMPAT(FOREIGNDOM) )
2347 l4_pgentry_t l4e = l4e_from_intpte(req.val);
2348 okay = mod_l4_entry(d, va, l4e, mfn);
2350 break;
2351 #endif
2354 put_page_type(page);
2356 break;
2358 default:
2359 not_a_pt:
2361 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2362 break;
2364 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
2366 put_page_type(page);
2368 break;
2371 unmap_domain_page_with_cache(va, &mapcache);
2373 put_page(page);
2374 break;
2376 case MMU_MACHPHYS_UPDATE:
2378 mfn = req.ptr >> PAGE_SHIFT;
2379 gpfn = req.val;
2381 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2383 MEM_LOG("Could not get page for mach->phys update");
2384 break;
2387 if ( unlikely(paging_mode_translate(FOREIGNDOM)) )
2389 MEM_LOG("Mach-phys update on auto-translate guest");
2390 break;
2393 set_gpfn_from_mfn(mfn, gpfn);
2394 okay = 1;
2396 mark_dirty(FOREIGNDOM, mfn);
2398 put_page(mfn_to_page(mfn));
2399 break;
2401 default:
2402 MEM_LOG("Invalid page update command %x", cmd);
2403 rc = -ENOSYS;
2404 okay = 0;
2405 break;
2408 if ( unlikely(!okay) )
2410 rc = rc ? rc : -EINVAL;
2411 break;
2414 guest_handle_add_offset(ureqs, 1);
2417 out:
2418 domain_mmap_cache_destroy(&mapcache);
2419 domain_mmap_cache_destroy(&sh_mapcache);
2421 process_deferred_ops();
2423 /* Add incremental work we have done to the @done output parameter. */
2424 if ( unlikely(!guest_handle_is_null(pdone)) )
2426 done += i;
2427 copy_to_guest(pdone, &done, 1);
2430 UNLOCK_BIGLOCK(d);
2431 return rc;
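/*
 * Illustrative guest-side usage (not part of this file): the low bits of
 * req.ptr select the command, so an ordinary PTE rewrite is encoded as the
 * address of the PTE OR'ed with MMU_NORMAL_PT_UPDATE (0).  A minimal
 * sketch assuming a HYPERVISOR_mmu_update() wrapper as in Linux guest
 * code; 'pte_addr' and 'new_pte_val' are placeholders.
 *
 *     struct mmu_update u;
 *     u.ptr = pte_addr | MMU_NORMAL_PT_UPDATE;     // PTE to rewrite
 *     u.val = new_pte_val;                         // new PTE contents
 *     if ( HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) )
 *         BUG();
 */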
2435 static int create_grant_pte_mapping(
2436 uint64_t pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
2438 int rc = GNTST_okay;
2439 void *va;
2440 unsigned long gmfn, mfn;
2441 struct page_info *page;
2442 u32 type;
2443 l1_pgentry_t ol1e;
2444 struct domain *d = v->domain;
2446 ASSERT(spin_is_locked(&d->big_lock));
2448 adjust_guest_l1e(nl1e, d);
2450 gmfn = pte_addr >> PAGE_SHIFT;
2451 mfn = gmfn_to_mfn(d, gmfn);
2453 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2455 MEM_LOG("Could not get page for normal update");
2456 return GNTST_general_error;
2459 va = map_domain_page(mfn);
2460 va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
2461 page = mfn_to_page(mfn);
2463 type = page->u.inuse.type_info & PGT_type_mask;
2464 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2466 MEM_LOG("Grant map attempted to update a non-L1 page");
2467 rc = GNTST_general_error;
2468 goto failed;
2471 ol1e = *(l1_pgentry_t *)va;
2472 if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
2474 put_page_type(page);
2475 rc = GNTST_general_error;
2476 goto failed;
2479 if ( !paging_mode_refcounts(d) )
2480 put_page_from_l1e(ol1e, d);
2482 put_page_type(page);
2484 failed:
2485 unmap_domain_page(va);
2486 put_page(page);
2488 return rc;
2491 static int destroy_grant_pte_mapping(
2492 uint64_t addr, unsigned long frame, struct domain *d)
2494 int rc = GNTST_okay;
2495 void *va;
2496 unsigned long gmfn, mfn;
2497 struct page_info *page;
2498 u32 type;
2499 l1_pgentry_t ol1e;
2501 gmfn = addr >> PAGE_SHIFT;
2502 mfn = gmfn_to_mfn(d, gmfn);
2504 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2506 MEM_LOG("Could not get page for normal update");
2507 return GNTST_general_error;
2510 va = map_domain_page(mfn);
2511 va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
2512 page = mfn_to_page(mfn);
2514 type = page->u.inuse.type_info & PGT_type_mask;
2515 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2517 MEM_LOG("Grant map attempted to update a non-L1 page");
2518 rc = GNTST_general_error;
2519 goto failed;
2522 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2524 put_page_type(page);
2525 rc = GNTST_general_error;
2526 goto failed;
2529 /* Check that the PTE found at the supplied address actually maps 'frame'. */
2530 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2532 MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
2533 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2534 put_page_type(page);
2535 rc = GNTST_general_error;
2536 goto failed;
2539 /* Delete pagetable entry. */
2540 if ( unlikely(!UPDATE_ENTRY(l1,
2541 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
2542 d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
2544 MEM_LOG("Cannot delete PTE entry at %p", va);
2545 put_page_type(page);
2546 rc = GNTST_general_error;
2547 goto failed;
2550 put_page_type(page);
2552 failed:
2553 unmap_domain_page(va);
2554 put_page(page);
2555 return rc;
2559 static int create_grant_va_mapping(
2560 unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
2562 l1_pgentry_t *pl1e, ol1e;
2563 struct domain *d = v->domain;
2564 unsigned long gl1mfn;
2565 int okay;
2567 ASSERT(spin_is_locked(&d->big_lock));
2569 adjust_guest_l1e(nl1e, d);
2571 pl1e = guest_map_l1e(v, va, &gl1mfn);
2572 if ( !pl1e )
2574 MEM_LOG("Could not find L1 PTE for address %lx", va);
2575 return GNTST_general_error;
2577 ol1e = *pl1e;
2578 okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
2579 guest_unmap_l1e(v, pl1e);
2580 pl1e = NULL;
2582 if ( !okay )
2583 return GNTST_general_error;
2585 if ( !paging_mode_refcounts(d) )
2586 put_page_from_l1e(ol1e, d);
2588 return GNTST_okay;
2591 static int destroy_grant_va_mapping(
2592 unsigned long addr, unsigned long frame, struct vcpu *v)
2594 l1_pgentry_t *pl1e, ol1e;
2595 unsigned long gl1mfn;
2596 int rc = 0;
2598 pl1e = guest_map_l1e(v, addr, &gl1mfn);
2599 if ( !pl1e )
2601 MEM_LOG("Could not find L1 PTE for address %lx", addr);
2602 return GNTST_general_error;
2604 ol1e = *pl1e;
2606 /* Check that the virtual address supplied is actually mapped to frame. */
2607 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2609 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2610 l1e_get_pfn(ol1e), addr, frame);
2611 rc = GNTST_general_error;
2612 goto out;
2615 /* Delete pagetable entry. */
2616 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
2618 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2619 rc = GNTST_general_error;
2620 goto out;
2623 out:
2624 guest_unmap_l1e(v, pl1e);
2625 return rc;
2628 int create_grant_host_mapping(
2629 uint64_t addr, unsigned long frame, unsigned int flags)
2631 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2633 if ( (flags & GNTMAP_application_map) )
2634 l1e_add_flags(pte,_PAGE_USER);
2635 if ( !(flags & GNTMAP_readonly) )
2636 l1e_add_flags(pte,_PAGE_RW);
2638 if ( flags & GNTMAP_contains_pte )
2639 return create_grant_pte_mapping(addr, pte, current);
2640 return create_grant_va_mapping(addr, pte, current);
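/*
 * Note for both create_ and destroy_grant_host_mapping(): when
 * GNTMAP_contains_pte is set, 'addr' is the (pseudo-)physical address of
 * the PTE to rewrite, whose frame is looked up with gmfn_to_mfn();
 * otherwise 'addr' is a linear address in the current guest and the L1
 * entry is located by walking the guest's own pagetables.
 */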
2643 int destroy_grant_host_mapping(
2644 uint64_t addr, unsigned long frame, unsigned int flags)
2646 if ( flags & GNTMAP_contains_pte )
2647 return destroy_grant_pte_mapping(addr, frame, current->domain);
2648 return destroy_grant_va_mapping(addr, frame, current);
2651 int steal_page(
2652 struct domain *d, struct page_info *page, unsigned int memflags)
2654 u32 _d, _nd, x, y;
2656 spin_lock(&d->page_alloc_lock);
2658 /*
2659 * The tricky bit: atomically release ownership while there is just one
2660 * benign reference to the page (PGC_allocated). If that reference
2661 * disappears then the deallocation routine will safely spin.
2662 */
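/*
 * count_info and the pickled owner (_domain) share one aligned 64-bit word,
 * so the locked cmpxchg8b below atomically re-checks that neither changed
 * since the test above while, in the same instruction, clearing the owner
 * (ecx = NULL) and leaving count_info (ebx = x) unchanged.
 */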
2663 _d = pickle_domptr(d);
2664 _nd = page->u.inuse._domain;
2665 y = page->count_info;
2666 do {
2667 x = y;
2668 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2669 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2670 MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2671 " caf=%08x, taf=%" PRtype_info "\n",
2672 (void *) page_to_mfn(page),
2673 d, d->domain_id, unpickle_domptr(_nd), x,
2674 page->u.inuse.type_info);
2675 spin_unlock(&d->page_alloc_lock);
2676 return -1;
2678 __asm__ __volatile__(
2679 LOCK_PREFIX "cmpxchg8b %2"
2680 : "=d" (_nd), "=a" (y),
2681 "=m" (*(volatile u64 *)(&page->count_info))
2682 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2683 } while (unlikely(_nd != _d) || unlikely(y != x));
2685 /*
2686 * Unlink from 'd'. At least one reference remains (now anonymous), so
2687 * no one else is spinning to try to delete this page from 'd'.
2688 */
2689 if ( !(memflags & MEMF_no_refcount) )
2690 d->tot_pages--;
2691 list_del(&page->list);
2693 spin_unlock(&d->page_alloc_lock);
2695 return 0;
2698 int do_update_va_mapping(unsigned long va, u64 val64,
2699 unsigned long flags)
2701 l1_pgentry_t val = l1e_from_intpte(val64);
2702 struct vcpu *v = current;
2703 struct domain *d = v->domain;
2704 l1_pgentry_t *pl1e;
2705 unsigned long vmask, bmap_ptr, gl1mfn;
2706 cpumask_t pmask;
2707 int rc = 0;
2709 perfc_incrc(calls_to_update_va);
2711 if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) )
2712 return -EINVAL;
2714 LOCK_BIGLOCK(d);
2716 pl1e = guest_map_l1e(v, va, &gl1mfn);
2718 if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
2719 rc = -EINVAL;
2721 if ( pl1e )
2722 guest_unmap_l1e(v, pl1e);
2723 pl1e = NULL;
2725 switch ( flags & UVMF_FLUSHTYPE_MASK )
2727 case UVMF_TLB_FLUSH:
2728 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2730 case UVMF_LOCAL:
2731 local_flush_tlb();
2732 break;
2733 case UVMF_ALL:
2734 flush_tlb_mask(d->domain_dirty_cpumask);
2735 break;
2736 default:
2737 if ( unlikely(!IS_COMPAT(d) ?
2738 get_user(vmask, (unsigned long *)bmap_ptr) :
2739 get_user(vmask, (unsigned int *)bmap_ptr)) )
2740 rc = -EFAULT;
2741 pmask = vcpumask_to_pcpumask(d, vmask);
2742 flush_tlb_mask(pmask);
2743 break;
2745 break;
2747 case UVMF_INVLPG:
2748 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2750 case UVMF_LOCAL:
2751 if ( !paging_mode_enabled(d)
2752 || (paging_invlpg(current, va) != 0) )
2753 local_flush_tlb_one(va);
2754 break;
2755 case UVMF_ALL:
2756 flush_tlb_one_mask(d->domain_dirty_cpumask, va);
2757 break;
2758 default:
2759 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2760 rc = -EFAULT;
2761 pmask = vcpumask_to_pcpumask(d, vmask);
2762 flush_tlb_one_mask(pmask, va);
2763 break;
2765 break;
2768 process_deferred_ops();
2770 UNLOCK_BIGLOCK(d);
2772 return rc;
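/*
 * Illustrative guest-side usage (not part of this file): rewrite the PTE
 * for one linear address and invalidate just that address on the local
 * CPU.  A minimal sketch assuming a HYPERVISOR_update_va_mapping() wrapper
 * as in Linux guest code; 'va' and 'new_pte_val' are placeholders.
 *
 *     if ( HYPERVISOR_update_va_mapping(va, new_pte_val,
 *                                       UVMF_INVLPG | UVMF_LOCAL) )
 *         BUG();
 */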
2775 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2776 unsigned long flags,
2777 domid_t domid)
2779 int rc;
2781 if ( unlikely(!IS_PRIV(current->domain)) )
2782 return -EPERM;
2784 if ( !set_foreigndom(domid) )
2785 return -ESRCH;
2787 rc = do_update_va_mapping(va, val64, flags);
2789 return rc;
2794 /*************************
2795 * Descriptor Tables
2796 */
2798 void destroy_gdt(struct vcpu *v)
2800 int i;
2801 unsigned long pfn;
2803 v->arch.guest_context.gdt_ents = 0;
2804 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2806 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2807 put_page_and_type(mfn_to_page(pfn));
2808 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
2809 v->arch.guest_context.gdt_frames[i] = 0;
2814 long set_gdt(struct vcpu *v,
2815 unsigned long *frames,
2816 unsigned int entries)
2818 struct domain *d = v->domain;
2819 /* NB. There are 512 8-byte entries per GDT page. */
2820 int i, nr_pages = (entries + 511) / 512;
2821 unsigned long mfn;
2823 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2824 return -EINVAL;
2826 /* Check the pages in the new GDT. */
2827 for ( i = 0; i < nr_pages; i++ ) {
2828 mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
2829 if ( !mfn_valid(mfn) ||
2830 !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
2831 goto fail;
2834 /* Tear down the old GDT. */
2835 destroy_gdt(v);
2837 /* Install the new GDT. */
2838 v->arch.guest_context.gdt_ents = entries;
2839 for ( i = 0; i < nr_pages; i++ )
2841 v->arch.guest_context.gdt_frames[i] = frames[i];
2842 l1e_write(&v->arch.perdomain_ptes[i],
2843 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
2846 return 0;
2848 fail:
2849 while ( i-- > 0 )
2850 put_page_and_type(mfn_to_page(frames[i]));
2851 return -EINVAL;
2855 long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
2857 int nr_pages = (entries + 511) / 512;
2858 unsigned long frames[16];
2859 long ret;
2861 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_guest(). */
2862 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2863 return -EINVAL;
2865 if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
2866 return -EFAULT;
2868 LOCK_BIGLOCK(current->domain);
2870 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2871 local_flush_tlb();
2873 UNLOCK_BIGLOCK(current->domain);
2875 return ret;
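/*
 * Illustrative guest-side usage (not part of this file): a PV guest loads
 * a new GDT by handing Xen the frame numbers (gmfns) of the pages backing
 * it.  A minimal sketch assuming a HYPERVISOR_set_gdt() wrapper as in
 * Linux guest code; 'gdt_page_gmfn' and 'nr_entries' are placeholders.
 *
 *     unsigned long frames[1] = { gdt_page_gmfn };
 *     if ( HYPERVISOR_set_gdt(frames, nr_entries) )
 *         BUG();
 */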
2879 long do_update_descriptor(u64 pa, u64 desc)
2881 struct domain *dom = current->domain;
2882 unsigned long gmfn = pa >> PAGE_SHIFT;
2883 unsigned long mfn;
2884 unsigned int offset;
2885 struct desc_struct *gdt_pent, d;
2886 struct page_info *page;
2887 long ret = -EINVAL;
2889 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2891 *(u64 *)&d = desc;
2893 LOCK_BIGLOCK(dom);
2895 mfn = gmfn_to_mfn(dom, gmfn);
2896 if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2897 !mfn_valid(mfn) ||
2898 !check_descriptor(dom, &d) )
2900 UNLOCK_BIGLOCK(dom);
2901 return -EINVAL;
2904 page = mfn_to_page(mfn);
2905 if ( unlikely(!get_page(page, dom)) )
2907 UNLOCK_BIGLOCK(dom);
2908 return -EINVAL;
2911 /* Check if the given frame is in use in an unsafe context. */
2912 switch ( page->u.inuse.type_info & PGT_type_mask )
2914 case PGT_gdt_page:
2915 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2916 goto out;
2917 break;
2918 case PGT_ldt_page:
2919 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2920 goto out;
2921 break;
2922 default:
2923 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2924 goto out;
2925 break;
2928 mark_dirty(dom, mfn);
2930 /* All is good so make the update. */
2931 gdt_pent = map_domain_page(mfn);
2932 memcpy(&gdt_pent[offset], &d, 8);
2933 unmap_domain_page(gdt_pent);
2935 put_page_type(page);
2937 ret = 0; /* success */
2939 out:
2940 put_page(page);
2942 UNLOCK_BIGLOCK(dom);
2944 return ret;
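/*
 * Illustrative guest-side usage (not part of this file): update one
 * descriptor in place by passing the (guest-)physical address of the
 * 8-byte slot.  A minimal sketch assuming a HYPERVISOR_update_descriptor()
 * wrapper as in Linux guest code; 'gdt_pa', 'i' and 'new_desc_as_u64' are
 * placeholders.
 *
 *     u64 pa = gdt_pa + i * 8;                 // address of descriptor i
 *     if ( HYPERVISOR_update_descriptor(pa, new_desc_as_u64) )
 *         BUG();
 */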
2947 typedef struct e820entry e820entry_t;
2948 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
2950 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
2952 switch ( op )
2954 case XENMEM_add_to_physmap:
2956 struct xen_add_to_physmap xatp;
2957 unsigned long prev_mfn, mfn = 0, gpfn;
2958 struct domain *d;
2960 if ( copy_from_guest(&xatp, arg, 1) )
2961 return -EFAULT;
2963 if ( xatp.domid == DOMID_SELF )
2965 d = current->domain;
2966 get_knownalive_domain(d);
2968 else if ( !IS_PRIV(current->domain) )
2969 return -EPERM;
2970 else if ( (d = get_domain_by_id(xatp.domid)) == NULL )
2971 return -ESRCH;
2973 switch ( xatp.space )
2975 case XENMAPSPACE_shared_info:
2976 if ( xatp.idx == 0 )
2977 mfn = virt_to_mfn(d->shared_info);
2978 break;
2979 case XENMAPSPACE_grant_table:
2980 spin_lock(&d->grant_table->lock);
2982 if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
2983 (xatp.idx < max_nr_grant_frames) )
2984 gnttab_grow_table(d, xatp.idx + 1);
2986 if ( xatp.idx < nr_grant_frames(d->grant_table) )
2987 mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]);
2989 spin_unlock(&d->grant_table->lock);
2990 break;
2991 default:
2992 break;
2995 if ( !paging_mode_translate(d) || (mfn == 0) )
2997 put_domain(d);
2998 return -EINVAL;
3001 LOCK_BIGLOCK(d);
3003 /* Remove previously mapped page if it was present. */
3004 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
3005 if ( mfn_valid(prev_mfn) )
3007 if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
3008 /* Xen heap frames are simply unhooked from this phys slot. */
3009 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
3010 else
3011 /* Normal domain memory is freed, to avoid leaking it. */
3012 guest_remove_page(d, xatp.gpfn);
3015 /* Unmap from old location, if any. */
3016 gpfn = get_gpfn_from_mfn(mfn);
3017 if ( gpfn != INVALID_M2P_ENTRY )
3018 guest_physmap_remove_page(d, gpfn, mfn);
3020 /* Map at new location. */
3021 guest_physmap_add_page(d, xatp.gpfn, mfn);
3023 UNLOCK_BIGLOCK(d);
3025 put_domain(d);
3027 break;
3030 case XENMEM_set_memory_map:
3032 struct xen_foreign_memory_map fmap;
3033 struct domain *d;
3034 int rc;
3036 if ( copy_from_guest(&fmap, arg, 1) )
3037 return -EFAULT;
3039 if ( fmap.map.nr_entries > ARRAY_SIZE(d->arch.e820) )
3040 return -EINVAL;
3042 if ( fmap.domid == DOMID_SELF )
3044 d = current->domain;
3045 get_knownalive_domain(d);
3047 else if ( !IS_PRIV(current->domain) )
3048 return -EPERM;
3049 else if ( (d = get_domain_by_id(fmap.domid)) == NULL )
3050 return -ESRCH;
3052 rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
3053 fmap.map.nr_entries) ? -EFAULT : 0;
3054 d->arch.nr_e820 = fmap.map.nr_entries;
3056 put_domain(d);
3057 return rc;
3060 case XENMEM_memory_map:
3062 struct xen_memory_map map;
3063 struct domain *d = current->domain;
3065 /* Backwards compatibility. */
3066 if ( d->arch.nr_e820 == 0 )
3067 return -ENOSYS;
3069 if ( copy_from_guest(&map, arg, 1) )
3070 return -EFAULT;
3072 map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
3073 if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
3074 copy_to_guest(arg, &map, 1) )
3075 return -EFAULT;
3077 return 0;
3080 case XENMEM_machine_memory_map:
3082 struct xen_memory_map memmap;
3083 XEN_GUEST_HANDLE(e820entry_t) buffer;
3084 int count;
3086 if ( !IS_PRIV(current->domain) )
3087 return -EINVAL;
3089 if ( copy_from_guest(&memmap, arg, 1) )
3090 return -EFAULT;
3091 if ( memmap.nr_entries < e820.nr_map + 1 )
3092 return -EINVAL;
3094 buffer = guest_handle_cast(memmap.buffer, e820entry_t);
3096 count = min((unsigned int)e820.nr_map, memmap.nr_entries);
3097 if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
3098 return -EFAULT;
3100 memmap.nr_entries = count;
3102 if ( copy_to_guest(arg, &memmap, 1) )
3103 return -EFAULT;
3105 return 0;
3108 case XENMEM_machphys_mapping:
3110 struct xen_machphys_mapping mapping = {
3111 .v_start = MACH2PHYS_VIRT_START,
3112 .v_end = MACH2PHYS_VIRT_END,
3113 .max_mfn = MACH2PHYS_NR_ENTRIES - 1
3114 };
3116 if ( copy_to_guest(arg, &mapping, 1) )
3117 return -EFAULT;
3119 return 0;
3122 default:
3123 return subarch_memory_op(op, arg);
3126 return 0;
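/*
 * Illustrative guest-side usage of the XENMEM_add_to_physmap case above
 * (not part of this file): a translated guest asks for its shared-info
 * frame to appear at a guest pfn of its choosing.  A minimal sketch
 * assuming a HYPERVISOR_memory_op() wrapper as in Linux guest code;
 * 'chosen_gpfn' is a placeholder.
 *
 *     struct xen_add_to_physmap xatp = {
 *         .domid = DOMID_SELF,
 *         .space = XENMAPSPACE_shared_info,
 *         .idx   = 0,
 *         .gpfn  = chosen_gpfn,
 *     };
 *     if ( HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) )
 *         BUG();
 */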
3130 /*************************
3131 * Writable Pagetables
3132 */
3134 struct ptwr_emulate_ctxt {
3135 struct x86_emulate_ctxt ctxt;
3136 unsigned long cr2;
3137 l1_pgentry_t pte;
3138 };
3140 static int ptwr_emulated_read(
3141 enum x86_segment seg,
3142 unsigned long offset,
3143 unsigned long *val,
3144 unsigned int bytes,
3145 struct x86_emulate_ctxt *ctxt)
3147 unsigned int rc;
3148 unsigned long addr = offset;
3150 *val = 0;
3151 if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
3153 propagate_page_fault(addr + bytes - rc, 0); /* read fault */
3154 return X86EMUL_EXCEPTION;
3157 return X86EMUL_OKAY;
3160 static int ptwr_emulated_update(
3161 unsigned long addr,
3162 paddr_t old,
3163 paddr_t val,
3164 unsigned int bytes,
3165 unsigned int do_cmpxchg,
3166 struct ptwr_emulate_ctxt *ptwr_ctxt)
3168 unsigned long mfn;
3169 struct page_info *page;
3170 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3171 struct vcpu *v = current;
3172 struct domain *d = v->domain;
3174 /* Only allow naturally-aligned stores within the original %cr2 page. */
3175 if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
3177 MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
3178 ptwr_ctxt->cr2, addr, bytes);
3179 return X86EMUL_UNHANDLEABLE;
3182 /* Turn a sub-word access into a full-word access. */
3183 if ( bytes != sizeof(paddr_t) )
3185 paddr_t full;
3186 unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
3188 /* Align address; read full word. */
3189 addr &= ~(sizeof(paddr_t)-1);
3190 if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
3192 propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
3193 return X86EMUL_EXCEPTION;
3195 /* Mask out bits provided by caller. */
3196 full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
3197 /* Shift the caller value and OR in the missing bits. */
3198 val &= (((paddr_t)1 << (bytes*8)) - 1);
3199 val <<= (offset)*8;
3200 val |= full;
3201 /* Also fill in missing parts of the cmpxchg old value. */
3202 old &= (((paddr_t)1 << (bytes*8)) - 1);
3203 old <<= (offset)*8;
3204 old |= full;
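/*
 * Worked example: a 4-byte guest write to the upper half of an 8-byte PTE
 * (bytes == 4, offset == 4).  'full' keeps only the existing low 32 bits,
 * 'val' becomes (guest value << 32) | low half, and 'old' is widened the
 * same way so that a subsequent cmpxchg compares full-width values.
 */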
3207 pte = ptwr_ctxt->pte;
3208 mfn = l1e_get_pfn(pte);
3209 page = mfn_to_page(mfn);
3211 /* We are looking only for read-only mappings of p.t. pages. */
3212 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
3213 ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
3214 ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
3215 ASSERT(page_get_owner(page) == d);
3217 /* Check the new PTE. */
3218 nl1e = l1e_from_intpte(val);
3219 if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
3221 if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
3222 (bytes == 4) && (addr & 4) && !do_cmpxchg &&
3223 (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
3225 /*
3226 * If this is an upper-half write to a PAE PTE then we assume that
3227 * the guest has simply got the two writes the wrong way round. We
3228 * zap the PRESENT bit on the assumption that the bottom half will
3229 * be written immediately after we return to the guest.
3230 */
3231 MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte,
3232 l1e_get_intpte(nl1e));
3233 l1e_remove_flags(nl1e, _PAGE_PRESENT);
3235 else
3237 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3238 return X86EMUL_UNHANDLEABLE;
3242 adjust_guest_l1e(nl1e, d);
3244 /* Checked successfully: do the update (write or cmpxchg). */
3245 pl1e = map_domain_page(page_to_mfn(page));
3246 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3247 if ( do_cmpxchg )
3249 int okay;
3250 intpte_t t = old;
3251 ol1e = l1e_from_intpte(old);
3253 okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
3254 &t, val, _mfn(mfn));
3255 okay = (okay && t == old);
3257 if ( !okay )
3259 unmap_domain_page(pl1e);
3260 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
3261 return X86EMUL_CMPXCHG_FAILED;
3264 else
3266 ol1e = *pl1e;
3267 if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
3268 BUG();
3271 unmap_domain_page(pl1e);
3273 /* Finally, drop the old PTE. */
3274 put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
3276 return X86EMUL_OKAY;
3279 static int ptwr_emulated_write(
3280 enum x86_segment seg,
3281 unsigned long offset,
3282 unsigned long val,
3283 unsigned int bytes,
3284 struct x86_emulate_ctxt *ctxt)
3286 return ptwr_emulated_update(
3287 offset, 0, val, bytes, 0,
3288 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3291 static int ptwr_emulated_cmpxchg(
3292 enum x86_segment seg,
3293 unsigned long offset,
3294 unsigned long old,
3295 unsigned long new,
3296 unsigned int bytes,
3297 struct x86_emulate_ctxt *ctxt)
3299 return ptwr_emulated_update(
3300 offset, old, new, bytes, 1,
3301 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3304 static int ptwr_emulated_cmpxchg8b(
3305 enum x86_segment seg,
3306 unsigned long offset,
3307 unsigned long old,
3308 unsigned long old_hi,
3309 unsigned long new,
3310 unsigned long new_hi,
3311 struct x86_emulate_ctxt *ctxt)
3313 if ( CONFIG_PAGING_LEVELS == 2 )
3314 return X86EMUL_UNHANDLEABLE;
3315 return ptwr_emulated_update(
3316 offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
3317 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3320 static struct x86_emulate_ops ptwr_emulate_ops = {
3321 .read = ptwr_emulated_read,
3322 .insn_fetch = ptwr_emulated_read,
3323 .write = ptwr_emulated_write,
3324 .cmpxchg = ptwr_emulated_cmpxchg,
3325 .cmpxchg8b = ptwr_emulated_cmpxchg8b
3326 };
3328 /* Write page fault handler: check if guest is trying to modify a PTE. */
3329 int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
3330 struct cpu_user_regs *regs)
3332 struct domain *d = v->domain;
3333 struct page_info *page;
3334 l1_pgentry_t pte;
3335 struct ptwr_emulate_ctxt ptwr_ctxt;
3336 int rc;
3338 LOCK_BIGLOCK(d);
3340 /*
3341 * Attempt to read the PTE that maps the VA being accessed. By checking for
3342 * PDE validity in the L2 we avoid many expensive fixups in __get_user().
3343 */
3344 guest_get_eff_l1e(v, addr, &pte);
3345 if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
3346 goto bail;
3347 page = l1e_get_page(pte);
3349 /* We are looking only for read-only mappings of p.t. pages. */
3350 if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
3351 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3352 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3353 (page_get_owner(page) != d) )
3354 goto bail;
3356 ptwr_ctxt.ctxt.regs = regs;
3357 ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
3358 IS_COMPAT(d) ? 32 : BITS_PER_LONG;
3359 ptwr_ctxt.cr2 = addr;
3360 ptwr_ctxt.pte = pte;
3362 rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
3363 if ( rc == X86EMUL_UNHANDLEABLE )
3364 goto bail;
3366 UNLOCK_BIGLOCK(d);
3367 perfc_incrc(ptwr_emulations);
3368 return EXCRET_fault_fixed;
3370 bail:
3371 UNLOCK_BIGLOCK(d);
3372 return 0;
3375 int map_pages_to_xen(
3376 unsigned long virt,
3377 unsigned long mfn,
3378 unsigned long nr_mfns,
3379 unsigned long flags)
3381 l2_pgentry_t *pl2e, ol2e;
3382 l1_pgentry_t *pl1e, ol1e;
3383 unsigned int i;
3385 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3386 flags &= ~MAP_SMALL_PAGES;
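/*
 * Each loop iteration maps either one superpage (when virt and mfn are both
 * aligned to 1 << PAGETABLE_ORDER frames, at least that many frames remain,
 * and MAP_SMALL_PAGES was not requested) or a single 4kB page, first
 * shattering any existing superpage mapping into a fresh L1 table.
 */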
3388 while ( nr_mfns != 0 )
3390 pl2e = virt_to_xen_l2e(virt);
3392 if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3393 (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
3394 !map_small_pages )
3396 /* Super-page mapping. */
3397 ol2e = *pl2e;
3398 l2e_write(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
3400 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3402 local_flush_tlb_pge();
3403 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3404 free_xen_pagetable(page_to_virt(l2e_get_page(ol2e)));
3407 virt += 1UL << L2_PAGETABLE_SHIFT;
3408 mfn += 1UL << PAGETABLE_ORDER;
3409 nr_mfns -= 1UL << PAGETABLE_ORDER;
3411 else
3413 /* Normal page mapping. */
3414 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3416 pl1e = alloc_xen_pagetable();
3417 clear_page(pl1e);
3418 l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e),
3419 __PAGE_HYPERVISOR));
3421 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
3423 pl1e = alloc_xen_pagetable();
3424 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3425 l1e_write(&pl1e[i],
3426 l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
3427 l2e_get_flags(*pl2e) & ~_PAGE_PSE));
3428 l2e_write(pl2e, l2e_from_page(virt_to_page(pl1e),
3429 __PAGE_HYPERVISOR));
3430 local_flush_tlb_pge();
3433 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3434 ol1e = *pl1e;
3435 l1e_write(pl1e, l1e_from_pfn(mfn, flags));
3436 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3437 local_flush_tlb_one(virt);
3439 virt += 1UL << L1_PAGETABLE_SHIFT;
3440 mfn += 1UL;
3441 nr_mfns -= 1UL;
3445 return 0;
3448 void __set_fixmap(
3449 enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
3451 BUG_ON(idx >= __end_of_fixed_addresses);
3452 map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
3455 #ifdef MEMORY_GUARD
3457 void memguard_init(void)
3459 map_pages_to_xen(
3460 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3461 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
3464 static void __memguard_change_range(void *p, unsigned long l, int guard)
3466 unsigned long _p = (unsigned long)p;
3467 unsigned long _l = (unsigned long)l;
3468 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3470 /* Ensure we are dealing with a page-aligned whole number of pages. */
3471 ASSERT((_p&PAGE_MASK) != 0);
3472 ASSERT((_l&PAGE_MASK) != 0);
3473 ASSERT((_p&~PAGE_MASK) == 0);
3474 ASSERT((_l&~PAGE_MASK) == 0);
3476 if ( guard )
3477 flags &= ~_PAGE_PRESENT;
3479 map_pages_to_xen(
3480 _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3483 void memguard_guard_range(void *p, unsigned long l)
3485 __memguard_change_range(p, l, 1);
3488 void memguard_unguard_range(void *p, unsigned long l)
3490 __memguard_change_range(p, l, 0);
3493 #endif
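/*
 * Place a non-present guard page just below the top DEBUG_STACK_SIZE bytes
 * of each CPU stack allocation, so that overrunning that region faults
 * immediately rather than silently corrupting the rest of the stack area.
 */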
3495 void memguard_guard_stack(void *p)
3497 BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
3498 p = (void *)((unsigned long)p + STACK_SIZE - DEBUG_STACK_SIZE - PAGE_SIZE);
3499 memguard_guard_range(p, PAGE_SIZE);
3502 /*
3503 * Local variables:
3504 * mode: C
3505 * c-set-style: "BSD"
3506 * c-basic-offset: 4
3507 * tab-width: 4
3508 * indent-tabs-mode: nil
3509 * End:
3510 */