xen/arch/x86/mm.c @ 14859:6a4c6d8a00f5 (ia64/xen-unstable)

Fix map_pages_to_xen() to atomically switch between valid mappings.
This is needed to successfully boot a debug=y build of xen on kvm.
Reported by Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

Author:   kfraser@localhost.localdomain
Date:     Sun Apr 15 21:56:38 2007 +0100
Parents:  76f9a8e730ea
Children: 55d0a5c70986
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame may not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
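/*
 * [Editor's illustrative sketch -- not part of mm.c.] The (ptr, val) batch
 * interface described above is what a guest hands to do_mmu_update: a list of
 * requests, each meaning "*ptr = val", which the hypervisor validates before
 * applying. The standalone model below fakes the hypervisor side with a plain
 * array so that semantics can be seen in isolation; the struct layout and the
 * names fake_machine_memory/apply_updates are assumptions for illustration
 * only, not the real hypercall interface.
 */
#include <stdint.h>
#include <stdio.h>

struct mmu_update_req { uint64_t ptr; uint64_t val; };   /* one (ptr, val) pair */

static uint64_t fake_machine_memory[16];                 /* stands in for machine frames */

static int apply_updates(const struct mmu_update_req *req, unsigned int count)
{
    unsigned int i;
    for ( i = 0; i < count; i++ )
    {
        uint64_t idx = req[i].ptr / sizeof(uint64_t);
        if ( idx >= 16 )          /* the real hypervisor validates far more than bounds */
            return -1;
        fake_machine_memory[idx] = req[i].val;            /* the "*ptr = val" step */
    }
    return 0;
}

int main(void)
{
    struct mmu_update_req batch[2] = {
        { .ptr = 0 * sizeof(uint64_t), .val = 0x1001 },
        { .ptr = 3 * sizeof(uint64_t), .val = 0x2003 },
    };
    apply_updates(batch, 2);
    printf("slot 3 now holds %#llx\n",
           (unsigned long long)fake_machine_memory[3]);
    return 0;
}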
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/domain.h>
93 #include <xen/sched.h>
94 #include <xen/errno.h>
95 #include <xen/perfc.h>
96 #include <xen/irq.h>
97 #include <xen/softirq.h>
98 #include <xen/domain_page.h>
99 #include <xen/event.h>
100 #include <xen/iocap.h>
101 #include <xen/guest_access.h>
102 #include <asm/paging.h>
103 #include <asm/shadow.h>
104 #include <asm/page.h>
105 #include <asm/flushtlb.h>
106 #include <asm/io.h>
107 #include <asm/ldt.h>
108 #include <asm/x86_emulate.h>
109 #include <asm/e820.h>
110 #include <asm/hypercall.h>
111 #include <asm/shared.h>
112 #include <public/memory.h>
114 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
116 /*
117 * PTE updates can be done with ordinary writes except:
118 * 1. Debug builds get extra checking by using CMPXCHG[8B].
119 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
120 */
121 #if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
122 #define PTE_UPDATE_WITH_CMPXCHG
123 #endif
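/*
 * [Editor's illustrative sketch -- not part of mm.c.] The PTE_UPDATE_WITH_CMPXCHG
 * path boils down to the compare-and-swap retry loop modelled below: retry until
 * the entry still holds the value we read, tolerating the hardware flipping
 * Accessed/Dirty underneath us. This is the same pattern update_intpte() uses
 * further down; the real code treats any other concurrent change as a bug,
 * whereas this standalone model just reports failure. The names and flag values
 * here are stand-ins, not the real page-table types.
 */
#include <stdint.h>
#include <assert.h>

#define FAKE_PAGE_ACCESSED (1u << 5)
#define FAKE_PAGE_DIRTY    (1u << 6)

static int pte_cmpxchg_update(uint64_t *pte, uint64_t old, uint64_t new)
{
    for ( ; ; )
    {
        /* Atomic 8-byte compare-and-swap (cmpxchg8b/cmpxchg on x86). */
        uint64_t seen = __sync_val_compare_and_swap(pte, old, new);
        if ( seen == old )
            return 1;                       /* installed atomically */
        /* Only Accessed/Dirty may have changed under our feet. */
        if ( (seen ^ old) & ~(uint64_t)(FAKE_PAGE_ACCESSED | FAKE_PAGE_DIRTY) )
            return 0;                       /* a real concurrent change: give up */
        old = seen;                         /* retry against the refreshed value */
    }
}

int main(void)
{
    uint64_t pte = 0x1000 | FAKE_PAGE_ACCESSED;
    assert(pte_cmpxchg_update(&pte, 0x1000, 0x2000)); /* succeeds despite the A bit */
    assert(pte == 0x2000);
    return 0;
}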
125 /* Used to defer flushing of memory structures. */
126 struct percpu_mm_info {
127 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
128 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
129 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
130 unsigned int deferred_ops;
131 /* If non-NULL, specifies a foreign subject domain for some operations. */
132 struct domain *foreign;
133 };
134 static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
136 /*
137 * Returns the current foreign domain; defaults to the currently-executing
138 * domain if a foreign override hasn't been specified.
139 */
140 #define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
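/*
 * [Editor's note -- illustrative sketch, not part of mm.c.] The "?:" with the
 * middle operand omitted is the GNU C conditional extension: "a ?: b" evaluates
 * a once and yields it if non-zero/non-NULL, otherwise b. FOREIGNDOM therefore
 * reads "the per-CPU foreign override if one is set, else current->domain".
 * The names below (fake_domain, foreign_override) are stand-ins for illustration.
 */
#include <assert.h>

struct fake_domain { int id; };

static struct fake_domain dom_self = { 1 }, dom_other = { 7 };
static struct fake_domain *foreign_override;    /* NULL => no override in force */

#define FAKE_FOREIGNDOM (foreign_override ?: &dom_self)

int main(void)
{
    assert(FAKE_FOREIGNDOM->id == 1);   /* no override: falls back to "self" */
    foreign_override = &dom_other;
    assert(FAKE_FOREIGNDOM->id == 7);   /* override wins while it is set */
    return 0;
}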
142 /* Private domain structs for DOMID_XEN and DOMID_IO. */
143 static struct domain *dom_xen, *dom_io;
145 /* Frame table and its size in pages. */
146 struct page_info *frame_table;
147 unsigned long max_page;
148 unsigned long total_pages;
150 #ifdef CONFIG_COMPAT
151 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
152 #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
153 L3_DISALLOW_MASK : \
154 COMPAT_L3_DISALLOW_MASK)
155 #else
156 #define l3_disallow_mask(d) L3_DISALLOW_MASK
157 #endif
159 static void queue_deferred_ops(struct domain *d, unsigned int ops)
160 {
161 ASSERT(d == current->domain);
162 this_cpu(percpu_mm_info).deferred_ops |= ops;
163 }
165 void __init init_frametable(void)
166 {
167 unsigned long nr_pages, page_step, i, mfn;
169 frame_table = (struct page_info *)FRAMETABLE_VIRT_START;
171 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
172 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
174 for ( i = 0; i < nr_pages; i += page_step )
175 {
176 mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
177 if ( mfn == 0 )
178 panic("Not enough memory for frame table\n");
179 map_pages_to_xen(
180 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
181 mfn, page_step, PAGE_HYPERVISOR);
182 }
184 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
185 }
187 void arch_init_memory(void)
188 {
189 extern void subarch_init_memory(void);
191 unsigned long i, pfn, rstart_pfn, rend_pfn;
193 /*
194 * Initialise our DOMID_XEN domain.
195 * Any Xen-heap pages that we will allow to be mapped will have
196 * their domain field set to dom_xen.
197 */
198 dom_xen = alloc_domain(DOMID_XEN);
199 BUG_ON(dom_xen == NULL);
201 /*
202 * Initialise our DOMID_IO domain.
203 * This domain owns I/O pages that are within the range of the page_info
204 * array. Mappings occur at the priv of the caller.
205 */
206 dom_io = alloc_domain(DOMID_IO);
207 BUG_ON(dom_io == NULL);
209 /* First 1MB of RAM is historically marked as I/O. */
210 for ( i = 0; i < 0x100; i++ )
211 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
213 /* Any areas not specified as RAM by the e820 map are considered I/O. */
214 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
215 {
216 if ( e820.map[i].type != E820_RAM )
217 continue;
218 /* Every page from cursor to start of next RAM region is I/O. */
219 rstart_pfn = PFN_UP(e820.map[i].addr);
220 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
221 for ( ; pfn < rstart_pfn; pfn++ )
222 {
223 BUG_ON(!mfn_valid(pfn));
224 share_xen_page_with_guest(
225 mfn_to_page(pfn), dom_io, XENSHARE_writable);
226 }
227 /* Skip the RAM region. */
228 pfn = rend_pfn;
229 }
230 BUG_ON(pfn != max_page);
232 subarch_init_memory();
233 }
235 int memory_is_conventional_ram(paddr_t p)
236 {
237 int i;
239 for ( i = 0; i < e820.nr_map; i++ )
240 {
241 if ( (e820.map[i].type == E820_RAM) &&
242 (e820.map[i].addr <= p) &&
243 ((e820.map[i].addr + e820.map[i].size) > p) )
244 return 1;
245 }
247 return 0;
248 }
250 unsigned long domain_get_maximum_gpfn(struct domain *d)
251 {
252 return is_hvm_domain(d) ? d->arch.p2m.max_mapped_pfn : arch_get_max_pfn(d);
253 }
255 void share_xen_page_with_guest(
256 struct page_info *page, struct domain *d, int readonly)
257 {
258 if ( page_get_owner(page) == d )
259 return;
261 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
263 spin_lock(&d->page_alloc_lock);
265 /* The incremented type count pins as writable or read-only. */
266 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
267 page->u.inuse.type_info |= PGT_validated | 1;
269 page_set_owner(page, d);
270 wmb(); /* install valid domain ptr before updating refcnt. */
271 ASSERT(page->count_info == 0);
273 /* Only add to the allocation list if the domain isn't dying. */
274 if ( !d->is_dying )
275 {
276 page->count_info |= PGC_allocated | 1;
277 if ( unlikely(d->xenheap_pages++ == 0) )
278 get_knownalive_domain(d);
279 list_add_tail(&page->list, &d->xenpage_list);
280 }
282 spin_unlock(&d->page_alloc_lock);
283 }
285 void share_xen_page_with_privileged_guests(
286 struct page_info *page, int readonly)
287 {
288 share_xen_page_with_guest(page, dom_xen, readonly);
289 }
291 #if defined(CONFIG_X86_PAE)
293 #ifdef NDEBUG
294 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
295 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
296 #else
297 /*
298 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
299 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
300 * (detected by lack of an owning domain). As required for correctness, we
301 * always shadow PDPTs above 4GB.
302 */
303 #define l3tab_needs_shadow(mfn) \
304 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
305 (page_get_owner(mfn_to_page(mfn)) != NULL) && \
306 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
307 ((mfn) >= 0x100000))
308 #endif
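/*
 * [Editor's note -- worked example.] With 4KB pages (PAGE_SHIFT == 12), MFN
 * 0x100000 is the first frame at or above the 4GB boundary:
 *     0x100000 << 12 = 0x1_0000_0000 bytes = 2^32 bytes = 4GB,
 * which is why "(mfn) >= 0x100000" is the release-build test for "this PDPT
 * lives above 4GB and therefore needs a low-memory shadow".
 */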
310 static l1_pgentry_t *fix_pae_highmem_pl1e;
312 /* Cache the address of PAE high-memory fixmap page tables. */
313 static int __init cache_pae_fixmap_address(void)
314 {
315 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
316 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
317 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
318 return 0;
319 }
320 __initcall(cache_pae_fixmap_address);
322 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
324 void make_cr3(struct vcpu *v, unsigned long mfn)
325 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
326 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
327 {
328 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
329 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
330 unsigned int cpu = smp_processor_id();
332 /* Fast path: does this mfn need a shadow at all? */
333 if ( !l3tab_needs_shadow(mfn) )
334 {
335 v->arch.cr3 = mfn << PAGE_SHIFT;
336 /* Cache is no longer in use or valid */
337 cache->high_mfn = 0;
338 return;
339 }
341 /* Caching logic is not interrupt safe. */
342 ASSERT(!in_irq());
344 /* Protects against pae_flush_pgd(). */
345 spin_lock(&cache->lock);
347 cache->inuse_idx ^= 1;
348 cache->high_mfn = mfn;
350 /* Map the guest L3 table and copy to the chosen low-memory cache. */
351 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
352 /* First check the previous high mapping can't be in the TLB.
353 * (i.e. have we loaded CR3 since we last did this?) */
354 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
355 local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
356 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
357 lowmem_l3tab = cache->table[cache->inuse_idx];
358 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
359 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
360 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
362 v->arch.cr3 = __pa(lowmem_l3tab);
364 spin_unlock(&cache->lock);
365 }
367 #else /* !CONFIG_X86_PAE */
369 void make_cr3(struct vcpu *v, unsigned long mfn)
370 {
371 v->arch.cr3 = mfn << PAGE_SHIFT;
372 }
374 #endif /* !CONFIG_X86_PAE */
376 void write_ptbase(struct vcpu *v)
377 {
378 write_cr3(v->arch.cr3);
379 }
381 /* Should be called after CR3 is updated.
382 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
383 *
384 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
385 * for HVM guests, arch.monitor_table and hvm's guest CR3.
386 *
387 * Update ref counts to shadow tables appropriately.
388 */
389 void update_cr3(struct vcpu *v)
390 {
391 unsigned long cr3_mfn=0;
393 if ( paging_mode_enabled(v->domain) )
394 {
395 paging_update_cr3(v);
396 return;
397 }
399 #if CONFIG_PAGING_LEVELS == 4
400 if ( !(v->arch.flags & TF_kernel_mode) )
401 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
402 else
403 #endif
404 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
406 make_cr3(v, cr3_mfn);
407 }
410 void invalidate_shadow_ldt(struct vcpu *v)
411 {
412 int i;
413 unsigned long pfn;
414 struct page_info *page;
416 if ( v->arch.shadow_ldt_mapcnt == 0 )
417 return;
419 v->arch.shadow_ldt_mapcnt = 0;
421 for ( i = 16; i < 32; i++ )
422 {
423 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
424 if ( pfn == 0 ) continue;
425 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
426 page = mfn_to_page(pfn);
427 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
428 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
429 put_page_and_type(page);
430 }
432 /* Dispose of the (now possibly invalid) mappings from the TLB. */
433 if ( v == current )
434 queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
435 else
436 flush_tlb_mask(v->domain->domain_dirty_cpumask);
437 }
440 static int alloc_segdesc_page(struct page_info *page)
441 {
442 struct desc_struct *descs;
443 int i;
445 descs = map_domain_page(page_to_mfn(page));
447 for ( i = 0; i < 512; i++ )
448 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
449 goto fail;
451 unmap_domain_page(descs);
452 return 1;
454 fail:
455 unmap_domain_page(descs);
456 return 0;
457 }
460 /* Map shadow page at offset @off. */
461 int map_ldt_shadow_page(unsigned int off)
462 {
463 struct vcpu *v = current;
464 struct domain *d = v->domain;
465 unsigned long gmfn, mfn;
466 l1_pgentry_t l1e, nl1e;
467 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
468 int okay;
470 BUG_ON(unlikely(in_irq()));
472 guest_get_eff_kern_l1e(v, gva, &l1e);
473 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
474 return 0;
476 gmfn = l1e_get_pfn(l1e);
477 mfn = gmfn_to_mfn(d, gmfn);
478 if ( unlikely(!mfn_valid(mfn)) )
479 return 0;
481 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
482 if ( unlikely(!okay) )
483 return 0;
485 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
487 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
488 v->arch.shadow_ldt_mapcnt++;
490 return 1;
491 }
494 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
495 {
496 struct page_info *page = mfn_to_page(page_nr);
498 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
499 {
500 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
501 return 0;
502 }
504 return 1;
505 }
508 static int get_page_and_type_from_pagenr(unsigned long page_nr,
509 unsigned long type,
510 struct domain *d)
511 {
512 struct page_info *page = mfn_to_page(page_nr);
514 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
515 return 0;
517 if ( unlikely(!get_page_type(page, type)) )
518 {
519 put_page(page);
520 return 0;
521 }
523 return 1;
524 }
526 /*
527 * We allow root tables to map each other (a.k.a. linear page tables). It
528 * needs some special care with reference counts and access permissions:
529 * 1. The mapping entry must be read-only, or the guest may get write access
530 * to its own PTEs.
531 * 2. We must only bump the reference counts for an *already validated*
532 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
533 * on a validation that is required to complete that validation.
534 * 3. We only need to increment the reference counts for the mapped page
535 * frame if it is mapped by a different root table. This is sufficient and
536 * also necessary to allow validation of a root table mapping itself.
537 */
538 #define define_get_linear_pagetable(level) \
539 static int \
540 get_##level##_linear_pagetable( \
541 level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \
542 { \
543 unsigned long x, y; \
544 struct page_info *page; \
545 unsigned long pfn; \
546 \
547 if ( (level##e_get_flags(pde) & _PAGE_RW) ) \
548 { \
549 MEM_LOG("Attempt to create linear p.t. with write perms"); \
550 return 0; \
551 } \
552 \
553 if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \
554 { \
555 /* Make sure the mapped frame belongs to the correct domain. */ \
556 if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
557 return 0; \
558 \
559 /* \
560 * Ensure that the mapped frame is an already-validated page table. \
561 * If so, atomically increment the count (checking for overflow). \
562 */ \
563 page = mfn_to_page(pfn); \
564 y = page->u.inuse.type_info; \
565 do { \
566 x = y; \
567 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || \
568 unlikely((x & (PGT_type_mask|PGT_validated)) != \
569 (PGT_##level##_page_table|PGT_validated)) ) \
570 { \
571 put_page(page); \
572 return 0; \
573 } \
574 } \
575 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); \
576 } \
577 \
578 return 1; \
579 }
581 int
582 get_page_from_l1e(
583 l1_pgentry_t l1e, struct domain *d)
584 {
585 unsigned long mfn = l1e_get_pfn(l1e);
586 struct page_info *page = mfn_to_page(mfn);
587 int okay;
589 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
590 return 1;
592 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
593 {
594 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
595 return 0;
596 }
598 if ( unlikely(!mfn_valid(mfn)) ||
599 unlikely(page_get_owner(page) == dom_io) )
600 {
601 /* DOMID_IO reverts to caller for privilege checks. */
602 if ( d == dom_io )
603 d = current->domain;
605 if ( !iomem_access_permitted(d, mfn, mfn) )
606 {
607 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
608 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
609 d->domain_id, mfn);
610 return 0;
611 }
613 /* No reference counting for out-of-range I/O pages. */
614 if ( !mfn_valid(mfn) )
615 return 1;
617 d = dom_io;
618 }
620 /* Foreign mappings into guests in shadow external mode don't
621 * contribute to writeable mapping refcounts. (This allows the
622 * qemu-dm helper process in dom0 to map the domain's memory without
623 * messing up the count of "real" writable mappings.) */
624 okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
625 !(unlikely(paging_mode_external(d) && (d != current->domain))))
626 ? get_page_and_type(page, d, PGT_writable_page)
627 : get_page(page, d));
628 if ( !okay )
629 {
630 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
631 " for dom%d",
632 mfn, get_gpfn_from_mfn(mfn),
633 l1e_get_intpte(l1e), d->domain_id);
634 }
636 return okay;
637 }
640 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
641 define_get_linear_pagetable(l2);
642 static int
643 get_page_from_l2e(
644 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
645 {
646 int rc;
648 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
649 return 1;
651 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
652 {
653 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
654 return 0;
655 }
657 rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
658 if ( unlikely(!rc) )
659 rc = get_l2_linear_pagetable(l2e, pfn, d);
661 return rc;
662 }
665 #if CONFIG_PAGING_LEVELS >= 3
666 define_get_linear_pagetable(l3);
667 static int
668 get_page_from_l3e(
669 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
670 {
671 int rc;
673 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
674 return 1;
676 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
677 {
678 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
679 return 0;
680 }
682 rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
683 if ( unlikely(!rc) )
684 rc = get_l3_linear_pagetable(l3e, pfn, d);
686 return rc;
687 }
688 #endif /* 3 level */
690 #if CONFIG_PAGING_LEVELS >= 4
691 define_get_linear_pagetable(l4);
692 static int
693 get_page_from_l4e(
694 l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
695 {
696 int rc;
698 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
699 return 1;
701 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
702 {
703 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
704 return 0;
705 }
707 rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
708 if ( unlikely(!rc) )
709 rc = get_l4_linear_pagetable(l4e, pfn, d);
711 return rc;
712 }
713 #endif /* 4 level */
715 #ifdef __x86_64__
717 #ifdef USER_MAPPINGS_ARE_GLOBAL
718 #define adjust_guest_l1e(pl1e, d) \
719 do { \
720 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
721 likely(!IS_COMPAT(d)) ) \
722 { \
723 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
724 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
725 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
726 MEM_LOG("Global bit is set to kernel page %lx", \
727 l1e_get_pfn((pl1e))); \
728 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
729 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
730 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
731 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
732 } \
733 } while ( 0 )
734 #else
735 #define adjust_guest_l1e(pl1e, d) \
736 do { \
737 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
738 likely(!IS_COMPAT(d)) ) \
739 l1e_add_flags((pl1e), _PAGE_USER); \
740 } while ( 0 )
741 #endif
743 #define adjust_guest_l2e(pl2e, d) \
744 do { \
745 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
746 likely(!IS_COMPAT(d)) ) \
747 l2e_add_flags((pl2e), _PAGE_USER); \
748 } while ( 0 )
750 #define adjust_guest_l3e(pl3e, d) \
751 do { \
752 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
753 l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
754 _PAGE_USER : \
755 _PAGE_USER|_PAGE_RW); \
756 } while ( 0 )
758 #define adjust_guest_l4e(pl4e, d) \
759 do { \
760 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
761 likely(!IS_COMPAT(d)) ) \
762 l4e_add_flags((pl4e), _PAGE_USER); \
763 } while ( 0 )
765 #else /* !defined(__x86_64__) */
767 #define adjust_guest_l1e(_p, _d) ((void)(_d))
768 #define adjust_guest_l2e(_p, _d) ((void)(_d))
769 #define adjust_guest_l3e(_p, _d) ((void)(_d))
771 #endif
773 #ifdef CONFIG_COMPAT
774 #define unadjust_guest_l3e(pl3e, d) \
775 do { \
776 if ( unlikely(IS_COMPAT(d)) && \
777 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
778 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
779 } while ( 0 )
780 #else
781 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
782 #endif
784 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
785 {
786 unsigned long pfn = l1e_get_pfn(l1e);
787 struct page_info *page = mfn_to_page(pfn);
788 struct domain *e;
789 struct vcpu *v;
791 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
792 return;
794 e = page_get_owner(page);
796 /*
797 * Check if this is a mapping that was established via a grant reference.
798 * If it was then we should not be here: we require that such mappings are
799 * explicitly destroyed via the grant-table interface.
800 *
801 * The upshot of this is that the guest can end up with active grants that
802 * it cannot destroy (because it no longer has a PTE to present to the
803 * grant-table interface). This can lead to subtle hard-to-catch bugs,
804 * hence a special grant PTE flag can be enabled to catch the bug early.
805 *
806 * (Note that the undestroyable active grants are not a security hole in
807 * Xen. All active grants can safely be cleaned up when the domain dies.)
808 */
809 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
810 !d->is_shutting_down && !d->is_dying )
811 {
812 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
813 l1e_get_intpte(l1e));
814 domain_crash(d);
815 }
817 /* Remember we didn't take a type-count of foreign writable mappings
818 * to paging-external domains */
819 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
820 !(unlikely((e != d) && paging_mode_external(e))) )
821 {
822 put_page_and_type(page);
823 }
824 else
825 {
826 /* We expect this is rare so we blow the entire shadow LDT. */
827 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
828 PGT_ldt_page)) &&
829 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
830 (d == e) )
831 {
832 for_each_vcpu ( d, v )
833 invalidate_shadow_ldt(v);
834 }
835 put_page(page);
836 }
837 }
840 /*
841 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
842 * Note also that this automatically deals correctly with linear p.t.'s.
843 */
844 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
845 {
846 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
847 (l2e_get_pfn(l2e) != pfn) )
848 put_page_and_type(l2e_get_page(l2e));
849 }
852 #if CONFIG_PAGING_LEVELS >= 3
853 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
854 {
855 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
856 (l3e_get_pfn(l3e) != pfn) )
857 put_page_and_type(l3e_get_page(l3e));
858 }
859 #endif
861 #if CONFIG_PAGING_LEVELS >= 4
862 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
863 {
864 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
865 (l4e_get_pfn(l4e) != pfn) )
866 put_page_and_type(l4e_get_page(l4e));
867 }
868 #endif
870 static int alloc_l1_table(struct page_info *page)
871 {
872 struct domain *d = page_get_owner(page);
873 unsigned long pfn = page_to_mfn(page);
874 l1_pgentry_t *pl1e;
875 int i;
877 pl1e = map_domain_page(pfn);
879 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
880 {
881 if ( is_guest_l1_slot(i) &&
882 unlikely(!get_page_from_l1e(pl1e[i], d)) )
883 goto fail;
885 adjust_guest_l1e(pl1e[i], d);
886 }
888 unmap_domain_page(pl1e);
889 return 1;
891 fail:
892 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
893 while ( i-- > 0 )
894 if ( is_guest_l1_slot(i) )
895 put_page_from_l1e(pl1e[i], d);
897 unmap_domain_page(pl1e);
898 return 0;
899 }
901 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
902 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
903 {
904 struct page_info *page;
905 l2_pgentry_t *pl2e;
906 l3_pgentry_t l3e3;
907 #ifndef CONFIG_COMPAT
908 l2_pgentry_t l2e;
909 int i;
910 #else
912 if ( !IS_COMPAT(d) )
913 return 1;
914 #endif
916 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
918 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
919 l3e3 = pl3e[3];
920 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
921 {
922 MEM_LOG("PAE L3 3rd slot is empty");
923 return 0;
924 }
926 /*
927 * The Xen-private mappings include linear mappings. The L2 thus cannot
928 * be shared by multiple L3 tables. The test here is adequate because:
929 * 1. Cannot appear in slots != 3 because get_page_type() checks the
930 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
931 * 2. Cannot appear in another page table's L3:
932 * a. alloc_l3_table() calls this function and this check will fail
933 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
934 */
935 page = l3e_get_page(l3e3);
936 BUG_ON(page->u.inuse.type_info & PGT_pinned);
937 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
938 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
939 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
940 {
941 MEM_LOG("PAE L3 3rd slot is shared");
942 return 0;
943 }
945 /* Xen private mappings. */
946 pl2e = map_domain_page(l3e_get_pfn(l3e3));
947 #ifndef CONFIG_COMPAT
948 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
949 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
950 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
951 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
952 {
953 l2e = l2e_from_page(
954 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
955 __PAGE_HYPERVISOR);
956 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
957 }
958 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
959 {
960 l2e = l2e_empty();
961 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
962 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
963 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
964 }
965 #else
966 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
967 &compat_idle_pg_table_l2[
968 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
969 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
970 #endif
971 unmap_domain_page(pl2e);
973 return 1;
974 }
975 #else
976 # define create_pae_xen_mappings(d, pl3e) (1)
977 #endif
979 #ifdef CONFIG_X86_PAE
980 /* Flush a pgdir update into low-memory caches. */
981 static void pae_flush_pgd(
982 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
983 {
984 struct domain *d = page_get_owner(mfn_to_page(mfn));
985 struct vcpu *v;
986 intpte_t _ol3e, _nl3e, _pl3e;
987 l3_pgentry_t *l3tab_ptr;
988 struct pae_l3_cache *cache;
990 if ( unlikely(shadow_mode_enabled(d)) )
991 {
992 cpumask_t m = CPU_MASK_NONE;
993 /* Re-shadow this l3 table on any vcpus that are using it */
994 for_each_vcpu ( d, v )
995 if ( pagetable_get_pfn(v->arch.guest_table) == mfn )
996 {
997 paging_update_cr3(v);
998 cpus_or(m, m, v->vcpu_dirty_cpumask);
999 }
1000 flush_tlb_mask(m);
1003 /* If below 4GB then the pgdir is not shadowed in low memory. */
1004 if ( !l3tab_needs_shadow(mfn) )
1005 return;
1007 for_each_vcpu ( d, v )
1009 cache = &v->arch.pae_l3_cache;
1011 spin_lock(&cache->lock);
1013 if ( cache->high_mfn == mfn )
1015 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
1016 _ol3e = l3e_get_intpte(*l3tab_ptr);
1017 _nl3e = l3e_get_intpte(nl3e);
1018 _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
1019 BUG_ON(_pl3e != _ol3e);
1022 spin_unlock(&cache->lock);
1025 flush_tlb_mask(d->domain_dirty_cpumask);
1027 #else
1028 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
1029 #endif
1031 static int alloc_l2_table(struct page_info *page, unsigned long type)
1033 struct domain *d = page_get_owner(page);
1034 unsigned long pfn = page_to_mfn(page);
1035 l2_pgentry_t *pl2e;
1036 int i;
1038 pl2e = map_domain_page(pfn);
1040 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1042 if ( is_guest_l2_slot(d, type, i) &&
1043 unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
1044 goto fail;
1046 adjust_guest_l2e(pl2e[i], d);
1049 #if CONFIG_PAGING_LEVELS == 2
1050 /* Xen private mappings. */
1051 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1052 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1053 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1054 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1055 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
1056 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1057 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1058 l2e_from_page(
1059 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
1060 __PAGE_HYPERVISOR);
1061 #endif
1063 unmap_domain_page(pl2e);
1064 return 1;
1066 fail:
1067 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
1068 while ( i-- > 0 )
1069 if ( is_guest_l2_slot(d, type, i) )
1070 put_page_from_l2e(pl2e[i], pfn);
1072 unmap_domain_page(pl2e);
1073 return 0;
1077 #if CONFIG_PAGING_LEVELS >= 3
1078 static int alloc_l3_table(struct page_info *page)
1080 struct domain *d = page_get_owner(page);
1081 unsigned long pfn = page_to_mfn(page);
1082 l3_pgentry_t *pl3e;
1083 int i;
1085 #ifdef CONFIG_X86_PAE
1086 /*
1087 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
1088 * the weird 'extended cr3' format for dealing with high-order address
1089 * bits. We cut some slack for control tools (before vcpu0 is initialised).
1090 */
1091 if ( (pfn >= 0x100000) &&
1092 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
1093 d->vcpu[0] && d->vcpu[0]->is_initialised )
1095 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
1096 return 0;
1098 #endif
1100 pl3e = map_domain_page(pfn);
1102 /*
1103 * PAE guests allocate full pages, but aren't required to initialize
1104 * more than the first four entries; when running in compatibility
1105 * mode, however, the full page is visible to the MMU, and hence all
1106 * 512 entries must be valid/verified, which is most easily achieved
1107 * by clearing them out.
1108 */
1109 if ( IS_COMPAT(d) )
1110 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
1112 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1114 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1115 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
1117 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
1118 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
1119 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
1120 PGT_l2_page_table |
1121 PGT_pae_xen_l2,
1122 d) )
1123 goto fail;
1125 else
1126 #endif
1127 if ( is_guest_l3_slot(i) &&
1128 unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
1129 goto fail;
1131 adjust_guest_l3e(pl3e[i], d);
1134 if ( !create_pae_xen_mappings(d, pl3e) )
1135 goto fail;
1137 unmap_domain_page(pl3e);
1138 return 1;
1140 fail:
1141 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
1142 while ( i-- > 0 )
1143 if ( is_guest_l3_slot(i) )
1144 put_page_from_l3e(pl3e[i], pfn);
1146 unmap_domain_page(pl3e);
1147 return 0;
1149 #else
1150 #define alloc_l3_table(page) (0)
1151 #endif
1153 #if CONFIG_PAGING_LEVELS >= 4
1154 static int alloc_l4_table(struct page_info *page)
1156 struct domain *d = page_get_owner(page);
1157 unsigned long pfn = page_to_mfn(page);
1158 l4_pgentry_t *pl4e = page_to_virt(page);
1159 int i;
1161 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1163 if ( is_guest_l4_slot(d, i) &&
1164 unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
1165 goto fail;
1167 adjust_guest_l4e(pl4e[i], d);
1170 /* Xen private mappings. */
1171 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1172 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1173 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1174 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
1175 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
1176 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
1177 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
1178 __PAGE_HYPERVISOR);
1179 if ( IS_COMPAT(d) )
1180 pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1181 l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
1182 __PAGE_HYPERVISOR);
1184 return 1;
1186 fail:
1187 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
1188 while ( i-- > 0 )
1189 if ( is_guest_l4_slot(d, i) )
1190 put_page_from_l4e(pl4e[i], pfn);
1192 return 0;
1194 #else
1195 #define alloc_l4_table(page) (0)
1196 #endif
1199 static void free_l1_table(struct page_info *page)
1201 struct domain *d = page_get_owner(page);
1202 unsigned long pfn = page_to_mfn(page);
1203 l1_pgentry_t *pl1e;
1204 int i;
1206 pl1e = map_domain_page(pfn);
1208 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1209 if ( is_guest_l1_slot(i) )
1210 put_page_from_l1e(pl1e[i], d);
1212 unmap_domain_page(pl1e);
1216 static void free_l2_table(struct page_info *page)
1218 #ifdef CONFIG_COMPAT
1219 struct domain *d = page_get_owner(page);
1220 #endif
1221 unsigned long pfn = page_to_mfn(page);
1222 l2_pgentry_t *pl2e;
1223 int i;
1225 pl2e = map_domain_page(pfn);
1227 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1228 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
1229 put_page_from_l2e(pl2e[i], pfn);
1231 unmap_domain_page(pl2e);
1233 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
1237 #if CONFIG_PAGING_LEVELS >= 3
1239 static void free_l3_table(struct page_info *page)
1241 struct domain *d = page_get_owner(page);
1242 unsigned long pfn = page_to_mfn(page);
1243 l3_pgentry_t *pl3e;
1244 int i;
1246 pl3e = map_domain_page(pfn);
1248 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1249 if ( is_guest_l3_slot(i) )
1251 put_page_from_l3e(pl3e[i], pfn);
1252 unadjust_guest_l3e(pl3e[i], d);
1255 unmap_domain_page(pl3e);
1258 #endif
1260 #if CONFIG_PAGING_LEVELS >= 4
1262 static void free_l4_table(struct page_info *page)
1264 struct domain *d = page_get_owner(page);
1265 unsigned long pfn = page_to_mfn(page);
1266 l4_pgentry_t *pl4e = page_to_virt(page);
1267 int i;
1269 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1270 if ( is_guest_l4_slot(d, i) )
1271 put_page_from_l4e(pl4e[i], pfn);
1274 #endif
1277 /* How to write an entry to the guest pagetables.
1278 * Returns 0 for failure (pointer not valid), 1 for success. */
1279 static inline int update_intpte(intpte_t *p,
1280 intpte_t old,
1281 intpte_t new,
1282 unsigned long mfn,
1283 struct vcpu *v)
1285 int rv = 1;
1286 #ifndef PTE_UPDATE_WITH_CMPXCHG
1287 rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
1288 #else
1290 intpte_t t = old;
1291 for ( ; ; )
1293 rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
1294 if ( unlikely(rv == 0) )
1296 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1297 ": saw %" PRIpte, old, new, t);
1298 break;
1301 if ( t == old )
1302 break;
1304 /* Allowed to change in Accessed/Dirty flags only. */
1305 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
1307 old = t;
1310 #endif
1311 return rv;
1314 /* Macro that wraps the appropriate type-changes around update_intpte().
1315 * Arguments are: type, ptr, old, new, mfn, vcpu */
1316 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
1317 update_intpte((intpte_t *)(_p), \
1318 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
1319 (_m), (_v))
1321 /* Update the L1 entry at pl1e to new value nl1e. */
1322 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
1323 unsigned long gl1mfn)
1325 l1_pgentry_t ol1e;
1326 struct domain *d = current->domain;
1328 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1329 return 0;
1331 if ( unlikely(paging_mode_refcounts(d)) )
1332 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1334 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1336 /* Translate foreign guest addresses. */
1337 nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)),
1338 l1e_get_flags(nl1e));
1340 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1342 MEM_LOG("Bad L1 flags %x",
1343 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1344 return 0;
1347 adjust_guest_l1e(nl1e, d);
1349 /* Fast path for identical mapping, r/w and presence. */
1350 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
1351 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1353 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1354 return 0;
1356 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1358 put_page_from_l1e(nl1e, d);
1359 return 0;
1362 else
1364 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1365 return 0;
1368 put_page_from_l1e(ol1e, d);
1369 return 1;
1373 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1374 static int mod_l2_entry(l2_pgentry_t *pl2e,
1375 l2_pgentry_t nl2e,
1376 unsigned long pfn,
1377 unsigned long type)
1379 l2_pgentry_t ol2e;
1380 struct domain *d = current->domain;
1382 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
1384 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1385 return 0;
1388 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1389 return 0;
1391 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1393 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1395 MEM_LOG("Bad L2 flags %x",
1396 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1397 return 0;
1400 adjust_guest_l2e(nl2e, d);
1402 /* Fast path for identical mapping and presence. */
1403 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1404 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
1406 if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
1407 return 0;
1409 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1411 put_page_from_l2e(nl2e, pfn);
1412 return 0;
1415 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1417 return 0;
1420 put_page_from_l2e(ol2e, pfn);
1421 return 1;
1424 #if CONFIG_PAGING_LEVELS >= 3
1426 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1427 static int mod_l3_entry(l3_pgentry_t *pl3e,
1428 l3_pgentry_t nl3e,
1429 unsigned long pfn)
1431 l3_pgentry_t ol3e;
1432 struct domain *d = current->domain;
1433 int okay;
1435 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1437 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1438 return 0;
1441 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1442 /*
1443 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1444 * would be a pain to ensure they remain continuously valid throughout.
1445 */
1446 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
1447 pgentry_ptr_to_slot(pl3e) >= 3 )
1448 return 0;
1449 #endif
1451 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1452 return 0;
1454 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1456 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
1458 MEM_LOG("Bad L3 flags %x",
1459 l3e_get_flags(nl3e) & l3_disallow_mask(d));
1460 return 0;
1463 adjust_guest_l3e(nl3e, d);
1465 /* Fast path for identical mapping and presence. */
1466 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1467 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
1469 if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
1470 return 0;
1472 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1474 put_page_from_l3e(nl3e, pfn);
1475 return 0;
1478 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1480 return 0;
1483 okay = create_pae_xen_mappings(d, pl3e);
1484 BUG_ON(!okay);
1486 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
1488 put_page_from_l3e(ol3e, pfn);
1489 return 1;
1492 #endif
1494 #if CONFIG_PAGING_LEVELS >= 4
1496 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1497 static int mod_l4_entry(struct domain *d,
1498 l4_pgentry_t *pl4e,
1499 l4_pgentry_t nl4e,
1500 unsigned long pfn)
1502 l4_pgentry_t ol4e;
1504 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
1506 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1507 return 0;
1510 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1511 return 0;
1513 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1515 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1517 MEM_LOG("Bad L4 flags %x",
1518 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1519 return 0;
1522 adjust_guest_l4e(nl4e, current->domain);
1524 /* Fast path for identical mapping and presence. */
1525 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1526 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
1528 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
1529 return 0;
1531 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1533 put_page_from_l4e(nl4e, pfn);
1534 return 0;
1537 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1539 return 0;
1542 put_page_from_l4e(ol4e, pfn);
1543 return 1;
1546 #endif
1548 int alloc_page_type(struct page_info *page, unsigned long type)
1550 struct domain *owner = page_get_owner(page);
1552 /* A page table is dirtied when its type count becomes non-zero. */
1553 if ( likely(owner != NULL) )
1554 mark_dirty(owner, page_to_mfn(page));
1556 switch ( type & PGT_type_mask )
1558 case PGT_l1_page_table:
1559 return alloc_l1_table(page);
1560 case PGT_l2_page_table:
1561 return alloc_l2_table(page, type);
1562 case PGT_l3_page_table:
1563 return alloc_l3_table(page);
1564 case PGT_l4_page_table:
1565 return alloc_l4_table(page);
1566 case PGT_gdt_page:
1567 case PGT_ldt_page:
1568 return alloc_segdesc_page(page);
1569 default:
1570 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1571 type, page->u.inuse.type_info,
1572 page->count_info);
1573 BUG();
1576 return 0;
1580 void free_page_type(struct page_info *page, unsigned long type)
1582 struct domain *owner = page_get_owner(page);
1583 unsigned long gmfn;
1585 if ( likely(owner != NULL) )
1587 /*
1588 * We have to flush before the next use of the linear mapping
1589 * (e.g., update_va_mapping()) or we could end up modifying a page
1590 * that is no longer a page table (and hence screw up ref counts).
1591 */
1592 if ( current->domain == owner )
1593 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
1594 else
1595 flush_tlb_mask(owner->domain_dirty_cpumask);
1597 if ( unlikely(paging_mode_enabled(owner)) )
1599 /* A page table is dirtied when its type count becomes zero. */
1600 mark_dirty(owner, page_to_mfn(page));
1602 if ( shadow_mode_refcounts(owner) )
1603 return;
1605 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
1606 ASSERT(VALID_M2P(gmfn));
1607 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
1611 switch ( type & PGT_type_mask )
1613 case PGT_l1_page_table:
1614 free_l1_table(page);
1615 break;
1617 case PGT_l2_page_table:
1618 free_l2_table(page);
1619 break;
1621 #if CONFIG_PAGING_LEVELS >= 3
1622 case PGT_l3_page_table:
1623 free_l3_table(page);
1624 break;
1625 #endif
1627 #if CONFIG_PAGING_LEVELS >= 4
1628 case PGT_l4_page_table:
1629 free_l4_table(page);
1630 break;
1631 #endif
1633 default:
1634 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1635 type, page_to_mfn(page));
1636 BUG();
1641 void put_page_type(struct page_info *page)
1643 unsigned long nx, x, y = page->u.inuse.type_info;
1645 again:
1646 do {
1647 x = y;
1648 nx = x - 1;
1650 ASSERT((x & PGT_count_mask) != 0);
1652 if ( unlikely((nx & PGT_count_mask) == 0) )
1654 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1655 likely(nx & PGT_validated) )
1657 /*
1658 * Page-table pages must be unvalidated when count is zero. The
1659 * 'free' is safe because the refcnt is non-zero and validated
1660 * bit is clear => other ops will spin or fail.
1661 */
1662 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1663 x & ~PGT_validated)) != x) )
1664 goto again;
1665 /* We cleared the 'valid bit' so we do the clean up. */
1666 free_page_type(page, x);
1667 /* Carry on, but with the 'valid bit' now clear. */
1668 x &= ~PGT_validated;
1669 nx &= ~PGT_validated;
1672 /*
1673 * Record TLB information for flush later. We do not stamp page
1674 * tables when running in shadow mode:
1675 * 1. Pointless, since it's the shadow pt's which must be tracked.
1676 * 2. Shadow mode reuses this field for shadowed page tables to
1677 * store flags info -- we don't want to conflict with that.
1678 */
1679 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
1680 (page->count_info & PGC_page_table)) )
1681 page->tlbflush_timestamp = tlbflush_current_time();
1684 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1688 int get_page_type(struct page_info *page, unsigned long type)
1690 unsigned long nx, x, y = page->u.inuse.type_info;
1692 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
1694 again:
1695 do {
1696 x = y;
1697 nx = x + 1;
1698 if ( unlikely((nx & PGT_count_mask) == 0) )
1700 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1701 return 0;
1703 else if ( unlikely((x & PGT_count_mask) == 0) )
1705 struct domain *d = page_get_owner(page);
1707 /* Never allow a shadowed frame to go from type count 0 to 1 */
1708 if ( d && shadow_mode_enabled(d) )
1709 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
1711 ASSERT(!(x & PGT_pae_xen_l2));
1712 if ( (x & PGT_type_mask) != type )
1714 /*
1715 * On type change we check to flush stale TLB entries. This
1716 * may be unnecessary (e.g., page was GDT/LDT) but those
1717 * circumstances should be very rare.
1718 */
1719 cpumask_t mask = d->domain_dirty_cpumask;
1721 /* Don't flush if the timestamp is old enough */
1722 tlbflush_filter(mask, page->tlbflush_timestamp);
1724 if ( unlikely(!cpus_empty(mask)) &&
1725 /* Shadow mode: track only writable pages. */
1726 (!shadow_mode_enabled(page_get_owner(page)) ||
1727 ((nx & PGT_type_mask) == PGT_writable_page)) )
1729 perfc_incr(need_flush_tlb_flush);
1730 flush_tlb_mask(mask);
1733 /* We lose existing type, back pointer, and validity. */
1734 nx &= ~(PGT_type_mask | PGT_validated);
1735 nx |= type;
1737 /* No special validation needed for writable pages. */
1738 /* Page tables and GDT/LDT need to be scanned for validity. */
1739 if ( type == PGT_writable_page )
1740 nx |= PGT_validated;
1743 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
1745 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1746 (type != PGT_l1_page_table) )
1747 MEM_LOG("Bad type (saw %" PRtype_info
1748 " != exp %" PRtype_info ") "
1749 "for mfn %lx (pfn %lx)",
1750 x, type, page_to_mfn(page),
1751 get_gpfn_from_mfn(page_to_mfn(page)));
1752 return 0;
1754 else if ( unlikely(!(x & PGT_validated)) )
1756 /* Someone else is updating validation of this page. Wait... */
1757 while ( (y = page->u.inuse.type_info) == x )
1758 cpu_relax();
1759 goto again;
1762 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1764 if ( unlikely(!(nx & PGT_validated)) )
1766 /* Try to validate page type; drop the new reference on failure. */
1767 if ( unlikely(!alloc_page_type(page, type)) )
1769 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1770 PRtype_info ": caf=%08x taf=%" PRtype_info,
1771 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1772 type, page->count_info, page->u.inuse.type_info);
1773 /* No one else can get a reference. We hold the only ref. */
1774 page->u.inuse.type_info = 0;
1775 return 0;
1778 /* No one else is updating simultaneously. */
1779 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1782 return 1;
1786 int new_guest_cr3(unsigned long mfn)
1788 struct vcpu *v = current;
1789 struct domain *d = v->domain;
1790 int okay;
1791 unsigned long old_base_mfn;
1793 #ifdef CONFIG_COMPAT
1794 if ( IS_COMPAT(d) )
1796 okay = paging_mode_refcounts(d)
1797 ? 0 /* Old code was broken, but what should it be? */
1798 : mod_l4_entry(
1799 d,
1800 __va(pagetable_get_paddr(v->arch.guest_table)),
1801 l4e_from_pfn(
1802 mfn,
1803 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
1804 pagetable_get_pfn(v->arch.guest_table));
1805 if ( unlikely(!okay) )
1807 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
1808 return 0;
1811 invalidate_shadow_ldt(v);
1812 write_ptbase(v);
1814 return 1;
1816 #endif
1817 okay = paging_mode_refcounts(d)
1818 ? get_page_from_pagenr(mfn, d)
1819 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1820 if ( unlikely(!okay) )
1822 MEM_LOG("Error while installing new baseptr %lx", mfn);
1823 return 0;
1826 invalidate_shadow_ldt(v);
1828 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1830 v->arch.guest_table = pagetable_from_pfn(mfn);
1831 update_cr3(v);
1833 write_ptbase(v);
1835 if ( likely(old_base_mfn != 0) )
1837 if ( paging_mode_refcounts(d) )
1838 put_page(mfn_to_page(old_base_mfn));
1839 else
1840 put_page_and_type(mfn_to_page(old_base_mfn));
1843 return 1;
1846 static void process_deferred_ops(void)
1848 unsigned int deferred_ops;
1849 struct domain *d = current->domain;
1850 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1852 deferred_ops = info->deferred_ops;
1853 info->deferred_ops = 0;
1855 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1857 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1858 flush_tlb_mask(d->domain_dirty_cpumask);
1859 else
1860 local_flush_tlb();
1863 if ( deferred_ops & DOP_RELOAD_LDT )
1864 (void)map_ldt_shadow_page(0);
1866 if ( unlikely(info->foreign != NULL) )
1868 rcu_unlock_domain(info->foreign);
1869 info->foreign = NULL;
1873 static int set_foreigndom(domid_t domid)
1875 struct domain *e, *d = current->domain;
1876 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1877 int okay = 1;
1879 ASSERT(info->foreign == NULL);
1881 if ( likely(domid == DOMID_SELF) )
1882 goto out;
1884 if ( unlikely(domid == d->domain_id) )
1886 MEM_LOG("Dom %u tried to specify itself as foreign domain",
1887 d->domain_id);
1888 okay = 0;
1890 else if ( unlikely(paging_mode_translate(d)) )
1892 MEM_LOG("Cannot mix foreign mappings with translated domains");
1893 okay = 0;
1895 else if ( !IS_PRIV(d) )
1897 switch ( domid )
1899 case DOMID_IO:
1900 info->foreign = rcu_lock_domain(dom_io);
1901 break;
1902 default:
1903 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1904 okay = 0;
1905 break;
1908 else
1910 info->foreign = e = rcu_lock_domain_by_id(domid);
1911 if ( e == NULL )
1913 switch ( domid )
1915 case DOMID_XEN:
1916 info->foreign = rcu_lock_domain(dom_xen);
1917 break;
1918 case DOMID_IO:
1919 info->foreign = rcu_lock_domain(dom_io);
1920 break;
1921 default:
1922 MEM_LOG("Unknown domain '%u'", domid);
1923 okay = 0;
1924 break;
1929 out:
1930 return okay;
1933 static inline cpumask_t vcpumask_to_pcpumask(
1934 struct domain *d, unsigned long vmask)
1936 unsigned int vcpu_id;
1937 cpumask_t pmask = CPU_MASK_NONE;
1938 struct vcpu *v;
1940 while ( vmask != 0 )
1942 vcpu_id = find_first_set_bit(vmask);
1943 vmask &= ~(1UL << vcpu_id);
1944 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1945 ((v = d->vcpu[vcpu_id]) != NULL) )
1946 cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
1949 return pmask;
1952 int do_mmuext_op(
1953 XEN_GUEST_HANDLE(mmuext_op_t) uops,
1954 unsigned int count,
1955 XEN_GUEST_HANDLE(uint) pdone,
1956 unsigned int foreigndom)
1958 struct mmuext_op op;
1959 int rc = 0, i = 0, okay;
1960 unsigned long mfn = 0, gmfn = 0, type;
1961 unsigned int done = 0;
1962 struct page_info *page;
1963 struct vcpu *v = current;
1964 struct domain *d = v->domain;
1966 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1968 count &= ~MMU_UPDATE_PREEMPTED;
1969 if ( unlikely(!guest_handle_is_null(pdone)) )
1970 (void)copy_from_guest(&done, pdone, 1);
1972 else
1973 perfc_incr(calls_to_mmuext_op);
1975 if ( unlikely(!guest_handle_okay(uops, count)) )
1977 rc = -EFAULT;
1978 goto out;
1981 if ( !set_foreigndom(foreigndom) )
1983 rc = -ESRCH;
1984 goto out;
1987 LOCK_BIGLOCK(d);
1989 for ( i = 0; i < count; i++ )
1991 if ( hypercall_preempt_check() )
1993 rc = hypercall_create_continuation(
1994 __HYPERVISOR_mmuext_op, "hihi",
1995 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1996 break;
1999 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
2001 MEM_LOG("Bad __copy_from_guest");
2002 rc = -EFAULT;
2003 break;
2006 okay = 1;
2007 gmfn = op.arg1.mfn;
2008 mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
2009 page = mfn_to_page(mfn);
2011 switch ( op.cmd )
2013 case MMUEXT_PIN_L1_TABLE:
2014 type = PGT_l1_page_table;
2015 goto pin_page;
2017 case MMUEXT_PIN_L2_TABLE:
2018 type = PGT_l2_page_table;
2019 goto pin_page;
2021 case MMUEXT_PIN_L3_TABLE:
2022 type = PGT_l3_page_table;
2023 goto pin_page;
2025 case MMUEXT_PIN_L4_TABLE:
2026 if ( IS_COMPAT(FOREIGNDOM) )
2027 break;
2028 type = PGT_l4_page_table;
2030 pin_page:
2031 /* Ignore pinning of invalid paging levels. */
2032 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
2033 break;
2035 if ( paging_mode_refcounts(FOREIGNDOM) )
2036 break;
2038 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
2039 if ( unlikely(!okay) )
2041 MEM_LOG("Error while pinning mfn %lx", mfn);
2042 break;
2045 if ( unlikely(test_and_set_bit(_PGT_pinned,
2046 &page->u.inuse.type_info)) )
2048 MEM_LOG("Mfn %lx already pinned", mfn);
2049 put_page_and_type(page);
2050 okay = 0;
2051 break;
2054 /* A page is dirtied when its pin status is set. */
2055 mark_dirty(d, mfn);
2057 /* We can race domain destruction (domain_relinquish_resources). */
2058 if ( unlikely(this_cpu(percpu_mm_info).foreign != NULL) )
2060 int drop_ref;
2061 spin_lock(&FOREIGNDOM->page_alloc_lock);
2062 drop_ref = (FOREIGNDOM->is_dying &&
2063 test_and_clear_bit(_PGT_pinned,
2064 &page->u.inuse.type_info));
2065 spin_unlock(&FOREIGNDOM->page_alloc_lock);
2066 if ( drop_ref )
2067 put_page_and_type(page);
2070 break;
2072 case MMUEXT_UNPIN_TABLE:
2073 if ( paging_mode_refcounts(d) )
2074 break;
2076 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
2078 MEM_LOG("Mfn %lx bad domain (dom=%p)",
2079 mfn, page_get_owner(page));
2081 else if ( likely(test_and_clear_bit(_PGT_pinned,
2082 &page->u.inuse.type_info)) )
2084 put_page_and_type(page);
2085 put_page(page);
2086 /* A page is dirtied when its pin status is cleared. */
2087 mark_dirty(d, mfn);
2089 else
2091 okay = 0;
2092 put_page(page);
2093 MEM_LOG("Mfn %lx not pinned", mfn);
2095 break;
2097 case MMUEXT_NEW_BASEPTR:
2098 okay = new_guest_cr3(mfn);
2099 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
2100 break;
2102 #ifdef __x86_64__
2103 case MMUEXT_NEW_USER_BASEPTR: {
2104 unsigned long old_mfn;
2106 if ( mfn != 0 )
2108 if ( paging_mode_refcounts(d) )
2109 okay = get_page_from_pagenr(mfn, d);
2110 else
2111 okay = get_page_and_type_from_pagenr(
2112 mfn, PGT_root_page_table, d);
2113 if ( unlikely(!okay) )
2115 MEM_LOG("Error while installing new mfn %lx", mfn);
2116 break;
2120 old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
2121 v->arch.guest_table_user = pagetable_from_pfn(mfn);
2123 if ( old_mfn != 0 )
2125 if ( paging_mode_refcounts(d) )
2126 put_page(mfn_to_page(old_mfn));
2127 else
2128 put_page_and_type(mfn_to_page(old_mfn));
2131 break;
2133 #endif
2135 case MMUEXT_TLB_FLUSH_LOCAL:
2136 this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
2137 break;
2139 case MMUEXT_INVLPG_LOCAL:
2140 if ( !paging_mode_enabled(d)
2141 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
2142 local_flush_tlb_one(op.arg1.linear_addr);
2143 break;
2145 case MMUEXT_TLB_FLUSH_MULTI:
2146 case MMUEXT_INVLPG_MULTI:
2148 unsigned long vmask;
2149 cpumask_t pmask;
2150 if ( unlikely(copy_from_guest(&vmask, op.arg2.vcpumask, 1)) )
2152 okay = 0;
2153 break;
2155 pmask = vcpumask_to_pcpumask(d, vmask);
2156 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
2157 flush_tlb_mask(pmask);
2158 else
2159 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
2160 break;
2163 case MMUEXT_TLB_FLUSH_ALL:
2164 flush_tlb_mask(d->domain_dirty_cpumask);
2165 break;
2167 case MMUEXT_INVLPG_ALL:
2168 flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
2169 break;
2171 case MMUEXT_FLUSH_CACHE:
2172 if ( unlikely(!cache_flush_permitted(d)) )
2174 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
2175 okay = 0;
2177 else
2179 wbinvd();
2181 break;
2183 case MMUEXT_SET_LDT:
2185 unsigned long ptr = op.arg1.linear_addr;
2186 unsigned long ents = op.arg2.nr_ents;
2188 if ( paging_mode_external(d) )
2190 MEM_LOG("ignoring SET_LDT hypercall from external "
2191 "domain %u", d->domain_id);
2192 okay = 0;
2194 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
2195 (ents > 8192) ||
2196 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
2198 okay = 0;
2199 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
2201 else if ( (v->arch.guest_context.ldt_ents != ents) ||
2202 (v->arch.guest_context.ldt_base != ptr) )
2204 invalidate_shadow_ldt(v);
2205 v->arch.guest_context.ldt_base = ptr;
2206 v->arch.guest_context.ldt_ents = ents;
2207 load_LDT(v);
2208 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
2209 if ( ents != 0 )
2210 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
2212 break;
2215 default:
2216 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
2217 rc = -ENOSYS;
2218 okay = 0;
2219 break;
2222 if ( unlikely(!okay) )
2224 rc = rc ? rc : -EINVAL;
2225 break;
2228 guest_handle_add_offset(uops, 1);
2231 process_deferred_ops();
2233 UNLOCK_BIGLOCK(d);
2235 perfc_add(num_mmuext_ops, i);
2237 out:
2238 /* Add incremental work we have done to the @done output parameter. */
2239 if ( unlikely(!guest_handle_is_null(pdone)) )
2241 done += i;
2242 copy_to_guest(pdone, &done, 1);
2245 return rc;
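/*
 * Illustrative guest-side sketch (not part of this file): pinning an L2
 * page table with a single mmuext_op request. The cmd/arg1.mfn encoding
 * matches the switch above; HYPERVISOR_mmuext_op() is assumed to be the
 * guest kernel's hypercall wrapper, and the frame must already validate
 * as an L2 table for the pin to succeed.
 */
#if 0
static int example_pin_l2_table(unsigned long l2_mfn)
{
    struct mmuext_op op;

    op.cmd = MMUEXT_PIN_L2_TABLE;    /* handled by the pin_page path above */
    op.arg1.mfn = l2_mfn;            /* frame holding the guest L2 table   */

    /* One op, no 'done' output, no foreign domain. */
    return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}
#endif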
2248 int do_mmu_update(
2249 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
2250 unsigned int count,
2251 XEN_GUEST_HANDLE(uint) pdone,
2252 unsigned int foreigndom)
2254 struct mmu_update req;
2255 void *va;
2256 unsigned long gpfn, gmfn, mfn;
2257 struct page_info *page;
2258 int rc = 0, okay = 1, i = 0;
2259 unsigned int cmd, done = 0;
2260 struct vcpu *v = current;
2261 struct domain *d = v->domain;
2262 unsigned long type_info;
2263 struct domain_mmap_cache mapcache, sh_mapcache;
2265 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
2267 count &= ~MMU_UPDATE_PREEMPTED;
2268 if ( unlikely(!guest_handle_is_null(pdone)) )
2269 (void)copy_from_guest(&done, pdone, 1);
2271 else
2272 perfc_incr(calls_to_mmu_update);
2274 if ( unlikely(!guest_handle_okay(ureqs, count)) )
2276 rc = -EFAULT;
2277 goto out;
2280 if ( !set_foreigndom(foreigndom) )
2282 rc = -ESRCH;
2283 goto out;
2286 domain_mmap_cache_init(&mapcache);
2287 domain_mmap_cache_init(&sh_mapcache);
2289 LOCK_BIGLOCK(d);
2291 for ( i = 0; i < count; i++ )
2293 if ( hypercall_preempt_check() )
2295 rc = hypercall_create_continuation(
2296 __HYPERVISOR_mmu_update, "hihi",
2297 ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2298 break;
2301 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
2303 MEM_LOG("Bad __copy_from_guest");
2304 rc = -EFAULT;
2305 break;
2308 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2309 okay = 0;
2311 switch ( cmd )
2313 /*
2314 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2315 */
2316 case MMU_NORMAL_PT_UPDATE:
2318 gmfn = req.ptr >> PAGE_SHIFT;
2319 mfn = gmfn_to_mfn(d, gmfn);
2321 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2323 MEM_LOG("Could not get page for normal update");
2324 break;
2327 va = map_domain_page_with_cache(mfn, &mapcache);
2328 va = (void *)((unsigned long)va +
2329 (unsigned long)(req.ptr & ~PAGE_MASK));
2330 page = mfn_to_page(mfn);
2332 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2334 case PGT_l1_page_table:
2335 case PGT_l2_page_table:
2336 case PGT_l3_page_table:
2337 case PGT_l4_page_table:
2339 if ( paging_mode_refcounts(d) )
2341 MEM_LOG("mmu update on auto-refcounted domain!");
2342 break;
2345 if ( unlikely(!get_page_type(
2346 page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
2347 goto not_a_pt;
2349 switch ( type_info & PGT_type_mask )
2351 case PGT_l1_page_table:
2353 l1_pgentry_t l1e = l1e_from_intpte(req.val);
2354 okay = mod_l1_entry(va, l1e, mfn);
2356 break;
2357 case PGT_l2_page_table:
2359 l2_pgentry_t l2e = l2e_from_intpte(req.val);
2360 okay = mod_l2_entry(va, l2e, mfn, type_info);
2362 break;
2363 #if CONFIG_PAGING_LEVELS >= 3
2364 case PGT_l3_page_table:
2366 l3_pgentry_t l3e = l3e_from_intpte(req.val);
2367 okay = mod_l3_entry(va, l3e, mfn);
2369 break;
2370 #endif
2371 #if CONFIG_PAGING_LEVELS >= 4
2372 case PGT_l4_page_table:
2374 l4_pgentry_t l4e = l4e_from_intpte(req.val);
2375 okay = mod_l4_entry(d, va, l4e, mfn);
2377 break;
2378 #endif
2381 put_page_type(page);
2383 break;
2385 default:
2386 not_a_pt:
2388 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2389 break;
2391 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
2393 put_page_type(page);
2395 break;
2398 unmap_domain_page_with_cache(va, &mapcache);
2400 put_page(page);
2401 break;
2403 case MMU_MACHPHYS_UPDATE:
2405 mfn = req.ptr >> PAGE_SHIFT;
2406 gpfn = req.val;
2408 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2410 MEM_LOG("Could not get page for mach->phys update");
2411 break;
2414 if ( unlikely(paging_mode_translate(FOREIGNDOM)) )
2416 MEM_LOG("Mach-phys update on auto-translate guest");
2417 break;
2420 set_gpfn_from_mfn(mfn, gpfn);
2421 okay = 1;
2423 mark_dirty(FOREIGNDOM, mfn);
2425 put_page(mfn_to_page(mfn));
2426 break;
2428 default:
2429 MEM_LOG("Invalid page update command %x", cmd);
2430 rc = -ENOSYS;
2431 okay = 0;
2432 break;
2435 if ( unlikely(!okay) )
2437 rc = rc ? rc : -EINVAL;
2438 break;
2441 guest_handle_add_offset(ureqs, 1);
2444 process_deferred_ops();
2446 UNLOCK_BIGLOCK(d);
2448 domain_mmap_cache_destroy(&mapcache);
2449 domain_mmap_cache_destroy(&sh_mapcache);
2451 perfc_add(num_page_updates, i);
2453 out:
2454 /* Add incremental work we have done to the @done output parameter. */
2455 if ( unlikely(!guest_handle_is_null(pdone)) )
2457 done += i;
2458 copy_to_guest(pdone, &done, 1);
2461 return rc;
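/*
 * Illustrative guest-side sketch (not part of this file): rewriting one
 * PTE via do_mmu_update(). The low bits of 'ptr' carry the command, here
 * MMU_NORMAL_PT_UPDATE, exactly as decoded above, and 'val' carries the
 * new PTE contents. HYPERVISOR_mmu_update() is assumed to be the guest
 * kernel's hypercall wrapper.
 */
#if 0
static int example_write_pte(uint64_t pte_maddr, uint64_t new_pte)
{
    struct mmu_update u;

    u.ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;   /* address of the PTE slot   */
    u.val = new_pte;                            /* value to validate/install */

    return HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
}
#endif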
2465 static int create_grant_pte_mapping(
2466 uint64_t pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
2468 int rc = GNTST_okay;
2469 void *va;
2470 unsigned long gmfn, mfn;
2471 struct page_info *page;
2472 u32 type;
2473 l1_pgentry_t ol1e;
2474 struct domain *d = v->domain;
2476 ASSERT(spin_is_locked(&d->big_lock));
2478 adjust_guest_l1e(nl1e, d);
2480 gmfn = pte_addr >> PAGE_SHIFT;
2481 mfn = gmfn_to_mfn(d, gmfn);
2483 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2485 MEM_LOG("Could not get page for normal update");
2486 return GNTST_general_error;
2489 va = map_domain_page(mfn);
2490 va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
2491 page = mfn_to_page(mfn);
2493 type = page->u.inuse.type_info & PGT_type_mask;
2494 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2496 MEM_LOG("Grant map attempted to update a non-L1 page");
2497 rc = GNTST_general_error;
2498 goto failed;
2501 ol1e = *(l1_pgentry_t *)va;
2502 if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
2504 put_page_type(page);
2505 rc = GNTST_general_error;
2506 goto failed;
2509 if ( !paging_mode_refcounts(d) )
2510 put_page_from_l1e(ol1e, d);
2512 put_page_type(page);
2514 failed:
2515 unmap_domain_page(va);
2516 put_page(page);
2518 return rc;
2521 static int destroy_grant_pte_mapping(
2522 uint64_t addr, unsigned long frame, struct domain *d)
2524 int rc = GNTST_okay;
2525 void *va;
2526 unsigned long gmfn, mfn;
2527 struct page_info *page;
2528 u32 type;
2529 l1_pgentry_t ol1e;
2531 gmfn = addr >> PAGE_SHIFT;
2532 mfn = gmfn_to_mfn(d, gmfn);
2534 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2536 MEM_LOG("Could not get page for normal update");
2537 return GNTST_general_error;
2540 va = map_domain_page(mfn);
2541 va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
2542 page = mfn_to_page(mfn);
2544 type = page->u.inuse.type_info & PGT_type_mask;
2545 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2547 MEM_LOG("Grant map attempted to update a non-L1 page");
2548 rc = GNTST_general_error;
2549 goto failed;
2552 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2554 put_page_type(page);
2555 rc = GNTST_general_error;
2556 goto failed;
2559 /* Check that the virtual address supplied is actually mapped to frame. */
2560 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2562 MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
2563 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2564 put_page_type(page);
2565 rc = GNTST_general_error;
2566 goto failed;
2569 /* Delete pagetable entry. */
2570 if ( unlikely(!UPDATE_ENTRY(l1,
2571 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
2572 d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
2574 MEM_LOG("Cannot delete PTE entry at %p", va);
2575 put_page_type(page);
2576 rc = GNTST_general_error;
2577 goto failed;
2580 put_page_type(page);
2582 failed:
2583 unmap_domain_page(va);
2584 put_page(page);
2585 return rc;
2589 static int create_grant_va_mapping(
2590 unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
2592 l1_pgentry_t *pl1e, ol1e;
2593 struct domain *d = v->domain;
2594 unsigned long gl1mfn;
2595 int okay;
2597 ASSERT(spin_is_locked(&d->big_lock));
2599 adjust_guest_l1e(nl1e, d);
2601 pl1e = guest_map_l1e(v, va, &gl1mfn);
2602 if ( !pl1e )
2604 MEM_LOG("Could not find L1 PTE for address %lx", va);
2605 return GNTST_general_error;
2607 ol1e = *pl1e;
2608 okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
2609 guest_unmap_l1e(v, pl1e);
2610 pl1e = NULL;
2612 if ( !okay )
2613 return GNTST_general_error;
2615 if ( !paging_mode_refcounts(d) )
2616 put_page_from_l1e(ol1e, d);
2618 return GNTST_okay;
2621 static int destroy_grant_va_mapping(
2622 unsigned long addr, unsigned long frame, struct vcpu *v)
2624 l1_pgentry_t *pl1e, ol1e;
2625 unsigned long gl1mfn;
2626 int rc = 0;
2628 pl1e = guest_map_l1e(v, addr, &gl1mfn);
2629 if ( !pl1e )
2631 MEM_LOG("Could not find L1 PTE for address %lx", addr);
2632 return GNTST_general_error;
2634 ol1e = *pl1e;
2636 /* Check that the virtual address supplied is actually mapped to frame. */
2637 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2639 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2640 l1e_get_pfn(ol1e), addr, frame);
2641 rc = GNTST_general_error;
2642 goto out;
2645 /* Delete pagetable entry. */
2646 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
2648 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2649 rc = GNTST_general_error;
2650 goto out;
2653 out:
2654 guest_unmap_l1e(v, pl1e);
2655 return rc;
2658 int create_grant_host_mapping(
2659 uint64_t addr, unsigned long frame, unsigned int flags)
2661 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2663 if ( (flags & GNTMAP_application_map) )
2664 l1e_add_flags(pte,_PAGE_USER);
2665 if ( !(flags & GNTMAP_readonly) )
2666 l1e_add_flags(pte,_PAGE_RW);
2668 if ( flags & GNTMAP_contains_pte )
2669 return create_grant_pte_mapping(addr, pte, current);
2670 return create_grant_va_mapping(addr, pte, current);
2673 int destroy_grant_host_mapping(
2674 uint64_t addr, unsigned long frame, unsigned int flags)
2676 if ( flags & GNTMAP_contains_pte )
2677 return destroy_grant_pte_mapping(addr, frame, current->domain);
2678 return destroy_grant_va_mapping(addr, frame, current);
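/*
 * Note on the grant-mapping helpers above: GNTMAP_contains_pte selects
 * whether 'addr' is the machine address of an explicit PTE slot (the
 * *_grant_pte_mapping path) or a linear address resolved through the
 * guest's page tables (the *_grant_va_mapping path). The PTE itself is
 * built from GRANT_PTE_FLAGS, with _PAGE_USER added for
 * GNTMAP_application_map and _PAGE_RW added when GNTMAP_readonly is clear.
 */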
2681 int steal_page(
2682 struct domain *d, struct page_info *page, unsigned int memflags)
2684 u32 _d, _nd, x, y;
2686 spin_lock(&d->page_alloc_lock);
2688 /*
2689 * The tricky bit: atomically release ownership while there is just one
2690 * benign reference to the page (PGC_allocated). If that reference
2691 * disappears then the deallocation routine will safely spin.
2692 */
2693 _d = pickle_domptr(d);
2694 _nd = page->u.inuse._domain;
2695 y = page->count_info;
2696 do {
2697 x = y;
2698 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2699 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2700 MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2701 " caf=%08x, taf=%" PRtype_info "\n",
2702 (void *) page_to_mfn(page),
2703 d, d->domain_id, unpickle_domptr(_nd), x,
2704 page->u.inuse.type_info);
2705 spin_unlock(&d->page_alloc_lock);
2706 return -1;
2708 __asm__ __volatile__(
2709 LOCK_PREFIX "cmpxchg8b %2"
2710 : "=d" (_nd), "=a" (y),
2711 "=m" (*(volatile u64 *)(&page->count_info))
2712 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2713 } while (unlikely(_nd != _d) || unlikely(y != x));
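    /*
     * The cmpxchg8b above compares the 64-bit (count_info, _domain) pair
     * against (x, _d) and, if it still matches, replaces it with (x, NULL):
     * the reference count is left unchanged while the owner pointer is
     * cleared in the same atomic operation. On a mismatch the current
     * values are returned in y/_nd and the loop retries.
     */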
2715 /*
2716 * Unlink from 'd'. At least one reference remains (now anonymous), so
2717 * no one else is spinning to try to delete this page from 'd'.
2718 */
2719 if ( !(memflags & MEMF_no_refcount) )
2720 d->tot_pages--;
2721 list_del(&page->list);
2723 spin_unlock(&d->page_alloc_lock);
2725 return 0;
2728 int do_update_va_mapping(unsigned long va, u64 val64,
2729 unsigned long flags)
2731 l1_pgentry_t val = l1e_from_intpte(val64);
2732 struct vcpu *v = current;
2733 struct domain *d = v->domain;
2734 l1_pgentry_t *pl1e;
2735 unsigned long vmask, bmap_ptr, gl1mfn;
2736 cpumask_t pmask;
2737 int rc = 0;
2739 perfc_incr(calls_to_update_va);
2741 if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) )
2742 return -EINVAL;
2744 LOCK_BIGLOCK(d);
2746 pl1e = guest_map_l1e(v, va, &gl1mfn);
2748 if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
2749 rc = -EINVAL;
2751 if ( pl1e )
2752 guest_unmap_l1e(v, pl1e);
2753 pl1e = NULL;
2755 process_deferred_ops();
2757 UNLOCK_BIGLOCK(d);
2759 switch ( flags & UVMF_FLUSHTYPE_MASK )
2761 case UVMF_TLB_FLUSH:
2762 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2764 case UVMF_LOCAL:
2765 local_flush_tlb();
2766 break;
2767 case UVMF_ALL:
2768 flush_tlb_mask(d->domain_dirty_cpumask);
2769 break;
2770 default:
2771 if ( unlikely(!IS_COMPAT(d) ?
2772 get_user(vmask, (unsigned long *)bmap_ptr) :
2773 get_user(vmask, (unsigned int *)bmap_ptr)) )
2774 rc = -EFAULT;
2775 pmask = vcpumask_to_pcpumask(d, vmask);
2776 flush_tlb_mask(pmask);
2777 break;
2779 break;
2781 case UVMF_INVLPG:
2782 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2784 case UVMF_LOCAL:
2785 if ( !paging_mode_enabled(d)
2786 || (paging_invlpg(current, va) != 0) )
2787 local_flush_tlb_one(va);
2788 break;
2789 case UVMF_ALL:
2790 flush_tlb_one_mask(d->domain_dirty_cpumask, va);
2791 break;
2792 default:
2793 if ( unlikely(!IS_COMPAT(d) ?
2794 get_user(vmask, (unsigned long *)bmap_ptr) :
2795 get_user(vmask, (unsigned int *)bmap_ptr)) )
2796 rc = -EFAULT;
2797 pmask = vcpumask_to_pcpumask(d, vmask);
2798 flush_tlb_one_mask(pmask, va);
2799 break;
2801 break;
2804 return rc;
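/*
 * Illustrative guest-side sketch (not part of this file): remapping one
 * virtual address and flushing just that entry on the local CPU. The
 * UVMF_INVLPG | UVMF_LOCAL flag encoding matches the switch above;
 * HYPERVISOR_update_va_mapping() is assumed to be the guest kernel's
 * hypercall wrapper, and PV PTEs carry machine frame numbers.
 */
#if 0
static int example_remap_va(unsigned long va, unsigned long mfn)
{
    u64 new_pte = ((u64)mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_RW;

    return HYPERVISOR_update_va_mapping(va, new_pte, UVMF_INVLPG | UVMF_LOCAL);
}
#endif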
2807 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2808 unsigned long flags,
2809 domid_t domid)
2811 int rc;
2813 if ( unlikely(!IS_PRIV(current->domain)) )
2814 return -EPERM;
2816 if ( !set_foreigndom(domid) )
2817 return -ESRCH;
2819 rc = do_update_va_mapping(va, val64, flags);
2821 BUG_ON(this_cpu(percpu_mm_info).deferred_ops);
2822 process_deferred_ops(); /* only to clear foreigndom */
2824 return rc;
2829 /*************************
2830 * Descriptor Tables
2831 */
2833 void destroy_gdt(struct vcpu *v)
2835 int i;
2836 unsigned long pfn;
2838 v->arch.guest_context.gdt_ents = 0;
2839 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2841 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2842 put_page_and_type(mfn_to_page(pfn));
2843 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
2844 v->arch.guest_context.gdt_frames[i] = 0;
2849 long set_gdt(struct vcpu *v,
2850 unsigned long *frames,
2851 unsigned int entries)
2853 struct domain *d = v->domain;
2854 /* NB. There are 512 8-byte entries per GDT page. */
2855 int i, nr_pages = (entries + 511) / 512;
2856 unsigned long mfn;
2858 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2859 return -EINVAL;
2861 /* Check the pages in the new GDT. */
2862 for ( i = 0; i < nr_pages; i++ ) {
2863 mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
2864 if ( !mfn_valid(mfn) ||
2865 !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
2866 goto fail;
2869 /* Tear down the old GDT. */
2870 destroy_gdt(v);
2872 /* Install the new GDT. */
2873 v->arch.guest_context.gdt_ents = entries;
2874 for ( i = 0; i < nr_pages; i++ )
2876 v->arch.guest_context.gdt_frames[i] = frames[i];
2877 l1e_write(&v->arch.perdomain_ptes[i],
2878 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
2881 return 0;
2883 fail:
2884 while ( i-- > 0 )
2885 put_page_and_type(mfn_to_page(frames[i]));
2886 return -EINVAL;
2890 long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
2892 int nr_pages = (entries + 511) / 512;
2893 unsigned long frames[16];
2894 long ret;
2896 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */
2897 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2898 return -EINVAL;
2900 if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
2901 return -EFAULT;
2903 LOCK_BIGLOCK(current->domain);
2905 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2906 local_flush_tlb();
2908 UNLOCK_BIGLOCK(current->domain);
2910 return ret;
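/*
 * Illustrative guest-side sketch (not part of this file): installing a
 * one-page GDT. The frame must be owned by the caller and already hold
 * valid descriptors, since set_gdt() takes a PGT_gdt_page type reference
 * on it. HYPERVISOR_set_gdt() is assumed to be the guest kernel's
 * hypercall wrapper.
 */
#if 0
static long example_install_gdt(unsigned long gdt_gmfn)
{
    unsigned long frames[1] = { gdt_gmfn };

    /* 512 eight-byte descriptors fit in a single 4kB frame. */
    return HYPERVISOR_set_gdt(frames, 512);
}
#endif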
2914 long do_update_descriptor(u64 pa, u64 desc)
2916 struct domain *dom = current->domain;
2917 unsigned long gmfn = pa >> PAGE_SHIFT;
2918 unsigned long mfn;
2919 unsigned int offset;
2920 struct desc_struct *gdt_pent, d;
2921 struct page_info *page;
2922 long ret = -EINVAL;
2924 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2926 *(u64 *)&d = desc;
2928 LOCK_BIGLOCK(dom);
2930 mfn = gmfn_to_mfn(dom, gmfn);
2931 if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2932 !mfn_valid(mfn) ||
2933 !check_descriptor(dom, &d) )
2935 UNLOCK_BIGLOCK(dom);
2936 return -EINVAL;
2939 page = mfn_to_page(mfn);
2940 if ( unlikely(!get_page(page, dom)) )
2942 UNLOCK_BIGLOCK(dom);
2943 return -EINVAL;
2946 /* Check if the given frame is in use in an unsafe context. */
2947 switch ( page->u.inuse.type_info & PGT_type_mask )
2949 case PGT_gdt_page:
2950 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2951 goto out;
2952 break;
2953 case PGT_ldt_page:
2954 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2955 goto out;
2956 break;
2957 default:
2958 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2959 goto out;
2960 break;
2963 mark_dirty(dom, mfn);
2965 /* All is good so make the update. */
2966 gdt_pent = map_domain_page(mfn);
2967 memcpy(&gdt_pent[offset], &d, 8);
2968 unmap_domain_page(gdt_pent);
2970 put_page_type(page);
2972 ret = 0; /* success */
2974 out:
2975 put_page(page);
2977 UNLOCK_BIGLOCK(dom);
2979 return ret;
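/*
 * do_update_descriptor() above is the descriptor-table analogue of the
 * PTE paths earlier in this file: the 8-byte descriptor at machine
 * address 'pa' is validated with check_descriptor() and only written if
 * the containing frame is a GDT page, an LDT page, or plain writable
 * memory owned by the caller.
 */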
2982 typedef struct e820entry e820entry_t;
2983 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
2985 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
2987 switch ( op )
2989 case XENMEM_add_to_physmap:
2991 struct xen_add_to_physmap xatp;
2992 unsigned long prev_mfn, mfn = 0, gpfn;
2993 struct domain *d;
2995 if ( copy_from_guest(&xatp, arg, 1) )
2996 return -EFAULT;
2998 if ( xatp.domid == DOMID_SELF )
2999 d = rcu_lock_current_domain();
3000 else if ( !IS_PRIV(current->domain) )
3001 return -EPERM;
3002 else if ( (d = rcu_lock_domain_by_id(xatp.domid)) == NULL )
3003 return -ESRCH;
3005 switch ( xatp.space )
3007 case XENMAPSPACE_shared_info:
3008 if ( xatp.idx == 0 )
3009 mfn = virt_to_mfn(d->shared_info);
3010 break;
3011 case XENMAPSPACE_grant_table:
3012 spin_lock(&d->grant_table->lock);
3014 if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
3015 (xatp.idx < max_nr_grant_frames) )
3016 gnttab_grow_table(d, xatp.idx + 1);
3018 if ( xatp.idx < nr_grant_frames(d->grant_table) )
3019 mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]);
3021 spin_unlock(&d->grant_table->lock);
3022 break;
3023 default:
3024 break;
3027 if ( !paging_mode_translate(d) || (mfn == 0) )
3029 rcu_unlock_domain(d);
3030 return -EINVAL;
3033 LOCK_BIGLOCK(d);
3035 /* Remove previously mapped page if it was present. */
3036 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
3037 if ( mfn_valid(prev_mfn) )
3039 if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
3040 /* Xen heap frames are simply unhooked from this phys slot. */
3041 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
3042 else
3043 /* Normal domain memory is freed, to avoid leaking memory. */
3044 guest_remove_page(d, xatp.gpfn);
3047 /* Unmap from old location, if any. */
3048 gpfn = get_gpfn_from_mfn(mfn);
3049 if ( gpfn != INVALID_M2P_ENTRY )
3050 guest_physmap_remove_page(d, gpfn, mfn);
3052 /* Map at new location. */
3053 guest_physmap_add_page(d, xatp.gpfn, mfn);
3055 UNLOCK_BIGLOCK(d);
3057 rcu_unlock_domain(d);
3059 break;
3062 case XENMEM_set_memory_map:
3064 struct xen_foreign_memory_map fmap;
3065 struct domain *d;
3066 int rc;
3068 if ( copy_from_guest(&fmap, arg, 1) )
3069 return -EFAULT;
3071 if ( fmap.map.nr_entries > ARRAY_SIZE(d->arch.e820) )
3072 return -EINVAL;
3074 if ( fmap.domid == DOMID_SELF )
3075 d = rcu_lock_current_domain();
3076 else if ( !IS_PRIV(current->domain) )
3077 return -EPERM;
3078 else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
3079 return -ESRCH;
3081 rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
3082 fmap.map.nr_entries) ? -EFAULT : 0;
3083 d->arch.nr_e820 = fmap.map.nr_entries;
3085 rcu_unlock_domain(d);
3086 return rc;
3089 case XENMEM_memory_map:
3091 struct xen_memory_map map;
3092 struct domain *d = current->domain;
3094 /* Backwards compatibility. */
3095 if ( d->arch.nr_e820 == 0 )
3096 return -ENOSYS;
3098 if ( copy_from_guest(&map, arg, 1) )
3099 return -EFAULT;
3101 map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
3102 if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
3103 copy_to_guest(arg, &map, 1) )
3104 return -EFAULT;
3106 return 0;
3109 case XENMEM_machine_memory_map:
3111 struct xen_memory_map memmap;
3112 XEN_GUEST_HANDLE(e820entry_t) buffer;
3113 int count;
3115 if ( !IS_PRIV(current->domain) )
3116 return -EINVAL;
3118 if ( copy_from_guest(&memmap, arg, 1) )
3119 return -EFAULT;
3120 if ( memmap.nr_entries < e820.nr_map + 1 )
3121 return -EINVAL;
3123 buffer = guest_handle_cast(memmap.buffer, e820entry_t);
3125 count = min((unsigned int)e820.nr_map, memmap.nr_entries);
3126 if ( copy_to_guest(buffer, &e820.map[0], count) < 0 )
3127 return -EFAULT;
3129 memmap.nr_entries = count;
3131 if ( copy_to_guest(arg, &memmap, 1) )
3132 return -EFAULT;
3134 return 0;
3137 case XENMEM_machphys_mapping:
3139 struct xen_machphys_mapping mapping = {
3140 .v_start = MACH2PHYS_VIRT_START,
3141 .v_end = MACH2PHYS_VIRT_END,
3142 .max_mfn = MACH2PHYS_NR_ENTRIES - 1
3143 };
3145 if ( copy_to_guest(arg, &mapping, 1) )
3146 return -EFAULT;
3148 return 0;
3151 default:
3152 return subarch_memory_op(op, arg);
3155 return 0;
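/*
 * Illustrative guest-side sketch (not part of this file): a translated
 * guest asking for its shared_info page to appear at a chosen gpfn via
 * the XENMEM_add_to_physmap case above. HYPERVISOR_memory_op() is assumed
 * to be the guest kernel's hypercall wrapper.
 */
#if 0
static int example_map_shared_info(unsigned long gpfn)
{
    struct xen_add_to_physmap xatp;

    xatp.domid = DOMID_SELF;
    xatp.space = XENMAPSPACE_shared_info;
    xatp.idx   = 0;                 /* only index 0 is recognised above */
    xatp.gpfn  = gpfn;              /* where the page should appear     */

    return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
}
#endif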
3159 /*************************
3160 * Writable Pagetables
3161 */
3163 struct ptwr_emulate_ctxt {
3164 struct x86_emulate_ctxt ctxt;
3165 unsigned long cr2;
3166 l1_pgentry_t pte;
3167 };
3169 static int ptwr_emulated_read(
3170 enum x86_segment seg,
3171 unsigned long offset,
3172 unsigned long *val,
3173 unsigned int bytes,
3174 struct x86_emulate_ctxt *ctxt)
3176 unsigned int rc;
3177 unsigned long addr = offset;
3179 *val = 0;
3180 if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
3182 propagate_page_fault(addr + bytes - rc, 0); /* read fault */
3183 return X86EMUL_EXCEPTION;
3186 return X86EMUL_OKAY;
3189 static int ptwr_emulated_update(
3190 unsigned long addr,
3191 paddr_t old,
3192 paddr_t val,
3193 unsigned int bytes,
3194 unsigned int do_cmpxchg,
3195 struct ptwr_emulate_ctxt *ptwr_ctxt)
3197 unsigned long mfn;
3198 struct page_info *page;
3199 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3200 struct vcpu *v = current;
3201 struct domain *d = v->domain;
3203 /* Only allow naturally-aligned stores within the original %cr2 page. */
3204 if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
3206 MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
3207 ptwr_ctxt->cr2, addr, bytes);
3208 return X86EMUL_UNHANDLEABLE;
3211 /* Turn a sub-word access into a full-word access. */
3212 if ( bytes != sizeof(paddr_t) )
3214 paddr_t full;
3215 unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
3217 /* Align address; read full word. */
3218 addr &= ~(sizeof(paddr_t)-1);
3219 if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
3221 propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
3222 return X86EMUL_EXCEPTION;
3224 /* Mask out bits provided by caller. */
3225 full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
3226 /* Shift the caller value and OR in the missing bits. */
3227 val &= (((paddr_t)1 << (bytes*8)) - 1);
3228 val <<= (offset)*8;
3229 val |= full;
3230 /* Also fill in missing parts of the cmpxchg old value. */
3231 old &= (((paddr_t)1 << (bytes*8)) - 1);
3232 old <<= (offset)*8;
3233 old |= full;
3236 pte = ptwr_ctxt->pte;
3237 mfn = l1e_get_pfn(pte);
3238 page = mfn_to_page(mfn);
3240 /* We are looking only for read-only mappings of p.t. pages. */
3241 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
3242 ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
3243 ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
3244 ASSERT(page_get_owner(page) == d);
3246 /* Check the new PTE. */
3247 nl1e = l1e_from_intpte(val);
3248 if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
3250 if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
3251 (bytes == 4) && (addr & 4) && !do_cmpxchg &&
3252 (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
3254 /*
3255 * If this is an upper-half write to a PAE PTE then we assume that
3256 * the guest has simply got the two writes the wrong way round. We
3257 * zap the PRESENT bit on the assumption that the bottom half will
3258 * be written immediately after we return to the guest.
3259 */
3260 MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte,
3261 l1e_get_intpte(nl1e));
3262 l1e_remove_flags(nl1e, _PAGE_PRESENT);
3264 else
3266 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3267 return X86EMUL_UNHANDLEABLE;
3271 adjust_guest_l1e(nl1e, d);
3273 /* Checked successfully: do the update (write or cmpxchg). */
3274 pl1e = map_domain_page(page_to_mfn(page));
3275 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3276 if ( do_cmpxchg )
3278 int okay;
3279 intpte_t t = old;
3280 ol1e = l1e_from_intpte(old);
3282 okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
3283 &t, val, _mfn(mfn));
3284 okay = (okay && t == old);
3286 if ( !okay )
3288 unmap_domain_page(pl1e);
3289 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
3290 return X86EMUL_CMPXCHG_FAILED;
3293 else
3295 ol1e = *pl1e;
3296 if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
3297 BUG();
3300 unmap_domain_page(pl1e);
3302 /* Finally, drop the old PTE. */
3303 put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
3305 return X86EMUL_OKAY;
3308 static int ptwr_emulated_write(
3309 enum x86_segment seg,
3310 unsigned long offset,
3311 unsigned long val,
3312 unsigned int bytes,
3313 struct x86_emulate_ctxt *ctxt)
3315 return ptwr_emulated_update(
3316 offset, 0, val, bytes, 0,
3317 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3320 static int ptwr_emulated_cmpxchg(
3321 enum x86_segment seg,
3322 unsigned long offset,
3323 unsigned long old,
3324 unsigned long new,
3325 unsigned int bytes,
3326 struct x86_emulate_ctxt *ctxt)
3328 return ptwr_emulated_update(
3329 offset, old, new, bytes, 1,
3330 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3333 static int ptwr_emulated_cmpxchg8b(
3334 enum x86_segment seg,
3335 unsigned long offset,
3336 unsigned long old,
3337 unsigned long old_hi,
3338 unsigned long new,
3339 unsigned long new_hi,
3340 struct x86_emulate_ctxt *ctxt)
3342 if ( CONFIG_PAGING_LEVELS == 2 )
3343 return X86EMUL_UNHANDLEABLE;
3344 return ptwr_emulated_update(
3345 offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
3346 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3349 static struct x86_emulate_ops ptwr_emulate_ops = {
3350 .read = ptwr_emulated_read,
3351 .insn_fetch = ptwr_emulated_read,
3352 .write = ptwr_emulated_write,
3353 .cmpxchg = ptwr_emulated_cmpxchg,
3354 .cmpxchg8b = ptwr_emulated_cmpxchg8b
3355 };
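/*
 * These callbacks are handed to x86_emulate() by ptwr_do_page_fault()
 * below: a faulting write to a read-only mapping of an L1 page table is
 * decoded here and funnelled through ptwr_emulated_update(), which
 * re-validates the new PTE with get_page_from_l1e() before committing it.
 */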
3357 /* Write page fault handler: check if guest is trying to modify a PTE. */
3358 int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
3359 struct cpu_user_regs *regs)
3361 struct domain *d = v->domain;
3362 struct page_info *page;
3363 l1_pgentry_t pte;
3364 struct ptwr_emulate_ctxt ptwr_ctxt;
3365 int rc;
3367 LOCK_BIGLOCK(d);
3369 /*
3370 * Attempt to read the PTE that maps the VA being accessed. By checking for
3371 * PDE validity in the L2 we avoid many expensive fixups in __get_user().
3372 */
3373 guest_get_eff_l1e(v, addr, &pte);
3374 if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
3375 goto bail;
3376 page = l1e_get_page(pte);
3378 /* We are looking only for read-only mappings of p.t. pages. */
3379 if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
3380 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3381 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3382 (page_get_owner(page) != d) )
3383 goto bail;
3385 ptwr_ctxt.ctxt.regs = regs;
3386 ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
3387 IS_COMPAT(d) ? 32 : BITS_PER_LONG;
3388 ptwr_ctxt.cr2 = addr;
3389 ptwr_ctxt.pte = pte;
3391 rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
3392 if ( rc == X86EMUL_UNHANDLEABLE )
3393 goto bail;
3395 UNLOCK_BIGLOCK(d);
3396 perfc_incr(ptwr_emulations);
3397 return EXCRET_fault_fixed;
3399 bail:
3400 UNLOCK_BIGLOCK(d);
3401 return 0;
3404 int map_pages_to_xen(
3405 unsigned long virt,
3406 unsigned long mfn,
3407 unsigned long nr_mfns,
3408 unsigned long flags)
3410 l2_pgentry_t *pl2e, ol2e;
3411 l1_pgentry_t *pl1e, ol1e;
3412 unsigned int i;
3414 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3415 flags &= ~MAP_SMALL_PAGES;
3417 while ( nr_mfns != 0 )
3419 pl2e = virt_to_xen_l2e(virt);
3421 if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3422 (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
3423 !map_small_pages )
3425 /* Super-page mapping. */
3426 ol2e = *pl2e;
3427 l2e_write_atomic(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
3429 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3431 local_flush_tlb_pge();
3432 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3433 free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
3436 virt += 1UL << L2_PAGETABLE_SHIFT;
3437 mfn += 1UL << PAGETABLE_ORDER;
3438 nr_mfns -= 1UL << PAGETABLE_ORDER;
3440 else
3442 /* Normal page mapping. */
3443 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3445 pl1e = alloc_xen_pagetable();
3446 clear_page(pl1e);
3447 l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3448 __PAGE_HYPERVISOR));
3450 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
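            /*
             * Splitting an existing superpage: build an L1 table that
             * reproduces the old mapping frame by frame with _PAGE_PSE
             * cleared, then switch the L2 entry over to it with a single
             * atomic write before flushing the TLB.
             */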
3452 pl1e = alloc_xen_pagetable();
3453 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3454 l1e_write(&pl1e[i],
3455 l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
3456 l2e_get_flags(*pl2e) & ~_PAGE_PSE));
3457 l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3458 __PAGE_HYPERVISOR));
3459 local_flush_tlb_pge();
3462 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3463 ol1e = *pl1e;
3464 l1e_write_atomic(pl1e, l1e_from_pfn(mfn, flags));
3465 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3466 local_flush_tlb_one(virt);
3468 virt += 1UL << L1_PAGETABLE_SHIFT;
3469 mfn += 1UL;
3470 nr_mfns -= 1UL;
3474 return 0;
3477 void __set_fixmap(
3478 enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
3480 BUG_ON(idx >= __end_of_fixed_addresses);
3481 map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
3484 #ifdef MEMORY_GUARD
3486 void memguard_init(void)
3488 map_pages_to_xen(
3489 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3490 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
3493 static void __memguard_change_range(void *p, unsigned long l, int guard)
3495 unsigned long _p = (unsigned long)p;
3496 unsigned long _l = (unsigned long)l;
3497 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3499 /* Ensure we are dealing with a page-aligned whole number of pages. */
3500 ASSERT((_p&PAGE_MASK) != 0);
3501 ASSERT((_l&PAGE_MASK) != 0);
3502 ASSERT((_p&~PAGE_MASK) == 0);
3503 ASSERT((_l&~PAGE_MASK) == 0);
3505 if ( guard )
3506 flags &= ~_PAGE_PRESENT;
3508 map_pages_to_xen(
3509 _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3512 void memguard_guard_range(void *p, unsigned long l)
3514 __memguard_change_range(p, l, 1);
3517 void memguard_unguard_range(void *p, unsigned long l)
3519 __memguard_change_range(p, l, 0);
3522 #endif
3524 void memguard_guard_stack(void *p)
3526 BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
3527 p = (void *)((unsigned long)p + STACK_SIZE - DEBUG_STACK_SIZE - PAGE_SIZE);
3528 memguard_guard_range(p, PAGE_SIZE);
3531 /*
3532 * Local variables:
3533 * mode: C
3534 * c-set-style: "BSD"
3535 * c-basic-offset: 4
3536 * tab-width: 4
3537 * indent-tabs-mode: nil
3538 * End:
3539 */