direct-io.hg: view of xen/arch/x86/mm.c @ 14353:b01d4f415f5f

x86: PAE linear page tables.

While full linear page table support makes little sense (and would be
more complicated to implement), partial linear page table support is
almost identical to that in non-PAE, and is used (at least) by NetWare.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Author:   kfraser@localhost.localdomain
Date:     Mon Mar 12 14:47:00 2007 +0000 (2007-03-12)
Parents:  a951cf1da459
Children: a4ca6a264fee

Line source:
1 /******************************************************************************
2 * arch/x86/mm.c
3 *
4 * Copyright (c) 2002-2005 K A Fraser
5 * Copyright (c) 2004 Christian Limpach
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * A description of the x86 page table API:
24 *
25 * Domains trap to do_mmu_update with a list of update requests.
26 * This is a list of (ptr, val) pairs, where the requested operation
27 * is *ptr = val.
28 *
29 * Reference counting of pages:
30 * ----------------------------
31 * Each page has two refcounts: tot_count and type_count.
32 *
33 * TOT_COUNT is the obvious reference count. It counts all uses of a
34 * physical page frame by a domain, including uses as a page directory,
35 * a page table, or simple mappings via a PTE. This count prevents a
36 * domain from releasing a frame back to the free pool when it still holds
37 * a reference to it.
38 *
39 * TYPE_COUNT is more subtle. A frame can be put to one of three
40 * mutually-exclusive uses: it might be used as a page directory, or a
41 * page table, or it may be mapped writable by the domain [of course, a
42 * frame may not be used in any of these three ways!].
43 * So, type_count is a count of the number of times a frame is being
44 * referred to in its current incarnation. Therefore, a page can only
45 * change its type when its type count is zero.
46 *
47 * Pinning the page type:
48 * ----------------------
49 * The type of a page can be pinned/unpinned with the commands
50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
51 * pinning is not reference counted, so it can't be nested).
52 * This is useful to prevent a page's type count falling to zero, at which
53 * point safety checks would need to be carried out next time the count
54 * is increased again.
55 *
56 * A further note on writable page mappings:
57 * -----------------------------------------
58 * For simplicity, the count of writable mappings for a page may not
59 * correspond to reality. The 'writable count' is incremented for every
60 * PTE which maps the page with the _PAGE_RW flag set. However, for
61 * write access to be possible the page directory entry must also have
62 * its _PAGE_RW bit set. We do not check this as it complicates the
63 * reference counting considerably [consider the case of multiple
64 * directory entries referencing a single page table, some with the RW
65 * bit set, others not -- it starts getting a bit messy].
66 * In normal use, this simplification shouldn't be a problem.
67 * However, the logic can be added if required.
68 *
69 * One more note on read-only page mappings:
70 * -----------------------------------------
71 * We want domains to be able to map pages for read-only access. The
72 * main reason is that page tables and directories should be readable
73 * by a domain, but it would not be safe for them to be writable.
74 * However, domains have free access to rings 1 & 2 of the Intel
75 * privilege model. In terms of page protection, these are considered
76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
77 * read-only restrictions are respected in supervisor mode -- if the
78 * bit is clear then any mapped page is writable.
79 *
80 * We get round this by always setting the WP bit and disallowing
81 * updates to it. This is very unlikely to cause a problem for guest
82 * OS's, which will generally use the WP bit to simplify copy-on-write
83 * implementation (in that case, OS wants a fault when it writes to
84 * an application-supplied buffer).
85 */
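To make the (ptr, val) request format above concrete, here is a minimal guest-side sketch of submitting one PTE update through this interface. It assumes a PV guest that provides the usual HYPERVISOR_mmu_update() hypercall wrapper and the public struct mmu_update; the header path and the helper name example_set_pte are illustrative only, not part of this file.

    /* Guest-side illustration; not part of mm.c. */
    #include <stdint.h>
    #include <xen/interface/xen.h>  /* struct mmu_update, domid_t, DOMID_SELF */

    /* Hypercall wrapper normally supplied by the guest OS. */
    int HYPERVISOR_mmu_update(struct mmu_update *req, int count,
                              int *success_count, domid_t domid);

    static int example_set_pte(uint64_t pte_machine_addr, uint64_t new_val)
    {
        struct mmu_update req;
        int done = 0;

        /* The low bits of 'ptr' select the request type; for an ordinary
         * PTE write (MMU_NORMAL_PT_UPDATE == 0) the machine address of the
         * PTE is used as-is, and the hypervisor performs the checked
         * equivalent of *ptr = val. */
        req.ptr = pte_machine_addr;
        req.val = new_val;

        return HYPERVISOR_mmu_update(&req, 1, &done, DOMID_SELF);
    }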
87 #include <xen/config.h>
88 #include <xen/init.h>
89 #include <xen/kernel.h>
90 #include <xen/lib.h>
91 #include <xen/mm.h>
92 #include <xen/domain.h>
93 #include <xen/sched.h>
94 #include <xen/errno.h>
95 #include <xen/perfc.h>
96 #include <xen/irq.h>
97 #include <xen/softirq.h>
98 #include <xen/domain_page.h>
99 #include <xen/event.h>
100 #include <xen/iocap.h>
101 #include <xen/guest_access.h>
102 #include <asm/paging.h>
103 #include <asm/shadow.h>
104 #include <asm/page.h>
105 #include <asm/flushtlb.h>
106 #include <asm/io.h>
107 #include <asm/ldt.h>
108 #include <asm/x86_emulate.h>
109 #include <asm/e820.h>
110 #include <asm/hypercall.h>
111 #include <public/memory.h>
113 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
115 /*
116 * PTE updates can be done with ordinary writes except:
117 * 1. Debug builds get extra checking by using CMPXCHG[8B].
118 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
119 */
120 #if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
121 #define PTE_UPDATE_WITH_CMPXCHG
122 #endif
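A stripped-down sketch of the update discipline this macro selects (the real implementation is update_intpte() further down): the entry is replaced with a single 8-byte compare-and-exchange, so a PAE PTE is never observed half old and half new, and a concurrent hardware Accessed/Dirty update is detected and retried rather than silently overwritten.

    /* Illustration only; see update_intpte() below for the real code. */
    static inline void example_pte_update(intpte_t *p, intpte_t old, intpte_t new)
    {
        intpte_t seen;
        for ( ; ; )
        {
            seen = cmpxchg(p, old, new);   /* returns the value found at *p */
            if ( seen == old )
                break;                     /* swapped atomically             */
            old = seen;                    /* raced with another update; retry */
        }
    }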
124 /* Used to defer flushing of memory structures. */
125 struct percpu_mm_info {
126 #define DOP_FLUSH_TLB (1<<0) /* Flush the local TLB. */
127 #define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
128 #define DOP_RELOAD_LDT (1<<2) /* Reload the LDT shadow mapping. */
129 unsigned int deferred_ops;
130 /* If non-NULL, specifies a foreign subject domain for some operations. */
131 struct domain *foreign;
132 };
133 static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
135 /*
136 * Returns the current foreign domain; defaults to the currently-executing
137 * domain if a foreign override hasn't been specified.
138 */
139 #define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
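The '?:' here is the GNU C extension that reuses the tested value as the result. Written out long-hand purely for illustration (the long form evaluates this_cpu() twice, which the extension avoids):

    #define FOREIGNDOM_LONGHAND                        \
        (this_cpu(percpu_mm_info).foreign != NULL      \
             ? this_cpu(percpu_mm_info).foreign        \
             : current->domain)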
141 /* Private domain structs for DOMID_XEN and DOMID_IO. */
142 static struct domain *dom_xen, *dom_io;
144 /* Frame table and its size in pages. */
145 struct page_info *frame_table;
146 unsigned long max_page;
147 unsigned long total_pages;
149 #ifdef CONFIG_COMPAT
150 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
151 #define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
152 L3_DISALLOW_MASK : \
153 COMPAT_L3_DISALLOW_MASK)
154 #else
155 #define l3_disallow_mask(d) L3_DISALLOW_MASK
156 #endif
158 static void queue_deferred_ops(struct domain *d, unsigned int ops)
159 {
160 ASSERT(d == current->domain);
161 this_cpu(percpu_mm_info).deferred_ops |= ops;
162 }
164 void __init init_frametable(void)
165 {
166 unsigned long nr_pages, page_step, i, mfn;
168 frame_table = (struct page_info *)FRAMETABLE_VIRT_START;
170 nr_pages = PFN_UP(max_page * sizeof(*frame_table));
171 page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;
173 for ( i = 0; i < nr_pages; i += page_step )
174 {
175 mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
176 if ( mfn == 0 )
177 panic("Not enough memory for frame table\n");
178 map_pages_to_xen(
179 FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
180 mfn, page_step, PAGE_HYPERVISOR);
181 }
183 memset(frame_table, 0, nr_pages << PAGE_SHIFT);
184 }
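A worked sizing example for the loop above, using illustrative numbers (the real sizeof(struct page_info) depends on the build):

    /* On a 4GB machine, max_page = 0x100000 frames.  Assuming a 32-byte
     * struct page_info:
     *   nr_pages  = PFN_UP(0x100000 * 32)                   = 8192 pages (32MB)
     *   page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT = 512 on PAE/x86-64
     *               (one L2 entry's worth of 4KB pages, i.e. a 2MB chunk)
     * so the loop performs 8192 / 512 = 16 boot allocations, each sized and
     * aligned so map_pages_to_xen() can back the frame table with large
     * mappings. */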
186 void arch_init_memory(void)
187 {
188 extern void subarch_init_memory(void);
190 unsigned long i, pfn, rstart_pfn, rend_pfn;
192 /*
193 * Initialise our DOMID_XEN domain.
194 * Any Xen-heap pages that we will allow to be mapped will have
195 * their domain field set to dom_xen.
196 */
197 dom_xen = alloc_domain(DOMID_XEN);
198 BUG_ON(dom_xen == NULL);
200 /*
201 * Initialise our DOMID_IO domain.
202 * This domain owns I/O pages that are within the range of the page_info
203 * array. Mappings occur at the priv of the caller.
204 */
205 dom_io = alloc_domain(DOMID_IO);
206 BUG_ON(dom_io == NULL);
208 /* First 1MB of RAM is historically marked as I/O. */
209 for ( i = 0; i < 0x100; i++ )
210 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
212 /* Any areas not specified as RAM by the e820 map are considered I/O. */
213 for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
214 {
215 if ( e820.map[i].type != E820_RAM )
216 continue;
217 /* Every page from cursor to start of next RAM region is I/O. */
218 rstart_pfn = PFN_UP(e820.map[i].addr);
219 rend_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
220 for ( ; pfn < rstart_pfn; pfn++ )
221 {
222 BUG_ON(!mfn_valid(pfn));
223 share_xen_page_with_guest(
224 mfn_to_page(pfn), dom_io, XENSHARE_writable);
225 }
226 /* Skip the RAM region. */
227 pfn = rend_pfn;
228 }
229 BUG_ON(pfn != max_page);
231 subarch_init_memory();
232 }
234 int memory_is_conventional_ram(paddr_t p)
235 {
236 int i;
238 for ( i = 0; i < e820.nr_map; i++ )
239 {
240 if ( (e820.map[i].type == E820_RAM) &&
241 (e820.map[i].addr <= p) &&
242 (e820.map[i].addr + e820.map[i].size > p) )

243 return 1;
244 }
246 return 0;
247 }
249 void share_xen_page_with_guest(
250 struct page_info *page, struct domain *d, int readonly)
251 {
252 if ( page_get_owner(page) == d )
253 return;
255 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
257 spin_lock(&d->page_alloc_lock);
259 /* The incremented type count pins as writable or read-only. */
260 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
261 page->u.inuse.type_info |= PGT_validated | 1;
263 page_set_owner(page, d);
264 wmb(); /* install valid domain ptr before updating refcnt. */
265 ASSERT(page->count_info == 0);
267 /* Only add to the allocation list if the domain isn't dying. */
268 if ( !test_bit(_DOMF_dying, &d->domain_flags) )
269 {
270 page->count_info |= PGC_allocated | 1;
271 if ( unlikely(d->xenheap_pages++ == 0) )
272 get_knownalive_domain(d);
273 list_add_tail(&page->list, &d->xenpage_list);
274 }
276 spin_unlock(&d->page_alloc_lock);
277 }
279 void share_xen_page_with_privileged_guests(
280 struct page_info *page, int readonly)
281 {
282 share_xen_page_with_guest(page, dom_xen, readonly);
283 }
285 #if defined(CONFIG_X86_PAE)
287 #ifdef NDEBUG
288 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
289 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
290 #else
291 /*
292 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
293 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
294 * (detected by lack of an owning domain). As required for correctness, we
295 * always shadow PDPTs above 4GB.
296 */
297 #define l3tab_needs_shadow(mfn) \
298 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
299 (page_get_owner(mfn_to_page(mfn)) != NULL) && \
300 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
301 ((mfn) >= 0x100000))
302 #endif
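The 0x100000 threshold is the 4GB boundary expressed in frame numbers, which is the relevant limit because a 32-bit PAE CR3 can only point at a PDPT below 4GB; anything above that must be copied into the per-vcpu low-memory cache by make_cr3() below before it can be loaded.

    /* Worked check: (0x100000 << PAGE_SHIFT) == 0x100000 * 4096 == 4GB, so
     * l3tab_needs_shadow() asks "does this PDPT live at or above the highest
     * address a 32-bit PAE CR3 can reach?". */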
304 static l1_pgentry_t *fix_pae_highmem_pl1e;
306 /* Cache the address of PAE high-memory fixmap page tables. */
307 static int __init cache_pae_fixmap_address(void)
308 {
309 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
310 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
311 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
312 return 0;
313 }
314 __initcall(cache_pae_fixmap_address);
316 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
318 void make_cr3(struct vcpu *v, unsigned long mfn)
319 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
320 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
321 {
322 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
323 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
324 unsigned int cpu = smp_processor_id();
326 /* Fast path: does this mfn need a shadow at all? */
327 if ( !l3tab_needs_shadow(mfn) )
328 {
329 v->arch.cr3 = mfn << PAGE_SHIFT;
330 /* Cache is no longer in use or valid */
331 cache->high_mfn = 0;
332 return;
333 }
335 /* Caching logic is not interrupt safe. */
336 ASSERT(!in_irq());
338 /* Protects against pae_flush_pgd(). */
339 spin_lock(&cache->lock);
341 cache->inuse_idx ^= 1;
342 cache->high_mfn = mfn;
344 /* Map the guest L3 table and copy to the chosen low-memory cache. */
345 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
346 /* First check the previous high mapping can't be in the TLB.
347 * (i.e. have we loaded CR3 since we last did this?) */
348 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
349 local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
350 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
351 lowmem_l3tab = cache->table[cache->inuse_idx];
352 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
353 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
354 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
356 v->arch.cr3 = __pa(lowmem_l3tab);
358 spin_unlock(&cache->lock);
359 }
361 #else /* !CONFIG_X86_PAE */
363 void make_cr3(struct vcpu *v, unsigned long mfn)
364 {
365 v->arch.cr3 = mfn << PAGE_SHIFT;
366 }
368 #endif /* !CONFIG_X86_PAE */
370 void write_ptbase(struct vcpu *v)
371 {
372 write_cr3(v->arch.cr3);
373 }
375 /* Should be called after CR3 is updated.
376 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3.
377 *
378 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
379 * for HVM guests, arch.monitor_table and hvm's guest CR3.
380 *
381 * Update ref counts to shadow tables appropriately.
382 */
383 void update_cr3(struct vcpu *v)
384 {
385 unsigned long cr3_mfn=0;
387 if ( paging_mode_enabled(v->domain) )
388 {
389 paging_update_cr3(v);
390 return;
391 }
393 #if CONFIG_PAGING_LEVELS == 4
394 if ( !(v->arch.flags & TF_kernel_mode) )
395 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
396 else
397 #endif
398 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
400 make_cr3(v, cr3_mfn);
401 }
404 void invalidate_shadow_ldt(struct vcpu *v)
405 {
406 int i;
407 unsigned long pfn;
408 struct page_info *page;
410 if ( v->arch.shadow_ldt_mapcnt == 0 )
411 return;
413 v->arch.shadow_ldt_mapcnt = 0;
415 for ( i = 16; i < 32; i++ )
416 {
417 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
418 if ( pfn == 0 ) continue;
419 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
420 page = mfn_to_page(pfn);
421 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page);
422 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
423 put_page_and_type(page);
424 }
426 /* Dispose of the (now possibly invalid) mappings from the TLB. */
427 if ( v == current )
428 queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
429 else
430 flush_tlb_mask(v->domain->domain_dirty_cpumask);
431 }
434 static int alloc_segdesc_page(struct page_info *page)
435 {
436 struct desc_struct *descs;
437 int i;
439 descs = map_domain_page(page_to_mfn(page));
441 for ( i = 0; i < 512; i++ )
442 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
443 goto fail;
445 unmap_domain_page(descs);
446 return 1;
448 fail:
449 unmap_domain_page(descs);
450 return 0;
451 }
454 /* Map shadow page at offset @off. */
455 int map_ldt_shadow_page(unsigned int off)
456 {
457 struct vcpu *v = current;
458 struct domain *d = v->domain;
459 unsigned long gmfn, mfn;
460 l1_pgentry_t l1e, nl1e;
461 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
462 int okay;
464 BUG_ON(unlikely(in_irq()));
466 guest_get_eff_kern_l1e(v, gva, &l1e);
467 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
468 return 0;
470 gmfn = l1e_get_pfn(l1e);
471 mfn = gmfn_to_mfn(d, gmfn);
472 if ( unlikely(!mfn_valid(mfn)) )
473 return 0;
475 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page);
476 if ( unlikely(!okay) )
477 return 0;
479 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
481 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
482 v->arch.shadow_ldt_mapcnt++;
484 return 1;
485 }
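The arithmetic behind the 16-slot window used here and in invalidate_shadow_ldt() above:

    /* An LDT is limited to 8192 entries (see the MMUEXT_SET_LDT check later
     * in this file) of 8 bytes each:
     *   8192 * 8 = 65536 bytes = 64KB = 16 pages,
     * hence perdomain_ptes slots 16..31 are reserved for the shadow LDT and
     * the mapping above is installed at index 'off + 16'. */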
488 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
489 {
490 struct page_info *page = mfn_to_page(page_nr);
492 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
493 {
494 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
495 return 0;
496 }
498 return 1;
499 }
502 static int get_page_and_type_from_pagenr(unsigned long page_nr,
503 unsigned long type,
504 struct domain *d)
505 {
506 struct page_info *page = mfn_to_page(page_nr);
508 if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
509 return 0;
511 if ( unlikely(!get_page_type(page, type)) )
512 {
513 put_page(page);
514 return 0;
515 }
517 return 1;
518 }
520 /*
521 * We allow root tables to map each other (a.k.a. linear page tables). It
522 * needs some special care with reference counts and access permissions:
523 * 1. The mapping entry must be read-only, or the guest may get write access
524 * to its own PTEs.
525 * 2. We must only bump the reference counts for an *already validated*
526 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
527 * on a validation that is required to complete that validation.
528 * 3. We only need to increment the reference counts for the mapped page
529 * frame if it is mapped by a different root table. This is sufficient and
530 * also necessary to allow validation of a root table mapping itself.
531 */
532 #define define_get_linear_pagetable(name1, name2) \
533 static int \
534 get_##name1##_linear_pagetable( \
535 name1##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \
536 { \
537 unsigned long x, y; \
538 struct page_info *page; \
539 unsigned long pfn; \
540 \
541 if ( (name2##_get_flags(pde) & _PAGE_RW) ) \
542 { \
543 MEM_LOG("Attempt to create linear p.t. with write perms"); \
544 return 0; \
545 } \
546 \
547 if ( (pfn = name2##_get_pfn(pde)) != pde_pfn ) \
548 { \
549 /* Make sure the mapped frame belongs to the correct domain. */ \
550 if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
551 return 0; \
552 \
553 /* \
554 * Ensure that the mapped frame is an already-validated page table. \
555 * If so, atomically increment the count (checking for overflow). \
556 */ \
557 page = mfn_to_page(pfn); \
558 y = page->u.inuse.type_info; \
559 do { \
560 x = y; \
561 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || \
562 unlikely((x & (PGT_type_mask|PGT_validated)) != \
563 (PGT_##name1##_page_table|PGT_validated)) ) \
564 { \
565 put_page(page); \
566 return 0; \
567 } \
568 } \
569 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); \
570 } \
571 \
572 return 1; \
573 }
574 #if !defined(CONFIG_X86_PAE)
575 define_get_linear_pagetable(root,root)
576 #endif
577 #if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
578 define_get_linear_pagetable(l2,l2e)
579 #endif
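For readability, an abridged long-hand sketch of what define_get_linear_pagetable(l2,l2e) expands to; the macro text above is authoritative.

    /*
     * static int get_l2_linear_pagetable(l2_pgentry_t pde,
     *                                    unsigned long pde_pfn,
     *                                    struct domain *d)
     * {
     *     unsigned long pfn;
     *
     *     if ( l2e_get_flags(pde) & _PAGE_RW )        // rule 1: read-only only
     *         return 0;
     *     if ( (pfn = l2e_get_pfn(pde)) == pde_pfn )  // rule 3: a self-mapping
     *         return 1;                               //   needs no extra ref
     *     if ( !get_page_from_pagenr(pfn, d) )        // frame must belong to d
     *         return 0;
     *     // rule 2: only an *already validated* L2 may gain a reference;
     *     // the cmpxchg loop in the macro bumps type_info atomically and
     *     // backs out (put_page) on overflow or wrong/unvalidated type.
     *     ...
     * }
     */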
581 int
582 get_page_from_l1e(
583 l1_pgentry_t l1e, struct domain *d)
584 {
585 unsigned long mfn = l1e_get_pfn(l1e);
586 struct page_info *page = mfn_to_page(mfn);
587 int okay;
589 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
590 return 1;
592 if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
593 {
594 MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
595 return 0;
596 }
598 if ( unlikely(!mfn_valid(mfn)) ||
599 unlikely(page_get_owner(page) == dom_io) )
600 {
601 /* DOMID_IO reverts to caller for privilege checks. */
602 if ( d == dom_io )
603 d = current->domain;
605 if ( !iomem_access_permitted(d, mfn, mfn) )
606 {
607 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
608 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
609 d->domain_id, mfn);
610 return 0;
611 }
613 /* No reference counting for out-of-range I/O pages. */
614 if ( !mfn_valid(mfn) )
615 return 1;
617 d = dom_io;
618 }
620 /* Foreign mappings into guests in shadow external mode don't
621 * contribute to writeable mapping refcounts. (This allows the
622 * qemu-dm helper process in dom0 to map the domain's memory without
623 * messing up the count of "real" writable mappings.) */
624 okay = (((l1e_get_flags(l1e) & _PAGE_RW) &&
625 !(unlikely(paging_mode_external(d) && (d != current->domain))))
626 ? get_page_and_type(page, d, PGT_writable_page)
627 : get_page(page, d));
628 if ( !okay )
629 {
630 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
631 " for dom%d",
632 mfn, get_gpfn_from_mfn(mfn),
633 l1e_get_intpte(l1e), d->domain_id);
634 }
636 return okay;
637 }
640 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
641 static int
642 get_page_from_l2e(
643 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
644 {
645 int rc;
647 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
648 return 1;
650 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
651 {
652 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
653 return 0;
654 }
656 rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
657 if ( unlikely(!rc) )
658 {
659 #if CONFIG_PAGING_LEVELS == 2
660 rc = get_root_linear_pagetable(l2e, pfn, d);
661 #else
662 rc = get_l2_linear_pagetable(l2e, pfn, d);
663 #endif
664 }
666 return rc;
667 }
670 #if CONFIG_PAGING_LEVELS >= 3
671 static int
672 get_page_from_l3e(
673 l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
674 {
675 int rc;
677 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
678 return 1;
680 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
681 {
682 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
683 return 0;
684 }
686 rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
687 return rc;
688 }
689 #endif /* 3 level */
691 #if CONFIG_PAGING_LEVELS >= 4
692 static int
693 get_page_from_l4e(
694 l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
695 {
696 int rc;
698 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
699 return 1;
701 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
702 {
703 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
704 return 0;
705 }
707 rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
709 if ( unlikely(!rc) )
710 rc = get_root_linear_pagetable(l4e, pfn, d);
712 return rc;
713 }
714 #endif /* 4 level */
716 #ifdef __x86_64__
718 #ifdef USER_MAPPINGS_ARE_GLOBAL
719 #define adjust_guest_l1e(pl1e, d) \
720 do { \
721 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
722 likely(!IS_COMPAT(d)) ) \
723 { \
724 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
725 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
726 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
727 MEM_LOG("Global bit is set to kernel page %lx", \
728 l1e_get_pfn((pl1e))); \
729 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
730 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
731 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
732 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
733 } \
734 } while ( 0 )
735 #else
736 #define adjust_guest_l1e(pl1e, d) \
737 do { \
738 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
739 likely(!IS_COMPAT(d)) ) \
740 l1e_add_flags((pl1e), _PAGE_USER); \
741 } while ( 0 )
742 #endif
744 #define adjust_guest_l2e(pl2e, d) \
745 do { \
746 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
747 likely(!IS_COMPAT(d)) ) \
748 l2e_add_flags((pl2e), _PAGE_USER); \
749 } while ( 0 )
751 #define adjust_guest_l3e(pl3e, d) \
752 do { \
753 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
754 l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ? \
755 _PAGE_USER : \
756 _PAGE_USER|_PAGE_RW); \
757 } while ( 0 )
759 #define adjust_guest_l4e(pl4e, d) \
760 do { \
761 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
762 likely(!IS_COMPAT(d)) ) \
763 l4e_add_flags((pl4e), _PAGE_USER); \
764 } while ( 0 )
766 #else /* !defined(__x86_64__) */
768 #define adjust_guest_l1e(_p, _d) ((void)(_d))
769 #define adjust_guest_l2e(_p, _d) ((void)(_d))
770 #define adjust_guest_l3e(_p, _d) ((void)(_d))
772 #endif
774 #ifdef CONFIG_COMPAT
775 #define unadjust_guest_l3e(pl3e, d) \
776 do { \
777 if ( unlikely(IS_COMPAT(d)) && \
778 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
779 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
780 } while ( 0 )
781 #else
782 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
783 #endif
785 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
786 {
787 unsigned long pfn = l1e_get_pfn(l1e);
788 struct page_info *page = mfn_to_page(pfn);
789 struct domain *e;
790 struct vcpu *v;
792 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) )
793 return;
795 e = page_get_owner(page);
797 /*
798 * Check if this is a mapping that was established via a grant reference.
799 * If it was then we should not be here: we require that such mappings are
800 * explicitly destroyed via the grant-table interface.
801 *
802 * The upshot of this is that the guest can end up with active grants that
803 * it cannot destroy (because it no longer has a PTE to present to the
804 * grant-table interface). This can lead to subtle hard-to-catch bugs,
805 * hence a special grant PTE flag can be enabled to catch the bug early.
806 *
807 * (Note that the undestroyable active grants are not a security hole in
808 * Xen. All active grants can safely be cleaned up when the domain dies.)
809 */
810 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
811 !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
812 {
813 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
814 l1e_get_intpte(l1e));
815 domain_crash(d);
816 }
818 /* Remember we didn't take a type-count of foreign writable mappings
819 * to paging-external domains */
820 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
821 !(unlikely((e != d) && paging_mode_external(e))) )
822 {
823 put_page_and_type(page);
824 }
825 else
826 {
827 /* We expect this is rare so we blow the entire shadow LDT. */
828 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
829 PGT_ldt_page)) &&
830 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
831 (d == e) )
832 {
833 for_each_vcpu ( d, v )
834 invalidate_shadow_ldt(v);
835 }
836 put_page(page);
837 }
838 }
841 /*
842 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
843 * Note also that this automatically deals correctly with linear p.t.'s.
844 */
845 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
846 {
847 if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
848 (l2e_get_pfn(l2e) != pfn) )
849 put_page_and_type(l2e_get_page(l2e));
850 }
853 #if CONFIG_PAGING_LEVELS >= 3
854 static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
855 {
856 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
857 (l3e_get_pfn(l3e) != pfn) )
858 put_page_and_type(l3e_get_page(l3e));
859 }
860 #endif
862 #if CONFIG_PAGING_LEVELS >= 4
863 static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
864 {
865 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
866 (l4e_get_pfn(l4e) != pfn) )
867 put_page_and_type(l4e_get_page(l4e));
868 }
869 #endif
871 static int alloc_l1_table(struct page_info *page)
872 {
873 struct domain *d = page_get_owner(page);
874 unsigned long pfn = page_to_mfn(page);
875 l1_pgentry_t *pl1e;
876 int i;
878 pl1e = map_domain_page(pfn);
880 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
881 {
882 if ( is_guest_l1_slot(i) &&
883 unlikely(!get_page_from_l1e(pl1e[i], d)) )
884 goto fail;
886 adjust_guest_l1e(pl1e[i], d);
887 }
889 unmap_domain_page(pl1e);
890 return 1;
892 fail:
893 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
894 while ( i-- > 0 )
895 if ( is_guest_l1_slot(i) )
896 put_page_from_l1e(pl1e[i], d);
898 unmap_domain_page(pl1e);
899 return 0;
900 }
902 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
903 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
904 {
905 struct page_info *page;
906 l2_pgentry_t *pl2e;
907 l3_pgentry_t l3e3;
908 #ifndef CONFIG_COMPAT
909 l2_pgentry_t l2e;
910 int i;
911 #else
913 if ( !IS_COMPAT(d) )
914 return 1;
915 #endif
917 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
919 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
920 l3e3 = pl3e[3];
921 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
922 {
923 MEM_LOG("PAE L3 3rd slot is empty");
924 return 0;
925 }
927 /*
928 * The Xen-private mappings include linear mappings. The L2 thus cannot
929 * be shared by multiple L3 tables. The test here is adequate because:
930 * 1. Cannot appear in slots != 3 because get_page_type() checks the
931 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
932 * 2. Cannot appear in another page table's L3:
933 * a. alloc_l3_table() calls this function and this check will fail
934 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
935 */
936 page = l3e_get_page(l3e3);
937 BUG_ON(page->u.inuse.type_info & PGT_pinned);
938 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
939 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
940 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
941 {
942 MEM_LOG("PAE L3 3rd slot is shared");
943 return 0;
944 }
946 /* Xen private mappings. */
947 pl2e = map_domain_page(l3e_get_pfn(l3e3));
948 #ifndef CONFIG_COMPAT
949 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
950 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
951 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
952 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
953 {
954 l2e = l2e_from_page(
955 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
956 __PAGE_HYPERVISOR);
957 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
958 }
959 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
960 {
961 l2e = l2e_empty();
962 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
963 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
964 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
965 }
966 #else
967 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
968 &compat_idle_pg_table_l2[
969 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
970 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
971 #endif
972 unmap_domain_page(pl2e);
974 return 1;
975 }
976 #else
977 # define create_pae_xen_mappings(d, pl3e) (1)
978 #endif
980 #ifdef CONFIG_X86_PAE
981 /* Flush a pgdir update into low-memory caches. */
982 static void pae_flush_pgd(
983 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
984 {
985 struct domain *d = page_get_owner(mfn_to_page(mfn));
986 struct vcpu *v;
987 intpte_t _ol3e, _nl3e, _pl3e;
988 l3_pgentry_t *l3tab_ptr;
989 struct pae_l3_cache *cache;
991 if ( unlikely(shadow_mode_enabled(d)) )
992 {
993 cpumask_t m = CPU_MASK_NONE;
994 /* Re-shadow this l3 table on any vcpus that are using it */
995 for_each_vcpu ( d, v )
996 if ( pagetable_get_pfn(v->arch.guest_table) == mfn )
997 {
998 paging_update_cr3(v);
999 cpus_or(m, m, v->vcpu_dirty_cpumask);
1000 }
1001 flush_tlb_mask(m);
1002 }
1004 /* If below 4GB then the pgdir is not shadowed in low memory. */
1005 if ( !l3tab_needs_shadow(mfn) )
1006 return;
1008 for_each_vcpu ( d, v )
1009 {
1010 cache = &v->arch.pae_l3_cache;
1012 spin_lock(&cache->lock);
1014 if ( cache->high_mfn == mfn )
1015 {
1016 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
1017 _ol3e = l3e_get_intpte(*l3tab_ptr);
1018 _nl3e = l3e_get_intpte(nl3e);
1019 _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e);
1020 BUG_ON(_pl3e != _ol3e);
1021 }
1023 spin_unlock(&cache->lock);
1024 }
1026 flush_tlb_mask(d->domain_dirty_cpumask);
1027 }
1028 #else
1029 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
1030 #endif
1032 static int alloc_l2_table(struct page_info *page, unsigned long type)
1034 struct domain *d = page_get_owner(page);
1035 unsigned long pfn = page_to_mfn(page);
1036 l2_pgentry_t *pl2e;
1037 int i;
1039 pl2e = map_domain_page(pfn);
1041 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1043 if ( is_guest_l2_slot(d, type, i) &&
1044 unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
1045 goto fail;
1047 adjust_guest_l2e(pl2e[i], d);
1050 #if CONFIG_PAGING_LEVELS == 2
1051 /* Xen private mappings. */
1052 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
1053 &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
1054 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
1055 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
1056 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
1057 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
1058 pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
1059 l2e_from_page(
1060 virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
1061 __PAGE_HYPERVISOR);
1062 #endif
1064 unmap_domain_page(pl2e);
1065 return 1;
1067 fail:
1068 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
1069 while ( i-- > 0 )
1070 if ( is_guest_l2_slot(d, type, i) )
1071 put_page_from_l2e(pl2e[i], pfn);
1073 unmap_domain_page(pl2e);
1074 return 0;
1078 #if CONFIG_PAGING_LEVELS >= 3
1079 static int alloc_l3_table(struct page_info *page)
1081 struct domain *d = page_get_owner(page);
1082 unsigned long pfn = page_to_mfn(page);
1083 l3_pgentry_t *pl3e;
1084 int i;
1086 #ifdef CONFIG_X86_PAE
1087 /*
1088 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
1089 * the weird 'extended cr3' format for dealing with high-order address
1090 * bits. We cut some slack for control tools (before vcpu0 is initialised).
1091 */
1092 if ( (pfn >= 0x100000) &&
1093 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
1094 d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) )
1096 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
1097 return 0;
1099 #endif
1101 pl3e = map_domain_page(pfn);
1103 /*
1104 * PAE guests allocate full pages, but aren't required to initialize
1105 * more than the first four entries; when running in compatibility
1106 * mode, however, the full page is visible to the MMU, and hence all
1107 * 512 entries must be valid/verified, which is most easily achieved
1108 * by clearing them out.
1109 */
1110 if ( IS_COMPAT(d) )
1111 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
1113 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1115 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1116 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
1118 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
1119 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
1120 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
1121 PGT_l2_page_table |
1122 PGT_pae_xen_l2,
1123 d) )
1124 goto fail;
1126 else
1127 #endif
1128 if ( is_guest_l3_slot(i) &&
1129 unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
1130 goto fail;
1132 adjust_guest_l3e(pl3e[i], d);
1135 if ( !create_pae_xen_mappings(d, pl3e) )
1136 goto fail;
1138 unmap_domain_page(pl3e);
1139 return 1;
1141 fail:
1142 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
1143 while ( i-- > 0 )
1144 if ( is_guest_l3_slot(i) )
1145 put_page_from_l3e(pl3e[i], pfn);
1147 unmap_domain_page(pl3e);
1148 return 0;
1150 #else
1151 #define alloc_l3_table(page) (0)
1152 #endif
1154 #if CONFIG_PAGING_LEVELS >= 4
1155 static int alloc_l4_table(struct page_info *page)
1157 struct domain *d = page_get_owner(page);
1158 unsigned long pfn = page_to_mfn(page);
1159 l4_pgentry_t *pl4e = page_to_virt(page);
1160 int i;
1162 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1164 if ( is_guest_l4_slot(d, i) &&
1165 unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
1166 goto fail;
1168 adjust_guest_l4e(pl4e[i], d);
1171 /* Xen private mappings. */
1172 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1173 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
1174 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
1175 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
1176 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
1177 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
1178 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
1179 __PAGE_HYPERVISOR);
1180 if ( IS_COMPAT(d) )
1181 pl4e[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
1182 l4e_from_page(virt_to_page(d->arch.mm_arg_xlat_l3),
1183 __PAGE_HYPERVISOR);
1185 return 1;
1187 fail:
1188 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
1189 while ( i-- > 0 )
1190 if ( is_guest_l4_slot(d, i) )
1191 put_page_from_l4e(pl4e[i], pfn);
1193 return 0;
1195 #else
1196 #define alloc_l4_table(page) (0)
1197 #endif
1200 static void free_l1_table(struct page_info *page)
1202 struct domain *d = page_get_owner(page);
1203 unsigned long pfn = page_to_mfn(page);
1204 l1_pgentry_t *pl1e;
1205 int i;
1207 pl1e = map_domain_page(pfn);
1209 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
1210 if ( is_guest_l1_slot(i) )
1211 put_page_from_l1e(pl1e[i], d);
1213 unmap_domain_page(pl1e);
1217 static void free_l2_table(struct page_info *page)
1219 #ifdef CONFIG_COMPAT
1220 struct domain *d = page_get_owner(page);
1221 #endif
1222 unsigned long pfn = page_to_mfn(page);
1223 l2_pgentry_t *pl2e;
1224 int i;
1226 pl2e = map_domain_page(pfn);
1228 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
1229 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
1230 put_page_from_l2e(pl2e[i], pfn);
1232 unmap_domain_page(pl2e);
1234 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
1238 #if CONFIG_PAGING_LEVELS >= 3
1240 static void free_l3_table(struct page_info *page)
1242 struct domain *d = page_get_owner(page);
1243 unsigned long pfn = page_to_mfn(page);
1244 l3_pgentry_t *pl3e;
1245 int i;
1247 pl3e = map_domain_page(pfn);
1249 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
1250 if ( is_guest_l3_slot(i) )
1252 put_page_from_l3e(pl3e[i], pfn);
1253 unadjust_guest_l3e(pl3e[i], d);
1256 unmap_domain_page(pl3e);
1259 #endif
1261 #if CONFIG_PAGING_LEVELS >= 4
1263 static void free_l4_table(struct page_info *page)
1265 struct domain *d = page_get_owner(page);
1266 unsigned long pfn = page_to_mfn(page);
1267 l4_pgentry_t *pl4e = page_to_virt(page);
1268 int i;
1270 for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
1271 if ( is_guest_l4_slot(d, i) )
1272 put_page_from_l4e(pl4e[i], pfn);
1275 #endif
1278 /* How to write an entry to the guest pagetables.
1279 * Returns 0 for failure (pointer not valid), 1 for success. */
1280 static inline int update_intpte(intpte_t *p,
1281 intpte_t old,
1282 intpte_t new,
1283 unsigned long mfn,
1284 struct vcpu *v)
1285 {
1286 int rv = 1;
1287 #ifndef PTE_UPDATE_WITH_CMPXCHG
1288 rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
1289 #else
1290 {
1291 intpte_t t = old;
1292 for ( ; ; )
1293 {
1294 rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn));
1295 if ( unlikely(rv == 0) )
1296 {
1297 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
1298 ": saw %" PRIpte, old, new, t);
1299 break;
1300 }
1302 if ( t == old )
1303 break;
1305 /* Allowed to change in Accessed/Dirty flags only. */
1306 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
1308 old = t;
1309 }
1310 }
1311 #endif
1312 return rv;
1313 }
1315 /* Macro that wraps the appropriate type-changes around update_intpte().
1316 * Arguments are: type, ptr, old, new, mfn, vcpu */
1317 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v) \
1318 update_intpte((intpte_t *)(_p), \
1319 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
1320 (_m), (_v))
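As a concrete example, UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current) expands to:

    /*
     *   update_intpte((intpte_t *)(pl1e),
     *                 l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
     *                 (gl1mfn), (current));
     *
     * i.e. the macro only strips the typed pgentry wrappers; the real work
     * (plain write, or the cmpxchg retry loop) happens in update_intpte().
     */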
1322 /* Update the L1 entry at pl1e to new value nl1e. */
1323 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
1324 unsigned long gl1mfn)
1326 l1_pgentry_t ol1e;
1327 struct domain *d = current->domain;
1329 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
1330 return 0;
1332 if ( unlikely(paging_mode_refcounts(d)) )
1333 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1335 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
1337 /* Translate foreign guest addresses. */
1338 nl1e = l1e_from_pfn(gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e)),
1339 l1e_get_flags(nl1e));
1341 if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
1343 MEM_LOG("Bad L1 flags %x",
1344 l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
1345 return 0;
1348 adjust_guest_l1e(nl1e, d);
1350 /* Fast path for identical mapping, r/w and presence. */
1351 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
1352 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current);
1354 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
1355 return 0;
1357 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1359 put_page_from_l1e(nl1e, d);
1360 return 0;
1363 else
1365 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current)) )
1366 return 0;
1369 put_page_from_l1e(ol1e, d);
1370 return 1;
1374 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
1375 static int mod_l2_entry(l2_pgentry_t *pl2e,
1376 l2_pgentry_t nl2e,
1377 unsigned long pfn,
1378 unsigned long type)
1380 l2_pgentry_t ol2e;
1381 struct domain *d = current->domain;
1383 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
1385 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
1386 return 0;
1389 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
1390 return 0;
1392 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
1394 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
1396 MEM_LOG("Bad L2 flags %x",
1397 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
1398 return 0;
1401 adjust_guest_l2e(nl2e, d);
1403 /* Fast path for identical mapping and presence. */
1404 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
1405 return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current);
1407 if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
1408 return 0;
1410 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1412 put_page_from_l2e(nl2e, pfn);
1413 return 0;
1416 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, current)) )
1418 return 0;
1421 put_page_from_l2e(ol2e, pfn);
1422 return 1;
1425 #if CONFIG_PAGING_LEVELS >= 3
1427 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
1428 static int mod_l3_entry(l3_pgentry_t *pl3e,
1429 l3_pgentry_t nl3e,
1430 unsigned long pfn)
1432 l3_pgentry_t ol3e;
1433 struct domain *d = current->domain;
1434 int okay;
1436 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
1438 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
1439 return 0;
1442 #if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
1443 /*
1444 * Disallow updates to final L3 slot. It contains Xen mappings, and it
1445 * would be a pain to ensure they remain continuously valid throughout.
1446 */
1447 if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
1448 pgentry_ptr_to_slot(pl3e) >= 3 )
1449 return 0;
1450 #endif
1452 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
1453 return 0;
1455 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
1457 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
1459 MEM_LOG("Bad L3 flags %x",
1460 l3e_get_flags(nl3e) & l3_disallow_mask(d));
1461 return 0;
1464 adjust_guest_l3e(nl3e, d);
1466 /* Fast path for identical mapping and presence. */
1467 if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
1468 return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current);
1470 if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
1471 return 0;
1473 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1475 put_page_from_l3e(nl3e, pfn);
1476 return 0;
1479 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, current)) )
1481 return 0;
1484 okay = create_pae_xen_mappings(d, pl3e);
1485 BUG_ON(!okay);
1487 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
1489 put_page_from_l3e(ol3e, pfn);
1490 return 1;
1493 #endif
1495 #if CONFIG_PAGING_LEVELS >= 4
1497 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
1498 static int mod_l4_entry(struct domain *d,
1499 l4_pgentry_t *pl4e,
1500 l4_pgentry_t nl4e,
1501 unsigned long pfn)
1503 l4_pgentry_t ol4e;
1505 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
1507 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
1508 return 0;
1511 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
1512 return 0;
1514 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
1516 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
1518 MEM_LOG("Bad L4 flags %x",
1519 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
1520 return 0;
1523 adjust_guest_l4e(nl4e, current->domain);
1525 /* Fast path for identical mapping and presence. */
1526 if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
1527 return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current);
1529 if ( unlikely(!get_page_from_l4e(nl4e, pfn, current->domain)) )
1530 return 0;
1532 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1534 put_page_from_l4e(nl4e, pfn);
1535 return 0;
1538 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, current)) )
1540 return 0;
1543 put_page_from_l4e(ol4e, pfn);
1544 return 1;
1547 #endif
1549 int alloc_page_type(struct page_info *page, unsigned long type)
1551 struct domain *owner = page_get_owner(page);
1553 /* A page table is dirtied when its type count becomes non-zero. */
1554 if ( likely(owner != NULL) )
1555 mark_dirty(owner, page_to_mfn(page));
1557 switch ( type & PGT_type_mask )
1559 case PGT_l1_page_table:
1560 return alloc_l1_table(page);
1561 case PGT_l2_page_table:
1562 return alloc_l2_table(page, type);
1563 case PGT_l3_page_table:
1564 return alloc_l3_table(page);
1565 case PGT_l4_page_table:
1566 return alloc_l4_table(page);
1567 case PGT_gdt_page:
1568 case PGT_ldt_page:
1569 return alloc_segdesc_page(page);
1570 default:
1571 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
1572 type, page->u.inuse.type_info,
1573 page->count_info);
1574 BUG();
1577 return 0;
1581 void free_page_type(struct page_info *page, unsigned long type)
1583 struct domain *owner = page_get_owner(page);
1584 unsigned long gmfn;
1586 if ( likely(owner != NULL) )
1588 /*
1589 * We have to flush before the next use of the linear mapping
1590 * (e.g., update_va_mapping()) or we could end up modifying a page
1591 * that is no longer a page table (and hence screw up ref counts).
1592 */
1593 if ( current->domain == owner )
1594 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
1595 else
1596 flush_tlb_mask(owner->domain_dirty_cpumask);
1598 if ( unlikely(paging_mode_enabled(owner)) )
1600 /* A page table is dirtied when its type count becomes zero. */
1601 mark_dirty(owner, page_to_mfn(page));
1603 if ( shadow_mode_refcounts(owner) )
1604 return;
1606 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
1607 ASSERT(VALID_M2P(gmfn));
1608 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
1612 switch ( type & PGT_type_mask )
1614 case PGT_l1_page_table:
1615 free_l1_table(page);
1616 break;
1618 case PGT_l2_page_table:
1619 free_l2_table(page);
1620 break;
1622 #if CONFIG_PAGING_LEVELS >= 3
1623 case PGT_l3_page_table:
1624 free_l3_table(page);
1625 break;
1626 #endif
1628 #if CONFIG_PAGING_LEVELS >= 4
1629 case PGT_l4_page_table:
1630 free_l4_table(page);
1631 break;
1632 #endif
1634 default:
1635 printk("%s: type %lx pfn %lx\n",__FUNCTION__,
1636 type, page_to_mfn(page));
1637 BUG();
1642 void put_page_type(struct page_info *page)
1644 unsigned long nx, x, y = page->u.inuse.type_info;
1646 again:
1647 do {
1648 x = y;
1649 nx = x - 1;
1651 ASSERT((x & PGT_count_mask) != 0);
1653 if ( unlikely((nx & PGT_count_mask) == 0) )
1655 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1656 likely(nx & PGT_validated) )
1658 /*
1659 * Page-table pages must be unvalidated when count is zero. The
1660 * 'free' is safe because the refcnt is non-zero and validated
1661 * bit is clear => other ops will spin or fail.
1662 */
1663 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1664 x & ~PGT_validated)) != x) )
1665 goto again;
1666 /* We cleared the 'valid bit' so we do the clean up. */
1667 free_page_type(page, x);
1668 /* Carry on, but with the 'valid bit' now clear. */
1669 x &= ~PGT_validated;
1670 nx &= ~PGT_validated;
1673 /*
1674 * Record TLB information for flush later. We do not stamp page
1675 * tables when running in shadow mode:
1676 * 1. Pointless, since it's the shadow pt's which must be tracked.
1677 * 2. Shadow mode reuses this field for shadowed page tables to
1678 * store flags info -- we don't want to conflict with that.
1679 */
1680 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
1681 (page->count_info & PGC_page_table)) )
1682 page->tlbflush_timestamp = tlbflush_current_time();
1685 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1689 int get_page_type(struct page_info *page, unsigned long type)
1691 unsigned long nx, x, y = page->u.inuse.type_info;
1693 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
1695 again:
1696 do {
1697 x = y;
1698 nx = x + 1;
1699 if ( unlikely((nx & PGT_count_mask) == 0) )
1701 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1702 return 0;
1704 else if ( unlikely((x & PGT_count_mask) == 0) )
1706 struct domain *d = page_get_owner(page);
1708 /* Never allow a shadowed frame to go from type count 0 to 1 */
1709 if ( d && shadow_mode_enabled(d) )
1710 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
1712 ASSERT(!(x & PGT_pae_xen_l2));
1713 if ( (x & PGT_type_mask) != type )
1715 /*
1716 * On type change we check to flush stale TLB entries. This
1717 * may be unnecessary (e.g., page was GDT/LDT) but those
1718 * circumstances should be very rare.
1719 */
1720 cpumask_t mask = d->domain_dirty_cpumask;
1722 /* Don't flush if the timestamp is old enough */
1723 tlbflush_filter(mask, page->tlbflush_timestamp);
1725 if ( unlikely(!cpus_empty(mask)) &&
1726 /* Shadow mode: track only writable pages. */
1727 (!shadow_mode_enabled(page_get_owner(page)) ||
1728 ((nx & PGT_type_mask) == PGT_writable_page)) )
1730 perfc_incrc(need_flush_tlb_flush);
1731 flush_tlb_mask(mask);
1734 /* We lose existing type, back pointer, and validity. */
1735 nx &= ~(PGT_type_mask | PGT_validated);
1736 nx |= type;
1738 /* No special validation needed for writable pages. */
1739 /* Page tables and GDT/LDT need to be scanned for validity. */
1740 if ( type == PGT_writable_page )
1741 nx |= PGT_validated;
1744 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
1746 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1747 (type != PGT_l1_page_table) )
1748 MEM_LOG("Bad type (saw %" PRtype_info
1749 " != exp %" PRtype_info ") "
1750 "for mfn %lx (pfn %lx)",
1751 x, type, page_to_mfn(page),
1752 get_gpfn_from_mfn(page_to_mfn(page)));
1753 return 0;
1755 else if ( unlikely(!(x & PGT_validated)) )
1757 /* Someone else is updating validation of this page. Wait... */
1758 while ( (y = page->u.inuse.type_info) == x )
1759 cpu_relax();
1760 goto again;
1763 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
1765 if ( unlikely(!(nx & PGT_validated)) )
1767 /* Try to validate page type; drop the new reference on failure. */
1768 if ( unlikely(!alloc_page_type(page, type)) )
1770 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
1771 PRtype_info ": caf=%08x taf=%" PRtype_info,
1772 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1773 type, page->count_info, page->u.inuse.type_info);
1774 /* Noone else can get a reference. We hold the only ref. */
1775 page->u.inuse.type_info = 0;
1776 return 0;
1779 /* Noone else is updating simultaneously. */
1780 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1783 return 1;
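The usual pairing of these two routines by callers elsewhere in this file looks like the following sketch:

    /*
     *   if ( get_page_type(page, PGT_l1_page_table) )   // validates on 0 -> 1
     *   {
     *       ... use the frame as an L1 page table ...
     *       put_page_type(page);                        // frees the type on 1 -> 0
     *   }
     *
     * alloc_page_type() runs only when the type count rises from zero, and
     * free_page_type() only when it falls back to zero; intermediate
     * references merely adjust the counter (cf. get_page_and_type_from_pagenr()
     * earlier in this file).
     */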
1787 int new_guest_cr3(unsigned long mfn)
1789 struct vcpu *v = current;
1790 struct domain *d = v->domain;
1791 int okay;
1792 unsigned long old_base_mfn;
1794 #ifdef CONFIG_COMPAT
1795 if ( IS_COMPAT(d) )
1797 okay = paging_mode_refcounts(d)
1798 ? 0 /* Old code was broken, but what should it be? */
1799 : mod_l4_entry(
1800 d,
1801 __va(pagetable_get_paddr(v->arch.guest_table)),
1802 l4e_from_pfn(
1803 mfn,
1804 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
1805 pagetable_get_pfn(v->arch.guest_table));
1806 if ( unlikely(!okay) )
1808 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
1809 return 0;
1812 invalidate_shadow_ldt(v);
1813 write_ptbase(v);
1815 return 1;
1817 #endif
1818 okay = paging_mode_refcounts(d)
1819 ? get_page_from_pagenr(mfn, d)
1820 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
1821 if ( unlikely(!okay) )
1823 MEM_LOG("Error while installing new baseptr %lx", mfn);
1824 return 0;
1827 invalidate_shadow_ldt(v);
1829 old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
1831 v->arch.guest_table = pagetable_from_pfn(mfn);
1832 update_cr3(v);
1834 write_ptbase(v);
1836 if ( likely(old_base_mfn != 0) )
1838 if ( paging_mode_refcounts(d) )
1839 put_page(mfn_to_page(old_base_mfn));
1840 else
1841 put_page_and_type(mfn_to_page(old_base_mfn));
1844 return 1;
1847 static void process_deferred_ops(void)
1849 unsigned int deferred_ops;
1850 struct domain *d = current->domain;
1851 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1853 deferred_ops = info->deferred_ops;
1854 info->deferred_ops = 0;
1856 if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) )
1858 if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
1859 flush_tlb_mask(d->domain_dirty_cpumask);
1860 else
1861 local_flush_tlb();
1864 if ( deferred_ops & DOP_RELOAD_LDT )
1865 (void)map_ldt_shadow_page(0);
1867 if ( unlikely(info->foreign != NULL) )
1869 rcu_unlock_domain(info->foreign);
1870 info->foreign = NULL;
1874 static int set_foreigndom(domid_t domid)
1876 struct domain *e, *d = current->domain;
1877 struct percpu_mm_info *info = &this_cpu(percpu_mm_info);
1878 int okay = 1;
1880 ASSERT(info->foreign == NULL);
1882 if ( likely(domid == DOMID_SELF) )
1883 goto out;
1885 if ( unlikely(domid == d->domain_id) )
1887 MEM_LOG("Dom %u tried to specify itself as foreign domain",
1888 d->domain_id);
1889 okay = 0;
1891 else if ( unlikely(paging_mode_translate(d)) )
1893 MEM_LOG("Cannot mix foreign mappings with translated domains");
1894 okay = 0;
1896 else if ( !IS_PRIV(d) )
1898 switch ( domid )
1900 case DOMID_IO:
1901 info->foreign = rcu_lock_domain(dom_io);
1902 break;
1903 default:
1904 MEM_LOG("Dom %u cannot set foreign dom", d->domain_id);
1905 okay = 0;
1906 break;
1909 else
1911 info->foreign = e = rcu_lock_domain_by_id(domid);
1912 if ( e == NULL )
1914 switch ( domid )
1916 case DOMID_XEN:
1917 info->foreign = rcu_lock_domain(dom_xen);
1918 break;
1919 case DOMID_IO:
1920 info->foreign = rcu_lock_domain(dom_io);
1921 break;
1922 default:
1923 MEM_LOG("Unknown domain '%u'", domid);
1924 okay = 0;
1925 break;
1930 out:
1931 return okay;
1934 static inline cpumask_t vcpumask_to_pcpumask(
1935 struct domain *d, unsigned long vmask)
1937 unsigned int vcpu_id;
1938 cpumask_t pmask = CPU_MASK_NONE;
1939 struct vcpu *v;
1941 while ( vmask != 0 )
1943 vcpu_id = find_first_set_bit(vmask);
1944 vmask &= ~(1UL << vcpu_id);
1945 if ( (vcpu_id < MAX_VIRT_CPUS) &&
1946 ((v = d->vcpu[vcpu_id]) != NULL) )
1947 cpus_or(pmask, pmask, v->vcpu_dirty_cpumask);
1950 return pmask;
1953 int do_mmuext_op(
1954 XEN_GUEST_HANDLE(mmuext_op_t) uops,
1955 unsigned int count,
1956 XEN_GUEST_HANDLE(uint) pdone,
1957 unsigned int foreigndom)
1959 struct mmuext_op op;
1960 int rc = 0, i = 0, okay;
1961 unsigned long mfn = 0, gmfn = 0, type;
1962 unsigned int done = 0;
1963 struct page_info *page;
1964 struct vcpu *v = current;
1965 struct domain *d = v->domain;
1967 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
1969 count &= ~MMU_UPDATE_PREEMPTED;
1970 if ( unlikely(!guest_handle_is_null(pdone)) )
1971 (void)copy_from_guest(&done, pdone, 1);
1974 if ( unlikely(!guest_handle_okay(uops, count)) )
1976 rc = -EFAULT;
1977 goto out;
1980 if ( !set_foreigndom(foreigndom) )
1982 rc = -ESRCH;
1983 goto out;
1986 LOCK_BIGLOCK(d);
1988 for ( i = 0; i < count; i++ )
1990 if ( hypercall_preempt_check() )
1992 rc = hypercall_create_continuation(
1993 __HYPERVISOR_mmuext_op, "hihi",
1994 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
1995 break;
1998 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
2000 MEM_LOG("Bad __copy_from_guest");
2001 rc = -EFAULT;
2002 break;
2005 okay = 1;
2006 gmfn = op.arg1.mfn;
2007 mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
2008 page = mfn_to_page(mfn);
2010 switch ( op.cmd )
2012 case MMUEXT_PIN_L1_TABLE:
2013 type = PGT_l1_page_table;
2014 goto pin_page;
2016 case MMUEXT_PIN_L2_TABLE:
2017 type = PGT_l2_page_table;
2018 goto pin_page;
2020 case MMUEXT_PIN_L3_TABLE:
2021 type = PGT_l3_page_table;
2022 goto pin_page;
2024 case MMUEXT_PIN_L4_TABLE:
2025 if ( IS_COMPAT(FOREIGNDOM) )
2026 break;
2027 type = PGT_l4_page_table;
2029 pin_page:
2030 /* Ignore pinning of invalid paging levels. */
2031 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
2032 break;
2034 if ( paging_mode_refcounts(FOREIGNDOM) )
2035 break;
2037 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
2038 if ( unlikely(!okay) )
2040 MEM_LOG("Error while pinning mfn %lx", mfn);
2041 break;
2044 if ( unlikely(test_and_set_bit(_PGT_pinned,
2045 &page->u.inuse.type_info)) )
2047 MEM_LOG("Mfn %lx already pinned", mfn);
2048 put_page_and_type(page);
2049 okay = 0;
2050 break;
2053 /* A page is dirtied when its pin status is set. */
2054 mark_dirty(d, mfn);
2056 /* We can race domain destruction (domain_relinquish_resources). */
2057 if ( unlikely(this_cpu(percpu_mm_info).foreign != NULL) &&
2058 test_bit(_DOMF_dying, &FOREIGNDOM->domain_flags) &&
2059 test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
2060 put_page_and_type(page);
2062 break;
2064 case MMUEXT_UNPIN_TABLE:
2065 if ( paging_mode_refcounts(d) )
2066 break;
2068 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) )
2070 MEM_LOG("Mfn %lx bad domain (dom=%p)",
2071 mfn, page_get_owner(page));
2073 else if ( likely(test_and_clear_bit(_PGT_pinned,
2074 &page->u.inuse.type_info)) )
2076 put_page_and_type(page);
2077 put_page(page);
2078 /* A page is dirtied when its pin status is cleared. */
2079 mark_dirty(d, mfn);
2081 else
2083 okay = 0;
2084 put_page(page);
2085 MEM_LOG("Mfn %lx not pinned", mfn);
2087 break;
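/*
 * Descriptive note (added): install a new guest page-table base. The deferred
 * TLB flush is cleared here since switching the base flushes the TLB anyway.
 */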
2089 case MMUEXT_NEW_BASEPTR:
2090 okay = new_guest_cr3(mfn);
2091 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
2092 break;
2094 #ifdef __x86_64__
2095 case MMUEXT_NEW_USER_BASEPTR: {
2096 unsigned long old_mfn;
2098 if ( mfn != 0 )
2100 if ( paging_mode_refcounts(d) )
2101 okay = get_page_from_pagenr(mfn, d);
2102 else
2103 okay = get_page_and_type_from_pagenr(
2104 mfn, PGT_root_page_table, d);
2105 if ( unlikely(!okay) )
2107 MEM_LOG("Error while installing new mfn %lx", mfn);
2108 break;
2112 old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
2113 v->arch.guest_table_user = pagetable_from_pfn(mfn);
2115 if ( old_mfn != 0 )
2117 if ( paging_mode_refcounts(d) )
2118 put_page(mfn_to_page(old_mfn));
2119 else
2120 put_page_and_type(mfn_to_page(old_mfn));
2123 break;
2125 #endif
2127 case MMUEXT_TLB_FLUSH_LOCAL:
2128 this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
2129 break;
2131 case MMUEXT_INVLPG_LOCAL:
2132 if ( !paging_mode_enabled(d)
2133 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
2134 local_flush_tlb_one(op.arg1.linear_addr);
2135 break;
2137 case MMUEXT_TLB_FLUSH_MULTI:
2138 case MMUEXT_INVLPG_MULTI:
2140 unsigned long vmask;
2141 cpumask_t pmask;
2142 if ( unlikely(copy_from_guest(&vmask, op.arg2.vcpumask, 1)) )
2144 okay = 0;
2145 break;
2147 pmask = vcpumask_to_pcpumask(d, vmask);
2148 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
2149 flush_tlb_mask(pmask);
2150 else
2151 flush_tlb_one_mask(pmask, op.arg1.linear_addr);
2152 break;
2155 case MMUEXT_TLB_FLUSH_ALL:
2156 flush_tlb_mask(d->domain_dirty_cpumask);
2157 break;
2159 case MMUEXT_INVLPG_ALL:
2160 flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr);
2161 break;
2163 case MMUEXT_FLUSH_CACHE:
2164 if ( unlikely(!cache_flush_permitted(d)) )
2166 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
2167 okay = 0;
2169 else
2171 wbinvd();
2173 break;
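/*
 * Descriptive note (added): install a new guest LDT. External-paging domains
 * may not use this; the base must be page-aligned, at most 8192 entries, and
 * fall within the guest-accessible range. On a change the shadow LDT mappings
 * are invalidated, the new LDT is loaded, and a reload is also queued as a
 * deferred op when the table is non-empty.
 */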
2175 case MMUEXT_SET_LDT:
2177 unsigned long ptr = op.arg1.linear_addr;
2178 unsigned long ents = op.arg2.nr_ents;
2180 if ( paging_mode_external(d) )
2182 MEM_LOG("ignoring SET_LDT hypercall from external "
2183 "domain %u", d->domain_id);
2184 okay = 0;
2186 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
2187 (ents > 8192) ||
2188 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
2190 okay = 0;
2191 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
2193 else if ( (v->arch.guest_context.ldt_ents != ents) ||
2194 (v->arch.guest_context.ldt_base != ptr) )
2196 invalidate_shadow_ldt(v);
2197 v->arch.guest_context.ldt_base = ptr;
2198 v->arch.guest_context.ldt_ents = ents;
2199 load_LDT(v);
2200 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
2201 if ( ents != 0 )
2202 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
2204 break;
2207 default:
2208 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
2209 rc = -ENOSYS;
2210 okay = 0;
2211 break;
2214 if ( unlikely(!okay) )
2216 rc = rc ? rc : -EINVAL;
2217 break;
2220 guest_handle_add_offset(uops, 1);
2223 process_deferred_ops();
2225 UNLOCK_BIGLOCK(d);
2227 out:
2228 /* Add incremental work we have done to the @done output parameter. */
2229 if ( unlikely(!guest_handle_is_null(pdone)) )
2231 done += i;
2232 copy_to_guest(pdone, &done, 1);
2235 return rc;
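/*
 * Descriptive note (added): batched page-table updates. Each request is a
 * (ptr, val) pair: the low bits of ptr select the command, the remaining bits
 * give the frame and offset of the page-table entry to modify
 * (MMU_NORMAL_PT_UPDATE) or the machine frame whose M2P entry is set to val
 * (MMU_MACHPHYS_UPDATE).
 */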
2238 int do_mmu_update(
2239 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
2240 unsigned int count,
2241 XEN_GUEST_HANDLE(uint) pdone,
2242 unsigned int foreigndom)
2244 struct mmu_update req;
2245 void *va;
2246 unsigned long gpfn, gmfn, mfn;
2247 struct page_info *page;
2248 int rc = 0, okay = 1, i = 0;
2249 unsigned int cmd, done = 0;
2250 struct vcpu *v = current;
2251 struct domain *d = v->domain;
2252 unsigned long type_info;
2253 struct domain_mmap_cache mapcache, sh_mapcache;
2255 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
2257 count &= ~MMU_UPDATE_PREEMPTED;
2258 if ( unlikely(!guest_handle_is_null(pdone)) )
2259 (void)copy_from_guest(&done, pdone, 1);
2262 if ( unlikely(!guest_handle_okay(ureqs, count)) )
2264 rc = -EFAULT;
2265 goto out;
2268 if ( !set_foreigndom(foreigndom) )
2270 rc = -ESRCH;
2271 goto out;
2274 domain_mmap_cache_init(&mapcache);
2275 domain_mmap_cache_init(&sh_mapcache);
2277 perfc_incrc(calls_to_mmu_update);
2278 perfc_addc(num_page_updates, count);
2280 LOCK_BIGLOCK(d);
2282 for ( i = 0; i < count; i++ )
2284 if ( hypercall_preempt_check() )
2286 rc = hypercall_create_continuation(
2287 __HYPERVISOR_mmu_update, "hihi",
2288 ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
2289 break;
2292 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
2294 MEM_LOG("Bad __copy_from_guest");
2295 rc = -EFAULT;
2296 break;
2299 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
2300 okay = 0;
2302 switch ( cmd )
2304 /*
2305 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
2306 */
2307 case MMU_NORMAL_PT_UPDATE:
2309 gmfn = req.ptr >> PAGE_SHIFT;
2310 mfn = gmfn_to_mfn(d, gmfn);
2312 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2314 MEM_LOG("Could not get page for normal update");
2315 break;
2318 va = map_domain_page_with_cache(mfn, &mapcache);
2319 va = (void *)((unsigned long)va +
2320 (unsigned long)(req.ptr & ~PAGE_MASK));
2321 page = mfn_to_page(mfn);
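/*
 * Descriptive note (added): dispatch on the target frame's current type --
 * writes into live page tables are validated level by level below, while any
 * other frame must be takeable as PGT_writable_page and is written via the
 * paging layer.
 */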
2323 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
2325 case PGT_l1_page_table:
2326 case PGT_l2_page_table:
2327 case PGT_l3_page_table:
2328 case PGT_l4_page_table:
2330 if ( paging_mode_refcounts(d) )
2332 MEM_LOG("mmu update on auto-refcounted domain!");
2333 break;
2336 if ( unlikely(!get_page_type(
2337 page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
2338 goto not_a_pt;
2340 switch ( type_info & PGT_type_mask )
2342 case PGT_l1_page_table:
2344 l1_pgentry_t l1e = l1e_from_intpte(req.val);
2345 okay = mod_l1_entry(va, l1e, mfn);
2347 break;
2348 case PGT_l2_page_table:
2350 l2_pgentry_t l2e = l2e_from_intpte(req.val);
2351 okay = mod_l2_entry(va, l2e, mfn, type_info);
2353 break;
2354 #if CONFIG_PAGING_LEVELS >= 3
2355 case PGT_l3_page_table:
2357 l3_pgentry_t l3e = l3e_from_intpte(req.val);
2358 okay = mod_l3_entry(va, l3e, mfn);
2360 break;
2361 #endif
2362 #if CONFIG_PAGING_LEVELS >= 4
2363 case PGT_l4_page_table:
2365 l4_pgentry_t l4e = l4e_from_intpte(req.val);
2366 okay = mod_l4_entry(d, va, l4e, mfn);
2368 break;
2369 #endif
2372 put_page_type(page);
2374 break;
2376 default:
2377 not_a_pt:
2379 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2380 break;
2382 okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
2384 put_page_type(page);
2386 break;
2389 unmap_domain_page_with_cache(va, &mapcache);
2391 put_page(page);
2392 break;
2394 case MMU_MACHPHYS_UPDATE:
2396 mfn = req.ptr >> PAGE_SHIFT;
2397 gpfn = req.val;
2399 if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) )
2401 MEM_LOG("Could not get page for mach->phys update");
2402 break;
2405 if ( unlikely(paging_mode_translate(FOREIGNDOM)) )
2407 MEM_LOG("Mach-phys update on auto-translate guest");
2408 break;
2411 set_gpfn_from_mfn(mfn, gpfn);
2412 okay = 1;
2414 mark_dirty(FOREIGNDOM, mfn);
2416 put_page(mfn_to_page(mfn));
2417 break;
2419 default:
2420 MEM_LOG("Invalid page update command %x", cmd);
2421 rc = -ENOSYS;
2422 okay = 0;
2423 break;
2426 if ( unlikely(!okay) )
2428 rc = rc ? rc : -EINVAL;
2429 break;
2432 guest_handle_add_offset(ureqs, 1);
2435 domain_mmap_cache_destroy(&mapcache);
2436 domain_mmap_cache_destroy(&sh_mapcache);
2438 process_deferred_ops();
2440 UNLOCK_BIGLOCK(d);
2442 out:
2443 /* Add incremental work we have done to the @done output parameter. */
2444 if ( unlikely(!guest_handle_is_null(pdone)) )
2446 done += i;
2447 copy_to_guest(pdone, &done, 1);
2450 return rc;
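/*
 * Descriptive note (added): grant-table mapping helpers. The *_pte_* variants
 * take the guest physical address of the L1 entry itself
 * (GNTMAP_contains_pte); the *_va_* variants walk the guest's page tables
 * from a linear address.
 */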
2454 static int create_grant_pte_mapping(
2455 uint64_t pte_addr, l1_pgentry_t nl1e, struct vcpu *v)
2457 int rc = GNTST_okay;
2458 void *va;
2459 unsigned long gmfn, mfn;
2460 struct page_info *page;
2461 u32 type;
2462 l1_pgentry_t ol1e;
2463 struct domain *d = v->domain;
2465 ASSERT(spin_is_locked(&d->big_lock));
2467 adjust_guest_l1e(nl1e, d);
2469 gmfn = pte_addr >> PAGE_SHIFT;
2470 mfn = gmfn_to_mfn(d, gmfn);
2472 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2474 MEM_LOG("Could not get page for normal update");
2475 return GNTST_general_error;
2478 va = map_domain_page(mfn);
2479 va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
2480 page = mfn_to_page(mfn);
2482 type = page->u.inuse.type_info & PGT_type_mask;
2483 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2485 MEM_LOG("Grant map attempted to update a non-L1 page");
2486 rc = GNTST_general_error;
2487 goto failed;
2490 ol1e = *(l1_pgentry_t *)va;
2491 if ( !UPDATE_ENTRY(l1, va, ol1e, nl1e, mfn, v) )
2493 put_page_type(page);
2494 rc = GNTST_general_error;
2495 goto failed;
2498 if ( !paging_mode_refcounts(d) )
2499 put_page_from_l1e(ol1e, d);
2501 put_page_type(page);
2503 failed:
2504 unmap_domain_page(va);
2505 put_page(page);
2507 return rc;
2510 static int destroy_grant_pte_mapping(
2511 uint64_t addr, unsigned long frame, struct domain *d)
2513 int rc = GNTST_okay;
2514 void *va;
2515 unsigned long gmfn, mfn;
2516 struct page_info *page;
2517 u32 type;
2518 l1_pgentry_t ol1e;
2520 gmfn = addr >> PAGE_SHIFT;
2521 mfn = gmfn_to_mfn(d, gmfn);
2523 if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) )
2525 MEM_LOG("Could not get page for normal update");
2526 return GNTST_general_error;
2529 va = map_domain_page(mfn);
2530 va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
2531 page = mfn_to_page(mfn);
2533 type = page->u.inuse.type_info & PGT_type_mask;
2534 if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
2536 MEM_LOG("Grant map attempted to update a non-L1 page");
2537 rc = GNTST_general_error;
2538 goto failed;
2541 if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) )
2543 put_page_type(page);
2544 rc = GNTST_general_error;
2545 goto failed;
2548 /* Check that the virtual address supplied is actually mapped to frame. */
2549 if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) )
2551 MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
2552 (unsigned long)l1e_get_intpte(ol1e), addr, frame);
2553 put_page_type(page);
2554 rc = GNTST_general_error;
2555 goto failed;
2558 /* Delete pagetable entry. */
2559 if ( unlikely(!UPDATE_ENTRY(l1,
2560 (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn,
2561 d->vcpu[0] /* Change if we go to per-vcpu shadows. */)) )
2563 MEM_LOG("Cannot delete PTE entry at %p", va);
2564 put_page_type(page);
2565 rc = GNTST_general_error;
2566 goto failed;
2569 put_page_type(page);
2571 failed:
2572 unmap_domain_page(va);
2573 put_page(page);
2574 return rc;
2578 static int create_grant_va_mapping(
2579 unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
2581 l1_pgentry_t *pl1e, ol1e;
2582 struct domain *d = v->domain;
2583 unsigned long gl1mfn;
2584 int okay;
2586 ASSERT(spin_is_locked(&d->big_lock));
2588 adjust_guest_l1e(nl1e, d);
2590 pl1e = guest_map_l1e(v, va, &gl1mfn);
2591 if ( !pl1e )
2593 MEM_LOG("Could not find L1 PTE for address %lx", va);
2594 return GNTST_general_error;
2596 ol1e = *pl1e;
2597 okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v);
2598 guest_unmap_l1e(v, pl1e);
2599 pl1e = NULL;
2601 if ( !okay )
2602 return GNTST_general_error;
2604 if ( !paging_mode_refcounts(d) )
2605 put_page_from_l1e(ol1e, d);
2607 return GNTST_okay;
2610 static int destroy_grant_va_mapping(
2611 unsigned long addr, unsigned long frame, struct vcpu *v)
2613 l1_pgentry_t *pl1e, ol1e;
2614 unsigned long gl1mfn;
2615 int rc = 0;
2617 pl1e = guest_map_l1e(v, addr, &gl1mfn);
2618 if ( !pl1e )
2620 MEM_LOG("Could not find L1 PTE for address %lx", addr);
2621 return GNTST_general_error;
2623 ol1e = *pl1e;
2625 /* Check that the virtual address supplied is actually mapped to frame. */
2626 if ( unlikely(l1e_get_pfn(ol1e) != frame) )
2628 MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
2629 l1e_get_pfn(ol1e), addr, frame);
2630 rc = GNTST_general_error;
2631 goto out;
2634 /* Delete pagetable entry. */
2635 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(), gl1mfn, v)) )
2637 MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
2638 rc = GNTST_general_error;
2639 goto out;
2642 out:
2643 guest_unmap_l1e(v, pl1e);
2644 return rc;
2647 int create_grant_host_mapping(
2648 uint64_t addr, unsigned long frame, unsigned int flags)
2650 l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
2652 if ( (flags & GNTMAP_application_map) )
2653 l1e_add_flags(pte,_PAGE_USER);
2654 if ( !(flags & GNTMAP_readonly) )
2655 l1e_add_flags(pte,_PAGE_RW);
2657 if ( flags & GNTMAP_contains_pte )
2658 return create_grant_pte_mapping(addr, pte, current);
2659 return create_grant_va_mapping(addr, pte, current);
2662 int destroy_grant_host_mapping(
2663 uint64_t addr, unsigned long frame, unsigned int flags)
2665 if ( flags & GNTMAP_contains_pte )
2666 return destroy_grant_pte_mapping(addr, frame, current->domain);
2667 return destroy_grant_va_mapping(addr, frame, current);
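/*
 * Descriptive note (added): detach a page from its current owner without
 * freeing it, as used by grant-table transfers. Ownership is cleared
 * atomically while the only remaining reference is the PGC_allocated one.
 */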
2670 int steal_page(
2671 struct domain *d, struct page_info *page, unsigned int memflags)
2673 u32 _d, _nd, x, y;
2675 spin_lock(&d->page_alloc_lock);
2677 /*
2678 * The tricky bit: atomically release ownership while there is just one
2679 * benign reference to the page (PGC_allocated). If that reference
2680 * disappears then the deallocation routine will safely spin.
2681 */
2682 _d = pickle_domptr(d);
2683 _nd = page->u.inuse._domain;
2684 y = page->count_info;
2685 do {
2686 x = y;
2687 if (unlikely((x & (PGC_count_mask|PGC_allocated)) !=
2688 (1 | PGC_allocated)) || unlikely(_nd != _d)) {
2689 MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
2690 " caf=%08x, taf=%" PRtype_info "\n",
2691 (void *) page_to_mfn(page),
2692 d, d->domain_id, unpickle_domptr(_nd), x,
2693 page->u.inuse.type_info);
2694 spin_unlock(&d->page_alloc_lock);
2695 return -1;
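/*
 * Descriptive note (added): cmpxchg8b compares the 64-bit
 * {count_info, _domain} pair against {x, _d} and, if it still matches,
 * atomically clears the owner while leaving the reference count unchanged;
 * otherwise the current values are reread and the loop retries or bails.
 */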
2697 __asm__ __volatile__(
2698 LOCK_PREFIX "cmpxchg8b %2"
2699 : "=d" (_nd), "=a" (y),
2700 "=m" (*(volatile u64 *)(&page->count_info))
2701 : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
2702 } while (unlikely(_nd != _d) || unlikely(y != x));
2704 /*
2705 * Unlink from 'd'. At least one reference remains (now anonymous), so
2706 * no one else is spinning to try to delete this page from 'd'.
2707 */
2708 if ( !(memflags & MEMF_no_refcount) )
2709 d->tot_pages--;
2710 list_del(&page->list);
2712 spin_unlock(&d->page_alloc_lock);
2714 return 0;
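/*
 * Descriptive note (added): update the L1 entry mapping a single linear
 * address in the caller's current address space, then perform whatever TLB
 * maintenance the flags request (local/all/multicast flush or single-entry
 * invalidation).
 */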
2717 int do_update_va_mapping(unsigned long va, u64 val64,
2718 unsigned long flags)
2720 l1_pgentry_t val = l1e_from_intpte(val64);
2721 struct vcpu *v = current;
2722 struct domain *d = v->domain;
2723 l1_pgentry_t *pl1e;
2724 unsigned long vmask, bmap_ptr, gl1mfn;
2725 cpumask_t pmask;
2726 int rc = 0;
2728 perfc_incrc(calls_to_update_va);
2730 if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) )
2731 return -EINVAL;
2733 LOCK_BIGLOCK(d);
2735 pl1e = guest_map_l1e(v, va, &gl1mfn);
2737 if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn)) )
2738 rc = -EINVAL;
2740 if ( pl1e )
2741 guest_unmap_l1e(v, pl1e);
2742 pl1e = NULL;
2744 switch ( flags & UVMF_FLUSHTYPE_MASK )
2746 case UVMF_TLB_FLUSH:
2747 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2749 case UVMF_LOCAL:
2750 local_flush_tlb();
2751 break;
2752 case UVMF_ALL:
2753 flush_tlb_mask(d->domain_dirty_cpumask);
2754 break;
2755 default:
2756 if ( unlikely(!IS_COMPAT(d) ?
2757 get_user(vmask, (unsigned long *)bmap_ptr) :
2758 get_user(vmask, (unsigned int *)bmap_ptr)) )
2759 rc = -EFAULT;
2760 pmask = vcpumask_to_pcpumask(d, vmask);
2761 flush_tlb_mask(pmask);
2762 break;
2764 break;
2766 case UVMF_INVLPG:
2767 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
2769 case UVMF_LOCAL:
2770 if ( !paging_mode_enabled(d)
2771 || (paging_invlpg(current, va) != 0) )
2772 local_flush_tlb_one(va);
2773 break;
2774 case UVMF_ALL:
2775 flush_tlb_one_mask(d->domain_dirty_cpumask, va);
2776 break;
2777 default:
2778 if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) )
2779 rc = -EFAULT;
2780 pmask = vcpumask_to_pcpumask(d, vmask);
2781 flush_tlb_one_mask(pmask, va);
2782 break;
2784 break;
2787 process_deferred_ops();
2789 UNLOCK_BIGLOCK(d);
2791 return rc;
2794 int do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
2795 unsigned long flags,
2796 domid_t domid)
2798 int rc;
2800 if ( unlikely(!IS_PRIV(current->domain)) )
2801 return -EPERM;
2803 if ( !set_foreigndom(domid) )
2804 return -ESRCH;
2806 rc = do_update_va_mapping(va, val64, flags);
2808 return rc;
2813 /*************************
2814 * Descriptor Tables
2815 */
2817 void destroy_gdt(struct vcpu *v)
2819 int i;
2820 unsigned long pfn;
2822 v->arch.guest_context.gdt_ents = 0;
2823 for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
2825 if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 )
2826 put_page_and_type(mfn_to_page(pfn));
2827 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
2828 v->arch.guest_context.gdt_frames[i] = 0;
2833 long set_gdt(struct vcpu *v,
2834 unsigned long *frames,
2835 unsigned int entries)
2837 struct domain *d = v->domain;
2838 /* NB. There are 512 8-byte entries per GDT page. */
2839 int i, nr_pages = (entries + 511) / 512;
2840 unsigned long mfn;
2842 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2843 return -EINVAL;
2845 /* Check the pages in the new GDT. */
2846 for ( i = 0; i < nr_pages; i++ ) {
2847 mfn = frames[i] = gmfn_to_mfn(d, frames[i]);
2848 if ( !mfn_valid(mfn) ||
2849 !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) )
2850 goto fail;
2853 /* Tear down the old GDT. */
2854 destroy_gdt(v);
2856 /* Install the new GDT. */
2857 v->arch.guest_context.gdt_ents = entries;
2858 for ( i = 0; i < nr_pages; i++ )
2860 v->arch.guest_context.gdt_frames[i] = frames[i];
2861 l1e_write(&v->arch.perdomain_ptes[i],
2862 l1e_from_pfn(frames[i], __PAGE_HYPERVISOR));
2865 return 0;
2867 fail:
2868 while ( i-- > 0 )
2869 put_page_and_type(mfn_to_page(frames[i]));
2870 return -EINVAL;
2874 long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries)
2876 int nr_pages = (entries + 511) / 512;
2877 unsigned long frames[16];
2878 long ret;
2880 /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */
2881 if ( entries > FIRST_RESERVED_GDT_ENTRY )
2882 return -EINVAL;
2884 if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) )
2885 return -EFAULT;
2887 LOCK_BIGLOCK(current->domain);
2889 if ( (ret = set_gdt(current, frames, entries)) == 0 )
2890 local_flush_tlb();
2892 UNLOCK_BIGLOCK(current->domain);
2894 return ret;
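/*
 * Descriptive note (added): install a single GDT/LDT descriptor at machine
 * address pa. The descriptor is sanity-checked, the target frame must be a
 * GDT/LDT page or ordinary writable memory owned by the caller, and the
 * 8-byte slot is then written in place.
 */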
2898 long do_update_descriptor(u64 pa, u64 desc)
2900 struct domain *dom = current->domain;
2901 unsigned long gmfn = pa >> PAGE_SHIFT;
2902 unsigned long mfn;
2903 unsigned int offset;
2904 struct desc_struct *gdt_pent, d;
2905 struct page_info *page;
2906 long ret = -EINVAL;
2908 offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct);
2910 *(u64 *)&d = desc;
2912 LOCK_BIGLOCK(dom);
2914 mfn = gmfn_to_mfn(dom, gmfn);
2915 if ( (((unsigned int)pa % sizeof(struct desc_struct)) != 0) ||
2916 !mfn_valid(mfn) ||
2917 !check_descriptor(dom, &d) )
2919 UNLOCK_BIGLOCK(dom);
2920 return -EINVAL;
2923 page = mfn_to_page(mfn);
2924 if ( unlikely(!get_page(page, dom)) )
2926 UNLOCK_BIGLOCK(dom);
2927 return -EINVAL;
2930 /* Check if the given frame is in use in an unsafe context. */
2931 switch ( page->u.inuse.type_info & PGT_type_mask )
2933 case PGT_gdt_page:
2934 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
2935 goto out;
2936 break;
2937 case PGT_ldt_page:
2938 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
2939 goto out;
2940 break;
2941 default:
2942 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
2943 goto out;
2944 break;
2947 mark_dirty(dom, mfn);
2949 /* All is good so make the update. */
2950 gdt_pent = map_domain_page(mfn);
2951 memcpy(&gdt_pent[offset], &d, 8);
2952 unmap_domain_page(gdt_pent);
2954 put_page_type(page);
2956 ret = 0; /* success */
2958 out:
2959 put_page(page);
2961 UNLOCK_BIGLOCK(dom);
2963 return ret;
2966 typedef struct e820entry e820entry_t;
2967 DEFINE_XEN_GUEST_HANDLE(e820entry_t);
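/*
 * Descriptive note (added): x86-specific memory_op subcommands -- inserting
 * shared-info or grant-table frames into a translated guest's physmap,
 * getting and setting E820 memory maps, and reporting the
 * machine-to-physical table mapping.
 */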
2969 long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
2971 switch ( op )
2973 case XENMEM_add_to_physmap:
2975 struct xen_add_to_physmap xatp;
2976 unsigned long prev_mfn, mfn = 0, gpfn;
2977 struct domain *d;
2979 if ( copy_from_guest(&xatp, arg, 1) )
2980 return -EFAULT;
2982 if ( xatp.domid == DOMID_SELF )
2983 d = rcu_lock_current_domain();
2984 else if ( !IS_PRIV(current->domain) )
2985 return -EPERM;
2986 else if ( (d = rcu_lock_domain_by_id(xatp.domid)) == NULL )
2987 return -ESRCH;
2989 switch ( xatp.space )
2991 case XENMAPSPACE_shared_info:
2992 if ( xatp.idx == 0 )
2993 mfn = virt_to_mfn(d->shared_info);
2994 break;
2995 case XENMAPSPACE_grant_table:
2996 spin_lock(&d->grant_table->lock);
2998 if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
2999 (xatp.idx < max_nr_grant_frames) )
3000 gnttab_grow_table(d, xatp.idx + 1);
3002 if ( xatp.idx < nr_grant_frames(d->grant_table) )
3003 mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]);
3005 spin_unlock(&d->grant_table->lock);
3006 break;
3007 default:
3008 break;
3011 if ( !paging_mode_translate(d) || (mfn == 0) )
3013 rcu_unlock_domain(d);
3014 return -EINVAL;
3017 LOCK_BIGLOCK(d);
3019 /* Remove previously mapped page if it was present. */
3020 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
3021 if ( mfn_valid(prev_mfn) )
3023 if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) )
3024 /* Xen heap frames are simply unhooked from this phys slot. */
3025 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
3026 else
3027 /* Normal domain memory is freed, to avoid leaking memory. */
3028 guest_remove_page(d, xatp.gpfn);
3031 /* Unmap from old location, if any. */
3032 gpfn = get_gpfn_from_mfn(mfn);
3033 if ( gpfn != INVALID_M2P_ENTRY )
3034 guest_physmap_remove_page(d, gpfn, mfn);
3036 /* Map at new location. */
3037 guest_physmap_add_page(d, xatp.gpfn, mfn);
3039 UNLOCK_BIGLOCK(d);
3041 rcu_unlock_domain(d);
3043 break;
3046 case XENMEM_set_memory_map:
3048 struct xen_foreign_memory_map fmap;
3049 struct domain *d;
3050 int rc;
3052 if ( copy_from_guest(&fmap, arg, 1) )
3053 return -EFAULT;
3055 if ( fmap.map.nr_entries > ARRAY_SIZE(d->arch.e820) )
3056 return -EINVAL;
3058 if ( fmap.domid == DOMID_SELF )
3059 d = rcu_lock_current_domain();
3060 else if ( !IS_PRIV(current->domain) )
3061 return -EPERM;
3062 else if ( (d = rcu_lock_domain_by_id(fmap.domid)) == NULL )
3063 return -ESRCH;
3065 rc = copy_from_guest(&d->arch.e820[0], fmap.map.buffer,
3066 fmap.map.nr_entries) ? -EFAULT : 0;
3067 d->arch.nr_e820 = fmap.map.nr_entries;
3069 rcu_unlock_domain(d);
3070 return rc;
3073 case XENMEM_memory_map:
3075 struct xen_memory_map map;
3076 struct domain *d = current->domain;
3078 /* Backwards compatibility. */
3079 if ( d->arch.nr_e820 == 0 )
3080 return -ENOSYS;
3082 if ( copy_from_guest(&map, arg, 1) )
3083 return -EFAULT;
3085 map.nr_entries = min(map.nr_entries, d->arch.nr_e820);
3086 if ( copy_to_guest(map.buffer, &d->arch.e820[0], map.nr_entries) ||
3087 copy_to_guest(arg, &map, 1) )
3088 return -EFAULT;
3090 return 0;
3093 case XENMEM_machine_memory_map:
3095 struct xen_memory_map memmap;
3096 XEN_GUEST_HANDLE(e820entry_t) buffer;
3097 int count;
3099 if ( !IS_PRIV(current->domain) )
3100 return -EINVAL;
3102 if ( copy_from_guest(&memmap, arg, 1) )
3103 return -EFAULT;
3104 if ( memmap.nr_entries < e820.nr_map + 1 )
3105 return -EINVAL;
3107 buffer = guest_handle_cast(memmap.buffer, e820entry_t);
3109 count = min((unsigned int)e820.nr_map, memmap.nr_entries);
3110 if ( copy_to_guest(buffer, &e820.map[0], count) != 0 )
3111 return -EFAULT;
3113 memmap.nr_entries = count;
3115 if ( copy_to_guest(arg, &memmap, 1) )
3116 return -EFAULT;
3118 return 0;
3121 case XENMEM_machphys_mapping:
3123 struct xen_machphys_mapping mapping = {
3124 .v_start = MACH2PHYS_VIRT_START,
3125 .v_end = MACH2PHYS_VIRT_END,
3126 .max_mfn = MACH2PHYS_NR_ENTRIES - 1
3127 };
3129 if ( copy_to_guest(arg, &mapping, 1) )
3130 return -EFAULT;
3132 return 0;
3135 default:
3136 return subarch_memory_op(op, arg);
3139 return 0;
3143 /*************************
3144 * Writable Pagetables
3145 */
3147 struct ptwr_emulate_ctxt {
3148 struct x86_emulate_ctxt ctxt;
3149 unsigned long cr2;
3150 l1_pgentry_t pte;
3151 };
3153 static int ptwr_emulated_read(
3154 enum x86_segment seg,
3155 unsigned long offset,
3156 unsigned long *val,
3157 unsigned int bytes,
3158 struct x86_emulate_ctxt *ctxt)
3160 unsigned int rc;
3161 unsigned long addr = offset;
3163 *val = 0;
3164 if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
3166 propagate_page_fault(addr + bytes - rc, 0); /* read fault */
3167 return X86EMUL_EXCEPTION;
3170 return X86EMUL_OKAY;
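/*
 * Descriptive note (added): core of writable-pagetable emulation -- apply an
 * emulated write (or cmpxchg) to a read-only-mapped L1 page. Sub-word
 * accesses are widened to a full entry, the new PTE is validated with
 * get_page_from_l1e(), and the update then goes through the usual
 * UPDATE_ENTRY/cmpxchg paths.
 */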
3173 static int ptwr_emulated_update(
3174 unsigned long addr,
3175 paddr_t old,
3176 paddr_t val,
3177 unsigned int bytes,
3178 unsigned int do_cmpxchg,
3179 struct ptwr_emulate_ctxt *ptwr_ctxt)
3181 unsigned long mfn;
3182 struct page_info *page;
3183 l1_pgentry_t pte, ol1e, nl1e, *pl1e;
3184 struct vcpu *v = current;
3185 struct domain *d = v->domain;
3187 /* Only allow naturally-aligned stores within the original %cr2 page. */
3188 if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
3190 MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
3191 ptwr_ctxt->cr2, addr, bytes);
3192 return X86EMUL_UNHANDLEABLE;
3195 /* Turn a sub-word access into a full-word access. */
3196 if ( bytes != sizeof(paddr_t) )
3198 paddr_t full;
3199 unsigned int rc, offset = addr & (sizeof(paddr_t)-1);
3201 /* Align address; read full word. */
3202 addr &= ~(sizeof(paddr_t)-1);
3203 if ( (rc = copy_from_user(&full, (void *)addr, sizeof(paddr_t))) != 0 )
3205 propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
3206 return X86EMUL_EXCEPTION;
3208 /* Mask out bits provided by caller. */
3209 full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
3210 /* Shift the caller value and OR in the missing bits. */
3211 val &= (((paddr_t)1 << (bytes*8)) - 1);
3212 val <<= (offset)*8;
3213 val |= full;
3214 /* Also fill in missing parts of the cmpxchg old value. */
3215 old &= (((paddr_t)1 << (bytes*8)) - 1);
3216 old <<= (offset)*8;
3217 old |= full;
3220 pte = ptwr_ctxt->pte;
3221 mfn = l1e_get_pfn(pte);
3222 page = mfn_to_page(mfn);
3224 /* We are looking only for read-only mappings of p.t. pages. */
3225 ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
3226 ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
3227 ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
3228 ASSERT(page_get_owner(page) == d);
3230 /* Check the new PTE. */
3231 nl1e = l1e_from_intpte(val);
3232 if ( unlikely(!get_page_from_l1e(gl1e_to_ml1e(d, nl1e), d)) )
3234 if ( (CONFIG_PAGING_LEVELS == 3 || IS_COMPAT(d)) &&
3235 (bytes == 4) && (addr & 4) && !do_cmpxchg &&
3236 (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
3238 /*
3239 * If this is an upper-half write to a PAE PTE then we assume that
3240 * the guest has simply got the two writes the wrong way round. We
3241 * zap the PRESENT bit on the assumption that the bottom half will
3242 * be written immediately after we return to the guest.
3243 */
3244 MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte,
3245 l1e_get_intpte(nl1e));
3246 l1e_remove_flags(nl1e, _PAGE_PRESENT);
3248 else
3250 MEM_LOG("ptwr_emulate: could not get_page_from_l1e()");
3251 return X86EMUL_UNHANDLEABLE;
3255 adjust_guest_l1e(nl1e, d);
3257 /* Checked successfully: do the update (write or cmpxchg). */
3258 pl1e = map_domain_page(page_to_mfn(page));
3259 pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK));
3260 if ( do_cmpxchg )
3262 int okay;
3263 intpte_t t = old;
3264 ol1e = l1e_from_intpte(old);
3266 okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e,
3267 &t, val, _mfn(mfn));
3268 okay = (okay && t == old);
3270 if ( !okay )
3272 unmap_domain_page(pl1e);
3273 put_page_from_l1e(gl1e_to_ml1e(d, nl1e), d);
3274 return X86EMUL_CMPXCHG_FAILED;
3277 else
3279 ol1e = *pl1e;
3280 if ( !UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, page_to_mfn(page), v) )
3281 BUG();
3284 unmap_domain_page(pl1e);
3286 /* Finally, drop the old PTE. */
3287 put_page_from_l1e(gl1e_to_ml1e(d, ol1e), d);
3289 return X86EMUL_OKAY;
3292 static int ptwr_emulated_write(
3293 enum x86_segment seg,
3294 unsigned long offset,
3295 unsigned long val,
3296 unsigned int bytes,
3297 struct x86_emulate_ctxt *ctxt)
3299 return ptwr_emulated_update(
3300 offset, 0, val, bytes, 0,
3301 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3304 static int ptwr_emulated_cmpxchg(
3305 enum x86_segment seg,
3306 unsigned long offset,
3307 unsigned long old,
3308 unsigned long new,
3309 unsigned int bytes,
3310 struct x86_emulate_ctxt *ctxt)
3312 return ptwr_emulated_update(
3313 offset, old, new, bytes, 1,
3314 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3317 static int ptwr_emulated_cmpxchg8b(
3318 enum x86_segment seg,
3319 unsigned long offset,
3320 unsigned long old,
3321 unsigned long old_hi,
3322 unsigned long new,
3323 unsigned long new_hi,
3324 struct x86_emulate_ctxt *ctxt)
3326 if ( CONFIG_PAGING_LEVELS == 2 )
3327 return X86EMUL_UNHANDLEABLE;
3328 return ptwr_emulated_update(
3329 offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
3330 container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
3333 static struct x86_emulate_ops ptwr_emulate_ops = {
3334 .read = ptwr_emulated_read,
3335 .insn_fetch = ptwr_emulated_read,
3336 .write = ptwr_emulated_write,
3337 .cmpxchg = ptwr_emulated_cmpxchg,
3338 .cmpxchg8b = ptwr_emulated_cmpxchg8b
3339 };
3341 /* Write page fault handler: check if guest is trying to modify a PTE. */
3342 int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
3343 struct cpu_user_regs *regs)
3345 struct domain *d = v->domain;
3346 struct page_info *page;
3347 l1_pgentry_t pte;
3348 struct ptwr_emulate_ctxt ptwr_ctxt;
3349 int rc;
3351 LOCK_BIGLOCK(d);
3353 /*
3354 * Attempt to read the PTE that maps the VA being accessed. By checking for
3355 * PDE validity in the L2 we avoid many expensive fixups in __get_user().
3356 */
3357 guest_get_eff_l1e(v, addr, &pte);
3358 if ( !(l1e_get_flags(pte) & _PAGE_PRESENT) )
3359 goto bail;
3360 page = l1e_get_page(pte);
3362 /* We are looking only for read-only mappings of p.t. pages. */
3363 if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
3364 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
3365 ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
3366 (page_get_owner(page) != d) )
3367 goto bail;
3369 ptwr_ctxt.ctxt.regs = regs;
3370 ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
3371 IS_COMPAT(d) ? 32 : BITS_PER_LONG;
3372 ptwr_ctxt.cr2 = addr;
3373 ptwr_ctxt.pte = pte;
3375 rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
3376 if ( rc == X86EMUL_UNHANDLEABLE )
3377 goto bail;
3379 UNLOCK_BIGLOCK(d);
3380 perfc_incrc(ptwr_emulations);
3381 return EXCRET_fault_fixed;
3383 bail:
3384 UNLOCK_BIGLOCK(d);
3385 return 0;
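/*
 * Descriptive note (added): create or replace mappings in Xen's own address
 * space. L2 superpage mappings are used whenever the virtual address, mfn
 * and remaining length are suitably aligned and MAP_SMALL_PAGES is not
 * requested; an existing superpage is shattered into an L1 table when a 4kB
 * mapping must be made inside it.
 */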
3388 int map_pages_to_xen(
3389 unsigned long virt,
3390 unsigned long mfn,
3391 unsigned long nr_mfns,
3392 unsigned long flags)
3394 l2_pgentry_t *pl2e, ol2e;
3395 l1_pgentry_t *pl1e, ol1e;
3396 unsigned int i;
3398 unsigned int map_small_pages = !!(flags & MAP_SMALL_PAGES);
3399 flags &= ~MAP_SMALL_PAGES;
3401 while ( nr_mfns != 0 )
3403 pl2e = virt_to_xen_l2e(virt);
3405 if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
3406 (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
3407 !map_small_pages )
3409 /* Super-page mapping. */
3410 ol2e = *pl2e;
3411 l2e_write(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
3413 if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
3415 local_flush_tlb_pge();
3416 if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
3417 free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
3420 virt += 1UL << L2_PAGETABLE_SHIFT;
3421 mfn += 1UL << PAGETABLE_ORDER;
3422 nr_mfns -= 1UL << PAGETABLE_ORDER;
3424 else
3426 /* Normal page mapping. */
3427 if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
3429 pl1e = alloc_xen_pagetable();
3430 clear_page(pl1e);
3431 l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3432 __PAGE_HYPERVISOR));
3434 else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
3436 pl1e = alloc_xen_pagetable();
3437 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
3438 l1e_write(&pl1e[i],
3439 l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
3440 l2e_get_flags(*pl2e) & ~_PAGE_PSE));
3441 l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
3442 __PAGE_HYPERVISOR));
3443 local_flush_tlb_pge();
3446 pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
3447 ol1e = *pl1e;
3448 l1e_write(pl1e, l1e_from_pfn(mfn, flags));
3449 if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
3450 local_flush_tlb_one(virt);
3452 virt += 1UL << L1_PAGETABLE_SHIFT;
3453 mfn += 1UL;
3454 nr_mfns -= 1UL;
3458 return 0;
3461 void __set_fixmap(
3462 enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
3464 BUG_ON(idx >= __end_of_fixed_addresses);
3465 map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
3468 #ifdef MEMORY_GUARD
3470 void memguard_init(void)
3472 map_pages_to_xen(
3473 PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
3474 __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
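/*
 * Descriptive note (added): (un)guard a page-aligned range by remapping it
 * with or without _PAGE_PRESENT, always with 4kB mappings so pages can be
 * toggled individually.
 */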
3477 static void __memguard_change_range(void *p, unsigned long l, int guard)
3479 unsigned long _p = (unsigned long)p;
3480 unsigned long _l = (unsigned long)l;
3481 unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;
3483 /* Ensure we are dealing with a page-aligned whole number of pages. */
3484 ASSERT((_p&PAGE_MASK) != 0);
3485 ASSERT((_l&PAGE_MASK) != 0);
3486 ASSERT((_p&~PAGE_MASK) == 0);
3487 ASSERT((_l&~PAGE_MASK) == 0);
3489 if ( guard )
3490 flags &= ~_PAGE_PRESENT;
3492 map_pages_to_xen(
3493 _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
3496 void memguard_guard_range(void *p, unsigned long l)
3498 __memguard_change_range(p, l, 1);
3501 void memguard_unguard_range(void *p, unsigned long l)
3503 __memguard_change_range(p, l, 0);
3506 #endif
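/*
 * Descriptive note (added): make one page of each stack allocation
 * not-present, separating the DEBUG_STACK_SIZE bytes at the top of the stack
 * from the rest, so that an overflow into the guarded page faults
 * immediately.
 */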
3508 void memguard_guard_stack(void *p)
3510 BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
3511 p = (void *)((unsigned long)p + STACK_SIZE - DEBUG_STACK_SIZE - PAGE_SIZE);
3512 memguard_guard_range(p, PAGE_SIZE);
3515 /*
3516 * Local variables:
3517 * mode: C
3518 * c-set-style: "BSD"
3519 * c-basic-offset: 4
3520 * tab-width: 4
3521 * indent-tabs-mode: nil
3522 * End:
3523 */