ia64/xen-unstable: xen/arch/ia64/xen/mm.c @ 10376:97226bfc659c

[IA64] Pull changes from xen-unstable.hg cset 10242 into mm.c

Signed-off-by: Alex Williamson <alex.williamson@hp.com>
author:   awilliam@xenbuild.aw
date:     Tue Jun 13 13:13:39 2006 -0600
parents:  bc76ad9d6270
children: d0a77e90eaa3
1 /*
2 * Copyright (C) 2005 Intel Co
3 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
4 *
5 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
6 *
7 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
8 * VA Linux Systems Japan K.K.
9 * dom0 vp model support
10 */
12 #include <xen/config.h>
13 #include <asm/xentypes.h>
14 #include <asm/mm.h>
15 #include <asm/pgalloc.h>
16 #include <asm/vhpt.h>
17 #include <asm/vcpu.h>
18 #include <linux/efi.h>
20 #ifndef CONFIG_XEN_IA64_DOM0_VP
21 #define CONFIG_DOMAIN0_CONTIGUOUS
22 #else
23 static void domain_page_flush(struct domain* d, unsigned long mpaddr,
24 unsigned long old_mfn, unsigned long new_mfn);
25 #endif
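// Build note: without CONFIG_XEN_IA64_DOM0_VP, dom0 is mapped as a single
// contiguous machine range (CONFIG_DOMAIN0_CONTIGUOUS) and much of the p2m
// handling below is bypassed for it.  With DOM0_VP, every domain goes through
// the per-domain p2m tree (d->arch.mm), and domain_page_flush() invalidates
// stale translations whenever a p2m entry is replaced.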
27 static struct domain *dom_xen, *dom_io;
29 // the following is stolen from arch_init_memory() @ xen/arch/x86/mm.c
30 void
31 alloc_dom_xen_and_dom_io(void)
32 {
33 /*
34 * Initialise our DOMID_XEN domain.
35 * Any Xen-heap pages that we will allow to be mapped will have
36 * their domain field set to dom_xen.
37 */
38 dom_xen = alloc_domain(DOMID_XEN);
39 BUG_ON(dom_xen == NULL);
41 /*
42 * Initialise our DOMID_IO domain.
43 * This domain owns I/O pages that are within the range of the page_info
44 * array. Mappings occur at the priv of the caller.
45 */
46 dom_io = alloc_domain(DOMID_IO);
47 BUG_ON(dom_io == NULL);
48 }
50 // heavily depends on the struct page_info layout.
51 // if (page_get_owner(page) == d &&
52 // test_and_clear_bit(_PGC_allocated, &page->count_info)) {
53 // put_page(page);
54 // }
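// The open-coded version below folds the owner check and the test-and-clear
// of PGC_allocated into one 64-bit cmpxchg over count_info and the pickled
// owner (which are adjacent in struct page_info), so the "owned by d and
// still allocated" test and the reference drop happen atomically.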
55 static void
56 try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
57 {
58 u32 _d, _nd;
59 u64 x, nx, y;
61 _d = pickle_domptr(d);
62 y = *((u64*)&page->count_info);
63 do {
64 x = y;
65 _nd = x >> 32;
66 nx = x - 1;
67 __clear_bit(_PGC_allocated, &nx);
69 if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
70 struct domain* nd = unpickle_domptr(_nd);
71 if (nd == NULL) {
72 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
73 "sd=%p 0x%x,"
74 " caf=%016lx, taf=%" PRtype_info "\n",
75 (void *) page_to_mfn(page),
76 d, d->domain_id, _d,
77 nd, _nd,
78 x,
79 page->u.inuse.type_info);
80 }
81 break;
82 }
84 BUG_ON((nx & PGC_count_mask) < 1);
85 y = cmpxchg((u64*)&page->count_info, x, nx);
86 } while (unlikely(y != x));
87 }
89 static void
90 relinquish_pte(struct domain* d, pte_t* pte)
91 {
92 unsigned long mfn = pte_pfn(*pte);
93 struct page_info* page;
95 // a vmx domain uses bits [58:56] to distinguish an io region from memory.
96 // see vmx_build_physmap_table() in vmx_init.c
97 if (!pte_mem(*pte))
98 return;
100 // domain might map IO space or acpi table pages. check it.
101 if (!mfn_valid(mfn))
102 return;
103 page = mfn_to_page(mfn);
104 // struct page_info corresponding to mfn may exist or not depending
105 // on CONFIG_VIRTUAL_FRAME_TABLE.
106 // This check is overly simplistic.
107 // The right way is to check whether this page belongs to an io area or to acpi pages.
108 if (page_get_owner(page) == NULL) {
109 BUG_ON(page->count_info != 0);
110 return;
111 }
113 #ifdef CONFIG_XEN_IA64_DOM0_VP
114 if (page_get_owner(page) == d) {
115 BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
116 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
117 }
118 #endif
119 try_to_clear_PGC_allocate(d, page);
120 put_page(page);
121 }
123 static void
124 relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
125 {
126 unsigned long i;
127 pte_t* pte = pte_offset_map(pmd, offset);
129 for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
130 if (!pte_present(*pte))
131 continue;
133 relinquish_pte(d, pte);
134 }
135 pte_free_kernel(pte_offset_map(pmd, offset));
136 }
138 static void
139 relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
140 {
141 unsigned long i;
142 pmd_t *pmd = pmd_offset(pud, offset);
144 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
145 if (!pmd_present(*pmd))
146 continue;
148 relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
149 }
150 pmd_free(pmd_offset(pud, offset));
151 }
153 static void
154 relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
155 {
156 unsigned long i;
157 pud_t *pud = pud_offset(pgd, offset);
159 for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
160 if (!pud_present(*pud))
161 continue;
163 relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
164 }
165 pud_free(pud_offset(pgd, offset));
166 }
168 void
169 relinquish_mm(struct domain* d)
170 {
171 struct mm_struct* mm = &d->arch.mm;
172 unsigned long i;
173 pgd_t* pgd;
175 if (mm->pgd == NULL)
176 return;
178 pgd = pgd_offset(mm, 0);
179 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
180 if (!pgd_present(*pgd))
181 continue;
183 relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
184 }
185 pgd_free(mm->pgd);
186 mm->pgd = NULL;
187 }
189 // stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
190 void
191 share_xen_page_with_guest(struct page_info *page,
192 struct domain *d, int readonly)
193 {
194 if ( page_get_owner(page) == d )
195 return;
197 #if 1
198 if (readonly) {
199 printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
200 }
201 #endif
203 // alloc_xenheap_pages() doesn't initialize page owner.
204 //BUG_ON(page_get_owner(page) != NULL);
206 spin_lock(&d->page_alloc_lock);
208 #ifndef __ia64__
209 /* The incremented type count pins as writable or read-only. */
210 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
211 page->u.inuse.type_info |= PGT_validated | 1;
212 #endif
214 page_set_owner(page, d);
215 wmb(); /* install valid domain ptr before updating refcnt. */
216 ASSERT(page->count_info == 0);
217 page->count_info |= PGC_allocated | 1;
219 if ( unlikely(d->xenheap_pages++ == 0) )
220 get_knownalive_domain(d);
221 list_add_tail(&page->list, &d->xenpage_list);
223 // grant_table_destroy() releases these pages,
224 // but it doesn't clear their m2p entries, so stale entries may remain.
225 // Such a stale entry is cleared here.
226 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
228 spin_unlock(&d->page_alloc_lock);
229 }
231 void
232 share_xen_page_with_privileged_guests(struct page_info *page, int readonly)
233 {
234 share_xen_page_with_guest(page, dom_xen, readonly);
235 }
237 unsigned long
238 gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
239 {
240 unsigned long pte;
242 #ifndef CONFIG_XEN_IA64_DOM0_VP
243 if (d == dom0)
244 return(gpfn);
245 #endif
246 pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
247 if (!pte) {
248 panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n");
249 }
250 return ((pte & _PFN_MASK) >> PAGE_SHIFT);
251 }
253 // given a domain virtual address, pte and pagesize, extract the metaphysical
254 // address, convert the pte for a physical address for (possibly different)
255 // Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use
256 // PAGE_SIZE!)
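// itir.ps is the guest's page size, so itir_mask(itir.itir) covers the offset
// bits within that page; mpaddr below combines the ppn taken from the guest
// pte with the low bits of the faulting address.  Under DOM0_VP the inserted
// page size is clamped to Xen's PAGE_SHIFT, since the p2m table is managed at
// PAGE_SIZE granularity.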
257 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps)
258 {
259 struct domain *d = current->domain;
260 ia64_itir_t itir = {.itir = itir__};
261 u64 mask, mpaddr, pteval2;
262 u64 arflags;
263 u64 arflags2;
265 pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
267 // FIXME address had better be pre-validated on insert
268 mask = ~itir_mask(itir.itir);
269 mpaddr = (((pteval & ~_PAGE_ED) & _PAGE_PPN_MASK) & ~mask) |
270 (address & mask);
271 #ifdef CONFIG_XEN_IA64_DOM0_VP
272 if (itir.ps > PAGE_SHIFT) {
273 itir.ps = PAGE_SHIFT;
274 }
275 #endif
276 *logps = itir.ps;
277 #ifndef CONFIG_XEN_IA64_DOM0_VP
278 if (d == dom0) {
279 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
280 /*
281 printk("translate_domain_pte: out-of-bounds dom0 mpaddr 0x%lx! itc=%lx...\n",
282 mpaddr, ia64_get_itc());
283 */
284 }
285 }
286 else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) {
287 /* Address beyond the limit. However the grant table is
288 also beyond the limit. Display a message if not in the
289 grant table. */
290 if (mpaddr >= IA64_GRANT_TABLE_PADDR
291 && mpaddr < (IA64_GRANT_TABLE_PADDR
292 + (ORDER_GRANT_FRAMES << PAGE_SHIFT)))
293 printf("translate_domain_pte: bad mpa=0x%lx (> 0x%lx),"
294 "vadr=0x%lx,pteval=0x%lx,itir=0x%lx\n",
295 mpaddr, (unsigned long)d->max_pages<<PAGE_SHIFT,
296 address, pteval, itir.itir);
297 }
298 #endif
299 pteval2 = lookup_domain_mpa(d,mpaddr);
300 arflags = pteval & _PAGE_AR_MASK;
301 arflags2 = pteval2 & _PAGE_AR_MASK;
302 if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) {
303 #if 0
304 DPRINTK("%s:%d "
305 "pteval 0x%lx arflag 0x%lx address 0x%lx itir 0x%lx "
306 "pteval2 0x%lx arflags2 0x%lx mpaddr 0x%lx\n",
307 __func__, __LINE__,
308 pteval, arflags, address, itir__,
309 pteval2, arflags2, mpaddr);
310 #endif
311 pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R;
312 }
314 pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
315 pteval2 |= (pteval & _PAGE_ED);
316 pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
317 pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
318 return pteval2;
319 }
321 // given a current domain metaphysical address, return the physical address
322 unsigned long translate_domain_mpaddr(unsigned long mpaddr)
323 {
324 unsigned long pteval;
326 #ifndef CONFIG_XEN_IA64_DOM0_VP
327 if (current->domain == dom0) {
328 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
329 printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr 0x%lx! continuing...\n",
330 mpaddr);
331 }
332 }
333 #endif
334 pteval = lookup_domain_mpa(current->domain,mpaddr);
335 return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
336 }
338 //XXX !xxx_present() should be used instead of !xxx_none()?
339 // pud, pmd and pte pages are zero-cleared when they are allocated.
340 // Their contents must be visible before they are linked into the tree,
341 // so the cmpxchg that publishes them must have release semantics.
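// Concretely: a freshly allocated, zeroed pud/pmd/pte page has to be globally
// visible before the pointer to it is published in the parent level, or
// another vcpu walking the tree could dereference it and read stale contents.
// The *_cmpxchg_rel() helpers below provide that release ordering; the loser
// of a population race frees its page and retries.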
342 static pte_t*
343 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
344 {
345 struct mm_struct *mm = &d->arch.mm;
346 pgd_t *pgd;
347 pud_t *pud;
348 pmd_t *pmd;
350 BUG_ON(mm->pgd == NULL);
352 pgd = pgd_offset(mm, mpaddr);
353 again_pgd:
354 if (unlikely(pgd_none(*pgd))) {
355 pud_t *old_pud = NULL;
356 pud = pud_alloc_one(mm, mpaddr);
357 if (unlikely(!pgd_cmpxchg_rel(mm, pgd, old_pud, pud))) {
358 pud_free(pud);
359 goto again_pgd;
360 }
361 }
363 pud = pud_offset(pgd, mpaddr);
364 again_pud:
365 if (unlikely(pud_none(*pud))) {
366 pmd_t* old_pmd = NULL;
367 pmd = pmd_alloc_one(mm, mpaddr);
368 if (unlikely(!pud_cmpxchg_rel(mm, pud, old_pmd, pmd))) {
369 pmd_free(pmd);
370 goto again_pud;
371 }
372 }
374 pmd = pmd_offset(pud, mpaddr);
375 again_pmd:
376 if (unlikely(pmd_none(*pmd))) {
377 pte_t* old_pte = NULL;
378 pte_t* pte = pte_alloc_one_kernel(mm, mpaddr);
379 if (unlikely(!pmd_cmpxchg_kernel_rel(mm, pmd, old_pte, pte))) {
380 pte_free_kernel(pte);
381 goto again_pmd;
382 }
383 }
385 return pte_offset_map(pmd, mpaddr);
386 }
388 //XXX xxx_none() should be used instead of !xxx_present()?
389 static pte_t*
390 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
391 {
392 struct mm_struct *mm = &d->arch.mm;
393 pgd_t *pgd;
394 pud_t *pud;
395 pmd_t *pmd;
397 BUG_ON(mm->pgd == NULL);
398 pgd = pgd_offset(mm, mpaddr);
399 if (unlikely(!pgd_present(*pgd)))
400 return NULL;
402 pud = pud_offset(pgd, mpaddr);
403 if (unlikely(!pud_present(*pud)))
404 return NULL;
406 pmd = pmd_offset(pud, mpaddr);
407 if (unlikely(!pmd_present(*pmd)))
408 return NULL;
410 return pte_offset_map(pmd, mpaddr);
411 }
413 #ifdef CONFIG_XEN_IA64_DOM0_VP
414 static pte_t*
415 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
416 {
417 struct mm_struct *mm = &d->arch.mm;
418 pgd_t *pgd;
419 pud_t *pud;
420 pmd_t *pmd;
422 BUG_ON(mm->pgd == NULL);
423 pgd = pgd_offset(mm, mpaddr);
424 if (unlikely(pgd_none(*pgd)))
425 return NULL;
427 pud = pud_offset(pgd, mpaddr);
428 if (unlikely(pud_none(*pud)))
429 return NULL;
431 pmd = pmd_offset(pud, mpaddr);
432 if (unlikely(pmd_none(*pmd)))
433 return NULL;
435 return pte_offset_map(pmd, mpaddr);
436 }
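// The two no-alloc lookups above differ only in how the intermediate levels
// are tested (pXd_present() vs. pXd_none()); both return the leaf pte pointer
// without examining it, and callers such as zap_domain_page_one() or
// lookup_domain_mpa() check pte_none()/pte_present() on the result themselves.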
438 unsigned long
439 ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
440 {
441 pte_t *pte;
443 pte = lookup_noalloc_domain_pte(d, mpaddr);
444 if (pte == NULL)
445 return INVALID_MFN;
447 if (pte_present(*pte))
448 return (pte->pte & _PFN_MASK);
449 else if (VMX_DOMAIN(d->vcpu[0]))
450 return GPFN_INV_MASK;
451 return INVALID_MFN;
452 }
454 unsigned long
455 __lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
456 {
457 unsigned long machine = ____lookup_domain_mpa(d, mpaddr);
458 if (machine != INVALID_MFN)
459 return machine;
461 printk("%s: d 0x%p id %d current 0x%p id %d\n",
462 __func__, d, d->domain_id, current, current->vcpu_id);
463 printk("%s: bad mpa 0x%lx (max_pages 0x%lx)\n",
464 __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
465 return INVALID_MFN;
466 }
467 #endif
469 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
470 {
471 pte_t *pte;
473 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
474 if (d == dom0) {
475 pte_t pteval;
476 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
477 //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr);
478 //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size);
479 }
480 pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
481 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
482 return pte_val(pteval);
483 }
484 #endif
485 pte = lookup_noalloc_domain_pte(d, mpaddr);
486 if (pte != NULL) {
487 if (pte_present(*pte)) {
488 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
489 return pte_val(*pte);
490 } else if (VMX_DOMAIN(d->vcpu[0]))
491 return GPFN_INV_MASK;
492 }
494 printk("%s: d 0x%p id %d current 0x%p id %d\n",
495 __func__, d, d->domain_id, current, current->vcpu_id);
496 if ((mpaddr >> PAGE_SHIFT) < d->max_pages)
497 printk("%s: non-allocated mpa 0x%lx (< 0x%lx)\n", __func__,
498 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
499 else
500 printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__,
501 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
503 //XXX This is a workaround until emulation of memory accesses to a region
504 // where memory or a device is attached is implemented.
505 return pte_val(pfn_pte(0, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
506 }
508 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
509 #if 1
510 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
511 {
512 unsigned long pte = lookup_domain_mpa(d,mpaddr);
513 unsigned long imva;
515 pte &= _PAGE_PPN_MASK;
516 imva = (unsigned long) __va(pte);
517 imva |= mpaddr & ~PAGE_MASK;
518 return (void*)imva;
519 }
520 #else
521 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
522 {
523 unsigned long imva = __gpa_to_mpa(d, mpaddr);
525 return (void *)__va(imva);
526 }
527 #endif
529 /* Allocate a new page for domain and map it to the specified metaphysical
530 address. */
531 struct page_info *
532 __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
533 {
534 struct page_info *p = NULL;
535 unsigned long maddr;
536 int ret;
538 BUG_ON(!pte_none(*pte));
540 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
541 if (d == dom0) {
542 #if 0
543 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
544 /* FIXME: is it true ?
545 dom0 memory is not contiguous! */
546 panic("assign_new_domain_page: bad domain0 "
547 "mpaddr=%lx, start=%lx, end=%lx!\n",
548 mpaddr, dom0_start, dom0_start+dom0_size);
549 }
550 #endif
551 p = mfn_to_page((mpaddr >> PAGE_SHIFT));
552 return p;
553 }
554 #endif
556 p = alloc_domheap_page(d);
557 if (unlikely(!p)) {
558 printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
559 return(p);
560 }
562 // zero out pages for security reasons
563 clear_page(page_to_virt(p));
564 maddr = page_to_maddr (p);
565 if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
566 && maddr < __get_cpu_var(vhpt_pend))) {
567 /* FIXME: how can this happen ?
568 vhpt is allocated by alloc_domheap_page. */
569 printf("assign_new_domain_page: reassigned vhpt page %lx!!\n",
570 maddr);
571 }
573 ret = get_page(p, d);
574 BUG_ON(ret == 0);
575 set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
576 // clear_page() and set_gpfn_from_mfn() become visible before set_pte_rel()
577 // because set_pte_rel() has release semantics
578 set_pte_rel(pte,
579 pfn_pte(maddr >> PAGE_SHIFT,
580 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
582 smp_mb();
583 return p;
584 }
586 struct page_info *
587 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
588 {
589 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
590 pte_t dummy_pte = __pte(0);
591 return __assign_new_domain_page(d, mpaddr, &dummy_pte);
592 #else
593 struct page_info *p = NULL;
594 pte_t *pte;
596 pte = lookup_alloc_domain_pte(d, mpaddr);
597 if (pte_none(*pte)) {
598 p = __assign_new_domain_page(d, mpaddr, pte);
599 } else {
600 DPRINTK("%s: d 0x%p mpaddr %lx already mapped!\n",
601 __func__, d, mpaddr);
602 }
604 return p;
605 #endif
606 }
608 void
609 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
610 {
611 #ifndef CONFIG_DOMAIN0_CONTIGUOUS
612 pte_t *pte;
614 BUG_ON(d != dom0);
615 pte = lookup_alloc_domain_pte(d, mpaddr);
616 if (pte_none(*pte)) {
617 struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
618 if (p == NULL) {
619 panic("%s: can't allocate page for dom0", __func__);
620 }
621 }
622 #endif
623 }
625 /* map a physical address to the specified metaphysical addr */
626 // flags: currently only ASSIGN_readonly
627 void
628 __assign_domain_page(struct domain *d,
629 unsigned long mpaddr, unsigned long physaddr,
630 unsigned long flags)
631 {
632 pte_t *pte;
633 unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
635 pte = lookup_alloc_domain_pte(d, mpaddr);
636 if (pte_none(*pte)) {
637 set_pte_rel(pte,
638 pfn_pte(physaddr >> PAGE_SHIFT,
639 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)));
640 smp_mb();
641 } else
642 printk("%s: mpaddr %lx already mapped!\n", __func__, mpaddr);
643 }
645 /* get_page() and map a physical address to the specified metaphysical addr */
646 void
647 assign_domain_page(struct domain *d,
648 unsigned long mpaddr, unsigned long physaddr)
649 {
650 struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
651 int ret;
653 BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
654 ret = get_page(page, d);
655 BUG_ON(ret == 0);
656 set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
657 // because __assign_domain_page() uses set_pte_rel() which has
658 // release semantics, smp_mb() isn't needed.
659 __assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable);
660 }
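// Reference-counting convention: each p2m entry pointing at a normal memory
// page holds one general reference taken with get_page() by whoever installed
// it (assign_domain_page(), guest_physmap_add_page(), dom0vp_add_physmap(),
// create_grant_host_mapping(), ...).  That reference is dropped with
// put_page() when the entry is torn down in relinquish_pte(),
// zap_domain_page_one() or assign_domain_page_replace().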
662 #ifdef CONFIG_XEN_IA64_DOM0_VP
663 static void
664 assign_domain_same_page(struct domain *d,
665 unsigned long mpaddr, unsigned long size,
666 unsigned long flags)
667 {
668 //XXX optimization
669 unsigned long end = PAGE_ALIGN(mpaddr + size);
670 for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) {
671 __assign_domain_page(d, mpaddr, mpaddr, flags);
672 }
673 }
675 int
676 efi_mmio(unsigned long physaddr, unsigned long size)
677 {
678 void *efi_map_start, *efi_map_end;
679 u64 efi_desc_size;
680 void* p;
682 efi_map_start = __va(ia64_boot_param->efi_memmap);
683 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
684 efi_desc_size = ia64_boot_param->efi_memdesc_size;
686 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
687 efi_memory_desc_t* md = (efi_memory_desc_t *)p;
688 unsigned long start = md->phys_addr;
689 unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
691 if (start <= physaddr && physaddr < end) {
692 if ((physaddr + size) > end) {
693 DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
694 __func__, __LINE__, physaddr, size);
695 return 0;
696 }
698 // for io space
699 if (md->type == EFI_MEMORY_MAPPED_IO ||
700 md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
701 return 1;
702 }
704 // for runtime
705 // see efi_enter_virtual_mode(void)
706 // in linux/arch/ia64/kernel/efi.c
707 if ((md->attribute & EFI_MEMORY_RUNTIME) &&
708 !(md->attribute & EFI_MEMORY_WB)) {
709 return 1;
710 }
712 DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
713 __func__, __LINE__, physaddr, size);
714 return 0;
715 }
717 if (physaddr < start) {
718 break;
719 }
720 }
722 return 1;
723 }
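// efi_mmio() returns 1 when [physaddr, physaddr + size) lies entirely within
// an EFI descriptor that is memory-mapped I/O, or within a runtime region
// that is not write-back cacheable; it returns 0 when the range overlaps
// ordinary memory or straddles a descriptor boundary.  Addresses not covered
// by any descriptor are treated as I/O as well.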
725 unsigned long
726 assign_domain_mmio_page(struct domain *d,
727 unsigned long mpaddr, unsigned long size)
728 {
729 if (size == 0) {
730 DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
731 __func__, d, mpaddr, size);
732 }
733 if (!efi_mmio(mpaddr, size)) {
734 DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
735 __func__, __LINE__, d, mpaddr, size);
736 return -EINVAL;
737 }
738 assign_domain_same_page(d, mpaddr, size, ASSIGN_writable);
739 return mpaddr;
740 }
742 unsigned long
743 assign_domain_mach_page(struct domain *d,
744 unsigned long mpaddr, unsigned long size,
745 unsigned long flags)
746 {
747 assign_domain_same_page(d, mpaddr, size, flags);
748 return mpaddr;
749 }
751 // The caller must get_page(mfn_to_page(mfn)) before calling this.
752 // The caller must also call set_gpfn_from_mfn() beforehand if necessary;
753 // because the set_gpfn_from_mfn() result must be visible before the pte xchg,
754 // the caller must use a memory barrier. NOTE: xchg has acquire semantics.
755 // flags: currently only ASSIGN_readonly
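// Typical caller pattern (mirroring guest_physmap_add_page() below):
//     ret = get_page(mfn_to_page(mfn), owner);
//     BUG_ON(ret == 0);
//     set_gpfn_from_mfn(mfn, gpfn);  /* if the m2p table must be updated */
//     smp_mb();                      /* make the m2p update visible first */
//     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);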
756 static void
757 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
758 unsigned long mfn, unsigned long flags)
759 {
760 struct mm_struct *mm = &d->arch.mm;
761 pte_t* pte;
762 pte_t old_pte;
763 pte_t npte;
764 unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
765 pte = lookup_alloc_domain_pte(d, mpaddr);
767 // update pte
768 npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags));
769 old_pte = ptep_xchg(mm, mpaddr, pte, npte);
770 if (pte_mem(old_pte)) {
771 unsigned long old_mfn = pte_pfn(old_pte);
773 // The mfn == old_mfn case can happen when a domain maps a granted page
774 // twice at the same pseudo-physical address.
775 // It makes no sense, but it is allowed.
776 // __gnttab_map_grant_ref()
777 // => create_host_mapping()
778 // => assign_domain_page_replace()
779 if (mfn != old_mfn) {
780 struct page_info* old_page = mfn_to_page(old_mfn);
782 if (page_get_owner(old_page) == d) {
783 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
784 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
785 }
787 domain_page_flush(d, mpaddr, old_mfn, mfn);
789 try_to_clear_PGC_allocate(d, old_page);
790 put_page(old_page);
791 }
792 }
793 }
795 // caller must get_page(new_page) before
796 // Only steal_page_for_grant_transfer() calls this function.
797 static int
798 assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
799 struct page_info* old_page,
800 struct page_info* new_page,
801 unsigned long flags)
802 {
803 struct mm_struct *mm = &d->arch.mm;
804 pte_t* pte;
805 unsigned long old_mfn;
806 unsigned long old_arflags;
807 pte_t old_pte;
808 unsigned long new_mfn;
809 unsigned long new_arflags;
810 pte_t new_pte;
811 pte_t ret_pte;
813 pte = lookup_alloc_domain_pte(d, mpaddr);
815 again:
816 old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;//XXX
817 old_mfn = page_to_mfn(old_page);
818 old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
820 new_arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
821 new_mfn = page_to_mfn(new_page);
822 new_pte = pfn_pte(new_mfn,
823 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_arflags));
825 // update pte
826 ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
827 if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) {
828 if (pte_pfn(old_pte) == pte_pfn(ret_pte)) {
829 goto again;
830 }
832 DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
833 "ret_pte 0x%lx ret_mfn 0x%lx\n",
834 __func__,
835 pte_val(old_pte), old_arflags, old_mfn,
836 pte_val(ret_pte), pte_pfn(ret_pte));
837 return -EINVAL;
838 }
840 BUG_ON(!pte_mem(old_pte));
841 BUG_ON(page_get_owner(old_page) != d);
842 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
843 BUG_ON(old_mfn == new_mfn);
845 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
847 domain_page_flush(d, mpaddr, old_mfn, new_mfn);
848 put_page(old_page);
849 return 0;
850 }
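// Note on the retry above: ptep_cmpxchg_rel() can fail simply because non-ppn
// bits of the pte (e.g. the hardware-maintained accessed/dirty bits) changed
// between reading *pte and the cmpxchg.  If the returned pte still names the
// same frame, the exchange is retried; if the frame itself changed, the
// mapping was updated under us and -EINVAL is returned to the caller
// (steal_page_for_grant_transfer()).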
852 static void
853 zap_domain_page_one(struct domain *d, unsigned long mpaddr)
854 {
855 struct mm_struct *mm = &d->arch.mm;
856 pte_t *pte;
857 pte_t old_pte;
858 unsigned long mfn;
859 struct page_info *page;
861 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
862 if (pte == NULL)
863 return;
864 if (pte_none(*pte))
865 return;
867 // update pte
868 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
869 mfn = pte_pfn(old_pte);
870 page = mfn_to_page(mfn);
871 BUG_ON((page->count_info & PGC_count_mask) == 0);
873 if (page_get_owner(page) == d) {
874 BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
875 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
876 }
878 domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
880 try_to_clear_PGC_allocate(d, page);
881 put_page(page);
882 }
884 unsigned long
885 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
886 unsigned int extent_order)
887 {
888 if (extent_order != 0) {
889 //XXX
890 return -ENOSYS;
891 }
893 zap_domain_page_one(d, gpfn << PAGE_SHIFT);
894 return 0;
895 }
897 unsigned long
898 dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn,
899 unsigned long flags, domid_t domid)
900 {
901 int error = 0;
902 struct domain* rd;
904 rd = find_domain_by_id(domid);
905 if (unlikely(rd == NULL)) {
906 switch (domid) {
907 case DOMID_XEN:
908 rd = dom_xen;
909 break;
910 case DOMID_IO:
911 rd = dom_io;
912 break;
913 default:
914 DPRINTK("d 0x%p domid %d "
915 "pgfn 0x%lx mfn 0x%lx flags 0x%lx domid %d\n",
916 d, d->domain_id, gpfn, mfn, flags, domid);
917 return -ESRCH;
918 }
919 BUG_ON(rd == NULL);
920 get_knownalive_domain(rd);
921 }
923 if (unlikely(rd == d)) {
924 error = -EINVAL;
925 goto out1;
926 }
927 BUG_ON(!mfn_valid(mfn));
928 if (unlikely(get_page(mfn_to_page(mfn), rd) == 0)) {
929 error = -EINVAL;
930 goto out1;
931 }
932 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
933 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
934 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags);
935 //don't update p2m table because this page belongs to rd, not d.
936 out1:
937 put_domain(rd);
938 return error;
939 }
941 // grant table host mapping
942 // mpaddr: host_addr: pseudo physical address
943 // mfn: frame: machine page frame
944 // flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
945 int
946 create_grant_host_mapping(unsigned long gpaddr,
947 unsigned long mfn, unsigned int flags)
948 {
949 struct domain* d = current->domain;
950 struct page_info* page;
951 int ret;
953 if (flags & (GNTMAP_device_map |
954 GNTMAP_application_map | GNTMAP_contains_pte)) {
955 DPRINTK("%s: flags 0x%x\n", __func__, flags);
956 return GNTST_general_error;
957 }
959 BUG_ON(!mfn_valid(mfn));
960 page = mfn_to_page(mfn);
961 ret = get_page(page, page_get_owner(page));
962 BUG_ON(ret == 0);
963 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
964 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
965 assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
966 ASSIGN_readonly: ASSIGN_writable);
967 return GNTST_okay;
968 }
970 // grant table host unmapping
971 int
972 destroy_grant_host_mapping(unsigned long gpaddr,
973 unsigned long mfn, unsigned int flags)
974 {
975 struct domain* d = current->domain;
976 pte_t* pte;
977 pte_t old_pte;
978 unsigned long old_mfn = INVALID_MFN;
979 struct page_info* old_page;
981 if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
982 DPRINTK("%s: flags 0x%x\n", __func__, flags);
983 return GNTST_general_error;
984 }
986 pte = lookup_noalloc_domain_pte(d, gpaddr);
987 if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
988 return GNTST_general_error;
990 // update pte
991 old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
992 if (pte_present(old_pte)) {
993 old_mfn = pte_pfn(old_pte);
994 } else {
995 return GNTST_general_error;
996 }
997 domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
999 old_page = mfn_to_page(old_mfn);
1000 BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
1001 put_page(old_page);
1003 return GNTST_okay;
1004 }
1006 // heavily depends on the struct page layout.
1007 int
1008 steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
1009 {
1010 #if 0 /* if big endian */
1011 # error "implement big endian version of steal_page_for_grant_transfer()"
1012 #endif
1013 u32 _d, _nd;
1014 u64 x, nx, y;
1015 unsigned long gpfn;
1016 struct page_info *new;
1017 unsigned long new_mfn;
1018 int ret;
1019 new = alloc_domheap_page(d);
1020 if (new == NULL) {
1021 DPRINTK("alloc_domheap_page() failed\n");
1022 return -1;
1023 }
1024 // zero out pages for security reasons
1025 clear_page(page_to_virt(new));
1026 // assign_domain_page_cmpxchg_rel() has release semantics
1027 // so smp_mb() isn't needed.
1029 ret = get_page(new, d);
1030 BUG_ON(ret == 0);
1032 gpfn = get_gpfn_from_mfn(page_to_mfn(page));
1033 if (gpfn == INVALID_M2P_ENTRY) {
1034 free_domheap_page(new);
1035 return -1;
1036 }
1037 new_mfn = page_to_mfn(new);
1038 set_gpfn_from_mfn(new_mfn, gpfn);
1039 // smp_mb() isn't needed because assign_domain_page_cmpxchg_rel()
1040 // has release semantics.
1042 ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new,
1043 ASSIGN_writable);
1044 if (ret < 0) {
1045 DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret);
1046 set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY);
1047 free_domheap_page(new);
1048 return -1;
1049 }
1051 spin_lock(&d->page_alloc_lock);
1053 /*
1054 * The tricky bit: atomically release ownership while there is just one
1055 * benign reference to the page (PGC_allocated). If that reference
1056 * disappears then the deallocation routine will safely spin.
1057 */
1058 _d = pickle_domptr(d);
1059 y = *((u64*)&page->count_info);
1060 do {
1061 x = y;
1062 nx = x & 0xffffffff;
1063 // page->count_info: untouched
1064 // page->u.inused._domain = 0;
1065 _nd = x >> 32;
1067 if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
1068 (1 | PGC_allocated)) ||
1069 unlikely(_nd != _d)) {
1070 struct domain* nd = unpickle_domptr(_nd);
1071 if (nd == NULL) {
1072 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1073 "sd=%p 0x%x,"
1074 " caf=%016lx, taf=%" PRtype_info "\n",
1075 (void *) page_to_mfn(page),
1076 d, d->domain_id, _d,
1077 nd, _nd,
1078 x,
1079 page->u.inuse.type_info);
1080 } else {
1081 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1082 "sd=%p(%u) 0x%x,"
1083 " caf=%016lx, taf=%" PRtype_info "\n",
1084 (void *) page_to_mfn(page),
1085 d, d->domain_id, _d,
1086 nd, nd->domain_id, _nd,
1087 x,
1088 page->u.inuse.type_info);
1089 }
1090 spin_unlock(&d->page_alloc_lock);
1091 return -1;
1092 }
1094 y = cmpxchg((u64*)&page->count_info, x, nx);
1095 } while (unlikely(y != x));
1097 /*
1098 * Unlink from 'd'. At least one reference remains (now anonymous), so
1099 * no one else is spinning to try to delete this page from 'd'.
1100 */
1101 d->tot_pages--;
1102 list_del(&page->list);
1104 spin_unlock(&d->page_alloc_lock);
1105 return 0;
1106 }
1108 void
1109 guest_physmap_add_page(struct domain *d, unsigned long gpfn,
1110 unsigned long mfn)
1111 {
1112 int ret;
1114 BUG_ON(!mfn_valid(mfn));
1115 ret = get_page(mfn_to_page(mfn), d);
1116 BUG_ON(ret == 0);
1117 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
1118 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
1119 set_gpfn_from_mfn(mfn, gpfn);
1120 smp_mb();
1121 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
1123 //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
1124 }
1126 void
1127 guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
1128 unsigned long mfn)
1129 {
1130 BUG_ON(mfn == 0);//XXX
1131 zap_domain_page_one(d, gpfn << PAGE_SHIFT);
1132 }
1134 //XXX sledgehammer.
1135 // flush finer range.
1136 void
1137 domain_page_flush(struct domain* d, unsigned long mpaddr,
1138 unsigned long old_mfn, unsigned long new_mfn)
1139 {
1140 domain_flush_vtlb_all();
1141 }
1143 int
1144 domain_page_mapped(struct domain* d, unsigned long mpaddr)
1145 {
1146 pte_t * pte;
1148 pte = lookup_noalloc_domain_pte(d, mpaddr);
1149 if(pte != NULL && !pte_none(*pte))
1150 return 1;
1151 return 0;
1152 }
1153 #endif
1155 /* Flush cache of domain d. */
1156 void domain_cache_flush (struct domain *d, int sync_only)
1157 {
1158 struct mm_struct *mm = &d->arch.mm;
1159 pgd_t *pgd = mm->pgd;
1160 unsigned long maddr;
1161 int i,j,k, l;
1162 int nbr_page = 0;
1163 void (*flush_func)(unsigned long start, unsigned long end);
1164 extern void flush_dcache_range (unsigned long, unsigned long);
1166 if (sync_only)
1167 flush_func = &flush_icache_range;
1168 else
1169 flush_func = &flush_dcache_range;
1171 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1172 if (d == dom0) {
1173 /* This is not fully correct (because of hole), but it should
1174 be enough for now. */
1175 (*flush_func)(__va_ul (dom0_start),
1176 __va_ul (dom0_start + dom0_size));
1177 return;
1178 }
1179 #endif
1180 for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
1181 pud_t *pud;
1182 if (!pgd_present(*pgd))
1183 continue;
1184 pud = pud_offset(pgd, 0);
1185 for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
1186 pmd_t *pmd;
1187 if (!pud_present(*pud))
1188 continue;
1189 pmd = pmd_offset(pud, 0);
1190 for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
1191 pte_t *pte;
1192 if (!pmd_present(*pmd))
1193 continue;
1194 pte = pte_offset_map(pmd, 0);
1195 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
1196 if (!pte_present(*pte))
1197 continue;
1198 /* Convert PTE to maddr. */
1199 maddr = __va_ul (pte_val(*pte)
1200 & _PAGE_PPN_MASK);
1201 (*flush_func)(maddr, maddr+ PAGE_SIZE);
1202 nbr_page++;
1203 }
1204 }
1205 }
1206 }
1207 //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
1208 }
1210 #ifdef VERBOSE
1211 #define MEM_LOG(_f, _a...) \
1212 printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
1213 current->domain->domain_id , __LINE__ , ## _a )
1214 #else
1215 #define MEM_LOG(_f, _a...) ((void)0)
1216 #endif
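// What follows is the generic page type-count machinery, apparently carried
// over from xen/arch/x86/mm.c.  On ia64 free_page_type() and alloc_page_type()
// are stubs, so get_page_type()/put_page_type() only maintain the type and
// reference bits in page->u.inuse.type_info.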
1218 static void free_page_type(struct page_info *page, u32 type)
1219 {
1220 }
1222 static int alloc_page_type(struct page_info *page, u32 type)
1223 {
1224 return 1;
1225 }
1227 unsigned long __get_free_pages(unsigned int mask, unsigned int order)
1228 {
1229 void *p = alloc_xenheap_pages(order);
1231 memset(p,0,PAGE_SIZE<<order);
1232 return (unsigned long)p;
1233 }
1235 void __free_pages(struct page_info *page, unsigned int order)
1236 {
1237 if (order) BUG();
1238 free_xenheap_page(page);
1239 }
1241 void *pgtable_quicklist_alloc(void)
1242 {
1243 void *p;
1244 p = alloc_xenheap_pages(0);
1245 if (p)
1246 clear_page(p);
1247 return p;
1248 }
1250 void pgtable_quicklist_free(void *pgtable_entry)
1251 {
1252 free_xenheap_page(pgtable_entry);
1253 }
1255 void cleanup_writable_pagetable(struct domain *d)
1256 {
1257 return;
1258 }
1260 void put_page_type(struct page_info *page)
1261 {
1262 u32 nx, x, y = page->u.inuse.type_info;
1264 again:
1265 do {
1266 x = y;
1267 nx = x - 1;
1269 ASSERT((x & PGT_count_mask) != 0);
1271 /*
1272 * The page should always be validated while a reference is held. The
1273 * exception is during domain destruction, when we forcibly invalidate
1274 * page-table pages if we detect a referential loop.
1275 * See domain.c:relinquish_list().
1276 */
1277 ASSERT((x & PGT_validated) ||
1278 test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
1280 if ( unlikely((nx & PGT_count_mask) == 0) )
1281 {
1282 /* Record TLB information for flush later. Races are harmless. */
1283 page->tlbflush_timestamp = tlbflush_current_time();
1285 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1286 likely(nx & PGT_validated) )
1287 {
1288 /*
1289 * Page-table pages must be unvalidated when count is zero. The
1290 * 'free' is safe because the refcnt is non-zero and validated
1291 * bit is clear => other ops will spin or fail.
1292 */
1293 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1294 x & ~PGT_validated)) != x) )
1295 goto again;
1296 /* We cleared the 'valid bit' so we do the clean up. */
1297 free_page_type(page, x);
1298 /* Carry on, but with the 'valid bit' now clear. */
1299 x &= ~PGT_validated;
1300 nx &= ~PGT_validated;
1301 }
1302 }
1303 else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
1304 (PGT_pinned | 1)) &&
1305 ((nx & PGT_type_mask) != PGT_writable_page)) )
1306 {
1307 /* Page is now only pinned. Make the back pointer mutable again. */
1308 nx |= PGT_va_mutable;
1309 }
1310 }
1311 while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) );
1312 }
1315 int get_page_type(struct page_info *page, u32 type)
1316 {
1317 u32 nx, x, y = page->u.inuse.type_info;
1319 again:
1320 do {
1321 x = y;
1322 nx = x + 1;
1323 if ( unlikely((nx & PGT_count_mask) == 0) )
1324 {
1325 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1326 return 0;
1327 }
1328 else if ( unlikely((x & PGT_count_mask) == 0) )
1329 {
1330 if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
1331 {
1332 if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
1333 {
1334 /*
1335 * On type change we check to flush stale TLB
1336 * entries. This may be unnecessary (e.g., page
1337 * was GDT/LDT) but those circumstances should be
1338 * very rare.
1339 */
1340 cpumask_t mask =
1341 page_get_owner(page)->domain_dirty_cpumask;
1342 tlbflush_filter(mask, page->tlbflush_timestamp);
1344 if ( unlikely(!cpus_empty(mask)) )
1345 {
1346 perfc_incrc(need_flush_tlb_flush);
1347 flush_tlb_mask(mask);
1348 }
1349 }
1351 /* We lose existing type, back pointer, and validity. */
1352 nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
1353 nx |= type;
1355 /* No special validation needed for writable pages. */
1356 /* Page tables and GDT/LDT need to be scanned for validity. */
1357 if ( type == PGT_writable_page )
1358 nx |= PGT_validated;
1359 }
1360 }
1361 else
1362 {
1363 if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
1364 {
1365 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
1366 {
1367 if ( current->domain == page_get_owner(page) )
1368 {
1369 /*
1370 * This ensures functions like set_gdt() see up-to-date
1371 * type info without needing to clean up writable p.t.
1372 * state on the fast path.
1373 */
1374 LOCK_BIGLOCK(current->domain);
1375 cleanup_writable_pagetable(current->domain);
1376 y = page->u.inuse.type_info;
1377 UNLOCK_BIGLOCK(current->domain);
1378 /* Can we make progress now? */
1379 if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
1380 ((y & PGT_count_mask) == 0) )
1381 goto again;
1382 }
1383 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1384 ((type & PGT_type_mask) != PGT_l1_page_table) )
1385 MEM_LOG("Bad type (saw %08x != exp %08x) "
1386 "for mfn %016lx (pfn %016lx)",
1387 x, type, page_to_mfn(page),
1388 get_gpfn_from_mfn(page_to_mfn(page)));
1389 return 0;
1390 }
1391 else if ( (x & PGT_va_mask) == PGT_va_mutable )
1392 {
1393 /* The va backpointer is mutable, hence we update it. */
1394 nx &= ~PGT_va_mask;
1395 nx |= type; /* we know the actual type is correct */
1396 }
1397 else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
1398 ((type & PGT_va_mask) != (x & PGT_va_mask)) )
1399 {
1400 #ifdef CONFIG_X86_PAE
1401 /* We use backptr as extra typing. Cannot be unknown. */
1402 if ( (type & PGT_type_mask) == PGT_l2_page_table )
1403 return 0;
1404 #endif
1405 /* This table is possibly mapped at multiple locations. */
1406 nx &= ~PGT_va_mask;
1407 nx |= PGT_va_unknown;
1408 }
1409 }
1410 if ( unlikely(!(x & PGT_validated)) )
1411 {
1412 /* Someone else is updating validation of this page. Wait... */
1413 while ( (y = page->u.inuse.type_info) == x )
1414 cpu_relax();
1415 goto again;
1416 }
1417 }
1418 }
1419 while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) );
1421 if ( unlikely(!(nx & PGT_validated)) )
1422 {
1423 /* Try to validate page type; drop the new reference on failure. */
1424 if ( unlikely(!alloc_page_type(page, type)) )
1425 {
1426 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x"
1427 ": caf=%08x taf=%" PRtype_info,
1428 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1429 type, page->count_info, page->u.inuse.type_info);
1430 /* No one else can get a reference. We hold the only ref. */
1431 page->u.inuse.type_info = 0;
1432 return 0;
1433 }
1435 /* No one else is updating simultaneously. */
1436 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1437 }
1439 return 1;
1440 }
1442 /*
1443 * Local variables:
1444 * mode: C
1445 * c-set-style: "BSD"
1446 * c-basic-offset: 4
1447 * tab-width: 4
1448 * indent-tabs-mode: nil
1449 * End:
1450 */