ia64/xen-unstable: xen/arch/ia64/xen/mm.c @ 10383:34bdc8d86f8d

[IA64] clean up "assign domain page ... already mapped!" printks

Signed-off-by: Al Stone <ahs3@fc.hp.com>
author   awilliam@xenbuild.aw
date     Thu Jun 15 08:46:43 2006 -0600
parents  d0a77e90eaa3
children 0d1dab1d9b67
1 /*
2 * Copyright (C) 2005 Intel Co
3 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
4 *
5 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
6 *
7 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
8 * VA Linux Systems Japan K.K.
9 * dom0 vp model support
10 */
12 #include <xen/config.h>
13 #include <xen/sched.h>
14 #include <xen/domain.h>
15 #include <asm/xentypes.h>
16 #include <asm/mm.h>
17 #include <asm/pgalloc.h>
18 #include <asm/vhpt.h>
19 #include <asm/vcpu.h>
20 #include <linux/efi.h>
22 #ifndef CONFIG_XEN_IA64_DOM0_VP
23 #define CONFIG_DOMAIN0_CONTIGUOUS
24 #else
25 static void domain_page_flush(struct domain* d, unsigned long mpaddr,
26 unsigned long old_mfn, unsigned long new_mfn);
27 #endif
29 static struct domain *dom_xen, *dom_io;
31 // the following is stolen from arch_init_memory() @ xen/arch/x86/mm.c
32 void
33 alloc_dom_xen_and_dom_io(void)
34 {
35 /*
36 * Initialise our DOMID_XEN domain.
37 * Any Xen-heap pages that we will allow to be mapped will have
38 * their domain field set to dom_xen.
39 */
40 dom_xen = alloc_domain(DOMID_XEN);
41 BUG_ON(dom_xen == NULL);
43 /*
44 * Initialise our DOMID_IO domain.
45 * This domain owns I/O pages that are within the range of the page_info
46 * array. Mappings occur at the privilege of the caller.
47 */
48 dom_io = alloc_domain(DOMID_IO);
49 BUG_ON(dom_io == NULL);
50 }
52 // heavily depends on the struct page_info layout.
53 // if (page_get_owner(page) == d &&
54 // test_and_clear_bit(_PGC_allocated, &page->count_info)) {
55 // put_page(page);
56 // }
57 static void
58 try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
59 {
60 u32 _d, _nd;
61 u64 x, nx, y;
63 _d = pickle_domptr(d);
64 y = *((u64*)&page->count_info);
65 do {
66 x = y;
67 _nd = x >> 32;
68 nx = x - 1;
69 __clear_bit(_PGC_allocated, &nx);
71 if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
72 struct domain* nd = unpickle_domptr(_nd);
73 if (nd == NULL) {
74 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
75 "sd=%p 0x%x,"
76 " caf=%016lx, taf=%" PRtype_info "\n",
77 (void *) page_to_mfn(page),
78 d, d->domain_id, _d,
79 nd, _nd,
80 x,
81 page->u.inuse.type_info);
82 }
83 break;
84 }
86 BUG_ON((nx & PGC_count_mask) < 1);
87 y = cmpxchg((u64*)&page->count_info, x, nx);
88 } while (unlikely(y != x));
89 }
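/*
 * Note on the 64-bit access above (a reading of the code, relying on the
 * little-endian ia64 layout of struct page_info): count_info and
 * u.inuse._domain share one aligned 8-byte word, so
 *
 *     x & 0xffffffff   is page->count_info      (refcount and PGC_* flags)
 *     x >> 32          is page->u.inuse._domain (the pickled owner)
 *
 * The owner check and the "drop one reference, clear _PGC_allocated"
 * update therefore operate on a single consistent snapshot, retried via
 * cmpxchg if another cpu changed the word in the meantime.
 */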
91 static void
92 relinquish_pte(struct domain* d, pte_t* pte)
93 {
94 unsigned long mfn = pte_pfn(*pte);
95 struct page_info* page;
97 // a vmx domain uses bits [58:56] to distinguish an I/O region from memory.
98 // see vmx_build_physmap_table() in vmx_init.c
99 if (!pte_mem(*pte))
100 return;
102 // the domain might map I/O space or ACPI table pages; check for that.
103 if (!mfn_valid(mfn))
104 return;
105 page = mfn_to_page(mfn);
106 // the struct page_info corresponding to mfn may or may not exist depending
107 // on CONFIG_VIRTUAL_FRAME_TABLE.
108 // This check is too simplistic; the right way is to check whether this
109 // page belongs to an I/O area or to ACPI pages.
110 if (page_get_owner(page) == NULL) {
111 BUG_ON(page->count_info != 0);
112 return;
113 }
115 #ifdef CONFIG_XEN_IA64_DOM0_VP
116 if (page_get_owner(page) == d) {
117 BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
118 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
119 }
120 #endif
121 try_to_clear_PGC_allocate(d, page);
122 put_page(page);
123 }
125 static void
126 relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
127 {
128 unsigned long i;
129 pte_t* pte = pte_offset_map(pmd, offset);
131 for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
132 if (!pte_present(*pte))
133 continue;
135 relinquish_pte(d, pte);
136 }
137 pte_free_kernel(pte_offset_map(pmd, offset));
138 }
140 static void
141 relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
142 {
143 unsigned long i;
144 pmd_t *pmd = pmd_offset(pud, offset);
146 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
147 if (!pmd_present(*pmd))
148 continue;
150 relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
151 }
152 pmd_free(pmd_offset(pud, offset));
153 }
155 static void
156 relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
157 {
158 unsigned long i;
159 pud_t *pud = pud_offset(pgd, offset);
161 for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
162 if (!pud_present(*pud))
163 continue;
165 relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
166 }
167 pud_free(pud_offset(pgd, offset));
168 }
170 void
171 relinquish_mm(struct domain* d)
172 {
173 struct mm_struct* mm = &d->arch.mm;
174 unsigned long i;
175 pgd_t* pgd;
177 if (mm->pgd == NULL)
178 return;
180 pgd = pgd_offset(mm, 0);
181 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
182 if (!pgd_present(*pgd))
183 continue;
185 relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
186 }
187 pgd_free(mm->pgd);
188 mm->pgd = NULL;
189 }
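/*
 * Teardown order above matters: each level first releases what its
 * entries point at (mapped pages at the pte level, lower-level tables
 * otherwise) and only then frees the page-table page itself, so nothing
 * is freed while entries beneath it are still reachable.
 */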
191 // stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
192 void
193 share_xen_page_with_guest(struct page_info *page,
194 struct domain *d, int readonly)
195 {
196 if ( page_get_owner(page) == d )
197 return;
199 #if 1
200 if (readonly) {
201 printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
202 }
203 #endif
205 // alloc_xenheap_pages() doesn't initialize page owner.
206 //BUG_ON(page_get_owner(page) != NULL);
208 spin_lock(&d->page_alloc_lock);
210 #ifndef __ia64__
211 /* The incremented type count pins as writable or read-only. */
212 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
213 page->u.inuse.type_info |= PGT_validated | 1;
214 #endif
216 page_set_owner(page, d);
217 wmb(); /* install valid domain ptr before updating refcnt. */
218 ASSERT(page->count_info == 0);
219 page->count_info |= PGC_allocated | 1;
221 if ( unlikely(d->xenheap_pages++ == 0) )
222 get_knownalive_domain(d);
223 list_add_tail(&page->list, &d->xenpage_list);
225 // grant_table_destroy() releases these pages,
226 // but it doesn't clear their m2p entries, so stale entries may remain.
227 // Such a stale entry is cleared here.
228 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
230 spin_unlock(&d->page_alloc_lock);
231 }
233 void
234 share_xen_page_with_privileged_guests(struct page_info *page, int readonly)
235 {
236 share_xen_page_with_guest(page, dom_xen, readonly);
237 }
239 unsigned long
240 gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
241 {
242 unsigned long pte;
244 #ifndef CONFIG_XEN_IA64_DOM0_VP
245 if (d == dom0)
246 return(gpfn);
247 #endif
248 pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
249 if (!pte) {
250 panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n");
251 }
252 return ((pte & _PFN_MASK) >> PAGE_SHIFT);
253 }
255 // given a domain virtual address, pte and page size, extract the metaphysical
256 // address, convert the pte to a physical address for the (possibly different)
257 // Xen PAGE_SIZE and return the modified pte. (NOTE: the TLB insert should use
258 // PAGE_SIZE!)
259 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps)
260 {
261 struct domain *d = current->domain;
262 ia64_itir_t itir = {.itir = itir__};
263 u64 mask, mpaddr, pteval2;
264 u64 arflags;
265 u64 arflags2;
267 pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
269 // FIXME address had better be pre-validated on insert
270 mask = ~itir_mask(itir.itir);
271 mpaddr = (((pteval & ~_PAGE_ED) & _PAGE_PPN_MASK) & ~mask) |
272 (address & mask);
273 #ifdef CONFIG_XEN_IA64_DOM0_VP
274 if (itir.ps > PAGE_SHIFT) {
275 itir.ps = PAGE_SHIFT;
276 }
277 #endif
278 *logps = itir.ps;
279 #ifndef CONFIG_XEN_IA64_DOM0_VP
280 if (d == dom0) {
281 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
282 /*
283 printk("translate_domain_pte: out-of-bounds dom0 mpaddr 0x%lx! itc=%lx...\n",
284 mpaddr, ia64_get_itc());
285 */
286 }
287 }
288 else if ((mpaddr >> PAGE_SHIFT) > d->max_pages) {
289 /* Address beyond the limit. However the grant table is
290 also beyond the limit. Display a message if not in the
291 grant table. */
292 if (mpaddr >= IA64_GRANT_TABLE_PADDR
293 && mpaddr < (IA64_GRANT_TABLE_PADDR
294 + (ORDER_GRANT_FRAMES << PAGE_SHIFT)))
295 printf("translate_domain_pte: bad mpa=0x%lx (> 0x%lx),"
296 "vadr=0x%lx,pteval=0x%lx,itir=0x%lx\n",
297 mpaddr, (unsigned long)d->max_pages<<PAGE_SHIFT,
298 address, pteval, itir.itir);
299 }
300 #endif
301 pteval2 = lookup_domain_mpa(d,mpaddr);
302 arflags = pteval & _PAGE_AR_MASK;
303 arflags2 = pteval2 & _PAGE_AR_MASK;
304 if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) {
305 #if 0
306 DPRINTK("%s:%d "
307 "pteval 0x%lx arflag 0x%lx address 0x%lx itir 0x%lx "
308 "pteval2 0x%lx arflags2 0x%lx mpaddr 0x%lx\n",
309 __func__, __LINE__,
310 pteval, arflags, address, itir__,
311 pteval2, arflags2, mpaddr);
312 #endif
313 pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R;
314 }
316 pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits
317 pteval2 |= (pteval & _PAGE_ED);
318 pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected)
319 pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2;
320 return pteval2;
321 }
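/*
 * Illustrative example of the address math above, assuming itir_mask(ps)
 * expands to ~((1UL << ps) - 1): for a guest mapping with ps = 16 (64KB)
 * and Xen's PAGE_SHIFT = 14 (16KB), mask = 0xffff and
 *
 *     mpaddr = (guest pte ppn & ~0xffff) | (address & 0xffff)
 *
 * i.e. the metaphysical base of the guest's large page plus the offset of
 * the faulting address within it.  With CONFIG_XEN_IA64_DOM0_VP the itir
 * page size is then clamped to PAGE_SHIFT, so the returned pte only
 * covers a single Xen-sized page of the machine mapping obtained from
 * lookup_domain_mpa().
 */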
323 // given a current domain metaphysical address, return the physical address
324 unsigned long translate_domain_mpaddr(unsigned long mpaddr)
325 {
326 unsigned long pteval;
328 #ifndef CONFIG_XEN_IA64_DOM0_VP
329 if (current->domain == dom0) {
330 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
331 printk("translate_domain_mpaddr: out-of-bounds dom0 mpaddr 0x%lx! continuing...\n",
332 mpaddr);
333 }
334 }
335 #endif
336 pteval = lookup_domain_mpa(current->domain,mpaddr);
337 return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
338 }
340 //XXX should !xxx_present() be used instead of !xxx_none()?
341 // pud, pmd and pte pages are zero-cleared when they are allocated.
342 // Their contents must be visible before population, so the
343 // cmpxchg must have release semantics.
344 static pte_t*
345 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
346 {
347 struct mm_struct *mm = &d->arch.mm;
348 pgd_t *pgd;
349 pud_t *pud;
350 pmd_t *pmd;
352 BUG_ON(mm->pgd == NULL);
354 pgd = pgd_offset(mm, mpaddr);
355 again_pgd:
356 if (unlikely(pgd_none(*pgd))) {
357 pud_t *old_pud = NULL;
358 pud = pud_alloc_one(mm, mpaddr);
359 if (unlikely(!pgd_cmpxchg_rel(mm, pgd, old_pud, pud))) {
360 pud_free(pud);
361 goto again_pgd;
362 }
363 }
365 pud = pud_offset(pgd, mpaddr);
366 again_pud:
367 if (unlikely(pud_none(*pud))) {
368 pmd_t* old_pmd = NULL;
369 pmd = pmd_alloc_one(mm, mpaddr);
370 if (unlikely(!pud_cmpxchg_rel(mm, pud, old_pmd, pmd))) {
371 pmd_free(pmd);
372 goto again_pud;
373 }
374 }
376 pmd = pmd_offset(pud, mpaddr);
377 again_pmd:
378 if (unlikely(pmd_none(*pmd))) {
379 pte_t* old_pte = NULL;
380 pte_t* pte = pte_alloc_one_kernel(mm, mpaddr);
381 if (unlikely(!pmd_cmpxchg_kernel_rel(mm, pmd, old_pte, pte))) {
382 pte_free_kernel(pte);
383 goto again_pmd;
384 }
385 }
387 return pte_offset_map(pmd, mpaddr);
388 }
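/*
 * The population pattern above is identical at each level: speculatively
 * allocate a zero-cleared table, then try to install it with a cmpxchg
 * that has release semantics; if another vcpu won the race, free the
 * speculative table and retry.  The release semantics guarantee the
 * zeroed table contents are visible to other cpus before the new entry
 * itself is.
 */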
390 //XXX should xxx_none() be used instead of !xxx_present()?
391 static pte_t*
392 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
393 {
394 struct mm_struct *mm = &d->arch.mm;
395 pgd_t *pgd;
396 pud_t *pud;
397 pmd_t *pmd;
399 BUG_ON(mm->pgd == NULL);
400 pgd = pgd_offset(mm, mpaddr);
401 if (unlikely(!pgd_present(*pgd)))
402 return NULL;
404 pud = pud_offset(pgd, mpaddr);
405 if (unlikely(!pud_present(*pud)))
406 return NULL;
408 pmd = pmd_offset(pud, mpaddr);
409 if (unlikely(!pmd_present(*pmd)))
410 return NULL;
412 return pte_offset_map(pmd, mpaddr);
413 }
415 #ifdef CONFIG_XEN_IA64_DOM0_VP
416 static pte_t*
417 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
418 {
419 struct mm_struct *mm = &d->arch.mm;
420 pgd_t *pgd;
421 pud_t *pud;
422 pmd_t *pmd;
424 BUG_ON(mm->pgd == NULL);
425 pgd = pgd_offset(mm, mpaddr);
426 if (unlikely(pgd_none(*pgd)))
427 return NULL;
429 pud = pud_offset(pgd, mpaddr);
430 if (unlikely(pud_none(*pud)))
431 return NULL;
433 pmd = pmd_offset(pud, mpaddr);
434 if (unlikely(pmd_none(*pmd)))
435 return NULL;
437 return pte_offset_map(pmd, mpaddr);
438 }
440 unsigned long
441 ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
442 {
443 pte_t *pte;
445 pte = lookup_noalloc_domain_pte(d, mpaddr);
446 if (pte == NULL)
447 return INVALID_MFN;
449 if (pte_present(*pte))
450 return (pte->pte & _PFN_MASK);
451 else if (VMX_DOMAIN(d->vcpu[0]))
452 return GPFN_INV_MASK;
453 return INVALID_MFN;
454 }
456 unsigned long
457 __lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
458 {
459 unsigned long machine = ____lookup_domain_mpa(d, mpaddr);
460 if (machine != INVALID_MFN)
461 return machine;
463 printk("%s: d 0x%p id %d current 0x%p id %d\n",
464 __func__, d, d->domain_id, current, current->vcpu_id);
465 printk("%s: bad mpa 0x%lx (max_pages 0x%lx)\n",
466 __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
467 return INVALID_MFN;
468 }
469 #endif
471 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
472 {
473 pte_t *pte;
475 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
476 if (d == dom0) {
477 pte_t pteval;
478 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
479 //printk("lookup_domain_mpa: bad dom0 mpaddr 0x%lx!\n",mpaddr);
480 //printk("lookup_domain_mpa: start=0x%lx,end=0x%lx!\n",dom0_start,dom0_start+dom0_size);
481 }
482 pteval = pfn_pte(mpaddr >> PAGE_SHIFT,
483 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX));
484 return pte_val(pteval);
485 }
486 #endif
487 pte = lookup_noalloc_domain_pte(d, mpaddr);
488 if (pte != NULL) {
489 if (pte_present(*pte)) {
490 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
491 return pte_val(*pte);
492 } else if (VMX_DOMAIN(d->vcpu[0]))
493 return GPFN_INV_MASK;
494 }
496 printk("%s: d 0x%p id %d current 0x%p id %d\n",
497 __func__, d, d->domain_id, current, current->vcpu_id);
498 if ((mpaddr >> PAGE_SHIFT) < d->max_pages)
499 printk("%s: non-allocated mpa 0x%lx (< 0x%lx)\n", __func__,
500 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
501 else
502 printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__,
503 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
505 //XXX This is a workaround until emulation of memory accesses to a region
506 // where memory or a device is attached is implemented.
507 return pte_val(pfn_pte(0, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
508 }
510 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
511 #if 1
512 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
513 {
514 unsigned long pte = lookup_domain_mpa(d,mpaddr);
515 unsigned long imva;
517 pte &= _PAGE_PPN_MASK;
518 imva = (unsigned long) __va(pte);
519 imva |= mpaddr & ~PAGE_MASK;
520 return (void*)imva;
521 }
522 #else
523 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
524 {
525 unsigned long imva = __gpa_to_mpa(d, mpaddr);
527 return (void *)__va(imva);
528 }
529 #endif
531 /* Allocate a new page for domain and map it to the specified metaphysical
532 address. */
533 struct page_info *
534 __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
535 {
536 struct page_info *p = NULL;
537 unsigned long maddr;
538 int ret;
540 BUG_ON(!pte_none(*pte));
542 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
543 if (d == dom0) {
544 #if 0
545 if (mpaddr < dom0_start || mpaddr >= dom0_start + dom0_size) {
546 /* FIXME: is it true ?
547 dom0 memory is not contiguous! */
548 panic("assign_new_domain_page: bad domain0 "
549 "mpaddr=%lx, start=%lx, end=%lx!\n",
550 mpaddr, dom0_start, dom0_start+dom0_size);
551 }
552 #endif
553 p = mfn_to_page((mpaddr >> PAGE_SHIFT));
554 return p;
555 }
556 #endif
558 p = alloc_domheap_page(d);
559 if (unlikely(!p)) {
560 printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
561 return(p);
562 }
564 // zero out pages for security reasons
565 clear_page(page_to_virt(p));
566 maddr = page_to_maddr (p);
567 if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
568 && maddr < __get_cpu_var(vhpt_pend))) {
569 /* FIXME: how can this happen ?
570 vhpt is allocated by alloc_domheap_page. */
571 printf("assign_new_domain_page: reassigned vhpt page %lx!!\n",
572 maddr);
573 }
575 ret = get_page(p, d);
576 BUG_ON(ret == 0);
577 set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
578 // clear_page() and set_gpfn_from_mfn() become visible before set_pte_rel()
579 // because set_pte_rel() has release semantics
580 set_pte_rel(pte,
581 pfn_pte(maddr >> PAGE_SHIFT,
582 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
584 smp_mb();
585 return p;
586 }
588 struct page_info *
589 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
590 {
591 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
592 pte_t dummy_pte = __pte(0);
593 return __assign_new_domain_page(d, mpaddr, &dummy_pte);
594 #else
595 struct page_info *p = NULL;
596 pte_t *pte;
598 pte = lookup_alloc_domain_pte(d, mpaddr);
599 if (pte_none(*pte))
600 p = __assign_new_domain_page(d, mpaddr, pte);
602 return p;
603 #endif
604 }
606 void
607 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
608 {
609 #ifndef CONFIG_DOMAIN0_CONTIGUOUS
610 pte_t *pte;
612 BUG_ON(d != dom0);
613 pte = lookup_alloc_domain_pte(d, mpaddr);
614 if (pte_none(*pte)) {
615 struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
616 if (p == NULL) {
617 panic("%s: can't allocate page for dom0", __func__);
618 }
619 }
620 #endif
621 }
623 /* map a physical address to the specified metaphysical addr */
624 // flags: currently only ASSIGN_readonly
625 void
626 __assign_domain_page(struct domain *d,
627 unsigned long mpaddr, unsigned long physaddr,
628 unsigned long flags)
629 {
630 pte_t *pte;
631 unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
633 pte = lookup_alloc_domain_pte(d, mpaddr);
634 if (pte_none(*pte)) {
635 set_pte_rel(pte,
636 pfn_pte(physaddr >> PAGE_SHIFT,
637 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)));
638 smp_mb();
639 }
640 }
642 /* get_page() and map a physical address to the specified metaphysical addr */
643 void
644 assign_domain_page(struct domain *d,
645 unsigned long mpaddr, unsigned long physaddr)
646 {
647 struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
648 int ret;
650 BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
651 ret = get_page(page, d);
652 BUG_ON(ret == 0);
653 set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
654 // because __assign_domain_page() uses set_pte_rel() which has
655 // release semantics, smp_mb() isn't needed.
656 __assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable);
657 }
659 #ifdef CONFIG_XEN_IA64_DOM0_VP
660 static void
661 assign_domain_same_page(struct domain *d,
662 unsigned long mpaddr, unsigned long size,
663 unsigned long flags)
664 {
665 //XXX optimization
666 unsigned long end = PAGE_ALIGN(mpaddr + size);
667 for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) {
668 __assign_domain_page(d, mpaddr, mpaddr, flags);
669 }
670 }
672 int
673 efi_mmio(unsigned long physaddr, unsigned long size)
674 {
675 void *efi_map_start, *efi_map_end;
676 u64 efi_desc_size;
677 void* p;
679 efi_map_start = __va(ia64_boot_param->efi_memmap);
680 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
681 efi_desc_size = ia64_boot_param->efi_memdesc_size;
683 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
684 efi_memory_desc_t* md = (efi_memory_desc_t *)p;
685 unsigned long start = md->phys_addr;
686 unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
688 if (start <= physaddr && physaddr < end) {
689 if ((physaddr + size) > end) {
690 DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
691 __func__, __LINE__, physaddr, size);
692 return 0;
693 }
695 // for io space
696 if (md->type == EFI_MEMORY_MAPPED_IO ||
697 md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
698 return 1;
699 }
701 // for runtime
702 // see efi_enter_virtual_mode(void)
703 // in linux/arch/ia64/kernel/efi.c
704 if ((md->attribute & EFI_MEMORY_RUNTIME) &&
705 !(md->attribute & EFI_MEMORY_WB)) {
706 return 1;
707 }
709 DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
710 __func__, __LINE__, physaddr, size);
711 return 0;
712 }
714 if (physaddr < start) {
715 break;
716 }
717 }
719 return 1;
720 }
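/*
 * Summary of the walk above (assuming the EFI descriptors are sorted by
 * address, which the early break relies on): efi_mmio() returns 1 when
 * the range lies in an MMIO / MMIO port-space descriptor, in uncacheable
 * EFI runtime space, or is not described by the memory map at all; it
 * returns 0 when the range is ordinary memory or straddles a descriptor
 * boundary.
 */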
722 unsigned long
723 assign_domain_mmio_page(struct domain *d,
724 unsigned long mpaddr, unsigned long size)
725 {
726 if (size == 0) {
727 DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
728 __func__, d, mpaddr, size);
729 }
730 if (!efi_mmio(mpaddr, size)) {
731 DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
732 __func__, __LINE__, d, mpaddr, size);
733 return -EINVAL;
734 }
735 assign_domain_same_page(d, mpaddr, size, ASSIGN_writable);
736 return mpaddr;
737 }
739 unsigned long
740 assign_domain_mach_page(struct domain *d,
741 unsigned long mpaddr, unsigned long size,
742 unsigned long flags)
743 {
744 assign_domain_same_page(d, mpaddr, size, flags);
745 return mpaddr;
746 }
748 // the caller must get_page(mfn_to_page(mfn)) before calling.
749 // the caller must call set_gpfn_from_mfn() beforehand if necessary, and
750 // must use a memory barrier, because the set_gpfn_from_mfn() result must
751 // be visible before the pte xchg. NOTE: xchg has acquire semantics.
752 // flags: currently only ASSIGN_readonly
753 static void
754 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
755 unsigned long mfn, unsigned long flags)
756 {
757 struct mm_struct *mm = &d->arch.mm;
758 pte_t* pte;
759 pte_t old_pte;
760 pte_t npte;
761 unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
762 pte = lookup_alloc_domain_pte(d, mpaddr);
764 // update pte
765 npte = pfn_pte(mfn, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags));
766 old_pte = ptep_xchg(mm, mpaddr, pte, npte);
767 if (pte_mem(old_pte)) {
768 unsigned long old_mfn = pte_pfn(old_pte);
770 // the mfn == old_mfn case can happen when a domain maps a granted page
771 // twice at the same pseudo-physical address.
772 // It's nonsensical, but allowed.
773 // __gnttab_map_grant_ref()
774 // => create_host_mapping()
775 // => assign_domain_page_replace()
776 if (mfn != old_mfn) {
777 struct page_info* old_page = mfn_to_page(old_mfn);
779 if (page_get_owner(old_page) == d) {
780 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
781 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
782 }
784 domain_page_flush(d, mpaddr, old_mfn, mfn);
786 try_to_clear_PGC_allocate(d, old_page);
787 put_page(old_page);
788 }
789 }
790 }
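/*
 * A typical caller sequence, as in guest_physmap_add_page() below:
 *
 *     get_page(mfn_to_page(mfn), d);       // pin the new frame
 *     set_gpfn_from_mfn(mfn, gpfn);        // m2p entry first...
 *     smp_mb();                            // ...and visible before the xchg
 *     assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
 *
 * Any previous mapping of a different frame at mpaddr is unwound here:
 * its m2p entry is invalidated (if this domain owned the old frame), the
 * TLBs are flushed and its reference is dropped.
 */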
792 // the caller must get_page(new_page) before calling.
793 // Only steal_page_for_grant_transfer() calls this function.
794 static int
795 assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
796 struct page_info* old_page,
797 struct page_info* new_page,
798 unsigned long flags)
799 {
800 struct mm_struct *mm = &d->arch.mm;
801 pte_t* pte;
802 unsigned long old_mfn;
803 unsigned long old_arflags;
804 pte_t old_pte;
805 unsigned long new_mfn;
806 unsigned long new_arflags;
807 pte_t new_pte;
808 pte_t ret_pte;
810 pte = lookup_alloc_domain_pte(d, mpaddr);
812 again:
813 old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;//XXX
814 old_mfn = page_to_mfn(old_page);
815 old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
817 new_arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX;
818 new_mfn = page_to_mfn(new_page);
819 new_pte = pfn_pte(new_mfn,
820 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | new_arflags));
822 // update pte
823 ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
824 if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) {
825 if (pte_pfn(old_pte) == pte_pfn(ret_pte)) {
826 goto again;
827 }
829 DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
830 "ret_pte 0x%lx ret_mfn 0x%lx\n",
831 __func__,
832 pte_val(old_pte), old_arflags, old_mfn,
833 pte_val(ret_pte), pte_pfn(ret_pte));
834 return -EINVAL;
835 }
837 BUG_ON(!pte_mem(old_pte));
838 BUG_ON(page_get_owner(old_page) != d);
839 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
840 BUG_ON(old_mfn == new_mfn);
842 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
844 domain_page_flush(d, mpaddr, old_mfn, new_mfn);
845 put_page(old_page);
846 return 0;
847 }
849 static void
850 zap_domain_page_one(struct domain *d, unsigned long mpaddr)
851 {
852 struct mm_struct *mm = &d->arch.mm;
853 pte_t *pte;
854 pte_t old_pte;
855 unsigned long mfn;
856 struct page_info *page;
858 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
859 if (pte == NULL)
860 return;
861 if (pte_none(*pte))
862 return;
864 // update pte
865 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
866 mfn = pte_pfn(old_pte);
867 page = mfn_to_page(mfn);
868 BUG_ON((page->count_info & PGC_count_mask) == 0);
870 if (page_get_owner(page) == d) {
871 BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
872 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
873 }
875 domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
877 try_to_clear_PGC_allocate(d, page);
878 put_page(page);
879 }
881 unsigned long
882 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
883 unsigned int extent_order)
884 {
885 if (extent_order != 0) {
886 //XXX
887 return -ENOSYS;
888 }
890 zap_domain_page_one(d, gpfn << PAGE_SHIFT);
891 return 0;
892 }
894 unsigned long
895 dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn,
896 unsigned long flags, domid_t domid)
897 {
898 int error = 0;
899 struct domain* rd;
901 rd = find_domain_by_id(domid);
902 if (unlikely(rd == NULL)) {
903 switch (domid) {
904 case DOMID_XEN:
905 rd = dom_xen;
906 break;
907 case DOMID_IO:
908 rd = dom_io;
909 break;
910 default:
911 DPRINTK("d 0x%p domid %d "
912 "pgfn 0x%lx mfn 0x%lx flags 0x%lx domid %d\n",
913 d, d->domain_id, gpfn, mfn, flags, domid);
914 return -ESRCH;
915 }
916 BUG_ON(rd == NULL);
917 get_knownalive_domain(rd);
918 }
920 if (unlikely(rd == d)) {
921 error = -EINVAL;
922 goto out1;
923 }
924 BUG_ON(!mfn_valid(mfn));
925 if (unlikely(get_page(mfn_to_page(mfn), rd) == 0)) {
926 error = -EINVAL;
927 goto out1;
928 }
929 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
930 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
931 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags);
932 //don't update p2m table because this page belongs to rd, not d.
933 out1:
934 put_domain(rd);
935 return error;
936 }
938 // grant table host mapping
939 // mpaddr: host_addr: pseudo physical address
940 // mfn: frame: machine page frame
941 // flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
942 int
943 create_grant_host_mapping(unsigned long gpaddr,
944 unsigned long mfn, unsigned int flags)
945 {
946 struct domain* d = current->domain;
947 struct page_info* page;
948 int ret;
950 if (flags & (GNTMAP_device_map |
951 GNTMAP_application_map | GNTMAP_contains_pte)) {
952 DPRINTK("%s: flags 0x%x\n", __func__, flags);
953 return GNTST_general_error;
954 }
956 BUG_ON(!mfn_valid(mfn));
957 page = mfn_to_page(mfn);
958 ret = get_page(page, page_get_owner(page));
959 BUG_ON(ret == 0);
960 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
961 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
962 assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
963 ASSIGN_readonly: ASSIGN_writable);
964 return GNTST_okay;
965 }
967 // grant table host unmapping
968 int
969 destroy_grant_host_mapping(unsigned long gpaddr,
970 unsigned long mfn, unsigned int flags)
971 {
972 struct domain* d = current->domain;
973 pte_t* pte;
974 pte_t old_pte;
975 unsigned long old_mfn = INVALID_MFN;
976 struct page_info* old_page;
978 if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
979 DPRINTK("%s: flags 0x%x\n", __func__, flags);
980 return GNTST_general_error;
981 }
983 pte = lookup_noalloc_domain_pte(d, gpaddr);
984 if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn)
985 return GNTST_general_error;
987 // update pte
988 old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte);
989 if (pte_present(old_pte)) {
990 old_mfn = pte_pfn(old_pte);
991 } else {
992 return GNTST_general_error;
993 }
994 domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN);
996 old_page = mfn_to_page(old_mfn);
997 BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
998 put_page(old_page);
1000 return GNTST_okay;
1001 }
1003 // heavily depends on the struct page layout.
1004 int
1005 steal_page_for_grant_transfer(struct domain *d, struct page_info *page)
1006 {
1007 #if 0 /* if big endian */
1008 # error "implement big endian version of steal_page_for_grant_transfer()"
1009 #endif
1010 u32 _d, _nd;
1011 u64 x, nx, y;
1012 unsigned long gpfn;
1013 struct page_info *new;
1014 unsigned long new_mfn;
1015 int ret;
1016 new = alloc_domheap_page(d);
1017 if (new == NULL) {
1018 DPRINTK("alloc_domheap_page() failed\n");
1019 return -1;
1020 }
1021 // zero out pages for security reasons
1022 clear_page(page_to_virt(new));
1023 // assign_domain_page_cmpxchg_rel() has release semantics
1024 // so smp_mb() isn't needed.
1026 ret = get_page(new, d);
1027 BUG_ON(ret == 0);
1029 gpfn = get_gpfn_from_mfn(page_to_mfn(page));
1030 if (gpfn == INVALID_M2P_ENTRY) {
1031 free_domheap_page(new);
1032 return -1;
1033 }
1034 new_mfn = page_to_mfn(new);
1035 set_gpfn_from_mfn(new_mfn, gpfn);
1036 // smp_mb() isn't needed because assign_domain_page_cmpxchg_rel()
1037 // has release semantics.
1039 ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new,
1040 ASSIGN_writable);
1041 if (ret < 0) {
1042 DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret);
1043 set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY);
1044 free_domheap_page(new);
1045 return -1;
1046 }
1048 spin_lock(&d->page_alloc_lock);
1050 /*
1051 * The tricky bit: atomically release ownership while there is just one
1052 * benign reference to the page (PGC_allocated). If that reference
1053 * disappears then the deallocation routine will safely spin.
1054 */
1055 _d = pickle_domptr(d);
1056 y = *((u64*)&page->count_info);
1057 do {
1058 x = y;
1059 nx = x & 0xffffffff;
1060 // page->count_info: untouched
1061 // page->u.inuse._domain = 0;
1062 _nd = x >> 32;
1064 if (unlikely((x & (PGC_count_mask | PGC_allocated)) !=
1065 (1 | PGC_allocated)) ||
1066 unlikely(_nd != _d)) {
1067 struct domain* nd = unpickle_domptr(_nd);
1068 if (nd == NULL) {
1069 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1070 "sd=%p 0x%x,"
1071 " caf=%016lx, taf=%" PRtype_info "\n",
1072 (void *) page_to_mfn(page),
1073 d, d->domain_id, _d,
1074 nd, _nd,
1075 x,
1076 page->u.inuse.type_info);
1077 } else {
1078 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1079 "sd=%p(%u) 0x%x,"
1080 " caf=%016lx, taf=%" PRtype_info "\n",
1081 (void *) page_to_mfn(page),
1082 d, d->domain_id, _d,
1083 nd, nd->domain_id, _nd,
1084 x,
1085 page->u.inuse.type_info);
1086 }
1087 spin_unlock(&d->page_alloc_lock);
1088 return -1;
1089 }
1091 y = cmpxchg((u64*)&page->count_info, x, nx);
1092 } while (unlikely(y != x));
1094 /*
1095 * Unlink from 'd'. At least one reference remains (now anonymous), so
1096 * no one else is spinning to try to delete this page from 'd'.
1097 */
1098 d->tot_pages--;
1099 list_del(&page->list);
1101 spin_unlock(&d->page_alloc_lock);
1102 return 0;
1103 }
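/*
 * Rough shape of the steal above: a freshly zeroed replacement page is
 * wired into the domain's physmap at the same gpfn via
 * assign_domain_page_cmpxchg_rel(), so the guest never loses the
 * metaphysical frame, and only then is the original page's ownership
 * stripped with the count_info/_domain cmpxchg (the same layout trick as
 * in try_to_clear_PGC_allocate()) before unlinking it from d's page list.
 */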
1105 void
1106 guest_physmap_add_page(struct domain *d, unsigned long gpfn,
1107 unsigned long mfn)
1108 {
1109 int ret;
1111 BUG_ON(!mfn_valid(mfn));
1112 ret = get_page(mfn_to_page(mfn), d);
1113 BUG_ON(ret == 0);
1114 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
1115 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
1116 set_gpfn_from_mfn(mfn, gpfn);
1117 smp_mb();
1118 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
1120 //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
1121 }
1123 void
1124 guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
1125 unsigned long mfn)
1126 {
1127 BUG_ON(mfn == 0);//XXX
1128 zap_domain_page_one(d, gpfn << PAGE_SHIFT);
1129 }
1131 //XXX sledgehammer.
1132 // flush finer range.
1133 void
1134 domain_page_flush(struct domain* d, unsigned long mpaddr,
1135 unsigned long old_mfn, unsigned long new_mfn)
1136 {
1137 domain_flush_vtlb_all();
1138 }
1140 int
1141 domain_page_mapped(struct domain* d, unsigned long mpaddr)
1142 {
1143 pte_t * pte;
1145 pte = lookup_noalloc_domain_pte(d, mpaddr);
1146 if(pte != NULL && !pte_none(*pte))
1147 return 1;
1148 return 0;
1149 }
1150 #endif
1152 /* Flush cache of domain d. */
1153 void domain_cache_flush (struct domain *d, int sync_only)
1154 {
1155 struct mm_struct *mm = &d->arch.mm;
1156 pgd_t *pgd = mm->pgd;
1157 unsigned long maddr;
1158 int i,j,k, l;
1159 int nbr_page = 0;
1160 void (*flush_func)(unsigned long start, unsigned long end);
1161 extern void flush_dcache_range (unsigned long, unsigned long);
1163 if (sync_only)
1164 flush_func = &flush_icache_range;
1165 else
1166 flush_func = &flush_dcache_range;
1168 #ifdef CONFIG_DOMAIN0_CONTIGUOUS
1169 if (d == dom0) {
1170 /* This is not fully correct (because of hole), but it should
1171 be enough for now. */
1172 (*flush_func)(__va_ul (dom0_start),
1173 __va_ul (dom0_start + dom0_size));
1174 return;
1175 }
1176 #endif
1177 for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
1178 pud_t *pud;
1179 if (!pgd_present(*pgd))
1180 continue;
1181 pud = pud_offset(pgd, 0);
1182 for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
1183 pmd_t *pmd;
1184 if (!pud_present(*pud))
1185 continue;
1186 pmd = pmd_offset(pud, 0);
1187 for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
1188 pte_t *pte;
1189 if (!pmd_present(*pmd))
1190 continue;
1191 pte = pte_offset_map(pmd, 0);
1192 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
1193 if (!pte_present(*pte))
1194 continue;
1195 /* Convert PTE to maddr. */
1196 maddr = __va_ul (pte_val(*pte)
1197 & _PAGE_PPN_MASK);
1198 (*flush_func)(maddr, maddr+ PAGE_SIZE);
1199 nbr_page++;
1200 }
1201 }
1202 }
1203 }
1204 //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
1205 }
1207 #ifdef VERBOSE
1208 #define MEM_LOG(_f, _a...) \
1209 printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
1210 current->domain->domain_id , __LINE__ , ## _a )
1211 #else
1212 #define MEM_LOG(_f, _a...) ((void)0)
1213 #endif
1215 static void free_page_type(struct page_info *page, u32 type)
1216 {
1217 }
1219 static int alloc_page_type(struct page_info *page, u32 type)
1220 {
1221 return 1;
1222 }
1224 unsigned long __get_free_pages(unsigned int mask, unsigned int order)
1225 {
1226 void *p = alloc_xenheap_pages(order);
1228 memset(p,0,PAGE_SIZE<<order);
1229 return (unsigned long)p;
1230 }
1232 void __free_pages(struct page_info *page, unsigned int order)
1233 {
1234 if (order) BUG();
1235 free_xenheap_page(page);
1236 }
1238 void *pgtable_quicklist_alloc(void)
1239 {
1240 void *p;
1241 p = alloc_xenheap_pages(0);
1242 if (p)
1243 clear_page(p);
1244 return p;
1245 }
1247 void pgtable_quicklist_free(void *pgtable_entry)
1248 {
1249 free_xenheap_page(pgtable_entry);
1250 }
1252 void cleanup_writable_pagetable(struct domain *d)
1253 {
1254 return;
1255 }
1257 void put_page_type(struct page_info *page)
1258 {
1259 u32 nx, x, y = page->u.inuse.type_info;
1261 again:
1262 do {
1263 x = y;
1264 nx = x - 1;
1266 ASSERT((x & PGT_count_mask) != 0);
1268 /*
1269 * The page should always be validated while a reference is held. The
1270 * exception is during domain destruction, when we forcibly invalidate
1271 * page-table pages if we detect a referential loop.
1272 * See domain.c:relinquish_list().
1273 */
1274 ASSERT((x & PGT_validated) ||
1275 test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
1277 if ( unlikely((nx & PGT_count_mask) == 0) )
1278 {
1279 /* Record TLB information for flush later. Races are harmless. */
1280 page->tlbflush_timestamp = tlbflush_current_time();
1282 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1283 likely(nx & PGT_validated) )
1284 {
1285 /*
1286 * Page-table pages must be unvalidated when count is zero. The
1287 * 'free' is safe because the refcnt is non-zero and validated
1288 * bit is clear => other ops will spin or fail.
1289 */
1290 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1291 x & ~PGT_validated)) != x) )
1292 goto again;
1293 /* We cleared the 'valid bit' so we do the clean up. */
1294 free_page_type(page, x);
1295 /* Carry on, but with the 'valid bit' now clear. */
1296 x &= ~PGT_validated;
1297 nx &= ~PGT_validated;
1298 }
1299 }
1300 else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) ==
1301 (PGT_pinned | 1)) &&
1302 ((nx & PGT_type_mask) != PGT_writable_page)) )
1303 {
1304 /* Page is now only pinned. Make the back pointer mutable again. */
1305 nx |= PGT_va_mutable;
1306 }
1307 }
1308 while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) );
1309 }
1312 int get_page_type(struct page_info *page, u32 type)
1313 {
1314 u32 nx, x, y = page->u.inuse.type_info;
1316 again:
1317 do {
1318 x = y;
1319 nx = x + 1;
1320 if ( unlikely((nx & PGT_count_mask) == 0) )
1321 {
1322 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1323 return 0;
1324 }
1325 else if ( unlikely((x & PGT_count_mask) == 0) )
1326 {
1327 if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
1328 {
1329 if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
1330 {
1331 /*
1332 * On type change we check to flush stale TLB
1333 * entries. This may be unnecessary (e.g., page
1334 * was GDT/LDT) but those circumstances should be
1335 * very rare.
1336 */
1337 cpumask_t mask =
1338 page_get_owner(page)->domain_dirty_cpumask;
1339 tlbflush_filter(mask, page->tlbflush_timestamp);
1341 if ( unlikely(!cpus_empty(mask)) )
1342 {
1343 perfc_incrc(need_flush_tlb_flush);
1344 flush_tlb_mask(mask);
1345 }
1346 }
1348 /* We lose existing type, back pointer, and validity. */
1349 nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
1350 nx |= type;
1352 /* No special validation needed for writable pages. */
1353 /* Page tables and GDT/LDT need to be scanned for validity. */
1354 if ( type == PGT_writable_page )
1355 nx |= PGT_validated;
1356 }
1357 }
1358 else
1359 {
1360 if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
1361 {
1362 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
1363 {
1364 if ( current->domain == page_get_owner(page) )
1365 {
1366 /*
1367 * This ensures functions like set_gdt() see up-to-date
1368 * type info without needing to clean up writable p.t.
1369 * state on the fast path.
1370 */
1371 LOCK_BIGLOCK(current->domain);
1372 cleanup_writable_pagetable(current->domain);
1373 y = page->u.inuse.type_info;
1374 UNLOCK_BIGLOCK(current->domain);
1375 /* Can we make progress now? */
1376 if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
1377 ((y & PGT_count_mask) == 0) )
1378 goto again;
1379 }
1380 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1381 ((type & PGT_type_mask) != PGT_l1_page_table) )
1382 MEM_LOG("Bad type (saw %08x != exp %08x) "
1383 "for mfn %016lx (pfn %016lx)",
1384 x, type, page_to_mfn(page),
1385 get_gpfn_from_mfn(page_to_mfn(page)));
1386 return 0;
1387 }
1388 else if ( (x & PGT_va_mask) == PGT_va_mutable )
1389 {
1390 /* The va backpointer is mutable, hence we update it. */
1391 nx &= ~PGT_va_mask;
1392 nx |= type; /* we know the actual type is correct */
1393 }
1394 else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
1395 ((type & PGT_va_mask) != (x & PGT_va_mask)) )
1396 {
1397 #ifdef CONFIG_X86_PAE
1398 /* We use backptr as extra typing. Cannot be unknown. */
1399 if ( (type & PGT_type_mask) == PGT_l2_page_table )
1400 return 0;
1401 #endif
1402 /* This table is possibly mapped at multiple locations. */
1403 nx &= ~PGT_va_mask;
1404 nx |= PGT_va_unknown;
1405 }
1406 }
1407 if ( unlikely(!(x & PGT_validated)) )
1408 {
1409 /* Someone else is updating validation of this page. Wait... */
1410 while ( (y = page->u.inuse.type_info) == x )
1411 cpu_relax();
1412 goto again;
1413 }
1414 }
1415 }
1416 while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) );
1418 if ( unlikely(!(nx & PGT_validated)) )
1419 {
1420 /* Try to validate page type; drop the new reference on failure. */
1421 if ( unlikely(!alloc_page_type(page, type)) )
1422 {
1423 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x"
1424 ": caf=%08x taf=%" PRtype_info,
1425 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1426 type, page->count_info, page->u.inuse.type_info);
1427 /* No one else can get a reference. We hold the only ref. */
1428 page->u.inuse.type_info = 0;
1429 return 0;
1430 }
1432 /* No one else is updating simultaneously. */
1433 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1434 }
1436 return 1;
1437 }
1439 /*
1440 * Local variables:
1441 * mode: C
1442 * c-set-style: "BSD"
1443 * c-basic-offset: 4
1444 * tab-width: 4
1445 * indent-tabs-mode: nil
1446 * End:
1447 */