direct-io.hg
xen/arch/ia64/xen/mm.c @ 11985:c5ddcf89f050

[IA64] Add not-SMP-safe comment about PV-on-HVM

Signed-off-by: Tsunehisa Doi <Doi.Tsunehisa@jp.fujitsu.com>
author: awilliam@xenbuild.aw
date: Tue Oct 17 14:30:36 2006 -0600 (2006-10-17)
parents: 0c18c6009448
children: 463658ccf683
1 /*
2 * Copyright (C) 2005 Intel Co
3 * Kun Tian (Kevin Tian) <kevin.tian@intel.com>
4 *
5 * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add VTI domain support
6 *
7 * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
8 * VA Linux Systems Japan K.K.
9 * dom0 vp model support
10 */
12 /*
13 * NOTES on SMP
14 *
15 * * shared structures
16 * There are some structures which are accessed by CPUs concurrently.
17 * Here is the list of those shared structures and of the operations
18 * which read/write them.
19 *
20 * - struct page_info
21 * This is a Xen-global resource. This structure may be accessed
22 * by any CPU.
23 *
24 * operations on this structure:
25 * - get_page() and its variant
26 * - put_page() and its variant
27 *
28 * - vTLB
29 * vcpu->arch.{d, i}tlb: software TLB cache. These are per-VCPU data.
30 * DEFINE_PER_CPU (unsigned long, vhpt_paddr): VHPT table per physical CPU.
31 *
32 * domain_flush_vtlb_range() and domain_flush_vtlb_all() write
33 * vcpu->arch.{d, i}tlb and the VHPT of vcpus other than the current one,
34 * so there are potential races when reading/writing the VHPT and
35 * vcpu->arch.{d, i}tlb. Note that the VHPT is read by the hardware page table walker.
36 *
37 * operations on this structure:
38 * - global tlb purge
39 * vcpu_ptc_g(), vcpu_ptc_ga() and domain_page_flush()
40 * I.e. callers of domain_flush_vtlb_range() and domain_flush_vtlb_all()
41 * These functions invalidate VHPT entry and vcpu->arch.{i, d}tlb
42 *
43 * - tlb insert and fc
44 * vcpu_itc_i()
45 * vcpu_itc_d()
46 * ia64_do_page_fault()
47 * vcpu_fc()
48 * These functions set the VHPT entry and vcpu->arch.{i, d}tlb;
49 * the actual work is done by vcpu_itc_no_srlz().
50 *
51 * - the P2M table
52 * domain->mm and pgd, pud, pmd, pte table page.
53 * This structure is used to convert a domain pseudo-physical address
54 * to a machine address. This is a per-domain resource.
55 *
56 * operations on this structure:
57 * - populate the P2M table tree
58 * lookup_alloc_domain_pte() and its variants.
59 * - set p2m entry
60 * assign_new_domain_page() and its variants.
61 * assign_domain_page() and its variants.
62 * - xchg p2m entry
63 * assign_domain_page_replace()
64 * - cmpxchg p2m entry
65 * assign_domain_page_cmpxchg_rel()
66 * destroy_grant_host_mapping()
67 * steal_page()
68 * zap_domain_page_one()
69 * - read p2m entry
70 * lookup_alloc_domain_pte() and its variants.
71 *
72 * - the M2P table
73 * mpt_table (or machine_to_phys_mapping)
74 * This is a table which converts a machine address to a pseudo-physical
75 * address. It is a global structure.
76 *
77 * operations on this structure:
78 * - set m2p entry
79 * set_gpfn_from_mfn()
80 * - zap m2p entry
81 * set_gpfn_from_mfn(INVALID_P2M_ENTRY)
82 * - get m2p entry
83 * get_gpfn_from_mfn()
84 *
85 *
86 * * avoiding races
87 * The resources which are shared by CPUs must be accessed carefully
88 * to avoid races.
89 * IA64 has a weak memory ordering model, so care must be taken when
90 * accessing shared structures. [SDM vol2 PartII chap. 2]
91 *
92 * - struct page_info memory ordering
93 * get_page() has acquire semantics.
94 * put_page() has release semantics.
95 *
96 * - populating the p2m table
97 * pgd, pud, pmd are append only.
98 *
99 * - races when updating the P2M tables and the M2P table
100 * P2M entries are shared by more than one vcpu,
101 * so they are accessed with atomic operations.
102 * I.e. xchg or cmpxchg must be used to update a p2m entry.
103 * NOTE: When creating/destroying a domain, we don't need to take care of
104 * this race.
105 *
106 * The M2P table is the inverse of the P2M table.
107 * I.e. M2P(P2M(p)) = p and P2M(M2P(m)) = m
108 * The M2P table and P2M table must be updated consistently.
109 * Here is the update sequence
110 *
111 * xchg or cmpxchg case
112 * - set_gpfn_from_mfn(new_mfn, gpfn)
113 * - memory barrier
114 * - atomic update of the p2m entry (xchg or cmpxchg the p2m entry)
115 * get old_mfn entry as a result.
116 * - memory barrier
117 * - set_gpfn_from_mfn(old_mfn, INVALID_P2M_ENTRY)
118 *
119 * Here the memory barrier can be provided by release semantics
 * (see the sketch below).
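 *
 * A minimal sketch of that sequence, assuming the p2m slot is updated with
 * ptep_xchg() (as assign_domain_page_replace() does); 'prot' and 'ptep'
 * stand for whatever the caller computed, and the real code relies on the
 * acquire/release semantics of the xchg/cmpxchg helpers rather than on
 * explicit barriers:
 *
 *     set_gpfn_from_mfn(new_mfn, gpfn);              // new m2p entry
 *     smp_mb();
 *     old_pte = ptep_xchg(mm, mpaddr, ptep,          // atomic p2m update
 *                         pfn_pte(new_mfn, __pgprot(prot)));
 *     old_mfn = pte_pfn(old_pte);
 *     smp_mb();
 *     set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); // zap stale m2p entry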
120 *
121 * - races between global tlb purge and tlb insert
122 * This is a race on reading/writing vcpu->arch.{d, i}tlb or a VHPT entry.
123 * When a vcpu is about to insert a tlb entry, another vcpu may purge the
124 * tlb cache globally. Neither tlb insert (vcpu_itc_no_srlz()) nor global
125 * tlb purge (domain_flush_vtlb_range() and domain_flush_vtlb_all()) can
126 * update vcpu->arch.{d, i}tlb, the VHPT and the machine TLB atomically,
127 * so there is a race here.
128 *
129 * To handle it, the vcpu->arch.{d, i}tlb.p bit is checked: after inserting
130 * a tlb entry, check the p bit and retry the insert if it was cleared.
131 * Thus, when a purge and an insert race, the purge effectively happens last.
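 *
 * Illustrative restatement of the insert-side rule (the real check lives
 * in the tlb insert path, e.g. vcpu_itc_no_srlz() and its callers; the
 * field access is simplified here):
 *
 *     do {
 *         install the entry into vcpu->arch.{d, i}tlb / VHPT / machine TLB;
 *     } while (the vcpu->arch.{d, i}tlb.p bit was cleared by a purger);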
132 *
133 * - races between p2m entry update and tlb insert
134 * This is a race between reading/writing the p2m entry.
135 * reader: vcpu_itc_i(), vcpu_itc_d(), ia64_do_page_fault(), vcpu_fc()
136 * writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(),
137 * steal_page(), zap_domain_page_one()
138 *
139 * For example, vcpu_itc_i() is about to insert a tlb entry by calling
140 * vcpu_itc_no_srlz() after reading the p2m entry.
141 * At the same time, the p2m entry may be replaced by xchg or cmpxchg and
142 * the tlb cache of the page flushed.
143 * The result can be that the p2m entry no longer points to the old page,
144 * while the tlb cache still points to the old page.
145 * This is detected with a scheme similar to a sequence lock, using the p2m
146 * entry itself: the reader remembers the value of the p2m entry it used and
147 * inserts the tlb entry, then reads the p2m entry again. If the new value
148 * differs from the remembered one, it retries (see the sketch below).
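 *
 * Reader-side sketch, assuming the p2m_entry_retry() helper from
 * asm/p2m_entry.h (which compares the remembered pte value with the
 * current one):
 *
 *   again:
 *     pteval = lookup_domain_mpa(d, mpaddr, &entry); // remember p2m value
 *     ... insert the tlb entry derived from pteval ...
 *     if (p2m_entry_retry(&entry)) {                 // p2m changed under us
 *         ... flush the just-inserted tlb entry ...
 *         goto again;
 *     }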
149 *
150 * - races between referencing page and p2m entry update
151 * This is a race between reading/writing the p2m entry.
152 * reader: vcpu_get_domain_bundle(), vmx_get_domain_bundle(),
153 * efi_emulate_get_time()
154 * writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(),
155 * steal_page(), zap_domain_page_one()
156 *
157 * A page which is assigned to a domain can be de-assigned by another vcpu.
158 * So before reading/writing a domain page, the page's reference count
159 * must be incremented.
160 * vcpu_get_domain_bundle(), vmx_get_domain_bundle() and
161 * efi_emulate_get_time() do this (see the sketch below).
162 *
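 *
 * Accessor-side sketch (illustrative only; see vcpu_get_domain_bundle()
 * and friends for the real code):
 *
 *     page = mfn_to_page(mfn);
 *     if (unlikely(get_page(page, d) == 0))
 *         return 0;                  // page was de-assigned under us
 *     ... read/write the page contents ...
 *     put_page(page);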
163 */
165 #include <xen/config.h>
166 #include <xen/sched.h>
167 #include <xen/domain.h>
168 #include <asm/xentypes.h>
169 #include <xen/mm.h>
170 #include <xen/errno.h>
171 #include <asm/pgalloc.h>
172 #include <asm/vhpt.h>
173 #include <asm/vcpu.h>
174 #include <asm/shadow.h>
175 #include <asm/p2m_entry.h>
176 #include <asm/tlb_track.h>
177 #include <linux/efi.h>
178 #include <xen/guest_access.h>
179 #include <asm/page.h>
180 #include <public/memory.h>
182 static void domain_page_flush(struct domain* d, unsigned long mpaddr,
183 volatile pte_t* ptep, pte_t old_pte);
185 extern unsigned long ia64_iobase;
187 static struct domain *dom_xen, *dom_io;
189 // the following is adapted from arch_init_memory() @ xen/arch/x86/mm.c
190 void
191 alloc_dom_xen_and_dom_io(void)
192 {
193 /*
194 * Initialise our DOMID_XEN domain.
195 * Any Xen-heap pages that we will allow to be mapped will have
196 * their domain field set to dom_xen.
197 */
198 dom_xen = alloc_domain(DOMID_XEN);
199 BUG_ON(dom_xen == NULL);
201 /*
202 * Initialise our DOMID_IO domain.
203 * This domain owns I/O pages that are within the range of the page_info
204 * array. Mappings occur at the privilege level of the caller.
205 */
206 dom_io = alloc_domain(DOMID_IO);
207 BUG_ON(dom_io == NULL);
208 }
210 // heavily depends on the struct page_info layout.
211 // if (page_get_owner(page) == d &&
212 // test_and_clear_bit(_PGC_allocated, &page->count_info)) {
213 // put_page(page);
214 // }
215 static void
216 try_to_clear_PGC_allocate(struct domain* d, struct page_info* page)
217 {
218 u32 _d, _nd;
219 u64 x, nx, y;
221 _d = pickle_domptr(d);
222 y = *((u64*)&page->count_info);
223 do {
224 x = y;
225 _nd = x >> 32;
226 nx = x - 1;
227 __clear_bit(_PGC_allocated, &nx);
229 if (unlikely(!(x & PGC_allocated)) || unlikely(_nd != _d)) {
230 struct domain* nd = unpickle_domptr(_nd);
231 if (nd == NULL) {
232 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
233 "sd=%p 0x%x,"
234 " caf=%016lx, taf=%" PRtype_info "\n",
235 (void *) page_to_mfn(page),
236 d, d->domain_id, _d,
237 nd, _nd,
238 x,
239 page->u.inuse.type_info);
240 }
241 break;
242 }
244 BUG_ON((nx & PGC_count_mask) < 1);
245 y = cmpxchg((u64*)&page->count_info, x, nx);
246 } while (unlikely(y != x));
247 }
249 static void
250 relinquish_pte(struct domain* d, pte_t* pte)
251 {
252 unsigned long mfn = pte_pfn(*pte);
253 struct page_info* page;
255 // a vmx domain uses bits [58:56] to distinguish an io region from memory.
256 // see vmx_build_physmap_table() in vmx_init.c
257 if (!pte_mem(*pte))
258 return;
260 // domain might map IO space or acpi table pages. check it.
261 if (!mfn_valid(mfn))
262 return;
263 page = mfn_to_page(mfn);
264 // a struct page_info corresponding to mfn may or may not exist depending
265 // on CONFIG_VIRTUAL_FRAME_TABLE.
266 // This check is too simplistic; the right way would be to check whether
267 // this page belongs to an io area or to the acpi pages.
268 if (page_get_owner(page) == NULL) {
269 BUG_ON(page->count_info != 0);
270 return;
271 }
273 if (page_get_owner(page) == d) {
274 BUG_ON(get_gpfn_from_mfn(mfn) == INVALID_M2P_ENTRY);
275 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
276 }
278 try_to_clear_PGC_allocate(d, page);
279 put_page(page);
280 }
282 static void
283 relinquish_pmd(struct domain* d, pmd_t* pmd, unsigned long offset)
284 {
285 unsigned long i;
286 pte_t* pte = pte_offset_map(pmd, offset);
288 for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
289 if (!pte_present(*pte))
290 continue;
292 relinquish_pte(d, pte);
293 }
294 pte_free_kernel(pte_offset_map(pmd, offset));
295 }
297 static void
298 relinquish_pud(struct domain* d, pud_t *pud, unsigned long offset)
299 {
300 unsigned long i;
301 pmd_t *pmd = pmd_offset(pud, offset);
303 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
304 if (!pmd_present(*pmd))
305 continue;
307 relinquish_pmd(d, pmd, offset + (i << PMD_SHIFT));
308 }
309 pmd_free(pmd_offset(pud, offset));
310 }
312 static void
313 relinquish_pgd(struct domain* d, pgd_t *pgd, unsigned long offset)
314 {
315 unsigned long i;
316 pud_t *pud = pud_offset(pgd, offset);
318 for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
319 if (!pud_present(*pud))
320 continue;
322 relinquish_pud(d, pud, offset + (i << PUD_SHIFT));
323 }
324 pud_free(pud_offset(pgd, offset));
325 }
327 void
328 relinquish_mm(struct domain* d)
329 {
330 struct mm_struct* mm = &d->arch.mm;
331 unsigned long i;
332 pgd_t* pgd;
334 if (mm->pgd == NULL)
335 return;
337 pgd = pgd_offset(mm, 0);
338 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
339 if (!pgd_present(*pgd))
340 continue;
342 relinquish_pgd(d, pgd, i << PGDIR_SHIFT);
343 }
344 pgd_free(mm->pgd);
345 mm->pgd = NULL;
346 }
348 // stolen from share_xen_page_with_guest() in xen/arch/x86/mm.c
349 void
350 share_xen_page_with_guest(struct page_info *page,
351 struct domain *d, int readonly)
352 {
353 if ( page_get_owner(page) == d )
354 return;
356 #if 1
357 if (readonly) {
358 printk("%s:%d readonly is not supported yet\n", __func__, __LINE__);
359 }
360 #endif
362 // alloc_xenheap_pages() doesn't initialize page owner.
363 //BUG_ON(page_get_owner(page) != NULL);
365 spin_lock(&d->page_alloc_lock);
367 #ifndef __ia64__
368 /* The incremented type count pins as writable or read-only. */
369 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
370 page->u.inuse.type_info |= PGT_validated | 1;
371 #endif
373 page_set_owner(page, d);
374 wmb(); /* install valid domain ptr before updating refcnt. */
375 ASSERT(page->count_info == 0);
376 page->count_info |= PGC_allocated | 1;
378 if ( unlikely(d->xenheap_pages++ == 0) )
379 get_knownalive_domain(d);
380 list_add_tail(&page->list, &d->xenpage_list);
382 // grant_table_destroy() releases these pages,
383 // but it doesn't clear their m2p entries, so stale entries might
384 // remain. Such a stale entry is cleared here.
385 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
387 spin_unlock(&d->page_alloc_lock);
388 }
390 void
391 share_xen_page_with_privileged_guests(struct page_info *page, int readonly)
392 {
393 share_xen_page_with_guest(page, dom_xen, readonly);
394 }
396 unsigned long
397 gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
398 {
399 unsigned long pte;
401 // This function may be called from __gnttab_copy()
402 // during destruction of a VT-i domain with the PV-on-HVM driver.
403 // ** FIXME: This is not yet SMP-safe with respect to the p2m table. **
404 if (unlikely(d->arch.mm.pgd == NULL)) {
405 if (VMX_DOMAIN(d->vcpu[0]))
406 return INVALID_MFN;
407 }
408 pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT, NULL);
409 if (!pte) {
410 panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n");
411 }
412 return ((pte & _PFN_MASK) >> PAGE_SHIFT);
413 }
415 // given a domain virtual address, pte and page size, extract the metaphysical
416 // address, convert the pte to a physical address for the (possibly different)
417 // Xen PAGE_SIZE and return the modified pte. (NOTE: TLB insert should use
418 // PAGE_SIZE!)
419 u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps,
420 struct p2m_entry* entry)
421 {
422 struct domain *d = current->domain;
423 ia64_itir_t itir = {.itir = itir__};
424 u64 mask, mpaddr, pteval2;
425 u64 arflags;
426 u64 arflags2;
427 u64 maflags2;
429 pteval &= ((1UL << 53) - 1);// ignore [63:53] bits
431 // FIXME address had better be pre-validated on insert
432 mask = ~itir_mask(itir.itir);
433 mpaddr = ((pteval & _PAGE_PPN_MASK) & ~mask) | (address & mask);
435 if (itir.ps > PAGE_SHIFT)
436 itir.ps = PAGE_SHIFT;
438 *logps = itir.ps;
440 pteval2 = lookup_domain_mpa(d, mpaddr, entry);
442 /* Check access rights. */
443 arflags = pteval & _PAGE_AR_MASK;
444 arflags2 = pteval2 & _PAGE_AR_MASK;
445 if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) {
446 #if 0
447 DPRINTK("%s:%d "
448 "pteval 0x%lx arflag 0x%lx address 0x%lx itir 0x%lx "
449 "pteval2 0x%lx arflags2 0x%lx mpaddr 0x%lx\n",
450 __func__, __LINE__,
451 pteval, arflags, address, itir__,
452 pteval2, arflags2, mpaddr);
453 #endif
454 pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R;
455 }
457 /* Check memory attribute. The switch is on the *requested* memory
458 attribute. */
459 maflags2 = pteval2 & _PAGE_MA_MASK;
460 switch (pteval & _PAGE_MA_MASK) {
461 case _PAGE_MA_NAT:
462 /* NaT pages are always accepted! */
463 break;
464 case _PAGE_MA_UC:
465 case _PAGE_MA_UCE:
466 case _PAGE_MA_WC:
467 if (maflags2 == _PAGE_MA_WB) {
468 /* Don't let domains WB-map uncached addresses.
469 This can happen when domU tries to touch i/o
470 port space. Also prevents possible address
471 aliasing issues. */
472 printf("Warning: UC to WB for mpaddr=%lx\n", mpaddr);
473 pteval = (pteval & ~_PAGE_MA_MASK) | _PAGE_MA_WB;
474 }
475 break;
476 case _PAGE_MA_WB:
477 if (maflags2 != _PAGE_MA_WB) {
478 /* Forbid non-coherent access to coherent memory. */
479 panic_domain(NULL, "try to use WB mem attr on "
480 "UC page, mpaddr=%lx\n", mpaddr);
481 }
482 break;
483 default:
484 panic_domain(NULL, "try to use unknown mem attribute\n");
485 }
487 /* If shadow mode is enabled, virtualize dirty bit. */
488 if (shadow_mode_enabled(d) && (pteval & _PAGE_D)) {
489 u64 mp_page = mpaddr >> PAGE_SHIFT;
490 pteval |= _PAGE_VIRT_D;
492 /* If the page is not already dirty, don't set the dirty bit! */
493 if (mp_page < d->arch.shadow_bitmap_size * 8
494 && !test_bit(mp_page, d->arch.shadow_bitmap))
495 pteval &= ~_PAGE_D;
496 }
498 /* Ignore non-addr bits of pteval2 and force PL0->2
499 (PL3 is unaffected) */
500 return (pteval & ~_PAGE_PPN_MASK) |
501 (pteval2 & _PAGE_PPN_MASK) | _PAGE_PL_2;
502 }
504 // given a current domain metaphysical address, return the physical address
505 unsigned long translate_domain_mpaddr(unsigned long mpaddr,
506 struct p2m_entry* entry)
507 {
508 unsigned long pteval;
510 pteval = lookup_domain_mpa(current->domain, mpaddr, entry);
511 return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK));
512 }
514 //XXX should !xxx_present() be used instead of !xxx_none()?
515 // __assign_new_domain_page(), assign_new_domain_page() and
516 // assign_new_domain0_page() are used only during domain creation.
517 // Their accesses aren't racy, so the returned pte_t doesn't need the
518 // volatile qualifier.
519 static pte_t*
520 __lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
521 {
522 struct mm_struct *mm = &d->arch.mm;
523 pgd_t *pgd;
524 pud_t *pud;
525 pmd_t *pmd;
527 BUG_ON(mm->pgd == NULL);
528 pgd = pgd_offset(mm, mpaddr);
529 if (pgd_none(*pgd)) {
530 pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr));
531 }
533 pud = pud_offset(pgd, mpaddr);
534 if (pud_none(*pud)) {
535 pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr));
536 }
538 pmd = pmd_offset(pud, mpaddr);
539 if (pmd_none(*pmd)) {
540 pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr));
541 }
543 return pte_offset_map(pmd, mpaddr);
544 }
546 //XXX should !xxx_present() be used instead of !xxx_none()?
547 // pud, pmd and pte pages are zero-cleared when they are allocated.
548 // Their contents must be visible before they are linked in, so the
549 // cmpxchg must have release semantics.
550 static volatile pte_t*
551 lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr)
552 {
553 struct mm_struct *mm = &d->arch.mm;
554 pgd_t *pgd;
555 pud_t *pud;
556 pmd_t *pmd;
558 BUG_ON(mm->pgd == NULL);
560 pgd = pgd_offset(mm, mpaddr);
561 again_pgd:
562 if (unlikely(pgd_none(*pgd))) {
563 pud_t *old_pud = NULL;
564 pud = pud_alloc_one(mm, mpaddr);
565 if (unlikely(!pgd_cmpxchg_rel(mm, pgd, old_pud, pud))) {
566 pud_free(pud);
567 goto again_pgd;
568 }
569 }
571 pud = pud_offset(pgd, mpaddr);
572 again_pud:
573 if (unlikely(pud_none(*pud))) {
574 pmd_t* old_pmd = NULL;
575 pmd = pmd_alloc_one(mm, mpaddr);
576 if (unlikely(!pud_cmpxchg_rel(mm, pud, old_pmd, pmd))) {
577 pmd_free(pmd);
578 goto again_pud;
579 }
580 }
582 pmd = pmd_offset(pud, mpaddr);
583 again_pmd:
584 if (unlikely(pmd_none(*pmd))) {
585 pte_t* old_pte = NULL;
586 pte_t* pte = pte_alloc_one_kernel(mm, mpaddr);
587 if (unlikely(!pmd_cmpxchg_kernel_rel(mm, pmd, old_pte, pte))) {
588 pte_free_kernel(pte);
589 goto again_pmd;
590 }
591 }
593 return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
594 }
596 //XXX should xxx_none() be used instead of !xxx_present()?
597 volatile pte_t*
598 lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr)
599 {
600 struct mm_struct *mm = &d->arch.mm;
601 pgd_t *pgd;
602 pud_t *pud;
603 pmd_t *pmd;
605 BUG_ON(mm->pgd == NULL);
606 pgd = pgd_offset(mm, mpaddr);
607 if (unlikely(!pgd_present(*pgd)))
608 return NULL;
610 pud = pud_offset(pgd, mpaddr);
611 if (unlikely(!pud_present(*pud)))
612 return NULL;
614 pmd = pmd_offset(pud, mpaddr);
615 if (unlikely(!pmd_present(*pmd)))
616 return NULL;
618 return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
619 }
621 static volatile pte_t*
622 lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr)
623 {
624 struct mm_struct *mm = &d->arch.mm;
625 pgd_t *pgd;
626 pud_t *pud;
627 pmd_t *pmd;
629 BUG_ON(mm->pgd == NULL);
630 pgd = pgd_offset(mm, mpaddr);
631 if (unlikely(pgd_none(*pgd)))
632 return NULL;
634 pud = pud_offset(pgd, mpaddr);
635 if (unlikely(pud_none(*pud)))
636 return NULL;
638 pmd = pmd_offset(pud, mpaddr);
639 if (unlikely(pmd_none(*pmd)))
640 return NULL;
642 return (volatile pte_t*)pte_offset_map(pmd, mpaddr);
643 }
645 unsigned long
646 ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr)
647 {
648 volatile pte_t *pte;
650 pte = lookup_noalloc_domain_pte(d, mpaddr);
651 if (pte == NULL)
652 return INVALID_MFN;
654 if (pte_present(*pte))
655 return (pte->pte & _PFN_MASK);
656 else if (VMX_DOMAIN(d->vcpu[0]))
657 return GPFN_INV_MASK;
658 return INVALID_MFN;
659 }
661 unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr,
662 struct p2m_entry* entry)
663 {
664 volatile pte_t *pte = lookup_noalloc_domain_pte(d, mpaddr);
666 if (pte != NULL) {
667 pte_t tmp_pte = *pte;// pte is volatile. copy the value.
668 if (pte_present(tmp_pte)) {
669 //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte));
670 if (entry != NULL)
671 p2m_entry_set(entry, pte, tmp_pte);
672 return pte_val(tmp_pte);
673 } else if (VMX_DOMAIN(d->vcpu[0]))
674 return GPFN_INV_MASK;
675 }
677 printk("%s: d 0x%p id %d current 0x%p id %d\n",
678 __func__, d, d->domain_id, current, current->vcpu_id);
679 if ((mpaddr >> PAGE_SHIFT) < d->max_pages)
680 printk("%s: non-allocated mpa 0x%lx (< 0x%lx)\n", __func__,
681 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
682 else
683 printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__,
684 mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT);
686 if (entry != NULL)
687 p2m_entry_set(entry, NULL, __pte(0));
688 //XXX This is a workaround until emulation of memory accesses to a region
689 // where memory or a device is attached is implemented.
690 return pte_val(pfn_pte(0, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
691 }
693 // FIXME: ONLY USE FOR DOMAIN PAGE_SIZE == PAGE_SIZE
694 #if 1
695 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
696 {
697 unsigned long pte = lookup_domain_mpa(d, mpaddr, NULL);
698 unsigned long imva;
700 pte &= _PAGE_PPN_MASK;
701 imva = (unsigned long) __va(pte);
702 imva |= mpaddr & ~PAGE_MASK;
703 return (void*)imva;
704 }
705 #else
706 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr)
707 {
708 unsigned long imva = __gpa_to_mpa(d, mpaddr);
710 return (void *)__va(imva);
711 }
712 #endif
714 unsigned long
715 xencomm_paddr_to_maddr(unsigned long paddr)
716 {
717 struct vcpu *v = current;
718 struct domain *d = v->domain;
719 u64 pa;
721 pa = ____lookup_domain_mpa(d, paddr);
722 if (pa == INVALID_MFN) {
723 printf("%s: called with bad memory address: 0x%lx - iip=%lx\n",
724 __func__, paddr, vcpu_regs(v)->cr_iip);
725 return 0;
726 }
727 return __va_ul((pa & _PFN_MASK) | (paddr & ~PAGE_MASK));
728 }
730 /* Allocate a new page for domain and map it to the specified metaphysical
731 address. */
732 static struct page_info *
733 __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte)
734 {
735 struct page_info *p;
736 unsigned long maddr;
737 int ret;
739 BUG_ON(!pte_none(*pte));
741 p = alloc_domheap_page(d);
742 if (unlikely(!p)) {
743 printf("assign_new_domain_page: Can't alloc!!!! Aaaargh!\n");
744 return(p);
745 }
747 // zero out pages for security reasons
748 clear_page(page_to_virt(p));
749 maddr = page_to_maddr (p);
750 if (unlikely(maddr > __get_cpu_var(vhpt_paddr)
751 && maddr < __get_cpu_var(vhpt_pend))) {
752 /* FIXME: how can this happen ?
753 vhpt is allocated by alloc_domheap_page. */
754 printf("assign_new_domain_page: reassigned vhpt page %lx!!\n",
755 maddr);
756 }
758 ret = get_page(p, d);
759 BUG_ON(ret == 0);
760 set_gpfn_from_mfn(page_to_mfn(p), mpaddr >> PAGE_SHIFT);
761 // clear_page() and set_gpfn_from_mfn() become visible before set_pte_rel()
762 // because set_pte_rel() has release semantics
763 set_pte_rel(pte,
764 pfn_pte(maddr >> PAGE_SHIFT,
765 __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX)));
767 smp_mb();
768 return p;
769 }
771 struct page_info *
772 assign_new_domain_page(struct domain *d, unsigned long mpaddr)
773 {
774 pte_t *pte = __lookup_alloc_domain_pte(d, mpaddr);
776 if (!pte_none(*pte))
777 return NULL;
779 return __assign_new_domain_page(d, mpaddr, pte);
780 }
782 void
783 assign_new_domain0_page(struct domain *d, unsigned long mpaddr)
784 {
785 pte_t *pte;
787 BUG_ON(d != dom0);
788 pte = __lookup_alloc_domain_pte(d, mpaddr);
789 if (pte_none(*pte)) {
790 struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
791 if (p == NULL) {
792 panic("%s: can't allocate page for dom0", __func__);
793 }
794 }
795 }
797 static unsigned long
798 flags_to_prot (unsigned long flags)
799 {
800 unsigned long res = _PAGE_PL_2 | __DIRTY_BITS;
802 res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX;
803 res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB;
804 #ifdef CONFIG_XEN_IA64_TLB_TRACK
805 res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0;
806 #endif
808 return res;
809 }
811 /* map a physical address to the specified metaphysical addr */
812 // flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_track
813 // This is called by assign_domain_mmio_page(),
814 // so accesses to the pte are racy.
815 int
816 __assign_domain_page(struct domain *d,
817 unsigned long mpaddr, unsigned long physaddr,
818 unsigned long flags)
819 {
820 volatile pte_t *pte;
821 pte_t old_pte;
822 pte_t new_pte;
823 pte_t ret_pte;
824 unsigned long prot = flags_to_prot(flags);
826 pte = lookup_alloc_domain_pte(d, mpaddr);
828 old_pte = __pte(0);
829 new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot));
830 ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte);
831 if (pte_val(ret_pte) == pte_val(old_pte)) {
832 smp_mb();
833 return 0;
834 }
836 // dom0 tried to map the real machine's I/O region, but failed.
837 // It is very likely that dom0 won't boot correctly because
838 // it can't access I/O, so complain here.
839 if ((flags & ASSIGN_nocache) &&
840 (pte_pfn(ret_pte) != (physaddr >> PAGE_SHIFT) ||
841 !(pte_val(ret_pte) & _PAGE_MA_UC)))
842 printk("%s:%d WARNING can't assign page domain 0x%p id %d\n"
843 "\talready assigned pte_val 0x%016lx\n"
844 "\tmpaddr 0x%016lx physaddr 0x%016lx flags 0x%lx\n",
845 __func__, __LINE__,
846 d, d->domain_id, pte_val(ret_pte),
847 mpaddr, physaddr, flags);
849 return -EAGAIN;
850 }
852 /* get_page() and map a physical address to the specified metaphysical addr */
853 void
854 assign_domain_page(struct domain *d,
855 unsigned long mpaddr, unsigned long physaddr)
856 {
857 struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
858 int ret;
860 BUG_ON((physaddr & GPFN_IO_MASK) != GPFN_MEM);
861 ret = get_page(page, d);
862 BUG_ON(ret == 0);
863 set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
864 // because __assign_domain_page() uses set_pte_rel() which has
865 // release semantics, smp_mb() isn't needed.
866 (void)__assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable);
867 }
869 int
870 ioports_permit_access(struct domain *d, unsigned long fp, unsigned long lp)
871 {
872 int ret;
873 unsigned long off;
874 unsigned long fp_offset;
875 unsigned long lp_offset;
877 ret = rangeset_add_range(d->arch.ioport_caps, fp, lp);
878 if (ret != 0)
879 return ret;
881 /* Domain 0 doesn't virtualize IO ports space. */
882 if (d == dom0)
883 return 0;
885 fp_offset = IO_SPACE_SPARSE_ENCODING(fp) & ~PAGE_MASK;
886 lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp));
888 for (off = fp_offset; off <= lp_offset; off += PAGE_SIZE)
889 (void)__assign_domain_page(d, IO_PORTS_PADDR + off,
890 __pa(ia64_iobase) + off, ASSIGN_nocache);
892 return 0;
893 }
895 static int
896 ioports_has_allowed(struct domain *d, unsigned long fp, unsigned long lp)
897 {
898 unsigned long i;
899 for (i = fp; i < lp; i++)
900 if (rangeset_contains_singleton(d->arch.ioport_caps, i))
901 return 1;
902 return 0;
903 }
905 int
906 ioports_deny_access(struct domain *d, unsigned long fp, unsigned long lp)
907 {
908 int ret;
909 struct mm_struct *mm = &d->arch.mm;
910 unsigned long off;
911 unsigned long io_ports_base;
912 unsigned long fp_offset;
913 unsigned long lp_offset;
915 ret = rangeset_remove_range(d->arch.ioport_caps, fp, lp);
916 if (ret != 0)
917 return ret;
918 if (d == dom0)
919 io_ports_base = __pa(ia64_iobase);
920 else
921 io_ports_base = IO_PORTS_PADDR;
923 fp_offset = IO_SPACE_SPARSE_ENCODING(fp) & PAGE_MASK;
924 lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp));
926 for (off = fp_offset; off < lp_offset; off += PAGE_SIZE) {
927 unsigned long mpaddr = io_ports_base + off;
928 unsigned long port;
929 volatile pte_t *pte;
930 pte_t old_pte;
932 port = IO_SPACE_SPARSE_DECODING (off);
933 if (port < fp || port + IO_SPACE_SPARSE_PORTS_PER_PAGE - 1 > lp) {
934 /* Maybe this covers an allowed port. */
935 if (ioports_has_allowed(d, port,
936 port + IO_SPACE_SPARSE_PORTS_PER_PAGE - 1))
937 continue;
938 }
940 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
941 BUG_ON(pte == NULL);
942 BUG_ON(pte_none(*pte));
944 // clear pte
945 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
946 }
947 domain_flush_vtlb_all();
948 return 0;
949 }
951 static void
952 assign_domain_same_page(struct domain *d,
953 unsigned long mpaddr, unsigned long size,
954 unsigned long flags)
955 {
956 //XXX optimization
957 unsigned long end = PAGE_ALIGN(mpaddr + size);
958 for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) {
959 (void)__assign_domain_page(d, mpaddr, mpaddr, flags);
960 }
961 }
963 int
964 efi_mmio(unsigned long physaddr, unsigned long size)
965 {
966 void *efi_map_start, *efi_map_end;
967 u64 efi_desc_size;
968 void* p;
970 efi_map_start = __va(ia64_boot_param->efi_memmap);
971 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
972 efi_desc_size = ia64_boot_param->efi_memdesc_size;
974 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
975 efi_memory_desc_t* md = (efi_memory_desc_t *)p;
976 unsigned long start = md->phys_addr;
977 unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
979 if (start <= physaddr && physaddr < end) {
980 if ((physaddr + size) > end) {
981 DPRINTK("%s:%d physaddr 0x%lx size = 0x%lx\n",
982 __func__, __LINE__, physaddr, size);
983 return 0;
984 }
986 // for io space
987 if (md->type == EFI_MEMORY_MAPPED_IO ||
988 md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
989 return 1;
990 }
992 // for runtime
993 // see efi_enter_virtual_mode(void)
994 // in linux/arch/ia64/kernel/efi.c
995 if ((md->attribute & EFI_MEMORY_RUNTIME) &&
996 !(md->attribute & EFI_MEMORY_WB)) {
997 return 1;
998 }
1000 return 0;
1003 if (physaddr < start) {
1004 break;
1008 return 1;
1011 unsigned long
1012 assign_domain_mmio_page(struct domain *d,
1013 unsigned long mpaddr, unsigned long size)
1015 if (size == 0) {
1016 DPRINTK("%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
1017 __func__, d, mpaddr, size);
1019 if (!efi_mmio(mpaddr, size)) {
1020 #ifndef NDEBUG
1021 DPRINTK("%s:%d domain %p mpaddr 0x%lx size = 0x%lx\n",
1022 __func__, __LINE__, d, mpaddr, size);
1023 #endif
1024 return -EINVAL;
1026 assign_domain_same_page(d, mpaddr, size, ASSIGN_writable | ASSIGN_nocache);
1027 return mpaddr;
1030 unsigned long
1031 assign_domain_mach_page(struct domain *d,
1032 unsigned long mpaddr, unsigned long size,
1033 unsigned long flags)
1035 assign_domain_same_page(d, mpaddr, size, flags);
1036 return mpaddr;
1039 // caller must get_page(mfn_to_page(mfn)) before calling.
1040 // caller must call set_gpfn_from_mfn() beforehand if necessary;
1041 // because the set_gpfn_from_mfn() result must be visible before the pte
1042 // xchg, the caller must use a memory barrier. NOTE: xchg has acquire semantics.
1043 // flags: ASSIGN_xxx
1044 static void
1045 assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
1046 unsigned long mfn, unsigned long flags)
1048 struct mm_struct *mm = &d->arch.mm;
1049 volatile pte_t* pte;
1050 pte_t old_pte;
1051 pte_t npte;
1052 unsigned long prot = flags_to_prot(flags);
1054 pte = lookup_alloc_domain_pte(d, mpaddr);
1056 // update pte
1057 npte = pfn_pte(mfn, __pgprot(prot));
1058 old_pte = ptep_xchg(mm, mpaddr, pte, npte);
1059 if (pte_mem(old_pte)) {
1060 unsigned long old_mfn = pte_pfn(old_pte);
1062 // the mfn == old_mfn case can happen when a domain maps a granted page
1063 // twice with the same pseudo-physical address.
1064 // It's nonsensical, but allowed.
1065 // __gnttab_map_grant_ref()
1066 // => create_host_mapping()
1067 // => assign_domain_page_replace()
1068 if (mfn != old_mfn) {
1069 struct page_info* old_page = mfn_to_page(old_mfn);
1071 if (page_get_owner(old_page) == d ||
1072 page_get_owner(old_page) == NULL) {
1073 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
1074 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
1077 domain_page_flush(d, mpaddr, pte, old_pte);
1079 try_to_clear_PGC_allocate(d, old_page);
1080 put_page(old_page);
1083 perfc_incrc(assign_domain_page_replace);
1086 // caller must get_page(new_page) beforehand.
1087 // Only steal_page() calls this function.
1088 static int
1089 assign_domain_page_cmpxchg_rel(struct domain* d, unsigned long mpaddr,
1090 struct page_info* old_page,
1091 struct page_info* new_page,
1092 unsigned long flags)
1094 struct mm_struct *mm = &d->arch.mm;
1095 volatile pte_t* pte;
1096 unsigned long old_mfn;
1097 unsigned long old_prot;
1098 pte_t old_pte;
1099 unsigned long new_mfn;
1100 unsigned long new_prot;
1101 pte_t new_pte;
1102 pte_t ret_pte;
1104 pte = lookup_alloc_domain_pte(d, mpaddr);
1106 again:
1107 old_prot = pte_val(*pte) & ~_PAGE_PPN_MASK;
1108 old_mfn = page_to_mfn(old_page);
1109 old_pte = pfn_pte(old_mfn, __pgprot(old_prot));
1110 if (!pte_present(old_pte)) {
1111 DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx\n",
1112 __func__, pte_val(old_pte), old_prot, old_mfn);
1113 return -EINVAL;
1116 new_prot = flags_to_prot(flags);
1117 new_mfn = page_to_mfn(new_page);
1118 new_pte = pfn_pte(new_mfn, __pgprot(new_prot));
1120 // update pte
1121 ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
1122 if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) {
1123 if (pte_pfn(old_pte) == pte_pfn(ret_pte)) {
1124 goto again;
1127 DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx "
1128 "ret_pte 0x%lx ret_mfn 0x%lx\n",
1129 __func__,
1130 pte_val(old_pte), old_prot, old_mfn,
1131 pte_val(ret_pte), pte_pfn(ret_pte));
1132 return -EINVAL;
1135 BUG_ON(!pte_mem(old_pte));
1136 BUG_ON(page_get_owner(old_page) != d);
1137 BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT));
1138 BUG_ON(old_mfn == new_mfn);
1140 set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
1142 domain_page_flush(d, mpaddr, pte, old_pte);
1143 put_page(old_page);
1144 perfc_incrc(assign_domain_pge_cmpxchg_rel);
1145 return 0;
1148 static void
1149 zap_domain_page_one(struct domain *d, unsigned long mpaddr, unsigned long mfn)
1151 struct mm_struct *mm = &d->arch.mm;
1152 volatile pte_t *pte;
1153 pte_t old_pte;
1154 struct page_info *page;
1156 pte = lookup_noalloc_domain_pte_none(d, mpaddr);
1157 if (pte == NULL)
1158 return;
1159 if (pte_none(*pte))
1160 return;
1162 if (mfn == INVALID_MFN) {
1163 // clear pte
1164 old_pte = ptep_get_and_clear(mm, mpaddr, pte);
1165 mfn = pte_pfn(old_pte);
1166 } else {
1167 unsigned long old_arflags;
1168 pte_t new_pte;
1169 pte_t ret_pte;
1171 again:
1172 // memory_exchange() calls guest_physmap_remove_page() with
1173 // a stolen page, i.e. page owner == NULL.
1174 BUG_ON(page_get_owner(mfn_to_page(mfn)) != d &&
1175 page_get_owner(mfn_to_page(mfn)) != NULL);
1176 old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
1177 old_pte = pfn_pte(mfn, __pgprot(old_arflags));
1178 new_pte = __pte(0);
1180 // update pte
1181 ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte);
1182 if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) {
1183 if (pte_pfn(old_pte) == pte_pfn(ret_pte)) {
1184 goto again;
1187 DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx mfn 0x%lx "
1188 "ret_pte 0x%lx ret_mfn 0x%lx\n",
1189 __func__,
1190 pte_val(old_pte), old_arflags, mfn,
1191 pte_val(ret_pte), pte_pfn(ret_pte));
1192 return;
1194 BUG_ON(mfn != pte_pfn(ret_pte));
1197 page = mfn_to_page(mfn);
1198 BUG_ON((page->count_info & PGC_count_mask) == 0);
1200 if (page_get_owner(page) == d ||
1201 page_get_owner(page) == NULL) {
1202 // exchange_memory() calls
1203 // steal_page()
1204 // page owner is set to NULL
1205 // guest_physmap_remove_page()
1206 // zap_domain_page_one()
1207 BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT));
1208 set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
1211 domain_page_flush(d, mpaddr, pte, old_pte);
1213 if (page_get_owner(page) != NULL) {
1214 try_to_clear_PGC_allocate(d, page);
1216 put_page(page);
1217 perfc_incrc(zap_dcomain_page_one);
1220 unsigned long
1221 dom0vp_zap_physmap(struct domain *d, unsigned long gpfn,
1222 unsigned int extent_order)
1224 if (extent_order != 0) {
1225 //XXX
1226 return -ENOSYS;
1229 zap_domain_page_one(d, gpfn << PAGE_SHIFT, INVALID_MFN);
1230 perfc_incrc(dom0vp_zap_physmap);
1231 return 0;
1234 unsigned long
1235 dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn,
1236 unsigned long flags, domid_t domid)
1238 int error = 0;
1239 struct domain* rd;
1241 /* Not allowed by a domain. */
1242 if (flags & ASSIGN_nocache)
1243 return -EINVAL;
1245 rd = find_domain_by_id(domid);
1246 if (unlikely(rd == NULL)) {
1247 switch (domid) {
1248 case DOMID_XEN:
1249 rd = dom_xen;
1250 break;
1251 case DOMID_IO:
1252 rd = dom_io;
1253 break;
1254 default:
1255 DPRINTK("d 0x%p domid %d "
1256 "pgfn 0x%lx mfn 0x%lx flags 0x%lx domid %d\n",
1257 d, d->domain_id, gpfn, mfn, flags, domid);
1258 return -ESRCH;
1260 BUG_ON(rd == NULL);
1261 get_knownalive_domain(rd);
1264 if (unlikely(rd == d || !mfn_valid(mfn))) {
1265 error = -EINVAL;
1266 goto out1;
1268 if (unlikely(get_page(mfn_to_page(mfn), rd) == 0)) {
1269 error = -EINVAL;
1270 goto out1;
1272 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
1273 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
1274 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags);
1275 //don't update p2m table because this page belongs to rd, not d.
1276 perfc_incrc(dom0vp_add_physmap);
1277 out1:
1278 put_domain(rd);
1279 return error;
1282 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
1283 static struct page_info* p2m_pte_zero_page = NULL;
1285 void
1286 expose_p2m_init(void)
1288 pte_t* pte;
1290 pte = pte_alloc_one_kernel(NULL, 0);
1291 BUG_ON(pte == NULL);
1292 smp_mb();// make contents of the page visible.
1293 p2m_pte_zero_page = virt_to_page(pte);
1296 static int
1297 expose_p2m_page(struct domain* d, unsigned long mpaddr, struct page_info* page)
1299 // we can't get_page(page) here.
1300 // the pte page is allocated from the xen heap (see pte_alloc_one_kernel()),
1301 // so the page has a NULL page owner and its reference count
1302 // is useless.
1303 // see also relinquish_pte()'s page_get_owner() == NULL check.
1304 BUG_ON(page_get_owner(page) != NULL);
1306 return __assign_domain_page(d, mpaddr, page_to_maddr(page),
1307 ASSIGN_readonly);
1310 // It is possible to optimize the loop, but this isn't performance critical.
1311 unsigned long
1312 dom0vp_expose_p2m(struct domain* d,
1313 unsigned long conv_start_gpfn,
1314 unsigned long assign_start_gpfn,
1315 unsigned long expose_size, unsigned long granule_pfn)
1317 unsigned long expose_num_pfn = expose_size >> PAGE_SHIFT;
1318 unsigned long i;
1319 volatile pte_t* conv_pte;
1320 volatile pte_t* assign_pte;
1322 if ((expose_size % PAGE_SIZE) != 0 ||
1323 (granule_pfn % PTRS_PER_PTE) != 0 ||
1324 (expose_num_pfn % PTRS_PER_PTE) != 0 ||
1325 (conv_start_gpfn % granule_pfn) != 0 ||
1326 (assign_start_gpfn % granule_pfn) != 0 ||
1327 (expose_num_pfn % granule_pfn) != 0) {
1328 DPRINTK("%s conv_start_gpfn 0x%016lx assign_start_gpfn 0x%016lx "
1329 "expose_size 0x%016lx granulte_pfn 0x%016lx\n", __func__,
1330 conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn);
1331 return -EINVAL;
1334 if (granule_pfn != PTRS_PER_PTE) {
1335 DPRINTK("%s granule_pfn 0x%016lx PTRS_PER_PTE 0x%016lx\n",
1336 __func__, granule_pfn, PTRS_PER_PTE);
1337 return -ENOSYS;
1340 // allocate pgd, pmd.
1341 i = conv_start_gpfn;
1342 while (i < expose_num_pfn) {
1343 conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
1344 PAGE_SHIFT);
1345 if (conv_pte == NULL) {
1346 i++;
1347 continue;
1350 assign_pte = lookup_alloc_domain_pte(d, (assign_start_gpfn <<
1351 PAGE_SHIFT) + i * sizeof(pte_t));
1352 if (assign_pte == NULL) {
1353 DPRINTK("%s failed to allocate pte page\n", __func__);
1354 return -ENOMEM;
1357 // skip to next pte page
1358 i += PTRS_PER_PTE;
1359 i &= ~(PTRS_PER_PTE - 1);
1362 // expose pte page
1363 i = 0;
1364 while (i < expose_num_pfn) {
1365 conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
1366 PAGE_SHIFT);
1367 if (conv_pte == NULL) {
1368 i++;
1369 continue;
1372 if (expose_p2m_page(d, (assign_start_gpfn << PAGE_SHIFT) +
1373 i * sizeof(pte_t), virt_to_page(conv_pte)) < 0) {
1374 DPRINTK("%s failed to assign page\n", __func__);
1375 return -EAGAIN;
1378 // skip to next pte page
1379 i += PTRS_PER_PTE;
1380 i &= ~(PTRS_PER_PTE - 1);
1383 // expose p2m_pte_zero_page
1384 for (i = 0; i < expose_num_pfn / PTRS_PER_PTE + 1; i++) {
1385 assign_pte = lookup_noalloc_domain_pte(d, (assign_start_gpfn + i) <<
1386 PAGE_SHIFT);
1387 BUG_ON(assign_pte == NULL);
1388 if (pte_present(*assign_pte)) {
1389 continue;
1391 if (expose_p2m_page(d, (assign_start_gpfn + i) << PAGE_SHIFT,
1392 p2m_pte_zero_page) < 0) {
1393 DPRINTK("%s failed to assign zero-pte page\n", __func__);
1394 return -EAGAIN;
1398 return 0;
1400 #endif
1402 // grant table host mapping
1403 // mpaddr: host_addr: pseudo physical address
1404 // mfn: frame: machine page frame
1405 // flags: GNTMAP_readonly | GNTMAP_application_map | GNTMAP_contains_pte
1406 int
1407 create_grant_host_mapping(unsigned long gpaddr,
1408 unsigned long mfn, unsigned int flags)
1410 struct domain* d = current->domain;
1411 struct page_info* page;
1412 int ret;
1414 if (flags & (GNTMAP_device_map |
1415 GNTMAP_application_map | GNTMAP_contains_pte)) {
1416 DPRINTK("%s: flags 0x%x\n", __func__, flags);
1417 return GNTST_general_error;
1420 BUG_ON(!mfn_valid(mfn));
1421 page = mfn_to_page(mfn);
1422 ret = get_page(page, page_get_owner(page));
1423 BUG_ON(ret == 0);
1424 BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
1425 get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
1426 assign_domain_page_replace(d, gpaddr, mfn,
1427 #ifdef CONFIG_XEN_IA64_TLB_TRACK
1428 ASSIGN_tlb_track |
1429 #endif
1430 ((flags & GNTMAP_readonly) ?
1431 ASSIGN_readonly : ASSIGN_writable));
1432 perfc_incrc(create_grant_host_mapping);
1433 return GNTST_okay;
1436 // grant table host unmapping
1437 int
1438 destroy_grant_host_mapping(unsigned long gpaddr,
1439 unsigned long mfn, unsigned int flags)
1441 struct domain* d = current->domain;
1442 volatile pte_t* pte;
1443 unsigned long cur_arflags;
1444 pte_t cur_pte;
1445 pte_t new_pte;
1446 pte_t old_pte;
1447 struct page_info* page;
1449 if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) {
1450 DPRINTK("%s: flags 0x%x\n", __func__, flags);
1451 return GNTST_general_error;
1454 pte = lookup_noalloc_domain_pte(d, gpaddr);
1455 if (pte == NULL) {
1456 DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx\n", __func__, gpaddr, mfn);
1457 return GNTST_general_error;
1460 again:
1461 cur_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
1462 cur_pte = pfn_pte(mfn, __pgprot(cur_arflags));
1463 if (!pte_present(cur_pte)) {
1464 DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx\n",
1465 __func__, gpaddr, mfn, pte_val(cur_pte));
1466 return GNTST_general_error;
1468 new_pte = __pte(0);
1470 old_pte = ptep_cmpxchg_rel(&d->arch.mm, gpaddr, pte, cur_pte, new_pte);
1471 if (unlikely(!pte_present(old_pte))) {
1472 DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
1473 __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
1474 return GNTST_general_error;
1476 if (unlikely(pte_val(cur_pte) != pte_val(old_pte))) {
1477 if (pte_pfn(old_pte) == mfn) {
1478 goto again;
1480 DPRINTK("%s gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n",
1481 __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte));
1482 return GNTST_general_error;
1484 BUG_ON(pte_pfn(old_pte) != mfn);
1486 domain_page_flush(d, gpaddr, pte, old_pte);
1488 page = mfn_to_page(mfn);
1489 BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed.
1490 put_page(page);
1492 perfc_incrc(destroy_grant_host_mapping);
1493 return GNTST_okay;
1496 // heavily depends on the struct page layout.
1497 // gnttab_transfer() calls steal_page() with memflags = 0
1498 // For grant table transfer, we must fill the page.
1499 // memory_exchange() calls steal_page() with memflags = MEMF_no_refcount
1500 // For memory exchange, we don't have to fill the page because
1501 // memory_exchange() does it.
1502 int
1503 steal_page(struct domain *d, struct page_info *page, unsigned int memflags)
1505 #if 0 /* if big endian */
1506 # error "implement big endian version of steal_page()"
1507 #endif
1508 u32 _d, _nd;
1509 u64 x, nx, y;
1511 if (page_get_owner(page) != d) {
1512 DPRINTK("%s d 0x%p owner 0x%p\n", __func__, d, page_get_owner(page));
1513 return -1;
1516 if (!(memflags & MEMF_no_refcount)) {
1517 unsigned long gpfn;
1518 struct page_info *new;
1519 unsigned long new_mfn;
1520 int ret;
1522 new = alloc_domheap_page(d);
1523 if (new == NULL) {
1524 DPRINTK("alloc_domheap_page() failed\n");
1525 return -1;
1527 // zero out pages for security reasons
1528 clear_page(page_to_virt(new));
1529 // assign_domain_page_cmpxchg_rel() has release semantics
1530 // so smp_mb() isn't needed.
1532 ret = get_page(new, d);
1533 BUG_ON(ret == 0);
1535 gpfn = get_gpfn_from_mfn(page_to_mfn(page));
1536 if (gpfn == INVALID_M2P_ENTRY) {
1537 free_domheap_page(new);
1538 return -1;
1540 new_mfn = page_to_mfn(new);
1541 set_gpfn_from_mfn(new_mfn, gpfn);
1542 // smp_mb() isn't needed because assign_domain_page_cmpxchg_rel()
1543 // has release semantics.
1545 ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new,
1546 ASSIGN_writable);
1547 if (ret < 0) {
1548 DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret);
1549 set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY);
1550 free_domheap_page(new);
1551 return -1;
1553 perfc_incrc(steal_page_refcount);
1556 spin_lock(&d->page_alloc_lock);
1558 /*
1559 * The tricky bit: atomically release ownership while there is just one
1560 * benign reference to the page (PGC_allocated). If that reference
1561 * disappears then the deallocation routine will safely spin.
1562 */
1563 _d = pickle_domptr(d);
1564 y = *((u64*)&page->count_info);
1565 do {
1566 x = y;
1567 nx = x & 0xffffffff;
1568 // page->count_info: untouched
1569 // page->u.inused._domain = 0;
1570 _nd = x >> 32;
1572 if (unlikely(!(memflags & MEMF_no_refcount) &&
1573 ((x & (PGC_count_mask | PGC_allocated)) !=
1574 (1 | PGC_allocated))) ||
1576 // when MEMF_no_refcount, page isn't de-assigned from
1577 // this domain yet. So count_info = 2
1578 unlikely((memflags & MEMF_no_refcount) &&
1579 ((x & (PGC_count_mask | PGC_allocated)) !=
1580 (2 | PGC_allocated))) ||
1582 unlikely(_nd != _d)) {
1583 struct domain* nd = unpickle_domptr(_nd);
1584 if (nd == NULL) {
1585 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1586 "sd=%p 0x%x,"
1587 " caf=%016lx, taf=%" PRtype_info
1588 " memflags 0x%x\n",
1589 (void *) page_to_mfn(page),
1590 d, d->domain_id, _d,
1591 nd, _nd,
1592 x,
1593 page->u.inuse.type_info,
1594 memflags);
1595 } else {
1596 DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, "
1597 "sd=%p(%u) 0x%x,"
1598 " caf=%016lx, taf=%" PRtype_info
1599 " memflags 0x%x\n",
1600 (void *) page_to_mfn(page),
1601 d, d->domain_id, _d,
1602 nd, nd->domain_id, _nd,
1603 x,
1604 page->u.inuse.type_info,
1605 memflags);
1607 spin_unlock(&d->page_alloc_lock);
1608 return -1;
1611 y = cmpxchg((u64*)&page->count_info, x, nx);
1612 } while (unlikely(y != x));
1614 /*
1615 * Unlink from 'd'. At least one reference remains (now anonymous), so
1616 * noone else is spinning to try to delete this page from 'd'.
1617 */
1618 if ( !(memflags & MEMF_no_refcount) )
1619 d->tot_pages--;
1620 list_del(&page->list);
1622 spin_unlock(&d->page_alloc_lock);
1623 perfc_incrc(steal_page);
1624 return 0;
1627 void
1628 guest_physmap_add_page(struct domain *d, unsigned long gpfn,
1629 unsigned long mfn)
1631 int ret;
1633 BUG_ON(!mfn_valid(mfn));
1634 ret = get_page(mfn_to_page(mfn), d);
1635 BUG_ON(ret == 0);
1636 set_gpfn_from_mfn(mfn, gpfn);
1637 smp_mb();
1638 assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable);
1640 //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
1642 perfc_incrc(guest_physmap_add_page);
1645 void
1646 guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
1647 unsigned long mfn)
1649 BUG_ON(mfn == 0);//XXX
1650 zap_domain_page_one(d, gpfn << PAGE_SHIFT, mfn);
1651 perfc_incrc(guest_physmap_remove_page);
1654 //XXX sledgehammer.
1655 // should flush a finer range instead.
1656 static void
1657 domain_page_flush(struct domain* d, unsigned long mpaddr,
1658 volatile pte_t* ptep, pte_t old_pte)
1660 #ifdef CONFIG_XEN_IA64_TLB_TRACK
1661 struct tlb_track_entry* entry;
1662 #endif
1664 if (shadow_mode_enabled(d))
1665 shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
1667 #ifndef CONFIG_XEN_IA64_TLB_TRACK
1668 domain_flush_vtlb_all();
1669 #else
1670 switch (tlb_track_search_and_remove(d->arch.tlb_track,
1671 ptep, old_pte, &entry)) {
1672 case TLB_TRACK_NOT_TRACKED:
1673 // DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
1674 domain_flush_vtlb_all();
1675 break;
1676 case TLB_TRACK_NOT_FOUND:
1677 /* do nothing */
1678 // DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
1679 break;
1680 case TLB_TRACK_FOUND:
1681 // DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
1682 domain_flush_vtlb_track_entry(d, entry);
1683 tlb_track_free_entry(d->arch.tlb_track, entry);
1684 break;
1685 case TLB_TRACK_MANY:
1686 DPRINTK("%s TLB_TRACK_MANY\n", __func__);
1687 domain_flush_vtlb_all();
1688 break;
1689 case TLB_TRACK_AGAIN:
1690 DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
1691 BUG();
1692 break;
1694 #endif
1695 perfc_incrc(domain_page_flush);
1698 int
1699 domain_page_mapped(struct domain* d, unsigned long mpaddr)
1701 volatile pte_t * pte;
1703 pte = lookup_noalloc_domain_pte(d, mpaddr);
1704 if(pte != NULL && !pte_none(*pte))
1705 return 1;
1706 return 0;
1709 /* Flush cache of domain d. */
1710 void domain_cache_flush (struct domain *d, int sync_only)
1712 struct mm_struct *mm = &d->arch.mm;
1713 pgd_t *pgd = mm->pgd;
1714 unsigned long maddr;
1715 int i,j,k, l;
1716 int nbr_page = 0;
1717 void (*flush_func)(unsigned long start, unsigned long end);
1718 extern void flush_dcache_range (unsigned long, unsigned long);
1720 if (sync_only)
1721 flush_func = &flush_icache_range;
1722 else
1723 flush_func = &flush_dcache_range;
1725 for (i = 0; i < PTRS_PER_PGD; pgd++, i++) {
1726 pud_t *pud;
1727 if (!pgd_present(*pgd))
1728 continue;
1729 pud = pud_offset(pgd, 0);
1730 for (j = 0; j < PTRS_PER_PUD; pud++, j++) {
1731 pmd_t *pmd;
1732 if (!pud_present(*pud))
1733 continue;
1734 pmd = pmd_offset(pud, 0);
1735 for (k = 0; k < PTRS_PER_PMD; pmd++, k++) {
1736 pte_t *pte;
1737 if (!pmd_present(*pmd))
1738 continue;
1739 pte = pte_offset_map(pmd, 0);
1740 for (l = 0; l < PTRS_PER_PTE; pte++, l++) {
1741 if (!pte_present(*pte))
1742 continue;
1743 /* Convert PTE to maddr. */
1744 maddr = __va_ul (pte_val(*pte)
1745 & _PAGE_PPN_MASK);
1746 (*flush_func)(maddr, maddr+ PAGE_SIZE);
1747 nbr_page++;
1752 //printf ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
1755 #ifdef VERBOSE
1756 #define MEM_LOG(_f, _a...) \
1757 printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
1758 current->domain->domain_id , __LINE__ , ## _a )
1759 #else
1760 #define MEM_LOG(_f, _a...) ((void)0)
1761 #endif
1763 static void free_page_type(struct page_info *page, u32 type)
1767 static int alloc_page_type(struct page_info *page, u32 type)
1769 return 1;
1772 unsigned long __get_free_pages(unsigned int mask, unsigned int order)
1774 void *p = alloc_xenheap_pages(order);
1776 memset(p,0,PAGE_SIZE<<order);
1777 return (unsigned long)p;
1780 void __free_pages(struct page_info *page, unsigned int order)
1782 if (order) BUG();
1783 free_xenheap_page(page);
1786 void *pgtable_quicklist_alloc(void)
1788 void *p;
1789 p = alloc_xenheap_pages(0);
1790 if (p)
1791 clear_page(p);
1792 return p;
1795 void pgtable_quicklist_free(void *pgtable_entry)
1797 free_xenheap_page(pgtable_entry);
1800 void put_page_type(struct page_info *page)
1802 u32 nx, x, y = page->u.inuse.type_info;
1804 again:
1805 do {
1806 x = y;
1807 nx = x - 1;
1809 ASSERT((x & PGT_count_mask) != 0);
1811 /*
1812 * The page should always be validated while a reference is held. The
1813 * exception is during domain destruction, when we forcibly invalidate
1814 * page-table pages if we detect a referential loop.
1815 * See domain.c:relinquish_list().
1816 */
1817 ASSERT((x & PGT_validated) ||
1818 test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
1820 if ( unlikely((nx & PGT_count_mask) == 0) )
1822 /* Record TLB information for flush later. Races are harmless. */
1823 page->tlbflush_timestamp = tlbflush_current_time();
1825 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
1826 likely(nx & PGT_validated) )
1828 /*
1829 * Page-table pages must be unvalidated when count is zero. The
1830 * 'free' is safe because the refcnt is non-zero and validated
1831 * bit is clear => other ops will spin or fail.
1832 */
1833 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
1834 x & ~PGT_validated)) != x) )
1835 goto again;
1836 /* We cleared the 'valid bit' so we do the clean up. */
1837 free_page_type(page, x);
1838 /* Carry on, but with the 'valid bit' now clear. */
1839 x &= ~PGT_validated;
1840 nx &= ~PGT_validated;
1844 while ( unlikely((y = cmpxchg_rel(&page->u.inuse.type_info, x, nx)) != x) );
1848 int get_page_type(struct page_info *page, u32 type)
1850 u32 nx, x, y = page->u.inuse.type_info;
1852 ASSERT(!(type & ~PGT_type_mask));
1854 again:
1855 do {
1856 x = y;
1857 nx = x + 1;
1858 if ( unlikely((nx & PGT_count_mask) == 0) )
1860 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
1861 return 0;
1863 else if ( unlikely((x & PGT_count_mask) == 0) )
1865 if ( (x & PGT_type_mask) != type )
1867 /*
1868 * On type change we check to flush stale TLB entries. This
1869 * may be unnecessary (e.g., page was GDT/LDT) but those
1870 * circumstances should be very rare.
1871 */
1872 cpumask_t mask =
1873 page_get_owner(page)->domain_dirty_cpumask;
1874 tlbflush_filter(mask, page->tlbflush_timestamp);
1876 if ( unlikely(!cpus_empty(mask)) )
1878 perfc_incrc(need_flush_tlb_flush);
1879 flush_tlb_mask(mask);
1882 /* We lose existing type, back pointer, and validity. */
1883 nx &= ~(PGT_type_mask | PGT_validated);
1884 nx |= type;
1886 /* No special validation needed for writable pages. */
1887 /* Page tables and GDT/LDT need to be scanned for validity. */
1888 if ( type == PGT_writable_page )
1889 nx |= PGT_validated;
1892 else if ( unlikely((x & PGT_type_mask) != type) )
1894 if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
1895 (type != PGT_l1_page_table) )
1896 MEM_LOG("Bad type (saw %08x != exp %08x) "
1897 "for mfn %016lx (pfn %016lx)",
1898 x, type, page_to_mfn(page),
1899 get_gpfn_from_mfn(page_to_mfn(page)));
1900 return 0;
1902 else if ( unlikely(!(x & PGT_validated)) )
1904 /* Someone else is updating validation of this page. Wait... */
1905 while ( (y = page->u.inuse.type_info) == x )
1906 cpu_relax();
1907 goto again;
1910 while ( unlikely((y = cmpxchg_acq(&page->u.inuse.type_info, x, nx)) != x) );
1912 if ( unlikely(!(nx & PGT_validated)) )
1914 /* Try to validate page type; drop the new reference on failure. */
1915 if ( unlikely(!alloc_page_type(page, type)) )
1917 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x"
1918 ": caf=%08x taf=%" PRtype_info,
1919 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
1920 type, page->count_info, page->u.inuse.type_info);
1921 /* Noone else can get a reference. We hold the only ref. */
1922 page->u.inuse.type_info = 0;
1923 return 0;
1926 /* Noone else is updating simultaneously. */
1927 __set_bit(_PGT_validated, &page->u.inuse.type_info);
1930 return 1;
1933 int memory_is_conventional_ram(paddr_t p)
1935 return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
1939 long
1940 arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
1942 switch (op) {
1943 case XENMEM_add_to_physmap:
1945 struct xen_add_to_physmap xatp;
1946 unsigned long prev_mfn, mfn = 0, gpfn;
1947 struct domain *d;
1949 if (copy_from_guest(&xatp, arg, 1))
1950 return -EFAULT;
1952 if (xatp.domid == DOMID_SELF) {
1953 d = current->domain;
1954 get_knownalive_domain(d);
1956 else if (!IS_PRIV(current->domain))
1957 return -EPERM;
1958 else if ((d = find_domain_by_id(xatp.domid)) == NULL)
1959 return -ESRCH;
1961 /* This hypercall is used for VT-i domain only */
1962 if (!VMX_DOMAIN(d->vcpu[0])) {
1963 put_domain(d);
1964 return -ENOSYS;
1967 switch (xatp.space) {
1968 case XENMAPSPACE_shared_info:
1969 if (xatp.idx == 0)
1970 mfn = virt_to_mfn(d->shared_info);
1971 break;
1972 case XENMAPSPACE_grant_table:
1973 if (xatp.idx < NR_GRANT_FRAMES)
1974 mfn = virt_to_mfn(d->grant_table->shared) + xatp.idx;
1975 break;
1976 default:
1977 break;
1980 LOCK_BIGLOCK(d);
1982 /* Remove previously mapped page if it was present. */
1983 prev_mfn = gmfn_to_mfn(d, xatp.gpfn);
1984 if (prev_mfn && mfn_valid(prev_mfn)) {
1985 if (IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)))
1986 /* Xen heap frames are simply unhooked from this phys slot. */
1987 guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
1988 else
1989 /* Normal domain memory is freed, to avoid leaking memory. */
1990 guest_remove_page(d, xatp.gpfn);
1993 /* Unmap from old location, if any. */
1994 gpfn = get_gpfn_from_mfn(mfn);
1995 if (gpfn != INVALID_M2P_ENTRY)
1996 guest_physmap_remove_page(d, gpfn, mfn);
1998 /* Map at new location. */
1999 guest_physmap_add_page(d, xatp.gpfn, mfn);
2001 UNLOCK_BIGLOCK(d);
2003 put_domain(d);
2005 break;
2008 default:
2009 return -ENOSYS;
2012 return 0;
2015 /*
2016 * Local variables:
2017 * mode: C
2018 * c-set-style: "BSD"
2019 * c-basic-offset: 4
2020 * tab-width: 4
2021 * indent-tabs-mode: nil
2022 * End:
2023 */