xen/arch/x86/shadow_public.c @ 6538:84ee014ebd41 (ia64/xen-unstable)

Merge xen-vtx-unstable.hg

author   adsharma@los-vmm.sc.intel.com
date     Wed Aug 17 12:34:38 2005 -0800 (2005-08-17)
parents  23979fb12c49 69bf77e1b102
children 99914b54f7bf
1 /******************************************************************************
2 * arch/x86/shadow_public.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #if CONFIG_PAGING_LEVELS >= 3
34 #include <asm/shadow_64.h>
36 extern struct shadow_ops MODE_F_HANDLER;
37 #endif
39 extern struct shadow_ops MODE_A_HANDLER;
41 /****************************************************************************/
42 /************* export interface functions ***********************************/
43 /****************************************************************************/
46 int shadow_set_guest_paging_levels(struct domain *d, int levels)
47 {
48 shadow_lock(d);
50 switch(levels) {
51 #if CONFIG_PAGING_LEVELS >= 4
52 case 4:
53 if ( d->arch.ops != &MODE_F_HANDLER )
54 d->arch.ops = &MODE_F_HANDLER;
55 shadow_unlock(d);
56 return 1;
57 #endif
58 case 3:
59 case 2:
60 if ( d->arch.ops != &MODE_A_HANDLER )
61 d->arch.ops = &MODE_A_HANDLER;
62 shadow_unlock(d);
63 return 1;
64 default:
65 shadow_unlock(d);
66 return 0;
67 }
68 }
70 void shadow_invlpg(struct vcpu *v, unsigned long va)
71 {
72 struct domain *d = current->domain;
73 d->arch.ops->invlpg(v, va);
74 }
76 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
77 {
78 struct domain *d = current->domain;
79 return d->arch.ops->fault(va, regs);
80 }
82 void __update_pagetables(struct vcpu *v)
83 {
84 struct domain *d = v->domain;
85 d->arch.ops->update_pagetables(v);
86 }
88 void __shadow_sync_all(struct domain *d)
89 {
90 d->arch.ops->sync_all(d);
91 }
93 int shadow_remove_all_write_access(
94 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
95 {
96 return d->arch.ops->remove_all_write_access(d, readonly_gpfn, readonly_gmfn);
97 }
99 int shadow_do_update_va_mapping(unsigned long va,
100 l1_pgentry_t val,
101 struct vcpu *v)
102 {
103 struct domain *d = v->domain;
104 return d->arch.ops->do_update_va_mapping(va, val, v);
105 }
107 struct out_of_sync_entry *
108 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
109 unsigned long mfn)
110 {
111 struct domain *d = v->domain;
112 return d->arch.ops->mark_mfn_out_of_sync(v, gpfn, mfn);
113 }
115 /*
116 * Returns 1 if va's shadow mapping is out-of-sync.
117 * Returns 0 otherwise.
118 */
119 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
120 {
121 struct domain *d = v->domain;
122 return d->arch.ops->is_out_of_sync(v, va);
123 }
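/*
 * Illustrative sketch (not part of this changeset): every exported wrapper
 * above simply forwards through the per-domain ops table that
 * shadow_set_guest_paging_levels() installed.  The names below are
 * hypothetical and only make that dispatch pattern concrete.
 */
struct cpu_user_regs;

struct example_shadow_ops {
    int  (*fault)(unsigned long va, struct cpu_user_regs *regs);
    void (*invlpg)(struct vcpu *v, unsigned long va);
};

struct example_arch_domain {
    struct example_shadow_ops *ops;  /* chosen when paging levels are set */
};

static inline int example_shadow_fault(
    struct example_arch_domain *arch, unsigned long va,
    struct cpu_user_regs *regs)
{
    /* One indirect call -- the installed mode handler does the real work. */
    return arch->ops->fault(va, regs);
}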
125 /****************************************************************************/
126 /****************************************************************************/
127 #if CONFIG_PAGING_LEVELS >= 4
128 /*
129 * Convert PAE 3-level page-table to 4-level page-table
130 */
131 #define PDP_ENTRIES 4
132 static pagetable_t page_table_convert(struct domain *d)
133 {
134 struct pfn_info *l4page, *l3page;
135 l4_pgentry_t *l4;
136 l3_pgentry_t *l3, *pae_l3;
137 int i;
139 l4page = alloc_domheap_page(NULL);
140 if (l4page == NULL)
141 domain_crash();
142 l4 = map_domain_page(page_to_pfn(l4page));
143 memset(l4, 0, PAGE_SIZE);
145 l3page = alloc_domheap_page(NULL);
146 if (l3page == NULL)
147 domain_crash();
148 l3 = map_domain_page(page_to_pfn(l3page));
149 memset(l3, 0, PAGE_SIZE);
151 l4[0] = l4e_from_page(l3page, __PAGE_HYPERVISOR);
152 pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
154 for (i = 0; i < PDP_ENTRIES; i++) {
155 l3[i] = pae_l3[i];
156 l3e_add_flags(l3[i], 0x67);
157 }
159 unmap_domain_page(l4);
160 unmap_domain_page(l3);
162 return mk_pagetable(page_to_phys(l4page));
163 }
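/*
 * Note on the 0x67 passed to l3e_add_flags() above (descriptive comment,
 * added for clarity): it is the usual x86 PTE flag combination
 *   _PAGE_PRESENT (0x01) | _PAGE_RW (0x02) | _PAGE_USER (0x04) |
 *   _PAGE_ACCESSED (0x20) | _PAGE_DIRTY (0x40) == 0x67,
 * so the PAE L3 entries copied into the new 4-level tree are marked
 * present and fully accessible.
 */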
165 static void alloc_monitor_pagetable(struct vcpu *v)
166 {
167 unsigned long mmfn;
168 l4_pgentry_t *mpl4e;
169 struct pfn_info *mmfn_info;
170 struct domain *d = v->domain;
171 pagetable_t phys_table;
173 ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
175 mmfn_info = alloc_domheap_page(NULL);
176 ASSERT( mmfn_info );
178 mmfn = (unsigned long) (mmfn_info - frame_table);
179 mpl4e = (l4_pgentry_t *) map_domain_page(mmfn);
180 memcpy(mpl4e, &idle_pg_table[0], PAGE_SIZE);
181 mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
182 l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
183 /* map the phys_to_machine map into the per domain Read-Only MPT space */
184 phys_table = page_table_convert(d);
186 mpl4e[l4_table_offset(RO_MPT_VIRT_START)] =
187 l4e_from_paddr(pagetable_get_paddr(phys_table),
188 __PAGE_HYPERVISOR);
189 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
190 v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
191 }
193 static void inline
194 free_shadow_fl1_table(struct domain *d, unsigned long smfn)
195 {
196 l1_pgentry_t *pl1e = map_domain_page(smfn);
197 int i;
199 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
200 put_page_from_l1e(pl1e[i], d);
201 }
203 /*
204 * Free l2, l3, l4 shadow tables
205 */
206 static void inline
207 free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
208 {
209 pgentry_64_t *ple = map_domain_page(smfn);
210 int i, external = shadow_mode_external(d);
212 for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
213 if ( external || is_guest_l4_slot(i) )
214 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
215 put_shadow_ref(entry_get_pfn(ple[i]));
217 unmap_domain_page(ple);
218 }
220 void free_monitor_pagetable(struct vcpu *v)
221 {
222 unsigned long mfn;
224 // ASSERT( pagetable_val(v->arch.monitor_table) );
225 /*
226 * free monitor_table.
227 */
228 //mfn = (pagetable_val(v->arch.monitor_table)) >> PAGE_SHIFT;
229 mfn = pagetable_get_pfn(v->arch.monitor_table);
230 unmap_domain_page(v->arch.monitor_vtable);
231 free_domheap_page(&frame_table[mfn]);
232 v->arch.monitor_table = mk_pagetable(0);
233 v->arch.monitor_vtable = 0;
234 }
236 #elif CONFIG_PAGING_LEVELS == 3
238 static void alloc_monitor_pagetable(struct vcpu *v)
239 {
240 BUG(); /* PAE not implemented yet */
241 }
243 void free_monitor_pagetable(struct vcpu *v)
244 {
245 BUG(); /* PAE not implemented yet */
246 }
248 #elif CONFIG_PAGING_LEVELS == 2
250 static void alloc_monitor_pagetable(struct vcpu *v)
251 {
252 unsigned long mmfn;
253 l2_pgentry_t *mpl2e;
254 struct pfn_info *mmfn_info;
255 struct domain *d = v->domain;
257 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
259 mmfn_info = alloc_domheap_page(NULL);
260 ASSERT(mmfn_info != NULL);
262 mmfn = page_to_pfn(mmfn_info);
263 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
264 memset(mpl2e, 0, PAGE_SIZE);
266 #ifdef __i386__ /* XXX screws x86/64 build */
267 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
268 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
269 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
270 #endif
272 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
273 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
274 __PAGE_HYPERVISOR);
276 // map the phys_to_machine map into the Read-Only MPT space for this domain
277 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
278 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
279 __PAGE_HYPERVISOR);
281 // Don't (yet) have mappings for these...
282 // Don't want to accidentally see the idle_pg_table's linear mapping.
283 //
284 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
285 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
287 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
288 v->arch.monitor_vtable = mpl2e;
289 }
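/*
 * Descriptive note (added for clarity, not in the original changeset):
 * the 2-level monitor table built above contains
 *  - the hypervisor entries copied from idle_pg_table,
 *  - a PERDOMAIN_VIRT_START slot mapping this domain's mm_perdomain_pt,
 *  - an RO_MPT_VIRT_START slot exposing d->arch.phys_table (the P2M map),
 *  - empty LINEAR_PT/SH_LINEAR_PT slots, filled in once a shadow table is
 *    installed for the vcpu.
 */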
291 /*
292 * Free the pages for monitor_table and hl2_table
293 */
294 void free_monitor_pagetable(struct vcpu *v)
295 {
296 l2_pgentry_t *mpl2e, hl2e, sl2e;
297 unsigned long mfn;
299 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
301 mpl2e = v->arch.monitor_vtable;
303 /*
304 * First get the mfn for hl2_table by looking at monitor_table
305 */
306 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
307 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
308 {
309 mfn = l2e_get_pfn(hl2e);
310 ASSERT(mfn);
311 put_shadow_ref(mfn);
312 }
314 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
315 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
316 {
317 mfn = l2e_get_pfn(sl2e);
318 ASSERT(mfn);
319 put_shadow_ref(mfn);
320 }
322 unmap_domain_page(mpl2e);
324 /*
325 * Then free monitor_table.
326 */
327 mfn = pagetable_get_pfn(v->arch.monitor_table);
328 free_domheap_page(&frame_table[mfn]);
330 v->arch.monitor_table = mk_pagetable(0);
331 v->arch.monitor_vtable = 0;
332 }
333 #endif
335 static void
336 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
337 {
338 void *snapshot;
340 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
341 return;
343 // Clear the out_of_sync bit.
344 //
345 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
347 // XXX Need to think about how to protect the domain's
348 // information less expensively.
349 //
350 snapshot = map_domain_page(entry->snapshot_mfn);
351 memset(snapshot, 0, PAGE_SIZE);
352 unmap_domain_page(snapshot);
354 put_shadow_ref(entry->snapshot_mfn);
355 }
357 void
358 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
359 {
360 struct pfn_info *page;
362 page = &frame_table[entry->gmfn];
364 // Decrement ref count of guest & shadow pages
365 //
366 put_page(page);
368 // Only use entries that have low bits clear...
369 //
370 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
371 {
372 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
373 entry->writable_pl1e = -2;
374 }
375 else
376 ASSERT( entry->writable_pl1e == -1 );
378 // Free the snapshot
379 //
380 shadow_free_snapshot(d, entry);
381 }
383 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
384 {
385 struct out_of_sync_entry *entry = d->arch.out_of_sync;
386 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
387 struct out_of_sync_entry *found = NULL;
389 // NB: Be careful not to call something that manipulates this list
390 // while walking it. Collect the results into a separate list
391 // first, then walk that list.
392 //
393 while ( entry )
394 {
395 if ( entry->gmfn == gmfn )
396 {
397 // remove from out of sync list
398 *prev = entry->next;
400 // add to found list
401 entry->next = found;
402 found = entry;
404 entry = *prev;
405 continue;
406 }
407 prev = &entry->next;
408 entry = entry->next;
409 }
411 prev = NULL;
412 entry = found;
413 while ( entry )
414 {
415 release_out_of_sync_entry(d, entry);
417 prev = &entry->next;
418 entry = entry->next;
419 }
421 // Add found list to free list
422 if ( prev )
423 {
424 *prev = d->arch.out_of_sync_free;
425 d->arch.out_of_sync_free = found;
426 }
427 }
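/*
 * Illustrative sketch (not from the original source) of the pattern used
 * by remove_out_of_sync_entries() above: matching entries are unlinked
 * onto a private "found" list first, and only that private list is
 * processed afterwards, so the processing step can safely manipulate the
 * original list.  All names below are hypothetical.
 */
struct example_node { struct example_node *next; int key; };

static struct example_node *
example_collect_matches(struct example_node **head, int key)
{
    struct example_node **prev = head;
    struct example_node *entry = *head, *found = NULL;

    while ( entry )
    {
        if ( entry->key == key )
        {
            *prev = entry->next;   /* unlink from the source list */
            entry->next = found;   /* push onto the private list  */
            found = entry;
            entry = *prev;
            continue;
        }
        prev = &entry->next;
        entry = entry->next;
    }

    return found;  /* caller walks this list without fear of mutation */
}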
429 static inline void
430 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
431 {
432 if ( !shadow_mode_refcounts(d) )
433 return;
435 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
437 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
438 {
439 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
441 if ( page_out_of_sync(pfn_to_page(gmfn)) )
442 {
443 remove_out_of_sync_entries(d, gmfn);
444 }
445 }
446 }
448 static void inline
449 free_shadow_l1_table(struct domain *d, unsigned long smfn)
450 {
451 l1_pgentry_t *pl1e = map_domain_page(smfn);
452 int i;
453 struct pfn_info *spage = pfn_to_page(smfn);
454 u32 min_max = spage->tlbflush_timestamp;
455 int min = SHADOW_MIN(min_max);
456 int max = SHADOW_MAX(min_max);
458 for ( i = min; i <= max; i++ )
459 {
460 shadow_put_page_from_l1e(pl1e[i], d);
461 pl1e[i] = l1e_empty();
462 }
464 unmap_domain_page(pl1e);
465 }
467 static void inline
468 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
469 {
470 l1_pgentry_t *hl2 = map_domain_page(smfn);
471 int i, limit;
473 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
475 #ifdef __i386__
476 if ( shadow_mode_external(d) )
477 limit = L2_PAGETABLE_ENTRIES;
478 else
479 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
480 #else
481 limit = 0; /* XXX x86/64 XXX */
482 #endif
484 for ( i = 0; i < limit; i++ )
485 {
486 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
487 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
488 }
490 unmap_domain_page(hl2);
491 }
493 static void inline
494 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
495 {
496 l2_pgentry_t *pl2e = map_domain_page(smfn);
497 int i, external = shadow_mode_external(d);
499 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
500 if ( external || is_guest_l2_slot(type, i) )
501 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
502 put_shadow_ref(l2e_get_pfn(pl2e[i]));
504 if ( (PGT_base_page_table == PGT_l2_page_table) &&
505 shadow_mode_translate(d) && !external )
506 {
507 // free the ref to the hl2
508 //
509 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
510 }
512 unmap_domain_page(pl2e);
513 }
515 void free_shadow_page(unsigned long smfn)
516 {
517 struct pfn_info *page = &frame_table[smfn];
518 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
519 struct domain *d = page_get_owner(pfn_to_page(gmfn));
520 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
521 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
523 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
525 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
526 #if CONFIG_PAGING_LEVELS >=4
527 if (type == PGT_fl1_shadow) {
528 unsigned long mfn;
529 mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
530 if (!mfn)
531 gpfn |= (1UL << 63);
532 }
533 #endif
534 delete_shadow_status(d, gpfn, gmfn, type);
536 switch ( type )
537 {
538 case PGT_l1_shadow:
539 perfc_decr(shadow_l1_pages);
540 shadow_demote(d, gpfn, gmfn);
541 free_shadow_l1_table(d, smfn);
542 break;
543 #if defined (__i386__)
544 case PGT_l2_shadow:
545 perfc_decr(shadow_l2_pages);
546 shadow_demote(d, gpfn, gmfn);
547 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
548 break;
550 case PGT_hl2_shadow:
551 perfc_decr(hl2_table_pages);
552 shadow_demote(d, gpfn, gmfn);
553 free_shadow_hl2_table(d, smfn);
554 break;
555 #else
556 case PGT_l2_shadow:
557 case PGT_l3_shadow:
558 case PGT_l4_shadow:
559 shadow_demote(d, gpfn, gmfn);
560 free_shadow_tables(d, smfn, shadow_type_to_level(type));
561 break;
563 case PGT_fl1_shadow:
564 free_shadow_fl1_table(d, smfn);
565 break;
567 #endif
569 case PGT_snapshot:
570 perfc_decr(snapshot_pages);
571 break;
573 default:
574 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
575 page_to_pfn(page), page->u.inuse.type_info);
576 break;
577 }
579 d->arch.shadow_page_count--;
581 // No TLB flushes are needed the next time this page gets allocated.
582 //
583 page->tlbflush_timestamp = 0;
584 page->u.free.cpumask = CPU_MASK_NONE;
586 if ( type == PGT_l1_shadow )
587 {
588 list_add(&page->list, &d->arch.free_shadow_frames);
589 perfc_incr(free_l1_pages);
590 }
591 else
592 free_domheap_page(page);
593 }
595 static void
596 free_writable_pte_predictions(struct domain *d)
597 {
598 int i;
599 struct shadow_status *x;
601 for ( i = 0; i < shadow_ht_buckets; i++ )
602 {
603 u32 count;
604 unsigned long *gpfn_list;
606 /* Skip empty buckets. */
607 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
608 continue;
610 count = 0;
611 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
612 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
613 count++;
615 gpfn_list = xmalloc_array(unsigned long, count);
616 count = 0;
617 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
618 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
619 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
621 while ( count )
622 {
623 count--;
624 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
625 }
627 xfree(gpfn_list);
628 }
629 }
631 static void free_shadow_ht_entries(struct domain *d)
632 {
633 struct shadow_status *x, *n;
635 SH_VLOG("freed tables count=%d l1=%d l2=%d",
636 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
637 perfc_value(shadow_l2_pages));
639 n = d->arch.shadow_ht_extras;
640 while ( (x = n) != NULL )
641 {
642 d->arch.shadow_extras_count--;
643 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
644 xfree(x);
645 }
647 d->arch.shadow_ht_extras = NULL;
648 d->arch.shadow_ht_free = NULL;
650 ASSERT(d->arch.shadow_extras_count == 0);
651 SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
653 if ( d->arch.shadow_dirty_bitmap != NULL )
654 {
655 xfree(d->arch.shadow_dirty_bitmap);
656 d->arch.shadow_dirty_bitmap = 0;
657 d->arch.shadow_dirty_bitmap_size = 0;
658 }
660 xfree(d->arch.shadow_ht);
661 d->arch.shadow_ht = NULL;
662 }
664 static void free_out_of_sync_entries(struct domain *d)
665 {
666 struct out_of_sync_entry *x, *n;
668 n = d->arch.out_of_sync_extras;
669 while ( (x = n) != NULL )
670 {
671 d->arch.out_of_sync_extras_count--;
672 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
673 xfree(x);
674 }
676 d->arch.out_of_sync_extras = NULL;
677 d->arch.out_of_sync_free = NULL;
678 d->arch.out_of_sync = NULL;
680 ASSERT(d->arch.out_of_sync_extras_count == 0);
681 FSH_LOG("freed extra out_of_sync entries, now %d",
682 d->arch.out_of_sync_extras_count);
683 }
685 void free_shadow_pages(struct domain *d)
686 {
687 int i;
688 struct shadow_status *x;
689 struct vcpu *v;
691 /*
692 * WARNING! The shadow page table must not currently be in use!
693 * e.g., You are expected to have paused the domain and synchronized CR3.
694 */
696 if( !d->arch.shadow_ht ) return;
698 shadow_audit(d, 1);
700 // first, remove any outstanding refs from out_of_sync entries...
701 //
702 free_out_of_sync_state(d);
704 // second, remove any outstanding refs from v->arch.shadow_table
705 // and CR3.
706 //
707 for_each_vcpu(d, v)
708 {
709 if ( pagetable_get_paddr(v->arch.shadow_table) )
710 {
711 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
712 v->arch.shadow_table = mk_pagetable(0);
713 }
715 if ( v->arch.monitor_shadow_ref )
716 {
717 put_shadow_ref(v->arch.monitor_shadow_ref);
718 v->arch.monitor_shadow_ref = 0;
719 }
720 }
722 #if defined (__i386__)
723 // For external shadows, remove the monitor table's refs
724 //
725 if ( shadow_mode_external(d) )
726 {
727 for_each_vcpu(d, v)
728 {
729 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
731 if ( mpl2e )
732 {
733 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
734 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
736 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
737 {
738 put_shadow_ref(l2e_get_pfn(hl2e));
739 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
740 }
741 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
742 {
743 put_shadow_ref(l2e_get_pfn(smfn));
744 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
745 }
746 }
747 }
748 }
749 #endif
750 // Now, the only refs to shadow pages that are left are from the shadow
751 // pages themselves. We just unpin the pinned pages, and the rest
752 // should automatically disappear.
753 //
754 // NB: Beware: each explicit or implicit call to free_shadow_page
755 // can/will result in the hash bucket getting rewritten out from
756 // under us... First, collect the list of pinned pages, then
757 // free them.
758 //
759 for ( i = 0; i < shadow_ht_buckets; i++ )
760 {
761 u32 count;
762 unsigned long *mfn_list;
764 /* Skip empty buckets. */
765 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
766 continue;
768 count = 0;
769 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
770 if ( MFN_PINNED(x->smfn) )
771 count++;
772 if ( !count )
773 continue;
775 mfn_list = xmalloc_array(unsigned long, count);
776 count = 0;
777 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
778 if ( MFN_PINNED(x->smfn) )
779 mfn_list[count++] = x->smfn;
781 while ( count )
782 {
783 shadow_unpin(mfn_list[--count]);
784 }
785 xfree(mfn_list);
786 }
788 // Now free the pre-zero'ed pages from the domain
789 //
790 struct list_head *list_ent, *tmp;
791 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
792 {
793 list_del(list_ent);
794 perfc_decr(free_l1_pages);
796 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
797 free_domheap_page(page);
798 }
800 shadow_audit(d, 0);
802 SH_LOG("Free shadow table.");
803 }
805 void __shadow_mode_disable(struct domain *d)
806 {
807 if ( unlikely(!shadow_mode_enabled(d)) )
808 return;
810 /*
811 * Currently this does not fix up page ref counts, so it is valid to call
812 * only when a domain is being destroyed.
813 */
814 BUG_ON(!test_bit(_DOMF_dying, &d->domain_flags) &&
815 shadow_mode_refcounts(d));
816 d->arch.shadow_tainted_refcnts = shadow_mode_refcounts(d);
818 free_shadow_pages(d);
819 free_writable_pte_predictions(d);
821 #ifndef NDEBUG
822 int i;
823 for ( i = 0; i < shadow_ht_buckets; i++ )
824 {
825 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
826 {
827 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
828 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
829 BUG();
830 }
831 }
832 #endif
834 d->arch.shadow_mode = 0;
836 free_shadow_ht_entries(d);
837 free_out_of_sync_entries(d);
839 struct vcpu *v;
840 for_each_vcpu(d, v)
841 {
842 update_pagetables(v);
843 }
844 }
847 static void
848 free_p2m_table(struct domain *d)
849 {
850 // uh, this needs some work... :)
851 BUG();
852 }
855 int __shadow_mode_enable(struct domain *d, unsigned int mode)
856 {
857 struct vcpu *v;
858 int new_modes = (mode & ~d->arch.shadow_mode);
860 // Gotta be adding something to call this function.
861 ASSERT(new_modes);
863 // can't take anything away by calling this function.
864 ASSERT(!(d->arch.shadow_mode & ~mode));
866 #if defined(CONFIG_PAGING_LEVELS)
867 if(!shadow_set_guest_paging_levels(d,
868 CONFIG_PAGING_LEVELS)) {
869 printk("Unsupported guest paging levels\n");
870 domain_crash_synchronous(); /* need to take a clean path */
871 }
872 #endif
874 for_each_vcpu(d, v)
875 {
876 invalidate_shadow_ldt(v);
878 // We need to set these up for __update_pagetables().
879 // See the comment there.
881 /*
882 * arch.guest_vtable
883 */
884 if ( v->arch.guest_vtable &&
885 (v->arch.guest_vtable != __linear_l2_table) )
886 {
887 unmap_domain_page(v->arch.guest_vtable);
888 }
889 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
890 v->arch.guest_vtable = __linear_l2_table;
891 else
892 v->arch.guest_vtable = NULL;
894 /*
895 * arch.shadow_vtable
896 */
897 if ( v->arch.shadow_vtable &&
898 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
899 {
900 unmap_domain_page(v->arch.shadow_vtable);
901 }
902 if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
903 v->arch.shadow_vtable = __shadow_linear_l2_table;
904 else
905 v->arch.shadow_vtable = NULL;
907 #if defined (__i386__)
908 /*
909 * arch.hl2_vtable
910 */
911 if ( v->arch.hl2_vtable &&
912 (v->arch.hl2_vtable != __linear_hl2_table) )
913 {
914 unmap_domain_page(v->arch.hl2_vtable);
915 }
916 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
917 v->arch.hl2_vtable = __linear_hl2_table;
918 else
919 v->arch.hl2_vtable = NULL;
920 #endif
921 /*
922 * arch.monitor_table & arch.monitor_vtable
923 */
924 if ( v->arch.monitor_vtable )
925 {
926 free_monitor_pagetable(v);
927 }
928 if ( mode & SHM_external )
929 {
930 alloc_monitor_pagetable(v);
931 }
932 }
934 if ( new_modes & SHM_enable )
935 {
936 ASSERT( !d->arch.shadow_ht );
937 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
938 if ( d->arch.shadow_ht == NULL )
939 goto nomem;
941 memset(d->arch.shadow_ht, 0,
942 shadow_ht_buckets * sizeof(struct shadow_status));
943 }
945 if ( new_modes & SHM_log_dirty )
946 {
947 ASSERT( !d->arch.shadow_dirty_bitmap );
948 d->arch.shadow_dirty_bitmap_size = (d->max_pages + 63) & ~63;
949 d->arch.shadow_dirty_bitmap =
950 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
951 (8 * sizeof(unsigned long)));
952 if ( d->arch.shadow_dirty_bitmap == NULL )
953 {
954 d->arch.shadow_dirty_bitmap_size = 0;
955 goto nomem;
956 }
957 memset(d->arch.shadow_dirty_bitmap, 0,
958 d->arch.shadow_dirty_bitmap_size/8);
959 }
961 if ( new_modes & SHM_translate )
962 {
963 if ( !(new_modes & SHM_external) )
964 {
965 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
966 if ( !alloc_p2m_table(d) )
967 {
968 printk("alloc_p2m_table failed (out-of-memory?)\n");
969 goto nomem;
970 }
971 }
972 else
973 {
974 // external guests provide their own memory for their P2M maps.
975 //
976 ASSERT( d == page_get_owner(
977 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
978 }
979 }
981 printk("audit1\n");
982 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
983 printk("audit1 done\n");
985 // Get rid of any shadow pages from any previous shadow mode.
986 //
987 free_shadow_pages(d);
989 printk("audit2\n");
990 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
991 printk("audit2 done\n");
993 /*
994 * Tear down its counts by disassembling its page-table-based ref counts.
995 * Also remove CR3's gcount/tcount.
996 * That leaves things like GDTs and LDTs and external refs intact.
997 *
998 * Most pages will be writable tcount=0.
999 * Some will still be L1 tcount=0 or L2 tcount=0.
1000 * Maybe some pages will be type none tcount=0.
1001 * Pages granted external writable refs (via grant tables?) will
1002 * still have a non-zero tcount. That's OK.
1004 * gcounts will generally be 1 for PGC_allocated.
1005 * GDTs and LDTs will have additional gcounts.
1006 * Any grant-table based refs will still be in the gcount.
1008 * We attempt to grab writable refs to each page (thus setting its type).
1009 * Immediately put back those type refs.
1011 * Assert that no pages are left with L1/L2/L3/L4 type.
1012 */
1013 audit_adjust_pgtables(d, -1, 1);
1015 d->arch.shadow_mode = mode;
1017 if ( shadow_mode_refcounts(d) )
1018 {
1019 struct list_head *list_ent = d->page_list.next;
1020 while ( list_ent != &d->page_list )
1021 {
1022 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1023 if ( !get_page_type(page, PGT_writable_page) )
1024 BUG();
1025 put_page_type(page);
1027 list_ent = page->list.next;
1028 }
1029 }
1031 audit_adjust_pgtables(d, 1, 1);
1033 printk("audit3\n");
1034 _audit_domain(d, AUDIT_SHADOW_ALREADY_LOCKED | AUDIT_ERRORS_OK);
1035 printk("audit3 done\n");
1037 return 0;
1039 nomem:
1040 if ( (new_modes & SHM_enable) )
1041 {
1042 xfree(d->arch.shadow_ht);
1043 d->arch.shadow_ht = NULL;
1044 }
1045 if ( (new_modes & SHM_log_dirty) )
1046 {
1047 xfree(d->arch.shadow_dirty_bitmap);
1048 d->arch.shadow_dirty_bitmap = NULL;
1049 }
1050 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1051 pagetable_get_paddr(d->arch.phys_table) )
1052 {
1053 free_p2m_table(d);
1054 }
1055 return -ENOMEM;
1056 }
1059 int shadow_mode_enable(struct domain *d, unsigned int mode)
1060 {
1061 int rc;
1062 shadow_lock(d);
1063 rc = __shadow_mode_enable(d, mode);
1064 shadow_unlock(d);
1065 return rc;
1066 }
1068 static int shadow_mode_table_op(
1069 struct domain *d, dom0_shadow_control_t *sc)
1070 {
1071 unsigned int op = sc->op;
1072 int i, rc = 0;
1073 struct vcpu *v;
1075 ASSERT(shadow_lock_is_acquired(d));
1077 SH_VLOG("shadow mode table op %lx %lx count %d",
1078 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1079 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1080 d->arch.shadow_page_count);
1082 shadow_audit(d, 1);
1084 switch ( op )
1085 {
1086 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1087 free_shadow_pages(d);
1089 d->arch.shadow_fault_count = 0;
1090 d->arch.shadow_dirty_count = 0;
1091 d->arch.shadow_dirty_net_count = 0;
1092 d->arch.shadow_dirty_block_count = 0;
1094 break;
1096 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1097 free_shadow_pages(d);
1099 sc->stats.fault_count = d->arch.shadow_fault_count;
1100 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1101 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1102 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1104 d->arch.shadow_fault_count = 0;
1105 d->arch.shadow_dirty_count = 0;
1106 d->arch.shadow_dirty_net_count = 0;
1107 d->arch.shadow_dirty_block_count = 0;
1109 if ( (d->max_pages > sc->pages) ||
1110 (sc->dirty_bitmap == NULL) ||
1111 (d->arch.shadow_dirty_bitmap == NULL) )
1112 {
1113 rc = -EINVAL;
1114 break;
1115 }
1117 sc->pages = d->max_pages;
1119 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1120 for ( i = 0; i < d->max_pages; i += chunk )
1121 {
1122 int bytes = ((((d->max_pages - i) > chunk) ?
1123 chunk : (d->max_pages - i)) + 7) / 8;
1125 if (copy_to_user(
1126 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1127 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1128 bytes))
1129 {
1130 // copy_to_user can fail when copying to guest app memory.
1131 // app should zero buffer after mallocing, and pin it
1132 rc = -EINVAL;
1133 memset(
1134 d->arch.shadow_dirty_bitmap +
1135 (i/(8*sizeof(unsigned long))),
1136 0, (d->max_pages/8) - (i/(8*sizeof(unsigned long))));
1137 break;
1138 }
1139 memset(
1140 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1141 0, bytes);
1142 }
1144 break;
1146 case DOM0_SHADOW_CONTROL_OP_PEEK:
1147 sc->stats.fault_count = d->arch.shadow_fault_count;
1148 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1149 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1150 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1152 if ( (d->max_pages > sc->pages) ||
1153 (sc->dirty_bitmap == NULL) ||
1154 (d->arch.shadow_dirty_bitmap == NULL) )
1155 {
1156 rc = -EINVAL;
1157 break;
1158 }
1160 sc->pages = d->max_pages;
1161 if (copy_to_user(
1162 sc->dirty_bitmap, d->arch.shadow_dirty_bitmap, (d->max_pages+7)/8))
1163 {
1164 rc = -EINVAL;
1165 break;
1166 }
1168 break;
1170 default:
1171 rc = -EINVAL;
1172 break;
1173 }
1175 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1176 shadow_audit(d, 1);
1178 for_each_vcpu(d,v)
1179 __update_pagetables(v);
1181 return rc;
1182 }
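/*
 * Worked example for the CLEAN path above (illustrative only): with
 * chunk == 8*1024 pages, each loop iteration copies
 *     bytes = (min(chunk, d->max_pages - i) + 7) / 8
 * of the dirty bitmap out to the caller and then clears that much, i.e.
 * at most 8192/8 == 1024 bytes (1kB) per iteration, as the comment at the
 * #define says.  For a hypothetical 10000-page domain the loop runs twice:
 *     i == 0     ->  bytes == 8192/8       == 1024
 *     i == 8192  ->  bytes == (1808 + 7)/8 == 226
 */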
1184 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1185 {
1186 unsigned int op = sc->op;
1187 int rc = 0;
1188 struct vcpu *v;
1190 if ( unlikely(d == current->domain) )
1191 {
1192 DPRINTK("Don't try to do a shadow op on yourself!\n");
1193 return -EINVAL;
1194 }
1196 domain_pause(d);
1198 shadow_lock(d);
1200 switch ( op )
1201 {
1202 case DOM0_SHADOW_CONTROL_OP_OFF:
1203 __shadow_sync_all(d);
1204 __shadow_mode_disable(d);
1205 break;
1207 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1208 free_shadow_pages(d);
1209 rc = __shadow_mode_enable(d, SHM_enable);
1210 break;
1212 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1213 free_shadow_pages(d);
1214 rc = __shadow_mode_enable(
1215 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1216 break;
1218 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1219 free_shadow_pages(d);
1220 rc = __shadow_mode_enable(
1221 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1222 break;
1224 default:
1225 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1226 break;
1227 }
1229 shadow_unlock(d);
1231 for_each_vcpu(d,v)
1232 update_pagetables(v);
1234 domain_unpause(d);
1236 return rc;
1237 }
1239 void shadow_mode_init(void)
1240 {
1241 }
1243 int _shadow_mode_refcounts(struct domain *d)
1244 {
1245 return shadow_mode_refcounts(d);
1246 }
1248 int
1249 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
1250 struct domain_mmap_cache *l2cache,
1251 struct domain_mmap_cache *l1cache)
1252 {
1253 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1254 l2_pgentry_t *l2, l2e;
1255 l1_pgentry_t *l1;
1256 struct pfn_info *l1page;
1257 unsigned long va = pfn << PAGE_SHIFT;
1259 ASSERT(tabpfn != 0);
1261 l2 = map_domain_page_with_cache(tabpfn, l2cache);
1262 l2e = l2[l2_table_offset(va)];
1263 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1264 {
1265 l1page = alloc_domheap_page(NULL);
1266 if ( !l1page )
1267 {
1268 unmap_domain_page_with_cache(l2, l2cache);
1269 return 0;
1270 }
1272 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
1273 memset(l1, 0, PAGE_SIZE);
1274 unmap_domain_page_with_cache(l1, l1cache);
1276 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
1277 l2[l2_table_offset(va)] = l2e;
1278 }
1279 unmap_domain_page_with_cache(l2, l2cache);
1281 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
1282 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
1283 unmap_domain_page_with_cache(l1, l1cache);
1285 return 1;
1286 }
1288 int
1289 alloc_p2m_table(struct domain *d)
1290 {
1291 struct list_head *list_ent;
1292 struct pfn_info *page, *l2page;
1293 l2_pgentry_t *l2;
1294 unsigned long mfn, pfn;
1295 struct domain_mmap_cache l1cache, l2cache;
1297 l2page = alloc_domheap_page(NULL);
1298 if ( l2page == NULL )
1299 return 0;
1301 domain_mmap_cache_init(&l1cache);
1302 domain_mmap_cache_init(&l2cache);
1304 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
1305 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
1306 memset(l2, 0, PAGE_SIZE);
1307 unmap_domain_page_with_cache(l2, &l2cache);
1309 list_ent = d->page_list.next;
1310 while ( list_ent != &d->page_list )
1311 {
1312 page = list_entry(list_ent, struct pfn_info, list);
1313 mfn = page_to_pfn(page);
1314 pfn = machine_to_phys_mapping[mfn];
1315 ASSERT(pfn != INVALID_M2P_ENTRY);
1316 ASSERT(pfn < (1u<<20));
1318 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1320 list_ent = page->list.next;
1321 }
1323 list_ent = d->xenpage_list.next;
1324 while ( list_ent != &d->xenpage_list )
1325 {
1326 page = list_entry(list_ent, struct pfn_info, list);
1327 mfn = page_to_pfn(page);
1328 pfn = machine_to_phys_mapping[mfn];
1329 if ( (pfn != INVALID_M2P_ENTRY) &&
1330 (pfn < (1u<<20)) )
1331 {
1332 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1333 }
1335 list_ent = page->list.next;
1336 }
1338 domain_mmap_cache_destroy(&l2cache);
1339 domain_mmap_cache_destroy(&l1cache);
1341 return 1;
1342 }
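/*
 * Descriptive sketch (hypothetical helper, not in the original source):
 * alloc_p2m_table()/set_p2m_entry() above build an ordinary two-level x86
 * page table indexed by "va = gpfn << PAGE_SHIFT".  Reading it back is the
 * mirror image, essentially what gpfn_to_mfn_foreign() does further below:
 */
static inline unsigned long example_p2m_lookup(
    l2_pgentry_t *p2m_l2, unsigned long gpfn)
{
    unsigned long va = gpfn << PAGE_SHIFT;
    l2_pgentry_t l2e = p2m_l2[l2_table_offset(va)];
    l1_pgentry_t *l1, l1e;

    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
        return INVALID_MFN;

    l1 = map_domain_page(l2e_get_pfn(l2e));
    l1e = l1[l1_table_offset(va)];
    unmap_domain_page(l1);

    return (l1e_get_flags(l1e) & _PAGE_PRESENT) ? l1e_get_pfn(l1e)
                                                : INVALID_MFN;
}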
1344 void shadow_l1_normal_pt_update(
1345 struct domain *d,
1346 unsigned long pa, l1_pgentry_t gpte,
1347 struct domain_mmap_cache *cache)
1348 {
1349 unsigned long sl1mfn;
1350 l1_pgentry_t *spl1e, spte;
1352 shadow_lock(d);
1354 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
1355 if ( sl1mfn )
1356 {
1357 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte,
1358 (void *)pa, l1e_get_intpte(gpte));
1359 l1pte_propagate_from_guest(current->domain, gpte, &spte);
1361 spl1e = map_domain_page_with_cache(sl1mfn, cache);
1362 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
1363 unmap_domain_page_with_cache(spl1e, cache);
1364 }
1366 shadow_unlock(d);
1367 }
1369 void shadow_l2_normal_pt_update(
1370 struct domain *d,
1371 unsigned long pa, l2_pgentry_t gpde,
1372 struct domain_mmap_cache *cache)
1373 {
1374 unsigned long sl2mfn;
1375 l2_pgentry_t *spl2e;
1377 shadow_lock(d);
1379 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
1380 if ( sl2mfn )
1381 {
1382 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
1383 (void *)pa, l2e_get_intpte(gpde));
1384 spl2e = map_domain_page_with_cache(sl2mfn, cache);
1385 validate_pde_change(d, gpde,
1386 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
1387 unmap_domain_page_with_cache(spl2e, cache);
1388 }
1390 shadow_unlock(d);
1391 }
1393 #if CONFIG_PAGING_LEVELS >= 3
1394 void shadow_l3_normal_pt_update(
1395 struct domain *d,
1396 unsigned long pa, l3_pgentry_t gpde,
1397 struct domain_mmap_cache *cache)
1398 {
1399 unsigned long sl3mfn;
1400 pgentry_64_t *spl3e;
1402 shadow_lock(d);
1404 sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
1405 if ( sl3mfn )
1406 {
1407 SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte,
1408 (void *)pa, l3e_get_intpte(gpde));
1410 spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
1411 validate_entry_change(d, (pgentry_64_t *) &gpde,
1412 &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)],
1413 shadow_type_to_level(PGT_l3_shadow));
1414 unmap_domain_page_with_cache(spl3e, cache);
1415 }
1417 shadow_unlock(d);
1418 }
1419 #endif
1421 #if CONFIG_PAGING_LEVELS >= 4
1422 void shadow_l4_normal_pt_update(
1423 struct domain *d,
1424 unsigned long pa, l4_pgentry_t gpde,
1425 struct domain_mmap_cache *cache)
1426 {
1427 unsigned long sl4mfn;
1428 pgentry_64_t *spl4e;
1430 shadow_lock(d);
1432 sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
1433 if ( sl4mfn )
1434 {
1435 SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte,
1436 (void *)pa, l4e_get_intpte(gpde));
1438 spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
1439 validate_entry_change(d, (pgentry_64_t *)&gpde,
1440 &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)],
1441 shadow_type_to_level(PGT_l4_shadow));
1442 unmap_domain_page_with_cache(spl4e, cache);
1443 }
1445 shadow_unlock(d);
1446 }
1447 #endif
1449 static void
1450 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1451 {
1452 int i;
1453 l1_pgentry_t *l1;
1455 l1 = map_domain_page(l1mfn);
1456 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1457 {
1458 if ( is_guest_l1_slot(i) &&
1459 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1460 {
1461 unsigned long mfn = l1e_get_pfn(l1[i]);
1462 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1463 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1464 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1465 }
1466 }
1467 unmap_domain_page(l1);
1468 }
1470 // This is not general enough to handle arbitrary pagetables
1471 // with shared L1 pages, etc., but it is sufficient for bringing
1472 // up dom0.
1473 //
1474 void
1475 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1476 unsigned int type)
1477 {
1478 int i;
1479 l2_pgentry_t *l2;
1481 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1483 l2 = map_domain_page(l2mfn);
1484 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1485 {
1486 if ( is_guest_l2_slot(type, i) &&
1487 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1488 {
1489 unsigned long mfn = l2e_get_pfn(l2[i]);
1490 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1491 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1492 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1493 translate_l1pgtable(d, p2m, mfn);
1494 }
1495 }
1496 unmap_domain_page(l2);
1497 }
1499 void
1500 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
1501 {
1502 unsigned long smfn;
1504 //printk("%s(gpfn=%lx, type=%x)\n", __func__, gpfn, stype);
1506 shadow_lock(d);
1508 while ( stype >= PGT_l1_shadow )
1509 {
1510 smfn = __shadow_status(d, gpfn, stype);
1511 if ( smfn && MFN_PINNED(smfn) )
1512 shadow_unpin(smfn);
1513 stype -= PGT_l1_shadow;
1514 }
1516 shadow_unlock(d);
1517 }
1519 unsigned long
1520 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1521 {
1522 ASSERT( shadow_mode_translate(d) );
1524 perfc_incrc(gpfn_to_mfn_foreign);
1526 unsigned long va = gpfn << PAGE_SHIFT;
1527 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1528 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1529 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1530 unmap_domain_page(l2);
1531 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1532 {
1533 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1534 d->domain_id, gpfn, l2e_get_intpte(l2e));
1535 return INVALID_MFN;
1536 }
1537 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1538 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1539 unmap_domain_page(l1);
1541 #if 0
1542 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1543 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1544 #endif
1546 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1547 {
1548 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1549 d->domain_id, gpfn, l1e_get_intpte(l1e));
1550 return INVALID_MFN;
1551 }
1553 return l1e_get_pfn(l1e);
1554 }
1556 static u32 remove_all_access_in_page(
1557 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
1558 {
1559 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
1560 l1_pgentry_t match;
1561 unsigned long flags = _PAGE_PRESENT;
1562 int i;
1563 u32 count = 0;
1564 int is_l1_shadow =
1565 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
1566 PGT_l1_shadow);
1568 match = l1e_from_pfn(forbidden_gmfn, flags);
1570 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1571 {
1572 if ( unlikely(!l1e_has_changed(pl1e[i], match, flags) == 0) )
1573 {
1574 l1_pgentry_t ol2e = pl1e[i];
1575 pl1e[i] = l1e_empty();
1576 count++;
1578 if ( is_l1_shadow )
1579 shadow_put_page_from_l1e(ol2e, d);
1580 else /* must be an hl2 page */
1581 put_page(&frame_table[forbidden_gmfn]);
1582 }
1583 }
1585 unmap_domain_page(pl1e);
1587 return count;
1588 }
1590 static u32 __shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
1591 {
1592 int i;
1593 struct shadow_status *a;
1594 u32 count = 0;
1596 if ( unlikely(!shadow_mode_enabled(d)) )
1597 return 0;
1599 ASSERT(shadow_lock_is_acquired(d));
1600 perfc_incrc(remove_all_access);
1602 for (i = 0; i < shadow_ht_buckets; i++)
1603 {
1604 a = &d->arch.shadow_ht[i];
1605 while ( a && a->gpfn_and_flags )
1606 {
1607 switch (a->gpfn_and_flags & PGT_type_mask)
1608 {
1609 case PGT_l1_shadow:
1610 case PGT_l2_shadow:
1611 case PGT_l3_shadow:
1612 case PGT_l4_shadow:
1613 case PGT_hl2_shadow:
1614 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
1615 break;
1616 case PGT_snapshot:
1617 case PGT_writable_pred:
1618 // these can't hold refs to the forbidden page
1619 break;
1620 default:
1621 BUG();
1622 }
1624 a = a->next;
1625 }
1626 }
1628 return count;
1629 }
1631 void shadow_drop_references(
1632 struct domain *d, struct pfn_info *page)
1633 {
1634 if ( likely(!shadow_mode_refcounts(d)) ||
1635 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
1636 return;
1638 /* XXX This needs more thought... */
1639 printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
1640 __func__, page_to_pfn(page));
1641 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1642 page->count_info, page->u.inuse.type_info);
1644 shadow_lock(d);
1645 __shadow_remove_all_access(d, page_to_pfn(page));
1646 shadow_unlock(d);
1648 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1649 page->count_info, page->u.inuse.type_info);
1650 }
1652 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
1653 void shadow_sync_and_drop_references(
1654 struct domain *d, struct pfn_info *page)
1655 {
1656 if ( likely(!shadow_mode_refcounts(d)) )
1657 return;
1659 shadow_lock(d);
1661 if ( page_out_of_sync(page) )
1662 __shadow_sync_mfn(d, page_to_pfn(page));
1664 __shadow_remove_all_access(d, page_to_pfn(page));
1666 shadow_unlock(d);
1667 }