direct-io.hg

view xen/arch/x86/shadow_public.c @ 7966:060a6634d9ec

SHADOW_CONTROL_OP_OFF should be checking whether shadow mode
is actually enabled.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Nov 22 11:53:45 2005 +0100 (2005-11-22)
parents bdf1a8039d13
children c7508abc5b6b
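In outline, the fix guards the OP_OFF path in shadow_mode_control() so that the sync and disable work only runs when shadow mode is in use; the corresponding case in the listing below reads roughly as follows (excerpted here for orientation):

    case DOM0_SHADOW_CONTROL_OP_OFF:
        if ( shadow_mode_enabled(d) )
        {
            __shadow_sync_all(d);
            __shadow_mode_disable(d);
        }
        break;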
line source
1 /******************************************************************************
2 * arch/x86/shadow_public.c
3 *
4 * Copyright (c) 2005 Michael A Fetterman
5 * Based on an earlier implementation by Ian Pratt et al
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/domain_page.h>
27 #include <asm/shadow.h>
28 #include <asm/page.h>
29 #include <xen/event.h>
30 #include <xen/sched.h>
31 #include <xen/trace.h>
33 #if CONFIG_PAGING_LEVELS >= 3
34 #include <asm/shadow_64.h>
36 #endif
37 #if CONFIG_PAGING_LEVELS == 4
38 extern struct shadow_ops MODE_F_HANDLER;
39 extern struct shadow_ops MODE_D_HANDLER;
40 #endif
42 extern struct shadow_ops MODE_A_HANDLER;
44 #define SHADOW_MAX_GUEST32(_encoded) ((L1_PAGETABLE_ENTRIES_32 - 1) - ((_encoded) >> 16))
45 /****************************************************************************/
46 /************* export interface functions ***********************************/
47 /****************************************************************************/
50 int shadow_set_guest_paging_levels(struct domain *d, int levels)
51 {
52 shadow_lock(d);
54 switch(levels) {
55 #if CONFIG_PAGING_LEVELS >= 4
56 case 4:
57 if ( d->arch.ops != &MODE_F_HANDLER )
58 d->arch.ops = &MODE_F_HANDLER;
59 shadow_unlock(d);
60 return 1;
61 #endif
62 case 3:
63 case 2:
64 #if CONFIG_PAGING_LEVELS == 2
65 if ( d->arch.ops != &MODE_A_HANDLER )
66 d->arch.ops = &MODE_A_HANDLER;
67 #elif CONFIG_PAGING_LEVELS == 3
68 if ( d->arch.ops != &MODE_B_HANDLER )
69 d->arch.ops = &MODE_B_HANDLER;
70 #elif CONFIG_PAGING_LEVELS == 4
71 if ( d->arch.ops != &MODE_D_HANDLER )
72 d->arch.ops = &MODE_D_HANDLER;
73 #endif
74 shadow_unlock(d);
75 return 1;
76 default:
77 shadow_unlock(d);
78 return 0;
79 }
80 }
82 void shadow_invlpg(struct vcpu *v, unsigned long va)
83 {
84 struct domain *d = current->domain;
85 d->arch.ops->invlpg(v, va);
86 }
88 int shadow_fault(unsigned long va, struct cpu_user_regs *regs)
89 {
90 struct domain *d = current->domain;
91 return d->arch.ops->fault(va, regs);
92 }
94 void __update_pagetables(struct vcpu *v)
95 {
96 struct domain *d = v->domain;
97 d->arch.ops->update_pagetables(v);
98 }
100 void __shadow_sync_all(struct domain *d)
101 {
102 d->arch.ops->sync_all(d);
103 }
105 int shadow_remove_all_write_access(
106 struct domain *d, unsigned long readonly_gpfn, unsigned long readonly_gmfn)
107 {
108 return d->arch.ops->remove_all_write_access(d, readonly_gpfn, readonly_gmfn);
109 }
111 int shadow_do_update_va_mapping(unsigned long va,
112 l1_pgentry_t val,
113 struct vcpu *v)
114 {
115 struct domain *d = v->domain;
116 return d->arch.ops->do_update_va_mapping(va, val, v);
117 }
119 struct out_of_sync_entry *
120 shadow_mark_mfn_out_of_sync(struct vcpu *v, unsigned long gpfn,
121 unsigned long mfn)
122 {
123 struct domain *d = v->domain;
124 return d->arch.ops->mark_mfn_out_of_sync(v, gpfn, mfn);
125 }
127 /*
128 * Returns 1 if va's shadow mapping is out-of-sync.
129 * Returns 0 otherwise.
130 */
131 int __shadow_out_of_sync(struct vcpu *v, unsigned long va)
132 {
133 struct domain *d = v->domain;
134 return d->arch.ops->is_out_of_sync(v, va);
135 }
137 unsigned long gva_to_gpa(unsigned long gva)
138 {
139 struct domain *d = current->domain;
140 return d->arch.ops->gva_to_gpa(gva);
141 }
142 /****************************************************************************/
143 /****************************************************************************/
144 #if CONFIG_PAGING_LEVELS >= 3
146 static void inline
147 free_shadow_fl1_table(struct domain *d, unsigned long smfn)
148 {
149 l1_pgentry_t *pl1e = map_domain_page(smfn);
150 int i;
152 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
153 put_page_from_l1e(pl1e[i], d);
154 }
156 /*
157 * Free l2, l3, l4 shadow tables
158 */
160 void free_fake_shadow_l2(struct domain *d,unsigned long smfn);
162 static void inline
163 free_shadow_tables(struct domain *d, unsigned long smfn, u32 level)
164 {
165 pgentry_64_t *ple = map_domain_page(smfn);
166 int i, external = shadow_mode_external(d);
168 #if CONFIG_PAGING_LEVELS >=3
169 if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
170 {
171 struct pfn_info *page = &frame_table[smfn];
172 for ( i = 0; i < PDP_ENTRIES; i++ )
173 {
174 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
175 free_fake_shadow_l2(d,entry_get_pfn(ple[i]));
176 }
178 page = &frame_table[entry_get_pfn(ple[0])];
179 free_domheap_pages(page, SL2_ORDER);
180 unmap_domain_page(ple);
181 }
182 else
183 #endif
184 {
185 /*
186 * No Xen mappings in external pages
187 */
188 if ( external )
189 {
190 for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
191 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
192 put_shadow_ref(entry_get_pfn(ple[i]));
193 }
194 else
195 {
196 for ( i = 0; i < PAGETABLE_ENTRIES; i++ )
197 {
198 /*
199 * List the skip/break conditions to avoid freeing
200 * Xen private mappings.
201 */
202 #if CONFIG_PAGING_LEVELS == 2
203 if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) )
204 continue;
205 #endif
206 #if CONFIG_PAGING_LEVELS == 3
207 if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES )
208 break;
209 if ( level == PAGING_L2 )
210 {
211 struct pfn_info *page = &frame_table[smfn];
212 if ( is_xen_l2_slot(page->u.inuse.type_info, i) )
213 continue;
214 }
215 #endif
216 #if CONFIG_PAGING_LEVELS == 4
217 if ( level == PAGING_L4 && !is_guest_l4_slot(i))
218 continue;
219 #endif
220 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
221 put_shadow_ref(entry_get_pfn(ple[i]));
222 }
223 }
224 unmap_domain_page(ple);
225 }
226 }
227 #endif
229 #if CONFIG_PAGING_LEVELS == 4
230 /*
231 * Convert PAE 3-level page-table to 4-level page-table
232 */
233 static pagetable_t page_table_convert(struct domain *d)
234 {
235 struct pfn_info *l4page, *l3page;
236 l4_pgentry_t *l4;
237 l3_pgentry_t *l3, *pae_l3;
238 int i;
240 l4page = alloc_domheap_page(NULL);
241 if (l4page == NULL)
242 domain_crash(d);
243 l4 = map_domain_page(page_to_pfn(l4page));
244 memset(l4, 0, PAGE_SIZE);
246 l3page = alloc_domheap_page(NULL);
247 if (l3page == NULL)
248 domain_crash(d);
249 l3 = map_domain_page(page_to_pfn(l3page));
250 memset(l3, 0, PAGE_SIZE);
252 l4[0] = l4e_from_page(l3page, __PAGE_HYPERVISOR);
253 pae_l3 = map_domain_page(pagetable_get_pfn(d->arch.phys_table));
255 for (i = 0; i < PDP_ENTRIES; i++) {
256 l3[i] = pae_l3[i];
257 l3e_add_flags(l3[i], 0x67);
258 }
260 unmap_domain_page(l4);
261 unmap_domain_page(l3);
263 return mk_pagetable(page_to_phys(l4page));
264 }
266 static void alloc_monitor_pagetable(struct vcpu *v)
267 {
268 unsigned long mmfn;
269 l4_pgentry_t *mpl4e;
270 struct pfn_info *mmfn_info;
271 struct domain *d = v->domain;
272 pagetable_t phys_table;
274 ASSERT(!pagetable_get_paddr(v->arch.monitor_table)); /* we should only get called once */
276 mmfn_info = alloc_domheap_page(NULL);
277 ASSERT( mmfn_info );
279 mmfn = (unsigned long) (mmfn_info - frame_table);
280 mpl4e = (l4_pgentry_t *) map_domain_page(mmfn);
281 memcpy(mpl4e, &idle_pg_table[0], PAGE_SIZE);
282 mpl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
283 l4e_from_paddr(__pa(d->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
284 /* map the phys_to_machine map into the per domain Read-Only MPT space */
285 phys_table = page_table_convert(d);
287 mpl4e[l4_table_offset(RO_MPT_VIRT_START)] =
288 l4e_from_paddr(pagetable_get_paddr(phys_table),
289 __PAGE_HYPERVISOR);
290 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
291 v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e;
292 }
294 void free_monitor_pagetable(struct vcpu *v)
295 {
296 unsigned long mfn;
298 /*
299 * free monitor_table.
300 * Note: for a VMX guest, only the BSP needs to do this free.
301 */
302 if (!(VMX_DOMAIN(v) && v->vcpu_id)) {
303 mfn = pagetable_get_pfn(v->arch.monitor_table);
304 unmap_domain_page(v->arch.monitor_vtable);
305 free_domheap_page(&frame_table[mfn]);
306 }
307 v->arch.monitor_table = mk_pagetable(0);
308 v->arch.monitor_vtable = 0;
309 }
311 #elif CONFIG_PAGING_LEVELS == 3
313 static void alloc_monitor_pagetable(struct vcpu *v)
314 {
315 BUG(); /* PAE not implemented yet */
316 }
318 void free_monitor_pagetable(struct vcpu *v)
319 {
320 BUG(); /* PAE not implemented yet */
321 }
323 #elif CONFIG_PAGING_LEVELS == 2
325 static void alloc_monitor_pagetable(struct vcpu *v)
326 {
327 unsigned long mmfn;
328 l2_pgentry_t *mpl2e;
329 struct pfn_info *mmfn_info;
330 struct domain *d = v->domain;
332 ASSERT(pagetable_get_paddr(v->arch.monitor_table) == 0);
334 mmfn_info = alloc_domheap_page(NULL);
335 ASSERT(mmfn_info != NULL);
337 mmfn = page_to_pfn(mmfn_info);
338 mpl2e = (l2_pgentry_t *)map_domain_page(mmfn);
339 memset(mpl2e, 0, PAGE_SIZE);
341 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
342 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
343 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
345 mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
346 l2e_from_paddr(__pa(d->arch.mm_perdomain_pt),
347 __PAGE_HYPERVISOR);
349 // map the phys_to_machine map into the Read-Only MPT space for this domain
350 mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
351 l2e_from_paddr(pagetable_get_paddr(d->arch.phys_table),
352 __PAGE_HYPERVISOR);
354 // Don't (yet) have mappings for these...
355 // Don't want to accidentally see the idle_pg_table's linear mapping.
356 //
357 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
358 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
360 v->arch.monitor_table = mk_pagetable(mmfn << PAGE_SHIFT);
361 v->arch.monitor_vtable = mpl2e;
362 }
364 /*
365 * Free the pages for monitor_table and hl2_table
366 */
367 void free_monitor_pagetable(struct vcpu *v)
368 {
369 l2_pgentry_t *mpl2e, hl2e, sl2e;
370 unsigned long mfn;
372 ASSERT( pagetable_get_paddr(v->arch.monitor_table) );
374 mpl2e = v->arch.monitor_vtable;
376 /*
377 * First get the mfn for hl2_table by looking at monitor_table
378 */
379 hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
380 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
381 {
382 mfn = l2e_get_pfn(hl2e);
383 ASSERT(mfn);
384 put_shadow_ref(mfn);
385 }
387 sl2e = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
388 if ( l2e_get_flags(sl2e) & _PAGE_PRESENT )
389 {
390 mfn = l2e_get_pfn(sl2e);
391 ASSERT(mfn);
392 put_shadow_ref(mfn);
393 }
395 unmap_domain_page(mpl2e);
397 /*
398 * Then free monitor_table.
399 * Note: for a VMX guest, only the BSP needs to do this free.
400 */
401 if (!(VMX_DOMAIN(v) && v->vcpu_id)) {
402 mfn = pagetable_get_pfn(v->arch.monitor_table);
403 unmap_domain_page(v->arch.monitor_vtable);
404 free_domheap_page(&frame_table[mfn]);
405 }
407 v->arch.monitor_table = mk_pagetable(0);
408 v->arch.monitor_vtable = 0;
409 }
410 #endif
412 static void
413 shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry)
414 {
415 void *snapshot;
417 if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE )
418 return;
420 // Clear the out_of_sync bit.
421 //
422 clear_bit(_PGC_out_of_sync, &frame_table[entry->gmfn].count_info);
424 // XXX Need to think about how to protect the domain's
425 // information less expensively.
426 //
427 snapshot = map_domain_page(entry->snapshot_mfn);
428 memset(snapshot, 0, PAGE_SIZE);
429 unmap_domain_page(snapshot);
431 put_shadow_ref(entry->snapshot_mfn);
432 }
434 void
435 release_out_of_sync_entry(struct domain *d, struct out_of_sync_entry *entry)
436 {
437 struct pfn_info *page;
439 page = &frame_table[entry->gmfn];
441 // Decrement ref count of guest & shadow pages
442 //
443 put_page(page);
445 // Only use entries that have low bits clear...
446 //
447 if ( !(entry->writable_pl1e & (sizeof(l1_pgentry_t)-1)) )
448 {
449 put_shadow_ref(entry->writable_pl1e >> PAGE_SHIFT);
450 entry->writable_pl1e = -2;
451 }
452 else
453 ASSERT( entry->writable_pl1e == -1 );
455 // Free the snapshot
456 //
457 shadow_free_snapshot(d, entry);
458 }
460 static void remove_out_of_sync_entries(struct domain *d, unsigned long gmfn)
461 {
462 struct out_of_sync_entry *entry = d->arch.out_of_sync;
463 struct out_of_sync_entry **prev = &d->arch.out_of_sync;
464 struct out_of_sync_entry *found = NULL;
466 // NB: Be careful not to call something that manipulates this list
467 // while walking it. Collect the results into a separate list
468 // first, then walk that list.
469 //
470 while ( entry )
471 {
472 if ( entry->gmfn == gmfn )
473 {
474 // remove from out of sync list
475 *prev = entry->next;
477 // add to found list
478 entry->next = found;
479 found = entry;
481 entry = *prev;
482 continue;
483 }
484 prev = &entry->next;
485 entry = entry->next;
486 }
488 prev = NULL;
489 entry = found;
490 while ( entry )
491 {
492 release_out_of_sync_entry(d, entry);
494 prev = &entry->next;
495 entry = entry->next;
496 }
498 // Add found list to free list
499 if ( prev )
500 {
501 *prev = d->arch.out_of_sync_free;
502 d->arch.out_of_sync_free = found;
503 }
504 }
506 static inline void
507 shadow_demote(struct domain *d, unsigned long gpfn, unsigned long gmfn)
508 {
509 if ( !shadow_mode_refcounts(d) )
510 return;
512 ASSERT(frame_table[gmfn].count_info & PGC_page_table);
514 if ( shadow_max_pgtable_type(d, gpfn, NULL) == PGT_none )
515 {
516 clear_bit(_PGC_page_table, &frame_table[gmfn].count_info);
518 if ( page_out_of_sync(pfn_to_page(gmfn)) )
519 {
520 remove_out_of_sync_entries(d, gmfn);
521 }
522 }
523 }
525 static void inline
526 free_shadow_l1_table(struct domain *d, unsigned long smfn)
527 {
528 l1_pgentry_t *pl1e = map_domain_page(smfn);
529 int i;
530 struct pfn_info *spage = pfn_to_page(smfn);
531 u32 min_max = spage->tlbflush_timestamp;
532 int min = SHADOW_MIN(min_max);
533 int max;
535 if (d->arch.ops->guest_paging_levels == PAGING_L2)
536 max = SHADOW_MAX_GUEST32(min_max);
537 else
538 max = SHADOW_MAX(min_max);
540 for ( i = min; i <= max; i++ )
541 {
542 shadow_put_page_from_l1e(pl1e[i], d);
543 pl1e[i] = l1e_empty();
544 }
546 unmap_domain_page(pl1e);
547 }
549 static void inline
550 free_shadow_hl2_table(struct domain *d, unsigned long smfn)
551 {
552 l1_pgentry_t *hl2 = map_domain_page(smfn);
553 int i, limit;
555 SH_VVLOG("%s: smfn=%lx freed", __func__, smfn);
557 #if CONFIG_PAGING_LEVELS == 2
558 if ( shadow_mode_external(d) )
559 limit = L2_PAGETABLE_ENTRIES;
560 else
561 limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE;
562 #endif
564 for ( i = 0; i < limit; i++ )
565 {
566 if ( l1e_get_flags(hl2[i]) & _PAGE_PRESENT )
567 put_page(pfn_to_page(l1e_get_pfn(hl2[i])));
568 }
570 unmap_domain_page(hl2);
571 }
573 static void inline
574 free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
575 {
576 l2_pgentry_t *pl2e = map_domain_page(smfn);
577 int i, external = shadow_mode_external(d);
579 for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
580 if ( external || is_guest_l2_slot(type, i) )
581 if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
582 put_shadow_ref(l2e_get_pfn(pl2e[i]));
584 if ( (PGT_base_page_table == PGT_l2_page_table) &&
585 shadow_mode_translate(d) && !external )
586 {
587 // free the ref to the hl2
588 //
589 put_shadow_ref(l2e_get_pfn(pl2e[l2_table_offset(LINEAR_PT_VIRT_START)]));
590 }
592 unmap_domain_page(pl2e);
593 }
595 void free_fake_shadow_l2(struct domain *d, unsigned long smfn)
596 {
597 pgentry_64_t *ple = map_domain_page(smfn);
598 int i;
600 for ( i = 0; i < PAGETABLE_ENTRIES; i = i + 2 )
601 {
602 if ( entry_get_flags(ple[i]) & _PAGE_PRESENT )
603 put_shadow_ref(entry_get_pfn(ple[i]));
604 }
606 unmap_domain_page(ple);
607 }
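/*
 * free_shadow_page() below releases a single shadow page: it looks up the
 * owning domain and guest pfn, removes the shadow-status entry, frees the
 * type-specific table contents, and either recycles L1 shadows onto
 * d->arch.free_shadow_frames or returns the page to the domheap.
 */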
609 void free_shadow_page(unsigned long smfn)
610 {
611 struct pfn_info *page = &frame_table[smfn];
613 unsigned long gmfn = page->u.inuse.type_info & PGT_mfn_mask;
614 struct domain *d = page_get_owner(pfn_to_page(gmfn));
615 unsigned long gpfn = __mfn_to_gpfn(d, gmfn);
616 unsigned long type = page->u.inuse.type_info & PGT_type_mask;
618 SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
620 ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
621 #if CONFIG_PAGING_LEVELS >=4
622 if ( type == PGT_fl1_shadow )
623 {
624 unsigned long mfn;
625 mfn = __shadow_status(d, gpfn, PGT_fl1_shadow);
626 if ( !mfn )
627 gpfn |= (1UL << 63);
628 }
629 #endif
631 delete_shadow_status(d, gpfn, gmfn, type);
633 switch ( type )
634 {
635 case PGT_l1_shadow:
636 perfc_decr(shadow_l1_pages);
637 shadow_demote(d, gpfn, gmfn);
638 free_shadow_l1_table(d, smfn);
639 d->arch.shadow_page_count--;
640 break;
641 #if CONFIG_PAGING_LEVELS == 2
642 case PGT_l2_shadow:
643 perfc_decr(shadow_l2_pages);
644 shadow_demote(d, gpfn, gmfn);
645 free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
646 d->arch.shadow_page_count--;
647 break;
649 case PGT_hl2_shadow:
650 perfc_decr(hl2_table_pages);
651 shadow_demote(d, gpfn, gmfn);
652 free_shadow_hl2_table(d, smfn);
653 d->arch.hl2_page_count--;
654 break;
655 #endif
656 #if CONFIG_PAGING_LEVELS >= 3
657 case PGT_l2_shadow:
658 case PGT_l3_shadow:
659 case PGT_l4_shadow:
660 shadow_demote(d, gpfn, gmfn);
661 free_shadow_tables(d, smfn, shadow_type_to_level(type));
662 d->arch.shadow_page_count--;
663 break;
665 case PGT_fl1_shadow:
666 free_shadow_fl1_table(d, smfn);
667 d->arch.shadow_page_count--;
668 break;
669 #endif
670 case PGT_snapshot:
671 perfc_decr(snapshot_pages);
672 break;
674 default:
675 printk("Free shadow weird page type mfn=%lx type=%" PRtype_info "\n",
676 page_to_pfn(page), page->u.inuse.type_info);
677 break;
678 }
680 // No TLB flushes are needed the next time this page gets allocated.
681 //
682 page->tlbflush_timestamp = 0;
683 page->u.free.cpumask = CPU_MASK_NONE;
685 if ( type == PGT_l1_shadow )
686 {
687 list_add(&page->list, &d->arch.free_shadow_frames);
688 perfc_incr(free_l1_pages);
689 }
690 else
691 free_domheap_page(page);
692 }
694 static void
695 free_writable_pte_predictions(struct domain *d)
696 {
697 int i;
698 struct shadow_status *x;
700 for ( i = 0; i < shadow_ht_buckets; i++ )
701 {
702 u32 count;
703 unsigned long *gpfn_list;
705 /* Skip empty buckets. */
706 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
707 continue;
709 count = 0;
710 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
711 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
712 count++;
714 gpfn_list = xmalloc_array(unsigned long, count);
715 count = 0;
716 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
717 if ( (x->gpfn_and_flags & PGT_type_mask) == PGT_writable_pred )
718 gpfn_list[count++] = x->gpfn_and_flags & PGT_mfn_mask;
720 while ( count )
721 {
722 count--;
723 delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
724 }
726 xfree(gpfn_list);
727 }
728 }
730 static void free_shadow_ht_entries(struct domain *d)
731 {
732 struct shadow_status *x, *n;
734 SH_VLOG("freed tables count=%d l1=%d l2=%d",
735 d->arch.shadow_page_count, perfc_value(shadow_l1_pages),
736 perfc_value(shadow_l2_pages));
738 n = d->arch.shadow_ht_extras;
739 while ( (x = n) != NULL )
740 {
741 d->arch.shadow_extras_count--;
742 n = *((struct shadow_status **)(&x[shadow_ht_extra_size]));
743 xfree(x);
744 }
746 d->arch.shadow_ht_extras = NULL;
747 d->arch.shadow_ht_free = NULL;
749 ASSERT(d->arch.shadow_extras_count == 0);
750 SH_LOG("freed extras, now %d", d->arch.shadow_extras_count);
752 if ( d->arch.shadow_dirty_bitmap != NULL )
753 {
754 xfree(d->arch.shadow_dirty_bitmap);
755 d->arch.shadow_dirty_bitmap = 0;
756 d->arch.shadow_dirty_bitmap_size = 0;
757 }
759 xfree(d->arch.shadow_ht);
760 d->arch.shadow_ht = NULL;
761 }
763 static void free_out_of_sync_entries(struct domain *d)
764 {
765 struct out_of_sync_entry *x, *n;
767 n = d->arch.out_of_sync_extras;
768 while ( (x = n) != NULL )
769 {
770 d->arch.out_of_sync_extras_count--;
771 n = *((struct out_of_sync_entry **)(&x[out_of_sync_extra_size]));
772 xfree(x);
773 }
775 d->arch.out_of_sync_extras = NULL;
776 d->arch.out_of_sync_free = NULL;
777 d->arch.out_of_sync = NULL;
779 ASSERT(d->arch.out_of_sync_extras_count == 0);
780 FSH_LOG("freed extra out_of_sync entries, now %d",
781 d->arch.out_of_sync_extras_count);
782 }
784 void free_shadow_pages(struct domain *d)
785 {
786 int i;
787 struct shadow_status *x;
788 struct vcpu *v;
790 /*
791 * WARNING! The shadow page table must not currently be in use!
792 * e.g., You are expected to have paused the domain and synchronized CR3.
793 */
795 if( !d->arch.shadow_ht ) return;
797 shadow_audit(d, 1);
799 // first, remove any outstanding refs from out_of_sync entries...
800 //
801 free_out_of_sync_state(d);
803 // second, remove any outstanding refs from v->arch.shadow_table
804 // and CR3.
805 //
806 for_each_vcpu(d, v)
807 {
808 if ( pagetable_get_paddr(v->arch.shadow_table) )
809 {
810 put_shadow_ref(pagetable_get_pfn(v->arch.shadow_table));
811 v->arch.shadow_table = mk_pagetable(0);
812 }
814 if ( v->arch.monitor_shadow_ref )
815 {
816 put_shadow_ref(v->arch.monitor_shadow_ref);
817 v->arch.monitor_shadow_ref = 0;
818 }
819 }
821 #if CONFIG_PAGING_LEVELS == 2
822 // For external shadows, remove the monitor table's refs
823 //
824 if ( shadow_mode_external(d) )
825 {
826 for_each_vcpu(d, v)
827 {
828 l2_pgentry_t *mpl2e = v->arch.monitor_vtable;
830 if ( mpl2e )
831 {
832 l2_pgentry_t hl2e = mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)];
833 l2_pgentry_t smfn = mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)];
835 if ( l2e_get_flags(hl2e) & _PAGE_PRESENT )
836 {
837 put_shadow_ref(l2e_get_pfn(hl2e));
838 mpl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_empty();
839 }
840 if ( l2e_get_flags(smfn) & _PAGE_PRESENT )
841 {
842 put_shadow_ref(l2e_get_pfn(smfn));
843 mpl2e[l2_table_offset(SH_LINEAR_PT_VIRT_START)] = l2e_empty();
844 }
845 }
846 }
847 }
848 #endif
849 // Now, the only refs to shadow pages that are left are from the shadow
850 // pages themselves. We just unpin the pinned pages, and the rest
851 // should automatically disappear.
852 //
853 // NB: Beware: each explicitly or implicit call to free_shadow_page
854 // can/will result in the hash bucket getting rewritten out from
855 // under us... First, collect the list of pinned pages, then
856 // free them.
857 //
858 for ( i = 0; i < shadow_ht_buckets; i++ )
859 {
860 u32 count;
861 unsigned long *mfn_list;
863 /* Skip empty buckets. */
864 if ( d->arch.shadow_ht[i].gpfn_and_flags == 0 )
865 continue;
867 count = 0;
868 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
869 if ( MFN_PINNED(x->smfn) )
870 count++;
871 if ( !count )
872 continue;
874 mfn_list = xmalloc_array(unsigned long, count);
875 count = 0;
876 for ( x = &d->arch.shadow_ht[i]; x != NULL; x = x->next )
877 if ( MFN_PINNED(x->smfn) )
878 mfn_list[count++] = x->smfn;
880 while ( count )
881 {
882 shadow_unpin(mfn_list[--count]);
883 }
884 xfree(mfn_list);
885 }
887 // Now free the pre-zero'ed pages from the domain
888 //
889 struct list_head *list_ent, *tmp;
890 list_for_each_safe(list_ent, tmp, &d->arch.free_shadow_frames)
891 {
892 list_del(list_ent);
893 perfc_decr(free_l1_pages);
895 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
896 if (d->arch.ops->guest_paging_levels == PAGING_L2)
897 {
898 #if CONFIG_PAGING_LEVELS >=4
899 free_domheap_pages(page, SL1_ORDER);
900 #else
901 free_domheap_page(page);
902 #endif
903 }
904 else
905 free_domheap_page(page);
906 }
908 shadow_audit(d, 0);
910 SH_LOG("Free shadow table.");
911 }
913 void __shadow_mode_disable(struct domain *d)
914 {
915 if ( unlikely(!shadow_mode_enabled(d)) )
916 return;
918 free_shadow_pages(d);
919 free_writable_pte_predictions(d);
921 #ifndef NDEBUG
922 int i;
923 for ( i = 0; i < shadow_ht_buckets; i++ )
924 {
925 if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
926 {
927 printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
928 __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
929 BUG();
930 }
931 }
932 #endif
934 d->arch.shadow_mode = 0;
936 free_shadow_ht_entries(d);
937 free_out_of_sync_entries(d);
939 struct vcpu *v;
940 for_each_vcpu(d, v)
941 {
942 update_pagetables(v);
943 }
944 }
947 static void
948 free_p2m_table(struct domain *d)
949 {
950 // uh, this needs some work... :)
951 BUG();
952 }
955 int __shadow_mode_enable(struct domain *d, unsigned int mode)
956 {
957 struct vcpu *v;
958 int new_modes = (mode & ~d->arch.shadow_mode);
960 // Gotta be adding something to call this function.
961 ASSERT(new_modes);
963 // can't take anything away by calling this function.
964 ASSERT(!(d->arch.shadow_mode & ~mode));
966 #if defined(CONFIG_PAGING_LEVELS)
967 if(!shadow_set_guest_paging_levels(d,
968 CONFIG_PAGING_LEVELS)) {
969 printk("Unsupported guest paging levels\n");
970 domain_crash_synchronous(); /* need to take a clean path */
971 }
972 #endif
974 for_each_vcpu(d, v)
975 {
976 invalidate_shadow_ldt(v);
978 // We need to set these up for __update_pagetables().
979 // See the comment there.
981 /*
982 * arch.guest_vtable
983 */
984 if ( v->arch.guest_vtable &&
985 (v->arch.guest_vtable != __linear_l2_table) )
986 {
987 unmap_domain_page(v->arch.guest_vtable);
988 }
989 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
990 v->arch.guest_vtable = __linear_l2_table;
991 else
992 v->arch.guest_vtable = NULL;
994 /*
995 * arch.shadow_vtable
996 */
997 if ( v->arch.shadow_vtable &&
998 (v->arch.shadow_vtable != __shadow_linear_l2_table) )
999 {
1000 unmap_domain_page(v->arch.shadow_vtable);
1002 if ( !(mode & SHM_external) && d->arch.ops->guest_paging_levels == 2)
1003 v->arch.shadow_vtable = __shadow_linear_l2_table;
1004 else
1005 v->arch.shadow_vtable = NULL;
1007 #if CONFIG_PAGING_LEVELS == 2
1008 /*
1009 * arch.hl2_vtable
1010 */
1011 if ( v->arch.hl2_vtable &&
1012 (v->arch.hl2_vtable != __linear_hl2_table) )
1014 unmap_domain_page(v->arch.hl2_vtable);
1016 if ( (mode & (SHM_translate | SHM_external)) == SHM_translate )
1017 v->arch.hl2_vtable = __linear_hl2_table;
1018 else
1019 v->arch.hl2_vtable = NULL;
1020 #endif
1021 /*
1022 * arch.monitor_table & arch.monitor_vtable
1023 */
1024 if ( v->arch.monitor_vtable )
1026 free_monitor_pagetable(v);
1028 if ( mode & SHM_external )
1030 alloc_monitor_pagetable(v);
1034 if ( new_modes & SHM_enable )
1036 ASSERT( !d->arch.shadow_ht );
1037 d->arch.shadow_ht = xmalloc_array(struct shadow_status, shadow_ht_buckets);
1038 if ( d->arch.shadow_ht == NULL )
1039 goto nomem;
1041 memset(d->arch.shadow_ht, 0,
1042 shadow_ht_buckets * sizeof(struct shadow_status));
1045 if ( new_modes & SHM_log_dirty )
1047 ASSERT( !d->arch.shadow_dirty_bitmap );
1048 d->arch.shadow_dirty_bitmap_size =
1049 (d->shared_info->arch.max_pfn + 63) & ~63;
1050 d->arch.shadow_dirty_bitmap =
1051 xmalloc_array(unsigned long, d->arch.shadow_dirty_bitmap_size /
1052 (8 * sizeof(unsigned long)));
1053 if ( d->arch.shadow_dirty_bitmap == NULL )
1055 d->arch.shadow_dirty_bitmap_size = 0;
1056 goto nomem;
1058 memset(d->arch.shadow_dirty_bitmap, 0,
1059 d->arch.shadow_dirty_bitmap_size/8);
1062 if ( new_modes & SHM_translate )
1064 if ( !(new_modes & SHM_external) )
1066 ASSERT( !pagetable_get_paddr(d->arch.phys_table) );
1067 if ( !alloc_p2m_table(d) )
1069 printk("alloc_p2m_table failed (out-of-memory?)\n");
1070 goto nomem;
1073 else
1075 // external guests provide their own memory for their P2M maps.
1076 //
1077 ASSERT( d == page_get_owner(
1078 &frame_table[pagetable_get_pfn(d->arch.phys_table)]) );
1082 // Get rid of any shadow pages from any previous shadow mode.
1083 //
1084 free_shadow_pages(d);
1086 /*
1087 * Tear down its counts by disassembling its page-table-based ref counts.
1088 * Also remove CR3's gcount/tcount.
1089 * That leaves things like GDTs and LDTs and external refs intact.
1091 * Most pages will be writable tcount=0.
1092 * Some will still be L1 tcount=0 or L2 tcount=0.
1093 * Maybe some pages will be type none tcount=0.
1094 * Pages granted external writable refs (via grant tables?) will
1095 * still have a non-zero tcount. That's OK.
1097 * gcounts will generally be 1 for PGC_allocated.
1098 * GDTs and LDTs will have additional gcounts.
1099 * Any grant-table based refs will still be in the gcount.
1101 * We attempt to grab writable refs to each page (thus setting its type).
1102 * Immediately put back those type refs.
1104 * Assert that no pages are left with L1/L2/L3/L4 type.
1105 */
1106 audit_adjust_pgtables(d, -1, 1);
1108 d->arch.shadow_mode = mode;
1110 if ( shadow_mode_refcounts(d) )
1112 struct list_head *list_ent = d->page_list.next;
1113 while ( list_ent != &d->page_list )
1115 struct pfn_info *page = list_entry(list_ent, struct pfn_info, list);
1116 if ( !get_page_type(page, PGT_writable_page) )
1117 BUG();
1118 put_page_type(page);
1119 /*
1120 * We use tlbflush_timestamp as back pointer to smfn, and need to
1121 * clean it up.
1122 */
1123 if ( shadow_mode_external(d) )
1124 page->tlbflush_timestamp = 0;
1125 list_ent = page->list.next;
1129 audit_adjust_pgtables(d, 1, 1);
1131 return 0;
1133 nomem:
1134 if ( (new_modes & SHM_enable) )
1136 xfree(d->arch.shadow_ht);
1137 d->arch.shadow_ht = NULL;
1139 if ( (new_modes & SHM_log_dirty) )
1141 xfree(d->arch.shadow_dirty_bitmap);
1142 d->arch.shadow_dirty_bitmap = NULL;
1144 if ( (new_modes & SHM_translate) && !(new_modes & SHM_external) &&
1145 pagetable_get_paddr(d->arch.phys_table) )
1147 free_p2m_table(d);
1149 return -ENOMEM;
1153 int shadow_mode_enable(struct domain *d, unsigned int mode)
1154 {
1155 int rc;
1156 shadow_lock(d);
1157 rc = __shadow_mode_enable(d, mode);
1158 shadow_unlock(d);
1159 return rc;
1160 }
1162 static int shadow_mode_table_op(
1163 struct domain *d, dom0_shadow_control_t *sc)
1164 {
1165 unsigned int op = sc->op;
1166 int i, rc = 0;
1167 struct vcpu *v;
1169 ASSERT(shadow_lock_is_acquired(d));
1171 SH_VLOG("shadow mode table op %lx %lx count %d",
1172 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.guest_table), /* XXX SMP */
1173 (unsigned long)pagetable_get_pfn(d->vcpu[0]->arch.shadow_table), /* XXX SMP */
1174 d->arch.shadow_page_count);
1176 shadow_audit(d, 1);
1178 switch ( op )
1179 {
1180 case DOM0_SHADOW_CONTROL_OP_FLUSH:
1181 free_shadow_pages(d);
1183 d->arch.shadow_fault_count = 0;
1184 d->arch.shadow_dirty_count = 0;
1185 d->arch.shadow_dirty_net_count = 0;
1186 d->arch.shadow_dirty_block_count = 0;
1188 break;
1190 case DOM0_SHADOW_CONTROL_OP_CLEAN:
1191 free_shadow_pages(d);
1193 sc->stats.fault_count = d->arch.shadow_fault_count;
1194 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1195 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1196 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1198 d->arch.shadow_fault_count = 0;
1199 d->arch.shadow_dirty_count = 0;
1200 d->arch.shadow_dirty_net_count = 0;
1201 d->arch.shadow_dirty_block_count = 0;
1204 if ( (sc->dirty_bitmap == NULL) ||
1205 (d->arch.shadow_dirty_bitmap == NULL) )
1206 {
1207 rc = -EINVAL;
1208 break;
1209 }
1211 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1212 sc->pages = d->arch.shadow_dirty_bitmap_size;
1214 #define chunk (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
1215 for ( i = 0; i < sc->pages; i += chunk )
1216 {
1217 int bytes = ((((sc->pages - i) > chunk) ?
1218 chunk : (sc->pages - i)) + 7) / 8;
1220 if (copy_to_user(
1221 sc->dirty_bitmap + (i/(8*sizeof(unsigned long))),
1222 d->arch.shadow_dirty_bitmap +(i/(8*sizeof(unsigned long))),
1223 bytes))
1224 {
1225 rc = -EINVAL;
1226 break;
1227 }
1228 memset(
1229 d->arch.shadow_dirty_bitmap + (i/(8*sizeof(unsigned long))),
1230 0, bytes);
1231 }
1233 break;
1235 case DOM0_SHADOW_CONTROL_OP_PEEK:
1236 sc->stats.fault_count = d->arch.shadow_fault_count;
1237 sc->stats.dirty_count = d->arch.shadow_dirty_count;
1238 sc->stats.dirty_net_count = d->arch.shadow_dirty_net_count;
1239 sc->stats.dirty_block_count = d->arch.shadow_dirty_block_count;
1241 if ( (sc->dirty_bitmap == NULL) ||
1242 (d->arch.shadow_dirty_bitmap == NULL) )
1243 {
1244 rc = -EINVAL;
1245 break;
1246 }
1248 if(sc->pages > d->arch.shadow_dirty_bitmap_size)
1249 sc->pages = d->arch.shadow_dirty_bitmap_size;
1251 if (copy_to_user(sc->dirty_bitmap,
1252 d->arch.shadow_dirty_bitmap, (sc->pages+7)/8))
1253 {
1254 rc = -EINVAL;
1255 break;
1256 }
1258 break;
1260 default:
1261 rc = -EINVAL;
1262 break;
1263 }
1265 SH_VLOG("shadow mode table op : page count %d", d->arch.shadow_page_count);
1266 shadow_audit(d, 1);
1268 for_each_vcpu(d,v)
1269 __update_pagetables(v);
1271 return rc;
1272 }
1274 int shadow_mode_control(struct domain *d, dom0_shadow_control_t *sc)
1275 {
1276 unsigned int op = sc->op;
1277 int rc = 0;
1278 struct vcpu *v;
1280 if ( unlikely(d == current->domain) )
1281 {
1282 DPRINTK("Don't try to do a shadow op on yourself!\n");
1283 return -EINVAL;
1284 }
1286 domain_pause(d);
1288 shadow_lock(d);
1290 switch ( op )
1291 {
1292 case DOM0_SHADOW_CONTROL_OP_OFF:
1293 if ( shadow_mode_enabled(d) )
1294 {
1295 __shadow_sync_all(d);
1296 __shadow_mode_disable(d);
1297 }
1298 break;
1300 case DOM0_SHADOW_CONTROL_OP_ENABLE_TEST:
1301 free_shadow_pages(d);
1302 rc = __shadow_mode_enable(d, SHM_enable);
1303 break;
1305 case DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY:
1306 free_shadow_pages(d);
1307 rc = __shadow_mode_enable(
1308 d, d->arch.shadow_mode|SHM_enable|SHM_log_dirty);
1309 break;
1311 case DOM0_SHADOW_CONTROL_OP_ENABLE_TRANSLATE:
1312 free_shadow_pages(d);
1313 rc = __shadow_mode_enable(
1314 d, d->arch.shadow_mode|SHM_enable|SHM_refcounts|SHM_translate);
1315 break;
1317 default:
1318 rc = shadow_mode_enabled(d) ? shadow_mode_table_op(d, sc) : -EINVAL;
1319 break;
1320 }
1322 shadow_unlock(d);
1324 for_each_vcpu(d,v)
1325 update_pagetables(v);
1327 domain_unpause(d);
1329 return rc;
1330 }
1332 void shadow_mode_init(void)
1333 {
1334 }
1336 int _shadow_mode_refcounts(struct domain *d)
1337 {
1338 return shadow_mode_refcounts(d);
1339 }
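/*
 * set_p2m_entry() below installs a single pfn -> mfn translation in the
 * domain's phys_table (the p2m map), allocating and zeroing an L1 table on
 * demand when the covering L2 entry is not yet present.
 */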
1341 int
1342 set_p2m_entry(struct domain *d, unsigned long pfn, unsigned long mfn,
1343 struct domain_mmap_cache *l2cache,
1344 struct domain_mmap_cache *l1cache)
1346 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1347 l2_pgentry_t *l2, l2e;
1348 l1_pgentry_t *l1;
1349 struct pfn_info *l1page;
1350 unsigned long va = pfn << PAGE_SHIFT;
1352 ASSERT(tabpfn != 0);
1354 l2 = map_domain_page_with_cache(tabpfn, l2cache);
1355 l2e = l2[l2_table_offset(va)];
1356 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1358 l1page = alloc_domheap_page(NULL);
1359 if ( !l1page )
1361 unmap_domain_page_with_cache(l2, l2cache);
1362 return 0;
1365 l1 = map_domain_page_with_cache(page_to_pfn(l1page), l1cache);
1366 memset(l1, 0, PAGE_SIZE);
1367 unmap_domain_page_with_cache(l1, l1cache);
1369 l2e = l2e_from_page(l1page, __PAGE_HYPERVISOR);
1370 l2[l2_table_offset(va)] = l2e;
1372 unmap_domain_page_with_cache(l2, l2cache);
1374 l1 = map_domain_page_with_cache(l2e_get_pfn(l2e), l1cache);
1375 l1[l1_table_offset(va)] = l1e_from_pfn(mfn, __PAGE_HYPERVISOR);
1376 unmap_domain_page_with_cache(l1, l1cache);
1378 return 1;
1381 int
1382 alloc_p2m_table(struct domain *d)
1384 struct list_head *list_ent;
1385 struct pfn_info *page, *l2page;
1386 l2_pgentry_t *l2;
1387 unsigned long mfn, pfn;
1388 struct domain_mmap_cache l1cache, l2cache;
1390 l2page = alloc_domheap_page(NULL);
1391 if ( l2page == NULL )
1392 return 0;
1394 domain_mmap_cache_init(&l1cache);
1395 domain_mmap_cache_init(&l2cache);
1397 d->arch.phys_table = mk_pagetable(page_to_phys(l2page));
1398 l2 = map_domain_page_with_cache(page_to_pfn(l2page), &l2cache);
1399 memset(l2, 0, PAGE_SIZE);
1400 unmap_domain_page_with_cache(l2, &l2cache);
1402 list_ent = d->page_list.next;
1403 while ( list_ent != &d->page_list )
1405 page = list_entry(list_ent, struct pfn_info, list);
1406 mfn = page_to_pfn(page);
1407 pfn = get_pfn_from_mfn(mfn);
1408 ASSERT(pfn != INVALID_M2P_ENTRY);
1409 ASSERT(pfn < (1u<<20));
1411 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1413 list_ent = page->list.next;
1416 list_ent = d->xenpage_list.next;
1417 while ( list_ent != &d->xenpage_list )
1419 page = list_entry(list_ent, struct pfn_info, list);
1420 mfn = page_to_pfn(page);
1421 pfn = get_pfn_from_mfn(mfn);
1422 if ( (pfn != INVALID_M2P_ENTRY) &&
1423 (pfn < (1u<<20)) )
1425 set_p2m_entry(d, pfn, mfn, &l2cache, &l1cache);
1428 list_ent = page->list.next;
1431 domain_mmap_cache_destroy(&l2cache);
1432 domain_mmap_cache_destroy(&l1cache);
1434 return 1;
1437 void shadow_l1_normal_pt_update(
1438 struct domain *d,
1439 unsigned long pa, l1_pgentry_t gpte,
1440 struct domain_mmap_cache *cache)
1442 unsigned long sl1mfn;
1443 l1_pgentry_t *spl1e, spte;
1445 shadow_lock(d);
1447 sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow);
1448 if ( sl1mfn )
1450 SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte,
1451 (void *)pa, l1e_get_intpte(gpte));
1452 l1pte_propagate_from_guest(current->domain, gpte, &spte);
1454 spl1e = map_domain_page_with_cache(sl1mfn, cache);
1455 spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = spte;
1456 unmap_domain_page_with_cache(spl1e, cache);
1459 shadow_unlock(d);
1462 void shadow_l2_normal_pt_update(
1463 struct domain *d,
1464 unsigned long pa, l2_pgentry_t gpde,
1465 struct domain_mmap_cache *cache)
1467 unsigned long sl2mfn;
1468 l2_pgentry_t *spl2e;
1470 shadow_lock(d);
1472 sl2mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l2_shadow);
1473 if ( sl2mfn )
1475 SH_VVLOG("shadow_l2_normal_pt_update pa=%p, gpde=%" PRIpte,
1476 (void *)pa, l2e_get_intpte(gpde));
1477 spl2e = map_domain_page_with_cache(sl2mfn, cache);
1478 validate_pde_change(d, gpde,
1479 &spl2e[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)]);
1480 unmap_domain_page_with_cache(spl2e, cache);
1483 shadow_unlock(d);
1486 #if CONFIG_PAGING_LEVELS >= 3
1487 void shadow_l3_normal_pt_update(
1488 struct domain *d,
1489 unsigned long pa, l3_pgentry_t l3e,
1490 struct domain_mmap_cache *cache)
1492 unsigned long sl3mfn;
1493 pgentry_64_t *spl3e;
1495 shadow_lock(d);
1497 sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow);
1498 if ( sl3mfn )
1500 SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte,
1501 (void *)pa, l3e_get_intpte(l3e));
1502 spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache);
1503 validate_entry_change(d, (pgentry_64_t *) &l3e,
1504 &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)],
1505 shadow_type_to_level(PGT_l3_shadow));
1506 unmap_domain_page_with_cache(spl3e, cache);
1509 shadow_unlock(d);
1511 #endif
1513 #if CONFIG_PAGING_LEVELS >= 4
1514 void shadow_l4_normal_pt_update(
1515 struct domain *d,
1516 unsigned long pa, l4_pgentry_t l4e,
1517 struct domain_mmap_cache *cache)
1519 unsigned long sl4mfn;
1520 pgentry_64_t *spl4e;
1522 shadow_lock(d);
1524 sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow);
1525 if ( sl4mfn )
1527 SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte,
1528 (void *)pa, l4e_get_intpte(l4e));
1529 spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache);
1530 validate_entry_change(d, (pgentry_64_t *)&l4e,
1531 &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)],
1532 shadow_type_to_level(PGT_l4_shadow));
1533 unmap_domain_page_with_cache(spl4e, cache);
1536 shadow_unlock(d);
1538 #endif
1540 static void
1541 translate_l1pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l1mfn)
1543 int i;
1544 l1_pgentry_t *l1;
1546 l1 = map_domain_page(l1mfn);
1547 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1549 if ( is_guest_l1_slot(i) &&
1550 (l1e_get_flags(l1[i]) & _PAGE_PRESENT) )
1552 unsigned long mfn = l1e_get_pfn(l1[i]);
1553 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1554 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1555 l1[i] = l1e_from_pfn(gpfn, l1e_get_flags(l1[i]));
1558 unmap_domain_page(l1);
1561 // This is not general enough to handle arbitrary pagetables
1562 // with shared L1 pages, etc., but it is sufficient for bringing
1563 // up dom0.
1564 //
1565 void
1566 translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
1567 unsigned int type)
1569 int i;
1570 l2_pgentry_t *l2;
1572 ASSERT(shadow_mode_translate(d) && !shadow_mode_external(d));
1574 l2 = map_domain_page(l2mfn);
1575 for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
1577 if ( is_guest_l2_slot(type, i) &&
1578 (l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
1580 unsigned long mfn = l2e_get_pfn(l2[i]);
1581 unsigned long gpfn = __mfn_to_gpfn(d, mfn);
1582 ASSERT(l1e_get_pfn(p2m[gpfn]) == mfn);
1583 l2[i] = l2e_from_pfn(gpfn, l2e_get_flags(l2[i]));
1584 translate_l1pgtable(d, p2m, mfn);
1587 unmap_domain_page(l2);
1590 void
1591 remove_shadow(struct domain *d, unsigned long gpfn, u32 stype)
1593 unsigned long smfn;
1595 shadow_lock(d);
1597 while ( stype >= PGT_l1_shadow )
1599 smfn = __shadow_status(d, gpfn, stype);
1600 if ( smfn && MFN_PINNED(smfn) )
1601 shadow_unpin(smfn);
1602 stype -= PGT_l1_shadow;
1605 shadow_unlock(d);
1608 unsigned long
1609 gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
1611 ASSERT( shadow_mode_translate(d) );
1613 perfc_incrc(gpfn_to_mfn_foreign);
1615 unsigned long va = gpfn << PAGE_SHIFT;
1616 unsigned long tabpfn = pagetable_get_pfn(d->arch.phys_table);
1617 l2_pgentry_t *l2 = map_domain_page(tabpfn);
1618 l2_pgentry_t l2e = l2[l2_table_offset(va)];
1619 unmap_domain_page(l2);
1620 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
1622 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%" PRIpte "\n",
1623 d->domain_id, gpfn, l2e_get_intpte(l2e));
1624 return INVALID_MFN;
1626 l1_pgentry_t *l1 = map_domain_page(l2e_get_pfn(l2e));
1627 l1_pgentry_t l1e = l1[l1_table_offset(va)];
1628 unmap_domain_page(l1);
1630 #if 0
1631 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => %lx tabpfn=%lx l2e=%lx l1tab=%lx, l1e=%lx\n",
1632 d->domain_id, gpfn, l1_pgentry_val(l1e) >> PAGE_SHIFT, tabpfn, l2e, l1tab, l1e);
1633 #endif
1635 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
1637 printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%" PRIpte "\n",
1638 d->domain_id, gpfn, l1e_get_intpte(l1e));
1639 return INVALID_MFN;
1642 return l1e_get_pfn(l1e);
1645 static u32 remove_all_access_in_page(
1646 struct domain *d, unsigned long l1mfn, unsigned long forbidden_gmfn)
1648 l1_pgentry_t *pl1e = map_domain_page(l1mfn);
1649 l1_pgentry_t match, ol2e;
1650 unsigned long flags = _PAGE_PRESENT;
1651 int i;
1652 u32 count = 0;
1653 int is_l1_shadow =
1654 ((frame_table[l1mfn].u.inuse.type_info & PGT_type_mask) ==
1655 PGT_l1_shadow);
1657 match = l1e_from_pfn(forbidden_gmfn, flags);
1659 for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
1661 if ( l1e_has_changed(pl1e[i], match, flags) )
1662 continue;
1664 ol2e = pl1e[i];
1665 pl1e[i] = l1e_empty();
1666 count++;
1668 if ( is_l1_shadow )
1669 shadow_put_page_from_l1e(ol2e, d);
1670 else /* must be an hl2 page */
1671 put_page(&frame_table[forbidden_gmfn]);
1674 unmap_domain_page(pl1e);
1676 return count;
1679 static u32 __shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn)
1681 int i;
1682 struct shadow_status *a;
1683 u32 count = 0;
1685 if ( unlikely(!shadow_mode_enabled(d)) )
1686 return 0;
1688 ASSERT(shadow_lock_is_acquired(d));
1689 perfc_incrc(remove_all_access);
1691 for (i = 0; i < shadow_ht_buckets; i++)
1693 a = &d->arch.shadow_ht[i];
1694 while ( a && a->gpfn_and_flags )
1696 switch (a->gpfn_and_flags & PGT_type_mask)
1698 case PGT_l1_shadow:
1699 case PGT_l2_shadow:
1700 case PGT_l3_shadow:
1701 case PGT_l4_shadow:
1702 case PGT_hl2_shadow:
1703 count += remove_all_access_in_page(d, a->smfn, forbidden_gmfn);
1704 break;
1705 case PGT_snapshot:
1706 case PGT_writable_pred:
1707 // these can't hold refs to the forbidden page
1708 break;
1709 default:
1710 BUG();
1713 a = a->next;
1717 return count;
1720 void shadow_drop_references(
1721 struct domain *d, struct pfn_info *page)
1723 if ( likely(!shadow_mode_refcounts(d)) ||
1724 ((page->u.inuse.type_info & PGT_count_mask) == 0) )
1725 return;
1727 /* XXX This needs more thought... */
1728 printk("%s: needing to call __shadow_remove_all_access for mfn=%lx\n",
1729 __func__, page_to_pfn(page));
1730 printk("Before: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1731 page->count_info, page->u.inuse.type_info);
1733 shadow_lock(d);
1734 __shadow_remove_all_access(d, page_to_pfn(page));
1735 shadow_unlock(d);
1737 printk("After: mfn=%lx c=%08x t=%" PRtype_info "\n", page_to_pfn(page),
1738 page->count_info, page->u.inuse.type_info);
1741 /* XXX Needs more thought. Neither pretty nor fast: a place holder. */
1742 void shadow_sync_and_drop_references(
1743 struct domain *d, struct pfn_info *page)
1745 if ( likely(!shadow_mode_refcounts(d)) )
1746 return;
1748 shadow_lock(d);
1750 if ( page_out_of_sync(page) )
1751 __shadow_sync_mfn(d, page_to_pfn(page));
1753 __shadow_remove_all_access(d, page_to_pfn(page));
1755 shadow_unlock(d);
1758 /*
1759 * Local variables:
1760 * mode: C
1761 * c-set-style: "BSD"
1762 * c-basic-offset: 4
1763 * tab-width: 4
1764 * indent-tabs-mode: nil
1765 * End:
1766 */