
view xen/arch/x86/mm/hap/hap.c @ 19787:cecc76506afc

x86_64: don't allocate L1 per-domain page table pages in a single chunk

Instead, allocate them on demand, and adjust the consumer to no longer
assume the allocated space is contiguous.

This is another prerequisite for extending the number of vCPUs the hypervisor
can support per guest.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 18 10:05:23 2009 +0100 (2009-06-18)
parents 8dd5c3cae086
children aad8bb7f5e10
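
For illustration only, a minimal sketch (not part of this changeset) of what "no longer assume the allocated space is contiguous" means for a consumer: each per-domain L1 page table page is fetched through an accessor rather than by indexing into one large allocation. The field and helper names below are hypothetical; in this file the real accessor is perdomain_pt_page(d, i), used in hap_install_xen_entries_in_l2h().

/* Hypothetical sketch: look up (and lazily allocate) the i-th per-domain
 * L1 page table page instead of assuming one contiguous block of pages. */
static struct page_info *perdomain_l1_page(struct domain *d, unsigned int i)
{
    /* 'perdomain_l1' is a made-up per-domain array of page pointers. */
    if ( d->arch.perdomain_l1[i] == NULL )
        d->arch.perdomain_l1[i] =
            alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    return d->arch.perdomain_l1[i];
}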
line source
1 /******************************************************************************
2 * arch/x86/mm/hap/hap.c
3 *
4 * hardware assisted paging
5 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
6 * Parts of this code are Copyright (c) 2007 by XenSource Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
23 #include <xen/config.h>
24 #include <xen/types.h>
25 #include <xen/mm.h>
26 #include <xen/trace.h>
27 #include <xen/sched.h>
28 #include <xen/perfc.h>
29 #include <xen/irq.h>
30 #include <xen/domain_page.h>
31 #include <xen/guest_access.h>
32 #include <xen/keyhandler.h>
33 #include <asm/event.h>
34 #include <asm/page.h>
35 #include <asm/current.h>
36 #include <asm/flushtlb.h>
37 #include <asm/shared.h>
38 #include <asm/hap.h>
39 #include <asm/paging.h>
40 #include <asm/p2m.h>
41 #include <asm/domain.h>
42 #include <xen/numa.h>
44 #include "private.h"
46 /* Override macros from asm/page.h to make them work with mfn_t */
47 #undef mfn_to_page
48 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
49 #undef mfn_valid
50 #define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
51 #undef page_to_mfn
52 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
54 /************************************************/
55 /* HAP VRAM TRACKING SUPPORT */
56 /************************************************/
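/* VRAM dirty tracking reuses the log-dirty machinery: the pfns covering the
 * framebuffer are switched to p2m_ram_logdirty so that guest writes fault and
 * are recorded, and hap_track_dirty_vram() hands the resulting bitmap back to
 * the caller. */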
58 int hap_enable_vram_tracking(struct domain *d)
59 {
60 int i;
61 struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
63 if ( !dirty_vram )
64 return -EINVAL;
66 /* turn on PG_log_dirty bit in paging mode */
67 hap_lock(d);
68 d->arch.paging.mode |= PG_log_dirty;
69 hap_unlock(d);
71 /* set l1e entries of P2M table to be read-only. */
72 for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
73 p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
75 flush_tlb_mask(&d->domain_dirty_cpumask);
76 return 0;
77 }
79 int hap_disable_vram_tracking(struct domain *d)
80 {
81 int i;
82 struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
84 if ( !dirty_vram )
85 return -EINVAL;
87 hap_lock(d);
88 d->arch.paging.mode &= ~PG_log_dirty;
89 hap_unlock(d);
91 /* set l1e entries of P2M table back to normal (read-write) mode */
92 for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
93 p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw);
95 flush_tlb_mask(&d->domain_dirty_cpumask);
96 return 0;
97 }
99 void hap_clean_vram_tracking(struct domain *d)
100 {
101 int i;
102 struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
104 if ( !dirty_vram )
105 return;
107 /* set l1e entries of P2M table to be read-only. */
108 for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++)
109 p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
111 flush_tlb_mask(&d->domain_dirty_cpumask);
112 }
114 void hap_vram_tracking_init(struct domain *d)
115 {
116 paging_log_dirty_init(d, hap_enable_vram_tracking,
117 hap_disable_vram_tracking,
118 hap_clean_vram_tracking);
119 }
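/* Toplevel entry point for VRAM tracking: a non-zero nr (re)enables log-dirty
 * mode over [begin_pfn, begin_pfn + nr) and copies the dirty bitmap back to
 * the caller; nr == 0 tears the tracking state down again. */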
121 int hap_track_dirty_vram(struct domain *d,
122 unsigned long begin_pfn,
123 unsigned long nr,
124 XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
125 {
126 long rc = 0;
127 struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
129 if ( nr )
130 {
131 if ( paging_mode_log_dirty(d) && dirty_vram )
132 {
133 if ( begin_pfn != dirty_vram->begin_pfn ||
134 begin_pfn + nr != dirty_vram->end_pfn )
135 {
136 paging_log_dirty_disable(d);
137 dirty_vram->begin_pfn = begin_pfn;
138 dirty_vram->end_pfn = begin_pfn + nr;
139 rc = paging_log_dirty_enable(d);
140 if (rc != 0)
141 goto param_fail;
142 }
143 }
144 else if ( !paging_mode_log_dirty(d) && !dirty_vram )
145 {
146 rc = -ENOMEM;
147 if ( (dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
148 goto param_fail;
150 dirty_vram->begin_pfn = begin_pfn;
151 dirty_vram->end_pfn = begin_pfn + nr;
152 d->arch.hvm_domain.dirty_vram = dirty_vram;
153 hap_vram_tracking_init(d);
154 rc = paging_log_dirty_enable(d);
155 if (rc != 0)
156 goto param_fail;
157 }
158 else
159 {
160 if ( !paging_mode_log_dirty(d) && dirty_vram )
161 rc = -EINVAL;
162 else
163 rc = -ENODATA;
164 goto param_fail;
165 }
166 /* get the bitmap */
167 rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
168 }
169 else
170 {
171 if ( paging_mode_log_dirty(d) && dirty_vram ) {
172 rc = paging_log_dirty_disable(d);
173 xfree(dirty_vram);
174 dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
175 } else
176 rc = 0;
177 }
179 return rc;
181 param_fail:
182 if ( dirty_vram )
183 {
184 xfree(dirty_vram);
185 dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
186 }
187 return rc;
188 }
190 /************************************************/
191 /* HAP LOG DIRTY SUPPORT */
192 /************************************************/
194 /* hap code to call when log_dirty is enabled. Returns 0 if no problem is found. */
195 int hap_enable_log_dirty(struct domain *d)
196 {
197 /* turn on PG_log_dirty bit in paging mode */
198 hap_lock(d);
199 d->arch.paging.mode |= PG_log_dirty;
200 hap_unlock(d);
202 /* set l1e entries of P2M table to be read-only. */
203 p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
204 flush_tlb_mask(&d->domain_dirty_cpumask);
205 return 0;
206 }
208 int hap_disable_log_dirty(struct domain *d)
209 {
210 hap_lock(d);
211 d->arch.paging.mode &= ~PG_log_dirty;
212 hap_unlock(d);
214 /* set l1e entries of P2M table back to normal (read-write) mode */
215 p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
216 return 0;
217 }
219 void hap_clean_dirty_bitmap(struct domain *d)
220 {
221 /* set l1e entries of P2M table to be read-only. */
222 p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
223 flush_tlb_mask(&d->domain_dirty_cpumask);
224 }
226 void hap_logdirty_init(struct domain *d)
227 {
228 struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram;
229 if ( paging_mode_log_dirty(d) && dirty_vram )
230 {
231 paging_log_dirty_disable(d);
232 xfree(dirty_vram);
233 dirty_vram = d->arch.hvm_domain.dirty_vram = NULL;
234 }
236 /* Reinitialize logdirty mechanism */
237 paging_log_dirty_init(d, hap_enable_log_dirty,
238 hap_disable_log_dirty,
239 hap_clean_dirty_bitmap);
240 }
242 /************************************************/
243 /* HAP SUPPORT FUNCTIONS */
244 /************************************************/
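/* hap_alloc()/hap_free() manage the domain's HAP memory pool: pages come off
 * d->arch.paging.hap.freelist (zeroed on allocation) and go back onto it when
 * freed; the pool itself is sized by hap_set_allocation() below. */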
245 static struct page_info *hap_alloc(struct domain *d)
246 {
247 struct page_info *pg = NULL;
248 void *p;
250 ASSERT(hap_locked_by_me(d));
252 pg = page_list_remove_head(&d->arch.paging.hap.freelist);
253 if ( unlikely(!pg) )
254 return NULL;
256 d->arch.paging.hap.free_pages--;
258 p = hap_map_domain_page(page_to_mfn(pg));
259 ASSERT(p != NULL);
260 clear_page(p);
261 hap_unmap_domain_page(p);
263 return pg;
264 }
266 static void hap_free(struct domain *d, mfn_t mfn)
267 {
268 struct page_info *pg = mfn_to_page(mfn);
270 ASSERT(hap_locked_by_me(d));
272 d->arch.paging.hap.free_pages++;
273 page_list_add_tail(pg, &d->arch.paging.hap.freelist);
274 }
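/* P2M pages are taken from the same pool, but are assigned to the domain and
 * given a single reference so that hap_free_p2m_page() can return them to the
 * domheap later. */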
276 static struct page_info *hap_alloc_p2m_page(struct domain *d)
277 {
278 struct page_info *pg;
280 hap_lock(d);
281 pg = hap_alloc(d);
283 #if CONFIG_PAGING_LEVELS == 3
284 /* Under PAE mode, the top-level P2M table must be allocated below the 4GB
285 * boundary because h_cr3 is only 32 bits wide. We use alloc_domheap_page to
286 * force this requirement, and exchange the guaranteed 32-bit-clean
287 * page for the one we just hap_alloc()ed. */
288 if ( d->arch.paging.hap.p2m_pages == 0
289 && mfn_x(page_to_mfn(pg)) >= (1UL << (32 - PAGE_SHIFT)) )
290 {
291 free_domheap_page(pg);
292 pg = alloc_domheap_page(
293 NULL, MEMF_bits(32) | MEMF_node(domain_to_node(d)));
294 if ( likely(pg != NULL) )
295 {
296 void *p = hap_map_domain_page(page_to_mfn(pg));
297 clear_page(p);
298 hap_unmap_domain_page(p);
299 }
300 }
301 #endif
303 if ( likely(pg != NULL) )
304 {
305 d->arch.paging.hap.total_pages--;
306 d->arch.paging.hap.p2m_pages++;
307 page_set_owner(pg, d);
308 pg->count_info |= 1;
309 }
311 hap_unlock(d);
312 return pg;
313 }
315 void hap_free_p2m_page(struct domain *d, struct page_info *pg)
316 {
317 hap_lock(d);
318 ASSERT(page_get_owner(pg) == d);
319 /* Should have just the one ref we gave it in alloc_p2m_page() */
320 if ( (pg->count_info & PGC_count_mask) != 1 )
321 HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
322 pg->count_info, pg->u.inuse.type_info);
323 pg->count_info &= ~PGC_count_mask;
324 /* Free should not decrement domain's total allocation, since
325 * these pages were allocated without an owner. */
326 page_set_owner(pg, NULL);
327 free_domheap_page(pg);
328 d->arch.paging.hap.p2m_pages--;
329 ASSERT(d->arch.paging.hap.p2m_pages >= 0);
330 hap_unlock(d);
331 }
333 /* Return the size of the pool, rounded up to the nearest MB */
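/* (With 4KB pages, 20 - PAGE_SHIFT == 8, so e.g. a 300-page pool reports 2MB.) */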
334 static unsigned int
335 hap_get_allocation(struct domain *d)
336 {
337 unsigned int pg = d->arch.paging.hap.total_pages;
339 return ((pg >> (20 - PAGE_SHIFT))
340 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
341 }
343 /* Set the pool of pages to the required number of pages.
344 * Returns 0 for success, non-zero for failure. */
345 static unsigned int
346 hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
347 {
348 struct page_info *pg;
350 ASSERT(hap_locked_by_me(d));
352 while ( d->arch.paging.hap.total_pages != pages )
353 {
354 if ( d->arch.paging.hap.total_pages < pages )
355 {
356 /* Need to allocate more memory from domheap */
357 pg = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
358 if ( pg == NULL )
359 {
360 HAP_PRINTK("failed to allocate hap pages.\n");
361 return -ENOMEM;
362 }
363 d->arch.paging.hap.free_pages++;
364 d->arch.paging.hap.total_pages++;
365 page_list_add_tail(pg, &d->arch.paging.hap.freelist);
366 }
367 else if ( d->arch.paging.hap.total_pages > pages )
368 {
369 /* Need to return memory to domheap */
370 pg = page_list_remove_head(&d->arch.paging.hap.freelist);
371 ASSERT(pg);
372 d->arch.paging.hap.free_pages--;
373 d->arch.paging.hap.total_pages--;
374 free_domheap_page(pg);
375 }
377 /* Check to see if we need to yield and try again */
378 if ( preempted && hypercall_preempt_check() )
379 {
380 *preempted = 1;
381 return 0;
382 }
383 }
385 return 0;
386 }
388 #if CONFIG_PAGING_LEVELS == 4
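/* Fill in the Xen-owned parts of a new top-level (L4) monitor table: the
 * idle domain's Xen mappings, this domain's per-domain mappings, a linear
 * self-mapping, and the domain's P2M table. */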
389 static void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t l4mfn)
390 {
391 struct domain *d = v->domain;
392 l4_pgentry_t *l4e;
394 l4e = hap_map_domain_page(l4mfn);
395 ASSERT(l4e != NULL);
397 /* Copy the common Xen mappings from the idle domain */
398 memcpy(&l4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
399 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
400 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
402 /* Install the per-domain mappings for this domain */
403 l4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
404 l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
405 __PAGE_HYPERVISOR);
407 /* Install a linear mapping */
408 l4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
409 l4e_from_pfn(mfn_x(l4mfn), __PAGE_HYPERVISOR);
411 /* Install the domain-specific P2M table */
412 l4e[l4_table_offset(RO_MPT_VIRT_START)] =
413 l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
414 __PAGE_HYPERVISOR);
416 hap_unmap_domain_page(l4e);
417 }
418 #endif /* CONFIG_PAGING_LEVELS == 4 */
420 #if CONFIG_PAGING_LEVELS == 3
421 static void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t l2hmfn)
422 {
423 struct domain *d = v->domain;
424 l2_pgentry_t *l2e;
425 l3_pgentry_t *p2m;
426 int i;
428 l2e = hap_map_domain_page(l2hmfn);
429 ASSERT(l2e != NULL);
431 /* Copy the common Xen mappings from the idle domain */
432 memcpy(&l2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
433 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
434 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
436 /* Install the per-domain mappings for this domain */
437 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
438 l2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
439 l2e_from_pfn(
440 mfn_x(page_to_mfn(perdomain_pt_page(d, i))),
441 __PAGE_HYPERVISOR);
443 /* No linear mapping; will be set up by monitor-table constructor. */
444 for ( i = 0; i < 4; i++ )
445 l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
446 l2e_empty();
448 /* Install the domain-specific p2m table */
449 ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
450 p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
451 for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
452 {
453 l2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
454 (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
455 ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
456 __PAGE_HYPERVISOR)
457 : l2e_empty();
458 }
459 hap_unmap_domain_page(p2m);
460 hap_unmap_domain_page(l2e);
461 }
462 #endif
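/* Build the monitor pagetable Xen runs on while this vcpu executes: a single
 * L4 page on 64-bit builds, or an L3 page plus a Xen L2 page installed in
 * slot 3 on PAE builds. */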
464 static mfn_t hap_make_monitor_table(struct vcpu *v)
465 {
466 struct domain *d = v->domain;
467 struct page_info *pg;
469 ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
471 #if CONFIG_PAGING_LEVELS == 4
472 {
473 mfn_t m4mfn;
474 if ( (pg = hap_alloc(d)) == NULL )
475 goto oom;
476 m4mfn = page_to_mfn(pg);
477 hap_install_xen_entries_in_l4(v, m4mfn);
478 return m4mfn;
479 }
480 #elif CONFIG_PAGING_LEVELS == 3
481 {
482 mfn_t m3mfn, m2mfn;
483 l3_pgentry_t *l3e;
484 l2_pgentry_t *l2e;
485 int i;
487 if ( (pg = hap_alloc(d)) == NULL )
488 goto oom;
489 m3mfn = page_to_mfn(pg);
491 /* Install a monitor l2 table in slot 3 of the l3 table.
492 * This is used for all Xen entries, including linear maps
493 */
494 if ( (pg = hap_alloc(d)) == NULL )
495 goto oom;
496 m2mfn = page_to_mfn(pg);
497 l3e = hap_map_domain_page(m3mfn);
498 l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
499 hap_install_xen_entries_in_l2h(v, m2mfn);
500 /* Install the monitor's own linear map */
501 l2e = hap_map_domain_page(m2mfn);
502 for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
503 l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
504 (l3e_get_flags(l3e[i]) & _PAGE_PRESENT)
505 ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR)
506 : l2e_empty();
507 hap_unmap_domain_page(l2e);
508 hap_unmap_domain_page(l3e);
510 HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
511 return m3mfn;
512 }
513 #endif
515 oom:
516 HAP_ERROR("out of memory building monitor pagetable\n");
517 domain_crash(d);
518 return _mfn(INVALID_MFN);
519 }
521 static void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
522 {
523 struct domain *d = v->domain;
525 #if CONFIG_PAGING_LEVELS == 3
526 /* Need to destroy the l2 monitor page in slot 3 too */
527 {
528 l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
529 ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
530 hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
531 hap_unmap_domain_page(l3e);
532 }
533 #endif
535 /* Put the memory back in the pool */
536 hap_free(d, mmfn);
537 }
539 /************************************************/
540 /* HAP DOMAIN LEVEL FUNCTIONS */
541 /************************************************/
542 void hap_domain_init(struct domain *d)
543 {
544 hap_lock_init(d);
545 INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
546 }
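/* Enable HAP for a domain: if it has no pool yet, seed it with 256 pages
 * (1MB), and for translated guests allocate the P2M table from that pool. */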
548 /* return 0 for success, -errno for failure */
549 int hap_enable(struct domain *d, u32 mode)
550 {
551 unsigned int old_pages;
552 int rv = 0;
554 domain_pause(d);
555 /* error check */
556 if ( (d == current->domain) )
557 {
558 rv = -EINVAL;
559 goto out;
560 }
562 old_pages = d->arch.paging.hap.total_pages;
563 if ( old_pages == 0 )
564 {
565 unsigned int r;
566 hap_lock(d);
567 r = hap_set_allocation(d, 256, NULL);
568 hap_unlock(d);
569 if ( r != 0 )
570 {
571 hap_set_allocation(d, 0, NULL);
572 rv = -ENOMEM;
573 goto out;
574 }
575 }
577 /* allocate P2m table */
578 if ( mode & PG_translate )
579 {
580 rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
581 if ( rv != 0 )
582 goto out;
583 }
585 d->arch.paging.mode = mode | PG_HAP_enable;
587 out:
588 domain_unpause(d);
589 return rv;
590 }
592 void hap_final_teardown(struct domain *d)
593 {
594 if ( d->arch.paging.hap.total_pages != 0 )
595 hap_teardown(d);
597 p2m_teardown(d);
598 ASSERT(d->arch.paging.hap.p2m_pages == 0);
599 }
601 void hap_teardown(struct domain *d)
602 {
603 struct vcpu *v;
604 mfn_t mfn;
606 ASSERT(d->is_dying);
607 ASSERT(d != current->domain);
609 if ( !hap_locked_by_me(d) )
610 hap_lock(d); /* Keep various asserts happy */
612 if ( paging_mode_enabled(d) )
613 {
614 /* release the monitor table held by each vcpu */
615 for_each_vcpu ( d, v )
616 {
617 if ( v->arch.paging.mode && paging_mode_external(d) )
618 {
619 mfn = pagetable_get_mfn(v->arch.monitor_table);
620 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
621 hap_destroy_monitor_table(v, mfn);
622 v->arch.monitor_table = pagetable_null();
623 }
624 }
625 }
627 if ( d->arch.paging.hap.total_pages != 0 )
628 {
629 HAP_PRINTK("teardown of domain %u starts."
630 " pages total = %u, free = %u, p2m=%u\n",
631 d->domain_id,
632 d->arch.paging.hap.total_pages,
633 d->arch.paging.hap.free_pages,
634 d->arch.paging.hap.p2m_pages);
635 hap_set_allocation(d, 0, NULL);
636 HAP_PRINTK("teardown done."
637 " pages total = %u, free = %u, p2m=%u\n",
638 d->arch.paging.hap.total_pages,
639 d->arch.paging.hap.free_pages,
640 d->arch.paging.hap.p2m_pages);
641 ASSERT(d->arch.paging.hap.total_pages == 0);
642 }
644 d->arch.paging.mode &= ~PG_log_dirty;
646 hap_unlock(d);
647 }
649 int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
650 XEN_GUEST_HANDLE(void) u_domctl)
651 {
652 int rc, preempted = 0;
654 switch ( sc->op )
655 {
656 case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
657 hap_lock(d);
658 rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
659 hap_unlock(d);
660 if ( preempted )
661 /* Not finished. Set up to re-run the call. */
662 rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h",
663 u_domctl);
664 else
665 /* Finished. Return the new allocation */
666 sc->mb = hap_get_allocation(d);
667 return rc;
668 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
669 sc->mb = hap_get_allocation(d);
670 return 0;
671 default:
672 HAP_ERROR("Bad hap domctl op %u\n", sc->op);
673 return -EINVAL;
674 }
675 }
677 void hap_vcpu_init(struct vcpu *v)
678 {
679 v->arch.paging.mode = &hap_paging_real_mode;
680 }
682 /************************************************/
683 /* HAP PAGING MODE FUNCTIONS */
684 /************************************************/
685 /*
686 * HAP guests can handle page faults (in the guest page tables) without
687 * needing any action from Xen, so we should not be intercepting them.
688 */
689 static int hap_page_fault(struct vcpu *v, unsigned long va,
690 struct cpu_user_regs *regs)
691 {
692 HAP_ERROR("Intercepted a guest #PF (%u:%u) with HAP enabled.\n",
693 v->domain->domain_id, v->vcpu_id);
694 domain_crash(v->domain);
695 return 0;
696 }
698 /*
699 * HAP guests can handle invlpg without needing any action from Xen, so
700 * it should not be intercepted.
701 */
702 static int hap_invlpg(struct vcpu *v, unsigned long va)
703 {
704 HAP_ERROR("Intercepted a guest INVLPG (%u:%u) with HAP enabled.\n",
705 v->domain->domain_id, v->vcpu_id);
706 domain_crash(v->domain);
707 return 0;
708 }
710 static void hap_update_cr3(struct vcpu *v, int do_locking)
711 {
712 v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
713 hvm_update_guest_cr(v, 3);
714 }
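/* Called when the guest's paging state (CR0/CR4/EFER) changes: pick the
 * paging_mode matching the new state and make sure this vcpu has a monitor
 * table. */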
716 static void hap_update_paging_modes(struct vcpu *v)
717 {
718 struct domain *d = v->domain;
720 hap_lock(d);
722 v->arch.paging.mode =
723 !hvm_paging_enabled(v) ? &hap_paging_real_mode :
724 hvm_long_mode_enabled(v) ? &hap_paging_long_mode :
725 hvm_pae_enabled(v) ? &hap_paging_pae_mode :
726 &hap_paging_protected_mode;
728 if ( pagetable_is_null(v->arch.monitor_table) )
729 {
730 mfn_t mmfn = hap_make_monitor_table(v);
731 v->arch.monitor_table = pagetable_from_mfn(mmfn);
732 make_cr3(v, mfn_x(mmfn));
733 hvm_update_host_cr3(v);
734 }
736 /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
737 hap_update_cr3(v, 0);
739 hap_unlock(d);
740 }
742 #if CONFIG_PAGING_LEVELS == 3
743 static void p2m_install_entry_in_monitors(struct domain *d, l3_pgentry_t *l3e)
744 /* Special case, only used for PAE hosts: update the mapping of the p2m
745 * table. This is trivial in other paging modes (one top-level entry
746 * points to the top-level p2m, no maintenance needed), but PAE makes
747 * life difficult by needing a copy of the p2m table in eight l2h slots
748 * in the monitor table. This function makes fresh copies when a p2m
749 * l3e changes. */
750 {
751 l2_pgentry_t *ml2e;
752 struct vcpu *v;
753 unsigned int index;
755 index = ((unsigned long)l3e & ~PAGE_MASK) / sizeof(l3_pgentry_t);
756 ASSERT(index < MACHPHYS_MBYTES>>1);
758 for_each_vcpu ( d, v )
759 {
760 if ( pagetable_get_pfn(v->arch.monitor_table) == 0 )
761 continue;
763 ASSERT(paging_mode_external(v->domain));
765 if ( v == current ) /* OK to use linear map of monitor_table */
766 ml2e = __linear_l2_table + l2_linear_offset(RO_MPT_VIRT_START);
767 else {
768 l3_pgentry_t *ml3e;
769 ml3e = hap_map_domain_page(
770 pagetable_get_mfn(v->arch.monitor_table));
771 ASSERT(l3e_get_flags(ml3e[3]) & _PAGE_PRESENT);
772 ml2e = hap_map_domain_page(_mfn(l3e_get_pfn(ml3e[3])));
773 ml2e += l2_table_offset(RO_MPT_VIRT_START);
774 hap_unmap_domain_page(ml3e);
775 }
776 ml2e[index] = l2e_from_pfn(l3e_get_pfn(*l3e), __PAGE_HYPERVISOR);
777 if ( v != current )
778 hap_unmap_domain_page(ml2e);
779 }
780 }
781 #endif
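/* P2M updates for HAP domains come through here: install the new entry and
 * flush stale translations if a present leaf mapping (an L1 entry, or a PSE
 * L2 entry) was modified. */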
783 static void
784 hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
785 mfn_t table_mfn, l1_pgentry_t new, unsigned int level)
786 {
787 uint32_t old_flags;
789 hap_lock(v->domain);
791 old_flags = l1e_get_flags(*p);
792 safe_write_pte(p, new);
793 if ( (old_flags & _PAGE_PRESENT)
794 && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
795 flush_tlb_mask(&v->domain->domain_dirty_cpumask);
797 #if CONFIG_PAGING_LEVELS == 3
798 /* install P2M in monitor table for PAE Xen */
799 if ( level == 3 )
800 /* We have written to the p2m l3: need to sync the per-vcpu
801 * copies of it in the monitor tables */
802 p2m_install_entry_in_monitors(v->domain, (l3_pgentry_t *)p);
803 #endif
805 hap_unlock(v->domain);
806 }
808 static unsigned long hap_gva_to_gfn_real_mode(
809 struct vcpu *v, unsigned long gva, uint32_t *pfec)
810 {
811 return ((paddr_t)gva >> PAGE_SHIFT);
812 }
814 /* Entry points into this mode of the hap code. */
815 struct paging_mode hap_paging_real_mode = {
816 .page_fault = hap_page_fault,
817 .invlpg = hap_invlpg,
818 .gva_to_gfn = hap_gva_to_gfn_real_mode,
819 .update_cr3 = hap_update_cr3,
820 .update_paging_modes = hap_update_paging_modes,
821 .write_p2m_entry = hap_write_p2m_entry,
822 .guest_levels = 1
823 };
825 struct paging_mode hap_paging_protected_mode = {
826 .page_fault = hap_page_fault,
827 .invlpg = hap_invlpg,
828 .gva_to_gfn = hap_gva_to_gfn_2level,
829 .update_cr3 = hap_update_cr3,
830 .update_paging_modes = hap_update_paging_modes,
831 .write_p2m_entry = hap_write_p2m_entry,
832 .guest_levels = 2
833 };
835 struct paging_mode hap_paging_pae_mode = {
836 .page_fault = hap_page_fault,
837 .invlpg = hap_invlpg,
838 .gva_to_gfn = hap_gva_to_gfn_3level,
839 .update_cr3 = hap_update_cr3,
840 .update_paging_modes = hap_update_paging_modes,
841 .write_p2m_entry = hap_write_p2m_entry,
842 .guest_levels = 3
843 };
845 struct paging_mode hap_paging_long_mode = {
846 .page_fault = hap_page_fault,
847 .invlpg = hap_invlpg,
848 .gva_to_gfn = hap_gva_to_gfn_4level,
849 .update_cr3 = hap_update_cr3,
850 .update_paging_modes = hap_update_paging_modes,
851 .write_p2m_entry = hap_write_p2m_entry,
852 .guest_levels = 4
853 };
855 /*
856 * Local variables:
857 * mode: C
858 * c-set-style: "BSD"
859 * c-basic-offset: 4
860 * indent-tabs-mode: nil
861 * End:
862 */