ia64/xen-unstable

view xen/arch/x86/mm/paging.c @ 19496:6dc5c26a75b9

x86: A further fix to paging_log_dirty_op() -- do not unmap_domain_page(NULL)

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Apr 02 07:53:11 2009 +0100 (2009-04-02)
parents 2491158ccd17
children 43833a6d50a5

/******************************************************************************
 * arch/x86/mm/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/init.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>
#include <xen/numa.h>
#include <xsm/xsm.h>

#define hap_enabled(d) (is_hvm_domain(d) && (d)->arch.hvm_domain.hap_enabled)

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

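/* The log-dirty bitmap is kept as a sparse four-level trie rooted at
 * d->arch.paging.log_dirty.top: a top node of MFNs points to L3 nodes,
 * which point to L2 nodes, which point to leaf pages holding the actual
 * dirty bitmap.  A guest pfn is located by the L4/L3/L2/L1_LOGDIRTY_IDX()
 * indices derived from it (see paging_mark_dirty() below).  Nodes and
 * leaves are allocated lazily, so a sparsely-dirtied guest needs only a
 * few pages of bitmap. */
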
/* The log-dirty lock.  This protects the log-dirty bitmap from
 * concurrent accesses (and teardowns, etc).
 *
 * Locking discipline: always acquire the shadow or HAP lock before this one.
 *
 * Because mark_dirty is called from a lot of places, the log-dirty lock
 * may be acquired with the shadow or HAP locks already held.  When the
 * log-dirty code makes callbacks into HAP or shadow code to reset
 * various traps that will trigger the mark_dirty calls, it must *not*
 * have the log-dirty lock held, or it risks deadlock.  Because the only
 * purpose of those calls is to make sure that *guest* actions will
 * cause mark_dirty to be called (hypervisor actions explicitly call it
 * anyway), it is safe to release the log-dirty lock before the callback
 * as long as the domain is paused for the entire operation. */

#define log_dirty_lock_init(_d)                                  \
    do {                                                         \
        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);       \
        (_d)->arch.paging.log_dirty.locker = -1;                 \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";  \
    } while (0)

#define log_dirty_lock(_d)                                                   \
    do {                                                                     \
        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
        {                                                                    \
            printk("Error: paging log dirty lock held by %s\n",             \
                   (_d)->arch.paging.log_dirty.locker_function);             \
            BUG();                                                           \
        }                                                                    \
        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
        (_d)->arch.paging.log_dirty.locker = current->processor;             \
        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
    } while (0)

#define log_dirty_unlock(_d)                                               \
    do {                                                                   \
        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor);  \
        (_d)->arch.paging.log_dirty.locker = -1;                           \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";            \
        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                    \
    } while (0)

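/* Allocate one page for the log-dirty trie.  The page is anonymous (owner
 * NULL) but taken from the domain's NUMA node where possible.  On success
 * it is returned still mapped through *mapping_p; the caller must
 * unmap_domain_page() it when finished.  On failure, failed_allocs is
 * bumped and INVALID_MFN is returned. */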
static mfn_t paging_new_log_dirty_page(struct domain *d, void **mapping_p)
{
    mfn_t mfn;
    struct page_info *page;

    page = alloc_domheap_page(NULL, MEMF_node(domain_to_node(d)));
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return _mfn(INVALID_MFN);
    }

    d->arch.paging.log_dirty.allocs++;
    mfn = page_to_mfn(page);
    *mapping_p = map_domain_page(mfn_x(mfn));

    return mfn;
}

static mfn_t paging_new_log_dirty_leaf(
    struct domain *d, unsigned long **leaf_p)
{
    mfn_t mfn = paging_new_log_dirty_page(d, (void **)leaf_p);
    if ( mfn_valid(mfn) )
        clear_page(*leaf_p);
    return mfn;
}

static mfn_t paging_new_log_dirty_node(struct domain *d, mfn_t **node_p)
{
    int i;
    mfn_t mfn = paging_new_log_dirty_page(d, (void **)node_p);
    if ( mfn_valid(mfn) )
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            (*node_p)[i] = _mfn(INVALID_MFN);
    return mfn;
}

int paging_alloc_log_dirty_bitmap(struct domain *d)
{
    mfn_t *mapping;

    if ( mfn_valid(d->arch.paging.log_dirty.top) )
        return 0;

    d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d, &mapping);
    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
        /* Clear error indicator since we're reporting this one */
        d->arch.paging.log_dirty.failed_allocs = 0;
        return -ENOMEM;
    }
    unmap_domain_page(mapping);

    return 0;
}

static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    free_domheap_page(mfn_to_page(mfn));
}

void paging_free_log_dirty_bitmap(struct domain *d)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
        return;

    dprintk(XENLOG_DEBUG, "%s: used %d pages for domain %d dirty logging\n",
            __FUNCTION__, d->arch.paging.log_dirty.allocs, d->domain_id);

    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));

    for ( i4 = 0; i4 < LOGDIRTY_NODE_ENTRIES; i4++ )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(mfn_x(l4[i4]));

        for ( i3 = 0; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(mfn_x(l3[i3]));

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
        }

        unmap_domain_page(l3);
        paging_free_log_dirty_page(d, l4[i4]);
    }

    unmap_domain_page(l4);
    paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);

    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
    ASSERT(d->arch.paging.log_dirty.allocs == 0);
    d->arch.paging.log_dirty.failed_allocs = 0;
}

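/* Enabling and disabling log-dirty mode both pause the domain and make the
 * mode-specific (shadow or HAP) callback with the log-dirty lock *not* held,
 * as required by the locking discipline described above. */
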
int paging_log_dirty_enable(struct domain *d)
{
    int ret;

    domain_pause(d);
    log_dirty_lock(d);

    if ( paging_mode_log_dirty(d) )
    {
        ret = -EINVAL;
        goto out;
    }

    ret = paging_alloc_log_dirty_bitmap(d);
    if ( ret != 0 )
    {
        paging_free_log_dirty_bitmap(d);
        goto out;
    }

    log_dirty_unlock(d);

    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.enable_log_dirty(d);

    /* Possibility of leaving the bitmap allocated here but it'll be
     * tidied on domain teardown. */

    domain_unpause(d);
    return ret;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return ret;
}

int paging_log_dirty_disable(struct domain *d)
{
    int ret;

    domain_pause(d);
    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
    log_dirty_lock(d);
    if ( !paging_mode_log_dirty(d) )
        paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
    domain_unpause(d);

    return ret;
}

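/* paging_mark_dirty() below walks the trie for the pfn being dirtied and
 * allocates any missing intermediate nodes and the leaf bitmap page on the
 * way down.  If an allocation fails, the bit is silently dropped and the
 * failure is recorded in failed_allocs, which paging_log_dirty_op() later
 * reports to the toolstack as -ENOMEM. */
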
/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
{
    unsigned long pfn;
    mfn_t gmfn;
    int changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int i1, i2, i3, i4;

    gmfn = _mfn(guest_mfn);

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
        return;

    log_dirty_lock(d);

    ASSERT(mfn_valid(d->arch.paging.log_dirty.top));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = get_gpfn_from_mfn(mfn_x(gmfn));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn)) )
        goto out;

    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);

    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        mfn = l4[i4] = paging_new_log_dirty_node(d, &l3);
    else
        l3 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l4);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        mfn = l3[i3] = paging_new_log_dirty_node(d, &l2);
    else
        l2 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l3);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        mfn = l2[i2] = paging_new_log_dirty_leaf(d, &l1);
    else
        l1 = map_domain_page(mfn_x(mfn));
    unmap_domain_page(l2);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

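    /* The non-atomic __test_and_set_bit() is safe here: all writers of the
     * bitmap serialise on the log-dirty lock, which we hold. */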
    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
                     mfn_x(gmfn), pfn, d->domain_id);
        d->arch.paging.log_dirty.dirty_count++;
    }

 out:
    log_dirty_unlock(d);
}

/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4, *l3, *l2;
    unsigned long *l1;
    int i4, i3, i2;

    domain_pause(d);
    log_dirty_lock(d);

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( clean )
    {
        d->arch.paging.log_dirty.fault_count = 0;
        d->arch.paging.log_dirty.dirty_count = 0;
    }

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( (peek || clean) && !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        rv = -EINVAL; /* perhaps should be ENOMEM? */
        goto out;
    }

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
        printk("%s: %d failed page allocs while logging dirty pages\n",
               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
        rv = -ENOMEM;
        goto out;
    }

    pages = 0;
    l4 = (mfn_valid(d->arch.paging.log_dirty.top) ?
          map_domain_page(mfn_x(d->arch.paging.log_dirty.top)) : NULL);

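    /* Any of l4/l3/l2 below may be NULL where the corresponding trie node
     * was never allocated; unallocated ranges read back as all-zero via the
     * static 'zeroes' page.  The unmap calls are therefore guarded so that
     * unmap_domain_page() is never passed a NULL pointer (the fix made by
     * this changeset). */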
    for ( i4 = 0;
          (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES);
          i4++ )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL;
        for ( i3 = 0;
              (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES);
              i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(mfn_x(l3[i3])) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(mfn_x(l2[i2])) : zeroes);
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                if ( likely(peek) )
                {
                    if ( copy_to_guest_offset(sc->dirty_bitmap, pages >> 3,
                                              (uint8_t *)l1, bytes) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                if ( clean && l1 != zeroes )
                    clear_page(l1);
                pages += bytes << 3;
                if ( l1 != zeroes )
                    unmap_domain_page(l1);
            }
            if ( l2 )
                unmap_domain_page(l2);
        }
        if ( l3 )
            unmap_domain_page(l3);
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( pages < sc->pages )
        sc->pages = pages;

    log_dirty_unlock(d);

    if ( clean )
    {
        /* We also need to call the clean_dirty_bitmap() function of the
         * specific paging mode (shadow or HAP).  Safe because the domain
         * is paused. */
        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
    }
    domain_unpause(d);
    return rv;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return rv;
}

/* Note that this function takes three function pointers.  Callers must
 * supply these functions for the log-dirty code to call.  This function is
 * usually invoked when paging is enabled; see shadow_enable() and
 * hap_enable() for reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
void paging_log_dirty_init(struct domain *d,
                           int  (*enable_log_dirty)(struct domain *d),
                           int  (*disable_log_dirty)(struct domain *d),
                           void (*clean_dirty_bitmap)(struct domain *d))
{
    /* We initialize the log-dirty lock first */
    log_dirty_lock_init(d);

    d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
    d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
    d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
    d->arch.paging.log_dirty.top = _mfn(INVALID_MFN);
}

/* This function frees the log-dirty bitmap resources. */
void paging_log_dirty_teardown(struct domain *d)
{
    log_dirty_lock(d);
    paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
}
/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    /* The order of the *_init calls below is important, as the later
     * ones may rewrite some common fields.  Shadow pagetables are the
     * default... */
    shadow_domain_init(d);

    /* ... but we will use hardware assistance if it's available. */
    if ( hap_enabled(d) )
        hap_domain_init(d);

    return 0;
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}

int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
                  XEN_GUEST_HANDLE(void) u_domctl)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu[0] == NULL) )
    {
        PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n",
                     d->domain_id);
        return -EINVAL;
    }

    rc = xsm_shadow_control(d, sc->op);
    if ( rc )
        return rc;

    /* Code to handle log-dirty.  Note that some log-dirty operations
     * piggy-back on shadow operations.  For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled.  If it is, we disable log-dirty and continue with the
     * shadow code.  For this reason, we need to further dispatch the domctl
     * to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d);

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
            return paging_log_dirty_enable(d);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( paging_mode_log_dirty(d) )
            if ( (rc = paging_log_dirty_disable(d)) != 0 )
                return rc;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        return paging_log_dirty_op(d, sc);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_teardown(d);
    else
        shadow_teardown(d);

    /* clean up log dirty resources. */
    paging_log_dirty_teardown(d);

    /* Move populate-on-demand cache back to domain_list for destruction */
    p2m_pod_empty_cache(d);
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    if ( hap_enabled(d) )
        return hap_enable(d, mode | PG_HAP_enable);
    else
        return shadow_enable(d, mode | PG_SH_enable);
}

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( v->arch.paging.mode )
                printk("shadowed %u-on-%u\n",
                       v->arch.paging.mode->guest_levels,
                       v->arch.paging.mode->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && v->arch.paging.mode )
            printk("hap, %u levels\n",
                   v->arch.paging.mode->guest_levels);
        else
            printk("none\n");
    }
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */