
view xen/arch/x86/mm/paging.c @ 15812:86a154e1ef5d

[HVM] Shadow: don't shadow the p2m table.
For HVM vcpus with paging disabled, we used to shadow the p2m table,
and skip the p2m lookup to go from gfn to mfn. Instead, we now
provide a simple pagetable that gives a one-to-one mapping of 4GB, and
shadow that, making the translations from gfn to mfn via the p2m.
This removes the paging-disabled special-case code from the shadow
fault handler, and allows us to expand the p2m interface, since all HVM
translations now go through the same p2m lookups.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Fri Aug 31 11:06:22 2007 +0100 (2007-08-31)
parents 7953164cebb6
children 96f64f4c42f0
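
Illustrative note (not part of the changeset): with the paging-disabled special case removed, the frame numbers the shadow code reads out of the one-to-one guest pagetable are gfns, and they are translated through the p2m rather than being used as mfns directly. A minimal sketch of that path is below; it is a fragment of a hypothetical translation helper, and gfn_to_mfn() stands in for whichever lookup the p2m interface provides.

    unsigned long gfn = gva >> PAGE_SHIFT;   /* 1:1 pagetable: gva == gpa  */
    mfn_t mfn = gfn_to_mfn(d, gfn);          /* translate via the p2m      */
    if ( !mfn_valid(mfn) )
        return 0;                            /* not in the pseudo-phys map */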
/******************************************************************************
 * arch/x86/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/init.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/guest_access.h>

/* Xen command-line option to enable hardware-assisted paging */
int opt_hap_enabled;
boolean_param("hap", opt_hap_enabled);

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) (frame_table + mfn_x(_m))
#undef mfn_valid
#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
#undef page_to_mfn
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))

/* The log-dirty lock. This protects the log-dirty bitmap from
 * concurrent accesses (and teardowns, etc).
 *
 * Locking discipline: always acquire the shadow or HAP lock before this one.
 *
 * Because mark_dirty is called from a lot of places, the log-dirty lock
 * may be acquired with the shadow or HAP locks already held. When the
 * log-dirty code makes callbacks into HAP or shadow code to reset
 * various traps that will trigger the mark_dirty calls, it must *not*
 * have the log-dirty lock held, or it risks deadlock. Because the only
 * purpose of those calls is to make sure that *guest* actions will
 * cause mark_dirty to be called (hypervisor actions explicitly call it
 * anyway), it is safe to release the log-dirty lock before the callback
 * as long as the domain is paused for the entire operation. */

#define log_dirty_lock_init(_d)                                    \
    do {                                                           \
        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);         \
        (_d)->arch.paging.log_dirty.locker = -1;                   \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";    \
    } while (0)

#define log_dirty_lock(_d)                                                   \
    do {                                                                     \
        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
        {                                                                    \
            printk("Error: paging log dirty lock held by %s\n",              \
                   (_d)->arch.paging.log_dirty.locker_function);             \
            BUG();                                                           \
        }                                                                    \
        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
        (_d)->arch.paging.log_dirty.locker = current->processor;             \
        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
    } while (0)

#define log_dirty_unlock(_d)                                                 \
    do {                                                                     \
        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor);    \
        (_d)->arch.paging.log_dirty.locker = -1;                             \
        (_d)->arch.paging.log_dirty.locker_function = "nobody";              \
        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                      \
    } while (0)

/* allocate bitmap resources for log dirty */
int paging_alloc_log_dirty_bitmap(struct domain *d)
{
    if ( d->arch.paging.log_dirty.bitmap != NULL )
        return 0;

    d->arch.paging.log_dirty.bitmap_size =
        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
    d->arch.paging.log_dirty.bitmap =
        xmalloc_array(unsigned long,
                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
    if ( d->arch.paging.log_dirty.bitmap == NULL )
    {
        d->arch.paging.log_dirty.bitmap_size = 0;
        return -ENOMEM;
    }
    memset(d->arch.paging.log_dirty.bitmap, 0,
           d->arch.paging.log_dirty.bitmap_size/8);

    return 0;
}

/* free bitmap resources */
void paging_free_log_dirty_bitmap(struct domain *d)
{
    d->arch.paging.log_dirty.bitmap_size = 0;
    if ( d->arch.paging.log_dirty.bitmap )
    {
        xfree(d->arch.paging.log_dirty.bitmap);
        d->arch.paging.log_dirty.bitmap = NULL;
    }
}

int paging_log_dirty_enable(struct domain *d)
{
    int ret;

    domain_pause(d);
    log_dirty_lock(d);

    if ( paging_mode_log_dirty(d) )
    {
        ret = -EINVAL;
        goto out;
    }

    ret = paging_alloc_log_dirty_bitmap(d);
    if ( ret != 0 )
    {
        paging_free_log_dirty_bitmap(d);
        goto out;
    }

    log_dirty_unlock(d);

    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.enable_log_dirty(d);

    /* Possibility of leaving the bitmap allocated here but it'll be
     * tidied on domain teardown. */

    domain_unpause(d);
    return ret;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return ret;
}

int paging_log_dirty_disable(struct domain *d)
{
    int ret;

    domain_pause(d);
    /* Safe because the domain is paused. */
    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
    log_dirty_lock(d);
    if ( !paging_mode_log_dirty(d) )
        paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
    domain_unpause(d);

    return ret;
}

/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
{
    unsigned long pfn;
    mfn_t gmfn;

    gmfn = _mfn(guest_mfn);

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
        return;

    log_dirty_lock(d);

    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = get_gpfn_from_mfn(mfn_x(gmfn));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn)) )
    {
        log_dirty_unlock(d);
        return;
    }

    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
    {
        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
        {
            PAGING_DEBUG(LOGDIRTY,
                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
                         mfn_x(gmfn), pfn, d->domain_id);
            d->arch.paging.log_dirty.dirty_count++;
        }
    }
    else
    {
        PAGING_PRINTK("mark_dirty OOR! "
                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
                      "owner=%d c=%08x t=%" PRtype_info "\n",
                      mfn_x(gmfn),
                      pfn,
                      d->arch.paging.log_dirty.bitmap_size,
                      d->domain_id,
                      (page_get_owner(mfn_to_page(gmfn))
                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
                       : -1),
                      mfn_to_page(gmfn)->count_info,
                      mfn_to_page(gmfn)->u.inuse.type_info);
    }

    log_dirty_unlock(d);
}

/* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
 * clear the bitmap and stats as well. (An illustrative caller-side sketch
 * follows this function.) */
int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
{
    int i, rv = 0, clean = 0, peek = 1;

    domain_pause(d);
    log_dirty_lock(d);

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( clean )
    {
        d->arch.paging.log_dirty.fault_count = 0;
        d->arch.paging.log_dirty.dirty_count = 0;
    }

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
    {
        rv = -EINVAL; /* perhaps should be ENOMEM? */
        goto out;
    }

    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
        sc->pages = d->arch.paging.log_dirty.bitmap_size;

#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
    for ( i = 0; i < sc->pages; i += CHUNK )
    {
        int bytes = ((((sc->pages - i) > CHUNK)
                      ? CHUNK
                      : (sc->pages - i)) + 7) / 8;

        if ( likely(peek) )
        {
            if ( copy_to_guest_offset(
                     sc->dirty_bitmap, i/8,
                     (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
            {
                rv = -EFAULT;
                goto out;
            }
        }

        if ( clean )
            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
    }
#undef CHUNK

    log_dirty_unlock(d);

    if ( clean )
    {
        /* We need to further call clean_dirty_bitmap() functions of specific
         * paging modes (shadow or hap). Safe because the domain is paused. */
        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
    }
    domain_unpause(d);
    return rv;

 out:
    log_dirty_unlock(d);
    domain_unpause(d);
    return rv;
}

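/* Illustrative caller-side sketch, not part of this file: the fields of
 * struct xen_domctl_shadow_op that paging_log_dirty_op() consumes. The
 * domctl plumbing is omitted, and "guest_pages" / "caller_bitmap" are
 * assumed names rather than real interfaces.
 *
 *     struct xen_domctl_shadow_op sc;
 *     memset(&sc, 0, sizeof(sc));
 *     sc.op    = XEN_DOMCTL_SHADOW_OP_CLEAN;  // or _PEEK to leave bits set
 *     sc.pages = guest_pages;                 // bitmap length, in pages
 *     set_xen_guest_handle(sc.dirty_bitmap, caller_bitmap);
 *     // ... issue the XEN_DOMCTL_shadow_op domctl for the target domain ...
 *     // On return, sc.stats.fault_count / sc.stats.dirty_count hold the
 *     // counters, and sc.pages may have been clipped to the bitmap size.
 */
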
/* Note that this function takes three function pointers. Callers must
 * supply these functions for the log-dirty code to call. This function is
 * usually invoked when a paging mode is enabled; see shadow_enable() and
 * hap_enable() for reference.
 *
 * These function pointers must not be called with the log-dirty lock held.
 * (An illustrative registration sketch follows this function.)
 */
void paging_log_dirty_init(struct domain *d,
                           int  (*enable_log_dirty)(struct domain *d),
                           int  (*disable_log_dirty)(struct domain *d),
                           void (*clean_dirty_bitmap)(struct domain *d))
{
    /* We initialize log dirty lock first */
    log_dirty_lock_init(d);

    d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
    d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
    d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
}

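/* Illustrative sketch, not part of this file: how a paging mode's enable
 * path might register its log-dirty callbacks. The sh_* names are
 * hypothetical stand-ins for that mode's own implementations (see
 * shadow_enable() and hap_enable() for the real callers).
 *
 *     static int  sh_enable_log_dirty(struct domain *d);
 *     static int  sh_disable_log_dirty(struct domain *d);
 *     static void sh_clean_dirty_bitmap(struct domain *d);
 *
 *     static void example_mode_init(struct domain *d)
 *     {
 *         paging_log_dirty_init(d, sh_enable_log_dirty,
 *                               sh_disable_log_dirty,
 *                               sh_clean_dirty_bitmap);
 *     }
 */
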
/* This function frees log dirty bitmap resources. */
void paging_log_dirty_teardown(struct domain *d)
{
    log_dirty_lock(d);
    paging_free_log_dirty_bitmap(d);
    log_dirty_unlock(d);
}
/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
void paging_domain_init(struct domain *d)
{
    p2m_init(d);

    /* The order of the *_init calls below is important, as the later
     * ones may rewrite some common fields. Shadow pagetables are the
     * default... */
    shadow_domain_init(d);

    /* ... but we will use hardware assistance if it's available. */
    if ( opt_hap_enabled && is_hvm_domain(d) )
        hap_domain_init(d);
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( opt_hap_enabled && is_hvm_vcpu(v) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}

int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
                  XEN_GUEST_HANDLE(void) u_domctl)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Dom %u tried to do a paging op on itself.\n",
                 d->domain_id);
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu[0] == NULL) )
    {
        PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n",
                     d->domain_id);
        return -EINVAL;
    }

    /* Code to handle log-dirty. Note that some log-dirty operations
     * piggy-back on shadow operations. For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, we first check whether log-dirty
     * mode is enabled. If it is, we disable log-dirty and continue with the
     * shadow code. For this reason, we need to further dispatch the domctl
     * to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d);

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
            return paging_log_dirty_enable(d);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( paging_mode_log_dirty(d) )
            if ( (rc = paging_log_dirty_disable(d)) != 0 )
                return rc;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        return paging_log_dirty_op(d, sc);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( opt_hap_enabled && is_hvm_domain(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
    if ( opt_hap_enabled && is_hvm_domain(d) )
        hap_teardown(d);
    else
        shadow_teardown(d);

    /* clean up log dirty resources. */
    paging_log_dirty_teardown(d);
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( opt_hap_enabled && is_hvm_domain(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode. Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    if ( opt_hap_enabled && is_hvm_domain(d) )
        return hap_enable(d, mode | PG_HAP_enable);
    else
        return shadow_enable(d, mode | PG_SH_enable);
}

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( v->arch.paging.mode )
                printk("shadowed %u-on-%u\n",
                       v->arch.paging.mode->guest_levels,
                       v->arch.paging.mode->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && v->arch.paging.mode )
            printk("hap, %u levels\n",
                   v->arch.paging.mode->guest_levels);
        else
            printk("none\n");
    }
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */