
xen/arch/x86/mm/shadow/private.h @ 12564:2fd223c64fc6

[XEN] Pin l3 shadows of older x86_64 linux guests.

Older x86_64 linux kernels use one l4 table per cpu and context switch by
changing an l4 entry that points to an l3 table. If we're shadowing them,
we need to pin l3 shadows to stop them being torn down on every context
switch. (But don't do this for normal 64-bit guests.)

Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>

author   Tim Deegan <Tim.Deegan@xensource.com>
date     Thu Nov 23 17:46:52 2006 +0000 (2006-11-23)
parents  47a8bb3cd123
children b4baf35cff11
/******************************************************************************
 * arch/x86/mm/shadow/private.h
 *
 * Shadow code that is private, and does not need to be multiply compiled.
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifndef _XEN_SHADOW_PRIVATE_H
#define _XEN_SHADOW_PRIVATE_H

// In order to override the definition of mfn_to_page, we make sure page.h has
// been included...
#include <asm/page.h>
#include <xen/domain_page.h>
#include <asm/x86_emulate.h>
#include <asm/hvm/support.h>

/******************************************************************************
 * Debug and error-message output
 */
#define SHADOW_PRINTK(_f, _a...)                                      \
    debugtrace_printk("sh: %s(): " _f, __func__, ##_a)
#define SHADOW_ERROR(_f, _a...)                                       \
    printk("sh error: %s(): " _f, __func__, ##_a)
#define SHADOW_DEBUG(flag, _f, _a...)                                 \
    do {                                                              \
        if ( SHADOW_DEBUG_ ## flag )                                  \
            debugtrace_printk("shdebug: %s(): " _f, __func__, ##_a);  \
    } while (0)

// The flags for use with SHADOW_DEBUG:
#define SHADOW_DEBUG_PROPAGATE      1
#define SHADOW_DEBUG_MAKE_SHADOW    1
#define SHADOW_DEBUG_DESTROY_SHADOW 1
#define SHADOW_DEBUG_P2M            0
#define SHADOW_DEBUG_A_AND_D        1
#define SHADOW_DEBUG_EMULATE        1
#define SHADOW_DEBUG_LOGDIRTY       0
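// Illustrative only (not in the original source): with the flags above,
// a call like
//     SHADOW_DEBUG(PROPAGATE, "gfn=%lx\n", gfn);
// reaches debugtrace_printk() because SHADOW_DEBUG_PROPAGATE is 1, while
// SHADOW_DEBUG(P2M, ...) still type-checks its arguments but the if(0)
// body is compiled out.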

/******************************************************************************
 * Auditing routines
 */

#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES_FULL
extern void shadow_audit_tables(struct vcpu *v);
#else
#define shadow_audit_tables(_v) do {} while(0)
#endif

#if SHADOW_AUDIT & SHADOW_AUDIT_P2M
extern void shadow_audit_p2m(struct domain *d);
#else
#define shadow_audit_p2m(_d) do {} while(0)
#endif

/******************************************************************************
 * Macro for dealing with the naming of the internal names of the
 * shadow code's external entry points.
 */
#define SHADOW_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels) \
    name ## __shadow_ ## shadow_levels ## _guest_ ## guest_levels
#define SHADOW_INTERNAL_NAME(name, shadow_levels, guest_levels) \
    SHADOW_INTERNAL_NAME_HIDDEN(name, shadow_levels, guest_levels)
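// Illustrative only (not in the original source): the two-step expansion
// forces arguments like SHADOW_LEVELS/GUEST_LEVELS to be macro-expanded
// before token pasting, so
//     SHADOW_INTERNAL_NAME(sh_page_fault, 3, 3)
// becomes the single token
//     sh_page_fault__shadow_3_guest_3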

#if CONFIG_PAGING_LEVELS == 2
#define GUEST_LEVELS  2
#define SHADOW_LEVELS 2
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS
#endif /* CONFIG_PAGING_LEVELS == 2 */

#if CONFIG_PAGING_LEVELS == 3
#define GUEST_LEVELS  2
#define SHADOW_LEVELS 3
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS

#define GUEST_LEVELS  3
#define SHADOW_LEVELS 3
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS
#endif /* CONFIG_PAGING_LEVELS == 3 */

#if CONFIG_PAGING_LEVELS == 4
#define GUEST_LEVELS  2
#define SHADOW_LEVELS 3
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS

#define GUEST_LEVELS  3
#define SHADOW_LEVELS 3
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS

#define GUEST_LEVELS  3
#define SHADOW_LEVELS 4
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS

#define GUEST_LEVELS  4
#define SHADOW_LEVELS 4
#include "multi.h"
#undef GUEST_LEVELS
#undef SHADOW_LEVELS
#endif /* CONFIG_PAGING_LEVELS == 4 */
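// Illustrative summary (not in the original source): the shadow/guest
// level pairs instantiated from multi.h are 2/2 on 2-level builds; 3/2
// and 3/3 on PAE builds; and 3/2, 3/3, 4/3 and 4/4 on 64-bit builds.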

/******************************************************************************
 * Page metadata for shadow pages.
 */

struct shadow_page_info
{
    union {
        /* When in use, guest page we're a shadow of */
        unsigned long backpointer;
        /* When free, order of the freelist we're on */
        unsigned int order;
    };
    union {
        /* When in use, next shadow in this hash chain */
        struct shadow_page_info *next_shadow;
        /* When free, TLB flush time when freed */
        u32 tlbflush_timestamp;
    };
    struct {
        unsigned int type:4;      /* What kind of shadow is this? */
        unsigned int pinned:1;    /* Is the shadow pinned? */
        unsigned int logdirty:1;  /* Was it made in log-dirty mode? */
        unsigned int count:26;    /* Reference count */
        u32 mbz;                  /* Must be zero: this is where the owner
                                   * field lives in a non-shadow page */
    } __attribute__((packed));
    union {
        /* For unused shadow pages, a list of pages of this order;
         * for pinnable shadows, if pinned, a list of other pinned shadows
         * (see sh_type_is_pinnable() below for the definition of
         * "pinnable" shadow types). */
        struct list_head list;
        /* For non-pinnable shadows, a higher entry that points at us */
        paddr_t up;
    };
};

/* The structure above *must* be the same size as a struct page_info
 * from mm.h, since we'll be using the same space in the frametable.
 * Also, the mbz field must line up with the owner field of normal
 * pages, so they look properly like anonymous/xen pages. */
static inline void shadow_check_page_struct_offsets(void)
{
    BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
                 sizeof (struct page_info));
    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
                 offsetof(struct page_info, u.inuse._domain));
}

/* Shadow type codes */
#define SH_type_none           (0U) /* on the shadow free list */
#define SH_type_min_shadow     (1U)
#define SH_type_l1_32_shadow   (1U) /* shadowing a 32-bit L1 guest page */
#define SH_type_fl1_32_shadow  (2U) /* L1 shadow for a 32b 4M superpage */
#define SH_type_l2_32_shadow   (3U) /* shadowing a 32-bit L2 guest page */
#define SH_type_l1_pae_shadow  (4U) /* shadowing a pae L1 page */
#define SH_type_fl1_pae_shadow (5U) /* L1 shadow for pae 2M superpg */
#define SH_type_l2_pae_shadow  (6U) /* shadowing a pae L2-low page */
#define SH_type_l2h_pae_shadow (7U) /* shadowing a pae L2-high page */
#define SH_type_l1_64_shadow   (8U) /* shadowing a 64-bit L1 page */
#define SH_type_fl1_64_shadow  (9U) /* L1 shadow for 64-bit 2M superpg */
#define SH_type_l2_64_shadow  (10U) /* shadowing a 64-bit L2 page */
#define SH_type_l3_64_shadow  (11U) /* shadowing a 64-bit L3 page */
#define SH_type_l4_64_shadow  (12U) /* shadowing a 64-bit L4 page */
#define SH_type_max_shadow    (12U)
#define SH_type_p2m_table     (13U) /* in use as the p2m table */
#define SH_type_monitor_table (14U) /* in use as a monitor table */
#define SH_type_unused        (15U)

/*
 * What counts as a pinnable shadow?
 */

static inline int sh_type_is_pinnable(struct vcpu *v, unsigned int t)
{
    /* Top-level shadow types in each mode can be pinned, so that they
     * persist even when not currently in use in a guest CR3 */
    if ( t == SH_type_l2_32_shadow
         || t == SH_type_l2_pae_shadow
         || t == SH_type_l2h_pae_shadow
         || t == SH_type_l4_64_shadow )
        return 1;

#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
    /* Early 64-bit linux used three levels of pagetables for the guest
     * and context switched by changing one l4 entry in a per-cpu l4
     * page.  When we're shadowing those kernels, we have to pin l3
     * shadows so they don't just evaporate on every context switch.
     * For all other guests, we'd rather use the up-pointer field in l3s. */
    if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL)
                  && CONFIG_PAGING_LEVELS >= 4
                  && t == SH_type_l3_64_shadow) )
        return 1;
#endif

    /* Everything else is not pinnable, and can use the "up" pointer */
    return 0;
}

/*
 * Definitions for the shadow_flags field in page_info.
 * These flags are stored on *guest* pages...
 * Bits 1-12 are encodings for the shadow types.
 */
#define SHF_page_type_mask \
    (((1u << (SH_type_max_shadow + 1u)) - 1u) - \
     ((1u << SH_type_min_shadow) - 1u))
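// Illustrative only (not in the original source): with SH_type_min_shadow
// == 1 and SH_type_max_shadow == 12, this works out to
//     ((1u << 13) - 1) - ((1u << 1) - 1) == 0x1ffe
// i.e. bits 1..12 set, one bit per shadow type.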

#define SHF_L1_32   (1u << SH_type_l1_32_shadow)
#define SHF_FL1_32  (1u << SH_type_fl1_32_shadow)
#define SHF_L2_32   (1u << SH_type_l2_32_shadow)
#define SHF_L1_PAE  (1u << SH_type_l1_pae_shadow)
#define SHF_FL1_PAE (1u << SH_type_fl1_pae_shadow)
#define SHF_L2_PAE  (1u << SH_type_l2_pae_shadow)
#define SHF_L2H_PAE (1u << SH_type_l2h_pae_shadow)
#define SHF_L1_64   (1u << SH_type_l1_64_shadow)
#define SHF_FL1_64  (1u << SH_type_fl1_64_shadow)
#define SHF_L2_64   (1u << SH_type_l2_64_shadow)
#define SHF_L3_64   (1u << SH_type_l3_64_shadow)
#define SHF_L4_64   (1u << SH_type_l4_64_shadow)

/* Used for hysteresis when automatically unhooking mappings on fork/exit */
#define SHF_unhooked_mappings (1u<<31)

/******************************************************************************
 * Various function declarations
 */

/* x86 emulator support */
extern struct x86_emulate_ops shadow_emulator_ops;

/* Hash table functions */
mfn_t shadow_hash_lookup(struct vcpu *v, unsigned long n, unsigned int t);
void  shadow_hash_insert(struct vcpu *v,
                         unsigned long n, unsigned int t, mfn_t smfn);
void  shadow_hash_delete(struct vcpu *v,
                         unsigned long n, unsigned int t, mfn_t smfn);
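// Illustrative only (not in the original source): the hash is keyed by
// (backpointer, shadow type), so a caller looking for the l1 shadow of a
// 32-bit guest page might do something like
//     mfn_t smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l1_32_shadow);
//     if ( mfn_valid(smfn) )
//         /* found an existing shadow */;
// (the miss-case return value is assumed here to fail mfn_valid()).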

/* shadow promotion */
void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type);
void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type);

/* Shadow page allocation functions */
void  shadow_prealloc(struct domain *d, unsigned int order);
mfn_t shadow_alloc(struct domain *d,
                   u32 shadow_type,
                   unsigned long backpointer);
void  shadow_free(struct domain *d, mfn_t smfn);

/* Function to convert a shadow to log-dirty */
void shadow_convert_to_log_dirty(struct vcpu *v, mfn_t smfn);

/* Dispatcher function: call the per-mode function that will unhook the
 * non-Xen mappings in this top-level shadow mfn */
void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn);

/* Install the xen mappings in various flavours of shadow */
void sh_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn);
void sh_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn);
void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn);

/******************************************************************************
 * Flags used in the return value of the shadow_set_lXe() functions...
 */

/* We actually wrote something new to the shadow */
#define SHADOW_SET_CHANGED 0x1
/* Caller should flush TLBs to clear the old entry */
#define SHADOW_SET_FLUSH   0x2
/* Something went wrong: the shadow entry was invalid or refcount failed */
#define SHADOW_SET_ERROR   0x4
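// Illustrative only (not in the original source): callers are expected to
// test these bits individually, along the lines of
//     int r = shadow_set_l1e(...);   /* hypothetical call site */
//     if ( r & SHADOW_SET_ERROR )
//         /* back out the update */;
//     if ( r & SHADOW_SET_FLUSH )
//         /* flush TLBs before relying on the new entry */;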

/******************************************************************************
 * MFN/page-info handling
 */

// Override mfn_to_page from asm/page.h, which was #include'd above,
// in order to make it work with our mfn type.
#undef mfn_to_page
#define mfn_to_page(_m) (frame_table + mfn_x(_m))
#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))

// Override page_to_mfn from asm/page.h, which was #include'd above,
// in order to make it work with our mfn type.
#undef page_to_mfn
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))

// Override mfn_valid from asm/page.h, which was #include'd above,
// in order to make it work with our mfn type.
#undef mfn_valid
#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)

// Provide mfn_t-aware versions of common xen functions
static inline void *
sh_map_domain_page(mfn_t mfn)
{
    return map_domain_page(mfn_x(mfn));
}

static inline void
sh_unmap_domain_page(void *p)
{
    unmap_domain_page(p);
}

static inline void *
sh_map_domain_page_global(mfn_t mfn)
{
    return map_domain_page_global(mfn_x(mfn));
}

static inline void
sh_unmap_domain_page_global(void *p)
{
    unmap_domain_page_global(p);
}
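// Illustrative only (not in the original source): a typical map/use/unmap
// pattern with these wrappers might look like
//     l1_pgentry_t *sl1p = sh_map_domain_page(smfn);
//     /* ... read or update shadow entries through sl1p ... */
//     sh_unmap_domain_page(sl1p);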

static inline int
sh_mfn_is_dirty(struct domain *d, mfn_t gmfn)
/* Is this guest page dirty?  Call only in log-dirty mode. */
{
    unsigned long pfn;
    ASSERT(shadow_mode_log_dirty(d));
    ASSERT(d->arch.shadow.dirty_bitmap != NULL);

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
    if ( likely(VALID_M2P(pfn))
         && likely(pfn < d->arch.shadow.dirty_bitmap_size)
         && test_bit(pfn, d->arch.shadow.dirty_bitmap) )
        return 1;

    return 0;
}

static inline int
sh_mfn_is_a_page_table(mfn_t gmfn)
{
    struct page_info *page = mfn_to_page(gmfn);
    struct domain *owner;
    unsigned long type_info;

    if ( !valid_mfn(gmfn) )
        return 0;

    owner = page_get_owner(page);
    if ( owner && shadow_mode_refcounts(owner)
         && (page->count_info & PGC_page_table) )
        return 1;

    type_info = page->u.inuse.type_info & PGT_type_mask;
    return type_info && (type_info <= PGT_l4_page_table);
}

/**************************************************************************/
/* Shadow-page refcounting. */

void sh_destroy_shadow(struct vcpu *v, mfn_t smfn);

/* Increase the refcount of a shadow page.  Arguments are the mfn to refcount,
 * and the physical address of the shadow entry that holds the ref (or zero
 * if the ref is held by something else).
 * Returns 0 for failure, 1 for success. */
static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
    u32 x, nx;
    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);

    ASSERT(mfn_valid(smfn));

    x = sp->count;
    nx = x + 1;

    if ( unlikely(nx >= 1U<<26) )
    {
        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
                      sp->backpointer, mfn_x(smfn));
        return 0;
    }

    /* Guarded by the shadow lock, so no need for atomic update */
    sp->count = nx;

    /* We remember the first shadow entry that points to each shadow.
     * (Only non-pinnable shadows use the up-pointer; pinnable ones use
     * the list field instead.) */
    if ( entry_pa != 0
         && !sh_type_is_pinnable(v, sp->type)
         && sp->up == 0 )
        sp->up = entry_pa;

    return 1;
}

/* Decrease the refcount of a shadow page.  As for get_ref, takes the
 * physical address of the shadow entry that held this reference. */
static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
    u32 x, nx;
    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);

    ASSERT(mfn_valid(smfn));
    ASSERT(sp->mbz == 0);

    /* If this is the entry in the up-pointer, remove it */
    if ( entry_pa != 0
         && !sh_type_is_pinnable(v, sp->type)
         && sp->up == entry_pa )
        sp->up = 0;

    x = sp->count;
    nx = x - 1;

    if ( unlikely(x == 0) )
    {
        SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
                     mfn_x(smfn), sp->count, sp->type);
        BUG();
    }

    /* Guarded by the shadow lock, so no need for atomic update */
    sp->count = nx;

    if ( unlikely(nx == 0) )
        sh_destroy_shadow(v, smfn);
}

/* Pin a shadow page: take an extra refcount, set the pin bit,
 * and put the shadow at the head of the list of pinned shadows.
 * Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
    struct shadow_page_info *sp;

    ASSERT(mfn_valid(smfn));
    sp = mfn_to_shadow_page(smfn);
    ASSERT(sh_type_is_pinnable(v, sp->type));
    if ( sp->pinned )
    {
        /* Already pinned: take it out of the pinned-list so it can go
         * at the front */
        list_del(&sp->list);
    }
    else
    {
        /* Not pinned: pin it! */
        if ( !sh_get_ref(v, smfn, 0) )
            return 0;
        sp->pinned = 1;
    }
    /* Put it at the head of the list of pinned shadows */
    list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows);
    return 1;
}

/* Unpin a shadow page: unset the pin bit, take the shadow off the list
 * of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
    struct shadow_page_info *sp;

    ASSERT(mfn_valid(smfn));
    sp = mfn_to_shadow_page(smfn);
    ASSERT(sh_type_is_pinnable(v, sp->type));
    if ( sp->pinned )
    {
        sp->pinned = 0;
        list_del(&sp->list);
        sp->up = 0; /* in case this stops being a pinnable type in future */
        sh_put_ref(v, smfn, 0);
    }
}
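// Illustrative only (not in the original source): this is how the l3-shadow
// pinning described in the changeset comment might be driven.  When a new
// top-level (or, with SHOPT_LINUX_L3_TOPLEVEL, l3) shadow is created, the
// caller can do something like
//     if ( sh_type_is_pinnable(v, SH_type_l3_64_shadow) )
//         sh_pin(v, smfn);   /* survives the guest's l4-entry switch */
// so the l3 shadow keeps a ref across context switches instead of being
// torn down and rebuilt every time the guest changes its per-cpu l4 entry.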

#endif /* _XEN_SHADOW_PRIVATE_H */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */