
xen/include/asm-x86/mm.h @ 15812:86a154e1ef5d

[HVM] Shadow: don't shadow the p2m table.
For HVM vcpus with paging disabled, we used to shadow the p2m table,
and skip the p2m lookup to go from gfn to mfn. Instead, we now
provide a simple pagetable that gives a one-to-one mapping of 4GB, and
shadow that, making the translations from gfn to mfn via the p2m.
This removes the paging-disabled special-case code from the shadow
fault handler, and allows us to expand the p2m interface, since all HVM
translations now go through the same p2m lookups.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Fri Aug 31 11:06:22 2007 +0100 (2007-08-31)
parents ecb89c6ce615
children 4633e9604da9
#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct page_info' contains a 'struct list_head list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
struct page_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } __attribute__ ((packed)) inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Order-size of the free chunk this page is the head of. */
            u32 order;
            /* Mask of possibly-tainted TLBs. */
            cpumask_t cpumask;
        } __attribute__ ((packed)) free;

    } u;

    union {
        /*
         * Timestamp from 'TLB clock', used to avoid extra safety flushes.
         * Only valid for: a) free pages, and b) pages with zero type count
         * (except page table pages when the guest is in shadow mode).
         */
        u32 tlbflush_timestamp;

        /*
         * Guest pages with a shadow.  This does not conflict with
         * tlbflush_timestamp since page table pages are explicitly not
         * tracked for TLB-flush avoidance when a guest runs in shadow mode.
         */
        unsigned long shadow_flags;
    };
};
 /* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none            (0U<<29) /* no special uses of this page */
#define PGT_l1_page_table   (1U<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table   (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table   (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table   (4U<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */
#define PGT_type_mask       (7U<<29) /* Bits 29-31. */

 /* Owning guest has pinned this page to its current type? */
#define _PGT_pinned         28
#define PGT_pinned          (1U<<_PGT_pinned)
 /* Has this page been validated for use as its current type? */
#define _PGT_validated      27
#define PGT_validated       (1U<<_PGT_validated)
 /* PAE only: is this an L2 page directory containing Xen-private mappings? */
#define _PGT_pae_xen_l2     26
#define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)

 /* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask      ((1U<<16)-1)

 /* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated      31
#define PGC_allocated       (1U<<_PGC_allocated)
 /* Set on a *guest* page to mark it out-of-sync with its shadow */
#define _PGC_out_of_sync    30
#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
 /* Set when a page is in use as a page table. */
#define _PGC_page_table     29
#define PGC_page_table      (1U<<_PGC_page_table)
 /* 29-bit count of references to this frame. */
#define PGC_count_mask      ((1U<<29)-1)
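
/*
 * Illustrative example (values chosen for this sketch, not from the original
 * header): a frame pinned and validated as an L1 pagetable with a single
 * type reference would satisfy
 *     (type_info & PGT_type_mask)  == PGT_l1_page_table
 *     (type_info & PGT_pinned)     != 0
 *     (type_info & PGT_validated)  != 0
 *     (type_info & PGT_count_mask) == 1
 * while its general reference count lives in (count_info & PGC_count_mask).
 */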
 /* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define is_xen_heap_frame(pfn) ({                                          \
    paddr_t maddr = page_to_maddr(pfn);                                    \
    ((maddr >= xenheap_phys_start) && (maddr < xenheap_phys_end));         \
})

#if defined(__i386__)
#define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain & 1) ? NULL : (void *)_domain; }
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx" /* should only be used for printk's */
#endif
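
/*
 * The owner field in struct page_info is only 32 bits wide, so the
 * struct domain pointer is stored "pickled": on i386 the pointer value
 * itself is kept; on x86_64 the physical address of the struct domain is
 * kept and mapped back through the direct mapping (__va()) when unpickled.
 */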
/* The order of the largest allocation unit we use for shadow pages */
#if CONFIG_PAGING_LEVELS == 2
#define SHADOW_MAX_ORDER 0 /* Only ever need 4k allocations */
#else
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
#endif

#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))

#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly);
extern void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly);

extern struct page_info *frame_table;
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);

int alloc_page_type(struct page_info *page, unsigned long type);
void free_page_type(struct page_info *page, unsigned long type);
int _shadow_mode_refcounts(struct domain *d);
static inline void put_page(struct page_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}
static inline int get_page(struct page_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_refcounts(domain) )
                gdprintk(XENLOG_INFO,
                         "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
                         PRtype_info "\n",
                         page_to_mfn(page), domain, unpickle_domptr(d),
                         x, page->u.inuse.type_info);
            return 0;
        }
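        /*
         * cmpxchg8b operates on the 64-bit quantity formed by count_info
         * (low 32 bits) and _domain (high 32 bits): it atomically installs
         * the incremented count while leaving the owner unchanged, but only
         * if neither field has changed since they were sampled.  On failure
         * the current values are reloaded into y and nd and the loop retries.
         */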
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
void put_page_type(struct page_info *page);
int  get_page_type(struct page_info *page, unsigned long type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct page_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int get_page_and_type(struct page_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}

#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

// Quick test for whether a given page can be represented directly in CR3.
//
#if CONFIG_PAGING_LEVELS == 3
#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20)

/* returns a lowmem machine address of the copied L3 root table */
unsigned long
pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
#endif /* CONFIG_PAGING_LEVELS == 3 */

int check_descriptor(const struct domain *, struct desc_struct *d);
/******************************************************************************
 * With shadow pagetables, the different kinds of address start to get
 * confusing.
 *
 * Virtual addresses are what they usually are: the addresses that are used
 * to access memory while the guest is running.  The MMU translates from
 * virtual addresses to machine addresses.
 *
 * (Pseudo-)physical addresses are the abstraction of physical memory the
 * guest uses for allocation and so forth.  For the purposes of this code,
 * we can largely ignore them.
 *
 * Guest frame numbers (gfns) are the entries that the guest puts in its
 * pagetables.  For normal paravirtual guests, they are actual frame numbers,
 * with the translation done by the guest.
 *
 * Machine frame numbers (mfns) are the entries that the hypervisor puts
 * in the shadow page tables.
 *
 * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
 * to a "machine frame number, from the guest's perspective", or in other
 * words, pseudo-physical frame numbers.  However, in the shadow code, the
 * term "gmfn" means "the mfn of a guest page"; this combines naturally with
 * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
 * guest L2 page), etc...
 */
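
/*
 * Worked example of the naming convention (illustrative): when shadowing a
 * guest L2 table, "gl2mfn" is the mfn of the frame holding the guest's L2
 * and "smfn" is the mfn of the shadow built for it.  For a translated (HVM)
 * guest, a gfn read from a guest pagetable entry must be mapped through the
 * p2m (gfn_to_mfn()) to obtain the corresponding mfn; for an ordinary
 * paravirtual guest the gfn already is the mfn.
 */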
/* With this defined, we do some ugly things to force the compiler to
 * give us type safety between mfns and gfns and other integers.
 * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
 * that translate between int and foo_t.
 *
 * It does have some performance cost because the types now have
 * a different storage attribute, so we may not want it on all the time. */
#ifndef NDEBUG
#define TYPE_SAFETY 1
#endif

#ifdef TYPE_SAFETY
#define TYPE_SAFE(_type,_name)                                           \
typedef struct { _type _name; } _name##_t;                               \
static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
static inline _type _name##_x(_name##_t n) { return n._name; }
#else
#define TYPE_SAFE(_type,_name)                                           \
typedef _type _name##_t;                                                 \
static inline _name##_t _##_name(_type n) { return n; }                  \
static inline _type _name##_x(_name##_t n) { return n; }
#endif

TYPE_SAFE(unsigned long,mfn);

/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */
#define PRI_mfn "05lx"
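
/*
 * Usage sketch (illustrative): with TYPE_SAFETY enabled, mfn_t is a distinct
 * struct type, so raw integers must be wrapped and unwrapped explicitly and
 * accidental mixing of mfns with other integers fails to compile:
 *
 *     mfn_t m = _mfn(0x1000);                  // wrap a raw frame number
 *     unsigned long raw = mfn_x(m);            // unwrap it again
 *     printk("mfn %"PRI_mfn"\n", mfn_x(m));    // print with the PRI_mfn format
 */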
/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0UL)
#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))

#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
#define set_gpfn_from_mfn(mfn, pfn) \
    ((void)((mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \
     machine_to_phys_mapping[(mfn)] = (pfn))
#else
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#endif
#define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
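
/*
 * Example (illustrative values): when machine frame 0x1234 is handed to a
 * guest as its pseudo-physical frame 0x10, the M2P entry is written and read
 * back with:
 *
 *     set_gpfn_from_mfn(0x1234, 0x10);
 *     gpfn = get_gpfn_from_mfn(0x1234);    // yields 0x10
 *
 * An entry of INVALID_M2P_ENTRY marks a frame with no pseudo-physical
 * mapping; VALID_M2P() rejects such entries.
 */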
#define mfn_to_gmfn(_d, mfn)                    \
    ( (paging_mode_translate(_d))               \
      ? get_gpfn_from_mfn(mfn)                  \
      : (mfn) )

#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))

#define INVALID_MFN             (~0UL)

#ifdef CONFIG_COMPAT
#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
#endif
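
/*
 * Worked example (illustrative): these macros pack a PAE pagetable frame
 * number into the 32-bit CR3 format used for compat guests, rotating the
 * high bits of the pfn into the low 12 bits of the CR3 value so that frames
 * above 4GB stay representable:
 *
 *     compat_pfn_to_cr3(0x123456)   == 0x23456001
 *     compat_cr3_to_pfn(0x23456001) == 0x123456
 */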
#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()                ((void)0)
#define memguard_guard_range(_p,_l)    ((void)0)
#define memguard_unguard_range(_p,_l)  ((void)0)
#endif

void memguard_guard_stack(void *p);

int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                        struct cpu_user_regs *);

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
void propagate_page_fault(unsigned long addr, u16 error_code);

int __sync_lazy_execstate(void);

/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
#ifdef CONFIG_COMPAT
int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
#endif

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags);

int map_ldt_shadow_page(unsigned int);

#ifdef CONFIG_COMPAT
int setup_arg_xlat_area(struct vcpu *, l4_pgentry_t *);
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
#else
# define setup_arg_xlat_area(vcpu, l4tab) 0
# define domain_clamp_alloc_bitsize(d, b) (b)
#endif

unsigned long domain_get_maximum_gpfn(struct domain *d);

#endif /* __ASM_X86_MM_H__ */