
xen/include/asm-x86/mm.h @ 19164:de853e901b5c

Remove cpumask for page_info struct.

This makes TLB flushing on page allocation more conservative, but the
flush clock should still save us most of the time (page freeing and
alloc'ing tends to happen in batches, and not necessarily close
together). We could add some optimisations to the flush filter if this
does turn out to be a significant overhead for some (useful)
workloads.

Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 04 15:29:51 2009 +0000 (2009-02-04)
parents 2e1734aa8db3
children 416197f0292b
#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>
/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct page_info' contains a 'struct page_list_entry list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
/*
 * This definition is solely for the use in struct page_info (and
 * struct page_list_head), intended to allow easy adjustment once x86-64
 * wants to support more than 16TB.
 * 'unsigned long' should be used for MFNs everywhere else.
 */
#define __mfn_t unsigned int
#define PRpgmfn "08x"

#undef page_list_entry
struct page_list_entry
{
    __mfn_t next, prev;
};
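
/*
 * Editor's note (illustrative, not part of the original header): PRpgmfn is
 * the printk format matching the 32-bit compressed MFNs stored in these list
 * entries, e.g. for some struct page_info *pg:
 *
 *     printk("page list: prev=%"PRpgmfn" next=%"PRpgmfn"\n",
 *            pg->list.prev, pg->list.next);
 */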
struct page_info
{
    union {
        /* Each frame can be threaded onto a doubly-linked list.
         *
         * For unused shadow pages, a list of pages of this order; for
         * pinnable shadows, if pinned, a list of other pinned shadows
         * (see sh_type_is_pinnable() below for the definition of
         * "pinnable" shadow types).
         */
        struct page_list_entry list;
        /* For non-pinnable shadows, a higher entry that points at us. */
        paddr_t up;
    };

    /* Reference count and various PGC_xxx flags and fields. */
    unsigned long count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } inuse;

        /* Page is in use as a shadow: count_info == 0. */
        struct {
            unsigned long type:5;   /* What kind of shadow is this? */
            unsigned long pinned:1; /* Is the shadow pinned? */
            unsigned long count:26; /* Reference count */
        } sh;

    } u;

    union {

        /* Page is in use, but not as a shadow. */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
        } inuse;

        /* Page is in use as a shadow. */
        struct {
            /* GMFN of guest page we're a shadow of. */
            __mfn_t back;
        } sh;

        /* Page is on a free list (including shadow code free lists). */
        struct {
            /* Order-size of the free chunk this page is the head of. */
            unsigned int order;
        } free;

    } v;

    union {
        /*
         * Timestamp from 'TLB clock', used to avoid extra safety flushes.
         * Only valid for: a) free pages, and b) pages with zero type count
         * (except page table pages when the guest is in shadow mode).
         */
        u32 tlbflush_timestamp;

        /*
         * When PGT_partial is true then this field is valid and indicates
         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
         * An extra page reference must be acquired (or not dropped) whenever
         * PGT_partial gets set, and it must be dropped when the flag gets
         * cleared. This is so that a get() leaving a page in partially
         * validated state (where the caller would drop the reference acquired
         * due to the getting of the type [apparently] failing [-EAGAIN])
         * would not accidentally result in a page left with zero general
         * reference count, but non-zero type reference count (possible when
         * the partial get() is followed immediately by domain destruction).
         * Likewise, the ownership of the single type reference for partially
         * (in-)validated pages is tied to this flag, i.e. the instance
         * setting the flag must not drop that reference, whereas the instance
         * clearing it will have to.
         *
         * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
         * been partially validated. This implies that the general reference
         * to the page (acquired from get_page_from_lNe()) would be dropped
         * (again due to the apparent failure) and hence must be re-acquired
         * when resuming the validation, but must not be dropped when picking
         * up the page for invalidation.
         *
         * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
         * been partially invalidated. This is basically the opposite case of
         * above, i.e. the general reference to the page was not dropped in
         * put_page_from_lNe() (due to the apparent failure), and hence it
         * must be dropped when the put operation is resumed (and completes),
         * but it must not be acquired if picking up the page for validation.
         */
        struct {
            u16 nr_validated_ptes;
            s8 partial_pte;
        };

        /*
         * Guest pages with a shadow. This does not conflict with
         * tlbflush_timestamp since page table pages are explicitly not
         * tracked for TLB-flush avoidance when a guest runs in shadow mode.
         */
        u32 shadow_flags;

        /* When in use as a shadow, next shadow in this hash chain. */
        __mfn_t next_shadow;
    };
};
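
/*
 * Editor's illustrative sketch (not part of the original header): the
 * tlbflush_timestamp field above is what lets the allocator avoid a global
 * TLB flush on every allocation.  Assuming the tlbflush_filter() and
 * flush_tlb_mask() helpers from asm/flushtlb.h, the allocator-side pattern
 * looks roughly like:
 *
 *     cpumask_t mask = cpu_online_map;
 *     tlbflush_filter(mask, page->tlbflush_timestamp);
 *     if ( !cpus_empty(mask) )
 *         flush_tlb_mask(mask);
 *
 * i.e. only CPUs that have not flushed since the page was freed (or dropped
 * its type) need an IPI.  This is the "flush clock" that the changeset
 * comment above relies on now that the per-page cpumask has been removed.
 */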
#undef __mfn_t
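
/*
 * Editor's illustrative sketch (not part of the original header): with
 * struct page_info complete, PFN_ORDER() can be applied to the head page of
 * a free chunk to recover the chunk's size in pages.
 */
static inline unsigned long example_free_chunk_pages(const struct page_info *pg)
{
    /* Only meaningful while 'pg' heads a free chunk (i.e. v.free is live). */
    return 1UL << PFN_ORDER(pg);
}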
#define PG_shift(idx)   (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
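
/*
 * Worked example (editor's note, not in the original header): PG_mask()
 * places a field at the *top* of count_info/type_info.  With
 * BITS_PER_LONG == 32, PG_shift(3) == 29, so PG_mask(7, 3) == 7UL << 29,
 * i.e. bits 29-31 (as noted at PGT_type_mask below); with
 * BITS_PER_LONG == 64 the same field occupies bits 61-63.
 */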
 /* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none          PG_mask(0, 3)  /* no special uses of this page   */
#define PGT_l1_page_table PG_mask(1, 3)  /* using as an L1 page table?     */
#define PGT_l2_page_table PG_mask(2, 3)  /* using as an L2 page table?     */
#define PGT_l3_page_table PG_mask(3, 3)  /* using as an L3 page table?     */
#define PGT_l4_page_table PG_mask(4, 3)  /* using as an L4 page table?     */
#define PGT_seg_desc_page PG_mask(5, 3)  /* using this page in a GDT/LDT?  */
#define PGT_writable_page PG_mask(7, 3)  /* has writable mappings?         */
#define PGT_type_mask     PG_mask(7, 3)  /* Bits 29-31.                    */

 /* Owning guest has pinned this page to its current type? */
#define _PGT_pinned       PG_shift(4)
#define PGT_pinned        PG_mask(1, 4)
 /* Has this page been validated for use as its current type? */
#define _PGT_validated    PG_shift(5)
#define PGT_validated     PG_mask(1, 5)
 /* PAE only: is this an L2 page directory containing Xen-private mappings? */
#define _PGT_pae_xen_l2   PG_shift(6)
#define PGT_pae_xen_l2    PG_mask(1, 6)
 /* Has this page been *partially* validated for use as its current type? */
#define _PGT_partial      PG_shift(7)
#define PGT_partial       PG_mask(1, 7)
 /* Page is locked? */
#define _PGT_locked       PG_shift(8)
#define PGT_locked        PG_mask(1, 8)

 /* Count of uses of this frame as its current type. */
#define PGT_count_width   PG_shift(8)
#define PGT_count_mask    ((1UL<<PGT_count_width)-1)

 /* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated    PG_shift(1)
#define PGC_allocated     PG_mask(1, 1)
 /* Page is Xen heap? */
#define _PGC_xen_heap     PG_shift(2)
#define PGC_xen_heap      PG_mask(1, 2)
 /* Set when a page is in use as a page table. */
#define _PGC_page_table   PG_shift(3)
#define PGC_page_table    PG_mask(1, 3)
 /* 3-bit PAT/PCD/PWT cache-attribute hint. */
#define PGC_cacheattr_base PG_shift(6)
#define PGC_cacheattr_mask PG_mask(7, 6)
 /* Count of references to this frame. */
#define PGC_count_width   PG_shift(6)
#define PGC_count_mask    ((1UL<<PGC_count_width)-1)
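
/*
 * Editor's illustrative sketch (not part of the original header): how the
 * PGT_/PGC_ fields defined above are typically decoded when inspecting a
 * page.  Real call sites use get_page_type()/get_page() declared below;
 * this merely reads the bit layout.
 */
static inline int example_is_validated_l1_table(const struct page_info *pg)
{
    unsigned long x = pg->u.inuse.type_info;

    return ((x & PGT_type_mask) == PGT_l1_page_table) &&
           ((x & PGT_validated) != 0) &&
           ((x & PGT_count_mask) != 0);
}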
#if defined(__i386__)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({                          \
    unsigned long _mfn = (mfn);                          \
    (_mfn < paddr_to_pfn(xenheap_phys_end));             \
})
#else
#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn])
#endif

#if defined(__i386__)
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */

/* The number of out-of-sync shadows we allow per vcpu (prime, please) */
#define SHADOW_OOS_PAGES 3

/* OOS fixup entries */
#define SHADOW_OOS_FIXUPS 2

#define page_get_owner(_p)                                              \
    ((struct domain *)((_p)->v.inuse._domain ?                          \
                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
#define page_set_owner(_p,_d)                                           \
    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
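
/*
 * Editor's note (illustrative, not part of the original header): the owner
 * is "pickled" as the MFN of the domain structure so that it fits in the
 * 32-bit v.inuse._domain field; the two macros above pack and unpack it:
 *
 *     page_set_owner(pg, d);                      // stores virt_to_mfn(d)
 *     struct domain *owner = page_get_owner(pg);  // owner == d again
 */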
#define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
#define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))

#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly);
extern void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly);

extern struct page_info *frame_table;
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);
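
/*
 * Editor's illustrative sketch (not part of the original header): the
 * compressed page_list_entry fields near the top of this file hold MFNs
 * rather than pointers; indexing frame_table turns one back into a
 * struct page_info pointer (mfn_to_page()/page_to_mfn() are the usual
 * accessors elsewhere in Xen).
 */
static inline struct page_info *example_next_page_on_list(
    const struct page_info *pg)
{
    return &frame_table[pg->list.next];
}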
int free_page_type(struct page_info *page, unsigned long type,
                   int preemptible);
int _shadow_mode_refcounts(struct domain *d);

void cleanup_page_cacheattr(struct page_info *page);

int is_iomem_page(unsigned long mfn);

void put_page(struct page_info *page);
int  get_page(struct page_info *page, struct domain *domain);
void put_page_type(struct page_info *page);
int  get_page_type(struct page_info *page, unsigned long type);
int  put_page_type_preemptible(struct page_info *page);
int  get_page_type_preemptible(struct page_info *page, unsigned long type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct page_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int put_page_and_type_preemptible(struct page_info *page,
                                                int preemptible)
{
    int rc = 0;

    if ( preemptible )
        rc = put_page_type_preemptible(page);
    else
        put_page_type(page);
    if ( likely(rc == 0) )
        put_page(page);
    return rc;
}

static inline int get_page_and_type(struct page_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}
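
/*
 * Editor's illustrative sketch (not part of the original header): the usual
 * pairing of the helpers above -- take both a general and a type reference,
 * and drop them together once the typed use of the page is finished.
 */
static inline int example_use_page_as_writable(struct page_info *page,
                                               struct domain *d)
{
    if ( !get_page_and_type(page, d, PGT_writable_page) )
        return 0;
    /* ... the page may now be written through Xen's mapping ... */
    put_page_and_type(page);
    return 1;
}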
#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

// Quick test for whether a given page can be represented directly in CR3.
//
#if CONFIG_PAGING_LEVELS == 3
#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20)

/* returns a lowmem machine address of the copied L3 root table */
unsigned long
pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
#endif /* CONFIG_PAGING_LEVELS == 3 */
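
/*
 * Worked example (editor's note, not in the original header): with 3-level
 * (PAE) paging, CR3 is a 32-bit register, so the L3 root's machine address
 * (mfn << PAGE_SHIFT) must lie below 4GB.  MFN_FITS_IN_CR3() checks exactly
 * that: an mfn with no bits above bit 19 gives an address below
 * 1 << (20 + 12) = 4GB; otherwise a low-memory copy of the root table, such
 * as the one pae_copy_root() returns, has to be used instead.
 */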
int check_descriptor(const struct domain *, struct desc_struct *d);

extern int opt_allow_hugepage;

/******************************************************************************
 * With shadow pagetables, the different kinds of address start
 * to get confusing.
 *
 * Virtual addresses are what they usually are: the addresses that are used
 * to access memory while the guest is running. The MMU translates from
 * virtual addresses to machine addresses.
 *
 * (Pseudo-)physical addresses are the abstraction of physical memory the
 * guest uses for allocation and so forth. For the purposes of this code,
 * we can largely ignore them.
 *
 * Guest frame numbers (gfns) are the entries that the guest puts in its
 * pagetables. For normal paravirtual guests, they are actual frame numbers,
 * with the translation done by the guest.
 *
 * Machine frame numbers (mfns) are the entries that the hypervisor puts
 * in the shadow page tables.
 *
 * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
 * to a "machine frame number, from the guest's perspective", or in other
 * words, pseudo-physical frame numbers. However, in the shadow code, the
 * term "gmfn" means "the mfn of a guest page"; this combines naturally with
 * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
 * guest L2 page), etc...
 */
/* With this defined, we do some ugly things to force the compiler to
 * give us type safety between mfns and gfns and other integers.
 * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
 * that translate between int and foo_t.
 *
 * It does have some performance cost because the types now have
 * a different storage attribute, so we may not want it on all the time. */

#ifndef NDEBUG
#define TYPE_SAFETY 1
#endif

#ifdef TYPE_SAFETY
#define TYPE_SAFE(_type,_name)                                           \
typedef struct { _type _name; } _name##_t;                               \
static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
static inline _type _name##_x(_name##_t n) { return n._name; }
#else
#define TYPE_SAFE(_type,_name)                                           \
typedef _type _name##_t;                                                 \
static inline _name##_t _##_name(_type n) { return n; }                  \
static inline _type _name##_x(_name##_t n) { return n; }
#endif

TYPE_SAFE(unsigned long,mfn);

/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */
#define PRI_mfn "05lx"
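
/*
 * Editor's illustrative sketch (not part of the original header): basic use
 * of the type-safe mfn_t wrappers generated just above (PRI_mfn is the
 * matching printk format for the unwrapped value).
 */
static inline unsigned long example_mfn_roundtrip(void)
{
    mfn_t m = _mfn(42UL);   /* wrap a raw frame number             */
    return mfn_x(m);        /* unwrap it again; this returns 42    */
}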
/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0UL)
#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))

#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
#define set_gpfn_from_mfn(mfn, pfn) \
    ((void)((mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \
     machine_to_phys_mapping[(mfn)] = (pfn))
#else
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#endif
#define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
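
/*
 * Editor's illustrative sketch (not part of the original header): recording
 * a machine->physical translation and reading it back.  'mfn' is assumed to
 * be a valid machine frame for which the caller is entitled to update the
 * M2P table; this is only meant to show how the two macros pair up.
 */
static inline int example_m2p_roundtrip(unsigned long mfn, unsigned long gpfn)
{
    set_gpfn_from_mfn(mfn, gpfn);
    return get_gpfn_from_mfn(mfn) == gpfn;
}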
#define mfn_to_gmfn(_d, mfn)                    \
    ( (paging_mode_translate(_d))               \
      ? get_gpfn_from_mfn(mfn)                  \
      : (mfn) )

#define INVALID_MFN             (~0UL)

#ifdef CONFIG_COMPAT
#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
#endif
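
/*
 * Worked example (editor's note, not in the original header): the compat CR3
 * encoding is a 32-bit rotate left by 12: the low 20 bits of the pfn land in
 * cr3[31:12] and the top 12 bits in cr3[11:0].  compat_cr3_to_pfn() rotates
 * back, so for any 32-bit pfn:
 *
 *     compat_cr3_to_pfn(compat_pfn_to_cr3(pfn)) == pfn
 */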
#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()                ((void)0)
#define memguard_guard_range(_p,_l)    ((void)0)
#define memguard_unguard_range(_p,_l)  ((void)0)
#endif

void memguard_guard_stack(void *p);

int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                        struct cpu_user_regs *);

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
void propagate_page_fault(unsigned long addr, u16 error_code);

int __sync_lazy_execstate(void);

/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
#ifdef CONFIG_COMPAT
int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
#endif

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags);

int map_ldt_shadow_page(unsigned int);

#ifdef CONFIG_COMPAT
void domain_set_alloc_bitsize(struct domain *d);
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
#else
# define domain_set_alloc_bitsize(d) ((void)0)
# define domain_clamp_alloc_bitsize(d, b) (b)
#endif

unsigned long domain_get_maximum_gpfn(struct domain *d);

extern struct domain *dom_xen, *dom_io; /* for vmcoreinfo */

#endif /* __ASM_X86_MM_H__ */