/*
 * xen-unstable: xen/include/asm-x86/mm.h @ 19348:dd3219cd019a
 *
 * Code cleanups after page offline patch.
 *
 * Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
 * Author: Keir Fraser <keir.fraser@citrix.com>
 * Date:   Thu Mar 12 15:31:36 2009 +0000
 * Parent: dd489125a2e7
 * Child:  f210a633571c
 */

#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct page_info' contains a 'struct page_list_entry list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)

/*
 * This definition is solely for use in struct page_info (and
 * struct page_list_head), intended to allow easy adjustment once x86-64
 * wants to support more than 16TB.
 * 'unsigned long' should be used for MFNs everywhere else.
 */
#define __mfn_t unsigned int
#define PRpgmfn "08x"

#undef page_list_entry
struct page_list_entry
{
    __mfn_t next, prev;
};

struct page_info
{
    union {
        /* Each frame can be threaded onto a doubly-linked list.
         *
         * For unused shadow pages, a list of pages of this order; for
         * pinnable shadows, if pinned, a list of other pinned shadows
         * (see sh_type_is_pinnable() below for the definition of
         * "pinnable" shadow types).
         */
        struct page_list_entry list;
        /* For non-pinnable shadows, a higher entry that points at us. */
        paddr_t up;
    };

    /* Reference count and various PGC_xxx flags and fields. */
    unsigned long count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } inuse;

        /* Page is in use as a shadow: count_info == 0. */
        struct {
            unsigned long type:5;   /* What kind of shadow is this? */
            unsigned long pinned:1; /* Is the shadow pinned? */
            unsigned long count:26; /* Reference count */
        } sh;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Do TLBs need flushing for safety before next page use? */
            bool_t need_tlbflush;
        } free;

    } u;

    union {

        /* Page is in use, but not as a shadow. */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
        } inuse;

        /* Page is in use as a shadow. */
        struct {
            /* GMFN of guest page we're a shadow of. */
            __mfn_t back;
        } sh;

        /* Page is on a free list (including shadow code free lists). */
        struct {
            /* Order-size of the free chunk this page is the head of. */
            unsigned int order;
        } free;

    } v;

    union {
        /*
         * Timestamp from 'TLB clock', used to avoid extra safety flushes.
         * Only valid for: a) free pages, and b) pages with zero type count
         * (except page table pages when the guest is in shadow mode).
         */
        u32 tlbflush_timestamp;

        /*
         * When PGT_partial is true then this field is valid and indicates
         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
         * An extra page reference must be acquired (or not dropped) whenever
         * PGT_partial gets set, and it must be dropped when the flag gets
         * cleared. This is so that a get() leaving a page in partially
         * validated state (where the caller would drop the reference acquired
         * due to the getting of the type [apparently] failing [-EAGAIN])
         * would not accidentally result in a page left with zero general
         * reference count, but non-zero type reference count (possible when
         * the partial get() is followed immediately by domain destruction).
         * Likewise, the ownership of the single type reference for partially
         * (in-)validated pages is tied to this flag, i.e. the instance
         * setting the flag must not drop that reference, whereas the instance
         * clearing it will have to.
         *
         * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
         * been partially validated. This implies that the general reference
         * to the page (acquired from get_page_from_lNe()) would be dropped
         * (again due to the apparent failure) and hence must be re-acquired
         * when resuming the validation, but must not be dropped when picking
         * up the page for invalidation.
         *
         * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
         * been partially invalidated. This is basically the opposite case of
         * above, i.e. the general reference to the page was not dropped in
         * put_page_from_lNe() (due to the apparent failure), and hence it
         * must be dropped when the put operation is resumed (and completes),
         * but it must not be acquired if picking up the page for validation.
         */
        struct {
            u16 nr_validated_ptes;
            s8 partial_pte;
        };

        /*
         * Guest pages with a shadow.  This does not conflict with
         * tlbflush_timestamp since page table pages are explicitly not
         * tracked for TLB-flush avoidance when a guest runs in shadow mode.
         */
        u32 shadow_flags;

        /* When in use as a shadow, next shadow in this hash chain. */
        __mfn_t next_shadow;
    };
};
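
/*
 * Illustrative sketch of how the state-dependent unions above are read for
 * a page currently in use as a validated L1 page table.  The wrapper below
 * is hypothetical; the fields and flags are the ones defined in this file:
 *
 *     static inline int example_is_validated_l1(const struct page_info *pg)
 *     {
 *         if ( (pg->count_info & PGC_count_mask) == 0 )
 *             return 0;            // free page: u.inuse is not valid
 *         return ((pg->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table)
 *                && (pg->u.inuse.type_info & PGT_validated);
 *     }
 *
 * For a free page, u.free/v.free and PFN_ORDER() are the valid views instead.
 */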

#undef __mfn_t

#define PG_shift(idx)   (BITS_PER_LONG - (idx))
#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
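
/*
 * Worked example: assuming a 64-bit build (BITS_PER_LONG == 64),
 * PG_shift(3) == 61, so PG_mask(7, 3) == (7UL << 61), i.e. a field in the
 * top three bits of the word; on a 32-bit build the same expression gives
 * (7UL << 29), i.e. bits 29-31.  All the PGT_* and PGC_* values below are
 * built this way, packing flags downwards from the most significant bit.
 */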

/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none          PG_mask(0, 3) /* no special uses of this page */
#define PGT_l1_page_table PG_mask(1, 3) /* using as an L1 page table? */
#define PGT_l2_page_table PG_mask(2, 3) /* using as an L2 page table? */
#define PGT_l3_page_table PG_mask(3, 3) /* using as an L3 page table? */
#define PGT_l4_page_table PG_mask(4, 3) /* using as an L4 page table? */
#define PGT_seg_desc_page PG_mask(5, 3) /* using this page in a GDT/LDT? */
#define PGT_writable_page PG_mask(7, 3) /* has writable mappings? */
#define PGT_type_mask     PG_mask(7, 3) /* Bits 29-31. */

/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned       PG_shift(4)
#define PGT_pinned        PG_mask(1, 4)
/* Has this page been validated for use as its current type? */
#define _PGT_validated    PG_shift(5)
#define PGT_validated     PG_mask(1, 5)
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
#define _PGT_pae_xen_l2   PG_shift(6)
#define PGT_pae_xen_l2    PG_mask(1, 6)
/* Has this page been *partially* validated for use as its current type? */
#define _PGT_partial      PG_shift(7)
#define PGT_partial       PG_mask(1, 7)
/* Page is locked? */
#define _PGT_locked       PG_shift(8)
#define PGT_locked        PG_mask(1, 8)

/* Count of uses of this frame as its current type. */
#define PGT_count_width   PG_shift(8)
#define PGT_count_mask    ((1UL<<PGT_count_width)-1)
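
/*
 * Resulting type_info layout, assuming BITS_PER_LONG == 64: bits 63-61 hold
 * the page type, bits 60-56 hold the pinned/validated/pae_xen_l2/partial/
 * locked flags, and bits 55-0 hold the type reference count
 * (PGT_count_mask == (1UL << 56) - 1).
 */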

/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated    PG_shift(1)
#define PGC_allocated     PG_mask(1, 1)
/* Page is Xen heap? */
#define _PGC_xen_heap     PG_shift(2)
#define PGC_xen_heap      PG_mask(1, 2)
/* Set when the page is in use as a page table. */
#define _PGC_page_table   PG_shift(3)
#define PGC_page_table    PG_mask(1, 3)
/* 3-bit PAT/PCD/PWT cache-attribute hint. */
#define PGC_cacheattr_base PG_shift(6)
#define PGC_cacheattr_mask PG_mask(7, 6)
/* Page is broken? */
#define _PGC_broken       PG_shift(7)
#define PGC_broken        PG_mask(1, 7)
/* Page is pending offline? */
#define _PGC_offlining    PG_shift(8)
#define PGC_offlining     PG_mask(1, 8)
/* Page is offlined. */
#define _PGC_offlined     PG_shift(9)
#define PGC_offlined      PG_mask(1, 9)
#define PGC_offlined_broken (PGC_offlined | PGC_broken)

/* Count of references to this frame. */
#define PGC_count_width   PG_shift(9)
#define PGC_count_mask    ((1UL<<PGC_count_width)-1)
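
/*
 * Resulting count_info layout, assuming BITS_PER_LONG == 64: bit 63
 * allocated, bit 62 Xen heap, bit 61 page table, bits 60-58 cache attribute,
 * bit 57 broken, bit 56 offlining, bit 55 offlined, and bits 54-0 the
 * general reference count (PGC_count_mask == (1UL << 55) - 1).
 */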

#define is_page_offlining(page)  ((page)->count_info & PGC_offlining)
#define is_page_offlined(page)   ((page)->count_info & PGC_offlined)
#define is_page_broken(page)     ((page)->count_info & PGC_broken)
#define is_page_online(page)     (!is_page_offlined(page))

#if defined(__i386__)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({                          \
    unsigned long _mfn = (mfn);                          \
    (_mfn < paddr_to_pfn(xenheap_phys_end));             \
})
#else
extern unsigned long allocator_bitmap_end;
#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
#define is_xen_heap_mfn(mfn) \
    (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
#define is_xen_fixed_mfn(mfn) \
    ( (mfn << PAGE_SHIFT) >= __pa(&_start) &&    \
      (mfn << PAGE_SHIFT) <= allocator_bitmap_end )
#endif

#if defined(__i386__)
#define PRtype_info "08lx"  /* should only be used for printk's */
#elif defined(__x86_64__)
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

/* The order of the largest allocation unit we use for shadow pages */
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */

/* The number of out-of-sync shadows we allow per vcpu (prime, please) */
#define SHADOW_OOS_PAGES 3

/* OOS fixup entries */
#define SHADOW_OOS_FIXUPS 2

#define page_get_owner(_p)                                              \
    ((struct domain *)((_p)->v.inuse._domain ?                          \
                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
#define page_set_owner(_p,_d)                                           \
    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
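
/*
 * The owner pointer is "pickled": only the MFN of the page holding the
 * struct domain is stored in the 32-bit v.inuse._domain field, which relies
 * on struct domain instances being page-aligned Xen-heap allocations.
 * Assuming 'd' is such a pointer, the round trip
 *
 *     page_set_owner(pg, d);
 *     ASSERT(page_get_owner(pg) == d);
 *
 * holds, with NULL represented as 0.
 */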

#define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
#define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))

#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly);
extern void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly);

extern struct page_info *frame_table;
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);

int free_page_type(struct page_info *page, unsigned long type,
                   int preemptible);
int _shadow_mode_refcounts(struct domain *d);

void cleanup_page_cacheattr(struct page_info *page);

int is_iomem_page(unsigned long mfn);

struct domain *page_get_owner_and_reference(struct page_info *page);
void put_page(struct page_info *page);
int  get_page(struct page_info *page, struct domain *domain);
void put_page_type(struct page_info *page);
int  get_page_type(struct page_info *page, unsigned long type);
int  put_page_type_preemptible(struct page_info *page);
int  get_page_type_preemptible(struct page_info *page, unsigned long type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct page_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int put_page_and_type_preemptible(struct page_info *page,
                                                int preemptible)
{
    int rc = 0;

    if ( preemptible )
        rc = put_page_type_preemptible(page);
    else
        put_page_type(page);
    if ( likely(rc == 0) )
        put_page(page);
    return rc;
}

static inline int get_page_and_type(struct page_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}
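
/*
 * Typical usage sketch (hypothetical caller, error handling trimmed): take a
 * general reference and a type reference together, use the frame, then drop
 * both again.  get_page_and_type() returns nonzero on success and leaves no
 * reference behind on failure.
 *
 *     if ( !get_page_and_type(page, d, PGT_writable_page) )
 *         return -EINVAL;
 *     ... access the frame through a writable mapping ...
 *     put_page_and_type(page);
 */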

#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

// Quick test for whether a given page can be represented directly in CR3.
//
#if CONFIG_PAGING_LEVELS == 3
#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20)

/* returns a lowmem machine address of the copied L3 root table */
unsigned long
pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
#endif /* CONFIG_PAGING_LEVELS == 3 */

int check_descriptor(const struct domain *, struct desc_struct *d);

extern int opt_allow_hugepage;

/******************************************************************************
 * With shadow pagetables, the different kinds of address start
 * to get confusing.
 *
 * Virtual addresses are what they usually are: the addresses that are used
 * to access memory while the guest is running.  The MMU translates from
 * virtual addresses to machine addresses.
 *
 * (Pseudo-)physical addresses are the abstraction of physical memory the
 * guest uses for allocation and so forth.  For the purposes of this code,
 * we can largely ignore them.
 *
 * Guest frame numbers (gfns) are the entries that the guest puts in its
 * pagetables.  For normal paravirtual guests, they are actual frame numbers,
 * with the translation done by the guest.
 *
 * Machine frame numbers (mfns) are the entries that the hypervisor puts
 * in the shadow page tables.
 *
 * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
 * to a "machine frame number, from the guest's perspective", or in other
 * words, pseudo-physical frame numbers.  However, in the shadow code, the
 * term "gmfn" means "the mfn of a guest page"; this combines naturally with
 * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
 * guest L2 page), etc...
 */

/* With this defined, we do some ugly things to force the compiler to
 * give us type safety between mfns and gfns and other integers.
 * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
 * that translate between int and foo_t.
 *
 * It does have some performance cost because the types now have
 * a different storage attribute, so you may not want it on all the time. */

#ifndef NDEBUG
#define TYPE_SAFETY 1
#endif

#ifdef TYPE_SAFETY
#define TYPE_SAFE(_type,_name)                                  \
typedef struct { _type _name; } _name##_t;                      \
static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \
static inline _type _name##_x(_name##_t n) { return n._name; }
#else
#define TYPE_SAFE(_type,_name)                                  \
typedef _type _name##_t;                                        \
static inline _name##_t _##_name(_type n) { return n; }         \
static inline _type _name##_x(_name##_t n) { return n; }
#endif

TYPE_SAFE(unsigned long,mfn);
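
/*
 * With TYPE_SAFETY defined, the line above expands to roughly:
 *
 *     typedef struct { unsigned long mfn; } mfn_t;
 *     static inline mfn_t _mfn(unsigned long n) { return (mfn_t) { n }; }
 *     static inline unsigned long mfn_x(mfn_t n) { return n.mfn; }
 *
 * so _mfn() wraps a raw frame number, mfn_x() unwraps it, and passing a bare
 * unsigned long where an mfn_t is expected becomes a compile-time error.
 */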

/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */
#define PRI_mfn "05lx"

/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0UL)
#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))

#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
#define set_gpfn_from_mfn(mfn, pfn) \
    ((void)((mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \
     machine_to_phys_mapping[(mfn)] = (pfn))
#else
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#endif
#define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])

#define mfn_to_gmfn(_d, mfn)                            \
    ( (paging_mode_translate(_d))                       \
      ? get_gpfn_from_mfn(mfn)                          \
      : (mfn) )
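
/*
 * Usage sketch (the frame numbers are illustrative only): when machine frame
 * 0x1000 is given to a guest as its pseudo-physical frame 0x10, the M2P is
 * kept in sync with
 *
 *     set_gpfn_from_mfn(0x1000, 0x10);
 *
 * after which get_gpfn_from_mfn(0x1000) returns 0x10.  mfn_to_gmfn(d, 0x1000)
 * then yields 0x10 for a translated domain, but 0x1000 for an ordinary PV
 * domain, which works with machine frame numbers directly.
 */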

#define INVALID_MFN             (~0UL)

#ifdef CONFIG_COMPAT
#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
#endif
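
/*
 * Both compat macros are 32-bit rotations by 12 bits (left for pfn->cr3,
 * right for cr3->pfn), so they are exact inverses of each other.  For
 * example, with pfn == 0x00123456:
 *
 *     compat_pfn_to_cr3(0x00123456) == 0x23456001
 *     compat_cr3_to_pfn(0x23456001) == 0x00123456
 */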

#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()                ((void)0)
#define memguard_guard_range(_p,_l)    ((void)0)
#define memguard_unguard_range(_p,_l)  ((void)0)
#endif

void memguard_guard_stack(void *p);

int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                        struct cpu_user_regs *);

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
void propagate_page_fault(unsigned long addr, u16 error_code);

int __sync_lazy_execstate(void);

/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
#ifdef CONFIG_COMPAT
int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
#endif

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags);

int map_ldt_shadow_page(unsigned int);

#ifdef CONFIG_COMPAT
void domain_set_alloc_bitsize(struct domain *d);
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
#else
# define domain_set_alloc_bitsize(d) ((void)0)
# define domain_clamp_alloc_bitsize(d, b) (b)
#endif

unsigned long domain_get_maximum_gpfn(struct domain *d);

extern struct domain *dom_xen, *dom_io; /* for vmcoreinfo */

#endif /* __ASM_X86_MM_H__ */