xen/include/asm-x86/mm.h @ 14107:1e5a83fb928b

xen memory allocator: Allow per-domain bitwidth restrictions.
Original patch by Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>

Author: Keir Fraser <keir@xensource.com>
Date:   Sat Feb 24 13:57:34 2007 +0000

#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct page_info' contains a 'struct list_head list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)

struct page_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } __attribute__ ((packed)) inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Order-size of the free chunk this page is the head of. */
            u32 order;
            /* Mask of possibly-tainted TLBs. */
            cpumask_t cpumask;
        } __attribute__ ((packed)) free;

    } u;

    union {
        /*
         * Timestamp from 'TLB clock', used to avoid extra safety flushes.
         * Only valid for: a) free pages, and b) pages with zero type count
         * (except page table pages when the guest is in shadow mode).
         */
        u32 tlbflush_timestamp;

        /*
         * Guest pages with a shadow. This does not conflict with
         * tlbflush_timestamp since page table pages are explicitly not
         * tracked for TLB-flush avoidance when a guest runs in shadow mode.
         */
        unsigned long shadow_flags;
    };
};
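
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): reading the u.free fields of the head page of a free chunk
 * through PFN_ORDER(). Only meaningful while the page sits on a free list,
 * i.e. while (count_info & PGC_count_mask) == 0.
 */
static inline unsigned long example_free_chunk_pages(struct page_info *pg)
{
    /* A chunk of order N covers 2^N contiguous 4K frames. */
    return 1UL << PFN_ORDER(pg);
}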
66 /* The following page types are MUTUALLY EXCLUSIVE. */
67 #define PGT_none (0U<<29) /* no special uses of this page */
68 #define PGT_l1_page_table (1U<<29) /* using this page as an L1 page table? */
69 #define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */
70 #define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */
71 #define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */
72 #define PGT_gdt_page (5U<<29) /* using this page in a GDT? */
73 #define PGT_ldt_page (6U<<29) /* using this page in an LDT? */
74 #define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
75 #define PGT_type_mask (7U<<29) /* Bits 29-31. */
77 /* Owning guest has pinned this page to its current type? */
78 #define _PGT_pinned 28
79 #define PGT_pinned (1U<<_PGT_pinned)
80 /* Has this page been validated for use as its current type? */
81 #define _PGT_validated 27
82 #define PGT_validated (1U<<_PGT_validated)
83 /* PAE only: is this an L2 page directory containing Xen-private mappings? */
84 #define _PGT_pae_xen_l2 26
85 #define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
87 /* 16-bit count of uses of this frame as its current type. */
88 #define PGT_count_mask ((1U<<16)-1)
90 /* Cleared when the owning guest 'frees' this page. */
91 #define _PGC_allocated 31
92 #define PGC_allocated (1U<<_PGC_allocated)
93 /* Set on a *guest* page to mark it out-of-sync with its shadow */
94 #define _PGC_out_of_sync 30
95 #define PGC_out_of_sync (1U<<_PGC_out_of_sync)
96 /* Set when is using a page as a page table */
97 #define _PGC_page_table 29
98 #define PGC_page_table (1U<<_PGC_page_table)
99 /* 29-bit count of references to this frame. */
100 #define PGC_count_mask ((1U<<29)-1)
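
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): decoding type_info with the masks above. Bits 29-31 hold the
 * mutually exclusive PGT_* type, the low 16 bits count references held at
 * that type, and PGT_pinned marks a guest-pinned type.
 */
static inline int example_is_pinned_l1_table(const struct page_info *pg)
{
    unsigned long x = pg->u.inuse.type_info;

    return ((x & PGT_type_mask) == PGT_l1_page_table) &&
           ((x & PGT_count_mask) != 0) &&
           ((x & PGT_pinned) != 0);
}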
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)

#if defined(__i386__)
#define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain & 1) ? NULL : (void *)_domain; }
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

/* The order of the largest allocation unit we use for shadow pages */
#if CONFIG_PAGING_LEVELS == 2
#define SHADOW_MAX_ORDER 0 /* Only ever need 4k allocations */
#else
#define SHADOW_MAX_ORDER 2 /* Need up to 16k allocs for 32-bit on PAE/64 */
#endif

#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
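
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): the owner field is stored in pickled (32-bit) form, so it is
 * always read and written through the accessors above; unpickle_domptr()
 * recovers the struct domain * packed by pickle_domptr().
 */
static inline int example_owned_by(struct page_info *pg, struct domain *d)
{
    return page_get_owner(pg) == d;
}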
#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly);
extern void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly);

extern struct page_info *frame_table;
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);

int alloc_page_type(struct page_info *page, unsigned long type);
void free_page_type(struct page_info *page, unsigned long type);
void invalidate_shadow_ldt(struct vcpu *d);
int _shadow_mode_refcounts(struct domain *d);

static inline void put_page(struct page_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}


static inline int get_page(struct page_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_refcounts(domain) )
                gdprintk(XENLOG_INFO,
                         "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
                         PRtype_info "\n",
                         page_to_mfn(page), domain, unpickle_domptr(d),
                         x, page->u.inuse.type_info);
            return 0;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
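
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): the usual get_page()/put_page() pairing around a temporary
 * use of a frame believed to belong to @d.
 */
static inline int example_with_frame_ref(struct page_info *page,
                                         struct domain *d)
{
    /* Fails if the frame is free, owned by another domain, or its general
     * reference count would overflow. */
    if ( unlikely(!get_page(page, d)) )
        return 0;

    /* ... safely access the frame here ... */

    /* Drop the reference; the frame is returned to the domain heap when
     * the last general reference disappears. */
    put_page(page);
    return 1;
}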
void put_page_type(struct page_info *page);
int  get_page_type(struct page_info *page, unsigned long type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct page_info *page)
{
    put_page_type(page);
    put_page(page);
}


static inline int get_page_and_type(struct page_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}

static inline int page_is_removable(struct page_info *page)
{
    return ((page->count_info & PGC_count_mask) == 1);
}

#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))
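
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): taking a combined general+type reference, e.g. before using a
 * frame as a guest L1 page table, and releasing both again.
 */
static inline int example_use_as_l1_table(struct page_info *page,
                                          struct domain *d)
{
    if ( unlikely(!get_page_and_type(page, d, PGT_l1_page_table)) )
        return 0;

    /* While both references are held, the type and owner are stable. */
    ASSERT_PAGE_IS_TYPE(page, PGT_l1_page_table);
    ASSERT_PAGE_IS_DOMAIN(page, d);

    /* ... use the frame as an L1 table here ... */

    put_page_and_type(page);
    return 1;
}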
// Quick test for whether a given page can be represented directly in CR3.
//
#if CONFIG_PAGING_LEVELS == 3
#define MFN_FITS_IN_CR3(_MFN) !(mfn_x(_MFN) >> 20)

/* returns a lowmem machine address of the copied L3 root table */
unsigned long
pae_copy_root(struct vcpu *v, l3_pgentry_t *l3tab);
#endif /* CONFIG_PAGING_LEVELS == 3 */

int check_descriptor(const struct domain *, struct desc_struct *d);


/******************************************************************************
 * With shadow pagetables, the different kinds of address start
 * to get confusing.
 *
 * Virtual addresses are what they usually are: the addresses that are used
 * to access memory while the guest is running. The MMU translates from
 * virtual addresses to machine addresses.
 *
 * (Pseudo-)physical addresses are the abstraction of physical memory the
 * guest uses for allocation and so forth. For the purposes of this code,
 * we can largely ignore them.
 *
 * Guest frame numbers (gfns) are the entries that the guest puts in its
 * pagetables. For normal paravirtual guests, they are actual frame numbers,
 * with the translation done by the guest.
 *
 * Machine frame numbers (mfns) are the entries that the hypervisor puts
 * in the shadow page tables.
 *
 * Elsewhere in the xen code base, the name "gmfn" is generally used to refer
 * to a "machine frame number, from the guest's perspective", or in other
 * words, pseudo-physical frame numbers. However, in the shadow code, the
 * term "gmfn" means "the mfn of a guest page"; this combines naturally with
 * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a
 * guest L2 page), etc...
 */

/* With this defined, we do some ugly things to force the compiler to
 * give us type safety between mfns and gfns and other integers.
 * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions
 * that translate between int and foo_t.
 *
 * It does have some performance cost because the types now have
 * a different storage attribute, so you may not want it on all the time. */
#ifndef NDEBUG
#define TYPE_SAFETY 1
#endif

#ifdef TYPE_SAFETY
#define TYPE_SAFE(_type,_name)                                           \
typedef struct { _type _name; } _name##_t;                               \
static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; }  \
static inline _type _name##_x(_name##_t n) { return n._name; }
#else
#define TYPE_SAFE(_type,_name)                                           \
typedef _type _name##_t;                                                 \
static inline _name##_t _##_name(_type n) { return n; }                  \
static inline _type _name##_x(_name##_t n) { return n; }
#endif

TYPE_SAFE(unsigned long,mfn);

/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */
#define PRI_mfn "05lx"
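
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): with TYPE_SAFETY enabled, raw unsigned longs and mfn_t do not
 * mix silently, so conversions go through _mfn() and mfn_x(). For printing,
 * PRI_mfn gives the format, e.g. printk("%" PRI_mfn "\n", mfn_x(mfn)).
 */
static inline unsigned long example_mfn_roundtrip(unsigned long raw)
{
    mfn_t mfn = _mfn(raw);   /* wrap the raw frame number in the safe type */
    return mfn_x(mfn);       /* unwrap it where a plain integer is needed  */
}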
/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0UL)
#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))

#ifdef CONFIG_COMPAT
#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
#define set_gpfn_from_mfn(mfn, pfn) \
    ((void)(compat_disabled || \
            (mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \
     machine_to_phys_mapping[(mfn)] = (pfn))
#else
#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#endif
#define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
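
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): reading the M2P table back after it has been populated with
 * set_gpfn_from_mfn(). Entries with the top bit set (such as
 * INVALID_M2P_ENTRY) denote frames with no current "physical" mapping.
 */
static inline unsigned long example_m2p_lookup(unsigned long mfn)
{
    unsigned long gpfn = get_gpfn_from_mfn(mfn);

    return VALID_M2P(gpfn) ? gpfn : INVALID_M2P_ENTRY;
}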
#define mfn_to_gmfn(_d, mfn)                    \
    ( (paging_mode_translate(_d))               \
      ? get_gpfn_from_mfn(mfn)                  \
      : (mfn) )

#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))

#define INVALID_MFN  (~0UL)

#ifdef CONFIG_COMPAT
#define compat_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
#define compat_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
#endif
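
#ifdef CONFIG_COMPAT
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): the compat CR3 encoding rotates the 32-bit frame number by
 * 12 bits, so packing and unpacking are exact inverses.
 */
static inline unsigned int example_compat_cr3_roundtrip(unsigned int pfn)
{
    unsigned int cr3 = compat_pfn_to_cr3(pfn);

    /* compat_cr3_to_pfn() undoes the rotation, yielding pfn again. */
    return compat_cr3_to_pfn(cr3);
}
#endif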
#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()                ((void)0)
#define memguard_guard_range(_p,_l)    ((void)0)
#define memguard_unguard_range(_p,_l)  ((void)0)
#endif

void memguard_guard_stack(void *p);

int  ptwr_do_page_fault(struct vcpu *, unsigned long,
                        struct cpu_user_regs *);

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);
void make_cr3(struct vcpu *v, unsigned long mfn);
void update_cr3(struct vcpu *v);
void propagate_page_fault(unsigned long addr, u16 error_code);

int __sync_lazy_execstate(void);

/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
#ifdef CONFIG_COMPAT
int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void));
int compat_subarch_memory_op(int op, XEN_GUEST_HANDLE(void));
#endif

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags);

int map_ldt_shadow_page(unsigned int);

#ifdef CONFIG_COMPAT
int setup_arg_xlat_area(struct vcpu *, l4_pgentry_t *);
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits);
#else
# define setup_arg_xlat_area(vcpu, l4tab) 0
# define domain_clamp_alloc_bitsize(d, b) (b)
#endif
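
/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * interface): how an allocation path might apply the per-domain bitwidth
 * restriction that this changeset introduces. A compat guest gets its
 * requested address width clamped; without CONFIG_COMPAT the clamp is a
 * no-op and the request passes through unchanged.
 */
static inline unsigned int example_alloc_address_bits(struct domain *d,
                                                      unsigned int wanted_bits)
{
    return domain_clamp_alloc_bitsize(d, wanted_bits);
}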
#endif /* __ASM_X86_MM_H__ */