ia64/xen-unstable: xen/include/asm-x86/mm.h @ 6552:a9873d384da4

Merge.

author    adsharma@los-vmm.sc.intel.com
date      Thu Aug 25 12:24:48 2005 -0700
parents   112d44270733 fa0754a9f64f
children  dfaf788ab18c
#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct pfn_info' contains a 'struct list_head list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)

struct pfn_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    u32 tlbflush_timestamp;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Mask of possibly-tainted TLBs. */
            cpumask_t cpumask;
            /* Order-size of the free chunk this page is the head of. */
            u8 order;
        } free;

    } u;
};
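/*
 * Illustrative sketch (not part of the original header): which arm of the
 * union above is valid is decided by the general reference count, as the
 * comments inside the union note.  'mfn' is a hypothetical machine frame
 * number; frame_table, PGC_count_mask, page_get_owner() and PFN_ORDER()
 * are all defined in this header.
 *
 *     struct pfn_info *pg = &frame_table[mfn];
 *     if ( (pg->count_info & PGC_count_mask) != 0 )
 *         printk("in use by %p, type_info=%lx\n",
 *                page_get_owner(pg), pg->u.inuse.type_info);
 *     else
 *         printk("free, head of an order-%u chunk\n", PFN_ORDER(pg));
 */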
/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none            (0U<<29) /* no special uses of this page */
#define PGT_l1_page_table   (1U<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table   (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table   (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table   (4U<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */

#define PGT_l1_shadow       PGT_l1_page_table
#define PGT_l2_shadow       PGT_l2_page_table
#define PGT_l3_shadow       PGT_l3_page_table
#define PGT_l4_shadow       PGT_l4_page_table
#define PGT_hl2_shadow      (5U<<29)
#define PGT_snapshot        (6U<<29)
#define PGT_writable_pred   (7U<<29) /* predicted gpfn with writable ref */

#define PGT_fl1_shadow      (5U<<29)
#define PGT_type_mask       (7U<<29) /* Bits 29-31. */

/* Has this page been validated for use as its current type? */
#define _PGT_validated      28
#define PGT_validated       (1U<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned         27
#define PGT_pinned          (1U<<_PGT_pinned)
#if defined(__i386__)
/* The 11 most significant bits of virt address if this is a page table. */
#define PGT_va_shift        16
#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
#elif defined(__x86_64__)
/* The 27 most significant bits of virt address if this is a page table. */
#define PGT_va_shift        32
#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
#endif

/* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask      ((1U<<16)-1)

#define PGT_mfn_mask        ((1U<<20)-1) /* mfn mask for shadow types */

#define PGT_score_shift     20
#define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
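/*
 * Illustrative sketch (not in the original header): decoding a frame's
 * type_info with the masks above.  'pg' is a hypothetical local pointing
 * at an in-use frame.
 *
 *     unsigned long t = pg->u.inuse.type_info;
 *     if ( ((t & PGT_type_mask) == PGT_l1_page_table) &&
 *          (t & PGT_validated) )
 *         printk("validated L1 pagetable, %lu type refs\n",
 *                t & PGT_count_mask);
 */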
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated      31
#define PGC_allocated       (1U<<_PGC_allocated)
/* Set when fullshadow mode marks a page out-of-sync */
#define _PGC_out_of_sync    30
#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
/* Set when fullshadow mode is using a page as a page table */
#define _PGC_page_table     29
#define PGC_page_table      (1U<<_PGC_page_table)
/* 29-bit count of references to this frame. */
#define PGC_count_mask      ((1U<<29)-1)

/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
#if defined(__i386__)
#define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
#define PRtype_info "08lx"  /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain == 0) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
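/*
 * Illustrative sketch (not in the original header): owner pointers are
 * stored in 'pickled' 32-bit form, so they should only ever be read and
 * written through the accessor macros above.  'pg' and 'd' are
 * hypothetical locals for a frame and its owning domain.
 *
 *     page_set_owner(pg, d);
 *     ASSERT(page_get_owner(pg) == d);
 */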
#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                                   \
    do {                                                                    \
        page_set_owner((_pfn), (_dom));                                     \
        /* The incremented type count is intended to pin to 'writable'. */ \
        (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;  \
        wmb(); /* install valid domain ptr before updating refcnt. */       \
        spin_lock(&(_dom)->page_alloc_lock);                                \
        /* _dom holds an allocation reference */                            \
        ASSERT((_pfn)->count_info == 0);                                    \
        (_pfn)->count_info |= PGC_allocated | 1;                            \
        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
            get_knownalive_domain(_dom);                                    \
        list_add_tail(&(_pfn)->list, &(_dom)->xenpage_list);                \
        spin_unlock(&(_dom)->page_alloc_lock);                              \
    } while ( 0 )
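/*
 * Illustrative sketch (not in the original header): typical use of the
 * macro above when handing a Xen-heap frame to a domain.  'shared_page'
 * is a hypothetical Xen-heap allocation.
 *
 *     struct pfn_info *pfn = virt_to_page(shared_page);
 *     SHARE_PFN_WITH_DOMAIN(pfn, d);
 *
 * The frame then sits on d->xenpage_list with a writable, validated type
 * and a single general reference held on behalf of the domain.
 */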
extern struct pfn_info *frame_table;
extern unsigned long max_page;
void init_frametable(void);

int alloc_page_type(struct pfn_info *page, unsigned long type);
void free_page_type(struct pfn_info *page, unsigned long type);
extern void invalidate_shadow_ldt(struct vcpu *d);
extern int shadow_remove_all_write_access(
    struct domain *d, unsigned long gpfn, unsigned long gmfn);
extern u32 shadow_remove_all_access(struct domain *d, unsigned long gmfn);
extern int _shadow_mode_refcounts(struct domain *d);

static inline void put_page(struct pfn_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}
static inline int get_page(struct pfn_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_refcounts(domain) )
                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
                        page_to_pfn(page), domain, unpickle_domptr(d),
                        x, page->u.inuse.type_info);
            return 0;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
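/*
 * Illustrative sketch (not in the original header): general references are
 * taken and dropped in pairs.  A caller wanting to touch a frame owned by
 * 'd' typically does the following; do_something_with() is a hypothetical
 * helper.
 *
 *     if ( unlikely(!get_page(page, d)) )
 *         return -EINVAL;
 *     do_something_with(page);
 *     put_page(page);
 */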
void put_page_type(struct pfn_info *page);
int  get_page_type(struct pfn_info *page, unsigned long type);
int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct pfn_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int get_page_and_type(struct pfn_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}
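/*
 * Illustrative sketch (not in the original header): taking both a general
 * and a type reference in one step, e.g. before writing into a frame that
 * must have (only) writable mappings.
 *
 *     if ( get_page_and_type(page, d, PGT_writable_page) )
 *     {
 *         ... frame is referenced and validated as writable here ...
 *         put_page_and_type(page);
 *     }
 */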
#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

int check_descriptor(struct desc_struct *d);

/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((u32 *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0U)
#define VALID_M2P(_e)            (!((_e) & (1U<<31)))
#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))
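/*
 * Illustrative sketch (not in the original header): reading the M2P table
 * to recover the guest-physical frame behind a machine frame.  'mfn' is a
 * hypothetical machine frame number.
 *
 *     unsigned long pfn = machine_to_phys_mapping[mfn];
 *     if ( IS_INVALID_M2P_ENTRY(pfn) )
 *         ... mfn currently has no pseudo-physical mapping ...
 */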
/*
 * The phys_to_machine_mapping is the reversed mapping of MPT for full
 * virtualization. It is only used by shadow_mode_translate()==true
 * guests, so we steal the address space that would have normally
 * been used by the read-only MPT map.
 */
#define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
#define INVALID_MFN               (~0UL)
#define VALID_MFN(_mfn)           (!((_mfn) & (1U<<31)))

/*
 * Returns the machine frame number backing guest pseudo-physical frame
 * 'pfn', or INVALID_MFN if no valid mapping is present.
 */
static inline unsigned long phys_to_machine_mapping(unsigned long pfn)
{
    unsigned long mfn;
    l1_pgentry_t pte;

    if ( (__copy_from_user(&pte, &__phys_to_machine_mapping[pfn],
                           sizeof(pte)) == 0) &&
         (l1e_get_flags(pte) & _PAGE_PRESENT) )
        mfn = l1e_get_pfn(pte);
    else
        mfn = INVALID_MFN;

    return mfn;
}
#define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
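/*
 * Illustrative sketch (not in the original header): translating a
 * translated-mode guest's pseudo-physical frame to a machine frame.
 * 'gpfn' is a hypothetical guest pseudo-physical frame number.
 *
 *     unsigned long mfn = phys_to_machine_mapping(gpfn);
 *     if ( !VALID_MFN(mfn) )
 *         ... gpfn is not currently backed by a machine frame ...
 */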
#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()               ((void)0)
#define memguard_guard_range(_p,_l)   ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
#endif

void memguard_guard_stack(void *p);

/* Writable Pagetables */
struct ptwr_info {
    /* Linear address where the guest is updating the p.t. page. */
    unsigned long l1va;
    /* Copy of the p.t. page, taken before guest is given write access. */
    l1_pgentry_t *page;
    /* A temporary Xen mapping of the actual p.t. page. */
    l1_pgentry_t *pl1e;
    /* Index in L2 page table where this L1 p.t. is always hooked. */
    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
    /* Info about last ptwr update batch. */
    unsigned int prev_nr_updates;
    /* Exec domain which created writable mapping. */
    struct vcpu *vcpu;
    /* EIP of the instruction that took the original write fault
     * (used for stats collection only). */
    unsigned long eip;
};

#define PTWR_PT_ACTIVE   0
#define PTWR_PT_INACTIVE 1

#define PTWR_CLEANUP_ACTIVE   1
#define PTWR_CLEANUP_INACTIVE 2
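/*
 * Illustrative sketch (not in the original header, and assuming the
 * per-domain array 'd->arch.ptwr[]' declared in the arch domain
 * structure): the PTWR_PT_* constants index the two writable-pagetable
 * slots, e.g.
 *
 *     struct ptwr_info *ptwr = &d->arch.ptwr[PTWR_PT_ACTIVE];
 *     if ( ptwr->l1va != 0 )
 *         ptwr_flush(d, PTWR_PT_ACTIVE);
 */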
int  ptwr_init(struct domain *);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
int  ptwr_do_page_fault(struct domain *, unsigned long,
                        struct cpu_user_regs *);
int  revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);

void cleanup_writable_pagetable(struct domain *d);
#define sync_pagetable_state(d) cleanup_writable_pagetable(d)

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif
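/*
 * Illustrative sketch (not in the original header): the AUDIT_* flags tune
 * how strict and how noisy a debug-build audit is, e.g.
 *
 *     _audit_domain(d, AUDIT_QUIET | AUDIT_ERRORS_OK);
 *
 * audit_domain(d) is the shorthand for an audit that tolerates known
 * reference-count discrepancies.
 */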
#ifdef PERF_ARRAYS

void ptwr_eip_stat_reset();
void ptwr_eip_stat_print();

#else

#define ptwr_eip_stat_reset() ((void)0)
#define ptwr_eip_stat_print() ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);

void propagate_page_fault(unsigned long addr, u16 error_code);

/*
 * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must
 * hold a reference to the page.
 */
int update_grant_va_mapping(
    unsigned long va, l1_pgentry_t _nl1e,
    struct domain *d, struct vcpu *v);
int update_grant_pte_mapping(
    unsigned long pte_addr, l1_pgentry_t _nl1e,
    struct domain *d, struct vcpu *v);
int clear_grant_va_mapping(unsigned long addr, unsigned long frame);
int clear_grant_pte_mapping(
    unsigned long addr, unsigned long frame, struct domain *d);
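/*
 * Illustrative sketch (not in the original header, and assuming the
 * LOCK_BIGLOCK()/UNLOCK_BIGLOCK() and local_flush_tlb() helpers defined
 * elsewhere in Xen): a caller honouring the contract stated above, with a
 * reference to the mapped frame already held.
 *
 *     LOCK_BIGLOCK(d);
 *     rc = update_grant_va_mapping(va, nl1e, d, v);
 *     UNLOCK_BIGLOCK(d);
 *     local_flush_tlb();
 */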
#endif /* __ASM_X86_MM_H__ */