view xen/include/asm-x86/mm.h @ 10749:5fa2cd68d059

[IA64] Fix of C/S 10529:4260eb8c08740de0000081c61a6237ffcb95b2d5 for IA64.
When a page is zapped from a domain, its reference counter is
checked, but this produces a false-positive alert on Xen/IA64
because a page 'in use' holds a reference count of 2 there:
- one because the page is assigned to the guest domain's pseudo-physical
  address space; this reference is dropped by guest_physmap_remove_page().
- one because the page is allocated to the domain; this reference is
  dropped by the following put_page().

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author kfraser@localhost.localdomain
date Thu Jul 27 13:17:17 2006 +0100 (2006-07-27)
parents 1507021dccdf
children 9727328c008e
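
The description above implies a fixed ordering when tearing a page out of a
domain. A minimal sketch of that ordering (editorial illustration only; the
surrounding variables are hypothetical, not taken from the changeset):

    /* Zapping one guest page on Xen/IA64: two references must be dropped. */
    guest_physmap_remove_page(d, gpfn, mfn); /* drop the physmap reference */
    put_page(mfn_to_page(mfn));              /* drop the allocation reference;
                                                only now may the count hit 0 */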

#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/cpumask.h>
#include <xen/list.h>
#include <asm/io.h>
#include <asm/uaccess.h>

/*
 * Per-page-frame information.
 *
 * Every architecture must ensure the following:
 * 1. 'struct page_info' contains a 'struct list_head list'.
 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
 */
#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)

struct page_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;

    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_info;

    /* Context-dependent fields follow... */
    union {

        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
        struct {
            /* Owner of this page (NULL if page is anonymous). */
            u32 _domain; /* pickled format */
            /* Type reference count and various PGT_xxx flags and fields. */
            unsigned long type_info;
        } __attribute__ ((packed)) inuse;

        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
        struct {
            /* Order-size of the free chunk this page is the head of. */
            u32 order;
            /* Mask of possibly-tainted TLBs. */
            cpumask_t cpumask;
        } __attribute__ ((packed)) free;

    } u;

    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    u32 tlbflush_timestamp;
};
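
/*
 * Editorial sketch (not part of the original header): which arm of 'u'
 * is valid is encoded in count_info, as the union's own comments state.
 * PGC_count_mask is defined further down this file.
 */
#if 0
static inline int page_is_in_use(const struct page_info *page)
{
    return (page->count_info & PGC_count_mask) != 0;  /* else on free list */
}
#endif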

 /* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none            (0U<<29) /* no special uses of this page */
#define PGT_l1_page_table   (1U<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table   (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table   (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table   (4U<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
#define PGT_writable_page   (7U<<29) /* has writable mappings of this page? */

#define PGT_l1_shadow       PGT_l1_page_table
#define PGT_l2_shadow       PGT_l2_page_table
#define PGT_l3_shadow       PGT_l3_page_table
#define PGT_l4_shadow       PGT_l4_page_table
#define PGT_hl2_shadow      (5U<<29)
#define PGT_snapshot        (6U<<29)
#define PGT_writable_pred   (7U<<29) /* predicted gpfn with writable ref */

#define PGT_fl1_shadow      (5U<<29)
#define PGT_type_mask       (7U<<29) /* Bits 29-31. */

 /* Has this page been validated for use as its current type? */
#define _PGT_validated      28
#define PGT_validated       (1U<<_PGT_validated)
 /* Owning guest has pinned this page to its current type? */
#define _PGT_pinned         27
#define PGT_pinned          (1U<<_PGT_pinned)
#if defined(__i386__)
 /* The 11 most significant bits of virt address if this is a page table. */
#define PGT_va_shift        16
#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
 /* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
 /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
#elif defined(__x86_64__)
 /* The 27 most significant bits of virt address if this is a page table. */
#define PGT_va_shift        32
#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
 /* Is the back pointer still mutable (i.e. not fixed yet)? */
#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
 /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
#endif

 /* 16-bit count of uses of this frame as its current type. */
#define PGT_count_mask      ((1U<<16)-1)

#ifdef __x86_64__
#define PGT_high_mfn_shift  52
#define PGT_high_mfn_mask   (0xfffUL << PGT_high_mfn_shift)
#define PGT_mfn_mask        (((1U<<23)-1) | PGT_high_mfn_mask)
#define PGT_high_mfn_nx     (0x800UL << PGT_high_mfn_shift)
#else
 /* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
#define PGT_mfn_mask        ((1U<<23)-1)
 /* NX for PAE xen is not supported yet */
#define PGT_high_mfn_nx     (1ULL << 63)
#endif

#define PGT_score_shift     23
#define PGT_score_mask      (((1U<<4)-1)<<PGT_score_shift)
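
/*
 * Editorial sketch (not part of the original header): how the PGT_xxx
 * fields pack into type_info -- the mutually-exclusive type in bits
 * 29-31, validated/pinned flags just below, and a 16-bit use count in
 * the low bits.
 */
#if 0
/* Returns the mutually-exclusive type bits, e.g. PGT_l1_page_table. */
static inline unsigned long pgt_type_of(unsigned long type_info)
{
    return type_info & PGT_type_mask;
}
/* Non-zero iff the frame is both validated and pinned as its type. */
static inline int pgt_validated_and_pinned(unsigned long type_info)
{
    return (type_info & (PGT_validated|PGT_pinned)) ==
           (PGT_validated|PGT_pinned);
}
/* 16-bit count of uses of the frame as its current type. */
static inline unsigned int pgt_type_count(unsigned long type_info)
{
    return type_info & PGT_count_mask;
}
#endif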

 /* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated      31
#define PGC_allocated       (1U<<_PGC_allocated)
 /* Set when fullshadow mode marks a page out-of-sync */
#define _PGC_out_of_sync    30
#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
 /* Set when fullshadow mode is using a page as a page table */
#define _PGC_page_table     29
#define PGC_page_table      (1U<<_PGC_page_table)
 /* 29-bit count of references to this frame. */
#define PGC_count_mask      ((1U<<29)-1)
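
/*
 * Editorial sketch: count_info therefore decomposes into three flag bits
 * (bits 29-31) and a 29-bit general reference count.
 */
#if 0
static inline int page_live_and_allocated(const struct page_info *page)
{
    u32 c = page->count_info;
    /* Guest has not 'freed' it, and at least one reference remains. */
    return (c & PGC_allocated) && ((c & PGC_count_mask) != 0);
}
#endif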

 /* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)

#if defined(__i386__)
#define pickle_domptr(_d)   ((u32)(unsigned long)(_d))
#define unpickle_domptr(_d) ((struct domain *)(unsigned long)(_d))
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
static inline struct domain *unpickle_domptr(u32 _domain)
{ return (_domain == 0) ? NULL : __va(_domain); }
static inline u32 pickle_domptr(struct domain *domain)
{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx" /* should only be used for printk's */
#endif

#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
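
/*
 * Editorial note and sketch: the owner is pickled into a u32.  On i386
 * the pointer itself fits; on x86_64 the pickled value is the physical
 * address of the domain structure, which presumes that structure lives
 * below 4GB.  Round trip, for exposition only:
 */
#if 0
static inline void owner_roundtrip(struct page_info *pg, struct domain *d)
{
    page_set_owner(pg, d);
    ASSERT(page_get_owner(pg) == d); /* unpickling recovers the pointer */
}
#endif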

#define XENSHARE_writable 0
#define XENSHARE_readonly 1
extern void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly);
extern void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly);

extern struct page_info *frame_table;
extern unsigned long max_page;
extern unsigned long total_pages;
void init_frametable(void);

int alloc_page_type(struct page_info *page, unsigned long type);
void free_page_type(struct page_info *page, unsigned long type);
extern void invalidate_shadow_ldt(struct vcpu *d);
extern int shadow_remove_all_write_access(
    struct domain *d, unsigned long gmfn, unsigned long mfn);
extern u32 shadow_remove_all_access(struct domain *d, unsigned long gmfn);
extern int _shadow_mode_refcounts(struct domain *d);

static inline void put_page(struct page_info *page)
{
    u32 nx, x, y = page->count_info;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domheap_page(page);
}

static inline int get_page(struct page_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_info;
    u32 d, nd = page->u.inuse._domain;
    u32 _domain = pickle_domptr(domain);

    do {
        x  = y;
        nx = x + 1;
        d  = nd;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(d != _domain) )                /* Wrong owner? */
        {
            if ( !_shadow_mode_refcounts(domain) )
                DPRINTK("Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%" PRtype_info "\n",
                        page_to_mfn(page), domain, unpickle_domptr(d),
                        x, page->u.inuse.type_info);
            return 0;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=d" (nd), "=a" (y), "=c" (d),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (d), "1" (x), "c" (d), "b" (nx) );
    }
    while ( unlikely(nd != d) || unlikely(y != x) );

    return 1;
}
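
/*
 * Editorial usage sketch: a caller takes a general reference before
 * touching a guest frame and drops it afterwards.  inspect_frame() is
 * a hypothetical helper, named only for illustration.
 */
#if 0
static int with_guest_frame(struct page_info *page, struct domain *d)
{
    if ( !get_page(page, d) )  /* freed, count overflow, or foreign owner */
        return 0;
    inspect_frame(page);       /* frame cannot vanish while the ref is held */
    put_page(page);            /* may free the frame if this was the last ref */
    return 1;
}
#endif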

void put_page_type(struct page_info *page);
int get_page_type(struct page_info *page, unsigned long type);
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);

static inline void put_page_and_type(struct page_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int get_page_and_type(struct page_info *page,
                                    struct domain *domain,
                                    unsigned long type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}
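
/*
 * Editorial usage sketch: get_page_and_type()/put_page_and_type() pair a
 * general reference with a type reference, e.g. to validate a frame as an
 * L1 page table before walking it.
 */
#if 0
static int with_l1_table(struct page_info *page, struct domain *d)
{
    if ( !get_page_and_type(page, d, PGT_l1_page_table) )
        return 0;
    /* ... frame is now validated and type-locked as an L1 table ... */
    put_page_and_type(page);
    return 1;
}
#endif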

static inline int page_is_removable(struct page_info *page)
{
    return ((page->count_info & PGC_count_mask) == 1);
}

#define ASSERT_PAGE_IS_TYPE(_p, _t)                            \
    ASSERT(((_p)->u.inuse.type_info & PGT_type_mask) == (_t)); \
    ASSERT(((_p)->u.inuse.type_info & PGT_count_mask) != 0)
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                          \
    ASSERT(((_p)->count_info & PGC_count_mask) != 0);          \
    ASSERT(page_get_owner(_p) == (_d))

int check_descriptor(struct desc_struct *d);

/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#define machine_to_phys_mapping  ((unsigned long *)RDWR_MPT_VIRT_START)
#define INVALID_M2P_ENTRY        (~0UL)
#define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
#define IS_INVALID_M2P_ENTRY(_e) (!VALID_M2P(_e))

#define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
#define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
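
/*
 * Editorial usage sketch: translating machine to guest-physical via the
 * M2P, rejecting entries with the 'invalid' top bit set.
 */
#if 0
static inline unsigned long mfn_to_gpfn_checked(unsigned long mfn)
{
    unsigned long gpfn = get_gpfn_from_mfn(mfn);
    return VALID_M2P(gpfn) ? gpfn : INVALID_M2P_ENTRY;
}
#endif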

/*
 * The phys_to_machine_mapping is the reversed mapping of MPT for full
 * virtualization. It is only used by shadow_mode_translate()==true
 * guests, so we steal the address space that would have normally
 * been used by the read-only MPT map.
 */
#define phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
#define NR_P2M_TABLE_ENTRIES    ((unsigned long *)RO_MPT_VIRT_END \
                                 - phys_to_machine_mapping)
#define INVALID_MFN             (~0UL)
#define VALID_MFN(_mfn)         (!((_mfn) & (1U<<31)))

#define set_mfn_from_gpfn(pfn, mfn) (phys_to_machine_mapping[(pfn)] = (mfn))
static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
{
    unsigned long mfn;

    if ( unlikely(pfn >= NR_P2M_TABLE_ENTRIES) ||
         unlikely(__copy_from_user(&mfn, &phys_to_machine_mapping[pfn],
                                   sizeof(mfn))) )
        mfn = INVALID_MFN;

    return mfn;
}
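
/*
 * Editorial usage sketch: the usual consistency check for a translated
 * guest -- a gpfn's mfn, fed back through the M2P, must return the same
 * gpfn.
 */
#if 0
static inline int p2m_m2p_consistent(unsigned long gpfn)
{
    unsigned long mfn = get_mfn_from_gpfn(gpfn);
    return VALID_MFN(mfn) && (get_gpfn_from_mfn(mfn) == gpfn);
}
#endif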

#ifdef MEMORY_GUARD
void memguard_init(void);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
#else
#define memguard_init()               ((void)0)
#define memguard_guard_range(_p,_l)   ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
#endif

void memguard_guard_stack(void *p);

/* Writable Pagetables */
struct ptwr_info {
    /* Linear address where the guest is updating the p.t. page. */
    unsigned long l1va;
    /* Copy of the p.t. page, taken before guest is given write access. */
    l1_pgentry_t *page;
    /* Index in L2 page table where this L1 p.t. is always hooked. */
    unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
    /* Info about last ptwr update batch. */
    unsigned int prev_nr_updates;
    /* VCPU which created writable mapping. */
    struct vcpu *vcpu;
    /* EIP of the original write fault (stats collection only). */
    unsigned long eip;
};

#define PTWR_PT_ACTIVE 0
#define PTWR_PT_INACTIVE 1

#define PTWR_CLEANUP_ACTIVE 1
#define PTWR_CLEANUP_INACTIVE 2

int ptwr_init(struct domain *);
void ptwr_destroy(struct domain *);
void ptwr_flush(struct domain *, const int);
int ptwr_do_page_fault(struct domain *, unsigned long,
                       struct cpu_user_regs *);
int revalidate_l1(struct domain *, l1_pgentry_t *, l1_pgentry_t *);

void cleanup_writable_pagetable(struct domain *d);
#define sync_pagetable_state(d)                           \
    do {                                                  \
        LOCK_BIGLOCK(d);                                  \
        /* Avoid racing with ptwr_destroy(). */           \
        if ( !test_bit(_DOMF_dying, &(d)->domain_flags) ) \
            cleanup_writable_pagetable(d);                \
        UNLOCK_BIGLOCK(d);                                \
    } while ( 0 )

#define writable_pagetable_in_sync(d)        \
    (!((d)->arch.ptwr[PTWR_PT_ACTIVE].l1va | \
       (d)->arch.ptwr[PTWR_PT_INACTIVE].l1va))
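
/*
 * Editorial usage sketch: flushing any pending writable-pagetable batch
 * before inspecting a domain's page tables; afterwards both ptwr slots
 * must be idle.
 */
#if 0
static void quiesce_writable_pagetables(struct domain *d)
{
    sync_pagetable_state(d);
    ASSERT(writable_pagetable_in_sync(d));
}
#endif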

int audit_adjust_pgtables(struct domain *d, int dir, int noisy);

#ifndef NDEBUG

#define AUDIT_SHADOW_ALREADY_LOCKED ( 1u << 0 )
#define AUDIT_ERRORS_OK             ( 1u << 1 )
#define AUDIT_QUIET                 ( 1u << 2 )

void _audit_domain(struct domain *d, int flags);
#define audit_domain(_d) _audit_domain((_d), AUDIT_ERRORS_OK)
void audit_domains(void);

#else

#define _audit_domain(_d, _f) ((void)0)
#define audit_domain(_d)      ((void)0)
#define audit_domains()       ((void)0)

#endif

#ifdef PERF_ARRAYS

void ptwr_eip_stat_reset(void);
void ptwr_eip_stat_print(void);

#else

#define ptwr_eip_stat_reset() ((void)0)
#define ptwr_eip_stat_print() ((void)0)

#endif

int new_guest_cr3(unsigned long pfn);

void propagate_page_fault(unsigned long addr, u16 error_code);

int __sync_lazy_execstate(void);

/* Arch-specific portion of memory_op hypercall. */
long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags);

#endif /* __ASM_X86_MM_H__ */