xen/include/asm-x86/mm.h @ 1776:c2f673cea5e4 (ia64/xen-unstable)

bitkeeper revision 1.1072.1.1 (40f4e51fLMgcKX4Sn6FNYePX6EqkGA)
Merge http://xen.bkbits.net:8080/xeno-unstable.bk
into gandalf.hpl.hp.com:/var/bk/xeno-unstable.bk

author   xenbk@gandalf.hpl.hp.com
date     Wed Jul 14 07:47:43 2004 +0000
parents  cd887a8fa08a c4061a2a3309
children e91945007886
#ifndef __ASM_X86_MM_H__
#define __ASM_X86_MM_H__

#include <xen/config.h>
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/perfc.h>
#include <xen/sched.h>

#include <asm/processor.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/flushtlb.h>
#include <asm/io.h>

#include <hypervisor-ifs/hypervisor-if.h>
/*
 * Per-page-frame information.
 */

struct pfn_info
{
    /* Each frame can be threaded onto a doubly-linked list. */
    struct list_head list;
    /* The following possible uses are context-dependent. */
    union {
        /* Page is in use: we keep a pointer to its owner. */
        struct domain *domain;
        /* Page is not currently allocated: mask of possibly-tainted TLBs. */
        unsigned long cpu_mask;
    } u;
    /* Reference count and various PGC_xxx flags and fields. */
    u32 count_and_flags;
    /* Type reference count and various PGT_xxx flags and fields. */
    u32 type_and_flags;
    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    u32 tlbflush_timestamp;
};
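
/*
 * Illustrative sketch (not part of the original header): the union 'u' is
 * interpreted according to the state recorded in count_and_flags.
 * 'example_page_owner' is a hypothetical helper, shown only to make the
 * context-dependence concrete; it relies on PGC_count_mask, defined below.
 */
#if 0 /* example only */
static inline struct domain *example_page_owner(struct pfn_info *page)
{
    /* u.domain is meaningful only while the reference count is non-zero. */
    if ( (page->count_and_flags & PGC_count_mask) != 0 )
        return page->u.domain;
    return NULL; /* page is free: u.cpu_mask tracks possibly-tainted TLBs */
}
#endif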
/* The following page types are MUTUALLY EXCLUSIVE. */
#define PGT_none            (0U<<29) /* no special uses of this page */
#define PGT_l1_page_table   (1U<<29) /* using this page as an L1 page table? */
#define PGT_l2_page_table   (2U<<29) /* using this page as an L2 page table? */
#define PGT_l3_page_table   (3U<<29) /* using this page as an L3 page table? */
#define PGT_l4_page_table   (4U<<29) /* using this page as an L4 page table? */
#define PGT_gdt_page        (5U<<29) /* using this page in a GDT? */
#define PGT_ldt_page        (6U<<29) /* using this page in an LDT? */
#define PGT_writeable_page  (7U<<29) /* has writable mappings of this page? */
#define PGT_type_mask       (7U<<29) /* Bits 29-31. */
/* Has this page been validated for use as its current type? */
#define _PGT_validated      28
#define PGT_validated       (1U<<_PGT_validated)
/* 28-bit count of uses of this frame as its current type. */
#define PGT_count_mask      ((1U<<28)-1)
/* For safety, force a TLB flush when this page's type changes. */
#define _PGC_tlb_flush_on_type_change 31
#define PGC_tlb_flush_on_type_change  (1U<<_PGC_tlb_flush_on_type_change)
/* Owning guest has pinned this page to its current type? */
#define _PGC_guest_pinned             30
#define PGC_guest_pinned              (1U<<_PGC_guest_pinned)
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated                29
#define PGC_allocated                 (1U<<_PGC_allocated)
/* 29-bit count of references to this frame. */
#define PGC_count_mask                ((1U<<29)-1)
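
/*
 * Example (illustrative, not part of the original header): splitting the
 * two flag words into their fields. 'example_decode_flags' and its locals
 * are hypothetical names used only for this sketch.
 */
#if 0 /* example only */
static inline void example_decode_flags(struct pfn_info *page)
{
    u32 taf        = page->type_and_flags;
    u32 type       = taf & PGT_type_mask;   /* e.g. PGT_l1_page_table */
    u32 type_count = taf & PGT_count_mask;  /* uses of the frame as 'type' */
    u32 caf        = page->count_and_flags;
    u32 ref_count  = caf & PGC_count_mask;  /* general references */
    int allocated  = (caf & PGC_allocated) != 0;
}
#endif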
/* We trust the slab allocator in slab.c, and our use of it. */
#define PageSlab(page)      (1)
#define PageSetSlab(page)   ((void)0)
#define PageClearSlab(page) ((void)0)

#define IS_XEN_HEAP_FRAME(_pfn) (page_to_phys(_pfn) < xenheap_phys_end)
#define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                                   \
    do {                                                                    \
        (_pfn)->u.domain = (_dom);                                          \
        /* The incremented type count is intended to pin to 'writeable'. */ \
        (_pfn)->type_and_flags = PGT_writeable_page | PGT_validated | 1;    \
        wmb(); /* install valid domain ptr before updating refcnt. */       \
        spin_lock(&(_dom)->page_alloc_lock);                                \
        /* _dom holds an allocation reference */                            \
        (_pfn)->count_and_flags = PGC_allocated | 1;                        \
        if ( unlikely((_dom)->xenheap_pages++ == 0) )                       \
            get_domain(_dom);                                               \
        spin_unlock(&(_dom)->page_alloc_lock);                              \
    } while ( 0 )
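
/*
 * Illustrative usage sketch (not part of the original header): sharing a
 * xenheap frame, such as a domain's shared_info page, with its owning
 * domain 'd'. virt_to_page() is assumed to be available from the arch
 * page macros.
 */
#if 0 /* example only */
SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
#endif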
extern struct pfn_info *frame_table;
extern unsigned long frame_table_size;
extern struct list_head free_list;
extern spinlock_t free_list_lock;
extern unsigned int free_pfns;
extern unsigned long max_page;
void init_frametable(void *frametable_vstart, unsigned long nr_pages);
void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);

struct pfn_info *alloc_domain_page(struct domain *p);
void free_domain_page(struct pfn_info *page);

int  alloc_page_type(struct pfn_info *page, unsigned int type);
void free_page_type(struct pfn_info *page, unsigned int type);
/* Drop a general reference; free the frame when the count reaches zero. */
static inline void put_page(struct pfn_info *page)
{
    u32 nx, x, y = page->count_and_flags;

    do {
        x  = y;
        nx = x - 1;
    }
    while ( unlikely((y = cmpxchg(&page->count_and_flags, x, nx)) != x) );

    if ( unlikely((nx & PGC_count_mask) == 0) )
        free_domain_page(page);
}
/* Take a general reference, failing if the page is free, the count would
 * overflow, or the page is not owned by 'domain'. */
static inline int get_page(struct pfn_info *page,
                           struct domain *domain)
{
    u32 x, nx, y = page->count_and_flags;
    struct domain *p, *np = page->u.domain;

    do {
        x  = y;
        nx = x + 1;
        p  = np;
        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
             unlikely(p != domain) )                 /* Wrong owner? */
        {
            DPRINTK("Error pfn %08lx: ed=%p(%u), sd=%p(%u),"
                    " caf=%08x, taf=%08x\n",
                    page_to_pfn(page), domain, domain->domain,
                    p, (p && ((x & PGC_count_mask) != 0)) ? p->domain : 999,
                    x, page->type_and_flags);
            return 0;
        }
        /*
         * cmpxchg8b atomically compares EDX:EAX against the adjacent
         * (u.domain, count_and_flags) pair and, on a match, replaces it
         * with ECX:EBX. This checks owner and count in one step: the
         * update succeeds only if the owner is still 'p' and the count
         * word is still 'x'; otherwise the current values are reloaded
         * into 'np' and 'y' for the retry.
         */
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %3"
            : "=a" (np), "=d" (y), "=b" (p),
              "=m" (*(volatile u64 *)(&page->u.domain))
            : "0" (p), "1" (x), "b" (p), "c" (nx) );
    }
    while ( unlikely(np != p) || unlikely(y != x) );

    return 1;
}
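
/*
 * Illustrative sketch (not part of the original header): the cmpxchg8b in
 * get_page() behaves like this hypothetical helper, except that the whole
 * body executes as a single atomic step.
 */
#if 0 /* example only */
static inline int example_cas_owner_and_count(
    struct pfn_info *page, struct domain *p, u32 x, u32 nx)
{
    if ( (page->u.domain != p) || (page->count_and_flags != x) )
        return 0;                   /* raced with another update: retry */
    page->count_and_flags = nx;     /* u.domain is rewritten unchanged */
    return 1;
}
#endif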
static inline void put_page_type(struct pfn_info *page)
{
    u32 nx, x, y = page->type_and_flags;

 again:
    do {
        x  = y;
        nx = x - 1;
        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            page->tlbflush_timestamp = tlbflush_clock;
            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
                 likely(nx & PGT_validated) )
            {
                /*
                 * Page-table pages must be unvalidated when count is zero. The
                 * 'free' is safe because the refcnt is non-zero and the
                 * validated bit is clear => other ops will spin or fail.
                 */
                if ( unlikely((y = cmpxchg(&page->type_and_flags, x,
                                           x & ~PGT_validated)) != x) )
                    goto again;
                /* We cleared the 'valid bit' so we must do the clean-up. */
                free_page_type(page, x & PGT_type_mask);
                /* Carry on as we were, but with the 'valid bit' now clear. */
                x  &= ~PGT_validated;
                nx &= ~PGT_validated;
            }
        }
    }
    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );
}
static inline int get_page_type(struct pfn_info *page, u32 type)
{
    u32 nx, x, y = page->type_and_flags;

 again:
    do {
        x  = y;
        nx = x + 1;
        if ( unlikely((nx & PGT_count_mask) == 0) )
        {
            DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page));
            return 0;
        }
        else if ( unlikely((x & PGT_count_mask) == 0) )
        {
            if ( (x & PGT_type_mask) != type )
            {
                nx &= ~(PGT_type_mask | PGT_validated);
                nx |= type;
                /* No extra validation needed for writeable pages. */
                if ( type == PGT_writeable_page )
                    nx |= PGT_validated;
            }
        }
        else if ( unlikely((x & PGT_type_mask) != type) )
        {
            DPRINTK("Unexpected type (saw %08x != exp %08x) for pfn %08lx\n",
                    x & PGT_type_mask, type, page_to_pfn(page));
            return 0;
        }
        else if ( unlikely(!(x & PGT_validated)) )
        {
            /* Someone else is updating validation of this page. Wait... */
            while ( (y = page->type_and_flags) != x )
            {
                rep_nop();
                barrier();
            }
            goto again;
        }
    }
    while ( unlikely((y = cmpxchg(&page->type_and_flags, x, nx)) != x) );

    if ( unlikely(!(nx & PGT_validated)) )
    {
        /* Try to validate page type; drop the new reference on failure. */
        if ( unlikely(!alloc_page_type(page, type)) )
        {
            DPRINTK("Error while validating pfn %08lx for type %08x\n",
                    page_to_pfn(page), type);
            put_page_type(page);
            return 0;
        }
        set_bit(_PGT_validated, &page->type_and_flags);
    }

    return 1;
}
static inline void put_page_and_type(struct pfn_info *page)
{
    put_page_type(page);
    put_page(page);
}

static inline int get_page_and_type(struct pfn_info *page,
                                    struct domain *domain,
                                    u32 type)
{
    int rc = get_page(page, domain);

    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
    {
        put_page(page);
        rc = 0;
    }

    return rc;
}
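
/*
 * Illustrative usage sketch (not part of the original header): taking and
 * dropping a combined general+type reference on a frame that domain 'd'
 * wants to use as an L1 page table. 'page' and 'd' are hypothetical locals.
 */
#if 0 /* example only */
if ( get_page_and_type(page, d, PGT_l1_page_table) )
{
    /* Frame is now validated and counted as an L1 page table. */
    /* ... install/use the page table ... */
    put_page_and_type(page);
}
#endif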
#define ASSERT_PAGE_IS_TYPE(_p, _t)                                 \
    do {                                                            \
        ASSERT(((_p)->type_and_flags & PGT_type_mask) == (_t));     \
        ASSERT(((_p)->type_and_flags & PGT_count_mask) != 0);       \
    } while ( 0 )
#define ASSERT_PAGE_IS_DOMAIN(_p, _d)                               \
    do {                                                            \
        ASSERT(((_p)->count_and_flags & PGC_count_mask) != 0);      \
        ASSERT((_p)->u.domain == (_d));                             \
    } while ( 0 )
int check_descriptor(unsigned long a, unsigned long b);

/*
 * Use currently-executing domain's pagetables on the specified CPUs.
 * i.e., stop borrowing someone else's tables if you are the idle domain.
 */
void synchronise_pagetables(unsigned long cpu_mask);
/*
 * The MPT (machine->physical mapping table) is an array of word-sized
 * values, indexed on machine frame number. It is expected that guest OSes
 * will use it to store a "physical" frame number to give the appearance of
 * contiguous (or near contiguous) physical memory.
 */
#undef  machine_to_phys_mapping
#ifdef __x86_64__
extern unsigned long *machine_to_phys_mapping;
#else
#define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START)
#endif
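
/*
 * Example (illustrative, not part of the original header): mapping a
 * machine frame number back to the guest-registered "physical" frame.
 * 'page' is a hypothetical struct pfn_info pointer.
 */
#if 0 /* example only */
unsigned long mfn = page_to_pfn(page);             /* machine frame */
unsigned long pfn = machine_to_phys_mapping[mfn];  /* guest pseudo-phys */
#endif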
/* Part of the domain API. */
int do_mmu_update(mmu_update_t *updates, int count, int *success_count);

#define DEFAULT_GDT_ENTRIES ((LAST_RESERVED_GDT_ENTRY*8)+7)
#define DEFAULT_GDT_ADDRESS ((unsigned long)gdt_table)
#ifdef MEMORY_GUARD
void *memguard_init(void *heap_start);
void memguard_guard_range(void *p, unsigned long l);
void memguard_unguard_range(void *p, unsigned long l);
int memguard_is_guarded(void *p);
#else
#define memguard_init(_s)             (_s)
#define memguard_guard_range(_p,_l)   ((void)0)
#define memguard_unguard_range(_p,_l) ((void)0)
#define memguard_is_guarded(_p)       (0)
#endif
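
/*
 * Illustrative usage sketch (not part of the original header): guarding a
 * page so that stray accesses fault while it should be untouched; with
 * MEMORY_GUARD disabled these calls compile away to nothing. 'p' is a
 * hypothetical pointer into the guarded heap.
 */
#if 0 /* example only */
memguard_guard_range(p, PAGE_SIZE);
/* ... any access to [p, p+PAGE_SIZE) now faults ... */
memguard_unguard_range(p, PAGE_SIZE);
#endif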
#endif /* __ASM_X86_MM_H__ */