ia64/xen-unstable
changeset 2461:77bc1f0ea51f
bitkeeper revision 1.1159.1.139 (413f4325XttGg5bPpva0Ul0ivQSUGA)
Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/local/scratch/kaf24/xeno
Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/local/scratch/kaf24/xeno
| field | value |
|---|---|
| author | kaf24@freefall.cl.cam.ac.uk |
| date | Wed Sep 08 17:36:37 2004 +0000 (2004-09-08) |
| parents | 0aadba2b9bc3 9c7d7819508a |
| children | 4b6481c1b7f4 |
| files | xen/arch/x86/domain.c xen/arch/x86/memory.c xen/include/asm-x86/mm.h |
line diff
1.1 --- a/xen/arch/x86/domain.c Wed Sep 08 16:38:13 2004 +0000 1.2 +++ b/xen/arch/x86/domain.c Wed Sep 08 17:36:37 2004 +0000 1.3 @@ -733,7 +733,6 @@ int construct_dom0(struct domain *p, 1.4 *l1tab++ = mk_l1_pgentry((mfn << PAGE_SHIFT) | L1_PROT); 1.5 1.6 page = &frame_table[mfn]; 1.7 - set_bit(_PGC_tlb_flush_on_type_change, &page->count_info); 1.8 if ( !get_page_and_type(page, p, PGT_writable_page) ) 1.9 BUG(); 1.10
2.1 --- a/xen/arch/x86/memory.c Wed Sep 08 16:38:13 2004 +0000 2.2 +++ b/xen/arch/x86/memory.c Wed Sep 08 17:36:37 2004 +0000 2.3 @@ -462,7 +462,6 @@ get_page_from_l1e( 2.4 { 2.5 if ( unlikely(!get_page_type(page, PGT_writable_page)) ) 2.6 return 0; 2.7 - set_bit(_PGC_tlb_flush_on_type_change, &page->count_info); 2.8 } 2.9 2.10 return 1; 2.11 @@ -774,18 +773,6 @@ static int mod_l1_entry(l1_pgentry_t *pl 2.12 2.13 int alloc_page_type(struct pfn_info *page, unsigned int type) 2.14 { 2.15 - if ( unlikely(test_and_clear_bit(_PGC_tlb_flush_on_type_change, 2.16 - &page->count_info)) ) 2.17 - { 2.18 - struct domain *p = page->u.inuse.domain; 2.19 - if ( unlikely(NEED_FLUSH(tlbflush_time[p->processor], 2.20 - page->tlbflush_timestamp)) ) 2.21 - { 2.22 - perfc_incr(need_flush_tlb_flush); 2.23 - flush_tlb_cpu(p->processor); 2.24 - } 2.25 - } 2.26 - 2.27 switch ( type ) 2.28 { 2.29 case PGT_l1_page_table: 2.30 @@ -833,6 +820,151 @@ void free_page_type(struct pfn_info *pag 2.31 } 2.32 2.33 2.34 +void put_page_type(struct pfn_info *page) 2.35 +{ 2.36 + u32 nx, x, y = page->u.inuse.type_info; 2.37 + 2.38 + again: 2.39 + do { 2.40 + x = y; 2.41 + nx = x - 1; 2.42 + 2.43 + ASSERT((x & PGT_count_mask) != 0); 2.44 + ASSERT(x & PGT_validated); 2.45 + 2.46 + if ( unlikely((nx & PGT_count_mask) == 0) ) 2.47 + { 2.48 + /* Record TLB information for flush later. Races are harmless. */ 2.49 + page->tlbflush_timestamp = tlbflush_clock; 2.50 + 2.51 + if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) ) 2.52 + { 2.53 + /* 2.54 + * Page-table pages must be unvalidated when count is zero. The 2.55 + * 'free' is safe because the refcnt is non-zero and validated 2.56 + * bit is clear => other ops will spin or fail. 2.57 + */ 2.58 + if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 2.59 + x & ~PGT_validated)) != x) ) 2.60 + goto again; 2.61 + /* We cleared the 'valid bit' so we do the clear up. 
*/ 2.62 + free_page_type(page, x & PGT_type_mask); 2.63 + /* Carry on, but with the 'valid bit' now clear. */ 2.64 + x &= ~PGT_validated; 2.65 + nx &= ~PGT_validated; 2.66 + } 2.67 + } 2.68 + else if ( unlikely((nx & (PGT_pinned | PGT_count_mask)) == 2.69 + (PGT_pinned | 1)) ) 2.70 + { 2.71 + /* Page is now only pinned. Make the back pointer mutable again. */ 2.72 + nx |= PGT_va_mutable; 2.73 + } 2.74 + } 2.75 + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 2.76 +} 2.77 + 2.78 + 2.79 +int get_page_type(struct pfn_info *page, u32 type) 2.80 +{ 2.81 + u32 nx, x, y = page->u.inuse.type_info; 2.82 + 2.83 + again: 2.84 + do { 2.85 + x = y; 2.86 + nx = x + 1; 2.87 + if ( unlikely((nx & PGT_count_mask) == 0) ) 2.88 + { 2.89 + MEM_LOG("Type count overflow on pfn %08lx\n", page_to_pfn(page)); 2.90 + return 0; 2.91 + } 2.92 + else if ( unlikely((x & PGT_count_mask) == 0) ) 2.93 + { 2.94 + if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) 2.95 + { 2.96 + /* 2.97 + * On type change we check to flush stale TLB entries. This 2.98 + * may be unnecessary (e.g., page was GDT/LDT) but those 2.99 + * circumstances should be very rare. 2.100 + */ 2.101 + struct domain *d = page->u.inuse.domain; 2.102 + if ( unlikely(NEED_FLUSH(tlbflush_time[d->processor], 2.103 + page->tlbflush_timestamp)) ) 2.104 + { 2.105 + perfc_incr(need_flush_tlb_flush); 2.106 + flush_tlb_cpu(d->processor); 2.107 + } 2.108 + 2.109 + /* We lose existing type, back pointer, and validity. */ 2.110 + nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); 2.111 + nx |= type; 2.112 + 2.113 + /* No special validation needed for writable pages. */ 2.114 + /* Page tables and GDT/LDT need to be scanned for validity. 
*/ 2.115 + if ( type == PGT_writable_page ) 2.116 + nx |= PGT_validated; 2.117 + } 2.118 + } 2.119 + else if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) 2.120 + { 2.121 + if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) 2.122 + { 2.123 + if ( ((x & PGT_type_mask) != PGT_l2_page_table) || 2.124 + ((type & PGT_type_mask) != PGT_l1_page_table) ) 2.125 + MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %08lx\n", 2.126 + x & PGT_type_mask, type, page_to_pfn(page)); 2.127 + return 0; 2.128 + } 2.129 + else if ( (x & PGT_va_mask) == PGT_va_mutable ) 2.130 + { 2.131 + /* The va backpointer is mutable, hence we update it. */ 2.132 + nx &= ~PGT_va_mask; 2.133 + nx |= type; /* we know the actual type is correct */ 2.134 + } 2.135 + else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask)) ) 2.136 + { 2.137 + /* The va backpointer wasn't mutable, and is different. */ 2.138 + MEM_LOG("Unexpected va backpointer (saw %08x != exp %08x)" 2.139 + " for pfn %08lx\n", x, type, page_to_pfn(page)); 2.140 + return 0; 2.141 + } 2.142 + } 2.143 + else if ( unlikely(!(x & PGT_validated)) ) 2.144 + { 2.145 + /* Someone else is updating validation of this page. Wait... */ 2.146 + while ( (y = page->u.inuse.type_info) == x ) 2.147 + { 2.148 + rep_nop(); 2.149 + barrier(); 2.150 + } 2.151 + goto again; 2.152 + } 2.153 + } 2.154 + while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 2.155 + 2.156 + if ( unlikely(!(nx & PGT_validated)) ) 2.157 + { 2.158 + /* Try to validate page type; drop the new reference on failure. */ 2.159 + if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) ) 2.160 + { 2.161 + MEM_LOG("Error while validating pfn %08lx for type %08x." 2.162 + " caf=%08x taf=%08x\n", 2.163 + page_to_pfn(page), type, 2.164 + page->count_info, 2.165 + page->u.inuse.type_info); 2.166 + /* Noone else can get a reference. We hold the only ref. 
*/ 2.167 + page->u.inuse.type_info = 0; 2.168 + return 0; 2.169 + } 2.170 + 2.171 + /* Noone else is updating simultaneously. */ 2.172 + __set_bit(_PGT_validated, &page->u.inuse.type_info); 2.173 + } 2.174 + 2.175 + return 1; 2.176 +} 2.177 + 2.178 + 2.179 static int do_extended_command(unsigned long ptr, unsigned long val) 2.180 { 2.181 int okay = 1, cpu = smp_processor_id(); 2.182 @@ -1747,7 +1879,6 @@ int ptwr_do_page_fault(unsigned long add 2.183 #ifndef NDEBUG 2.184 void ptwr_status(void) 2.185 { 2.186 - int i; 2.187 unsigned long pte, pfn; 2.188 struct pfn_info *page; 2.189 l2_pgentry_t *pl2e;
3.1 --- a/xen/include/asm-x86/mm.h Wed Sep 08 16:38:13 2004 +0000 3.2 +++ b/xen/include/asm-x86/mm.h Wed Sep 08 17:36:37 2004 +0000 3.3 @@ -81,17 +81,14 @@ struct pfn_info 3.4 /* 17-bit count of uses of this frame as its current type. */ 3.5 #define PGT_count_mask ((1<<17)-1) 3.6 3.7 - /* For safety, force a TLB flush when this page's type changes. */ 3.8 -#define _PGC_tlb_flush_on_type_change 31 3.9 -#define PGC_tlb_flush_on_type_change (1<<_PGC_tlb_flush_on_type_change) 3.10 /* Cleared when the owning guest 'frees' this page. */ 3.11 -#define _PGC_allocated 30 3.12 +#define _PGC_allocated 31 3.13 #define PGC_allocated (1<<_PGC_allocated) 3.14 /* This bit is always set, guaranteeing that the count word is never zero. */ 3.15 -#define _PGC_always_set 29 3.16 +#define _PGC_always_set 30 3.17 #define PGC_always_set (1<<_PGC_always_set) 3.18 - /* 29-bit count of references to this frame. */ 3.19 -#define PGC_count_mask ((1<<29)-1) 3.20 + /* 30-bit count of references to this frame. */ 3.21 +#define PGC_count_mask ((1<<30)-1) 3.22 3.23 /* We trust the slab allocator in slab.c, and our use of it. */ 3.24 #define PageSlab(page) (1) 3.25 @@ -104,7 +101,7 @@ struct pfn_info 3.26 do { \ 3.27 (_pfn)->u.inuse.domain = (_dom); \ 3.28 /* The incremented type count is intended to pin to 'writable'. */ \ 3.29 - (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \ 3.30 + (_pfn)->u.inuse.type_info = PGT_writable_page | PGT_validated | 1; \ 3.31 wmb(); /* install valid domain ptr before updating refcnt. 
*/ \ 3.32 spin_lock(&(_dom)->page_alloc_lock); \ 3.33 /* _dom holds an allocation reference */ \ 3.34 @@ -143,155 +140,34 @@ static inline int get_page(struct pfn_in 3.35 struct domain *domain) 3.36 { 3.37 u32 x, nx, y = page->count_info; 3.38 - struct domain *p, *np = page->u.inuse.domain; 3.39 + struct domain *d, *nd = page->u.inuse.domain; 3.40 3.41 do { 3.42 x = y; 3.43 nx = x + 1; 3.44 - p = np; 3.45 + d = nd; 3.46 if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */ 3.47 unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */ 3.48 - unlikely(p != domain) ) /* Wrong owner? */ 3.49 + unlikely(d != domain) ) /* Wrong owner? */ 3.50 { 3.51 DPRINTK("Error pfn %08lx: ed=%p, sd=%p, caf=%08x, taf=%08x\n", 3.52 - page_to_pfn(page), domain, p, 3.53 + page_to_pfn(page), domain, d, 3.54 x, page->u.inuse.type_info); 3.55 return 0; 3.56 } 3.57 __asm__ __volatile__( 3.58 LOCK_PREFIX "cmpxchg8b %3" 3.59 - : "=d" (np), "=a" (y), "=c" (p), 3.60 + : "=d" (nd), "=a" (y), "=c" (d), 3.61 "=m" (*(volatile u64 *)(&page->count_info)) 3.62 - : "0" (p), "1" (x), "c" (p), "b" (nx) ); 3.63 + : "0" (d), "1" (x), "c" (d), "b" (nx) ); 3.64 } 3.65 - while ( unlikely(np != p) || unlikely(y != x) ); 3.66 + while ( unlikely(nd != d) || unlikely(y != x) ); 3.67 3.68 return 1; 3.69 } 3.70 3.71 - 3.72 -static inline void put_page_type(struct pfn_info *page) 3.73 -{ 3.74 - u32 nx, x, y = page->u.inuse.type_info; 3.75 - 3.76 - again: 3.77 - do { 3.78 - x = y; 3.79 - nx = x - 1; 3.80 - if ( unlikely((nx & PGT_count_mask) == 0) ) 3.81 - { 3.82 - page->tlbflush_timestamp = tlbflush_clock; 3.83 - if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && 3.84 - likely(nx & PGT_validated) ) 3.85 - { 3.86 - /* 3.87 - * Page-table pages must be unvalidated when count is zero. The 3.88 - * 'free' is safe because the refcnt is non-zero and the 3.89 - * validated bit is clear => other ops will spin or fail. 
3.90 - */ 3.91 - if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 3.92 - x & ~PGT_validated)) != x) ) 3.93 - goto again; 3.94 - /* We cleared the 'valid bit' so we must do the clear up. */ 3.95 - free_page_type(page, x & PGT_type_mask); 3.96 - /* Carry on as we were, but with the 'valid bit' now clear. */ 3.97 - x &= ~PGT_validated; 3.98 - nx &= ~PGT_validated; 3.99 - } 3.100 - } 3.101 - else if ( unlikely((nx & (PGT_pinned | PGT_count_mask)) == 3.102 - (PGT_pinned | 1)) ) 3.103 - { 3.104 - /* Page is now only pinned. Make the back pointer mutable again. */ 3.105 - nx |= PGT_va_mutable; 3.106 - } 3.107 - } 3.108 - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 3.109 -} 3.110 - 3.111 - 3.112 -static inline int get_page_type(struct pfn_info *page, u32 type) 3.113 -{ 3.114 - u32 nx, x, y = page->u.inuse.type_info; 3.115 - again: 3.116 - do { 3.117 - x = y; 3.118 - nx = x + 1; 3.119 - if ( unlikely((nx & PGT_count_mask) == 0) ) 3.120 - { 3.121 - DPRINTK("Type count overflow on pfn %08lx\n", page_to_pfn(page)); 3.122 - return 0; 3.123 - } 3.124 - else if ( unlikely((x & PGT_count_mask) == 0) ) 3.125 - { 3.126 - if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) 3.127 - { 3.128 - nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); 3.129 - nx |= type; 3.130 - /* No extra validation needed for writable pages. 
*/ 3.131 - if ( type == PGT_writable_page ) 3.132 - nx |= PGT_validated; 3.133 - } 3.134 - } 3.135 - else if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) 3.136 - { 3.137 - if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) 3.138 - { 3.139 -#ifdef VERBOSE 3.140 - if ( ((x & PGT_type_mask) != PGT_l2_page_table) || 3.141 - ((type & PGT_type_mask) != PGT_l1_page_table) ) 3.142 - DPRINTK("Bad type (saw %08x != exp %08x) for pfn %08lx\n", 3.143 - x & PGT_type_mask, type, page_to_pfn(page)); 3.144 -#endif 3.145 - return 0; 3.146 - } 3.147 - else if ( (x & PGT_va_mask) == PGT_va_mutable ) 3.148 - { 3.149 - /* The va backpointer is mutable, hence we update it. */ 3.150 - nx &= ~PGT_va_mask; 3.151 - nx |= type; /* we know the actual type is correct */ 3.152 - } 3.153 - else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask)) ) 3.154 - { 3.155 - /* The va backpointer wasn't mutable, and is different. */ 3.156 - DPRINTK("Unexpected va backpointer (saw %08x != exp %08x)" 3.157 - " for pfn %08lx\n", x, type, page_to_pfn(page)); 3.158 - return 0; 3.159 - } 3.160 - } 3.161 - else if ( unlikely(!(x & PGT_validated)) ) 3.162 - { 3.163 - /* Someone else is updating validation of this page. Wait... */ 3.164 - while ( (y = page->u.inuse.type_info) == x ) 3.165 - { 3.166 - rep_nop(); 3.167 - barrier(); 3.168 - } 3.169 - goto again; 3.170 - } 3.171 - } 3.172 - while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); 3.173 - 3.174 - if ( unlikely(!(nx & PGT_validated)) ) 3.175 - { 3.176 - /* Try to validate page type; drop the new reference on failure. */ 3.177 - if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) ) 3.178 - { 3.179 - DPRINTK("Error while validating pfn %08lx for type %08x." 
3.180 - " caf=%08x taf=%08x\n", 3.181 - page_to_pfn(page), type, 3.182 - page->count_info, 3.183 - page->u.inuse.type_info); 3.184 - put_page_type(page); 3.185 - return 0; 3.186 - } 3.187 - 3.188 - set_bit(_PGT_validated, &page->u.inuse.type_info); 3.189 - } 3.190 - 3.191 - return 1; 3.192 -} 3.193 - 3.194 +void put_page_type(struct pfn_info *page); 3.195 +int get_page_type(struct pfn_info *page, u32 type); 3.196 3.197 static inline void put_page_and_type(struct pfn_info *page) 3.198 {