ia64/xen-unstable
changeset 18750:ed30f4efb728
x86: fix preemptable page type handling
- retain a page reference when PGT_partial is set on a page (and drop
it when clearing that flag)
- don't drop a page reference never acquired when freeing the page type
of a page where the allocation of the type got preempted (and never
completed)
- don't acquire a page reference when allocating the page type of a
page where freeing the type got preempted (and never completed, and
hence didn't drop the respective reference)
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Thu Oct 30 14:53:24 2008 +0000 (2008-10-30) |
parents | 9e5cf6778a6d |
children | 85ba96069dfb |
files | xen/arch/x86/domain.c xen/arch/x86/mm.c xen/include/asm-x86/mm.h |
line diff
1.1 --- a/xen/arch/x86/domain.c Thu Oct 30 14:37:48 2008 +0000 1.2 +++ b/xen/arch/x86/domain.c Thu Oct 30 14:53:24 2008 +0000 1.3 @@ -1684,18 +1684,24 @@ static int relinquish_memory( 1.4 break; 1.5 case -EINTR: 1.6 page->u.inuse.type_info |= PGT_validated; 1.7 + if ( x & PGT_partial ) 1.8 + put_page(page); 1.9 put_page(page); 1.10 ret = -EAGAIN; 1.11 goto out; 1.12 case -EAGAIN: 1.13 page->u.inuse.type_info |= PGT_partial; 1.14 - put_page(page); 1.15 + if ( x & PGT_partial ) 1.16 + put_page(page); 1.17 goto out; 1.18 default: 1.19 BUG(); 1.20 } 1.21 if ( x & PGT_partial ) 1.22 + { 1.23 page->u.inuse.type_info--; 1.24 + put_page(page); 1.25 + } 1.26 break; 1.27 } 1.28 }
2.1 --- a/xen/arch/x86/mm.c Thu Oct 30 14:37:48 2008 +0000 2.2 +++ b/xen/arch/x86/mm.c Thu Oct 30 14:53:24 2008 +0000 2.3 @@ -566,19 +566,21 @@ static int get_page_from_pagenr(unsigned 2.4 static int get_page_and_type_from_pagenr(unsigned long page_nr, 2.5 unsigned long type, 2.6 struct domain *d, 2.7 + int partial, 2.8 int preemptible) 2.9 { 2.10 struct page_info *page = mfn_to_page(page_nr); 2.11 int rc; 2.12 2.13 - if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) 2.14 + if ( likely(partial >= 0) && 2.15 + unlikely(!get_page_from_pagenr(page_nr, d)) ) 2.16 return -EINVAL; 2.17 2.18 rc = (preemptible ? 2.19 get_page_type_preemptible(page, type) : 2.20 (get_page_type(page, type) ? 0 : -EINVAL)); 2.21 2.22 - if ( rc ) 2.23 + if ( unlikely(rc) && partial >= 0 ) 2.24 put_page(page); 2.25 2.26 return rc; 2.27 @@ -761,7 +763,7 @@ get_page_from_l2e( 2.28 } 2.29 2.30 rc = get_page_and_type_from_pagenr( 2.31 - l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); 2.32 + l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0); 2.33 if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) 2.34 rc = 0; 2.35 2.36 @@ -772,7 +774,7 @@ get_page_from_l2e( 2.37 define_get_linear_pagetable(l3); 2.38 static int 2.39 get_page_from_l3e( 2.40 - l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible) 2.41 + l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible) 2.42 { 2.43 int rc; 2.44 2.45 @@ -786,7 +788,7 @@ get_page_from_l3e( 2.46 } 2.47 2.48 rc = get_page_and_type_from_pagenr( 2.49 - l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible); 2.50 + l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible); 2.51 if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) 2.52 rc = 0; 2.53 2.54 @@ -797,7 +799,7 @@ get_page_from_l3e( 2.55 define_get_linear_pagetable(l4); 2.56 static int 2.57 get_page_from_l4e( 2.58 - l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible) 2.59 + l4_pgentry_t l4e, unsigned long pfn, 
struct domain *d, int partial, int preemptible) 2.60 { 2.61 int rc; 2.62 2.63 @@ -811,7 +813,7 @@ get_page_from_l4e( 2.64 } 2.65 2.66 rc = get_page_and_type_from_pagenr( 2.67 - l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible); 2.68 + l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible); 2.69 if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) 2.70 rc = 0; 2.71 2.72 @@ -961,23 +963,32 @@ static int put_page_from_l2e(l2_pgentry_ 2.73 return 1; 2.74 } 2.75 2.76 +static int __put_page_type(struct page_info *, int preemptible); 2.77 2.78 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, 2.79 - int preemptible) 2.80 + int partial, int preemptible) 2.81 { 2.82 if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 2.83 (l3e_get_pfn(l3e) != pfn) ) 2.84 + { 2.85 + if ( unlikely(partial > 0) ) 2.86 + return __put_page_type(l3e_get_page(l3e), preemptible); 2.87 return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); 2.88 + } 2.89 return 1; 2.90 } 2.91 2.92 #if CONFIG_PAGING_LEVELS >= 4 2.93 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, 2.94 - int preemptible) 2.95 + int partial, int preemptible) 2.96 { 2.97 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 2.98 (l4e_get_pfn(l4e) != pfn) ) 2.99 + { 2.100 + if ( unlikely(partial > 0) ) 2.101 + return __put_page_type(l4e_get_page(l4e), preemptible); 2.102 return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); 2.103 + } 2.104 return 1; 2.105 } 2.106 #endif 2.107 @@ -1184,7 +1195,7 @@ static int alloc_l3_table(struct page_in 2.108 unsigned long pfn = page_to_mfn(page); 2.109 l3_pgentry_t *pl3e; 2.110 unsigned int i; 2.111 - int rc = 0; 2.112 + int rc = 0, partial = page->partial_pte; 2.113 2.114 #if CONFIG_PAGING_LEVELS == 3 2.115 /* 2.116 @@ -1213,7 +1224,8 @@ static int alloc_l3_table(struct page_in 2.117 if ( is_pv_32on64_domain(d) ) 2.118 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); 2.119 2.120 - for ( i = 
page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ ) 2.121 + for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; 2.122 + i++, partial = 0 ) 2.123 { 2.124 if ( is_pv_32bit_domain(d) && (i == 3) ) 2.125 { 2.126 @@ -1224,16 +1236,17 @@ static int alloc_l3_table(struct page_in 2.127 rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), 2.128 PGT_l2_page_table | 2.129 PGT_pae_xen_l2, 2.130 - d, preemptible); 2.131 + d, partial, preemptible); 2.132 } 2.133 else if ( !is_guest_l3_slot(i) || 2.134 - (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 ) 2.135 + (rc = get_page_from_l3e(pl3e[i], pfn, d, 2.136 + partial, preemptible)) > 0 ) 2.137 continue; 2.138 2.139 if ( rc == -EAGAIN ) 2.140 { 2.141 page->nr_validated_ptes = i; 2.142 - page->partial_pte = 1; 2.143 + page->partial_pte = partial ?: 1; 2.144 } 2.145 else if ( rc == -EINTR && i ) 2.146 { 2.147 @@ -1257,7 +1270,7 @@ static int alloc_l3_table(struct page_in 2.148 if ( !is_guest_l3_slot(i) ) 2.149 continue; 2.150 unadjust_guest_l3e(pl3e[i], d); 2.151 - put_page_from_l3e(pl3e[i], pfn, 0); 2.152 + put_page_from_l3e(pl3e[i], pfn, 0, 0); 2.153 } 2.154 } 2.155 2.156 @@ -1272,18 +1285,20 @@ static int alloc_l4_table(struct page_in 2.157 unsigned long pfn = page_to_mfn(page); 2.158 l4_pgentry_t *pl4e = page_to_virt(page); 2.159 unsigned int i; 2.160 - int rc = 0; 2.161 - 2.162 - for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ ) 2.163 + int rc = 0, partial = page->partial_pte; 2.164 + 2.165 + for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; 2.166 + i++, partial = 0 ) 2.167 { 2.168 if ( !is_guest_l4_slot(d, i) || 2.169 - (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 ) 2.170 + (rc = get_page_from_l4e(pl4e[i], pfn, d, 2.171 + partial, preemptible)) > 0 ) 2.172 continue; 2.173 2.174 if ( rc == -EAGAIN ) 2.175 { 2.176 page->nr_validated_ptes = i; 2.177 - page->partial_pte = 1; 2.178 + page->partial_pte = partial ?: 1; 2.179 } 2.180 else if ( rc == -EINTR ) 2.181 
{ 2.182 @@ -1299,7 +1314,7 @@ static int alloc_l4_table(struct page_in 2.183 MEM_LOG("Failure in alloc_l4_table: entry %d", i); 2.184 while ( i-- > 0 ) 2.185 if ( is_guest_l4_slot(d, i) ) 2.186 - put_page_from_l4e(pl4e[i], pfn, 0); 2.187 + put_page_from_l4e(pl4e[i], pfn, 0, 0); 2.188 } 2.189 if ( rc < 0 ) 2.190 return rc; 2.191 @@ -1377,19 +1392,20 @@ static int free_l3_table(struct page_inf 2.192 struct domain *d = page_get_owner(page); 2.193 unsigned long pfn = page_to_mfn(page); 2.194 l3_pgentry_t *pl3e; 2.195 - unsigned int i = page->nr_validated_ptes - !page->partial_pte; 2.196 - int rc = 0; 2.197 + int rc = 0, partial = page->partial_pte; 2.198 + unsigned int i = page->nr_validated_ptes - !partial; 2.199 2.200 pl3e = map_domain_page(pfn); 2.201 2.202 do { 2.203 if ( is_guest_l3_slot(i) ) 2.204 { 2.205 - rc = put_page_from_l3e(pl3e[i], pfn, preemptible); 2.206 + rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible); 2.207 + if ( rc < 0 ) 2.208 + break; 2.209 + partial = 0; 2.210 if ( rc > 0 ) 2.211 continue; 2.212 - if ( rc ) 2.213 - break; 2.214 unadjust_guest_l3e(pl3e[i], d); 2.215 } 2.216 } while ( i-- ); 2.217 @@ -1399,7 +1415,7 @@ static int free_l3_table(struct page_inf 2.218 if ( rc == -EAGAIN ) 2.219 { 2.220 page->nr_validated_ptes = i; 2.221 - page->partial_pte = 1; 2.222 + page->partial_pte = partial ?: -1; 2.223 } 2.224 else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) 2.225 { 2.226 @@ -1416,18 +1432,21 @@ static int free_l4_table(struct page_inf 2.227 struct domain *d = page_get_owner(page); 2.228 unsigned long pfn = page_to_mfn(page); 2.229 l4_pgentry_t *pl4e = page_to_virt(page); 2.230 - unsigned int i = page->nr_validated_ptes - !page->partial_pte; 2.231 - int rc = 0; 2.232 + int rc = 0, partial = page->partial_pte; 2.233 + unsigned int i = page->nr_validated_ptes - !partial; 2.234 2.235 do { 2.236 if ( is_guest_l4_slot(d, i) ) 2.237 - rc = put_page_from_l4e(pl4e[i], pfn, preemptible); 2.238 - } while ( rc >= 0 && i-- ); 2.239 + rc = 
put_page_from_l4e(pl4e[i], pfn, partial, preemptible); 2.240 + if ( rc < 0 ) 2.241 + break; 2.242 + partial = 0; 2.243 + } while ( i-- ); 2.244 2.245 if ( rc == -EAGAIN ) 2.246 { 2.247 page->nr_validated_ptes = i; 2.248 - page->partial_pte = 1; 2.249 + page->partial_pte = partial ?: -1; 2.250 } 2.251 else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) 2.252 { 2.253 @@ -1703,7 +1722,7 @@ static int mod_l3_entry(l3_pgentry_t *pl 2.254 return rc ? 0 : -EFAULT; 2.255 } 2.256 2.257 - rc = get_page_from_l3e(nl3e, pfn, d, preemptible); 2.258 + rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible); 2.259 if ( unlikely(rc < 0) ) 2.260 return page_unlock(l3pg), rc; 2.261 rc = 0; 2.262 @@ -1732,7 +1751,7 @@ static int mod_l3_entry(l3_pgentry_t *pl 2.263 } 2.264 2.265 page_unlock(l3pg); 2.266 - put_page_from_l3e(ol3e, pfn, 0); 2.267 + put_page_from_l3e(ol3e, pfn, 0, 0); 2.268 return rc; 2.269 } 2.270 2.271 @@ -1781,7 +1800,7 @@ static int mod_l4_entry(l4_pgentry_t *pl 2.272 return rc ? 0 : -EFAULT; 2.273 } 2.274 2.275 - rc = get_page_from_l4e(nl4e, pfn, d, preemptible); 2.276 + rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible); 2.277 if ( unlikely(rc < 0) ) 2.278 return page_unlock(l4pg), rc; 2.279 rc = 0; 2.280 @@ -1802,7 +1821,7 @@ static int mod_l4_entry(l4_pgentry_t *pl 2.281 } 2.282 2.283 page_unlock(l4pg); 2.284 - put_page_from_l4e(ol4e, pfn, 0); 2.285 + put_page_from_l4e(ol4e, pfn, 0, 0); 2.286 return rc; 2.287 } 2.288 2.289 @@ -1866,6 +1885,10 @@ static int alloc_page_type(struct page_i 2.290 struct domain *owner = page_get_owner(page); 2.291 int rc; 2.292 2.293 + /* Obtain an extra reference to retain if we set PGT_partial. */ 2.294 + if ( preemptible && !get_page(page, owner) ) 2.295 + return -EINVAL; 2.296 + 2.297 /* A page table is dirtied when its type count becomes non-zero. 
*/ 2.298 if ( likely(owner != NULL) ) 2.299 paging_mark_dirty(owner, page_to_mfn(page)); 2.300 @@ -1900,8 +1923,13 @@ static int alloc_page_type(struct page_i 2.301 if ( rc == -EAGAIN ) 2.302 { 2.303 page->u.inuse.type_info |= PGT_partial; 2.304 + return -EAGAIN; 2.305 } 2.306 - else if ( rc == -EINTR ) 2.307 + 2.308 + if ( preemptible ) 2.309 + put_page(page); 2.310 + 2.311 + if ( rc == -EINTR ) 2.312 { 2.313 ASSERT((page->u.inuse.type_info & 2.314 (PGT_count_mask|PGT_validated|PGT_partial)) == 1); 2.315 @@ -2029,8 +2057,13 @@ static int __put_final_page_type( 2.316 BUG_ON(rc != -EAGAIN); 2.317 wmb(); 2.318 page->u.inuse.type_info |= PGT_partial; 2.319 + /* Must skip put_page() below. */ 2.320 + preemptible = 0; 2.321 } 2.322 2.323 + if ( preemptible ) 2.324 + put_page(page); 2.325 + 2.326 return rc; 2.327 } 2.328 2.329 @@ -2040,6 +2073,10 @@ static int __put_page_type(struct page_i 2.330 { 2.331 unsigned long nx, x, y = page->u.inuse.type_info; 2.332 2.333 + /* Obtain an extra reference to retain if we set PGT_partial. */ 2.334 + if ( preemptible && !get_page(page, page_get_owner(page)) ) 2.335 + return -EINVAL; 2.336 + 2.337 for ( ; ; ) 2.338 { 2.339 x = y; 2.340 @@ -2061,6 +2098,8 @@ static int __put_page_type(struct page_i 2.341 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, 2.342 x, nx)) != x) ) 2.343 continue; 2.344 + if ( x & PGT_partial ) 2.345 + put_page(page); 2.346 /* We cleared the 'valid bit' so we do the clean up. 
*/ 2.347 return __put_final_page_type(page, x, preemptible); 2.348 } 2.349 @@ -2081,9 +2120,16 @@ static int __put_page_type(struct page_i 2.350 break; 2.351 2.352 if ( preemptible && hypercall_preempt_check() ) 2.353 + { 2.354 + if ( preemptible ) 2.355 + put_page(page); 2.356 return -EINTR; 2.357 + } 2.358 } 2.359 2.360 + if ( preemptible ) 2.361 + put_page(page); 2.362 + 2.363 return 0; 2.364 } 2.365 2.366 @@ -2187,7 +2233,11 @@ static int __get_page_type(struct page_i 2.367 } 2.368 2.369 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) 2.370 + { 2.371 + if ( (x & PGT_partial) && !(nx & PGT_partial) ) 2.372 + put_page(page); 2.373 break; 2.374 + } 2.375 2.376 if ( preemptible && hypercall_preempt_check() ) 2.377 return -EINTR; 2.378 @@ -2296,7 +2346,7 @@ int new_guest_cr3(unsigned long mfn) 2.379 #endif 2.380 okay = paging_mode_refcounts(d) 2.381 ? get_page_from_pagenr(mfn, d) 2.382 - : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0); 2.383 + : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); 2.384 if ( unlikely(!okay) ) 2.385 { 2.386 MEM_LOG("Error while installing new baseptr %lx", mfn); 2.387 @@ -2540,7 +2590,7 @@ int do_mmuext_op( 2.388 if ( paging_mode_refcounts(FOREIGNDOM) ) 2.389 break; 2.390 2.391 - rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1); 2.392 + rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1); 2.393 okay = !rc; 2.394 if ( unlikely(!okay) ) 2.395 { 2.396 @@ -2621,7 +2671,7 @@ int do_mmuext_op( 2.397 okay = get_page_from_pagenr(mfn, d); 2.398 else 2.399 okay = !get_page_and_type_from_pagenr( 2.400 - mfn, PGT_root_page_table, d, 0); 2.401 + mfn, PGT_root_page_table, d, 0, 0); 2.402 if ( unlikely(!okay) ) 2.403 { 2.404 MEM_LOG("Error while installing new mfn %lx", mfn); 2.405 @@ -2728,7 +2778,7 @@ int do_mmuext_op( 2.406 unsigned char *ptr; 2.407 2.408 okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page, 2.409 - FOREIGNDOM, 0); 2.410 + FOREIGNDOM, 0, 0); 2.411 
if ( unlikely(!okay) ) 2.412 { 2.413 MEM_LOG("Error while clearing mfn %lx", mfn); 2.414 @@ -2761,7 +2811,7 @@ int do_mmuext_op( 2.415 } 2.416 2.417 okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page, 2.418 - FOREIGNDOM, 0); 2.419 + FOREIGNDOM, 0, 0); 2.420 if ( unlikely(!okay) ) 2.421 { 2.422 put_page(mfn_to_page(src_mfn));
3.1 --- a/xen/include/asm-x86/mm.h Thu Oct 30 14:37:48 2008 +0000 3.2 +++ b/xen/include/asm-x86/mm.h Thu Oct 30 14:53:24 2008 +0000 3.3 @@ -61,12 +61,36 @@ struct page_info 3.4 /* 3.5 * When PGT_partial is true then this field is valid and indicates 3.6 * that PTEs in the range [0, @nr_validated_ptes) have been validated. 3.7 - * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been 3.8 - * partially validated. 3.9 + * An extra page reference must be acquired (or not dropped) whenever 3.10 + * PGT_partial gets set, and it must be dropped when the flag gets 3.11 + * cleared. This is so that a get() leaving a page in partially 3.12 + * validated state (where the caller would drop the reference acquired 3.13 + * due to the getting of the type [apparently] failing [-EAGAIN]) 3.14 + * would not accidentally result in a page left with zero general 3.15 + * reference count, but non-zero type reference count (possible when 3.16 + * the partial get() is followed immediately by domain destruction). 3.17 + * Likewise, the ownership of the single type reference for partially 3.18 + * (in-)validated pages is tied to this flag, i.e. the instance 3.19 + * setting the flag must not drop that reference, whereas the instance 3.20 + * clearing it will have to. 3.21 + * 3.22 + * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has 3.23 + * been partially validated. This implies that the general reference 3.24 + * to the page (acquired from get_page_from_lNe()) would be dropped 3.25 + * (again due to the apparent failure) and hence must be re-acquired 3.26 + * when resuming the validation, but must not be dropped when picking 3.27 + * up the page for invalidation. 3.28 + * 3.29 + * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has 3.30 + * been partially invalidated. This is basically the opposite case of 3.31 + * above, i.e. 
the general reference to the page was not dropped in 3.32 + * put_page_from_lNe() (due to the apparent failure), and hence it 3.33 + * must be dropped when the put operation is resumed (and completes), 3.34 + * but it must not be acquired if picking up the page for validation. 3.35 */ 3.36 struct { 3.37 u16 nr_validated_ptes; 3.38 - bool_t partial_pte; 3.39 + s8 partial_pte; 3.40 }; 3.41 3.42 /*