direct-io.hg
changeset 1018:80553bc5d3e8
bitkeeper revision 1.658 (3fe5ac16UXA85i7JkYQ0lVd6adEPDQ)
Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author    kaf24@scramble.cl.cam.ac.uk
date      Sun Dec 21 14:20:06 2003 +0000 (2003-12-21)
parents   94cd24f6b95e 332a83939362
children  83b414c7559c
files     xen/common/memory.c xen/drivers/block/xen_block.c xen/net/dev.c
line diff
--- a/xen/common/memory.c  Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/common/memory.c  Sun Dec 21 14:20:06 2003 +0000
@@ -172,7 +172,6 @@ unsigned int free_pfns;
 static struct {
 #define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB. */
 #define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
-#define DOP_RESTORE_CR0 (1<<2) /* Set the WP bit in CR0. */
     unsigned long flags;
     unsigned long cr0;
 } deferred_op[NR_CPUS] __cacheline_aligned;
@@ -316,7 +315,7 @@ static int get_page_from_pagenr(unsigned
     }

     if ( unlikely(!get_page(page, current)) &&
-         ((current->domain != 0) || !dom0_get_page(page)) )
+         unlikely((current->domain != 0) || !dom0_get_page(page)) )
     {
         MEM_LOG("Could not get page reference for pfn %08lx\n", page_nr);
         return 0;
@@ -372,12 +371,10 @@ static int get_page_from_l1e(l1_pgentry_
 {
     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);

-    if ( unlikely((l1_pgentry_val(l1e) &
-                   (_PAGE_GLOBAL|_PAGE_PAT))) )
+    if ( unlikely((l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT))) )
     {
         MEM_LOG("Bad L1 page type settings %04lx",
-                l1_pgentry_val(l1e) &
-                (_PAGE_GLOBAL|_PAGE_PAT));
+                l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT));
         return 0;
     }

@@ -388,14 +385,10 @@ static int get_page_from_l1e(l1_pgentry_
             return 0;
         set_bit(_PGC_tlb_flush_on_type_change,
                 &frame_table[l1_pgentry_to_pagenr(l1e)].count_and_flags);
-    }
-    else
-    {
-        if ( unlikely(!get_page_from_pagenr(l1_pgentry_to_pagenr(l1e))) )
-            return 0;
+        return 1;
     }

-    return 1;
+    return get_page_from_pagenr(l1_pgentry_to_pagenr(l1e));
 }


@@ -412,9 +405,8 @@ static int get_page_from_l2e(l2_pgentry_
     }

     if ( unlikely(!get_page_and_type_from_pagenr(
-        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) &&
-         unlikely(!check_linear_pagetable(l2e, pfn)) )
-        return 0;
+        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) )
+        return check_linear_pagetable(l2e, pfn);

     return 1;
 }
@@ -422,12 +414,10 @@ static int get_page_from_l2e(l2_pgentry_

 static void put_page_from_l1e(l1_pgentry_t l1e)
 {
-    struct pfn_info *page;
+    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];

     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);

-    page = &frame_table[l1_pgentry_to_pagenr(l1e)];
-
     if ( l1_pgentry_val(l1e) & _PAGE_RW )
     {
         put_page_and_type(page);
@@ -613,34 +603,30 @@ static int mod_l2_entry(l2_pgentry_t *pl
     if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
     {
         /* Differ in mapping (bits 12-31) or presence (bit 0)? */
-        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) != 0 )
-        {
-            if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
-                return 0;
+        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
+            return update_l2e(pl2e, ol2e, nl2e);

-            if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-            {
-                put_page_from_l2e(nl2e, pfn);
-                return 0;
-            }
-
-            if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
-                put_page_from_l2e(ol2e, pfn);
-        }
-        else if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
+            return 0;
+
+        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
         {
+            put_page_from_l2e(nl2e, pfn);
             return 0;
         }
-    }
-    else
-    {
-        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-            return 0;
-
+
         if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
             put_page_from_l2e(ol2e, pfn);
+
+        return 1;
     }
-
+
+    if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        return 0;
+
+    if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
+        put_page_from_l2e(ol2e, pfn);
+
     return 1;
 }

@@ -652,26 +638,15 @@ static inline int update_l1e(l1_pgentry_
     unsigned long o = l1_pgentry_val(ol1e);
     unsigned long n = l1_pgentry_val(nl1e);

-    while ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
+         unlikely(o != l1_pgentry_val(ol1e)) )
     {
-        unsigned int cpu = smp_processor_id();
-        /* The CMPXCHG faulted -- maybe we need to clear the WP bit. */
-        if ( deferred_op[cpu].flags & DOP_RESTORE_CR0 )
-        {
-            MEM_LOG("cmpxchg fault despite WP bit cleared\n");
-            return 0;
-        }
-        deferred_op[cpu].cr0 = read_cr0();
-        write_cr0(deferred_op[cpu].cr0 & ~X86_CR0_WP);
-        deferred_op[cpu].flags |= DOP_RESTORE_CR0;
+        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
+                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
+        return 0;
     }

-    if ( o != l1_pgentry_val(ol1e))
-        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
-                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
-
-    /* The swap was successful if the old value we saw is equal to ol1e. */
-    return (o == l1_pgentry_val(ol1e));
+    return 1;
 }


@@ -691,38 +666,31 @@ static int mod_l1_entry(l1_pgentry_t *pl

     if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
     {
-        /*
-         * Differ in mapping (bits 12-31), writeable (bit 1), or
-         * presence (bit 0)?
-         */
-        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) != 0 )
-        {
-            if ( unlikely(!get_page_from_l1e(nl1e)) )
-                return 0;
+        /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
+        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
+            return update_l1e(pl1e, ol1e, nl1e);

-            if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-            {
-                put_page_from_l1e(nl1e);
-                return 0;
-            }
-
-            if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
-                put_page_from_l1e(ol1e);
-        }
-        else if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        if ( unlikely(!get_page_from_l1e(nl1e)) )
+            return 0;
+
+        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
         {
+            put_page_from_l1e(nl1e);
             return 0;
         }
-    }
-    else
-    {
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-            return 0;
-
+
         if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
             put_page_from_l1e(ol1e);
+
+        return 1;
     }

+    if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        return 0;
+
+    if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
+        put_page_from_l1e(ol1e);
+
     return 1;
 }

@@ -738,12 +706,16 @@ int alloc_page_type(struct pfn_info *pag
      * NB. 'p' may no longer be valid by time we dereference it, so
      * p->processor might be garbage. We clamp it, just in case.
      */
-    if ( !test_bit(_PGC_zombie, &page->count_and_flags) &&
-         unlikely(NEED_FLUSH(tlbflush_time[(p->processor)&(NR_CPUS-1)],
-                             page->tlbflush_timestamp)) )
+    if ( likely(!test_bit(_PGC_zombie, &page->count_and_flags)) )
     {
-        perfc_incr(need_flush_tlb_flush);
-        flush_tlb_cpu(p->processor);
+        unsigned int cpu = p->processor;
+        if ( likely(cpu <= smp_num_cpus) &&
+             unlikely(NEED_FLUSH(tlbflush_time[cpu],
+                                 page->tlbflush_timestamp)) )
+        {
+            perfc_incr(need_flush_tlb_flush);
+            flush_tlb_cpu(cpu);
+        }
     }
 }

@@ -1053,9 +1025,6 @@ int do_mmu_update(mmu_update_t *ureqs, i
     if ( flags & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);

-    if ( unlikely(flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
     return rc;
 }

@@ -1087,9 +1056,6 @@ int do_update_va_mapping(unsigned long p

     if ( unlikely(defer_flags & DOP_RELOAD_LDT) )
         (void)map_ldt_shadow_page(0);
-
-    if ( unlikely(defer_flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
+
     return err;
 }
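Taken together, the memory.c hunks remove the DOP_RESTORE_CR0 / CR0.WP-clearing fallback and flatten the nested branches in mod_l1_entry() and mod_l2_entry() into early returns. As a minimal sketch (reconstructed only from the hunks above, not from the full source file), update_l1e() now fails fast whenever the cmpxchg faults or observes a stale entry:

/* Sketch only: update_l1e() as it reads after this changeset,
 * pieced together from the diff hunks above. */
static inline int update_l1e(l1_pgentry_t *pl1e,
                             l1_pgentry_t  ol1e,
                             l1_pgentry_t  nl1e)
{
    unsigned long o = l1_pgentry_val(ol1e);
    unsigned long n = l1_pgentry_val(nl1e);

    /* Fail if the user-space cmpxchg faulted, or if the entry changed
     * underneath us (the value seen differs from the expected old value). */
    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
         unlikely(o != l1_pgentry_val(ol1e)) )
    {
        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
        return 0;
    }

    return 1;
}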
--- a/xen/drivers/block/xen_block.c  Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/drivers/block/xen_block.c  Sun Dec 21 14:20:06 2003 +0000
@@ -433,7 +433,8 @@ static void dispatch_rw_block_io(struct
     phys_seg_t phys_seg[MAX_BLK_SEGS * 2];

     /* Check that number of segments is sane. */
-    if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) )
+    if ( unlikely(req->nr_segments == 0) ||
+         unlikely(req->nr_segments > MAX_BLK_SEGS) )
     {
         DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
         goto bad_descriptor;
@@ -450,18 +451,12 @@ static void dispatch_rw_block_io(struct
         buffer = req->buffer_and_sects[i] & ~0x1FF;
         nr_sects = req->buffer_and_sects[i] & 0x1FF;

-        if ( nr_sects == 0 )
+        if ( unlikely(nr_sects == 0) )
         {
             DPRINTK("zero-sized data request\n");
             goto bad_descriptor;
         }

-        if ( !lock_buffer(p, buffer, nr_sects<<9, (operation==READ)) )
-        {
-            DPRINTK("invalid buffer\n");
-            goto bad_descriptor;
-        }
-
         phys_seg[nr_psegs].dev = req->device;
         phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
         phys_seg[nr_psegs].buffer = buffer;
@@ -480,7 +475,6 @@ static void dispatch_rw_block_io(struct
                     req->sector_number + tot_sects,
                     req->sector_number + tot_sects + nr_sects,
                     req->device);
-            unlock_buffer(buffer, nr_sects<<9, (operation==READ));
             goto bad_descriptor;
         }

@@ -494,7 +488,22 @@ static void dispatch_rw_block_io(struct
         }

         nr_psegs += new_segs;
-        if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG();
+        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+    }
+
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        if ( unlikely(!lock_buffer(p, phys_seg[i].buffer,
+                                   phys_seg[i].nr_sects << 9,
+                                   operation==READ)) )
+        {
+            DPRINTK("invalid buffer\n");
+            while ( i-- > 0 )
+                unlock_buffer(phys_seg[i].buffer,
+                              phys_seg[i].nr_sects << 9,
+                              operation==READ);
+            goto bad_descriptor;
+        }
     }

     atomic_inc(&nr_pending);
@@ -512,8 +521,9 @@ static void dispatch_rw_block_io(struct
     for ( i = 0; i < nr_psegs; i++ )
     {
         bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( bh == NULL ) panic("bh is null\n");
-        memset (bh, 0, sizeof (struct buffer_head));
+        if ( unlikely(bh == NULL) )
+            panic("bh is null\n");
+        memset(bh, 0, sizeof (struct buffer_head));

         bh->b_size = phys_seg[i].nr_sects << 9;
         bh->b_dev = phys_seg[i].dev;
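The xen_block.c change reorders dispatch_rw_block_io(): buffers are no longer locked while segments are still being parsed, so a bad descriptor found mid-scan no longer needs a partial unlock. A rough outline of the resulting flow, assembled from the hunks above with unrelated parts of the function elided:

/* Rough outline only -- reconstructed from the hunks above. */

/* Pass 1: parse and sanity-check every segment, filling phys_seg[].
 * Nothing is locked yet, so 'goto bad_descriptor' needs no unwind. */

/* Pass 2: lock every buffer; on failure, unlock those already locked. */
for ( i = 0; i < nr_psegs; i++ )
{
    if ( unlikely(!lock_buffer(p, phys_seg[i].buffer,
                               phys_seg[i].nr_sects << 9,
                               operation == READ)) )
    {
        DPRINTK("invalid buffer\n");
        while ( i-- > 0 )
            unlock_buffer(phys_seg[i].buffer,
                          phys_seg[i].nr_sects << 9,
                          operation == READ);
        goto bad_descriptor;
    }
}

/* Pass 3: only now allocate buffer heads and submit the I/O. */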
--- a/xen/net/dev.c  Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/net/dev.c  Sun Dec 21 14:20:06 2003 +0000
@@ -522,6 +522,8 @@ void deliver_packet(struct sk_buff *skb,
     old_page = &frame_table[rx->buf_pfn];
     new_page = skb->pf;

+    skb->pf = old_page;
+
     ptep = map_domain_mem(rx->pte_ptr);

     new_page->u.domain = p;
@@ -541,6 +543,8 @@ void deliver_packet(struct sk_buff *skb,
                      ((new_page - frame_table) << PAGE_SHIFT))) != pte )
     {
         unmap_domain_mem(ptep);
+        /* At some point maybe should have 'new_page' in error response. */
+        put_page_and_type(new_page);
         status = RING_STATUS_BAD_PAGE;
         goto out;
     }
@@ -550,9 +554,6 @@ void deliver_packet(struct sk_buff *skb,

     unmap_domain_mem(ptep);

-    /* Our skbuff now points at the guest's old frame. */
-    skb->pf = old_page;
-
     /* Updates must happen before releasing the descriptor. */
     smp_wmb();

@@ -2078,17 +2079,13 @@ static void get_rx_bufs(net_vif_t *vif)
         * just once as a writeable page.
         */
        if ( unlikely(buf_page->u.domain != p) ||
-            unlikely(!test_and_clear_bit(_PGC_allocated,
-                                         &buf_page->count_and_flags)) ||
             unlikely(cmpxchg(&buf_page->type_and_flags,
                              PGT_writeable_page|PGT_validated|1,
                              0) != (PGT_writeable_page|PGT_validated|1)) )
        {
            DPRINTK("Bad domain or page mapped writeable more than once.\n");
-           if ( buf_page->u.domain == p )
-               set_bit(_PGC_allocated, &buf_page->count_and_flags);
-           if ( unlikely(cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
-                         (pte & ~_PAGE_PRESENT)) )
+           if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
+                (pte & ~_PAGE_PRESENT) )
                put_page_and_type(buf_page);
            make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
            goto rx_unmap_and_continue;
@@ -2099,11 +2096,17 @@ static void get_rx_bufs(net_vif_t *vif)
         * The final count should be 2, because of PGC_allocated.
         */
        if ( unlikely(cmpxchg(&buf_page->count_and_flags,
-                             PGC_tlb_flush_on_type_change | 2, 0) !=
-                     (PGC_tlb_flush_on_type_change | 2)) )
+                             PGC_allocated | PGC_tlb_flush_on_type_change | 2,
+                             0) !=
+                     (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
        {
-           DPRINTK("Page held more than once\n");
-           /* Leave the page unmapped at 'ptep'. Stoopid domain! */
+           DPRINTK("Page held more than once %08lx\n",
+                   buf_page->count_and_flags);
+           if ( get_page_type(buf_page, PGT_writeable_page) &&
+                (cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
+                 (pte & ~_PAGE_PRESENT)) )
+               put_page_and_type(buf_page);
+           /* NB. If we fail to remap the page, we should probably flag it. */
            make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
            goto rx_unmap_and_continue;
        }
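In dev.c the receive-buffer handover is tightened: deliver_packet() now records the guest's old frame in skb->pf before any failure can occur and drops the new page's reference on the bad-PTE path, while get_rx_bufs() folds PGC_allocated into the single cmpxchg on count_and_flags rather than clearing it separately. A sketch of that revised ownership check (reconstructed from the hunks above; surrounding context is elided):

/* Sketch only: the revised check in get_rx_bufs(), from the hunks above.
 * Per the comment in the surrounding context, the final count should be 2
 * because of PGC_allocated, so flags and count are dropped in one cmpxchg. */
if ( unlikely(cmpxchg(&buf_page->count_and_flags,
                      PGC_allocated | PGC_tlb_flush_on_type_change | 2,
                      0) !=
              (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
{
    DPRINTK("Page held more than once %08lx\n", buf_page->count_and_flags);
    /* Try to hand the page back to the guest: re-take a writeable type
     * reference and restore the PTE; drop the reference again if the
     * remap fails because the PTE changed underneath us. */
    if ( get_page_type(buf_page, PGT_writeable_page) &&
         (cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
          (pte & ~_PAGE_PRESENT)) )
        put_page_and_type(buf_page);
    make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
    goto rx_unmap_and_continue;
}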