direct-io.hg

changeset 1018:80553bc5d3e8

bitkeeper revision 1.658 (3fe5ac16UXA85i7JkYQ0lVd6adEPDQ)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@scramble.cl.cam.ac.uk
date Sun Dec 21 14:20:06 2003 +0000 (2003-12-21)
parents 94cd24f6b95e 332a83939362
children 83b414c7559c
files xen/common/memory.c xen/drivers/block/xen_block.c xen/net/dev.c
line diff
--- a/xen/common/memory.c	Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/common/memory.c	Sun Dec 21 14:20:06 2003 +0000
@@ -172,7 +172,6 @@ unsigned int free_pfns;
 static struct {
 #define DOP_FLUSH_TLB   (1<<0) /* Flush the TLB.                 */
 #define DOP_RELOAD_LDT  (1<<1) /* Reload the LDT shadow mapping. */
-#define DOP_RESTORE_CR0 (1<<2) /* Set the WP bit in CR0.         */
     unsigned long flags;
     unsigned long cr0;
 } deferred_op[NR_CPUS] __cacheline_aligned;
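
The hunk above deletes DOP_RESTORE_CR0: as the update_l1e hunk further down shows, the hypervisor no longer clears CR0.WP around a faulting cmpxchg, so there is no CR0 state left to restore. The two surviving flags follow a simple pattern: work is recorded in a per-CPU flags word while a hypercall runs and is performed once on the way out (see the do_mmu_update hunk below). A minimal, self-contained sketch of that pattern; everything except the DOP_* names is hypothetical, and the two hooks are stubs:

    #define DOP_FLUSH_TLB   (1<<0)
    #define DOP_RELOAD_LDT  (1<<1)
    #define NR_CPUS 16

    static unsigned long deferred_flags[NR_CPUS];

    static void local_flush_tlb(void)   { /* stub for the real TLB flush */ }
    static void reload_ldt_shadow(void) { /* stub for the LDT remap */ }

    /* Callers record work instead of doing it immediately... */
    static void defer_op(int cpu, unsigned long flag)
    {
        deferred_flags[cpu] |= flag;
    }

    /* ...and the hypercall exit path performs it exactly once. */
    static void run_deferred_ops(int cpu)
    {
        unsigned long flags = deferred_flags[cpu];
        deferred_flags[cpu] = 0;
        if ( flags & DOP_FLUSH_TLB )
            local_flush_tlb();
        if ( flags & DOP_RELOAD_LDT )
            reload_ldt_shadow();
    }
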
@@ -316,7 +315,7 @@ static int get_page_from_pagenr(unsigned
     }
 
     if ( unlikely(!get_page(page, current)) &&
-         ((current->domain != 0) || !dom0_get_page(page)) )
+         unlikely((current->domain != 0) || !dom0_get_page(page)) )
     {
         MEM_LOG("Could not get page reference for pfn %08lx\n", page_nr);
         return 0;
@@ -372,12 +371,10 @@ static int get_page_from_l1e(l1_pgentry_
 {
     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
 
-    if ( unlikely((l1_pgentry_val(l1e) &
-                   (_PAGE_GLOBAL|_PAGE_PAT))) )
+    if ( unlikely((l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT))) )
     {
         MEM_LOG("Bad L1 page type settings %04lx",
-                l1_pgentry_val(l1e) &
-                (_PAGE_GLOBAL|_PAGE_PAT));
+                l1_pgentry_val(l1e) & (_PAGE_GLOBAL|_PAGE_PAT));
         return 0;
     }
 
@@ -388,14 +385,10 @@ static int get_page_from_l1e(l1_pgentry_
             return 0;
         set_bit(_PGC_tlb_flush_on_type_change, 
                 &frame_table[l1_pgentry_to_pagenr(l1e)].count_and_flags);
-    }
-    else
-    {
-        if ( unlikely(!get_page_from_pagenr(l1_pgentry_to_pagenr(l1e))) )
-            return 0;
+        return 1;
     }
 
-    return 1;
+    return get_page_from_pagenr(l1_pgentry_to_pagenr(l1e));
 }
 
 
@@ -412,9 +405,8 @@ static int get_page_from_l2e(l2_pgentry_
     }
 
     if ( unlikely(!get_page_and_type_from_pagenr(
-        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) &&
-         unlikely(!check_linear_pagetable(l2e, pfn)) )
-        return 0;
+        l2_pgentry_to_pagenr(l2e), PGT_l1_page_table)) )
+        return check_linear_pagetable(l2e, pfn);
 
     return 1;
 }
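
get_page_from_l2e now delegates straight to check_linear_pagetable (not part of this changeset) when the referenced frame cannot be taken as an L1 page table. The usual case that check accepts is a "linear" mapping: an L2 entry that points back at the L2 table containing it, giving the guest a window onto its own page-table pages. A rough sketch of the core of such a test, with hypothetical types; the real check also validates flags and reference counts:

    typedef struct { unsigned long l2_lo; } l2_pgentry_t;

    #define l2_pgentry_val(e)       ((e).l2_lo)
    #define l2_pgentry_to_pagenr(e) (l2_pgentry_val(e) >> 12)

    /* Accept only an entry that maps the L2 table it lives in. */
    static int is_linear_l2e(l2_pgentry_t l2e, unsigned long own_pfn)
    {
        return l2_pgentry_to_pagenr(l2e) == own_pfn;
    }
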
@@ -422,12 +414,10 @@ static int get_page_from_l2e(l2_pgentry_
 
 static void put_page_from_l1e(l1_pgentry_t l1e)
 {
-    struct pfn_info *page;
+    struct pfn_info *page = &frame_table[l1_pgentry_to_pagenr(l1e)];
 
     ASSERT(l1_pgentry_val(l1e) & _PAGE_PRESENT);
 
-    page = &frame_table[l1_pgentry_to_pagenr(l1e)];
-
     if ( l1_pgentry_val(l1e) & _PAGE_RW )
     {
         put_page_and_type(page);
@@ -613,34 +603,30 @@ static int mod_l2_entry(l2_pgentry_t *pl
     if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT )
     {
         /* Differ in mapping (bits 12-31) or presence (bit 0)? */
-        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) != 0 )
-        {
-            if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
-                return 0;
+        if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 )
+            return update_l2e(pl2e, ol2e, nl2e);
 
-            if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-            {
-                put_page_from_l2e(nl2e, pfn);
-                return 0;
-            }
-
-            if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
-                put_page_from_l2e(ol2e, pfn);
-        }
-        else if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        if ( unlikely(!get_page_from_l2e(nl2e, pfn)) )
+            return 0;
+        
+        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
         {
+            put_page_from_l2e(nl2e, pfn);
            return 0;
         }
-    }
-    else
-    {
-        if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
-            return 0;
-
+        
         if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
             put_page_from_l2e(ol2e, pfn);
+        
+        return 1;
     }
-    
+
+    if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) )
+        return 0;
+
+    if ( l2_pgentry_val(ol2e) & _PAGE_PRESENT )
+        put_page_from_l2e(ol2e, pfn);
+
     return 1;
 }
 
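
The rewritten mod_l2_entry (and mod_l1_entry below) flattens the old nested conditionals into one reference-counting discipline: take a reference for the new entry, attempt the atomic slot update, roll the new reference back if the update fails, and release the old entry's reference only once the update has succeeded. A compact sketch of that discipline; the helper names are hypothetical stand-ins for get/put_page_from_l2e and update_l2e, and take_ref/drop_ref are stubbed so the fragment compiles:

    static int take_ref(unsigned long frame)  { (void)frame; return 1; }
    static void drop_ref(unsigned long frame) { (void)frame; }

    static int atomic_update(unsigned long *slot,
                             unsigned long old, unsigned long new)
    {
        /* GCC builtin: succeeds iff *slot still held 'old'. */
        return __sync_bool_compare_and_swap(slot, old, new);
    }

    static int mod_entry(unsigned long *slot,
                         unsigned long old, unsigned long new)
    {
        if ( !take_ref(new) )
            return 0;                 /* new frame is unusable */
        if ( !atomic_update(slot, old, new) )
        {
            drop_ref(new);            /* lost a race: nothing was published */
            return 0;
        }
        drop_ref(old);                /* old mapping is gone for good */
        return 1;
    }
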
@@ -652,26 +638,15 @@ static inline int update_l1e(l1_pgentry_
     unsigned long o = l1_pgentry_val(ol1e);
     unsigned long n = l1_pgentry_val(nl1e);
 
-    while ( unlikely(cmpxchg_user(pl1e, o, n) != 0) )
+    if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
+         unlikely(o != l1_pgentry_val(ol1e)) )
     {
-        unsigned int cpu = smp_processor_id();
-        /* The CMPXCHG faulted -- maybe we need to clear the WP bit. */
-        if ( deferred_op[cpu].flags & DOP_RESTORE_CR0 )
-        {
-            MEM_LOG("cmpxchg fault despite WP bit cleared\n");
-            return 0;
-        }
-        deferred_op[cpu].cr0 = read_cr0();
-        write_cr0(deferred_op[cpu].cr0 & ~X86_CR0_WP);
-        deferred_op[cpu].flags |= DOP_RESTORE_CR0;
+        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
+                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
+        return 0;
     }
 
-    if ( o != l1_pgentry_val(ol1e))
-        MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n",
-                l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o);
-
-    /* The swap was successful if the old value we saw is equal to ol1e. */
-    return (o == l1_pgentry_val(ol1e));
+    return 1;
 }
 
 
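
With the CR0.WP dance gone, update_l1e is a one-shot compare-and-exchange: if the cmpxchg faults, or if the value it observed is not the expected old entry, the update is reported as failed and the caller backs out; there is no retry. A self-contained sketch of the same semantics in C11 atomics:

    #include <stdatomic.h>

    /* Succeeds only if *slot still holds 'old'; on failure 'expected'
     * ends up holding the value actually seen (what the hunk above
     * logs as "saw %08lx"). */
    static int update_slot(_Atomic unsigned long *slot,
                           unsigned long old, unsigned long new)
    {
        unsigned long expected = old;
        return atomic_compare_exchange_strong(slot, &expected, new);
    }
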
@@ -691,38 +666,31 @@ static int mod_l1_entry(l1_pgentry_t *pl
 
     if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT )
     {
-        /*
-         * Differ in mapping (bits 12-31), writeable (bit 1), or
-         * presence (bit 0)?
-         */
-        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) != 0 )
-        {
-            if ( unlikely(!get_page_from_l1e(nl1e)) )
-                return 0;
+        /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */
+        if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 )
+            return update_l1e(pl1e, ol1e, nl1e);
 
-            if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-            {
-                put_page_from_l1e(nl1e);
-                return 0;
-            }
-
-            if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
-                put_page_from_l1e(ol1e);
-        }
-        else if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        if ( unlikely(!get_page_from_l1e(nl1e)) )
+            return 0;
+        
+        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
         {
+            put_page_from_l1e(nl1e);
             return 0;
         }
-    }
-    else 
-    {
-        if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
-            return 0;
-
+        
         if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
             put_page_from_l1e(ol1e);
+        
+        return 1;
     }
 
+    if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) )
+        return 0;
+    
+    if ( l1_pgentry_val(ol1e) & _PAGE_PRESENT )
+        put_page_from_l1e(ol1e);
+
     return 1;
 }
 
@@ -738,12 +706,16 @@ int alloc_page_type(struct pfn_info *pag
          * NB. 'p' may no longer be valid by time we dereference it, so
          * p->processor might be garbage. We clamp it, just in case.
          */
-        if ( !test_bit(_PGC_zombie, &page->count_and_flags) &&
-             unlikely(NEED_FLUSH(tlbflush_time[(p->processor)&(NR_CPUS-1)], 
-                                 page->tlbflush_timestamp)) )
+        if ( likely(!test_bit(_PGC_zombie, &page->count_and_flags)) )
         {
-            perfc_incr(need_flush_tlb_flush);
-            flush_tlb_cpu(p->processor);
+            unsigned int cpu = p->processor;
+            if ( likely(cpu <= smp_num_cpus) &&
+                 unlikely(NEED_FLUSH(tlbflush_time[cpu],
+                                     page->tlbflush_timestamp)) )
+            {
+                perfc_incr(need_flush_tlb_flush);
+                flush_tlb_cpu(cpu);
+            }
         }
     }
 
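
The alloc_page_type hunk keeps the TLB heuristic but replaces the old index mask with a range check on the possibly-stale p->processor. The heuristic itself is a timestamp comparison: a remote flush is only needed if the owning CPU has not flushed its TLB since the page's tlbflush_timestamp was recorded. A small sketch of that comparison, assuming monotonically increasing per-CPU flush clocks (the real NEED_FLUSH must also cope with clock wrap):

    #define NR_CPUS 16

    static unsigned long tlbflush_time[NR_CPUS]; /* last flush, per CPU */

    static int need_flush(unsigned long cpu_last_flush,
                          unsigned long page_timestamp)
    {
        /* A stale mapping may survive in the TLB iff the page was
         * touched at or after the CPU's last flush. */
        return page_timestamp >= cpu_last_flush;
    }
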
@@ -1053,9 +1025,6 @@ int do_mmu_update(mmu_update_t *ureqs, i
     if ( flags & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
 
-    if ( unlikely(flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
     return rc;
 }
 
@@ -1087,9 +1056,6 @@ int do_update_va_mapping(unsigned long p
 
     if ( unlikely(defer_flags & DOP_RELOAD_LDT) )
         (void)map_ldt_shadow_page(0);
-
-    if ( unlikely(defer_flags & DOP_RESTORE_CR0) )
-        write_cr0(deferred_op[cpu].cr0);
-
+    
     return err;
 }
--- a/xen/drivers/block/xen_block.c	Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/drivers/block/xen_block.c	Sun Dec 21 14:20:06 2003 +0000
@@ -433,7 +433,8 @@ static void dispatch_rw_block_io(struct 
     phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
 
     /* Check that number of segments is sane. */
-    if ( (req->nr_segments == 0) || (req->nr_segments > MAX_BLK_SEGS) )
+    if ( unlikely(req->nr_segments == 0) || 
+         unlikely(req->nr_segments > MAX_BLK_SEGS) )
     {
         DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
         goto bad_descriptor;
@@ -450,18 +451,12 @@ static void dispatch_rw_block_io(struct 
         buffer   = req->buffer_and_sects[i] & ~0x1FF;
         nr_sects = req->buffer_and_sects[i] &  0x1FF;
 
-        if ( nr_sects == 0 )
+        if ( unlikely(nr_sects == 0) )
         {
             DPRINTK("zero-sized data request\n");
             goto bad_descriptor;
         }
 
-        if ( !lock_buffer(p, buffer, nr_sects<<9, (operation==READ)) )
-	{
-            DPRINTK("invalid buffer\n");
-            goto bad_descriptor;
-	}
-
	phys_seg[nr_psegs].dev           = req->device;
	phys_seg[nr_psegs].sector_number = req->sector_number + tot_sects;
	phys_seg[nr_psegs].buffer        = buffer;
@@ -480,7 +475,6 @@ static void dispatch_rw_block_io(struct 
                        req->sector_number + tot_sects, 
                        req->sector_number + tot_sects + nr_sects, 
                        req->device); 
-                unlock_buffer(buffer, nr_sects<<9, (operation==READ));
                 goto bad_descriptor;
             }
 
@@ -494,7 +488,22 @@ static void dispatch_rw_block_io(struct 
         }
	 
         nr_psegs += new_segs;
-        if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG();
+        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
+    }
+
+    for ( i = 0; i < nr_psegs; i++ )
+    {
+        if ( unlikely(!lock_buffer(p, phys_seg[i].buffer, 
+                                   phys_seg[i].nr_sects << 9,
+                                   operation==READ)) )
+	{
+            DPRINTK("invalid buffer\n");
+            while ( i-- > 0 )
+                unlock_buffer(phys_seg[i].buffer, 
+                              phys_seg[i].nr_sects << 9,
+                              operation==READ);
+            goto bad_descriptor;
+	}
     }
 
     atomic_inc(&nr_pending);
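
The block-driver change splits dispatch_rw_block_io into two passes: the first parses and validates every segment, the second locks all the buffers, unwinding the locks already taken if any one fails. Locking last means the common parse-error paths have nothing to undo. A compact sketch of the all-or-nothing acquire loop; lock_one/unlock_one are hypothetical stand-ins for lock_buffer/unlock_buffer, stubbed so the fragment compiles:

    static int lock_one(int i)    { (void)i; return 1; } /* stub: may fail */
    static void unlock_one(int i) { (void)i; }

    /* Lock buffers 0..n-1, or lock none at all. */
    static int lock_all(int n)
    {
        int i;
        for ( i = 0; i < n; i++ )
        {
            if ( !lock_one(i) )
            {
                while ( i-- > 0 )        /* roll back in reverse order */
                    unlock_one(i);
                return 0;
            }
        }
        return 1;
    }
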
@@ -512,8 +521,9 @@ static void dispatch_rw_block_io(struct 
     for ( i = 0; i < nr_psegs; i++ )
     {
         bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
-        if ( bh == NULL ) panic("bh is null\n");
-        memset (bh, 0, sizeof (struct buffer_head));
+        if ( unlikely(bh == NULL) )
+            panic("bh is null\n");
+        memset(bh, 0, sizeof (struct buffer_head));
     
         bh->b_size          = phys_seg[i].nr_sects << 9;
         bh->b_dev           = phys_seg[i].dev;
--- a/xen/net/dev.c	Sun Dec 21 01:06:08 2003 +0000
+++ b/xen/net/dev.c	Sun Dec 21 14:20:06 2003 +0000
@@ -522,6 +522,8 @@ void deliver_packet(struct sk_buff *skb,
     old_page = &frame_table[rx->buf_pfn];
     new_page = skb->pf;
     
+    skb->pf = old_page;
+
     ptep = map_domain_mem(rx->pte_ptr);
 
     new_page->u.domain = p;
@@ -541,6 +543,8 @@ void deliver_packet(struct sk_buff *skb,
                           ((new_page - frame_table) << PAGE_SHIFT))) != pte )
     {
         unmap_domain_mem(ptep);
+        /* At some point maybe should have 'new_page' in error response. */
+        put_page_and_type(new_page);
         status = RING_STATUS_BAD_PAGE;
         goto out;
     }
@@ -550,9 +554,6 @@ void deliver_packet(struct sk_buff *skb,
     
     unmap_domain_mem(ptep);
 
-    /* Our skbuff now points at the guest's old frame. */
-    skb->pf = old_page;
-
     /* Updates must happen before releasing the descriptor. */
     smp_wmb();
 
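
The first two dev.c hunks tighten deliver_packet's error handling. Assigning skb->pf at the top makes the page exchange unconditional, so the skbuff holds the guest's old frame on every exit path, successful or not, and the failing-cmpxchg path now also drops the reference held on new_page. The underlying idea is a plain ownership swap performed before anything can fail; a trivial sketch with a hypothetical opaque page type:

    struct page;   /* opaque frame descriptor */

    /* Exchange ownership first; later failure paths then need no
     * special-casing to keep both sides holding one valid page. */
    static void swap_ownership(struct page **a, struct page **b)
    {
        struct page *tmp = *a;
        *a = *b;
        *b = tmp;
    }
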
@@ -2078,17 +2079,13 @@ static void get_rx_bufs(net_vif_t *vif)
          * just once as a writeable page.
          */
         if ( unlikely(buf_page->u.domain != p) ||
-             unlikely(!test_and_clear_bit(_PGC_allocated, 
-                                          &buf_page->count_and_flags)) ||
              unlikely(cmpxchg(&buf_page->type_and_flags, 
                               PGT_writeable_page|PGT_validated|1,
                               0) != (PGT_writeable_page|PGT_validated|1)) )
         {
             DPRINTK("Bad domain or page mapped writeable more than once.\n");
-            if ( buf_page->u.domain == p )
-                set_bit(_PGC_allocated, &buf_page->count_and_flags);
-            if ( unlikely(cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
-                          (pte & ~_PAGE_PRESENT)) )
+            if ( cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) != 
+                 (pte & ~_PAGE_PRESENT) )
                 put_page_and_type(buf_page);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
@@ -2099,11 +2096,17 @@ static void get_rx_bufs(net_vif_t *vif)
          * The final count should be 2, because of PGC_allocated.
          */
         if ( unlikely(cmpxchg(&buf_page->count_and_flags, 
-                              PGC_tlb_flush_on_type_change | 2, 0) != 
-                      (PGC_tlb_flush_on_type_change | 2)) )
+                              PGC_allocated | PGC_tlb_flush_on_type_change | 2,
+                              0) != 
+                      (PGC_allocated | PGC_tlb_flush_on_type_change | 2)) )
         {
-            DPRINTK("Page held more than once\n");
-            /* Leave the page unmapped at 'ptep'. Stoopid domain! */
+            DPRINTK("Page held more than once %08lx\n", 
+                    buf_page->count_and_flags);
+            if ( get_page_type(buf_page, PGT_writeable_page) &&
+                 (cmpxchg(ptep, pte & ~_PAGE_PRESENT, pte) !=
+                  (pte & ~_PAGE_PRESENT)) )
+                put_page_and_type(buf_page);
+            /* NB. If we fail to remap the page, we should probably flag it. */
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }
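
Both get_rx_bufs hunks rely on the same primitive: a guest page is claimed by atomically exchanging its entire type_and_flags or count_and_flags word against the single value it must contain if the page is mapped exactly once; anything else means extra references exist and the buffer is refused. The second hunk folds PGC_allocated into the expected count word rather than clearing it with a separate test_and_clear_bit, removing the window between the two steps. A minimal sketch of claim-by-cmpxchg on a combined flags-and-count word, with a hypothetical bit layout:

    #include <stdatomic.h>

    #define PGC_ALLOCATED  (1UL << 31)         /* hypothetical flag bit */
    #define EXPECTED_WORD  (PGC_ALLOCATED | 2) /* allocator + one mapping */

    /* Claim the page iff exactly the expected references exist;
     * on success the word is zeroed and the page is ours. */
    static int claim_page(_Atomic unsigned long *count_and_flags)
    {
        unsigned long expected = EXPECTED_WORD;
        return atomic_compare_exchange_strong(count_and_flags, &expected, 0);
    }
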