ia64/xen-unstable

changeset 1421:abde9435cdb4

bitkeeper revision 1.925 (40bcb315tJAvhGVz2HeWAUX326czSg)

Device-driver fixes and improvements.
author kaf24@scramble.cl.cam.ac.uk
date Tue Jun 01 16:47:17 2004 +0000 (2004-06-01)
parents e46faf0b4305
children 44389f729723
files xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c
line diff
     1.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Mon May 31 22:43:14 2004 +0000
     1.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Tue Jun 01 16:47:17 2004 +0000
     1.3 @@ -88,14 +88,6 @@ void vbd_destroy(blkif_be_vbd_destroy_t 
     1.4  int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
     1.5  void destroy_all_vbds(blkif_t *blkif);
     1.6  
     1.7 -typedef struct {
     1.8 -    blkif_t       *blkif;
     1.9 -    unsigned long  id;
    1.10 -    atomic_t       pendcnt;
    1.11 -    unsigned short operation;
    1.12 -    int            status;
    1.13 -} pending_req_t;
    1.14 -
    1.15  /* Describes a [partial] disk extent (part of a block io request) */
    1.16  typedef struct {
    1.17      unsigned short dev;
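Annotation: the pending_req_t removed from common.h here is not gone; it reappears in backend/main.c (second file below) with an extra nr_pages field, so the completion path knows how many foreign mappings to tear down. A condensed sketch of the lifecycle it tracks, pieced together from the patched main.c (a simplification, not verbatim driver code):

    /* Dispatch side: one pending_req covers nr_psegs buffer_heads.  The
     * counter is set before any I/O is submitted, so completions may race. */
    pending_req->nr_pages = nr_psegs;             /* new field: pages to unmap */
    atomic_set(&pending_req->pendcnt, nr_psegs);  /* one reference per segment */

    /* Completion side: the last buffer_head to finish unmaps the mapped
     * area and queues exactly one response back to the frontend. */
    if ( atomic_dec_and_test(&pending_req->pendcnt) )
    {
        fast_flush_area(pending_idx, pending_req->nr_pages);
        make_response(pending_req->blkif, pending_req->id,
                      pending_req->operation, pending_req->status);
    }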
     2.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Mon May 31 22:43:14 2004 +0000
     2.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Tue Jun 01 16:47:17 2004 +0000
     2.3 @@ -24,22 +24,39 @@
     2.4  #define MAX_PENDING_REQS 64
     2.5  #define BATCH_PER_DOMAIN 16
     2.6  
     2.7 +/*
     2.8 + * NB. We place a page of padding between each buffer page to avoid incorrect
     2.9 + * merging of requests by the IDE and SCSI merging routines. Otherwise, two
    2.10 + * adjacent buffers in a scatter-gather request would have adjacent page
    2.11 + * numbers: since the merge routines don't realise that this is in *pseudophys*
    2.12 + * space, not real space, they may collapse the s-g elements!
    2.13 + */
    2.14  static unsigned long mmap_vstart;
    2.15  #define MMAP_PAGES_PER_REQUEST \
    2.16 -    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
    2.17 +    (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
    2.18  #define MMAP_PAGES             \
    2.19      (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
    2.20  #define MMAP_VADDR(_req,_seg)                        \
    2.21      (mmap_vstart +                                   \
    2.22       ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
    2.23 -     ((_seg) * PAGE_SIZE))
    2.24 +     ((_seg) * 2 * PAGE_SIZE))
    2.25  
    2.26  /*
    2.27   * Each outstanding request that we've passed to the lower device layers has a 
    2.28   * 'pending_req' allocated to it. Each buffer_head that completes decrements 
    2.29   * the pendcnt towards zero. When it hits zero, the specified domain has a 
    2.30   * response queued for it, with the saved 'id' passed back.
    2.31 - * 
    2.32 + */
    2.33 +typedef struct {
    2.34 +    blkif_t       *blkif;
    2.35 +    unsigned long  id;
    2.36 +    int            nr_pages;
    2.37 +    atomic_t       pendcnt;
    2.38 +    unsigned short operation;
    2.39 +    int            status;
    2.40 +} pending_req_t;
    2.41 +
    2.42 +/*
    2.43   * We can't allocate pending_req's in order, since they may complete out of 
    2.44   * order. We therefore maintain an allocation ring. This ring also indicates 
    2.45   * when enough work has been passed down -- at that point the allocation ring 
    2.46 @@ -62,6 +79,23 @@ static void dispatch_rw_block_io(blkif_t
    2.47  static void make_response(blkif_t *blkif, unsigned long id, 
    2.48                            unsigned short op, int st);
    2.49  
    2.50 +static void fast_flush_area(int idx, int nr_pages)
    2.51 +{
    2.52 +    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
    2.53 +    int               i;
    2.54 +
    2.55 +    for ( i = 0; i < nr_pages; i++ )
    2.56 +    {
    2.57 +        mcl[i].op = __HYPERVISOR_update_va_mapping;
    2.58 +        mcl[i].args[0] = MMAP_VADDR(idx, i) >> PAGE_SHIFT;
    2.59 +        mcl[i].args[1] = 0;
    2.60 +        mcl[i].args[2] = 0;
    2.61 +    }
    2.62 +
    2.63 +    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
    2.64 +    (void)HYPERVISOR_multicall(mcl, nr_pages);
    2.65 +}
    2.66 +
    2.67  
    2.68  /******************************************************************
    2.69   * BLOCK-DEVICE SCHEDULER LIST MAINTENANCE
    2.70 @@ -151,10 +185,9 @@ static void maybe_trigger_io_schedule(vo
    2.71   * COMPLETION CALLBACK -- Called as bh->b_end_io()
    2.72   */
    2.73  
    2.74 -static void end_block_io_op(struct buffer_head *bh, int uptodate)
    2.75 +static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
    2.76  {
    2.77 -    pending_req_t *pending_req = bh->b_private;
    2.78 -    unsigned long  flags;
    2.79 +    unsigned long flags;
    2.80  
    2.81      /* An error fails the entire request. */
    2.82      if ( !uptodate )
    2.83 @@ -166,8 +199,7 @@ static void end_block_io_op(struct buffe
    2.84      if ( atomic_dec_and_test(&pending_req->pendcnt) )
    2.85      {
    2.86          int pending_idx = pending_req - pending_reqs;
    2.87 -        vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
    2.88 -                          MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
    2.89 +        fast_flush_area(pending_idx, pending_req->nr_pages);
    2.90          make_response(pending_req->blkif, pending_req->id,
    2.91                        pending_req->operation, pending_req->status);
    2.92          blkif_put(pending_req->blkif);
    2.93 @@ -176,7 +208,11 @@ static void end_block_io_op(struct buffe
    2.94          spin_unlock_irqrestore(&pend_prod_lock, flags);
    2.95          maybe_trigger_io_schedule();
    2.96      }
    2.97 +}
    2.98  
    2.99 +static void end_block_io_op(struct buffer_head *bh, int uptodate)
   2.100 +{
   2.101 +    __end_block_io_op(bh->b_private, uptodate);
   2.102      kmem_cache_free(buffer_head_cachep, bh);
   2.103  }
   2.104  
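Annotation: the first hunk of this file is the heart of the change. The per-request mapping window doubles in size and MMAP_VADDR strides by two pages, so every mapped data page is followed by an unmapped padding page; adjacent scatter-gather elements therefore never see adjacent page numbers and the IDE/SCSI merge routines cannot collapse them. A stand-alone illustration of the address arithmetic (hypothetical base address, and an assumed value for BLKIF_MAX_SEGMENTS_PER_REQUEST, purely for the printout):

    #include <stdio.h>

    /* Illustration only: mirrors the macros in this patch with made-up
     * values so the arithmetic can be printed from userspace. */
    #define PAGE_SIZE                      4096UL
    #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11   /* assumed, for the example */
    #define MMAP_PAGES_PER_REQUEST (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))

    static unsigned long mmap_vstart = 0xc8000000UL;   /* hypothetical base */

    #define MMAP_VADDR(_req,_seg)                        \
        (mmap_vstart +                                   \
         ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
         ((_seg) * 2 * PAGE_SIZE))

    int main(void)
    {
        int seg;
        /* Consecutive segments of request 0 land two pages apart, so the
         * page between them stays unmapped and blocks request merging. */
        for ( seg = 0; seg < 3; seg++ )
            printf("req 0, seg %d -> %#lx\n", seg, MMAP_VADDR(0, seg));
        return 0;
    }

When a request completes, fast_flush_area() (added in the second hunk) tears these mappings down with a single batched update_va_mapping multicall, requesting a TLB flush only on the final entry, in place of the old vmfree_area_pages() call.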
   2.105 @@ -245,44 +281,30 @@ static int do_block_io_op(blkif_t *blkif
   2.106  
   2.107  static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
   2.108  {
   2.109 -    int      i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   2.110 -    pgprot_t prot;
   2.111 +    int rsp = BLKIF_RSP_ERROR;
   2.112 +    int pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   2.113  
   2.114 -    /* Check that number of segments is sane. */
   2.115 -    if ( unlikely(req->nr_segments == 0) || 
   2.116 -         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
   2.117 -    {
   2.118 -        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
   2.119 -        goto bad_descriptor;
   2.120 -    }
   2.121 +    /* We expect one buffer only. */
   2.122 +    if ( unlikely(req->nr_segments != 1) )
   2.123 +        goto out;
   2.124 +
   2.125 +    /* Make sure the buffer is page-sized. */
   2.126 +    if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) ||
   2.127 +         (blkif_last_sect(req->frame_and_sects[0]) != 7) )
   2.128 +        goto out;
   2.129  
   2.130 -    prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
   2.131 -    for ( i = 0; i < req->nr_segments; i++ )
   2.132 -    {
   2.133 -        /* Make sure the buffer is page-sized. */
   2.134 -        if ( (blkif_first_sect(req->frame_and_sects[i]) != 0) ||
   2.135 -             (blkif_last_sect(req->frame_and_sects[i]) != 7) )
   2.136 -            goto bad_descriptor;
   2.137 -        rc = direct_remap_area_pages(&init_mm, 
   2.138 -                                     MMAP_VADDR(pending_idx, i),
   2.139 -                                     req->frame_and_sects[i] & PAGE_MASK, 
   2.140 -                                     PAGE_SIZE, prot, blkif->domid);
   2.141 -        if ( rc != 0 )
   2.142 -            goto bad_descriptor;
   2.143 -    }
   2.144 +    if ( HYPERVISOR_update_va_mapping_otherdomain(
   2.145 +        MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
   2.146 +        (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
   2.147 +        0, blkif->domid) )
   2.148 +        goto out;
   2.149  
   2.150 -    rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
   2.151 -                   (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
   2.152 +    rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
   2.153 +                    PAGE_SIZE / sizeof(vdisk_t));
   2.154  
   2.155 -    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   2.156 -                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
   2.157 -    make_response(blkif, req->id, req->operation, rc);
   2.158 -    return;
   2.159 -
   2.160 - bad_descriptor:
   2.161 -    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   2.162 -                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
   2.163 -    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
   2.164 + out:
   2.165 +    fast_flush_area(pending_idx, 1);
   2.166 +    make_response(blkif, req->id, req->operation, rsp);
   2.167  }
   2.168  
   2.169  static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
   2.170 @@ -294,7 +316,8 @@ static void dispatch_rw_block_io(blkif_t
   2.171      unsigned long buffer, fas;
   2.172      int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   2.173      pending_req_t *pending_req;
   2.174 -    pgprot_t       prot;
   2.175 +    unsigned long  remap_prot;
   2.176 +    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
   2.177  
   2.178      /* We map virtual scatter/gather segments to physical segments. */
   2.179      int new_segs, nr_psegs = 0;
   2.180 @@ -349,25 +372,33 @@ static void dispatch_rw_block_io(blkif_t
   2.181          goto bad_descriptor;
   2.182  
   2.183      if ( operation == READ )
   2.184 -        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
   2.185 +        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
   2.186      else
   2.187 -        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
   2.188 +        remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED;
   2.189  
   2.190      for ( i = 0; i < nr_psegs; i++ )
   2.191      {
   2.192 -        int rc = direct_remap_area_pages(&init_mm, 
   2.193 -                                         MMAP_VADDR(pending_idx, i),
   2.194 -                                         phys_seg[i].buffer & PAGE_MASK, 
   2.195 -                                         PAGE_SIZE, prot, blkif->domid);
   2.196 -        if ( rc != 0 )
   2.197 +        mcl[i].op = __HYPERVISOR_update_va_mapping_otherdomain;
   2.198 +        mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
   2.199 +        mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
   2.200 +        mcl[i].args[2] = 0;
   2.201 +        mcl[i].args[3] = (unsigned long)blkif->domid;
   2.202 +        mcl[i].args[4] = (unsigned long)(blkif->domid>>32);
   2.203 +
   2.204 +        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
   2.205 +            phys_seg[i].buffer >> PAGE_SHIFT;
   2.206 +    }
   2.207 +
   2.208 +    (void)HYPERVISOR_multicall(mcl, nr_psegs);
   2.209 +
   2.210 +    for ( i = 0; i < nr_psegs; i++ )
   2.211 +    {
   2.212 +        if ( unlikely(mcl[i].args[5] != 0) )
   2.213          {
   2.214 -            DPRINTK("invalid buffer\n");
   2.215 -            vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   2.216 -                              MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
   2.217 +            DPRINTK("invalid buffer -- could not remap it\n");
   2.218 +            fast_flush_area(pending_idx, nr_psegs);
   2.219              goto bad_descriptor;
   2.220          }
   2.221 -        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
   2.222 -            phys_seg[i].buffer >> PAGE_SHIFT;
   2.223      }
   2.224  
   2.225      pending_req = &pending_reqs[pending_idx];
   2.226 @@ -375,6 +406,7 @@ static void dispatch_rw_block_io(blkif_t
   2.227      pending_req->id        = req->id;
   2.228      pending_req->operation = operation;
   2.229      pending_req->status    = BLKIF_RSP_OKAY;
   2.230 +    pending_req->nr_pages  = nr_psegs;
   2.231      atomic_set(&pending_req->pendcnt, nr_psegs);
   2.232      pending_cons++;
   2.233  
   2.234 @@ -385,7 +417,10 @@ static void dispatch_rw_block_io(blkif_t
   2.235      {
   2.236          bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
   2.237          if ( unlikely(bh == NULL) )
   2.238 -            panic("bh is null\n");
   2.239 +        {
   2.240 +            __end_block_io_op(pending_req, 0);
   2.241 +            continue;
   2.242 +        }
   2.243          memset(bh, 0, sizeof (struct buffer_head));
   2.244  
   2.245          init_waitqueue_head(&bh->b_wait);
   2.246 @@ -395,7 +430,7 @@ static void dispatch_rw_block_io(blkif_t
   2.247          bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
   2.248          bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
   2.249              (phys_seg[i].buffer & ~PAGE_MASK);
   2.250 -//        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
   2.251 +        bh->b_page          = virt_to_page(MMAP_VADDR(pending_idx, i));
   2.252          bh->b_end_io        = end_block_io_op;
   2.253          bh->b_private       = pending_req;
   2.254  
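Annotation: both dispatch paths now map foreign pages with hypercalls instead of direct_remap_area_pages(). dispatch_probe() issues a single HYPERVISOR_update_va_mapping_otherdomain() for its one page, while dispatch_rw_block_io() batches one update_va_mapping_otherdomain entry per physical segment into a multicall and then checks each entry's result slot. A condensed sketch of that batching pattern (hypercall numbers and helpers are the ones used in the patch; the function shape is illustrative, and the phys_to_machine_mapping bookkeeping is omitted; 'buffer[i]' stands in for what the patch calls phys_seg[i].buffer):

    /* Sketch: batch nr foreign-page mappings into one hypercall round trip. */
    static int sketch_map_segments(blkif_t *blkif, int pending_idx,
                                   unsigned long *buffer, int nr,
                                   unsigned long remap_prot)
    {
        multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
        int i;

        for ( i = 0; i < nr; i++ )
        {
            mcl[i].op      = __HYPERVISOR_update_va_mapping_otherdomain;
            mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
            mcl[i].args[1] = (buffer[i] & PAGE_MASK) | remap_prot;
            mcl[i].args[2] = 0;                      /* defer the TLB flush */
            mcl[i].args[3] = (unsigned long)blkif->domid;
            mcl[i].args[4] = (unsigned long)(blkif->domid >> 32);
        }

        (void)HYPERVISOR_multicall(mcl, nr);

        /* Each entry's result comes back in args[5]; one bad buffer fails
         * the whole request, and the partially mapped area is flushed. */
        for ( i = 0; i < nr; i++ )
        {
            if ( mcl[i].args[5] != 0 )
            {
                fast_flush_area(pending_idx, nr);
                return -1;
            }
        }
        return 0;
    }

The same hunk also replaces the panic() on buffer_head allocation failure with a call to __end_block_io_op(pending_req, 0), which is why the completion handler was split into a wrapper and a worker earlier in the file.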
     3.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	Mon May 31 22:43:14 2004 +0000
     3.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/netif/backend/main.c	Tue Jun 01 16:47:17 2004 +0000
     3.3 @@ -77,6 +77,7 @@ static spinlock_t net_schedule_list_lock
     3.4  static unsigned long mfn_list[MAX_MFN_ALLOC];
     3.5  static unsigned int alloc_index = 0;
     3.6  static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
     3.7 +
     3.8  static void __refresh_mfn_list(void)
     3.9  {
    3.10      int ret;
    3.11 @@ -91,6 +92,7 @@ static void __refresh_mfn_list(void)
    3.12      }
    3.13      alloc_index = MAX_MFN_ALLOC;
    3.14  }
    3.15 +
    3.16  static unsigned long get_new_mfn(void)
    3.17  {
    3.18      unsigned long mfn, flags;
    3.19 @@ -101,11 +103,25 @@ static unsigned long get_new_mfn(void)
    3.20      spin_unlock_irqrestore(&mfn_lock, flags);
    3.21      return mfn;
    3.22  }
    3.23 +
    3.24  static void dealloc_mfn(unsigned long mfn)
    3.25  {
    3.26      unsigned long flags;
    3.27 +    dom_mem_op_t  op;
    3.28 +
    3.29      spin_lock_irqsave(&mfn_lock, flags);
    3.30 -    mfn_list[alloc_index++] = mfn;
    3.31 +    if ( alloc_index != MAX_MFN_ALLOC )
    3.32 +    {
    3.33 +        /* Usually we can put the MFN back on the quicklist. */
    3.34 +        mfn_list[alloc_index++] = mfn;
    3.35 +    }
    3.36 +    else
    3.37 +    {
    3.38 +        op.op = MEMOP_RESERVATION_INCREASE;
    3.39 +        op.u.decrease.size  = 1;
    3.40 +        op.u.decrease.pages = &mfn;
    3.41 +        (void)HYPERVISOR_dom_mem_op(&op);
    3.42 +    }
    3.43      spin_unlock_irqrestore(&mfn_lock, flags);
    3.44  }
    3.45  
    3.46 @@ -180,7 +196,7 @@ static void xen_network_done_notify(void
    3.47  /* 
    3.48   * Add following to poll() function in NAPI driver (Tigon3 is example):
    3.49   *  if ( xen_network_done() )
    3.50 - *      tge_3nable_ints(tp); 
    3.51 + *      tg3_enable_ints(tp); 
    3.52   */
    3.53  int xen_network_done(void)
    3.54  {
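Annotation: in the netif backend, dealloc_mfn() used to assume the MFN quicklist always had room; it now falls back to handing the page back through HYPERVISOR_dom_mem_op() when the list is full. For reference, the patched function with explanatory comments added (the op code and union member are reproduced exactly as they appear in the patch text):

    static void dealloc_mfn(unsigned long mfn)
    {
        unsigned long flags;
        dom_mem_op_t  op;

        spin_lock_irqsave(&mfn_lock, flags);
        if ( alloc_index != MAX_MFN_ALLOC )
        {
            /* Common case: park the MFN on the quicklist for reuse. */
            mfn_list[alloc_index++] = mfn;
        }
        else
        {
            /* Quicklist full: return the single page via dom_mem_op. */
            op.op = MEMOP_RESERVATION_INCREASE;
            op.u.decrease.size  = 1;
            op.u.decrease.pages = &mfn;
            (void)HYPERVISOR_dom_mem_op(&op);
        }
        spin_unlock_irqrestore(&mfn_lock, flags);
    }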