ia64/xen-unstable

changeset 948:7272f4d3fafe

bitkeeper revision 1.604 (3fb7bdc6G8jZv2m6lWAfV3eKAgnirQ)

vbd.h, sched.h, xen_vbd.c, xen_block.c, domain.c:
Cleanups and a few bug fixes to the new vbd code.
author kaf24@scramble.cl.cam.ac.uk
date Sun Nov 16 18:11:18 2003 +0000 (2003-11-16)
parents c7dc2e589f8f
children ad93f972420f
files xen/common/domain.c xen/drivers/block/xen_block.c xen/drivers/block/xen_vbd.c xen/include/xeno/sched.h xen/include/xeno/vbd.h
line diff
     1.1 --- a/xen/common/domain.c	Thu Nov 13 17:09:34 2003 +0000
     1.2 +++ b/xen/common/domain.c	Sun Nov 16 18:11:18 2003 +0000
     1.3 @@ -52,7 +52,6 @@ struct task_struct *do_createdomain(unsi
     1.4  
     1.5      spin_lock_init(&p->blk_ring_lock);
     1.6      spin_lock_init(&p->page_lock);
     1.7 -    spin_lock_init(&p->physdev_lock);
     1.8  
     1.9      p->shared_info = (void *)get_free_page(GFP_KERNEL);
    1.10      memset(p->shared_info, 0, PAGE_SIZE);
    1.11 @@ -63,8 +62,6 @@ struct task_struct *do_createdomain(unsi
    1.12  
    1.13      init_blkdev_info(p);
    1.14  
    1.15 -    INIT_LIST_HEAD(&p->physdisk_aces);
    1.16 -
    1.17      p->addr_limit = USER_DS;
    1.18  
    1.19      sched_add_domain(p);
    1.20 @@ -132,11 +129,6 @@ void __kill_domain(struct task_struct *p
    1.21  
    1.22      unlink_blkdev_info(p);
    1.23  
    1.24 -#if 0
    1.25 -    for ( i = 0; i < XEN_MAX_VBDS; i++ )
    1.26 -	xen_vbd_delete(p, i);
    1.27 -#endif
    1.28 -
    1.29      for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
    1.30          unlink_net_vif(p->net_vif_list[i]);
    1.31  
    1.32 @@ -302,16 +294,11 @@ void release_task(struct task_struct *p)
    1.33      printk("Releasing task %d\n", p->domain);
    1.34  
    1.35      /*
    1.36 -     * This frees up blkdev rings. Totally safe since blkdev ref counting
    1.37 -     * actually uses the task_struct refcnt.
    1.38 +     * This frees up blkdev rings and vbd-access lists. Totally safe since
    1.39 +     * blkdev ref counting actually uses the task_struct refcnt.
    1.40       */
    1.41      destroy_blkdev_info(p);
    1.42  
    1.43 -#if 0
    1.44 -    /* Free up the physdisk access control info */
    1.45 -    destroy_physdisk_aces(p);
    1.46 -#endif
    1.47 -
    1.48      /* Free all memory associated with this domain. */
    1.49      free_page((unsigned long)p->mm.perdomain_pt);
    1.50      UNSHARE_PFN(virt_to_page(p->shared_info));
     2.1 --- a/xen/drivers/block/xen_block.c	Thu Nov 13 17:09:34 2003 +0000
     2.2 +++ b/xen/drivers/block/xen_block.c	Sun Nov 16 18:11:18 2003 +0000
     2.3 @@ -166,6 +166,7 @@ static void maybe_trigger_io_schedule(vo
     2.4  
     2.5  /******************************************************************
     2.6   * COMPLETION CALLBACK -- Called as bh->b_end_io()
     2.7 + * NB. This can be called from interrupt context!
     2.8   */
     2.9  
    2.10  static void end_block_io_op(struct buffer_head *bh, int uptodate)
    2.11 @@ -208,11 +209,11 @@ long do_block_io_op(block_io_op_t *u_blo
    2.12      block_io_op_t op; 
    2.13      struct task_struct *p = current;
    2.14  
    2.15 -    if (copy_from_user(&op, u_block_io_op, sizeof(op)))
    2.16 +    if ( copy_from_user(&op, u_block_io_op, sizeof(op)) )
    2.17          return -EFAULT;
    2.18  
    2.19 -    switch (op.cmd) {
    2.20 -
    2.21 +    switch ( op.cmd )
    2.22 +    {
    2.23      case BLOCK_IO_OP_SIGNAL: 
    2.24  	/* simply indicates there're reqs outstanding => add current to list */
    2.25  	add_to_blkdev_list_tail(p);
    2.26 @@ -277,7 +278,6 @@ long do_block_io_op(block_io_op_t *u_blo
    2.27  	ret = -ENOSYS; 
    2.28      } 
    2.29  
    2.30 -
    2.31      return ret;
    2.32  }
    2.33  
    2.34 @@ -439,7 +439,7 @@ static void dispatch_rw_block_io(struct 
    2.35      int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
    2.36      unsigned short nr_sects;
    2.37      unsigned long buffer, flags;
    2.38 -    int i, rc, tot_sects;
    2.39 +    int i, tot_sects;
    2.40      pending_req_t *pending_req;
    2.41  
    2.42      /* We map virtual scatter/gather segments to physical segments. */
    2.43 @@ -484,23 +484,29 @@ static void dispatch_rw_block_io(struct 
    2.44  	phys_seg[nr_psegs].nr_sects      = nr_sects;
    2.45  
    2.46          /* Translate the request into the relevant 'physical device' */
    2.47 -	new_segs = 1; 
    2.48 -	rc = vbd_translate(&phys_seg[nr_psegs], &new_segs, p, operation); 
    2.49 +	new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation); 
    2.50  
    2.51 -	/* If it fails we bail (unless the caller is priv => has raw access) */
    2.52 -	if(rc) { 
    2.53 -	    if(!IS_PRIV(p)) {
    2.54 -		printk("access denied: %s of [%ld,%ld] on dev=%04x\n", 
    2.55 -		       operation == READ ? "read" : "write", 
    2.56 -		       req->sector_number + tot_sects, 
    2.57 -		       req->sector_number + tot_sects + nr_sects, 
    2.58 -		       req->device); 
    2.59 -		goto bad_descriptor;
    2.60 -	    }
    2.61 +	/* If it fails we bail (unless the caller is privileged). */
    2.62 +	if ( new_segs < 0 )
    2.63 +        { 
    2.64 +            if ( unlikely(new_segs != -ENODEV) || unlikely(!IS_PRIV(p)) )
    2.65 +            {
    2.66 +                DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n", 
    2.67 +                        operation == READ ? "read" : "write", 
    2.68 +                        req->sector_number + tot_sects, 
    2.69 +                        req->sector_number + tot_sects + nr_sects, 
    2.70 +                        req->device); 
    2.71 +                goto bad_descriptor;
    2.72 +            }
    2.73  
    2.74 -	    /* SMH: skanky hack; clear any 'partition' info in device */
    2.75 +	    /*
    2.76 +             * XXX Clear any 'partition' info in device. This works because IDE
    2.77 +             * ignores the partition bits anyway. Only SCSI needs this hack,
    2.78 +             * and it has four bits to clear.
    2.79 +             */
    2.80  	    phys_seg[nr_psegs].dev = req->device & 0xFFF0;
    2.81 -	}
    2.82 +            new_segs = 1;
    2.83 +        }
    2.84  	 
    2.85          nr_psegs += new_segs;
    2.86          if ( nr_psegs >= (MAX_BLK_SEGS*2) ) BUG();
    2.87 @@ -620,6 +626,7 @@ void init_blkdev_info(struct task_struct
    2.88      clear_page(p->blk_ring_base);
    2.89      SHARE_PFN_WITH_DOMAIN(virt_to_page(p->blk_ring_base), p->domain);
    2.90      p->blkdev_list.next = NULL;
    2.91 +    spin_lock_init(&p->vbd_lock);
    2.92  }
    2.93  
    2.94  /* End-of-day teardown for a domain. */
    2.95 @@ -628,6 +635,7 @@ void destroy_blkdev_info(struct task_str
    2.96      ASSERT(!__on_blkdev_list(p));
    2.97      UNSHARE_PFN(virt_to_page(p->blk_ring_base));
    2.98      free_page((unsigned long)p->blk_ring_base);
    2.99 +    destroy_all_vbds(p);
   2.100  }
   2.101  
   2.102  void unlink_blkdev_info(struct task_struct *p)
     3.1 --- a/xen/drivers/block/xen_vbd.c	Thu Nov 13 17:09:34 2003 +0000
     3.2 +++ b/xen/drivers/block/xen_vbd.c	Sun Nov 16 18:11:18 2003 +0000
     3.3 @@ -30,7 +30,6 @@ extern int scsi_probe_devices(xen_disk_i
     3.4  #define DPRINTK(_f, _a...) ((void)0)
     3.5  #endif
     3.6  
     3.7 -
     3.8  /* XXX SMH: crappy 'hash function' .. fix when care. */
     3.9  #define HSH(_x) (((_x) >> 6) & (VBD_HTAB_SZ - 1))
    3.10  
    3.11 @@ -38,107 +37,231 @@ extern int scsi_probe_devices(xen_disk_i
    3.12  ** Create a new VBD; all this involves is adding an entry to the domain's
    3.13  ** vbd hash table; caller must be privileged. 
    3.14  */
    3.15 -long vbd_create(vbd_create_t *create_params) 
    3.16 +long vbd_create(vbd_create_t *create) 
    3.17  {
    3.18      struct task_struct *p; 
    3.19 -    vbd_t *new_vbd, *v; 
    3.20 -    int h; 
    3.21 -
    3.22 -    if(!IS_PRIV(current))
    3.23 -	return -EPERM; 
    3.24 +    vbd_t *new_vbd, **pv; 
    3.25 +    long ret = 0;
    3.26  
    3.27 -    p = find_domain_by_id(create_params->domain);
    3.28 +    if( !IS_PRIV(current) )
    3.29 +        return -EPERM; 
    3.30  
    3.31 -    if (!p) { 
    3.32 -	printk("vbd_create attempted for non-existent domain %d\n", 
    3.33 -	       create_params->domain); 
    3.34 -	return -EINVAL; 
    3.35 +    if ( (p = find_domain_by_id(create->domain)) == NULL )
    3.36 +    {
    3.37 +        DPRINTK("vbd_create attempted for non-existent domain %d\n", 
    3.38 +                create->domain); 
    3.39 +        return -EINVAL; 
    3.40 +    }
    3.41 +
    3.42 +    spin_lock(&p->vbd_lock);
    3.43 +
    3.44 +    for ( pv = &p->vbdtab[HSH(create->vdevice)]; 
    3.45 +          *pv != NULL; 
    3.46 +          pv = &(*pv)->next ) 
    3.47 +    {
    3.48 +        if ( (*pv)->vdevice == create->vdevice )
    3.49 +        {
    3.50 +            DPRINTK("vbd_create attempted for already existing vbd\n");
    3.51 +            ret = -EINVAL;
    3.52 +            goto out;
    3.53 +        }
    3.54 +        if ( (*pv)->vdevice > create->vdevice )
    3.55 +            break;
    3.56      }
    3.57  
    3.58      new_vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL); 
    3.59 -    new_vbd->vdevice = create_params->vdevice; 
    3.60 -    new_vbd->mode    = create_params->mode; 
    3.61 +    new_vbd->vdevice = create->vdevice; 
    3.62 +    new_vbd->mode    = create->mode; 
    3.63      new_vbd->extents = (xen_extent_le_t *)NULL; 
    3.64      new_vbd->next    = (vbd_t *)NULL; 
    3.65  
    3.66 -    h = HSH(create_params->vdevice); 
    3.67 -    if(p->vbdtab[h]) { 
    3.68 -	for(v = p->vbdtab[h]; v->next; v = v->next) 
    3.69 -	    ; 
    3.70 -	v->next = new_vbd; 
    3.71 -    } else p->vbdtab[h] = new_vbd; 
    3.72 +    *pv = new_vbd;
    3.73  
    3.74 + out:
    3.75 +    spin_unlock(&p->vbd_lock);
    3.76      put_task_struct(p);
    3.77 -    
    3.78 -    return 0; 
    3.79 +    return ret; 
    3.80  }
    3.81  
    3.82  /*
    3.83  ** Add an extent to an existing VBD; fails if the VBD doesn't exist. 
    3.84  ** Doesn't worry about overlapping extents (e.g. merging etc) for now. 
    3.85  */
    3.86 -long vbd_add(vbd_add_t *add_params) 
    3.87 +long vbd_add(vbd_add_t *add) 
    3.88  {
    3.89      struct task_struct *p; 
    3.90 -    xen_extent_le_t *x, *xele; 
    3.91 +    xen_extent_le_t **px, *x; 
    3.92      vbd_t *v; 
    3.93 -    int h; 
    3.94 -
    3.95 -    if(!IS_PRIV(current))
    3.96 -	return -EPERM; 
    3.97 -
    3.98 -    p = find_domain_by_id(add_params->domain);
    3.99 +    long ret = 0;
   3.100  
   3.101 -    if (!p) { 
   3.102 -	printk("vbd_add attempted for non-existent domain %d\n", 
   3.103 -	       add_params->domain); 
   3.104 -	return -EINVAL; 
   3.105 -    }
   3.106 +    if ( !IS_PRIV(current) )
   3.107 +        return -EPERM; 
   3.108  
   3.109 -    h = HSH(add_params->vdevice); 
   3.110 -
   3.111 -    for(v = p->vbdtab[h]; v; v = v->next) 
   3.112 -	if(v->vdevice == add_params->vdevice)
   3.113 -	    break; 
   3.114 -
   3.115 -    if(!v) {
   3.116 -	printk("vbd_add; attempted to add extent to non-existent VBD.\n"); 
   3.117 -	return -EINVAL; 
   3.118 +    if ( (p = find_domain_by_id(add->domain)) == NULL )
   3.119 +    {
   3.120 +        DPRINTK("vbd_add attempted for non-existent domain %d\n", 
   3.121 +                add->domain); 
   3.122 +        return -EINVAL; 
   3.123      }
   3.124  
   3.125 -    xele = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL); 
   3.126 -    xele->extent.device       = add_params->extent.device; 
   3.127 -    xele->extent.start_sector = add_params->extent.start_sector; 
   3.128 -    xele->extent.nr_sectors   = add_params->extent.nr_sectors; 
   3.129 -    xele->next                = (xen_extent_le_t *)NULL; 
   3.130 +    spin_lock(&p->vbd_lock);
   3.131  
   3.132 -    if(!v->extents) {
   3.133 -	v->extents = xele; 
   3.134 -    } else { 
   3.135 -	for(x = v->extents; x->next; x = x->next) 
   3.136 -	    ; 
   3.137 -	x->next = xele; 
   3.138 -    } 
   3.139 +    for ( v = p->vbdtab[HSH(add->vdevice)]; v != NULL; v = v->next ) 
   3.140 +        if ( v->vdevice == add->vdevice )
   3.141 +            break; 
   3.142  
   3.143 +    if ( v == NULL )
   3.144 +    {
   3.145 +        DPRINTK("vbd_add; attempted to add extent to non-existent VBD.\n"); 
   3.146 +        ret = -EINVAL;
   3.147 +        goto out; 
   3.148 +    }
   3.149 +
   3.150 +    x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL); 
   3.151 +    x->extent.device       = add->extent.device; 
   3.152 +    x->extent.start_sector = add->extent.start_sector; 
   3.153 +    x->extent.nr_sectors   = add->extent.nr_sectors; 
   3.154 +    x->next                = (xen_extent_le_t *)NULL; 
   3.155 +
   3.156 +    for ( px = &v->extents; *px != NULL; px = &(*px)->next ) 
   3.157 +        continue;
   3.158 +
   3.159 +    *px = x;
   3.160 +
   3.161 + out:
   3.162 +    spin_unlock(&p->vbd_lock);
   3.163      put_task_struct(p);
   3.164 -
   3.165 -    return 0; 
   3.166 +    return ret;
   3.167  }
   3.168  
   3.169 -long vbd_remove(vbd_remove_t *remove_params) 
   3.170 +long vbd_remove(vbd_remove_t *remove) 
   3.171  {
   3.172 -    if(!IS_PRIV(current))
   3.173 -	return -EPERM; 
   3.174 +    struct task_struct *p; 
   3.175 +    xen_extent_le_t **px, *x; 
   3.176 +    vbd_t *v; 
   3.177 +    long ret = 0;
   3.178  
   3.179 -    return -ENOSYS; 
   3.180 +    if ( !IS_PRIV(current) )
   3.181 +        return -EPERM; 
   3.182 +
   3.183 +    if ( (p = find_domain_by_id(remove->domain)) == NULL )
   3.184 +    {
   3.185 +        DPRINTK("vbd_remove attempted for non-existent domain %d\n", 
   3.186 +                remove->domain); 
   3.187 +        return -EINVAL; 
   3.188 +    }
   3.189 +
   3.190 +    spin_lock(&p->vbd_lock);
   3.191 +
   3.192 +    for ( v = p->vbdtab[HSH(remove->vdevice)]; v != NULL; v = v->next ) 
   3.193 +        if ( v->vdevice == remove->vdevice )
   3.194 +            break; 
   3.195 +
   3.196 +    if ( v == NULL )
   3.197 +    {
   3.198 +        DPRINTK("vbd_remove; attempt to remove ext from non-existent VBD.\n"); 
   3.199 +        ret = -EINVAL;
   3.200 +        goto out;
   3.201 +    }
   3.202 +
   3.203 +    for ( px = &v->extents; *px != NULL; px = &(*px)->next ) 
   3.204 +        if ( (*px)->extent.start_sector == remove->extent.start_sector )
   3.205 +            break;
   3.206 +    
   3.207 +    if ( ((x = *px) == NULL) || 
   3.208 +         (x->extent.nr_sectors != remove->extent.nr_sectors) || 
   3.209 +         (x->extent.device != remove->extent.device) )
   3.210 +    {
   3.211 +        DPRINTK("vbd_remove: attempt to remove non-matching extent.\n");
   3.212 +        ret = -EINVAL;
   3.213 +        goto out;
   3.214 +    }
   3.215 +
   3.216 +    *px = x->next;
   3.217 +    kfree(x);
   3.218 +
   3.219 + out:
   3.220 +    spin_unlock(&p->vbd_lock);
   3.221 +    put_task_struct(p);
   3.222 +    return ret; 
   3.223  }
   3.224  
   3.225 -long vbd_delete(vbd_delete_t *delete_params) 
   3.226 +long vbd_delete(vbd_delete_t *delete) 
   3.227  {
   3.228 -    if(!IS_PRIV(current))
   3.229 -	return -EPERM; 
   3.230 +    struct task_struct *p; 
   3.231 +    vbd_t *v, **pv; 
   3.232 +    xen_extent_le_t *x, *t;
   3.233  
   3.234 -    return -ENOSYS; 
   3.235 +    if( !IS_PRIV(current) )
   3.236 +        return -EPERM; 
   3.237 +
   3.238 +    if ( (p = find_domain_by_id(delete->domain)) == NULL )
   3.239 +    {
   3.240 +        DPRINTK("vbd_delete attempted for non-existent domain %d\n", 
   3.241 +                delete->domain); 
   3.242 +        return -EINVAL; 
   3.243 +    }
   3.244 +
   3.245 +    spin_lock(&p->vbd_lock);
   3.246 +
   3.247 +    for ( pv = &p->vbdtab[HSH(delete->vdevice)]; 
   3.248 +          *pv != NULL; 
   3.249 +          pv = &(*pv)->next ) 
   3.250 +    {
   3.251 +        if ( (*pv)->vdevice == delete->vdevice )
   3.252 +            goto found;
   3.253 +    }
   3.254 +    
   3.255 +    DPRINTK("vbd_delete attempted for non-existing VBD.\n");
   3.256 +
   3.257 +    spin_unlock(&p->vbd_lock);
   3.258 +    put_task_struct(p);
   3.259 +    return -EINVAL;
   3.260 +
   3.261 + found:
   3.262 +    v = *pv;
   3.263 +    *pv = v->next;
   3.264 +    x = v->extents;
   3.265 +    kfree(v);
   3.266 +
   3.267 +    while ( x != NULL )
   3.268 +    {
   3.269 +        t = x->next;
   3.270 +        kfree(x);
   3.271 +        x = t;
   3.272 +    }
   3.273 +    
   3.274 +    spin_unlock(&p->vbd_lock);
   3.275 +    put_task_struct(p);
   3.276 +    return 0;
   3.277 +}
   3.278 +
   3.279 +
   3.280 +void destroy_all_vbds(struct task_struct *p)
   3.281 +{
   3.282 +    int i;
   3.283 +    vbd_t *v; 
   3.284 +    xen_extent_le_t *x, *t;
   3.285 +
   3.286 +    spin_lock(&p->vbd_lock);
   3.287 +    for ( i = 0; i < VBD_HTAB_SZ; i++ )
   3.288 +    {
   3.289 +        while ( (v = p->vbdtab[i]) != NULL )
   3.290 +        {
   3.291 +            p->vbdtab[i] = v->next;
   3.292 +      
   3.293 +            x = v->extents;
   3.294 +            kfree(v);
   3.295 +            
   3.296 +            while ( x != NULL )
   3.297 +            {
   3.298 +                t = x->next;
   3.299 +                kfree(x);
   3.300 +                x = t;
   3.301 +            }          
   3.302 +        }
   3.303 +    }
   3.304 +    spin_unlock(&p->vbd_lock);
   3.305  }
   3.306  
   3.307  
   3.308 @@ -153,40 +276,45 @@ static int vbd_probe_devices(xen_disk_in
   3.309      xen_extent_le_t *x; 
   3.310      xen_disk_t cur_disk; 
   3.311      vbd_t *v; 
   3.312 -    int i, ret; 
   3.313 -
   3.314 -    for(i = 0; i < VBD_HTAB_SZ; i++) { 
   3.315 -
   3.316 -	for(v = p->vbdtab[i]; v; v = v->next) { 
   3.317 -
   3.318 -	    /* SMH: don't ever expect this to happen, hence verbose printk */
   3.319 -	    if ( xdi->count == xdi->max ) { 
   3.320 -		printk("vbd_probe_devices: out of space for probe.\n"); 
   3.321 -		return -ENOMEM; 
   3.322 -	    }
   3.323 +    int i; 
   3.324  
   3.325 -	    cur_disk.device = v->vdevice; 
   3.326 -	    cur_disk.info   = XD_FLAG_VIRT | XD_TYPE_DISK; 
   3.327 -	    if(!VBD_CAN_WRITE(v))
   3.328 -		cur_disk.info |= XD_FLAG_RO; 
   3.329 -	    cur_disk.capacity = 0 ; 
   3.330 -	    for(x = v->extents; x; x = x->next) 
   3.331 -		cur_disk.capacity += x->extent.nr_sectors; 
   3.332 -	    cur_disk.domain   = p->domain; 
   3.333 +    spin_lock(&p->vbd_lock);
   3.334  
   3.335 -	    /* Now copy into relevant part of user-space buffer */
   3.336 -	    if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk, 
   3.337 -				   sizeof(xen_disk_t))) < 0) { 
   3.338 -		printk("vbd_probe_devices: copy_to_user failed [rc=%d]\n", 
   3.339 -		       ret); 
   3.340 -		return ret; 
   3.341 -	    } 
   3.342 -	    
   3.343 +    for ( i = 0; i < VBD_HTAB_SZ; i++ )
   3.344 +    { 
   3.345 +        for ( v = p->vbdtab[i]; v != NULL; v = v->next )
   3.346 +        { 
   3.347 +            if ( xdi->count == xdi->max )
   3.348 +            {
   3.349 +                DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
   3.350 +                spin_unlock(&p->vbd_lock);
   3.351 +                return -ENOMEM; 
   3.352 +            }
   3.353  
   3.354 -	    xdi->count++; 
   3.355 -	}
   3.356 +            cur_disk.device = v->vdevice; 
   3.357 +            cur_disk.info   = XD_FLAG_VIRT | XD_TYPE_DISK; 
   3.358 +            if ( !VBD_CAN_WRITE(v) )
   3.359 +                cur_disk.info |= XD_FLAG_RO; 
   3.360 +            cur_disk.capacity = 0 ; 
   3.361 +            for ( x = v->extents; x != NULL; x = x->next )
   3.362 +                cur_disk.capacity += x->extent.nr_sectors; 
   3.363 +            cur_disk.domain = p->domain; 
   3.364 +
   3.365 +            /* Now copy into relevant part of user-space buffer */
   3.366 +            if( copy_to_user(&xdi->disks[xdi->count], 
   3.367 +                             &cur_disk, 
   3.368 +                             sizeof(xen_disk_t)) )
   3.369 +            { 
   3.370 +                DPRINTK("vbd_probe_devices: copy_to_user failed\n");
   3.371 +                spin_unlock(&p->vbd_lock);
   3.372 +                return -EFAULT;
   3.373 +            } 
   3.374 +        
   3.375 +            xdi->count++; 
   3.376 +        }
   3.377      } 
   3.378  
   3.379 +    spin_unlock(&p->vbd_lock);
   3.380      return 0;  
   3.381  }
   3.382  
   3.383 @@ -199,192 +327,218 @@ static int vbd_probe_devices(xen_disk_in
   3.384  ** all domains ("VBD_PROBE_ALL") -- both of these cases require the
   3.385  ** caller to be privileged. 
   3.386  */
   3.387 -long vbd_probe(vbd_probe_t *probe_params) 
   3.388 +long vbd_probe(vbd_probe_t *probe) 
   3.389  {
   3.390      struct task_struct *p = NULL; 
   3.391 -    short putp = 0; 
   3.392 -    int ret = 0;  
   3.393 -
   3.394 -    if(probe_params->domain) { 
   3.395 -
   3.396 -	/* we can only probe for ourselves unless we're privileged */
   3.397 -	if(probe_params->domain != current->domain && !IS_PRIV(current))
   3.398 -	    return -EPERM; 
   3.399 -
   3.400 -	if(probe_params->domain != VBD_PROBE_ALL) { 
   3.401 -
   3.402 -	    p = find_domain_by_id(probe_params->domain);
   3.403 -	    
   3.404 -	    if (!p) { 
   3.405 -		printk("vbd_probe attempted for non-existent domain %d\n", 
   3.406 -		       probe_params->domain); 
   3.407 -		return -EINVAL; 
   3.408 -	    }
   3.409 -
   3.410 -	    putp = 1; 
   3.411 -	}
   3.412 -
   3.413 -    } else 
   3.414 -	/* default is to probe for ourselves */
   3.415 -	p = current; 
   3.416 -
   3.417 -
   3.418 -    if(!p || IS_PRIV(p)) { 
   3.419 +    unsigned long flags;
   3.420 +    long ret = 0;  
   3.421  
   3.422 -	/* privileged domains always get access to the 'real' devices */
   3.423 -	if((ret = ide_probe_devices(&probe_params->xdi))) {
   3.424 -	    printk("vbd_probe: error %d in probing ide devices\n", ret); 
   3.425 -	    goto out; 
   3.426 -	}
   3.427 -	if((ret = scsi_probe_devices(&probe_params->xdi))) { 
   3.428 -	    printk("vbd_probe: error %d in probing scsi devices\n", ret); 
   3.429 -	    goto out; 
   3.430 -	}
   3.431 -    } 
   3.432 -    
   3.433 -
   3.434 -    if(!p) { 
   3.435 -
   3.436 -        u_long flags;
   3.437 -
   3.438 -        read_lock_irqsave (&tasklist_lock, flags);
   3.439 +    if ( probe->domain != 0 )
   3.440 +    { 
   3.441 +        /* We can only probe for ourselves unless we're privileged. */
   3.442 +        if( (probe->domain != current->domain) && !IS_PRIV(current) )
   3.443 +            return -EPERM; 
   3.444  
   3.445 -	p = &idle0_task; 
   3.446 -        while ( (p = p->next_task) != &idle0_task ) {
   3.447 -            if (!is_idle_task(p)) { 
   3.448 -		if((ret = vbd_probe_devices(&probe_params->xdi, p))) { 
   3.449 -		    printk("vbd_probe: error %d in probing virtual devices\n",
   3.450 -			   ret); 
   3.451 -		    read_unlock_irqrestore(&tasklist_lock, flags);
   3.452 -		    goto out; 
   3.453 -		}
   3.454 -	    }
   3.455 -	}
   3.456 +        if ( (probe->domain != VBD_PROBE_ALL) &&
   3.457 +             ((p = find_domain_by_id(probe->domain)) == NULL) )
   3.458 +        {
   3.459 +            DPRINTK("vbd_probe attempted for non-existent domain %d\n", 
   3.460 +                    probe->domain); 
   3.461 +            return -EINVAL; 
   3.462 +        }
   3.463 +    }
   3.464 +    else
   3.465 +    { 
   3.466 +        /* Default is to probe for ourselves. */
   3.467 +        p = current; 
   3.468 +        get_task_struct(p); /* to mirror final put_task_struct */
   3.469 +    }
   3.470  
   3.471 -	read_unlock_irqrestore(&tasklist_lock, flags);
   3.472 -		
   3.473 -    } else { 
   3.474 +    if ( (probe->domain == VBD_PROBE_ALL) || IS_PRIV(p) )
   3.475 +    { 
   3.476 +        /* Privileged domains always get access to the 'real' devices. */
   3.477 +        if ( (ret = ide_probe_devices(&probe->xdi)) != 0 ) 
   3.478 +        {
   3.479 +            DPRINTK("vbd_probe: error %d in probing ide devices\n", ret); 
   3.480 +            goto out; 
   3.481 +        }
   3.482 +        if ( (ret = scsi_probe_devices(&probe->xdi)) != 0 )
   3.483 +        { 
   3.484 +            DPRINTK("vbd_probe: error %d in probing scsi devices\n", ret); 
   3.485 +            goto out; 
   3.486 +        }
   3.487 +    } 
   3.488  
   3.489 -	/* probe for disks and VBDs for just 'p' */
   3.490 -	if((ret = vbd_probe_devices(&probe_params->xdi, p))) { 
   3.491 -	    printk("vbd_probe: error %d in probing virtual devices\n", ret); 
   3.492 -	    goto out; 
   3.493 -	}
   3.494 +    if ( probe->domain == VBD_PROBE_ALL )
   3.495 +    { 
   3.496 +        read_lock_irqsave(&tasklist_lock, flags);
   3.497 +        p = &idle0_task; 
   3.498 +        while ( (p = p->next_task) != &idle0_task )
   3.499 +        {
   3.500 +            if ( !is_idle_task(p) )
   3.501 +            { 
   3.502 +                if( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
   3.503 +                { 
   3.504 +                    DPRINTK("vbd_probe: error %d in probing virtual devices\n",
   3.505 +                            ret); 
   3.506 +                    read_unlock_irqrestore(&tasklist_lock, flags);
   3.507 +                    goto out; 
   3.508 +                }
   3.509 +            }
   3.510 +        }
   3.511 +        read_unlock_irqrestore(&tasklist_lock, flags);
   3.512 +    } 
   3.513 +    else 
   3.514 +    { 
   3.515 +        if ( (ret = vbd_probe_devices(&probe->xdi, p)) )
   3.516 +        { 
   3.517 +            DPRINTK("vbd_probe: error %d in probing virtual devices\n", ret); 
   3.518 +            goto out; 
   3.519 +        }
   3.520  
   3.521      }
   3.522  
   3.523   out: 
   3.524 -    if(putp) 
   3.525 -	put_task_struct(p); 
   3.526 -
   3.527 +    if ( p != NULL )
   3.528 +        put_task_struct(p); 
   3.529      return ret; 
   3.530  }
   3.531  
   3.532 -long vbd_info(vbd_info_t *info_params) 
   3.533 +
   3.534 +long vbd_info(vbd_info_t *info) 
   3.535  {
   3.536 -    struct task_struct *p = NULL; 
   3.537 +    struct task_struct *p; 
   3.538      xen_extent_le_t *x; 
   3.539      xen_extent_t *extents; 
   3.540      vbd_t *v; 
   3.541 -    int h, ret = 0;  
   3.542 +    long ret = 0;  
   3.543     
   3.544 -    if(info_params->domain != current->domain && !IS_PRIV(current))
   3.545 -	return -EPERM; 
   3.546 +    if ( (info->domain != current->domain) && !IS_PRIV(current) )
   3.547 +        return -EPERM; 
   3.548  
   3.549 -    p = find_domain_by_id(info_params->domain);
   3.550 -    
   3.551 -    if (!p) { 
   3.552 -	printk("vbd_info attempted for non-existent domain %d\n", 
   3.553 -	       info_params->domain); 
   3.554 -	return -EINVAL; 
   3.555 +    if ( (p = find_domain_by_id(info->domain)) == NULL )
   3.556 +    {
   3.557 +        DPRINTK("vbd_info attempted for non-existent domain %d\n", 
   3.558 +                info->domain); 
   3.559 +        return -EINVAL; 
   3.560      }
   3.561  
   3.562 -    h = HSH(info_params->vdevice); 
   3.563 +    spin_lock(&p->vbd_lock);
   3.564  
   3.565 -    for(v = p->vbdtab[h]; v; v = v->next) 
   3.566 -	if(v->vdevice == info_params->vdevice)
   3.567 -	    break; 
   3.568 +    for ( v = p->vbdtab[HSH(info->vdevice)]; v != NULL; v = v->next ) 
   3.569 +        if ( v->vdevice == info->vdevice )
   3.570 +            break; 
   3.571  
   3.572 -    if(!v) {
   3.573 -	printk("vbd_info attempted on non-existent VBD.\n"); 
   3.574 -	ret = -EINVAL; 
   3.575 -	goto out; 
   3.576 +    if ( v == NULL )
   3.577 +    {
   3.578 +        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
   3.579 +        ret = -EINVAL; 
   3.580 +        goto out; 
   3.581      }
   3.582  
   3.583 -    info_params->mode     = v->mode; 
   3.584 -    info_params->nextents = 0; 
   3.585 -
   3.586 -    extents = info_params->extents; // convenience 
   3.587 +    info->mode     = v->mode; 
   3.588 +    info->nextents = 0; 
   3.589  
   3.590 -    for(x = v->extents; x; x = x->next) {
   3.591 -	if((ret = copy_to_user(extents++, &x->extent, 
   3.592 -			       sizeof(xen_extent_t))) < 0) {
   3.593 -	    printk("vbd_info: copy_to_user failed [rc=%d]\n", ret); 
   3.594 -	    goto out; 
   3.595 -	} 
   3.596 -	info_params->nextents++; 
   3.597 +    extents = info->extents;
   3.598 +    for ( x = v->extents; x != NULL; x = x->next )
   3.599 +    {
   3.600 +        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
   3.601 +        {
   3.602 +            DPRINTK("vbd_info: copy_to_user failed\n");
   3.603 +            ret = -EFAULT;
   3.604 +            goto out; 
   3.605 +        } 
   3.606 +        extents++;
   3.607 +        info->nextents++; 
   3.608      }
   3.609  
   3.610   out: 
   3.611 +    spin_unlock(&p->vbd_lock);
   3.612      put_task_struct(p); 
   3.613      return ret; 
   3.614  }
   3.615  
   3.616  
   3.617 -int vbd_translate(phys_seg_t * pseg, int *nr_segs, 
   3.618 -		  struct task_struct *p, int operation)
   3.619 +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
   3.620  {
   3.621      xen_extent_le_t *x; 
   3.622      vbd_t *v; 
   3.623 -    int h; 
   3.624 -    long sec; 
   3.625 -
   3.626 -    h = HSH(pseg->dev); 
   3.627 +    unsigned long sec_off, nr_secs;
   3.628  
   3.629 -    for(v = p->vbdtab[h]; v; v = v->next) 
   3.630 -	if(v->vdevice == pseg->dev)
   3.631 -	    break; 
   3.632 +    spin_lock(&p->vbd_lock);
   3.633  
   3.634 -    if(!v) {
   3.635 -	if(!IS_PRIV(p)) 
   3.636 -	    printk("vbd_translate; domain %d attempted to access "
   3.637 -		   "non-existent VBD.\n", p->domain); 
   3.638 -	return -ENODEV; 
   3.639 +    for ( v = p->vbdtab[HSH(pseg->dev)]; v != NULL; v = v->next ) 
   3.640 +        if ( v->vdevice == pseg->dev )
   3.641 +            goto found; 
   3.642 +
   3.643 +    if ( unlikely(!IS_PRIV(p)) ) 
   3.644 +        DPRINTK("vbd_translate; domain %d attempted to access "
   3.645 +                "non-existent VBD.\n", p->domain); 
   3.646 +
   3.647 +    spin_unlock(&p->vbd_lock);
   3.648 +    return -ENODEV; 
   3.649 +
   3.650 + found:
   3.651 +
   3.652 +    if ( ((operation == READ) && !VBD_CAN_READ(v)) ||
   3.653 +         ((operation == WRITE) && !VBD_CAN_WRITE(v)) )
   3.654 +    {
   3.655 +        spin_unlock(&p->vbd_lock);
   3.656 +        return -EACCES; 
   3.657      }
   3.658  
   3.659 -    if(operation == READ && !VBD_CAN_READ(v)) 
   3.660 -	return -EACCES; 
   3.661 -    
   3.662 -    if(operation == WRITE && !VBD_CAN_WRITE(v))
   3.663 -	return -EACCES; 
   3.664 -    
   3.665 -
   3.666 -    /* Now iterate through the list of xen_extents, working out which 
   3.667 -       should be used to perform the translation. */
   3.668 -    sec = pseg->sector_number; 
   3.669 -    for(x = v->extents; x; x = x->next) { 
   3.670 -
   3.671 -	if(sec < x->extent.nr_sectors) {
   3.672 -
   3.673 -	    /* we've got a match! XXX SMH: should deal with 
   3.674 -	       situation where we span multiple xe's */
   3.675 -
   3.676 -	    pseg->dev = x->extent.device; 
   3.677 -	    pseg->sector_number += x->extent.start_sector; 
   3.678 -
   3.679 -	    return 0; 
   3.680 -
   3.681 -	} 
   3.682 -
   3.683 -	sec -= x->extent.nr_sectors; 
   3.684 +    /*
   3.685 +     * Now iterate through the list of xen_extents, working out which should 
   3.686 +     * be used to perform the translation.
   3.687 +     */
   3.688 +    sec_off = pseg->sector_number; 
   3.689 +    nr_secs = pseg->nr_sects;
   3.690 +    for ( x = v->extents; x != NULL; x = x->next )
   3.691 +    { 
   3.692 +        if ( sec_off < x->extent.nr_sectors )
   3.693 +        {
   3.694 +            pseg->dev = x->extent.device; 
   3.695 +            pseg->sector_number = x->extent.start_sector + sec_off;
   3.696 +            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
   3.697 +                goto overrun;
   3.698 +            spin_unlock(&p->vbd_lock);
   3.699 +            return 1;
   3.700 +        } 
   3.701 +        sec_off -= x->extent.nr_sectors; 
   3.702      }
   3.703  
   3.704 -    /* No luck -- return no access */
   3.705 +    DPRINTK("vbd_translate: end of vbd.\n");
   3.706 +    spin_unlock(&p->vbd_lock);
   3.707      return -EACCES; 
   3.708 -}
   3.709  
   3.710 -
   3.711 +    /*
   3.712 +     * Here we deal with overrun onto the following extent. We don't deal with 
   3.713 +     * overrun of more than one boundary since each request is restricted to 
   3.714 +     * 2^9 512-byte sectors, so it should be trivial for control software to 
   3.715 +     * ensure that extents are large enough to prevent excessive overrun.
   3.716 +     */
   3.717 + overrun:
   3.718  
   3.719 +    /* Adjust length of first chunk to run to end of first extent. */
   3.720 +    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
   3.721  
   3.722 +    /* Set second chunk buffer and length to start where first chunk ended. */
   3.723 +    pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
   3.724 +    pseg[1].nr_sects = nr_secs - pseg[0].nr_sects;
   3.725  
   3.726 +    /* Now move to the next extent. Check it exists and is long enough! */
   3.727 +    if ( unlikely((x = x->next) == NULL) || 
   3.728 +         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
   3.729 +    {
   3.730 +        DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
   3.731 +        spin_unlock(&p->vbd_lock);
   3.732 +        return -EACCES;
   3.733 +    }
   3.734  
   3.735 +    /* Store the real device and start sector for the second chunk. */
   3.736 +    pseg[1].dev           = x->extent.device;
   3.737 +    pseg[1].sector_number = x->extent.start_sector;
   3.738 +    
   3.739 +    spin_unlock(&p->vbd_lock);
   3.740 +    return 2;
   3.741 +}
     4.1 --- a/xen/include/xeno/sched.h	Thu Nov 13 17:09:34 2003 +0000
     4.2 +++ b/xen/include/xeno/sched.h	Sun Nov 16 18:11:18 2003 +0000
     4.3 @@ -114,12 +114,8 @@ struct task_struct
     4.4      unsigned int blk_resp_prod; /* (private version of) response producer */
     4.5      struct list_head blkdev_list;
     4.6      spinlock_t blk_ring_lock;
     4.7 -    struct list_head physdisk_aces; /* physdisk_ace structures
     4.8 -				       describing what bits of disk
     4.9 -				       the process can do raw access
    4.10 -				       to. */
    4.11 -    spinlock_t physdev_lock;
    4.12 -    vbd_t *vbdtab[VBD_HTAB_SZ];     // mapping from 16bit vdevices to vbds
    4.13 +    vbd_t *vbdtab[VBD_HTAB_SZ];   /* mapping from 16-bit vdevices to vbds */
    4.14 +    spinlock_t vbd_lock;
    4.15  
    4.16      /* VM */
    4.17      struct mm_struct mm;
     5.1 --- a/xen/include/xeno/vbd.h	Thu Nov 13 17:09:34 2003 +0000
     5.2 +++ b/xen/include/xeno/vbd.h	Sun Nov 16 18:11:18 2003 +0000
     5.3 @@ -37,6 +37,7 @@ long vbd_delete(vbd_delete_t *delete_par
     5.4  long vbd_probe(vbd_probe_t *probe_params); 
     5.5  long vbd_info(vbd_info_t *info_params); 
     5.6  
     5.7 +void destroy_all_vbds(struct task_struct *p);
     5.8  
     5.9  /* Describes a [partial] disk extent (part of a block io request) */
    5.10  typedef struct {
    5.11 @@ -47,8 +48,7 @@ typedef struct {
    5.12  } phys_seg_t;
    5.13  
    5.14  
    5.15 -int vbd_translate(phys_seg_t * pseg, int *nr_segs, 
    5.16 -		  struct task_struct *p, int operation); 
    5.17 +int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation); 
    5.18  
    5.19  
    5.20  #endif /* __VBD_H__ */