ia64/xen-unstable

changeset 11945:80b296ec93dc

[BLKTAP] Only allocate tapfd descriptors when they are requested.

Currently all are allocated at bootup, even when they will never be used.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
author kfraser@localhost.localdomain
date Mon Oct 23 14:18:16 2006 +0100 (2006-10-23)
parents 93f0957e02ce
children f5d179bcad70
files linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Mon Oct 23 14:09:01 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Mon Oct 23 14:18:16 2006 +0100
     1.3 @@ -10,6 +10,9 @@
     1.4   * 
     1.5   * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
     1.6   *
     1.7 + * Clean ups and fix ups:
     1.8 + *    Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
     1.9 + *
    1.10   * This program is free software; you can redistribute it and/or
    1.11   * modify it under the terms of the GNU General Public License version 2
    1.12   * as published by the Free Software Foundation; or, when distributed
    1.13 @@ -51,7 +54,7 @@
    1.14  #include <asm/tlbflush.h>
    1.15  #include <linux/devfs_fs_kernel.h>
    1.16  
    1.17 -#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
    1.18 +#define MAX_TAP_DEV 256     /*the maximum number of tapdisk ring devices    */
    1.19  #define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
    1.20  
    1.21  
    1.22 @@ -105,6 +108,12 @@ static int mmap_pages = MMAP_PAGES;
    1.23  		      * memory rings.
    1.24  		      */
    1.25  
    1.26 +/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
    1.27 +typedef struct domid_translate {
    1.28 +	unsigned short domid;
    1.29 +	unsigned short busid;
    1.30 +} domid_translate_t ;
    1.31 +
    1.32  /*Data struct associated with each of the tapdisk devices*/
    1.33  typedef struct tap_blkif {
    1.34  	struct vm_area_struct *vma;   /*Shared memory area                   */
    1.35 @@ -123,17 +132,11 @@ typedef struct tap_blkif {
    1.36  	unsigned long *idx_map;       /*Record the user ring id to kern 
    1.37  					[req id, idx] tuple                  */
    1.38  	blkif_t *blkif;               /*Associate blkif with tapdev          */
    1.39 -	int sysfs_set;                /*Set if it has a class device.        */
    1.40 +	struct domid_translate trans; /*Translation from domid to bus.       */
    1.41  } tap_blkif_t;
    1.42  
    1.43 -/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
    1.44 -typedef struct domid_translate {
    1.45 -	unsigned short domid;
    1.46 -	unsigned short busid;
    1.47 -} domid_translate_t ;
    1.48 -
    1.49 -static domid_translate_t  translate_domid[MAX_TAP_DEV];
    1.50 -static tap_blkif_t *tapfds[MAX_TAP_DEV];
    1.51 +static struct tap_blkif *tapfds[MAX_TAP_DEV];
    1.52 +static int blktap_next_minor;
    1.53  
    1.54  static int __init set_blkif_reqs(char *str)
    1.55  {
    1.56 @@ -322,7 +325,7 @@ struct vm_operations_struct blktap_vm_op
    1.57   */
    1.58   
    1.59  /*Function Declarations*/
    1.60 -static int get_next_free_dev(void);
    1.61 +static tap_blkif_t *get_next_free_dev(void);
    1.62  static int blktap_open(struct inode *inode, struct file *filp);
    1.63  static int blktap_release(struct inode *inode, struct file *filp);
    1.64  static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
    1.65 @@ -340,51 +343,94 @@ static struct file_operations blktap_fop
    1.66  };
    1.67  
    1.68  
    1.69 -static int get_next_free_dev(void)
    1.70 +static tap_blkif_t *get_next_free_dev(void)
    1.71  {
    1.72  	tap_blkif_t *info;
    1.73 -	int i = 0, ret = -1;
    1.74 -	unsigned long flags;
    1.75 +	int minor;
    1.76 +
    1.77 +	/*
    1.78 +	 * This is called only from the ioctl, which
    1.79 +	 * means we should always have interrupts enabled.
    1.80 +	 */
    1.81 +	BUG_ON(irqs_disabled());
    1.82 +
    1.83 +	spin_lock_irq(&pending_free_lock);
    1.84  
    1.85 -	spin_lock_irqsave(&pending_free_lock, flags);
    1.86 -	
    1.87 -	while (i < MAX_TAP_DEV) {
    1.88 -		info = tapfds[i];
    1.89 -		if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
    1.90 -			&& (info->dev_pending == 0) ) {
    1.91 +	for (minor = 1; minor < blktap_next_minor; minor++) {
    1.92 +		info = tapfds[minor];
    1.93 +		/* we could have failed a previous attempt. */
    1.94 +		if (!info ||
    1.95 +		    ((info->dev_inuse == 0) &&
    1.96 +		     (info->dev_pending == 0)) ) {
    1.97  			info->dev_pending = 1;
    1.98 -			ret = i;
    1.99 -			goto done;
   1.100 +			goto found;
   1.101  		}
   1.102 -		i++;
   1.103  	}
   1.104 -	
   1.105 -done:
   1.106 -	spin_unlock_irqrestore(&pending_free_lock, flags);
   1.107 +	info = NULL;
   1.108 +	minor = -1;
   1.109  
   1.110  	/*
   1.111 -	 * We are protected by having the dev_pending set.
   1.112 +	 * We didn't find a free device. If we can still allocate
   1.113 +	 * more, then we grab the next device minor that is
   1.114 +	 * available.  This is done while we are still under
   1.115 +	 * the protection of the pending_free_lock.
   1.116  	 */
   1.117 -	if (!tapfds[i]->sysfs_set && xen_class) {
   1.118 +	if (blktap_next_minor < MAX_TAP_DEV)
   1.119 +		minor = blktap_next_minor++;
   1.120 +found:
   1.121 +	spin_unlock_irq(&pending_free_lock);
   1.122 +
   1.123 +	if (!info && minor > 0) {
   1.124 +		info = kzalloc(sizeof(*info), GFP_KERNEL);
   1.125 +		if (unlikely(!info)) {
   1.126 +			/*
   1.127 +			 * If we failed here, try to put back
   1.128 +			 * the next minor number. But if one
   1.129 +			 * was just taken, then we just lose this
   1.130 +			 * minor.  We can try to allocate this
   1.131 +			 * minor again later.
   1.132 +			 */
   1.133 +			spin_lock_irq(&pending_free_lock);
   1.134 +			if (blktap_next_minor == minor+1)
   1.135 +				blktap_next_minor--;
   1.136 +			spin_unlock_irq(&pending_free_lock);
   1.137 +			goto out;
   1.138 +		}
   1.139 +
   1.140 +		info->minor = minor;
   1.141 +		/*
   1.142 +		 * Make sure that we have a minor before others can
   1.143 +		 * see us.
   1.144 +		 */
   1.145 +		wmb();
   1.146 +		tapfds[minor] = info;
   1.147 +
   1.148  		class_device_create(xen_class, NULL,
   1.149 -				    MKDEV(blktap_major, ret), NULL,
   1.150 -				    "blktap%d", ret);
   1.151 -		tapfds[i]->sysfs_set = 1;
   1.152 +				    MKDEV(blktap_major, minor), NULL,
   1.153 +				    "blktap%d", minor);
   1.154 +		devfs_mk_cdev(MKDEV(blktap_major, minor),
   1.155 +			S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor);
   1.156  	}
   1.157 -	return ret;
   1.158 +
   1.159 +out:
   1.160 +	return info;
   1.161  }
   1.162  
   1.163  int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
   1.164  {
   1.165 +	tap_blkif_t *info;
   1.166  	int i;
   1.167 -		
   1.168 -	for (i = 0; i < MAX_TAP_DEV; i++)
   1.169 -		if ( (translate_domid[i].domid == domid)
   1.170 -		    && (translate_domid[i].busid == xenbus_id) ) {
   1.171 -			tapfds[i]->blkif = blkif;
   1.172 -			tapfds[i]->status = RUNNING;
   1.173 +
   1.174 +	for (i = 0; i < blktap_next_minor; i++) {
   1.175 +		info = tapfds[i];
   1.176 +		if ( info &&
   1.177 +		     (info->trans.domid == domid) &&
   1.178 +		     (info->trans.busid == xenbus_id) ) {
   1.179 +			info->blkif = blkif;
   1.180 +			info->status = RUNNING;
   1.181  			return i;
   1.182  		}
   1.183 +	}
   1.184  	return -1;
   1.185  }
   1.186  
   1.187 @@ -394,12 +440,16 @@ void signal_tapdisk(int idx)
   1.188  	struct task_struct *ptask;
   1.189  
   1.190  	info = tapfds[idx];
   1.191 -	if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
   1.192 +	if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
   1.193 +		return;
   1.194 +
   1.195 +	if (info->pid > 0) {
   1.196  		ptask = find_task_by_pid(info->pid);
   1.197  		if (ptask)
   1.198  			info->status = CLEANSHUTDOWN;
   1.199  	}
   1.200  	info->blkif = NULL;
   1.201 +
   1.202  	return;
   1.203  }
   1.204  
   1.205 @@ -410,15 +460,16 @@ static int blktap_open(struct inode *ino
   1.206  	tap_blkif_t *info;
   1.207  	int i;
   1.208  	
   1.209 -	if (tapfds[idx] == NULL) {
   1.210 +	info = tapfds[idx];
   1.211 +
   1.212 +	if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
   1.213  		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
   1.214 -		       idx);
   1.215 -		return -ENOMEM;
   1.216 +			idx);
   1.217 +		return -ENODEV;
   1.218  	}
   1.219 +
   1.220  	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
   1.221  	
   1.222 -	info = tapfds[idx];
   1.223 -	
   1.224  	/*Only one process can access device at a time*/
   1.225  	if (test_and_set_bit(0, &info->dev_inuse))
   1.226  		return -EBUSY;
   1.227 @@ -619,33 +670,31 @@ static int blktap_ioctl(struct inode *in
   1.228  	{		
   1.229  		uint64_t val = (uint64_t)arg;
   1.230  		domid_translate_t *tr = (domid_translate_t *)&val;
   1.231 -		int newdev;
   1.232  
   1.233  		DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
   1.234  		       tr->domid, tr->busid);
   1.235 -		newdev = get_next_free_dev();
   1.236 -		if (newdev < 1) {
   1.237 +		info = get_next_free_dev();
   1.238 +		if (!info) {
   1.239  			WPRINTK("Error initialising /dev/xen/blktap - "
   1.240  				"No more devices\n");
   1.241  			return -1;
   1.242  		}
   1.243 -		translate_domid[newdev].domid = tr->domid;
   1.244 -		translate_domid[newdev].busid = tr->busid;
   1.245 -		return newdev;
   1.246 +		info->trans.domid = tr->domid;
   1.247 +		info->trans.busid = tr->busid;
   1.248 +		return info->minor;
   1.249  	}
   1.250  	case BLKTAP_IOCTL_FREEINTF:
   1.251  	{
   1.252  		unsigned long dev = arg;
   1.253  		unsigned long flags;
   1.254  
   1.255 -		/* Looking at another device */
   1.256 -		info = NULL;
   1.257 +		info = tapfds[dev];
   1.258  
   1.259 -		if ( (dev > 0) && (dev < MAX_TAP_DEV) )
   1.260 -			info = tapfds[dev];
   1.261 +		if ((dev > MAX_TAP_DEV) || !info)
   1.262 +			return 0; /* should this be an error? */
   1.263  
   1.264  		spin_lock_irqsave(&pending_free_lock, flags);
   1.265 -		if ( (info != NULL) && (info->dev_pending) )
   1.266 +		if (info->dev_pending)
   1.267  			info->dev_pending = 0;
   1.268  		spin_unlock_irqrestore(&pending_free_lock, flags);
   1.269  
   1.270 @@ -655,16 +704,12 @@ static int blktap_ioctl(struct inode *in
   1.271  	{
   1.272  		unsigned long dev = arg;
   1.273  
   1.274 -		/* Looking at another device */
   1.275 -		info = NULL;
   1.276 -		
   1.277 -		if ( (dev > 0) && (dev < MAX_TAP_DEV) )
   1.278 -			info = tapfds[dev];
   1.279 -		
   1.280 -		if (info != NULL)
   1.281 -			return info->minor;
   1.282 -		else
   1.283 -			return -1;
   1.284 +		info = tapfds[dev];
   1.285 +
   1.286 +		if (!dev || (dev > MAX_TAP_DEV) || !info)
   1.287 +			return -EINVAL;
   1.288 +
   1.289 +		return info->minor;
   1.290  	}
   1.291  	case BLKTAP_IOCTL_MAJOR:
   1.292  		return blktap_major;
   1.293 @@ -704,13 +749,13 @@ void blktap_kick_user(int idx)
   1.294  {
   1.295  	tap_blkif_t *info;
   1.296  
   1.297 -	if (idx == 0)
   1.298 +	info = tapfds[idx];
   1.299 +
   1.300 +	/* Don't kick control device minor==0 */
   1.301 +	if ((idx <= 0) || (idx > MAX_TAP_DEV) || !info)
   1.302  		return;
   1.303 -	
   1.304 -	info = tapfds[idx];
   1.305 -	
   1.306 -	if (info != NULL)
   1.307 -		wake_up_interruptible(&info->wait);
   1.308 +
   1.309 +	wake_up_interruptible(&info->wait);
   1.310  
   1.311  	return;
   1.312  }
   1.313 @@ -822,8 +867,8 @@ static void free_req(pending_req_t *req)
   1.314  		wake_up(&pending_free_wq);
   1.315  }
   1.316  
   1.317 -static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
   1.318 -			    tapidx)
   1.319 +static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
   1.320 +			    int tapidx)
   1.321  {
   1.322  	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   1.323  	unsigned int i, invcount = 0;
   1.324 @@ -831,13 +876,16 @@ static void fast_flush_area(pending_req_
   1.325  	uint64_t ptep;
   1.326  	int ret, mmap_idx;
   1.327  	unsigned long kvaddr, uvaddr;
   1.328 +	tap_blkif_t *info;
   1.329 +	
   1.330  
   1.331 -	tap_blkif_t *info = tapfds[tapidx];
   1.332 -	
   1.333 -	if (info == NULL) {
   1.334 +	info = tapfds[tapidx];
   1.335 +
   1.336 +	if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
   1.337  		WPRINTK("fast_flush: Couldn't get info!\n");
   1.338  		return;
   1.339  	}
   1.340 +
   1.341  	mmap_idx = req->mem_idx;
   1.342  
   1.343  	for (i = 0; i < req->nr_pages; i++) {
   1.344 @@ -1042,7 +1090,7 @@ static int do_block_io_op(blkif_t *blkif
   1.345  	rmb(); /* Ensure we see queued requests up to 'rp'. */
   1.346  
   1.347  	/*Check blkif has corresponding UE ring*/
   1.348 -	if (blkif->dev_num == -1) {
   1.349 +	if (blkif->dev_num < 0) {
   1.350  		/*oops*/
   1.351  		if (print_dbug) {
   1.352  			WPRINTK("Corresponding UE " 
   1.353 @@ -1053,7 +1101,8 @@ static int do_block_io_op(blkif_t *blkif
   1.354  	}
   1.355  
   1.356  	info = tapfds[blkif->dev_num];
   1.357 -	if (info == NULL || !info->dev_inuse) {
   1.358 +
   1.359 +	if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
   1.360  		if (print_dbug) {
   1.361  			WPRINTK("Can't get UE info!\n");
   1.362  			print_dbug = 0;
   1.363 @@ -1121,15 +1170,22 @@ static void dispatch_rw_block_io(blkif_t
   1.364  	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   1.365  	unsigned int nseg;
   1.366  	int ret, i;
   1.367 -	tap_blkif_t *info = tapfds[blkif->dev_num];
   1.368 +	tap_blkif_t *info;
   1.369  	uint64_t sector;
   1.370 -	
   1.371  	blkif_request_t *target;
   1.372  	int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
   1.373 -	int usr_idx = GET_NEXT_REQ(info->idx_map);
   1.374 +	int usr_idx;
   1.375  	uint16_t mmap_idx = pending_req->mem_idx;
   1.376  
   1.377 +	if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
   1.378 +		goto fail_response;
   1.379 +
   1.380 +	info = tapfds[blkif->dev_num];
   1.381 +	if (info == NULL)
   1.382 +		goto fail_response;
   1.383 +
   1.384  	/* Check we have space on user ring - should never fail. */
   1.385 +	usr_idx = GET_NEXT_REQ(info->idx_map);
   1.386  	if (usr_idx == INVALID_REQ)
   1.387  		goto fail_response;
   1.388  
   1.389 @@ -1350,9 +1406,6 @@ static int __init blkif_init(void)
   1.390  
   1.391  	tap_blkif_xenbus_init();
   1.392  
   1.393 -	/*Create the blktap devices, but do not map memory or waitqueue*/
   1.394 -	for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
   1.395 -
   1.396  	/* Dynamically allocate a major for this device */
   1.397  	ret = register_chrdev(0, "blktap", &blktap_fops);
   1.398  	blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
   1.399 @@ -1364,24 +1417,22 @@ static int __init blkif_init(void)
   1.400  	
   1.401  	blktap_major = ret;
   1.402  
   1.403 -	for(i = 0; i < MAX_TAP_DEV; i++ ) {
   1.404 -		info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
   1.405 -		if(tapfds[i] == NULL)
   1.406 -			return -ENOMEM;
   1.407 -		info->minor = i;
   1.408 -		info->pid = 0;
   1.409 -		info->blkif = NULL;
   1.410 +	info = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
   1.411 +	if (!info)
   1.412 +		return -ENOMEM;
   1.413 +
   1.414 +	blktap_next_minor++;
   1.415  
   1.416 -		ret = devfs_mk_cdev(MKDEV(blktap_major, i),
   1.417 -			S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
   1.418 +	ret = devfs_mk_cdev(MKDEV(blktap_major, i),
   1.419 +			    S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
   1.420  
   1.421 -		if(ret != 0)
   1.422 -			return -ENOMEM;
   1.423 -		info->dev_pending = info->dev_inuse = 0;
   1.424 +	if(ret != 0)
   1.425 +		return -ENOMEM;
   1.426  
   1.427 -		DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
   1.428 -	}
   1.429 -	
   1.430 +	DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
   1.431 +
   1.432 +	tapfds[0] = info;
   1.433 +
   1.434  	/* Make sure the xen class exists */
   1.435  	if (!setup_xen_class()) {
   1.436  		/*
   1.437 @@ -1394,7 +1445,6 @@ static int __init blkif_init(void)
   1.438  		class_device_create(xen_class, NULL,
   1.439  				    MKDEV(blktap_major, 0), NULL,
   1.440  				    "blktap0");
   1.441 -		tapfds[0]->sysfs_set = 1;
   1.442  	} else {
   1.443  		/* this is bad, but not fatal */
   1.444  		WPRINTK("blktap: sysfs xen_class not created\n");