ia64/linux-2.6.18-xen.hg

changeset 579:043dc7488c11

privcmd: avoid deadlock due to copy_(to|from)_user with mmap_sem write lock held.

Accessing user memory with the write lock held is illegal; if the pages are
non-present we will deadlock against the lock taken in do_page_fault. Avoid this
for IOCTL_PRIVCMD_MMAP and IOCTL_PRIVCMD_MMAPBATCH by copying the entire input
array into the kernel before taking the lock.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Ian Campbell <ian.campbell@citrix.com>
date Tue Jun 24 21:02:47 2008 +0100 (2008-06-24)
parents 5201a184f513
children e7b48c70c727
files drivers/xen/privcmd/privcmd.c
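
The sketch below illustrates the pattern described above in isolation from the
privcmd code: chunk a user-supplied array into kernel pages linked on a list via
copy_from_user() before taking mmap_sem for writing, so any fault on the user
buffer happens while the lock is not held. This is a minimal sketch, not the
patch itself; entry_t, gather_user_entries() and NR_PER_PAGE are illustrative
names, not part of the real privcmd interface.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <asm/uaccess.h>

/* Illustrative entry type; the real code uses privcmd_mmap_entry_t. */
typedef struct {
	unsigned long va, mfn, npages;
} entry_t;

/* Entries that fit in one page after the struct list_head header. */
#define NR_PER_PAGE \
	(int)((PAGE_SIZE - sizeof(struct list_head)) / sizeof(entry_t))

static int gather_user_entries(struct list_head *pagelist,
			       const entry_t __user *p, int num)
{
	int i;

	for (i = 0; i < num; ) {
		int nr = min(num - i, NR_PER_PAGE);
		struct list_head *l;

		/* One page per chunk: a list_head followed by nr entries. */
		l = (struct list_head *)__get_free_page(GFP_KERNEL);
		if (l == NULL)
			return -ENOMEM;
		INIT_LIST_HEAD(l);
		list_add_tail(l, pagelist);

		/* Faulting here is safe: mmap_sem is not yet held. */
		if (copy_from_user(l + 1, p + i, nr * sizeof(entry_t)))
			return -EFAULT;
		i += nr;
	}
	return 0;
}

Only after gathering succeeds does the caller take down_write(&mm->mmap_sem);
under the lock it walks the page list and touches kernel memory only, and on
every exit path it frees each page with free_page(), as the patch does under
its mmap_out and mmapbatch_out labels.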
line diff
     1.1 --- a/drivers/xen/privcmd/privcmd.c	Fri Jun 20 17:43:16 2008 +0100
     1.2 +++ b/drivers/xen/privcmd/privcmd.c	Tue Jun 24 21:02:47 2008 +0100
     1.3 @@ -93,13 +93,16 @@ static long privcmd_ioctl(struct file *f
     1.4  	break;
     1.5  
     1.6  	case IOCTL_PRIVCMD_MMAP: {
     1.7 +#define MMAP_NR_PER_PAGE (int)((PAGE_SIZE-sizeof(struct list_head))/sizeof(privcmd_mmap_entry_t))
     1.8  		privcmd_mmap_t mmapcmd;
     1.9 -		privcmd_mmap_entry_t msg;
    1.10 +		privcmd_mmap_entry_t *msg;
    1.11  		privcmd_mmap_entry_t __user *p;
    1.12  		struct mm_struct *mm = current->mm;
    1.13  		struct vm_area_struct *vma;
    1.14  		unsigned long va;
    1.15  		int i, rc;
    1.16 +		LIST_HEAD(pagelist);
    1.17 +		struct list_head *l,*l2;
    1.18  
    1.19  		if (!is_initial_xendomain())
    1.20  			return -EPERM;
    1.21 @@ -108,63 +111,92 @@ static long privcmd_ioctl(struct file *f
    1.22  			return -EFAULT;
    1.23  
    1.24  		p = mmapcmd.entry;
    1.25 -		if (copy_from_user(&msg, p, sizeof(msg)))
    1.26 -			return -EFAULT;
    1.27 +		for (i = 0; i < mmapcmd.num;) {
    1.28 +			int nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
    1.29 +
    1.30 +			rc = -ENOMEM;
    1.31 +			l = (struct list_head *) __get_free_page(GFP_KERNEL);
    1.32 +			if (l == NULL)
    1.33 +				goto mmap_out;
    1.34 +
    1.35 +			INIT_LIST_HEAD(l);
    1.36 +			list_add_tail(l, &pagelist);
    1.37 +			msg = (privcmd_mmap_entry_t*)(l + 1);
    1.38 +
    1.39 +			rc = -EFAULT;
    1.40 +			if (copy_from_user(msg, p, nr*sizeof(*msg)))
    1.41 +				goto mmap_out;
    1.42 +			i += nr;
    1.43 +			p += nr;
    1.44 +		}
    1.45 +
    1.46 +		l = pagelist.next;
    1.47 +		msg = (privcmd_mmap_entry_t*)(l + 1);
    1.48  
    1.49  		down_write(&mm->mmap_sem);
    1.50  
    1.51 -		vma = find_vma(mm, msg.va);
    1.52 +		vma = find_vma(mm, msg->va);
    1.53  		rc = -EINVAL;
    1.54 -		if (!vma || (msg.va != vma->vm_start) ||
    1.55 +		if (!vma || (msg->va != vma->vm_start) ||
    1.56  		    !privcmd_enforce_singleshot_mapping(vma))
    1.57  			goto mmap_out;
    1.58  
    1.59  		va = vma->vm_start;
    1.60  
    1.61 -		for (i = 0; i < mmapcmd.num; i++) {
    1.62 -			rc = -EFAULT;
    1.63 -			if (copy_from_user(&msg, p, sizeof(msg)))
    1.64 -				goto mmap_out;
    1.65 -
    1.66 -			/* Do not allow range to wrap the address space. */
    1.67 -			rc = -EINVAL;
    1.68 -			if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
    1.69 -			    ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
    1.70 -				goto mmap_out;
    1.71 +		i = 0;
    1.72 +		list_for_each(l, &pagelist) {
    1.73 +			int nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
    1.74  
    1.75 -			/* Range chunks must be contiguous in va space. */
    1.76 -			if ((msg.va != va) ||
    1.77 -			    ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
    1.78 -				goto mmap_out;
    1.79 +			msg = (privcmd_mmap_entry_t*)(l + 1);
    1.80 +			while (i<nr) {
    1.81  
    1.82 -			if ((rc = direct_remap_pfn_range(
    1.83 -				vma,
    1.84 -				msg.va & PAGE_MASK, 
    1.85 -				msg.mfn, 
    1.86 -				msg.npages << PAGE_SHIFT, 
    1.87 -				vma->vm_page_prot,
    1.88 -				mmapcmd.dom)) < 0)
    1.89 -				goto mmap_out;
    1.90 +				/* Do not allow range to wrap the address space. */
    1.91 +				rc = -EINVAL;
    1.92 +				if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
    1.93 +				    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -va))
    1.94 +					goto mmap_out;
    1.95  
    1.96 -			p++;
    1.97 -			va += msg.npages << PAGE_SHIFT;
    1.98 +				/* Range chunks must be contiguous in va space. */
    1.99 +				if ((msg->va != va) ||
   1.100 +				    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
   1.101 +					goto mmap_out;
   1.102 +
   1.103 +				if ((rc = direct_remap_pfn_range(
   1.104 +					     vma,
   1.105 +					     msg->va & PAGE_MASK,
   1.106 +					     msg->mfn,
   1.107 +					     msg->npages << PAGE_SHIFT,
   1.108 +					     vma->vm_page_prot,
   1.109 +					     mmapcmd.dom)) < 0)
   1.110 +					goto mmap_out;
   1.111 +
   1.112 +				va += msg->npages << PAGE_SHIFT;
   1.113 +				msg++;
   1.114 +				i++;
   1.115 +			}
   1.116  		}
   1.117  
   1.118  		rc = 0;
   1.119  
   1.120  	mmap_out:
   1.121  		up_write(&mm->mmap_sem);
   1.122 +		list_for_each_safe(l,l2,&pagelist)
   1.123 +			free_page((unsigned long)l);
   1.124  		ret = rc;
   1.125  	}
   1.126 +#undef MMAP_NR_PER_PAGE
   1.127  	break;
   1.128  
   1.129  	case IOCTL_PRIVCMD_MMAPBATCH: {
   1.130 +#define MMAPBATCH_NR_PER_PAGE (unsigned long)((PAGE_SIZE-sizeof(struct list_head))/sizeof(unsigned long))
   1.131  		privcmd_mmapbatch_t m;
   1.132  		struct mm_struct *mm = current->mm;
   1.133  		struct vm_area_struct *vma;
   1.134  		xen_pfn_t __user *p;
   1.135 -		unsigned long addr, mfn, nr_pages;
   1.136 +		unsigned long addr, *mfn, nr_pages;
   1.137  		int i;
   1.138 +		LIST_HEAD(pagelist);
   1.139 +		struct list_head *l, *l2;
   1.140  
   1.141  		if (!is_initial_xendomain())
   1.142  			return -EPERM;
   1.143 @@ -176,34 +208,74 @@ static long privcmd_ioctl(struct file *f
   1.144  		if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
   1.145  			return -EINVAL;
   1.146  
   1.147 +		p = m.arr;
   1.148 +		for (i=0; i<nr_pages; )	{
   1.149 +			int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
   1.150 +
   1.151 +			ret = -ENOMEM;
   1.152 +			l = (struct list_head *)__get_free_page(GFP_KERNEL);
   1.153 +			if (l == NULL)
   1.154 +				goto mmapbatch_out;
   1.155 +
   1.156 +			INIT_LIST_HEAD(l);
   1.157 +			list_add_tail(l, &pagelist);
   1.158 +
   1.159 +			mfn = (unsigned long*)(l + 1);
   1.160 +			ret = -EFAULT;
   1.161 +			if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
   1.162 +				goto mmapbatch_out;
   1.163 +
   1.164 +			i += nr; p+= nr;
   1.165 +		}
   1.166 +
   1.167  		down_write(&mm->mmap_sem);
   1.168  
   1.169  		vma = find_vma(mm, m.addr);
   1.170 +		ret = -EINVAL;
   1.171  		if (!vma ||
   1.172  		    (m.addr != vma->vm_start) ||
   1.173  		    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
   1.174  		    !privcmd_enforce_singleshot_mapping(vma)) {
   1.175  			up_write(&mm->mmap_sem);
   1.176 -			return -EINVAL;
   1.177 +			goto mmapbatch_out;
   1.178  		}
   1.179  
   1.180  		p = m.arr;
   1.181  		addr = m.addr;
   1.182 -		for (i = 0; i < nr_pages; i++, addr += PAGE_SIZE, p++) {
   1.183 -			if (get_user(mfn, p)) {
   1.184 -				up_write(&mm->mmap_sem);
   1.185 -				return -EFAULT;
   1.186 +		i = 0;
   1.187 +		ret = 0;
   1.188 +		list_for_each(l, &pagelist) {
   1.189 +			int nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
   1.190 +			mfn = (unsigned long *)(l + 1);
   1.191 +
   1.192 +			while (i<nr) {
   1.193 +				if(direct_remap_pfn_range(vma, addr & PAGE_MASK,
   1.194 +							  *mfn, PAGE_SIZE,
   1.195 +							  vma->vm_page_prot, m.dom) < 0) {
   1.196 +					*mfn |= 0xf0000000U;
   1.197 +					ret++;
   1.198 +				}
   1.199 +				mfn++; i++; addr += PAGE_SIZE;
   1.200  			}
   1.201 -
   1.202 -			ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
   1.203 -						     mfn, PAGE_SIZE,
   1.204 -						     vma->vm_page_prot, m.dom);
   1.205 -			if (ret < 0)
   1.206 -				put_user(0xF0000000 | mfn, p);
   1.207  		}
   1.208  
   1.209  		up_write(&mm->mmap_sem);
   1.210 -		ret = 0;
   1.211 +		if (ret > 0) {
   1.212 +			p = m.arr;
   1.213 +			i = 0;
   1.214 +			ret = 0;
   1.215 +			list_for_each(l, &pagelist) {
   1.216 +				int nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
   1.217 +				mfn = (unsigned long *)(l + 1);
   1.218 +				if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
   1.219 +					ret = -EFAULT;
   1.220 +				i += nr; p += nr;
   1.221 +			}
   1.222 +		}
   1.223 +	mmapbatch_out:
   1.224 +		list_for_each_safe(l,l2,&pagelist)
   1.225 +			free_page((unsigned long)l);
   1.226 +#undef MMAPBATCH_NR_PER_PAGE
   1.227  	}
   1.228  	break;