ia64/xen-unstable

changeset 11773:e1f3af226a8e

[LINUX] Various fixes for mmapping I/O and foreign memory pages.

First, auto-translate guests can use remap_pfn_range() rather than
direct_remap_pfn_range(). This actually works better because
remap_pfn_range() can legitimately assert VM_PFNMAP (this patch
removes this flag for direct_remap_pfn_range()).

There are various cleanups and fixes to the privcmd interface:
1. VMAs should be searched and used under the mmap semaphore
2. Mapping should be single shot (since direct_remap_pfn_range()
expects the PTEs to be empty when it is called).
3. Demand-fault population of the privcmd vma should be disallowed.
4. Various others, including a more thorough check of input args.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Mon Oct 09 10:56:17 2006 +0100 (2006-10-09)
parents b92104e0cf08
children 411c6aca255c
files linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/mm/memory.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Mon Oct 09 10:18:11 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Mon Oct 09 10:56:17 2006 +0100
     1.3 @@ -29,6 +29,8 @@ static int direct_remap_area_pte_fn(pte_
     1.4  {
     1.5  	mmu_update_t **v = (mmu_update_t **)data;
     1.6  
     1.7 +	BUG_ON(!pte_none(*pte));
     1.8 +
     1.9  	(*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
    1.10  		     PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
    1.11  	(*v)++;
    1.12 @@ -110,12 +112,14 @@ int direct_remap_pfn_range(struct vm_are
    1.13  			   pgprot_t prot,
    1.14  			   domid_t  domid)
    1.15  {
    1.16 -	/* Same as remap_pfn_range(). */
    1.17 -	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
    1.18 +	if (xen_feature(XENFEAT_auto_translated_physmap))
    1.19 +		return remap_pfn_range(vma, address, mfn, size, prot);
    1.20  
    1.21  	if (domid == DOMID_SELF)
    1.22  		return -EINVAL;
    1.23  
    1.24 +	vma->vm_flags |= VM_IO | VM_RESERVED;
    1.25 +
    1.26  	vma->vm_mm->context.has_foreign_mappings = 1;
    1.27  
    1.28  	return __direct_remap_pfn_range(
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Oct 09 10:18:11 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Oct 09 10:56:17 2006 +0100
     2.3 @@ -100,10 +100,12 @@ static int privcmd_ioctl(struct inode *i
     2.4  	break;
     2.5  
     2.6  	case IOCTL_PRIVCMD_MMAP: {
     2.7 -#define PRIVCMD_MMAP_SZ 32
     2.8  		privcmd_mmap_t mmapcmd;
     2.9 -		privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
    2.10 +		privcmd_mmap_entry_t msg;
    2.11  		privcmd_mmap_entry_t __user *p;
    2.12 +		struct mm_struct *mm = current->mm;
    2.13 +		struct vm_area_struct *vma;
    2.14 +		unsigned long va;
    2.15  		int i, rc;
    2.16  
    2.17  		if (!is_initial_xendomain())
    2.18 @@ -113,47 +115,62 @@ static int privcmd_ioctl(struct inode *i
    2.19  			return -EFAULT;
    2.20  
    2.21  		p = mmapcmd.entry;
    2.22 -
    2.23 -		for (i = 0; i < mmapcmd.num;
    2.24 -		     i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
    2.25 -			int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
    2.26 -				PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
    2.27 -
    2.28 -			if (copy_from_user(&msg, p,
    2.29 -					   n*sizeof(privcmd_mmap_entry_t)))
    2.30 -				return -EFAULT;
    2.31 -     
    2.32 -			for (j = 0; j < n; j++) {
    2.33 -				struct vm_area_struct *vma = 
    2.34 -					find_vma( current->mm, msg[j].va );
    2.35 -
    2.36 -				if (!vma)
    2.37 -					return -EINVAL;
    2.38 +		if (copy_from_user(&msg, p, sizeof(msg)))
    2.39 +			return -EFAULT;
    2.40  
    2.41 -				if (msg[j].va > PAGE_OFFSET)
    2.42 -					return -EINVAL;
    2.43 -
    2.44 -				if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
    2.45 -				    > vma->vm_end )
    2.46 -					return -EINVAL;
    2.47 +		down_read(&mm->mmap_sem);
    2.48  
    2.49 -				if ((rc = direct_remap_pfn_range(
    2.50 -					vma,
    2.51 -					msg[j].va&PAGE_MASK, 
    2.52 -					msg[j].mfn, 
    2.53 -					msg[j].npages<<PAGE_SHIFT, 
    2.54 -					vma->vm_page_prot,
    2.55 -					mmapcmd.dom)) < 0)
    2.56 -					return rc;
    2.57 -			}
    2.58 +		vma = find_vma(mm, msg.va);
    2.59 +		rc = -EINVAL;
    2.60 +		if (!vma || (msg.va != vma->vm_start) || vma->vm_private_data)
    2.61 +			goto mmap_out;
    2.62 +
    2.63 +		/* Mapping is a one-shot operation per vma. */
    2.64 +		vma->vm_private_data = (void *)1;
    2.65 +
    2.66 +		va = vma->vm_start;
    2.67 +
    2.68 +		for (i = 0; i < mmapcmd.num; i++, p++) {
    2.69 +			rc = -EFAULT;
    2.70 +			if (copy_from_user(&msg, p, sizeof(msg)))
    2.71 +				goto mmap_out;
    2.72 +
    2.73 +			/* Do not allow range to wrap the address space. */
    2.74 +			rc = -EINVAL;
    2.75 +			if ((msg.npages > (INT_MAX >> PAGE_SHIFT)) ||
    2.76 +			    ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
    2.77 +				goto mmap_out;
    2.78 +
    2.79 +			/* Range chunks must be contiguous in va space. */
    2.80 +			if ((msg.va != va) ||
    2.81 +			    ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
    2.82 +				goto mmap_out;
    2.83 +
    2.84 +			if ((rc = direct_remap_pfn_range(
    2.85 +				vma,
    2.86 +				msg.va & PAGE_MASK, 
    2.87 +				msg.mfn, 
    2.88 +				msg.npages << PAGE_SHIFT, 
    2.89 +				vma->vm_page_prot,
    2.90 +				mmapcmd.dom)) < 0)
    2.91 +				goto mmap_out;
    2.92 +
    2.93 +
    2.94 +			va += msg.npages << PAGE_SHIFT;
    2.95  		}
    2.96 -		ret = 0;
    2.97 +
    2.98 +		rc = 0;
    2.99 +
   2.100 +	mmap_out:
   2.101 +		up_read(&mm->mmap_sem);
   2.102 +		ret = rc;
   2.103  	}
   2.104  	break;
   2.105  
   2.106  	case IOCTL_PRIVCMD_MMAPBATCH: {
   2.107  		privcmd_mmapbatch_t m;
   2.108 -		struct vm_area_struct *vma = NULL;
   2.109 +		struct mm_struct *mm = current->mm;
   2.110 +		struct vm_area_struct *vma;
   2.111  		xen_pfn_t __user *p;
   2.112  		unsigned long addr, mfn;
   2.113  		int i;
   2.114 @@ -161,37 +178,33 @@ static int privcmd_ioctl(struct inode *i
   2.115  		if (!is_initial_xendomain())
   2.116  			return -EPERM;
   2.117  
   2.118 -		if (copy_from_user(&m, udata, sizeof(m))) {
   2.119 -			ret = -EFAULT;
   2.120 -			goto batch_err;
   2.121 -		}
   2.122 +		if (copy_from_user(&m, udata, sizeof(m)))
   2.123 +			return -EFAULT;
   2.124  
   2.125 -		if (m.dom == DOMID_SELF) {
   2.126 -			ret = -EINVAL;
   2.127 -			goto batch_err;
   2.128 +		if ((m.num <= 0) || (m.num > (INT_MAX >> PAGE_SHIFT)))
   2.129 +			return -EINVAL;
   2.130 +
   2.131 +		down_read(&mm->mmap_sem);
   2.132 +
   2.133 +		vma = find_vma(mm, m.addr);
   2.134 +		if (!vma ||
   2.135 +		    (m.addr != vma->vm_start) ||
   2.136 +		    ((m.addr + (m.num<<PAGE_SHIFT)) != vma->vm_end) ||
   2.137 +		    vma->vm_private_data) {
   2.138 +			up_read(&mm->mmap_sem);
   2.139 +			return -EINVAL;
   2.140  		}
   2.141  
   2.142 -		vma = find_vma(current->mm, m.addr);
   2.143 -		if (!vma) {
   2.144 -			ret = -EINVAL;
   2.145 -			goto batch_err;
   2.146 -		}
   2.147 -
   2.148 -		if (m.addr > PAGE_OFFSET) {
   2.149 -			ret = -EFAULT;
   2.150 -			goto batch_err;
   2.151 -		}
   2.152 -
   2.153 -		if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
   2.154 -			ret = -EFAULT;
   2.155 -			goto batch_err;
   2.156 -		}
   2.157 +		/* Mapping is a one-shot operation per vma. */
   2.158 +		vma->vm_private_data = (void *)1;
   2.159  
   2.160  		p = m.arr;
   2.161  		addr = m.addr;
   2.162  		for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
   2.163 -			if (get_user(mfn, p))
   2.164 +			if (get_user(mfn, p)) {
   2.165 +				up_read(&mm->mmap_sem);
   2.166  				return -EFAULT;
   2.167 +			}
   2.168  
   2.169  			ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
   2.170  						     mfn, PAGE_SIZE,
   2.171 @@ -200,15 +213,8 @@ static int privcmd_ioctl(struct inode *i
   2.172  				put_user(0xF0000000 | mfn, p);
   2.173  		}
   2.174  
   2.175 +		up_read(&mm->mmap_sem);
   2.176  		ret = 0;
   2.177 -		break;
   2.178 -
   2.179 -	batch_err:
   2.180 -		printk("batch_err ret=%d vma=%p addr=%lx "
   2.181 -		       "num=%d arr=%p %lx-%lx\n", 
   2.182 -		       ret, vma, (unsigned long)m.addr, m.num, m.arr,
   2.183 -		       vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
   2.184 -		break;
   2.185  	}
   2.186  	break;
   2.187  
   2.188 @@ -221,10 +227,27 @@ static int privcmd_ioctl(struct inode *i
   2.189  }
   2.190  
   2.191  #ifndef HAVE_ARCH_PRIVCMD_MMAP
   2.192 +static struct page *privcmd_nopage(struct vm_area_struct *vma,
   2.193 +				   unsigned long address,
   2.194 +				   int *type)
   2.195 +{
   2.196 +	return NOPAGE_SIGBUS;
   2.197 +}
   2.198 +
   2.199 +static struct vm_operations_struct privcmd_vm_ops = {
   2.200 +	.nopage = privcmd_nopage
   2.201 +};
   2.202 +
   2.203  static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
   2.204  {
   2.205 +	/* Unsupported for auto-translate guests. */
   2.206 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   2.207 +		return -ENOSYS;
   2.208 +
   2.209  	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
   2.210  	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
   2.211 +	vma->vm_ops = &privcmd_vm_ops;
   2.212 +	vma->vm_private_data = NULL;
   2.213  
   2.214  	return 0;
   2.215  }
     3.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Mon Oct 09 10:18:11 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Mon Oct 09 10:56:17 2006 +0100
     3.3 @@ -390,7 +390,7 @@ struct page *vm_normal_page(struct vm_ar
     3.4  
     3.5  	if (vma->vm_flags & VM_PFNMAP) {
     3.6  		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
     3.7 -		if ((pfn == vma->vm_pgoff + off) || !pfn_valid(pfn))
     3.8 +		if (pfn == vma->vm_pgoff + off)
     3.9  			return NULL;
    3.10  		if (!is_cow_mapping(vma->vm_flags))
    3.11  			return NULL;
    3.12 @@ -405,7 +405,8 @@ struct page *vm_normal_page(struct vm_ar
    3.13  	 * Remove this test eventually!
    3.14  	 */
    3.15  	if (unlikely(!pfn_valid(pfn))) {
    3.16 -		print_bad_pte(vma, pte, addr);
    3.17 +		if (!(vma->vm_flags & VM_RESERVED))
    3.18 +			print_bad_pte(vma, pte, addr);
    3.19  		return NULL;
    3.20  	}
    3.21