ia64/xen-unstable

changeset 6190:4ec947baae75

Add generic_page_range() -- generic page table operation.

Linux has several instances of repeated code to do updates to a range
of PTEs. Mapping memory between domains in Xen also tends to need to
do this quite frequently, to ensure page tables have been constructed
and to look up PTE addresses when making mapping-related hypercalls.
This patch adds a generic PTE walk-and-fill operation that takes a
function pointer to call on leaf entries. direct_remap_area_pages()
is updated to use the new call, as are abuses of
__direct_remap_area_pages.

This patch also introduces two new helper functions for working with
page tables when mapping memory between domains:
create_lookup_pte_addr() returns the machine address of a PTE,
allocating intermediate page tables as necessary. touch_pte_range()
ensures that page tables exist for a virtual address range.

Many of the existing linux page table operations (e.g. zap/remap/etc)
could be modified to use this interface, which would potentially
shorten up mm/memory.c a bit.
author akw27@arcadians.cl.cam.ac.uk
date Mon Aug 15 13:16:04 2005 +0000 (2005-08-15)
parents f2e0bbec3bf9
children 430ce2bade9b d4338cc89c2c
files linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/mm/memory.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c	Mon Aug 15 12:41:57 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c	Mon Aug 15 13:16:04 2005 +0000
     1.3 @@ -298,91 +298,21 @@ void __init bt_iounmap(void *addr, unsig
     1.4  #define direct_mk_pte_phys(physpage, pgprot) \
     1.5    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
     1.6  
     1.7 -static inline void direct_remap_area_pte(pte_t *pte, 
     1.8 -					 unsigned long address, 
     1.9 -					 unsigned long size,
    1.10 -					 mmu_update_t **v)
    1.11 -{
    1.12 -	unsigned long end;
    1.13 -
    1.14 -	address &= ~PMD_MASK;
    1.15 -	end = address + size;
    1.16 -	if (end > PMD_SIZE)
    1.17 -		end = PMD_SIZE;
    1.18 -	if (address >= end)
    1.19 -		BUG();
    1.20 -
    1.21 -	do {
    1.22 -		(*v)->ptr = virt_to_machine(pte);
    1.23 -		(*v)++;
    1.24 -		address += PAGE_SIZE;
    1.25 -		pte++;
    1.26 -	} while (address && (address < end));
    1.27 -}
    1.28 -
    1.29 -static inline int direct_remap_area_pmd(struct mm_struct *mm,
    1.30 -					pmd_t *pmd, 
    1.31 -					unsigned long address, 
    1.32 -					unsigned long size,
    1.33 -					mmu_update_t **v)
    1.34 -{
    1.35 -	unsigned long end;
    1.36  
    1.37 -	address &= ~PGDIR_MASK;
    1.38 -	end = address + size;
    1.39 -	if (end > PGDIR_SIZE)
    1.40 -		end = PGDIR_SIZE;
    1.41 -	if (address >= end)
    1.42 -		BUG();
    1.43 -	do {
    1.44 -		pte_t *pte = (mm == &init_mm) ? 
    1.45 -			pte_alloc_kernel(mm, pmd, address) :
    1.46 -			pte_alloc_map(mm, pmd, address);
    1.47 -		if (!pte)
    1.48 -			return -ENOMEM;
    1.49 -		direct_remap_area_pte(pte, address, end - address, v);
    1.50 -		pte_unmap(pte);
    1.51 -		address = (address + PMD_SIZE) & PMD_MASK;
    1.52 -		pmd++;
    1.53 -	} while (address && (address < end));
    1.54 -	return 0;
    1.55 -}
    1.56 - 
    1.57 -int __direct_remap_area_pages(struct mm_struct *mm,
    1.58 -			      unsigned long address, 
    1.59 -			      unsigned long size, 
    1.60 -			      mmu_update_t *v)
    1.61 +static int direct_remap_area_pte_fn(pte_t *pte, 
    1.62 +                                    struct page *pte_page,
    1.63 +                                    unsigned long address, 
    1.64 +                                    void *data)
    1.65  {
    1.66 -	pgd_t * dir;
    1.67 -	unsigned long end = address + size;
    1.68 -	int error;
    1.69 -
    1.70 -	dir = pgd_offset(mm, address);
    1.71 -	if (address >= end)
    1.72 -		BUG();
    1.73 -	spin_lock(&mm->page_table_lock);
    1.74 -	do {
    1.75 -		pud_t *pud;
    1.76 -		pmd_t *pmd;
    1.77 +        mmu_update_t **v = (mmu_update_t **)data;
    1.78  
    1.79 -		error = -ENOMEM;
    1.80 -		pud = pud_alloc(mm, dir, address);
    1.81 -		if (!pud)
    1.82 -			break;
    1.83 -		pmd = pmd_alloc(mm, pud, address);
    1.84 -		if (!pmd)
    1.85 -			break;
    1.86 -		error = 0;
    1.87 -		direct_remap_area_pmd(mm, pmd, address, end - address, &v);
    1.88 -		address = (address + PGDIR_SIZE) & PGDIR_MASK;
    1.89 -		dir++;
    1.90 +        (*v)->ptr = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
    1.91 +                    | ((unsigned long)pte & ~PAGE_MASK);
    1.92 +        (*v)++;
    1.93  
    1.94 -	} while (address && (address < end));
    1.95 -	spin_unlock(&mm->page_table_lock);
    1.96 -	return error;
    1.97 +        return 0;
    1.98  }
    1.99  
   1.100 -
   1.101  int direct_remap_area_pages(struct mm_struct *mm,
   1.102  			    unsigned long address, 
   1.103  			    unsigned long machine_addr,
   1.104 @@ -393,7 +323,7 @@ int direct_remap_area_pages(struct mm_st
   1.105  	int i;
   1.106  	unsigned long start_address;
   1.107  #define MAX_DIRECTMAP_MMU_QUEUE 130
   1.108 -	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u;
   1.109 +	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v = u, *w = u;
   1.110  
   1.111  	start_address = address;
   1.112  
   1.113 @@ -402,10 +332,9 @@ int direct_remap_area_pages(struct mm_st
   1.114  	for (i = 0; i < size; i += PAGE_SIZE) {
   1.115  		if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
   1.116  			/* Fill in the PTE pointers. */
   1.117 -			__direct_remap_area_pages(mm,
   1.118 -						  start_address, 
   1.119 -						  address-start_address, 
   1.120 -						  u);
   1.121 +                        generic_page_range(mm, start_address, 
   1.122 +                                           address-start_address,
   1.123 +                                           direct_remap_area_pte_fn, &w);
   1.124   
   1.125  			if (HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0)
   1.126  				return -EFAULT;
   1.127 @@ -426,10 +355,9 @@ int direct_remap_area_pages(struct mm_st
   1.128  
   1.129  	if (v != u) {
   1.130  		/* get the ptep's filled in */
   1.131 -		__direct_remap_area_pages(mm,
   1.132 -					  start_address, 
   1.133 -					  address-start_address, 
   1.134 -					  u);
   1.135 +                generic_page_range(mm, start_address, 
   1.136 +                                   address-start_address,
   1.137 +                                   direct_remap_area_pte_fn, &w);
   1.138  		if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL, domid) < 0))
   1.139  			return -EFAULT;
   1.140  	}
   1.141 @@ -440,3 +368,34 @@ int direct_remap_area_pages(struct mm_st
   1.142  }
   1.143  
   1.144  EXPORT_SYMBOL(direct_remap_area_pages);
   1.145 +
   1.146 +int create_lookup_pte_addr(struct mm_struct *mm, 
   1.147 +                           unsigned long address,
   1.148 +                           unsigned long *ptep)
   1.149 +{
   1.150 +    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
   1.151 +    {
   1.152 +        unsigned long *ptep = (unsigned long *)data;
   1.153 +        if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
   1.154 +                       | ((unsigned long)pte & ~PAGE_MASK);
   1.155 +        return 0;
   1.156 +    }
   1.157 +
   1.158 +    return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
   1.159 +}
   1.160 +
   1.161 +EXPORT_SYMBOL(create_lookup_pte_addr);
   1.162 +
   1.163 +int touch_pte_range(struct mm_struct *mm,
   1.164 +                    unsigned long address,
   1.165 +                    unsigned long size)
   1.166 +{
   1.167 +    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
   1.168 +    {
   1.169 +        return 0;
   1.170 +    }
   1.171 +
   1.172 +    return generic_page_range(mm, address, size, f, NULL);
   1.173 +}                 
   1.174 +
   1.175 +EXPORT_SYMBOL(touch_pte_range);
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Mon Aug 15 12:41:57 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/ioremap.c	Mon Aug 15 13:16:04 2005 +0000
     2.3 @@ -464,3 +464,34 @@ int direct_remap_area_pages(struct mm_st
     2.4  }
     2.5  
     2.6  EXPORT_SYMBOL(direct_remap_area_pages);
     2.7 +
     2.8 +int create_lookup_pte_addr(struct mm_struct *mm, 
     2.9 +                           unsigned long address,
    2.10 +                           unsigned long *ptep)
    2.11 +{
    2.12 +    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
    2.13 +    {
    2.14 +        unsigned long *ptep = (unsigned long *)data;
    2.15 +        if (ptep) *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) << PAGE_SHIFT)
    2.16 +                       | ((unsigned long)pte & ~PAGE_MASK);
    2.17 +        return 0;
    2.18 +    }
    2.19 +
    2.20 +    return generic_page_range(mm, address, PAGE_SIZE, f, ptep);
    2.21 +}
    2.22 +
    2.23 +EXPORT_SYMBOL(create_lookup_pte_addr);
    2.24 +
    2.25 +int touch_pte_range(struct mm_struct *mm,
    2.26 +                    unsigned long address,
    2.27 +                    unsigned long size)
    2.28 +{
    2.29 +    int f(pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 
    2.30 +    {
    2.31 +        return 0;
    2.32 +    }
    2.33 +
    2.34 +    return generic_page_range(mm, address, size, f, NULL);
    2.35 +}                 
    2.36 +
    2.37 +EXPORT_SYMBOL(touch_pte_range);
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Aug 15 12:41:57 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Aug 15 13:16:04 2005 +0000
     3.3 @@ -139,7 +139,7 @@ static int privcmd_ioctl(struct inode *i
     3.4          privcmd_mmapbatch_t m;
     3.5          struct vm_area_struct *vma = NULL;
     3.6          unsigned long *p, addr;
     3.7 -        unsigned long mfn;
     3.8 +        unsigned long mfn, ptep;
     3.9          int i;
    3.10  
    3.11          if ( copy_from_user(&m, (void *)data, sizeof(m)) )
    3.12 @@ -163,12 +163,12 @@ static int privcmd_ioctl(struct inode *i
    3.13              if ( get_user(mfn, p) )
    3.14                  return -EFAULT;
    3.15  
    3.16 -            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
    3.17 +            ret = create_lookup_pte_addr(vma->vm_mm, addr, &ptep);
    3.18 +            if (ret)
    3.19 +                goto batch_err;
    3.20  
    3.21 -            __direct_remap_area_pages(vma->vm_mm,
    3.22 -                                      addr, 
    3.23 -                                      PAGE_SIZE, 
    3.24 -                                      &u);
    3.25 +            u.val = (mfn << PAGE_SHIFT) | pgprot_val(vma->vm_page_prot);
    3.26 +            u.ptr = ptep;
    3.27  
    3.28              if ( unlikely(HYPERVISOR_mmu_update(&u, 1, NULL, m.dom) < 0) )
    3.29                  put_user(0xF0000000 | mfn, p);
     4.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Aug 15 12:41:57 2005 +0000
     4.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Aug 15 13:16:04 2005 +0000
     4.3 @@ -466,10 +466,12 @@ int direct_remap_area_pages(struct mm_st
     4.4                              unsigned long size, 
     4.5                              pgprot_t prot,
     4.6                              domid_t  domid);
     4.7 -int __direct_remap_area_pages(struct mm_struct *mm,
     4.8 -			      unsigned long address, 
     4.9 -			      unsigned long size, 
    4.10 -			      mmu_update_t *v);
    4.11 +int create_lookup_pte_addr(struct mm_struct *mm,
    4.12 +                           unsigned long address,
    4.13 +                           unsigned long *ptep);
    4.14 +int touch_pte_range(struct mm_struct *mm,
    4.15 +                    unsigned long address,
    4.16 +                    unsigned long size);
    4.17  
    4.18  #define io_remap_page_range(vma,from,phys,size,prot) \
    4.19  direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
     5.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Mon Aug 15 12:41:57 2005 +0000
     5.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Mon Aug 15 13:16:04 2005 +0000
     5.3 @@ -538,6 +538,12 @@ int __direct_remap_area_pages(struct mm_
     5.4                                unsigned long address,
     5.5                                unsigned long size,
     5.6                                mmu_update_t *v);
     5.7 +int create_lookup_pte_addr(struct mm_struct *mm,
     5.8 +                           unsigned long address,
     5.9 +                           unsigned long *ptep);
    5.10 +int touch_pte_range(struct mm_struct *mm,
    5.11 +                    unsigned long address,
    5.12 +                    unsigned long size);
    5.13  
    5.14  #define io_remap_page_range(vma, vaddr, paddr, size, prot)		\
    5.15  		direct_remap_area_pages((vma)->vm_mm,vaddr,paddr,size,prot,DOMID_IO)
     6.1 --- a/linux-2.6-xen-sparse/include/linux/mm.h	Mon Aug 15 12:41:57 2005 +0000
     6.2 +++ b/linux-2.6-xen-sparse/include/linux/mm.h	Mon Aug 15 13:16:04 2005 +0000
     6.3 @@ -817,6 +817,12 @@ extern int check_user_page_readable(stru
     6.4  int remap_pfn_range(struct vm_area_struct *, unsigned long,
     6.5  		unsigned long, unsigned long, pgprot_t);
     6.6  
     6.7 +typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr, 
     6.8 +                        void *data);
     6.9 +extern int generic_page_range(struct mm_struct *mm, unsigned long address, 
    6.10 +                              unsigned long size, pte_fn_t fn, void *data);
    6.11 +
    6.12 +
    6.13  #ifdef CONFIG_PROC_FS
    6.14  void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
    6.15  #else
     7.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Mon Aug 15 12:41:57 2005 +0000
     7.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Mon Aug 15 13:16:04 2005 +0000
     7.3 @@ -954,8 +954,10 @@ int get_user_pages(struct task_struct *t
     7.4                          i++;
     7.5                          start += PAGE_SIZE;
     7.6                          len--;
     7.7 +printk(KERN_ALERT "HIT  0x%lx\n", start);
     7.8                          continue;
     7.9 -                    }
    7.10 +                    } 
    7.11 +else printk(KERN_ALERT "MISS 0x%lx\n", start);
    7.12                  }
    7.13  
    7.14  		if (!vma || (vma->vm_flags & VM_IO)
    7.15 @@ -1213,6 +1215,104 @@ int remap_pfn_range(struct vm_area_struc
    7.16  }
    7.17  EXPORT_SYMBOL(remap_pfn_range);
    7.18  
    7.19 +static inline int generic_pte_range(struct mm_struct *mm,
    7.20 +                                    pmd_t *pmd, 
    7.21 +                                    unsigned long addr, 
    7.22 +                                    unsigned long end,
    7.23 +                                    pte_fn_t fn, void *data)
    7.24 +{
    7.25 +	pte_t *pte;
    7.26 +        int err;
    7.27 +        struct page *pte_page;
    7.28 +
    7.29 +        pte = (mm == &init_mm) ? 
    7.30 +                pte_alloc_kernel(mm, pmd, addr) :
    7.31 +                pte_alloc_map(mm, pmd, addr);
    7.32 +        if (!pte)
    7.33 +                return -ENOMEM;
    7.34 +
    7.35 +        pte_page = pmd_page(*pmd);
    7.36 +
    7.37 +        do {
    7.38 +                err = fn(pte, pte_page, addr, data);
    7.39 +		if (err)
    7.40 +                        break;
    7.41 +        } while (pte++, addr += PAGE_SIZE, addr != end);
    7.42 +
    7.43 +        if (mm != &init_mm)
    7.44 +                pte_unmap(pte-1);
    7.45 +        return err;
    7.46 +
    7.47 +}
    7.48 +
    7.49 +static inline int generic_pmd_range(struct mm_struct *mm,
    7.50 +                                    pud_t *pud, 
    7.51 +                                    unsigned long addr, 
    7.52 +                                    unsigned long end,
    7.53 +                                    pte_fn_t fn, void *data)
    7.54 +{
    7.55 +	pmd_t *pmd;
    7.56 +	unsigned long next;
    7.57 +        int err;
    7.58 +
    7.59 +	pmd = pmd_alloc(mm, pud, addr);
    7.60 +	if (!pmd)
    7.61 +		return -ENOMEM;
    7.62 +	do {
    7.63 +		next = pmd_addr_end(addr, end);
    7.64 +                err = generic_pte_range(mm, pmd, addr, next, fn, data);
    7.65 +                if (err)
    7.66 +                    break;
    7.67 +	} while (pmd++, addr = next, addr != end);
    7.68 +	return err;
    7.69 +}
    7.70 +
    7.71 +static inline int generic_pud_range(struct mm_struct *mm, pgd_t *pgd, 
    7.72 +                                    unsigned long addr,
    7.73 +                                    unsigned long end,
    7.74 +                                    pte_fn_t fn, void *data)
    7.75 +{
    7.76 +	pud_t *pud;
    7.77 +	unsigned long next;
    7.78 +        int err;
    7.79 +
    7.80 +	pud = pud_alloc(mm, pgd, addr);
    7.81 +	if (!pud)
    7.82 +		return -ENOMEM;
    7.83 +	do {
    7.84 +		next = pud_addr_end(addr, end);
    7.85 +		err = generic_pmd_range(mm, pud, addr, next, fn, data);
    7.86 +                if (err)
    7.87 +			break;
    7.88 +	} while (pud++, addr = next, addr != end);
    7.89 +	return err;
    7.90 +}
    7.91 +
    7.92 +/*
    7.93 + * Scan a region of virtual memory, filling in page tables as necessary
    7.94 + * and calling a provided function on each leaf page table.
    7.95 + */
    7.96 +int generic_page_range(struct mm_struct *mm, unsigned long addr, 
    7.97 +                  unsigned long size, pte_fn_t fn, void *data)
    7.98 +{
    7.99 +	pgd_t *pgd;
   7.100 +	unsigned long next;
   7.101 +	unsigned long end = addr + size;
   7.102 +	int err;
   7.103 +
   7.104 +	BUG_ON(addr >= end);
   7.105 +	pgd = pgd_offset(mm, addr);
   7.106 +	spin_lock(&mm->page_table_lock);
   7.107 +	do {
   7.108 +		next = pgd_addr_end(addr, end);
   7.109 +		err = generic_pud_range(mm, pgd, addr, next, fn, data);
   7.110 +		if (err)
   7.111 +			break;
   7.112 +	} while (pgd++, addr = next, addr != end);
   7.113 +	spin_unlock(&mm->page_table_lock);
   7.114 +	return err;
   7.115 +}
   7.116 +
   7.117  /*
   7.118   * Do pte_mkwrite, but only if the vma says VM_WRITE.  We do this when
   7.119   * servicing faults for write access.  In the normal case, do always want