ia64/xen-unstable

changeset 8059:c3cfc4ff3b08

Implement new memory_op() XENMEM_machphys_mfn_list. Replaces old
privcmd Linux ioctl and includes an implementation for x86/64.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Nov 25 18:43:35 2005 +0100 (2005-11-25)
parents 549130374cfa
children b9181b1c576f
files linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h tools/libxc/xc_linux_save.c tools/libxc/xc_private.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/mm.c xen/common/memory.c xen/include/asm-ia64/mm.h xen/include/asm-x86/mm.h xen/include/public/memory.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Nov 25 17:58:36 2005 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Nov 25 18:43:35 2005 +0100
     1.3 @@ -213,55 +213,6 @@ static int privcmd_ioctl(struct inode *i
     1.4  	break;
     1.5  #endif
     1.6  
     1.7 -#ifndef __ia64__
     1.8 -	case IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS: {
     1.9 -		pgd_t *pgd; 
    1.10 -		pud_t *pud; 
    1.11 -		pmd_t *pmd; 
    1.12 -		unsigned long m2pv, m2p_mfn; 	
    1.13 -		privcmd_m2pmfns_t m; 
    1.14 -		unsigned long __user *p;
    1.15 -		int i; 
    1.16 -
    1.17 -#if defined (__x86_64__)
    1.18 -		/* 
    1.19 -		** XXX SMH: the below procedure won't work for 64 since 
    1.20 -		** we don't have access to the memory which maps the M2P. 
    1.21 -		** A proper fix will probably involve moving this 
    1.22 -		** functionality to Xen - for now just return an error 
    1.23 -		** here rather than GPF'ing in the kernel. 
    1.24 -		*/
    1.25 -		ret = -EINVAL; 
    1.26 -		break; 
    1.27 -#endif
    1.28 -
    1.29 -		if (copy_from_user(&m, udata, sizeof(m)))
    1.30 -			return -EFAULT;
    1.31 -
    1.32 -		m2pv = (unsigned long)machine_to_phys_mapping;
    1.33 -
    1.34 -		p = m.arr; 
    1.35 -
    1.36 -		for (i=0; i < m.num; i++) { 
    1.37 -			pgd = pgd_offset_k(m2pv);
    1.38 -			pud = pud_offset(pgd, m2pv);
    1.39 -			pmd = pmd_offset(pud, m2pv);
    1.40 -			m2p_mfn  = (*(uint64_t *)pmd >> PAGE_SHIFT)&0xFFFFFFFF;
    1.41 -			m2p_mfn += pte_index(m2pv);
    1.42 -
    1.43 -			if (put_user(m2p_mfn, p + i))
    1.44 -				return -EFAULT;
    1.45 -
    1.46 -			m2pv += (1 << 21); 
    1.47 -		}
    1.48 -
    1.49 -		ret = 0; 
    1.50 -		break; 
    1.51 -
    1.52 -	}
    1.53 -	break;
    1.54 -#endif
    1.55 -
    1.56  	default:
    1.57  		ret = -EINVAL;
    1.58  		break;
     2.1 --- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h	Fri Nov 25 17:58:36 2005 +0100
     2.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h	Fri Nov 25 18:43:35 2005 +0100
     2.3 @@ -59,11 +59,6 @@ typedef struct privcmd_mmapbatch {
     2.4  	unsigned long __user *arr; /* array of mfns - top nibble set on err */
     2.5  } privcmd_mmapbatch_t; 
     2.6  
     2.7 -typedef struct privcmd_m2pmfns { 
     2.8 -	int num;    /* max number of mfns to return */
     2.9 -	unsigned long __user *arr; /* array of mfns */
    2.10 -} privcmd_m2pmfns_t; 
    2.11 -
    2.12  typedef struct privcmd_blkmsg
    2.13  {
    2.14  	unsigned long op;
    2.15 @@ -82,8 +77,6 @@ typedef struct privcmd_blkmsg
    2.16  	_IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
    2.17  #define IOCTL_PRIVCMD_MMAPBATCH					\
    2.18  	_IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
    2.19 -#define IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS			\
    2.20 -	_IOC(_IOC_READ, 'P', 4, sizeof(unsigned long))
    2.21  
    2.22  #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
    2.23  
     3.1 --- a/tools/libxc/xc_linux_save.c	Fri Nov 25 17:58:36 2005 +0100
     3.2 +++ b/tools/libxc/xc_linux_save.c	Fri Nov 25 18:43:35 2005 +0100
     3.3 @@ -502,7 +502,7 @@ static unsigned long *xc_map_m2p(int xc_
     3.4                                   unsigned long max_mfn, 
     3.5                                   int prot) 
     3.6  { 
     3.7 -    privcmd_m2pmfns_t m2p_mfns; 
     3.8 +    struct xen_machphys_mfn_list xmml;
     3.9      privcmd_mmap_t ioctlx; 
    3.10      privcmd_mmap_entry_t *entries; 
    3.11      unsigned long m2p_chunks, m2p_size; 
    3.12 @@ -512,50 +512,45 @@ static unsigned long *xc_map_m2p(int xc_
    3.13      m2p_size   = M2P_SIZE(max_mfn); 
    3.14      m2p_chunks = M2P_CHUNKS(max_mfn); 
    3.15  
    3.16 -
    3.17 -    m2p_mfns.num = m2p_chunks; 
    3.18 -
    3.19 -    if(!(m2p_mfns.arr = malloc(m2p_chunks * sizeof(unsigned long)))) { 
    3.20 +    xmml.max_extents = m2p_chunks;
    3.21 +    if (!(xmml.extent_start = malloc(m2p_chunks * sizeof(unsigned long)))) { 
    3.22          ERR("failed to allocate space for m2p mfns!\n"); 
    3.23          return NULL; 
    3.24      } 
    3.25  
    3.26 -    if (ioctl(xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p_mfns) < 0) {
    3.27 +    if (xc_memory_op(xc_handle, XENMEM_machphys_mfn_list, &xmml) ||
    3.28 +        (xmml.nr_extents != m2p_chunks)) {
    3.29          ERR("xc_get_m2p_mfns:"); 
    3.30          return NULL;
    3.31      }
    3.32  
    3.33 -    if((m2p = mmap(NULL, m2p_size, prot, 
    3.34 -                   MAP_SHARED, xc_handle, 0)) == MAP_FAILED) {
    3.35 +    if ((m2p = mmap(NULL, m2p_size, prot, 
    3.36 +                    MAP_SHARED, xc_handle, 0)) == MAP_FAILED) {
    3.37          ERR("failed to mmap m2p"); 
    3.38          return NULL; 
    3.39      } 
    3.40 -    
    3.41  
    3.42 -    if(!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { 
    3.43 +    if (!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) { 
    3.44          ERR("failed to allocate space for mmap entries!\n"); 
    3.45          return NULL; 
    3.46      } 
    3.47  
    3.48 -
    3.49      ioctlx.num   = m2p_chunks;
    3.50      ioctlx.dom   = DOMID_XEN; 
    3.51      ioctlx.entry = entries; 
    3.52      
    3.53 -    for(i=0; i < m2p_chunks; i++) { 
    3.54 -        
    3.55 +    for (i=0; i < m2p_chunks; i++) { 
    3.56          entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE)); 
    3.57 -        entries[i].mfn = m2p_mfns.arr[i]; 
    3.58 +        entries[i].mfn = xmml.extent_start[i];
    3.59          entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
    3.60 -
    3.61      }
    3.62  
    3.63 -    if((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) {
    3.64 +    if ((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) {
    3.65          ERR("ioctl_mmap failed (rc = %d)", rc); 
    3.66          return NULL; 
    3.67      }
    3.68 -        
    3.69 -    free(m2p_mfns.arr); 
    3.70 +
    3.71 +    free(xmml.extent_start);
    3.72      free(entries); 
    3.73  
    3.74      return m2p; 
     4.1 --- a/tools/libxc/xc_private.c	Fri Nov 25 17:58:36 2005 +0100
     4.2 +++ b/tools/libxc/xc_private.c	Fri Nov 25 18:43:35 2005 +0100
     4.3 @@ -190,6 +190,7 @@ int xc_memory_op(int xc_handle,
     4.4  {
     4.5      DECLARE_HYPERCALL;
     4.6      struct xen_memory_reservation *reservation = arg;
     4.7 +    struct xen_machphys_mfn_list *xmml = arg;
     4.8      long ret = -EINVAL;
     4.9  
    4.10      hypercall.op     = __HYPERVISOR_memory_op;
    4.11 @@ -214,6 +215,20 @@ int xc_memory_op(int xc_handle,
    4.12              goto out1;
    4.13          }
    4.14          break;
    4.15 +    case XENMEM_machphys_mfn_list:
    4.16 +        if ( mlock(xmml, sizeof(*xmml)) != 0 )
    4.17 +        {
    4.18 +            PERROR("Could not mlock");
    4.19 +            goto out1;
    4.20 +        }
    4.21 +        if ( mlock(xmml->extent_start,
    4.22 +                   xmml->max_extents * sizeof(unsigned long)) != 0 )
    4.23 +        {
    4.24 +            PERROR("Could not mlock");
    4.25 +            safe_munlock(xmml, sizeof(*xmml));
    4.26 +            goto out1;
    4.27 +        }
    4.28 +        break;
    4.29      }
    4.30  
    4.31      ret = do_xen_hypercall(xc_handle, &hypercall);
    4.32 @@ -227,6 +242,11 @@ int xc_memory_op(int xc_handle,
    4.33              safe_munlock(reservation->extent_start,
    4.34                           reservation->nr_extents * sizeof(unsigned long));
    4.35          break;
    4.36 +    case XENMEM_machphys_mfn_list:
    4.37 +        safe_munlock(xmml, sizeof(*xmml));
    4.38 +        safe_munlock(xmml->extent_start,
    4.39 +                     xmml->max_extents * sizeof(unsigned long));
    4.40 +        break;
    4.41      }
    4.42  
    4.43   out1:
     5.1 --- a/xen/arch/x86/x86_32/mm.c	Fri Nov 25 17:58:36 2005 +0100
     5.2 +++ b/xen/arch/x86/x86_32/mm.c	Fri Nov 25 18:43:35 2005 +0100
     5.3 @@ -27,6 +27,7 @@
     5.4  #include <asm/page.h>
     5.5  #include <asm/flushtlb.h>
     5.6  #include <asm/fixmap.h>
     5.7 +#include <public/memory.h>
     5.8  
     5.9  extern l1_pgentry_t *mapcache;
    5.10  
    5.11 @@ -184,6 +185,41 @@ void subarch_init_memory(struct domain *
    5.12      }
    5.13  }
    5.14  
    5.15 +long arch_memory_op(int op, void *arg)
    5.16 +{
    5.17 +    struct xen_machphys_mfn_list xmml;
    5.18 +    unsigned long mfn;
    5.19 +    unsigned int i, max;
    5.20 +    long rc = 0;
    5.21 +
    5.22 +    switch ( op )
    5.23 +    {
    5.24 +    case XENMEM_machphys_mfn_list:
    5.25 +        if ( copy_from_user(&xmml, arg, sizeof(xmml)) )
    5.26 +            return -EFAULT;
    5.27 +
    5.28 +        max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);
    5.29 +
    5.30 +        for ( i = 0; i < max; i++ )
    5.31 +        {
    5.32 +            mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset(
    5.33 +                RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21);
    5.34 +            if ( put_user(mfn, &xmml.extent_start[i]) )
    5.35 +                return -EFAULT;
    5.36 +        }
    5.37 +
    5.38 +        if ( put_user(i, &((struct xen_machphys_mfn_list *)arg)->nr_extents) )
    5.39 +            return -EFAULT;
    5.40 +
    5.41 +        break;
    5.42 +
    5.43 +    default:
    5.44 +        rc = -ENOSYS;
    5.45 +        break;
    5.46 +    }
    5.47 +
    5.48 +    return rc;
    5.49 +}
    5.50  
    5.51  long do_stack_switch(unsigned long ss, unsigned long esp)
    5.52  {
     6.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Nov 25 17:58:36 2005 +0100
     6.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Nov 25 18:43:35 2005 +0100
     6.3 @@ -28,6 +28,7 @@
     6.4  #include <asm/flushtlb.h>
     6.5  #include <asm/fixmap.h>
     6.6  #include <asm/msr.h>
     6.7 +#include <public/memory.h>
     6.8  
     6.9  struct pfn_info *alloc_xen_pagetable(void)
    6.10  {
    6.11 @@ -174,6 +175,51 @@ void subarch_init_memory(struct domain *
    6.12      }
    6.13  }
    6.14  
    6.15 +long arch_memory_op(int op, void *arg)
    6.16 +{
    6.17 +    struct xen_machphys_mfn_list xmml;
    6.18 +    l3_pgentry_t l3e;
    6.19 +    l2_pgentry_t l2e;
    6.20 +    unsigned long mfn, v;
     6.21 +    unsigned int i = 0;
    6.22 +    long rc = 0;
    6.23 +
    6.24 +    switch ( op )
    6.25 +    {
    6.26 +    case XENMEM_machphys_mfn_list:
    6.27 +        if ( copy_from_user(&xmml, arg, sizeof(xmml)) )
    6.28 +            return -EFAULT;
    6.29 +
    6.30 +        for ( v = RDWR_MPT_VIRT_START; v != RDWR_MPT_VIRT_END; v += 1 << 21 )
    6.31 +        {
    6.32 +            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
    6.33 +                l3_table_offset(v)];
    6.34 +            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    6.35 +                break;
    6.36 +            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
    6.37 +            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    6.38 +                break;
    6.39 +            mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
    6.40 +            if ( i == xmml.max_extents )
    6.41 +                break;
    6.42 +            if ( put_user(mfn, &xmml.extent_start[i]) )
    6.43 +                return -EFAULT;
    6.44 +            i++;
    6.45 +        }
    6.46 +
    6.47 +        if ( put_user(i, &((struct xen_machphys_mfn_list *)arg)->nr_extents) )
    6.48 +            return -EFAULT;
    6.49 +
    6.50 +        break;
    6.51 +
    6.52 +    default:
    6.53 +        rc = -ENOSYS;
    6.54 +        break;
    6.55 +    }
    6.56 +
    6.57 +    return rc;
    6.58 +}
    6.59 +
    6.60  long do_stack_switch(unsigned long ss, unsigned long esp)
    6.61  {
    6.62      if ( (ss & 3) != 3 )
     7.1 --- a/xen/common/memory.c	Fri Nov 25 17:58:36 2005 +0100
     7.2 +++ b/xen/common/memory.c	Fri Nov 25 18:43:35 2005 +0100
     7.3 @@ -215,7 +215,7 @@ long do_memory_op(int cmd, void *arg)
     7.4          break;
     7.5  
     7.6      default:
     7.7 -        rc = -ENOSYS;
     7.8 +        rc = arch_memory_op(op, arg);
     7.9          break;
    7.10      }
    7.11  
     8.1 --- a/xen/include/asm-ia64/mm.h	Fri Nov 25 17:58:36 2005 +0100
     8.2 +++ b/xen/include/asm-ia64/mm.h	Fri Nov 25 18:43:35 2005 +0100
     8.3 @@ -440,4 +440,7 @@ extern unsigned long lookup_domain_mpa(s
     8.4  #define __gpa_to_mpa(_d, gpa)   \
     8.5      ((__gpfn_to_mfn((_d),(gpa)>>PAGE_SHIFT)<<PAGE_SHIFT)|((gpa)&~PAGE_MASK))
     8.6  
     8.7 +/* Arch-specific portion of memory_op hypercall. */
     8.8 +#define arch_memory_op(op, arg) (-ENOSYS)
     8.9 +
    8.10  #endif /* __ASM_IA64_MM_H__ */
     9.1 --- a/xen/include/asm-x86/mm.h	Fri Nov 25 17:58:36 2005 +0100
     9.2 +++ b/xen/include/asm-x86/mm.h	Fri Nov 25 18:43:35 2005 +0100
     9.3 @@ -379,6 +379,9 @@ int new_guest_cr3(unsigned long pfn);
     9.4  
     9.5  void propagate_page_fault(unsigned long addr, u16 error_code);
     9.6  
     9.7 -extern int __sync_lazy_execstate(void);
     9.8 +int __sync_lazy_execstate(void);
     9.9 +
    9.10 +/* Arch-specific portion of memory_op hypercall. */
    9.11 +long arch_memory_op(int op, void *arg);
    9.12  
    9.13  #endif /* __ASM_X86_MM_H__ */
    10.1 --- a/xen/include/public/memory.h	Fri Nov 25 17:58:36 2005 +0100
    10.2 +++ b/xen/include/public/memory.h	Fri Nov 25 18:43:35 2005 +0100
    10.3 @@ -60,6 +60,34 @@ typedef struct xen_memory_reservation {
    10.4  #define XENMEM_current_reservation  3
    10.5  #define XENMEM_maximum_reservation  4
    10.6  
    10.7 +/*
    10.8 + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
    10.9 + * mapping table. Architectures which do not have a m2p table do not implement
   10.10 + * this command.
   10.11 + * arg == addr of xen_machphys_mfn_list_t.
   10.12 + */
   10.13 +#define XENMEM_machphys_mfn_list    5
   10.14 +typedef struct xen_machphys_mfn_list {
   10.15 +    /*
   10.16 +     * Size of the 'extent_start' array. Fewer entries will be filled if the
   10.17 +     * machphys table is smaller than max_extents * 2MB.
   10.18 +     */
   10.19 +    unsigned int max_extents;
   10.20 +    
   10.21 +    /*
   10.22 +     * Pointer to buffer to fill with list of extent starts. If there are
   10.23 +     * any large discontiguities in the machine address space, 2MB gaps in
   10.24 +     * the machphys table will be represented by an MFN base of zero.
   10.25 +     */
   10.26 +    unsigned long *extent_start;
   10.27 +
   10.28 +    /*
   10.29 +     * Number of extents written to the above array. This will be smaller
   10.30 +     * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
   10.31 +     */
   10.32 +    unsigned int nr_extents;
   10.33 +} xen_machphys_mfn_list_t;
   10.34 +
   10.35  #endif /* __XEN_PUBLIC_MEMORY_H__ */
   10.36  
   10.37  /*