ia64/xen-unstable

changeset 15842:d956779d8d47

[IA64] Foreign p2m: xen side

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Thu Sep 06 14:13:38 2007 -0600 (2007-09-06)
parents c5f735271e22
children 3cde7ffdfd0f
files xen/arch/ia64/xen/dom0_ops.c xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/mm.c xen/include/asm-ia64/domain.h xen/include/asm-ia64/mm.h xen/include/public/arch-ia64.h
     1.1 --- a/xen/arch/ia64/xen/dom0_ops.c	Thu Sep 06 13:48:43 2007 -0600
     1.2 +++ b/xen/arch/ia64/xen/dom0_ops.c	Thu Sep 06 14:13:38 2007 -0600
     1.3 @@ -415,6 +415,15 @@ do_dom0vp_op(unsigned long cmd,
     1.4      case IA64_DOM0VP_add_io_space:
     1.5          ret = dom0vp_add_io_space(d, arg0, arg1, arg2);
     1.6          break;
     1.7 +    case IA64_DOM0VP_expose_foreign_p2m: {
     1.8 +        XEN_GUEST_HANDLE(char) hnd;
     1.9 +        set_xen_guest_handle(hnd, (char*)arg2);
    1.10 +        ret = dom0vp_expose_foreign_p2m(d, arg0, (domid_t)arg1, hnd, arg3);
    1.11 +        break;
    1.12 +    }
    1.13 +    case IA64_DOM0VP_unexpose_foreign_p2m:
    1.14 +        ret = dom0vp_unexpose_foreign_p2m(d, arg0, arg1);
    1.15 +        break;
    1.16      default:
    1.17          ret = -1;
    1.18  		printk("unknown dom0_vp_op 0x%lx\n", cmd);
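
Both new cases follow the dispatch convention visible above: arg0 carries the
destination gpfn, arg1 the source domid, arg2 the guest buffer (expose only)
and arg3 the flags. As a rough illustration, a privileged guest would invoke
them along the following lines. This is a sketch only: the
HYPERVISOR_ia64_dom0vp_op() wrapper name is an assumption, not part of this
changeset; only the cmd-plus-four-arguments shape is taken from the
dispatcher above.

    /* Hypothetical caller sketch.  buf must hold a struct
     * xen_ia64_memmap_info header immediately followed by its EFI
     * memory descriptors (the layout memmap_info_copy_from_guest()
     * below unpacks). */
    long expose(unsigned long dest_gpfn, domid_t domid, void* buf)
    {
        return HYPERVISOR_ia64_dom0vp_op(IA64_DOM0VP_expose_foreign_p2m,
                                         dest_gpfn, (unsigned long)domid,
                                         (unsigned long)buf,
                                         IA64_DOM0VP_EFP_ALLOC_PTE);
    }

    long unexpose(unsigned long dest_gpfn, domid_t domid)
    {
        return HYPERVISOR_ia64_dom0vp_op(IA64_DOM0VP_unexpose_foreign_p2m,
                                         dest_gpfn, (unsigned long)domid,
                                         0, 0);
    }
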
     2.1 --- a/xen/arch/ia64/xen/domain.c	Thu Sep 06 13:48:43 2007 -0600
     2.2 +++ b/xen/arch/ia64/xen/domain.c	Thu Sep 06 14:13:38 2007 -0600
     2.3 @@ -540,6 +540,7 @@ int arch_domain_create(struct domain *d)
     2.4  	if (is_idle_domain(d))
     2.5  	    return 0;
     2.6  
     2.7 +	foreign_p2m_init(d);
     2.8  #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
     2.9  	d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
    2.10  	dprintk(XENLOG_INFO, "%s:%d domain %d pervcpu_vhpt %d\n",
     3.1 --- a/xen/arch/ia64/xen/mm.c	Thu Sep 06 13:48:43 2007 -0600
     3.2 +++ b/xen/arch/ia64/xen/mm.c	Thu Sep 06 14:13:38 2007 -0600
     3.3 @@ -175,8 +175,10 @@
     3.4  #include <asm/p2m_entry.h>
     3.5  #include <asm/tlb_track.h>
     3.6  #include <linux/efi.h>
     3.7 +#include <linux/sort.h>
     3.8  #include <xen/guest_access.h>
     3.9  #include <asm/page.h>
    3.10 +#include <asm/dom_fw_common.h>
    3.11  #include <public/memory.h>
    3.12  #include <asm/event.h>
    3.13  
    3.14 @@ -354,6 +356,8 @@ mm_teardown(struct domain* d)
    3.15          if (mm_teardown_pgd(d, pgd, cur_offset))
    3.16              return -EAGAIN;
    3.17      }
    3.18 +
    3.19 +    foreign_p2m_destroy(d);
    3.20      return 0;
    3.21  }
    3.22  
    3.23 @@ -1528,6 +1532,12 @@ dom0vp_add_physmap_with_gmfn(struct doma
    3.24  }
    3.25  
    3.26  #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
    3.27 +#define P2M_PFN_ROUNDUP(x)      (((x) + PTRS_PER_PTE - 1) & \
    3.28 +                                 ~(PTRS_PER_PTE - 1))
    3.29 +#define P2M_PFN_ROUNDDOWN(x)    ((x) & ~(PTRS_PER_PTE - 1))
    3.30 +#define P2M_NUM_PFN(x)          (((x) + PTRS_PER_PTE - 1) / PTRS_PER_PTE)
    3.31 +#define MD_END(md)              ((md)->phys_addr + \
    3.32 +                                 ((md)->num_pages << EFI_PAGE_SHIFT))
    3.33  static struct page_info* p2m_pte_zero_page = NULL;
    3.34  
    3.35  /* This must be called before dom0 p2m table allocation */
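
All of the new helpers work at pte-page granularity. To make the arithmetic
concrete, assume the usual ia64 Xen configuration of 16KB pages and 8-byte
ptes, i.e. PTRS_PER_PTE == 2048 (a build assumption, used only for this
worked example):

    /* Worked example with PTRS_PER_PTE == 2048:
     *   P2M_PFN_ROUNDDOWN(3000) == 2048   start of the pte page holding 3000
     *   P2M_PFN_ROUNDUP(3000)   == 4096   end rounded up to whole pte pages
     *   P2M_NUM_PFN(3000)       == 2      pte pages needed for 3000 gpfns
     * MD_END(md) is the first byte past the descriptor's range,
     * md->phys_addr + md->num_pages * EFI_PAGE_SIZE. */
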
    3.36 @@ -1550,6 +1560,43 @@ expose_p2m_init(void)
    3.37      p2m_pte_zero_page = virt_to_page(pte);
    3.38  }
    3.39  
    3.40 +// allocate pgd, pmd of dest_dom if necessary
    3.41 +static int
    3.42 +allocate_pgd_pmd(struct domain* dest_dom, unsigned long dest_gpfn,
    3.43 +                 struct domain* src_dom,
    3.44 +                 unsigned long src_gpfn, unsigned long num_src_gpfn)
    3.45 +{
    3.46 +    unsigned long i = 0;
    3.47 +
    3.48 +    BUG_ON((src_gpfn % PTRS_PER_PTE) != 0);
    3.49 +    BUG_ON((num_src_gpfn % PTRS_PER_PTE) != 0);
    3.50 +
    3.51 +    while (i < num_src_gpfn) {
    3.52 +        volatile pte_t* src_pte;
    3.53 +        volatile pte_t* dest_pte;
    3.54 +
    3.55 +        src_pte = lookup_noalloc_domain_pte(src_dom,
    3.56 +                                            (src_gpfn + i) << PAGE_SHIFT);
    3.57 +        if (src_pte == NULL) {
    3.58 +            i++;
    3.59 +            continue;
    3.60 +        }
    3.61 +        
    3.62 +        dest_pte = lookup_alloc_domain_pte(dest_dom,
    3.63 +                                           (dest_gpfn << PAGE_SHIFT) +
    3.64 +                                           i * sizeof(pte_t));
    3.65 +        if (dest_pte == NULL) {
    3.66 +            gdprintk(XENLOG_INFO, "%s failed to allocate pte page\n",
    3.67 +                     __func__);
    3.68 +            return -ENOMEM;
    3.69 +        }
    3.70 +
    3.71 +        // skip to next pte page
    3.72 +        i = P2M_PFN_ROUNDDOWN(i + PTRS_PER_PTE);
    3.73 +    }
    3.74 +    return 0;
    3.75 +}
    3.76 +
    3.77  static int
    3.78  expose_p2m_page(struct domain* d, unsigned long mpaddr, struct page_info* page)
    3.79  {
    3.80 @@ -1559,6 +1606,94 @@ expose_p2m_page(struct domain* d, unsign
    3.81                                  ASSIGN_readonly);
    3.82  }
    3.83  
    3.84 +// expose pte page
    3.85 +static int
    3.86 +expose_p2m_range(struct domain* dest_dom, unsigned long dest_gpfn,
    3.87 +                 struct domain* src_dom,
    3.88 +                 unsigned long src_gpfn, unsigned long num_src_gpfn)
    3.89 +{
    3.90 +    unsigned long i = 0;
    3.91 +
    3.92 +    BUG_ON((src_gpfn % PTRS_PER_PTE) != 0);
    3.93 +    BUG_ON((num_src_gpfn % PTRS_PER_PTE) != 0);
    3.94 +
    3.95 +    while (i < num_src_gpfn) {
    3.96 +        volatile pte_t* pte;
    3.97 +
    3.98 +        pte = lookup_noalloc_domain_pte(src_dom, (src_gpfn + i) << PAGE_SHIFT);
    3.99 +        if (pte == NULL) {
   3.100 +            i++;
   3.101 +            continue;
   3.102 +        }
   3.103 +
   3.104 +        if (expose_p2m_page(dest_dom,
   3.105 +                            (dest_gpfn << PAGE_SHIFT) + i * sizeof(pte_t),
   3.106 +                            virt_to_page(pte)) < 0) {
   3.107 +            gdprintk(XENLOG_INFO, "%s failed to assign page\n", __func__);
   3.108 +            return -EAGAIN;
   3.109 +        }
   3.110 +
   3.111 +        // skip to next pte page
   3.112 +        i = P2M_PFN_ROUNDDOWN(i + PTRS_PER_PTE);
   3.113 +    }
   3.114 +    return 0;
   3.115 +}
   3.116 +
   3.117 +// expose p2m_pte_zero_page 
   3.118 +static int
   3.119 +expose_zero_page(struct domain* dest_dom, unsigned long dest_gpfn,
   3.120 +                 unsigned long num_src_gpfn)
   3.121 +{
   3.122 +    unsigned long i;
   3.123 +    
   3.124 +    for (i = 0; i < P2M_NUM_PFN(num_src_gpfn); i++) {
   3.125 +        volatile pte_t* pte;
   3.126 +        pte = lookup_noalloc_domain_pte(dest_dom,
   3.127 +                                        (dest_gpfn + i) << PAGE_SHIFT);
   3.128 +        if (pte == NULL || pte_present(*pte))
   3.129 +            continue;
   3.130 +
   3.131 +        if (expose_p2m_page(dest_dom, (dest_gpfn + i) << PAGE_SHIFT,
   3.132 +                            p2m_pte_zero_page) < 0) {
   3.133 +            gdprintk(XENLOG_INFO, "%s failed to assign zero-pte page\n",
   3.134 +                     __func__);
   3.135 +            return -EAGAIN;
   3.136 +        }
   3.137 +    }
   3.138 +    return 0;
   3.139 +}
   3.140 +
   3.141 +static int
   3.142 +expose_p2m(struct domain* dest_dom, unsigned long dest_gpfn,
   3.143 +           struct domain* src_dom,
   3.144 +           unsigned long src_gpfn, unsigned long num_src_gpfn)
   3.145 +{
   3.146 +    if (allocate_pgd_pmd(dest_dom, dest_gpfn,
   3.147 +                         src_dom, src_gpfn, num_src_gpfn))
   3.148 +        return -ENOMEM;
   3.149 +
   3.150 +    if (expose_p2m_range(dest_dom, dest_gpfn,
   3.151 +                         src_dom, src_gpfn, num_src_gpfn))
   3.152 +        return -EAGAIN;
   3.153 +
   3.154 +    if (expose_zero_page(dest_dom, dest_gpfn, num_src_gpfn))
   3.155 +        return -EAGAIN;
   3.156 +    
   3.157 +    return 0;
   3.158 +}
   3.159 +
   3.160 +static void
   3.161 +unexpose_p2m(struct domain* dest_dom,
   3.162 +             unsigned long dest_gpfn, unsigned long num_dest_gpfn)
   3.163 +{
   3.164 +    unsigned long i;
   3.165 +
   3.166 +    for (i = 0; i < num_dest_gpfn; i++) {
   3.167 +        zap_domain_page_one(dest_dom, (dest_gpfn + i) << PAGE_SHIFT,
   3.168 +                            0, INVALID_MFN);
   3.169 +    }
   3.170 +}
   3.171 +
   3.172  // It is possible to optimize the loop, but this isn't performance critical.
   3.173  unsigned long
   3.174  dom0vp_expose_p2m(struct domain* d,
   3.175 @@ -1566,10 +1701,8 @@ dom0vp_expose_p2m(struct domain* d,
   3.176                    unsigned long assign_start_gpfn,
   3.177                    unsigned long expose_size, unsigned long granule_pfn)
   3.178  {
   3.179 +    unsigned long ret;
   3.180      unsigned long expose_num_pfn = expose_size >> PAGE_SHIFT;
   3.181 -    unsigned long i;
   3.182 -    volatile pte_t* conv_pte;
   3.183 -    volatile pte_t* assign_pte;
   3.184  
   3.185      if ((expose_size % PAGE_SIZE) != 0 ||
   3.186          (granule_pfn % PTRS_PER_PTE) != 0 ||
   3.187 @@ -1590,65 +1723,419 @@ dom0vp_expose_p2m(struct domain* d,
   3.188                  __func__, granule_pfn, PTRS_PER_PTE);
   3.189          return -ENOSYS;
   3.190      }
   3.191 -
   3.192 -    // allocate pgd, pmd.
   3.193 -    i = conv_start_gpfn;
   3.194 -    while (i < expose_num_pfn) {
   3.195 -        conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
   3.196 -                                             PAGE_SHIFT);
   3.197 -        if (conv_pte == NULL) {
   3.198 -            i++;
   3.199 +    ret = expose_p2m(d, assign_start_gpfn,
   3.200 +                     d, conv_start_gpfn, expose_num_pfn);
   3.201 +    return ret;
   3.202 +}
   3.203 +
   3.204 +static int
   3.205 +memmap_info_copy_from_guest(struct xen_ia64_memmap_info* memmap_info,
   3.206 +                            char** memmap_p,
   3.207 +                            XEN_GUEST_HANDLE(char) buffer)
   3.208 +{
   3.209 +    char *memmap;
   3.210 +    char *p;
   3.211 +    char *memmap_end;
   3.212 +    efi_memory_desc_t *md;
   3.213 +    unsigned long start;
   3.214 +    unsigned long end;
   3.215 +    efi_memory_desc_t *prev_md;
   3.216 +
   3.217 +    if (copy_from_guest((char*)memmap_info, buffer, sizeof(*memmap_info)))
   3.218 +        return -EFAULT;
   3.219 +    if (memmap_info->efi_memdesc_size < sizeof(efi_memory_desc_t) ||
   3.220 +        memmap_info->efi_memmap_size < memmap_info->efi_memdesc_size ||
   3.221 +        (memmap_info->efi_memmap_size % memmap_info->efi_memdesc_size) != 0)
   3.222 +        return -EINVAL;
   3.223 +    
   3.224 +    memmap = _xmalloc(memmap_info->efi_memmap_size,
   3.225 +                      __alignof__(efi_memory_desc_t));
   3.226 +    if (memmap == NULL)
   3.227 +        return -ENOMEM;
   3.228 +    if (copy_from_guest_offset(memmap, buffer, sizeof(*memmap_info),
   3.229 +                               memmap_info->efi_memmap_size)) {
   3.230 +        xfree(memmap);
   3.231 +        return -EFAULT;
   3.232 +    }
   3.233 +
   3.234 +    /* integrity check & simplify */
   3.235 +    sort(memmap, memmap_info->efi_memmap_size / memmap_info->efi_memdesc_size,
   3.236 +         memmap_info->efi_memdesc_size, efi_mdt_cmp, NULL);
   3.237 +
   3.238 +    /* alignment & overlap check */
   3.239 +    prev_md = NULL;
   3.240 +    p = memmap;
   3.241 +    memmap_end = memmap + memmap_info->efi_memmap_size;
   3.242 +    for (p = memmap; p < memmap_end; p += memmap_info->efi_memdesc_size) {
   3.243 +        md = (efi_memory_desc_t*)p;
   3.244 +        start = md->phys_addr;
   3.245 +        
   3.246 +        if (start & ((1UL << EFI_PAGE_SHIFT) - 1) || md->num_pages == 0) {
   3.247 +            xfree(memmap);
   3.248 +            return -EINVAL;
   3.249 +        }
   3.250 +
   3.251 +        if (prev_md != NULL) {
   3.252 +            unsigned long prev_end = MD_END(prev_md);
   3.253 +            if (prev_end > start) {
   3.254 +                xfree(memmap);
   3.255 +                return -EINVAL;
   3.256 +            }
   3.257 +        }
   3.258 +
   3.259 +        prev_md = (efi_memory_desc_t *)p;
   3.260 +    }
   3.261 +
   3.262 +    /* coalesce */
   3.263 +    prev_md = NULL;
   3.264 +    p = memmap;
   3.265 +    while (p < memmap_end) {
   3.266 +        md = (efi_memory_desc_t*)p;
   3.267 +        start = md->phys_addr;
   3.268 +        end = MD_END(md);
   3.269 +
   3.270 +        start = P2M_PFN_ROUNDDOWN(start >> PAGE_SHIFT) << PAGE_SHIFT;
   3.271 +        end = P2M_PFN_ROUNDUP(end >> PAGE_SHIFT) << PAGE_SHIFT;
   3.272 +        md->phys_addr = start;
   3.273 +        md->num_pages = (end - start) >> EFI_PAGE_SHIFT;
   3.274 +
   3.275 +        if (prev_md != NULL) {
   3.276 +            unsigned long prev_end = MD_END(prev_md);
   3.277 +            if (prev_end >= start) {
   3.278 +                size_t left;
   3.279 +                end = max(prev_end, end);
   3.280 +                prev_md->num_pages = (end - prev_md->phys_addr) >> EFI_PAGE_SHIFT;
   3.281 +
   3.282 +                left = memmap_end - p;
   3.283 +                if (left > memmap_info->efi_memdesc_size) {
   3.284 +                    left -= memmap_info->efi_memdesc_size;
   3.285 +                    memmove(p, p + memmap_info->efi_memdesc_size, left);
   3.286 +                }
   3.287 +
   3.288 +                memmap_info->efi_memmap_size -= memmap_info->efi_memdesc_size;
   3.289 +                memmap_end -= memmap_info->efi_memdesc_size;
   3.290 +                continue;
   3.291 +            }
   3.292 +        }
   3.293 +
   3.294 +        prev_md = md;
   3.295 +        p += memmap_info->efi_memdesc_size;
   3.296 +    }
   3.297 +
   3.298 +    if (copy_to_guest(buffer, (char*)memmap_info, sizeof(*memmap_info)) ||
   3.299 +        copy_to_guest_offset(buffer, sizeof(*memmap_info),
   3.300 +                             (char*)memmap, memmap_info->efi_memmap_size)) {
   3.301 +        xfree(memmap);
   3.302 +        return -EFAULT;
   3.303 +    }
   3.304 +    
   3.305 +    *memmap_p = memmap;
   3.306 +    return 0;
   3.307 +}
   3.308 +
   3.309 +static int
   3.310 +foreign_p2m_allocate_pte(struct domain* d,
   3.311 +                         const struct xen_ia64_memmap_info* memmap_info,
   3.312 +                         const void* memmap)
   3.313 +{
   3.314 +    const void* memmap_end = memmap + memmap_info->efi_memmap_size;
   3.315 +    const void* p;
   3.316 +
   3.317 +    for (p = memmap; p < memmap_end; p += memmap_info->efi_memdesc_size) {
   3.318 +        const efi_memory_desc_t* md = p;
   3.319 +        unsigned long start = md->phys_addr;
   3.320 +        unsigned long end = MD_END(md);
   3.321 +        unsigned long gpaddr;
   3.322 +
   3.323 +        for (gpaddr = start; gpaddr < end; gpaddr += PAGE_SIZE) {
   3.324 +            if (lookup_alloc_domain_pte(d, gpaddr) == NULL) {
   3.325 +                return -ENOMEM;
   3.326 +            }
   3.327 +        }
   3.328 +    }
   3.329 +
   3.330 +    return 0;
   3.331 +}
   3.332 +
   3.333 +struct foreign_p2m_region {
   3.334 +    unsigned long       gpfn;
   3.335 +    unsigned long       num_gpfn;
   3.336 +};
   3.337 +
   3.338 +struct foreign_p2m_entry {
   3.339 +    struct list_head            list;
   3.340 +    int                         busy;
   3.341 +
   3.342 +    /* src domain  */
   3.343 +    struct domain*              src_dom;
   3.344 +
   3.345 +    /* region into which foreign p2m table is mapped */
   3.346 +    unsigned long               gpfn;
   3.347 +    unsigned long               num_gpfn;
   3.348 +    unsigned int                num_region;
   3.349 +    struct foreign_p2m_region   region[0];
   3.350 +};
   3.351 +
   3.352 +/* caller must increment the reference count of src_dom */
   3.353 +static int
   3.354 +foreign_p2m_alloc(struct foreign_p2m* foreign_p2m,
   3.355 +                  unsigned long dest_gpfn, struct domain* src_dom,
   3.356 +                  struct xen_ia64_memmap_info* memmap_info, void* memmap,
   3.357 +                  struct foreign_p2m_entry** entryp)
   3.358 +{
   3.359 +    void* memmap_end = memmap + memmap_info->efi_memmap_size;
   3.360 +    efi_memory_desc_t* md;
   3.361 +    unsigned long dest_gpfn_end;
   3.362 +    unsigned long src_gpfn;
   3.363 +    unsigned long src_gpfn_end;
   3.364 +
   3.365 +    unsigned int num_region;
   3.366 +    struct foreign_p2m_entry* entry;
   3.367 +    struct foreign_p2m_entry* prev;
   3.368 +    struct foreign_p2m_entry* pos;
   3.369 +
   3.370 +    num_region = (memmap_end - memmap) / memmap_info->efi_memdesc_size;
   3.371 +
   3.372 +    md = memmap;
   3.373 +    src_gpfn = P2M_PFN_ROUNDDOWN(md->phys_addr >> PAGE_SHIFT);
   3.374 +
   3.375 +    md = memmap + (num_region - 1) * memmap_info->efi_memdesc_size;
   3.376 +    src_gpfn_end = MD_END(md) >> PAGE_SHIFT;
   3.377 +    if (src_gpfn_end >
   3.378 +        P2M_PFN_ROUNDUP(src_dom->arch.convmem_end >> PAGE_SHIFT))
   3.379 +        return -EINVAL;
   3.380 +
   3.381 +    src_gpfn_end = P2M_PFN_ROUNDUP(src_gpfn_end);
   3.382 +    dest_gpfn_end = dest_gpfn + P2M_NUM_PFN(src_gpfn_end - src_gpfn);
   3.383 +    entry = _xmalloc(sizeof(*entry) + num_region * sizeof(entry->region[0]),
   3.384 +                     __alignof__(*entry));
   3.385 +    if (entry == NULL)
   3.386 +        return -ENOMEM;
   3.387 +
   3.388 +    entry->busy = 1;
   3.389 +    entry->gpfn = dest_gpfn;
   3.390 +    entry->num_gpfn = dest_gpfn_end - dest_gpfn;
   3.391 +    entry->src_dom = src_dom;
   3.392 +    entry->num_region = 0;
   3.393 +    memset(entry->region, 0, sizeof(entry->region[0]) * num_region);
   3.394 +    prev = NULL;
   3.395 +
   3.396 +    spin_lock(&foreign_p2m->lock);
   3.397 +    if (list_empty(&foreign_p2m->head))
   3.398 +        prev = (struct foreign_p2m_entry*)&foreign_p2m->head;
   3.399 +
   3.400 +    list_for_each_entry(pos, &foreign_p2m->head, list) {
   3.401 +        if (pos->gpfn + pos->num_gpfn < dest_gpfn) {
   3.402 +            prev = pos;
   3.403              continue;
   3.404          }
   3.405 -        
   3.406 -        assign_pte = lookup_alloc_domain_pte(d, (assign_start_gpfn <<
   3.407 -                                             PAGE_SHIFT) + i * sizeof(pte_t));
   3.408 -        if (assign_pte == NULL) {
   3.409 -            gdprintk(XENLOG_INFO, "%s failed to allocate pte page\n", __func__);
   3.410 -            return -ENOMEM;
   3.411 -        }
   3.412 -
   3.413 -        // skip to next pte page
   3.414 -        i += PTRS_PER_PTE;
   3.415 -        i &= ~(PTRS_PER_PTE - 1);
   3.416 -    }
   3.417 -
   3.418 -    // expose pte page
   3.419 -    i = 0;
   3.420 -    while (i < expose_num_pfn) {
   3.421 -        conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
   3.422 -                                             PAGE_SHIFT);
   3.423 -        if (conv_pte == NULL) {
   3.424 -            i++;
   3.425 -            continue;
   3.426 +
   3.427 +        if (dest_gpfn_end < pos->gpfn) {
   3.428 +            if (prev != NULL && prev->gpfn + prev->num_gpfn > dest_gpfn)
   3.429 +                prev = NULL; /* overlap */
   3.430 +            break;
   3.431          }
   3.432  
   3.433 -        if (expose_p2m_page(d, (assign_start_gpfn << PAGE_SHIFT) +
   3.434 -                            i * sizeof(pte_t), virt_to_page(conv_pte)) < 0) {
   3.435 -            gdprintk(XENLOG_INFO, "%s failed to assign page\n", __func__);
   3.436 -            return -EAGAIN;
   3.437 -        }
   3.438 -
   3.439 -        // skip to next pte page
   3.440 -        i += PTRS_PER_PTE;
   3.441 -        i &= ~(PTRS_PER_PTE - 1);
   3.442 +        /* overlap */
   3.443 +        prev = NULL;
   3.444 +        break;
   3.445      }
   3.446 -
   3.447 -    // expose p2m_pte_zero_page 
   3.448 -    for (i = 0; i < (expose_num_pfn + PTRS_PER_PTE - 1) / PTRS_PER_PTE; i++) {
   3.449 -        assign_pte = lookup_noalloc_domain_pte(d, (assign_start_gpfn + i) <<
   3.450 -                                               PAGE_SHIFT);
   3.451 -        if (assign_pte == NULL || pte_present(*assign_pte))
   3.452 -            continue;
   3.453 -
   3.454 -        if (expose_p2m_page(d, (assign_start_gpfn + i) << PAGE_SHIFT,
   3.455 -                            p2m_pte_zero_page) < 0) {
   3.456 -            gdprintk(XENLOG_INFO, "%s failed to assign zero-pte page\n", __func__);
   3.457 -            return -EAGAIN;
   3.458 -        }
   3.459 +    if (prev != NULL) {
   3.460 +        list_add(&entry->list, &prev->list);
   3.461 +        spin_unlock(&foreign_p2m->lock);
   3.462 +        *entryp = entry;
   3.463 +        return 0;
   3.464      }
   3.465 -    
   3.466 -    return 0;
   3.467 +    spin_unlock(&foreign_p2m->lock);
   3.468 +    xfree(entry);
   3.469 +    return -EBUSY;
   3.470 +}
   3.471 +
   3.472 +static void
   3.473 +foreign_p2m_unexpose(struct domain* dest_dom, struct foreign_p2m_entry* entry)
   3.474 +{
   3.475 +    unsigned int i;
   3.476 +
   3.477 +    BUG_ON(!entry->busy);
   3.478 +    for (i = 0; i < entry->num_region; i++)
   3.479 +        unexpose_p2m(dest_dom,
   3.480 +                     entry->region[i].gpfn, entry->region[i].num_gpfn);
   3.481 +}
   3.482 +
   3.483 +static void
   3.484 +foreign_p2m_unbusy(struct foreign_p2m* foreign_p2m,
   3.485 +                   struct foreign_p2m_entry* entry)
   3.486 +{
   3.487 +    spin_lock(&foreign_p2m->lock);
   3.488 +    BUG_ON(!entry->busy);
   3.489 +    entry->busy = 0;
   3.490 +    spin_unlock(&foreign_p2m->lock);
   3.491 +}
   3.492 +
   3.493 +static void
   3.494 +foreign_p2m_free(struct foreign_p2m* foreign_p2m, 
   3.495 +                 struct foreign_p2m_entry* entry)
   3.496 +{
   3.497 +    spin_lock(&foreign_p2m->lock);
   3.498 +    BUG_ON(!entry->busy);
   3.499 +    list_del(&entry->list);
   3.500 +    spin_unlock(&foreign_p2m->lock);
   3.501 +
   3.502 +    put_domain(entry->src_dom);
   3.503 +    xfree(entry);
   3.504 +}
   3.505 +
   3.506 +void
   3.507 +foreign_p2m_init(struct domain* d)
   3.508 +{
   3.509 +    struct foreign_p2m* foreign_p2m = &d->arch.foreign_p2m;
   3.510 +    INIT_LIST_HEAD(&foreign_p2m->head);
   3.511 +    spin_lock_init(&foreign_p2m->lock);
   3.512 +}
   3.513 +
   3.514 +void
   3.515 +foreign_p2m_destroy(struct domain* d)
   3.516 +{
   3.517 +    struct foreign_p2m* foreign_p2m = &d->arch.foreign_p2m;
   3.518 +    struct foreign_p2m_entry* entry;
   3.519 +    struct foreign_p2m_entry* n;
   3.520 +
   3.521 +    spin_lock(&foreign_p2m->lock);
   3.522 +    list_for_each_entry_safe(entry, n, &foreign_p2m->head, list) {
   3.523 +        /* mm_teardown() cleared p2m table already */
   3.524 +        /* foreign_p2m_unexpose(d, entry);*/
   3.525 +        list_del(&entry->list);
   3.526 +        put_domain(entry->src_dom);
   3.527 +        xfree(entry);
   3.528 +    }
   3.529 +    spin_unlock(&foreign_p2m->lock);
   3.530 +}
   3.531 +
   3.532 +unsigned long
   3.533 +dom0vp_expose_foreign_p2m(struct domain* dest_dom,
   3.534 +                          unsigned long dest_gpfn,
   3.535 +                          domid_t domid,
   3.536 +                          XEN_GUEST_HANDLE(char) buffer,
   3.537 +                          unsigned long flags)
   3.538 +{
   3.539 +    unsigned long ret = 0;
   3.540 +    struct domain* src_dom;
   3.541 +    struct xen_ia64_memmap_info memmap_info;
   3.542 +    char* memmap;
   3.543 +    void* memmap_end;
   3.544 +    void* p;
   3.545 +
   3.546 +    struct foreign_p2m_entry* entry;
   3.547 +
   3.548 +    ret = memmap_info_copy_from_guest(&memmap_info, &memmap, buffer);
   3.549 +    if (ret != 0)
   3.550 +        return ret;
   3.551 +
   3.552 +    dest_dom = rcu_lock_domain(dest_dom);
   3.553 +    if (dest_dom == NULL) {
   3.554 +        ret = -EINVAL;
   3.555 +        goto out;
   3.556 +    }
   3.557 +#if 1
   3.558 +    // Exposing a domain's own p2m table as a foreign p2m isn't allowed.
   3.559 +    // Otherwise the domain could never be destroyed, because nothing
   3.560 +    // would ever decrement the self-held domain reference count.
   3.561 +    if (domid == dest_dom->domain_id) {
   3.562 +        ret = -EINVAL;
   3.563 +        goto out_unlock;
   3.564 +    }
   3.565 +#endif    
   3.566 +
   3.567 +    src_dom = get_domain_by_id(domid);
   3.568 +    if (src_dom == NULL) {
   3.569 +        ret = -EINVAL;
   3.570 +        goto out_unlock;
   3.571 +    }
   3.572 +
   3.573 +    if (flags & IA64_DOM0VP_EFP_ALLOC_PTE) {
   3.574 +        ret = foreign_p2m_allocate_pte(src_dom, &memmap_info, memmap);
   3.575 +        if (ret != 0) {
   3.576 +            put_domain(src_dom);
   3.577 +            goto out_unlock;
   3.578 +        }
   3.577 +    }
   3.578 +
   3.579 +    ret = foreign_p2m_alloc(&dest_dom->arch.foreign_p2m, dest_gpfn,
   3.580 +                            src_dom, &memmap_info, memmap, &entry);
   3.581 +    if (ret != 0) {
   3.582 +        put_domain(src_dom);
   3.583 +        goto out_unlock;
   3.584 +    }
   3.583 +
   3.584 +    memmap_end = memmap + memmap_info.efi_memmap_size;
   3.585 +    for (p = memmap; p < memmap_end; p += memmap_info.efi_memdesc_size) {
   3.586 +        efi_memory_desc_t* md = p;
   3.587 +        unsigned long src_gpfn =
   3.588 +            P2M_PFN_ROUNDDOWN(md->phys_addr >> PAGE_SHIFT);
   3.589 +        unsigned long src_gpfn_end =
   3.590 +            P2M_PFN_ROUNDUP(MD_END(md) >> PAGE_SHIFT);
   3.591 +        unsigned long num_src_gpfn = src_gpfn_end - src_gpfn;
   3.592 +        
   3.593 +        ret = expose_p2m(dest_dom, dest_gpfn + src_gpfn / PTRS_PER_PTE,
   3.594 +                         src_dom, src_gpfn, num_src_gpfn);
   3.595 +        if (ret != 0)
   3.596 +            break;
   3.597 +
   3.598 +        entry->region[entry->num_region].gpfn =
   3.599 +            dest_gpfn + src_gpfn / PTRS_PER_PTE;
   3.600 +        entry->region[entry->num_region].num_gpfn = P2M_NUM_PFN(num_src_gpfn);
   3.601 +        entry->num_region++;
   3.602 +    }
   3.603 +
   3.604 +    if (ret == 0) {
   3.605 +        foreign_p2m_unbusy(&dest_dom->arch.foreign_p2m, entry);
   3.606 +    } else {
   3.607 +        foreign_p2m_unexpose(dest_dom, entry);
   3.608 +        foreign_p2m_free(&dest_dom->arch.foreign_p2m, entry);
   3.609 +    }
   3.610 +
   3.611 + out_unlock:
   3.612 +    rcu_unlock_domain(dest_dom);
   3.613 + out:
   3.614 +    xfree(memmap);
   3.615 +    return ret;
   3.616 +}
   3.617 +
   3.618 +unsigned long
   3.619 +dom0vp_unexpose_foreign_p2m(struct domain* dest_dom,
   3.620 +                            unsigned long dest_gpfn,
   3.621 +                            domid_t domid)
   3.622 +{
   3.623 +    int ret = -ENOENT;
   3.624 +    struct foreign_p2m* foreign_p2m = &dest_dom->arch.foreign_p2m;
   3.625 +    struct foreign_p2m_entry* entry;
   3.626 +
   3.627 +    dest_dom = rcu_lock_domain(dest_dom);
   3.628 +    if (dest_dom == NULL)
   3.629 +        return ret;
   3.630 +    spin_lock(&foreign_p2m->lock);
   3.631 +    list_for_each_entry(entry, &foreign_p2m->head, list) {
   3.632 +        if (entry->gpfn < dest_gpfn)
   3.633 +            continue;
   3.634 +        if (dest_gpfn < entry->gpfn)
   3.635 +            break;
   3.636 +
   3.637 +        if (domid == entry->src_dom->domain_id)
   3.638 +            ret = 0;
   3.639 +        else
   3.640 +            ret = -EINVAL;
   3.641 +        break;
   3.642 +    }
   3.643 +    if (ret == 0) {
   3.644 +        if (entry->busy == 0)
   3.645 +            entry->busy = 1;
   3.646 +        else
   3.647 +            ret = -EBUSY;
   3.648 +    }
   3.649 +    spin_unlock(&foreign_p2m->lock);
   3.650 +
   3.651 +    if (ret == 0) {
   3.652 +        foreign_p2m_unexpose(dest_dom, entry);
   3.653 +        foreign_p2m_free(&dest_dom->arch.foreign_p2m, entry);
   3.654 +    }
   3.655 +    rcu_unlock_domain(dest_dom);
   3.656 +    return ret;
   3.657  }
   3.658  #endif
   3.659  
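
A note on the layout expose_p2m() produces: each pte page of the source p2m
covers PTRS_PER_PTE source gpfns and occupies exactly one destination frame,
so the pte page for source gpfn g is mapped at dest_gpfn + g / PTRS_PER_PTE
and g's entry sits at slot g % PTRS_PER_PTE; unpopulated ranges are backed by
p2m_pte_zero_page and therefore read as not-present. If the privileged guest
maps the whole destination window contiguously, the two steps collapse into a
single array index, as in this consumer-side sketch (hypothetical helper;
p2m_base is the guest's virtual mapping of the window starting at dest_gpfn):

    /* Hypothetical sketch: read one entry of an exposed foreign p2m. */
    static inline unsigned long
    foreign_p2m_read(const volatile pte_t* p2m_base, unsigned long src_gpfn)
    {
        /* page src_gpfn / PTRS_PER_PTE, slot src_gpfn % PTRS_PER_PTE;
         * with a flat mapping that is simply index src_gpfn */
        return pte_val(p2m_base[src_gpfn]);
    }
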
     4.1 --- a/xen/include/asm-ia64/domain.h	Thu Sep 06 13:48:43 2007 -0600
     4.2 +++ b/xen/include/asm-ia64/domain.h	Thu Sep 06 14:13:38 2007 -0600
     4.3 @@ -47,6 +47,16 @@ struct mm_struct {
     4.4      //	atomic_t mm_users;			/* How many users with user space? */
     4.5  };
     4.6  
     4.7 +struct foreign_p2m {
     4.8 +    spinlock_t          lock;
     4.9 +    /*
    4.10 +     * list sorted by entry->gpfn.
    4.11 +     * Only a small number of foreign domain p2m mappings are
    4.12 +     * expected to exist at the same time.
    4.13 +     */
    4.14 +    struct list_head    head;
    4.15 +};
    4.16 +
    4.17  struct last_vcpu {
    4.18  #define INVALID_VCPU_ID INT_MAX
    4.19      int vcpu_id;
    4.20 @@ -164,6 +174,9 @@ struct arch_domain {
    4.21      /* Number of faults.  */
    4.22      atomic64_t shadow_fault_count;
    4.23  
    4.24 +    /* for foreign domain p2m table mapping */
    4.25 +    struct foreign_p2m foreign_p2m;
    4.26 +
    4.27      struct last_vcpu last_vcpu[NR_CPUS];
    4.28  
    4.29      struct opt_feature opt_feature;
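
The busy flag in struct foreign_p2m_entry (defined in mm.c above) serializes
expose and unexpose of one entry without holding the spinlock across the
long-running map/unmap work. Its protocol, summarized from
foreign_p2m_alloc()/foreign_p2m_unbusy()/foreign_p2m_free() above:

    /* busy-flag lifecycle (all transitions take foreign_p2m->lock):
     *   foreign_p2m_alloc()    entry enters the list with busy = 1
     *   expose succeeded       foreign_p2m_unbusy():  busy -> 0
     *   expose failed          foreign_p2m_unexpose() + foreign_p2m_free()
     *   unexpose               busy 0 -> 1, or -EBUSY if already set;
     *                          unexpose + free then run outside the lock */
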
     5.1 --- a/xen/include/asm-ia64/mm.h	Thu Sep 06 13:48:43 2007 -0600
     5.2 +++ b/xen/include/asm-ia64/mm.h	Thu Sep 06 14:13:38 2007 -0600
     5.3 @@ -440,9 +440,17 @@ extern unsigned long dom0vp_add_physmap_
     5.4  #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
     5.5  extern void expose_p2m_init(void);
     5.6  extern unsigned long dom0vp_expose_p2m(struct domain* d, unsigned long conv_start_gpfn, unsigned long assign_start_gpfn, unsigned long expose_size, unsigned long granule_pfn);
     5.7 +extern void foreign_p2m_init(struct domain* d);
     5.8 +extern void foreign_p2m_destroy(struct domain* d);
     5.9 +extern unsigned long dom0vp_expose_foreign_p2m(struct domain* dest_dom, unsigned long dest_gpfn, domid_t domid, XEN_GUEST_HANDLE(char) buffer, unsigned long flags);
    5.10 +extern unsigned long dom0vp_unexpose_foreign_p2m(struct domain* dest_dom, unsigned long dest_gpfn, domid_t domid);
    5.11  #else
    5.12  #define expose_p2m_init()       do { } while (0)
    5.13  #define dom0vp_expose_p2m(d, conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn)	(-ENOSYS)
    5.14 +#define foreign_p2m_init(d)	do { } while (0)
    5.15 +#define foreign_p2m_destroy(d)	do { } while (0)
    5.16 +#define dom0vp_expose_foreign_p2m(dest_dom, dest_gpfn, domid, buffer, flags)	(-ENOSYS)
    5.17 +#define dom0vp_unexpose_foreign_p2m(dest_dom, dest_gpfn, domid)	(-ENOSYS)
    5.18  #endif
    5.19  
    5.20  extern volatile unsigned long *mpt_table;
     6.1 --- a/xen/include/public/arch-ia64.h	Thu Sep 06 13:48:43 2007 -0600
     6.2 +++ b/xen/include/public/arch-ia64.h	Thu Sep 06 14:13:38 2007 -0600
     6.3 @@ -469,6 +469,13 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
     6.4  /* Add an I/O port space range */
     6.5  #define IA64_DOM0VP_add_io_space        11
     6.6  
     6.7 +/* expose a foreign domain's p2m table to the privileged domain */
     6.8 +#define IA64_DOM0VP_expose_foreign_p2m  12
     6.9 +#define         IA64_DOM0VP_EFP_ALLOC_PTE       0x1 /* also allocate src p2m pte pages */
    6.10 +
    6.11 +/* unexpose a foreign domain's p2m table from the privileged domain */
    6.12 +#define IA64_DOM0VP_unexpose_foreign_p2m        13
    6.13 +
    6.14  // flags for page assignment to pseudo physical address space
    6.15  #define _ASSIGN_readonly                0
    6.16  #define ASSIGN_readonly                 (1UL << _ASSIGN_readonly)
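
For completeness, the buffer consumed by IA64_DOM0VP_expose_foreign_p2m (arg2
of the hypercall) is a struct xen_ia64_memmap_info header followed immediately
by efi_memmap_size bytes of EFI memory descriptors naming the source-domain
ranges to expose: exactly the layout memmap_info_copy_from_guest() unpacks,
then writes back after sorting and coalescing. A guest-side assembly sketch
(hypothetical: descs/ndesc are placeholders and error handling is omitted):

    /* Hypothetical sketch: build the expose_foreign_p2m buffer from
     * ndesc descriptors held in descs[].  Header first, then the raw
     * descriptors back to back; other header fields (e.g. a descriptor
     * version) are left out of this sketch. */
    size_t desc_bytes = ndesc * sizeof(efi_memory_desc_t);
    struct xen_ia64_memmap_info* mi = malloc(sizeof(*mi) + desc_bytes);

    mi->efi_memdesc_size = sizeof(efi_memory_desc_t);
    mi->efi_memmap_size  = desc_bytes;
    memcpy((char*)mi + sizeof(*mi), descs, desc_bytes);
    /* pass mi as arg2 of the hypercall, then free(mi) afterwards */
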