ia64/xen-unstable

changeset 13293:f632c0c36976

Add page table setup and handling, including the creation of an m2p table
meaningful to compatibility mode guests.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Emmanuel Ackaouy <ack@xensource.com>
date Fri Jan 05 17:34:30 2007 +0000 (2007-01-05)
parents 4c8f157a3a47
children 3870aff51ae3
files xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/domctl.c xen/arch/x86/e820.c xen/arch/x86/mm.c xen/arch/x86/x86_64/mm.c xen/common/compat/kernel.c xen/include/asm-x86/config.h xen/include/asm-x86/domain.h xen/include/asm-x86/mm.h xen/include/asm-x86/page.h xen/include/asm-x86/x86_32/page-2level.h xen/include/asm-x86/x86_32/page-3level.h xen/include/asm-x86/x86_64/page.h xen/include/asm-x86/x86_64/uaccess.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Fri Jan 05 17:34:29 2007 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Fri Jan 05 17:34:30 2007 +0000
     1.3 @@ -127,6 +127,28 @@ void free_vcpu_struct(struct vcpu *v)
     1.4      xfree(v);
     1.5  }
     1.6  
     1.7 +#ifdef CONFIG_COMPAT
     1.8 +static int setup_compat_l4(struct vcpu *v)
     1.9 +{
    1.10 +    struct page_info *pg = alloc_domheap_page(NULL);
    1.11 +    l4_pgentry_t *l4tab;
    1.12 +
    1.13 +    if ( !pg )
    1.14 +        return -ENOMEM;
    1.15 +    l4tab = copy_page(page_to_virt(pg), idle_pg_table);
    1.16 +    l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
    1.17 +        l4e_from_page(pg, __PAGE_HYPERVISOR);
    1.18 +    l4tab[l4_table_offset(PERDOMAIN_VIRT_START)] =
    1.19 +        l4e_from_paddr(__pa(v->domain->arch.mm_perdomain_l3), __PAGE_HYPERVISOR);
    1.20 +    v->arch.guest_table = pagetable_from_page(pg);
    1.21 +    v->arch.guest_table_user = v->arch.guest_table;
    1.22 +
    1.23 +    return 0;
    1.24 +}
    1.25 +#else
    1.26 +#define setup_compat_l4(v) 0
    1.27 +#endif
    1.28 +
    1.29  int vcpu_initialise(struct vcpu *v)
    1.30  {
    1.31      struct domain *d = v->domain;
    1.32 @@ -161,11 +183,16 @@ int vcpu_initialise(struct vcpu *v)
    1.33      v->arch.perdomain_ptes =
    1.34          d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
    1.35  
    1.36 +    if ( IS_COMPAT(d) && (rc = setup_compat_l4(v)) != 0 )
    1.37 +        return rc;
    1.38 +
    1.39      return 0;
    1.40  }
    1.41  
    1.42  void vcpu_destroy(struct vcpu *v)
    1.43  {
    1.44 +    if ( IS_COMPAT(v->domain) )
    1.45 +        free_domheap_page(pagetable_get_page(v->arch.guest_table));
    1.46  }
    1.47  
    1.48  int arch_domain_create(struct domain *d)
    1.49 @@ -219,6 +246,10 @@ int arch_domain_create(struct domain *d)
    1.50  
    1.51  #endif /* __x86_64__ */
    1.52  
    1.53 +#ifdef CONFIG_COMPAT
    1.54 +    HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START;
    1.55 +#endif
    1.56 +
    1.57      shadow_domain_init(d);
    1.58  
    1.59      if ( !is_idle_domain(d) )
    1.60 @@ -349,18 +380,41 @@ int arch_set_info_guest(
    1.61          if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    1.62              return rc;
    1.63  
    1.64 -        cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
    1.65 +        if ( !IS_COMPAT(d) )
    1.66 +        {
    1.67 +            cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c->ctrlreg[3]));
    1.68 +
    1.69 +            if ( shadow_mode_refcounts(d)
    1.70 +                 ? !get_page(mfn_to_page(cr3_pfn), d)
    1.71 +                 : !get_page_and_type(mfn_to_page(cr3_pfn), d,
    1.72 +                                      PGT_base_page_table) )
    1.73 +            {
    1.74 +                destroy_gdt(v);
    1.75 +                return -EINVAL;
    1.76 +            }
    1.77  
    1.78 -        if ( shadow_mode_refcounts(d)
    1.79 -             ? !get_page(mfn_to_page(cr3_pfn), d)
    1.80 -             : !get_page_and_type(mfn_to_page(cr3_pfn), d,
    1.81 -                                  PGT_base_page_table) )
    1.82 +            v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
    1.83 +        }
    1.84 +#ifdef CONFIG_COMPAT
    1.85 +        else
    1.86          {
    1.87 -            destroy_gdt(v);
    1.88 -            return -EINVAL;
    1.89 +            l4_pgentry_t *l4tab;
    1.90 +
    1.91 +            cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c->ctrlreg[3]));
    1.92 +
    1.93 +            if ( shadow_mode_refcounts(d)
    1.94 +                 ? !get_page(mfn_to_page(cr3_pfn), d)
    1.95 +                 : !get_page_and_type(mfn_to_page(cr3_pfn), d,
    1.96 +                                    PGT_l3_page_table) )
    1.97 +            {
    1.98 +                destroy_gdt(v);
    1.99 +                return -EINVAL;
   1.100 +            }
   1.101 +
   1.102 +            l4tab = __va(pagetable_get_paddr(v->arch.guest_table));
   1.103 +            *l4tab = l4e_from_pfn(cr3_pfn, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED);
   1.104          }
   1.105 -
   1.106 -        v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
   1.107 +#endif
   1.108      }    
   1.109  
   1.110      if ( v->vcpu_id == 0 )
     2.1 --- a/xen/arch/x86/domain_build.c	Fri Jan 05 17:34:29 2007 +0000
     2.2 +++ b/xen/arch/x86/domain_build.c	Fri Jan 05 17:34:30 2007 +0000
     2.3 @@ -91,9 +91,11 @@ string_param("dom0_ioports_disable", opt
     2.4  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
     2.5  #define L3_PROT (_PAGE_PRESENT)
     2.6  #elif defined(__x86_64__)
     2.7 -/* Allow ring-3 access in long mode as guest cannot use ring 1. */
     2.8 +/* Allow ring-3 access in long mode as guest cannot use ring 1 ... */
     2.9  #define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    2.10  #define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL)
    2.11 +/* ... except for compatibility mode guests. */
    2.12 +#define COMPAT_L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
    2.13  #define L2_PROT (BASE_PROT|_PAGE_DIRTY)
    2.14  #define L3_PROT (BASE_PROT|_PAGE_DIRTY)
    2.15  #define L4_PROT (BASE_PROT|_PAGE_DIRTY)
    2.16 @@ -262,8 +264,8 @@ int construct_dom0(struct domain *d,
    2.17      start_info_t *si;
    2.18      struct vcpu *v = d->vcpu[0];
    2.19      const char *p;
    2.20 -    unsigned long hypercall_page;
    2.21 -    int hypercall_page_defined;
    2.22 +    unsigned long long value;
    2.23 +    int value_defined;
    2.24  #if defined(__i386__)
    2.25      char *image_start  = (char *)_image_start;  /* use lowmem mappings */
    2.26      char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
    2.27 @@ -323,6 +325,7 @@ int construct_dom0(struct domain *d,
    2.28      rc = parseelfimage(&dsi);
    2.29  #ifdef CONFIG_COMPAT
    2.30      if ( rc == -ENOSYS
    2.31 +         && !compat_disabled
    2.32           && (rc = parseelf32image(&dsi)) == 0 )
    2.33      {
    2.34          l1_pgentry_t gdt_l1e;
    2.35 @@ -370,10 +373,37 @@ int construct_dom0(struct domain *d,
    2.36  
    2.37  #ifdef CONFIG_COMPAT
    2.38      if ( IS_COMPAT(d) )
    2.39 +    {
    2.40 +        value = xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined);
    2.41          p = xen_elf32note_string(&dsi, XEN_ELFNOTE_FEATURES);
    2.42 +    }
    2.43      else
    2.44  #endif
    2.45 +    {
    2.46 +        value = xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HV_START_LOW, &value_defined);
    2.47          p = xen_elfnote_string(&dsi, XEN_ELFNOTE_FEATURES);
    2.48 +    }
    2.49 +    if ( value_defined )
    2.50 +    {
    2.51 +#if CONFIG_PAGING_LEVELS < 4
    2.52 +        unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
    2.53 +#else
    2.54 +        unsigned long mask = !IS_COMPAT(d)
    2.55 +                             ? (1UL << L4_PAGETABLE_SHIFT) - 1
    2.56 +                             : (1UL << L2_PAGETABLE_SHIFT) - 1;
    2.57 +#endif
    2.58 +
    2.59 +        value = (value + mask) & ~mask;
    2.60 +#ifdef CONFIG_COMPAT
    2.61 +        HYPERVISOR_COMPAT_VIRT_START(d) = max_t(unsigned int, m2p_compat_vstart, value);
    2.62 +        if ( value > (!IS_COMPAT(d) ?
    2.63 +                      HYPERVISOR_VIRT_START :
    2.64 +                      __HYPERVISOR_COMPAT_VIRT_START) )
    2.65 +#else
    2.66 +        if ( value > HYPERVISOR_VIRT_START )
    2.67 +#endif
    2.68 +            panic("Domain 0 expects too high a hypervisor start address.\n");
    2.69 +    }
    2.70      if ( p != NULL )
    2.71      {
    2.72          parse_features(p,
    2.73 @@ -400,7 +430,9 @@ int construct_dom0(struct domain *d,
    2.74      vinitrd_start    = round_pgup(dsi.v_end);
    2.75      vinitrd_end      = vinitrd_start + initrd_len;
    2.76      vphysmap_start   = round_pgup(vinitrd_end);
    2.77 -    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    2.78 +    vphysmap_end     = vphysmap_start + (nr_pages * (!IS_COMPAT(d) ?
    2.79 +                                                     sizeof(unsigned long) :
    2.80 +                                                     sizeof(unsigned int)));
    2.81      vstartinfo_start = round_pgup(vphysmap_end);
    2.82      vstartinfo_end   = (vstartinfo_start +
    2.83                          sizeof(struct start_info) +
    2.84 @@ -429,7 +461,9 @@ int construct_dom0(struct domain *d,
    2.85         ((_l) & ~((1UL<<(_s))-1))) >> (_s))
    2.86          if ( (1 + /* # L4 */
    2.87                NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
    2.88 -              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
    2.89 +              (!IS_COMPAT(d) ?
    2.90 +               NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) : /* # L2 */
    2.91 +               4) + /* # compat L2 */
    2.92                NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
    2.93               <= nr_pt_pages )
    2.94              break;
    2.95 @@ -619,8 +653,10 @@ int construct_dom0(struct domain *d,
    2.96  #elif defined(__x86_64__)
    2.97  
    2.98      /* Overlap with Xen protected area? */
    2.99 -    if ( (dsi.v_start < HYPERVISOR_VIRT_END) &&
   2.100 -         (v_end > HYPERVISOR_VIRT_START) )
   2.101 +    if ( !IS_COMPAT(d) ?
   2.102 +         ((dsi.v_start < HYPERVISOR_VIRT_END) &&
   2.103 +          (v_end > HYPERVISOR_VIRT_START)) :
   2.104 +         (v_end > HYPERVISOR_COMPAT_VIRT_START(d)) )
   2.105      {
   2.106          printk("DOM0 image overlaps with Xen private area.\n");
   2.107          return -EINVAL;
   2.108 @@ -633,8 +669,18 @@ int construct_dom0(struct domain *d,
   2.109      }
   2.110  
   2.111      /* WARNING: The new domain must have its 'processor' field filled in! */
   2.112 -    maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
   2.113 -    l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   2.114 +    if ( !IS_COMPAT(d) )
   2.115 +    {
   2.116 +        maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l4_page_table;
   2.117 +        l4start = l4tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   2.118 +    }
   2.119 +    else
   2.120 +    {
   2.121 +        page = alloc_domheap_page(NULL);
   2.122 +        if ( !page )
   2.123 +            panic("Not enough RAM for domain 0 PML4.\n");
   2.124 +        l4start = l4tab = page_to_virt(page);
   2.125 +    }
   2.126      memcpy(l4tab, idle_pg_table, PAGE_SIZE);
   2.127      l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
   2.128          l4e_from_paddr(__pa(l4start), __PAGE_HYPERVISOR);
   2.129 @@ -679,7 +725,7 @@ int construct_dom0(struct domain *d,
   2.130              *l2tab = l2e_from_paddr(__pa(l1start), L2_PROT);
   2.131              l2tab++;
   2.132          }
   2.133 -        *l1tab = l1e_from_pfn(mfn, L1_PROT);
   2.134 +        *l1tab = l1e_from_pfn(mfn, !IS_COMPAT(d) ? L1_PROT : COMPAT_L1_PROT);
   2.135          l1tab++;
   2.136  
   2.137          page = mfn_to_page(mfn);
   2.138 @@ -690,6 +736,30 @@ int construct_dom0(struct domain *d,
   2.139          mfn++;
   2.140      }
   2.141  
   2.142 +#ifdef CONFIG_COMPAT
   2.143 +    if ( IS_COMPAT(d) )
   2.144 +    {
   2.145 +        /* Ensure the first four L3 entries are all populated. */
   2.146 +        for ( i = 0, l3tab = l3start; i < 4; ++i, ++l3tab )
   2.147 +        {
   2.148 +            if ( !l3e_get_intpte(*l3tab) )
   2.149 +            {
   2.150 +                maddr_to_page(mpt_alloc)->u.inuse.type_info = PGT_l2_page_table;
   2.151 +                l2tab = __va(mpt_alloc); mpt_alloc += PAGE_SIZE;
   2.152 +                clear_page(l2tab);
   2.153 +                *l3tab = l3e_from_paddr(__pa(l2tab), L3_PROT);
   2.154 +            }
   2.155 +            if ( i == 3 )
   2.156 +                l3e_get_page(*l3tab)->u.inuse.type_info |= PGT_pae_xen_l2;
   2.157 +        }
   2.158 +        /* Install read-only guest visible MPT mapping. */
   2.159 +        l2tab = l3e_to_l2e(l3start[3]);
   2.160 +        memcpy(&l2tab[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
   2.161 +               &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
   2.162 +               COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*l2tab));
   2.163 +    }
   2.164 +#endif
   2.165 +
   2.166      /* Pages that are part of page tables must be read only. */
   2.167      l4tab = l4start + l4_table_offset(vpt_start);
   2.168      l3start = l3tab = l4e_to_l3e(*l4tab);
   2.169 @@ -708,7 +778,8 @@ int construct_dom0(struct domain *d,
   2.170          page->u.inuse.type_info |= PGT_validated | 1;
   2.171  
   2.172          /* Top-level p.t. is pinned. */
   2.173 -        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_l4_page_table )
   2.174 +        if ( (page->u.inuse.type_info & PGT_type_mask) ==
   2.175 +             (!IS_COMPAT(d) ? PGT_l4_page_table : PGT_l3_page_table) )
   2.176          {
   2.177              page->count_info        += 1;
   2.178              page->u.inuse.type_info += 1 | PGT_pinned;
   2.179 @@ -761,26 +832,26 @@ int construct_dom0(struct domain *d,
   2.180      if ( IS_COMPAT(d) )
   2.181      {
   2.182          (void)loadelf32image(&dsi);
   2.183 -        hypercall_page =
   2.184 -            xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
   2.185 +        value =
   2.186 +            xen_elf32note_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined);
   2.187      }
   2.188      else
   2.189  #endif
   2.190      {
   2.191          (void)loadelfimage(&dsi);
   2.192 -        hypercall_page =
   2.193 -            xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &hypercall_page_defined);
   2.194 +        value =
   2.195 +            xen_elfnote_numeric(&dsi, XEN_ELFNOTE_HYPERCALL_PAGE, &value_defined);
   2.196      }
   2.197 -    if ( hypercall_page_defined )
   2.198 +    if ( value_defined )
   2.199      {
   2.200 -        if ( (hypercall_page < dsi.v_start) || (hypercall_page >= v_end) )
   2.201 +        if ( (value < dsi.v_start) || (value >= v_end) )
   2.202          {
   2.203              write_ptbase(current);
   2.204              local_irq_enable();
   2.205              printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
   2.206              return -1;
   2.207          }
   2.208 -        hypercall_page_initialise(d, (void *)hypercall_page);
   2.209 +        hypercall_page_initialise(d, (void *)(unsigned long)value);
   2.210      }
   2.211  
   2.212      /* Copy the initial ramdisk. */
   2.213 @@ -798,7 +869,7 @@ int construct_dom0(struct domain *d,
   2.214      si->shared_info = virt_to_maddr(d->shared_info);
   2.215  
   2.216      si->flags        = SIF_PRIVILEGED | SIF_INITDOMAIN;
   2.217 -    si->pt_base      = vpt_start;
   2.218 +    si->pt_base      = vpt_start + 2 * PAGE_SIZE * !!IS_COMPAT(d);
   2.219      si->nr_pt_frames = nr_pt_pages;
   2.220      si->mfn_list     = vphysmap_start;
   2.221      sprintf(si->magic, "xen-%i.%i-x86_%d%s",
   2.222 @@ -814,7 +885,10 @@ int construct_dom0(struct domain *d,
   2.223          if ( pfn > REVERSE_START )
   2.224              mfn = alloc_epfn - (pfn - REVERSE_START);
   2.225  #endif
   2.226 -        ((unsigned long *)vphysmap_start)[pfn] = mfn;
   2.227 +        if ( !IS_COMPAT(d) )
   2.228 +            ((unsigned long *)vphysmap_start)[pfn] = mfn;
   2.229 +        else
   2.230 +            ((unsigned int *)vphysmap_start)[pfn] = mfn;
   2.231          set_gpfn_from_mfn(mfn, pfn);
   2.232      }
   2.233      while ( pfn < nr_pages )
   2.234 @@ -827,7 +901,10 @@ int construct_dom0(struct domain *d,
   2.235  #ifndef NDEBUG
   2.236  #define pfn (nr_pages - 1 - (pfn - (alloc_epfn - alloc_spfn)))
   2.237  #endif
   2.238 -            ((unsigned long *)vphysmap_start)[pfn] = mfn;
   2.239 +            if ( !IS_COMPAT(d) )
   2.240 +                ((unsigned long *)vphysmap_start)[pfn] = mfn;
   2.241 +            else
   2.242 +                ((unsigned int *)vphysmap_start)[pfn] = mfn;
   2.243              set_gpfn_from_mfn(mfn, pfn);
   2.244  #undef pfn
   2.245              page++; pfn++;
     3.1 --- a/xen/arch/x86/domctl.c	Fri Jan 05 17:34:29 2007 +0000
     3.2 +++ b/xen/arch/x86/domctl.c	Fri Jan 05 17:34:30 2007 +0000
     3.3 @@ -311,7 +311,12 @@ void arch_getdomaininfo_ctxt(
     3.4      if ( guest_kernel_mode(v, &v->arch.guest_context.user_regs) )
     3.5          c->flags |= VGCF_in_kernel;
     3.6  
     3.7 -    c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
     3.8 +    if ( !IS_COMPAT(v->domain) )
     3.9 +        c->ctrlreg[3] = xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
    3.10 +#ifdef CONFIG_COMPAT
    3.11 +    else
    3.12 +        c->ctrlreg[3] = compat_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table));
    3.13 +#endif
    3.14  
    3.15      c->vm_assist = v->domain->vm_assist;
    3.16  }
     4.1 --- a/xen/arch/x86/e820.c	Fri Jan 05 17:34:29 2007 +0000
     4.2 +++ b/xen/arch/x86/e820.c	Fri Jan 05 17:34:30 2007 +0000
     4.3 @@ -1,6 +1,7 @@
     4.4  #include <xen/config.h>
     4.5  #include <xen/init.h>
     4.6  #include <xen/lib.h>
     4.7 +#include <xen/compat.h>
     4.8  #include <asm/e820.h>
     4.9  #include <asm/page.h>
    4.10  
    4.11 @@ -341,6 +342,39 @@ static void __init clip_4gb(void)
    4.12  #define clip_4gb() ((void)0)
    4.13  #endif
    4.14  
    4.15 +#ifdef CONFIG_COMPAT
    4.16 +static void __init clip_compat(void)
    4.17 +{
    4.18 +    unsigned long long limit;
    4.19 +    unsigned int i;
    4.20 +
    4.21 +    if ( compat_disabled )
    4.22 +        return;
    4.23 +    /* 32-bit guests restricted to 166 GB (with current memory allocator). */
    4.24 +    limit = (unsigned long long)(MACH2PHYS_COMPAT_VIRT_END -
    4.25 +                                 __HYPERVISOR_COMPAT_VIRT_START) << 10;
    4.26 +    for ( i = 0; i < e820.nr_map; i++ )
    4.27 +    {
    4.28 +        if ( (e820.map[i].addr + e820.map[i].size) <= limit )
    4.29 +            continue;
    4.30 +        printk("WARNING: Only the first %Lu GB of the physical memory map "
    4.31 +               "can be accessed\n"
    4.32 +               "         by compatibility mode guests. "
    4.33 +               "Truncating the memory map...\n",
    4.34 +	       limit >> 30);
    4.35 +        if ( e820.map[i].addr >= limit )
    4.36 +            e820.nr_map = i;
    4.37 +        else
    4.38 +        {
    4.39 +            e820.map[i].size = limit - e820.map[i].addr;
    4.40 +            e820.nr_map = i + 1;
    4.41 +        }
    4.42 +    }
    4.43 +}
    4.44 +#else
    4.45 +#define clip_compat() ((void)0)
    4.46 +#endif
    4.47 +
    4.48  static void __init clip_mem(void)
    4.49  {
    4.50      int i;
    4.51 @@ -374,6 +408,7 @@ static void __init machine_specific_memo
    4.52      *raw_nr = nr;
    4.53      (void)copy_e820_map(raw, nr);
    4.54      clip_4gb();
    4.55 +    clip_compat();
    4.56      clip_mem();
    4.57  }
    4.58  
     5.1 --- a/xen/arch/x86/mm.c	Fri Jan 05 17:34:29 2007 +0000
     5.2 +++ b/xen/arch/x86/mm.c	Fri Jan 05 17:34:30 2007 +0000
     5.3 @@ -126,13 +126,6 @@
     5.4   */
     5.5  #define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
     5.6  
     5.7 -static void free_l2_table(struct page_info *page);
     5.8 -static void free_l1_table(struct page_info *page);
     5.9 -
    5.10 -static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
    5.11 -                        unsigned long type);
    5.12 -static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn);
    5.13 -
    5.14  /* Used to defer flushing of memory structures. */
    5.15  struct percpu_mm_info {
    5.16  #define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
    5.17 @@ -158,6 +151,15 @@ struct page_info *frame_table;
    5.18  unsigned long max_page;
    5.19  unsigned long total_pages;
    5.20  
    5.21 +#ifdef CONFIG_COMPAT
    5.22 +l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
    5.23 +#define l3_disallow_mask(d) (!IS_COMPAT(d) ? \
    5.24 +                             L3_DISALLOW_MASK : \
    5.25 +                             COMPAT_L3_DISALLOW_MASK)
    5.26 +#else
    5.27 +#define l3_disallow_mask(d) L3_DISALLOW_MASK
    5.28 +#endif
    5.29 +
    5.30  void __init init_frametable(void)
    5.31  {
    5.32      unsigned long nr_pages, page_step, i, mfn;
    5.33 @@ -661,9 +663,9 @@ get_page_from_l3e(
    5.34      if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    5.35          return 1;
    5.36  
    5.37 -    if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
    5.38 +    if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
    5.39      {
    5.40 -        MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK);
    5.41 +        MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
    5.42          return 0;
    5.43      }
    5.44  
    5.45 @@ -700,9 +702,10 @@ get_page_from_l4e(
    5.46  #ifdef __x86_64__
    5.47  
    5.48  #ifdef USER_MAPPINGS_ARE_GLOBAL
    5.49 -#define adjust_guest_l1e(pl1e)                                               \
    5.50 +#define adjust_guest_l1e(pl1e, d)                                            \
    5.51      do {                                                                     \
    5.52 -        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )                 \
    5.53 +        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) &&                \
    5.54 +             likely(!IS_COMPAT(d)) )                                         \
    5.55          {                                                                    \
    5.56              /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */    \
    5.57              if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
    5.58 @@ -716,37 +719,53 @@ get_page_from_l4e(
    5.59          }                                                                    \
    5.60      } while ( 0 )
    5.61  #else
    5.62 -#define adjust_guest_l1e(pl1e)                                  \
    5.63 +#define adjust_guest_l1e(pl1e, d)                               \
    5.64      do {                                                        \
    5.65 -        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )    \
    5.66 +        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) &&   \
    5.67 +             likely(!IS_COMPAT(d)) )                            \
    5.68              l1e_add_flags((pl1e), _PAGE_USER);                  \
    5.69      } while ( 0 )
    5.70  #endif
    5.71  
    5.72 -#define adjust_guest_l2e(pl2e)                                  \
    5.73 +#define adjust_guest_l2e(pl2e, d)                               \
    5.74      do {                                                        \
    5.75 -        if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) )    \
    5.76 +        if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) &&   \
    5.77 +             likely(!IS_COMPAT(d)) )                            \
    5.78              l2e_add_flags((pl2e), _PAGE_USER);                  \
    5.79      } while ( 0 )
    5.80  
    5.81 -#define adjust_guest_l3e(pl3e)                                  \
    5.82 +#define adjust_guest_l3e(pl3e, d)                               \
    5.83      do {                                                        \
    5.84          if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) )    \
    5.85 -            l3e_add_flags((pl3e), _PAGE_USER);                  \
    5.86 +            l3e_add_flags((pl3e), likely(!IS_COMPAT(d)) ?       \
    5.87 +                                         _PAGE_USER :           \
    5.88 +                                         _PAGE_USER|_PAGE_RW);  \
    5.89      } while ( 0 )
    5.90  
    5.91 -#define adjust_guest_l4e(pl4e)                                  \
    5.92 +#define adjust_guest_l4e(pl4e, d)                               \
    5.93      do {                                                        \
    5.94 -        if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) )    \
    5.95 +        if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) &&   \
    5.96 +             likely(!IS_COMPAT(d)) )                            \
    5.97              l4e_add_flags((pl4e), _PAGE_USER);                  \
    5.98      } while ( 0 )
    5.99  
   5.100  #else /* !defined(__x86_64__) */
   5.101  
   5.102 -#define adjust_guest_l1e(_p) ((void)0)
   5.103 -#define adjust_guest_l2e(_p) ((void)0)
   5.104 -#define adjust_guest_l3e(_p) ((void)0)
   5.105 -
   5.106 +#define adjust_guest_l1e(_p, _d) ((void)(_d))
   5.107 +#define adjust_guest_l2e(_p, _d) ((void)(_d))
   5.108 +#define adjust_guest_l3e(_p, _d) ((void)(_d))
   5.109 +
   5.110 +#endif
   5.111 +
   5.112 +#ifdef CONFIG_COMPAT
   5.113 +#define unadjust_guest_l3e(pl3e, d)                             \
   5.114 +    do {                                                        \
   5.115 +        if ( unlikely(IS_COMPAT(d)) &&                          \
   5.116 +             likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) )    \
   5.117 +            l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
   5.118 +    } while ( 0 )
   5.119 +#else
   5.120 +#define unadjust_guest_l3e(_p, _d) ((void)(_d))
   5.121  #endif
   5.122  
   5.123  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
   5.124 @@ -850,7 +869,7 @@ static int alloc_l1_table(struct page_in
   5.125               unlikely(!get_page_from_l1e(pl1e[i], d)) )
   5.126              goto fail;
   5.127  
   5.128 -        adjust_guest_l1e(pl1e[i]);
   5.129 +        adjust_guest_l1e(pl1e[i], d);
   5.130      }
   5.131  
   5.132      unmap_domain_page(pl1e);
   5.133 @@ -866,13 +885,20 @@ static int alloc_l1_table(struct page_in
   5.134      return 0;
   5.135  }
   5.136  
   5.137 -#ifdef CONFIG_X86_PAE
   5.138 -static int create_pae_xen_mappings(l3_pgentry_t *pl3e)
   5.139 +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
   5.140 +static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
   5.141  {
   5.142      struct page_info *page;
   5.143 -    l2_pgentry_t    *pl2e, l2e;
   5.144 +    l2_pgentry_t    *pl2e;
   5.145      l3_pgentry_t     l3e3;
   5.146 +#ifndef CONFIG_COMPAT
   5.147 +    l2_pgentry_t     l2e;
   5.148      int              i;
   5.149 +#else
   5.150 +
   5.151 +    if ( !IS_COMPAT(d) )
   5.152 +        return 1;
   5.153 +#endif
   5.154  
   5.155      pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
   5.156  
   5.157 @@ -905,6 +931,7 @@ static int create_pae_xen_mappings(l3_pg
   5.158  
   5.159      /* Xen private mappings. */
   5.160      pl2e = map_domain_page(l3e_get_pfn(l3e3));
   5.161 +#ifndef CONFIG_COMPAT
   5.162      memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
   5.163             &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
   5.164             L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
   5.165 @@ -922,11 +949,20 @@ static int create_pae_xen_mappings(l3_pg
   5.166              l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
   5.167          l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
   5.168      }
   5.169 +#else
   5.170 +    memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
   5.171 +           &compat_idle_pg_table_l2[l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
   5.172 +           COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
   5.173 +#endif
   5.174      unmap_domain_page(pl2e);
   5.175  
   5.176      return 1;
   5.177  }
   5.178 -
   5.179 +#else
   5.180 +# define create_pae_xen_mappings(d, pl3e) (1)
   5.181 +#endif
   5.182 +
   5.183 +#ifdef CONFIG_X86_PAE
   5.184  /* Flush a pgdir update into low-memory caches. */
   5.185  static void pae_flush_pgd(
   5.186      unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
   5.187 @@ -961,12 +997,8 @@ static void pae_flush_pgd(
   5.188  
   5.189      flush_tlb_mask(d->domain_dirty_cpumask);
   5.190  }
   5.191 -
   5.192 -#elif CONFIG_X86_64
   5.193 -# define create_pae_xen_mappings(pl3e) (1)
   5.194 +#else
   5.195  # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
   5.196 -#else
   5.197 -# define create_pae_xen_mappings(pl3e) (1)
   5.198  #endif
   5.199  
   5.200  static int alloc_l2_table(struct page_info *page, unsigned long type)
   5.201 @@ -980,11 +1012,11 @@ static int alloc_l2_table(struct page_in
   5.202  
   5.203      for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.204      {
   5.205 -        if ( is_guest_l2_slot(type, i) &&
   5.206 +        if ( is_guest_l2_slot(d, type, i) &&
   5.207               unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
   5.208              goto fail;
   5.209          
   5.210 -        adjust_guest_l2e(pl2e[i]);
   5.211 +        adjust_guest_l2e(pl2e[i], d);
   5.212      }
   5.213  
   5.214  #if CONFIG_PAGING_LEVELS == 2
   5.215 @@ -1007,7 +1039,7 @@ static int alloc_l2_table(struct page_in
   5.216   fail:
   5.217      MEM_LOG("Failure in alloc_l2_table: entry %d", i);
   5.218      while ( i-- > 0 )
   5.219 -        if ( is_guest_l2_slot(type, i) )
   5.220 +        if ( is_guest_l2_slot(d, type, i) )
   5.221              put_page_from_l2e(pl2e[i], pfn);
   5.222  
   5.223      unmap_domain_page(pl2e);
   5.224 @@ -1039,13 +1071,24 @@ static int alloc_l3_table(struct page_in
   5.225  #endif
   5.226  
   5.227      pl3e = map_domain_page(pfn);
   5.228 +
   5.229 +    /*
   5.230 +     * PAE guests allocate full pages, but aren't required to initialize
   5.231 +     * more than the first four entries; when running in compatibility
   5.232 +     * mode, however, the full page is visible to the MMU, and hence all
   5.233 +     * 512 entries must be valid/verified, which is most easily achieved
   5.234 +     * by clearing them out.
   5.235 +     */
   5.236 +    if ( IS_COMPAT(d) )
   5.237 +        memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
   5.238 +
   5.239      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   5.240      {
   5.241 -#ifdef CONFIG_X86_PAE
   5.242 -        if ( i == 3 )
   5.243 +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
   5.244 +        if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) && i == 3 )
   5.245          {
   5.246              if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
   5.247 -                 (l3e_get_flags(pl3e[i]) & L3_DISALLOW_MASK) ||
   5.248 +                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
   5.249                   !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
   5.250                                                  PGT_l2_page_table |
   5.251                                                  PGT_pae_xen_l2,
   5.252 @@ -1058,10 +1101,10 @@ static int alloc_l3_table(struct page_in
   5.253               unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
   5.254              goto fail;
   5.255          
   5.256 -        adjust_guest_l3e(pl3e[i]);
   5.257 +        adjust_guest_l3e(pl3e[i], d);
   5.258      }
   5.259  
   5.260 -    if ( !create_pae_xen_mappings(pl3e) )
   5.261 +    if ( !create_pae_xen_mappings(d, pl3e) )
   5.262          goto fail;
   5.263  
   5.264      unmap_domain_page(pl3e);
   5.265 @@ -1094,7 +1137,7 @@ static int alloc_l4_table(struct page_in
   5.266               unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
   5.267              goto fail;
   5.268  
   5.269 -        adjust_guest_l4e(pl4e[i]);
   5.270 +        adjust_guest_l4e(pl4e[i], d);
   5.271      }
   5.272  
   5.273      /* Xen private mappings. */
   5.274 @@ -1142,6 +1185,9 @@ static void free_l1_table(struct page_in
   5.275  
   5.276  static void free_l2_table(struct page_info *page)
   5.277  {
   5.278 +#ifdef CONFIG_COMPAT
   5.279 +    struct domain *d = page_get_owner(page);
   5.280 +#endif
   5.281      unsigned long pfn = page_to_mfn(page);
   5.282      l2_pgentry_t *pl2e;
   5.283      int i;
   5.284 @@ -1149,7 +1195,7 @@ static void free_l2_table(struct page_in
   5.285      pl2e = map_domain_page(pfn);
   5.286  
   5.287      for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
   5.288 -        if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
   5.289 +        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
   5.290              put_page_from_l2e(pl2e[i], pfn);
   5.291  
   5.292      unmap_domain_page(pl2e);
   5.293 @@ -1162,6 +1208,7 @@ static void free_l2_table(struct page_in
   5.294  
   5.295  static void free_l3_table(struct page_info *page)
   5.296  {
   5.297 +    struct domain *d = page_get_owner(page);
   5.298      unsigned long pfn = page_to_mfn(page);
   5.299      l3_pgentry_t *pl3e;
   5.300      int           i;
   5.301 @@ -1170,7 +1217,10 @@ static void free_l3_table(struct page_in
   5.302  
   5.303      for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
   5.304          if ( is_guest_l3_slot(i) )
   5.305 +        {
   5.306              put_page_from_l3e(pl3e[i], pfn);
   5.307 +            unadjust_guest_l3e(pl3e[i], d);
   5.308 +        }
   5.309  
   5.310      unmap_domain_page(pl3e);
   5.311  }
   5.312 @@ -1270,7 +1320,7 @@ static int mod_l1_entry(l1_pgentry_t *pl
   5.313              return 0;
   5.314          }
   5.315  
   5.316 -        adjust_guest_l1e(nl1e);
   5.317 +        adjust_guest_l1e(nl1e, d);
   5.318  
   5.319          /* Fast path for identical mapping, r/w and presence. */
   5.320          if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
   5.321 @@ -1303,8 +1353,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
   5.322                          unsigned long type)
   5.323  {
   5.324      l2_pgentry_t ol2e;
   5.325 -
   5.326 -    if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
   5.327 +    struct domain *d = current->domain;
   5.328 +
   5.329 +    if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
   5.330      {
   5.331          MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
   5.332          return 0;
   5.333 @@ -1322,7 +1373,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
   5.334              return 0;
   5.335          }
   5.336  
   5.337 -        adjust_guest_l2e(nl2e);
   5.338 +        adjust_guest_l2e(nl2e, d);
   5.339  
   5.340          /* Fast path for identical mapping and presence. */
   5.341          if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT))
   5.342 @@ -1354,6 +1405,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
   5.343                          unsigned long pfn)
   5.344  {
   5.345      l3_pgentry_t ol3e;
   5.346 +    struct domain *d = current->domain;
   5.347      int okay;
   5.348  
   5.349      if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
   5.350 @@ -1362,12 +1414,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
   5.351          return 0;
   5.352      }
   5.353  
   5.354 -#ifdef CONFIG_X86_PAE
   5.355 +#if defined(CONFIG_X86_PAE) || defined(CONFIG_COMPAT)
   5.356      /*
   5.357       * Disallow updates to final L3 slot. It contains Xen mappings, and it
   5.358       * would be a pain to ensure they remain continuously valid throughout.
   5.359       */
   5.360 -    if ( pgentry_ptr_to_slot(pl3e) >= 3 )
   5.361 +    if ( (CONFIG_PAGING_LEVELS < 4 || IS_COMPAT(d)) &&
   5.362 +         pgentry_ptr_to_slot(pl3e) >= 3 )
   5.363          return 0;
   5.364  #endif 
   5.365  
   5.366 @@ -1376,14 +1429,14 @@ static int mod_l3_entry(l3_pgentry_t *pl
   5.367  
   5.368      if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
   5.369      {
   5.370 -        if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
   5.371 +        if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
   5.372          {
   5.373              MEM_LOG("Bad L3 flags %x",
   5.374 -                    l3e_get_flags(nl3e) & L3_DISALLOW_MASK);
   5.375 +                    l3e_get_flags(nl3e) & l3_disallow_mask(d));
   5.376              return 0;
   5.377          }
   5.378  
   5.379 -        adjust_guest_l3e(nl3e);
   5.380 +        adjust_guest_l3e(nl3e, d);
   5.381  
   5.382          /* Fast path for identical mapping and presence. */
   5.383          if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT))
   5.384 @@ -1403,7 +1456,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
   5.385          return 0;
   5.386      }
   5.387  
   5.388 -    okay = create_pae_xen_mappings(pl3e);
   5.389 +    okay = create_pae_xen_mappings(d, pl3e);
   5.390      BUG_ON(!okay);
   5.391  
   5.392      pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
   5.393 @@ -1441,7 +1494,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
   5.394              return 0;
   5.395          }
   5.396  
   5.397 -        adjust_guest_l4e(nl4e);
   5.398 +        adjust_guest_l4e(nl4e, current->domain);
   5.399  
   5.400          /* Fast path for identical mapping and presence. */
   5.401          if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT))
   5.402 @@ -2265,8 +2318,7 @@ int do_mmu_update(
   5.403                  case PGT_l2_page_table:
   5.404                  {
   5.405                      l2_pgentry_t l2e = l2e_from_intpte(req.val);
   5.406 -                    okay = mod_l2_entry(
   5.407 -                        (l2_pgentry_t *)va, l2e, mfn, type_info);
   5.408 +                    okay = mod_l2_entry(va, l2e, mfn, type_info);
   5.409                  }
   5.410                  break;
   5.411  #if CONFIG_PAGING_LEVELS >= 3
   5.412 @@ -2279,11 +2331,12 @@ int do_mmu_update(
   5.413  #endif
   5.414  #if CONFIG_PAGING_LEVELS >= 4
   5.415                  case PGT_l4_page_table:
   5.416 -                {
   5.417 -                    l4_pgentry_t l4e = l4e_from_intpte(req.val);
   5.418 -                    okay = mod_l4_entry(va, l4e, mfn);
   5.419 -                }
   5.420 -                break;
   5.421 +                    if ( !IS_COMPAT(FOREIGNDOM) )
   5.422 +                    {
   5.423 +                        l4_pgentry_t l4e = l4e_from_intpte(req.val);
   5.424 +                        okay = mod_l4_entry(va, l4e, mfn);
   5.425 +                    }
   5.426 +                    break;
   5.427  #endif
   5.428                  }
   5.429  
   5.430 @@ -2387,7 +2440,7 @@ static int create_grant_pte_mapping(
   5.431  
   5.432      ASSERT(spin_is_locked(&d->big_lock));
   5.433  
   5.434 -    adjust_guest_l1e(nl1e);
   5.435 +    adjust_guest_l1e(nl1e, d);
   5.436  
   5.437      gmfn = pte_addr >> PAGE_SHIFT;
   5.438      mfn = gmfn_to_mfn(d, gmfn);
   5.439 @@ -2508,7 +2561,7 @@ static int create_grant_va_mapping(
   5.440      
   5.441      ASSERT(spin_is_locked(&d->big_lock));
   5.442  
   5.443 -    adjust_guest_l1e(nl1e);
   5.444 +    adjust_guest_l1e(nl1e, d);
   5.445  
   5.446      pl1e = guest_map_l1e(v, va, &gl1mfn);
   5.447      if ( !pl1e )
   5.448 @@ -3173,7 +3226,7 @@ static int ptwr_emulated_update(
   5.449          }
   5.450      }
   5.451  
   5.452 -    adjust_guest_l1e(nl1e);
   5.453 +    adjust_guest_l1e(nl1e, d);
   5.454  
   5.455      /* Checked successfully: do the update (write or cmpxchg). */
   5.456      pl1e = map_domain_page(page_to_mfn(page));
     6.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Jan 05 17:34:29 2007 +0000
     6.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Jan 05 17:34:30 2007 +0000
     6.3 @@ -31,6 +31,10 @@
     6.4  #include <asm/msr.h>
     6.5  #include <public/memory.h>
     6.6  
     6.7 +#ifdef CONFIG_COMPAT
     6.8 +unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
     6.9 +#endif
    6.10 +
    6.11  struct page_info *alloc_xen_pagetable(void)
    6.12  {
    6.13      extern int early_boot;
    6.14 @@ -121,6 +125,47 @@ void __init paging_init(void)
    6.15          l2_ro_mpt++;
    6.16      }
    6.17  
    6.18 +#ifdef CONFIG_COMPAT
    6.19 +    if ( !compat_disabled )
    6.20 +    {
    6.21 +        /* Create user-accessible L2 directory to map the MPT for compatibility guests. */
    6.22 +        BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
    6.23 +                     l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
    6.24 +        l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
    6.25 +        if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
    6.26 +            goto nomem;
    6.27 +        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
    6.28 +        l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
    6.29 +                  l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
    6.30 +        l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
     6.31 +        /*
     6.32 +         * Allocate and map the compatibility mode machine-to-phys table.
     6.33 +         */
    6.34 +        mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
    6.35 +        if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
    6.36 +            mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
    6.37 +        mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    6.38 +        if ( m2p_compat_vstart + mpt_size < MACH2PHYS_COMPAT_VIRT_END )
    6.39 +            m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
    6.40 +        for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    6.41 +        {
    6.42 +            if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
    6.43 +                goto nomem;
    6.44 +            map_pages_to_xen(
    6.45 +                RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
    6.46 +                page_to_mfn(l1_pg),
    6.47 +                1UL << PAGETABLE_ORDER,
    6.48 +                PAGE_HYPERVISOR);
    6.49 +            memset((void *)(RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
    6.50 +                   0x55,
    6.51 +                   1UL << L2_PAGETABLE_SHIFT);
    6.52 +            /* NB. Cannot be GLOBAL as the pt entries get copied into per-VM space. */
    6.53 +            l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
    6.54 +            l2_ro_mpt++;
    6.55 +        }
    6.56 +    }
    6.57 +#endif
    6.58 +
    6.59      /* Set up linear page table mapping. */
    6.60      l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
    6.61                l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR));
    6.62 @@ -182,6 +227,30 @@ void subarch_init_memory(void)
    6.63              share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
    6.64          }
    6.65      }
    6.66 +#ifdef CONFIG_COMPAT
    6.67 +    if ( !compat_disabled )
    6.68 +    {
    6.69 +        for ( v  = RDWR_COMPAT_MPT_VIRT_START;
    6.70 +              v != RDWR_COMPAT_MPT_VIRT_END;
    6.71 +              v += 1 << L2_PAGETABLE_SHIFT )
    6.72 +        {
    6.73 +            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
    6.74 +                l3_table_offset(v)];
    6.75 +            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
    6.76 +                continue;
    6.77 +            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
    6.78 +            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
    6.79 +                continue;
    6.80 +            m2p_start_mfn = l2e_get_pfn(l2e);
    6.81 +
    6.82 +            for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
    6.83 +            {
    6.84 +                struct page_info *page = mfn_to_page(m2p_start_mfn + i);
    6.85 +                share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
    6.86 +            }
    6.87 +        }
    6.88 +    }
    6.89 +#endif
    6.90  }
    6.91  
    6.92  long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
    6.93 @@ -189,7 +258,8 @@ long subarch_memory_op(int op, XEN_GUEST
    6.94      struct xen_machphys_mfn_list xmml;
    6.95      l3_pgentry_t l3e;
    6.96      l2_pgentry_t l2e;
    6.97 -    unsigned long mfn, v;
    6.98 +    unsigned long v;
    6.99 +    xen_pfn_t mfn;
   6.100      unsigned int i;
   6.101      long rc = 0;
   6.102  
     7.1 --- a/xen/common/compat/kernel.c	Fri Jan 05 17:34:29 2007 +0000
     7.2 +++ b/xen/common/compat/kernel.c	Fri Jan 05 17:34:30 2007 +0000
     7.3 @@ -27,7 +27,7 @@ CHECK_TYPE(capabilities_info);
     7.4  #define xen_platform_parameters compat_platform_parameters
     7.5  #define xen_platform_parameters_t compat_platform_parameters_t
     7.6  #undef HYPERVISOR_VIRT_START
     7.7 -#define HYPERVISOR_VIRT_START HYPERVISOR_COMPAT_VIRT_START
     7.8 +#define HYPERVISOR_VIRT_START HYPERVISOR_COMPAT_VIRT_START(current->domain)
     7.9  
    7.10  #define xen_changeset_info compat_changeset_info
    7.11  #define xen_changeset_info_t compat_changeset_info_t
     8.1 --- a/xen/include/asm-x86/config.h	Fri Jan 05 17:34:29 2007 +0000
     8.2 +++ b/xen/include/asm-x86/config.h	Fri Jan 05 17:34:30 2007 +0000
     8.3 @@ -129,7 +129,11 @@
     8.4   *    Page-frame information array.
     8.5   *  0xffff828800000000 - 0xffff828bffffffff [16GB,  2^34 bytes, PML4:261]
     8.6   *    ioremap()/fixmap area.
     8.7 - *  0xffff828c00000000 - 0xffff82ffffffffff [464GB,             PML4:261]
     8.8 + *  0xffff828c00000000 - 0xffff828c3fffffff [1GB,   2^30 bytes, PML4:261]
     8.9 + *    Compatibility machine-to-phys translation table.
    8.10 + *  0xffff828c40000000 - 0xffff828c7fffffff [1GB,   2^30 bytes, PML4:261]
    8.11 + *    High read-only compatibility machine-to-phys translation table.
    8.12 + *  0xffff828c80000000 - 0xffff82ffffffffff [462GB,             PML4:261]
    8.13   *    Reserved for future use.
    8.14   *  0xffff830000000000 - 0xffff83ffffffffff [1TB,   2^40 bytes, PML4:262-263]
    8.15   *    1:1 direct mapping of all physical memory. Xen and its heap live here.
    8.16 @@ -178,17 +182,33 @@
    8.17  /* Slot 261: ioremap()/fixmap area (16GB). */
    8.18  #define IOREMAP_VIRT_START      (FRAMETABLE_VIRT_END)
    8.19  #define IOREMAP_VIRT_END        (IOREMAP_VIRT_START + (16UL<<30))
    8.20 +/* Slot 261: compatibility machine-to-phys conversion table (1GB). */
    8.21 +#define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END
    8.22 +#define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + (1UL << 30))
    8.23 +/* Slot 261: high read-only compatibility machine-to-phys conversion table (1GB). */
    8.24 +#define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END
    8.25 +#define HIRO_COMPAT_MPT_VIRT_END (HIRO_COMPAT_MPT_VIRT_START + (1UL << 30))
    8.26  /* Slot 262-263: A direct 1:1 mapping of all of physical memory. */
    8.27  #define DIRECTMAP_VIRT_START    (PML4_ADDR(262))
    8.28  #define DIRECTMAP_VIRT_END      (DIRECTMAP_VIRT_START + PML4_ENTRY_BYTES*2)
    8.29  
    8.30 +#ifndef __ASSEMBLY__
    8.31 +
    8.32 +/* This is not a fixed value, just a lower limit. */
    8.33  #define __HYPERVISOR_COMPAT_VIRT_START 0xF5800000
    8.34 -#define HYPERVISOR_COMPAT_VIRT_START   \
    8.35 -    mk_unsigned_long(__HYPERVISOR_COMPAT_VIRT_START)
    8.36 +#define HYPERVISOR_COMPAT_VIRT_START(d) ((d)->arch.hv_compat_vstart)
    8.37  #define MACH2PHYS_COMPAT_VIRT_START    HYPERVISOR_COMPAT_VIRT_START
    8.38  #define MACH2PHYS_COMPAT_VIRT_END      0xFFE00000
    8.39 -#define MACH2PHYS_COMPAT_NR_ENTRIES    \
    8.40 -    ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START)>>2)
    8.41 +#define MACH2PHYS_COMPAT_NR_ENTRIES(d) \
    8.42 +    ((MACH2PHYS_COMPAT_VIRT_END-MACH2PHYS_COMPAT_VIRT_START(d))>>2)
    8.43 +
    8.44 +#define COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d) \
    8.45 +    l2_table_offset(HYPERVISOR_COMPAT_VIRT_START(d))
    8.46 +#define COMPAT_L2_PAGETABLE_LAST_XEN_SLOT  l2_table_offset(~0U)
    8.47 +#define COMPAT_L2_PAGETABLE_XEN_SLOTS(d) \
    8.48 +    (COMPAT_L2_PAGETABLE_LAST_XEN_SLOT - COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d) + 1)
    8.49 +
    8.50 +#endif
    8.51  
    8.52  #define PGT_base_page_table     PGT_l4_page_table
    8.53  
     9.1 --- a/xen/include/asm-x86/domain.h	Fri Jan 05 17:34:29 2007 +0000
     9.2 +++ b/xen/include/asm-x86/domain.h	Fri Jan 05 17:34:30 2007 +0000
     9.3 @@ -98,6 +98,10 @@ struct arch_domain
     9.4      struct mapcache mapcache;
     9.5  #endif
     9.6  
     9.7 +#ifdef CONFIG_COMPAT
     9.8 +    unsigned int hv_compat_vstart;
     9.9 +#endif
    9.10 +
    9.11      /* I/O-port admin-specified access capabilities. */
    9.12      struct rangeset *ioport_caps;
    9.13  
    10.1 --- a/xen/include/asm-x86/mm.h	Fri Jan 05 17:34:29 2007 +0000
    10.2 +++ b/xen/include/asm-x86/mm.h	Fri Jan 05 17:34:30 2007 +0000
    10.3 @@ -257,7 +257,16 @@ int check_descriptor(const struct domain
    10.4  #define INVALID_M2P_ENTRY        (~0UL)
    10.5  #define VALID_M2P(_e)            (!((_e) & (1UL<<(BITS_PER_LONG-1))))
    10.6  
    10.7 +#ifdef CONFIG_COMPAT
    10.8 +#define compat_machine_to_phys_mapping ((unsigned int *)RDWR_COMPAT_MPT_VIRT_START)
    10.9 +#define set_gpfn_from_mfn(mfn, pfn) \
   10.10 +    ((void)(compat_disabled || \
   10.11 +            (mfn) >= (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) / 4 || \
   10.12 +            (compat_machine_to_phys_mapping[(mfn)] = (unsigned int)(pfn))), \
   10.13 +     machine_to_phys_mapping[(mfn)] = (pfn))
   10.14 +#else
   10.15  #define set_gpfn_from_mfn(mfn, pfn) (machine_to_phys_mapping[(mfn)] = (pfn))
   10.16 +#endif
   10.17  #define get_gpfn_from_mfn(mfn)      (machine_to_phys_mapping[(mfn)])
   10.18  
   10.19  
    11.1 --- a/xen/include/asm-x86/page.h	Fri Jan 05 17:34:29 2007 +0000
    11.2 +++ b/xen/include/asm-x86/page.h	Fri Jan 05 17:34:30 2007 +0000
    11.3 @@ -206,6 +206,7 @@ typedef struct { u32 pfn; } pagetable_t;
    11.4  typedef struct { u64 pfn; } pagetable_t;
    11.5  #endif
    11.6  #define pagetable_get_paddr(x)  ((paddr_t)(x).pfn << PAGE_SHIFT)
    11.7 +#define pagetable_get_page(x)   mfn_to_page((x).pfn)
    11.8  #define pagetable_get_pfn(x)    ((x).pfn)
    11.9  #define pagetable_is_null(x)    ((x).pfn == 0)
   11.10  #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) })
   11.11 @@ -287,6 +288,10 @@ extern l2_pgentry_t   idle_pg_table_l2[R
   11.12  #else
   11.13  extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
   11.14  extern l2_pgentry_t   idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
   11.15 +#ifdef CONFIG_COMPAT
   11.16 +extern l2_pgentry_t  *compat_idle_pg_table_l2;
   11.17 +extern unsigned int   m2p_compat_vstart;
   11.18 +#endif
   11.19  #endif
   11.20  void paging_init(void);
   11.21  void setup_idle_pagetable(void);
    12.1 --- a/xen/include/asm-x86/x86_32/page-2level.h	Fri Jan 05 17:34:29 2007 +0000
    12.2 +++ b/xen/include/asm-x86/x86_32/page-2level.h	Fri Jan 05 17:34:30 2007 +0000
    12.3 @@ -42,7 +42,7 @@ typedef l2_pgentry_t root_pgentry_t;
    12.4  
    12.5  /* misc */
    12.6  #define is_guest_l1_slot(_s)    (1)
    12.7 -#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
    12.8 +#define is_guest_l2_slot(_d, _t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
    12.9  
   12.10  /*
   12.11   * PTE pfn and flags:
    13.1 --- a/xen/include/asm-x86/x86_32/page-3level.h	Fri Jan 05 17:34:29 2007 +0000
    13.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Fri Jan 05 17:34:30 2007 +0000
    13.3 @@ -67,7 +67,7 @@ typedef l3_pgentry_t root_pgentry_t;
    13.4  
    13.5  /* misc */
    13.6  #define is_guest_l1_slot(s)    (1)
    13.7 -#define is_guest_l2_slot(t,s)                                              \
    13.8 +#define is_guest_l2_slot(d,t,s)                                            \
    13.9      ( !((t) & PGT_pae_xen_l2) ||                                           \
   13.10        ((s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) )
   13.11  #define is_guest_l3_slot(s)    (1)
    14.1 --- a/xen/include/asm-x86/x86_64/page.h	Fri Jan 05 17:34:29 2007 +0000
    14.2 +++ b/xen/include/asm-x86/x86_64/page.h	Fri Jan 05 17:34:30 2007 +0000
    14.3 @@ -54,7 +54,10 @@ typedef l4_pgentry_t root_pgentry_t;
    14.4  #define l4_linear_offset(_a) (((_a) & VADDR_MASK) >> L4_PAGETABLE_SHIFT)
    14.5  
    14.6  #define is_guest_l1_slot(_s) (1)
    14.7 -#define is_guest_l2_slot(_t, _s) (1)
    14.8 +#define is_guest_l2_slot(_d, _t, _s)                   \
    14.9 +    ( !IS_COMPAT(_d) ||                                \
   14.10 +      !((_t) & PGT_pae_xen_l2) ||                      \
   14.11 +      ((_s) < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_d)) )
   14.12  #define is_guest_l3_slot(_s) (1)
   14.13  #define is_guest_l4_slot(_s)                   \
   14.14      (((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT) || \
   14.15 @@ -93,6 +96,8 @@ typedef l4_pgentry_t root_pgentry_t;
   14.16  #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
   14.17  #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK | 0x180U /* must-be-zero */)
   14.18  
   14.19 +#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
   14.20 +
   14.21  #define PAGE_HYPERVISOR         (__PAGE_HYPERVISOR         | _PAGE_GLOBAL)
   14.22  #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
   14.23  
    15.1 --- a/xen/include/asm-x86/x86_64/uaccess.h	Fri Jan 05 17:34:29 2007 +0000
    15.2 +++ b/xen/include/asm-x86/x86_64/uaccess.h	Fri Jan 05 17:34:30 2007 +0000
    15.3 @@ -18,7 +18,7 @@
    15.4  #ifdef CONFIG_COMPAT
    15.5  
    15.6  #define __compat_addr_ok(addr) \
    15.7 -    ((unsigned long)(addr) < HYPERVISOR_COMPAT_VIRT_START)
    15.8 +    ((unsigned long)(addr) < HYPERVISOR_COMPAT_VIRT_START(current->domain))
    15.9  
   15.10  #define compat_access_ok(addr, size) __compat_addr_ok((addr) + (size))
   15.11