ia64/xen-unstable

changeset 11381:43ec7afa5734

[XEN][POWERPC] Lots of domain page management cleanups.
Signed-off-by: Jimi Xenidis <jimix@watson.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
author Jimi Xenidis <jimix@watson.ibm.com>
date Fri Aug 25 15:28:48 2006 -0400 (2006-08-25)
parents 7825169895d0
children 215d5eae720c
files xen/arch/powerpc/domain.c xen/arch/powerpc/domain_build.c xen/arch/powerpc/iommu.c xen/arch/powerpc/mm.c xen/arch/powerpc/papr/xlate.c xen/arch/powerpc/powerpc64/ppc970.c xen/include/asm-powerpc/domain.h xen/include/asm-powerpc/mm.h xen/include/asm-powerpc/processor.h
line diff
     1.1 --- a/xen/arch/powerpc/domain.c	Fri Aug 25 14:34:51 2006 -0400
     1.2 +++ b/xen/arch/powerpc/domain.c	Fri Aug 25 15:28:48 2006 -0400
     1.3 @@ -27,6 +27,7 @@
     1.4  #include <xen/domain.h>
     1.5  #include <xen/console.h>
     1.6  #include <xen/shutdown.h>
     1.7 +#include <xen/mm.h>
     1.8  #include <asm/htab.h>
     1.9  #include <asm/current.h>
    1.10  #include <asm/hcalls.h>
    1.11 @@ -76,6 +77,7 @@ int arch_domain_create(struct domain *d)
    1.12      unsigned long rma_base;
    1.13      unsigned long rma_sz;
    1.14      uint htab_order;
    1.15 +    uint nr_pages;
    1.16  
    1.17      if (d->domain_id == IDLE_DOMAIN_ID) {
    1.18          d->shared_info = (void *)alloc_xenheap_page();
    1.19 @@ -88,11 +90,13 @@ int arch_domain_create(struct domain *d)
    1.20      rma_sz = rma_size(d->arch.rma_order);
    1.21  
    1.22      /* allocate the real mode area */
    1.23 -    d->max_pages = 1UL << d->arch.rma_order;
     1.24 +    nr_pages = 1UL << d->arch.rma_order;
    1.25 +    d->max_pages = nr_pages;
    1.26      d->tot_pages = 0;
    1.27      d->arch.rma_page = alloc_domheap_pages(d, d->arch.rma_order, 0);
    1.28      if (NULL == d->arch.rma_page)
    1.29          return 1;
    1.30 +
    1.31      rma_base = page_to_maddr(d->arch.rma_page);
    1.32  
    1.33      BUG_ON(rma_base & (rma_sz - 1)); /* check alignment */
    1.34 @@ -103,8 +107,8 @@ int arch_domain_create(struct domain *d)
    1.35      d->shared_info = (shared_info_t *)
    1.36          (rma_addr(&d->arch, RMA_SHARED_INFO) + rma_base);
    1.37  
    1.38 -    d->arch.large_page_sizes = 1;
    1.39 -    d->arch.large_page_shift[0] = 24; /* 16 M for 970s */
    1.40 +    d->arch.large_page_sizes = cpu_large_page_orders(
    1.41 +        d->arch.large_page_order, ARRAY_SIZE(d->arch.large_page_order));
    1.42  
     1.43      /* FIXME: we need to know the maximum addressable memory for this
    1.44       * domain to calculate this correctly. It should probably be set
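
The RMA sizing above all derives from rma_order. Below is a minimal standalone sketch of the arithmetic, assuming 4 KiB pages (PAGE_SHIFT of 12) and a hypothetical rma_order of 14 (neither value is taken from the patch); the alignment test is the same power-of-two mask the BUG_ON uses:

    /* Illustrative only: how rma_order relates to page count and bytes. */
    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int page_shift = 12;              /* assumed 4 KiB pages */
        unsigned int rma_order  = 14;              /* hypothetical order */
        unsigned long nr_pages  = 1UL << rma_order;        /* 16384 pages */
        unsigned long rma_sz    = nr_pages << page_shift;  /* 64 MiB */
        unsigned long rma_base  = 0x4000000UL;     /* any rma_sz-aligned base */

        /* rma_sz is a power of two, so (rma_base & (rma_sz - 1)) == 0
         * exactly when rma_base is rma_sz-aligned -- the BUG_ON check. */
        assert((rma_base & (rma_sz - 1)) == 0);
        printf("pages=%lu bytes=%lu\n", nr_pages, rma_sz);
        return 0;
    }
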
     2.1 --- a/xen/arch/powerpc/domain_build.c	Fri Aug 25 14:34:51 2006 -0400
     2.2 +++ b/xen/arch/powerpc/domain_build.c	Fri Aug 25 15:28:48 2006 -0400
     2.3 @@ -34,18 +34,22 @@ extern int parseelfimage_32(struct domai
     2.4  extern int loadelfimage_32(struct domain_setup_info *dsi);
     2.5  
     2.6  /* opt_dom0_mem: memory allocated to domain 0. */
     2.7 -static unsigned int opt_dom0_mem;
     2.8 +static unsigned int dom0_nrpages;
     2.9  static void parse_dom0_mem(char *s)
    2.10  {
    2.11 -    unsigned long long bytes = parse_size_and_unit(s);
    2.12 -    /* If no unit is specified we default to kB units, not bytes. */
    2.13 -    if (isdigit(s[strlen(s)-1]))
    2.14 -        opt_dom0_mem = (unsigned int)bytes;
    2.15 -    else
    2.16 -        opt_dom0_mem = (unsigned int)(bytes >> 10);
    2.17 +    unsigned long long bytes;
    2.18 +
    2.19 +    bytes = parse_size_and_unit(s);
    2.20 +    dom0_nrpages = bytes >> PAGE_SHIFT;
    2.21  }
    2.22  custom_param("dom0_mem", parse_dom0_mem);
    2.23  
    2.24 +static unsigned int opt_dom0_max_vcpus;
    2.25 +integer_param("dom0_max_vcpus", opt_dom0_max_vcpus);
    2.26 +
    2.27 +static unsigned int opt_dom0_shadow;
    2.28 +boolean_param("dom0_shadow", opt_dom0_shadow);
    2.29 +
    2.30  int elf_sanity_check(Elf_Ehdr *ehdr)
    2.31  {
    2.32      if (IS_ELF(*ehdr))
    2.33 @@ -146,8 +150,14 @@ int construct_dom0(struct domain *d,
    2.34  
    2.35      /* By default DOM0 is allocated all available memory. */
    2.36      d->max_pages = ~0U;
    2.37 -    d->tot_pages = 1UL << d->arch.rma_order;
    2.38  
    2.39 +    if (dom0_nrpages == 0) {
    2.40 +        dom0_nrpages = 1UL << d->arch.rma_order;
    2.41 +    }
    2.42 +
    2.43 +    d->tot_pages = dom0_nrpages;
    2.44 +    ASSERT(d->tot_pages > 0);
    2.45 +
    2.46      ASSERT( image_len < rma_sz );
    2.47  
    2.48      si = (start_info_t *)(rma_addr(&d->arch, RMA_START_INFO) + rma);
    2.49 @@ -162,10 +172,6 @@ int construct_dom0(struct domain *d,
    2.50  
    2.51      eomem = si->shared_info;
    2.52  
    2.53 -    /* allow dom0 to access all of system RAM */
    2.54 -    d->arch.logical_base_pfn = 128 << (20 - PAGE_SHIFT); /* 128 MB */
    2.55 -    d->arch.logical_end_pfn = max_page;
    2.56 -
    2.57      /* number of pages accessible */
    2.58      si->nr_pages = rma_sz >> PAGE_SHIFT;
    2.59  
     3.1 --- a/xen/arch/powerpc/iommu.c	Fri Aug 25 14:34:51 2006 -0400
     3.2 +++ b/xen/arch/powerpc/iommu.c	Fri Aug 25 15:28:48 2006 -0400
     3.3 @@ -52,17 +52,14 @@ int iommu_put(u32 buid, ulong ioba, unio
     3.4  
     3.5          pfn = tce.tce_bits.tce_rpn;
     3.6          mfn = pfn2mfn(d, pfn, &mtype);
     3.7 -        if (mtype != 0) {
     3.8 -            panic("we don't do non-RMO memory yet\n");
     3.9 -        }
    3.10 -
    3.11 +        if (mfn > 0) {
    3.12  #ifdef DEBUG
    3.13 -        printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
    3.14 -               ioba, pfn, mfn);
    3.15 +            printk("%s: ioba=0x%lx pfn=0x%lx mfn=0x%lx\n", __func__,
    3.16 +                   ioba, pfn, mfn);
    3.17  #endif
    3.18 -        tce.tce_bits.tce_rpn = mfn;
    3.19 -
    3.20 -        return iommu_phbs[buid].iommu_put(ioba, tce);
    3.21 +            tce.tce_bits.tce_rpn = mfn;
    3.22 +            return iommu_phbs[buid].iommu_put(ioba, tce);
    3.23 +        }
    3.24      }
    3.25      return -1;
    3.26  }
     4.1 --- a/xen/arch/powerpc/mm.c	Fri Aug 25 14:34:51 2006 -0400
     4.2 +++ b/xen/arch/powerpc/mm.c	Fri Aug 25 15:28:48 2006 -0400
     4.3 @@ -13,9 +13,10 @@
     4.4   * along with this program; if not, write to the Free Software
     4.5   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
     4.6   *
     4.7 - * Copyright (C) IBM Corp. 2005
     4.8 + * Copyright (C) IBM Corp. 2005, 2006
     4.9   *
    4.10   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
    4.11 + *          Jimi Xenidis <jimix@watson.ibm.com>
    4.12   */
    4.13  
    4.14  #include <xen/config.h>
    4.15 @@ -23,10 +24,19 @@
    4.16  #include <xen/shadow.h>
    4.17  #include <xen/kernel.h>
    4.18  #include <xen/sched.h>
    4.19 +#include <xen/perfc.h>
    4.20  #include <asm/misc.h>
    4.21  #include <asm/init.h>
    4.22  #include <asm/page.h>
    4.23  
    4.24 +#ifdef VERBOSE
    4.25 +#define MEM_LOG(_f, _a...)                                  \
    4.26 +  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",            \
    4.27 +         current->domain->domain_id , __LINE__ , ## _a )
    4.28 +#else
    4.29 +#define MEM_LOG(_f, _a...) ((void)0)
    4.30 +#endif
    4.31 +
    4.32  /* Frame table and its size in pages. */
    4.33  struct page_info *frame_table;
    4.34  unsigned long frame_table_size;
    4.35 @@ -53,16 +63,128 @@ int steal_page(struct domain *d, struct 
    4.36      return 1;
    4.37  }
    4.38  
    4.39 -
    4.40 -int get_page_type(struct page_info *page, u32 type)
    4.41 +void put_page_type(struct page_info *page)
    4.42  {
    4.43 -    panic("%s called\n", __func__);
    4.44 -    return 1;
    4.45 +    unsigned long nx, x, y = page->u.inuse.type_info;
    4.46 +
    4.47 +    do {
    4.48 +        x  = y;
    4.49 +        nx = x - 1;
    4.50 +
    4.51 +        ASSERT((x & PGT_count_mask) != 0);
    4.52 +
    4.53 +        /*
    4.54 +         * The page should always be validated while a reference is held. The 
    4.55 +         * exception is during domain destruction, when we forcibly invalidate 
    4.56 +         * page-table pages if we detect a referential loop.
    4.57 +         * See domain.c:relinquish_list().
    4.58 +         */
    4.59 +        ASSERT((x & PGT_validated) || 
    4.60 +               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
    4.61 +
    4.62 +        if ( unlikely((nx & PGT_count_mask) == 0) )
    4.63 +        {
    4.64 +            /* Record TLB information for flush later. */
    4.65 +            page->tlbflush_timestamp = tlbflush_current_time();
    4.66 +        }
    4.67 +        else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == 
    4.68 +                           (PGT_pinned | 1)) )
    4.69 +        {
    4.70 +            /* Page is now only pinned. Make the back pointer mutable again. */
    4.71 +            nx |= PGT_va_mutable;
    4.72 +        }
    4.73 +    }
    4.74 +    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
    4.75  }
    4.76  
    4.77 -void put_page_type(struct page_info *page)
    4.78 +
    4.79 +int get_page_type(struct page_info *page, unsigned long type)
    4.80  {
    4.81 -    panic("%s called\n", __func__);
    4.82 +    unsigned long nx, x, y = page->u.inuse.type_info;
    4.83 +
    4.84 + again:
    4.85 +    do {
    4.86 +        x  = y;
    4.87 +        nx = x + 1;
    4.88 +        if ( unlikely((nx & PGT_count_mask) == 0) )
    4.89 +        {
    4.90 +            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
    4.91 +            return 0;
    4.92 +        }
    4.93 +        else if ( unlikely((x & PGT_count_mask) == 0) )
    4.94 +        {
    4.95 +            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
    4.96 +            {
    4.97 +                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
    4.98 +                {
    4.99 +                    /*
   4.100 +                     * On type change we check to flush stale TLB
   4.101 +                     * entries. This may be unnecessary (e.g., page
   4.102 +                     * was GDT/LDT) but those circumstances should be
   4.103 +                     * very rare.
   4.104 +                     */
   4.105 +                    cpumask_t mask =
   4.106 +                        page_get_owner(page)->domain_dirty_cpumask;
   4.107 +                    tlbflush_filter(mask, page->tlbflush_timestamp);
   4.108 +
   4.109 +                    if ( unlikely(!cpus_empty(mask)) )
   4.110 +                    {
   4.111 +                        perfc_incrc(need_flush_tlb_flush);
   4.112 +                        flush_tlb_mask(mask);
   4.113 +                    }
   4.114 +                }
   4.115 +
   4.116 +                /* We lose existing type, back pointer, and validity. */
   4.117 +                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
   4.118 +                nx |= type;
   4.119 +
   4.120 +                /* No special validation needed for writable pages. */
   4.121 +                /* Page tables and GDT/LDT need to be scanned for validity. */
   4.122 +                if ( type == PGT_writable_page )
   4.123 +                    nx |= PGT_validated;
   4.124 +            }
   4.125 +        }
   4.126 +        else
   4.127 +        {
   4.128 +            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
   4.129 +            {
   4.130 +                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
   4.131 +                {
   4.132 +                    return 0;
   4.133 +                }
   4.134 +                else if ( (x & PGT_va_mask) == PGT_va_mutable )
   4.135 +                {
   4.136 +                    /* The va backpointer is mutable, hence we update it. */
   4.137 +                    nx &= ~PGT_va_mask;
   4.138 +                    nx |= type; /* we know the actual type is correct */
   4.139 +                }
   4.140 +                else if ( (type & PGT_va_mask) != PGT_va_mutable )
   4.141 +                {
   4.142 +                    ASSERT((type & PGT_va_mask) != (x & PGT_va_mask));
   4.143 +
   4.144 +                    /* This table is possibly mapped at multiple locations. */
   4.145 +                    nx &= ~PGT_va_mask;
   4.146 +                    nx |= PGT_va_unknown;
   4.147 +                }
   4.148 +            }
   4.149 +            if ( unlikely(!(x & PGT_validated)) )
   4.150 +            {
   4.151 +                /* Someone else is updating validation of this page. Wait... */
   4.152 +                while ( (y = page->u.inuse.type_info) == x )
   4.153 +                    cpu_relax();
   4.154 +                goto again;
   4.155 +            }
   4.156 +        }
   4.157 +    }
   4.158 +    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
   4.159 +
   4.160 +    if ( unlikely(!(nx & PGT_validated)) )
   4.161 +    {
    4.162 +        /* No one else is updating simultaneously. */
   4.163 +        __set_bit(_PGT_validated, &page->u.inuse.type_info);
   4.164 +    }
   4.165 +
   4.166 +    return 1;
   4.167  }
   4.168  
   4.169  void __init init_frametable(void)
   4.170 @@ -107,44 +229,50 @@ extern void copy_page(void *dp, void *sp
   4.171      }
   4.172  }
   4.173  
   4.174 +static int mfn_in_hole(ulong mfn)
   4.175 +{
   4.176 +    /* totally cheating */
   4.177 +    if (mfn >= (0xf0000000UL >> PAGE_SHIFT) &&
   4.178 +        mfn < (((1UL << 32) - 1) >> PAGE_SHIFT))
   4.179 +        return 1;
   4.180 +
   4.181 +    return 0;
   4.182 +}
   4.183 +
   4.184  ulong pfn2mfn(struct domain *d, long pfn, int *type)
   4.185  {
   4.186      ulong rma_base_mfn = page_to_mfn(d->arch.rma_page);
   4.187      ulong rma_size_mfn = 1UL << d->arch.rma_order;
   4.188 -    ulong mfn;
   4.189 -    int t;
   4.190  
   4.191      if (pfn < rma_size_mfn) {
   4.192 -        mfn = pfn + rma_base_mfn;
   4.193 -        t = PFN_TYPE_RMA;
   4.194 -    } else if (pfn >= d->arch.logical_base_pfn &&
   4.195 -               pfn < d->arch.logical_end_pfn) {
   4.196 -        if (test_bit(_DOMF_privileged, &d->domain_flags)) {
   4.197 -            /* This hack allows dom0 to map all memory, necessary to
   4.198 -             * initialize domU state. */
   4.199 -            mfn = pfn;
   4.200 -        } else {
   4.201 -            panic("we do not handle the logical area yet\n");
   4.202 -            mfn = 0;
   4.203 -        }
   4.204 -
   4.205 -        t = PFN_TYPE_LOGICAL;
   4.206 -    } else {
   4.207 -        /* don't know */
   4.208 -        mfn = pfn;
   4.209 -        t = PFN_TYPE_IO;
   4.210 +        if (type)
   4.211 +            *type = PFN_TYPE_RMA;
   4.212 +        return pfn + rma_base_mfn;
   4.213      }
   4.214  
   4.215 -    if (type != NULL)
   4.216 -        *type = t;
   4.217 +    if (test_bit(_DOMF_privileged, &d->domain_flags) &&
   4.218 +        mfn_in_hole(pfn)) {
   4.219 +        if (type)
   4.220 +            *type = PFN_TYPE_IO;
   4.221 +        return pfn;
   4.222 +    }
   4.223  
   4.224 -    return mfn;
   4.225 +    /* This hack allows dom0 to map all memory, necessary to
   4.226 +     * initialize domU state. */
   4.227 +    if (test_bit(_DOMF_privileged, &d->domain_flags)) {
   4.228 +        if (type)
   4.229 +            *type = PFN_TYPE_REMOTE;
   4.230 +        return pfn;
   4.231 +    }
   4.232 +
   4.233 +    BUG();
   4.234 +    return 0;
   4.235  }
   4.236  
   4.237  void guest_physmap_add_page(
   4.238      struct domain *d, unsigned long gpfn, unsigned long mfn)
   4.239  {
   4.240 -    panic("%s\n", __func__);
   4.241 +    printk("%s(%d, 0x%lx, 0x%lx)\n", __func__, d->domain_id, gpfn, mfn);
   4.242  }
   4.243  void guest_physmap_remove_page(
   4.244      struct domain *d, unsigned long gpfn, unsigned long mfn)
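
get_page_type and put_page_type above share one lock-free pattern: read type_info, compute the next value, and retry if cmpxchg observed a concurrent writer. A stripped-down sketch of that loop on a bare counter; the GCC builtin stands in for Xen's cmpxchg() (it returns the old value, so y != x means we raced) and all PGT_* flag handling is elided:

    #include <stdio.h>

    static unsigned long type_info;

    static void get_ref(void)
    {
        unsigned long x, nx, y = type_info;

        do {
            x  = y;
            nx = x + 1;     /* the real code also adjusts PGT_* flag bits */
        } while ((y = __sync_val_compare_and_swap(&type_info, x, nx)) != x);
    }

    int main(void)
    {
        get_ref();
        printf("type count now %lu\n", type_info);   /* 1 */
        return 0;
    }
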
     5.1 --- a/xen/arch/powerpc/papr/xlate.c	Fri Aug 25 14:34:51 2006 -0400
     5.2 +++ b/xen/arch/powerpc/papr/xlate.c	Fri Aug 25 15:28:48 2006 -0400
     5.3 @@ -154,13 +154,13 @@ static void h_enter(struct cpu_user_regs
     5.4          }
     5.5  
     5.6          /* get correct pgshift value */
     5.7 -        pgshift = d->arch.large_page_shift[lp_size];
     5.8 +        pgshift = d->arch.large_page_order[lp_size] + PAGE_SHIFT;
     5.9      }
    5.10  
    5.11      /* get the correct logical RPN in terms of 4K pages need to mask
    5.12       * off lp bits and unused arpn bits if this is a large page */
    5.13  
    5.14 -    lpn = ~0ULL << (pgshift - 12);
    5.15 +    lpn = ~0ULL << (pgshift - PAGE_SHIFT);
    5.16      lpn = pte.bits.rpn & lpn;
    5.17  
    5.18      rpn = pfn2mfn(d, lpn, &mtype);
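
The pgshift arithmetic in h_enter is easier to follow with numbers: with the 970's single large-page order of 12 (see cpu_large_page_orders below) and a PAGE_SHIFT of 12, pgshift is 24 (16 MB), and ~0ULL << (pgshift - PAGE_SHIFT) rounds a 4 K logical page number down to a 16 MB boundary. A worked sketch with an illustrative rpn value:

    #include <stdio.h>

    int main(void)
    {
        unsigned int page_shift = 12;           /* assumed 4 KiB base pages */
        unsigned int lp_order   = 12;           /* 970: 16 MiB / 4 KiB */
        unsigned int pgshift    = lp_order + page_shift;     /* 24 */
        unsigned long long rpn  = 0x12345;      /* hypothetical 4K page number */

        /* mask off the low (pgshift - page_shift) bits: a large page must
         * start on a large-page boundary in 4K-page-number space */
        unsigned long long lpn = rpn & (~0ULL << (pgshift - page_shift));

        printf("rpn=0x%llx -> lpn=0x%llx\n", rpn, lpn);   /* lpn=0x12000 */
        return 0;
    }
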
     6.1 --- a/xen/arch/powerpc/powerpc64/ppc970.c	Fri Aug 25 14:34:51 2006 -0400
     6.2 +++ b/xen/arch/powerpc/powerpc64/ppc970.c	Fri Aug 25 15:28:48 2006 -0400
     6.3 @@ -41,6 +41,17 @@ unsigned int cpu_rma_order(void)
     6.4      return rma_log_size - PAGE_SHIFT;
     6.5  }
     6.6  
     6.7 +unsigned int cpu_large_page_orders(uint *sizes, uint max)
     6.8 +{
     6.9 +    uint lp_log_size = 4 + 20; /* 1 << (4 + 20) == 16M */
    6.10 +    if (max < 1)
    6.11 +        return 0;
    6.12 +
    6.13 +    sizes[0] = lp_log_size - PAGE_SHIFT;
    6.14 +
    6.15 +    return 1;
    6.16 +}
    6.17 +
    6.18  void cpu_initialize(int cpuid)
    6.19  {
    6.20      ulong r1, r2;
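
cpu_large_page_orders reports large-page sizes as orders relative to PAGE_SHIFT, which is exactly what arch_domain_create now stores in large_page_order[]. A standalone sketch of the caller pattern, with the 970 routine stubbed locally (the _970 suffix and the main wrapper are illustrative, not from the patch) so it runs anywhere:

    #include <stdio.h>
    typedef unsigned int uint;
    #define PAGE_SHIFT 12
    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    /* Local stub mirroring the ppc970.c routine above. */
    static uint cpu_large_page_orders_970(uint *sizes, uint max)
    {
        uint lp_log_size = 4 + 20;          /* 1 << 24 == 16M */
        if (max < 1)
            return 0;
        sizes[0] = lp_log_size - PAGE_SHIFT;
        return 1;
    }

    int main(void)
    {
        uint orders[4];
        uint n = cpu_large_page_orders_970(orders, ARRAY_SIZE(orders));
        printf("%u large page size(s); order[0]=%u (16 MiB)\n", n, orders[0]);
        return 0;
    }
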
     7.1 --- a/xen/include/asm-powerpc/domain.h	Fri Aug 25 14:34:51 2006 -0400
     7.2 +++ b/xen/include/asm-powerpc/domain.h	Fri Aug 25 15:28:48 2006 -0400
     7.3 @@ -38,15 +38,11 @@ struct arch_domain {
     7.4      struct page_info *rma_page;
     7.5      uint rma_order;
     7.6  
     7.7 -    /* This is regular memory, only available thru translataion */
     7.8 -    ulong logical_base_pfn;
     7.9 -    ulong logical_end_pfn;
    7.10 -
    7.11      /* I/O-port access bitmap mask. */
    7.12      u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
    7.13  
    7.14      uint large_page_sizes;
    7.15 -    char large_page_shift[4];
    7.16 +    uint large_page_order[4];
    7.17  } __cacheline_aligned;
    7.18  
    7.19  struct slb_entry {
     8.1 --- a/xen/include/asm-powerpc/mm.h	Fri Aug 25 14:34:51 2006 -0400
     8.2 +++ b/xen/include/asm-powerpc/mm.h	Fri Aug 25 15:28:48 2006 -0400
     8.3 @@ -24,6 +24,7 @@
     8.4  #include <public/xen.h>
     8.5  #include <xen/list.h>
     8.6  #include <xen/types.h>
     8.7 +#include <xen/mm.h>
     8.8  #include <asm/misc.h>
     8.9  #include <asm/system.h>
    8.10  #include <asm/flushtlb.h>
    8.11 @@ -33,7 +34,6 @@
    8.12  #define memguard_unguard_range(_p,_l)    ((void)0)
    8.13  
    8.14  extern unsigned long xenheap_phys_end;
    8.15 -#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
    8.16  
    8.17  /*
    8.18   * Per-page-frame information.
    8.19 @@ -43,7 +43,6 @@ extern unsigned long xenheap_phys_end;
    8.20   *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
    8.21   */
    8.22  #define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
    8.23 -#define PRtype_info "016lx"
    8.24  
    8.25  /* XXX copy-and-paste job; re-examine me */
    8.26  struct page_info
    8.27 @@ -63,7 +62,7 @@ struct page_info
    8.28          /* Page is in use: ((count_info & PGC_count_mask) != 0). */
    8.29          struct {
    8.30              /* Owner of this page (NULL if page is anonymous). */
    8.31 -            struct domain *_domain;
    8.32 +            u32 _domain;
    8.33              /* Type reference count and various PGT_xxx flags and fields. */
    8.34              unsigned long type_info;
    8.35          } inuse;
    8.36 @@ -80,80 +79,132 @@ struct page_info
    8.37  
    8.38  };
    8.39  
    8.40 +struct page_extents {
    8.41 +    /* Each frame can be threaded onto a doubly-linked list. */
    8.42 +    struct list_head pe_list;
    8.43 +
    8.44 +    /* page extent */
    8.45 +    struct page_info *pg;
    8.46 +    uint order;
    8.47 +    ulong pfn;
    8.48 +};
    8.49 +
    8.50   /* The following page types are MUTUALLY EXCLUSIVE. */
    8.51  #define PGT_none            (0<<29) /* no special uses of this page */
    8.52 -#define PGT_l1_page_table   (1<<29) /* using this page as an L1 page table? */
    8.53 -#define PGT_l2_page_table   (2<<29) /* using this page as an L2 page table? */
    8.54 -#define PGT_l3_page_table   (3<<29) /* using this page as an L3 page table? */
    8.55 -#define PGT_l4_page_table   (4<<29) /* using this page as an L4 page table? */
    8.56 -#define PGT_gdt_page        (5<<29) /* using this page in a GDT? */
    8.57 -#define PGT_ldt_page        (6<<29) /* using this page in an LDT? */
    8.58 +#define PGT_RMA             (1<<29) /* This page is an RMA page? */
    8.59  #define PGT_writable_page   (7<<29) /* has writable mappings of this page? */
    8.60  #define PGT_type_mask       (7<<29) /* Bits 29-31. */
    8.61 - /* Has this page been validated for use as its current type? */
    8.62 -#define _PGT_validated      28
    8.63 -#define PGT_validated       (1U<<_PGT_validated)
    8.64 +
    8.65   /* Owning guest has pinned this page to its current type? */
    8.66 -#define _PGT_pinned         27
    8.67 +#define _PGT_pinned         28
    8.68  #define PGT_pinned          (1U<<_PGT_pinned)
    8.69 - /* The 10 most significant bits of virt address if this is a page table. */
    8.70 -#define PGT_va_shift        17
    8.71 -#define PGT_va_mask         (((1U<<10)-1)<<PGT_va_shift)
    8.72 + /* Has this page been validated for use as its current type? */
    8.73 +#define _PGT_validated      27
    8.74 +#define PGT_validated       (1U<<_PGT_validated)
    8.75 +
    8.76 + /* The 28 most significant bits of virt address if this is a page table. */
    8.77 +#define PGT_va_shift        32
    8.78 +#define PGT_va_mask         ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
    8.79   /* Is the back pointer still mutable (i.e. not fixed yet)? */
    8.80 -#define PGT_va_mutable      (((1U<<10)-1)<<PGT_va_shift)
    8.81 +#define PGT_va_mutable      ((unsigned long)((1U<<28)-1)<<PGT_va_shift)
    8.82   /* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
    8.83 -#define PGT_va_unknown      (((1U<<10)-2)<<PGT_va_shift)
    8.84 - /* 17-bit count of uses of this frame as its current type. */
    8.85 -#define PGT_count_mask      ((1U<<17)-1)
    8.86 +#define PGT_va_unknown      ((unsigned long)((1U<<28)-2)<<PGT_va_shift)
    8.87 +
    8.88 + /* 16-bit count of uses of this frame as its current type. */
    8.89 +#define PGT_count_mask      ((1U<<16)-1)
    8.90  
    8.91   /* Cleared when the owning guest 'frees' this page. */
    8.92  #define _PGC_allocated      31
    8.93  #define PGC_allocated       (1U<<_PGC_allocated)
    8.94 - /* 31-bit count of references to this frame. */
    8.95 -#define PGC_count_mask      ((1U<<31)-1)
    8.96 + /* Set on a *guest* page to mark it out-of-sync with its shadow */
    8.97 +#define _PGC_out_of_sync     30
    8.98 +#define PGC_out_of_sync     (1U<<_PGC_out_of_sync)
    8.99 + /* Set when a page is in use as a page table */
   8.100 +#define _PGC_page_table      29
   8.101 +#define PGC_page_table      (1U<<_PGC_page_table)
   8.102 + /* 29-bit count of references to this frame. */
   8.103 +#define PGC_count_mask      ((1U<<29)-1)
   8.104 +
   8.105 +#define IS_XEN_HEAP_FRAME(_pfn) (page_to_maddr(_pfn) < xenheap_phys_end)
   8.106 +
   8.107 +static inline struct domain *unpickle_domptr(u32 _domain)
   8.108 +{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
   8.109 +
   8.110 +static inline u32 pickle_domptr(struct domain *domain)
   8.111 +{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
   8.112 +
   8.113 +#define PRtype_info "016lx" /* should only be used for printk's */
   8.114 +
   8.115 +#define page_get_owner(_p)    (unpickle_domptr((_p)->u.inuse._domain))
   8.116 +#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
   8.117 +
   8.118 +extern struct page_info *frame_table;
   8.119 +extern unsigned long max_page;
   8.120 +extern unsigned long total_pages;
   8.121 +void init_frametable(void);
   8.122  
   8.123  static inline void put_page(struct page_info *page)
   8.124  {
   8.125 -#if 0
   8.126 -    int count;
   8.127 -
   8.128 -    count = atomic_dec_return(&page->count_info);
   8.129 +    u32 nx, x, y = page->count_info;
   8.130  
   8.131 -    if ( unlikely((count & PGC_count_mask) == 0) )
   8.132 +    do {
   8.133 +        x  = y;
   8.134 +        nx = x - 1;
   8.135 +    }
   8.136 +    while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
   8.137 +
   8.138 +    if ( unlikely((nx & PGC_count_mask) == 0) ) {
   8.139 +        panic("about to free page\n");
   8.140          free_domheap_page(page);
   8.141 -#else
   8.142 -    trap();
   8.143 -#endif
   8.144 +    }
   8.145  }
   8.146  
   8.147  static inline int get_page(struct page_info *page,
   8.148                             struct domain *domain)
   8.149  {
   8.150 -#if 0
   8.151 -    int count;
   8.152 -
   8.153 -    count = atomic_inc_return(&page->count_info);
   8.154 +    u32 x, nx, y = page->count_info;
   8.155 +    u32 d, nd = page->u.inuse._domain;
   8.156 +    u32 _domain = pickle_domptr(domain);
   8.157  
   8.158 -    if (((count & PGC_count_mask) == 0) ||      /* Count overflow? */
   8.159 -            ((count & PGC_count_mask) == 1) ||  /* Wasn't allocated? */
   8.160 -            ((page->domain != domain)))         /* Wrong owner? */
   8.161 -    {
   8.162 -        atomic_dec(&page->count_info);
   8.163 -        return 0;
   8.164 +    do {
   8.165 +        x  = y;
   8.166 +        nx = x + 1;
   8.167 +        d  = nd;
   8.168 +        if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
   8.169 +             unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
   8.170 +             unlikely(d != _domain) )                /* Wrong owner? */
   8.171 +        {
   8.172 +            return 0;
   8.173 +        }
   8.174 +        y = cmpxchg(&page->count_info, x, nx);
   8.175      }
   8.176 +    while ( unlikely(y != x) );
   8.177  
   8.178 -#else
   8.179 -    trap();
   8.180 -#endif
   8.181      return 1;
   8.182  }
   8.183  
   8.184 +extern void put_page_type(struct page_info *page);
   8.185 +extern int  get_page_type(struct page_info *page, unsigned long type);
   8.186 +
   8.187 +static inline void put_page_and_type(struct page_info *page)
   8.188 +{
   8.189 +    put_page_type(page);
   8.190 +    put_page(page);
   8.191 +}
   8.192 +
   8.193  static inline int get_page_and_type(struct page_info *page,
   8.194                                      struct domain *domain,
   8.195 -                                    u32 type)
   8.196 +                                    unsigned long type)
   8.197  {
   8.198 -    trap();
   8.199 -    return 1;
   8.200 +    int rc = get_page(page, domain);
   8.201 +
   8.202 +    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
   8.203 +    {
   8.204 +        put_page(page);
   8.205 +        rc = 0;
   8.206 +    }
   8.207 +
   8.208 +    return rc;
   8.209  }
   8.210  
   8.211  static inline int page_is_removable(struct page_info *page)
   8.212 @@ -161,17 +212,10 @@ static inline int page_is_removable(stru
   8.213      return ((page->count_info & PGC_count_mask) == 1);
   8.214  }
   8.215  
   8.216 -int get_page_type(struct page_info *page, u32 type);
   8.217 -
   8.218  #define set_machinetophys(_mfn, _pfn) (trap(), 0)
   8.219  
   8.220  extern void synchronise_pagetables(unsigned long cpu_mask);
   8.221  
   8.222 -static inline void put_page_and_type(struct page_info *page)
   8.223 -{
   8.224 -    trap();
   8.225 -}
   8.226 -
   8.227  /* XXX don't know what this is for */
   8.228  typedef struct {
   8.229      void (*enable)(struct domain *);
   8.230 @@ -179,17 +223,10 @@ typedef struct {
   8.231  } vm_assist_info_t;
   8.232  extern vm_assist_info_t vm_assist_info[];
   8.233  
   8.234 -#define page_get_owner(_p)    ((_p)->u.inuse._domain)
   8.235 -#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = _d)
   8.236 -
   8.237  #define share_xen_page_with_guest(p, d, r) do { } while (0)
   8.238  #define share_xen_page_with_privileged_guests(p, r) do { } while (0)
   8.239  
   8.240 -extern struct page_info *frame_table;
   8.241  extern unsigned long frame_table_size;
   8.242 -extern unsigned long max_page;
   8.243 -extern unsigned long total_pages;
   8.244 -void init_frametable(void);
   8.245  
   8.246  /* hope that accesses to this will fail spectacularly */
   8.247  #define machine_to_phys_mapping ((u32 *)-1UL)
   8.248 @@ -199,12 +236,12 @@ extern int update_grant_va_mapping(unsig
   8.249                                     struct domain *,
   8.250                                     struct vcpu *);
   8.251  
   8.252 -extern void put_page_type(struct page_info *page);
   8.253 +#define PFN_TYPE_RMA 1
   8.254 +#define PFN_TYPE_LOGICAL 2
   8.255 +#define PFN_TYPE_IO 3
   8.256 +#define PFN_TYPE_REMOTE 4
   8.257  
   8.258 -#define PFN_TYPE_RMA 0
   8.259 -#define PFN_TYPE_LOGICAL 1
   8.260 -#define PFN_TYPE_IO 2
   8.261 -extern ulong pfn2mfn(struct domain *d, long mfn, int *type);
   8.262 +extern ulong pfn2mfn(struct domain *d, long pfn, int *type);
   8.263  
   8.264  /* Arch-specific portion of memory_op hypercall. */
   8.265  long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg);
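
pickle_domptr/unpickle_domptr above shrink the page owner field to 32 bits by storing the domain struct's physical address rather than a pointer; __va/__pa are just a constant offset in the hypervisor's linear map, and zero/odd cookies mean "no owner". A toy round-trip of the same idea, with "physical" memory simulated by a static buffer (all values illustrative, not Xen's memory layout):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy direct map: __pa/__va are a constant offset, as in the real
     * hypervisor's linear mapping. */
    static unsigned char physmem[4096];
    #define __va(pa) ((void *)(physmem + (pa)))
    #define __pa(va) ((uint32_t)((unsigned char *)(va) - physmem))

    struct domain { int domain_id; };

    static struct domain *unpickle_domptr(uint32_t d)
    { return ((d == 0) || (d & 1)) ? NULL : __va(d); }

    static uint32_t pickle_domptr(struct domain *d)
    { return (d == NULL) ? 0 : __pa(d); }

    int main(void)
    {
        /* place a domain struct at a nonzero, even "physical" address */
        struct domain *dom = __va(64);
        dom->domain_id = 1;

        uint32_t cookie = pickle_domptr(dom);      /* 64 */
        assert(unpickle_domptr(cookie) == dom);
        assert(unpickle_domptr(0) == NULL);        /* anonymous page */
        printf("owner pickled to 0x%x\n", cookie);
        return 0;
    }
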
     9.1 --- a/xen/include/asm-powerpc/processor.h	Fri Aug 25 14:34:51 2006 -0400
     9.2 +++ b/xen/include/asm-powerpc/processor.h	Fri Aug 25 15:28:48 2006 -0400
     9.3 @@ -40,7 +40,8 @@ struct cpu_user_regs;
     9.4  extern void show_registers(struct cpu_user_regs *);
     9.5  extern void show_execution_state(struct cpu_user_regs *);
     9.6  extern void show_backtrace(ulong sp, ulong lr, ulong pc);
     9.7 -extern unsigned int cpu_rma_order(void);
     9.8 +extern uint cpu_rma_order(void);
     9.9 +extern uint cpu_large_page_orders(uint *sizes, uint max);
    9.10  extern void cpu_initialize(int cpuid);
    9.11  extern void cpu_init_vcpu(struct vcpu *);
    9.12  extern void save_cpu_sprs(struct vcpu *);