ia64/xen-unstable

changeset 14304:18cf0c56226d

xen: Clean up heap allocator.
Move some common free/alloc code into the core heap-allocator
functions. Make it clear that alloc/free can only ever be done outside
hardirq context (previously this was unsafe: it could deadlock on
heap_lock). Simplified models of both changes are sketched below, after
the file list and after the page_alloc.c hunks.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Mar 08 14:29:09 2007 +0000 (2007-03-08)
parents a7f6392ea850
children fdbd9b91a030
files xen/common/page_alloc.c xen/common/xmalloc.c
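
The core behavioural change is that heap_lock is now taken as a plain
spinlock, and callers of the allocator are instead required to be outside
hardirq context, enforced with ASSERT(!in_irq()). Below is a minimal
user-space model of that rule, not the Xen implementation: the names
(alloc_heap_pages_model, in_irq_flag) are invented for illustration, and a
pthread mutex plus malloc stand in for heap_lock and the real free-list
work.

    /* User-space model (hypothetical names) of the new locking rule:
     * take heap_lock as a plain lock and assert the caller is not in
     * hardirq context, rather than saving/restoring interrupt flags. */
    #include <assert.h>
    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t heap_lock = PTHREAD_MUTEX_INITIALIZER;
    static int in_irq_flag;                /* stand-in for Xen's in_irq() */

    static int in_irq(void) { return in_irq_flag; }

    static void *alloc_heap_pages_model(size_t bytes)
    {
        void *p;

        assert(!in_irq());                 /* never allocate from hardirq */

        pthread_mutex_lock(&heap_lock);    /* plain lock, no irq masking */
        p = malloc(bytes);                 /* placeholder for free-list work */
        pthread_mutex_unlock(&heap_lock);

        return p;
    }

    static void free_heap_pages_model(void *p)
    {
        assert(!in_irq());                 /* never free from hardirq */

        pthread_mutex_lock(&heap_lock);
        free(p);                           /* placeholder for free-list work */
        pthread_mutex_unlock(&heap_lock);
    }

    int main(void)
    {
        void *p = alloc_heap_pages_model(4096);
        free_heap_pages_model(p);
        return 0;
    }

The point of the discipline is that a plain spin_lock(&heap_lock) would
deadlock if an interrupt handler re-entered the allocator on the same CPU;
asserting !in_irq() rules that out by construction instead of papering over
it with local_irq_save/restore (or spin_lock_irq) on every path.
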
line diff
     1.1 --- a/xen/common/page_alloc.c	Thu Mar 08 10:54:56 2007 +0000
     1.2 +++ b/xen/common/page_alloc.c	Thu Mar 08 14:29:09 2007 +0000
     1.3 @@ -49,7 +49,7 @@ string_param("badpage", opt_badpage);
     1.4   * Bit width of the DMA heap.
     1.5   */
     1.6  static unsigned int  dma_bitsize = CONFIG_DMA_BITSIZE;
     1.7 -static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
     1.8 +static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
     1.9  static void parse_dma_bits(char *s)
    1.10  {
    1.11      unsigned int v = simple_strtol(s, NULL, 0);
    1.12 @@ -345,6 +345,7 @@ static struct page_info *alloc_heap_page
    1.13      unsigned int i, j, zone;
    1.14      unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
    1.15      unsigned long request = 1UL << order;
    1.16 +    cpumask_t extra_cpus_mask, mask;
    1.17      struct page_info *pg;
    1.18  
    1.19      ASSERT(node >= 0);
    1.20 @@ -403,6 +404,29 @@ static struct page_info *alloc_heap_page
    1.21  
    1.22      spin_unlock(&heap_lock);
    1.23  
    1.24 +    cpus_clear(mask);
    1.25 +
    1.26 +    for ( i = 0; i < (1 << order); i++ )
    1.27 +    {
    1.28 +        /* Reference count must continuously be zero for free pages. */
    1.29 +        BUG_ON(pg[i].count_info != 0);
    1.30 +
    1.31 +        /* Add in any extra CPUs that need flushing because of this page. */
    1.32 +        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
    1.33 +        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
    1.34 +        cpus_or(mask, mask, extra_cpus_mask);
    1.35 +
    1.36 +        /* Initialise fields which have other uses for free pages. */
    1.37 +        pg[i].u.inuse.type_info = 0;
    1.38 +        page_set_owner(&pg[i], NULL);
    1.39 +    }
    1.40 +
    1.41 +    if ( unlikely(!cpus_empty(mask)) )
    1.42 +    {
    1.43 +        perfc_incrc(need_flush_tlb_flush);
    1.44 +        flush_tlb_mask(mask);
    1.45 +    }
    1.46 +
    1.47      return pg;
    1.48  }
    1.49  
    1.50 @@ -411,13 +435,28 @@ static void free_heap_pages(
    1.51      unsigned int zone, struct page_info *pg, unsigned int order)
    1.52  {
    1.53      unsigned long mask;
    1.54 -    unsigned int node = phys_to_nid(page_to_maddr(pg));
    1.55 +    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
    1.56 +    struct domain *d;
    1.57  
    1.58      ASSERT(zone < NR_ZONES);
    1.59      ASSERT(order <= MAX_ORDER);
    1.60      ASSERT(node >= 0);
    1.61      ASSERT(node < num_online_nodes());
    1.62  
    1.63 +    for ( i = 0; i < (1 << order); i++ )
    1.64 +    {
    1.65 +        BUG_ON(pg[i].count_info != 0);
    1.66 +        if ( (d = page_get_owner(&pg[i])) != NULL )
    1.67 +        {
    1.68 +            pg[i].tlbflush_timestamp = tlbflush_current_time();
    1.69 +            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
    1.70 +        }
    1.71 +        else
    1.72 +        {
    1.73 +            cpus_clear(pg[i].u.free.cpumask);
    1.74 +        }
    1.75 +    }
    1.76 +
    1.77      spin_lock(&heap_lock);
    1.78  
    1.79      map_free(page_to_mfn(pg), 1 << order);
    1.80 @@ -554,7 +593,7 @@ void end_boot_allocator(void)
    1.81  /*
    1.82   * Scrub all unallocated pages in all heap zones. This function is more
    1.83   * convoluted than appears necessary because we do not want to continuously
    1.84 - * hold the lock or disable interrupts while scrubbing very large memory areas.
    1.85 + * hold the lock while scrubbing very large memory areas.
    1.86   */
    1.87  void scrub_heap_pages(void)
    1.88  {
    1.89 @@ -575,7 +614,7 @@ void scrub_heap_pages(void)
    1.90          if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
    1.91              printk(".");
    1.92  
    1.93 -        spin_lock_irq(&heap_lock);
    1.94 +        spin_lock(&heap_lock);
    1.95  
    1.96          /* Re-check page status with lock held. */
    1.97          if ( !allocated_in_map(mfn) )
    1.98 @@ -595,7 +634,7 @@ void scrub_heap_pages(void)
    1.99              }
   1.100          }
   1.101  
   1.102 -        spin_unlock_irq(&heap_lock);
   1.103 +        spin_unlock(&heap_lock);
   1.104      }
   1.105  
   1.106      printk("done.\n");
   1.107 @@ -609,8 +648,6 @@ void scrub_heap_pages(void)
   1.108  
   1.109  void init_xenheap_pages(paddr_t ps, paddr_t pe)
   1.110  {
   1.111 -    unsigned long flags;
   1.112 -
   1.113      ps = round_pgup(ps);
   1.114      pe = round_pgdown(pe);
   1.115      if ( pe <= ps )
   1.116 @@ -625,34 +662,22 @@ void init_xenheap_pages(paddr_t ps, padd
   1.117      if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
   1.118          pe -= PAGE_SIZE;
   1.119  
   1.120 -    local_irq_save(flags);
   1.121      init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
   1.122 -    local_irq_restore(flags);
   1.123  }
   1.124  
   1.125  
   1.126  void *alloc_xenheap_pages(unsigned int order)
   1.127  {
   1.128 -    unsigned long flags;
   1.129      struct page_info *pg;
   1.130 -    int i;
   1.131  
   1.132 -    local_irq_save(flags);
   1.133 +    ASSERT(!in_irq());
   1.134 +
   1.135      pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
   1.136 -    local_irq_restore(flags);
   1.137 -
   1.138      if ( unlikely(pg == NULL) )
   1.139          goto no_memory;
   1.140  
   1.141      memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
   1.142  
   1.143 -    for ( i = 0; i < (1 << order); i++ )
   1.144 -    {
   1.145 -        pg[i].count_info        = 0;
   1.146 -        pg[i].u.inuse._domain   = 0;
   1.147 -        pg[i].u.inuse.type_info = 0;
   1.148 -    }
   1.149 -
   1.150      return page_to_virt(pg);
   1.151  
   1.152   no_memory:
   1.153 @@ -663,16 +688,14 @@ void *alloc_xenheap_pages(unsigned int o
   1.154  
   1.155  void free_xenheap_pages(void *v, unsigned int order)
   1.156  {
   1.157 -    unsigned long flags;
   1.158 +    ASSERT(!in_irq());
   1.159  
   1.160      if ( v == NULL )
   1.161          return;
   1.162  
   1.163 -    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));    
   1.164 +    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
   1.165  
   1.166 -    local_irq_save(flags);
   1.167      free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
   1.168 -    local_irq_restore(flags);
   1.169  }
   1.170  
   1.171  
   1.172 @@ -762,8 +785,6 @@ struct page_info *__alloc_domheap_pages(
   1.173      unsigned int memflags)
   1.174  {
   1.175      struct page_info *pg = NULL;
   1.176 -    cpumask_t mask;
   1.177 -    unsigned long i;
   1.178      unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
   1.179  
   1.180      ASSERT(!in_irq());
   1.181 @@ -792,38 +813,10 @@ struct page_info *__alloc_domheap_pages(
   1.182              return NULL;
   1.183      }
   1.184  
   1.185 -    if ( pg == NULL )
   1.186 -        if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1,
   1.187 -                                    zone_hi,
   1.188 -                                    cpu, order)) == NULL )
   1.189 -            return NULL;
   1.190 -
   1.191 -    mask = pg->u.free.cpumask;
   1.192 -    tlbflush_filter(mask, pg->tlbflush_timestamp);
   1.193 -
   1.194 -    pg->count_info        = 0;
   1.195 -    pg->u.inuse._domain   = 0;
   1.196 -    pg->u.inuse.type_info = 0;
   1.197 -
   1.198 -    for ( i = 1; i < (1 << order); i++ )
   1.199 -    {
   1.200 -        /* Add in any extra CPUs that need flushing because of this page. */
   1.201 -        cpumask_t extra_cpus_mask;
   1.202 -        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
   1.203 -        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
   1.204 -        cpus_or(mask, mask, extra_cpus_mask);
   1.205 -
   1.206 -        pg[i].count_info        = 0;
   1.207 -        pg[i].u.inuse._domain   = 0;
   1.208 -        pg[i].u.inuse.type_info = 0;
   1.209 -        page_set_owner(&pg[i], NULL);
   1.210 -    }
   1.211 -
   1.212 -    if ( unlikely(!cpus_empty(mask)) )
   1.213 -    {
   1.214 -        perfc_incrc(need_flush_tlb_flush);
   1.215 -        flush_tlb_mask(mask);
   1.216 -    }
   1.217 +    if ( (pg == NULL) &&
   1.218 +         ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
   1.219 +                                 cpu, order)) == NULL) )
   1.220 +         return NULL;
   1.221  
   1.222      if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
   1.223      {
   1.224 @@ -867,10 +860,7 @@ void free_domheap_pages(struct page_info
   1.225  
   1.226          for ( i = 0; i < (1 << order); i++ )
   1.227          {
   1.228 -            shadow_drop_references(d, &pg[i]);
   1.229 -            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
   1.230 -            pg[i].tlbflush_timestamp  = tlbflush_current_time();
   1.231 -            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
   1.232 +            BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
   1.233              list_del(&pg[i].list);
   1.234          }
   1.235  
   1.236 @@ -892,6 +882,7 @@ void free_domheap_pages(struct page_info
   1.237               */
   1.238              for ( i = 0; i < (1 << order); i++ )
   1.239              {
   1.240 +                page_set_owner(&pg[i], NULL);
   1.241                  spin_lock(&page_scrub_lock);
   1.242                  list_add(&pg[i].list, &page_scrub_list);
   1.243                  scrub_pages++;
   1.244 @@ -902,8 +893,6 @@ void free_domheap_pages(struct page_info
   1.245      else
   1.246      {
   1.247          /* Freeing anonymous domain-heap pages. */
   1.248 -        for ( i = 0; i < (1 << order); i++ )
   1.249 -            cpus_clear(pg[i].u.free.cpumask);
   1.250          free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
   1.251          drop_dom_ref = 0;
   1.252      }
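
The other half of the cleanup is visible in the hunks above: the per-page
bookkeeping that used to live in __alloc_domheap_pages and
free_domheap_pages (recording each freed page's dirty-CPU mask and
TLB-flush timestamp, then accumulating a single flush mask at allocation
time) now lives in free_heap_pages and alloc_heap_pages themselves. A rough
user-space model of the mask accumulation follows; the types and names are
invented, and a fixed per-CPU flush time stands in for tlbflush_filter().

    /* User-space model (invented names) of the flush-mask accumulation
     * now done in alloc_heap_pages(): gather the CPUs that may still
     * cache stale translations for any page in the batch, then flush
     * once for the whole mask. */
    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t cpumask_model_t;        /* one bit per CPU */

    struct page_model {
        cpumask_model_t dirty_cpus;          /* recorded by the free path */
        uint32_t        tlbflush_timestamp;  /* when the page was freed */
    };

    /* Keep only CPUs that have not flushed since the page was freed.
     * (Here every CPU is assumed to have last flushed at time 90.) */
    static cpumask_model_t tlbflush_filter_model(cpumask_model_t mask,
                                                 uint32_t timestamp)
    {
        const uint32_t last_flush = 90;
        return (timestamp > last_flush) ? mask : 0;
    }

    int main(void)
    {
        struct page_model pages[4] = {
            { 0x1, 95 }, { 0x2, 80 }, { 0x5, 99 }, { 0x0, 0 },
        };
        cpumask_model_t mask = 0;

        for (int i = 0; i < 4; i++)
        {
            /* Add CPUs not already covered, filtered by the timestamp. */
            cpumask_model_t extra = pages[i].dirty_cpus & ~mask;
            mask |= tlbflush_filter_model(extra,
                                          pages[i].tlbflush_timestamp);
        }

        if (mask != 0)
            printf("would flush TLBs on cpumask %#llx\n",
                   (unsigned long long)mask);  /* one flush for the batch */

        return 0;
    }

Doing this in the core allocator means free_xenheap_pages and
free_domheap_pages no longer need their own copies of the
timestamp/cpumask logic, which is exactly the duplication this changeset
removes.
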
     2.1 --- a/xen/common/xmalloc.c	Thu Mar 08 10:54:56 2007 +0000
     2.2 +++ b/xen/common/xmalloc.c	Thu Mar 08 14:29:09 2007 +0000
     2.3 @@ -33,6 +33,8 @@
     2.4  #include <xen/timer.h>
     2.5  #include <xen/cache.h>
     2.6  #include <xen/prefetch.h>
     2.7 +#include <xen/irq.h>
     2.8 +#include <xen/smp.h>
     2.9  
    2.10  /*
    2.11   * XMALLOC_DEBUG:
    2.12 @@ -175,6 +177,8 @@ void *_xmalloc(size_t size, size_t align
    2.13      struct xmalloc_hdr *i;
    2.14      unsigned long flags;
    2.15  
    2.16 +    ASSERT(!in_irq());
    2.17 +
    2.18      /* We currently always return cacheline aligned. */
    2.19      BUG_ON(align > SMP_CACHE_BYTES);
    2.20  
    2.21 @@ -213,6 +217,8 @@ void xfree(void *p)
    2.22      unsigned long flags;
    2.23      struct xmalloc_hdr *i, *tmp, *hdr;
    2.24  
    2.25 +    ASSERT(!in_irq());
    2.26 +
    2.27      if ( p == NULL )
    2.28          return;
    2.29