ia64/xen-unstable

changeset 981:88ef6048499a

bitkeeper revision 1.634 (3fc267950JCnZdSjqz12f7QhAx9gWA)

Many files:
Cleanups to page reference counting in Xen.
author kaf24@scramble.cl.cam.ac.uk
date Mon Nov 24 20:18:29 2003 +0000 (2003-11-24)
parents a822251cfa9b
children 433e0c504cbe
files xen/arch/i386/mm.c xen/common/dom0_ops.c xen/common/dom_mem_ops.c xen/common/domain.c xen/common/memory.c xen/drivers/block/xen_block.c xen/include/xeno/mm.h xen/net/dev.c xen/net/skbuff.c
line diff
     1.1 --- a/xen/arch/i386/mm.c	Sun Nov 23 11:34:18 2003 +0000
     1.2 +++ b/xen/arch/i386/mm.c	Mon Nov 24 20:18:29 2003 +0000
     1.3 @@ -232,7 +232,7 @@ long set_gdt(struct task_struct *p,
     1.4  
     1.5          if ( (page->flags & PG_type_mask) != PGT_gdt_page )
     1.6          {
     1.7 -            if ( page->type_count != 0 )
     1.8 +            if ( page_type_count(page) != 0 )
     1.9                  goto out;
    1.10  
    1.11              /* Check all potential GDT entries in the page. */
    1.12 @@ -253,7 +253,7 @@ long set_gdt(struct task_struct *p,
    1.13          page = frame_table + pfn;
    1.14          ASSERT((page->flags & PG_type_mask) == PGT_gdt_page);
    1.15          ASSERT((page->flags & PG_domain_mask) == p->domain);
    1.16 -        ASSERT((page->type_count != 0) && (page->tot_count != 0));
    1.17 +        ASSERT((page_type_count(page) != 0) && (page_tot_count(page) != 0));
    1.18          put_page_type(page);
    1.19          put_page_tot(page);
    1.20      }
    1.21 @@ -340,7 +340,7 @@ long do_update_descriptor(
    1.22      case PGT_writeable_page:
    1.23          break;
    1.24      default:
    1.25 -        if ( page->type_count != 0 )
    1.26 +        if ( page_type_count(page) != 0 )
    1.27              goto out;
    1.28      }
    1.29  
     2.1 --- a/xen/common/dom0_ops.c	Sun Nov 23 11:34:18 2003 +0000
     2.2 +++ b/xen/common/dom0_ops.c	Mon Nov 24 20:18:29 2003 +0000
     2.3 @@ -344,7 +344,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     2.4              op.u.getpageframeinfo.domain = page->flags & PG_domain_mask;
     2.5              op.u.getpageframeinfo.type   = NONE;
     2.6  
     2.7 -            if ( page->type_count != 0 )
     2.8 +            if ( page_type_count(page) != 0 )
     2.9              {
    2.10                  switch ( page->flags & PG_type_mask )
    2.11                  {
     3.1 --- a/xen/common/dom_mem_ops.c	Sun Nov 23 11:34:18 2003 +0000
     3.2 +++ b/xen/common/dom_mem_ops.c	Mon Nov 24 20:18:29 2003 +0000
     3.3 @@ -57,7 +57,8 @@ static long alloc_dom_mem(struct task_st
     3.4          /* Get a free page and add it to the domain's page list. */
     3.5          pf = list_entry(temp, struct pfn_info, list);
     3.6          pf->flags |= p->domain;
     3.7 -        pf->type_count = pf->tot_count = 0;
     3.8 +        set_page_type_count(pf, 0);
     3.9 +        set_page_tot_count(pf, 0);
    3.10          temp = temp->next;
    3.11          list_del(&pf->list);
    3.12          list_add_tail(&pf->list, &p->pg_head);
    3.13 @@ -109,12 +110,13 @@ static long free_dom_mem(struct task_str
    3.14          }
    3.15  
    3.16          pf = &frame_table[mpfn];
    3.17 -        if ( (pf->type_count != 0) || 
    3.18 -             (pf->tot_count != 0) ||
    3.19 +        if ( (page_type_count(pf) != 0) || 
    3.20 +             (page_tot_count(pf) != 0) ||
    3.21               ((pf->flags & PG_domain_mask) != p->domain) )
    3.22          {
    3.23              DPRINTK("Bad page free for domain %d (%ld, %ld, %08lx)\n",
    3.24 -                    p->domain, pf->type_count, pf->tot_count, pf->flags);
    3.25 +                    p->domain, page_type_count(pf), 
    3.26 +                    page_tot_count(pf), pf->flags);
    3.27              rc = -EINVAL;
    3.28              goto out;
    3.29          }
     4.1 --- a/xen/common/domain.c	Sun Nov 23 11:34:18 2003 +0000
     4.2 +++ b/xen/common/domain.c	Mon Nov 24 20:18:29 2003 +0000
     4.3 @@ -245,7 +245,8 @@ unsigned int alloc_new_dom_mem(struct ta
     4.4      {
     4.5          pf = list_entry(temp, struct pfn_info, list);
     4.6          pf->flags = p->domain;
     4.7 -        pf->type_count = pf->tot_count = 0;
     4.8 +        set_page_type_count(pf, 0);
     4.9 +        set_page_tot_count(pf, 0);
    4.10          temp = temp->next;
    4.11          list_del(&pf->list);
    4.12          list_add_tail(&pf->list, &p->pg_head);
    4.13 @@ -273,7 +274,9 @@ void free_all_dom_mem(struct task_struct
    4.14      while ( (ent = p->pg_head.next) != &p->pg_head )
    4.15      {
    4.16          struct pfn_info *pf = list_entry(ent, struct pfn_info, list);
    4.17 -        pf->type_count = pf->tot_count = pf->flags = 0;
    4.18 +        set_page_type_count(pf, 0);
    4.19 +        set_page_tot_count(pf, 0);
    4.20 +        pf->flags = 0;
    4.21          ASSERT(ent->next->prev == ent);
    4.22          ASSERT(ent->prev->next == ent);
    4.23          list_del(ent);
    4.24 @@ -513,7 +516,8 @@ int setup_guestos(struct task_struct *p,
    4.25          
    4.26          page = frame_table + (cur_address >> PAGE_SHIFT);
    4.27          page->flags = dom | PGT_writeable_page | PG_need_flush;
    4.28 -        page->type_count = page->tot_count = 1;
    4.29 +        set_page_type_count(page, 1);
    4.30 +        set_page_tot_count(page, 1);
    4.31          /* Set up the MPT entry. */
    4.32          machine_to_phys_mapping[cur_address >> PAGE_SHIFT] = count;
    4.33  
    4.34 @@ -535,7 +539,7 @@ int setup_guestos(struct task_struct *p,
    4.35          *l1tab = mk_l1_pgentry(l1_pgentry_val(*l1tab) & ~_PAGE_RW);
    4.36          page = frame_table + l1_pgentry_to_pagenr(*l1tab);
    4.37          page->flags = dom | PGT_l1_page_table;
    4.38 -        page->tot_count++;
    4.39 +        get_page_tot(page);
    4.40          l1tab++;
    4.41          if( !((unsigned long)l1tab & (PAGE_SIZE - 1)) )
    4.42          {
    4.43 @@ -544,9 +548,9 @@ int setup_guestos(struct task_struct *p,
    4.44              l2tab++;
    4.45          }
    4.46      }
    4.47 -    page->type_count |= REFCNT_PIN_BIT;
    4.48 -    page->tot_count  |= REFCNT_PIN_BIT;
    4.49 -    page->flags = dom | PGT_l2_page_table;
    4.50 +    get_page_type(page); /* guest_pinned */
    4.51 +    get_page_tot(page);  /* guest_pinned */
    4.52 +    page->flags = dom | PG_guest_pinned | PGT_l2_page_table;
    4.53      unmap_domain_mem(l1start);
    4.54  
    4.55      /* Set up shared info area. */
     5.1 --- a/xen/common/memory.c	Sun Nov 23 11:34:18 2003 +0000
     5.2 +++ b/xen/common/memory.c	Mon Nov 24 20:18:29 2003 +0000
     5.3 @@ -33,7 +33,7 @@
     5.4   * physical page frame by a domain, including uses as a page directory,
     5.5   * a page table, or simple mappings via a PTE. This count prevents a
     5.6   * domain from releasing a frame back to the hypervisor's free pool when
     5.7 - * it is still referencing it!
     5.8 + * it still holds a reference to it.
     5.9   * 
    5.10   * TYPE_COUNT is more subtle. A frame can be put to one of three
    5.11   * mutually-exclusive uses: it might be used as a page directory, or a
    5.12 @@ -140,7 +140,9 @@
    5.13  #include <asm/domain_page.h>
    5.14  
    5.15  #if 0
    5.16 -#define MEM_LOG(_f, _a...) printk("DOM%d: (file=memory.c, line=%d) " _f "\n", current->domain, __LINE__, ## _a )
    5.17 +#define MEM_LOG(_f, _a...) \
    5.18 +  printk("DOM%d: (file=memory.c, line=%d) " _f "\n", \
    5.19 +         current->domain, __LINE__, ## _a )
    5.20  #else
    5.21  #define MEM_LOG(_f, _a...) ((void)0)
    5.22  #endif
    5.23 @@ -230,7 +232,7 @@ static void __invalidate_shadow_ldt(stru
    5.24          page = frame_table + pfn;
    5.25          ASSERT((page->flags & PG_type_mask) == PGT_ldt_page);
    5.26          ASSERT((page->flags & PG_domain_mask) == p->domain);
    5.27 -        ASSERT((page->type_count != 0) && (page->tot_count != 0));
    5.28 +        ASSERT((page_type_count(page) != 0) && (page_tot_count(page) != 0));
    5.29          put_page_type(page);
    5.30          put_page_tot(page);                
    5.31      }
    5.32 @@ -271,7 +273,7 @@ int map_ldt_shadow_page(unsigned int off
    5.33      page = frame_table + (l1e >> PAGE_SHIFT);
    5.34      if ( unlikely((page->flags & PG_type_mask) != PGT_ldt_page) )
    5.35      {
    5.36 -        if ( unlikely(page->type_count != 0) )
    5.37 +        if ( unlikely(page_type_count(page) != 0) )
    5.38              goto out;
    5.39  
    5.40          /* Check all potential LDT entries in the page. */
    5.41 @@ -367,7 +369,7 @@ static int dec_page_refcnt(unsigned long
    5.42                  type);
    5.43          return -1;
    5.44      }
    5.45 -    ASSERT((page_type_count(page) & ~REFCNT_PIN_BIT) != 0);
    5.46 +    ASSERT(page_type_count(page) != 0);
    5.47      put_page_tot(page);
    5.48      return put_page_type(page);
    5.49  }
    5.50 @@ -607,7 +609,7 @@ static void put_page(unsigned long page_
    5.51      page = frame_table + page_nr;
    5.52      ASSERT(DOMAIN_OKAY(page->flags));
    5.53      ASSERT((!writeable) || 
    5.54 -           (((page_type_count(page) & ~REFCNT_PIN_BIT) != 0) && 
    5.55 +           ((page_type_count(page) != 0) && 
    5.56              ((page->flags & PG_type_mask) == PGT_writeable_page) &&
    5.57              ((page->flags & PG_need_flush) == PG_need_flush)));
    5.58      if ( writeable )
    5.59 @@ -735,7 +737,7 @@ static int do_extended_command(unsigned 
    5.60      switch ( cmd )
    5.61      {
    5.62      case MMUEXT_PIN_L1_TABLE:
    5.63 -        if ( unlikely(page->type_count & REFCNT_PIN_BIT) )
    5.64 +        if ( unlikely(page->flags & PG_guest_pinned) )
    5.65          {
    5.66              MEM_LOG("Pfn %08lx already pinned", pfn);
    5.67              err = 1;
    5.68 @@ -745,7 +747,7 @@ static int do_extended_command(unsigned 
    5.69          goto mark_as_pinned;
    5.70  
    5.71      case MMUEXT_PIN_L2_TABLE:
    5.72 -        if ( unlikely(page->type_count & REFCNT_PIN_BIT) )
    5.73 +        if ( unlikely(page->flags & PG_guest_pinned) )
    5.74          {
    5.75              MEM_LOG("Pfn %08lx already pinned", pfn);
    5.76              err = 1;
    5.77 @@ -759,25 +761,19 @@ static int do_extended_command(unsigned 
    5.78              MEM_LOG("Error while pinning pfn %08lx", pfn);
    5.79              break;
    5.80          }
    5.81 -        put_page_type(page);
    5.82 -        put_page_tot(page);
    5.83 -        page->type_count |= REFCNT_PIN_BIT;
    5.84 -        page->tot_count  |= REFCNT_PIN_BIT;
    5.85 +        page->flags |= PG_guest_pinned;
    5.86          break;
    5.87  
    5.88      case MMUEXT_UNPIN_TABLE:
    5.89 -        if ( !DOMAIN_OKAY(page->flags) )
    5.90 +        if ( unlikely(!DOMAIN_OKAY(page->flags)) )
    5.91          {
    5.92              err = 1;
    5.93              MEM_LOG("Page %08lx bad domain (dom=%ld)",
    5.94                      ptr, page->flags & PG_domain_mask);
    5.95          }
    5.96 -        else if ( (page->type_count & REFCNT_PIN_BIT) )
    5.97 +        else if ( likely(page->flags & PG_guest_pinned) )
    5.98          {
    5.99 -            page->type_count &= ~REFCNT_PIN_BIT;
   5.100 -            page->tot_count  &= ~REFCNT_PIN_BIT;
   5.101 -            get_page_type(page);
   5.102 -            get_page_tot(page);
   5.103 +            page->flags &= ~PG_guest_pinned;
   5.104              ((page->flags & PG_type_mask) == PGT_l1_page_table) ?
   5.105                  put_l1_table(pfn) : put_l2_table(pfn);
   5.106          }
   5.107 @@ -916,7 +912,7 @@ int do_mmu_update(mmu_update_t *ureqs, i
   5.108                                         mk_l2_pgentry(req.val)); 
   5.109                      break;                    
   5.110                  default:
   5.111 -                    if ( page->type_count == 0 )
   5.112 +                    if ( page_type_count(page) == 0 )
   5.113                      {
   5.114                          *(unsigned long *)req.ptr = req.val;
   5.115                          err = 0;
     6.1 --- a/xen/drivers/block/xen_block.c	Sun Nov 23 11:34:18 2003 +0000
     6.2 +++ b/xen/drivers/block/xen_block.c	Mon Nov 24 20:18:29 2003 +0000
     6.3 @@ -350,7 +350,7 @@ static int __buffer_is_valid(struct task
     6.4          /* If reading into the frame, the frame must be writeable. */
     6.5          if ( writeable_buffer &&
     6.6               ((page->flags & PG_type_mask) != PGT_writeable_page) &&
     6.7 -             (page->type_count != 0) )
     6.8 +             (page_type_count(page) != 0) )
     6.9          {
    6.10              DPRINTK("non-writeable page passed for block read\n");
    6.11              goto out;
    6.12 @@ -376,7 +376,7 @@ static void __lock_buffer(unsigned long 
    6.13          page = frame_table + pfn;
    6.14          if ( writeable_buffer )
    6.15          {
    6.16 -            if ( page->type_count == 0 )
    6.17 +            if ( page_type_count(page) == 0 )
    6.18              {
    6.19                  page->flags &= ~PG_type_mask;
    6.20                  /* No need for PG_need_flush here. */
     7.1 --- a/xen/include/xeno/mm.h	Sun Nov 23 11:34:18 2003 +0000
     7.2 +++ b/xen/include/xeno/mm.h	Mon Nov 24 20:18:29 2003 +0000
     7.3 @@ -59,13 +59,6 @@ typedef struct pfn_info {
     7.4      unsigned long type_count;   /* pagetable/dir, or domain-writeable refs. */
     7.5  } frame_table_t;
     7.6  
     7.7 -/*
     7.8 - * We use a high bit to indicate that a page is pinned.
     7.9 - * We do not use the top bit as that would mean that we'd get confused with
    7.10 - * -ve error numbers in some places in common/memory.c.
    7.11 - */
    7.12 -#define REFCNT_PIN_BIT 0x40000000UL
    7.13 -
    7.14  #define get_page_tot(p)		 ((p)->tot_count++)
    7.15  #define put_page_tot(p)		 \
    7.16      ({ ASSERT((p)->tot_count != 0); --(p)->tot_count; })
    7.17 @@ -83,9 +76,9 @@ typedef struct pfn_info {
    7.18  #define PG_slab	       24
    7.19  /* domain flags (domain != 0) */
    7.20  /*
    7.21 - * NB. The following three flags are MUTUALLY EXCLUSIVE!
    7.22 + * NB. The following page types are MUTUALLY EXCLUSIVE.
    7.23   * At most one can be true at any point, and 'type_count' counts how many
    7.24 - * references exist of teh current type. A change in type can only occur
    7.25 + * references exist of the current type. A change in type can only occur
    7.26   * when type_count == 0.
    7.27   */
    7.28  #define PG_type_mask        (15<<24) /* bits 24-27 */
    7.29 @@ -111,6 +104,13 @@ typedef struct pfn_info {
    7.30   */
    7.31  #define PG_need_flush       (1<<28)
    7.32  
    7.33 +/*
    7.34 + * This bit indicates that the guest OS has pinned the page to its current
    7.35 + * type. For page tables this can avoid the frame scanning and reference-count
    7.36 + * updates that occur when the type count falls to zero.
    7.37 + */
    7.38 +#define PG_guest_pinned     (1<<29)
    7.39 +
    7.40  #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
    7.41  #define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
    7.42  #define PageClearSlab(page)	clear_bit(PG_slab, &(page)->flags)
    7.43 @@ -118,11 +118,16 @@ typedef struct pfn_info {
    7.44  #define SHARE_PFN_WITH_DOMAIN(_pfn, _dom)                            \
    7.45      do {                                                             \
    7.46          (_pfn)->flags = (_dom) | PGT_writeable_page | PG_need_flush; \
    7.47 -        (_pfn)->tot_count = (_pfn)->type_count = 2;                  \
    7.48 +        set_page_tot_count((_pfn), 2);                               \
    7.49 +        set_page_type_count((_pfn), 2);                              \
    7.50      } while ( 0 )
    7.51  
    7.52 -#define UNSHARE_PFN(_pfn) \
    7.53 -    (_pfn)->flags = (_pfn)->type_count = (_pfn)->tot_count = 0
    7.54 +#define UNSHARE_PFN(_pfn)                                            \
    7.55 +    do {                                                             \
    7.56 +        (_pfn)->flags = 0;                                           \
    7.57 +        set_page_tot_count((_pfn), 0);                               \
    7.58 +        set_page_type_count((_pfn), 0);                              \
    7.59 +    } while ( 0 )
    7.60  
    7.61  /* The array of struct pfn_info,  
    7.62   * free pfn list and number of free pfns in the free list
     8.1 --- a/xen/net/dev.c	Sun Nov 23 11:34:18 2003 +0000
     8.2 +++ b/xen/net/dev.c	Mon Nov 24 20:18:29 2003 +0000
     8.3 @@ -550,7 +550,8 @@ void deliver_packet(struct sk_buff *skb,
     8.4      }
     8.5  
     8.6      /* Give the new page to the domain, marking it writeable. */
     8.7 -    new_page->tot_count = new_page->type_count = 1;
     8.8 +    set_page_type_count(new_page, 1);
     8.9 +    set_page_tot_count(new_page, 1);
    8.10      new_page->flags = vif->domain->domain | PGT_writeable_page | PG_need_flush;
    8.11      list_add(&new_page->list, &vif->domain->pg_head);
    8.12      
    8.13 @@ -2113,10 +2114,10 @@ static long get_bufs_from_vif(net_vif_t 
    8.14  
    8.15          if ( ((buf_page->flags & (PG_type_mask | PG_domain_mask)) !=
    8.16                (PGT_writeable_page | p->domain)) || 
    8.17 -             (buf_page->tot_count != 1) )
    8.18 +             (page_tot_count(buf_page) != 1) )
    8.19          {
    8.20              DPRINTK("Need a mapped-once writeable page (%ld/%ld/%08lx)\n",
    8.21 -                    buf_page->type_count, buf_page->tot_count, 
    8.22 +                    page_type_count(buf_page), page_tot_count(buf_page), 
    8.23                      buf_page->flags);
    8.24              make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
    8.25              goto rx_unmap_and_continue;
    8.26 @@ -2129,7 +2130,9 @@ static long get_bufs_from_vif(net_vif_t 
    8.27          get_page_type(pte_page);
    8.28          get_page_tot(pte_page);
    8.29          *ptep &= ~_PAGE_PRESENT;
    8.30 -        buf_page->flags = buf_page->type_count = buf_page->tot_count = 0;
    8.31 +        buf_page->flags = 0;
    8.32 +        set_page_type_count(buf_page, 0);
    8.33 +        set_page_tot_count(buf_page, 0);
    8.34          list_del(&buf_page->list);
    8.35  
    8.36          vif->rx_shadow_ring[j].id          = rx.id;
    8.37 @@ -2198,7 +2201,8 @@ long flush_bufs_for_vif(net_vif_t *vif)
    8.38              *pte = (rx->buf_pfn<<PAGE_SHIFT) | (*pte & ~PAGE_MASK) | 
    8.39                  _PAGE_RW | _PAGE_PRESENT;
    8.40              page->flags |= PGT_writeable_page | PG_need_flush;
    8.41 -            page->type_count = page->tot_count = 1;
    8.42 +            set_page_type_count(page, 1);
    8.43 +            set_page_tot_count(page, 1);
    8.44          }
    8.45          unmap_domain_mem(pte);
    8.46  
     9.1 --- a/xen/net/skbuff.c	Sun Nov 23 11:34:18 2003 +0000
     9.2 +++ b/xen/net/skbuff.c	Mon Nov 24 20:18:29 2003 +0000
     9.3 @@ -162,7 +162,9 @@ static inline void dealloc_skb_data_page
     9.4  
     9.5      spin_lock_irqsave(&free_list_lock, flags);
     9.6          
     9.7 -    pf->flags = pf->type_count = pf->tot_count = 0;
     9.8 +    pf->flags = 0;
     9.9 +    set_page_type_count(pf, 0);
    9.10 +    set_page_tot_count(pf, 0);
    9.11      list_add(&pf->list, &free_list);
    9.12      free_pfns++;
    9.13