ia64/xen-unstable

changeset 9162:c644eb4049ab

[IA64] page ref counter

Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com>
Signed-off-by: Masaki Kanno <kanno.masaki@jp.fujitsu.com>
author awilliam@xenbuild.aw
date Fri Mar 10 08:23:39 2006 -0700 (2006-03-10)
parents 405f0f847c0f
children 551f7935f79a
files xen/arch/ia64/xen/xenmisc.c xen/include/asm-ia64/mm.h
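
This changeset ports the x86-style page type reference counting (get_page_type() and put_page_type(), built around a cmpxchg retry loop on a packed count-plus-flags type_info word) to ia64, and rearranges struct page and the PGT_* bit layout in asm-ia64/mm.h to match. A minimal, self-contained model of that counting pattern is sketched below; it is an illustration only, not Xen code: PGT_count_mask follows the mm.h hunk further down, while the position of the validated bit is assumed purely for demonstration.

    /*
     * Simplified model of the cmpxchg-based type refcount loop.  The low
     * 16 bits of type_info hold the use count (PGT_count_mask, as in the
     * mm.h hunk below); the validated bit position is assumed here.
     */
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PGT_count_mask  ((1u << 16) - 1)
    #define PGT_validated   (1u << 28)      /* assumed bit position */

    static _Atomic uint32_t type_info;

    /* Take a type reference, retrying if another CPU updates the word. */
    static int get_type_ref(void)
    {
        uint32_t x, nx, y = atomic_load(&type_info);

        do {
            x  = y;
            nx = x + 1;
            if ((nx & PGT_count_mask) == 0)
                return 0;                /* count would overflow */
            if ((x & PGT_count_mask) == 0)
                nx |= PGT_validated;     /* first user: stands in for alloc_page_type() */
            /* On failure the CAS reloads y with the current value; retry. */
        } while (!atomic_compare_exchange_strong(&type_info, &y, nx));

        return 1;
    }

    /* Drop a type reference; the last user clears the validated bit. */
    static void put_type_ref(void)
    {
        uint32_t x, nx, y = atomic_load(&type_info);

        do {
            x  = y;
            nx = x - 1;
            if ((nx & PGT_count_mask) == 0)
                nx &= ~PGT_validated;    /* stands in for free_page_type() */
        } while (!atomic_compare_exchange_strong(&type_info, &y, nx));
    }

    int main(void)
    {
        if (get_type_ref())
            printf("after get: %#x\n", (unsigned)atomic_load(&type_info));
        put_type_ref();
        printf("after put: %#x\n", (unsigned)atomic_load(&type_info));
        return 0;
    }

The real get_page_type() and put_page_type() added below additionally track the va back pointer, filter stale TLB entries, and call alloc_page_type()/free_page_type() to validate and invalidate page-table pages.
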
line diff
     1.1 --- a/xen/arch/ia64/xen/xenmisc.c	Thu Mar 09 16:24:31 2006 -0700
     1.2 +++ b/xen/arch/ia64/xen/xenmisc.c	Fri Mar 10 08:23:39 2006 -0700
     1.3 @@ -147,12 +147,17 @@ void init_percpu_info(void)
     1.4      //memset(percpu_info, 0, sizeof(percpu_info));
     1.5  }
     1.6  
     1.7 -#if 0
     1.8 -void free_page_type(struct page_info *page, unsigned int type)
     1.9 +void free_page_type(struct page_info *page, u32 type)
    1.10  {
    1.11 -	dummy();
    1.12 +//	dummy();
    1.13 +	return;
    1.14  }
    1.15 -#endif
    1.16 +
    1.17 +int alloc_page_type(struct page_info *page, u32 type)
    1.18 +{
    1.19 +//	dummy();
    1.20 +	return 1;
    1.21 +}
    1.22  
    1.23  ///////////////////////////////
    1.24  //// misc memory stuff
    1.25 @@ -415,3 +420,203 @@ void sync_split_caches(void)
    1.26  	}
    1.27  	else printk("sync_split_caches ignored for CPU with no split cache\n");
    1.28  }
    1.29 +
    1.30 +///////////////////////////////
    1.31 +// from arch/x86/mm.c
    1.32 +///////////////////////////////
    1.33 +
    1.34 +#ifdef VERBOSE
    1.35 +#define MEM_LOG(_f, _a...)                           \
    1.36 +  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
    1.37 +         current->domain->domain_id , __LINE__ , ## _a )
    1.38 +#else
    1.39 +#define MEM_LOG(_f, _a...) ((void)0)
    1.40 +#endif
    1.41 +
    1.42 +void cleanup_writable_pagetable(struct domain *d)
    1.43 +{
    1.44 +  return;
    1.45 +}
    1.46 +
    1.47 +void put_page_type(struct page_info *page)
    1.48 +{
    1.49 +    u32 nx, x, y = page->u.inuse.type_info;
    1.50 +
    1.51 + again:
    1.52 +    do {
    1.53 +        x  = y;
    1.54 +        nx = x - 1;
    1.55 +
    1.56 +        ASSERT((x & PGT_count_mask) != 0);
    1.57 +
    1.58 +        /*
    1.59 +         * The page should always be validated while a reference is held. The 
    1.60 +         * exception is during domain destruction, when we forcibly invalidate 
    1.61 +         * page-table pages if we detect a referential loop.
    1.62 +         * See domain.c:relinquish_list().
    1.63 +         */
    1.64 +        ASSERT((x & PGT_validated) || 
    1.65 +               test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags));
    1.66 +
    1.67 +        if ( unlikely((nx & PGT_count_mask) == 0) )
    1.68 +        {
    1.69 +            /* Record TLB information for flush later. Races are harmless. */
    1.70 +            page->tlbflush_timestamp = tlbflush_current_time();
    1.71 +            
    1.72 +            if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
    1.73 +                 likely(nx & PGT_validated) )
    1.74 +            {
    1.75 +                /*
    1.76 +                 * Page-table pages must be unvalidated when count is zero. The
    1.77 +                 * 'free' is safe because the refcnt is non-zero and validated
    1.78 +                 * bit is clear => other ops will spin or fail.
    1.79 +                 */
    1.80 +                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
    1.81 +                                           x & ~PGT_validated)) != x) )
    1.82 +                    goto again;
    1.83 +                /* We cleared the 'valid bit' so we do the clean up. */
    1.84 +                free_page_type(page, x);
    1.85 +                /* Carry on, but with the 'valid bit' now clear. */
    1.86 +                x  &= ~PGT_validated;
    1.87 +                nx &= ~PGT_validated;
    1.88 +            }
    1.89 +        }
    1.90 +        else if ( unlikely(((nx & (PGT_pinned | PGT_count_mask)) == 
    1.91 +                            (PGT_pinned | 1)) &&
    1.92 +                           ((nx & PGT_type_mask) != PGT_writable_page)) )
    1.93 +        {
    1.94 +            /* Page is now only pinned. Make the back pointer mutable again. */
    1.95 +            nx |= PGT_va_mutable;
    1.96 +        }
    1.97 +    }
    1.98 +    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
    1.99 +}
   1.100 +
   1.101 +
   1.102 +int get_page_type(struct page_info *page, u32 type)
   1.103 +{
   1.104 +    u32 nx, x, y = page->u.inuse.type_info;
   1.105 +
   1.106 + again:
   1.107 +    do {
   1.108 +        x  = y;
   1.109 +        nx = x + 1;
   1.110 +        if ( unlikely((nx & PGT_count_mask) == 0) )
   1.111 +        {
   1.112 +            MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
   1.113 +            return 0;
   1.114 +        }
   1.115 +        else if ( unlikely((x & PGT_count_mask) == 0) )
   1.116 +        {
   1.117 +            if ( (x & (PGT_type_mask|PGT_va_mask)) != type )
   1.118 +            {
   1.119 +                if ( (x & PGT_type_mask) != (type & PGT_type_mask) )
   1.120 +                {
   1.121 +                    /*
   1.122 +                     * On type change we check to flush stale TLB
   1.123 +                     * entries. This may be unnecessary (e.g., page
   1.124 +                     * was GDT/LDT) but those circumstances should be
   1.125 +                     * very rare.
   1.126 +                     */
   1.127 +                    cpumask_t mask =
   1.128 +                        page_get_owner(page)->domain_dirty_cpumask;
   1.129 +                    tlbflush_filter(mask, page->tlbflush_timestamp);
   1.130 +
   1.131 +                    if ( unlikely(!cpus_empty(mask)) )
   1.132 +                    {
   1.133 +                        perfc_incrc(need_flush_tlb_flush);
   1.134 +                        flush_tlb_mask(mask);
   1.135 +                    }
   1.136 +                }
   1.137 +
   1.138 +                /* We lose existing type, back pointer, and validity. */
   1.139 +                nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated);
   1.140 +                nx |= type;
   1.141 +
   1.142 +                /* No special validation needed for writable pages. */
   1.143 +                /* Page tables and GDT/LDT need to be scanned for validity. */
   1.144 +                if ( type == PGT_writable_page )
   1.145 +                    nx |= PGT_validated;
   1.146 +            }
   1.147 +        }
   1.148 +        else
   1.149 +        {
   1.150 +            if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) )
   1.151 +            {
   1.152 +                if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) )
   1.153 +                {
   1.154 +                    if ( current->domain == page_get_owner(page) )
   1.155 +                    {
   1.156 +                        /*
   1.157 +                         * This ensures functions like set_gdt() see up-to-date
   1.158 +                         * type info without needing to clean up writable p.t.
   1.159 +                         * state on the fast path.
   1.160 +                         */
   1.161 +                        LOCK_BIGLOCK(current->domain);
   1.162 +                        cleanup_writable_pagetable(current->domain);
   1.163 +                        y = page->u.inuse.type_info;
   1.164 +                        UNLOCK_BIGLOCK(current->domain);
   1.165 +                        /* Can we make progress now? */
   1.166 +                        if ( ((y & PGT_type_mask) == (type & PGT_type_mask)) ||
   1.167 +                             ((y & PGT_count_mask) == 0) )
   1.168 +                            goto again;
   1.169 +                    }
   1.170 +                    if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
   1.171 +                         ((type & PGT_type_mask) != PGT_l1_page_table) )
   1.172 +                        MEM_LOG("Bad type (saw %" PRtype_info
   1.173 +                                " != exp %" PRtype_info ") "
   1.174 +                                "for mfn %lx (pfn %lx)",
   1.175 +                                x, type, page_to_mfn(page),
   1.176 +                                get_gpfn_from_mfn(page_to_mfn(page)));
   1.177 +                    return 0;
   1.178 +                }
   1.179 +                else if ( (x & PGT_va_mask) == PGT_va_mutable )
   1.180 +                {
   1.181 +                    /* The va backpointer is mutable, hence we update it. */
   1.182 +                    nx &= ~PGT_va_mask;
   1.183 +                    nx |= type; /* we know the actual type is correct */
   1.184 +                }
   1.185 +                else if ( ((type & PGT_va_mask) != PGT_va_mutable) &&
   1.186 +                          ((type & PGT_va_mask) != (x & PGT_va_mask)) )
   1.187 +                {
   1.188 +#ifdef CONFIG_X86_PAE
   1.189 +                    /* We use backptr as extra typing. Cannot be unknown. */
   1.190 +                    if ( (type & PGT_type_mask) == PGT_l2_page_table )
   1.191 +                        return 0;
   1.192 +#endif
   1.193 +                    /* This table is possibly mapped at multiple locations. */
   1.194 +                    nx &= ~PGT_va_mask;
   1.195 +                    nx |= PGT_va_unknown;
   1.196 +                }
   1.197 +            }
   1.198 +            if ( unlikely(!(x & PGT_validated)) )
   1.199 +            {
   1.200 +                /* Someone else is updating validation of this page. Wait... */
   1.201 +                while ( (y = page->u.inuse.type_info) == x )
   1.202 +                    cpu_relax();
   1.203 +                goto again;
   1.204 +            }
   1.205 +        }
   1.206 +    }
   1.207 +    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
   1.208 +
   1.209 +    if ( unlikely(!(nx & PGT_validated)) )
   1.210 +    {
   1.211 +        /* Try to validate page type; drop the new reference on failure. */
   1.212 +        if ( unlikely(!alloc_page_type(page, type)) )
   1.213 +        {
   1.214 +            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
   1.215 +                    PRtype_info ": caf=%08x taf=%" PRtype_info,
   1.216 +                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
   1.217 +                    type, page->count_info, page->u.inuse.type_info);
    1.218 +            /* No one else can get a reference. We hold the only ref. */
   1.219 +            page->u.inuse.type_info = 0;
   1.220 +            return 0;
   1.221 +        }
   1.222 +
    1.223 +        /* No one else is updating simultaneously. */
   1.224 +        __set_bit(_PGT_validated, &page->u.inuse.type_info);
   1.225 +    }
   1.226 +
   1.227 +    return 1;
   1.228 +}
     2.1 --- a/xen/include/asm-ia64/mm.h	Thu Mar 09 16:24:31 2006 -0700
     2.2 +++ b/xen/include/asm-ia64/mm.h	Fri Mar 10 08:23:39 2006 -0700
     2.3 @@ -41,32 +41,33 @@ struct page
     2.4      /* Each frame can be threaded onto a doubly-linked list. */
     2.5      struct list_head list;
     2.6  
     2.7 -    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     2.8 -    u32 tlbflush_timestamp;
     2.9 -
    2.10      /* Reference count and various PGC_xxx flags and fields. */
    2.11      u32 count_info;
    2.12  
    2.13      /* Context-dependent fields follow... */
    2.14      union {
    2.15  
    2.16 -        /* Page is in use by a domain. */
    2.17 +        /* Page is in use: ((count_info & PGC_count_mask) != 0). */
    2.18          struct {
    2.19 -            /* Owner of this page. */
    2.20 -            u32	_domain;
    2.21 +            /* Owner of this page (NULL if page is anonymous). */
    2.22 +            u32 _domain; /* pickled format */
    2.23              /* Type reference count and various PGT_xxx flags and fields. */
    2.24 -            u32 type_info;
    2.25 -        } inuse;
    2.26 +            unsigned long type_info;
    2.27 +        } __attribute__ ((packed)) inuse;
    2.28  
    2.29 -        /* Page is on a free list. */
    2.30 +        /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
    2.31          struct {
    2.32 +            /* Order-size of the free chunk this page is the head of. */
    2.33 +            u32 order;
    2.34              /* Mask of possibly-tainted TLBs. */
    2.35              cpumask_t cpumask;
    2.36 -            /* Order-size of the free chunk this page is the head of. */
    2.37 -            u8 order;
    2.38 -        } free;
    2.39 +        } __attribute__ ((packed)) free;
    2.40  
    2.41      } u;
    2.42 +
    2.43 +    /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
    2.44 +    u32 tlbflush_timestamp;
    2.45 +
    2.46  #if 0
    2.47  // following added for Linux compiling
    2.48      page_flags_t flags;
    2.49 @@ -94,8 +95,15 @@ struct page
    2.50  #define _PGT_pinned         27
    2.51  #define PGT_pinned          (1U<<_PGT_pinned)
    2.52  
    2.53 -/* 27-bit count of uses of this frame as its current type. */
    2.54 -#define PGT_count_mask      ((1U<<27)-1)
    2.55 +/* The 11 most significant bits of virt address if this is a page table. */
    2.56 +#define PGT_va_shift        16
    2.57 +#define PGT_va_mask         (((1U<<11)-1)<<PGT_va_shift)
    2.58 +/* Is the back pointer still mutable (i.e. not fixed yet)? */
    2.59 +#define PGT_va_mutable      (((1U<<11)-1)<<PGT_va_shift)
    2.60 +/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
    2.61 +#define PGT_va_unknown      (((1U<<11)-2)<<PGT_va_shift)
    2.62 +/* 16-bit count of uses of this frame as its current type. */
    2.63 +#define PGT_count_mask      ((1U<<16)-1)
    2.64  
    2.65  /* Cleared when the owning guest 'frees' this page. */
    2.66  #define _PGC_allocated      31
    2.67 @@ -138,7 +146,6 @@ extern unsigned long gmfn_to_mfn_foreign
    2.68  
    2.69  static inline void put_page(struct page_info *page)
    2.70  {
    2.71 -#ifdef VALIDATE_VT	// doesn't work with non-VTI in grant tables yet
    2.72      u32 nx, x, y = page->count_info;
    2.73  
    2.74      do {
    2.75 @@ -149,14 +156,12 @@ static inline void put_page(struct page_
    2.76  
    2.77      if (unlikely((nx & PGC_count_mask) == 0))
    2.78  	free_domheap_page(page);
    2.79 -#endif
    2.80  }
    2.81  
    2.82  /* count_info and ownership are checked atomically. */
    2.83  static inline int get_page(struct page_info *page,
    2.84                             struct domain *domain)
    2.85  {
    2.86 -#ifdef VALIDATE_VT
    2.87      u64 x, nx, y = *((u64*)&page->count_info);
    2.88      u32 _domain = pickle_domptr(domain);
    2.89  
    2.90 @@ -172,14 +177,13 @@ static inline int get_page(struct page_i
    2.91  	    return 0;
    2.92  	}
    2.93      }
    2.94 -    while(unlikely(y = cmpxchg(&page->count_info, x, nx)) != x);
    2.95 -#endif
    2.96 +    while(unlikely((y = cmpxchg((u64*)&page->count_info, x, nx)) != x));
    2.97      return 1;
    2.98  }
    2.99  
   2.100 -/* No type info now */
   2.101 -#define put_page_type(page)
   2.102 -#define get_page_type(page, type) 1
   2.103 +extern void put_page_type(struct page_info *page);
   2.104 +extern int get_page_type(struct page_info *page, u32 type);
   2.105 +
   2.106  static inline void put_page_and_type(struct page_info *page)
   2.107  {
   2.108      put_page_type(page);
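
A note on the asm-ia64/mm.h hunk above: with the type use count narrowed to the low 16 bits of type_info, bits 16 through 26 now carry the page-table va back pointer, where an all-ones field means the back pointer is still mutable and all-ones-minus-one means it is unknown (the page table is mapped at several virtual addresses). The standalone sketch below decodes those fields; the masks are copied from the patch, while the describe() helper and the sample values are illustrative and not part of the changeset.

    #include <stdint.h>
    #include <stdio.h>

    /* Bit layout copied from the asm-ia64/mm.h hunk in this patch. */
    #define PGT_va_shift    16
    #define PGT_va_mask     (((1u << 11) - 1) << PGT_va_shift)
    #define PGT_va_mutable  (((1u << 11) - 1) << PGT_va_shift)
    #define PGT_va_unknown  (((1u << 11) - 2) << PGT_va_shift)
    #define PGT_count_mask  ((1u << 16) - 1)

    /* Print the type use count and the state of the va back pointer. */
    static void describe(uint32_t type_info)
    {
        printf("type refs: %u, ", (unsigned)(type_info & PGT_count_mask));
        if ((type_info & PGT_va_mask) == PGT_va_mutable)
            printf("back pointer still mutable\n");
        else if ((type_info & PGT_va_mask) == PGT_va_unknown)
            printf("back pointer unknown (multiple mappings)\n");
        else
            printf("back pointer va bits %#x\n",
                   (unsigned)((type_info & PGT_va_mask) >> PGT_va_shift));
    }

    int main(void)
    {
        describe(PGT_va_mutable | 1);          /* one ref, back pointer not fixed yet */
        describe((5u << PGT_va_shift) | 2);    /* two refs, back pointer fixed to 5 */
        describe(PGT_va_unknown | 3);          /* three refs, mapped at several VAs */
        return 0;
    }

get_page_type() above relies on exactly these encodings when deciding whether to set, keep, or degrade the back pointer to PGT_va_unknown.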