ia64/xen-unstable

changeset 11554:69e52712fbc4

[XEN] Shadow mode no longer obtains page type references.
This allows the shadow destructor hook in free_page_type()
to work properly.
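
The destructor hook only runs when the last type reference disappears;
the following standalone model (not Xen code, just illustrative C) shows
how the extra type-ref previously held by the shadow code kept the hook
from ever firing:

    #include <stdio.h>

    static unsigned int type_count;

    /* Stands in for the shadow destructor hook in free_page_type(). */
    static void free_page_type_model(void)
    {
        printf("destructor hook runs\n");
    }

    static void get_type(void) { type_count++; }

    static void put_type(void)
    {
        if ( --type_count == 0 )
            free_page_type_model();
    }

    int main(void)
    {
        get_type();   /* guest's type reference */
        get_type();   /* shadow's extra reference (old behaviour) */
        put_type();   /* guest drops its ref: count stays at 1, no hook */
        put_type();   /* hook fires only once the shadow ref is dropped */
        return 0;
    }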

Also, move mark_dirty() back to alloc/free_page_type(). It
does not matter that this happens before the type count is
modified -- the bitmap is extracted by the tools only with the
domain paused, so these non-blocking paths are atomic from the
tools' point of view.
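To see why pausing makes the early mark_dirty() safe, consider the shape
of the hypervisor-side handler for the tools' bitmap-extraction operation
(a minimal sketch: domain_pause()/domain_unpause() are real Xen
primitives, but the bitmap helpers here are hypothetical stand-ins):

    static void extract_dirty_bitmap(struct domain *d,
                                     unsigned long *bitmap,
                                     unsigned long nbits)
    {
        domain_pause(d);          /* no vcpu can be inside mark_dirty() */
        copy_dirty_bitmap(d, bitmap, nbits);  /* hypothetical helper */
        clear_dirty_bitmap(d);                /* hypothetical helper */
        domain_unpause(d);
    }

With every vcpu quiesced, a dirty bit set slightly before the type count
changed is indistinguishable from one set slightly after it.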

Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Sep 21 10:47:05 2006 +0100 (2006-09-21)
parents e50872355390
children f872300b672f
files xen/arch/x86/mm.c xen/arch/x86/mm/shadow/common.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h
     1.1 --- a/xen/arch/x86/mm.c	Thu Sep 21 09:37:28 2006 +0100
     1.2 +++ b/xen/arch/x86/mm.c	Thu Sep 21 10:47:05 2006 +0100
     1.3 @@ -1490,6 +1490,12 @@ static int mod_l4_entry(l4_pgentry_t *pl
     1.4  
     1.5  int alloc_page_type(struct page_info *page, unsigned long type)
     1.6  {
     1.7 +    struct domain *owner = page_get_owner(page);
     1.8 +
     1.9 +    /* A page table is dirtied when its type count becomes non-zero. */
    1.10 +    if ( likely(owner != NULL) )
    1.11 +        mark_dirty(owner, page_to_mfn(page));
    1.12 +
    1.13      switch ( type & PGT_type_mask )
    1.14      {
    1.15      case PGT_l1_page_table:
    1.16 @@ -1528,9 +1534,11 @@ void free_page_type(struct page_info *pa
    1.17           */
    1.18          this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
    1.19  
    1.20 -        if ( unlikely(shadow_mode_enabled(owner)
    1.21 -                 && !shadow_lock_is_acquired(owner)) )
    1.22 +        if ( unlikely(shadow_mode_enabled(owner)) )
    1.23          {
    1.24 +            /* A page table is dirtied when its type count becomes zero. */
    1.25 +            mark_dirty(owner, page_to_mfn(page));
    1.26 +
    1.27              if ( shadow_mode_refcounts(owner) )
    1.28                  return;
    1.29  
    1.30 @@ -1603,19 +1611,19 @@ void put_page_type(struct page_info *pag
    1.31                  nx &= ~PGT_validated;
    1.32              }
    1.33  
    1.34 -            /* Record TLB information for flush later. */
    1.35 -            page->tlbflush_timestamp = tlbflush_current_time();
    1.36 +            /*
    1.37 +             * Record TLB information for flush later. We do not stamp page
    1.38 +             * tables when running in shadow mode:
    1.39 +             *  1. Pointless, since it's the shadow pt's which must be tracked.
    1.40 +             *  2. Shadow mode reuses this field for shadowed page tables to
    1.41 +             *     store flags info -- we don't want to conflict with that.
    1.42 +             */
    1.43 +            if ( !shadow_mode_enabled(page_get_owner(page)) ||
    1.44 +                 ((nx & PGT_type_mask) == PGT_writable_page) )
    1.45 +                page->tlbflush_timestamp = tlbflush_current_time();
    1.46          }
    1.47      }
    1.48      while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
    1.49 -
    1.50 -    /*
    1.51 -     * A page table is dirtied when its type count becomes zero.
    1.52 -     * We cannot set the dirty flag earlier than this because we must wait
    1.53 -     * until the type count has been zeroed by the CMPXCHG above.
    1.54 -     */
    1.55 -    if ( unlikely((nx & (PGT_validated|PGT_count_mask)) == 0) )
    1.56 -        mark_dirty(page_get_owner(page), page_to_mfn(page));
    1.57  }
    1.58  
    1.59  
    1.60 @@ -1648,7 +1656,10 @@ int get_page_type(struct page_info *page
    1.61                      page_get_owner(page)->domain_dirty_cpumask;
    1.62                  tlbflush_filter(mask, page->tlbflush_timestamp);
    1.63  
    1.64 -                if ( unlikely(!cpus_empty(mask)) )
    1.65 +                if ( unlikely(!cpus_empty(mask)) &&
    1.66 +                     /* Shadow mode: track only writable pages. */
    1.67 +                     (!shadow_mode_enabled(page_get_owner(page)) ||
    1.68 +                      ((nx & PGT_type_mask) == PGT_writable_page)) )
    1.69                  {
    1.70                      perfc_incrc(need_flush_tlb_flush);
    1.71                      flush_tlb_mask(mask);
    1.72 @@ -1701,13 +1712,6 @@ int get_page_type(struct page_info *page
    1.73  
    1.74          /* Noone else is updating simultaneously. */
    1.75          __set_bit(_PGT_validated, &page->u.inuse.type_info);
    1.76 -
    1.77 -        /*
    1.78 -         * A page table is dirtied when its type count becomes non-zero. It is
    1.79 -         * safe to mark dirty here because any PTE modifications in
    1.80 -         * alloc_page_type() have now happened.
    1.81 -         */
    1.82 -        mark_dirty(page_get_owner(page), page_to_mfn(page));
    1.83      }
    1.84  
    1.85      return 1;
    1.86 @@ -2001,14 +2005,8 @@ int do_mmuext_op(
    1.87              {
    1.88                  put_page_and_type(page);
    1.89                  put_page(page);
    1.90 -                if ( shadow_mode_enabled(d) )
    1.91 -                {
    1.92 -                    shadow_lock(d);
    1.93 -                    shadow_remove_all_shadows(v, _mfn(mfn));
    1.94 -                    /* A page is dirtied when its pin status is cleared. */
    1.95 -                    sh_mark_dirty(d, _mfn(mfn));
    1.96 -                    shadow_unlock(d);
    1.97 -                }
    1.98 +                /* A page is dirtied when its pin status is cleared. */
    1.99 +                mark_dirty(d, mfn);
   1.100              }
   1.101              else
   1.102              {
     2.1 --- a/xen/arch/x86/mm/shadow/common.c	Thu Sep 21 09:37:28 2006 +0100
     2.2 +++ b/xen/arch/x86/mm/shadow/common.c	Thu Sep 21 10:47:05 2006 +0100
     2.3 @@ -232,32 +232,15 @@ struct x86_emulate_ops shadow_emulator_o
     2.4  void shadow_promote(struct vcpu *v, mfn_t gmfn, u32 type)
     2.5  {
     2.6      struct page_info *page = mfn_to_page(gmfn);
     2.7 -    unsigned long type_info;
     2.8  
     2.9      ASSERT(valid_mfn(gmfn));
    2.10  
    2.11      /* We should never try to promote a gmfn that has writeable mappings */
    2.12      ASSERT(shadow_remove_write_access(v, gmfn, 0, 0) == 0);
    2.13  
    2.14 -    // Is the page already shadowed?
    2.15 +    /* Is the page already shadowed? */
    2.16      if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
    2.17 -    {
    2.18 -        // No prior shadow exists...
    2.19 -
    2.20 -        // Grab a type-ref.  We don't really care if we are racing with another
    2.21 -        // vcpu or not, or even what kind of type we get; we just want the type
    2.22 -        // count to be > 0.
    2.23 -        //
    2.24 -        do {
    2.25 -            type_info = page->u.inuse.type_info &
    2.26 -                (PGT_type_mask | PGT_pae_xen_l2);
    2.27 -        } while ( !get_page_type(page, type_info) );
    2.28 -
    2.29 -        // Now that the type ref is non-zero, we can safely use the
    2.30 -        // shadow_flags.
    2.31 -        //
    2.32          page->shadow_flags = 0;
    2.33 -    }
    2.34  
    2.35      ASSERT(!test_bit(type >> PGC_SH_type_shift, &page->shadow_flags));
    2.36      set_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
    2.37 @@ -273,13 +256,7 @@ void shadow_demote(struct vcpu *v, mfn_t
    2.38      clear_bit(type >> PGC_SH_type_shift, &page->shadow_flags);
    2.39  
    2.40      if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
    2.41 -    {
    2.42 -        // release the extra type ref
    2.43 -        put_page_type(page);
    2.44 -
    2.45 -        // clear the is-a-page-table bit.
    2.46          clear_bit(_PGC_page_table, &page->count_info);
    2.47 -    }
    2.48  }
    2.49  
    2.50  /**************************************************************************/
     3.1 --- a/xen/include/asm-x86/mm.h	Thu Sep 21 09:37:28 2006 +0100
     3.2 +++ b/xen/include/asm-x86/mm.h	Thu Sep 21 10:47:05 2006 +0100
     3.3 @@ -51,18 +51,19 @@ struct page_info
     3.4      } u;
     3.5  
     3.6      union {
     3.7 -        /* Timestamp from 'TLB clock', used to reduce need for safety
     3.8 -         * flushes.  Only valid on a) free pages, and b) guest pages with a
     3.9 -         * zero type count. */
    3.10 +        /*
    3.11 +         * Timestamp from 'TLB clock', used to avoid extra safety flushes.
    3.12 +         * Only valid for: a) free pages, and b) pages with zero type count
    3.13 +         * (except page table pages when the guest is in shadow mode).
    3.14 +         */
    3.15          u32 tlbflush_timestamp;
    3.16  
    3.17 -        /* Only used on guest pages with a shadow.
    3.18 -         * Guest pages with a shadow must have a non-zero type count, so this
    3.19 -         * does not conflict with the tlbflush timestamp. */
    3.20 +        /*
    3.21 +         * Guest pages with a shadow. This does not conflict with
    3.22 +         * tlbflush_timestamp since page table pages are explicitly not
    3.23 +         * tracked for TLB-flush avoidance when a guest runs in shadow mode.
    3.24 +         */
    3.25          u32 shadow_flags;
    3.26 -
    3.27 -        // XXX -- we expect to add another field here, to be used for min/max
    3.28 -        // purposes, which is only used for shadow pages.
    3.29      };
    3.30  };
    3.31  
     4.1 --- a/xen/include/asm-x86/shadow.h	Thu Sep 21 09:37:28 2006 +0100
     4.2 +++ b/xen/include/asm-x86/shadow.h	Thu Sep 21 10:47:05 2006 +0100
     4.3 @@ -325,24 +325,19 @@ void shadow_final_teardown(struct domain
     4.4  void sh_do_mark_dirty(struct domain *d, mfn_t gmfn);
     4.5  static inline void mark_dirty(struct domain *d, unsigned long gmfn)
     4.6  {
     4.7 -    int caller_locked;
     4.8 -
     4.9 -    if ( unlikely(d == NULL) || likely(!shadow_mode_log_dirty(d)) )
    4.10 +    if ( likely(!shadow_mode_log_dirty(d)) )
    4.11          return;
    4.12  
    4.13 -    caller_locked = shadow_lock_is_acquired(d);
    4.14 -    if ( !caller_locked )
    4.15 -        shadow_lock(d);
    4.16 +    shadow_lock(d);
    4.17      sh_do_mark_dirty(d, _mfn(gmfn));
    4.18 -    if ( !caller_locked )
    4.19 -        shadow_unlock(d);
    4.20 +    shadow_unlock(d);
    4.21  }
    4.22  
    4.23  /* Internal version, for when the shadow lock is already held */
    4.24  static inline void sh_mark_dirty(struct domain *d, mfn_t gmfn)
    4.25  {
    4.26      ASSERT(shadow_lock_is_acquired(d));
    4.27 -    if ( shadow_mode_log_dirty(d) )
    4.28 +    if ( unlikely(shadow_mode_log_dirty(d)) )
    4.29          sh_do_mark_dirty(d, gmfn);
    4.30  }
    4.31
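The net locking discipline for dirty-marking after this change, sketched
with hypothetical call sites (mark_dirty() now always takes the shadow
lock itself, while sh_mark_dirty() requires the caller to hold it):

    /* Caller does not hold the shadow lock, e.g. the unpin path in
     * do_mmuext_op() above: */
    mark_dirty(d, mfn);            /* acquires and releases the lock */

    /* Caller is inside shadow code with the lock already held: */
    shadow_lock(d);
    sh_mark_dirty(d, _mfn(mfn));   /* ASSERTs lock ownership only */
    shadow_unlock(d);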