ia64/xen-unstable

changeset 19729:43833a6d50a5

x86: hap dirty vram tracking

Currently HAP systems suffer a significant performance loss when a VNC
client is connected or the SDL interface is used, because HAP lacks an
implementation of track_dirty_vram.
As a consequence qemu always tries to update the whole screen, because
it does not know which areas of the screen have been updated by the
guest.

This patch implements track_dirty_vram for HAP, enabling the logdirty
mechanism only on a specific gfn range and adding a
paging_log_dirty_range function that returns the logdirty bitmap for a
requested range.

paging_log_dirty_range is different from paging_log_dirty_op because it
operates on a range and also because it does not pause the domain. In
order not to lose any updates, I moved the clean_dirty_bitmap call to
the beginning of the function, before the logdirty bitmap is evaluated.
The bitmap is still safe because it is protected by the logdirty lock.
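
For reference, the consumer side of this mechanism is the
HVMOP_track_dirty_vram hypercall that qemu issues on each display
refresh. Below is a minimal, hedged sketch of such a caller: the
xc_hvm_track_dirty_vram libxc wrapper is real, but the buffer sizing
and the redraw_page helper are illustrative assumptions, not code taken
from qemu:

    /* Hedged sketch: fetch the dirty bitmap for the VRAM gfn range and
     * redraw only the pages Xen reports as dirty. redraw_page is a
     * hypothetical helper; the error handling is illustrative. */
    #include <stdint.h>
    #include <xenctrl.h>

    #define VRAM_PAGES  2048                 /* 8MB of 4K pages, illustrative */
    #define BITS_PER_UL (8 * sizeof(unsigned long))

    extern void redraw_page(unsigned int page);

    void refresh_display(int xc_handle, domid_t dom, uint64_t vram_gfn)
    {
        unsigned long dirty[(VRAM_PAGES + BITS_PER_UL - 1) / BITS_PER_UL];
        unsigned int i;

        /* One hypercall per refresh: Xen fills `dirty` and re-arms the
         * write protection on the range (clean_dirty_bitmap runs before
         * the bitmap is evaluated, as described above). */
        if ( xc_hvm_track_dirty_vram(xc_handle, dom, vram_gfn,
                                     VRAM_PAGES, dirty) < 0 )
            return; /* fall back to a full-screen update */

        for ( i = 0; i < VRAM_PAGES; i++ )
            if ( dirty[i / BITS_PER_UL] & (1UL << (i % BITS_PER_UL)) )
                redraw_page(i);
    }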

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 04 22:25:10 2009 +0100 (2009-06-04)
parents 50cf07f42fdd
children 687040d3f342
files xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/paging.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/hap.h xen/include/asm-x86/paging.h
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Thu Jun 04 10:57:39 2009 +0100
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Thu Jun 04 22:25:10 2009 +0100
     1.3 @@ -34,6 +34,7 @@
     1.4  #include <xen/event.h>
     1.5  #include <xen/paging.h>
     1.6  #include <asm/shadow.h>
     1.7 +#include <asm/hap.h>
     1.8  #include <asm/current.h>
     1.9  #include <asm/e820.h>
    1.10  #include <asm/io.h>
    1.11 @@ -2653,12 +2654,13 @@ long do_hvm_op(unsigned long op, XEN_GUE
    1.12              goto param_fail2;
    1.13  
    1.14          rc = -EINVAL;
    1.15 -        if ( !shadow_mode_enabled(d))
    1.16 -            goto param_fail2;
    1.17          if ( d->vcpu[0] == NULL )
    1.18              goto param_fail2;
    1.19  
    1.20 -        rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
    1.21 +        if ( shadow_mode_enabled(d) )
    1.22 +            rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
    1.23 +        else
    1.24 +            rc = hap_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
    1.25  
    1.26      param_fail2:
    1.27          rcu_unlock_domain(d);
     2.1 --- a/xen/arch/x86/mm/hap/hap.c	Thu Jun 04 10:57:39 2009 +0100
     2.2 +++ b/xen/arch/x86/mm/hap/hap.c	Thu Jun 04 22:25:10 2009 +0100
     2.3 @@ -52,8 +52,140 @@
     2.4  #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
     2.5  
     2.6  /************************************************/
     2.7 +/*          HAP VRAM TRACKING SUPPORT           */
     2.8 +/************************************************/
     2.9 +
    2.10 +int hap_enable_vram_tracking(struct domain *d)
    2.11 +{
    2.12 +    int i;
    2.13 +
    2.14 +    if ( !d->dirty_vram )
    2.15 +        return -EINVAL;
    2.16 +
    2.17 +    /* turn on PG_log_dirty bit in paging mode */
    2.18 +    hap_lock(d);
    2.19 +    d->arch.paging.mode |= PG_log_dirty;
    2.20 +    hap_unlock(d);
    2.21 +
    2.22 +    /* set l1e entries of P2M table to be read-only. */
    2.23 +    for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
    2.24 +        p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
    2.25 +
    2.26 +    flush_tlb_mask(&d->domain_dirty_cpumask);
    2.27 +    return 0;
    2.28 +}
    2.29 +
    2.30 +int hap_disable_vram_tracking(struct domain *d)
    2.31 +{
    2.32 +    int i;
    2.33 +
    2.34 +    if ( !d->dirty_vram )
    2.35 +        return -EINVAL;
    2.36 +
    2.37 +    hap_lock(d);
    2.38 +    d->arch.paging.mode &= ~PG_log_dirty;
    2.39 +    hap_unlock(d);
    2.40 +
    2.41 +    /* set l1e entries of P2M table with normal mode */
    2.42 +    for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
     2.43 +        p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw);
    2.44 +
    2.45 +    flush_tlb_mask(&d->domain_dirty_cpumask);
    2.46 +    return 0;
    2.47 +}
    2.48 +
    2.49 +void hap_clean_vram_tracking(struct domain *d)
    2.50 +{
    2.51 +    int i;
    2.52 +
    2.53 +    if ( !d->dirty_vram )
    2.54 +        return;
    2.55 +
    2.56 +    /* set l1e entries of P2M table to be read-only. */
    2.57 +    for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
    2.58 +        p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
    2.59 +
    2.60 +    flush_tlb_mask(&d->domain_dirty_cpumask);
    2.61 +}
    2.62 +
    2.63 +void hap_vram_tracking_init(struct domain *d)
    2.64 +{
    2.65 +    paging_log_dirty_init(d, hap_enable_vram_tracking,
    2.66 +                          hap_disable_vram_tracking,
    2.67 +                          hap_clean_vram_tracking);
    2.68 +}
    2.69 +
    2.70 +int hap_track_dirty_vram(struct domain *d,
    2.71 +                         unsigned long begin_pfn,
    2.72 +                         unsigned long nr,
    2.73 +                         XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
    2.74 +{
    2.75 +    long rc = 0;
    2.76 +
    2.77 +    if ( nr )
    2.78 +    {
    2.79 +        if ( paging_mode_log_dirty(d) && d->dirty_vram )
    2.80 +        {
    2.81 +            if ( begin_pfn != d->dirty_vram->begin_pfn ||
    2.82 +                 begin_pfn + nr != d->dirty_vram->end_pfn )
    2.83 +            {
    2.84 +                paging_log_dirty_disable(d);
    2.85 +                d->dirty_vram->begin_pfn = begin_pfn;
    2.86 +                d->dirty_vram->end_pfn = begin_pfn + nr;
    2.87 +                rc = paging_log_dirty_enable(d);
    2.88 +                if (rc != 0)
    2.89 +                    goto param_fail;
    2.90 +            }
    2.91 +        }
    2.92 +        else if ( !paging_mode_log_dirty(d) && !d->dirty_vram )
    2.93 +        {
     2.94 +            rc = -ENOMEM;
    2.95 +            if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
    2.96 +                goto param_fail;
    2.97 +
    2.98 +            d->dirty_vram->begin_pfn = begin_pfn;
    2.99 +            d->dirty_vram->end_pfn = begin_pfn + nr;
   2.100 +            hap_vram_tracking_init(d);
   2.101 +            rc = paging_log_dirty_enable(d);
   2.102 +            if (rc != 0)
   2.103 +                goto param_fail;
   2.104 +        }
   2.105 +        else
   2.106 +        {
   2.107 +            if ( !paging_mode_log_dirty(d) && d->dirty_vram )
   2.108 +                rc = -EINVAL;
   2.109 +            else
   2.110 +                rc = -ENODATA;
   2.111 +            goto param_fail;
   2.112 +        }
   2.113 +        /* get the bitmap */
   2.114 +        rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
   2.115 +    }
   2.116 +    else
   2.117 +    {
   2.118 +        if ( paging_mode_log_dirty(d) && d->dirty_vram ) {
   2.119 +            rc = paging_log_dirty_disable(d);
   2.120 +            xfree(d->dirty_vram);
   2.121 +            d->dirty_vram = NULL;
   2.122 +        } else
   2.123 +            rc = 0;
   2.124 +    }
   2.125 +
   2.126 +    return rc;
   2.127 +
   2.128 +param_fail:
   2.129 +    if ( d->dirty_vram )
   2.130 +    {
   2.131 +        xfree(d->dirty_vram);
   2.132 +        d->dirty_vram = NULL;
   2.133 +    }
   2.134 +    return rc;
   2.135 +}
   2.136 +
   2.137 +/************************************************/
   2.138  /*            HAP LOG DIRTY SUPPORT             */
   2.139  /************************************************/
   2.140 +
   2.141  /* hap code to call when log_dirty is enable. return 0 if no problem found. */
   2.142  int hap_enable_log_dirty(struct domain *d)
   2.143  {
   2.144 @@ -86,6 +218,21 @@ void hap_clean_dirty_bitmap(struct domai
   2.145      flush_tlb_mask(&d->domain_dirty_cpumask);
   2.146  }
   2.147  
   2.148 +void hap_logdirty_init(struct domain *d)
   2.149 +{
   2.150 +    if ( paging_mode_log_dirty(d) && d->dirty_vram )
   2.151 +    {
   2.152 +        paging_log_dirty_disable(d);
   2.153 +        xfree(d->dirty_vram);
   2.154 +        d->dirty_vram = NULL;
   2.155 +    }
   2.156 +
   2.157 +    /* Reinitialize logdirty mechanism */
   2.158 +    paging_log_dirty_init(d, hap_enable_log_dirty,
   2.159 +                          hap_disable_log_dirty,
   2.160 +                          hap_clean_dirty_bitmap);
   2.161 +}
   2.162 +
   2.163  /************************************************/
   2.164  /*             HAP SUPPORT FUNCTIONS            */
   2.165  /************************************************/
   2.166 @@ -390,10 +537,6 @@ void hap_domain_init(struct domain *d)
   2.167  {
   2.168      hap_lock_init(d);
   2.169      INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
   2.170 -
   2.171 -    /* This domain will use HAP for log-dirty mode */
   2.172 -    paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
   2.173 -                          hap_clean_dirty_bitmap);
   2.174  }
   2.175  
   2.176  /* return 0 for success, -errno for failure */
     3.1 --- a/xen/arch/x86/mm/paging.c	Thu Jun 04 10:57:39 2009 +0100
     3.2 +++ b/xen/arch/x86/mm/paging.c	Thu Jun 04 22:25:10 2009 +0100
     3.3 @@ -453,6 +453,157 @@ int paging_log_dirty_op(struct domain *d
     3.4      return rv;
     3.5  }
     3.6  
     3.7 +int paging_log_dirty_range(struct domain *d,
     3.8 +                            unsigned long begin_pfn,
     3.9 +                            unsigned long nr,
    3.10 +                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
    3.11 +{
    3.12 +    int rv = 0;
    3.13 +    unsigned long pages = 0;
    3.14 +    mfn_t *l4, *l3, *l2;
    3.15 +    unsigned long *l1;
    3.16 +    int b1, b2, b3, b4;
    3.17 +    int i2, i3, i4;
    3.18 +
    3.19 +    d->arch.paging.log_dirty.clean_dirty_bitmap(d);
    3.20 +    log_dirty_lock(d);
    3.21 +
    3.22 +    PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
    3.23 +                 d->domain_id,
    3.24 +                 d->arch.paging.log_dirty.fault_count,
    3.25 +                 d->arch.paging.log_dirty.dirty_count);
    3.26 +
    3.27 +    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    3.28 +    {
    3.29 +        rv = -EINVAL; /* perhaps should be ENOMEM? */
    3.30 +        goto out;
    3.31 +    }
    3.32 +
    3.33 +    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
    3.34 +        printk("%s: %d failed page allocs while logging dirty pages\n",
    3.35 +               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
    3.36 +        rv = -ENOMEM;
    3.37 +        goto out;
    3.38 +    }
    3.39 +
    3.40 +    if ( !d->arch.paging.log_dirty.fault_count &&
    3.41 +         !d->arch.paging.log_dirty.dirty_count ) {
    3.42 +        int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
    3.43 +        unsigned long zeroes[size];
    3.44 +        memset(zeroes, 0x00, size * BYTES_PER_LONG);
    3.45 +        rv = 0;
    3.46 +        if ( copy_to_guest_offset(dirty_bitmap, 0, (uint8_t *) zeroes,
    3.47 +                                  size * BYTES_PER_LONG) != 0 )
    3.48 +            rv = -EFAULT;
    3.49 +        goto out;
    3.50 +    }
    3.51 +    d->arch.paging.log_dirty.fault_count = 0;
    3.52 +    d->arch.paging.log_dirty.dirty_count = 0;
    3.53 +
    3.54 +    b1 = L1_LOGDIRTY_IDX(begin_pfn);
    3.55 +    b2 = L2_LOGDIRTY_IDX(begin_pfn);
    3.56 +    b3 = L3_LOGDIRTY_IDX(begin_pfn);
    3.57 +    b4 = L4_LOGDIRTY_IDX(begin_pfn);
    3.58 +    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
    3.59 +
    3.60 +    for ( i4 = b4;
    3.61 +          (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
    3.62 +          i4++ )
    3.63 +    {
    3.64 +        l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
    3.65 +        for ( i3 = b3;
    3.66 +              (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
    3.67 +              i3++ )
    3.68 +        {
    3.69 +            l2 = ((l3 && mfn_valid(l3[i3])) ?
    3.70 +                  map_domain_page(mfn_x(l3[i3])) : NULL);
    3.71 +            for ( i2 = b2;
    3.72 +                  (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
    3.73 +                  i2++ )
    3.74 +            {
    3.75 +                static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
    3.76 +                unsigned int bytes = PAGE_SIZE;
    3.77 +                uint8_t *s;
    3.78 +                l1 = ((l2 && mfn_valid(l2[i2])) ?
    3.79 +                      map_domain_page(mfn_x(l2[i2])) : zeroes);
    3.80 +
    3.81 +                s = ((uint8_t*)l1) + (b1 >> 3);
    3.82 +                bytes -= b1 >> 3;
    3.83 +
    3.84 +                if ( likely(((nr - pages + 7) >> 3) < bytes) )
    3.85 +                    bytes = (unsigned int)((nr - pages + 7) >> 3);
    3.86 +
    3.87 +                /* begin_pfn is not 32K aligned, hence we have to bit
    3.88 +                 * shift the bitmap */
    3.89 +                if ( b1 & 0x7 )
    3.90 +                {
    3.91 +                    int i, j;
    3.92 +                    uint32_t *l = (uint32_t*) s;
    3.93 +                    int bits = b1 & 0x7;
    3.94 +                    int bitmask = (1 << bits) - 1;
    3.95 +                    int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
    3.96 +                    unsigned long bitmap[size];
    3.97 +                    static unsigned long printed = 0;
    3.98 +
    3.99 +                    if ( printed != begin_pfn )
   3.100 +                    {
   3.101 +                        dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
   3.102 +                                __FUNCTION__, begin_pfn);
   3.103 +                        printed = begin_pfn;
   3.104 +                    }
   3.105 +
   3.106 +                    for ( i = 0; i < size - 1; i++, l++ ) {
   3.107 +                        bitmap[i] = ((*l) >> bits) |
   3.108 +                            (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
   3.109 +                    }
   3.110 +                    s = (uint8_t*) l;
   3.111 +                    size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
   3.112 +                    bitmap[i] = 0;
   3.113 +                    for ( j = 0; j < size; j++, s++ )
   3.114 +                        bitmap[i] |= (*s) << (j * 8);
   3.115 +                    bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
   3.116 +                    if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
   3.117 +                                (uint8_t*) bitmap, bytes) != 0 )
   3.118 +                    {
   3.119 +                        rv = -EFAULT;
   3.120 +                        goto out;
   3.121 +                    }
   3.122 +                }
   3.123 +                else
   3.124 +                {
   3.125 +                    if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
   3.126 +                                              s, bytes) != 0 )
   3.127 +                    {
   3.128 +                        rv = -EFAULT;
   3.129 +                        goto out;
   3.130 +                    }
   3.131 +                }
   3.132 +
   3.133 +                if ( l1 != zeroes )
   3.134 +                    clear_page(l1);
   3.135 +                pages += bytes << 3;
   3.136 +                if ( l1 != zeroes )
   3.137 +                    unmap_domain_page(l1);
   3.138 +                b1 = b1 & 0x7;
   3.139 +            }
   3.140 +            b2 = 0;
   3.141 +            if ( l2 )
   3.142 +                unmap_domain_page(l2);
   3.143 +        }
   3.144 +        b3 = 0;
   3.145 +        if ( l3 )
   3.146 +            unmap_domain_page(l3);
   3.147 +    }
   3.148 +    unmap_domain_page(l4);
   3.149 +
   3.150 +    log_dirty_unlock(d);
   3.151 +
   3.152 +    return rv;
   3.153 +
   3.154 + out:
   3.155 +    log_dirty_unlock(d);
   3.156 +    return rv;
   3.157 +}
   3.158  
   3.159  /* Note that this function takes three function pointers. Callers must supply
   3.160   * these functions for log dirty code to call. This function usually is
   3.161 @@ -554,11 +705,17 @@ int paging_domctl(struct domain *d, xen_
   3.162      switch ( sc->op )
   3.163      {
   3.164      case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
   3.165 +        if ( hap_enabled(d) )
   3.166 +            hap_logdirty_init(d);
   3.167          return paging_log_dirty_enable(d);
   3.168  
   3.169      case XEN_DOMCTL_SHADOW_OP_ENABLE:
   3.170          if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
   3.171 +        {
   3.172 +            if ( hap_enabled(d) )
   3.173 +                hap_logdirty_init(d);
   3.174              return paging_log_dirty_enable(d);
   3.175 +        }
   3.176  
   3.177      case XEN_DOMCTL_SHADOW_OP_OFF:
   3.178          if ( paging_mode_log_dirty(d) )
     4.1 --- a/xen/arch/x86/mm/shadow/private.h	Thu Jun 04 10:57:39 2009 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/private.h	Thu Jun 04 22:25:10 2009 +0100
     4.3 @@ -590,17 +590,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
     4.4  }
     4.5  
     4.6  /**************************************************************************/
     4.7 -/* VRAM dirty tracking support */
     4.8 -
     4.9 -struct sh_dirty_vram {
    4.10 -    unsigned long begin_pfn;
    4.11 -    unsigned long end_pfn;
    4.12 -    paddr_t *sl1ma;
    4.13 -    uint8_t *dirty_bitmap;
    4.14 -    s_time_t last_dirty;
    4.15 -};
    4.16 -
    4.17 -/**************************************************************************/
    4.18  /* Shadow-page refcounting. */
    4.19  
    4.20  void sh_destroy_shadow(struct vcpu *v, mfn_t smfn);
     5.1 --- a/xen/include/asm-x86/hap.h	Thu Jun 04 10:57:39 2009 +0100
     5.2 +++ b/xen/include/asm-x86/hap.h	Thu Jun 04 22:25:10 2009 +0100
     5.3 @@ -91,6 +91,11 @@ int   hap_enable(struct domain *d, u32 m
     5.4  void  hap_final_teardown(struct domain *d);
     5.5  void  hap_teardown(struct domain *d);
     5.6  void  hap_vcpu_init(struct vcpu *v);
     5.7 +void  hap_logdirty_init(struct domain *d);
     5.8 +int   hap_track_dirty_vram(struct domain *d,
     5.9 +                           unsigned long begin_pfn,
    5.10 +                           unsigned long nr,
    5.11 +                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
    5.12  
    5.13  extern struct paging_mode hap_paging_real_mode;
    5.14  extern struct paging_mode hap_paging_protected_mode;
     6.1 --- a/xen/include/asm-x86/paging.h	Thu Jun 04 10:57:39 2009 +0100
     6.2 +++ b/xen/include/asm-x86/paging.h	Thu Jun 04 22:25:10 2009 +0100
     6.3 @@ -139,6 +139,12 @@ int paging_alloc_log_dirty_bitmap(struct
     6.4  /* free log dirty bitmap resource */
     6.5  void paging_free_log_dirty_bitmap(struct domain *d);
     6.6  
     6.7 +/* get the dirty bitmap for a specific range of pfns */
     6.8 +int paging_log_dirty_range(struct domain *d,
     6.9 +                           unsigned long begin_pfn,
    6.10 +                           unsigned long nr,
    6.11 +                           XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
    6.12 +
    6.13  /* enable log dirty */
    6.14  int paging_log_dirty_enable(struct domain *d);
    6.15  
    6.16 @@ -176,6 +182,15 @@ void paging_mark_dirty(struct domain *d,
    6.17  #define L4_LOGDIRTY_IDX(pfn) 0
    6.18  #endif
    6.19  
    6.20 +/* VRAM dirty tracking support */
    6.21 +struct sh_dirty_vram {
    6.22 +    unsigned long begin_pfn;
    6.23 +    unsigned long end_pfn;
    6.24 +    paddr_t *sl1ma;
    6.25 +    uint8_t *dirty_bitmap;
    6.26 +    s_time_t last_dirty;
    6.27 +};
    6.28 +
    6.29  /*****************************************************************************
    6.30   * Entry points into the paging-assistance code */
    6.31
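
The trickiest part of paging_log_dirty_range above is the sub-byte
shift taken when begin_pfn is not 32K aligned (one byte of the bitmap
covers eight 4K pages): the dirty bitmap stored in the logdirty L1 page
starts mid-byte, so it must be shifted right by (b1 & 0x7) bits before
being copied out, so that bit 0 of the returned bitmap corresponds to
begin_pfn. A standalone, hypothetical demo of that shift (shift_bitmap
and the test values are illustrative, not code from the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Shift a bitmap right by `bits` (1-7) so that bit `bits` of src
     * becomes bit 0 of dst; src must have dst_bytes + 1 readable bytes.
     * The patch only takes this path when (b1 & 0x7) != 0. */
    static void shift_bitmap(uint8_t *dst, const uint8_t *src,
                             unsigned int dst_bytes, unsigned int bits)
    {
        unsigned int i;
        for ( i = 0; i < dst_bytes; i++ )
            dst[i] = (uint8_t)((src[i] >> bits) | (src[i + 1] << (8 - bits)));
    }

    int main(void)
    {
        /* Pages 3, 4 and 11 of the L1 bitmap are dirty. */
        uint8_t src[3] = { 0x18, 0x08, 0x00 };
        uint8_t dst[2];

        /* begin_pfn lands 3 bits into the first bitmap byte... */
        shift_bitmap(dst, src, 2, 3);

        /* ...so relative to begin_pfn the dirty pages are 0, 1 and 8. */
        printf("%02x %02x\n", dst[0], dst[1]); /* prints "03 01" */
        return 0;
    }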