ia64/xen-unstable

changeset 17571:b6aa55ca599e

shadow: track video RAM dirty bits

This adds a new HVM op that enables dirty-bit tracking for a range of
video RAM. The idea is to optimize only for the most common case
(a single guest mapping, with occasional temporary additional
mappings), which keeps the overhead on the shadow code as low as
possible.

Signed-off-by: Samuel Thibault <samuel.thibault@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri May 02 15:08:27 2008 +0100 (2008-05-02)
parents cd5fa4e7993f
children 6bd4625a20ee
files tools/ioemu/hw/cirrus_vga.c tools/ioemu/hw/vga.c tools/ioemu/hw/vga_int.h tools/libxc/xc_misc.c tools/libxc/xenctrl.h xen/arch/ia64/vmx/vmx_hypercall.c xen/arch/x86/hvm/hvm.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-ia64/config.h xen/include/asm-powerpc/types.h xen/include/asm-x86/shadow.h xen/include/asm-x86/types.h xen/include/public/hvm/hvm_op.h xen/include/xen/sched.h
line diff
     1.1 --- a/tools/ioemu/hw/cirrus_vga.c	Fri May 02 14:35:27 2008 +0100
     1.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Fri May 02 15:08:27 2008 +0100
     1.3 @@ -234,8 +234,6 @@ typedef struct CirrusVGAState {
     1.4      int cirrus_linear_io_addr;
     1.5      int cirrus_linear_bitblt_io_addr;
     1.6      int cirrus_mmio_io_addr;
     1.7 -    unsigned long cirrus_lfb_addr;
     1.8 -    unsigned long cirrus_lfb_end;
     1.9      uint32_t cirrus_addr_mask;
    1.10      uint32_t linear_mmio_mask;
    1.11      uint8_t cirrus_shadow_gr0;
    1.12 @@ -2657,11 +2655,11 @@ static void cirrus_update_memory_access(
    1.13          
    1.14  	mode = s->gr[0x05] & 0x7;
    1.15  	if (mode < 4 || mode > 5 || ((s->gr[0x0B] & 0x4) == 0)) {
    1.16 -            if (s->cirrus_lfb_addr && s->cirrus_lfb_end && !s->map_addr) {
    1.17 +            if (s->lfb_addr && s->lfb_end && !s->map_addr) {
    1.18                  void *vram_pointer, *old_vram;
    1.19  
    1.20 -                vram_pointer = set_vram_mapping(s->cirrus_lfb_addr,
    1.21 -                                                s->cirrus_lfb_end);
    1.22 +                vram_pointer = set_vram_mapping(s->lfb_addr,
    1.23 +                                                s->lfb_end);
    1.24                  if (!vram_pointer)
    1.25                      fprintf(stderr, "NULL vram_pointer\n");
    1.26                  else {
    1.27 @@ -2669,21 +2667,21 @@ static void cirrus_update_memory_access(
    1.28                                                 VGA_RAM_SIZE);
    1.29                      qemu_free(old_vram);
    1.30                  }
    1.31 -                s->map_addr = s->cirrus_lfb_addr;
    1.32 -                s->map_end = s->cirrus_lfb_end;
    1.33 +                s->map_addr = s->lfb_addr;
    1.34 +                s->map_end = s->lfb_end;
    1.35              }
    1.36              s->cirrus_linear_write[0] = cirrus_linear_mem_writeb;
    1.37              s->cirrus_linear_write[1] = cirrus_linear_mem_writew;
    1.38              s->cirrus_linear_write[2] = cirrus_linear_mem_writel;
    1.39          } else {
    1.40          generic_io:
    1.41 -            if (s->cirrus_lfb_addr && s->cirrus_lfb_end && s->map_addr) {
    1.42 +            if (s->lfb_addr && s->lfb_end && s->map_addr) {
    1.43                  void *old_vram;
    1.44  
    1.45                  old_vram = vga_update_vram((VGAState *)s, NULL, VGA_RAM_SIZE);
    1.46  
    1.47 -                unset_vram_mapping(s->cirrus_lfb_addr,
    1.48 -                                   s->cirrus_lfb_end, 
    1.49 +                unset_vram_mapping(s->lfb_addr,
    1.50 +                                   s->lfb_end, 
    1.51                                     old_vram);
    1.52  
    1.53                  s->map_addr = s->map_end = 0;
    1.54 @@ -3049,27 +3047,27 @@ void cirrus_stop_acc(CirrusVGAState *s)
    1.55      if (s->map_addr){
    1.56          int error;
    1.57          s->map_addr = 0;
    1.58 -        error = unset_vram_mapping(s->cirrus_lfb_addr,
    1.59 -                s->cirrus_lfb_end, s->vram_ptr);
    1.60 +        error = unset_vram_mapping(s->lfb_addr,
    1.61 +                s->lfb_end, s->vram_ptr);
    1.62          fprintf(stderr, "cirrus_stop_acc:unset_vram_mapping.\n");
    1.63      }
    1.64  }
    1.65  
    1.66  void cirrus_restart_acc(CirrusVGAState *s)
    1.67  {
    1.68 -    if (s->cirrus_lfb_addr && s->cirrus_lfb_end) {
    1.69 +    if (s->lfb_addr && s->lfb_end) {
    1.70          void *vram_pointer, *old_vram;
    1.71          fprintf(stderr, "cirrus_vga_load:re-enable vga acc.lfb_addr=0x%lx, lfb_end=0x%lx.\n",
    1.72 -                s->cirrus_lfb_addr, s->cirrus_lfb_end);
    1.73 -        vram_pointer = set_vram_mapping(s->cirrus_lfb_addr ,s->cirrus_lfb_end);
    1.74 +                s->lfb_addr, s->lfb_end);
    1.75 +        vram_pointer = set_vram_mapping(s->lfb_addr ,s->lfb_end);
    1.76          if (!vram_pointer){
    1.77              fprintf(stderr, "cirrus_vga_load:NULL vram_pointer\n");
    1.78          } else {
    1.79              old_vram = vga_update_vram((VGAState *)s, vram_pointer,
    1.80                      VGA_RAM_SIZE);
    1.81              qemu_free(old_vram);
    1.82 -            s->map_addr = s->cirrus_lfb_addr;
    1.83 -            s->map_end = s->cirrus_lfb_end;
    1.84 +            s->map_addr = s->lfb_addr;
    1.85 +            s->map_end = s->lfb_end;
    1.86          }
    1.87      }
    1.88  }
    1.89 @@ -3120,8 +3118,8 @@ static void cirrus_vga_save(QEMUFile *f,
    1.90  
    1.91      vga_acc = (!!s->map_addr);
    1.92      qemu_put_8s(f, &vga_acc);
    1.93 -    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
    1.94 -    qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
    1.95 +    qemu_put_be64s(f, (uint64_t*)&s->lfb_addr);
    1.96 +    qemu_put_be64s(f, (uint64_t*)&s->lfb_end);
    1.97      qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
    1.98  }
    1.99  
   1.100 @@ -3175,8 +3173,8 @@ static int cirrus_vga_load(QEMUFile *f, 
   1.101      qemu_get_be32s(f, &s->hw_cursor_y);
   1.102  
   1.103      qemu_get_8s(f, &vga_acc);
   1.104 -    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
   1.105 -    qemu_get_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
   1.106 +    qemu_get_be64s(f, (uint64_t*)&s->lfb_addr);
   1.107 +    qemu_get_be64s(f, (uint64_t*)&s->lfb_end);
   1.108      qemu_get_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
   1.109      if (vga_acc){
   1.110          cirrus_restart_acc(s);
   1.111 @@ -3337,11 +3335,11 @@ static void cirrus_pci_lfb_map(PCIDevice
   1.112      /* XXX: add byte swapping apertures */
   1.113      cpu_register_physical_memory(addr, s->vram_size,
   1.114  				 s->cirrus_linear_io_addr);
   1.115 -    s->cirrus_lfb_addr = addr;
   1.116 -    s->cirrus_lfb_end = addr + VGA_RAM_SIZE;
   1.117 -
   1.118 -    if (s->map_addr && (s->cirrus_lfb_addr != s->map_addr) &&
   1.119 -        (s->cirrus_lfb_end != s->map_end))
   1.120 +    s->lfb_addr = addr;
   1.121 +    s->lfb_end = addr + VGA_RAM_SIZE;
   1.122 +
   1.123 +    if (s->map_addr && (s->lfb_addr != s->map_addr) &&
   1.124 +        (s->lfb_end != s->map_end))
   1.125          fprintf(logfile, "cirrus vga map change while on lfb mode\n");
   1.126  
   1.127      cpu_register_physical_memory(addr + 0x1000000, 0x400000,
     2.1 --- a/tools/ioemu/hw/vga.c	Fri May 02 14:35:27 2008 +0100
     2.2 +++ b/tools/ioemu/hw/vga.c	Fri May 02 15:08:27 2008 +0100
     2.3 @@ -1086,6 +1086,9 @@ static void vga_draw_text(VGAState *s, i
     2.4      vga_draw_glyph8_func *vga_draw_glyph8;
     2.5      vga_draw_glyph9_func *vga_draw_glyph9;
     2.6  
     2.7 +    /* Disable dirty bit tracking */
     2.8 +    xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
     2.9 +
    2.10      if (s->ds->dpy_colourdepth != NULL && s->ds->depth != 0)
    2.11          s->ds->dpy_colourdepth(s->ds, 0);
    2.12      s->rgb_to_pixel = 
    2.13 @@ -1485,7 +1488,7 @@ void check_sse2(void)
    2.14  static void vga_draw_graphic(VGAState *s, int full_update)
    2.15  {
    2.16      int y1, y, update, linesize, y_start, double_scan, mask, depth;
    2.17 -    int width, height, shift_control, line_offset, bwidth, ds_depth;
    2.18 +    int width, height, shift_control, line_offset, bwidth, ds_depth, bits;
    2.19      ram_addr_t page0, page1;
    2.20      int disp_width, multi_scan, multi_run;
    2.21      uint8_t *d;
    2.22 @@ -1533,6 +1536,7 @@ static void vga_draw_graphic(VGAState *s
    2.23          } else {
    2.24              v = VGA_DRAW_LINE4;
    2.25          }
    2.26 +        bits = 4;
    2.27      } else if (shift_control == 1) {
    2.28          full_update |= update_palette16(s);
    2.29          if (s->sr[0x01] & 8) {
    2.30 @@ -1541,28 +1545,35 @@ static void vga_draw_graphic(VGAState *s
    2.31          } else {
    2.32              v = VGA_DRAW_LINE2;
    2.33          }
    2.34 +        bits = 4;
    2.35      } else {
    2.36          switch(s->get_bpp(s)) {
    2.37          default:
    2.38          case 0:
    2.39              full_update |= update_palette256(s);
    2.40              v = VGA_DRAW_LINE8D2;
    2.41 +            bits = 4;
    2.42              break;
    2.43          case 8:
    2.44              full_update |= update_palette256(s);
    2.45              v = VGA_DRAW_LINE8;
    2.46 +            bits = 8;
    2.47              break;
    2.48          case 15:
    2.49              v = VGA_DRAW_LINE15;
    2.50 +            bits = 16;
    2.51              break;
    2.52          case 16:
    2.53              v = VGA_DRAW_LINE16;
    2.54 +            bits = 16;
    2.55              break;
    2.56          case 24:
    2.57              v = VGA_DRAW_LINE24;
    2.58 +            bits = 24;
    2.59              break;
    2.60          case 32:
    2.61              v = VGA_DRAW_LINE32;
    2.62 +            bits = 32;
    2.63              break;
    2.64          }
    2.65      }
    2.66 @@ -1590,12 +1601,72 @@ static void vga_draw_graphic(VGAState *s
    2.67             width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
    2.68  #endif
    2.69  
    2.70 -    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
    2.71 -        if (vram_dirty(s, y, TARGET_PAGE_SIZE))
    2.72 +    y = 0;
    2.73 +
    2.74 +    if (height - 1 > s->line_compare || multi_run || (s->cr[0x17] & 3) != 3
    2.75 +            || !s->lfb_addr) {
    2.76 +        /* Tricky things happen, disable dirty bit tracking */
    2.77 +        xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
    2.78 +
    2.79 +        for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
    2.80 +            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
    2.81 +                cpu_physical_memory_set_dirty(s->vram_offset + y);
    2.82 +    } else {
    2.83 +        /* Tricky things won't have any effect, i.e. we are in the very simple
    2.84 +         * (and very usual) case of a linear buffer. */
    2.85 +        unsigned long end;
    2.86 +
    2.87 +        for ( ; y < ((s->start_addr * 4) & TARGET_PAGE_MASK); y += TARGET_PAGE_SIZE)
    2.88 +            /* We will not read that anyway. */
    2.89              cpu_physical_memory_set_dirty(s->vram_offset + y);
    2.90  
    2.91 +        if (y < (s->start_addr * 4)) {
    2.92 +            /* start address not aligned on a page, track dirtyness by hand. */
    2.93 +            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
    2.94 +                cpu_physical_memory_set_dirty(s->vram_offset + y);
    2.95 +            y += TARGET_PAGE_SIZE;
    2.96 +        }
    2.97 +
    2.98 +        /* use page table dirty bit tracking for the inner of the LFB */
    2.99 +        end = s->start_addr * 4 + height * line_offset;
   2.100 +        {
   2.101 +            unsigned long npages = ((end & TARGET_PAGE_MASK) - y) / TARGET_PAGE_SIZE;
   2.102 +            const int width = sizeof(unsigned long) * 8;
   2.103 +            unsigned long bitmap[(npages + width - 1) / width];
   2.104 +            int err;
   2.105 +
   2.106 +            if (!(err = xc_hvm_track_dirty_vram(xc_handle, domid,
   2.107 +                        (s->lfb_addr + y) / TARGET_PAGE_SIZE, npages, bitmap))) {
   2.108 +                int i, j;
   2.109 +                for (i = 0; i < sizeof(bitmap) / sizeof(*bitmap); i++) {
   2.110 +                    unsigned long map = bitmap[i];
   2.111 +                    for (j = i * width; map && j < npages; map >>= 1, j++)
   2.112 +                        if (map & 1)
   2.113 +                            cpu_physical_memory_set_dirty(s->vram_offset + y
   2.114 +                                + j * TARGET_PAGE_SIZE);
   2.115 +                }
   2.116 +                y += npages * TARGET_PAGE_SIZE;
   2.117 +            } else {
   2.118 +                /* ENODATA just means we have changed mode and will succeed
   2.119 +                 * next time */
   2.120 +                if (err != -ENODATA)
   2.121 +                    fprintf(stderr, "track_dirty_vram(%lx, %lx) failed (%d)\n", s->lfb_addr + y, npages, err);
   2.122 +            }
   2.123 +        }
   2.124 +
   2.125 +        for ( ; y < s->vram_size && y < end; y += TARGET_PAGE_SIZE)
   2.126 +            /* failed or end address not aligned on a page, track dirtyness by
   2.127 +             * hand. */
   2.128 +            if (vram_dirty(s, y, TARGET_PAGE_SIZE))
   2.129 +                cpu_physical_memory_set_dirty(s->vram_offset + y);
   2.130 +
   2.131 +        for ( ; y < s->vram_size; y += TARGET_PAGE_SIZE)
   2.132 +            /* We will not read that anyway. */
   2.133 +            cpu_physical_memory_set_dirty(s->vram_offset + y);
   2.134 +    }
   2.135 +
   2.136      addr1 = (s->start_addr * 4);
   2.137 -    bwidth = width * 4;
   2.138 +    bwidth = (width * bits + 7) / 8;
   2.139      y_start = -1;
   2.140      page_min = 0;
   2.141      page_max = 0;
   2.142 @@ -1681,6 +1752,10 @@ static void vga_draw_blank(VGAState *s, 
   2.143          return;
   2.144      if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
   2.145          return;
   2.146 +
   2.147 +    /* Disable dirty bit tracking */
   2.148 +    xc_hvm_track_dirty_vram(xc_handle, domid, 0, 0, NULL);
   2.149 +
   2.150      s->rgb_to_pixel = 
   2.151          rgb_to_pixel_dup_table[get_depth_index(s->ds)];
   2.152      if (s->ds->depth == 8) 
     3.1 --- a/tools/ioemu/hw/vga_int.h	Fri May 02 14:35:27 2008 +0100
     3.2 +++ b/tools/ioemu/hw/vga_int.h	Fri May 02 15:08:27 2008 +0100
     3.3 @@ -87,6 +87,8 @@
     3.4      unsigned int vram_size;                                             \
     3.5      unsigned long bios_offset;                                          \
     3.6      unsigned int bios_size;                                             \
     3.7 +    unsigned long lfb_addr;                                             \
     3.8 +    unsigned long lfb_end;                                              \
     3.9      PCIDevice *pci_dev;                                                 \
    3.10      uint32_t latch;                                                     \
    3.11      uint8_t sr_index;                                                   \
     4.1 --- a/tools/libxc/xc_misc.c	Fri May 02 14:35:27 2008 +0100
     4.2 +++ b/tools/libxc/xc_misc.c	Fri May 02 15:08:27 2008 +0100
     4.3 @@ -236,6 +236,37 @@ int xc_hvm_set_pci_link_route(
     4.4      return rc;
     4.5  }
     4.6  
     4.7 +int xc_hvm_track_dirty_vram( /* Issue HVMOP_track_dirty_vram for domain `dom`; returns the hypercall's result. */
     4.8 +    int xc_handle, domid_t dom, /* libxc handle used for the hypercall; target domain id */
     4.9 +    uint64_t first_pfn, uint64_t nr, /* first pfn and page count of the range, passed through unchanged */
    4.10 +    unsigned long *dirty_bitmap) /* caller buffer handed to Xen as a byte (uint8_t) bitmap */
    4.11 +{
    4.12 +    DECLARE_HYPERCALL;
    4.13 +    struct xen_hvm_track_dirty_vram arg;
    4.14 +    int rc;
    4.15 +
    4.16 +    hypercall.op     = __HYPERVISOR_hvm_op;
    4.17 +    hypercall.arg[0] = HVMOP_track_dirty_vram;
    4.18 +    hypercall.arg[1] = (unsigned long)&arg;
    4.19 +
    4.20 +    arg.domid     = dom;
    4.21 +    arg.first_pfn = first_pfn;
    4.22 +    arg.nr        = nr;
    4.23 +    set_xen_guest_handle(arg.dirty_bitmap, (uint8_t *)dirty_bitmap);
    4.24 +
    4.25 +    if ( (rc = lock_pages(&arg, sizeof(arg))) != 0 ) /* only `arg` is locked; the bitmap buffer is not locked in this function */
    4.26 +    {
    4.27 +        PERROR("Could not lock memory");
    4.28 +        return rc; /* lock failure: the hypercall is never issued */
    4.29 +    }
    4.30 +
    4.31 +    rc = do_xen_hypercall(xc_handle, &hypercall);
    4.32 +
    4.33 +    unlock_pages(&arg, sizeof(arg)); /* unlocked whether or not the hypercall succeeded */
    4.34 +
    4.35 +    return rc;
    4.36 +}
    4.37 +
    4.38  void *xc_map_foreign_pages(int xc_handle, uint32_t dom, int prot,
    4.39                             const xen_pfn_t *arr, int num)
    4.40  {
     5.1 --- a/tools/libxc/xenctrl.h	Fri May 02 14:35:27 2008 +0100
     5.2 +++ b/tools/libxc/xenctrl.h	Fri May 02 15:08:27 2008 +0100
     5.3 @@ -882,6 +882,22 @@ int xc_hvm_set_pci_link_route(
     5.4      int xc_handle, domid_t dom, uint8_t link, uint8_t isa_irq);
     5.5  
     5.6  
     5.7 +/*
     5.8 + * Track dirty bit changes in the VRAM area
     5.9 + *
    5.10 + * All of this is done atomically:
    5.11 + * - get the dirty bitmap since the last call
    5.12 + * - set up dirty tracking area for period up to the next call
    5.13 + * - clear the dirty tracking area.
    5.14 + *
    5.15 + * Returns -ENODATA and does not fill bitmap if the area has changed since the
    5.16 + * last call.
    5.17 + */
    5.18 +int xc_hvm_track_dirty_vram(
    5.19 +    int xc_handle, domid_t dom,
    5.20 +    uint64_t first_pfn, uint64_t nr,
    5.21 +    unsigned long *bitmap);
    5.22 +
    5.23  typedef enum {
    5.24    XC_ERROR_NONE = 0,
    5.25    XC_INTERNAL_ERROR = 1,
     6.1 --- a/xen/arch/ia64/vmx/vmx_hypercall.c	Fri May 02 14:35:27 2008 +0100
     6.2 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c	Fri May 02 15:08:27 2008 +0100
     6.3 @@ -200,6 +200,10 @@ do_hvm_op(unsigned long op, XEN_GUEST_HA
     6.4          rc = 0;
     6.5          break;
     6.6  
     6.7 +    case HVMOP_track_dirty_vram:
     6.8 +        rc = -ENOSYS;
     6.9 +        break;
    6.10 +
    6.11      default:
    6.12          gdprintk(XENLOG_INFO, "Bad HVM op %ld.\n", op);
    6.13          rc = -ENOSYS;
     7.1 --- a/xen/arch/x86/hvm/hvm.c	Fri May 02 14:35:27 2008 +0100
     7.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri May 02 15:08:27 2008 +0100
     7.3 @@ -2345,6 +2345,54 @@ long do_hvm_op(unsigned long op, XEN_GUE
     7.4          rc = guest_handle_is_null(arg) ? hvmop_flush_tlb_all() : -ENOSYS;
     7.5          break;
     7.6  
     7.7 +    case HVMOP_track_dirty_vram:
     7.8 +    {
     7.9 +        struct xen_hvm_track_dirty_vram a;
    7.10 +        struct domain *d;
    7.11 +
    7.12 +        if ( copy_from_guest(&a, arg, 1) )
    7.13 +            return -EFAULT;
    7.14 +
    7.15 +        if ( a.domid == DOMID_SELF )
    7.16 +        {
    7.17 +            d = rcu_lock_current_domain();
    7.18 +        }
    7.19 +        else
    7.20 +        {
    7.21 +            if ( (d = rcu_lock_domain_by_id(a.domid)) == NULL )
    7.22 +                return -ESRCH;
    7.23 +            if ( !IS_PRIV_FOR(current->domain, d) )
    7.24 +            {
    7.25 +                rc = -EPERM;
    7.26 +                goto param_fail2;
    7.27 +            }
    7.28 +        }
    7.29 +
    7.30 +        rc = -EINVAL;
    7.31 +        if ( !is_hvm_domain(d) )
    7.32 +            goto param_fail2;
    7.33 +
    7.34 +        rc = xsm_hvm_param(d, op);
    7.35 +        if ( rc )
    7.36 +            goto param_fail2;
    7.37 +
    7.38 +        rc = -ESRCH;
    7.39 +        if ( d->is_dying )
    7.40 +            goto param_fail2;
    7.41 +
    7.42 +        rc = -EINVAL;
    7.43 +        if ( !shadow_mode_enabled(d))
    7.44 +            goto param_fail2;
    7.45 +        if ( d->vcpu[0] == NULL )
    7.46 +            goto param_fail2;
    7.47 +
    7.48 +        rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
    7.49 +
    7.50 +    param_fail2:
    7.51 +        rcu_unlock_domain(d);
    7.52 +        break;
    7.53 +    }
    7.54 +
    7.55      default:
    7.56      {
    7.57          gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
     8.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri May 02 14:35:27 2008 +0100
     8.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri May 02 15:08:27 2008 +0100
     8.3 @@ -2589,6 +2589,13 @@ void shadow_teardown(struct domain *d)
     8.4       * calls now that we've torn down the bitmap */
     8.5      d->arch.paging.mode &= ~PG_log_dirty;
     8.6  
     8.7 +    if (d->dirty_vram) {
     8.8 +        xfree(d->dirty_vram->sl1ma);
     8.9 +        xfree(d->dirty_vram->dirty_bitmap);
    8.10 +        xfree(d->dirty_vram);
    8.11 +        d->dirty_vram = NULL;
    8.12 +    }
    8.13 +
    8.14      shadow_unlock(d);
    8.15  }
    8.16  
    8.17 @@ -2849,6 +2856,164 @@ void shadow_clean_dirty_bitmap(struct do
    8.18      shadow_blow_tables(d);
    8.19      shadow_unlock(d);
    8.20  }
    8.21 +
    8.22 +
    8.23 +/**************************************************************************/
    8.24 +/* VRAM dirty tracking support */
    8.25 +int shadow_track_dirty_vram(struct domain *d,          /* domain whose VRAM range is tracked */
    8.26 +                            unsigned long begin_pfn,   /* first guest pfn of the range */
    8.27 +                            unsigned long nr,          /* number of pages; 0 stops tracking */
    8.28 +                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) /* receives (nr + 7) / 8 bytes */
    8.29 +{ /* Returns 0 (bitmap copied out, or tracking stopped), -ENODATA when tracking was (re)started, else -EINVAL/-ENOMEM/-EFAULT. */
    8.30 +    int rc;
    8.31 +    unsigned long end_pfn = begin_pfn + nr; /* exclusive end of the range */
    8.32 +    unsigned long dirty_size = (nr + 7) / 8; /* bitmap size in bytes, one bit per page */
    8.33 +    int flush_tlb = 0; /* set whenever shadow entries were changed; acted upon at `out` */
    8.34 +
    8.35 +    if (end_pfn < begin_pfn /* begin_pfn + nr wrapped around */
    8.36 +            || begin_pfn > d->arch.p2m->max_mapped_pfn
    8.37 +            || end_pfn >= d->arch.p2m->max_mapped_pfn)
    8.38 +        return -EINVAL;
    8.39 +
    8.40 +    shadow_lock(d);
    8.41 +
    8.42 +    if ( d->dirty_vram && (!nr ||
    8.43 +             ( begin_pfn != d->dirty_vram->begin_pfn
    8.44 +            || end_pfn   != d->dirty_vram->end_pfn )) ) {
    8.45 +        /* Different tracking, tear the previous down. */
    8.46 +        gdprintk(XENLOG_INFO, "stopping tracking VRAM %lx - %lx\n", d->dirty_vram->begin_pfn, d->dirty_vram->end_pfn);
    8.47 +        xfree(d->dirty_vram->sl1ma);
    8.48 +        xfree(d->dirty_vram->dirty_bitmap);
    8.49 +        xfree(d->dirty_vram);
    8.50 +        d->dirty_vram = NULL;
    8.51 +    }
    8.52 +
    8.53 +    if ( !nr ) { /* caller only asked to stop tracking */
    8.54 +        rc = 0;
    8.55 +        goto out;
    8.56 +    }
    8.57 +
    8.58 +    /* This should happen rarely (video mode change),
    8.59 +     * no need to be careful. */
    8.60 +    if ( !d->dirty_vram ) {
    8.61 +        unsigned long i;
    8.62 +        p2m_type_t t;
    8.63 +
    8.64 +        /* Just recount from start. */
    8.65 +        for ( i = begin_pfn; i < end_pfn; i++ )
    8.66 +            flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
    8.67 +
    8.68 +        gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
    8.69 +
    8.70 +        rc = -ENOMEM;
    8.71 +        if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
    8.72 +            goto out;
    8.73 +        d->dirty_vram->begin_pfn = begin_pfn;
    8.74 +        d->dirty_vram->end_pfn = end_pfn;
    8.75 +
    8.76 +        if ( (d->dirty_vram->sl1ma = xmalloc_array(paddr_t, nr)) == NULL )
    8.77 +            goto out_dirty_vram;
    8.78 +        memset(d->dirty_vram->sl1ma, ~0, sizeof(paddr_t) * nr); /* all-ones fill: no shadow l1e recorded yet */
    8.79 +
    8.80 +        if ( (d->dirty_vram->dirty_bitmap = xmalloc_array(uint8_t, dirty_size)) == NULL )
    8.81 +            goto out_sl1ma;
    8.82 +        memset(d->dirty_vram->dirty_bitmap, 0, dirty_size);
    8.83 +
    8.84 +        /* Tell the caller that this time we could not track dirty bits. */
    8.85 +        rc = -ENODATA;
    8.86 +    } else {
    8.87 +        unsigned long i; /* same type as nr, so `i < nr` compares like with like */
    8.88 +#ifdef __i386__
    8.89 +        unsigned long map_mfn = INVALID_MFN;
    8.90 +        void *map_sl1p = NULL;
    8.91 +#endif
    8.92 +
    8.93 +        /* Iterate over VRAM to track dirty bits. */
    8.94 +        for ( i = 0; i < nr; i++ ) {
    8.95 +            p2m_type_t t;
    8.96 +            mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
    8.97 +            struct page_info *page = mfn_to_page(mfn);
    8.98 +            u32 count_info = page->u.inuse.type_info & PGT_count_mask;
    8.99 +            int dirty = 0;
   8.100 +            paddr_t sl1ma = d->dirty_vram->sl1ma[i];
   8.101 +
   8.102 +            switch (count_info) {
   8.103 +            case 0:
   8.104 +                /* No guest reference, nothing to track. */
   8.105 +                break;
   8.106 +            case 1:
   8.107 +                /* One guest reference. */
   8.108 +                if ( sl1ma == INVALID_PADDR ) {
   8.109 +                    /* We don't know which sl1e points to this, too bad. */
   8.110 +                    dirty = 1;
   8.111 +                    /* TODO: Heuristics for finding the single mapping of
   8.112 +                     * this gmfn */
   8.113 +                    flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
   8.114 +                } else {
   8.115 +                    /* Hopefully the most common case: only one mapping,
   8.116 +                     * whose dirty bit we can use. */
   8.117 +                    l1_pgentry_t *sl1e;
   8.118 +#ifdef __i386__
   8.119 +                    void *sl1p = map_sl1p;
   8.120 +                    unsigned long sl1mfn = paddr_to_pfn(sl1ma);
   8.121 +
   8.122 +                    if ( sl1mfn != map_mfn ) { /* reuse the current mapping while entries share a shadow page */
   8.123 +                        if ( map_sl1p )
   8.124 +                            sh_unmap_domain_page(map_sl1p);
   8.125 +                        map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
   8.126 +                        map_mfn = sl1mfn;
   8.127 +                    }
   8.128 +                    sl1e = sl1p + (sl1ma & ~PAGE_MASK);
   8.129 +#else
   8.130 +                    sl1e = maddr_to_virt(sl1ma);
   8.131 +#endif
   8.132 +
   8.133 +                    if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY ) {
   8.134 +                        dirty = 1;
   8.135 +                        /* Note: this is atomic, so we may clear a
   8.136 +                         * _PAGE_ACCESSED set by another processor. */
   8.137 +                        l1e_remove_flags(*sl1e, _PAGE_DIRTY);
   8.138 +                        flush_tlb = 1;
   8.139 +                    }
   8.140 +                }
   8.141 +                break;
   8.142 +            default:
   8.143 +                /* More than one guest reference,
   8.144 +                 * we cannot afford to track that. */
   8.145 +                dirty = 1;
   8.146 +                break;
   8.147 +            }
   8.148 +
   8.149 +            if ( dirty )
   8.150 +                d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
   8.151 +        }
   8.152 +
   8.153 +#ifdef __i386__
   8.154 +        if ( map_sl1p )
   8.155 +            sh_unmap_domain_page(map_sl1p);
   8.156 +#endif
   8.157 +
   8.158 +        rc = -EFAULT;
   8.159 +        if ( copy_to_guest(dirty_bitmap, d->dirty_vram->dirty_bitmap, dirty_size) == 0 ) {
   8.160 +            memset(d->dirty_vram->dirty_bitmap, 0, dirty_size); /* reset only after a successful copy-out */
   8.161 +            rc = 0;
   8.162 +        }
   8.163 +    }
   8.164 +    goto out;
   8.165 +
   8.166 +out_sl1ma:
   8.167 +    xfree(d->dirty_vram->sl1ma);
   8.168 +out_dirty_vram:
   8.169 +    xfree(d->dirty_vram);
   8.170 +    d->dirty_vram = NULL;
   8.171 +
   8.172 +out: /* every locked exit comes here, so allocation failures also honour flush_tlb */
   8.173 +    if ( flush_tlb )
   8.174 +        flush_tlb_mask(d->domain_dirty_cpumask);
   8.175 +    shadow_unlock(d);
   8.176 +    return rc;
   8.177 +}
   8.178 +
   8.179  /**************************************************************************/
   8.180  /* Shadow-control XEN_DOMCTL dispatcher */
   8.181  
     9.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri May 02 14:35:27 2008 +0100
     9.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri May 02 15:08:27 2008 +0100
     9.3 @@ -801,7 +801,7 @@ static always_inline void
     9.4      // Since we know the guest's PRESENT bit is set, we also set the shadow's
     9.5      // SHADOW_PRESENT bit.
     9.6      //
     9.7 -    pass_thru_flags = (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER |
     9.8 +    pass_thru_flags = (_PAGE_ACCESSED | _PAGE_USER |
     9.9                         _PAGE_RW | _PAGE_PRESENT);
    9.10      if ( guest_supports_nx(v) )
    9.11          pass_thru_flags |= _PAGE_NX_BIT;
    9.12 @@ -1251,6 +1251,80 @@ static int shadow_set_l2e(struct vcpu *v
    9.13      return flags;
    9.14  }
    9.15  
    9.16 +static inline void shadow_vram_get_l1e(shadow_l1e_t new_sl1e, /* entry about to be installed */
    9.17 +                                       shadow_l1e_t *sl1e,    /* slot it is written to */
    9.18 +                                       mfn_t sl1mfn,          /* shadow L1 page containing that slot */
    9.19 +                                       struct domain *d)      /* domain; d->dirty_vram holds the tracking state */
    9.20 +{ /* Remember which shadow l1e maps a tracked VRAM page when it gains its first reference. */
    9.21 +    mfn_t mfn;
    9.22 +    unsigned long gfn;
    9.23 +
    9.24 +    if ( !d->dirty_vram ) return; /* no VRAM range is being tracked */
    9.25 +
    9.26 +    mfn = shadow_l1e_get_mfn(new_sl1e);
    9.27 +    gfn = mfn_to_gfn(d, mfn);
    9.28 +
    9.29 +    if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
    9.30 +        unsigned long i = gfn - d->dirty_vram->begin_pfn; /* page index within the tracked range */
    9.31 +        struct page_info *page = mfn_to_page(mfn);
    9.32 +        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
    9.33 +        
    9.34 +        if ( count_info == 1 )
    9.35 +            /* Initial guest reference, record it */
    9.36 +            d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
    9.37 +                | ((paddr_t) sl1e & ~PAGE_MASK); /* shadow page address | offset of the slot within its page */
    9.38 +    }
    9.39 +}
    9.40 +
    9.41 +static inline void shadow_vram_put_l1e(shadow_l1e_t old_sl1e, /* entry being dropped */
    9.42 +                                       shadow_l1e_t *sl1e,    /* slot that held it */
    9.43 +                                       mfn_t sl1mfn,          /* shadow L1 page containing that slot */
    9.44 +                                       struct domain *d)      /* domain; d->dirty_vram holds the tracking state */
    9.45 +{ /* Update VRAM dirty tracking when a shadow l1e that maps a tracked page goes away. */
    9.46 +    mfn_t mfn;
    9.47 +    unsigned long gfn;
    9.48 +
    9.49 +    if ( !d->dirty_vram ) return; /* no VRAM range is being tracked */
    9.50 +
    9.51 +    mfn = shadow_l1e_get_mfn(old_sl1e);
    9.52 +    gfn = mfn_to_gfn(d, mfn);
    9.53 +
    9.54 +    if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
    9.55 +        unsigned long i = gfn - d->dirty_vram->begin_pfn; /* page index within the tracked range */
    9.56 +        struct page_info *page = mfn_to_page(mfn);
    9.57 +        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
    9.58 +        int dirty = 0;
    9.59 +        paddr_t sl1ma =  pfn_to_paddr(mfn_x(sl1mfn))
    9.60 +            | ((paddr_t) sl1e & ~PAGE_MASK); /* machine address of the slot being cleared */
    9.61 +
    9.62 +        if ( count_info == 1 ) {
    9.63 +            /* Last reference */
    9.64 +            if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
    9.65 +                /* We didn't know it was that one, let's say it is dirty */
    9.66 +                dirty = 1;
    9.67 +            } else {
    9.68 +                ASSERT(d->dirty_vram->sl1ma[i] == sl1ma);
    9.69 +                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
    9.70 +                if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY )
    9.71 +                    dirty = 1;
    9.72 +            }
    9.73 +        } else {
    9.74 +            /* We had more than one reference, just consider the page dirty. */
    9.75 +            dirty = 1;
    9.76 +            /* Check that it's not the one we recorded. */
    9.77 +            if ( d->dirty_vram->sl1ma[i] == sl1ma ) {
    9.78 +                /* Too bad, we remembered the wrong one... */
    9.79 +                d->dirty_vram->sl1ma[i] = INVALID_PADDR;
    9.80 +            } else {
    9.81 +                /* Ok, our recorded sl1e is still pointing to this page, let's
    9.82 +                 * just hope it will remain. */
    9.83 +            }
    9.84 +        }
    9.85 +        if ( dirty ) /* set bit i: byte i / 8, bit i % 8, the layout shadow_track_dirty_vram() uses */
    9.86 +            d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
    9.87 +    }
    9.88 +}
    9.89 +
    9.90  static int shadow_set_l1e(struct vcpu *v, 
    9.91                            shadow_l1e_t *sl1e, 
    9.92                            shadow_l1e_t new_sl1e,
    9.93 @@ -1275,6 +1349,8 @@ static int shadow_set_l1e(struct vcpu *v
    9.94                  /* Doesn't look like a pagetable. */
    9.95                  flags |= SHADOW_SET_ERROR;
    9.96                  new_sl1e = shadow_l1e_empty();
    9.97 +            } else {
    9.98 +                shadow_vram_get_l1e(new_sl1e, sl1e, sl1mfn, d);
    9.99              }
   9.100          }
   9.101      } 
   9.102 @@ -1293,6 +1369,7 @@ static int shadow_set_l1e(struct vcpu *v
   9.103           * trigger a flush later. */
   9.104          if ( shadow_mode_refcounts(d) ) 
   9.105          {
   9.106 +            shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
   9.107              shadow_put_page_from_l1e(old_sl1e, d);
   9.108          } 
   9.109      }
   9.110 @@ -2248,8 +2325,10 @@ void sh_destroy_l1_shadow(struct vcpu *v
   9.111          mfn_t sl1mfn = smfn; 
   9.112          SHADOW_FOREACH_L1E(sl1mfn, sl1e, 0, 0, {
   9.113              if ( (shadow_l1e_get_flags(*sl1e) & _PAGE_PRESENT)
   9.114 -                 && !sh_l1e_is_magic(*sl1e) )
   9.115 +                 && !sh_l1e_is_magic(*sl1e) ) {
   9.116 +                shadow_vram_put_l1e(*sl1e, sl1e, sl1mfn, d);
   9.117                  shadow_put_page_from_l1e(*sl1e, d);
   9.118 +            }
   9.119          });
   9.120      }
   9.121      
    10.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri May 02 14:35:27 2008 +0100
    10.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri May 02 15:08:27 2008 +0100
    10.3 @@ -528,6 +528,15 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 
    10.4      return rv;
    10.5  }
    10.6  
    10.7 +/**************************************************************************/
    10.8 +/* VRAM dirty tracking support */
    10.9 +
   10.10 +struct sh_dirty_vram {
   10.11 +    unsigned long begin_pfn;  /* first tracked guest pfn (inclusive) */
   10.12 +    unsigned long end_pfn;    /* end of the tracked range (exclusive) */
   10.13 +    paddr_t *sl1ma;           /* per page: address of the recorded shadow l1e, or INVALID_PADDR */
   10.14 +    uint8_t *dirty_bitmap;    /* byte (i / 8) covers page i, counted from begin_pfn */
   10.15 +};
   10.16  
   10.17  /**************************************************************************/
   10.18  /* Shadow-page refcounting. */
    11.1 --- a/xen/include/asm-ia64/config.h	Fri May 02 14:35:27 2008 +0100
    11.2 +++ b/xen/include/asm-ia64/config.h	Fri May 02 15:08:27 2008 +0100
    11.3 @@ -71,6 +71,7 @@ typedef int pid_t;
    11.4  
    11.5  // now needed for xen/include/mm.h
    11.6  typedef unsigned long paddr_t;
    11.7 +#define INVALID_PADDR (~0UL)
    11.8  // from include/linux/kernel.h
    11.9  #define ALIGN(x,a) (((x)+(a)-1)&~((a)-1))
   11.10  
    12.1 --- a/xen/include/asm-powerpc/types.h	Fri May 02 14:35:27 2008 +0100
    12.2 +++ b/xen/include/asm-powerpc/types.h	Fri May 02 15:08:27 2008 +0100
    12.3 @@ -61,6 +61,7 @@ typedef unsigned long size_t;
    12.4  #endif
    12.5  
    12.6  typedef unsigned long paddr_t;
    12.7 +#define INVALID_PADDR (~0UL)
    12.8  #define PRIpaddr "08lx"
    12.9  
   12.10  /* DMA addresses come in generic and 64-bit flavours.  */
    13.1 --- a/xen/include/asm-x86/shadow.h	Fri May 02 14:35:27 2008 +0100
    13.2 +++ b/xen/include/asm-x86/shadow.h	Fri May 02 15:08:27 2008 +0100
    13.3 @@ -62,6 +62,12 @@ void shadow_vcpu_init(struct vcpu *v);
    13.4  /* Enable an arbitrary shadow mode.  Call once at domain creation. */
    13.5  int shadow_enable(struct domain *d, u32 mode);
    13.6  
    13.7 +/* Enable VRAM dirty bit tracking. */
    13.8 +int shadow_track_dirty_vram(struct domain *d,
    13.9 +                            unsigned long first_pfn,
   13.10 +                            unsigned long nr,
   13.11 +                            XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
   13.12 +
   13.13  /* Handler for shadow control ops: operations from user-space to enable
   13.14   * and disable ephemeral shadow modes (test mode and log-dirty mode) and
   13.15   * manipulate the log-dirty bitmap. */
    14.1 --- a/xen/include/asm-x86/types.h	Fri May 02 14:35:27 2008 +0100
    14.2 +++ b/xen/include/asm-x86/types.h	Fri May 02 15:08:27 2008 +0100
    14.3 @@ -38,15 +38,18 @@ typedef signed long long s64;
    14.4  typedef unsigned long long u64;
    14.5  #if defined(CONFIG_X86_PAE)
    14.6  typedef u64 paddr_t;
    14.7 +#define INVALID_PADDR (~0ULL)
    14.8  #define PRIpaddr "016llx"
    14.9  #else
   14.10  typedef unsigned long paddr_t;
   14.11 +#define INVALID_PADDR (~0UL)
   14.12  #define PRIpaddr "08lx"
   14.13  #endif
   14.14  #elif defined(__x86_64__)
   14.15  typedef signed long s64;
   14.16  typedef unsigned long u64;
   14.17  typedef unsigned long paddr_t;
   14.18 +#define INVALID_PADDR (~0UL)
   14.19  #define PRIpaddr "016lx"
   14.20  #endif
   14.21  
    15.1 --- a/xen/include/public/hvm/hvm_op.h	Fri May 02 14:35:27 2008 +0100
    15.2 +++ b/xen/include/public/hvm/hvm_op.h	Fri May 02 15:08:27 2008 +0100
    15.3 @@ -73,4 +73,20 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_
    15.4  /* Flushes all VCPU TLBs: @arg must be NULL. */
    15.5  #define HVMOP_flush_tlbs          5
    15.6  
    15.7 +/* Track dirty VRAM. */
    15.8 +#define HVMOP_track_dirty_vram    6
    15.9 +struct xen_hvm_track_dirty_vram {
   15.10 +    /* Domain to be tracked. */
   15.11 +    domid_t  domid;
   15.12 +    /* First pfn to track. */
   15.13 +    uint64_aligned_t first_pfn;
   15.14 +    /* Number of pages to track. */
   15.15 +    uint64_aligned_t nr;
   15.16 +    /* OUT variable. */
   15.17 +    /* Dirty bitmap buffer. NOTE(review): presumably one bit per tracked page -- confirm. */
   15.18 +    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
   15.19 +};
   15.20 +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
   15.21 +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
   15.22 +
   15.23  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
    16.1 --- a/xen/include/xen/sched.h	Fri May 02 14:35:27 2008 +0100
    16.2 +++ b/xen/include/xen/sched.h	Fri May 02 15:08:27 2008 +0100
    16.3 @@ -236,6 +236,9 @@ struct domain
    16.4       * cause a deadlock. Acquirers don't spin waiting; they preempt.
    16.5       */
    16.6      spinlock_t hypercall_deadlock_mutex;
    16.7 +
    16.8 +    /* VRAM dirty support. */
    16.9 +    struct sh_dirty_vram *dirty_vram;
   16.10  };
   16.11  
   16.12  struct domain_setup_info