ia64/xen-unstable

changeset 9301:48211017a16a

Add a shadow VRAM to track changes to the real VRAM. When the guest
OS was given write access to the VRAM, the device model tracked all
VRAM changes by updating the entire screen on every output loop,
causing significant overhead (a CPU-bound loop in a guest slows down
by about 35%) and significant mouse latency (VNC uses the same data
path for mouse events and video updates). With the shadow VRAM, only
modified pages need to be updated, and the comparison of the shadow
VRAM to the real VRAM adds only ~4% overhead while eliminating the
mouse latency.

Signed-off-by: Don Dugger <donald.d.dugger@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Mar 16 18:41:01 2006 +0100 (2006-03-16)
parents a7b6eed7b0a6
children d6bab69e856f
files tools/ioemu/hw/vga.c tools/ioemu/hw/vga_int.h tools/ioemu/target-i386-dm/Makefile
line diff
     1.1 --- a/tools/ioemu/hw/vga.c	Thu Mar 16 12:34:27 2006 +0100
     1.2 +++ b/tools/ioemu/hw/vga.c	Thu Mar 16 18:41:01 2006 +0100
     1.3 @@ -1340,6 +1340,99 @@ void vga_invalidate_scanlines(VGAState *
     1.4      }
     1.5  }
     1.6  
     1.7 +extern inline int cmp_vram(VGAState *s, int offset, int n)
     1.8 +{
     1.9 +    long *vp, *sp;
    1.10 +
    1.11 +    if (s->vram_shadow == NULL)
    1.12 +        return 1;
    1.13 +    vp = (long *)(s->vram_ptr + offset);
    1.14 +    sp = (long *)(s->vram_shadow + offset);
    1.15 +    while ((n -= sizeof(*vp)) >= 0) {
    1.16 +        if (*vp++ != *sp++) {
    1.17 +            memcpy(sp - 1, vp - 1, n + sizeof(*vp));
    1.18 +            return 1;
    1.19 +        }
    1.20 +    }
    1.21 +    return 0;
    1.22 +}
    1.23 +
    1.24 +#ifdef USE_SSE2
    1.25 +
    1.26 +#include <signal.h>
    1.27 +#include <setjmp.h>
    1.28 +#include <emmintrin.h>
    1.29 +
    1.30 +int sse2_ok = 1;
    1.31 +
    1.32 +static inline unsigned int cpuid_edx(unsigned int op)
    1.33 +{
    1.34 +    unsigned int eax, edx;
    1.35 +
    1.36 +    __asm__("cpuid"
    1.37 +            : "=a" (eax), "=d" (edx)
    1.38 +            : "0" (op)
    1.39 +            : "bx", "cx");
    1.40 +
    1.41 +    return edx;
    1.42 +}
    1.43 +
    1.44 +jmp_buf sse_jbuf;
    1.45 +
    1.46 +void intr(int sig)
    1.47 +{
    1.48 +    sse2_ok = 0;
    1.49 +    longjmp(sse_jbuf, 1);
    1.50 +}
    1.51 +
    1.52 +void check_sse2(void)
    1.53 +{
    1.54 +    /* Check 1: What does CPUID say? */
    1.55 +    if ((cpuid_edx(1) & 0x4000000) == 0) {
    1.56 +        sse2_ok = 0;
    1.57 +        return;
    1.58 +    }
    1.59 +
    1.60 +    /* Check 2: Can we use SSE2 in anger? */
    1.61 +    signal(SIGILL, intr);
    1.62 +    if (setjmp(sse_jbuf) == 0)
    1.63 +        __asm__("xorps %xmm0,%xmm0\n");
    1.64 +}
    1.65 +
    1.66 +int vram_dirty(VGAState *s, int offset, int n)
    1.67 +{
    1.68 +    __m128i *sp, *vp;
    1.69 +
    1.70 +    if (s->vram_shadow == NULL)
    1.71 +        return 1;
    1.72 +    if (sse2_ok == 0)
    1.73 +        return cmp_vram(s, offset, n);
    1.74 +    vp = (__m128i *)(s->vram_ptr + offset);
    1.75 +    sp = (__m128i *)(s->vram_shadow + offset);
    1.76 +    while ((n -= sizeof(*vp)) >= 0) {
    1.77 +        if (_mm_movemask_epi8(_mm_cmpeq_epi8(*sp, *vp)) != 0xffff) {
    1.78 +            while (n >= 0) {
    1.79 +                _mm_store_si128(sp++, _mm_load_si128(vp++));
    1.80 +                n -= sizeof(*vp);
    1.81 +            }
    1.82 +            return 1;
    1.83 +        }
    1.84 +        sp++;
    1.85 +        vp++;
    1.86 +    }
    1.87 +    return 0;
    1.88 +}
    1.89 +#else /* !USE_SSE2 */
    1.90 +int vram_dirty(VGAState *s, int offset, int n)
    1.91 +{
    1.92 +    return cmp_vram(s, offset, n);
    1.93 +}
    1.94 +
    1.95 +void check_sse2(void)
    1.96 +{
    1.97 +}
    1.98 +#endif /* !USE_SSE2 */
    1.99 +
   1.100  /* 
   1.101   * graphic modes
   1.102   */
   1.103 @@ -1434,6 +1527,9 @@ static void vga_draw_graphic(VGAState *s
   1.104      printf("w=%d h=%d v=%d line_offset=%d cr[0x09]=0x%02x cr[0x17]=0x%02x linecmp=%d sr[0x01]=0x%02x\n",
   1.105             width, height, v, line_offset, s->cr[9], s->cr[0x17], s->line_compare, s->sr[0x01]);
   1.106  #endif
   1.107 +    for (y = 0; y < s->vram_size; y += TARGET_PAGE_SIZE)
   1.108 +        if (vram_dirty(s, y, TARGET_PAGE_SIZE))
   1.109 +            cpu_physical_memory_set_dirty(s->vram_offset + y);
   1.110      addr1 = (s->start_addr * 4);
   1.111      bwidth = width * 4;
   1.112      y_start = -1;
   1.113 @@ -1536,9 +1632,18 @@ static void vga_draw_blank(VGAState *s, 
   1.114  
   1.115  void vga_update_display(void)
   1.116  {
   1.117 +    static int loop;
   1.118      VGAState *s = vga_state;
   1.119      int full_update, graphic_mode;
   1.120  
   1.121 +    /*
   1.122 +     * Only update the display every other time.  The responsiveness is
   1.123 +     * acceptable and it cuts down on the overhead of the VRAM compare
   1.124 +     * in `vram_dirty'.
   1.125 +     */
   1.126 +    if (loop++ & 1)
   1.127 +        return;
   1.128 +
   1.129      if (s->ds->depth == 0) {
   1.130          /* nothing to do */
   1.131      } else {
   1.132 @@ -1569,7 +1674,6 @@ void vga_update_display(void)
   1.133              full_update = 1;
   1.134          }
   1.135  
   1.136 -        full_update = 1;
   1.137          switch(graphic_mode) {
   1.138          case GMODE_TEXT:
   1.139              vga_draw_text(s, full_update);
   1.140 @@ -1874,7 +1978,13 @@ void vga_common_init(VGAState *s, Displa
   1.141  #else
   1.142      s->vram_ptr = qemu_malloc(vga_ram_size);
   1.143  #endif
   1.144 -
   1.145 +    check_sse2();
   1.146 +    s->vram_shadow = qemu_malloc(vga_ram_size+TARGET_PAGE_SIZE+1);
   1.147 +    if (s->vram_shadow == NULL)
   1.148 +        fprintf(stderr, "Cannot allocate %d bytes for VRAM shadow, "
   1.149 +                "mouse will be slow\n", vga_ram_size);
   1.150 +    s->vram_shadow = (uint8_t *)((long)(s->vram_shadow + TARGET_PAGE_SIZE - 1)
   1.151 +                                 & ~(TARGET_PAGE_SIZE - 1));
   1.152      s->vram_offset = vga_ram_offset;
   1.153      s->vram_size = vga_ram_size;
   1.154      s->ds = ds;
     2.1 --- a/tools/ioemu/hw/vga_int.h	Thu Mar 16 12:34:27 2006 +0100
     2.2 +++ b/tools/ioemu/hw/vga_int.h	Thu Mar 16 18:41:01 2006 +0100
     2.3 @@ -76,6 +76,7 @@
     2.4  
     2.5  #define VGA_STATE_COMMON                                                \
     2.6      uint8_t *vram_ptr;                                                  \
     2.7 +    uint8_t *vram_shadow;                                               \
     2.8      unsigned long vram_offset;                                          \
     2.9      unsigned int vram_size;                                             \
    2.10      uint32_t latch;                                                     \
     3.1 --- a/tools/ioemu/target-i386-dm/Makefile	Thu Mar 16 12:34:27 2006 +0100
     3.2 +++ b/tools/ioemu/target-i386-dm/Makefile	Thu Mar 16 18:41:01 2006 +0100
     3.3 @@ -13,8 +13,15 @@ ifdef CONFIG_USER_ONLY
     3.4  VPATH+=:$(SRC_PATH)/linux-user
     3.5  DEFINES+=-I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ARCH)
     3.6  endif
     3.7 -CFLAGS+=-g -fno-strict-aliasing
     3.8 -LDFLAGS=-g
     3.9 +
    3.10 +SSE2 := $(call test-gcc-flag,$(CC),-msse2)
    3.11 +ifeq ($(SSE2),-msse2)
    3.12 +CFLAGS += -DUSE_SSE2=1 -msse2
    3.13 +endif
    3.14 +
    3.15 +CFLAGS += -g -fno-strict-aliasing $(LOCAL_CFLAGS)
    3.16 +LDFLAGS = -g
    3.17 +
    3.18  LIBS=
    3.19  HELPER_CFLAGS=$(CFLAGS)
    3.20  DYNGEN=../dyngen$(EXESUF)