direct-io.hg

changeset 12759:67a06a9b7b1d

[HVM] qemu: Add guest address-space mapping cache.

On an IA32 or IA32 PAE host it is, in general, impossible to create
an HVM guest with more than 2GB of memory, because Qemu can almost
never find a contiguous virtual address range large enough to map the
guest's whole physical address space. This patch fixes the issue by
mapping guest memory dynamically, in small blocks, through a mapping
cache.
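
For illustration only (not part of the changeset): a minimal,
self-contained sketch of the bucket arithmetic the map cache relies on.
The bucket size and the index/offset split mirror the patch; calloc()
and free() stand in for xc_map_foreign_batch() and munmap() so the
sketch runs without Xen.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MCACHE_BUCKET_SHIFT 16                   /* 64kB buckets, as on i386 */
    #define MCACHE_BUCKET_SIZE  (1UL << MCACHE_BUCKET_SHIFT)
    #define NR_BUCKETS          16                   /* tiny cache for the demo */

    struct map_cache {
        unsigned long paddr_index;   /* which guest bucket this slot holds */
        uint8_t      *vaddr_base;    /* local mapping of that bucket */
    };

    static struct map_cache cache[NR_BUCKETS];

    /* Translate a guest physical address into a local pointer, (re)mapping
     * the covering bucket on demand -- the lookup qemu_map_cache() performs.
     * The real code munmap()s the old bucket and maps the new one with
     * xc_map_foreign_batch(); here calloc()/free() play those roles. */
    static uint8_t *map_cache_lookup(uint64_t phys_addr)
    {
        unsigned long index  = phys_addr >> MCACHE_BUCKET_SHIFT;
        unsigned long offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);
        struct map_cache *ent = &cache[index % NR_BUCKETS];

        if (ent->vaddr_base == NULL || ent->paddr_index != index) {
            free(ent->vaddr_base);                   /* evict the old mapping */
            ent->vaddr_base = calloc(1, MCACHE_BUCKET_SIZE);
            if (ent->vaddr_base == NULL)
                exit(1);
            ent->paddr_index = index;
        }
        return ent->vaddr_base + offset;
    }

    int main(void)
    {
        uint8_t *p = map_cache_lookup(0x12345678);   /* arbitrary guest address */
        *p = 0xab;
        printf("bucket %#lx, offset %#lx, wrote %#x\n",
               0x12345678UL >> MCACHE_BUCKET_SHIFT,
               0x12345678UL & (MCACHE_BUCKET_SIZE - 1), (unsigned)*p);
        return 0;
    }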

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Dec 07 11:12:52 2006 +0000 (2006-12-07)
parents 3f0ca90351e2
children 0f5dd1d43b67
files tools/ioemu/target-i386-dm/cpu.h tools/ioemu/target-i386-dm/exec-dm.c tools/ioemu/vl.c tools/ioemu/vl.h
line diff
     1.1 --- a/tools/ioemu/target-i386-dm/cpu.h	Thu Dec 07 10:54:43 2006 +0000
     1.2 +++ b/tools/ioemu/target-i386-dm/cpu.h	Thu Dec 07 11:12:52 2006 +0000
     1.3 @@ -25,7 +25,8 @@
     1.4  #ifdef TARGET_X86_64
     1.5  #define TARGET_LONG_BITS 64
     1.6  #else
     1.7 -#define TARGET_LONG_BITS 32
     1.8 +/* #define TARGET_LONG_BITS 32 */
     1.9 +#define TARGET_LONG_BITS 64 /* 64-bit phys addrs for the Qemu map cache */
    1.10  #endif
    1.11  
    1.12  /* target supports implicit self modifying code */
     2.1 --- a/tools/ioemu/target-i386-dm/exec-dm.c	Thu Dec 07 10:54:43 2006 +0000
     2.2 +++ b/tools/ioemu/target-i386-dm/exec-dm.c	Thu Dec 07 11:12:52 2006 +0000
     2.3 @@ -36,6 +36,7 @@
     2.4  
     2.5  #include "cpu.h"
     2.6  #include "exec-all.h"
     2.7 +#include "vl.h"
     2.8  
     2.9  //#define DEBUG_TB_INVALIDATE
    2.10  //#define DEBUG_FLUSH
    2.11 @@ -426,6 +427,12 @@ static inline int paddr_is_ram(target_ph
    2.12  #endif
    2.13  }
    2.14  
    2.15 +#if defined(__i386__) || defined(__x86_64__)
    2.16 +#define phys_ram_addr(x) (qemu_map_cache(x))
    2.17 +#elif defined(__ia64__)
    2.18 +#define phys_ram_addr(x) (phys_ram_base + (x))
    2.19 +#endif
    2.20 +
    2.21  void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf, 
    2.22                              int len, int is_write)
    2.23  {
    2.24 @@ -438,7 +445,7 @@ void cpu_physical_memory_rw(target_phys_
    2.25          l = TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK); 
    2.26          if (l > len)
    2.27              l = len;
    2.28 -	
    2.29 +
    2.30          io_index = iomem_index(addr);
    2.31          if (is_write) {
    2.32              if (io_index) {
    2.33 @@ -460,9 +467,10 @@ void cpu_physical_memory_rw(target_phys_
    2.34                  }
    2.35              } else if (paddr_is_ram(addr)) {
    2.36                  /* Writing to RAM */
    2.37 -                memcpy(phys_ram_base + addr, buf, l);
    2.38 +                ptr = phys_ram_addr(addr);
    2.39 +                memcpy(ptr, buf, l);
    2.40  #ifdef __ia64__
    2.41 -                sync_icache((unsigned long)(phys_ram_base + addr), l);
    2.42 +                sync_icache((unsigned long)ptr, l);
    2.43  #endif 
    2.44              }
    2.45          } else {
    2.46 @@ -485,7 +493,8 @@ void cpu_physical_memory_rw(target_phys_
    2.47                  }
    2.48              } else if (paddr_is_ram(addr)) {
    2.49                  /* Reading from RAM */
    2.50 -                memcpy(buf, phys_ram_base + addr, l);
    2.51 +                ptr = phys_ram_addr(addr);
    2.52 +                memcpy(buf, ptr, l);
    2.53              } else {
    2.54                  /* Neither RAM nor known MMIO space */
    2.55                  memset(buf, 0xff, len); 
     3.1 --- a/tools/ioemu/vl.c	Thu Dec 07 10:54:43 2006 +0000
     3.2 +++ b/tools/ioemu/vl.c	Thu Dec 07 11:12:52 2006 +0000
     3.3 @@ -5808,6 +5808,92 @@ int set_mm_mapping(int xc_handle, uint32
     3.4      return 0;
     3.5  }
     3.6  
     3.7 +#if defined(__i386__) || defined(__x86_64__)
     3.8 +static struct map_cache *mapcache_entry;
     3.9 +static unsigned long nr_buckets;
    3.10 +
    3.11 +static int qemu_map_cache_init(unsigned long nr_pages)
    3.12 +{
    3.13 +    unsigned long max_pages = MAX_MCACHE_SIZE >> PAGE_SHIFT;
    3.14 +    unsigned long i;
    3.15 +
    3.16 +    if (nr_pages < max_pages)
    3.17 +        max_pages = nr_pages;
    3.18 +
    3.19 +    nr_buckets = (max_pages << PAGE_SHIFT) >> MCACHE_BUCKET_SHIFT;
    3.20 +
    3.21 +    fprintf(logfile, "qemu_map_cache_init nr_buckets = %lx\n", nr_buckets);
    3.22 +
    3.23 +    mapcache_entry = malloc(nr_buckets * sizeof(struct map_cache));
    3.24 +    if (mapcache_entry == NULL) {
    3.25 +        errno = ENOMEM;
    3.26 +        return -1;
    3.27 +    }
    3.28 +
    3.29 +    memset(mapcache_entry, 0, nr_buckets * sizeof(struct map_cache));
    3.30 +
    3.31 +    /*
    3.32 +     * To avoid ENOMEM from xc_map_foreign_batch() at runtime, we
    3.33 +     * pre-fill all the map caches in advance.
    3.34 +     */
    3.35 +    for (i = 0; i < nr_buckets; i++)
    3.36 +        (void)qemu_map_cache(((target_phys_addr_t)i) << MCACHE_BUCKET_SHIFT);
    3.37 +
    3.38 +    return 0;
    3.39 +}
    3.40 +
    3.41 +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr)
    3.42 +{
    3.43 +    struct map_cache *entry;
    3.44 +    unsigned long address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    3.45 +    unsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1);
    3.46 +
    3.47 +    /* Most lookups (>99.9%) hit the same bucket as the previous one. */
    3.48 +    static unsigned long last_address_index = ~0UL;
    3.49 +    static uint8_t      *last_address_vaddr;
    3.50 +
    3.51 +    if (address_index == last_address_index)
    3.52 +        return last_address_vaddr + address_offset;
    3.53 +
    3.54 +    entry = &mapcache_entry[address_index % nr_buckets];
    3.55 +
    3.56 +    if (entry->vaddr_base == NULL || entry->paddr_index != address_index)
    3.57 +    { 
    3.58 +        /* We need to remap a bucket. */
    3.59 +        uint8_t *vaddr_base;
    3.60 +        unsigned long pfns[MCACHE_BUCKET_SIZE >> PAGE_SHIFT];
    3.61 +        unsigned int i;
    3.62 +
    3.63 +        if (entry->vaddr_base != NULL) {
    3.64 +            int rc = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE);
    3.65 +            if (rc) {
    3.66 +                fprintf(logfile, "munmap failed: errno %d\n", errno);
    3.67 +                exit(-1);
    3.68 +            }
    3.69 +        }
    3.70 +
    3.71 +        for (i = 0; i < MCACHE_BUCKET_SIZE >> PAGE_SHIFT; i++)
    3.72 +            pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-PAGE_SHIFT)) + i;
    3.73 +
    3.74 +        vaddr_base = xc_map_foreign_batch(
    3.75 +            xc_handle, domid, PROT_READ|PROT_WRITE,
    3.76 +            pfns, MCACHE_BUCKET_SIZE >> PAGE_SHIFT);
    3.77 +        if (vaddr_base == NULL) {
    3.78 +            fprintf(logfile, "xc_map_foreign_batch error %d\n", errno);
    3.79 +            exit(-1);
    3.80 +        }
    3.81 +
    3.82 +        entry->vaddr_base  = vaddr_base;
    3.83 +        entry->paddr_index = address_index;
    3.84 +    }
    3.85 +
    3.86 +    last_address_index = address_index;
    3.87 +    last_address_vaddr = entry->vaddr_base;
    3.88 +
    3.89 +    return last_address_vaddr + address_offset;
    3.90 +}
    3.91 +#endif
    3.92 +
    3.93  int main(int argc, char **argv)
    3.94  {
    3.95  #ifdef CONFIG_GDBSTUB
    3.96 @@ -6130,6 +6216,6 @@ int main(int argc, char **argv)
    3.97                  break;
    3.98              case QEMU_OPTION_m:
    3.99 -                ram_size = atol(optarg) * 1024 * 1024;
   3.100 +                ram_size = (uint64_t)atol(optarg) * 1024 * 1024;
   3.101                  if (ram_size <= 0)
   3.102                      help();
   3.103  #ifndef CONFIG_DM
   3.104 @@ -6400,50 +6487,41 @@ int main(int argc, char **argv)
   3.105          shared_page_nr = nr_pages - 1;
   3.106  #endif
   3.107  
   3.108 -    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
   3.109 -    if (page_array == NULL) {
   3.110 -        fprintf(logfile, "malloc returned error %d\n", errno);
   3.111 -        exit(-1);
   3.112 -    }
   3.113 -
   3.114  #if defined(__i386__) || defined(__x86_64__)
   3.115 -    for ( i = 0; i < tmp_nr_pages; i++)
   3.116 -        page_array[i] = i;
   3.117 -
   3.118 -    phys_ram_base = xc_map_foreign_batch(xc_handle, domid,
   3.119 -                                         PROT_READ|PROT_WRITE, page_array,
   3.120 -                                         tmp_nr_pages);
   3.121 -    if (phys_ram_base == NULL) {
   3.122 -        fprintf(logfile, "batch map guest memory returned error %d\n", errno);
   3.123 +
   3.124 +    if ( qemu_map_cache_init(tmp_nr_pages) )
   3.125 +    {
   3.126 +        fprintf(logfile, "qemu_map_cache_init returned error %d\n", errno);
   3.127          exit(-1);
   3.128      }
   3.129  
   3.130      shared_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   3.131 -                                       PROT_READ|PROT_WRITE,
   3.132 -                                       page_array[shared_page_nr]);
   3.133 +                                       PROT_READ|PROT_WRITE, shared_page_nr);
   3.134      if (shared_page == NULL) {
   3.135          fprintf(logfile, "map shared IO page returned error %d\n", errno);
   3.136          exit(-1);
   3.137      }
   3.138  
   3.139 -    fprintf(logfile, "shared page at pfn:%lx, mfn: %"PRIx64"\n",
   3.140 -            shared_page_nr, (uint64_t)(page_array[shared_page_nr]));
   3.141 +    fprintf(logfile, "shared page at pfn:%lx\n", shared_page_nr);
   3.142  
   3.143      buffered_io_page = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
   3.144                                              PROT_READ|PROT_WRITE,
   3.145 -                                            page_array[shared_page_nr - 2]);
   3.146 +                                            shared_page_nr - 2);
   3.147      if (buffered_io_page == NULL) {
   3.148          fprintf(logfile, "map buffered IO page returned error %d\n", errno);
   3.149          exit(-1);
   3.150      }
   3.151  
   3.152 -    fprintf(logfile, "buffered io page at pfn:%lx, mfn: %"PRIx64"\n",
   3.153 -            shared_page_nr - 2, (uint64_t)(page_array[shared_page_nr - 2]));
   3.154 -
   3.155 -    free(page_array);
   3.156 +    fprintf(logfile, "buffered io page at pfn:%lx\n", shared_page_nr - 2);
   3.157  
   3.158  #elif defined(__ia64__)
   3.159 -  
   3.160 +
   3.161 +    page_array = (xen_pfn_t *)malloc(tmp_nr_pages * sizeof(xen_pfn_t));
   3.162 +    if (page_array == NULL) {
   3.163 +        fprintf(logfile, "malloc returned error %d\n", errno);
   3.164 +        exit(-1);
   3.165 +    }
   3.166 +
   3.167      if (xc_ia64_get_pfn_list(xc_handle, domid, page_array,
   3.168                               IO_PAGE_START >> PAGE_SHIFT, 3) != 3) {
   3.169          fprintf(logfile, "xc_ia64_get_pfn_list returned error %d\n", errno);
     4.1 --- a/tools/ioemu/vl.h	Thu Dec 07 10:54:43 2006 +0000
     4.2 +++ b/tools/ioemu/vl.h	Thu Dec 07 11:12:52 2006 +0000
     4.3 @@ -156,6 +156,26 @@ extern void *shared_vram;
     4.4  
     4.5  extern FILE *logfile;
     4.6  
     4.7 +
     4.8 +#if defined(__i386__) || defined(__x86_64__)
     4.9 +#if defined(__i386__) 
    4.10 +#define MAX_MCACHE_SIZE    0x40000000 /* 1GB max for x86 */
    4.11 +#define MCACHE_BUCKET_SHIFT 16
    4.12 +#elif defined(__x86_64__)
    4.13 +#define MAX_MCACHE_SIZE    0x1000000000 /* 64GB max for x86_64 */
    4.14 +#define MCACHE_BUCKET_SHIFT 20
    4.15 +#endif
    4.16 +
    4.17 +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)
    4.18 +
    4.19 +struct map_cache {
    4.20 +    unsigned long paddr_index;
    4.21 +    uint8_t      *vaddr_base;
    4.22 +};
    4.23 +
    4.24 +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr);
    4.25 +#endif
    4.26 +
    4.27  extern int xc_handle;
    4.28  extern int domid;
    4.29
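
A side note on the (uint64_t) cast in the QEMU_OPTION_m hunk of vl.c
above: on an IA32 host 'long' is 32 bits, so atol(optarg) * 1024 * 1024
overflows for "-m" values of 2048 and above before the result is ever
stored. A standalone sketch of the difference (not part of the
changeset; the 32-bit truncation is simulated with int32_t so the
behaviour is the same on any host):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        const char *optarg = "4096";      /* as in "-m 4096", a 4GB guest */
        int64_t megs = atol(optarg);

        /* Without the cast, a 32-bit 'long' host computes the product in
         * 32 bits: 4096 * 1024 * 1024 wraps to 0 (simulated via int32_t). */
        int32_t without_cast = (int32_t)(megs * 1024 * 1024);

        /* With the cast, the whole multiplication is done in 64 bits. */
        uint64_t with_cast = (uint64_t)megs * 1024 * 1024;

        printf("without cast: %d\nwith cast:    %llu\n",
               (int)without_cast, (unsigned long long)with_cast);
        return 0;
    }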