ia64/xen-unstable

changeset 14114:59b8d5168cc1

Reduce impact of saving/restoring/dumping large domains on Dom0 memory
usage by means of fadvise64() to tell the OS to discard the cache
pages used for the save/dump file.

Signed-off-by: Simon Graham <Simon.Graham@stratus.com>
author Keir Fraser <keir@xensource.com>
date Sat Feb 24 14:48:17 2007 +0000 (2007-02-24)
parents 0147ef7c3cd7
children c051ed67258a
files tools/libxc/Makefile tools/libxc/xc_core.c tools/libxc/xc_linux.c tools/libxc/xc_linux_restore.c tools/libxc/xc_linux_save.c tools/libxc/xc_private.h tools/libxc/xc_solaris.c
line diff
     1.1 --- a/tools/libxc/Makefile	Sat Feb 24 14:33:34 2007 +0000
     1.2 +++ b/tools/libxc/Makefile	Sat Feb 24 14:48:17 2007 +0000
     1.3 @@ -59,6 +59,9 @@ CFLAGS   += -Werror -Wmissing-prototypes
     1.4  CFLAGS   += -fno-strict-aliasing
     1.5  CFLAGS   += $(INCLUDES) -I.
     1.6  
     1.7 +# Needed for posix_fadvise64() in xc_linux.c
     1.8 +CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
     1.9 +
    1.10  # Define this to make it possible to run valgrind on code linked with these
    1.11  # libraries.
    1.12  #CFLAGS   += -DVALGRIND -O0 -ggdb3
     2.1 --- a/tools/libxc/xc_core.c	Sat Feb 24 14:33:34 2007 +0000
     2.2 +++ b/tools/libxc/xc_core.c	Sat Feb 24 14:48:17 2007 +0000
     2.3 @@ -802,6 +802,12 @@ static int local_file_dump(void *args, c
     2.4          }
     2.5      }
     2.6  
     2.7 +    if (length >= DUMP_INCREMENT*PAGE_SIZE) {
     2.8 +        // Now dumping pages -- make sure we discard clean pages from
     2.9 +        // the cache after each write
    2.10 +        discard_file_cache(da->fd, 0 /* no flush */);
    2.11 +    }
    2.12 +
    2.13      return 0;
    2.14  }
    2.15  
    2.16 @@ -822,6 +828,9 @@ xc_domain_dumpcore(int xc_handle,
    2.17      sts = xc_domain_dumpcore_via_callback(
    2.18          xc_handle, domid, &da, &local_file_dump);
    2.19  
    2.20 +    /* flush and discard any remaining portion of the file from cache */
    2.21 +    discard_file_cache(da.fd, 1/* flush first*/);
    2.22 +
    2.23      close(da.fd);
    2.24  
    2.25      return sts;
     3.1 --- a/tools/libxc/xc_linux.c	Sat Feb 24 14:33:34 2007 +0000
     3.2 +++ b/tools/libxc/xc_linux.c	Sat Feb 24 14:48:17 2007 +0000
     3.3 @@ -328,6 +328,39 @@ int xc_evtchn_unmask(int xce_handle, evt
     3.4      return dorw(xce_handle, (char *)&port, sizeof(port), 1);
     3.5  }
     3.6  
     3.7 +/* Optionally flush file to disk and discard page cache */
     3.8 +int discard_file_cache(int fd, int flush) 
     3.9 +{
    3.10 +    off_t cur = 0;
    3.11 +    int rc;
    3.12 +
    3.13 +    if ( flush && (fsync(fd) < 0) )
    3.14 +    {
    3.15 +        PERROR("Failed to flush file: %s", strerror(errno));
    3.16 +        return -errno;
    3.17 +    }
    3.18 +
    3.19 +    /*
    3.20 +     * Not flushing: discard only up to the last page boundary reached so
    3.21 +     * far. Flushing: cur stays 0 and the entire cache is discarded.
    3.22 +     */
    3.23 +    if ( !flush )
    3.24 +    {
    3.25 +        if ( (cur = lseek(fd, 0, SEEK_CUR)) == (off_t)-1 )
    3.26 +            cur = 0;
    3.27 +        cur &= ~((off_t)PAGE_SIZE - 1); /* build the mask at off_t width */
    3.28 +    }
    3.29 +
    3.30 +    if ( (rc = posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED)) != 0 )
    3.31 +    {
    3.32 +        errno = rc; /* posix_fadvise64() returns the error; errno is not set */
    3.33 +        PERROR("Failed to discard cache: %s", strerror(errno));
    3.34 +        return -rc;
    3.35 +    }
    3.36 +
    3.37 +    return 0;
    3.38 +}
    3.39 +
    3.40  /*
    3.41   * Local variables:
    3.42   * mode: C
     4.1 --- a/tools/libxc/xc_linux_restore.c	Sat Feb 24 14:33:34 2007 +0000
     4.2 +++ b/tools/libxc/xc_linux_restore.c	Sat Feb 24 14:48:17 2007 +0000
     4.3 @@ -144,7 +144,7 @@ int xc_linux_restore(int xc_handle, int 
     4.4                       unsigned int console_evtchn, unsigned long *console_mfn)
     4.5  {
     4.6      DECLARE_DOMCTL;
     4.7 -    int rc = 1, i, n, pae_extended_cr3 = 0;
     4.8 +    int rc = 1, i, n, m, pae_extended_cr3 = 0;
     4.9      unsigned long mfn, pfn;
    4.10      unsigned int prev_pc, this_pc;
    4.11      int verify = 0;
    4.12 @@ -331,7 +331,7 @@ int xc_linux_restore(int xc_handle, int 
    4.13       */
    4.14      prev_pc = 0;
    4.15  
    4.16 -    n = 0;
    4.17 +    n = m = 0;
    4.18      while (1) {
    4.19  
    4.20          int j, nr_mfns = 0; 
    4.21 @@ -530,6 +530,17 @@ int xc_linux_restore(int xc_handle, int 
    4.22  
    4.23          munmap(region_base, j*PAGE_SIZE);
    4.24          n+= j; /* crude stats */
    4.25 +
    4.26 +        /* 
    4.27 +         * Discard cache for portion of file read so far up to last
    4.28 +         *  page boundary every 16MB or so.
    4.29 +         */
    4.30 +        m += j;
    4.31 +        if ( m > MAX_PAGECACHE_USAGE )
    4.32 +        {
    4.33 +            discard_file_cache(io_fd, 0 /* no flush */);
    4.34 +            m = 0;
    4.35 +        }
    4.36      }
    4.37  
    4.38      /*
    4.39 @@ -864,6 +875,9 @@ int xc_linux_restore(int xc_handle, int 
    4.40      free(p2m);
    4.41      free(pfn_type);
    4.42  
    4.43 +    /* discard cache for save file  */
    4.44 +    discard_file_cache(io_fd, 1 /*flush*/);
    4.45 +
    4.46      DPRINTF("Restore exit with rc=%d\n", rc);
    4.47      
    4.48      return rc;
     5.1 --- a/tools/libxc/xc_linux_save.c	Sat Feb 24 14:33:34 2007 +0000
     5.2 +++ b/tools/libxc/xc_linux_save.c	Sat Feb 24 14:48:17 2007 +0000
     5.3 @@ -172,6 +172,28 @@ static uint64_t tv_delta(struct timeval 
     5.4          (new->tv_usec - old->tv_usec);
     5.5  }
     5.6  
     5.7 +static int noncached_write(int fd, int live, void *buffer, int len) 
     5.8 +{
     5.9 +    static int write_count = 0; /* bytes requested since the last discard */
    5.10 +
    5.11 +    int rc = write(fd,buffer,len);
    5.12 +
    5.13 +    if (!live) { /* the cache is only discarded when live is zero */
    5.14 +        write_count += len; /* counts len even if write() fell short */
    5.15 +
    5.16 +        if (write_count >= MAX_PAGECACHE_USAGE*PAGE_SIZE) {
    5.17 +            int serrno = errno; /* keep write()'s errno for the caller */
    5.18 +
    5.19 +            /* Time to discard cache - don't care if this fails */
    5.20 +            discard_file_cache(fd, 0 /* no flush */);
    5.21 +
    5.22 +            write_count = 0;
    5.23 +
    5.24 +            errno = serrno;
    5.25 +        }
    5.26 +    }
    5.27 +    return rc;
    5.28 +}
    5.29  
    5.30  #ifdef ADAPTIVE_SAVE
    5.31  
    5.32 @@ -205,7 +227,7 @@ static inline void initialize_mbit_rate(
    5.33  }
    5.34  
    5.35  
    5.36 -static int ratewrite(int io_fd, void *buf, int n)
    5.37 +static int ratewrite(int io_fd, int live, void *buf, int n)
    5.38  {
    5.39      static int budget = 0;
    5.40      static int burst_time_us = -1;
    5.41 @@ -215,7 +237,7 @@ static int ratewrite(int io_fd, void *bu
    5.42      long long delta;
    5.43  
    5.44      if (START_MBIT_RATE == 0)
    5.45 -        return write(io_fd, buf, n);
    5.46 +        return noncached_write(io_fd, live, buf, n);
    5.47  
    5.48      budget -= n;
    5.49      if (budget < 0) {
    5.50 @@ -251,13 +273,13 @@ static int ratewrite(int io_fd, void *bu
    5.51              }
    5.52          }
    5.53      }
    5.54 -    return write(io_fd, buf, n);
    5.55 +    return noncached_write(io_fd, live, buf, n);
    5.56  }
    5.57  
    5.58  #else /* ! ADAPTIVE SAVE */
    5.59  
    5.60  #define RATE_IS_MAX() (0)
    5.61 -#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
    5.62 +#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
    5.63  #define initialize_mbit_rate()
    5.64  
    5.65  #endif
    5.66 @@ -1082,7 +1104,7 @@ int xc_linux_save(int xc_handle, int io_
    5.67                      if(race && !live) 
    5.68                          goto out; 
    5.69  
    5.70 -                    if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
    5.71 +                    if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) {
    5.72                          ERROR("Error when writing to state file (4)"
    5.73                                " (errno %d)", errno);
    5.74                          goto out;
    5.75 @@ -1091,7 +1113,7 @@ int xc_linux_save(int xc_handle, int io_
    5.76                  }  else {
    5.77  
    5.78                      /* We have a normal page: just write it directly. */
    5.79 -                    if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) {
    5.80 +                    if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE) {
    5.81                          ERROR("Error when writing to state file (5)"
    5.82                                " (errno %d)", errno);
    5.83                          goto out;
    5.84 @@ -1261,6 +1283,10 @@ int xc_linux_save(int xc_handle, int io_
    5.85              DPRINTF("Warning - couldn't disable shadow mode");
    5.86          }
    5.87      }
    5.88 +    else {
    5.89 +        // flush last write and discard cache for file
    5.90 +        discard_file_cache(io_fd, 1 /* flush */);
    5.91 +    }            
    5.92  
    5.93      if (live_shinfo)
    5.94          munmap(live_shinfo, PAGE_SIZE);
     6.1 --- a/tools/libxc/xc_private.h	Sat Feb 24 14:33:34 2007 +0000
     6.2 +++ b/tools/libxc/xc_private.h	Sat Feb 24 14:48:17 2007 +0000
     6.3 @@ -41,6 +41,13 @@
     6.4  #define INFO     1
     6.5  #define PROGRESS 0
     6.6  
     6.7 +/*
     6.8 +** Define max dirty page cache to permit during save/restore -- need to balance 
     6.9 +** keeping cache usage down with CPU impact of invalidating too often.
    6.10 +** (Value is in pages: currently 16MB with 4kB pages)
    6.11 +*/
    6.12 +#define MAX_PAGECACHE_USAGE (4*1024)
    6.13 +
    6.14  #if INFO
    6.15  #define IPRINTF(_f, _a...) printf(_f , ## _a)
    6.16  #else
    6.17 @@ -158,4 +165,7 @@ int xc_waitdomain_core(int xc_handle, in
    6.18  void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits);
    6.19  void bitmap_byte_to_64(uint64_t *lp, const uint8_t *bp, int nbits);
    6.20  
    6.21 +/* Optionally flush file to disk and discard page cache */
    6.22 +int discard_file_cache(int fd, int flush);
    6.23 +
    6.24  #endif /* __XC_PRIVATE_H__ */
     7.1 --- a/tools/libxc/xc_solaris.c	Sat Feb 24 14:33:34 2007 +0000
     7.2 +++ b/tools/libxc/xc_solaris.c	Sat Feb 24 14:48:17 2007 +0000
     7.3 @@ -242,3 +242,10 @@ int xc_evtchn_unmask(int xce_handle, evt
     7.4  {
     7.5      return dorw(xce_handle, (char *)&port, sizeof(port), 1);
     7.6  }
     7.7 +
     7.8 +/* Stub: no flush or cache discard is done on Solaris yet; returns 0 */
     7.9 +int discard_file_cache(int fd, int flush) 
    7.10 +{
    7.11 +    // TODO: Implement for Solaris! Until then fd and flush are ignored.
    7.12 +    return 0;
    7.13 +}