ia64/xen-unstable

changeset 16190:d105b8c30b7d

merge with xen-unstable.hg (staging)
author Alex Williamson <alex.williamson@hp.com>
date Mon Oct 22 13:57:08 2007 -0600 (2007-10-22)
parents 62a7a2f4d9c7 24c2745f939f
children f9772037c2e7
     1.1 --- a/tools/ioemu/hw/ide.c	Mon Oct 22 12:30:17 2007 -0600
     1.2 +++ b/tools/ioemu/hw/ide.c	Mon Oct 22 13:57:08 2007 -0600
     1.3 @@ -431,16 +431,21 @@ buffered_pio_init(void)
     1.4  }
     1.5  
     1.6  static inline void
     1.7 +__buffered_pio_flush(struct pio_buffer *piobuf, IDEState *s, uint32_t pointer)
     1.8 +{
     1.9 +    uint8_t *buf = (uint8_t *)buffered_pio_page + piobuf->page_offset;
    1.10 +    memcpy(s->data_ptr, buf, pointer);
    1.11 +    s->data_ptr += pointer;
    1.12 +}
    1.13 +
    1.14 +static inline void
    1.15  buffered_pio_flush(struct pio_buffer *piobuf)
    1.16  {
    1.17      IDEState *s = piobuf->opaque;
    1.18      uint32_t pointer = piobuf->pointer;
    1.19  
    1.20 -    if (s != NULL && pointer > 0) {
    1.21 -        uint8_t *buf = (uint8_t *)buffered_pio_page + piobuf->page_offset;
    1.22 -        memcpy(s->data_ptr, buf, pointer);
    1.23 -        s->data_ptr += pointer;
    1.24 -    }
    1.25 +    if (s != NULL && pointer > 0)
    1.26 +        __buffered_pio_flush(piobuf, s, pointer);
    1.27  }
    1.28  
    1.29  static inline void
    1.30 @@ -502,6 +507,54 @@ buffered_pio_read(IDEState *s, uint32_t 
    1.31      piobuf->opaque = NULL;
    1.32  }
    1.33  
     1.34 +/*
     1.35 + * Buffered pio reads are undone, so they fall back to normal pio when the
     1.36 + * domain is restored.
     1.37 + * Buffered pio writes are flushed before saving the domain.
     1.38 + * However, pci_ide_save/load() currently just discards a pending transfer. XXX
     1.39 + */
    1.40 +static void
    1.41 +__handle_buffered_pio(struct pio_buffer *piobuf)
    1.42 +{
    1.43 +    IDEState *s = piobuf->opaque;
    1.44 +    uint32_t pointer = piobuf->pointer;
     1.45 +
     1.46 +
     1.47 +    if (pointer == 0)
     1.48 +        return; /* no buffered pio */
    1.49 +
    1.50 +    if (s != NULL) {
     1.51 +        /* Written data are pending in pio_buffer; process them. */
    1.52 +        __buffered_pio_flush(piobuf, s, pointer);
    1.53 +    } else {
     1.54 +        /* Data are buffered for a pio read in pio_buffer.
     1.55 +         * Undo the buffering done by buffered_pio_read().
     1.56 +         */
    1.57 +        if (pointer > s->data_ptr - s->io_buffer)
    1.58 +            pointer = s->data_ptr - s->io_buffer;
    1.59 +        s->data_ptr -= pointer;
    1.60 +    }
     1.61 +
    1.62 +    piobuf->pointer = 0;
    1.63 +    piobuf->data_end = 0;
    1.64 +    piobuf->opaque = NULL;
    1.65 +}
    1.66 +
    1.67 +void
    1.68 +handle_buffered_pio(void)
    1.69 +{
    1.70 +    struct pio_buffer *p1, *p2;
    1.71 +
    1.72 +    if (!buffered_pio_page)
    1.73 +        return;
    1.74 +
    1.75 +    p1 = &buffered_pio_page->pio[PIO_BUFFER_IDE_PRIMARY];
    1.76 +    p2 = &buffered_pio_page->pio[PIO_BUFFER_IDE_SECONDARY];
    1.77 +
    1.78 +    __handle_buffered_pio(p1);
    1.79 +    __handle_buffered_pio(p2);
    1.80 +}
    1.81 +
    1.82  #else /* !__ia64__ */
    1.83  #define buffered_pio_init()         do {} while (0)
    1.84  #define buffered_pio_reset(I)       do {} while (0)
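
The new handle_buffered_pio() hook is intended to run on the device-model save
path, before ordinary buffered I/O is drained, so that nothing is left in the
shared pio page across save/restore; the helper2.c hunk below adds exactly that
call. A minimal sketch of the assumed ordering, using only names from this
changeset:

    /* Sketch only: device-model save path as arranged by this changeset. */
    fprintf(logfile, "device model saving state\n");

    /* Pull all outstanding ioreqs through the system. */
    handle_buffered_pio();   /* IA64 buffered PIO; a no-op macro elsewhere */
    handle_buffered_io(env);
    main_loop_wait(1);       /* for the select() on events */
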
     2.1 --- a/tools/ioemu/hw/pass-through.c	Mon Oct 22 12:30:17 2007 -0600
     2.2 +++ b/tools/ioemu/hw/pass-through.c	Mon Oct 22 13:57:08 2007 -0600
     2.3 @@ -39,11 +39,10 @@ static int next_bdf(char **str, int *seg
     2.4  {
     2.5      char *token;
     2.6  
     2.7 -    token = strchr(*str, ',');
     2.8 -    if ( !token )
     2.9 +    if ( !(*str) || !strchr(*str, ',') )
    2.10          return 0;
    2.11 -    token++;
    2.12  
    2.13 +    token = *str;
    2.14      *seg  = token_value(token);
    2.15      token = strchr(token, ',') + 1;
    2.16      *bus  = token_value(token);
    2.17 @@ -51,8 +50,9 @@ static int next_bdf(char **str, int *seg
    2.18      *dev  = token_value(token);
    2.19      token = strchr(token, ',') + 1;
    2.20      *func  = token_value(token);
    2.21 +    token = strchr(token, ',');
    2.22 +    *str = token ? token + 1 : NULL;
    2.23  
    2.24 -    *str = token;
    2.25      return 1;
    2.26  }
    2.27  
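With this change next_bdf() consumes one seg,bus,dev,func quadruple per call
and leaves *str pointing past the following comma (or NULL once the last
quadruple has been parsed), so callers can simply loop until it returns 0. A
hedged usage sketch -- the option string and the register_real_device()
callback are illustrative assumptions, not part of this hunk:

    int seg, bus, dev, func;
    char *opt = direct_pci_str;   /* e.g. "0,01,00,0,0,02,00,0" (assumed) */

    while ( next_bdf(&opt, &seg, &bus, &dev, &func) )
    {
        /* One pass-through device per iteration (hypothetical callback). */
        register_real_device(seg, bus, dev, func);
    }
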
     3.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Mon Oct 22 12:30:17 2007 -0600
     3.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Mon Oct 22 13:57:08 2007 -0600
     3.3 @@ -635,6 +635,7 @@ int main_loop(void)
     3.4          fprintf(logfile, "device model saving state\n");
     3.5  
     3.6          /* Pull all outstanding ioreqs through the system */
     3.7 +        handle_buffered_pio();
     3.8          handle_buffered_io(env);
     3.9          main_loop_wait(1); /* For the select() on events */
    3.10  
     4.1 --- a/tools/ioemu/vl.h	Mon Oct 22 12:30:17 2007 -0600
     4.2 +++ b/tools/ioemu/vl.h	Mon Oct 22 13:57:08 2007 -0600
     4.3 @@ -1494,8 +1494,11 @@ static inline void xc_domain_shutdown_ho
     4.4  {
     4.5  	xc_ia64_save_to_nvram(xc_handle, domid);
     4.6  }
     4.7 +
     4.8 +void handle_buffered_pio(void);
     4.9  #else
    4.10  #define xc_domain_shutdown_hook(xc_handle, domid)	do {} while (0)
    4.11 +#define handle_buffered_pio()				do {} while (0)
    4.12  #endif
    4.13  
    4.14  #endif /* VL_H */
     5.1 --- a/tools/libxc/xc_dom.h	Mon Oct 22 12:30:17 2007 -0600
     5.2 +++ b/tools/libxc/xc_dom.h	Mon Oct 22 13:57:08 2007 -0600
     5.3 @@ -95,6 +95,7 @@ struct xc_dom_image {
     5.4  
     5.5      int guest_xc;
     5.6      domid_t guest_domid;
     5.7 +    int8_t vhpt_size_log2; /* for IA64 */
     5.8      int shadow_enabled;
     5.9  
    5.10      int xen_version;
     6.1 --- a/tools/libxc/xc_dom_ia64.c	Mon Oct 22 12:30:17 2007 -0600
     6.2 +++ b/tools/libxc/xc_dom_ia64.c	Mon Oct 22 13:57:08 2007 -0600
     6.3 @@ -260,7 +260,7 @@ int arch_setup_bootearly(struct xc_dom_i
     6.4      domctl.u.arch_setup.bp = (dom->start_info_pfn << PAGE_SHIFT)
     6.5          + sizeof(start_info_t);
     6.6      domctl.u.arch_setup.maxmem = dom->total_pages << PAGE_SHIFT;
     6.7 -    domctl.u.arch_setup.vhpt_size_log2 = dom->flags;
     6.8 +    domctl.u.arch_setup.vhpt_size_log2 = dom->vhpt_size_log2;
     6.9      rc = do_domctl(dom->guest_xc, &domctl);
    6.10      return rc;
    6.11  }
     7.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Mon Oct 22 12:30:17 2007 -0600
     7.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Mon Oct 22 13:57:08 2007 -0600
     7.3 @@ -413,6 +413,7 @@ static PyObject *pyxc_linux_build(XcObje
     7.4      char *image, *ramdisk = NULL, *cmdline = "", *features = NULL;
     7.5      int flags = 0;
     7.6      int store_evtchn, console_evtchn;
     7.7 +    int vhpt = 0;
     7.8      unsigned int mem_mb;
     7.9      unsigned long store_mfn = 0;
    7.10      unsigned long console_mfn = 0;
    7.11 @@ -425,20 +426,23 @@ static PyObject *pyxc_linux_build(XcObje
    7.12                                  "console_evtchn", "image",
    7.13                                  /* optional */
    7.14                                  "ramdisk", "cmdline", "flags",
    7.15 -                                "features", NULL };
    7.16 +                                "features", "vhpt", NULL };
    7.17  
    7.18 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiis|ssis", kwd_list,
    7.19 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiis|ssisi", kwd_list,
    7.20                                        &domid, &store_evtchn, &mem_mb,
    7.21                                        &console_evtchn, &image,
    7.22                                        /* optional */
    7.23                                        &ramdisk, &cmdline, &flags,
    7.24 -                                      &features) )
    7.25 +                                      &features, &vhpt) )
    7.26          return NULL;
    7.27  
    7.28      xc_dom_loginit();
    7.29      if (!(dom = xc_dom_allocate(cmdline, features)))
    7.30          return pyxc_error_to_exception();
    7.31  
    7.32 +    /* for IA64 */
    7.33 +    dom->vhpt_size_log2 = vhpt;
    7.34 +
    7.35      if ( xc_dom_linux_build(self->xc_handle, dom, domid, mem_mb, image,
    7.36  			    ramdisk, flags, store_evtchn, &store_mfn,
    7.37  			    console_evtchn, &console_mfn) != 0 ) {
    7.38 @@ -539,11 +543,10 @@ static int next_bdf(char **str, int *seg
    7.39  {
    7.40      char *token;
    7.41  
    7.42 -    token = strchr(*str, ',');
    7.43 -    if ( !token )
    7.44 +    if ( !(*str) || !strchr(*str, ',') )
    7.45          return 0;
    7.46 -    token++;
    7.47  
    7.48 +    token = *str;
    7.49      *seg  = token_value(token);
    7.50      token = strchr(token, ',') + 1;
    7.51      *bus  = token_value(token);
    7.52 @@ -551,8 +554,9 @@ static int next_bdf(char **str, int *seg
    7.53      *dev  = token_value(token);
    7.54      token = strchr(token, ',') + 1;
    7.55      *func  = token_value(token);
    7.56 +    token = strchr(token, ',');
    7.57 +    *str = token ? token + 1 : NULL;
    7.58  
    7.59 -    *str = token;
    7.60      return 1;
    7.61  }
    7.62  
     8.1 --- a/tools/python/xen/xend/XendConstants.py	Mon Oct 22 12:30:17 2007 -0600
     8.2 +++ b/tools/python/xen/xend/XendConstants.py	Mon Oct 22 13:57:08 2007 -0600
     8.3 @@ -45,6 +45,7 @@ HVM_PARAM_IOREQ_PFN    = 5
     8.4  HVM_PARAM_BUFIOREQ_PFN = 6
     8.5  HVM_PARAM_NVRAM_FD     = 7
     8.6  HVM_PARAM_VHPT_SIZE    = 8
     8.7 +HVM_PARAM_BUFPIOREQ_PFN = 9
     8.8  
     8.9  restart_modes = [
    8.10      "restart",
     9.1 --- a/tools/python/xen/xend/image.py	Mon Oct 22 12:30:17 2007 -0600
     9.2 +++ b/tools/python/xen/xend/image.py	Mon Oct 22 13:57:08 2007 -0600
     9.3 @@ -198,6 +198,7 @@ class LinuxImageHandler(ImageHandler):
     9.4  
     9.5      ostype = "linux"
     9.6      flags = 0
     9.7 +    vhpt = 0
     9.8  
     9.9      def buildDomain(self):
    9.10          store_evtchn = self.vm.getStorePort()
    9.11 @@ -225,7 +226,8 @@ class LinuxImageHandler(ImageHandler):
    9.12                                cmdline        = self.cmdline,
    9.13                                ramdisk        = self.ramdisk,
    9.14                                features       = self.vm.getFeatures(),
    9.15 -                              flags          = self.flags)
    9.16 +                              flags          = self.flags,
    9.17 +                              vhpt           = self.vhpt)
    9.18  
    9.19  class PPC_LinuxImageHandler(LinuxImageHandler):
    9.20  
    9.21 @@ -533,9 +535,6 @@ class IA64_Linux_ImageHandler(LinuxImage
    9.22          LinuxImageHandler.configure(self, vmConfig)
    9.23          self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
    9.24  
    9.25 -    def buildDomain(self):
    9.26 -        self.flags = self.vhpt
    9.27 -        return LinuxImageHandler.buildDomain(self)
    9.28  
    9.29  class X86_HVM_ImageHandler(HVMImageHandler):
    9.30  
    10.1 --- a/tools/xenstore/xs_lib.c	Mon Oct 22 12:30:17 2007 -0600
    10.2 +++ b/tools/xenstore/xs_lib.c	Mon Oct 22 13:57:08 2007 -0600
    10.3 @@ -44,7 +44,7 @@ static const char *xs_daemon_path(void)
    10.4  	char *s = getenv("XENSTORED_PATH");
    10.5  	if (s)
    10.6  		return s;
    10.7 -	if (snprintf(buf, PATH_MAX, "%s/socket",
    10.8 +	if (snprintf(buf, sizeof(buf), "%s/socket",
    10.9  		     xs_daemon_rundir()) >= PATH_MAX)
   10.10  		return NULL;
   10.11  	return buf;
   10.12 @@ -68,7 +68,7 @@ const char *xs_daemon_socket_ro(void)
   10.13  	const char *s = xs_daemon_path();
   10.14  	if (s == NULL)
   10.15  		return NULL;
   10.16 -	if (snprintf(buf, PATH_MAX, "%s_ro", s) >= PATH_MAX)
   10.17 +	if (snprintf(buf, sizeof(buf), "%s_ro", s) >= PATH_MAX)
   10.18  		return NULL;
   10.19  	return buf;
   10.20  }
   10.21 @@ -79,8 +79,10 @@ const char *xs_domain_dev(void)
   10.22  	if (s)
   10.23  		return s;
   10.24  
   10.25 -#ifdef __linux__
   10.26 +#if defined(__linux__)
   10.27  	return "/proc/xen/xenbus";
   10.28 +#elif defined(__NetBSD__)
   10.29 +	return "/kern/xen/xenbus";
   10.30  #else
   10.31  	return "/dev/xen/xenbus";
   10.32  #endif
    11.1 --- a/xen/arch/x86/boot/trampoline.S	Mon Oct 22 12:30:17 2007 -0600
    11.2 +++ b/xen/arch/x86/boot/trampoline.S	Mon Oct 22 13:57:08 2007 -0600
    11.3 @@ -22,13 +22,22 @@ trampoline_realmode_entry:
    11.4  idt_48: .word   0, 0, 0 # base = limit = 0
    11.5  gdt_48: .word   6*8-1
    11.6          .long   bootsym_phys(trampoline_gdt)
    11.7 +        .align  8
    11.8  trampoline_gdt:
    11.9 -        .quad   0x0000000000000000     /* 0x0000: unused */
   11.10 -        .quad   0x00cf9a000000ffff     /* 0x0008: ring 0 code, 32-bit mode */
   11.11 -        .quad   0x00af9a000000ffff     /* 0x0010: ring 0 code, 64-bit mode */
   11.12 -        .quad   0x00cf92000000ffff     /* 0x0018: ring 0 data */
   11.13 -        .quad   0x00009a090000ffff     /* 0x0020: real-mode code @ 0x90000 */
   11.14 -        .quad   0x000092090000ffff     /* 0x0028: real-mode data @ 0x90000 */
   11.15 +        /* 0x0000: unused */
   11.16 +        .quad   0x0000000000000000
   11.17 +        /* 0x0008: ring 0 code, 32-bit mode */
   11.18 +        .quad   0x00cf9a000000ffff
   11.19 +        /* 0x0010: ring 0 code, 64-bit mode */
   11.20 +        .quad   0x00af9a000000ffff
   11.21 +        /* 0x0018: ring 0 data */
   11.22 +        .quad   0x00cf92000000ffff
   11.23 +        /* 0x0020: real-mode code @ BOOT_TRAMPOLINE */
   11.24 +        .long   0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
   11.25 +        .long   0x00009a00 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
   11.26 +        /* 0x0028: real-mode data @ BOOT_TRAMPOLINE */
   11.27 +        .long   0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
   11.28 +        .long   0x00009200 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
   11.29  
   11.30  cpuid_ext_features:
   11.31          .long   0
   11.32 @@ -142,7 +151,7 @@ 1:      mov     $(BOOT_TRAMPOLINE>>4),%a
   11.33          mov     %ax,%es
   11.34          mov     %ax,%ss
   11.35  
   11.36 -        /* Stack grows down from 0x93000. Initialise IDT and enable irqs. */
   11.37 +        /* Stack grows down from +0x3000. Initialise IDT and enable irqs. */
   11.38          mov     $0x3000,%sp
   11.39          lidt    bootsym(rm_idt)
   11.40          sti
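
The real-mode code and data descriptors are now assembled from BOOT_TRAMPOLINE
rather than hard-coding a 0x90000 base, hence the two .long halves per entry.
A rough C sketch of the same encoding (a byte-granular 16-bit segment with a
64KB limit, following the standard x86 descriptor layout; this helper is not
part of the changeset):

    #include <stdint.h>

    /* Build the low/high dwords of a real-mode style descriptor with the
     * given base, a 0xffff limit, and the given access byte (0x9a = code,
     * 0x92 = data), as the trampoline GDT entries above do. */
    static void realmode_desc(uint32_t base, uint32_t access,
                              uint32_t *lo, uint32_t *hi)
    {
        *lo = 0x0000ffff | ((base & 0x00ffff) << 16);    /* limit 15:0, base 15:0 */
        *hi = (access << 8) | ((base & 0xff0000) >> 16); /* base 23:16, access byte */
    }
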
    12.1 --- a/xen/arch/x86/cpu/common.c	Mon Oct 22 12:30:17 2007 -0600
    12.2 +++ b/xen/arch/x86/cpu/common.c	Mon Oct 22 13:57:08 2007 -0600
    12.3 @@ -23,6 +23,12 @@ static int disable_x86_serial_nr __devin
    12.4  
    12.5  struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
    12.6  
    12.7 +/*
    12.8 + * Default host IA32_CR_PAT value to cover all memory types.
    12.9 + * BIOS usually sets it to 0x07040600070406.
   12.10 + */
   12.11 +u64 host_pat = 0x050100070406;
   12.12 +
   12.13  static void default_init(struct cpuinfo_x86 * c)
   12.14  {
   12.15  	/* Not much we can do here... */
   12.16 @@ -557,6 +563,9 @@ void __devinit cpu_init(void)
   12.17  	}
   12.18  	printk(KERN_INFO "Initializing CPU#%d\n", cpu);
   12.19  
   12.20 +	if (cpu_has_pat)
   12.21 +		wrmsrl(MSR_IA32_CR_PAT, host_pat);
   12.22 +
   12.23  	*(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
   12.24  	*(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(current);
   12.25  	asm volatile ( "lgdt %0" : "=m" (gdt_load) );
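
The chosen host_pat value packs one memory type per byte, PAT0 in the lowest
byte, so every usable type (UC, WC, WT, WP, WB, UC-) has an entry. A small
standalone sketch that decodes it, using the architectural PAT encodings
(0=UC, 1=WC, 4=WT, 5=WP, 6=WB, 7=UC-):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t host_pat = 0x050100070406ULL;
        const char *name[8] = { "UC", "WC", "rsvd", "rsvd",
                                "WT", "WP", "WB", "UC-" };
        int i;

        /* Prints PAT0=WB, PAT1=WT, PAT2=UC-, PAT3=UC, PAT4=WC, PAT5=WP,
         * PAT6=UC, PAT7=UC. */
        for ( i = 0; i < 8; i++ )
            printf("PAT%d = %s\n", i, name[(host_pat >> (i * 8)) & 0xff]);
        return 0;
    }
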
    13.1 --- a/xen/arch/x86/cpu/mtrr/generic.c	Mon Oct 22 12:30:17 2007 -0600
    13.2 +++ b/xen/arch/x86/cpu/mtrr/generic.c	Mon Oct 22 13:57:08 2007 -0600
    13.3 @@ -11,14 +11,6 @@
    13.4  #include <asm/cpufeature.h>
    13.5  #include "mtrr.h"
    13.6  
    13.7 -struct mtrr_state {
    13.8 -	struct mtrr_var_range *var_ranges;
    13.9 -	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
   13.10 -	unsigned char enabled;
   13.11 -	unsigned char have_fixed;
   13.12 -	mtrr_type def_type;
   13.13 -};
   13.14 -
   13.15  struct fixed_range_block {
   13.16  	int base_msr; /* start address of an MTRR block */
   13.17  	int ranges;   /* number of MTRRs in this block  */
   13.18 @@ -32,7 +24,7 @@ static struct fixed_range_block fixed_ra
   13.19  };
   13.20  
   13.21  static unsigned long smp_changes_mask;
   13.22 -static struct mtrr_state mtrr_state = {};
   13.23 +struct mtrr_state mtrr_state = {};
   13.24  
   13.25  /*  Get the MSR pair relating to a var range  */
   13.26  static void
   13.27 @@ -88,6 +80,9 @@ void __init get_mtrr_state(void)
   13.28  	rdmsr(MTRRdefType_MSR, lo, dummy);
   13.29  	mtrr_state.def_type = (lo & 0xff);
   13.30  	mtrr_state.enabled = (lo & 0xc00) >> 10;
   13.31 +
   13.32 +	/* Store mtrr_cap for HVM MTRR virtualisation. */
   13.33 +	rdmsrl(MTRRcap_MSR, mtrr_state.mtrr_cap);
   13.34  }
   13.35  
   13.36  /*  Some BIOS's are fucked and don't set all MTRRs the same!  */
   13.37 @@ -107,6 +102,7 @@ void __init mtrr_state_warn(void)
   13.38  	printk(KERN_INFO "mtrr: corrected configuration.\n");
   13.39  }
   13.40  
   13.41 +extern bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
   13.42  /* Doesn't attempt to pass an error out to MTRR users
   13.43     because it's quite complicated in some cases and probably not
   13.44     worth it because the best error handling is to ignore it. */
   13.45 @@ -116,6 +112,8 @@ void mtrr_wrmsr(unsigned msr, unsigned a
   13.46  		printk(KERN_ERR
   13.47  			"MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
   13.48  			smp_processor_id(), msr, a, b);
   13.49 +	/* Cache overlap status for efficient HVM MTRR virtualisation. */
   13.50 +	mtrr_state.overlapped = is_var_mtrr_overlapped(&mtrr_state);
   13.51  }
   13.52  
   13.53  /**
    14.1 --- a/xen/arch/x86/cpu/mtrr/main.c	Mon Oct 22 12:30:17 2007 -0600
    14.2 +++ b/xen/arch/x86/cpu/mtrr/main.c	Mon Oct 22 13:57:08 2007 -0600
    14.3 @@ -588,6 +588,8 @@ struct mtrr_value {
    14.4  	unsigned long	lsize;
    14.5  };
    14.6  
    14.7 +extern void global_init_mtrr_pat(void);
    14.8 +
    14.9  /**
   14.10   * mtrr_bp_init - initialize mtrrs on the boot CPU
   14.11   *
   14.12 @@ -654,8 +656,11 @@ void __init mtrr_bp_init(void)
   14.13  	if (mtrr_if) {
   14.14  		set_num_var_ranges();
   14.15  		init_table();
   14.16 -		if (use_intel())
   14.17 +		if (use_intel()) {
   14.18  			get_mtrr_state();
    14.19 +			/* initialize some global data for MTRR/PAT virtualization */
   14.20 +			global_init_mtrr_pat();
   14.21 +		}
   14.22  	}
   14.23  }
   14.24  
    15.1 --- a/xen/arch/x86/cpu/mtrr/mtrr.h	Mon Oct 22 12:30:17 2007 -0600
    15.2 +++ b/xen/arch/x86/cpu/mtrr/mtrr.h	Mon Oct 22 13:57:08 2007 -0600
    15.3 @@ -13,7 +13,6 @@
    15.4  #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
    15.5  #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
    15.6  
    15.7 -#define NUM_FIXED_RANGES 88
    15.8  #define MTRRfix64K_00000_MSR 0x250
    15.9  #define MTRRfix16K_80000_MSR 0x258
   15.10  #define MTRRfix16K_A0000_MSR 0x259
   15.11 @@ -30,9 +29,6 @@
   15.12  #define MTRR_CHANGE_MASK_VARIABLE  0x02
   15.13  #define MTRR_CHANGE_MASK_DEFTYPE   0x04
   15.14  
   15.15 -/* In the Intel processor's MTRR interface, the MTRR type is always held in
   15.16 -   an 8 bit field: */
   15.17 -typedef u8 mtrr_type;
   15.18  
   15.19  struct mtrr_ops {
   15.20  	u32	vendor;
   15.21 @@ -69,13 +65,6 @@ struct set_mtrr_context {
   15.22  	u32 ccr3;
   15.23  };
   15.24  
   15.25 -struct mtrr_var_range {
   15.26 -	u32 base_lo;
   15.27 -	u32 base_hi;
   15.28 -	u32 mask_lo;
   15.29 -	u32 mask_hi;
   15.30 -};
   15.31 -
   15.32  void set_mtrr_done(struct set_mtrr_context *ctxt);
   15.33  void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
   15.34  void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
    16.1 --- a/xen/arch/x86/hvm/Makefile	Mon Oct 22 12:30:17 2007 -0600
    16.2 +++ b/xen/arch/x86/hvm/Makefile	Mon Oct 22 13:57:08 2007 -0600
    16.3 @@ -7,6 +7,7 @@ obj-y += instrlen.o
    16.4  obj-y += intercept.o
    16.5  obj-y += io.o
    16.6  obj-y += irq.o
    16.7 +obj-y += mtrr.o
    16.8  obj-y += platform.o
    16.9  obj-y += pmtimer.o
   16.10  obj-y += rtc.o
    17.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Oct 22 12:30:17 2007 -0600
    17.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Oct 22 13:57:08 2007 -0600
    17.3 @@ -226,6 +226,7 @@ int hvm_domain_initialise(struct domain 
    17.4  
    17.5      spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
    17.6      spin_lock_init(&d->arch.hvm_domain.irq_lock);
    17.7 +    spin_lock_init(&d->arch.hvm_domain.uc_lock);
    17.8  
    17.9      rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
   17.10      if ( rc != 0 )
   17.11 @@ -417,27 +418,22 @@ static int hvm_load_cpu_ctxt(struct doma
   17.12  HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
   17.13                            1, HVMSR_PER_VCPU);
   17.14  
   17.15 +extern int reset_vmsr(struct mtrr_state *m, u64 *p);
   17.16 +
   17.17  int hvm_vcpu_initialise(struct vcpu *v)
   17.18  {
   17.19      int rc;
   17.20  
   17.21      if ( (rc = vlapic_init(v)) != 0 )
   17.22 -        return rc;
   17.23 +        goto fail1;
   17.24  
   17.25      if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 )
   17.26 -    {
   17.27 -        vlapic_destroy(v);
   17.28 -        return rc;
   17.29 -    }
   17.30 +        goto fail2;
   17.31  
   17.32      /* Create ioreq event channel. */
   17.33      rc = alloc_unbound_xen_event_channel(v, 0);
   17.34      if ( rc < 0 )
   17.35 -    {
   17.36 -        hvm_funcs.vcpu_destroy(v);
   17.37 -        vlapic_destroy(v);
   17.38 -        return rc;
   17.39 -    }
   17.40 +        goto fail3;
   17.41  
   17.42      /* Register ioreq event channel. */
   17.43      v->arch.hvm_vcpu.xen_port = rc;
   17.44 @@ -449,6 +445,10 @@ int hvm_vcpu_initialise(struct vcpu *v)
   17.45      spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
   17.46      INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
   17.47  
   17.48 +    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
   17.49 +    if ( rc != 0 )
   17.50 +        goto fail3;
   17.51 +
   17.52      v->arch.guest_context.user_regs.eflags = 2;
   17.53  
   17.54      if ( v->vcpu_id == 0 )
   17.55 @@ -468,6 +468,13 @@ int hvm_vcpu_initialise(struct vcpu *v)
   17.56      }
   17.57  
   17.58      return 0;
   17.59 +
   17.60 + fail3:
   17.61 +    hvm_funcs.vcpu_destroy(v);
   17.62 + fail2:
   17.63 +    vlapic_destroy(v);
   17.64 + fail1:
   17.65 +    return rc;
   17.66  }
   17.67  
   17.68  void hvm_vcpu_destroy(struct vcpu *v)
   17.69 @@ -606,6 +613,32 @@ int hvm_set_efer(uint64_t value)
   17.70      return 1;
   17.71  }
   17.72  
   17.73 +extern void shadow_blow_tables_per_domain(struct domain *d);
   17.74 +extern bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs);
   17.75 +
   17.76 +/* Exit UC mode only if all VCPUs agree on MTRR/PAT and are not in no_fill. */
   17.77 +static bool_t domain_exit_uc_mode(struct vcpu *v)
   17.78 +{
   17.79 +    struct domain *d = v->domain;
   17.80 +    struct vcpu *vs;
   17.81 +
   17.82 +    for_each_vcpu ( d, vs )
   17.83 +    {
   17.84 +        if ( (vs == v) || !vs->is_initialised )
   17.85 +            continue;
   17.86 +        if ( (vs->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) ||
   17.87 +             mtrr_pat_not_equal(vs, v) )
   17.88 +            return 0;
   17.89 +    }
   17.90 +
   17.91 +    return 1;
   17.92 +}
   17.93 +
   17.94 +static void local_flush_cache(void *info)
   17.95 +{
   17.96 +    wbinvd();
   17.97 +}
   17.98 +
   17.99  int hvm_set_cr0(unsigned long value)
  17.100  {
  17.101      struct vcpu *v = current;
  17.102 @@ -686,6 +719,41 @@ int hvm_set_cr0(unsigned long value)
  17.103          }
  17.104      }
  17.105  
  17.106 +    if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
  17.107 +    {
  17.108 +        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
  17.109 +        {
  17.110 +            /* Entering no fill cache mode. */
  17.111 +            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
  17.112 +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
  17.113 +
  17.114 +            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
  17.115 +            {
  17.116 +                /* Flush physical caches. */
  17.117 +                on_each_cpu(local_flush_cache, NULL, 1, 1);
  17.118 +                /* Shadow pagetables must recognise UC mode. */
  17.119 +                v->domain->arch.hvm_domain.is_in_uc_mode = 1;
  17.120 +                shadow_blow_tables_per_domain(v->domain);
  17.121 +            }
  17.122 +            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
  17.123 +        }
  17.124 +        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
  17.125 +                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
  17.126 +        {
  17.127 +            /* Exit from no fill cache mode. */
  17.128 +            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
  17.129 +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
  17.130 +
  17.131 +            if ( domain_exit_uc_mode(v) )
  17.132 +            {
  17.133 +                /* Shadow pagetables must recognise normal caching mode. */
  17.134 +                v->domain->arch.hvm_domain.is_in_uc_mode = 0;
  17.135 +                shadow_blow_tables_per_domain(v->domain);
  17.136 +            }
  17.137 +            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
  17.138 +        }
  17.139 +    }
  17.140 +
  17.141      v->arch.hvm_vcpu.guest_cr[0] = value;
  17.142      hvm_update_guest_cr(v, 0);
  17.143  
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xen/arch/x86/hvm/mtrr.c	Mon Oct 22 13:57:08 2007 -0600
    18.3 @@ -0,0 +1,687 @@
    18.4 +/*
    18.5 + * mtrr.c: MTRR/PAT virtualization
    18.6 + *
    18.7 + * Copyright (c) 2007, Intel Corporation.
    18.8 + *
    18.9 + * This program is free software; you can redistribute it and/or modify it
   18.10 + * under the terms and conditions of the GNU General Public License,
   18.11 + * version 2, as published by the Free Software Foundation.
   18.12 + *
   18.13 + * This program is distributed in the hope it will be useful, but WITHOUT
   18.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   18.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   18.16 + * more details.
   18.17 + *
   18.18 + * You should have received a copy of the GNU General Public License along with
   18.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   18.20 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   18.21 + */
   18.22 +
   18.23 +#include <public/hvm/e820.h>
   18.24 +#include <xen/types.h>
   18.25 +#include <asm/e820.h>
   18.26 +#include <asm/paging.h>
   18.27 +#include <asm/p2m.h>
   18.28 +#include <xen/domain_page.h>
   18.29 +#include <stdbool.h>
   18.30 +#include <asm/mtrr.h>
   18.31 +#include <asm/hvm/support.h>
   18.32 +
   18.33 +/* Xen holds the native MTRR MSRs */
   18.34 +extern struct mtrr_state mtrr_state;
   18.35 +
   18.36 +static u64 phys_base_msr_mask;
   18.37 +static u64 phys_mask_msr_mask;
   18.38 +static u32 size_or_mask;
   18.39 +static u32 size_and_mask;
   18.40 +
   18.41 +static void init_pat_entry_tbl(u64 pat);
   18.42 +static void init_mtrr_epat_tbl(void);
   18.43 +static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa);
   18.44 +/* get page attribute fields (PAn) from PAT MSR */
   18.45 +#define pat_cr_2_paf(pat_cr,n)  ((((u64)pat_cr) >> ((n)<<3)) & 0xff)
   18.46 +/* pat entry to PTE flags (PAT, PCD, PWT bits) */
   18.47 +static unsigned char pat_entry_2_pte_flags[8] = {
   18.48 +    0,           _PAGE_PWT,
   18.49 +    _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
   18.50 +    _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
   18.51 +    _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
   18.52 +
   18.53 +/* effective mm type lookup table, according to MTRR and PAT */
   18.54 +static u8 mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
   18.55 +/********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
   18.56 +/* RS means reserved type(2,3), and type is hardcoded here */
   18.57 + /*MTRR(UC):(UC,WC,RS,RS,UC,UC,UC,UC)*/
   18.58 +            {0, 1, 2, 2, 0, 0, 0, 0},
   18.59 + /*MTRR(WC):(UC,WC,RS,RS,UC,UC,WC,WC)*/
   18.60 +            {0, 1, 2, 2, 0, 0, 1, 1},
   18.61 + /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
   18.62 +            {2, 2, 2, 2, 2, 2, 2, 2},
   18.63 + /*MTRR(RS):(RS,RS,RS,RS,RS,RS,RS,RS)*/
   18.64 +            {2, 2, 2, 2, 2, 2, 2, 2},
   18.65 + /*MTRR(WT):(UC,WC,RS,RS,WT,WP,WT,UC)*/
   18.66 +            {0, 1, 2, 2, 4, 5, 4, 0},
   18.67 + /*MTRR(WP):(UC,WC,RS,RS,WT,WP,WP,WC)*/
   18.68 +            {0, 1, 2, 2, 4, 5, 5, 1},
   18.69 + /*MTRR(WB):(UC,WC,RS,RS,WT,WP,WB,UC)*/
   18.70 +            {0, 1, 2, 2, 4, 5, 6, 0}
   18.71 +};
   18.72 +
   18.73 +/* reverse lookup table, to find a pat type according to MTRR and effective
   18.74 + * memory type. This table is dynamically generated
   18.75 + */
   18.76 +static u8 mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
   18.77 +
   18.78 +/* lookup table for PAT entry of a given PAT value in host pat */
   18.79 +static u8 pat_entry_tbl[PAT_TYPE_NUMS];
   18.80 +
   18.81 +static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
   18.82 +                           uint64_t *base, uint64_t *end)
   18.83 +{
   18.84 +    uint32_t mask_lo = (uint32_t)mask_msr;
   18.85 +    uint32_t mask_hi = (uint32_t)(mask_msr >> 32);
   18.86 +    uint32_t base_lo = (uint32_t)base_msr;
   18.87 +    uint32_t base_hi = (uint32_t)(base_msr >> 32);
   18.88 +    uint32_t size;
   18.89 +
   18.90 +    if ( (mask_lo & 0x800) == 0 )
   18.91 +    {
   18.92 +        /* Invalid (i.e. free) range */
   18.93 +        *base = 0;
   18.94 +        *end = 0;
   18.95 +        return;
   18.96 +    }
   18.97 +
   18.98 +    /* Work out the shifted address mask. */
   18.99 +    mask_lo = (size_or_mask | (mask_hi << (32 - PAGE_SHIFT)) |
  18.100 +               (mask_lo >> PAGE_SHIFT));
  18.101 +
  18.102 +    /* This works correctly if size is a power of two (a contiguous range). */
  18.103 +    size = -mask_lo;
  18.104 +    *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
  18.105 +    *end = *base + size - 1;
  18.106 +}
  18.107 +
  18.108 +bool_t is_var_mtrr_overlapped(struct mtrr_state *m)
  18.109 +{
  18.110 +    int seg, i;
  18.111 +    uint64_t phys_base, phys_mask, phys_base_pre, phys_mask_pre;
  18.112 +    uint64_t base_pre, end_pre, base, end;
  18.113 +    uint8_t num_var_ranges = (u8)m->mtrr_cap;
  18.114 +
  18.115 +    for ( i = 0; i < num_var_ranges; i++ )
  18.116 +    {
  18.117 +        phys_base_pre = ((u64*)m->var_ranges)[i*2];
  18.118 +        phys_mask_pre = ((u64*)m->var_ranges)[i*2 + 1];
  18.119 +
  18.120 +        get_mtrr_range(phys_base_pre, phys_mask_pre,
  18.121 +                        &base_pre, &end_pre);
  18.122 +
  18.123 +        for ( seg = i + 1; seg < num_var_ranges; seg ++ )
  18.124 +        {
  18.125 +            phys_base = ((u64*)m->var_ranges)[seg*2];
  18.126 +            phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
  18.127 +
  18.128 +            get_mtrr_range(phys_base, phys_mask,
  18.129 +                            &base, &end);
  18.130 +
  18.131 +            if ( ((base_pre != end_pre) && (base != end))
  18.132 +                 || ((base >= base_pre) && (base <= end_pre))
  18.133 +                 || ((end >= base_pre) && (end <= end_pre))
  18.134 +                 || ((base_pre >= base) && (base_pre <= end))
  18.135 +                 || ((end_pre >= base) && (end_pre <= end)) )
  18.136 +            {
  18.137 +                /* MTRR is overlapped. */
  18.138 +                return 1;
  18.139 +            }
  18.140 +        }
  18.141 +    }
  18.142 +    return 0;
  18.143 +}
  18.144 +
  18.145 +/* reserved mtrr for guest OS */
  18.146 +#define RESERVED_MTRR 2
  18.147 +#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
  18.148 +#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
  18.149 +bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content);
  18.150 +bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content);
  18.151 +bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content);
  18.152 +static void set_var_mtrr(unsigned int reg, struct mtrr_state *m,
  18.153 +                    unsigned int base, unsigned int size,
  18.154 +                    unsigned int type)
  18.155 +{
  18.156 +    struct mtrr_var_range *vr;
  18.157 +
  18.158 +    vr = &m->var_ranges[reg];
  18.159 +
  18.160 +    if ( size == 0 )
  18.161 +    {
  18.162 +        /* The invalid bit is kept in the mask, so we simply clear the
  18.163 +         * relevant mask register to disable a range.
  18.164 +         */
  18.165 +        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
  18.166 +    }
  18.167 +    else
  18.168 +    {
  18.169 +        vr->base_lo = base << PAGE_SHIFT | type;
  18.170 +        vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
  18.171 +        vr->mask_lo = -size << PAGE_SHIFT | 0x800;
  18.172 +        vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
  18.173 +
  18.174 +        mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(unsigned long *)vr);
  18.175 +        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
  18.176 +                               *((unsigned long *)vr + 1));
  18.177 +    }
  18.178 +}
   18.179 +/* From Intel Vol. III Section 10.11.4, range size and base alignment have the
   18.180 + * following requirements:
   18.181 + * 1. The range size must be 2^N bytes for N >= 12 (i.e. 4KB minimum).
   18.182 + * 2. The base address must be aligned to 2^N, where N is the same as in the
   18.183 + *    previous requirement. So an 8K range must be 8K-aligned, not 4K-aligned.
   18.184 + */
  18.185 +static unsigned int range_to_mtrr(unsigned int reg, struct mtrr_state *m,
  18.186 +    unsigned int range_startk, unsigned int range_sizek, unsigned char type)
  18.187 +{
  18.188 +    if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
  18.189 +        return reg;
  18.190 +
  18.191 +    while ( range_sizek )
  18.192 +    {
  18.193 +        unsigned int max_align, align, sizek;
  18.194 +
  18.195 +        max_align = (range_startk == 0) ? 32 : ffs(range_startk);
  18.196 +        align = min_t(unsigned int, fls(range_sizek), max_align);
  18.197 +        sizek = 1 << (align - 1);
  18.198 +
  18.199 +        set_var_mtrr(reg++, m, range_startk, sizek, type);
  18.200 +
  18.201 +        range_startk += sizek;
  18.202 +        range_sizek  -= sizek;
  18.203 +
  18.204 +        if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
  18.205 +            break;
  18.206 +    }
  18.207 +
  18.208 +    return reg;
  18.209 +}
  18.210 +
  18.211 +static void setup_fixed_mtrrs(struct vcpu *v)
  18.212 +{
  18.213 +    uint64_t content;
  18.214 +    int i;
  18.215 +    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
  18.216 +
  18.217 +    /* 1. Map (0~A0000) as WB */
  18.218 +    content = 0x0606060606060606ull;
  18.219 +    mtrr_fix_range_msr_set(m, 0, content);
  18.220 +    mtrr_fix_range_msr_set(m, 1, content);
  18.221 +    /* 2. Map VRAM(A0000~C0000) as WC */
  18.222 +    content = 0x0101010101010101;
  18.223 +    mtrr_fix_range_msr_set(m, 2, content);
  18.224 +    /* 3. Map (C0000~100000) as UC */
  18.225 +    for ( i = 3; i < 11; i++)
  18.226 +        mtrr_fix_range_msr_set(m, i, 0);
  18.227 +}
  18.228 +
  18.229 +static void setup_var_mtrrs(struct vcpu *v)
  18.230 +{
  18.231 +    p2m_type_t p2m;
  18.232 +    unsigned long e820_mfn;
  18.233 +    char *p = NULL;
  18.234 +    unsigned char nr = 0;
  18.235 +    int i;
  18.236 +    unsigned int reg = 0;
  18.237 +    unsigned long size = 0;
  18.238 +    unsigned long addr = 0;
  18.239 +    struct e820entry *e820_table;
  18.240 +
  18.241 +    e820_mfn = mfn_x(gfn_to_mfn(v->domain,
  18.242 +                    HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
  18.243 +
  18.244 +    p = (char *)map_domain_page(e820_mfn);
  18.245 +
  18.246 +    nr = *(unsigned char*)(p + HVM_E820_NR_OFFSET);
  18.247 +    e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
  18.248 +    /* search E820 table, set MTRR for RAM */
  18.249 +    for ( i = 0; i < nr; i++)
  18.250 +    {
  18.251 +        if ( (e820_table[i].addr >= 0x100000) &&
  18.252 +             (e820_table[i].type == E820_RAM) )
  18.253 +        {
  18.254 +            if ( e820_table[i].addr == 0x100000 )
  18.255 +            {
  18.256 +                size = e820_table[i].size + 0x100000 + PAGE_SIZE * 3;
  18.257 +                addr = 0;
  18.258 +            }
  18.259 +            else
  18.260 +            {
  18.261 +                /* Larger than 4G */
  18.262 +                size = e820_table[i].size;
  18.263 +                addr = e820_table[i].addr;
  18.264 +            }
  18.265 +
  18.266 +            reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
  18.267 +                                addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
  18.268 +                                MTRR_TYPE_WRBACK);
  18.269 +        }
  18.270 +    }
  18.271 +}
  18.272 +
  18.273 +void init_mtrr_in_hyper(struct vcpu *v)
  18.274 +{
   18.275 +    /* TODO: MTRR should be initialized by the BIOS or elsewhere;
   18.276 +     * as a workaround we do it here.
   18.277 +     */
  18.278 +    if ( v->arch.hvm_vcpu.mtrr.is_initialized )
  18.279 +        return;
  18.280 +
  18.281 +    setup_fixed_mtrrs(v);
  18.282 +    setup_var_mtrrs(v);
  18.283 +    /* enable mtrr */
  18.284 +    mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
  18.285 +
  18.286 +    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
  18.287 +}
  18.288 +
  18.289 +static int reset_mtrr(struct mtrr_state *m)
  18.290 +{
  18.291 +    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
  18.292 +    if ( m->var_ranges == NULL )
  18.293 +        return -ENOMEM;
  18.294 +    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
  18.295 +    memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
  18.296 +    m->enabled = 0;
   18.297 +    m->def_type = 0;                  /* MTRR is disabled */
   18.298 +    m->mtrr_cap = (0x5<<8)|MTRR_VCNT; /* WC, fixed ranges enabled, vcnt=8 */
  18.299 +    m->overlapped = 0;
  18.300 +    return 0;
  18.301 +}
  18.302 +
  18.303 +/* init global variables for MTRR and PAT */
  18.304 +void global_init_mtrr_pat(void)
  18.305 +{
  18.306 +    extern u64 host_pat;
  18.307 +    u32 phys_addr;
  18.308 +
  18.309 +    init_mtrr_epat_tbl();
  18.310 +    init_pat_entry_tbl(host_pat);
  18.311 +    /* Get max physical address, set some global variable */
  18.312 +    if ( cpuid_eax(0x80000000) < 0x80000008 )
  18.313 +        phys_addr = 36;
  18.314 +    else
  18.315 +        phys_addr = cpuid_eax(0x80000008);
  18.316 +
  18.317 +    phys_base_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0xf00UL;
  18.318 +    phys_mask_msr_mask = ~((((u64)1) << phys_addr) - 1) | 0x7ffUL;
  18.319 +
  18.320 +    size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
  18.321 +    size_and_mask = ~size_or_mask & 0xfff00000;
  18.322 +}
  18.323 +
  18.324 +static void init_pat_entry_tbl(u64 pat)
  18.325 +{
  18.326 +    int i, j;
  18.327 +
  18.328 +    memset(&pat_entry_tbl, INVALID_MEM_TYPE,
  18.329 +           PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
  18.330 +
  18.331 +    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
  18.332 +    {
  18.333 +        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  18.334 +        {
  18.335 +            if ( pat_cr_2_paf(pat, j) == i )
  18.336 +            {
  18.337 +                pat_entry_tbl[i] = j;
  18.338 +                break;
  18.339 +            }
  18.340 +        }
  18.341 +    }
  18.342 +}
  18.343 +
  18.344 +unsigned char pat_type_2_pte_flags(unsigned char pat_type)
  18.345 +{
  18.346 +    int pat_entry = pat_entry_tbl[pat_type];
  18.347 +
   18.348 +    /* INVALID_MEM_TYPE means no PAT entry was found in the host PAT for
   18.349 +     * the given pat_type. If the host PAT covers all the PAT types,
   18.350 +     * this can't happen.
   18.351 +     */
  18.352 +    if ( likely(pat_entry != INVALID_MEM_TYPE) )
  18.353 +        return pat_entry_2_pte_flags[pat_entry];
  18.354 +
  18.355 +    return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
  18.356 +}
  18.357 +
  18.358 +int reset_vmsr(struct mtrr_state *m, u64 *pat_ptr)
  18.359 +{
  18.360 +    int rc;
  18.361 +
  18.362 +    rc = reset_mtrr(m);
  18.363 +    if ( rc != 0 )
  18.364 +        return rc;
  18.365 +
  18.366 +    *pat_ptr = ( (u64)PAT_TYPE_WRBACK) |                /* PAT0: WB */
  18.367 +        ( (u64)PAT_TYPE_WRTHROUGH << 8 ) |              /* PAT1: WT */
  18.368 +        ( (u64)PAT_TYPE_UC_MINUS << 16 ) |              /* PAT2: UC- */
  18.369 +        ( (u64)PAT_TYPE_UNCACHABLE << 24 ) |            /* PAT3: UC */
  18.370 +        ( (u64)PAT_TYPE_WRBACK << 32 ) |                /* PAT4: WB */
  18.371 +        ( (u64)PAT_TYPE_WRTHROUGH << 40 ) |             /* PAT5: WT */
  18.372 +        ( (u64)PAT_TYPE_UC_MINUS << 48 ) |              /* PAT6: UC- */
  18.373 +        ( (u64)PAT_TYPE_UNCACHABLE << 56 );             /* PAT7: UC */
  18.374 +
  18.375 +    return 0;
  18.376 +}
  18.377 +
  18.378 +/*
  18.379 + * Get MTRR memory type for physical address pa.
  18.380 + */
  18.381 +static unsigned char get_mtrr_type(struct mtrr_state *m, paddr_t pa)
  18.382 +{
  18.383 +   int    addr, seg, index;
  18.384 +   u8     overlap_mtrr = 0;
  18.385 +   u8     overlap_mtrr_pos = 0;
  18.386 +   u64    phys_base;
  18.387 +   u64    phys_mask;
  18.388 +   u8     num_var_ranges = m->mtrr_cap & 0xff;
  18.389 +
  18.390 +   if ( unlikely(!(m->enabled & 0x2)) )
  18.391 +       return MTRR_TYPE_UNCACHABLE;
  18.392 +
  18.393 +   if ( (pa < 0x100000) && (m->enabled & 1) )
  18.394 +   {
   18.395 +       /* Fixed-range MTRRs take effect */
  18.396 +       addr = (unsigned int) pa;
  18.397 +       if ( addr < 0x80000 )
  18.398 +       {
  18.399 +           seg = (addr >> 16);
  18.400 +           return m->fixed_ranges[seg];
  18.401 +       }
  18.402 +       else if ( addr < 0xc0000 )
  18.403 +       {
  18.404 +           seg = (addr - 0x80000) >> 14;
  18.405 +           index = (seg >> 3) + 1;
  18.406 +           seg &= 7;            /* select 0-7 segments */
  18.407 +           return m->fixed_ranges[index*8 + seg];
  18.408 +       }
  18.409 +       else
  18.410 +       {
  18.411 +           /* 0xC0000 --- 0x100000 */
  18.412 +           seg = (addr - 0xc0000) >> 12;
  18.413 +           index = (seg >> 3) + 3;
  18.414 +           seg &= 7;            /* select 0-7 segments */
  18.415 +           return m->fixed_ranges[index*8 + seg];
  18.416 +       }
  18.417 +   }
  18.418 +
  18.419 +   /* Match with variable MTRRs. */
  18.420 +   for ( seg = 0; seg < num_var_ranges; seg++ )
  18.421 +   {
  18.422 +       phys_base = ((u64*)m->var_ranges)[seg*2];
  18.423 +       phys_mask = ((u64*)m->var_ranges)[seg*2 + 1];
  18.424 +       if ( phys_mask & (1 << MTRR_PHYSMASK_VALID_BIT) )
  18.425 +       {
  18.426 +           if ( ((u64) pa & phys_mask) >> MTRR_PHYSMASK_SHIFT ==
  18.427 +                (phys_base & phys_mask) >> MTRR_PHYSMASK_SHIFT )
  18.428 +           {
  18.429 +               if ( unlikely(m->overlapped) )
  18.430 +               {
  18.431 +                    overlap_mtrr |= 1 << (phys_base & MTRR_PHYSBASE_TYPE_MASK);
  18.432 +                    overlap_mtrr_pos = phys_base & MTRR_PHYSBASE_TYPE_MASK;
  18.433 +               }
  18.434 +               else
  18.435 +               {
  18.436 +                   /* If no overlap, return the found one */
  18.437 +                   return (phys_base & MTRR_PHYSBASE_TYPE_MASK);
  18.438 +               }
  18.439 +           }
  18.440 +       }
  18.441 +   }
  18.442 +
  18.443 +   /* Overlapped or not found. */
  18.444 +   if ( unlikely(overlap_mtrr == 0) )
  18.445 +       return m->def_type;
  18.446 +
  18.447 +   if ( likely(!(overlap_mtrr & ~( ((u8)1) << overlap_mtrr_pos ))) )
   18.448 +       /* Covers both a single variable-range match and
   18.449 +        * two or more identical matches.
   18.450 +        */
  18.451 +       return overlap_mtrr_pos;
  18.452 +
  18.453 +   if ( overlap_mtrr & 0x1 )
  18.454 +       /* Two or more match, one is UC. */
  18.455 +       return MTRR_TYPE_UNCACHABLE;
   18.456 +
  18.457 +   if ( !(overlap_mtrr & 0xaf) )
  18.458 +       /* Two or more match, WT and WB. */
  18.459 +       return MTRR_TYPE_WRTHROUGH;
  18.460 +
  18.461 +   /* Behaviour is undefined, but return the last overlapped type. */
  18.462 +   return overlap_mtrr_pos;
  18.463 +}
  18.464 +
  18.465 +/*
   18.466 + * Return the memory type from PAT.
   18.467 + * NOTE: valid only when paging is enabled.
   18.468 + *       Only 4K page PTEs are supported now.
  18.469 + */
  18.470 +static unsigned char page_pat_type(u64 pat_cr, unsigned long pte_flags)
  18.471 +{
  18.472 +    int pat_entry;
  18.473 +
  18.474 +    /* PCD/PWT -> bit 1/0 of PAT entry */
  18.475 +    pat_entry = ( pte_flags >> 3 ) & 0x3;
  18.476 +    /* PAT bits as bit 2 of PAT entry */
  18.477 +    if ( pte_flags & _PAGE_PAT )
  18.478 +        pat_entry |= 4;
  18.479 +
  18.480 +    return (unsigned char)pat_cr_2_paf(pat_cr, pat_entry);
  18.481 +}
  18.482 +
  18.483 +/*
  18.484 + * Effective memory type for leaf page.
  18.485 + */
  18.486 +static u8 effective_mm_type(
  18.487 +        struct mtrr_state *m,
  18.488 +        u64 pat,
  18.489 +        paddr_t gpa,
  18.490 +        unsigned long pte_flags)
  18.491 +{
  18.492 +    unsigned char mtrr_mtype, pat_value, effective;
  18.493 +
  18.494 +    mtrr_mtype = get_mtrr_type(m, gpa);
  18.495 +
  18.496 +    pat_value = page_pat_type(pat, pte_flags);
  18.497 +
  18.498 +    effective = mm_type_tbl[mtrr_mtype][pat_value];
  18.499 +
  18.500 +    return effective;
  18.501 +}
  18.502 +
  18.503 +static void init_mtrr_epat_tbl(void)
  18.504 +{
  18.505 +    int i, j;
   18.506 +    /* Set the default value to an invalid type, just to detect conflicts. */
  18.507 +    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
  18.508 +
  18.509 +    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
  18.510 +    {
  18.511 +        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  18.512 +        {
  18.513 +            int tmp = mm_type_tbl[i][j];
  18.514 +            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
  18.515 +                mtrr_epat_tbl[i][tmp] = j;
  18.516 +        }
  18.517 +    }
  18.518 +}
  18.519 +
  18.520 +u32 get_pat_flags(struct vcpu *v,
  18.521 +                  u32 gl1e_flags,
  18.522 +                  paddr_t gpaddr,
  18.523 +                  paddr_t spaddr)
  18.524 +{
  18.525 +    u8 guest_eff_mm_type;
  18.526 +    u8 shadow_mtrr_type;
  18.527 +    u8 pat_entry_value;
  18.528 +    u64 pat = v->arch.hvm_vcpu.pat_cr;
  18.529 +    struct mtrr_state *g = &v->arch.hvm_vcpu.mtrr;
  18.530 +
  18.531 +    /* 1. Get the effective memory type of guest physical address,
  18.532 +     * with the pair of guest MTRR and PAT
  18.533 +     */
  18.534 +    guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags);
  18.535 +    /* 2. Get the memory type of host physical address, with MTRR */
  18.536 +    shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
  18.537 +
  18.538 +    /* 3. Find the memory type in PAT, with host MTRR memory type
  18.539 +     * and guest effective memory type.
  18.540 +     */
  18.541 +    pat_entry_value = mtrr_epat_tbl[shadow_mtrr_type][guest_eff_mm_type];
   18.542 +    /* If a conflict occurs (e.g. host MTRR is UC, guest memory type is
   18.543 +     * WB), set UC as the effective memory type. Here, returning
   18.544 +     * PAT_TYPE_UNCACHABLE will always set the effective memory type to UC.
   18.545 +     */
  18.546 +    if ( pat_entry_value == INVALID_MEM_TYPE )
  18.547 +    {
  18.548 +        gdprintk(XENLOG_WARNING,
  18.549 +                 "Conflict occurs for a given guest l1e flags:%x "
  18.550 +                 "at %"PRIx64" (the effective mm type:%d), "
  18.551 +                 "because the host mtrr type is:%d\n",
  18.552 +                 gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
  18.553 +                 shadow_mtrr_type);
  18.554 +        pat_entry_value = PAT_TYPE_UNCACHABLE;
  18.555 +    }
  18.556 +    /* 4. Get the pte flags */
  18.557 +    return pat_type_2_pte_flags(pat_entry_value);
  18.558 +}
  18.559 +
   18.560 +/* Helper functions for setting MTRR/PAT */
  18.561 +bool pat_msr_set(u64 *pat, u64 msr_content)
  18.562 +{
  18.563 +    u8 *value = (u8*)&msr_content;
  18.564 +    int i;
  18.565 +
  18.566 +    if ( *pat != msr_content )
  18.567 +    {
  18.568 +        for ( i = 0; i < 8; i++ )
  18.569 +            if ( unlikely(!(value[i] == 0 || value[i] == 1 ||
  18.570 +                            value[i] == 4 || value[i] == 5 ||
  18.571 +                            value[i] == 6 || value[i] == 7)) )
  18.572 +                return 0;
  18.573 +
  18.574 +        *pat = msr_content;
  18.575 +    }
  18.576 +
  18.577 +    return 1;
  18.578 +}
  18.579 +
  18.580 +bool mtrr_def_type_msr_set(struct mtrr_state *m, u64 msr_content)
  18.581 +{
  18.582 +    u8 def_type = msr_content & 0xff;
  18.583 +    u8 enabled = (msr_content >> 10) & 0x3;
  18.584 +
  18.585 +    if ( unlikely(!(def_type == 0 || def_type == 1 || def_type == 4 ||
  18.586 +                    def_type == 5 || def_type == 6)) )
  18.587 +    {
  18.588 +         HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid MTRR def type:%x\n", def_type);
  18.589 +         return 0;
  18.590 +    }
  18.591 +
  18.592 +    if ( unlikely(msr_content && (msr_content & ~0xcffUL)) )
  18.593 +    {
  18.594 +         HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
  18.595 +                     msr_content);
  18.596 +         return 0;
  18.597 +    }
  18.598 +
  18.599 +    m->enabled = enabled;
  18.600 +    m->def_type = def_type;
  18.601 +
  18.602 +    return 1;
  18.603 +}
  18.604 +
  18.605 +bool mtrr_fix_range_msr_set(struct mtrr_state *m, int row, u64 msr_content)
  18.606 +{
  18.607 +    u64 *fixed_range_base = (u64 *)m->fixed_ranges;
  18.608 +
  18.609 +    if ( fixed_range_base[row] != msr_content )
  18.610 +    {
  18.611 +        u8 *range = (u8*)&msr_content;
  18.612 +        int i, type;
  18.613 +
  18.614 +        for ( i = 0; i < 8; i++ )
  18.615 +        {
  18.616 +            type = range[i];
  18.617 +            if ( unlikely(!(type == 0 || type == 1 ||
  18.618 +                            type == 4 || type == 5 || type == 6)) )
  18.619 +                return 0;
  18.620 +        }
  18.621 +
  18.622 +        fixed_range_base[row] = msr_content;
  18.623 +    }
  18.624 +
  18.625 +    return 1;
  18.626 +}
  18.627 +
  18.628 +bool mtrr_var_range_msr_set(struct mtrr_state *m, u32 msr, u64 msr_content)
  18.629 +{
  18.630 +    u32 index;
  18.631 +    u64 msr_mask;
  18.632 +    u64 *var_range_base = (u64*)m->var_ranges;
  18.633 +
  18.634 +    index = msr - MSR_IA32_MTRR_PHYSBASE0;
  18.635 +
  18.636 +    if ( var_range_base[index] != msr_content )
  18.637 +    {
  18.638 +        u32 type = msr_content & 0xff;
  18.639 +
  18.640 +        msr_mask = (index & 1) ? phys_mask_msr_mask : phys_base_msr_mask;
  18.641 +
  18.642 +        if ( unlikely(!(type == 0 || type == 1 ||
  18.643 +                        type == 4 || type == 5 || type == 6)) )
  18.644 +            return 0;
  18.645 +
  18.646 +        if ( unlikely(msr_content && (msr_content & msr_mask)) )
  18.647 +        {
  18.648 +            HVM_DBG_LOG(DBG_LEVEL_MSR, "invalid msr content:%"PRIx64"\n",
  18.649 +                        msr_content);
  18.650 +            return 0;
  18.651 +        }
  18.652 +
  18.653 +        var_range_base[index] = msr_content;
  18.654 +    }
  18.655 +
  18.656 +    m->overlapped = is_var_mtrr_overlapped(m);
  18.657 +
  18.658 +    return 1;
  18.659 +}
  18.660 +
  18.661 +bool_t mtrr_pat_not_equal(struct vcpu *vd, struct vcpu *vs)
  18.662 +{
  18.663 +    struct mtrr_state *md = &vd->arch.hvm_vcpu.mtrr;
  18.664 +    struct mtrr_state *ms = &vs->arch.hvm_vcpu.mtrr;
  18.665 +    int res;
  18.666 +    u8 num_var_ranges = (u8)md->mtrr_cap;
  18.667 +
  18.668 +    /* Test fixed ranges. */
  18.669 +    res = memcmp(md->fixed_ranges, ms->fixed_ranges,
  18.670 +            NUM_FIXED_RANGES*sizeof(mtrr_type));
  18.671 +    if ( res )
  18.672 +        return 1;
  18.673 +
  18.674 +    /* Test var ranges. */
  18.675 +    res = memcmp(md->var_ranges, ms->var_ranges,
  18.676 +            num_var_ranges*sizeof(struct mtrr_var_range));
  18.677 +    if ( res )
  18.678 +        return 1;
  18.679 +
  18.680 +    /* Test default type MSR. */
  18.681 +    if ( (md->def_type != ms->def_type)
  18.682 +            && (md->enabled != ms->enabled) )
  18.683 +        return 1;
  18.684 +
  18.685 +    /* Test PAT. */
  18.686 +    if ( vd->arch.hvm_vcpu.pat_cr != vs->arch.hvm_vcpu.pat_cr )
  18.687 +        return 1;
  18.688 +
  18.689 +    return 0;
  18.690 +}
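
The core of get_pat_flags() above is a two-step table lookup: the guest's MTRR
and PAT together give an effective memory type (mm_type_tbl), and that type
combined with the host MTRR type is mapped back to a PAT entry
(mtrr_epat_tbl). A worked illustration of the first step, using the numeric
encodings from mm_type_tbl (0=UC, 1=WC, 4=WT, 5=WP, 6=WB); this is a
standalone sketch, not code from the patch:

    /* Guest MTRR says WB (6) but the PAT entry selected by the PTE's
     * PAT/PCD/PWT bits says WT (4): mm_type_tbl[6][4] is 4, i.e. WT --
     * the more restrictive of the two types wins. */
    unsigned char mtrr_type = 6;   /* MTRR_TYPE_WRBACK   */
    unsigned char pat_type  = 4;   /* PAT_TYPE_WRTHROUGH */
    unsigned char effective = mm_type_tbl[mtrr_type][pat_type];   /* == 4 */
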
    19.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Oct 22 12:30:17 2007 -0600
    19.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Mon Oct 22 13:57:08 2007 -0600
    19.3 @@ -756,6 +756,11 @@ void vm_resume_fail(unsigned long eflags
    19.4      domain_crash_synchronous();
    19.5  }
    19.6  
    19.7 +static void flush_cache(void *info)
    19.8 +{
    19.9 +    wbinvd();
   19.10 +}
   19.11 +
   19.12  void vmx_do_resume(struct vcpu *v)
   19.13  {
   19.14      bool_t debug_state;
   19.15 @@ -767,6 +772,18 @@ void vmx_do_resume(struct vcpu *v)
   19.16      }
   19.17      else
   19.18      {
    19.19 +        /* For a pass-through domain, the guest PCI-E device driver may use
    19.20 +         * "Non-Snoop" I/O and explicitly WBINVD or CLFLUSH a RAM region.
    19.21 +         * In that case, if migration occurs before the WBINVD or CLFLUSH, we
    19.22 +         * need to flush the cache to maintain data consistency.
    19.23 +         */
   19.24 +        if ( !list_empty(&(domain_hvm_iommu(v->domain)->pdev_list)) )
   19.25 +        {
   19.26 +            int cpu = v->arch.hvm_vmx.active_cpu;
   19.27 +            if ( cpu != -1 )
   19.28 +                on_selected_cpus(cpumask_of_cpu(cpu), flush_cache, NULL, 1, 1);
   19.29 +        }
   19.30 +
   19.31          vmx_clear_vmcs(v);
   19.32          vmx_load_vmcs(v);
   19.33          hvm_migrate_timers(v);
    20.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Oct 22 12:30:17 2007 -0600
    20.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Oct 22 13:57:08 2007 -0600
    20.3 @@ -50,6 +50,7 @@
    20.4  #include <asm/hvm/vpt.h>
    20.5  #include <public/hvm/save.h>
    20.6  #include <asm/hvm/trace.h>
    20.7 +#include <stdbool.h>
    20.8  
    20.9  enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
   20.10  
   20.11 @@ -2285,6 +2286,9 @@ static int vmx_do_msr_read(struct cpu_us
   20.12      u64 msr_content = 0;
   20.13      u32 ecx = regs->ecx, eax, edx;
   20.14      struct vcpu *v = current;
   20.15 +    int index;
   20.16 +    u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
   20.17 +    u64 *fixed_range_base =  (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
   20.18  
   20.19      HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
   20.20  
   20.21 @@ -2305,6 +2309,32 @@ static int vmx_do_msr_read(struct cpu_us
   20.22      case MSR_IA32_APICBASE:
   20.23          msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   20.24          break;
   20.25 +    case MSR_IA32_CR_PAT:
   20.26 +        msr_content = v->arch.hvm_vcpu.pat_cr;
   20.27 +        break;
   20.28 +    case MSR_MTRRcap:
   20.29 +        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
   20.30 +        break;
   20.31 +    case MSR_MTRRdefType:
   20.32 +        msr_content = v->arch.hvm_vcpu.mtrr.def_type
   20.33 +                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
   20.34 +        break;
   20.35 +    case MSR_MTRRfix64K_00000:
   20.36 +        msr_content = fixed_range_base[0];
   20.37 +        break;
   20.38 +    case MSR_MTRRfix16K_80000:
   20.39 +    case MSR_MTRRfix16K_A0000:
   20.40 +        index = regs->ecx - MSR_MTRRfix16K_80000;
   20.41 +        msr_content = fixed_range_base[index + 1];
   20.42 +        break;
   20.43 +    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
   20.44 +        index = regs->ecx - MSR_MTRRfix4K_C0000;
   20.45 +        msr_content = fixed_range_base[index + 3];
   20.46 +        break;
   20.47 +    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
   20.48 +        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
   20.49 +        msr_content = var_range_base[index];
   20.50 +        break;
   20.51      case MSR_IA32_DEBUGCTLMSR:
   20.52          if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
   20.53              msr_content = 0;
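
The fixed-range cases above fold eleven MSRs into one array of eleven 64-bit words
(88 mtrr_type bytes, matching NUM_FIXED_RANGES in mtrr.h below). A small standalone
sketch of that index calculation; fixed_mtrr_index() is an illustrative name, and
the MSR numbers are the architectural values, reproduced only so the snippet
compiles on its own:

    #include <stdint.h>
    #include <stdio.h>

    #define MSR_MTRRfix64K_00000 0x00000250
    #define MSR_MTRRfix16K_80000 0x00000258
    #define MSR_MTRRfix16K_A0000 0x00000259
    #define MSR_MTRRfix4K_C0000  0x00000268
    #define MSR_MTRRfix4K_F8000  0x0000026f

    /* Map a fixed-range MTRR MSR number to its slot in a u64[11] array
     * (11 MSRs x 8 type bytes = 88 fixed ranges). Returns -1 if the MSR
     * is not a fixed-range MTRR. */
    static int fixed_mtrr_index(uint32_t msr)
    {
        if (msr == MSR_MTRRfix64K_00000)
            return 0;
        if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
            return (int)(msr - MSR_MTRRfix16K_80000) + 1;   /* 1..2 */
        if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
            return (int)(msr - MSR_MTRRfix4K_C0000) + 3;    /* 3..10 */
        return -1;
    }

    int main(void)
    {
        printf("%d %d %d\n",
               fixed_mtrr_index(MSR_MTRRfix64K_00000),   /* 0 */
               fixed_mtrr_index(MSR_MTRRfix16K_A0000),   /* 2 */
               fixed_mtrr_index(MSR_MTRRfix4K_F8000));   /* 10 */
        return 0;
    }
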
   20.54 @@ -2428,11 +2458,19 @@ void vmx_vlapic_msr_changed(struct vcpu 
   20.55      vmx_vmcs_exit(v);
   20.56  }
   20.57  
   20.58 +extern bool mtrr_var_range_msr_set(struct mtrr_state *v,
   20.59 +        u32 msr, u64 msr_content);
   20.60 +extern bool mtrr_fix_range_msr_set(struct mtrr_state *v,
   20.61 +        int row, u64 msr_content);
   20.62 +extern bool mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
   20.63 +extern bool pat_msr_set(u64 *pat, u64 msr);
   20.64 +
   20.65  static int vmx_do_msr_write(struct cpu_user_regs *regs)
   20.66  {
   20.67      u32 ecx = regs->ecx;
   20.68      u64 msr_content;
   20.69      struct vcpu *v = current;
   20.70 +    int index;
   20.71  
   20.72      HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
   20.73                  ecx, (u32)regs->eax, (u32)regs->edx);
   20.74 @@ -2459,6 +2497,38 @@ static int vmx_do_msr_write(struct cpu_u
   20.75      case MSR_IA32_APICBASE:
   20.76          vlapic_msr_set(vcpu_vlapic(v), msr_content);
   20.77          break;
   20.78 +    case MSR_IA32_CR_PAT:
   20.79 +        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
   20.80 +           goto gp_fault;
   20.81 +        break;
   20.82 +    case MSR_MTRRdefType:
   20.83 +        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
   20.84 +           goto gp_fault;
   20.85 +        break;
   20.86 +    case MSR_MTRRfix64K_00000:
   20.87 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
   20.88 +            goto gp_fault;
   20.89 +        break;
   20.90 +    case MSR_MTRRfix16K_80000:
   20.91 +    case MSR_MTRRfix16K_A0000:
   20.92 +        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
   20.93 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
   20.94 +                                     index, msr_content) )
   20.95 +            goto gp_fault;
   20.96 +        break;
   20.97 +    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
   20.98 +        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
   20.99 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  20.100 +                                     index, msr_content) )
  20.101 +            goto gp_fault;
  20.102 +        break;
  20.103 +    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
  20.104 +        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  20.105 +                                     regs->ecx, msr_content) )
  20.106 +            goto gp_fault;
  20.107 +        break;
  20.108 +    case MSR_MTRRcap:
  20.109 +        goto gp_fault;
  20.110      case MSR_IA32_DEBUGCTLMSR: {
  20.111          int i, rc = 0;
  20.112  
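
pat_msr_set(), mtrr_def_type_msr_set() and friends are declared extern above and
defined in the MTRR/PAT support code added elsewhere in this changeset (not shown
here). As a rough, assumed illustration of the kind of check behind the gp_fault
paths, a PAT write could be validated like this; pat_entry_valid() and
pat_write_ok() are invented names and the exact policy is an assumption, but the
idea is that every byte-wide entry must hold a defined memory type:

    #include <stdint.h>
    #include <stdbool.h>

    enum {
        PAT_TYPE_UNCACHABLE = 0,
        PAT_TYPE_WRCOMB     = 1,
        PAT_TYPE_WRTHROUGH  = 4,
        PAT_TYPE_WRPROT     = 5,
        PAT_TYPE_WRBACK     = 6,
        PAT_TYPE_UC_MINUS   = 7,
    };

    static bool pat_entry_valid(uint8_t type)
    {
        switch (type) {
        case PAT_TYPE_UNCACHABLE: case PAT_TYPE_WRCOMB:
        case PAT_TYPE_WRTHROUGH:  case PAT_TYPE_WRPROT:
        case PAT_TYPE_WRBACK:     case PAT_TYPE_UC_MINUS:
            return true;
        default:
            return false;   /* 2 and 3 are reserved encodings */
        }
    }

    /* Sketch of a PAT MSR write check: all eight byte-wide entries must be
     * valid memory types, otherwise the caller takes its "goto gp_fault" path. */
    static bool pat_write_ok(uint64_t *pat, uint64_t val)
    {
        for (int i = 0; i < 8; i++)
            if (!pat_entry_valid((uint8_t)(val >> (i * 8))))
                return false;
        *pat = val;
        return true;
    }

    int main(void)
    {
        uint64_t pat = 0;
        /* 0x0007040600070406 is the power-on default PAT value: accepted. */
        return pat_write_ok(&pat, 0x0007040600070406ULL) ? 0 : 1;
    }
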
    21.1 --- a/xen/arch/x86/mm.c	Mon Oct 22 12:30:17 2007 -0600
    21.2 +++ b/xen/arch/x86/mm.c	Mon Oct 22 13:57:08 2007 -0600
    21.3 @@ -3115,6 +3115,15 @@ long arch_memory_op(int op, XEN_GUEST_HA
    21.4          case XENMAPSPACE_shared_info:
    21.5              if ( xatp.idx == 0 )
    21.6                  mfn = virt_to_mfn(d->shared_info);
     21.7 +            /* XXX: assumes this is called after the E820 table has been built,
     21.8 +             * since the E820 map is needed to initialize the MTRRs.
     21.9 +             */
   21.10 +            if ( is_hvm_domain(d) ) {
   21.11 +                extern void init_mtrr_in_hyper(struct vcpu *);
   21.12 +                struct vcpu *vs;
   21.13 +                for_each_vcpu(d, vs)
   21.14 +                    init_mtrr_in_hyper(vs);
   21.15 +            }
   21.16              break;
   21.17          case XENMAPSPACE_grant_table:
   21.18              spin_lock(&d->grant_table->lock);
    22.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Oct 22 12:30:17 2007 -0600
    22.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Oct 22 13:57:08 2007 -0600
    22.3 @@ -880,6 +880,14 @@ static void shadow_blow_tables(struct do
    22.4      flush_tlb_mask(d->domain_dirty_cpumask);
    22.5  }
    22.6  
    22.7 +void shadow_blow_tables_per_domain(struct domain *d)
    22.8 +{
    22.9 +    if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL ) {
   22.10 +        shadow_lock(d);
   22.11 +        shadow_blow_tables(d);
   22.12 +        shadow_unlock(d);
   22.13 +    }
   22.14 +}
   22.15  
   22.16  #ifndef NDEBUG
   22.17  /* Blow all shadows of all shadowed domains: this can be used to cause the
    23.1 --- a/xen/arch/x86/mm/shadow/multi.c	Mon Oct 22 12:30:17 2007 -0600
    23.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Oct 22 13:57:08 2007 -0600
    23.3 @@ -33,6 +33,7 @@
    23.4  #include <asm/shadow.h>
    23.5  #include <asm/flushtlb.h>
    23.6  #include <asm/hvm/hvm.h>
    23.7 +#include <asm/mtrr.h>
    23.8  #include "private.h"
    23.9  #include "types.h"
   23.10  
   23.11 @@ -267,6 +268,11 @@ guest_walk_tables(struct vcpu *v, unsign
   23.12           * us reflect l2 changes later without touching the l1s. */
   23.13          int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
   23.14                       _PAGE_ACCESSED|_PAGE_DIRTY);
    23.15 +        /* Propagate PWT and PCD from the PSE l2 entry to level 1. */
   23.16 +        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
   23.17 +            flags |= _PAGE_PWT;
   23.18 +        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
   23.19 +            flags |= _PAGE_PCD;
   23.20          /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
   23.21           * of the level 1 */
   23.22          if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
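
These flag manipulations concern how a PSE superpage's caching attributes survive
being shadowed with 4K entries: PWT and PCD copy straight across, while the PAT bit
moves from bit 12 (PDE) to bit 7 (PTE). A standalone sketch of just that
translation; l1_cache_flags_from_pse_l2() and _PAGE_PAT_L1 are illustrative names,
the bit positions are the architectural ones:

    #include <stdint.h>

    #define _PAGE_PWT      (1u << 3)
    #define _PAGE_PCD      (1u << 4)
    #define _PAGE_PAT_L1   (1u << 7)    /* PAT bit in a 4K PTE */
    #define _PAGE_PSE_PAT  (1u << 12)   /* PAT bit in a PSE (2M/4M) PDE */

    /* When a PSE superpage is split into shadow 4K entries, its caching bits
     * (PWT, PCD, and PAT, which lives at bit 12 in a PDE but bit 7 in a PTE)
     * must be carried over. */
    static uint32_t l1_cache_flags_from_pse_l2(uint32_t l2_flags)
    {
        uint32_t flags = 0;

        if (l2_flags & _PAGE_PWT)
            flags |= _PAGE_PWT;
        if (l2_flags & _PAGE_PCD)
            flags |= _PAGE_PCD;
        if (l2_flags & _PAGE_PSE_PAT)
            flags |= _PAGE_PAT_L1;
        return flags;
    }

    int main(void)
    {
        /* A PSE PDE with PCD and its PAT bit set maps to PCD plus PAT (bit 7). */
        return l1_cache_flags_from_pse_l2(_PAGE_PCD | _PAGE_PSE_PAT)
               == (_PAGE_PCD | _PAGE_PAT_L1) ? 0 : 1;
    }
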
   23.23 @@ -614,7 +620,12 @@ shadow_l4_index(mfn_t *smfn, u32 guest_i
   23.24  
   23.25  #endif // GUEST_PAGING_LEVELS >= 4
   23.26  
   23.27 -
   23.28 +extern u32 get_pat_flags(struct vcpu *v,
   23.29 +                  u32 gl1e_flags,
   23.30 +                  paddr_t gpaddr,
   23.31 +                  paddr_t spaddr);
   23.32 +
   23.33 +unsigned char pat_type_2_pte_flags(unsigned char pat_type);
   23.34  /**************************************************************************/
   23.35  /* Function which computes shadow entries from their corresponding guest
   23.36   * entries.  This is the "heart" of the shadow code. It operates using
   23.37 @@ -703,6 +714,17 @@ static always_inline void
   23.38          pass_thru_flags |= _PAGE_NX_BIT;
   23.39      sflags = gflags & pass_thru_flags;
   23.40  
    23.41 +    /* Only change the memory caching type for a pass-through domain. */
   23.42 +    if ( (level == 1) && !list_empty(&(domain_hvm_iommu(d)->pdev_list)) ) {
   23.43 +        if ( v->domain->arch.hvm_domain.is_in_uc_mode )
   23.44 +            sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
   23.45 +        else
   23.46 +            sflags |= get_pat_flags(v,
   23.47 +                                    gflags,
   23.48 +                                    guest_l1e_get_paddr(*gp),
   23.49 +                                    mfn_x(target_mfn) << PAGE_SHIFT);
   23.50 +    }
   23.51 +
   23.52      // Set the A&D bits for higher level shadows.
   23.53      // Higher level entries do not, strictly speaking, have dirty bits, but
   23.54      // since we use shadow linear tables, each of these entries may, at some
   23.55 @@ -774,10 +796,6 @@ static always_inline void
   23.56          sflags |= _PAGE_USER;
   23.57      }
   23.58  
   23.59 -    /* MMIO addresses should never be cached */
   23.60 -    if ( p2m_is_mmio(p2mt) )
   23.61 -        sflags |= _PAGE_PCD;
   23.62 -
   23.63      *sp = shadow_l1e_from_mfn(target_mfn, sflags);
   23.64  
   23.65   done:
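
get_pat_flags() and pat_type_2_pte_flags() are supplied by the MTRR/PAT code added
elsewhere in this changeset, and their exact policy is not shown here. What is
architecturally fixed is how a PTE selects a PAT entry: the 3-bit index is
{PAT, PCD, PWT}. A small sketch of that encoding; pat_entry_to_pte_flags() is an
illustrative name, and which entry means UC, WC, WB and so on depends on how the
PAT MSR itself is programmed:

    #include <stdint.h>

    #define _PAGE_PWT  (1u << 3)
    #define _PAGE_PCD  (1u << 4)
    #define _PAGE_PAT  (1u << 7)

    /* Turn a desired PAT entry number (0..7) into the PTE flag bits that
     * select it: bit 0 -> PWT, bit 1 -> PCD, bit 2 -> PAT. */
    static uint32_t pat_entry_to_pte_flags(unsigned int entry)
    {
        uint32_t flags = 0;

        if (entry & 1) flags |= _PAGE_PWT;
        if (entry & 2) flags |= _PAGE_PCD;
        if (entry & 4) flags |= _PAGE_PAT;
        return flags;
    }

    int main(void)
    {
        /* Entry 3 -> PCD and PWT set, PAT clear. */
        return pat_entry_to_pte_flags(3) == (_PAGE_PCD | _PAGE_PWT) ? 0 : 1;
    }
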
    24.1 --- a/xen/arch/x86/traps.c	Mon Oct 22 12:30:17 2007 -0600
    24.2 +++ b/xen/arch/x86/traps.c	Mon Oct 22 13:57:08 2007 -0600
    24.3 @@ -2229,6 +2229,37 @@ void __init trap_init(void)
    24.4      open_softirq(NMI_SOFTIRQ, nmi_softirq);
    24.5  }
    24.6  
    24.7 +long register_guest_nmi_callback(unsigned long address)
    24.8 +{
    24.9 +    struct vcpu *v = current;
   24.10 +    struct domain *d = current->domain;
   24.11 +    struct trap_info *t = &v->arch.guest_context.trap_ctxt[TRAP_nmi];
   24.12 +
   24.13 +    t->vector  = TRAP_nmi;
   24.14 +    t->flags   = 0;
   24.15 +    t->cs      = !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
   24.16 +    t->address = address;
   24.17 +    TI_SET_IF(t, 1);
   24.18 +
   24.19 +    /*
   24.20 +     * If no handler was registered we can 'lose the NMI edge'. Re-assert it
   24.21 +     * now.
   24.22 +     */
   24.23 +    if ( (v->vcpu_id == 0) && (arch_get_nmi_reason(d) != 0) )
   24.24 +        v->nmi_pending = 1;
   24.25 +
   24.26 +    return 0;
   24.27 +}
   24.28 +
   24.29 +long unregister_guest_nmi_callback(void)
   24.30 +{
   24.31 +    struct vcpu *v = current;
   24.32 +    struct trap_info *t = &v->arch.guest_context.trap_ctxt[TRAP_nmi];
   24.33 +
   24.34 +    memset(t, 0, sizeof(*t));
   24.35 +
   24.36 +    return 0;
   24.37 +}
   24.38  
   24.39  long do_set_trap_table(XEN_GUEST_HANDLE(trap_info_t) traps)
   24.40  {
   24.41 @@ -2262,6 +2293,12 @@ long do_set_trap_table(XEN_GUEST_HANDLE(
   24.42          if ( cur.address == 0 )
   24.43              break;
   24.44  
   24.45 +        if ( (cur.vector == TRAP_nmi) && !TI_GET_IF(&cur) )
   24.46 +        {
   24.47 +            rc = -EINVAL;
   24.48 +            break;
   24.49 +        }
   24.50 +
   24.51          fixup_guest_code_selector(current->domain, cur.cs);
   24.52  
   24.53          memcpy(&dst[cur.vector], &cur, sizeof(cur));
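
With this change an NMI callback is just a normal trap_ctxt entry for vector 2, and
set_trap_table() refuses such an entry unless its "disable event delivery" flag is
set. A guest-side sketch of building that entry: the structure layout follows the
Xen public headers, while the HYPERVISOR_set_trap_table wrapper name and the
FLAT_KERNEL_CS value used in main() are assumptions for illustration:

    #include <stdint.h>
    #include <string.h>

    /* Layout as in the Xen public headers (abridged). */
    struct trap_info {
        uint8_t       vector;   /* exception vector */
        uint8_t       flags;    /* bits 0-1: DPL; bit 2: disable event delivery */
        uint16_t      cs;       /* code selector */
        unsigned long address;  /* handler entry point */
    };

    #define TRAP_nmi        2
    #define TI_SET_IF(t, v) ((t)->flags |= ((!!(v)) << 2))

    /* Fill a vector-2 entry the way register_guest_nmi_callback() above does;
     * without TI_SET_IF the hypervisor now returns -EINVAL.  The entry would
     * then be handed to HYPERVISOR_set_trap_table() (wrapper name assumed). */
    static void build_nmi_trap_entry(struct trap_info *t, uint16_t cs,
                                     unsigned long handler)
    {
        memset(t, 0, sizeof(*t));
        t->vector  = TRAP_nmi;
        t->cs      = cs;
        t->address = handler;
        TI_SET_IF(t, 1);
    }

    int main(void)
    {
        struct trap_info ti;
        build_nmi_trap_entry(&ti, 0xe019 /* e.g. FLAT_KERNEL_CS on x86_32 */, 0);
        return ti.flags == 4 ? 0 : 1;
    }
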
    25.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Mon Oct 22 12:30:17 2007 -0600
    25.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Mon Oct 22 13:57:08 2007 -0600
    25.3 @@ -66,7 +66,10 @@ void __dummy__(void)
    25.4             arch.guest_context.kernel_sp);
    25.5      OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
    25.6      OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
    25.7 -    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
    25.8 +    OFFSET(VCPU_nmi_cs, struct vcpu,
    25.9 +           arch.guest_context.trap_ctxt[TRAP_nmi].cs);
   25.10 +    OFFSET(VCPU_nmi_addr, struct vcpu,
   25.11 +           arch.guest_context.trap_ctxt[TRAP_nmi].address);
   25.12      OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
   25.13      OFFSET(VCPU_nmi_masked, struct vcpu, nmi_masked);
   25.14      DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
    26.1 --- a/xen/arch/x86/x86_32/entry.S	Mon Oct 22 12:30:17 2007 -0600
    26.2 +++ b/xen/arch/x86/x86_32/entry.S	Mon Oct 22 13:57:08 2007 -0600
    26.3 @@ -257,13 +257,15 @@ process_nmi:
    26.4          testb $1,VCPU_nmi_masked(%ebx)
    26.5          jnz  test_guest_events
    26.6          movb $0,VCPU_nmi_pending(%ebx)
    26.7 -        movl VCPU_nmi_addr(%ebx),%eax
    26.8 +        movzwl VCPU_nmi_cs(%ebx),%eax
    26.9 +        movl VCPU_nmi_addr(%ebx),%ecx
   26.10          test %eax,%eax
   26.11          jz   test_guest_events
   26.12          movb $1,VCPU_nmi_masked(%ebx)
   26.13          sti
   26.14          leal VCPU_trap_bounce(%ebx),%edx
   26.15 -        movl %eax,TRAPBOUNCE_eip(%edx)
   26.16 +        movw %ax,TRAPBOUNCE_cs(%edx)
   26.17 +        movl %ecx,TRAPBOUNCE_eip(%edx)
   26.18          movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
   26.19          movb $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
   26.20          call create_bounce_frame
    27.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Mon Oct 22 12:30:17 2007 -0600
    27.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Mon Oct 22 13:57:08 2007 -0600
    27.3 @@ -75,7 +75,10 @@ void __dummy__(void)
    27.4      OFFSET(VCPU_kernel_ss, struct vcpu, arch.guest_context.kernel_ss);
    27.5      OFFSET(VCPU_guest_context_flags, struct vcpu, arch.guest_context.flags);
    27.6      OFFSET(VCPU_arch_guest_fpu_ctxt, struct vcpu, arch.guest_context.fpu_ctxt);
    27.7 -    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
    27.8 +    OFFSET(VCPU_nmi_cs, struct vcpu,
    27.9 +           arch.guest_context.trap_ctxt[TRAP_nmi].cs);
   27.10 +    OFFSET(VCPU_nmi_addr, struct vcpu,
   27.11 +           arch.guest_context.trap_ctxt[TRAP_nmi].address);
   27.12      OFFSET(VCPU_nmi_pending, struct vcpu, nmi_pending);
   27.13      OFFSET(VCPU_nmi_masked, struct vcpu, nmi_masked);
   27.14      DEFINE(_VGCF_failsafe_disables_events, _VGCF_failsafe_disables_events);
    28.1 --- a/xen/arch/x86/x86_64/compat/entry.S	Mon Oct 22 12:30:17 2007 -0600
    28.2 +++ b/xen/arch/x86/x86_64/compat/entry.S	Mon Oct 22 13:57:08 2007 -0600
    28.3 @@ -131,13 +131,15 @@ compat_process_nmi:
    28.4          testb $1,VCPU_nmi_masked(%rbx)
    28.5          jnz   compat_test_guest_events
    28.6          movb  $0,VCPU_nmi_pending(%rbx)
    28.7 -        movl  VCPU_nmi_addr(%rbx),%eax
    28.8 +        movzwl VCPU_nmi_cs(%rbx),%eax
    28.9 +        movl  VCPU_nmi_addr(%rbx),%ecx
   28.10          testl %eax,%eax
   28.11          jz    compat_test_guest_events
   28.12          movb  $1,VCPU_nmi_masked(%rbx)
   28.13          sti
   28.14          leaq  VCPU_trap_bounce(%rbx),%rdx
   28.15 -        movl  %eax,TRAPBOUNCE_eip(%rdx)
   28.16 +        movw  %ax,TRAPBOUNCE_cs(%rdx)
   28.17 +        movl  %ecx,TRAPBOUNCE_eip(%rdx)
   28.18          movw  $FLAT_COMPAT_KERNEL_CS,TRAPBOUNCE_cs(%rdx)
   28.19          movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
   28.20          call  compat_create_bounce_frame
    29.1 --- a/xen/arch/x86/x86_64/compat/traps.c	Mon Oct 22 12:30:17 2007 -0600
    29.2 +++ b/xen/arch/x86/x86_64/compat/traps.c	Mon Oct 22 13:57:08 2007 -0600
    29.3 @@ -294,6 +294,12 @@ int compat_set_trap_table(XEN_GUEST_HAND
    29.4          if ( cur.address == 0 )
    29.5              break;
    29.6  
    29.7 +        if ( (cur.vector == TRAP_nmi) && !TI_GET_IF(&cur) )
    29.8 +        {
    29.9 +            rc = -EINVAL;
   29.10 +            break;
   29.11 +        }
   29.12 +
   29.13          fixup_guest_code_selector(current->domain, cur.cs);
   29.14  
   29.15          XLAT_trap_info(dst + cur.vector, &cur);
    30.1 --- a/xen/common/compat/domain.c	Mon Oct 22 12:30:17 2007 -0600
    30.2 +++ b/xen/common/compat/domain.c	Mon Oct 22 13:57:08 2007 -0600
    30.3 @@ -59,6 +59,7 @@ int compat_vcpu_op(int cmd, int vcpuid, 
    30.4      case VCPUOP_stop_periodic_timer:
    30.5      case VCPUOP_set_singleshot_timer:
    30.6      case VCPUOP_stop_singleshot_timer:
    30.7 +    case VCPUOP_send_nmi:
    30.8          rc = do_vcpu_op(cmd, vcpuid, arg);
    30.9          break;
   30.10  
    31.1 --- a/xen/common/domain.c	Mon Oct 22 12:30:17 2007 -0600
    31.2 +++ b/xen/common/domain.c	Mon Oct 22 13:57:08 2007 -0600
    31.3 @@ -708,11 +708,9 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
    31.4      }
    31.5  
    31.6      case VCPUOP_stop_periodic_timer:
    31.7 -    {
    31.8          v->periodic_period = 0;
    31.9          vcpu_force_reschedule(v);
   31.10          break;
   31.11 -    }
   31.12  
   31.13      case VCPUOP_set_singleshot_timer:
   31.14      {
   31.15 @@ -740,13 +738,21 @@ long do_vcpu_op(int cmd, int vcpuid, XEN
   31.16      }
   31.17  
   31.18      case VCPUOP_stop_singleshot_timer:
   31.19 -    {
   31.20          if ( v != current )
   31.21              return -EINVAL;
   31.22  
   31.23          stop_timer(&v->singleshot_timer);
   31.24 +
   31.25          break;
   31.26 -    }
   31.27 +
   31.28 +    case VCPUOP_send_nmi:
   31.29 +        if ( !guest_handle_is_null(arg) )
   31.30 +            return -EINVAL;
   31.31 +
   31.32 +        if ( !test_and_set_bool(v->nmi_pending) )
   31.33 +            vcpu_kick(v);
   31.34 +
   31.35 +        break;
   31.36  
   31.37      default:
   31.38          rc = arch_do_vcpu_op(cmd, v, arg);
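
The new VCPUOP_send_nmi takes no argument structure, so the caller must pass a NULL
handle; anything else is rejected with -EINVAL before the target vCPU's nmi_pending
flag is set and the vCPU is kicked. A user-space mock of that contract
(do_vcpu_op_mock() is only a stand-in for do_vcpu_op() above; a PV guest would call
HYPERVISOR_vcpu_op(VCPUOP_send_nmi, vcpu, NULL)):

    #include <stddef.h>
    #include <stdio.h>
    #include <errno.h>

    #define VCPUOP_send_nmi 11

    /* Mock of the hypervisor-side check added above: a non-NULL argument is
     * rejected, otherwise the NMI is marked pending for the target vCPU. */
    static int do_vcpu_op_mock(int cmd, int vcpuid, void *arg)
    {
        if (cmd == VCPUOP_send_nmi) {
            if (arg != NULL)
                return -EINVAL;
            printf("nmi_pending set for vcpu %d, kicking it\n", vcpuid);
            return 0;
        }
        return -EINVAL;
    }

    int main(void)
    {
        return do_vcpu_op_mock(VCPUOP_send_nmi, 1, NULL);
    }
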
    32.1 --- a/xen/common/kernel.c	Mon Oct 22 12:30:17 2007 -0600
    32.2 +++ b/xen/common/kernel.c	Mon Oct 22 13:57:08 2007 -0600
    32.3 @@ -247,40 +247,6 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDL
    32.4      return -ENOSYS;
    32.5  }
    32.6  
    32.7 -#ifndef COMPAT
    32.8 -
    32.9 -long register_guest_nmi_callback(unsigned long address)
   32.10 -{
   32.11 -    struct vcpu *v = current;
   32.12 -    struct domain *d = current->domain;
   32.13 -
   32.14 -    if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
   32.15 -        return -EINVAL;
   32.16 -
   32.17 -    v->nmi_addr = address;
   32.18 -#ifdef CONFIG_X86
   32.19 -    /*
   32.20 -     * If no handler was registered we can 'lose the NMI edge'. Re-assert it
   32.21 -     * now.
   32.22 -     */
   32.23 -    if ( arch_get_nmi_reason(d) != 0 )
   32.24 -        v->nmi_pending = 1;
   32.25 -#endif
   32.26 -
   32.27 -    return 0;
   32.28 -}
   32.29 -
   32.30 -long unregister_guest_nmi_callback(void)
   32.31 -{
   32.32 -    struct vcpu *v = current;
   32.33 -
   32.34 -    v->nmi_addr = 0;
   32.35 -
   32.36 -    return 0;
   32.37 -}
   32.38 -
   32.39 -#endif
   32.40 -
   32.41  DO(nmi_op)(unsigned int cmd, XEN_GUEST_HANDLE(void) arg)
   32.42  {
   32.43      struct xennmi_callback cb;
    33.1 --- a/xen/include/asm-ia64/linux-null/asm/nmi.h	Mon Oct 22 12:30:17 2007 -0600
    33.2 +++ b/xen/include/asm-ia64/linux-null/asm/nmi.h	Mon Oct 22 13:57:08 2007 -0600
    33.3 @@ -1,1 +1,7 @@
    33.4 -/* This file is intentionally left empty. */
    33.5 +#ifndef __IA64_NMI_H__
    33.6 +#define __IA64_NMI_H__
    33.7 +
    33.8 +#define register_guest_nmi_callback(a)  (-ENOSYS)
    33.9 +#define unregister_guest_nmi_callback() (-ENOSYS)
   33.10 +
   33.11 +#endif /* __IA64_NMI_H__ */
    34.1 --- a/xen/include/asm-powerpc/nmi.h	Mon Oct 22 12:30:17 2007 -0600
    34.2 +++ b/xen/include/asm-powerpc/nmi.h	Mon Oct 22 13:57:08 2007 -0600
    34.3 @@ -3,4 +3,7 @@
    34.4  
    34.5  #include <public/nmi.h>
    34.6  
    34.7 +#define register_guest_nmi_callback(a)  (-ENOSYS)
    34.8 +#define unregister_guest_nmi_callback() (-ENOSYS)
    34.9 +
   34.10  #endif /* ASM_NMI_H */
    35.1 --- a/xen/include/asm-x86/config.h	Mon Oct 22 12:30:17 2007 -0600
    35.2 +++ b/xen/include/asm-x86/config.h	Mon Oct 22 13:57:08 2007 -0600
    35.3 @@ -93,7 +93,7 @@
    35.4  
    35.5  #define CONFIG_DMA_BITSIZE 32
    35.6  
    35.7 -#define BOOT_TRAMPOLINE 0x90000
    35.8 +#define BOOT_TRAMPOLINE 0x94000
    35.9  #define bootsym_phys(sym)                                 \
   35.10      (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
   35.11  #define bootsym(sym)                                      \
    36.1 --- a/xen/include/asm-x86/cpufeature.h	Mon Oct 22 12:30:17 2007 -0600
    36.2 +++ b/xen/include/asm-x86/cpufeature.h	Mon Oct 22 13:57:08 2007 -0600
    36.3 @@ -128,6 +128,7 @@
    36.4  #define cpu_has_tsc		boot_cpu_has(X86_FEATURE_TSC)
    36.5  #define cpu_has_pae		boot_cpu_has(X86_FEATURE_PAE)
    36.6  #define cpu_has_pge		boot_cpu_has(X86_FEATURE_PGE)
    36.7 +#define cpu_has_pat		boot_cpu_has(X86_FEATURE_PAT)
    36.8  #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
    36.9  #define cpu_has_sep		boot_cpu_has(X86_FEATURE_SEP)
   36.10  #define cpu_has_mtrr		boot_cpu_has(X86_FEATURE_MTRR)
   36.11 @@ -152,6 +153,7 @@
   36.12  #define cpu_has_tsc		1
   36.13  #define cpu_has_pae		1
   36.14  #define cpu_has_pge		1
   36.15 +#define cpu_has_pat		1
   36.16  #define cpu_has_apic		boot_cpu_has(X86_FEATURE_APIC)
   36.17  #define cpu_has_sep		0
   36.18  #define cpu_has_mtrr		1
    37.1 --- a/xen/include/asm-x86/hvm/domain.h	Mon Oct 22 12:30:17 2007 -0600
    37.2 +++ b/xen/include/asm-x86/hvm/domain.h	Mon Oct 22 13:57:08 2007 -0600
    37.3 @@ -61,6 +61,12 @@ struct hvm_domain {
    37.4  
    37.5      unsigned long          vmx_apic_access_mfn;
    37.6  
     37.7 +    /* is_in_uc_mode is set if any vCPU of this domain is in no-fill cache
     37.8 +     * mode, or if the MTRR/PAT settings differ between vCPUs.
     37.9 +     */
   37.10 +    spinlock_t       uc_lock;
   37.11 +    bool_t           is_in_uc_mode;
   37.12 +
   37.13      /* Pass-through */
   37.14      struct hvm_iommu       hvm_iommu;
   37.15  };
    38.1 --- a/xen/include/asm-x86/hvm/support.h	Mon Oct 22 12:30:17 2007 -0600
    38.2 +++ b/xen/include/asm-x86/hvm/support.h	Mon Oct 22 13:57:08 2007 -0600
    38.3 @@ -64,6 +64,7 @@ static inline vcpu_iodata_t *get_ioreq(s
    38.4  #define DBG_LEVEL_VLAPIC_INTERRUPT  (1 << 8)
    38.5  #define DBG_LEVEL_IOAPIC            (1 << 9)
    38.6  #define DBG_LEVEL_HCALL             (1 << 10)
    38.7 +#define DBG_LEVEL_MSR               (1 << 11)
    38.8  
    38.9  extern unsigned int opt_hvm_debug_level;
   38.10  #define HVM_DBG_LOG(level, _f, _a...)                                         \
    39.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Mon Oct 22 12:30:17 2007 -0600
    39.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Mon Oct 22 13:57:08 2007 -0600
    39.3 @@ -24,6 +24,7 @@
    39.4  #include <asm/hvm/vlapic.h>
    39.5  #include <asm/hvm/vmx/vmcs.h>
    39.6  #include <asm/hvm/svm/vmcb.h>
    39.7 +#include <asm/mtrr.h>
    39.8  
    39.9  #define HVM_VCPU_INIT_SIPI_SIPI_STATE_NORM          0
   39.10  #define HVM_VCPU_INIT_SIPI_SIPI_STATE_WAIT_SIPI     1
   39.11 @@ -62,6 +63,12 @@ struct hvm_vcpu {
   39.12          struct arch_vmx_struct vmx;
   39.13          struct arch_svm_struct svm;
   39.14      } u;
   39.15 +
   39.16 +    struct mtrr_state   mtrr;
   39.17 +    u64                 pat_cr;
   39.18 +
   39.19 +    /* Which cache mode is this VCPU in (CR0:CD/NW)? */
   39.20 +    u8                  cache_mode;
   39.21  };
   39.22  
   39.23  #define ARCH_HVM_IO_WAIT         1   /* Waiting for I/O completion */
    40.1 --- a/xen/include/asm-x86/msr-index.h	Mon Oct 22 12:30:17 2007 -0600
    40.2 +++ b/xen/include/asm-x86/msr-index.h	Mon Oct 22 13:57:08 2007 -0600
    40.3 @@ -68,6 +68,25 @@
    40.4  #define MSR_IA32_LASTBRANCHTOIP		0x000001dc
    40.5  #define MSR_IA32_LASTINTFROMIP		0x000001dd
    40.6  #define MSR_IA32_LASTINTTOIP		0x000001de
    40.7 + 
    40.8 +#define MSR_IA32_MTRR_PHYSBASE0     0x00000200
    40.9 +#define MSR_IA32_MTRR_PHYSMASK0     0x00000201
   40.10 +#define MSR_IA32_MTRR_PHYSBASE1     0x00000202
   40.11 +#define MSR_IA32_MTRR_PHYSMASK1     0x00000203
   40.12 +#define MSR_IA32_MTRR_PHYSBASE2     0x00000204
   40.13 +#define MSR_IA32_MTRR_PHYSMASK2     0x00000205
   40.14 +#define MSR_IA32_MTRR_PHYSBASE3     0x00000206
   40.15 +#define MSR_IA32_MTRR_PHYSMASK3     0x00000207
   40.16 +#define MSR_IA32_MTRR_PHYSBASE4     0x00000208
   40.17 +#define MSR_IA32_MTRR_PHYSMASK4     0x00000209
   40.18 +#define MSR_IA32_MTRR_PHYSBASE5     0x0000020a
   40.19 +#define MSR_IA32_MTRR_PHYSMASK5     0x0000020b
   40.20 +#define MSR_IA32_MTRR_PHYSBASE6     0x0000020c
   40.21 +#define MSR_IA32_MTRR_PHYSMASK6     0x0000020d
   40.22 +#define MSR_IA32_MTRR_PHYSBASE7     0x0000020e
   40.23 +#define MSR_IA32_MTRR_PHYSMASK7     0x0000020f
   40.24 +
   40.25 +#define MSR_IA32_CR_PAT             0x00000277
   40.26  
   40.27  #define MSR_IA32_MC0_CTL		0x00000400
   40.28  #define MSR_IA32_MC0_STATUS		0x00000401
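
The PHYSBASEn/PHYSMASKn numbers interleave (0x200 is base0, 0x201 mask0, 0x202
base1, ...), which is why the vmx_do_msr_read() hunk earlier can view the per-vCPU
struct mtrr_var_range array as a flat u64[] and index it directly with
(msr - PHYSBASE0). A tiny sketch of that arithmetic, with illustrative helper
names:

    #include <stdint.h>

    #define MSR_IA32_MTRR_PHYSBASE0 0x00000200

    /* Slot within a flat u64 view of {base0, mask0, base1, mask1, ...}. */
    static unsigned int var_mtrr_slot(uint32_t msr)
    {
        return msr - MSR_IA32_MTRR_PHYSBASE0;       /* 0..15 for 8 ranges */
    }

    static unsigned int var_mtrr_range(uint32_t msr)
    {
        return (msr - MSR_IA32_MTRR_PHYSBASE0) / 2; /* which of the 8 ranges */
    }

    static int var_mtrr_is_mask(uint32_t msr)
    {
        return (msr - MSR_IA32_MTRR_PHYSBASE0) & 1; /* 0 = base, 1 = mask */
    }

    int main(void)
    {
        /* 0x0203 is PHYSMASK1: slot 3, range 1, a mask register. */
        return (var_mtrr_slot(0x203) == 3 &&
                var_mtrr_range(0x203) == 1 &&
                var_mtrr_is_mask(0x203)) ? 0 : 1;
    }
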
    41.1 --- a/xen/include/asm-x86/mtrr.h	Mon Oct 22 12:30:17 2007 -0600
    41.2 +++ b/xen/include/asm-x86/mtrr.h	Mon Oct 22 13:57:08 2007 -0600
    41.3 @@ -10,6 +10,55 @@
    41.4  #define MTRR_TYPE_WRPROT     5
    41.5  #define MTRR_TYPE_WRBACK     6
    41.6  #define MTRR_NUM_TYPES       7
    41.7 +#define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
    41.8 +
    41.9 +#define MTRR_PHYSMASK_VALID_BIT  11
   41.10 +#define MTRR_PHYSMASK_SHIFT      12
   41.11 +
   41.12 +#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
   41.13 +#define MTRR_PHYSBASE_SHIFT      12
   41.14 +#define MTRR_VCNT            8
   41.15 +
   41.16 +#define NORMAL_CACHE_MODE          0
   41.17 +#define NO_FILL_CACHE_MODE         2
   41.18 +
   41.19 +enum {
   41.20 +    PAT_TYPE_UNCACHABLE=0,
   41.21 +    PAT_TYPE_WRCOMB=1,
   41.22 +    PAT_TYPE_RESERVED=2,
   41.23 +    PAT_TYPE_WRTHROUGH=4,
   41.24 +    PAT_TYPE_WRPROT=5,
   41.25 +    PAT_TYPE_WRBACK=6,
   41.26 +    PAT_TYPE_UC_MINUS=7,
   41.27 +    PAT_TYPE_NUMS
   41.28 +};
   41.29 +
   41.30 +#define INVALID_MEM_TYPE PAT_TYPE_NUMS
   41.31 +
   41.32 +/* In the Intel processor's MTRR interface, the MTRR type is always held in
    41.33 +   an 8-bit field: */
   41.34 +typedef u8 mtrr_type;
   41.35 +
   41.36 +struct mtrr_var_range {
   41.37 +	u32 base_lo;
   41.38 +	u32 base_hi;
   41.39 +	u32 mask_lo;
   41.40 +	u32 mask_hi;
   41.41 +};
   41.42 +
   41.43 +#define NUM_FIXED_RANGES 88
   41.44 +struct mtrr_state {
   41.45 +	struct mtrr_var_range *var_ranges;
   41.46 +	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
   41.47 +	unsigned char enabled;
   41.48 +	unsigned char have_fixed;
   41.49 +	mtrr_type def_type;
   41.50 +
   41.51 +	u64       mtrr_cap;
    41.52 +	/* Whether the ranges in the variable MSRs overlap (0 = no overlap). */
   41.53 +	bool_t    overlapped;
   41.54 +	bool_t    is_initialized;
   41.55 +};
   41.56  
   41.57  extern void mtrr_save_fixed_ranges(void *);
   41.58  extern void mtrr_save_state(void);
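
The masks and shifts added here are what a lookup over struct mtrr_state uses to
decide whether a physical address is covered by a variable range and, if so, with
which memory type. A standalone sketch of decoding one PHYSBASE/PHYSMASK pair with
these constants; var_mtrr_lookup() is an illustrative name, and the matching rule
is the architectural one (the range hits when base and address agree under the
mask):

    #include <stdint.h>
    #include <stdio.h>

    #define MTRR_PHYSMASK_VALID_BIT 11
    #define MTRR_PHYSMASK_SHIFT     12
    #define MTRR_PHYSBASE_TYPE_MASK 0xff
    #define MTRR_PHYSBASE_SHIFT     12

    /* Decode one variable-range MTRR pair: the low byte of PHYSBASE holds the
     * memory type, and the range is enabled only if the valid bit is set in
     * PHYSMASK.  Returns 1 and stores the type if addr is covered. */
    static int var_mtrr_lookup(uint64_t physbase, uint64_t physmask,
                               uint64_t addr, uint8_t *type)
    {
        if (!(physmask & (1ULL << MTRR_PHYSMASK_VALID_BIT)))
            return 0;                           /* range disabled */

        uint64_t mask = physmask & ~((1ULL << MTRR_PHYSMASK_SHIFT) - 1);
        uint64_t base = physbase & ~((1ULL << MTRR_PHYSBASE_SHIFT) - 1);

        if ((addr & mask) != (base & mask))
            return 0;                           /* address not covered */

        *type = physbase & MTRR_PHYSBASE_TYPE_MASK;
        return 1;
    }

    int main(void)
    {
        uint8_t t;
        /* 256MiB range at 0xe0000000 with type 1 (write-combining). */
        if (var_mtrr_lookup(0xe0000001ULL,
                            0xfffffffff0000000ULL | (1ULL << MTRR_PHYSMASK_VALID_BIT),
                            0xe8000000ULL, &t))
            printf("type %u\n", t);
        return 0;
    }
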
    42.1 --- a/xen/include/asm-x86/nmi.h	Mon Oct 22 12:30:17 2007 -0600
    42.2 +++ b/xen/include/asm-x86/nmi.h	Mon Oct 22 13:57:08 2007 -0600
    42.3 @@ -23,4 +23,19 @@ void set_nmi_callback(nmi_callback_t cal
    42.4   */
    42.5  void unset_nmi_callback(void);
    42.6   
    42.7 +/**
    42.8 + * register_guest_nmi_callback
    42.9 + *
   42.10 + * The default NMI handler passes the NMI to a guest callback. This
   42.11 + * function registers the address of that callback.
   42.12 + */
   42.13 +long register_guest_nmi_callback(unsigned long address);
   42.14 +
   42.15 +/**
   42.16 + * unregister_guest_nmi_callback
   42.17 + *
   42.18 + * Unregister a guest NMI handler.
   42.19 + */
   42.20 +long unregister_guest_nmi_callback(void);
   42.21 +
   42.22  #endif /* ASM_NMI_H */
    43.1 --- a/xen/include/public/hvm/params.h	Mon Oct 22 12:30:17 2007 -0600
    43.2 +++ b/xen/include/public/hvm/params.h	Mon Oct 22 13:57:08 2007 -0600
    43.3 @@ -53,7 +53,8 @@
    43.4  #ifdef __ia64__
    43.5  #define HVM_PARAM_NVRAM_FD     7
    43.6  #define HVM_PARAM_VHPT_SIZE    8
    43.7 -#define HVM_NR_PARAMS          9
    43.8 +#define HVM_PARAM_BUFPIOREQ_PFN	9
    43.9 +#define HVM_NR_PARAMS          10
   43.10  #else
   43.11  #define HVM_NR_PARAMS          7
   43.12  #endif
    44.1 --- a/xen/include/public/vcpu.h	Mon Oct 22 12:30:17 2007 -0600
    44.2 +++ b/xen/include/public/vcpu.h	Mon Oct 22 13:57:08 2007 -0600
    44.3 @@ -179,6 +179,9 @@ struct vcpu_register_vcpu_info {
    44.4  typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
    44.5  DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
    44.6  
    44.7 +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
    44.8 +#define VCPUOP_send_nmi             11
    44.9 +
   44.10  #endif /* __XEN_PUBLIC_VCPU_H__ */
   44.11  
   44.12  /*
    45.1 --- a/xen/include/xen/nmi.h	Mon Oct 22 12:30:17 2007 -0600
    45.2 +++ b/xen/include/xen/nmi.h	Mon Oct 22 13:57:08 2007 -0600
    45.3 @@ -11,19 +11,4 @@
    45.4  
    45.5  #include <asm/nmi.h>
    45.6  
    45.7 -/**
    45.8 - * register_guest_nmi_callback
    45.9 - *
   45.10 - * The default NMI handler passes the NMI to a guest callback. This
   45.11 - * function registers the address of that callback.
   45.12 - */
   45.13 -extern long register_guest_nmi_callback(unsigned long address);
   45.14 -
   45.15 -/**
   45.16 - * unregister_guest_nmi_callback
   45.17 - *
   45.18 - * Unregister a guest NMI handler.
   45.19 - */
   45.20 -extern long unregister_guest_nmi_callback(void);
   45.21 -
   45.22  #endif /* __XEN_NMI_H__ */
    46.1 --- a/xen/include/xen/sched.h	Mon Oct 22 12:30:17 2007 -0600
    46.2 +++ b/xen/include/xen/sched.h	Mon Oct 22 13:57:08 2007 -0600
    46.3 @@ -131,8 +131,6 @@ struct vcpu
    46.4      /* Bitmask of CPUs on which this VCPU may run. */
    46.5      cpumask_t        cpu_affinity;
    46.6  
    46.7 -    unsigned long    nmi_addr;      /* NMI callback address. */
    46.8 -
    46.9      /* Bitmask of CPUs which are holding onto this VCPU's state. */
   46.10      cpumask_t        vcpu_dirty_cpumask;
   46.11