ia64/xen-unstable

changeset 16168:328dcc446f9e

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Sun Oct 21 12:10:25 2007 -0600 (2007-10-21)
parents 3f6e855d218b 7231d971f78c
children d261b2d5e988
files tools/flask/libflask/include/flask_op.h xen/arch/ia64/xen/dom0_ops.c
line diff
     1.1 --- a/Config.mk	Wed Oct 17 10:36:31 2007 -0600
     1.2 +++ b/Config.mk	Sun Oct 21 12:10:25 2007 -0600
     1.3 @@ -4,7 +4,8 @@
     1.4  debug ?= n
     1.5  
     1.6  XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
     1.7 -                         -e s/ppc/powerpc/ -e s/i86pc/x86_32/)
     1.8 +                         -e s/ppc/powerpc/ -e s/i86pc/x86_32/   \
     1.9 +                         -e s/amd64/x86_64/)
    1.10  XEN_TARGET_ARCH     ?= $(XEN_COMPILE_ARCH)
    1.11  XEN_OS              ?= $(shell uname -s)
    1.12  
     2.1 --- a/docs/man/xm.pod.1	Wed Oct 17 10:36:31 2007 -0600
     2.2 +++ b/docs/man/xm.pod.1	Sun Oct 21 12:10:25 2007 -0600
     2.3 @@ -446,7 +446,6 @@ page more readable):
     2.4   machine                : i686
     2.5   nr_cpus                : 2
     2.6   nr_nodes               : 1
     2.7 - sockets_per_node       : 2
     2.8   cores_per_socket       : 1
     2.9   threads_per_core       : 1
    2.10   cpu_mhz                : 696
     3.1 --- a/extras/mini-os/include/x86/arch_sched.h	Wed Oct 17 10:36:31 2007 -0600
     3.2 +++ b/extras/mini-os/include/x86/arch_sched.h	Sun Oct 21 12:10:25 2007 -0600
     3.3 @@ -7,9 +7,9 @@ static inline struct thread* get_current
     3.4  {
     3.5      struct thread **current;
     3.6  #ifdef __i386__    
     3.7 -    __asm__("andl %%esp,%0; ":"=r" (current) : "r" (~8191UL));
     3.8 +    __asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
     3.9  #else
    3.10 -    __asm__("andq %%rsp,%0; ":"=r" (current) : "r" (~8191UL));
    3.11 +    __asm__("andq %%rsp,%0; ":"=r" (current) : "0" (~8191UL));
    3.12  #endif 
    3.13      return *current;
    3.14  }
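
The constraint change above is the entire fix: with two separate "r" constraints, GCC may choose different registers for the output and for the ~8191UL input, so the and instruction would mask %esp into a register that never held the mask, leaving get_current() with garbage. The matching "0" constraint ties the input to operand 0's register. A sketch of the difference (illustrative register choices, not actual compiler output):

    /* :"=r"(current) : "r"(~8191UL)  -- operands may land in different registers:
     *     movl $-8192, %ecx
     *     andl %esp, %eax          <- %eax never held the mask
     *
     * :"=r"(current) : "0"(~8191UL) -- input shares operand 0's register:
     *     movl $-8192, %eax
     *     andl %esp, %eax          <- stack pointer masked as intended
     */
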
     4.1 --- a/tools/examples/xmexample.vti	Wed Oct 17 10:36:31 2007 -0600
     4.2 +++ b/tools/examples/xmexample.vti	Sun Oct 21 12:10:25 2007 -0600
     4.3 @@ -35,6 +35,10 @@ name = "ExampleVTIDomain"
     4.4  #cpus = "0"        # all vcpus run on CPU0
     4.5  #cpus = "0-3,5,^1" # run on cpus 0,2,3,5
     4.6  
     4.7 +# Log2 of VHPT size, default=23 (8MB), minimum=15 (32KB).
     4.8 +# For Windows guests, a smaller size gives better performance.
     4.9 +#vhpt = 23
    4.10 +
    4.11  # Optionally define mac and/or bridge for the network interfaces.
    4.12  # Random MACs are assigned if not given.
    4.13  #vif = [ 'type=ioemu, mac=00:16:3e:00:00:11, bridge=xenbr0, model=ne2k_pci' ]
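
Worked arithmetic for the new vhpt option: the value is the log2 of the VHPT size in bytes, so the default 23 gives 1 << 23 bytes = 8 MiB and the minimum 15 gives 1 << 15 = 32 KiB. A minimal C sketch of the conversion (variable names are illustrative):

    /* config value (log2) -> VHPT size in bytes */
    unsigned long vhpt_bytes = 1UL << vhpt_log2;  /* 23 -> 8388608 (8 MiB) */
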
     5.1 --- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c	Wed Oct 17 10:36:31 2007 -0600
     5.2 +++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c	Sun Oct 21 12:10:25 2007 -0600
     5.3 @@ -533,7 +533,8 @@ uint16_t tcpa_add_measurement_to_log_sim
     5.4  	memset(&pcpes, 0x0, sizeof(pcpes));
     5.5  	pcpes.pcrindex = pcrIndex;
     5.6  	pcpes.eventtype = event_type;
     5.7 -	pcpes.eventdatasize = length;
     5.8 +	/* specs: 10.4.1, the EV_IPL event field should not contain the code. */
     5.9 +	pcpes.eventdatasize = 0;
    5.10  
    5.11  	hleei.ipblength = 0x18;
    5.12  	hleei.reserved  = 0x0;
    5.13 @@ -570,11 +571,9 @@ static const char ev_action[][23] = {
    5.14             "Start Option ROM Scan"
    5.15  };
    5.16  
    5.17 -
    5.18 -static char evt_separator[] = "---------------";
    5.19 +static char evt_separator[] = {0xff,0xff,0xff,0xff}; 
    5.20  static char wake_event_1[]    = "Wake Event 1";
    5.21  
    5.22 -
    5.23  /*
    5.24   * Add a measurement to the list of measurements
    5.25   * pcrIndex   : PCR to be extended
    5.26 @@ -590,11 +589,10 @@ void tcpa_add_measurement(uint32_t pcrIn
    5.27  
    5.28  	switch (event_type) {
    5.29  	case EV_SEPARATOR:
    5.30 -		tcpa_add_measurement_to_log(pcrIndex,
    5.31 +		tcpa_add_measurement_to_log_simple(pcrIndex,
    5.32  		                            event_type,
    5.33 -		                            0,
    5.34  		                            evt_separator,
    5.35 -		                            strlen(evt_separator));
    5.36 +		                            4);
    5.37  	break;
    5.38  	case EV_ACTION:
    5.39  		string = ev_action[data /* event_id */];
    5.40 @@ -723,22 +721,44 @@ void tcpa_option_rom(uint32_t seg)
    5.41   * Creates two log entries
    5.42   *
    5.43   * Input parameter:
    5.44 + *  bootcd : 0: MBR of hdd, 1: boot image, 2: boot catalog of El Torito
    5.45   *  seg    : segment where the IPL data are located
    5.46 + *  off    : offset where the IPL data are located
    5.47 + *  count  : length in bytes
    5.48   */
    5.49 -void tcpa_ipl(Bit32u seg)
    5.50 +void tcpa_ipl(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count)
    5.51  {
    5.52 -	/* specs: 8.2.5.3 */
    5.53 -	uint8_t *addr = (uint8_t *)ADDR_FROM_SEG_OFF(seg,0);
    5.54 -	/* equivalent to: dd if=/dev/hda ibs=1 count=440 | sha1sum */
    5.55 -	tcpa_add_measurement_to_log_simple(4,
    5.56 -	                                   EV_IPL,
    5.57 -	                                   addr,
    5.58 -	                                   0x1b8);
    5.59 -	/* equivalent to: dd if=/dev/hda ibs=1 count=72 skip=440 | sha1sum */
    5.60 -	tcpa_add_measurement_to_log_simple(5,
    5.61 -	                                   EV_IPL_PARTITION_DATA,
    5.62 -	                                   addr + 0x1b8,
    5.63 -	                                   0x48);
    5.64 +	uint8_t *addr = (uint8_t *)ADDR_FROM_SEG_OFF(seg,off);
    5.65 +	if (bootcd == 1) {
    5.66 +		/* specs: 8.2.5.6 El Torito */
    5.67 +		tcpa_add_measurement_to_log_simple(4,
    5.68 +						   EV_IPL,
    5.69 +						   addr,
    5.70 +						   count);
    5.71 +	}
    5.72 +	else if (bootcd == 2) { /* Boot Catalog */
    5.73 +
    5.74 +		/* specs: 8.2.5.6 El Torito */
    5.75 +		tcpa_add_measurement_to_log_simple(5,
    5.76 +						   EV_IPL_PARTITION_DATA,
    5.77 +						   addr,
    5.78 +						   count);
    5.79 +	}
    5.80 +	else {
    5.81 +		/* specs: 8.2.5.3 */
    5.82 +		/* equivalent to: dd if=/dev/hda ibs=1 count=440 | sha1sum */
    5.83 +		tcpa_add_measurement_to_log_simple(4,
    5.84 +						   EV_IPL,
    5.85 +						   addr,
    5.86 +		                                   0x1b8);
    5.87 +
    5.88 +
    5.89 +		/* equivalent to: dd if=/dev/hda ibs=1 count=72 skip=440 | sha1sum */
    5.90 +		tcpa_add_measurement_to_log_simple(5,
    5.91 +						   EV_IPL_PARTITION_DATA,
    5.92 +						   addr + 0x1b8,
    5.93 +						   0x48);
    5.94 +	}
    5.95  }
    5.96  
    5.97  void tcpa_measure_post(Bit32u from, Bit32u to)
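
Two behavioural changes in this file: per specs 10.4.1 the EV_IPL event field no longer carries the measured code (eventdatasize is forced to 0), and the separator is measured as the four raw bytes ff ff ff ff rather than an ASCII string. A sketch of the separator call after this change (pcrIndex as passed by the caller):

    static char evt_separator[] = {0xff, 0xff, 0xff, 0xff};
    /* length is exactly 4: the raw bytes, not a NUL-terminated string */
    tcpa_add_measurement_to_log_simple(pcrIndex, EV_SEPARATOR,
                                       evt_separator, 4);
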
     6.1 --- a/tools/firmware/rombios/32bitprotos.h	Wed Oct 17 10:36:31 2007 -0600
     6.2 +++ b/tools/firmware/rombios/32bitprotos.h	Sun Oct 21 12:10:25 2007 -0600
     6.3 @@ -38,7 +38,7 @@ void tcpa_wake_event( PARMS(void) );
     6.4  void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) );
     6.5  void tcpa_start_option_rom_scan( PARMS(void) );
     6.6  void tcpa_option_rom( PARMS(Bit32u seg) );
     6.7 -void tcpa_ipl( PARMS(Bit32u seg) );
     6.8 +void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
     6.9  void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
    6.10  Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
    6.11  
     7.1 --- a/tools/firmware/rombios/rombios.c	Wed Oct 17 10:36:31 2007 -0600
     7.2 +++ b/tools/firmware/rombios/rombios.c	Sun Oct 21 12:10:25 2007 -0600
     7.3 @@ -3378,6 +3378,13 @@ cdrom_boot()
     7.4    // Initial/Default Entry
     7.5    if(buffer[0x20]!=0x88)return 11; // Bootable
     7.6  
     7.7 +#if BX_TCGBIOS
     7.8 +  /* specs: 8.2.3 step 5 and 8.2.5.6, measure El Torito boot catalog */
     7.9 +  /* measure 2048 bytes (one sector) */
    7.10 +  tcpa_add_bootdevice((Bit32u)1L, (Bit32u)0L); /* bootcd = 1 */
    7.11 +  tcpa_ipl((Bit32u)2L,(Bit32u)get_SS(),(Bit32u)buffer,(Bit32u)2048L);
    7.12 +#endif
    7.13 +
    7.14    write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
    7.15    if(buffer[0x21]==0){
    7.16      // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0. 
    7.17 @@ -3416,6 +3423,13 @@ cdrom_boot()
    7.18    if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, nbsectors*512L, ATA_DATA_IN, boot_segment,0)) != 0)
    7.19      return 12;
    7.20  
    7.21 +#if BX_TCGBIOS
    7.22 +  /* specs: 8.2.3 step 4 and 8.2.5.6, measure El Torito boot image */
    7.23 +  /* measure 1st 512 bytes  */
    7.24 +  tcpa_ipl((Bit32u)1L,(Bit32u)boot_segment,(Bit32u)0L,(Bit32u)512L);
    7.25 +#endif
    7.26 +
    7.27 +
    7.28    // Remember the media type
    7.29    switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
    7.30      case 0x01:  // 1.2M floppy
    7.31 @@ -7686,6 +7700,7 @@ ASM_END
    7.32  
    7.33  #if BX_TCGBIOS
    7.34      tcpa_add_bootdevice((Bit32u)0L, (Bit32u)bootdrv);
    7.35 +    tcpa_ipl((Bit32u)0L,(Bit32u)bootseg,(Bit32u)0L,(Bit32u)512L); /* specs: 8.2.3 steps 4 and 5 */
    7.36  #endif
    7.37  
    7.38      /* Canonicalize bootseg:bootip */
    7.39 @@ -7706,9 +7721,6 @@ ASM_END
    7.40  
    7.41      bootdrv = (Bit8u)(status>>8);
    7.42      bootseg = read_word(ebda_seg,&EbdaData->cdemu.load_segment);
    7.43 -#if BX_TCGBIOS
    7.44 -    tcpa_add_bootdevice((Bit32u)1L, (Bit32u)0L);
    7.45 -#endif
    7.46  
    7.47      /* Canonicalize bootseg:bootip */
    7.48      bootip = (bootseg & 0x0fff) << 4;
    7.49 @@ -7724,9 +7736,6 @@ ASM_END
    7.50    default: return;
    7.51    }
    7.52  
    7.53 -#if BX_TCGBIOS
    7.54 -  tcpa_ipl((Bit32u)bootseg);               /* specs: 8.2.3 steps 4 and 5 */
    7.55 -#endif
    7.56    
    7.57    /* Jump to the boot vector */
    7.58  ASM_START
    7.59 @@ -9795,16 +9804,14 @@ post_default_ints:
    7.60  #if BX_TCGBIOS
    7.61    call _tcpa_calling_int19h          /* specs: 8.2.3 step 1 */
    7.62    call _tcpa_add_event_separators    /* specs: 8.2.3 step 2 */
    7.63 +  /* we do not call the int 19h handler, but still log the following events */
    7.64 +  call _tcpa_returned_int19h         /* specs: 8.2.3 step 3/7 */
    7.65  #endif
    7.66  
    7.67    ;; Start the boot sequence.   See the comments in int19_relocated 
    7.68    ;; for why we use INT 18h instead of INT 19h here.
    7.69    int  #0x18
    7.70  
    7.71 -#if BX_TCGBIOS
    7.72 -  call _tcpa_returned_int19h         /* specs: 8.2.3 step 3/7 */
    7.73 -#endif
    7.74 -
    7.75  .org 0xe2c3 ; NMI Handler Entry Point
    7.76  nmi:
    7.77    ;; FIXME the NMI handler should not panic
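
Taken together, the rombios.c hunks move each IPL measurement to the point where the measured data is actually in memory. The resulting call sites, with arguments as in the hunks above:

    /* tcpa_ipl(bootcd, seg, off, count) */
    tcpa_ipl((Bit32u)0L, (Bit32u)bootseg,      (Bit32u)0L,     (Bit32u)512L);   /* MBR of hdd, specs 8.2.5.3 */
    tcpa_ipl((Bit32u)1L, (Bit32u)boot_segment, (Bit32u)0L,     (Bit32u)512L);   /* El Torito boot image */
    tcpa_ipl((Bit32u)2L, (Bit32u)get_SS(),     (Bit32u)buffer, (Bit32u)2048L);  /* El Torito boot catalog */

_tcpa_returned_int19h is likewise called before the int #0x18 that starts the boot sequence, since control does not return there once a boot vector is jumped to.
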
     8.1 --- a/tools/firmware/rombios/tcgbios.c	Wed Oct 17 10:36:31 2007 -0600
     8.2 +++ b/tools/firmware/rombios/tcgbios.c	Sun Oct 21 12:10:25 2007 -0600
     8.3 @@ -150,8 +150,11 @@ void
     8.4   *  seg    : segment where the IPL data are located
     8.5   */
     8.6   void
     8.7 -tcpa_ipl(seg)
     8.8 + tcpa_ipl(bootcd,seg,off,count)
     8.9 +    Bit32u bootcd;
    8.10      Bit32u seg;
    8.11 +    Bit32u off;
    8.12 +    Bit32u count;
    8.13  {
    8.14  	ASM_START
    8.15  	DoUpcall(IDX_TCPA_IPL)
     9.1 --- a/tools/flask/libflask/Makefile	Wed Oct 17 10:36:31 2007 -0600
     9.2 +++ b/tools/flask/libflask/Makefile	Sun Oct 21 12:10:25 2007 -0600
     9.3 @@ -39,7 +39,7 @@ install: build
     9.4  	$(INSTALL_DATA) libflask.a $(DESTDIR)/usr/$(LIBDIR)
     9.5  	ln -sf libflask.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libflask.so.$(MAJOR)
     9.6  	ln -sf libflask.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libflask.so
     9.7 -	$(INSTALL_DATA) include/flask_op.h $(DESTDIR)/usr/include
     9.8 +	$(INSTALL_DATA) include/flask.h $(DESTDIR)/usr/include
     9.9  
    9.10  .PHONY: TAGS
    9.11  TAGS:
    10.1 --- a/tools/flask/libflask/flask_op.c	Wed Oct 17 10:36:31 2007 -0600
    10.2 +++ b/tools/flask/libflask/flask_op.c	Sun Oct 21 12:10:25 2007 -0600
    10.3 @@ -17,11 +17,10 @@
    10.4  #include <sys/types.h>
    10.5  #include <sys/stat.h>
    10.6  #include <stdlib.h>
    10.7 +#include <stdint.h>
    10.8  #include <sys/ioctl.h>
    10.9 -
   10.10 -#include <xc_private.h>
   10.11 -
   10.12 -#include <flask_op.h>
   10.13 +#include <flask.h>
   10.14 +#include <xenctrl.h>
   10.15  
   10.16  int flask_load(int xc_handle, char *buf, int size)
   10.17  {
   10.18 @@ -32,7 +31,7 @@ int flask_load(int xc_handle, char *buf,
   10.19      op.buf = buf;
   10.20      op.size = size;
   10.21      
   10.22 -    if ( (err = do_flask_op(xc_handle, &op)) != 0 )
   10.23 +    if ( (err = xc_flask_op(xc_handle, &op)) != 0 )
   10.24          return err;
   10.25  
   10.26      return 0;
   10.27 @@ -47,7 +46,7 @@ int flask_context_to_sid(int xc_handle, 
   10.28      op.buf = buf;
   10.29      op.size = size;
   10.30      
   10.31 -    if ( (err = do_flask_op(xc_handle, &op)) != 0 )
   10.32 +    if ( (err = xc_flask_op(xc_handle, &op)) != 0 )
   10.33          return err;
   10.34      
   10.35      sscanf(buf, "%u", sid);
   10.36 @@ -66,35 +65,8 @@ int flask_sid_to_context(int xc_handle, 
   10.37      
   10.38      snprintf(buf, size, "%u", sid);
   10.39  
   10.40 -    if ( (err = do_flask_op(xc_handle, &op)) != 0 )
   10.41 +    if ( (err = xc_flask_op(xc_handle, &op)) != 0 )
   10.42          return err;
   10.43  
   10.44      return 0;
   10.45  }
   10.46 -
   10.47 -int do_flask_op(int xc_handle, flask_op_t *op)
   10.48 -{
   10.49 -    int ret = -1;
   10.50 -    DECLARE_HYPERCALL;
   10.51 -
   10.52 -    hypercall.op     = __HYPERVISOR_xsm_op;
   10.53 -    hypercall.arg[0] = (unsigned long)op;
   10.54 -
   10.55 -    if ( mlock(op, sizeof(*op)) != 0 )
   10.56 -    {
   10.57 -        PERROR("Could not lock memory for Xen hypercall");
   10.58 -        goto out;
   10.59 -    }
   10.60 -
   10.61 -    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
   10.62 -    {
   10.63 -        if ( errno == EACCES )
   10.64 -            fprintf(stderr, "XSM operation failed!\n");
   10.65 -    }
   10.66 -
   10.67 -    safe_munlock(op, sizeof(*op));
   10.68 -
   10.69 - out:
   10.70 -    return ret;
   10.71 -}
   10.72 -
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/flask/libflask/include/flask.h	Sun Oct 21 12:10:25 2007 -0600
    11.3 @@ -0,0 +1,22 @@
    11.4 +/*
    11.5 + *
    11.6 + *  Authors:  Michael LeMay, <mdlemay@epoch.ncsc.mil>
    11.7 + *            George Coker, <gscoker@alpha.ncsc.mil>
    11.8 + *
    11.9 + *  This program is free software; you can redistribute it and/or modify
   11.10 + *  it under the terms of the GNU General Public License version 2,
   11.11 + *  as published by the Free Software Foundation.
   11.12 + */
   11.13 +
   11.14 +#ifndef __FLASK_H__
   11.15 +#define __FLASK_H__
   11.16 +
   11.17 +#include <stdint.h>
   11.18 +#include <xen/xen.h>
   11.19 +#include <xen/xsm/flask_op.h>
   11.20 +
   11.21 +int flask_load(int xc_handle, char *buf, int size);
   11.22 +int flask_context_to_sid(int xc_handle, char *buf, int size, uint32_t *sid);
   11.23 +int flask_sid_to_context(int xc_handle, int sid, char *buf, int size);
   11.24 +
   11.25 +#endif /* __FLASK_H__ */
    12.1 --- a/tools/flask/libflask/include/flask_op.h	Wed Oct 17 10:36:31 2007 -0600
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,46 +0,0 @@
    12.4 -/*
    12.5 - *
    12.6 - *  Authors:  Michael LeMay, <mdlemay@epoch.ncsc.mil>
    12.7 - *            George Coker, <gscoker@alpha.ncsc.mil>
    12.8 - *
    12.9 - *  This program is free software; you can redistribute it and/or modify
   12.10 - *  it under the terms of the GNU General Public License version 2,
   12.11 - *  as published by the Free Software Foundation.
   12.12 - */
   12.13 -
   12.14 -#ifndef __FLASK_OP_H
   12.15 -#define __FLASK_OP_H
   12.16 -
   12.17 -#define FLASK_LOAD              1
   12.18 -#define FLASK_GETENFORCE        2
   12.19 -#define FLASK_SETENFORCE        3
   12.20 -#define FLASK_CONTEXT_TO_SID    4
   12.21 -#define FLASK_SID_TO_CONTEXT    5
   12.22 -#define FLASK_ACCESS            6
   12.23 -#define FLASK_CREATE            7
   12.24 -#define FLASK_RELABEL           8
   12.25 -#define FLASK_USER              9
   12.26 -#define FLASK_POLICYVERS        10
   12.27 -#define FLASK_GETBOOL           11
   12.28 -#define FLASK_SETBOOL           12
   12.29 -#define FLASK_COMMITBOOLS       13
   12.30 -#define FLASK_MLS               14
   12.31 -#define FLASK_DISABLE           15
   12.32 -#define FLASK_GETAVC_THRESHOLD  16
   12.33 -#define FLASK_SETAVC_THRESHOLD  17
   12.34 -#define FLASK_AVC_HASHSTATS     18
   12.35 -#define FLASK_AVC_CACHESTATS    19
   12.36 -#define FLASK_MEMBER            20
   12.37 -
   12.38 -typedef struct flask_op {
   12.39 -    int   cmd;
   12.40 -    int   size;
   12.41 -    char *buf;
   12.42 -} flask_op_t;
   12.43 -
   12.44 -int flask_load(int xc_handle, char *buf, int size);
   12.45 -int flask_context_to_sid(int xc_handle, char *buf, int size, uint32_t *sid);
   12.46 -int flask_sid_to_context(int xc_handle, int sid, char *buf, int size);
   12.47 -int do_flask_op(int xc_handle, flask_op_t *op);
   12.48 -
   12.49 -#endif
    13.1 --- a/tools/flask/loadpolicy/loadpolicy.c	Wed Oct 17 10:36:31 2007 -0600
    13.2 +++ b/tools/flask/loadpolicy/loadpolicy.c	Sun Oct 21 12:10:25 2007 -0600
    13.3 @@ -17,8 +17,7 @@
    13.4  #include <sys/stat.h>
    13.5  #include <string.h>
    13.6  #include <unistd.h>
    13.7 -
    13.8 -#include <flask_op.h>
    13.9 +#include <flask.h>
   13.10  
   13.11  #define USE_MMAP
   13.12  
    14.1 --- a/tools/ioemu/hw/pass-through.c	Wed Oct 17 10:36:31 2007 -0600
    14.2 +++ b/tools/ioemu/hw/pass-through.c	Sun Oct 21 12:10:25 2007 -0600
    14.3 @@ -28,40 +28,32 @@
    14.4  #include "pci/pci.h"
    14.5  
    14.6  extern FILE *logfile;
    14.7 -char *token;
    14.8  
    14.9 -int pci_devs(const char *direct_pci)
   14.10 +static int token_value(char *token)
   14.11  {
   14.12 -    int count = 0;
   14.13 -    const char *c;
   14.14 -
   14.15 -    /* skip first "[" character */
   14.16 -    c = direct_pci + 1;
   14.17 -    while ((c = strchr(c, '[')) != NULL) {
   14.18 -        c++;
   14.19 -        count++;
   14.20 -    }
   14.21 -    return (count);
   14.22 +    token = strchr(token, 'x') + 1;
   14.23 +    return strtol(token, NULL, 16);
   14.24  }
   14.25  
   14.26 -int next_token(char *direct_pci)
   14.27 +static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
   14.28  {
   14.29 -    if (token == NULL)
   14.30 -        token = strtok(direct_pci, ",");
   14.31 -    else 
   14.32 -        token = strtok(NULL, ",");
   14.33 -    token = strchr(token, 'x');
   14.34 -    token = token + 1;
   14.35 -    return ((int) strtol(token, NULL, 16));
   14.36 -}
   14.37 +    char *token;
   14.38 +
   14.39 +    token = strchr(*str, ',');
   14.40 +    if ( !token )
   14.41 +        return 0;
   14.42 +    token++;
   14.43  
   14.44 -void next_bdf(char *direct_pci, int *seg,
   14.45 -              int *bus, int *dev, int *func)
   14.46 -{
   14.47 -    *seg  = next_token(direct_pci);
   14.48 -    *bus  = next_token(direct_pci);
   14.49 -    *dev  = next_token(direct_pci);
   14.50 -    *func = next_token(direct_pci);
   14.51 +    *seg  = token_value(token);
   14.52 +    token = strchr(token, ',') + 1;
   14.53 +    *bus  = token_value(token);
   14.54 +    token = strchr(token, ',') + 1;
   14.55 +    *dev  = token_value(token);
   14.56 +    token = strchr(token, ',') + 1;
   14.57 +    *func  = token_value(token);
   14.58 +
   14.59 +    *str = token;
   14.60 +    return 1;
   14.61  }
   14.62  
   14.63  uint8_t find_cap_offset(struct pci_dev *pci_dev, uint8_t cap)
   14.64 @@ -333,7 +325,6 @@ struct pt_dev * register_real_device(PCI
   14.65      int rc, i;
   14.66      struct pt_dev *assigned_device = NULL;
   14.67      struct pci_dev *pci_dev;
   14.68 -    struct pci_config_cf8 machine_bdf;
   14.69      uint8_t e_device, e_intx;
   14.70  
   14.71      PT_LOG("Assigning real physical device %02x:%02x.%x ...\n",
   14.72 @@ -368,15 +359,6 @@ struct pt_dev * register_real_device(PCI
   14.73      /* Issue PCIe FLR */
   14.74      pdev_flr(pci_dev);
   14.75  
   14.76 -    /* Tell XEN vmm to change iommu settings */
   14.77 -    machine_bdf.reg = 0;
   14.78 -    machine_bdf.bus = r_bus;
   14.79 -    machine_bdf.dev = r_dev;
   14.80 -    machine_bdf.func = r_func;
   14.81 -    rc = xc_assign_device(xc_handle, domid, machine_bdf.value);
   14.82 -    if ( rc < 0 )
   14.83 -        PT_LOG("Error: xc_domain_assign_device error %d\n", rc);
   14.84 -
   14.85      /* Initialize virtualized PCI configuration (Extended 256 Bytes) */
   14.86      for ( i = 0; i < PCI_CONFIG_SIZE; i++ )
   14.87          assigned_device->dev.config[i] = pci_read_byte(pci_dev, i);
   14.88 @@ -417,11 +399,9 @@ struct pt_dev * register_real_device(PCI
   14.89  
   14.90  int pt_init(PCIBus *e_bus, char *direct_pci)
   14.91  {
   14.92 -    int i;
   14.93      int seg, b, d, f;
   14.94      struct pt_dev *pt_dev;
   14.95      struct pci_access *pci_access;
   14.96 -    int dev_count = pci_devs(direct_pci);
   14.97  
   14.98      /* Initialize libpci */
   14.99      pci_access = pci_alloc();
  14.100 @@ -434,11 +414,8 @@ int pt_init(PCIBus *e_bus, char *direct_
  14.101      pci_scan_bus(pci_access);
  14.102  
  14.103      /* Assign given devices to guest */
  14.104 -    for ( i = 0; i < dev_count; i++ )
  14.105 +    while ( next_bdf(&direct_pci, &seg, &b, &d, &f) )
  14.106      {
  14.107 -        /* Get next device bdf (bus, device, function) */
  14.108 -        next_bdf(direct_pci, &seg, &b, &d, &f);
  14.109 -
  14.110          /* Register real device with the emulated bus */
  14.111          pt_dev = register_real_device(e_bus, "DIRECT PCI", PT_VIRT_DEVFN_AUTO,
  14.112              b, d, f, PT_MACHINE_IRQ_AUTO, pci_access);
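
next_bdf() replaces the old count-then-tokenize parser with a single pass: it advances past the next ',' and then reads four hex fields, hopping to each via strchr(). A hedged usage sketch (the exact direct_pci serialization comes from xend; the input string below is only illustrative):

    char buf[] = "dev,0x0,0x3,0x10,0x0";  /* -> seg 0, bus 3, dev 0x10, func 0 */
    char *s = buf;
    int seg, bus, dev, func;
    while ( next_bdf(&s, &seg, &bus, &dev, &func) )
        printf("%04x:%02x:%02x.%x\n", seg, bus, dev, func);
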
    15.1 --- a/tools/ioemu/hw/xen_platform.c	Wed Oct 17 10:36:31 2007 -0600
    15.2 +++ b/tools/ioemu/hw/xen_platform.c	Sun Oct 21 12:10:25 2007 -0600
    15.3 @@ -25,7 +25,6 @@
    15.4  #include "vl.h"
    15.5  
    15.6  #include <xenguest.h>
    15.7 -#include <xc_private.h>
    15.8  
    15.9  extern FILE *logfile;
   15.10  
    16.1 --- a/tools/ioemu/keymaps/nl-be	Wed Oct 17 10:36:31 2007 -0600
    16.2 +++ b/tools/ioemu/keymaps/nl-be	Sun Oct 21 12:10:25 2007 -0600
    16.3 @@ -1,3 +1,69 @@
    16.4  # Dutch (Belgium)
    16.5  map 0x813
    16.6  include common
    16.7 +ampersand 0x02
    16.8 +1 0x02 shift
    16.9 +bar 0x02 altgr
   16.10 +eacute 0x03
   16.11 +2 0x03 shift
   16.12 +at 0x03 altgr
   16.13 +quotedbl 0x04
   16.14 +3 0x04 shift
   16.15 +numbersign 0x04 altgr
   16.16 +apostrophe 0x05
   16.17 +4 0x05 shift
   16.18 +parenleft 0x06
   16.19 +5 0x06 shift
   16.20 +section 0x07
   16.21 +6 0x07 shift
   16.22 +circumflex 0x07 altgr
   16.23 +egrave 0x08
   16.24 +7 0x08 shift
   16.25 +exclam 0x09
   16.26 +8 0x09 shift
   16.27 +bracketleft 0x09 altgr
   16.28 +ccedilla 0x0a
   16.29 +9 0x0a shift
   16.30 +braceleft 0x0a altgr
   16.31 +agrave 0x0b
   16.32 +0 0x0b shift
   16.33 +braceright 0x0b altgr
   16.34 +parenright 0x0c
   16.35 +degree 0x0c shift
   16.36 +minus 0x0d
   16.37 +underscore 0x0d shift
   16.38 +a 0x10 addupper
   16.39 +z 0x11 addupper
   16.40 +EuroSign 0x12 altgr
   16.41 +dead_circumflex 0x1a
   16.42 +dead_diaeresis 0x1a shift
   16.43 +bracketleft 0x1a altgr
   16.44 +dollar 0x1b
   16.45 +asterisk 0x1b shift
   16.46 +bracketright 0x1b altgr
   16.47 +q 0x1e addupper
   16.48 +m 0x27 addupper
   16.49 +ugrave 0x28
   16.50 +percent 0x28 shift
   16.51 +dead_acute 0x28 altgr
   16.52 +twosuperior 0x29
   16.53 +threesuperior 0x29 shift
   16.54 +mu 0x2b
   16.55 +sterling 0x2b shift
   16.56 +dead_grave 0x2b altgr
   16.57 +w 0x2c addupper
   16.58 +comma 0x32
   16.59 +question 0x32 shift
   16.60 +semicolon 0x33
   16.61 +period 0x33 shift
   16.62 +colon 0x34
   16.63 +slash 0x34 shift
   16.64 +periodcentered 0x34 altgr
   16.65 +equal 0x35
   16.66 +plus 0x35 shift
   16.67 +tilde 0x35 altgr
   16.68 +dead_tilde 0x35 shift altgr
   16.69 +less 0x56
   16.70 +greater 0x56 shift
   16.71 +backslash 0x56 altgr
   16.72 +
    17.1 --- a/tools/ioemu/xenstore.c	Wed Oct 17 10:36:31 2007 -0600
    17.2 +++ b/tools/ioemu/xenstore.c	Sun Oct 21 12:10:25 2007 -0600
    17.3 @@ -82,8 +82,8 @@ void xenstore_parse_domain_config(int do
    17.4      char **e = NULL;
    17.5      char *buf = NULL, *path;
    17.6      char *fpath = NULL, *bpath = NULL,
    17.7 -        *dev = NULL, *params = NULL, *type = NULL;
    17.8 -    int i, is_scsi;
    17.9 +        *dev = NULL, *params = NULL, *type = NULL, *drv = NULL;
   17.10 +    int i, is_scsi, is_hdN = 0;
   17.11      unsigned int len, num, hd_index;
   17.12  
   17.13      for(i = 0; i < MAX_DISKS + MAX_SCSI_DISKS; i++)
   17.14 @@ -123,6 +123,35 @@ void xenstore_parse_domain_config(int do
   17.15          dev = xs_read(xsh, XBT_NULL, buf, &len);
   17.16          if (dev == NULL)
   17.17              continue;
   17.18 +        if (!strncmp(dev, "hd", 2)) {
   17.19 +            is_hdN = 1;
   17.20 +            break;
   17.21 +        }
   17.22 +    }
   17.23 +        
   17.24 +    for (i = 0; i < num; i++) {
   17.25 +        /* read the backend path */
   17.26 +        if (pasprintf(&buf, "%s/device/vbd/%s/backend", path, e[i]) == -1)
   17.27 +            continue;
   17.28 +        free(bpath);
   17.29 +        bpath = xs_read(xsh, XBT_NULL, buf, &len);
   17.30 +        if (bpath == NULL)
   17.31 +            continue;
   17.32 +        /* read the name of the device */
   17.33 +        if (pasprintf(&buf, "%s/dev", bpath) == -1)
   17.34 +            continue;
   17.35 +        free(dev);
   17.36 +        dev = xs_read(xsh, XBT_NULL, buf, &len);
   17.37 +        if (dev == NULL)
   17.38 +            continue;
   17.39 +        /* Change xvdN to look like hdN */
   17.40 +        if (!is_hdN && !strncmp(dev, "xvd", 3)) {
   17.41 +            fprintf(logfile, "Change xvd%c to look like hd%c\n",
   17.42 +                    dev[3], dev[3]);
   17.43 +            memmove(dev, dev+1, strlen(dev));
   17.44 +            dev[0] = 'h';
   17.45 +            dev[1] = 'd';
   17.46 +        }
   17.47          is_scsi = !strncmp(dev, "sd", 2);
   17.48          if ((strncmp(dev, "hd", 2) && !is_scsi) || strlen(dev) != 3 )
   17.49              continue;
   17.50 @@ -140,6 +169,22 @@ void xenstore_parse_domain_config(int do
   17.51          params = xs_read(xsh, XBT_NULL, buf, &len);
   17.52          if (params == NULL)
   17.53              continue;
   17.54 +        /* read the type of the device */
   17.55 +        if (pasprintf(&buf, "%s/type", bpath) == -1)
   17.56 +            continue;
   17.57 +        free(drv);
   17.58 +        drv = xs_read(xsh, XBT_NULL, buf, &len);
   17.59 +        if (drv == NULL)
   17.60 +            continue;
   17.61 +        /* Strip off blktap sub-type prefix aio: - QEMU can autodetect this */
   17.62 +        if (!strcmp(drv, "tap") && params[0]) {
   17.63 +            char *offset = strchr(params, ':'); 
   17.64 +            if (!offset)
   17.65 +                continue ;
   17.66 +            memmove(params, offset+1, strlen(offset+1)+1 );
   17.67 +            fprintf(logfile, "Strip off blktap sub-type prefix to %s\n", params); 
   17.68 +        }
   17.69 +
   17.70          /* 
   17.71           * check if device has a phantom vbd; the phantom is hooked
   17.72           * to the frontend device (for ease of cleanup), so lookup 
   17.73 @@ -202,6 +247,7 @@ void xenstore_parse_domain_config(int do
   17.74      free(buf);
   17.75      free(path);
   17.76      free(e);
   17.77 +    free(drv);
   17.78      return;
   17.79  }
   17.80  
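
Both new fixups normalize what qemu-dm sees: an xvdN device is renamed to hdN when no real hdN disk is configured, and the blktap sub-type prefix (e.g. "aio:") is stripped from params when the backend type is "tap". A sketch of the in-place rename used above (buffer assumed writable, name assumed "xvd" plus one letter):

    char dev[] = "xvdb";
    memmove(dev, dev + 1, strlen(dev));  /* copies "vdb" and its NUL */
    dev[0] = 'h';
    dev[1] = 'd';                        /* dev is now "hdb" */
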
    18.1 --- a/tools/libxc/Makefile	Wed Oct 17 10:36:31 2007 -0600
    18.2 +++ b/tools/libxc/Makefile	Sun Oct 21 12:10:25 2007 -0600
    18.3 @@ -12,7 +12,8 @@ CTRL_SRCS-$(CONFIG_POWERPC) += xc_core_p
    18.4  CTRL_SRCS-y       += xc_domain.c
    18.5  CTRL_SRCS-y       += xc_evtchn.c
    18.6  CTRL_SRCS-y       += xc_misc.c
    18.7 -CTRL_SRCS-y       += xc_acm.c   
    18.8 +CTRL_SRCS-y       += xc_acm.c
    18.9 +CTRL_SRCS-y       += xc_flask.c
   18.10  CTRL_SRCS-y       += xc_physdev.c
   18.11  CTRL_SRCS-y       += xc_private.c
   18.12  CTRL_SRCS-y       += xc_sedf.c
    19.1 --- a/tools/libxc/xc_core.c	Wed Oct 17 10:36:31 2007 -0600
    19.2 +++ b/tools/libxc/xc_core.c	Sun Oct 21 12:10:25 2007 -0600
    19.3 @@ -628,7 +628,7 @@ xc_domain_dumpcore_via_callback(int xc_h
    19.4          PERROR("could not get section headers for .xen_pages");
    19.5          goto out;
    19.6      }
    19.7 -    filesz = nr_pages * PAGE_SIZE;
    19.8 +    filesz = (uint64_t)nr_pages * PAGE_SIZE;
    19.9      sts = xc_core_shdr_set(shdr, strtab, XEN_DUMPCORE_SEC_PAGES, SHT_PROGBITS,
   19.10                             offset, filesz, PAGE_SIZE, PAGE_SIZE);
   19.11      if ( sts != 0 )
   19.12 @@ -644,7 +644,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   19.13      }
   19.14      if ( !auto_translated_physmap )
   19.15      {
   19.16 -        filesz = nr_pages * sizeof(p2m_array[0]);
   19.17 +        filesz = (uint64_t)nr_pages * sizeof(p2m_array[0]);
   19.18          sts = xc_core_shdr_set(shdr, strtab, XEN_DUMPCORE_SEC_P2M,
   19.19                                 SHT_PROGBITS,
   19.20                                 offset, filesz, __alignof__(p2m_array[0]),
   19.21 @@ -652,7 +652,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   19.22      }
   19.23      else
   19.24      {
   19.25 -        filesz = nr_pages * sizeof(pfn_array[0]);
   19.26 +        filesz = (uint64_t)nr_pages * sizeof(pfn_array[0]);
   19.27          sts = xc_core_shdr_set(shdr, strtab, XEN_DUMPCORE_SEC_PFN,
   19.28                                 SHT_PROGBITS,
   19.29                                 offset, filesz, __alignof__(pfn_array[0]),
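
The three casts above guard the section-size computation against 32-bit overflow in the tools. A worked example of the failure mode (numbers illustrative):

    /* nr_pages = 0x200000, i.e. an 8 GiB guest with 4 KiB pages:
     * 32-bit:  0x200000 * 4096 = 2^33, which wraps to 0
     * widened: (uint64_t)0x200000 * 4096 = 0x200000000 (8 GiB), correct */
    uint64_t filesz = (uint64_t)nr_pages * PAGE_SIZE;
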
    20.1 --- a/tools/libxc/xc_dom_ia64.c	Wed Oct 17 10:36:31 2007 -0600
    20.2 +++ b/tools/libxc/xc_dom_ia64.c	Sun Oct 21 12:10:25 2007 -0600
    20.3 @@ -260,6 +260,7 @@ int arch_setup_bootearly(struct xc_dom_i
    20.4      domctl.u.arch_setup.bp = (dom->start_info_pfn << PAGE_SHIFT)
    20.5          + sizeof(start_info_t);
    20.6      domctl.u.arch_setup.maxmem = dom->total_pages << PAGE_SHIFT;
    20.7 +    domctl.u.arch_setup.vhpt_size_log2 = dom->flags;
    20.8      rc = do_domctl(dom->guest_xc, &domctl);
    20.9      return rc;
   20.10  }
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/libxc/xc_flask.c	Sun Oct 21 12:10:25 2007 -0600
    21.3 @@ -0,0 +1,46 @@
    21.4 +/******************************************************************************
    21.5 + * xc_flask.c
    21.6 + *
    21.7 + * This program is free software; you can redistribute it and/or
    21.8 + * modify it under the terms of the GNU General Public License as
    21.9 + * published by the Free Software Foundation, version 2 of the
   21.10 + * License.
   21.11 + */
   21.12 +
   21.13 +#include "xc_private.h"
   21.14 +
   21.15 +int xc_flask_op(int xc_handle, flask_op_t *op)
   21.16 +{
   21.17 +    int ret = -1;
   21.18 +    DECLARE_HYPERCALL;
   21.19 +
   21.20 +    hypercall.op     = __HYPERVISOR_xsm_op;
   21.21 +    hypercall.arg[0] = (unsigned long)op;
   21.22 +
   21.23 +    if ( mlock(op, sizeof(*op)) != 0 )
   21.24 +    {
   21.25 +        PERROR("Could not lock memory for Xen hypercall");
   21.26 +        goto out;
   21.27 +    }
   21.28 +
   21.29 +    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
   21.30 +    {
   21.31 +        if ( errno == EACCES )
   21.32 +            fprintf(stderr, "XSM operation failed!\n");
   21.33 +    }
   21.34 +
   21.35 +    safe_munlock(op, sizeof(*op));
   21.36 +
   21.37 + out:
   21.38 +    return ret;
   21.39 +}
   21.40 +
   21.41 +/*
   21.42 + * Local variables:
   21.43 + * mode: C
   21.44 + * c-set-style: "BSD"
   21.45 + * c-basic-offset: 4
   21.46 + * tab-width: 4
   21.47 + * indent-tabs-mode: nil
   21.48 + * End:
   21.49 + */
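
do_flask_op() moves from libflask into libxc essentially unchanged, renamed to xc_flask_op() so that it is reachable through xenctrl.h like the other hypercall wrappers. A hedged usage sketch of the new entry point (buffer and size are illustrative; FLASK_LOAD comes from xen/xsm/flask_op.h):

    flask_op_t op;
    op.cmd  = FLASK_LOAD;
    op.buf  = policy_buf;
    op.size = policy_size;
    if ( xc_flask_op(xc_handle, &op) != 0 )
        /* load failed; errno set by the hypercall */;
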
    22.1 --- a/tools/libxc/xenctrl.h	Wed Oct 17 10:36:31 2007 -0600
    22.2 +++ b/tools/libxc/xenctrl.h	Sun Oct 21 12:10:25 2007 -0600
    22.3 @@ -28,6 +28,7 @@
    22.4  #include <xen/memory.h>
    22.5  #include <xen/xsm/acm.h>
    22.6  #include <xen/xsm/acm_ops.h>
    22.7 +#include <xen/xsm/flask_op.h>
    22.8  
    22.9  #ifdef __ia64__
   22.10  #define XC_PAGE_SHIFT           14
   22.11 @@ -771,6 +772,8 @@ int xc_version(int xc_handle, int cmd, v
   22.12  
   22.13  int xc_acm_op(int xc_handle, int cmd, void *arg, unsigned long arg_size);
   22.14  
   22.15 +int xc_flask_op(int xc_handle, flask_op_t *op);
   22.16 +
   22.17  /**************************
   22.18   * GRANT TABLE OPERATIONS *
   22.19   **************************/
    23.1 --- a/tools/python/xen/lowlevel/flask/flask.c	Wed Oct 17 10:36:31 2007 -0600
    23.2 +++ b/tools/python/xen/lowlevel/flask/flask.c	Sun Oct 21 12:10:25 2007 -0600
    23.3 @@ -12,8 +12,7 @@
    23.4  
    23.5  #include <Python.h>
    23.6  #include <xenctrl.h>
    23.7 -
    23.8 -#include <flask_op.h>
    23.9 +#include <flask.h>
   23.10  
   23.11  #define PKG "xen.lowlevel.flask"
   23.12  #define CLS "flask"
    24.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Wed Oct 17 10:36:31 2007 -0600
    24.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Sun Oct 21 12:10:25 2007 -0600
    24.3 @@ -498,15 +498,91 @@ static PyObject *pyxc_get_hvm_param(XcOb
    24.4      unsigned long value;
    24.5  
    24.6      static char *kwd_list[] = { "domid", "param", NULL }; 
    24.7 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list,
    24.8 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "ii", kwd_list,
    24.9                                        &dom, &param) )
   24.10          return NULL;
   24.11  
   24.12      if ( xc_get_hvm_param(self->xc_handle, dom, param, &value) != 0 )
   24.13          return pyxc_error_to_exception();
   24.14  
   24.15 -    return Py_BuildValue("i", value);
   24.16 +    return PyLong_FromUnsignedLong(value);
   24.17 +
   24.18 +}
   24.19 +
   24.20 +static PyObject *pyxc_set_hvm_param(XcObject *self,
   24.21 +                                    PyObject *args,
   24.22 +                                    PyObject *kwds)
   24.23 +{
   24.24 +    uint32_t dom;
   24.25 +    int param;
   24.26 +    uint64_t value;
   24.27 +
   24.28 +    static char *kwd_list[] = { "domid", "param", "value", NULL }; 
   24.29 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiL", kwd_list,
   24.30 +                                      &dom, &param, &value) )
   24.31 +        return NULL;
   24.32 +
   24.33 +    if ( xc_set_hvm_param(self->xc_handle, dom, param, value) != 0 )
   24.34 +        return pyxc_error_to_exception();
   24.35 +
   24.36 +    Py_INCREF(zero);
   24.37 +    return zero;
   24.38 +}
   24.39 +
   24.40 +static int token_value(char *token)
   24.41 +{
   24.42 +    token = strchr(token, 'x') + 1;
   24.43 +    return strtol(token, NULL, 16);
   24.44 +}
   24.45 +
   24.46 +static int next_bdf(char **str, int *seg, int *bus, int *dev, int *func)
   24.47 +{
   24.48 +    char *token;
   24.49 +
   24.50 +    token = strchr(*str, ',');
   24.51 +    if ( !token )
   24.52 +        return 0;
   24.53 +    token++;
   24.54  
   24.55 +    *seg  = token_value(token);
   24.56 +    token = strchr(token, ',') + 1;
   24.57 +    *bus  = token_value(token);
   24.58 +    token = strchr(token, ',') + 1;
   24.59 +    *dev  = token_value(token);
   24.60 +    token = strchr(token, ',') + 1;
   24.61 +    *func  = token_value(token);
   24.62 +
   24.63 +    *str = token;
   24.64 +    return 1;
   24.65 +}
   24.66 +
   24.67 +static PyObject *pyxc_assign_device(XcObject *self,
   24.68 +                                    PyObject *args,
   24.69 +                                    PyObject *kwds)
   24.70 +{
   24.71 +    uint32_t dom;
   24.72 +    char *pci_str;
   24.73 +    uint32_t bdf = 0;
   24.74 +    int seg, bus, dev, func;
   24.75 +
   24.76 +    static char *kwd_list[] = { "domid", "pci", NULL };
   24.77 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
   24.78 +                                      &dom, &pci_str) )
   24.79 +        return NULL;
   24.80 +
   24.81 +    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
   24.82 +    {
   24.83 +        bdf |= (bus & 0xff) << 16;
   24.84 +        bdf |= (dev & 0x1f) << 11;
   24.85 +        bdf |= (func & 0x7) << 8;
   24.86 +
   24.87 +        if ( xc_assign_device(self->xc_handle, dom, bdf) != 0 )
   24.88 +            break;
   24.89 +
   24.90 +        bdf = 0;
   24.91 +    }
   24.92 +
   24.93 +    return Py_BuildValue("i", bdf);
   24.94  }
   24.95  
   24.96  #ifdef __ia64__
   24.97 @@ -537,15 +613,14 @@ static PyObject *pyxc_hvm_build(XcObject
   24.98      int i;
   24.99  #endif
  24.100      char *image;
  24.101 -    int store_evtchn, memsize, vcpus = 1, pae = 0, acpi = 0, apic = 1;
  24.102 -    unsigned long store_mfn;
  24.103 +    int memsize, vcpus = 1, acpi = 0, apic = 1;
  24.104  
  24.105 -    static char *kwd_list[] = { "domid", "store_evtchn",
  24.106 -				"memsize", "image", "vcpus", "pae", "acpi",
  24.107 +    static char *kwd_list[] = { "domid",
  24.108 +				"memsize", "image", "vcpus", "acpi",
  24.109  				"apic", NULL };
  24.110 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|iiii", kwd_list,
  24.111 -                                      &dom, &store_evtchn, &memsize,
  24.112 -                                      &image, &vcpus, &pae, &acpi, &apic) )
  24.113 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
  24.114 +                                      &dom, &memsize,
  24.115 +                                      &image, &vcpus, &acpi, &apic) )
  24.116          return NULL;
  24.117  
  24.118      if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
  24.119 @@ -571,14 +646,7 @@ static PyObject *pyxc_hvm_build(XcObject
  24.120      munmap(va_map, XC_PAGE_SIZE);
  24.121  #endif
  24.122  
  24.123 -    xc_get_hvm_param(self->xc_handle, dom, HVM_PARAM_STORE_PFN, &store_mfn);
  24.124 -#if !defined(__ia64__)
  24.125 -    xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
  24.126 -#endif
  24.127 -    xc_set_hvm_param(self->xc_handle, dom, HVM_PARAM_STORE_EVTCHN,
  24.128 -                     store_evtchn);
  24.129 -
  24.130 -    return Py_BuildValue("{s:i}", "store_mfn", store_mfn);
  24.131 +    return Py_BuildValue("{}");
  24.132  }
  24.133  
  24.134  static PyObject *pyxc_evtchn_alloc_unbound(XcObject *self,
  24.135 @@ -709,7 +777,7 @@ static PyObject *pyxc_physinfo(XcObject 
  24.136                              "max_cpu_id",       info.max_cpu_id,
  24.137                              "threads_per_core", info.threads_per_core,
  24.138                              "cores_per_socket", info.cores_per_socket,
  24.139 -                            "sockets_per_node", info.sockets_per_node,
  24.140 +                            "nr_cpus",          info.nr_cpus, 
  24.141                              "total_memory",     pages_to_kib(info.total_pages),
  24.142                              "free_memory",      pages_to_kib(info.free_pages),
  24.143                              "scrub_memory",     pages_to_kib(info.scrub_pages),
  24.144 @@ -1326,8 +1394,25 @@ static PyMethodDef pyxc_methods[] = {
  24.145        "get a parameter of HVM guest OS.\n"
  24.146        " dom     [int]:      Identifier of domain to build into.\n"
  24.147        " param   [int]:      No. of HVM param.\n"
  24.148 -      "Returns: [int] value of the param.\n" },
  24.149 +      "Returns: [long] value of the param.\n" },
  24.150  
  24.151 +    { "hvm_set_param", 
  24.152 +      (PyCFunction)pyxc_set_hvm_param, 
  24.153 +      METH_VARARGS | METH_KEYWORDS, "\n"
  24.154 +      "set a parameter of HVM guest OS.\n"
  24.155 +      " dom     [int]:      Identifier of domain to build into.\n"
  24.156 +      " param   [int]:      No. of HVM param.\n"
  24.157 +      " value   [long]:     Value of param.\n"
  24.158 +      "Returns: [int] 0 on success.\n" },
  24.159 +
  24.160 +     { "assign_device",
  24.161 +       (PyCFunction)pyxc_assign_device,
  24.162 +       METH_VARARGS | METH_KEYWORDS, "\n"
  24.163 +       "assign device with VT-d.\n"
  24.164 +       " dom     [int]:      Identifier of domain to assign the device to.\n"
  24.165 +       " pci_str [str]:      PCI devices.\n"
  24.166 +       "Returns: [int] 0 on success, or the bdf of the device that could not be assigned.\n" },
  24.167 +  
  24.168      { "sched_id_get",
  24.169        (PyCFunction)pyxc_sched_id_get,
  24.170        METH_NOARGS, "\n"
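
pyxc_assign_device() packs each parsed BDF into the layout xc_assign_device() expects, and returns 0 on success or the packed BDF of the first device that failed. A worked example of the packing (values illustrative):

    /* bus 0x03, dev 0x10, func 0x0:
     * (0x03 << 16) | (0x10 << 11) | (0x00 << 8) = 0x38000 */
    uint32_t bdf = ((bus & 0xff) << 16) | ((dev & 0x1f) << 11) | ((func & 0x7) << 8);

XendDomainInfo.py below unpacks the same layout when reporting a failure: bus = (bdf >> 16) & 0xff, devfn = (bdf >> 8) & 0xff, dev = devfn >> 3, func = devfn & 0x7.
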
    25.1 --- a/tools/python/xen/xend/XendAPI.py	Wed Oct 17 10:36:31 2007 -0600
    25.2 +++ b/tools/python/xen/xend/XendAPI.py	Sun Oct 21 12:10:25 2007 -0600
    25.3 @@ -555,7 +555,7 @@ class XendAPI(object):
    25.4                  return xen_api_success(ref)
    25.5  
    25.6              def unpack(v):
    25.7 -                return v['Value']
    25.8 +                return v.get('Value')
    25.9  
   25.10              def _get_all_records(_api_cls):
   25.11                  return lambda s, session: \
    26.1 --- a/tools/python/xen/xend/XendConfig.py	Wed Oct 17 10:36:31 2007 -0600
    26.2 +++ b/tools/python/xen/xend/XendConfig.py	Sun Oct 21 12:10:25 2007 -0600
    26.3 @@ -127,7 +127,7 @@ XENAPI_PLATFORM_CFG = [ 'acpi', 'apic', 
    26.4                          'nographic', 'pae', 'rtc_timeoffset', 'serial', 'sdl',
    26.5                          'soundhw','stdvga', 'usb', 'usbdevice', 'vnc',
    26.6                          'vncconsole', 'vncdisplay', 'vnclisten',
    26.7 -                        'vncpasswd', 'vncunused', 'xauthority', 'pci']
    26.8 +                        'vncpasswd', 'vncunused', 'xauthority', 'pci', 'vhpt']
    26.9  
   26.10  # Xen API console 'other_config' keys.
   26.11  XENAPI_CONSOLE_OTHER_CFG = ['vncunused', 'vncdisplay', 'vnclisten',
    27.1 --- a/tools/python/xen/xend/XendConstants.py	Wed Oct 17 10:36:31 2007 -0600
    27.2 +++ b/tools/python/xen/xend/XendConstants.py	Sun Oct 21 12:10:25 2007 -0600
    27.3 @@ -43,6 +43,8 @@ HVM_PARAM_STORE_EVTCHN = 2
    27.4  HVM_PARAM_PAE_ENABLED  = 4
    27.5  HVM_PARAM_IOREQ_PFN    = 5
    27.6  HVM_PARAM_BUFIOREQ_PFN = 6
    27.7 +HVM_PARAM_NVRAM_FD     = 7
    27.8 +HVM_PARAM_VHPT_SIZE    = 8
    27.9  
   27.10  restart_modes = [
   27.11      "restart",
    28.1 --- a/tools/python/xen/xend/XendDomain.py	Wed Oct 17 10:36:31 2007 -0600
    28.2 +++ b/tools/python/xen/xend/XendDomain.py	Sun Oct 21 12:10:25 2007 -0600
    28.3 @@ -886,6 +886,7 @@ class XendDomain:
    28.4          self.domains_lock.acquire()
    28.5          try:
    28.6              try:
    28.7 +                fd = None
    28.8                  dominfo = self.domain_lookup_nr(domname)
    28.9  
   28.10                  if not dominfo:
   28.11 @@ -908,8 +909,9 @@ class XendDomain:
   28.12                      oflags = os.O_RDONLY
   28.13                      if hasattr(os, "O_LARGEFILE"):
   28.14                          oflags |= os.O_LARGEFILE
   28.15 +                    fd = os.open(chkpath, oflags)
   28.16                      XendCheckpoint.restore(self,
   28.17 -                                           os.open(chkpath, oflags),
   28.18 +                                           fd,
   28.19                                             dominfo,
   28.20                                             paused = start_paused)
   28.21                      os.unlink(chkpath)
   28.22 @@ -921,6 +923,8 @@ class XendDomain:
   28.23                  log.exception("Exception occurred when resuming")
   28.24                  raise XendError("Error occurred when resuming: %s" % str(ex))
   28.25          finally:
   28.26 +            if fd is not None:
   28.27 +                os.close(fd)
   28.28              self.domains_lock.release()
   28.29  
   28.30  
    29.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Wed Oct 17 10:36:31 2007 -0600
    29.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Sun Oct 21 12:10:25 2007 -0600
    29.3 @@ -1586,6 +1586,20 @@ class XendDomainInfo:
    29.4          # Set maximum number of vcpus in domain
    29.5          xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
    29.6  
    29.7 +        # Assign devices with VT-d
    29.8 +        pci_str = str(self.info["platform"].get("pci"))
    29.9 +        if hvm and pci_str:
   29.10 +            bdf = xc.assign_device(self.domid, pci_str)
   29.11 +            if bdf != 0:
   29.12 +                bus = (bdf >> 16) & 0xff
   29.13 +                devfn = (bdf >> 8) & 0xff
   29.14 +                dev = (devfn >> 3) & 0x1f
   29.15 +                func = devfn & 0x7
   29.16 +                raise VmError("Failed to assign device (%x:%x.%x): maybe VT-d "
   29.17 +                              "is not enabled, the device does not exist, or it "
   29.18 +                              "has already been assigned to another domain"
   29.19 +                              % (bus, dev, func))
   29.20 +
   29.21          # register the domain in the list 
   29.22          from xen.xend import XendDomain
   29.23          XendDomain.instance().add_domain(self)
    30.1 --- a/tools/python/xen/xend/XendNode.py	Wed Oct 17 10:36:31 2007 -0600
    30.2 +++ b/tools/python/xen/xend/XendNode.py	Sun Oct 21 12:10:25 2007 -0600
    30.3 @@ -475,7 +475,7 @@ class XendNode:
    30.4  
    30.5          cpu_info = {
    30.6              "nr_nodes":         phys_info["nr_nodes"],
    30.7 -            "sockets_per_node": phys_info["sockets_per_node"],
    30.8 +            "nr_cpus":          phys_info["nr_cpus"],
    30.9              "cores_per_socket": phys_info["cores_per_socket"],
   30.10              "threads_per_core": phys_info["threads_per_core"]
   30.11              }
   30.12 @@ -580,17 +580,9 @@ class XendNode:
   30.13              str='none\n'
   30.14          return str[:-1];
   30.15  
   30.16 -    def count_cpus(self, pinfo):
   30.17 -        count=0
   30.18 -        node_to_cpu=pinfo['node_to_cpu']
   30.19 -        for i in range(0, pinfo['nr_nodes']):
   30.20 -            count+=len(node_to_cpu[i])
   30.21 -        return count;
   30.22 -
   30.23      def physinfo(self):
   30.24          info = self.xc.physinfo()
   30.25  
   30.26 -        info['nr_cpus'] = self.count_cpus(info)
   30.27          info['cpu_mhz'] = info['cpu_khz'] / 1000
   30.28          
   30.29          # physinfo is in KiB, need it in MiB
   30.30 @@ -600,7 +592,6 @@ class XendNode:
   30.31  
   30.32          ITEM_ORDER = ['nr_cpus',
   30.33                        'nr_nodes',
   30.34 -                      'sockets_per_node',
   30.35                        'cores_per_socket',
   30.36                        'threads_per_core',
   30.37                        'cpu_mhz',
    31.1 --- a/tools/python/xen/xend/image.py	Wed Oct 17 10:36:31 2007 -0600
    31.2 +++ b/tools/python/xen/xend/image.py	Sun Oct 21 12:10:25 2007 -0600
    31.3 @@ -24,7 +24,7 @@ import time
    31.4  import signal
    31.5  
    31.6  import xen.lowlevel.xc
    31.7 -from xen.xend.XendConstants import REVERSE_DOMAIN_SHUTDOWN_REASONS
    31.8 +from xen.xend.XendConstants import *
    31.9  from xen.xend.XendError import VmError, XendError, HVMRequired
   31.10  from xen.xend.XendLogging import log
   31.11  from xen.xend.XendOptions import instance as xenopts
   31.12 @@ -197,6 +197,7 @@ class ImageHandler:
   31.13  class LinuxImageHandler(ImageHandler):
   31.14  
   31.15      ostype = "linux"
   31.16 +    flags = 0
   31.17  
   31.18      def buildDomain(self):
   31.19          store_evtchn = self.vm.getStorePort()
   31.20 @@ -213,6 +214,8 @@ class LinuxImageHandler(ImageHandler):
   31.21          log.debug("ramdisk        = %s", self.ramdisk)
   31.22          log.debug("vcpus          = %d", self.vm.getVCpuCount())
   31.23          log.debug("features       = %s", self.vm.getFeatures())
   31.24 +        if arch.type == "ia64":
   31.25 +            log.debug("vhpt          = %d", self.flags)
   31.26  
   31.27          return xc.linux_build(domid          = self.vm.getDomid(),
   31.28                                memsize        = mem_mb,
   31.29 @@ -221,7 +224,8 @@ class LinuxImageHandler(ImageHandler):
   31.30                                console_evtchn = console_evtchn,
   31.31                                cmdline        = self.cmdline,
   31.32                                ramdisk        = self.ramdisk,
   31.33 -                              features       = self.vm.getFeatures())
   31.34 +                              features       = self.vm.getFeatures(),
   31.35 +                              flags          = self.flags)
   31.36  
   31.37  class PPC_LinuxImageHandler(LinuxImageHandler):
   31.38  
   31.39 @@ -274,7 +278,6 @@ class HVMImageHandler(ImageHandler):
   31.40  
   31.41          self.pid = None
   31.42  
   31.43 -        self.pae  = int(vmConfig['platform'].get('pae',  0))
   31.44          self.apic = int(vmConfig['platform'].get('apic', 0))
   31.45          self.acpi = int(vmConfig['platform'].get('acpi', 0))
   31.46          
   31.47 @@ -289,19 +292,23 @@ class HVMImageHandler(ImageHandler):
   31.48          log.debug("store_evtchn   = %d", store_evtchn)
   31.49          log.debug("memsize        = %d", mem_mb)
   31.50          log.debug("vcpus          = %d", self.vm.getVCpuCount())
   31.51 -        log.debug("pae            = %d", self.pae)
   31.52          log.debug("acpi           = %d", self.acpi)
   31.53          log.debug("apic           = %d", self.apic)
   31.54  
   31.55          rc = xc.hvm_build(domid          = self.vm.getDomid(),
   31.56                            image          = self.kernel,
   31.57 -                          store_evtchn   = store_evtchn,
   31.58                            memsize        = mem_mb,
   31.59                            vcpus          = self.vm.getVCpuCount(),
   31.60 -                          pae            = self.pae,
   31.61                            acpi           = self.acpi,
   31.62                            apic           = self.apic)
   31.63 +
   31.64          rc['notes'] = { 'SUSPEND_CANCEL': 1 }
   31.65 +
   31.66 +        rc['store_mfn'] = xc.hvm_get_param(self.vm.getDomid(),
   31.67 +                                           HVM_PARAM_STORE_PFN)
   31.68 +        xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_STORE_EVTCHN,
   31.69 +                         store_evtchn)
   31.70 +
   31.71          return rc
   31.72  
   31.73      # Return a list of cmd line args to the device models based on the
   31.74 @@ -497,8 +504,13 @@ class HVMImageHandler(ImageHandler):
   31.75  
   31.76  class IA64_HVM_ImageHandler(HVMImageHandler):
   31.77  
   31.78 +    def configure(self, vmConfig):
   31.79 +        HVMImageHandler.configure(self, vmConfig)
   31.80 +        self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
   31.81 +
   31.82      def buildDomain(self):
   31.83          xc.nvram_init(self.vm.getName(), self.vm.getDomid())
   31.84 +        xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_VHPT_SIZE, self.vhpt)
   31.85          return HVMImageHandler.buildDomain(self)
   31.86  
   31.87      def getRequiredAvailableMemory(self, mem_kb):
   31.88 @@ -515,8 +527,26 @@ class IA64_HVM_ImageHandler(HVMImageHand
   31.89          # Explicit shadow memory is not a concept 
   31.90          return 0
   31.91  
   31.92 +class IA64_Linux_ImageHandler(LinuxImageHandler):
   31.93 +
   31.94 +    def configure(self, vmConfig):
   31.95 +        LinuxImageHandler.configure(self, vmConfig)
   31.96 +        self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
   31.97 +
   31.98 +    def buildDomain(self):
   31.99 +        self.flags = self.vhpt
  31.100 +        return LinuxImageHandler.buildDomain(self)
  31.101 +
  31.102  class X86_HVM_ImageHandler(HVMImageHandler):
  31.103  
  31.104 +    def configure(self, vmConfig):
  31.105 +        HVMImageHandler.configure(self, vmConfig)
  31.106 +        self.pae = int(vmConfig['platform'].get('pae',  0))
  31.107 +
  31.108 +    def buildDomain(self):
  31.109 +        xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
  31.110 +        return HVMImageHandler.buildDomain(self)
  31.111 +
  31.112      def getRequiredAvailableMemory(self, mem_kb):
  31.113          # Add 8 MiB overhead for QEMU's video RAM.
  31.114          return mem_kb + 8192
  31.115 @@ -551,7 +581,7 @@ class X86_Linux_ImageHandler(LinuxImageH
  31.116          "linux": PPC_LinuxImageHandler,
  31.117      },
  31.118      "ia64": {
  31.119 -        "linux": LinuxImageHandler,
  31.120 +        "linux": IA64_Linux_ImageHandler,
  31.121          "hvm": IA64_HVM_ImageHandler,
  31.122      },
  31.123      "x86": {
    32.1 --- a/tools/python/xen/xm/create.py	Wed Oct 17 10:36:31 2007 -0600
    32.2 +++ b/tools/python/xen/xm/create.py	Sun Oct 21 12:10:25 2007 -0600
    32.3 @@ -210,6 +210,10 @@ gopts.var('vcpu_avail', val='VCPUS',
    32.4            fn=set_long, default=None,
    32.5            use="Bitmask for virtual CPUs to make available immediately.")
    32.6  
    32.7 +gopts.var('vhpt', val='VHPT',
    32.8 +          fn=set_int, default=0,
    32.9 +          use="Log2 of domain VHPT size for IA64.")
   32.10 +
   32.11  gopts.var('cpu_cap', val='CAP',
   32.12            fn=set_int, default=None,
   32.13            use="""Set the maximum amount of cpu.
   32.14 @@ -555,7 +559,10 @@ def configure_image(vals):
   32.15  
   32.16      if vals.builder == 'hvm':
   32.17          configure_hvm(config_image, vals) 
   32.18 -       
   32.19 +
   32.20 +    if vals.vhpt != 0:
   32.21 +        config_image.append(['vhpt', vals.vhpt])
   32.22 +
   32.23      return config_image
   32.24      
   32.25  def configure_disks(config_devs, vals):
    33.1 --- a/tools/python/xen/xm/main.py	Wed Oct 17 10:36:31 2007 -0600
    33.2 +++ b/tools/python/xen/xm/main.py	Sun Oct 21 12:10:25 2007 -0600
    33.3 @@ -1667,9 +1667,8 @@ def xm_info(args):
    33.4              "release":           getVal(["software_version", "release"]),
    33.5              "version":           getVal(["software_version", "version"]),
    33.6              "machine":           getVal(["software_version", "machine"]),
    33.7 -            "nr_cpus":           len(getVal(["host_CPUs"], [])),
    33.8 +            "nr_cpus":           getVal(["cpu_configuration", "nr_cpus"]),
    33.9              "nr_nodes":          getVal(["cpu_configuration", "nr_nodes"]),
   33.10 -            "sockets_per_node":  getVal(["cpu_configuration", "sockets_per_node"]),
   33.11              "cores_per_socket":  getVal(["cpu_configuration", "cores_per_socket"]),
   33.12              "threads_per_core":  getVal(["cpu_configuration", "threads_per_core"]),
   33.13              "cpu_mhz":           getCpuMhz(),
    34.1 --- a/tools/python/xen/xm/xenapi_create.py	Wed Oct 17 10:36:31 2007 -0600
    34.2 +++ b/tools/python/xen/xm/xenapi_create.py	Sun Oct 21 12:10:25 2007 -0600
    34.3 @@ -818,7 +818,7 @@ class sxp2xml:
    34.4  
    34.5  
    34.6      def extract_platform(self, image, document):
    34.7 -        platform_keys = ['acpi', 'apic', 'pae']
    34.8 +        platform_keys = ['acpi', 'apic', 'pae', 'vhpt']
    34.9  
   34.10          def extract_platform_key(key):
   34.11              platform = document.createElement("platform")
    35.1 --- a/tools/xenmon/xenbaked.c	Wed Oct 17 10:36:31 2007 -0600
    35.2 +++ b/tools/xenmon/xenbaked.c	Sun Oct 21 12:10:25 2007 -0600
    35.3 @@ -460,10 +460,7 @@ unsigned int get_num_cpus(void)
    35.4      xc_interface_close(xc_handle);
    35.5      opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;
    35.6  
    35.7 -    return (physinfo.threads_per_core *
    35.8 -            physinfo.cores_per_socket *
    35.9 -            physinfo.sockets_per_node *
   35.10 -            physinfo.nr_nodes);
   35.11 +    return physinfo.nr_cpus;
   35.12  }
   35.13  
   35.14  
    36.1 --- a/tools/xenstat/libxenstat/src/xenstat.c	Wed Oct 17 10:36:31 2007 -0600
    36.2 +++ b/tools/xenstat/libxenstat/src/xenstat.c	Sun Oct 21 12:10:25 2007 -0600
    36.3 @@ -155,9 +155,7 @@ xenstat_node *xenstat_get_node(xenstat_h
    36.4  	}
    36.5  
    36.6  	node->cpu_hz = ((unsigned long long)physinfo.cpu_khz) * 1000ULL;
    36.7 -	node->num_cpus =
    36.8 -	    (physinfo.threads_per_core * physinfo.cores_per_socket *
    36.9 -	     physinfo.sockets_per_node * physinfo.nr_nodes);
   36.10 +	node->num_cpus = physinfo.nr_cpus;
   36.11  	node->tot_mem = ((unsigned long long)physinfo.total_pages)
   36.12  	    * handle->page_size;
   36.13  	node->free_mem = ((unsigned long long)physinfo.free_pages)
    37.1 --- a/tools/xenstore/xenstored_watch.c	Wed Oct 17 10:36:31 2007 -0600
    37.2 +++ b/tools/xenstore/xenstored_watch.c	Sun Oct 21 12:10:25 2007 -0600
    37.3 @@ -170,7 +170,7 @@ void do_unwatch(struct connection *conn,
    37.4  		return;
    37.5  	}
    37.6  
    37.7 -	node = canonicalize(conn, vec[0]);
    37.8 +	node = strstarts(vec[0], "@") ? vec[0] : canonicalize(conn, vec[0]);
    37.9  	list_for_each_entry(watch, &conn->watches, list) {
   37.10  		if (streq(watch->node, node) && streq(watch->token, vec[1])) {
   37.11  			list_del(&watch->list);
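
The do_unwatch() fix above brings it in line with do_watch(): special watch paths beginning with '@' (such as @releaseDomain) are event tokens rather than store nodes, so they must be matched verbatim instead of being canonicalized relative to the connection. A minimal standalone sketch of that selection, with strstarts() reimplemented locally and canonicalize() reduced to a hypothetical base-path join (not xenstored's real helper):

    #include <stdio.h>
    #include <string.h>

    /* Local stand-in for xenstored's strstarts(). */
    static int strstarts(const char *str, const char *prefix)
    {
        return strncmp(str, prefix, strlen(prefix)) == 0;
    }

    /* Hypothetical canonicalize: join relative paths to a per-connection base. */
    static const char *canonicalize(const char *base, const char *path,
                                    char *buf, size_t len)
    {
        if (path[0] == '/')
            return path;
        snprintf(buf, len, "%s/%s", base, path);
        return buf;
    }

    int main(void)
    {
        const char *paths[] = { "@releaseDomain", "backend/vbd", "/local/domain/0" };
        char buf[128];
        for (int i = 0; i < 3; i++) {
            /* '@' paths bypass canonicalization, exactly as in do_unwatch(). */
            const char *node = strstarts(paths[i], "@")
                ? paths[i]
                : canonicalize("/local/domain/7", paths[i], buf, sizeof(buf));
            printf("%-16s -> %s\n", paths[i], node);
        }
        return 0;
    }
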
    38.1 --- a/tools/xentrace/xentrace.c	Wed Oct 17 10:36:31 2007 -0600
    38.2 +++ b/tools/xentrace/xentrace.c	Sun Oct 21 12:10:25 2007 -0600
    38.3 @@ -309,10 +309,7 @@ unsigned int get_num_cpus(void)
    38.4  
    38.5      xc_interface_close(xc_handle);
    38.6  
    38.7 -    return (physinfo.threads_per_core *
    38.8 -            physinfo.cores_per_socket *
    38.9 -            physinfo.sockets_per_node *
   38.10 -            physinfo.nr_nodes);
   38.11 +    return physinfo.nr_cpus;
   38.12  }
   38.13  
   38.14  
    39.1 --- a/tools/xm-test/lib/XmTestLib/Xm.py	Wed Oct 17 10:36:31 2007 -0600
    39.2 +++ b/tools/xm-test/lib/XmTestLib/Xm.py	Sun Oct 21 12:10:25 2007 -0600
    39.3 @@ -218,11 +218,9 @@ def restartXend():
    39.4          return status
    39.5  
    39.6  def smpConcurrencyLevel():
    39.7 -    cores = int(getInfo("cores_per_socket"))
    39.8 -    threads = int(getInfo("threads_per_core"))
    39.9 -    sockets = int(getInfo("sockets_per_node"))
   39.10 +    nr_cpus = int(getInfo("nr_cpus"))
   39.11  
   39.12 -    return cores * sockets * threads
   39.13 +    return nr_cpus
   39.14  
   39.15  if __name__ == "__main__":
   39.16      if isDomainRunning("0"):
    40.1 --- a/tools/xm-test/lib/XmTestReport/OSReport.py	Wed Oct 17 10:36:31 2007 -0600
    40.2 +++ b/tools/xm-test/lib/XmTestReport/OSReport.py	Sun Oct 21 12:10:25 2007 -0600
    40.3 @@ -92,7 +92,6 @@ class Machine:
    40.4  
    40.5          xenValues = {"nr_cpus"          : "Unknown",
    40.6                       "nr_nodes"         : "Unknown",
    40.7 -                     "sockets_per_node" : "Unknown",
    40.8                       "cores_per_socket" : "Unknown",
    40.9                       "threads_per_core" : "Unknown",
   40.10                       "cpu_mhz"          : "Unknown",
    41.1 --- a/xen/Rules.mk	Wed Oct 17 10:36:31 2007 -0600
    41.2 +++ b/xen/Rules.mk	Sun Oct 21 12:10:25 2007 -0600
    41.3 @@ -39,7 +39,9 @@ TARGET := $(BASEDIR)/xen
    41.4  
    41.5  HDRS := $(wildcard *.h)
    41.6  HDRS += $(wildcard $(BASEDIR)/include/xen/*.h)
     41.7 +HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h)
    41.8  HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
    41.9 +HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h)
   41.10  HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
   41.11  HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
   41.12  HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
    42.1 --- a/xen/arch/ia64/vmx/vmmu.c	Wed Oct 17 10:36:31 2007 -0600
    42.2 +++ b/xen/arch/ia64/vmx/vmmu.c	Sun Oct 21 12:10:25 2007 -0600
    42.3 @@ -22,6 +22,7 @@
    42.4  #include <asm/vmx_vcpu.h>
    42.5  #include <asm/vmx_pal_vsa.h>
    42.6  #include <xen/sched-if.h>
    42.7 +#include <asm/vhpt.h>
    42.8  
    42.9  static int default_vtlb_sz = DEFAULT_VTLB_SZ;
   42.10  static int default_vhpt_sz = DEFAULT_VHPT_SZ;
   42.11 @@ -38,17 +39,6 @@ static void __init parse_vtlb_size(char 
   42.12      }
   42.13  }
   42.14  
   42.15 -static int canonicalize_vhpt_size(int sz)
   42.16 -{
   42.17 -    /* minimum 32KB */
   42.18 -    if (sz < 15)
   42.19 -        return 15;
   42.20 -    /* maximum 8MB (since purging TR is hard coded) */
   42.21 -    if (sz > IA64_GRANULE_SHIFT - 1)
   42.22 -        return IA64_GRANULE_SHIFT - 1;
   42.23 -    return sz;
   42.24 -}
   42.25 -
   42.26  static void __init parse_vhpt_size(char *s)
   42.27  {
   42.28      int sz = parse_size_and_unit(s, NULL);
   42.29 @@ -96,8 +86,14 @@ static u64 get_mfn(struct domain *d, u64
   42.30  static int init_domain_vhpt(struct vcpu *v)
   42.31  {
   42.32      int rc;
   42.33 +    u64 size = v->domain->arch.hvm_domain.params[HVM_PARAM_VHPT_SIZE];
   42.34  
   42.35 -    rc = thash_alloc(&(v->arch.vhpt), default_vhpt_sz, "vhpt");
   42.36 +    if (size == 0)
   42.37 +        size = default_vhpt_sz;
   42.38 +    else
   42.39 +        size = canonicalize_vhpt_size(size);
   42.40 +
   42.41 +    rc = thash_alloc(&(v->arch.vhpt), size, "vhpt");
   42.42      v->arch.arch_vmx.mpta = v->arch.vhpt.pta.val;
   42.43      return rc;
   42.44  }
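
init_domain_vhpt() now sizes the VTi domain's VHPT from the per-domain HVM_PARAM_VHPT_SIZE parameter (the vhpt= option plumbed through the tools above), falling back to the command-line default when the parameter is zero. The clamp lives in canonicalize_vhpt_size(), which this hunk moves out of vmmu.c (hence the new asm/vhpt.h include). A standalone sketch of the clamp with its original bounds; IA64_GRANULE_SHIFT is assumed to be 24 here, which yields the 8MB (2^23) maximum cited in the removed comment:

    #include <stdio.h>

    #define IA64_GRANULE_SHIFT 24  /* assumed: 16MB granule, so max VHPT is 2^23 */

    /* Same clamp as the canonicalize_vhpt_size() moved to asm/vhpt.h. */
    static int canonicalize_vhpt_size(int sz)
    {
        if (sz < 15)                      /* minimum 32KB */
            return 15;
        if (sz > IA64_GRANULE_SHIFT - 1)  /* maximum 8MB */
            return IA64_GRANULE_SHIFT - 1;
        return sz;
    }

    int main(void)
    {
        int inputs[] = { 0, 15, 20, 23, 30 };
        for (int i = 0; i < 5; i++) {
            int lg = canonicalize_vhpt_size(inputs[i]);
            printf("vhpt=%-2d -> log2=%d (%ld KB)\n",
                   inputs[i], lg, (1L << lg) / 1024);
        }
        return 0;
    }
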
    43.1 --- a/xen/arch/ia64/xen/dom0_ops.c	Wed Oct 17 10:36:31 2007 -0600
    43.2 +++ b/xen/arch/ia64/xen/dom0_ops.c	Sun Oct 21 12:10:25 2007 -0600
    43.3 @@ -93,6 +93,9 @@ long arch_do_domctl(xen_domctl_t *op, XE
    43.4              ds->maxmem = d->arch.convmem_end;
    43.5              ds->xsi_va = d->arch.shared_info_va;
    43.6              ds->hypercall_imm = d->arch.breakimm;
    43.7 +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
    43.8 +            ds->vhpt_size_log2 = d->arch.vhpt_size_log2;
    43.9 +#endif
   43.10              /* Copy back.  */
   43.11              if ( copy_to_guest(u_domctl, op, 1) )
   43.12                  ret = -EFAULT;
   43.13 @@ -116,6 +119,20 @@ long arch_do_domctl(xen_domctl_t *op, XE
   43.14                      for_each_vcpu (d, v)
   43.15                          v->arch.breakimm = d->arch.breakimm;
   43.16                  }
   43.17 +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
   43.18 +                if (ds->vhpt_size_log2 == -1) {
   43.19 +                    d->arch.has_pervcpu_vhpt = 0;
   43.20 +                    ds->vhpt_size_log2 = -1;
   43.21 +                    printk(XENLOG_INFO "XEN_DOMCTL_arch_setup: "
   43.22 +                           "domain %d VHPT is global.\n", d->domain_id);
   43.23 +                } else {
   43.24 +                    d->arch.has_pervcpu_vhpt = 1;
   43.25 +                    d->arch.vhpt_size_log2 = ds->vhpt_size_log2;
   43.26 +                    printk(XENLOG_INFO "XEN_DOMCTL_arch_setup: "
   43.27 +                           "domain %d VHPT is per vcpu. size=2**%d\n",
   43.28 +                           d->domain_id, ds->vhpt_size_log2);
   43.29 +                }
   43.30 +#endif
   43.31                  if (ds->xsi_va)
   43.32                      d->arch.shared_info_va = ds->xsi_va;
   43.33                  ret = dom_fw_setup(d, ds->bp, ds->maxmem);
   43.34 @@ -234,7 +251,7 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
   43.35      {
   43.36      case XEN_SYSCTL_physinfo:
   43.37      {
   43.38 -        int i, node_cpus = 0;
   43.39 +        int i;
   43.40          uint32_t max_array_ent;
   43.41  
   43.42          xen_sysctl_physinfo_t *pi = &op->u.physinfo;
   43.43 @@ -242,18 +259,8 @@ long arch_do_sysctl(xen_sysctl_t *op, XE
   43.44          pi->threads_per_core = cpus_weight(cpu_sibling_map[0]);
   43.45          pi->cores_per_socket =
   43.46              cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
   43.47 +        pi->nr_cpus          = (u32)num_online_cpus();
   43.48          pi->nr_nodes         = num_online_nodes();
   43.49 -
   43.50 -        /*
   43.51 -         * Guess at a sockets_per_node value.  Use the maximum number of
   43.52 -         * CPUs per node to avoid deconfigured CPUs breaking the average.
   43.53 -         */
   43.54 -        for_each_online_node(i)
   43.55 -            node_cpus = max(node_cpus, cpus_weight(node_to_cpumask(i)));
   43.56 -
   43.57 -        pi->sockets_per_node = node_cpus / 
   43.58 -            (pi->cores_per_socket * pi->threads_per_core);
   43.59 -
   43.60          pi->total_pages      = total_pages; 
   43.61          pi->free_pages       = avail_domheap_pages();
   43.62          pi->scrub_pages      = avail_scrub_pages();
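
The deleted block documents why sockets_per_node had to go across the tree: it was only ever a guess, and on machines with deconfigured CPUs the product threads_per_core x cores_per_socket x sockets_per_node x nr_nodes need not equal the number of online CPUs, so physinfo now reports nr_cpus directly. A toy comparison mirroring the removed heuristic (all figures invented):

    #include <stdio.h>

    int main(void)
    {
        /* Invented topology: 2 nodes x 2 sockets x 2 cores x 2 threads = 16,
         * but 3 CPUs are offline, leaving 13. */
        int threads = 2, cores = 2, nodes = 2, online = 13;
        int node_cpus = 7;  /* busiest node, as the old heuristic sampled */

        /* The removed guess: node_cpus / (cores_per_socket * threads_per_core). */
        int sockets_guess = node_cpus / (cores * threads);  /* 7/4 = 1, not 2 */

        printf("guessed sockets_per_node = %d\n", sockets_guess);
        printf("product formula gives    = %d cpus\n",
               threads * cores * sockets_guess * nodes);       /* 8  */
        printf("num_online_cpus() gives  = %d cpus\n", online); /* 13 */
        return 0;
    }
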
    44.1 --- a/xen/arch/ia64/xen/vhpt.c	Wed Oct 17 10:36:31 2007 -0600
    44.2 +++ b/xen/arch/ia64/xen/vhpt.c	Sun Oct 21 12:10:25 2007 -0600
    44.3 @@ -28,12 +28,13 @@ DEFINE_PER_CPU(volatile u32, vhpt_tlbflu
    44.4  #endif
    44.5  
    44.6  static void
    44.7 -__vhpt_flush(unsigned long vhpt_maddr)
    44.8 +__vhpt_flush(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
    44.9  {
   44.10  	struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
   44.11 +	unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
   44.12  	int i;
   44.13  
   44.14 -	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
   44.15 +	for (i = 0; i < num_entries; i++, v++)
   44.16  		v->ti_tag = INVALID_TI_TAG;
   44.17  }
   44.18  
   44.19 @@ -42,7 +43,7 @@ local_vhpt_flush(void)
   44.20  {
   44.21  	/* increment flush clock before flush */
   44.22  	u32 flush_time = tlbflush_clock_inc_and_return();
   44.23 -	__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
   44.24 +	__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr), VHPT_SIZE_LOG2);
   44.25  	/* this must be after flush */
   44.26  	tlbflush_update_time(&__get_cpu_var(vhpt_tlbflush_timestamp),
   44.27  	                     flush_time);
   44.28 @@ -52,17 +53,23 @@ local_vhpt_flush(void)
   44.29  void
   44.30  vcpu_vhpt_flush(struct vcpu* v)
   44.31  {
   44.32 -	__vhpt_flush(vcpu_vhpt_maddr(v));
   44.33 +	unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
   44.34 +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
   44.35 +	if (HAS_PERVCPU_VHPT(v->domain))
   44.36 +		vhpt_size_log2 = v->arch.pta.size;
   44.37 +#endif
   44.38 +	__vhpt_flush(vcpu_vhpt_maddr(v), vhpt_size_log2);
   44.39  	perfc_incr(vcpu_vhpt_flush);
   44.40  }
   44.41  
   44.42  static void
   44.43 -vhpt_erase(unsigned long vhpt_maddr)
   44.44 +vhpt_erase(unsigned long vhpt_maddr, unsigned long vhpt_size_log2)
   44.45  {
   44.46  	struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr);
   44.47 +	unsigned long num_entries = 1 << (vhpt_size_log2 - 5);
   44.48  	int i;
   44.49  
   44.50 -	for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) {
   44.51 +	for (i = 0; i < num_entries; i++, v++) {
   44.52  		v->itir = 0;
   44.53  		v->CChain = 0;
   44.54  		v->page_flags = 0;
   44.55 @@ -140,7 +147,7 @@ void __init vhpt_init(void)
   44.56  	__get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1;
   44.57  	printk(XENLOG_DEBUG "vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n",
   44.58  	       paddr, __get_cpu_var(vhpt_pend));
   44.59 -	vhpt_erase(paddr);
   44.60 +	vhpt_erase(paddr, VHPT_SIZE_LOG2);
   44.61  	// we don't enable VHPT here.
   44.62  	// context_switch() or schedule_tail() does it.
   44.63  }
   44.64 @@ -151,6 +158,11 @@ pervcpu_vhpt_alloc(struct vcpu *v)
   44.65  {
   44.66  	unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2;
   44.67  
   44.68 +	if (v->domain->arch.vhpt_size_log2 > 0)
   44.69 +	    vhpt_size_log2 =
   44.70 +		canonicalize_vhpt_size(v->domain->arch.vhpt_size_log2);
   44.71 +	printk(XENLOG_DEBUG "%s vhpt_size_log2=%ld\n",
   44.72 +	       __func__, vhpt_size_log2);
   44.73  	v->arch.vhpt_entries =
   44.74  		(1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry);
   44.75  	v->arch.vhpt_page =
   44.76 @@ -164,11 +176,11 @@ pervcpu_vhpt_alloc(struct vcpu *v)
   44.77  
   44.78  	v->arch.pta.val = 0; // to zero reserved bits
   44.79  	v->arch.pta.ve = 1; // enable vhpt
   44.80 -	v->arch.pta.size = VHPT_SIZE_LOG2;
   44.81 +	v->arch.pta.size = vhpt_size_log2;
   44.82  	v->arch.pta.vf = 1; // long format
   44.83  	v->arch.pta.base = __va_ul(v->arch.vhpt_maddr) >> 15;
   44.84  
   44.85 -	vhpt_erase(v->arch.vhpt_maddr);
   44.86 +	vhpt_erase(v->arch.vhpt_maddr, vhpt_size_log2);
   44.87  	smp_mb(); // per vcpu vhpt may be used by another physical cpu.
   44.88  	return 0;
   44.89  }
   44.90 @@ -178,7 +190,7 @@ pervcpu_vhpt_free(struct vcpu *v)
   44.91  {
   44.92  	if (likely(v->arch.vhpt_page != NULL))
   44.93  		free_domheap_pages(v->arch.vhpt_page,
   44.94 -		                   VHPT_SIZE_LOG2 - PAGE_SHIFT);
   44.95 +		                   v->arch.pta.size - PAGE_SHIFT);
   44.96  }
   44.97  #endif
   44.98  
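__vhpt_flush() and vhpt_erase() now take the table's log2 size instead of assuming the build-time VHPT_NUM_ENTRIES. The shift form 1 << (vhpt_size_log2 - 5) encodes a 32-byte long-format entry, so it agrees with the (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry) computation in pervcpu_vhpt_alloc(). A standalone check of that equivalence (the entry layout below is illustrative; only its 32-byte size matters):

    #include <stdio.h>
    #include <stdint.h>
    #include <assert.h>

    /* Sketch of a long-format VHPT entry: four 8-byte fields = 32 bytes. */
    struct vhpt_lf_entry {
        uint64_t itir;
        uint64_t ti_tag;
        uint64_t page_flags;
        uint64_t CChain;
    };

    int main(void)
    {
        assert(sizeof(struct vhpt_lf_entry) == 32);
        for (unsigned long lg = 15; lg <= 23; lg++) {
            unsigned long a = 1UL << (lg - 5);
            unsigned long b = (1UL << lg) / sizeof(struct vhpt_lf_entry);
            assert(a == b);  /* the two formulations match */
            printf("log2=%lu: %lu entries\n", lg, a);
        }
        return 0;
    }
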
    45.1 --- a/xen/arch/powerpc/sysctl.c	Wed Oct 17 10:36:31 2007 -0600
    45.2 +++ b/xen/arch/powerpc/sysctl.c	Sun Oct 21 12:10:25 2007 -0600
    45.3 @@ -45,9 +45,7 @@ long arch_do_sysctl(struct xen_sysctl *s
    45.4              cpus_weight(cpu_sibling_map[0]);
    45.5          pi->cores_per_socket =
    45.6              cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
    45.7 -        pi->sockets_per_node = num_online_cpus() / 
    45.8 -            (num_online_nodes() * pi->cores_per_socket * pi->threads_per_core);
    45.9 -
   45.10 +        pi->nr_cpus          = (u32)num_online_cpus();
   45.11          pi->nr_nodes         = num_online_nodes();
   45.12          pi->total_pages      = total_pages;
   45.13          pi->free_pages       = avail_domheap_pages();
    46.1 --- a/xen/arch/x86/Makefile	Wed Oct 17 10:36:31 2007 -0600
    46.2 +++ b/xen/arch/x86/Makefile	Sun Oct 21 12:10:25 2007 -0600
    46.3 @@ -40,6 +40,7 @@ obj-y += srat.o
    46.4  obj-y += string.o
    46.5  obj-y += sysctl.o
    46.6  obj-y += time.o
    46.7 +obj-y += trace.o
    46.8  obj-y += traps.o
    46.9  obj-y += usercopy.o
   46.10  obj-y += x86_emulate.o
    47.1 --- a/xen/arch/x86/acpi/boot.c	Wed Oct 17 10:36:31 2007 -0600
    47.2 +++ b/xen/arch/x86/acpi/boot.c	Sun Oct 21 12:10:25 2007 -0600
    47.3 @@ -491,11 +491,9 @@ static int __init acpi_parse_fadt(unsign
    47.4  	/* detect the location of the ACPI PM Timer */
    47.5  	if (fadt->revision >= FADT2_REVISION_ID) {
    47.6  		/* FADT rev. 2 */
    47.7 -		if (fadt->xpm_tmr_blk.address_space_id !=
    47.8 +		if (fadt->xpm_tmr_blk.address_space_id ==
    47.9  		    ACPI_ADR_SPACE_SYSTEM_IO)
   47.10 -			return 0;
   47.11 -
   47.12 -		pmtmr_ioport = fadt->xpm_tmr_blk.address;
   47.13 +			pmtmr_ioport = fadt->xpm_tmr_blk.address;
   47.14  		/*
   47.15  		 * "X" fields are optional extensions to the original V1.0
   47.16  		 * fields, so we must selectively expand V1.0 fields if the
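
The reworked FADT parsing above stops treating a non-I/O extended ("X") PM timer block as fatal: instead of returning early, it simply declines to use the extended address, leaving pmtmr_ioport to be filled from the V1.0 field later in the function. A simplified sketch of the resulting decision; ACPI_ADR_SPACE_SYSTEM_IO is assumed to be 1, and the V1.0 fallback is reduced to a plain parameter rather than the full field-validation the real function does:

    #include <stdio.h>

    #define ACPI_ADR_SPACE_SYSTEM_IO 1  /* value assumed for illustration */

    struct gas { int address_space_id; unsigned long address; };

    /* Prefer the rev-2 "X" block only when it is an I/O port; otherwise
     * leave pmtmr_ioport unset so the V1.0 field can still provide it. */
    static unsigned long detect_pmtmr(int fadt_rev2, struct gas xpm,
                                      unsigned long v1_port)
    {
        unsigned long pmtmr_ioport = 0;
        if (fadt_rev2 && xpm.address_space_id == ACPI_ADR_SPACE_SYSTEM_IO)
            pmtmr_ioport = xpm.address;
        if (!pmtmr_ioport)
            pmtmr_ioport = v1_port;
        return pmtmr_ioport;
    }

    int main(void)
    {
        struct gas mmio = { 0, 0xfed000f0 };
        struct gas io   = { ACPI_ADR_SPACE_SYSTEM_IO, 0x408 };
        printf("X block is MMIO: port=0x%lx (fell back to V1.0)\n",
               detect_pmtmr(1, mmio, 0xb008));
        printf("X block is I/O : port=0x%lx\n",
               detect_pmtmr(1, io, 0xb008));
        return 0;
    }
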
    48.1 --- a/xen/arch/x86/boot/video.S	Wed Oct 17 10:36:31 2007 -0600
    48.2 +++ b/xen/arch/x86/boot/video.S	Sun Oct 21 12:10:25 2007 -0600
    48.3 @@ -44,6 +44,7 @@
    48.4  #define PARAM_LFB_COLORS        0x1c
    48.5  #define PARAM_VESAPM_SEG        0x24
    48.6  #define PARAM_VESAPM_OFF        0x26
    48.7 +#define PARAM_VESA_ATTRIB       0x28
    48.8  #define _param(param) bootsym(boot_vid_info)+(param)
    48.9  
   48.10  video:  xorw    %ax, %ax
   48.11 @@ -129,6 +130,8 @@ mopar_gr:
   48.12          movl    %eax, _param(PARAM_LFB_COLORS)
   48.13          movl    35(%di), %eax
   48.14          movl    %eax, _param(PARAM_LFB_COLORS+4)
   48.15 +        movw    0(%di), %ax
   48.16 +        movw    %ax, _param(PARAM_VESA_ATTRIB)
   48.17  
   48.18  # get video mem size
   48.19          leaw    vesa_glob_info, %di
    49.1 --- a/xen/arch/x86/cpu/common.c	Wed Oct 17 10:36:31 2007 -0600
    49.2 +++ b/xen/arch/x86/cpu/common.c	Sun Oct 21 12:10:25 2007 -0600
    49.3 @@ -229,7 +229,6 @@ static void __init early_cpu_detect(void
    49.4  void __devinit generic_identify(struct cpuinfo_x86 * c)
    49.5  {
    49.6  	u32 tfms, xlvl;
    49.7 -	int junk;
    49.8  
    49.9  	if (have_cpuid_p()) {
   49.10  		/* Get vendor name */
   49.11 @@ -244,8 +243,8 @@ void __devinit generic_identify(struct c
   49.12  	
   49.13  		/* Intel-defined flags: level 0x00000001 */
   49.14  		if ( c->cpuid_level >= 0x00000001 ) {
   49.15 -			u32 capability, excap;
   49.16 -			cpuid(0x00000001, &tfms, &junk, &excap, &capability);
   49.17 +			u32 capability, excap, ebx;
   49.18 +			cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
   49.19  			c->x86_capability[0] = capability;
   49.20  			c->x86_capability[4] = excap;
   49.21  			c->x86 = (tfms >> 8) & 15;
   49.22 @@ -255,6 +254,8 @@ void __devinit generic_identify(struct c
   49.23  				c->x86_model += ((tfms >> 16) & 0xF) << 4;
   49.24  			} 
   49.25  			c->x86_mask = tfms & 15;
   49.26 +			if ( cpu_has(c, X86_FEATURE_CLFLSH) )
   49.27 +				c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
   49.28  		} else {
   49.29  			/* Have CPUID level 0 only - unheard of */
   49.30  			c->x86 = 4;
   49.31 @@ -313,6 +314,7 @@ void __devinit identify_cpu(struct cpuin
   49.32  	c->x86_vendor_id[0] = '\0'; /* Unset */
   49.33  	c->x86_model_id[0] = '\0';  /* Unset */
   49.34  	c->x86_max_cores = 1;
   49.35 +	c->x86_clflush_size = 0;
   49.36  	memset(&c->x86_capability, 0, sizeof c->x86_capability);
   49.37  
   49.38  	if (!have_cpuid_p()) {
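
generic_identify() now records the CLFLUSH line size. CPUID leaf 1 reports it in EBX bits 15:8 as a count of 8-byte chunks, hence the *8 in the hunk above; the value feeds the clflush loop added to flush_area_local() elsewhere in this changeset. A standalone decode (the sample EBX value is invented):

    #include <stdio.h>

    int main(void)
    {
        /* Invented CPUID.1 EBX: CLFLUSH field (bits 15:8) = 8 chunks. */
        unsigned int ebx = 0x00080800;
        unsigned int clflush_size = ((ebx >> 8) & 0xff) * 8;
        printf("x86_clflush_size = %u bytes\n", clflush_size);  /* 8 * 8 = 64 */
        return 0;
    }
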
    50.1 --- a/xen/arch/x86/cpu/cyrix.c	Wed Oct 17 10:36:31 2007 -0600
    50.2 +++ b/xen/arch/x86/cpu/cyrix.c	Sun Oct 21 12:10:25 2007 -0600
    50.3 @@ -239,7 +239,7 @@ static void __init init_cyrix(struct cpu
    50.4  		/* Emulate MTRRs using Cyrix's ARRs. */
    50.5  		set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
    50.6  		/* 6x86's contain this bug */
    50.7 -		c->coma_bug = 1;
    50.8 +		/*c->coma_bug = 1;*/
    50.9  		break;
   50.10  
   50.11  	case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
   50.12 @@ -272,7 +272,7 @@ static void __init init_cyrix(struct cpu
   50.13  		}
   50.14  		else
   50.15  		{
   50.16 -			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
   50.17 +			/*c->coma_bug = 1;*/      /* 6x86MX, it has the bug. */
   50.18  		}
   50.19  		tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
   50.20  		Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
   50.21 @@ -287,7 +287,7 @@ static void __init init_cyrix(struct cpu
   50.22  		switch (dir0_lsn) {
   50.23  		case 0xd:  /* either a 486SLC or DLC w/o DEVID */
   50.24  			dir0_msn = 0;
   50.25 -			p = Cx486_name[(c->hard_math) ? 1 : 0];
   50.26 +			p = Cx486_name[/*(c->hard_math) ? 1 : 0*/1];
   50.27  			break;
   50.28  
   50.29  		case 0xe:  /* a 486S A step */
    51.1 --- a/xen/arch/x86/cpu/mtrr/generic.c	Wed Oct 17 10:36:31 2007 -0600
    51.2 +++ b/xen/arch/x86/cpu/mtrr/generic.c	Sun Oct 21 12:10:25 2007 -0600
    51.3 @@ -313,7 +313,7 @@ static void prepare_set(void)
    51.4  	}
    51.5  
    51.6  	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
    51.7 -	local_flush_tlb();
    51.8 +	flush_tlb_local();
    51.9  
   51.10  	/*  Save MTRR state */
   51.11  	rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
   51.12 @@ -325,7 +325,7 @@ static void prepare_set(void)
   51.13  static void post_set(void)
   51.14  {
   51.15  	/*  Flush TLBs (no need to flush caches - they are disabled)  */
   51.16 -	local_flush_tlb();
   51.17 +	flush_tlb_local();
   51.18  
   51.19  	/* Intel (P6) standard MTRRs */
   51.20  	mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
    52.1 --- a/xen/arch/x86/domain.c	Wed Oct 17 10:36:31 2007 -0600
    52.2 +++ b/xen/arch/x86/domain.c	Sun Oct 21 12:10:25 2007 -0600
    52.3 @@ -1299,7 +1299,7 @@ void context_switch(struct vcpu *prev, s
    52.4          {
    52.5              uint64_t efer = read_efer();
    52.6  
    52.7 -            local_flush_tlb_one(GDT_VIRT_START(next) +
    52.8 +            flush_tlb_one_local(GDT_VIRT_START(next) +
    52.9                                  FIRST_RESERVED_GDT_BYTE);
   52.10  
   52.11              if ( !is_pv_32on64_vcpu(next) == !(efer & EFER_SCE) )
    53.1 --- a/xen/arch/x86/domain_build.c	Wed Oct 17 10:36:31 2007 -0600
    53.2 +++ b/xen/arch/x86/domain_build.c	Sun Oct 21 12:10:25 2007 -0600
    53.3 @@ -347,7 +347,7 @@ int __init construct_dom0(
    53.4          for ( i = 0; i < MAX_VIRT_CPUS; i++ )
    53.5              d->arch.mm_perdomain_pt[((i << GDT_LDT_VCPU_SHIFT) +
    53.6                                       FIRST_RESERVED_GDT_PAGE)] = gdt_l1e;
    53.7 -        local_flush_tlb_one(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
    53.8 +        flush_tlb_one_local(GDT_LDT_VIRT_START + FIRST_RESERVED_GDT_BYTE);
    53.9      }
   53.10  #endif
   53.11      if ( parms.pae == PAEKERN_extended_cr3 )
    54.1 --- a/xen/arch/x86/domctl.c	Wed Oct 17 10:36:31 2007 -0600
    54.2 +++ b/xen/arch/x86/domctl.c	Sun Oct 21 12:10:25 2007 -0600
    54.3 @@ -531,10 +531,10 @@ long arch_do_domctl(
    54.4          struct hvm_iommu *hd;
    54.5          u8 bus, devfn;
    54.6  
    54.7 -        if (!vtd_enabled)
    54.8 +        ret = -EINVAL;
    54.9 +        if ( !vtd_enabled )
   54.10              break;
   54.11  
   54.12 -        ret = -EINVAL;
   54.13          if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) ) {
   54.14              gdprintk(XENLOG_ERR,
   54.15                  "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n"); 
   54.16 @@ -543,6 +543,10 @@ long arch_do_domctl(
   54.17          hd = domain_hvm_iommu(d);
   54.18          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   54.19          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   54.20 +
   54.21 +        if ( device_assigned(bus, devfn) )
   54.22 +            break;
   54.23 +
   54.24          ret = assign_device(d, bus, devfn);
   54.25          gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
   54.26              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
    55.1 --- a/xen/arch/x86/flushtlb.c	Wed Oct 17 10:36:31 2007 -0600
    55.2 +++ b/xen/arch/x86/flushtlb.c	Sun Oct 21 12:10:25 2007 -0600
    55.3 @@ -84,10 +84,10 @@ void write_cr3(unsigned long cr3)
    55.4  
    55.5  #ifdef USER_MAPPINGS_ARE_GLOBAL
    55.6      __pge_off();
    55.7 -    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    55.8 +    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    55.9      __pge_on();
   55.10  #else
   55.11 -    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
   55.12 +    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
   55.13  #endif
   55.14  
   55.15      post_flush(t);
   55.16 @@ -95,26 +95,75 @@ void write_cr3(unsigned long cr3)
   55.17      local_irq_restore(flags);
   55.18  }
   55.19  
   55.20 -void local_flush_tlb(void)
   55.21 +void flush_area_local(const void *va, unsigned int flags)
   55.22  {
   55.23 -    unsigned long flags;
   55.24 -    u32 t;
   55.25 +    const struct cpuinfo_x86 *c = &current_cpu_data;
   55.26 +    unsigned int level = flags & FLUSH_LEVEL_MASK;
   55.27 +    unsigned long irqfl;
   55.28 +
   55.29 +    ASSERT(level < CONFIG_PAGING_LEVELS);
   55.30  
   55.31      /* This non-reentrant function is sometimes called in interrupt context. */
   55.32 -    local_irq_save(flags);
   55.33 +    local_irq_save(irqfl);
   55.34  
   55.35 -    t = pre_flush();
   55.36 +    if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) )
   55.37 +    {
   55.38 +        if ( level == 1 )
   55.39 +        {
   55.40 +            /*
   55.41 +             * We don't INVLPG multi-page regions because the 2M/4M/1G
   55.42 +             * region may not have been mapped with a superpage. Also there
   55.43 +             * are various errata surrounding INVLPG usage on superpages, and
   55.44 +             * a full flush is in any case not *that* expensive.
   55.45 +             */
   55.46 +            asm volatile ( "invlpg %0"
   55.47 +                           : : "m" (*(const char *)(va)) : "memory" );
   55.48 +        }
   55.49 +        else
   55.50 +        {
   55.51 +            u32 t = pre_flush();
   55.52  
   55.53 -    hvm_flush_guest_tlbs();
   55.54 +            hvm_flush_guest_tlbs();
   55.55  
   55.56 -#ifdef USER_MAPPINGS_ARE_GLOBAL
   55.57 -    __pge_off();
   55.58 -    __pge_on();
   55.59 -#else
   55.60 -    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
   55.61 +#ifndef USER_MAPPINGS_ARE_GLOBAL
   55.62 +            if ( !(flags & FLUSH_TLB_GLOBAL) ||
   55.63 +                 !(mmu_cr4_features & X86_CR4_PGE) )
   55.64 +            {
   55.65 +                asm volatile ( "mov %0, %%cr3"
   55.66 +                               : : "r" (read_cr3()) : "memory" );
   55.67 +            }
   55.68 +            else
   55.69  #endif
   55.70 +            {
   55.71 +                __pge_off();
   55.72 +                barrier();
   55.73 +                __pge_on();
   55.74 +            }
   55.75  
   55.76 -    post_flush(t);
   55.77 +            post_flush(t);
   55.78 +        }
   55.79 +    }
   55.80 +
   55.81 +    if ( flags & FLUSH_CACHE )
   55.82 +    {
   55.83 +        unsigned long i, sz;
   55.84 +
   55.85 +        sz = level ? (1UL << ((level - 1) * PAGETABLE_ORDER)) : ULONG_MAX;
   55.86  
   55.87 -    local_irq_restore(flags);
   55.88 +        if ( c->x86_clflush_size && c->x86_cache_size &&
   55.89 +             (sz < (c->x86_cache_size >> (PAGE_SHIFT - 10))) )
   55.90 +        {
   55.91 +            sz <<= PAGE_SHIFT;
   55.92 +            va = (const void *)((unsigned long)va & ~(sz - 1));
   55.93 +            for ( i = 0; i < sz; i += c->x86_clflush_size )
   55.94 +                 asm volatile ( "clflush %0"
   55.95 +                                : : "m" (((const char *)va)[i]) );
   55.96 +        }
   55.97 +        else
   55.98 +        {
   55.99 +            wbinvd();
  55.100 +        }
  55.101 +    }
  55.102 +
  55.103 +    local_irq_restore(irqfl);
  55.104  }
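
flush_area_local() replaces local_flush_tlb() with a flag-driven primitive: on the TLB side it uses INVLPG for a single 4KB page but a CR3 reload or PGE toggle for larger scopes, and on the cache side it runs a CLFLUSH loop only when the region is smaller than the whole cache, falling back to WBINVD otherwise. A standalone sketch of just that cache-path sizing rule, with the flag plumbing omitted; the constants assume 4KB pages and 512-entry tables, and the CPU figures are invented:

    #include <stdio.h>

    #define PAGE_SHIFT      12
    #define PAGETABLE_ORDER 9

    /* Mirror of the sizing logic: CLFLUSH a 'level'-sized region only if
     * it is smaller than the whole cache (x86_cache_size is in KB). */
    static const char *cache_flush_method(unsigned int level,
                                          unsigned int clflush_size,
                                          unsigned int cache_size_kb)
    {
        unsigned long sz = level ? (1UL << ((level - 1) * PAGETABLE_ORDER))
                                 : ~0UL;  /* level 0: entire address space */
        if (clflush_size && cache_size_kb &&
            sz < ((unsigned long)cache_size_kb >> (PAGE_SHIFT - 10)))
            return "clflush loop";
        return "wbinvd";
    }

    int main(void)
    {
        /* Invented CPU: 64-byte lines, 4MB cache (1024 pages). */
        printf("level 1 (4KB) : %s\n", cache_flush_method(1, 64, 4096));
        printf("level 2 (2MB) : %s\n", cache_flush_method(2, 64, 4096));
        printf("level 0 (all) : %s\n", cache_flush_method(0, 64, 4096));
        return 0;
    }
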
    56.1 --- a/xen/arch/x86/hvm/hvm.c	Wed Oct 17 10:36:31 2007 -0600
    56.2 +++ b/xen/arch/x86/hvm/hvm.c	Sun Oct 21 12:10:25 2007 -0600
    56.3 @@ -358,10 +358,12 @@ static int hvm_load_cpu_ctxt(struct doma
    56.4          return -EINVAL;
    56.5      }
    56.6  
    56.7 -    if ( (ctxt.msr_efer & ~(EFER_LME | EFER_LMA | EFER_NX | EFER_SCE)) ||
    56.8 +    if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA |
    56.9 +                            EFER_NX | EFER_SCE)) ||
   56.10           ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) ||
   56.11           (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) ||
   56.12           (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) ||
   56.13 +         (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) ||
   56.14           ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) )
   56.15      {
   56.16          gdprintk(XENLOG_ERR, "HVM restore: bad EFER 0x%"PRIx64"\n",
   56.17 @@ -576,10 +578,11 @@ int hvm_set_efer(uint64_t value)
   56.18  
   56.19      value &= ~EFER_LMA;
   56.20  
   56.21 -    if ( (value & ~(EFER_LME | EFER_NX | EFER_SCE)) ||
   56.22 +    if ( (value & ~(EFER_FFXSE | EFER_LME | EFER_NX | EFER_SCE)) ||
   56.23           ((sizeof(long) != 8) && (value & EFER_LME)) ||
   56.24           (!cpu_has_nx && (value & EFER_NX)) ||
   56.25 -         (!cpu_has_syscall && (value & EFER_SCE)) )
   56.26 +         (!cpu_has_syscall && (value & EFER_SCE)) ||
   56.27 +         (!cpu_has_ffxsr && (value & EFER_FFXSE)) )
   56.28      {
   56.29          gdprintk(XENLOG_WARNING, "Trying to set reserved bit in "
   56.30                   "EFER: %"PRIx64"\n", value);
   56.31 @@ -1256,40 +1259,40 @@ void hvm_print_line(struct vcpu *v, cons
   56.32  void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
   56.33                                     unsigned int *ecx, unsigned int *edx)
   56.34  {
   56.35 -    if ( !cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
   56.36 -    {
   56.37 -        cpuid(input, eax, ebx, ecx, edx);
   56.38 +    struct vcpu *v = current;
   56.39 +
   56.40 +    if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
   56.41 +        return;
   56.42 +
   56.43 +    cpuid(input, eax, ebx, ecx, edx);
   56.44  
   56.45 -        if ( input == 0x00000001 )
   56.46 -        {
   56.47 -            struct vcpu *v = current;
   56.48 +    switch ( input )
   56.49 +    {
   56.50 +    case 0x00000001:
   56.51 +        __clear_bit(X86_FEATURE_MWAIT & 31, ecx);
   56.52  
   56.53 -            clear_bit(X86_FEATURE_MWAIT & 31, ecx);
   56.54 -
   56.55 -            if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
   56.56 -                clear_bit(X86_FEATURE_APIC & 31, edx);
   56.57 +        if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
   56.58 +            __clear_bit(X86_FEATURE_APIC & 31, edx);
   56.59  
   56.60  #if CONFIG_PAGING_LEVELS >= 3
   56.61 -            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
   56.62 +        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
   56.63  #endif
   56.64 -                clear_bit(X86_FEATURE_PAE & 31, edx);
   56.65 -            clear_bit(X86_FEATURE_PSE36 & 31, edx);
   56.66 -        }
   56.67 -        else if ( input == 0x80000001 )
   56.68 -        {
   56.69 +            __clear_bit(X86_FEATURE_PAE & 31, edx);
   56.70 +        __clear_bit(X86_FEATURE_PSE36 & 31, edx);
   56.71 +        break;
   56.72 +
   56.73 +    case 0x80000001:
   56.74  #if CONFIG_PAGING_LEVELS >= 3
   56.75 -            struct vcpu *v = current;
   56.76 -            if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
   56.77 +        if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
   56.78  #endif
   56.79 -                clear_bit(X86_FEATURE_NX & 31, edx);
   56.80 +            __clear_bit(X86_FEATURE_NX & 31, edx);
   56.81  #ifdef __i386__
   56.82 -            /* Mask feature for Intel ia32e or AMD long mode. */
   56.83 -            clear_bit(X86_FEATURE_LAHF_LM & 31, ecx);
   56.84 -
   56.85 -            clear_bit(X86_FEATURE_LM & 31, edx);
   56.86 -            clear_bit(X86_FEATURE_SYSCALL & 31, edx);
   56.87 +        /* Mask feature for Intel ia32e or AMD long mode. */
   56.88 +        __clear_bit(X86_FEATURE_LAHF_LM & 31, ecx);
   56.89 +        __clear_bit(X86_FEATURE_LM & 31, edx);
   56.90 +        __clear_bit(X86_FEATURE_SYSCALL & 31, edx);
   56.91  #endif
   56.92 -        }
   56.93 +        break;
   56.94      }
   56.95  }
   56.96  
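The __clear_bit(X86_FEATURE_x & 31, reg) idiom in hvm_cpuid() works because Xen numbers CPU features as 32*word + bit; masking with 31 keeps only the bit position within the one 32-bit leaf register being edited, and the non-atomic __clear_bit() variants are safe here since the registers are private locals. A standalone illustration of the bit arithmetic (the feature numbers follow the conventional scheme but are included only for the demo):

    #include <stdio.h>

    /* Feature numbers in the 32*word + bit scheme. */
    #define X86_FEATURE_PAE (0*32 + 6)   /* word 0, bit 6  */
    #define X86_FEATURE_NX  (1*32 + 20)  /* word 1, bit 20 */

    static void clear_feature(unsigned int feature, unsigned int *reg)
    {
        /* The word part is implicit in which leaf register you pass. */
        *reg &= ~(1u << (feature & 31));
    }

    int main(void)
    {
        unsigned int edx = 0xffffffff;
        clear_feature(X86_FEATURE_PAE, &edx);  /* clears bit 6  */
        clear_feature(X86_FEATURE_NX, &edx);   /* clears bit 20 */
        printf("edx = %#x\n", edx);            /* 0xffefffbf */
        return 0;
    }
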
    57.1 --- a/xen/arch/x86/hvm/save.c	Wed Oct 17 10:36:31 2007 -0600
    57.2 +++ b/xen/arch/x86/hvm/save.c	Sun Oct 21 12:10:25 2007 -0600
    57.3 @@ -3,6 +3,9 @@
    57.4   *
    57.5   * Copyright (c) 2004, Intel Corporation.
    57.6   * Copyright (c) 2007, XenSource Inc.
    57.7 + * Copyright (c) 2007, Isaku Yamahata <yamahata at valinux co jp>
    57.8 + *                     VA Linux Systems Japan K.K.
    57.9 + *                     split x86 specific part
   57.10   *
   57.11   * This program is free software; you can redistribute it and/or modify it
   57.12   * under the terms and conditions of the GNU General Public License,
   57.13 @@ -18,212 +21,54 @@
   57.14   * Place - Suite 330, Boston, MA 02111-1307 USA.
   57.15   */
   57.16  
   57.17 -#include <xen/config.h>
   57.18 -#include <xen/lib.h>
   57.19 -#include <xen/version.h>
   57.20 -#include <public/version.h>
   57.21 -#include <xen/sched.h>
   57.22 -#include <asm/hvm/hvm.h>
   57.23  #include <asm/hvm/support.h>
   57.24 -#include <asm/hvm/domain.h>
   57.25 -#include <asm/current.h>
   57.26 -
   57.27 -/* List of handlers for various HVM save and restore types */
   57.28 -static struct { 
   57.29 -    hvm_save_handler save;
   57.30 -    hvm_load_handler load; 
   57.31 -    const char *name;
   57.32 -    size_t size;
   57.33 -    int kind;
   57.34 -} hvm_sr_handlers [HVM_SAVE_CODE_MAX + 1] = {{NULL, NULL, "<?>"},};
   57.35 +#include <public/hvm/save.h>
   57.36  
   57.37 -/* Init-time function to add entries to that list */
   57.38 -void hvm_register_savevm(uint16_t typecode, 
   57.39 -                         const char *name,
   57.40 -                         hvm_save_handler save_state,
   57.41 -                         hvm_load_handler load_state,
   57.42 -                         size_t size, int kind)
   57.43 -{
   57.44 -    ASSERT(typecode <= HVM_SAVE_CODE_MAX);
   57.45 -    ASSERT(hvm_sr_handlers[typecode].save == NULL);
   57.46 -    ASSERT(hvm_sr_handlers[typecode].load == NULL);
   57.47 -    hvm_sr_handlers[typecode].save = save_state;
   57.48 -    hvm_sr_handlers[typecode].load = load_state;
   57.49 -    hvm_sr_handlers[typecode].name = name;
   57.50 -    hvm_sr_handlers[typecode].size = size;
   57.51 -    hvm_sr_handlers[typecode].kind = kind;
   57.52 -}
   57.53 -
   57.54 -size_t hvm_save_size(struct domain *d) 
   57.55 -{
   57.56 -    struct vcpu *v;
   57.57 -    size_t sz;
   57.58 -    int i;
   57.59 -    
   57.60 -    /* Basic overhead for header and footer */
   57.61 -    sz = (2 * sizeof (struct hvm_save_descriptor)) + HVM_SAVE_LENGTH(HEADER);
   57.62 -
   57.63 -    /* Plus space for each thing we will be saving */
   57.64 -    for ( i = 0; i <= HVM_SAVE_CODE_MAX; i++ ) 
   57.65 -        if ( hvm_sr_handlers[i].kind == HVMSR_PER_VCPU )
   57.66 -            for_each_vcpu(d, v)
   57.67 -                sz += hvm_sr_handlers[i].size;
   57.68 -        else 
   57.69 -            sz += hvm_sr_handlers[i].size;
   57.70 -
   57.71 -    return sz;
   57.72 -}
   57.73 -
   57.74 -
   57.75 -int hvm_save(struct domain *d, hvm_domain_context_t *h)
   57.76 +void
   57.77 +arch_hvm_save(struct hvm_save_header *hdr)
   57.78  {
   57.79      uint32_t eax, ebx, ecx, edx;
   57.80 -    char *c;
   57.81 -    struct hvm_save_header hdr;
   57.82 -    struct hvm_save_end end;
   57.83 -    hvm_save_handler handler;
   57.84 -    uint16_t i;
   57.85 -
   57.86 -    hdr.magic = HVM_FILE_MAGIC;
   57.87 -    hdr.version = HVM_FILE_VERSION;
   57.88  
   57.89      /* Save some CPUID bits */
   57.90      cpuid(1, &eax, &ebx, &ecx, &edx);
   57.91 -    hdr.cpuid = eax;
   57.92 -
   57.93 -    /* Save xen changeset */
   57.94 -    c = strrchr(xen_changeset(), ':');
   57.95 -    if ( c )
   57.96 -        hdr.changeset = simple_strtoll(c, NULL, 16);
   57.97 -    else 
   57.98 -        hdr.changeset = -1ULL; /* Unknown */
   57.99 -
  57.100 -    hdr.pad0 = 0;
  57.101 -
  57.102 -    if ( hvm_save_entry(HEADER, 0, h, &hdr) != 0 )
  57.103 -    {
  57.104 -        gdprintk(XENLOG_ERR, "HVM save: failed to write header\n");
  57.105 -        return -EFAULT;
  57.106 -    } 
  57.107 +    hdr->cpuid = eax;
  57.108  
  57.109 -    /* Save all available kinds of state */
  57.110 -    for ( i = 0; i <= HVM_SAVE_CODE_MAX; i++ ) 
  57.111 -    {
  57.112 -        handler = hvm_sr_handlers[i].save;
  57.113 -        if ( handler != NULL ) 
  57.114 -        {
  57.115 -            gdprintk(XENLOG_INFO, "HVM save: %s\n",  hvm_sr_handlers[i].name);
  57.116 -            if ( handler(d, h) != 0 ) 
  57.117 -            {
  57.118 -                gdprintk(XENLOG_ERR, 
  57.119 -                         "HVM save: failed to save type %"PRIu16"\n", i);
  57.120 -                return -EFAULT;
  57.121 -            } 
  57.122 -        }
  57.123 -    }
  57.124 -
  57.125 -    /* Save an end-of-file marker */
  57.126 -    if ( hvm_save_entry(END, 0, h, &end) != 0 )
  57.127 -    {
  57.128 -        /* Run out of data */
  57.129 -        gdprintk(XENLOG_ERR, "HVM save: no room for end marker.\n");
  57.130 -        return -EFAULT;
  57.131 -    }
  57.132 -
  57.133 -    /* Save macros should not have let us overrun */
  57.134 -    ASSERT(h->cur <= h->size);
  57.135 -    return 0;
  57.136 +    hdr->pad0 = 0;
  57.137  }
  57.138  
  57.139 -int hvm_load(struct domain *d, hvm_domain_context_t *h)
  57.140 +int
  57.141 +arch_hvm_load(struct hvm_save_header *hdr)
  57.142  {
  57.143      uint32_t eax, ebx, ecx, edx;
  57.144 -    char *c;
  57.145 -    uint64_t cset;
  57.146 -    struct hvm_save_header hdr;
  57.147 -    struct hvm_save_descriptor *desc;
  57.148 -    hvm_load_handler handler;
  57.149 -    struct vcpu *v;
  57.150 -    
  57.151 -    /* Read the save header, which must be first */
  57.152 -    if ( hvm_load_entry(HEADER, h, &hdr) != 0 ) 
  57.153 -        return -1;
  57.154 -
  57.155 -    if (hdr.magic != HVM_FILE_MAGIC) {
  57.156 +    if ( hdr->magic != HVM_FILE_MAGIC )
  57.157 +    {
  57.158          gdprintk(XENLOG_ERR, 
  57.159 -                 "HVM restore: bad magic number %#"PRIx32"\n", hdr.magic);
  57.160 +                 "HVM restore: bad magic number %#"PRIx32"\n", hdr->magic);
  57.161          return -1;
  57.162      }
  57.163  
  57.164 -    if (hdr.version != HVM_FILE_VERSION) {
  57.165 +    if ( hdr->version != HVM_FILE_VERSION )
  57.166 +    {
  57.167          gdprintk(XENLOG_ERR, 
  57.168 -                 "HVM restore: unsupported version %u\n", hdr.version);
  57.169 +                 "HVM restore: unsupported version %u\n", hdr->version);
  57.170          return -1;
  57.171      }
  57.172  
  57.173      cpuid(1, &eax, &ebx, &ecx, &edx);
  57.174      /*TODO: need to define how big a difference is acceptable */
  57.175 -    if (hdr.cpuid != eax)
  57.176 +    if ( hdr->cpuid != eax )
  57.177          gdprintk(XENLOG_WARNING, "HVM restore: saved CPUID (%#"PRIx32") "
  57.178 -               "does not match host (%#"PRIx32").\n", hdr.cpuid, eax);
  57.179 -
  57.180 +               "does not match host (%#"PRIx32").\n", hdr->cpuid, eax);
  57.181  
  57.182 -    c = strrchr(xen_changeset(), ':');
  57.183 -    if ( hdr.changeset == -1ULL )
  57.184 -        gdprintk(XENLOG_WARNING, 
  57.185 -                 "HVM restore: Xen changeset was not saved.\n");
  57.186 -    else if ( c == NULL )
  57.187 -        gdprintk(XENLOG_WARNING, 
  57.188 -                 "HVM restore: Xen changeset is not available.\n");
  57.189 -    else
  57.190 -    {
  57.191 -        cset = simple_strtoll(c, NULL, 16);
  57.192 -        if ( hdr.changeset != cset )
  57.193 -        gdprintk(XENLOG_WARNING, "HVM restore: saved Xen changeset (%#"PRIx64
  57.194 -                 ") does not match host (%#"PRIx64").\n", hdr.changeset, cset);
  57.195 -    }
  57.196 -
  57.197 -    /* Down all the vcpus: we only re-enable the ones that had state saved. */
  57.198 -    for_each_vcpu(d, v) 
  57.199 -        if ( test_and_set_bit(_VPF_down, &v->pause_flags) )
  57.200 -            vcpu_sleep_nosync(v);
  57.201 -
  57.202 -    while(1) {
  57.203 +    return 0;
  57.204 +}
  57.205  
  57.206 -        if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
  57.207 -        {
  57.208 -            /* Run out of data */
  57.209 -            gdprintk(XENLOG_ERR, 
  57.210 -                     "HVM restore: save did not end with a null entry\n");
  57.211 -            return -1;
  57.212 -        }
  57.213 -        
  57.214 -        /* Read the typecode of the next entry  and check for the end-marker */
  57.215 -        desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
  57.216 -        if ( desc->typecode == 0 )
  57.217 -            return 0; 
  57.218 -        
  57.219 -        /* Find the handler for this entry */
  57.220 -        if ( desc->typecode > HVM_SAVE_CODE_MAX 
  57.221 -             || (handler = hvm_sr_handlers[desc->typecode].load) == NULL ) 
  57.222 -        {
  57.223 -            gdprintk(XENLOG_ERR, 
  57.224 -                     "HVM restore: unknown entry typecode %u\n", 
  57.225 -                     desc->typecode);
  57.226 -            return -1;
  57.227 -        }
  57.228 -
  57.229 -        /* Load the entry */
  57.230 -        gdprintk(XENLOG_INFO, "HVM restore: %s %"PRIu16"\n",  
  57.231 -                 hvm_sr_handlers[desc->typecode].name, desc->instance);
  57.232 -        if ( handler(d, h) != 0 ) 
  57.233 -        {
  57.234 -            gdprintk(XENLOG_ERR, 
  57.235 -                     "HVM restore: failed to load entry %u/%u\n", 
  57.236 -                     desc->typecode, desc->instance);
  57.237 -            return -1;
  57.238 -        }
  57.239 -    }
  57.240 -
  57.241 -    /* Not reached */
  57.242 -}
  57.243 +/*
  57.244 + * Local variables:
  57.245 + * mode: C
  57.246 + * c-set-style: "BSD"
  57.247 + * c-basic-offset: 4
  57.248 + * tab-width: 4
  57.249 + * indent-tabs-mode: nil
  57.250 + * End:
  57.251 + */
    58.1 --- a/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-map.c	Wed Oct 17 10:36:31 2007 -0600
    58.2 +++ b/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-map.c	Sun Oct 21 12:10:25 2007 -0600
    58.3 @@ -18,10 +18,10 @@
    58.4   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
    58.5   */
    58.6  
    58.7 +#include <xen/sched.h>
    58.8  #include <asm/hvm/iommu.h>
    58.9  #include <asm/amd-iommu.h>
   58.10  #include <asm/hvm/svm/amd-iommu-proto.h>
   58.11 -#include <xen/sched.h>
   58.12  
   58.13  extern long amd_iommu_poll_comp_wait;
   58.14  
    59.1 --- a/xen/arch/x86/hvm/svm/svm.c	Wed Oct 17 10:36:31 2007 -0600
    59.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Sun Oct 21 12:10:25 2007 -0600
    59.3 @@ -999,8 +999,9 @@ static void svm_vmexit_do_cpuid(struct v
    59.4  
    59.5      hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
    59.6  
    59.7 -    if ( input == 0x00000001 )
    59.8 +    switch ( input )
    59.9      {
   59.10 +    case 0x00000001:
   59.11          /* Clear out reserved bits. */
   59.12          ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
   59.13          edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
   59.14 @@ -1008,52 +1009,56 @@ static void svm_vmexit_do_cpuid(struct v
   59.15          /* Guest should only see one logical processor.
   59.16           * See details on page 23 of AMD CPUID Specification.
   59.17           */
   59.18 -        clear_bit(X86_FEATURE_HT & 31, &edx);  /* clear the hyperthread bit */
   59.19 +        __clear_bit(X86_FEATURE_HT & 31, &edx);
   59.20          ebx &= 0xFF00FFFF;  /* clear the logical processor count when HTT=0 */
   59.21          ebx |= 0x00010000;  /* set to 1 just for precaution */
   59.22 -    }
   59.23 -    else if ( input == 0x80000001 )
   59.24 -    {
   59.25 +        break;
   59.26 +
   59.27 +    case 0x80000001:
   59.28          if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
   59.29 -            clear_bit(X86_FEATURE_APIC & 31, &edx);
   59.30 +            __clear_bit(X86_FEATURE_APIC & 31, &edx);
   59.31  
   59.32  #if CONFIG_PAGING_LEVELS >= 3
   59.33          if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
   59.34  #endif
   59.35 -            clear_bit(X86_FEATURE_PAE & 31, &edx);
   59.36 +            __clear_bit(X86_FEATURE_PAE & 31, &edx);
   59.37  
   59.38 -        clear_bit(X86_FEATURE_PSE36 & 31, &edx);
   59.39 +        __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
   59.40  
   59.41          /* Clear the Cmp_Legacy bit
   59.42           * This bit is supposed to be zero when HTT = 0.
   59.43           * See details on page 23 of AMD CPUID Specification.
   59.44           */
   59.45 -        clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
   59.46 +        __clear_bit(X86_FEATURE_CMP_LEGACY & 31, &ecx);
   59.47  
   59.48          /* Make SVM feature invisible to the guest. */
   59.49 -        clear_bit(X86_FEATURE_SVME & 31, &ecx);
   59.50 +        __clear_bit(X86_FEATURE_SVME & 31, &ecx);
   59.51 +        __clear_bit(X86_FEATURE_SKINIT & 31, &ecx);
   59.52 +
   59.53 +        __clear_bit(X86_FEATURE_OSVW & 31, &ecx);
   59.54 +        __clear_bit(X86_FEATURE_WDT & 31, &ecx);
   59.55  
   59.56          /* So far, we do not support 3DNow for the guest. */
   59.57 -        clear_bit(X86_FEATURE_3DNOW & 31, &edx);
   59.58 -        clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
   59.59 -        /* no FFXSR instructions feature. */
   59.60 -        clear_bit(X86_FEATURE_FFXSR & 31, &edx);
   59.61 -    }
   59.62 -    else if ( input == 0x80000007 || input == 0x8000000A )
   59.63 -    {
   59.64 +        __clear_bit(X86_FEATURE_3DNOW & 31, &edx);
   59.65 +        __clear_bit(X86_FEATURE_3DNOWEXT & 31, &edx);
   59.66 +        break;
   59.67 +
   59.68 +    case 0x80000007:
   59.69 +    case 0x8000000A:
   59.70          /* Mask out features of power management and SVM extension. */
   59.71          eax = ebx = ecx = edx = 0;
   59.72 -    }
   59.73 -    else if ( input == 0x80000008 )
   59.74 -    {
   59.75 +        break;
   59.76 +
   59.77 +    case 0x80000008:
   59.78          /* Make sure Number of CPU core is 1 when HTT=0 */
   59.79          ecx &= 0xFFFFFF00;
   59.80 +        break;
   59.81      }
   59.82  
   59.83 -    regs->eax = (unsigned long)eax;
   59.84 -    regs->ebx = (unsigned long)ebx;
   59.85 -    regs->ecx = (unsigned long)ecx;
   59.86 -    regs->edx = (unsigned long)edx;
   59.87 +    regs->eax = eax;
   59.88 +    regs->ebx = ebx;
   59.89 +    regs->ecx = ecx;
   59.90 +    regs->edx = edx;
   59.91  
   59.92      HVMTRACE_3D(CPUID, v, input,
   59.93                  ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
    60.1 --- a/xen/arch/x86/hvm/vioapic.c	Wed Oct 17 10:36:31 2007 -0600
    60.2 +++ b/xen/arch/x86/hvm/vioapic.c	Sun Oct 21 12:10:25 2007 -0600
    60.3 @@ -459,7 +459,7 @@ void vioapic_update_EOI(struct domain *d
    60.4      ent->fields.remote_irr = 0;
    60.5  
    60.6      if ( vtd_enabled )
    60.7 -        hvm_dpci_eoi(gsi, ent);
    60.8 +        hvm_dpci_eoi(current->domain, gsi, ent);
    60.9  
   60.10      if ( (ent->fields.trig_mode == VIOAPIC_LEVEL_TRIG) &&
   60.11           !ent->fields.mask &&
    61.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Wed Oct 17 10:36:31 2007 -0600
    61.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Sun Oct 21 12:10:25 2007 -0600
    61.3 @@ -121,10 +121,22 @@ static void vmx_dirq_assist(struct vcpu 
    61.4            irq < NR_IRQS;
    61.5            irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
    61.6      {
    61.7 +        stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
    61.8 +
    61.9          test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask);
   61.10          device = hvm_irq_dpci->mirq[irq].device;
   61.11          intx = hvm_irq_dpci->mirq[irq].intx;
   61.12          hvm_pci_intx_assert(d, device, intx);
   61.13 +
   61.14 +        /*
    61.15 +         * Set a timer to check whether the guest finishes handling the
    61.16 +         * interrupt. For example, the guest OS may unmask the PIC during
    61.17 +         * boot, before the guest driver is loaded: hvm_pci_intx_assert()
    61.18 +         * then succeeds, but the guest never services the irq, and the
    61.19 +         * physical interrupt line is never deasserted.
   61.20 +         */
   61.21 +        set_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)],
   61.22 +                  NOW() + PT_IRQ_TIME_OUT);
   61.23      }
   61.24  }
   61.25  
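The stop_timer()/set_timer() pair added to vmx_dirq_assist() brackets every asserted pass-through interrupt: if the guest EOIs in time, the EOI path is expected to cancel the timer; otherwise it fires after PT_IRQ_TIME_OUT so the physical line can still be serviced. A toy model of that arm/cancel pattern, with the timer API stubbed out (none of these stand-ins are Xen's real primitives):

    #include <stdio.h>

    /* Toy stand-ins for Xen's timer API, for illustration only. */
    struct timer { unsigned long expires; int armed; };
    static unsigned long now;                 /* fake clock, in "ns" */
    #define PT_IRQ_TIME_OUT 8000000UL         /* invented timeout value */

    static void set_timer(struct timer *t, unsigned long when)
    { t->expires = when; t->armed = 1; }
    static void stop_timer(struct timer *t) { t->armed = 0; }

    int main(void)
    {
        struct timer t = { 0, 0 };

        /* Assert the virtual INTx; re-arm the watchdog around it. */
        stop_timer(&t);
        /* hvm_pci_intx_assert(d, device, intx); */
        set_timer(&t, now + PT_IRQ_TIME_OUT);

        /* Case 1: guest EOIs promptly -> the EOI handler stops the timer. */
        stop_timer(&t);
        printf("after EOI: timer %s\n", t.armed ? "armed" : "stopped");

        /* Case 2: no EOI -> at t.expires the timeout handler would run
         * and let the physical line be dealt with anyway. */
        set_timer(&t, now + PT_IRQ_TIME_OUT);
        printf("no EOI: timer fires at %lu\n", t.expires);
        return 0;
    }
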
    62.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Wed Oct 17 10:36:31 2007 -0600
    62.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Sun Oct 21 12:10:25 2007 -0600
    62.3 @@ -413,9 +413,35 @@ static void vmx_set_host_env(struct vcpu
    62.4                (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
    62.5  }
    62.6  
    62.7 +void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr)
    62.8 +{
    62.9 +    char *msr_bitmap = v->arch.hvm_vmx.msr_bitmap;
   62.10 +
   62.11 +    /* VMX MSR bitmap supported? */
   62.12 +    if ( msr_bitmap == NULL )
   62.13 +        return;
   62.14 +
   62.15 +    /*
   62.16 +     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
   62.17 +     * have the write-low and read-high bitmap offsets the wrong way round.
   62.18 +     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
   62.19 +     */
   62.20 +    if ( msr <= 0x1fff )
   62.21 +    {
   62.22 +        __clear_bit(msr, msr_bitmap + 0x000); /* read-low */
   62.23 +        __clear_bit(msr, msr_bitmap + 0x800); /* write-low */
   62.24 +    }
   62.25 +    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
   62.26 +    {
   62.27 +        msr &= 0x1fff;
   62.28 +        __clear_bit(msr, msr_bitmap + 0x400); /* read-high */
   62.29 +        __clear_bit(msr, msr_bitmap + 0xc00); /* write-high */
   62.30 +    }
   62.31 +}
   62.32 +
   62.33  #define GUEST_SEGMENT_LIMIT     0xffffffff
   62.34  
   62.35 -static void construct_vmcs(struct vcpu *v)
   62.36 +static int construct_vmcs(struct vcpu *v)
   62.37  {
   62.38      union vmcs_arbytes arbytes;
   62.39  
   62.40 @@ -430,8 +456,24 @@ static void construct_vmcs(struct vcpu *
   62.41      if ( vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
   62.42          __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
   62.43  
   62.44 +    /* MSR access bitmap. */
   62.45      if ( cpu_has_vmx_msr_bitmap )
   62.46 -        __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
   62.47 +    {
   62.48 +        char *msr_bitmap = alloc_xenheap_page();
   62.49 +
   62.50 +        if ( msr_bitmap == NULL )
   62.51 +            return -ENOMEM;
   62.52 +
   62.53 +        memset(msr_bitmap, ~0, PAGE_SIZE);
   62.54 +        v->arch.hvm_vmx.msr_bitmap = msr_bitmap;
   62.55 +        __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
   62.56 +
   62.57 +        vmx_disable_intercept_for_msr(v, MSR_FS_BASE);
   62.58 +        vmx_disable_intercept_for_msr(v, MSR_GS_BASE);
   62.59 +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
   62.60 +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
   62.61 +        vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
   62.62 +    }
   62.63  
   62.64      /* I/O access bitmap. */
   62.65      __vmwrite(IO_BITMAP_A, virt_to_maddr(hvm_io_bitmap));
   62.66 @@ -463,10 +505,8 @@ static void construct_vmcs(struct vcpu *
   62.67      __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
   62.68  
   62.69      /* MSR intercepts. */
   62.70 -    __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
   62.71 -    __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);
   62.72 +    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
   62.73      __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
   62.74 -    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
   62.75      __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
   62.76  
   62.77      __vmwrite(VM_ENTRY_INTR_INFO, 0);
   62.78 @@ -565,11 +605,108 @@ static void construct_vmcs(struct vcpu *
   62.79      paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
   62.80  
   62.81      vmx_vlapic_msr_changed(v);
   62.82 +
   62.83 +    return 0;
   62.84 +}
   62.85 +
   62.86 +int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val)
   62.87 +{
   62.88 +    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
   62.89 +    const struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
   62.90 +
   62.91 +    for ( i = 0; i < msr_count; i++ )
   62.92 +    {
   62.93 +        if ( msr_area[i].index == msr )
   62.94 +        {
   62.95 +            *val = msr_area[i].data;
   62.96 +            return 0;
   62.97 +        }
   62.98 +    }
   62.99 +
  62.100 +    return -ESRCH;
  62.101 +}
  62.102 +
  62.103 +int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val)
  62.104 +{
  62.105 +    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
  62.106 +    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
  62.107 +
  62.108 +    for ( i = 0; i < msr_count; i++ )
  62.109 +    {
  62.110 +        if ( msr_area[i].index == msr )
  62.111 +        {
  62.112 +            msr_area[i].data = val;
  62.113 +            return 0;
  62.114 +        }
  62.115 +    }
  62.116 +
  62.117 +    return -ESRCH;
  62.118 +}
  62.119 +
  62.120 +int vmx_add_guest_msr(struct vcpu *v, u32 msr)
  62.121 +{
  62.122 +    unsigned int i, msr_count = v->arch.hvm_vmx.msr_count;
  62.123 +    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.msr_area;
  62.124 +
  62.125 +    for ( i = 0; i < msr_count; i++ )
  62.126 +        if ( msr_area[i].index == msr )
  62.127 +            return 0;
  62.128 +
  62.129 +    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
  62.130 +        return -ENOSPC;
  62.131 +
  62.132 +    if ( msr_area == NULL )
  62.133 +    {
  62.134 +        if ( (msr_area = alloc_xenheap_page()) == NULL )
  62.135 +            return -ENOMEM;
  62.136 +        v->arch.hvm_vmx.msr_area = msr_area;
  62.137 +        __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(msr_area));
  62.138 +        __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
  62.139 +    }
  62.140 +
  62.141 +    msr_area[msr_count].index = msr;
  62.142 +    msr_area[msr_count].mbz   = 0;
  62.143 +    msr_area[msr_count].data  = 0;
  62.144 +    v->arch.hvm_vmx.msr_count = ++msr_count;
  62.145 +    __vmwrite(VM_EXIT_MSR_STORE_COUNT, msr_count);
  62.146 +    __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, msr_count);
  62.147 +
  62.148 +    return 0;
  62.149 +}
  62.150 +
  62.151 +int vmx_add_host_load_msr(struct vcpu *v, u32 msr)
  62.152 +{
  62.153 +    unsigned int i, msr_count = v->arch.hvm_vmx.host_msr_count;
  62.154 +    struct vmx_msr_entry *msr_area = v->arch.hvm_vmx.host_msr_area;
  62.155 +
  62.156 +    for ( i = 0; i < msr_count; i++ )
  62.157 +        if ( msr_area[i].index == msr )
  62.158 +            return 0;
  62.159 +
  62.160 +    if ( msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
  62.161 +        return -ENOSPC;
  62.162 +
  62.163 +    if ( msr_area == NULL )
  62.164 +    {
  62.165 +        if ( (msr_area = alloc_xenheap_page()) == NULL )
  62.166 +            return -ENOMEM;
  62.167 +        v->arch.hvm_vmx.host_msr_area = msr_area;
  62.168 +        __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(msr_area));
  62.169 +    }
  62.170 +
  62.171 +    msr_area[msr_count].index = msr;
  62.172 +    msr_area[msr_count].mbz   = 0;
  62.173 +    rdmsrl(msr, msr_area[msr_count].data);
  62.174 +    v->arch.hvm_vmx.host_msr_count = ++msr_count;
  62.175 +    __vmwrite(VM_EXIT_MSR_LOAD_COUNT, msr_count);
  62.176 +
  62.177 +    return 0;
  62.178  }
  62.179  
  62.180  int vmx_create_vmcs(struct vcpu *v)
  62.181  {
  62.182      struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx;
  62.183 +    int rc;
  62.184  
  62.185      if ( arch_vmx->vmcs == NULL )
  62.186      {
  62.187 @@ -582,7 +719,12 @@ int vmx_create_vmcs(struct vcpu *v)
  62.188          arch_vmx->launched   = 0;
  62.189      }
  62.190  
  62.191 -    construct_vmcs(v);
  62.192 +    if ( (rc = construct_vmcs(v)) != 0 )
  62.193 +    {
  62.194 +        vmx_free_vmcs(arch_vmx->vmcs);
  62.195 +        arch_vmx->vmcs = NULL;
  62.196 +        return rc;
  62.197 +    }
  62.198  
  62.199      return 0;
  62.200  }
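
vmx_disable_intercept_for_msr() relies on the bitmap layout described in its comment: one 4KB page holding four 0x400-byte quadrants (read-low at 0x000, read-high at 0x400, write-low at 0x800, write-high at 0xc00), each covering 0x2000 MSRs at one bit apiece. A standalone demo locating the byte and bit for two of the MSRs whitelisted in construct_vmcs() (the MSR index values are the architectural ones):

    #include <stdio.h>

    /* Architectural MSR indexes. */
    #define MSR_IA32_SYSENTER_CS 0x174
    #define MSR_FS_BASE          0xc0000100

    static void locate(const char *name, unsigned int msr, unsigned int write)
    {
        unsigned int base;
        if (msr <= 0x1fff) {
            base = write ? 0x800 : 0x000;      /* low range  */
        } else {                                /* 0xc0000000-0xc0001fff */
            msr &= 0x1fff;
            base = write ? 0xc00 : 0x400;      /* high range */
        }
        printf("%-22s %s bit: byte 0x%03x, bit %u\n",
               name, write ? "write" : "read ", base + msr / 8, msr % 8);
    }

    int main(void)
    {
        locate("MSR_IA32_SYSENTER_CS", MSR_IA32_SYSENTER_CS, 0);
        locate("MSR_IA32_SYSENTER_CS", MSR_IA32_SYSENTER_CS, 1);
        locate("MSR_FS_BASE", MSR_FS_BASE, 0);
        locate("MSR_FS_BASE", MSR_FS_BASE, 1);
        return 0;
    }
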
    63.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Wed Oct 17 10:36:31 2007 -0600
    63.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Sun Oct 21 12:10:25 2007 -0600
    63.3 @@ -53,8 +53,6 @@
    63.4  
    63.5  enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
    63.6  
    63.7 -char *vmx_msr_bitmap;
    63.8 -
    63.9  static void vmx_ctxt_switch_from(struct vcpu *v);
   63.10  static void vmx_ctxt_switch_to(struct vcpu *v);
   63.11  
   63.12 @@ -1106,26 +1104,6 @@ static int vmx_event_pending(struct vcpu
   63.13      return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
   63.14  }
   63.15  
   63.16 -static void disable_intercept_for_msr(u32 msr)
   63.17 -{
   63.18 -    /*
   63.19 -     * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
   63.20 -     * have the write-low and read-high bitmap offsets the wrong way round.
   63.21 -     * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
   63.22 -     */
   63.23 -    if ( msr <= 0x1fff )
   63.24 -    {
   63.25 -        __clear_bit(msr, vmx_msr_bitmap + 0x000); /* read-low */
   63.26 -        __clear_bit(msr, vmx_msr_bitmap + 0x800); /* write-low */
   63.27 -    }
   63.28 -    else if ( (msr >= 0xc0000000) && (msr <= 0xc0001fff) )
   63.29 -    {
   63.30 -        msr &= 0x1fff;
   63.31 -        __clear_bit(msr, vmx_msr_bitmap + 0x400); /* read-high */
   63.32 -        __clear_bit(msr, vmx_msr_bitmap + 0xc00); /* write-high */
   63.33 -    }
   63.34 -}
   63.35 -
   63.36  static struct hvm_function_table vmx_function_table = {
   63.37      .name                 = "VMX",
   63.38      .domain_initialise    = vmx_domain_initialise,
   63.39 @@ -1190,21 +1168,6 @@ void start_vmx(void)
   63.40      setup_vmcs_dump();
   63.41  
   63.42      hvm_enable(&vmx_function_table);
   63.43 -
   63.44 -    if ( cpu_has_vmx_msr_bitmap )
   63.45 -    {
   63.46 -        printk("VMX: MSR intercept bitmap enabled\n");
   63.47 -        vmx_msr_bitmap = alloc_xenheap_page();
   63.48 -        BUG_ON(vmx_msr_bitmap == NULL);
   63.49 -        memset(vmx_msr_bitmap, ~0, PAGE_SIZE);
   63.50 -
   63.51 -        disable_intercept_for_msr(MSR_FS_BASE);
   63.52 -        disable_intercept_for_msr(MSR_GS_BASE);
   63.53 -
   63.54 -        disable_intercept_for_msr(MSR_IA32_SYSENTER_CS);
   63.55 -        disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP);
   63.56 -        disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP);
   63.57 -    }
   63.58  }
   63.59  
   63.60  /*
   63.61 @@ -1253,16 +1216,10 @@ static void vmx_do_no_device_fault(void)
   63.62  #define bitmaskof(idx)  (1U << ((idx) & 31))
   63.63  static void vmx_do_cpuid(struct cpu_user_regs *regs)
   63.64  {
   63.65 -    unsigned int input = (unsigned int)regs->eax;
   63.66 -    unsigned int count = (unsigned int)regs->ecx;
   63.67 +    unsigned int input = regs->eax;
   63.68      unsigned int eax, ebx, ecx, edx;
   63.69  
   63.70 -    if ( input == 0x00000004 )
   63.71 -    {
   63.72 -        cpuid_count(input, count, &eax, &ebx, &ecx, &edx);
   63.73 -        eax &= NUM_CORES_RESET_MASK;
   63.74 -    }
   63.75 -    else if ( input == 0x40000003 )
   63.76 +    if ( input == 0x40000003 )
   63.77      {
   63.78          /*
   63.79           * NB. Unsupported interface for private use of VMXASSIST only.
   63.80 @@ -1292,37 +1249,46 @@ static void vmx_do_cpuid(struct cpu_user
   63.81          unmap_domain_page(p);
   63.82  
   63.83          gdprintk(XENLOG_INFO, "Output value is 0x%"PRIx64".\n", value);
   63.84 -        ecx = (u32)value;
   63.85 -        edx = (u32)(value >> 32);
   63.86 -    } else {
   63.87 -        hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
   63.88 -
   63.89 -        if ( input == 0x00000001 )
   63.90 -        {
   63.91 -            /* Mask off reserved bits. */
   63.92 -            ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED;
   63.93 -
   63.94 -            ebx &= NUM_THREADS_RESET_MASK;
   63.95 -
   63.96 -            /* Unsupportable for virtualised CPUs. */
   63.97 -            ecx &= ~(bitmaskof(X86_FEATURE_VMXE) |
   63.98 -                     bitmaskof(X86_FEATURE_EST)  |
   63.99 -                     bitmaskof(X86_FEATURE_TM2)  |
  63.100 -                     bitmaskof(X86_FEATURE_CID));
  63.101 -
  63.102 -            edx &= ~(bitmaskof(X86_FEATURE_HT)   |
  63.103 -                     bitmaskof(X86_FEATURE_ACPI) |
  63.104 -                     bitmaskof(X86_FEATURE_ACC));
  63.105 -        }
  63.106 -
  63.107 -        if ( input == 0x00000006 || input == 0x00000009 || input == 0x0000000A )
  63.108 -            eax = ebx = ecx = edx = 0x0;
  63.109 +        regs->ecx = (u32)value;
  63.110 +        regs->edx = (u32)(value >> 32);
  63.111 +        return;
  63.112      }
  63.113  
  63.114 -    regs->eax = (unsigned long)eax;
  63.115 -    regs->ebx = (unsigned long)ebx;
  63.116 -    regs->ecx = (unsigned long)ecx;
  63.117 -    regs->edx = (unsigned long)edx;
  63.118 +    hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
  63.119 +
  63.120 +    switch ( input )
  63.121 +    {
  63.122 +    case 0x00000001:
  63.123 +        ecx &= ~VMX_VCPU_CPUID_L1_ECX_RESERVED;
  63.124 +        ebx &= NUM_THREADS_RESET_MASK;
  63.125 +        ecx &= ~(bitmaskof(X86_FEATURE_VMXE) |
  63.126 +                 bitmaskof(X86_FEATURE_EST)  |
  63.127 +                 bitmaskof(X86_FEATURE_TM2)  |
  63.128 +                 bitmaskof(X86_FEATURE_CID)  |
  63.129 +                 bitmaskof(X86_FEATURE_PDCM) |
  63.130 +                 bitmaskof(X86_FEATURE_DSCPL));
  63.131 +        edx &= ~(bitmaskof(X86_FEATURE_HT)   |
  63.132 +                 bitmaskof(X86_FEATURE_ACPI) |
  63.133 +                 bitmaskof(X86_FEATURE_ACC)  |
  63.134 +                 bitmaskof(X86_FEATURE_DS));
  63.135 +        break;
  63.136 +
  63.137 +    case 0x00000004:
  63.138 +        cpuid_count(input, regs->ecx, &eax, &ebx, &ecx, &edx);
  63.139 +        eax &= NUM_CORES_RESET_MASK;
  63.140 +        break;
  63.141 +
  63.142 +    case 0x00000006:
  63.143 +    case 0x00000009:
  63.144 +    case 0x0000000A:
  63.145 +        eax = ebx = ecx = edx = 0;
  63.146 +        break;
  63.147 +    }
  63.148 +
  63.149 +    regs->eax = eax;
  63.150 +    regs->ebx = ebx;
  63.151 +    regs->ecx = ecx;
  63.152 +    regs->edx = edx;
  63.153  
  63.154      HVMTRACE_3D(CPUID, current, input,
  63.155                  ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
  63.156 @@ -2238,6 +2204,82 @@ static int vmx_cr_access(unsigned long e
  63.157      return 1;
  63.158  }
  63.159  
  63.160 +static const struct lbr_info {
  63.161 +    u32 base, count;
  63.162 +} p4_lbr[] = {
  63.163 +    { MSR_P4_LER_FROM_LIP,          1 },
  63.164 +    { MSR_P4_LER_TO_LIP,            1 },
  63.165 +    { MSR_P4_LASTBRANCH_TOS,        1 },
  63.166 +    { MSR_P4_LASTBRANCH_0_FROM_LIP, NUM_MSR_P4_LASTBRANCH_FROM_TO },
  63.167 +    { MSR_P4_LASTBRANCH_0_TO_LIP,   NUM_MSR_P4_LASTBRANCH_FROM_TO },
  63.168 +    { 0, 0 }
  63.169 +}, c2_lbr[] = {
  63.170 +    { MSR_IA32_LASTINTFROMIP,       1 },
  63.171 +    { MSR_IA32_LASTINTTOIP,         1 },
  63.172 +    { MSR_C2_LASTBRANCH_TOS,        1 },
  63.173 +    { MSR_C2_LASTBRANCH_0_FROM_IP,  NUM_MSR_C2_LASTBRANCH_FROM_TO },
  63.174 +    { MSR_C2_LASTBRANCH_0_TO_IP,    NUM_MSR_C2_LASTBRANCH_FROM_TO },
  63.175 +    { 0, 0 }
  63.176 +#ifdef __i386__
  63.177 +}, pm_lbr[] = {
  63.178 +    { MSR_IA32_LASTINTFROMIP,       1 },
  63.179 +    { MSR_IA32_LASTINTTOIP,         1 },
  63.180 +    { MSR_PM_LASTBRANCH_TOS,        1 },
  63.181 +    { MSR_PM_LASTBRANCH_0,          NUM_MSR_PM_LASTBRANCH },
  63.182 +    { 0, 0 }
  63.183 +#endif
  63.184 +};
  63.185 +
  63.186 +static const struct lbr_info *last_branch_msr_get(void)
  63.187 +{
  63.188 +    switch ( boot_cpu_data.x86 )
  63.189 +    {
  63.190 +    case 6:
  63.191 +        switch ( boot_cpu_data.x86_model )
  63.192 +        {
  63.193 +#ifdef __i386__
  63.194 +        /* PentiumM */
  63.195 +        case 9: case 13:
  63.196 +        /* Core Solo/Duo */
  63.197 +        case 14:
  63.198 +            return pm_lbr;
  63.199 +            break;
  63.200 +#endif
  63.201 +        /* Core2 Duo */
  63.202 +        case 15:
  63.203 +            return c2_lbr;
  63.204 +            break;
  63.205 +        }
  63.206 +        break;
  63.207 +
  63.208 +    case 15:
  63.209 +        switch ( boot_cpu_data.x86_model )
  63.210 +        {
  63.211 +        /* Pentium4/Xeon with em64t */
  63.212 +        case 3: case 4: case 6:
  63.213 +            return p4_lbr;
  63.214 +            break;
  63.215 +        }
  63.216 +        break;
  63.217 +    }
  63.218 +
  63.219 +    return NULL;
  63.220 +}
  63.221 +
  63.222 +static int is_last_branch_msr(u32 ecx)
  63.223 +{
  63.224 +    const struct lbr_info *lbr = last_branch_msr_get();
  63.225 +
  63.226 +    if ( lbr == NULL )
  63.227 +        return 0;
  63.228 +
  63.229 +    for ( ; lbr->count; lbr++ )
  63.230 +        if ( (ecx >= lbr->base) && (ecx < (lbr->base + lbr->count)) )
  63.231 +            return 1;
  63.232 +
  63.233 +    return 0;
  63.234 +}
  63.235 +
  63.236  static int vmx_do_msr_read(struct cpu_user_regs *regs)
  63.237  {
  63.238      u64 msr_content = 0;
  63.239 @@ -2263,6 +2305,10 @@ static int vmx_do_msr_read(struct cpu_us
  63.240      case MSR_IA32_APICBASE:
  63.241          msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
  63.242          break;
  63.243 +    case MSR_IA32_DEBUGCTLMSR:
  63.244 +        if ( vmx_read_guest_msr(v, ecx, &msr_content) != 0 )
  63.245 +            msr_content = 0;
  63.246 +        break;
  63.247      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
  63.248          goto gp_fault;
  63.249      case MSR_IA32_MCG_CAP:
  63.250 @@ -2287,6 +2333,15 @@ static int vmx_do_msr_read(struct cpu_us
  63.251                  goto done;
  63.252          }
  63.253  
  63.254 +        if ( vmx_read_guest_msr(v, ecx, &msr_content) == 0 )
  63.255 +            break;
  63.256 +
  63.257 +        if ( is_last_branch_msr(ecx) )
  63.258 +        {
  63.259 +            msr_content = 0;
  63.260 +            break;
  63.261 +        }
  63.262 +
  63.263          if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
  63.264               rdmsr_safe(ecx, eax, edx) == 0 )
  63.265          {
  63.266 @@ -2404,13 +2459,42 @@ static int vmx_do_msr_write(struct cpu_u
  63.267      case MSR_IA32_APICBASE:
  63.268          vlapic_msr_set(vcpu_vlapic(v), msr_content);
  63.269          break;
  63.270 +    case MSR_IA32_DEBUGCTLMSR: {
  63.271 +        int i, rc = 0;
  63.272 +
  63.273 +        if ( !msr_content || (msr_content & ~3) )
  63.274 +            break;
  63.275 +
  63.276 +        if ( msr_content & 1 )
  63.277 +        {
  63.278 +            const struct lbr_info *lbr = last_branch_msr_get();
  63.279 +            if ( lbr == NULL )
  63.280 +                break;
  63.281 +
  63.282 +            for ( ; (rc == 0) && lbr->count; lbr++ )
  63.283 +                for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
  63.284 +                    if ( (rc = vmx_add_guest_msr(v, lbr->base + i)) == 0 )
  63.285 +                        vmx_disable_intercept_for_msr(v, lbr->base + i);
  63.286 +        }
  63.287 +
  63.288 +        if ( (rc < 0) ||
  63.289 +             (vmx_add_guest_msr(v, ecx) < 0) ||
  63.290 +             (vmx_add_host_load_msr(v, ecx) < 0) )
  63.291 +            vmx_inject_hw_exception(v, TRAP_machine_check, 0);
  63.292 +        else
  63.293 +            vmx_write_guest_msr(v, ecx, msr_content);
  63.294 +
  63.295 +        break;
  63.296 +    }
  63.297      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
  63.298          goto gp_fault;
  63.299      default:
  63.300          switch ( long_mode_do_msr_write(regs) )
  63.301          {
  63.302              case HNDL_unhandled:
  63.303 -                wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
  63.304 +                if ( (vmx_write_guest_msr(v, ecx, msr_content) != 0) &&
  63.305 +                     !is_last_branch_msr(ecx) )
  63.306 +                    wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
  63.307                  break;
  63.308              case HNDL_exception_raised:
  63.309                  return 0;
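
Taken together, the LBR hunks above mean that a guest setting the LBR bit in DEBUGCTL causes every last-branch MSR for the detected CPU family/model to be registered for guest save/restore and un-intercepted. A hedged sketch of that table walk (for_each_lbr_msr is a hypothetical helper; the {0,0}-terminated table idiom is the one used above):

    static void for_each_lbr_msr(void (*fn)(u32 msr))
    {
        const struct lbr_info *lbr = last_branch_msr_get();
        int i;

        if ( lbr == NULL )
            return;                  /* no recognised LBR MSRs on this CPU */
        for ( ; lbr->count; lbr++ )  /* table ends with a {0,0} sentinel */
            for ( i = 0; i < lbr->count; i++ )
                fn(lbr->base + i);   /* e.g. vmx_add_guest_msr() */
    }
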
    64.1 --- a/xen/arch/x86/hvm/vmx/vtd/dmar.c	Wed Oct 17 10:36:31 2007 -0600
    64.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.c	Sun Oct 21 12:10:25 2007 -0600
    64.3 @@ -492,7 +492,6 @@ acpi_parse_dmar(unsigned long phys_addr,
    64.4  
    64.5  int acpi_dmar_init(void)
    64.6  {
    64.7 -    extern int ioapic_ack_new;
    64.8      int rc;
    64.9  
   64.10      if (!vtd_enabled)
   64.11 @@ -509,8 +508,5 @@ int acpi_dmar_init(void)
   64.12          return -ENODEV;
   64.13      }
   64.14  
   64.15 -    /* Use fake-vector style of IOAPIC acknowledgement. */
   64.16 -    ioapic_ack_new = 0;
   64.17 -
   64.18      return 0;
   64.19  }
    65.1 --- a/xen/arch/x86/hvm/vmx/vtd/dmar.h	Wed Oct 17 10:36:31 2007 -0600
    65.2 +++ b/xen/arch/x86/hvm/vmx/vtd/dmar.h	Sun Oct 21 12:10:25 2007 -0600
    65.3 @@ -87,6 +87,13 @@ struct acpi_ioapic_unit {
    65.4      }ioapic;
    65.5  };
    65.6  
    65.7 +#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1 minute */
    65.8 +#define time_after(a,b)         \
    65.9 +        (typecheck(unsigned long, a) && \
   65.10 +         typecheck(unsigned long, b) && \
   65.11 +         ((long)(b) - (long)(a) < 0))
   65.12 +
   65.13  int vtd_hw_check(void);
   65.14 +void disable_pmr(struct iommu *iommu);
   65.15  
   65.16  #endif // _DMAR_H_
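
The time_after() macro moved into this header is the usual wraparound-safe comparison: it tests the sign of the difference rather than the raw counter values, so it stays correct when jiffies overflows. A hedged usage sketch (done() is a hypothetical completion predicate):

    unsigned long deadline = jiffies + DMAR_OPERATION_TIMEOUT;

    while ( !done() )
    {
        if ( time_after(jiffies, deadline) )  /* signed-difference test */
            panic("VT-d operation timed out\n");
        cpu_relax();
    }
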
    66.1 --- a/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Wed Oct 17 10:36:31 2007 -0600
    66.2 +++ b/xen/arch/x86/hvm/vmx/vtd/intel-iommu.c	Sun Oct 21 12:10:25 2007 -0600
    66.3 @@ -40,13 +40,6 @@ extern void print_iommu_regs(struct acpi
    66.4  extern void print_vtd_entries(struct domain *d, int bus, int devfn,
    66.5                                unsigned long gmfn);
    66.6  
    66.7 -#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
    66.8 -
    66.9 -#define time_after(a,b)         \
   66.10 -        (typecheck(unsigned long, a) && \
   66.11 -         typecheck(unsigned long, b) && \
   66.12 -         ((long)(b) - (long)(a) < 0))
   66.13 -
   66.14  unsigned int x86_clflush_size;
   66.15  void clflush_cache_range(void *adr, int size)
   66.16  {
   66.17 @@ -506,7 +499,7 @@ static int inline iommu_flush_iotlb_psi(
   66.18                                 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
   66.19  }
   66.20  
   66.21 -void flush_all(void)
   66.22 +void iommu_flush_all(void)
   66.23  {
   66.24      struct acpi_drhd_unit *drhd;
   66.25      struct iommu *iommu;
   66.26 @@ -1774,7 +1767,7 @@ int iommu_setup(void)
   66.27      struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
   66.28      struct acpi_drhd_unit *drhd;
   66.29      struct iommu *iommu;
   66.30 -    unsigned long i;
   66.31 +    unsigned long i, status;
   66.32  
   66.33      if ( !vtd_enabled )
   66.34          return 0;
   66.35 @@ -1782,7 +1775,7 @@ int iommu_setup(void)
   66.36      INIT_LIST_HEAD(&hd->pdev_list);
   66.37  
   66.38      /* start from scratch */
   66.39 -    flush_all();
   66.40 +    iommu_flush_all();
   66.41  
   66.42      /* setup clflush size */
   66.43      x86_clflush_size = ((cpuid_ebx(1) >> 8) & 0xff) * 8;
   66.44 @@ -1804,6 +1797,10 @@ int iommu_setup(void)
   66.45      if ( enable_vtd_translation() )
   66.46          goto error;
   66.47  
   66.48 +    status = dmar_readl(iommu->reg, DMAR_PMEN_REG);
   66.49 +    if (status & DMA_PMEN_PRS)
   66.50 +        disable_pmr(iommu);
   66.51 +
   66.52      return 0;
   66.53  
   66.54   error:
   66.55 @@ -1816,6 +1813,21 @@ int iommu_setup(void)
   66.56      return -EIO;
   66.57  }
   66.58  
   66.59 +/*
   66.60 + * If the device isn't owned by dom0, it has already been
   66.61 + * assigned to another domain, or it doesn't exist.
   66.62 + */
   66.63 +int device_assigned(u8 bus, u8 devfn)
   66.64 +{
   66.65 +    struct pci_dev *pdev;
   66.66 +
   66.67 +    for_each_pdev( dom0, pdev )
   66.68 +        if ( (pdev->bus == bus ) && (pdev->devfn == devfn) )
   66.69 +            return 0;
   66.70 +
   66.71 +    return 1;
   66.72 +}
   66.73 +
   66.74  int assign_device(struct domain *d, u8 bus, u8 devfn)
   66.75  {
   66.76      struct hvm_iommu *hd  = domain_hvm_iommu(d);
   66.77 @@ -1961,7 +1973,7 @@ int iommu_suspend(void)
   66.78      struct iommu *iommu;
   66.79      int i = 0;
   66.80  
   66.81 -    flush_all();
   66.82 +    iommu_flush_all();
   66.83  
   66.84      for_each_drhd_unit ( drhd )
   66.85      {
   66.86 @@ -1996,7 +2008,7 @@ int iommu_resume(void)
   66.87      struct iommu *iommu;
   66.88      int i = 0;
   66.89  
   66.90 -    flush_all();
   66.91 +    iommu_flush_all();
   66.92  
   66.93      init_vtd_hw();
   66.94      for_each_drhd_unit ( drhd )
    67.1 --- a/xen/arch/x86/hvm/vmx/vtd/io.c	Wed Oct 17 10:36:31 2007 -0600
    67.2 +++ b/xen/arch/x86/hvm/vmx/vtd/io.c	Sun Oct 21 12:10:25 2007 -0600
    67.3 @@ -45,6 +45,18 @@
    67.4  #include <public/hvm/ioreq.h>
    67.5  #include <public/domctl.h>
    67.6  
    67.7 +static void pt_irq_time_out(void *data)
    67.8 +{
    67.9 +    struct hvm_irq_dpci_mapping *irq_map = data;
   67.10 +    unsigned int guest_gsi, machine_gsi;
   67.11 +    struct domain *d = irq_map->dom;
   67.12 +
   67.13 +    guest_gsi = irq_map->guest_gsi;
   67.14 +    machine_gsi = d->arch.hvm_domain.irq.dpci->girq[guest_gsi].machine_gsi;
   67.15 +    clear_bit(machine_gsi, d->arch.hvm_domain.irq.dpci->dirq_mask);
   67.16 +    hvm_dpci_eoi(irq_map->dom, guest_gsi, NULL);
   67.17 +}
   67.18 +
   67.19  int pt_irq_create_bind_vtd(
   67.20      struct domain *d, xen_domctl_bind_pt_irq_t *pt_irq_bind)
   67.21  {
   67.22 @@ -76,17 +88,22 @@ int pt_irq_create_bind_vtd(
   67.23      hvm_irq_dpci->mirq[machine_gsi].device = device;
   67.24      hvm_irq_dpci->mirq[machine_gsi].intx = intx;
   67.25      hvm_irq_dpci->mirq[machine_gsi].guest_gsi = guest_gsi;
   67.26 +    hvm_irq_dpci->mirq[machine_gsi].dom = d;
   67.27  
   67.28      hvm_irq_dpci->girq[guest_gsi].valid = 1;
   67.29      hvm_irq_dpci->girq[guest_gsi].device = device;
   67.30      hvm_irq_dpci->girq[guest_gsi].intx = intx;
   67.31      hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
   67.32 +    hvm_irq_dpci->girq[guest_gsi].dom = d;
   67.33  
   67.34 -    /* Deal with gsi for legacy devices */
   67.35 +    init_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)],
   67.36 +               pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
   67.37 +
   67.38 +    /* Deal with GSI for legacy devices. */
   67.39      pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
   67.40      gdprintk(XENLOG_ERR,
   67.41 -        "XEN_DOMCTL_irq_mapping: m_irq = %x device = %x intx = %x\n",
   67.42 -        machine_gsi, device, intx);
   67.43 +             "XEN_DOMCTL_irq_mapping: m_irq = %x device = %x intx = %x\n",
   67.44 +             machine_gsi, device, intx);
   67.45  
   67.46      return 0;
   67.47  }
   67.48 @@ -114,22 +131,25 @@ int hvm_do_IRQ_dpci(struct domain *d, un
   67.49          hvm_irq->dpci->girq[isa_irq].machine_gsi = mirq;
   67.50      }
   67.51  
   67.52 -    if ( !test_and_set_bit(mirq, hvm_irq->dpci->dirq_mask) )
   67.53 -    {
   67.54 -        vcpu_kick(d->vcpu[0]);
   67.55 -        return 1;
   67.56 -    }
   67.57 +    /*
   67.58 +     * Set a timer here to handle the case where the IRQ line is shared
   67.59 +     * and the device belonging to the pass-through guest is not yet
   67.60 +     * active: the guest may never pick up the interrupt (e.g., it is
   67.61 +     * masked at the PIC), so we must detect that and EOI on its behalf.
   67.62 +     */
   67.63 +    set_bit(mirq, hvm_irq->dpci->dirq_mask);
   67.64 +    set_timer(&hvm_irq->dpci->hvm_timer[irq_to_vector(mirq)],
   67.65 +              NOW() + PT_IRQ_TIME_OUT);
   67.66 +    vcpu_kick(d->vcpu[0]);
   67.67  
   67.68 -    dprintk(XENLOG_INFO, "mirq already pending\n");
   67.69 -    return 0;
   67.70 +    return 1;
   67.71  }
   67.72  
   67.73 -void hvm_dpci_eoi(unsigned int guest_gsi, union vioapic_redir_entry *ent)
   67.74 +void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
   67.75 +                  union vioapic_redir_entry *ent)
   67.76  {
   67.77 -    struct domain *d = current->domain;
   67.78      struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
   67.79      uint32_t device, intx, machine_gsi;
   67.80 -    irq_desc_t *desc;
   67.81  
   67.82      ASSERT(spin_is_locked(&d->arch.hvm_domain.irq_lock));
   67.83  
   67.84 @@ -137,17 +157,15 @@ void hvm_dpci_eoi(unsigned int guest_gsi
   67.85           !hvm_irq_dpci->girq[guest_gsi].valid )
   67.86          return;
   67.87  
   67.88 +    machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
   67.89 +    stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(machine_gsi)]);
   67.90      device = hvm_irq_dpci->girq[guest_gsi].device;
   67.91      intx = hvm_irq_dpci->girq[guest_gsi].intx;
   67.92 -    machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
   67.93      gdprintk(XENLOG_INFO, "hvm_dpci_eoi:: device %x intx %x\n",
   67.94               device, intx);
   67.95      __hvm_pci_intx_deassert(d, device, intx);
   67.96 -    if ( (ent == NULL) || (ent->fields.mask == 0) )
   67.97 -    {
   67.98 -        desc = &irq_desc[irq_to_vector(machine_gsi)];
   67.99 -        desc->handler->end(irq_to_vector(machine_gsi));
  67.100 -    }
  67.101 +    if ( (ent == NULL) || !ent->fields.mask )
  67.102 +        pirq_guest_eoi(d, machine_gsi);
  67.103  }
  67.104  
  67.105  void iommu_domain_destroy(struct domain *d)
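
The three hunks above are one mechanism. An illustrative timeline of the pass-through IRQ timeout (control flow inferred from the code above):

    /*
     * hvm_do_IRQ_dpci():   set_bit(mirq, dirq_mask);
     *                      set_timer(..., NOW() + PT_IRQ_TIME_OUT);
     * guest EOIs in time:  hvm_dpci_eoi() -> stop_timer() -> pirq_guest_eoi()
     * guest never EOIs:    pt_irq_time_out() -> clear_bit() -> hvm_dpci_eoi()
     *
     * Either way the machine GSI is eventually EOIed, so other devices
     * sharing the line cannot be starved by an inactive pass-through guest.
     */
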
    68.1 --- a/xen/arch/x86/hvm/vmx/vtd/utils.c	Wed Oct 17 10:36:31 2007 -0600
    68.2 +++ b/xen/arch/x86/hvm/vmx/vtd/utils.c	Sun Oct 21 12:10:25 2007 -0600
    68.3 @@ -64,6 +64,26 @@ int vtd_hw_check(void)
    68.4      return 0;
    68.5  }
    68.6  
    68.7 +/* Disable VT-d protected memory registers. */
    68.8 +void disable_pmr(struct iommu *iommu)
    68.9 +{
   68.10 +    unsigned long start_time, status;
   68.11 +
   68.12 +    gdprintk(XENLOG_INFO VTDPREFIX,
   68.13 +        "disabling protected memory registers\n");
   68.14 +
   68.15 +    dmar_writel(iommu->reg, DMAR_PMEN_REG, 0);
   68.16 +    start_time = jiffies;
   68.17 +    while (1) {
   68.18 +        status = dmar_readl(iommu->reg, DMAR_PMEN_REG);
   68.19 +        if ( (status & DMA_PMEN_PRS) == 0 )
   68.20 +            break;
   68.21 +        if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))
   68.22 +            panic("Failed to disable protected memory registers\n");
   68.23 +        cpu_relax();
   68.24 +    }
   68.25 +}
   68.26 +
   68.27  #if defined(__x86_64__)
   68.28  void print_iommu_regs(struct acpi_drhd_unit *drhd)
   68.29  {
    69.1 --- a/xen/arch/x86/hvm/vpic.c	Wed Oct 17 10:36:31 2007 -0600
    69.2 +++ b/xen/arch/x86/hvm/vpic.c	Sun Oct 21 12:10:25 2007 -0600
    69.3 @@ -252,7 +252,8 @@ static void vpic_ioport_write(
    69.4                  if ( vtd_enabled )
    69.5                  {
    69.6                      irq |= ((addr & 0xa0) == 0xa0) ? 8 : 0;
    69.7 -                    hvm_dpci_eoi(hvm_isa_irq_to_gsi(irq), NULL);
    69.8 +                    hvm_dpci_eoi(current->domain,
    69.9 +                                 hvm_isa_irq_to_gsi(irq), NULL);
   69.10                  }
   69.11                  break;
   69.12              case 6: /* Set Priority                */
    70.1 --- a/xen/arch/x86/hvm/vpt.c	Wed Oct 17 10:36:31 2007 -0600
    70.2 +++ b/xen/arch/x86/hvm/vpt.c	Sun Oct 21 12:10:25 2007 -0600
    70.3 @@ -46,7 +46,7 @@ static void missed_ticks(struct periodic
    70.4  {
    70.5      s_time_t missed_ticks;
    70.6  
    70.7 -    if ( unlikely(pt->one_shot) )
    70.8 +    if ( pt->one_shot )
    70.9          return;
   70.10  
   70.11      missed_ticks = NOW() - pt->scheduled;
   70.12 @@ -115,12 +115,7 @@ static void pt_timer_fn(void *data)
   70.13  
   70.14      pt->pending_intr_nr++;
   70.15  
   70.16 -    if ( unlikely(pt->one_shot) )
   70.17 -    {
   70.18 -        pt->enabled = 0;
   70.19 -        list_del(&pt->list);
   70.20 -    }
   70.21 -    else
   70.22 +    if ( !pt->one_shot )
   70.23      {
   70.24          pt->scheduled += pt->period;
   70.25          missed_ticks(pt);
   70.26 @@ -212,10 +207,16 @@ void pt_intr_post(struct vcpu *v, struct
   70.27          return;
   70.28      }
   70.29  
   70.30 -    ASSERT(pt->vcpu == v);
   70.31 -
   70.32 -    pt->pending_intr_nr--;
   70.33 -    pt->last_plt_gtime += pt->period_cycles;
   70.34 +    if ( pt->one_shot )
   70.35 +    {
   70.36 +        pt->enabled = 0;
   70.37 +        list_del(&pt->list);
   70.38 +    }
   70.39 +    else
   70.40 +    {
   70.41 +        pt->pending_intr_nr--;
   70.42 +        pt->last_plt_gtime += pt->period_cycles;
   70.43 +    }
   70.44  
   70.45      if ( hvm_get_guest_time(v) < pt->last_plt_gtime )
   70.46          hvm_set_guest_time(v, pt->last_plt_gtime);
    71.1 --- a/xen/arch/x86/io_apic.c	Wed Oct 17 10:36:31 2007 -0600
    71.2 +++ b/xen/arch/x86/io_apic.c	Sun Oct 21 12:10:25 2007 -0600
    71.3 @@ -184,68 +184,6 @@ static void __modify_IO_APIC_irq (unsign
    71.4      }
    71.5  }
    71.6  
    71.7 -static int real_vector[MAX_IRQ_SOURCES];
    71.8 -static int fake_vector=-1;
    71.9 -
   71.10 -/*
   71.11 - * Following 2 functions are used to workaround spurious interrupt
   71.12 - * problem related to mask/unmask of interrupts.  Instead we program
   71.13 - * an unused vector in the IOAPIC before issueing EOI to LAPIC.
   71.14 - */
   71.15 -static void write_fake_IO_APIC_vector (unsigned int irq)
   71.16 -{
   71.17 -    struct irq_pin_list *entry = irq_2_pin + irq;
   71.18 -    unsigned int pin, reg;
   71.19 -    unsigned long flags;
   71.20 -
   71.21 -    spin_lock_irqsave(&ioapic_lock, flags);
   71.22 -    for (;;) {
   71.23 -        pin = entry->pin;
   71.24 -        if (pin == -1)
   71.25 -            break;
   71.26 -        reg = io_apic_read(entry->apic, 0x10 + pin*2);
   71.27 -        real_vector[irq] = reg & 0xff;
   71.28 -        reg &= ~0xff;
   71.29 -
   71.30 -        if (fake_vector == -1)
   71.31 -            fake_vector = assign_irq_vector(MAX_IRQ_SOURCES-1);
   71.32 -
   71.33 -        reg |= fake_vector;
   71.34 -        io_apic_write(entry->apic, 0x10 + pin*2, reg);
   71.35 -
   71.36 -        if (!entry->next)
   71.37 -            break;
   71.38 -        entry = irq_2_pin + entry->next;
   71.39 -    }
   71.40 -    spin_unlock_irqrestore(&ioapic_lock, flags);
   71.41 -}
   71.42 -
   71.43 -static void restore_real_IO_APIC_vector (unsigned int irq)
   71.44 -{
   71.45 -    struct irq_pin_list *entry = irq_2_pin + irq;
   71.46 -    unsigned int pin, reg;
   71.47 -    unsigned long flags;
   71.48 -
   71.49 -    spin_lock_irqsave(&ioapic_lock, flags);
   71.50 -    for (;;) {
   71.51 -        pin = entry->pin;
   71.52 -        if (pin == -1)
   71.53 -            break;
   71.54 -
   71.55 -        reg = io_apic_read(entry->apic, 0x10 + pin*2);
   71.56 -        reg &= ~0xff;
   71.57 -        reg |= real_vector[irq];
   71.58 -        io_apic_write(entry->apic, 0x10 + pin*2, reg);
   71.59 -        mb();
   71.60 -        *(IO_APIC_BASE(entry->apic) + 0x10) = reg & 0xff;
   71.61 -
   71.62 -        if (!entry->next)
   71.63 -            break;
   71.64 -        entry = irq_2_pin + entry->next;
   71.65 -    }
   71.66 -    spin_unlock_irqrestore(&ioapic_lock, flags);
   71.67 -}
   71.68 -
   71.69  /* mask = 1 */
   71.70  static void __mask_IO_APIC_irq (unsigned int irq)
   71.71  {
   71.72 @@ -1418,10 +1356,7 @@ static void mask_and_ack_level_ioapic_ir
   71.73      if ( ioapic_ack_new )
   71.74          return;
   71.75  
   71.76 -    if ( vtd_enabled )
   71.77 -        write_fake_IO_APIC_vector(irq);
   71.78 -    else
   71.79 -        mask_IO_APIC_irq(irq);
   71.80 +    mask_IO_APIC_irq(irq);
   71.81  
   71.82  /*
   71.83   * It appears there is an erratum which affects at least version 0x11
   71.84 @@ -1464,12 +1399,8 @@ static void end_level_ioapic_irq (unsign
   71.85  
   71.86      if ( !ioapic_ack_new )
   71.87      {
   71.88 -        if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) ) {
   71.89 -            if ( vtd_enabled )
   71.90 -                restore_real_IO_APIC_vector(irq);
   71.91 -            else
   71.92 -                unmask_IO_APIC_irq(irq);
   71.93 -        }
   71.94 +        if ( !(irq_desc[IO_APIC_VECTOR(irq)].status & IRQ_DISABLED) )
   71.95 +            unmask_IO_APIC_irq(irq);
   71.96          return;
   71.97      }
   71.98  
    72.1 --- a/xen/arch/x86/mm.c	Wed Oct 17 10:36:31 2007 -0600
    72.2 +++ b/xen/arch/x86/mm.c	Sun Oct 21 12:10:25 2007 -0600
    72.3 @@ -111,6 +111,7 @@
    72.4  #include <asm/shared.h>
    72.5  #include <public/memory.h>
    72.6  #include <xsm/xsm.h>
    72.7 +#include <xen/trace.h>
    72.8  
    72.9  #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
   72.10  
   72.11 @@ -148,6 +149,14 @@ struct page_info *frame_table;
   72.12  unsigned long max_page;
   72.13  unsigned long total_pages;
   72.14  
   72.15 +#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
   72.16 +
   72.17 +#define l1_disallow_mask(d)                                     \
   72.18 +    ((d != dom_io) &&                                           \
   72.19 +     (rangeset_is_empty((d)->iomem_caps) &&                     \
   72.20 +      rangeset_is_empty((d)->arch.ioport_caps)) ?               \
   72.21 +     L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS))
   72.22 +
   72.23  #ifdef CONFIG_COMPAT
   72.24  l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
   72.25  #define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ?  \
   72.26 @@ -371,7 +380,7 @@ void make_cr3(struct vcpu *v, unsigned l
   72.27      /* First check the previous high mapping can't be in the TLB. 
   72.28       * (i.e. have we loaded CR3 since we last did this?) */
   72.29      if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
   72.30 -        local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
   72.31 +        flush_tlb_one_local(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
   72.32      highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
   72.33      lowmem_l3tab  = cache->table[cache->inuse_idx];
   72.34      memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
   72.35 @@ -616,9 +625,10 @@ get_page_from_l1e(
   72.36      if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
   72.37          return 1;
   72.38  
   72.39 -    if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
   72.40 +    if ( unlikely(l1e_get_flags(l1e) & l1_disallow_mask(d)) )
   72.41      {
   72.42 -        MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK);
   72.43 +        MEM_LOG("Bad L1 flags %x",
   72.44 +                l1e_get_flags(l1e) & l1_disallow_mask(d));
   72.45          return 0;
   72.46      }
   72.47  
   72.48 @@ -1366,10 +1376,10 @@ static int mod_l1_entry(l1_pgentry_t *pl
   72.49          ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
   72.50          nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
   72.51  
   72.52 -        if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
   72.53 +        if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
   72.54          {
   72.55              MEM_LOG("Bad L1 flags %x",
   72.56 -                    l1e_get_flags(nl1e) & L1_DISALLOW_MASK);
   72.57 +                    l1e_get_flags(nl1e) & l1_disallow_mask(d));
   72.58              return 0;
   72.59          }
   72.60  
   72.61 @@ -1573,7 +1583,7 @@ static int mod_l4_entry(struct domain *d
   72.62  
   72.63  #endif
   72.64  
   72.65 -int alloc_page_type(struct page_info *page, unsigned long type)
   72.66 +static int alloc_page_type(struct page_info *page, unsigned long type)
   72.67  {
   72.68      struct domain *owner = page_get_owner(page);
   72.69  
   72.70 @@ -1770,13 +1780,20 @@ int get_page_type(struct page_info *page
   72.71          }
   72.72          else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
   72.73          {
   72.74 -            if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
   72.75 -                 (type != PGT_l1_page_table) )
   72.76 -                MEM_LOG("Bad type (saw %" PRtype_info
   72.77 -                        " != exp %" PRtype_info ") "
   72.78 -                        "for mfn %lx (pfn %lx)",
   72.79 -                        x, type, page_to_mfn(page),
   72.80 -                        get_gpfn_from_mfn(page_to_mfn(page)));
   72.81 +            /* Don't log failure if it could be a recursive-mapping attempt. */
   72.82 +            if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
   72.83 +                 (type == PGT_l1_page_table) )
   72.84 +                return 0;
   72.85 +            if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
   72.86 +                 (type == PGT_l2_page_table) )
   72.87 +                return 0;
   72.88 +            if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
   72.89 +                 (type == PGT_l3_page_table) )
   72.90 +                return 0;
   72.91 +            MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
   72.92 +                    "for mfn %lx (pfn %lx)",
   72.93 +                    x, type, page_to_mfn(page),
   72.94 +                    get_gpfn_from_mfn(page_to_mfn(page)));
   72.95              return 0;
   72.96          }
   72.97          else if ( unlikely(!(x & PGT_validated)) )
   72.98 @@ -1885,7 +1902,7 @@ static void process_deferred_ops(void)
   72.99          if ( deferred_ops & DOP_FLUSH_ALL_TLBS )
  72.100              flush_tlb_mask(d->domain_dirty_cpumask);
  72.101          else
  72.102 -            local_flush_tlb();
  72.103 +            flush_tlb_local();
  72.104      }
  72.105  
  72.106      if ( deferred_ops & DOP_RELOAD_LDT )
  72.107 @@ -2171,7 +2188,7 @@ int do_mmuext_op(
  72.108          case MMUEXT_INVLPG_LOCAL:
  72.109              if ( !paging_mode_enabled(d) 
  72.110                   || paging_invlpg(v, op.arg1.linear_addr) != 0 )
  72.111 -                local_flush_tlb_one(op.arg1.linear_addr);
  72.112 +                flush_tlb_one_local(op.arg1.linear_addr);
  72.113              break;
  72.114  
  72.115          case MMUEXT_TLB_FLUSH_MULTI:
  72.116 @@ -2847,7 +2864,7 @@ int do_update_va_mapping(unsigned long v
  72.117          switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
  72.118          {
  72.119          case UVMF_LOCAL:
  72.120 -            local_flush_tlb();
  72.121 +            flush_tlb_local();
  72.122              break;
  72.123          case UVMF_ALL:
  72.124              flush_tlb_mask(d->domain_dirty_cpumask);
  72.125 @@ -2869,7 +2886,7 @@ int do_update_va_mapping(unsigned long v
  72.126          case UVMF_LOCAL:
  72.127              if ( !paging_mode_enabled(d) 
  72.128                   || (paging_invlpg(current, va) != 0) ) 
  72.129 -                local_flush_tlb_one(va);
  72.130 +                flush_tlb_one_local(va);
  72.131              break;
  72.132          case UVMF_ALL:
  72.133              flush_tlb_one_mask(d->domain_dirty_cpumask, va);
  72.134 @@ -2988,7 +3005,7 @@ long do_set_gdt(XEN_GUEST_HANDLE(ulong) 
  72.135      LOCK_BIGLOCK(current->domain);
  72.136  
  72.137      if ( (ret = set_gdt(current, frames, entries)) == 0 )
  72.138 -        local_flush_tlb();
  72.139 +        flush_tlb_local();
  72.140  
  72.141      UNLOCK_BIGLOCK(current->domain);
  72.142  
  72.143 @@ -3385,7 +3402,7 @@ static int ptwr_emulated_update(
  72.144          ol1e = l1e_from_intpte(old);
  72.145  
  72.146          okay = paging_cmpxchg_guest_entry(v, &l1e_get_intpte(*pl1e),
  72.147 -                                          &t, val, _mfn(mfn));
  72.148 +                                          &t, l1e_get_intpte(nl1e), _mfn(mfn));
  72.149          okay = (okay && t == old);
  72.150  
  72.151          if ( !okay )
  72.152 @@ -3402,6 +3419,8 @@ static int ptwr_emulated_update(
  72.153              BUG();
  72.154      }
  72.155  
  72.156 +    trace_ptwr_emulation(addr, nl1e);
  72.157 +
  72.158      unmap_domain_page(pl1e);
  72.159  
  72.160      /* Finally, drop the old PTE. */
  72.161 @@ -3514,36 +3533,71 @@ void free_xen_pagetable(void *v)
  72.162          free_domheap_page(virt_to_page(v));
  72.163  }
  72.164  
  72.165 +/* Convert to/from superpage-mapping flags for map_pages_to_xen(). */
  72.166 +#define l1f_to_l2f(f) ((f) | _PAGE_PSE)
  72.167 +#define l2f_to_l1f(f) ((f) & ~_PAGE_PSE)
  72.168 +
  72.169 +/*
  72.170 + * map_pages_to_xen() can be called with interrupts disabled:
  72.171 + *  * during early bootstrap; or
  72.172 + *  * from alloc_xenheap_pages() via memguard_guard_range().
  72.173 + * In these cases it is safe to use flush_area_local():
  72.174 + *  * because only the local CPU is online; or
  72.175 + *  * because stale TLB entries do not matter for memguard_[un]guard_range().
  72.176 + */
  72.177 +#define flush_area(v,f) (!local_irq_is_enabled() ?              \
  72.178 +                         flush_area_local((const void *)v, f) : \
  72.179 +                         flush_area_all((const void *)v, f))
  72.180 +
  72.181  int map_pages_to_xen(
  72.182      unsigned long virt,
  72.183      unsigned long mfn,
  72.184      unsigned long nr_mfns,
  72.185 -    unsigned long flags)
  72.186 +    unsigned int flags)
  72.187  {
  72.188      l2_pgentry_t *pl2e, ol2e;
  72.189      l1_pgentry_t *pl1e, ol1e;
  72.190      unsigned int  i;
  72.191  
  72.192 -    unsigned int  map_small_pages = !!(flags & MAP_SMALL_PAGES);
  72.193 -    flags &= ~MAP_SMALL_PAGES;
  72.194 -
  72.195      while ( nr_mfns != 0 )
  72.196      {
  72.197          pl2e = virt_to_xen_l2e(virt);
  72.198  
  72.199          if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
  72.200               (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
  72.201 -             !map_small_pages )
  72.202 +             !(flags & (_PAGE_PAT|MAP_SMALL_PAGES)) )
  72.203          {
  72.204              /* Super-page mapping. */
  72.205              ol2e = *pl2e;
  72.206 -            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, flags|_PAGE_PSE));
  72.207 +            l2e_write_atomic(pl2e, l2e_from_pfn(mfn, l1f_to_l2f(flags)));
  72.208  
  72.209              if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
  72.210              {
  72.211 -                local_flush_tlb_pge();
  72.212 -                if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
  72.213 -                    free_xen_pagetable(mfn_to_virt(l2e_get_pfn(ol2e)));
  72.214 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
  72.215 +
  72.216 +                if ( l2e_get_flags(ol2e) & _PAGE_PSE )
  72.217 +                {
  72.218 +                    if ( l2e_get_flags(ol2e) & _PAGE_GLOBAL )
  72.219 +                        flush_flags |= FLUSH_TLB_GLOBAL;
  72.220 +                    if ( (l2e_get_flags(ol2e) ^ l1f_to_l2f(flags)) &
  72.221 +                         l1f_to_l2f(PAGE_CACHE_ATTRS) )
  72.222 +                        flush_flags |= FLUSH_CACHE;
  72.223 +                    flush_area(virt, flush_flags);
  72.224 +                }
  72.225 +                else
  72.226 +                {
  72.227 +                    pl1e = l2e_to_l1e(ol2e);
  72.228 +                    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  72.229 +                    {
  72.230 +                        if ( l1e_get_flags(pl1e[i]) & _PAGE_GLOBAL )
  72.231 +                            flush_flags |= FLUSH_TLB_GLOBAL;
  72.232 +                        if ( (l1e_get_flags(pl1e[i]) ^ flags) &
  72.233 +                             PAGE_CACHE_ATTRS )
  72.234 +                            flush_flags |= FLUSH_CACHE;
  72.235 +                    }
  72.236 +                    flush_area(virt, flush_flags);
  72.237 +                    free_xen_pagetable(pl1e);
  72.238 +                }
  72.239              }
  72.240  
  72.241              virt    += 1UL << L2_PAGETABLE_SHIFT;
  72.242 @@ -3556,31 +3610,83 @@ int map_pages_to_xen(
  72.243              if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
  72.244              {
  72.245                  pl1e = alloc_xen_pagetable();
  72.246 +                if ( pl1e == NULL )
  72.247 +                    return -ENOMEM;
  72.248                  clear_page(pl1e);
  72.249                  l2e_write(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
  72.250                                               __PAGE_HYPERVISOR));
  72.251              }
  72.252              else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
  72.253              {
  72.254 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(2);
  72.255 +
  72.256 +                /* Skip this PTE if there is no change. */
  72.257 +                if ( (((l2e_get_pfn(*pl2e) & ~(L1_PAGETABLE_ENTRIES - 1)) +
  72.258 +                       l1_table_offset(virt)) == mfn) &&
  72.259 +                     (((l2f_to_l1f(l2e_get_flags(*pl2e)) ^ flags) &
  72.260 +                       ~(_PAGE_ACCESSED|_PAGE_DIRTY)) == 0) )
  72.261 +                {
  72.262 +                    virt    += 1UL << L1_PAGETABLE_SHIFT;
  72.263 +                    mfn     += 1UL;
  72.264 +                    nr_mfns -= 1UL;
  72.265 +                    continue;
  72.266 +                }
  72.267 +
  72.268                  pl1e = alloc_xen_pagetable();
  72.269 +                if ( pl1e == NULL )
  72.270 +                    return -ENOMEM;
  72.271 +
  72.272                  for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
  72.273                      l1e_write(&pl1e[i],
  72.274                                l1e_from_pfn(l2e_get_pfn(*pl2e) + i,
  72.275 -                                           l2e_get_flags(*pl2e) & ~_PAGE_PSE));
  72.276 +                                           l2f_to_l1f(l2e_get_flags(*pl2e))));
  72.277 +
  72.278 +                if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL )
  72.279 +                    flush_flags |= FLUSH_TLB_GLOBAL;
  72.280 +
  72.281                  l2e_write_atomic(pl2e, l2e_from_pfn(virt_to_mfn(pl1e),
  72.282                                                      __PAGE_HYPERVISOR));
  72.283 -                local_flush_tlb_pge();
  72.284 +                flush_area(virt, flush_flags);
  72.285              }
  72.286  
  72.287              pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
  72.288              ol1e  = *pl1e;
  72.289              l1e_write_atomic(pl1e, l1e_from_pfn(mfn, flags));
  72.290              if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
  72.291 -                local_flush_tlb_one(virt);
  72.292 +            {
  72.293 +                unsigned int flush_flags = FLUSH_TLB | FLUSH_LEVEL(1);
  72.294 +                if ( l1e_get_flags(ol1e) & _PAGE_GLOBAL )
  72.295 +                    flush_flags |= FLUSH_TLB_GLOBAL;
  72.296 +                if ( (l1e_get_flags(ol1e) ^ flags) & PAGE_CACHE_ATTRS )
  72.297 +                    flush_flags |= FLUSH_CACHE;
  72.298 +                flush_area(virt, flush_flags);
  72.299 +            }
  72.300  
  72.301              virt    += 1UL << L1_PAGETABLE_SHIFT;
  72.302              mfn     += 1UL;
  72.303              nr_mfns -= 1UL;
  72.304 +
  72.305 +            if ( (flags == PAGE_HYPERVISOR) &&
  72.306 +                 ((nr_mfns == 0) ||
  72.307 +                  ((((virt >> PAGE_SHIFT) | mfn) &
  72.308 +                    ((1 << PAGETABLE_ORDER) - 1)) == 0)) )
  72.309 +            {
  72.310 +                unsigned long base_mfn;
  72.311 +                pl1e = l2e_to_l1e(*pl2e);
  72.312 +                base_mfn = l1e_get_pfn(*pl1e) & ~(L1_PAGETABLE_ENTRIES - 1);
  72.313 +                for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++, pl1e++ )
  72.314 +                    if ( (l1e_get_pfn(*pl1e) != (base_mfn + i)) ||
  72.315 +                         (l1e_get_flags(*pl1e) != flags) )
  72.316 +                        break;
  72.317 +                if ( i == L1_PAGETABLE_ENTRIES )
  72.318 +                {
  72.319 +                    ol2e = *pl2e;
  72.320 +                    l2e_write_atomic(pl2e, l2e_from_pfn(base_mfn,
  72.321 +                                                        l1f_to_l2f(flags)));
  72.322 +                    flush_area(virt, FLUSH_TLB_GLOBAL | FLUSH_LEVEL(2));
  72.323 +                    free_xen_pagetable(l2e_to_l1e(ol2e));
  72.324 +                }
  72.325 +            }
  72.326          }
  72.327      }
  72.328  
  72.329 @@ -3647,12 +3753,13 @@ void destroy_xen_mappings(unsigned long 
  72.330              {
  72.331                  /* Empty: zap the L2E and free the L1 page. */
  72.332                  l2e_write_atomic(pl2e, l2e_empty());
  72.333 +                flush_all(FLUSH_TLB_GLOBAL); /* flush before free */
  72.334                  free_xen_pagetable(pl1e);
  72.335              }
  72.336          }
  72.337      }
  72.338  
  72.339 -    flush_tlb_all_pge();
  72.340 +    flush_all(FLUSH_TLB_GLOBAL);
  72.341  }
  72.342  
  72.343  void __set_fixmap(
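
One consequence of the new l1_disallow_mask() above is worth spelling out; the evaluations below are illustrative (domain roles assumed):

    /*
     * dom0, dom_io, or any domain holding iomem/ioport capabilities:
     *   l1_disallow_mask(d) == L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS
     *   -> may install PTEs with _PAGE_PAT/_PAGE_PCD/_PAGE_PWT set.
     * An ordinary domU with empty I/O capability rangesets:
     *   l1_disallow_mask(d) == L1_DISALLOW_MASK
     *   -> cacheability bits are rejected with "Bad L1 flags".
     */
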
    73.1 --- a/xen/arch/x86/mm/p2m.c	Wed Oct 17 10:36:31 2007 -0600
    73.2 +++ b/xen/arch/x86/mm/p2m.c	Sun Oct 21 12:10:25 2007 -0600
    73.3 @@ -493,7 +493,7 @@ static void audit_p2m(struct domain *d)
    73.4      test_linear = ( (d == current->domain)
    73.5                      && !pagetable_is_null(current->arch.monitor_table) );
    73.6      if ( test_linear )
    73.7 -        local_flush_tlb();
    73.8 +        flush_tlb_local();
    73.9  
   73.10      /* Audit part one: walk the domain's page allocation list, checking
   73.11       * the m2p entries. */
    74.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Oct 17 10:36:31 2007 -0600
    74.2 +++ b/xen/arch/x86/mm/shadow/common.c	Sun Oct 21 12:10:25 2007 -0600
    74.3 @@ -708,15 +708,29 @@ shadow_order(unsigned int shadow_type)
    74.4  #endif
    74.5  }
    74.6  
    74.7 +static inline unsigned int
    74.8 +shadow_max_order(struct domain *d)
    74.9 +{
   74.10 +    return is_hvm_domain(d) ? SHADOW_MAX_ORDER : 0;
   74.11 +}
   74.12  
   74.13 -/* Do we have a free chunk of at least this order? */
   74.14 -static inline int chunk_is_available(struct domain *d, int order)
   74.15 +/* Do we have a total of count pages of the requested order free? */
   74.16 +static inline int space_is_available(
   74.17 +    struct domain *d,
   74.18 +    unsigned int order,
   74.19 +    unsigned int count)
   74.20  {
   74.21 -    int i;
   74.22 -    
   74.23 -    for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
   74.24 -        if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
   74.25 -            return 1;
   74.26 +    for ( ; order <= shadow_max_order(d); ++order )
   74.27 +    {
   74.28 +        unsigned int n = count;
   74.29 +        const struct list_head *p;
   74.30 +
   74.31 +        list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
   74.32 +            if ( --n == 0 )
   74.33 +                return 1;
   74.34 +        count = (count + 1) >> 1;
   74.35 +    }
   74.36 +
   74.37      return 0;
   74.38  }
   74.39  
   74.40 @@ -752,12 +766,12 @@ static void shadow_unhook_mappings(struc
   74.41  }
   74.42  
   74.43  
   74.44 -/* Make sure there is at least one chunk of the required order available
   74.45 - * in the shadow page pool. This must be called before any calls to
   74.46 - * shadow_alloc().  Since this will free existing shadows to make room,
   74.47 - * it must be called early enough to avoid freeing shadows that the
   74.48 - * caller is currently working on. */
   74.49 -void shadow_prealloc(struct domain *d, unsigned int order)
   74.50 +/* Make sure there are at least count order-sized pages
   74.51 + * available in the shadow page pool. */
   74.52 +static void _shadow_prealloc(
   74.53 +    struct domain *d,
   74.54 +    unsigned int order,
   74.55 +    unsigned int count)
   74.56  {
   74.57      /* Need a vpcu for calling unpins; for now, since we don't have
   74.58       * per-vcpu shadows, any will do */
   74.59 @@ -768,7 +782,8 @@ void shadow_prealloc(struct domain *d, u
   74.60      mfn_t smfn;
   74.61      int i;
   74.62  
   74.63 -    if ( chunk_is_available(d, order) ) return; 
   74.64 +    ASSERT(order <= shadow_max_order(d));
   74.65 +    if ( space_is_available(d, order, count) ) return;
   74.66      
   74.67      v = current;
   74.68      if ( v->domain != d )
   74.69 @@ -785,8 +800,8 @@ void shadow_prealloc(struct domain *d, u
   74.70          /* Unpin this top-level shadow */
   74.71          sh_unpin(v, smfn);
   74.72  
   74.73 -        /* See if that freed up a chunk of appropriate size */
   74.74 -        if ( chunk_is_available(d, order) ) return;
   74.75 +        /* See if that freed up enough space */
   74.76 +        if ( space_is_available(d, order, count) ) return;
   74.77      }
   74.78  
   74.79      /* Stage two: all shadow pages are in use in hierarchies that are
   74.80 @@ -803,8 +818,8 @@ void shadow_prealloc(struct domain *d, u
   74.81                                 pagetable_get_mfn(v2->arch.shadow_table[i]));
   74.82                  cpus_or(flushmask, v2->vcpu_dirty_cpumask, flushmask);
   74.83  
   74.84 -                /* See if that freed up a chunk of appropriate size */
   74.85 -                if ( chunk_is_available(d, order) ) 
   74.86 +                /* See if that freed up enough space */
   74.87 +                if ( space_is_available(d, order, count) )
   74.88                  {
   74.89                      flush_tlb_mask(flushmask);
   74.90                      return;
   74.91 @@ -814,15 +829,26 @@ void shadow_prealloc(struct domain *d, u
   74.92      
   74.93      /* Nothing more we can do: all remaining shadows are of pages that
   74.94       * hold Xen mappings for some vcpu.  This can never happen. */
   74.95 -    SHADOW_ERROR("Can't pre-allocate %i shadow pages!\n"
   74.96 +    SHADOW_ERROR("Can't pre-allocate %u order-%u shadow pages!\n"
   74.97                   "  shadow pages total = %u, free = %u, p2m=%u\n",
   74.98 -                 1 << order,
   74.99 +                 count, order,
  74.100                   d->arch.paging.shadow.total_pages,
  74.101                   d->arch.paging.shadow.free_pages,
  74.102                   d->arch.paging.shadow.p2m_pages);
  74.103      BUG();
  74.104  }
  74.105  
  74.106 +/* Make sure there are at least count pages of the order corresponding
  74.107 + * to type available in the shadow page pool.
  74.108 + * This must be called before any calls to shadow_alloc().  Since this
  74.109 + * will free existing shadows to make room, it must be called early enough
  74.110 + * to avoid freeing shadows that the caller is currently working on. */
  74.111 +void shadow_prealloc(struct domain *d, u32 type, unsigned int count)
  74.112 +{
  74.113 +    ASSERT(type != SH_type_p2m_table);
  74.114 +    return _shadow_prealloc(d, shadow_order(type), count);
  74.115 +}
  74.116 +
  74.117  /* Deliberately free all the memory we can: this will tear down all of
  74.118   * this domain's shadows */
  74.119  static void shadow_blow_tables(struct domain *d) 
  74.120 @@ -899,7 +925,9 @@ mfn_t shadow_alloc(struct domain *d,
  74.121      int i;
  74.122  
  74.123      ASSERT(shadow_locked_by_me(d));
  74.124 -    ASSERT(order <= SHADOW_MAX_ORDER);
  74.125 +    if (shadow_type == SH_type_p2m_table && order > shadow_max_order(d))
  74.126 +        order = shadow_max_order(d);
  74.127 +    ASSERT(order <= shadow_max_order(d));
  74.128      ASSERT(shadow_type != SH_type_none);
  74.129      perfc_incr(shadow_alloc);
  74.130  
  74.131 @@ -1000,7 +1028,7 @@ void shadow_free(struct domain *d, mfn_t
  74.132      }
  74.133  
  74.134      /* Merge chunks as far as possible. */
  74.135 -    while ( order < SHADOW_MAX_ORDER )
  74.136 +    for ( ; order < shadow_max_order(d); ++order )
  74.137      {
  74.138          mask = 1 << order;
  74.139          if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
  74.140 @@ -1015,7 +1043,6 @@ void shadow_free(struct domain *d, mfn_t
  74.141                  break;
  74.142              list_del(&(sp+mask)->list);
  74.143          }
  74.144 -        order++;
  74.145      }
  74.146  
  74.147      sp->order = order;
  74.148 @@ -1037,16 +1064,18 @@ sh_alloc_p2m_pages(struct domain *d)
  74.149  {
  74.150      struct page_info *pg;
  74.151      u32 i;
  74.152 +    unsigned int order = shadow_max_order(d);
  74.153 +
  74.154      ASSERT(shadow_locked_by_me(d));
  74.155      
  74.156      if ( d->arch.paging.shadow.total_pages 
  74.157 -         < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) )
  74.158 +         < (shadow_min_acceptable_pages(d) + (1 << order)) )
  74.159          return 0; /* Not enough shadow memory: need to increase it first */
  74.160      
  74.161      pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0));
  74.162 -    d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER);
  74.163 -    d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER);
  74.164 -    for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++)
  74.165 +    d->arch.paging.shadow.p2m_pages += (1 << order);
  74.166 +    d->arch.paging.shadow.total_pages -= (1 << order);
  74.167 +    for (i = 0; i < (1U << order); i++)
  74.168      {
  74.169          /* Unlike shadow pages, mark p2m pages as owned by the domain.
  74.170           * Marking the domain as the owner would normally allow the guest to
  74.171 @@ -1166,7 +1195,7 @@ static unsigned int sh_set_allocation(st
  74.172  {
  74.173      struct shadow_page_info *sp;
  74.174      unsigned int lower_bound;
  74.175 -    int j;
  74.176 +    unsigned int j, order = shadow_max_order(d);
  74.177  
  74.178      ASSERT(shadow_locked_by_me(d));
  74.179      
  74.180 @@ -1187,15 +1216,15 @@ static unsigned int sh_set_allocation(st
  74.181          {
  74.182              /* Need to allocate more memory from domheap */
  74.183              sp = (struct shadow_page_info *)
  74.184 -                alloc_domheap_pages(NULL, SHADOW_MAX_ORDER, 0); 
  74.185 +                alloc_domheap_pages(NULL, order, 0);
  74.186              if ( sp == NULL ) 
  74.187              { 
  74.188                  SHADOW_PRINTK("failed to allocate shadow pages.\n");
  74.189                  return -ENOMEM;
  74.190              }
  74.191 -            d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER;
  74.192 -            d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER;
  74.193 -            for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 
  74.194 +            d->arch.paging.shadow.free_pages += 1 << order;
  74.195 +            d->arch.paging.shadow.total_pages += 1 << order;
  74.196 +            for ( j = 0; j < 1U << order; j++ )
  74.197              {
  74.198                  sp[j].type = 0;  
  74.199                  sp[j].pinned = 0;
  74.200 @@ -1203,21 +1232,20 @@ static unsigned int sh_set_allocation(st
  74.201                  sp[j].mbz = 0;
  74.202                  sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
  74.203              }
  74.204 -            sp->order = SHADOW_MAX_ORDER;
  74.205 -            list_add_tail(&sp->list, 
  74.206 -                          &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]);
  74.207 +            sp->order = order;
  74.208 +            list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
  74.209          } 
  74.210          else if ( d->arch.paging.shadow.total_pages > pages ) 
  74.211          {
  74.212              /* Need to return memory to domheap */
  74.213 -            shadow_prealloc(d, SHADOW_MAX_ORDER);
  74.214 -            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]));
  74.215 -            sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, 
  74.216 +            _shadow_prealloc(d, order, 1);
  74.217 +            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
  74.218 +            sp = list_entry(d->arch.paging.shadow.freelists[order].next,
  74.219                              struct shadow_page_info, list);
  74.220              list_del(&sp->list);
  74.221 -            d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER;
  74.222 -            d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER;
  74.223 -            free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER);
  74.224 +            d->arch.paging.shadow.free_pages -= 1 << order;
  74.225 +            d->arch.paging.shadow.total_pages -= 1 << order;
  74.226 +            free_domheap_pages((struct page_info *)sp, order);
  74.227          }
  74.228  
  74.229          /* Check to see if we need to yield and try again */
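
A worked example of the buddy arithmetic in space_is_available() above (numbers illustrative): a request for four order-0 pages is also satisfiable from higher orders, because count is halved, rounding up, at each step:

    /*
     * space_is_available(d, 0, 4) returns 1 if the pool holds any of:
     *   order 0: 4 free pages   (count = 4)
     *   order 1: 2 free chunks  (count = (4+1) >> 1 = 2)
     *   order 2: 1 free chunk   (count = (2+1) >> 1 = 1)
     * checked up to shadow_max_order(d).
     */
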
    75.1 --- a/xen/arch/x86/mm/shadow/multi.c	Wed Oct 17 10:36:31 2007 -0600
    75.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Sun Oct 21 12:10:25 2007 -0600
    75.3 @@ -1690,7 +1690,7 @@ sh_make_monitor_table(struct vcpu *v)
    75.4      ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
    75.5      
    75.6      /* Guarantee we can get the memory we need */
    75.7 -    shadow_prealloc(d, SHADOW_MAX_ORDER);
    75.8 +    shadow_prealloc(d, SH_type_monitor_table, CONFIG_PAGING_LEVELS - 1);
    75.9  
   75.10  #if CONFIG_PAGING_LEVELS == 4    
   75.11      {
   75.12 @@ -2827,10 +2827,13 @@ static int sh_page_fault(struct vcpu *v,
   75.13      }
   75.14  
   75.15      /* Make sure there is enough free shadow memory to build a chain of
   75.16 -     * shadow tables: one SHADOW_MAX_ORDER chunk will always be enough
   75.17 -     * to allocate all we need.  (We never allocate a top-level shadow
   75.18 -     * on this path, only a 32b l1, pae l2+1 or 64b l3+2+1) */
   75.19 -    shadow_prealloc(d, SHADOW_MAX_ORDER);
   75.20 +     * shadow tables. (We never allocate a top-level shadow on this path,
   75.21 +     * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
   75.22 +     * SH_type_l1_shadow isn't correct in the 64b case, all page
   75.23 +     * tables are the same size there.) */
   75.24 +    shadow_prealloc(d,
   75.25 +                    SH_type_l1_shadow,
   75.26 +                    GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
   75.27  
   75.28      /* Acquire the shadow.  This must happen before we figure out the rights 
   75.29       * for the shadow entry, since we might promote a page here. */
   75.30 @@ -3086,7 +3089,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
   75.31      if ( mfn_to_shadow_page(shadow_l2e_get_mfn(sl2e))->type
   75.32           == SH_type_fl1_shadow )
   75.33      {
   75.34 -        local_flush_tlb();
   75.35 +        flush_tlb_local();
   75.36          return 0;
   75.37      }
   75.38  
   75.39 @@ -3444,7 +3447,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
   75.40      if ( !mfn_valid(smfn) )
   75.41      {
   75.42          /* Make sure there's enough free shadow memory. */
   75.43 -        shadow_prealloc(d, SHADOW_MAX_ORDER); 
   75.44 +        shadow_prealloc(d, root_type, 1);
   75.45          /* Shadow the page. */
   75.46          smfn = sh_make_shadow(v, gmfn, root_type);
   75.47      }
    76.1 --- a/xen/arch/x86/mm/shadow/private.h	Wed Oct 17 10:36:31 2007 -0600
    76.2 +++ b/xen/arch/x86/mm/shadow/private.h	Sun Oct 21 12:10:25 2007 -0600
    76.3 @@ -354,7 +354,7 @@ void shadow_promote(struct vcpu *v, mfn_
    76.4  void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type);
    76.5  
    76.6  /* Shadow page allocation functions */
    76.7 -void  shadow_prealloc(struct domain *d, unsigned int order);
    76.8 +void  shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count);
    76.9  mfn_t shadow_alloc(struct domain *d, 
   76.10                      u32 shadow_type,
   76.11                      unsigned long backpointer);
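
With the new prototype above, callers of shadow_prealloc() state what they intend to allocate (a shadow type and how many shadows of it) rather than a raw page order. A plausible shape for the wrapper, assuming a shadow_order() helper that maps a type to its allocation order (neither the body nor the helper is shown in this hunk):

    /* Hedged sketch, not the patch's actual body. */
    void shadow_prealloc(struct domain *d, u32 shadow_type, unsigned int count)
    {
        /* Derive the order from the type, then defer to the internal
         * helper visible in the shadow/common.c hunk above. */
        _shadow_prealloc(d, shadow_order(shadow_type), count);
    }
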
    77.1 --- a/xen/arch/x86/setup.c	Wed Oct 17 10:36:31 2007 -0600
    77.2 +++ b/xen/arch/x86/setup.c	Sun Oct 21 12:10:25 2007 -0600
    77.3 @@ -104,7 +104,6 @@ unsigned long xenheap_phys_start, xenhea
    77.4  
    77.5  extern void arch_init_memory(void);
    77.6  extern void init_IRQ(void);
    77.7 -extern void trap_init(void);
    77.8  extern void early_time_init(void);
    77.9  extern void early_cpu_init(void);
   77.10  extern void vesa_init(void);
   77.11 @@ -114,7 +113,7 @@ struct tss_struct init_tss[NR_CPUS];
   77.12  
   77.13  char __attribute__ ((__section__(".bss.stack_aligned"))) cpu0_stack[STACK_SIZE];
   77.14  
   77.15 -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
   77.16 +struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
   77.17  
   77.18  #if CONFIG_PAGING_LEVELS > 2
   77.19  unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
   77.20 @@ -308,6 +307,7 @@ struct boot_video_info {
   77.21      u8  rsvd_pos;           /* 0x23 */
   77.22      u16 vesapm_seg;         /* 0x24 */
   77.23      u16 vesapm_off;         /* 0x26 */
   77.24 +    u16 vesa_attrib;        /* 0x28 */
   77.25  };
   77.26  
   77.27  static void __init parse_video_info(void)
   77.28 @@ -340,6 +340,8 @@ static void __init parse_video_info(void
   77.29          vga_console_info.u.vesa_lfb.blue_size = bvi->blue_size;
   77.30          vga_console_info.u.vesa_lfb.rsvd_pos = bvi->rsvd_pos;
   77.31          vga_console_info.u.vesa_lfb.rsvd_size = bvi->rsvd_size;
   77.32 +        vga_console_info.u.vesa_lfb.gbl_caps = bvi->capabilities;
   77.33 +        vga_console_info.u.vesa_lfb.mode_attrs = bvi->vesa_attrib;
   77.34      }
   77.35  }
   77.36  
   77.37 @@ -970,6 +972,11 @@ void __init __start_xen(unsigned long mb
   77.38          if ( acpi_skip_timer_override &&
   77.39               !strstr(dom0_cmdline, "acpi_skip_timer_override") )
   77.40              safe_strcat(dom0_cmdline, " acpi_skip_timer_override");
   77.41 +        if ( (strlen(acpi_param) == 0) && acpi_disabled )
   77.42 +        {
   77.43 +            printk("ACPI is disabled, notifying Domain 0 (acpi=off)\n");
   77.44 +            safe_strcpy(acpi_param, "off");
   77.45 +        }
   77.46          if ( (strlen(acpi_param) != 0) && !strstr(dom0_cmdline, "acpi=") )
   77.47          {
   77.48              safe_strcat(dom0_cmdline, " acpi=");
    78.1 --- a/xen/arch/x86/smp.c	Wed Oct 17 10:36:31 2007 -0600
    78.2 +++ b/xen/arch/x86/smp.c	Sun Oct 21 12:10:25 2007 -0600
    78.3 @@ -164,34 +164,28 @@ void send_IPI_mask_phys(cpumask_t mask, 
    78.4  
    78.5  static DEFINE_SPINLOCK(flush_lock);
    78.6  static cpumask_t flush_cpumask;
    78.7 -static unsigned long flush_va;
    78.8 +static const void *flush_va;
    78.9 +static unsigned int flush_flags;
   78.10  
   78.11  fastcall void smp_invalidate_interrupt(void)
   78.12  {
   78.13      ack_APIC_irq();
   78.14      perfc_incr(ipis);
   78.15      irq_enter();
   78.16 -    if ( !__sync_lazy_execstate() )
   78.17 -    {
   78.18 -        if ( flush_va == FLUSHVA_ALL )
   78.19 -            local_flush_tlb();
   78.20 -        else
   78.21 -            local_flush_tlb_one(flush_va);
   78.22 -    }
   78.23 +    if ( !__sync_lazy_execstate() ||
   78.24 +         (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) )
   78.25 +        flush_area_local(flush_va, flush_flags);
   78.26      cpu_clear(smp_processor_id(), flush_cpumask);
   78.27      irq_exit();
   78.28  }
   78.29  
   78.30 -void __flush_tlb_mask(cpumask_t mask, unsigned long va)
   78.31 +void flush_area_mask(cpumask_t mask, const void *va, unsigned int flags)
   78.32  {
   78.33      ASSERT(local_irq_is_enabled());
   78.34 -    
   78.35 +
   78.36      if ( cpu_isset(smp_processor_id(), mask) )
   78.37      {
   78.38 -        if ( va == FLUSHVA_ALL )
   78.39 -            local_flush_tlb();
   78.40 -        else
   78.41 -            local_flush_tlb_one(va);
   78.42 +        flush_area_local(va, flags);
   78.43          cpu_clear(smp_processor_id(), mask);
   78.44      }
   78.45  
   78.46 @@ -200,6 +194,7 @@ void __flush_tlb_mask(cpumask_t mask, un
   78.47          spin_lock(&flush_lock);
   78.48          flush_cpumask = mask;
   78.49          flush_va      = va;
   78.50 +        flush_flags   = flags;
   78.51          send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
   78.52          while ( !cpus_empty(flush_cpumask) )
   78.53              cpu_relax();
   78.54 @@ -215,24 +210,13 @@ void new_tlbflush_clock_period(void)
   78.55      /* Flush everyone else. We definitely flushed just before entry. */
   78.56      allbutself = cpu_online_map;
   78.57      cpu_clear(smp_processor_id(), allbutself);
   78.58 -    __flush_tlb_mask(allbutself, FLUSHVA_ALL);
   78.59 +    flush_mask(allbutself, FLUSH_TLB);
   78.60  
   78.61      /* No need for atomicity: we are the only possible updater. */
   78.62      ASSERT(tlbflush_clock == 0);
   78.63      tlbflush_clock++;
   78.64  }
   78.65  
   78.66 -static void flush_tlb_all_pge_ipi(void *info)
   78.67 -{
   78.68 -    local_flush_tlb_pge();
   78.69 -}
   78.70 -
   78.71 -void flush_tlb_all_pge(void)
   78.72 -{
   78.73 -    smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
   78.74 -    local_flush_tlb_pge();
   78.75 -}
   78.76 -
   78.77  void smp_send_event_check_mask(cpumask_t mask)
   78.78  {
   78.79      cpu_clear(smp_processor_id(), mask);
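
flush_area_mask() above is a small rendezvous protocol: under flush_lock, the initiator publishes (va, flags) and the target mask, sends INVALIDATE_TLB_VECTOR, and spins until every target clears its own bit in smp_invalidate_interrupt(). A single-threaded sketch of the handshake, with C11 atomics standing in for Xen's cpumask operations and the IPI simulated by direct calls:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_ulong flush_cpumask;  /* bit per CPU yet to respond  */
    static const void *flush_va;        /* published under flush_lock  */
    static unsigned int flush_flags;

    /* What each target CPU's interrupt handler does. */
    static void ipi_handler(unsigned int cpu)
    {
        /* flush_area_local(flush_va, flush_flags) would run here. */
        atomic_fetch_and(&flush_cpumask, ~(1UL << cpu));
    }

    int main(void)
    {
        /* Initiator: publish the request, then "send" the IPI. */
        flush_va = NULL;
        flush_flags = 0x10;                  /* FLUSH_TLB in the real code */
        atomic_store(&flush_cpumask, 0x6UL); /* target CPUs 1 and 2 */

        ipi_handler(1);                      /* targets acknowledge... */
        ipi_handler(2);

        /* ...and the initiator's spin loop would now fall through. */
        printf("all acked: %s\n",
               atomic_load(&flush_cpumask) == 0 ? "yes" : "no");
        return 0;
    }
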
    79.1 --- a/xen/arch/x86/smpboot.c	Wed Oct 17 10:36:31 2007 -0600
    79.2 +++ b/xen/arch/x86/smpboot.c	Sun Oct 21 12:10:25 2007 -0600
    79.3 @@ -492,8 +492,6 @@ void __devinit start_secondary(void *unu
    79.4  	 */
    79.5  	unsigned int cpu = booting_cpu;
    79.6  
    79.7 -	extern void percpu_traps_init(void);
    79.8 -
    79.9  	set_processor_id(cpu);
   79.10  	set_current(idle_vcpu[cpu]);
   79.11  	this_cpu(curr_vcpu) = idle_vcpu[cpu];
   79.12 @@ -518,7 +516,7 @@ void __devinit start_secondary(void *unu
   79.13  	 * low-memory mappings have been cleared, flush them from
   79.14  	 * the local TLBs too.
   79.15  	 */
   79.16 -	local_flush_tlb();
   79.17 +	flush_tlb_local();
   79.18  
   79.19  	/* This must be done before setting cpu_online_map */
   79.20  	set_cpu_sibling_map(raw_smp_processor_id());
    80.1 --- a/xen/arch/x86/sysctl.c	Wed Oct 17 10:36:31 2007 -0600
    80.2 +++ b/xen/arch/x86/sysctl.c	Sun Oct 21 12:10:25 2007 -0600
    80.3 @@ -51,10 +51,8 @@ long arch_do_sysctl(
    80.4              cpus_weight(cpu_sibling_map[0]);
    80.5          pi->cores_per_socket =
    80.6              cpus_weight(cpu_core_map[0]) / pi->threads_per_core;
    80.7 +        pi->nr_cpus = (u32)num_online_cpus();
    80.8          pi->nr_nodes = num_online_nodes();
    80.9 -        pi->sockets_per_node = num_online_cpus() / 
   80.10 -            (pi->nr_nodes * pi->cores_per_socket * pi->threads_per_core);
   80.11 -
   80.12          pi->total_pages      = total_pages;
   80.13          pi->free_pages       = avail_domheap_pages();
   80.14          pi->scrub_pages      = avail_scrub_pages();
    81.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    81.2 +++ b/xen/arch/x86/trace.c	Sun Oct 21 12:10:25 2007 -0600
    81.3 @@ -0,0 +1,231 @@
    81.4 +#include <xen/config.h>
    81.5 +#include <xen/init.h>
    81.6 +#include <xen/kernel.h>
    81.7 +#include <xen/lib.h>
    81.8 +#include <xen/domain.h>
    81.9 +#include <xen/sched.h>
   81.10 +#include <xen/trace.h>
   81.11 +
   81.12 +#ifndef __x86_64__
   81.13 +#undef TRC_PV_64_FLAG
   81.14 +#define TRC_PV_64_FLAG 0
   81.15 +#endif
   81.16 +
   81.17 +asmlinkage void trace_hypercall(void)
   81.18 +{
   81.19 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
   81.20 +
   81.21 +    if ( !tb_init_done )
   81.22 +        return;
   81.23 +
   81.24 +#ifdef __x86_64__
   81.25 +    if ( is_pv_32on64_vcpu(current) )
   81.26 +    {
   81.27 +        struct {
    81.28 +            u32 eip, eax;
    81.29 +        } __attribute__((packed)) d;
    81.30 +
   81.31 +        d.eip = regs->eip;
   81.32 +        d.eax = regs->eax;
   81.33 +
   81.34 +        __trace_var(TRC_PV_HYPERCALL, 1,
   81.35 +                    sizeof(d), (unsigned char *)&d);
   81.36 +    }
   81.37 +    else
   81.38 +#endif
   81.39 +    {
   81.40 +        struct {
   81.41 +            unsigned long eip;
   81.42 +            u32 eax;
   81.43 +        } __attribute__((packed)) d;
   81.44 +        u32 event;
   81.45 +
   81.46 +        event = TRC_PV_HYPERCALL;
   81.47 +        event |= TRC_PV_64_FLAG;
   81.48 +        d.eip = regs->eip;
   81.49 +        d.eax = regs->eax;
   81.50 +
   81.51 +        __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char*)&d);
   81.52 +    }
   81.53 +}
   81.54 +
   81.55 +void __trace_pv_trap(int trapnr, unsigned long eip,
   81.56 +                     int use_error_code, unsigned error_code)
   81.57 +{
   81.58 +    if ( !tb_init_done )
   81.59 +        return;
   81.60 +
   81.61 +#ifdef __x86_64__
   81.62 +    if ( is_pv_32on64_vcpu(current) )
   81.63 +    {
   81.64 +        struct {
   81.65 +            unsigned eip:32,
   81.66 +                trapnr:15,
   81.67 +                use_error_code:1,
   81.68 +                error_code:16;
   81.69 +        } __attribute__((packed)) d;
   81.70 +
   81.71 +        d.eip = eip;
   81.72 +        d.trapnr = trapnr;
   81.73 +        d.error_code = error_code;
    81.74 +        d.use_error_code = !!use_error_code;
    81.75 +
   81.76 +        __trace_var(TRC_PV_TRAP, 1,
   81.77 +                    sizeof(d), (unsigned char *)&d);
   81.78 +    }
   81.79 +    else
   81.80 +#endif        
   81.81 +    {
   81.82 +        struct {
   81.83 +            unsigned long eip;
   81.84 +            unsigned trapnr:15,
   81.85 +                use_error_code:1,
   81.86 +                error_code:16;
   81.87 +        } __attribute__((packed)) d;
   81.88 +        unsigned event;
   81.89 +
   81.90 +        d.eip = eip;
   81.91 +        d.trapnr = trapnr;
   81.92 +        d.error_code = error_code;
    81.93 +        d.use_error_code = !!use_error_code;
    81.94 +
   81.95 +        event = TRC_PV_TRAP;
   81.96 +        event |= TRC_PV_64_FLAG;
   81.97 +        __trace_var(event, 1, sizeof(d), (unsigned char *)&d);
   81.98 +    }
   81.99 +}
  81.100 +
  81.101 +void __trace_pv_page_fault(unsigned long addr, unsigned error_code)
  81.102 +{
  81.103 +    unsigned long eip = guest_cpu_user_regs()->eip;
  81.104 +
  81.105 +    if ( !tb_init_done )
  81.106 +        return;
  81.107 +
  81.108 +#ifdef __x86_64__
  81.109 +    if ( is_pv_32on64_vcpu(current) )
  81.110 +    {
  81.111 +        struct {
  81.112 +            u32 eip, addr, error_code;
  81.113 +        } __attribute__((packed)) d;
  81.114 +
  81.115 +        d.eip = eip;
  81.116 +        d.addr = addr;
  81.117 +        d.error_code = error_code;
  81.118 +                
  81.119 +        __trace_var(TRC_PV_PAGE_FAULT, 1, sizeof(d), (unsigned char *)&d);
  81.120 +    }
  81.121 +    else
  81.122 +#endif        
  81.123 +    {
  81.124 +        struct {
  81.125 +            unsigned long eip, addr;
  81.126 +            u32 error_code;
  81.127 +        } __attribute__((packed)) d;
  81.128 +        unsigned event;
  81.129 +
  81.130 +        d.eip = eip;
  81.131 +        d.addr = addr;
  81.132 +        d.error_code = error_code;
  81.133 +        event = TRC_PV_PAGE_FAULT;
  81.134 +        event |= TRC_PV_64_FLAG;
  81.135 +        __trace_var(event, 1, sizeof(d), (unsigned char *)&d);
  81.136 +    }
  81.137 +}
  81.138 +
  81.139 +void __trace_trap_one_addr(unsigned event, unsigned long va)
  81.140 +{
  81.141 +    if ( !tb_init_done )
  81.142 +        return;
  81.143 +
  81.144 +#ifdef __x86_64__
  81.145 +    if ( is_pv_32on64_vcpu(current) )
  81.146 +    {
  81.147 +        u32 d = va;
  81.148 +        __trace_var(event, 1, sizeof(d), (unsigned char *)&d);
  81.149 +    }
  81.150 +    else
  81.151 +#endif        
  81.152 +    {
  81.153 +        event |= TRC_PV_64_FLAG;
  81.154 +        __trace_var(event, 1, sizeof(va), (unsigned char *)&va);
  81.155 +    }
  81.156 +}
  81.157 +
  81.158 +void __trace_trap_two_addr(unsigned event, unsigned long va1,
  81.159 +                           unsigned long va2)
  81.160 +{
  81.161 +    if ( !tb_init_done )
  81.162 +        return;
  81.163 +
  81.164 +#ifdef __x86_64__
  81.165 +    if ( is_pv_32on64_vcpu(current) )
  81.166 +    {
  81.167 +        struct {
  81.168 +            u32 va1, va2;
  81.169 +        } __attribute__((packed)) d;
   81.170 +        d.va1 = va1;
   81.171 +        d.va2 = va2;
  81.172 +        __trace_var(event, 1, sizeof(d), (unsigned char *)&d);
  81.173 +    }
  81.174 +    else
  81.175 +#endif        
  81.176 +    {
  81.177 +        struct {
  81.178 +            unsigned long va1, va2;
  81.179 +        } __attribute__((packed)) d;
   81.180 +        d.va1 = va1;
   81.181 +        d.va2 = va2;
  81.182 +        event |= TRC_PV_64_FLAG;
  81.183 +        __trace_var(event, 1, sizeof(d), (unsigned char *)&d);
  81.184 +    }
  81.185 +}
  81.186 +
  81.187 +void __trace_ptwr_emulation(unsigned long addr, l1_pgentry_t npte)
  81.188 +{
  81.189 +    unsigned long eip = guest_cpu_user_regs()->eip;
  81.190 +
  81.191 +    if ( !tb_init_done )
  81.192 +        return;
  81.193 +
  81.194 +    /* We have a couple of different modes to worry about:
  81.195 +     * - 32-on-32: 32-bit pte, 32-bit virtual addresses
  81.196 +     * - pae-on-pae, pae-on-64: 64-bit pte, 32-bit virtual addresses
  81.197 +     * - 64-on-64: 64-bit pte, 64-bit virtual addresses
  81.198 +     * pae-on-64 is the only one that requires extra code; in all other
  81.199 +     * cases, "unsigned long" is the size of a guest virtual address.
  81.200 +     */
  81.201 +
  81.202 +#ifdef __x86_64__
  81.203 +    if ( is_pv_32on64_vcpu(current) )
  81.204 +    {
  81.205 +        struct {
  81.206 +            l1_pgentry_t pte;
  81.207 +            u32 addr, eip;
  81.208 +        } __attribute__((packed)) d;
  81.209 +        d.addr = addr;
  81.210 +        d.eip = eip;
  81.211 +        d.pte = npte;
  81.212 +
  81.213 +        __trace_var(TRC_PV_PTWR_EMULATION_PAE, 1,
  81.214 +                    sizeof(d), (unsigned char *)&d);
  81.215 +    }
  81.216 +    else
  81.217 +#endif        
  81.218 +    {
  81.219 +        struct {
  81.220 +            l1_pgentry_t pte;
  81.221 +            unsigned long addr, eip;
  81.222 +        } d;
  81.223 +        unsigned event;
  81.224 +
  81.225 +        d.addr = addr;
  81.226 +        d.eip = eip;
  81.227 +        d.pte = npte;
  81.228 +
  81.229 +        event = ((CONFIG_PAGING_LEVELS == 3) ?
  81.230 +                 TRC_PV_PTWR_EMULATION_PAE : TRC_PV_PTWR_EMULATION);
  81.231 +        event |= TRC_PV_64_FLAG;
  81.232 +        __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
  81.233 +    }
  81.234 +}
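
Every helper in this new file emits a packed payload whose width depends on whether the vCPU is a 32-on-64 compat guest, and the native branches OR TRC_PV_64_FLAG into the event id so a consumer knows which layout to decode. Illustrative re-declarations of the two PV-trap payloads (the "meta" word stands for the trapnr:15 / use_error_code:1 / error_code:16 bitfields above):

    #include <stdint.h>
    #include <stdio.h>

    struct pv_trap_compat { uint32_t eip; uint32_t meta; }
        __attribute__((packed));
    struct pv_trap_native { uint64_t eip; uint32_t meta; }
        __attribute__((packed));

    int main(void)
    {
        /* 8 bytes for the compat record, 12 for the native one. */
        printf("compat: %zu, native: %zu\n",
               sizeof(struct pv_trap_compat),
               sizeof(struct pv_trap_native));
        return 0;
    }
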
    82.1 --- a/xen/arch/x86/traps.c	Wed Oct 17 10:36:31 2007 -0600
    82.2 +++ b/xen/arch/x86/traps.c	Sun Oct 21 12:10:25 2007 -0600
    82.3 @@ -46,6 +46,7 @@
    82.4  #include <xen/nmi.h>
    82.5  #include <xen/version.h>
    82.6  #include <xen/kexec.h>
    82.7 +#include <xen/trace.h>
    82.8  #include <asm/paging.h>
    82.9  #include <asm/system.h>
   82.10  #include <asm/io.h>
   82.11 @@ -75,6 +76,8 @@ char opt_nmi[10] = "fatal";
   82.12  #endif
   82.13  string_param("nmi", opt_nmi);
   82.14  
   82.15 +DEFINE_PER_CPU(u32, ler_msr);
   82.16 +
   82.17  /* Master table, used by CPU0. */
   82.18  idt_entry_t idt_table[IDT_ENTRIES];
   82.19  
   82.20 @@ -111,6 +114,9 @@ unsigned long do_get_debugreg(int reg);
   82.21  static int debug_stack_lines = 20;
   82.22  integer_param("debug_stack_lines", debug_stack_lines);
   82.23  
   82.24 +static int opt_ler;
   82.25 +boolean_param("ler", opt_ler);
   82.26 +
   82.27  #ifdef CONFIG_X86_32
   82.28  #define stack_words_per_line 8
   82.29  #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
   82.30 @@ -380,6 +386,8 @@ static int do_guest_trap(
   82.31      struct trap_bounce *tb;
   82.32      const struct trap_info *ti;
   82.33  
   82.34 +    trace_pv_trap(trapnr, regs->eip, use_error_code, regs->error_code);
   82.35 +
   82.36      tb = &v->arch.trap_bounce;
   82.37      ti = &v->arch.guest_context.trap_ctxt[trapnr];
   82.38  
   82.39 @@ -633,6 +641,8 @@ static int emulate_forced_invalid_op(str
   82.40      regs->eip = eip;
   82.41      regs->eflags &= ~X86_EFLAGS_RF;
   82.42  
   82.43 +    trace_trap_one_addr(TRC_PV_FORCED_INVALID_OP, regs->eip);
   82.44 +
   82.45      return EXCRET_fault_fixed;
   82.46  }
   82.47  
   82.48 @@ -752,6 +762,8 @@ void propagate_page_fault(unsigned long 
   82.49      if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
   82.50          error_code |= PFEC_user_mode;
   82.51  
   82.52 +    trace_pv_page_fault(addr, error_code);
   82.53 +
   82.54      ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
   82.55      tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
   82.56      tb->error_code = error_code;
   82.57 @@ -783,7 +795,13 @@ static int handle_gdt_ldt_mapping_fault(
   82.58      if ( likely(is_ldt_area) )
   82.59      {
   82.60          /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
   82.61 -        if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
   82.62 +        if ( likely(map_ldt_shadow_page(offset >> PAGE_SHIFT)) )
   82.63 +        {
   82.64 +            if ( guest_mode(regs) )
   82.65 +                trace_trap_two_addr(TRC_PV_GDT_LDT_MAPPING_FAULT,
   82.66 +                                    regs->eip, offset);
   82.67 +        }
   82.68 +        else
   82.69          {
   82.70              /* In hypervisor mode? Leave it to the #PF handler to fix up. */
   82.71              if ( !guest_mode(regs) )
   82.72 @@ -939,7 +957,12 @@ static int fixup_page_fault(unsigned lon
   82.73      if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
   82.74      {
   82.75          if ( paging_mode_external(d) && guest_mode(regs) )
   82.76 -            return paging_fault(addr, regs);
   82.77 +        {
   82.78 +            int ret = paging_fault(addr, regs);
   82.79 +            if ( ret == EXCRET_fault_fixed )
   82.80 +                trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
   82.81 +            return ret;
   82.82 +        }
   82.83          if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
   82.84              return handle_gdt_ldt_mapping_fault(
   82.85                  addr - GDT_LDT_VIRT_START, regs);
   82.86 @@ -955,7 +978,12 @@ static int fixup_page_fault(unsigned lon
   82.87          return EXCRET_fault_fixed;
   82.88  
   82.89      if ( paging_mode_enabled(d) )
   82.90 -        return paging_fault(addr, regs);
   82.91 +    {
   82.92 +        int ret = paging_fault(addr, regs);
   82.93 +        if ( ret == EXCRET_fault_fixed )
   82.94 +            trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
   82.95 +        return ret;
   82.96 +    }
   82.97  
   82.98      return 0;
   82.99  }
  82.100 @@ -1872,13 +1900,19 @@ asmlinkage int do_general_protection(str
  82.101      /* Emulate some simple privileged and I/O instructions. */
  82.102      if ( (regs->error_code == 0) &&
  82.103           emulate_privileged_op(regs) )
  82.104 +    {
  82.105 +        trace_trap_one_addr(TRC_PV_EMULATE_PRIVOP, regs->eip);
  82.106          return 0;
  82.107 +    }
  82.108  
  82.109  #if defined(__i386__)
  82.110      if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) && 
  82.111           (regs->error_code == 0) && 
  82.112           gpf_emulate_4gb(regs) )
  82.113 +    {
  82.114 +        TRACE_1D(TRC_PV_EMULATE_4GB, regs->eip);
  82.115          return 0;
  82.116 +    }
  82.117  #endif
  82.118  
  82.119      /* Pass on GPF as is. */
  82.120 @@ -2030,6 +2064,8 @@ asmlinkage int do_device_not_available(s
  82.121          do_guest_trap(TRAP_no_device, regs, 0);
  82.122          current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
  82.123      }
  82.124 +    else
  82.125 +        TRACE_0D(TRC_PV_MATH_STATE_RESTORE);
  82.126  
  82.127      return EXCRET_fault_fixed;
  82.128  }
  82.129 @@ -2067,9 +2103,12 @@ asmlinkage int do_debug(struct cpu_user_
  82.130      /* Save debug status register where guest OS can peek at it */
  82.131      v->arch.guest_context.debugreg[6] = condition;
  82.132  
  82.133 +    ler_enable();
  82.134 +
  82.135      return do_guest_trap(TRAP_debug, regs, 0);
  82.136  
  82.137   out:
  82.138 +    ler_enable();
  82.139      return EXCRET_not_a_fault;
  82.140  }
  82.141  
  82.142 @@ -2115,10 +2154,43 @@ void set_tss_desc(unsigned int n, void *
  82.143  #endif
  82.144  }
  82.145  
  82.146 +void __devinit percpu_traps_init(void)
  82.147 +{
  82.148 +    subarch_percpu_traps_init();
  82.149 +
  82.150 +    if ( !opt_ler )
  82.151 +        return;
  82.152 +
  82.153 +    switch ( boot_cpu_data.x86_vendor )
  82.154 +    {
  82.155 +    case X86_VENDOR_INTEL:
  82.156 +        switch ( boot_cpu_data.x86 )
  82.157 +        {
  82.158 +        case 6:
  82.159 +            this_cpu(ler_msr) = MSR_IA32_LASTINTFROMIP;
  82.160 +            break;
  82.161 +        case 15:
  82.162 +            this_cpu(ler_msr) = MSR_P4_LER_FROM_LIP;
  82.163 +            break;
  82.164 +        }
  82.165 +        break;
  82.166 +    case X86_VENDOR_AMD:
  82.167 +        switch ( boot_cpu_data.x86 )
  82.168 +        {
  82.169 +        case 6:
  82.170 +        case 15:
  82.171 +        case 16:
  82.172 +            this_cpu(ler_msr) = MSR_IA32_LASTINTFROMIP;
  82.173 +            break;
  82.174 +        }
  82.175 +        break;
  82.176 +    }
  82.177 +
  82.178 +    ler_enable();
  82.179 +}
  82.180 +
  82.181  void __init trap_init(void)
  82.182  {
  82.183 -    extern void percpu_traps_init(void);
  82.184 -
  82.185      /*
  82.186       * Note that interrupt gates are always used, rather than trap gates. We 
  82.187       * must have interrupts disabled until DS/ES/FS/GS are saved because the 
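
percpu_traps_init() above picks the vendor- and family-specific "last exception record" MSR pair and then calls ler_enable(), which this hunk does not show; do_debug() also re-arms it, since taking a debug exception freezes the record. A plausible minimal form, an assumption for illustration using Xen's rdmsrl/wrmsrl helpers rather than the patch's actual definition:

    /* Assumed sketch of ler_enable(); not from this changeset. */
    static void ler_enable(void)
    {
        u64 debugctl;

        if ( !this_cpu(ler_msr) )
            return;              /* no usable LER MSRs on this CPU */

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1); /* bit 0: LBR/LER */
    }
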
    83.1 --- a/xen/arch/x86/x86_32/domain_page.c	Wed Oct 17 10:36:31 2007 -0600
    83.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Sun Oct 21 12:10:25 2007 -0600
    83.3 @@ -78,7 +78,7 @@ void *map_domain_page(unsigned long mfn)
    83.4          if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
    83.5          {
    83.6              perfc_incr(domain_page_tlb_flush);
    83.7 -            local_flush_tlb();
    83.8 +            flush_tlb_local();
    83.9          }
   83.10      }
   83.11  
   83.12 @@ -94,7 +94,7 @@ void *map_domain_page(unsigned long mfn)
   83.13  
   83.14          /* /Second/, flush TLBs. */
   83.15          perfc_incr(domain_page_tlb_flush);
   83.16 -        local_flush_tlb();
   83.17 +        flush_tlb_local();
   83.18          vcache->shadow_epoch = ++dcache->epoch;
   83.19          dcache->tlbflush_timestamp = tlbflush_current_time();
   83.20  
    84.1 --- a/xen/arch/x86/x86_32/entry.S	Wed Oct 17 10:36:31 2007 -0600
    84.2 +++ b/xen/arch/x86/x86_32/entry.S	Sun Oct 21 12:10:25 2007 -0600
    84.3 @@ -194,6 +194,12 @@ 1:      sti
    84.4          pushl 20(%esp) # ECX
    84.5          pushl 20(%esp) # EBX
    84.6  #endif
    84.7 +        cmpb  $0,tb_init_done
    84.8 +        je    tracing_off
    84.9 +        call  trace_hypercall
   84.10 +        /* Now restore all the registers that trace_hypercall clobbered */
   84.11 +        movl  UREGS_eax+24(%esp),%eax /* Hypercall # */
   84.12 +tracing_off:
   84.13          call *hypercall_table(,%eax,4)
   84.14          addl  $24,%esp     # Discard the shadow parameters
   84.15  #ifndef NDEBUG
    85.1 --- a/xen/arch/x86/x86_32/mm.c	Wed Oct 17 10:36:31 2007 -0600
    85.2 +++ b/xen/arch/x86/x86_32/mm.c	Sun Oct 21 12:10:25 2007 -0600
    85.3 @@ -152,7 +152,7 @@ void __init zap_low_mappings(l2_pgentry_
    85.4      /* Now zap mappings in the idle pagetables. */
    85.5      destroy_xen_mappings(0, HYPERVISOR_VIRT_START);
    85.6  
    85.7 -    flush_tlb_all_pge();
    85.8 +    flush_all(FLUSH_TLB_GLOBAL);
    85.9  
   85.10      /* Replace with mapping of the boot trampoline only. */
   85.11      map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
    86.1 --- a/xen/arch/x86/x86_32/traps.c	Wed Oct 17 10:36:31 2007 -0600
    86.2 +++ b/xen/arch/x86/x86_32/traps.c	Sun Oct 21 12:10:25 2007 -0600
    86.3 @@ -104,6 +104,14 @@ void show_registers(struct cpu_user_regs
    86.4             "ss: %04x   cs: %04x\n",
    86.5             fault_regs.ds, fault_regs.es, fault_regs.fs,
    86.6             fault_regs.gs, fault_regs.ss, fault_regs.cs);
    86.7 +
    86.8 +    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    86.9 +    {
   86.10 +        u32 from, to, hi;
   86.11 +        rdmsr(this_cpu(ler_msr), from, hi);
   86.12 +        rdmsr(this_cpu(ler_msr) + 1, to, hi);
   86.13 +        printk("ler: %08x -> %08x\n", from, to);
   86.14 +    }
   86.15  }
   86.16  
   86.17  void show_page_walk(unsigned long addr)
   86.18 @@ -250,7 +258,7 @@ unsigned long do_iret(void)
   86.19      return 0;
   86.20  }
   86.21  
   86.22 -void __devinit percpu_traps_init(void)
   86.23 +void __devinit subarch_percpu_traps_init(void)
   86.24  {
   86.25      struct tss_struct *tss = &doublefault_tss;
   86.26      asmlinkage int hypercall(void);
    87.1 --- a/xen/arch/x86/x86_64/compat/entry.S	Wed Oct 17 10:36:31 2007 -0600
    87.2 +++ b/xen/arch/x86/x86_64/compat/entry.S	Sun Oct 21 12:10:25 2007 -0600
    87.3 @@ -56,6 +56,18 @@ ENTRY(compat_hypercall)
    87.4          movl  %ebp,%r9d              /* Arg 6        */
    87.5          movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
    87.6  #endif
    87.7 +        cmpb  $0,tb_init_done(%rip)
    87.8 +        je    compat_tracing_off
    87.9 +        call  trace_hypercall
   87.10 +        /* Now restore all the registers that trace_hypercall clobbered */
   87.11 +        movl  UREGS_rax(%rsp),%eax   /* Hypercall #  */
   87.12 +        movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
   87.13 +        movl  UREGS_rcx(%rsp),%esi   /* Arg 2        */
   87.14 +        movl  UREGS_rdx(%rsp),%edx   /* Arg 3        */
   87.15 +        movl  UREGS_rsi(%rsp),%ecx   /* Arg 4        */
   87.16 +        movl  UREGS_rdi(%rsp),%r8d   /* Arg 5        */
   87.17 +        movl  UREGS_rbp(%rsp),%r9d   /* Arg 6        */
   87.18 +compat_tracing_off:
   87.19          leaq  compat_hypercall_table(%rip),%r10
   87.20          PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
   87.21          callq *(%r10,%rax,8)
    88.1 --- a/xen/arch/x86/x86_64/compat/mm.c	Wed Oct 17 10:36:31 2007 -0600
    88.2 +++ b/xen/arch/x86/x86_64/compat/mm.c	Sun Oct 21 12:10:25 2007 -0600
    88.3 @@ -31,7 +31,7 @@ int compat_set_gdt(XEN_GUEST_HANDLE(uint
    88.4      LOCK_BIGLOCK(current->domain);
    88.5  
    88.6      if ( (ret = set_gdt(current, frames, entries)) == 0 )
    88.7 -        local_flush_tlb();
    88.8 +        flush_tlb_local();
    88.9  
   88.10      UNLOCK_BIGLOCK(current->domain);
   88.11  
    89.1 --- a/xen/arch/x86/x86_64/entry.S	Wed Oct 17 10:36:31 2007 -0600
    89.2 +++ b/xen/arch/x86/x86_64/entry.S	Sun Oct 21 12:10:25 2007 -0600
    89.3 @@ -106,7 +106,7 @@ restore_all_xen:
    89.4   * When entering SYSCALL from kernel mode:
    89.5   *  %rax                            = hypercall vector
    89.6   *  %rdi, %rsi, %rdx, %r10, %r8, %9 = hypercall arguments
    89.7 - *  %r11, %rcx                      = SYSCALL-saved %rflags and %rip
    89.8 + *  %rcx                            = SYSCALL-saved %rip
    89.9   *  NB. We must move %r10 to %rcx for C function-calling ABI.
   89.10   *
   89.11   * When entering SYSCALL from user mode:
   89.12 @@ -125,6 +125,7 @@ ENTRY(syscall_enter)
   89.13          pushq %rcx
   89.14          pushq $0
   89.15          movl  $TRAP_syscall,4(%rsp)
   89.16 +        movq  24(%rsp),%r11 /* Re-load user RFLAGS into %r11 before SAVE_ALL */
   89.17          SAVE_ALL
   89.18          GET_CURRENT(%rbx)
   89.19          testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
   89.20 @@ -148,6 +149,18 @@ ENTRY(syscall_enter)
   89.21          pushq %rax
   89.22          pushq UREGS_rip+8(%rsp)
   89.23  #endif
   89.24 +        cmpb  $0,tb_init_done(%rip)
   89.25 +        je    tracing_off
   89.26 +        call  trace_hypercall
   89.27 +        /* Now restore all the registers that trace_hypercall clobbered */
   89.28 +        movq  UREGS_rax(%rsp),%rax   /* Hypercall #  */
   89.29 +        movq  UREGS_rdi(%rsp),%rdi   /* Arg 1        */
   89.30 +        movq  UREGS_rsi(%rsp),%rsi   /* Arg 2        */
   89.31 +        movq  UREGS_rdx(%rsp),%rdx   /* Arg 3        */
   89.32 +        movq  UREGS_r10(%rsp),%rcx   /* Arg 4        */
    89.33 +        movq  UREGS_r8(%rsp),%r8     /* Arg 5        */
    89.34 +        movq  UREGS_r9(%rsp),%r9     /* Arg 6        */
   89.35 +tracing_off:
   89.36          leaq  hypercall_table(%rip),%r10
   89.37          PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
   89.38          callq *(%r10,%rax,8)
    90.1 --- a/xen/arch/x86/x86_64/mm.c	Wed Oct 17 10:36:31 2007 -0600
    90.2 +++ b/xen/arch/x86/x86_64/mm.c	Sun Oct 21 12:10:25 2007 -0600
    90.3 @@ -205,7 +205,7 @@ void __init zap_low_mappings(void)
    90.4  
    90.5      /* Remove aliased mapping of first 1:1 PML4 entry. */
    90.6      l4e_write(&idle_pg_table[0], l4e_empty());
    90.7 -    local_flush_tlb_pge();
    90.8 +    flush_local(FLUSH_TLB_GLOBAL);
    90.9  
   90.10      /* Replace with mapping of the boot trampoline only. */
   90.11      map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
    91.1 --- a/xen/arch/x86/x86_64/traps.c	Wed Oct 17 10:36:31 2007 -0600
    91.2 +++ b/xen/arch/x86/x86_64/traps.c	Sun Oct 21 12:10:25 2007 -0600
    91.3 @@ -112,6 +112,14 @@ void show_registers(struct cpu_user_regs
    91.4             "ss: %04x   cs: %04x\n",
    91.5             fault_regs.ds, fault_regs.es, fault_regs.fs,
    91.6             fault_regs.gs, fault_regs.ss, fault_regs.cs);
    91.7 +
    91.8 +    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    91.9 +    {
   91.10 +        u64 from, to;
   91.11 +        rdmsrl(this_cpu(ler_msr), from);
   91.12 +        rdmsrl(this_cpu(ler_msr) + 1, to);
   91.13 +        printk("ler: %016lx -> %016lx\n", from, to);
   91.14 +    }
   91.15  }
   91.16  
   91.17  void show_page_walk(unsigned long addr)
   91.18 @@ -302,7 +310,7 @@ static int write_stack_trampoline(
   91.19      return 34;
   91.20  }
   91.21  
   91.22 -void __devinit percpu_traps_init(void)
   91.23 +void __devinit subarch_percpu_traps_init(void)
   91.24  {
   91.25      char *stack_bottom, *stack;
   91.26      int   cpu = smp_processor_id();
    92.1 --- a/xen/arch/x86/x86_emulate.c	Wed Oct 17 10:36:31 2007 -0600
    92.2 +++ b/xen/arch/x86/x86_emulate.c	Sun Oct 21 12:10:25 2007 -0600
    92.3 @@ -299,21 +299,21 @@ struct operand {
    92.4  #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
    92.5  
    92.6  /* Before executing instruction: restore necessary bits in EFLAGS. */
    92.7 -#define _PRE_EFLAGS(_sav, _msk, _tmp)           \
    92.8 -/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); */\
    92.9 -"push %"_sav"; "                                \
   92.10 -"movl %"_msk",%"_LO32 _tmp"; "                  \
   92.11 -"andl %"_LO32 _tmp",("_STK"); "                 \
   92.12 -"pushf; "                                       \
   92.13 -"notl %"_LO32 _tmp"; "                          \
   92.14 -"andl %"_LO32 _tmp",("_STK"); "                 \
   92.15 -"pop  %"_tmp"; "                                \
   92.16 -"orl  %"_LO32 _tmp",("_STK"); "                 \
   92.17 -"popf; "                                        \
   92.18 -/* _sav &= ~msk; */                             \
   92.19 -"movl %"_msk",%"_LO32 _tmp"; "                  \
   92.20 -"notl %"_LO32 _tmp"; "                          \
   92.21 -"andl %"_LO32 _tmp",%"_sav"; "
   92.22 +#define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
   92.23 +/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
   92.24 +"movl %"_sav",%"_LO32 _tmp"; "                                  \
   92.25 +"push %"_tmp"; "                                                \
   92.26 +"push %"_tmp"; "                                                \
   92.27 +"movl %"_msk",%"_LO32 _tmp"; "                                  \
   92.28 +"andl %"_LO32 _tmp",("_STK"); "                                 \
   92.29 +"pushf; "                                                       \
   92.30 +"notl %"_LO32 _tmp"; "                                          \
   92.31 +"andl %"_LO32 _tmp",("_STK"); "                                 \
   92.32 +"andl %"_LO32 _tmp","STR(BITS_PER_LONG/4)"("_STK"); "           \
   92.33 +"pop  %"_tmp"; "                                                \
   92.34 +"orl  %"_LO32 _tmp",("_STK"); "                                 \
   92.35 +"popf; "                                                        \
   92.36 +"pop  %"_sav"; "
   92.37  
   92.38  /* After executing instruction: write-back necessary bits in EFLAGS. */
   92.39  #define _POST_EFLAGS(_sav, _msk, _tmp)          \
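
The rewritten _PRE_EFLAGS fragment implements exactly the transformation its comment states, with the extra push/pop pair folding the mask-clearing of _sav into the same sequence. The same two effects in plain C (an illustrative model, not from the patch):

    unsigned long pre_eflags(unsigned long *sav, unsigned long msk,
                             unsigned long eflags)
    {
        unsigned long out = (*sav & msk) | (eflags & ~msk);
        *sav &= ~msk;   /* done via the second push and final pop above */
        return out;     /* value loaded into EFLAGS by the popf */
    }
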
    93.1 --- a/xen/common/Makefile	Wed Oct 17 10:36:31 2007 -0600
    93.2 +++ b/xen/common/Makefile	Sun Oct 21 12:10:25 2007 -0600
    93.3 @@ -35,6 +35,9 @@ obj-$(CONFIG_XENCOMM) += xencomm.o
    93.4  
    93.5  subdir-$(CONFIG_COMPAT) += compat
    93.6  
    93.7 +subdir-$(x86_32) += hvm
    93.8 +subdir-$(x86_64) += hvm
    93.9 +
   93.10  subdir-y += libelf
   93.11  
   93.12  # Object file contains changeset and compiler information.
    94.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    94.2 +++ b/xen/common/hvm/Makefile	Sun Oct 21 12:10:25 2007 -0600
    94.3 @@ -0,0 +1,1 @@
    94.4 +obj-y += save.o
    95.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    95.2 +++ b/xen/common/hvm/save.c	Sun Oct 21 12:10:25 2007 -0600
    95.3 @@ -0,0 +1,218 @@
    95.4 +/*
    95.5 + * hvm/save.c: Save and restore HVM guest's emulated hardware state.
    95.6 + *
    95.7 + * Copyright (c) 2004, Intel Corporation.
    95.8 + * Copyright (c) 2007, XenSource Inc.
    95.9 + * Copyright (c) 2007, Isaku Yamahata <yamahata at valinux co jp>
   95.10 + *                     VA Linux Systems Japan K.K.
   95.11 + *                     split arch generic part
   95.12 + *
   95.13 + * This program is free software; you can redistribute it and/or modify it
   95.14 + * under the terms and conditions of the GNU General Public License,
   95.15 + * version 2, as published by the Free Software Foundation.
   95.16 + *
   95.17 + * This program is distributed in the hope it will be useful, but WITHOUT
   95.18 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   95.19 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   95.20 + * more details.
   95.21 + *
   95.22 + * You should have received a copy of the GNU General Public License along with
   95.23 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   95.24 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   95.25 + */
   95.26 +
   95.27 +#include <xen/config.h>
   95.28 +#include <xen/lib.h>
   95.29 +#include <xen/version.h>
   95.30 +#include <public/version.h>
   95.31 +#include <xen/sched.h>
   95.32 +
   95.33 +#include <asm/hvm/support.h>
   95.34 +
   95.35 +/* List of handlers for various HVM save and restore types */
   95.36 +static struct { 
   95.37 +    hvm_save_handler save;
   95.38 +    hvm_load_handler load; 
   95.39 +    const char *name;
   95.40 +    size_t size;
   95.41 +    int kind;
    95.42 +} hvm_sr_handlers[HVM_SAVE_CODE_MAX + 1] = {{NULL, NULL, "<?>"},};
   95.43 +
   95.44 +/* Init-time function to add entries to that list */
   95.45 +void hvm_register_savevm(uint16_t typecode, 
   95.46 +                         const char *name,
   95.47 +                         hvm_save_handler save_state,
   95.48 +                         hvm_load_handler load_state,
   95.49 +                         size_t size, int kind)
   95.50 +{
   95.51 +    ASSERT(typecode <= HVM_SAVE_CODE_MAX);
   95.52 +    ASSERT(hvm_sr_handlers[typecode].save == NULL);
   95.53 +    ASSERT(hvm_sr_handlers[typecode].load == NULL);
   95.54 +    hvm_sr_handlers[typecode].save = save_state;
   95.55 +    hvm_sr_handlers[typecode].load = load_state;
   95.56 +    hvm_sr_handlers[typecode].name = name;
   95.57 +    hvm_sr_handlers[typecode].size = size;
   95.58 +    hvm_sr_handlers[typecode].kind = kind;
   95.59 +}
   95.60 +
   95.61 +size_t hvm_save_size(struct domain *d) 
   95.62 +{
   95.63 +    struct vcpu *v;
   95.64 +    size_t sz;
   95.65 +    int i;
   95.66 +    
   95.67 +    /* Basic overhead for header and footer */
   95.68 +    sz = (2 * sizeof (struct hvm_save_descriptor)) + HVM_SAVE_LENGTH(HEADER);
   95.69 +
   95.70 +    /* Plus space for each thing we will be saving */
   95.71 +    for ( i = 0; i <= HVM_SAVE_CODE_MAX; i++ ) 
   95.72 +        if ( hvm_sr_handlers[i].kind == HVMSR_PER_VCPU )
   95.73 +            for_each_vcpu(d, v)
   95.74 +                sz += hvm_sr_handlers[i].size;
   95.75 +        else 
   95.76 +            sz += hvm_sr_handlers[i].size;
   95.77 +
   95.78 +    return sz;
   95.79 +}
   95.80 +
   95.81 +
   95.82 +int hvm_save(struct domain *d, hvm_domain_context_t *h)
   95.83 +{
   95.84 +    char *c;
   95.85 +    struct hvm_save_header hdr;
   95.86 +    struct hvm_save_end end;
   95.87 +    hvm_save_handler handler;
   95.88 +    uint16_t i;
   95.89 +
   95.90 +    hdr.magic = HVM_FILE_MAGIC;
   95.91 +    hdr.version = HVM_FILE_VERSION;
   95.92 +
   95.93 +    /* Save xen changeset */
   95.94 +    c = strrchr(xen_changeset(), ':');
   95.95 +    if ( c )
   95.96 +        hdr.changeset = simple_strtoll(c, NULL, 16);
   95.97 +    else 
   95.98 +        hdr.changeset = -1ULL; /* Unknown */
   95.99 +
  95.100 +    arch_hvm_save(&hdr);
  95.101 +
  95.102 +    if ( hvm_save_entry(HEADER, 0, h, &hdr) != 0 )
  95.103 +    {
  95.104 +        gdprintk(XENLOG_ERR, "HVM save: failed to write header\n");
  95.105 +        return -EFAULT;
  95.106 +    } 
  95.107 +
  95.108 +    /* Save all available kinds of state */
  95.109 +    for ( i = 0; i <= HVM_SAVE_CODE_MAX; i++ ) 
  95.110 +    {
  95.111 +        handler = hvm_sr_handlers[i].save;
  95.112 +        if ( handler != NULL ) 
  95.113 +        {
  95.114 +            gdprintk(XENLOG_INFO, "HVM save: %s\n",  hvm_sr_handlers[i].name);
  95.115 +            if ( handler(d, h) != 0 ) 
  95.116 +            {
  95.117 +                gdprintk(XENLOG_ERR, 
  95.118 +                         "HVM save: failed to save type %"PRIu16"\n", i);
  95.119 +                return -EFAULT;
  95.120 +            } 
  95.121 +        }
  95.122 +    }
  95.123 +
  95.124 +    /* Save an end-of-file marker */
  95.125 +    if ( hvm_save_entry(END, 0, h, &end) != 0 )
  95.126 +    {
   95.127 +        /* Ran out of buffer space */
  95.128 +        gdprintk(XENLOG_ERR, "HVM save: no room for end marker.\n");
  95.129 +        return -EFAULT;
  95.130 +    }
  95.131 +
  95.132 +    /* Save macros should not have let us overrun */
  95.133 +    ASSERT(h->cur <= h->size);
  95.134 +    return 0;
  95.135 +}
  95.136 +
  95.137 +int hvm_load(struct domain *d, hvm_domain_context_t *h)
  95.138 +{
  95.139 +    char *c;
  95.140 +    uint64_t cset;
  95.141 +    struct hvm_save_header hdr;
  95.142 +    struct hvm_save_descriptor *desc;
  95.143 +    hvm_load_handler handler;
  95.144 +    struct vcpu *v;
  95.145 +    
  95.146 +    /* Read the save header, which must be first */
  95.147 +    if ( hvm_load_entry(HEADER, h, &hdr) != 0 ) 
  95.148 +        return -1;
  95.149 +
  95.150 +    if ( arch_hvm_load(&hdr) )
  95.151 +        return -1;
  95.152 +
  95.153 +    c = strrchr(xen_changeset(), ':');
  95.154 +    if ( hdr.changeset == -1ULL )
  95.155 +        gdprintk(XENLOG_WARNING, 
  95.156 +                 "HVM restore: Xen changeset was not saved.\n");
  95.157 +    else if ( c == NULL )
  95.158 +        gdprintk(XENLOG_WARNING, 
  95.159 +                 "HVM restore: Xen changeset is not available.\n");
  95.160 +    else
  95.161 +    {
  95.162 +        cset = simple_strtoll(c, NULL, 16);
  95.163 +        if ( hdr.changeset != cset )
   95.164 +            gdprintk(XENLOG_WARNING, "HVM restore: saved Xen changeset (%#"PRIx64
   95.165 +                     ") does not match host (%#"PRIx64").\n", hdr.changeset, cset);
  95.166 +    }
  95.167 +
  95.168 +    /* Down all the vcpus: we only re-enable the ones that had state saved. */
  95.169 +    for_each_vcpu(d, v) 
  95.170 +        if ( test_and_set_bit(_VPF_down, &v->pause_flags) )
  95.171 +            vcpu_sleep_nosync(v);
  95.172 +
  95.173 +    for ( ; ; )
  95.174 +    {
  95.175 +        if ( h->size - h->cur < sizeof(struct hvm_save_descriptor) )
  95.176 +        {
   95.177 +            /* Ran out of data */
  95.178 +            gdprintk(XENLOG_ERR, 
  95.179 +                     "HVM restore: save did not end with a null entry\n");
  95.180 +            return -1;
  95.181 +        }
  95.182 +        
  95.183 +        /* Read the typecode of the next entry  and check for the end-marker */
  95.184 +        desc = (struct hvm_save_descriptor *)(&h->data[h->cur]);
  95.185 +        if ( desc->typecode == 0 )
  95.186 +            return 0; 
  95.187 +        
  95.188 +        /* Find the handler for this entry */
  95.189 +        if ( (desc->typecode > HVM_SAVE_CODE_MAX) ||
  95.190 +             ((handler = hvm_sr_handlers[desc->typecode].load) == NULL) )
  95.191 +        {
  95.192 +            gdprintk(XENLOG_ERR, 
  95.193 +                     "HVM restore: unknown entry typecode %u\n", 
  95.194 +                     desc->typecode);
  95.195 +            return -1;
  95.196 +        }
  95.197 +
  95.198 +        /* Load the entry */
  95.199 +        gdprintk(XENLOG_INFO, "HVM restore: %s %"PRIu16"\n",  
  95.200 +                 hvm_sr_handlers[desc->typecode].name, desc->instance);
  95.201 +        if ( handler(d, h) != 0 ) 
  95.202 +        {
  95.203 +            gdprintk(XENLOG_ERR, 
  95.204 +                     "HVM restore: failed to load entry %u/%u\n", 
  95.205 +                     desc->typecode, desc->instance);
  95.206 +            return -1;
  95.207 +        }
  95.208 +    }
  95.209 +
  95.210 +    /* Not reached */
  95.211 +}
  95.212 +
  95.213 +/*
  95.214 + * Local variables:
  95.215 + * mode: C
  95.216 + * c-set-style: "BSD"
  95.217 + * c-basic-offset: 4
  95.218 + * tab-width: 4
  95.219 + * indent-tabs-mode: nil
  95.220 + * End:
  95.221 + */
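
Device models plug into this framework at init time through hvm_register_savevm(). A hedged sketch of a registration site; the PIC names, struct hvm_hw_vpic, HVM_SAVE_CODE() and HVMSR_PER_DOM are assumed from the wider tree (which wraps this pattern in an HVM_REGISTER_SAVE_RESTORE macro) and are not part of this hunk:

    static int vpic_save(struct domain *d, hvm_domain_context_t *h)
    {
        /* Marshal device state with hvm_save_entry(PIC, 0, h, &state). */
        return 0;
    }

    static int vpic_load(struct domain *d, hvm_domain_context_t *h)
    {
        /* Unmarshal device state with hvm_load_entry(PIC, h, &state). */
        return 0;
    }

    static int __init vpic_register(void)
    {
        hvm_register_savevm(HVM_SAVE_CODE(PIC), "PIC",
                            vpic_save, vpic_load,
                            sizeof(struct hvm_hw_vpic), HVMSR_PER_DOM);
        return 0;
    }
    __initcall(vpic_register);
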
    96.1 --- a/xen/common/rangeset.c	Wed Oct 17 10:36:31 2007 -0600
    96.2 +++ b/xen/common/rangeset.c	Sun Oct 21 12:10:25 2007 -0600
    96.3 @@ -263,7 +263,7 @@ int rangeset_contains_singleton(
    96.4  int rangeset_is_empty(
    96.5      struct rangeset *r)
    96.6  {
    96.7 -    return list_empty(&r->range_list);
    96.8 +    return ((r == NULL) || list_empty(&r->range_list));
    96.9  }
   96.10  
   96.11  struct rangeset *rangeset_new(
    97.1 --- a/xen/drivers/video/vesa.c	Wed Oct 17 10:36:31 2007 -0600
    97.2 +++ b/xen/drivers/video/vesa.c	Sun Oct 21 12:10:25 2007 -0600
    97.3 @@ -46,7 +46,7 @@ void __init vesa_early_init(void)
    97.4  {
    97.5      unsigned int vram_vmode;
    97.6  
    97.7 -    /* XXX vga_compat = !(boot_video_info.capabilities & 2); */
    97.8 +    vga_compat = !(vga_console_info.u.vesa_lfb.gbl_caps & 2);
    97.9  
   97.10      if ( (vlfb_info.bits_per_pixel < 8) || (vlfb_info.bits_per_pixel > 32) )
   97.11          return;
    98.1 --- a/xen/include/Makefile	Wed Oct 17 10:36:31 2007 -0600
    98.2 +++ b/xen/include/Makefile	Sun Oct 21 12:10:25 2007 -0600
    98.3 @@ -61,7 +61,7 @@ compat/%.c: public/%.h xlat.lst Makefile
    98.4  compat/xlat.h: xlat.lst $(filter-out compat/xlat.h,$(headers-y)) $(BASEDIR)/tools/get-fields.sh Makefile
    98.5  	grep -v '^[	 ]*#' xlat.lst | \
    98.6  	while read what name hdr; do \
    98.7 -		/bin/bash $(BASEDIR)/tools/get-fields.sh "$$what" compat_$$name $$(echo compat/$$hdr | sed 's,@arch@,$(compat-arch-y),g') || exit $$?; \
    98.8 +		/bin/sh $(BASEDIR)/tools/get-fields.sh "$$what" compat_$$name $$(echo compat/$$hdr | sed 's,@arch@,$(compat-arch-y),g') || exit $$?; \
    98.9  	done >$@.new
   98.10  	mv -f $@.new $@
   98.11  
    99.1 --- a/xen/include/asm-ia64/domain.h	Wed Oct 17 10:36:31 2007 -0600
    99.2 +++ b/xen/include/asm-ia64/domain.h	Sun Oct 21 12:10:25 2007 -0600
    99.3 @@ -123,6 +123,7 @@ struct arch_domain {
    99.4              unsigned int is_vti : 1;
    99.5  #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
    99.6              unsigned int has_pervcpu_vhpt : 1;
    99.7 +            unsigned int vhpt_size_log2 : 6;
    99.8  #endif
    99.9          };
   99.10      };
   100.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   100.2 +++ b/xen/include/asm-ia64/trace.h	Sun Oct 21 12:10:25 2007 -0600
   100.3 @@ -0,0 +1,4 @@
   100.4 +#ifndef __ASM_TRACE_H__
   100.5 +#define __ASM_TRACE_H__
   100.6 +
   100.7 +#endif /* __ASM_TRACE_H__ */
   101.1 --- a/xen/include/asm-ia64/vhpt.h	Wed Oct 17 10:36:31 2007 -0600
   101.2 +++ b/xen/include/asm-ia64/vhpt.h	Sun Oct 21 12:10:25 2007 -0600
   101.3 @@ -84,5 +84,18 @@ vcpu_pta(struct vcpu* v)
   101.4          (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED;
   101.5  }
   101.6  
   101.7 +static inline int
   101.8 +canonicalize_vhpt_size(int sz)
   101.9 +{
  101.10 +    /* minimum 32KB */
  101.11 +    if (sz < 15)
  101.12 +        return 15;
   101.13 +    /* maximum 8MB (since purging the TR is hard-coded) */
  101.14 +    if (sz > IA64_GRANULE_SHIFT - 1)
  101.15 +        return IA64_GRANULE_SHIFT - 1;
  101.16 +    return sz;
  101.17 +}
  101.18 +
  101.19 +
  101.20  #endif /* !__ASSEMBLY */
  101.21  #endif
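
Given the hunk's stated bounds, IA64_GRANULE_SHIFT must be 24 here (16MB granules), so the clamp keeps the log2 VHPT size between 15 (32KB) and 23 (8MB). Worked examples:

    int a = canonicalize_vhpt_size(10); /* -> 15: raised to the 32KB floor */
    int b = canonicalize_vhpt_size(23); /* -> 23: 2^23 bytes = 8MB, kept   */
    int c = canonicalize_vhpt_size(30); /* -> 23: capped at the 8MB limit  */
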
   102.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   102.2 +++ b/xen/include/asm-powerpc/trace.h	Sun Oct 21 12:10:25 2007 -0600
   102.3 @@ -0,0 +1,4 @@
   102.4 +#ifndef __ASM_TRACE_H__
   102.5 +#define __ASM_TRACE_H__
   102.6 +
   102.7 +#endif /* __ASM_TRACE_H__ */
   103.1 --- a/xen/include/asm-x86/cpufeature.h	Wed Oct 17 10:36:31 2007 -0600
   103.2 +++ b/xen/include/asm-x86/cpufeature.h	Sun Oct 21 12:10:25 2007 -0600
   103.3 @@ -31,7 +31,7 @@
   103.4  #define X86_FEATURE_PSE36	(0*32+17) /* 36-bit PSEs */
   103.5  #define X86_FEATURE_PN		(0*32+18) /* Processor serial number */
   103.6  #define X86_FEATURE_CLFLSH	(0*32+19) /* Supports the CLFLUSH instruction */
   103.7 -#define X86_FEATURE_DTES	(0*32+21) /* Debug Trace Store */
   103.8 +#define X86_FEATURE_DS		(0*32+21) /* Debug Store */
   103.9  #define X86_FEATURE_ACPI	(0*32+22) /* ACPI via MSR */
  103.10  #define X86_FEATURE_MMX		(0*32+23) /* Multimedia Extensions */
   103.11  #define X86_FEATURE_FXSR	(0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore) */
  103.12 @@ -49,6 +49,8 @@
  103.13  #define X86_FEATURE_MP		(1*32+19) /* MP Capable. */
  103.14  #define X86_FEATURE_NX		(1*32+20) /* Execute Disable */
  103.15  #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
  103.16 +#define X86_FEATURE_FFXSR       (1*32+25) /* FFXSR instruction optimizations */
  103.17 +#define X86_FEATURE_PAGE1GB	(1*32+26) /* 1Gb large page support */
  103.18  #define X86_FEATURE_RDTSCP	(1*32+27) /* RDTSCP */
  103.19  #define X86_FEATURE_LM		(1*32+29) /* Long Mode (x86-64) */
  103.20  #define X86_FEATURE_3DNOWEXT	(1*32+30) /* AMD 3DNow! extensions */
  103.21 @@ -80,21 +82,41 @@
  103.22  #define X86_FEATURE_SMXE	(4*32+ 6) /* Safer Mode Extensions */
  103.23  #define X86_FEATURE_EST		(4*32+ 7) /* Enhanced SpeedStep */
  103.24  #define X86_FEATURE_TM2		(4*32+ 8) /* Thermal Monitor 2 */
  103.25 +#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental Streaming SIMD Extensions-3 */
  103.26  #define X86_FEATURE_CID		(4*32+10) /* Context ID */
  103.27  #define X86_FEATURE_CX16        (4*32+13) /* CMPXCHG16B */
  103.28  #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
  103.29 +#define X86_FEATURE_PDCM	(4*32+15) /* Perf/Debug Capability MSR */
  103.30 +#define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
  103.31 +#define X86_FEATURE_SSE4_1	(4*32+19) /* Streaming SIMD Extensions 4.1 */
  103.32 +#define X86_FEATURE_SSE4_2	(4*32+20) /* Streaming SIMD Extensions 4.2 */
  103.33 +#define X86_FEATURE_POPCNT	(4*32+23) /* POPCNT instruction */
  103.34  
  103.35  /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
  103.36  #define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
  103.37  #define X86_FEATURE_XSTORE_EN	(5*32+ 3) /* on-CPU RNG enabled */
  103.38  #define X86_FEATURE_XCRYPT	(5*32+ 6) /* on-CPU crypto (xcrypt insn) */
  103.39  #define X86_FEATURE_XCRYPT_EN	(5*32+ 7) /* on-CPU crypto enabled */
  103.40 +#define X86_FEATURE_ACE2	(5*32+ 8) /* Advanced Cryptography Engine v2 */
  103.41 +#define X86_FEATURE_ACE2_EN	(5*32+ 9) /* ACE v2 enabled */
  103.42 +#define X86_FEATURE_PHE		(5*32+ 10) /* PadLock Hash Engine */
  103.43 +#define X86_FEATURE_PHE_EN	(5*32+ 11) /* PHE enabled */
  103.44 +#define X86_FEATURE_PMM		(5*32+ 12) /* PadLock Montgomery Multiplier */
  103.45 +#define X86_FEATURE_PMM_EN	(5*32+ 13) /* PMM enabled */
  103.46  
  103.47  /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
  103.48  #define X86_FEATURE_LAHF_LM	(6*32+ 0) /* LAHF/SAHF in long mode */
  103.49  #define X86_FEATURE_CMP_LEGACY	(6*32+ 1) /* If yes HyperThreading not valid */
  103.50  #define X86_FEATURE_SVME        (6*32+ 2) /* Secure Virtual Machine */
  103.51 -#define X86_FEATURE_FFXSR       (6*32+25) /* FFXSR instruction optimizations */
  103.52 +#define X86_FEATURE_EXTAPICSPACE (6*32+ 3) /* Extended APIC space */
  103.53 +#define X86_FEATURE_ALTMOVCR	(6*32+ 4) /* LOCK MOV CR accesses CR+8 */
  103.54 +#define X86_FEATURE_ABM		(6*32+ 5) /* Advanced Bit Manipulation */
  103.55 +#define X86_FEATURE_SSE4A	(6*32+ 6) /* AMD Streaming SIMD Extensions-4a */
  103.56 +#define X86_FEATURE_MISALIGNSSE	(6*32+ 7) /* Misaligned SSE Access */
  103.57 +#define X86_FEATURE_3DNOWPF	(6*32+ 8) /* 3DNow! Prefetch */
  103.58 +#define X86_FEATURE_OSVW	(6*32+ 9) /* OS Visible Workaround */
  103.59 +#define X86_FEATURE_SKINIT	(6*32+ 12) /* SKINIT, STGI/CLGI, DEV */
  103.60 +#define X86_FEATURE_WDT		(6*32+ 13) /* Watchdog Timer */
  103.61  
  103.62  #define cpu_has(c, bit)		test_bit(bit, (c)->x86_capability)
  103.63  #define boot_cpu_has(bit)	test_bit(bit, boot_cpu_data.x86_capability)
  103.64 @@ -122,6 +144,7 @@
  103.65  #define cpu_has_cyrix_arr	boot_cpu_has(X86_FEATURE_CYRIX_ARR)
  103.66  #define cpu_has_centaur_mcr	boot_cpu_has(X86_FEATURE_CENTAUR_MCR)
  103.67  #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
  103.68 +#define cpu_has_page1gb		0
  103.69  #else /* __x86_64__ */
  103.70  #define cpu_has_vme		0
  103.71  #define cpu_has_de		1
  103.72 @@ -145,8 +168,12 @@
  103.73  #define cpu_has_cyrix_arr	0
  103.74  #define cpu_has_centaur_mcr	0
  103.75  #define cpu_has_clflush		boot_cpu_has(X86_FEATURE_CLFLSH)
  103.76 +#define cpu_has_page1gb		boot_cpu_has(X86_FEATURE_PAGE1GB)
  103.77  #endif
  103.78  
  103.79 +#define cpu_has_ffxsr           ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
  103.80 +                                 && boot_cpu_has(X86_FEATURE_FFXSR))
  103.81 +
  103.82  #endif /* __ASM_I386_CPUFEATURE_H */
  103.83  
  103.84  /* 
   104.1 --- a/xen/include/asm-x86/flushtlb.h	Wed Oct 17 10:36:31 2007 -0600
   104.2 +++ b/xen/include/asm-x86/flushtlb.h	Sun Oct 21 12:10:25 2007 -0600
   104.3 @@ -57,7 +57,7 @@ do {                                    
   104.4              cpu_clear(cpu, mask);                                       \
   104.5  } while ( 0 )
   104.6  
   104.7 -extern void new_tlbflush_clock_period(void);
   104.8 +void new_tlbflush_clock_period(void);
   104.9  
  104.10  /* Read pagetable base. */
  104.11  static inline unsigned long read_cr3(void)
  104.12 @@ -69,34 +69,54 @@ static inline unsigned long read_cr3(voi
  104.13  }
  104.14  
  104.15  /* Write pagetable base and implicitly tick the tlbflush clock. */
  104.16 -extern void write_cr3(unsigned long cr3);
  104.17 -
  104.18 -/* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. */
  104.19 -extern void local_flush_tlb(void);
  104.20 +void write_cr3(unsigned long cr3);
  104.21  
  104.22 -#define local_flush_tlb_pge()                                     \
  104.23 -    do {                                                          \
  104.24 -        __pge_off();                                              \
  104.25 -        local_flush_tlb();                                        \
  104.26 -        __pge_on();                                               \
  104.27 -    } while ( 0 )
  104.28 +/* flush_* flag fields: */
  104.29 + /*
  104.30 +  * Area to flush:
  104.31 +  *  0 -> flush entire address space
  104.32 +  *  1 -> 4kB area containing specified virtual address
  104.33 +  *  2 -> 4MB/2MB area containing specified virtual address
  104.34 +  *  3 -> 1GB area containing specified virtual address (x86/64 only)
  104.35 +  * NB. Multi-page areas do not need to have been mapped with a superpage.
  104.36 +  */
  104.37 +#define FLUSH_LEVEL_MASK 0x0f
  104.38 +#define FLUSH_LEVEL(x)   (x)
  104.39 + /* Flush TLBs (or parts thereof) */
  104.40 +#define FLUSH_TLB        0x10
  104.41 + /* Flush TLBs (or parts thereof) including global mappings */
  104.42 +#define FLUSH_TLB_GLOBAL 0x20
  104.43 + /* Flush data caches */
  104.44 +#define FLUSH_CACHE      0x40
  104.45  
  104.46 -#define local_flush_tlb_one(__addr) \
  104.47 -    __asm__ __volatile__("invlpg %0": :"m" (*(char *) (__addr)))
  104.48 +/* Flush local TLBs/caches. */
  104.49 +void flush_area_local(const void *va, unsigned int flags);
  104.50 +#define flush_local(flags) flush_area_local(NULL, flags)
  104.51  
  104.52 -#define flush_tlb_all()     flush_tlb_mask(cpu_online_map)
  104.53 +/* Flush specified CPUs' TLBs/caches */
  104.54 +void flush_area_mask(cpumask_t, const void *va, unsigned int flags);
  104.55 +#define flush_mask(mask, flags) flush_area_mask(mask, NULL, flags)
  104.56 +
  104.57 +/* Flush all CPUs' TLBs/caches */
  104.58 +#define flush_area_all(va, flags) flush_area_mask(cpu_online_map, va, flags)
  104.59 +#define flush_all(flags) flush_mask(cpu_online_map, flags)
  104.60  
  104.61 -#ifndef CONFIG_SMP
  104.62 -#define flush_tlb_all_pge()        local_flush_tlb_pge()
  104.63 -#define flush_tlb_mask(mask)       local_flush_tlb()
  104.64 -#define flush_tlb_one_mask(mask,v) local_flush_tlb_one(_v)
  104.65 -#else
  104.66 -#include <xen/smp.h>
  104.67 -#define FLUSHVA_ALL (~0UL)
  104.68 -extern void flush_tlb_all_pge(void);
  104.69 -extern void __flush_tlb_mask(cpumask_t mask, unsigned long va);
  104.70 -#define flush_tlb_mask(mask)       __flush_tlb_mask(mask,FLUSHVA_ALL)
  104.71 -#define flush_tlb_one_mask(mask,v) __flush_tlb_mask(mask,(unsigned long)(v))
  104.72 -#endif
  104.73 +/* Flush local TLBs */
  104.74 +#define flush_tlb_local()                       \
  104.75 +    flush_local(FLUSH_TLB)
  104.76 +#define flush_tlb_one_local(v)                  \
  104.77 +    flush_area_local((const void *)(v), FLUSH_TLB|FLUSH_LEVEL(1))
  104.78 +
  104.79 +/* Flush specified CPUs' TLBs */
  104.80 +#define flush_tlb_mask(mask)                    \
  104.81 +    flush_mask(mask, FLUSH_TLB)
  104.82 +#define flush_tlb_one_mask(mask,v)              \
  104.83 +    flush_area_mask(mask, (const void *)(v), FLUSH_TLB|FLUSH_LEVEL(1))
  104.84 +
  104.85 +/* Flush all CPUs' TLBs */
  104.86 +#define flush_tlb_all()                         \
  104.87 +    flush_tlb_mask(cpu_online_map)
  104.88 +#define flush_tlb_one_all(v)                    \
  104.89 +    flush_tlb_one_mask(cpu_online_map, v)
  104.90  
  104.91  #endif /* __FLUSHTLB_H__ */
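A minimal usage sketch of the new flush interface introduced above; the
pointer va and the CPU mask here are hypothetical placeholders:

    /* Flush the 4kB TLB entries covering va on the CPUs in mask
       (equivalent to the flush_tlb_one_mask() wrapper). */
    flush_area_mask(mask, va, FLUSH_TLB | FLUSH_LEVEL(1));

    /* Flush the local TLB including global mappings, plus data caches. */
    flush_local(FLUSH_TLB_GLOBAL | FLUSH_CACHE);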
   105.1 --- a/xen/include/asm-x86/hvm/io.h	Wed Oct 17 10:36:31 2007 -0600
   105.2 +++ b/xen/include/asm-x86/hvm/io.h	Sun Oct 21 12:10:25 2007 -0600
   105.3 @@ -151,7 +151,8 @@ void send_invalidate_req(void);
   105.4  extern void handle_mmio(unsigned long gpa);
   105.5  extern void hvm_interrupt_post(struct vcpu *v, int vector, int type);
   105.6  extern void hvm_io_assist(void);
   105.7 -extern void hvm_dpci_eoi(unsigned int guest_irq, union vioapic_redir_entry *ent);
   105.8 +extern void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq,
   105.9 +                         union vioapic_redir_entry *ent);
  105.10  
  105.11  #endif /* __ASM_X86_HVM_IO_H__ */
  105.12  
   106.1 --- a/xen/include/asm-x86/hvm/irq.h	Wed Oct 17 10:36:31 2007 -0600
   106.2 +++ b/xen/include/asm-x86/hvm/irq.h	Sun Oct 21 12:10:25 2007 -0600
   106.3 @@ -33,6 +33,7 @@ struct hvm_irq_dpci_mapping {
   106.4      uint8_t valid;
   106.5      uint8_t device;
   106.6      uint8_t intx;
   106.7 +    struct domain *dom;
   106.8      union {
   106.9          uint8_t guest_gsi;
  106.10          uint8_t machine_gsi;
  106.11 @@ -45,6 +46,7 @@ struct hvm_irq_dpci {
  106.12      /* Guest IRQ to guest device/intx mapping. */
  106.13      struct hvm_irq_dpci_mapping girq[NR_IRQS];
  106.14      DECLARE_BITMAP(dirq_mask, NR_IRQS);
  106.15 +    struct timer hvm_timer[NR_IRQS];
  106.16  };
  106.17  
  106.18  struct hvm_irq {
   107.1 --- a/xen/include/asm-x86/hvm/support.h	Wed Oct 17 10:36:31 2007 -0600
   107.2 +++ b/xen/include/asm-x86/hvm/support.h	Sun Oct 21 12:10:25 2007 -0600
   107.3 @@ -22,6 +22,7 @@
   107.4  #define __ASM_X86_HVM_SUPPORT_H__
   107.5  
   107.6  #include <xen/sched.h>
   107.7 +#include <xen/hvm/save.h>
   107.8  #include <asm/types.h>
   107.9  #include <asm/regs.h>
  107.10  #include <asm/processor.h>
  107.11 @@ -76,144 +77,6 @@ extern unsigned int opt_hvm_debug_level;
  107.12  #define HVM_DBG_LOG(level, _f, _a...)
  107.13  #endif
  107.14  
  107.15 -/*
  107.16 - * Save/restore support 
  107.17 - */
  107.18 -
  107.19 -/* Marshalling and unmarshalling uses a buffer with size and cursor. */
  107.20 -typedef struct hvm_domain_context {
  107.21 -    uint32_t cur;
  107.22 -    uint32_t size;
  107.23 -    uint8_t *data;
  107.24 -} hvm_domain_context_t;
  107.25 -
  107.26 -/* Marshalling an entry: check space and fill in the header */
  107.27 -static inline int _hvm_init_entry(struct hvm_domain_context *h,
  107.28 -                                  uint16_t tc, uint16_t inst, uint32_t len)
  107.29 -{
  107.30 -    struct hvm_save_descriptor *d 
  107.31 -        = (struct hvm_save_descriptor *)&h->data[h->cur];
  107.32 -    if ( h->size - h->cur < len + sizeof (*d) )
  107.33 -    {
  107.34 -        gdprintk(XENLOG_WARNING,
  107.35 -                 "HVM save: no room for %"PRIu32" + %u bytes "
  107.36 -                 "for typecode %"PRIu16"\n",
  107.37 -                 len, (unsigned) sizeof (*d), tc);
  107.38 -        return -1;
  107.39 -    }
  107.40 -    d->typecode = tc;
  107.41 -    d->instance = inst;
  107.42 -    d->length = len;
  107.43 -    h->cur += sizeof (*d);
  107.44 -    return 0;
  107.45 -}
  107.46 -
  107.47 -/* Marshalling: copy the contents in a type-safe way */
  107.48 -#define _hvm_write_entry(_x, _h, _src) do {                     \
  107.49 -    *(HVM_SAVE_TYPE(_x) *)(&(_h)->data[(_h)->cur]) = *(_src);   \
  107.50 -    (_h)->cur += HVM_SAVE_LENGTH(_x);                           \
  107.51 -} while (0)
  107.52 -
  107.53 -/* Marshalling: init and copy; evaluates to zero on success */
  107.54 -#define hvm_save_entry(_x, _inst, _h, _src) ({          \
  107.55 -    int r;                                              \
  107.56 -    r = _hvm_init_entry((_h), HVM_SAVE_CODE(_x),        \
  107.57 -                        (_inst), HVM_SAVE_LENGTH(_x));  \
  107.58 -    if ( r == 0 )                                       \
  107.59 -        _hvm_write_entry(_x, (_h), (_src));             \
  107.60 -    r; })
  107.61 -
  107.62 -/* Unmarshalling: test an entry's size and typecode and record the instance */
  107.63 -static inline int _hvm_check_entry(struct hvm_domain_context *h, 
  107.64 -                                   uint16_t type, uint32_t len)
  107.65 -{
  107.66 -    struct hvm_save_descriptor *d 
  107.67 -        = (struct hvm_save_descriptor *)&h->data[h->cur];
  107.68 -    if ( len + sizeof (*d) > h->size - h->cur)
  107.69 -    {
  107.70 -        gdprintk(XENLOG_WARNING, 
  107.71 -                 "HVM restore: not enough data left to read %u bytes "
  107.72 -                 "for type %u\n", len, type);
  107.73 -        return -1;
  107.74 -    }    
  107.75 -    if ( type != d->typecode || len != d->length )
  107.76 -    {
  107.77 -        gdprintk(XENLOG_WARNING, 
  107.78 -                 "HVM restore mismatch: expected type %u length %u, "
  107.79 -                 "saw type %u length %u\n", type, len, d->typecode, d->length);
  107.80 -        return -1;
  107.81 -    }
  107.82 -    h->cur += sizeof (*d);
  107.83 -    return 0;
  107.84 -}
  107.85 -
  107.86 -/* Unmarshalling: copy the contents in a type-safe way */
  107.87 -#define _hvm_read_entry(_x, _h, _dst) do {                      \
  107.88 -    *(_dst) = *(HVM_SAVE_TYPE(_x) *) (&(_h)->data[(_h)->cur]);  \
  107.89 -    (_h)->cur += HVM_SAVE_LENGTH(_x);                           \
  107.90 -} while (0)
  107.91 -
  107.92 -/* Unmarshalling: check, then copy. Evaluates to zero on success. */
  107.93 -#define hvm_load_entry(_x, _h, _dst) ({                                 \
  107.94 -    int r;                                                              \
  107.95 -    r = _hvm_check_entry((_h), HVM_SAVE_CODE(_x), HVM_SAVE_LENGTH(_x)); \
  107.96 -    if ( r == 0 )                                                       \
  107.97 -        _hvm_read_entry(_x, (_h), (_dst));                              \
  107.98 -    r; })
  107.99 -
 107.100 -/* Unmarshalling: what is the instance ID of the next entry? */
 107.101 -static inline uint16_t hvm_load_instance(struct hvm_domain_context *h)
 107.102 -{
 107.103 -    struct hvm_save_descriptor *d 
 107.104 -        = (struct hvm_save_descriptor *)&h->data[h->cur];
 107.105 -    return d->instance;
 107.106 -}
 107.107 -
 107.108 -/* Handler types for different types of save-file entry. 
 107.109 - * The save handler may save multiple instances of a type into the buffer;
 107.110 - * the load handler will be called once for each instance found when
 107.111 - * restoring.  Both return non-zero on error. */
 107.112 -typedef int (*hvm_save_handler) (struct domain *d, 
 107.113 -                                 hvm_domain_context_t *h);
 107.114 -typedef int (*hvm_load_handler) (struct domain *d,
 107.115 -                                 hvm_domain_context_t *h);
 107.116 -
 107.117 -/* Init-time function to declare a pair of handlers for a type,
 107.118 - * and the maximum buffer space needed to save this type of state */
 107.119 -void hvm_register_savevm(uint16_t typecode,
 107.120 -                         const char *name, 
 107.121 -                         hvm_save_handler save_state,
 107.122 -                         hvm_load_handler load_state,
 107.123 -                         size_t size, int kind);
 107.124 -
 107.125 -/* The space needed for saving can be per-domain or per-vcpu: */
 107.126 -#define HVMSR_PER_DOM  0
 107.127 -#define HVMSR_PER_VCPU 1
 107.128 -
 107.129 -/* Syntactic sugar around that function: specify the max number of
 107.130 - * saves, and this calculates the size of buffer needed */
 107.131 -#define HVM_REGISTER_SAVE_RESTORE(_x, _save, _load, _num, _k)             \
 107.132 -static int __hvm_register_##_x##_save_and_restore(void)                   \
 107.133 -{                                                                         \
 107.134 -    hvm_register_savevm(HVM_SAVE_CODE(_x),                                \
 107.135 -                        #_x,                                              \
 107.136 -                        &_save,                                           \
 107.137 -                        &_load,                                           \
 107.138 -                        (_num) * (HVM_SAVE_LENGTH(_x)                     \
 107.139 -                                  + sizeof (struct hvm_save_descriptor)), \
 107.140 -                        _k);                                              \
 107.141 -    return 0;                                                             \
 107.142 -}                                                                         \
 107.143 -__initcall(__hvm_register_##_x##_save_and_restore);
 107.144 -
 107.145 -
 107.146 -/* Entry points for saving and restoring HVM domain state */
 107.147 -size_t hvm_save_size(struct domain *d);
 107.148 -int hvm_save(struct domain *d, hvm_domain_context_t *h);
 107.149 -int hvm_load(struct domain *d, hvm_domain_context_t *h);
 107.150 -
 107.151 -/* End of save/restore */
 107.152 -
 107.153  extern char hvm_io_bitmap[];
 107.154  
 107.155  void hvm_enable(struct hvm_function_table *);
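The marshalling helpers removed above are not deleted outright: support.h
now includes <xen/hvm/save.h>, where they apparently move. A minimal sketch
of how a save handler uses them, matching the hvm_save_handler typedef;
foo_save and the instance number 0 are hypothetical:

    static int foo_save(struct domain *d, hvm_domain_context_t *h)
    {
        struct hvm_hw_rtc ctxt;
        /* ... fill ctxt from the device model's state ... */
        return hvm_save_entry(RTC, 0, h, &ctxt); /* evaluates to 0 on success */
    }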
   108.1 --- a/xen/include/asm-x86/hvm/vmx/intel-iommu.h	Wed Oct 17 10:36:31 2007 -0600
   108.2 +++ b/xen/include/asm-x86/hvm/vmx/intel-iommu.h	Sun Oct 21 12:10:25 2007 -0600
   108.3 @@ -145,6 +145,10 @@
   108.4  #define DMA_GSTS_QIES   (((u64)1) <<26)
   108.5  #define DMA_GSTS_IRES   (((u64)1) <<25)
   108.6  
   108.7 +/* PMEN_REG */
   108.8 +#define DMA_PMEN_EPM   (((u32)1) << 31)
   108.9 +#define DMA_PMEN_PRS   (((u32)1) << 1)
  108.10 +
  108.11  /* CCMD_REG */
  108.12  #define DMA_CCMD_INVL_GRANU_OFFSET  61
  108.13  #define DMA_CCMD_ICC (((u64)1) << 63)
   109.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Wed Oct 17 10:36:31 2007 -0600
   109.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Sun Oct 21 12:10:25 2007 -0600
   109.3 @@ -35,6 +35,12 @@ struct vmcs_struct {
   109.4      unsigned char data [0]; /* vmcs size is read from MSR */
   109.5  };
   109.6  
   109.7 +struct vmx_msr_entry {
   109.8 +    u32 index;
   109.9 +    u32 mbz;
  109.10 +    u64 data;
  109.11 +};
  109.12 +
  109.13  enum {
  109.14      VMX_INDEX_MSR_LSTAR = 0,
  109.15      VMX_INDEX_MSR_STAR,
  109.16 @@ -73,6 +79,12 @@ struct arch_vmx_struct {
  109.17      unsigned long        cstar;
  109.18  #endif
  109.19  
  109.20 +    char                *msr_bitmap;
  109.21 +    unsigned int         msr_count;
  109.22 +    struct vmx_msr_entry *msr_area;
  109.23 +    unsigned int         host_msr_count;
  109.24 +    struct vmx_msr_entry *host_msr_area;
  109.25 +
  109.26      /* Following fields are all specific to vmxassist. */
  109.27      unsigned long        vmxassist_enabled:1;
  109.28      unsigned long        irqbase_mode:1;
  109.29 @@ -131,7 +143,6 @@ extern bool_t cpu_has_vmx_ins_outs_instr
  109.30      (vmx_pin_based_exec_control & PIN_BASED_VIRTUAL_NMIS)
  109.31  #define cpu_has_vmx_msr_bitmap \
  109.32      (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
  109.33 -extern char *vmx_msr_bitmap;
  109.34  
  109.35  /* GUEST_INTERRUPTIBILITY_INFO flags. */
  109.36  #define VMX_INTR_SHADOW_STI             0x00000001
  109.37 @@ -268,6 +279,12 @@ enum vmcs_field {
  109.38      HOST_RIP                        = 0x00006c16,
  109.39  };
  109.40  
  109.41 +void vmx_disable_intercept_for_msr(struct vcpu *v, u32 msr);
  109.42 +int vmx_read_guest_msr(struct vcpu *v, u32 msr, u64 *val);
  109.43 +int vmx_write_guest_msr(struct vcpu *v, u32 msr, u64 val);
  109.44 +int vmx_add_guest_msr(struct vcpu *v, u32 msr);
  109.45 +int vmx_add_host_load_msr(struct vcpu *v, u32 msr);
  109.46 +
  109.47  #endif /* ASM_X86_HVM_VMX_VMCS_H__ */
  109.48  
  109.49  /*
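An illustrative sketch of the per-vCPU MSR list interface declared above,
assuming the usual 0-on-success convention; the choice of MSR_STAR and the
error path are hypothetical:

    u64 val;
    /* Ensure MSR_STAR is switched via the VMCS MSR save/load area. */
    if ( vmx_add_guest_msr(v, MSR_STAR) != 0 )
        return -ENOSPC;                      /* hypothetical error path */
    if ( vmx_read_guest_msr(v, MSR_STAR, &val) == 0 )
        vmx_write_guest_msr(v, MSR_STAR, val);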
   110.1 --- a/xen/include/asm-x86/iommu.h	Wed Oct 17 10:36:31 2007 -0600
   110.2 +++ b/xen/include/asm-x86/iommu.h	Sun Oct 21 12:10:25 2007 -0600
   110.3 @@ -69,6 +69,7 @@ struct iommu {
   110.4  int iommu_setup(void);
   110.5  int iommu_domain_init(struct domain *d);
   110.6  void iommu_domain_destroy(struct domain *d);
   110.7 +int device_assigned(u8 bus, u8 devfn);
   110.8  int assign_device(struct domain *d, u8 bus, u8 devfn);
   110.9  int iommu_map_page(struct domain *d, dma_addr_t gfn, dma_addr_t mfn);
  110.10  int iommu_unmap_page(struct domain *d, dma_addr_t gfn);
  110.11 @@ -78,7 +79,8 @@ void iommu_domain_teardown(struct domain
  110.12  int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
  110.13  int dpci_ioport_intercept(ioreq_t *p);
  110.14  int pt_irq_create_bind_vtd(struct domain *d,
  110.15 -    xen_domctl_bind_pt_irq_t * pt_irq_bind);
  110.16 +                           xen_domctl_bind_pt_irq_t *pt_irq_bind);
  110.17  
  110.18 +#define PT_IRQ_TIME_OUT MILLISECS(8)
  110.19  
  110.20 -#endif // _IOMMU_H_
  110.21 +#endif /* _IOMMU_H_ */
   111.1 --- a/xen/include/asm-x86/mach-default/smpboot_hooks.h	Wed Oct 17 10:36:31 2007 -0600
   111.2 +++ b/xen/include/asm-x86/mach-default/smpboot_hooks.h	Sun Oct 21 12:10:25 2007 -0600
   111.3 @@ -9,7 +9,7 @@ static inline void smpboot_clear_io_apic
   111.4  static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
   111.5  {
   111.6  	CMOS_WRITE(0xa, 0xf);
   111.7 -	local_flush_tlb();
   111.8 +	flush_tlb_local();
   111.9  	Dprintk("1.\n");
  111.10  	*((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4;
  111.11  	Dprintk("2.\n");
  111.12 @@ -22,7 +22,7 @@ static inline void smpboot_restore_warm_
  111.13  	/*
  111.14  	 * Install writable page 0 entry to set BIOS data area.
  111.15  	 */
  111.16 -	local_flush_tlb();
  111.17 +	flush_tlb_local();
  111.18  
  111.19  	/*
  111.20  	 * Paranoid:  Set warm reset code and vector here back
   112.1 --- a/xen/include/asm-x86/mm.h	Wed Oct 17 10:36:31 2007 -0600
   112.2 +++ b/xen/include/asm-x86/mm.h	Sun Oct 21 12:10:25 2007 -0600
   112.3 @@ -144,7 +144,6 @@ extern unsigned long max_page;
   112.4  extern unsigned long total_pages;
   112.5  void init_frametable(void);
   112.6  
   112.7 -int alloc_page_type(struct page_info *page, unsigned long type);
   112.8  void free_page_type(struct page_info *page, unsigned long type);
   112.9  int _shadow_mode_refcounts(struct domain *d);
  112.10  
   113.1 --- a/xen/include/asm-x86/msr-index.h	Wed Oct 17 10:36:31 2007 -0600
   113.2 +++ b/xen/include/asm-x86/msr-index.h	Sun Oct 21 12:10:25 2007 -0600
   113.3 @@ -18,13 +18,17 @@
   113.4  #define _EFER_LME		8  /* Long mode enable */
   113.5  #define _EFER_LMA		10 /* Long mode active (read-only) */
   113.6  #define _EFER_NX		11 /* No execute enable */
   113.7 -#define _EFER_SVME		12
   113.8 +#define _EFER_SVME		12 /* AMD: SVM enable */
   113.9 +#define _EFER_LMSLE		13 /* AMD: Long-mode segment limit enable */
  113.10 +#define _EFER_FFXSE		14 /* AMD: Fast FXSAVE/FXRSTOR enable */
  113.11  
  113.12  #define EFER_SCE		(1<<_EFER_SCE)
  113.13  #define EFER_LME		(1<<_EFER_LME)
  113.14  #define EFER_LMA		(1<<_EFER_LMA)
  113.15  #define EFER_NX			(1<<_EFER_NX)
  113.16  #define EFER_SVME		(1<<_EFER_SVME)
  113.17 +#define EFER_LMSLE		(1<<_EFER_LMSLE)
  113.18 +#define EFER_FFXSE		(1<<_EFER_FFXSE)
  113.19  
  113.20  /* Intel MSRs. Some also available on other CPUs */
  113.21  #define MSR_IA32_PERFCTR0		0x000000c1
  113.22 @@ -319,6 +323,27 @@
  113.23  #define MSR_P4_U2L_ESCR0		0x000003b0
  113.24  #define MSR_P4_U2L_ESCR1		0x000003b1
  113.25  
  113.26 +/* Netburst (P4) last-branch recording */
  113.27 +#define MSR_P4_LER_FROM_LIP 		0x000001d7
  113.28 +#define MSR_P4_LER_TO_LIP 		0x000001d8
  113.29 +#define MSR_P4_LASTBRANCH_TOS		0x000001da
  113.30 +#define MSR_P4_LASTBRANCH_0		0x000001db
  113.31 +#define NUM_MSR_P4_LASTBRANCH		4
  113.32 +#define MSR_P4_LASTBRANCH_0_FROM_LIP	0x00000680
  113.33 +#define MSR_P4_LASTBRANCH_0_TO_LIP	0x000006c0
  113.34 +#define NUM_MSR_P4_LASTBRANCH_FROM_TO	16
  113.35 +
  113.36 +/* Pentium M (and Core) last-branch recording */
  113.37 +#define MSR_PM_LASTBRANCH_TOS		0x000001c9
  113.38 +#define MSR_PM_LASTBRANCH_0		0x00000040
  113.39 +#define NUM_MSR_PM_LASTBRANCH		8
  113.40 +
  113.41 +/* Core 2 last-branch recording */
  113.42 +#define MSR_C2_LASTBRANCH_TOS		0x000001c9
  113.43 +#define MSR_C2_LASTBRANCH_0_FROM_IP	0x00000040
  113.44 +#define MSR_C2_LASTBRANCH_0_TO_IP	0x00000060
  113.45 +#define NUM_MSR_C2_LASTBRANCH_FROM_TO	4
  113.46 +
  113.47  /* Intel Core-based CPU performance counters */
  113.48  #define MSR_CORE_PERF_FIXED_CTR0	0x00000309
  113.49  #define MSR_CORE_PERF_FIXED_CTR1	0x0000030a
   114.1 --- a/xen/include/asm-x86/msr.h	Wed Oct 17 10:36:31 2007 -0600
   114.2 +++ b/xen/include/asm-x86/msr.h	Sun Oct 21 12:10:25 2007 -0600
   114.3 @@ -105,6 +105,19 @@ static inline void write_efer(__u64 val)
   114.4      wrmsrl(MSR_EFER, val);
   114.5  }
   114.6  
   114.7 +DECLARE_PER_CPU(u32, ler_msr);
   114.8 +
   114.9 +static inline void ler_enable(void)
  114.10 +{
  114.11 +    u64 debugctl;
  114.12 +    
  114.13 +    if ( !this_cpu(ler_msr) )
  114.14 +        return;
  114.15 +
  114.16 +    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
  114.17 +    wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | 1);
  114.18 +}
  114.19 +
  114.20  #endif /* !__ASSEMBLY__ */
  114.21  
  114.22  #endif /* __ASM_MSR_H */
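A sketch of the intended use of the new per-CPU ler_msr and ler_enable();
the initialisation value shown (MSR_P4_LER_FROM_LIP, added to msr-index.h
in this changeset) is only one of the per-family possibilities:

    /* Boot-time: record which LER MSR this CPU family provides. */
    this_cpu(ler_msr) = MSR_P4_LER_FROM_LIP;

    /* Debug-trap path: re-enable last-branch recording by setting
       DEBUGCTL bit 0 (LBR); a no-op if LER was never configured. */
    ler_enable();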
   115.1 --- a/xen/include/asm-x86/page.h	Wed Oct 17 10:36:31 2007 -0600
   115.2 +++ b/xen/include/asm-x86/page.h	Sun Oct 21 12:10:25 2007 -0600
   115.3 @@ -355,13 +355,12 @@ void free_xen_pagetable(void *v);
   115.4  l2_pgentry_t *virt_to_xen_l2e(unsigned long v);
   115.5  
   115.6  /* Map machine page range in Xen virtual address space. */
   115.7 -#define MAP_SMALL_PAGES (1UL<<16) /* don't use superpages for the mapping */
   115.8 -int
   115.9 -map_pages_to_xen(
  115.10 +#define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages for the mapping */
  115.11 +int map_pages_to_xen(
  115.12      unsigned long virt,
  115.13      unsigned long mfn,
  115.14      unsigned long nr_mfns,
  115.15 -    unsigned long flags);
  115.16 +    unsigned int flags);
  115.17  void destroy_xen_mappings(unsigned long v, unsigned long e);
  115.18  
  115.19  #endif /* !__ASSEMBLY__ */
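With MAP_SMALL_PAGES now an actual available PTE bit and the flags argument
retyped to unsigned int, a call looks as sketched below; the range, frame
number and count are hypothetical:

    /* Map 16 MMIO frames uncached, forcing 4kB mappings so individual
       pages can later be remapped without shattering a superpage. */
    rc = map_pages_to_xen(virt, mfn, 16,
                          PAGE_HYPERVISOR_NOCACHE | MAP_SMALL_PAGES);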
   116.1 --- a/xen/include/asm-x86/processor.h	Wed Oct 17 10:36:31 2007 -0600
   116.2 +++ b/xen/include/asm-x86/processor.h	Sun Oct 21 12:10:25 2007 -0600
   116.3 @@ -160,28 +160,21 @@ struct vcpu;
   116.4  #endif
   116.5  
   116.6  struct cpuinfo_x86 {
   116.7 -    __u8 x86;		/* CPU family */
   116.8 -    __u8 x86_vendor;	/* CPU vendor */
   116.9 +    __u8 x86;            /* CPU family */
  116.10 +    __u8 x86_vendor;     /* CPU vendor */
  116.11      __u8 x86_model;
  116.12      __u8 x86_mask;
  116.13 -    char wp_works_ok;	/* It doesn't on 386's */
  116.14 -    char hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
  116.15 -    char hard_math;
  116.16 -    char rfu;
  116.17 -    int  cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
  116.18 +    int  cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
  116.19      unsigned int x86_capability[NCAPINTS];
  116.20      char x86_vendor_id[16];
  116.21      char x86_model_id[64];
  116.22 -    int  x86_cache_size;  /* in KB - valid for CPUS which support this call  */
  116.23 -    int  x86_cache_alignment;	/* In bytes */
  116.24 -    char fdiv_bug;
  116.25 -    char f00f_bug;
  116.26 -    char coma_bug;
  116.27 -    char pad0;
   116.28 +    int  x86_cache_size; /* in KB - valid for CPUs which support this call  */
  116.29 +    int  x86_cache_alignment;    /* In bytes */
  116.30      int  x86_power;
  116.31      unsigned char x86_max_cores; /* cpuid returned max cores value */
  116.32 -    unsigned char booted_cores; /* number of cores as seen by OS */
  116.33 +    unsigned char booted_cores;  /* number of cores as seen by OS */
  116.34      unsigned char apicid;
  116.35 +    unsigned short x86_clflush_size;
  116.36  } __cacheline_aligned;
  116.37  
  116.38  /*
   117.1 --- a/xen/include/asm-x86/system.h	Wed Oct 17 10:36:31 2007 -0600
   117.2 +++ b/xen/include/asm-x86/system.h	Sun Oct 21 12:10:25 2007 -0600
   117.3 @@ -314,4 +314,8 @@ static inline int local_irq_is_enabled(v
   117.4  #define BROKEN_ACPI_Sx		0x0001
   117.5  #define BROKEN_INIT_AFTER_S1	0x0002
   117.6  
   117.7 +void trap_init(void);
   117.8 +void percpu_traps_init(void);
   117.9 +void subarch_percpu_traps_init(void);
  117.10 +
  117.11  #endif
   118.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   118.2 +++ b/xen/include/asm-x86/trace.h	Sun Oct 21 12:10:25 2007 -0600
   118.3 @@ -0,0 +1,46 @@
   118.4 +#ifndef __ASM_TRACE_H__
   118.5 +#define __ASM_TRACE_H__
   118.6 +
   118.7 +#include <asm/page.h>
   118.8 +
   118.9 +void __trace_pv_trap(int trapnr, unsigned long eip,
  118.10 +                     int use_error_code, unsigned error_code);
  118.11 +static inline void trace_pv_trap(int trapnr, unsigned long eip,
  118.12 +                                 int use_error_code, unsigned error_code)
  118.13 +{
  118.14 +    if ( tb_init_done )
  118.15 +        __trace_pv_trap(trapnr, eip, use_error_code, error_code);
  118.16 +}
  118.17 +
  118.18 +void __trace_pv_page_fault(unsigned long addr, unsigned error_code);
  118.19 +static inline void trace_pv_page_fault(unsigned long addr,
  118.20 +                                       unsigned error_code)
  118.21 +{
  118.22 +    if ( tb_init_done )
  118.23 +        __trace_pv_page_fault(addr, error_code);
  118.24 +}
  118.25 +
  118.26 +void __trace_trap_one_addr(unsigned event, unsigned long va);
  118.27 +static inline void trace_trap_one_addr(unsigned event, unsigned long va)
  118.28 +{
  118.29 +    if ( tb_init_done )
  118.30 +        __trace_trap_one_addr(event, va);
  118.31 +}
  118.32 +
  118.33 +void __trace_trap_two_addr(unsigned event, unsigned long va1,
  118.34 +                           unsigned long va2);
  118.35 +static inline void trace_trap_two_addr(unsigned event, unsigned long va1,
  118.36 +                                       unsigned long va2)
  118.37 +{
  118.38 +    if ( tb_init_done )
  118.39 +        __trace_trap_two_addr(event, va1, va2);
  118.40 +}
  118.41 +
  118.42 +void __trace_ptwr_emulation(unsigned long addr, l1_pgentry_t npte);
  118.43 +static inline void trace_ptwr_emulation(unsigned long addr, l1_pgentry_t npte)
  118.44 +{
  118.45 +    if ( tb_init_done )
  118.46 +        __trace_ptwr_emulation(addr, npte);
  118.47 +}
  118.48 +
  118.49 +#endif /* __ASM_TRACE_H__ */
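A sketch of how a trap handler might call these wrappers; TRC_PV_TRAP and
its siblings come from the public/trace.h additions later in this
changeset, while the surrounding handler context (regs, trapnr) is
hypothetical:

    /* Record a PV guest trap only when tracing is active; the inline
       wrapper checks tb_init_done before paying for the function call. */
    trace_pv_trap(trapnr, regs->eip, use_error_code, regs->error_code);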
   119.1 --- a/xen/include/asm-x86/x86_32/page-3level.h	Wed Oct 17 10:36:31 2007 -0600
   119.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h	Sun Oct 21 12:10:25 2007 -0600
   119.3 @@ -85,6 +85,6 @@ typedef l3_pgentry_t root_pgentry_t;
   119.4  #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
   119.5  #define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
   119.6  
   119.7 -#define L3_DISALLOW_MASK 0xFFFFF1E6U /* must-be-zero */
   119.8 +#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
   119.9  
  119.10  #endif /* __X86_32_PAGE_3LEVEL_H__ */
   120.1 --- a/xen/include/asm-x86/x86_32/page.h	Wed Oct 17 10:36:31 2007 -0600
   120.2 +++ b/xen/include/asm-x86/x86_32/page.h	Sun Oct 21 12:10:25 2007 -0600
   120.3 @@ -33,10 +33,10 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
   120.4      (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
   120.5  
   120.6  /*
   120.7 - * Disallow unused flag bits plus PAT, PSE and GLOBAL.
   120.8 + * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
   120.9   * Permit the NX bit if the hardware supports it.
  120.10   */
  120.11 -#define BASE_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX)
  120.12 +#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
  120.13  
  120.14  #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
  120.15  #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
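The widened mask follows directly from the page-flag bit positions,
assuming the usual x86 encodings _PAGE_PWT = 1<<3, _PAGE_PCD = 1<<4,
_PAGE_PSE/PAT = 1<<7, _PAGE_GLOBAL = 1<<8:

    0xFFFFF180 | _PAGE_PWT | _PAGE_PCD
      = 0xFFFFF180 | 0x08 | 0x10
      = 0xFFFFF198

That is, the change newly disallows PCD and PWT in guest pagetable entries
on top of the previously disallowed PAT/PSE and GLOBAL bits; the same
|0x18 widening accounts for the PAE L3 mask change (0xFFFFF1E6 to
0xFFFFF1FE) above and the x86_64 masks below.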
   121.1 --- a/xen/include/asm-x86/x86_64/page.h	Wed Oct 17 10:36:31 2007 -0600
   121.2 +++ b/xen/include/asm-x86/x86_64/page.h	Sun Oct 21 12:10:25 2007 -0600
   121.3 @@ -105,18 +105,18 @@ typedef l4_pgentry_t root_pgentry_t;
   121.4  #define _PAGE_NX     (cpu_has_nx ? _PAGE_NX_BIT : 0U)
   121.5  
   121.6  /*
   121.7 - * Disallow unused flag bits plus PAT, PSE and GLOBAL.
   121.8 + * Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
   121.9   * Permit the NX bit if the hardware supports it.
  121.10   * Note that range [62:52] is available for software use on x86/64.
  121.11   */
  121.12 -#define BASE_DISALLOW_MASK (0xFF800180U & ~_PAGE_NX)
  121.13 +#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
  121.14  
  121.15  #define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
  121.16  #define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
  121.17  #define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
  121.18  #define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
  121.19  
  121.20 -#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1E6U
  121.21 +#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU
  121.22  
  121.23  #define PAGE_HYPERVISOR         (__PAGE_HYPERVISOR         | _PAGE_GLOBAL)
  121.24  #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
   122.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   122.2 +++ b/xen/include/public/arch-x86/hvm/save.h	Sun Oct 21 12:10:25 2007 -0600
   122.3 @@ -0,0 +1,413 @@
   122.4 +/* 
   122.5 + * Structure definitions for HVM state that is held by Xen and must
   122.6 + * be saved along with the domain's memory and device-model state.
   122.7 + * 
   122.8 + * Copyright (c) 2007 XenSource Ltd.
   122.9 + *
  122.10 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  122.11 + * of this software and associated documentation files (the "Software"), to
  122.12 + * deal in the Software without restriction, including without limitation the
  122.13 + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  122.14 + * sell copies of the Software, and to permit persons to whom the Software is
  122.15 + * furnished to do so, subject to the following conditions:
  122.16 + *
  122.17 + * The above copyright notice and this permission notice shall be included in
  122.18 + * all copies or substantial portions of the Software.
  122.19 + *
  122.20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  122.21 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  122.22 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  122.23 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  122.24 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  122.25 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  122.26 + * DEALINGS IN THE SOFTWARE.
  122.27 + */
  122.28 +
  122.29 +#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__
  122.30 +#define __XEN_PUBLIC_HVM_SAVE_X86_H__
  122.31 +
  122.32 +/* 
  122.33 + * Save/restore header: general info about the save file. 
  122.34 + */
  122.35 +
  122.36 +#define HVM_FILE_MAGIC   0x54381286
  122.37 +#define HVM_FILE_VERSION 0x00000001
  122.38 +
  122.39 +struct hvm_save_header {
  122.40 +    uint32_t magic;             /* Must be HVM_FILE_MAGIC */
  122.41 +    uint32_t version;           /* File format version */
  122.42 +    uint64_t changeset;         /* Version of Xen that saved this file */
  122.43 +    uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
  122.44 +    uint32_t pad0;
  122.45 +};
  122.46 +
  122.47 +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
  122.48 +
  122.49 +
  122.50 +/*
  122.51 + * Processor
  122.52 + */
  122.53 +
  122.54 +struct hvm_hw_cpu {
  122.55 +    uint8_t  fpu_regs[512];
  122.56 +
  122.57 +    uint64_t rax;
  122.58 +    uint64_t rbx;
  122.59 +    uint64_t rcx;
  122.60 +    uint64_t rdx;
  122.61 +    uint64_t rbp;
  122.62 +    uint64_t rsi;
  122.63 +    uint64_t rdi;
  122.64 +    uint64_t rsp;
  122.65 +    uint64_t r8;
  122.66 +    uint64_t r9;
  122.67 +    uint64_t r10;
  122.68 +    uint64_t r11;
  122.69 +    uint64_t r12;
  122.70 +    uint64_t r13;
  122.71 +    uint64_t r14;
  122.72 +    uint64_t r15;
  122.73 +
  122.74 +    uint64_t rip;
  122.75 +    uint64_t rflags;
  122.76 +
  122.77 +    uint64_t cr0;
  122.78 +    uint64_t cr2;
  122.79 +    uint64_t cr3;
  122.80 +    uint64_t cr4;
  122.81 +
  122.82 +    uint64_t dr0;
  122.83 +    uint64_t dr1;
  122.84 +    uint64_t dr2;
  122.85 +    uint64_t dr3;
  122.86 +    uint64_t dr6;
  122.87 +    uint64_t dr7;    
  122.88 +
  122.89 +    uint32_t cs_sel;
  122.90 +    uint32_t ds_sel;
  122.91 +    uint32_t es_sel;
  122.92 +    uint32_t fs_sel;
  122.93 +    uint32_t gs_sel;
  122.94 +    uint32_t ss_sel;
  122.95 +    uint32_t tr_sel;
  122.96 +    uint32_t ldtr_sel;
  122.97 +
  122.98 +    uint32_t cs_limit;
  122.99 +    uint32_t ds_limit;
 122.100 +    uint32_t es_limit;
 122.101 +    uint32_t fs_limit;
 122.102 +    uint32_t gs_limit;
 122.103 +    uint32_t ss_limit;
 122.104 +    uint32_t tr_limit;
 122.105 +    uint32_t ldtr_limit;
 122.106 +    uint32_t idtr_limit;
 122.107 +    uint32_t gdtr_limit;
 122.108 +
 122.109 +    uint64_t cs_base;
 122.110 +    uint64_t ds_base;
 122.111 +    uint64_t es_base;
 122.112 +    uint64_t fs_base;
 122.113 +    uint64_t gs_base;
 122.114 +    uint64_t ss_base;
 122.115 +    uint64_t tr_base;
 122.116 +    uint64_t ldtr_base;
 122.117 +    uint64_t idtr_base;
 122.118 +    uint64_t gdtr_base;
 122.119 +
 122.120 +    uint32_t cs_arbytes;
 122.121 +    uint32_t ds_arbytes;
 122.122 +    uint32_t es_arbytes;
 122.123 +    uint32_t fs_arbytes;
 122.124 +    uint32_t gs_arbytes;
 122.125 +    uint32_t ss_arbytes;
 122.126 +    uint32_t tr_arbytes;
 122.127 +    uint32_t ldtr_arbytes;
 122.128 +
 122.129 +    uint32_t sysenter_cs;
 122.130 +    uint32_t padding0;
 122.131 +
 122.132 +    uint64_t sysenter_esp;
 122.133 +    uint64_t sysenter_eip;
 122.134 +
 122.135 +    /* msr for em64t */
 122.136 +    uint64_t shadow_gs;
 122.137 +
 122.138 +    /* msr content saved/restored. */
 122.139 +    uint64_t msr_flags;
 122.140 +    uint64_t msr_lstar;
 122.141 +    uint64_t msr_star;
 122.142 +    uint64_t msr_cstar;
 122.143 +    uint64_t msr_syscall_mask;
 122.144 +    uint64_t msr_efer;
 122.145 +
 122.146 +    /* guest's idea of what rdtsc() would return */
 122.147 +    uint64_t tsc;
 122.148 +
 122.149 +    /* pending event, if any */
 122.150 +    union {
 122.151 +        uint32_t pending_event;
 122.152 +        struct {
 122.153 +            uint8_t  pending_vector:8;
 122.154 +            uint8_t  pending_type:3;
 122.155 +            uint8_t  pending_error_valid:1;
 122.156 +            uint32_t pending_reserved:19;
 122.157 +            uint8_t  pending_valid:1;
 122.158 +        };
 122.159 +    };
 122.160 +    /* error code for pending event */
 122.161 +    uint32_t error_code;
 122.162 +};
 122.163 +
 122.164 +DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
 122.165 +
 122.166 +
 122.167 +/*
 122.168 + * PIC
 122.169 + */
 122.170 +
 122.171 +struct hvm_hw_vpic {
 122.172 +    /* IR line bitmasks. */
 122.173 +    uint8_t irr;
 122.174 +    uint8_t imr;
 122.175 +    uint8_t isr;
 122.176 +
 122.177 +    /* Line IRx maps to IRQ irq_base+x */
 122.178 +    uint8_t irq_base;
 122.179 +
 122.180 +    /*
 122.181 +     * Where are we in ICW2-4 initialisation (0 means no init in progress)?
 122.182 +     * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1).
 122.183 +     * Bit 2: ICW1.IC4  (1 == ICW4 included in init sequence)
 122.184 +     * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence)
 122.185 +     */
 122.186 +    uint8_t init_state:4;
 122.187 +
 122.188 +    /* IR line with highest priority. */
 122.189 +    uint8_t priority_add:4;
 122.190 +
 122.191 +    /* Reads from A=0 obtain ISR or IRR? */
 122.192 +    uint8_t readsel_isr:1;
 122.193 +
 122.194 +    /* Reads perform a polling read? */
 122.195 +    uint8_t poll:1;
 122.196 +
 122.197 +    /* Automatically clear IRQs from the ISR during INTA? */
 122.198 +    uint8_t auto_eoi:1;
 122.199 +
 122.200 +    /* Automatically rotate IRQ priorities during AEOI? */
 122.201 +    uint8_t rotate_on_auto_eoi:1;
 122.202 +
 122.203 +    /* Exclude slave inputs when considering in-service IRQs? */
 122.204 +    uint8_t special_fully_nested_mode:1;
 122.205 +
 122.206 +    /* Special mask mode excludes masked IRs from AEOI and priority checks. */
 122.207 +    uint8_t special_mask_mode:1;
 122.208 +
 122.209 +    /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */
 122.210 +    uint8_t is_master:1;
 122.211 +
 122.212 +    /* Edge/trigger selection. */
 122.213 +    uint8_t elcr;
 122.214 +
 122.215 +    /* Virtual INT output. */
 122.216 +    uint8_t int_output;
 122.217 +};
 122.218 +
 122.219 +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
 122.220 +
 122.221 +
 122.222 +/*
 122.223 + * IO-APIC
 122.224 + */
 122.225 +
 122.226 +#ifdef __ia64__
 122.227 +#define VIOAPIC_IS_IOSAPIC 1
 122.228 +#define VIOAPIC_NUM_PINS  24
 122.229 +#else
  122.230 +#define VIOAPIC_NUM_PINS  48 /* 16 ISA IRQs, 32 non-legacy PCI IRQs. */
 122.231 +#endif
 122.232 +
 122.233 +struct hvm_hw_vioapic {
 122.234 +    uint64_t base_address;
 122.235 +    uint32_t ioregsel;
 122.236 +    uint32_t id;
 122.237 +    union vioapic_redir_entry
 122.238 +    {
 122.239 +        uint64_t bits;
 122.240 +        struct {
 122.241 +            uint8_t vector;
 122.242 +            uint8_t delivery_mode:3;
 122.243 +            uint8_t dest_mode:1;
 122.244 +            uint8_t delivery_status:1;
 122.245 +            uint8_t polarity:1;
 122.246 +            uint8_t remote_irr:1;
 122.247 +            uint8_t trig_mode:1;
 122.248 +            uint8_t mask:1;
 122.249 +            uint8_t reserve:7;
 122.250 +#if !VIOAPIC_IS_IOSAPIC
 122.251 +            uint8_t reserved[4];
 122.252 +            uint8_t dest_id;
 122.253 +#else
 122.254 +            uint8_t reserved[3];
 122.255 +            uint16_t dest_id;
 122.256 +#endif
 122.257 +        } fields;
 122.258 +    } redirtbl[VIOAPIC_NUM_PINS];
 122.259 +};
 122.260 +
 122.261 +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
 122.262 +
 122.263 +
 122.264 +/*
 122.265 + * LAPIC
 122.266 + */
 122.267 +
 122.268 +struct hvm_hw_lapic {
 122.269 +    uint64_t             apic_base_msr;
 122.270 +    uint32_t             disabled; /* VLAPIC_xx_DISABLED */
 122.271 +    uint32_t             timer_divisor;
 122.272 +};
 122.273 +
 122.274 +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
 122.275 +
 122.276 +struct hvm_hw_lapic_regs {
 122.277 +    /* A 4k page of register state */
 122.278 +    uint8_t  data[0x400];
 122.279 +};
 122.280 +
 122.281 +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs);
 122.282 +
 122.283 +
 122.284 +/*
 122.285 + * IRQs
 122.286 + */
 122.287 +
 122.288 +struct hvm_hw_pci_irqs {
 122.289 +    /*
 122.290 +     * Virtual interrupt wires for a single PCI bus.
 122.291 +     * Indexed by: device*4 + INTx#.
 122.292 +     */
 122.293 +    union {
 122.294 +        DECLARE_BITMAP(i, 32*4);
 122.295 +        uint64_t pad[2];
 122.296 +    };
 122.297 +};
 122.298 +
 122.299 +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs);
 122.300 +
 122.301 +struct hvm_hw_isa_irqs {
 122.302 +    /*
 122.303 +     * Virtual interrupt wires for ISA devices.
 122.304 +     * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
 122.305 +     */
 122.306 +    union {
 122.307 +        DECLARE_BITMAP(i, 16);
 122.308 +        uint64_t pad[1];
 122.309 +    };
 122.310 +};
 122.311 +
 122.312 +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs);
 122.313 +
 122.314 +struct hvm_hw_pci_link {
 122.315 +    /*
 122.316 +     * PCI-ISA interrupt router.
 122.317 +     * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using
 122.318 +     * the traditional 'barber's pole' mapping ((device + INTx#) & 3).
 122.319 +     * The router provides a programmable mapping from each link to a GSI.
 122.320 +     */
 122.321 +    uint8_t route[4];
 122.322 +    uint8_t pad0[4];
 122.323 +};
 122.324 +
 122.325 +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link);
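As a worked example of the mapping: device 3's INTB# (INTx# = 1) is wired
to link (3 + 1) & 3 = 0, and device 4's INTA# also lands on link
(4 + 0) & 3 = 0, so adjacent devices share links in the classic rotated
pattern.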
 122.326 +
 122.327 +/* 
 122.328 + *  PIT
 122.329 + */
 122.330 +
 122.331 +struct hvm_hw_pit {
 122.332 +    struct hvm_hw_pit_channel {
 122.333 +        uint32_t count; /* can be 65536 */
 122.334 +        uint16_t latched_count;
 122.335 +        uint8_t count_latched;
 122.336 +        uint8_t status_latched;
 122.337 +        uint8_t status;
 122.338 +        uint8_t read_state;
 122.339 +        uint8_t write_state;
 122.340 +        uint8_t write_latch;
 122.341 +        uint8_t rw_mode;
 122.342 +        uint8_t mode;
 122.343 +        uint8_t bcd; /* not supported */
 122.344 +        uint8_t gate; /* timer start */
 122.345 +    } channels[3];  /* 3 x 16 bytes */
 122.346 +    uint32_t speaker_data_on;
 122.347 +    uint32_t pad0;
 122.348 +};
 122.349 +
 122.350 +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit);
 122.351 +
 122.352 +
 122.353 +/* 
 122.354 + * RTC
 122.355 + */ 
 122.356 +
 122.357 +#define RTC_CMOS_SIZE 14
 122.358 +struct hvm_hw_rtc {
 122.359 +    /* CMOS bytes */
 122.360 +    uint8_t cmos_data[RTC_CMOS_SIZE];
 122.361 +    /* Index register for 2-part operations */
 122.362 +    uint8_t cmos_index;
 122.363 +    uint8_t pad0;
 122.364 +};
 122.365 +
 122.366 +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc);
 122.367 +
 122.368 +
 122.369 +/*
 122.370 + * HPET
 122.371 + */
 122.372 +
 122.373 +#define HPET_TIMER_NUM     3    /* 3 timers supported now */
 122.374 +struct hvm_hw_hpet {
 122.375 +    /* Memory-mapped, software visible registers */
 122.376 +    uint64_t capability;        /* capabilities */
 122.377 +    uint64_t res0;              /* reserved */
 122.378 +    uint64_t config;            /* configuration */
 122.379 +    uint64_t res1;              /* reserved */
 122.380 +    uint64_t isr;               /* interrupt status reg */
 122.381 +    uint64_t res2[25];          /* reserved */
 122.382 +    uint64_t mc64;              /* main counter */
 122.383 +    uint64_t res3;              /* reserved */
 122.384 +    struct {                    /* timers */
 122.385 +        uint64_t config;        /* configuration/cap */
 122.386 +        uint64_t cmp;           /* comparator */
 122.387 +        uint64_t fsb;           /* FSB route, not supported now */
 122.388 +        uint64_t res4;          /* reserved */
 122.389 +    } timers[HPET_TIMER_NUM];
 122.390 +    uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
 122.391 +
 122.392 +    /* Hidden register state */
 122.393 +    uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
 122.394 +};
 122.395 +
 122.396 +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
 122.397 +
 122.398 +
 122.399 +/*
 122.400 + * PM timer
 122.401 + */
 122.402 +
 122.403 +struct hvm_hw_pmtimer {
  122.404 +    uint32_t tmr_val;   /* PM_TMR_BLK.TMR_VAL: 32-bit free-running counter */
 122.405 +    uint16_t pm1a_sts;  /* PM1a_EVT_BLK.PM1a_STS: status register */
 122.406 +    uint16_t pm1a_en;   /* PM1a_EVT_BLK.PM1a_EN: enable register */
 122.407 +};
 122.408 +
 122.409 +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer);
 122.410 +
 122.411 +/* 
 122.412 + * Largest type-code in use
 122.413 + */
 122.414 +#define HVM_SAVE_CODE_MAX 13
 122.415 +
 122.416 +#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
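A sketch of how a restore path can validate the save header defined in
this file before loading any further records; the error handling shown is
hypothetical:

    struct hvm_save_header hdr;
    if ( hvm_load_entry(HEADER, h, &hdr) != 0 ||
         hdr.magic != HVM_FILE_MAGIC ||
         hdr.version != HVM_FILE_VERSION )
        return -EINVAL;   /* reject mismatched or corrupt save files */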
   123.1 --- a/xen/include/public/domctl.h	Wed Oct 17 10:36:31 2007 -0600
   123.2 +++ b/xen/include/public/domctl.h	Sun Oct 21 12:10:25 2007 -0600
   123.3 @@ -380,6 +380,7 @@ typedef struct xen_domctl_arch_setup {
   123.4      uint64_aligned_t maxmem; /* Highest memory address for MDT.  */
   123.5      uint64_aligned_t xsi_va; /* Xen shared_info area virtual address.  */
   123.6      uint32_t hypercall_imm;  /* Break imm for Xen hypercalls.  */
   123.7 +    int8_t vhpt_size_log2;   /* Log2 of VHPT size. */
   123.8  #endif
   123.9  } xen_domctl_arch_setup_t;
  123.10  DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
   124.1 --- a/xen/include/public/hvm/params.h	Wed Oct 17 10:36:31 2007 -0600
   124.2 +++ b/xen/include/public/hvm/params.h	Sun Oct 21 12:10:25 2007 -0600
   124.3 @@ -52,7 +52,8 @@
   124.4  
   124.5  #ifdef __ia64__
   124.6  #define HVM_PARAM_NVRAM_FD     7
   124.7 -#define HVM_NR_PARAMS          8
   124.8 +#define HVM_PARAM_VHPT_SIZE    8
   124.9 +#define HVM_NR_PARAMS          9
  124.10  #else
  124.11  #define HVM_NR_PARAMS          7
  124.12  #endif
   125.1 --- a/xen/include/public/hvm/save.h	Wed Oct 17 10:36:31 2007 -0600
   125.2 +++ b/xen/include/public/hvm/save.h	Sun Oct 21 12:10:25 2007 -0600
   125.3 @@ -3,7 +3,6 @@
   125.4   *
   125.5   * Structure definitions for HVM state that is held by Xen and must
   125.6   * be saved along with the domain's memory and device-model state.
   125.7 - *
   125.8   * 
   125.9   * Copyright (c) 2007 XenSource Ltd.
  125.10   *
  125.11 @@ -67,391 +66,6 @@ struct hvm_save_descriptor {
  125.12  
  125.13  
  125.14  /* 
  125.15 - * Save/restore header: general info about the save file. 
  125.16 - */
  125.17 -
  125.18 -#define HVM_FILE_MAGIC   0x54381286
  125.19 -#define HVM_FILE_VERSION 0x00000001
  125.20 -
  125.21 -struct hvm_save_header {
  125.22 -    uint32_t magic;             /* Must be HVM_FILE_MAGIC */
  125.23 -    uint32_t version;           /* File format version */
  125.24 -    uint64_t changeset;         /* Version of Xen that saved this file */
  125.25 -    uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
  125.26 -    uint32_t pad0;
  125.27 -};
  125.28 -
  125.29 -DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
  125.30 -
  125.31 -
  125.32 -/*
  125.33 - * Processor
  125.34 - */
  125.35 -
  125.36 -struct hvm_hw_cpu {
  125.37 -    uint8_t  fpu_regs[512];
  125.38 -
  125.39 -    uint64_t rax;
  125.40 -    uint64_t rbx;
  125.41 -    uint64_t rcx;
  125.42 -    uint64_t rdx;
  125.43 -    uint64_t rbp;
  125.44 -    uint64_t rsi;
  125.45 -    uint64_t rdi;
  125.46 -    uint64_t rsp;
  125.47 -    uint64_t r8;
  125.48 -    uint64_t r9;
  125.49 -    uint64_t r10;
  125.50 -    uint64_t r11;
  125.51 -    uint64_t r12;
  125.52 -    uint64_t r13;
  125.53 -    uint64_t r14;
  125.54 -    uint64_t r15;
  125.55 -
  125.56 -    uint64_t rip;
  125.57 -    uint64_t rflags;
  125.58 -
  125.59 -    uint64_t cr0;
  125.60 -    uint64_t cr2;
  125.61 -    uint64_t cr3;
  125.62 -    uint64_t cr4;
  125.63 -
  125.64 -    uint64_t dr0;
  125.65 -    uint64_t dr1;
  125.66 -    uint64_t dr2;
  125.67 -    uint64_t dr3;
  125.68 -    uint64_t dr6;
  125.69 -    uint64_t dr7;    
  125.70 -
  125.71 -    uint32_t cs_sel;
  125.72 -    uint32_t ds_sel;
  125.73 -    uint32_t es_sel;
  125.74 -    uint32_t fs_sel;
  125.75 -    uint32_t gs_sel;
  125.76 -    uint32_t ss_sel;
  125.77 -    uint32_t tr_sel;
  125.78 -    uint32_t ldtr_sel;
  125.79 -
  125.80 -    uint32_t cs_limit;
  125.81 -    uint32_t ds_limit;
  125.82 -    uint32_t es_limit;
  125.83 -    uint32_t fs_limit;
  125.84 -    uint32_t gs_limit;
  125.85 -    uint32_t ss_limit;
  125.86 -    uint32_t tr_limit;
  125.87 -    uint32_t ldtr_limit;
  125.88 -    uint32_t idtr_limit;
  125.89 -    uint32_t gdtr_limit;
  125.90 -
  125.91 -    uint64_t cs_base;
  125.92 -    uint64_t ds_base;
  125.93 -    uint64_t es_base;
  125.94 -    uint64_t fs_base;
  125.95 -    uint64_t gs_base;
  125.96 -    uint64_t ss_base;
  125.97 -    uint64_t tr_base;
  125.98 -    uint64_t ldtr_base;
  125.99 -    uint64_t idtr_base;
 125.100 -    uint64_t gdtr_base;
 125.101 -
 125.102 -    uint32_t cs_arbytes;
 125.103 -    uint32_t ds_arbytes;
 125.104 -    uint32_t es_arbytes;
 125.105 -    uint32_t fs_arbytes;
 125.106 -    uint32_t gs_arbytes;
 125.107 -    uint32_t ss_arbytes;
 125.108 -    uint32_t tr_arbytes;
 125.109 -    uint32_t ldtr_arbytes;
 125.110 -
 125.111 -    uint32_t sysenter_cs;
 125.112 -    uint32_t padding0;
 125.113 -
 125.114 -    uint64_t sysenter_esp;
 125.115 -    uint64_t sysenter_eip;
 125.116 -
 125.117 -    /* msr for em64t */
 125.118 -    uint64_t shadow_gs;
 125.119 -
 125.120 -    /* msr content saved/restored. */
 125.121 -    uint64_t msr_flags;
 125.122 -    uint64_t msr_lstar;
 125.123 -    uint64_t msr_star;
 125.124 -    uint64_t msr_cstar;
 125.125 -    uint64_t msr_syscall_mask;
 125.126 -    uint64_t msr_efer;
 125.127 -
 125.128 -    /* guest's idea of what rdtsc() would return */
 125.129 -    uint64_t tsc;
 125.130 -
 125.131 -    /* pending event, if any */
 125.132 -    union {
 125.133 -        uint32_t pending_event;
 125.134 -        struct {
 125.135 -            uint8_t  pending_vector:8;
 125.136 -            uint8_t  pending_type:3;
 125.137 -            uint8_t  pending_error_valid:1;
 125.138 -            uint32_t pending_reserved:19;
 125.139 -            uint8_t  pending_valid:1;
 125.140 -        };
 125.141 -    };
 125.142 -    /* error code for pending event */
 125.143 -    uint32_t error_code;
 125.144 -};
 125.145 -
 125.146 -DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
 125.147 -
 125.148 -
 125.149 -/*
 125.150 - * PIC
 125.151 - */
 125.152 -
 125.153 -struct hvm_hw_vpic {
 125.154 -    /* IR line bitmasks. */
 125.155 -    uint8_t irr;
 125.156 -    uint8_t imr;
 125.157 -    uint8_t isr;
 125.158 -
 125.159 -    /* Line IRx maps to IRQ irq_base+x */
 125.160 -    uint8_t irq_base;
 125.161 -
 125.162 -    /*
 125.163 -     * Where are we in ICW2-4 initialisation (0 means no init in progress)?
 125.164 -     * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1).
 125.165 -     * Bit 2: ICW1.IC4  (1 == ICW4 included in init sequence)
 125.166 -     * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence)
 125.167 -     */
 125.168 -    uint8_t init_state:4;
 125.169 -
 125.170 -    /* IR line with highest priority. */
 125.171 -    uint8_t priority_add:4;
 125.172 -
 125.173 -    /* Reads from A=0 obtain ISR or IRR? */
 125.174 -    uint8_t readsel_isr:1;
 125.175 -
 125.176 -    /* Reads perform a polling read? */
 125.177 -    uint8_t poll:1;
 125.178 -
 125.179 -    /* Automatically clear IRQs from the ISR during INTA? */
 125.180 -    uint8_t auto_eoi:1;
 125.181 -
 125.182 -    /* Automatically rotate IRQ priorities during AEOI? */
 125.183 -    uint8_t rotate_on_auto_eoi:1;
 125.184 -
 125.185 -    /* Exclude slave inputs when considering in-service IRQs? */
 125.186 -    uint8_t special_fully_nested_mode:1;
 125.187 -
 125.188 -    /* Special mask mode excludes masked IRs from AEOI and priority checks. */
 125.189 -    uint8_t special_mask_mode:1;
 125.190 -
 125.191 -    /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */
 125.192 -    uint8_t is_master:1;
 125.193 -
 125.194 -    /* Edge/trigger selection. */
 125.195 -    uint8_t elcr;
 125.196 -
 125.197 -    /* Virtual INT output. */
 125.198 -    uint8_t int_output;
 125.199 -};
 125.200 -
 125.201 -DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
 125.202 -
 125.203 -
 125.204 -/*
 125.205 - * IO-APIC
 125.206 - */
 125.207 -
 125.208 -#ifdef __ia64__
 125.209 -#define VIOAPIC_IS_IOSAPIC 1
 125.210 -#define VIOAPIC_NUM_PINS  24
 125.211 -#else
 125.212 -#define VIOAPIC_NUM_PINS  48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */
 125.213 -#endif
 125.214 -
 125.215 -struct hvm_hw_vioapic {
 125.216 -    uint64_t base_address;
 125.217 -    uint32_t ioregsel;
 125.218 -    uint32_t id;
 125.219 -    union vioapic_redir_entry
 125.220 -    {
 125.221 -        uint64_t bits;
 125.222 -        struct {
 125.223 -            uint8_t vector;
 125.224 -            uint8_t delivery_mode:3;
 125.225 -            uint8_t dest_mode:1;
 125.226 -            uint8_t delivery_status:1;
 125.227 -            uint8_t polarity:1;
 125.228 -            uint8_t remote_irr:1;
 125.229 -            uint8_t trig_mode:1;
 125.230 -            uint8_t mask:1;
 125.231 -            uint8_t reserve:7;
 125.232 -#if !VIOAPIC_IS_IOSAPIC
 125.233 -            uint8_t reserved[4];
 125.234 -            uint8_t dest_id;
 125.235 -#else
 125.236 -            uint8_t reserved[3];
 125.237 -            uint16_t dest_id;
 125.238 -#endif
 125.239 -        } fields;
 125.240 -    } redirtbl[VIOAPIC_NUM_PINS];
 125.241 -};
 125.242 -
 125.243 -DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
 125.244 -
 125.245 -
 125.246 -/*
 125.247 - * LAPIC
 125.248 - */
 125.249 -
 125.250 -struct hvm_hw_lapic {
 125.251 -    uint64_t             apic_base_msr;
 125.252 -    uint32_t             disabled; /* VLAPIC_xx_DISABLED */
 125.253 -    uint32_t             timer_divisor;
 125.254 -};
 125.255 -
 125.256 -DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
 125.257 -
 125.258 -struct hvm_hw_lapic_regs {
 125.259 -    /* A 4k page of register state */
 125.260 -    uint8_t  data[0x400];
 125.261 -};
 125.262 -
 125.263 -DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs);
 125.264 -
 125.265 -
 125.266 -/*
 125.267 - * IRQs
 125.268 - */
 125.269 -
 125.270 -struct hvm_hw_pci_irqs {
 125.271 -    /*
 125.272 -     * Virtual interrupt wires for a single PCI bus.
 125.273 -     * Indexed by: device*4 + INTx#.
 125.274 -     */
 125.275 -    union {
 125.276 -        DECLARE_BITMAP(i, 32*4);
 125.277 -        uint64_t pad[2];
 125.278 -    };
 125.279 -};
 125.280 -
 125.281 -DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs);
 125.282 -
 125.283 -struct hvm_hw_isa_irqs {
 125.284 -    /*
 125.285 -     * Virtual interrupt wires for ISA devices.
 125.286 -     * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
 125.287 -     */
 125.288 -    union {
 125.289 -        DECLARE_BITMAP(i, 16);
 125.290 -        uint64_t pad[1];
 125.291 -    };
 125.292 -};
 125.293 -
 125.294 -DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs);
 125.295 -
 125.296 -struct hvm_hw_pci_link {
 125.297 -    /*
 125.298 -     * PCI-ISA interrupt router.
 125.299 -     * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using
 125.300 -     * the traditional 'barber's pole' mapping ((device + INTx#) & 3).
 125.301 -     * The router provides a programmable mapping from each link to a GSI.
 125.302 -     */
 125.303 -    uint8_t route[4];
 125.304 -    uint8_t pad0[4];
 125.305 -};
 125.306 -
 125.307 -DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link);
 125.308 -
 125.309 -/* 
 125.310 - *  PIT
 125.311 - */
 125.312 -
 125.313 -struct hvm_hw_pit {
 125.314 -    struct hvm_hw_pit_channel {
 125.315 -        uint32_t count; /* can be 65536 */
 125.316 -        uint16_t latched_count;
 125.317 -        uint8_t count_latched;
 125.318 -        uint8_t status_latched;
 125.319 -        uint8_t status;
 125.320 -        uint8_t read_state;
 125.321 -        uint8_t write_state;
 125.322 -        uint8_t write_latch;
 125.323 -        uint8_t rw_mode;
 125.324 -        uint8_t mode;
 125.325 -        uint8_t bcd; /* not supported */
 125.326 -        uint8_t gate; /* timer start */
 125.327 -    } channels[3];  /* 3 x 16 bytes */
 125.328 -    uint32_t speaker_data_on;
 125.329 -    uint32_t pad0;
 125.330 -};
 125.331 -
 125.332 -DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit);
 125.333 -
 125.334 -
 125.335 -/* 
 125.336 - * RTC
 125.337 - */ 
 125.338 -
 125.339 -#define RTC_CMOS_SIZE 14
 125.340 -struct hvm_hw_rtc {
 125.341 -    /* CMOS bytes */
 125.342 -    uint8_t cmos_data[RTC_CMOS_SIZE];
 125.343 -    /* Index register for 2-part operations */
 125.344 -    uint8_t cmos_index;
 125.345 -    uint8_t pad0;
 125.346 -};
 125.347 -
 125.348 -DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc);
 125.349 -
 125.350 -
 125.351 -/*
 125.352 - * HPET
 125.353 - */
 125.354 -
 125.355 -#define HPET_TIMER_NUM     3    /* 3 timers supported now */
 125.356 -struct hvm_hw_hpet {
 125.357 -    /* Memory-mapped, software visible registers */
 125.358 -    uint64_t capability;        /* capabilities */
 125.359 -    uint64_t res0;              /* reserved */
 125.360 -    uint64_t config;            /* configuration */
 125.361 -    uint64_t res1;              /* reserved */
 125.362 -    uint64_t isr;               /* interrupt status reg */
 125.363 -    uint64_t res2[25];          /* reserved */
 125.364 -    uint64_t mc64;              /* main counter */
 125.365 -    uint64_t res3;              /* reserved */
 125.366 -    struct {                    /* timers */
 125.367 -        uint64_t config;        /* configuration/cap */
 125.368 -        uint64_t cmp;           /* comparator */
 125.369 -        uint64_t fsb;           /* FSB route, not supported now */
 125.370 -        uint64_t res4;          /* reserved */
 125.371 -    } timers[HPET_TIMER_NUM];
 125.372 -    uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
 125.373 -
 125.374 -    /* Hidden register state */
 125.375 -    uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
 125.376 -};
 125.377 -
 125.378 -DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
 125.379 -
 125.380 -
 125.381 -/*
 125.382 - * PM timer
 125.383 - */
 125.384 -
 125.385 -struct hvm_hw_pmtimer {
 125.386 -    uint32_t tmr_val;   /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */
 125.387 -    uint16_t pm1a_sts;  /* PM1a_EVT_BLK.PM1a_STS: status register */
 125.388 -    uint16_t pm1a_en;   /* PM1a_EVT_BLK.PM1a_EN: enable register */
 125.389 -};
 125.390 -
 125.391 -DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer);
 125.392 -
 125.393 -/* 
 125.394 - * Largest type-code in use
 125.395 - */
 125.396 -#define HVM_SAVE_CODE_MAX 13
 125.397 -
 125.398 -
 125.399 -/* 
  125.400   * The series of save records is terminated by a zero-type, zero-length
 125.401   * descriptor.
 125.402   */
 125.403 @@ -459,4 +73,10 @@ DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struc
 125.404  struct hvm_save_end {};
 125.405  DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
 125.406  
 125.407 +#if defined(__i386__) || defined(__x86_64__)
 125.408 +#include "../arch-x86/hvm/save.h"
 125.409 +#else
 125.410 +#error "unsupported architecture"
 125.411 +#endif
 125.412 +
 125.413  #endif /* __XEN_PUBLIC_HVM_SAVE_H__ */
   126.1 --- a/xen/include/public/sysctl.h	Wed Oct 17 10:36:31 2007 -0600
   126.2 +++ b/xen/include/public/sysctl.h	Sun Oct 21 12:10:25 2007 -0600
   126.3 @@ -34,7 +34,7 @@
   126.4  #include "xen.h"
   126.5  #include "domctl.h"
   126.6  
   126.7 -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000004
   126.8 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000005
   126.9  
  126.10  /*
  126.11   * Read console content from Xen buffer ring.
  126.12 @@ -79,7 +79,7 @@ struct xen_sysctl_physinfo {
  126.13      /* IN variables. */
  126.14      uint32_t threads_per_core;
  126.15      uint32_t cores_per_socket;
  126.16 -    uint32_t sockets_per_node;
  126.17 +    uint32_t nr_cpus;
  126.18      uint32_t nr_nodes;
  126.19      uint32_t cpu_khz;
  126.20      uint64_aligned_t total_pages;
   127.1 --- a/xen/include/public/trace.h	Wed Oct 17 10:36:31 2007 -0600
   127.2 +++ b/xen/include/public/trace.h	Sun Oct 21 12:10:25 2007 -0600
   127.3 @@ -36,6 +36,7 @@
   127.4  #define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
   127.5  #define TRC_HVM      0x0008f000    /* Xen HVM trace            */
   127.6  #define TRC_MEM      0x0010f000    /* Xen memory trace         */
   127.7 +#define TRC_PV       0x0020f000    /* Xen PV traces            */
   127.8  #define TRC_ALL      0x0ffff000
   127.9  #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
  127.10  #define TRC_HD_CYCLE_FLAG (1UL<<31)
  127.11 @@ -74,6 +75,20 @@
  127.12  #define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
  127.13  #define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
  127.14  
  127.15 +#define TRC_PV_HYPERCALL             (TRC_PV +  1)
  127.16 +#define TRC_PV_TRAP                  (TRC_PV +  3)
  127.17 +#define TRC_PV_PAGE_FAULT            (TRC_PV +  4)
  127.18 +#define TRC_PV_FORCED_INVALID_OP     (TRC_PV +  5)
  127.19 +#define TRC_PV_EMULATE_PRIVOP        (TRC_PV +  6)
  127.20 +#define TRC_PV_EMULATE_4GB           (TRC_PV +  7)
  127.21 +#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV +  8)
  127.22 +#define TRC_PV_PAGING_FIXUP          (TRC_PV +  9)
  127.23 +#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10)
  127.24 +#define TRC_PV_PTWR_EMULATION        (TRC_PV + 11)
  127.25 +#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV + 12)
  127.26 +  /* Indicates that addresses in trace record are 64 bits */
  127.27 +#define TRC_PV_64_FLAG               (0x100) 
  127.28 +
  127.29  /* trace events per subclass */
  127.30  #define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
  127.31  #define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
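
Note that TRC_PV_64_FLAG is a modifier bit ORed into a PV event code, not an event code of its own, so a trace consumer must mask it off before comparing. A hedged sketch (the function is illustrative, not part of this changeset):

    /* Match a PV page-fault event regardless of 32/64-bit address width. */
    static int is_pv_page_fault(uint32_t event)
    {
        return (event & ~(uint32_t)TRC_PV_64_FLAG) == TRC_PV_PAGE_FAULT;
    }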
   128.1 --- a/xen/include/public/xen-compat.h	Wed Oct 17 10:36:31 2007 -0600
   128.2 +++ b/xen/include/public/xen-compat.h	Sun Oct 21 12:10:25 2007 -0600
   128.3 @@ -27,7 +27,7 @@
   128.4  #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
   128.5  #define __XEN_PUBLIC_XEN_COMPAT_H__
   128.6  
   128.7 -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030205
   128.8 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030206
   128.9  
  128.10  #if defined(__XEN__) || defined(__XEN_TOOLS__)
  128.11  /* Xen is built with matching headers and implements the latest interface. */
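
Consumers of the public headers pick an interface version by defining __XEN_INTERFACE_VERSION__ before any inclusion; bumping the latest version here is what gates the additions guarded by 0x00030206 elsewhere in this changeset. A hedged sketch of an out-of-tree consumer deliberately staying on the previous interface (the include path is illustrative):

    /* Pin to the pre-0x00030206 interface so newly guarded fields
     * stay invisible to this compilation unit. */
    #define __XEN_INTERFACE_VERSION__ 0x00030205
    #include <xen/xen.h>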
   129.1 --- a/xen/include/public/xen.h	Wed Oct 17 10:36:31 2007 -0600
   129.2 +++ b/xen/include/public/xen.h	Sun Oct 21 12:10:25 2007 -0600
   129.3 @@ -565,6 +565,12 @@ typedef struct dom0_vga_console_info {
   129.4              uint8_t  green_pos, green_size;
   129.5              uint8_t  blue_pos, blue_size;
   129.6              uint8_t  rsvd_pos, rsvd_size;
   129.7 +#if __XEN_INTERFACE_VERSION__ >= 0x00030206
   129.8 +            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
   129.9 +            uint32_t gbl_caps;
  129.10 +            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
  129.11 +            uint16_t mode_attrs;
  129.12 +#endif
  129.13          } vesa_lfb;
  129.14      } u;
  129.15  } dom0_vga_console_info_t;
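
Because the new vesa_lfb fields are appended under an interface-version guard, the structure layout is unchanged for consumers built against older headers; code reading the fields should carry the same guard. A hedged sketch, where "info" is a hypothetical dom0_vga_console_info_t pointer:

    #if __XEN_INTERFACE_VERSION__ >= 0x00030206
        printk("VESA caps %08x, mode attrs %04x\n",
               info->u.vesa_lfb.gbl_caps, info->u.vesa_lfb.mode_attrs);
    #endif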
   130.1 --- a/xen/include/public/xsm/flask_op.h	Wed Oct 17 10:36:31 2007 -0600
   130.2 +++ b/xen/include/public/xsm/flask_op.h	Sun Oct 21 12:10:25 2007 -0600
   130.3 @@ -40,6 +40,4 @@ typedef struct flask_op {
   130.4  
   130.5  DEFINE_XEN_GUEST_HANDLE(flask_op_t);
   130.6  
   130.7 -long do_flask_op (XEN_GUEST_HANDLE(xsm_op_t) u_flask_op);
   130.8 -
   130.9  #endif
   131.1 --- a/xen/include/xen/domain.h	Wed Oct 17 10:36:31 2007 -0600
   131.2 +++ b/xen/include/xen/domain.h	Sun Oct 21 12:10:25 2007 -0600
   131.3 @@ -2,6 +2,8 @@
   131.4  #ifndef __XEN_DOMAIN_H__
   131.5  #define __XEN_DOMAIN_H__
   131.6  
   131.7 +#include <public/xen.h>
   131.8 +
   131.9  typedef union {
  131.10      struct vcpu_guest_context *nat;
  131.11      struct compat_vcpu_guest_context *cmp;
   132.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   132.2 +++ b/xen/include/xen/hvm/save.h	Sun Oct 21 12:10:25 2007 -0600
   132.3 @@ -0,0 +1,161 @@
   132.4 +/*
   132.5 + * save.h: HVM support routines for save/restore
   132.6 + *
   132.7 + * This program is free software; you can redistribute it and/or modify it
   132.8 + * under the terms and conditions of the GNU General Public License,
   132.9 + * version 2, as published by the Free Software Foundation.
  132.10 + *
  132.11 + * This program is distributed in the hope it will be useful, but WITHOUT
  132.12 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  132.13 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  132.14 + * more details.
  132.15 + *
  132.16 + * You should have received a copy of the GNU General Public License along with
  132.17 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  132.18 + * Place - Suite 330, Boston, MA 02111-1307 USA.
  132.19 + */
  132.20 +
  132.21 +#ifndef __XEN_HVM_SAVE_H__
  132.22 +#define __XEN_HVM_SAVE_H__
  132.23 +
  132.24 +#include <public/hvm/save.h>
  132.25 +#include <asm/types.h>
  132.26 +
  132.27 +/* Marshalling and unmarshalling use a buffer with size and cursor. */
  132.28 +typedef struct hvm_domain_context {
  132.29 +    uint32_t cur;
  132.30 +    uint32_t size;
  132.31 +    uint8_t *data;
  132.32 +} hvm_domain_context_t;
  132.33 +
  132.34 +/* Marshalling an entry: check space and fill in the header */
  132.35 +static inline int _hvm_init_entry(struct hvm_domain_context *h,
  132.36 +                                  uint16_t tc, uint16_t inst, uint32_t len)
  132.37 +{
  132.38 +    struct hvm_save_descriptor *d 
  132.39 +        = (struct hvm_save_descriptor *)&h->data[h->cur];
  132.40 +    if ( h->size - h->cur < len + sizeof (*d) )
  132.41 +    {
  132.42 +        gdprintk(XENLOG_WARNING,
  132.43 +                 "HVM save: no room for %"PRIu32" + %u bytes "
  132.44 +                 "for typecode %"PRIu16"\n",
  132.45 +                 len, (unsigned) sizeof (*d), tc);
  132.46 +        return -1;
  132.47 +    }
  132.48 +    d->typecode = tc;
  132.49 +    d->instance = inst;
  132.50 +    d->length = len;
  132.51 +    h->cur += sizeof (*d);
  132.52 +    return 0;
  132.53 +}
  132.54 +
  132.55 +/* Marshalling: copy the contents in a type-safe way */
  132.56 +#define _hvm_write_entry(_x, _h, _src) do {                     \
  132.57 +    *(HVM_SAVE_TYPE(_x) *)(&(_h)->data[(_h)->cur]) = *(_src);   \
  132.58 +    (_h)->cur += HVM_SAVE_LENGTH(_x);                           \
  132.59 +} while (0)
  132.60 +
  132.61 +/* Marshalling: init and copy; evaluates to zero on success */
  132.62 +#define hvm_save_entry(_x, _inst, _h, _src) ({          \
  132.63 +    int r;                                              \
  132.64 +    r = _hvm_init_entry((_h), HVM_SAVE_CODE(_x),        \
  132.65 +                        (_inst), HVM_SAVE_LENGTH(_x));  \
  132.66 +    if ( r == 0 )                                       \
  132.67 +        _hvm_write_entry(_x, (_h), (_src));             \
  132.68 +    r; })
  132.69 +
  132.70 +/* Unmarshalling: test an entry's size and typecode and record the instance */
  132.71 +static inline int _hvm_check_entry(struct hvm_domain_context *h, 
  132.72 +                                   uint16_t type, uint32_t len)
  132.73 +{
  132.74 +    struct hvm_save_descriptor *d 
  132.75 +        = (struct hvm_save_descriptor *)&h->data[h->cur];
  132.76 +    if ( len + sizeof (*d) > h->size - h->cur )
  132.77 +    {
  132.78 +        gdprintk(XENLOG_WARNING, 
  132.79 +                 "HVM restore: not enough data left to read %u bytes "
  132.80 +                 "for type %u\n", len, type);
  132.81 +        return -1;
  132.82 +    }    
  132.83 +    if ( type != d->typecode || len != d->length )
  132.84 +    {
  132.85 +        gdprintk(XENLOG_WARNING, 
  132.86 +                 "HVM restore mismatch: expected type %u length %u, "
  132.87 +                 "saw type %u length %u\n", type, len, d->typecode, d->length);
  132.88 +        return -1;
  132.89 +    }
  132.90 +    h->cur += sizeof (*d);
  132.91 +    return 0;
  132.92 +}
  132.93 +
  132.94 +/* Unmarshalling: copy the contents in a type-safe way */
  132.95 +#define _hvm_read_entry(_x, _h, _dst) do {                      \
  132.96 +    *(_dst) = *(HVM_SAVE_TYPE(_x) *) (&(_h)->data[(_h)->cur]);  \
  132.97 +    (_h)->cur += HVM_SAVE_LENGTH(_x);                           \
  132.98 +} while (0)
  132.99 +
 132.100 +/* Unmarshalling: check, then copy. Evaluates to zero on success. */
 132.101 +#define hvm_load_entry(_x, _h, _dst) ({                                 \
 132.102 +    int r;                                                              \
 132.103 +    r = _hvm_check_entry((_h), HVM_SAVE_CODE(_x), HVM_SAVE_LENGTH(_x)); \
 132.104 +    if ( r == 0 )                                                       \
 132.105 +        _hvm_read_entry(_x, (_h), (_dst));                              \
 132.106 +    r; })
 132.107 +
 132.108 +/* Unmarshalling: what is the instance ID of the next entry? */
 132.109 +static inline uint16_t hvm_load_instance(struct hvm_domain_context *h)
 132.110 +{
 132.111 +    struct hvm_save_descriptor *d 
 132.112 +        = (struct hvm_save_descriptor *)&h->data[h->cur];
 132.113 +    return d->instance;
 132.114 +}
 132.115 +
 132.116 +/* Handler types for the different kinds of save-file entry.
 132.117 + * The save handler may save multiple instances of a type into the buffer;
 132.118 + * the load handler will be called once for each instance found when
 132.119 + * restoring.  Both return non-zero on error. */
 132.120 +typedef int (*hvm_save_handler) (struct domain *d, 
 132.121 +                                 hvm_domain_context_t *h);
 132.122 +typedef int (*hvm_load_handler) (struct domain *d,
 132.123 +                                 hvm_domain_context_t *h);
 132.124 +
 132.125 +/* Init-time function to declare a pair of handlers for a type,
 132.126 + * and the maximum buffer space needed to save this type of state */
 132.127 +void hvm_register_savevm(uint16_t typecode,
 132.128 +                         const char *name, 
 132.129 +                         hvm_save_handler save_state,
 132.130 +                         hvm_load_handler load_state,
 132.131 +                         size_t size, int kind);
 132.132 +
 132.133 +/* The space needed for saving can be per-domain or per-vcpu: */
 132.134 +#define HVMSR_PER_DOM  0
 132.135 +#define HVMSR_PER_VCPU 1
 132.136 +
 132.137 +/* Syntactic sugar around that function: specify the max number of
 132.138 + * saves, and this calculates the size of the buffer needed */
 132.139 +#define HVM_REGISTER_SAVE_RESTORE(_x, _save, _load, _num, _k)             \
 132.140 +static int __hvm_register_##_x##_save_and_restore(void)                   \
 132.141 +{                                                                         \
 132.142 +    hvm_register_savevm(HVM_SAVE_CODE(_x),                                \
 132.143 +                        #_x,                                              \
 132.144 +                        &_save,                                           \
 132.145 +                        &_load,                                           \
 132.146 +                        (_num) * (HVM_SAVE_LENGTH(_x)                     \
 132.147 +                                  + sizeof (struct hvm_save_descriptor)), \
 132.148 +                        _k);                                              \
 132.149 +    return 0;                                                             \
 132.150 +}                                                                         \
 132.151 +__initcall(__hvm_register_##_x##_save_and_restore);
 132.152 +
 132.153 +
 132.154 +/* Entry points for saving and restoring HVM domain state */
 132.155 +size_t hvm_save_size(struct domain *d);
 132.156 +int hvm_save(struct domain *d, hvm_domain_context_t *h);
 132.157 +int hvm_load(struct domain *d, hvm_domain_context_t *h);
 132.158 +
 132.159 +/* Arch-specific definitions. */
 132.160 +struct hvm_save_header;
 132.161 +void arch_hvm_save(struct hvm_save_header *hdr);
 132.162 +int arch_hvm_load(struct hvm_save_header *hdr);
 132.163 +
 132.164 +#endif /* __XEN_HVM_SAVE_H__ */
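
A hedged sketch of how a device model inside Xen might use the interfaces defined above. "FOO", struct hvm_hw_foo and both handlers are hypothetical, and a matching DECLARE_HVM_SAVE_TYPE(FOO, <code>, struct hvm_hw_foo) in the public headers is assumed:

    static int hvm_save_foo(struct domain *d, hvm_domain_context_t *h)
    {
        struct hvm_hw_foo ctxt;
        /* ... snapshot device state into ctxt ... */
        return hvm_save_entry(FOO, 0, h, &ctxt);  /* instance 0; 0 on success */
    }

    static int hvm_load_foo(struct domain *d, hvm_domain_context_t *h)
    {
        struct hvm_hw_foo ctxt;
        if ( hvm_load_entry(FOO, h, &ctxt) )
            return -1;
        /* ... apply ctxt to the device model ... */
        return 0;
    }

    /* One record per domain: buffer space for a single FOO entry. */
    HVM_REGISTER_SAVE_RESTORE(FOO, hvm_save_foo, hvm_load_foo, 1, HVMSR_PER_DOM);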
   133.1 --- a/xen/include/xen/trace.h	Wed Oct 17 10:36:31 2007 -0600
   133.2 +++ b/xen/include/xen/trace.h	Sun Oct 21 12:10:25 2007 -0600
   133.3 @@ -21,11 +21,12 @@
   133.4  #ifndef __XEN_TRACE_H__
   133.5  #define __XEN_TRACE_H__
   133.6  
   133.7 +extern int tb_init_done;
   133.8 +
   133.9  #include <xen/config.h>
  133.10  #include <public/sysctl.h>
  133.11  #include <public/trace.h>
  133.12 -
  133.13 -extern int tb_init_done;
  133.14 +#include <asm/trace.h>
  133.15  
  133.16  /* Used to initialise trace buffer functionality */
  133.17  void init_trace_bufs(void);
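
The declaration of tb_init_done moves above the includes because <asm/trace.h> may expand to macros that reference it, so the symbol must already be in scope when the arch header is parsed. A hedged, illustrative example of such a macro (not the real asm header):

    extern int tb_init_done;

    /* Only evaluate the trace call once tracing is initialised. */
    #define TRACE_IF_ACTIVE(call) do {  \
        if ( tb_init_done )             \
            (call);                     \
    } while ( 0 )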
   134.1 --- a/xen/tools/compat-build-header.py	Wed Oct 17 10:36:31 2007 -0600
   134.2 +++ b/xen/tools/compat-build-header.py	Sun Oct 21 12:10:25 2007 -0600
   134.3 @@ -1,4 +1,4 @@
   134.4 -#!/usr/bin/python
   134.5 +#!/usr/bin/env python
   134.6  
   134.7  import re,sys
   134.8  
   135.1 --- a/xen/tools/compat-build-source.py	Wed Oct 17 10:36:31 2007 -0600
   135.2 +++ b/xen/tools/compat-build-source.py	Sun Oct 21 12:10:25 2007 -0600
   135.3 @@ -1,4 +1,4 @@
   135.4 -#!/usr/bin/python
   135.5 +#!/usr/bin/env python
   135.6  
   135.7  import re,sys
   135.8  
   136.1 --- a/xen/tools/get-fields.sh	Wed Oct 17 10:36:31 2007 -0600
   136.2 +++ b/xen/tools/get-fields.sh	Sun Oct 21 12:10:25 2007 -0600
   136.3 @@ -1,11 +1,12 @@
   136.4 -#!/bin/bash
   136.5 +#!/bin/sh
   136.6  test -n "$1" -a -n "$2" -a -n "$3"
   136.7  set -ef
   136.8  
   136.9  SED=sed
  136.10 -[ -x /usr/xpg4/bin/sed ] && SED=/usr/xpg4/bin/sed
  136.11 +test -x /usr/xpg4/bin/sed && SED=/usr/xpg4/bin/sed
  136.12  
  136.13 -get_fields() {
  136.14 +get_fields ()
  136.15 +{
  136.16  	local level=1 aggr=0 name= fields=
  136.17  	for token in $2
  136.18  	do
  136.19 @@ -24,7 +25,7 @@ get_fields() {
  136.20  				return 0
  136.21  			fi
  136.22  			;;
  136.23 -		[[:alpha:]_]*)
  136.24 +		[a-zA-Z_]*)
  136.25  			test $aggr = 0 -o -n "$name" || name="$token"
  136.26  			;;
  136.27  		esac
  136.28 @@ -32,7 +33,8 @@ get_fields() {
  136.29  	done
  136.30  }
  136.31  
  136.32 -build_enums() {
  136.33 +build_enums ()
  136.34 +{
  136.35  	local level=1 kind= fields= members= named= id= token
  136.36  	for token in $2
  136.37  	do
  136.38 @@ -64,7 +66,7 @@ build_enums() {
  136.39  				named='?'
  136.40  			fi
  136.41  			;;
  136.42 -		[[:alpha:]]*)
  136.43 +		[a-zA-Z]*)
  136.44  			id=$token
  136.45  			if [ -n "$named" -a -n "${kind#*;}" ]
  136.46  			then
  136.47 @@ -85,7 +87,8 @@ build_enums() {
  136.48  	done
  136.49  }
  136.50  
  136.51 -handle_field() {
  136.52 +handle_field ()
  136.53 +{
  136.54  	if [ -z "$5" ]
  136.55  	then
  136.56  		echo " \\"
  136.57 @@ -161,7 +164,7 @@ for line in sys.stdin.readlines():
  136.58  					array_type=$token
  136.59  				fi
  136.60  				;;
  136.61 -			[[:alpha:]]*)
  136.62 +			[a-zA-Z]*)
  136.63  				id=$token
  136.64  				;;
  136.65  			[\,\;])
  136.66 @@ -202,13 +205,15 @@ for line in sys.stdin.readlines():
  136.67  	fi
  136.68  }
  136.69  
  136.70 -copy_array() {
  136.71 +copy_array ()
  136.72 +{
  136.73  	echo " \\"
  136.74  	echo "${1}if ((_d_)->$2 != (_s_)->$2) \\"
  136.75  	echo -n "$1    memcpy((_d_)->$2, (_s_)->$2, sizeof((_d_)->$2));"
  136.76  }
  136.77  
  136.78 -handle_array() {
  136.79 +handle_array ()
  136.80 +{
  136.81  	local i="i$(echo $4 | $SED 's,[^;], ,g' | wc -w | $SED 's,[[:space:]]*,,g')"
  136.82  	echo " \\"
  136.83  	echo "$1{ \\"
  136.84 @@ -225,7 +230,8 @@ handle_array() {
  136.85  	echo -n "$1}"
  136.86  }
  136.87  
  136.88 -build_body() {
  136.89 +build_body ()
  136.90 +{
  136.91  	echo
  136.92  	echo -n "#define XLAT_$1(_d_, _s_) do {"
  136.93  	local level=1 fields= id= array= arrlvl=1 array_type= type= token
  136.94 @@ -270,7 +276,7 @@ build_body() {
  136.95  				array_type=$token
  136.96  			fi
  136.97  			;;
  136.98 -		[[:alpha:]_]*)
  136.99 +		[a-zA-Z_]*)
 136.100  			if [ -n "$array" ]
 136.101  			then
 136.102  				array="$array $token"
 136.103 @@ -308,7 +314,8 @@ build_body() {
 136.104  	echo ""
 136.105  }
 136.106  
 136.107 -check_field() {
 136.108 +check_field ()
 136.109 +{
 136.110  	if [ -z "$(echo "$4" | $SED 's,[^{}],,g')" ]
 136.111  	then
 136.112  		echo "; \\"
 136.113 @@ -320,7 +327,7 @@ check_field() {
 136.114  				case $n in
 136.115  				struct|union)
 136.116  					;;
 136.117 -				[[:alpha:]_]*)
 136.118 +				[a-zA-Z_]*)
 136.119  					echo -n "    CHECK_$n"
 136.120  					break
 136.121  					;;
 136.122 @@ -350,7 +357,7 @@ check_field() {
 136.123  			"}")
 136.124  				level=$(expr $level - 1) id=
 136.125  				;;
 136.126 -			[[:alpha:]]*)
 136.127 +			[a-zA-Z]*)
 136.128  				id=$token
 136.129  				;;
 136.130  			[\,\;])
 136.131 @@ -366,7 +373,8 @@ check_field() {
 136.132  	fi
 136.133  }
 136.134  
 136.135 -build_check() {
 136.136 +build_check ()
 136.137 +{
 136.138  	echo
 136.139  	echo "#define CHECK_$1 \\"
 136.140  	local level=1 fields= kind= id= arrlvl=1 token
 136.141 @@ -395,7 +403,7 @@ build_check() {
 136.142  		"]")
 136.143  			arrlvl=$(expr $arrlvl - 1)
 136.144  			;;
 136.145 -		[[:alpha:]_]*)
 136.146 +		[a-zA-Z_]*)
 136.147  			test $level != 2 -o $arrlvl != 1 || id=$token
 136.148  			;;
 136.149  		[\,\;])
   137.1 --- a/xen/xsm/flask/hooks.c	Wed Oct 17 10:36:31 2007 -0600
   137.2 +++ b/xen/xsm/flask/hooks.c	Sun Oct 21 12:10:25 2007 -0600
   137.3 @@ -1042,6 +1042,8 @@ static int flask_add_to_physmap(struct d
   137.4  }
   137.5  #endif
   137.6  
   137.7 +long do_flask_op(XEN_GUEST_HANDLE(xsm_op_t) u_flask_op);
   137.8 +
   137.9  static struct xsm_operations flask_ops = {
  137.10      .security_domaininfo = flask_security_domaininfo,
  137.11      .setvcpucontext = flask_setvcpucontext,