ia64/xen-unstable

changeset 17766:f1508348ffab

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Mon Jun 02 11:35:39 2008 +0900 (2008-06-02)
parents d2a239224cb2 85fa199b4b7b
children 8f2979e64af5 73a1daa9715f
files extras/mini-os/main-caml.c tools/examples/xend-config-xenapi.sxp tools/examples/xm-config-xenapi.xml
line diff
     1.1 --- a/extras/mini-os/Config.mk	Mon Jun 02 11:35:02 2008 +0900
     1.2 +++ b/extras/mini-os/Config.mk	Mon Jun 02 11:35:39 2008 +0900
     1.3 @@ -41,10 +41,7 @@ include $(MINI-OS_ROOT)/$(TARGET_ARCH_DI
     1.4  extra_incl := $(foreach dir,$(EXTRA_INC),-I$(MINI-OS_ROOT)/include/$(dir))
     1.5  
     1.6  DEF_CPPFLAGS += -I$(MINI-OS_ROOT)/include
     1.7 -
     1.8 -ifeq ($(stubdom),y)
     1.9 -DEF_CPPFLAGS += -DCONFIG_STUBDOM
    1.10 -endif
    1.11 +DEF_CPPFLAGS += -D__MINIOS__
    1.12  
    1.13  ifeq ($(libc),y)
    1.14  DEF_CPPFLAGS += -DHAVE_LIBC
    1.15 @@ -58,11 +55,3 @@ DEF_CPPFLAGS += -DHAVE_LWIP
    1.16  DEF_CPPFLAGS += -I$(LWIPDIR)/src/include
    1.17  DEF_CPPFLAGS += -I$(LWIPDIR)/src/include/ipv4
    1.18  endif
    1.19 -
    1.20 -ifneq ($(QEMUDIR),)
    1.21 -qemu=y
    1.22 -endif
    1.23 -
    1.24 -ifneq ($(CAMLDIR),)
    1.25 -caml=y
    1.26 -endif
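
[Note] Config.mk now defines __MINIOS__ unconditionally for every Mini-OS build, replacing the stubdom-only CONFIG_STUBDOM define; the qemu/caml autodetection moves out to the stubdom Makefile section below. A minimal sketch of how shared code can key off the new macro (illustrative, not from the patch):

    #include <stdio.h>

    int main(void)
    {
    #ifdef __MINIOS__
        printf("compiled for Mini-OS\n");   /* -D__MINIOS__ from Config.mk */
    #else
        printf("regular host build\n");
    #endif
        return 0;
    }
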
     2.1 --- a/extras/mini-os/Makefile	Mon Jun 02 11:35:02 2008 +0900
     2.2 +++ b/extras/mini-os/Makefile	Mon Jun 02 11:35:39 2008 +0900
     2.3 @@ -73,44 +73,25 @@ lwip.a: $(LWO)
     2.4  OBJS += lwip.a
     2.5  endif
     2.6  
     2.7 -OBJS := $(filter-out lwip%.o $(LWO), $(OBJS))
     2.8 -
     2.9 -ifeq ($(caml),y)
    2.10 -CAMLLIB = $(shell ocamlc -where)
    2.11 -APP_OBJS += main-caml.o
    2.12 -APP_OBJS += $(CAMLDIR)/caml.o
    2.13 -APP_OBJS += $(CAMLLIB)/libasmrun.a
    2.14 -CFLAGS += -I$(CAMLLIB)
    2.15 -APP_LDLIBS += -lm
    2.16 -endif
    2.17 -OBJS := $(filter-out main-caml.o, $(OBJS))
    2.18 -
    2.19 -ifeq ($(qemu),y)
    2.20 -APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
    2.21 -CFLAGS += -DCONFIG_QEMU
    2.22 -endif
    2.23 -
    2.24 -ifneq ($(CDIR),)
    2.25 -APP_OBJS += $(CDIR)/main.a
    2.26 -APP_LDLIBS += 
    2.27 -endif
    2.28 +OBJS := $(filter-out main.o lwip%.o $(LWO), $(OBJS))
    2.29  
    2.30  ifeq ($(libc),y)
    2.31 -LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
    2.32 +APP_LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -whole-archive -lxenguest -lxenctrl -no-whole-archive
    2.33  APP_LDLIBS += -lpci
    2.34  APP_LDLIBS += -lz
    2.35 +APP_LDLIBS += -lm
    2.36  LDLIBS += -lc
    2.37  endif
    2.38  
    2.39 -ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),---y)
    2.40 +ifneq ($(APP_OBJS)-$(lwip),-y)
    2.41  OBJS := $(filter-out daytime.o, $(OBJS))
    2.42  endif
    2.43  
    2.44 -app.o: $(APP_OBJS) app.lds
    2.45 -	$(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
    2.46 +$(TARGET)_app.o: $(APP_OBJS) app.lds
    2.47 +	$(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined app_main -o $@
    2.48  
    2.49 -$(TARGET): links $(OBJS) app.o arch_lib
    2.50 -	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
    2.51 +$(TARGET): links $(OBJS) $(TARGET)_app.o arch_lib
    2.52 +	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
    2.53  	$(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
    2.54  	$(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
    2.55  	gzip -f -9 -c $@ >$@.gz
     3.1 --- a/extras/mini-os/arch/ia64/minios-ia64.lds	Mon Jun 02 11:35:02 2008 +0900
     3.2 +++ b/extras/mini-os/arch/ia64/minios-ia64.lds	Mon Jun 02 11:35:39 2008 +0900
     3.3 @@ -52,6 +52,23 @@ SECTIONS
     3.4    .fini_array     : { *(.fini_array) }
     3.5    PROVIDE (__fini_array_end = .);
     3.6  
     3.7 +  .ctors : {
     3.8 +        __CTOR_LIST__ = .;
     3.9 +        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
    3.10 +        *(SORT_BY_NAME(.ctors))
    3.11 +	SORT_BY_NAME(CONSTRUCTORS)
    3.12 +        QUAD(0)
    3.13 +        __CTOR_END__ = .;
    3.14 +        }
    3.15 +
    3.16 +  .dtors : {
    3.17 +        __DTOR_LIST__ = .;
    3.18 +        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
    3.19 +        *(SORT_BY_NAME(.dtors))
    3.20 +        QUAD(0)
    3.21 +        __DTOR_END__ = .;
    3.22 +        }
    3.23 +
    3.24    .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - (((5<<(61))+0x100000000) - (1 << 20)))
    3.25    { *(.IA_64.unwind_info) }
    3.26  
     4.1 --- a/extras/mini-os/arch/ia64/mm.c	Mon Jun 02 11:35:02 2008 +0900
     4.2 +++ b/extras/mini-os/arch/ia64/mm.c	Mon Jun 02 11:35:39 2008 +0900
     4.3 @@ -131,6 +131,14 @@ arch_init_demand_mapping_area(unsigned l
     4.4  }
     4.5  
     4.6  /* Helper function used in gnttab.c. */
     4.7 +void do_map_frames(unsigned long addr,
     4.8 +        unsigned long *f, unsigned long n, unsigned long stride,
     4.9 +	unsigned long increment, domid_t id, int may_fail, unsigned long prot)
    4.10 +{
    4.11 +	/* TODO */
    4.12 +	ASSERT(0);
    4.13 +}
    4.14 +
    4.15  void*
    4.16  map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride,
    4.17  	unsigned long increment, unsigned long alignment, domid_t id,
     5.1 --- a/extras/mini-os/arch/x86/minios-x86_32.lds	Mon Jun 02 11:35:02 2008 +0900
     5.2 +++ b/extras/mini-os/arch/x86/minios-x86_32.lds	Mon Jun 02 11:35:39 2008 +0900
     5.3 @@ -28,9 +28,25 @@ SECTIONS
     5.4    .fini_array     : { *(.fini_array) }
     5.5    PROVIDE (__fini_array_end = .);
     5.6  
     5.7 +  .ctors : {
     5.8 +        __CTOR_LIST__ = .;
     5.9 +        LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2)
    5.10 +        *(SORT_BY_NAME(.ctors))
    5.11 +	SORT_BY_NAME(CONSTRUCTORS)
    5.12 +        LONG(0)
    5.13 +        __CTOR_END__ = .;
    5.14 +        }
    5.15 +
    5.16 +  .dtors : {
    5.17 +        __DTOR_LIST__ = .;
    5.18 +        LONG((__DTOR_END__ - __DTOR_LIST__) / 4 - 2)
    5.19 +        *(SORT_BY_NAME(.dtors))
    5.20 +        LONG(0)
    5.21 +        __DTOR_END__ = .;
    5.22 +        }
    5.23 +
    5.24    .data : {			/* Data */
    5.25  	*(.data)
    5.26 -	CONSTRUCTORS
    5.27  	}
    5.28  
    5.29    _edata = .;			/* End of data section */
     6.1 --- a/extras/mini-os/arch/x86/minios-x86_64.lds	Mon Jun 02 11:35:02 2008 +0900
     6.2 +++ b/extras/mini-os/arch/x86/minios-x86_64.lds	Mon Jun 02 11:35:39 2008 +0900
     6.3 @@ -28,9 +28,25 @@ SECTIONS
     6.4    .fini_array     : { *(.fini_array) }
     6.5    PROVIDE (__fini_array_end = .);
     6.6  
     6.7 +  .ctors : {
     6.8 +        __CTOR_LIST__ = .;
     6.9 +        QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
    6.10 +        *(SORT_BY_NAME(.ctors))
    6.11 +	SORT_BY_NAME(CONSTRUCTORS)
    6.12 +        QUAD(0)
    6.13 +        __CTOR_END__ = .;
    6.14 +        }
    6.15 +
    6.16 +  .dtors : {
    6.17 +        __DTOR_LIST__ = .;
    6.18 +        QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
    6.19 +        *(SORT_BY_NAME(.dtors))
    6.20 +        QUAD(0)
    6.21 +        __DTOR_END__ = .;
    6.22 +        }
    6.23 +
    6.24    .data : {			/* Data */
    6.25  	*(.data)
    6.26 -	CONSTRUCTORS
    6.27  	}
    6.28  
    6.29    _edata = .;			/* End of data section */
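
[Note] The three linker-script hunks (3.x ia64, 5.x x86_32, 6.x x86_64) give Mini-OS explicit .ctors/.dtors tables: word 0 holds the entry count ((END - LIST) divided by the pointer size, minus 2 for the count word and the terminating 0), followed by the sorted constructor pointers. The main.c hunk later in this changeset walks exactly this layout. A standalone sketch of the layout and walk, with a hand-built table standing in for the linker-generated __CTOR_LIST__:

    typedef void (*ctor_t)(void);

    static void hello(void) { }                 /* a constructor body */

    /* Walk a count-prefixed table: slot 0 holds the entry count, slots
       1..count hold function pointers, and a terminating 0 follows. */
    static void run_ctors(const unsigned long *list)
    {
        unsigned long i;
        for (i = 1; i <= list[0]; i++)
            ((ctor_t)list[i])();
    }

    int main(void)
    {
        /* Hand-built stand-in for the linker-generated __CTOR_LIST__. */
        unsigned long ctor_list[3];
        ctor_list[0] = 1;                       /* (END - LIST) / 8 - 2 */
        ctor_list[1] = (unsigned long)hello;
        ctor_list[2] = 0;                       /* QUAD(0) terminator */
        run_ctors(ctor_list);
        return 0;
    }
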
     7.1 --- a/extras/mini-os/arch/x86/mm.c	Mon Jun 02 11:35:02 2008 +0900
     7.2 +++ b/extras/mini-os/arch/x86/mm.c	Mon Jun 02 11:35:39 2008 +0900
     7.3 @@ -59,11 +59,10 @@ void new_pt_frame(unsigned long *pt_pfn,
     7.4  {   
     7.5      pgentry_t *tab = (pgentry_t *)start_info.pt_base;
     7.6      unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn); 
     7.7 -    unsigned long prot_e, prot_t, pincmd;
     7.8 +    unsigned long prot_e, prot_t;
     7.9      mmu_update_t mmu_updates[1];
    7.10 -    struct mmuext_op pin_request;
    7.11      
    7.12 -    prot_e = prot_t = pincmd = 0;
    7.13 +    prot_e = prot_t = 0;
    7.14      DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, "
    7.15             "prev_l_mfn=%lx, offset=%lx", 
    7.16             level, *pt_pfn, prev_l_mfn, offset);
    7.17 @@ -77,18 +76,15 @@ void new_pt_frame(unsigned long *pt_pfn,
    7.18      case L1_FRAME:
    7.19           prot_e = L1_PROT;
    7.20           prot_t = L2_PROT;
    7.21 -         pincmd = MMUEXT_PIN_L1_TABLE;
    7.22           break;
    7.23      case L2_FRAME:
    7.24           prot_e = L2_PROT;
    7.25           prot_t = L3_PROT;
    7.26 -         pincmd = MMUEXT_PIN_L2_TABLE;
    7.27           break;
    7.28  #if defined(__x86_64__)
    7.29      case L3_FRAME:
    7.30           prot_e = L3_PROT;
    7.31           prot_t = L4_PROT;
    7.32 -         pincmd = MMUEXT_PIN_L3_TABLE;
    7.33           break;
    7.34  #endif
    7.35      default:
    7.36 @@ -113,15 +109,6 @@ void new_pt_frame(unsigned long *pt_pfn,
    7.37           do_exit();
    7.38      }
    7.39                          
    7.40 -    /* Pin the page to provide correct protection */
    7.41 -    pin_request.cmd = pincmd;
    7.42 -    pin_request.arg1.mfn = pfn_to_mfn(*pt_pfn);
    7.43 -    if(HYPERVISOR_mmuext_op(&pin_request, 1, NULL, DOMID_SELF) < 0)
    7.44 -    {
    7.45 -        printk("ERROR: pinning failed\n");
    7.46 -        do_exit();
    7.47 -    }
    7.48 -
    7.49      /* Now fill the new page table page with entries.
    7.50         Update the page directory as well. */
    7.51      mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
     8.1 --- a/extras/mini-os/hypervisor.c	Mon Jun 02 11:35:02 2008 +0900
     8.2 +++ b/extras/mini-os/hypervisor.c	Mon Jun 02 11:35:39 2008 +0900
     8.3 @@ -55,12 +55,12 @@ void do_hypervisor_callback(struct pt_re
     8.4      while ( l1 != 0 )
     8.5      {
     8.6          l1i = __ffs(l1);
     8.7 -        l1 &= ~(1 << l1i);
     8.8 +        l1 &= ~(1UL << l1i);
     8.9          
    8.10          while ( (l2 = active_evtchns(cpu, s, l1i)) != 0 )
    8.11          {
    8.12              l2i = __ffs(l2);
    8.13 -            l2 &= ~(1 << l2i);
    8.14 +            l2 &= ~(1UL << l2i);
    8.15  
    8.16              port = (l1i * (sizeof(unsigned long) * 8)) + l2i;
    8.17  			do_event(port, regs);
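
[Note] These event-channel masks (and the matching hunks in xmalloc.c and mm.c below) need the UL suffix because on 64-bit targets a plain 1 is a 32-bit int: shifting it by 32 or more is undefined behaviour and can never reach the high bits of an unsigned long. A tiny illustration (not from the patch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long ok = 1UL << 40;       /* 64-bit shift: 0x10000000000 */
        printf("%lx\n", ok);
        /* unsigned long bad = 1 << 40;        int shift: undefined behaviour */
        return 0;
    }
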
     9.1 --- a/extras/mini-os/include/lib.h	Mon Jun 02 11:35:02 2008 +0900
     9.2 +++ b/extras/mini-os/include/lib.h	Mon Jun 02 11:35:39 2008 +0900
     9.3 @@ -136,6 +136,7 @@ enum fd_type {
     9.4      FTYPE_CONSOLE,
     9.5      FTYPE_FILE,
     9.6      FTYPE_XENBUS,
     9.7 +    FTYPE_XC,
     9.8      FTYPE_EVTCHN,
     9.9      FTYPE_SOCKET,
    9.10      FTYPE_TAP,
    10.1 --- a/extras/mini-os/include/mm.h	Mon Jun 02 11:35:02 2008 +0900
    10.2 +++ b/extras/mini-os/include/mm.h	Mon Jun 02 11:35:39 2008 +0900
    10.3 @@ -67,6 +67,9 @@ void arch_init_p2m(unsigned long max_pfn
    10.4  void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride,
    10.5  	unsigned long increment, unsigned long alignment, domid_t id,
    10.6  	int may_fail, unsigned long prot);
    10.7 +void do_map_frames(unsigned long addr,
    10.8 +        unsigned long *f, unsigned long n, unsigned long stride,
    10.9 +	unsigned long increment, domid_t id, int may_fail, unsigned long prot);
   10.10  #ifdef HAVE_LIBC
   10.11  extern unsigned long heap, brk, heap_mapped, heap_end;
   10.12  #endif
    11.1 --- a/extras/mini-os/include/posix/pthread.h	Mon Jun 02 11:35:02 2008 +0900
    11.2 +++ b/extras/mini-os/include/posix/pthread.h	Mon Jun 02 11:35:39 2008 +0900
    11.3 @@ -1,18 +1,56 @@
    11.4  #ifndef _POSIX_PTHREAD_H
    11.5  #define _POSIX_PTHREAD_H
    11.6  
    11.7 +#include <stdlib.h>
    11.8 +
    11.9  /* Let's be single-threaded for now.  */
   11.10  
   11.11 -typedef void *pthread_key_t;
   11.12 -typedef struct {} pthread_mutex_t, pthread_once_t;
   11.13 +typedef struct {
   11.14 +    void *ptr;
   11.15 +} *pthread_key_t;
   11.16 +static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*))
   11.17 +{
   11.18 +    *key = malloc(sizeof(**key));
   11.19 +    (*key)->ptr = NULL;
   11.20 +    return 0;
   11.21 +}
   11.22 +static inline int pthread_setspecific(pthread_key_t key, const void *pointer)
   11.23 +{
   11.24 +    key->ptr = (void*) pointer;
   11.25 +    return 0;
   11.26 +}
   11.27 +static inline void *pthread_getspecific(pthread_key_t key)
   11.28 +{
   11.29 +    return key->ptr;
   11.30 +}
   11.31 +static inline int pthread_key_delete(pthread_key_t key)
   11.32 +{
   11.33 +    free(key);
   11.34 +    return 0;
   11.35 +}
   11.36 +
   11.37 +
   11.38 +
   11.39 +typedef struct {} pthread_mutex_t;
   11.40  #define PTHREAD_MUTEX_INITIALIZER {}
   11.41 -#define PTHREAD_ONCE_INIT {}
   11.42  static inline int pthread_mutex_lock(pthread_mutex_t *mutex) { return 0; }
   11.43  static inline int pthread_mutex_unlock(pthread_mutex_t *mutex) { return 0; }
   11.44 -static inline int pthread_key_create(pthread_key_t *key, void (*destr_function)(void*)) { *key = NULL; return 0; }
   11.45 -static inline int pthread_setspecific(pthread_key_t *key, const void *pointer) { *key = (void*) pointer; return 0; }
   11.46 -static inline void *pthread_getspecific(pthread_key_t *key) { return *key; }
   11.47 -static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void)) { init_routine(); return 0; }
   11.48 +
   11.49 +
   11.50 +
   11.51 +typedef struct {
   11.52 +    int done;
   11.53 +} pthread_once_t;
   11.54 +#define PTHREAD_ONCE_INIT { 0 }
   11.55 +
   11.56 +static inline int pthread_once(pthread_once_t *once_control, void (*init_routine)(void))
   11.57 +{
   11.58 +    if (!once_control->done) {
   11.59 +        once_control->done = 1;
   11.60 +        init_routine();
   11.61 +    }
   11.62 +    return 0;
   11.63 +}
   11.64  
   11.65  #define __thread
   11.66  
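
[Note] The pthread stubs stay single-threaded, but the old ones took the key by pointer (a nonstandard signature) and ran pthread_once's routine on every call; the new ones match the standard by-value signatures, give each key its own heap-allocated slot, and run the initialiser exactly once via a done flag. A usage sketch against these stubs:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_once_t once = PTHREAD_ONCE_INIT;
    static pthread_key_t key;

    static void init(void)
    {
        pthread_key_create(&key, NULL);     /* allocates the single slot */
    }

    int main(void)
    {
        pthread_once(&once, init);
        pthread_once(&once, init);          /* no-op the second time */
        pthread_setspecific(key, "value");
        printf("%s\n", (const char *)pthread_getspecific(key));
        pthread_key_delete(key);
        return 0;
    }
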
    12.1 --- a/extras/mini-os/include/x86/arch_mm.h	Mon Jun 02 11:35:02 2008 +0900
    12.2 +++ b/extras/mini-os/include/x86/arch_mm.h	Mon Jun 02 11:35:39 2008 +0900
    12.3 @@ -219,11 +219,6 @@ static __inline__ paddr_t machine_to_phy
    12.4  
    12.5  #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
    12.6  #define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
    12.7 -#ifndef __ASSEMBLY__
    12.8 -void do_map_frames(unsigned long addr,
    12.9 -        unsigned long *f, unsigned long n, unsigned long stride,
   12.10 -	unsigned long increment, domid_t id, int may_fail, unsigned long prot);
   12.11 -#endif
   12.12  #define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO)
   12.13  
   12.14  #endif /* _ARCH_MM_H_ */
    13.1 --- a/extras/mini-os/lib/sys.c	Mon Jun 02 11:35:02 2008 +0900
    13.2 +++ b/extras/mini-os/lib/sys.c	Mon Jun 02 11:35:39 2008 +0900
    13.3 @@ -81,6 +81,7 @@
    13.4  
    13.5  #define NOFILE 32
    13.6  extern int xc_evtchn_close(int fd);
    13.7 +extern int xc_interface_close(int fd);
    13.8  
    13.9  pthread_mutex_t fd_lock = PTHREAD_MUTEX_INITIALIZER;
   13.10  struct file files[NOFILE] = {
   13.11 @@ -259,10 +260,7 @@ int read(int fd, void *buf, size_t nbyte
   13.12  	    }
   13.13  	    return ret * sizeof(union xenfb_in_event);
   13.14          }
   13.15 -	case FTYPE_NONE:
   13.16 -	case FTYPE_XENBUS:
   13.17 -	case FTYPE_EVTCHN:
   13.18 -	case FTYPE_BLK:
   13.19 +	default:
   13.20  	    break;
   13.21      }
   13.22      printk("read(%d): Bad descriptor\n", fd);
   13.23 @@ -295,12 +293,7 @@ int write(int fd, const void *buf, size_
   13.24  	case FTYPE_TAP:
   13.25  	    netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
   13.26  	    return nbytes;
   13.27 -	case FTYPE_NONE:
   13.28 -	case FTYPE_XENBUS:
   13.29 -	case FTYPE_EVTCHN:
   13.30 -	case FTYPE_BLK:
   13.31 -	case FTYPE_KBD:
   13.32 -	case FTYPE_FB:
   13.33 +	default:
   13.34  	    break;
   13.35      }
   13.36      printk("write(%d): Bad descriptor\n", fd);
   13.37 @@ -351,15 +344,7 @@ int fsync(int fd) {
   13.38  	    }
   13.39  	    return 0;
   13.40  	}
   13.41 -	case FTYPE_NONE:
   13.42 -	case FTYPE_CONSOLE:
   13.43 -	case FTYPE_SOCKET:
   13.44 -	case FTYPE_XENBUS:
   13.45 -	case FTYPE_EVTCHN:
   13.46 -	case FTYPE_TAP:
   13.47 -	case FTYPE_BLK:
   13.48 -	case FTYPE_KBD:
   13.49 -	case FTYPE_FB:
   13.50 +	default:
   13.51  	    break;
   13.52      }
   13.53      printk("fsync(%d): Bad descriptor\n", fd);
   13.54 @@ -391,6 +376,9 @@ int close(int fd)
   13.55  	    files[fd].type = FTYPE_NONE;
   13.56  	    return res;
   13.57  	}
   13.58 +	case FTYPE_XC:
   13.59 +	    xc_interface_close(fd);
   13.60 +	    return 0;
   13.61  	case FTYPE_EVTCHN:
   13.62              xc_evtchn_close(fd);
   13.63              return 0;
   13.64 @@ -495,13 +483,7 @@ int fstat(int fd, struct stat *buf)
   13.65  	    stat_from_fs(buf, &stat);
   13.66  	    return 0;
   13.67  	}
   13.68 -	case FTYPE_NONE:
   13.69 -	case FTYPE_XENBUS:
   13.70 -	case FTYPE_EVTCHN:
   13.71 -	case FTYPE_TAP:
   13.72 -	case FTYPE_BLK:
   13.73 -	case FTYPE_KBD:
   13.74 -	case FTYPE_FB:
   13.75 +	default:
   13.76  	    break;
   13.77      }
   13.78  
   13.79 @@ -522,15 +504,7 @@ int ftruncate(int fd, off_t length)
   13.80  	    }
   13.81  	    return 0;
   13.82  	}
   13.83 -	case FTYPE_NONE:
   13.84 -	case FTYPE_CONSOLE:
   13.85 -	case FTYPE_SOCKET:
   13.86 -	case FTYPE_XENBUS:
   13.87 -	case FTYPE_EVTCHN:
   13.88 -	case FTYPE_TAP:
   13.89 -	case FTYPE_BLK:
   13.90 -	case FTYPE_KBD:
   13.91 -	case FTYPE_FB:
   13.92 +	default:
   13.93  	    break;
   13.94      }
   13.95  
   13.96 @@ -636,9 +610,10 @@ static const char file_types[] = {
   13.97      [FTYPE_NONE]	= 'N',
   13.98      [FTYPE_CONSOLE]	= 'C',
   13.99      [FTYPE_FILE]	= 'F',
  13.100 -    [FTYPE_XENBUS]	= 'X',
  13.101 +    [FTYPE_XENBUS]	= 'S',
  13.102 +    [FTYPE_XC]		= 'X',
  13.103      [FTYPE_EVTCHN]	= 'E',
  13.104 -    [FTYPE_SOCKET]	= 'S',
  13.105 +    [FTYPE_SOCKET]	= 's',
  13.106      [FTYPE_TAP]		= 'T',
  13.107      [FTYPE_BLK]		= 'B',
  13.108      [FTYPE_KBD]		= 'K',
  13.109 @@ -722,7 +697,7 @@ static int select_poll(int nfds, fd_set 
  13.110      /* Then see others as well. */
  13.111      for (i = 0; i < nfds; i++) {
  13.112  	switch(files[i].type) {
  13.113 -	case FTYPE_NONE:
  13.114 +	default:
  13.115  	    if (FD_ISSET(i, readfds) || FD_ISSET(i, writefds) || FD_ISSET(i, exceptfds))
  13.116  		printk("bogus fd %d in select\n", i);
  13.117  	    /* Fallthrough.  */
  13.118 @@ -1083,14 +1058,20 @@ int clock_gettime(clockid_t clk_id, stru
  13.119  
  13.120  void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset)
  13.121  {
  13.122 +    unsigned long n = (length + PAGE_SIZE - 1) / PAGE_SIZE;
  13.123 +
  13.124      ASSERT(!start);
  13.125 -    length = (length + PAGE_SIZE - 1) & PAGE_MASK;
  13.126      ASSERT(prot == (PROT_READ|PROT_WRITE));
  13.127 -    ASSERT(flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON));
  13.128 -    ASSERT(fd == -1);
  13.129 +    ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON)))
  13.130 +        || (fd != -1 && flags == MAP_SHARED));
  13.131      ASSERT(offset == 0);
  13.132  
  13.133 -    return map_zero(length / PAGE_SIZE, 1);
  13.134 +    if (fd == -1)
  13.135 +        return map_zero(n, 1);
  13.136 +    else if (files[fd].type == FTYPE_XC) {
  13.137 +        unsigned long zero = 0;
  13.138 +        return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0);
  13.139 +    } else ASSERT(0);
  13.140  }
  13.141  #if defined(__x86_64__) || defined(__ia64__)
  13.142  __typeof__(mmap) mmap64 __attribute__((__alias__("mmap")));
  13.143 @@ -1110,7 +1091,7 @@ int munmap(void *start, size_t length)
  13.144  	call[i].args[0] = (unsigned long) &data[i];
  13.145  	call[i].args[1] = 0;
  13.146  	call[i].args[2] = 0;
  13.147 -	call[i].args[3] = UVMF_INVLPG | UVMF_ALL;
  13.148 +	call[i].args[3] = UVMF_INVLPG;
  13.149      }
  13.150  
  13.151      ret = HYPERVISOR_multicall(call, n);
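
[Note] mmap() now rounds the length to whole pages up front and, besides the anonymous case, accepts a MAP_SHARED mapping on an FTYPE_XC descriptor, returning freshly mapped frames for libxc to use; munmap() also drops UVMF_ALL, invalidating only the local TLB entries. A hedged sketch of the two accepted call shapes (xc_fd is a hypothetical descriptor of type FTYPE_XC, not a name from the patch):

    #include <sys/mman.h>

    extern int xc_fd;                       /* hypothetical FTYPE_XC fd */

    void example(void)
    {
        /* Anonymous mapping: backed by zero pages via map_zero(). */
        void *anon = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANON, -1, 0);

        /* FTYPE_XC mapping: fresh frames via map_frames_ex(). */
        void *xcbuf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_SHARED, xc_fd, 0);
        (void)anon; (void)xcbuf;
    }
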
    14.1 --- a/extras/mini-os/lib/xmalloc.c	Mon Jun 02 11:35:02 2008 +0900
    14.2 +++ b/extras/mini-os/lib/xmalloc.c	Mon Jun 02 11:35:39 2008 +0900
    14.3 @@ -127,7 +127,7 @@ static void *xmalloc_whole_pages(size_t 
    14.4      if ( hdr == NULL )
    14.5          return NULL;
    14.6  
    14.7 -    hdr->size = (1 << (pageorder + PAGE_SHIFT));
    14.8 +    hdr->size = (1UL << (pageorder + PAGE_SHIFT));
    14.9      /* Debugging aid. */
   14.10      hdr->freelist.next = hdr->freelist.prev = NULL;
   14.11  
    15.1 --- a/extras/mini-os/main-caml.c	Mon Jun 02 11:35:02 2008 +0900
    15.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.3 @@ -1,42 +0,0 @@
    15.4 -/*
    15.5 - * Caml bootstrap
    15.6 - *
    15.7 - * Samuel Thibault <Samuel.Thibault@eu.citrix.net>, January 2008
    15.8 - */
    15.9 -
   15.10 -#include <stdio.h>
   15.11 -#include <errno.h>
   15.12 -
   15.13 -#include <caml/mlvalues.h>
   15.14 -#include <caml/callback.h>
   15.15 -#include <unistd.h>
   15.16 -
   15.17 -/* Ugly binary compatibility with Linux */
   15.18 -FILE *_stderr asm("stderr");
   15.19 -int *__errno_location;
   15.20 -/* Will probably break everything, probably need to fetch from glibc */
   15.21 -void *__ctype_b_loc;
   15.22 -
   15.23 -int main(int argc, char *argv[], char *envp[])
   15.24 -{
   15.25 -    value *val;
   15.26 -
   15.27 -    /* Get current thread's value */
   15.28 -    _stderr = stderr;
   15.29 -    __errno_location = &errno;
   15.30 -
   15.31 -    printf("starting caml\n");
   15.32 -
   15.33 -    /* Wait before things might hang up */
   15.34 -    sleep(1);
   15.35 -
   15.36 -    caml_startup(argv);
   15.37 -    val = caml_named_value("main");
   15.38 -    if (!val) {
   15.39 -        printf("Couldn't find Caml main");
   15.40 -        return 1;
   15.41 -    }
   15.42 -    caml_callback(*val, Val_int(0));
   15.43 -    printf("callback returned\n");
   15.44 -    return 0;
   15.45 -}
    16.1 --- a/extras/mini-os/main.c	Mon Jun 02 11:35:02 2008 +0900
    16.2 +++ b/extras/mini-os/main.c	Mon Jun 02 11:35:39 2008 +0900
    16.3 @@ -4,7 +4,6 @@
    16.4   * Samuel Thibault <Samuel.Thibault@eu.citrix.net>, October 2007
    16.5   */
    16.6  
    16.7 -#ifdef HAVE_LIBC
    16.8  #include <os.h>
    16.9  #include <sched.h>
   16.10  #include <console.h>
   16.11 @@ -19,8 +18,8 @@
   16.12  extern int main(int argc, char *argv[], char *envp[]);
   16.13  extern void __libc_init_array(void);
   16.14  extern void __libc_fini_array(void);
   16.15 -
   16.16 -struct thread *main_thread;
   16.17 +extern unsigned long __CTOR_LIST__[];
   16.18 +extern unsigned long __DTOR_LIST__[];
   16.19  
   16.20  #if 0
   16.21  #include <stdio.h>
   16.22 @@ -147,6 +146,8 @@ static void call_main(void *p)
   16.23  
   16.24      __libc_init_array();
   16.25      environ = envp;
   16.26 +    for (i = 1; i <= __CTOR_LIST__[0]; i++)
   16.27 +        ((void((*)(void)))__CTOR_LIST__[i]) ();
   16.28      tzset();
   16.29  
   16.30      exit(main(argc, argv, envp));
   16.31 @@ -154,6 +155,10 @@ static void call_main(void *p)
   16.32  
   16.33  void _exit(int ret)
   16.34  {
   16.35 +    int i;
   16.36 +
   16.37 +    for (i = 1; i <= __DTOR_LIST__[0]; i++)
   16.38 +        ((void((*)(void)))__DTOR_LIST__[i]) ();
   16.39      close_all_files();
   16.40      __libc_fini_array();
   16.41      printk("main returned %d\n", ret);
   16.42 @@ -172,4 +177,3 @@ int app_main(start_info_t *si)
   16.43      main_thread = create_thread("main", call_main, si);
   16.44      return 0;
   16.45  }
   16.46 -#endif
    17.1 --- a/extras/mini-os/mm.c	Mon Jun 02 11:35:02 2008 +0900
    17.2 +++ b/extras/mini-os/mm.c	Mon Jun 02 11:35:39 2008 +0900
    17.3 @@ -58,7 +58,7 @@ static unsigned long *alloc_bitmap;
    17.4  #define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
    17.5  
    17.6  #define allocated_in_map(_pn) \
    17.7 -(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
    17.8 +(alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1UL<<((_pn)&(PAGES_PER_MAPWORD-1))))
    17.9  
   17.10  /*
   17.11   * Hint regarding bitwise arithmetic in map_{alloc,free}:
   17.12 @@ -80,13 +80,13 @@ static void map_alloc(unsigned long firs
   17.13  
   17.14      if ( curr_idx == end_idx )
   17.15      {
   17.16 -        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
   17.17 +        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
   17.18      }
   17.19      else 
   17.20      {
   17.21 -        alloc_bitmap[curr_idx] |= -(1<<start_off);
   17.22 -        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
   17.23 -        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
   17.24 +        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
   17.25 +        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
   17.26 +        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
   17.27      }
   17.28  }
   17.29  
   17.30 @@ -102,13 +102,13 @@ static void map_free(unsigned long first
   17.31  
   17.32      if ( curr_idx == end_idx )
   17.33      {
   17.34 -        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
   17.35 +        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
   17.36      }
   17.37      else 
   17.38      {
   17.39 -        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
   17.40 +        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
   17.41          while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
   17.42 -        alloc_bitmap[curr_idx] &= -(1<<end_off);
   17.43 +        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
   17.44      }
   17.45  }
   17.46  
   17.47 @@ -178,7 +178,7 @@ USED static void print_chunks(void *star
   17.48          head = free_head[order];
   17.49          while(!FREELIST_EMPTY(head))
   17.50          {
   17.51 -            for(count = 0; count < 1<< head->level; count++)
   17.52 +            for(count = 0; count < 1UL<< head->level; count++)
   17.53              {
   17.54                  if(count + virt_to_pfn(head) - pfn_start < 1000)
   17.55                      chunks[count + virt_to_pfn(head) - pfn_start] = current;
   17.56 @@ -235,13 +235,13 @@ static void init_page_allocator(unsigned
   17.57           * Next chunk is limited by alignment of min, but also
   17.58           * must not be bigger than remaining range.
   17.59           */
   17.60 -        for ( i = PAGE_SHIFT; (1<<(i+1)) <= range; i++ )
   17.61 -            if ( min & (1<<i) ) break;
   17.62 +        for ( i = PAGE_SHIFT; (1UL<<(i+1)) <= range; i++ )
   17.63 +            if ( min & (1UL<<i) ) break;
   17.64  
   17.65  
   17.66          ch = (chunk_head_t *)min;
   17.67 -        min   += (1<<i);
   17.68 -        range -= (1<<i);
   17.69 +        min   += (1UL<<i);
   17.70 +        range -= (1UL<<i);
   17.71          ct = (chunk_tail_t *)min-1;
   17.72          i -= PAGE_SHIFT;
   17.73          ch->level       = i;
   17.74 @@ -280,8 +280,8 @@ unsigned long alloc_pages(int order)
   17.75      {
   17.76          /* Split into two equal parts. */
   17.77          i--;
   17.78 -        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1<<(i+PAGE_SHIFT)));
   17.79 -        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1<<(i+PAGE_SHIFT)))-1;
   17.80 +        spare_ch = (chunk_head_t *)((char *)alloc_ch + (1UL<<(i+PAGE_SHIFT)));
   17.81 +        spare_ct = (chunk_tail_t *)((char *)spare_ch + (1UL<<(i+PAGE_SHIFT)))-1;
   17.82  
   17.83          /* Create new header for spare chunk. */
   17.84          spare_ch->level = i;
   17.85 @@ -294,7 +294,7 @@ unsigned long alloc_pages(int order)
   17.86          free_head[i] = spare_ch;
   17.87      }
   17.88      
   17.89 -    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1<<order);
   17.90 +    map_alloc(PHYS_PFN(to_phys(alloc_ch)), 1UL<<order);
   17.91  
   17.92      return((unsigned long)alloc_ch);
   17.93  
   17.94 @@ -312,16 +312,16 @@ void free_pages(void *pointer, int order
   17.95      unsigned long mask;
   17.96      
   17.97      /* First free the chunk */
   17.98 -    map_free(virt_to_pfn(pointer), 1 << order);
   17.99 +    map_free(virt_to_pfn(pointer), 1UL << order);
  17.100      
  17.101      /* Create free chunk */
  17.102      freed_ch = (chunk_head_t *)pointer;
  17.103 -    freed_ct = (chunk_tail_t *)((char *)pointer + (1<<(order + PAGE_SHIFT)))-1;
  17.104 +    freed_ct = (chunk_tail_t *)((char *)pointer + (1UL<<(order + PAGE_SHIFT)))-1;
  17.105      
   17.106      /* Now, possibly we can coalesce chunks together */
  17.107      while(order < FREELIST_SIZE)
  17.108      {
  17.109 -        mask = 1 << (order + PAGE_SHIFT);
  17.110 +        mask = 1UL << (order + PAGE_SHIFT);
  17.111          if((unsigned long)freed_ch & mask) 
  17.112          {
  17.113              to_merge_ch = (chunk_head_t *)((char *)freed_ch - mask);
    18.1 --- a/extras/mini-os/sched.c	Mon Jun 02 11:35:02 2008 +0900
    18.2 +++ b/extras/mini-os/sched.c	Mon Jun 02 11:35:39 2008 +0900
    18.3 @@ -58,6 +58,8 @@ struct thread *idle_thread = NULL;
    18.4  LIST_HEAD(exited_threads);
    18.5  static int threads_started;
    18.6  
    18.7 +struct thread *main_thread;
    18.8 +
    18.9  void inline print_runqueue(void)
   18.10  {
   18.11      struct list_head *it;
    19.1 --- a/stubdom/Makefile	Mon Jun 02 11:35:02 2008 +0900
    19.2 +++ b/stubdom/Makefile	Mon Jun 02 11:35:39 2008 +0900
    19.3 @@ -37,7 +37,7 @@ export CROSS_COMPILE=$(GNU_TARGET_ARCH)-
    19.4  export PATH:=$(CROSS_PREFIX)/bin:$(PATH)
    19.5  
    19.6  .PHONY: all
    19.7 -all: qemu-stubdom
    19.8 +all: ioemu-stubdom c-stubdom
    19.9  
   19.10  ################
   19.11  # Cross-binutils
   19.12 @@ -174,6 +174,7 @@ mk-symlinks:
   19.13  	  ([ ! -h config-host.h ] || rm -f config-host.h) && \
   19.14  	  ([ ! -h config-host.mak ] || rm -f config-host.mak) )
   19.15  	[ -h mini-os ] || ln -sf ../extras/mini-os .
   19.16 +	[ -h mini-os/include/xen ] || ln -sf ../../../xen/include/public mini-os/include/xen
   19.17  
   19.18  #######
   19.19  # libxc
   19.20 @@ -198,40 +199,41 @@ ioemu: cross-zlib cross-libpci mk-symlin
   19.21  ######
   19.22  
   19.23  .PHONY: caml
   19.24 -caml:
   19.25 -	$(MAKE) -C $@
   19.26 +caml: mk-symlinks
   19.27 +	$(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
   19.28  
   19.29  ###
   19.30  # C
   19.31  ###
   19.32  
   19.33  .PHONY: c
   19.34 -c:
   19.35 -	$(MAKE) -C $@
   19.36 +c: mk-symlinks
   19.37 +	$(MAKE) -C $@ LWIPDIR=$(CURDIR)/lwip-cvs 
   19.38  
   19.39  ########
   19.40  # minios
   19.41  ########
   19.42  
   19.43 -.PHONY: qemu-stubdom
   19.44 -qemu-stubdom: mk-symlinks lwip-cvs libxc ioemu
   19.45 -	$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs QEMUDIR=$(CURDIR)/ioemu
   19.46 +.PHONY: ioemu-stubdom
   19.47 +ioemu-stubdom: lwip-cvs libxc ioemu
   19.48 +	$(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/ioemu/i386-dm-stubdom/qemu.a $(CURDIR)/ioemu/i386-dm-stubdom/libqemu.a"
   19.49  
   19.50 +CAMLLIB = $(shell ocamlc -where)
   19.51  .PHONY: caml-stubdom
   19.52 -caml-stubdom: mk-symlinks lwip-cvs libxc cross-libpci caml
   19.53 -	$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CAMLDIR=$(CURDIR)/caml
   19.54 +caml-stubdom: lwip-cvs libxc caml
   19.55 +	$(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS="$(CURDIR)/caml/main-c.o $(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
   19.56  
   19.57  .PHONY: c-stubdom
   19.58 -c-stubdom: mk-symlinks lwip-cvs libxc cross-libpci c
   19.59 -	$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs CDIR=$(CURDIR)/c
   19.60 +c-stubdom: lwip-cvs libxc c
   19.61 +	$(MAKE) -C mini-os TARGET=$@ LWIPDIR=$(CURDIR)/lwip-cvs APP_OBJS=$(CURDIR)/c/main.a
   19.62  
   19.63  #########
   19.64  # install
   19.65  #########
   19.66  
   19.67 -install: mini-os/mini-os.gz
   19.68 +install: mini-os/ioemu-stubdom.gz
   19.69  	$(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
   19.70 -	$(INSTALL_PROG) mini-os/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
   19.71 +	$(INSTALL_PROG) $< "$(DESTDIR)/usr/lib/xen/boot/stubdom.gz"
   19.72  
   19.73  #######
   19.74  # clean
   19.75 @@ -242,6 +244,7 @@ install: mini-os/mini-os.gz
   19.76  clean:
   19.77  	-$(MAKE) -C mini-os LWIPDIR=$(CURDIR)/lwip-cvs clean
   19.78  	$(MAKE) -C caml clean
   19.79 +	$(MAKE) -C c clean
   19.80  	rm -fr libxc ioemu mini-os include
   19.81  
   19.82  # clean the cross-compilation result
    20.1 --- a/stubdom/c/Makefile	Mon Jun 02 11:35:02 2008 +0900
    20.2 +++ b/stubdom/c/Makefile	Mon Jun 02 11:35:39 2008 +0900
    20.3 @@ -2,7 +2,12 @@ XEN_ROOT = ../..
    20.4  
    20.5  include $(XEN_ROOT)/Config.mk
    20.6  
    20.7 -main.a: main.o
    20.8 +all: main.a
    20.9 +
   20.10 +main-c.c:
   20.11 +	ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
   20.12 +
   20.13 +main.a: main-c.o main.o 
   20.14  	$(AR) cr $@ $^
   20.15  
   20.16  clean:
    21.1 --- a/stubdom/c/main.c	Mon Jun 02 11:35:02 2008 +0900
    21.2 +++ b/stubdom/c/main.c	Mon Jun 02 11:35:39 2008 +0900
    21.3 @@ -1,4 +1,6 @@
    21.4  #include <stdio.h>
    21.5 +#include <unistd.h>
    21.6 +
    21.7  int main(void) {
    21.8          sleep(2);
    21.9          printf("Hello, world!\n");
    22.1 --- a/stubdom/caml/Makefile	Mon Jun 02 11:35:02 2008 +0900
    22.2 +++ b/stubdom/caml/Makefile	Mon Jun 02 11:35:39 2008 +0900
    22.3 @@ -2,12 +2,20 @@ XEN_ROOT = ../..
    22.4  
    22.5  include $(XEN_ROOT)/Config.mk
    22.6  
    22.7 +CAMLLIB = $(shell ocamlc -where)
    22.8 +DEF_CPPFLAGS += -I$(CAMLLIB)
    22.9 +
   22.10  OCAMLFIND=ocamlfind
   22.11  OCAMLOPT=ocamlopt
   22.12  
   22.13  OBJS := hello.cmx
   22.14  LIBS := 
   22.15  
   22.16 +all: main-c.o main-caml.o caml.o
   22.17 +
   22.18 +main-c.c:
   22.19 +	ln -sf $(XEN_ROOT)/extras/mini-os/main.c $@
   22.20 +
   22.21  %.cmx: %.ml
   22.22  	$(OCAMLFIND) $(OCAMLOPT) -c $< -o $@
   22.23  
   22.24 @@ -15,4 +23,4 @@ caml.o: $(OBJS)
   22.25  	$(OCAMLFIND) $(OCAMLOPT) $(LIBS) $^ -output-obj -o $@
   22.26  
   22.27  clean:
   22.28 -	rm -f *.o *.cmx *.cmi
   22.29 +	rm -f *.a *.o *.cmx *.cmi
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/stubdom/caml/main-caml.c	Mon Jun 02 11:35:39 2008 +0900
    23.3 @@ -0,0 +1,42 @@
    23.4 +/*
    23.5 + * Caml bootstrap
    23.6 + *
    23.7 + * Samuel Thibault <Samuel.Thibault@eu.citrix.net>, January 2008
    23.8 + */
    23.9 +
   23.10 +#include <stdio.h>
   23.11 +#include <errno.h>
   23.12 +
   23.13 +#include <caml/mlvalues.h>
   23.14 +#include <caml/callback.h>
   23.15 +#include <unistd.h>
   23.16 +
   23.17 +/* Ugly binary compatibility with Linux */
   23.18 +FILE *_stderr asm("stderr");
   23.19 +int *__errno_location;
   23.20 +/* Will probably break everything, probably need to fetch from glibc */
   23.21 +void *__ctype_b_loc;
   23.22 +
   23.23 +int main(int argc, char *argv[], char *envp[])
   23.24 +{
   23.25 +    value *val;
   23.26 +
   23.27 +    /* Get current thread's value */
   23.28 +    _stderr = stderr;
   23.29 +    __errno_location = &errno;
   23.30 +
   23.31 +    printf("starting caml\n");
   23.32 +
   23.33 +    /* Wait before things might hang up */
   23.34 +    sleep(1);
   23.35 +
   23.36 +    caml_startup(argv);
   23.37 +    val = caml_named_value("main");
   23.38 +    if (!val) {
   23.39 +        printf("Couldn't find Caml main");
   23.40 +        return 1;
   23.41 +    }
   23.42 +    caml_callback(*val, Val_int(0));
   23.43 +    printf("callback returned\n");
   23.44 +    return 0;
   23.45 +}
    24.1 --- a/tools/examples/Makefile	Mon Jun 02 11:35:02 2008 +0900
    24.2 +++ b/tools/examples/Makefile	Mon Jun 02 11:35:39 2008 +0900
    24.3 @@ -9,9 +9,7 @@ XENDOMAINS_SYSCONFIG = init.d/sysconfig.
    24.4  # Xen configuration dir and configs to go there.
    24.5  XEN_CONFIG_DIR = /etc/xen
    24.6  XEN_CONFIGS = xend-config.sxp
    24.7 -XEN_CONFIGS += xend-config-xenapi.sxp
    24.8  XEN_CONFIGS += xm-config.xml
    24.9 -XEN_CONFIGS += xm-config-xenapi.xml
   24.10  XEN_CONFIGS += xmexample1 
   24.11  XEN_CONFIGS += xmexample2
   24.12  XEN_CONFIGS += xmexample.hvm
    25.1 --- a/tools/examples/xend-config-xenapi.sxp	Mon Jun 02 11:35:02 2008 +0900
    25.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.3 @@ -1,196 +0,0 @@
    25.4 -# -*- sh -*-
    25.5 -
    25.6 -#
    25.7 -# Xend configuration file.
    25.8 -#
    25.9 -
   25.10 -# This example configuration is appropriate for an installation that 
   25.11 -# utilizes a bridged network configuration. Access to xend via http
   25.12 -# is disabled.  
   25.13 -
   25.14 -# Commented out entries show the default for that entry, unless otherwise
   25.15 -# specified.
   25.16 -
   25.17 -#(logfile /var/log/xen/xend.log)
   25.18 -#(loglevel DEBUG)
   25.19 -
   25.20 -
   25.21 -# The Xen-API server configuration.  (Please note that this server is
   25.22 -# available as an UNSUPPORTED PREVIEW in Xen 3.0.4, and should not be relied
   25.23 -# upon).
   25.24 -#
   25.25 -# This value configures the ports, interfaces, and access controls for the
   25.26 -# Xen-API server.  Each entry in the list starts with either unix, a port
   25.27 -# number, or an address:port pair.  If this is "unix", then a UDP socket is
   25.28 -# opened, and this entry applies to that.  If it is a port, then Xend will
   25.29 -# listen on all interfaces on that TCP port, and if it is an address:port
   25.30 -# pair, then Xend will listen on the specified port, using the interface with
   25.31 -# the specified address.
   25.32 -#
   25.33 -# The subsequent string configures the user-based access control for the
   25.34 -# listener in question.  This can be one of "none" or "pam", indicating either
   25.35 -# that users should be allowed access unconditionally, or that the local
   25.36 -# Pluggable Authentication Modules configuration should be used.  If this
   25.37 -# string is missing or empty, then "pam" is used.
   25.38 -#
   25.39 -# The final string gives the host-based access control for that listener. If
   25.40 -# this is missing or empty, then all connections are accepted.  Otherwise,
   25.41 -# this should be a space-separated sequence of regular expressions; any host
   25.42 -# with a fully-qualified domain name or an IP address that matches one of
   25.43 -# these regular expressions will be accepted.
   25.44 -#
   25.45 -# Example: listen on TCP port 9363 on all interfaces, accepting connections
   25.46 -# only from machines in example.com or localhost, and allow access through
   25.47 -# the unix domain socket unconditionally:
   25.48 -#
   25.49 -   (xen-api-server ((9363 none)))
   25.50 -#                    (unix none)))
   25.51 -#
   25.52 -# Optionally, the TCP Xen-API server can use SSL by specifying the private
   25.53 -# key and certificate location:
   25.54 -#
   25.55 -#                    (9367 pam '' /etc/xen/xen-api.key /etc/xen/xen-api.crt)
   25.56 -#
   25.57 -# Default:
   25.58 -#   (xen-api-server ((unix)))
   25.59 -
   25.60 -
   25.61 -#(xend-http-server no)
   25.62 -#(xend-unix-server no)
   25.63 -#(xend-tcp-xmlrpc-server no)
   25.64 -#(xend-unix-xmlrpc-server yes)
   25.65 -#(xend-relocation-server no)
   25.66 -(xend-relocation-server yes)
   25.67 -
   25.68 -#(xend-unix-path /var/lib/xend/xend-socket)
   25.69 -
   25.70 -
   25.71 -# Address and port xend should use for the legacy TCP XMLRPC interface, 
   25.72 -# if xend-tcp-xmlrpc-server is set.
   25.73 -#(xend-tcp-xmlrpc-server-address 'localhost')
   25.74 -#(xend-tcp-xmlrpc-server-port 8006)
   25.75 -
   25.76 -# SSL key and certificate to use for the legacy TCP XMLRPC interface.
   25.77 -# Setting these will mean that this port serves only SSL connections as
   25.78 -# opposed to plaintext ones.
   25.79 -#(xend-tcp-xmlrpc-server-ssl-key-file  /etc/xen/xmlrpc.key)
   25.80 -#(xend-tcp-xmlrpc-server-ssl-cert-file /etc/xen/xmlrpc.crt)
   25.81 -
   25.82 -
   25.83 -# Port xend should use for the HTTP interface, if xend-http-server is set.
   25.84 -#(xend-port            8000)
   25.85 -
   25.86 -# Port xend should use for the relocation interface, if xend-relocation-server
   25.87 -# is set.
   25.88 -#(xend-relocation-port 8002)
   25.89 -
   25.90 -# Address xend should listen on for HTTP connections, if xend-http-server is
   25.91 -# set.
   25.92 -# Specifying 'localhost' prevents remote connections.
   25.93 -# Specifying the empty string '' (the default) allows all connections.
   25.94 -#(xend-address '')
   25.95 -#(xend-address localhost)
   25.96 -
   25.97 -# Address xend should listen on for relocation-socket connections, if
   25.98 -# xend-relocation-server is set.
   25.99 -# Meaning and default as for xend-address above.
  25.100 -#(xend-relocation-address '')
  25.101 -
  25.102 -# The hosts allowed to talk to the relocation port.  If this is empty (the
  25.103 -# default), then all connections are allowed (assuming that the connection
  25.104 -# arrives on a port and interface on which we are listening; see
  25.105 -# xend-relocation-port and xend-relocation-address above).  Otherwise, this
  25.106 -# should be a space-separated sequence of regular expressions.  Any host with
  25.107 -# a fully-qualified domain name or an IP address that matches one of these
  25.108 -# regular expressions will be accepted.
  25.109 -#
  25.110 -# For example:
  25.111 -#  (xend-relocation-hosts-allow '^localhost$ ^.*\\.example\\.org$')
  25.112 -#
  25.113 -#(xend-relocation-hosts-allow '')
  25.114 -(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$')
  25.115 -
  25.116 -# The limit (in kilobytes) on the size of the console buffer
  25.117 -#(console-limit 1024)
  25.118 -
  25.119 -##
  25.120 -# To bridge network traffic, like this:
  25.121 -#
  25.122 -# dom0: ----------------- bridge -> real eth0 -> the network
  25.123 -#                            |
  25.124 -# domU: fake eth0 -> vifN.0 -+
  25.125 -#
  25.126 -# use
  25.127 -#
  25.128 -# (network-script network-bridge)
  25.129 -#
  25.130 -# Your default ethernet device is used as the outgoing interface, by default. 
  25.131 -# To use a different one (e.g. eth1) use
  25.132 -#
  25.133 -# (network-script 'network-bridge netdev=eth1')
  25.134 -#
  25.135 -# The bridge is named xenbr0, by default.  To rename the bridge, use
  25.136 -#
  25.137 -# (network-script 'network-bridge bridge=<name>')
  25.138 -#
  25.139 -# It is possible to use the network-bridge script in more complicated
  25.140 -# scenarios, such as having two outgoing interfaces, with two bridges, and
  25.141 -# two fake interfaces per guest domain.  To do things like this, write
  25.142 -# yourself a wrapper script, and call network-bridge from it, as appropriate.
  25.143 -#
  25.144 -(network-script network-bridge)
  25.145 -
  25.146 -# The script used to control virtual interfaces.  This can be overridden on a
  25.147 -# per-vif basis when creating a domain or a configuring a new vif.  The
  25.148 -# vif-bridge script is designed for use with the network-bridge script, or
  25.149 -# similar configurations.
  25.150 -#
  25.151 -# If you have overridden the bridge name using
  25.152 -# (network-script 'network-bridge bridge=<name>') then you may wish to do the
  25.153 -# same here.  The bridge name can also be set when creating a domain or
  25.154 -# configuring a new vif, but a value specified here would act as a default.
  25.155 -#
  25.156 -# If you are using only one bridge, the vif-bridge script will discover that,
  25.157 -# so there is no need to specify it explicitly.
  25.158 -#
  25.159 -(vif-script vif-bridge)
  25.160 -
  25.161 -
  25.162 -## Use the following if network traffic is routed, as an alternative to the
  25.163 -# settings for bridged networking given above.
  25.164 -#(network-script network-route)
  25.165 -#(vif-script     vif-route)
  25.166 -
  25.167 -
  25.168 -## Use the following if network traffic is routed with NAT, as an alternative
  25.169 -# to the settings for bridged networking given above.
  25.170 -#(network-script network-nat)
  25.171 -#(vif-script     vif-nat)
  25.172 -
  25.173 -# dom0-min-mem is the lowest permissible memory level (in MB) for dom0.
  25.174 -# This is a minimum both for auto-ballooning (as enabled by
  25.175 -# enable-dom0-ballooning below) and for xm mem-set when applied to dom0.
  25.176 -(dom0-min-mem 196)
  25.177 -
  25.178 -# Whether to enable auto-ballooning of dom0 to allow domUs to be created.
  25.179 -# If enable-dom0-ballooning = no, dom0 will never balloon out.
  25.180 -(enable-dom0-ballooning yes)
  25.181 -
  25.182 -# In SMP system, dom0 will use dom0-cpus # of CPUS
  25.183 -# If dom0-cpus = 0, dom0 will take all cpus available
  25.184 -(dom0-cpus 0)
  25.185 -
  25.186 -# Whether to enable core-dumps when domains crash.
  25.187 -#(enable-dump no)
  25.188 -
  25.189 -# The tool used for initiating virtual TPM migration
  25.190 -#(external-migration-tool '')
  25.191 -
  25.192 -# The interface for VNC servers to listen on. Defaults
  25.193 -# to 127.0.0.1  To restore old 'listen everywhere' behaviour
  25.194 -# set this to 0.0.0.0
  25.195 -#(vnc-listen '127.0.0.1')
  25.196 -
  25.197 -# The default password for VNC console on HVM domain.
  25.198 -# Empty string is no authentication.
  25.199 -(vncpasswd '')
    26.1 --- a/tools/examples/xend-config.sxp	Mon Jun 02 11:35:02 2008 +0900
    26.2 +++ b/tools/examples/xend-config.sxp	Mon Jun 02 11:35:39 2008 +0900
    26.3 @@ -59,6 +59,7 @@
    26.4  #(xend-unix-xmlrpc-server yes)
    26.5  #(xend-relocation-server no)
    26.6  (xend-relocation-server yes)
    26.7 +#(xend-relocation-ssl-server no)
    26.8  
    26.9  #(xend-unix-path /var/lib/xend/xend-socket)
   26.10  
   26.11 @@ -82,15 +83,18 @@
   26.12  # is set.
   26.13  #(xend-relocation-port 8002)
   26.14  
   26.15 -# Whether to use tls when relocating.
   26.16 -#(xend-relocation-tls no)
   26.17 +# Port xend should use for the ssl relocation interface, if
   26.18 +# xend-relocation-ssl-server is set.
   26.19 +#(xend-relocation-ssl-port 8003)
   26.20  
   26.21 -# SSL key and certificate to use for the relocation interface.
   26.22 -# Setting these will mean that this port serves only SSL connections as
   26.23 -# opposed to plaintext ones.
   26.24 +# SSL key and certificate to use for the ssl relocation interface, if
   26.25 +# xend-relocation-ssl-server is set.
   26.26  #(xend-relocation-server-ssl-key-file  /etc/xen/xmlrpc.key)
   26.27  #(xend-relocation-server-ssl-cert-file  /etc/xen/xmlrpc.crt)
   26.28  
   26.29 +# Whether to use ssl as default when relocating.
   26.30 +#(xend-relocation-ssl no)
   26.31 +
   26.32  # Address xend should listen on for HTTP connections, if xend-http-server is
   26.33  # set.
   26.34  # Specifying 'localhost' prevents remote connections.
    27.1 --- a/tools/examples/xm-config-xenapi.xml	Mon Jun 02 11:35:02 2008 +0900
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,43 +0,0 @@
    27.4 -<!--
    27.5 -
    27.6 -Copyright (C) 2006 XenSource Inc.
    27.7 -
    27.8 -This library is free software; you can redistribute it and/or
    27.9 -modify it under the terms of version 2.1 of the GNU Lesser General Public
   27.10 -License as published by the Free Software Foundation.
   27.11 -
   27.12 -This library is distributed in the hope that it will be useful,
   27.13 -but WITHOUT ANY WARRANTY; without even the implied warranty of
   27.14 -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   27.15 -Lesser General Public License for more details.
   27.16 -
   27.17 -You should have received a copy of the GNU Lesser General Public
   27.18 -License along with this library; if not, write to the Free Software
   27.19 -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   27.20 -
   27.21 --->
   27.22 -
   27.23 -<!--
   27.24 -
   27.25 -This is a configuration file for xm; it should be placed in
   27.26 -/etc/xen/xm-config.xml.  If this file is missing, then xm will fall back to
   27.27 -the normal behaviour that's in Xen 3.0.4 and below.  The settings here are
   27.28 -most useful for experimenting with the Xen-API preview in Xen 3.0.4.
   27.29 -
   27.30 --->
   27.31 -
   27.32 -<xm>
   27.33 -  <!-- The server element describes how to talk to Xend.  The type may be 
   27.34 -       Xen-API or LegacyXMLRPC (the default).  The URI is that of the
   27.35 -       server; you might try http://server:9363/ or
   27.36 -       httpu:///var/run/xend/xen-api.sock for the Xen-API, or
   27.37 -       httpu:///var/run/xend/xmlrpc.sock for the legacy server.
   27.38 -
   27.39 -       The username and password attributes will be used to log in if Xen-API
   27.40 -       is being used.
   27.41 -    -->
   27.42 -  <server type='Xen-API'
   27.43 -          uri='http://localhost:9363/'
   27.44 -          username='me'
   27.45 -          password='mypassword' />
   27.46 -</xm>
    28.1 --- a/tools/firmware/hvmloader/util.c	Mon Jun 02 11:35:02 2008 +0900
    28.2 +++ b/tools/firmware/hvmloader/util.c	Mon Jun 02 11:35:39 2008 +0900
    28.3 @@ -609,7 +609,7 @@ int get_apic_mode(void)
    28.4  uint16_t get_cpu_mhz(void)
    28.5  {
    28.6      struct xen_add_to_physmap xatp;
    28.7 -    struct shared_info *shared_info = (struct shared_info *)0xa0000;
    28.8 +    struct shared_info *shared_info = (struct shared_info *)0xfffff000;
    28.9      struct vcpu_time_info *info = &shared_info->vcpu_info[0].time;
   28.10      uint64_t cpu_khz;
   28.11      uint32_t tsc_to_nsec_mul, version;
   28.12 @@ -619,7 +619,7 @@ uint16_t get_cpu_mhz(void)
   28.13      if ( cpu_mhz != 0 )
   28.14          return cpu_mhz;
   28.15  
   28.16 -    /* Map shared-info page to 0xa0000 (i.e., overlap VGA hole). */
   28.17 +    /* Map shared-info page. */
   28.18      xatp.domid = DOMID_SELF;
   28.19      xatp.space = XENMAPSPACE_shared_info;
   28.20      xatp.idx   = 0;
   28.21 @@ -644,14 +644,6 @@ uint16_t get_cpu_mhz(void)
   28.22      else
   28.23          cpu_khz = cpu_khz >> tsc_shift;
   28.24  
   28.25 -    /* Get the VGA MMIO hole back by remapping shared info to scratch. */
   28.26 -    xatp.domid = DOMID_SELF;
   28.27 -    xatp.space = XENMAPSPACE_shared_info;
   28.28 -    xatp.idx   = 0;
   28.29 -    xatp.gpfn  = 0xfffff; /* scratch pfn */
   28.30 -    if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
   28.31 -        BUG();
   28.32 -
   28.33      cpu_mhz = (uint16_t)(((uint32_t)cpu_khz + 500) / 1000);
   28.34      return cpu_mhz;
   28.35  }
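
[Note] The shared-info page moves from 0xa0000 (which shadowed the VGA hole and had to be remapped away afterwards) to the scratch address 0xfffff000, so the restore dance at the end of get_cpu_mhz() is deleted. For context, the surrounding function reads the time parameters under Xen's usual version seqlock; a standalone sketch of that protocol (assumed context, not part of this hunk):

    #include <stdint.h>

    struct time_info {                      /* minimal vcpu_time_info stand-in */
        uint32_t version;
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;
    };

    static uint32_t read_tsc_mul(volatile struct time_info *info, int8_t *shift)
    {
        uint32_t version, mul;
        do {
            version = info->version;        /* odd value: update in progress */
            mul     = info->tsc_to_system_mul;
            *shift  = info->tsc_shift;
        } while ( (version & 1) || (version != info->version) );
        return mul;
    }
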
    29.1 --- a/tools/firmware/rombios/rombios.c	Mon Jun 02 11:35:02 2008 +0900
    29.2 +++ b/tools/firmware/rombios/rombios.c	Mon Jun 02 11:35:39 2008 +0900
    29.3 @@ -2225,26 +2225,12 @@ void interactive_bootkey()
    29.4      Bit16u i;
    29.5      Bit8u scan = 0;
    29.6  
    29.7 -    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\nPress F10 to select boot device.\n");
    29.8 -    for (i = 3; i > 0; i--)
    29.9 -    {
   29.10 -        scan = wait(WAIT_HZ, 0);
   29.11 -        switch (scan) {
   29.12 -        case 0x3D:
   29.13 -        case 0x3E:
   29.14 -        case 0x3F:
   29.15 -        case 0x58:
   29.16 -            break;
   29.17 -        case 0x44:
   29.18 -            scan = bootmenu(inb_cmos(0x3d) & 0x0f);
   29.19 -            break;
   29.20 -        default:
   29.21 -            scan = 0;
   29.22 -            break;
   29.23 -        }
   29.24 -        if (scan != 0)
   29.25 -            break;
   29.26 -    }
   29.27 +    bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
   29.28 +                "\n\nPress F10 to select boot device.\n");
   29.29 +
   29.30 +    scan = wait(1, 0);
   29.31 +    if (scan == 0x44)
   29.32 +        scan = bootmenu(inb_cmos(0x3d) & 0x0f);
   29.33  
   29.34      /* set the default based on the keypress or menu */
   29.35      switch(scan) {
    30.1 --- a/tools/ioemu/Makefile.target	Mon Jun 02 11:35:02 2008 +0900
    30.2 +++ b/tools/ioemu/Makefile.target	Mon Jun 02 11:35:39 2008 +0900
    30.3 @@ -358,6 +358,13 @@ VL_OBJS+=tap-win32.o
    30.4  endif
    30.5  
    30.6  ifdef CONFIG_STUBDOM
    30.7 +VL_OBJS+=main-qemu.o
    30.8 +CFLAGS += -DCONFIG_QEMU
    30.9 +main-qemu.c:
   30.10 +	ln -s $(XEN_ROOT)/extras/mini-os/main.c $@
   30.11 +endif
   30.12 +
   30.13 +ifdef CONFIG_STUBDOM
   30.14  #CONFIG_PASSTHROUGH=1
   30.15  else
   30.16    ifeq (,$(wildcard /usr/include/pci))
    31.1 --- a/tools/ioemu/hw/cirrus_vga.c	Mon Jun 02 11:35:02 2008 +0900
    31.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Mon Jun 02 11:35:39 2008 +0900
    31.3 @@ -281,8 +281,6 @@ typedef struct PCICirrusVGAState {
    31.4  
    31.5  static uint8_t rop_to_index[256];
    31.6      
    31.7 -void *shared_vram;
    31.8 -
    31.9  /***************************************
   31.10   *
   31.11   *  prototypes.
    32.1 --- a/tools/ioemu/vl.c	Mon Jun 02 11:35:02 2008 +0900
    32.2 +++ b/tools/ioemu/vl.c	Mon Jun 02 11:35:39 2008 +0900
    32.3 @@ -7807,8 +7807,9 @@ int main(int argc, char **argv)
    32.4                  bdrv_set_type_hint(fd_table[i], BDRV_TYPE_FLOPPY);
    32.5              }
    32.6              if (fd_filename[i] != '\0') {
    32.7 -                if (bdrv_open(fd_table[i], fd_filename[i],
    32.8 -                              snapshot ? BDRV_O_SNAPSHOT : 0) < 0) {
    32.9 +                if (bdrv_open2(fd_table[i], fd_filename[i],
   32.10 +                               snapshot ? BDRV_O_SNAPSHOT : 0,
   32.11 +                               &bdrv_raw) < 0) {
   32.12                      fprintf(stderr, "qemu: could not open floppy disk image '%s'\n",
   32.13                              fd_filename[i]);
   32.14                      exit(1);
    33.1 --- a/tools/ioemu/vl.h	Mon Jun 02 11:35:02 2008 +0900
    33.2 +++ b/tools/ioemu/vl.h	Mon Jun 02 11:35:39 2008 +0900
    33.3 @@ -154,8 +154,6 @@ int unset_mm_mapping(int xc_handle, uint
    33.4  int set_mm_mapping(int xc_handle, uint32_t domid, unsigned long nr_pages,
    33.5                     unsigned int address_bits, unsigned long *extent_start);
    33.6  
    33.7 -extern void *shared_vram;
    33.8 -
    33.9  extern FILE *logfile;
   33.10  
   33.11  
    34.1 --- a/tools/ioemu/xenstore.c	Mon Jun 02 11:35:02 2008 +0900
    34.2 +++ b/tools/ioemu/xenstore.c	Mon Jun 02 11:35:39 2008 +0900
    34.3 @@ -112,7 +112,7 @@ void xenstore_parse_domain_config(int hv
    34.4  
    34.5      e = xs_directory(xsh, XBT_NULL, buf, &num);
    34.6      if (e == NULL)
    34.7 -        goto out;
    34.8 +        num = 0;
    34.9  
   34.10      for (i = 0; i < num; i++) {
   34.11          /* read the backend path */
    35.1 --- a/tools/libxc/Makefile	Mon Jun 02 11:35:02 2008 +0900
    35.2 +++ b/tools/libxc/Makefile	Mon Jun 02 11:35:39 2008 +0900
    35.3 @@ -5,11 +5,9 @@ MAJOR    = 3.2
    35.4  MINOR    = 0
    35.5  
    35.6  CTRL_SRCS-y       :=
    35.7 -ifneq ($(stubdom),y)
    35.8  CTRL_SRCS-y       += xc_core.c
    35.9  CTRL_SRCS-$(CONFIG_X86) += xc_core_x86.c
   35.10  CTRL_SRCS-$(CONFIG_IA64) += xc_core_ia64.c
   35.11 -endif
   35.12  CTRL_SRCS-y       += xc_domain.c
   35.13  CTRL_SRCS-y       += xc_evtchn.c
   35.14  CTRL_SRCS-y       += xc_misc.c
   35.15 @@ -21,9 +19,7 @@ CTRL_SRCS-y       += xc_sedf.c
   35.16  CTRL_SRCS-y       += xc_csched.c
   35.17  CTRL_SRCS-y       += xc_tbuf.c
   35.18  CTRL_SRCS-y       += xc_pm.c
   35.19 -ifneq ($(stubdom),y)
   35.20  CTRL_SRCS-y       += xc_resume.c
   35.21 -endif
   35.22  CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c
   35.23  CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c
   35.24  CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c
   35.25 @@ -33,15 +29,12 @@ CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.
   35.26  
   35.27  GUEST_SRCS-y :=
   35.28  GUEST_SRCS-y += xg_private.c
   35.29 -ifneq ($(stubdom),y)
   35.30  GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
   35.31  GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
   35.32 -endif
   35.33  
   35.34  VPATH = ../../xen/common/libelf
   35.35  CFLAGS += -I../../xen/common/libelf
   35.36  
   35.37 -ifneq ($(stubdom),y)
   35.38  GUEST_SRCS-y += libelf-tools.c libelf-loader.c
   35.39  GUEST_SRCS-y += libelf-dominfo.c libelf-relocate.c
   35.40  
   35.41 @@ -55,7 +48,6 @@ GUEST_SRCS-y                 += xc_dom_c
   35.42  GUEST_SRCS-$(CONFIG_X86)     += xc_dom_x86.c
   35.43  GUEST_SRCS-$(CONFIG_X86)     += xc_cpuid_x86.c
   35.44  GUEST_SRCS-$(CONFIG_IA64)    += xc_dom_ia64.c
   35.45 -endif
   35.46  
   35.47  -include $(XEN_TARGET_ARCH)/Makefile
   35.48  
    36.1 --- a/tools/libxc/xc_core.c	Mon Jun 02 11:35:02 2008 +0900
    36.2 +++ b/tools/libxc/xc_core.c	Mon Jun 02 11:35:39 2008 +0900
    36.3 @@ -64,7 +64,7 @@
    36.4  /* string table */
    36.5  struct xc_core_strtab {
    36.6      char       *strings;
    36.7 -    uint16_t    current;
    36.8 +    uint16_t    length;
    36.9      uint16_t    max;
   36.10  };
   36.11  
   36.12 @@ -89,7 +89,7 @@ xc_core_strtab_init(void)
   36.13  
   36.14      /* index 0 represents none */
   36.15      strtab->strings[0] = '\0';
   36.16 -    strtab->current = 1;
   36.17 +    strtab->length = 1;
   36.18  
   36.19      return strtab;
   36.20  }
   36.21 @@ -107,14 +107,14 @@ xc_core_strtab_get(struct xc_core_strtab
   36.22      uint16_t ret = 0;
   36.23      uint16_t len = strlen(name) + 1;
   36.24  
   36.25 -    if ( strtab->current > UINT16_MAX - len )
   36.26 +    if ( strtab->length > UINT16_MAX - len )
   36.27      {
   36.28          PERROR("too long string table");
   36.29          errno = E2BIG;
   36.30          return ret;
   36.31      }
   36.32      
   36.33 -    if ( strtab->current + len > strtab->max )
   36.34 +    if ( strtab->length + len > strtab->max )
   36.35      {
   36.36          char *tmp;
   36.37          if ( strtab->max > UINT16_MAX / 2 )
   36.38 @@ -135,9 +135,9 @@ xc_core_strtab_get(struct xc_core_strtab
   36.39          strtab->max *= 2;
   36.40      }
   36.41  
   36.42 -    ret = strtab->current;
   36.43 -    strcpy(strtab->strings + strtab->current, name);
   36.44 -    strtab->current += len;
   36.45 +    ret = strtab->length;
   36.46 +    strcpy(strtab->strings + strtab->length, name);
   36.47 +    strtab->length += len;
   36.48      return ret;
   36.49  }
   36.50  
   36.51 @@ -669,7 +669,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   36.52      offset += filesz;
   36.53  
   36.54      /* fixing up section header string table section header */
   36.55 -    filesz = strtab->current;
   36.56 +    filesz = strtab->length;
   36.57      sheaders->shdrs[strtab_idx].sh_offset = offset;
   36.58      sheaders->shdrs[strtab_idx].sh_size = filesz;
   36.59  
   36.60 @@ -829,7 +829,7 @@ copy_done:
   36.61          goto out;
   36.62  
   36.63      /* elf section header string table: .shstrtab */
   36.64 -    sts = dump_rtn(args, strtab->strings, strtab->current);
   36.65 +    sts = dump_rtn(args, strtab->strings, strtab->length);
   36.66      if ( sts != 0 )
   36.67          goto out;
   36.68  
    37.1 --- a/tools/libxc/xc_domain.c	Mon Jun 02 11:35:02 2008 +0900
    37.2 +++ b/tools/libxc/xc_domain.c	Mon Jun 02 11:35:39 2008 +0900
    37.3 @@ -767,6 +767,37 @@ int xc_assign_device(
    37.4      return do_domctl(xc_handle, &domctl);
    37.5  }
    37.6  
    37.7 +int xc_get_device_group(
    37.8 +    int xc_handle,
    37.9 +    uint32_t domid,
   37.10 +    uint32_t machine_bdf,
   37.11 +    uint32_t max_sdevs,
   37.12 +    uint32_t *num_sdevs,
   37.13 +    uint32_t *sdev_array)
   37.14 +{
   37.15 +    int rc;
   37.16 +    DECLARE_DOMCTL;
   37.17 +
   37.18 +    domctl.cmd = XEN_DOMCTL_get_device_group;
   37.19 +    domctl.domain = (domid_t)domid;
   37.20 +
   37.21 +    domctl.u.get_device_group.machine_bdf = machine_bdf;
   37.22 +    domctl.u.get_device_group.max_sdevs = max_sdevs;
   37.23 +
   37.24 +    set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);
   37.25 +
   37.26 +    if ( lock_pages(sdev_array, max_sdevs * sizeof(*sdev_array)) != 0 )
   37.27 +    {
   37.28 +        PERROR("Could not lock memory for xc_get_device_group");
   37.29 +        return -ENOMEM;
   37.30 +    }
   37.31 +    rc = do_domctl(xc_handle, &domctl);
   37.32 +    unlock_pages(sdev_array, max_sdevs * sizeof(*sdev_array));
   37.33 +
   37.34 +    *num_sdevs = domctl.u.get_device_group.num_sdevs;
   37.35 +    return rc;
   37.36 +}
   37.37 +
   37.38  int xc_test_assign_device(
   37.39      int xc_handle,
   37.40      uint32_t domid,
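
A hedged usage sketch for the new xc_get_device_group() call above; the
caller name, the domid value 1 and the 00:02.0 device are assumptions for
illustration, and error handling is minimal:

    #include <stdio.h>
    #include <stdlib.h>
    #include "xenctrl.h"

    #define MAX_SDEVS 1024   /* same cap the Python binding below uses */

    int print_device_group(void)
    {
        int xc_handle, rc;
        uint32_t i, num_sdevs = 0;
        uint32_t bdf = (2 & 0x1f) << 11;   /* 00:02.0, an assumed device */
        uint32_t *sdevs = calloc(MAX_SDEVS, sizeof(*sdevs));

        if ( sdevs == NULL )
            return -1;
        if ( (xc_handle = xc_interface_open()) < 0 )
        {
            free(sdevs);
            return -1;
        }

        rc = xc_get_device_group(xc_handle, 1 /* assumed domid */, bdf,
                                 MAX_SDEVS, &num_sdevs, sdevs);
        if ( rc == 0 )
            for ( i = 0; i < num_sdevs; i++ )
                printf("sibling %02x:%02x.%x\n", (sdevs[i] >> 16) & 0xff,
                       (sdevs[i] >> 11) & 0x1f, (sdevs[i] >> 8) & 0x7);

        xc_interface_close(xc_handle);
        free(sdevs);
        return rc;
    }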
    38.1 --- a/tools/libxc/xc_minios.c	Mon Jun 02 11:35:02 2008 +0900
    38.2 +++ b/tools/libxc/xc_minios.c	Mon Jun 02 11:35:39 2008 +0900
    38.3 @@ -35,11 +35,12 @@ extern struct wait_queue_head event_queu
    38.4  
    38.5  int xc_interface_open(void)
    38.6  {
    38.7 -    return 0;
    38.8 +    return alloc_fd(FTYPE_XC);
    38.9  }
   38.10  
   38.11  int xc_interface_close(int xc_handle)
   38.12  {
   38.13 +    files[xc_handle].type = FTYPE_NONE;
   38.14      return 0;
   38.15  }
   38.16  
   38.17 @@ -79,8 +80,12 @@ void *xc_map_foreign_range(int xc_handle
   38.18  int xc_map_foreign_ranges(int xc_handle, uint32_t dom,
   38.19                            privcmd_mmap_entry_t *entries, int nr)
   38.20  {
   38.21 -    printf("xc_map_foreign_ranges, TODO\n");
   38.22 -    do_exit();
   38.23 +    int i;
   38.24 +    for (i = 0; i < nr; i++) {
   38.25 +	unsigned long mfn = entries[i].mfn;
   38.26 +        do_map_frames(entries[i].va, &mfn, entries[i].npages, 0, 1, dom, 0, L1_PROT);
   38.27 +    }
   38.28 +    return 0;
   38.29  }
   38.30  
   38.31  int do_xen_hypercall(int xc_handle, privcmd_hypercall_t *hypercall)
   38.32 @@ -294,6 +299,12 @@ int xc_evtchn_unmask(int xce_handle, evt
   38.33      return 0;
   38.34  }
   38.35  
   38.36 +/* Optionally flush file to disk and discard page cache */
   38.37 +void discard_file_cache(int fd, int flush)
   38.38 +{
   38.39 +    if (flush)
   38.40 +        fsync(fd);
   38.41 +}
   38.42  /*
   38.43   * Local variables:
   38.44   * mode: C
    39.1 --- a/tools/libxc/xenctrl.h	Mon Jun 02 11:35:02 2008 +0900
    39.2 +++ b/tools/libxc/xenctrl.h	Mon Jun 02 11:35:39 2008 +0900
    39.3 @@ -955,6 +955,13 @@ int xc_assign_device(int xc_handle,
    39.4                       uint32_t domid,
    39.5                       uint32_t machine_bdf);
    39.6  
    39.7 +int xc_get_device_group(int xc_handle,
    39.8 +                     uint32_t domid,
    39.9 +                     uint32_t machine_bdf,
   39.10 +                     uint32_t max_sdevs,
   39.11 +                     uint32_t *num_sdevs,
   39.12 +                     uint32_t *sdev_array);
   39.13 +
   39.14  int xc_test_assign_device(int xc_handle,
   39.15                            uint32_t domid,
   39.16                            uint32_t machine_bdf);
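
For reference, the machine_bdf word taken by xc_assign_device(),
xc_deassign_device() and the new xc_get_device_group() packs the PCI address
as bus in bits 23:16, device in bits 15:11 and function in bits 10:8,
matching the shifts in the Python bindings below; a minimal sketch (the
helper name is hypothetical, not part of the tree):

    #include <stdint.h>

    static inline uint32_t pci_bdf_pack(int bus, int dev, int func)
    {
        return ((bus & 0xff) << 16) | ((dev & 0x1f) << 11) |
               ((func & 0x7) << 8);
    }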
    40.1 --- a/tools/libxc/xg_private.c	Mon Jun 02 11:35:02 2008 +0900
    40.2 +++ b/tools/libxc/xg_private.c	Mon Jun 02 11:35:39 2008 +0900
    40.3 @@ -12,22 +12,6 @@
    40.4  
    40.5  #include "xg_private.h"
    40.6  
    40.7 -int lock_pages(void *addr, size_t len)
    40.8 -{
    40.9 -    int e = 0;
   40.10 -#ifndef __sun__
   40.11 -    e = mlock(addr, len);
   40.12 -#endif
   40.13 -    return (e);
   40.14 -}
   40.15 -
   40.16 -void unlock_pages(void *addr, size_t len)
   40.17 -{
   40.18 -#ifndef __sun__
   40.19 -    safe_munlock(addr, len);
   40.20 -#endif
   40.21 -}
   40.22 -
   40.23  char *xc_read_image(const char *filename, unsigned long *size)
   40.24  {
   40.25      int kernel_fd = -1;
    41.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Mon Jun 02 11:35:02 2008 +0900
    41.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Mon Jun 02 11:35:39 2008 +0900
    41.3 @@ -106,7 +106,7 @@ static PyObject *pyxc_domain_create(XcOb
    41.4      static char *kwd_list[] = { "domid", "ssidref", "handle", "flags", "target", NULL };
    41.5  
    41.6      if ( !PyArg_ParseTupleAndKeywords(args, kwds, "|iiOii", kwd_list,
    41.7 -				      &dom, &ssidref, &pyhandle, &flags, &target))
    41.8 +                                      &dom, &ssidref, &pyhandle, &flags, &target))
    41.9          return NULL;
   41.10      if ( pyhandle != NULL )
   41.11      {
   41.12 @@ -434,44 +434,44 @@ static PyObject *pyxc_linux_build(XcObje
   41.13      dom->vhpt_size_log2 = vhpt;
   41.14  
   41.15      if ( xc_dom_linux_build(self->xc_handle, dom, domid, mem_mb, image,
   41.16 -			    ramdisk, flags, store_evtchn, &store_mfn,
   41.17 -			    console_evtchn, &console_mfn) != 0 ) {
   41.18 -	goto out;
   41.19 +                            ramdisk, flags, store_evtchn, &store_mfn,
   41.20 +                            console_evtchn, &console_mfn) != 0 ) {
   41.21 +        goto out;
   41.22      }
   41.23  
   41.24      if ( !(elfnote_dict = PyDict_New()) )
   41.25 -	goto out;
   41.26 +        goto out;
   41.27      
   41.28      for ( i = 0; i < ARRAY_SIZE(dom->parms.elf_notes); i++ )
   41.29      {
   41.30 -	switch ( dom->parms.elf_notes[i].type )
   41.31 +        switch ( dom->parms.elf_notes[i].type )
   41.32          {
   41.33 -	case XEN_ENT_NONE:
   41.34 -	    continue;
   41.35 -	case XEN_ENT_LONG:
   41.36 -	    elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
   41.37 -	    break;
   41.38 -	case XEN_ENT_STR:
   41.39 -	    elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
   41.40 -	    break;
   41.41 -	}
   41.42 -	PyDict_SetItemString(elfnote_dict,
   41.43 -			     dom->parms.elf_notes[i].name,
   41.44 -			     elfnote);
   41.45 -	Py_DECREF(elfnote);
   41.46 +        case XEN_ENT_NONE:
   41.47 +            continue;
   41.48 +        case XEN_ENT_LONG:
   41.49 +            elfnote = Py_BuildValue("k", dom->parms.elf_notes[i].data.num);
   41.50 +            break;
   41.51 +        case XEN_ENT_STR:
   41.52 +            elfnote = Py_BuildValue("s", dom->parms.elf_notes[i].data.str);
   41.53 +            break;
   41.54 +        }
   41.55 +        PyDict_SetItemString(elfnote_dict,
   41.56 +                             dom->parms.elf_notes[i].name,
   41.57 +                             elfnote);
   41.58 +        Py_DECREF(elfnote);
   41.59      }
   41.60  
   41.61      ret = Py_BuildValue("{s:i,s:i,s:N}",
   41.62 -			"store_mfn", store_mfn,
   41.63 -			"console_mfn", console_mfn,
   41.64 -			"notes", elfnote_dict);
   41.65 +                        "store_mfn", store_mfn,
   41.66 +                        "console_mfn", console_mfn,
   41.67 +                        "notes", elfnote_dict);
   41.68  
   41.69      if ( dom->arch_hooks->native_protocol )
   41.70      {
   41.71 -	PyObject *native_protocol =
   41.72 -	    Py_BuildValue("s", dom->arch_hooks->native_protocol);
   41.73 -	PyDict_SetItemString(ret, "native_protocol", native_protocol);
   41.74 -	Py_DECREF(native_protocol);
   41.75 +        PyObject *native_protocol =
   41.76 +            Py_BuildValue("s", dom->arch_hooks->native_protocol);
   41.77 +        PyDict_SetItemString(ret, "native_protocol", native_protocol);
   41.78 +        Py_DECREF(native_protocol);
   41.79      }
   41.80  
   41.81      xc_dom_release(dom);
   41.82 @@ -556,7 +556,7 @@ static PyObject *pyxc_test_assign_device
   41.83  {
   41.84      uint32_t dom;
   41.85      char *pci_str;
   41.86 -    uint32_t bdf = 0;
   41.87 +    int32_t bdf = 0;
   41.88      int seg, bus, dev, func;
   41.89  
   41.90      static char *kwd_list[] = { "domid", "pci", NULL };
   41.91 @@ -571,14 +571,143 @@ static PyObject *pyxc_test_assign_device
   41.92          bdf |= (func & 0x7) << 8;
   41.93  
   41.94          if ( xc_test_assign_device(self->xc_handle, dom, bdf) != 0 )
   41.95 +        {
   41.96 +            if (errno == ENOSYS)
   41.97 +                bdf = -1;
   41.98              break;
   41.99 +        }
  41.100 +        bdf = 0;
  41.101 +    }
  41.102  
  41.103 +    return Py_BuildValue("i", bdf);
  41.104 +}
  41.105 +
  41.106 +static PyObject *pyxc_assign_device(XcObject *self,
  41.107 +                                    PyObject *args,
  41.108 +                                    PyObject *kwds)
  41.109 +{
  41.110 +    uint32_t dom;
  41.111 +    char *pci_str;
  41.112 +    int32_t bdf = 0;
  41.113 +    int seg, bus, dev, func;
  41.114 +
  41.115 +    static char *kwd_list[] = { "domid", "pci", NULL };
  41.116 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
  41.117 +                                      &dom, &pci_str) )
  41.118 +        return NULL;
  41.119 +
  41.120 +    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
  41.121 +    {
  41.122 +        bdf |= (bus & 0xff) << 16;
  41.123 +        bdf |= (dev & 0x1f) << 11;
  41.124 +        bdf |= (func & 0x7) << 8;
  41.125 +
  41.126 +        if ( xc_assign_device(self->xc_handle, dom, bdf) != 0 )
  41.127 +        {
  41.128 +            if (errno == ENOSYS)
  41.129 +                bdf = -1;
  41.130 +            break;
  41.131 +        }
  41.132          bdf = 0;
  41.133      }
  41.134  
  41.135      return Py_BuildValue("i", bdf);
  41.136  }
  41.137  
  41.138 +static PyObject *pyxc_deassign_device(XcObject *self,
  41.139 +                                      PyObject *args,
  41.140 +                                      PyObject *kwds)
  41.141 +{
  41.142 +    uint32_t dom;
  41.143 +    char *pci_str;
  41.144 +    int32_t bdf = 0;
  41.145 +    int seg, bus, dev, func;
  41.146 +
  41.147 +    static char *kwd_list[] = { "domid", "pci", NULL };
  41.148 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "is", kwd_list,
  41.149 +                                      &dom, &pci_str) )
  41.150 +        return NULL;
  41.151 +
  41.152 +    while ( next_bdf(&pci_str, &seg, &bus, &dev, &func) )
  41.153 +    {
  41.154 +        bdf |= (bus & 0xff) << 16;
  41.155 +        bdf |= (dev & 0x1f) << 11;
  41.156 +        bdf |= (func & 0x7) << 8;
  41.157 +
  41.158 +        if ( xc_deassign_device(self->xc_handle, dom, bdf) != 0 )
  41.159 +        {
  41.160 +            if (errno == ENOSYS)
  41.161 +                bdf = -1;
  41.162 +            break;
  41.163 +        }
  41.164 +        bdf = 0;
  41.165 +    }
  41.166 +
  41.167 +    return Py_BuildValue("i", bdf);
  41.168 +}
  41.169 +
  41.170 +static PyObject *pyxc_get_device_group(XcObject *self,
  41.171 +                                         PyObject *args)
  41.172 +{
   41.173 +    uint32_t domid;
  41.174 +    uint32_t bdf = 0;
  41.175 +    uint32_t max_sdevs, num_sdevs;
  41.176 +    int seg, bus, dev, func, rc, i;
  41.177 +    PyObject *Pystr;
  41.178 +    char *group_str;
  41.179 +    char dev_str[9];
  41.180 +    uint32_t *sdev_array;
  41.181 +
  41.182 +    if ( !PyArg_ParseTuple(args, "iiiii", &domid, &seg, &bus, &dev, &func) )
  41.183 +        return NULL;
  41.184 +
   41.185 +    /* Maximum number of sibling devices allowed per group */
  41.186 +    max_sdevs = 1024;
  41.187 +
  41.188 +    if ( (sdev_array = malloc(max_sdevs * sizeof(*sdev_array))) == NULL )
  41.189 +        return PyErr_NoMemory();
  41.190 +    memset(sdev_array, 0, max_sdevs * sizeof(*sdev_array));
  41.191 +
  41.192 +    bdf |= (bus & 0xff) << 16;
  41.193 +    bdf |= (dev & 0x1f) << 11;
  41.194 +    bdf |= (func & 0x7) << 8;
  41.195 +
  41.196 +    rc = xc_get_device_group(self->xc_handle,
  41.197 +        domid, bdf, max_sdevs, &num_sdevs, sdev_array);
  41.198 +
  41.199 +    if ( rc < 0 )
  41.200 +    {
   41.201 +        free(sdev_array);
   41.202 +        return pyxc_error_to_exception();
  41.203 +    }
  41.204 +
  41.205 +    if ( !num_sdevs )
  41.206 +    {
   41.207 +        free(sdev_array);
   41.208 +        return Py_BuildValue("s", "");
  41.209 +    }
  41.210 +
   41.211 +    if ( (group_str = malloc(num_sdevs * sizeof(dev_str))) == NULL )
   41.212 +        { free(sdev_array); return PyErr_NoMemory(); }
  41.213 +    memset(group_str, '\0', num_sdevs * sizeof(dev_str));
  41.214 +
  41.215 +    for ( i = 0; i < num_sdevs; i++ )
  41.216 +    {
  41.217 +        bus = (sdev_array[i] >> 16) & 0xff;
  41.218 +        dev = (sdev_array[i] >> 11) & 0x1f;
  41.219 +        func = (sdev_array[i] >> 8) & 0x7;
  41.220 +        sprintf(dev_str, "%02x:%02x.%x,", bus, dev, func);
  41.221 +        strcat(group_str, dev_str);
  41.222 +    }
  41.223 +
  41.224 +    Pystr = Py_BuildValue("s", group_str);
  41.225 +
  41.226 +    free(sdev_array);
  41.227 +    free(group_str);
  41.228 +
  41.229 +    return Pystr;
  41.230 +}
  41.231 +
  41.232  #ifdef __ia64__
  41.233  static PyObject *pyxc_nvram_init(XcObject *self,
  41.234                                   PyObject *args)
  41.235 @@ -729,8 +858,8 @@ static PyObject *pyxc_hvm_build(XcObject
  41.236      int memsize, vcpus = 1, acpi = 0, apic = 1;
  41.237  
  41.238      static char *kwd_list[] = { "domid",
  41.239 -				"memsize", "image", "vcpus", "acpi",
  41.240 -				"apic", NULL };
  41.241 +                                "memsize", "image", "vcpus", "acpi",
  41.242 +                                "apic", NULL };
  41.243      if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
  41.244                                        &dom, &memsize,
  41.245                                        &image, &vcpus, &acpi, &apic) )
  41.246 @@ -782,8 +911,8 @@ static PyObject *pyxc_evtchn_alloc_unbou
  41.247  }
  41.248  
  41.249  static PyObject *pyxc_evtchn_reset(XcObject *self,
  41.250 -				   PyObject *args,
  41.251 -				   PyObject *kwds)
  41.252 +                                   PyObject *args,
  41.253 +                                   PyObject *kwds)
  41.254  {
  41.255      uint32_t dom;
  41.256  
  41.257 @@ -947,11 +1076,11 @@ static PyObject *pyxc_physinfo(XcObject 
  41.258  
  41.259      for ( i = 0; i < info.nr_nodes; i++ )
  41.260      {
  41.261 -	xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
  41.262 -	PyList_Append(node_to_memory_obj,
  41.263 -	    PyInt_FromLong(free_heap / 1024));
  41.264 +        xc_availheap(self->xc_handle, 0, 0, i, &free_heap);
  41.265 +        PyList_Append(node_to_memory_obj,
  41.266 +                      PyInt_FromLong(free_heap / 1024));
  41.267      }
  41.268 -	
  41.269 +
  41.270      PyDict_SetItemString(ret_obj, "node_to_cpu", node_to_cpu_obj);
  41.271      PyDict_SetItemString(ret_obj, "node_to_memory", node_to_memory_obj);
  41.272   
  41.273 @@ -1517,6 +1646,17 @@ static PyMethodDef pyxc_methods[] = {
  41.274        " value   [long]:     Value of param.\n"
  41.275        "Returns: [int] 0 on success.\n" },
  41.276  
  41.277 +    { "get_device_group",
  41.278 +      (PyCFunction)pyxc_get_device_group,
  41.279 +      METH_VARARGS, "\n"
   41.280 +      "Get sibling devices information.\n"
   41.281 +      " dom     [int]:      Domain the device is assigned to.\n"
  41.282 +      " seg     [int]:      PCI segment.\n"
  41.283 +      " bus     [int]:      PCI bus.\n"
  41.284 +      " dev     [int]:      PCI dev.\n"
  41.285 +      " func    [int]:      PCI func.\n"
   41.286 +      "Returns: [string]:   Sibling devices.\n" },
  41.287 +
  41.288       { "test_assign_device",
  41.289         (PyCFunction)pyxc_test_assign_device,
  41.290         METH_VARARGS | METH_KEYWORDS, "\n"
  41.291 @@ -1524,6 +1664,22 @@ static PyMethodDef pyxc_methods[] = {
  41.292         " dom     [int]:      Identifier of domain to build into.\n"
  41.293         " pci_str [str]:      PCI devices.\n"
  41.294         "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" },
  41.295 +
  41.296 +     { "assign_device",
  41.297 +       (PyCFunction)pyxc_assign_device,
  41.298 +       METH_VARARGS | METH_KEYWORDS, "\n"
  41.299 +       "Assign device to IOMMU domain.\n"
  41.300 +       " dom     [int]:      Domain to assign device to.\n"
  41.301 +       " pci_str [str]:      PCI devices.\n"
  41.302 +       "Returns: [int] 0 on success, or device bdf that can't be assigned.\n" },
  41.303 +
  41.304 +     { "deassign_device",
  41.305 +       (PyCFunction)pyxc_deassign_device,
  41.306 +       METH_VARARGS | METH_KEYWORDS, "\n"
  41.307 +       "Deassign device from IOMMU domain.\n"
  41.308 +       " dom     [int]:      Domain to deassign device from.\n"
  41.309 +       " pci_str [str]:      PCI devices.\n"
  41.310 +       "Returns: [int] 0 on success, or device bdf that can't be deassigned.\n" },
  41.311    
  41.312      { "sched_id_get",
  41.313        (PyCFunction)pyxc_sched_id_get,
    42.1 --- a/tools/python/xen/xend/XendAPI.py	Mon Jun 02 11:35:02 2008 +0900
    42.2 +++ b/tools/python/xen/xend/XendAPI.py	Mon Jun 02 11:35:39 2008 +0900
    42.3 @@ -1759,12 +1759,12 @@ class XendAPI(object):
    42.4          xendom = XendDomain.instance()
    42.5          xeninfo = xendom.get_vm_by_uuid(vm_ref)
    42.6  
    42.7 -        resource = other_config.get("resource", 0)
    42.8          port = other_config.get("port", 0)
    42.9 -        node = other_config.get("node", 0)
   42.10 +        node = other_config.get("node", -1)
   42.11 +        ssl = other_config.get("ssl", None)
   42.12          
   42.13          xendom.domain_migrate(xeninfo.getDomid(), destination_url,
   42.14 -                              bool(live), resource, port, node)
   42.15 +                              bool(live), port, node, ssl)
   42.16          return xen_api_success_void()
   42.17  
   42.18      def VM_save(self, _, vm_ref, dest, checkpoint):
    43.1 --- a/tools/python/xen/xend/XendDomain.py	Mon Jun 02 11:35:02 2008 +0900
    43.2 +++ b/tools/python/xen/xend/XendDomain.py	Mon Jun 02 11:35:39 2008 +0900
    43.3 @@ -43,8 +43,8 @@ from xen.xend.XendConstants import XS_VM
    43.4  from xen.xend.XendConstants import DOM_STATE_HALTED, DOM_STATE_PAUSED
    43.5  from xen.xend.XendConstants import DOM_STATE_RUNNING, DOM_STATE_SUSPENDED
    43.6  from xen.xend.XendConstants import DOM_STATE_SHUTDOWN, DOM_STATE_UNKNOWN
    43.7 -from xen.xend.XendConstants import DOM_STATE_CRASHED
    43.8 -from xen.xend.XendConstants import TRIGGER_TYPE
    43.9 +from xen.xend.XendConstants import DOM_STATE_CRASHED, HVM_PARAM_ACPI_S_STATE
   43.10 +from xen.xend.XendConstants import TRIGGER_TYPE, TRIGGER_S3RESUME
   43.11  from xen.xend.XendDevices import XendDevices
   43.12  from xen.xend.XendAPIConstants import *
   43.13  
   43.14 @@ -1258,22 +1258,24 @@ class XendDomain:
   43.15  
   43.16          return val       
   43.17  
   43.18 -    def domain_migrate(self, domid, dst, live=False, port=0, node=-1):
   43.19 +    def domain_migrate(self, domid, dst, live=False, port=0, node=-1, ssl=None):
   43.20          """Start domain migration.
   43.21          
   43.22          @param domid: Domain ID or Name
   43.23          @type domid: int or string.
   43.24          @param dst: Destination IP address
   43.25          @type dst: string
   43.26 -        @keyword port: relocation port on destination
   43.27 -        @type port: int        
   43.28          @keyword live: Live migration
   43.29          @type live: bool
   43.30 -        @rtype: None
   43.31 +        @keyword port: relocation port on destination
   43.32 +        @type port: int
   43.33          @keyword node: use node number for target
   43.34 -        @rtype: int 
   43.35 +        @type node: int
   43.36 +        @keyword ssl: use ssl connection
   43.37 +        @type ssl: bool
   43.38 +        @rtype: None
   43.39          @raise XendError: Failed to migrate
   43.40 -        @raise XendInvalidDomain: Domain is not valid        
   43.41 +        @raise XendInvalidDomain: Domain is not valid
   43.42          """
   43.43  
   43.44          dominfo = self.domain_lookup_nr(domid)
   43.45 @@ -1294,13 +1296,14 @@ class XendDomain:
   43.46              """ Make sure there's memory free for enabling shadow mode """
   43.47              dominfo.checkLiveMigrateMemory()
   43.48  
   43.49 -        if port == 0:
   43.50 -            port = xoptions.get_xend_relocation_port()
   43.51 +        if ssl is None:
   43.52 +            ssl = xoptions.get_xend_relocation_ssl()
   43.53  
   43.54 -        tls = xoptions.get_xend_relocation_tls()
   43.55 -        if tls:
   43.56 +        if ssl:
   43.57              from OpenSSL import SSL
   43.58              from xen.web import connection
   43.59 +            if port == 0:
   43.60 +                port = xoptions.get_xend_relocation_ssl_port()
   43.61              try:
   43.62                  ctx = SSL.Context(SSL.SSLv23_METHOD)
   43.63                  sock = SSL.Connection(ctx,
   43.64 @@ -1328,6 +1331,8 @@ class XendDomain:
   43.65              os.close(p2cread)
   43.66              os.close(p2cwrite)
   43.67          else:
   43.68 +            if port == 0:
   43.69 +                port = xoptions.get_xend_relocation_port()
   43.70              try:
   43.71                  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   43.72                  # When connecting to our ssl enabled relocation server using a
    44.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Mon Jun 02 11:35:02 2008 +0900
    44.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Mon Jun 02 11:35:39 2008 +0900
    44.3 @@ -2091,28 +2091,28 @@ class XendDomainInfo:
    44.4                          xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v])
    44.5              else:
    44.6                  def find_relaxed_node(node_list):
    44.7 -                    import sys 
    44.8 +                    import sys
    44.9 +                    nr_nodes = info['nr_nodes']
   44.10                      if node_list is None:
   44.11 -                        node_list = range(0, info['nr_nodes'])
   44.12 +                        node_list = range(0, nr_nodes)
   44.13                      nodeload = [0]
   44.14 -                    nodeload = nodeload * info['nr_nodes']
   44.15 +                    nodeload = nodeload * nr_nodes
   44.16                      from xen.xend import XendDomain
   44.17                      doms = XendDomain.instance().list('all')
   44.18 -                    for dom in doms:
   44.19 +                    for dom in filter (lambda d: d.domid != self.domid, doms):
   44.20                          cpuinfo = dom.getVCPUInfo()
   44.21                          for vcpu in sxp.children(cpuinfo, 'vcpu'):
   44.22 -                            def vinfo(n, t):
   44.23 -                                return t(sxp.child_value(vcpu, n))
   44.24 -                            cpumap = vinfo('cpumap', list)
   44.25 -                            for i in node_list:
   44.26 +                            if sxp.child_value(vcpu, 'online') == 0: continue
   44.27 +                            cpumap = list(sxp.child_value(vcpu,'cpumap'))
   44.28 +                            for i in range(0, nr_nodes):
   44.29                                  node_cpumask = info['node_to_cpu'][i]
   44.30                                  for j in node_cpumask:
   44.31                                      if j in cpumap:
   44.32                                          nodeload[i] += 1
   44.33                                          break
   44.34 -                    for i in node_list:
   44.35 -                        if len(info['node_to_cpu'][i]) > 0:
   44.36 -                            nodeload[i] = int(nodeload[i] / len(info['node_to_cpu'][i]))
   44.37 +                    for i in range(0, nr_nodes):
   44.38 +                        if len(info['node_to_cpu'][i]) > 0 and i in node_list:
   44.39 +                            nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
   44.40                          else:
   44.41                              nodeload[i] = sys.maxint
   44.42                      index = nodeload.index( min(nodeload) )    
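
(Worked check of the fixed-point scaling above, numbers illustrative: with 2
busy vcpus sharing a 4-cpu node, the old int(2 / 4) rounded every node's load
to 0, so loaded and idle nodes tied; int(2 * 16 / 4) = 8 preserves the
ordering while staying in integer arithmetic, and skipping the current domain
and offline vcpus stops a domain's own vcpus from inflating the load of the
node being chosen for it.)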
    45.1 --- a/tools/python/xen/xend/XendOptions.py	Mon Jun 02 11:35:02 2008 +0900
    45.2 +++ b/tools/python/xen/xend/XendOptions.py	Mon Jun 02 11:35:39 2008 +0900
    45.3 @@ -72,6 +72,9 @@ class XendOptions:
    45.4      """Default for the flag indicating whether xend should run a relocation server."""
    45.5      xend_relocation_server_default = 'no'
    45.6  
     45.7 +    """Default for the flag indicating whether xend should run an SSL relocation server."""
    45.8 +    xend_relocation_ssl_server_default = 'no'
    45.9 +
   45.10      """Default interface address the xend relocation server listens at. """
   45.11      xend_relocation_address_default = ''
   45.12  
   45.13 @@ -81,6 +84,9 @@ class XendOptions:
   45.14      """Default port xend serves relocation at. """
   45.15      xend_relocation_port_default = 8002
   45.16  
    45.17 +    """Default port xend serves SSL relocation at. """
   45.18 +    xend_relocation_ssl_port_default = 8003
   45.19 +
   45.20      xend_relocation_hosts_allow_default = ''
   45.21  
   45.22      """Default for the flag indicating whether xend should run a unix-domain
   45.23 @@ -192,6 +198,12 @@ class XendOptions:
   45.24          return self.get_config_bool("xend-relocation-server",
   45.25                                      self.xend_relocation_server_default)
   45.26  
   45.27 +    def get_xend_relocation_ssl_server(self):
    45.28 +        """Get the flag indicating whether xend should run an SSL relocation server.
   45.29 +        """
   45.30 +        return self.get_config_bool("xend-relocation-ssl-server",
   45.31 +                                    self.xend_relocation_ssl_server_default)
   45.32 +
   45.33      def get_xend_relocation_server_ssl_key_file(self):
   45.34          return self.get_config_string("xend-relocation-server-ssl-key-file")
   45.35  
   45.36 @@ -209,10 +221,17 @@ class XendOptions:
   45.37          return self.get_config_int('xend-relocation-port',
   45.38                                     self.xend_relocation_port_default)
   45.39  
   45.40 -    def get_xend_relocation_tls(self):
   45.41 -        """Whether to use tls when relocating.
   45.42 +    def get_xend_relocation_ssl_port(self):
    45.43 +        """Get the port xend listens at for SSL connections to its relocation
   45.44 +        server.
   45.45          """
   45.46 -        return self.get_config_bool('xend-relocation-tls', 'no')
   45.47 +        return self.get_config_int('xend-relocation-ssl-port',
   45.48 +                                   self.xend_relocation_ssl_port_default)
   45.49 +
   45.50 +    def get_xend_relocation_ssl(self):
    45.51 +        """Whether to use SSL when relocating.
   45.52 +        """
   45.53 +        return self.get_config_bool('xend-relocation-ssl', 'no')
   45.54  
   45.55      def get_xend_relocation_hosts_allow(self):
   45.56          return self.get_config_string("xend-relocation-hosts-allow",
    46.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Mon Jun 02 11:35:02 2008 +0900
    46.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Mon Jun 02 11:35:39 2008 +0900
    46.3 @@ -115,7 +115,9 @@ class SrvDomain(SrvDir):
    46.4                      [['dom',         'int'],
    46.5                       ['destination', 'str'],
    46.6                       ['live',        'int'],
    46.7 -                     ['port',        'int']])
    46.8 +                     ['port',        'int'],
    46.9 +                     ['node',        'int'],
   46.10 +                     ['ssl',         'int']])
   46.11          return fn(req.args, {'dom': self.dom.domid})
   46.12  
   46.13      def op_pincpu(self, _, req):
   46.14 @@ -217,6 +219,11 @@ class SrvDomain(SrvDir):
   46.15          return self.call(self.dom.getVCPUInfo, [], req)
   46.16  
   46.17  
   46.18 +    def op_reset(self, _, req):
   46.19 +        self.acceptCommand(req)
   46.20 +        return self.xd.domain_reset(self.dom.getName())
   46.21 +
   46.22 +
   46.23      def render_POST(self, req):
   46.24          return self.perform(req)
   46.25          
   46.26 @@ -257,6 +264,10 @@ class SrvDomain(SrvDir):
   46.27          req.write('</form>')
   46.28  
   46.29          req.write('<form method="post" action="%s">' % url)
   46.30 +        req.write('<input type="submit" name="op" value="reset">')
   46.31 +        req.write('</form>')
   46.32 +
   46.33 +        req.write('<form method="post" action="%s">' % url)
   46.34          req.write('<input type="submit" name="op" value="shutdown">')
   46.35          req.write('<input type="radio" name="reason" value="poweroff" checked>Poweroff')
   46.36          req.write('<input type="radio" name="reason" value="halt">Halt')
    48.1 --- a/tools/python/xen/xend/server/pciif.py	Mon Jun 02 11:35:02 2008 +0900
    48.2 +++ b/tools/python/xen/xend/server/pciif.py	Mon Jun 02 11:35:39 2008 +0900
    48.3 @@ -226,6 +226,39 @@ class PciController(DevController):
    48.4  
    48.5          return sxpr    
    48.6  
    48.7 +    def CheckSiblingDevices(self, domid, dev):
    48.8 +        """ Check if all sibling devices of dev are owned by pciback
    48.9 +        """
   48.10 +        if not self.vm.info.is_hvm():
   48.11 +            return
   48.12 +
   48.13 +        group_str = xc.get_device_group(domid, dev.domain, dev.bus, dev.slot, dev.func)
   48.14 +        if group_str == "":
   48.15 +            return
   48.16 +
   48.17 +        #group string format xx:xx.x,xx:xx.x,
   48.18 +        devstr_len = group_str.find(',')
   48.19 +        for i in range(0, len(group_str), devstr_len + 1):
   48.20 +            (bus, slotfunc) = group_str[i:i + devstr_len].split(':')
   48.21 +            (slot, func) = slotfunc.split('.')
   48.22 +            b = parse_hex(bus)
   48.23 +            d = parse_hex(slot)
   48.24 +            f = parse_hex(func)
   48.25 +            try:
   48.26 +                sdev = PciDevice(dev.domain, b, d, f)
   48.27 +            except Exception, e:
   48.28 +                #no dom0 drivers bound to sdev
   48.29 +                continue
   48.30 +
   48.31 +            if sdev.driver!='pciback':
   48.32 +                raise VmError(("pci: PCI Backend does not own\n "+ \
   48.33 +                    "sibling device %s of device %s\n"+ \
   48.34 +                    "See the pciback.hide kernel "+ \
   48.35 +                    "command-line parameter or\n"+ \
   48.36 +                    "bind your slot/device to the PCI backend using sysfs" \
   48.37 +                    )%(sdev.name, dev.name))
   48.38 +        return
   48.39 +
   48.40      def setupOneDevice(self, domain, bus, slot, func):
   48.41          """ Attach I/O resources for device to frontend domain
   48.42          """
   48.43 @@ -245,9 +278,20 @@ class PciController(DevController):
   48.44                      "bind your slot/device to the PCI backend using sysfs" \
   48.45                      )%(dev.name))
   48.46  
   48.47 +        self.CheckSiblingDevices(fe_domid, dev)
   48.48 +
   48.49          PCIQuirk(dev.vendor, dev.device, dev.subvendor, dev.subdevice, domain, 
   48.50                  bus, slot, func)
   48.51  
   48.52 +        if not self.vm.info.is_hvm():
   48.53 +            # Setup IOMMU device assignment
   48.54 +            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
   48.55 +            bdf = xc.assign_device(fe_domid, pci_str)
   48.56 +            if bdf > 0:
   48.57 +                raise VmError("Failed to assign device to IOMMU (%x:%x.%x)"
   48.58 +                              % (bus, slot, func))
   48.59 +            log.debug("pci: assign device %x:%x.%x" % (bus, slot, func))
   48.60 +
   48.61          for (start, size) in dev.ioports:
   48.62              log.debug('pci: enabling ioport 0x%x/0x%x'%(start,size))
   48.63              rc = xc.domain_ioport_permission(domid = fe_domid, first_port = start,
   48.64 @@ -330,6 +374,14 @@ class PciController(DevController):
   48.65                      "bind your slot/device to the PCI backend using sysfs" \
   48.66                      )%(dev.name))
   48.67  
   48.68 +        if not self.vm.info.is_hvm():
   48.69 +            pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func)
   48.70 +            bdf = xc.deassign_device(fe_domid, pci_str)
   48.71 +            if bdf > 0:
   48.72 +                raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)"
   48.73 +                              % (bus, slot, func))
   48.74 +            log.debug("pci: deassign device %x:%x.%x" % (bus, slot, func))
   48.75 +
   48.76          for (start, size) in dev.ioports:
   48.77              log.debug('pci: disabling ioport 0x%x/0x%x'%(start,size))
   48.78              rc = xc.domain_ioport_permission(domid = fe_domid, first_port = start,
    49.1 --- a/tools/python/xen/xend/server/relocate.py	Mon Jun 02 11:35:02 2008 +0900
    49.2 +++ b/tools/python/xen/xend/server/relocate.py	Mon Jun 02 11:35:39 2008 +0900
    49.3 @@ -142,16 +142,22 @@ def listenRelocation():
    49.4      if xoptions.get_xend_unix_server():
    49.5          path = '/var/lib/xend/relocation-socket'
    49.6          unix.UnixListener(path, RelocationProtocol)
    49.7 +
    49.8 +    interface = xoptions.get_xend_relocation_address()
    49.9 +
   49.10 +    hosts_allow = xoptions.get_xend_relocation_hosts_allow()
   49.11 +    if hosts_allow == '':
   49.12 +        hosts_allow = None
   49.13 +    else:
   49.14 +        hosts_allow = map(re.compile, hosts_allow.split(" "))
   49.15 +
   49.16      if xoptions.get_xend_relocation_server():
   49.17          port = xoptions.get_xend_relocation_port()
   49.18 -        interface = xoptions.get_xend_relocation_address()
   49.19 +        tcp.TCPListener(RelocationProtocol, port, interface = interface,
   49.20 +                        hosts_allow = hosts_allow)
   49.21  
   49.22 -        hosts_allow = xoptions.get_xend_relocation_hosts_allow()
   49.23 -        if hosts_allow == '':
   49.24 -            hosts_allow = None
   49.25 -        else:
   49.26 -            hosts_allow = map(re.compile, hosts_allow.split(" "))
   49.27 -
   49.28 +    if xoptions.get_xend_relocation_ssl_server():
   49.29 +        port = xoptions.get_xend_relocation_ssl_port()
   49.30          ssl_key_file = xoptions.get_xend_relocation_server_ssl_key_file()
   49.31          ssl_cert_file = xoptions.get_xend_relocation_server_ssl_cert_file()
   49.32  
   49.33 @@ -161,5 +167,5 @@ def listenRelocation():
   49.34                                 ssl_key_file = ssl_key_file,
   49.35                                 ssl_cert_file = ssl_cert_file)
   49.36          else:
   49.37 -            tcp.TCPListener(RelocationProtocol, port, interface = interface,
   49.38 -                            hosts_allow = hosts_allow)
    49.39 +            raise XendError("ssl_key_file or ssl_cert_file for the SSL relocation server is missing.")
   49.40 +
    50.1 --- a/tools/python/xen/xm/migrate.py	Mon Jun 02 11:35:02 2008 +0900
    50.2 +++ b/tools/python/xen/xm/migrate.py	Mon Jun 02 11:35:39 2008 +0900
    50.3 @@ -47,6 +47,10 @@ gopts.opt('node', short='n', val='nodenu
    50.4            fn=set_int, default=-1,
    50.5            use="Use specified NUMA node on target.")
    50.6  
    50.7 +gopts.opt('ssl', short='s',
    50.8 +          fn=set_true, default=None,
     50.9 +          use="Use SSL connection for migration.")
   50.10 +
   50.11  def help():
   50.12      return str(gopts)
   50.13      
   50.14 @@ -65,11 +69,13 @@ def main(argv):
   50.15          vm_ref = get_single_vm(dom)
   50.16          other_config = {
   50.17              "port":     opts.vals.port,
   50.18 -            "node":     opts.vals.node
   50.19 +            "node":     opts.vals.node,
   50.20 +            "ssl":      opts.vals.ssl
   50.21              }
   50.22          server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
   50.23                                   other_config)
   50.24      else:
   50.25          server.xend.domain.migrate(dom, dst, opts.vals.live,
   50.26                                     opts.vals.port,
   50.27 -                                   opts.vals.node)
   50.28 +                                   opts.vals.node,
   50.29 +                                   opts.vals.ssl)
    51.1 --- a/tools/xenstat/libxenstat/src/xenstat.c	Mon Jun 02 11:35:02 2008 +0900
    51.2 +++ b/tools/xenstat/libxenstat/src/xenstat.c	Mon Jun 02 11:35:39 2008 +0900
    51.3 @@ -655,12 +655,20 @@ unsigned long long xenstat_vbd_wr_reqs(x
    51.4  
    51.5  static char *xenstat_get_domain_name(xenstat_handle *handle, unsigned int domain_id)
    51.6  {
    51.7 -	char path[80];
    51.8 +	char path[80], *vmpath;
    51.9 +
   51.10 +	snprintf(path, sizeof(path),"/local/domain/%i/vm", domain_id);
   51.11 +
   51.12 +	vmpath = xs_read(handle->xshandle, XBT_NULL, path, NULL);
   51.13  
   51.14 -	snprintf(path, sizeof(path),"/local/domain/%i/name", domain_id);
   51.15 -	
   51.16 +	if (vmpath == NULL)
   51.17 +		return NULL;
   51.18 +
   51.19 +	snprintf(path, sizeof(path),"%s/name", vmpath);
   51.20 +	free(vmpath);
   51.21 +
   51.22  	return xs_read(handle->xshandle, XBT_NULL, path, NULL);
   51.23 -}	
   51.24 +}
   51.25  
   51.26  /* Remove specified entry from list of domains */
   51.27  static void xenstat_prune_domain(xenstat_node *node, unsigned int entry)
    52.1 --- a/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Mon Jun 02 11:35:02 2008 +0900
    52.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/evtchn.c	Mon Jun 02 11:35:39 2008 +0900
    52.3 @@ -284,7 +284,7 @@ static irqreturn_t evtchn_interrupt(int 
    52.4  
    52.5  #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
    52.6  	/* Clear master flag /before/ clearing selector flag. */
    52.7 -	rmb();
    52.8 +	wmb();
    52.9  #endif
   52.10  	l1 = xchg(&v->evtchn_pending_sel, 0);
   52.11  
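
(Note: the master-flag clear that the comment refers to and the xchg() below
are both stores; ordering two stores on a weakly ordered architecture
requires a write barrier, so rmb(), which only orders reads, was the wrong
primitive here.)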
    53.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Mon Jun 02 11:35:02 2008 +0900
    53.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Mon Jun 02 11:35:39 2008 +0900
    53.3 @@ -173,6 +173,8 @@ static inline u32 ticks_elapsed(u32 t1, 
    53.4  {
    53.5      if ( t2 >= t1 )
    53.6          return (t2 - t1);
    53.7 +    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
    53.8 +        return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
    53.9      else
   53.10          return ((0xFFFFFFFF - t1) + t2);
   53.11  }
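
(Worked example for the new 24-bit branch, values chosen for illustration:
with t1 = 0x00FFFFF0 and t2 = 0x00000010 the branch returns
((0x00FFFFFF - 0x00FFFFF0) + 0x10) & 0x00FFFFFF = 0x1F ticks, whereas the
32-bit formula would return 0xFF00001F, grossly overstating the interval on
FADTs that advertise only a 24-bit PM timer.)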
    54.1 --- a/xen/arch/x86/acpi/power.c	Mon Jun 02 11:35:02 2008 +0900
    54.2 +++ b/xen/arch/x86/acpi/power.c	Mon Jun 02 11:35:39 2008 +0900
    54.3 @@ -238,9 +238,17 @@ static int acpi_get_wake_status(void)
    54.4  static void tboot_sleep(u8 sleep_state)
    54.5  {
    54.6     uint32_t shutdown_type;
    54.7 -   
    54.8 -   *((struct acpi_sleep_info *)(unsigned long)g_tboot_shared->acpi_sinfo) =
    54.9 -       acpi_sinfo;
   54.10 +
   54.11 +   g_tboot_shared->acpi_sinfo.pm1a_cnt =
   54.12 +                           (uint16_t)acpi_sinfo.pm1a_cnt_blk.address;
   54.13 +   g_tboot_shared->acpi_sinfo.pm1b_cnt =
   54.14 +                           (uint16_t)acpi_sinfo.pm1b_cnt_blk.address;
   54.15 +   g_tboot_shared->acpi_sinfo.pm1a_evt =
   54.16 +                           (uint16_t)acpi_sinfo.pm1a_evt_blk.address;
   54.17 +   g_tboot_shared->acpi_sinfo.pm1b_evt =
   54.18 +                           (uint16_t)acpi_sinfo.pm1b_evt_blk.address;
   54.19 +   g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
   54.20 +   g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
   54.21  
   54.22     switch ( sleep_state )
   54.23     {
    55.1 --- a/xen/arch/x86/cpu/amd.c	Mon Jun 02 11:35:02 2008 +0900
    55.2 +++ b/xen/arch/x86/cpu/amd.c	Mon Jun 02 11:35:39 2008 +0900
    55.3 @@ -74,9 +74,11 @@ static int c1_ramping_may_cause_clock_dr
    55.4  static void disable_c1_ramping(void) 
    55.5  {
    55.6  	u8 pmm7;
    55.7 -	int node;
    55.8 +	int node, nr_nodes;
    55.9  
   55.10 -	for (node=0; node < NR_CPUS; node++) {
   55.11 +	/* Read the number of nodes from the first Northbridge. */
   55.12 +	nr_nodes = ((pci_conf_read32(0, 0x18, 0x0, 0x60)>>4)&0x07)+1;
   55.13 +	for (node = 0; node < nr_nodes; node++) {
   55.14  		/* PMM7: bus=0, dev=0x18+node, function=0x3, register=0x87. */
   55.15  		pmm7 = pci_conf_read8(0, 0x18+node, 0x3, 0x87);
   55.16  		/* Invalid read means we've updated every Northbridge. */
    56.1 --- a/xen/arch/x86/crash.c	Mon Jun 02 11:35:02 2008 +0900
    56.2 +++ b/xen/arch/x86/crash.c	Mon Jun 02 11:35:39 2008 +0900
    56.3 @@ -102,6 +102,7 @@ void machine_crash_shutdown(void)
    56.4      hvm_cpu_down();
    56.5  
    56.6      info = kexec_crash_save_info();
    56.7 +    info->xen_phys_start = xen_phys_start;
    56.8      info->dom0_pfn_to_mfn_frame_list_list =
    56.9          arch_get_pfn_to_mfn_frame_list_list(dom0);
   56.10  }
    57.1 --- a/xen/arch/x86/domain.c	Mon Jun 02 11:35:02 2008 +0900
    57.2 +++ b/xen/arch/x86/domain.c	Mon Jun 02 11:35:39 2008 +0900
    57.3 @@ -59,8 +59,6 @@ DEFINE_PER_CPU(unsigned long, cr4);
    57.4  static void default_idle(void);
    57.5  void (*pm_idle) (void) = default_idle;
    57.6  
    57.7 -static void unmap_vcpu_info(struct vcpu *v);
    57.8 -
    57.9  static void paravirt_ctxt_switch_from(struct vcpu *v);
   57.10  static void paravirt_ctxt_switch_to(struct vcpu *v);
   57.11  
   57.12 @@ -433,8 +431,6 @@ void vcpu_destroy(struct vcpu *v)
   57.13      if ( is_pv_32on64_vcpu(v) )
   57.14          release_compat_l4(v);
   57.15  
   57.16 -    unmap_vcpu_info(v);
   57.17 -
   57.18      if ( is_hvm_vcpu(v) )
   57.19          hvm_vcpu_destroy(v);
   57.20  }
   57.21 @@ -825,8 +821,15 @@ int arch_set_info_guest(
   57.22  
   57.23  void arch_vcpu_reset(struct vcpu *v)
   57.24  {
   57.25 -    destroy_gdt(v);
   57.26 -    vcpu_destroy_pagetables(v);
   57.27 +    if ( !is_hvm_vcpu(v) )
   57.28 +    {
   57.29 +        destroy_gdt(v);
   57.30 +        vcpu_destroy_pagetables(v);
   57.31 +    }
   57.32 +    else
   57.33 +    {
   57.34 +        vcpu_end_shutdown_deferral(v);
   57.35 +    }
   57.36  }
   57.37  
   57.38  /* 
   57.39 @@ -1857,17 +1860,20 @@ int domain_relinquish_resources(struct d
   57.40          /* Tear down paging-assistance stuff. */
   57.41          paging_teardown(d);
   57.42  
   57.43 -        /* Drop the in-use references to page-table bases. */
   57.44          for_each_vcpu ( d, v )
   57.45 +        {
   57.46 +            /* Drop the in-use references to page-table bases. */
   57.47              vcpu_destroy_pagetables(v);
   57.48  
   57.49 -        /*
   57.50 -         * Relinquish GDT mappings. No need for explicit unmapping of the LDT
   57.51 -         * as it automatically gets squashed when the guest's mappings go away.
   57.52 -         */
   57.53 -        for_each_vcpu(d, v)
   57.54 +            /*
   57.55 +             * Relinquish GDT mappings. No need for explicit unmapping of the
   57.56 +             * LDT as it automatically gets squashed with the guest mappings.
   57.57 +             */
   57.58              destroy_gdt(v);
   57.59  
   57.60 +            unmap_vcpu_info(v);
   57.61 +        }
   57.62 +
   57.63          d->arch.relmem = RELMEM_xen_l4;
   57.64          /* fallthrough */
   57.65  
    58.1 --- a/xen/arch/x86/domctl.c	Mon Jun 02 11:35:02 2008 +0900
    58.2 +++ b/xen/arch/x86/domctl.c	Mon Jun 02 11:35:39 2008 +0900
    58.3 @@ -526,14 +526,54 @@ long arch_do_domctl(
    58.4      }
    58.5      break;
    58.6  
    58.7 +    case XEN_DOMCTL_get_device_group:
    58.8 +    {
    58.9 +        struct domain *d;
   58.10 +        u32 max_sdevs;
   58.11 +        u8 bus, devfn;
   58.12 +        XEN_GUEST_HANDLE_64(uint32) sdevs;
   58.13 +        int num_sdevs;
   58.14 +
   58.15 +        ret = -ENOSYS;
   58.16 +        if ( !iommu_enabled )
   58.17 +            break;
   58.18 +
   58.19 +        ret = -EINVAL;
   58.20 +        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   58.21 +            break;
   58.22 +
   58.23 +        bus = (domctl->u.get_device_group.machine_bdf >> 16) & 0xff;
   58.24 +        devfn = (domctl->u.get_device_group.machine_bdf >> 8) & 0xff;
   58.25 +        max_sdevs = domctl->u.get_device_group.max_sdevs;
   58.26 +        sdevs = domctl->u.get_device_group.sdev_array;
   58.27 +
   58.28 +        num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
   58.29 +        if ( num_sdevs < 0 )
   58.30 +        {
   58.31 +            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
   58.32 +            ret = -EFAULT;
   58.33 +            domctl->u.get_device_group.num_sdevs = 0;
   58.34 +        }
   58.35 +        else
   58.36 +        {
   58.37 +            ret = 0;
   58.38 +            domctl->u.get_device_group.num_sdevs = num_sdevs;
   58.39 +        }
   58.40 +        if ( copy_to_guest(u_domctl, domctl, 1) )
   58.41 +            ret = -EFAULT;
   58.42 +        rcu_unlock_domain(d);
   58.43 +    }
   58.44 +    break;
   58.45 +
   58.46      case XEN_DOMCTL_test_assign_device:
   58.47      {
   58.48          u8 bus, devfn;
   58.49  
   58.50 -        ret = -EINVAL;
   58.51 +        ret = -ENOSYS;
   58.52          if ( !iommu_enabled )
   58.53              break;
   58.54  
   58.55 +        ret = -EINVAL;
   58.56          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   58.57          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   58.58  
   58.59 @@ -553,10 +593,11 @@ long arch_do_domctl(
   58.60          struct domain *d;
   58.61          u8 bus, devfn;
   58.62  
   58.63 -        ret = -EINVAL;
   58.64 +        ret = -ENOSYS;
   58.65          if ( !iommu_enabled )
   58.66              break;
   58.67  
   58.68 +        ret = -EINVAL;
   58.69          if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
   58.70          {
   58.71              gdprintk(XENLOG_ERR,
   58.72 @@ -566,6 +607,12 @@ long arch_do_domctl(
   58.73          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   58.74          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   58.75  
   58.76 +        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
   58.77 +        {
   58.78 +            ret = -ENOSYS;
   58.79 +            break;
   58.80 +        }
   58.81 +
   58.82          if ( device_assigned(bus, devfn) )
   58.83          {
   58.84              gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
   58.85 @@ -576,7 +623,7 @@ long arch_do_domctl(
   58.86  
   58.87          ret = assign_device(d, bus, devfn);
   58.88          gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
   58.89 -            bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   58.90 +                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   58.91          put_domain(d);
   58.92      }
   58.93      break;
   58.94 @@ -586,10 +633,11 @@ long arch_do_domctl(
   58.95          struct domain *d;
   58.96          u8 bus, devfn;
   58.97  
   58.98 -        ret = -EINVAL;
   58.99 +        ret = -ENOSYS;
  58.100          if ( !iommu_enabled )
  58.101              break;
  58.102  
  58.103 +        ret = -EINVAL;
  58.104          if ( unlikely((d = get_domain_by_id(domctl->domain)) == NULL) )
  58.105          {
  58.106              gdprintk(XENLOG_ERR,
  58.107 @@ -599,9 +647,16 @@ long arch_do_domctl(
  58.108          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
  58.109          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
  58.110  
  58.111 +        if ( !iommu_pv_enabled && !is_hvm_domain(d) )
  58.112 +        {
  58.113 +            ret = -ENOSYS;
  58.114 +            break;
  58.115 +        }
  58.116 +
  58.117          if ( !device_assigned(bus, devfn) )
  58.118              break;
  58.119  
  58.120 +        ret = 0;
  58.121          deassign_device(d, bus, devfn);
  58.122          gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
  58.123              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
    59.1 --- a/xen/arch/x86/hvm/hpet.c	Mon Jun 02 11:35:02 2008 +0900
    59.2 +++ b/xen/arch/x86/hvm/hpet.c	Mon Jun 02 11:35:39 2008 +0900
    59.3 @@ -29,9 +29,9 @@
    59.4  #define S_TO_NS  1000000000ULL           /* 1s  = 10^9  ns */
    59.5  #define S_TO_FS  1000000000000000ULL     /* 1s  = 10^15 fs */
    59.6  
    59.7 -/* Frequency_of_TSC / frequency_of_HPET = 32 */
    59.8 -#define TSC_PER_HPET_TICK 32
    59.9 -#define guest_time_hpet(v) (hvm_get_guest_time(v) / TSC_PER_HPET_TICK)
    59.10 +/* Frequency_of_Xen_system_time / frequency_of_HPET = 16 */
   59.11 +#define STIME_PER_HPET_TICK 16
   59.12 +#define guest_time_hpet(v) (hvm_get_guest_time(v) / STIME_PER_HPET_TICK)
   59.13  
   59.14  #define HPET_ID         0x000
   59.15  #define HPET_PERIOD     0x004
   59.16 @@ -192,7 +192,7 @@ static void hpet_stop_timer(HPETState *h
   59.17  
   59.18  /* the number of HPET tick that stands for
   59.19   * 1/(2^10) second, namely, 0.9765625 milliseconds */
   59.20 -#define  HPET_TINY_TIME_SPAN  ((h->tsc_freq >> 10) / TSC_PER_HPET_TICK)
   59.21 +#define  HPET_TINY_TIME_SPAN  ((h->stime_freq >> 10) / STIME_PER_HPET_TICK)
   59.22  
   59.23  static void hpet_set_timer(HPETState *h, unsigned int tn)
   59.24  {
   59.25 @@ -558,17 +558,17 @@ void hpet_init(struct vcpu *v)
   59.26      spin_lock_init(&h->lock);
   59.27  
   59.28      h->vcpu = v;
   59.29 -    h->tsc_freq = ticks_per_sec(v);
   59.30 +    h->stime_freq = S_TO_NS;
   59.31  
   59.32 -    h->hpet_to_ns_scale = ((S_TO_NS * TSC_PER_HPET_TICK) << 10) / h->tsc_freq;
   59.33 +    h->hpet_to_ns_scale = ((S_TO_NS * STIME_PER_HPET_TICK) << 10) / h->stime_freq;
   59.34      h->hpet_to_ns_limit = ~0ULL / h->hpet_to_ns_scale;
   59.35  
   59.36      /* 64-bit main counter; 3 timers supported; LegacyReplacementRoute. */
   59.37      h->hpet.capability = 0x8086A201ULL;
   59.38  
    59.39      /* This is the number of femtoseconds per HPET tick. */
   59.40 -    /* Here we define HPET's frequency to be 1/32 of the TSC's */
   59.41 -    h->hpet.capability |= ((S_TO_FS*TSC_PER_HPET_TICK/h->tsc_freq) << 32);
    59.42 +    /* Here we define the HPET's frequency to be 1/16 of the Xen system time's */
   59.43 +    h->hpet.capability |= ((S_TO_FS*STIME_PER_HPET_TICK/h->stime_freq) << 32);
   59.44  
   59.45      for ( i = 0; i < HPET_TIMER_NUM; i++ )
   59.46      {
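
(Sanity check of the constants above, illustrative arithmetic only: with
h->stime_freq = S_TO_NS = 10^9, hpet_to_ns_scale = ((10^9 * 16) << 10) / 10^9
= 16 << 10, so one HPET tick converts to (1 * (16 << 10)) >> 10 = 16 ns; the
capability field likewise advertises S_TO_FS * 16 / 10^9 = 16000000 fs per
tick, i.e. a 62.5 MHz virtual HPET.)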
    60.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Jun 02 11:35:02 2008 +0900
    60.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Jun 02 11:35:39 2008 +0900
    60.3 @@ -296,6 +296,8 @@ int hvm_domain_initialise(struct domain 
    60.4      spin_lock_init(&d->arch.hvm_domain.irq_lock);
    60.5      spin_lock_init(&d->arch.hvm_domain.uc_lock);
    60.6  
    60.7 +    hvm_init_guest_time(d);
    60.8 +
    60.9      d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
   60.10  
   60.11      hvm_init_cacheattr_region_list(d);
   60.12 @@ -661,7 +663,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
   60.13          hpet_init(v);
   60.14   
   60.15          /* Init guest TSC to start from zero. */
   60.16 -        hvm_set_guest_time(v, 0);
   60.17 +        hvm_set_guest_tsc(v, 0);
   60.18  
   60.19          /* Can start up without SIPI-SIPI or setvcpucontext domctl. */
   60.20          v->is_initialised = 1;
   60.21 @@ -1098,16 +1100,17 @@ int hvm_virtual_to_linear_addr(
   60.22      return 0;
   60.23  }
   60.24  
   60.25 -static void *hvm_map(unsigned long va, int size)
   60.26 +static void *hvm_map_entry(unsigned long va)
   60.27  {
   60.28      unsigned long gfn, mfn;
   60.29      p2m_type_t p2mt;
   60.30      uint32_t pfec;
   60.31  
   60.32 -    if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
   60.33 +    if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE )
   60.34      {
   60.35 -        hvm_inject_exception(TRAP_page_fault, PFEC_write_access,
   60.36 -                             (va + PAGE_SIZE - 1) & PAGE_MASK);
   60.37 +        gdprintk(XENLOG_ERR, "Descriptor table entry "
   60.38 +                 "straddles page boundary\n");
   60.39 +        domain_crash(current->domain);
   60.40          return NULL;
   60.41      }
   60.42  
   60.43 @@ -1119,7 +1122,8 @@ static void *hvm_map(unsigned long va, i
   60.44      mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
   60.45      if ( !p2m_is_ram(p2mt) )
   60.46      {
   60.47 -        hvm_inject_exception(TRAP_page_fault, pfec, va);
   60.48 +        gdprintk(XENLOG_ERR, "Failed to look up descriptor table entry\n");
   60.49 +        domain_crash(current->domain);
   60.50          return NULL;
   60.51      }
   60.52  
   60.53 @@ -1130,7 +1134,7 @@ static void *hvm_map(unsigned long va, i
   60.54      return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
   60.55  }
   60.56  
   60.57 -static void hvm_unmap(void *p)
   60.58 +static void hvm_unmap_entry(void *p)
   60.59  {
   60.60      if ( p )
   60.61          unmap_domain_page(p);
   60.62 @@ -1166,7 +1170,7 @@ static int hvm_load_segment_selector(
   60.63      if ( ((sel & 0xfff8) + 7) > desctab.limit )
   60.64          goto fail;
   60.65  
   60.66 -    pdesc = hvm_map(desctab.base + (sel & 0xfff8), 8);
   60.67 +    pdesc = hvm_map_entry(desctab.base + (sel & 0xfff8));
   60.68      if ( pdesc == NULL )
   60.69          goto hvm_map_fail;
   60.70  
   60.71 @@ -1226,7 +1230,7 @@ static int hvm_load_segment_selector(
   60.72      desc.b |= 0x100;
   60.73  
   60.74   skip_accessed_flag:
   60.75 -    hvm_unmap(pdesc);
   60.76 +    hvm_unmap_entry(pdesc);
   60.77  
   60.78      segr.base = (((desc.b <<  0) & 0xff000000u) |
   60.79                   ((desc.b << 16) & 0x00ff0000u) |
   60.80 @@ -1242,7 +1246,7 @@ static int hvm_load_segment_selector(
   60.81      return 0;
   60.82  
   60.83   unmap_and_fail:
   60.84 -    hvm_unmap(pdesc);
   60.85 +    hvm_unmap_entry(pdesc);
   60.86   fail:
   60.87      hvm_inject_exception(fault_type, sel & 0xfffc, 0);
   60.88   hvm_map_fail:
   60.89 @@ -1258,7 +1262,7 @@ void hvm_task_switch(
   60.90      struct segment_register gdt, tr, prev_tr, segr;
   60.91      struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
   60.92      unsigned long eflags;
   60.93 -    int exn_raised;
   60.94 +    int exn_raised, rc;
   60.95      struct {
   60.96          u16 back_link,__blh;
   60.97          u32 esp0;
   60.98 @@ -1270,7 +1274,7 @@ void hvm_task_switch(
   60.99          u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
  60.100          u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
  60.101          u16 trace, iomap;
  60.102 -    } *ptss, tss;
  60.103 +    } tss = { 0 };
  60.104  
  60.105      hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
  60.106      hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
  60.107 @@ -1283,11 +1287,11 @@ void hvm_task_switch(
  60.108          goto out;
  60.109      }
  60.110  
  60.111 -    optss_desc = hvm_map(gdt.base + (prev_tr.sel & 0xfff8), 8);
  60.112 +    optss_desc = hvm_map_entry(gdt.base + (prev_tr.sel & 0xfff8));
  60.113      if ( optss_desc == NULL )
  60.114          goto out;
  60.115  
  60.116 -    nptss_desc = hvm_map(gdt.base + (tss_sel & 0xfff8), 8);
  60.117 +    nptss_desc = hvm_map_entry(gdt.base + (tss_sel & 0xfff8));
  60.118      if ( nptss_desc == NULL )
  60.119          goto out;
  60.120  
  60.121 @@ -1322,85 +1326,90 @@ void hvm_task_switch(
  60.122          goto out;
  60.123      }
  60.124  
  60.125 -    ptss = hvm_map(prev_tr.base, sizeof(tss));
  60.126 -    if ( ptss == NULL )
  60.127 +    rc = hvm_copy_from_guest_virt(
  60.128 +        &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
  60.129 +    if ( rc == HVMCOPY_bad_gva_to_gfn )
  60.130          goto out;
  60.131  
  60.132      eflags = regs->eflags;
  60.133      if ( taskswitch_reason == TSW_iret )
  60.134          eflags &= ~X86_EFLAGS_NT;
  60.135  
  60.136 -    ptss->cr3    = v->arch.hvm_vcpu.guest_cr[3];
  60.137 -    ptss->eip    = regs->eip;
  60.138 -    ptss->eflags = eflags;
  60.139 -    ptss->eax    = regs->eax;
  60.140 -    ptss->ecx    = regs->ecx;
  60.141 -    ptss->edx    = regs->edx;
  60.142 -    ptss->ebx    = regs->ebx;
  60.143 -    ptss->esp    = regs->esp;
  60.144 -    ptss->ebp    = regs->ebp;
  60.145 -    ptss->esi    = regs->esi;
  60.146 -    ptss->edi    = regs->edi;
  60.147 +    tss.cr3    = v->arch.hvm_vcpu.guest_cr[3];
  60.148 +    tss.eip    = regs->eip;
  60.149 +    tss.eflags = eflags;
  60.150 +    tss.eax    = regs->eax;
  60.151 +    tss.ecx    = regs->ecx;
  60.152 +    tss.edx    = regs->edx;
  60.153 +    tss.ebx    = regs->ebx;
  60.154 +    tss.esp    = regs->esp;
  60.155 +    tss.ebp    = regs->ebp;
  60.156 +    tss.esi    = regs->esi;
  60.157 +    tss.edi    = regs->edi;
  60.158  
  60.159      hvm_get_segment_register(v, x86_seg_es, &segr);
  60.160 -    ptss->es = segr.sel;
  60.161 +    tss.es = segr.sel;
  60.162      hvm_get_segment_register(v, x86_seg_cs, &segr);
  60.163 -    ptss->cs = segr.sel;
  60.164 +    tss.cs = segr.sel;
  60.165      hvm_get_segment_register(v, x86_seg_ss, &segr);
  60.166 -    ptss->ss = segr.sel;
  60.167 +    tss.ss = segr.sel;
  60.168      hvm_get_segment_register(v, x86_seg_ds, &segr);
  60.169 -    ptss->ds = segr.sel;
  60.170 +    tss.ds = segr.sel;
  60.171      hvm_get_segment_register(v, x86_seg_fs, &segr);
  60.172 -    ptss->fs = segr.sel;
  60.173 +    tss.fs = segr.sel;
  60.174      hvm_get_segment_register(v, x86_seg_gs, &segr);
  60.175 -    ptss->gs = segr.sel;
  60.176 +    tss.gs = segr.sel;
  60.177      hvm_get_segment_register(v, x86_seg_ldtr, &segr);
  60.178 -    ptss->ldt = segr.sel;
  60.179 -
  60.180 -    hvm_unmap(ptss);
  60.181 -
  60.182 -    ptss = hvm_map(tr.base, sizeof(tss));
  60.183 -    if ( ptss == NULL )
  60.184 +    tss.ldt = segr.sel;
  60.185 +
  60.186 +    rc = hvm_copy_to_guest_virt(
  60.187 +        prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
  60.188 +    if ( rc == HVMCOPY_bad_gva_to_gfn )
  60.189          goto out;
  60.190  
  60.191 -    if ( hvm_set_cr3(ptss->cr3) )
  60.192 -    {
  60.193 -        hvm_unmap(ptss);
  60.194 +    rc = hvm_copy_from_guest_virt(
  60.195 +        &tss, tr.base, sizeof(tss), PFEC_page_present);
  60.196 +    if ( rc == HVMCOPY_bad_gva_to_gfn )
  60.197          goto out;
  60.198 -    }
  60.199 -
  60.200 -    regs->eip    = ptss->eip;
  60.201 -    regs->eflags = ptss->eflags | 2;
  60.202 -    regs->eax    = ptss->eax;
  60.203 -    regs->ecx    = ptss->ecx;
  60.204 -    regs->edx    = ptss->edx;
  60.205 -    regs->ebx    = ptss->ebx;
  60.206 -    regs->esp    = ptss->esp;
  60.207 -    regs->ebp    = ptss->ebp;
  60.208 -    regs->esi    = ptss->esi;
  60.209 -    regs->edi    = ptss->edi;
  60.210 +
  60.211 +    if ( hvm_set_cr3(tss.cr3) )
  60.212 +        goto out;
  60.213 +
  60.214 +    regs->eip    = tss.eip;
  60.215 +    regs->eflags = tss.eflags | 2;
  60.216 +    regs->eax    = tss.eax;
  60.217 +    regs->ecx    = tss.ecx;
  60.218 +    regs->edx    = tss.edx;
  60.219 +    regs->ebx    = tss.ebx;
  60.220 +    regs->esp    = tss.esp;
  60.221 +    regs->ebp    = tss.ebp;
  60.222 +    regs->esi    = tss.esi;
  60.223 +    regs->edi    = tss.edi;
  60.224  
  60.225      if ( (taskswitch_reason == TSW_call_or_int) )
  60.226      {
  60.227          regs->eflags |= X86_EFLAGS_NT;
  60.228 -        ptss->back_link = prev_tr.sel;
  60.229 +        tss.back_link = prev_tr.sel;
  60.230      }
  60.231  
  60.232      exn_raised = 0;
  60.233 -    if ( hvm_load_segment_selector(v, x86_seg_es, ptss->es) ||
  60.234 -         hvm_load_segment_selector(v, x86_seg_cs, ptss->cs) ||
  60.235 -         hvm_load_segment_selector(v, x86_seg_ss, ptss->ss) ||
  60.236 -         hvm_load_segment_selector(v, x86_seg_ds, ptss->ds) ||
  60.237 -         hvm_load_segment_selector(v, x86_seg_fs, ptss->fs) ||
  60.238 -         hvm_load_segment_selector(v, x86_seg_gs, ptss->gs) ||
  60.239 -         hvm_load_segment_selector(v, x86_seg_ldtr, ptss->ldt) )
  60.240 +    if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) ||
  60.241 +         hvm_load_segment_selector(v, x86_seg_cs, tss.cs) ||
  60.242 +         hvm_load_segment_selector(v, x86_seg_ss, tss.ss) ||
  60.243 +         hvm_load_segment_selector(v, x86_seg_ds, tss.ds) ||
  60.244 +         hvm_load_segment_selector(v, x86_seg_fs, tss.fs) ||
  60.245 +         hvm_load_segment_selector(v, x86_seg_gs, tss.gs) ||
  60.246 +         hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) )
  60.247          exn_raised = 1;
  60.248  
  60.249 -    if ( (ptss->trace & 1) && !exn_raised )
  60.250 +    rc = hvm_copy_to_guest_virt(
  60.251 +        tr.base, &tss, sizeof(tss), PFEC_page_present);
  60.252 +    if ( rc == HVMCOPY_bad_gva_to_gfn )
  60.253 +        exn_raised = 1;
  60.254 +
  60.255 +    if ( (tss.trace & 1) && !exn_raised )
  60.256          hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
  60.257  
  60.258 -    hvm_unmap(ptss);
  60.259 -
  60.260      tr.attr.fields.type = 0xb; /* busy 32-bit tss */
  60.261      hvm_set_segment_register(v, x86_seg_tr, &tr);
  60.262  
  60.263 @@ -1428,8 +1437,8 @@ void hvm_task_switch(
  60.264      }
  60.265  
  60.266   out:
  60.267 -    hvm_unmap(optss_desc);
  60.268 -    hvm_unmap(nptss_desc);
  60.269 +    hvm_unmap_entry(optss_desc);
  60.270 +    hvm_unmap_entry(nptss_desc);
  60.271  }
  60.272  
  60.273  #define HVMCOPY_from_guest (0u<<0)
  60.274 @@ -1632,7 +1641,7 @@ int hvm_msr_read_intercept(struct cpu_us
  60.275      switch ( ecx )
  60.276      {
  60.277      case MSR_IA32_TSC:
  60.278 -        msr_content = hvm_get_guest_time(v);
  60.279 +        msr_content = hvm_get_guest_tsc(v);
  60.280          break;
  60.281  
  60.282      case MSR_IA32_APICBASE:
  60.283 @@ -1725,7 +1734,7 @@ int hvm_msr_write_intercept(struct cpu_u
  60.284      switch ( ecx )
  60.285      {
  60.286       case MSR_IA32_TSC:
  60.287 -        hvm_set_guest_time(v, msr_content);
  60.288 +        hvm_set_guest_tsc(v, msr_content);
  60.289          pt_reset(v);
  60.290          break;
  60.291  
  60.292 @@ -2071,6 +2080,13 @@ void hvm_vcpu_reset_state(struct vcpu *v
  60.293      if ( v->is_initialised )
  60.294          goto out;
  60.295  
  60.296 +    if ( !paging_mode_hap(d) )
  60.297 +    {
  60.298 +        if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
  60.299 +            put_page(pagetable_get_page(v->arch.guest_table));
  60.300 +        v->arch.guest_table = pagetable_null();
  60.301 +    }
  60.302 +
  60.303      ctxt = &v->arch.guest_context;
  60.304      memset(ctxt, 0, sizeof(*ctxt));
  60.305      ctxt->flags = VGCF_online;
  60.306 @@ -2123,6 +2139,8 @@ void hvm_vcpu_reset_state(struct vcpu *v
  60.307          v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
  60.308      hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset);
  60.309  
  60.310 +    paging_update_paging_modes(v);
  60.311 +
  60.312      v->arch.flags |= TF_kernel_mode;
  60.313      v->is_initialised = 1;
  60.314      clear_bit(_VPF_down, &v->pause_flags);
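
The task-switch rework above stops mapping the TSS directly and copies it
through hvm_copy_{from,to}_guest_virt(), which injects any page fault into
the guest itself; the caller only bails out on a failed GVA-to-GFN
translation. A sketch of that calling pattern; the enum value mirrors the
status checked above, and the copy routine here is a mock:

    #include <stdio.h>
    #include <string.h>

    enum hvmcopy_result { HVMCOPY_okay, HVMCOPY_bad_gva_to_gfn };

    /* Mock: the real hvm_copy_from_guest_virt() walks guest page tables
     * and injects #PF into the guest on intermediate faults. */
    static enum hvmcopy_result
    mock_copy_from_guest_virt(void *dst, unsigned long gva, size_t len)
    {
        (void)gva;
        memset(dst, 0, len);   /* pretend the guest TSS reads back as zeroes */
        return HVMCOPY_okay;
    }

    int main(void)
    {
        struct { unsigned int eip, esp; } tss = { 0, 0 };

        if ( mock_copy_from_guest_virt(&tss, 0x8000, sizeof(tss)) ==
             HVMCOPY_bad_gva_to_gfn )
            return 1;          /* translation failed: abort the task switch */
        printf("eip=%u esp=%u\n", tss.eip, tss.esp);
        return 0;
    }
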
    61.1 --- a/xen/arch/x86/hvm/i8254.c	Mon Jun 02 11:35:02 2008 +0900
    61.2 +++ b/xen/arch/x86/hvm/i8254.c	Mon Jun 02 11:35:39 2008 +0900
    61.3 @@ -31,6 +31,7 @@
    61.4  #include <xen/lib.h>
    61.5  #include <xen/errno.h>
    61.6  #include <xen/sched.h>
    61.7 +#include <asm/time.h>
    61.8  #include <asm/hvm/hvm.h>
    61.9  #include <asm/hvm/io.h>
   61.10  #include <asm/hvm/support.h>
   61.11 @@ -53,6 +54,9 @@ static int handle_pit_io(
   61.12  static int handle_speaker_io(
   61.13      int dir, uint32_t port, uint32_t bytes, uint32_t *val);
   61.14  
   61.15 +#define get_guest_time(v) \
   61.16 +   (is_hvm_vcpu(v) ? hvm_get_guest_time(v) : (u64)get_s_time())
   61.17 +
   61.18  /* Compute with 96 bit intermediate result: (a*b)/c */
   61.19  static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
   61.20  {
   61.21 @@ -86,8 +90,8 @@ static int pit_get_count(PITState *pit, 
   61.22  
   61.23      ASSERT(spin_is_locked(&pit->lock));
   61.24  
   61.25 -    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel],
   61.26 -                 PIT_FREQ, ticks_per_sec(v));
   61.27 +    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel],
   61.28 +                 PIT_FREQ, SYSTEM_TIME_HZ);
   61.29  
   61.30      switch ( c->mode )
   61.31      {
   61.32 @@ -117,8 +121,8 @@ static int pit_get_out(PITState *pit, in
   61.33  
   61.34      ASSERT(spin_is_locked(&pit->lock));
   61.35  
   61.36 -    d = muldiv64(hvm_get_guest_time(v) - pit->count_load_time[channel], 
   61.37 -                 PIT_FREQ, ticks_per_sec(v));
   61.38 +    d = muldiv64(get_guest_time(v) - pit->count_load_time[channel], 
   61.39 +                 PIT_FREQ, SYSTEM_TIME_HZ);
   61.40  
   61.41      switch ( s->mode )
   61.42      {
   61.43 @@ -164,7 +168,7 @@ static void pit_set_gate(PITState *pit, 
   61.44      case 3:
   61.45          /* Restart counting on rising edge. */
   61.46          if ( s->gate < val )
   61.47 -            pit->count_load_time[channel] = hvm_get_guest_time(v);
   61.48 +            pit->count_load_time[channel] = get_guest_time(v);
   61.49          break;
   61.50      }
   61.51  
   61.52 @@ -180,7 +184,7 @@ int pit_get_gate(PITState *pit, int chan
   61.53  static void pit_time_fired(struct vcpu *v, void *priv)
   61.54  {
   61.55      uint64_t *count_load_time = priv;
   61.56 -    *count_load_time = hvm_get_guest_time(v);
   61.57 +    *count_load_time = get_guest_time(v);
   61.58  }
   61.59  
   61.60  static void pit_load_count(PITState *pit, int channel, int val)
   61.61 @@ -195,11 +199,11 @@ static void pit_load_count(PITState *pit
   61.62          val = 0x10000;
   61.63  
   61.64      if ( v == NULL )
   61.65 -        rdtscll(pit->count_load_time[channel]);
   61.66 +        pit->count_load_time[channel] = 0;
   61.67      else
   61.68 -        pit->count_load_time[channel] = hvm_get_guest_time(v);
   61.69 +        pit->count_load_time[channel] = get_guest_time(v);
   61.70      s->count = val;
   61.71 -    period = DIV_ROUND((val * 1000000000ULL), PIT_FREQ);
   61.72 +    period = DIV_ROUND(val * SYSTEM_TIME_HZ, PIT_FREQ);
   61.73  
   61.74      if ( (v == NULL) || !is_hvm_vcpu(v) || (channel != 0) )
   61.75          return;
   61.76 @@ -435,7 +439,7 @@ static int pit_load(struct domain *d, hv
   61.77       * time jitter here, but the wall-clock will have jumped massively, so 
   61.78       * we hope the guest can handle it.
   61.79       */
   61.80 -    pit->pt0.last_plt_gtime = hvm_get_guest_time(d->vcpu[0]);
   61.81 +    pit->pt0.last_plt_gtime = get_guest_time(d->vcpu[0]);
   61.82      for ( i = 0; i < 3; i++ )
   61.83          pit_load_count(pit, i, pit->hw.channels[i].count);
   61.84  
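
The i8254 hunks above measure elapsed time against Xen system time
(SYSTEM_TIME_HZ, i.e. nanoseconds) rather than the guest TSC, so elapsed
counter ticks are muldiv64(now - load_time, PIT_FREQ, SYSTEM_TIME_HZ). A
sketch of the conversion; this muldiv64 uses unsigned __int128 (a GCC
extension) for brevity where the in-tree version composes 32-bit halves:

    #include <stdint.h>
    #include <stdio.h>

    #define PIT_FREQ        1193181ULL     /* 8254 input clock, Hz */
    #define SYSTEM_TIME_HZ  1000000000ULL  /* Xen system time is in ns */

    /* (a*b)/c with a wide intermediate so a*b cannot overflow 64 bits. */
    static uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
    {
        return (uint64_t)(((unsigned __int128)a * b) / c);
    }

    int main(void)
    {
        uint64_t load_time = 0, now = 54925000;  /* ~54.9ms, in ns */
        uint64_t d = muldiv64(now - load_time, PIT_FREQ, SYSTEM_TIME_HZ);
        printf("%llu PIT ticks elapsed\n", (unsigned long long)d); /* ~65535 */
        return 0;
    }
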
    62.1 --- a/xen/arch/x86/hvm/pmtimer.c	Mon Jun 02 11:35:02 2008 +0900
    62.2 +++ b/xen/arch/x86/hvm/pmtimer.c	Mon Jun 02 11:35:39 2008 +0900
    62.3 @@ -257,7 +257,7 @@ void pmtimer_init(struct vcpu *v)
    62.4  
    62.5      spin_lock_init(&s->lock);
    62.6  
    62.7 -    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / ticks_per_sec(v);
    62.8 +    s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ;
    62.9      s->vcpu = v;
   62.10  
   62.11      /* Intercept port I/O (need two handlers because PM1a_CNT is between
    63.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Jun 02 11:35:02 2008 +0900
    63.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Jun 02 11:35:39 2008 +0900
    63.3 @@ -299,7 +299,7 @@ static void svm_save_cpu_state(struct vc
    63.4      data->msr_efer         = v->arch.hvm_vcpu.guest_efer;
    63.5      data->msr_flags        = -1ULL;
    63.6  
    63.7 -    data->tsc = hvm_get_guest_time(v);
    63.8 +    data->tsc = hvm_get_guest_tsc(v);
    63.9  }
   63.10  
   63.11  
   63.12 @@ -315,7 +315,7 @@ static void svm_load_cpu_state(struct vc
   63.13      v->arch.hvm_vcpu.guest_efer = data->msr_efer;
   63.14      svm_update_guest_efer(v);
   63.15  
   63.16 -    hvm_set_guest_time(v, data->tsc);
   63.17 +    hvm_set_guest_tsc(v, data->tsc);
   63.18  }
   63.19  
   63.20  static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
    64.1 --- a/xen/arch/x86/hvm/vlapic.c	Mon Jun 02 11:35:02 2008 +0900
    64.2 +++ b/xen/arch/x86/hvm/vlapic.c	Mon Jun 02 11:35:39 2008 +0900
    64.3 @@ -22,18 +22,19 @@
    64.4  #include <xen/types.h>
    64.5  #include <xen/mm.h>
    64.6  #include <xen/xmalloc.h>
    64.7 +#include <xen/domain.h>
    64.8  #include <xen/domain_page.h>
    64.9 -#include <asm/page.h>
   64.10  #include <xen/event.h>
   64.11  #include <xen/trace.h>
   64.12 +#include <xen/lib.h>
   64.13 +#include <xen/sched.h>
   64.14 +#include <xen/numa.h>
   64.15 +#include <asm/current.h>
   64.16 +#include <asm/page.h>
   64.17  #include <asm/hvm/hvm.h>
   64.18  #include <asm/hvm/io.h>
   64.19  #include <asm/hvm/support.h>
   64.20 -#include <xen/lib.h>
   64.21 -#include <xen/sched.h>
   64.22 -#include <asm/current.h>
   64.23  #include <asm/hvm/vmx/vmx.h>
   64.24 -#include <xen/numa.h>
   64.25  #include <public/hvm/ioreq.h>
   64.26  #include <public/hvm/params.h>
   64.27  
   64.28 @@ -259,6 +260,7 @@ static void vlapic_init_action(unsigned 
   64.29  {
   64.30      struct vcpu *v = (struct vcpu *)_vcpu;
   64.31      struct domain *d = v->domain;
   64.32 +    bool_t fpu_initialised;
   64.33  
   64.34      /* If the VCPU is not on its way down we have nothing to do. */
   64.35      if ( !test_bit(_VPF_down, &v->pause_flags) )
   64.36 @@ -270,15 +272,12 @@ static void vlapic_init_action(unsigned 
   64.37          return;
   64.38      }
   64.39  
   64.40 +    /* Reset necessary VCPU state. This does not include FPU state. */
   64.41      domain_lock(d);
   64.42 -
   64.43 -    /* Paranoia makes us re-assert VPF_down under the domain lock. */
   64.44 -    set_bit(_VPF_down, &v->pause_flags);
   64.45 -    v->is_initialised = 0;
   64.46 -    clear_bit(_VPF_blocked, &v->pause_flags);
   64.47 -
   64.48 +    fpu_initialised = v->fpu_initialised;
   64.49 +    vcpu_reset(v);
   64.50 +    v->fpu_initialised = fpu_initialised;
   64.51      vlapic_reset(vcpu_vlapic(v));
   64.52 -
   64.53      domain_unlock(d);
   64.54  
   64.55      vcpu_unpause(v);
   64.56 @@ -474,7 +473,6 @@ static uint32_t vlapic_get_tmcct(struct 
   64.57      uint64_t counter_passed;
   64.58  
   64.59      counter_passed = ((hvm_get_guest_time(v) - vlapic->timer_last_update)
   64.60 -                      * 1000000000ULL / ticks_per_sec(v)
   64.61                        / APIC_BUS_CYCLE_NS / vlapic->hw.timer_divisor);
   64.62      tmcct = tmict - counter_passed;
   64.63  
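
With hvm_get_guest_time() now returning nanoseconds directly (see the vpt.c
hunk below), the "* 1000000000ULL / ticks_per_sec(v)" factor drops out of
the TMCCT path above. A sketch of the simplified computation, where
APIC_BUS_CYCLE_NS mirrors the constant used by vlapic.c:

    #include <stdint.h>
    #include <stdio.h>

    #define APIC_BUS_CYCLE_NS 1

    /* Elapsed bus cycles since the timer was last armed, in timer units. */
    static uint32_t vlapic_tmcct(uint32_t tmict, uint64_t now_ns,
                                 uint64_t last_update_ns, uint32_t divisor)
    {
        uint64_t counter_passed = (now_ns - last_update_ns)
                                  / APIC_BUS_CYCLE_NS / divisor;
        return tmict - (uint32_t)counter_passed;
    }

    int main(void)
    {
        /* 1ms elapsed with divide-by-16: 62500 cycles consumed. */
        printf("%u\n", vlapic_tmcct(100000, 1000000, 0, 16)); /* 37500 */
        return 0;
    }
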
    65.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Jun 02 11:35:02 2008 +0900
    65.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Jun 02 11:35:39 2008 +0900
    65.3 @@ -607,7 +607,7 @@ static void vmx_save_cpu_state(struct vc
    65.4      data->msr_syscall_mask = guest_state->msrs[VMX_INDEX_MSR_SYSCALL_MASK];
    65.5  #endif
    65.6  
    65.7 -    data->tsc = hvm_get_guest_time(v);
    65.8 +    data->tsc = hvm_get_guest_tsc(v);
    65.9  }
   65.10  
   65.11  static void vmx_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
   65.12 @@ -625,7 +625,7 @@ static void vmx_load_cpu_state(struct vc
   65.13      v->arch.hvm_vmx.shadow_gs = data->shadow_gs;
   65.14  #endif
   65.15  
   65.16 -    hvm_set_guest_time(v, data->tsc);
   65.17 +    hvm_set_guest_tsc(v, data->tsc);
   65.18  }
   65.19  
   65.20  
    66.1 --- a/xen/arch/x86/hvm/vpt.c	Mon Jun 02 11:35:02 2008 +0900
    66.2 +++ b/xen/arch/x86/hvm/vpt.c	Mon Jun 02 11:35:39 2008 +0900
    66.3 @@ -25,6 +25,39 @@
    66.4  #define mode_is(d, name) \
    66.5      ((d)->arch.hvm_domain.params[HVM_PARAM_TIMER_MODE] == HVMPTM_##name)
    66.6  
    66.7 +void hvm_init_guest_time(struct domain *d)
    66.8 +{
    66.9 +    struct pl_time *pl = &d->arch.hvm_domain.pl_time;
   66.10 +
   66.11 +    spin_lock_init(&pl->pl_time_lock);
   66.12 +    pl->stime_offset = -(u64)get_s_time();
   66.13 +    pl->last_guest_time = 0;
   66.14 +}
   66.15 +
   66.16 +u64 hvm_get_guest_time(struct vcpu *v)
   66.17 +{
   66.18 +    struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
   66.19 +    u64 now;
   66.20 +
   66.21 +    /* Called from device models shared with PV guests. Be careful. */
   66.22 +    ASSERT(is_hvm_vcpu(v));
   66.23 +
   66.24 +    spin_lock(&pl->pl_time_lock);
   66.25 +    now = get_s_time() + pl->stime_offset;
   66.26 +    if ( (int64_t)(now - pl->last_guest_time) >= 0 )
   66.27 +        pl->last_guest_time = now;
   66.28 +    else
   66.29 +        now = pl->last_guest_time;
   66.30 +    spin_unlock(&pl->pl_time_lock);
   66.31 +
   66.32 +    return now + v->arch.hvm_vcpu.stime_offset;
   66.33 +}
   66.34 +
   66.35 +void hvm_set_guest_time(struct vcpu *v, u64 guest_time)
   66.36 +{
   66.37 +    v->arch.hvm_vcpu.stime_offset += guest_time - hvm_get_guest_time(v);
   66.38 +}
   66.39 +
   66.40  static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
   66.41  {
   66.42      struct vcpu *v = pt->vcpu;
   66.43 @@ -348,7 +381,7 @@ void create_periodic_time(
   66.44      pt->vcpu = v;
   66.45      pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
   66.46      pt->irq = irq;
   66.47 -    pt->period_cycles = (u64)period * cpu_khz / 1000000L;
   66.48 +    pt->period_cycles = (u64)period;
   66.49      pt->one_shot = one_shot;
   66.50      pt->scheduled = NOW() + period;
   66.51      /*
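
hvm_get_guest_time() above enforces domain-wide monotonicity: last_guest_time
is a high-water mark that only moves forward, so cross-CPU skew in system
time can never make guest time step backwards. A minimal sketch of the
clamp, with the locking omitted (the real code holds pl_time_lock):

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t last_guest_time;

    /* Signed comparison of the difference handles wraparound gracefully. */
    static uint64_t monotonic_read(uint64_t raw_now)
    {
        if ( (int64_t)(raw_now - last_guest_time) >= 0 )
            last_guest_time = raw_now;
        else
            raw_now = last_guest_time;
        return raw_now;
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)monotonic_read(100)); /* 100 */
        printf("%llu\n", (unsigned long long)monotonic_read(90));  /* 100 */
        printf("%llu\n", (unsigned long long)monotonic_read(110)); /* 110 */
        return 0;
    }
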
    67.1 --- a/xen/arch/x86/mm.c	Mon Jun 02 11:35:02 2008 +0900
    67.2 +++ b/xen/arch/x86/mm.c	Mon Jun 02 11:35:39 2008 +0900
    67.3 @@ -1939,6 +1939,20 @@ int get_page_type(struct page_info *page
    67.4      }
    67.5      while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
    67.6  
    67.7 +    if ( unlikely((x & PGT_type_mask) != type) )
    67.8 +    {
    67.9 +        /* Special pages should not be accessible from devices. */
   67.10 +        struct domain *d = page_get_owner(page);
   67.11 +        if ( d && unlikely(need_iommu(d)) )
   67.12 +        {
   67.13 +            if ( (x & PGT_type_mask) == PGT_writable_page )
   67.14 +                iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
   67.15 +            else if ( type == PGT_writable_page )
   67.16 +                iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
   67.17 +                               page_to_mfn(page));
   67.18 +        }
   67.19 +    }
   67.20 +
   67.21      if ( unlikely(!(nx & PGT_validated)) )
   67.22      {
   67.23          /* Try to validate page type; drop the new reference on failure. */
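
The get_page_type() hunk above keeps the IOMMU in sync with CPU-side page
types for domains with passthrough devices: a page gains a DMA mapping when
it becomes PGT_writable_page and loses it when it stops being one. A sketch
of the transition rule; the enum and printf() calls stand in for the real
type mask and iommu_map_page()/iommu_unmap_page():

    #include <stdio.h>

    enum pg_type { PGT_none, PGT_writable_page, PGT_seg_desc_page };

    /* Only transitions to/from the writable type touch the IOMMU. */
    static void sync_iommu(enum pg_type old_type, enum pg_type new_type,
                           unsigned long mfn)
    {
        if ( old_type == new_type )
            return;
        if ( old_type == PGT_writable_page )
            printf("iommu_unmap_page(mfn=%lx)\n", mfn);
        else if ( new_type == PGT_writable_page )
            printf("iommu_map_page(mfn=%lx)\n", mfn);
    }

    int main(void)
    {
        sync_iommu(PGT_none, PGT_writable_page, 0x1234);          /* map */
        sync_iommu(PGT_writable_page, PGT_seg_desc_page, 0x1234); /* unmap */
        return 0;
    }
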
    68.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Mon Jun 02 11:35:02 2008 +0900
    68.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Mon Jun 02 11:35:39 2008 +0900
    68.3 @@ -267,12 +267,6 @@ out:
    68.4          }
    68.5      }
    68.6  
    68.7 -#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
    68.8 -    /* If p2m table is shared with vtd page-table. */
    68.9 -    if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
   68.10 -        iommu_flush(d, gfn, (u64*)ept_entry);
   68.11 -#endif
   68.12 -
   68.13      return rv;
   68.14  }
   68.15  
    69.1 --- a/xen/arch/x86/mm/p2m.c	Mon Jun 02 11:35:02 2008 +0900
    69.2 +++ b/xen/arch/x86/mm/p2m.c	Mon Jun 02 11:35:39 2008 +0900
    69.3 @@ -325,7 +325,7 @@ p2m_set_entry(struct domain *d, unsigned
    69.4      if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
    69.5          d->arch.p2m->max_mapped_pfn = gfn;
    69.6  
    69.7 -    if ( iommu_enabled && is_hvm_domain(d) )
    69.8 +    if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
    69.9      {
   69.10          if ( p2mt == p2m_ram_rw )
   69.11              for ( i = 0; i < (1UL << page_order); i++ )
   69.12 @@ -868,7 +868,12 @@ p2m_remove_page(struct domain *d, unsign
   69.13      unsigned long i;
   69.14  
   69.15      if ( !paging_mode_translate(d) )
   69.16 +    {
   69.17 +        if ( need_iommu(d) )
   69.18 +            for ( i = 0; i < (1 << page_order); i++ )
   69.19 +                iommu_unmap_page(d, mfn + i);
   69.20          return;
   69.21 +    }
   69.22  
   69.23      P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
   69.24  
   69.25 @@ -899,7 +904,19 @@ guest_physmap_add_entry(struct domain *d
   69.26      int rc = 0;
   69.27  
   69.28      if ( !paging_mode_translate(d) )
   69.29 -        return -EINVAL;
   69.30 +    {
   69.31 +        if ( need_iommu(d) && t == p2m_ram_rw )
   69.32 +        {
   69.33 +            for ( i = 0; i < (1 << page_order); i++ )
   69.34 +                if ( (rc = iommu_map_page(d, mfn + i, mfn + i)) != 0 )
   69.35 +                {
   69.36 +                    while ( i-- > 0 )
   69.37 +                        iommu_unmap_page(d, mfn + i);
   69.38 +                    return rc;
   69.39 +                }
   69.40 +        }
   69.41 +        return 0;
   69.42 +    }
   69.43  
   69.44  #if CONFIG_PAGING_LEVELS == 3
   69.45      /*
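
For non-translated (PV) guests that need an IOMMU, guest_physmap_add_entry()
above installs 1:1 mappings and unwinds the partial work if any map fails.
A sketch of that map-with-rollback loop, where fail_at simulates an
iommu_map_page() error:

    #include <stdio.h>

    static int map_extent(unsigned long mfn, int count, int fail_at)
    {
        int i;

        for ( i = 0; i < count; i++ )
        {
            if ( i == fail_at )              /* simulated mapping failure */
            {
                while ( i-- > 0 )            /* tear down what we mapped */
                    printf("unmap %lx\n", mfn + i);
                return -1;
            }
            printf("map %lx -> %lx\n", mfn + i, mfn + i);
        }
        return 0;
    }

    int main(void)
    {
        return (map_extent(0x1000, 4, 2) == -1) ? 0 : 1;
    }
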
    70.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Jun 02 11:35:02 2008 +0900
    70.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Jun 02 11:35:39 2008 +0900
    70.3 @@ -2799,8 +2799,11 @@ int shadow_track_dirty_vram(struct domai
    70.4      if ( !d->dirty_vram )
    70.5      {
    70.6          /* Just recount from start. */
    70.7 -        for ( i = begin_pfn; i < end_pfn; i++ )
    70.8 -            flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, i, &t));
    70.9 +        for ( i = begin_pfn; i < end_pfn; i++ ) {
   70.10 +            mfn_t mfn = gfn_to_mfn(d, i, &t);
   70.11 +            if ( mfn_x(mfn) != INVALID_MFN )
   70.12 +                flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
   70.13 +        }
   70.14  
   70.15          gdprintk(XENLOG_INFO, "tracking VRAM %lx - %lx\n", begin_pfn, end_pfn);
   70.16  
   70.17 @@ -2840,61 +2843,70 @@ int shadow_track_dirty_vram(struct domai
   70.18          /* Iterate over VRAM to track dirty bits. */
   70.19          for ( i = 0; i < nr; i++ ) {
   70.20              mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
   70.21 -            struct page_info *page = mfn_to_page(mfn);
   70.22 -            u32 count_info = page->u.inuse.type_info & PGT_count_mask;
   70.23 +            struct page_info *page;
   70.24 +            u32 count_info;
   70.25              int dirty = 0;
   70.26              paddr_t sl1ma = d->dirty_vram->sl1ma[i];
   70.27  
   70.28 -            switch (count_info)
   70.29 +            if ( mfn_x(mfn) == INVALID_MFN )
   70.30              {
   70.31 -            case 0:
   70.32 -                /* No guest reference, nothing to track. */
   70.33 -                break;
   70.34 -            case 1:
   70.35 -                /* One guest reference. */
   70.36 -                if ( sl1ma == INVALID_PADDR )
   70.37 +                dirty = 1;
   70.38 +            }
   70.39 +            else
   70.40 +            {
   70.41 +                page = mfn_to_page(mfn);
   70.42 +                count_info = page->u.inuse.type_info & PGT_count_mask;
   70.43 +                switch (count_info)
   70.44                  {
   70.45 -                    /* We don't know which sl1e points to this, too bad. */
   70.46 -                    dirty = 1;
   70.47 -                    /* TODO: Heuristics for finding the single mapping of
   70.48 -                     * this gmfn */
   70.49 -                    flush_tlb |= sh_remove_all_mappings(d->vcpu[0], gfn_to_mfn(d, begin_pfn + i, &t));
   70.50 -                }
   70.51 -                else
   70.52 -                {
   70.53 -                    /* Hopefully the most common case: only one mapping,
   70.54 -                     * whose dirty bit we can use. */
   70.55 -                    l1_pgentry_t *sl1e;
   70.56 +                case 0:
   70.57 +                    /* No guest reference, nothing to track. */
   70.58 +                    break;
   70.59 +                case 1:
   70.60 +                    /* One guest reference. */
   70.61 +                    if ( sl1ma == INVALID_PADDR )
   70.62 +                    {
   70.63 +                        /* We don't know which sl1e points to this, too bad. */
   70.64 +                        dirty = 1;
   70.65 +                        /* TODO: Heuristics for finding the single mapping of
   70.66 +                         * this gmfn */
   70.67 +                        flush_tlb |= sh_remove_all_mappings(d->vcpu[0], mfn);
   70.68 +                    }
   70.69 +                    else
   70.70 +                    {
   70.71 +                        /* Hopefully the most common case: only one mapping,
   70.72 +                         * whose dirty bit we can use. */
   70.73 +                        l1_pgentry_t *sl1e;
   70.74  #ifdef __i386__
   70.75 -                    void *sl1p = map_sl1p;
   70.76 -                    unsigned long sl1mfn = paddr_to_pfn(sl1ma);
   70.77 -
   70.78 -                    if ( sl1mfn != map_mfn ) {
   70.79 -                        if ( map_sl1p )
   70.80 -                            sh_unmap_domain_page(map_sl1p);
   70.81 -                        map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
   70.82 -                        map_mfn = sl1mfn;
   70.83 -                    }
   70.84 -                    sl1e = sl1p + (sl1ma & ~PAGE_MASK);
   70.85 +                        void *sl1p = map_sl1p;
   70.86 +                        unsigned long sl1mfn = paddr_to_pfn(sl1ma);
   70.87 +
   70.88 +                        if ( sl1mfn != map_mfn ) {
   70.89 +                            if ( map_sl1p )
   70.90 +                                sh_unmap_domain_page(map_sl1p);
   70.91 +                            map_sl1p = sl1p = sh_map_domain_page(_mfn(sl1mfn));
   70.92 +                            map_mfn = sl1mfn;
   70.93 +                        }
   70.94 +                        sl1e = sl1p + (sl1ma & ~PAGE_MASK);
   70.95  #else
   70.96 -                    sl1e = maddr_to_virt(sl1ma);
   70.97 +                        sl1e = maddr_to_virt(sl1ma);
   70.98  #endif
   70.99  
  70.100 -                    if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
  70.101 -                    {
  70.102 -                        dirty = 1;
  70.103 -                        /* Note: this is atomic, so we may clear a
  70.104 -                         * _PAGE_ACCESSED set by another processor. */
  70.105 -                        l1e_remove_flags(*sl1e, _PAGE_DIRTY);
  70.106 -                        flush_tlb = 1;
  70.107 +                        if ( l1e_get_flags(*sl1e) & _PAGE_DIRTY )
  70.108 +                        {
  70.109 +                            dirty = 1;
  70.110 +                            /* Note: this is atomic, so we may clear a
  70.111 +                             * _PAGE_ACCESSED set by another processor. */
  70.112 +                            l1e_remove_flags(*sl1e, _PAGE_DIRTY);
  70.113 +                            flush_tlb = 1;
  70.114 +                        }
  70.115                      }
  70.116 +                    break;
  70.117 +                default:
   70.118 +                    /* More than one guest reference;
   70.119 +                     * we can't afford to track that. */
  70.120 +                    dirty = 1;
  70.121 +                    break;
  70.122                  }
  70.123 -                break;
  70.124 -            default:
  70.125 -                /* More than one guest reference,
  70.126 -                 * we don't afford tracking that. */
  70.127 -                dirty = 1;
  70.128 -                break;
  70.129              }
  70.130  
  70.131              if ( dirty )
  70.132 @@ -2916,8 +2928,11 @@ int shadow_track_dirty_vram(struct domai
  70.133              {
  70.134                  /* was clean for more than two seconds, try to disable guest
  70.135                   * write access */
  70.136 -                for ( i = begin_pfn; i < end_pfn; i++ )
  70.137 -                    flush_tlb |= sh_remove_write_access(d->vcpu[0], gfn_to_mfn(d, i, &t), 1, 0);
  70.138 +                for ( i = begin_pfn; i < end_pfn; i++ ) {
  70.139 +                    mfn_t mfn = gfn_to_mfn(d, i, &t);
   70.140 +                    if ( mfn_x(mfn) != INVALID_MFN )
  70.141 +                        flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0);
  70.142 +                }
  70.143                  d->dirty_vram->last_dirty = -1;
  70.144              }
  70.145              rc = 0;
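
The dirty-VRAM rework above guards every gfn_to_mfn() result against
INVALID_MFN before touching page state (unmapped frames are simply treated
as dirty) and keeps the single-mapping fast path that reads and clears the
shadow l1e's dirty bit. A sketch of that read-and-clear step; a plain
unsigned int stands in for l1_pgentry_t:

    #include <stdio.h>

    #define _PAGE_DIRTY 0x40u   /* x86 PTE dirty bit */

    /* Report whether the frame was written, clearing the bit as we go
     * (the real code does this atomically on the live shadow entry). */
    static int test_and_clear_dirty(unsigned int *sl1e)
    {
        if ( *sl1e & _PAGE_DIRTY )
        {
            *sl1e &= ~_PAGE_DIRTY;
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        unsigned int pte = 0x67;   /* present|rw|user|accessed|dirty */
        printf("%d %d\n", test_and_clear_dirty(&pte),
                          test_and_clear_dirty(&pte));   /* 1 0 */
        return 0;
    }
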
    71.1 --- a/xen/arch/x86/msi.c	Mon Jun 02 11:35:02 2008 +0900
    71.2 +++ b/xen/arch/x86/msi.c	Mon Jun 02 11:35:39 2008 +0900
    71.3 @@ -25,6 +25,7 @@
    71.4  #include <mach_apic.h>
    71.5  #include <io_ports.h>
    71.6  #include <public/physdev.h>
    71.7 +#include <xen/iommu.h>
    71.8  
    71.9  extern int msi_irq_enable;
   71.10  
   71.11 @@ -156,6 +157,9 @@ void read_msi_msg(unsigned int irq, stru
   71.12      default:
   71.13          BUG();
   71.14      }
   71.15 +
   71.16 +    if ( vtd_enabled )
   71.17 +        msi_msg_read_remap_rte(entry, msg);
   71.18  }
   71.19  
   71.20  static int set_vector_msi(struct msi_desc *entry)
   71.21 @@ -202,6 +206,9 @@ void write_msi_msg(unsigned int irq, str
   71.22  {
   71.23      struct msi_desc *entry = irq_desc[irq].msi_desc;
   71.24  
   71.25 +    if ( vtd_enabled )
   71.26 +        msi_msg_write_remap_rte(entry, msg);
   71.27 +
   71.28      switch ( entry->msi_attrib.type )
   71.29      {
   71.30      case PCI_CAP_ID_MSI:
    72.1 --- a/xen/arch/x86/setup.c	Mon Jun 02 11:35:02 2008 +0900
    72.2 +++ b/xen/arch/x86/setup.c	Mon Jun 02 11:35:39 2008 +0900
    72.3 @@ -1100,6 +1100,14 @@ void arch_get_xen_caps(xen_capabilities_
    72.4  #endif
    72.5  }
    72.6  
    72.7 +int xen_in_range(paddr_t start, paddr_t end)
    72.8 +{
    72.9 +    start = max_t(paddr_t, start, xenheap_phys_start);
   72.10 +    end = min_t(paddr_t, end, xenheap_phys_end);
   72.11 + 
   72.12 +    return start < end; 
   72.13 +}
   72.14 +
   72.15  /*
   72.16   * Local variables:
   72.17   * mode: C
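
xen_in_range() clamps the queried range to the Xen heap and reports overlap
iff the clamped interval is non-empty; tboot_in_range() below uses the same
idiom against the tboot region. A generic sketch of the half-open overlap
test:

    #include <stdio.h>

    typedef unsigned long paddr_t;

    #define max_t(t, a, b) ((t)(a) > (t)(b) ? (t)(a) : (t)(b))
    #define min_t(t, a, b) ((t)(a) < (t)(b) ? (t)(a) : (t)(b))

    /* Overlap of [start, end) with [lo, hi): clamp, then compare. */
    static int ranges_overlap(paddr_t start, paddr_t end,
                              paddr_t lo, paddr_t hi)
    {
        start = max_t(paddr_t, start, lo);
        end   = min_t(paddr_t, end, hi);
        return start < end;
    }

    int main(void)
    {
        printf("%d\n", ranges_overlap(0x1000, 0x2000, 0x1800, 0x3000)); /* 1 */
        printf("%d\n", ranges_overlap(0x1000, 0x1800, 0x1800, 0x3000)); /* 0 */
        return 0;
    }
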
    73.1 --- a/xen/arch/x86/smpboot.c	Mon Jun 02 11:35:02 2008 +0900
    73.2 +++ b/xen/arch/x86/smpboot.c	Mon Jun 02 11:35:39 2008 +0900
    73.3 @@ -1391,6 +1391,11 @@ void enable_nonboot_cpus(void)
    73.4  		panic("Not enough cpus");
    73.5  	}
    73.6  	cpus_clear(frozen_cpus);
    73.7 +
    73.8 +	/*
    73.9 +	 * Clean up the warm-reset vector possibly left dangling after sleep.
   73.10 +	 */
   73.11 +	smpboot_restore_warm_reset_vector();
   73.12  }
   73.13  #else /* ... !CONFIG_HOTPLUG_CPU */
   73.14  int __cpu_disable(void)
    74.1 --- a/xen/arch/x86/tboot.c	Mon Jun 02 11:35:02 2008 +0900
    74.2 +++ b/xen/arch/x86/tboot.c	Mon Jun 02 11:35:39 2008 +0900
    74.3 @@ -96,6 +96,18 @@ int tboot_in_measured_env(void)
    74.4      return (g_tboot_shared != NULL);
    74.5  }
    74.6  
    74.7 +int tboot_in_range(paddr_t start, paddr_t end)
    74.8 +{
    74.9 +    if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 )
   74.10 +        return 0;
   74.11 +
   74.12 +    start = max_t(paddr_t, start, g_tboot_shared->tboot_base);
   74.13 +    end = min_t(paddr_t, end, 
   74.14 +                g_tboot_shared->tboot_base + g_tboot_shared->tboot_size);
   74.15 + 
   74.16 +    return start < end; 
   74.17 +}
   74.18 +
   74.19  /*
   74.20   * Local variables:
   74.21   * mode: C
    75.1 --- a/xen/arch/x86/x86_emulate/x86_emulate.c	Mon Jun 02 11:35:02 2008 +0900
    75.2 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c	Mon Jun 02 11:35:39 2008 +0900
    75.3 @@ -2105,12 +2105,14 @@ x86_emulate(
    75.4          break;
    75.5      }
    75.6  
    75.7 +    /* Inject #DB if single-step tracing was enabled at instruction start. */
    75.8 +    if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
    75.9 +         (ops->inject_hw_exception != NULL) )
   75.10 +        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
   75.11 +
   75.12      /* Commit shadow register state. */
   75.13      _regs.eflags &= ~EFLG_RF;
   75.14      *ctxt->regs = _regs;
   75.15 -    if ( (_regs.eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
   75.16 -         (ops->inject_hw_exception != NULL) )
   75.17 -        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
   75.18  
   75.19   done:
   75.20      return rc;
    76.1 --- a/xen/common/domain.c	Mon Jun 02 11:35:02 2008 +0900
    76.2 +++ b/xen/common/domain.c	Mon Jun 02 11:35:39 2008 +0900
    76.3 @@ -637,7 +637,7 @@ void vcpu_reset(struct vcpu *v)
    76.4  {
    76.5      struct domain *d = v->domain;
    76.6  
    76.7 -    domain_pause(d);
    76.8 +    vcpu_pause(v);
    76.9      domain_lock(d);
   76.10  
   76.11      arch_vcpu_reset(v);
   76.12 @@ -653,7 +653,7 @@ void vcpu_reset(struct vcpu *v)
   76.13      clear_bit(_VPF_blocked, &v->pause_flags);
   76.14  
   76.15      domain_unlock(v->domain);
   76.16 -    domain_unpause(d);
   76.17 +    vcpu_unpause(v);
   76.18  }
   76.19  
   76.20  
    77.1 --- a/xen/common/grant_table.c	Mon Jun 02 11:35:02 2008 +0900
    77.2 +++ b/xen/common/grant_table.c	Mon Jun 02 11:35:39 2008 +0900
    77.3 @@ -32,6 +32,8 @@
    77.4  #include <xen/trace.h>
    77.5  #include <xen/guest_access.h>
    77.6  #include <xen/domain_page.h>
    77.7 +#include <xen/iommu.h>
    77.8 +#include <xen/paging.h>
    77.9  #include <xsm/xsm.h>
   77.10  
   77.11  #ifndef max_nr_grant_frames
   77.12 @@ -196,8 +198,9 @@ static void
   77.13      struct domain *ld, *rd;
   77.14      struct vcpu   *led;
   77.15      int            handle;
   77.16 -    unsigned long  frame = 0;
   77.17 +    unsigned long  frame = 0, nr_gets = 0;
   77.18      int            rc = GNTST_okay;
   77.19 +    u32            old_pin;
   77.20      unsigned int   cache_flags;
   77.21      struct active_grant_entry *act;
   77.22      struct grant_mapping *mt;
   77.23 @@ -318,6 +321,7 @@ static void
   77.24          }
   77.25      }
   77.26  
   77.27 +    old_pin = act->pin;
   77.28      if ( op->flags & GNTMAP_device_map )
   77.29          act->pin += (op->flags & GNTMAP_readonly) ?
   77.30              GNTPIN_devr_inc : GNTPIN_devw_inc;
   77.31 @@ -361,20 +365,17 @@ static void
   77.32              rc = GNTST_general_error;
   77.33              goto undo_out;
   77.34          }
   77.35 -        
   77.36 +
   77.37 +        nr_gets++;
   77.38          if ( op->flags & GNTMAP_host_map )
   77.39          {
   77.40              rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
   77.41              if ( rc != GNTST_okay )
   77.42 -            {
   77.43 -                if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
   77.44 -                    put_page_type(mfn_to_page(frame));
   77.45 -                put_page(mfn_to_page(frame));
   77.46                  goto undo_out;
   77.47 -            }
   77.48  
   77.49              if ( op->flags & GNTMAP_device_map )
   77.50              {
   77.51 +                nr_gets++;
   77.52                  (void)get_page(mfn_to_page(frame), rd);
   77.53                  if ( !(op->flags & GNTMAP_readonly) )
   77.54                      get_page_type(mfn_to_page(frame), PGT_writable_page);
   77.55 @@ -382,6 +383,17 @@ static void
   77.56          }
   77.57      }
   77.58  
   77.59 +    if ( need_iommu(ld) &&
   77.60 +         !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
   77.61 +         (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
   77.62 +    {
   77.63 +        if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
   77.64 +        {
   77.65 +            rc = GNTST_general_error;
   77.66 +            goto undo_out;
   77.67 +        }
   77.68 +    }
   77.69 +
   77.70      TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
   77.71  
   77.72      mt = &maptrack_entry(ld->grant_table, handle);
   77.73 @@ -397,6 +409,19 @@ static void
   77.74      return;
   77.75  
   77.76   undo_out:
   77.77 +    if ( nr_gets > 1 )
   77.78 +    {
   77.79 +        if ( !(op->flags & GNTMAP_readonly) )
   77.80 +            put_page_type(mfn_to_page(frame));
   77.81 +        put_page(mfn_to_page(frame));
   77.82 +    }
   77.83 +    if ( nr_gets > 0 )
   77.84 +    {
   77.85 +        if ( gnttab_host_mapping_get_page_type(op, ld, rd) )
   77.86 +            put_page_type(mfn_to_page(frame));
   77.87 +        put_page(mfn_to_page(frame));
   77.88 +    }
   77.89 +
   77.90      spin_lock(&rd->grant_table->lock);
   77.91  
   77.92      act = &active_entry(rd->grant_table, op->ref);
   77.93 @@ -451,6 +476,7 @@ static void
   77.94      struct active_grant_entry *act;
   77.95      grant_entry_t   *sha;
   77.96      s16              rc = 0;
   77.97 +    u32              old_pin;
   77.98  
   77.99      ld = current->domain;
  77.100  
  77.101 @@ -497,6 +523,7 @@ static void
  77.102  
  77.103      act = &active_entry(rd->grant_table, op->map->ref);
  77.104      sha = &shared_entry(rd->grant_table, op->map->ref);
  77.105 +    old_pin = act->pin;
  77.106  
  77.107      if ( op->frame == 0 )
  77.108      {
  77.109 @@ -534,6 +561,17 @@ static void
  77.110              act->pin -= GNTPIN_hstw_inc;
  77.111      }
  77.112  
  77.113 +    if ( need_iommu(ld) &&
  77.114 +         (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
  77.115 +         !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
  77.116 +    {
  77.117 +        if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) )
  77.118 +        {
  77.119 +            rc = GNTST_general_error;
  77.120 +            goto unmap_out;
  77.121 +        }
  77.122 +    }
  77.123 +
  77.124      /* If just unmapped a writable mapping, mark as dirtied */
  77.125      if ( !(op->flags & GNTMAP_readonly) )
  77.126           gnttab_mark_dirty(rd, op->frame);
  77.127 @@ -1074,6 +1112,11 @@ gnttab_transfer(
  77.128              goto copyback;
  77.129          }
  77.130  
  77.131 +#ifndef __ia64__ /* IA64 implicitly replaces the old page in steal_page(). */
  77.132 +        guest_physmap_remove_page(d, gop.mfn, mfn, 0);
  77.133 +#endif
  77.134 +        flush_tlb_mask(d->domain_dirty_cpumask);
  77.135 +
  77.136          /* Find the target domain. */
  77.137          if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) )
  77.138          {
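
The grant-map path above replaces per-failure-site cleanup with a count
(nr_gets) of page references actually taken, so the undo_out label releases
exactly those and nothing more. A sketch of the staged-unwind pattern;
puts() stands in for put_page()/put_page_type():

    #include <stdio.h>

    static int map_grant(int fail_host_map, int device_map)
    {
        int nr_gets = 0;

        nr_gets++;                 /* base reference (get_page) */
        if ( fail_host_map )
            goto undo_out;
        if ( device_map )
            nr_gets++;             /* extra reference for the device map */
        return 0;

     undo_out:
        if ( nr_gets > 1 )
            puts("drop device-map reference");
        if ( nr_gets > 0 )
            puts("drop host-map reference");
        return -1;
    }

    int main(void)
    {
        return (map_grant(1, 0) == -1) ? 0 : 1;
    }
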
    78.1 --- a/xen/common/libelf/libelf-private.h	Mon Jun 02 11:35:02 2008 +0900
    78.2 +++ b/xen/common/libelf/libelf-private.h	Mon Jun 02 11:35:39 2008 +0900
    78.3 @@ -43,7 +43,7 @@
    78.4  #define bswap_16(x) swap16(x)
    78.5  #define bswap_32(x) swap32(x)
    78.6  #define bswap_64(x) swap64(x)
    78.7 -#elif defined(__linux__) || defined(__Linux__)
    78.8 +#elif defined(__linux__) || defined(__Linux__) || defined(__MINIOS__)
    78.9  #include <byteswap.h>
   78.10  #else
   78.11  #error Unsupported OS
    79.1 --- a/xen/common/memory.c	Mon Jun 02 11:35:02 2008 +0900
    79.2 +++ b/xen/common/memory.c	Mon Jun 02 11:35:39 2008 +0900
    79.3 @@ -124,12 +124,9 @@ static void populate_physmap(struct memo
    79.4          }
    79.5  
    79.6          mfn = page_to_mfn(page);
    79.7 +        guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
    79.8  
    79.9 -        if ( unlikely(paging_mode_translate(d)) )
   79.10 -        {
   79.11 -            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
   79.12 -        }
   79.13 -        else
   79.14 +        if ( !paging_mode_translate(d) )
   79.15          {
   79.16              for ( j = 0; j < (1 << a->extent_order); j++ )
   79.17                  set_gpfn_from_mfn(mfn + j, gpfn + j);
   79.18 @@ -436,11 +433,9 @@ static long memory_exchange(XEN_GUEST_HA
   79.19                  &gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
   79.20  
   79.21              mfn = page_to_mfn(page);
   79.22 -            if ( unlikely(paging_mode_translate(d)) )
   79.23 -            {
   79.24 -                guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
   79.25 -            }
   79.26 -            else
   79.27 +            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
   79.28 +
   79.29 +            if ( !paging_mode_translate(d) )
   79.30              {
   79.31                  for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   79.32                      set_gpfn_from_mfn(mfn + k, gpfn + k);
    80.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Mon Jun 02 11:35:02 2008 +0900
    80.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Mon Jun 02 11:35:39 2008 +0900
    80.3 @@ -635,6 +635,16 @@ static void amd_iommu_return_device(
    80.4      reassign_device(s, t, bus, devfn);
    80.5  }
    80.6  
    80.7 +static int amd_iommu_group_id(u8 bus, u8 devfn)
    80.8 +{
    80.9 +    int rt;
   80.10 +    int bdf = (bus << 8) | devfn;
   80.11 +    rt = ( bdf < ivrs_bdf_entries ) ?
   80.12 +        ivrs_mappings[bdf].dte_requestor_id :
   80.13 +        bdf;
   80.14 +    return rt;
   80.15 +}
   80.16 +
   80.17  struct iommu_ops amd_iommu_ops = {
   80.18      .init = amd_iommu_domain_init,
   80.19      .assign_device  = amd_iommu_assign_device,
   80.20 @@ -642,4 +652,5 @@ struct iommu_ops amd_iommu_ops = {
   80.21      .map_page = amd_iommu_map_page,
   80.22      .unmap_page = amd_iommu_unmap_page,
   80.23      .reassign_device = amd_iommu_return_device,
   80.24 +    .get_device_group_id = amd_iommu_group_id,
   80.25  };
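
amd_iommu_group_id() above maps a device to its IVRS DTE requestor ID when
the BDF falls inside the table, so devices aliasing to one requestor ID form
a single assignment group; out-of-range BDFs fall back to themselves. A
sketch with a mock IVRS table (the table size here is illustrative):

    #include <stdio.h>

    #define IVRS_BDF_ENTRIES 256   /* illustrative, not the real bound */

    static unsigned short dte_requestor_id[IVRS_BDF_ENTRIES];

    static int group_id(unsigned char bus, unsigned char devfn)
    {
        int bdf = (bus << 8) | devfn;
        return (bdf < IVRS_BDF_ENTRIES) ? dte_requestor_id[bdf] : bdf;
    }

    int main(void)
    {
        dte_requestor_id[0x10] = 42;        /* 00:02.0 aliases to ID 42 */
        printf("%d\n", group_id(0, 0x10));  /* 42 */
        printf("%d\n", group_id(4, 0x08));  /* 0x0408: beyond the table */
        return 0;
    }
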
    81.1 --- a/xen/drivers/passthrough/iommu.c	Mon Jun 02 11:35:02 2008 +0900
    81.2 +++ b/xen/drivers/passthrough/iommu.c	Mon Jun 02 11:35:39 2008 +0900
    81.3 @@ -15,15 +15,21 @@
    81.4  
    81.5  #include <xen/sched.h>
    81.6  #include <xen/iommu.h>
    81.7 +#include <xen/paging.h>
    81.8 +#include <xen/guest_access.h>
    81.9  
   81.10  extern struct iommu_ops intel_iommu_ops;
   81.11  extern struct iommu_ops amd_iommu_ops;
   81.12 +static int iommu_populate_page_table(struct domain *d);
   81.13  int intel_vtd_setup(void);
   81.14  int amd_iov_detect(void);
   81.15  
   81.16  int iommu_enabled = 1;
   81.17  boolean_param("iommu", iommu_enabled);
   81.18  
   81.19 +int iommu_pv_enabled = 0;
   81.20 +boolean_param("iommu_pv", iommu_pv_enabled);
   81.21 +
   81.22  int iommu_domain_init(struct domain *domain)
   81.23  {
   81.24      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   81.25 @@ -54,11 +60,46 @@ int iommu_domain_init(struct domain *dom
   81.26  int assign_device(struct domain *d, u8 bus, u8 devfn)
   81.27  {
   81.28      struct hvm_iommu *hd = domain_hvm_iommu(d);
   81.29 +    int rc;
   81.30  
   81.31      if ( !iommu_enabled || !hd->platform_ops )
   81.32          return 0;
   81.33  
   81.34 -    return hd->platform_ops->assign_device(d, bus, devfn);
   81.35 +    if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
   81.36 +        return rc;
   81.37 +
   81.38 +    if ( has_iommu_pdevs(d) && !need_iommu(d) )
   81.39 +    {
   81.40 +        d->need_iommu = 1;
   81.41 +        return iommu_populate_page_table(d);
   81.42 +    }
   81.43 +    return 0;
   81.44 +}
   81.45 +
   81.46 +static int iommu_populate_page_table(struct domain *d)
   81.47 +{
   81.48 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
   81.49 +    struct page_info *page;
   81.50 +    int rc;
   81.51 +
   81.52 +    spin_lock(&d->page_alloc_lock);
   81.53 +
   81.54 +    list_for_each_entry ( page, &d->page_list, list )
   81.55 +    {
   81.56 +        if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
   81.57 +        {
   81.58 +            rc = hd->platform_ops->map_page(
   81.59 +                d, mfn_to_gmfn(d, page_to_mfn(page)), page_to_mfn(page));
   81.60 +            if ( rc )
   81.61 +            {
   81.62 +                spin_unlock(&d->page_alloc_lock);
   81.63 +                hd->platform_ops->teardown(d);
   81.64 +                return rc;
   81.65 +            }
   81.66 +        }
   81.67 +    }
   81.68 +    spin_unlock(&d->page_alloc_lock);
   81.69 +    return 0;
   81.70  }
   81.71  
   81.72  void iommu_domain_destroy(struct domain *d)
   81.73 @@ -137,7 +178,13 @@ void deassign_device(struct domain *d, u
   81.74      if ( !iommu_enabled || !hd->platform_ops )
   81.75          return;
   81.76  
   81.77 -    return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
   81.78 +    hd->platform_ops->reassign_device(d, dom0, bus, devfn);
   81.79 +
   81.80 +    if ( !has_iommu_pdevs(d) && need_iommu(d) )
   81.81 +    {
   81.82 +        d->need_iommu = 0;
   81.83 +        hd->platform_ops->teardown(d);
   81.84 +    }
   81.85  }
   81.86  
   81.87  static int iommu_setup(void)
   81.88 @@ -160,7 +207,56 @@ static int iommu_setup(void)
   81.89      iommu_enabled = (rc == 0);
   81.90  
   81.91   out:
   81.92 +    if ( !iommu_enabled || !vtd_enabled )
   81.93 +        iommu_pv_enabled = 0;
   81.94      printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
   81.95 +    if ( iommu_enabled )
   81.96 +        printk("I/O virtualisation for PV guests %sabled\n",
   81.97 +               iommu_pv_enabled ? "en" : "dis");
   81.98      return rc;
   81.99  }
  81.100  __initcall(iommu_setup);
  81.101 +
  81.102 +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
  81.103 +    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
  81.104 +{
  81.105 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
  81.106 +    struct pci_dev *pdev;
  81.107 +    int group_id, sdev_id;
  81.108 +    u32 bdf;
  81.109 +    int i = 0;
  81.110 +    struct iommu_ops *ops = hd->platform_ops;
  81.111 +
  81.112 +    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
  81.113 +        return 0;
  81.114 +
  81.115 +    group_id = ops->get_device_group_id(bus, devfn);
  81.116 +
  81.117 +    list_for_each_entry(pdev,
  81.118 +        &(dom0->arch.hvm_domain.hvm_iommu.pdev_list), list)
  81.119 +    {
  81.120 +        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
  81.121 +            continue;
  81.122 +
  81.123 +        sdev_id = ops->get_device_group_id(pdev->bus, pdev->devfn);
  81.124 +        if ( (sdev_id == group_id) && (i < max_sdevs) )
  81.125 +        {
  81.126 +            bdf = 0;
  81.127 +            bdf |= (pdev->bus & 0xff) << 16;
  81.128 +            bdf |= (pdev->devfn & 0xff) << 8;
  81.129 +            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
  81.130 +                return -1;
  81.131 +            i++;
  81.132 +        }
  81.133 +    }
  81.134 +
  81.135 +    return i;
  81.136 +}
  81.137 +/*
  81.138 + * Local variables:
  81.139 + * mode: C
  81.140 + * c-set-style: "BSD"
  81.141 + * c-basic-offset: 4
  81.142 + * indent-tabs-mode: nil
  81.143 + * End:
  81.144 + */
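
iommu_get_device_group() above packs each co-grouped device into a u32 for
the guest-supplied buffer: bus in bits 23:16 and devfn in bits 15:8, with
the low byte unused. A sketch of the encoding and its inverse:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t encode_bdf(uint8_t bus, uint8_t devfn)
    {
        return ((uint32_t)(bus & 0xff) << 16) | ((uint32_t)(devfn & 0xff) << 8);
    }

    int main(void)
    {
        uint32_t bdf = encode_bdf(0x02, 0x18);   /* 02:03.0 */
        printf("bus=%02x devfn=%02x\n",
               (bdf >> 16) & 0xff, (bdf >> 8) & 0xff);
        return 0;
    }
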
    82.1 --- a/xen/drivers/passthrough/vtd/dmar.c	Mon Jun 02 11:35:02 2008 +0900
    82.2 +++ b/xen/drivers/passthrough/vtd/dmar.c	Mon Jun 02 11:35:39 2008 +0900
    82.3 @@ -147,39 +147,6 @@ struct acpi_drhd_unit * acpi_find_matche
    82.4      return NULL;
    82.5  }
    82.6  
    82.7 -struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev)
    82.8 -{
    82.9 -    struct acpi_rmrr_unit *rmrr;
   82.10 -
   82.11 -    list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
   82.12 -        if ( acpi_pci_device_match(rmrr->devices,
   82.13 -                                   rmrr->devices_cnt, dev) )
   82.14 -            return rmrr;
   82.15 -
   82.16 -    return NULL;
   82.17 -}
   82.18 -
   82.19 -struct acpi_atsr_unit * acpi_find_matched_atsr_unit(struct pci_dev *dev)
   82.20 -{
   82.21 -    struct acpi_atsr_unit *atsru;
   82.22 -    struct acpi_atsr_unit *all_ports_atsru;
   82.23 -
   82.24 -    all_ports_atsru = NULL;
   82.25 -    list_for_each_entry ( atsru, &acpi_atsr_units, list )
   82.26 -    {
   82.27 -        if ( atsru->all_ports )
   82.28 -            all_ports_atsru = atsru;
   82.29 -        if ( acpi_pci_device_match(atsru->devices,
   82.30 -                                   atsru->devices_cnt, dev) )
   82.31 -            return atsru;
   82.32 -    }
   82.33 -
   82.34 -    if ( all_ports_atsru )
   82.35 -        return all_ports_atsru;;
   82.36 -
   82.37 -    return NULL;
   82.38 -}
   82.39 -
   82.40  static int scope_device_count(void *start, void *end)
   82.41  {
   82.42      struct acpi_dev_scope *scope;
    83.1 --- a/xen/drivers/passthrough/vtd/dmar.h	Mon Jun 02 11:35:02 2008 +0900
    83.2 +++ b/xen/drivers/passthrough/vtd/dmar.h	Mon Jun 02 11:35:39 2008 +0900
    83.3 @@ -86,7 +86,6 @@ struct acpi_atsr_unit {
    83.4      }
    83.5  
    83.6  struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *dev);
    83.7 -struct acpi_rmrr_unit * acpi_find_matched_rmrr_unit(struct pci_dev *dev);
    83.8  
    83.9  #define DMAR_TYPE 1
   83.10  #define RMRR_TYPE 2
    84.1 --- a/xen/drivers/passthrough/vtd/extern.h	Mon Jun 02 11:35:02 2008 +0900
    84.2 +++ b/xen/drivers/passthrough/vtd/extern.h	Mon Jun 02 11:35:39 2008 +0900
    84.3 @@ -27,8 +27,7 @@ extern struct qi_ctrl *qi_ctrl;
    84.4  extern struct ir_ctrl *ir_ctrl;
    84.5  
    84.6  void print_iommu_regs(struct acpi_drhd_unit *drhd);
    84.7 -void print_vtd_entries(struct domain *d, struct iommu *iommu,
    84.8 -                       int bus, int devfn, unsigned long gmfn);
    84.9 +void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
   84.10  void pdev_flr(u8 bus, u8 devfn);
   84.11  
   84.12  int qinval_setup(struct iommu *iommu);
    85.1 --- a/xen/drivers/passthrough/vtd/intremap.c	Mon Jun 02 11:35:02 2008 +0900
    85.2 +++ b/xen/drivers/passthrough/vtd/intremap.c	Mon Jun 02 11:35:39 2008 +0900
    85.3 @@ -48,14 +48,14 @@ static void remap_entry_to_ioapic_rte(
    85.4  {
    85.5      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
    85.6      struct IO_APIC_route_remap_entry *remap_rte;
    85.7 -    unsigned int index;
    85.8 +    int index = 0;
    85.9      unsigned long flags;
   85.10      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
   85.11  
   85.12 -    if ( ir_ctrl == NULL )
   85.13 +    if ( ir_ctrl == NULL || ir_ctrl->iremap_index < 0 )
   85.14      {
   85.15          dprintk(XENLOG_ERR VTDPREFIX,
   85.16 -                "remap_entry_to_ioapic_rte: ir_ctl == NULL");
   85.17 +                "remap_entry_to_ioapic_rte: ir_ctl is not ready\n");
   85.18          return;
   85.19      }
   85.20  
   85.21 @@ -63,11 +63,8 @@ static void remap_entry_to_ioapic_rte(
   85.22      index = (remap_rte->index_15 << 15) + remap_rte->index_0_14;
   85.23  
   85.24      if ( index > ir_ctrl->iremap_index )
   85.25 -    {
   85.26 -        dprintk(XENLOG_ERR VTDPREFIX,
   85.27 -            "Index is larger than remap table entry size. Error!\n");
   85.28 -        return;
   85.29 -    }
   85.30 +        panic("%s: index (%d) is larger than remap table entry size (%d)!\n",
   85.31 +              __func__, index, ir_ctrl->iremap_index);
   85.32  
   85.33      spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
   85.34  
   85.35 @@ -81,79 +78,90 @@ static void remap_entry_to_ioapic_rte(
   85.36      old_rte->trigger = iremap_entry->lo.tm;
   85.37      old_rte->__reserved_2 = 0;
   85.38      old_rte->dest.logical.__reserved_1 = 0;
   85.39 -    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst;
   85.40 +    old_rte->dest.logical.logical_dest = iremap_entry->lo.dst >> 8;
   85.41  
   85.42      unmap_vtd_domain_page(iremap_entries);
   85.43      spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
   85.44  }
   85.45  
   85.46  static void ioapic_rte_to_remap_entry(struct iommu *iommu,
   85.47 -    int apic_id, struct IO_APIC_route_entry *old_rte)
   85.48 +    int apic_id, struct IO_APIC_route_entry *old_rte,
   85.49 +    unsigned int rte_upper, unsigned int value)
   85.50  {
   85.51      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
   85.52 +    struct iremap_entry new_ire;
   85.53      struct IO_APIC_route_remap_entry *remap_rte;
   85.54 -    unsigned int index;
   85.55 +    struct IO_APIC_route_entry new_rte;
   85.56 +    int index;
   85.57      unsigned long flags;
   85.58 -    int ret = 0;
   85.59      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
   85.60  
   85.61      remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
   85.62      spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
   85.63 -    index = ir_ctrl->iremap_index;
   85.64 -    if ( index > IREMAP_ENTRY_NR - 1 )
   85.65 +
   85.66 +    if ( remap_rte->format == 0 )
   85.67      {
   85.68 -        dprintk(XENLOG_ERR VTDPREFIX,
   85.69 -               "The interrupt number is more than 256!\n");
   85.70 -        goto out;
   85.71 +        ir_ctrl->iremap_index++;
   85.72 +        index = ir_ctrl->iremap_index;
   85.73      }
   85.74 +    else
   85.75 +        index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
   85.76 +
   85.77 +    if ( index > IREMAP_ENTRY_NR - 1 )
   85.78 +        panic("ioapic_rte_to_remap_entry: intremap index is more than 256!\n");
   85.79  
   85.80      iremap_entries =
   85.81          (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
   85.82      iremap_entry = &iremap_entries[index];
   85.83  
   85.84 -    if ( *(u64 *)iremap_entry != 0 )
   85.85 -        dprintk(XENLOG_WARNING VTDPREFIX,
   85.86 -               "Interrupt remapping entry is in use already!\n");
   85.87 -    iremap_entry->lo.fpd = 0;
   85.88 -    iremap_entry->lo.dm = old_rte->dest_mode;
   85.89 -    iremap_entry->lo.rh = 0;
   85.90 -    iremap_entry->lo.tm = old_rte->trigger;
   85.91 -    iremap_entry->lo.dlm = old_rte->delivery_mode;
   85.92 -    iremap_entry->lo.avail = 0;
   85.93 -    iremap_entry->lo.res_1 = 0;
   85.94 -    iremap_entry->lo.vector = old_rte->vector;
   85.95 -    iremap_entry->lo.res_2 = 0;
   85.96 -    iremap_entry->lo.dst = (old_rte->dest.logical.logical_dest << 8);
   85.97 -    iremap_entry->hi.sid = apicid_to_bdf(apic_id);
   85.98 -    iremap_entry->hi.sq = 0;    /* comparing all 16-bit of SID */
   85.99 -    iremap_entry->hi.svt = 1;   /* turn on requestor ID verification SID/SQ */
  85.100 -    iremap_entry->hi.res_1 = 0;
  85.101 -    iremap_entry->lo.p = 1;    /* finally, set present bit */
  85.102 -    ir_ctrl->iremap_index++;
  85.103 +    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
  85.104 +
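          +    /* A write to the upper RTE half only changes the destination APIC
          +     * ID (bits 24-31 of 'value'), which the IRTE keeps in bits 8-15
          +     * of its dst field. */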
  85.105 +    if ( rte_upper )
  85.106 +        new_ire.lo.dst = (value >> 24) << 8;
  85.107 +    else
  85.108 +    {
  85.109 +        *(((u32 *)&new_rte) + 0) = value;
  85.110 +        new_ire.lo.fpd = 0;
  85.111 +        new_ire.lo.dm = new_rte.dest_mode;
  85.112 +        new_ire.lo.rh = 0;
  85.113 +        new_ire.lo.tm = new_rte.trigger;
  85.114 +        new_ire.lo.dlm = new_rte.delivery_mode;
  85.115 +        new_ire.lo.avail = 0;
  85.116 +        new_ire.lo.res_1 = 0;
  85.117 +        new_ire.lo.vector = new_rte.vector;
  85.118 +        new_ire.lo.res_2 = 0;
  85.119 +        new_ire.hi.sid = apicid_to_bdf(apic_id);
  85.120 +
   85.121 +    new_ire.hi.sq = 0;    /* compare all 16 bits of the SID */
  85.122 +        new_ire.hi.svt = 1;   /* requestor ID verification SID/SQ */
  85.123 +        new_ire.hi.res_1 = 0;
  85.124 +        new_ire.lo.p = 1;     /* finally, set present bit */
  85.125 +
  85.126 +        /* now construct new ioapic rte entry */
  85.127 +        remap_rte->vector = new_rte.vector;
  85.128 +        remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
   85.129 +        remap_rte->index_15 = (index >> 15) & 0x1;
  85.130 +        remap_rte->index_0_14 = index & 0x7fff;
  85.131 +
  85.132 +        remap_rte->delivery_status = new_rte.delivery_status;
  85.133 +        remap_rte->polarity = new_rte.polarity;
  85.134 +        remap_rte->irr = new_rte.irr;
  85.135 +        remap_rte->trigger = new_rte.trigger;
  85.136 +        remap_rte->mask = new_rte.mask;
  85.137 +        remap_rte->reserved = 0;
  85.138 +        remap_rte->format = 1;    /* indicate remap format */
  85.139 +    }
  85.140 +
  85.141 +    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
  85.142 +    iommu_flush_iec_index(iommu, 0, index);
  85.143 +    invalidate_sync(iommu);
  85.144  
  85.145      unmap_vtd_domain_page(iremap_entries);
  85.146 -    iommu_flush_iec_index(iommu, 0, index);
  85.147 -    ret = invalidate_sync(iommu);
  85.148 -
  85.149 -    /* now construct new ioapic rte entry */
  85.150 -    remap_rte->vector = old_rte->vector;
  85.151 -    remap_rte->delivery_mode = 0;    /* has to be 0 for remap format */
  85.152 -    remap_rte->index_15 = index & 0x8000;
  85.153 -    remap_rte->index_0_14 = index & 0x7fff;
  85.154 -    remap_rte->delivery_status = old_rte->delivery_status;
  85.155 -    remap_rte->polarity = old_rte->polarity;
  85.156 -    remap_rte->irr = old_rte->irr;
  85.157 -    remap_rte->trigger = old_rte->trigger;
  85.158 -    remap_rte->mask = 1;
  85.159 -    remap_rte->reserved = 0;
  85.160 -    remap_rte->format = 1;    /* indicate remap format */
  85.161 -out:
  85.162      spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
  85.163      return;
  85.164  }
  85.165  
  85.166 -unsigned int
  85.167 -io_apic_read_remap_rte(
  85.168 +unsigned int io_apic_read_remap_rte(
  85.169      unsigned int apic, unsigned int reg)
  85.170  {
  85.171      struct IO_APIC_route_entry old_rte = { 0 };
  85.172 @@ -198,15 +206,15 @@ io_apic_read_remap_rte(
  85.173      }
  85.174  }
  85.175  
  85.176 -void
  85.177 -io_apic_write_remap_rte(
  85.178 +void io_apic_write_remap_rte(
  85.179      unsigned int apic, unsigned int reg, unsigned int value)
  85.180  {
  85.181      struct IO_APIC_route_entry old_rte = { 0 };
  85.182      struct IO_APIC_route_remap_entry *remap_rte;
  85.183 -    int rte_upper = (reg & 1) ? 1 : 0;
  85.184 +    unsigned int rte_upper = (reg & 1) ? 1 : 0;
  85.185      struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
  85.186      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
  85.187 +    int saved_mask;
  85.188  
  85.189      if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
  85.190      {
  85.191 @@ -225,21 +233,192 @@ io_apic_write_remap_rte(
  85.192      *(((u32 *)&old_rte) + 1) = *(IO_APIC_BASE(apic)+4);
  85.193  
  85.194      remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
  85.195 -    if ( remap_rte->mask || (remap_rte->format == 0) )
  85.196 +
  85.197 +    /* mask the interrupt while we change the intremap table */
  85.198 +    saved_mask = remap_rte->mask;
  85.199 +    remap_rte->mask = 1;
  85.200 +    *IO_APIC_BASE(apic) = reg;
  85.201 +    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
  85.202 +    remap_rte->mask = saved_mask;
  85.203 +
  85.204 +    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
  85.205 +                              &old_rte, rte_upper, value);
  85.206 +
  85.207 +    /* write new entry to ioapic */
  85.208 +    *IO_APIC_BASE(apic) = reg;
  85.209 +    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
  85.210 +    *IO_APIC_BASE(apic) = reg + 1;
  85.211 +    *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
  85.212 +}
  85.213 +
  85.214 +static void remap_entry_to_msi_msg(
  85.215 +    struct iommu *iommu, struct msi_msg *msg)
  85.216 +{
  85.217 +    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
  85.218 +    struct msi_msg_remap_entry *remap_rte;
  85.219 +    int index;
  85.220 +    unsigned long flags;
  85.221 +    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
  85.222 +
  85.223 +    if ( ir_ctrl == NULL )
  85.224      {
  85.225 -        *IO_APIC_BASE(apic) = rte_upper ? ++reg : reg;
  85.226 -        *(IO_APIC_BASE(apic)+4) = value;
  85.227 +        dprintk(XENLOG_ERR VTDPREFIX,
   85.228 +                "remap_entry_to_msi_msg: ir_ctrl == NULL\n");
  85.229          return;
  85.230      }
  85.231  
  85.232 -    *(((u32 *)&old_rte) + rte_upper) = value;
  85.233 -    ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid, &old_rte);
  85.234 +    remap_rte = (struct msi_msg_remap_entry *) msg;
  85.235 +    index = (remap_rte->address_lo.index_15 << 15) |
  85.236 +            remap_rte->address_lo.index_0_14;
  85.237 +
  85.238 +    if ( index > ir_ctrl->iremap_index )
   85.239 +        panic("%s: index (%d) exceeds the highest allocated IRTE index (%d)\n",
   85.240 +              __func__, index, ir_ctrl->iremap_index);
  85.241 +
  85.242 +    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
  85.243 +
  85.244 +    iremap_entries =
  85.245 +        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
  85.246 +    iremap_entry = &iremap_entries[index];
  85.247 +
  85.248 +    msg->address_hi = MSI_ADDR_BASE_HI;
  85.249 +    msg->address_lo =
  85.250 +        MSI_ADDR_BASE_LO |
  85.251 +        ((iremap_entry->lo.dm == 0) ?
  85.252 +            MSI_ADDR_DESTMODE_PHYS:
  85.253 +            MSI_ADDR_DESTMODE_LOGIC) |
  85.254 +        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
  85.255 +            MSI_ADDR_REDIRECTION_CPU:
  85.256 +            MSI_ADDR_REDIRECTION_LOWPRI) |
  85.257 +        iremap_entry->lo.dst >> 8;
  85.258 +
  85.259 +    msg->data =
  85.260 +        MSI_DATA_TRIGGER_EDGE |
  85.261 +        MSI_DATA_LEVEL_ASSERT |
  85.262 +        ((iremap_entry->lo.dlm != dest_LowestPrio) ?
  85.263 +            MSI_DATA_DELIVERY_FIXED:
  85.264 +            MSI_DATA_DELIVERY_LOWPRI) |
  85.265 +        iremap_entry->lo.vector;
  85.266 +
  85.267 +    unmap_vtd_domain_page(iremap_entries);
  85.268 +    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
  85.269 +}
  85.270 +
  85.271 +static void msi_msg_to_remap_entry(
  85.272 +    struct iommu *iommu, struct pci_dev *pdev, struct msi_msg *msg)
  85.273 +{
  85.274 +    struct iremap_entry *iremap_entry = NULL, *iremap_entries;
  85.275 +    struct iremap_entry new_ire;
  85.276 +    struct msi_msg_remap_entry *remap_rte;
  85.277 +    unsigned int index;
  85.278 +    unsigned long flags;
  85.279 +    struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
  85.280 +    int i = 0;
  85.281 +
  85.282 +    remap_rte = (struct msi_msg_remap_entry *) msg;
  85.283 +    spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
  85.284 +
  85.285 +    iremap_entries =
  85.286 +        (struct iremap_entry *)map_vtd_domain_page(ir_ctrl->iremap_maddr);
  85.287 +
   85.288 +    /* If an entry already exists for this PCI device, reuse it;
   85.289 +     * otherwise assign a new entry.
   85.290 +     */
  85.291 +    for ( i = 0; i <= ir_ctrl->iremap_index; i++ )
  85.292 +    {
  85.293 +        iremap_entry = &iremap_entries[i];
  85.294 +        if ( iremap_entry->hi.sid ==
  85.295 +             ((pdev->bus << 8) | pdev->devfn) )
   85.296 +            break;
  85.297 +    }
  85.298 +
  85.299 +    if ( i > ir_ctrl->iremap_index )
  85.300 +    {
   85.301 +        ir_ctrl->iremap_index++;
  85.302 +        index = ir_ctrl->iremap_index;
  85.303 +    }
  85.304 +    else
  85.305 +        index = i;
  85.306 +
  85.307 +    if ( index > IREMAP_ENTRY_NR - 1 )
   85.308 +        panic("msi_msg_to_remap_entry: intremap index exceeds the 256-entry table!\n");
  85.309  
  85.310 -    /* write new entry to ioapic */
  85.311 -    *IO_APIC_BASE(apic) = reg;
  85.312 -    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
  85.313 -    *IO_APIC_BASE(apic) = reg + 1;
  85.314 -    *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+1);
  85.315 +    iremap_entry = &iremap_entries[index];
  85.316 +    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
  85.317 +
  85.318 +    /* Set interrupt remapping table entry */
  85.319 +    new_ire.lo.fpd = 0;
  85.320 +    new_ire.lo.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
  85.321 +    new_ire.lo.rh = 0;
  85.322 +    new_ire.lo.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
  85.323 +    new_ire.lo.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
  85.324 +    new_ire.lo.avail = 0;
  85.325 +    new_ire.lo.res_1 = 0;
  85.326 +    new_ire.lo.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
  85.327 +                        MSI_DATA_VECTOR_MASK;
  85.328 +    new_ire.lo.res_2 = 0;
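          +    /* For xAPIC, the destination APIC ID sits in bits 8-15 of the
          +     * IRTE dst field. */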
  85.329 +    new_ire.lo.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
  85.330 +                      & 0xff) << 8;
  85.331 +
  85.332 +    new_ire.hi.sid = (pdev->bus << 8) | pdev->devfn;
  85.333 +    new_ire.hi.sq = 0;
  85.334 +    new_ire.hi.svt = 1;
  85.335 +    new_ire.hi.res_1 = 0;
  85.336 +    new_ire.lo.p = 1;    /* finally, set present bit */
  85.337 +
  85.338 +    /* now construct new MSI/MSI-X rte entry */
  85.339 +    remap_rte->address_lo.dontcare = 0;
   85.340 +    remap_rte->address_lo.index_15 = (index >> 15) & 0x1;
  85.341 +    remap_rte->address_lo.index_0_14 = index & 0x7fff;
  85.342 +    remap_rte->address_lo.SHV = 1;
  85.343 +    remap_rte->address_lo.format = 1;
  85.344 +
  85.345 +    remap_rte->address_hi = 0;
  85.346 +    remap_rte->data = 0;
  85.347 +
  85.348 +    memcpy(iremap_entry, &new_ire, sizeof(struct iremap_entry));
  85.349 +    iommu_flush_iec_index(iommu, 0, index);
  85.350 +    invalidate_sync(iommu);
  85.351 +
  85.352 +    unmap_vtd_domain_page(iremap_entries);
  85.353 +    spin_unlock_irqrestore(&ir_ctrl->iremap_lock, flags);
  85.354 +    return;
  85.355 +}
  85.356 +
  85.357 +void msi_msg_read_remap_rte(
  85.358 +    struct msi_desc *msi_desc, struct msi_msg *msg)
  85.359 +{
  85.360 +    struct pci_dev *pdev = msi_desc->dev;
  85.361 +    struct acpi_drhd_unit *drhd = NULL;
  85.362 +    struct iommu *iommu = NULL;
  85.363 +    struct ir_ctrl *ir_ctrl;
  85.364 +
  85.365 +    drhd = acpi_find_matched_drhd_unit(pdev);
  85.366 +    iommu = drhd->iommu;
  85.367 +
  85.368 +    ir_ctrl = iommu_ir_ctrl(iommu);
  85.369 +    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
  85.370 +        return;
  85.371 +
  85.372 +    remap_entry_to_msi_msg(iommu, msg);
  85.373 +}
  85.374 +
  85.375 +void msi_msg_write_remap_rte(
  85.376 +    struct msi_desc *msi_desc, struct msi_msg *msg)
  85.377 +{
  85.378 +    struct pci_dev *pdev = msi_desc->dev;
  85.379 +    struct acpi_drhd_unit *drhd = NULL;
  85.380 +    struct iommu *iommu = NULL;
  85.381 +    struct ir_ctrl *ir_ctrl;
  85.382 +
   85.383 +    drhd = acpi_find_matched_drhd_unit(pdev);
  85.384 +    iommu = drhd->iommu;
  85.385 +
  85.386 +    ir_ctrl = iommu_ir_ctrl(iommu);
  85.387 +    if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 )
  85.388 +        return;
  85.389 +
  85.390 +    msi_msg_to_remap_entry(iommu, pdev, msg);
  85.391  }
  85.392  
  85.393  int intremap_setup(struct iommu *iommu)
  85.394 @@ -260,6 +439,7 @@ int intremap_setup(struct iommu *iommu)
  85.395                      "Cannot allocate memory for ir_ctrl->iremap_maddr\n");
  85.396              return -ENODEV;
  85.397          }
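          +        /* -1 marks an empty table; the first allocation pre-increments to 0. */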
  85.398 +        ir_ctrl->iremap_index = -1;
  85.399      }
  85.400  
  85.401  #if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
    86.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Mon Jun 02 11:35:02 2008 +0900
    86.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Mon Jun 02 11:35:39 2008 +0900
    86.3 @@ -112,28 +112,27 @@ struct iommu_flush *iommu_get_flush(stru
    86.4      return iommu ? &iommu->intel->flush : NULL;
    86.5  }
    86.6  
    86.7 -unsigned int clflush_size;
    86.8 -void clflush_cache_range(void *adr, int size)
    86.9 +static unsigned int clflush_size;
   86.10 +static int iommus_incoherent;
   86.11 +static void __iommu_flush_cache(void *addr, int size)
   86.12  {
   86.13      int i;
   86.14 +
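          +    /* If every IOMMU reports coherent page-walk accesses, clflush is unnecessary. */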
   86.15 +    if ( !iommus_incoherent )
   86.16 +        return;
   86.17 +
   86.18      for ( i = 0; i < size; i += clflush_size )
   86.19 -        clflush(adr + i);
   86.20 +        clflush((char *)addr + i);
   86.21  }
   86.22  
   86.23 -static void __iommu_flush_cache(struct iommu *iommu, void *addr, int size)
   86.24 +void iommu_flush_cache_entry(void *addr)
   86.25  {
   86.26 -    if ( !ecap_coherent(iommu->ecap) )
   86.27 -        clflush_cache_range(addr, size);
   86.28 +    __iommu_flush_cache(addr, 8);
   86.29  }
   86.30  
   86.31 -void iommu_flush_cache_entry(struct iommu *iommu, void *addr)
   86.32 +void iommu_flush_cache_page(void *addr)
   86.33  {
   86.34 -    __iommu_flush_cache(iommu, addr, 8);
   86.35 -}
   86.36 -
   86.37 -void iommu_flush_cache_page(struct iommu *iommu, void *addr)
   86.38 -{
   86.39 -    __iommu_flush_cache(iommu, addr, PAGE_SIZE_4K);
   86.40 +    __iommu_flush_cache(addr, PAGE_SIZE_4K);
   86.41  }
   86.42  
   86.43  int nr_iommus;
   86.44 @@ -157,7 +156,7 @@ static u64 bus_to_context_maddr(struct i
   86.45          }
   86.46          set_root_value(*root, maddr);
   86.47          set_root_present(*root);
   86.48 -        iommu_flush_cache_entry(iommu, root);
   86.49 +        iommu_flush_cache_entry(root);
   86.50      }
   86.51      maddr = (u64) get_context_addr(*root);
   86.52      unmap_vtd_domain_page(root_entries);
   86.53 @@ -191,30 +190,22 @@ static int device_context_mapped(struct 
   86.54      return ret;
   86.55  }
   86.56  
   86.57 -static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr)
   86.58 +static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
   86.59  {
   86.60      struct hvm_iommu *hd = domain_hvm_iommu(domain);
   86.61 -    struct acpi_drhd_unit *drhd;
   86.62 -    struct iommu *iommu;
   86.63      int addr_width = agaw_to_width(hd->agaw);
   86.64      struct dma_pte *parent, *pte = NULL;
   86.65      int level = agaw_to_level(hd->agaw);
   86.66      int offset;
   86.67      unsigned long flags;
   86.68 -    u64 pte_maddr = 0;
   86.69 +    u64 pte_maddr = 0, maddr;
   86.70      u64 *vaddr = NULL;
   86.71  
   86.72 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   86.73 -    iommu = drhd->iommu;
   86.74 -
   86.75      addr &= (((u64)1) << addr_width) - 1;
   86.76      spin_lock_irqsave(&hd->mapping_lock, flags);
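          +    /* alloc == 0 makes this a pure lookup: a missing level ends the walk early. */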
   86.77      if ( hd->pgd_maddr == 0 )
   86.78 -    {
   86.79 -        hd->pgd_maddr = alloc_pgtable_maddr();
   86.80 -        if ( hd->pgd_maddr == 0 )
   86.81 -            return 0;
   86.82 -    }
   86.83 +        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
   86.84 +            goto out;
   86.85  
   86.86      parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
   86.87      while ( level > 1 )
   86.88 @@ -224,7 +215,9 @@ static u64 addr_to_dma_page_maddr(struct
   86.89  
   86.90          if ( dma_pte_addr(*pte) == 0 )
   86.91          {
   86.92 -            u64 maddr = alloc_pgtable_maddr();
   86.93 +            if ( !alloc )
   86.94 +                break;
   86.95 +            maddr = alloc_pgtable_maddr();
   86.96              dma_set_pte_addr(*pte, maddr);
   86.97              vaddr = map_vtd_domain_page(maddr);
   86.98              if ( !vaddr )
   86.99 @@ -236,7 +229,7 @@ static u64 addr_to_dma_page_maddr(struct
  86.100               */
  86.101              dma_set_pte_readable(*pte);
  86.102              dma_set_pte_writable(*pte);
  86.103 -            iommu_flush_cache_entry(iommu, pte);
  86.104 +            iommu_flush_cache_entry(pte);
  86.105          }
  86.106          else
  86.107          {
  86.108 @@ -259,45 +252,11 @@ static u64 addr_to_dma_page_maddr(struct
  86.109      }
  86.110  
  86.111      unmap_vtd_domain_page(parent);
  86.112 + out:
  86.113      spin_unlock_irqrestore(&hd->mapping_lock, flags);
  86.114      return pte_maddr;
  86.115  }
  86.116  
  86.117 -/* return address's page at specific level */
  86.118 -static u64 dma_addr_level_page_maddr(
  86.119 -    struct domain *domain, u64 addr, int level)
  86.120 -{
  86.121 -    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  86.122 -    struct dma_pte *parent, *pte = NULL;
  86.123 -    int total = agaw_to_level(hd->agaw);
  86.124 -    int offset;
  86.125 -    u64 pg_maddr = hd->pgd_maddr;
  86.126 -
  86.127 -    if ( pg_maddr == 0 )
  86.128 -        return 0;
  86.129 -
  86.130 -    parent = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  86.131 -    while ( level <= total )
  86.132 -    {
  86.133 -        offset = address_level_offset(addr, total);
  86.134 -        pte = &parent[offset];
  86.135 -        if ( dma_pte_addr(*pte) == 0 )
  86.136 -            break;
  86.137 -
  86.138 -        pg_maddr = pte->val & PAGE_MASK_4K;
  86.139 -        unmap_vtd_domain_page(parent);
  86.140 -
  86.141 -        if ( level == total )
  86.142 -            return pg_maddr;
  86.143 -
  86.144 -        parent = map_vtd_domain_page(pte->val);
  86.145 -        total--;
  86.146 -    }
  86.147 -
  86.148 -    unmap_vtd_domain_page(parent);
  86.149 -    return 0;
  86.150 -}
  86.151 -
  86.152  static void iommu_flush_write_buffer(struct iommu *iommu)
  86.153  {
  86.154      u32 val;
  86.155 @@ -485,9 +444,12 @@ static int flush_iotlb_reg(void *_iommu,
  86.156      /* check IOTLB invalidation granularity */
  86.157      if ( DMA_TLB_IAIG(val) == 0 )
  86.158          printk(KERN_ERR VTDPREFIX "IOMMU: flush IOTLB failed\n");
  86.159 +
  86.160 +#ifdef VTD_DEBUG
  86.161      if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
  86.162          printk(KERN_ERR VTDPREFIX "IOMMU: tlb flush request %x, actual %x\n",
  86.163                 (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
  86.164 +#endif
   86.165      /* flushing a context entry implicitly flushes the write buffer */
  86.166      return 0;
  86.167  }
  86.168 @@ -572,34 +534,36 @@ void iommu_flush_all(void)
  86.169  /* clear one page's page table */
  86.170  static void dma_pte_clear_one(struct domain *domain, u64 addr)
  86.171  {
  86.172 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  86.173      struct acpi_drhd_unit *drhd;
  86.174      struct iommu *iommu;
  86.175      struct dma_pte *page = NULL, *pte = NULL;
  86.176      u64 pg_maddr;
  86.177  
  86.178 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  86.179 -
  86.180      /* get last level pte */
  86.181 -    pg_maddr = dma_addr_level_page_maddr(domain, addr, 1);
  86.182 +    pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
  86.183      if ( pg_maddr == 0 )
  86.184          return;
  86.185      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  86.186      pte = page + address_level_offset(addr, 1);
  86.187 -    if ( pte )
  86.188 +
  86.189 +    if ( !dma_pte_present(*pte) )
  86.190      {
  86.191 -        dma_clear_pte(*pte);
  86.192 -        iommu_flush_cache_entry(drhd->iommu, pte);
  86.193 +        unmap_vtd_domain_page(page);
  86.194 +        return;
  86.195 +    }
  86.196  
  86.197 -        for_each_drhd_unit ( drhd )
  86.198 -        {
  86.199 -            iommu = drhd->iommu;
  86.200 -            if ( cap_caching_mode(iommu->cap) )
  86.201 -                iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
  86.202 -                                      addr, 1, 0);
  86.203 -            else if (cap_rwbf(iommu->cap))
  86.204 -                iommu_flush_write_buffer(iommu);
  86.205 -        }
   86.206 +    dma_clear_pte(*pte);
  86.207 +    iommu_flush_cache_entry(pte);
  86.208 +
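          +    /* Only flush IOMMUs that actually hold mappings for this domain. */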
  86.209 +    for_each_drhd_unit ( drhd )
  86.210 +    {
  86.211 +        iommu = drhd->iommu;
  86.212 +        if ( test_bit(iommu->index, &hd->iommu_bitmap) )
  86.213 +            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
  86.214 +                                  addr, 1, 0);
  86.215      }
  86.216 +
  86.217      unmap_vtd_domain_page(page);
  86.218  }
  86.219  
  86.220 @@ -626,7 +590,6 @@ static void dma_pte_clear_range(struct d
  86.221  static void iommu_free_next_pagetable(u64 pt_maddr, unsigned long index,
  86.222                                        int level)
  86.223  {
  86.224 -    struct acpi_drhd_unit *drhd;
  86.225      unsigned long next_index;
  86.226      struct dma_pte *pt_vaddr, *pde;
  86.227      int next_level;
  86.228 @@ -636,50 +599,38 @@ static void iommu_free_next_pagetable(u6
  86.229  
  86.230      pt_vaddr = (struct dma_pte *)map_vtd_domain_page(pt_maddr);
  86.231      pde = &pt_vaddr[index];
  86.232 -    if ( dma_pte_addr(*pde) != 0 )
  86.233 +    if ( dma_pte_addr(*pde) == 0 )
  86.234 +        goto out;
  86.235 +
  86.236 +    next_level = level - 1;
  86.237 +    if ( next_level > 1 )
  86.238      {
  86.239 -        next_level = level - 1;
  86.240 -        if ( next_level > 1 )
  86.241 -        {
  86.242 -            next_index = 0;
  86.243 -            do
  86.244 -            {
  86.245 -                iommu_free_next_pagetable(pde->val,
  86.246 -                                          next_index, next_level);
  86.247 -                next_index++;
  86.248 -            } while ( next_index < PTE_NUM );
  86.249 -        }
  86.250 +        for ( next_index = 0; next_index < PTE_NUM; next_index++ )
  86.251 +            iommu_free_next_pagetable(pde->val, next_index, next_level);
  86.252 +    }
  86.253  
  86.254 -        dma_clear_pte(*pde);
  86.255 -        drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  86.256 -        iommu_flush_cache_entry(drhd->iommu, pde);
  86.257 -        free_pgtable_maddr(pde->val);
  86.258 -        unmap_vtd_domain_page(pt_vaddr);
  86.259 -    }
  86.260 -    else
  86.261 -        unmap_vtd_domain_page(pt_vaddr);
  86.262 +    dma_clear_pte(*pde);
  86.263 +    iommu_flush_cache_entry(pde);
  86.264 +    free_pgtable_maddr(pde->val);
  86.265 +
  86.266 + out:
  86.267 +    unmap_vtd_domain_page(pt_vaddr);
  86.268  }
  86.269  
  86.270  /* free all VT-d page tables when shut down or destroy domain. */
  86.271  static void iommu_free_pagetable(struct domain *domain)
  86.272  {
  86.273 -    unsigned long index;
  86.274      struct hvm_iommu *hd = domain_hvm_iommu(domain);
  86.275 -    int total_level = agaw_to_level(hd->agaw);
  86.276 +    int i, total_level = agaw_to_level(hd->agaw);
  86.277 +
  86.278 +    if ( hd->pgd_maddr == 0 )
  86.279 +        return;
  86.280  
  86.281 -    if ( hd->pgd_maddr != 0 )
  86.282 -    {
  86.283 -        index = 0;
  86.284 -        do
  86.285 -        {
  86.286 -            iommu_free_next_pagetable(hd->pgd_maddr,
  86.287 -                                      index, total_level + 1);
  86.288 -            index++;
  86.289 -        } while ( index < PTE_NUM );
  86.290 +    for ( i = 0; i < PTE_NUM; i++ )
  86.291 +        iommu_free_next_pagetable(hd->pgd_maddr, i, total_level + 1);
  86.292  
  86.293 -        free_pgtable_maddr(hd->pgd_maddr);
  86.294 -        hd->pgd_maddr = 0;
  86.295 -    }
  86.296 +    free_pgtable_maddr(hd->pgd_maddr);
  86.297 +    hd->pgd_maddr = 0;
  86.298  }
  86.299  
  86.300  static int iommu_set_root_entry(struct iommu *iommu)
  86.301 @@ -777,16 +728,17 @@ int iommu_disable_translation(struct iom
  86.302  
  86.303  static struct iommu *vector_to_iommu[NR_VECTORS];
  86.304  static int iommu_page_fault_do_one(struct iommu *iommu, int type,
  86.305 -                                   u8 fault_reason, u16 source_id, u32 addr)
  86.306 +                                   u8 fault_reason, u16 source_id, u64 addr)
  86.307  {
  86.308      dprintk(XENLOG_WARNING VTDPREFIX,
  86.309 -            "iommu_fault:%s: %x:%x.%x addr %x REASON %x iommu->reg = %p\n",
  86.310 +            "iommu_fault:%s: %x:%x.%x addr %"PRIx64" REASON %x "
  86.311 +            "iommu->reg = %p\n",
  86.312              (type ? "DMA Read" : "DMA Write"), (source_id >> 8),
  86.313              PCI_SLOT(source_id & 0xFF), PCI_FUNC(source_id & 0xFF), addr,
  86.314              fault_reason, iommu->reg);
  86.315  
  86.316      if ( fault_reason < 0x20 )
  86.317 -        print_vtd_entries(current->domain, iommu, (source_id >> 8),
  86.318 +        print_vtd_entries(iommu, (source_id >> 8),
  86.319                            (source_id & 0xff), (addr >> PAGE_SHIFT));
  86.320  
  86.321      return 0;
  86.322 @@ -844,7 +796,8 @@ static void iommu_page_fault(int vector,
  86.323      {
  86.324          u8 fault_reason;
  86.325          u16 source_id;
  86.326 -        u32 guest_addr, data;
  86.327 +        u32 data;
  86.328 +        u64 guest_addr;
  86.329          int type;
  86.330  
  86.331          /* highest 32 bits */
  86.332 @@ -998,6 +951,8 @@ int iommu_set_interrupt(struct iommu *io
  86.333  static int iommu_alloc(struct acpi_drhd_unit *drhd)
  86.334  {
  86.335      struct iommu *iommu;
  86.336 +    unsigned long sagaw;
  86.337 +    int agaw;
  86.338  
  86.339      if ( nr_iommus > MAX_IOMMUS )
  86.340      {
  86.341 @@ -1020,11 +975,28 @@ static int iommu_alloc(struct acpi_drhd_
  86.342  
  86.343      set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
  86.344      iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
  86.345 -    nr_iommus++;
  86.346 +    iommu->index = nr_iommus++;
  86.347  
  86.348      iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
  86.349      iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
  86.350  
  86.351 +    /* Calculate number of pagetable levels: between 2 and 4. */
  86.352 +    sagaw = cap_sagaw(iommu->cap);
  86.353 +    for ( agaw = level_to_agaw(4); agaw >= 0; agaw-- )
  86.354 +        if ( test_bit(agaw, &sagaw) )
  86.355 +            break;
  86.356 +    if ( agaw < 0 )
  86.357 +    {
  86.358 +        gdprintk(XENLOG_ERR VTDPREFIX,
  86.359 +                 "IOMMU: unsupported sagaw %lx\n", sagaw);
  86.360 +        xfree(iommu);
  86.361 +        return -ENODEV;
  86.362 +    }
  86.363 +    iommu->nr_pt_levels = agaw_to_level(agaw);
  86.364 +
  86.365 +    if ( !ecap_coherent(iommu->ecap) )
  86.366 +        iommus_incoherent = 1;
  86.367 +
  86.368      spin_lock_init(&iommu->lock);
  86.369      spin_lock_init(&iommu->register_lock);
  86.370  
  86.371 @@ -1066,9 +1038,7 @@ static int intel_iommu_domain_init(struc
  86.372  {
  86.373      struct hvm_iommu *hd = domain_hvm_iommu(d);
  86.374      struct iommu *iommu = NULL;
  86.375 -    int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
  86.376 -    int i, adjust_width, agaw;
  86.377 -    unsigned long sagaw;
  86.378 +    u64 i;
  86.379      struct acpi_drhd_unit *drhd;
  86.380  
  86.381      INIT_LIST_HEAD(&hd->pdev_list);
  86.382 @@ -1076,28 +1046,25 @@ static int intel_iommu_domain_init(struc
  86.383      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  86.384      iommu = drhd->iommu;
  86.385  
  86.386 -    /* Calculate AGAW. */
  86.387 -    if ( guest_width > cap_mgaw(iommu->cap) )
  86.388 -        guest_width = cap_mgaw(iommu->cap);
  86.389 -    adjust_width = guestwidth_to_adjustwidth(guest_width);
  86.390 -    agaw = width_to_agaw(adjust_width);
  86.391 -    /* FIXME: hardware doesn't support it, choose a bigger one? */
  86.392 -    sagaw = cap_sagaw(iommu->cap);
  86.393 -    if ( !test_bit(agaw, &sagaw) )
  86.394 -    {
  86.395 -        gdprintk(XENLOG_ERR VTDPREFIX,
  86.396 -                 "IOMMU: hardware doesn't support the agaw\n");
  86.397 -        agaw = find_next_bit(&sagaw, 5, agaw);
  86.398 -        if ( agaw >= 5 )
  86.399 -            return -ENODEV;
  86.400 -    }
  86.401 -    hd->agaw = agaw;
  86.402 +    hd->agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
  86.403  
  86.404      if ( d->domain_id == 0 )
  86.405      {
  86.406 -        /* Set up 1:1 page table for dom0. */
  86.407 +        extern int xen_in_range(paddr_t start, paddr_t end);
  86.408 +        extern int tboot_in_range(paddr_t start, paddr_t end);
  86.409 +
   86.410 +        /*
  86.411 +         * Set up 1:1 page table for dom0 except the critical segments
  86.412 +         * like Xen and tboot.
  86.413 +         */
  86.414          for ( i = 0; i < max_page; i++ )
  86.415 +        {
  86.416 +            if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
  86.417 +                 tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
  86.418 +                continue;
  86.419 +
  86.420              iommu_map_page(d, i, i);
  86.421 +        }
  86.422  
  86.423          setup_dom0_devices(d);
  86.424          setup_dom0_rmrr(d);
  86.425 @@ -1123,7 +1090,8 @@ static int domain_context_mapping_one(
  86.426      struct hvm_iommu *hd = domain_hvm_iommu(domain);
  86.427      struct context_entry *context, *context_entries;
  86.428      unsigned long flags;
  86.429 -    u64 maddr;
  86.430 +    u64 maddr, pgd_maddr;
  86.431 +    int agaw;
  86.432  
  86.433      maddr = bus_to_context_maddr(iommu, bus);
  86.434      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
  86.435 @@ -1136,38 +1104,64 @@ static int domain_context_mapping_one(
  86.436      }
  86.437  
  86.438      spin_lock_irqsave(&iommu->lock, flags);
  86.439 +
  86.440 +#ifdef CONTEXT_PASSTHRU
  86.441 +    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
  86.442 +        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
  86.443 +    else
  86.444 +    {
  86.445 +#endif
  86.446 +        /* Ensure we have pagetables allocated down to leaf PTE. */
  86.447 +        if ( hd->pgd_maddr == 0 )
  86.448 +        {
  86.449 +            addr_to_dma_page_maddr(domain, 0, 1);
  86.450 +            if ( hd->pgd_maddr == 0 )
  86.451 +            {
  86.452 +            nomem:
  86.453 +                unmap_vtd_domain_page(context_entries);
  86.454 +                spin_unlock_irqrestore(&iommu->lock, flags);
  86.455 +                return -ENOMEM;
  86.456 +            }
  86.457 +        }
  86.458 +
  86.459 +        /* Skip top levels of page tables for 2- and 3-level DRHDs. */
  86.460 +        pgd_maddr = hd->pgd_maddr;
  86.461 +        for ( agaw = level_to_agaw(4);
  86.462 +              agaw != level_to_agaw(iommu->nr_pt_levels);
  86.463 +              agaw-- )
  86.464 +        {
  86.465 +            struct dma_pte *p = map_vtd_domain_page(pgd_maddr);
  86.466 +            pgd_maddr = dma_pte_addr(*p);
  86.467 +            unmap_vtd_domain_page(p);
  86.468 +            if ( pgd_maddr == 0 )
  86.469 +                goto nomem;
  86.470 +        }
  86.471 +
  86.472 +        context_set_address_root(*context, pgd_maddr);
  86.473 +        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
  86.474 +#ifdef CONTEXT_PASSTHRU
  86.475 +    }
  86.476 +#endif
  86.477 +
  86.478      /*
  86.479       * domain_id 0 is not valid on Intel's IOMMU, force domain_id to
  86.480       * be 1 based as required by intel's iommu hw.
  86.481       */
  86.482      context_set_domain_id(context, domain);
  86.483 -    context_set_address_width(*context, hd->agaw);
  86.484 -
  86.485 -    if ( ecap_pass_thru(iommu->ecap) )
  86.486 -        context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
  86.487 -#ifdef CONTEXT_PASSTHRU
  86.488 -    else
  86.489 -    {
  86.490 -#endif
  86.491 -        ASSERT(hd->pgd_maddr != 0);
  86.492 -        context_set_address_root(*context, hd->pgd_maddr);
  86.493 -        context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
  86.494 -#ifdef CONTEXT_PASSTHRU
  86.495 -    }
  86.496 -#endif
  86.497 -
  86.498 +    context_set_address_width(*context, agaw);
  86.499      context_set_fault_enable(*context);
  86.500      context_set_present(*context);
  86.501 -    iommu_flush_cache_entry(iommu, context);
  86.502 +    iommu_flush_cache_entry(context);
  86.503  
  86.504      unmap_vtd_domain_page(context_entries);
  86.505  
  86.506 -    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
  86.507 -                                    (((u16)bus) << 8) | devfn,
  86.508 -                                    DMA_CCMD_MASK_NOBIT, 1) )
  86.509 +    /* Context entry was previously non-present (with domid 0). */
  86.510 +    iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
  86.511 +                               DMA_CCMD_MASK_NOBIT, 1);
  86.512 +    if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
  86.513          iommu_flush_write_buffer(iommu);
  86.514 -    else
  86.515 -        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
  86.516 +
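          +    /* Record that this domain now has mappings behind this IOMMU. */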
  86.517 +    set_bit(iommu->index, &hd->iommu_bitmap);
  86.518      spin_unlock_irqrestore(&iommu->lock, flags);
  86.519  
  86.520      return 0;
  86.521 @@ -1314,7 +1308,7 @@ static int domain_context_unmap_one(
  86.522      spin_lock_irqsave(&iommu->lock, flags);
  86.523      context_clear_present(*context);
  86.524      context_clear_entry(*context);
  86.525 -    iommu_flush_cache_entry(iommu, context);
  86.526 +    iommu_flush_cache_entry(context);
  86.527      iommu_flush_context_global(iommu, 0);
  86.528      iommu_flush_iotlb_global(iommu, 0);
  86.529      unmap_vtd_domain_page(context_entries);
  86.530 @@ -1395,11 +1389,12 @@ void reassign_device_ownership(
  86.531  {
  86.532      struct hvm_iommu *source_hd = domain_hvm_iommu(source);
  86.533      struct hvm_iommu *target_hd = domain_hvm_iommu(target);
  86.534 -    struct pci_dev *pdev;
  86.535 +    struct pci_dev *pdev, *pdev2;
  86.536      struct acpi_drhd_unit *drhd;
  86.537      struct iommu *iommu;
  86.538      int status;
  86.539      unsigned long flags;
  86.540 +    int found = 0;
  86.541  
  86.542      pdev_flr(bus, devfn);
  86.543  
  86.544 @@ -1421,6 +1416,18 @@ void reassign_device_ownership(
  86.545      spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
  86.546      spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
  86.547  
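          +    /* If no remaining device of the source domain sits behind this
          +     * IOMMU, drop it from the domain's flush bitmap. */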
  86.548 +    for_each_pdev ( source, pdev2 )
  86.549 +    {
  86.550 +        drhd = acpi_find_matched_drhd_unit(pdev2);
  86.551 +        if ( drhd->iommu == iommu )
  86.552 +        {
  86.553 +            found = 1;
  86.554 +            break;
  86.555 +        }
  86.556 +    }
  86.557 +    if ( !found )
  86.558 +        clear_bit(iommu->index, &source_hd->iommu_bitmap);
  86.559 +
  86.560      status = domain_context_mapping(target, iommu, pdev);
  86.561      if ( status != 0 )
  86.562          gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
  86.563 @@ -1477,13 +1484,12 @@ static int domain_context_mapped(struct 
  86.564  int intel_iommu_map_page(
  86.565      struct domain *d, unsigned long gfn, unsigned long mfn)
  86.566  {
  86.567 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
  86.568      struct acpi_drhd_unit *drhd;
  86.569      struct iommu *iommu;
  86.570      struct dma_pte *page = NULL, *pte = NULL;
  86.571      u64 pg_maddr;
  86.572 -
  86.573 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  86.574 -    iommu = drhd->iommu;
  86.575 +    int pte_present;
  86.576  
  86.577  #ifdef CONTEXT_PASSTHRU
  86.578      /* do nothing if dom0 and iommu supports pass thru */
  86.579 @@ -1491,23 +1497,27 @@ int intel_iommu_map_page(
  86.580          return 0;
  86.581  #endif
  86.582  
  86.583 -    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
  86.584 +    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
  86.585      if ( pg_maddr == 0 )
  86.586          return -ENOMEM;
  86.587      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  86.588      pte = page + (gfn & LEVEL_MASK);
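          +    /* Remember whether the PTE was already present: the IOTLB flush
          +     * below must indicate non-present -> present transitions. */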
  86.589 +    pte_present = dma_pte_present(*pte);
  86.590      dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
  86.591      dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
  86.592 -    iommu_flush_cache_entry(iommu, pte);
  86.593 +    iommu_flush_cache_entry(pte);
  86.594      unmap_vtd_domain_page(page);
  86.595  
  86.596      for_each_drhd_unit ( drhd )
  86.597      {
  86.598          iommu = drhd->iommu;
  86.599 -        if ( cap_caching_mode(iommu->cap) )
  86.600 -            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
  86.601 -                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
  86.602 -        else if ( cap_rwbf(iommu->cap) )
  86.603 +
  86.604 +        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
  86.605 +            continue;
  86.606 +
  86.607 +        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
  86.608 +                                   (paddr_t)gfn << PAGE_SHIFT_4K, 1,
  86.609 +                                   !pte_present) )
  86.610              iommu_flush_write_buffer(iommu);
  86.611      }
  86.612  
  86.613 @@ -1536,6 +1546,7 @@ int intel_iommu_unmap_page(struct domain
  86.614  int iommu_page_mapping(struct domain *domain, paddr_t iova,
  86.615                         paddr_t hpa, size_t size, int prot)
  86.616  {
  86.617 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
  86.618      struct acpi_drhd_unit *drhd;
  86.619      struct iommu *iommu;
  86.620      u64 start_pfn, end_pfn;
  86.621 @@ -1543,24 +1554,23 @@ int iommu_page_mapping(struct domain *do
  86.622      int index;
  86.623      u64 pg_maddr;
  86.624  
  86.625 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  86.626 -    iommu = drhd->iommu;
  86.627      if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
  86.628          return -EINVAL;
  86.629 +
  86.630      iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
  86.631      start_pfn = hpa >> PAGE_SHIFT_4K;
  86.632      end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
  86.633      index = 0;
  86.634      while ( start_pfn < end_pfn )
  86.635      {
  86.636 -        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K * index);
  86.637 +        pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1);
  86.638          if ( pg_maddr == 0 )
  86.639              return -ENOMEM;
  86.640          page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
  86.641          pte = page + (start_pfn & LEVEL_MASK);
  86.642          dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
  86.643          dma_set_pte_prot(*pte, prot);
  86.644 -        iommu_flush_cache_entry(iommu, pte);
  86.645 +        iommu_flush_cache_entry(pte);
  86.646          unmap_vtd_domain_page(page);
  86.647          start_pfn++;
  86.648          index++;
  86.649 @@ -1569,10 +1579,12 @@ int iommu_page_mapping(struct domain *do
  86.650      for_each_drhd_unit ( drhd )
  86.651      {
  86.652          iommu = drhd->iommu;
  86.653 -        if ( cap_caching_mode(iommu->cap) )
  86.654 -            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
  86.655 -                                  iova, index, 0);
  86.656 -        else if ( cap_rwbf(iommu->cap) )
  86.657 +
  86.658 +        if ( !test_bit(iommu->index, &hd->iommu_bitmap) )
  86.659 +            continue;
  86.660 +
  86.661 +        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
  86.662 +                                   iova, index, 1) )
  86.663              iommu_flush_write_buffer(iommu);
  86.664      }
  86.665  
  86.666 @@ -1586,25 +1598,6 @@ int iommu_page_unmapping(struct domain *
  86.667      return 0;
  86.668  }
  86.669  
  86.670 -void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry)
  86.671 -{
  86.672 -    struct acpi_drhd_unit *drhd;
  86.673 -    struct iommu *iommu = NULL;
  86.674 -    struct dma_pte *pte = (struct dma_pte *) p2m_entry;
  86.675 -
  86.676 -    for_each_drhd_unit ( drhd )
  86.677 -    {
  86.678 -        iommu = drhd->iommu;
  86.679 -        if ( cap_caching_mode(iommu->cap) )
  86.680 -            iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
  86.681 -                                  (paddr_t)gfn << PAGE_SHIFT_4K, 1, 0);
  86.682 -        else if ( cap_rwbf(iommu->cap) )
  86.683 -            iommu_flush_write_buffer(iommu);
  86.684 -    }
  86.685 -
  86.686 -    iommu_flush_cache_entry(iommu, pte);
  86.687 -}
  86.688 -
  86.689  static int iommu_prepare_rmrr_dev(
  86.690      struct domain *d,
  86.691      struct acpi_rmrr_unit *rmrr,
  86.692 @@ -1916,6 +1909,7 @@ struct iommu_ops intel_iommu_ops = {
  86.693      .map_page = intel_iommu_map_page,
  86.694      .unmap_page = intel_iommu_unmap_page,
  86.695      .reassign_device = reassign_device_ownership,
  86.696 +    .get_device_group_id = NULL,
  86.697  };
  86.698  
  86.699  /*
    87.1 --- a/xen/drivers/passthrough/vtd/iommu.h	Mon Jun 02 11:35:02 2008 +0900
    87.2 +++ b/xen/drivers/passthrough/vtd/iommu.h	Mon Jun 02 11:35:39 2008 +0900
    87.3 @@ -236,6 +236,7 @@ struct context_entry {
    87.4  #define LEVEL_STRIDE       (9)
    87.5  #define LEVEL_MASK         ((1 << LEVEL_STRIDE) - 1)
    87.6  #define PTE_NUM            (1 << LEVEL_STRIDE)
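          +/* AGAW <-> page-table levels: agaw = level - 2, giving 2-, 3- or 4-level tables. */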
    87.7 +#define level_to_agaw(val) ((val) - 2)
    87.8  #define agaw_to_level(val) ((val) + 2)
    87.9  #define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
   87.10  #define width_to_agaw(w)   ((w - 30)/LEVEL_STRIDE)
    88.1 --- a/xen/drivers/passthrough/vtd/utils.c	Mon Jun 02 11:35:02 2008 +0900
    88.2 +++ b/xen/drivers/passthrough/vtd/utils.c	Mon Jun 02 11:35:39 2008 +0900
    88.3 @@ -213,109 +213,97 @@ u32 get_level_index(unsigned long gmfn, 
    88.4      return gmfn & LEVEL_MASK;
    88.5  }
    88.6  
    88.7 -void print_vtd_entries(
    88.8 -    struct domain *d,
    88.9 -    struct iommu *iommu,
   88.10 -    int bus, int devfn,
   88.11 -    unsigned long gmfn)
   88.12 +void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn)
   88.13  {
   88.14 -    struct hvm_iommu *hd = domain_hvm_iommu(d);
   88.15 -    struct acpi_drhd_unit *drhd;
   88.16      struct context_entry *ctxt_entry;
   88.17      struct root_entry *root_entry;
   88.18      struct dma_pte pte;
   88.19      u64 *l;
   88.20 -    u32 l_index;
   88.21 -    u32 i = 0;
   88.22 -    int level = agaw_to_level(hd->agaw);
   88.23 +    u32 l_index, level;
   88.24 +
   88.25 +    printk("print_vtd_entries: iommu = %p bdf = %x:%x:%x gmfn = %"PRIx64"\n",
   88.26 +           iommu, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
   88.27  
   88.28 -    printk("print_vtd_entries: domain_id = %x bdf = %x:%x:%x gmfn = %lx\n",
   88.29 -           d->domain_id, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), gmfn);
   88.30 +    if ( iommu->root_maddr == 0 )
   88.31 +    {
   88.32 +        printk("    iommu->root_maddr = 0\n");
   88.33 +        return;
   88.34 +    }
   88.35  
   88.36 -    if ( hd->pgd_maddr == 0 )
   88.37 +    root_entry = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
   88.38 + 
   88.39 +    printk("    root_entry = %p\n", root_entry);
   88.40 +    printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
   88.41 +    if ( !root_present(root_entry[bus]) )
   88.42      {
   88.43 -        printk("    hd->pgd_maddr == 0\n");
   88.44 +        unmap_vtd_domain_page(root_entry);
   88.45 +        printk("    root_entry[%x] not present\n", bus);
   88.46          return;
   88.47      }
   88.48 -    printk("    hd->pgd_maddr = %"PRIx64"\n", hd->pgd_maddr);
   88.49  
   88.50 -    for_each_drhd_unit ( drhd )
   88.51 +    ctxt_entry =
   88.52 +        (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
   88.53 +    if ( ctxt_entry == NULL )
   88.54      {
   88.55 -        printk("---- print_vtd_entries %d ----\n", i++);
   88.56 -
   88.57 -        if ( iommu->root_maddr == 0 )
   88.58 -        {
   88.59 -            printk("    iommu->root_maddr = 0\n");
   88.60 -            continue;
   88.61 -        }
   88.62 +        unmap_vtd_domain_page(root_entry);
   88.63 +        printk("    ctxt_entry == NULL\n");
   88.64 +        return;
   88.65 +    }
   88.66  
   88.67 -        root_entry =
   88.68 -            (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
   88.69 - 
   88.70 -        printk("    root_entry = %p\n", root_entry);
   88.71 -        printk("    root_entry[%x] = %"PRIx64"\n", bus, root_entry[bus].val);
   88.72 -        if ( !root_present(root_entry[bus]) )
   88.73 -        {
   88.74 -            unmap_vtd_domain_page(root_entry);
   88.75 -            printk("    root_entry[%x] not present\n", bus);
   88.76 -            continue;
   88.77 -        }
   88.78 +    printk("    context = %p\n", ctxt_entry);
   88.79 +    printk("    context[%x] = %"PRIx64"_%"PRIx64"\n",
   88.80 +           devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
   88.81 +    if ( !context_present(ctxt_entry[devfn]) )
   88.82 +    {
   88.83 +        unmap_vtd_domain_page(ctxt_entry);
   88.84 +        unmap_vtd_domain_page(root_entry);
   88.85 +        printk("    ctxt_entry[%x] not present\n", devfn);
   88.86 +        return;
   88.87 +    }
   88.88  
   88.89 -        ctxt_entry =
   88.90 -            (struct context_entry *)map_vtd_domain_page(root_entry[bus].val);
   88.91 -        if ( ctxt_entry == NULL )
   88.92 -        {
   88.93 -            unmap_vtd_domain_page(root_entry);
   88.94 -            printk("    ctxt_entry == NULL\n");
   88.95 -            continue;
   88.96 -        }
   88.97 +    level = agaw_to_level(context_address_width(ctxt_entry[devfn]));
   88.98 +    if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
   88.99 +         level != VTD_PAGE_TABLE_LEVEL_4)
  88.100 +    {
  88.101 +        unmap_vtd_domain_page(ctxt_entry);
  88.102 +        unmap_vtd_domain_page(root_entry);
   88.103 +        printk("Unsupported VTD page table level (%d)!\n", level);
          +        return;
   88.104 +    }
  88.105  
  88.106 -        printk("    context = %p\n", ctxt_entry);
  88.107 -        printk("    context[%x] = %"PRIx64" %"PRIx64"\n",
  88.108 -               devfn, ctxt_entry[devfn].hi, ctxt_entry[devfn].lo);
  88.109 -        if ( !context_present(ctxt_entry[devfn]) )
  88.110 +    l = maddr_to_virt(ctxt_entry[devfn].lo);
  88.111 +    do
  88.112 +    {
  88.113 +        l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  88.114 +        printk("    l%d = %p\n", level, l);
  88.115 +        if ( l == NULL )
  88.116          {
  88.117              unmap_vtd_domain_page(ctxt_entry);
  88.118              unmap_vtd_domain_page(root_entry);
  88.119 -            printk("    ctxt_entry[%x] not present\n", devfn);
  88.120 -            continue;
  88.121 +            printk("    l%d == NULL\n", level);
  88.122 +            break;
  88.123          }
  88.124 +        l_index = get_level_index(gmfn, level);
  88.125 +        printk("    l%d_index = %x\n", level, l_index);
  88.126 +        printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
  88.127  
  88.128 -        if ( level != VTD_PAGE_TABLE_LEVEL_3 &&
  88.129 -             level != VTD_PAGE_TABLE_LEVEL_4)
  88.130 +        pte.val = l[l_index];
  88.131 +        if ( !dma_pte_present(pte) )
  88.132          {
  88.133              unmap_vtd_domain_page(ctxt_entry);
  88.134              unmap_vtd_domain_page(root_entry);
  88.135 -            printk("Unsupported VTD page table level (%d)!\n", level);
  88.136 -            continue;
  88.137 +            printk("    l%d[%x] not present\n", level, l_index);
  88.138 +            break;
  88.139          }
  88.140  
  88.141 -        l = maddr_to_virt(ctxt_entry[devfn].lo);
  88.142 -        do
  88.143 -        {
  88.144 -            l = (u64*)(((unsigned long)l >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K);
  88.145 -            printk("    l%d = %p\n", level, l);
  88.146 -            if ( l == NULL )
  88.147 -            {
  88.148 -                unmap_vtd_domain_page(ctxt_entry);
  88.149 -                unmap_vtd_domain_page(root_entry);
  88.150 -                printk("    l%d == NULL\n", level);
  88.151 -                break;
  88.152 -            }
  88.153 -            l_index = get_level_index(gmfn, level);
  88.154 -            printk("    l%d_index = %x\n", level, l_index);
  88.155 -            printk("    l%d[%x] = %"PRIx64"\n", level, l_index, l[l_index]);
  88.156 +        l = maddr_to_virt(l[l_index]);
  88.157 +    } while ( --level );
  88.158 +}
  88.159  
  88.160 -            pte.val = l[l_index];
  88.161 -            if ( !dma_pte_present(pte) )
  88.162 -            {
  88.163 -                unmap_vtd_domain_page(ctxt_entry);
  88.164 -                unmap_vtd_domain_page(root_entry);
  88.165 -                printk("    l%d[%x] not present\n", level, l_index);
  88.166 -                break;
  88.167 -            }
  88.168 -
  88.169 -            l = maddr_to_virt(l[l_index]);
  88.170 -        } while ( --level );
  88.171 -    }
  88.172 -}
  88.173 +/*
  88.174 + * Local variables:
  88.175 + * mode: C
  88.176 + * c-set-style: "BSD"
  88.177 + * c-basic-offset: 4
  88.178 + * indent-tabs-mode: nil
  88.179 + * End:
  88.180 + */
    89.1 --- a/xen/drivers/passthrough/vtd/vtd.h	Mon Jun 02 11:35:02 2008 +0900
    89.2 +++ b/xen/drivers/passthrough/vtd/vtd.h	Mon Jun 02 11:35:39 2008 +0900
    89.3 @@ -42,13 +42,31 @@ struct IO_APIC_route_remap_entry {
    89.4      };
    89.5  };
    89.6  
    89.7 +struct msi_msg_remap_entry {
    89.8 +    union {
    89.9 +        u32 val;
   89.10 +        struct {
   89.11 +            u32 dontcare:2,
   89.12 +                index_15:1,
   89.13 +                SHV:1,
   89.14 +                format:1,
   89.15 +                index_0_14:15,
   89.16 +                addr_id_val:12; /* Interrupt address identifier value,
   89.17 +                                   must be 0FEEh */
   89.18 +        };
   89.19 +    } address_lo;   /* low 32 bits of msi message address */
   89.20 +
    89.21 +    u32 address_hi;     /* high 32 bits of msi message address */
    89.22 +    u32 data;           /* msi message data */
   89.23 +};
   89.24 +
   89.25  unsigned int get_clflush_size(void);
   89.26  u64 alloc_pgtable_maddr(void);
   89.27  void free_pgtable_maddr(u64 maddr);
   89.28  void *map_vtd_domain_page(u64 maddr);
   89.29  void unmap_vtd_domain_page(void *va);
   89.30  
   89.31 -void iommu_flush_cache_entry(struct iommu *iommu, void *addr);
   89.32 -void iommu_flush_cache_page(struct iommu *iommu, void *addr);
   89.33 +void iommu_flush_cache_entry(void *addr);
   89.34 +void iommu_flush_cache_page(void *addr);
   89.35  
   89.36  #endif // _VTD_H_
    90.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c	Mon Jun 02 11:35:02 2008 +0900
    90.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c	Mon Jun 02 11:35:39 2008 +0900
    90.3 @@ -41,8 +41,6 @@ u64 alloc_pgtable_maddr(void)
    90.4  {
    90.5      struct page_info *pg;
    90.6      u64 *vaddr;
    90.7 -    struct acpi_drhd_unit *drhd;
    90.8 -    struct iommu *iommu;
    90.9  
   90.10      pg = alloc_domheap_page(NULL, 0);
   90.11      vaddr = map_domain_page(page_to_mfn(pg));
   90.12 @@ -50,9 +48,7 @@ u64 alloc_pgtable_maddr(void)
   90.13          return 0;
   90.14      memset(vaddr, 0, PAGE_SIZE);
   90.15  
   90.16 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   90.17 -    iommu = drhd->iommu;
   90.18 -    iommu_flush_cache_page(iommu, vaddr);
   90.19 +    iommu_flush_cache_page(vaddr);
   90.20      unmap_domain_page(vaddr);
   90.21  
   90.22      return page_to_maddr(pg);
   90.23 @@ -123,181 +119,3 @@ void hvm_dpci_isairq_eoi(struct domain *
   90.24          }
   90.25      }
   90.26  }
   90.27 -
   90.28 -void iommu_set_pgd(struct domain *d)
   90.29 -{
   90.30 -    struct hvm_iommu *hd  = domain_hvm_iommu(d);
   90.31 -    unsigned long p2m_table;
   90.32 -
   90.33 -    p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
   90.34 -
   90.35 -    if ( paging_mode_hap(d) )
   90.36 -    {
   90.37 -        int level = agaw_to_level(hd->agaw);
   90.38 -        struct dma_pte *dpte = NULL;
   90.39 -        mfn_t pgd_mfn;
   90.40 -
   90.41 -        switch ( level )
   90.42 -        {
   90.43 -        case VTD_PAGE_TABLE_LEVEL_3:
   90.44 -            dpte = map_domain_page(p2m_table);
   90.45 -            if ( !dma_pte_present(*dpte) )
   90.46 -            {
   90.47 -                gdprintk(XENLOG_ERR VTDPREFIX,
   90.48 -                         "iommu_set_pgd: second level wasn't there\n");
   90.49 -                unmap_domain_page(dpte);
   90.50 -                return;
   90.51 -            }
   90.52 -            pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
   90.53 -            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
   90.54 -            unmap_domain_page(dpte);
   90.55 -            break;
   90.56 -        case VTD_PAGE_TABLE_LEVEL_4:
   90.57 -            pgd_mfn = _mfn(p2m_table);
   90.58 -            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
   90.59 -            break;
   90.60 -        default:
   90.61 -            gdprintk(XENLOG_ERR VTDPREFIX,
   90.62 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
   90.63 -            break;
   90.64 -        }
   90.65 -    }
   90.66 -    else
   90.67 -    {
   90.68 -#if CONFIG_PAGING_LEVELS == 3
   90.69 -        struct dma_pte *pte = NULL, *pgd_vaddr = NULL, *pmd_vaddr = NULL;
   90.70 -        int i;
   90.71 -        u64 pmd_maddr;
   90.72 -        unsigned long flags;
   90.73 -        l3_pgentry_t *l3e;
   90.74 -        int level = agaw_to_level(hd->agaw);
   90.75 -
   90.76 -        spin_lock_irqsave(&hd->mapping_lock, flags);
   90.77 -        hd->pgd_maddr = alloc_pgtable_maddr();
   90.78 -        if ( hd->pgd_maddr == 0 )
   90.79 -        {
   90.80 -            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   90.81 -            gdprintk(XENLOG_ERR VTDPREFIX,
   90.82 -                     "Allocate pgd memory failed!\n");
   90.83 -            return;
   90.84 -        }
   90.85 -
   90.86 -        pgd_vaddr = map_vtd_domain_page(hd->pgd_maddr);
   90.87 -        l3e = map_domain_page(p2m_table);
   90.88 -        switch ( level )
   90.89 -        {
   90.90 -        case VTD_PAGE_TABLE_LEVEL_3:        /* Weybridge */
   90.91 -            /* We only support 8 entries for the PAE L3 p2m table */
   90.92 -            for ( i = 0; i < 8 ; i++ )
   90.93 -            {
   90.94 -                /* Don't create new L2 entry, use ones from p2m table */
   90.95 -                pgd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
   90.96 -            }
   90.97 -            break;
   90.98 -
   90.99 -        case VTD_PAGE_TABLE_LEVEL_4:        /* Stoakley */
  90.100 -            /* We allocate one more page for the top vtd page table. */
  90.101 -            pmd_maddr = alloc_pgtable_maddr();
  90.102 -            if ( pmd_maddr == 0 )
  90.103 -            {
  90.104 -                unmap_vtd_domain_page(pgd_vaddr);
  90.105 -                unmap_domain_page(l3e);
  90.106 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
  90.107 -                gdprintk(XENLOG_ERR VTDPREFIX,
  90.108 -                         "Allocate pmd memory failed!\n");
  90.109 -                return;
  90.110 -            }
  90.111 -
  90.112 -            pte = &pgd_vaddr[0];
  90.113 -            dma_set_pte_addr(*pte, pmd_maddr);
  90.114 -            dma_set_pte_readable(*pte);
  90.115 -            dma_set_pte_writable(*pte);
  90.116 -
  90.117 -            pmd_vaddr = map_vtd_domain_page(pmd_maddr);
  90.118 -            for ( i = 0; i < 8; i++ )
  90.119 -            {
  90.120 -                /* Don't create new L2 entry, use ones from p2m table */
  90.121 -                pmd_vaddr[i].val = l3e[i].l3 | _PAGE_PRESENT | _PAGE_RW;
  90.122 -            }
  90.123 -
  90.124 -            unmap_vtd_domain_page(pmd_vaddr);
  90.125 -            break;
  90.126 -        default:
  90.127 -            gdprintk(XENLOG_ERR VTDPREFIX,
  90.128 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
  90.129 -            break;
  90.130 -        }
  90.131 -
  90.132 -        unmap_vtd_domain_page(pgd_vaddr);
  90.133 -        unmap_domain_page(l3e);
  90.134 -        spin_unlock_irqrestore(&hd->mapping_lock, flags);
  90.135 -
  90.136 -#elif CONFIG_PAGING_LEVELS == 4
  90.137 -        mfn_t pgd_mfn;
  90.138 -        l3_pgentry_t *l3e;
  90.139 -        int level = agaw_to_level(hd->agaw);
  90.140 -
  90.141 -        switch ( level )
  90.142 -        {
  90.143 -        case VTD_PAGE_TABLE_LEVEL_3:
  90.144 -            l3e = map_domain_page(p2m_table);
  90.145 -            if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 )
  90.146 -            {
  90.147 -                gdprintk(XENLOG_ERR VTDPREFIX,
  90.148 -                         "iommu_set_pgd: second level wasn't there\n");
  90.149 -                unmap_domain_page(l3e);
  90.150 -                return;
  90.151 -            }
  90.152 -
  90.153 -            pgd_mfn = _mfn(l3e_get_pfn(*l3e));
  90.154 -            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  90.155 -            unmap_domain_page(l3e);
  90.156 -            break;
  90.157 -        case VTD_PAGE_TABLE_LEVEL_4:
  90.158 -            pgd_mfn = _mfn(p2m_table);
  90.159 -            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  90.160 -            break;
  90.161 -        default:
  90.162 -            gdprintk(XENLOG_ERR VTDPREFIX,
  90.163 -                     "iommu_set_pgd:Unsupported p2m table sharing level!\n");
  90.164 -            break;
  90.165 -        }
  90.166 -#endif
  90.167 -    }
  90.168 -}
  90.169 -
  90.170 -void iommu_free_pgd(struct domain *d)
  90.171 -{
  90.172 -#if CONFIG_PAGING_LEVELS == 3
  90.173 -    struct hvm_iommu *hd  = domain_hvm_iommu(d);
  90.174 -    int level = agaw_to_level(hd->agaw);
  90.175 -    struct dma_pte *pgd_vaddr = NULL;
  90.176 -
  90.177 -    switch ( level )
  90.178 -    {
  90.179 -    case VTD_PAGE_TABLE_LEVEL_3:
  90.180 -        if ( hd->pgd_maddr != 0 )
  90.181 -        {
  90.182 -            free_pgtable_maddr(hd->pgd_maddr);
  90.183 -            hd->pgd_maddr = 0;
  90.184 -        }
  90.185 -        break;
  90.186 -    case VTD_PAGE_TABLE_LEVEL_4:
  90.187 -        if ( hd->pgd_maddr != 0 )
  90.188 -        {
  90.189 -            pgd_vaddr = (struct dma_pte*)map_vtd_domain_page(hd->pgd_maddr);
  90.190 -            if ( pgd_vaddr[0].val != 0 )
  90.191 -                free_pgtable_maddr(pgd_vaddr[0].val);
  90.192 -            unmap_vtd_domain_page(pgd_vaddr);
  90.193 -            free_pgtable_maddr(hd->pgd_maddr);
  90.194 -            hd->pgd_maddr = 0;
  90.195 -        }
  90.196 -        break;
  90.197 -    default:
  90.198 -        gdprintk(XENLOG_ERR VTDPREFIX,
  90.199 -                 "Unsupported p2m table sharing level!\n");
  90.200 -        break;
  90.201 -    }
  90.202 -#endif
  90.203 -}
  90.204 -
    91.1 --- a/xen/include/asm-x86/hvm/hvm.h	Mon Jun 02 11:35:02 2008 +0900
    91.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Mon Jun 02 11:35:39 2008 +0900
    91.3 @@ -147,8 +147,10 @@ void hvm_send_assist_req(struct vcpu *v)
    91.4  
    91.5  void hvm_set_guest_tsc(struct vcpu *v, u64 guest_tsc);
    91.6  u64 hvm_get_guest_tsc(struct vcpu *v);
    91.7 -#define hvm_set_guest_time(vcpu, gtime) hvm_set_guest_tsc(vcpu, gtime)
    91.8 -#define hvm_get_guest_time(vcpu)        hvm_get_guest_tsc(vcpu)
    91.9 +
   91.10 +void hvm_init_guest_time(struct domain *d);
   91.11 +void hvm_set_guest_time(struct vcpu *v, u64 guest_time);
   91.12 +u64 hvm_get_guest_time(struct vcpu *v);
   91.13  
   91.14  #define hvm_paging_enabled(v) \
   91.15      (!!((v)->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG))
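
With the macro aliases gone, guest time and guest TSC become distinct quantities, so callers must pick one explicitly. A minimal sketch of the distinction, assuming only the declarations in the hunk above (v is a Xen struct vcpu pointer); illustrative, not code from this changeset:

    /* Sketch only: hvm_get_guest_tsc() returns the virtualised TSC, while
     * hvm_get_guest_time() returns monotonic guest system time maintained
     * via the stime_offset fields added later in this changeset. */
    void sketch_sample_clocks(struct vcpu *v)
    {
        u64 tsc  = hvm_get_guest_tsc(v);
        u64 time = hvm_get_guest_time(v);
        (void)tsc; (void)time;
    }
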
    92.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Mon Jun 02 11:35:02 2008 +0900
    92.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Mon Jun 02 11:35:39 2008 +0900
    92.3 @@ -68,6 +68,9 @@ struct hvm_vcpu {
    92.4      struct mtrr_state   mtrr;
    92.5      u64                 pat_cr;
    92.6  
    92.7 +    /* In mode delay_for_missed_ticks, VCPUs have differing guest times. */
    92.8 +    int64_t             stime_offset;
    92.9 +
   92.10      /* Which cache mode is this VCPU in (CR0:CD/NW)? */
   92.11      u8                  cache_mode;
   92.12  
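
The per-VCPU offset exists because, in delay_for_missed_ticks mode, each VCPU's clock may lag Xen system time by a different amount. A hedged sketch of the implied relationship (field paths assumed from this changeset's hunks, not quoted from it):

    /*
     * Assumed relationship (sketch):
     *   guest_time(v) = NOW()                           Xen system time
     *                 + pl_time.stime_offset            per-domain offset
     *                 + v->arch.hvm_vcpu.stime_offset   per-VCPU offset
     */
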
    93.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Jun 02 11:35:02 2008 +0900
    93.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Jun 02 11:35:39 2008 +0900
    93.3 @@ -49,7 +49,6 @@ void vmx_asm_vmexit_handler(struct cpu_u
    93.4  void vmx_asm_do_vmentry(void);
    93.5  void vmx_intr_assist(void);
    93.6  void vmx_do_resume(struct vcpu *);
    93.7 -void set_guest_time(struct vcpu *v, u64 gtime);
    93.8  void vmx_vlapic_msr_changed(struct vcpu *v);
    93.9  void vmx_realmode(struct cpu_user_regs *regs);
   93.10  
    94.1 --- a/xen/include/asm-x86/hvm/vpt.h	Mon Jun 02 11:35:02 2008 +0900
    94.2 +++ b/xen/include/asm-x86/hvm/vpt.h	Mon Jun 02 11:35:39 2008 +0900
    94.3 @@ -57,7 +57,7 @@ struct hpet_registers {
    94.4  typedef struct HPETState {
    94.5      struct hpet_registers hpet;
    94.6      struct vcpu *vcpu;
    94.7 -    uint64_t tsc_freq;
    94.8 +    uint64_t stime_freq;
    94.9      uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
    94.10  uint64_t hpet_to_ns_limit; /* max hpet ticks convertible to ns      */
   94.11      uint64_t mc_offset;
   94.12 @@ -137,6 +137,11 @@ struct pl_time {    /* platform time */
   94.13      struct RTCState  vrtc;
   94.14      struct HPETState vhpet;
   94.15      struct PMTState  vpmt;
   94.16 +    /* guest_time = Xen sys time + stime_offset */
   94.17 +    int64_t stime_offset;
   94.18 +    /* Ensures monotonicity in appropriate timer modes. */
   94.19 +    uint64_t last_guest_time;
   94.20 +    spinlock_t pl_time_lock;
   94.21  };
   94.22  
   94.23  #define ticks_per_sec(v) (v->domain->arch.hvm_domain.tsc_frequency)
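
The three new pl_time fields work together: stime_offset converts Xen system time into guest time, while last_guest_time and pl_time_lock keep reads monotonic across VCPUs. A minimal sketch of such a read, assuming Xen-internal NOW(), spinlock helpers, and the field paths used above; this is illustrative, not the changeset's actual implementation:

    u64 sketch_get_guest_time(struct vcpu *v)
    {
        struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time;
        u64 now;

        spin_lock(&pl->pl_time_lock);
        now = NOW() + pl->stime_offset;
        if ( now < pl->last_guest_time )
            now = pl->last_guest_time;      /* never step backwards */
        else
            pl->last_guest_time = now;
        spin_unlock(&pl->pl_time_lock);

        /* Per-VCPU skew applied on top of the domain-wide clock. */
        return now + v->arch.hvm_vcpu.stime_offset;
    }
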
    95.1 --- a/xen/include/asm-x86/tboot.h	Mon Jun 02 11:35:02 2008 +0900
    95.2 +++ b/xen/include/asm-x86/tboot.h	Mon Jun 02 11:35:39 2008 +0900
    95.3 @@ -46,7 +46,15 @@ typedef struct __attribute__ ((__packed_
    95.4  } uuid_t;
    95.5  
    95.6  /* used to communicate between tboot and the launched kernel (i.e. Xen) */
    95.7 -#define MAX_TB_ACPI_SINFO_SIZE   64
    95.8 +
    95.9 +typedef struct __attribute__ ((__packed__)) {
   95.10 +    uint16_t pm1a_cnt;
   95.11 +    uint16_t pm1b_cnt;
   95.12 +    uint16_t pm1a_evt;
   95.13 +    uint16_t pm1b_evt;
   95.14 +    uint16_t pm1a_cnt_val;
   95.15 +    uint16_t pm1b_cnt_val;
   95.16 +} tboot_acpi_sleep_info;
   95.17  
   95.18  typedef struct __attribute__ ((__packed__)) {
   95.19      /* version 0x01+ fields: */
   95.20 @@ -58,8 +66,9 @@ typedef struct __attribute__ ((__packed_
   95.21      uint32_t  shutdown_type;     /* type of shutdown (TB_SHUTDOWN_*) */
   95.22      uint32_t  s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */
   95.23      uint32_t  s3_k_wakeup_entry; /* entry point for xen s3 wake up */
   95.24 -    uint8_t   acpi_sinfo[MAX_TB_ACPI_SINFO_SIZE];
   95.25 -                                 /* where kernel put acpi sleep info in Sx */
    95.26 +    tboot_acpi_sleep_info
    95.27 +              acpi_sinfo;        /* where the kernel puts acpi sleep info in Sx */
   95.28 +    uint8_t   reserved[52];      /* this pad is for compat with old field */
   95.29      /* version 0x02+ fields: */
   95.30      uint32_t  tboot_base;        /* starting addr for tboot */
   95.31      uint32_t  tboot_size;        /* size of tboot */
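
The 52-byte reserved pad is sized so that the new packed struct occupies exactly the 64 bytes the old MAX_TB_ACPI_SINFO_SIZE array did, keeping the version-0x02 fields at their old offsets. A hedged compile-time check (not part of the patch) that documents this:

    /* Six packed uint16_t fields: sizeof(tboot_acpi_sleep_info) == 12,
     * and 12 + 52 == 64, the old MAX_TB_ACPI_SINFO_SIZE. */
    typedef char tboot_sinfo_layout_check[
        (sizeof(tboot_acpi_sleep_info) + 52 == 64) ? 1 : -1];
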
    96.1 --- a/xen/include/public/domctl.h	Mon Jun 02 11:35:02 2008 +0900
    96.2 +++ b/xen/include/public/domctl.h	Mon Jun 02 11:35:39 2008 +0900
    96.3 @@ -448,6 +448,16 @@ struct xen_domctl_assign_device {
    96.4  typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
    96.5  DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
    96.6  
    96.7 +/* Retrieve information on sibling devices of machine_bdf */
    96.8 +#define XEN_DOMCTL_get_device_group 50
    96.9 +struct xen_domctl_get_device_group {
   96.10 +    uint32_t  machine_bdf;      /* IN */
   96.11 +    uint32_t  max_sdevs;        /* IN */
   96.12 +    uint32_t  num_sdevs;        /* OUT */
   96.13 +    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
   96.14 +};
   96.15 +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
   96.16 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
   96.17  
   96.18  /* Pass-through interrupts: bind real irq -> hvm devfn. */
   96.19  #define XEN_DOMCTL_bind_pt_irq       38
   96.20 @@ -619,6 +629,7 @@ struct xen_domctl {
   96.21          struct xen_domctl_hvmcontext        hvmcontext;
   96.22          struct xen_domctl_address_size      address_size;
   96.23          struct xen_domctl_sendtrigger       sendtrigger;
   96.24 +        struct xen_domctl_get_device_group  get_device_group;
   96.25          struct xen_domctl_assign_device     assign_device;
   96.26          struct xen_domctl_bind_pt_irq       bind_pt_irq;
   96.27          struct xen_domctl_memory_mapping    memory_mapping;
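
A toolstack would drive the new domctl roughly as below. This is a hedged sketch of a hypothetical libxc wrapper: DECLARE_DOMCTL, set_xen_guest_handle() and do_domctl() are the usual libxc conventions, and real code would also need to pin the output buffer before the hypercall:

    int sketch_get_device_group(int xc_handle, uint32_t domid,
                                uint32_t machine_bdf, uint32_t max_sdevs,
                                uint32_t *num_sdevs, uint32_t *sdev_array)
    {
        int rc;
        DECLARE_DOMCTL;

        domctl.cmd = XEN_DOMCTL_get_device_group;
        domctl.domain = (domid_t)domid;
        domctl.u.get_device_group.machine_bdf = machine_bdf;
        domctl.u.get_device_group.max_sdevs   = max_sdevs;
        set_xen_guest_handle(domctl.u.get_device_group.sdev_array, sdev_array);

        rc = do_domctl(xc_handle, &domctl);
        *num_sdevs = domctl.u.get_device_group.num_sdevs;
        return rc;
    }
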
    97.1 --- a/xen/include/xen/elfcore.h	Mon Jun 02 11:35:02 2008 +0900
    97.2 +++ b/xen/include/xen/elfcore.h	Mon Jun 02 11:35:39 2008 +0900
    97.3 @@ -66,6 +66,7 @@ typedef struct {
    97.4      unsigned long xen_compile_time;
    97.5      unsigned long tainted;
    97.6  #if defined(__i386__) || defined(__x86_64__)
    97.7 +    unsigned long xen_phys_start;
    97.8      unsigned long dom0_pfn_to_mfn_frame_list_list;
    97.9  #endif
   97.10  #if defined(__ia64__)
    98.1 --- a/xen/include/xen/hvm/iommu.h	Mon Jun 02 11:35:02 2008 +0900
    98.2 +++ b/xen/include/xen/hvm/iommu.h	Mon Jun 02 11:35:39 2008 +0900
    98.3 @@ -43,6 +43,7 @@ struct hvm_iommu {
    98.4      int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
    98.5      struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
    98.6      domid_t iommu_domid;           /* domain id stored in iommu */
    98.7 +    u64 iommu_bitmap;              /* bitmap of iommu(s) that the domain uses */
    98.8  
    98.9      /* amd iommu support */
   98.10      int domain_id;
   98.11 @@ -54,4 +55,7 @@ struct hvm_iommu {
   98.12      struct iommu_ops *platform_ops;
   98.13  };
   98.14  
   98.15 +#define has_iommu_pdevs(domain) \
   98.16 +    (!list_empty(&(domain->arch.hvm_domain.hvm_iommu.pdev_list)))
   98.17 +
   98.18  #endif /* __ASM_X86_HVM_IOMMU_H__ */
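
has_iommu_pdevs() simply tests whether any PCI device is assigned to the domain. An illustrative use (an assumption, not taken from the patch) is to short-circuit per-domain IOMMU work:

    static void sketch_teardown(struct domain *d)
    {
        if ( !has_iommu_pdevs(d) )
            return;                   /* no passthrough devices assigned */
        iommu_domain_teardown(d);     /* declared in xen/iommu.h */
    }
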
    99.1 --- a/xen/include/xen/iommu.h	Mon Jun 02 11:35:02 2008 +0900
    99.2 +++ b/xen/include/xen/iommu.h	Mon Jun 02 11:35:39 2008 +0900
    99.3 @@ -29,6 +29,7 @@
    99.4  
    99.5  extern int vtd_enabled;
    99.6  extern int iommu_enabled;
    99.7 +extern int iommu_pv_enabled;
    99.8  
    99.9  #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
   99.10  #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
   99.11 @@ -43,7 +44,9 @@ extern int iommu_enabled;
   99.12  struct iommu {
   99.13      struct list_head list;
   99.14      void __iomem *reg; /* Pointer to hardware regs, virtual addr */
   99.15 +    u32	index;         /* Sequence number of iommu */
   99.16      u32	gcmd;          /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
   99.17 +    u32 nr_pt_levels;
   99.18      u64	cap;
   99.19      u64	ecap;
   99.20      spinlock_t lock; /* protect context, domain ids */
   99.21 @@ -58,14 +61,13 @@ void iommu_domain_destroy(struct domain 
   99.22  int device_assigned(u8 bus, u8 devfn);
   99.23  int assign_device(struct domain *d, u8 bus, u8 devfn);
   99.24  void deassign_device(struct domain *d, u8 bus, u8 devfn);
   99.25 +int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 
   99.26 +    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
   99.27  void reassign_device_ownership(struct domain *source,
   99.28                                 struct domain *target,
   99.29                                 u8 bus, u8 devfn);
   99.30  int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
   99.31  int iommu_unmap_page(struct domain *d, unsigned long gfn);
   99.32 -void iommu_flush(struct domain *d, unsigned long gfn, u64 *p2m_entry);
   99.33 -void iommu_set_pgd(struct domain *d);
   99.34 -void iommu_free_pgd(struct domain *d);
   99.35  void iommu_domain_teardown(struct domain *d);
   99.36  int hvm_do_IRQ_dpci(struct domain *d, unsigned int irq);
   99.37  int dpci_ioport_intercept(ioreq_t *p);
   99.38 @@ -76,6 +78,11 @@ int pt_irq_destroy_bind_vtd(struct domai
   99.39  unsigned int io_apic_read_remap_rte(unsigned int apic, unsigned int reg);
   99.40  void io_apic_write_remap_rte(unsigned int apic,
   99.41                               unsigned int reg, unsigned int value);
   99.42 +
   99.43 +struct msi_desc;
   99.44 +struct msi_msg;
   99.45 +void msi_msg_read_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
   99.46 +void msi_msg_write_remap_rte(struct msi_desc *msi_desc, struct msi_msg *msg);
   99.47  struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu);
   99.48  struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu);
   99.49  struct iommu_flush *iommu_get_flush(struct iommu *iommu);
   99.50 @@ -94,6 +101,7 @@ struct iommu_ops {
   99.51      int (*unmap_page)(struct domain *d, unsigned long gfn);
   99.52      void (*reassign_device)(struct domain *s, struct domain *t,
   99.53                              u8 bus, u8 devfn);
   99.54 +    int (*get_device_group_id)(u8 bus, u8 devfn);
   99.55  };
   99.56  
   99.57  #endif /* _IOMMU_H_ */
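
The new get_device_group_id() hook lets iommu_get_device_group() report which co-assignable devices share a group with a given BDF. A minimal sketch of that iteration, assuming the Xen-internal struct pci_dev, the hvm_iommu pdev_list, copy_to_guest_offset(), and an assumed bus/devfn encoding of the returned IDs; the actual implementation lives in the VT-d code:

    int sketch_iommu_get_device_group(struct domain *d, u8 bus, u8 devfn,
                                      XEN_GUEST_HANDLE_64(uint32) buf,
                                      int max_sdevs)
    {
        struct hvm_iommu *hd = domain_hvm_iommu(d);
        struct pci_dev *pdev;
        int group_id, i = 0;
        uint32_t bdf;

        group_id = hd->platform_ops->get_device_group_id(bus, devfn);

        list_for_each_entry ( pdev, &hd->pdev_list, list )
        {
            if ( hd->platform_ops->get_device_group_id(pdev->bus,
                                                       pdev->devfn) != group_id )
                continue;
            if ( i >= max_sdevs )
                break;
            bdf = ((u32)pdev->bus << 8) | pdev->devfn;  /* assumed encoding */
            if ( copy_to_guest_offset(buf, i, &bdf, 1) )
                return -EFAULT;
            i++;
        }
        return i;   /* number of sibling devices reported */
    }
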
   100.1 --- a/xen/include/xen/sched.h	Mon Jun 02 11:35:02 2008 +0900
   100.2 +++ b/xen/include/xen/sched.h	Mon Jun 02 11:35:39 2008 +0900
   100.3 @@ -186,6 +186,8 @@ struct domain
   100.4  
   100.5      /* Is this an HVM guest? */
   100.6      bool_t           is_hvm;
   100.7 +    /* Does this guest need iommu mappings? */
   100.8 +    bool_t           need_iommu;
   100.9      /* Is this guest fully privileged (aka dom0)? */
  100.10      bool_t           is_privileged;
  100.11      /* Which guest this guest has privileges on */
  100.12 @@ -515,6 +517,7 @@ static inline void vcpu_unblock(struct v
  100.13  
  100.14  #define is_hvm_domain(d) ((d)->is_hvm)
  100.15  #define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
  100.16 +#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)
  100.17  
  100.18  extern enum cpufreq_controller {
  100.19      FREQCTL_none, FREQCTL_dom0_kernel
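
need_iommu(d) is deliberately false for HVM guests, whose IOMMU mappings are handled through their own page tables; only PV domains with assigned devices need Xen to mirror their pages into the IOMMU. A hedged sketch (hypothetical helper, not from the patch) of the kind of 1:1 mapping this implies:

    static void sketch_map_for_pv_passthrough(struct domain *d,
                                              unsigned long mfn)
    {
        /* PV guests are direct-mapped in the IOMMU: gfn == mfn. */
        if ( need_iommu(d) )
            iommu_map_page(d, mfn, mfn);   /* declared in xen/iommu.h */
    }
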
   101.1 --- a/xen/include/xen/time.h	Mon Jun 02 11:35:02 2008 +0900
   101.2 +++ b/xen/include/xen/time.h	Mon Jun 02 11:35:39 2008 +0900
   101.3 @@ -47,6 +47,7 @@ struct tm {
   101.4  };
   101.5  struct tm gmtime(unsigned long t);
   101.6  
   101.7 +#define SYSTEM_TIME_HZ  1000000000ULL
   101.8  #define NOW()           ((s_time_t)get_s_time())
   101.9  #define SECONDS(_s)     ((s_time_t)((_s)  * 1000000000ULL))
  101.10  #define MILLISECS(_ms)  ((s_time_t)((_ms) * 1000000ULL))
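
SYSTEM_TIME_HZ makes explicit that Xen system time is a nanosecond counter, which is what allows the HPET to switch from tsc_freq to stime_freq above: with stime_freq == SYSTEM_TIME_HZ, the 2^10 fixed-point scale documented in HPETState collapses to exactly 1024, so ticks map 1:1 to nanoseconds. A standalone illustration of that arithmetic (assumed to match the HPETState scale convention):

    #include <stdint.h>
    #include <stdio.h>

    #define SYSTEM_TIME_HZ 1000000000ULL

    int main(void)
    {
        uint64_t stime_freq = SYSTEM_TIME_HZ;
        /* hpet ticks -> ns scale, multiplied by 2^10 as in HPETState */
        uint64_t scale = (1000000000ULL << 10) / stime_freq;   /* == 1024 */
        uint64_t ticks = 123456;

        printf("scale = %llu, %llu ticks = %llu ns\n",
               (unsigned long long)scale,
               (unsigned long long)ticks,
               (unsigned long long)((ticks * scale) >> 10));
        return 0;
    }
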