ia64/xen-unstable

changeset 17525:dc510776dd59

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Thu Apr 24 14:08:29 2008 -0600 (2008-04-24)
parents 239b44eeb2d6 97da69831384
children f2457c7aff8d
files xen/arch/x86/hvm/svm/x86_32/Makefile xen/arch/x86/hvm/svm/x86_32/exits.S xen/arch/x86/hvm/svm/x86_64/Makefile xen/arch/x86/hvm/svm/x86_64/exits.S xen/arch/x86/hvm/vmx/x86_32/Makefile xen/arch/x86/hvm/vmx/x86_32/exits.S xen/arch/x86/hvm/vmx/x86_64/Makefile xen/arch/x86/hvm/vmx/x86_64/exits.S
line diff
     1.1 --- a/.hgignore	Thu Apr 24 14:02:16 2008 -0600
     1.2 +++ b/.hgignore	Thu Apr 24 14:08:29 2008 -0600
     1.3 @@ -243,6 +243,7 @@
     1.4  ^tools/xm-test/lib/XmTestLib/config.py$
     1.5  ^tools/xm-test/lib/XmTestReport/xmtest.py$
     1.6  ^tools/xm-test/tests/.*\.test$
     1.7 +^xen/\.banner.*$
     1.8  ^xen/BLOG$
     1.9  ^xen/System.map$
    1.10  ^xen/TAGS$
     2.1 --- a/Makefile	Thu Apr 24 14:02:16 2008 -0600
     2.2 +++ b/Makefile	Thu Apr 24 14:08:29 2008 -0600
     2.3 @@ -122,6 +122,13 @@ distclean:
     2.4  .PHONY: mrproper
     2.5  mrproper: distclean
     2.6  
     2.7 +# Prepare for source tarball
     2.8 +.PHONY: src-tarball
     2.9 +src-tarball: distclean
    2.10 +	$(MAKE) -C xen .banner
    2.11 +	rm -rf xen/tools/figlet .[a-z]*
    2.12 +	$(MAKE) -C xen distclean
    2.13 +
    2.14  .PHONY: help
    2.15  help:
    2.16  	@echo 'Installation targets:'
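
Note: the new src-tarball target pre-generates xen/.banner (newly ignored via
the .hgignore hunk above) so that xen/tools/figlet and the Mercurial control
files can be stripped before packaging. From any tree the expected invocation
is simply:

    make src-tarball
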
     3.1 --- a/docs/misc/vtd.txt	Thu Apr 24 14:02:16 2008 -0600
     3.2 +++ b/docs/misc/vtd.txt	Thu Apr 24 14:08:29 2008 -0600
     3.3 @@ -21,7 +21,7 @@ 11) "hide" pci device from dom0 as follo
     3.4  
     3.5  title Xen-Fedora Core (2.6.18-xen)
     3.6          root (hd0,0)
     3.7 -        kernel /boot/xen.gz com1=115200,8n1 console=com1 vtd=1
     3.8 +        kernel /boot/xen.gz com1=115200,8n1 console=com1
     3.9          module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro console=tty0 console=ttyS0,115200,8n1 pciback.hide=(01:00.0)(03:00.0) pciback.verbose_request=1 apic=debug
    3.10          module /boot/initrd-2.6.18-xen.img
    3.11  
     4.1 --- a/docs/src/user.tex	Thu Apr 24 14:02:16 2008 -0600
     4.2 +++ b/docs/src/user.tex	Thu Apr 24 14:08:29 2008 -0600
     4.3 @@ -4088,6 +4088,8 @@ editing \path{grub.conf}.
     4.4    a list of pages not to be allocated for use because they contain bad
     4.5    bytes. For example, if your memory tester says that byte 0x12345678
     4.6    is bad, you would place `badpage=0x12345' on Xen's command line.
     4.7 +\item [ serial\_tx\_buffer=$<$size$>$ ] Size of serial transmit
     4.8 +  buffers. Default is 16kB.
     4.9  \item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
    4.10    com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
    4.11    Xen supports up to two 16550-compatible serial ports.  For example:
    4.12 @@ -4239,10 +4241,11 @@ In addition to the standard Linux kernel
    4.13      \begin{tabular}{l}
    4.14        `xencons=off': disable virtual console \\
    4.15        `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
    4.16 -      `xencons=ttyS': attach console to /dev/ttyS0
    4.17 +      `xencons=ttyS': attach console to /dev/ttyS0 \\
    4.18 +      `xencons=xvc': attach console to /dev/xvc0
    4.19      \end{tabular}
    4.20  \end{center}
    4.21 -The default is ttyS for dom0 and tty for all other domains.
    4.22 +The default is ttyS for dom0 and xvc for all other domains.
    4.23  \end{description}
    4.24  
    4.25  
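Note: with the new guest-side default documented above, a PV guest that wants
to pin its console explicitly would boot with a command line along the lines
of (illustrative):

    root=/dev/sda1 ro xencons=xvc console=xvc0

where console= is the standard Linux parameter and xencons= the Xen-specific
one described in the table.
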
     5.1 --- a/docs/xen-api/revision-history.tex	Thu Apr 24 14:02:16 2008 -0600
     5.2 +++ b/docs/xen-api/revision-history.tex	Thu Apr 24 14:08:29 2008 -0600
     5.3 @@ -37,5 +37,15 @@
     5.4      \end{flushleft}
     5.5     \end{minipage}\\
     5.6    \hline
     5.7 +  1.0.5 & 17th Apr. 08 & S. Berger &
     5.8 +   \begin{minipage}[t]{7cm}
     5.9 +    \begin{flushleft}
    5.10 +     Added undocumented fields and methods for default\_netmask and
    5.11 +     default\_gateway to the Network class. Removed an unimplemented
    5.12 +     method from the XSPolicy class and removed the 'optional' from
    5.13 +     'oldlabel' parameters.
    5.14 +    \end{flushleft}
    5.15 +   \end{minipage}\\
    5.16 +  \hline
    5.17   \end{tabular}
    5.18  \end{center}
     6.1 --- a/docs/xen-api/xenapi-coversheet.tex	Thu Apr 24 14:02:16 2008 -0600
     6.2 +++ b/docs/xen-api/xenapi-coversheet.tex	Thu Apr 24 14:08:29 2008 -0600
     6.3 @@ -22,7 +22,7 @@
     6.4  \newcommand{\releasestatement}{Stable Release}
     6.5  
     6.6  %% Document revision
     6.7 -\newcommand{\revstring}{API Revision 1.0.4}
     6.8 +\newcommand{\revstring}{API Revision 1.0.5}
     6.9  
    6.10  %% Document authors
    6.11  \newcommand{\docauthors}{
     7.1 --- a/docs/xen-api/xenapi-datamodel.tex	Thu Apr 24 14:02:16 2008 -0600
     7.2 +++ b/docs/xen-api/xenapi-datamodel.tex	Thu Apr 24 14:08:29 2008 -0600
     7.3 @@ -4467,7 +4467,7 @@ security_label, string old_label)\end{ve
     7.4  {\bf type} & {\bf name} & {\bf description} \\ \hline
     7.5  {\tt VM ref } & self & reference to the object \\ \hline
     7.6  {\tt string } & security\_label & security label for the VM \\ \hline
     7.7 -{\tt string } & old\_label & Optional label value that the security label \\
     7.8 +{\tt string } & old\_label & Label value that the security label \\
     7.9  & & must currently have for the change to succeed.\\ \hline
    7.10  
    7.11  \end{tabular}
    7.12 @@ -7619,6 +7619,8 @@ Quals & Field & Type & Description \\
     7.13  $\mathit{RW}$ &  {\tt name/description} & string & a notes field containing human-readable description \\
    7.14  $\mathit{RO}_\mathit{run}$ &  {\tt VIFs} & (VIF ref) Set & list of connected vifs \\
    7.15  $\mathit{RO}_\mathit{run}$ &  {\tt PIFs} & (PIF ref) Set & list of connected pifs \\
    7.16 +$\mathit{RW}$ &  {\tt default\_gateway} & string & default gateway \\
    7.17 +$\mathit{RW}$ &  {\tt default\_netmask} & string & default netmask \\
    7.18  $\mathit{RW}$ &  {\tt other\_config} & (string $\rightarrow$ string) Map & additional configuration \\
    7.19  \hline
    7.20  \end{longtable}
    7.21 @@ -7872,6 +7874,138 @@ value of the field
    7.22  \vspace{0.3cm}
    7.23  \vspace{0.3cm}
    7.24  \vspace{0.3cm}
    7.25 +\subsubsection{RPC name:~get\_default\_gateway}
    7.26 +
    7.27 +{\bf Overview:} 
    7.28 +Get the default\_gateway field of the given network.
    7.29 +
    7.30 + \noindent {\bf Signature:} 
    7.31 +\begin{verbatim} string get_default_gateway (session_id s, network ref self)\end{verbatim}
    7.32 +
    7.33 +
    7.34 +\noindent{\bf Arguments:}
    7.35 +
    7.36 + 
    7.37 +\vspace{0.3cm}
    7.38 +\begin{tabular}{|c|c|p{7cm}|}
    7.39 + \hline
    7.40 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    7.41 +{\tt network ref } & self & reference to the object \\ \hline 
    7.42 +
    7.43 +\end{tabular}
    7.44 +
    7.45 +\vspace{0.3cm}
    7.46 +
    7.47 + \noindent {\bf Return Type:} 
    7.48 +{\tt 
    7.49 +string
    7.50 +}
    7.51 +
    7.52 +
    7.53 +value of the field
    7.54 +\vspace{0.3cm}
    7.55 +\vspace{0.3cm}
    7.56 +\vspace{0.3cm}
    7.57 +\subsubsection{RPC name:~set\_default\_gateway}
    7.58 +
    7.59 +{\bf Overview:} 
    7.60 +Set the default\_gateway field of the given network.
    7.61 +
    7.62 + \noindent {\bf Signature:} 
    7.63 +\begin{verbatim} void set_default_gateway (session_id s, network ref self, string value)\end{verbatim}
    7.64 +
    7.65 +
    7.66 +\noindent{\bf Arguments:}
    7.67 +
    7.68 + 
    7.69 +\vspace{0.3cm}
    7.70 +\begin{tabular}{|c|c|p{7cm}|}
    7.71 + \hline
    7.72 +{\bf type} & {\bf name} & {\bf description} \\ \hline
    7.73 +{\tt network ref } & self & reference to the object \\ \hline 
    7.74 +
    7.75 +{\tt string } & value & New value to set \\ \hline 
    7.76 +
    7.77 +\end{tabular}
    7.78 +
    7.79 +\vspace{0.3cm}
    7.80 +
    7.81 + \noindent {\bf Return Type:} 
    7.82 +{\tt 
    7.83 +void
    7.84 +}
    7.85 +
    7.86 +
    7.87 +
    7.88 +\vspace{0.3cm}
    7.89 +\vspace{0.3cm}
    7.90 +\vspace{0.3cm}
    7.91 +\subsubsection{RPC name:~get\_default\_netmask}
    7.92 +
    7.93 +{\bf Overview:} 
    7.94 +Get the default\_netmask field of the given network.
    7.95 +
    7.96 + \noindent {\bf Signature:} 
    7.97 +\begin{verbatim} string get_default_netmask (session_id s, network ref self)\end{verbatim}
    7.98 +
    7.99 +
   7.100 +\noindent{\bf Arguments:}
   7.101 +
   7.102 + 
   7.103 +\vspace{0.3cm}
   7.104 +\begin{tabular}{|c|c|p{7cm}|}
   7.105 + \hline
   7.106 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   7.107 +{\tt network ref } & self & reference to the object \\ \hline 
   7.108 +
   7.109 +\end{tabular}
   7.110 +
   7.111 +\vspace{0.3cm}
   7.112 +
   7.113 + \noindent {\bf Return Type:} 
   7.114 +{\tt 
   7.115 +string
   7.116 +}
   7.117 +
   7.118 +
   7.119 +value of the field
   7.120 +\vspace{0.3cm}
   7.121 +\vspace{0.3cm}
   7.122 +\vspace{0.3cm}
   7.123 +\subsubsection{RPC name:~set\_default\_netmask}
   7.124 +
   7.125 +{\bf Overview:} 
   7.126 +Set the default\_netmask field of the given network.
   7.127 +
   7.128 + \noindent {\bf Signature:} 
   7.129 +\begin{verbatim} void set_default_netmask (session_id s, network ref self, string value)\end{verbatim}
   7.130 +
   7.131 +
   7.132 +\noindent{\bf Arguments:}
   7.133 +
   7.134 + 
   7.135 +\vspace{0.3cm}
   7.136 +\begin{tabular}{|c|c|p{7cm}|}
   7.137 + \hline
   7.138 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   7.139 +{\tt network ref } & self & reference to the object \\ \hline 
   7.140 +
   7.141 +{\tt string } & value & New value to set \\ \hline 
   7.142 +
   7.143 +\end{tabular}
   7.144 +
   7.145 +\vspace{0.3cm}
   7.146 +
   7.147 + \noindent {\bf Return Type:} 
   7.148 +{\tt 
   7.149 +void
   7.150 +}
   7.151 +
   7.152 +
   7.153 +
   7.154 +\vspace{0.3cm}
   7.155 +\vspace{0.3cm}
   7.156 +\vspace{0.3cm}
   7.157  \subsubsection{RPC name:~get\_other\_config}
   7.158  
   7.159  {\bf Overview:} 
   7.160 @@ -8999,7 +9133,7 @@ security_label, string old_label)\end{ve
   7.161  {\tt VIF ref } & self & reference to the object \\ \hline
   7.162  
   7.163  {\tt string } & security\_label & New value of the security label \\ \hline
   7.164 -{\tt string } & old\_label & Optional label value that the security label \\
   7.165 +{\tt string } & old\_label & Label value that the security label \\
   7.166  & & must currently have for the change to succeed.\\ \hline
   7.167  \end{tabular}
   7.168  
   7.169 @@ -11504,7 +11638,7 @@ security_label, string old_label)\end{ve
   7.170  {\tt VDI ref } & self & reference to the object \\ \hline
   7.171  
   7.172  {\tt string } & security\_label & New value of the security label \\ \hline
   7.173 -{\tt string } & old\_label & Optional label value that the security label \\
   7.174 +{\tt string } & old\_label & Label value that the security label \\
   7.175  & & must currently have for the change to succeed.\\ \hline
   7.176  \end{tabular}
   7.177  
   7.178 @@ -14898,46 +15032,6 @@ The label of the given resource.
   7.179  \vspace{0.3cm}
   7.180  \vspace{0.3cm}
   7.181  \vspace{0.3cm}
   7.182 -\subsubsection{RPC name:~activate\_xspolicy}
   7.183 -
   7.184 -{\bf Overview:}
   7.185 -Load the referenced policy into the hypervisor.
   7.186 -
   7.187 - \noindent {\bf Signature:}
   7.188 -\begin{verbatim} xs_instantiationflags activate_xspolicy (session_id s, xs_ref xspolicy,
   7.189 -xs_instantiationflags flags)\end{verbatim}
   7.190 -
   7.191 -
   7.192 -\noindent{\bf Arguments:}
   7.193 -
   7.194 -
   7.195 -\vspace{0.3cm}
   7.196 -\begin{tabular}{|c|c|p{7cm}|}
   7.197 - \hline
   7.198 -{\bf type} & {\bf name} & {\bf description} \\ \hline
   7.199 -{\tt xs ref } & self & reference to the object \\ \hline
   7.200 -{\tt xs\_instantiationflags } & flags & flags to activate on a policy; flags
   7.201 -  can only be set \\ \hline
   7.202 -
   7.203 -\end{tabular}
   7.204 -
   7.205 -\vspace{0.3cm}
   7.206 -
   7.207 -
   7.208 - \noindent {\bf Return Type:}
   7.209 -{\tt
   7.210 -xs\_instantiationflags
   7.211 -}
   7.212 -
   7.213 -
   7.214 -Currently active instantiation flags.
   7.215 -\vspace{0.3cm}
   7.216 -
   7.217 -\noindent{\bf Possible Error Codes:} {\tt SECURITY\_ERROR}
   7.218 -
   7.219 -\vspace{0.3cm}
   7.220 -\vspace{0.3cm}
   7.221 -\vspace{0.3cm}
   7.222  \subsubsection{RPC name:~can\_run}
   7.223  
   7.224  {\bf Overview:}
     8.1 --- a/extras/mini-os/Makefile	Thu Apr 24 14:02:16 2008 -0600
     8.2 +++ b/extras/mini-os/Makefile	Thu Apr 24 14:08:29 2008 -0600
     8.3 @@ -19,6 +19,7 @@ include minios.mk
     8.4  
     8.5  # Define some default flags for linking.
     8.6  LDLIBS := 
     8.7 +APP_LDLIBS := 
     8.8  LDARCHLIB := -L$(TARGET_ARCH_DIR) -l$(ARCH_LIB_NAME)
     8.9  LDFLAGS_FINAL := -T $(TARGET_ARCH_DIR)/minios-$(XEN_TARGET_ARCH).lds
    8.10  
    8.11 @@ -33,6 +34,7 @@ TARGET := mini-os
    8.12  SUBDIRS := lib xenbus console
    8.13  
    8.14  # The common mini-os objects to build.
    8.15 +APP_OBJS :=
    8.16  OBJS := $(patsubst %.c,%.o,$(wildcard *.c))
    8.17  OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
    8.18  OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c))
    8.19 @@ -75,28 +77,28 @@ OBJS := $(filter-out lwip%.o $(LWO), $(O
    8.20  
    8.21  ifeq ($(caml),y)
    8.22  CAMLLIB = $(shell ocamlc -where)
    8.23 -OBJS += $(CAMLDIR)/caml.o
    8.24 -OBJS += $(CAMLLIB)/libasmrun.a
    8.25 +APP_OBJS += main-caml.o
    8.26 +APP_OBJS += $(CAMLDIR)/caml.o
    8.27 +APP_OBJS += $(CAMLLIB)/libasmrun.a
    8.28  CFLAGS += -I$(CAMLLIB)
    8.29 -LDLIBS += -lm
    8.30 -else
    8.31 +APP_LDLIBS += -lm
    8.32 +endif
    8.33  OBJS := $(filter-out main-caml.o, $(OBJS))
    8.34 -endif
    8.35  
    8.36  ifeq ($(qemu),y)
    8.37 -OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
    8.38 +APP_OBJS += $(QEMUDIR)/i386-dm-stubdom/qemu.a $(QEMUDIR)/i386-dm-stubdom/libqemu.a
    8.39  CFLAGS += -DCONFIG_QEMU
    8.40  endif
    8.41  
    8.42  ifneq ($(CDIR),)
    8.43 -OBJS += $(CDIR)/main.a
    8.44 -LDLIBS += 
    8.45 +APP_OBJS += $(CDIR)/main.a
    8.46 +APP_LDLIBS += 
    8.47  endif
    8.48  
    8.49  ifeq ($(libc),y)
    8.50  LDLIBS += -L$(XEN_ROOT)/stubdom/libxc -lxenctrl -lxenguest
    8.51 -LDLIBS += -lpci
    8.52 -LDLIBS += -lz
    8.53 +APP_LDLIBS += -lpci
    8.54 +APP_LDLIBS += -lz
    8.55  LDLIBS += -lc
    8.56  endif
    8.57  
    8.58 @@ -104,8 +106,11 @@ ifneq ($(caml)-$(qemu)-$(CDIR)-$(lwip),-
    8.59  OBJS := $(filter-out daytime.o, $(OBJS))
    8.60  endif
    8.61  
    8.62 -$(TARGET): links $(OBJS) arch_lib
    8.63 -	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
    8.64 +app.o: $(APP_OBJS) app.lds
    8.65 +	$(LD) -r -d $(LDFLAGS) $^ $(APP_LDLIBS) --undefined main -o $@
    8.66 +
    8.67 +$(TARGET): links $(OBJS) app.o arch_lib
    8.68 +	$(LD) -r $(LDFLAGS) $(HEAD_OBJ) app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
    8.69  	$(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
    8.70  	$(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
    8.71  	gzip -f -9 -c $@ >$@.gz
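
Note: the new app.o rule is a partial link: 'ld -r' emits a relocatable
object, '-d' forces COMMON symbols to be allocated immediately so that the
app.lds script (next hunk) can collect them into .app.bss, and
'--undefined main' plants an undefined reference that pulls main out of the
application archives in APP_OBJS. The second stage then combines app.o with
the mini-os objects exactly as before.
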
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/extras/mini-os/app.lds	Thu Apr 24 14:08:29 2008 -0600
     9.3 @@ -0,0 +1,11 @@
     9.4 +SECTIONS
     9.5 +{
     9.6 +        .app.bss : {
     9.7 +                __app_bss_start = . ;
     9.8 +                *(.bss .bss.*)
     9.9 +                *(COMMON)
    9.10 +                *(.lbss .lbss.*)
    9.11 +                *(LARGE_COMMON)
    9.12 +                __app_bss_end = . ;
    9.13 +        }
    9.14 +}
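
Note: app.lds renames the application's .bss/COMMON (and large-model
.lbss/LARGE_COMMON) input sections to .app.bss, bracketed by the
__app_bss_start/__app_bss_end symbols; the per-arch scripts in the next three
hunks then place .app.bss inside the image's .bss. The bracket symbols are
consumed from C exactly as in the main.c hunk further below:

    extern char __app_bss_start, __app_bss_end;
    sparse((unsigned long) &__app_bss_start,
           &__app_bss_end - &__app_bss_start);
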
    10.1 --- a/extras/mini-os/arch/ia64/minios-ia64.lds	Thu Apr 24 14:02:16 2008 -0600
    10.2 +++ b/extras/mini-os/arch/ia64/minios-ia64.lds	Thu Apr 24 14:08:29 2008 -0600
    10.3 @@ -59,7 +59,10 @@ SECTIONS
    10.4    { *(.IA_64.unwind) }
    10.5  
    10.6    .bss : AT(ADDR(.bss) - (((5<<(61))+0x100000000) - (1 << 20)))
    10.7 -  { *(.bss) }
    10.8 +  {
    10.9 +    *(.bss)
   10.10 +    *(.app.bss)
   10.11 +  }
   10.12  
   10.13    _end = .;
   10.14  
    11.1 --- a/extras/mini-os/arch/x86/minios-x86_32.lds	Thu Apr 24 14:02:16 2008 -0600
    11.2 +++ b/extras/mini-os/arch/x86/minios-x86_32.lds	Thu Apr 24 14:08:29 2008 -0600
    11.3 @@ -38,6 +38,7 @@ SECTIONS
    11.4    __bss_start = .;		/* BSS */
    11.5    .bss : {
    11.6  	*(.bss)
    11.7 +        *(.app.bss)
    11.8  	}
    11.9    _end = . ;
   11.10  
    12.1 --- a/extras/mini-os/arch/x86/minios-x86_64.lds	Thu Apr 24 14:02:16 2008 -0600
    12.2 +++ b/extras/mini-os/arch/x86/minios-x86_64.lds	Thu Apr 24 14:08:29 2008 -0600
    12.3 @@ -38,6 +38,7 @@ SECTIONS
    12.4    __bss_start = .;		/* BSS */
    12.5    .bss : {
    12.6  	*(.bss)
    12.7 +        *(.app.bss)
    12.8  	}
    12.9    _end = . ;
   12.10  
    13.1 --- a/extras/mini-os/arch/x86/mm.c	Thu Apr 24 14:02:16 2008 -0600
    13.2 +++ b/extras/mini-os/arch/x86/mm.c	Thu Apr 24 14:08:29 2008 -0600
    13.3 @@ -556,7 +556,6 @@ void *map_frames_ex(unsigned long *f, un
    13.4  
    13.5  static void clear_bootstrap(void)
    13.6  {
    13.7 -    struct xen_memory_reservation reservation;
    13.8      xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
    13.9      int n = sizeof(mfns)/sizeof(*mfns);
   13.10      pte_t nullpte = { };
   13.11 @@ -567,11 +566,7 @@ static void clear_bootstrap(void)
   13.12      if (HYPERVISOR_update_va_mapping((unsigned long) &_text, nullpte, UVMF_INVLPG))
   13.13  	printk("Unable to unmap first page\n");
   13.14  
   13.15 -    set_xen_guest_handle(reservation.extent_start, mfns);
   13.16 -    reservation.nr_extents = n;
   13.17 -    reservation.extent_order = 0;
   13.18 -    reservation.domid = DOMID_SELF;
   13.19 -    if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != n)
   13.20 +    if (free_physical_pages(mfns, n) != n)
   13.21  	printk("Unable to free bootstrap pages\n");
   13.22  }
   13.23  
    14.1 --- a/extras/mini-os/fbfront.c	Thu Apr 24 14:02:16 2008 -0600
    14.2 +++ b/extras/mini-os/fbfront.c	Thu Apr 24 14:08:29 2008 -0600
    14.3 @@ -243,12 +243,12 @@ struct fbfront_dev {
    14.4      char *backend;
    14.5      int request_update;
    14.6  
    14.7 -    char *data;
    14.8      int width;
    14.9      int height;
   14.10      int depth;
   14.11 -    int line_length;
   14.12 +    int stride;
   14.13      int mem_length;
   14.14 +    int offset;
   14.15  };
   14.16  
   14.17  void fbfront_handler(evtchn_port_t port, struct pt_regs *regs, void *data)
   14.18 @@ -256,7 +256,7 @@ void fbfront_handler(evtchn_port_t port,
   14.19      wake_up(&fbfront_queue);
   14.20  }
   14.21  
   14.22 -struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int height, int depth, int line_length, int mem_length)
   14.23 +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n)
   14.24  {
   14.25      xenbus_transaction_t xbt;
   14.26      char* err;
   14.27 @@ -289,24 +289,17 @@ struct fbfront_dev *init_fbfront(char *n
   14.28      dev->width = s->width = width;
   14.29      dev->height = s->height = height;
   14.30      dev->depth = s->depth = depth;
   14.31 -    dev->line_length = s->line_length = line_length;
   14.32 -    dev->mem_length = s->mem_length = mem_length;
   14.33 -
   14.34 -    ASSERT(!((unsigned long)data & ~PAGE_MASK));
   14.35 -    dev->data = data;
   14.36 +    dev->stride = s->line_length = stride;
   14.37 +    dev->mem_length = s->mem_length = n * PAGE_SIZE;
   14.38 +    dev->offset = 0;
   14.39  
   14.40      const int max_pd = sizeof(s->pd) / sizeof(s->pd[0]);
   14.41      unsigned long mapped = 0;
   14.42  
   14.43 -    for (i = 0; mapped < mem_length && i < max_pd; i++) {
   14.44 +    for (i = 0; mapped < n && i < max_pd; i++) {
   14.45          unsigned long *pd = (unsigned long *) alloc_page();
   14.46 -        for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned long); j++) {
   14.47 -            /* Trigger CoW */
   14.48 -            * ((char *)data + mapped) = 0;
   14.49 -            barrier();
   14.50 -            pd[j] = virtual_to_mfn((unsigned long) data + mapped);
   14.51 -            mapped += PAGE_SIZE;
   14.52 -        }
   14.53 +        for (j = 0; mapped < n && j < PAGE_SIZE / sizeof(unsigned long); j++)
   14.54 +            pd[j] = mfns[mapped++];
   14.55          for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
   14.56              pd[j] = 0;
   14.57          s->pd[i] = virt_to_mfn(pd);
   14.58 @@ -395,12 +388,29 @@ done:
   14.59      return dev;
   14.60  }
   14.61  
   14.62 -void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height)
   14.63 +static void fbfront_out_event(struct fbfront_dev *dev, union xenfb_out_event *event)
   14.64  {
   14.65      struct xenfb_page *page = dev->page;
   14.66      uint32_t prod;
   14.67      DEFINE_WAIT(w);
   14.68  
   14.69 +    add_waiter(w, fbfront_queue);
   14.70 +    while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN)
   14.71 +        schedule();
   14.72 +    remove_waiter(w);
   14.73 +
   14.74 +    prod = page->out_prod;
   14.75 +    mb(); /* ensure ring space available */
   14.76 +    XENFB_OUT_RING_REF(page, prod) = *event;
   14.77 +    wmb(); /* ensure ring contents visible */
   14.78 +    page->out_prod = prod + 1;
   14.79 +    notify_remote_via_evtchn(dev->evtchn);
   14.80 +}
   14.81 +
   14.82 +void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height)
   14.83 +{
   14.84 +    struct xenfb_update update;
   14.85 +
   14.86      if (dev->request_update <= 0)
   14.87          return;
   14.88  
   14.89 @@ -421,21 +431,25 @@ void fbfront_update(struct fbfront_dev *
   14.90      if (width <= 0 || height <= 0)
   14.91          return;
   14.92  
   14.93 -    add_waiter(w, fbfront_queue);
   14.94 -    while (page->out_prod - page->out_cons == XENFB_OUT_RING_LEN)
   14.95 -        schedule();
   14.96 -    remove_waiter(w);
   14.97 +    update.type = XENFB_TYPE_UPDATE;
   14.98 +    update.x = x;
   14.99 +    update.y = y;
  14.100 +    update.width = width;
  14.101 +    update.height = height;
  14.102 +    fbfront_out_event(dev, (union xenfb_out_event *) &update);
  14.103 +}
  14.104  
  14.105 -    prod = page->out_prod;
  14.106 -    mb(); /* ensure ring space available */
  14.107 -    XENFB_OUT_RING_REF(page, prod).type = XENFB_TYPE_UPDATE;
  14.108 -    XENFB_OUT_RING_REF(page, prod).update.x = x;
  14.109 -    XENFB_OUT_RING_REF(page, prod).update.y = y;
  14.110 -    XENFB_OUT_RING_REF(page, prod).update.width = width;
  14.111 -    XENFB_OUT_RING_REF(page, prod).update.height = height;
  14.112 -    wmb(); /* ensure ring contents visible */
  14.113 -    page->out_prod = prod + 1;
  14.114 -    notify_remote_via_evtchn(dev->evtchn);
  14.115 +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset)
  14.116 +{
  14.117 +    struct xenfb_resize resize;
  14.118 +
  14.119 +    resize.type = XENFB_TYPE_RESIZE;
  14.120 +    dev->width  = resize.width = width;
  14.121 +    dev->height = resize.height = height;
  14.122 +    dev->stride = resize.stride = stride;
  14.123 +    dev->depth  = resize.depth = depth;
  14.124 +    dev->offset = resize.offset = offset;
  14.125 +    fbfront_out_event(dev, (union xenfb_out_event *) &resize);
  14.126  }
  14.127  
  14.128  void shutdown_fbfront(struct fbfront_dev *dev)
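
Note: the ring-producer logic formerly inlined in fbfront_update() is now
factored into fbfront_out_event() so that the new resize event can share it.
A minimal sketch of the protocol, with illustrative names standing in for the
mini-os ring macros and event-channel call:

    /* wait for space, write the slot, publish the index, kick the peer */
    static void ring_put(struct ring *r, const union event *ev)
    {
        while (r->prod - r->cons == RING_LEN)
            wait_for_consumer();        /* ring full */
        uint32_t prod = r->prod;
        mb();                           /* order fullness check vs. slot write */
        r->slot[prod % RING_LEN] = *ev;
        wmb();                          /* slot contents before index update */
        r->prod = prod + 1;
        notify_peer();                  /* event-channel kick */
    }
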
    15.1 --- a/extras/mini-os/hypervisor.c	Thu Apr 24 14:02:16 2008 -0600
    15.2 +++ b/extras/mini-os/hypervisor.c	Thu Apr 24 14:08:29 2008 -0600
    15.3 @@ -66,6 +66,21 @@ void do_hypervisor_callback(struct pt_re
    15.4      in_callback = 0;
    15.5  }
    15.6  
    15.7 +void force_evtchn_callback(void)
    15.8 +{
    15.9 +    vcpu_info_t *vcpu;
   15.10 +    vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];
   15.11 +    int save = vcpu->evtchn_upcall_mask;
   15.12 +
   15.13 +    while (vcpu->evtchn_upcall_pending) {
   15.14 +        vcpu->evtchn_upcall_mask = 1;
   15.15 +        barrier();
   15.16 +        do_hypervisor_callback(NULL);
   15.17 +        barrier();
   15.18 +        vcpu->evtchn_upcall_mask = save;
   15.19 +        barrier();
   15.20 +    };
   15.21 +}
   15.22  
   15.23  inline void mask_evtchn(u32 port)
   15.24  {
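
Note: force_evtchn_callback() was previously a per-arch one-liner that made a
cheap hypercall so that Xen would deliver pending upcalls on the return path
(the ia64 and x86 versions removed in hunks 19 and 22 both bounced through
HYPERVISOR_xen_version(0, ...)). The common implementation above instead
masks upcalls and runs do_hypervisor_callback() directly until
evtchn_upcall_pending clears, restoring the caller's mask afterwards.
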
    16.1 --- a/extras/mini-os/include/fbfront.h	Thu Apr 24 14:02:16 2008 -0600
    16.2 +++ b/extras/mini-os/include/fbfront.h	Thu Apr 24 14:08:29 2008 -0600
    16.3 @@ -31,11 +31,12 @@ extern struct wait_queue_head kbdfront_q
    16.4  void shutdown_kbdfront(struct kbdfront_dev *dev);
    16.5  
    16.6  
    16.7 -struct fbfront_dev *init_fbfront(char *nodename, void *data, int width, int height, int depth, int line_length, int mem_length);
    16.8 +struct fbfront_dev *init_fbfront(char *nodename, unsigned long *mfns, int width, int height, int depth, int stride, int n);
    16.9  #ifdef HAVE_LIBC
   16.10  int fbfront_open(struct fbfront_dev *dev);
   16.11  #endif
   16.12  
   16.13  void fbfront_update(struct fbfront_dev *dev, int x, int y, int width, int height);
   16.14 +void fbfront_resize(struct fbfront_dev *dev, int width, int height, int stride, int depth, int offset);
   16.15  
   16.16  void shutdown_fbfront(struct fbfront_dev *dev);
    17.1 --- a/extras/mini-os/include/hypervisor.h	Thu Apr 24 14:02:16 2008 -0600
    17.2 +++ b/extras/mini-os/include/hypervisor.h	Thu Apr 24 14:08:29 2008 -0600
    17.3 @@ -24,6 +24,7 @@
    17.4  #else
    17.5  #error "Unsupported architecture"
    17.6  #endif
    17.7 +#include <traps.h>
    17.8  
    17.9  /*
   17.10   * a placeholder for the start of day information passed up from the hypervisor
   17.11 @@ -37,7 +38,8 @@ extern union start_info_union start_info
   17.12  #define start_info (start_info_union.start_info)
   17.13  
   17.14  /* hypervisor.c */
   17.15 -//void do_hypervisor_callback(struct pt_regs *regs);
   17.16 +void force_evtchn_callback(void);
   17.17 +void do_hypervisor_callback(struct pt_regs *regs);
   17.18  void mask_evtchn(u32 port);
   17.19  void unmask_evtchn(u32 port);
   17.20  void clear_evtchn(u32 port);
    18.1 --- a/extras/mini-os/include/ia64/arch_mm.h	Thu Apr 24 14:02:16 2008 -0600
    18.2 +++ b/extras/mini-os/include/ia64/arch_mm.h	Thu Apr 24 14:08:29 2008 -0600
    18.3 @@ -38,6 +38,6 @@
    18.4  #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
    18.5  /* TODO */
    18.6  #define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
    18.7 -#define do_map_zero(start, n) ((void)0)
    18.8 +#define do_map_zero(start, n) ASSERT(n == 0)
    18.9  
   18.10  #endif /* __ARCH_MM_H__ */
    19.1 --- a/extras/mini-os/include/ia64/os.h	Thu Apr 24 14:02:16 2008 -0600
    19.2 +++ b/extras/mini-os/include/ia64/os.h	Thu Apr 24 14:08:29 2008 -0600
    19.3 @@ -189,17 +189,6 @@ static inline unsigned long
    19.4  	return ia64_cmpxchg_acq_64(ptr, old, new);
    19.5  }
    19.6  
    19.7 -/*
    19.8 - * Force a proper event-channel callback from Xen after clearing the
    19.9 - * callback mask. We do this in a very simple manner, by making a call
   19.10 - * down into Xen. The pending flag will be checked by Xen on return.
   19.11 - */
   19.12 -static inline void
   19.13 -force_evtchn_callback(void)
   19.14 -{
   19.15 -	(void)HYPERVISOR_xen_version(0, NULL);
   19.16 -}
   19.17 -
   19.18  extern shared_info_t *HYPERVISOR_shared_info;
   19.19  
   19.20  static inline int
    20.1 --- a/extras/mini-os/include/lib.h	Thu Apr 24 14:02:16 2008 -0600
    20.2 +++ b/extras/mini-os/include/lib.h	Thu Apr 24 14:08:29 2008 -0600
    20.3 @@ -187,6 +187,7 @@ extern struct file {
    20.4  int alloc_fd(enum fd_type type);
    20.5  void close_all_files(void);
    20.6  extern struct thread *main_thread;
    20.7 +void sparse(unsigned long data, size_t size);
    20.8  #endif
    20.9  
   20.10  #endif /* _LIB_H_ */
    21.1 --- a/extras/mini-os/include/mm.h	Thu Apr 24 14:02:16 2008 -0600
    21.2 +++ b/extras/mini-os/include/mm.h	Thu Apr 24 14:08:29 2008 -0600
    21.3 @@ -70,4 +70,6 @@ void *map_frames_ex(unsigned long *f, un
    21.4  extern unsigned long heap, brk, heap_mapped, heap_end;
    21.5  #endif
    21.6  
    21.7 +int free_physical_pages(xen_pfn_t *mfns, int n);
    21.8 +
    21.9  #endif /* _MM_H_ */
    22.1 --- a/extras/mini-os/include/x86/os.h	Thu Apr 24 14:02:16 2008 -0600
    22.2 +++ b/extras/mini-os/include/x86/os.h	Thu Apr 24 14:08:29 2008 -0600
    22.3 @@ -28,7 +28,6 @@ extern void do_exit(void) __attribute__(
    22.4  #include <xen/xen.h>
    22.5  
    22.6  
    22.7 -#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0))
    22.8  
    22.9  #define __KERNEL_CS  FLAT_KERNEL_CS
   22.10  #define __KERNEL_DS  FLAT_KERNEL_DS
    23.1 --- a/extras/mini-os/kernel.c	Thu Apr 24 14:02:16 2008 -0600
    23.2 +++ b/extras/mini-os/kernel.c	Thu Apr 24 14:08:29 2008 -0600
    23.3 @@ -297,9 +297,20 @@ static void fbfront_thread(void *p)
    23.4  {
    23.5      size_t line_length = WIDTH * (DEPTH / 8);
    23.6      size_t memsize = HEIGHT * line_length;
    23.7 +    unsigned long *mfns;
    23.8 +    int i, n = (memsize + PAGE_SIZE-1) / PAGE_SIZE;
    23.9  
   23.10 +    memsize = n * PAGE_SIZE;
   23.11      fb = _xmalloc(memsize, PAGE_SIZE);
   23.12 -    fb_dev = init_fbfront(NULL, fb, WIDTH, HEIGHT, DEPTH, line_length, memsize);
   23.13 +    mfns = xmalloc_array(unsigned long, n);
   23.14 +    for (i = 0; i < n; i++) {
   23.15 +        /* trigger CoW */
   23.16 +        ((char *) fb) [i * PAGE_SIZE] = 0;
   23.17 +        barrier();
   23.18 +        mfns[i] = virtual_to_mfn((char *) fb + i * PAGE_SIZE);
   23.19 +    }
   23.20 +    fb_dev = init_fbfront(NULL, mfns, WIDTH, HEIGHT, DEPTH, line_length, n);
   23.21 +    xfree(mfns);
   23.22      if (!fb_dev) {
   23.23          xfree(fb);
   23.24          return;
    24.1 --- a/extras/mini-os/lib/sys.c	Thu Apr 24 14:02:16 2008 -0600
    24.2 +++ b/extras/mini-os/lib/sys.c	Thu Apr 24 14:08:29 2008 -0600
    24.3 @@ -1108,6 +1108,41 @@ int munmap(void *start, size_t length)
    24.4      return 0;
    24.5  }
    24.6  
    24.7 +void sparse(unsigned long data, size_t size)
    24.8 +{
    24.9 +    unsigned long newdata;
   24.10 +    xen_pfn_t *mfns;
   24.11 +    int i, n;
   24.12 +
   24.13 +    newdata = (data + PAGE_SIZE - 1) & PAGE_MASK;
   24.14 +    if (newdata - data > size)
   24.15 +        return;
   24.16 +    size -= newdata - data;
   24.17 +    data = newdata;
   24.18 +    n = size / PAGE_SIZE;
   24.19 +    size = n * PAGE_SIZE;
   24.20 +
   24.21 +    mfns = malloc(n * sizeof(*mfns));
   24.22 +    for (i = 0; i < n; i++) {
   24.23 +#ifdef LIBC_DEBUG
   24.24 +        int j;
   24.25 +        for (j=0; j<PAGE_SIZE; j++)
   24.26 +            if (((char*)data + i * PAGE_SIZE)[j]) {
   24.27 +                printk("%lx is not zero!\n", data + i * PAGE_SIZE + j);
   24.28 +                exit(1);
   24.29 +            }
   24.30 +#endif
   24.31 +        mfns[i] = virtual_to_mfn(data + i * PAGE_SIZE);
   24.32 +    }
   24.33 +
   24.34 +    printk("sparsing %ldMB at %lx\n", size >> 20, data);
   24.35 +
   24.36 +    munmap((void *) data, size);
   24.37 +    free_physical_pages(mfns, n);
   24.38 +    do_map_zero(data, n);
   24.39 +}
   24.40 +
   24.41 +
   24.42  /* Not supported by FS yet.  */
   24.43  unsupported_function_crash(link);
   24.44  unsupported_function(int, readlink, -1);
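
Note: the head of sparse() trims the region to whole pages before handing
them back. A worked instance of that arithmetic, assuming PAGE_SIZE is
0x1000:

    data = 0x100234, size = 0x5000
    newdata = (data + 0xfff) & PAGE_MASK = 0x101000      (skips 0xdcc bytes)
    size    = 0x5000 - 0xdcc = 0x4234  ->  n = 4, size = 0x4000

so the four whole pages at 0x101000..0x104fff are unmapped and released via
free_physical_pages(); the partial pages at either end are left alone.
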
    25.1 --- a/extras/mini-os/main.c	Thu Apr 24 14:02:16 2008 -0600
    25.2 +++ b/extras/mini-os/main.c	Thu Apr 24 14:08:29 2008 -0600
    25.3 @@ -39,6 +39,7 @@ void _fini(void)
    25.4  {
    25.5  }
    25.6  
    25.7 +extern char __app_bss_start, __app_bss_end;
    25.8  static void call_main(void *p)
    25.9  {
   25.10      char *args, /**path,*/ *msg, *c;
   25.11 @@ -56,6 +57,7 @@ static void call_main(void *p)
   25.12       * crashing. */
   25.13      //sleep(1);
   25.14  
   25.15 +    sparse((unsigned long) &__app_bss_start, &__app_bss_end - &__app_bss_start);
   25.16      start_networking();
   25.17      init_fs_frontend();
   25.18  
    26.1 --- a/extras/mini-os/mm.c	Thu Apr 24 14:02:16 2008 -0600
    26.2 +++ b/extras/mini-os/mm.c	Thu Apr 24 14:08:29 2008 -0600
    26.3 @@ -36,6 +36,7 @@
    26.4  
    26.5  #include <os.h>
    26.6  #include <hypervisor.h>
    26.7 +#include <xen/memory.h>
    26.8  #include <mm.h>
    26.9  #include <types.h>
   26.10  #include <lib.h>
   26.11 @@ -360,6 +361,17 @@ void free_pages(void *pointer, int order
   26.12     
   26.13  }
   26.14  
   26.15 +int free_physical_pages(xen_pfn_t *mfns, int n)
   26.16 +{
   26.17 +    struct xen_memory_reservation reservation;
   26.18 +
   26.19 +    set_xen_guest_handle(reservation.extent_start, mfns);
   26.20 +    reservation.nr_extents = n;
   26.21 +    reservation.extent_order = 0;
   26.22 +    reservation.domid = DOMID_SELF;
   26.23 +    return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
   26.24 +}
   26.25 +
   26.26  #ifdef HAVE_LIBC
   26.27  void *sbrk(ptrdiff_t increment)
   26.28  {
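
Note: free_physical_pages() wraps XENMEM_decrease_reservation, the balloon
hypercall that returns machine frames to Xen; the return value is the number
of extents actually released. A hypothetical caller, mirroring what
clear_bootstrap() now does:

    xen_pfn_t mfn = virt_to_mfn(page);   /* page must be unmapped first */
    if (free_physical_pages(&mfn, 1) != 1)
        printk("decrease_reservation failed\n");

(after the call the MFN no longer belongs to the domain, so any remaining
virtual mapping of it would fault).
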
    27.1 --- a/extras/mini-os/sched.c	Thu Apr 24 14:02:16 2008 -0600
    27.2 +++ b/extras/mini-os/sched.c	Thu Apr 24 14:08:29 2008 -0600
    27.3 @@ -70,62 +70,15 @@ void inline print_runqueue(void)
    27.4      printk("\n");
    27.5  }
    27.6  
    27.7 -/* Find the time when the next timeout expires. If this is more than
    27.8 -   10 seconds from now, return 10 seconds from now. */
    27.9 -static s_time_t blocking_time(void)
   27.10 -{
   27.11 -    struct thread *thread;
   27.12 -    struct list_head *iterator;
   27.13 -    s_time_t min_wakeup_time;
   27.14 -    unsigned long flags;
   27.15 -    local_irq_save(flags);
   27.16 -    /* default-block the domain for 10 seconds: */
   27.17 -    min_wakeup_time = NOW() + SECONDS(10);
   27.18 -
   27.19 -    /* Thread list needs to be protected */
   27.20 -    list_for_each(iterator, &idle_thread->thread_list)
   27.21 -    {
   27.22 -        thread = list_entry(iterator, struct thread, thread_list);
   27.23 -        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
   27.24 -        {
   27.25 -            if(thread->wakeup_time < min_wakeup_time)
   27.26 -            {
   27.27 -                min_wakeup_time = thread->wakeup_time;
   27.28 -            }
   27.29 -        }
   27.30 -    }
   27.31 -    local_irq_restore(flags);
   27.32 -    return(min_wakeup_time);
   27.33 -}
   27.34 -
   27.35 -/* Wake up all threads with expired timeouts. */
   27.36 -static void wake_expired(void)
   27.37 -{
   27.38 -    struct thread *thread;
   27.39 -    struct list_head *iterator;
   27.40 -    s_time_t now = NOW();
   27.41 -    unsigned long flags;
   27.42 -    local_irq_save(flags);
   27.43 -    /* Thread list needs to be protected */
   27.44 -    list_for_each(iterator, &idle_thread->thread_list)
   27.45 -    {
   27.46 -        thread = list_entry(iterator, struct thread, thread_list);
   27.47 -        if(!is_runnable(thread) && thread->wakeup_time != 0LL)
   27.48 -        {
   27.49 -            if(thread->wakeup_time <= now)
   27.50 -                wake(thread);
   27.51 -        }
   27.52 -    }
   27.53 -    local_irq_restore(flags);
   27.54 -}
   27.55 -
   27.56  void schedule(void)
   27.57  {
   27.58      struct thread *prev, *next, *thread;
   27.59      struct list_head *iterator;
   27.60      unsigned long flags;
   27.61 +
   27.62      prev = current;
   27.63      local_irq_save(flags); 
   27.64 +
   27.65      if (in_callback) {
   27.66          printk("Must not call schedule() from a callback\n");
   27.67          BUG();
   27.68 @@ -134,6 +87,45 @@ void schedule(void)
   27.69          printk("Must not call schedule() with IRQs disabled\n");
   27.70          BUG();
   27.71      }
   27.72 +
   27.73 +    do {
   27.74 +        /* Examine all threads.
   27.75 +           Find a runnable thread, but also wake up expired ones and find the
   27.76 +           time when the next timeout expires, else use 10 seconds. */
   27.77 +        s_time_t now = NOW();
   27.78 +        s_time_t min_wakeup_time = now + SECONDS(10);
   27.79 +        next = NULL;   
   27.80 +        list_for_each(iterator, &idle_thread->thread_list)
   27.81 +        {
   27.82 +            thread = list_entry(iterator, struct thread, thread_list);
   27.83 +            if (!is_runnable(thread) && thread->wakeup_time != 0LL)
   27.84 +            {
   27.85 +                if (thread->wakeup_time <= now)
   27.86 +                    wake(thread);
   27.87 +                else if (thread->wakeup_time < min_wakeup_time)
   27.88 +                    min_wakeup_time = thread->wakeup_time;
   27.89 +            }
   27.90 +            if(is_runnable(thread)) 
   27.91 +            {
   27.92 +                next = thread;
   27.93 +                /* Put this thread on the end of the list */
   27.94 +                list_del(&thread->thread_list);
   27.95 +                list_add_tail(&thread->thread_list, &idle_thread->thread_list);
   27.96 +                break;
   27.97 +            }
   27.98 +        }
   27.99 +        if (next)
  27.100 +            break;
  27.101 +        /* block until the next timeout expires, or for 10 secs, whichever comes first */
  27.102 +        block_domain(min_wakeup_time);
  27.103 +        /* handle pending events if any */
  27.104 +        force_evtchn_callback();
  27.105 +    } while(1);
  27.106 +    local_irq_restore(flags);
  27.107 +    /* Interrupting the switch is equivalent to having the next thread
   27.108 +       interrupted at the return instruction, and therefore at a safe point. */
  27.109 +    if(prev != next) switch_threads(prev, next);
  27.110 +
  27.111      list_for_each(iterator, &exited_threads)
  27.112      {
  27.113          thread = list_entry(iterator, struct thread, thread_list);
  27.114 @@ -144,24 +136,6 @@ void schedule(void)
  27.115              xfree(thread);
  27.116          }
  27.117      }
  27.118 -    next = idle_thread;   
  27.119 -    /* Thread list needs to be protected */
  27.120 -    list_for_each(iterator, &idle_thread->thread_list)
  27.121 -    {
  27.122 -        thread = list_entry(iterator, struct thread, thread_list);
  27.123 -        if(is_runnable(thread)) 
  27.124 -        {
  27.125 -            next = thread;
  27.126 -            /* Put this thread on the end of the list */
  27.127 -            list_del(&thread->thread_list);
  27.128 -            list_add_tail(&thread->thread_list, &idle_thread->thread_list);
  27.129 -            break;
  27.130 -        }
  27.131 -    }
  27.132 -    local_irq_restore(flags);
  27.133 -    /* Interrupting the switch is equivalent to having the next thread
  27.134 -       inturrupted at the return instruction. And therefore at safe point. */
  27.135 -    if(prev != next) switch_threads(prev, next);
  27.136  }
  27.137  
  27.138  struct thread* create_thread(char *name, void (*function)(void *), void *data)
  27.139 @@ -267,32 +241,10 @@ void wake(struct thread *thread)
  27.140  
  27.141  void idle_thread_fn(void *unused)
  27.142  {
  27.143 -    s_time_t until;
  27.144      threads_started = 1;
  27.145 -    unsigned long flags;
  27.146 -    struct list_head *iterator;
  27.147 -    struct thread *next, *thread;
  27.148 -    for(;;)
  27.149 -    {
  27.150 +    while (1) {
  27.151 +        block(current);
  27.152          schedule();
  27.153 -        next = NULL;
  27.154 -        local_irq_save(flags);
  27.155 -        list_for_each(iterator, &idle_thread->thread_list)
  27.156 -        {
  27.157 -            thread = list_entry(iterator, struct thread, thread_list);
  27.158 -            if(is_runnable(thread)) 
  27.159 -            {
  27.160 -                next = thread;
  27.161 -                break;
  27.162 -            }
  27.163 -        }
  27.164 -        if (!next) {
  27.165 -            /* block until the next timeout expires, or for 10 secs, whichever comes first */
  27.166 -            until = blocking_time();
  27.167 -            block_domain(until);
  27.168 -        }
  27.169 -        local_irq_restore(flags);
  27.170 -        wake_expired();
  27.171      }
  27.172  }
  27.173  
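Note: the rewrite folds blocking_time() and wake_expired() into a single pass
inside schedule(): each iteration wakes expired sleepers, tracks the earliest
future wakeup for block_domain(), and picks the first runnable thread; if
none exists the domain blocks until that deadline, pending events are drained
with force_evtchn_callback(), and the pass repeats. The idle thread
correspondingly shrinks to block-and-reschedule. A self-contained model of
the selection pass (illustrative types, not mini-os code):

    #include <stdint.h>
    typedef int64_t s_time_t;
    struct thread { struct thread *next; s_time_t wakeup_time; int runnable; };

    /* Wake expired sleepers, remember the earliest future wakeup,
       return the first runnable thread (NULL => caller blocks). */
    static struct thread *scan(struct thread *all, s_time_t now,
                               s_time_t *deadline)
    {
        struct thread *t;
        for (t = all; t; t = t->next) {
            if (!t->runnable && t->wakeup_time != 0) {
                if (t->wakeup_time <= now)
                    t->runnable = 1;               /* timeout expired */
                else if (t->wakeup_time < *deadline)
                    *deadline = t->wakeup_time;    /* earliest pending timeout */
            }
            if (t->runnable)
                return t;
        }
        return NULL;
    }
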
    28.1 --- a/tools/blktap/drivers/blktapctrl.c	Thu Apr 24 14:02:16 2008 -0600
    28.2 +++ b/tools/blktap/drivers/blktapctrl.c	Thu Apr 24 14:08:29 2008 -0600
    28.3 @@ -474,9 +474,8 @@ static int read_msg(int fd, int msgtype,
    28.4  
    28.5  }
    28.6  
    28.7 -int launch_tapdisk(char *wrctldev, char *rdctldev)
    28.8 +static int launch_tapdisk_provider(char **argv)
    28.9  {
   28.10 -	char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
   28.11  	pid_t child;
   28.12  	
   28.13  	if ((child = fork()) < 0)
   28.14 @@ -490,7 +489,9 @@ int launch_tapdisk(char *wrctldev, char 
   28.15  			    i != STDERR_FILENO)
   28.16  				close(i);
   28.17  
   28.18 -		execvp("tapdisk", argv);
   28.19 +		execvp(argv[0], argv);
   28.20 +		DPRINTF("execvp failed: %d (%s)\n", errno, strerror(errno));
   28.21 +		DPRINTF("PATH = %s\n", getenv("PATH"));
   28.22  		_exit(1);
   28.23  	} else {
   28.24  		pid_t got;
   28.25 @@ -498,28 +499,78 @@ int launch_tapdisk(char *wrctldev, char 
   28.26  			got = waitpid(child, NULL, 0);
   28.27  		} while (got != child);
   28.28  	}
   28.29 +	return child;
   28.30 +}
   28.31 +
   28.32 +static int launch_tapdisk(char *wrctldev, char *rdctldev)
   28.33 +{
   28.34 +	char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL };
   28.35 +
   28.36 +	if (launch_tapdisk_provider(argv) < 0)
   28.37 +		return -1;
   28.38 +
   28.39  	return 0;
   28.40  }
   28.41  
   28.42 -/* Connect to qemu-dm */
   28.43 -static int connect_qemu(blkif_t *blkif)
   28.44 +static int launch_tapdisk_ioemu(void)
   28.45 +{
   28.46 +	char *argv[] = { "tapdisk-ioemu", NULL };
   28.47 +	return launch_tapdisk_provider(argv);
   28.48 +}
   28.49 +
   28.50 +/* 
   28.51 + * Connect to an ioemu based disk provider (qemu-dm or tapdisk-ioemu)
   28.52 + *
   28.53 + * If the domain has a device model, connect to qemu-dm through the
   28.54 + * domain-specific pipe. Otherwise use a single tapdisk-ioemu instance,
   28.55 + * which is represented by domid 0 and provides access for Dom0 and
   28.56 + * all DomUs without a device model.
   28.57 + */
   28.58 +static int connect_qemu(blkif_t *blkif, int domid)
   28.59  {
   28.60  	char *rdctldev, *wrctldev;
   28.61 +
   28.62 +	static int tapdisk_ioemu_pid = 0;
   28.63 +	static int dom0_readfd = 0;
   28.64 +	static int dom0_writefd = 0;
   28.65  	
   28.66 -	if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", 
   28.67 -			blkif->domid) < 0)
   28.68 +	if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
   28.69  		return -1;
   28.70  
   28.71 -	if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", 
   28.72 -			blkif->domid) < 0) {
   28.73 +	if (asprintf(&wrctldev, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) < 0) {
   28.74  		free(rdctldev);
   28.75  		return -1;
   28.76  	}
   28.77  
   28.78  	DPRINTF("Using qemu blktap pipe: %s\n", rdctldev);
   28.79  	
   28.80 -	blkif->fds[READ] = open_ctrl_socket(wrctldev);
   28.81 -	blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
   28.82 +	if (domid == 0) {
   28.83 +		/*
   28.84 +		 * tapdisk-ioemu exits as soon as the last image is 
   28.85 +		 * disconnected. Check if it is still running.
   28.86 +		 */
   28.87 +		if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) {
   28.88 +			/* No device model and tapdisk-ioemu doesn't run yet */
   28.89 +			DPRINTF("Launching tapdisk-ioemu\n");
   28.90 +			tapdisk_ioemu_pid = launch_tapdisk_ioemu();
   28.91 +			
   28.92 +			dom0_readfd = open_ctrl_socket(wrctldev);
   28.93 +			dom0_writefd = open_ctrl_socket(rdctldev);
   28.94 +		}
   28.95 +
   28.96 +		DPRINTF("Using tapdisk-ioemu connection\n");
   28.97 +		blkif->fds[READ] = dom0_readfd;
   28.98 +		blkif->fds[WRITE] = dom0_writefd;
   28.99 +	} else if (access(rdctldev, R_OK | W_OK) == 0) {
  28.100 +		/* Use existing pipe to the device model */
  28.101 +		DPRINTF("Using qemu-dm connection\n");
  28.102 +		blkif->fds[READ] = open_ctrl_socket(wrctldev);
  28.103 +		blkif->fds[WRITE] = open_ctrl_socket(rdctldev);
  28.104 +	} else {
  28.105 +		/* No device model => try with tapdisk-ioemu */
  28.106 +		DPRINTF("No device model\n");
  28.107 +		connect_qemu(blkif, 0);
  28.108 +	}
  28.109  	
  28.110  	free(rdctldev);
  28.111  	free(wrctldev);
  28.112 @@ -599,7 +650,7 @@ int blktapctrl_new_blkif(blkif_t *blkif)
  28.113  
  28.114  		if (!exist) {
  28.115  			if (type == DISK_TYPE_IOEMU) {
  28.116 -				if (connect_qemu(blkif))
  28.117 +				if (connect_qemu(blkif, blkif->domid))
  28.118  					goto fail;
  28.119  			} else {
  28.120  				if (connect_tapdisk(blkif, minor))
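
Note: for domains without a device model, blktapctrl now memoizes a single
shared tapdisk-ioemu provider under domid 0, re-launching it only when the
cached pid is gone; the liveness probe is the standard POSIX idiom of sending
signal 0, which performs the existence check without delivering anything:

    #include <signal.h>
    #include <sys/types.h>

    static int provider_alive(pid_t pid)
    {
        return pid > 0 && kill(pid, 0) == 0;   /* 0 = probe only */
    }
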
    29.1 --- a/tools/blktap/drivers/tapdisk.h	Thu Apr 24 14:02:16 2008 -0600
    29.2 +++ b/tools/blktap/drivers/tapdisk.h	Thu Apr 24 14:08:29 2008 -0600
    29.3 @@ -235,7 +235,7 @@ static disk_info_t ioemu_disk = {
    29.4  	DISK_TYPE_IOEMU,
    29.5  	"ioemu disk",
    29.6  	"ioemu",
    29.7 -	0,
    29.8 +	1,
    29.9  #ifdef TAPDISK
   29.10  	NULL
   29.11  #endif
    30.1 --- a/tools/console/daemon/io.c	Thu Apr 24 14:02:16 2008 -0600
    30.2 +++ b/tools/console/daemon/io.c	Thu Apr 24 14:08:29 2008 -0600
    30.3 @@ -63,6 +63,7 @@ extern int log_hv;
    30.4  extern int log_time_hv;
    30.5  extern int log_time_guest;
    30.6  extern char *log_dir;
    30.7 +extern int discard_overflowed_data;
    30.8  
    30.9  static int log_time_hv_needts = 1;
   30.10  static int log_time_guest_needts = 1;
   30.11 @@ -201,7 +202,7 @@ static void buffer_append(struct domain 
   30.12  			      dom->domid, errno, strerror(errno));
   30.13  	}
   30.14  
   30.15 -	if (buffer->max_capacity &&
   30.16 +	if (discard_overflowed_data && buffer->max_capacity &&
   30.17  	    buffer->size > buffer->max_capacity) {
   30.18  		/* Discard the middle of the data. */
   30.19  
   30.20 @@ -228,6 +229,11 @@ static void buffer_advance(struct buffer
   30.21  	if (buffer->consumed == buffer->size) {
   30.22  		buffer->consumed = 0;
   30.23  		buffer->size = 0;
   30.24 +		if (buffer->max_capacity &&
   30.25 +		    buffer->capacity > buffer->max_capacity) {
   30.26 +			buffer->data = realloc(buffer->data, buffer->max_capacity);
   30.27 +			buffer->capacity = buffer->max_capacity;
   30.28 +		}
   30.29  	}
   30.30  }
   30.31  
   30.32 @@ -1005,9 +1011,13 @@ void handle_io(void)
   30.33  				    d->next_period < next_timeout)
   30.34  					next_timeout = d->next_period;
   30.35  			} else if (d->xce_handle != -1) {
   30.36 -				int evtchn_fd = xc_evtchn_fd(d->xce_handle);
   30.37 -				FD_SET(evtchn_fd, &readfds);
   30.38 -				max_fd = MAX(evtchn_fd, max_fd);
   30.39 +				if (discard_overflowed_data ||
   30.40 +				    !d->buffer.max_capacity ||
   30.41 +				    d->buffer.size < d->buffer.max_capacity) {
   30.42 +					int evtchn_fd = xc_evtchn_fd(d->xce_handle);
   30.43 +					FD_SET(evtchn_fd, &readfds);
   30.44 +					max_fd = MAX(evtchn_fd, max_fd);
   30.45 +				}
   30.46  			}
   30.47  
   30.48  			if (d->master_fd != -1) {
    31.1 --- a/tools/console/daemon/main.c	Thu Apr 24 14:02:16 2008 -0600
    31.2 +++ b/tools/console/daemon/main.c	Thu Apr 24 14:08:29 2008 -0600
    31.3 @@ -38,6 +38,7 @@ int log_hv = 0;
    31.4  int log_time_hv = 0;
    31.5  int log_time_guest = 0;
    31.6  char *log_dir = NULL;
    31.7 +int discard_overflowed_data = 1;
    31.8  
    31.9  static void handle_hup(int sig)
   31.10  {
   31.11 @@ -46,7 +47,7 @@ static void handle_hup(int sig)
   31.12  
   31.13  static void usage(char *name)
   31.14  {
   31.15 -	printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] [--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all]\n", name);
   31.16 +	printf("Usage: %s [-h] [-V] [-v] [-i] [--log=none|guest|hv|all] [--log-dir=DIR] [--pid-file=PATH] [-t, --timestamp=none|guest|hv|all] [-o, --overflow-data=discard|keep]\n", name);
   31.17  }
   31.18  
   31.19  static void version(char *name)
   31.20 @@ -56,7 +57,7 @@ static void version(char *name)
   31.21  
   31.22  int main(int argc, char **argv)
   31.23  {
   31.24 -	const char *sopts = "hVvit:";
   31.25 +	const char *sopts = "hVvit:o:";
   31.26  	struct option lopts[] = {
   31.27  		{ "help", 0, 0, 'h' },
   31.28  		{ "version", 0, 0, 'V' },
   31.29 @@ -66,6 +67,7 @@ int main(int argc, char **argv)
   31.30  		{ "log-dir", 1, 0, 'r' },
   31.31  		{ "pid-file", 1, 0, 'p' },
   31.32  		{ "timestamp", 1, 0, 't' },
   31.33 +		{ "overflow-data", 1, 0, 'o'},
   31.34  		{ 0 },
   31.35  	};
   31.36  	bool is_interactive = false;
   31.37 @@ -119,6 +121,13 @@ int main(int argc, char **argv)
   31.38  				log_time_hv = 0;
   31.39  			}
   31.40  			break;
   31.41 +		case 'o':
   31.42 +			if (!strcmp(optarg, "keep")) {
   31.43 +				discard_overflowed_data = 0;
   31.44 +			} else if (!strcmp(optarg, "discard")) {
   31.45 +				discard_overflowed_data = 1;
   31.46 +			}
   31.47 +			break;
   31.48  		case '?':
   31.49  			fprintf(stderr,
   31.50  				"Try `%s --help' for more information\n",
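
Note: the new -o/--overflow-data switch maps directly onto
discard_overflowed_data; together with the io.c hunk above, passing

    xenconsoled --overflow-data=keep

makes the daemon stop polling a guest's event channel once that guest's
buffer reaches max_capacity (instead of discarding the middle of the data)
and shrink the buffer back down after a client drains it.
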
    32.1 --- a/tools/examples/blktap	Thu Apr 24 14:02:16 2008 -0600
    32.2 +++ b/tools/examples/blktap	Thu Apr 24 14:08:29 2008 -0600
    32.3 @@ -54,10 +54,6 @@ check_blktap_sharing()
    32.4      echo 'ok'
    32.5  }
    32.6  
    32.7 -FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
    32.8 -FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
    32.9 -mode=$(xenstore_read "$XENBUS_PATH/mode")
   32.10 -mode=$(canonicalise_mode "$mode")
   32.11  
   32.12  t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING')
   32.13  if [ -n "$t" ]
   32.14 @@ -77,15 +73,21 @@ else
   32.15      file="$p"
   32.16  fi
   32.17  
   32.18 -if [ "$mode" != '!' ] 
   32.19 -then
   32.20 -    result=$(check_blktap_sharing "$file" "$mode")
   32.21 -    [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
   32.22 -fi
   32.23 -
   32.24  if [ "$command" = 'add' ]
   32.25  then
   32.26      [ -e "$file" ] || { fatal $file does not exist; }
   32.27 +
   32.28 +    FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
   32.29 +    FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
   32.30 +    mode=$(xenstore_read "$XENBUS_PATH/mode")
   32.31 +    mode=$(canonicalise_mode "$mode")
   32.32 +
   32.33 +    if [ "$mode" != '!' ] 
   32.34 +    then
   32.35 +        result=$(check_blktap_sharing "$file" "$mode")
   32.36 +        [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
   32.37 +    fi
   32.38 +
   32.39      success
   32.40  fi
   32.41  
    33.1 --- a/tools/firmware/hvmloader/Makefile	Thu Apr 24 14:02:16 2008 -0600
    33.2 +++ b/tools/firmware/hvmloader/Makefile	Thu Apr 24 14:08:29 2008 -0600
    33.3 @@ -28,8 +28,9 @@ LOADADDR = 0x100000
    33.4  
    33.5  CFLAGS += $(CFLAGS_include) -I.
    33.6  
    33.7 -SRCS = hvmloader.c mp_tables.c util.c smbios.c 32bitbios_support.c smp.c
    33.8 -OBJS = $(patsubst %.c,%.o,$(SRCS))
    33.9 +SRCS  = hvmloader.c mp_tables.c util.c smbios.c 
   33.10 +SRCS += 32bitbios_support.c smp.c cacheattr.c
   33.11 +OBJS  = $(patsubst %.c,%.o,$(SRCS))
   33.12  
   33.13  .PHONY: all
   33.14  all: hvmloader
    34.1 --- a/tools/firmware/hvmloader/acpi/build.c	Thu Apr 24 14:02:16 2008 -0600
    34.2 +++ b/tools/firmware/hvmloader/acpi/build.c	Thu Apr 24 14:08:29 2008 -0600
    34.3 @@ -84,8 +84,8 @@ static int construct_bios_info_table(uin
    34.4  
    34.5      bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
    34.6  
    34.7 -    bios_info->pci_min = 0xf0000000;
    34.8 -    bios_info->pci_len = 0x0c000000;
    34.9 +    bios_info->pci_min = PCI_MEMBASE;
   34.10 +    bios_info->pci_len = PCI_MEMSIZE;
   34.11  
   34.12      return align16(sizeof(*bios_info));
   34.13  }
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/tools/firmware/hvmloader/cacheattr.c	Thu Apr 24 14:08:29 2008 -0600
    35.3 @@ -0,0 +1,99 @@
    35.4 +/*
    35.5 + * cacheattr.c: MTRR and PAT initialisation.
    35.6 + *
    35.7 + * Copyright (c) 2008, Citrix Systems, Inc.
    35.8 + * 
    35.9 + * Authors:
   35.10 + *    Keir Fraser <keir.fraser@citrix.com>
   35.11 + * 
   35.12 + * This program is free software; you can redistribute it and/or modify it
   35.13 + * under the terms and conditions of the GNU General Public License,
   35.14 + * version 2, as published by the Free Software Foundation.
   35.15 + *
   35.16 + * This program is distributed in the hope it will be useful, but WITHOUT
   35.17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   35.18 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   35.19 + * more details.
   35.20 + *
   35.21 + * You should have received a copy of the GNU General Public License along with
   35.22 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   35.23 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   35.24 + */
   35.25 +
   35.26 +#include "util.h"
   35.27 +#include "config.h"
   35.28 +
   35.29 +#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
   35.30 +#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)
   35.31 +#define MSR_MTRRcap          0x00fe
   35.32 +#define MSR_MTRRfix64K_00000 0x0250
   35.33 +#define MSR_MTRRfix16K_80000 0x0258
   35.34 +#define MSR_MTRRfix16K_A0000 0x0259
   35.35 +#define MSR_MTRRfix4K_C0000  0x0268
   35.36 +#define MSR_MTRRfix4K_C8000  0x0269
   35.37 +#define MSR_MTRRfix4K_D0000  0x026a
   35.38 +#define MSR_MTRRfix4K_D8000  0x026b
   35.39 +#define MSR_MTRRfix4K_E0000  0x026c
   35.40 +#define MSR_MTRRfix4K_E8000  0x026d
   35.41 +#define MSR_MTRRfix4K_F0000  0x026e
   35.42 +#define MSR_MTRRfix4K_F8000  0x026f
   35.43 +#define MSR_PAT              0x0277
   35.44 +#define MSR_MTRRdefType      0x02ff
   35.45 +
   35.46 +void cacheattr_init(void)
   35.47 +{
   35.48 +    uint32_t eax, ebx, ecx, edx;
   35.49 +    uint64_t mtrr_cap, mtrr_def, content, addr_mask;
   35.50 +    unsigned int i, nr_var_ranges, phys_bits = 36;
   35.51 +
   35.52 +    /* Does the CPU support architectural MTRRs? */
   35.53 +    cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
   35.54 +    if ( !(edx & (1u << 12)) )
   35.55 +         return;
   35.56 +
   35.57 +    /* Find the physical address size for this CPU. */
   35.58 +    cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
   35.59 +    if ( eax >= 0x80000008 )
   35.60 +    {
   35.61 +        cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
   35.62 +        phys_bits = (uint8_t)eax;
   35.63 +    }
   35.64 +
   35.65 +    printf("%u-bit phys ... ", phys_bits);
   35.66 +
   35.67 +    addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1);
   35.68 +    mtrr_cap = rdmsr(MSR_MTRRcap);
   35.69 +    mtrr_def = (1u << 11) | 6; /* E, default type WB */
   35.70 +
   35.71 +    /* Fixed-range MTRRs supported? */
   35.72 +    if ( mtrr_cap & (1u << 8) )
   35.73 +    {
   35.74 +        /* 0x00000-0x9ffff: Write Back (WB) */
   35.75 +        content = 0x0606060606060606ull;
   35.76 +        wrmsr(MSR_MTRRfix64K_00000, content);
   35.77 +        wrmsr(MSR_MTRRfix16K_80000, content);
   35.78 +        /* 0xa0000-0xbffff: Write Combining (WC) */
   35.79 +        if ( mtrr_cap & (1u << 10) ) /* WC supported? */
   35.80 +            content = 0x0101010101010101ull;
   35.81 +        wrmsr(MSR_MTRRfix16K_A0000, content);
   35.82 +        /* 0xc0000-0xfffff: Write Back (WB) */
   35.83 +        content = 0x0606060606060606ull;
   35.84 +        for ( i = 0; i < 8; i++ )
   35.85 +            wrmsr(MSR_MTRRfix4K_C0000 + i, content);
   35.86 +        mtrr_def |= 1u << 10; /* FE */
   35.87 +        printf("fixed MTRRs ... ");
   35.88 +    }
   35.89 +
   35.90 +    /* Variable-range MTRRs supported? */
   35.91 +    nr_var_ranges = (uint8_t)mtrr_cap;
   35.92 +    if ( nr_var_ranges != 0 )
   35.93 +    {
   35.94 +        /* A single UC range covering PCI space. */
   35.95 +        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
   35.96 +        wrmsr(MSR_MTRRphysMask(0),
   35.97 +              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
   35.98 +        printf("var MTRRs ... ");
   35.99 +    }
  35.100 +
  35.101 +    wrmsr(MSR_MTRRdefType, mtrr_def);
  35.102 +}
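
The variable-range setup above leans on a compact encoding: each range pairs a base MSR (cache type in the low byte) with a mask MSR (bit 11 = valid), and the `(uint64_t)(int32_t)PCI_MEMBASE` sign extension yields a mask selecting the whole 0xf0000000-0xffffffff region. A minimal sketch of the same encoding for an arbitrary power-of-two range, reusing the macros above (set_var_mtrr_uc itself is illustrative, not in the tree):

    /* Mark [base, base+size) as UC with variable-range MTRR 'reg'.
     * size must be a power of two and base size-aligned. */
    static void set_var_mtrr_uc(unsigned int reg, uint64_t base,
                                uint64_t size, uint64_t addr_mask)
    {
        wrmsr(MSR_MTRRphysBase(reg), base | 0x00);     /* type 0x00 = UC */
        wrmsr(MSR_MTRRphysMask(reg),
              (~(size - 1) & addr_mask) | (1u << 11)); /* V (valid) bit */
    }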
    36.1 --- a/tools/firmware/hvmloader/config.h	Thu Apr 24 14:02:16 2008 -0600
    36.2 +++ b/tools/firmware/hvmloader/config.h	Thu Apr 24 14:08:29 2008 -0600
    36.3 @@ -11,6 +11,9 @@
    36.4  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
    36.5  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
    36.6  
    36.7 +#define PCI_MEMBASE         0xf0000000
    36.8 +#define PCI_MEMSIZE         0x0c000000
    36.9 +
   36.10  #define ROMBIOS_SEG            0xF000
   36.11  #define ROMBIOS_BEGIN          0x000F0000
   36.12  #define ROMBIOS_SIZE           0x00010000
    37.1 --- a/tools/firmware/hvmloader/hvmloader.c	Thu Apr 24 14:02:16 2008 -0600
    37.2 +++ b/tools/firmware/hvmloader/hvmloader.c	Thu Apr 24 14:08:29 2008 -0600
    37.3 @@ -96,6 +96,7 @@ asm (
    37.4      "stack:                          \n"
    37.5      "    .skip    0x4000             \n"
    37.6      "stack_top:                      \n"
    37.7 +    "    .text                       \n"
    37.8      );
    37.9  
   37.10  void smp_initialise(void);
   37.11 @@ -158,7 +159,7 @@ static void pci_setup(void)
   37.12      struct resource {
   37.13          uint32_t base, max;
   37.14      } *resource;
   37.15 -    struct resource mem_resource = { 0xf0000000, 0xfc000000 };
   37.16 +    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
   37.17      struct resource io_resource  = { 0xc000, 0x10000 };
   37.18  
   37.19      /* Create a list of device BARs in descending order of size. */
    38.1 --- a/tools/firmware/hvmloader/smp.c	Thu Apr 24 14:02:16 2008 -0600
    38.2 +++ b/tools/firmware/hvmloader/smp.c	Thu Apr 24 14:08:29 2008 -0600
    38.3 @@ -66,12 +66,15 @@ asm (
    38.4      "stack:                          \n"
    38.5      "    .skip    0x4000             \n"
    38.6      "stack_top:                      \n"
    38.7 +    "    .text                       \n"
    38.8      );
    38.9  
   38.10 +extern void cacheattr_init(void);
   38.11 +
   38.12  /*static*/ void ap_start(void)
   38.13  {
   38.14      printf(" - CPU%d ... ", ap_cpuid);
   38.15 -
   38.16 +    cacheattr_init();
   38.17      printf("done.\n");
   38.18      wmb();
   38.19      ap_callin = 1;
   38.20 @@ -121,12 +124,10 @@ void smp_initialise(void)
   38.21  {
   38.22      unsigned int i, nr_cpus = get_vcpu_nr();
   38.23  
   38.24 -    if ( nr_cpus <= 1 )
   38.25 -        return;
   38.26 -
   38.27      memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
   38.28  
   38.29      printf("Multiprocessor initialisation:\n");
   38.30 +    ap_start();
   38.31      for ( i = 1; i < nr_cpus; i++ )
   38.32          boot_cpu(i);
   38.33  }
    39.1 --- a/tools/ioemu/Makefile	Thu Apr 24 14:02:16 2008 -0600
    39.2 +++ b/tools/ioemu/Makefile	Thu Apr 24 14:08:29 2008 -0600
    39.3 @@ -87,7 +87,7 @@ endif
    39.4  
    39.5  install: all $(if $(BUILD_DOCS),install-doc)
    39.6  	mkdir -p "$(DESTDIR)$(bindir)"
    39.7 -	$(INSTALL) -m 755 -s $(TOOLS) "$(DESTDIR)$(prefix)/sbin"
    39.8 +	$(INSTALL) -m 755 $(TOOLS) "$(DESTDIR)$(SBINDIR)"
    39.9  #	mkdir -p "$(DESTDIR)$(datadir)"
   39.10  #	for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
   39.11  #		video.x openbios-sparc32 linux_boot.bin pxe-ne2k_pci.bin \
    40.1 --- a/tools/ioemu/hw/cirrus_vga.c	Thu Apr 24 14:02:16 2008 -0600
    40.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Thu Apr 24 14:08:29 2008 -0600
    40.3 @@ -2595,6 +2595,10 @@ static void *set_vram_mapping(unsigned l
    40.4  
    40.5      memset(vram_pointer, 0, nr_extents * TARGET_PAGE_SIZE);
    40.6  
    40.7 +#ifdef CONFIG_STUBDOM
    40.8 +    xenfb_pv_display_start(vram_pointer);
    40.9 +#endif
   40.10 +
   40.11      free(extent_start);
   40.12  
   40.13      return vram_pointer;
    41.1 --- a/tools/ioemu/hw/pci.c	Thu Apr 24 14:02:16 2008 -0600
    41.2 +++ b/tools/ioemu/hw/pci.c	Thu Apr 24 14:08:29 2008 -0600
    41.3 @@ -79,18 +79,30 @@ int pci_bus_num(PCIBus *s)
    41.4  
    41.5  void pci_device_save(PCIDevice *s, QEMUFile *f)
    41.6  {
    41.7 -    qemu_put_be32(f, 1); /* PCI device version */
    41.8 +    uint8_t irq_state = 0;
    41.9 +    int i;
   41.10 +    qemu_put_be32(f, 2); /* PCI device version */
   41.11      qemu_put_buffer(f, s->config, 256);
   41.12 +    for (i = 0; i < 4; i++)
   41.13 +        irq_state |= !!s->irq_state[i] << i;
   41.14 +    qemu_put_buffer(f, &irq_state, 1);
   41.15  }
   41.16  
   41.17  int pci_device_load(PCIDevice *s, QEMUFile *f)
   41.18  {
   41.19      uint32_t version_id;
   41.20      version_id = qemu_get_be32(f);
   41.21 -    if (version_id != 1)
   41.22 +    if (version_id != 1 && version_id != 2)
   41.23          return -EINVAL;
   41.24      qemu_get_buffer(f, s->config, 256);
   41.25      pci_update_mappings(s);
   41.26 +    if (version_id == 2) {
   41.27 +        uint8_t irq_state;
   41.28 +        int i;
   41.29 +        qemu_get_buffer(f, &irq_state, 1);
   41.30 +        for (i = 0; i < 4; i++)
   41.31 +            pci_set_irq(s, i, !!(irq_state >> i));
   41.32 +    }
   41.33      return 0;
   41.34  }
   41.35  
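For reference, the version-2 record that pci_device_save now emits is: a 4-byte big-endian version word, the 256-byte config space, then one byte whose bit i (i = 0..3) is the current level of INTx line i. pci_device_load replays those levels through pci_set_irq so the interrupt controller's view is reconstructed after save/restore, while version-1 records are still accepted without the trailing byte.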
    42.1 --- a/tools/ioemu/hw/vga.c	Thu Apr 24 14:02:16 2008 -0600
    42.2 +++ b/tools/ioemu/hw/vga.c	Thu Apr 24 14:08:29 2008 -0600
    42.3 @@ -2067,8 +2067,8 @@ void vga_common_init(VGAState *s, Displa
    42.4                                   & ~(TARGET_PAGE_SIZE - 1));
    42.5  
    42.6      /* Video RAM must be 128-bit aligned for SSE optimizations later */
    42.7 -    s->vram_alloc = qemu_malloc(vga_ram_size + 15);
    42.8 -    s->vram_ptr = (uint8_t *)((long)(s->vram_alloc + 15) & ~15L);
    42.9 +    /* and page-aligned for PVFB memory sharing */
   42.10 +    s->vram_ptr = s->vram_alloc = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size);
   42.11  
   42.12      s->vram_offset = vga_ram_offset;
   42.13      s->vram_size = vga_ram_size;
   42.14 @@ -2210,7 +2210,7 @@ void *vga_update_vram(VGAState *s, void 
   42.15      }
   42.16  
   42.17      if (!vga_ram_base) {
   42.18 -        vga_ram_base = qemu_malloc(vga_ram_size + TARGET_PAGE_SIZE + 1);
   42.19 +        vga_ram_base = qemu_memalign(TARGET_PAGE_SIZE, vga_ram_size + TARGET_PAGE_SIZE + 1);
   42.20          if (!vga_ram_base) {
   42.21              fprintf(stderr, "reallocate error\n");
   42.22              return NULL;
    43.1 --- a/tools/ioemu/hw/xen_blktap.c	Thu Apr 24 14:02:16 2008 -0600
    43.2 +++ b/tools/ioemu/hw/xen_blktap.c	Thu Apr 24 14:08:29 2008 -0600
    43.3 @@ -581,17 +581,13 @@ static void handle_blktap_ctrlmsg(void* 
    43.4   */
    43.5  static int open_ctrl_socket(char *devname)
    43.6  {
    43.7 -	int ret;
    43.8  	int ipc_fd;
    43.9  
   43.10  	if (mkdir(BLKTAP_CTRL_DIR, 0755) == 0)
   43.11  		DPRINTF("Created %s directory\n", BLKTAP_CTRL_DIR);
   43.12  
   43.13 -	ret = mkfifo(devname,S_IRWXU|S_IRWXG|S_IRWXO);
   43.14 -	if ( (ret != 0) && (errno != EEXIST) ) {
   43.15 -		DPRINTF("ERROR: pipe failed (%d)\n", errno);
   43.16 +	if (access(devname, R_OK | W_OK))
   43.17  		return -1;
   43.18 -	}
   43.19  
   43.20  	ipc_fd = open(devname,O_RDWR|O_NONBLOCK);
   43.21  
   43.22 @@ -604,42 +600,6 @@ static int open_ctrl_socket(char *devnam
   43.23  }
   43.24  
   43.25  /**
   43.26 - * Unmaps all disks and closes their pipes
   43.27 - */
   43.28 -void shutdown_blktap(void)
   43.29 -{
   43.30 -	fd_list_entry_t *ptr;
   43.31 -	struct td_state *s;
   43.32 -	char *devname;
   43.33 -
   43.34 -	DPRINTF("Shutdown blktap\n");
   43.35 -
   43.36 -	/* Unmap all disks */
   43.37 -	ptr = fd_start;
   43.38 -	while (ptr != NULL) {
   43.39 -		s = ptr->s;
   43.40 -		unmap_disk(s);
   43.41 -		close(ptr->tap_fd);
   43.42 -		ptr = ptr->next;
   43.43 -	}
   43.44 -
   43.45 -	/* Delete control pipes */
   43.46 -	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) >= 0) {
   43.47 -		DPRINTF("Delete %s\n", devname);
   43.48 -		if (unlink(devname))
   43.49 -			DPRINTF("Could not delete: %s\n", strerror(errno));
   43.50 -		free(devname);
   43.51 -	}
   43.52 -	
   43.53 -	if (asprintf(&devname, BLKTAP_CTRL_DIR "/qemu-write-%d", domid) >= 0) {	
   43.54 -		DPRINTF("Delete %s\n", devname);
   43.55 -		if (unlink(devname))
   43.56 -			DPRINTF("Could not delete: %s\n", strerror(errno));
   43.57 -		free(devname);
   43.58 -	}
   43.59 -}
   43.60 -
   43.61 -/**
   43.62   * Initialize the blktap interface, i.e. open a pair of pipes in /var/run/tap
   43.63   * and register a fd handler.
   43.64   *
   43.65 @@ -679,8 +639,5 @@ int init_blktap(void)
   43.66  	/* Attach a handler to the read pipe (called from qemu main loop) */
   43.67  	qemu_set_fd_handler2(read_fd, NULL, &handle_blktap_ctrlmsg, NULL, NULL);
   43.68  
   43.69 -	/* Register handler to clean up when the domain is destroyed */
   43.70 -	atexit(&shutdown_blktap);
   43.71 -
   43.72  	return 0;
   43.73  }
    44.1 --- a/tools/ioemu/hw/xenfb.c	Thu Apr 24 14:02:16 2008 -0600
    44.2 +++ b/tools/ioemu/hw/xenfb.c	Thu Apr 24 14:08:29 2008 -0600
    44.3 @@ -1235,15 +1235,11 @@ static int xenfb_register_console(struct
    44.4  static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0);
    44.5  static struct kbdfront_dev *kbd_dev;
    44.6  static char *kbd_path, *fb_path;
    44.7 +static void *vga_vram, *nonshared_vram;
    44.8 +static DisplayState *xenfb_ds;
    44.9  
   44.10  static unsigned char linux2scancode[KEY_MAX + 1];
   44.11  
   44.12 -#define WIDTH 1024
   44.13 -#define HEIGHT 768
   44.14 -#define DEPTH 32
   44.15 -#define LINESIZE (1280 * (DEPTH / 8))
   44.16 -#define MEMSIZE (LINESIZE * HEIGHT)
   44.17 -
   44.18  int xenfb_connect_vkbd(const char *path)
   44.19  {
   44.20      kbd_path = strdup(path);
   44.21 @@ -1256,33 +1252,73 @@ int xenfb_connect_vfb(const char *path)
   44.22      return 0;
   44.23  }
   44.24  
   44.25 -static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h)
   44.26 +static void xenfb_pv_update(DisplayState *ds, int x, int y, int w, int h)
   44.27  {
   44.28 -    struct fbfront_dev *fb_dev = s->opaque;
   44.29 +    struct fbfront_dev *fb_dev = ds->opaque;
   44.30 +    if (!fb_dev)
   44.31 +        return;
   44.32      fbfront_update(fb_dev, x, y, w, h);
   44.33  }
   44.34  
   44.35 -static void xenfb_pv_resize(DisplayState *s, int w, int h, int linesize)
   44.36 +static void xenfb_pv_resize(DisplayState *ds, int w, int h, int linesize)
   44.37  {
   44.38 -    struct fbfront_dev *fb_dev = s->opaque;
   44.39 -    fprintf(stderr,"resize to %dx%d required\n", w, h);
   44.40 -    s->width = w;
   44.41 -    s->height = h;
   44.42 -    /* TODO: send resize event if supported */
   44.43 -    memset(s->data, 0, MEMSIZE);
   44.44 -    fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT);
   44.45 +    struct fbfront_dev *fb_dev = ds->opaque;
   44.46 +    fprintf(stderr,"resize to %dx%d, %d required\n", w, h, linesize);
   44.47 +    ds->width = w;
   44.48 +    ds->height = h;
   44.49 +    if (!linesize)
   44.50 +        ds->shared_buf = 0;
   44.51 +    if (!ds->shared_buf)
   44.52 +        linesize = w * 4;
   44.53 +    ds->linesize = linesize;
   44.54 +    if (!fb_dev)
   44.55 +        return;
   44.56 +    if (ds->shared_buf) {
   44.57 +        ds->data = NULL;
   44.58 +    } else {
   44.59 +        ds->data = nonshared_vram;
   44.60 +        fbfront_resize(fb_dev, w, h, linesize, ds->depth, VGA_RAM_SIZE);
   44.61 +    }
   44.62  }
   44.63  
   44.64  static void xenfb_pv_colourdepth(DisplayState *ds, int depth)
   44.65  {
   44.66 -    /* TODO: send redepth event if supported */
   44.67 +    struct fbfront_dev *fb_dev = ds->opaque;
   44.68      static int lastdepth = -1;
   44.69 +    if (!depth) {
   44.70 +        ds->shared_buf = 0;
   44.71 +        ds->depth = 32;
   44.72 +    } else {
   44.73 +        ds->shared_buf = 1;
   44.74 +        ds->depth = depth;
   44.75 +    }
   44.76      if (depth != lastdepth) {
   44.77          fprintf(stderr,"redepth to %d required\n", depth);
   44.78          lastdepth = depth;
   44.79 +    } else return;
   44.80 +    if (!fb_dev)
   44.81 +        return;
   44.82 +    if (ds->shared_buf) {
   44.83 +        ds->data = NULL;
   44.84 +    } else {
   44.85 +        ds->data = nonshared_vram;
   44.86 +        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, VGA_RAM_SIZE);
   44.87      }
   44.88 -    /* We can't redepth for now */
   44.89 -    ds->depth = DEPTH;
   44.90 +}
   44.91 +
   44.92 +static void xenfb_pv_setdata(DisplayState *ds, void *pixels)
   44.93 +{
   44.94 +    struct fbfront_dev *fb_dev = ds->opaque;
   44.95 +    int offset = pixels - vga_vram;
   44.96 +    ds->data = pixels;
   44.97 +    if (!fb_dev)
   44.98 +        return;
   44.99 +    fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, offset);
  44.100 +}
  44.101 +
  44.102 +static void xenfb_pv_refresh(DisplayState *ds)
  44.103 +{
  44.104 +    vga_hw_update();
  44.105  }
  44.106  
  44.107  static void xenfb_kbd_handler(void *opaque)
  44.108 @@ -1373,13 +1409,6 @@ static void xenfb_kbd_handler(void *opaq
  44.109      }
  44.110  }
  44.111  
  44.112 -static void xenfb_pv_refresh(DisplayState *ds)
  44.113 -{
  44.114 -    /* always request negociation */
  44.115 -    ds->depth = -1;
  44.116 -    vga_hw_update();
  44.117 -}
  44.118 -
  44.119  static void kbdfront_thread(void *p)
  44.120  {
  44.121      int scancode, keycode;
  44.122 @@ -1399,40 +1428,72 @@ static void kbdfront_thread(void *p)
  44.123  
  44.124  int xenfb_pv_display_init(DisplayState *ds)
  44.125  {
  44.126 -    void *data;
  44.127 -    struct fbfront_dev *fb_dev;
  44.128 -    int kbd_fd;
  44.129 -
  44.130      if (!fb_path || !kbd_path)
  44.131          return -1;
  44.132  
  44.133      create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
  44.134  
  44.135 -    data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
  44.136 -    fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, MEMSIZE);
  44.137 +    xenfb_ds = ds;
  44.138 +
  44.139 +    ds->data = nonshared_vram = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
  44.140 +    memset(ds->data, 0, VGA_RAM_SIZE);
  44.141 +    ds->depth = 32;
  44.142 +    ds->bgr = 0;
  44.143 +    ds->width = 640;
  44.144 +    ds->height = 400;
  44.145 +    ds->linesize = 640 * 4;
  44.146 +    ds->dpy_update = xenfb_pv_update;
  44.147 +    ds->dpy_resize = xenfb_pv_resize;
  44.148 +    ds->dpy_colourdepth = xenfb_pv_colourdepth;
  44.149 +    ds->dpy_setdata = xenfb_pv_setdata;
  44.150 +    ds->dpy_refresh = xenfb_pv_refresh;
  44.151 +    return 0;
  44.152 +}
  44.153 +
  44.154 +int xenfb_pv_display_start(void *data)
  44.155 +{
  44.156 +    DisplayState *ds = xenfb_ds;
  44.157 +    struct fbfront_dev *fb_dev;
  44.158 +    int kbd_fd;
  44.159 +    int offset = 0;
  44.160 +    unsigned long *mfns;
  44.161 +    int n = VGA_RAM_SIZE / PAGE_SIZE;
  44.162 +    int i;
  44.163 +
  44.164 +    if (!fb_path || !kbd_path)
  44.165 +        return 0;
  44.166 +
  44.167 +    vga_vram = data;
  44.168 +    mfns = malloc(2 * n * sizeof(*mfns));
  44.169 +    for (i = 0; i < n; i++)
  44.170 +        mfns[i] = virtual_to_mfn(vga_vram + i * PAGE_SIZE);
  44.171 +    for (i = 0; i < n; i++)
  44.172 +        mfns[n + i] = virtual_to_mfn(nonshared_vram + i * PAGE_SIZE);
  44.173 +
  44.174 +    fb_dev = init_fbfront(fb_path, mfns, ds->width, ds->height, ds->depth, ds->linesize, 2 * n);
  44.175 +    free(mfns);
  44.176      if (!fb_dev) {
  44.177          fprintf(stderr,"can't open frame buffer\n");
  44.178          exit(1);
  44.179      }
  44.180      free(fb_path);
  44.181  
  44.182 +    if (ds->shared_buf) {
  44.183 +        offset = (void*) ds->data - vga_vram;
  44.184 +    } else {
  44.185 +        offset = VGA_RAM_SIZE;
  44.186 +        ds->data = nonshared_vram;
  44.187 +    }
  44.188 +    if (offset)
  44.189 +        fbfront_resize(fb_dev, ds->width, ds->height, ds->linesize, ds->depth, offset);
  44.190 +
  44.191      down(&kbd_sem);
  44.192      free(kbd_path);
  44.193  
  44.194      kbd_fd = kbdfront_open(kbd_dev);
  44.195      qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds);
  44.196  
  44.197 -    ds->data = data;
  44.198 -    ds->linesize = LINESIZE;
  44.199 -    ds->depth = DEPTH;
  44.200 -    ds->bgr = 0;
  44.201 -    ds->width = WIDTH;
  44.202 -    ds->height = HEIGHT;
  44.203 -    ds->dpy_update = xenfb_pv_update;
  44.204 -    ds->dpy_resize = xenfb_pv_resize;
  44.205 -    ds->dpy_colourdepth = xenfb_pv_colourdepth;
  44.206 -    ds->dpy_refresh = xenfb_pv_refresh;
  44.207 -    ds->opaque = fb_dev;
  44.208 +    xenfb_ds->opaque = fb_dev;
  44.209      return 0;
  44.210  }
  44.211  #endif
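
The frame list passed to init_fbfront above spans 2*n pages: entries [0, n) are the emulated VGA RAM and entries [n, 2n) a private buffer, so the frontend can scan out either region without remapping; the offset later sent via fbfront_resize picks the start of the visible surface within that space. A sketch of the choice, with the variables as in xenfb_pv_display_start:

    /* Byte offset of the visible surface within the 2*n-page space. */
    if (ds->shared_buf)
        offset = (void *)ds->data - vga_vram;  /* inside pages [0, n) */
    else
        offset = VGA_RAM_SIZE;                 /* start of pages [n, 2n) */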
    45.1 --- a/tools/ioemu/tapdisk-ioemu.c	Thu Apr 24 14:02:16 2008 -0600
    45.2 +++ b/tools/ioemu/tapdisk-ioemu.c	Thu Apr 24 14:08:29 2008 -0600
    45.3 @@ -4,6 +4,7 @@
    45.4  #include <string.h>
    45.5  #include <stdint.h>
    45.6  #include <signal.h>
    45.7 +#include <unistd.h>
    45.8  #include <sys/time.h>
    45.9  
   45.10  #include <assert.h>
   45.11 @@ -16,6 +17,8 @@ extern void bdrv_init(void);
   45.12  extern void *qemu_mallocz(size_t size);
   45.13  extern void qemu_free(void *ptr);
   45.14  
   45.15 +extern void *fd_start;
   45.16 +
   45.17  int domid = 0;
   45.18  FILE* logfile;
   45.19  
   45.20 @@ -95,12 +98,17 @@ int main(void)
   45.21      int max_fd;
   45.22      fd_set rfds;
   45.23      struct timeval tv;
   45.24 +    void *old_fd_start = NULL;
   45.25  
   45.26      logfile = stderr;
   45.27      
   45.28      bdrv_init();
   45.29      qemu_aio_init();
   45.30      init_blktap();
   45.31 +
   45.32 +    /* Daemonize */
   45.33 +    if (fork() != 0)
   45.34 +    	exit(0);
   45.35     
   45.36      /* 
    45.37       * Main loop: Pass events to the corresponding handlers and check for
   45.38 @@ -137,6 +145,12 @@ int main(void)
   45.39              } else 
   45.40                  pioh = &ioh->next;
   45.41          }
   45.42 +
   45.43 +        /* Exit when the last image has been closed */
   45.44 +        if (old_fd_start != NULL && fd_start == NULL)
   45.45 +            exit(0);
   45.46 +
   45.47 +        old_fd_start = fd_start;
   45.48      }
   45.49      return 0;
   45.50  }
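
Two lifecycle changes here: the process forks once after init_blktap so the invoking parent returns immediately (a minimal daemonization, with no setsid or fd redirection), and the main loop now exits when fd_start transitions from non-NULL to NULL, i.e. when the last tracked disk image is closed — replacing the atexit-driven shutdown_blktap cleanup removed from xen_blktap.c.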
    46.1 --- a/tools/ioemu/target-i386-dm/helper2.c	Thu Apr 24 14:02:16 2008 -0600
    46.2 +++ b/tools/ioemu/target-i386-dm/helper2.c	Thu Apr 24 14:08:29 2008 -0600
    46.3 @@ -482,7 +482,7 @@ void cpu_handle_ioreq(void *opaque)
    46.4      CPUState *env = opaque;
    46.5      ioreq_t *req = cpu_get_ioreq();
    46.6  
    46.7 -    handle_buffered_io(env);
    46.8 +    __handle_buffered_iopage(env);
    46.9      if (req) {
   46.10          __handle_ioreq(env, req);
   46.11  
    47.1 --- a/tools/ioemu/vl.c	Thu Apr 24 14:02:16 2008 -0600
    47.2 +++ b/tools/ioemu/vl.c	Thu Apr 24 14:08:29 2008 -0600
    47.3 @@ -140,9 +140,9 @@
    47.4  #define MAX_IOPORTS 65536
    47.5  
    47.6  const char *bios_dir = CONFIG_QEMU_SHAREDIR;
    47.7 -void **ioport_opaque;
    47.8 -IOPortReadFunc *(*ioport_read_table)[MAX_IOPORTS];
    47.9 -IOPortWriteFunc *(*ioport_write_table)[MAX_IOPORTS];
   47.10 +void *ioport_opaque[MAX_IOPORTS];
   47.11 +IOPortReadFunc *ioport_read_table[3][MAX_IOPORTS];
   47.12 +IOPortWriteFunc *ioport_write_table[3][MAX_IOPORTS];
   47.13  /* Note: bs_table[MAX_DISKS] is a dummy block driver if none available
   47.14     to store the VM snapshots */
   47.15  BlockDriverState *bs_table[MAX_DISKS + MAX_SCSI_DISKS + 1], *fd_table[MAX_FD];
   47.16 @@ -281,9 +281,6 @@ void default_ioport_writel(void *opaque,
   47.17  
   47.18  void init_ioports(void)
   47.19  {
   47.20 -    ioport_opaque = calloc(MAX_IOPORTS, sizeof(*ioport_opaque));
   47.21 -    ioport_read_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_read_table));
   47.22 -    ioport_write_table = calloc(3 * MAX_IOPORTS, sizeof(**ioport_write_table));
   47.23  }
   47.24  
   47.25  /* size is the word size in byte */
   47.26 @@ -6278,12 +6275,6 @@ void qemu_system_powerdown_request(void)
   47.27          cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
   47.28  }
   47.29  
   47.30 -static void qemu_sighup_handler(int signal)
   47.31 -{
   47.32 -    fprintf(stderr, "Received SIGHUP, terminating.\n");
   47.33 -    exit(0);
   47.34 -}
   47.35 -
   47.36  void main_loop_wait(int timeout)
   47.37  {
   47.38      IOHandlerRecord *ioh;
   47.39 @@ -7979,7 +7970,7 @@ int main(int argc, char **argv)
   47.40  
   47.41  #ifndef CONFIG_STUBDOM
   47.42      /* Unblock SIGTERM and SIGHUP, which may have been blocked by the caller */
   47.43 -    signal(SIGHUP, qemu_sighup_handler);
   47.44 +    signal(SIGHUP, SIG_DFL);
   47.45      sigemptyset(&set);
   47.46      sigaddset(&set, SIGTERM);
   47.47      sigaddset(&set, SIGHUP);
    48.1 --- a/tools/ioemu/vl.h	Thu Apr 24 14:02:16 2008 -0600
    48.2 +++ b/tools/ioemu/vl.h	Thu Apr 24 14:08:29 2008 -0600
    48.3 @@ -1545,6 +1545,7 @@ char *xenstore_vm_read(int domid, char *
    48.4  
    48.5  /* xenfb.c */
    48.6  int xenfb_pv_display_init(DisplayState *ds);
    48.7 +int xenfb_pv_display_start(void *vram_start);
    48.8  int xenfb_connect_vkbd(const char *path);
    48.9  int xenfb_connect_vfb(const char *path);
   48.10  
    49.1 --- a/tools/libfsimage/ext2fs/fsys_ext2fs.c	Thu Apr 24 14:02:16 2008 -0600
    49.2 +++ b/tools/libfsimage/ext2fs/fsys_ext2fs.c	Thu Apr 24 14:08:29 2008 -0600
    49.3 @@ -77,7 +77,52 @@ struct ext2_super_block
    49.4      __u32 s_rev_level;		/* Revision level */
    49.5      __u16 s_def_resuid;		/* Default uid for reserved blocks */
    49.6      __u16 s_def_resgid;		/* Default gid for reserved blocks */
    49.7 -    __u32 s_reserved[235];	/* Padding to the end of the block */
    49.8 +    /*
    49.9 +     * These fields are for EXT2_DYNAMIC_REV superblocks only.
   49.10 +     *
   49.11 +     * Note: the difference between the compatible feature set and
   49.12 +     * the incompatible feature set is that if there is a bit set
   49.13 +     * in the incompatible feature set that the kernel doesn't
   49.14 +     * know about, it should refuse to mount the filesystem.
   49.15 +     *
   49.16 +     * e2fsck's requirements are more strict; if it doesn't know
   49.17 +     * about a feature in either the compatible or incompatible
   49.18 +     * feature set, it must abort and not try to meddle with
   49.19 +     * things it doesn't understand...
   49.20 +     */
   49.21 +    __u32 s_first_ino;		/* First non-reserved inode */
   49.22 +    __u16 s_inode_size;		/* size of inode structure */
   49.23 +    __u16 s_block_group_nr;	/* block group # of this superblock */
   49.24 +    __u32 s_feature_compat;	/* compatible feature set */
   49.25 +    __u32 s_feature_incompat;	/* incompatible feature set */
   49.26 +    __u32 s_feature_ro_compat;	/* readonly-compatible feature set */
   49.27 +    __u8  s_uuid[16];		/* 128-bit uuid for volume */
   49.28 +    char  s_volume_name[16];	/* volume name */
   49.29 +    char  s_last_mounted[64];	/* directory where last mounted */
   49.30 +    __u32 s_algorithm_usage_bitmap; /* For compression */
   49.31 +    /*
   49.32 +     * Performance hints.  Directory preallocation should only
   49.33 +     * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on.
   49.34 +     */
   49.35 +    __u8  s_prealloc_blocks;	/* Nr of blocks to try to preallocate*/
   49.36 +    __u8  s_prealloc_dir_blocks;	/* Nr to preallocate for dirs */
   49.37 +    __u16 s_reserved_gdt_blocks;/* Per group table for online growth */
   49.38 +    /*
   49.39 +     * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set.
   49.40 +     */
   49.41 +    __u8 s_journal_uuid[16];	/* uuid of journal superblock */
   49.42 +    __u32 s_journal_inum;	/* inode number of journal file */
   49.43 +    __u32 s_journal_dev;	/* device number of journal file */
   49.44 +    __u32 s_last_orphan;	/* start of list of inodes to delete */
   49.45 +    __u32 s_hash_seed[4];	/* HTREE hash seed */
   49.46 +    __u8  s_def_hash_version;	/* Default hash version to use */
   49.47 +    __u8  s_jnl_backup_type; 	/* Default type of journal backup */
   49.48 +    __u16 s_reserved_word_pad;
   49.49 +    __u32 s_default_mount_opts;
   49.50 +    __u32 s_first_meta_bg;	/* First metablock group */
   49.51 +    __u32 s_mkfs_time;		/* When the filesystem was created */
   49.52 +    __u32 s_jnl_blocks[17]; 	/* Backup of the journal inode */
   49.53 +    __u32 s_reserved[172];	/* Padding to the end of the block */
   49.54    };
   49.55  
   49.56  struct ext2_group_desc
   49.57 @@ -216,6 +261,9 @@ struct ext2_dir_entry
   49.58  #define EXT2_ADDR_PER_BLOCK(s)          (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
   49.59  #define EXT2_ADDR_PER_BLOCK_BITS(s)		(log2(EXT2_ADDR_PER_BLOCK(s)))
   49.60  
   49.61 +#define EXT2_INODE_SIZE(s)		(SUPERBLOCK->s_inode_size)
   49.62 +#define EXT2_INODES_PER_BLOCK(s)	(EXT2_BLOCK_SIZE(s)/EXT2_INODE_SIZE(s))
   49.63 +
   49.64  /* linux/ext2_fs.h */
   49.65  #define EXT2_BLOCK_SIZE_BITS(s)        ((s)->s_log_block_size + 10)
   49.66  /* kind of from ext2/super.c */
   49.67 @@ -537,7 +585,7 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
   49.68        gdp = GROUP_DESC;
   49.69        ino_blk = gdp[desc].bg_inode_table +
   49.70  	(((current_ino - 1) % (SUPERBLOCK->s_inodes_per_group))
   49.71 -	 >> log2 (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)));
   49.72 +	 >> log2 (EXT2_INODES_PER_BLOCK (SUPERBLOCK)));
   49.73  #ifdef E2DEBUG
   49.74        printf ("inode table fsblock=%d\n", ino_blk);
   49.75  #endif /* E2DEBUG */
   49.76 @@ -549,13 +597,12 @@ ext2fs_dir (fsi_file_t *ffi, char *dirna
   49.77        /* reset indirect blocks! */
   49.78        mapblock2 = mapblock1 = -1;
   49.79  
   49.80 -      raw_inode = INODE +
   49.81 -	((current_ino - 1)
   49.82 -	 & (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode) - 1));
   49.83 +      raw_inode = (struct ext2_inode *)((char *)INODE +
   49.84 +	((current_ino - 1) & (EXT2_INODES_PER_BLOCK (SUPERBLOCK) - 1)) *
   49.85 +	EXT2_INODE_SIZE (SUPERBLOCK));
   49.86  #ifdef E2DEBUG
   49.87        printf ("ipb=%d, sizeof(inode)=%d\n",
   49.88 -	      (EXT2_BLOCK_SIZE (SUPERBLOCK) / sizeof (struct ext2_inode)),
   49.89 -	      sizeof (struct ext2_inode));
   49.90 +	      EXT2_INODES_PER_BLOCK (SUPERBLOCK), EXT2_INODE_SIZE (SUPERBLOCK));
   49.91        printf ("inode=%x, raw_inode=%x\n", INODE, raw_inode);
   49.92        printf ("offset into inode table block=%d\n", (int) raw_inode - (int) INODE);
   49.93        for (i = (unsigned char *) INODE; i <= (unsigned char *) raw_inode;
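
The compat/incompat comment added to the superblock implies a concrete mount-time rule; a minimal sketch, where EXT2_FEATURE_INCOMPAT_SUPP is a hypothetical mask of the incompatible features an implementation actually handles:

    /* Refuse to mount when unknown incompatible features are present
     * (dynamic-rev superblocks only; rev 0 predates feature flags). */
    if (SUPERBLOCK->s_rev_level >= 1 &&
        (SUPERBLOCK->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP))
        return 0;  /* not safe to interpret this filesystem */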
    50.1 --- a/tools/libxc/xc_hvm_build.c	Thu Apr 24 14:02:16 2008 -0600
    50.2 +++ b/tools/libxc/xc_hvm_build.c	Thu Apr 24 14:08:29 2008 -0600
    50.3 @@ -298,7 +298,7 @@ static int setup_guest(int xc_handle,
    50.4                         _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
    50.5      munmap(ident_pt, PAGE_SIZE);
    50.6      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
    50.7 -                     special_page_nr + SPECIALPAGE_IDENT_PT);
    50.8 +                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
    50.9  
   50.10      /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
   50.11      entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
    51.1 --- a/tools/python/xen/util/acmpolicy.py	Thu Apr 24 14:02:16 2008 -0600
    51.2 +++ b/tools/python/xen/util/acmpolicy.py	Thu Apr 24 14:08:29 2008 -0600
    51.3 @@ -17,6 +17,7 @@
    51.4  #============================================================================
    51.5  
    51.6  import os
    51.7 +import sha
    51.8  import stat
    51.9  import array
   51.10  import struct
   51.11 @@ -35,7 +36,7 @@ ACM_POLICIES_DIR = security.policy_dir_p
   51.12  
   51.13  # Constants needed for generating a binary policy from its XML
   51.14  # representation
   51.15 -ACM_POLICY_VERSION = 3  # Latest one
   51.16 +ACM_POLICY_VERSION = 4  # Latest one
   51.17  ACM_CHWALL_VERSION = 1
   51.18  
   51.19  ACM_STE_VERSION = 1
   51.20 @@ -965,6 +966,10 @@ class ACMPolicy(XSPolicy):
   51.21              return dom.toxml()
   51.22          return None
   51.23  
   51.24 +    def hash(self):
    51.25 +        """ Calculate a SHA1 hash of the XML policy """
   51.26 +        return sha.sha(self.toxml())
   51.27 +
   51.28      def save(self):
   51.29          ### Save the XML policy into a file ###
   51.30          rc = -xsconstants.XSERR_FILE_ERROR
   51.31 @@ -1403,7 +1408,7 @@ class ACMPolicy(XSPolicy):
   51.32              ste_bin += "\x00"
   51.33  
   51.34          #Write binary header:
   51.35 -        headerformat="!iiiiiiiiii"
   51.36 +        headerformat="!iiiiiiiiii20s"
   51.37          totallen_bin = struct.calcsize(headerformat) + \
   51.38                         len(pr_bin) + len(chw_bin) + len(ste_bin)
   51.39          polref_offset = struct.calcsize(headerformat)
   51.40 @@ -1425,7 +1430,8 @@ class ACMPolicy(XSPolicy):
   51.41                                primpoloffset,
   51.42                                secpolcode,
   51.43                                secpoloffset,
   51.44 -                              major, minor)
   51.45 +                              major, minor,
   51.46 +                              self.hash().digest())
   51.47  
   51.48          all_bin = array.array('B')
   51.49          for s in [ hdr_bin, pr_bin, chw_bin, ste_bin ]:
   51.50 @@ -1443,6 +1449,21 @@ class ACMPolicy(XSPolicy):
   51.51              rc = -xsconstants.XSERR_BAD_LABEL
   51.52          return rc, mapfile, all_bin.tostring()
   51.53  
   51.54 +    def validate_enforced_policy_hash(self):
    51.55 +        """ Verify that the policy hash embedded in the binary policy
    51.56 +            that is currently enforced matches that of the XML policy.
   51.57 +        """
   51.58 +        if self.hash().digest() != self.get_enforced_policy_hash():
   51.59 +            raise Exception('Policy hashes do not match')
   51.60 +
   51.61 +    def get_enforced_policy_hash(self):
   51.62 +        binpol = self.get_enforced_binary()
   51.63 +        headerformat="!iiiiiiiiii20s"
   51.64 +        res = struct.unpack(headerformat, binpol[:60])
   51.65 +        if len(res) >= 11:
   51.66 +            return res[10]
   51.67 +        return None
   51.68 +
   51.69      def get_enforced_binary(self):
   51.70          rc, binpol = security.hv_get_policy()
   51.71          if rc != 0:
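
The header format deserves a note: "!iiiiiiiiii20s" is ten 4-byte network-order integers followed by the 20-byte SHA1 digest, which is why get_enforced_policy_hash slices binpol[:60]. A round-trip sketch (placeholder field values):

    import sha, struct

    fmt = "!iiiiiiiiii20s"
    assert struct.calcsize(fmt) == 60            # 10 * 4 + 20
    hdr = struct.pack(fmt, *([0] * 10 + [sha.sha('<xml/>').digest()]))
    digest = struct.unpack(fmt, hdr[:60])[10]    # the embedded hash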
    52.1 --- a/tools/python/xen/xend/XendDomain.py	Thu Apr 24 14:02:16 2008 -0600
    52.2 +++ b/tools/python/xen/xend/XendDomain.py	Thu Apr 24 14:08:29 2008 -0600
    52.3 @@ -1622,7 +1622,31 @@ class XendDomain:
    52.4                                            vcpu)
    52.5          except Exception, ex:
    52.6              raise XendError(str(ex))
    52.7 - 
    52.8 +
    52.9 +    def domain_reset(self, domid):
    52.10 +        """Terminate the domain immediately, then re-create it.
   52.11 +
   52.12 +        @param domid: Domain ID or Name
   52.13 +        @type domid: int or string.
   52.14 +        @rtype: None
   52.15 +        @raise XendError: Failed to destroy or create
   52.16 +        @raise XendInvalidDomain: Domain is not valid
   52.17 +        """
   52.18 +
   52.19 +        dominfo = self.domain_lookup_nr(domid)
   52.20 +        if not dominfo:
   52.21 +            raise XendInvalidDomain(str(domid))
   52.22 +        if dominfo and dominfo.getDomid() == DOM0_ID:
   52.23 +            raise XendError("Cannot reset privileged domain %s" % domid)
   52.24 +        if dominfo._stateGet() not in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
   52.25 +            raise VMBadState("Domain '%s' is not started" % domid,
   52.26 +                             POWER_STATE_NAMES[DOM_STATE_RUNNING],
   52.27 +                             POWER_STATE_NAMES[dominfo._stateGet()])
   52.28 +        try:
   52.29 +            dominfo.resetDomain()
   52.30 +        except Exception, ex:
   52.31 +            raise XendError(str(ex))
   52.32 +
   52.33  
   52.34  def instance():
   52.35      """Singleton constructor. Use this instead of the class constructor.
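
With domain_reset exposed here and the `xm reset` subcommand wired up below, the whole cycle is driven by a single command, e.g. `xm reset mydomain`, which destroys the running domain and rebuilds it from its retained configuration.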
    53.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu Apr 24 14:02:16 2008 -0600
    53.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Thu Apr 24 14:08:29 2008 -0600
    53.3 @@ -1837,6 +1837,9 @@ class XendDomainInfo:
    53.4  
    53.5          @raise: VmError for invalid devices
    53.6          """
    53.7 +        if self.image:
    53.8 +            self.image.prepareEnvironment()
    53.9 +
   53.10          ordered_refs = self.info.ordered_device_refs()
   53.11          for dev_uuid in ordered_refs:
   53.12              devclass, config = self.info['devices'][dev_uuid]
   53.13 @@ -2323,6 +2326,34 @@ class XendDomainInfo:
   53.14          self._cleanup_phantom_devs(paths)
   53.15  
   53.16  
   53.17 +    def resetDomain(self):
   53.18 +        log.debug("XendDomainInfo.resetDomain(%s)", str(self.domid))
   53.19 +
   53.20 +        old_domid = self.domid
   53.21 +        prev_vm_xend = self._listRecursiveVm('xend')
   53.22 +        new_dom_info = self.info
   53.23 +        try:
   53.24 +            self._unwatchVm()
   53.25 +            self.destroy()
   53.26 +
   53.27 +            new_dom = None
   53.28 +            try:
   53.29 +                from xen.xend import XendDomain
   53.30 +                new_dom_info['domid'] = None
   53.31 +                new_dom = XendDomain.instance().domain_create_from_dict(
   53.32 +                    new_dom_info)
   53.33 +                for x in prev_vm_xend[0][1]:
   53.34 +                    new_dom._writeVm('xend/%s' % x[0], x[1])
   53.35 +                new_dom.waitForDevices()
   53.36 +                new_dom.unpause()
   53.37 +            except:
   53.38 +                if new_dom:
   53.39 +                    new_dom.destroy()
   53.40 +                raise
   53.41 +        except:
   53.42 +            log.exception('Failed to reset domain %s.', str(old_domid))
   53.43 +
   53.44 +
   53.45      def resumeDomain(self):
   53.46          log.debug("XendDomainInfo.resumeDomain(%s)", str(self.domid))
   53.47  
    54.1 --- a/tools/python/xen/xend/XendXSPolicyAdmin.py	Thu Apr 24 14:02:16 2008 -0600
    54.2 +++ b/tools/python/xen/xend/XendXSPolicyAdmin.py	Thu Apr 24 14:08:29 2008 -0600
    54.3 @@ -54,6 +54,7 @@ class XSPolicyAdmin:
    54.4          try:
    54.5              self.xsobjs[ref] = ACMPolicy(name=act_pol_name, ref=ref)
    54.6              self.policies[ref] = (act_pol_name, xsconstants.ACM_POLICY_ID)
    54.7 +            self.xsobjs[ref].validate_enforced_policy_hash()
    54.8          except Exception, e:
    54.9              log.error("Could not find XML representation of policy '%s': "
   54.10                        "%s" % (act_pol_name,e))
    55.1 --- a/tools/python/xen/xend/image.py	Thu Apr 24 14:02:16 2008 -0600
    55.2 +++ b/tools/python/xen/xend/image.py	Thu Apr 24 14:08:29 2008 -0600
    55.3 @@ -185,6 +185,42 @@ class ImageHandler:
    55.4          """Build the domain. Define in subclass."""
    55.5          raise NotImplementedError()
    55.6  
    55.7 +    def prepareEnvironment(self):
    55.8 +        """Prepare the environment for the execution of the domain. This
    55.9 +        method is called before any devices are set up."""
    55.10 +
    55.11 +        domid = self.vm.getDomid()
    55.12 +
   55.13 +        # Delete left-over pipes
   55.14 +        try:
   55.15 +            os.unlink('/var/run/tap/qemu-read-%d' % domid)
   55.16 +            os.unlink('/var/run/tap/qemu-write-%d' % domid)
   55.17 +        except:
   55.18 +            pass
   55.19 +
   55.20 +        # No device model, don't create pipes
   55.21 +        if self.device_model is None:
   55.22 +            return
   55.23 +
   55.24 +        # If we use a device model, the pipes for communication between
   55.25 +        # blktapctrl and ioemu must be present before the devices are 
   55.26 +        # created (blktapctrl must access them for new block devices)
   55.27 +
   55.28 +        # mkdir throws an exception if the path already exists
   55.29 +        try:
   55.30 +            os.mkdir('/var/run/tap', 0755)
   55.31 +        except:
   55.32 +            pass
   55.33 +
   55.34 +        try:
   55.35 +            os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600)
   55.36 +            os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600)
   55.37 +        except OSError, e:
   55.38 +            log.warn('Could not create blktap pipes for domain %d' % domid)
   55.39 +            log.exception(e)
   55.40 +            pass
   55.41 +
   55.42 +
   55.43      # Return a list of cmd line args to the device models based on the
   55.44      # xm config file
   55.45      def parseDeviceModelArgs(self, vmConfig):
   55.46 @@ -411,6 +447,12 @@ class ImageHandler:
   55.47              self.pid = None
   55.48              state = xstransact.Remove("/local/domain/0/device-model/%i"
   55.49                                        % self.vm.getDomid())
   55.50 +            
   55.51 +            try:
   55.52 +                os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid())
   55.53 +                os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid())
   55.54 +            except:
   55.55 +                pass
   55.56  
   55.57  
   55.58  class LinuxImageHandler(ImageHandler):
   55.59 @@ -643,7 +685,9 @@ class IA64_HVM_ImageHandler(HVMImageHand
   55.60          # ROM size for guest firmware, io page, xenstore page
   55.61          # buffer io page, buffer pio page and memmap info page
   55.62          extra_pages = 1024 + 5
   55.63 -        return mem_kb + extra_pages * page_kb
   55.64 +        mem_kb += extra_pages * page_kb
   55.65 +        # Add 8 MiB overhead for QEMU's video RAM.
   55.66 +        return mem_kb + 8192
   55.67  
   55.68      def getRequiredInitialReservation(self):
   55.69          return self.vm.getMemoryTarget()
    56.1 --- a/tools/python/xen/xm/main.py	Thu Apr 24 14:02:16 2008 -0600
    56.2 +++ b/tools/python/xen/xm/main.py	Thu Apr 24 14:08:29 2008 -0600
    56.3 @@ -107,6 +107,7 @@ SUBCOMMAND_HELP = {
    56.4                       'Migrate a domain to another machine.'),
    56.5      'pause'       : ('<Domain>', 'Pause execution of a domain.'),
    56.6      'reboot'      : ('<Domain> [-wa]', 'Reboot a domain.'),
    56.7 +    'reset'       : ('<Domain>', 'Reset a domain.'),
    56.8      'restore'     : ('<CheckpointFile> [-p]',
    56.9                       'Restore a domain from a saved state.'),
   56.10      'save'        : ('[-c] <Domain> <CheckpointFile>',
   56.11 @@ -274,6 +275,7 @@ common_commands = [
   56.12      "migrate",
   56.13      "pause",
   56.14      "reboot",
   56.15 +    "reset",
   56.16      "restore",
   56.17      "resume",
   56.18      "save",
   56.19 @@ -303,6 +305,7 @@ domain_commands = [
   56.20      "pause",
   56.21      "reboot",
   56.22      "rename",
   56.23 +    "reset",
   56.24      "restore",
   56.25      "resume",
   56.26      "save",
   56.27 @@ -1248,6 +1251,13 @@ def xm_shutdown(args):
   56.28      from xen.xm import shutdown
   56.29      shutdown.main(["shutdown"] + args)
   56.30  
   56.31 +def xm_reset(args):
   56.32 +    arg_check(args, "reset", 1)
   56.33 +    dom = args[0]
   56.34 +
   56.35 +    # TODO: XenAPI
   56.36 +    server.xend.domain.reset(dom)
   56.37 +
   56.38  def xm_pause(args):
   56.39      arg_check(args, "pause", 1)
   56.40      dom = args[0]
   56.41 @@ -2474,6 +2484,7 @@ commands = {
   56.42      "dump-core": xm_dump_core,
   56.43      "reboot": xm_reboot,
   56.44      "rename": xm_rename,
   56.45 +    "reset": xm_reset,
   56.46      "restore": xm_restore,
   56.47      "resume": xm_resume,
   56.48      "save": xm_save,
    57.1 --- a/tools/tests/test_x86_emulator.c	Thu Apr 24 14:02:16 2008 -0600
    57.2 +++ b/tools/tests/test_x86_emulator.c	Thu Apr 24 14:08:29 2008 -0600
    57.3 @@ -26,14 +26,8 @@ static int read(
    57.4      unsigned int bytes,
    57.5      struct x86_emulate_ctxt *ctxt)
    57.6  {
    57.7 -    unsigned long addr = offset;
    57.8 -    switch ( bytes )
    57.9 -    {
   57.10 -    case 1: *val = *(uint8_t *)addr; break;
   57.11 -    case 2: *val = *(uint16_t *)addr; break;
   57.12 -    case 4: *val = *(uint32_t *)addr; break;
   57.13 -    case 8: *val = *(unsigned long *)addr; break;
   57.14 -    }
   57.15 +    *val = 0;
   57.16 +    memcpy(val, (void *)offset, bytes);
   57.17      return X86EMUL_OKAY;
   57.18  }
   57.19  
   57.20 @@ -44,48 +38,19 @@ static int write(
   57.21      unsigned int bytes,
   57.22      struct x86_emulate_ctxt *ctxt)
   57.23  {
   57.24 -    unsigned long addr = offset;
   57.25 -    switch ( bytes )
   57.26 -    {
   57.27 -    case 1: *(uint8_t *)addr = (uint8_t)val; break;
   57.28 -    case 2: *(uint16_t *)addr = (uint16_t)val; break;
   57.29 -    case 4: *(uint32_t *)addr = (uint32_t)val; break;
   57.30 -    case 8: *(unsigned long *)addr = val; break;
   57.31 -    }
   57.32 +    memcpy((void *)offset, &val, bytes);
   57.33      return X86EMUL_OKAY;
   57.34  }
   57.35  
   57.36  static int cmpxchg(
   57.37      unsigned int seg,
   57.38      unsigned long offset,
   57.39 -    unsigned long old,
   57.40 -    unsigned long new,
   57.41 +    void *old,
   57.42 +    void *new,
   57.43      unsigned int bytes,
   57.44      struct x86_emulate_ctxt *ctxt)
   57.45  {
   57.46 -    unsigned long addr = offset;
   57.47 -    switch ( bytes )
   57.48 -    {
   57.49 -    case 1: *(uint8_t *)addr = (uint8_t)new; break;
   57.50 -    case 2: *(uint16_t *)addr = (uint16_t)new; break;
   57.51 -    case 4: *(uint32_t *)addr = (uint32_t)new; break;
   57.52 -    case 8: *(unsigned long *)addr = new; break;
   57.53 -    }
   57.54 -    return X86EMUL_OKAY;
   57.55 -}
   57.56 -
   57.57 -static int cmpxchg8b(
   57.58 -    unsigned int seg,
   57.59 -    unsigned long offset,
   57.60 -    unsigned long old_lo,
   57.61 -    unsigned long old_hi,
   57.62 -    unsigned long new_lo,
   57.63 -    unsigned long new_hi,
   57.64 -    struct x86_emulate_ctxt *ctxt)
   57.65 -{
   57.66 -    unsigned long addr = offset;
   57.67 -    ((unsigned long *)addr)[0] = new_lo;
   57.68 -    ((unsigned long *)addr)[1] = new_hi;
   57.69 +    memcpy((void *)offset, new, bytes);
   57.70      return X86EMUL_OKAY;
   57.71  }
   57.72  
   57.73 @@ -94,7 +59,6 @@ static struct x86_emulate_ops emulops = 
   57.74      .insn_fetch = read,
   57.75      .write      = write,
   57.76      .cmpxchg    = cmpxchg,
   57.77 -    .cmpxchg8b  = cmpxchg8b
   57.78  };
   57.79  
   57.80  int main(int argc, char **argv)
    58.1 --- a/tools/tests/x86_emulate.c	Thu Apr 24 14:02:16 2008 -0600
    58.2 +++ b/tools/tests/x86_emulate.c	Thu Apr 24 14:08:29 2008 -0600
    58.3 @@ -4,10 +4,4 @@
    58.4  #include <public/xen.h>
    58.5  
    58.6  #include "x86_emulate/x86_emulate.h"
    58.7 -
    58.8 -#define __emulate_fpu_insn(_op)                 \
    58.9 -do{ rc = X86EMUL_UNHANDLEABLE;                  \
   58.10 -    goto done;                                  \
   58.11 -} while (0)
   58.12 -
   58.13  #include "x86_emulate/x86_emulate.c"
    59.1 --- a/tools/xenmon/xenbaked.c	Thu Apr 24 14:02:16 2008 -0600
    59.2 +++ b/tools/xenmon/xenbaked.c	Thu Apr 24 14:08:29 2008 -0600
    59.3 @@ -509,14 +509,36 @@ int monitor_tbufs(void)
    59.4      {
    59.5          for ( i = 0; (i < num) && !interrupted; i++ )
    59.6          {
    59.7 -            while ( meta[i]->cons != meta[i]->prod )
    59.8 +            unsigned long start_offset, end_offset, cons, prod;
    59.9 +
   59.10 +            cons = meta[i]->cons;
   59.11 +            prod = meta[i]->prod;
   59.12 +            xen_rmb(); /* read prod, then read item. */
   59.13 +
   59.14 +            if ( cons == prod )
   59.15 +                continue;
   59.16 +
   59.17 +            start_offset = cons % data_size;
   59.18 +            end_offset = prod % data_size;
   59.19 +
   59.20 +            if ( start_offset >= end_offset )
   59.21              {
   59.22 -                xen_rmb(); /* read prod, then read item. */
   59.23 +                while ( start_offset != data_size )
   59.24 +                {
   59.25 +                    rec_size = process_record(
   59.26 +                        i, (struct t_rec *)(data[i] + start_offset));
   59.27 +                    start_offset += rec_size;
   59.28 +                }
   59.29 +                start_offset = 0;
   59.30 +            }
   59.31 +            while ( start_offset != end_offset )
   59.32 +            {
   59.33                  rec_size = process_record(
   59.34 -                    i, (struct t_rec *)(data[i] + meta[i]->cons % data_size));
   59.35 -                xen_mb(); /* read item, then update cons. */
   59.36 -                meta[i]->cons += rec_size;
   59.37 +                    i, (struct t_rec *)(data[i] + start_offset));
   59.38 +                start_offset += rec_size;
   59.39              }
   59.40 +            xen_mb(); /* read item, then update cons. */
   59.41 +            meta[i]->cons = prod;
   59.42          }
   59.43  
   59.44  	wait_for_event();
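
A note on the restructuring: the old loop executed a read barrier and a cons update per record; the new one snapshots prod once, drains [cons, prod) as at most two linear spans (handling wrap-around at data_size), and publishes cons with a single write barrier per batch.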
    60.1 --- a/xen/Makefile	Thu Apr 24 14:02:16 2008 -0600
    60.2 +++ b/xen/Makefile	Thu Apr 24 14:08:29 2008 -0600
    60.3 @@ -44,6 +44,7 @@ build install debug clean distclean csco
    60.4  	$(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean
    60.5  	rm -f include/asm *.o $(TARGET)* *~ core
    60.6  	rm -f include/asm-*/asm-offsets.h
    60.7 +	[ -d tools/figlet ] && rm -f .banner*
    60.8  
    60.9  .PHONY: _distclean
   60.10  _distclean: clean
   60.11 @@ -70,8 +71,14 @@ delete-unfresh-files:
   60.12  		rm -f include/xen/compile.h; \
   60.13  	fi
   60.14  
   60.15 +.banner: Makefile
   60.16 +	$(MAKE) -C tools
   60.17 +	@tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) 2>$@2 >$@1
   60.18 +	@cat $@1 $@2 >$@
   60.19 +	@rm -f $@1 $@2
   60.20 +
   60.21  # compile.h contains dynamic build info. Rebuilt on every 'make' invocation.
   60.22 -include/xen/compile.h: include/xen/compile.h.in
   60.23 +include/xen/compile.h: include/xen/compile.h.in .banner
   60.24  	@sed -e 's/@@date@@/$(shell LC_ALL=C date)/g' \
   60.25  	    -e 's/@@time@@/$(shell LC_ALL=C date +%T)/g' \
   60.26  	    -e 's/@@whoami@@/$(USER)/g' \
   60.27 @@ -83,7 +90,8 @@ include/xen/compile.h: include/xen/compi
   60.28  	    -e 's/@@extraversion@@/$(XEN_EXTRAVERSION)/g' \
   60.29  	    -e 's!@@changeset@@!$(shell ((hg parents --template "{date|date} {rev}:{node|short}" >/dev/null && hg parents --template "{date|date} {rev}:{node|short}") || echo "unavailable") 2>/dev/null)!g' \
   60.30  	    < include/xen/compile.h.in > $@.new
   60.31 -	tools/figlet/figlet -d tools/figlet Xen $(XEN_FULLVERSION) >> $@.new
   60.32 +	@grep \" .banner >> $@.new
   60.33 +	@grep -v \" .banner
   60.34  	@mv -f $@.new $@
   60.35  
   60.36  include/asm-$(TARGET_ARCH)/asm-offsets.h: arch/$(TARGET_ARCH)/asm-offsets.s
    61.1 --- a/xen/arch/x86/Makefile	Thu Apr 24 14:02:16 2008 -0600
    61.2 +++ b/xen/arch/x86/Makefile	Thu Apr 24 14:08:29 2008 -0600
    61.3 @@ -52,6 +52,8 @@ obj-y += tboot.o
    61.4  
    61.5  obj-$(crash_debug) += gdbstub.o
    61.6  
    61.7 +x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
    61.8 +
    61.9  $(TARGET): $(TARGET)-syms boot/mkelf32
   61.10  	./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 \
   61.11  	`$(NM) -nr $(TARGET)-syms | head -n 1 | sed -e 's/^\([^ ]*\).*/0x\1/'`
    62.1 --- a/xen/arch/x86/bitops.c	Thu Apr 24 14:02:16 2008 -0600
    62.2 +++ b/xen/arch/x86/bitops.c	Thu Apr 24 14:08:29 2008 -0600
    62.3 @@ -8,17 +8,18 @@ unsigned int __find_first_bit(
    62.4      unsigned long d0, d1, res;
    62.5  
    62.6      asm volatile (
    62.7 -        "   xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
    62.8 +        "1: xor %%eax,%%eax\n\t" /* also ensures ZF==1 if size==0 */
    62.9          "   repe; scas"__OS"\n\t"
   62.10 -        "   je 1f\n\t"
   62.11 +        "   je 2f\n\t"
   62.12 +        "   bsf -"STR(BITS_PER_LONG/8)"(%2),%0\n\t"
   62.13 +        "   jz 1b\n\t"
   62.14          "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
   62.15 -        "   bsf (%2),%0\n"
   62.16 -        "1: sub %%ebx,%%edi\n\t"
   62.17 +        "2: sub %%ebx,%%edi\n\t"
   62.18          "   shl $3,%%edi\n\t"
   62.19          "   add %%edi,%%eax"
   62.20          : "=&a" (res), "=&c" (d0), "=&D" (d1)
   62.21 -        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
   62.22 -          "2" (addr), "b" ((int)(long)addr) : "memory" );
   62.23 +        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
   62.24 +        : "memory" );
   62.25  
   62.26      return res;
   62.27  }
   62.28 @@ -34,8 +35,7 @@ unsigned int __find_next_bit(
   62.29      if ( bit != 0 )
   62.30      {
   62.31          /* Look for a bit in the first word. */
   62.32 -        asm ( "bsf %1,%%"__OP"ax"
   62.33 -              : "=a" (set) : "r" (*p >> bit), "0" (BITS_PER_LONG) );
   62.34 +        set = __scanbit(*p >> bit, BITS_PER_LONG - bit);
   62.35          if ( set < (BITS_PER_LONG - bit) )
   62.36              return (offset + set);
   62.37          offset += BITS_PER_LONG - bit;
   62.38 @@ -56,18 +56,20 @@ unsigned int __find_first_zero_bit(
   62.39      unsigned long d0, d1, d2, res;
   62.40  
   62.41      asm volatile (
   62.42 +        "1: xor %%eax,%%eax ; not %3\n\t" /* rAX == ~0ul */
   62.43          "   xor %%edx,%%edx\n\t" /* also ensures ZF==1 if size==0 */
   62.44          "   repe; scas"__OS"\n\t"
   62.45 -        "   je 1f\n\t"
   62.46 +        "   je 2f\n\t"
   62.47 +        "   xor -"STR(BITS_PER_LONG/8)"(%2),%3\n\t"
   62.48 +        "   jz 1b\n\t"
   62.49 +        "   bsf %3,%0\n\t"
   62.50          "   lea -"STR(BITS_PER_LONG/8)"(%2),%2\n\t"
   62.51 -        "   xor (%2),%3\n\t"
   62.52 -        "   bsf %3,%0\n"
   62.53 -        "1: sub %%ebx,%%edi\n\t"
   62.54 +        "2: sub %%ebx,%%edi\n\t"
   62.55          "   shl $3,%%edi\n\t"
   62.56          "   add %%edi,%%edx"
   62.57          : "=&d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
   62.58 -        : "1" ((size + BITS_PER_LONG - 1) / BITS_PER_LONG),
   62.59 -          "2" (addr), "b" ((int)(long)addr), "3" (-1L) : "memory" );
   62.60 +        : "1" (BITS_TO_LONGS(size)), "2" (addr), "b" ((int)(long)addr)
   62.61 +        : "memory" );
   62.62  
   62.63      return res;
   62.64  }
   62.65 @@ -83,7 +85,7 @@ unsigned int __find_next_zero_bit(
   62.66      if ( bit != 0 )
   62.67      {
   62.68          /* Look for zero in the first word. */
   62.69 -        asm ( "bsf %1,%%"__OP"ax" : "=a" (set) : "r" (~(*p >> bit)) );
   62.70 +        set = __scanbit(~(*p >> bit), BITS_PER_LONG - bit);
   62.71          if ( set < (BITS_PER_LONG - bit) )
   62.72              return (offset + set);
   62.73          offset += BITS_PER_LONG - bit;
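
The subtle addition in the asm above is the `jz 1b` after `bsf`: these helpers scan live bitmaps that other CPUs may clear concurrently, and `bsf` re-reads the word that stopped `repe; scas`, so it can observe zero. Roughly, in C (an illustration of the logic, not the real implementation):

    unsigned int find_first_bit_sketch(const unsigned long *addr,
                                       unsigned int size)
    {
        unsigned int i;
        for (i = 0; i < BITS_TO_LONGS(size); i++) {
            if (addr[i] != 0) {            /* where repe; scas stops */
                unsigned long w = addr[i]; /* bsf re-reads memory...   */
                if (w == 0)
                    continue;              /* ...which may now be zero */
                return i * BITS_PER_LONG + __scanbit(w, BITS_PER_LONG);
            }
        }
        return size;  /* no set bit (the asm returns a rounded-up size) */
    }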
    63.1 --- a/xen/arch/x86/cpu/mtrr/main.c	Thu Apr 24 14:02:16 2008 -0600
    63.2 +++ b/xen/arch/x86/cpu/mtrr/main.c	Thu Apr 24 14:08:29 2008 -0600
    63.3 @@ -586,8 +586,6 @@ struct mtrr_value {
    63.4  	unsigned long	lsize;
    63.5  };
    63.6  
    63.7 -extern void global_init_mtrr_pat(void);
    63.8 -
    63.9  /**
   63.10   * mtrr_bp_init - initialize mtrrs on the boot CPU
   63.11   *
   63.12 @@ -654,11 +652,8 @@ void __init mtrr_bp_init(void)
   63.13  	if (mtrr_if) {
   63.14  		set_num_var_ranges();
   63.15  		init_table();
   63.16 -		if (use_intel()) {
   63.17 +		if (use_intel())
   63.18  			get_mtrr_state();
   63.19 -			/* initialize some global data for MTRR/PAT virutalization */
   63.20 -			global_init_mtrr_pat();
   63.21 -		}
   63.22  	}
   63.23  }
   63.24  
    64.1 --- a/xen/arch/x86/domain.c	Thu Apr 24 14:02:16 2008 -0600
    64.2 +++ b/xen/arch/x86/domain.c	Thu Apr 24 14:08:29 2008 -0600
    64.3 @@ -521,11 +521,11 @@ int arch_domain_create(struct domain *d,
    64.4          clear_page(d->shared_info);
    64.5          share_xen_page_with_guest(
    64.6              virt_to_page(d->shared_info), d, XENSHARE_writable);
    64.7 +
    64.8 +        if ( (rc = iommu_domain_init(d)) != 0 )
    64.9 +            goto fail;
   64.10      }
   64.11  
   64.12 -    if ( (rc = iommu_domain_init(d)) != 0 )
   64.13 -        goto fail;
   64.14 -
   64.15      if ( is_hvm_domain(d) )
   64.16      {
   64.17          if ( (rc = hvm_domain_initialise(d)) != 0 )
   64.18 @@ -562,7 +562,8 @@ void arch_domain_destroy(struct domain *
   64.19      if ( is_hvm_domain(d) )
   64.20          hvm_domain_destroy(d);
   64.21  
   64.22 -    iommu_domain_destroy(d);
   64.23 +    if ( !is_idle_domain(d) )
   64.24 +        iommu_domain_destroy(d);
   64.25  
   64.26      paging_final_teardown(d);
   64.27  
    65.1 --- a/xen/arch/x86/domain_build.c	Thu Apr 24 14:02:16 2008 -0600
    65.2 +++ b/xen/arch/x86/domain_build.c	Thu Apr 24 14:08:29 2008 -0600
    65.3 @@ -957,8 +957,8 @@ int __init construct_dom0(
    65.4      rc |= ioports_deny_access(dom0, 0x40, 0x43);
    65.5      /* PIT Channel 2 / PC Speaker Control. */
    65.6      rc |= ioports_deny_access(dom0, 0x61, 0x61);
    65.7 -    /* PCI configuration spaces. */
    65.8 -    rc |= ioports_deny_access(dom0, 0xcf8, 0xcff);
    65.9 +    /* PCI configuration space (NB. 0xcf8 has special treatment). */
   65.10 +    rc |= ioports_deny_access(dom0, 0xcfc, 0xcff);
   65.11      /* Command-line I/O ranges. */
   65.12      process_dom0_ioports_disable();
   65.13  
    66.1 --- a/xen/arch/x86/hvm/emulate.c	Thu Apr 24 14:02:16 2008 -0600
    66.2 +++ b/xen/arch/x86/hvm/emulate.c	Thu Apr 24 14:08:29 2008 -0600
    66.3 @@ -28,6 +28,33 @@ static int hvmemul_do_io(
    66.4      ioreq_t *p = &vio->vp_ioreq;
    66.5      int rc;
    66.6  
    66.7 +    /* Only retrieve the value from singleton (non-REP) reads. */
    66.8 +    ASSERT((val == NULL) || ((dir == IOREQ_READ) && !value_is_ptr));
    66.9 +
   66.10 +    if ( is_mmio && !value_is_ptr )
   66.11 +    {
   66.12 +        /* Part of a multi-cycle read or write? */
   66.13 +        if ( dir == IOREQ_WRITE )
   66.14 +        {
   66.15 +            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
   66.16 +            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
   66.17 +            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
   66.18 +                return X86EMUL_OKAY;
   66.19 +        }
   66.20 +        else
   66.21 +        {
   66.22 +            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
   66.23 +            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
   66.24 +            if ( (addr >= pa) && ((addr + size) <= (pa + bytes)) )
   66.25 +            {
   66.26 +                *val = 0;
   66.27 +                memcpy(val, &curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
   66.28 +                       size);
   66.29 +                return X86EMUL_OKAY;
   66.30 +            }
   66.31 +        }
   66.32 +    }
   66.33 +
   66.34      switch ( curr->arch.hvm_vcpu.io_state )
   66.35      {
   66.36      case HVMIO_none:
   66.37 @@ -36,8 +63,13 @@ static int hvmemul_do_io(
   66.38          curr->arch.hvm_vcpu.io_state = HVMIO_none;
   66.39          if ( val == NULL )
   66.40              return X86EMUL_UNHANDLEABLE;
   66.41 -        *val = curr->arch.hvm_vcpu.io_data;
   66.42 -        return X86EMUL_OKAY;
   66.43 +        goto finish_access;
   66.44 +    case HVMIO_dispatched:
   66.45 +        /* May have to wait for previous cycle of a multi-write to complete. */
   66.46 +        if ( is_mmio && !value_is_ptr && (dir == IOREQ_WRITE) &&
   66.47 +             (addr == (curr->arch.hvm_vcpu.mmio_large_write_pa +
   66.48 +                       curr->arch.hvm_vcpu.mmio_large_write_bytes)) )
   66.49 +            return X86EMUL_RETRY;
   66.50      default:
   66.51          return X86EMUL_UNHANDLEABLE;
   66.52      }
   66.53 @@ -80,8 +112,6 @@ static int hvmemul_do_io(
   66.54          *reps = p->count;
   66.55          p->state = STATE_IORESP_READY;
   66.56          hvm_io_assist();
   66.57 -        if ( val != NULL )
   66.58 -            *val = curr->arch.hvm_vcpu.io_data;
   66.59          curr->arch.hvm_vcpu.io_state = HVMIO_none;
   66.60          break;
   66.61      case X86EMUL_UNHANDLEABLE:
   66.62 @@ -92,7 +122,43 @@ static int hvmemul_do_io(
   66.63          BUG();
   66.64      }
   66.65  
   66.66 -    return rc;
   66.67 +    if ( rc != X86EMUL_OKAY )
   66.68 +        return rc;
   66.69 +
   66.70 + finish_access:
   66.71 +    if ( val != NULL )
   66.72 +        *val = curr->arch.hvm_vcpu.io_data;
   66.73 +
   66.74 +    if ( is_mmio && !value_is_ptr )
   66.75 +    {
   66.76 +        /* Part of a multi-cycle read or write? */
   66.77 +        if ( dir == IOREQ_WRITE )
   66.78 +        {
   66.79 +            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_write_pa;
   66.80 +            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_write_bytes;
   66.81 +            if ( bytes == 0 )
   66.82 +                pa = curr->arch.hvm_vcpu.mmio_large_write_pa = addr;
   66.83 +            if ( addr == (pa + bytes) )
   66.84 +                curr->arch.hvm_vcpu.mmio_large_write_bytes += size;
   66.85 +        }
   66.86 +        else
   66.87 +        {
   66.88 +            paddr_t pa = curr->arch.hvm_vcpu.mmio_large_read_pa;
   66.89 +            unsigned int bytes = curr->arch.hvm_vcpu.mmio_large_read_bytes;
   66.90 +            if ( bytes == 0 )
   66.91 +                pa = curr->arch.hvm_vcpu.mmio_large_read_pa = addr;
   66.92 +            if ( (addr == (pa + bytes)) &&
   66.93 +                 ((bytes + size) <
   66.94 +                  sizeof(curr->arch.hvm_vcpu.mmio_large_read)) )
   66.95 +            {
   66.96 +                memcpy(&curr->arch.hvm_vcpu.mmio_large_read[addr - pa],
   66.97 +                       val, size);
   66.98 +                curr->arch.hvm_vcpu.mmio_large_read_bytes += size;
   66.99 +            }
  66.100 +        }
  66.101 +    }
  66.102 +
  66.103 +    return X86EMUL_OKAY;
  66.104  }
  66.105  
  66.106  static int hvmemul_do_pio(
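
[annotation] The hunks above let the emulator replay a wide MMIO access as several single cycles: each completed read cycle is appended to a per-vcpu cache, and when the instruction is retried, cycles that already completed are served from the cache instead of being re-issued to the device model. A standalone sketch of that buffering, under simplified assumptions (one global buffer instead of per-vcpu state; sizes up to 8 bytes):

    #include <stdint.h>
    #include <string.h>

    #define MMIO_BUF_SIZE 64            /* assumed cache size */

    static uint64_t cached_pa;          /* start of the buffered range */
    static unsigned int cached_bytes;   /* bytes valid in the cache */
    static uint8_t cache[MMIO_BUF_SIZE];

    /* Returns 1 and fills *val if [addr, addr+size) was already read. */
    static int mmio_read_cached(uint64_t addr, unsigned int size,
                                uint64_t *val)
    {
        if ( (addr < cached_pa) ||
             ((addr + size) > (cached_pa + cached_bytes)) )
            return 0;
        *val = 0;
        memcpy(val, &cache[addr - cached_pa], size);  /* size <= 8 */
        return 1;
    }

    /* Record a freshly completed read cycle, extending the cached range. */
    static void mmio_read_record(uint64_t addr, unsigned int size,
                                 uint64_t val)
    {
        if ( cached_bytes == 0 )
            cached_pa = addr;           /* first cycle anchors the range */
        if ( (addr == cached_pa + cached_bytes) &&
             (cached_bytes + size <= MMIO_BUF_SIZE) )
        {
            memcpy(&cache[addr - cached_pa], &val, size);
            cached_bytes += size;
        }
    }

    int main(void)
    {
        uint64_t v;
        mmio_read_record(0x1000, 4, 0xdeadbeef);        /* cycle completes */
        return mmio_read_cached(0x1000, 4, &v) ? 0 : 1; /* retry hits cache */
    }

Writes only need the physical-address bookkeeping (no data is cached), which is why the write path above tracks just mmio_large_write_pa/bytes.
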
  66.107 @@ -371,11 +437,15 @@ static int hvmemul_write(
  66.108  static int hvmemul_cmpxchg(
  66.109      enum x86_segment seg,
  66.110      unsigned long offset,
  66.111 -    unsigned long old,
  66.112 -    unsigned long new,
  66.113 +    void *p_old,
  66.114 +    void *p_new,
  66.115      unsigned int bytes,
  66.116      struct x86_emulate_ctxt *ctxt)
  66.117  {
  66.118 +    unsigned long new = 0;
  66.119 +    if ( bytes > sizeof(new) )
  66.120 +        return X86EMUL_UNHANDLEABLE;
  66.121 +    memcpy(&new, p_new, bytes);
  66.122      /* Fix this in case the guest is really relying on r-m-w atomicity. */
  66.123      return hvmemul_write(seg, offset, new, bytes, ctxt);
  66.124  }
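
[annotation] The cmpxchg hook now takes the old and new operands by pointer, so one signature can cover widths up to cmpxchg8b/cmpxchg16b; this backend still degrades to a plain write, as the retained comment admits. A hedged sketch of the new contract, with a stub standing in for the real MMIO write path:

    #include <stdint.h>
    #include <string.h>

    #define OKAY          0
    #define UNHANDLEABLE  1

    static int emulate_write(unsigned long val, unsigned int bytes)
    {
        (void)val; (void)bytes;
        return OKAY;                  /* stub: stand-in for the MMIO write */
    }

    static int emulate_cmpxchg(void *p_old, void *p_new, unsigned int bytes)
    {
        unsigned long new = 0;
        (void)p_old;                  /* comparison value unused for now */
        if ( bytes > sizeof(new) )    /* e.g. cmpxchg16b on a 64-bit build */
            return UNHANDLEABLE;
        memcpy(&new, p_new, bytes);   /* widths 1/2/4/8 all funnel here */
        return emulate_write(new, bytes);
    }

    int main(void)
    {
        unsigned long old = 0, new = 0x1234;
        return emulate_cmpxchg(&old, &new, sizeof(new));  /* returns OKAY */
    }
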
  66.125 @@ -603,7 +673,7 @@ static int hvmemul_read_msr(
  66.126  
  66.127      _regs.ecx = (uint32_t)reg;
  66.128  
  66.129 -    if ( (rc = hvm_funcs.msr_read_intercept(&_regs)) != 0 )
  66.130 +    if ( (rc = hvm_msr_read_intercept(&_regs)) != 0 )
  66.131          return rc;
  66.132  
   66.133      *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;
  66.134 @@ -621,7 +691,7 @@ static int hvmemul_write_msr(
  66.135      _regs.eax = (uint32_t)val;
  66.136      _regs.ecx = (uint32_t)reg;
  66.137  
  66.138 -    return hvm_funcs.msr_write_intercept(&_regs);
  66.139 +    return hvm_msr_write_intercept(&_regs);
  66.140  }
  66.141  
  66.142  static int hvmemul_wbinvd(
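
[annotation] RDMSR returns its 64-bit result split across EDX:EAX, so the wrapper must recombine the halves with bitwise '|'; logical '||' would collapse the value to 0 or 1, which is why the recombination in the context line above uses '|'. A self-checking sketch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t msr = 0x1122334455667788ull;
        uint32_t eax = (uint32_t)msr;            /* low half, as on WRMSR */
        uint32_t edx = (uint32_t)(msr >> 32);    /* high half */
        uint64_t recombined = ((uint64_t)edx << 32) | eax;
        assert(recombined == msr);               /* '||' here would give 1 */
        return 0;
    }
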
  66.143 @@ -674,11 +744,40 @@ static int hvmemul_inject_sw_interrupt(
  66.144      return X86EMUL_OKAY;
  66.145  }
  66.146  
  66.147 -static void hvmemul_load_fpu_ctxt(
  66.148 +static int hvmemul_get_fpu(
  66.149 +    void (*exception_callback)(void *, struct cpu_user_regs *),
  66.150 +    void *exception_callback_arg,
  66.151 +    enum x86_emulate_fpu_type type,
  66.152      struct x86_emulate_ctxt *ctxt)
  66.153  {
  66.154 -    if ( !current->fpu_dirtied )
  66.155 +    struct vcpu *curr = current;
  66.156 +
  66.157 +    switch ( type )
  66.158 +    {
  66.159 +    case X86EMUL_FPU_fpu:
  66.160 +        break;
  66.161 +    case X86EMUL_FPU_mmx:
  66.162 +        if ( !cpu_has_mmx )
  66.163 +            return X86EMUL_UNHANDLEABLE;
  66.164 +        break;
  66.165 +    default:
  66.166 +        return X86EMUL_UNHANDLEABLE;
  66.167 +    }
  66.168 +
  66.169 +    if ( !curr->fpu_dirtied )
  66.170          hvm_funcs.fpu_dirty_intercept();
  66.171 +
  66.172 +    curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
  66.173 +    curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
  66.174 +
  66.175 +    return X86EMUL_OKAY;
  66.176 +}
  66.177 +
  66.178 +static void hvmemul_put_fpu(
  66.179 +    struct x86_emulate_ctxt *ctxt)
  66.180 +{
  66.181 +    struct vcpu *curr = current;
  66.182 +    curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
  66.183  }
  66.184  
  66.185  static int hvmemul_invlpg(
  66.186 @@ -720,7 +819,8 @@ static struct x86_emulate_ops hvm_emulat
  66.187      .cpuid         = hvmemul_cpuid,
  66.188      .inject_hw_exception = hvmemul_inject_hw_exception,
  66.189      .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
  66.190 -    .load_fpu_ctxt = hvmemul_load_fpu_ctxt,
  66.191 +    .get_fpu       = hvmemul_get_fpu,
  66.192 +    .put_fpu       = hvmemul_put_fpu,
  66.193      .invlpg        = hvmemul_invlpg
  66.194  };
  66.195  
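
[annotation] The one-shot load_fpu_ctxt hook becomes a get_fpu/put_fpu bracket: the emulator acquires the FPU (forcing a lazy restore if needed), registers a callback for faults raised while it executes FPU/MMX instructions on the guest's behalf, and unregisters it when the instruction retires. A simplified sketch of that bracketing pattern; the names and the single global slot are illustrative, whereas the patch keeps this state per vcpu:

    #include <stddef.h>

    struct regs;                                 /* opaque register file */
    typedef void (*fpu_exn_cb)(void *arg, struct regs *r);

    static fpu_exn_cb active_cb;                 /* per-vcpu in the patch */
    static void *active_cb_arg;

    static int get_fpu(fpu_exn_cb cb, void *arg)
    {
        /* In the hypervisor this also forces a lazy FPU restore. */
        active_cb = cb;
        active_cb_arg = arg;
        return 0;
    }

    static void put_fpu(void)
    {
        active_cb = NULL;                        /* callback no longer valid */
    }

    static void on_fault(void *arg, struct regs *r) { (void)arg; (void)r; }

    int main(void)
    {
        get_fpu(on_fault, NULL);  /* emulator starts an FPU instruction  */
        /* ... instruction executes; faults route through active_cb ... */
        put_fpu();                /* instruction retired, callback dropped */
        return 0;
    }
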
  66.196 @@ -763,6 +863,11 @@ int hvm_emulate_one(
  66.197      hvmemul_ctxt->exn_pending = 0;
  66.198  
  66.199      rc = x86_emulate(&hvmemul_ctxt->ctxt, &hvm_emulate_ops);
  66.200 +
  66.201 +    if ( rc != X86EMUL_RETRY )
  66.202 +        curr->arch.hvm_vcpu.mmio_large_read_bytes =
  66.203 +            curr->arch.hvm_vcpu.mmio_large_write_bytes = 0;
  66.204 +
  66.205      if ( rc != X86EMUL_OKAY )
  66.206          return rc;
  66.207  
    67.1 --- a/xen/arch/x86/hvm/hvm.c	Thu Apr 24 14:02:16 2008 -0600
    67.2 +++ b/xen/arch/x86/hvm/hvm.c	Thu Apr 24 14:08:29 2008 -0600
    67.3 @@ -494,14 +494,14 @@ static int hvm_load_cpu_ctxt(struct doma
    67.4           ((ctxt.cr0 & (X86_CR0_PE|X86_CR0_PG)) == X86_CR0_PG) )
    67.5      {
    67.6          gdprintk(XENLOG_ERR, "HVM restore: bad CR0 0x%"PRIx64"\n",
    67.7 -                 ctxt.msr_efer);
    67.8 +                 ctxt.cr0);
    67.9          return -EINVAL;
   67.10      }
   67.11  
   67.12      if ( ctxt.cr4 & HVM_CR4_GUEST_RESERVED_BITS )
   67.13      {
   67.14          gdprintk(XENLOG_ERR, "HVM restore: bad CR4 0x%"PRIx64"\n",
   67.15 -                 ctxt.msr_efer);
   67.16 +                 ctxt.cr4);
   67.17          return -EINVAL;
   67.18      }
   67.19  
   67.20 @@ -620,8 +620,6 @@ static int hvm_load_cpu_ctxt(struct doma
   67.21  HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt,
   67.22                            1, HVMSR_PER_VCPU);
   67.23  
   67.24 -extern int reset_vmsr(struct mtrr_state *m, u64 *p);
   67.25 -
   67.26  int hvm_vcpu_initialise(struct vcpu *v)
   67.27  {
   67.28      int rc;
   67.29 @@ -647,7 +645,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
   67.30      spin_lock_init(&v->arch.hvm_vcpu.tm_lock);
   67.31      INIT_LIST_HEAD(&v->arch.hvm_vcpu.tm_list);
   67.32  
   67.33 -    rc = reset_vmsr(&v->arch.hvm_vcpu.mtrr, &v->arch.hvm_vcpu.pat_cr);
   67.34 +    rc = hvm_vcpu_cacheattr_init(v);
   67.35      if ( rc != 0 )
   67.36          goto fail3;
   67.37  
   67.38 @@ -681,6 +679,7 @@ int hvm_vcpu_initialise(struct vcpu *v)
   67.39  
   67.40  void hvm_vcpu_destroy(struct vcpu *v)
   67.41  {
   67.42 +    hvm_vcpu_cacheattr_destroy(v);
   67.43      vlapic_destroy(v);
   67.44      hvm_funcs.vcpu_destroy(v);
   67.45  
   67.46 @@ -1604,6 +1603,9 @@ void hvm_cpuid(unsigned int input, unsig
   67.47          *ebx &= 0x0000FFFFu;
   67.48          *ebx |= (current->vcpu_id * 2) << 24;
   67.49  
   67.50 +        /* We always support MTRR MSRs. */
   67.51 +        *edx |= bitmaskof(X86_FEATURE_MTRR);
   67.52 +
   67.53          *ecx &= (bitmaskof(X86_FEATURE_XMM3) |
   67.54                   bitmaskof(X86_FEATURE_SSSE3) |
   67.55                   bitmaskof(X86_FEATURE_CX16) |
   67.56 @@ -1655,6 +1657,146 @@ void hvm_cpuid(unsigned int input, unsig
   67.57      }
   67.58  }
   67.59  
   67.60 +int hvm_msr_read_intercept(struct cpu_user_regs *regs)
   67.61 +{
   67.62 +    uint32_t ecx = regs->ecx;
   67.63 +    uint64_t msr_content = 0;
   67.64 +    struct vcpu *v = current;
   67.65 +    uint64_t *var_range_base, *fixed_range_base;
   67.66 +    int index;
   67.67 +
   67.68 +    var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges;
   67.69 +    fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges;
   67.70 +
   67.71 +    switch ( ecx )
   67.72 +    {
   67.73 +    case MSR_IA32_TSC:
   67.74 +        msr_content = hvm_get_guest_time(v);
   67.75 +        break;
   67.76 +
   67.77 +    case MSR_IA32_APICBASE:
   67.78 +        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   67.79 +        break;
   67.80 +
   67.81 +    case MSR_IA32_MCG_CAP:
   67.82 +    case MSR_IA32_MCG_STATUS:
   67.83 +    case MSR_IA32_MC0_STATUS:
   67.84 +    case MSR_IA32_MC1_STATUS:
   67.85 +    case MSR_IA32_MC2_STATUS:
   67.86 +    case MSR_IA32_MC3_STATUS:
   67.87 +    case MSR_IA32_MC4_STATUS:
   67.88 +    case MSR_IA32_MC5_STATUS:
   67.89 +        /* No point in letting the guest see real MCEs */
   67.90 +        msr_content = 0;
   67.91 +        break;
   67.92 +
   67.93 +    case MSR_IA32_CR_PAT:
   67.94 +        msr_content = v->arch.hvm_vcpu.pat_cr;
   67.95 +        break;
   67.96 +
   67.97 +    case MSR_MTRRcap:
   67.98 +        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
   67.99 +        break;
  67.100 +    case MSR_MTRRdefType:
  67.101 +        msr_content = v->arch.hvm_vcpu.mtrr.def_type
  67.102 +                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
  67.103 +        break;
  67.104 +    case MSR_MTRRfix64K_00000:
  67.105 +        msr_content = fixed_range_base[0];
  67.106 +        break;
  67.107 +    case MSR_MTRRfix16K_80000:
  67.108 +    case MSR_MTRRfix16K_A0000:
  67.109 +        index = regs->ecx - MSR_MTRRfix16K_80000;
  67.110 +        msr_content = fixed_range_base[index + 1];
  67.111 +        break;
  67.112 +    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
  67.113 +        index = regs->ecx - MSR_MTRRfix4K_C0000;
  67.114 +        msr_content = fixed_range_base[index + 3];
  67.115 +        break;
  67.116 +    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
  67.117 +        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
  67.118 +        msr_content = var_range_base[index];
  67.119 +        break;
  67.120 +
  67.121 +    default:
  67.122 +        return hvm_funcs.msr_read_intercept(regs);
  67.123 +    }
  67.124 +
  67.125 +    regs->eax = (uint32_t)msr_content;
  67.126 +    regs->edx = (uint32_t)(msr_content >> 32);
  67.127 +    return X86EMUL_OKAY;
  67.128 +}
  67.129 +
  67.130 +int hvm_msr_write_intercept(struct cpu_user_regs *regs)
  67.131 +{
  67.132 +    extern bool_t mtrr_var_range_msr_set(
  67.133 +        struct mtrr_state *v, u32 msr, u64 msr_content);
  67.134 +    extern bool_t mtrr_fix_range_msr_set(
  67.135 +        struct mtrr_state *v, int row, u64 msr_content);
  67.136 +    extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
  67.137 +    extern bool_t pat_msr_set(u64 *pat, u64 msr);
  67.138 +
  67.139 +    uint32_t ecx = regs->ecx;
  67.140 +    uint64_t msr_content = (uint32_t)regs->eax | ((uint64_t)regs->edx << 32);
  67.141 +    struct vcpu *v = current;
  67.142 +    int index;
  67.143 +
  67.144 +    switch ( ecx )
  67.145 +    {
   67.146 +    case MSR_IA32_TSC:

  67.147 +        hvm_set_guest_time(v, msr_content);
  67.148 +        pt_reset(v);
  67.149 +        break;
  67.150 +
  67.151 +    case MSR_IA32_APICBASE:
  67.152 +        vlapic_msr_set(vcpu_vlapic(v), msr_content);
  67.153 +        break;
  67.154 +
  67.155 +    case MSR_IA32_CR_PAT:
  67.156 +        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
  67.157 +           goto gp_fault;
  67.158 +        break;
  67.159 +
  67.160 +    case MSR_MTRRcap:
  67.161 +        goto gp_fault;
  67.162 +    case MSR_MTRRdefType:
  67.163 +        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
  67.164 +           goto gp_fault;
  67.165 +        break;
  67.166 +    case MSR_MTRRfix64K_00000:
  67.167 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
  67.168 +            goto gp_fault;
  67.169 +        break;
  67.170 +    case MSR_MTRRfix16K_80000:
  67.171 +    case MSR_MTRRfix16K_A0000:
  67.172 +        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
  67.173 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  67.174 +                                     index, msr_content) )
  67.175 +            goto gp_fault;
  67.176 +        break;
  67.177 +    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
  67.178 +        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
  67.179 +        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  67.180 +                                     index, msr_content) )
  67.181 +            goto gp_fault;
  67.182 +        break;
  67.183 +    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
  67.184 +        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  67.185 +                                     regs->ecx, msr_content) )
  67.186 +            goto gp_fault;
  67.187 +        break;
  67.188 +
  67.189 +    default:
  67.190 +        return hvm_funcs.msr_write_intercept(regs);
  67.191 +    }
  67.192 +
  67.193 +    return X86EMUL_OKAY;
  67.194 +
  67.195 +gp_fault:
  67.196 +    hvm_inject_exception(TRAP_gp_fault, 0, 0);
  67.197 +    return X86EMUL_EXCEPTION;
  67.198 +}
  67.199 +
  67.200  enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
  67.201  {
  67.202      unsigned long intr_shadow;
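
[annotation] The new common intercept folds the eleven fixed-range MTRR MSRs onto one array: slot 0 for the 64K register, slots 1-2 for the 16K pair, slots 3-10 for the eight 4K registers, which is where the "+ 1" and "+ 3" offsets above come from. A sketch of that index calculation, using the MSR numbers from the Intel SDM:

    #include <stdint.h>

    #define MSR_MTRRfix64K_00000 0x250
    #define MSR_MTRRfix16K_80000 0x258
    #define MSR_MTRRfix16K_A0000 0x259
    #define MSR_MTRRfix4K_C0000  0x268
    #define MSR_MTRRfix4K_F8000  0x26f

    /* Returns the fixed_ranges[] slot for an MSR, or -1 if not fixed-range. */
    static int fixed_range_index(uint32_t msr)
    {
        if ( msr == MSR_MTRRfix64K_00000 )
            return 0;
        if ( (msr == MSR_MTRRfix16K_80000) || (msr == MSR_MTRRfix16K_A0000) )
            return (msr - MSR_MTRRfix16K_80000) + 1;   /* slots 1..2 */
        if ( (msr >= MSR_MTRRfix4K_C0000) && (msr <= MSR_MTRRfix4K_F8000) )
            return (msr - MSR_MTRRfix4K_C0000) + 3;    /* slots 3..10 */
        return -1;
    }

    int main(void)
    {
        return ((fixed_range_index(MSR_MTRRfix16K_A0000) == 2) &&
                (fixed_range_index(MSR_MTRRfix4K_C0000) == 3)) ? 0 : 1;
    }

Unrecognized MSRs fall through to the vendor-specific hvm_funcs handler, so SVM and VMX keep only what is genuinely architecture-specific.
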
    68.1 --- a/xen/arch/x86/hvm/mtrr.c	Thu Apr 24 14:02:16 2008 -0600
    68.2 +++ b/xen/arch/x86/hvm/mtrr.c	Thu Apr 24 14:08:29 2008 -0600
    68.3 @@ -27,7 +27,6 @@
    68.4  #include <asm/hvm/support.h>
    68.5  #include <asm/hvm/cacheattr.h>
    68.6  
    68.7 -/* Xen holds the native MTRR MSRs */
    68.8  extern struct mtrr_state mtrr_state;
    68.9  
   68.10  static uint64_t phys_base_msr_mask;
   68.11 @@ -35,19 +34,17 @@ static uint64_t phys_mask_msr_mask;
   68.12  static uint32_t size_or_mask;
   68.13  static uint32_t size_and_mask;
   68.14  
   68.15 -static void init_pat_entry_tbl(uint64_t pat);
   68.16 -static void init_mtrr_epat_tbl(void);
   68.17 -static uint8_t get_mtrr_type(struct mtrr_state *m, paddr_t pa);
   68.18 -/* get page attribute fields (PAn) from PAT MSR */
   68.19 +/* Get page attribute fields (PAn) from PAT MSR. */
   68.20  #define pat_cr_2_paf(pat_cr,n)  ((((uint64_t)pat_cr) >> ((n)<<3)) & 0xff)
   68.21 -/* pat entry to PTE flags (PAT, PCD, PWT bits) */
   68.22 +
   68.23 +/* PAT entry to PTE flags (PAT, PCD, PWT bits). */
   68.24  static uint8_t pat_entry_2_pte_flags[8] = {
   68.25      0,           _PAGE_PWT,
   68.26      _PAGE_PCD,   _PAGE_PCD | _PAGE_PWT,
   68.27      _PAGE_PAT,   _PAGE_PAT | _PAGE_PWT,
   68.28      _PAGE_PAT | _PAGE_PCD, _PAGE_PAT | _PAGE_PCD | _PAGE_PWT };
   68.29  
   68.30 -/* effective mm type lookup table, according to MTRR and PAT */
   68.31 +/* Effective mm type lookup table, according to MTRR and PAT. */
   68.32  static uint8_t mm_type_tbl[MTRR_NUM_TYPES][PAT_TYPE_NUMS] = {
   68.33  /********PAT(UC,WC,RS,RS,WT,WP,WB,UC-)*/
   68.34  /* RS means reserved type(2,3), and type is hardcoded here */
   68.35 @@ -67,12 +64,13 @@ static uint8_t mm_type_tbl[MTRR_NUM_TYPE
   68.36              {0, 1, 2, 2, 4, 5, 6, 0}
   68.37  };
   68.38  
   68.39 -/* reverse lookup table, to find a pat type according to MTRR and effective
   68.40 - * memory type. This table is dynamically generated
   68.41 +/*
   68.42 + * Reverse lookup table, to find a pat type according to MTRR and effective
   68.43 + * memory type. This table is dynamically generated.
   68.44   */
   68.45  static uint8_t mtrr_epat_tbl[MTRR_NUM_TYPES][MEMORY_NUM_TYPES];
   68.46  
   68.47 -/* lookup table for PAT entry of a given PAT value in host pat */
   68.48 +/* Lookup table for PAT entry of a given PAT value in host PAT. */
   68.49  static uint8_t pat_entry_tbl[PAT_TYPE_NUMS];
   68.50  
   68.51  static void get_mtrr_range(uint64_t base_msr, uint64_t mask_msr,
   68.52 @@ -139,220 +137,63 @@ bool_t is_var_mtrr_overlapped(struct mtr
   68.53      return 0;
   68.54  }
   68.55  
   68.56 -/* reserved mtrr for guest OS */
   68.57 -#define RESERVED_MTRR 2
   68.58 +#define MTRR_PHYSMASK_VALID_BIT  11
   68.59 +#define MTRR_PHYSMASK_SHIFT      12
   68.60 +
   68.61 +#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
   68.62 +#define MTRR_PHYSBASE_SHIFT      12
   68.63 +#define MTRR_VCNT                8
   68.64 +
   68.65  #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
   68.66  #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
   68.67  bool_t mtrr_var_range_msr_set(struct mtrr_state *m, uint32_t msr,
   68.68                                uint64_t msr_content);
   68.69 -bool_t mtrr_def_type_msr_set(struct mtrr_state *m, uint64_t msr_content);
   68.70  bool_t mtrr_fix_range_msr_set(struct mtrr_state *m, uint32_t row,
   68.71                                uint64_t msr_content);
   68.72 -static void set_var_mtrr(uint32_t reg, struct mtrr_state *m,
   68.73 -                         uint32_t base, uint32_t size,
   68.74 -                         uint32_t type)
   68.75 +
   68.76 +static int hvm_mtrr_pat_init(void)
   68.77  {
   68.78 -    struct mtrr_var_range *vr;
   68.79 -
   68.80 -    vr = &m->var_ranges[reg];
   68.81 -
   68.82 -    if ( size == 0 )
   68.83 -    {
   68.84 -        /* The invalid bit is kept in the mask, so we simply clear the
   68.85 -         * relevant mask register to disable a range.
   68.86 -         */
   68.87 -        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg), 0);
   68.88 -    }
   68.89 -    else
   68.90 -    {
   68.91 -        vr->base_lo = base << PAGE_SHIFT | type;
   68.92 -        vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT);
   68.93 -        vr->mask_lo = -size << PAGE_SHIFT | 0x800;
   68.94 -        vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT);
   68.95 +    extern uint64_t host_pat;
   68.96 +    unsigned int i, j, phys_addr;
   68.97  
   68.98 -        mtrr_var_range_msr_set(m, MTRRphysBase_MSR(reg), *(uint64_t *)vr);
   68.99 -        mtrr_var_range_msr_set(m, MTRRphysMask_MSR(reg),
  68.100 -                               *((uint64_t *)vr + 1));
  68.101 -    }
  68.102 -}
  68.103 -/* From Intel Vol. III Section 10.11.4, the Range Size and Base Alignment has
  68.104 - * some kind of requirement:
  68.105 - * 1. The range size must be 2^N byte for N >= 12 (i.e 4KB minimum).
  68.106 - * 2. The base address must be 2^N aligned, where the N here is equal to
  68.107 - * the N in previous requirement. So a 8K range must be 8K aligned not 4K aligned.
  68.108 - */
  68.109 -static uint32_t range_to_mtrr(uint32_t reg, struct mtrr_state *m,
  68.110 -                              uint32_t range_startk, uint32_t range_sizek,
  68.111 -                              uint8_t type)
  68.112 -{
  68.113 -    if ( !range_sizek || (reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR)) )
  68.114 +    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
  68.115 +    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
  68.116      {
  68.117 -        gdprintk(XENLOG_WARNING,
  68.118 -                "Failed to init var mtrr msr[%d]"
  68.119 -                "range_size:%x, total available MSR:%d\n",
  68.120 -                reg, range_sizek,
  68.121 -                (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
  68.122 -        return reg;
  68.123 -    }
  68.124 -
  68.125 -    while ( range_sizek )
  68.126 -    {
  68.127 -        uint32_t max_align, align, sizek;
  68.128 -
  68.129 -        max_align = (range_startk == 0) ? 32 : ffs(range_startk);
  68.130 -        align = min_t(uint32_t, fls(range_sizek), max_align);
  68.131 -        sizek = 1 << (align - 1);
  68.132 -
  68.133 -        set_var_mtrr(reg++, m, range_startk, sizek, type);
  68.134 -
  68.135 -        range_startk += sizek;
  68.136 -        range_sizek  -= sizek;
  68.137 -
  68.138 -        if ( reg >= ((m->mtrr_cap & 0xff) - RESERVED_MTRR) )
  68.139 +        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  68.140          {
  68.141 -            gdprintk(XENLOG_WARNING,
  68.142 -                    "Failed to init var mtrr msr[%d],"
  68.143 -                    "total available MSR:%d\n",
  68.144 -                    reg, (uint32_t)((m->mtrr_cap & 0xff) - RESERVED_MTRR));
  68.145 -            break;
  68.146 +            int32_t tmp = mm_type_tbl[i][j];
  68.147 +            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
  68.148 +                mtrr_epat_tbl[i][tmp] = j;
  68.149          }
  68.150      }
  68.151  
  68.152 -    return reg;
  68.153 -}
  68.154 -
  68.155 -static void setup_fixed_mtrrs(struct vcpu *v)
  68.156 -{
  68.157 -    uint64_t content;
  68.158 -    int32_t i;
  68.159 -    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
  68.160 -
  68.161 -    /* 1. Map (0~A0000) as WB */
  68.162 -    content = 0x0606060606060606ull;
  68.163 -    mtrr_fix_range_msr_set(m, 0, content);
  68.164 -    mtrr_fix_range_msr_set(m, 1, content);
  68.165 -    /* 2. Map VRAM(A0000~C0000) as WC */
  68.166 -    content = 0x0101010101010101;
  68.167 -    mtrr_fix_range_msr_set(m, 2, content);
  68.168 -    /* 3. Map (C0000~100000) as UC */
  68.169 -    for ( i = 3; i < 11; i++)
  68.170 -        mtrr_fix_range_msr_set(m, i, 0);
  68.171 -}
  68.172 -
  68.173 -static void setup_var_mtrrs(struct vcpu *v)
  68.174 -{
  68.175 -    p2m_type_t p2m;
  68.176 -    uint64_t e820_mfn;
  68.177 -    int8_t *p = NULL;
  68.178 -    uint8_t nr = 0;
  68.179 -    int32_t i;
  68.180 -    uint32_t reg = 0;
  68.181 -    uint64_t size = 0;
  68.182 -    uint64_t addr = 0;
  68.183 -    struct e820entry *e820_table;
  68.184 -
  68.185 -    e820_mfn = mfn_x(gfn_to_mfn(v->domain,
  68.186 -                    HVM_E820_PAGE >> PAGE_SHIFT, &p2m));
  68.187 -
  68.188 -    p = (int8_t *)map_domain_page(e820_mfn);
  68.189 -
  68.190 -    nr = *(uint8_t*)(p + HVM_E820_NR_OFFSET);
  68.191 -    e820_table = (struct e820entry*)(p + HVM_E820_OFFSET);
  68.192 -    /* search E820 table, set MTRR for RAM */
  68.193 -    for ( i = 0; i < nr; i++)
  68.194 +    memset(&pat_entry_tbl, INVALID_MEM_TYPE,
  68.195 +           PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
  68.196 +    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
  68.197      {
  68.198 -        if ( (e820_table[i].addr >= 0x100000) &&
  68.199 -             (e820_table[i].type == E820_RAM) )
  68.200 +        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  68.201          {
  68.202 -            if ( e820_table[i].addr == 0x100000 )
  68.203 +            if ( pat_cr_2_paf(host_pat, j) == i )
  68.204              {
  68.205 -                size = e820_table[i].size + 0x100000 + PAGE_SIZE * 5;
  68.206 -                addr = 0;
  68.207 +                pat_entry_tbl[i] = j;
  68.208 +                break;
  68.209              }
  68.210 -            else
  68.211 -            {
  68.212 -                /* Larger than 4G */
  68.213 -                size = e820_table[i].size;
  68.214 -                addr = e820_table[i].addr;
  68.215 -            }
  68.216 -
  68.217 -            reg = range_to_mtrr(reg, &v->arch.hvm_vcpu.mtrr,
  68.218 -                                addr >> PAGE_SHIFT, size >> PAGE_SHIFT,
  68.219 -                                MTRR_TYPE_WRBACK);
  68.220          }
  68.221      }
  68.222 -}
  68.223  
  68.224 -void init_mtrr_in_hyper(struct vcpu *v)
  68.225 -{
  68.226 -    /* TODO:MTRR should be initialized in BIOS or other places.
  68.227 -     * workaround to do it in here
  68.228 -     */
  68.229 -    if ( v->arch.hvm_vcpu.mtrr.is_initialized )
  68.230 -        return;
  68.231 -
  68.232 -    setup_fixed_mtrrs(v);
  68.233 -    setup_var_mtrrs(v);
  68.234 -    /* enable mtrr */
  68.235 -    mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, 0xc00);
  68.236 -
  68.237 -    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
  68.238 -}
  68.239 -
  68.240 -static int32_t reset_mtrr(struct mtrr_state *m)
  68.241 -{
  68.242 -    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
  68.243 -    if ( m->var_ranges == NULL )
  68.244 -        return -ENOMEM;
  68.245 -    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
  68.246 -    memset(m->fixed_ranges, 0, sizeof(m->fixed_ranges));
  68.247 -    m->enabled = 0;
  68.248 -    m->def_type = 0;/*mtrr is disabled*/
  68.249 -    m->mtrr_cap = (0x5<<8)|MTRR_VCNT;/*wc,fix enabled, and vcnt=8*/
  68.250 -    m->overlapped = 0;
  68.251 -    return 0;
  68.252 -}
  68.253 -
  68.254 -/* init global variables for MTRR and PAT */
  68.255 -void global_init_mtrr_pat(void)
  68.256 -{
  68.257 -    extern uint64_t host_pat;
  68.258 -    uint32_t phys_addr;
  68.259 -
  68.260 -    init_mtrr_epat_tbl();
  68.261 -    init_pat_entry_tbl(host_pat);
  68.262 -    /* Get max physical address, set some global variable */
  68.263 -    if ( cpuid_eax(0x80000000) < 0x80000008 )
  68.264 -        phys_addr = 36;
  68.265 -    else
  68.266 -        phys_addr = cpuid_eax(0x80000008);
  68.267 +    phys_addr = 36;
  68.268 +    if ( cpuid_eax(0x80000000) >= 0x80000008 )
  68.269 +        phys_addr = (uint8_t)cpuid_eax(0x80000008);
  68.270  
  68.271      phys_base_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0xf00UL;
  68.272      phys_mask_msr_mask = ~((((uint64_t)1) << phys_addr) - 1) | 0x7ffUL;
  68.273  
  68.274      size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
  68.275      size_and_mask = ~size_or_mask & 0xfff00000;
  68.276 +
  68.277 +    return 0;
  68.278  }
  68.279 -
  68.280 -static void init_pat_entry_tbl(uint64_t pat)
  68.281 -{
  68.282 -    int32_t i, j;
  68.283 -
  68.284 -    memset(&pat_entry_tbl, INVALID_MEM_TYPE,
  68.285 -           PAT_TYPE_NUMS * sizeof(pat_entry_tbl[0]));
  68.286 -
  68.287 -    for ( i = 0; i < PAT_TYPE_NUMS; i++ )
  68.288 -    {
  68.289 -        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  68.290 -        {
  68.291 -            if ( pat_cr_2_paf(pat, j) == i )
  68.292 -            {
  68.293 -                pat_entry_tbl[i] = j;
  68.294 -                break;
  68.295 -            }
  68.296 -        }
  68.297 -    }
  68.298 -}
  68.299 +__initcall(hvm_mtrr_pat_init);
  68.300  
  68.301  uint8_t pat_type_2_pte_flags(uint8_t pat_type)
  68.302  {
  68.303 @@ -368,26 +209,37 @@ uint8_t pat_type_2_pte_flags(uint8_t pat
  68.304      return pat_entry_2_pte_flags[pat_entry_tbl[PAT_TYPE_UNCACHABLE]];
  68.305  }
  68.306  
  68.307 -int32_t reset_vmsr(struct mtrr_state *m, uint64_t *pat_ptr)
  68.308 +int hvm_vcpu_cacheattr_init(struct vcpu *v)
  68.309  {
  68.310 -    int32_t rc;
  68.311 +    struct mtrr_state *m = &v->arch.hvm_vcpu.mtrr;
  68.312  
  68.313 -    rc = reset_mtrr(m);
  68.314 -    if ( rc != 0 )
  68.315 -        return rc;
  68.316 +    memset(m, 0, sizeof(*m));
  68.317 +
  68.318 +    m->var_ranges = xmalloc_array(struct mtrr_var_range, MTRR_VCNT);
  68.319 +    if ( m->var_ranges == NULL )
  68.320 +        return -ENOMEM;
  68.321 +    memset(m->var_ranges, 0, MTRR_VCNT * sizeof(struct mtrr_var_range));
  68.322  
  68.323 -    *pat_ptr = ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
  68.324 -               ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
  68.325 -               ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
  68.326 -               ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
  68.327 -               ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
  68.328 -               ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
  68.329 -               ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
  68.330 -               ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
  68.331 +    m->mtrr_cap = (1u << 10) | (1u << 8) | MTRR_VCNT;
  68.332 +
  68.333 +    v->arch.hvm_vcpu.pat_cr =
  68.334 +        ((uint64_t)PAT_TYPE_WRBACK) |               /* PAT0: WB */
  68.335 +        ((uint64_t)PAT_TYPE_WRTHROUGH << 8) |       /* PAT1: WT */
  68.336 +        ((uint64_t)PAT_TYPE_UC_MINUS << 16) |       /* PAT2: UC- */
  68.337 +        ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |     /* PAT3: UC */
  68.338 +        ((uint64_t)PAT_TYPE_WRBACK << 32) |         /* PAT4: WB */
  68.339 +        ((uint64_t)PAT_TYPE_WRTHROUGH << 40) |      /* PAT5: WT */
  68.340 +        ((uint64_t)PAT_TYPE_UC_MINUS << 48) |       /* PAT6: UC- */
  68.341 +        ((uint64_t)PAT_TYPE_UNCACHABLE << 56);      /* PAT7: UC */
  68.342  
  68.343      return 0;
  68.344  }
  68.345  
  68.346 +void hvm_vcpu_cacheattr_destroy(struct vcpu *v)
  68.347 +{
  68.348 +    xfree(v->arch.hvm_vcpu.mtrr.var_ranges);
  68.349 +}
  68.350 +
  68.351  /*
  68.352   * Get MTRR memory type for physical address pa.
  68.353   */
  68.354 @@ -512,23 +364,6 @@ static uint8_t effective_mm_type(struct 
  68.355      return effective;
  68.356  }
  68.357  
  68.358 -static void init_mtrr_epat_tbl(void)
  68.359 -{
  68.360 -    int32_t i, j;
  68.361 -    /* set default value to an invalid type, just for checking conflict */
  68.362 -    memset(&mtrr_epat_tbl, INVALID_MEM_TYPE, sizeof(mtrr_epat_tbl));
  68.363 -
  68.364 -    for ( i = 0; i < MTRR_NUM_TYPES; i++ )
  68.365 -    {
  68.366 -        for ( j = 0; j < PAT_TYPE_NUMS; j++ )
  68.367 -        {
  68.368 -            int32_t tmp = mm_type_tbl[i][j];
  68.369 -            if ( (tmp >= 0) && (tmp < MEMORY_NUM_TYPES) )
  68.370 -                mtrr_epat_tbl[i][tmp] = j;
  68.371 -        }
  68.372 -    }
  68.373 -}
  68.374 -
  68.375  uint32_t get_pat_flags(struct vcpu *v,
  68.376                         uint32_t gl1e_flags,
  68.377                         paddr_t gpaddr,
  68.378 @@ -856,7 +691,6 @@ static int hvm_load_mtrr_msr(struct doma
  68.379  
  68.380      mtrr_def_type_msr_set(mtrr_state, hw_mtrr.msr_mtrr_def_type);
  68.381  
  68.382 -    v->arch.hvm_vcpu.mtrr.is_initialized = 1;
  68.383      return 0;
  68.384  }
  68.385  
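
[annotation] hvm_vcpu_cacheattr_init() above advertises MTRRcap as (1u << 10) | (1u << 8) | MTRR_VCNT: write-combining supported (bit 10), fixed ranges supported (bit 8), and eight variable ranges in the low byte. A small self-checking decode of that encoding, following the SDM layout:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t mtrr_cap = (1u << 10) | (1u << 8) | 8;  /* as in the patch */
        unsigned int vcnt = mtrr_cap & 0xff;             /* variable ranges  */
        int fix = !!(mtrr_cap & (1u << 8));              /* fixed supported  */
        int wc  = !!(mtrr_cap & (1u << 10));             /* WC supported     */
        printf("VCNT=%u FIX=%d WC=%d\n", vcnt, fix, wc); /* VCNT=8 FIX=1 WC=1 */
        return 0;
    }

With the guest's MTRR/PAT state now wholly virtual and reset per vcpu, the old BIOS-workaround code (setup_fixed_mtrrs/setup_var_mtrrs and the RESERVED_MTRR accounting) can be deleted, as the hunks above do.
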
    69.1 --- a/xen/arch/x86/hvm/svm/Makefile	Thu Apr 24 14:02:16 2008 -0600
    69.2 +++ b/xen/arch/x86/hvm/svm/Makefile	Thu Apr 24 14:08:29 2008 -0600
    69.3 @@ -1,8 +1,6 @@
    69.4 -subdir-$(x86_32) += x86_32
    69.5 -subdir-$(x86_64) += x86_64
    69.6 -
    69.7  obj-y += asid.o
    69.8  obj-y += emulate.o
    69.9 +obj-y += entry.o
   69.10  obj-y += intr.o
   69.11  obj-y += svm.o
   69.12  obj-y += vmcb.o
    70.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    70.2 +++ b/xen/arch/x86/hvm/svm/entry.S	Thu Apr 24 14:08:29 2008 -0600
    70.3 @@ -0,0 +1,178 @@
    70.4 +/*
    70.5 + * entry.S: SVM architecture-specific entry/exit handling.
    70.6 + * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
    70.7 + * Copyright (c) 2004, Intel Corporation.
    70.8 + * Copyright (c) 2008, Citrix Systems, Inc.
    70.9 + *
   70.10 + * This program is free software; you can redistribute it and/or modify it
   70.11 + * under the terms and conditions of the GNU General Public License,
   70.12 + * version 2, as published by the Free Software Foundation.
   70.13 + *
   70.14 + * This program is distributed in the hope it will be useful, but WITHOUT
   70.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   70.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   70.17 + * more details.
   70.18 + *
   70.19 + * You should have received a copy of the GNU General Public License along with
   70.20 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   70.21 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   70.22 + */
   70.23 +
   70.24 +#include <xen/config.h>
   70.25 +#include <xen/errno.h>
   70.26 +#include <xen/softirq.h>
   70.27 +#include <asm/types.h>
   70.28 +#include <asm/asm_defns.h>
   70.29 +#include <asm/apicdef.h>
   70.30 +#include <asm/page.h>
   70.31 +#include <public/xen.h>
   70.32 +
   70.33 +#define VMRUN  .byte 0x0F,0x01,0xD8
   70.34 +#define STGI   .byte 0x0F,0x01,0xDC
   70.35 +#define CLGI   .byte 0x0F,0x01,0xDD
   70.36 +
   70.37 +#define get_current(reg)                        \
   70.38 +        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
   70.39 +        or  r(sp), r(reg);                      \
   70.40 +        and $~(BYTES_PER_LONG-1),r(reg);        \
   70.41 +        mov (r(reg)),r(reg);
   70.42 +
   70.43 +#if defined(__x86_64__)
   70.44 +#define r(reg) %r##reg
   70.45 +#define addr_of(lbl) lbl(%rip)
   70.46 +#define call_with_regs(fn)                      \
   70.47 +        mov  %rsp,%rdi;                         \
   70.48 +        call fn;
   70.49 +#else /* defined(__i386__) */
   70.50 +#define r(reg) %e##reg
   70.51 +#define addr_of(lbl) lbl
   70.52 +#define UREGS_rax UREGS_eax
   70.53 +#define UREGS_rip UREGS_eip
   70.54 +#define UREGS_rsp UREGS_esp
   70.55 +#define call_with_regs(fn)                      \
   70.56 +        mov  %esp,%eax;                         \
   70.57 +        push %eax;                              \
   70.58 +        call fn;                                \
   70.59 +        add  $4,%esp;
   70.60 +#endif
   70.61 +
   70.62 +ENTRY(svm_asm_do_resume)
   70.63 +        get_current(bx)
   70.64 +        CLGI
   70.65 +
   70.66 +        mov  VCPU_processor(r(bx)),%eax
   70.67 +        shl  $IRQSTAT_shift,r(ax)
   70.68 +        lea  addr_of(irq_stat),r(dx)
   70.69 +        testl $~0,(r(dx),r(ax),1)
   70.70 +        jnz  .Lsvm_process_softirqs
   70.71 +
   70.72 +        call svm_asid_handle_vmrun
   70.73 +        call svm_intr_assist
   70.74 +
   70.75 +        cmpb $0,addr_of(tb_init_done)
   70.76 +        jnz  .Lsvm_trace
   70.77 +.Lsvm_trace_done:
   70.78 +
   70.79 +        mov  VCPU_svm_vmcb(r(bx)),r(cx)
   70.80 +        mov  UREGS_rax(r(sp)),r(ax)
   70.81 +        mov  r(ax),VMCB_rax(r(cx))
   70.82 +        mov  UREGS_rip(r(sp)),r(ax)
   70.83 +        mov  r(ax),VMCB_rip(r(cx))
   70.84 +        mov  UREGS_rsp(r(sp)),r(ax)
   70.85 +        mov  r(ax),VMCB_rsp(r(cx))
   70.86 +        mov  UREGS_eflags(r(sp)),r(ax)
   70.87 +        mov  r(ax),VMCB_rflags(r(cx))
   70.88 +
   70.89 +        mov  VCPU_svm_vmcb_pa(r(bx)),r(ax)
   70.90 +
   70.91 +#if defined(__x86_64__)
   70.92 +        pop  %r15
   70.93 +        pop  %r14
   70.94 +        pop  %r13
   70.95 +        pop  %r12
   70.96 +        pop  %rbp
   70.97 +        pop  %rbx
   70.98 +        pop  %r11
   70.99 +        pop  %r10
  70.100 +        pop  %r9
  70.101 +        pop  %r8
  70.102 +        add  $8,%rsp /* Skip %rax: restored by VMRUN. */
  70.103 +        pop  %rcx
  70.104 +        pop  %rdx
  70.105 +        pop  %rsi
  70.106 +        pop  %rdi
  70.107 +#else /* defined(__i386__) */
  70.108 +        pop  %ebx
  70.109 +        pop  %ecx
  70.110 +        pop  %edx
  70.111 +        pop  %esi
  70.112 +        pop  %edi
  70.113 +        pop  %ebp
  70.114 +#endif
  70.115 +
  70.116 +        VMRUN
  70.117 +
  70.118 +#if defined(__x86_64__)
  70.119 +        push %rdi
  70.120 +        push %rsi
  70.121 +        push %rdx
  70.122 +        push %rcx
  70.123 +        push %rax
  70.124 +        push %r8
  70.125 +        push %r9
  70.126 +        push %r10
  70.127 +        push %r11
  70.128 +        push %rbx
  70.129 +        push %rbp
  70.130 +        push %r12
  70.131 +        push %r13
  70.132 +        push %r14
  70.133 +        push %r15
  70.134 +#else /* defined(__i386__) */
  70.135 +        push %ebp
  70.136 +        push %edi
  70.137 +        push %esi
  70.138 +        push %edx
  70.139 +        push %ecx
  70.140 +        push %ebx
  70.141 +#endif
  70.142 +
  70.143 +        get_current(bx)
  70.144 +        movb $0,VCPU_svm_vmcb_in_sync(r(bx))
  70.145 +        mov  VCPU_svm_vmcb(r(bx)),r(cx)
  70.146 +        mov  VMCB_rax(r(cx)),r(ax)
  70.147 +        mov  r(ax),UREGS_rax(r(sp))
  70.148 +        mov  VMCB_rip(r(cx)),r(ax)
  70.149 +        mov  r(ax),UREGS_rip(r(sp))
  70.150 +        mov  VMCB_rsp(r(cx)),r(ax)
  70.151 +        mov  r(ax),UREGS_rsp(r(sp))
  70.152 +        mov  VMCB_rflags(r(cx)),r(ax)
  70.153 +        mov  r(ax),UREGS_eflags(r(sp))
  70.154 +
  70.155 +#ifndef NDEBUG
  70.156 +        mov  $0xbeef,%ax
  70.157 +        mov  %ax,UREGS_error_code(r(sp))
  70.158 +        mov  %ax,UREGS_entry_vector(r(sp))
  70.159 +        mov  %ax,UREGS_saved_upcall_mask(r(sp))
  70.160 +        mov  %ax,UREGS_cs(r(sp))
  70.161 +        mov  %ax,UREGS_ds(r(sp))
  70.162 +        mov  %ax,UREGS_es(r(sp))
  70.163 +        mov  %ax,UREGS_fs(r(sp))
  70.164 +        mov  %ax,UREGS_gs(r(sp))
  70.165 +        mov  %ax,UREGS_ss(r(sp))
  70.166 +#endif
  70.167 +
  70.168 +        STGI
  70.169 +.globl svm_stgi_label
  70.170 +svm_stgi_label:
  70.171 +        call_with_regs(svm_vmexit_handler)
  70.172 +        jmp  svm_asm_do_resume
  70.173 +
  70.174 +.Lsvm_process_softirqs:
  70.175 +        STGI
  70.176 +        call do_softirq
  70.177 +        jmp  svm_asm_do_resume
  70.178 +
  70.179 +.Lsvm_trace:
  70.180 +        call svm_trace_vmentry
  70.181 +        jmp  .Lsvm_trace_done
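
[annotation] The unified entry.S derives "current" by masking the stack pointer: the vcpu pointer lives in the topmost word of the power-of-two-sized per-cpu stack. A C rendering of the get_current() macro above, purely illustrative (STACK_SIZE is assumed here, and the dereference is only safe on a real Xen stack):

    #include <stdint.h>

    #define STACK_SIZE     (8 * 1024)       /* assumed; power of two */
    #define BYTES_PER_LONG sizeof(unsigned long)

    struct vcpu;

    struct vcpu *get_current(unsigned long sp)
    {
        unsigned long p = STACK_SIZE - BYTES_PER_LONG;  /* mov $..., reg  */
        p |= sp;                                        /* or  sp, reg    */
        p &= ~(BYTES_PER_LONG - 1);                     /* and mask, reg  */
        return *(struct vcpu **)p;                      /* mov (reg), reg */
    }

The r()/addr_of()/call_with_regs() macros let this single file replace the previously duplicated x86_32 and x86_64 exits.S variants deleted below.
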
    71.1 --- a/xen/arch/x86/hvm/svm/intr.c	Thu Apr 24 14:02:16 2008 -0600
    71.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Thu Apr 24 14:08:29 2008 -0600
    71.3 @@ -102,15 +102,17 @@ static void svm_dirq_assist(struct vcpu 
    71.4      struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
    71.5      struct dev_intx_gsi_link *digl;
    71.6  
    71.7 -    if ( !amd_iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
    71.8 +    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
    71.9          return;
   71.10  
   71.11      for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
   71.12            irq < NR_IRQS;
   71.13            irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
   71.14      {
   71.15 +        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
   71.16 +            continue;
   71.17 +
   71.18          stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
   71.19 -        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
   71.20  
   71.21          list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
   71.22          {
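
[annotation] The intr.c hunk above replaces a plain clear_bit() after the loop's test with a single atomic test_and_clear_bit() and skips the iteration if the bit was already gone, so a dirq observed by two paths is acted on exactly once. A sketch of the primitive, using GCC's __sync builtin as a stand-in for Xen's implementation:

    static inline int test_and_clear_bit_sketch(int nr,
                                                volatile unsigned long *addr)
    {
        unsigned long mask = 1UL << (nr % (8 * sizeof(unsigned long)));
        volatile unsigned long *word = addr + nr / (8 * sizeof(unsigned long));
        /* Atomically fetch the old word and clear the bit in one step. */
        unsigned long old = __sync_fetch_and_and(word, ~mask);
        return (old & mask) != 0;
    }

    int main(void)
    {
        unsigned long bits = 1UL << 5;
        /* First claim succeeds; the second sees the bit already cleared. */
        return ((test_and_clear_bit_sketch(5, &bits) == 1) &&
                (test_and_clear_bit_sketch(5, &bits) == 0)) ? 0 : 1;
    }
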
    72.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Apr 24 14:02:16 2008 -0600
    72.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Apr 24 14:08:29 2008 -0600
    72.3 @@ -911,6 +911,9 @@ static void svm_cpuid_intercept(
    72.4              __clear_bit(X86_FEATURE_PAE & 31, edx);
    72.5          __clear_bit(X86_FEATURE_PSE36 & 31, edx);
    72.6  
    72.7 +        /* We always support MTRR MSRs. */
    72.8 +        *edx |= bitmaskof(X86_FEATURE_MTRR);
    72.9 +
   72.10          /* Filter all other features according to a whitelist. */
   72.11          *ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
   72.12                   bitmaskof(X86_FEATURE_ALTMOVCR) |
   72.13 @@ -924,7 +927,9 @@ static void svm_cpuid_intercept(
   72.14                   bitmaskof(X86_FEATURE_SYSCALL) |
   72.15                   bitmaskof(X86_FEATURE_MP) |
   72.16                   bitmaskof(X86_FEATURE_MMXEXT) |
   72.17 -                 bitmaskof(X86_FEATURE_FFXSR));
   72.18 +                 bitmaskof(X86_FEATURE_FFXSR) |
   72.19 +                 bitmaskof(X86_FEATURE_3DNOW) |
   72.20 +                 bitmaskof(X86_FEATURE_3DNOWEXT));
   72.21          break;
   72.22  
   72.23      case 0x80000007:
   72.24 @@ -981,14 +986,6 @@ static int svm_msr_read_intercept(struct
   72.25  
   72.26      switch ( ecx )
   72.27      {
   72.28 -    case MSR_IA32_TSC:
   72.29 -        msr_content = hvm_get_guest_time(v);
   72.30 -        break;
   72.31 -
   72.32 -    case MSR_IA32_APICBASE:
   72.33 -        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   72.34 -        break;
   72.35 -
   72.36      case MSR_EFER:
   72.37          msr_content = v->arch.hvm_vcpu.guest_efer;
   72.38          break;
   72.39 @@ -1014,18 +1011,6 @@ static int svm_msr_read_intercept(struct
   72.40      case MSR_K8_VM_HSAVE_PA:
   72.41          goto gpf;
   72.42  
   72.43 -    case MSR_IA32_MCG_CAP:
   72.44 -    case MSR_IA32_MCG_STATUS:
   72.45 -    case MSR_IA32_MC0_STATUS:
   72.46 -    case MSR_IA32_MC1_STATUS:
   72.47 -    case MSR_IA32_MC2_STATUS:
   72.48 -    case MSR_IA32_MC3_STATUS:
   72.49 -    case MSR_IA32_MC4_STATUS:
   72.50 -    case MSR_IA32_MC5_STATUS:
   72.51 -        /* No point in letting the guest see real MCEs */
   72.52 -        msr_content = 0;
   72.53 -        break;
   72.54 -
   72.55      case MSR_IA32_DEBUGCTLMSR:
   72.56          msr_content = vmcb->debugctlmsr;
   72.57          break;
   72.58 @@ -1083,15 +1068,6 @@ static int svm_msr_write_intercept(struc
   72.59  
   72.60      switch ( ecx )
   72.61      {
   72.62 -    case MSR_IA32_TSC:
   72.63 -        hvm_set_guest_time(v, msr_content);
   72.64 -        pt_reset(v);
   72.65 -        break;
   72.66 -
   72.67 -    case MSR_IA32_APICBASE:
   72.68 -        vlapic_msr_set(vcpu_vlapic(v), msr_content);
   72.69 -        break;
   72.70 -
   72.71      case MSR_K8_VM_HSAVE_PA:
   72.72          goto gpf;
   72.73  
   72.74 @@ -1152,12 +1128,12 @@ static void svm_do_msr_access(struct cpu
   72.75  
   72.76      if ( vmcb->exitinfo1 == 0 )
   72.77      {
   72.78 -        rc = svm_msr_read_intercept(regs);
   72.79 +        rc = hvm_msr_read_intercept(regs);
   72.80          inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
   72.81      }
   72.82      else
   72.83      {
   72.84 -        rc = svm_msr_write_intercept(regs);
   72.85 +        rc = hvm_msr_write_intercept(regs);
   72.86          inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
   72.87      }
   72.88  
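
[annotation] svm_cpuid_intercept() filters guest-visible features through an explicit whitelist and then forces on bits Xen always emulates (MTRR here, now that the common intercept handles the MTRR MSRs), with 3DNow! newly whitelisted above. A sketch of the mask-then-force pattern; FEAT_MTRR uses the real CPUID.1:EDX bit position, the whitelist argument is illustrative:

    #include <stdint.h>

    #define FEAT_MTRR (1u << 12)   /* CPUID.1:EDX bit 12 per the SDM */

    static uint32_t filter_features(uint32_t hw_edx, uint32_t whitelist)
    {
        uint32_t edx = hw_edx & whitelist;  /* hide what Xen can't virtualize */
        edx |= FEAT_MTRR;                   /* always emulated, so always set */
        return edx;
    }

    int main(void)
    {
        return (filter_features(0, 0) == FEAT_MTRR) ? 0 : 1;
    }

The TSC, APICBASE and MCE MSR cases removed from svm.c above are not lost: they moved into the vendor-neutral hvm_msr_read/write_intercept() in hvm.c.
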
    73.1 --- a/xen/arch/x86/hvm/svm/x86_32/Makefile	Thu Apr 24 14:02:16 2008 -0600
    73.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    73.3 @@ -1,1 +0,0 @@
    73.4 -obj-y += exits.o
    74.1 --- a/xen/arch/x86/hvm/svm/x86_32/exits.S	Thu Apr 24 14:02:16 2008 -0600
    74.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    74.3 @@ -1,131 +0,0 @@
    74.4 -/*
    74.5 - * exits.S: SVM architecture-specific exit handling.
    74.6 - * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
    74.7 - * Copyright (c) 2004, Intel Corporation.
    74.8 - *
    74.9 - * This program is free software; you can redistribute it and/or modify it
   74.10 - * under the terms and conditions of the GNU General Public License,
   74.11 - * version 2, as published by the Free Software Foundation.
   74.12 - *
   74.13 - * This program is distributed in the hope it will be useful, but WITHOUT
   74.14 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   74.15 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   74.16 - * more details.
   74.17 - *
   74.18 - * You should have received a copy of the GNU General Public License along with
   74.19 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   74.20 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   74.21 - */
   74.22 -
   74.23 -#include <xen/config.h>
   74.24 -#include <xen/errno.h>
   74.25 -#include <xen/softirq.h>
   74.26 -#include <asm/asm_defns.h>
   74.27 -#include <asm/apicdef.h>
   74.28 -#include <asm/page.h>
   74.29 -#include <public/xen.h>
   74.30 -
   74.31 -#define GET_CURRENT(reg)         \
   74.32 -        movl $STACK_SIZE-4,reg;  \
   74.33 -        orl  %esp,reg;           \
   74.34 -        andl $~3,reg;            \
   74.35 -        movl (reg),reg;
   74.36 -
   74.37 -#define VMRUN  .byte 0x0F,0x01,0xD8
   74.38 -#define STGI   .byte 0x0F,0x01,0xDC
   74.39 -#define CLGI   .byte 0x0F,0x01,0xDD
   74.40 -
   74.41 -ENTRY(svm_asm_do_resume)
   74.42 -        GET_CURRENT(%ebx)
   74.43 -        CLGI
   74.44 -
   74.45 -        movl VCPU_processor(%ebx),%eax
   74.46 -        shl  $IRQSTAT_shift,%eax
   74.47 -        testl $~0,irq_stat(%eax,1)
   74.48 -        jnz  svm_process_softirqs
   74.49 -
   74.50 -        call svm_asid_handle_vmrun
   74.51 -        call svm_intr_assist
   74.52 -
   74.53 -        /* Check if the trace buffer is initialized. 
   74.54 -         * Because the below condition is unlikely, we jump out of line
   74.55 -         * instead of having a mostly taken branch over the unlikely code.
   74.56 -         */
   74.57 -        cmpb $0,tb_init_done
   74.58 -        jnz  svm_trace
   74.59 -svm_trace_done:
   74.60 -
   74.61 -        movl VCPU_svm_vmcb(%ebx),%ecx
   74.62 -        movl UREGS_eax(%esp),%eax
   74.63 -        movl %eax,VMCB_rax(%ecx)
   74.64 -        movl UREGS_eip(%esp),%eax
   74.65 -        movl %eax,VMCB_rip(%ecx)
   74.66 -        movl UREGS_esp(%esp),%eax
   74.67 -        movl %eax,VMCB_rsp(%ecx)
   74.68 -        movl UREGS_eflags(%esp),%eax
   74.69 -        movl %eax,VMCB_rflags(%ecx)
   74.70 -
   74.71 -        movl VCPU_svm_vmcb_pa(%ebx),%eax
   74.72 -        popl %ebx
   74.73 -        popl %ecx
   74.74 -        popl %edx
   74.75 -        popl %esi
   74.76 -        popl %edi
   74.77 -        popl %ebp
   74.78 -
   74.79 -        VMRUN
   74.80 -
   74.81 -        pushl %ebp
   74.82 -        pushl %edi
   74.83 -        pushl %esi
   74.84 -        pushl %edx
   74.85 -        pushl %ecx
   74.86 -        pushl %ebx
   74.87 -
   74.88 -        GET_CURRENT(%ebx)
   74.89 -        movb $0,VCPU_svm_vmcb_in_sync(%ebx)
   74.90 -        movl VCPU_svm_vmcb(%ebx),%ecx
   74.91 -        movl VMCB_rax(%ecx),%eax
   74.92 -        movl %eax,UREGS_eax(%esp)
   74.93 -        movl VMCB_rip(%ecx),%eax
   74.94 -        movl %eax,UREGS_eip(%esp)
   74.95 -        movl VMCB_rsp(%ecx),%eax
   74.96 -        movl %eax,UREGS_esp(%esp)
   74.97 -        movl VMCB_rflags(%ecx),%eax
   74.98 -        movl %eax,UREGS_eflags(%esp)
   74.99 -
  74.100 -#ifndef NDEBUG
  74.101 -        movw $0xbeef,%ax
  74.102 -        movw %ax,UREGS_error_code(%esp)
  74.103 -        movw %ax,UREGS_entry_vector(%esp)
  74.104 -        movw %ax,UREGS_saved_upcall_mask(%esp)
  74.105 -        movw %ax,UREGS_cs(%esp)
  74.106 -        movw %ax,UREGS_ds(%esp)
  74.107 -        movw %ax,UREGS_es(%esp)
  74.108 -        movw %ax,UREGS_fs(%esp)
  74.109 -        movw %ax,UREGS_gs(%esp)
  74.110 -        movw %ax,UREGS_ss(%esp)
  74.111 -#endif
  74.112 -
  74.113 -        STGI
  74.114 -.globl svm_stgi_label;
  74.115 -svm_stgi_label:
  74.116 -        movl %esp,%eax
  74.117 -        push %eax
  74.118 -        call svm_vmexit_handler
  74.119 -        addl $4,%esp
  74.120 -        jmp  svm_asm_do_resume
  74.121 -
  74.122 -        ALIGN
  74.123 -svm_process_softirqs:
  74.124 -        STGI
  74.125 -        call do_softirq
  74.126 -        jmp  svm_asm_do_resume
  74.127 -
  74.128 -svm_trace:
  74.129 -        /* Call out to C, as this is not speed critical path
  74.130 -         * Note: svm_trace_vmentry will recheck the tb_init_done,
  74.131 -         * but this is on the slow path, so who cares 
  74.132 -         */
  74.133 -        call svm_trace_vmentry
  74.134 -        jmp  svm_trace_done
    75.1 --- a/xen/arch/x86/hvm/svm/x86_64/Makefile	Thu Apr 24 14:02:16 2008 -0600
    75.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    75.3 @@ -1,1 +0,0 @@
    75.4 -obj-y += exits.o
    76.1 --- a/xen/arch/x86/hvm/svm/x86_64/exits.S	Thu Apr 24 14:02:16 2008 -0600
    76.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    76.3 @@ -1,148 +0,0 @@
    76.4 -/*
    76.5 - * exits.S: AMD-V architecture-specific exit handling.
    76.6 - * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
    76.7 - * Copyright (c) 2004, Intel Corporation.
    76.8 - *
    76.9 - * This program is free software; you can redistribute it and/or modify it
   76.10 - * under the terms and conditions of the GNU General Public License,
   76.11 - * version 2, as published by the Free Software Foundation.
   76.12 - *
   76.13 - * This program is distributed in the hope it will be useful, but WITHOUT
   76.14 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   76.15 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   76.16 - * more details.
   76.17 - *
   76.18 - * You should have received a copy of the GNU General Public License along with
   76.19 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   76.20 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   76.21 - */
   76.22 -
   76.23 -#include <xen/config.h>
   76.24 -#include <xen/errno.h>
   76.25 -#include <xen/softirq.h>
   76.26 -#include <asm/asm_defns.h>
   76.27 -#include <asm/apicdef.h>
   76.28 -#include <asm/page.h>
   76.29 -#include <public/xen.h>
   76.30 -
   76.31 -#define GET_CURRENT(reg)         \
   76.32 -        movq $STACK_SIZE-8,reg;  \
   76.33 -        orq  %rsp,reg;           \
   76.34 -        andq $~7,reg;            \
   76.35 -        movq (reg),reg;
   76.36 -
   76.37 -#define VMRUN  .byte 0x0F,0x01,0xD8
   76.38 -#define STGI   .byte 0x0F,0x01,0xDC
   76.39 -#define CLGI   .byte 0x0F,0x01,0xDD
   76.40 -
   76.41 -ENTRY(svm_asm_do_resume)
   76.42 -        GET_CURRENT(%rbx)
   76.43 -        CLGI
   76.44 -
   76.45 -        movl VCPU_processor(%rbx),%eax
   76.46 -        shl  $IRQSTAT_shift,%rax
   76.47 -        leaq irq_stat(%rip),%rdx
   76.48 -        testl $~0,(%rdx,%rax,1)
   76.49 -        jnz  svm_process_softirqs
   76.50 -
   76.51 -        call svm_asid_handle_vmrun
   76.52 -        call svm_intr_assist
   76.53 -
   76.54 -        /* Check if the trace buffer is initialized. 
   76.55 -         * Because the below condition is unlikely, we jump out of line
   76.56 -         * instead of having a mostly taken branch over the unlikely code.
   76.57 -         */
   76.58 -        cmpb $0,tb_init_done(%rip)
   76.59 -        jnz  svm_trace
   76.60 -svm_trace_done:
   76.61 -
   76.62 -        movq VCPU_svm_vmcb(%rbx),%rcx
   76.63 -        movq UREGS_rax(%rsp),%rax
   76.64 -        movq %rax,VMCB_rax(%rcx)
   76.65 -        movq UREGS_rip(%rsp),%rax
   76.66 -        movq %rax,VMCB_rip(%rcx)
   76.67 -        movq UREGS_rsp(%rsp),%rax
   76.68 -        movq %rax,VMCB_rsp(%rcx)
   76.69 -        movq UREGS_eflags(%rsp),%rax
   76.70 -        movq %rax,VMCB_rflags(%rcx)
   76.71 -
   76.72 -        movq VCPU_svm_vmcb_pa(%rbx),%rax
   76.73 -        popq %r15
   76.74 -        popq %r14
   76.75 -        popq %r13
   76.76 -        popq %r12
   76.77 -        popq %rbp
   76.78 -        popq %rbx
   76.79 -        popq %r11
   76.80 -        popq %r10
   76.81 -        popq %r9
   76.82 -        popq %r8
   76.83 -        addq $8,%rsp /* Skip %rax: restored by VMRUN. */
   76.84 -        popq %rcx
   76.85 -        popq %rdx
   76.86 -        popq %rsi
   76.87 -        popq %rdi
   76.88 -
   76.89 -        VMRUN
   76.90 -
   76.91 -        pushq %rdi
   76.92 -        pushq %rsi
   76.93 -        pushq %rdx
   76.94 -        pushq %rcx
   76.95 -        pushq %rax
   76.96 -        pushq %r8
   76.97 -        pushq %r9
   76.98 -        pushq %r10
   76.99 -        pushq %r11
  76.100 -        pushq %rbx
  76.101 -        pushq %rbp
  76.102 -        pushq %r12
  76.103 -        pushq %r13
  76.104 -        pushq %r14
  76.105 -        pushq %r15
  76.106 -
  76.107 -        GET_CURRENT(%rbx)
  76.108 -        movb $0,VCPU_svm_vmcb_in_sync(%rbx)
  76.109 -        movq VCPU_svm_vmcb(%rbx),%rcx
  76.110 -        movq VMCB_rax(%rcx),%rax
  76.111 -        movq %rax,UREGS_rax(%rsp)
  76.112 -        movq VMCB_rip(%rcx),%rax
  76.113 -        movq %rax,UREGS_rip(%rsp)
  76.114 -        movq VMCB_rsp(%rcx),%rax
  76.115 -        movq %rax,UREGS_rsp(%rsp)
  76.116 -        movq VMCB_rflags(%rcx),%rax
  76.117 -        movq %rax,UREGS_eflags(%rsp)
  76.118 -
  76.119 -#ifndef NDEBUG
  76.120 -        movw $0xbeef,%ax
  76.121 -        movw %ax,UREGS_error_code(%rsp)
  76.122 -        movw %ax,UREGS_entry_vector(%rsp)
  76.123 -        movw %ax,UREGS_saved_upcall_mask(%rsp)
  76.124 -        movw %ax,UREGS_cs(%rsp)
  76.125 -        movw %ax,UREGS_ds(%rsp)
  76.126 -        movw %ax,UREGS_es(%rsp)
  76.127 -        movw %ax,UREGS_fs(%rsp)
  76.128 -        movw %ax,UREGS_gs(%rsp)
  76.129 -        movw %ax,UREGS_ss(%rsp)
  76.130 -#endif
  76.131 -
  76.132 -        STGI
  76.133 -.globl svm_stgi_label;
  76.134 -svm_stgi_label:
  76.135 -        movq %rsp,%rdi
  76.136 -        call svm_vmexit_handler
  76.137 -        jmp  svm_asm_do_resume
  76.138 -
  76.139 -        ALIGN
  76.140 -svm_process_softirqs:
  76.141 -        STGI
  76.142 -        call do_softirq
  76.143 -        jmp  svm_asm_do_resume
  76.144 -
  76.145 -svm_trace:
  76.146 -        /* Call out to C, as this is not speed critical path
  76.147 -         * Note: svm_trace_vmentry will recheck the tb_init_done,
  76.148 -         * but this is on the slow path, so who cares 
  76.149 -         */
  76.150 -        call svm_trace_vmentry
  76.151 -        jmp  svm_trace_done
    77.1 --- a/xen/arch/x86/hvm/vmx/Makefile	Thu Apr 24 14:02:16 2008 -0600
    77.2 +++ b/xen/arch/x86/hvm/vmx/Makefile	Thu Apr 24 14:08:29 2008 -0600
    77.3 @@ -1,6 +1,4 @@
    77.4 -subdir-$(x86_32) += x86_32
    77.5 -subdir-$(x86_64) += x86_64
    77.6 -
    77.7 +obj-y += entry.o
    77.8  obj-y += intr.o
    77.9  obj-y += realmode.o
   77.10  obj-y += vmcs.o
    78.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    78.2 +++ b/xen/arch/x86/hvm/vmx/entry.S	Thu Apr 24 14:08:29 2008 -0600
    78.3 @@ -0,0 +1,198 @@
    78.4 +/*
    78.5 + * entry.S: VMX architecture-specific entry/exit handling.
    78.6 + * Copyright (c) 2004, Intel Corporation.
    78.7 + * Copyright (c) 2008, Citrix Systems, Inc.
    78.8 + *
    78.9 + * This program is free software; you can redistribute it and/or modify it
   78.10 + * under the terms and conditions of the GNU General Public License,
   78.11 + * version 2, as published by the Free Software Foundation.
   78.12 + *
   78.13 + * This program is distributed in the hope it will be useful, but WITHOUT
   78.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   78.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   78.16 + * more details.
   78.17 + *
   78.18 + * You should have received a copy of the GNU General Public License along with
   78.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   78.20 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   78.21 + */
   78.22 +
   78.23 +#include <xen/config.h>
   78.24 +#include <xen/errno.h>
   78.25 +#include <xen/softirq.h>
   78.26 +#include <asm/types.h>
   78.27 +#include <asm/asm_defns.h>
   78.28 +#include <asm/apicdef.h>
   78.29 +#include <asm/page.h>
   78.30 +#include <public/xen.h>
   78.31 +
   78.32 +#define VMRESUME     .byte 0x0f,0x01,0xc3
   78.33 +#define VMLAUNCH     .byte 0x0f,0x01,0xc2
   78.34 +#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
   78.35 +#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
   78.36 +
   78.37 +/* VMCS field encodings */
   78.38 +#define GUEST_RSP    0x681c
   78.39 +#define GUEST_RIP    0x681e
   78.40 +#define GUEST_RFLAGS 0x6820
   78.41 +
   78.42 +#define get_current(reg)                        \
   78.43 +        mov $STACK_SIZE-BYTES_PER_LONG, r(reg); \
   78.44 +        or  r(sp), r(reg);                      \
   78.45 +        and $~(BYTES_PER_LONG-1),r(reg);        \
   78.46 +        mov (r(reg)),r(reg);
   78.47 +
   78.48 +#if defined(__x86_64__)
   78.49 +#define r(reg) %r##reg
   78.50 +#define addr_of(lbl) lbl(%rip)
   78.51 +#define call_with_regs(fn)                      \
   78.52 +        mov  %rsp,%rdi;                         \
   78.53 +        call fn;
   78.54 +#else /* defined(__i386__) */
   78.55 +#define r(reg) %e##reg
   78.56 +#define addr_of(lbl) lbl
   78.57 +#define UREGS_rip UREGS_eip
   78.58 +#define UREGS_rsp UREGS_esp
   78.59 +#define call_with_regs(fn)                      \
   78.60 +        mov  %esp,%eax;                         \
   78.61 +        push %eax;                              \
   78.62 +        call fn;                                \
   78.63 +        add  $4,%esp;
   78.64 +#endif
   78.65 +
   78.66 +        ALIGN
   78.67 +.globl vmx_asm_vmexit_handler
   78.68 +vmx_asm_vmexit_handler:
   78.69 +#if defined(__x86_64__)
   78.70 +        push %rdi
   78.71 +        push %rsi
   78.72 +        push %rdx
   78.73 +        push %rcx
   78.74 +        push %rax
   78.75 +        push %r8
   78.76 +        push %r9
   78.77 +        push %r10
   78.78 +        push %r11
   78.79 +        push %rbx
   78.80 +        push %rbp
   78.81 +        push %r12
   78.82 +        push %r13
   78.83 +        push %r14
   78.84 +        push %r15
   78.85 +#else /* defined(__i386__) */
   78.86 +        push %eax
   78.87 +        push %ebp
   78.88 +        push %edi
   78.89 +        push %esi
   78.90 +        push %edx
   78.91 +        push %ecx
   78.92 +        push %ebx
   78.93 +#endif
   78.94 +
   78.95 +        get_current(bx)
   78.96 +
   78.97 +        movb $1,VCPU_vmx_launched(r(bx))
   78.98 +
   78.99 +        lea  UREGS_rip(r(sp)),r(di)
  78.100 +        mov  $GUEST_RIP,%eax
  78.101 +        /*VMREAD(UREGS_rip)*/
  78.102 +        .byte 0x0f,0x78,0x07  /* vmread r(ax),(r(di)) */
  78.103 +        mov  $GUEST_RSP,%eax
  78.104 +        VMREAD(UREGS_rsp)
  78.105 +        mov  $GUEST_RFLAGS,%eax
  78.106 +        VMREAD(UREGS_eflags)
  78.107 +
  78.108 +        mov  %cr2,r(ax)
  78.109 +        mov  r(ax),VCPU_hvm_guest_cr2(r(bx))
  78.110 +
  78.111 +#ifndef NDEBUG
  78.112 +        mov  $0xbeef,%ax
  78.113 +        mov  %ax,UREGS_error_code(r(sp))
  78.114 +        mov  %ax,UREGS_entry_vector(r(sp))
  78.115 +        mov  %ax,UREGS_saved_upcall_mask(r(sp))
  78.116 +        mov  %ax,UREGS_cs(r(sp))
  78.117 +        mov  %ax,UREGS_ds(r(sp))
  78.118 +        mov  %ax,UREGS_es(r(sp))
  78.119 +        mov  %ax,UREGS_fs(r(sp))
  78.120 +        mov  %ax,UREGS_gs(r(sp))
  78.121 +        mov  %ax,UREGS_ss(r(sp))
  78.122 +#endif
  78.123 +
  78.124 +        call_with_regs(vmx_vmexit_handler)
  78.125 +
  78.126 +.globl vmx_asm_do_vmentry
  78.127 +vmx_asm_do_vmentry:
  78.128 +        get_current(bx)
   78.129 +        cli                             # tests must not race interrupts
  78.130 +
  78.131 +        mov  VCPU_processor(r(bx)),%eax
  78.132 +        shl  $IRQSTAT_shift,r(ax)
  78.133 +        lea  addr_of(irq_stat),r(dx)
  78.134 +        cmpl $0,(r(dx),r(ax),1)
  78.135 +        jnz  .Lvmx_process_softirqs
  78.136 +
  78.137 +        call vmx_intr_assist
  78.138 +
  78.139 +        testb $0xff,VCPU_vmx_emul(r(bx))
  78.140 +        jnz  .Lvmx_goto_realmode
  78.141 +
  78.142 +        mov  VCPU_hvm_guest_cr2(r(bx)),r(ax)
  78.143 +        mov  r(ax),%cr2
  78.144 +        call vmx_trace_vmentry
  78.145 +
  78.146 +        lea  UREGS_rip(r(sp)),r(di)
  78.147 +        mov  $GUEST_RIP,%eax
  78.148 +        /*VMWRITE(UREGS_rip)*/
  78.149 +        .byte 0x0f,0x79,0x07  /* vmwrite (r(di)),r(ax) */
  78.150 +        mov  $GUEST_RSP,%eax
  78.151 +        VMWRITE(UREGS_rsp)
  78.152 +        mov  $GUEST_RFLAGS,%eax
  78.153 +        VMWRITE(UREGS_eflags)
  78.154 +
  78.155 +        cmpb $0,VCPU_vmx_launched(r(bx))
  78.156 +#if defined(__x86_64__)
  78.157 +        pop  %r15
  78.158 +        pop  %r14
  78.159 +        pop  %r13
  78.160 +        pop  %r12
  78.161 +        pop  %rbp
  78.162 +        pop  %rbx
  78.163 +        pop  %r11
  78.164 +        pop  %r10
  78.165 +        pop  %r9
  78.166 +        pop  %r8
  78.167 +        pop  %rax
  78.168 +        pop  %rcx
  78.169 +        pop  %rdx
  78.170 +        pop  %rsi
  78.171 +        pop  %rdi
  78.172 +#else /* defined(__i386__) */
  78.173 +        pop  %ebx
  78.174 +        pop  %ecx
  78.175 +        pop  %edx
  78.176 +        pop  %esi
  78.177 +        pop  %edi
  78.178 +        pop  %ebp
  78.179 +        pop  %eax
  78.180 +#endif
  78.181 +        je   .Lvmx_launch
  78.182 +
  78.183 +/*.Lvmx_resume:*/
  78.184 +        VMRESUME
  78.185 +        call vm_resume_fail
  78.186 +        ud2
  78.187 +
  78.188 +.Lvmx_launch:
  78.189 +        VMLAUNCH
  78.190 +        call vm_launch_fail
  78.191 +        ud2
  78.192 +
  78.193 +.Lvmx_goto_realmode:
  78.194 +        sti
  78.195 +        call_with_regs(vmx_realmode)
  78.196 +        jmp  vmx_asm_do_vmentry
  78.197 +
  78.198 +.Lvmx_process_softirqs:
  78.199 +        sti
  78.200 +        call do_softirq
  78.201 +        jmp  vmx_asm_do_vmentry
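
The unified entry.S above replaces the two per-width exits.S files (deleted below) by parameterising register width: r() picks %r- or %e-prefixed registers and call_with_regs() hides the register-vs-stack argument conventions. The get_current() macro derives the current vcpu from the stack pointer alone; a minimal C sketch of the same computation, assuming the vcpu pointer sits in the topmost word of the power-of-two-sized hypervisor stack:

    /* Sketch only: mirrors the get_current() asm macro above. */
    static inline struct vcpu *sketch_get_current(unsigned long sp)
    {
        unsigned long top = (sp | (STACK_SIZE - BYTES_PER_LONG))
                            & ~(BYTES_PER_LONG - 1);  /* last word of stack */
        return *(struct vcpu **)top;
    }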
    79.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Thu Apr 24 14:02:16 2008 -0600
    79.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Thu Apr 24 14:08:29 2008 -0600
    79.3 @@ -111,15 +111,17 @@ static void vmx_dirq_assist(struct vcpu 
    79.4      struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
    79.5      struct dev_intx_gsi_link *digl;
    79.6  
    79.7 -    if ( !vtd_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
    79.8 +    if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
    79.9          return;
   79.10  
   79.11      for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
   79.12            irq < NR_IRQS;
   79.13            irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
   79.14      {
   79.15 +        if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
   79.16 +            continue;
   79.17 +
   79.18          stop_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(irq)]);
   79.19 -        clear_bit(irq, &hvm_irq_dpci->dirq_mask);
   79.20  
   79.21          list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
   79.22          {
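
The vmx_dirq_assist() change above replaces a test-during-scan plus a separate non-atomic clear_bit() with a single test_and_clear_bit(), so claiming a pending dirq is atomic: only the path that actually flips the bit from 1 to 0 goes on to stop the timer and inject the interrupt. The pattern in isolation (do_work() is a hypothetical stand-in):

    /* Whoever atomically clears the bit owns the work; others skip it. */
    if ( test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
        do_work(irq);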
    80.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 24 14:02:16 2008 -0600
    80.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Apr 24 14:08:29 2008 -0600
    80.3 @@ -1622,17 +1622,11 @@ static int vmx_msr_read_intercept(struct
    80.4      u64 msr_content = 0;
    80.5      u32 ecx = regs->ecx, eax, edx;
    80.6      struct vcpu *v = current;
    80.7 -    int index;
    80.8 -    u64 *var_range_base = (u64*)v->arch.hvm_vcpu.mtrr.var_ranges;
    80.9 -    u64 *fixed_range_base =  (u64*)v->arch.hvm_vcpu.mtrr.fixed_ranges;
   80.10  
   80.11      HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
   80.12  
   80.13      switch ( ecx )
   80.14      {
   80.15 -    case MSR_IA32_TSC:
   80.16 -        msr_content = hvm_get_guest_time(v);
   80.17 -        break;
   80.18      case MSR_IA32_SYSENTER_CS:
   80.19          msr_content = (u32)__vmread(GUEST_SYSENTER_CS);
   80.20          break;
   80.21 @@ -1642,35 +1636,6 @@ static int vmx_msr_read_intercept(struct
   80.22      case MSR_IA32_SYSENTER_EIP:
   80.23          msr_content = __vmread(GUEST_SYSENTER_EIP);
   80.24          break;
   80.25 -    case MSR_IA32_APICBASE:
   80.26 -        msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   80.27 -        break;
   80.28 -    case MSR_IA32_CR_PAT:
   80.29 -        msr_content = v->arch.hvm_vcpu.pat_cr;
   80.30 -        break;
   80.31 -    case MSR_MTRRcap:
   80.32 -        msr_content = v->arch.hvm_vcpu.mtrr.mtrr_cap;
   80.33 -        break;
   80.34 -    case MSR_MTRRdefType:
   80.35 -        msr_content = v->arch.hvm_vcpu.mtrr.def_type
   80.36 -                        | (v->arch.hvm_vcpu.mtrr.enabled << 10);
   80.37 -        break;
   80.38 -    case MSR_MTRRfix64K_00000:
   80.39 -        msr_content = fixed_range_base[0];
   80.40 -        break;
   80.41 -    case MSR_MTRRfix16K_80000:
   80.42 -    case MSR_MTRRfix16K_A0000:
   80.43 -        index = regs->ecx - MSR_MTRRfix16K_80000;
   80.44 -        msr_content = fixed_range_base[index + 1];
   80.45 -        break;
   80.46 -    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
   80.47 -        index = regs->ecx - MSR_MTRRfix4K_C0000;
   80.48 -        msr_content = fixed_range_base[index + 3];
   80.49 -        break;
   80.50 -    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
   80.51 -        index = regs->ecx - MSR_IA32_MTRR_PHYSBASE0;
   80.52 -        msr_content = var_range_base[index];
   80.53 -        break;
   80.54      case MSR_IA32_DEBUGCTLMSR:
   80.55          msr_content = __vmread(GUEST_IA32_DEBUGCTL);
   80.56  #ifdef __i386__
   80.57 @@ -1679,17 +1644,6 @@ static int vmx_msr_read_intercept(struct
   80.58          break;
   80.59      case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
   80.60          goto gp_fault;
   80.61 -    case MSR_IA32_MCG_CAP:
   80.62 -    case MSR_IA32_MCG_STATUS:
   80.63 -    case MSR_IA32_MC0_STATUS:
   80.64 -    case MSR_IA32_MC1_STATUS:
   80.65 -    case MSR_IA32_MC2_STATUS:
   80.66 -    case MSR_IA32_MC3_STATUS:
   80.67 -    case MSR_IA32_MC4_STATUS:
   80.68 -    case MSR_IA32_MC5_STATUS:
   80.69 -        /* No point in letting the guest see real MCEs */
   80.70 -        msr_content = 0;
   80.71 -        break;
   80.72      case MSR_IA32_MISC_ENABLE:
   80.73          rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
   80.74          /* Debug Trace Store is not supported. */
   80.75 @@ -1729,8 +1683,8 @@ static int vmx_msr_read_intercept(struct
   80.76          goto gp_fault;
   80.77      }
   80.78  
   80.79 -    regs->eax = msr_content & 0xFFFFFFFF;
   80.80 -    regs->edx = msr_content >> 32;
   80.81 +    regs->eax = (uint32_t)msr_content;
   80.82 +    regs->edx = (uint32_t)(msr_content >> 32);
   80.83  
   80.84  done:
   80.85      hvmtrace_msr_read(v, ecx, msr_content);
   80.86 @@ -1833,19 +1787,11 @@ void vmx_vlapic_msr_changed(struct vcpu 
   80.87      vmx_vmcs_exit(v);
   80.88  }
   80.89  
   80.90 -extern bool_t mtrr_var_range_msr_set(struct mtrr_state *v,
   80.91 -        u32 msr, u64 msr_content);
   80.92 -extern bool_t mtrr_fix_range_msr_set(struct mtrr_state *v,
   80.93 -        int row, u64 msr_content);
   80.94 -extern bool_t mtrr_def_type_msr_set(struct mtrr_state *v, u64 msr_content);
   80.95 -extern bool_t pat_msr_set(u64 *pat, u64 msr);
   80.96 -
   80.97  static int vmx_msr_write_intercept(struct cpu_user_regs *regs)
   80.98  {
   80.99      u32 ecx = regs->ecx;
  80.100      u64 msr_content;
  80.101      struct vcpu *v = current;
  80.102 -    int index;
  80.103  
  80.104      HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x",
  80.105                  ecx, (u32)regs->eax, (u32)regs->edx);
  80.106 @@ -1856,10 +1802,6 @@ static int vmx_msr_write_intercept(struc
  80.107  
  80.108      switch ( ecx )
  80.109      {
  80.110 -    case MSR_IA32_TSC:
  80.111 -        hvm_set_guest_time(v, msr_content);
  80.112 -        pt_reset(v);
  80.113 -        break;
  80.114      case MSR_IA32_SYSENTER_CS:
  80.115          __vmwrite(GUEST_SYSENTER_CS, msr_content);
  80.116          break;
  80.117 @@ -1869,41 +1811,6 @@ static int vmx_msr_write_intercept(struc
  80.118      case MSR_IA32_SYSENTER_EIP:
  80.119          __vmwrite(GUEST_SYSENTER_EIP, msr_content);
  80.120          break;
  80.121 -    case MSR_IA32_APICBASE:
  80.122 -        vlapic_msr_set(vcpu_vlapic(v), msr_content);
  80.123 -        break;
  80.124 -    case MSR_IA32_CR_PAT:
  80.125 -        if ( !pat_msr_set(&v->arch.hvm_vcpu.pat_cr, msr_content) )
  80.126 -           goto gp_fault;
  80.127 -        break;
  80.128 -    case MSR_MTRRdefType:
  80.129 -        if ( !mtrr_def_type_msr_set(&v->arch.hvm_vcpu.mtrr, msr_content) )
  80.130 -           goto gp_fault;
  80.131 -        break;
  80.132 -    case MSR_MTRRfix64K_00000:
  80.133 -        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr, 0, msr_content) )
  80.134 -            goto gp_fault;
  80.135 -        break;
  80.136 -    case MSR_MTRRfix16K_80000:
  80.137 -    case MSR_MTRRfix16K_A0000:
  80.138 -        index = regs->ecx - MSR_MTRRfix16K_80000 + 1;
  80.139 -        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  80.140 -                                     index, msr_content) )
  80.141 -            goto gp_fault;
  80.142 -        break;
  80.143 -    case MSR_MTRRfix4K_C0000...MSR_MTRRfix4K_F8000:
  80.144 -        index = regs->ecx - MSR_MTRRfix4K_C0000 + 3;
  80.145 -        if ( !mtrr_fix_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  80.146 -                                     index, msr_content) )
  80.147 -            goto gp_fault;
  80.148 -        break;
  80.149 -    case MSR_IA32_MTRR_PHYSBASE0...MSR_IA32_MTRR_PHYSMASK7:
  80.150 -        if ( !mtrr_var_range_msr_set(&v->arch.hvm_vcpu.mtrr,
  80.151 -                                     regs->ecx, msr_content) )
  80.152 -            goto gp_fault;
  80.153 -        break;
  80.154 -    case MSR_MTRRcap:
  80.155 -        goto gp_fault;
  80.156      case MSR_IA32_DEBUGCTLMSR: {
  80.157          int i, rc = 0;
  80.158  
  80.159 @@ -2330,12 +2237,12 @@ asmlinkage void vmx_vmexit_handler(struc
  80.160          break;
  80.161      case EXIT_REASON_MSR_READ:
  80.162          inst_len = __get_instruction_length(); /* Safe: RDMSR */
  80.163 -        if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
  80.164 +        if ( hvm_msr_read_intercept(regs) == X86EMUL_OKAY )
  80.165              __update_guest_eip(inst_len);
  80.166          break;
  80.167      case EXIT_REASON_MSR_WRITE:
  80.168          inst_len = __get_instruction_length(); /* Safe: WRMSR */
  80.169 -        if ( vmx_msr_write_intercept(regs) == X86EMUL_OKAY )
  80.170 +        if ( hvm_msr_write_intercept(regs) == X86EMUL_OKAY )
  80.171              __update_guest_eip(inst_len);
  80.172          break;
  80.173  
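
The MSR cases deleted from vmx.c (TSC, APICBASE, PAT, the MTRR family, the MCE status banks) are not lost: the exit handler now routes RDMSR/WRMSR through hvm_msr_read_intercept()/hvm_msr_write_intercept(), a vendor-neutral HVM layer that can handle those registers once for both VMX and SVM and defer the remainder to per-vendor code. A hedged sketch of that dispatch shape (the helper names are illustrative, not the real API):

    /* Sketch: common MSRs first, vendor hook as the fall-through tail. */
    int msr_read_dispatch(struct cpu_user_regs *regs)
    {
        switch ( (uint32_t)regs->ecx )
        {
        case MSR_IA32_TSC:
        case MSR_IA32_APICBASE:
        case MSR_IA32_CR_PAT:
            return common_msr_read(regs);   /* hypothetical common helper */
        default:
            return vendor_msr_read(regs);   /* e.g. the VMX cases kept above */
        }
    }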
    81.1 --- a/xen/arch/x86/hvm/vmx/x86_32/Makefile	Thu Apr 24 14:02:16 2008 -0600
    81.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    81.3 @@ -1,1 +0,0 @@
    81.4 -obj-y += exits.o
    82.1 --- a/xen/arch/x86/hvm/vmx/x86_32/exits.S	Thu Apr 24 14:02:16 2008 -0600
    82.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    82.3 @@ -1,148 +0,0 @@
    82.4 -/*
    82.5 - * exits.S: VMX architecture-specific exit handling.
    82.6 - * Copyright (c) 2004, Intel Corporation.
    82.7 - *
    82.8 - * This program is free software; you can redistribute it and/or modify it
    82.9 - * under the terms and conditions of the GNU General Public License,
   82.10 - * version 2, as published by the Free Software Foundation.
   82.11 - *
   82.12 - * This program is distributed in the hope it will be useful, but WITHOUT
   82.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   82.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   82.15 - * more details.
   82.16 - *
   82.17 - * You should have received a copy of the GNU General Public License along with
   82.18 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   82.19 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   82.20 - */
   82.21 -#include <xen/config.h>
   82.22 -#include <xen/errno.h>
   82.23 -#include <xen/softirq.h>
   82.24 -#include <asm/asm_defns.h>
   82.25 -#include <asm/apicdef.h>
   82.26 -#include <asm/page.h>
   82.27 -#include <public/xen.h>
   82.28 -
   82.29 -#define VMRESUME     .byte 0x0f,0x01,0xc3
   82.30 -#define VMLAUNCH     .byte 0x0f,0x01,0xc2
   82.31 -#define VMREAD(off)  .byte 0x0f,0x78,0x44,0x24,off
   82.32 -#define VMWRITE(off) .byte 0x0f,0x79,0x44,0x24,off
   82.33 -
   82.34 -/* VMCS field encodings */
   82.35 -#define GUEST_RSP    0x681c
   82.36 -#define GUEST_RIP    0x681e
   82.37 -#define GUEST_RFLAGS 0x6820
   82.38 -
   82.39 -#define GET_CURRENT(reg)         \
   82.40 -        movl $STACK_SIZE-4, reg; \
   82.41 -        orl  %esp, reg;          \
   82.42 -        andl $~3,reg;            \
   82.43 -        movl (reg),reg;
   82.44 -
   82.45 -#define HVM_SAVE_ALL_NOSEGREGS                                              \
   82.46 -        pushl %eax;                                                         \
   82.47 -        pushl %ebp;                                                         \
   82.48 -        pushl %edi;                                                         \
   82.49 -        pushl %esi;                                                         \
   82.50 -        pushl %edx;                                                         \
   82.51 -        pushl %ecx;                                                         \
   82.52 -        pushl %ebx;
   82.53 -
   82.54 -#define HVM_RESTORE_ALL_NOSEGREGS               \
   82.55 -        popl %ebx;                              \
   82.56 -        popl %ecx;                              \
   82.57 -        popl %edx;                              \
   82.58 -        popl %esi;                              \
   82.59 -        popl %edi;                              \
   82.60 -        popl %ebp;                              \
   82.61 -        popl %eax
   82.62 -
   82.63 -        ALIGN
   82.64 -ENTRY(vmx_asm_vmexit_handler)
   82.65 -        HVM_SAVE_ALL_NOSEGREGS
   82.66 -        GET_CURRENT(%ebx)
   82.67 -
   82.68 -        movl $GUEST_RIP,%eax
   82.69 -        VMREAD(UREGS_eip)
   82.70 -        movl $GUEST_RSP,%eax
   82.71 -        VMREAD(UREGS_esp)
   82.72 -        movl $GUEST_RFLAGS,%eax
   82.73 -        VMREAD(UREGS_eflags)
   82.74 -
   82.75 -        movl %cr2,%eax
   82.76 -        movl %eax,VCPU_hvm_guest_cr2(%ebx)
   82.77 -
   82.78 -#ifndef NDEBUG
   82.79 -        movw $0xbeef,%ax
   82.80 -        movw %ax,UREGS_error_code(%esp)
   82.81 -        movw %ax,UREGS_entry_vector(%esp)
   82.82 -        movw %ax,UREGS_saved_upcall_mask(%esp)
   82.83 -        movw %ax,UREGS_cs(%esp)
   82.84 -        movw %ax,UREGS_ds(%esp)
   82.85 -        movw %ax,UREGS_es(%esp)
   82.86 -        movw %ax,UREGS_fs(%esp)
   82.87 -        movw %ax,UREGS_gs(%esp)
   82.88 -        movw %ax,UREGS_ss(%esp)
   82.89 -#endif
   82.90 -
   82.91 -        movl %esp,%eax
   82.92 -        push %eax
   82.93 -        call vmx_vmexit_handler
   82.94 -        addl $4,%esp
   82.95 -        jmp vmx_asm_do_vmentry
   82.96 -
   82.97 -        ALIGN
   82.98 -vmx_process_softirqs:
   82.99 -        sti
  82.100 -        call do_softirq
  82.101 -        jmp vmx_asm_do_vmentry
  82.102 -
  82.103 -        ALIGN
  82.104 -ENTRY(vmx_asm_do_vmentry)
  82.105 -        GET_CURRENT(%ebx)
  82.106 -        cli                             # tests must not race interrupts
  82.107 -
  82.108 -        movl VCPU_processor(%ebx),%eax
  82.109 -        shl  $IRQSTAT_shift,%eax
  82.110 -        cmpl $0,irq_stat(%eax,1)
  82.111 -        jnz  vmx_process_softirqs
  82.112 -
  82.113 -        call vmx_intr_assist
  82.114 -
  82.115 -        testb $0xff,VCPU_vmx_emul(%ebx)
  82.116 -        jnz  vmx_goto_realmode
  82.117 -
  82.118 -        movl VCPU_hvm_guest_cr2(%ebx),%eax
  82.119 -        movl %eax,%cr2
  82.120 -        call vmx_trace_vmentry
  82.121 -
  82.122 -        movl $GUEST_RIP,%eax
  82.123 -        VMWRITE(UREGS_eip)
  82.124 -        movl $GUEST_RSP,%eax
  82.125 -        VMWRITE(UREGS_esp)
  82.126 -        movl $GUEST_RFLAGS,%eax
  82.127 -        VMWRITE(UREGS_eflags)
  82.128 -
  82.129 -        cmpb $0,VCPU_vmx_launched(%ebx)
  82.130 -        je   vmx_launch
  82.131 -
  82.132 -/*vmx_resume:*/
  82.133 -        HVM_RESTORE_ALL_NOSEGREGS
  82.134 -        VMRESUME
  82.135 -        call vm_resume_fail
  82.136 -        ud2
  82.137 -
  82.138 -vmx_launch:
  82.139 -        movb $1,VCPU_vmx_launched(%ebx)
  82.140 -        HVM_RESTORE_ALL_NOSEGREGS
  82.141 -        VMLAUNCH
  82.142 -        call vm_launch_fail
  82.143 -        ud2
  82.144 -
  82.145 -vmx_goto_realmode:
  82.146 -        sti
  82.147 -        movl %esp,%eax
  82.148 -        push %eax
  82.149 -        call vmx_realmode
  82.150 -        addl $4,%esp
  82.151 -        jmp vmx_asm_do_vmentry
    83.1 --- a/xen/arch/x86/hvm/vmx/x86_64/Makefile	Thu Apr 24 14:02:16 2008 -0600
    83.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    83.3 @@ -1,1 +0,0 @@
    83.4 -obj-y += exits.o
    84.1 --- a/xen/arch/x86/hvm/vmx/x86_64/exits.S	Thu Apr 24 14:02:16 2008 -0600
    84.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    84.3 @@ -1,165 +0,0 @@
    84.4 -/*
    84.5 - * exits.S: VMX architecture-specific exit handling.
    84.6 - * Copyright (c) 2004, Intel Corporation.
    84.7 - *
    84.8 - * This program is free software; you can redistribute it and/or modify it
    84.9 - * under the terms and conditions of the GNU General Public License,
   84.10 - * version 2, as published by the Free Software Foundation.
   84.11 - *
   84.12 - * This program is distributed in the hope it will be useful, but WITHOUT
   84.13 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   84.14 - * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   84.15 - * more details.
   84.16 - *
   84.17 - * You should have received a copy of the GNU General Public License along with
   84.18 - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   84.19 - * Place - Suite 330, Boston, MA 02111-1307 USA.
   84.20 - */
   84.21 -#include <xen/config.h>
   84.22 -#include <xen/errno.h>
   84.23 -#include <xen/softirq.h>
   84.24 -#include <asm/asm_defns.h>
   84.25 -#include <asm/apicdef.h>
   84.26 -#include <asm/page.h>
   84.27 -#include <public/xen.h>
   84.28 -
   84.29 -#define VMRESUME     .byte 0x0f,0x01,0xc3
   84.30 -#define VMLAUNCH     .byte 0x0f,0x01,0xc2
   84.31 -#define VMREAD(off)  .byte 0x0f,0x78,0x47,((off)-UREGS_rip)
   84.32 -#define VMWRITE(off) .byte 0x0f,0x79,0x47,((off)-UREGS_rip)
   84.33 -
   84.34 -/* VMCS field encodings */
   84.35 -#define GUEST_RSP    0x681c
   84.36 -#define GUEST_RIP    0x681e
   84.37 -#define GUEST_RFLAGS 0x6820
   84.38 -
   84.39 -#define GET_CURRENT(reg)         \
   84.40 -        movq $STACK_SIZE-8, reg; \
   84.41 -        orq  %rsp, reg;          \
   84.42 -        andq $~7,reg;            \
   84.43 -        movq (reg),reg;
   84.44 -
   84.45 -#define HVM_SAVE_ALL_NOSEGREGS                  \
   84.46 -        pushq %rdi;                             \
   84.47 -        pushq %rsi;                             \
   84.48 -        pushq %rdx;                             \
   84.49 -        pushq %rcx;                             \
   84.50 -        pushq %rax;                             \
   84.51 -        pushq %r8;                              \
   84.52 -        pushq %r9;                              \
   84.53 -        pushq %r10;                             \
   84.54 -        pushq %r11;                             \
   84.55 -        pushq %rbx;                             \
   84.56 -        pushq %rbp;                             \
   84.57 -        pushq %r12;                             \
   84.58 -        pushq %r13;                             \
   84.59 -        pushq %r14;                             \
   84.60 -        pushq %r15;
   84.61 -
   84.62 -#define HVM_RESTORE_ALL_NOSEGREGS               \
   84.63 -        popq %r15;                              \
   84.64 -        popq %r14;                              \
   84.65 -        popq %r13;                              \
   84.66 -        popq %r12;                              \
   84.67 -        popq %rbp;                              \
   84.68 -        popq %rbx;                              \
   84.69 -        popq %r11;                              \
   84.70 -        popq %r10;                              \
   84.71 -        popq %r9;                               \
   84.72 -        popq %r8;                               \
   84.73 -        popq %rax;                              \
   84.74 -        popq %rcx;                              \
   84.75 -        popq %rdx;                              \
   84.76 -        popq %rsi;                              \
   84.77 -        popq %rdi
   84.78 -
   84.79 -        ALIGN
   84.80 -ENTRY(vmx_asm_vmexit_handler)
   84.81 -        HVM_SAVE_ALL_NOSEGREGS
   84.82 -        GET_CURRENT(%rbx)
   84.83 -
   84.84 -        leaq UREGS_rip(%rsp),%rdi
   84.85 -        movl $GUEST_RIP,%eax
   84.86 -        /*VMREAD(UREGS_rip)*/
   84.87 -        .byte 0x0f,0x78,0x07  /* vmread %rax,(%rdi) */
   84.88 -        movl $GUEST_RSP,%eax
   84.89 -        VMREAD(UREGS_rsp)
   84.90 -        movl $GUEST_RFLAGS,%eax
   84.91 -        VMREAD(UREGS_eflags)
   84.92 -
   84.93 -        movq %cr2,%rax
   84.94 -        movq %rax,VCPU_hvm_guest_cr2(%rbx)
   84.95 -
   84.96 -#ifndef NDEBUG
   84.97 -        movw $0xbeef,%ax
   84.98 -        movw %ax,UREGS_error_code(%rsp)
   84.99 -        movw %ax,UREGS_entry_vector(%rsp)
  84.100 -        movw %ax,UREGS_saved_upcall_mask(%rsp)
  84.101 -        movw %ax,UREGS_cs(%rsp)
  84.102 -        movw %ax,UREGS_ds(%rsp)
  84.103 -        movw %ax,UREGS_es(%rsp)
  84.104 -        movw %ax,UREGS_fs(%rsp)
  84.105 -        movw %ax,UREGS_gs(%rsp)
  84.106 -        movw %ax,UREGS_ss(%rsp)
  84.107 -#endif
  84.108 -
  84.109 -        movq %rsp,%rdi
  84.110 -        call vmx_vmexit_handler
  84.111 -        jmp vmx_asm_do_vmentry
  84.112 -
  84.113 -        ALIGN
  84.114 -vmx_process_softirqs:
  84.115 -        sti
  84.116 -        call do_softirq
  84.117 -        jmp vmx_asm_do_vmentry
  84.118 -
  84.119 -        ALIGN
  84.120 -ENTRY(vmx_asm_do_vmentry)
  84.121 -        GET_CURRENT(%rbx)
  84.122 -        cli                             # tests must not race interrupts
  84.123 -
  84.124 -        movl  VCPU_processor(%rbx),%eax
  84.125 -        shl   $IRQSTAT_shift,%rax
  84.126 -        leaq  irq_stat(%rip),%rdx
  84.127 -        cmpl  $0,(%rdx,%rax,1)
  84.128 -        jnz   vmx_process_softirqs
  84.129 -
  84.130 -        call vmx_intr_assist
  84.131 -
  84.132 -        testb $0xff,VCPU_vmx_emul(%rbx)
  84.133 -        jnz  vmx_goto_realmode
  84.134 -
  84.135 -        movq VCPU_hvm_guest_cr2(%rbx),%rax
  84.136 -        movq %rax,%cr2
  84.137 -        call vmx_trace_vmentry
  84.138 -
  84.139 -        leaq UREGS_rip(%rsp),%rdi
  84.140 -        movl $GUEST_RIP,%eax
  84.141 -        /*VMWRITE(UREGS_rip)*/
  84.142 -        .byte 0x0f,0x79,0x07  /* vmwrite (%rdi),%rax */
  84.143 -        movl $GUEST_RSP,%eax
  84.144 -        VMWRITE(UREGS_rsp)
  84.145 -        movl $GUEST_RFLAGS,%eax
  84.146 -        VMWRITE(UREGS_eflags)
  84.147 -
  84.148 -        cmpb $0,VCPU_vmx_launched(%rbx)
  84.149 -        je   vmx_launch
  84.150 -
  84.151 -/*vmx_resume:*/
  84.152 -        HVM_RESTORE_ALL_NOSEGREGS
  84.153 -        VMRESUME
  84.154 -        call vm_resume_fail
  84.155 -        ud2
  84.156 -
  84.157 -vmx_launch:
  84.158 -        movb $1,VCPU_vmx_launched(%rbx)
  84.159 -        HVM_RESTORE_ALL_NOSEGREGS
  84.160 -        VMLAUNCH
  84.161 -        call vm_launch_fail
  84.162 -        ud2
  84.163 -
  84.164 -vmx_goto_realmode:
  84.165 -        sti
  84.166 -        movq %rsp,%rdi
  84.167 -        call vmx_realmode
  84.168 -        jmp vmx_asm_do_vmentry
    85.1 --- a/xen/arch/x86/mm.c	Thu Apr 24 14:02:16 2008 -0600
    85.2 +++ b/xen/arch/x86/mm.c	Thu Apr 24 14:08:29 2008 -0600
    85.3 @@ -3279,15 +3279,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
    85.4          case XENMAPSPACE_shared_info:
    85.5              if ( xatp.idx == 0 )
    85.6                  mfn = virt_to_mfn(d->shared_info);
    85.7 -            /* XXX: assumption here, this is called after E820 table is build
    85.8 -             * need the E820 to initialize MTRR.
    85.9 -             */
   85.10 -            if ( is_hvm_domain(d) ) {
   85.11 -                extern void init_mtrr_in_hyper(struct vcpu *);
   85.12 -                struct vcpu *vs;
   85.13 -                for_each_vcpu(d, vs)
   85.14 -                    init_mtrr_in_hyper(vs);
   85.15 -            }
   85.16              break;
   85.17          case XENMAPSPACE_grant_table:
   85.18              spin_lock(&d->grant_table->lock);
   85.19 @@ -3625,38 +3616,26 @@ static int ptwr_emulated_write(
   85.20  static int ptwr_emulated_cmpxchg(
   85.21      enum x86_segment seg,
   85.22      unsigned long offset,
   85.23 -    unsigned long old,
   85.24 -    unsigned long new,
   85.25 +    void *p_old,
   85.26 +    void *p_new,
   85.27      unsigned int bytes,
   85.28      struct x86_emulate_ctxt *ctxt)
   85.29  {
   85.30 +    paddr_t old = 0, new = 0;
   85.31 +    if ( bytes > sizeof(paddr_t) )
   85.32 +        return X86EMUL_UNHANDLEABLE;
   85.33 +    memcpy(&old, p_old, bytes);
   85.34 +    memcpy(&new, p_new, bytes);
   85.35      return ptwr_emulated_update(
   85.36          offset, old, new, bytes, 1,
   85.37          container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
   85.38  }
   85.39  
   85.40 -static int ptwr_emulated_cmpxchg8b(
   85.41 -    enum x86_segment seg,
   85.42 -    unsigned long offset,
   85.43 -    unsigned long old,
   85.44 -    unsigned long old_hi,
   85.45 -    unsigned long new,
   85.46 -    unsigned long new_hi,
   85.47 -    struct x86_emulate_ctxt *ctxt)
   85.48 -{
   85.49 -    if ( CONFIG_PAGING_LEVELS == 2 )
   85.50 -        return X86EMUL_UNHANDLEABLE;
   85.51 -    return ptwr_emulated_update(
   85.52 -        offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
   85.53 -        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
   85.54 -}
   85.55 -
   85.56  static struct x86_emulate_ops ptwr_emulate_ops = {
   85.57      .read       = ptwr_emulated_read,
   85.58      .insn_fetch = ptwr_emulated_read,
   85.59      .write      = ptwr_emulated_write,
   85.60      .cmpxchg    = ptwr_emulated_cmpxchg,
   85.61 -    .cmpxchg8b  = ptwr_emulated_cmpxchg8b
   85.62  };
   85.63  
   85.64  /* Write page fault handler: check if guest is trying to modify a PTE. */
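
The emulator's cmpxchg hook now receives its old and new operands as void pointers plus an explicit byte count, which lets one callback serve both native-width CMPXCHG and 8-byte CMPXCHG8B and retires the dedicated cmpxchg8b hook. Callers marshal operands into local buffers of the right width; a sketch of the convention (addr, ops and ctxt as in the surrounding emulation code, values illustrative):

    /* Width travels in the `bytes` argument; operands are untyped buffers. */
    unsigned long old_val = 0, new_val = 1;
    rc = ops->cmpxchg(x86_seg_none, addr, &old_val, &new_val,
                      sizeof(old_val), ctxt);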
    86.1 --- a/xen/arch/x86/mm/shadow/common.c	Thu Apr 24 14:02:16 2008 -0600
    86.2 +++ b/xen/arch/x86/mm/shadow/common.c	Thu Apr 24 14:08:29 2008 -0600
    86.3 @@ -239,15 +239,15 @@ hvm_emulate_write(enum x86_segment seg,
    86.4  static int 
    86.5  hvm_emulate_cmpxchg(enum x86_segment seg,
    86.6                      unsigned long offset,
    86.7 -                    unsigned long old,
    86.8 -                    unsigned long new,
    86.9 +                    void *p_old,
   86.10 +                    void *p_new,
   86.11                      unsigned int bytes,
   86.12                      struct x86_emulate_ctxt *ctxt)
   86.13  {
   86.14      struct sh_emulate_ctxt *sh_ctxt =
   86.15          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
   86.16      struct vcpu *v = current;
   86.17 -    unsigned long addr;
   86.18 +    unsigned long addr, old[2], new[2];
   86.19      int rc;
   86.20  
   86.21      if ( !is_x86_user_segment(seg) )
   86.22 @@ -258,35 +258,21 @@ hvm_emulate_cmpxchg(enum x86_segment seg
   86.23      if ( rc )
   86.24          return rc;
   86.25  
   86.26 -    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
   86.27 -        v, addr, old, new, bytes, sh_ctxt);
   86.28 -}
   86.29 -
   86.30 -static int 
   86.31 -hvm_emulate_cmpxchg8b(enum x86_segment seg,
   86.32 -                      unsigned long offset,
   86.33 -                      unsigned long old_lo,
   86.34 -                      unsigned long old_hi,
   86.35 -                      unsigned long new_lo,
   86.36 -                      unsigned long new_hi,
   86.37 -                      struct x86_emulate_ctxt *ctxt)
   86.38 -{
   86.39 -    struct sh_emulate_ctxt *sh_ctxt =
   86.40 -        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
   86.41 -    struct vcpu *v = current;
   86.42 -    unsigned long addr;
   86.43 -    int rc;
   86.44 -
   86.45 -    if ( !is_x86_user_segment(seg) )
   86.46 -        return X86EMUL_UNHANDLEABLE;
   86.47 -
   86.48 -    rc = hvm_translate_linear_addr(
   86.49 -        seg, offset, 8, hvm_access_write, sh_ctxt, &addr);
   86.50 -    if ( rc )
   86.51 -        return rc;
   86.52 -
   86.53 -    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
   86.54 -        v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
   86.55 +    old[0] = new[0] = 0;
   86.56 +    memcpy(old, p_old, bytes);
   86.57 +    memcpy(new, p_new, bytes);
   86.58 +
   86.59 +    if ( bytes <= sizeof(long) )
   86.60 +        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
   86.61 +            v, addr, old[0], new[0], bytes, sh_ctxt);
   86.62 +
   86.63 +#ifdef __i386__
   86.64 +    if ( bytes == 8 )
   86.65 +        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
   86.66 +            v, addr, old[0], old[1], new[0], new[1], sh_ctxt);
   86.67 +#endif
   86.68 +
   86.69 +    return X86EMUL_UNHANDLEABLE;
   86.70  }
   86.71  
   86.72  static struct x86_emulate_ops hvm_shadow_emulator_ops = {
   86.73 @@ -294,7 +280,6 @@ static struct x86_emulate_ops hvm_shadow
   86.74      .insn_fetch = hvm_emulate_insn_fetch,
   86.75      .write      = hvm_emulate_write,
   86.76      .cmpxchg    = hvm_emulate_cmpxchg,
   86.77 -    .cmpxchg8b  = hvm_emulate_cmpxchg8b,
   86.78  };
   86.79  
   86.80  static int
   86.81 @@ -338,36 +323,34 @@ pv_emulate_write(enum x86_segment seg,
   86.82  static int 
   86.83  pv_emulate_cmpxchg(enum x86_segment seg,
   86.84                     unsigned long offset,
   86.85 -                   unsigned long old,
   86.86 -                   unsigned long new,
   86.87 +                   void *p_old,
   86.88 +                   void *p_new,
   86.89                     unsigned int bytes,
   86.90                     struct x86_emulate_ctxt *ctxt)
   86.91  {
   86.92      struct sh_emulate_ctxt *sh_ctxt =
   86.93          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
   86.94 +    unsigned long old[2], new[2];
   86.95      struct vcpu *v = current;
   86.96 +
   86.97      if ( !is_x86_user_segment(seg) )
   86.98          return X86EMUL_UNHANDLEABLE;
   86.99 -    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
  86.100 -        v, offset, old, new, bytes, sh_ctxt);
  86.101 -}
  86.102 -
  86.103 -static int 
  86.104 -pv_emulate_cmpxchg8b(enum x86_segment seg,
  86.105 -                     unsigned long offset,
  86.106 -                     unsigned long old_lo,
  86.107 -                     unsigned long old_hi,
  86.108 -                     unsigned long new_lo,
  86.109 -                     unsigned long new_hi,
  86.110 -                     struct x86_emulate_ctxt *ctxt)
  86.111 -{
  86.112 -    struct sh_emulate_ctxt *sh_ctxt =
  86.113 -        container_of(ctxt, struct sh_emulate_ctxt, ctxt);
  86.114 -    struct vcpu *v = current;
  86.115 -    if ( !is_x86_user_segment(seg) )
  86.116 -        return X86EMUL_UNHANDLEABLE;
  86.117 -    return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
  86.118 -        v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt);
  86.119 +
  86.120 +    old[0] = new[0] = 0;
  86.121 +    memcpy(old, p_old, bytes);
  86.122 +    memcpy(new, p_new, bytes);
  86.123 +
  86.124 +    if ( bytes <= sizeof(long) )
  86.125 +        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg(
  86.126 +            v, offset, old[0], new[0], bytes, sh_ctxt);
  86.127 +
  86.128 +#ifdef __i386__
  86.129 +    if ( bytes == 8 )
  86.130 +        return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b(
  86.131 +            v, offset, old[0], old[1], new[0], new[1], sh_ctxt);
  86.132 +#endif
  86.133 +
  86.134 +    return X86EMUL_UNHANDLEABLE;
  86.135  }
  86.136  
  86.137  static struct x86_emulate_ops pv_shadow_emulator_ops = {
  86.138 @@ -375,7 +358,6 @@ static struct x86_emulate_ops pv_shadow_
  86.139      .insn_fetch = pv_emulate_read,
  86.140      .write      = pv_emulate_write,
  86.141      .cmpxchg    = pv_emulate_cmpxchg,
  86.142 -    .cmpxchg8b  = pv_emulate_cmpxchg8b,
  86.143  };
  86.144  
  86.145  struct x86_emulate_ops *shadow_init_emulation(
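
Both shadow wrappers decode the buffers identically: widths up to sizeof(long) feed the generic shadow cmpxchg, and an 8-byte operand on a 32-bit build is split into halves for the retained cmpxchg8b path. On x86_64, sizeof(long) is 8, so the generic path already covers CMPXCHG8B and the #ifdef __i386__ special case compiles away. The split relies on x86 being little-endian:

    /* Sketch: an 8-byte operand viewed as two longs on i386. */
    unsigned long halves[2] = { 0, 0 };
    memcpy(halves, p_old, 8);  /* halves[0] = low 32 bits, halves[1] = high */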
    87.1 --- a/xen/arch/x86/mm/shadow/multi.c	Thu Apr 24 14:02:16 2008 -0600
    87.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Thu Apr 24 14:08:29 2008 -0600
    87.3 @@ -2089,7 +2089,7 @@ static shadow_l1e_t * shadow_get_and_cre
    87.4          else 
    87.5          {
    87.6              /* Shadowing an actual guest l1 table */
    87.7 -            if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
    87.8 +            if ( !mfn_valid(gw->l1mfn) ) return NULL; /* No guest page. */
    87.9              *sl1mfn = get_shadow_status(v, gw->l1mfn, SH_type_l1_shadow);
   87.10              if ( !mfn_valid(*sl1mfn) ) 
   87.11              {
   87.12 @@ -4365,7 +4365,7 @@ static void emulate_unmap_dest(struct vc
   87.13      atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
   87.14  }
   87.15  
   87.16 -int
   87.17 +static int
   87.18  sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
   87.19                        u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
   87.20  {
   87.21 @@ -4389,7 +4389,7 @@ sh_x86_emulate_write(struct vcpu *v, uns
   87.22      return X86EMUL_OKAY;
   87.23  }
   87.24  
   87.25 -int
   87.26 +static int
   87.27  sh_x86_emulate_cmpxchg(struct vcpu *v, unsigned long vaddr, 
   87.28                          unsigned long old, unsigned long new,
   87.29                          unsigned int bytes, struct sh_emulate_ctxt *sh_ctxt)
   87.30 @@ -4432,7 +4432,8 @@ sh_x86_emulate_cmpxchg(struct vcpu *v, u
   87.31      return rv;
   87.32  }
   87.33  
   87.34 -int
   87.35 +#ifdef __i386__
   87.36 +static int
   87.37  sh_x86_emulate_cmpxchg8b(struct vcpu *v, unsigned long vaddr, 
   87.38                            unsigned long old_lo, unsigned long old_hi,
   87.39                            unsigned long new_lo, unsigned long new_hi,
   87.40 @@ -4465,7 +4466,7 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v,
   87.41      shadow_unlock(v->domain);
   87.42      return rv;
   87.43  }
   87.44 -
   87.45 +#endif
   87.46  
   87.47  /**************************************************************************/
   87.48  /* Audit tools */
   87.49 @@ -4738,7 +4739,9 @@ struct paging_mode sh_paging_mode = {
   87.50      .shadow.detach_old_tables      = sh_detach_old_tables,
   87.51      .shadow.x86_emulate_write      = sh_x86_emulate_write,
   87.52      .shadow.x86_emulate_cmpxchg    = sh_x86_emulate_cmpxchg,
   87.53 +#ifdef __i386__
   87.54      .shadow.x86_emulate_cmpxchg8b  = sh_x86_emulate_cmpxchg8b,
   87.55 +#endif
   87.56      .shadow.make_monitor_table     = sh_make_monitor_table,
   87.57      .shadow.destroy_monitor_table  = sh_destroy_monitor_table,
   87.58  #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
    88.1 --- a/xen/arch/x86/setup.c	Thu Apr 24 14:02:16 2008 -0600
    88.2 +++ b/xen/arch/x86/setup.c	Thu Apr 24 14:08:29 2008 -0600
    88.3 @@ -1019,10 +1019,6 @@ void __init __start_xen(unsigned long mb
    88.4          _initrd_len   = mod[initrdidx].mod_end - mod[initrdidx].mod_start;
    88.5      }
    88.6  
    88.7 -    iommu_setup();
    88.8 -
    88.9 -    amd_iommu_detect();
   88.10 -
   88.11      /*
   88.12       * We're going to setup domain0 using the module(s) that we stashed safely
   88.13       * above our heap. The second module, if present, is an initrd ramdisk.
    89.1 --- a/xen/arch/x86/smp.c	Thu Apr 24 14:02:16 2008 -0600
    89.2 +++ b/xen/arch/x86/smp.c	Thu Apr 24 14:08:29 2008 -0600
    89.3 @@ -75,20 +75,10 @@ static inline int __prepare_ICR2 (unsign
    89.4      return SET_APIC_DEST_FIELD(mask);
    89.5  }
    89.6  
    89.7 -static inline void check_IPI_mask(cpumask_t cpumask)
    89.8 -{
    89.9 -    /*
   89.10 -     * Sanity, and necessary. An IPI with no target generates a send accept
   89.11 -     * error with Pentium and P6 APICs.
   89.12 -     */
   89.13 -    ASSERT(cpus_subset(cpumask, cpu_online_map));
   89.14 -    ASSERT(!cpus_empty(cpumask));
   89.15 -}
   89.16 -
   89.17  void apic_wait_icr_idle(void)
   89.18  {
   89.19 -	while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
   89.20 -		cpu_relax();
   89.21 +    while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
   89.22 +        cpu_relax();
   89.23  }
   89.24  
   89.25  void send_IPI_mask_flat(cpumask_t cpumask, int vector)
   89.26 @@ -97,7 +87,8 @@ void send_IPI_mask_flat(cpumask_t cpumas
   89.27      unsigned long cfg;
   89.28      unsigned long flags;
   89.29  
   89.30 -    check_IPI_mask(cpumask);
   89.31 +    /* An IPI with no target generates a send accept error from P5/P6 APICs. */
   89.32 +    WARN_ON(mask == 0);
   89.33  
   89.34      local_irq_save(flags);
   89.35  
   89.36 @@ -130,17 +121,9 @@ void send_IPI_mask_phys(cpumask_t mask, 
   89.37      unsigned long cfg, flags;
   89.38      unsigned int query_cpu;
   89.39  
   89.40 -    check_IPI_mask(mask);
   89.41 -
   89.42 -    /*
   89.43 -     * Hack. The clustered APIC addressing mode doesn't allow us to send 
   89.44 -     * to an arbitrary mask, so I do a unicasts to each CPU instead. This 
   89.45 -     * should be modified to do 1 message per cluster ID - mbligh
   89.46 -     */ 
   89.47 -
   89.48      local_irq_save(flags);
   89.49  
   89.50 -    for_each_cpu_mask( query_cpu, mask )
   89.51 +    for_each_cpu_mask ( query_cpu, mask )
   89.52      {
   89.53          /*
   89.54           * Wait for idle.
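
check_IPI_mask() asserted that the destination mask was non-empty and a subset of the online map; the replacement keeps only a warning for the empty flat-mode case (which P5/P6-era APICs turn into a send-accept error) and otherwise trusts callers. A caller can keep the new WARN_ON() quiet by skipping empty masks up front:

    /* Sketch: no targets, no IPI. */
    if ( !cpus_empty(mask) )
        send_IPI_mask_flat(mask, vector);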
    90.1 --- a/xen/arch/x86/traps.c	Thu Apr 24 14:02:16 2008 -0600
    90.2 +++ b/xen/arch/x86/traps.c	Thu Apr 24 14:08:29 2008 -0600
    90.3 @@ -479,6 +479,7 @@ asmlinkage int set_guest_nmi_trapbounce(
    90.4  static inline void do_trap(
    90.5      int trapnr, struct cpu_user_regs *regs, int use_error_code)
    90.6  {
    90.7 +    struct vcpu *curr = current;
    90.8      unsigned long fixup;
    90.9  
   90.10      DEBUGGER_trap_entry(trapnr, regs);
   90.11 @@ -497,6 +498,14 @@ static inline void do_trap(
   90.12          return;
   90.13      }
   90.14  
   90.15 +    if ( ((trapnr == TRAP_copro_error) || (trapnr == TRAP_simd_error)) &&
   90.16 +         is_hvm_vcpu(curr) && curr->arch.hvm_vcpu.fpu_exception_callback )
   90.17 +    {
   90.18 +        curr->arch.hvm_vcpu.fpu_exception_callback(
   90.19 +            curr->arch.hvm_vcpu.fpu_exception_callback_arg, regs);
   90.20 +        return;
   90.21 +    }
   90.22 +
   90.23      DEBUGGER_trap_fatal(trapnr, regs);
   90.24  
   90.25      show_execution_state(regs);
   90.26 @@ -1399,6 +1408,13 @@ static int admin_io_okay(
   90.27      unsigned int port, unsigned int bytes,
   90.28      struct vcpu *v, struct cpu_user_regs *regs)
   90.29  {
   90.30 +    /*
   90.31 +     * Port 0xcf8 (CONFIG_ADDRESS) is only visible for DWORD accesses.
   90.32 +     * We never permit direct access to that register.
   90.33 +     */
   90.34 +    if ( (port == 0xcf8) && (bytes == 4) )
   90.35 +        return 0;
   90.36 +
   90.37      return ioports_access_permitted(v->domain, port, port + bytes - 1);
   90.38  }
   90.39  
   90.40 @@ -1431,10 +1447,10 @@ static uint32_t guest_io_read(
   90.41          {
   90.42              sub_data = pv_pit_handler(port, 0, 0);
   90.43          }
   90.44 -        else if ( (port & 0xfffc) == 0xcf8 )
   90.45 +        else if ( (port == 0xcf8) && (bytes == 4) )
   90.46          {
   90.47 -            size = min(bytes, 4 - (port & 3));
   90.48 -            sub_data = v->domain->arch.pci_cf8 >> ((port & 3) * 8);
   90.49 +            size = 4;
   90.50 +            sub_data = v->domain->arch.pci_cf8;
   90.51          }
   90.52          else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
   90.53          {
   90.54 @@ -1489,19 +1505,10 @@ static void guest_io_write(
   90.55          {
   90.56              pv_pit_handler(port, (uint8_t)data, 1);
   90.57          }
   90.58 -        else if ( (port & 0xfffc) == 0xcf8 )
   90.59 +        else if ( (port == 0xcf8) && (bytes == 4) )
   90.60          {
   90.61 -            size = min(bytes, 4 - (port & 3));
   90.62 -            if ( size == 4 )
   90.63 -            {
   90.64 -                v->domain->arch.pci_cf8 = data;
   90.65 -            }
   90.66 -            else
   90.67 -            {
   90.68 -                uint32_t mask = ((1u << (size * 8)) - 1) << ((port & 3) * 8);
   90.69 -                v->domain->arch.pci_cf8 &= ~mask;
   90.70 -                v->domain->arch.pci_cf8 |= (data << ((port & 3) * 8)) & mask;
   90.71 -            }
   90.72 +            size = 4;
   90.73 +            v->domain->arch.pci_cf8 = data;
   90.74          }
   90.75          else if ( ((port & 0xfffc) == 0xcfc) && IS_PRIV(v->domain) )
   90.76          {
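
Two independent fixes here. First, x87/SIMD exceptions (#MF/#XM) taken while an HVM vcpu has a registered fpu_exception_callback are forwarded to it; this is the hook the reworked x86_emulate FPU handling (below) relies on to catch faults raised by instructions it executes on the host FPU. Second, port 0xcf8 is treated as CONFIG_ADDRESS only for full 32-bit accesses, dropping the sub-word merging logic. For reference, a config-mechanism-#1 read as a guest kernel would issue it (the bus/device/function values are illustrative):

    /* Sketch: select a dword-aligned config register, then read it. */
    uint32_t bus = 0, dev = 3, fn = 0, reg = 0x10;
    outl(0x80000000u | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3u),
         0xcf8);
    uint32_t bar0 = inl(0xcfc);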
    91.1 --- a/xen/arch/x86/x86_emulate.c	Thu Apr 24 14:02:16 2008 -0600
    91.2 +++ b/xen/arch/x86/x86_emulate.c	Thu Apr 24 14:08:29 2008 -0600
    91.3 @@ -11,23 +11,7 @@
    91.4  
    91.5  #include <asm/x86_emulate.h>
    91.6  
    91.7 +/* Avoid namespace pollution. */
    91.8  #undef cmpxchg
    91.9  
   91.10 -#define __emulate_fpu_insn(_op)                 \
   91.11 -do{ int _exn;                                   \
   91.12 -    asm volatile (                              \
   91.13 -        "1: " _op "\n"                          \
   91.14 -        "2: \n"                                 \
   91.15 -        ".section .fixup,\"ax\"\n"              \
   91.16 -        "3: mov $1,%0\n"                        \
   91.17 -        "   jmp 2b\n"                           \
   91.18 -        ".previous\n"                           \
   91.19 -        ".section __ex_table,\"a\"\n"           \
   91.20 -        "   "__FIXUP_ALIGN"\n"                  \
   91.21 -        "   "__FIXUP_WORD" 1b,3b\n"             \
   91.22 -        ".previous"                             \
   91.23 -        : "=r" (_exn) : "0" (0) );              \
   91.24 -    generate_exception_if(_exn, EXC_MF, -1);    \
   91.25 -} while (0)
   91.26 -
   91.27  #include "x86_emulate/x86_emulate.c"
    92.1 --- a/xen/arch/x86/x86_emulate/x86_emulate.c	Thu Apr 24 14:02:16 2008 -0600
    92.2 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c	Thu Apr 24 14:08:29 2008 -0600
    92.3 @@ -195,9 +195,9 @@ static uint8_t twobyte_table[256] = {
    92.4      /* 0x50 - 0x5F */
    92.5      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    92.6      /* 0x60 - 0x6F */
    92.7 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    92.8 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
    92.9      /* 0x70 - 0x7F */
   92.10 -    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   92.11 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM,
   92.12      /* 0x80 - 0x87 */
   92.13      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
   92.14      ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
   92.15 @@ -546,6 +546,62 @@ do {                                    
   92.16                       ? (uint16_t)_regs.eip : (uint32_t)_regs.eip);      \
   92.17  } while (0)
   92.18  
   92.19 +struct fpu_insn_ctxt {
   92.20 +    uint8_t insn_bytes;
   92.21 +    uint8_t exn_raised;
   92.22 +};
   92.23 +
   92.24 +static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
   92.25 +{
   92.26 +    struct fpu_insn_ctxt *fic = _fic;
   92.27 +    fic->exn_raised = 1;
   92.28 +    regs->eip += fic->insn_bytes;
   92.29 +}
   92.30 +
   92.31 +#define get_fpu(_type, _fic)                                    \
   92.32 +do{ (_fic)->exn_raised = 0;                                     \
   92.33 +    fail_if(ops->get_fpu == NULL);                              \
   92.34 +    rc = ops->get_fpu(fpu_handle_exception, _fic, _type, ctxt); \
   92.35 +    if ( rc ) goto done;                                        \
   92.36 +} while (0)
   92.37 +#define put_fpu(_fic)                                           \
   92.38 +do{                                                             \
   92.39 +    if ( ops->put_fpu != NULL )                                 \
   92.40 +        ops->put_fpu(ctxt);                                     \
   92.41 +    generate_exception_if((_fic)->exn_raised, EXC_MF, -1);      \
   92.42 +} while (0)
   92.43 +
   92.44 +#define emulate_fpu_insn(_op)                           \
   92.45 +do{ struct fpu_insn_ctxt fic;                           \
   92.46 +    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
   92.47 +    asm volatile (                                      \
   92.48 +        "movb $2f-1f,%0 \n"                             \
   92.49 +        "1: " _op "     \n"                             \
   92.50 +        "2:             \n"                             \
   92.51 +        : "=m" (fic.insn_bytes) : : "memory" );         \
   92.52 +    put_fpu(&fic);                                      \
   92.53 +} while (0)
   92.54 +
   92.55 +#define emulate_fpu_insn_memdst(_op, _arg)              \
   92.56 +do{ struct fpu_insn_ctxt fic;                           \
   92.57 +    get_fpu(X86EMUL_FPU_fpu, &fic);                     \
   92.58 +    asm volatile (                                      \
   92.59 +        "movb $2f-1f,%0 \n"                             \
   92.60 +        "1: " _op " %1  \n"                             \
   92.61 +        "2:             \n"                             \
   92.62 +        : "=m" (fic.insn_bytes), "=m" (_arg)            \
   92.63 +        : : "memory" );                                 \
   92.64 +    put_fpu(&fic);                                      \
   92.65 +} while (0)
   92.66 +
   92.67 +#define emulate_fpu_insn_stub(_bytes...)                                \
   92.68 +do{ uint8_t stub[] = { _bytes, 0xc3 };                                  \
   92.69 +    struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };        \
   92.70 +    get_fpu(X86EMUL_FPU_fpu, &fic);                                     \
   92.71 +    (*(void(*)(void))stub)();                                           \
   92.72 +    put_fpu(&fic);                                                      \
   92.73 +} while (0)
   92.74 +
   92.75  static unsigned long __get_rep_prefix(
   92.76      struct cpu_user_regs *int_regs,
   92.77      struct cpu_user_regs *ext_regs,
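
The new get_fpu()/put_fpu() bracket replaces the old load_fpu_ctxt approach: get_fpu() registers fpu_handle_exception() (presumably wired through to the fpu_exception_callback consulted in traps.c above), so a #MF raised while emulating is recorded in exn_raised and skipped by advancing eip by insn_bytes (the length that the `movb $2f-1f,%0` store in each macro computes from the labels bracketing the instruction), and put_fpu() then reflects the fault back to the guest. Opcodes whose register operands are baked into the modrm byte run through a tiny runtime stub: the raw instruction bytes followed by a RET, called as a function. A minimal sketch of that trick (it assumes the stub bytes sit in executable memory):

    /* Sketch only: execute fldz via a byte buffer ending in RET. */
    uint8_t stub[] = { 0xd9, 0xee, 0xc3 };   /* fldz; ret */
    (*(void(*)(void))stub)();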
   92.78 @@ -851,6 +907,7 @@ protmode_load_seg(
   92.79      struct { uint32_t a, b; } desc;
   92.80      unsigned long val;
   92.81      uint8_t dpl, rpl, cpl;
   92.82 +    uint32_t new_desc_b;
   92.83      int rc, fault_type = EXC_TS;
   92.84  
   92.85      /* NULL selector? */
   92.86 @@ -933,10 +990,11 @@ protmode_load_seg(
   92.87          }
   92.88  
   92.89          /* Ensure Accessed flag is set. */
   92.90 +        new_desc_b = desc.b | 0x100;
   92.91          rc = ((desc.b & 0x100) ? X86EMUL_OKAY : 
   92.92                ops->cmpxchg(
   92.93 -                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4, desc.b,
   92.94 -                  desc.b | 0x100, 4, ctxt));
   92.95 +                  x86_seg_none, desctab.base + (sel & 0xfff8) + 4,
   92.96 +                  &desc.b, &new_desc_b, 4, ctxt));
   92.97      } while ( rc == X86EMUL_CMPXCHG_FAILED );
   92.98  
   92.99      if ( rc )
  92.100 @@ -2036,8 +2094,8 @@ x86_emulate(
  92.101              /* nothing to do */;
  92.102          else if ( lock_prefix )
  92.103              rc = ops->cmpxchg(
  92.104 -                dst.mem.seg, dst.mem.off, dst.orig_val,
  92.105 -                dst.val, dst.bytes, ctxt);
  92.106 +                dst.mem.seg, dst.mem.off, &dst.orig_val,
  92.107 +                &dst.val, dst.bytes, ctxt);
  92.108          else
  92.109              rc = ops->write(
  92.110                  dst.mem.seg, dst.mem.off, dst.val, dst.bytes, ctxt);
  92.111 @@ -2399,9 +2457,7 @@ x86_emulate(
  92.112      }
  92.113  
  92.114      case 0x9b:  /* wait/fwait */
  92.115 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.116 -        ops->load_fpu_ctxt(ctxt);
  92.117 -        __emulate_fpu_insn("fwait");
  92.118 +        emulate_fpu_insn("fwait");
  92.119          break;
  92.120  
  92.121      case 0x9c: /* pushf */
  92.122 @@ -2721,77 +2777,89 @@ x86_emulate(
  92.123      }
  92.124  
  92.125      case 0xd9: /* FPU 0xd9 */
  92.126 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.127 -        ops->load_fpu_ctxt(ctxt);
  92.128          switch ( modrm )
  92.129          {
  92.130 -        case 0xc0: __emulate_fpu_insn(".byte 0xd9,0xc0"); break;
  92.131 -        case 0xc1: __emulate_fpu_insn(".byte 0xd9,0xc1"); break;
  92.132 -        case 0xc2: __emulate_fpu_insn(".byte 0xd9,0xc2"); break;
  92.133 -        case 0xc3: __emulate_fpu_insn(".byte 0xd9,0xc3"); break;
  92.134 -        case 0xc4: __emulate_fpu_insn(".byte 0xd9,0xc4"); break;
  92.135 -        case 0xc5: __emulate_fpu_insn(".byte 0xd9,0xc5"); break;
  92.136 -        case 0xc6: __emulate_fpu_insn(".byte 0xd9,0xc6"); break;
  92.137 -        case 0xc7: __emulate_fpu_insn(".byte 0xd9,0xc7"); break;
  92.138 -        case 0xe0: __emulate_fpu_insn(".byte 0xd9,0xe0"); break;
  92.139 -        case 0xe8: __emulate_fpu_insn(".byte 0xd9,0xe8"); break;
  92.140 -        case 0xee: __emulate_fpu_insn(".byte 0xd9,0xee"); break;
  92.141 +        case 0xc0 ... 0xc7: /* fld %stN */
  92.142 +        case 0xc8 ... 0xcf: /* fxch %stN */
  92.143 +        case 0xd0: /* fnop */
  92.144 +        case 0xe0: /* fchs */
  92.145 +        case 0xe1: /* fabs */
  92.146 +        case 0xe4: /* ftst */
  92.147 +        case 0xe5: /* fxam */
  92.148 +        case 0xe8: /* fld1 */
  92.149 +        case 0xe9: /* fldl2t */
  92.150 +        case 0xea: /* fldl2e */
  92.151 +        case 0xeb: /* fldpi */
  92.152 +        case 0xec: /* fldlg2 */
  92.153 +        case 0xed: /* fldln2 */
  92.154 +        case 0xee: /* fldz */
  92.155 +        case 0xf0: /* f2xm1 */
  92.156 +        case 0xf1: /* fyl2x */
  92.157 +        case 0xf2: /* fptan */
  92.158 +        case 0xf3: /* fpatan */
  92.159 +        case 0xf4: /* fxtract */
  92.160 +        case 0xf5: /* fprem1 */
  92.161 +        case 0xf6: /* fdecstp */
  92.162 +        case 0xf7: /* fincstp */
  92.163 +        case 0xf8: /* fprem */
  92.164 +        case 0xf9: /* fyl2xp1 */
  92.165 +        case 0xfa: /* fsqrt */
  92.166 +        case 0xfb: /* fsincos */
  92.167 +        case 0xfc: /* frndint */
  92.168 +        case 0xfd: /* fscale */
  92.169 +        case 0xfe: /* fsin */
  92.170 +        case 0xff: /* fcos */
  92.171 +            emulate_fpu_insn_stub(0xd9, modrm);
  92.172 +            break;
  92.173          default:
  92.174              fail_if((modrm_reg & 7) != 7);
  92.175              fail_if(modrm >= 0xc0);
  92.176              /* fnstcw m2byte */
  92.177              ea.bytes = 2;
  92.178              dst = ea;
  92.179 -            asm volatile ( "fnstcw %0" : "=m" (dst.val) );
  92.180 +            emulate_fpu_insn_memdst("fnstcw", dst.val);
  92.181          }
  92.182          break;
  92.183  
  92.184      case 0xdb: /* FPU 0xdb */
  92.185 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.186 -        ops->load_fpu_ctxt(ctxt);
  92.187          fail_if(modrm != 0xe3);
  92.188          /* fninit */
  92.189 -        asm volatile ( "fninit" );
  92.190 +        emulate_fpu_insn("fninit");
  92.191          break;
  92.192  
  92.193      case 0xdd: /* FPU 0xdd */
  92.194 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.195 -        ops->load_fpu_ctxt(ctxt);
  92.196          fail_if((modrm_reg & 7) != 7);
  92.197          fail_if(modrm >= 0xc0);
  92.198          /* fnstsw m2byte */
  92.199          ea.bytes = 2;
  92.200          dst = ea;
  92.201 -        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
  92.202 +        emulate_fpu_insn_memdst("fnstsw", dst.val);
  92.203          break;
  92.204  
  92.205      case 0xde: /* FPU 0xde */
  92.206 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.207 -        ops->load_fpu_ctxt(ctxt);
  92.208          switch ( modrm )
  92.209          {
  92.210 -        case 0xd9: __emulate_fpu_insn(".byte 0xde,0xd9"); break;
  92.211 -        case 0xf8: __emulate_fpu_insn(".byte 0xde,0xf8"); break;
  92.212 -        case 0xf9: __emulate_fpu_insn(".byte 0xde,0xf9"); break;
  92.213 -        case 0xfa: __emulate_fpu_insn(".byte 0xde,0xfa"); break;
  92.214 -        case 0xfb: __emulate_fpu_insn(".byte 0xde,0xfb"); break;
  92.215 -        case 0xfc: __emulate_fpu_insn(".byte 0xde,0xfc"); break;
  92.216 -        case 0xfd: __emulate_fpu_insn(".byte 0xde,0xfd"); break;
  92.217 -        case 0xfe: __emulate_fpu_insn(".byte 0xde,0xfe"); break;
  92.218 -        case 0xff: __emulate_fpu_insn(".byte 0xde,0xff"); break;
  92.219 -        default: goto cannot_emulate;
  92.220 +        case 0xc0 ... 0xc7: /* faddp %stN */
  92.221 +        case 0xc8 ... 0xcf: /* fmulp %stN */
  92.222 +        case 0xd9: /* fcompp */
  92.223 +        case 0xe0 ... 0xe7: /* fsubrp %stN */
  92.224 +        case 0xe8 ... 0xef: /* fsubp %stN */
  92.225 +        case 0xf0 ... 0xf7: /* fdivrp %stN */
  92.226 +        case 0xf8 ... 0xff: /* fdivp %stN */
  92.227 +            emulate_fpu_insn_stub(0xde, modrm);
  92.228 +            break;
  92.229 +        default:
  92.230 +            goto cannot_emulate;
  92.231          }
  92.232          break;
  92.233  
  92.234      case 0xdf: /* FPU 0xdf */
  92.235 -        fail_if(ops->load_fpu_ctxt == NULL);
  92.236 -        ops->load_fpu_ctxt(ctxt);
  92.237          fail_if(modrm != 0xe0);
  92.238          /* fnstsw %ax */
  92.239          dst.bytes = 2;
  92.240          dst.type = OP_REG;
  92.241          dst.reg = (unsigned long *)&_regs.eax;
  92.242 -        asm volatile ( "fnstsw %0" : "=m" (dst.val) );
  92.243 +        emulate_fpu_insn_memdst("fnstsw", dst.val);
  92.244          break;
  92.245  
  92.246      case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
  92.247 @@ -2975,6 +3043,7 @@ x86_emulate(
  92.248  
  92.249      case 0xa3: bt: /* bt */
  92.250          emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
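          +        /* BT only reads its operands: suppress the writeback. */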
  92.251 +        dst.type = OP_NONE;
  92.252          break;
  92.253  
  92.254      case 0xa4: /* shld imm8,r,r/m */
  92.255 @@ -3067,7 +3136,11 @@ x86_emulate(
  92.256                : "=r" (dst.val), "=q" (zf)
  92.257                : "r" (src.val), "1" (0) );
  92.258          _regs.eflags &= ~EFLG_ZF;
  92.259 -        _regs.eflags |= zf ? EFLG_ZF : 0;
  92.260 +        if ( zf )
  92.261 +        {
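          +            /* Source was zero: set ZF and leave the destination
          +             * register unmodified. */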
  92.262 +            _regs.eflags |= EFLG_ZF;
  92.263 +            dst.type = OP_NONE;
  92.264 +        }
  92.265          break;
  92.266      }
  92.267  
  92.268 @@ -3077,7 +3150,11 @@ x86_emulate(
  92.269                : "=r" (dst.val), "=q" (zf)
  92.270                : "r" (src.val), "1" (0) );
  92.271          _regs.eflags &= ~EFLG_ZF;
  92.272 -        _regs.eflags |= zf ? EFLG_ZF : 0;
  92.273 +        if ( zf )
  92.274 +        {
  92.275 +            _regs.eflags |= EFLG_ZF;
  92.276 +            dst.type = OP_NONE;
  92.277 +        }
  92.278          break;
  92.279      }
  92.280  
  92.281 @@ -3310,6 +3387,44 @@ x86_emulate(
  92.282          break;
  92.283      }
  92.284  
  92.285 +    case 0x6f: /* movq mm/m64,mm */ {
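          +        /*
          +         * Executed via a stub: 0f 6f /r followed by a ret.  For a
          +         * memory source the modrm byte is rewritten below to use
          +         * (%eax), with %eax pointing at a local copy of the operand.
          +         */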
  92.286 +        uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
  92.287 +        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
  92.288 +        uint64_t val;
  92.289 +        if ( ea.type == OP_MEM )
  92.290 +        {
  92.291 +            unsigned long lval, hval;
  92.292 +            if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &lval, 4, ctxt)) ||
  92.293 +                 (rc = ops->read(ea.mem.seg, ea.mem.off+4, &hval, 4, ctxt)) )
  92.294 +                goto done;
  92.295 +            val = ((uint64_t)hval << 32) | (uint32_t)lval;
  92.296 +            stub[2] = modrm & 0x38; /* movq (%eax),%mmN */
  92.297 +        }
  92.298 +        get_fpu(X86EMUL_FPU_mmx, &fic);
  92.299 +        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
  92.300 +        put_fpu(&fic);
  92.301 +        break;
  92.302 +    }
  92.303 +
  92.304 +    case 0x7f: /* movq mm,mm/m64 */ {
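          +        /* Store counterpart of 0x6f above: the stub dumps %mmN into
          +         * the local buffer, which is then written out via ops->write. */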
  92.305 +        uint8_t stub[] = { 0x0f, 0x7f, modrm, 0xc3 };
  92.306 +        struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
  92.307 +        uint64_t val;
  92.308 +        if ( ea.type == OP_MEM )
  92.309 +            stub[2] = modrm & 0x38; /* movq %mmN,(%eax) */
  92.310 +        get_fpu(X86EMUL_FPU_mmx, &fic);
  92.311 +        asm volatile ( "call *%0" : : "r" (stub), "a" (&val) : "memory" );
  92.312 +        put_fpu(&fic);
  92.313 +        if ( ea.type == OP_MEM )
  92.314 +        {
  92.315 +            unsigned long lval = (uint32_t)val, hval = (uint32_t)(val >> 32);
  92.316 +            if ( (rc = ops->write(ea.mem.seg, ea.mem.off+0, lval, 4, ctxt)) ||
  92.317 +                 (rc = ops->write(ea.mem.seg, ea.mem.off+4, hval, 4, ctxt)) )
  92.318 +                goto done;
  92.319 +        }
  92.320 +        break;
  92.321 +    }
  92.322 +
  92.323      case 0x80 ... 0x8f: /* jcc (near) */ {
  92.324          int rel = (((op_bytes == 2) && !mode_64bit())
  92.325                     ? (int32_t)insn_fetch_type(int16_t)
  92.326 @@ -3346,60 +3461,49 @@ x86_emulate(
  92.327          src.val = x86_seg_gs;
  92.328          goto pop_seg;
  92.329  
  92.330 -    case 0xc7: /* Grp9 (cmpxchg8b) */
  92.331 -#if defined(__i386__)
  92.332 -    {
  92.333 -        unsigned long old_lo, old_hi;
  92.334 +    case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
  92.335 +        unsigned long old[2], exp[2], new[2];
  92.336 +        unsigned int i;
  92.337 +
  92.338          generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
  92.339          generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
  92.340 -        if ( (rc = ops->read(ea.mem.seg, ea.mem.off+0, &old_lo, 4, ctxt)) ||
  92.341 -             (rc = ops->read(ea.mem.seg, ea.mem.off+4, &old_hi, 4, ctxt)) )
  92.342 -            goto done;
  92.343 -        if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
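          +        /* Operand size doubles: 4->8 for cmpxchg8b, or 8->16 for
          +         * cmpxchg16b (REX.W, x86/64 only). */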
  92.344 +        op_bytes *= 2;
  92.345 +
  92.346 +        /* Get actual old value. */
  92.347 +        for ( i = 0; i < (op_bytes/sizeof(long)); i++ )
  92.348 +            if ( (rc = ops->read(ea.mem.seg, ea.mem.off + i*sizeof(long),
  92.349 +                                 &old[i], sizeof(long), ctxt)) != 0 )
  92.350 +                goto done;
  92.351 +
  92.352 +        /* Get expected and proposed values. */
  92.353 +        if ( op_bytes == 8 )
  92.354          {
  92.355 -            _regs.eax = old_lo;
  92.356 -            _regs.edx = old_hi;
  92.357 -            _regs.eflags &= ~EFLG_ZF;
  92.358 -        }
  92.359 -        else if ( ops->cmpxchg8b == NULL )
  92.360 -        {
  92.361 -            rc = X86EMUL_UNHANDLEABLE;
  92.362 -            goto done;
  92.363 +            ((uint32_t *)exp)[0] = _regs.eax; ((uint32_t *)exp)[1] = _regs.edx;
  92.364 +            ((uint32_t *)new)[0] = _regs.ebx; ((uint32_t *)new)[1] = _regs.ecx;
  92.365          }
  92.366          else
  92.367          {
  92.368 -            if ( (rc = ops->cmpxchg8b(ea.mem.seg, ea.mem.off, old_lo, old_hi,
  92.369 -                                      _regs.ebx, _regs.ecx, ctxt)) != 0 )
  92.370 +            exp[0] = _regs.eax; exp[1] = _regs.edx;
  92.371 +            new[0] = _regs.ebx; new[1] = _regs.ecx;
  92.372 +        }
  92.373 +
  92.374 +        if ( memcmp(old, exp, op_bytes) )
  92.375 +        {
  92.376 +            /* Expected != actual: store actual to rDX:rAX and clear ZF. */
  92.377 +            _regs.eax = (op_bytes == 8) ? ((uint32_t *)old)[0] : old[0];
  92.378 +            _regs.edx = (op_bytes == 8) ? ((uint32_t *)old)[1] : old[1];
  92.379 +            _regs.eflags &= ~EFLG_ZF;
  92.380 +        }
  92.381 +        else
  92.382 +        {
  92.383 +            /* Expected == actual: attempt atomic cmpxchg and set ZF. */
  92.384 +            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
  92.385 +                                    new, op_bytes, ctxt)) != 0 )
  92.386                  goto done;
  92.387              _regs.eflags |= EFLG_ZF;
  92.388          }
  92.389          break;
  92.390      }
  92.391 -#elif defined(__x86_64__)
  92.392 -    {
  92.393 -        unsigned long old, new;
  92.394 -        generate_exception_if((modrm_reg & 7) != 1, EXC_UD, -1);
  92.395 -        generate_exception_if(ea.type != OP_MEM, EXC_UD, -1);
  92.396 -        if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &old, 8, ctxt)) != 0 )
  92.397 -            goto done;
  92.398 -        if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
  92.399 -             ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
  92.400 -        {
  92.401 -            _regs.eax = (uint32_t)(old>>0);
  92.402 -            _regs.edx = (uint32_t)(old>>32);
  92.403 -            _regs.eflags &= ~EFLG_ZF;
  92.404 -        }
  92.405 -        else
  92.406 -        {
  92.407 -            new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
  92.408 -            if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old,
  92.409 -                                    new, 8, ctxt)) != 0 )
  92.410 -                goto done;
  92.411 -            _regs.eflags |= EFLG_ZF;
  92.412 -        }
  92.413 -        break;
  92.414 -    }
  92.415 -#endif
  92.416  
  92.417      case 0xc8 ... 0xcf: /* bswap */
  92.418          dst.type = OP_REG;
    93.1 --- a/xen/arch/x86/x86_emulate/x86_emulate.h	Thu Apr 24 14:02:16 2008 -0600
    93.2 +++ b/xen/arch/x86/x86_emulate/x86_emulate.h	Thu Apr 24 14:08:29 2008 -0600
    93.3 @@ -95,6 +95,12 @@ struct segment_register {
    93.4   /* (cmpxchg accessor): CMPXCHG failed. Maps to X86EMUL_RETRY in caller. */
    93.5  #define X86EMUL_CMPXCHG_FAILED 3
    93.6  
    93.7 +/* FPU sub-types which may be requested via ->get_fpu(). */
    93.8 +enum x86_emulate_fpu_type {
    93.9 +    X86EMUL_FPU_fpu, /* Standard FPU coprocessor instruction set */
   93.10 +    X86EMUL_FPU_mmx  /* MMX instruction set (%mm0-%mm7) */
   93.11 +};
   93.12 +
   93.13  /*
   93.14   * These operations represent the instruction emulator's interface to memory.
   93.15   * 
   93.16 @@ -104,8 +110,7 @@ struct segment_register {
   93.17   *     some out-of-band mechanism, unknown to the emulator. The memop signals
   93.18   *     failure by returning X86EMUL_EXCEPTION to the emulator, which will
   93.19   *     then immediately bail.
   93.20 - *  2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
   93.21 - *     cmpxchg8b_emulated need support 8-byte accesses.
   93.22 + *  2. Valid access sizes are 1, 2, 4 and 8 (x86/64 only) bytes.
   93.23   *  3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
   93.24   */
   93.25  struct x86_emulate_ops
   93.26 @@ -153,37 +158,19 @@ struct x86_emulate_ops
   93.27  
   93.28      /*
   93.29       * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
   93.30 -     *  @old:   [IN ] Value expected to be current at @addr.
   93.31 -     *  @new:   [IN ] Value to write to @addr.
   93.32 +     *  @p_old: [IN ] Pointer to value expected to be current at @addr.
   93.33 +     *  @p_new: [IN ] Pointer to value to write to @addr.
   93.34 +     *  @bytes: [IN ] Operation size (up to 8 (x86/32) or 16 (x86/64) bytes).
   93.35       */
   93.36      int (*cmpxchg)(
   93.37          enum x86_segment seg,
   93.38          unsigned long offset,
   93.39 -        unsigned long old,
   93.40 -        unsigned long new,
   93.41 +        void *p_old,
   93.42 +        void *p_new,
   93.43          unsigned int bytes,
   93.44          struct x86_emulate_ctxt *ctxt);
   93.45  
   93.46      /*
   93.47 -     * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
   93.48 -     *  @old:   [IN ] Value expected to be current at @addr.
   93.49 -     *  @new:   [IN ] Value to write to @addr.
   93.50 -     * NOTES:
   93.51 -     *  1. This function is only ever called when emulating a real CMPXCHG8B.
   93.52 -     *  2. This function is *never* called on x86/64 systems.
   93.53 -     *  2. Not defining this function (i.e., specifying NULL) is equivalent
   93.54 -     *     to defining a function that always returns X86EMUL_UNHANDLEABLE.
   93.55 -     */
   93.56 -    int (*cmpxchg8b)(
   93.57 -        enum x86_segment seg,
   93.58 -        unsigned long offset,
   93.59 -        unsigned long old_lo,
   93.60 -        unsigned long old_hi,
   93.61 -        unsigned long new_lo,
   93.62 -        unsigned long new_hi,
   93.63 -        struct x86_emulate_ctxt *ctxt);
   93.64 -
   93.65 -    /*
   93.66       * rep_ins: Emulate INS: <src_port> -> <dst_seg:dst_offset>.
   93.67       *  @bytes_per_rep: [IN ] Bytes transferred per repetition.
   93.68       *  @reps:  [IN ] Maximum repetitions to be emulated.
   93.69 @@ -342,8 +329,19 @@ struct x86_emulate_ops
   93.70          uint8_t insn_len,
   93.71          struct x86_emulate_ctxt *ctxt);
   93.72  
   93.73 -    /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
   93.74 -    void (*load_fpu_ctxt)(
   93.75 +    /*
   93.76 +     * get_fpu: Load emulated environment's FPU state onto processor.
    93.77 +     *  @exception_callback: On any FPU or SIMD exception, pass control
    93.78 +     *                 to (*exception_callback)(exception_callback_arg, regs).
   93.79 +     */
   93.80 +    int (*get_fpu)(
   93.81 +        void (*exception_callback)(void *, struct cpu_user_regs *),
   93.82 +        void *exception_callback_arg,
   93.83 +        enum x86_emulate_fpu_type type,
   93.84 +        struct x86_emulate_ctxt *ctxt);
   93.85 +
   93.86 +    /* put_fpu: Relinquish the FPU. Unhook from FPU/SIMD exception handlers. */
   93.87 +    void (*put_fpu)(
   93.88          struct x86_emulate_ctxt *ctxt);
   93.89  
   93.90      /* invlpg: Invalidate paging structures which map addressed byte. */
    94.1 --- a/xen/common/trace.c	Thu Apr 24 14:02:16 2008 -0600
    94.2 +++ b/xen/common/trace.c	Thu Apr 24 14:08:29 2008 -0600
    94.3 @@ -374,6 +374,15 @@ static inline int insert_lost_records(st
    94.4                             (unsigned char *)&ed);
    94.5  }
    94.6  
    94.7 +/*
    94.8 + * Notification is performed in qtasklet to avoid deadlocks with contexts
    94.9 + * which __trace_var() may be called from (e.g., scheduler critical regions).
   94.10 + */
   94.11 +static void trace_notify_dom0(unsigned long unused)
   94.12 +{
   94.13 +    send_guest_global_virq(dom0, VIRQ_TBUF);
   94.14 +}
   94.15 +static DECLARE_TASKLET(trace_notify_dom0_tasklet, trace_notify_dom0, 0);
   94.16  
   94.17  /**
   94.18   * trace - Enters a trace tuple into the trace buffer for the current CPU.
   94.19 @@ -506,7 +515,7 @@ void __trace_var(u32 event, int cycles, 
   94.20      /* Notify trace buffer consumer that we've crossed the high water mark. */
   94.21      if ( started_below_highwater &&
   94.22           (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
   94.23 -        send_guest_global_virq(dom0, VIRQ_TBUF);
   94.24 +        tasklet_schedule(&trace_notify_dom0_tasklet);
   94.25  }
   94.26  
   94.27  /*
    95.1 --- a/xen/common/xencomm.c	Thu Apr 24 14:02:16 2008 -0600
    95.2 +++ b/xen/common/xencomm.c	Thu Apr 24 14:08:29 2008 -0600
    95.3 @@ -323,7 +323,7 @@ xencomm_copy_chunk_to(
    95.4                 (unsigned long)xencomm_vaddr(paddr, page));
    95.5  
    95.6      memcpy(xencomm_vaddr(paddr, page), (void *)from, len);
    95.7 -    xencomm_mark_dirty(xencomm_vaddr(paddr, page), len);
    95.8 +    xencomm_mark_dirty((unsigned long)xencomm_vaddr(paddr, page), len);
    95.9      put_page(page);
   95.10  
   95.11      return 0;
    96.1 --- a/xen/drivers/char/console.c	Thu Apr 24 14:02:16 2008 -0600
    96.2 +++ b/xen/drivers/char/console.c	Thu Apr 24 14:08:29 2008 -0600
    96.3 @@ -322,7 +322,7 @@ static long guest_console_write(XEN_GUES
    96.4  
    96.5      while ( count > 0 )
    96.6      {
    96.7 -        while ( serial_tx_space(sercon_handle) < (SERIAL_TXBUFSZ / 2) )
    96.8 +        while ( serial_tx_space(sercon_handle) < (serial_txbufsz / 2) )
    96.9          {
   96.10              if ( hypercall_preempt_check() )
   96.11                  break;
    97.1 --- a/xen/drivers/char/serial.c	Thu Apr 24 14:02:16 2008 -0600
    97.2 +++ b/xen/drivers/char/serial.c	Thu Apr 24 14:08:29 2008 -0600
    97.3 @@ -15,6 +15,19 @@
    97.4  #include <xen/mm.h>
    97.5  #include <xen/serial.h>
    97.6  
    97.7 +/* Never drop characters, even if the async transmit buffer fills. */
    97.8 +/* #define SERIAL_NEVER_DROP_CHARS 1 */
    97.9 +
   97.10 +unsigned int serial_txbufsz = 16384;
    97.11 +static void __init parse_serial_tx_buffer(const char *s)
    97.12 +{
    97.13 +    unsigned int sz = max((unsigned int)parse_size_and_unit(s, NULL), 512u);
          +    /* The index-masking macros below rely on a power-of-two size:
          +     * round down by clearing low set bits. */
          +    while ( sz & (sz - 1) )
          +        sz &= sz - 1;
          +    serial_txbufsz = sz;
    97.14 +}
   97.15 +custom_param("serial_tx_buffer", parse_serial_tx_buffer);
   97.16 +
   97.17 +#define mask_serial_rxbuf_idx(_i) ((_i)&(serial_rxbufsz-1))
   97.18 +#define mask_serial_txbuf_idx(_i) ((_i)&(serial_txbufsz-1))
   97.19 +
   97.20  static struct serial_port com[2] = {
   97.21      { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }, 
   97.22      { .rx_lock = SPIN_LOCK_UNLOCKED, .tx_lock = SPIN_LOCK_UNLOCKED }
   97.23 @@ -36,8 +49,8 @@ void serial_rx_interrupt(struct serial_p
   97.24              fn = port->rx_hi;
   97.25          else if ( !(c & 0x80) && (port->rx_lo != NULL) )
   97.26              fn = port->rx_lo;
   97.27 -        else if ( (port->rxbufp - port->rxbufc) != SERIAL_RXBUFSZ )
   97.28 -            port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufp++)] = c;            
   97.29 +        else if ( (port->rxbufp - port->rxbufc) != serial_rxbufsz )
   97.30 +            port->rxbuf[mask_serial_rxbuf_idx(port->rxbufp++)] = c;            
   97.31      }
   97.32  
   97.33      spin_unlock_irqrestore(&port->rx_lock, flags);
   97.34 @@ -72,7 +85,7 @@ void serial_tx_interrupt(struct serial_p
   97.35              if ( port->txbufc == port->txbufp )
   97.36                  break;
   97.37              port->driver->putc(
   97.38 -                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
   97.39 +                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
   97.40          }
   97.41      }
   97.42  
   97.43 @@ -81,22 +94,24 @@ void serial_tx_interrupt(struct serial_p
   97.44  
   97.45  static void __serial_putc(struct serial_port *port, char c)
   97.46  {
   97.47 -    int i;
   97.48 -
   97.49      if ( (port->txbuf != NULL) && !port->sync )
   97.50      {
   97.51          /* Interrupt-driven (asynchronous) transmitter. */
   97.52 -        if ( (port->txbufp - port->txbufc) == SERIAL_TXBUFSZ )
   97.53 +#ifdef SERIAL_NEVER_DROP_CHARS
   97.54 +        if ( (port->txbufp - port->txbufc) == serial_txbufsz )
   97.55          {
   97.56 -            /* Buffer is full: we spin, but could alternatively drop chars. */
   97.57 +            /* Buffer is full: we spin waiting for space to appear. */
   97.58 +            int i;
   97.59              while ( !port->driver->tx_empty(port) )
   97.60                  cpu_relax();
   97.61              for ( i = 0; i < port->tx_fifo_size; i++ )
   97.62                  port->driver->putc(
   97.63 -                    port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
   97.64 -            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
   97.65 +                    port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
   97.66 +            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
   97.67 +            return;
   97.68          }
   97.69 -        else if ( ((port->txbufp - port->txbufc) == 0) &&
   97.70 +#endif
   97.71 +        if ( ((port->txbufp - port->txbufc) == 0) &&
   97.72                    port->driver->tx_empty(port) )
   97.73          {
   97.74              /* Buffer and UART FIFO are both empty. */
   97.75 @@ -105,7 +120,7 @@ static void __serial_putc(struct serial_
   97.76          else
   97.77          {
   97.78              /* Normal case: buffer the character. */
   97.79 -            port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufp++)] = c;
   97.80 +            port->txbuf[mask_serial_txbuf_idx(port->txbufp++)] = c;
   97.81          }
   97.82      }
   97.83      else if ( port->driver->tx_empty )
   97.84 @@ -200,7 +215,7 @@ char serial_getc(int handle)
   97.85              
   97.86              if ( port->rxbufp != port->rxbufc )
   97.87              {
   97.88 -                c = port->rxbuf[MASK_SERIAL_RXBUF_IDX(port->rxbufc++)];
   97.89 +                c = port->rxbuf[mask_serial_rxbuf_idx(port->rxbufc++)];
   97.90                  spin_unlock_irqrestore(&port->rx_lock, flags);
   97.91                  break;
   97.92              }
   97.93 @@ -336,7 +351,7 @@ void serial_start_sync(int handle)
   97.94              while ( !port->driver->tx_empty(port) )
   97.95                  cpu_relax();
   97.96              port->driver->putc(
   97.97 -                port, port->txbuf[MASK_SERIAL_TXBUF_IDX(port->txbufc++)]);
   97.98 +                port, port->txbuf[mask_serial_txbuf_idx(port->txbufc++)]);
   97.99          }
  97.100      }
  97.101  
  97.102 @@ -364,9 +379,9 @@ int serial_tx_space(int handle)
  97.103  {
  97.104      struct serial_port *port;
  97.105      if ( handle == -1 )
  97.106 -        return SERIAL_TXBUFSZ;
  97.107 +        return serial_txbufsz;
  97.108      port = &com[handle & SERHND_IDX];
  97.109 -    return SERIAL_TXBUFSZ - (port->txbufp - port->txbufc);
  97.110 +    return serial_txbufsz - (port->txbufp - port->txbufc);
  97.111  }
  97.112  
  97.113  void __devinit serial_init_preirq(void)
  97.114 @@ -431,7 +446,7 @@ void serial_async_transmit(struct serial
  97.115      BUG_ON(!port->driver->tx_empty);
  97.116      if ( port->txbuf == NULL )
  97.117          port->txbuf = alloc_xenheap_pages(
  97.118 -            get_order_from_bytes(SERIAL_TXBUFSZ));
  97.119 +            get_order_from_bytes(serial_txbufsz));
  97.120  }
  97.121  
  97.122  /*
    98.1 --- a/xen/drivers/passthrough/amd/iommu_acpi.c	Thu Apr 24 14:02:16 2008 -0600
    98.2 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c	Thu Apr 24 14:08:29 2008 -0600
    98.3 @@ -139,7 +139,7 @@ static int __init register_exclusion_ran
    98.4      iommu = find_iommu_for_device(bus, devfn);
    98.5      if ( !iommu )
    98.6      {
    98.7 -        dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
    98.8 +        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
    98.9          return -ENODEV;
   98.10      }
   98.11      req = ivrs_mappings[bdf].dte_requestor_id;
   98.12 @@ -221,7 +221,7 @@ static int __init parse_ivmd_device_sele
   98.13      bdf = ivmd_block->header.dev_id;
   98.14      if ( bdf >= ivrs_bdf_entries )
   98.15      {
   98.16 -        dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
   98.17 +        amd_iov_error("IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
   98.18          return -ENODEV;
   98.19      }
   98.20  
   98.21 @@ -238,22 +238,19 @@ static int __init parse_ivmd_device_rang
   98.22      first_bdf = ivmd_block->header.dev_id;
   98.23      if ( first_bdf >= ivrs_bdf_entries )
   98.24      {
   98.25 -        dprintk(XENLOG_ERR, "IVMD Error: "
   98.26 -                "Invalid Range_First Dev_Id 0x%x\n", first_bdf);
   98.27 +        amd_iov_error(
   98.28 +            "IVMD Error: Invalid Range_First Dev_Id 0x%x\n", first_bdf);
   98.29          return -ENODEV;
   98.30      }
   98.31  
   98.32      last_bdf = ivmd_block->last_dev_id;
   98.33      if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
   98.34      {
   98.35 -        dprintk(XENLOG_ERR, "IVMD Error: "
   98.36 -                "Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
   98.37 +        amd_iov_error(
   98.38 +            "IVMD Error: Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
   98.39          return -ENODEV;
   98.40      }
   98.41  
   98.42 -    dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n",
   98.43 -            first_bdf, last_bdf);
   98.44 -
   98.45      for ( bdf = first_bdf, error = 0; (bdf <= last_bdf) && !error; bdf++ )
   98.46          error = register_exclusion_range_for_device(
   98.47              bdf, base, limit, iw, ir);
   98.48 @@ -272,8 +269,7 @@ static int __init parse_ivmd_device_iomm
   98.49                                      ivmd_block->cap_offset);
   98.50      if ( !iommu )
   98.51      {
   98.52 -        dprintk(XENLOG_ERR,
   98.53 -                "IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
   98.54 +        amd_iov_error("IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
   98.55                  ivmd_block->header.dev_id, ivmd_block->cap_offset);
   98.56          return -ENODEV;
   98.57      }
   98.58 @@ -290,7 +286,7 @@ static int __init parse_ivmd_block(struc
   98.59      if ( ivmd_block->header.length <
   98.60           sizeof(struct acpi_ivmd_block_header) )
   98.61      {
   98.62 -        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n");
   98.63 +        amd_iov_error("IVMD Error: Invalid Block Length!\n");
   98.64          return -ENODEV;
   98.65      }
   98.66  
   98.67 @@ -299,10 +295,9 @@ static int __init parse_ivmd_block(struc
   98.68      base = start_addr & PAGE_MASK;
   98.69      limit = (start_addr + mem_length - 1) & PAGE_MASK;
   98.70  
   98.71 -    dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n",
   98.72 -            ivmd_block->header.type);
   98.73 -    dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr);
   98.74 -    dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length);
    98.75 +    amd_iov_info("IVMD Block: Type 0x%x\n", ivmd_block->header.type);
   98.76 +    amd_iov_info(" Start_Addr_Phys 0x%lx\n", start_addr);
   98.77 +    amd_iov_info(" Mem_Length 0x%lx\n", mem_length);
   98.78  
   98.79      if ( get_field_from_byte(ivmd_block->header.flags,
   98.80                               AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK,
   98.81 @@ -321,7 +316,7 @@ static int __init parse_ivmd_block(struc
   98.82      }
   98.83      else
   98.84      {
   98.85 -        dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n");
   98.86 +        amd_iov_error("IVMD Error: Invalid Flag Field!\n");
   98.87          return -ENODEV;
   98.88      }
   98.89  
   98.90 @@ -344,7 +339,7 @@ static int __init parse_ivmd_block(struc
   98.91                                         base, limit, iw, ir);
   98.92  
   98.93      default:
   98.94 -        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n");
   98.95 +        amd_iov_error("IVMD Error: Invalid Block Type!\n");
   98.96          return -ENODEV;
   98.97      }
   98.98  }
   98.99 @@ -354,7 +349,7 @@ static u16 __init parse_ivhd_device_padd
  98.100  {
  98.101      if ( header_length < (block_length + pad_length) )
  98.102      {
  98.103 -        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
  98.104 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.105          return 0;
  98.106      }
  98.107  
  98.108 @@ -369,8 +364,7 @@ static u16 __init parse_ivhd_device_sele
  98.109      bdf = ivhd_device->header.dev_id;
  98.110      if ( bdf >= ivrs_bdf_entries )
  98.111      {
  98.112 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.113 -                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.114 +        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.115          return 0;
  98.116      }
  98.117  
  98.118 @@ -393,14 +387,14 @@ static u16 __init parse_ivhd_device_rang
  98.119      dev_length = sizeof(struct acpi_ivhd_device_range);
  98.120      if ( header_length < (block_length + dev_length) )
  98.121      {
  98.122 -        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
  98.123 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.124          return 0;
  98.125      }
  98.126  
  98.127      if ( ivhd_device->range.trailer.type !=
  98.128           AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
  98.129      {
  98.130 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.131 +        amd_iov_error("IVHD Error: "
  98.132                  "Invalid Range: End_Type 0x%x\n",
  98.133                  ivhd_device->range.trailer.type);
  98.134          return 0;
  98.135 @@ -409,21 +403,20 @@ static u16 __init parse_ivhd_device_rang
  98.136      first_bdf = ivhd_device->header.dev_id;
  98.137      if ( first_bdf >= ivrs_bdf_entries )
  98.138      {
  98.139 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.140 -                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.141 +        amd_iov_error(
  98.142 +            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.143          return 0;
  98.144      }
  98.145  
  98.146      last_bdf = ivhd_device->range.trailer.dev_id;
  98.147      if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
  98.148      {
  98.149 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.150 -                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.151 +        amd_iov_error(
  98.152 +            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.153          return 0;
  98.154      }
  98.155  
  98.156 -    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  98.157 -            first_bdf, last_bdf);
  98.158 +    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
  98.159  
  98.160      /* override flags for range of devices */
  98.161      sys_mgt = get_field_from_byte(ivhd_device->header.flags,
  98.162 @@ -444,28 +437,25 @@ static u16 __init parse_ivhd_device_alia
  98.163      dev_length = sizeof(struct acpi_ivhd_device_alias);
  98.164      if ( header_length < (block_length + dev_length) )
  98.165      {
  98.166 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.167 -                "Invalid Device_Entry Length!\n");
  98.168 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.169          return 0;
  98.170      }
  98.171  
  98.172      bdf = ivhd_device->header.dev_id;
  98.173      if ( bdf >= ivrs_bdf_entries )
  98.174      {
  98.175 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.176 -                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.177 +        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.178          return 0;
  98.179      }
  98.180  
  98.181      alias_id = ivhd_device->alias.dev_id;
  98.182      if ( alias_id >= ivrs_bdf_entries )
  98.183      {
  98.184 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.185 -                "Invalid Alias Dev_Id 0x%x\n", alias_id);
  98.186 +        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
  98.187          return 0;
  98.188      }
  98.189  
  98.190 -    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
  98.191 +    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
  98.192  
  98.193      /* override requestor_id and flags for device */
  98.194      ivrs_mappings[bdf].dte_requestor_id = alias_id;
  98.195 @@ -490,15 +480,14 @@ static u16 __init parse_ivhd_device_alia
  98.196      dev_length = sizeof(struct acpi_ivhd_device_alias_range);
  98.197      if ( header_length < (block_length + dev_length) )
  98.198      {
  98.199 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.200 -                "Invalid Device_Entry Length!\n");
  98.201 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.202          return 0;
  98.203      }
  98.204  
  98.205      if ( ivhd_device->alias_range.trailer.type !=
  98.206           AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
  98.207      {
  98.208 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.209 +        amd_iov_error("IVHD Error: "
  98.210                  "Invalid Range: End_Type 0x%x\n",
  98.211                  ivhd_device->alias_range.trailer.type);
  98.212          return 0;
  98.213 @@ -507,30 +496,28 @@ static u16 __init parse_ivhd_device_alia
  98.214      first_bdf = ivhd_device->header.dev_id;
  98.215      if ( first_bdf >= ivrs_bdf_entries )
  98.216      {
  98.217 -        dprintk(XENLOG_ERR,"IVHD Error: "
  98.218 -                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.219 +        amd_iov_error(
  98.220 +            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.221          return 0;
  98.222      }
  98.223  
  98.224      last_bdf = ivhd_device->alias_range.trailer.dev_id;
  98.225      if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
  98.226      {
  98.227 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.228 -                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.229 +        amd_iov_error(
  98.230 +            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.231          return 0;
  98.232      }
  98.233  
  98.234      alias_id = ivhd_device->alias_range.alias.dev_id;
  98.235      if ( alias_id >= ivrs_bdf_entries )
  98.236      {
  98.237 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.238 -                "Invalid Alias Dev_Id 0x%x\n", alias_id);
  98.239 +        amd_iov_error("IVHD Error: Invalid Alias Dev_Id 0x%x\n", alias_id);
  98.240          return 0;
  98.241      }
  98.242  
  98.243 -    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  98.244 -            first_bdf, last_bdf);
  98.245 -    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
  98.246 +    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n", first_bdf, last_bdf);
  98.247 +    amd_iov_info(" Dev_Id Alias: 0x%x\n", alias_id);
  98.248  
  98.249      /* override requestor_id and flags for range of devices */
  98.250      sys_mgt = get_field_from_byte(ivhd_device->header.flags,
  98.251 @@ -555,16 +542,14 @@ static u16 __init parse_ivhd_device_exte
  98.252      dev_length = sizeof(struct acpi_ivhd_device_extended);
  98.253      if ( header_length < (block_length + dev_length) )
  98.254      {
  98.255 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.256 -                "Invalid Device_Entry Length!\n");
  98.257 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.258          return 0;
  98.259      }
  98.260  
  98.261      bdf = ivhd_device->header.dev_id;
  98.262      if ( bdf >= ivrs_bdf_entries )
  98.263      {
  98.264 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.265 -                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.266 +        amd_iov_error("IVHD Error: Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  98.267          return 0;
  98.268      }
  98.269  
  98.270 @@ -587,15 +572,14 @@ static u16 __init parse_ivhd_device_exte
  98.271      dev_length = sizeof(struct acpi_ivhd_device_extended_range);
  98.272      if ( header_length < (block_length + dev_length) )
  98.273      {
  98.274 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.275 -                "Invalid Device_Entry Length!\n");
  98.276 +        amd_iov_error("IVHD Error: Invalid Device_Entry Length!\n");
  98.277          return 0;
  98.278      }
  98.279  
  98.280      if ( ivhd_device->extended_range.trailer.type !=
  98.281           AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
  98.282      {
  98.283 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.284 +        amd_iov_error("IVHD Error: "
  98.285                  "Invalid Range: End_Type 0x%x\n",
  98.286                  ivhd_device->extended_range.trailer.type);
  98.287          return 0;
  98.288 @@ -604,20 +588,20 @@ static u16 __init parse_ivhd_device_exte
  98.289      first_bdf = ivhd_device->header.dev_id;
  98.290      if ( first_bdf >= ivrs_bdf_entries )
  98.291      {
  98.292 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.293 -                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.294 +        amd_iov_error(
  98.295 +            "IVHD Error: Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  98.296          return 0;
  98.297      }
  98.298  
  98.299      last_bdf = ivhd_device->extended_range.trailer.dev_id;
  98.300      if ( (last_bdf >= ivrs_bdf_entries) || (last_bdf <= first_bdf) )
  98.301      {
  98.302 -        dprintk(XENLOG_ERR, "IVHD Error: "
  98.303 -                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.304 +        amd_iov_error(
  98.305 +            "IVHD Error: Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  98.306          return 0;
  98.307      }
  98.308  
  98.309 -    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  98.310 +    amd_iov_info(" Dev_Id Range: 0x%x -> 0x%x\n",
  98.311              first_bdf, last_bdf);
  98.312  
  98.313      /* override flags for range of devices */
  98.314 @@ -639,7 +623,7 @@ static int __init parse_ivhd_block(struc
  98.315      if ( ivhd_block->header.length <
  98.316           sizeof(struct acpi_ivhd_block_header) )
  98.317      {
  98.318 -        dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n");
  98.319 +        amd_iov_error("IVHD Error: Invalid Block Length!\n");
  98.320          return -ENODEV;
  98.321      }
  98.322  
  98.323 @@ -647,21 +631,16 @@ static int __init parse_ivhd_block(struc
  98.324                                      ivhd_block->cap_offset);
  98.325      if ( !iommu )
  98.326      {
  98.327 -        dprintk(XENLOG_ERR,
  98.328 -                "IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
  98.329 +        amd_iov_error("IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
  98.330                  ivhd_block->header.dev_id, ivhd_block->cap_offset);
  98.331          return -ENODEV;
  98.332      }
  98.333  
  98.334 -    dprintk(XENLOG_INFO, "IVHD Block:\n");
  98.335 -    dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n",
  98.336 -            ivhd_block->cap_offset);
  98.337 -    dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n",
  98.338 -            (unsigned long)ivhd_block->mmio_base);
  98.339 -    dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n",
  98.340 -            ivhd_block->pci_segment);
  98.341 -    dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n",
  98.342 -            ivhd_block->iommu_info);
  98.343 +    amd_iov_info("IVHD Block:\n");
  98.344 +    amd_iov_info(" Cap_Offset 0x%x\n", ivhd_block->cap_offset);
   98.345 +    amd_iov_info(" MMIO_BAR_Phys 0x%"PRIx64"\n", ivhd_block->mmio_base);
   98.346 +    amd_iov_info(" PCI_Segment 0x%x\n", ivhd_block->pci_segment);
   98.347 +    amd_iov_info(" IOMMU_Info 0x%x\n", ivhd_block->iommu_info);
  98.348  
  98.349      /* override IOMMU support flags */
  98.350      iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
  98.351 @@ -692,13 +671,10 @@ static int __init parse_ivhd_block(struc
  98.352          ivhd_device = (union acpi_ivhd_device *)
  98.353              ((u8 *)ivhd_block + block_length);
  98.354  
  98.355 -        dprintk(XENLOG_INFO, "IVHD Device Entry:\n");
  98.356 -        dprintk(XENLOG_INFO, " Type 0x%x\n",
  98.357 -                ivhd_device->header.type);
  98.358 -        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n",
  98.359 -                ivhd_device->header.dev_id);
  98.360 -        dprintk(XENLOG_INFO, " Flags 0x%x\n",
  98.361 -                ivhd_device->header.flags);
   98.362 +        amd_iov_info("IVHD Device Entry:\n");
   98.363 +        amd_iov_info(" Type 0x%x\n", ivhd_device->header.type);
   98.364 +        amd_iov_info(" Dev_Id 0x%x\n", ivhd_device->header.dev_id);
   98.365 +        amd_iov_info(" Flags 0x%x\n", ivhd_device->header.flags);
  98.366  
  98.367          switch ( ivhd_device->header.type )
  98.368          {
  98.369 @@ -741,8 +717,7 @@ static int __init parse_ivhd_block(struc
  98.370                  ivhd_block->header.length, block_length);
  98.371              break;
  98.372          default:
  98.373 -            dprintk(XENLOG_ERR, "IVHD Error: "
  98.374 -                    "Invalid Device Type!\n");
  98.375 +            amd_iov_error("IVHD Error: Invalid Device Type!\n");
  98.376              dev_length = 0;
  98.377              break;
  98.378          }
  98.379 @@ -774,46 +749,49 @@ static int __init parse_ivrs_block(struc
  98.380          return parse_ivmd_block(ivmd_block);
  98.381  
  98.382      default:
  98.383 -        dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n");
  98.384 +        amd_iov_error("IVRS Error: Invalid Block Type!\n");
  98.385          return -ENODEV;
  98.386      }
  98.387  
  98.388      return 0;
  98.389  }
  98.390  
  98.391 -void __init dump_acpi_table_header(struct acpi_table_header *table)
  98.392 +static void __init dump_acpi_table_header(struct acpi_table_header *table)
  98.393  {
  98.394 +#ifdef AMD_IOV_DEBUG
  98.395      int i;
  98.396  
  98.397 -    printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n");
  98.398 -    printk(XENLOG_INFO " Signature ");
  98.399 +    amd_iov_info("ACPI Table:\n");
  98.400 +    amd_iov_info(" Signature ");
  98.401      for ( i = 0; i < ACPI_NAME_SIZE; i++ )
  98.402          printk("%c", table->signature[i]);
  98.403      printk("\n");
  98.404  
  98.405 -    printk(" Length 0x%x\n", table->length);
  98.406 -    printk(" Revision 0x%x\n", table->revision);
  98.407 -    printk(" CheckSum 0x%x\n", table->checksum);
  98.408 +    amd_iov_info(" Length 0x%x\n", table->length);
  98.409 +    amd_iov_info(" Revision 0x%x\n", table->revision);
  98.410 +    amd_iov_info(" CheckSum 0x%x\n", table->checksum);
  98.411  
  98.412 -    printk(" OEM_Id ");
  98.413 +    amd_iov_info(" OEM_Id ");
  98.414      for ( i = 0; i < ACPI_OEM_ID_SIZE; i++ )
  98.415          printk("%c", table->oem_id[i]);
  98.416      printk("\n");
  98.417  
  98.418 -    printk(" OEM_Table_Id ");
  98.419 +    amd_iov_info(" OEM_Table_Id ");
  98.420      for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; i++ )
  98.421          printk("%c", table->oem_table_id[i]);
  98.422      printk("\n");
  98.423  
  98.424 -    printk(" OEM_Revision 0x%x\n", table->oem_revision);
  98.425 +    amd_iov_info(" OEM_Revision 0x%x\n", table->oem_revision);
  98.426  
  98.427 -    printk(" Creator_Id ");
  98.428 +    amd_iov_info(" Creator_Id ");
  98.429      for ( i = 0; i < ACPI_NAME_SIZE; i++ )
  98.430          printk("%c", table->asl_compiler_id[i]);
  98.431      printk("\n");
  98.432  
  98.433 -    printk(" Creator_Revision 0x%x\n",
  98.434 +    amd_iov_info(" Creator_Revision 0x%x\n",
  98.435             table->asl_compiler_revision);
  98.436 +#endif
  98.438  }
  98.439  
  98.440  int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size)
  98.441 @@ -827,9 +805,7 @@ int __init parse_ivrs_table(unsigned lon
  98.442  
  98.443      BUG_ON(!table);
  98.444  
  98.445 -#if 0
  98.446      dump_acpi_table_header(table);
  98.447 -#endif
  98.448  
  98.449      /* validate checksum: sum of entire table == 0 */
  98.450      checksum = 0;
  98.451 @@ -838,7 +814,7 @@ int __init parse_ivrs_table(unsigned lon
  98.452          checksum += raw_table[i];
  98.453      if ( checksum )
  98.454      {
  98.455 -        dprintk(XENLOG_ERR, "IVRS Error: "
  98.456 +        amd_iov_error("IVRS Error: "
  98.457                  "Invalid Checksum 0x%x\n", checksum);
  98.458          return -ENODEV;
  98.459      }
  98.460 @@ -850,15 +826,15 @@ int __init parse_ivrs_table(unsigned lon
  98.461          ivrs_block = (struct acpi_ivrs_block_header *)
  98.462              ((u8 *)table + length);
  98.463  
  98.464 -        dprintk(XENLOG_INFO, "IVRS Block:\n");
  98.465 -        dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type);
  98.466 -        dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags);
  98.467 -        dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length);
  98.468 -        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id);
  98.469 +        amd_iov_info("IVRS Block:\n");
  98.470 +        amd_iov_info(" Type 0x%x\n", ivrs_block->type);
  98.471 +        amd_iov_info(" Flags 0x%x\n", ivrs_block->flags);
  98.472 +        amd_iov_info(" Length 0x%x\n", ivrs_block->length);
  98.473 +        amd_iov_info(" Dev_Id 0x%x\n", ivrs_block->dev_id);
  98.474  
  98.475          if ( table->length < (length + ivrs_block->length) )
  98.476          {
  98.477 -            dprintk(XENLOG_ERR, "IVRS Error: "
  98.478 +            amd_iov_error("IVRS Error: "
  98.479                      "Table Length Exceeded: 0x%x -> 0x%lx\n",
  98.480                      table->length,
  98.481                      (length + ivrs_block->length));
    99.1 --- a/xen/drivers/passthrough/amd/iommu_detect.c	Thu Apr 24 14:02:16 2008 -0600
    99.2 +++ b/xen/drivers/passthrough/amd/iommu_detect.c	Thu Apr 24 14:08:29 2008 -0600
    99.3 @@ -85,6 +85,45 @@ int __init get_iommu_last_downstream_bus
    99.4      return 0;
    99.5  }
    99.6  
    99.7 +static int __init get_iommu_msi_capabilities(u8 bus, u8 dev, u8 func,
    99.8 +            struct amd_iommu *iommu)
    99.9 +{
   99.10 +    int cap_ptr, cap_id;
   99.11 +    u32 cap_header;
   99.12 +    u16 control;
   99.13 +    int count = 0;
   99.14 +
   99.15 +    cap_ptr = pci_conf_read8(bus, dev, func,
   99.16 +            PCI_CAPABILITY_LIST);
   99.17 +
    99.18 +    while ( (cap_ptr >= PCI_MIN_CAP_OFFSET) &&
    99.19 +            (count < PCI_MAX_CAP_BLOCKS) )
   99.20 +    {
   99.21 +        cap_ptr &= PCI_CAP_PTR_MASK;
   99.22 +        cap_header = pci_conf_read32(bus, dev, func, cap_ptr);
   99.23 +        cap_id = get_field_from_reg_u32(cap_header,
   99.24 +                PCI_CAP_ID_MASK, PCI_CAP_ID_SHIFT);
   99.25 +
   99.26 +        if ( cap_id == PCI_CAP_ID_MSI )
   99.27 +        {
   99.28 +            iommu->msi_cap = cap_ptr;
   99.29 +            break;
   99.30 +        }
   99.31 +        cap_ptr = get_field_from_reg_u32(cap_header,
   99.32 +                PCI_CAP_NEXT_PTR_MASK, PCI_CAP_NEXT_PTR_SHIFT);
   99.33 +        count++;
   99.34 +    }
   99.35 +
   99.36 +    if ( !iommu->msi_cap )
   99.37 +        return -ENODEV;
   99.38 +
    99.39 +    amd_iov_info("Found MSI capability block\n");
   99.40 +    control = pci_conf_read16(bus, dev, func,
   99.41 +            iommu->msi_cap + PCI_MSI_FLAGS);
   99.42 +    iommu->maskbit = control & PCI_MSI_FLAGS_MASKBIT;
   99.43 +    return 0;
   99.44 +}
   99.45 +
   99.46  int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
   99.47                                    struct amd_iommu *iommu)
   99.48  {
   99.49 @@ -99,8 +138,7 @@ int __init get_iommu_capabilities(u8 bus
   99.50  
   99.51      if ( ((mmio_bar & 0x1) == 0) || (iommu->mmio_base_phys == 0) )
   99.52      {
   99.53 -        dprintk(XENLOG_ERR ,
   99.54 -                "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
   99.55 +        amd_iov_error("Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
   99.56          return -ENODEV;
   99.57      }
   99.58  
   99.59 @@ -133,6 +171,8 @@ int __init get_iommu_capabilities(u8 bus
   99.60      iommu->msi_number = get_field_from_reg_u32(
   99.61          misc_info, PCI_CAP_MSI_NUMBER_MASK, PCI_CAP_MSI_NUMBER_SHIFT);
   99.62  
   99.63 +    get_iommu_msi_capabilities(bus, dev, func, iommu);
   99.64 +
   99.65      return 0;
   99.66  }
   99.67  
   99.68 @@ -176,24 +216,24 @@ static int __init scan_functions_for_iom
   99.69      int bus, int dev, iommu_detect_callback_ptr_t iommu_detect_callback)
   99.70  {
   99.71      int func, hdr_type;
   99.72 -    int count, error = 0;
   99.73 +    int count = 1, error = 0;
   99.74  
   99.75 -    func = 0;
   99.76 -    count = 1;
   99.77 -    while ( VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
   99.78 -                                                PCI_VENDOR_ID)) &&
   99.79 -            !error && (func < count) )
   99.80 +    for ( func = 0;
   99.81 +          (func < count) && !error &&
   99.82 +              VALID_PCI_VENDOR_ID(pci_conf_read16(bus, dev, func,
   99.83 +                                                  PCI_VENDOR_ID));
   99.84 +          func++ )
   99.86      {
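          +        /* count stays 1 unless function 0 reports a multi-function
          +         * header, in which case every function is probed. */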
   99.87          hdr_type = pci_conf_read8(bus, dev, func, PCI_HEADER_TYPE);
   99.88  
   99.89 -        if ( func == 0 && IS_PCI_MULTI_FUNCTION(hdr_type) )
   99.90 +        if ( (func == 0) && IS_PCI_MULTI_FUNCTION(hdr_type) )
   99.91              count = PCI_MAX_FUNC_COUNT;
   99.92  
   99.93          if ( IS_PCI_TYPE0_HEADER(hdr_type) ||
   99.94               IS_PCI_TYPE1_HEADER(hdr_type) )
   99.95              error = scan_caps_for_iommu(bus, dev, func,
   99.96                                          iommu_detect_callback);
   99.97 -        func++;
   99.98      }
   99.99  
  99.100      return error;
   100.1 --- a/xen/drivers/passthrough/amd/iommu_init.c	Thu Apr 24 14:02:16 2008 -0600
   100.2 +++ b/xen/drivers/passthrough/amd/iommu_init.c	Thu Apr 24 14:08:29 2008 -0600
   100.3 @@ -27,6 +27,7 @@
   100.4  #include "../pci_regs.h"
   100.5  
   100.6  extern int nr_amd_iommus;
   100.7 +static struct amd_iommu *vector_to_iommu[NR_VECTORS];
   100.8  
   100.9  int __init map_iommu_mmio_region(struct amd_iommu *iommu)
  100.10  {
  100.11 @@ -34,8 +35,7 @@ int __init map_iommu_mmio_region(struct 
  100.12  
  100.13      if ( nr_amd_iommus > MAX_AMD_IOMMUS )
  100.14      {
  100.15 -        gdprintk(XENLOG_ERR,
  100.16 -                 "IOMMU: nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
  100.17 +        amd_iov_error("nr_amd_iommus %d > MAX_IOMMUS\n", nr_amd_iommus);
  100.18          return -ENOMEM;
  100.19      }
  100.20  
  100.21 @@ -109,6 +109,33 @@ void __init register_iommu_cmd_buffer_in
  100.22      writel(entry, iommu->mmio_base+IOMMU_CMD_BUFFER_BASE_HIGH_OFFSET);
  100.23  }
  100.24  
  100.25 +void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu)
  100.26 +{
  100.27 +    u64 addr_64, addr_lo, addr_hi;
  100.28 +    u32 power_of2_entries;
  100.29 +    u32 entry;
  100.30 +
  100.31 +    addr_64 = (u64)virt_to_maddr(iommu->event_log.buffer);
  100.32 +    addr_lo = addr_64 & DMA_32BIT_MASK;
  100.33 +    addr_hi = addr_64 >> 32;
  100.34 +
  100.35 +    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
  100.36 +                         IOMMU_EVENT_LOG_BASE_LOW_MASK,
  100.37 +                         IOMMU_EVENT_LOG_BASE_LOW_SHIFT, &entry);
  100.38 +    writel(entry, iommu->mmio_base + IOMMU_EVENT_LOG_BASE_LOW_OFFSET);
  100.39 +
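          +    /* The length field encodes log2 of the entry count:
          +     * log2(pages) + log2(entries per page). */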
  100.40 +    power_of2_entries = get_order_from_bytes(iommu->event_log.alloc_size) +
  100.41 +                        IOMMU_EVENT_LOG_POWER_OF2_ENTRIES_PER_PAGE;
  100.42 +
  100.43 +    set_field_in_reg_u32((u32)addr_hi, 0,
  100.44 +                        IOMMU_EVENT_LOG_BASE_HIGH_MASK,
  100.45 +                        IOMMU_EVENT_LOG_BASE_HIGH_SHIFT, &entry);
  100.46 +    set_field_in_reg_u32(power_of2_entries, entry,
  100.47 +                        IOMMU_EVENT_LOG_LENGTH_MASK,
  100.48 +                        IOMMU_EVENT_LOG_LENGTH_SHIFT, &entry);
  100.49 +    writel(entry, iommu->mmio_base+IOMMU_EVENT_LOG_BASE_HIGH_OFFSET);
  100.50 +}
  100.51 +
  100.52  static void __init set_iommu_translation_control(struct amd_iommu *iommu,
  100.53                                                   int enable)
  100.54  {
  100.55 @@ -179,10 +206,281 @@ static void __init register_iommu_exclus
  100.56      writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET);
  100.57  }
  100.58  
  100.59 +static void __init set_iommu_event_log_control(struct amd_iommu *iommu,
  100.60 +            int enable)
  100.61 +{
  100.62 +    u32 entry;
  100.63 +
  100.64 +    entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
  100.65 +    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
  100.66 +                         IOMMU_CONTROL_DISABLED, entry,
  100.67 +                         IOMMU_CONTROL_EVENT_LOG_ENABLE_MASK,
  100.68 +                         IOMMU_CONTROL_EVENT_LOG_ENABLE_SHIFT, &entry);
  100.69 +    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
  100.70 +
  100.71 +    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
  100.72 +                         IOMMU_CONTROL_DISABLED, entry,
  100.73 +                         IOMMU_CONTROL_EVENT_LOG_INT_MASK,
  100.74 +                         IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry);
  100.75 +    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
  100.76 +
  100.77 +    set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
  100.78 +                         IOMMU_CONTROL_DISABLED, entry,
  100.79 +                         IOMMU_CONTROL_COMP_WAIT_INT_MASK,
  100.80 +                         IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
  100.81 +    writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
  100.82 +}
  100.83 +
  100.84 +static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])
  100.85 +{
  100.86 +    u32 tail, head, *event_log;
  100.87 +    int i;
  100.88 +
   100.89 +    BUG_ON(!iommu || !event);
  100.90 +
  100.91 +    /* make sure there's an entry in the log */
  100.92 +    tail = get_field_from_reg_u32(
  100.93 +                readl(iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET),
  100.94 +                IOMMU_EVENT_LOG_TAIL_MASK,
  100.95 +                IOMMU_EVENT_LOG_TAIL_SHIFT);
  100.96 +    if ( tail != iommu->event_log_head )
  100.97 +    {
  100.98 +        /* read event log entry */
  100.99 +        event_log = (u32 *)(iommu->event_log.buffer +
 100.100 +                                        (iommu->event_log_head *
 100.101 +                                        IOMMU_EVENT_LOG_ENTRY_SIZE));
 100.102 +        for ( i = 0; i < IOMMU_EVENT_LOG_U32_PER_ENTRY; i++ )
 100.103 +            event[i] = event_log[i];
 100.104 +        if ( ++iommu->event_log_head == iommu->event_log.entries )
 100.105 +            iommu->event_log_head = 0;
 100.106 +
 100.107 +        /* update head pointer */
 100.108 +        set_field_in_reg_u32(iommu->event_log_head, 0,
 100.109 +                             IOMMU_EVENT_LOG_HEAD_MASK,
 100.110 +                             IOMMU_EVENT_LOG_HEAD_SHIFT, &head);
 100.111 +        writel(head, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
 100.112 +        return 0;
 100.113 +    }
 100.114 +
 100.115 +    return -EFAULT;
 100.116 +}
 100.117 +
 100.118 +static void amd_iommu_msi_data_init(struct amd_iommu *iommu, int vector)
 100.119 +{
 100.120 +    u32 msi_data;
 100.121 +    u8 bus = (iommu->bdf >> 8) & 0xff;
 100.122 +    u8 dev = PCI_SLOT(iommu->bdf & 0xff);
 100.123 +    u8 func = PCI_FUNC(iommu->bdf & 0xff);
 100.124 +
 100.125 +    msi_data = MSI_DATA_TRIGGER_EDGE |
 100.126 +        MSI_DATA_LEVEL_ASSERT |
 100.127 +        MSI_DATA_DELIVERY_FIXED |
 100.128 +        MSI_DATA_VECTOR(vector);
 100.129 +
 100.130 +    pci_conf_write32(bus, dev, func,
 100.131 +        iommu->msi_cap + PCI_MSI_DATA_64, msi_data);
 100.132 +}
 100.133 +
 100.134 +static void amd_iommu_msi_addr_init(struct amd_iommu *iommu, int phy_cpu)
 100.135 +{
 100.137 +    int bus = (iommu->bdf >> 8) & 0xff;
 100.138 +    int dev = PCI_SLOT(iommu->bdf & 0xff);
 100.139 +    int func = PCI_FUNC(iommu->bdf & 0xff);
 100.140 +
 100.141 +    u32 address_hi = 0;
 100.142 +    u32 address_lo = MSI_ADDR_HEADER |
 100.143 +            MSI_ADDR_DESTMODE_PHYS |
 100.144 +            MSI_ADDR_REDIRECTION_CPU |
 100.145 +            MSI_ADDR_DESTID_CPU(phy_cpu);
 100.146 +
 100.147 +    pci_conf_write32(bus, dev, func,
 100.148 +        iommu->msi_cap + PCI_MSI_ADDRESS_LO, address_lo);
 100.149 +    pci_conf_write32(bus, dev, func,
 100.150 +        iommu->msi_cap + PCI_MSI_ADDRESS_HI, address_hi);
 100.151 +}
 100.152 +
 100.153 +static void amd_iommu_msi_enable(struct amd_iommu *iommu, int flag)
 100.154 +{
 100.155 +    u16 control;
 100.156 +    int bus = (iommu->bdf >> 8) & 0xff;
 100.157 +    int dev = PCI_SLOT(iommu->bdf & 0xff);
 100.158 +    int func = PCI_FUNC(iommu->bdf & 0xff);
 100.159 +
 100.160 +    control = pci_conf_read16(bus, dev, func,
 100.161 +        iommu->msi_cap + PCI_MSI_FLAGS);
 100.162 +    control &= ~(1);
 100.163 +    if ( flag )
 100.164 +        control |= flag;
 100.165 +    pci_conf_write16(bus, dev, func,
 100.166 +        iommu->msi_cap + PCI_MSI_FLAGS, control);
 100.167 +}
 100.168 +
 100.169 +static void iommu_msi_unmask(unsigned int vector)
 100.170 +{
 100.171 +    unsigned long flags;
 100.172 +    struct amd_iommu *iommu = vector_to_iommu[vector];
 100.173 +
 100.174 +    /* FIXME: do not support mask bits at the moment */
 100.175 +    if ( iommu->maskbit )
 100.176 +        return;
 100.177 +
 100.178 +    spin_lock_irqsave(&iommu->lock, flags);
 100.179 +    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 100.180 +    spin_unlock_irqrestore(&iommu->lock, flags);
 100.181 +}
 100.182 +
 100.183 +static void iommu_msi_mask(unsigned int vector)
 100.184 +{
 100.185 +    unsigned long flags;
 100.186 +    struct amd_iommu *iommu = vector_to_iommu[vector];
 100.187 +
 100.188 +    /* FIXME: do not support mask bits at the moment */
 100.189 +    if ( iommu->maskbit )
 100.190 +        return;
 100.191 +
 100.192 +    spin_lock_irqsave(&iommu->lock, flags);
 100.193 +    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_DISABLED);
 100.194 +    spin_unlock_irqrestore(&iommu->lock, flags);
 100.195 +}
 100.196 +
 100.197 +static unsigned int iommu_msi_startup(unsigned int vector)
 100.198 +{
 100.199 +    iommu_msi_unmask(vector);
 100.200 +    return 0;
 100.201 +}
 100.202 +
 100.203 +static void iommu_msi_end(unsigned int vector)
 100.204 +{
 100.205 +    iommu_msi_unmask(vector);
 100.206 +    ack_APIC_irq();
 100.207 +}
 100.208 +
 100.209 +static void iommu_msi_set_affinity(unsigned int vector, cpumask_t dest)
 100.210 +{
 100.211 +    struct amd_iommu *iommu = vector_to_iommu[vector];
 100.212 +    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(dest)));
 100.213 +}
 100.214 +
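          +/* Edge-style MSI: masked across handling (.ack), unmasked again in .end. */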
 100.215 +static struct hw_interrupt_type iommu_msi_type = {
 100.216 +    .typename = "AMD_IOV_MSI",
 100.217 +    .startup = iommu_msi_startup,
 100.218 +    .shutdown = iommu_msi_mask,
 100.219 +    .enable = iommu_msi_unmask,
 100.220 +    .disable = iommu_msi_mask,
 100.221 +    .ack = iommu_msi_mask,
 100.222 +    .end = iommu_msi_end,
 100.223 +    .set_affinity = iommu_msi_set_affinity,
 100.224 +};
 100.225 +
 100.226 +static void parse_event_log_entry(u32 entry[])
 100.227 +{
 100.228 +    u16 domain_id, device_id;
 100.229 +    u32 code;
 100.230 +    u64 *addr;
 100.231 +    char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
 100.232 +                                         "IO_PAGE_FALT",
 100.233 +                                         "DEV_TABLE_HW_ERROR",
 100.234 +                                         "PAGE_TABLE_HW_ERROR",
 100.235 +                                         "ILLEGAL_COMMAND_ERROR",
 100.236 +                                         "COMMAND_HW_ERROR",
 100.237 +                                         "IOTLB_INV_TIMEOUT",
 100.238 +                                         "INVALID_DEV_REQUEST"};
 100.239 +
 100.240 +    code = get_field_from_reg_u32(entry[1],
 100.241 +                                           IOMMU_EVENT_CODE_MASK,
 100.242 +                                           IOMMU_EVENT_CODE_SHIFT);
 100.243 +
 100.244 +    if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST)
 100.245 +        || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
 100.246 +    {
 100.247 +        amd_iov_error("Invalid event log entry!\n");
 100.248 +        return;
 100.249 +    }
 100.250 +
 100.251 +    if ( code == IOMMU_EVENT_IO_PAGE_FALT )
 100.252 +    {
 100.253 +        device_id = get_field_from_reg_u32(entry[0],
 100.254 +                                           IOMMU_EVENT_DEVICE_ID_MASK,
 100.255 +                                           IOMMU_EVENT_DEVICE_ID_SHIFT);
 100.256 +        domain_id = get_field_from_reg_u32(entry[1],
 100.257 +                                           IOMMU_EVENT_DOMAIN_ID_MASK,
 100.258 +                                           IOMMU_EVENT_DOMAIN_ID_SHIFT);
 100.259 +        addr = (u64 *)(entry + 2);
 100.260 +        printk(XENLOG_ERR "AMD_IOV: "
 100.261 +            "%s: domain:%d, device id:0x%x, fault address:0x%"PRIx64"\n",
 100.262 +            event_str[code-1], domain_id, device_id, *addr);
 100.263 +    }
 100.264 +}
 100.265 +
 100.266 +static void amd_iommu_page_fault(int vector, void *dev_id,
 100.267 +                             struct cpu_user_regs *regs)
 100.268 +{
 100.269 +    u32  event[4];
 100.270 +    unsigned long flags;
 100.271 +    int ret = 0;
 100.272 +    struct amd_iommu *iommu = dev_id;
 100.273 +
 100.274 +    spin_lock_irqsave(&iommu->lock, flags);
 100.275 +    ret = amd_iommu_read_event_log(iommu, event);
 100.276 +    spin_unlock_irqrestore(&iommu->lock, flags);
 100.277 +
 100.278 +    if ( ret != 0 )
 100.279 +        return;
 100.280 +    parse_event_log_entry(event);
 100.281 +}
 100.282 +
 100.283 +static int set_iommu_interrupt_handler(struct amd_iommu *iommu)
 100.284 +{
 100.285 +    int vector, ret;
 100.286 +    unsigned long flags;
 100.287 +
 100.288 +    vector = assign_irq_vector(AUTO_ASSIGN);
 100.289 +    if ( !vector )
 100.290 +    {
 100.291 +        amd_iov_error("no vectors\n");
 100.292 +        return 0;
 100.293 +    }
 100.294 +
 100.295 +    vector_to_iommu[vector] = iommu;
 100.296 +
 100.297 +    /* make irq == vector */
 100.298 +    irq_vector[vector] = vector;
 100.299 +    vector_irq[vector] = vector;
 100.300 +
 100.301 +    irq_desc[vector].handler = &iommu_msi_type;
 100.302 +    ret = request_irq(vector, amd_iommu_page_fault, 0, "dmar", iommu);
 100.303 +    if ( ret )
 100.304 +    {
 100.305 +        amd_iov_error("can't request irq\n");
 100.306 +        return 0;
 100.307 +    }
 100.308 +
 100.309 +    spin_lock_irqsave(&iommu->lock, flags);
 100.310 +
 100.311 +    amd_iommu_msi_data_init(iommu, vector);
 100.312 +    amd_iommu_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
 100.313 +    amd_iommu_msi_enable(iommu, IOMMU_CONTROL_ENABLED);
 100.314 +
 100.315 +    spin_unlock_irqrestore(&iommu->lock, flags);
 100.316 +
 100.317 +    return vector;
 100.318 +}
 100.319 +
 100.320  void __init enable_iommu(struct amd_iommu *iommu)
 100.321  {
 100.322 +    unsigned long flags;
 100.323 +
 100.324 +    set_iommu_interrupt_handler(iommu);
 100.325 +
 100.326 +    spin_lock_irqsave(&iommu->lock, flags);
 100.327 +
 100.328      register_iommu_exclusion_range(iommu);
 100.329      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
 100.330 +    set_iommu_event_log_control(iommu, IOMMU_CONTROL_ENABLED);
 100.331      set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
 100.332 -    printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus);
 100.333 +
 100.334 +    spin_unlock_irqrestore(&iommu->lock, flags);
 100.335 +
 100.336 +    printk("AMD_IOV: IOMMU %d Enabled.\n", nr_amd_iommus);
 100.337  }
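
Every helper in this hunk opens by splitting iommu->bdf into bus, device and function before touching PCI config space. A standalone sketch of that decoding, assuming the conventional layout (bus in bits 15:8, device in bits 7:3, function in bits 2:0); the macros are re-derived here for illustration rather than copied from the Xen headers:

    #include <stdio.h>
    #include <stdint.h>

    /* Re-derived for illustration -- not copied from the Xen headers. */
    #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)   /* device: bits 7:3 */
    #define PCI_FUNC(devfn) ((devfn) & 0x07)          /* function: bits 2:0 */

    int main(void)
    {
        uint16_t bdf = 0x0a10;              /* bus 0x0a, devfn 0x10 */
        int bus  = (bdf >> 8) & 0xff;       /* same extraction as above */
        int dev  = PCI_SLOT(bdf & 0xff);
        int func = PCI_FUNC(bdf & 0xff);

        printf("%02x:%02x.%x\n", bus, dev, func);   /* prints 0a:02.0 */
        return 0;
    }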
   101.1 --- a/xen/drivers/passthrough/amd/iommu_map.c	Thu Apr 24 14:02:16 2008 -0600
   101.2 +++ b/xen/drivers/passthrough/amd/iommu_map.c	Thu Apr 24 14:08:29 2008 -0600
   101.3 @@ -154,8 +154,7 @@ void flush_command_buffer(struct amd_iom
   101.4          }
   101.5          else
   101.6          {
   101.7 -            dprintk(XENLOG_WARNING, "AMD IOMMU: Warning:"
   101.8 -                    " ComWaitInt bit did not assert!\n");
   101.9 +            amd_iov_warning("ComWaitInt bit did not assert!\n");
  101.10          }
  101.11      }
  101.12  }
  101.13 @@ -402,10 +401,9 @@ int amd_iommu_map_page(struct domain *d,
  101.14      pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
  101.15      if ( pte == NULL )
  101.16      {
  101.17 -        dprintk(XENLOG_ERR,
  101.18 -                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.19 +        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.20          spin_unlock_irqrestore(&hd->mapping_lock, flags);
  101.21 -        return -EIO;
  101.22 +        return -EFAULT;
  101.23      }
  101.24  
  101.25      set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
  101.26 @@ -439,10 +437,9 @@ int amd_iommu_unmap_page(struct domain *
  101.27      pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
  101.28      if ( pte == NULL )
  101.29      {
  101.30 -        dprintk(XENLOG_ERR,
  101.31 -                "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.32 +        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.33          spin_unlock_irqrestore(&hd->mapping_lock, flags);
  101.34 -        return -EIO;
  101.35 +        return -EFAULT;
  101.36      }
  101.37  
  101.38      /* mark PTE as 'page not present' */
  101.39 @@ -479,9 +476,8 @@ int amd_iommu_reserve_domain_unity_map(
  101.40              hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT);
  101.41          if ( pte == NULL )
  101.42          {
  101.43 -            dprintk(XENLOG_ERR,
  101.44 -                    "AMD IOMMU: Invalid IO pagetable entry "
  101.45 -                    "phys_addr = %lx\n", phys_addr);
  101.46 +            amd_iov_error(
  101.47 +                "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
  101.48              spin_unlock_irqrestore(&hd->mapping_lock, flags);
  101.49              return -EFAULT;
  101.50          }
  101.51 @@ -528,8 +524,7 @@ int amd_iommu_sync_p2m(struct domain *d)
  101.52          pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
  101.53          if ( pte == NULL )
  101.54          {
  101.55 -            dprintk(XENLOG_ERR,
  101.56 -                    "AMD IOMMU: Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.57 +            amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
  101.58              spin_unlock_irqrestore(&hd->mapping_lock, flags);
  101.59              return -EFAULT;
  101.60          }
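
The hunks in this file replace open-coded dprintk(XENLOG_*, "AMD IOMMU: ...") calls with amd_iov_error()/amd_iov_warning(). Their definitions live elsewhere in the changeset; one plausible shape, inferred from the "AMD_IOV: " prefix seen in the raw printk earlier, with printf standing in for printk so the sketch runs in user space:

    #include <stdio.h>

    /* Hypothetical reconstruction -- the real definitions live in the
     * AMD IOMMU headers; XENLOG_* shrink to plain tags here. */
    #define XENLOG_ERR     "<err> "
    #define XENLOG_WARNING "<warn> "
    #define AMD_IOV_PREFIX "AMD_IOV: "

    #define amd_iov_error(fmt, args...) \
        printf(XENLOG_ERR AMD_IOV_PREFIX fmt, ## args)
    #define amd_iov_warning(fmt, args...) \
        printf(XENLOG_WARNING AMD_IOV_PREFIX fmt, ## args)

    int main(void)
    {
        unsigned long gfn = 0x1234;
        amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
        amd_iov_warning("ComWaitInt bit did not assert!\n");
        return 0;
    }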
   102.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Apr 24 14:02:16 2008 -0600
   102.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Apr 24 14:08:29 2008 -0600
   102.3 @@ -29,16 +29,11 @@
   102.4  struct list_head amd_iommu_head;
   102.5  long amd_iommu_poll_comp_wait = COMPLETION_WAIT_DEFAULT_POLLING_COUNT;
   102.6  static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
   102.7 -int nr_amd_iommus = 0;
   102.8 -
   102.9 -unsigned short ivrs_bdf_entries = 0;
  102.10 -struct ivrs_mappings *ivrs_mappings = NULL;
  102.11 +static long amd_iommu_event_log_entries = IOMMU_EVENT_LOG_DEFAULT_ENTRIES;
  102.12 +int nr_amd_iommus;
  102.13  
  102.14 -/* will set if amd-iommu HW is found */
  102.15 -int amd_iommu_enabled = 0;
  102.16 -
  102.17 -static int enable_amd_iommu = 0;
  102.18 -boolean_param("enable_amd_iommu", enable_amd_iommu);
  102.19 +unsigned short ivrs_bdf_entries;
  102.20 +struct ivrs_mappings *ivrs_mappings;
  102.21  
  102.22  static void deallocate_domain_page_tables(struct hvm_iommu *hd)
  102.23  {
  102.24 @@ -73,25 +68,8 @@ static void __init deallocate_iommu_tabl
  102.25  static void __init deallocate_iommu_resources(struct amd_iommu *iommu)
  102.26  {
  102.27      deallocate_iommu_table_struct(&iommu->dev_table);
  102.28 -    deallocate_iommu_table_struct(&iommu->cmd_buffer);;
  102.29 -}
  102.30 -
  102.31 -static void __init detect_cleanup(void)
  102.32 -{
  102.33 -    struct amd_iommu *iommu, *next;
  102.34 -
  102.35 -    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
  102.36 -    {
  102.37 -        list_del(&iommu->list);
  102.38 -        deallocate_iommu_resources(iommu);
  102.39 -        xfree(iommu);
  102.40 -    }
  102.41 -
  102.42 -    if ( ivrs_mappings )
  102.43 -    {
  102.44 -        xfree(ivrs_mappings);
  102.45 -        ivrs_mappings = NULL;
  102.46 -    }
  102.47 +    deallocate_iommu_table_struct(&iommu->cmd_buffer);
  102.48 +    deallocate_iommu_table_struct(&iommu->event_log);
  102.49  }
  102.50  
  102.51  static int __init allocate_iommu_table_struct(struct table_struct *table,
  102.52 @@ -102,7 +80,7 @@ static int __init allocate_iommu_table_s
  102.53  
  102.54      if ( !table->buffer )
  102.55      {
  102.56 -        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating %s\n", name);
  102.57 +        amd_iov_error("Error allocating %s\n", name);
  102.58          return -ENOMEM;
  102.59      }
  102.60  
  102.61 @@ -139,6 +117,20 @@ static int __init allocate_iommu_resourc
  102.62                                       "Command Buffer") != 0 )
  102.63          goto error_out;
  102.64  
  102.65 +    /* allocate 'event log' in power of 2 increments of 4K */
  102.66 +    iommu->event_log_head = 0;
  102.67 +    iommu->event_log.alloc_size =
  102.68 +        PAGE_SIZE << get_order_from_bytes(
  102.69 +            PAGE_ALIGN(amd_iommu_event_log_entries *
  102.70 +                        IOMMU_EVENT_LOG_ENTRY_SIZE));
  102.71 +
  102.72 +    iommu->event_log.entries =
  102.73 +        iommu->event_log.alloc_size / IOMMU_EVENT_LOG_ENTRY_SIZE;
  102.74 +
  102.75 +    if ( allocate_iommu_table_struct(&iommu->event_log,
  102.76 +                                     "Event Log") != 0 )
  102.77 +        goto error_out;
  102.78 +
  102.79      return 0;
  102.80  
  102.81   error_out:
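
The event-log allocation above rounds the requested buffer up to a power-of-two number of 4K pages, then derives the usable entry count back from the rounded size. A standalone model of that arithmetic; the 512-entry default and 16-byte entry size are assumptions made for the example, not values taken from this changeset:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Smallest order such that (PAGE_SIZE << order) >= nr_bytes. */
    static int get_order_from_bytes(unsigned long nr_bytes)
    {
        int order = 0;
        while ( (PAGE_SIZE << order) < nr_bytes )
            order++;
        return order;
    }

    int main(void)
    {
        /* assumed figures: 512 default entries, 16 bytes per entry */
        unsigned long entries = 512, entry_size = 16;
        unsigned long alloc_size =
            PAGE_SIZE << get_order_from_bytes(PAGE_ALIGN(entries * entry_size));

        printf("alloc_size = %lu bytes -> %lu usable entries\n",
               alloc_size, alloc_size / entry_size);   /* 8192 -> 512 */
        return 0;
    }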
  102.82 @@ -153,7 +145,7 @@ int iommu_detect_callback(u8 bus, u8 dev
  102.83      iommu = (struct amd_iommu *) xmalloc(struct amd_iommu);
  102.84      if ( !iommu )
  102.85      {
  102.86 -        dprintk(XENLOG_ERR, "AMD IOMMU: Error allocating amd_iommu\n");
  102.87 +        amd_iov_error("Error allocating amd_iommu\n");
  102.88          return -ENOMEM;
  102.89      }
  102.90      memset(iommu, 0, sizeof(struct amd_iommu));
  102.91 @@ -203,6 +195,7 @@ static int __init amd_iommu_init(void)
  102.92              goto error_out;
  102.93          register_iommu_dev_table_in_mmio_space(iommu);
  102.94          register_iommu_cmd_buffer_in_mmio_space(iommu);
  102.95 +        register_iommu_event_log_in_mmio_space(iommu);
  102.96  
  102.97          spin_unlock_irqrestore(&iommu->lock, flags);
  102.98      }
  102.99 @@ -220,19 +213,15 @@ static int __init amd_iommu_init(void)
 102.100      }
 102.101  
 102.102      if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 )
 102.103 -        dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n");
 102.104 +        amd_iov_error("Did not find IVRS table!\n");
 102.105  
 102.106      for_each_amd_iommu ( iommu )
 102.107      {
 102.108 -        spin_lock_irqsave(&iommu->lock, flags);
 102.109          /* enable IOMMU translation services */
 102.110          enable_iommu(iommu);
 102.111          nr_amd_iommus++;
 102.112 -        spin_unlock_irqrestore(&iommu->lock, flags);
 102.113      }
 102.114  
 102.115 -    amd_iommu_enabled = 1;
 102.116 -
 102.117      return 0;
 102.118  
 102.119   error_out:
 102.120 @@ -262,7 +251,7 @@ struct amd_iommu *find_iommu_for_device(
 102.121      return NULL;
 102.122  }
 102.123  
 102.124 -void amd_iommu_setup_domain_device(
 102.125 +static void amd_iommu_setup_domain_device(
 102.126      struct domain *domain, struct amd_iommu *iommu, int bdf)
 102.127  {
 102.128      void *dte;
 102.129 @@ -288,12 +277,12 @@ void amd_iommu_setup_domain_device(
 102.130          sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
 102.131          dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
 102.132          amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr,
 102.133 -                                      req_id, sys_mgt, dev_ex,
 102.134 +                                      hd->domain_id, sys_mgt, dev_ex,
 102.135                                        hd->paging_mode);
 102.136  
 102.137          invalidate_dev_table_entry(iommu, req_id);
 102.138          flush_command_buffer(iommu);
 102.139 -        dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, "
 102.140 +        amd_iov_info("Enable DTE:0x%x, "
 102.141                  "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
 102.142                  req_id, root_ptr, hd->domain_id, hd->paging_mode);
 102.143  
 102.144 @@ -301,9 +290,9 @@ void amd_iommu_setup_domain_device(
 102.145      }
 102.146  }
 102.147  
 102.148 -void __init amd_iommu_setup_dom0_devices(void)
 102.149 +static void amd_iommu_setup_dom0_devices(struct domain *d)
 102.150  {
 102.151 -    struct hvm_iommu *hd = domain_hvm_iommu(dom0);
 102.152 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
 102.153      struct amd_iommu *iommu;
 102.154      struct pci_dev *pdev;
 102.155      int bus, dev, func;
 102.156 @@ -333,80 +322,72 @@ void __init amd_iommu_setup_dom0_devices
 102.157                      find_iommu_for_device(bus, pdev->devfn) : NULL;
 102.158  
 102.159                  if ( iommu )
 102.160 -                    amd_iommu_setup_domain_device(dom0, iommu, bdf);
 102.161 +                    amd_iommu_setup_domain_device(d, iommu, bdf);
 102.162              }
 102.163          }
 102.164      }
 102.165  }
 102.166  
 102.167 -int amd_iommu_detect(void)
 102.168 +int amd_iov_detect(void)
 102.169  {
 102.170 -    unsigned long i;
 102.171      int last_bus;
 102.172 -    struct amd_iommu *iommu;
 102.173 -
 102.174 -    if ( !enable_amd_iommu )
 102.175 -    {
 102.176 -        printk("AMD IOMMU: Disabled\n");
 102.177 -        return 0;
 102.178 -    }
 102.179 +    struct amd_iommu *iommu, *next;
 102.180  
 102.181      INIT_LIST_HEAD(&amd_iommu_head);
 102.182  
 102.183      if ( scan_for_iommu(iommu_detect_callback) != 0 )
 102.184      {
 102.185 -        dprintk(XENLOG_ERR, "AMD IOMMU: Error detection\n");
 102.186 +        amd_iov_error("Error detection\n");
 102.187          goto error_out;
 102.188      }
 102.189  
 102.190      if ( !iommu_found() )
 102.191      {
 102.192 -        printk("AMD IOMMU: Not found!\n");
 102.193 -        return 0;
 102.194 +        printk("AMD_IOV: IOMMU not found!\n");
 102.195 +        goto error_out;
 102.196      }
 102.197 -    else
 102.198 -    {
 102.199 -        /* allocate 'ivrs mappings' table */
 102.200 -        /* note: the table has entries to accomodate all IOMMUs */
 102.201 -        last_bus = 0;
 102.202 -        for_each_amd_iommu ( iommu )
 102.203 -            if ( iommu->last_downstream_bus > last_bus )
 102.204 -                last_bus = iommu->last_downstream_bus;
 102.205 +
 102.206 +    /* allocate 'ivrs mappings' table */
 102.207 +    /* note: the table has entries to accommodate all IOMMUs */
 102.208 +    last_bus = 0;
 102.209 +    for_each_amd_iommu ( iommu )
 102.210 +        if ( iommu->last_downstream_bus > last_bus )
 102.211 +            last_bus = iommu->last_downstream_bus;
 102.212  
 102.213 -        ivrs_bdf_entries = (last_bus + 1) *
 102.214 -            IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
 102.215 -        ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
 102.216 -
 102.217 -        if ( !ivrs_mappings )
 102.218 -        {
 102.219 -            dprintk(XENLOG_ERR, "AMD IOMMU:"
 102.220 -                    " Error allocating IVRS DevMappings table\n");
 102.221 -            goto error_out;
 102.222 -        }
 102.223 -        memset(ivrs_mappings, 0,
 102.224 -               ivrs_bdf_entries * sizeof(struct ivrs_mappings));
 102.225 +    ivrs_bdf_entries = (last_bus + 1) *
 102.226 +        IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
 102.227 +    ivrs_mappings = xmalloc_array(struct ivrs_mappings, ivrs_bdf_entries);
 102.228 +    if ( ivrs_mappings == NULL )
 102.229 +    {
 102.230 +        amd_iov_error("Error allocating IVRS DevMappings table\n");
 102.231 +        goto error_out;
 102.232      }
 102.233 +    memset(ivrs_mappings, 0,
 102.234 +           ivrs_bdf_entries * sizeof(struct ivrs_mappings));
 102.235  
 102.236      if ( amd_iommu_init() != 0 )
 102.237      {
 102.238 -        dprintk(XENLOG_ERR, "AMD IOMMU: Error initialization\n");
 102.239 +        amd_iov_error("Error initialization\n");
 102.240          goto error_out;
 102.241      }
 102.242  
 102.243 -    if ( iommu_domain_init(dom0) != 0 )
 102.244 -        goto error_out;
 102.245 -
 102.246 -    /* setup 1:1 page table for dom0 */
 102.247 -    for ( i = 0; i < max_page; i++ )
 102.248 -        amd_iommu_map_page(dom0, i, i);
 102.249 -
 102.250 -    amd_iommu_setup_dom0_devices();
 102.251      return 0;
 102.252  
 102.253   error_out:
 102.254 -    detect_cleanup();
 102.255 +    list_for_each_entry_safe ( iommu, next, &amd_iommu_head, list )
 102.256 +    {
 102.257 +        list_del(&iommu->list);
 102.258 +        deallocate_iommu_resources(iommu);
 102.259 +        xfree(iommu);
 102.260 +    }
 102.261 +
 102.262 +    if ( ivrs_mappings )
 102.263 +    {
 102.264 +        xfree(ivrs_mappings);
 102.265 +        ivrs_mappings = NULL;
 102.266 +    }
 102.267 +
 102.268      return -ENODEV;
 102.269 -
 102.270  }
 102.271  
 102.272  static int allocate_domain_resources(struct hvm_iommu *hd)
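
The error_out path above folds the old detect_cleanup() inline, walking the IOMMU list with list_for_each_entry_safe so each node can be unlinked and freed without derailing the iteration. Why the _safe variant matters, shown on a bare singly linked list:

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int id; struct node *next; };

    int main(void)
    {
        struct node *head = NULL;

        /* build a short list: 2 -> 1 -> 0 */
        for ( int i = 0; i < 3; i++ )
        {
            struct node *n = malloc(sizeof(*n));
            n->id = i;
            n->next = head;
            head = n;
        }

        /* cache `next` before freeing the current node -- exactly what
         * list_for_each_entry_safe does for the error path above */
        for ( struct node *n = head, *next; n != NULL; n = next )
        {
            next = n->next;
            printf("freeing node %d\n", n->id);
            free(n);
        }
        return 0;
    }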
 102.273 @@ -447,12 +428,10 @@ static int get_paging_mode(unsigned long
 102.274              return -ENOMEM;
 102.275      }
 102.276  
 102.277 -    dprintk(XENLOG_INFO, "AMD IOMMU: paging mode = %d\n", level);
 102.278 -
 102.279      return level;
 102.280  }
 102.281  
 102.282 -int amd_iommu_domain_init(struct domain *domain)
 102.283 +static int amd_iommu_domain_init(struct domain *domain)
 102.284  {
 102.285      struct hvm_iommu *hd = domain_hvm_iommu(domain);
 102.286  
 102.287 @@ -463,10 +442,18 @@ int amd_iommu_domain_init(struct domain 
 102.288          return -ENOMEM;
 102.289      }
 102.290  
 102.291 -    if ( is_hvm_domain(domain) )
 102.292 -        hd->paging_mode = IOMMU_PAGE_TABLE_LEVEL_4;
 102.293 -    else
 102.294 -        hd->paging_mode = get_paging_mode(max_page);
 102.295 +    hd->paging_mode = is_hvm_domain(domain) ?
 102.296 +        IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
 102.297 +
 102.298 +    if ( domain->domain_id == 0 )
 102.299 +    {
 102.300 +        unsigned long i;
 102.301 +        /* setup 1:1 page table for dom0 */
 102.302 +        for ( i = 0; i < max_page; i++ )
 102.303 +            amd_iommu_map_page(domain, i, i);
 102.304 +
 102.305 +        amd_iommu_setup_dom0_devices(domain);
 102.306 +    }
 102.307  
 102.308      hd->domain_id = domain->domain_id;
 102.309  
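
With dom0 setup moved into amd_iommu_domain_init(), domain 0 now receives its IO page tables at domain-init time. The 1:1 table built by the loop above is simply an identity map, gfn == mfn for every frame below max_page; in miniature:

    #include <stdio.h>

    #define MAX_PAGE 8UL                /* toy stand-in for the host max_page */

    static unsigned long io_p2m[MAX_PAGE];   /* toy "IO page table" */

    static void map_page(unsigned long gfn, unsigned long mfn)
    {
        io_p2m[gfn] = mfn;
    }

    int main(void)
    {
        /* dom0's 1:1 map: every guest frame i points at machine frame i */
        for ( unsigned long i = 0; i < MAX_PAGE; i++ )
            map_page(i, i);

        printf("gfn 5 -> mfn %lu\n", io_p2m[5]);   /* prints 5 */
        return 0;
    }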
 102.310 @@ -490,7 +477,7 @@ static void amd_iommu_disable_domain_dev
 102.311          memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
 102.312          invalidate_dev_table_entry(iommu, req_id);
 102.313          flush_command_buffer(iommu);
 102.314 -        dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x,"
 102.315 +        amd_iov_info("Disable DTE:0x%x,"
 102.316                  " domain_id:%d, paging_mode:%d\n",
 102.317                  req_id,  domain_hvm_iommu(domain)->domain_id,
 102.318                  domain_hvm_iommu(domain)->paging_mode);
 102.319 @@ -525,7 +512,7 @@ static int reassign_device( struct domai
 102.320  
 102.321          if ( !iommu )
 102.322          {
 102.323 -            gdprintk(XENLOG_ERR , "AMD IOMMU: fail to find iommu."
 102.324 +            amd_iov_error("Fail to find iommu."
 102.325                       " %x:%x.%x cannot be assigned to domain %d\n", 
 102.326                       bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
 102.327              return -ENODEV;
 102.328 @@ -540,8 +527,7 @@ static int reassign_device( struct domai
 102.329          spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 102.330  
 102.331          amd_iommu_setup_domain_device(target, iommu, bdf);
 102.332 -        gdprintk(XENLOG_INFO ,
 102.333 -                 "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n",
 102.334 +        amd_iov_info("reassign %x:%x.%x domain %d -> domain %d\n",
 102.335                   bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 102.336                   source->domain_id, target->domain_id);
 102.337  
 102.338 @@ -550,7 +536,7 @@ static int reassign_device( struct domai
 102.339      return 0;
 102.340  }
 102.341  
 102.342 -int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 102.343 +static int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
 102.344  {
 102.345      int bdf = (bus << 8) | devfn;
 102.346      int req_id = ivrs_mappings[bdf].dte_requestor_id;
 102.347 @@ -580,8 +566,7 @@ static void release_domain_devices(struc
 102.348      {
 102.349          pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
 102.350          pdev_flr(pdev->bus, pdev->devfn);
 102.351 -        gdprintk(XENLOG_INFO ,
 102.352 -                 "AMD IOMMU: release devices %x:%x.%x\n",
 102.353 +        amd_iov_info("release domain %d devices %x:%x.%x\n", d->domain_id,
 102.354                   pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 102.355          reassign_device(d, dom0, pdev->bus, pdev->devfn);
 102.356      }
 102.357 @@ -637,16 +622,13 @@ static void deallocate_iommu_page_tables
 102.358      hd->root_table = NULL;
 102.359  }
 102.360  
 102.361 -void amd_iommu_domain_destroy(struct domain *d)
 102.362 +static void amd_iommu_domain_destroy(struct domain *d)
 102.363  {
 102.364 -    if ( !amd_iommu_enabled )
 102.365 -        return;
 102.366 -
 102.367      deallocate_iommu_page_tables(d);
 102.368      release_domain_devices(d);
 102.369  }
 102.370  
 102.371 -void amd_iommu_return_device(
 102.372 +static void amd_iommu_return_device(
 102.373      struct domain *s, struct domain *t, u8 bus, u8 devfn)
 102.374  {
 102.375      pdev_flr(bus, devfn);
   103.1 --- a/xen/drivers/passthrough/iommu.c	Thu Apr 24 14:02:16 2008 -0600
   103.2 +++ b/xen/drivers/passthrough/iommu.c	Thu Apr 24 14:08:29 2008 -0600
   103.3 @@ -18,6 +18,11 @@
   103.4  
   103.5  extern struct iommu_ops intel_iommu_ops;
   103.6  extern struct iommu_ops amd_iommu_ops;
   103.7 +int intel_vtd_setup(void);
   103.8 +int amd_iov_detect(void);
   103.9 +
  103.10 +int iommu_enabled = 1;
  103.11 +boolean_param("iommu", iommu_enabled);
  103.12  
  103.13  int iommu_domain_init(struct domain *domain)
  103.14  {
  103.15 @@ -134,3 +139,28 @@ void deassign_device(struct domain *d, u
  103.16  
  103.17      return hd->platform_ops->reassign_device(d, dom0, bus, devfn);
  103.18  }
  103.19 +
  103.20 +static int iommu_setup(void)
  103.21 +{
  103.22 +    int rc = -ENODEV;
  103.23 +
  103.24 +    if ( !iommu_enabled )
  103.25 +        goto out;
  103.26 +
  103.27 +    switch ( boot_cpu_data.x86_vendor )
  103.28 +    {
  103.29 +    case X86_VENDOR_INTEL:
  103.30 +        rc = intel_vtd_setup();
  103.31 +        break;
  103.32 +    case X86_VENDOR_AMD:
  103.33 +        rc = amd_iov_detect();
  103.34 +        break;
  103.35 +    }
  103.36 +
  103.37 +    iommu_enabled = (rc == 0);
  103.38 +
  103.39 + out:
  103.40 +    printk("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
  103.41 +    return rc;
  103.42 +}
  103.43 +__initcall(iommu_setup);
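
The new common iommu_setup() replaces the per-vendor boot hooks: a single boolean_param("iommu", ...) gates both implementations, the CPU vendor selects the backend, and the probe result is folded back into iommu_enabled. A compilable model of that control flow, with the vendor constant and probe results as stand-ins:

    #include <stdio.h>

    enum { X86_VENDOR_INTEL, X86_VENDOR_AMD };

    /* stand-ins for the real probes: pretend AMD hardware is present */
    static int intel_vtd_setup(void) { return -1; }
    static int amd_iov_detect(void)  { return 0; }

    int main(void)
    {
        int iommu_enabled = 1;          /* boot default; "iommu=0" clears it */
        int vendor = X86_VENDOR_AMD;    /* stand-in for boot_cpu_data */
        int rc = -1;

        if ( iommu_enabled )
        {
            switch ( vendor )
            {
            case X86_VENDOR_INTEL: rc = intel_vtd_setup(); break;
            case X86_VENDOR_AMD:   rc = amd_iov_detect();  break;
            }
            iommu_enabled = (rc == 0);
        }

        printf("I/O virtualisation %sabled\n", iommu_enabled ? "en" : "dis");
        return 0;
    }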
   104.1 --- a/xen/drivers/passthrough/vtd/dmar.c	Thu Apr 24 14:02:16 2008 -0600
   104.2 +++ b/xen/drivers/passthrough/vtd/dmar.c	Thu Apr 24 14:08:29 2008 -0600
   104.3 @@ -30,8 +30,7 @@
   104.4  #include "dmar.h"
   104.5  #include "../pci_regs.h"
   104.6  
   104.7 -int vtd_enabled;
   104.8 -boolean_param("vtd", vtd_enabled);
   104.9 +int vtd_enabled = 1;
  104.10  
  104.11  #undef PREFIX
  104.12  #define PREFIX VTDPREFIX "ACPI DMAR:"
  104.13 @@ -79,14 +78,9 @@ static int acpi_ioapic_device_match(
  104.14  struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id)
  104.15  {
  104.16      struct acpi_drhd_unit *drhd;
  104.17 -    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
  104.18 -        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
  104.19 -            dprintk(XENLOG_INFO VTDPREFIX,
  104.20 -                    "ioapic_to_drhd: drhd->address = %lx\n",
  104.21 -                    drhd->address);
  104.22 +    list_for_each_entry( drhd, &acpi_drhd_units, list )
  104.23 +        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
  104.24              return drhd;
  104.25 -        }
  104.26 -    }
  104.27      return NULL;
  104.28  }
  104.29  
  104.30 @@ -94,15 +88,9 @@ struct iommu * ioapic_to_iommu(unsigned 
  104.31  {
  104.32      struct acpi_drhd_unit *drhd;
  104.33  
  104.34 -    list_for_each_entry( drhd, &acpi_drhd_units, list ) {
  104.35 -        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) ) {
  104.36 -            dprintk(XENLOG_INFO VTDPREFIX,
  104.37 -                    "ioapic_to_iommu: drhd->address = %lx\n",
  104.38 -                    drhd->address);
  104.39 +    list_for_each_entry( drhd, &acpi_drhd_units, list )
  104.40 +        if ( acpi_ioapic_device_match(&drhd->ioapic_list, apic_id) )
  104.41              return drhd->iommu;
  104.42 -        }
  104.43 -    }
  104.44 -    dprintk(XENLOG_INFO VTDPREFIX, "returning NULL\n");
  104.45      return NULL;
  104.46  }
  104.47  
  104.48 @@ -150,21 +138,11 @@ struct acpi_drhd_unit * acpi_find_matche
  104.49  
  104.50          if ( acpi_pci_device_match(drhd->devices,
  104.51                                     drhd->devices_cnt, dev) )
  104.52 -        {
  104.53 -            dprintk(XENLOG_INFO VTDPREFIX, 
  104.54 -                    "acpi_find_matched_drhd_unit: drhd->address = %lx\n",
  104.55 -                    drhd->address);
  104.56              return drhd;
  104.57 -        }
  104.58      }
  104.59  
  104.60      if ( include_all_drhd )
  104.61 -    {
  104.62 -        dprintk(XENLOG_INFO VTDPREFIX, 
  104.63 -                "acpi_find_matched_drhd_unit:include_all_drhd->addr = %lx\n",
  104.64 -                include_all_drhd->address);
  104.65          return include_all_drhd;
  104.66 -    }
  104.67  
  104.68      return NULL;
  104.69  }
  104.70 @@ -174,11 +152,9 @@ struct acpi_rmrr_unit * acpi_find_matche
  104.71      struct acpi_rmrr_unit *rmrr;
  104.72  
  104.73      list_for_each_entry ( rmrr, &acpi_rmrr_units, list )
  104.74 -    {
  104.75          if ( acpi_pci_device_match(rmrr->devices,
  104.76                                     rmrr->devices_cnt, dev) )
  104.77              return rmrr;
  104.78 -    }
  104.79  
  104.80      return NULL;
  104.81  }
  104.82 @@ -199,11 +175,7 @@ struct acpi_atsr_unit * acpi_find_matche
  104.83      }
  104.84  
  104.85      if ( all_ports_atsru )
  104.86 -    {
  104.87 -        dprintk(XENLOG_INFO VTDPREFIX,
  104.88 -                "acpi_find_matched_atsr_unit: all_ports_atsru\n");
  104.89          return all_ports_atsru;
  104.90 -    }
  104.91  
  104.92      return NULL;
  104.93  }
  104.94 @@ -604,22 +576,24 @@ int acpi_dmar_init(void)
  104.95  {
  104.96      int rc;
  104.97  
  104.98 -    if ( !vtd_enabled )
  104.99 -        return -ENODEV;
 104.100 +    rc = -ENODEV;
 104.101 +    if ( !iommu_enabled )
 104.102 +        goto fail;
 104.103  
 104.104      if ( (rc = vtd_hw_check()) != 0 )
 104.105 -        return rc;
 104.106 +        goto fail;
 104.107  
 104.108      acpi_table_parse(ACPI_DMAR, acpi_parse_dmar);
 104.109  
 104.110 +    rc = -ENODEV;
 104.111      if ( list_empty(&acpi_drhd_units) )
 104.112 -    {
 104.113 -        dprintk(XENLOG_ERR VTDPREFIX, "No DMAR devices found\n");
 104.114 -        vtd_enabled = 0;
 104.115 -        return -ENODEV;
 104.116 -    }
 104.117 +        goto fail;
 104.118  
 104.119      printk("Intel VT-d has been enabled\n");
 104.120  
 104.121      return 0;
 104.122 +
 104.123 + fail:
 104.124 +    vtd_enabled = 0;
 104.125 +    return -ENODEV;
 104.126  }
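
acpi_dmar_init() now funnels every failure through one label that clears vtd_enabled, so the flag can no longer be left set after a failed probe. The same single-exit shape in miniature, with probe_ok standing in for a non-empty acpi_drhd_units list:

    #include <stdio.h>

    static int probe_ok;    /* stand-in for finding DRHD units; 0 = none */
    int vtd_enabled = 1;

    static int dmar_init_model(void)
    {
        if ( !probe_ok )
            goto fail;

        printf("Intel VT-d has been enabled\n");
        return 0;

     fail:
        vtd_enabled = 0;    /* every failure path lands here exactly once */
        return -1;          /* -ENODEV in the real code */
    }

    int main(void)
    {
        return dmar_init_model() ? 1 : 0;
    }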
   105.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Thu Apr 24 14:02:16 2008 -0600
   105.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Thu Apr 24 14:08:29 2008 -0600
   105.3 @@ -41,6 +41,9 @@ static spinlock_t domid_bitmap_lock;    
   105.4  static int domid_bitmap_size;           /* domain id bitmap size in bits */
   105.5  static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
   105.6  
   105.7 +static void setup_dom0_devices(struct domain *d);
   105.8 +static void setup_dom0_rmrr(struct domain *d);
   105.9 +
  105.10  #define DID_FIELD_WIDTH 16
  105.11  #define DID_HIGH_OFFSET 8
  105.12  static void context_set_domain_id(struct context_entry *context,
  105.13 @@ -78,17 +81,12 @@ static struct intel_iommu *alloc_intel_i
  105.14      struct intel_iommu *intel;
  105.15  
  105.16      intel = xmalloc(struct intel_iommu);
  105.17 -    if ( !intel )
  105.18 -    {
  105.19 -        gdprintk(XENLOG_ERR VTDPREFIX,
  105.20 -                 "Allocate intel_iommu failed.\n");
  105.21 +    if ( intel == NULL )
  105.22          return NULL;
  105.23 -    }
  105.24      memset(intel, 0, sizeof(struct intel_iommu));
  105.25  
  105.26      spin_lock_init(&intel->qi_ctrl.qinval_lock);
  105.27      spin_lock_init(&intel->qi_ctrl.qinval_poll_lock);
  105.28 -
  105.29      spin_lock_init(&intel->ir_ctrl.iremap_lock);
  105.30  
  105.31      return intel;
  105.32 @@ -96,68 +94,22 @@ static struct intel_iommu *alloc_intel_i
  105.33  
  105.34  static void free_intel_iommu(struct intel_iommu *intel)
  105.35  {
  105.36 -    if ( intel )
  105.37 -    {
  105.38 -        xfree(intel);
  105.39 -        intel = NULL;
  105.40 -    }
  105.41 +    xfree(intel);
  105.42  }
  105.43  
  105.44  struct qi_ctrl *iommu_qi_ctrl(struct iommu *iommu)
  105.45  {
  105.46 -    if ( !iommu )
  105.47 -        return NULL;
  105.48 -
  105.49 -    if ( !iommu->intel )
  105.50 -    {
  105.51 -        iommu->intel = alloc_intel_iommu();
  105.52 -        if ( !iommu->intel )
  105.53 -        {
  105.54 -            dprintk(XENLOG_ERR VTDPREFIX,
  105.55 -                    "iommu_qi_ctrl: Allocate iommu->intel failed.\n");
  105.56 -            return NULL;
  105.57 -        }
  105.58 -    }
  105.59 -
  105.60 -    return &(iommu->intel->qi_ctrl);
  105.61 +    return iommu ? &iommu->intel->qi_ctrl : NULL;
  105.62  }
  105.63  
  105.64  struct ir_ctrl *iommu_ir_ctrl(struct iommu *iommu)
  105.65  {
  105.66 -    if ( !iommu )
  105.67 -        return NULL;
  105.68 -
  105.69 -    if ( !iommu->intel )
  105.70 -    {
  105.71 -        iommu->intel = alloc_intel_iommu();
  105.72 -        if ( !iommu->intel )
  105.73 -        {
  105.74 -            dprintk(XENLOG_ERR VTDPREFIX,
  105.75 -                    "iommu_ir_ctrl: Allocate iommu->intel failed.\n");
  105.76 -            return NULL;
  105.77 -        }
  105.78 -    }
  105.79 -
  105.80 -    return &(iommu->intel->ir_ctrl);
  105.81 +    return iommu ? &iommu->intel->ir_ctrl : NULL;
  105.82  }
  105.83  
  105.84  struct iommu_flush *iommu_get_flush(struct iommu *iommu)
  105.85  {
  105.86 -    if ( !iommu )
  105.87 -        return NULL;
  105.88 -
  105.89 -    if ( !iommu->intel )
  105.90 -    {
  105.91 -        iommu->intel = alloc_intel_iommu();
  105.92 -        if ( !iommu->intel )
  105.93 -        {
  105.94 -            dprintk(XENLOG_ERR VTDPREFIX,
  105.95 -                    "iommu_get_flush: Allocate iommu->intel failed.\n");
  105.96 -            return NULL;
  105.97 -        }
  105.98 -    }
  105.99 -
 105.100 -    return &(iommu->intel->flush);
 105.101 +    return iommu ? &iommu->intel->flush : NULL;
 105.102  }
 105.103  
 105.104  unsigned int clflush_size;
 105.105 @@ -276,11 +228,7 @@ static u64 addr_to_dma_page_maddr(struct
 105.106              dma_set_pte_addr(*pte, maddr);
 105.107              vaddr = map_vtd_domain_page(maddr);
 105.108              if ( !vaddr )
 105.109 -            {
 105.110 -                unmap_vtd_domain_page(parent);
 105.111 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
 105.112 -                return 0;
 105.113 -            }
 105.114 +                break;
 105.115  
 105.116              /*
 105.117               * high level table always sets r/w, last level
 105.118 @@ -294,14 +242,9 @@ static u64 addr_to_dma_page_maddr(struct
 105.119          {
 105.120              vaddr = map_vtd_domain_page(pte->val);
 105.121              if ( !vaddr )
 105.122 -            {
 105.123 -                unmap_vtd_domain_page(parent);
 105.124 -                spin_unlock_irqrestore(&hd->mapping_lock, flags);
 105.125 -                return 0;
 105.126 -            }
 105.127 +                break;
 105.128          }
 105.129  
 105.130 -        unmap_vtd_domain_page(parent);
 105.131          if ( level == 2 )
 105.132          {
 105.133              pte_maddr = pte->val & PAGE_MASK_4K;
 105.134 @@ -309,11 +252,13 @@ static u64 addr_to_dma_page_maddr(struct
 105.135              break;
 105.136          }
 105.137  
 105.138 +        unmap_vtd_domain_page(parent);
 105.139          parent = (struct dma_pte *)vaddr;
 105.140          vaddr = NULL;
 105.141          level--;
 105.142      }
 105.143  
 105.144 +    unmap_vtd_domain_page(parent);
 105.145      spin_unlock_irqrestore(&hd->mapping_lock, flags);
 105.146      return pte_maddr;
 105.147  }
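
The reworked page-table walk above replaces the per-branch unmap/unlock/return sequences with break, leaving exactly one unmap_vtd_domain_page(parent) after the loop on every exit path. The invariant, modelled with malloc/free standing in for the map/unmap pair:

    #include <stdlib.h>

    static void *map_page(void)      { return malloc(64); }
    static void unmap_page(void *p)  { free(p); }

    /* On failure just break: the single unmap after the loop still runs,
     * so every map_page() is matched exactly once on every path. */
    static void walk(int levels)
    {
        void *parent = map_page();

        while ( levels-- > 0 )
        {
            void *child = map_page();
            if ( child == NULL )
                break;              /* error path: fall through to unmap */
            unmap_page(parent);
            parent = child;
        }

        unmap_page(parent);
    }

    int main(void)
    {
        walk(3);
        return 0;
    }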
 105.148 @@ -688,7 +633,7 @@ void dma_pte_free_pagetable(struct domai
 105.149      struct dma_pte *page, *pte;
 105.150      int total = agaw_to_level(hd->agaw);
 105.151      int level;
 105.152 -    u32 tmp;
 105.153 +    u64 tmp;
 105.154      u64 pg_maddr;
 105.155  
 105.156      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 105.157 @@ -709,7 +654,10 @@ void dma_pte_free_pagetable(struct domai
 105.158          {
 105.159              pg_maddr = dma_addr_level_page_maddr(domain, tmp, level);
 105.160              if ( pg_maddr == 0 )
 105.161 -                return;
 105.162 +            {
 105.163 +                tmp += level_size(level);
 105.164 +                continue;
 105.165 +            }
 105.166              page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
 105.167              pte = page + address_level_offset(tmp, level);
 105.168              dma_clear_pte(*pte);
 105.169 @@ -730,18 +678,11 @@ void dma_pte_free_pagetable(struct domai
 105.170      }
 105.171  }
 105.172  
 105.173 -/* iommu handling */
 105.174  static int iommu_set_root_entry(struct iommu *iommu)
 105.175  {
 105.176      u32 cmd, sts;
 105.177      unsigned long flags;
 105.178 -
 105.179 -    if ( iommu == NULL )
 105.180 -    {
 105.181 -        gdprintk(XENLOG_ERR VTDPREFIX,
 105.182 -                 "iommu_set_root_entry: iommu == NULL\n");
 105.183 -        return -EINVAL;
 105.184 -    }
 105.185 +    s_time_t start_time;
 105.186  
 105.187      if ( iommu->root_maddr != 0 )
 105.188      {
 105.189 @@ -760,11 +701,14 @@ static int iommu_set_root_entry(struct i
 105.190      dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
 105.191  
 105.192      /* Make sure hardware complete it */
 105.193 +    start_time = NOW();
 105.194      for ( ; ; )
 105.195      {
 105.196          sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
 105.197          if ( sts & DMA_GSTS_RTPS )
 105.198              break;
 105.199 +        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
 105.200 +            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
 105.201          cpu_relax();
 105.202      }
 105.203  
 105.204 @@ -777,6 +721,7 @@ static int iommu_enable_translation(stru
 105.205  {
 105.206      u32 sts;
 105.207      unsigned long flags;
 105.208 +    s_time_t start_time;
 105.209  
 105.210      dprintk(XENLOG_INFO VTDPREFIX,
 105.211              "iommu_enable_translation: iommu->reg = %p\n", iommu->reg);
 105.212 @@ -784,11 +729,14 @@ static int iommu_enable_translation(stru
 105.213      iommu->gcmd |= DMA_GCMD_TE;
 105.214      dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
 105.215      /* Make sure hardware complete it */
 105.216 +    start_time = NOW();
 105.217      for ( ; ; )
 105.218      {
 105.219          sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
 105.220          if ( sts & DMA_GSTS_TES )
 105.221              break;
 105.222 +        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
 105.223 +            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
 105.224          cpu_relax();
 105.225      }
 105.226  
 105.227 @@ -802,17 +750,21 @@ int iommu_disable_translation(struct iom
 105.228  {
 105.229      u32 sts;
 105.230      unsigned long flags;
 105.231 +    s_time_t start_time;
 105.232  
 105.233      spin_lock_irqsave(&iommu->register_lock, flags);
 105.234      iommu->gcmd &= ~ DMA_GCMD_TE;
 105.235      dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
 105.236  
 105.237      /* Make sure hardware complete it */
 105.238 +    start_time = NOW();
 105.239      for ( ; ; )
 105.240      {
 105.241          sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
 105.242          if ( !(sts & DMA_GSTS_TES) )
 105.243              break;
 105.244 +        if ( NOW() > start_time + DMAR_OPERATION_TIMEOUT )
 105.245 +            panic("DMAR hardware is malfunctional, please disable IOMMU\n");
 105.246          cpu_relax();
 105.247      }
 105.248      spin_unlock_irqrestore(&iommu->register_lock, flags);
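
All three register handshakes in this file now share the same bounded-poll idiom: record a start time, spin on the status register, and give up once DMAR_OPERATION_TIMEOUT has elapsed. A generic user-space version of the loop, using clock_gettime in place of Xen's NOW():

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    /* Spin until ready() succeeds or timeout_ns elapses -- the shape of
     * the three DMAR_OPERATION_TIMEOUT loops above (which panic instead
     * of returning, and call cpu_relax() between polls). */
    static int poll_until(int (*ready)(void), uint64_t timeout_ns)
    {
        uint64_t start = now_ns();

        for ( ; ; )
        {
            if ( ready() )
                return 0;
            if ( now_ns() > start + timeout_ns )
                return -1;
        }
    }

    static int always_ready(void) { return 1; }

    int main(void)
    {
        printf("poll result: %d\n", poll_until(always_ready, 1000000ull));
        return 0;
    }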
 105.249 @@ -1039,69 +991,64 @@ int iommu_set_interrupt(struct iommu *io
 105.250      return vector;
 105.251  }
 105.252  
 105.253 -struct iommu *iommu_alloc(void *hw_data)
 105.254 +static int iommu_alloc(struct acpi_drhd_unit *drhd)
 105.255  {
 105.256 -    struct acpi_drhd_unit *drhd = (struct acpi_drhd_unit *) hw_data;
 105.257      struct iommu *iommu;
 105.258  
 105.259      if ( nr_iommus > MAX_IOMMUS )
 105.260      {
 105.261          gdprintk(XENLOG_ERR VTDPREFIX,
 105.262                   "IOMMU: nr_iommus %d > MAX_IOMMUS\n", nr_iommus);
 105.263 -        return NULL;
 105.264 +        return -ENOMEM;
 105.265      }
 105.266  
 105.267      iommu = xmalloc(struct iommu);
 105.268 -    if ( !iommu )
 105.269 -        return NULL;
 105.270 +    if ( iommu == NULL )
 105.271 +        return -ENOMEM;
 105.272      memset(iommu, 0, sizeof(struct iommu));
 105.273  
 105.274 +    iommu->intel = alloc_intel_iommu();
 105.275 +    if ( iommu->intel == NULL )
 105.276 +    {
 105.277 +        xfree(iommu);
 105.278 +        return -ENOMEM;
 105.279 +    }
 105.280 +
 105.281      set_fixmap_nocache(FIX_IOMMU_REGS_BASE_0 + nr_iommus, drhd->address);
 105.282 -    iommu->reg = (void *) fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
 105.283 -
 105.284 -    printk("iommu_alloc: iommu->reg = %p drhd->address = %lx\n",
 105.285 -           iommu->reg, drhd->address);
 105.286 -
 105.287 +    iommu->reg = (void *)fix_to_virt(FIX_IOMMU_REGS_BASE_0 + nr_iommus);
 105.288      nr_iommus++;
 105.289  
 105.290 -    if ( !iommu->reg )
 105.291 -    {
 105.292 -        printk(KERN_ERR VTDPREFIX "IOMMU: can't mapping the region\n");
 105.293 -        goto error;
 105.294 -    }
 105.295 -
 105.296      iommu->cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
 105.297      iommu->ecap = dmar_readq(iommu->reg, DMAR_ECAP_REG);
 105.298  
 105.299 -    printk("iommu_alloc: cap = %"PRIx64"\n",iommu->cap);
 105.300 -    printk("iommu_alloc: ecap = %"PRIx64"\n", iommu->ecap);
 105.301 -
 105.302      spin_lock_init(&iommu->lock);
 105.303      spin_lock_init(&iommu->register_lock);
 105.304  
 105.305 -    iommu->intel = alloc_intel_iommu();
 105.306 -
 105.307      drhd->iommu = iommu;
 105.308 -    return iommu;
 105.309 - error:
 105.310 -    xfree(iommu);
 105.311 -    return NULL;
 105.312 +    return 0;
 105.313  }
 105.314  
 105.315 -static void free_iommu(struct iommu *iommu)
 105.316 +static void iommu_free(struct acpi_drhd_unit *drhd)
 105.317  {
 105.318 -    if ( !iommu )
 105.319 +    struct iommu *iommu = drhd->iommu;
 105.320 +
 105.321 +    if ( iommu == NULL )
 105.322          return;
 105.323 +
 105.324      if ( iommu->root_maddr != 0 )
 105.325      {
 105.326          free_pgtable_maddr(iommu->root_maddr);
 105.327          iommu->root_maddr = 0;
 105.328      }
 105.329 +
 105.330      if ( iommu->reg )
 105.331          iounmap(iommu->reg);
 105.332 +
 105.333      free_intel_iommu(iommu->intel);
 105.334      free_irq(iommu->vector);
 105.335      xfree(iommu);
 105.336 +
 105.337 +    drhd->iommu = NULL;
 105.338  }
 105.339  
 105.340  #define guestwidth_to_adjustwidth(gaw) ({       \
 105.341 @@ -1111,22 +1058,21 @@ static void free_iommu(struct iommu *iom
 105.342          agaw = 64;                              \
 105.343      agaw; })
 105.344  
 105.345 -int intel_iommu_domain_init(struct domain *domain)
 105.346 +static int intel_iommu_domain_init(struct domain *d)
 105.347  {
 105.348 -    struct hvm_iommu *hd = domain_hvm_iommu(domain);
 105.349 +    struct hvm_iommu *hd = domain_hvm_iommu(d);
 105.350      struct iommu *iommu = NULL;
 105.351      int guest_width = DEFAULT_DOMAIN_ADDRESS_WIDTH;
 105.352 -    int adjust_width, agaw;
 105.353 +    int i, adjust_width, agaw;
 105.354      unsigned long sagaw;
 105.355      struct acpi_drhd_unit *drhd;
 105.356  
 105.357 -    if ( !vtd_enabled || list_empty(&acpi_drhd_units) )
 105.358 -        return 0;
 105.359 +    INIT_LIST_HEAD(&hd->pdev_list);
 105.360  
 105.361 -    for_each_drhd_unit ( drhd )
 105.362 -        iommu = drhd->iommu ? : iommu_alloc(drhd);
 105.363 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 105.364 +    iommu = drhd->iommu;
 105.365  
 105.366 -    /* calculate AGAW */
 105.367 +    /* Calculate AGAW. */
 105.368      if ( guest_width > cap_mgaw(iommu->cap) )
 105.369          guest_width = cap_mgaw(iommu->cap);
 105.370      adjust_width = guestwidth_to_adjustwidth(guest_width);
 105.371 @@ -1142,6 +1088,26 @@ int intel_iommu_domain_init(struct domai
 105.372              return -ENODEV;
 105.373      }
 105.374      hd->agaw = agaw;
 105.375 +
 105.376 +    if ( d->domain_id == 0 )
 105.377 +    {
 105.378 +        /* Set up 1:1 page table for dom0. */
 105.379 +        for ( i = 0; i < max_page; i++ )
 105.380 +            iommu_map_page(d, i, i);
 105.381 +
 105.382 +        setup_dom0_devices(d);
 105.383 +        setup_dom0_rmrr(d);
 105.384 +
 105.385 +        iommu_flush_all();
 105.386 +
 105.387 +        for_each_drhd_unit ( drhd )
 105.388 +        {
 105.389 +            iommu = drhd->iommu;
 105.390 +            if ( iommu_enable_translation(iommu) )
 105.391 +                return -EIO;
 105.392 +        }
 105.393 +    }
 105.394 +
 105.395      return 0;
 105.396  }
 105.397  
 105.398 @@ -1153,28 +1119,15 @@ static int domain_context_mapping_one(
 105.399      struct hvm_iommu *hd = domain_hvm_iommu(domain);
 105.400      struct context_entry *context, *context_entries;
 105.401      unsigned long flags;
 105.402 -    int ret = 0;
 105.403      u64 maddr;
 105.404  
 105.405      maddr = bus_to_context_maddr(iommu, bus);
 105.406      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
 105.407      context = &context_entries[devfn];
 105.408 -    if ( !context )
 105.409 -    {
 105.410 -        unmap_vtd_domain_page(context_entries);
 105.411 -        gdprintk(XENLOG_ERR VTDPREFIX,
 105.412 -                 "domain_context_mapping_one:context == NULL:"
 105.413 -                 "bdf = %x:%x:%x\n",
 105.414 -                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.415 -        return -ENOMEM;
 105.416 -    }
 105.417  
 105.418      if ( context_present(*context) )
 105.419      {
 105.420          unmap_vtd_domain_page(context_entries);
 105.421 -        gdprintk(XENLOG_WARNING VTDPREFIX,
 105.422 -                 "domain_context_mapping_one:context present:bdf=%x:%x:%x\n",
 105.423 -                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.424          return 0;
 105.425      }
 105.426  
 105.427 @@ -1203,12 +1156,6 @@ static int domain_context_mapping_one(
 105.428      context_set_present(*context);
 105.429      iommu_flush_cache_entry(iommu, context);
 105.430  
 105.431 -    gdprintk(XENLOG_INFO VTDPREFIX,
 105.432 -             "domain_context_mapping_one-%x:%x:%x-*context=%"PRIx64":%"PRIx64
 105.433 -             " hd->pgd_maddr=%"PRIx64"\n",
 105.434 -             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 105.435 -             context->hi, context->lo, hd->pgd_maddr);
 105.436 -
 105.437      unmap_vtd_domain_page(context_entries);
 105.438  
 105.439      if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
 105.440 @@ -1218,7 +1165,8 @@ static int domain_context_mapping_one(
 105.441      else
 105.442          iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
 105.443      spin_unlock_irqrestore(&iommu->lock, flags);
 105.444 -    return ret;
 105.445 +
 105.446 +    return 0;
 105.447  }
 105.448  
 105.449  static int __pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap)
 105.450 @@ -1377,29 +1325,13 @@ static int domain_context_unmap_one(
 105.451      maddr = bus_to_context_maddr(iommu, bus);
 105.452      context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
 105.453      context = &context_entries[devfn];
 105.454 -    if ( !context )
 105.455 -    {
 105.456 -        unmap_vtd_domain_page(context_entries);
 105.457 -        gdprintk(XENLOG_ERR VTDPREFIX,
 105.458 -                 "domain_context_unmap_one-%x:%x:%x- context == NULL:return\n",
 105.459 -                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.460 -        return -ENOMEM;
 105.461 -    }
 105.462  
 105.463      if ( !context_present(*context) )
 105.464      {
 105.465          unmap_vtd_domain_page(context_entries);
 105.466 -        gdprintk(XENLOG_WARNING VTDPREFIX,
 105.467 -                 "domain_context_unmap_one-%x:%x:%x- "
 105.468 -                 "context NOT present:return\n",
 105.469 -                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.470          return 0;
 105.471      }
 105.472  
 105.473 -    gdprintk(XENLOG_INFO VTDPREFIX,
 105.474 -             "domain_context_unmap_one: bdf = %x:%x:%x\n",
 105.475 -             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.476 -
 105.477      spin_lock_irqsave(&iommu->lock, flags);
 105.478      context_clear_present(*context);
 105.479      context_clear_entry(*context);
 105.480 @@ -1431,24 +1363,12 @@ static int domain_context_unmap(
 105.481          sub_bus = pci_conf_read8(
 105.482              pdev->bus, PCI_SLOT(pdev->devfn),
 105.483              PCI_FUNC(pdev->devfn), PCI_SUBORDINATE_BUS);
 105.484 -
 105.485 -        gdprintk(XENLOG_INFO VTDPREFIX,
 105.486 -                 "domain_context_unmap:BRIDGE:%x:%x:%x "
 105.487 -                 "sec_bus=%x sub_bus=%x\n",
 105.488 -                 pdev->bus, PCI_SLOT(pdev->devfn),
 105.489 -                 PCI_FUNC(pdev->devfn), sec_bus, sub_bus);
 105.490          break;
 105.491      case DEV_TYPE_PCIe_ENDPOINT:
 105.492 -        gdprintk(XENLOG_INFO VTDPREFIX,
 105.493 -                 "domain_context_unmap:PCIe : bdf = %x:%x:%x\n",
 105.494 -                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 105.495          ret = domain_context_unmap_one(domain, iommu,
 105.496                                         (u8)(pdev->bus), (u8)(pdev->devfn));
 105.497          break;
 105.498      case DEV_TYPE_PCI:
 105.499 -        gdprintk(XENLOG_INFO VTDPREFIX,
 105.500 -                 "domain_context_unmap:PCI: bdf = %x:%x:%x\n",
 105.501 -                 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 105.502          if ( pdev->bus == 0 )
 105.503              ret = domain_context_unmap_one(
 105.504                  domain, iommu,
 105.505 @@ -1502,35 +1422,29 @@ void reassign_device_ownership(
 105.506      int status;
 105.507      unsigned long flags;
 105.508  
 105.509 -    gdprintk(XENLOG_INFO VTDPREFIX,
 105.510 -             "reassign_device-%x:%x:%x- source = %d target = %d\n",
 105.511 -             bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
 105.512 -             source->domain_id, target->domain_id);
 105.513 -
 105.514      pdev_flr(bus, devfn);
 105.515  
 105.516      for_each_pdev( source, pdev )
 105.517 -    {
 105.518 -        if ( (pdev->bus != bus) || (pdev->devfn != devfn) )
 105.519 -            continue;
 105.520 +        if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
 105.521 +            goto found;
 105.522 +
 105.523 +    return;
 105.524  
 105.525 -        drhd = acpi_find_matched_drhd_unit(pdev);
 105.526 -        iommu = drhd->iommu;
 105.527 -        domain_context_unmap(source, iommu, pdev);
 105.528 + found:
 105.529 +    drhd = acpi_find_matched_drhd_unit(pdev);
 105.530 +    iommu = drhd->iommu;
 105.531 +    domain_context_unmap(source, iommu, pdev);
 105.532  
 105.533 -        /* Move pci device from the source domain to target domain. */
 105.534 -        spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
 105.535 -        spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
 105.536 -        list_move(&pdev->list, &target_hd->pdev_list);
 105.537 -        spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
 105.538 -        spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 105.539 +    /* Move pci device from the source domain to target domain. */
 105.540 +    spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
 105.541 +    spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
 105.542 +    list_move(&pdev->list, &target_hd->pdev_list);
 105.543 +    spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
 105.544 +    spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
 105.545  
 105.546 -        status = domain_context_mapping(target, iommu, pdev);
 105.547 -        if ( status != 0 )
 105.548 -            gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
 105.549 -
 105.550 -        break;
 105.551 -    }
 105.552 +    status = domain_context_mapping(target, iommu, pdev);
 105.553 +    if ( status != 0 )
 105.554 +        gdprintk(XENLOG_ERR VTDPREFIX, "domain_context_mapping failed\n");
 105.555  }
 105.556  
 105.557  void return_devices_to_dom0(struct domain *d)
 105.558 @@ -1541,9 +1455,6 @@ void return_devices_to_dom0(struct domai
 105.559      while ( !list_empty(&hd->pdev_list) )
 105.560      {
 105.561          pdev = list_entry(hd->pdev_list.next, typeof(*pdev), list);
 105.562 -        dprintk(XENLOG_INFO VTDPREFIX,
 105.563 -                "return_devices_to_dom0: bdf = %x:%x:%x\n",
 105.564 -                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 105.565          reassign_device_ownership(d, dom0, pdev->bus, pdev->devfn);
 105.566      }
 105.567  
 105.568 @@ -1600,7 +1511,7 @@ int intel_iommu_map_page(
 105.569          return 0;
 105.570  #endif
 105.571  
 105.572 -    pg_maddr = addr_to_dma_page_maddr(d, gfn << PAGE_SHIFT_4K);
 105.573 +    pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K);
 105.574      if ( pg_maddr == 0 )
 105.575          return -ENOMEM;
 105.576      page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
 105.577 @@ -1643,11 +1554,11 @@ int intel_iommu_unmap_page(struct domain
 105.578  }
 105.579  
 105.580  int iommu_page_mapping(struct domain *domain, paddr_t iova,
 105.581 -                       void *hpa, size_t size, int prot)
 105.582 +                       paddr_t hpa, size_t size, int prot)
 105.583  {
 105.584      struct acpi_drhd_unit *drhd;
 105.585      struct iommu *iommu;
 105.586 -    unsigned long start_pfn, end_pfn;
 105.587 +    u64 start_pfn, end_pfn;
 105.588      struct dma_pte *page = NULL, *pte = NULL;
 105.589      int index;
 105.590      u64 pg_maddr;
 105.591 @@ -1657,9 +1568,8 @@ int iommu_page_mapping(struct domain *do
 105.592      if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
 105.593          return -EINVAL;
 105.594      iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
 105.595 -    start_pfn = (unsigned long)(((unsigned long) hpa) >> PAGE_SHIFT_4K);
 105.596 -    end_pfn = (unsigned long)
 105.597 -        ((PAGE_ALIGN_4K(((unsigned long)hpa) + size)) >> PAGE_SHIFT_4K);
 105.598 +    start_pfn = hpa >> PAGE_SHIFT_4K;
 105.599 +    end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
 105.600      index = 0;
 105.601      while ( start_pfn < end_pfn )
 105.602      {
 105.603 @@ -1668,7 +1578,7 @@ int iommu_page_mapping(struct domain *do
 105.604              return -ENOMEM;
 105.605          page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
 105.606          pte = page + (start_pfn & LEVEL_MASK);
 105.607 -        dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
 105.608 +        dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
 105.609          dma_set_pte_prot(*pte, prot);
 105.610          iommu_flush_cache_entry(iommu, pte);
 105.611          unmap_vtd_domain_page(page);
 105.612 @@ -1727,7 +1637,7 @@ static int iommu_prepare_rmrr_dev(
 105.613      /* page table init */
 105.614      size = rmrr->end_address - rmrr->base_address + 1;
 105.615      ret = iommu_page_mapping(d, rmrr->base_address,
 105.616 -                             (void *)rmrr->base_address, size,
 105.617 +                             rmrr->base_address, size,
 105.618                               DMA_PTE_READ|DMA_PTE_WRITE);
 105.619      if ( ret )
 105.620          return ret;
 105.621 @@ -1743,37 +1653,15 @@ static int iommu_prepare_rmrr_dev(
 105.622      return ret;
 105.623  }
 105.624  
 105.625 -void __init setup_dom0_devices(void)
 105.626 +static void setup_dom0_devices(struct domain *d)
 105.627  {
 105.628 -    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
 105.629 +    struct hvm_iommu *hd;
 105.630      struct acpi_drhd_unit *drhd;
 105.631      struct pci_dev *pdev;
 105.632      int bus, dev, func, ret;
 105.633      u32 l;
 105.634  
 105.635 -#ifdef DEBUG_VTD_CONTEXT_ENTRY
 105.636 -    for ( bus = 0; bus < 256; bus++ )
 105.637 -    {
 105.638 -        for ( dev = 0; dev < 32; dev++ )
 105.639 -        { 
 105.640 -            for ( func = 0; func < 8; func++ )
 105.641 -            {
 105.642 -                struct context_entry *context;
 105.643 -                struct pci_dev device;
 105.644 -
 105.645 -                device.bus = bus; 
 105.646 -                device.devfn = PCI_DEVFN(dev, func); 
 105.647 -                drhd = acpi_find_matched_drhd_unit(&device);
 105.648 -                context = device_to_context_entry(drhd->iommu,
 105.649 -                                                  bus, PCI_DEVFN(dev, func));
 105.650 -                if ( (context->lo != 0) || (context->hi != 0) )
 105.651 -                    dprintk(XENLOG_INFO VTDPREFIX,
 105.652 -                            "setup_dom0_devices-%x:%x:%x- context not 0\n",
 105.653 -                            bus, dev, func);
 105.654 -            }
 105.655 -        }    
 105.656 -    }        
 105.657 -#endif
 105.658 +    hd = domain_hvm_iommu(d);
 105.659  
 105.660      for ( bus = 0; bus < 256; bus++ )
 105.661      {
 105.662 @@ -1792,18 +1680,13 @@ void __init setup_dom0_devices(void)
 105.663                  list_add_tail(&pdev->list, &hd->pdev_list);
 105.664  
 105.665                  drhd = acpi_find_matched_drhd_unit(pdev);
 105.666 -                ret = domain_context_mapping(dom0, drhd->iommu, pdev);
 105.667 +                ret = domain_context_mapping(d, drhd->iommu, pdev);
 105.668                  if ( ret != 0 )
 105.669                      gdprintk(XENLOG_ERR VTDPREFIX,
 105.670                               "domain_context_mapping failed\n");
 105.671              }
 105.672          }
 105.673      }
 105.674 -
 105.675 -    for_each_pdev ( dom0, pdev )
 105.676 -        dprintk(XENLOG_INFO VTDPREFIX,
 105.677 -                "setup_dom0_devices: bdf = %x:%x:%x\n",
 105.678 -                pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 105.679  }
 105.680  
 105.681  void clear_fault_bits(struct iommu *iommu)
 105.682 @@ -1850,13 +1733,6 @@ static int init_vtd_hw(void)
 105.683          flush->context = flush_context_reg;
 105.684          flush->iotlb = flush_iotlb_reg;
 105.685      }
 105.686 -    return 0;
 105.687 -}
 105.688 -
 105.689 -static int init_vtd2_hw(void)
 105.690 -{
 105.691 -    struct acpi_drhd_unit *drhd;
 105.692 -    struct iommu *iommu;
 105.693  
 105.694      for_each_drhd_unit ( drhd )
 105.695      {
 105.696 @@ -1873,52 +1749,38 @@ static int init_vtd2_hw(void)
 105.697              dprintk(XENLOG_ERR VTDPREFIX,
 105.698                      "Interrupt Remapping hardware not found\n");
 105.699      }
 105.700 +
 105.701      return 0;
 105.702  }
 105.703  
 105.704 -static int enable_vtd_translation(void)
 105.705 -{
 105.706 -    struct acpi_drhd_unit *drhd;
 105.707 -    struct iommu *iommu;
 105.708 -
 105.709 -    for_each_drhd_unit ( drhd )
 105.710 -    {
 105.711 -        iommu = drhd->iommu;
 105.712 -        if ( iommu_enable_translation(iommu) )
 105.713 -            return -EIO;
 105.714 -    }
 105.715 -    return 0;
 105.716 -}
 105.717 -
 105.718 -static void setup_dom0_rmrr(void)
 105.719 +static void setup_dom0_rmrr(struct domain *d)
 105.720  {
 105.721      struct acpi_rmrr_unit *rmrr;
 105.722      struct pci_dev *pdev;
 105.723      int ret;
 105.724  
 105.725      for_each_rmrr_device ( rmrr, pdev )
 105.726 -        ret = iommu_prepare_rmrr_dev(dom0, rmrr, pdev);
 105.727 +        ret = iommu_prepare_rmrr_dev(d, rmrr, pdev);
 105.728          if ( ret )
 105.729              gdprintk(XENLOG_ERR VTDPREFIX,
 105.730                       "IOMMU: mapping reserved region failed\n");
 105.731      end_for_each_rmrr_device ( rmrr, pdev )
 105.732  }
 105.733  
 105.734 -int iommu_setup(void)
 105.735 +int intel_vtd_setup(void)
 105.736  {
 105.737 -    struct hvm_iommu *hd  = domain_hvm_iommu(dom0);
 105.738      struct acpi_drhd_unit *drhd;
 105.739      struct iommu *iommu;
 105.740 -    unsigned long i;
 105.741  
 105.742      if ( !vtd_enabled )
 105.743 -        return 0;
 105.744 +        return -ENODEV;
 105.745  
 105.746      spin_lock_init(&domid_bitmap_lock);
 105.747 -    INIT_LIST_HEAD(&hd->pdev_list);
 105.748 +    clflush_size = get_clflush_size();
 105.749  
 105.750 -    /* setup clflush size */
 105.751 -    clflush_size = get_clflush_size();
 105.752 +    for_each_drhd_unit ( drhd )
 105.753 +        if ( iommu_alloc(drhd) != 0 )
 105.754 +            goto error;
 105.755  
 105.756      /* Allocate IO page directory page for the domain. */
 105.757      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
 105.758 @@ -1933,27 +1795,15 @@ int iommu_setup(void)
 105.759      memset(domid_bitmap, 0, domid_bitmap_size / 8);
 105.760      set_bit(0, domid_bitmap);
 105.761  
 105.762 -    /* setup 1:1 page table for dom0 */
 105.763 -    for ( i = 0; i < max_page; i++ )
 105.764 -        iommu_map_page(dom0, i, i);
 105.765 -
 105.766      init_vtd_hw();
 105.767 -    setup_dom0_devices();
 105.768 -    setup_dom0_rmrr();
 105.769 -    iommu_flush_all();
 105.770 -    enable_vtd_translation();
 105.771 -    init_vtd2_hw();
 105.772  
 105.773      return 0;
 105.774  
 105.775   error:
 105.776 -    printk("iommu_setup() failed\n");
 105.777      for_each_drhd_unit ( drhd )
 105.778 -    {
 105.779 -        iommu = drhd->iommu;
 105.780 -        free_iommu(iommu);
 105.781 -    }
 105.782 -    return -EIO;
 105.783 +        iommu_free(drhd);
 105.784 +    vtd_enabled = 0;
 105.785 +    return -ENOMEM;
 105.786  }
 105.787  
 105.788  /*
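The rename from iommu_setup() to intel_vtd_setup(), the new -ENODEV return when VT-d is disabled, and the switch from a derived iommu_enabled macro to a real variable (see the xen/include/xen/iommu.h hunk below) all point at a vendor-neutral dispatcher taking over the old name. A hypothetical shape for it (assumed, not shown in this diff):

    /* Hypothetical generic entry point; each vendor setup routine
     * reports -ENODEV when its hardware is absent or disabled. */
    int iommu_setup(void)
    {
        int rc = -ENODEV;

        if ( vtd_enabled )
            rc = intel_vtd_setup();

        iommu_enabled = (rc == 0);
        return rc;
    }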
 105.789 @@ -1980,10 +1830,6 @@ int intel_iommu_assign_device(struct dom
 105.790      if ( list_empty(&acpi_drhd_units) )
 105.791          return ret;
 105.792  
 105.793 -    gdprintk(XENLOG_INFO VTDPREFIX,
 105.794 -             "assign_device: bus = %x dev = %x func = %x\n",
 105.795 -             bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
 105.796 -
 105.797      reassign_device_ownership(dom0, d, bus, devfn);
 105.798  
 105.799      /* Setup rmrr identify mapping */
   106.1 --- a/xen/drivers/passthrough/vtd/utils.c	Thu Apr 24 14:02:16 2008 -0600
   106.2 +++ b/xen/drivers/passthrough/vtd/utils.c	Thu Apr 24 14:08:29 2008 -0600
   106.3 @@ -60,10 +60,10 @@ int vtd_hw_check(void)
   106.4              dprintk(XENLOG_WARNING VTDPREFIX,
   106.5                      "***  vendor = %x device = %x revision = %x\n",
   106.6                      vendor, device, revision);
   106.7 -            vtd_enabled = 0;
   106.8              return -ENODEV;
   106.9          }
  106.10      }
  106.11 +
  106.12      return 0;
  106.13  }
  106.14  
   107.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c	Thu Apr 24 14:02:16 2008 -0600
   107.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c	Thu Apr 24 14:08:29 2008 -0600
   107.3 @@ -114,8 +114,6 @@ void hvm_dpci_isairq_eoi(struct domain *
   107.4                  if ( --dpci->mirq[i].pending == 0 )
   107.5                  {
   107.6                      spin_unlock(&dpci->dirq_lock);
   107.7 -                    gdprintk(XENLOG_INFO VTDPREFIX,
   107.8 -                             "hvm_dpci_isairq_eoi:: mirq = %x\n", i);
   107.9                      stop_timer(&dpci->hvm_timer[irq_to_vector(i)]);
  107.10                      pirq_guest_eoi(d, i);
  107.11                  }
  107.12 @@ -130,8 +128,6 @@ void iommu_set_pgd(struct domain *d)
  107.13  {
  107.14      struct hvm_iommu *hd  = domain_hvm_iommu(d);
  107.15      unsigned long p2m_table;
  107.16 -    int level = agaw_to_level(hd->agaw);
  107.17 -    l3_pgentry_t *l3e;
  107.18  
  107.19      p2m_table = mfn_x(pagetable_get_mfn(d->arch.phys_table));
  107.20  
  107.21 @@ -153,12 +149,12 @@ void iommu_set_pgd(struct domain *d)
  107.22                  return;
  107.23              }
  107.24              pgd_mfn = _mfn(dma_pte_addr(*dpte) >> PAGE_SHIFT_4K);
  107.25 -            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
  107.26 +            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  107.27              unmap_domain_page(dpte);
  107.28              break;
  107.29          case VTD_PAGE_TABLE_LEVEL_4:
  107.30              pgd_mfn = _mfn(p2m_table);
  107.31 -            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
  107.32 +            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  107.33              break;
  107.34          default:
  107.35              gdprintk(XENLOG_ERR VTDPREFIX,
  107.36 @@ -173,6 +169,8 @@ void iommu_set_pgd(struct domain *d)
  107.37          int i;
  107.38          u64 pmd_maddr;
  107.39          unsigned long flags;
  107.40 +        l3_pgentry_t *l3e;
  107.41 +        int level = agaw_to_level(hd->agaw);
  107.42  
  107.43          spin_lock_irqsave(&hd->mapping_lock, flags);
  107.44          hd->pgd_maddr = alloc_pgtable_maddr();
  107.45 @@ -236,6 +234,8 @@ void iommu_set_pgd(struct domain *d)
  107.46  
  107.47  #elif CONFIG_PAGING_LEVELS == 4
  107.48          mfn_t pgd_mfn;
  107.49 +        l3_pgentry_t *l3e;
  107.50 +        int level = agaw_to_level(hd->agaw);
  107.51  
  107.52          switch ( level )
  107.53          {
  107.54 @@ -250,12 +250,12 @@ void iommu_set_pgd(struct domain *d)
  107.55              }
  107.56  
  107.57              pgd_mfn = _mfn(l3e_get_pfn(*l3e));
  107.58 -            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
  107.59 +            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  107.60              unmap_domain_page(l3e);
  107.61              break;
  107.62          case VTD_PAGE_TABLE_LEVEL_4:
  107.63              pgd_mfn = _mfn(p2m_table);
  107.64 -            hd->pgd_maddr = mfn_x(pgd_mfn) << PAGE_SHIFT_4K;
  107.65 +            hd->pgd_maddr = (paddr_t)(mfn_x(pgd_mfn)) << PAGE_SHIFT_4K;
  107.66              break;
  107.67          default:
  107.68              gdprintk(XENLOG_ERR VTDPREFIX,
   108.1 --- a/xen/include/asm-x86/amd-iommu.h	Thu Apr 24 14:02:16 2008 -0600
   108.2 +++ b/xen/include/asm-x86/amd-iommu.h	Thu Apr 24 14:08:29 2008 -0600
   108.3 @@ -28,10 +28,9 @@
   108.4  
   108.5  #define iommu_found()           (!list_empty(&amd_iommu_head))
   108.6  
   108.7 -extern int amd_iommu_enabled;
   108.8  extern struct list_head amd_iommu_head;
   108.9  
  108.10 -extern int __init amd_iommu_detect(void);
  108.11 +extern int __init amd_iov_detect(void);
  108.12  
  108.13  struct table_struct {
  108.14      void *buffer;
  108.15 @@ -79,6 +78,9 @@ struct amd_iommu {
  108.16      int exclusion_allow_all;
  108.17      uint64_t exclusion_base;
  108.18      uint64_t exclusion_limit;
  108.19 +
  108.20 +    int msi_cap;
  108.21 +    int maskbit;
  108.22  };
  108.23  
  108.24  struct ivrs_mappings {
   109.1 --- a/xen/include/asm-x86/bitops.h	Thu Apr 24 14:02:16 2008 -0600
   109.2 +++ b/xen/include/asm-x86/bitops.h	Thu Apr 24 14:08:29 2008 -0600
   109.3 @@ -331,10 +331,9 @@ extern unsigned int __find_first_zero_bi
   109.4  extern unsigned int __find_next_zero_bit(
   109.5      const unsigned long *addr, unsigned int size, unsigned int offset);
   109.6  
   109.7 -/* return index of first bit set in val or BITS_PER_LONG when no bit is set */
   109.8 -static inline unsigned int __scanbit(unsigned long val)
   109.9 +static inline unsigned int __scanbit(unsigned long val, unsigned long max)
  109.10  {
  109.11 -    asm ( "bsf %1,%0" : "=r" (val) : "r" (val), "0" (BITS_PER_LONG) );
  109.12 +    asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) );
  109.13      return (unsigned int)val;
  109.14  }
  109.15  
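The rewritten __scanbit() threads the search size through instead of hard-coding BITS_PER_LONG: BSF sets ZF and leaves its destination architecturally undefined when the source is zero, and the trailing CMOVZ then substitutes max. The "=&r" early-clobber is required because %0 is written by BSF before CMOVZ reads %2. A portable C rendering of the intended semantics (illustrative only):

    /* C equivalent of "bsf %1,%0 ; cmovz %2,%0" (illustrative). */
    static inline unsigned int scanbit_ref(unsigned long val, unsigned long max)
    {
        unsigned int i;
        for ( i = 0; i < BITS_PER_LONG; i++ )
            if ( val & (1UL << i) )
                return i;         /* index of least-significant set bit */
        return (unsigned int)max; /* no bit set: the CMOVZ path */
    }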
  109.16 @@ -346,9 +345,9 @@ static inline unsigned int __scanbit(uns
  109.17   * Returns the bit-number of the first set bit, not the number of the byte
  109.18   * containing a bit.
  109.19   */
  109.20 -#define find_first_bit(addr,size) \
  109.21 -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  109.22 -  (__scanbit(*(const unsigned long *)addr)) : \
  109.23 +#define find_first_bit(addr,size)                               \
  109.24 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
  109.25 +  (__scanbit(*(const unsigned long *)addr, size)) :             \
  109.26    __find_first_bit(addr,size)))
  109.27  
  109.28  /**
  109.29 @@ -357,9 +356,9 @@ static inline unsigned int __scanbit(uns
  109.30   * @offset: The bitnumber to start searching at
  109.31   * @size: The maximum size to search
  109.32   */
  109.33 -#define find_next_bit(addr,size,off) \
  109.34 -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  109.35 -  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off)))) : \
  109.36 +#define find_next_bit(addr,size,off)                                     \
  109.37 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                \
  109.38 +  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off), size))) : \
  109.39    __find_next_bit(addr,size,off)))
  109.40  
  109.41  /**
  109.42 @@ -370,9 +369,9 @@ static inline unsigned int __scanbit(uns
  109.43   * Returns the bit-number of the first zero bit, not the number of the byte
  109.44   * containing a bit.
  109.45   */
  109.46 -#define find_first_zero_bit(addr,size) \
  109.47 -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  109.48 -  (__scanbit(~*(const unsigned long *)addr)) : \
  109.49 +#define find_first_zero_bit(addr,size)                          \
  109.50 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
  109.51 +  (__scanbit(~*(const unsigned long *)addr, size)) :            \
  109.52    __find_first_zero_bit(addr,size)))
  109.53  
  109.54  /**
  109.55 @@ -381,9 +380,9 @@ static inline unsigned int __scanbit(uns
  109.56   * @offset: The bitnumber to start searching at
  109.57   * @size: The maximum size to search
  109.58   */
  109.59 -#define find_next_zero_bit(addr,size,off) \
  109.60 -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \
  109.61 -  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off))))) : \
  109.62 +#define find_next_zero_bit(addr,size,off)                                   \
  109.63 +((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                   \
  109.64 +  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off)), size))) : \
  109.65    __find_next_zero_bit(addr,size,off)))
  109.66  
  109.67  
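With the size threaded through, the inline fast path of these four macros now agrees with the out-of-line __find_* helpers: both return the size argument when nothing is found, rather than BITS_PER_LONG. A hypothetical caller relying on that convention:

    /* Hypothetical caller: the not-found case is uniformly bit == size. */
    unsigned long mask = 0;
    unsigned int bit = find_first_bit(&mask, BITS_PER_LONG);
    if ( bit == BITS_PER_LONG )
        printk("no bits set\n");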
  109.68 @@ -391,8 +390,7 @@ static inline unsigned int __scanbit(uns
  109.69   * find_first_set_bit - find the first set bit in @word
  109.70   * @word: the word to search
  109.71   * 
  109.72 - * Returns the bit-number of the first set bit. If no bits are set then the
  109.73 - * result is undefined.
  109.74 + * Returns the bit-number of the first set bit. The input must *not* be zero.
  109.75   */
  109.76  static inline unsigned int find_first_set_bit(unsigned long word)
  109.77  {
  109.78 @@ -401,26 +399,10 @@ static inline unsigned int find_first_se
  109.79  }
  109.80  
  109.81  /**
  109.82 - * ffz - find first zero in word.
  109.83 - * @word: The word to search
  109.84 - *
  109.85 - * Undefined if no zero exists, so code should check against ~0UL first.
  109.86 - */
  109.87 -static inline unsigned long ffz(unsigned long word)
  109.88 -{
  109.89 -    asm ( "bsf %1,%0"
  109.90 -          :"=r" (word)
  109.91 -          :"r" (~word));
  109.92 -    return word;
  109.93 -}
  109.94 -
  109.95 -/**
  109.96   * ffs - find first bit set
  109.97   * @x: the word to search
  109.98   *
  109.99 - * This is defined the same way as
 109.100 - * the libc and compiler builtin ffs routines, therefore
 109.101 - * differs in spirit from the above ffz (man ffs).
 109.102 + * This is defined the same way as the libc and compiler builtin ffs routines.
 109.103   */
 109.104  static inline int ffs(unsigned long x)
 109.105  {
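ffz() is removed outright rather than adapted to the new __scanbit() signature. Any remaining caller can recover its behaviour from find_first_set_bit(), since the first zero of a word is the first set bit of its complement; as with the old ffz(), the input must not be all ones. A sketch (not part of the patch):

    /* Equivalent of the removed ffz(); undefined for word == ~0UL. */
    static inline unsigned long ffz_equiv(unsigned long word)
    {
        return find_first_set_bit(~word);
    }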
   110.1 --- a/xen/include/asm-x86/hvm/hvm.h	Thu Apr 24 14:02:16 2008 -0600
   110.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Thu Apr 24 14:08:29 2008 -0600
   110.3 @@ -139,6 +139,8 @@ void hvm_domain_destroy(struct domain *d
   110.4  int hvm_vcpu_initialise(struct vcpu *v);
   110.5  void hvm_vcpu_destroy(struct vcpu *v);
   110.6  void hvm_vcpu_down(struct vcpu *v);
   110.7 +int hvm_vcpu_cacheattr_init(struct vcpu *v);
   110.8 +void hvm_vcpu_cacheattr_destroy(struct vcpu *v);
   110.9  
  110.10  void hvm_send_assist_req(struct vcpu *v);
  110.11  
   111.1 --- a/xen/include/asm-x86/hvm/support.h	Thu Apr 24 14:02:16 2008 -0600
   111.2 +++ b/xen/include/asm-x86/hvm/support.h	Thu Apr 24 14:08:29 2008 -0600
   111.3 @@ -130,5 +130,7 @@ int hvm_set_efer(uint64_t value);
   111.4  int hvm_set_cr0(unsigned long value);
   111.5  int hvm_set_cr3(unsigned long value);
   111.6  int hvm_set_cr4(unsigned long value);
   111.7 +int hvm_msr_read_intercept(struct cpu_user_regs *regs);
   111.8 +int hvm_msr_write_intercept(struct cpu_user_regs *regs);
   111.9  
  111.10  #endif /* __ASM_X86_HVM_SUPPORT_H__ */
   112.1 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Thu Apr 24 14:02:16 2008 -0600
   112.2 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Thu Apr 24 14:08:29 2008 -0600
   112.3 @@ -35,6 +35,9 @@
   112.4  /* IOMMU Command Buffer entries: in power of 2 increments, minimum of 256 */
   112.5  #define IOMMU_CMD_BUFFER_DEFAULT_ENTRIES	512
   112.6  
   112.7 +/* IOMMU Event Log entries: in power of 2 increments, minimum of 256 */
   112.8 +#define IOMMU_EVENT_LOG_DEFAULT_ENTRIES     512
   112.9 +
  112.10  #define BITMAP_ENTRIES_PER_BYTE		8
  112.11  
  112.12  #define PTE_PER_TABLE_SHIFT		9
  112.13 @@ -304,6 +307,11 @@
  112.14  #define IOMMU_EVENT_IOTLB_INV_TIMEOUT		0x7
  112.15  #define IOMMU_EVENT_INVALID_DEV_REQUEST		0x8
  112.16  
  112.17 +#define IOMMU_EVENT_DOMAIN_ID_MASK           0x0000FFFF
  112.18 +#define IOMMU_EVENT_DOMAIN_ID_SHIFT          0
  112.19 +#define IOMMU_EVENT_DEVICE_ID_MASK           0x0000FFFF
  112.20 +#define IOMMU_EVENT_DEVICE_ID_SHIFT          0
  112.21 +
  112.22  /* Control Register */
  112.23  #define IOMMU_CONTROL_MMIO_OFFSET			0x18
  112.24  #define IOMMU_CONTROL_TRANSLATION_ENABLE_MASK		0x00000001
  112.25 @@ -427,4 +435,33 @@
  112.26  #define IOMMU_IO_READ_ENABLED           1
  112.27  #define HACK_BIOS_SETTINGS                  0
  112.28  
  112.29 +/* MSI interrupt */
  112.30 +#define MSI_DATA_VECTOR_SHIFT       0
  112.31 +#define MSI_DATA_VECTOR(v)      (((u8)v) << MSI_DATA_VECTOR_SHIFT)
  112.32 +
  112.33 +#define MSI_DATA_DELIVERY_SHIFT     8
  112.34 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_SHIFT)
  112.35 +#define MSI_DATA_DELIVERY_LOWPRI    (1 << MSI_DATA_DELIVERY_SHIFT)
  112.36 +
  112.37 +#define MSI_DATA_LEVEL_SHIFT        14
  112.38 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
  112.39 +#define MSI_DATA_LEVEL_ASSERT   (1 << MSI_DATA_LEVEL_SHIFT)
  112.40 +
  112.41 +#define MSI_DATA_TRIGGER_SHIFT      15
  112.42 +#define MSI_DATA_TRIGGER_EDGE   (0 << MSI_DATA_TRIGGER_SHIFT)
  112.43 +#define MSI_DATA_TRIGGER_LEVEL  (1 << MSI_DATA_TRIGGER_SHIFT)
  112.44 +
  112.45 +#define MSI_TARGET_CPU_SHIFT        12
  112.46 +#define MSI_ADDR_HEADER         0xfee00000
  112.47 +#define MSI_ADDR_DESTID_MASK        0xfff0000f
  112.48 +#define MSI_ADDR_DESTID_CPU(cpu)    ((cpu) << MSI_TARGET_CPU_SHIFT)
  112.49 +
  112.50 +#define MSI_ADDR_DESTMODE_SHIFT     2
  112.51 +#define MSI_ADDR_DESTMODE_PHYS  (0 << MSI_ADDR_DESTMODE_SHIFT)
  112.52 +#define MSI_ADDR_DESTMODE_LOGIC (1 << MSI_ADDR_DESTMODE_SHIFT)
  112.53 +
  112.54 +#define MSI_ADDR_REDIRECTION_SHIFT  3
  112.55 +#define MSI_ADDR_REDIRECTION_CPU    (0 << MSI_ADDR_REDIRECTION_SHIFT)
  112.56 +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
  112.57 +
  112.58  #endif /* _ASM_X86_64_AMD_IOMMU_DEFS_H */
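The new MSI constants follow the standard x86 MSI message layout: a 0xFEExxxxx address word carrying the destination and addressing mode, and a data word carrying the vector, delivery mode and trigger. A hypothetical composition (vector and CPU values made up for illustration):

    /* Hypothetical: MSI message for vector 0x50 to physical CPU 0,
     * fixed delivery, edge-triggered. */
    u32 addr = MSI_ADDR_HEADER | MSI_ADDR_DESTID_CPU(0) |
               MSI_ADDR_DESTMODE_PHYS | MSI_ADDR_REDIRECTION_CPU;
    u32 data = MSI_DATA_VECTOR(0x50) | MSI_DATA_DELIVERY_FIXED |
               MSI_DATA_TRIGGER_EDGE;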
   113.1 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Thu Apr 24 14:02:16 2008 -0600
   113.2 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Thu Apr 24 14:08:29 2008 -0600
   113.3 @@ -35,6 +35,19 @@
   113.4  #define DMA_32BIT_MASK  0x00000000ffffffffULL
   113.5  #define PAGE_ALIGN(addr)    (((addr) + PAGE_SIZE - 1) & PAGE_MASK)
   113.6  
   113.7 +#ifdef AMD_IOV_DEBUG
   113.8 +#define amd_iov_info(fmt, args...) \
   113.9 +    printk(XENLOG_INFO "AMD_IOV: " fmt, ## args)
  113.10 +#define amd_iov_warning(fmt, args...) \
  113.11 +    printk(XENLOG_WARNING "AMD_IOV: " fmt, ## args)
  113.12 +#define amd_iov_error(fmt, args...) \
  113.13 +    printk(XENLOG_ERR "AMD_IOV: %s:%d: " fmt, __FILE__ , __LINE__ , ## args)
  113.14 +#else
  113.15 +#define amd_iov_info(fmt, args...)
  113.16 +#define amd_iov_warning(fmt, args...)
  113.17 +#define amd_iov_error(fmt, args...)
  113.18 +#endif
  113.19 +
  113.20  typedef int (*iommu_detect_callback_ptr_t)(
  113.21      u8 bus, u8 dev, u8 func, u8 cap_ptr);
  113.22  
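The amd_iov_* wrappers compile to nothing unless AMD_IOV_DEBUG is defined, so their arguments are not evaluated in non-debug builds and must be free of side effects. A hypothetical call site (bdf is an assumed local):

    /* Expands to a printk() with file/line under AMD_IOV_DEBUG,
     * otherwise to nothing at all. */
    amd_iov_error("invalid device table entry, bdf = %x\n", bdf);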
  113.23 @@ -49,6 +62,7 @@ int __init map_iommu_mmio_region(struct 
  113.24  void __init unmap_iommu_mmio_region(struct amd_iommu *iommu);
  113.25  void __init register_iommu_dev_table_in_mmio_space(struct amd_iommu *iommu);
  113.26  void __init register_iommu_cmd_buffer_in_mmio_space(struct amd_iommu *iommu);
  113.27 +void __init register_iommu_event_log_in_mmio_space(struct amd_iommu *iommu);
  113.28  void __init enable_iommu(struct amd_iommu *iommu);
  113.29  
  113.30  /* mapping functions */
  113.31 @@ -70,11 +84,6 @@ void invalidate_dev_table_entry(struct a
  113.32  int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]);
  113.33  void flush_command_buffer(struct amd_iommu *iommu);
  113.34  
  113.35 -/* iommu domain funtions */
  113.36 -int amd_iommu_domain_init(struct domain *domain);
  113.37 -void amd_iommu_setup_domain_device(struct domain *domain,
  113.38 -    struct amd_iommu *iommu, int bdf);
  113.39 -
  113.40  /* find iommu for bdf */
  113.41  struct amd_iommu *find_iommu_for_device(int bus, int devfn);
  113.42  
   114.1 --- a/xen/include/asm-x86/hvm/vcpu.h	Thu Apr 24 14:02:16 2008 -0600
   114.2 +++ b/xen/include/asm-x86/hvm/vcpu.h	Thu Apr 24 14:08:29 2008 -0600
   114.3 @@ -83,7 +83,16 @@ struct hvm_vcpu {
   114.4       */
   114.5      unsigned long       mmio_gva;
   114.6      unsigned long       mmio_gpfn;
   114.7 +    /* Callback into x86_emulate when emulating FPU/MMX/XMM instructions. */
   114.8 +    void (*fpu_exception_callback)(void *, struct cpu_user_regs *);
   114.9 +    void *fpu_exception_callback_arg;
  114.10 +    /* We may read up to m128 spread over several device-model transactions. */
  114.11 +    paddr_t mmio_large_read_pa;
  114.12 +    uint8_t mmio_large_read[16];
  114.13 +    unsigned int mmio_large_read_bytes;
  114.14 +    /* We may write up to m128 spread over several device-model transactions. */
  114.15 +    paddr_t mmio_large_write_pa;
  114.16 +    unsigned int mmio_large_write_bytes;
  114.17  };
  114.18  
  114.19  #endif /* __ASM_X86_HVM_VCPU_H__ */
  114.20 -
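The mmio_large_* fields let the emulator assemble one 16-byte (m128) MMIO access from several smaller device-model transactions, caching the bytes already transferred keyed on the starting physical address. A simplified sketch of how the read side might consult that cache (hypothetical; pa, bytes and buf are illustrative names):

    /* Hypothetical: serve part of a wide MMIO read from the per-vcpu
     * cache when [pa, pa + bytes) falls inside the cached range. */
    struct hvm_vcpu *hvp = &v->arch.hvm_vcpu;
    if ( (pa >= hvp->mmio_large_read_pa) &&
         ((pa + bytes) <=
          (hvp->mmio_large_read_pa + hvp->mmio_large_read_bytes)) )
        memcpy(buf, &hvp->mmio_large_read[pa - hvp->mmio_large_read_pa],
               bytes);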
   115.1 --- a/xen/include/asm-x86/mtrr.h	Thu Apr 24 14:02:16 2008 -0600
   115.2 +++ b/xen/include/asm-x86/mtrr.h	Thu Apr 24 14:08:29 2008 -0600
   115.3 @@ -12,13 +12,6 @@
   115.4  #define MTRR_NUM_TYPES       7
   115.5  #define MEMORY_NUM_TYPES     MTRR_NUM_TYPES
   115.6  
   115.7 -#define MTRR_PHYSMASK_VALID_BIT  11
   115.8 -#define MTRR_PHYSMASK_SHIFT      12
   115.9 -
  115.10 -#define MTRR_PHYSBASE_TYPE_MASK  0xff   /* lowest 8 bits */
  115.11 -#define MTRR_PHYSBASE_SHIFT      12
  115.12 -#define MTRR_VCNT            8
  115.13 -
  115.14  #define NORMAL_CACHE_MODE          0
  115.15  #define NO_FILL_CACHE_MODE         2
  115.16  
  115.17 @@ -58,7 +51,6 @@ struct mtrr_state {
  115.18  	u64       mtrr_cap;
  115.19  	/* ranges in var MSRs are overlapped or not:0(no overlapped) */
  115.20  	bool_t    overlapped;
  115.21 -	bool_t    is_initialized;
  115.22  };
  115.23  
  115.24  extern void mtrr_save_fixed_ranges(void *);
   116.1 --- a/xen/include/asm-x86/paging.h	Thu Apr 24 14:02:16 2008 -0600
   116.2 +++ b/xen/include/asm-x86/paging.h	Thu Apr 24 14:08:29 2008 -0600
   116.3 @@ -83,12 +83,14 @@ struct shadow_paging_mode {
   116.4                                              unsigned long new,
   116.5                                              unsigned int bytes,
   116.6                                              struct sh_emulate_ctxt *sh_ctxt);
   116.7 +#ifdef __i386__
   116.8      int           (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va,
   116.9                                              unsigned long old_lo, 
  116.10                                              unsigned long old_hi, 
  116.11                                              unsigned long new_lo,
  116.12                                              unsigned long new_hi,
  116.13                                              struct sh_emulate_ctxt *sh_ctxt);
  116.14 +#endif
  116.15      mfn_t         (*make_monitor_table    )(struct vcpu *v);
  116.16      void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
  116.17      int           (*guess_wrmap           )(struct vcpu *v, 
   117.1 --- a/xen/include/public/xsm/acm.h	Thu Apr 24 14:02:16 2008 -0600
   117.2 +++ b/xen/include/public/xsm/acm.h	Thu Apr 24 14:08:29 2008 -0600
   117.3 @@ -91,7 +91,7 @@
   117.4   * whenever the interpretation of the related
   117.5   * policy's data structure changes
   117.6   */
   117.7 -#define ACM_POLICY_VERSION 3
   117.8 +#define ACM_POLICY_VERSION 4
   117.9  #define ACM_CHWALL_VERSION 1
  117.10  #define ACM_STE_VERSION  1
  117.11  
  117.12 @@ -131,6 +131,10 @@ typedef uint16_t domaintype_t;
  117.13  /* high-16 = version, low-16 = check magic */
  117.14  #define ACM_MAGIC  0x0001debc
  117.15  
  117.16 +/* size of the SHA1 hash identifying the XML policy from which the
  117.17 +   binary policy was created */
  117.18 +#define ACM_SHA1_HASH_SIZE    20
  117.19 +
  117.20  /* each offset in bytes from start of the struct they
  117.21   * are part of */
  117.22  
  117.23 @@ -160,6 +164,7 @@ struct acm_policy_buffer {
  117.24      uint32_t secondary_policy_code;
  117.25      uint32_t secondary_buffer_offset;
  117.26      struct acm_policy_version xml_pol_version; /* add in V3 */
  117.27 +    uint8_t xml_policy_hash[ACM_SHA1_HASH_SIZE]; /* added in V4 */
  117.28  };
  117.29  
  117.30  
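Appending xml_policy_hash to acm_policy_buffer is what forces the ACM_POLICY_VERSION bump from 3 to 4 above: a V3 consumer would not know the trailing hash exists, and a V4 consumer would read past the end of a V3 buffer. A hypothetical version guard on the consuming side (the policy_version field name is assumed):

    /* Hypothetical guard: refuse buffers built against another layout. */
    if ( be32_to_cpu(pol->policy_version) != ACM_POLICY_VERSION )
        return -EINVAL;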
   118.1 --- a/xen/include/xen/iommu.h	Thu Apr 24 14:02:16 2008 -0600
   118.2 +++ b/xen/include/xen/iommu.h	Thu Apr 24 14:08:29 2008 -0600
   118.3 @@ -27,9 +27,8 @@
   118.4  #include <public/domctl.h>
   118.5  
   118.6  extern int vtd_enabled;
   118.7 -extern int amd_iommu_enabled;
   118.8 +extern int iommu_enabled;
   118.9  
  118.10 -#define iommu_enabled ( amd_iommu_enabled || vtd_enabled )
  118.11  #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
  118.12  #define domain_vmx_iommu(d)     (&d->arch.hvm_domain.hvm_iommu.vmx_iommu)
  118.13  
  118.14 @@ -72,7 +71,6 @@ struct iommu {
  118.15      struct intel_iommu *intel;
  118.16  };
  118.17  
  118.18 -int iommu_setup(void);
  118.19  int iommu_domain_init(struct domain *d);
  118.20  void iommu_domain_destroy(struct domain *d);
  118.21  int device_assigned(u8 bus, u8 devfn);
   119.1 --- a/xen/include/xen/serial.h	Thu Apr 24 14:02:16 2008 -0600
   119.2 +++ b/xen/include/xen/serial.h	Thu Apr 24 14:08:29 2008 -0600
   119.3 @@ -16,12 +16,10 @@ typedef void (*serial_rx_fn)(char, struc
   119.4  void serial_set_rx_handler(int handle, serial_rx_fn fn);
   119.5  
   119.6  /* Number of characters we buffer for a polling receiver. */
   119.7 -#define SERIAL_RXBUFSZ 32
   119.8 -#define MASK_SERIAL_RXBUF_IDX(_i) ((_i)&(SERIAL_RXBUFSZ-1))
   119.9 +#define serial_rxbufsz 32
  119.10  
  119.11  /* Number of characters we buffer for an interrupt-driven transmitter. */
  119.12 -#define SERIAL_TXBUFSZ 16384
  119.13 -#define MASK_SERIAL_TXBUF_IDX(_i) ((_i)&(SERIAL_TXBUFSZ-1))
  119.14 +extern unsigned int serial_txbufsz;
  119.15  
  119.16  struct uart_driver;
  119.17  
  119.18 @@ -39,7 +37,7 @@ struct serial_port {
  119.19      /* Receiver callback functions (asynchronous receivers). */
  119.20      serial_rx_fn        rx_lo, rx_hi, rx;
  119.21      /* Receive data buffer (polling receivers). */
  119.22 -    char                rxbuf[SERIAL_RXBUFSZ];
  119.23 +    char                rxbuf[serial_rxbufsz];
  119.24      unsigned int        rxbufp, rxbufc;
  119.25      /* Serial I/O is concurrency-safe. */
  119.26      spinlock_t          rx_lock, tx_lock;
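Note the asymmetry this hunk leaves behind: the receive buffer stays statically sized, its constant merely lower-cased to serial_rxbufsz, while the transmit buffer size becomes the runtime variable serial_txbufsz. With the MASK_* macros gone, index reduction moves to the use sites, which still presumes power-of-two sizes. An illustrative reduction (not from the patch):

    /* Illustrative ring-index reduction; assumes serial_rxbufsz is a
     * power of two. */
    port->rxbuf[port->rxbufp++ & (serial_rxbufsz - 1)] = c;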
   120.1 --- a/xen/include/xsm/acm/acm_core.h	Thu Apr 24 14:02:16 2008 -0600
   120.2 +++ b/xen/include/xsm/acm/acm_core.h	Thu Apr 24 14:08:29 2008 -0600
   120.3 @@ -34,6 +34,7 @@ struct acm_binary_policy {
   120.4      u16 primary_policy_code;
   120.5      u16 secondary_policy_code;
   120.6      struct acm_policy_version xml_pol_version;
   120.7 +    u8 xml_policy_hash[ACM_SHA1_HASH_SIZE];
   120.8  };
   120.9  
  120.10  struct chwall_binary_policy {
   121.1 --- a/xen/tools/Makefile	Thu Apr 24 14:02:16 2008 -0600
   121.2 +++ b/xen/tools/Makefile	Thu Apr 24 14:08:29 2008 -0600
   121.3 @@ -4,12 +4,12 @@ include $(XEN_ROOT)/Config.mk
   121.4  
   121.5  .PHONY: default
   121.6  default:
   121.7 -	$(MAKE) -C figlet
   121.8 +	[ -d figlet ] && $(MAKE) -C figlet
   121.9  	$(MAKE) symbols
  121.10  
  121.11  .PHONY: clean
  121.12  clean:
  121.13 -	$(MAKE) -C figlet clean
  121.14 +	[ -d figlet ] && $(MAKE) -C figlet clean
  121.15  	rm -f *.o symbols
  121.16  
  121.17  symbols: symbols.c
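One caveat with the [ -d figlet ] && guards: a recipe line fails with the exit status of the whole compound command, so when figlet/ has been pruned away (the very case the guards are for), the false test makes the target error out. A form that succeeds either way (suggested, not in the patch):

    # Suggested recipe line: exits 0 whether or not figlet/ exists.
    if [ -d figlet ]; then $(MAKE) -C figlet; fi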
   122.1 --- a/xen/tools/figlet/figlet.c	Thu Apr 24 14:02:16 2008 -0600
   122.2 +++ b/xen/tools/figlet/figlet.c	Thu Apr 24 14:08:29 2008 -0600
   122.3 @@ -1488,7 +1488,16 @@ static void myputchar(unsigned char c)
   122.4  
   122.5      putc(c, stderr);
   122.6  
   122.7 -    if ( nr_chars == 0 )
   122.8 +    if ( nr_chars == 18 ) 
   122.9 +    {
  122.10 +        nr_chars = 0;
  122.11 +        putchar('"');
  122.12 +        putchar(' ');
  122.13 +        putchar('\\');
  122.14 +        putchar('\n');
  122.15 +    }
  122.16 +
  122.17 +    if ( nr_chars++ == 0 )
  122.18          putchar('"');
  122.19  
  122.20      putchar('\\');
  122.21 @@ -1498,15 +1507,6 @@ static void myputchar(unsigned char c)
  122.22  
  122.23      if ( c == '\n' )
  122.24          startline = 1;
  122.25 -
  122.26 -    if ( ++nr_chars == 18 ) 
  122.27 -    {
  122.28 -        nr_chars = 0;
  122.29 -        putchar('"');
  122.30 -        putchar(' ');
  122.31 -        putchar('\\');
  122.32 -        putchar('\n');
  122.33 -    }
  122.34  }
  122.35  
  122.36  void putstring(string)
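Moving the 18-escape wrap test ahead of the emission means the quote-and-backslash continuation is printed only when another character is actually about to follow, so the generated string can no longer end in a dangling continuation line. The emitted shape, illustratively:

    /* Output shape after the change (illustrative):
     *   "\x20\x5f\x20..." \    <- wrap emitted only because more follows
     *   "\x0a"                 <- final line never ends in " \
     */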
   123.1 --- a/xen/xsm/acm/acm_policy.c	Thu Apr 24 14:02:16 2008 -0600
   123.2 +++ b/xen/xsm/acm/acm_policy.c	Thu Apr 24 14:08:29 2008 -0600
   123.3 @@ -156,6 +156,10 @@ static int
   123.4             &pol->xml_pol_version,
   123.5             sizeof(acm_bin_pol.xml_pol_version));
   123.6  
   123.7 +    memcpy(&acm_bin_pol.xml_policy_hash,
   123.8 +           pol->xml_policy_hash,
   123.9 +           sizeof(acm_bin_pol.xml_policy_hash));
  123.10 +
  123.11      if ( acm_primary_ops->is_default_policy() &&
  123.12           acm_secondary_ops->is_default_policy() )
  123.13          require_update = 0;
  123.14 @@ -258,6 +262,10 @@ acm_get_policy(XEN_GUEST_HANDLE_64(void)
  123.15             &acm_bin_pol.xml_pol_version,
  123.16             sizeof(struct acm_policy_version));
  123.17  
  123.18 +    memcpy(&bin_pol->xml_policy_hash,
  123.19 +           &acm_bin_pol.xml_policy_hash,
  123.20 +           sizeof(acm_bin_pol.xml_policy_hash));
  123.21 +
  123.22      ret = acm_dump_policy_reference(
  123.23                 policy_buffer + be32_to_cpu(bin_pol->policy_reference_offset),
  123.24                 buf_size - be32_to_cpu(bin_pol->policy_reference_offset));