ia64/xen-unstable

changeset 18483:ec8eaab557d8

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Fri Sep 12 14:47:40 2008 +0900 (2008-09-12)
parents 4ddd63b4be9b 346c073ed6a4
children 4a381ddc764a
files
line diff
     1.1 --- a/.hgignore	Fri Sep 12 14:32:45 2008 +0900
     1.2 +++ b/.hgignore	Fri Sep 12 14:47:40 2008 +0900
     1.3 @@ -185,7 +185,6 @@
     1.4  ^tools/misc/xenperf$
     1.5  ^tools/pygrub/build/.*$
     1.6  ^tools/python/build/.*$
     1.7 -^tools/python/xen/util/xsm/xsm\.py$
     1.8  ^tools/security/secpol_tool$
     1.9  ^tools/security/xen/.*$
    1.10  ^tools/security/xensec_tool$
     2.1 --- a/Config.mk	Fri Sep 12 14:32:45 2008 +0900
     2.2 +++ b/Config.mk	Fri Sep 12 14:47:40 2008 +0900
     2.3 @@ -86,11 +86,7 @@ QEMU_REMOTE=http://xenbits.xensource.com
     2.4  # Mercurial in-tree version, or a local directory, or a git URL.
     2.5  # CONFIG_QEMU   ?= ioemu
     2.6  # CONFIG_QEMU   ?= ../qemu-xen.git
     2.7 -ifeq ($(XEN_TARGET_ARCH),ia64)
     2.8 -CONFIG_QEMU   ?= ioemu
     2.9 -else
    2.10  CONFIG_QEMU   ?= $(QEMU_REMOTE)
    2.11 -endif
    2.12  
    2.13  # Optional components
    2.14  XENSTAT_XENTOP     ?= y
     3.1 --- a/docs/misc/vtd.txt	Fri Sep 12 14:32:45 2008 +0900
     3.2 +++ b/docs/misc/vtd.txt	Fri Sep 12 14:47:40 2008 +0900
     3.3 @@ -1,8 +1,9 @@
     3.4  Title   : How to do PCI Passthrough with VT-d
     3.5  Authors : Allen Kay    <allen.m.kay@intel.com>
     3.6            Weidong Han  <weidong.han@intel.com>
     3.7 +          Yuji Shimada <shimada-yxb@necst.nec.co.jp>
     3.8  Created : October-24-2007
     3.9 -Updated : August-06-2008
    3.10 +Updated : September-09-2008
    3.11  
    3.12  How to turn on VT-d in Xen
    3.13  --------------------------
    3.14 @@ -106,3 +107,27 @@ http://h10010.www1.hp.com/wwpc/us/en/en/
    3.15  
     3.16  For more information, please refer to http://wiki.xensource.com/xenwiki/VTdHowTo.
    3.17  
    3.18 +
    3.19 +Assigning devices to HVM domains
    3.20 +--------------------------------
    3.21 +
     3.22 +Most device types, such as NICs, HBAs, and EHCI and UHCI controllers,
     3.23 +can be assigned to an HVM domain.
     3.24 +
     3.25 +Some devices, however, have design features that make them unsuitable
     3.26 +for assignment to an HVM domain. Examples include:
    3.27 +
     3.28 + * The device has an internal resource, such as private memory, which
     3.29 +   is mapped into the memory address space with a BAR (Base Address Register).
     3.30 + * The driver submits a command with a pointer to a buffer within the
     3.31 +   internal resource; the device decodes the pointer (address) and
     3.32 +   accesses the buffer.
    3.33 +
     3.34 +In an HVM domain, the BAR is virtualized, so the host-BAR value and the
     3.35 +guest-BAR value differ. The addresses of the internal resource as seen
     3.36 +by the device and by the driver are therefore different. Similarly, the
     3.37 +addresses of a buffer within the internal resource differ between the
     3.38 +device's view and the driver's view. As a result, the device cannot
     3.39 +access the buffer specified by the driver.
    3.40 +
     3.41 +Such devices currently do not work when assigned to an HVM domain.
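
For devices without the problematic pattern above, assignment itself uses
the standard guest-config syntax. A minimal sketch, assuming the device
sits at BDF 01:00.0 and has already been hidden from dom0 (e.g. via
pciback):

    pci = [ '01:00.0' ]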
     4.1 --- a/docs/src/user.tex	Fri Sep 12 14:32:45 2008 +0900
     4.2 +++ b/docs/src/user.tex	Fri Sep 12 14:47:40 2008 +0900
     4.3 @@ -4252,7 +4252,7 @@ directory of the Xen source distribution
     4.4  \section{Online References}
     4.5  
     4.6  The official Xen web site can be found at:
     4.7 -\begin{quote} {\tt http://www.xensource.com}
     4.8 +\begin{quote} {\tt http://www.xen.org}
     4.9  \end{quote}
    4.10  
    4.11  This contains links to the latest versions of all online
    4.12 @@ -4282,7 +4282,7 @@ mailing lists and subscription informati
    4.13    Subscribe at: \\
    4.14    {\small {\tt http://lists.xensource.com/xen-announce}}
    4.15  \item[xen-changelog@lists.xensource.com] Changelog feed
    4.16 -  from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
    4.17 +  from the unstable and 3.x trees - developer oriented.  Subscribe at: \\
    4.18    {\small {\tt http://lists.xensource.com/xen-changelog}}
    4.19  \end{description}
    4.20  
     5.1 --- a/stubdom/README	Fri Sep 12 14:32:45 2008 +0900
     5.2 +++ b/stubdom/README	Fri Sep 12 14:47:40 2008 +0900
     5.3 @@ -27,7 +27,7 @@ device_model = '/usr/lib/xen/bin/stubdom
     5.4  - disable anything related to dom0, like pty serial assignments
     5.5  
     5.6  
     5.7 -Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
     5.8 +Create /etc/xen/hvmconfig-dm (where "hvmconfig" is the name of your HVM
     5.9  guest) with
    5.10  
    5.11  kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
     5.12 @@ -52,7 +52,7 @@ There are three possibilities
    5.13  vnc = 0
    5.14  sdl = 0
    5.15  
    5.16 -  - In stubdom-hvmconfig, set an sdl vfb:
    5.17 +  - In hvmconfig-dm, set an sdl vfb:
    5.18  
    5.19  vfb = [ 'type=sdl' ]
    5.20  
    5.21 @@ -65,7 +65,7 @@ then you will not be able to connect to 
    5.22  vnc = 1
    5.23  vnclisten = "172.30.206.1"
    5.24  
    5.25 -  - In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
    5.26 +  - In hvmconfig-dm, fill the reserved vif with the same IP, for instance:
    5.27  
    5.28  vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
    5.29  
    5.30 @@ -76,7 +76,7 @@ vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,
    5.31  vnc = 0
    5.32  sdl = 0
    5.33  
    5.34 -  - In stubdom-hvmconfig, set a vnc vfb:
    5.35 +  - In hvmconfig-dm, set a vnc vfb:
    5.36  
    5.37  vfb = [ 'type=vnc' ]
    5.38  
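
Putting the pieces together, a minimal /etc/xen/hvmconfig-dm for the
vnc-vfb case (the third possibility above) reduces to:

    kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
    vfb = [ 'type=vnc' ]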
     6.1 --- a/tools/examples/init.d/xendomains	Fri Sep 12 14:32:45 2008 +0900
     6.2 +++ b/tools/examples/init.d/xendomains	Fri Sep 12 14:47:40 2008 +0900
     6.3 @@ -327,15 +327,17 @@ stop()
     6.4  	if test $id = 0; then continue; fi
     6.5  	echo -n " $name"
     6.6  	if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
     6.7 -	    case $name in
     6.8 +	    eval "
     6.9 +	    case \"\$name\" in
    6.10  		($NAMES)
    6.11  		    # nothing
    6.12  		    ;;
    6.13  		(*)
    6.14 -		    echo -n "(skip)"
    6.15 +		    echo -n '(skip)'
    6.16  		    continue
    6.17  		    ;;
    6.18  	    esac
    6.19 +	    "
    6.20  	fi
     6.21  	# XENDOMAINS_SYSRQ could be something like just "s"
    6.22  	# or "s e i u" or even "s e s i u o"
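
The eval wrapper above is needed because a "|" that enters a case
pattern via variable expansion is matched literally; only a literal "|"
in the parsed source acts as alternation. A minimal sketch of the
difference:

    NAMES='dom1|dom2'
    case dom1 in
        ($NAMES) echo matched ;;   # no match: pattern is the literal string "dom1|dom2"
    esac
    # $NAMES expands before eval runs, so eval re-parses the "|" as alternation:
    eval "case dom1 in ($NAMES) echo matched ;; esac"   # prints "matched"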
     7.1 --- a/tools/examples/xend-config.sxp	Fri Sep 12 14:32:45 2008 +0900
     7.2 +++ b/tools/examples/xend-config.sxp	Fri Sep 12 14:47:40 2008 +0900
     7.3 @@ -14,6 +14,10 @@
     7.4  #(logfile /var/log/xen/xend.log)
     7.5  #(loglevel DEBUG)
     7.6  
     7.7 +# Uncomment the line below.  Set the value to flask, acm, or dummy to 
     7.8 +# select a security module.
     7.9 +
    7.10 +#(xsm_module_name dummy)
    7.11  
    7.12  # The Xen-API server configuration.
    7.13  #
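
With the flask policy added by this changeset built and installed,
enabling it through the new option is then a one-line change here:

    (xsm_module_name flask)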
     8.1 --- a/tools/examples/xmexample.hvm	Fri Sep 12 14:32:45 2008 +0900
     8.2 +++ b/tools/examples/xmexample.hvm	Fri Sep 12 14:47:40 2008 +0900
     8.3 @@ -220,7 +220,7 @@ serial='pty'
     8.4  #   Configure guest CPUID responses:
     8.5  #
     8.6  #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
     8.7 -#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     8.8 +#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     8.9  # - Unset the SSE4 features (CPUID.1[ECX][20-19])
    8.10  # - Default behaviour for all other bits in ECX And EAX registers.
    8.11  # 
     9.1 --- a/tools/examples/xmexample.hvm-stubdom	Fri Sep 12 14:32:45 2008 +0900
     9.2 +++ b/tools/examples/xmexample.hvm-stubdom	Fri Sep 12 14:47:40 2008 +0900
     9.3 @@ -236,7 +236,7 @@ stdvga=0
     9.4  #   Configure guest CPUID responses:
     9.5  #
     9.6  #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
     9.7 -#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     9.8 +#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     9.9  # - Unset the SSE4 features (CPUID.1[ECX][20-19])
    9.10  # - Default behaviour for all other bits in ECX And EAX registers.
    9.11  # 
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/flask/policy/Makefile	Fri Sep 12 14:47:40 2008 +0900
    10.3 @@ -0,0 +1,234 @@
    10.4 +#
    10.5 +# Makefile for the security policy.
    10.6 +#
    10.7 +# Targets:
    10.8 +# 
     10.9 +# install       - compile and install the policy configuration and context files.
   10.10 +# load          - compile, install, and load the policy configuration.
   10.11 +# reload        - compile, install, and load/reload the policy configuration.
   10.12 +# policy        - compile the policy configuration locally for testing/development.
   10.13 +#
   10.14 +# The default target is 'policy'.
   10.15 +#
   10.16 +
   10.17 +########################################
   10.18 +#
   10.19 +# Configurable portions of the Makefile
   10.20 +#
   10.21 +
   10.22 +# Policy version
   10.23 +# By default, checkpolicy will create the highest
   10.24 +# version policy it supports.  Setting this will
   10.25 +# override the version.
   10.26 +OUTPUT_POLICY = 20
   10.27 +
   10.28 +# Policy Type
   10.29 +# strict, targeted,
   10.30 +# strict-mls, targeted-mls,
   10.31 +# strict-mcs, targeted-mcs
   10.32 +TYPE = strict
   10.33 +
   10.34 +# Policy Name
   10.35 +# If set, this will be used as the policy
   10.36 +# name.  Otherwise the policy type will be
   10.37 +# used for the name.
   10.38 +NAME = xenrefpolicy
   10.39 +
   10.40 +# Distribution
   10.41 +# Some distributions have portions of policy
   10.42 +# for programs or configurations specific to the
   10.43 +# distribution.  Setting this will enable options
   10.44 +# for the distribution.
   10.45 +# redhat, gentoo, debian, and suse are current options.
   10.46 +# Fedora users should enable redhat.
   10.47 +#DISTRO = 
   10.48 +
   10.49 +# Build monolithic policy.  Putting n here
   10.50 +# will build a loadable module policy.
   10.51 +MONOLITHIC=y
   10.52 +
   10.53 +# Uncomment this to disable command echoing
   10.54 +#QUIET:=@
   10.55 +
   10.56 +########################################
   10.57 +#
   10.58 +# NO OPTIONS BELOW HERE
   10.59 +#
   10.60 +
   10.61 +# executable paths
   10.62 +PREFIX := /usr
   10.63 +BINDIR := $(PREFIX)/bin
   10.64 +SBINDIR := $(PREFIX)/sbin
   10.65 +CHECKPOLICY := $(BINDIR)/checkpolicy
   10.66 +CHECKMODULE := $(BINDIR)/checkmodule
   10.67 +SEMOD_PKG := $(BINDIR)/semodule_package
   10.68 +LOADPOLICY := $(SBINDIR)/flask-loadpolicy
   10.69 +
   10.70 +CFLAGS := -Wall
   10.71 +
   10.72 +# policy source layout
   10.73 +POLDIR := policy
   10.74 +MODDIR := $(POLDIR)/modules
   10.75 +FLASKDIR := $(POLDIR)/flask
   10.76 +SECCLASS := $(FLASKDIR)/security_classes
   10.77 +ISIDS := $(FLASKDIR)/initial_sids
   10.78 +AVS := $(FLASKDIR)/access_vectors
   10.79 +
   10.80 +#policy building support tools
   10.81 +SUPPORT := support
   10.82 +FCSORT := tmp/fc_sort
   10.83 +
   10.84 +# config file paths
   10.85 +GLOBALTUN := $(POLDIR)/global_tunables
   10.86 +GLOBALBOOL := $(POLDIR)/global_booleans
   10.87 +MOD_CONF := $(POLDIR)/modules.conf
   10.88 +TUNABLES := $(POLDIR)/tunables.conf
   10.89 +BOOLEANS := $(POLDIR)/booleans.conf
   10.90 +
   10.91 +# install paths
   10.92 +TOPDIR = $(DESTDIR)/etc/xen/
   10.93 +INSTALLDIR = $(TOPDIR)/$(NAME)
   10.94 +SRCPATH = $(INSTALLDIR)/src
   10.95 +USERPATH = $(INSTALLDIR)/users
   10.96 +CONTEXTPATH = $(INSTALLDIR)/contexts
   10.97 +
   10.98 +# enable MLS if requested.
   10.99 +ifneq ($(findstring -mls,$(TYPE)),)
  10.100 +	override M4PARAM += -D enable_mls
  10.101 +	CHECKPOLICY += -M
  10.102 +	CHECKMODULE += -M
  10.103 +endif
  10.104 +
  10.105 +# enable MLS if MCS requested.
  10.106 +ifneq ($(findstring -mcs,$(TYPE)),)
  10.107 +	override M4PARAM += -D enable_mcs
  10.108 +	CHECKPOLICY += -M
  10.109 +	CHECKMODULE += -M
  10.110 +endif
  10.111 +
  10.112 +# compile targeted policy if requested.
  10.113 +ifneq ($(findstring targeted,$(TYPE)),)
  10.114 +	override M4PARAM += -D targeted_policy
  10.115 +endif
  10.116 +
  10.117 +# enable distribution-specific policy
  10.118 +ifneq ($(DISTRO),)
  10.119 +	override M4PARAM += -D distro_$(DISTRO)
  10.120 +endif
  10.121 +
  10.122 +ifneq ($(OUTPUT_POLICY),)
  10.123 +	CHECKPOLICY += -c $(OUTPUT_POLICY)
  10.124 +endif
  10.125 +
  10.126 +ifeq ($(NAME),)
  10.127 +	NAME := $(TYPE)
  10.128 +endif
  10.129 +
  10.130 +# determine the policy version and current kernel version if possible
  10.131 +PV := $(shell $(CHECKPOLICY) -V |cut -f 1 -d ' ')
  10.132 +KV := $(shell cat /selinux/policyvers)
  10.133 +
   10.134 +# don't print version warnings if we are unable to determine
  10.135 +# the currently running kernel's policy version
  10.136 +ifeq ($(KV),)
  10.137 +	KV := $(PV)
  10.138 +endif
  10.139 +
  10.140 +FC := file_contexts
  10.141 +POLVER := policy.$(PV)
  10.142 +
  10.143 +M4SUPPORT = $(wildcard $(POLDIR)/support/*.spt)
  10.144 +
  10.145 +APPCONF := config/appconfig-$(TYPE)
  10.146 +APPDIR := $(CONTEXTPATH)
  10.147 +APPFILES := $(INSTALLDIR)/booleans
  10.148 +CONTEXTFILES += $(wildcard $(APPCONF)/*_context*) $(APPCONF)/media
  10.149 +USER_FILES := $(POLDIR)/systemuser $(POLDIR)/users
  10.150 +
  10.151 +ALL_LAYERS := $(filter-out $(MODDIR)/CVS,$(shell find $(wildcard $(MODDIR)/*) -maxdepth 0 -type d))
  10.152 +
  10.153 +GENERATED_TE := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te.in)))
  10.154 +GENERATED_IF := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.if.in)))
  10.155 +GENERATED_FC := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.fc.in)))
  10.156 +
   10.157 +# sort here since it removes duplicates, which can happen when a
   10.158 +# generated file already exists and is picked up by the wildcard too
  10.159 +DETECTED_MODS := $(sort $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te)) $(GENERATED_TE))
  10.160 +
  10.161 +# modules.conf setting for base module
  10.162 +MODBASE := base
  10.163 +
  10.164 +# modules.conf setting for module
  10.165 +MODMOD := module
  10.166 +
  10.167 +# extract settings from modules.conf
  10.168 +BASE_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODBASE)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
  10.169 +MOD_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODMOD)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
  10.170 +
  10.171 +HOMEDIR_TEMPLATE = tmp/homedir_template
  10.172 +
  10.173 +########################################
  10.174 +#
  10.175 +# Load appropriate rules
  10.176 +#
  10.177 +
  10.178 +ifeq ($(MONOLITHIC),y)
  10.179 +	include Rules.monolithic
  10.180 +else
  10.181 +	include Rules.modular
  10.182 +endif
  10.183 +
  10.184 +########################################
  10.185 +#
  10.186 +# Create config files
  10.187 +#
  10.188 +conf: $(MOD_CONF) $(BOOLEANS) $(GENERATED_TE) $(GENERATED_IF) $(GENERATED_FC)
  10.189 +
  10.190 +$(MOD_CONF) $(BOOLEANS): $(POLXML)
  10.191 +	@echo "Updating $(MOD_CONF) and $(BOOLEANS)"
  10.192 +	$(QUIET) cd $(DOCS) && ../$(GENDOC) -t ../$(BOOLEANS) -m ../$(MOD_CONF) -x ../$(POLXML)
  10.193 +
  10.194 +########################################
  10.195 +#
  10.196 +# Appconfig files
  10.197 +#
  10.198 +install-appconfig: $(APPFILES)
  10.199 +
  10.200 +$(INSTALLDIR)/booleans: $(BOOLEANS)
  10.201 +	@mkdir -p $(INSTALLDIR)
  10.202 +	$(QUIET) egrep '^[[:blank:]]*[[:alpha:]]' $(BOOLEANS) \
  10.203 +		| sed -e 's/false/0/g' -e 's/true/1/g' > tmp/booleans
  10.204 +	$(QUIET) install -m 644 tmp/booleans $@
  10.205 +
  10.206 +########################################
  10.207 +#
  10.208 +# Install policy sources
  10.209 +#
  10.210 +install-src:
  10.211 +	rm -rf $(SRCPATH)/policy.old
  10.212 +	-mv $(SRCPATH)/policy $(SRCPATH)/policy.old
  10.213 +	mkdir -p $(SRCPATH)/policy
  10.214 +	cp -R . $(SRCPATH)/policy
  10.215 +
  10.216 +########################################
  10.217 +#
  10.218 +# Clean everything
  10.219 +#
  10.220 +bare: clean
  10.221 +	rm -f $(POLXML)
  10.222 +	rm -f $(SUPPORT)/*.pyc
  10.223 +	rm -f $(FCSORT)
  10.224 +	rm -f $(MOD_CONF)
  10.225 +	rm -f $(BOOLEANS)
  10.226 +	rm -fR $(HTMLDIR)
  10.227 +ifneq ($(GENERATED_TE),)
  10.228 +	rm -f $(GENERATED_TE)
  10.229 +endif
  10.230 +ifneq ($(GENERATED_IF),)
  10.231 +	rm -f $(GENERATED_IF)
  10.232 +endif
  10.233 +ifneq ($(GENERATED_FC),)
  10.234 +	rm -f $(GENERATED_FC)
  10.235 +endif
  10.236 +
  10.237 +.PHONY: install-src install-appconfig conf html bare
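
Typical usage, given the targets documented at the top of this Makefile
(paths follow the defaults above: NAME=xenrefpolicy under /etc/xen):

    cd tools/flask/policy
    make            # default target: build policy.<version> locally
    make install    # install under /etc/xen/xenrefpolicy
    make load       # compile, install, and load via flask-loadpolicy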
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/flask/policy/Rules.modular	Fri Sep 12 14:47:40 2008 +0900
    11.3 @@ -0,0 +1,166 @@
    11.4 +########################################
    11.5 +#
    11.6 +# Rules and Targets for building modular policies
    11.7 +#
    11.8 +
    11.9 +ALL_MODULES := $(filter $(BASE_MODS) $(MOD_MODS),$(DETECTED_MODS))
   11.10 +ALL_INTERFACES := $(ALL_MODULES:.te=.if)
   11.11 +
   11.12 +BASE_PKG := base.pp
   11.13 +BASE_FC := base.fc
   11.14 +
   11.15 +BASE_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
   11.16 +
   11.17 +BASE_PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
   11.18 +BASE_TE_FILES := $(BASE_MODS)
   11.19 +BASE_POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/constraints
   11.20 +BASE_FC_FILES := $(BASE_MODS:.te=.fc)
   11.21 +
   11.22 +MOD_MODULES := $(MOD_MODS:.te=.mod)
   11.23 +MOD_PKGS := $(notdir $(MOD_MODS:.te=.pp))
   11.24 +
   11.25 +# search layer dirs for source files
   11.26 +vpath %.te $(ALL_LAYERS)
   11.27 +vpath %.if $(ALL_LAYERS)
   11.28 +vpath %.fc $(ALL_LAYERS)
   11.29 +
   11.30 +########################################
   11.31 +#
    11.32 +# default action: create the base module package
   11.33 +#
   11.34 +default: base
   11.35 +
   11.36 +base: $(BASE_PKG)
   11.37 +
   11.38 +modules: $(MOD_PKGS)
   11.39 +
   11.40 +#policy: $(POLVER)
   11.41 +#install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
   11.42 +#load: tmp/load
   11.43 +
   11.44 +########################################
   11.45 +#
   11.46 +# Create a base module package
   11.47 +#
   11.48 +$(BASE_PKG): tmp/base.mod $(BASE_FC)
   11.49 +	@echo "Creating $(NAME) base module package"
   11.50 +	$(QUIET) $(SEMOD_PKG) $@ $^
   11.51 +
   11.52 +########################################
   11.53 +#
   11.54 +# Compile a base module
   11.55 +#
   11.56 +tmp/base.mod: base.conf
   11.57 +	@echo "Compiling $(NAME) base module"
   11.58 +	$(QUIET) $(CHECKMODULE) $^ -o $@
   11.59 +
   11.60 +########################################
   11.61 +#
   11.62 +# Construct a base module policy.conf
   11.63 +#
   11.64 +base.conf: $(BASE_SECTIONS)
   11.65 +	@echo "Creating $(NAME) base module policy.conf"
   11.66 +# checkpolicy can use the #line directives provided by -s for error reporting:
   11.67 +	$(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
   11.68 +	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
   11.69 +# the ordering of these ocontexts matters:
   11.70 +	$(QUIET) grep ^portcon tmp/$@.tmp >> $@ || true
   11.71 +	$(QUIET) grep ^netifcon tmp/$@.tmp >> $@ || true
   11.72 +	$(QUIET) grep ^nodecon tmp/$@.tmp >> $@ || true
   11.73 +
   11.74 +tmp/pre_te_files.conf: $(BASE_PRE_TE_FILES)
   11.75 +	@test -d tmp || mkdir -p tmp
   11.76 +	$(QUIET) cat $^ > $@
   11.77 +
   11.78 +tmp/generated_definitions.conf: $(ALL_LAYERS) $(BASE_TE_FILES)
   11.79 +	@test -d tmp || mkdir -p tmp
   11.80 +# define all available object classes
   11.81 +	$(QUIET) $(GENPERM) $(AVS) $(SECCLASS) > $@
   11.82 +# per-userdomain templates
   11.83 +	$(QUIET) echo "define(\`per_userdomain_templates',\`" >> $@
   11.84 +	$(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
   11.85 +		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
   11.86 +			>> $@ ;\
   11.87 +	done
   11.88 +	$(QUIET) echo "')" >> $@
   11.89 +# define foo.te
   11.90 +	$(QUIET) for i in $(notdir $(BASE_TE_FILES)); do \
   11.91 +		echo "define(\`$$i')" >> $@ ;\
   11.92 +	done
   11.93 +	$(QUIET) $(SETTUN) $(BOOLEANS) >> $@
   11.94 +
   11.95 +tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
   11.96 +ifeq ($(ALL_INTERFACES),)
   11.97 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
   11.98 +endif
   11.99 +	@test -d tmp || mkdir -p tmp
  11.100 +	$(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
  11.101 +
  11.102 +tmp/all_te_files.conf: $(BASE_TE_FILES)
  11.103 +ifeq ($(BASE_TE_FILES),)
  11.104 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  11.105 +endif
  11.106 +	@test -d tmp || mkdir -p tmp
  11.107 +	$(QUIET) cat $^ > $@
  11.108 +
  11.109 +tmp/post_te_files.conf: $(BASE_POST_TE_FILES)
  11.110 +	@test -d tmp || mkdir -p tmp
  11.111 +	$(QUIET) cat $^ > $@
  11.112 +
  11.113 +# extract attributes and put them first. extract post te stuff
  11.114 +# like genfscon and put last.  portcon, nodecon, and netifcon
   11.115 +# are delayed since they are generated by m4
  11.116 +tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
  11.117 +	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
  11.118 +	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
  11.119 +	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
  11.120 +	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.121 +	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.122 +	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.123 +	$(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
  11.124 +			-e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
  11.125 +			< tmp/all_te_files.conf > tmp/only_te_rules.conf
  11.126 +
  11.127 +########################################
  11.128 +#
  11.129 +# Construct base module file contexts
  11.130 +#
  11.131 +$(BASE_FC): $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) $(FCSORT)
  11.132 +ifeq ($(BASE_FC_FILES),)
  11.133 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  11.134 +endif
  11.135 +	@echo "Creating $(NAME) base module file contexts."
  11.136 +	@test -d tmp || mkdir -p tmp
  11.137 +	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) > tmp/$@.tmp
  11.138 +	$(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
  11.139 +	$(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
  11.140 +	$(QUIET) $(FCSORT) tmp/$@.tmp $@
  11.141 +
  11.142 +########################################
  11.143 +#
  11.144 +# Build module packages
  11.145 +#
  11.146 +tmp/%.mod: $(M4SUPPORT) tmp/generated_definitions.conf tmp/all_interfaces.conf %.te
  11.147 +	@if test -z "$(filter $^,$(MOD_MODS))"; then \
   11.148 +		echo "The $(notdir $(basename $@)) module is not configured to be compiled as a loadable module." ;\
  11.149 +		false ;\
  11.150 +	fi
   11.151 +	@echo "Compiling $(NAME) $(@F) module"
  11.152 +	$(QUIET) m4 $(M4PARAM) -s $^ > $(@:.mod=.tmp)
  11.153 +	$(QUIET) $(CHECKMODULE) -m $(@:.mod=.tmp) -o $@
  11.154 +
  11.155 +%.pp: tmp/%.mod %.fc
  11.156 +	@echo "Creating $(NAME) $(@F) policy package"
  11.157 +	$(QUIET) $(SEMOD_PKG) $@ $^
  11.158 +
  11.159 +########################################
  11.160 +#
  11.161 +# Clean the sources
  11.162 +#
  11.163 +clean:
  11.164 +	rm -fR tmp
  11.165 +	rm -f base.conf
  11.166 +	rm -f *.pp
  11.167 +	rm -f $(BASE_FC)
  11.168 +
  11.169 +.PHONY: default base modules clean
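
These rules are only pulled in when MONOLITHIC is set to n in the
Makefile; under that assumption the modular build flow is:

    make MONOLITHIC=n           # default target: build the base.pp package
    make MONOLITHIC=n modules   # build one .pp package per module in modules.conf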
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/flask/policy/Rules.monolithic	Fri Sep 12 14:47:40 2008 +0900
    12.3 @@ -0,0 +1,196 @@
    12.4 +########################################
    12.5 +#
    12.6 +# Rules and Targets for building monolithic policies
    12.7 +#
    12.8 +
    12.9 +# install paths
   12.10 +POLICYPATH = $(INSTALLDIR)/policy
   12.11 +LOADPATH = $(POLICYPATH)/$(POLVER)
   12.12 +FCPATH = $(CONTEXTPATH)/files/file_contexts
   12.13 +HOMEDIRPATH = $(CONTEXTPATH)/files/homedir_template
   12.14 +
    12.15 +# for a monolithic policy, use all base and module files to create the policy
   12.16 +ENABLEMOD := $(BASE_MODS) $(MOD_MODS)
   12.17 +
   12.18 +ALL_MODULES := $(filter $(ENABLEMOD),$(DETECTED_MODS))
   12.19 +
   12.20 +ALL_INTERFACES := $(ALL_MODULES:.te=.if)
   12.21 +ALL_TE_FILES := $(ALL_MODULES)
   12.22 +ALL_FC_FILES := $(ALL_MODULES:.te=.fc)
   12.23 +
   12.24 +PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
   12.25 +POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/users $(POLDIR)/constraints
   12.26 +
   12.27 +POLICY_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
   12.28 +
   12.29 +########################################
   12.30 +#
   12.31 +# default action: build policy locally
   12.32 +#
   12.33 +default: policy
   12.34 +
   12.35 +policy: $(POLVER)
   12.36 +
   12.37 +install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
   12.38 +
   12.39 +load: tmp/load
   12.40 +
   12.41 +########################################
   12.42 +#
   12.43 +# Build a binary policy locally
   12.44 +#
   12.45 +$(POLVER): policy.conf
   12.46 +	@echo "Compiling $(NAME) $(POLVER)"
   12.47 +ifneq ($(PV),$(KV))
   12.48 +	@echo
   12.49 +	@echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set correctly?"
   12.50 +	@echo
   12.51 +endif
   12.52 +	$(QUIET) $(CHECKPOLICY) $^ -o $@
   12.53 +
   12.54 +########################################
   12.55 +#
   12.56 +# Install a binary policy
   12.57 +#
   12.58 +$(LOADPATH): policy.conf
   12.59 +	@mkdir -p $(POLICYPATH)
   12.60 +	@echo "Compiling and installing $(NAME) $(LOADPATH)"
   12.61 +ifneq ($(PV),$(KV))
   12.62 +	@echo
   12.63 +	@echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set correctly?"
   12.64 +	@echo
   12.65 +endif
   12.66 +	$(QUIET) $(CHECKPOLICY) $^ -o $@
   12.67 +
   12.68 +########################################
   12.69 +#
   12.70 +# Load the binary policy
   12.71 +#
   12.72 +reload tmp/load: $(LOADPATH) $(FCPATH)
   12.73 +	@echo "Loading $(NAME) $(LOADPATH)"
   12.74 +	$(QUIET) $(LOADPOLICY) -q $(LOADPATH)
   12.75 +	@touch tmp/load
   12.76 +
   12.77 +########################################
   12.78 +#
   12.79 +# Construct a monolithic policy.conf
   12.80 +#
   12.81 +policy.conf: $(POLICY_SECTIONS)
   12.82 +	@echo "Creating $(NAME) policy.conf"
   12.83 +# checkpolicy can use the #line directives provided by -s for error reporting:
   12.84 +	$(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
   12.85 +	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
   12.86 +
   12.87 +tmp/pre_te_files.conf: $(PRE_TE_FILES)
   12.88 +	@test -d tmp || mkdir -p tmp
   12.89 +	$(QUIET) cat $^ > $@
   12.90 +
   12.91 +tmp/generated_definitions.conf: $(ALL_LAYERS) $(ALL_TE_FILES)
   12.92 +# per-userdomain templates:
   12.93 +	@test -d tmp || mkdir -p tmp
   12.94 +	$(QUIET) echo "define(\`per_userdomain_templates',\`" > $@
   12.95 +	$(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
   12.96 +		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
   12.97 +			>> $@ ;\
   12.98 +	done
   12.99 +	$(QUIET) echo "')" >> $@
  12.100 +# define foo.te
  12.101 +	$(QUIET) for i in $(notdir $(ALL_MODULES)); do \
  12.102 +		echo "define(\`$$i')" >> $@ ;\
  12.103 +	done
  12.104 +#	$(QUIET) $(SETTUN) $(BOOLEANS) >> $@
  12.105 +
  12.106 +tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
  12.107 +ifeq ($(ALL_INTERFACES),)
  12.108 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.109 +endif
  12.110 +	@test -d tmp || mkdir -p tmp
  12.111 +	$(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
  12.112 +
  12.113 +tmp/all_te_files.conf: $(ALL_TE_FILES)
  12.114 +ifeq ($(ALL_TE_FILES),)
  12.115 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.116 +endif
  12.117 +	@test -d tmp || mkdir -p tmp
  12.118 +	$(QUIET) cat $^ > $@
  12.119 +
  12.120 +tmp/post_te_files.conf: $(POST_TE_FILES)
  12.121 +	@test -d tmp || mkdir -p tmp
  12.122 +	$(QUIET) cat $^ > $@
  12.123 +
  12.124 +# extract attributes and put them first. extract post te stuff
  12.125 +# like genfscon and put last.  portcon, nodecon, and netifcon
   12.126 +# are delayed since they are generated by m4
  12.127 +tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
  12.128 +	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
  12.129 +	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
  12.130 +	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
  12.131 +	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.132 +	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.133 +	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.134 +	$(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
  12.135 +			-e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
  12.136 +			< tmp/all_te_files.conf > tmp/only_te_rules.conf
  12.137 +
  12.138 +########################################
  12.139 +#
  12.140 +# Remove the dontaudit rules from the policy.conf
  12.141 +#
  12.142 +enableaudit: policy.conf
  12.143 +	@test -d tmp || mkdir -p tmp
  12.144 +	@echo "Removing dontaudit rules from policy.conf"
  12.145 +	$(QUIET) grep -v dontaudit policy.conf > tmp/policy.audit
  12.146 +	$(QUIET) mv tmp/policy.audit policy.conf
  12.147 +
  12.148 +########################################
  12.149 +#
  12.150 +# Construct file_contexts
  12.151 +#
  12.152 +$(FC): $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES)
  12.153 +ifeq ($(ALL_FC_FILES),)
  12.154 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.155 +endif
  12.156 +	@echo "Creating $(NAME) file_contexts."
  12.157 +	@test -d tmp || mkdir -p tmp
  12.158 +	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES) > tmp/$@.tmp
  12.159 +#	$(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
  12.160 +#	$(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
  12.161 +#	$(QUIET) $(FCSORT) tmp/$@.tmp $@
  12.162 +	$(QUIET) touch $(HOMEDIR_TEMPLATE)
  12.163 +	$(QUIET) touch $@
  12.164 +
  12.165 +########################################
  12.166 +#
  12.167 +# Install file_contexts
  12.168 +#
  12.169 +$(FCPATH): $(FC) $(LOADPATH) $(USERPATH)/system.users
  12.170 +	@echo "Validating $(NAME) file_contexts."
  12.171 +#	$(QUIET) $(SETFILES) -q -c $(LOADPATH) $(FC)
  12.172 +	@echo "Installing file_contexts."
  12.173 +	@mkdir -p $(CONTEXTPATH)/files
  12.174 +	$(QUIET) install -m 644 $(FC) $(FCPATH)
  12.175 +	$(QUIET) install -m 644 $(HOMEDIR_TEMPLATE) $(HOMEDIRPATH)
  12.176 +#	$(QUIET) $(GENHOMEDIRCON) -d $(TOPDIR) -t $(NAME) $(USEPWD)
  12.177 +
  12.178 +########################################
  12.179 +#
  12.180 +# Run policy source checks
  12.181 +#
  12.182 +check: policy.conf $(FC)
  12.183 +	$(SECHECK) -s --profile=development --policy=policy.conf --fcfile=$(FC) > $@.res
  12.184 +
  12.185 +longcheck: policy.conf $(FC)
  12.186 +	$(SECHECK) -s --profile=all --policy=policy.conf --fcfile=$(FC) > $@.res
  12.187 +
  12.188 +########################################
  12.189 +#
  12.190 +# Clean the sources
  12.191 +#
  12.192 +clean:
  12.193 +	rm -fR tmp
  12.194 +	rm -f policy.conf
  12.195 +	rm -f policy.$(PV)
  12.196 +	rm -f $(FC)
  12.197 +	rm -f *.res
  12.198 +
  12.199 +.PHONY: default policy install load reload enableaudit checklabels restorelabels relabel check longcheck clean
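
The enableaudit target is intended to be followed by a rebuild, so that
previously silenced denials show up in the logs; a sketch:

    make enableaudit   # strip dontaudit rules from policy.conf in place
    make load          # recompile and load the now-auditing policy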
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/flask/policy/policy/constraints	Fri Sep 12 14:47:40 2008 +0900
    13.3 @@ -0,0 +1,27 @@
    13.4 +
    13.5 +#
    13.6 +# Define the constraints
    13.7 +#
    13.8 +# constrain class_set perm_set expression ;
    13.9 +#
   13.10 +# expression : ( expression ) 
   13.11 +#	     | not expression
   13.12 +#	     | expression and expression
   13.13 +#	     | expression or expression
   13.14 +#	     | u1 op u2
   13.15 +#	     | r1 role_op r2
   13.16 +#	     | t1 op t2
   13.17 +#	     | u1 op names
   13.18 +#	     | u2 op names
   13.19 +#	     | r1 op names
   13.20 +#	     | r2 op names
   13.21 +#	     | t1 op names
   13.22 +#	     | t2 op names
   13.23 +#
   13.24 +# op : == | != 
   13.25 +# role_op : == | != | eq | dom | domby | incomp
   13.26 +#
   13.27 +# names : name | { name_list }
   13.28 +# name_list : name | name_list name		
   13.29 +#
   13.30 +
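
No constraints are defined yet. As an illustration of the grammar above,
a hypothetical rule restricting domain creation to same-user contexts
would read:

    constrain domain create ( u1 == u2 );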
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/flask/policy/policy/flask/Makefile	Fri Sep 12 14:47:40 2008 +0900
    14.3 @@ -0,0 +1,41 @@
    14.4 +# flask needs to know where to export the libselinux headers.
    14.5 +LIBSEL ?= ../../libselinux
    14.6 +
    14.7 +# flask needs to know where to export the kernel headers.
    14.8 +LINUXDIR ?= ../../../linux-2.6
    14.9 +
   14.10 +AWK = awk
   14.11 +
   14.12 +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
   14.13 +          else if [ -x /bin/bash ]; then echo /bin/bash; \
   14.14 +          else echo sh; fi ; fi)
   14.15 +
   14.16 +FLASK_H_DEPEND = security_classes initial_sids
   14.17 +AV_H_DEPEND = access_vectors
   14.18 +
   14.19 +FLASK_H_FILES = class_to_string.h flask.h initial_sid_to_string.h
   14.20 +AV_H_FILES = av_inherit.h common_perm_to_string.h av_perm_to_string.h av_permissions.h
   14.21 +ALL_H_FILES = $(FLASK_H_FILES) $(AV_H_FILES)
   14.22 +
   14.23 +all:  $(ALL_H_FILES)
   14.24 +
   14.25 +$(FLASK_H_FILES): $(FLASK_H_DEPEND)
   14.26 +	$(CONFIG_SHELL) mkflask.sh $(AWK) $(FLASK_H_DEPEND)
   14.27 +
   14.28 +$(AV_H_FILES): $(AV_H_DEPEND)
   14.29 +	$(CONFIG_SHELL) mkaccess_vector.sh $(AWK) $(AV_H_DEPEND)
   14.30 +
   14.31 +tolib: all
   14.32 +	install -m 644 flask.h av_permissions.h $(LIBSEL)/include/selinux
   14.33 +	install -m 644 class_to_string.h av_inherit.h common_perm_to_string.h av_perm_to_string.h $(LIBSEL)/src
   14.34 +
   14.35 +tokern: all
   14.36 +	install -m 644 $(ALL_H_FILES) $(LINUXDIR)/security/selinux/include
   14.37 +
   14.38 +install: all
   14.39 +
   14.40 +relabel:
   14.41 +
   14.42 +clean:  
   14.43 +	rm -f $(FLASK_H_FILES)
   14.44 +	rm -f $(AV_H_FILES)
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/flask/policy/policy/flask/access_vectors	Fri Sep 12 14:47:40 2008 +0900
    15.3 @@ -0,0 +1,166 @@
    15.4 +#
    15.5 +# Define common prefixes for access vectors
    15.6 +#
    15.7 +# common common_name { permission_name ... }
    15.8 +
    15.9 +#
   15.10 +# Define a common prefix for file access vectors.
   15.11 +#
   15.12 +
   15.13 +
   15.14 +#
   15.15 +# Define the access vectors.
   15.16 +#
   15.17 +# class class_name [ inherits common_name ] { permission_name ... }
   15.18 +
   15.19 +
   15.20 +#
   15.21 +# Define the access vector interpretation for file-related objects.
   15.22 +#
   15.23 +
   15.24 +class xen
   15.25 +{
   15.26 +	scheduler
   15.27 +	settime
   15.28 +	tbufcontrol
   15.29 +	readconsole
   15.30 +	clearconsole
   15.31 +	perfcontrol
   15.32 +	mtrr_add
   15.33 +	mtrr_del
   15.34 +	mtrr_read
   15.35 +	microcode
   15.36 +	physinfo
   15.37 +	quirk
    15.38 +	writeconsole
    15.39 +	readapic
    15.40 +	writeapic
    15.41 +	privprofile
    15.42 +	nonprivprofile
    15.43 +	kexec
   15.44 +	firmware
   15.45 +	sleep
   15.46 +	frequency
   15.47 +	getidle
   15.48 +	debug
   15.49 +	getcpuinfo
   15.50 +	heap
   15.51 +}
   15.52 +
   15.53 +class domain
   15.54 +{
   15.55 +	setvcpucontext
   15.56 +	pause
   15.57 +	unpause
    15.58 +	resume
    15.59 +	create
    15.60 +	transition
    15.61 +	max_vcpus
    15.62 +	destroy
    15.63 +	setvcpuaffinity
   15.64 +	getvcpuaffinity
   15.65 +	scheduler
   15.66 +	getdomaininfo
   15.67 +	getvcpuinfo
   15.68 +	getvcpucontext
   15.69 +	setdomainmaxmem
   15.70 +	setdomainhandle
   15.71 +	setdebugging
   15.72 +	hypercall
    15.73 +	settime
    15.74 +	set_target
    15.75 +	shutdown
    15.76 +	setaddrsize
    15.77 +	getaddrsize
   15.78 +	trigger
   15.79 +	getextvcpucontext
   15.80 +	setextvcpucontext
   15.81 +}
   15.82 +
   15.83 +class hvm
   15.84 +{
    15.85 +	sethvmc
    15.86 +	gethvmc
    15.87 +	setparam
    15.88 +	getparam
    15.89 +	pcilevel
    15.90 +	irqlevel
    15.91 +	pciroute
   15.92 +	bind_irq
   15.93 +	cacheattr
   15.94 +}
   15.95 +
   15.96 +class event
   15.97 +{
   15.98 +	bind
   15.99 +	send
  15.100 +	status
  15.101 +	notify
  15.102 +	create
   15.103 +	vector
   15.104 +	reset
  15.105 +}
  15.106 +
  15.107 +class grant
  15.108 +{
  15.109 +	map_read
  15.110 +	map_write
  15.111 +	unmap
  15.112 +	transfer
  15.113 +	setup
   15.114 +	copy
   15.115 +	query
  15.116 +}
  15.117 +
  15.118 +class mmu
  15.119 +{
  15.120 +	map_read
  15.121 +	map_write
  15.122 +	pageinfo
  15.123 +	pagelist
   15.124 +	adjust
   15.125 +	stat
   15.126 +	translategp
   15.127 +	updatemp
   15.128 +	physmap
   15.129 +	pinpage
   15.130 +	mfnlist
   15.131 +	memorymap
  15.132 +}
  15.133 +
  15.134 +class shadow
  15.135 +{
  15.136 +	disable
  15.137 +	enable
   15.138 +	logdirty
  15.139 +}
  15.140 +
  15.141 +class resource
  15.142 +{
  15.143 +	add
  15.144 +	remove
  15.145 +	use
  15.146 +	add_irq
  15.147 +	remove_irq
  15.148 +	add_ioport
  15.149 +	remove_ioport
  15.150 +	add_iomem
  15.151 +	remove_iomem
  15.152 +	stat_device
  15.153 +	add_device
  15.154 +	remove_device
  15.155 +}
  15.156 +
  15.157 +class security
  15.158 +{
  15.159 +	compute_av
  15.160 +	compute_create
  15.161 +	compute_member
  15.162 +	check_context
  15.163 +	load_policy
  15.164 +	compute_relabel
  15.165 +	compute_user
  15.166 +	setenforce
  15.167 +	setbool
  15.168 +	setsecparam
  15.169 +}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/flask/policy/policy/flask/initial_sids	Fri Sep 12 14:47:40 2008 +0900
    16.3 @@ -0,0 +1,17 @@
    16.4 +# FLASK
    16.5 +
    16.6 +#
    16.7 +# Define initial security identifiers 
    16.8 +#
    16.9 +sid xen
   16.10 +sid dom0
   16.11 +sid domU
   16.12 +sid domio
   16.13 +sid domxen
   16.14 +sid unlabeled
   16.15 +sid security
   16.16 +sid ioport
   16.17 +sid iomem
   16.18 +sid pirq
   16.19 +sid device
   16.20 +# FLASK
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/flask/policy/policy/flask/mkaccess_vector.sh	Fri Sep 12 14:47:40 2008 +0900
    17.3 @@ -0,0 +1,227 @@
    17.4 +#!/bin/sh -
    17.5 +#
    17.6 +
    17.7 +# FLASK
    17.8 +
    17.9 +set -e
   17.10 +
   17.11 +awk=$1
   17.12 +shift
   17.13 +
   17.14 +# output files
   17.15 +av_permissions="av_permissions.h"
   17.16 +av_inherit="av_inherit.h"
   17.17 +common_perm_to_string="common_perm_to_string.h"
   17.18 +av_perm_to_string="av_perm_to_string.h"
   17.19 +
   17.20 +cat $* | $awk "
   17.21 +BEGIN	{
   17.22 +		outfile = \"$av_permissions\"
   17.23 +		inheritfile = \"$av_inherit\"
   17.24 +		cpermfile = \"$common_perm_to_string\"
   17.25 +		avpermfile = \"$av_perm_to_string\"
   17.26 +		"'
   17.27 +		nextstate = "COMMON_OR_AV";
   17.28 +		printf("/* This file is automatically generated.  Do not edit. */\n") > outfile;
   17.29 +		printf("/* This file is automatically generated.  Do not edit. */\n") > inheritfile;
   17.30 +		printf("/* This file is automatically generated.  Do not edit. */\n") > cpermfile;
   17.31 +		printf("/* This file is automatically generated.  Do not edit. */\n") > avpermfile;
   17.32 +;
   17.33 +	}
   17.34 +/^[ \t]*#/	{ 
   17.35 +			next;
   17.36 +		}
   17.37 +$1 == "common"	{ 
   17.38 +			if (nextstate != "COMMON_OR_AV")
   17.39 +			{
   17.40 +				printf("Parse error:  Unexpected COMMON definition on line %d\n", NR);
   17.41 +				next;	
   17.42 +			}
   17.43 +
   17.44 +			if ($2 in common_defined)
   17.45 +			{
   17.46 +				printf("Duplicate COMMON definition for %s on line %d.\n", $2, NR);
   17.47 +				next;
   17.48 +			}	
   17.49 +			common_defined[$2] = 1;
   17.50 +
   17.51 +			tclass = $2;
   17.52 +			common_name = $2; 
   17.53 +			permission = 1;
   17.54 +
   17.55 +			printf("TB_(common_%s_perm_to_string)\n", $2) > cpermfile;
   17.56 +
   17.57 +			nextstate = "COMMON-OPENBRACKET";
   17.58 +			next;
   17.59 +		}
   17.60 +$1 == "class"	{
   17.61 +			if (nextstate != "COMMON_OR_AV" &&
   17.62 +			    nextstate != "CLASS_OR_CLASS-OPENBRACKET")
   17.63 +			{
   17.64 +				printf("Parse error:  Unexpected class definition on line %d\n", NR);
   17.65 +				next;	
   17.66 +			}
   17.67 +
   17.68 +			tclass = $2;
   17.69 +
   17.70 +			if (tclass in av_defined)
   17.71 +			{
   17.72 +				printf("Duplicate access vector definition for %s on line %d\n", tclass, NR);
   17.73 +				next;
   17.74 +			} 
   17.75 +			av_defined[tclass] = 1;
   17.76 +
   17.77 +			inherits = "";
   17.78 +			permission = 1;
   17.79 +
   17.80 +			nextstate = "INHERITS_OR_CLASS-OPENBRACKET";
   17.81 +			next;
   17.82 +		}
   17.83 +$1 == "inherits" {			
   17.84 +			if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET")
   17.85 +			{
   17.86 +				printf("Parse error:  Unexpected INHERITS definition on line %d\n", NR);
   17.87 +				next;	
   17.88 +			}
   17.89 +
   17.90 +			if (!($2 in common_defined))
   17.91 +			{
   17.92 +				printf("COMMON %s is not defined (line %d).\n", $2, NR);
   17.93 +				next;
   17.94 +			}
   17.95 +
   17.96 +			inherits = $2;
   17.97 +			permission = common_base[$2];
   17.98 +
   17.99 +			for (combined in common_perms)
  17.100 +			{
  17.101 +				split(combined,separate, SUBSEP);
  17.102 +				if (separate[1] == inherits)
  17.103 +				{
  17.104 +					inherited_perms[common_perms[combined]] = separate[2];
  17.105 +				}
  17.106 +			}
  17.107 +
  17.108 +                        j = 1;
  17.109 +                        for (i in inherited_perms) {
  17.110 +                            ind[j] = i + 0;
  17.111 +                            j++;
  17.112 +                        }
  17.113 +                        n = asort(ind);
  17.114 +			for (i = 1; i <= n; i++) {
  17.115 +				perm = inherited_perms[ind[i]];
  17.116 +				printf("#define %s__%s", toupper(tclass), toupper(perm)) > outfile; 
  17.117 +				spaces = 40 - (length(perm) + length(tclass));
  17.118 +				if (spaces < 1)
  17.119 +				      spaces = 1;
  17.120 +				for (j = 0; j < spaces; j++) 
  17.121 +					printf(" ") > outfile; 
  17.122 +				printf("0x%08xUL\n", ind[i]) > outfile; 
  17.123 +			}
  17.124 +			printf("\n") > outfile;
  17.125 +                        for (i in ind) delete ind[i];
  17.126 +                        for (i in inherited_perms) delete inherited_perms[i];
  17.127 +
  17.128 +			printf("   S_(SECCLASS_%s, %s, 0x%08xUL)\n", toupper(tclass), inherits, permission) > inheritfile; 
  17.129 +
  17.130 +			nextstate = "CLASS_OR_CLASS-OPENBRACKET";
  17.131 +			next;
  17.132 +		}
  17.133 +$1 == "{"	{ 
  17.134 +			if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET" &&
  17.135 +			    nextstate != "CLASS_OR_CLASS-OPENBRACKET" &&
  17.136 +			    nextstate != "COMMON-OPENBRACKET")
  17.137 +			{
  17.138 +				printf("Parse error:  Unexpected { on line %d\n", NR);
  17.139 +				next;
  17.140 +			}
  17.141 +
  17.142 +			if (nextstate == "INHERITS_OR_CLASS-OPENBRACKET")
  17.143 +				nextstate = "CLASS-CLOSEBRACKET";
  17.144 +
  17.145 +			if (nextstate == "CLASS_OR_CLASS-OPENBRACKET")
  17.146 +				nextstate = "CLASS-CLOSEBRACKET";
  17.147 +
  17.148 +			if (nextstate == "COMMON-OPENBRACKET")
  17.149 +				nextstate = "COMMON-CLOSEBRACKET";
  17.150 +		}
  17.151 +/[a-z][a-z_]*/	{
  17.152 +			if (nextstate != "COMMON-CLOSEBRACKET" &&
  17.153 +			    nextstate != "CLASS-CLOSEBRACKET")
  17.154 +			{
  17.155 +				printf("Parse error:  Unexpected symbol %s on line %d\n", $1, NR);		
  17.156 +				next;
  17.157 +			}
  17.158 +
  17.159 +			if (nextstate == "COMMON-CLOSEBRACKET")
  17.160 +			{
  17.161 +				if ((common_name,$1) in common_perms)
  17.162 +				{
  17.163 +					printf("Duplicate permission %s for common %s on line %d.\n", $1, common_name, NR);
  17.164 +					next;
  17.165 +				}
  17.166 +
  17.167 +				common_perms[common_name,$1] = permission;
  17.168 +
  17.169 +				printf("#define COMMON_%s__%s", toupper(common_name), toupper($1)) > outfile; 
  17.170 +
  17.171 +				printf("    S_(\"%s\")\n", $1) > cpermfile;
  17.172 +			}
  17.173 +			else
  17.174 +			{
  17.175 +				if ((tclass,$1) in av_perms)
  17.176 +				{
  17.177 +					printf("Duplicate permission %s for %s on line %d.\n", $1, tclass, NR);
  17.178 +					next;
  17.179 +				}
  17.180 +
  17.181 +				av_perms[tclass,$1] = permission;
  17.182 +		
  17.183 +				if (inherits != "")
  17.184 +				{
  17.185 +					if ((inherits,$1) in common_perms)
  17.186 +					{
   17.187 +						printf("Permission %s in %s on line %d conflicts with a common permission.\n", $1, tclass, NR);
  17.188 +						next;
  17.189 +					}
  17.190 +				}
  17.191 +
  17.192 +				printf("#define %s__%s", toupper(tclass), toupper($1)) > outfile; 
  17.193 +
  17.194 +				printf("   S_(SECCLASS_%s, %s__%s, \"%s\")\n", toupper(tclass), toupper(tclass), toupper($1), $1) > avpermfile; 
  17.195 +			}
  17.196 +
  17.197 +			spaces = 40 - (length($1) + length(tclass));
  17.198 +			if (spaces < 1)
  17.199 +			      spaces = 1;
  17.200 +
  17.201 +			for (i = 0; i < spaces; i++) 
  17.202 +				printf(" ") > outfile; 
  17.203 +			printf("0x%08xUL\n", permission) > outfile; 
  17.204 +			permission = permission * 2;
  17.205 +		}
  17.206 +$1 == "}"	{
  17.207 +			if (nextstate != "CLASS-CLOSEBRACKET" && 
  17.208 +			    nextstate != "COMMON-CLOSEBRACKET")
  17.209 +			{
  17.210 +				printf("Parse error:  Unexpected } on line %d\n", NR);
  17.211 +				next;
  17.212 +			}
  17.213 +
  17.214 +			if (nextstate == "COMMON-CLOSEBRACKET")
  17.215 +			{
  17.216 +				common_base[common_name] = permission;
  17.217 +				printf("TE_(common_%s_perm_to_string)\n\n", common_name) > cpermfile; 
  17.218 +			}
  17.219 +
  17.220 +			printf("\n") > outfile;
  17.221 +
  17.222 +			nextstate = "COMMON_OR_AV";
  17.223 +		}
  17.224 +END	{
  17.225 +		if (nextstate != "COMMON_OR_AV" && nextstate != "CLASS_OR_CLASS-OPENBRACKET")
  17.226 +			printf("Parse error:  Unexpected end of file\n");
  17.227 +
  17.228 +	}'
  17.229 +
  17.230 +# FLASK
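
To make the generated output concrete: for the "class domain" definition
in access_vectors above, the first two permissions come out in
av_permissions.h as (column padding elided)

    #define DOMAIN__SETVCPUCONTEXT    0x00000001UL
    #define DOMAIN__PAUSE             0x00000002UL

with a matching S_(SECCLASS_DOMAIN, DOMAIN__PAUSE, "pause") entry
written to av_perm_to_string.h.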
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/flask/policy/policy/flask/mkflask.sh	Fri Sep 12 14:47:40 2008 +0900
    18.3 @@ -0,0 +1,95 @@
    18.4 +#!/bin/sh -
    18.5 +#
    18.6 +
    18.7 +# FLASK
    18.8 +
    18.9 +set -e
   18.10 +
   18.11 +awk=$1
   18.12 +shift 1
   18.13 +
   18.14 +# output file
   18.15 +output_file="flask.h"
   18.16 +debug_file="class_to_string.h"
   18.17 +debug_file2="initial_sid_to_string.h"
   18.18 +
   18.19 +cat $* | $awk "
   18.20 +BEGIN	{
   18.21 +		outfile = \"$output_file\"
   18.22 +		debugfile = \"$debug_file\"
   18.23 +		debugfile2 = \"$debug_file2\"
   18.24 +		"'
   18.25 +		nextstate = "CLASS";
   18.26 +
   18.27 +		printf("/* This file is automatically generated.  Do not edit. */\n") > outfile;
   18.28 +
   18.29 +		printf("#ifndef _SELINUX_FLASK_H_\n") > outfile;
   18.30 +		printf("#define _SELINUX_FLASK_H_\n") > outfile;
   18.31 +		printf("\n/*\n * Security object class definitions\n */\n") > outfile;
   18.32 +		printf("/* This file is automatically generated.  Do not edit. */\n") > debugfile;
   18.33 +		printf("/*\n * Security object class definitions\n */\n") > debugfile;
   18.34 +		printf("    S_(\"null\")\n") > debugfile;
   18.35 +		printf("/* This file is automatically generated.  Do not edit. */\n") > debugfile2;
   18.36 +		printf("static char *initial_sid_to_string[] =\n{\n") > debugfile2;
   18.37 +		printf("    \"null\",\n") > debugfile2;
   18.38 +	}
   18.39 +/^[ \t]*#/	{ 
   18.40 +			next;
   18.41 +		}
   18.42 +$1 == "class"	{ 
   18.43 +			if (nextstate != "CLASS")
   18.44 +			{
   18.45 +				printf("Parse error:  Unexpected class definition on line %d\n", NR);
   18.46 +				next;	
   18.47 +			}
   18.48 +
   18.49 +			if ($2 in class_found)
   18.50 +			{
   18.51 +				printf("Duplicate class definition for %s on line %d.\n", $2, NR);
   18.52 +				next;
   18.53 +			}	
   18.54 +			class_found[$2] = 1;
   18.55 +
   18.56 +			class_value++;
   18.57 +
   18.58 +			printf("#define SECCLASS_%s", toupper($2)) > outfile;
   18.59 +			for (i = 0; i < 40 - length($2); i++) 
   18.60 +				printf(" ") > outfile; 
   18.61 +			printf("%d\n", class_value) > outfile; 
   18.62 +
   18.63 +			printf("    S_(\"%s\")\n", $2) > debugfile;
   18.64 +		}
   18.65 +$1 == "sid"	{ 
   18.66 +			if (nextstate == "CLASS")
   18.67 +			{
   18.68 +			    nextstate = "SID";
   18.69 +			    printf("\n/*\n * Security identifier indices for initial entities\n */\n") > outfile;			    
   18.70 +			}
   18.71 +
   18.72 +			if ($2 in sid_found)
   18.73 +			{
   18.74 +				printf("Duplicate SID definition for %s on line %d.\n", $2, NR);
   18.75 +				next;
   18.76 +			}	
   18.77 +			sid_found[$2] = 1;
   18.78 +			sid_value++;
   18.79 +
   18.80 +			printf("#define SECINITSID_%s", toupper($2)) > outfile;
   18.81 +			for (i = 0; i < 37 - length($2); i++) 
   18.82 +				printf(" ") > outfile; 
   18.83 +			printf("%d\n", sid_value) > outfile; 
   18.84 +			printf("    \"%s\",\n", $2) > debugfile2;
   18.85 +		}
   18.86 +END	{
   18.87 +		if (nextstate != "SID")
   18.88 +			printf("Parse error:  Unexpected end of file\n");
   18.89 +
   18.90 +		printf("\n#define SECINITSID_NUM") > outfile;
   18.91 +		for (i = 0; i < 34; i++) 
   18.92 +			printf(" ") > outfile; 
   18.93 +		printf("%d\n", sid_value) > outfile; 
   18.94 +		printf("\n#endif\n") > outfile;
   18.95 +		printf("};\n\n") > debugfile2;
   18.96 +	}'
   18.97 +
   18.98 +# FLASK
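
Run over the security_classes and initial_sids files below, this script
numbers classes and SIDs in declaration order, so flask.h begins roughly
as (padding elided):

    #define SECCLASS_XEN        1
    #define SECCLASS_DOMAIN     2
    ...
    #define SECINITSID_XEN      1
    #define SECINITSID_DOM0     2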
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/flask/policy/policy/flask/security_classes	Fri Sep 12 14:47:40 2008 +0900
    19.3 @@ -0,0 +1,20 @@
    19.4 +# FLASK
    19.5 +
    19.6 +#
    19.7 +# Define the security object classes 
    19.8 +#
    19.9 +
   19.10 +# Classes marked as userspace are classes
   19.11 +# for userspace object managers
   19.12 +
   19.13 +class xen
   19.14 +class domain
   19.15 +class hvm
   19.16 +class mmu
   19.17 +class resource
   19.18 +class shadow
   19.19 +class event
   19.20 +class grant
   19.21 +class security
   19.22 +
   19.23 +# FLASK
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/flask/policy/policy/global_booleans	Fri Sep 12 14:47:40 2008 +0900
    20.3 @@ -0,0 +1,5 @@
    20.4 +#
    20.5 +# This file is for the declaration of global booleans.
    20.6 +# To change the default value at build time, the booleans.conf
    20.7 +# file should be used.
    20.8 +#
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/flask/policy/policy/global_tunables	Fri Sep 12 14:47:40 2008 +0900
    21.3 @@ -0,0 +1,6 @@
    21.4 +#
    21.5 +# This file is for the declaration of global tunables.
    21.6 +# To change the default value at build time, the booleans.conf
    21.7 +# file should be used.
    21.8 +#
    21.9 +
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/flask/policy/policy/mcs	Fri Sep 12 14:47:40 2008 +0900
    22.3 @@ -0,0 +1,324 @@
    22.4 +ifdef(`enable_mcs',`
    22.5 +#
    22.6 +# Define sensitivities 
    22.7 +#
    22.8 +# Each sensitivity has a name and zero or more aliases.
    22.9 +#
   22.10 +# MCS is single-sensitivity.
   22.11 +#
   22.12 +sensitivity s0;
   22.13 +
   22.14 +#
   22.15 +# Define the ordering of the sensitivity levels (least to greatest)
   22.16 +#
   22.17 +dominance { s0 }
   22.18 +
   22.19 +
   22.20 +#
   22.21 +# Define the categories
   22.22 +#
   22.23 +# Each category has a name and zero or more aliases.
   22.24 +#
   22.25 +category c0;
   22.26 +category c1;
   22.27 +category c2;
   22.28 +category c3;
   22.29 +category c4;
   22.30 +category c5;
   22.31 +category c6;
   22.32 +category c7;
   22.33 +category c8;
   22.34 +category c9;
   22.35 +category c10;
   22.36 +category c11;
   22.37 +category c12;
   22.38 +category c13;
   22.39 +category c14;
   22.40 +category c15;
   22.41 +category c16;
   22.42 +category c17;
   22.43 +category c18;
   22.44 +category c19;
   22.45 +category c20;
   22.46 +category c21;
   22.47 +category c22;
   22.48 +category c23;
   22.49 +category c24;
   22.50 +category c25;
   22.51 +category c26;
   22.52 +category c27;
   22.53 +category c28;
   22.54 +category c29;
   22.55 +category c30;
   22.56 +category c31;
   22.57 +category c32;
   22.58 +category c33;
   22.59 +category c34;
   22.60 +category c35;
   22.61 +category c36;
   22.62 +category c37;
   22.63 +category c38;
   22.64 +category c39;
   22.65 +category c40;
   22.66 +category c41;
   22.67 +category c42;
   22.68 +category c43;
   22.69 +category c44;
   22.70 +category c45;
   22.71 +category c46;
   22.72 +category c47;
   22.73 +category c48;
   22.74 +category c49;
   22.75 +category c50;
   22.76 +category c51;
   22.77 +category c52;
   22.78 +category c53;
   22.79 +category c54;
   22.80 +category c55;
   22.81 +category c56;
   22.82 +category c57;
   22.83 +category c58;
   22.84 +category c59;
   22.85 +category c60;
   22.86 +category c61;
   22.87 +category c62;
   22.88 +category c63;
   22.89 +category c64;
   22.90 +category c65;
   22.91 +category c66;
   22.92 +category c67;
   22.93 +category c68;
   22.94 +category c69;
   22.95 +category c70;
   22.96 +category c71;
   22.97 +category c72;
   22.98 +category c73;
   22.99 +category c74;
  22.100 +category c75;
  22.101 +category c76;
  22.102 +category c77;
  22.103 +category c78;
  22.104 +category c79;
  22.105 +category c80;
  22.106 +category c81;
  22.107 +category c82;
  22.108 +category c83;
  22.109 +category c84;
  22.110 +category c85;
  22.111 +category c86;
  22.112 +category c87;
  22.113 +category c88;
  22.114 +category c89;
  22.115 +category c90;
  22.116 +category c91;
  22.117 +category c92;
  22.118 +category c93;
  22.119 +category c94;
  22.120 +category c95;
  22.121 +category c96;
  22.122 +category c97;
  22.123 +category c98;
  22.124 +category c99;
  22.125 +category c100;
  22.126 +category c101;
  22.127 +category c102;
  22.128 +category c103;
  22.129 +category c104;
  22.130 +category c105;
  22.131 +category c106;
  22.132 +category c107;
  22.133 +category c108;
  22.134 +category c109;
  22.135 +category c110;
  22.136 +category c111;
  22.137 +category c112;
  22.138 +category c113;
  22.139 +category c114;
  22.140 +category c115;
  22.141 +category c116;
  22.142 +category c117;
  22.143 +category c118;
  22.144 +category c119;
  22.145 +category c120;
  22.146 +category c121;
  22.147 +category c122;
  22.148 +category c123;
  22.149 +category c124;
  22.150 +category c125;
  22.151 +category c126;
  22.152 +category c127;
  22.153 +category c128;
  22.154 +category c129;
  22.155 +category c130;
  22.156 +category c131;
  22.157 +category c132;
  22.158 +category c133;
  22.159 +category c134;
  22.160 +category c135;
  22.161 +category c136;
  22.162 +category c137;
  22.163 +category c138;
  22.164 +category c139;
  22.165 +category c140;
  22.166 +category c141;
  22.167 +category c142;
  22.168 +category c143;
  22.169 +category c144;
  22.170 +category c145;
  22.171 +category c146;
  22.172 +category c147;
  22.173 +category c148;
  22.174 +category c149;
  22.175 +category c150;
  22.176 +category c151;
  22.177 +category c152;
  22.178 +category c153;
  22.179 +category c154;
  22.180 +category c155;
  22.181 +category c156;
  22.182 +category c157;
  22.183 +category c158;
  22.184 +category c159;
  22.185 +category c160;
  22.186 +category c161;
  22.187 +category c162;
  22.188 +category c163;
  22.189 +category c164;
  22.190 +category c165;
  22.191 +category c166;
  22.192 +category c167;
  22.193 +category c168;
  22.194 +category c169;
  22.195 +category c170;
  22.196 +category c171;
  22.197 +category c172;
  22.198 +category c173;
  22.199 +category c174;
  22.200 +category c175;
  22.201 +category c176;
  22.202 +category c177;
  22.203 +category c178;
  22.204 +category c179;
  22.205 +category c180;
  22.206 +category c181;
  22.207 +category c182;
  22.208 +category c183;
  22.209 +category c184;
  22.210 +category c185;
  22.211 +category c186;
  22.212 +category c187;
  22.213 +category c188;
  22.214 +category c189;
  22.215 +category c190;
  22.216 +category c191;
  22.217 +category c192;
  22.218 +category c193;
  22.219 +category c194;
  22.220 +category c195;
  22.221 +category c196;
  22.222 +category c197;
  22.223 +category c198;
  22.224 +category c199;
  22.225 +category c200;
  22.226 +category c201;
  22.227 +category c202;
  22.228 +category c203;
  22.229 +category c204;
  22.230 +category c205;
  22.231 +category c206;
  22.232 +category c207;
  22.233 +category c208;
  22.234 +category c209;
  22.235 +category c210;
  22.236 +category c211;
  22.237 +category c212;
  22.238 +category c213;
  22.239 +category c214;
  22.240 +category c215;
  22.241 +category c216;
  22.242 +category c217;
  22.243 +category c218;
  22.244 +category c219;
  22.245 +category c220;
  22.246 +category c221;
  22.247 +category c222;
  22.248 +category c223;
  22.249 +category c224;
  22.250 +category c225;
  22.251 +category c226;
  22.252 +category c227;
  22.253 +category c228;
  22.254 +category c229;
  22.255 +category c230;
  22.256 +category c231;
  22.257 +category c232;
  22.258 +category c233;
  22.259 +category c234;
  22.260 +category c235;
  22.261 +category c236;
  22.262 +category c237;
  22.263 +category c238;
  22.264 +category c239;
  22.265 +category c240;
  22.266 +category c241;
  22.267 +category c242;
  22.268 +category c243;
  22.269 +category c244;
  22.270 +category c245;
  22.271 +category c246;
  22.272 +category c247;
  22.273 +category c248;
  22.274 +category c249;
  22.275 +category c250;
  22.276 +category c251;
  22.277 +category c252;
  22.278 +category c253;
  22.279 +category c254;
  22.280 +category c255;
  22.281 +
  22.282 +
  22.283 +#
  22.284 +# Each MCS level specifies a sensitivity and zero or more categories which may
  22.285 +# be associated with that sensitivity.
  22.286 +#
  22.287 +level s0:c0.c255;
  22.288 +
  22.289 +#
  22.290 +# Define the MCS policy
  22.291 +#
  22.292 +# mlsconstrain class_set perm_set expression ;
  22.293 +#
  22.294 +# mlsvalidatetrans class_set expression ;
  22.295 +#
  22.296 +# expression : ( expression )
  22.297 +#	     | not expression
  22.298 +#	     | expression and expression
  22.299 +#	     | expression or expression
  22.300 +#	     | u1 op u2
  22.301 +#	     | r1 role_mls_op r2
  22.302 +#	     | t1 op t2
  22.303 +#	     | l1 role_mls_op l2
  22.304 +#	     | l1 role_mls_op h2
  22.305 +#	     | h1 role_mls_op l2
  22.306 +#	     | h1 role_mls_op h2
  22.307 +#	     | l1 role_mls_op h1
  22.308 +#	     | l2 role_mls_op h2
  22.309 +#	     | u1 op names
  22.310 +#	     | u2 op names
  22.311 +#	     | r1 op names
  22.312 +#	     | r2 op names
  22.313 +#	     | t1 op names
  22.314 +#	     | t2 op names
  22.315 +#	     | u3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.316 +#	     | r3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.317 +#	     | t3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.318 +#
  22.319 +# op : == | !=
  22.320 +# role_mls_op : == | != | eq | dom | domby | incomp
  22.321 +#
  22.322 +# names : name | { name_list }
  22.323 +# name_list : name | name_list name
  22.324 +#
  22.325 +
  22.326 +
  22.327 +') dnl end enable_mcs
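
Illustrative example (not itself part of this changeset): using the
constraint grammar documented above, a hypothetical MCS constraint on
domain creation could be written as

    mlsconstrain domain create ( l1 dom h2 );

i.e. the creating domain's low level must dominate the new domain's high
level. The file as shipped defines the sensitivity, categories and level
but no constraints yet.
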
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/flask/policy/policy/mls	Fri Sep 12 14:47:40 2008 +0900
    23.3 @@ -0,0 +1,354 @@
    23.4 +
    23.5 +ifdef(`enable_mls',`
    23.6 +#
    23.7 +# Define sensitivities 
    23.8 +#
    23.9 +# Each sensitivity has a name and zero or more aliases.
   23.10 +#
   23.11 +sensitivity s0;
   23.12 +sensitivity s1;
   23.13 +sensitivity s2;
   23.14 +sensitivity s3;
   23.15 +sensitivity s4;
   23.16 +sensitivity s5;
   23.17 +sensitivity s6;
   23.18 +sensitivity s7;
   23.19 +sensitivity s8;
   23.20 +sensitivity s9;
   23.21 +sensitivity s10;
   23.22 +sensitivity s11;
   23.23 +sensitivity s12;
   23.24 +sensitivity s13;
   23.25 +sensitivity s14;
   23.26 +sensitivity s15;
   23.27 +
   23.28 +#
   23.29 +# Define the ordering of the sensitivity levels (least to greatest)
   23.30 +#
   23.31 +dominance { s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 }
   23.32 +
   23.33 +
   23.34 +#
   23.35 +# Define the categories
   23.36 +#
   23.37 +# Each category has a name and zero or more aliases.
   23.38 +#
   23.39 +category c0;
   23.40 +category c1;
   23.41 +category c2;
   23.42 +category c3;
   23.43 +category c4;
   23.44 +category c5;
   23.45 +category c6;
   23.46 +category c7;
   23.47 +category c8;
   23.48 +category c9;
   23.49 +category c10;
   23.50 +category c11;
   23.51 +category c12;
   23.52 +category c13;
   23.53 +category c14;
   23.54 +category c15;
   23.55 +category c16;
   23.56 +category c17;
   23.57 +category c18;
   23.58 +category c19;
   23.59 +category c20;
   23.60 +category c21;
   23.61 +category c22;
   23.62 +category c23;
   23.63 +category c24;
   23.64 +category c25;
   23.65 +category c26;
   23.66 +category c27;
   23.67 +category c28;
   23.68 +category c29;
   23.69 +category c30;
   23.70 +category c31;
   23.71 +category c32;
   23.72 +category c33;
   23.73 +category c34;
   23.74 +category c35;
   23.75 +category c36;
   23.76 +category c37;
   23.77 +category c38;
   23.78 +category c39;
   23.79 +category c40;
   23.80 +category c41;
   23.81 +category c42;
   23.82 +category c43;
   23.83 +category c44;
   23.84 +category c45;
   23.85 +category c46;
   23.86 +category c47;
   23.87 +category c48;
   23.88 +category c49;
   23.89 +category c50;
   23.90 +category c51;
   23.91 +category c52;
   23.92 +category c53;
   23.93 +category c54;
   23.94 +category c55;
   23.95 +category c56;
   23.96 +category c57;
   23.97 +category c58;
   23.98 +category c59;
   23.99 +category c60;
  23.100 +category c61;
  23.101 +category c62;
  23.102 +category c63;
  23.103 +category c64;
  23.104 +category c65;
  23.105 +category c66;
  23.106 +category c67;
  23.107 +category c68;
  23.108 +category c69;
  23.109 +category c70;
  23.110 +category c71;
  23.111 +category c72;
  23.112 +category c73;
  23.113 +category c74;
  23.114 +category c75;
  23.115 +category c76;
  23.116 +category c77;
  23.117 +category c78;
  23.118 +category c79;
  23.119 +category c80;
  23.120 +category c81;
  23.121 +category c82;
  23.122 +category c83;
  23.123 +category c84;
  23.124 +category c85;
  23.125 +category c86;
  23.126 +category c87;
  23.127 +category c88;
  23.128 +category c89;
  23.129 +category c90;
  23.130 +category c91;
  23.131 +category c92;
  23.132 +category c93;
  23.133 +category c94;
  23.134 +category c95;
  23.135 +category c96;
  23.136 +category c97;
  23.137 +category c98;
  23.138 +category c99;
  23.139 +category c100;
  23.140 +category c101;
  23.141 +category c102;
  23.142 +category c103;
  23.143 +category c104;
  23.144 +category c105;
  23.145 +category c106;
  23.146 +category c107;
  23.147 +category c108;
  23.148 +category c109;
  23.149 +category c110;
  23.150 +category c111;
  23.151 +category c112;
  23.152 +category c113;
  23.153 +category c114;
  23.154 +category c115;
  23.155 +category c116;
  23.156 +category c117;
  23.157 +category c118;
  23.158 +category c119;
  23.159 +category c120;
  23.160 +category c121;
  23.161 +category c122;
  23.162 +category c123;
  23.163 +category c124;
  23.164 +category c125;
  23.165 +category c126;
  23.166 +category c127;
  23.167 +category c128;
  23.168 +category c129;
  23.169 +category c130;
  23.170 +category c131;
  23.171 +category c132;
  23.172 +category c133;
  23.173 +category c134;
  23.174 +category c135;
  23.175 +category c136;
  23.176 +category c137;
  23.177 +category c138;
  23.178 +category c139;
  23.179 +category c140;
  23.180 +category c141;
  23.181 +category c142;
  23.182 +category c143;
  23.183 +category c144;
  23.184 +category c145;
  23.185 +category c146;
  23.186 +category c147;
  23.187 +category c148;
  23.188 +category c149;
  23.189 +category c150;
  23.190 +category c151;
  23.191 +category c152;
  23.192 +category c153;
  23.193 +category c154;
  23.194 +category c155;
  23.195 +category c156;
  23.196 +category c157;
  23.197 +category c158;
  23.198 +category c159;
  23.199 +category c160;
  23.200 +category c161;
  23.201 +category c162;
  23.202 +category c163;
  23.203 +category c164;
  23.204 +category c165;
  23.205 +category c166;
  23.206 +category c167;
  23.207 +category c168;
  23.208 +category c169;
  23.209 +category c170;
  23.210 +category c171;
  23.211 +category c172;
  23.212 +category c173;
  23.213 +category c174;
  23.214 +category c175;
  23.215 +category c176;
  23.216 +category c177;
  23.217 +category c178;
  23.218 +category c179;
  23.219 +category c180;
  23.220 +category c181;
  23.221 +category c182;
  23.222 +category c183;
  23.223 +category c184;
  23.224 +category c185;
  23.225 +category c186;
  23.226 +category c187;
  23.227 +category c188;
  23.228 +category c189;
  23.229 +category c190;
  23.230 +category c191;
  23.231 +category c192;
  23.232 +category c193;
  23.233 +category c194;
  23.234 +category c195;
  23.235 +category c196;
  23.236 +category c197;
  23.237 +category c198;
  23.238 +category c199;
  23.239 +category c200;
  23.240 +category c201;
  23.241 +category c202;
  23.242 +category c203;
  23.243 +category c204;
  23.244 +category c205;
  23.245 +category c206;
  23.246 +category c207;
  23.247 +category c208;
  23.248 +category c209;
  23.249 +category c210;
  23.250 +category c211;
  23.251 +category c212;
  23.252 +category c213;
  23.253 +category c214;
  23.254 +category c215;
  23.255 +category c216;
  23.256 +category c217;
  23.257 +category c218;
  23.258 +category c219;
  23.259 +category c220;
  23.260 +category c221;
  23.261 +category c222;
  23.262 +category c223;
  23.263 +category c224;
  23.264 +category c225;
  23.265 +category c226;
  23.266 +category c227;
  23.267 +category c228;
  23.268 +category c229;
  23.269 +category c230;
  23.270 +category c231;
  23.271 +category c232;
  23.272 +category c233;
  23.273 +category c234;
  23.274 +category c235;
  23.275 +category c236;
  23.276 +category c237;
  23.277 +category c238;
  23.278 +category c239;
  23.279 +category c240;
  23.280 +category c241;
  23.281 +category c242;
  23.282 +category c243;
  23.283 +category c244;
  23.284 +category c245;
  23.285 +category c246;
  23.286 +category c247;
  23.287 +category c248;
  23.288 +category c249;
  23.289 +category c250;
  23.290 +category c251;
  23.291 +category c252;
  23.292 +category c253;
  23.293 +category c254;
  23.294 +category c255;
  23.295 +
  23.296 +
  23.297 +#
  23.298 +# Each MLS level specifies a sensitivity and zero or more categories which may
  23.299 +# be associated with that sensitivity.
  23.300 +#
  23.301 +level s0:c0.c255;
  23.302 +level s1:c0.c255;
  23.303 +level s2:c0.c255;
  23.304 +level s3:c0.c255;
  23.305 +level s4:c0.c255;
  23.306 +level s5:c0.c255;
  23.307 +level s6:c0.c255;
  23.308 +level s7:c0.c255;
  23.309 +level s8:c0.c255;
  23.310 +level s9:c0.c255;
  23.311 +level s10:c0.c255;
  23.312 +level s11:c0.c255;
  23.313 +level s12:c0.c255;
  23.314 +level s13:c0.c255;
  23.315 +level s14:c0.c255;
  23.316 +level s15:c0.c255;
  23.317 +
  23.318 +
  23.319 +#
  23.320 +# Define the MLS policy
  23.321 +#
  23.322 +# mlsconstrain class_set perm_set expression ;
  23.323 +#
  23.324 +# mlsvalidatetrans class_set expression ;
  23.325 +#
  23.326 +# expression : ( expression )
  23.327 +#	     | not expression
  23.328 +#	     | expression and expression
  23.329 +#	     | expression or expression
  23.330 +#	     | u1 op u2
  23.331 +#	     | r1 role_mls_op r2
  23.332 +#	     | t1 op t2
  23.333 +#	     | l1 role_mls_op l2
  23.334 +#	     | l1 role_mls_op h2
  23.335 +#	     | h1 role_mls_op l2
  23.336 +#	     | h1 role_mls_op h2
  23.337 +#	     | l1 role_mls_op h1
  23.338 +#	     | l2 role_mls_op h2
  23.339 +#	     | u1 op names
  23.340 +#	     | u2 op names
  23.341 +#	     | r1 op names
  23.342 +#	     | r2 op names
  23.343 +#	     | t1 op names
  23.344 +#	     | t2 op names
  23.345 +#	     | u3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.346 +#	     | r3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.347 +#	     | t3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.348 +#
  23.349 +# op : == | !=
  23.350 +# role_mls_op : == | != | eq | dom | domby | incomp
  23.351 +#
  23.352 +# names : name | { name_list }
  23.353 +# name_list : name | name_list name
  23.354 +#
  23.355 +
  23.356 +
  23.357 +') dnl end enable_mls
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/flask/policy/policy/modules.conf	Fri Sep 12 14:47:40 2008 +0900
    24.3 @@ -0,0 +1,21 @@
    24.4 +#
    24.5 +# This file contains a listing of available modules.
     24.6 +# To prevent a module from being used in policy
    24.7 +# creation, set the module name to "off".
    24.8 +#
    24.9 +# For monolithic policies, modules set to "base" and "module"
   24.10 +# will be built into the policy.
   24.11 +#
    24.12 +# For modular policies, modules set to "base" will be
    24.13 +# included in the base module; modules set to "module"
    24.14 +# will be compiled as individual loadable modules.
   24.15 +#
   24.16 +
   24.17 +# Layer: xen
   24.18 +# Module: xen
   24.19 +# Required in base
   24.20 +#
   24.21 +# Policy for xen.
   24.22 +# 
   24.23 +xen = base
   24.24 +
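
For example, a hypothetical optional module named "foo" could be excluded
from the built policy with a line of the form

    foo = off

whereas the xen module above is marked as required in the base module.
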
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/flask/policy/policy/modules/xen/xen.if	Fri Sep 12 14:47:40 2008 +0900
    25.3 @@ -0,0 +1,1 @@
    25.4 +#
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/tools/flask/policy/policy/modules/xen/xen.te	Fri Sep 12 14:47:40 2008 +0900
    26.3 @@ -0,0 +1,135 @@
    26.4 +attribute xen_type;
    26.5 +attribute domain_type;
    26.6 +attribute resource_type;
    26.7 +attribute event_type;
    26.8 +
    26.9 +type xen_t, xen_type, domain_type;
   26.10 +
   26.11 +type dom0_t, domain_type;
   26.12 +
   26.13 +type domio_t, domain_type;
   26.14 +
   26.15 +type domxen_t, domain_type;
   26.16 +
   26.17 +type unlabeled_t, domain_type;
   26.18 +
   26.19 +type security_t, domain_type;
   26.20 +
   26.21 +type pirq_t, resource_type;
   26.22 +type ioport_t, resource_type;
   26.23 +type iomem_t, resource_type;
   26.24 +type device_t, resource_type;
   26.25 +
   26.26 +################################################################################
   26.27 +#
   26.28 +# create_domain(priv_dom, domain, channel)
   26.29 +#
   26.30 +################################################################################
   26.31 +define(`create_domain', `
   26.32 +	type $2, domain_type;
   26.33 +	allow $1 $2:domain {create max_vcpus setdomainmaxmem 
   26.34 +				setaddrsize getdomaininfo hypercall 
   26.35 +				setvcpucontext scheduler unpause 
   26.36 +				getvcpuinfo getaddrsize getvcpuaffinity};
   26.37 +	allow $1 $2:shadow {enable};
   26.38 +	allow $1 $2:mmu {map_read map_write memorymap adjust pinpage};
   26.39 +	allow $2 $2:mmu {map_read map_write pinpage};
   26.40 +	allow $2 domio_t:mmu {map_read};
   26.41 +	allow $2 $2:grant {query setup};
   26.42 +	allow $1 $2:grant {map_read unmap};
   26.43 +	allow $1 $3:event {create};
   26.44 +')
   26.45 +
   26.46 +################################################################################
   26.47 +#
   26.48 +# manage_domain(priv_dom, domain)
   26.49 +#
   26.50 +################################################################################
   26.51 +define(`manage_domain', `
   26.52 +	allow $1 $2:domain {pause destroy};
   26.53 +')
   26.54 +
   26.55 +################################################################################
   26.56 +#
   26.57 +# create_channel(caller, peer, channel)
   26.58 +#
   26.59 +################################################################################
   26.60 +define(`create_channel', `
   26.61 +	type $3, event_type;
   26.62 +	type_transition $1 $2:event $3;
   26.63 +	allow $1 $3:event {create};
   26.64 +	allow $3 $2:event {bind};
   26.65 +')
   26.66 +
   26.67 +################################################################################
   26.68 +#
   26.69 +# Boot the hypervisor and dom0
   26.70 +#
   26.71 +################################################################################
   26.72 +allow dom0_t xen_t:xen {kexec readapic writeapic mtrr_read mtrr_add mtrr_del 
   26.73 +scheduler physinfo heap quirk readconsole writeconsole settime microcode};
   26.74 +
   26.75 +allow dom0_t domio_t:mmu {map_read map_write};
   26.76 +allow dom0_t iomem_t:mmu {map_read map_write};
   26.77 +allow dom0_t pirq_t:event {vector};
   26.78 +allow dom0_t xen_t:mmu {memorymap};
   26.79 +
   26.80 +allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
   26.81 +allow dom0_t dom0_t:grant {query setup};
   26.82 +allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo getvcpuaffinity};
   26.83 +
   26.84 +allow xen_t dom0_t:domain {create};
   26.85 +allow xen_t dom0_t:resource {add remove};
   26.86 +allow xen_t ioport_t:resource {add_ioport remove_ioport};
   26.87 +allow dom0_t ioport_t:resource {use};
   26.88 +allow xen_t iomem_t:resource {add_iomem remove_iomem};
   26.89 +allow dom0_t iomem_t:resource {use};
   26.90 +allow xen_t pirq_t:resource {add_irq remove_irq};
   26.91 +allow dom0_t pirq_t:resource {use};
   26.92 +
   26.93 +allow dom0_t security_t:security {compute_av compute_create compute_member 
   26.94 +check_context load_policy compute_relabel compute_user setenforce setbool
   26.95 +setsecparam};
   26.96 +
   26.97 +create_channel(dom0_t, dom0_t, evchn0-0_t)
   26.98 +allow dom0_t evchn0-0_t:event {send};
   26.99 +
  26.100 +################################################################################
  26.101 +#
  26.102 +# Create and manage a domU w/ dom0 IO
  26.103 +#
  26.104 +################################################################################
  26.105 +create_domain(dom0_t, domU_t, evchnU-0_t)
  26.106 +
  26.107 +create_channel(domU_t, domU_t, evchnU-U_t)
  26.108 +allow domU_t evchnU-U_t:event {send};
  26.109 +
  26.110 +create_channel(dom0_t, domU_t, evchn0-U_t)
  26.111 +allow dom0_t evchn0-U_t:event {send};
  26.112 +
  26.113 +create_channel(domU_t, dom0_t, evchnU-0_t)
  26.114 +allow domU_t evchnU-0_t:event {send};
  26.115 +
  26.116 +manage_domain(dom0_t, domU_t)
  26.117 +
  26.118 +################################################################################
  26.119 +#
  26.120 +#
  26.121 +#
  26.122 +################################################################################
  26.123 +sid xen gen_context(system_u:system_r:xen_t,s0)
  26.124 +sid dom0 gen_context(system_u:system_r:dom0_t,s0)
  26.125 +sid domU gen_context(system_u:system_r:domU_t,s0)
  26.126 +sid domxen gen_context(system_u:system_r:domxen_t,s0)
  26.127 +sid domio gen_context(system_u:system_r:domio_t,s0)
  26.128 +sid unlabeled gen_context(system_u:system_r:unlabeled_t,s0)
  26.129 +sid security gen_context(system_u:system_r:security_t,s0)
  26.130 +sid pirq gen_context(system_u:object_r:pirq_t,s0)
  26.131 +sid iomem gen_context(system_u:object_r:iomem_t,s0)
  26.132 +sid ioport gen_context(system_u:object_r:ioport_t,s0)
  26.133 +sid device gen_context(system_u:object_r:device_t,s0)
  26.134 +
  26.135 +role system_r types { xen_type domain_type };
  26.136 +role user_r types { xen_type domain_type };
  26.137 +role sysadm_r types { xen_type domain_type };
  26.138 +role staff_r types { xen_type domain_type };
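
For reference, the m4 macros above are expanded at policy build time; by
direct substitution into the macro body, create_channel(dom0_t, dom0_t,
evchn0-0_t) produces

    type evchn0-0_t, event_type;
    type_transition dom0_t dom0_t:event evchn0-0_t;
    allow dom0_t evchn0-0_t:event {create};
    allow evchn0-0_t dom0_t:event {bind};

so evchn0-0_t labels the event channels dom0 creates to itself, and the
explicit "allow dom0_t evchn0-0_t:event {send};" rule above grants the
send right on top of that.
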
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/flask/policy/policy/support/loadable_module.spt	Fri Sep 12 14:47:40 2008 +0900
    27.3 @@ -0,0 +1,166 @@
    27.4 +########################################
    27.5 +#
    27.6 +# Macros for switching between source policy
    27.7 +# and loadable policy module support
    27.8 +#
    27.9 +
   27.10 +##############################
   27.11 +#
   27.12 +# For adding the module statement
   27.13 +#
   27.14 +define(`policy_module',`
   27.15 +	ifdef(`self_contained_policy',`',`
   27.16 +		module $1 $2;
   27.17 +
   27.18 +		require {
   27.19 +			role system_r;
   27.20 +			all_kernel_class_perms
   27.21 +		}
   27.22 +	')
   27.23 +')
   27.24 +
   27.25 +##############################
   27.26 +#
   27.27 +# For use in interfaces, to optionally insert a require block
   27.28 +#
   27.29 +define(`gen_require',`
   27.30 +	ifdef(`self_contained_policy',`',`
   27.31 +		define(`in_gen_require_block')
   27.32 +		require {
   27.33 +			$1
   27.34 +		}
   27.35 +		undefine(`in_gen_require_block')
   27.36 +	')
   27.37 +')
   27.38 +
   27.39 +##############################
   27.40 +#
    27.41 +# In the future, interfaces should be in loadable modules
   27.42 +#
   27.43 +# template(name,rules)
   27.44 +#
   27.45 +define(`template',`
   27.46 +	`define(`$1',`
   27.47 +##### begin $1(dollarsstar)
   27.48 +		$2
   27.49 +##### end $1(dollarsstar)
   27.50 +	'')
   27.51 +')
   27.52 +
    27.53 +# helper function, since m4 won't expand macros
   27.54 +# if a line is a comment (#):
   27.55 +define(`policy_m4_comment',`dnl
   27.56 +##### $2 depth: $1
   27.57 +')dnl
   27.58 +
   27.59 +##############################
   27.60 +#
    27.61 +# In the future, interfaces should be in loadable modules
   27.62 +#
   27.63 +# interface(name,rules)
   27.64 +#
   27.65 +define(`interface',`
   27.66 +	`define(`$1',`
   27.67 +
   27.68 +	define(`policy_temp',incr(policy_call_depth))
   27.69 +	pushdef(`policy_call_depth',policy_temp)
   27.70 +	undefine(`policy_temp')
   27.71 +
   27.72 +	policy_m4_comment(policy_call_depth,begin `$1'(dollarsstar))
   27.73 +
   27.74 +	$2
   27.75 +
   27.76 +	define(`policy_temp',decr(policy_call_depth))
   27.77 +	pushdef(`policy_call_depth',policy_temp)
   27.78 +	undefine(`policy_temp')
   27.79 +
   27.80 +	policy_m4_comment(policy_call_depth,end `$1'(dollarsstar))
   27.81 +
   27.82 +	'')
   27.83 +')
   27.84 +
   27.85 +define(`policy_call_depth',0)
   27.86 +
   27.87 +##############################
   27.88 +#
   27.89 +# Optional policy handling
   27.90 +#
   27.91 +define(`optional_policy',`
   27.92 +	ifdef(`self_contained_policy',`
   27.93 +		ifdef(`$1',`$2',`$3')
   27.94 +	',`
   27.95 +		optional {
   27.96 +			$2
   27.97 +		ifelse(`$3',`',`',`
   27.98 +		} else {
   27.99 +			$3
  27.100 +		')
  27.101 +		}
  27.102 +	')
  27.103 +')
  27.104 +
  27.105 +##############################
  27.106 +#
  27.107 +# Determine if we should use the default
  27.108 +# tunable value as specified by the policy
  27.109 +# or if the override value should be used
  27.110 +#
  27.111 +define(`dflt_or_overr',`ifdef(`$1',$1,$2)')
  27.112 +
  27.113 +##############################
  27.114 +#
  27.115 +# Extract booleans out of an expression.
  27.116 +# This needs to be reworked so expressions
  27.117 +# with parentheses can work.
  27.118 +
   27.119 +define(`declare_required_symbols',`
  27.120 +ifelse(regexp($1, `\w'), -1, `', `dnl
  27.121 +bool regexp($1, `\(\w+\)', `\1');
   27.122 +declare_required_symbols(regexp($1, `\w+\(.*\)', `\1'))dnl
  27.123 +') dnl
  27.124 +')
  27.125 +
  27.126 +##############################
  27.127 +#
  27.128 +# Tunable declaration
  27.129 +#
  27.130 +define(`gen_tunable',`
  27.131 +	ifdef(`self_contained_policy',`
  27.132 +		bool $1 dflt_or_overr(`$1'_conf,$2);
  27.133 +	',`
  27.134 +		# loadable module tunable
  27.135 +		# declaration will go here
  27.136 +		# instead of bool when
  27.137 +		# loadable modules support
  27.138 +		# tunables
  27.139 +		bool $1 dflt_or_overr(`$1'_conf,$2);
  27.140 +	')
  27.141 +')
  27.142 +
  27.143 +##############################
  27.144 +#
  27.145 +# Tunable policy handling
  27.146 +#
  27.147 +define(`tunable_policy',`
  27.148 +	ifdef(`self_contained_policy',`
  27.149 +		if (`$1') {
  27.150 +			$2
  27.151 +		} else {
  27.152 +			$3
  27.153 +		}
  27.154 +	',`
  27.155 +		# structure for tunables
  27.156 +		# will go here instead of a
  27.157 +		# conditional when loadable
  27.158 +		# modules support tunables
  27.159 +		gen_require(`
   27.160 +			declare_required_symbols(`$1')
  27.161 +		')
  27.162 +
  27.163 +		if (`$1') {
  27.164 +			$2
  27.165 +		} else {
  27.166 +			$3
  27.167 +		}
  27.168 +	')
  27.169 +')
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/tools/flask/policy/policy/support/misc_macros.spt	Fri Sep 12 14:47:40 2008 +0900
    28.3 @@ -0,0 +1,32 @@
    28.4 +
    28.5 +########################################
    28.6 +#
    28.7 +# Helper macros
    28.8 +#
    28.9 +
   28.10 +#
   28.11 +# shiftn(num,list...)
   28.12 +#
   28.13 +# shift the list num times
   28.14 +#
   28.15 +define(`shiftn',`ifelse($1,0,`shift($*)',`shiftn(decr($1),shift(shift($*)))')')
   28.16 +
   28.17 +########################################
   28.18 +#
   28.19 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   28.20 +#
   28.21 +define(`gen_user',`user $1 roles { $2 }`'ifdef(`enable_mls', ` level $3 range $4')`'ifdef(`enable_mcs',` level s0 range s0`'ifelse(`$5',,,` - s0:$5')');')
   28.22 +
   28.23 +########################################
   28.24 +#
   28.25 +# gen_context(context,mls_sensitivity,[mcs_categories])
   28.26 +#
   28.27 +define(`gen_context',`$1`'ifdef(`enable_mls',`:$2')`'ifdef(`enable_mcs',`:s0`'ifelse(`$3',,,`:$3')')') dnl
   28.28 +
   28.29 +########################################
   28.30 +#
   28.31 +# gen_bool(name,default_value)
   28.32 +#
   28.33 +define(`gen_bool',`
   28.34 +	bool $1 dflt_or_overr(`$1'_conf,$2);
   28.35 +')
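
Worked example: with enable_mls defined, gen_context(system_u:system_r:xen_t,s0)
expands to system_u:system_r:xen_t:s0; with enable_mcs it likewise appends :s0,
plus :categories when the optional third argument is supplied. With neither
defined, the result is the bare context system_u:system_r:xen_t.
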
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/tools/flask/policy/policy/systemuser	Fri Sep 12 14:47:40 2008 +0900
    29.3 @@ -0,0 +1,19 @@
    29.4 +##################################
    29.5 +#
    29.6 +# System User configuration.
    29.7 +#
    29.8 +
    29.9 +#
   29.10 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   29.11 +#
   29.12 +
   29.13 +#
   29.14 +# system_u is the user identity for system processes and objects.
   29.15 +# There should be no corresponding Unix user identity for system,
   29.16 +# and a user process should never be assigned the system user
   29.17 +# identity.
   29.18 +#
   29.19 +gen_user(system_u, system_r, s0, s0 - s9:c0.c127, c0.c127)
   29.20 +
   29.21 +# Normal users should not be added to this file,
   29.22 +# but instead added to the users file.
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/flask/policy/policy/users	Fri Sep 12 14:47:40 2008 +0900
    30.3 @@ -0,0 +1,39 @@
    30.4 +
    30.5 +##################################
    30.6 +#
    30.7 +# Core User configuration.
    30.8 +#
    30.9 +
   30.10 +#
    30.11 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   30.12 +#
   30.13 +
   30.14 +#
   30.15 +# user_u is a generic user identity for Linux users who have no
   30.16 +# SELinux user identity defined.  The modified daemons will use
   30.17 +# this user identity in the security context if there is no matching
   30.18 +# SELinux user identity for a Linux user.  If you do not want to
   30.19 +# permit any access to such users, then remove this entry.
   30.20 +#
   30.21 +ifdef(`targeted_policy',`
   30.22 +gen_user(user_u, user_r sysadm_r system_r, s0, s0 - s9:c0.c127)
   30.23 +',`
   30.24 +gen_user(user_u, user_r, s0, s0 - s9:c0.c127)
   30.25 +')
   30.26 +
   30.27 +#
   30.28 +# The following users correspond to Unix identities.
   30.29 +# These identities are typically assigned as the user attribute
   30.30 +# when login starts the user shell.  Users with access to the sysadm_r
   30.31 +# role should use the staff_r role instead of the user_r role when
   30.32 +# not in the sysadm_r.
   30.33 +#
   30.34 +ifdef(`targeted_policy',`
   30.35 +	gen_user(root, user_r sysadm_r system_r, s0, s0 - s9:c0.c127, c0.c127)
   30.36 +',`
   30.37 +	ifdef(`direct_sysadm_daemon',`
   30.38 +		gen_user(root, sysadm_r staff_r system_r, s0, s0 - s9:c0.c127, c0.c127)
   30.39 +	',`
   30.40 +		gen_user(root, sysadm_r staff_r, s0, s0 - s9:c0.c127, c0.c127)
   30.41 +	')
   30.42 +')
    31.1 --- a/tools/ioemu/hw/cirrus_vga.c	Fri Sep 12 14:32:45 2008 +0900
    31.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Fri Sep 12 14:47:40 2008 +0900
    31.3 @@ -2554,6 +2554,9 @@ static void set_vram_mapping(CirrusVGASt
    31.4  
    31.5      fprintf(logfile,"mapping vram to %lx - %lx\n", begin, end);
    31.6  
    31.7 +    if (!s->vram_mfns)
    31.8 +        return;
    31.9 +
   31.10      xatp.domid = domid;
   31.11      xatp.space = XENMAPSPACE_mfn;
   31.12  
    32.1 --- a/tools/ioemu/hw/pass-through.c	Fri Sep 12 14:32:45 2008 +0900
    32.2 +++ b/tools/ioemu/hw/pass-through.c	Fri Sep 12 14:47:40 2008 +0900
    32.3 @@ -57,6 +57,10 @@ static uint32_t pt_irqpin_reg_init(struc
    32.4      struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.5  static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
    32.6      struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.7 +static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
    32.8 +    struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.9 +static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
   32.10 +    struct pt_reg_info_tbl *reg, uint32_t real_offset);
   32.11  static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
   32.12      struct pt_reg_info_tbl *reg, uint32_t real_offset);
   32.13  static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
   32.14 @@ -77,6 +81,8 @@ static uint8_t pt_msix_size_init(struct 
   32.15      struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.16  static uint8_t pt_vendor_size_init(struct pt_dev *ptdev,
   32.17      struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.18 +static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
   32.19 +    struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.20  static int pt_byte_reg_read(struct pt_dev *ptdev,
   32.21      struct pt_reg_tbl *cfg_entry,
   32.22      uint8_t *valueu, uint8_t valid_mask);
   32.23 @@ -438,7 +444,7 @@ static struct pt_reg_info_tbl pt_emu_reg
   32.24          .init_val   = 0x0000,
   32.25          .ro_mask    = 0x0000,
   32.26          .emu_mask   = 0xFFFF,
   32.27 -        .init       = pt_common_reg_init,
   32.28 +        .init       = pt_linkctrl_reg_init,
   32.29          .u.w.read   = pt_word_reg_read,
   32.30          .u.w.write  = pt_linkctrl_reg_write,
   32.31      },
   32.32 @@ -449,7 +455,7 @@ static struct pt_reg_info_tbl pt_emu_reg
   32.33          .init_val   = 0x0000,
   32.34          .ro_mask    = 0x0000,
   32.35          .emu_mask   = 0xFFFF,
   32.36 -        .init       = pt_common_reg_init,
   32.37 +        .init       = pt_devctrl2_reg_init,
   32.38          .u.w.read   = pt_word_reg_read,
   32.39          .u.w.write  = pt_devctrl2_reg_write,
   32.40      },
   32.41 @@ -666,8 +672,8 @@ static const struct pt_reg_grp_info_tbl 
   32.42      {
   32.43          .grp_id     = PCI_CAP_ID_EXP,
   32.44          .grp_type   = GRP_TYPE_EMU,
   32.45 -        .grp_size   = 0x3C,
   32.46 -        .size_init  = pt_reg_grp_size_init,
   32.47 +        .grp_size   = 0xFF,
   32.48 +        .size_init  = pt_pcie_size_init,
   32.49          .emu_reg_tbl= pt_emu_reg_pcie_tbl,
   32.50      },
   32.51      /* MSI-X Capability Structure reg group */
   32.52 @@ -1869,12 +1875,57 @@ static uint32_t pt_bar_reg_init(struct p
   32.53      return reg_field;
   32.54  }
   32.55  
   32.56 +/* initialize Link Control register */
   32.57 +static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
   32.58 +        struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.59 +{
   32.60 +    uint8_t cap_ver = 0;
   32.61 +    uint8_t dev_type = 0;
   32.62 +
   32.63 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.64 +        (uint8_t)PCI_EXP_FLAGS_VERS);
   32.65 +    dev_type = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.66 +        (uint8_t)PCI_EXP_FLAGS_TYPE) >> 4;
   32.67 +    
   32.68 +    /* no need to initialize in case of Root Complex Integrated Endpoint
   32.69 +     * with cap_ver 1.x 
   32.70 +     */
   32.71 +    if ((dev_type == PCI_EXP_TYPE_ROOT_INT_EP) && (cap_ver == 1))
   32.72 +        return PT_INVALID_REG;
   32.73 +
   32.74 +    return reg->init_val;
   32.75 +}
   32.76 +
   32.77 +/* initialize Device Control 2 register */
   32.78 +static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
   32.79 +        struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.80 +{
   32.81 +    uint8_t cap_ver = 0;
   32.82 +
   32.83 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.84 +        (uint8_t)PCI_EXP_FLAGS_VERS);
   32.85 +    
   32.86 +    /* no need to initialize in case of cap_ver 1.x */
   32.87 +    if (cap_ver == 1)
   32.88 +        return PT_INVALID_REG;
   32.89 +
   32.90 +    return reg->init_val;
   32.91 +}
   32.92 +
   32.93  /* initialize Link Control 2 register */
   32.94  static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
   32.95          struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.96  {
   32.97      int reg_field = 0;
   32.98 -
   32.99 +    uint8_t cap_ver = 0;
  32.100 +
  32.101 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
  32.102 +        (uint8_t)PCI_EXP_FLAGS_VERS);
  32.103 +    
  32.104 +    /* no need to initialize in case of cap_ver 1.x */
  32.105 +    if (cap_ver == 1)
  32.106 +        return PT_INVALID_REG;
  32.107 +    
  32.108      /* set Supported Link Speed */
  32.109      reg_field |= 
  32.110          (0x0F & 
  32.111 @@ -2036,6 +2087,91 @@ static uint8_t pt_vendor_size_init(struc
  32.112      return ptdev->dev.config[base_offset + 0x02];
  32.113  }
  32.114  
  32.115 +/* get PCI Express Capability Structure register group size */
  32.116 +static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
  32.117 +        struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
  32.118 +{
  32.119 +    PCIDevice *d = &ptdev->dev;
  32.120 +    uint16_t exp_flag = 0;
  32.121 +    uint16_t type = 0;
  32.122 +    uint16_t vers = 0;
  32.123 +    uint8_t pcie_size = 0;
  32.124 +
  32.125 +    exp_flag = *((uint16_t*)(d->config + (base_offset + PCI_EXP_FLAGS)));
  32.126 +    type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4;
  32.127 +    vers = (exp_flag & PCI_EXP_FLAGS_VERS);
  32.128 +
   32.129 +    /* calculate size depending on capability version and device/port type */
  32.130 +    /* in case of PCI Express Base Specification Rev 1.x */
  32.131 +    if (vers == 1)
  32.132 +    {
  32.133 +        /* The PCI Express Capabilities, Device Capabilities, and Device 
  32.134 +         * Status/Control registers are required for all PCI Express devices. 
  32.135 +         * The Link Capabilities and Link Status/Control are required for all 
   32.136 +         * Endpoints that are not Root Complex Integrated Endpoints. Endpoints
   32.137 +         * are not required to implement registers other than those listed
   32.138 +         * above, and may terminate the capability structure there.
  32.139 +         */
  32.140 +        switch (type) {
  32.141 +        case PCI_EXP_TYPE_ENDPOINT:
  32.142 +        case PCI_EXP_TYPE_LEG_END:
  32.143 +            pcie_size = 0x14;
  32.144 +            break;
  32.145 +        case PCI_EXP_TYPE_ROOT_INT_EP:
  32.146 +            /* has no link */
  32.147 +            pcie_size = 0x0C;
  32.148 +            break;
  32.149 +        /* only EndPoint passthrough is supported */
  32.150 +        case PCI_EXP_TYPE_ROOT_PORT:
  32.151 +        case PCI_EXP_TYPE_UPSTREAM:
  32.152 +        case PCI_EXP_TYPE_DOWNSTREAM:
  32.153 +        case PCI_EXP_TYPE_PCI_BRIDGE:
  32.154 +        case PCI_EXP_TYPE_PCIE_BRIDGE:
  32.155 +        case PCI_EXP_TYPE_ROOT_EC:
  32.156 +        default:
  32.157 +            /* exit I/O emulator */
  32.158 +            PT_LOG("Internal error: Unsupported device/port type[%d]. "
  32.159 +                "I/O emulator exit.\n", type);
  32.160 +            exit(1);
  32.161 +        }
  32.162 +    }
  32.163 +    /* in case of PCI Express Base Specification Rev 2.0 */
  32.164 +    else if (vers == 2)
  32.165 +    {
  32.166 +        switch (type) {
  32.167 +        case PCI_EXP_TYPE_ENDPOINT:
  32.168 +        case PCI_EXP_TYPE_LEG_END:
  32.169 +        case PCI_EXP_TYPE_ROOT_INT_EP:
  32.170 +            /* For Functions that do not implement the registers, 
  32.171 +             * these spaces must be hardwired to 0b.
  32.172 +             */
  32.173 +            pcie_size = 0x3C;
  32.174 +            break;
  32.175 +        /* only EndPoint passthrough is supported */
  32.176 +        case PCI_EXP_TYPE_ROOT_PORT:
  32.177 +        case PCI_EXP_TYPE_UPSTREAM:
  32.178 +        case PCI_EXP_TYPE_DOWNSTREAM:
  32.179 +        case PCI_EXP_TYPE_PCI_BRIDGE:
  32.180 +        case PCI_EXP_TYPE_PCIE_BRIDGE:
  32.181 +        case PCI_EXP_TYPE_ROOT_EC:
  32.182 +        default:
  32.183 +            /* exit I/O emulator */
  32.184 +            PT_LOG("Internal error: Unsupported device/port type[%d]. "
  32.185 +                "I/O emulator exit.\n", type);
  32.186 +            exit(1);
  32.187 +        }
  32.188 +    }
  32.189 +    else
  32.190 +    {
  32.191 +        /* exit I/O emulator */
  32.192 +        PT_LOG("Internal error: Unsupported capability version[%d]. "
  32.193 +            "I/O emulator exit.\n", vers);
  32.194 +        exit(1);
  32.195 +    }
  32.196 +
  32.197 +    return pcie_size;
  32.198 +}
  32.199 +
  32.200  /* read byte size emulate register */
  32.201  static int pt_byte_reg_read(struct pt_dev *ptdev,
  32.202          struct pt_reg_tbl *cfg_entry,
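
A minimal, self-contained sketch of the PCI_EXP_FLAGS decoding that the new
pt_linkctrl/pt_devctrl2/pt_linkctrl2 initializers and pt_pcie_size_init()
above rely on (constant values as in Linux's pci_regs.h; the helper name is
illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_EXP_FLAGS       2       /* PCIe Capabilities register offset */
    #define PCI_EXP_FLAGS_VERS  0x000f  /* capability version field */
    #define PCI_EXP_FLAGS_TYPE  0x00f0  /* device/port type field */

    /* Split the PCIe Capabilities register into its capability version
     * and device/port type fields, as the initializers above do before
     * deciding which registers a function actually implements. */
    static void decode_exp_flags(uint16_t exp_flags)
    {
        uint16_t vers = exp_flags & PCI_EXP_FLAGS_VERS;
        uint16_t type = (exp_flags & PCI_EXP_FLAGS_TYPE) >> 4;

        printf("cap version %u, device/port type %u\n", vers, type);
    }

    int main(void)
    {
        decode_exp_flags(0x0002);          /* v2 PCI Express Endpoint (type 0) */
        decode_exp_flags(0x0001 | 0x0090); /* v1 Root Complex Integrated Endpoint */
        return 0;
    }
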
    33.1 --- a/tools/ioemu/hw/pass-through.h	Fri Sep 12 14:32:45 2008 +0900
    33.2 +++ b/tools/ioemu/hw/pass-through.h	Fri Sep 12 14:47:40 2008 +0900
    33.3 @@ -62,6 +62,21 @@
    33.4  #define PCI_MSI_FLAGS_MASK_BIT  0x0100
    33.5  #endif
    33.6  
    33.7 +#ifndef PCI_EXP_TYPE_PCIE_BRIDGE
    33.8 +/* PCI/PCI-X to PCIE Bridge */
    33.9 +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8
   33.10 +#endif
   33.11 +
   33.12 +#ifndef PCI_EXP_TYPE_ROOT_INT_EP
   33.13 +/* Root Complex Integrated Endpoint */
   33.14 +#define PCI_EXP_TYPE_ROOT_INT_EP 0x9
   33.15 +#endif
   33.16 +
   33.17 +#ifndef PCI_EXP_TYPE_ROOT_EC
   33.18 +/* Root Complex Event Collector */
   33.19 +#define PCI_EXP_TYPE_ROOT_EC     0xa
   33.20 +#endif
   33.21 +
   33.22  #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
   33.23  #define PT_BAR_ALLF             0xFFFFFFFF      /* BAR ALLF value */
   33.24  #define PT_BAR_MEM_RO_MASK      0x0000000F      /* BAR ReadOnly mask(Memory) */
    34.1 --- a/tools/ioemu/hw/pci.c	Fri Sep 12 14:32:45 2008 +0900
    34.2 +++ b/tools/ioemu/hw/pci.c	Fri Sep 12 14:47:40 2008 +0900
    34.3 @@ -45,7 +45,6 @@ struct PCIBus {
    34.4  static void pci_update_mappings(PCIDevice *d);
    34.5  
    34.6  target_phys_addr_t pci_mem_base;
    34.7 -static int pci_irq_index;
    34.8  static PCIBus *first_bus;
    34.9  
   34.10  PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
   34.11 @@ -114,9 +113,6 @@ PCIDevice *pci_register_device(PCIBus *b
   34.12  {
   34.13      PCIDevice *pci_dev;
   34.14  
   34.15 -    if (pci_irq_index >= PCI_DEVICES_MAX)
   34.16 -        return NULL;
   34.17 -    
   34.18      if (devfn < 0) {
   34.19          for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) {
   34.20              if ( !bus->devices[devfn] &&
   34.21 @@ -140,7 +136,6 @@ PCIDevice *pci_register_device(PCIBus *b
   34.22          config_write = pci_default_write_config;
   34.23      pci_dev->config_read = config_read;
   34.24      pci_dev->config_write = config_write;
   34.25 -    pci_dev->irq_index = pci_irq_index++;
   34.26      bus->devices[devfn] = pci_dev;
   34.27      return pci_dev;
   34.28  }
    35.1 --- a/tools/ioemu/hw/pt-msi.c	Fri Sep 12 14:32:45 2008 +0900
    35.2 +++ b/tools/ioemu/hw/pt-msi.c	Fri Sep 12 14:47:40 2008 +0900
    35.3 @@ -313,7 +313,7 @@ int pt_msix_init(struct pt_dev *dev, int
    35.4  
    35.5      table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
    35.6      bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
    35.7 -    table_off &= table_off & ~PCI_MSIX_BIR;
    35.8 +    table_off = dev->msix->table_off = table_off & ~PCI_MSIX_BIR;
    35.9      dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
   35.10      PT_LOG("get MSI-X table bar base %llx\n",
   35.11             (unsigned long long)dev->msix->table_base);
    36.1 --- a/tools/ioemu/hw/vga.c	Fri Sep 12 14:32:45 2008 +0900
    36.2 +++ b/tools/ioemu/hw/vga.c	Fri Sep 12 14:47:40 2008 +0900
    36.3 @@ -2080,7 +2080,13 @@ void xen_vga_vram_map(uint64_t vram_addr
    36.4  
    36.5      if (copy)
    36.6          memcpy(vram, xen_vga_state->vram_ptr, VGA_RAM_SIZE);
    36.7 -    qemu_free(xen_vga_state->vram_ptr);
    36.8 +    if (xen_vga_state->vram_mfns) {
    36.9 +        /* In case this function is called more than once */
   36.10 +        free(xen_vga_state->vram_mfns);
   36.11 +        munmap(xen_vga_state->vram_ptr, VGA_RAM_SIZE);
   36.12 +    } else {
   36.13 +        qemu_free(xen_vga_state->vram_ptr);
   36.14 +    }
   36.15      xen_vga_state->vram_ptr = vram;
   36.16      xen_vga_state->vram_mfns = pfn_list;
   36.17  #ifdef CONFIG_STUBDOM
    37.1 --- a/tools/ioemu/hw/xen_machine_fv.c	Fri Sep 12 14:32:45 2008 +0900
    37.2 +++ b/tools/ioemu/hw/xen_machine_fv.c	Fri Sep 12 14:47:40 2008 +0900
    37.3 @@ -139,8 +139,10 @@ uint8_t *qemu_map_cache(target_phys_addr
    37.4          !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
    37.5          qemu_remap_bucket(entry, address_index);
    37.6  
    37.7 -    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
    37.8 +    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
    37.9 +        last_address_index = ~0UL;
   37.10          return NULL;
   37.11 +    }
   37.12  
   37.13      last_address_index = address_index;
   37.14      last_address_vaddr = entry->vaddr_base;
    38.1 --- a/tools/ioemu/vl.h	Fri Sep 12 14:32:45 2008 +0900
    38.2 +++ b/tools/ioemu/vl.h	Fri Sep 12 14:47:40 2008 +0900
    38.3 @@ -812,8 +812,6 @@ struct PCIDevice {
    38.4      /* do not access the following fields */
    38.5      PCIConfigReadFunc *config_read;
    38.6      PCIConfigWriteFunc *config_write;
    38.7 -    /* ??? This is a PC-specific hack, and should be removed.  */
    38.8 -    int irq_index;
    38.9  
   38.10      /* Current IRQ levels.  Used internally by the generic PCI code.  */
   38.11      int irq_state[4];
    39.1 --- a/tools/libxc/ia64/xc_ia64_linux_save.c	Fri Sep 12 14:32:45 2008 +0900
    39.2 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c	Fri Sep 12 14:47:40 2008 +0900
    39.3 @@ -53,12 +53,12 @@ static inline void set_bit(int nr, volat
    39.4  }
    39.5  
    39.6  static int
    39.7 -suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
    39.8 +suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
    39.9                    int dom, xc_dominfo_t *info)
   39.10  {
   39.11      int i = 0;
   39.12  
   39.13 -    if (!(*suspend)(dom)) {
   39.14 +    if (!(*suspend)()) {
   39.15          ERROR("Suspend request failed");
   39.16          return -1;
   39.17      }
   39.18 @@ -406,7 +406,7 @@ out:
   39.19  
   39.20  int
   39.21  xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   39.22 -               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   39.23 +               uint32_t max_factor, uint32_t flags, int (*suspend)(void),
   39.24                 int hvm, void *(*init_qemu_maps)(int, unsigned),
   39.25                 void (*qemu_flip_buffer)(int, int))
   39.26  {
    40.1 --- a/tools/libxc/xc_domain_save.c	Fri Sep 12 14:32:45 2008 +0900
    40.2 +++ b/tools/libxc/xc_domain_save.c	Fri Sep 12 14:47:40 2008 +0900
    40.3 @@ -338,72 +338,23 @@ static int analysis_phase(int xc_handle,
    40.4  }
    40.5  
    40.6  
    40.7 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
    40.8 +static int suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
    40.9                               int dom, xc_dominfo_t *info)
   40.10  {
   40.11 -    int i = 0;
   40.12 -
   40.13 -    if ( !(*suspend)(dom) )
   40.14 +    if ( !(*suspend)() )
   40.15      {
   40.16          ERROR("Suspend request failed");
   40.17          return -1;
   40.18      }
   40.19  
   40.20 - retry:
   40.21 -
   40.22 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
   40.23 +    if ( (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) ||
   40.24 +         !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
   40.25      {
   40.26 -        ERROR("Could not get domain info");
   40.27 -        return -1;
   40.28 -    }
   40.29 -
   40.30 -    if ( info->dying )
   40.31 -    {
   40.32 -        ERROR("domain is dying");
   40.33 -        return -1;
   40.34 -    }
   40.35 -
   40.36 -    if ( info->crashed )
   40.37 -    {
   40.38 -        ERROR("domain has crashed");
   40.39 +        ERROR("Domain not in suspended state");
   40.40          return -1;
   40.41      }
   40.42  
   40.43 -    if ( info->shutdown )
   40.44 -    {
   40.45 -        switch ( info->shutdown_reason )
   40.46 -        {
   40.47 -        case SHUTDOWN_poweroff:
   40.48 -        case SHUTDOWN_reboot:
   40.49 -            ERROR("domain has shut down");
   40.50 -            return -1;
   40.51 -        case SHUTDOWN_suspend:
   40.52 -            return 0;
   40.53 -        case SHUTDOWN_crash:
   40.54 -            ERROR("domain has crashed");
   40.55 -            return -1;
   40.56 -        }
   40.57 -    }
   40.58 -
   40.59 -    if ( info->paused )
   40.60 -    {
   40.61 -        /* Try unpausing domain, wait, and retest. */
   40.62 -        xc_domain_unpause( xc_handle, dom );
   40.63 -        ERROR("Domain was paused. Wait and re-test.");
   40.64 -        usleep(10000); /* 10ms */
   40.65 -        goto retry;
   40.66 -    }
   40.67 -
   40.68 -    if ( ++i < 100 )
   40.69 -    {
   40.70 -        ERROR("Retry suspend domain");
   40.71 -        usleep(10000); /* 10ms */
   40.72 -        goto retry;
   40.73 -    }
   40.74 -
   40.75 -    ERROR("Unable to suspend domain.");
   40.76 -
   40.77 -    return -1;
   40.78 +    return 0;
   40.79  }
   40.80  
   40.81  /*
   40.82 @@ -796,7 +747,7 @@ static xen_pfn_t *map_and_save_p2m_table
   40.83  
   40.84  
   40.85  int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   40.86 -                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   40.87 +                   uint32_t max_factor, uint32_t flags, int (*suspend)(void),
   40.88                     int hvm, void *(*init_qemu_maps)(int, unsigned), 
   40.89                     void (*qemu_flip_buffer)(int, int))
   40.90  {
    41.1 --- a/tools/libxc/xc_evtchn.c	Fri Sep 12 14:32:45 2008 +0900
    41.2 +++ b/tools/libxc/xc_evtchn.c	Fri Sep 12 14:47:40 2008 +0900
    41.3 @@ -59,17 +59,8 @@ int xc_evtchn_reset(int xc_handle,
    41.4      return do_evtchn_op(xc_handle, EVTCHNOP_reset, &arg, sizeof(arg), 0);
    41.5  }
    41.6  
    41.7 -int xc_evtchn_status(int xc_handle,
    41.8 -                     uint32_t dom,
    41.9 -                     uint32_t port)
   41.10 +int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status)
   41.11  {
   41.12 -    int rc;
   41.13 -    struct evtchn_status arg = { .dom = (domid_t)dom,
   41.14 -                                 .port = (evtchn_port_t)port };
   41.15 -
   41.16 -    rc = do_evtchn_op(xc_handle, EVTCHNOP_status, &arg, sizeof(arg), 1);
   41.17 -    if ( rc == 0 )
   41.18 -        rc = arg.status;
   41.19 -
   41.20 -    return rc;
   41.21 +    return do_evtchn_op(xc_handle, EVTCHNOP_status, status,
   41.22 +                        sizeof(*status), 1);
   41.23  }
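
Usage sketch for the reworked interface (assumes only xenctrl.h; the caller
name and its arguments are illustrative): the caller now fills in .dom and
.port and reads the result back out of the same structure, rather than
receiving the status as the return value:

    #include <stdint.h>
    #include <stdio.h>
    #include <xenctrl.h>

    /* Query one event channel; xc_evtchn_status() returns 0 and fills
     * status.status on success. */
    static void show_port_status(int xc_handle, uint32_t dom, uint32_t port)
    {
        xc_evtchn_status_t status = {
            .dom  = (domid_t)dom,
            .port = (evtchn_port_t)port,
        };

        if ( xc_evtchn_status(xc_handle, &status) == 0 )
            printf("port %u: status %d\n", port, status.status);
    }
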
    42.1 --- a/tools/libxc/xc_private.c	Fri Sep 12 14:32:45 2008 +0900
    42.2 +++ b/tools/libxc/xc_private.c	Fri Sep 12 14:47:40 2008 +0900
    42.3 @@ -307,6 +307,13 @@ int xc_memory_op(int xc_handle,
    42.4              goto out1;
    42.5          }
    42.6          break;
    42.7 +    case XENMEM_remove_from_physmap:
    42.8 +        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
    42.9 +        {
   42.10 +            PERROR("Could not lock");
   42.11 +            goto out1;
   42.12 +        }
   42.13 +        break;
   42.14      case XENMEM_current_reservation:
   42.15      case XENMEM_maximum_reservation:
   42.16      case XENMEM_maximum_gpfn:
   42.17 @@ -340,6 +347,9 @@ int xc_memory_op(int xc_handle,
   42.18      case XENMEM_add_to_physmap:
   42.19          unlock_pages(arg, sizeof(struct xen_add_to_physmap));
   42.20          break;
   42.21 +    case XENMEM_remove_from_physmap:
   42.22 +        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
   42.23 +        break;
   42.24      case XENMEM_current_reservation:
   42.25      case XENMEM_maximum_reservation:
   42.26      case XENMEM_maximum_gpfn:
    43.1 --- a/tools/libxc/xenctrl.h	Fri Sep 12 14:32:45 2008 +0900
    43.2 +++ b/tools/libxc/xenctrl.h	Fri Sep 12 14:47:40 2008 +0900
    43.3 @@ -502,9 +502,9 @@ xc_evtchn_alloc_unbound(int xc_handle,
    43.4  
    43.5  int xc_evtchn_reset(int xc_handle,
    43.6                      uint32_t dom);
    43.7 -int xc_evtchn_status(int xc_handle,
    43.8 -                     uint32_t dom,
    43.9 -                     uint32_t port);
   43.10 +
   43.11 +typedef struct evtchn_status xc_evtchn_status_t;
   43.12 +int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status);
   43.13  
   43.14  /*
   43.15   * Return a handle to the event channel driver, or -1 on failure, in which case
    44.1 --- a/tools/libxc/xenguest.h	Fri Sep 12 14:32:45 2008 +0900
    44.2 +++ b/tools/libxc/xenguest.h	Fri Sep 12 14:47:40 2008 +0900
    44.3 @@ -25,7 +25,7 @@
    44.4   */
    44.5  int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    44.6                     uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
    44.7 -                   int (*suspend)(int domid), int hvm,
    44.8 +                   int (*suspend)(void), int hvm,
    44.9                     void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
   44.10                     void (*qemu_flip_buffer)(int, int));     /* HVM only */
   44.11  
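
With the new int (*suspend)(void) signature, the callback must obtain the
target domain itself instead of receiving it as an argument. A minimal sketch
of a conforming callback (the global and its wiring are purely illustrative):

    #include <stdint.h>

    static uint32_t suspend_domid;  /* bound by the caller before xc_domain_save() */

    static int my_suspend(void)
    {
        /* ... ask domain suspend_domid to suspend ... */
        return 1;  /* non-zero on success, as suspend_and_state() expects */
    }
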
    45.1 --- a/tools/python/Makefile	Fri Sep 12 14:32:45 2008 +0900
    45.2 +++ b/tools/python/Makefile	Fri Sep 12 14:47:40 2008 +0900
    45.3 @@ -1,14 +1,6 @@
    45.4  XEN_ROOT = ../..
    45.5  include $(XEN_ROOT)/tools/Rules.mk
    45.6  
    45.7 -XEN_SECURITY_MODULE = dummy
    45.8 -ifeq ($(FLASK_ENABLE),y)
    45.9 -XEN_SECURITY_MODULE = flask
   45.10 -endif
   45.11 -ifeq ($(ACM_SECURITY),y)
   45.12 -XEN_SECURITY_MODULE = acm
   45.13 -endif
   45.14 -
   45.15  .PHONY: all
   45.16  all: build
   45.17  
   45.18 @@ -23,8 +15,8 @@ CATALOGS = $(patsubst %,xen/xm/messages/
   45.19  NLSDIR = /usr/share/locale
   45.20  
   45.21  .PHONY: build buildpy
   45.22 -buildpy: xsm.py
   45.23 -	CC="$(CC)" CFLAGS="$(CFLAGS)" XEN_SECURITY_MODULE="$(XEN_SECURITY_MODULE)" python setup.py build
   45.24 +buildpy: 
   45.25 +	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
   45.26  
   45.27  build: buildpy refresh-pot refresh-po $(CATALOGS)
   45.28  
   45.29 @@ -61,18 +53,6 @@ refresh-po: $(POTFILE)
   45.30  %.mo: %.po
   45.31  	$(MSGFMT) -c -o $@ $<
   45.32  
   45.33 -xsm.py:
   45.34 -	@(set -e; \
   45.35 -	  echo "XEN_SECURITY_MODULE = \""$(XEN_SECURITY_MODULE)"\""; \
   45.36 -	  echo "from xsm_core import *"; \
   45.37 -	  echo ""; \
   45.38 -	  echo "import xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" as xsm_module"; \
   45.39 -	  echo ""; \
   45.40 -	  echo "xsm_init(xsm_module)"; \
   45.41 -	  echo "from xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" import *"; \
   45.42 -	  echo "del xsm_module"; \
   45.43 -	  echo "") >xen/util/xsm/$@
   45.44 -
   45.45  .PHONY: install
   45.46  ifndef XEN_PYTHON_NATIVE_INSTALL
   45.47  install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()")
   45.48 @@ -104,4 +84,4 @@ test:
   45.49  
   45.50  .PHONY: clean
   45.51  clean:
   45.52 -	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/xsm/xsm.py xen/util/auxbin.pyc
   45.53 +	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
    46.1 --- a/tools/python/xen/util/xsconstants.py	Fri Sep 12 14:32:45 2008 +0900
    46.2 +++ b/tools/python/xen/util/xsconstants.py	Fri Sep 12 14:47:40 2008 +0900
    46.3 @@ -20,8 +20,10 @@ XS_INST_NONE = 0
    46.4  XS_INST_BOOT = (1 << 0)
    46.5  XS_INST_LOAD = (1 << 1)
    46.6  
    46.7 -XS_POLICY_NONE  = 0
    46.8  XS_POLICY_ACM = (1 << 0)
    46.9 +XS_POLICY_FLASK = (1 << 1)
   46.10 +XS_POLICY_DUMMY  = (1 << 2)
   46.11 +XS_POLICY_USE = 0
   46.12  
   46.13  # Some internal variables used by the Xen-API
   46.14  ACM_LABEL_VM  = (1 << 0)
   46.15 @@ -107,6 +109,6 @@ ACM_POLICY_ID = 'ACM'
   46.16  
   46.17  INVALID_POLICY_PREFIX = 'INV_'
   46.18  
   46.19 -INVALID_SSIDREF = 0xFFFFFFFF
   46.20 +INVALID_SSIDREF = 0xFFFFFFFFL
   46.21  
   46.22  XS_INACCESSIBLE_LABEL = '__INACCESSIBLE__'
    48.1 --- a/tools/python/xen/util/xsm/flask/flask.py	Fri Sep 12 14:32:45 2008 +0900
    48.2 +++ b/tools/python/xen/util/xsm/flask/flask.py	Fri Sep 12 14:47:40 2008 +0900
    48.3 @@ -1,5 +1,6 @@
    48.4  import sys
    48.5  from xen.lowlevel import flask
    48.6 +from xen.util import xsconstants
    48.7  from xen.xend import sxp
    48.8  
    48.9  #Functions exported through XML-RPC
   48.10 @@ -12,7 +13,7 @@ def err(msg):
   48.11      raise XSMError(msg)
   48.12  
   48.13  def on():
   48.14 -    return 0 #xsconstants.XS_POLICY_FLASK
   48.15 +    return xsconstants.XS_POLICY_FLASK
   48.16  
   48.17  def ssidref2label(ssidref):
   48.18      try:
   48.19 @@ -37,8 +38,9 @@ def set_security_label(policy, label):
   48.20      return label
   48.21  
   48.22  def ssidref2security_label(ssidref):
   48.23 -    return ssidref2label(ssidref)
   48.24 +    label = ssidref2label(ssidref)
   48.25 +    return label
   48.26  
   48.27  def get_security_label(self, xspol=None):
   48.28 -    label = self.info.get('security_label', '')
   48.29 +    label = self.info['security_label']
   48.30      return label
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/tools/python/xen/util/xsm/xsm.py	Fri Sep 12 14:47:40 2008 +0900
    49.3 @@ -0,0 +1,20 @@
    49.4 +import sys
    49.5 +import string
    49.6 +from xen.xend import XendOptions
    49.7 +from xen.util import xsconstants
    49.8 +from xsm_core import xsm_init
    49.9 +
   49.10 +xoptions = XendOptions.instance()
   49.11 +xsm_module_name = xoptions.get_xsm_module_name()
   49.12 +
   49.13 +xsconstants.XS_POLICY_USE = eval("xsconstants.XS_POLICY_" +
   49.14 +                                 string.upper(xsm_module_name))
   49.15 +
   49.16 +xsm_module_path = "xen.util.xsm." + xsm_module_name + "." + xsm_module_name
   49.17 +xsm_module = __import__(xsm_module_path, globals(), locals(), ['*'])
   49.18 +
   49.19 +xsm_init(xsm_module)
   49.20 +
   49.21 +for op in dir(xsm_module):
   49.22 +    if not hasattr(sys.modules[__name__], op):
   49.23 +        setattr(sys.modules[__name__], op, getattr(xsm_module, op, None))
    50.1 --- a/tools/python/xen/xend/XendConfig.py	Fri Sep 12 14:32:45 2008 +0900
    50.2 +++ b/tools/python/xen/xend/XendConfig.py	Fri Sep 12 14:47:40 2008 +0900
    50.3 @@ -729,7 +729,7 @@ class XendConfig(dict):
    50.4              self.parse_cpuid(cfg, 'cpuid_check')
    50.5  
    50.6          import xen.util.xsm.xsm as security
    50.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    50.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    50.9              from xen.util.acmpolicy import ACM_LABEL_UNLABELED
   50.10              if not 'security' in cfg and sxp.child_value(sxp_cfg, 'security'):
   50.11                  cfg['security'] = sxp.child_value(sxp_cfg, 'security')
    52.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 12 14:32:45 2008 +0900
    52.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 12 14:47:40 2008 +0900
    52.3 @@ -2069,7 +2069,7 @@ class XendDomainInfo:
    52.4          balloon.free(2*1024) # 2MB should be plenty
    52.5  
    52.6          ssidref = 0
    52.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    52.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    52.9              ssidref = security.calc_dom_ssidref_from_info(self.info)
   52.10              if security.has_authorization(ssidref) == False:
   52.11                  raise VmError("VM is not authorized to run.")
   52.12 @@ -2855,10 +2855,6 @@ class XendDomainInfo:
   52.13              info["maxmem_kb"] = XendNode.instance() \
   52.14                                  .physinfo_dict()['total_memory'] * 1024
   52.15  
   52.16 -        #ssidref field not used any longer
   52.17 -        if 'ssidref' in info:
   52.18 -            info.pop('ssidref')
   52.19 -
   52.20          # make sure state is reset for info
   52.21          # TODO: we should eventually get rid of old_dom_states
   52.22  
    53.1 --- a/tools/python/xen/xend/XendOptions.py	Fri Sep 12 14:32:45 2008 +0900
    53.2 +++ b/tools/python/xen/xend/XendOptions.py	Fri Sep 12 14:47:40 2008 +0900
    53.3 @@ -132,6 +132,9 @@ class XendOptions:
    53.4      """Default script to configure a backend network interface"""
    53.5      vif_script = osdep.vif_script
    53.6  
    53.7 +    """Default Xen Security Module"""
    53.8 +    xsm_module_default = 'dummy'
    53.9 +
   53.10      """Default rotation count of qemu-dm log file."""
   53.11      qemu_dm_logrotate_count = 10
   53.12  
   53.13 @@ -427,6 +430,11 @@ class XendOptionsFile(XendOptions):
   53.14          return self.get_config_value('xen-api-server',
   53.15                                       self.xen_api_server_default)
   53.16  
   53.17 +    def get_xsm_module_name(self):
   53.18 +        """Get the Xen Security Module name.
   53.19 +        """
   53.20 +        return self.get_config_string('xsm_module_name', self.xsm_module_default)
   53.21 +
   53.22  if os.uname()[0] == 'SunOS':
   53.23      class XendOptionsSMF(XendOptions):
   53.24  
    54.1 --- a/tools/python/xen/xend/server/blkif.py	Fri Sep 12 14:32:45 2008 +0900
    54.2 +++ b/tools/python/xen/xend/server/blkif.py	Fri Sep 12 14:47:40 2008 +0900
    54.3 @@ -78,7 +78,7 @@ class BlkifController(DevController):
    54.4          if uuid:
    54.5              back['uuid'] = uuid
    54.6  
    54.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    54.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    54.9              self.do_access_control(config, uname)
   54.10  
   54.11          (device_path, devid) = blkif.blkdev_name_to_number(dev)
    55.1 --- a/tools/python/xen/xend/server/netif.py	Fri Sep 12 14:32:45 2008 +0900
    55.2 +++ b/tools/python/xen/xend/server/netif.py	Fri Sep 12 14:47:40 2008 +0900
    55.3 @@ -156,7 +156,7 @@ class NetifController(DevController):
    55.4              front = { 'handle' : "%i" % devid,
    55.5                        'mac'    : mac }
    55.6  
    55.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    55.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    55.9              self.do_access_control(config)
   55.10  
   55.11          return (devid, back, front)
    56.1 --- a/tools/python/xen/xend/server/pciif.py	Fri Sep 12 14:32:45 2008 +0900
    56.2 +++ b/tools/python/xen/xend/server/pciif.py	Fri Sep 12 14:47:40 2008 +0900
    56.3 @@ -286,7 +286,7 @@ class PciController(DevController):
    56.4                      )%(dev.name))
    56.5  
    56.6          if dev.has_non_page_aligned_bar and arch.type != "ia64":
    56.7 -            raise VmError("pci: %: non-page-aligned MMIO BAR found." % dev.name)
    56.8 +            raise VmError("pci: %s: non-page-aligned MMIO BAR found." % dev.name)
    56.9  
   56.10          self.CheckSiblingDevices(fe_domid, dev)
   56.11  
    57.1 --- a/tools/python/xen/xm/create.py	Fri Sep 12 14:32:45 2008 +0900
    57.2 +++ b/tools/python/xen/xm/create.py	Fri Sep 12 14:47:40 2008 +0900
    57.3 @@ -566,11 +566,11 @@ gopts.var('hap', val='HAP',
    57.4            use="""Hap status (0=hap is disabled;
    57.5            1=hap is enabled.""")
    57.6  
    57.7 -gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
    57.8 +gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
    57.9            fn=append_value, default=[],
   57.10            use="""Cpuid description.""")
   57.11  
   57.12 -gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
   57.13 +gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
   57.14            fn=append_value, default=[],
   57.15            use="""Cpuid check description.""")
   57.16  
   57.17 @@ -971,7 +971,7 @@ def preprocess_cpuid(vals, attr_name):
   57.18                          "of the register %s for input %s\n"
   57.19                          % (res['reg'], input) )
   57.20                  cpuid[input][res['reg']] = res['val'] # new register
   57.21 -    setattr(vals, attr_name, cpuid)
   57.22 +            setattr(vals, attr_name, cpuid)
   57.23  
   57.24  def preprocess_pci(vals):
   57.25      if not vals.pci: return
    58.1 --- a/tools/python/xen/xm/main.py	Fri Sep 12 14:32:45 2008 +0900
    58.2 +++ b/tools/python/xen/xm/main.py	Fri Sep 12 14:47:40 2008 +0900
    58.3 @@ -1812,7 +1812,7 @@ def domain_name_to_domid(domain_name):
    58.4      else:
    58.5          dom = server.xend.domain(domain_name)
    58.6          domid = int(sxp.child_value(dom, 'domid', '-1'))
    58.7 -    return domid
    58.8 +    return int(domid)
    58.9  
   58.10  def xm_vncviewer(args):
   58.11      autopass = False;
    59.1 --- a/tools/xcutils/lsevtchn.c	Fri Sep 12 14:32:45 2008 +0900
    59.2 +++ b/tools/xcutils/lsevtchn.c	Fri Sep 12 14:47:40 2008 +0900
    59.3 @@ -8,49 +8,55 @@
    59.4  #include <xenctrl.h>
    59.5  #include <xenguest.h>
    59.6  
    59.7 -int
    59.8 -main(int argc, char **argv)
    59.9 +int main(int argc, char **argv)
   59.10  {
   59.11 -    int xc_fd;
   59.12 -    int domid = 0, port = 0, status;
   59.13 -    const char *msg;
   59.14 +    int xc_fd, domid, port, rc;
   59.15 +    xc_evtchn_status_t status;
   59.16  
   59.17 -    if ( argc > 1 )
   59.18 -        domid = strtol(argv[1], NULL, 10);
   59.19 +    domid = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
   59.20  
   59.21      xc_fd = xc_interface_open();
   59.22      if ( xc_fd < 0 )
   59.23          errx(1, "failed to open control interface");
   59.24  
   59.25 -    while ( (status = xc_evtchn_status(xc_fd, domid, port)) >= 0 )
   59.26 +    for ( port = 0; ; port++ )
   59.27      {
   59.28 -        switch ( status )
   59.29 +        status.dom = domid;
   59.30 +        status.port = port;
   59.31 +        rc = xc_evtchn_status(xc_fd, &status);
   59.32 +        if ( rc < 0 )
   59.33 +            break;
   59.34 +
   59.35 +        if ( status.status == EVTCHNSTAT_closed )
   59.36 +            continue;
   59.37 +
   59.38 +        printf("%4d: VCPU %u: ", port, status.vcpu);
   59.39 +
   59.40 +        switch ( status.status )
   59.41          {
   59.42 -        case EVTCHNSTAT_closed:
   59.43 -            msg = "Channel is not in use.";
   59.44 -            break;
   59.45          case EVTCHNSTAT_unbound:
   59.46 -            msg = "Channel is waiting interdom connection.";
   59.47 +            printf("Interdomain (Waiting connection) - Remote Domain %u",
   59.48 +                   status.u.unbound.dom);
   59.49              break;
   59.50          case EVTCHNSTAT_interdomain:
   59.51 -            msg = "Channel is connected to remote domain.";
   59.52 +            printf("Interdomain (Connected) - Remote Domain %u, Port %u",
   59.53 +                   status.u.interdomain.dom, status.u.interdomain.port);
   59.54              break;
   59.55          case EVTCHNSTAT_pirq:
   59.56 -            msg = "Channel is bound to a phys IRQ line.";
   59.57 +            printf("Physical IRQ %u", status.u.pirq);
   59.58              break;
   59.59          case EVTCHNSTAT_virq:
   59.60 -            msg = "Channel is bound to a virtual IRQ line.";
   59.61 +            printf("Virtual IRQ %u", status.u.virq);
   59.62              break;
   59.63          case EVTCHNSTAT_ipi:
   59.64 -            msg = "Channel is bound to a virtual IPI line.";
   59.65 +            printf("IPI");
   59.66              break;
   59.67          default:
   59.68 -            msg = "Unknown.";
   59.69 +            printf("Unknown");
   59.70              break;
   59.71 +        }
   59.72  
   59.73 -        }
   59.74 -        printf("%03d: %d: %s\n", port, status, msg);
   59.75 -        port++;
   59.76 +        printf("\n");
   59.77      }
   59.78  
   59.79      xc_interface_close(xc_fd);
    60.1 --- a/tools/xcutils/xc_save.c	Fri Sep 12 14:32:45 2008 +0900
    60.2 +++ b/tools/xcutils/xc_save.c	Fri Sep 12 14:47:40 2008 +0900
    60.3 @@ -32,7 +32,7 @@ static struct suspendinfo {
    60.4   * Issue a suspend request through stdout, and receive the acknowledgement
    60.5   * from stdin.  This is handled by XendCheckpoint in the Python layer.
    60.6   */
    60.7 -static int compat_suspend(int domid)
    60.8 +static int compat_suspend(void)
    60.9  {
   60.10      char ans[30];
   60.11  
   60.12 @@ -43,20 +43,39 @@ static int compat_suspend(int domid)
   60.13              !strncmp(ans, "done\n", 5));
   60.14  }
   60.15  
   60.16 -static int suspend_evtchn_release(int xc, int domid)
   60.17 +static int suspend_evtchn_release(void)
   60.18  {
   60.19      if (si.suspend_evtchn >= 0) {
   60.20 -	xc_evtchn_unbind(si.xce, si.suspend_evtchn);
   60.21 -	si.suspend_evtchn = -1;
   60.22 +        xc_evtchn_unbind(si.xce, si.suspend_evtchn);
   60.23 +        si.suspend_evtchn = -1;
   60.24      }
   60.25      if (si.xce >= 0) {
   60.26 -	xc_evtchn_close(si.xce);
   60.27 -	si.xce = -1;
   60.28 +        xc_evtchn_close(si.xce);
   60.29 +        si.xce = -1;
   60.30      }
   60.31  
   60.32      return 0;
   60.33  }
   60.34  
   60.35 +static int await_suspend(void)
   60.36 +{
   60.37 +    int rc;
   60.38 +
   60.39 +    do {
   60.40 +        rc = xc_evtchn_pending(si.xce);
   60.41 +        if (rc < 0) {
   60.42 +            warnx("error polling suspend notification channel: %d", rc);
   60.43 +            return -1;
   60.44 +        }
   60.45 +    } while (rc != si.suspend_evtchn);
   60.46 +
   60.47 +    /* harmless for one-off suspend */
   60.48 +    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
    60.49 +        warnx("failed to unmask suspend notification channel");
   60.50 +
   60.51 +    return 0;
   60.52 +}
   60.53 +
   60.54  static int suspend_evtchn_init(int xc, int domid)
   60.55  {
   60.56      struct xs_handle *xs;
   60.57 @@ -71,16 +90,16 @@ static int suspend_evtchn_init(int xc, i
   60.58  
   60.59      xs = xs_daemon_open();
   60.60      if (!xs) {
   60.61 -	errx(1, "failed to get xenstore handle");
   60.62 -	return -1;
   60.63 +        warnx("failed to get xenstore handle");
   60.64 +        return -1;
   60.65      }
   60.66      sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
   60.67      portstr = xs_read(xs, XBT_NULL, path, &plen);
   60.68      xs_daemon_close(xs);
   60.69  
   60.70      if (!portstr || !plen) {
   60.71 -	warnx("could not read suspend event channel");
   60.72 -	return -1;
   60.73 +        warnx("could not read suspend event channel");
   60.74 +        return -1;
   60.75      }
   60.76  
   60.77      port = atoi(portstr);
   60.78 @@ -88,27 +107,29 @@ static int suspend_evtchn_init(int xc, i
   60.79  
   60.80      si.xce = xc_evtchn_open();
   60.81      if (si.xce < 0) {
   60.82 -	errx(1, "failed to open event channel handle");
   60.83 -	goto cleanup;
   60.84 +        warnx("failed to open event channel handle");
   60.85 +        goto cleanup;
   60.86      }
   60.87  
   60.88      si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
   60.89      if (si.suspend_evtchn < 0) {
   60.90 -	errx(1, "failed to bind suspend event channel: %d",
   60.91 -	     si.suspend_evtchn);
   60.92 -	goto cleanup;
   60.93 +        warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
   60.94 +        goto cleanup;
   60.95      }
   60.96  
   60.97      rc = xc_domain_subscribe_for_suspend(xc, domid, port);
   60.98      if (rc < 0) {
   60.99 -	errx(1, "failed to subscribe to domain: %d", rc);
  60.100 -	goto cleanup;
  60.101 +        warnx("failed to subscribe to domain: %d", rc);
  60.102 +        goto cleanup;
  60.103      }
  60.104  
  60.105 +    /* event channel is pending immediately after binding */
  60.106 +    await_suspend();
  60.107 +
  60.108      return 0;
  60.109  
  60.110    cleanup:
  60.111 -    suspend_evtchn_release(xc, domid);
  60.112 +    suspend_evtchn_release();
  60.113  
  60.114      return -1;
  60.115  }
  60.116 @@ -116,29 +137,20 @@ static int suspend_evtchn_init(int xc, i
  60.117  /**
  60.118   * Issue a suspend request to a dedicated event channel in the guest, and
  60.119   * receive the acknowledgement from the subscribe event channel. */
  60.120 -static int evtchn_suspend(int domid)
  60.121 +static int evtchn_suspend(void)
  60.122  {
  60.123 -    int xcefd;
  60.124      int rc;
  60.125  
  60.126      rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
  60.127      if (rc < 0) {
  60.128 -	errx(1, "failed to notify suspend request channel: %d", rc);
  60.129 -	return 0;
  60.130 +        warnx("failed to notify suspend request channel: %d", rc);
  60.131 +        return 0;
  60.132      }
  60.133  
  60.134 -    xcefd = xc_evtchn_fd(si.xce);
  60.135 -    do {
  60.136 -      rc = xc_evtchn_pending(si.xce);
  60.137 -      if (rc < 0) {
  60.138 -	errx(1, "error polling suspend notification channel: %d", rc);
  60.139 -	return 0;
  60.140 -      }
  60.141 -    } while (rc != si.suspend_evtchn);
  60.142 -
  60.143 -    /* harmless for one-off suspend */
  60.144 -    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
  60.145 -	errx(1, "failed to unmask suspend notification channel: %d", rc);
  60.146 +    if (await_suspend() < 0) {
  60.147 +        warnx("suspend failed");
  60.148 +        return 0;
  60.149 +    }
  60.150  
  60.151      /* notify xend that it can do device migration */
  60.152      printf("suspended\n");
  60.153 @@ -147,12 +159,12 @@ static int evtchn_suspend(int domid)
  60.154      return 1;
  60.155  }
  60.156  
  60.157 -static int suspend(int domid)
  60.158 +static int suspend(void)
  60.159  {
  60.160      if (si.suspend_evtchn >= 0)
  60.161 -	return evtchn_suspend(domid);
  60.162 +        return evtchn_suspend();
  60.163  
  60.164 -    return compat_suspend(domid);
  60.165 +    return compat_suspend();
  60.166  }
  60.167  
  60.168  /* For HVM guests, there are two sources of dirty pages: the Xen shadow
  60.169 @@ -195,11 +207,9 @@ static void qemu_flip_buffer(int domid, 
  60.170  
  60.171      /* Tell qemu that we want it to start writing log-dirty bits to the
  60.172       * other buffer */
  60.173 -    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1)) {
  60.174 +    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
  60.175          errx(1, "can't write next-active to store path (%s)\n", 
  60.176 -              qemu_next_active_path);
  60.177 -        exit(1);
  60.178 -    }
  60.179 +             qemu_next_active_path);
  60.180  
  60.181      /* Wait a while for qemu to signal that it has switched to the new 
  60.182       * active buffer */
  60.183 @@ -208,10 +218,8 @@ static void qemu_flip_buffer(int domid, 
  60.184      tv.tv_usec = 0;
  60.185      FD_ZERO(&fdset);
  60.186      FD_SET(xs_fileno(xs), &fdset);
  60.187 -    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
  60.188 +    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
  60.189          errx(1, "timed out waiting for qemu to switch buffers\n");
  60.190 -        exit(1);
  60.191 -    }
  60.192      watch = xs_read_watch(xs, &len);
  60.193      free(watch);
  60.194      
  60.195 @@ -221,7 +229,7 @@ static void qemu_flip_buffer(int domid, 
  60.196          goto read_again;
  60.197  }
  60.198  
  60.199 -static void * init_qemu_maps(int domid, unsigned int bitmap_size)
  60.200 +static void *init_qemu_maps(int domid, unsigned int bitmap_size)
  60.201  {
  60.202      key_t key;
  60.203      char key_ascii[17] = {0,};
  60.204 @@ -293,7 +301,7 @@ main(int argc, char **argv)
  60.205      int ret;
  60.206  
  60.207      if (argc != 6)
  60.208 -	errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
  60.209 +        errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
  60.210  
  60.211      xc_fd = xc_interface_open();
  60.212      if (xc_fd < 0)
  60.213 @@ -305,13 +313,14 @@ main(int argc, char **argv)
  60.214      max_f = atoi(argv[4]);
  60.215      flags = atoi(argv[5]);
  60.216  
  60.217 -    suspend_evtchn_init(xc_fd, domid);
  60.218 +    if (suspend_evtchn_init(xc_fd, domid) < 0)
  60.219 +        warnx("suspend event channel initialization failed, using slow path");
  60.220  
  60.221      ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
  60.222                           &suspend, !!(flags & XCFLAGS_HVM),
  60.223                           &init_qemu_maps, &qemu_flip_buffer);
  60.224  
  60.225 -    suspend_evtchn_release(xc_fd, domid);
  60.226 +    suspend_evtchn_release();
  60.227  
  60.228      xc_interface_close(xc_fd);
  60.229  
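
Taken together, suspend_evtchn_init(), evtchn_suspend() and the new
await_suspend() implement a small handshake: bind to the guest's
advertised suspend port, notify it, wait until that same local port
fires back, then unmask it so it can be reused. A standalone sketch of
the flow using the same libxc calls (domid and remote_port are
placeholders; error handling trimmed):

    #include <xenctrl.h>

    /* Trigger one guest suspend and wait for the acknowledgement. */
    static int suspend_handshake(int domid, int remote_port)
    {
        int xce, port, pending, ret = -1;

        xce = xc_evtchn_open();
        if (xce < 0)
            return -1;

        port = xc_evtchn_bind_interdomain(xce, domid, remote_port);
        if (port < 0)
            goto out;

        xc_evtchn_notify(xce, port);        /* ask the guest to suspend */

        do {                                /* wait for the echo */
            pending = xc_evtchn_pending(xce);
            if (pending < 0)
                goto unbind;
        } while (pending != port);

        xc_evtchn_unmask(xce, port);        /* re-arm for a later suspend */
        ret = 0;

    unbind:
        xc_evtchn_unbind(xce, port);
    out:
        xc_evtchn_close(xce);
        return ret;
    }

Note that xc_save keeps the channel bound across iterations instead of
rebinding each time, and falls back to the stdout/stdin protocol in
compat_suspend() when the guest advertised no event channel.
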
    61.1 --- a/tools/xenstore/xs.c	Fri Sep 12 14:32:45 2008 +0900
    61.2 +++ b/tools/xenstore/xs.c	Fri Sep 12 14:47:40 2008 +0900
    61.3 @@ -795,8 +795,11 @@ char *xs_get_domain_path(struct xs_handl
    61.4  
    61.5  bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
    61.6  {
    61.7 -	return strcmp("F",
    61.8 -		      single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid));
    61.9 +	char *domain = single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid);
   61.10 +	int rc = strcmp("F", domain);
   61.11 +
   61.12 +	free(domain);
   61.13 +	return rc;
   61.14  }
   61.15  
   61.16  /* Only useful for DEBUG versions */
    62.1 --- a/tools/xentrace/formats	Fri Sep 12 14:32:45 2008 +0900
    62.2 +++ b/tools/xentrace/formats	Fri Sep 12 14:47:40 2008 +0900
    62.3 @@ -4,56 +4,69 @@ 0x0001f001  CPU%(cpu)d  %(tsc)d (+%(relt
    62.4  0x0001f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  wrap_buffer       0x%(1)08x
    62.5  0x0001f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_change        0x%(1)08x
    62.6  
    62.7 -0x0002f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
    62.8 -0x0002f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
    62.9 -0x0002f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.10 -0x0002f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.11 -0x0002f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.12 -0x0002f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.13 -0x0002f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown	  [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
   62.14 -0x0002f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
   62.15 -0x0002f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 0x%(1)08x ]
   62.16 -0x0002f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
   62.17 -0x0002f00B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
   62.18 -0x0002f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
   62.19 -0x0002f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
   62.20 -0x0002f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid = 0x%(1)08x, runtime = %(2)d ]
   62.21 -0x0002f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
   62.22 +0x00021011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_runnable [ dom:vcpu = 0x%(1)08x ]
   62.23 +0x00021021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_blocked  [ dom:vcpu = 0x%(1)08x ]
   62.24 +0x00021031  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_offline  [ dom:vcpu = 0x%(1)08x ]
   62.25 +0x00021101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_running [ dom:vcpu = 0x%(1)08x ]
   62.26 +0x00021121  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_blocked [ dom:vcpu = 0x%(1)08x ]
   62.27 +0x00021131  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_offline [ dom:vcpu = 0x%(1)08x ]
   62.28 +0x00021201  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_running  [ dom:vcpu = 0x%(1)08x ]
   62.29 +0x00021211  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_runnable [ dom:vcpu = 0x%(1)08x ]
   62.30 +0x00021231  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_offline  [ dom:vcpu = 0x%(1)08x ]
   62.31 +0x00021301  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_running  [ dom:vcpu = 0x%(1)08x ]
   62.32 +0x00021311  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_runnable [ dom:vcpu = 0x%(1)08x ]
   62.33 +0x00021321  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_blocked  [ dom:vcpu = 0x%(1)08x ]
   62.34 +
   62.35 +0x00028001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.36 +0x00028002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.37 +0x00028003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.38 +0x00028004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.39 +0x00028005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.40 +0x00028006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   62.41 +0x00028007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown	  [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
   62.42 +0x00028008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
   62.43 +0x00028009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 0x%(1)08x ]
   62.44 +0x0002800a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
   62.45 +0x0002800b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
   62.46 +0x0002800c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
   62.47 +0x0002800d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
   62.48 +0x0002800e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid = 0x%(1)08x, runtime = %(2)d ]
   62.49 +0x0002800f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
   62.50  
   62.51 -0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY     [ dom:vcpu = 0x%(1)08x ]
   62.52 -0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)08x ]
   62.53 -0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)016x ]
   62.54 -0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
   62.55 -0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
   62.56 -0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
   62.57 -0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 0x%(1)08x,  errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
   62.58 -0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, errorcode = 0x%(3)04x ]
   62.59 -0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, fake = %(3)d ]
   62.60 -0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
   62.61 -0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
   62.62 -0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
   62.63 -0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
   62.64 -0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
   62.65 -0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
   62.66 -0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
   62.67 -0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ     [ dom:vcpu = 0x%(1)08x ]
   62.68 -0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE    [ dom:vcpu = 0x%(1)08x ]
   62.69 -0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
   62.70 -0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
   62.71 -0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x, eax = 0x%(3)08x, ebx = 0x%(4)08x, ecx=0x%(5)08x, edx = 0x%(6)08x ]
   62.72 -0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
   62.73 -0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI         [ dom:vcpu = 0x%(1)08x ]
   62.74 -0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI         [ dom:vcpu = 0x%(1)08x ]
   62.75 -0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x ]
   62.76 -0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ dom:vcpu = 0x%(1)08x, intpending = %(2)d ]
   62.77 -0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)08x ]
   62.78 -0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)016x ]
   62.79 -0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE         [ dom:vcpu = 0x%(1)08x ]
   62.80 -0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
   62.81 -0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
   62.82 -0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS        [ dom:vcpu = 0x%(1)08x ]
   62.83 -0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 0x%(1)08x, value = 0x%(2)08x ]
   62.84 -0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 0x%(1)08x, value = 0x%(2)016x ]
   62.85 +0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY
   62.86 +0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 0x%(1)08x, rIP  = 0x%(2)08x ]
   62.87 +0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 0x%(1)08x, rIP  = 0x%(2)016x ]
   62.88 +0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 0x%(2)02x, virt = 0x%(1)08x ]
   62.89 +0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 0x%(2)02x, virt = 0x%(1)016x ]
   62.90 +0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 0x%(1)02x, virt = 0x%(2)08x ]
   62.91 +0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 0x%(1)02x, virt = 0x%(2)016x ]
   62.92 +0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ vector = 0x%(1)02x, errorcode = 0x%(2)04x ]
   62.93 +0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ vector = 0x%(1)02x, fake = %(2)d ]
   62.94 +0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ vector = 0x%(1)02x ]
   62.95 +0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ port = 0x%(1)04x, size = %(2)d ]
   62.96 +0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ port = 0x%(1)04x, size = %(2)d ]
   62.97 +0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, value = 0x%(2)08x ]
   62.98 +0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, value = 0x%(2)016x ]
   62.99 +0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, value = 0x%(2)08x ]
  62.100 +0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, value = 0x%(2)016x ]
   62.101 +0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ
  62.102 +0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE
  62.103 +0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
  62.104 +0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
  62.105 +0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ func = 0x%(1)08x, eax = 0x%(2)08x, ebx = 0x%(3)08x, ecx=0x%(4)08x, edx = 0x%(5)08x ]
  62.106 +0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ vector = 0x%(1)02x ]
  62.107 +0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI
  62.108 +0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI
  62.109 +0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ func = 0x%(1)08x ]
  62.110 +0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ intpending = %(1)d ]
  62.111 +0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = %(1)d, virt = 0x%(2)08x ]
  62.112 +0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = %(1)d, virt = 0x%(2)016x ]
  62.113 +0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE
  62.114 +0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ data = 0x%(1)04x ]
  62.115 +0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ data = 0x%(1)04x ]
  62.116 +0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS
  62.117 +0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 0x%(1)08x ]
  62.118 +0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 0x%(1)016x ]
  62.119  
  62.120  0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = %(1)d ]
  62.121  0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = %(1)d ]
  62.122 @@ -65,3 +78,41 @@ 0x0020f003  CPU%(cpu)d  %(tsc)d (+%(relt
  62.123  0x0020f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  trap       [ rip = 0x%(1)016x, trapnr:error = 0x%(2)08x ]
  62.124  0x0020f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ eip = 0x%(1)08x, addr = 0x%(2)08x, error = 0x%(3)08x ]
  62.125  0x0020f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ rip = 0x%(1)16x, addr = 0x%(3)16x, error = 0x%(5)08x ]
  62.126 +
  62.127 +0x0020f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ eip = 0x%(1)08x ]
  62.128 +0x0020f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ rip = 0x%(1)16x ]
  62.129 +0x0020f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ eip = 0x%(1)08x ]
  62.130 +0x0020f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ rip = 0x%(1)16x ]
  62.131 +0x0020f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 0x%(2)08x, eip = 0x%(1)08x, npte = 0x%(1)16x ]
  62.132 +0x0020f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 0x%(2)16x, rip = 0x%(1)16x, npte = 0x%(1)16x ]
  62.133 +
  62.134 +0x0040f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow                 [ gl1e = 0x%(1)16x, va = 0x%(2)08x, flags = 0x%(3)08x ]
  62.135 +0x0040f101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow                 [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
  62.136 +0x0040f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate             [ va = 0x%(1)08x ]
  62.137 +0x0040f102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate             [ va = 0x%(1)16x ]
  62.138 +0x0040f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio                  [ va = 0x%(1)08x ]
  62.139 +0x0040f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio                  [ va = 0x%(1)16x ]
  62.140 +0x0040f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path            [ va = 0x%(1)08x ]
  62.141 +0x0040f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path            [ va = 0x%(1)16x ]
  62.142 +0x0040f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                       [ va = 0x%(1)08x ]
  62.143 +0x0040f105  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                       [ va = 0x%(1)16x ]
  62.144 +0x0040f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                      [ gl1e = 0x%(1)08x, va = 0x%(2)08x, flags = 0x%(3)08x ]
  62.145 +0x0040f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                      [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
  62.146 +0x0040f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying                 [ va = 0x%(1)08x ]
  62.147 +0x0040f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying                 [ va = 0x%(1)16x ]
  62.148 +0x0040f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                    [ gl1e = 0x%(1)08x, write_val = 0x%(2)08x, va = 0x%(3)08x, flags = 0x%(4)08x, emulation_count = 0x%(5)08x]
  62.149 +0x0040f108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                    [ gl1e = 0x%(1)16x, write_val = 0x%(2)16x, va = 0x%(3)16x, flags = 0x%(4)08x, emulation_count = 0x%(5)08x]
  62.150 +0x0040f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user      [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  62.151 +0x0040f109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user      [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  62.152 +0x0040f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj    [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  62.153 +0x0040f10a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj    [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  62.154 +0x0040f00b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_unhandled [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  62.155 +0x0040f10b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_unhandled [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  62.156 +0x0040f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf           [ gfn = 0x%(1)08x ]
  62.157 +0x0040f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf           [ gfn = 0x%(1)16x ]
  62.158 +0x0040f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin     [ gfn = 0x%(1)08x ]
  62.159 +0x0040f10d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin     [ gfn = 0x%(1)16x ]
  62.160 +0x0040f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full        [ gfn = 0x%(1)08x ]
  62.161 +0x0040f10e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full        [ gfn = 0x%(1)16x ]
  62.162 +0x0040f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only        [ gfn = 0x%(1)08x ]
  62.163 +0x0040f10f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only        [ gfn = 0x%(1)16x ]
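
Each format line above keys on the 32-bit record header: the event ID
sits in the low bits, and the count of extra 32-bit payload words (the
%(1), %(2), ... fields) is stored above TRACE_EXTRA_SHIFT -- the same
encoding xentrace.c uses to build CPU_CHANGE_HEADER below. A decoding
sketch; the shift value here is an assumption taken to match Xen's
public trace.h rather than quoted from it:

    #include <stdint.h>
    #include <stdio.h>

    #define TRACE_EXTRA_SHIFT 28    /* assumed to match <public/trace.h> */

    /* Split a trace record header into event ID and payload size. */
    static void decode_header(uint32_t header)
    {
        uint32_t event = header & ((1u << TRACE_EXTRA_SHIFT) - 1);
        uint32_t extra = header >> TRACE_EXTRA_SHIFT;  /* 32-bit words */

        printf("event 0x%07x, %u extra words\n", event, extra);
    }
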
    63.1 --- a/tools/xentrace/xentrace.c	Fri Sep 12 14:32:45 2008 +0900
    63.2 +++ b/tools/xentrace/xentrace.c	Fri Sep 12 14:47:40 2008 +0900
    63.3 @@ -56,6 +56,7 @@ typedef struct settings_st {
    63.4      unsigned long tbuf_size;
    63.5      unsigned long disk_rsvd;
    63.6      unsigned long timeout;
    63.7 +    unsigned long memory_buffer;
    63.8      uint8_t discard:1,
    63.9          disable_tracing:1;
   63.10  } settings_t;
   63.11 @@ -67,12 +68,245 @@ int interrupted = 0; /* gets set if we g
   63.12  static int xc_handle = -1;
   63.13  static int event_fd = -1;
   63.14  static int virq_port = -1;
   63.15 +static int outfd = 1;
   63.16  
   63.17  static void close_handler(int signal)
   63.18  {
   63.19      interrupted = 1;
   63.20  }
   63.21  
   63.22 +static struct {
   63.23 +    char * buf;
   63.24 +    unsigned long prod, cons, size;
   63.25 +    unsigned long pending_size, pending_prod;
   63.26 +} membuf = { 0 };
   63.27 +
   63.28 +#define MEMBUF_INDEX_RESET_THRESHOLD (1<<29)
   63.29 +
   63.30 +/* FIXME -- make a power of 2 so we can mask instead. */
   63.31 +#define MEMBUF_POINTER(_i) (membuf.buf + ((_i) % membuf.size))
   63.32 +#define MEMBUF_CONS_INCREMENT(_n)               \
   63.33 +    do {                                        \
   63.34 +        membuf.cons += (_n);                    \
   63.35 +    } while(0)
   63.36 +#define MEMBUF_PROD_SET(_x)                                             \
   63.37 +    do {                                                                \
   63.38 +        if ( (_x) < membuf.prod ) {                                     \
   63.39 +            fprintf(stderr, "%s: INTERNAL_ERROR: prod %lu, trying to set to %lu!\n", \
   63.40 +                    __func__, membuf.prod, (unsigned long)(_x));        \
   63.41 +            exit(1);                                                    \
   63.42 +        }                                                               \
   63.43 +        membuf.prod = (_x);                                             \
   63.44 +        if ( (_x) > MEMBUF_INDEX_RESET_THRESHOLD )                      \
   63.45 +        {                                                               \
   63.46 +            membuf.prod %= membuf.size;                                 \
   63.47 +            membuf.cons %= membuf.size;                                 \
   63.48 +            if( membuf.prod < membuf.cons )                             \
   63.49 +                membuf.prod += membuf.size;                             \
   63.50 +        }                                                               \
   63.51 +    } while(0) 
   63.52 +
   63.53 +struct cpu_change_record {
   63.54 +    uint32_t header;
   63.55 +    struct {
   63.56 +        int cpu;
   63.57 +        unsigned window_size;
   63.58 +    } data;
   63.59 +};
   63.60 +
   63.61 +#define CPU_CHANGE_HEADER                                           \
   63.62 +    (TRC_TRACE_CPU_CHANGE                                           \
   63.63 +     | (((sizeof(struct cpu_change_record)/sizeof(uint32_t)) - 1)   \
   63.64 +        << TRACE_EXTRA_SHIFT) )
   63.65 +
   63.66 +void membuf_alloc(unsigned long size)
   63.67 +{
   63.68 +    membuf.buf = malloc(size);
   63.69 +
   63.70 +    if(!membuf.buf)
   63.71 +    {
   63.72 +        fprintf(stderr, "%s: Couldn't malloc %lu bytes!\n",
   63.73 +                __func__, size);
   63.74 +        exit(1);
   63.75 +    }
   63.76 +
   63.77 +    membuf.prod = membuf.cons = 0;
   63.78 +    membuf.size = size;
   63.79 +}
   63.80 +
   63.81 +/*
   63.82 + * Reserve a new window in the buffer.  Move the 'consumer' forward size
   63.83 + * bytes, re-adjusting the cpu window sizes as necessary, and insert a
   63.84 + * cpu_change record.
   63.85 + */
   63.86 +void membuf_reserve_window(unsigned cpu, unsigned long window_size)
   63.87 +{
   63.88 +    struct cpu_change_record *rec;
   63.89 +    long need_to_consume, free, freed;
   63.90 +
   63.91 +    if ( membuf.pending_size > 0 )
   63.92 +    {
   63.93 +        fprintf(stderr, "%s: INTERNAL_ERROR: pending_size %lu\n",
   63.94 +                __func__, membuf.pending_size);
   63.95 +        exit(1);
   63.96 +    }
   63.97 +
   63.98 +    need_to_consume = window_size + sizeof(*rec);
   63.99 +
  63.100 +    if ( window_size > membuf.size )
  63.101 +    {
  63.102 +        fprintf(stderr, "%s: reserve size %lu larger than buffer size %lu!\n",
  63.103 +                __func__, window_size, membuf.size);
  63.104 +        exit(1);
  63.105 +    }
  63.106 +
  63.107 +    /* Subtract free space already in buffer. */
  63.108 +    free = membuf.size - (membuf.prod - membuf.cons);
  63.109 +    if( need_to_consume < free)
  63.110 +        goto start_window;
  63.111 +
  63.112 +    need_to_consume -= free;
  63.113 +
  63.114 +    /*
  63.115 +     * "Free" up full windows until we have enough for this window.
  63.116 +     * It's a bit wasteful to throw away partial buffers, but the only
   63.117 +     * other option is to scan through the buffer headers.  Since the
  63.118 +     * common case is that it's going to be thrown away next anyway, I
  63.119 +     * think minimizing the overall impact is more important.
  63.120 +     */
  63.121 +    do {
  63.122 +        rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.cons);
  63.123 +        if( rec->header != CPU_CHANGE_HEADER )
  63.124 +        {
  63.125 +            fprintf(stderr, "%s: INTERNAL ERROR: no cpu_change record at consumer!\n",
  63.126 +                    __func__);
  63.127 +            exit(EXIT_FAILURE);
  63.128 +        }
  63.129 +
  63.130 +        freed = sizeof(*rec) + rec->data.window_size;
  63.131 +
  63.132 +        if ( need_to_consume > 0 )
  63.133 +        {
  63.134 +            MEMBUF_CONS_INCREMENT(freed);
  63.135 +            need_to_consume -= freed;
  63.136 +        }
  63.137 +    } while( need_to_consume > 0 );
  63.138 +
  63.139 +start_window:
  63.140 +    /*
  63.141 +     * Start writing "pending" data.  Update prod once all this data is
  63.142 +     * written.
  63.143 +     */
  63.144 +    membuf.pending_prod = membuf.prod;
  63.145 +    membuf.pending_size = window_size;
  63.146 +
  63.147 +    rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.pending_prod);
  63.148 +
  63.149 +    rec->header = CPU_CHANGE_HEADER;
  63.150 +    rec->data.cpu = cpu;
  63.151 +    rec->data.window_size = window_size;
  63.152 +
  63.153 +    membuf.pending_prod += sizeof(*rec);
  63.154 +}
  63.155 +
  63.156 +void membuf_write(void *start, unsigned long size) {
  63.157 +    char * p;
  63.158 +    unsigned long wsize;
  63.159 +
  63.160 +    if( (membuf.size - (membuf.prod - membuf.cons)) < size )
  63.161 +    {
  63.162 +        fprintf(stderr, "%s: INTERNAL ERROR: need %lu bytes, only have %lu!\n",
  63.163 +                __func__, size, membuf.prod - membuf.cons);
  63.164 +        exit(1);
  63.165 +    }
  63.166 +
  63.167 +    if( size > membuf.pending_size )
  63.168 +    {
  63.169 +        fprintf(stderr, "%s: INTERNAL ERROR: size %lu, pending %lu!\n",
  63.170 +                __func__, size, membuf.pending_size);
  63.171 +        exit(1);
  63.172 +    }
  63.173 +
  63.174 +    wsize = size;
  63.175 +    p = MEMBUF_POINTER(membuf.pending_prod);
  63.176 +
  63.177 +    /* If the buffer overlaps the "wrap", do an extra write */
  63.178 +    if ( p + size > membuf.buf + membuf.size )
  63.179 +    {
  63.180 +        int usize = ( membuf.buf + membuf.size ) - p;
  63.181 +
  63.182 +        memcpy(p, start, usize);
  63.183 +
  63.184 +        start += usize;
  63.185 +        wsize -= usize;
  63.186 +        p = membuf.buf;
  63.187 +    }
  63.188 +
  63.189 +    memcpy(p, start, wsize);
  63.190 +
  63.191 +    membuf.pending_prod += size;
  63.192 +    membuf.pending_size -= size;
  63.193 +
  63.194 +    if ( membuf.pending_size == 0 )
  63.195 +    {
  63.196 +        MEMBUF_PROD_SET(membuf.pending_prod);
  63.197 +    }
  63.198 +}
  63.199 +
  63.200 +void membuf_dump(void) {
  63.201 +    /* Dump circular memory buffer */
  63.202 +    int cons, prod, wsize, written;
  63.203 +    char * wstart;
  63.204 +
  63.205 +    fprintf(stderr, "Dumping memory buffer.\n");
  63.206 +
  63.207 +    cons = membuf.cons % membuf.size; 
  63.208 +    prod = membuf.prod % membuf.size;
  63.209 +   
  63.210 +    if(prod > cons)
  63.211 +    {
  63.212 +        /* Write in one go */
  63.213 +        wstart = membuf.buf + cons;
  63.214 +        wsize = prod - cons;
  63.215 +
  63.216 +        written = write(outfd, wstart, wsize);
  63.217 +        if ( written != wsize )
  63.218 +            goto fail;
  63.219 +    }
  63.220 +    else
  63.221 +    {
  63.222 +        /* Write in two pieces: cons->end, beginning->prod. */
  63.223 +        wstart = membuf.buf + cons;
  63.224 +        wsize = membuf.size - cons;
  63.225 +
  63.226 +        written = write(outfd, wstart, wsize);
  63.227 +        if ( written != wsize )
  63.228 +        {
  63.229 +            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
  63.230 +                    wsize, written);
  63.231 +            goto fail;
  63.232 +        }
  63.233 +
  63.234 +        wstart = membuf.buf;
  63.235 +        wsize = prod;
  63.236 +
  63.237 +        written = write(outfd, wstart, wsize);
  63.238 +        if ( written != wsize )
  63.239 +        {
  63.240 +            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
  63.241 +                    wsize, written);
  63.242 +            goto fail;
  63.243 +        }
  63.244 +    }
  63.245 +
  63.246 +    membuf.cons = membuf.prod = 0;
  63.247 +    
  63.248 +    return;
  63.249 +fail:
  63.250 +    exit(1);
  63.251 +    return;
  63.252 +}
  63.253 +
  63.254  /**
  63.255   * write_buffer - write a section of the trace buffer
  63.256   * @cpu      - source buffer CPU ID
  63.257 @@ -85,20 +319,20 @@ static void close_handler(int signal)
  63.258   * of the buffer write.
  63.259   */
  63.260  static void write_buffer(unsigned int cpu, unsigned char *start, int size,
  63.261 -               int total_size, int outfd)
  63.262 +                         int total_size)
  63.263  {
  63.264      struct statvfs stat;
  63.265      size_t written = 0;
  63.266      
  63.267 -    if ( opts.disk_rsvd != 0 )
  63.268 +    if ( opts.memory_buffer == 0 && opts.disk_rsvd != 0 )
  63.269      {
  63.270          unsigned long long freespace;
  63.271  
  63.272          /* Check that filesystem has enough space. */
  63.273          if ( fstatvfs (outfd, &stat) )
  63.274          {
  63.275 -                fprintf(stderr, "Statfs failed!\n");
  63.276 -                goto fail;
  63.277 +            fprintf(stderr, "Statfs failed!\n");
  63.278 +            goto fail;
  63.279          }
  63.280  
  63.281          freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
  63.282 @@ -112,8 +346,8 @@ static void write_buffer(unsigned int cp
  63.283  
  63.284          if ( freespace <= opts.disk_rsvd )
  63.285          {
  63.286 -                fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
  63.287 -                exit (EXIT_FAILURE);
  63.288 +            fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
  63.289 +            exit (EXIT_FAILURE);
  63.290          }
  63.291      }
  63.292  
  63.293 @@ -122,40 +356,46 @@ static void write_buffer(unsigned int cp
  63.294       * first write. */
  63.295      if ( total_size != 0 )
  63.296      {
  63.297 -        struct {
  63.298 -            uint32_t header;
  63.299 -            struct {
  63.300 -                unsigned cpu;
  63.301 -                unsigned byte_count;
  63.302 -            } extra;
  63.303 -        } rec;
  63.304 +        if ( opts.memory_buffer )
  63.305 +        {
  63.306 +            membuf_reserve_window(cpu, total_size);
  63.307 +        }
  63.308 +        else
  63.309 +        {
  63.310 +            struct cpu_change_record rec;
  63.311 +
  63.312 +            rec.header = CPU_CHANGE_HEADER;
  63.313 +            rec.data.cpu = cpu;
  63.314 +            rec.data.window_size = total_size;
  63.315  
  63.316 -        rec.header = TRC_TRACE_CPU_CHANGE
  63.317 -            | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
  63.318 -        rec.extra.cpu = cpu;
  63.319 -        rec.extra.byte_count = total_size;
  63.320 +            written = write(outfd, &rec, sizeof(rec));
  63.321 +            if ( written != sizeof(rec) )
  63.322 +            {
  63.323 +                fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
  63.324 +                        written);
  63.325 +                goto fail;
  63.326 +            }
  63.327 +        }
  63.328 +    }
  63.329  
  63.330 -        written = write(outfd, &rec, sizeof(rec));
  63.331 -
  63.332 -        if ( written != sizeof(rec) )
  63.333 +    if ( opts.memory_buffer )
  63.334 +    {
  63.335 +        membuf_write(start, size);
  63.336 +    }
  63.337 +    else
  63.338 +    {
  63.339 +        written = write(outfd, start, size);
  63.340 +        if ( written != size )
  63.341          {
  63.342 -            fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
  63.343 -                    written);
  63.344 +            fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
  63.345 +                    size, written);
  63.346              goto fail;
  63.347          }
  63.348      }
  63.349  
  63.350 -    written = write(outfd, start, size);
  63.351 -    if ( written != size )
  63.352 -    {
  63.353 -        fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
  63.354 -                size, written);
  63.355 -        goto fail;
  63.356 -    }
  63.357 -
  63.358      return;
  63.359  
  63.360 - fail:
  63.361 +fail:
  63.362      PERROR("Failed to write trace data");
  63.363      exit(EXIT_FAILURE);
  63.364  }
  63.365 @@ -394,7 +634,7 @@ static void wait_for_event_or_timeout(un
  63.366   * monitor_tbufs - monitor the contents of tbufs and output to a file
  63.367   * @logfile:       the FILE * representing the file to log to
  63.368   */
  63.369 -static int monitor_tbufs(int outfd)
  63.370 +static int monitor_tbufs(void)
  63.371  {
  63.372      int i;
  63.373  
  63.374 @@ -429,9 +669,9 @@ static int monitor_tbufs(int outfd)
  63.375              meta[i]->cons = meta[i]->prod;
  63.376  
  63.377      /* now, scan buffers for events */
  63.378 -    while ( !interrupted )
  63.379 +    while ( 1 )
  63.380      {
  63.381 -        for ( i = 0; (i < num) && !interrupted; i++ )
  63.382 +        for ( i = 0; i < num; i++ )
  63.383          {
  63.384              unsigned long start_offset, end_offset, window_size, cons, prod;
  63.385                  
  63.386 @@ -463,8 +703,7 @@ static int monitor_tbufs(int outfd)
  63.387                  /* If window does not wrap, write in one big chunk */
  63.388                  write_buffer(i, data[i]+start_offset,
  63.389                               window_size,
  63.390 -                             window_size,
  63.391 -                             outfd);
  63.392 +                             window_size);
  63.393              }
  63.394              else
  63.395              {
  63.396 @@ -474,24 +713,29 @@ static int monitor_tbufs(int outfd)
  63.397                   */
  63.398                  write_buffer(i, data[i] + start_offset,
  63.399                               data_size - start_offset,
  63.400 -                             window_size,
  63.401 -                             outfd);
  63.402 +                             window_size);
  63.403                  write_buffer(i, data[i],
  63.404                               end_offset,
  63.405 -                             0,
  63.406 -                             outfd);
  63.407 +                             0);
  63.408              }
  63.409  
  63.410              xen_mb(); /* read buffer, then update cons. */
  63.411              meta[i]->cons = prod;
  63.412 +
  63.413          }
  63.414  
  63.415 +        if ( interrupted )
  63.416 +            break;
  63.417 +
  63.418          wait_for_event_or_timeout(opts.poll_sleep);
  63.419      }
  63.420  
  63.421 -    if(opts.disable_tracing)
  63.422 +    if ( opts.disable_tracing )
  63.423          disable_tbufs();
  63.424  
  63.425 +    if ( opts.memory_buffer )
  63.426 +        membuf_dump();
  63.427 +
  63.428      /* cleanup */
  63.429      free(meta);
  63.430      free(data);
  63.431 @@ -538,6 +782,8 @@ static void usage(void)
  63.432  "  -T  --time-interval=s   Run xentrace for s seconds and quit.\n" \
  63.433  "  -?, --help              Show this message\n" \
  63.434  "  -V, --version           Print program version\n" \
  63.435 +"  -M, --memory-buffer=b   Copy trace records to a circular memory buffer.\n" \
  63.436 +"                          Dump to file on exit.\n" \
  63.437  "\n" \
  63.438  "This tool is used to capture trace buffer data from Xen. The\n" \
  63.439  "data is output in a binary format, in the following order:\n" \
  63.440 @@ -553,6 +799,53 @@ static void usage(void)
  63.441      exit(EXIT_FAILURE);
  63.442  }
  63.443  
  63.444 +/* convert the argument string pointed to by arg to a long int representation,
  63.445 + * including suffixes such as 'M' and 'k'. */
  63.446 +#define MB (1024*1024)
  63.447 +#define KB (1024)
  63.448 +long sargtol(const char *restrict arg, int base)
  63.449 +{
  63.450 +    char *endp;
  63.451 +    long val;
  63.452 +
  63.453 +    errno = 0;
  63.454 +    val = strtol(arg, &endp, base);
  63.455 +    
  63.456 +    if ( errno != 0 )
  63.457 +    {
  63.458 +        fprintf(stderr, "Invalid option argument: %s\n", arg);
  63.459 +        fprintf(stderr, "Error: %s\n\n", strerror(errno));
  63.460 +        usage();
  63.461 +    }
  63.462 +    else if (endp == arg)
  63.463 +    {
  63.464 +        goto invalid;
  63.465 +    }
  63.466 +
  63.467 +    switch(*endp)
  63.468 +    {
  63.469 +    case '\0':
  63.470 +        break;
  63.471 +    case 'M':
  63.472 +        val *= MB;
  63.473 +        break;
  63.474 +    case 'K':
  63.475 +    case 'k':
  63.476 +        val *= KB;
  63.477 +        break;
  63.478 +    default:
  63.479 +        fprintf(stderr, "Unknown suffix %c\n", *endp);
  63.480 +        exit(1);
  63.481 +    }
  63.482 +
  63.483 +
  63.484 +    return val;
  63.485 +invalid:
  63.486 +    return 0;
  63.487 +    fprintf(stderr, "Invalid option argument: %s\n\n", arg);
  63.488 +    usage();
  63.489 +}
  63.490 +
  63.491  /* convert the argument string pointed to by arg to a long int representation */
  63.492  static long argtol(const char *restrict arg, int base)
  63.493  {
  63.494 @@ -606,6 +899,7 @@ static void parse_args(int argc, char **
  63.495          { "trace-buf-size", required_argument, 0, 'S' },
  63.496          { "reserve-disk-space", required_argument, 0, 'r' },
  63.497          { "time-interval",  required_argument, 0, 'T' },
  63.498 +        { "memory-buffer",  required_argument, 0, 'M' },
  63.499          { "discard-buffers", no_argument,      0, 'D' },
  63.500          { "dont-disable-tracing", no_argument, 0, 'x' },
  63.501          { "help",           no_argument,       0, '?' },
  63.502 @@ -613,7 +907,7 @@ static void parse_args(int argc, char **
  63.503          { 0, 0, 0, 0 }
  63.504      };
  63.505  
  63.506 -    while ( (option = getopt_long(argc, argv, "c:e:s:S:t:?V",
  63.507 +    while ( (option = getopt_long(argc, argv, "t:s:c:e:S:r:T:M:Dx?V",
  63.508                      long_options, NULL)) != -1) 
  63.509      {
  63.510          switch ( option )
  63.511 @@ -655,6 +949,10 @@ static void parse_args(int argc, char **
  63.512              opts.timeout = argtol(optarg, 0);
  63.513              break;
  63.514  
  63.515 +        case 'M':
  63.516 +            opts.memory_buffer = sargtol(optarg, 0);
  63.517 +            break;
  63.518 +
  63.519          default:
  63.520              usage();
  63.521          }
  63.522 @@ -674,7 +972,7 @@ static void parse_args(int argc, char **
  63.523  
  63.524  int main(int argc, char **argv)
  63.525  {
  63.526 -    int outfd = 1, ret;
  63.527 +    int ret;
  63.528      struct sigaction act;
  63.529  
  63.530      opts.outfile = 0;
  63.531 @@ -720,6 +1018,9 @@ int main(int argc, char **argv)
  63.532          exit(EXIT_FAILURE);
  63.533      }
  63.534  
  63.535 +    if ( opts.memory_buffer > 0 )
  63.536 +        membuf_alloc(opts.memory_buffer);
  63.537 +
  63.538      /* ensure that if we get a signal, we'll do cleanup, then exit */
  63.539      act.sa_handler = close_handler;
  63.540      act.sa_flags = 0;
  63.541 @@ -729,7 +1030,7 @@ int main(int argc, char **argv)
  63.542      sigaction(SIGINT,  &act, NULL);
  63.543      sigaction(SIGALRM, &act, NULL);
  63.544  
  63.545 -    ret = monitor_tbufs(outfd);
  63.546 +    ret = monitor_tbufs();
  63.547  
  63.548      return ret;
  63.549  }
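
The membuf_alloc()/membuf_dump() calls used above are defined in an earlier
hunk of this changeset that is not shown here, so their real layout may
differ. As a rough sketch only, a byte-oriented circular buffer with this
interface can be as small as:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *membuf;            /* backing store for trace records      */
    static size_t membuf_size;      /* capacity in bytes                    */
    static size_t membuf_head;      /* next write offset                    */
    static int membuf_wrapped;      /* nonzero once old data is overwritten */

    static void membuf_alloc(size_t size)
    {
        membuf = malloc(size);
        if ( !membuf )
            exit(EXIT_FAILURE);
        membuf_size = size;
    }

    /* Append a record, overwriting the oldest bytes on wrap.  Assumes
     * each record is smaller than the whole buffer. */
    static void membuf_write(const void *rec, size_t len)
    {
        size_t space = membuf_size - membuf_head;

        if ( len >= space )
        {
            memcpy(membuf + membuf_head, rec, space);
            rec = (const char *)rec + space;
            len -= space;
            membuf_head = 0;
            membuf_wrapped = 1;
        }
        memcpy(membuf + membuf_head, rec, len);
        membuf_head += len;
    }

    /* Dump oldest-to-newest on exit. */
    static void membuf_dump(void)
    {
        if ( membuf_wrapped )
            fwrite(membuf + membuf_head, 1, membuf_size - membuf_head, stdout);
        fwrite(membuf, 1, membuf_head, stdout);
    }

    int main(void)
    {
        membuf_alloc(8);
        membuf_write("abcdefgh", 8);   /* fills the ring exactly */
        membuf_write("XY", 2);         /* wraps: overwrites "ab" */
        membuf_dump();                 /* prints "cdefghXY" */
        return 0;
    }

Note that a real trace buffer must keep records intact across the wrap
point; this byte-oriented sketch may split one, which the in-tree
implementation presumably avoids.
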
    64.1 --- a/xen/arch/x86/acpi/Makefile	Fri Sep 12 14:32:45 2008 +0900
    64.2 +++ b/xen/arch/x86/acpi/Makefile	Fri Sep 12 14:47:40 2008 +0900
    64.3 @@ -1,5 +1,5 @@
    64.4  subdir-y += cpufreq
    64.5  
    64.6  obj-y += boot.o
    64.7 -obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
    64.8 +obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
    64.9  obj-y += pmstat.o
    65.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Fri Sep 12 14:32:45 2008 +0900
    65.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Fri Sep 12 14:47:40 2008 +0900
    65.3 @@ -39,6 +39,7 @@
    65.4  #include <xen/smp.h>
    65.5  #include <xen/guest_access.h>
    65.6  #include <xen/keyhandler.h>
    65.7 +#include <xen/cpuidle.h>
    65.8  #include <asm/cache.h>
    65.9  #include <asm/io.h>
   65.10  #include <asm/hpet.h>
   65.11 @@ -49,13 +50,10 @@
   65.12  #define DEBUG_PM_CX
   65.13  
   65.14  #define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
   65.15 +#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
   65.16  #define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
   65.17  #define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
   65.18  
   65.19 -#define ACPI_PROCESSOR_MAX_POWER        8
   65.20 -#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
   65.21 -#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000
   65.22 -
   65.23  static void (*lapic_timer_off)(void);
   65.24  static void (*lapic_timer_on)(void);
   65.25  
   65.26 @@ -65,66 +63,6 @@ extern void (*pm_idle) (void);
   65.27  static void (*pm_idle_save) (void) __read_mostly;
   65.28  unsigned int max_cstate __read_mostly = 2;
   65.29  integer_param("max_cstate", max_cstate);
   65.30 -/*
   65.31 - * bm_history -- bit-mask with a bit per jiffy of bus-master activity
   65.32 - * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
   65.33 - * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
   65.34 - * 100 HZ: 0x0000000F: 4 jiffies = 40ms
   65.35 - * reduce history for more aggressive entry into C3
   65.36 - */
   65.37 -unsigned int bm_history __read_mostly =
   65.38 -    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
   65.39 -integer_param("bm_history", bm_history);
   65.40 -
   65.41 -struct acpi_processor_cx;
   65.42 -
   65.43 -struct acpi_processor_cx_policy
   65.44 -{
   65.45 -    u32 count;
   65.46 -    struct acpi_processor_cx *state;
   65.47 -    struct
   65.48 -    {
   65.49 -        u32 time;
   65.50 -        u32 ticks;
   65.51 -        u32 count;
   65.52 -        u32 bm;
   65.53 -    } threshold;
   65.54 -};
   65.55 -
   65.56 -struct acpi_processor_cx
   65.57 -{
   65.58 -    u8 valid;
   65.59 -    u8 type;
   65.60 -    u32 address;
   65.61 -    u8 space_id;
   65.62 -    u32 latency;
   65.63 -    u32 latency_ticks;
   65.64 -    u32 power;
   65.65 -    u32 usage;
   65.66 -    u64 time;
   65.67 -    struct acpi_processor_cx_policy promotion;
   65.68 -    struct acpi_processor_cx_policy demotion;
   65.69 -};
   65.70 -
   65.71 -struct acpi_processor_flags
   65.72 -{
   65.73 -    u8 bm_control:1;
   65.74 -    u8 bm_check:1;
   65.75 -    u8 has_cst:1;
   65.76 -    u8 power_setup_done:1;
   65.77 -    u8 bm_rld_set:1;
   65.78 -};
   65.79 -
   65.80 -struct acpi_processor_power
   65.81 -{
   65.82 -    struct acpi_processor_flags flags;
   65.83 -    struct acpi_processor_cx *state;
   65.84 -    s_time_t bm_check_timestamp;
   65.85 -    u32 default_state;
   65.86 -    u32 bm_activity;
   65.87 -    u32 count;
   65.88 -    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
   65.89 -};
   65.90  
   65.91  static struct acpi_processor_power processor_powers[NR_CPUS];
   65.92  
   65.93 @@ -133,26 +71,21 @@ static void print_acpi_power(uint32_t cp
   65.94      uint32_t i;
   65.95  
   65.96      printk("==cpu%d==\n", cpu);
   65.97 -    printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
   65.98 +    printk("active state:\t\tC%d\n",
   65.99 +           (power->last_state) ? power->last_state->type : -1);
  65.100      printk("max_cstate:\t\tC%d\n", max_cstate);
  65.101 -    printk("bus master activity:\t%08x\n", power->bm_activity);
  65.102      printk("states:\n");
  65.103      
  65.104      for ( i = 1; i < power->count; i++ )
  65.105      {
  65.106 -        printk((power->states[i].type == power->state->type) ? "   *" : "    ");
  65.107 +        if ( power->last_state && 
  65.108 +             power->states[i].type == power->last_state->type )
  65.109 +            printk("   *");
  65.110 +        else
  65.111 +            printk("    ");
  65.112          printk("C%d:\t\t", i);
  65.113          printk("type[C%d] ", power->states[i].type);
  65.114 -        if ( power->states[i].promotion.state )
  65.115 -            printk("promotion[C%d] ", power->states[i].promotion.state->type);
  65.116 -        else
  65.117 -            printk("promotion[--] ");
  65.118 -        if ( power->states[i].demotion.state )
  65.119 -            printk("demotion[C%d] ", power->states[i].demotion.state->type);
  65.120 -        else
  65.121 -            printk("demotion[--] ");
  65.122 -        printk("latency[%03d]\n ", power->states[i].latency);
  65.123 -        printk("\t\t\t");
  65.124 +        printk("latency[%03d] ", power->states[i].latency);
  65.125          printk("usage[%08d] ", power->states[i].usage);
  65.126          printk("duration[%"PRId64"]\n", power->states[i].time);
  65.127      }
  65.128 @@ -182,48 +115,6 @@ static inline u32 ticks_elapsed(u32 t1, 
  65.129          return ((0xFFFFFFFF - t1) + t2);
  65.130  }
  65.131  
  65.132 -static void acpi_processor_power_activate(struct acpi_processor_power *power,
  65.133 -                                          struct acpi_processor_cx *new)
  65.134 -{
  65.135 -    struct acpi_processor_cx *old;
  65.136 -
  65.137 -    if ( !power || !new )
  65.138 -        return;
  65.139 -
  65.140 -    old = power->state;
  65.141 -
  65.142 -    if ( old )
  65.143 -        old->promotion.count = 0;
  65.144 -    new->demotion.count = 0;
  65.145 -
  65.146 -    /* Cleanup from old state. */
  65.147 -    if ( old )
  65.148 -    {
  65.149 -        switch ( old->type )
  65.150 -        {
  65.151 -        case ACPI_STATE_C3:
  65.152 -            /* Disable bus master reload */
  65.153 -            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
  65.154 -                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
  65.155 -            break;
  65.156 -        }
  65.157 -    }
  65.158 -
  65.159 -    /* Prepare to use new state. */
  65.160 -    switch ( new->type )
  65.161 -    {
  65.162 -    case ACPI_STATE_C3:
  65.163 -        /* Enable bus master reload */
  65.164 -        if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
  65.165 -            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
  65.166 -        break;
  65.167 -    }
  65.168 -
  65.169 -    power->state = new;
  65.170 -
  65.171 -    return;
  65.172 -}
  65.173 -
  65.174  static void acpi_safe_halt(void)
  65.175  {
  65.176      smp_mb__after_clear_bit();
  65.177 @@ -263,13 +154,50 @@ static void acpi_idle_do_entry(struct ac
  65.178      }
  65.179  }
  65.180  
  65.181 -static atomic_t c3_cpu_count;
  65.182 +static inline void acpi_idle_update_bm_rld(struct acpi_processor_power *power,
  65.183 +                                           struct acpi_processor_cx *target)
  65.184 +{
  65.185 +    if ( !power->flags.bm_check )
  65.186 +        return;
  65.187 +
  65.188 +    if ( power->flags.bm_rld_set && target->type != ACPI_STATE_C3 )
  65.189 +    {
  65.190 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
  65.191 +        power->flags.bm_rld_set = 0;
  65.192 +    }
  65.193 +
  65.194 +    if ( !power->flags.bm_rld_set && target->type == ACPI_STATE_C3 )
  65.195 +    {
  65.196 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
  65.197 +        power->flags.bm_rld_set = 1;
  65.198 +    }
  65.199 +}
  65.200 +
  65.201 +static int acpi_idle_bm_check(void)
  65.202 +{
  65.203 +    u32 bm_status = 0;
  65.204 +
  65.205 +    acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
  65.206 +    if ( bm_status )
  65.207 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
  65.208 +    /*
  65.209 +     * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
  65.210 +     * the true state of bus mastering activity; forcing us to
  65.211 +     * manually check the BMIDEA bit of each IDE channel.
  65.212 +     */
  65.213 +    return bm_status;
  65.214 +}
  65.215 +
  65.216 +static struct {
  65.217 +    spinlock_t lock;
  65.218 +    unsigned int count;
  65.219 +} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
  65.220  
  65.221  static void acpi_processor_idle(void)
  65.222  {
  65.223      struct acpi_processor_power *power = NULL;
  65.224      struct acpi_processor_cx *cx = NULL;
  65.225 -    struct acpi_processor_cx *next_state = NULL;
  65.226 +    int next_state;
  65.227      int sleep_ticks = 0;
  65.228      u32 t1, t2 = 0;
  65.229  
  65.230 @@ -287,7 +215,16 @@ static void acpi_processor_idle(void)
  65.231          return;
  65.232      }
  65.233  
  65.234 -    cx = power->state;
  65.235 +    next_state = cpuidle_current_governor->select(power);
  65.236 +    if ( next_state > 0 )
  65.237 +    {
  65.238 +        cx = &power->states[next_state];
  65.239 +        if ( power->flags.bm_check && acpi_idle_bm_check()
  65.240 +             && cx->type == ACPI_STATE_C3 )
  65.241 +            cx = power->safe_state;
  65.242 +        if ( cx->type > max_cstate )
  65.243 +            cx = &power->states[max_cstate];
  65.244 +    }
  65.245      if ( !cx )
  65.246      {
  65.247          if ( pm_idle_save )
  65.248 @@ -303,69 +240,14 @@ static void acpi_processor_idle(void)
  65.249          return;
  65.250      }
  65.251  
  65.252 -    /*
  65.253 -     * Check BM Activity
  65.254 -     * -----------------
  65.255 -     * Check for bus mastering activity (if required), record, and check
  65.256 -     * for demotion.
  65.257 -     */
  65.258 -    if ( power->flags.bm_check )
  65.259 -    {
  65.260 -        u32 bm_status = 0;
  65.261 -        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
  65.262 -
  65.263 -        if ( diff > 31 )
  65.264 -            diff = 31;
  65.265 -
  65.266 -        power->bm_activity <<= diff;
  65.267 -
  65.268 -        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
  65.269 -        if ( bm_status )
  65.270 -        {
  65.271 -            power->bm_activity |= 0x1;
  65.272 -            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
  65.273 -        }
  65.274 -        /*
  65.275 -         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
  65.276 -         * the true state of bus mastering activity; forcing us to
  65.277 -         * manually check the BMIDEA bit of each IDE channel.
  65.278 -         */
  65.279 -        /*else if ( errata.piix4.bmisx )
  65.280 -        {
  65.281 -            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
  65.282 -                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
  65.283 -                pr->power.bm_activity |= 0x1;
  65.284 -        }*/
  65.285 -
  65.286 -        power->bm_check_timestamp = NOW();
  65.287 -
  65.288 -        /*
  65.289 -         * If bus mastering is or was active this jiffy, demote
  65.290 -         * to avoid a faulty transition.  Note that the processor
  65.291 -         * won't enter a low-power state during this call (to this
  65.292 -         * function) but should upon the next.
  65.293 -         *
  65.294 -         * TBD: A better policy might be to fallback to the demotion
  65.295 -         *      state (use it for this quantum only) istead of
  65.296 -         *      demoting -- and rely on duration as our sole demotion
  65.297 -         *      qualification.  This may, however, introduce DMA
  65.298 -         *      issues (e.g. floppy DMA transfer overrun/underrun).
  65.299 -         */
  65.300 -        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
  65.301 -        {
  65.302 -            local_irq_enable();
  65.303 -            next_state = cx->demotion.state;
  65.304 -            goto end;
  65.305 -        }
  65.306 -    }
  65.307 +    power->last_state = cx;
  65.308  
  65.309      /*
  65.310       * Sleep:
  65.311       * ------
  65.312       * Invoke the current Cx state to put the processor to sleep.
  65.313       */
  65.314 -    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
  65.315 -        smp_mb__after_clear_bit();
  65.316 +    acpi_idle_update_bm_rld(power, cx);
  65.317  
  65.318      switch ( cx->type )
  65.319      {
  65.320 @@ -399,8 +281,7 @@ static void acpi_processor_idle(void)
  65.321          /* Re-enable interrupts */
  65.322          local_irq_enable();
  65.323          /* Compute time (ticks) that we were actually asleep */
  65.324 -        sleep_ticks =
  65.325 -            ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
  65.326 +        sleep_ticks = ticks_elapsed(t1, t2);
  65.327          break;
  65.328  
  65.329      case ACPI_STATE_C3:
  65.330 @@ -416,8 +297,8 @@ static void acpi_processor_idle(void)
  65.331           */
  65.332          if ( power->flags.bm_check && power->flags.bm_control )
  65.333          {
  65.334 -            atomic_inc(&c3_cpu_count);
  65.335 -            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
  65.336 +            spin_lock(&c3_cpu_status.lock);
  65.337 +            if ( ++c3_cpu_status.count == num_online_cpus() )
  65.338              {
  65.339                  /*
  65.340                   * All CPUs are trying to go to C3
  65.341 @@ -425,6 +306,7 @@ static void acpi_processor_idle(void)
  65.342                   */
  65.343                  acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
  65.344              }
  65.345 +            spin_unlock(&c3_cpu_status.lock);
  65.346          }
  65.347          else if ( !power->flags.bm_check )
  65.348          {
  65.349 @@ -455,8 +337,10 @@ static void acpi_processor_idle(void)
  65.350          if ( power->flags.bm_check && power->flags.bm_control )
  65.351          {
  65.352              /* Enable bus master arbitration */
  65.353 -            atomic_dec(&c3_cpu_count);
  65.354 -            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
  65.355 +            spin_lock(&c3_cpu_status.lock);
  65.356 +            if ( c3_cpu_status.count-- == num_online_cpus() )
  65.357 +                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
  65.358 +            spin_unlock(&c3_cpu_status.lock);
  65.359          }
  65.360  
  65.361          /* Re-enable interrupts */
  65.362 @@ -465,8 +349,6 @@ static void acpi_processor_idle(void)
  65.363          lapic_timer_on();
  65.364          /* Compute time (ticks) that we were actually asleep */
  65.365          sleep_ticks = ticks_elapsed(t1, t2);
  65.366 -        /* Do not account our idle-switching overhead: */
  65.367 -        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
  65.368  
  65.369          break;
  65.370  
  65.371 @@ -476,163 +358,14 @@ static void acpi_processor_idle(void)
  65.372      }
  65.373  
  65.374      cx->usage++;
  65.375 -    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
  65.376 -        cx->time += sleep_ticks;
  65.377 -
  65.378 -    next_state = power->state;
  65.379 -
  65.380 -    /*
  65.381 -     * Promotion?
  65.382 -     * ----------
  65.383 -     * Track the number of longs (time asleep is greater than threshold)
  65.384 -     * and promote when the count threshold is reached.  Note that bus
  65.385 -     * mastering activity may prevent promotions.
  65.386 -     * Do not promote above max_cstate.
  65.387 -     */
  65.388 -    if ( cx->promotion.state &&
  65.389 -         ((cx->promotion.state - power->states) <= max_cstate) )
  65.390 +    if ( sleep_ticks > 0 )
  65.391      {
  65.392 -        if ( sleep_ticks > cx->promotion.threshold.ticks )
  65.393 -        {
  65.394 -            cx->promotion.count++;
  65.395 -            cx->demotion.count = 0;
  65.396 -            if ( cx->promotion.count >= cx->promotion.threshold.count )
  65.397 -            {
  65.398 -                if ( power->flags.bm_check )
  65.399 -                {
  65.400 -                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
  65.401 -                    {
  65.402 -                        next_state = cx->promotion.state;
  65.403 -                        goto end;
  65.404 -                    }
  65.405 -                }
  65.406 -                else
  65.407 -                {
  65.408 -                    next_state = cx->promotion.state;
  65.409 -                    goto end;
  65.410 -                }
  65.411 -            }
  65.412 -        }
  65.413 -    }
  65.414 -
  65.415 -    /*
  65.416 -     * Demotion?
  65.417 -     * ---------
  65.418 -     * Track the number of shorts (time asleep is less than time threshold)
  65.419 -     * and demote when the usage threshold is reached.
  65.420 -     */
  65.421 -    if ( cx->demotion.state )
  65.422 -    {
  65.423 -        if ( sleep_ticks < cx->demotion.threshold.ticks )
  65.424 -        {
  65.425 -            cx->demotion.count++;
  65.426 -            cx->promotion.count = 0;
  65.427 -            if ( cx->demotion.count >= cx->demotion.threshold.count )
  65.428 -            {
  65.429 -                next_state = cx->demotion.state;
  65.430 -                goto end;
  65.431 -            }
  65.432 -        }
  65.433 -    }
  65.434 -
  65.435 -end:
  65.436 -    /*
  65.437 -     * Demote if current state exceeds max_cstate
  65.438 -     */
  65.439 -    if ( (power->state - power->states) > max_cstate )
  65.440 -    {
  65.441 -        if ( cx->demotion.state )
  65.442 -            next_state = cx->demotion.state;
  65.443 +        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
  65.444 +        cx->time += sleep_ticks;
  65.445      }
  65.446  
  65.447 -    /*
  65.448 -     * New Cx State?
  65.449 -     * -------------
  65.450 -     * If we're going to start using a new Cx state we must clean up
  65.451 -     * from the previous and prepare to use the new.
  65.452 -     */
  65.453 -    if ( next_state != power->state )
  65.454 -        acpi_processor_power_activate(power, next_state);
  65.455 -}
  65.456 -
  65.457 -static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
  65.458 -{
  65.459 -    unsigned int i;
  65.460 -    unsigned int state_is_set = 0;
  65.461 -    struct acpi_processor_cx *lower = NULL;
  65.462 -    struct acpi_processor_cx *higher = NULL;
  65.463 -    struct acpi_processor_cx *cx;
  65.464 -
  65.465 -    if ( !power )
  65.466 -        return -EINVAL;
  65.467 -
  65.468 -    /*
  65.469 -     * This function sets the default Cx state policy (OS idle handler).
  65.470 -     * Our scheme is to promote quickly to C2 but more conservatively
  65.471 -     * to C3.  We're favoring C2  for its characteristics of low latency
  65.472 -     * (quick response), good power savings, and ability to allow bus
  65.473 -     * mastering activity.  Note that the Cx state policy is completely
  65.474 -     * customizable and can be altered dynamically.
  65.475 -     */
  65.476 -
  65.477 -    /* startup state */
  65.478 -    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
  65.479 -    {
  65.480 -        cx = &power->states[i];
  65.481 -        if ( !cx->valid )
  65.482 -            continue;
  65.483 -
  65.484 -        if ( !state_is_set )
  65.485 -            power->state = cx;
  65.486 -        state_is_set++;
  65.487 -        break;
  65.488 -    }
  65.489 -
  65.490 -    if ( !state_is_set )
  65.491 -        return -ENODEV;
  65.492 -
  65.493 -    /* demotion */
  65.494 -    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
  65.495 -    {
  65.496 -        cx = &power->states[i];
  65.497 -        if ( !cx->valid )
  65.498 -            continue;
  65.499 -
  65.500 -        if ( lower )
  65.501 -        {
  65.502 -            cx->demotion.state = lower;
  65.503 -            cx->demotion.threshold.ticks = cx->latency_ticks;
  65.504 -            cx->demotion.threshold.count = 1;
  65.505 -            if ( cx->type == ACPI_STATE_C3 )
  65.506 -                cx->demotion.threshold.bm = bm_history;
  65.507 -        }
  65.508 -
  65.509 -        lower = cx;
  65.510 -    }
  65.511 -
  65.512 -    /* promotion */
  65.513 -    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
  65.514 -    {
  65.515 -        cx = &power->states[i];
  65.516 -        if ( !cx->valid )
  65.517 -            continue;
  65.518 -
  65.519 -        if ( higher )
  65.520 -        {
  65.521 -            cx->promotion.state = higher;
  65.522 -            cx->promotion.threshold.ticks = cx->latency_ticks;
  65.523 -            if ( cx->type >= ACPI_STATE_C2 )
  65.524 -                cx->promotion.threshold.count = 4;
  65.525 -            else
  65.526 -                cx->promotion.threshold.count = 10;
  65.527 -            if ( higher->type == ACPI_STATE_C3 )
  65.528 -                cx->promotion.threshold.bm = bm_history;
  65.529 -        }
  65.530 -
  65.531 -        higher = cx;
  65.532 -    }
  65.533 -
  65.534 -    return 0;
  65.535 +    if ( cpuidle_current_governor->reflect )
  65.536 +        cpuidle_current_governor->reflect(power);
  65.537  }
  65.538  
  65.539  static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
  65.540 @@ -821,6 +554,8 @@ static int check_cx(struct acpi_processo
  65.541      return 0;
  65.542  }
  65.543  
  65.544 +static unsigned int latency_factor = 2;
  65.545 +
  65.546  static void set_cx(
  65.547      struct acpi_processor_power *acpi_power,
  65.548      xen_processor_cx_t *xen_cx)
  65.549 @@ -842,6 +577,9 @@ static void set_cx(
  65.550      cx->power    = xen_cx->power;
  65.551      
  65.552      cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
  65.553 +    cx->target_residency = cx->latency * latency_factor;
  65.554 +    if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
  65.555 +        acpi_power->safe_state = cx;
  65.556  }
  65.557  
  65.558  int get_cpu_id(u8 acpi_id)
  65.559 @@ -936,6 +674,7 @@ long set_cx_pminfo(uint32_t cpu, struct 
  65.560  
  65.561      init_cx_pminfo(acpi_power);
  65.562  
  65.563 +    acpi_power->cpu = cpu_id;
  65.564      acpi_power->flags.bm_check = power->flags.bm_check;
  65.565      acpi_power->flags.bm_control = power->flags.bm_control;
  65.566      acpi_power->flags.has_cst = power->flags.has_cst;
  65.567 @@ -950,10 +689,11 @@ long set_cx_pminfo(uint32_t cpu, struct 
  65.568          set_cx(acpi_power, &xen_cx);
  65.569      }
  65.570  
  65.571 +    if ( cpuidle_current_governor->enable &&
  65.572 +         cpuidle_current_governor->enable(acpi_power) )
  65.573 +        return -EFAULT;
  65.574 +
  65.575      /* FIXME: C-state dependency is not supported by far */
  65.576 -    
  65.577 -    /* initialize default policy */
  65.578 -    acpi_processor_set_power_policy(acpi_power);
  65.579  
  65.580      print_acpi_power(cpu_id, acpi_power);
  65.581  
  65.582 @@ -978,7 +718,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
  65.583      uint64_t usage;
  65.584      int i;
  65.585  
  65.586 -    stat->last = (power->state) ? power->state->type : 0;
  65.587 +    stat->last = (power->last_state) ? power->last_state->type : 0;
  65.588      stat->nr = processor_powers[cpuid].count;
  65.589      stat->idle_time = v->runstate.time[RUNSTATE_running];
  65.590      if ( v->is_running )
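
A note on the new PM_TIMER_TICKS_TO_US() macro above: the ACPI PM timer runs
at PM_TIMER_FREQUENCY = 3579545 Hz (hence the "3.579 ticks per us" comments),
and the macro is simply the inverse of US_TO_PM_TIMER_TICKS(). A round-trip
check of the integer arithmetic:

    /* US_TO_PM_TIMER_TICKS(1000) = (1000 * (3579545/1000)) / 1000
     *                            = (1000 * 3579) / 1000 = 3579 ticks
     * PM_TIMER_TICKS_TO_US(3579) = (3579 * 1000) / (3579545/1000)
     *                            = 3579000 / 3579 = 1000 us
     * so a 1ms sleep survives the round trip exactly, and the 4-tick
     * C2_OVERHEAD/C3_OVERHEAD constants correspond to roughly 1.1 us. */
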
    66.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 12 14:32:45 2008 +0900
    66.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 12 14:47:40 2008 +0900
    66.3 @@ -48,7 +48,7 @@ struct processor_pminfo processor_pminfo
    66.4  struct cpufreq_policy xen_px_policy[NR_CPUS];
    66.5  
    66.6  static cpumask_t *cpufreq_dom_pt;
    66.7 -static cpumask_t cpufreq_dom_mask;
    66.8 +static unsigned long *cpufreq_dom_mask;
    66.9  static unsigned int cpufreq_dom_max;
   66.10  
   66.11  enum {
   66.12 @@ -562,7 +562,8 @@ static struct cpufreq_driver acpi_cpufre
   66.13  void cpufreq_dom_exit(void)
   66.14  {
   66.15      cpufreq_dom_max = 0;
   66.16 -    cpus_clear(cpufreq_dom_mask);
   66.17 +    if (cpufreq_dom_mask)
   66.18 +        xfree(cpufreq_dom_mask);
   66.19      if (cpufreq_dom_pt)
   66.20          xfree(cpufreq_dom_pt);
   66.21  }
   66.22 @@ -572,22 +573,28 @@ int cpufreq_dom_init(void)
   66.23      unsigned int i;
   66.24  
   66.25      cpufreq_dom_max = 0;
   66.26 -    cpus_clear(cpufreq_dom_mask);
   66.27  
   66.28      for_each_online_cpu(i) {
   66.29 -        cpu_set(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
   66.30          if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
   66.31              cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
   66.32      }
   66.33      cpufreq_dom_max++;
   66.34  
   66.35 +    cpufreq_dom_mask = xmalloc_array(unsigned long,
   66.36 +                                     BITS_TO_LONGS(cpufreq_dom_max));
   66.37 +    if (!cpufreq_dom_mask)
   66.38 +        return -ENOMEM;
   66.39 +    bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
   66.40 +
   66.41      cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
   66.42      if (!cpufreq_dom_pt)
   66.43          return -ENOMEM;
   66.44      memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
   66.45  
   66.46 -    for_each_online_cpu(i)
   66.47 +    for_each_online_cpu(i) {
   66.48 +        __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
   66.49          cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
   66.50 +    }
   66.51  
   66.52      for_each_online_cpu(i)
   66.53          processor_pminfo[i].perf.shared_cpu_map =
   66.54 @@ -616,10 +623,11 @@ static int cpufreq_cpu_init(void)
   66.55  
   66.56  int cpufreq_dom_dbs(unsigned int event)
   66.57  {
   66.58 -    int cpu, dom, ret = 0;
   66.59 +    unsigned int cpu, dom;
   66.60 +    int ret = 0;
   66.61  
   66.62 -    for (dom=0; dom<cpufreq_dom_max; dom++) {
   66.63 -        if (!cpu_isset(dom, cpufreq_dom_mask))
   66.64 +    for (dom = 0; dom < cpufreq_dom_max; dom++) {
   66.65 +        if (!test_bit(dom, cpufreq_dom_mask))
   66.66              continue;
   66.67          cpu = first_cpu(cpufreq_dom_pt[dom]);
   66.68          ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
    67.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c	Fri Sep 12 14:32:45 2008 +0900
    67.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c	Fri Sep 12 14:47:40 2008 +0900
    67.3 @@ -197,8 +197,8 @@ static int powernow_cpufreq_cpu_init(str
    67.4  
    67.5      data->max_freq = perf->states[0].core_frequency * 1000;
    67.6      /* table init */
    67.7 -    for (i=0; i<perf->state_count && i<max_hw_pstate; i++) {
    67.8 -        if (i>0 && perf->states[i].core_frequency >=
    67.9 +    for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
   67.10 +        if (i > 0 && perf->states[i].core_frequency >=
   67.11              data->freq_table[valid_states-1].frequency / 1000)
   67.12              continue;
   67.13  
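The powernow loop-bound change above ('<' to '<=') is a fencepost fix: the
new bound implies max_hw_pstate holds the highest valid hardware P-state
index rather than a count, so the old bound silently dropped the last state.
In miniature, with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned int max_hw_pstate = 7;   /* highest valid index => 8 states */
        unsigned int state_count = 8, i, n = 0;

        for ( i = 0; i < state_count && i <= max_hw_pstate; i++ )
            n++;
        printf("%u states visited\n", n); /* 8; "i < max_hw_pstate" gives 7 */
        return 0;
    }
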
    68.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    68.2 +++ b/xen/arch/x86/acpi/cpuidle_menu.c	Fri Sep 12 14:47:40 2008 +0900
    68.3 @@ -0,0 +1,132 @@
    68.4 +/*
    68.5 + * cpuidle_menu - menu governor for cpu idle; main idea comes from Linux:
    68.6 + *            drivers/cpuidle/governors/menu.c
    68.7 + *
    68.8 + *  Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
    68.9 + *  Copyright (C) 2007, 2008 Intel Corporation
   68.10 + *
   68.11 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   68.12 + *
   68.13 + *  This program is free software; you can redistribute it and/or modify
   68.14 + *  it under the terms of the GNU General Public License as published by
   68.15 + *  the Free Software Foundation; either version 2 of the License, or (at
   68.16 + *  your option) any later version.
   68.17 + *
   68.18 + *  This program is distributed in the hope that it will be useful, but
   68.19 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
   68.20 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   68.21 + *  General Public License for more details.
   68.22 + *
   68.23 + *  You should have received a copy of the GNU General Public License along
   68.24 + *  with this program; if not, write to the Free Software Foundation, Inc.,
   68.25 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
   68.26 + *
   68.27 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   68.28 + */
   68.29 +#include <xen/config.h>
   68.30 +#include <xen/errno.h>
   68.31 +#include <xen/lib.h>
   68.32 +#include <xen/types.h>
   68.33 +#include <xen/acpi.h>
   68.34 +#include <xen/timer.h>
   68.35 +#include <xen/cpuidle.h>
   68.36 +
   68.37 +#define BREAK_FUZZ      4       /* 4 us */
   68.38 +#define USEC_PER_SEC 1000000
   68.39 +
   68.40 +struct menu_device
   68.41 +{
   68.42 +    int             last_state_idx;
   68.43 +    unsigned int    expected_us;
   68.44 +    unsigned int    predicted_us;
   68.45 +    unsigned int    last_measured_us;
   68.46 +    unsigned int    elapsed_us;
   68.47 +};
   68.48 +
   68.49 +static DEFINE_PER_CPU(struct menu_device, menu_devices);
   68.50 +
   68.51 +static s_time_t get_sleep_length_ns(void)
   68.52 +{
   68.53 +    return per_cpu(timer_deadline, smp_processor_id()) - NOW();
   68.54 +}
   68.55 +
   68.56 +static int menu_select(struct acpi_processor_power *power)
   68.57 +{
   68.58 +    struct menu_device *data = &__get_cpu_var(menu_devices);
   68.59 +    int i;
   68.60 +
   68.61 +    /* determine the expected residency time */
   68.62 +    data->expected_us = (u32)(get_sleep_length_ns() / 1000); /* divide before truncating */
   68.63 +
   68.64 +    /* find the deepest idle state that satisfies our constraints */
   68.65 +    for ( i = 1; i < power->count; i++ )
   68.66 +    {
   68.67 +        struct acpi_processor_cx *s = &power->states[i];
   68.68 +
   68.69 +        if ( s->target_residency > data->expected_us + s->latency )
   68.70 +            break;
   68.71 +        if ( s->target_residency > data->predicted_us )
   68.72 +            break;
   68.73 +        /* TBD: we need to check the QoS requirement in the future */
   68.74 +    }
   68.75 +
   68.76 +    data->last_state_idx = i - 1;
   68.77 +    return i - 1;
   68.78 +}
   68.79 +
   68.80 +static void menu_reflect(struct acpi_processor_power *power)
   68.81 +{
   68.82 +    struct menu_device *data = &__get_cpu_var(menu_devices);
   68.83 +    struct acpi_processor_cx *target = &power->states[data->last_state_idx];
   68.84 +    unsigned int last_residency; 
   68.85 +    unsigned int measured_us;
   68.86 +
   68.87 +    /*
   68.88 +     * Ugh, this idle state doesn't support residency measurements, so we
   68.89 +     * are basically lost in the dark.  As a compromise, assume we slept
   68.90 +     * for one full standard timer tick.  However, be aware that this
   68.91 +     * could potentially result in a suboptimal state transition.
   68.92 +     */
   68.93 +    if ( target->type == ACPI_STATE_C1 )
   68.94 +        last_residency = USEC_PER_SEC / HZ;
   68.95 +    else
   68.96 +        last_residency = power->last_residency;
   68.97 +
   68.98 +    measured_us = last_residency + data->elapsed_us;
   68.99 +
  68.100 +    /* if wrapping, set to max uint (-1) */
  68.101 +    measured_us = data->elapsed_us <= measured_us ? measured_us : -1;
  68.102 +
  68.103 +    /* Predict time remaining until next break event */
  68.104 +    data->predicted_us = max(measured_us, data->last_measured_us);
  68.105 +
  68.106 +    /* Distinguish between expected & non-expected events */
  68.107 +    if ( last_residency + BREAK_FUZZ
  68.108 +         < data->expected_us + target->latency )
  68.109 +    {
  68.110 +        data->last_measured_us = measured_us;
  68.111 +        data->elapsed_us = 0;
  68.112 +    }
  68.113 +    else
  68.114 +        data->elapsed_us = measured_us;
  68.115 +}
  68.116 +
  68.117 +static int menu_enable_device(struct acpi_processor_power *power)
  68.118 +{
  68.119 +    struct menu_device *data = &per_cpu(menu_devices, power->cpu);
  68.120 +
  68.121 +    memset(data, 0, sizeof(struct menu_device));
  68.122 +
  68.123 +    return 0;
  68.124 +}
  68.125 +
  68.126 +static struct cpuidle_governor menu_governor =
  68.127 +{
  68.128 +    .name =         "menu",
  68.129 +    .rating =       20,
  68.130 +    .enable =       menu_enable_device,
  68.131 +    .select =       menu_select,
  68.132 +    .reflect =      menu_reflect,
  68.133 +};
  68.134 +
  68.135 +struct cpuidle_governor *cpuidle_current_governor = &menu_governor;
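
To make the governor's select/reflect cycle concrete: with latency_factor = 2
(set in cpu_idle.c above), a C3 state with 85us latency gets target_residency
= 170us, so menu_select() only chooses it when both the timer-derived
expected_us and the history-derived predicted_us can cover it. A worked
example of the selection loop with made-up per-state numbers, not measured
ones:

    #include <stdio.h>

    /* Mirror of menu_select()'s loop; target_residency = latency * 2. */
    int main(void)
    {
        struct { unsigned int latency, target_residency; } states[] = {
            { 0, 0 },     /* index 0 unused, as in power->states[] */
            { 1, 2 },     /* C1 */
            { 17, 34 },   /* C2 */
            { 85, 170 },  /* C3 */
        };
        unsigned int expected_us = 500;   /* from the next timer deadline */
        unsigned int predicted_us = 120;  /* from recent wakeup history   */
        int i;

        for ( i = 1; i < 4; i++ )
        {
            if ( states[i].target_residency > expected_us + states[i].latency )
                break;
            if ( states[i].target_residency > predicted_us )
                break;
        }
        printf("selected C%d\n", i - 1);  /* history caps us at C2 here */
        return 0;
    }
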
    69.1 --- a/xen/arch/x86/domain.c	Fri Sep 12 14:32:45 2008 +0900
    69.2 +++ b/xen/arch/x86/domain.c	Fri Sep 12 14:47:40 2008 +0900
    69.3 @@ -31,6 +31,7 @@
    69.4  #include <xen/compat.h>
    69.5  #include <xen/acpi.h>
    69.6  #include <xen/pci.h>
    69.7 +#include <xen/paging.h>
    69.8  #include <asm/regs.h>
    69.9  #include <asm/mc146818rtc.h>
   69.10  #include <asm/system.h>
   69.11 @@ -40,7 +41,6 @@
   69.12  #include <asm/i387.h>
   69.13  #include <asm/mpspec.h>
   69.14  #include <asm/ldt.h>
   69.15 -#include <asm/paging.h>
   69.16  #include <asm/hypercall.h>
   69.17  #include <asm/hvm/hvm.h>
   69.18  #include <asm/hvm/support.h>
   69.19 @@ -302,7 +302,8 @@ int vcpu_initialise(struct vcpu *v)
   69.20      else
   69.21      {
   69.22          /* PV guests by default have a 100Hz ticker. */
   69.23 -        v->periodic_period = MILLISECS(10);
   69.24 +        if ( !is_idle_domain(d) )
   69.25 +            v->periodic_period = MILLISECS(10);
   69.26  
   69.27          /* PV guests get an emulated PIT too for video BIOSes to use. */
   69.28          if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
   69.29 @@ -1645,23 +1646,26 @@ static int relinquish_memory(
   69.30  
   69.31          /*
   69.32           * Forcibly invalidate top-most, still valid page tables at this point
   69.33 -         * to break circular 'linear page table' references. This is okay
   69.34 -         * because MMU structures are not shared across domains and this domain
   69.35 -         * is now dead. Thus top-most valid tables are not in use so a non-zero
   69.36 -         * count means circular reference.
   69.37 +         * to break circular 'linear page table' references as well as clean up
   69.38 +         * partially validated pages. This is okay because MMU structures are
   69.39 +         * not shared across domains and this domain is now dead. Thus top-most
    69.40 +         * valid tables are not in use so a non-zero count means a circular
    69.41 +         * reference or a partially validated page.
   69.42           */
   69.43          y = page->u.inuse.type_info;
   69.44          for ( ; ; )
   69.45          {
   69.46              x = y;
   69.47 -            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
   69.48 -                        (type|PGT_validated)) )
   69.49 +            if ( likely((x & PGT_type_mask) != type) ||
   69.50 +                 likely(!(x & (PGT_validated|PGT_partial))) )
   69.51                  break;
   69.52  
   69.53 -            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
   69.54 +            y = cmpxchg(&page->u.inuse.type_info, x,
   69.55 +                        x & ~(PGT_validated|PGT_partial));
   69.56              if ( likely(y == x) )
   69.57              {
   69.58 -                free_page_type(page, type);
   69.59 +                if ( free_page_type(page, x, 0) != 0 )
   69.60 +                    BUG();
   69.61                  break;
   69.62              }
   69.63          }
    70.1 --- a/xen/arch/x86/domain_build.c	Fri Sep 12 14:32:45 2008 +0900
    70.2 +++ b/xen/arch/x86/domain_build.c	Fri Sep 12 14:47:40 2008 +0900
    70.3 @@ -26,6 +26,7 @@
    70.4  #include <asm/desc.h>
    70.5  #include <asm/i387.h>
    70.6  #include <asm/paging.h>
    70.7 +#include <asm/p2m.h>
    70.8  #include <asm/e820.h>
    70.9  
   70.10  #include <public/version.h>
    71.1 --- a/xen/arch/x86/domctl.c	Fri Sep 12 14:32:45 2008 +0900
    71.2 +++ b/xen/arch/x86/domctl.c	Fri Sep 12 14:47:40 2008 +0900
    71.3 @@ -20,7 +20,7 @@
    71.4  #include <xen/trace.h>
    71.5  #include <xen/console.h>
    71.6  #include <xen/iocap.h>
    71.7 -#include <asm/paging.h>
    71.8 +#include <xen/paging.h>
    71.9  #include <asm/irq.h>
   71.10  #include <asm/hvm/hvm.h>
   71.11  #include <asm/hvm/support.h>
   71.12 @@ -68,14 +68,6 @@ long arch_do_domctl(
   71.13          if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) )
   71.14              break;
   71.15  
   71.16 -        ret = xsm_ioport_permission(d, fp, 
   71.17 -                                    domctl->u.ioport_permission.allow_access);
   71.18 -        if ( ret )
   71.19 -        {
   71.20 -            rcu_unlock_domain(d);
   71.21 -            break;
   71.22 -        }
   71.23 -
   71.24          if ( np == 0 )
   71.25              ret = 0;
   71.26          else if ( domctl->u.ioport_permission.allow_access )
   71.27 @@ -550,6 +542,10 @@ long arch_do_domctl(
   71.28          if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   71.29              break;
   71.30  
   71.31 +        ret = xsm_sendtrigger(d);
   71.32 +        if ( ret )
   71.33 +            goto sendtrigger_out;
   71.34 +
   71.35          ret = -EINVAL;
   71.36          if ( domctl->u.sendtrigger.vcpu >= MAX_VIRT_CPUS )
   71.37              goto sendtrigger_out;
   71.38 @@ -628,6 +624,10 @@ long arch_do_domctl(
   71.39          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   71.40          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   71.41  
   71.42 +        ret = xsm_test_assign_device(domctl->u.assign_device.machine_bdf);
   71.43 +        if ( ret )
   71.44 +            break;
   71.45 +
   71.46          if ( device_assigned(bus, devfn) )
   71.47          {
   71.48              gdprintk(XENLOG_ERR, "XEN_DOMCTL_test_assign_device: "
   71.49 @@ -655,6 +655,11 @@ long arch_do_domctl(
   71.50                  "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n");
   71.51              break;
   71.52          }
   71.53 +
   71.54 +        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
   71.55 +        if ( ret )
   71.56 +            goto assign_device_out;
   71.57 +
   71.58          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   71.59          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   71.60  
   71.61 @@ -680,6 +685,7 @@ long arch_do_domctl(
   71.62                       "assign device (%x:%x:%x) failed\n",
   71.63                       bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   71.64  
   71.65 +    assign_device_out:
   71.66          put_domain(d);
   71.67      }
   71.68      break;
   71.69 @@ -700,6 +706,11 @@ long arch_do_domctl(
   71.70                  "XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n"); 
   71.71              break;
   71.72          }
   71.73 +
   71.74 +        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
   71.75 +        if ( ret )
   71.76 +            goto deassign_device_out;
   71.77 +
   71.78          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   71.79          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   71.80  
   71.81 @@ -720,6 +731,8 @@ long arch_do_domctl(
   71.82          deassign_device(d, bus, devfn);
   71.83          gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
   71.84              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   71.85 +
   71.86 +    deassign_device_out:
   71.87          put_domain(d);
   71.88      }
   71.89      break;
   71.90 @@ -733,10 +746,17 @@ long arch_do_domctl(
   71.91          if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   71.92              break;
   71.93          bind = &(domctl->u.bind_pt_irq);
   71.94 +
   71.95 +        ret = xsm_bind_pt_irq(d, bind);
   71.96 +        if ( ret )
   71.97 +            goto bind_out;
   71.98 +
   71.99          if ( iommu_enabled )
  71.100              ret = pt_irq_create_bind_vtd(d, bind);
  71.101          if ( ret < 0 )
  71.102              gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
  71.103 +
  71.104 +    bind_out:
  71.105          rcu_unlock_domain(d);
  71.106      }
  71.107      break;    
  71.108 @@ -877,11 +897,16 @@ long arch_do_domctl(
  71.109          if ( d == NULL )
  71.110              break;
  71.111  
  71.112 +        ret = xsm_pin_mem_cacheattr(d);
  71.113 +        if ( ret )
  71.114 +            goto pin_out;
  71.115 +
  71.116          ret = hvm_set_mem_pinned_cacheattr(
  71.117              d, domctl->u.pin_mem_cacheattr.start,
  71.118              domctl->u.pin_mem_cacheattr.end,
  71.119              domctl->u.pin_mem_cacheattr.type);
  71.120  
  71.121 +    pin_out:
  71.122          rcu_unlock_domain(d);
  71.123      }
  71.124      break;
  71.125 @@ -900,6 +925,10 @@ long arch_do_domctl(
  71.126          if ( d == NULL )
  71.127              break;
  71.128  
  71.129 +        ret = xsm_ext_vcpucontext(d, domctl->cmd);
  71.130 +        if ( ret )
  71.131 +            goto ext_vcpucontext_out;
  71.132 +
  71.133          ret = -ESRCH;
  71.134          if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
  71.135               ((v = d->vcpu[evc->vcpu]) == NULL) )
    72.1 --- a/xen/arch/x86/hpet.c	Fri Sep 12 14:32:45 2008 +0900
    72.2 +++ b/xen/arch/x86/hpet.c	Fri Sep 12 14:47:40 2008 +0900
    72.3 @@ -100,6 +100,13 @@ static int reprogram_hpet_evt_channel(
    72.4  
    72.5      ch->next_event = expire;
    72.6  
    72.7 +    if ( expire == STIME_MAX )
    72.8 +    {
    72.9 +        /* We assume it will take a long time for the timer to wrap. */
   72.10 +        hpet_write32(0, HPET_T0_CMP);
   72.11 +        return 0;
   72.12 +    }
   72.13 +
   72.14      delta = min_t(int64_t, delta, MAX_DELTA_NS);
   72.15      delta = max_t(int64_t, delta, MIN_DELTA_NS);
   72.16      delta = ns2ticks(delta, ch->shift, ch->mult);
   72.17 @@ -206,9 +213,11 @@ void hpet_broadcast_enter(void)
   72.18  {
   72.19      struct hpet_event_channel *ch = &hpet_event;
   72.20  
   72.21 -    cpu_set(smp_processor_id(), ch->cpumask);
   72.22 +    spin_lock(&ch->lock);
   72.23  
   72.24 -    spin_lock(&ch->lock);
   72.25 +    disable_APIC_timer();
   72.26 +
   72.27 +    cpu_set(smp_processor_id(), ch->cpumask);
   72.28  
   72.29      /* reprogram if current cpu expire time is nearer */
   72.30      if ( this_cpu(timer_deadline) < ch->next_event )
   72.31 @@ -222,8 +231,23 @@ void hpet_broadcast_exit(void)
   72.32      struct hpet_event_channel *ch = &hpet_event;
   72.33      int cpu = smp_processor_id();
   72.34  
   72.35 +    spin_lock_irq(&ch->lock);
   72.36 +
   72.37      if ( cpu_test_and_clear(cpu, ch->cpumask) )
   72.38 -        reprogram_timer(per_cpu(timer_deadline, cpu));
   72.39 +    {
   72.40 +        /* Cancel any outstanding LAPIC event and re-enable interrupts. */
   72.41 +        reprogram_timer(0);
   72.42 +        enable_APIC_timer();
   72.43 +        
   72.44 +        /* Reprogram the deadline; trigger timer work now if it has passed. */
   72.45 +        if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
   72.46 +            raise_softirq(TIMER_SOFTIRQ);
   72.47 +
   72.48 +        if ( cpus_empty(ch->cpumask) && ch->next_event != STIME_MAX )
   72.49 +            reprogram_hpet_evt_channel(ch, STIME_MAX, 0, 0);
   72.50 +    }
   72.51 +
   72.52 +    spin_unlock_irq(&ch->lock);
   72.53  }
   72.54  
   72.55  int hpet_broadcast_is_available(void)
    73.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Sep 12 14:32:45 2008 +0900
    73.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Sep 12 14:47:40 2008 +0900
    73.3 @@ -31,10 +31,11 @@
    73.4  #include <xen/hypercall.h>
    73.5  #include <xen/guest_access.h>
    73.6  #include <xen/event.h>
    73.7 +#include <xen/paging.h>
    73.8 +#include <asm/shadow.h>
    73.9  #include <asm/current.h>
   73.10  #include <asm/e820.h>
   73.11  #include <asm/io.h>
   73.12 -#include <asm/paging.h>
   73.13  #include <asm/regs.h>
   73.14  #include <asm/cpufeature.h>
   73.15  #include <asm/processor.h>
   73.16 @@ -772,7 +773,7 @@ void hvm_hlt(unsigned long rflags)
   73.17  
   73.18      do_sched_op_compat(SCHEDOP_block, 0);
   73.19  
   73.20 -    HVMTRACE_1D(HLT, curr, /* pending = */ vcpu_runnable(curr));
   73.21 +    HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
   73.22  }
   73.23  
   73.24  void hvm_triple_fault(void)
    74.1 --- a/xen/arch/x86/hvm/svm/intr.c	Fri Sep 12 14:32:45 2008 +0900
    74.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Fri Sep 12 14:47:40 2008 +0900
    74.3 @@ -80,7 +80,7 @@ static void enable_intr_window(struct vc
    74.4  
    74.5      ASSERT(intack.source != hvm_intsrc_none);
    74.6  
    74.7 -    HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
    74.8 +    HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
    74.9  
   74.10      /*
   74.11       * Create a dummy virtual interrupt to intercept as soon as the
   74.12 @@ -199,7 +199,7 @@ asmlinkage void svm_intr_assist(void)
   74.13      }
   74.14      else
   74.15      {
   74.16 -        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
   74.17 +        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
   74.18          svm_inject_extint(v, intack.vector);
   74.19          pt_intr_post(v, intack);
   74.20      }
    75.1 --- a/xen/arch/x86/hvm/svm/svm.c	Fri Sep 12 14:32:45 2008 +0900
    75.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Sep 12 14:47:40 2008 +0900
    75.3 @@ -759,11 +759,11 @@ static void svm_inject_exception(
    75.4      if ( trapnr == TRAP_page_fault )
    75.5      {
    75.6          vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
    75.7 -        HVMTRACE_LONG_2D(PF_INJECT, curr, errcode, TRC_PAR_LONG(cr2));
    75.8 +        HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
    75.9      }
   75.10      else
   75.11      {
   75.12 -        HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
   75.13 +        HVMTRACE_2D(INJ_EXC, trapnr, errcode);
   75.14      }
   75.15  
   75.16      if ( (trapnr == TRAP_debug) &&
   75.17 @@ -919,7 +919,7 @@ static void svm_cpuid_intercept(
   75.18              __clear_bit(X86_FEATURE_APIC & 31, edx);
   75.19      }
   75.20  
   75.21 -    HVMTRACE_5D (CPUID, v, input, *eax, *ebx, *ecx, *edx);
   75.22 +    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
   75.23  }
   75.24  
   75.25  static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
   75.26 @@ -946,7 +946,7 @@ static void svm_vmexit_do_cpuid(struct c
   75.27  
   75.28  static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
   75.29  {
   75.30 -    HVMTRACE_0D(DR_WRITE, v);
   75.31 +    HVMTRACE_0D(DR_WRITE);
   75.32      __restore_debug_registers(v);
   75.33  }
   75.34  
   75.35 @@ -1018,7 +1018,7 @@ static int svm_msr_read_intercept(struct
   75.36      regs->edx = msr_content >> 32;
   75.37  
   75.38   done:
   75.39 -    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
   75.40 +    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
   75.41      HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
   75.42                  ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
   75.43      return X86EMUL_OKAY;
   75.44 @@ -1037,7 +1037,7 @@ static int svm_msr_write_intercept(struc
   75.45  
   75.46      msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
   75.47  
   75.48 -    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
   75.49 +    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
   75.50  
   75.51      switch ( ecx )
   75.52      {
   75.53 @@ -1168,7 +1168,7 @@ static void svm_vmexit_do_invalidate_cac
   75.54  static void svm_invlpg_intercept(unsigned long vaddr)
   75.55  {
   75.56      struct vcpu *curr = current;
   75.57 -    HVMTRACE_LONG_2D(INVLPG, curr, 0, TRC_PAR_LONG(vaddr));
   75.58 +    HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
   75.59      paging_invlpg(curr, vaddr);
   75.60      svm_asid_g_invlpg(curr, vaddr);
   75.61  }
   75.62 @@ -1191,7 +1191,7 @@ asmlinkage void svm_vmexit_handler(struc
   75.63  
   75.64      exit_reason = vmcb->exitcode;
   75.65  
   75.66 -    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
   75.67 +    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
   75.68                  (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
   75.69                  0, 0, 0);
   75.70  
   75.71 @@ -1216,17 +1216,17 @@ asmlinkage void svm_vmexit_handler(struc
   75.72      {
   75.73      case VMEXIT_INTR:
   75.74          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   75.75 -        HVMTRACE_0D(INTR, v);
   75.76 +        HVMTRACE_0D(INTR);
   75.77          break;
   75.78  
   75.79      case VMEXIT_NMI:
   75.80          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   75.81 -        HVMTRACE_0D(NMI, v);
   75.82 +        HVMTRACE_0D(NMI);
   75.83          break;
   75.84  
   75.85      case VMEXIT_SMI:
   75.86          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   75.87 -        HVMTRACE_0D(SMI, v);
   75.88 +        HVMTRACE_0D(SMI);
   75.89          break;
   75.90  
   75.91      case VMEXIT_EXCEPTION_DB:
   75.92 @@ -1261,10 +1261,12 @@ asmlinkage void svm_vmexit_handler(struc
   75.93  
   75.94          if ( paging_fault(va, regs) )
   75.95          {
   75.96 -            if (hvm_long_mode_enabled(v))
   75.97 -                HVMTRACE_LONG_2D(PF_XEN, v, regs->error_code, TRC_PAR_LONG(va));
   75.98 +            if ( trace_will_trace_event(TRC_SHADOW) )
   75.99 +                break;
  75.100 +            if ( hvm_long_mode_enabled(v) )
  75.101 +                HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
  75.102              else
  75.103 -                HVMTRACE_2D(PF_XEN, v, regs->error_code, va);
  75.104 +                HVMTRACE_2D(PF_XEN, regs->error_code, va);
  75.105              break;
  75.106          }
  75.107  
  75.108 @@ -1274,7 +1276,7 @@ asmlinkage void svm_vmexit_handler(struc
  75.109  
  75.110      /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
  75.111      case VMEXIT_EXCEPTION_MC:
  75.112 -        HVMTRACE_0D(MCE, v);
  75.113 +        HVMTRACE_0D(MCE);
  75.114          break;
  75.115  
  75.116      case VMEXIT_VINTR:
  75.117 @@ -1331,7 +1333,7 @@ asmlinkage void svm_vmexit_handler(struc
  75.118      case VMEXIT_VMMCALL:
  75.119          if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
  75.120              break;
  75.121 -        HVMTRACE_1D(VMMCALL, v, regs->eax);
  75.122 +        HVMTRACE_1D(VMMCALL, regs->eax);
  75.123          rc = hvm_do_hypercall(regs);
  75.124          if ( rc != HVM_HCALL_preempted )
  75.125          {
  75.126 @@ -1406,7 +1408,7 @@ asmlinkage void svm_vmexit_handler(struc
  75.127  
  75.128  asmlinkage void svm_trace_vmentry(void)
  75.129  {
  75.130 -    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
  75.131 +    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
  75.132  }
  75.133    
  75.134  /*
    76.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Fri Sep 12 14:32:45 2008 +0900
    76.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Fri Sep 12 14:47:40 2008 +0900
    76.3 @@ -198,7 +198,7 @@ asmlinkage void vmx_intr_assist(void)
    76.4      }
    76.5      else
    76.6      {
    76.7 -        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
    76.8 +        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
    76.9          vmx_inject_extint(v, intack.vector);
   76.10          pt_intr_post(v, intack);
   76.11      }
    77.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 12 14:32:45 2008 +0900
    77.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 12 14:47:40 2008 +0900
    77.3 @@ -1114,10 +1114,10 @@ static void __vmx_inject_exception(
    77.4      __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
    77.5  
    77.6      if ( trap == TRAP_page_fault )
    77.7 -        HVMTRACE_LONG_2D(PF_INJECT, v, error_code,
    77.8 +        HVMTRACE_LONG_2D(PF_INJECT, error_code,
    77.9              TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
   77.10      else
   77.11 -        HVMTRACE_2D(INJ_EXC, v, trap, error_code);
   77.12 +        HVMTRACE_2D(INJ_EXC, trap, error_code);
   77.13  }
   77.14  
   77.15  void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
   77.16 @@ -1345,7 +1345,7 @@ static void vmx_cpuid_intercept(
   77.17              break;
   77.18      }
   77.19  
   77.20 -    HVMTRACE_5D (CPUID, current, input, *eax, *ebx, *ecx, *edx);
   77.21 +    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
   77.22  }
   77.23  
   77.24  static void vmx_do_cpuid(struct cpu_user_regs *regs)
   77.25 @@ -1370,7 +1370,7 @@ static void vmx_dr_access(unsigned long 
   77.26  {
   77.27      struct vcpu *v = current;
   77.28  
   77.29 -    HVMTRACE_0D(DR_WRITE, v);
   77.30 +    HVMTRACE_0D(DR_WRITE);
   77.31  
   77.32      if ( !v->arch.hvm_vcpu.flag_dr_dirty )
   77.33          __restore_debug_registers(v);
   77.34 @@ -1383,7 +1383,7 @@ static void vmx_dr_access(unsigned long 
   77.35  static void vmx_invlpg_intercept(unsigned long vaddr)
   77.36  {
   77.37      struct vcpu *curr = current;
   77.38 -    HVMTRACE_LONG_2D(INVLPG, curr, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
   77.39 +    HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
   77.40      if ( paging_invlpg(curr, vaddr) )
   77.41          vpid_sync_vcpu_gva(curr, vaddr);
   77.42  }
   77.43 @@ -1434,7 +1434,7 @@ static int mov_to_cr(int gp, int cr, str
   77.44          goto exit_and_crash;
   77.45      }
   77.46  
   77.47 -    HVMTRACE_LONG_2D(CR_WRITE, v, cr, TRC_PAR_LONG(value));
   77.48 +    HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
   77.49  
   77.50      HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
   77.51  
   77.52 @@ -1505,7 +1505,7 @@ static void mov_from_cr(int cr, int gp, 
   77.53          break;
   77.54      }
   77.55  
   77.56 -    HVMTRACE_LONG_2D(CR_READ, v, cr, TRC_PAR_LONG(value));
   77.57 +    HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
   77.58  
   77.59      HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
   77.60  }
   77.61 @@ -1531,13 +1531,13 @@ static int vmx_cr_access(unsigned long e
   77.62      case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
   77.63          v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
   77.64          vmx_update_guest_cr(v, 0);
   77.65 -        HVMTRACE_0D(CLTS, current);
   77.66 +        HVMTRACE_0D(CLTS);
   77.67          break;
   77.68      case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
   77.69          value = v->arch.hvm_vcpu.guest_cr[0];
   77.70          /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
   77.71          value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
   77.72 -        HVMTRACE_LONG_1D(LMSW, current, value);
   77.73 +        HVMTRACE_LONG_1D(LMSW, value);
   77.74          return !hvm_set_cr0(value);
   77.75      default:
   77.76          BUG();
   77.77 @@ -1692,7 +1692,7 @@ static int vmx_msr_read_intercept(struct
   77.78      regs->edx = (uint32_t)(msr_content >> 32);
   77.79  
   77.80  done:
   77.81 -    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
   77.82 +    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
   77.83      HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
   77.84                  ecx, (unsigned long)regs->eax,
   77.85                  (unsigned long)regs->edx);
   77.86 @@ -1803,7 +1803,7 @@ static int vmx_msr_write_intercept(struc
   77.87  
   77.88      msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
   77.89  
   77.90 -    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
   77.91 +    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
   77.92  
   77.93      switch ( ecx )
   77.94      {
   77.95 @@ -1894,7 +1894,7 @@ static void vmx_do_extint(struct cpu_use
   77.96      BUG_ON(!(vector & INTR_INFO_VALID_MASK));
   77.97  
   77.98      vector &= INTR_INFO_VECTOR_MASK;
   77.99 -    HVMTRACE_1D(INTR, current, vector);
  77.100 +    HVMTRACE_1D(INTR, vector);
  77.101  
  77.102      switch ( vector )
  77.103      {
  77.104 @@ -2010,7 +2010,7 @@ static void vmx_failed_vmentry(unsigned 
  77.105          break;
  77.106      case EXIT_REASON_MACHINE_CHECK:
  77.107          printk("caused by machine check.\n");
  77.108 -        HVMTRACE_0D(MCE, curr);
  77.109 +        HVMTRACE_0D(MCE);
  77.110          do_machine_check(regs);
  77.111          break;
  77.112      default:
  77.113 @@ -2037,7 +2037,7 @@ asmlinkage void vmx_vmexit_handler(struc
  77.114  
  77.115      exit_reason = __vmread(VM_EXIT_REASON);
  77.116  
  77.117 -    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
  77.118 +    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
  77.119                  (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
  77.120                  0, 0, 0);
  77.121  
  77.122 @@ -2101,7 +2101,8 @@ asmlinkage void vmx_vmexit_handler(struc
  77.123               !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
  77.124               (vector != TRAP_double_fault) )
  77.125              __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
  77.126 -                    __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
  77.127 +                      __vmread(GUEST_INTERRUPTIBILITY_INFO)
  77.128 +                      | VMX_INTR_SHADOW_NMI);
  77.129  
  77.130          perfc_incra(cause_vector, vector);
  77.131  
  77.132 @@ -2128,12 +2129,14 @@ asmlinkage void vmx_vmexit_handler(struc
  77.133  
  77.134              if ( paging_fault(exit_qualification, regs) )
  77.135              {
  77.136 +                if ( trace_will_trace_event(TRC_SHADOW) )
  77.137 +                    break;
  77.138                  if ( hvm_long_mode_enabled(v) )
  77.139 -                    HVMTRACE_LONG_2D (PF_XEN, v, regs->error_code,
  77.140 -                        TRC_PAR_LONG(exit_qualification) );
  77.141 +                    HVMTRACE_LONG_2D(PF_XEN, regs->error_code,
  77.142 +                                     TRC_PAR_LONG(exit_qualification) );
  77.143                  else
  77.144 -                    HVMTRACE_2D (PF_XEN, v,
  77.145 -                        regs->error_code, exit_qualification );
  77.146 +                    HVMTRACE_2D(PF_XEN,
  77.147 +                                regs->error_code, exit_qualification );
  77.148                  break;
  77.149              }
  77.150  
  77.151 @@ -2144,11 +2147,11 @@ asmlinkage void vmx_vmexit_handler(struc
  77.152              if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
  77.153                   (X86_EVENTTYPE_NMI << 8) )
  77.154                  goto exit_and_crash;
  77.155 -            HVMTRACE_0D(NMI, v);
  77.156 +            HVMTRACE_0D(NMI);
  77.157              do_nmi(regs); /* Real NMI, vector 2: normal processing. */
  77.158              break;
  77.159          case TRAP_machine_check:
  77.160 -            HVMTRACE_0D(MCE, v);
  77.161 +            HVMTRACE_0D(MCE);
  77.162              do_machine_check(regs);
  77.163              break;
  77.164          default:
  77.165 @@ -2213,7 +2216,7 @@ asmlinkage void vmx_vmexit_handler(struc
  77.166      case EXIT_REASON_VMCALL:
  77.167      {
  77.168          int rc;
  77.169 -        HVMTRACE_1D(VMMCALL, v, regs->eax);
  77.170 +        HVMTRACE_1D(VMMCALL, regs->eax);
  77.171          inst_len = __get_instruction_length(); /* Safe: VMCALL */
  77.172          rc = hvm_do_hypercall(regs);
  77.173          if ( rc != HVM_HCALL_preempted )
  77.174 @@ -2300,7 +2303,7 @@ asmlinkage void vmx_vmexit_handler(struc
  77.175  
  77.176  asmlinkage void vmx_trace_vmentry(void)
  77.177  {
  77.178 -    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
  77.179 +    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
  77.180  }
  77.181  
  77.182  /*
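
Besides the mechanical macro changes, the page-fault exit path above gains a gate: when the shadow-paging tracer is active, the generic PF_XEN record is suppressed so the same fault is not logged twice. Condensed from the hunk above (illustrative, not compilable on its own):

    if ( paging_fault(exit_qualification, regs) )
    {
        /* The shadow code emits its own, richer trace records for this
         * fault; emitting PF_XEN as well would double-count it. */
        if ( trace_will_trace_event(TRC_SHADOW) )
            break;
        /* ... otherwise emit the PF_XEN record as before ... */
    }
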
    78.1 --- a/xen/arch/x86/io_apic.c	Fri Sep 12 14:32:45 2008 +0900
    78.2 +++ b/xen/arch/x86/io_apic.c	Fri Sep 12 14:47:40 2008 +0900
    78.3 @@ -45,23 +45,14 @@
    78.4  int (*ioapic_renumber_irq)(int ioapic, int irq);
    78.5  atomic_t irq_mis_count;
    78.6  
    78.7 -int msi_enable = 0;
    78.8 -boolean_param("msi", msi_enable);
    78.9 -
   78.10  int domain_irq_to_vector(struct domain *d, int irq)
   78.11  {
   78.12 -    if ( !msi_enable )
   78.13 -        return irq_to_vector(irq);
   78.14 -    else
   78.15 -        return d->arch.pirq_vector[irq];
   78.16 +    return d->arch.pirq_vector[irq];
   78.17  }
   78.18  
   78.19  int domain_vector_to_irq(struct domain *d, int vector)
   78.20  {
   78.21 -    if ( !msi_enable )
   78.22 -        return vector_to_irq(vector);
   78.23 -    else
   78.24 -        return d->arch.vector_pirq[vector];
   78.25 +    return d->arch.vector_pirq[vector];
   78.26  }
   78.27  
   78.28  /* Where if anywhere is the i8259 connect in external int mode */
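
With the `msi` boot parameter gone, the per-domain translation tables are authoritative in both directions. A minimal sketch of the round-trip, assuming the two arrays are kept consistent by the MSI/passthrough setup code:

    /* Sketch: pirq <-> vector round-trip through the per-domain tables. */
    int vector = domain_irq_to_vector(d, pirq);   /* d->arch.pirq_vector[pirq]   */
    int pirq2  = domain_vector_to_irq(d, vector); /* d->arch.vector_pirq[vector] */
    ASSERT(pirq2 == pirq);                        /* holds while tables agree    */
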
    79.1 --- a/xen/arch/x86/irq.c	Fri Sep 12 14:32:45 2008 +0900
    79.2 +++ b/xen/arch/x86/irq.c	Fri Sep 12 14:47:40 2008 +0900
    79.3 @@ -737,9 +737,12 @@ static int __init setup_dump_irqs(void)
    79.4  
    79.5  void fixup_irqs(cpumask_t map)
    79.6  {
    79.7 -    unsigned int irq;
    79.8 +    unsigned int irq, sp;
    79.9      static int warned;
   79.10 +    irq_guest_action_t *action;
   79.11 +    struct pending_eoi *peoi;
   79.12  
   79.13 +    /* Direct all future interrupts away from this CPU. */
   79.14      for ( irq = 0; irq < NR_IRQS; irq++ )
   79.15      {
   79.16          cpumask_t mask;
   79.17 @@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
   79.18              printk("Cannot set affinity for irq %i\n", irq);
   79.19      }
   79.20  
   79.21 +    /* Service any interrupts that beat us in the re-direction race. */
   79.22      local_irq_enable();
   79.23      mdelay(1);
   79.24      local_irq_disable();
   79.25 +
   79.26 +    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
   79.27 +    for ( irq = 0; irq < NR_IRQS; irq++ )
   79.28 +    {
   79.29 +        if ( !(irq_desc[irq].status & IRQ_GUEST) )
   79.30 +            continue;
   79.31 +        action = (irq_guest_action_t *)irq_desc[irq].action;
   79.32 +        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
   79.33 +    }
   79.34 +
   79.35 +    /* Flush the interrupt EOI stack. */
   79.36 +    peoi = this_cpu(pending_eoi);
   79.37 +    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
   79.38 +        peoi[sp].ready = 1;
   79.39 +    flush_ready_eoi(NULL);
   79.40  }
   79.41  #endif
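
The extended fixup_irqs() now offlines a CPU in three phases: it points every IRQ's affinity away from the dying CPU, briefly re-enables interrupts for 1 ms to service anything that raced in, then removes the CPU from every guest IRQ's cpu_eoi_map and drains the local pending-EOI stack. A condensed sketch of the final drain step, assuming pending_eoi is the per-CPU stack and pending_eoi_sp() returns its current depth:

    /* Mark every EOI still queued on this CPU as ready, then issue them
     * all at once; afterwards no guest EOI can be stranded here. */
    struct pending_eoi *peoi = this_cpu(pending_eoi);
    unsigned int sp;
    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
        peoi[sp].ready = 1;
    flush_ready_eoi(NULL);
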
    80.1 --- a/xen/arch/x86/mm.c	Fri Sep 12 14:32:45 2008 +0900
    80.2 +++ b/xen/arch/x86/mm.c	Fri Sep 12 14:47:40 2008 +0900
    80.3 @@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag
    80.4              goto fail;
    80.5  
    80.6      unmap_domain_page(descs);
    80.7 -    return 1;
    80.8 +    return 0;
    80.9  
   80.10   fail:
   80.11      unmap_domain_page(descs);
   80.12 -    return 0;
   80.13 +    return -EINVAL;
   80.14  }
   80.15  
   80.16  
   80.17 @@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned
   80.18  
   80.19  static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   80.20                                           unsigned long type,
   80.21 -                                         struct domain *d)
   80.22 +                                         struct domain *d,
   80.23 +                                         int preemptible)
   80.24  {
   80.25      struct page_info *page = mfn_to_page(page_nr);
   80.26 +    int rc;
   80.27  
   80.28      if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
   80.29 -        return 0;
   80.30 -
   80.31 -    if ( unlikely(!get_page_type(page, type)) )
   80.32 -    {
   80.33 +        return -EINVAL;
   80.34 +
   80.35 +    rc = (preemptible ?
   80.36 +          get_page_type_preemptible(page, type) :
   80.37 +          (get_page_type(page, type) ? 0 : -EINVAL));
   80.38 +
   80.39 +    if ( rc )
   80.40          put_page(page);
   80.41 -        return 0;
   80.42 -    }
   80.43 -
   80.44 -    return 1;
   80.45 +
   80.46 +    return rc;
   80.47  }
   80.48  
   80.49  /*
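
get_page_and_type_from_pagenr() changes convention here: it returns 0 or a -errno value instead of a boolean, and takes a preemptible flag selecting get_page_type_preemptible(). A caller-side sketch of the resulting contract (the comments state assumptions borne out by the rest of this patch):

    int rc = get_page_and_type_from_pagenr(mfn, PGT_l2_page_table, d, 1);
    if ( rc == 0 )
    {
        /* Success: both a general and a type reference are now held. */
    }
    else if ( rc == -EAGAIN || rc == -EINTR )
    {
        /* Preempted: arrange a hypercall continuation and retry. */
    }
    else
    {
        /* Hard failure (e.g. -EINVAL): no reference is held. */
    }
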
   80.50 @@ -754,22 +757,22 @@ get_page_from_l2e(
   80.51      if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
   80.52      {
   80.53          MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
   80.54 -        return 0;
   80.55 +        return -EINVAL;
   80.56      }
   80.57  
   80.58 -    rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
   80.59 -    if ( unlikely(!rc) )
   80.60 -        rc = get_l2_linear_pagetable(l2e, pfn, d);
   80.61 +    rc = get_page_and_type_from_pagenr(
   80.62 +        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
   80.63 +    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
   80.64 +        rc = 0;
   80.65  
   80.66      return rc;
   80.67  }
   80.68  
   80.69  
   80.70 -#if CONFIG_PAGING_LEVELS >= 3
   80.71  define_get_linear_pagetable(l3);
   80.72  static int
   80.73  get_page_from_l3e(
   80.74 -    l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   80.75 +    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
   80.76  {
   80.77      int rc;
   80.78  
   80.79 @@ -779,22 +782,22 @@ get_page_from_l3e(
   80.80      if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
   80.81      {
   80.82          MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
   80.83 -        return 0;
   80.84 +        return -EINVAL;
   80.85      }
   80.86  
   80.87 -    rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
   80.88 -    if ( unlikely(!rc) )
   80.89 -        rc = get_l3_linear_pagetable(l3e, pfn, d);
   80.90 +    rc = get_page_and_type_from_pagenr(
   80.91 +        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
   80.92 +    if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
   80.93 +        rc = 0;
   80.94  
   80.95      return rc;
   80.96  }
   80.97 -#endif /* 3 level */
   80.98  
   80.99  #if CONFIG_PAGING_LEVELS >= 4
  80.100  define_get_linear_pagetable(l4);
  80.101  static int
  80.102  get_page_from_l4e(
  80.103 -    l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
  80.104 +    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
  80.105  {
  80.106      int rc;
  80.107  
  80.108 @@ -804,12 +807,13 @@ get_page_from_l4e(
  80.109      if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
  80.110      {
  80.111          MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
  80.112 -        return 0;
  80.113 +        return -EINVAL;
  80.114      }
  80.115  
  80.116 -    rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
  80.117 -    if ( unlikely(!rc) )
  80.118 -        rc = get_l4_linear_pagetable(l4e, pfn, d);
  80.119 +    rc = get_page_and_type_from_pagenr(
  80.120 +        l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
  80.121 +    if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
  80.122 +        rc = 0;
  80.123  
  80.124      return rc;
  80.125  }
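
The same fallback pattern now appears at every level: only a hard -EINVAL from the type acquisition may be converted back to success by the linear-pagetable check, while -EAGAIN and -EINTR propagate so a preempted hypercall can be resumed. Taking the L3 case from the hunk above:

    rc = get_page_and_type_from_pagenr(
        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
    if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
        rc = 0;   /* a valid self-referencing (linear) mapping is fine */
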
  80.126 @@ -946,29 +950,35 @@ void put_page_from_l1e(l1_pgentry_t l1e,
  80.127   * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
  80.128   * Note also that this automatically deals correctly with linear p.t.'s.
  80.129   */
  80.130 -static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
  80.131 +static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
  80.132  {
  80.133      if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
  80.134           (l2e_get_pfn(l2e) != pfn) )
  80.135 +    {
  80.136          put_page_and_type(l2e_get_page(l2e));
  80.137 +        return 0;
  80.138 +    }
  80.139 +    return 1;
  80.140  }
  80.141  
  80.142  
  80.143 -#if CONFIG_PAGING_LEVELS >= 3
  80.144 -static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
  80.145 +static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
  80.146 +                             int preemptible)
  80.147  {
  80.148      if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
  80.149           (l3e_get_pfn(l3e) != pfn) )
  80.150 -        put_page_and_type(l3e_get_page(l3e));
  80.151 +        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
  80.152 +    return 1;
  80.153  }
  80.154 -#endif
  80.155  
  80.156  #if CONFIG_PAGING_LEVELS >= 4
  80.157 -static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
  80.158 +static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
  80.159 +                             int preemptible)
  80.160  {
  80.161      if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
  80.162           (l4e_get_pfn(l4e) != pfn) )
  80.163 -        put_page_and_type(l4e_get_page(l4e));
  80.164 +        return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
  80.165 +    return 1;
  80.166  }
  80.167  #endif
  80.168  
  80.169 @@ -977,7 +987,7 @@ static int alloc_l1_table(struct page_in
  80.170      struct domain *d = page_get_owner(page);
  80.171      unsigned long  pfn = page_to_mfn(page);
  80.172      l1_pgentry_t  *pl1e;
  80.173 -    int            i;
  80.174 +    unsigned int   i;
  80.175  
  80.176      pl1e = map_domain_page(pfn);
  80.177  
  80.178 @@ -991,7 +1001,7 @@ static int alloc_l1_table(struct page_in
  80.179      }
  80.180  
  80.181      unmap_domain_page(pl1e);
  80.182 -    return 1;
  80.183 +    return 0;
  80.184  
  80.185   fail:
  80.186      MEM_LOG("Failure in alloc_l1_table: entry %d", i);
  80.187 @@ -1000,7 +1010,7 @@ static int alloc_l1_table(struct page_in
  80.188              put_page_from_l1e(pl1e[i], d);
  80.189  
  80.190      unmap_domain_page(pl1e);
  80.191 -    return 0;
  80.192 +    return -EINVAL;
  80.193  }
  80.194  
  80.195  static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
  80.196 @@ -1128,47 +1138,53 @@ static void pae_flush_pgd(
  80.197  # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
  80.198  #endif
  80.199  
  80.200 -static int alloc_l2_table(struct page_info *page, unsigned long type)
  80.201 +static int alloc_l2_table(struct page_info *page, unsigned long type,
  80.202 +                          int preemptible)
  80.203  {
  80.204      struct domain *d = page_get_owner(page);
  80.205      unsigned long  pfn = page_to_mfn(page);
  80.206      l2_pgentry_t  *pl2e;
  80.207 -    int            i;
  80.208 +    unsigned int   i;
  80.209 +    int            rc = 0;
  80.210  
  80.211      pl2e = map_domain_page(pfn);
  80.212  
  80.213 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  80.214 +    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
  80.215      {
  80.216 -        if ( !is_guest_l2_slot(d, type, i) )
  80.217 +        if ( preemptible && i && hypercall_preempt_check() )
  80.218 +        {
  80.219 +            page->nr_validated_ptes = i;
  80.220 +            rc = -EAGAIN;
  80.221 +            break;
  80.222 +        }
  80.223 +
  80.224 +        if ( !is_guest_l2_slot(d, type, i) ||
  80.225 +             (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
  80.226              continue;
  80.227  
  80.228 -        if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
  80.229 -            goto fail;
  80.230 -        
  80.231 +        if ( rc < 0 )
  80.232 +        {
  80.233 +            MEM_LOG("Failure in alloc_l2_table: entry %d", i);
  80.234 +            while ( i-- > 0 )
  80.235 +                if ( is_guest_l2_slot(d, type, i) )
  80.236 +                    put_page_from_l2e(pl2e[i], pfn);
  80.237 +            break;
  80.238 +        }
  80.239 +
  80.240          adjust_guest_l2e(pl2e[i], d);
  80.241      }
  80.242  
  80.243      unmap_domain_page(pl2e);
  80.244 -    return 1;
  80.245 -
  80.246 - fail:
  80.247 -    MEM_LOG("Failure in alloc_l2_table: entry %d", i);
  80.248 -    while ( i-- > 0 )
  80.249 -        if ( is_guest_l2_slot(d, type, i) )
  80.250 -            put_page_from_l2e(pl2e[i], pfn);
  80.251 -
  80.252 -    unmap_domain_page(pl2e);
  80.253 -    return 0;
  80.254 +    return rc > 0 ? 0 : rc;
  80.255  }
  80.256  
  80.257 -
  80.258 -#if CONFIG_PAGING_LEVELS >= 3
  80.259 -static int alloc_l3_table(struct page_info *page)
  80.260 +static int alloc_l3_table(struct page_info *page, int preemptible)
  80.261  {
  80.262      struct domain *d = page_get_owner(page);
  80.263      unsigned long  pfn = page_to_mfn(page);
  80.264      l3_pgentry_t  *pl3e;
  80.265 -    int            i;
  80.266 +    unsigned int   i;
  80.267 +    int            rc = 0;
  80.268  
  80.269  #if CONFIG_PAGING_LEVELS == 3
  80.270      /*
  80.271 @@ -1181,7 +1197,7 @@ static int alloc_l3_table(struct page_in
  80.272           d->vcpu[0] && d->vcpu[0]->is_initialised )
  80.273      {
  80.274          MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
  80.275 -        return 0;
  80.276 +        return -EINVAL;
  80.277      }
  80.278  #endif
  80.279  
  80.280 @@ -1197,64 +1213,96 @@ static int alloc_l3_table(struct page_in
  80.281      if ( is_pv_32on64_domain(d) )
  80.282          memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
  80.283  
  80.284 -    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  80.285 +    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
  80.286      {
  80.287          if ( is_pv_32bit_domain(d) && (i == 3) )
  80.288          {
  80.289              if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
  80.290 -                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
  80.291 -                 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
  80.292 -                                                PGT_l2_page_table |
  80.293 -                                                PGT_pae_xen_l2,
  80.294 -                                                d) )
  80.295 -                goto fail;
  80.296 +                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
  80.297 +                rc = -EINVAL;
  80.298 +            else
  80.299 +                rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
  80.300 +                                                   PGT_l2_page_table |
  80.301 +                                                   PGT_pae_xen_l2,
  80.302 +                                                   d, preemptible);
  80.303          }
  80.304 -        else if ( !is_guest_l3_slot(i) )
  80.305 +        else if ( !is_guest_l3_slot(i) ||
  80.306 +                  (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
  80.307              continue;
  80.308 -        else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
  80.309 -            goto fail;
  80.310 +
  80.311 +        if ( rc == -EAGAIN )
  80.312 +        {
  80.313 +            page->nr_validated_ptes = i;
  80.314 +            page->partial_pte = 1;
  80.315 +        }
  80.316 +        else if ( rc == -EINTR && i )
  80.317 +        {
  80.318 +            page->nr_validated_ptes = i;
  80.319 +            page->partial_pte = 0;
  80.320 +            rc = -EAGAIN;
  80.321 +        }
  80.322 +        if ( rc < 0 )
  80.323 +            break;
  80.324  
  80.325          adjust_guest_l3e(pl3e[i], d);
  80.326      }
  80.327  
  80.328 -    if ( !create_pae_xen_mappings(d, pl3e) )
  80.329 -        goto fail;
  80.330 -
  80.331 -    unmap_domain_page(pl3e);
  80.332 -    return 1;
  80.333 -
  80.334 - fail:
  80.335 -    MEM_LOG("Failure in alloc_l3_table: entry %d", i);
  80.336 -    while ( i-- > 0 )
  80.337 +    if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
  80.338 +        rc = -EINVAL;
  80.339 +    if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
  80.340      {
  80.341 -        if ( !is_guest_l3_slot(i) )
  80.342 -            continue;
  80.343 -        unadjust_guest_l3e(pl3e[i], d);
  80.344 -        put_page_from_l3e(pl3e[i], pfn);
  80.345 +        MEM_LOG("Failure in alloc_l3_table: entry %d", i);
  80.346 +        while ( i-- > 0 )
  80.347 +        {
  80.348 +            if ( !is_guest_l3_slot(i) )
  80.349 +                continue;
  80.350 +            unadjust_guest_l3e(pl3e[i], d);
  80.351 +            put_page_from_l3e(pl3e[i], pfn, 0);
  80.352 +        }
  80.353      }
  80.354  
  80.355      unmap_domain_page(pl3e);
  80.356 -    return 0;
  80.357 +    return rc > 0 ? 0 : rc;
  80.358  }
  80.359 -#else
  80.360 -#define alloc_l3_table(page) (0)
  80.361 -#endif
  80.362  
  80.363  #if CONFIG_PAGING_LEVELS >= 4
  80.364 -static int alloc_l4_table(struct page_info *page)
  80.365 +static int alloc_l4_table(struct page_info *page, int preemptible)
  80.366  {
  80.367      struct domain *d = page_get_owner(page);
  80.368      unsigned long  pfn = page_to_mfn(page);
  80.369      l4_pgentry_t  *pl4e = page_to_virt(page);
  80.370 -    int            i;
  80.371 -
  80.372 -    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
  80.373 +    unsigned int   i;
  80.374 +    int            rc = 0;
  80.375 +
  80.376 +    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
  80.377      {
  80.378 -        if ( !is_guest_l4_slot(d, i) )
  80.379 +        if ( !is_guest_l4_slot(d, i) ||
  80.380 +             (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
  80.381              continue;
  80.382  
  80.383 -        if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
  80.384 -            goto fail;
  80.385 +        if ( rc == -EAGAIN )
  80.386 +        {
  80.387 +            page->nr_validated_ptes = i;
  80.388 +            page->partial_pte = 1;
  80.389 +        }
  80.390 +        else if ( rc == -EINTR )
  80.391 +        {
  80.392 +            if ( i )
  80.393 +            {
  80.394 +                page->nr_validated_ptes = i;
  80.395 +                page->partial_pte = 0;
  80.396 +                rc = -EAGAIN;
  80.397 +            }
  80.398 +        }
  80.399 +        else if ( rc < 0 )
  80.400 +        {
  80.401 +            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
  80.402 +            while ( i-- > 0 )
  80.403 +                if ( is_guest_l4_slot(d, i) )
  80.404 +                    put_page_from_l4e(pl4e[i], pfn, 0);
  80.405 +        }
  80.406 +        if ( rc < 0 )
  80.407 +            return rc;
  80.408  
  80.409          adjust_guest_l4e(pl4e[i], d);
  80.410      }
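
The loops above implement a small resumption protocol: page->nr_validated_ptes records how far validation got, page->partial_pte says whether the entry at that index itself holds a partially taken reference, -EAGAIN means partial state was recorded, and -EINTR means preemption hit at a clean entry boundary. The common skeleton, with lNe/ENTRIES as generic placeholders for the level-specific names:

    for ( i = page->nr_validated_ptes; i < ENTRIES; i++ )
    {
        rc = get_page_from_lNe(plNe[i], pfn, d, preemptible);
        if ( rc > 0 )
            continue;                      /* nothing to validate here  */
        if ( rc == -EAGAIN )
        {
            page->nr_validated_ptes = i;   /* entry i is itself partial */
            page->partial_pte = 1;
        }
        else if ( rc == -EINTR && i )
        {
            page->nr_validated_ptes = i;   /* clean boundary before i   */
            page->partial_pte = 0;
            rc = -EAGAIN;                  /* resume from entry i later */
        }
        if ( rc < 0 )
            break;
        adjust_guest_lNe(plNe[i], d);
    }
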
  80.411 @@ -1269,18 +1317,10 @@ static int alloc_l4_table(struct page_in
  80.412          l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
  80.413                        __PAGE_HYPERVISOR);
  80.414  
  80.415 -    return 1;
  80.416 -
  80.417 - fail:
  80.418 -    MEM_LOG("Failure in alloc_l4_table: entry %d", i);
  80.419 -    while ( i-- > 0 )
  80.420 -        if ( is_guest_l4_slot(d, i) )
  80.421 -            put_page_from_l4e(pl4e[i], pfn);
  80.422 -
  80.423 -    return 0;
  80.424 +    return rc > 0 ? 0 : rc;
  80.425  }
  80.426  #else
  80.427 -#define alloc_l4_table(page) (0)
  80.428 +#define alloc_l4_table(page, preemptible) (-EINVAL)
  80.429  #endif
  80.430  
  80.431  
  80.432 @@ -1289,7 +1329,7 @@ static void free_l1_table(struct page_in
  80.433      struct domain *d = page_get_owner(page);
  80.434      unsigned long pfn = page_to_mfn(page);
  80.435      l1_pgentry_t *pl1e;
  80.436 -    int i;
  80.437 +    unsigned int  i;
  80.438  
  80.439      pl1e = map_domain_page(pfn);
  80.440  
  80.441 @@ -1301,74 +1341,114 @@ static void free_l1_table(struct page_in
  80.442  }
  80.443  
  80.444  
  80.445 -static void free_l2_table(struct page_info *page)
  80.446 +static int free_l2_table(struct page_info *page, int preemptible)
  80.447  {
  80.448  #ifdef CONFIG_COMPAT
  80.449      struct domain *d = page_get_owner(page);
  80.450  #endif
  80.451      unsigned long pfn = page_to_mfn(page);
  80.452      l2_pgentry_t *pl2e;
  80.453 -    int i;
  80.454 +    unsigned int  i = page->nr_validated_ptes - 1;
  80.455 +    int err = 0;
  80.456  
  80.457      pl2e = map_domain_page(pfn);
  80.458  
  80.459 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  80.460 -        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
  80.461 -            put_page_from_l2e(pl2e[i], pfn);
  80.462 +    ASSERT(page->nr_validated_ptes);
  80.463 +    do {
  80.464 +        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
  80.465 +             put_page_from_l2e(pl2e[i], pfn) == 0 &&
  80.466 +             preemptible && i && hypercall_preempt_check() )
  80.467 +        {
  80.468 +           page->nr_validated_ptes = i;
  80.469 +           err = -EAGAIN;
  80.470 +        }
  80.471 +    } while ( !err && i-- );
  80.472  
  80.473      unmap_domain_page(pl2e);
  80.474  
  80.475 -    page->u.inuse.type_info &= ~PGT_pae_xen_l2;
  80.476 +    if ( !err )
  80.477 +        page->u.inuse.type_info &= ~PGT_pae_xen_l2;
  80.478 +
  80.479 +    return err;
  80.480  }
  80.481  
  80.482 -
  80.483 -#if CONFIG_PAGING_LEVELS >= 3
  80.484 -
  80.485 -static void free_l3_table(struct page_info *page)
  80.486 +static int free_l3_table(struct page_info *page, int preemptible)
  80.487  {
  80.488      struct domain *d = page_get_owner(page);
  80.489      unsigned long pfn = page_to_mfn(page);
  80.490      l3_pgentry_t *pl3e;
  80.491 -    int           i;
  80.492 +    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
  80.493 +    int rc = 0;
  80.494  
  80.495  #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
  80.496      if ( d->arch.relmem == RELMEM_l3 )
  80.497 -        return;
  80.498 +        return 0;
  80.499  #endif
  80.500  
  80.501      pl3e = map_domain_page(pfn);
  80.502  
  80.503 -    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  80.504 +    do {
  80.505          if ( is_guest_l3_slot(i) )
  80.506          {
  80.507 -            put_page_from_l3e(pl3e[i], pfn);
  80.508 +            rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
  80.509 +            if ( rc > 0 )
  80.510 +                continue;
  80.511 +            if ( rc )
  80.512 +                break;
  80.513              unadjust_guest_l3e(pl3e[i], d);
  80.514          }
  80.515 +    } while ( i-- );
  80.516  
  80.517      unmap_domain_page(pl3e);
  80.518 +
  80.519 +    if ( rc == -EAGAIN )
  80.520 +    {
  80.521 +        page->nr_validated_ptes = i;
  80.522 +        page->partial_pte = 1;
  80.523 +    }
  80.524 +    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
  80.525 +    {
  80.526 +        page->nr_validated_ptes = i + 1;
  80.527 +        page->partial_pte = 0;
  80.528 +        rc = -EAGAIN;
  80.529 +    }
  80.530 +    return rc > 0 ? 0 : rc;
  80.531  }
  80.532  
  80.533 -#endif
  80.534 -
  80.535  #if CONFIG_PAGING_LEVELS >= 4
  80.536 -
  80.537 -static void free_l4_table(struct page_info *page)
  80.538 +static int free_l4_table(struct page_info *page, int preemptible)
  80.539  {
  80.540      struct domain *d = page_get_owner(page);
  80.541      unsigned long pfn = page_to_mfn(page);
  80.542      l4_pgentry_t *pl4e = page_to_virt(page);
  80.543 -    int           i;
  80.544 +    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
  80.545 +    int rc = 0;
  80.546  
  80.547  #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
  80.548      if ( d->arch.relmem == RELMEM_l4 )
  80.549 -        return;
  80.550 +        return 0;
  80.551  #endif
  80.552  
  80.553 -    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
  80.554 +    do {
  80.555          if ( is_guest_l4_slot(d, i) )
  80.556 -            put_page_from_l4e(pl4e[i], pfn);
  80.557 +            rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
  80.558 +    } while ( rc >= 0 && i-- );
  80.559 +
  80.560 +    if ( rc == -EAGAIN )
  80.561 +    {
  80.562 +        page->nr_validated_ptes = i;
  80.563 +        page->partial_pte = 1;
  80.564 +    }
  80.565 +    else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
  80.566 +    {
  80.567 +        page->nr_validated_ptes = i + 1;
  80.568 +        page->partial_pte = 0;
  80.569 +        rc = -EAGAIN;
  80.570 +    }
  80.571 +    return rc > 0 ? 0 : rc;
  80.572  }
  80.573 -
  80.574 +#else
  80.575 +#define free_l4_table(page, preemptible) (-EINVAL)
  80.576  #endif
  80.577  
  80.578  static void page_lock(struct page_info *page)
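
Teardown is made restartable the same way, but the walk runs downwards so that nr_validated_ptes always names the next entry still holding references. Condensed from free_l2_table() above (the is_guest_l2_slot() filter is elided for brevity):

    unsigned int i = page->nr_validated_ptes - 1;
    int err = 0;
    do {
        if ( put_page_from_l2e(pl2e[i], pfn) == 0 &&   /* dropped a ref */
             preemptible && i && hypercall_preempt_check() )
        {
            page->nr_validated_ptes = i;   /* entries 0..i still to do */
            err = -EAGAIN;
        }
    } while ( !err && i-- );
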
  80.579 @@ -1560,7 +1640,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
  80.580              return rc;
  80.581          }
  80.582  
  80.583 -        if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
  80.584 +        if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
  80.585              return page_unlock(l2pg), 0;
  80.586  
  80.587          adjust_guest_l2e(nl2e, d);
  80.588 @@ -1583,24 +1663,23 @@ static int mod_l2_entry(l2_pgentry_t *pl
  80.589      return rc;
  80.590  }
  80.591  
  80.592 -#if CONFIG_PAGING_LEVELS >= 3
  80.593 -
  80.594  /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
  80.595  static int mod_l3_entry(l3_pgentry_t *pl3e, 
  80.596                          l3_pgentry_t nl3e, 
  80.597                          unsigned long pfn,
  80.598 -                        int preserve_ad)
  80.599 +                        int preserve_ad,
  80.600 +                        int preemptible)
  80.601  {
  80.602      l3_pgentry_t ol3e;
  80.603      struct vcpu *curr = current;
  80.604      struct domain *d = curr->domain;
  80.605      struct page_info *l3pg = mfn_to_page(pfn);
  80.606 -    int rc = 1;
  80.607 +    int rc = 0;
  80.608  
  80.609      if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
  80.610      {
  80.611          MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
  80.612 -        return 0;
  80.613 +        return -EINVAL;
  80.614      }
  80.615  
  80.616      /*
  80.617 @@ -1608,12 +1687,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
  80.618       * would be a pain to ensure they remain continuously valid throughout.
  80.619       */
  80.620      if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
  80.621 -        return 0;
  80.622 +        return -EINVAL;
  80.623  
  80.624      page_lock(l3pg);
  80.625  
  80.626      if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
  80.627 -        return page_unlock(l3pg), 0;
  80.628 +        return page_unlock(l3pg), -EFAULT;
  80.629  
  80.630      if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
  80.631      {
  80.632 @@ -1622,7 +1701,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
  80.633              page_unlock(l3pg);
  80.634              MEM_LOG("Bad L3 flags %x",
  80.635                      l3e_get_flags(nl3e) & l3_disallow_mask(d));
  80.636 -            return 0;
  80.637 +            return -EINVAL;
  80.638          }
  80.639  
  80.640          /* Fast path for identical mapping and presence. */
  80.641 @@ -1631,28 +1710,30 @@ static int mod_l3_entry(l3_pgentry_t *pl
  80.642              adjust_guest_l3e(nl3e, d);
  80.643              rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
  80.644              page_unlock(l3pg);
  80.645 -            return rc;
  80.646 +            return rc ? 0 : -EFAULT;
  80.647          }
  80.648  
  80.649 -        if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
  80.650 -            return page_unlock(l3pg), 0;
  80.651 +        rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
  80.652 +        if ( unlikely(rc < 0) )
  80.653 +            return page_unlock(l3pg), rc;
  80.654 +        rc = 0;
  80.655  
  80.656          adjust_guest_l3e(nl3e, d);
  80.657          if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
  80.658                                      preserve_ad)) )
  80.659          {
  80.660              ol3e = nl3e;
  80.661 -            rc = 0;
  80.662 +            rc = -EFAULT;
  80.663          }
  80.664      }
  80.665      else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
  80.666                                       preserve_ad)) )
  80.667      {
  80.668          page_unlock(l3pg);
  80.669 -        return 0;
  80.670 +        return -EFAULT;
  80.671      }
  80.672  
  80.673 -    if ( likely(rc) )
  80.674 +    if ( likely(rc == 0) )
  80.675      {
  80.676          if ( !create_pae_xen_mappings(d, pl3e) )
  80.677              BUG();
  80.678 @@ -1661,36 +1742,35 @@ static int mod_l3_entry(l3_pgentry_t *pl
  80.679      }
  80.680  
  80.681      page_unlock(l3pg);
  80.682 -    put_page_from_l3e(ol3e, pfn);
  80.683 +    put_page_from_l3e(ol3e, pfn, 0);
  80.684      return rc;
  80.685  }
  80.686  
  80.687 -#endif
  80.688 -
  80.689  #if CONFIG_PAGING_LEVELS >= 4
  80.690  
  80.691  /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
  80.692  static int mod_l4_entry(l4_pgentry_t *pl4e, 
  80.693                          l4_pgentry_t nl4e, 
  80.694                          unsigned long pfn,
  80.695 -                        int preserve_ad)
  80.696 +                        int preserve_ad,
  80.697 +                        int preemptible)
  80.698  {
  80.699      struct vcpu *curr = current;
  80.700      struct domain *d = curr->domain;
  80.701      l4_pgentry_t ol4e;
  80.702      struct page_info *l4pg = mfn_to_page(pfn);
  80.703 -    int rc = 1;
  80.704 +    int rc = 0;
  80.705  
  80.706      if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
  80.707      {
  80.708          MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
  80.709 -        return 0;
  80.710 +        return -EINVAL;
  80.711      }
  80.712  
  80.713      page_lock(l4pg);
  80.714  
  80.715      if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
  80.716 -        return page_unlock(l4pg), 0;
  80.717 +        return page_unlock(l4pg), -EFAULT;
  80.718  
  80.719      if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
  80.720      {
  80.721 @@ -1699,7 +1779,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
  80.722              page_unlock(l4pg);
  80.723              MEM_LOG("Bad L4 flags %x",
  80.724                      l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
  80.725 -            return 0;
  80.726 +            return -EINVAL;
  80.727          }
  80.728  
  80.729          /* Fast path for identical mapping and presence. */
  80.730 @@ -1708,29 +1788,31 @@ static int mod_l4_entry(l4_pgentry_t *pl
  80.731              adjust_guest_l4e(nl4e, d);
  80.732              rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
  80.733              page_unlock(l4pg);
  80.734 -            return rc;
  80.735 +            return rc ? 0 : -EFAULT;
  80.736          }
  80.737  
  80.738 -        if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
  80.739 -            return page_unlock(l4pg), 0;
  80.740 +        rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
  80.741 +        if ( unlikely(rc < 0) )
  80.742 +            return page_unlock(l4pg), rc;
  80.743 +        rc = 0;
  80.744  
  80.745          adjust_guest_l4e(nl4e, d);
  80.746          if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
  80.747                                      preserve_ad)) )
  80.748          {
  80.749              ol4e = nl4e;
  80.750 -            rc = 0;
  80.751 +            rc = -EFAULT;
  80.752          }
  80.753      }
  80.754      else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
  80.755                                       preserve_ad)) )
  80.756      {
  80.757          page_unlock(l4pg);
  80.758 -        return 0;
  80.759 +        return -EFAULT;
  80.760      }
  80.761  
  80.762      page_unlock(l4pg);
  80.763 -    put_page_from_l4e(ol4e, pfn);
  80.764 +    put_page_from_l4e(ol4e, pfn, 0);
  80.765      return rc;
  80.766  }
  80.767  
  80.768 @@ -1788,9 +1870,11 @@ int get_page(struct page_info *page, str
  80.769  }
  80.770  
  80.771  
  80.772 -static int alloc_page_type(struct page_info *page, unsigned long type)
  80.773 +static int alloc_page_type(struct page_info *page, unsigned long type,
  80.774 +                           int preemptible)
  80.775  {
  80.776      struct domain *owner = page_get_owner(page);
  80.777 +    int rc;
  80.778  
  80.779      /* A page table is dirtied when its type count becomes non-zero. */
  80.780      if ( likely(owner != NULL) )
  80.781 @@ -1799,30 +1883,65 @@ static int alloc_page_type(struct page_i
  80.782      switch ( type & PGT_type_mask )
  80.783      {
  80.784      case PGT_l1_page_table:
  80.785 -        return alloc_l1_table(page);
  80.786 +        alloc_l1_table(page);
  80.787 +        rc = 0;
  80.788 +        break;
  80.789      case PGT_l2_page_table:
  80.790 -        return alloc_l2_table(page, type);
  80.791 +        rc = alloc_l2_table(page, type, preemptible);
  80.792 +        break;
  80.793      case PGT_l3_page_table:
  80.794 -        return alloc_l3_table(page);
  80.795 +        rc = alloc_l3_table(page, preemptible);
  80.796 +        break;
  80.797      case PGT_l4_page_table:
  80.798 -        return alloc_l4_table(page);
  80.799 +        rc = alloc_l4_table(page, preemptible);
  80.800 +        break;
  80.801      case PGT_seg_desc_page:
  80.802 -        return alloc_segdesc_page(page);
  80.803 +        rc = alloc_segdesc_page(page);
  80.804 +        break;
  80.805      default:
  80.806          printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
  80.807                 type, page->u.inuse.type_info,
  80.808                 page->count_info);
  80.809 +        rc = -EINVAL;
  80.810          BUG();
  80.811      }
  80.812  
  80.813 -    return 0;
   80.814 +    /* No need for atomic update of type_info here: no one else updates it. */
  80.815 +    wmb();
  80.816 +    if ( rc == -EAGAIN )
  80.817 +    {
  80.818 +        page->u.inuse.type_info |= PGT_partial;
  80.819 +    }
  80.820 +    else if ( rc == -EINTR )
  80.821 +    {
  80.822 +        ASSERT((page->u.inuse.type_info &
  80.823 +                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
  80.824 +        page->u.inuse.type_info &= ~PGT_count_mask;
  80.825 +    }
  80.826 +    else if ( rc )
  80.827 +    {
  80.828 +        ASSERT(rc < 0);
  80.829 +        MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
  80.830 +                PRtype_info ": caf=%08x taf=%" PRtype_info,
  80.831 +                page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
  80.832 +                type, page->count_info, page->u.inuse.type_info);
  80.833 +        page->u.inuse.type_info = 0;
  80.834 +    }
  80.835 +    else
  80.836 +    {
  80.837 +        page->u.inuse.type_info |= PGT_validated;
  80.838 +    }
  80.839 +
  80.840 +    return rc;
  80.841  }
  80.842  
  80.843  
  80.844 -void free_page_type(struct page_info *page, unsigned long type)
  80.845 +int free_page_type(struct page_info *page, unsigned long type,
  80.846 +                   int preemptible)
  80.847  {
  80.848      struct domain *owner = page_get_owner(page);
  80.849      unsigned long gmfn;
  80.850 +    int rc;
  80.851  
  80.852      if ( likely(owner != NULL) )
  80.853      {
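
The tail of alloc_page_type() above folds the validation result into the page's type_info; restated as a switch for clarity (same logic as the if/else chain in the hunk):

    switch ( rc )
    {
    case 0:            /* fully validated */
        page->u.inuse.type_info |= PGT_validated;
        break;
    case -EAGAIN:      /* preempted mid-walk; partial state was recorded */
        page->u.inuse.type_info |= PGT_partial;
        break;
    case -EINTR:       /* preempted before any entry was validated */
        page->u.inuse.type_info &= ~PGT_count_mask;
        break;
    default:           /* hard failure: we held the only reference */
        page->u.inuse.type_info = 0;
    }
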
  80.854 @@ -1842,7 +1961,7 @@ void free_page_type(struct page_info *pa
  80.855              paging_mark_dirty(owner, page_to_mfn(page));
  80.856  
  80.857              if ( shadow_mode_refcounts(owner) )
  80.858 -                return;
  80.859 +                return 0;
  80.860  
  80.861              gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
  80.862              ASSERT(VALID_M2P(gmfn));
  80.863 @@ -1850,42 +1969,80 @@ void free_page_type(struct page_info *pa
  80.864          }
  80.865      }
  80.866  
  80.867 +    if ( !(type & PGT_partial) )
  80.868 +    {
  80.869 +        page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
  80.870 +        page->partial_pte = 0;
  80.871 +    }
  80.872      switch ( type & PGT_type_mask )
  80.873      {
  80.874      case PGT_l1_page_table:
  80.875          free_l1_table(page);
  80.876 +        rc = 0;
  80.877          break;
  80.878 -
  80.879      case PGT_l2_page_table:
  80.880 -        free_l2_table(page);
  80.881 +        rc = free_l2_table(page, preemptible);
  80.882          break;
  80.883 -
  80.884 -#if CONFIG_PAGING_LEVELS >= 3
  80.885      case PGT_l3_page_table:
  80.886 -        free_l3_table(page);
  80.887 -        break;
  80.888 +#if CONFIG_PAGING_LEVELS == 3
  80.889 +        if ( !(type & PGT_partial) )
  80.890 +            page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
  80.891  #endif
  80.892 -
  80.893 -#if CONFIG_PAGING_LEVELS >= 4
  80.894 -    case PGT_l4_page_table:
  80.895 -        free_l4_table(page);
  80.896 +        rc = free_l3_table(page, preemptible);
  80.897          break;
  80.898 -#endif
  80.899 -
  80.900 +    case PGT_l4_page_table:
  80.901 +        rc = free_l4_table(page, preemptible);
  80.902 +        break;
  80.903      default:
  80.904 -        printk("%s: type %lx pfn %lx\n",__FUNCTION__,
  80.905 -               type, page_to_mfn(page));
  80.906 +        MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page));
  80.907 +        rc = -EINVAL;
  80.908          BUG();
  80.909      }
  80.910 +
   80.911 +    /* No need for atomic update of type_info here: no one else updates it. */
  80.912 +    if ( rc == 0 )
  80.913 +    {
  80.914 +        /*
  80.915 +         * Record TLB information for flush later. We do not stamp page tables
  80.916 +         * when running in shadow mode:
  80.917 +         *  1. Pointless, since it's the shadow pt's which must be tracked.
  80.918 +         *  2. Shadow mode reuses this field for shadowed page tables to
  80.919 +         *     store flags info -- we don't want to conflict with that.
  80.920 +         */
  80.921 +        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
  80.922 +               (page->count_info & PGC_page_table)) )
  80.923 +            page->tlbflush_timestamp = tlbflush_current_time();
  80.924 +        wmb();
  80.925 +        page->u.inuse.type_info--;
  80.926 +    }
  80.927 +    else if ( rc == -EINTR )
  80.928 +    {
  80.929 +        ASSERT(!(page->u.inuse.type_info &
  80.930 +                 (PGT_count_mask|PGT_validated|PGT_partial)));
  80.931 +        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
  80.932 +               (page->count_info & PGC_page_table)) )
  80.933 +            page->tlbflush_timestamp = tlbflush_current_time();
  80.934 +        wmb();
  80.935 +        page->u.inuse.type_info |= PGT_validated;
  80.936 +    }
  80.937 +    else
  80.938 +    {
  80.939 +        BUG_ON(rc != -EAGAIN);
  80.940 +        wmb();
  80.941 +        page->u.inuse.type_info |= PGT_partial;
  80.942 +    }
  80.943 +
  80.944 +    return rc;
  80.945  }
  80.946  
  80.947  
  80.948 -void put_page_type(struct page_info *page)
  80.949 +static int __put_page_type(struct page_info *page,
  80.950 +                           int preemptible)
  80.951  {
  80.952      unsigned long nx, x, y = page->u.inuse.type_info;
  80.953  
  80.954 - again:
  80.955 -    do {
  80.956 +    for ( ; ; )
  80.957 +    {
  80.958          x  = y;
  80.959          nx = x - 1;
  80.960  
  80.961 @@ -1894,21 +2051,19 @@ void put_page_type(struct page_info *pag
  80.962          if ( unlikely((nx & PGT_count_mask) == 0) )
  80.963          {
  80.964              if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
  80.965 -                 likely(nx & PGT_validated) )
  80.966 +                 likely(nx & (PGT_validated|PGT_partial)) )
  80.967              {
  80.968                  /*
  80.969                   * Page-table pages must be unvalidated when count is zero. The
  80.970                   * 'free' is safe because the refcnt is non-zero and validated
  80.971                   * bit is clear => other ops will spin or fail.
  80.972                   */
  80.973 -                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
  80.974 -                                           x & ~PGT_validated)) != x) )
  80.975 -                    goto again;
  80.976 +                nx = x & ~(PGT_validated|PGT_partial);
  80.977 +                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
  80.978 +                                           x, nx)) != x) )
  80.979 +                    continue;
  80.980                  /* We cleared the 'valid bit' so we do the clean up. */
  80.981 -                free_page_type(page, x);
  80.982 -                /* Carry on, but with the 'valid bit' now clear. */
  80.983 -                x  &= ~PGT_validated;
  80.984 -                nx &= ~PGT_validated;
  80.985 +                return free_page_type(page, x, preemptible);
  80.986              }
  80.987  
  80.988              /*
  80.989 @@ -1922,25 +2077,33 @@ void put_page_type(struct page_info *pag
  80.990                     (page->count_info & PGC_page_table)) )
  80.991                  page->tlbflush_timestamp = tlbflush_current_time();
  80.992          }
  80.993 +
  80.994 +        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
  80.995 +            break;
  80.996 +
  80.997 +        if ( preemptible && hypercall_preempt_check() )
  80.998 +            return -EINTR;
  80.999      }
 80.1000 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 80.1001 +
 80.1002 +    return 0;
 80.1003  }
 80.1004  
 80.1005  
 80.1006 -int get_page_type(struct page_info *page, unsigned long type)
 80.1007 +static int __get_page_type(struct page_info *page, unsigned long type,
 80.1008 +                           int preemptible)
 80.1009  {
 80.1010      unsigned long nx, x, y = page->u.inuse.type_info;
 80.1011  
 80.1012      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 80.1013  
 80.1014 - again:
 80.1015 -    do {
 80.1016 +    for ( ; ; )
 80.1017 +    {
 80.1018          x  = y;
 80.1019          nx = x + 1;
 80.1020          if ( unlikely((nx & PGT_count_mask) == 0) )
 80.1021          {
 80.1022              MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
 80.1023 -            return 0;
 80.1024 +            return -EINVAL;
 80.1025          }
 80.1026          else if ( unlikely((x & PGT_count_mask) == 0) )
 80.1027          {
 80.1028 @@ -1993,28 +2156,43 @@ int get_page_type(struct page_info *page
 80.1029              /* Don't log failure if it could be a recursive-mapping attempt. */
 80.1030              if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
 80.1031                   (type == PGT_l1_page_table) )
 80.1032 -                return 0;
 80.1033 +                return -EINVAL;
 80.1034              if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
 80.1035                   (type == PGT_l2_page_table) )
 80.1036 -                return 0;
 80.1037 +                return -EINVAL;
 80.1038              if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
 80.1039                   (type == PGT_l3_page_table) )
 80.1040 -                return 0;
 80.1041 +                return -EINVAL;
 80.1042              MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
 80.1043                      "for mfn %lx (pfn %lx)",
 80.1044                      x, type, page_to_mfn(page),
 80.1045                      get_gpfn_from_mfn(page_to_mfn(page)));
 80.1046 -            return 0;
 80.1047 +            return -EINVAL;
 80.1048          }
 80.1049          else if ( unlikely(!(x & PGT_validated)) )
 80.1050          {
 80.1051 -            /* Someone else is updating validation of this page. Wait... */
 80.1052 -            while ( (y = page->u.inuse.type_info) == x )
 80.1053 -                cpu_relax();
 80.1054 -            goto again;
 80.1055 +            if ( !(x & PGT_partial) )
 80.1056 +            {
 80.1057 +                /* Someone else is updating validation of this page. Wait... */
 80.1058 +                while ( (y = page->u.inuse.type_info) == x )
 80.1059 +                {
 80.1060 +                    if ( preemptible && hypercall_preempt_check() )
 80.1061 +                        return -EINTR;
 80.1062 +                    cpu_relax();
 80.1063 +                }
 80.1064 +                continue;
 80.1065 +            }
 80.1066 +            /* Type ref count was left at 1 when PGT_partial got set. */
 80.1067 +            ASSERT((x & PGT_count_mask) == 1);
 80.1068 +            nx = x & ~PGT_partial;
 80.1069          }
 80.1070 +
 80.1071 +        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
 80.1072 +            break;
 80.1073 +
 80.1074 +        if ( preemptible && hypercall_preempt_check() )
 80.1075 +            return -EINTR;
 80.1076      }
 80.1077 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 80.1078  
 80.1079      if ( unlikely((x & PGT_type_mask) != type) )
 80.1080      {
 80.1081 @@ -2032,25 +2210,42 @@ int get_page_type(struct page_info *page
 80.1082  
 80.1083      if ( unlikely(!(nx & PGT_validated)) )
 80.1084      {
 80.1085 -        /* Try to validate page type; drop the new reference on failure. */
 80.1086 -        if ( unlikely(!alloc_page_type(page, type)) )
 80.1087 +        if ( !(x & PGT_partial) )
 80.1088          {
 80.1089 -            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
 80.1090 -                    PRtype_info ": caf=%08x taf=%" PRtype_info,
 80.1091 -                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
 80.1092 -                    type, page->count_info, page->u.inuse.type_info);
 80.1093 -            /* Noone else can get a reference. We hold the only ref. */
 80.1094 -            page->u.inuse.type_info = 0;
 80.1095 -            return 0;
 80.1096 +            page->nr_validated_ptes = 0;
 80.1097 +            page->partial_pte = 0;
 80.1098          }
 80.1099 -
 80.1100 -        /* Noone else is updating simultaneously. */
 80.1101 -        __set_bit(_PGT_validated, &page->u.inuse.type_info);
 80.1102 +        return alloc_page_type(page, type, preemptible);
 80.1103      }
 80.1104  
 80.1105 -    return 1;
 80.1106 +    return 0;
 80.1107 +}
 80.1108 +
 80.1109 +void put_page_type(struct page_info *page)
 80.1110 +{
 80.1111 +    int rc = __put_page_type(page, 0);
 80.1112 +    ASSERT(rc == 0);
 80.1113 +    (void)rc;
 80.1114  }
 80.1115  
 80.1116 +int get_page_type(struct page_info *page, unsigned long type)
 80.1117 +{
 80.1118 +    int rc = __get_page_type(page, type, 0);
 80.1119 +    if ( likely(rc == 0) )
 80.1120 +        return 1;
 80.1121 +    ASSERT(rc == -EINVAL);
 80.1122 +    return 0;
 80.1123 +}
 80.1124 +
 80.1125 +int put_page_type_preemptible(struct page_info *page)
 80.1126 +{
 80.1127 +    return __put_page_type(page, 1);
 80.1128 +}
 80.1129 +
 80.1130 +int get_page_type_preemptible(struct page_info *page, unsigned long type)
 80.1131 +{
 80.1132 +    return __get_page_type(page, type, 1);
 80.1133 +}
 80.1134  
 80.1135  void cleanup_page_cacheattr(struct page_info *page)
 80.1136  {
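
The pre-existing boolean entry points survive as thin wrappers, while callers that can be preempted use the new *_preemptible variants and deal in -errno values. A usage sketch, assuming the caller runs in hypercall context where a continuation can be created:

    rc = get_page_type_preemptible(page, PGT_l3_page_table);
    switch ( rc )
    {
    case 0:            /* type reference taken */
        /* ... use the page; later: put_page_type_preemptible(page) ... */
        break;
    case -EINTR:
    case -EAGAIN:
        rc = -EAGAIN;  /* report "preempted"; caller builds a continuation */
        break;
    default:           /* -EINVAL etc.: no reference taken */
        break;
    }
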
 80.1137 @@ -2087,7 +2282,7 @@ int new_guest_cr3(unsigned long mfn)
 80.1138                      l4e_from_pfn(
 80.1139                          mfn,
 80.1140                          (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
 80.1141 -                    pagetable_get_pfn(v->arch.guest_table), 0);
 80.1142 +                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
 80.1143          if ( unlikely(!okay) )
 80.1144          {
 80.1145              MEM_LOG("Error while installing new compat baseptr %lx", mfn);
 80.1146 @@ -2102,7 +2297,7 @@ int new_guest_cr3(unsigned long mfn)
 80.1147  #endif
 80.1148      okay = paging_mode_refcounts(d)
 80.1149          ? get_page_from_pagenr(mfn, d)
 80.1150 -        : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
 80.1151 +        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
 80.1152      if ( unlikely(!okay) )
 80.1153      {
 80.1154          MEM_LOG("Error while installing new baseptr %lx", mfn);
 80.1155 @@ -2276,9 +2471,7 @@ int do_mmuext_op(
 80.1156      {
 80.1157          if ( hypercall_preempt_check() )
 80.1158          {
 80.1159 -            rc = hypercall_create_continuation(
 80.1160 -                __HYPERVISOR_mmuext_op, "hihi",
 80.1161 -                uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 80.1162 +            rc = -EAGAIN;
 80.1163              break;
 80.1164          }
 80.1165  
 80.1166 @@ -2325,10 +2518,14 @@ int do_mmuext_op(
 80.1167              if ( paging_mode_refcounts(FOREIGNDOM) )
 80.1168                  break;
 80.1169  
 80.1170 -            okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
 80.1171 +            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
 80.1172 +            okay = !rc;
 80.1173              if ( unlikely(!okay) )
 80.1174              {
 80.1175 -                MEM_LOG("Error while pinning mfn %lx", mfn);
 80.1176 +                if ( rc == -EINTR )
 80.1177 +                    rc = -EAGAIN;
 80.1178 +                else if ( rc != -EAGAIN )
 80.1179 +                    MEM_LOG("Error while pinning mfn %lx", mfn);
 80.1180                  break;
 80.1181              }
 80.1182  
 80.1183 @@ -2373,8 +2570,11 @@ int do_mmuext_op(
 80.1184              {
 80.1185                  put_page_and_type(page);
 80.1186                  put_page(page);
 80.1187 -                /* A page is dirtied when its pin status is cleared. */
 80.1188 -                paging_mark_dirty(d, mfn);
 80.1189 +                if ( !rc )
 80.1190 +                {
 80.1191 +                    /* A page is dirtied when its pin status is cleared. */
 80.1192 +                    paging_mark_dirty(d, mfn);
 80.1193 +                }
 80.1194              }
 80.1195              else
 80.1196              {
 80.1197 @@ -2398,8 +2598,8 @@ int do_mmuext_op(
 80.1198                  if ( paging_mode_refcounts(d) )
 80.1199                      okay = get_page_from_pagenr(mfn, d);
 80.1200                  else
 80.1201 -                    okay = get_page_and_type_from_pagenr(
 80.1202 -                        mfn, PGT_root_page_table, d);
 80.1203 +                    okay = !get_page_and_type_from_pagenr(
 80.1204 +                        mfn, PGT_root_page_table, d, 0);
 80.1205                  if ( unlikely(!okay) )
 80.1206                  {
 80.1207                      MEM_LOG("Error while installing new mfn %lx", mfn);
 80.1208 @@ -2517,6 +2717,11 @@ int do_mmuext_op(
 80.1209          guest_handle_add_offset(uops, 1);
 80.1210      }
 80.1211  
 80.1212 +    if ( rc == -EAGAIN )
 80.1213 +        rc = hypercall_create_continuation(
 80.1214 +            __HYPERVISOR_mmuext_op, "hihi",
 80.1215 +            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 80.1216 +
 80.1217      process_deferred_ops();
 80.1218  
 80.1219      perfc_add(num_mmuext_ops, i);
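
Both hypercall loops are restructured the same way: the body no longer creates the continuation in place, it merely records rc = -EAGAIN and breaks, and the continuation is created once after the loop so that process_deferred_ops() runs on every exit path. The resulting shape, condensed:

    for ( i = 0; i < count; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rc = -EAGAIN;                 /* remember, fall out of loop */
            break;
        }
        /* ... process operation i ... */
    }

    if ( rc == -EAGAIN )                  /* single continuation site */
        rc = hypercall_create_continuation(
            __HYPERVISOR_mmuext_op, "hihi",
            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);

    process_deferred_ops();
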
 80.1220 @@ -2576,9 +2781,7 @@ int do_mmu_update(
 80.1221      {
 80.1222          if ( hypercall_preempt_check() )
 80.1223          {
 80.1224 -            rc = hypercall_create_continuation(
 80.1225 -                __HYPERVISOR_mmu_update, "hihi",
 80.1226 -                ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 80.1227 +            rc = -EAGAIN;
 80.1228              break;
 80.1229          }
 80.1230  
 80.1231 @@ -2601,7 +2804,7 @@ int do_mmu_update(
 80.1232               */
 80.1233          case MMU_NORMAL_PT_UPDATE:
 80.1234          case MMU_PT_UPDATE_PRESERVE_AD:
 80.1235 -            rc = xsm_mmu_normal_update(d, req.val);
 80.1236 +            rc = xsm_mmu_normal_update(d, FOREIGNDOM, req.val);
 80.1237              if ( rc )
 80.1238                  break;
 80.1239  
 80.1240 @@ -2653,27 +2856,29 @@ int do_mmu_update(
 80.1241                                          cmd == MMU_PT_UPDATE_PRESERVE_AD);
 80.1242                  }
 80.1243                  break;
 80.1244 -#if CONFIG_PAGING_LEVELS >= 3
 80.1245                  case PGT_l3_page_table:
 80.1246                  {
 80.1247                      l3_pgentry_t l3e = l3e_from_intpte(req.val);
 80.1248 -                    okay = mod_l3_entry(va, l3e, mfn,
 80.1249 -                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
 80.1250 +                    rc = mod_l3_entry(va, l3e, mfn,
 80.1251 +                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
 80.1252 +                    okay = !rc;
 80.1253                  }
 80.1254                  break;
 80.1255 -#endif
 80.1256  #if CONFIG_PAGING_LEVELS >= 4
 80.1257                  case PGT_l4_page_table:
 80.1258                  {
 80.1259                      l4_pgentry_t l4e = l4e_from_intpte(req.val);
 80.1260 -                    okay = mod_l4_entry(va, l4e, mfn,
 80.1261 -                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
 80.1262 +                    rc = mod_l4_entry(va, l4e, mfn,
 80.1263 +                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
 80.1264 +                    okay = !rc;
 80.1265                  }
 80.1266                  break;
 80.1267  #endif
 80.1268                  }
 80.1269  
 80.1270                  put_page_type(page);
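          +                /* Report -EAGAIN so a continuation is created after the loop. */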
 80.1271 +                if ( rc == -EINTR )
 80.1272 +                    rc = -EAGAIN;
 80.1273              }
 80.1274              break;
 80.1275  
 80.1276 @@ -2742,6 +2947,11 @@ int do_mmu_update(
 80.1277          guest_handle_add_offset(ureqs, 1);
 80.1278      }
 80.1279  
 80.1280 +    if ( rc == -EAGAIN )
 80.1281 +        rc = hypercall_create_continuation(
 80.1282 +            __HYPERVISOR_mmu_update, "hihi",
 80.1283 +            ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 80.1284 +
 80.1285      process_deferred_ops();
 80.1286  
 80.1287      domain_mmap_cache_destroy(&mapcache);
 80.1288 @@ -3111,7 +3321,7 @@ int do_update_va_mapping(unsigned long v
 80.1289      if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
 80.1290          return -EINVAL;
 80.1291  
 80.1292 -    rc = xsm_update_va_mapping(d, val);
 80.1293 +    rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
 80.1294      if ( rc )
 80.1295          return rc;
 80.1296  
 80.1297 @@ -3695,9 +3905,8 @@ static int ptwr_emulated_update(
 80.1298      nl1e = l1e_from_intpte(val);
 80.1299      if ( unlikely(!get_page_from_l1e(nl1e, d)) )
 80.1300      {
 80.1301 -        if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
 80.1302 -             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
 80.1303 -             (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
 80.1304 +        if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
 80.1305 +             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
 80.1306          {
 80.1307              /*
 80.1308               * If this is an upper-half write to a PAE PTE then we assume that
    81.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Sep 12 14:32:45 2008 +0900
    81.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Sep 12 14:47:40 2008 +0900
    81.3 @@ -37,6 +37,7 @@
    81.4  #include <asm/shared.h>
    81.5  #include <asm/hap.h>
    81.6  #include <asm/paging.h>
    81.7 +#include <asm/p2m.h>
    81.8  #include <asm/domain.h>
    81.9  #include <xen/numa.h>
   81.10  
    82.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Sep 12 14:32:45 2008 +0900
    82.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Sep 12 14:47:40 2008 +0900
    82.3 @@ -39,6 +39,7 @@
    82.4  #include <xen/numa.h>
    82.5  #include "private.h"
    82.6  
    82.7 +DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
    82.8  
    82.9  /* Set up the shadow-specific parts of a domain struct at start of day.
   82.10   * Called for every domain from arch_domain_create() */
   82.11 @@ -630,6 +631,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
   82.12  
   82.13              if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )
   82.14              {
   82.15 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_EVICT);
   82.16 +
   82.17                  /* Reuse this slot and remove current writable mapping. */
   82.18                  sh_remove_write_access_from_sl1p(v, gmfn, 
   82.19                                                   oos_fixup[idx].smfn[next],
   82.20 @@ -645,6 +648,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
   82.21              oos_fixup[idx].smfn[next] = smfn;
   82.22              oos_fixup[idx].off[next] = off;
   82.23              oos_fixup[idx].next = (next + 1) % SHADOW_OOS_FIXUPS;
   82.24 +
   82.25 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_ADD);
   82.26              return;
   82.27          }
   82.28      }
   82.29 @@ -687,6 +692,16 @@ static int oos_remove_write_access(struc
   82.30  }
   82.31  
   82.32  
   82.33 +static inline void trace_resync(int event, mfn_t gmfn)
   82.34 +{
   82.35 +    if ( tb_init_done )
   82.36 +    {
   82.37 +        /* Convert gmfn to gfn */
   82.38 +        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
   82.39 +        __trace_var(event, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
   82.40 +    }
   82.41 +}
   82.42 +
   82.43  /* Pull all the entries on an out-of-sync page back into sync. */
   82.44  static void _sh_resync(struct vcpu *v, mfn_t gmfn,
   82.45                         struct oos_fixup *fixup, mfn_t snp)
   82.46 @@ -700,8 +715,8 @@ static void _sh_resync(struct vcpu *v, m
   82.47               & ~SHF_L1_ANY));
   82.48      ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
   82.49  
   82.50 -    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
   82.51 -                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   82.52 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
   82.53 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
   82.54  
   82.55      /* Need to pull write access so the page *stays* in sync. */
   82.56      if ( oos_remove_write_access(v, gmfn, fixup) )
   82.57 @@ -719,6 +734,7 @@ static void _sh_resync(struct vcpu *v, m
   82.58      /* Now we know all the entries are synced, and will stay that way */
   82.59      pg->shadow_flags &= ~SHF_out_of_sync;
   82.60      perfc_incr(shadow_resync);
   82.61 +    trace_resync(TRC_SHADOW_RESYNC_FULL, gmfn);
   82.62  }
   82.63  
   82.64  
   82.65 @@ -930,6 +946,7 @@ void sh_resync_all(struct vcpu *v, int s
   82.66                  /* Update the shadows and leave the page OOS. */
   82.67                  if ( sh_skip_sync(v, oos[idx]) )
   82.68                      continue;
   82.69 +                trace_resync(TRC_SHADOW_RESYNC_ONLY, oos[idx]);
   82.70                  _sh_resync_l1(other, oos[idx], oos_snapshot[idx]);
   82.71              }
   82.72              else
   82.73 @@ -945,15 +962,16 @@ void sh_resync_all(struct vcpu *v, int s
   82.74      }
   82.75  }
   82.76  
   82.77 -/* Allow a shadowed page to go out of sync */
   82.78 +/* Allow a shadowed page to go out of sync. Unsyncs are traced in
   82.79 + * multi.c:sh_page_fault() */
   82.80  int sh_unsync(struct vcpu *v, mfn_t gmfn)
   82.81  {
   82.82      struct page_info *pg;
   82.83      
   82.84      ASSERT(shadow_locked_by_me(v->domain));
   82.85  
   82.86 -    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx va %lx\n",
   82.87 -                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   82.88 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
   82.89 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
   82.90  
   82.91      pg = mfn_to_page(gmfn);
   82.92   
   82.93 @@ -970,6 +988,7 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
   82.94      pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
   82.95      oos_hash_add(v, gmfn);
   82.96      perfc_incr(shadow_unsync);
   82.97 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_UNSYNC);
   82.98      return 1;
   82.99  }
  82.100  
  82.101 @@ -1005,6 +1024,7 @@ void shadow_promote(struct vcpu *v, mfn_
  82.102  
  82.103      ASSERT(!test_bit(type, &page->shadow_flags));
  82.104      set_bit(type, &page->shadow_flags);
  82.105 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
  82.106  }
  82.107  
  82.108  void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
  82.109 @@ -1027,6 +1047,8 @@ void shadow_demote(struct vcpu *v, mfn_t
  82.110  #endif 
  82.111          clear_bit(_PGC_page_table, &page->count_info);
  82.112      }
  82.113 +
  82.114 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_DEMOTE);
  82.115  }
  82.116  
  82.117  /**************************************************************************/
  82.118 @@ -1094,6 +1116,7 @@ sh_validate_guest_entry(struct vcpu *v, 
  82.119      ASSERT((page->shadow_flags 
  82.120              & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
  82.121  #endif
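          +    /* Fold the SHADOW_SET_* result bits into this path's trace flags. */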
  82.122 +    this_cpu(trace_shadow_path_flags) |= (result<<(TRCE_SFLAG_SET_CHANGED)); 
  82.123  
  82.124      return result;
  82.125  }
  82.126 @@ -1295,6 +1318,18 @@ static void shadow_unhook_mappings(struc
  82.127      }
  82.128  }
  82.129  
  82.130 +static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
  82.131 +{
  82.132 +    if ( tb_init_done )
  82.133 +    {
  82.134 +        /* Convert smfn to gfn */
  82.135 +        unsigned long gfn;
  82.136 +        ASSERT(mfn_valid(smfn));
  82.137 +        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
  82.138 +        __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
  82.139 +                    sizeof(gfn), (unsigned char*)&gfn);
  82.140 +    }
  82.141 +}
  82.142  
  82.143  /* Make sure there are at least count order-sized pages
  82.144   * available in the shadow page pool. */
  82.145 @@ -1327,6 +1362,7 @@ static void _shadow_prealloc(
  82.146          smfn = shadow_page_to_mfn(sp);
  82.147  
  82.148          /* Unpin this top-level shadow */
  82.149 +        trace_shadow_prealloc_unpin(d, smfn);
  82.150          sh_unpin(v, smfn);
  82.151  
  82.152          /* See if that freed up enough space */
  82.153 @@ -1343,6 +1379,7 @@ static void _shadow_prealloc(
  82.154          {
  82.155              if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
  82.156              {
  82.157 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
  82.158                  shadow_unhook_mappings(v, 
  82.159                                 pagetable_get_mfn(v2->arch.shadow_table[i]));
  82.160  
  82.161 @@ -2200,6 +2237,16 @@ void sh_destroy_shadow(struct vcpu *v, m
  82.162      }    
  82.163  }
  82.164  
  82.165 +static inline void trace_shadow_wrmap_bf(mfn_t gmfn)
  82.166 +{
  82.167 +    if ( tb_init_done )
  82.168 +    {
  82.169 +        /* Convert gmfn to gfn */
  82.170 +        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
  82.171 +        __trace_var(TRC_SHADOW_WRMAP_BF, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
  82.172 +    }
  82.173 +}
  82.174 +
  82.175  /**************************************************************************/
  82.176  /* Remove all writeable mappings of a guest frame from the shadow tables 
  82.177   * Returns non-zero if we need to flush TLBs. 
  82.178 @@ -2265,6 +2312,8 @@ int sh_remove_write_access(struct vcpu *
  82.179           || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
  82.180          return 0;
  82.181  
  82.182 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP);
  82.183 +
  82.184      perfc_incr(shadow_writeable);
  82.185  
  82.186      /* If this isn't a "normal" writeable page, the domain is trying to 
  82.187 @@ -2285,11 +2334,14 @@ int sh_remove_write_access(struct vcpu *
  82.188           * and that mapping is likely to be in the current pagetable,
  82.189           * in the guest's linear map (on non-HIGHPTE linux and windows)*/
  82.190  
  82.191 -#define GUESS(_a, _h) do {                                                \
  82.192 +#define GUESS(_a, _h) do {                                              \
  82.193              if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
  82.194 -                perfc_incr(shadow_writeable_h_ ## _h);                   \
  82.195 -            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )          \
  82.196 -                return 1;                                                 \
  82.197 +                perfc_incr(shadow_writeable_h_ ## _h);                  \
  82.198 +            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
  82.199 +            {                                                           \
  82.200 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);   \
  82.201 +                return 1;                                               \
  82.202 +            }                                                           \
  82.203          } while (0)
  82.204  
  82.205          if ( level == 0 && fault_addr )
  82.206 @@ -2377,6 +2429,7 @@ int sh_remove_write_access(struct vcpu *
  82.207  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
  82.208      
  82.209      /* Brute-force search of all the shadows, by walking the hash */
  82.210 +    trace_shadow_wrmap_bf(gmfn);
  82.211      if ( level == 0 )
  82.212          perfc_incr(shadow_writeable_bf_1);
  82.213      else
    83.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Sep 12 14:32:45 2008 +0900
    83.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Sep 12 14:47:40 2008 +0900
    83.3 @@ -225,6 +225,7 @@ static uint32_t mandatory_flags(struct v
    83.4  static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
    83.5  {
    83.6      guest_intpte_t old, new;
    83.7 +    int ret = 0;
    83.8  
    83.9      old = *(guest_intpte_t *)walk_p;
   83.10      new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
   83.11 @@ -234,10 +235,16 @@ static uint32_t set_ad_bits(void *guest_
   83.12           * into the guest table as well.  If the guest table has changed
   83.13           * under our feet then leave it alone. */
   83.14          *(guest_intpte_t *)walk_p = new;
   83.15 -        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) 
   83.16 -            return 1;
   83.17 +        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
   83.18 +            ret = 1;
   83.19 +
   83.20 +        /* FIXME -- this code is longer than necessary */
   83.21 +        if ( set_dirty )
   83.22 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
   83.23 +        else
   83.24 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
   83.25      }
   83.26 -    return 0;
   83.27 +    return ret;
   83.28  }
   83.29  
   83.30  /* This validation is called with lock held, and after write permission
   83.31 @@ -1432,6 +1439,7 @@ static int shadow_set_l1e(struct vcpu *v
   83.32      {
   83.33          /* About to install a new reference */        
   83.34          if ( shadow_mode_refcounts(d) ) {
   83.35 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
   83.36              if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
   83.37              {
   83.38                  /* Doesn't look like a pagetable. */
   83.39 @@ -1461,6 +1469,7 @@ static int shadow_set_l1e(struct vcpu *v
   83.40          {
   83.41              shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
   83.42              shadow_put_page_from_l1e(old_sl1e, d);
   83.43 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
   83.44          } 
   83.45      }
   83.46      return flags;
   83.47 @@ -2896,6 +2905,7 @@ static inline void check_for_early_unsha
   83.48      {
   83.49          perfc_incr(shadow_early_unshadow);
   83.50          sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
   83.51 +        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
   83.52      }
   83.53      v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
   83.54  #endif
   83.55 @@ -3012,6 +3022,132 @@ static void sh_prefetch(struct vcpu *v, 
   83.56  
   83.57  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
   83.58  
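          +/* Address types sized to the guest's virtual and physical widths keep trace records compact. */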
   83.59 +#if GUEST_PAGING_LEVELS == 4
   83.60 +typedef u64 guest_va_t;
   83.61 +typedef u64 guest_pa_t;
   83.62 +#elif GUEST_PAGING_LEVELS == 3
   83.63 +typedef u32 guest_va_t;
   83.64 +typedef u64 guest_pa_t;
   83.65 +#else
   83.66 +typedef u32 guest_va_t;
   83.67 +typedef u32 guest_pa_t;
   83.68 +#endif
   83.69 +
   83.70 +static inline void trace_shadow_gen(u32 event, guest_va_t va)
   83.71 +{
   83.72 +    if ( tb_init_done )
   83.73 +    {
   83.74 +        event |= (GUEST_PAGING_LEVELS-2)<<8;
   83.75 +        __trace_var(event, 0/*!tsc*/, sizeof(va), (unsigned char*)&va);
   83.76 +    }
   83.77 +}
   83.78 +
   83.79 +static inline void trace_shadow_fixup(guest_l1e_t gl1e,
   83.80 +                                      guest_va_t va)
   83.81 +{
   83.82 +    if ( tb_init_done )
   83.83 +    {
   83.84 +        struct {
   83.85 +            /* For PAE, guest_l1e may be 64 bits wide while guest_va is 32;
   83.86 +               put it first for alignment's sake. */
   83.87 +            guest_l1e_t gl1e;
   83.88 +            guest_va_t va;
   83.89 +            u32 flags;
   83.90 +        } __attribute__((packed)) d;
   83.91 +        u32 event;
   83.92 +
   83.93 +        event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
   83.94 +
   83.95 +        d.gl1e = gl1e;
   83.96 +        d.va = va;
   83.97 +        d.flags = this_cpu(trace_shadow_path_flags);
   83.98 +
   83.99 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  83.100 +    }
  83.101 +}
  83.102 +                                          
  83.103 +static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
  83.104 +                                          guest_va_t va)
  83.105 +{
  83.106 +    if ( tb_init_done )
  83.107 +    {
  83.108 +        struct {
   83.109 +            /* For PAE, guest_l1e may be 64 bits wide while guest_va is 32;
   83.110 +               put it first for alignment's sake. */
  83.111 +            guest_l1e_t gl1e;
  83.112 +            guest_va_t va;
  83.113 +            u32 flags;
  83.114 +        } __attribute__((packed)) d;
  83.115 +        u32 event;
  83.116 +
  83.117 +        event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
  83.118 +
  83.119 +        d.gl1e = gl1e;
  83.120 +        d.va = va;
  83.121 +        d.flags = this_cpu(trace_shadow_path_flags);
  83.122 +
  83.123 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  83.124 +    }
  83.125 +}
  83.126 +                                          
  83.127 +static inline void trace_shadow_emulate_other(u32 event,
  83.128 +                                                 guest_va_t va,
  83.129 +                                                 gfn_t gfn)
  83.130 +{
  83.131 +    if ( tb_init_done )
  83.132 +    {
  83.133 +        struct {
   83.134 +            /* For PAE, guest_l1e may be 64 bits wide while guest_va is 32;
   83.135 +               put it first for alignment's sake. */
  83.136 +#if GUEST_PAGING_LEVELS == 2
  83.137 +            u32 gfn;
  83.138 +#else
  83.139 +            u64 gfn;
  83.140 +#endif
  83.141 +            guest_va_t va;
  83.142 +        } __attribute__((packed)) d;
  83.143 +
  83.144 +        event |= ((GUEST_PAGING_LEVELS-2)<<8);
  83.145 +
   83.146 +        d.gfn = gfn_x(gfn);
  83.147 +        d.va = va;
  83.148 +
  83.149 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  83.150 +    }
  83.151 +}
  83.152 +
  83.153 +#if GUEST_PAGING_LEVELS == 3
  83.154 +static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va);
  83.155 +static DEFINE_PER_CPU(int,trace_extra_emulation_count);
  83.156 +#endif
  83.157 +static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
  83.158 +
  83.159 +static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
  83.160 +{
  83.161 +    if ( tb_init_done )
  83.162 +    {
  83.163 +        struct {
   83.164 +            /* For PAE, guest_l1e may be 64 bits wide while guest_va is 32;
   83.165 +               put it first for alignment's sake. */
  83.166 +            guest_l1e_t gl1e, write_val;
  83.167 +            guest_va_t va;
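          +            /* 29 bits of path flags plus a 3-bit count of extra emulations (PAE only). */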
  83.168 +            unsigned flags:29, emulation_count:3;
  83.169 +        } __attribute__((packed)) d;
  83.170 +        u32 event;
  83.171 +
  83.172 +        event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8);
  83.173 +
  83.174 +        d.gl1e = gl1e;
  83.175 +        d.write_val.l1 = this_cpu(trace_emulate_write_val);
  83.176 +        d.va = va;
  83.177 +#if GUEST_PAGING_LEVELS == 3
  83.178 +        d.emulation_count = this_cpu(trace_extra_emulation_count);
  83.179 +#endif
  83.180 +        d.flags = this_cpu(trace_shadow_path_flags);
  83.181 +
  83.182 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  83.183 +    }
  83.184 +}
  83.185  
  83.186  /**************************************************************************/
  83.187  /* Entry points into the shadow code */
  83.188 @@ -3027,8 +3163,8 @@ static int sh_page_fault(struct vcpu *v,
  83.189  {
  83.190      struct domain *d = v->domain;
  83.191      walk_t gw;
  83.192 -    gfn_t gfn;
  83.193 -    mfn_t gmfn, sl1mfn=_mfn(0);
  83.194 +    gfn_t gfn = _gfn(0);
  83.195 +    mfn_t gmfn, sl1mfn = _mfn(0);
  83.196      shadow_l1e_t sl1e, *ptr_sl1e;
  83.197      paddr_t gpa;
  83.198      struct sh_emulate_ctxt emul_ctxt;
  83.199 @@ -3043,7 +3179,7 @@ static int sh_page_fault(struct vcpu *v,
  83.200  
  83.201      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
  83.202                    v->domain->domain_id, v->vcpu_id, va, regs->error_code,
  83.203 -                  regs->rip);
  83.204 +                  regs->eip);
  83.205  
  83.206      perfc_incr(shadow_fault);
  83.207  
  83.208 @@ -3132,6 +3268,7 @@ static int sh_page_fault(struct vcpu *v,
  83.209                  reset_early_unshadow(v);
  83.210                  perfc_incr(shadow_fault_fast_gnp);
  83.211                  SHADOW_PRINTK("fast path not-present\n");
  83.212 +                trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
  83.213                  return 0;
  83.214              }
  83.215              else
  83.216 @@ -3145,6 +3282,7 @@ static int sh_page_fault(struct vcpu *v,
  83.217              perfc_incr(shadow_fault_fast_mmio);
  83.218              SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
  83.219              reset_early_unshadow(v);
  83.220 +            trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
  83.221              return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
  83.222                      ? EXCRET_fault_fixed : 0);
  83.223          }
  83.224 @@ -3155,6 +3293,7 @@ static int sh_page_fault(struct vcpu *v,
  83.225               * Retry and let the hardware give us the right fault next time. */
  83.226              perfc_incr(shadow_fault_fast_fail);
  83.227              SHADOW_PRINTK("fast path false alarm!\n");            
  83.228 +            trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va);
  83.229              return EXCRET_fault_fixed;
  83.230          }
  83.231      }
  83.232 @@ -3190,7 +3329,7 @@ static int sh_page_fault(struct vcpu *v,
  83.233          perfc_incr(shadow_fault_bail_real_fault);
  83.234          SHADOW_PRINTK("not a shadow fault\n");
  83.235          reset_early_unshadow(v);
  83.236 -        return 0;
  83.237 +        goto propagate;
  83.238      }
  83.239  
  83.240      /* It's possible that the guest has put pagetables in memory that it has 
  83.241 @@ -3200,7 +3339,7 @@ static int sh_page_fault(struct vcpu *v,
  83.242      if ( unlikely(d->is_shutting_down) )
  83.243      {
  83.244          SHADOW_PRINTK("guest is shutting down\n");
  83.245 -        return 0;
  83.246 +        goto propagate;
  83.247      }
  83.248  
  83.249      /* What kind of access are we dealing with? */
  83.250 @@ -3218,7 +3357,7 @@ static int sh_page_fault(struct vcpu *v,
  83.251          SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
  83.252                        gfn_x(gfn), mfn_x(gmfn));
  83.253          reset_early_unshadow(v);
  83.254 -        return 0;
  83.255 +        goto propagate;
  83.256      }
  83.257  
  83.258  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
  83.259 @@ -3229,6 +3368,8 @@ static int sh_page_fault(struct vcpu *v,
  83.260  
  83.261      shadow_lock(d);
  83.262  
  83.263 +    TRACE_CLEAR_PATH_FLAGS;
  83.264 +    
  83.265      rc = gw_remove_write_accesses(v, va, &gw);
  83.266  
  83.267      /* First bit set: Removed write access to a page. */
  83.268 @@ -3281,6 +3422,7 @@ static int sh_page_fault(struct vcpu *v,
  83.269           * Get out of the fault handler immediately. */
  83.270          ASSERT(d->is_shutting_down);
  83.271          shadow_unlock(d);
  83.272 +        trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
  83.273          return 0;
  83.274      }
  83.275  
  83.276 @@ -3383,6 +3525,7 @@ static int sh_page_fault(struct vcpu *v,
  83.277      d->arch.paging.log_dirty.fault_count++;
  83.278      reset_early_unshadow(v);
  83.279  
  83.280 +    trace_shadow_fixup(gw.l1e, va);
  83.281   done:
  83.282      sh_audit_gw(v, &gw);
  83.283      SHADOW_PRINTK("fixed\n");
  83.284 @@ -3405,6 +3548,8 @@ static int sh_page_fault(struct vcpu *v,
  83.285                        mfn_x(gmfn));
  83.286          perfc_incr(shadow_fault_emulate_failed);
  83.287          sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
  83.288 +        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER,
  83.289 +                                      va, gfn);
  83.290          goto done;
  83.291      }
  83.292  
  83.293 @@ -3421,6 +3566,8 @@ static int sh_page_fault(struct vcpu *v,
  83.294      shadow_audit_tables(v);
  83.295      shadow_unlock(d);
  83.296  
  83.297 +    this_cpu(trace_emulate_write_val) = 0;
  83.298 +
  83.299  #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
  83.300   early_emulation:
  83.301  #endif
  83.302 @@ -3446,6 +3593,8 @@ static int sh_page_fault(struct vcpu *v,
  83.303                       "injection: cr2=%#lx, mfn=%#lx\n", 
  83.304                       va, mfn_x(gmfn));
  83.305              sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
  83.306 +            trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ,
  83.307 +                                       va, gfn);
  83.308              return EXCRET_fault_fixed;
  83.309          }
  83.310      }
  83.311 @@ -3478,6 +3627,10 @@ static int sh_page_fault(struct vcpu *v,
  83.312           * to support more operations in the emulator.  More likely, 
  83.313           * though, this is a hint that this page should not be shadowed. */
  83.314          shadow_remove_all_shadows(v, gmfn);
  83.315 +
  83.316 +        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED,
  83.317 +                                   va, gfn);
  83.318 +        goto emulate_done;
  83.319      }
  83.320  
  83.321  #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
  83.322 @@ -3504,7 +3657,8 @@ static int sh_page_fault(struct vcpu *v,
  83.323  
  83.324  #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
  83.325      if ( r == X86EMUL_OKAY ) {
  83.326 -        int i;
   83.327 +        int i, emulation_count = 0;
  83.328 +        this_cpu(trace_emulate_initial_va) = va;
  83.329          /* Emulate up to four extra instructions in the hope of catching 
  83.330           * the "second half" of a 64-bit pagetable write. */
  83.331          for ( i = 0 ; i < 4 ; i++ )
  83.332 @@ -3513,10 +3667,12 @@ static int sh_page_fault(struct vcpu *v,
  83.333              v->arch.paging.last_write_was_pt = 0;
  83.334              r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
  83.335              if ( r == X86EMUL_OKAY )
  83.336 -            {
  83.337 +            { 
  83.338 +                emulation_count++;
  83.339                  if ( v->arch.paging.last_write_was_pt )
  83.340                  {
  83.341                      perfc_incr(shadow_em_ex_pt);
  83.342 +                    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN);
  83.343                      break; /* Don't emulate past the other half of the write */
  83.344                  }
  83.345                  else 
  83.346 @@ -3525,12 +3681,16 @@ static int sh_page_fault(struct vcpu *v,
  83.347              else
  83.348              {
  83.349                  perfc_incr(shadow_em_ex_fail);
  83.350 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED);
  83.351                  break; /* Don't emulate again if we failed! */
  83.352              }
  83.353          }
   83.354 +        this_cpu(trace_extra_emulation_count) = emulation_count;
  83.355      }
  83.356  #endif /* PAE guest */
  83.357  
  83.358 +    trace_shadow_emulate(gw.l1e, va);
  83.359 + emulate_done:
  83.360      SHADOW_PRINTK("emulated\n");
  83.361      return EXCRET_fault_fixed;
  83.362  
  83.363 @@ -3543,6 +3703,7 @@ static int sh_page_fault(struct vcpu *v,
  83.364      shadow_audit_tables(v);
  83.365      reset_early_unshadow(v);
  83.366      shadow_unlock(d);
  83.367 +    trace_shadow_gen(TRC_SHADOW_MMIO, va);
  83.368      return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
  83.369              ? EXCRET_fault_fixed : 0);
  83.370  
  83.371 @@ -3552,6 +3713,10 @@ static int sh_page_fault(struct vcpu *v,
  83.372      shadow_audit_tables(v);
  83.373      reset_early_unshadow(v);
  83.374      shadow_unlock(d);
  83.375 +
  83.376 +propagate:
  83.377 +    trace_not_shadow_fault(gw.l1e, va);
  83.378 +
  83.379      return 0;
  83.380  }
  83.381  
  83.382 @@ -3990,7 +4155,7 @@ sh_detach_old_tables(struct vcpu *v)
  83.383              sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
  83.384          v->arch.paging.shadow.guest_vtable = NULL;
  83.385      }
  83.386 -#endif
  83.387 +#endif // !NDEBUG
  83.388  
  83.389  
  83.390      ////
  83.391 @@ -4446,6 +4611,7 @@ static int sh_guess_wrmap(struct vcpu *v
  83.392      sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
  83.393      r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
  83.394      ASSERT( !(r & SHADOW_SET_ERROR) );
  83.395 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
  83.396      return 1;
  83.397  }
  83.398  #endif
  83.399 @@ -4800,7 +4966,7 @@ static void emulate_unmap_dest(struct vc
  83.400  
  83.401  static int
  83.402  sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
  83.403 -                      u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
  83.404 +                     u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
  83.405  {
  83.406      void *addr;
  83.407  
  83.408 @@ -4815,6 +4981,22 @@ sh_x86_emulate_write(struct vcpu *v, uns
  83.409      shadow_lock(v->domain);
  83.410      memcpy(addr, src, bytes);
  83.411  
  83.412 +    if ( tb_init_done )
  83.413 +    {
  83.414 +#if GUEST_PAGING_LEVELS == 3
  83.415 +        if ( vaddr == this_cpu(trace_emulate_initial_va) )
  83.416 +            memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
  83.417 +        else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
  83.418 +        {
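          +            /* Second half of a 64-bit PAE PTE write: capture the whole PTE. */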
  83.419 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
  83.420 +            memcpy(&this_cpu(trace_emulate_write_val),
  83.421 +                   (void *)(((unsigned long) addr) & ~(0x7UL)), GUEST_PTE_SIZE);
  83.422 +        }
  83.423 +#else
  83.424 +        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
  83.425 +#endif
  83.426 +    }
  83.427 +
  83.428      emulate_unmap_dest(v, addr, bytes, sh_ctxt);
  83.429      shadow_audit_tables(v);
  83.430      shadow_unlock(v->domain);
    84.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Sep 12 14:32:45 2008 +0900
    84.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Sep 12 14:47:40 2008 +0900
    84.3 @@ -92,6 +92,43 @@ extern int shadow_audit_enable;
    84.4  #define SHADOW_DEBUG_LOGDIRTY          0
    84.5  
    84.6  /******************************************************************************
    84.7 + * Tracing
    84.8 + */
    84.9 +DECLARE_PER_CPU(uint32_t,trace_shadow_path_flags);
   84.10 +
    84.11 +#define TRACE_SHADOW_PATH_FLAG(_x)                          \
    84.12 +    do {                                                    \
    84.13 +        this_cpu(trace_shadow_path_flags) |= (1<<(_x));     \
    84.14 +    } while (0)
   84.15 +
   84.16 +#define TRACE_CLEAR_PATH_FLAGS                  \
   84.17 +    this_cpu(trace_shadow_path_flags) = 0
   84.18 +
   84.19 +enum {
   84.20 +    TRCE_SFLAG_SET_AD,
   84.21 +    TRCE_SFLAG_SET_A,
   84.22 +    TRCE_SFLAG_SHADOW_L1_GET_REF,
   84.23 +    TRCE_SFLAG_SHADOW_L1_PUT_REF,
   84.24 +    TRCE_SFLAG_L2_PROPAGATE,
   84.25 +    TRCE_SFLAG_SET_CHANGED,
   84.26 +    TRCE_SFLAG_SET_FLUSH,
   84.27 +    TRCE_SFLAG_SET_ERROR,
   84.28 +    TRCE_SFLAG_DEMOTE,
   84.29 +    TRCE_SFLAG_PROMOTE,
   84.30 +    TRCE_SFLAG_WRMAP,
   84.31 +    TRCE_SFLAG_WRMAP_GUESS_FOUND,
   84.32 +    TRCE_SFLAG_WRMAP_BRUTE_FORCE,
   84.33 +    TRCE_SFLAG_EARLY_UNSHADOW,
   84.34 +    TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
   84.35 +    TRCE_SFLAG_EMULATION_LAST_FAILED,
   84.36 +    TRCE_SFLAG_EMULATE_FULL_PT,
   84.37 +    TRCE_SFLAG_PREALLOC_UNHOOK,
   84.38 +    TRCE_SFLAG_UNSYNC,
   84.39 +    TRCE_SFLAG_OOS_FIXUP_ADD,
   84.40 +    TRCE_SFLAG_OOS_FIXUP_EVICT,
   84.41 +};
   84.42 +
   84.43 +/******************************************************************************
   84.44   * The shadow lock.
   84.45   *
   84.46   * This lock is per-domain.  It is intended to allow us to make atomic
   84.47 @@ -143,6 +180,12 @@ extern int shadow_audit_enable;
   84.48      } while (0)
   84.49  
   84.50  
   84.51 +/* Size (in bytes) of a guest PTE */
   84.52 +#if GUEST_PAGING_LEVELS >= 3
   84.53 +# define GUEST_PTE_SIZE 8
   84.54 +#else
   84.55 +# define GUEST_PTE_SIZE 4
   84.56 +#endif
   84.57  
   84.58  /******************************************************************************
   84.59   * Auditing routines 
    85.1 --- a/xen/arch/x86/physdev.c	Fri Sep 12 14:32:45 2008 +0900
    85.2 +++ b/xen/arch/x86/physdev.c	Fri Sep 12 14:47:40 2008 +0900
    85.3 @@ -58,9 +58,6 @@ static int get_free_pirq(struct domain *
    85.4      return i;
    85.5  }
    85.6  
    85.7 -/*
    85.8 - * Caller hold the irq_lock
    85.9 - */
   85.10  static int map_domain_pirq(struct domain *d, int pirq, int vector,
   85.11                             struct physdev_map_pirq *map)
   85.12  {
   85.13 @@ -136,13 +133,12 @@ done:
   85.14      return ret;
   85.15  }
   85.16  
   85.17 -/*
   85.18 - * The pirq should has been unbound before this call
   85.19 - */
   85.20 +/* The pirq should have been unbound before this call. */
   85.21  static int unmap_domain_pirq(struct domain *d, int pirq)
   85.22  {
   85.23 -    int ret = 0;
   85.24 -    int vector;
   85.25 +    unsigned long flags;
   85.26 +    irq_desc_t *desc;
   85.27 +    int vector, ret = 0;
   85.28  
   85.29      if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
   85.30          return -EINVAL;
   85.31 @@ -159,33 +155,29 @@ static int unmap_domain_pirq(struct doma
    85.32          gdprintk(XENLOG_G_ERR, "domain %X: pirq %x still not mapped\n",
   85.33                   d->domain_id, pirq);
   85.34          ret = -EINVAL;
   85.35 -    }
   85.36 -    else
   85.37 -    {
   85.38 -        unsigned long flags;
   85.39 -        irq_desc_t *desc;
   85.40 -
   85.41 -        desc = &irq_desc[vector];
   85.42 -        spin_lock_irqsave(&desc->lock, flags);
   85.43 -        if ( desc->msi_desc )
   85.44 -            pci_disable_msi(vector);
   85.45 -
   85.46 -        if ( desc->handler == &pci_msi_type )
   85.47 -        {
   85.48 -            /* MSI is not shared, so should be released already */
   85.49 -            BUG_ON(desc->status & IRQ_GUEST);
   85.50 -            irq_desc[vector].handler = &no_irq_type;
   85.51 -        }
   85.52 -        spin_unlock_irqrestore(&desc->lock, flags);
   85.53 -
   85.54 -        d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
   85.55 +        goto done;
   85.56      }
   85.57  
   85.58 +    desc = &irq_desc[vector];
   85.59 +    spin_lock_irqsave(&desc->lock, flags);
   85.60 +    if ( desc->msi_desc )
   85.61 +        pci_disable_msi(vector);
   85.62 +
   85.63 +    if ( desc->handler == &pci_msi_type )
   85.64 +    {
   85.65 +        /* MSI is not shared, so should be released already */
   85.66 +        BUG_ON(desc->status & IRQ_GUEST);
   85.67 +        irq_desc[vector].handler = &no_irq_type;
   85.68 +    }
   85.69 +    spin_unlock_irqrestore(&desc->lock, flags);
   85.70 +
   85.71 +    d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
   85.72 +
   85.73      ret = irq_deny_access(d, pirq);
   85.74 -
   85.75      if ( ret )
    85.76          gdprintk(XENLOG_G_ERR, "failed to deny access to irq %x\n", pirq);
   85.77  
   85.78 + done:
   85.79      return ret;
   85.80  }
   85.81  
   85.82 @@ -195,10 +187,6 @@ static int physdev_map_pirq(struct physd
   85.83      int vector, pirq, ret = 0;
   85.84      unsigned long flags;
   85.85  
   85.86 -    /* if msi_enable is not enabled, map always succeeds */
   85.87 -    if ( !msi_enable )
   85.88 -        return 0;
   85.89 -
   85.90      if ( !IS_PRIV(current->domain) )
   85.91          return -EPERM;
   85.92  
   85.93 @@ -308,29 +296,21 @@ static int physdev_unmap_pirq(struct phy
   85.94      unsigned long flags;
   85.95      int ret;
   85.96  
   85.97 -    if ( !msi_enable )
   85.98 -        return 0;
   85.99 -
  85.100      if ( !IS_PRIV(current->domain) )
  85.101          return -EPERM;
  85.102  
  85.103 -    if ( !unmap )
  85.104 -        return -EINVAL;
  85.105 -
  85.106      if ( unmap->domid == DOMID_SELF )
  85.107          d = rcu_lock_domain(current->domain);
  85.108      else
  85.109          d = rcu_lock_domain_by_id(unmap->domid);
  85.110  
  85.111      if ( d == NULL )
  85.112 -    {
  85.113 -        rcu_unlock_domain(d);
  85.114          return -ESRCH;
  85.115 -    }
  85.116  
  85.117      spin_lock_irqsave(&d->arch.irq_lock, flags);
  85.118      ret = unmap_domain_pirq(d, unmap->pirq);
  85.119      spin_unlock_irqrestore(&d->arch.irq_lock, flags);
  85.120 +
  85.121      rcu_unlock_domain(d);
  85.122  
  85.123      return ret;
  85.124 @@ -452,20 +432,14 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
  85.125  
  85.126          irq = irq_op.irq;
  85.127          ret = -EINVAL;
  85.128 -        if ( ((irq < 0) && (irq != AUTO_ASSIGN)) || (irq >= NR_IRQS) )
  85.129 +        if ( (irq < 0) || (irq >= NR_IRQS) )
  85.130              break;
  85.131  
  85.132          irq_op.vector = assign_irq_vector(irq);
  85.133  
  85.134 -        ret = 0;
  85.135 -
  85.136 -        if ( msi_enable )
  85.137 -        {
  85.138 -            spin_lock_irqsave(&dom0->arch.irq_lock, flags);
  85.139 -            if ( irq != AUTO_ASSIGN )
  85.140 -                ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
  85.141 -            spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
  85.142 -        }
  85.143 +        spin_lock_irqsave(&dom0->arch.irq_lock, flags);
  85.144 +        ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
  85.145 +        spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
  85.146  
  85.147          if ( copy_to_guest(arg, &irq_op, 1) != 0 )
  85.148              ret = -EFAULT;
    86.1 --- a/xen/arch/x86/platform_hypercall.c	Fri Sep 12 14:32:45 2008 +0900
    86.2 +++ b/xen/arch/x86/platform_hypercall.c	Fri Sep 12 14:47:40 2008 +0900
    86.3 @@ -192,6 +192,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
    86.4      break;
    86.5  
    86.6      case XENPF_firmware_info:
    86.7 +        ret = xsm_firmware_info();
    86.8 +        if ( ret )
    86.9 +            break;
   86.10 +
   86.11          switch ( op->u.firmware_info.type )
   86.12          {
   86.13          case XEN_FW_DISK_INFO: {
   86.14 @@ -280,10 +284,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
   86.15          break;
   86.16  
   86.17      case XENPF_enter_acpi_sleep:
   86.18 +        ret = xsm_acpi_sleep();
   86.19 +        if ( ret )
   86.20 +            break;
   86.21 +
   86.22          ret = acpi_enter_sleep(&op->u.enter_acpi_sleep);
   86.23          break;
   86.24  
   86.25      case XENPF_change_freq:
   86.26 +        ret = xsm_change_freq();
   86.27 +        if ( ret )
   86.28 +            break;
   86.29 +
   86.30          ret = -ENOSYS;
   86.31          if ( cpufreq_controller != FREQCTL_dom0_kernel )
   86.32              break;
   86.33 @@ -306,6 +318,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
   86.34          XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
   86.35          XEN_GUEST_HANDLE(uint64) idletimes;
   86.36  
   86.37 +        ret = xsm_getidletime();
   86.38 +        if ( ret )
   86.39 +            break;
   86.40 +
   86.41          ret = -ENOSYS;
   86.42          if ( cpufreq_controller != FREQCTL_dom0_kernel )
   86.43              break;
    87.1 --- a/xen/arch/x86/smpboot.c	Fri Sep 12 14:32:45 2008 +0900
    87.2 +++ b/xen/arch/x86/smpboot.c	Fri Sep 12 14:47:40 2008 +0900
    87.3 @@ -1225,15 +1225,6 @@ int __cpu_disable(void)
    87.4  	if (cpu == 0)
    87.5  		return -EBUSY;
    87.6  
    87.7 -	/*
    87.8 -	 * Only S3 is using this path, and thus idle vcpus are running on all
    87.9 -	 * APs when we are called. To support full cpu hotplug, other 
   87.10 -	 * notification mechanisms should be introduced (e.g., migrate vcpus
   87.11 -	 * off this physical cpu before rendezvous point).
   87.12 -	 */
   87.13 -	if (!is_idle_vcpu(current))
   87.14 -		return -EINVAL;
   87.15 -
   87.16  	local_irq_disable();
   87.17  	clear_local_APIC();
   87.18  	/* Allow any queued timer interrupts to get serviced */
   87.19 @@ -1249,6 +1240,9 @@ int __cpu_disable(void)
   87.20  	fixup_irqs(map);
   87.21  	/* It's now safe to remove this processor from the online map */
   87.22  	cpu_clear(cpu, cpu_online_map);
   87.23 +
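          +	/* Migrate, or break the affinity of, vcpus still tied to this CPU. */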
   87.24 +	cpu_disable_scheduler();
   87.25 +
   87.26  	return 0;
   87.27  }
   87.28  
   87.29 @@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
   87.30      return __cpu_disable();
   87.31  }
   87.32  
   87.33 -/* 
   87.34 - * XXX: One important thing missed here is to migrate vcpus
   87.35 - * from dead cpu to other online ones and then put whole
   87.36 - * system into a stop state. It assures a safe environment
   87.37 - * for a cpu hotplug/remove at normal running state.
   87.38 - *
   87.39 - * However for xen PM case, at this point:
   87.40 - * 	-> All other domains should be notified with PM event,
   87.41 - *	   and then in following states:
   87.42 - *		* Suspend state, or
   87.43 - *		* Paused state, which is a force step to all
   87.44 - *		  domains if they do nothing to suspend
   87.45 - *	-> All vcpus of dom0 (except vcpu0) have already beem
   87.46 - *	   hot removed
   87.47 - * with the net effect that all other cpus only have idle vcpu
   87.48 - * running. In this special case, we can avoid vcpu migration
   87.49 - * then and system can be considered in a stop state.
   87.50 - *
   87.51 - * So current cpu hotplug is a special version for PM specific
   87.52 - * usage, and need more effort later for full cpu hotplug.
   87.53 - * (ktian1)
   87.54 - */
   87.55  int cpu_down(unsigned int cpu)
   87.56  {
   87.57  	int err = 0;
   87.58 @@ -1307,6 +1279,12 @@ int cpu_down(unsigned int cpu)
   87.59  		goto out;
   87.60  	}
   87.61  
    87.62 +	/* Cannot offline the BSP (CPU 0). */
   87.63 +	if (cpu == 0) {
   87.64 +		err = -EINVAL;
   87.65 +		goto out;
   87.66 +	}
   87.67 +
   87.68  	if (!cpu_online(cpu)) {
   87.69  		err = -EINVAL;
   87.70  		goto out;
    88.1 --- a/xen/arch/x86/time.c	Fri Sep 12 14:32:45 2008 +0900
    88.2 +++ b/xen/arch/x86/time.c	Fri Sep 12 14:47:40 2008 +0900
    88.3 @@ -993,15 +993,16 @@ static void local_time_calibration(void)
    88.4   * All CPUS snapshot their local TSC and extrapolation of system time.
    88.5   */
    88.6  struct calibration_rendezvous {
    88.7 +    cpumask_t cpu_calibration_map;
    88.8      atomic_t nr_cpus;
    88.9      s_time_t master_stime;
   88.10  };
   88.11  
   88.12  static void time_calibration_rendezvous(void *_r)
   88.13  {
   88.14 -    unsigned int total_cpus = num_online_cpus();
   88.15      struct cpu_calibration *c = &this_cpu(cpu_calibration);
   88.16      struct calibration_rendezvous *r = _r;
   88.17 +    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
   88.18  
   88.19      if ( smp_processor_id() == 0 )
   88.20      {
   88.21 @@ -1029,11 +1030,13 @@ static void time_calibration_rendezvous(
   88.22  static void time_calibration(void *unused)
   88.23  {
   88.24      struct calibration_rendezvous r = {
   88.25 +        .cpu_calibration_map = cpu_online_map,
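          +        /* Snapshot taken so CPUs going offline mid-rendezvous cannot skew the count. */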
   88.26          .nr_cpus = ATOMIC_INIT(0)
   88.27      };
   88.28  
   88.29      /* @wait=1 because we must wait for all cpus before freeing @r. */
   88.30 -    on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
   88.31 +    on_selected_cpus(r.cpu_calibration_map,
   88.32 +                     time_calibration_rendezvous, &r, 0, 1);
   88.33  }
   88.34  
   88.35  void init_percpu_time(void)
    89.1 --- a/xen/arch/x86/traps.c	Fri Sep 12 14:32:45 2008 +0900
    89.2 +++ b/xen/arch/x86/traps.c	Fri Sep 12 14:47:40 2008 +0900
    89.3 @@ -47,7 +47,7 @@
    89.4  #include <xen/version.h>
    89.5  #include <xen/kexec.h>
    89.6  #include <xen/trace.h>
    89.7 -#include <asm/paging.h>
    89.8 +#include <xen/paging.h>
    89.9  #include <asm/system.h>
   89.10  #include <asm/io.h>
   89.11  #include <asm/atomic.h>
   89.12 @@ -2116,6 +2116,36 @@ static int emulate_privileged_op(struct 
   89.13              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   89.14                  goto fail;
   89.15              break;
   89.16 +        case MSR_AMD64_NB_CFG:
   89.17 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
   89.18 +                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
   89.19 +                goto fail;
   89.20 +            if ( !IS_PRIV(v->domain) )
   89.21 +                break;
   89.22 +            if ( (rdmsr_safe(MSR_AMD64_NB_CFG, l, h) != 0) ||
   89.23 +                 (eax != l) ||
   89.24 +                 ((edx ^ h) & ~(1 << (AMD64_NB_CFG_CF8_EXT_ENABLE_BIT - 32))) )
   89.25 +                goto invalid;
   89.26 +            if ( wrmsr_safe(MSR_AMD64_NB_CFG, eax, edx) != 0 )
   89.27 +                goto fail;
   89.28 +            break;
   89.29 +        case MSR_FAM10H_MMIO_CONF_BASE:
   89.30 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
   89.31 +                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
   89.32 +                goto fail;
   89.33 +            if ( !IS_PRIV(v->domain) )
   89.34 +                break;
   89.35 +            if ( (rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, l, h) != 0) ||
   89.36 +                 (((((u64)h << 32) | l) ^ res) &
   89.37 +                  ~((1 << FAM10H_MMIO_CONF_ENABLE_BIT) |
   89.38 +                    (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
   89.39 +                     FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
   89.40 +                    ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
   89.41 +                     FAM10H_MMIO_CONF_BASE_SHIFT))) )
   89.42 +                goto invalid;
   89.43 +            if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
   89.44 +                goto fail;
   89.45 +            break;
   89.46          case MSR_IA32_PERF_CTL:
   89.47              if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   89.48                  goto fail;
   89.49 @@ -2124,11 +2154,18 @@ static int emulate_privileged_op(struct 
   89.50              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   89.51                  goto fail;
   89.52              break;
   89.53 +        case MSR_IA32_THERM_CONTROL:
   89.54 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   89.55 +                goto fail;
   89.56 +            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   89.57 +                goto fail;
   89.58 +            break;
   89.59          default:
   89.60              if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
   89.61                  break;
   89.62              if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
   89.63                   (eax != l) || (edx != h) )
   89.64 +        invalid:
   89.65                  gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
   89.66                          "%08x:%08x to %08x:%08x.\n",
   89.67                          _p(regs->ecx), h, l, edx, eax);
   89.68 @@ -2199,6 +2236,12 @@ static int emulate_privileged_op(struct 
   89.69                           MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
   89.70                           MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
   89.71              break;
   89.72 +        case MSR_IA32_THERM_CONTROL:
   89.73 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   89.74 +                goto fail;
   89.75 +            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
   89.76 +                goto fail;
   89.77 +            break;
   89.78          default:
   89.79              if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
   89.80              {
    90.1 --- a/xen/common/domain.c	Fri Sep 12 14:32:45 2008 +0900
    90.2 +++ b/xen/common/domain.c	Fri Sep 12 14:47:40 2008 +0900
    90.3 @@ -651,9 +651,11 @@ void vcpu_reset(struct vcpu *v)
    90.4  
    90.5      set_bit(_VPF_down, &v->pause_flags);
    90.6  
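          +    /* Reset poll state: poll_mask/poll_evtchn replace the old is_polling flag. */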
    90.7 +    clear_bit(v->vcpu_id, d->poll_mask);
    90.8 +    v->poll_evtchn = 0;
    90.9 +
   90.10      v->fpu_initialised = 0;
   90.11      v->fpu_dirtied     = 0;
   90.12 -    v->is_polling      = 0;
   90.13      v->is_initialised  = 0;
   90.14      v->nmi_pending     = 0;
   90.15      v->mce_pending     = 0;
    91.1 --- a/xen/common/domctl.c	Fri Sep 12 14:32:45 2008 +0900
    91.2 +++ b/xen/common/domctl.c	Fri Sep 12 14:47:40 2008 +0900
    91.3 @@ -655,9 +655,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
    91.4          spin_lock(&d->page_alloc_lock);
    91.5          if ( new_max >= d->tot_pages )
    91.6          {
    91.7 -            ret = guest_physmap_max_mem_pages(d, new_max);
    91.8 -            if ( ret != 0 )
    91.9 -                break;
   91.10              d->max_pages = new_max;
   91.11              ret = 0;
   91.12          }
   91.13 @@ -729,16 +726,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   91.14          if ( d == NULL )
   91.15              break;
   91.16  
   91.17 -        ret = xsm_irq_permission(d, pirq, op->u.irq_permission.allow_access);
   91.18 -        if ( ret )
   91.19 -            goto irq_permission_out;
   91.20 -        
   91.21          if ( op->u.irq_permission.allow_access )
   91.22              ret = irq_permit_access(d, pirq);
   91.23          else
   91.24              ret = irq_deny_access(d, pirq);
   91.25  
   91.26 -    irq_permission_out:
   91.27          rcu_unlock_domain(d);
   91.28      }
   91.29      break;
   91.30 @@ -758,16 +750,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   91.31          if ( d == NULL )
   91.32              break;
   91.33  
   91.34 -        ret = xsm_iomem_permission(d, mfn, op->u.iomem_permission.allow_access);
   91.35 -        if ( ret )
   91.36 -            goto iomem_permission_out;
   91.37 -
   91.38          if ( op->u.iomem_permission.allow_access )
   91.39              ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
   91.40          else
   91.41              ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
   91.42  
   91.43 -    iomem_permission_out:
   91.44          rcu_unlock_domain(d);
   91.45      }
   91.46      break;
   91.47 @@ -815,6 +802,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   91.48              goto set_target_out;
   91.49          }
   91.50  
   91.51 +        ret = xsm_set_target(d, e);
    91.52 +        if ( ret )
          +        {
    91.53 +            put_domain(e);
    91.54 +            goto set_target_out;
    91.55 +        }
   91.56 +
   91.57          /* Hold reference on @e until we destroy @d. */
   91.58          d->target = e;
   91.59  
    92.1 --- a/xen/common/event_channel.c	Fri Sep 12 14:32:45 2008 +0900
    92.2 +++ b/xen/common/event_channel.c	Fri Sep 12 14:47:40 2008 +0900
    92.3 @@ -545,6 +545,7 @@ out:
    92.4  static int evtchn_set_pending(struct vcpu *v, int port)
    92.5  {
    92.6      struct domain *d = v->domain;
    92.7 +    int vcpuid;
    92.8  
    92.9      /*
   92.10       * The following bit operations must happen in strict order.
   92.11 @@ -564,15 +565,19 @@ static int evtchn_set_pending(struct vcp
   92.12      }
   92.13      
   92.14      /* Check if some VCPU might be polling for this event. */
   92.15 -    if ( unlikely(d->is_polling) )
   92.16 +    if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) )
   92.17 +        return 0;
   92.18 +
   92.19 +    /* Wake any interested (or potentially interested) pollers. */
   92.20 +    for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS);
   92.21 +          vcpuid < MAX_VIRT_CPUS;
   92.22 +          vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) )
   92.23      {
   92.24 -        d->is_polling = 0;
   92.25 -        smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */
   92.26 -        for_each_vcpu ( d, v )
   92.27 +        v = d->vcpu[vcpuid];
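          +        /* A non-positive poll_evtchn matches any port; a positive one only its own. */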
   92.28 +        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
   92.29 +             test_and_clear_bit(vcpuid, d->poll_mask) )
   92.30          {
   92.31 -            if ( !v->is_polling )
   92.32 -                continue;
   92.33 -            v->is_polling = 0;
   92.34 +            v->poll_evtchn = 0;
   92.35              vcpu_unblock(v);
   92.36          }
   92.37      }
    93.1 --- a/xen/common/rangeset.c	Fri Sep 12 14:32:45 2008 +0900
    93.2 +++ b/xen/common/rangeset.c	Fri Sep 12 14:47:40 2008 +0900
    93.3 @@ -10,6 +10,7 @@
    93.4  #include <xen/sched.h>
    93.5  #include <xen/errno.h>
    93.6  #include <xen/rangeset.h>
    93.7 +#include <xsm/xsm.h>
    93.8  
    93.9  /* An inclusive range [s,e] and pointer to next range in ascending order. */
   93.10  struct range {
   93.11 @@ -96,6 +97,10 @@ int rangeset_add_range(
   93.12      struct range *x, *y;
   93.13      int rc = 0;
   93.14  
   93.15 +    rc = xsm_add_range(r->domain, r->name, s, e);
   93.16 +    if ( rc )
   93.17 +        return rc;
   93.18 +
   93.19      ASSERT(s <= e);
   93.20  
   93.21      spin_lock(&r->lock);
   93.22 @@ -164,6 +169,10 @@ int rangeset_remove_range(
   93.23      struct range *x, *y, *t;
   93.24      int rc = 0;
   93.25  
   93.26 +    rc = xsm_remove_range(r->domain, r->name, s, e);
   93.27 +    if ( rc )
   93.28 +        return rc;
   93.29 +
   93.30      ASSERT(s <= e);
   93.31  
   93.32      spin_lock(&r->lock);
    94.1 --- a/xen/common/sched_credit.c	Fri Sep 12 14:32:45 2008 +0900
    94.2 +++ b/xen/common/sched_credit.c	Fri Sep 12 14:47:40 2008 +0900
    94.3 @@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
    94.4  
    94.5      BUG_ON( cpu != snext->vcpu->processor );
    94.6  
    94.7 +    /* If this CPU is going offline we shouldn't steal work. */
    94.8 +    if ( unlikely(!cpu_online(cpu)) )
    94.9 +        goto out;
   94.10 +
   94.11      if ( snext->pri == CSCHED_PRI_IDLE )
   94.12          CSCHED_STAT_CRANK(load_balance_idle);
   94.13      else if ( snext->pri == CSCHED_PRI_TS_OVER )
   94.14 @@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
   94.15              return speer;
   94.16      }
   94.17  
   94.18 + out:
   94.19      /* Failed to find more important work elsewhere... */
   94.20      __runq_remove(snext);
   94.21      return snext;
    95.1 --- a/xen/common/schedule.c	Fri Sep 12 14:32:45 2008 +0900
    95.2 +++ b/xen/common/schedule.c	Fri Sep 12 14:47:40 2008 +0900
    95.3 @@ -63,12 +63,32 @@ static struct scheduler ops;
    95.4           (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
    95.5            : (typeof(ops.fn(__VA_ARGS__)))0 )
    95.6  
    95.7 +static inline void trace_runstate_change(struct vcpu *v, int new_state)
    95.8 +{
    95.9 +    struct { uint32_t vcpu:16, domain:16; } d;
   95.10 +    uint32_t event;
   95.11 +
   95.12 +    if ( likely(!tb_init_done) )
   95.13 +        return;
   95.14 +
   95.15 +    d.vcpu = v->vcpu_id;
   95.16 +    d.domain = v->domain->domain_id;
   95.17 +
   95.18 +    event = TRC_SCHED_RUNSTATE_CHANGE;
   95.19 +    event |= ( v->runstate.state & 0x3 ) << 8;
   95.20 +    event |= ( new_state & 0x3 ) << 4;
   95.21 +
   95.22 +    __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
   95.23 +}
   95.24 +
   95.25  static inline void vcpu_runstate_change(
   95.26      struct vcpu *v, int new_state, s_time_t new_entry_time)
   95.27  {
   95.28      ASSERT(v->runstate.state != new_state);
   95.29      ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
   95.30  
   95.31 +    trace_runstate_change(v, new_state);
   95.32 +
   95.33      v->runstate.time[v->runstate.state] +=
   95.34          new_entry_time - v->runstate.state_entry_time;
   95.35      v->runstate.state_entry_time = new_entry_time;
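
trace_runstate_change() packs both states into the event word itself: bits 8-9 carry the old runstate and bits 4-5 the new one, on top of TRC_SCHED_RUNSTATE_CHANGE (0x00021001, per the trace.h hunk below). A standalone round-trip of that encoding, assuming the public-interface values RUNSTATE_running=0 through RUNSTATE_offline=3:

    #include <stdio.h>
    #include <stdint.h>

    #define TRC_SCHED_RUNSTATE_CHANGE 0x00021001u  /* TRC_SCHED_MIN + 1 */

    int main(void)
    {
        uint32_t old_state = 2 /* blocked */, new_state = 0 /* running */;
        uint32_t event = TRC_SCHED_RUNSTATE_CHANGE
                       | ((old_state & 0x3) << 8) | ((new_state & 0x3) << 4);
        printf("event=%#x old=%u new=%u\n",
               event, (event >> 8) & 0x3, (event >> 4) & 0x3);
        return 0;
    }
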
   95.36 @@ -198,6 +218,27 @@ void vcpu_wake(struct vcpu *v)
   95.37      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
   95.38  }
   95.39  
   95.40 +void vcpu_unblock(struct vcpu *v)
   95.41 +{
   95.42 +    if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
   95.43 +        return;
   95.44 +
   95.45 +    /* Polling period ends when a VCPU is unblocked. */
   95.46 +    if ( unlikely(v->poll_evtchn != 0) )
   95.47 +    {
   95.48 +        v->poll_evtchn = 0;
   95.49 +        /*
   95.50 +         * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
   95.51 +         * this VCPU (and it then going back to sleep on poll_mask).
   95.52 +         * Test-and-clear is idiomatic and ensures clear_bit not reordered.
   95.53 +         */
   95.54 +        if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
   95.55 +            clear_bit(_VPF_blocked, &v->pause_flags);
   95.56 +    }
   95.57 +
   95.58 +    vcpu_wake(v);
   95.59 +}
   95.60 +
   95.61  static void vcpu_migrate(struct vcpu *v)
   95.62  {
   95.63      unsigned long flags;
   95.64 @@ -247,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
   95.65      }
   95.66  }
   95.67  
   95.68 +/*
   95.69 + * This function is used by cpu_hotplug code from stop_machine context.
    95.70 + * Hence we can avoid needing to take the schedule_lock.
   95.71 + */
   95.72 +void cpu_disable_scheduler(void)
   95.73 +{
   95.74 +    struct domain *d;
   95.75 +    struct vcpu *v;
   95.76 +    unsigned int cpu = smp_processor_id();
   95.77 +
   95.78 +    for_each_domain ( d )
   95.79 +    {
   95.80 +        for_each_vcpu ( d, v )
   95.81 +        {
   95.82 +            if ( is_idle_vcpu(v) )
   95.83 +                continue;
   95.84 +
   95.85 +            if ( (cpus_weight(v->cpu_affinity) == 1) &&
   95.86 +                 cpu_isset(cpu, v->cpu_affinity) )
   95.87 +            {
   95.88 +                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
   95.89 +                        v->domain->domain_id, v->vcpu_id);
   95.90 +                cpus_setall(v->cpu_affinity);
   95.91 +            }
   95.92 +
   95.93 +            /*
   95.94 +             * Migrate single-shot timers to CPU0. A new cpu will automatically
   95.95 +             * be chosen when the timer is next re-set.
   95.96 +             */
   95.97 +            if ( v->singleshot_timer.cpu == cpu )
   95.98 +                migrate_timer(&v->singleshot_timer, 0);
   95.99 +
  95.100 +            if ( v->processor == cpu )
  95.101 +            {
  95.102 +                set_bit(_VPF_migrating, &v->pause_flags);
  95.103 +                vcpu_sleep_nosync(v);
  95.104 +                vcpu_migrate(v);
  95.105 +            }
  95.106 +        }
  95.107 +    }
  95.108 +}
  95.109 +
  95.110  static int __vcpu_set_affinity(
  95.111      struct vcpu *v, cpumask_t *affinity,
  95.112      bool_t old_lock_status, bool_t new_lock_status)
  95.113 @@ -337,7 +420,7 @@ static long do_poll(struct sched_poll *s
  95.114      struct vcpu   *v = current;
  95.115      struct domain *d = v->domain;
  95.116      evtchn_port_t  port;
  95.117 -    long           rc = 0;
  95.118 +    long           rc;
  95.119      unsigned int   i;
  95.120  
  95.121      /* Fairly arbitrary limit. */
  95.122 @@ -348,11 +431,24 @@ static long do_poll(struct sched_poll *s
  95.123          return -EFAULT;
  95.124  
  95.125      set_bit(_VPF_blocked, &v->pause_flags);
  95.126 -    v->is_polling = 1;
  95.127 -    d->is_polling = 1;
  95.128 +    v->poll_evtchn = -1;
  95.129 +    set_bit(v->vcpu_id, d->poll_mask);
  95.130 +
  95.131 +#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
  95.132 +    /* Check for events /after/ setting flags: avoids wakeup waiting race. */
  95.133 +    smp_mb();
  95.134  
  95.135 -    /* Check for events /after/ setting flags: avoids wakeup waiting race. */
  95.136 -    smp_wmb();
  95.137 +    /*
  95.138 +     * Someone may have seen we are blocked but not that we are polling, or
  95.139 +     * vice versa. We are certainly being woken, so clean up and bail. Beyond
  95.140 +     * this point others can be guaranteed to clean up for us if they wake us.
  95.141 +     */
  95.142 +    rc = 0;
  95.143 +    if ( (v->poll_evtchn == 0) ||
  95.144 +         !test_bit(_VPF_blocked, &v->pause_flags) ||
  95.145 +         !test_bit(v->vcpu_id, d->poll_mask) )
  95.146 +        goto out;
  95.147 +#endif
  95.148  
  95.149      for ( i = 0; i < sched_poll->nr_ports; i++ )
  95.150      {
  95.151 @@ -369,6 +465,9 @@ static long do_poll(struct sched_poll *s
  95.152              goto out;
  95.153      }
  95.154  
  95.155 +    if ( sched_poll->nr_ports == 1 )
  95.156 +        v->poll_evtchn = port;
  95.157 +
  95.158      if ( sched_poll->timeout != 0 )
  95.159          set_timer(&v->poll_timer, sched_poll->timeout);
  95.160  
  95.161 @@ -378,7 +477,8 @@ static long do_poll(struct sched_poll *s
  95.162      return 0;
  95.163  
  95.164   out:
  95.165 -    v->is_polling = 0;
  95.166 +    v->poll_evtchn = 0;
  95.167 +    clear_bit(v->vcpu_id, d->poll_mask);
  95.168      clear_bit(_VPF_blocked, &v->pause_flags);
  95.169      return rc;
  95.170  }
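
do_poll() now relies on the classic sleep/wakeup protocol: publish the blocked/polling flags first, issue a full barrier (explicit on non-x86, implied by set_bit() on x86), and only then check for already-pending events; the symmetric waker publishes the event before testing the flags, so at least one side always observes the other. A standalone sketch of the two halves of that protocol using C11 fences:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool blocked, event_pending;

    /* Sleeper: set flag, fence, re-check; mirrors the smp_mb() in do_poll(). */
    static bool sleeper_must_bail(void)
    {
        atomic_store_explicit(&blocked, true, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&event_pending, memory_order_relaxed);
    }

    /* Waker: set event, fence, test flag; mirrors evtchn_set_pending(). */
    static bool waker_must_wake(void)
    {
        atomic_store_explicit(&event_pending, true, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        return atomic_load_explicit(&blocked, memory_order_relaxed);
    }

    int main(void)
    {
        /* Whatever the interleaving, at least one side returns true. */
        return (sleeper_must_bail() || waker_must_wake()) ? 0 : 1;
    }
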
  95.171 @@ -628,7 +728,9 @@ static void vcpu_periodic_timer_work(str
  95.172          return;
  95.173  
  95.174      periodic_next_event = v->periodic_last_event + v->periodic_period;
  95.175 -    if ( now > periodic_next_event )
  95.176 +
  95.177 +    /* The timer subsystem may call us up to TIME_SLOP ahead of deadline. */
  95.178 +    if ( (now + TIME_SLOP) > periodic_next_event )
  95.179      {
  95.180          send_timer_event(v);
  95.181          v->periodic_last_event = now;
  95.182 @@ -758,11 +860,8 @@ static void poll_timer_fn(void *data)
  95.183  {
  95.184      struct vcpu *v = data;
  95.185  
  95.186 -    if ( !v->is_polling )
  95.187 -        return;
  95.188 -
  95.189 -    v->is_polling = 0;
  95.190 -    vcpu_unblock(v);
  95.191 +    if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
  95.192 +        vcpu_unblock(v);
  95.193  }
  95.194  
  95.195  /* Initialise the data structures. */
    96.1 --- a/xen/common/sysctl.c	Fri Sep 12 14:32:45 2008 +0900
    96.2 +++ b/xen/common/sysctl.c	Fri Sep 12 14:47:40 2008 +0900
    96.3 @@ -149,6 +149,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
    96.4          char c;
    96.5          uint32_t i;
    96.6  
    96.7 +        ret = xsm_debug_keys();
    96.8 +        if ( ret )
    96.9 +            break;
   96.10 +
   96.11          for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
   96.12          {
   96.13              if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
   96.14 @@ -166,6 +170,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   96.15  
   96.16          nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
   96.17  
   96.18 +        ret = xsm_getcpuinfo();
   96.19 +        if ( ret )
   96.20 +            break;
   96.21 +
   96.22          for ( i = 0; i < nr_cpus; i++ )
   96.23          {
   96.24              /* Assume no holes in idle-vcpu map. */
   96.25 @@ -188,6 +196,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   96.26  
   96.27      case XEN_SYSCTL_availheap:
   96.28      { 
   96.29 +        ret = xsm_availheap();
   96.30 +        if ( ret )
   96.31 +            break;
   96.32 +
   96.33          op->u.availheap.avail_bytes = avail_domheap_pages_region(
   96.34              op->u.availheap.node,
   96.35              op->u.availheap.min_bitwidth,
    97.1 --- a/xen/common/trace.c	Fri Sep 12 14:32:45 2008 +0900
    97.2 +++ b/xen/common/trace.c	Fri Sep 12 14:47:40 2008 +0900
    97.3 @@ -58,6 +58,7 @@ static int t_buf_highwater;
    97.4  
    97.5  /* Number of records lost due to per-CPU trace buffer being full. */
    97.6  static DEFINE_PER_CPU(unsigned long, lost_records);
    97.7 +static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);
    97.8  
    97.9  /* a flag recording whether initialization has been done */
   97.10  /* or more properly, if the tbuf subsystem is enabled right now */
   97.11 @@ -147,6 +148,31 @@ static int tb_set_size(int size)
   97.12      return 0;
   97.13  }
   97.14  
   97.15 +int trace_will_trace_event(u32 event)
   97.16 +{
   97.17 +    if ( !tb_init_done )
   97.18 +        return 0;
   97.19 +
   97.20 +    /*
   97.21 +     * Copied from __trace_var()
   97.22 +     */
   97.23 +    if ( (tb_event_mask & event) == 0 )
   97.24 +        return 0;
   97.25 +
   97.26 +    /* match class */
   97.27 +    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
   97.28 +        return 0;
   97.29 +
   97.30 +    /* then match subclass */
   97.31 +    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
   97.32 +                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
   97.33 +        return 0;
   97.34 +
   97.35 +    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
   97.36 +        return 0;
   97.37 +
   97.38 +    return 1;
   97.39 +}
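
trace_will_trace_event() lets callers skip argument marshalling when a record would be dropped anyway. The mask test works per field: the class bits and the subclass nibble of the event must each intersect the corresponding bits of tb_event_mask. A standalone walk-through with concrete numbers, assuming the shifts of 16 and 12 implied by class values such as 0x0002f000:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t mask  = 0x0002f000u;   /* trace TRC_SCHED, every subclass */
        uint32_t event = 0x00028001u;   /* a TRC_SCHED_VERBOSE event */

        int cls_ok = ((mask >> 16) & (event >> 16)) != 0;            /* 2 & 2 */
        int sub_ok = (((mask >> 12) & 0xf) & ((event >> 12) & 0xf)) != 0; /* f & 8 */

        printf("class: %d, subclass: %d -> %s\n", cls_ok, sub_ok,
               (cls_ok && sub_ok) ? "traced" : "dropped");
        return 0;
    }
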
   97.40  
   97.41  /**
   97.42   * init_trace_bufs - performs initialization of the per-cpu trace buffers.
   97.43 @@ -354,22 +380,27 @@ static inline int insert_wrap_record(str
   97.44                      NULL);
   97.45  }
   97.46  
   97.47 -#define LOST_REC_SIZE 8
   97.48 +#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
   97.49  
   97.50  static inline int insert_lost_records(struct t_buf *buf)
   97.51  {
   97.52      struct {
   97.53          u32 lost_records;
   97.54 -    } ed;
   97.55 +        u32 did:16, vid:16;
   97.56 +        u64 first_tsc;
   97.57 +    } __attribute__((packed)) ed;
   97.58  
   97.59 +    ed.vid = current->vcpu_id;
   97.60 +    ed.did = current->domain->domain_id;
   97.61      ed.lost_records = this_cpu(lost_records);
   97.62 +    ed.first_tsc = this_cpu(lost_records_first_tsc);
   97.63  
   97.64      this_cpu(lost_records) = 0;
   97.65  
   97.66      return __insert_record(buf,
   97.67                             TRC_LOST_RECORDS,
   97.68                             sizeof(ed),
   97.69 -                           0 /* !cycles */,
   97.70 +                           1 /* cycles */,
   97.71                             LOST_REC_SIZE,
   97.72                             (unsigned char *)&ed);
   97.73  }
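
The widened lost-record payload keeps LOST_REC_SIZE honest by construction: a 4-byte record header, an 8-byte TSC (now always emitted, hence the switch to 1 /* cycles */), and the packed 16-byte body. A standalone compile-time check of that arithmetic:

    #include <stdint.h>
    #include <assert.h>

    struct __attribute__((packed)) lost_rec_body {
        uint32_t lost_records;
        uint32_t did:16, vid:16;
        uint64_t first_tsc;
    };

    int main(void)
    {
        static_assert(sizeof(struct lost_rec_body) == 16, "body is 16 bytes");
        static_assert(4 + 8 + sizeof(struct lost_rec_body) == 28,
                      "matches LOST_REC_SIZE");
        return 0;
    }
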
   97.74 @@ -401,7 +432,8 @@ void __trace_var(u32 event, int cycles, 
   97.75      int extra_word;
   97.76      int started_below_highwater;
   97.77  
   97.78 -    ASSERT(tb_init_done);
    97.79 +    if ( !tb_init_done )
   97.80 +        return;
   97.81  
   97.82      /* Convert byte count into word count, rounding up */
   97.83      extra_word = (extra / sizeof(u32));
   97.84 @@ -479,7 +511,8 @@ void __trace_var(u32 event, int cycles, 
   97.85      /* Do we have enough space for everything? */
   97.86      if ( total_size > bytes_to_tail )
   97.87      {
   97.88 -        this_cpu(lost_records)++;
   97.89 +        if ( ++this_cpu(lost_records) == 1 )
    97.90 +            this_cpu(lost_records_first_tsc) = (u64)get_cycles();
   97.91          local_irq_restore(flags);
   97.92          return;
   97.93      }
    98.1 --- a/xen/drivers/acpi/hwregs.c	Fri Sep 12 14:32:45 2008 +0900
    98.2 +++ b/xen/drivers/acpi/hwregs.c	Fri Sep 12 14:47:40 2008 +0900
    98.3 @@ -239,11 +239,13 @@ acpi_status acpi_set_register(u32 regist
    98.4  
    98.5  	case ACPI_REGISTER_PM2_CONTROL:
    98.6  
    98.7 +#if 0 /* Redundant read in original Linux code. */
    98.8  		status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
    98.9  					       &register_value);
   98.10  		if (ACPI_FAILURE(status)) {
   98.11  			goto unlock_and_exit;
   98.12  		}
   98.13 +#endif
   98.14  
   98.15  		ACPI_DEBUG_PRINT((ACPI_DB_IO,
   98.16  				  "PM2 control: Read %X from %8.8X%8.8X\n",
    99.1 --- a/xen/drivers/passthrough/iommu.c	Fri Sep 12 14:32:45 2008 +0900
    99.2 +++ b/xen/drivers/passthrough/iommu.c	Fri Sep 12 14:47:40 2008 +0900
    99.3 @@ -33,11 +33,13 @@ int amd_iov_detect(void);
    99.4   *   pv                         Enable IOMMU for PV domains
    99.5   *   no-pv                      Disable IOMMU for PV domains (default)
    99.6   *   force|required             Don't boot unless IOMMU is enabled
    99.7 + *   passthrough                Bypass VT-d translation for Dom0
    99.8   */
    99.9  custom_param("iommu", parse_iommu_param);
   99.10  int iommu_enabled = 0;
   99.11  int iommu_pv_enabled = 0;
   99.12  int force_iommu = 0;
   99.13 +int iommu_passthrough = 0;
   99.14  
   99.15  static void __init parse_iommu_param(char *s)
   99.16  {
   99.17 @@ -58,6 +60,8 @@ static void __init parse_iommu_param(cha
   99.18              iommu_pv_enabled = 0;
   99.19          else if ( !strcmp(s, "force") || !strcmp(s, "required") )
   99.20              force_iommu = 1;
   99.21 +        else if ( !strcmp(s, "passthrough") )
   99.22 +            iommu_passthrough = 1;
   99.23  
   99.24          s = ss + 1;
   99.25      } while ( ss );
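
Because parse_iommu_param() splits its argument on commas, the new keyword composes with the existing ones on the hypervisor command line. An illustrative GRUB stanza (paths and the dom0 memory size are examples, not recommendations) that both requires the IOMMU and lets Dom0 bypass VT-d translation:

    kernel /boot/xen.gz iommu=force,passthrough dom0_mem=512M
    module /boot/vmlinuz-xen root=/dev/sda1 ro
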
   100.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Fri Sep 12 14:32:45 2008 +0900
   100.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Sep 12 14:47:40 2008 +0900
   100.3 @@ -1090,12 +1090,13 @@ static int domain_context_mapping_one(
   100.4      }
   100.5  
   100.6      spin_lock_irqsave(&iommu->lock, flags);
   100.7 -
   100.8 -#ifdef CONTEXT_PASSTHRU
   100.9 -    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
  100.10 +    if ( iommu_passthrough &&
  100.11 +         ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
  100.12 +    {
  100.13          context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
  100.14 +        agaw = level_to_agaw(iommu->nr_pt_levels);
  100.15 +    }
  100.16      else
  100.17 -#endif
  100.18      {
  100.19          /* Ensure we have pagetables allocated down to leaf PTE. */
  100.20          if ( hd->pgd_maddr == 0 )
  100.21 @@ -1459,11 +1460,13 @@ int intel_iommu_map_page(
  100.22      u64 pg_maddr;
  100.23      int pte_present;
  100.24  
  100.25 -#ifdef CONTEXT_PASSTHRU
  100.26 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  100.27 +    iommu = drhd->iommu;
  100.28 +
  100.29      /* do nothing if dom0 and iommu supports pass thru */
  100.30 -    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
  100.31 +    if ( iommu_passthrough &&
  100.32 +         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
  100.33          return 0;
  100.34 -#endif
  100.35  
  100.36      pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
  100.37      if ( pg_maddr == 0 )
  100.38 @@ -1500,11 +1503,10 @@ int intel_iommu_unmap_page(struct domain
  100.39      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
  100.40      iommu = drhd->iommu;
  100.41  
  100.42 -#ifdef CONTEXT_PASSTHRU
  100.43      /* do nothing if dom0 and iommu supports pass thru */
  100.44 -    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
  100.45 +    if ( iommu_passthrough &&
  100.46 +         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
  100.47          return 0;
  100.48 -#endif
  100.49  
  100.50      dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
  100.51  
   101.1 --- a/xen/include/asm-ia64/shadow.h	Fri Sep 12 14:32:45 2008 +0900
   101.2 +++ b/xen/include/asm-ia64/shadow.h	Fri Sep 12 14:47:40 2008 +0900
   101.3 @@ -63,8 +63,6 @@ shadow_mark_page_dirty(struct domain *d,
   101.4          return 0;
   101.5  }
   101.6  
   101.7 -#define guest_physmap_max_mem_pages(d, n) (0)
   101.8 -
   101.9  #endif // _XEN_SHADOW_H
  101.10  
  101.11  /*
   102.1 --- a/xen/include/asm-x86/bitops.h	Fri Sep 12 14:32:45 2008 +0900
   102.2 +++ b/xen/include/asm-x86/bitops.h	Fri Sep 12 14:47:40 2008 +0900
   102.3 @@ -116,8 +116,8 @@ static inline void __clear_bit(int nr, v
   102.4      __clear_bit(nr, addr);                              \
   102.5  })
   102.6  
   102.7 -#define smp_mb__before_clear_bit() barrier()
   102.8 -#define smp_mb__after_clear_bit()  barrier()
   102.9 +#define smp_mb__before_clear_bit() ((void)0)
  102.10 +#define smp_mb__after_clear_bit()  ((void)0)
  102.11  
  102.12  /**
  102.13   * __change_bit - Toggle a bit in memory
   103.1 --- a/xen/include/asm-x86/guest_access.h	Fri Sep 12 14:32:45 2008 +0900
   103.2 +++ b/xen/include/asm-x86/guest_access.h	Fri Sep 12 14:47:40 2008 +0900
   103.3 @@ -8,7 +8,7 @@
   103.4  #define __ASM_X86_GUEST_ACCESS_H__
   103.5  
   103.6  #include <asm/uaccess.h>
   103.7 -#include <asm/shadow.h>
   103.8 +#include <asm/paging.h>
   103.9  #include <asm/hvm/support.h>
  103.10  #include <asm/hvm/guest_access.h>
  103.11  
  103.12 @@ -87,10 +87,10 @@
  103.13   * Allows use of faster __copy_* functions.
  103.14   */
  103.15  #define guest_handle_okay(hnd, nr)                      \
  103.16 -    (shadow_mode_external(current->domain) ||           \
  103.17 +    (paging_mode_external(current->domain) ||           \
  103.18       array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
  103.19  #define guest_handle_subrange_okay(hnd, first, last)    \
  103.20 -    (shadow_mode_external(current->domain) ||           \
  103.21 +    (paging_mode_external(current->domain) ||           \
  103.22       array_access_ok((hnd).p + (first),                 \
  103.23                       (last)-(first)+1,                  \
  103.24                       sizeof(*(hnd).p)))
   104.1 --- a/xen/include/asm-x86/hvm/trace.h	Fri Sep 12 14:32:45 2008 +0900
   104.2 +++ b/xen/include/asm-x86/hvm/trace.h	Fri Sep 12 14:47:40 2008 +0900
   104.3 @@ -56,16 +56,13 @@
   104.4  #define TRC_PAR_LONG(par) (par)
   104.5  #endif
   104.6  
   104.7 -#define HVMTRACE_ND(evt, cycles, vcpu, count, d1, d2, d3, d4, d5, d6)   \
   104.8 +#define HVMTRACE_ND(evt, cycles, count, d1, d2, d3, d4, d5, d6)         \
   104.9      do {                                                                \
  104.10          if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )             \
  104.11          {                                                               \
  104.12              struct {                                                    \
  104.13 -                u32 did:16, vid:16;                                     \
  104.14                  u32 d[6];                                               \
  104.15              } _d;                                                       \
  104.16 -            _d.did=(vcpu)->domain->domain_id;                           \
  104.17 -            _d.vid=(vcpu)->vcpu_id;                                     \
  104.18              _d.d[0]=(d1);                                               \
  104.19              _d.d[1]=(d2);                                               \
  104.20              _d.d[2]=(d3);                                               \
  104.21 @@ -77,32 +74,32 @@
  104.22          }                                                               \
  104.23      } while(0)
  104.24  
  104.25 -#define HVMTRACE_6D(evt, vcpu, d1, d2, d3, d4, d5, d6)    \
  104.26 -                      HVMTRACE_ND(evt, 0, vcpu, 6, d1, d2, d3,  d4, d5, d6)
  104.27 -#define HVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5)        \
  104.28 -                      HVMTRACE_ND(evt, 0, vcpu, 5, d1, d2, d3,  d4, d5, 0)
  104.29 -#define HVMTRACE_4D(evt, vcpu, d1, d2, d3, d4)               \
  104.30 -                      HVMTRACE_ND(evt, 0, vcpu, 4, d1, d2, d3,  d4, 0, 0)
  104.31 -#define HVMTRACE_3D(evt, vcpu, d1, d2, d3)                   \
  104.32 -                      HVMTRACE_ND(evt, 0, vcpu, 3, d1, d2, d3,  0, 0, 0)
  104.33 -#define HVMTRACE_2D(evt, vcpu, d1, d2)                       \
  104.34 -                      HVMTRACE_ND(evt, 0, vcpu, 2, d1, d2,  0,  0, 0, 0)
  104.35 -#define HVMTRACE_1D(evt, vcpu, d1)                           \
  104.36 -                      HVMTRACE_ND(evt, 0, vcpu, 1, d1,  0,  0,  0, 0, 0)
  104.37 -#define HVMTRACE_0D(evt, vcpu)                               \
  104.38 -                      HVMTRACE_ND(evt, 0, vcpu, 0, 0,  0,  0,  0, 0, 0)
  104.39 +#define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6)    \
  104.40 +                      HVMTRACE_ND(evt, 0, 6, d1, d2, d3,  d4, d5, d6)
  104.41 +#define HVMTRACE_5D(evt, d1, d2, d3, d4, d5)        \
  104.42 +                      HVMTRACE_ND(evt, 0, 5, d1, d2, d3,  d4, d5, 0)
  104.43 +#define HVMTRACE_4D(evt, d1, d2, d3, d4)               \
  104.44 +                      HVMTRACE_ND(evt, 0, 4, d1, d2, d3,  d4, 0, 0)
  104.45 +#define HVMTRACE_3D(evt, d1, d2, d3)                   \
  104.46 +                      HVMTRACE_ND(evt, 0, 3, d1, d2, d3,  0, 0, 0)
  104.47 +#define HVMTRACE_2D(evt, d1, d2)                       \
  104.48 +                      HVMTRACE_ND(evt, 0, 2, d1, d2,  0,  0, 0, 0)
  104.49 +#define HVMTRACE_1D(evt, d1)                           \
  104.50 +                      HVMTRACE_ND(evt, 0, 1, d1,  0,  0,  0, 0, 0)
  104.51 +#define HVMTRACE_0D(evt)                               \
  104.52 +                      HVMTRACE_ND(evt, 0, 0, 0,  0,  0,  0, 0, 0)
  104.53  
  104.54  
  104.55  
  104.56  #ifdef __x86_64__
  104.57 -#define HVMTRACE_LONG_1D(evt, vcpu, d1)                  \
  104.58 -                   HVMTRACE_2D(evt ## 64, vcpu, (d1) & 0xFFFFFFFF, (d1) >> 32)
  104.59 -#define HVMTRACE_LONG_2D(evt,vcpu,d1,d2, ...)              \
  104.60 -                   HVMTRACE_3D(evt ## 64, vcpu, d1, d2)
  104.61 -#define HVMTRACE_LONG_3D(evt, vcpu, d1, d2, d3, ...)      \
  104.62 -                   HVMTRACE_4D(evt ## 64, vcpu, d1, d2, d3)
  104.63 -#define HVMTRACE_LONG_4D(evt, vcpu, d1, d2, d3, d4, ...)  \
  104.64 -                   HVMTRACE_5D(evt ## 64, vcpu, d1, d2, d3, d4)
  104.65 +#define HVMTRACE_LONG_1D(evt, d1)                  \
  104.66 +                   HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
  104.67 +#define HVMTRACE_LONG_2D(evt, d1, d2, ...)              \
  104.68 +                   HVMTRACE_3D(evt ## 64, d1, d2)
  104.69 +#define HVMTRACE_LONG_3D(evt, d1, d2, d3, ...)      \
  104.70 +                   HVMTRACE_4D(evt ## 64, d1, d2, d3)
  104.71 +#define HVMTRACE_LONG_4D(evt, d1, d2, d3, d4, ...)  \
  104.72 +                   HVMTRACE_5D(evt ## 64, d1, d2, d3, d4)
  104.73  #else
  104.74  #define HVMTRACE_LONG_1D HVMTRACE_1D
  104.75  #define HVMTRACE_LONG_2D HVMTRACE_2D
   105.1 --- a/xen/include/asm-x86/io_apic.h	Fri Sep 12 14:32:45 2008 +0900
   105.2 +++ b/xen/include/asm-x86/io_apic.h	Fri Sep 12 14:47:40 2008 +0900
   105.3 @@ -162,8 +162,6 @@ static inline void io_apic_modify(unsign
   105.4  /* 1 if "noapic" boot option passed */
   105.5  extern int skip_ioapic_setup;
   105.6  
   105.7 -extern int msi_enable;
   105.8 -
   105.9  /*
  105.10   * If we use the IO-APIC for IRQ routing, disable automatic
  105.11   * assignment of PCI IRQ's.
   106.1 --- a/xen/include/asm-x86/mm.h	Fri Sep 12 14:32:45 2008 +0900
   106.2 +++ b/xen/include/asm-x86/mm.h	Fri Sep 12 14:47:40 2008 +0900
   106.3 @@ -59,6 +59,17 @@ struct page_info
   106.4          u32 tlbflush_timestamp;
   106.5  
   106.6          /*
   106.7 +         * When PGT_partial is true then this field is valid and indicates
   106.8 +         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
   106.9 +         * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
  106.10 +         * partially validated.
  106.11 +         */
  106.12 +        struct {
  106.13 +            u16 nr_validated_ptes;
  106.14 +            bool_t partial_pte;
  106.15 +        };
  106.16 +
  106.17 +        /*
  106.18           * Guest pages with a shadow.  This does not conflict with
  106.19           * tlbflush_timestamp since page table pages are explicitly not
  106.20           * tracked for TLB-flush avoidance when a guest runs in shadow mode.
  106.21 @@ -86,9 +97,12 @@ struct page_info
  106.22   /* PAE only: is this an L2 page directory containing Xen-private mappings? */
  106.23  #define _PGT_pae_xen_l2     26
  106.24  #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
  106.25 +/* Has this page been *partially* validated for use as its current type? */
  106.26 +#define _PGT_partial        25
  106.27 +#define PGT_partial         (1U<<_PGT_partial)
  106.28  
  106.29 - /* 26-bit count of uses of this frame as its current type. */
  106.30 -#define PGT_count_mask      ((1U<<26)-1)
  106.31 + /* 25-bit count of uses of this frame as its current type. */
  106.32 +#define PGT_count_mask      ((1U<<25)-1)
  106.33  
  106.34   /* Cleared when the owning guest 'frees' this page. */
  106.35  #define _PGC_allocated      31
  106.36 @@ -154,7 +168,8 @@ extern unsigned long max_page;
  106.37  extern unsigned long total_pages;
  106.38  void init_frametable(void);
  106.39  
  106.40 -void free_page_type(struct page_info *page, unsigned long type);
  106.41 +int free_page_type(struct page_info *page, unsigned long type,
  106.42 +                   int preemptible);
  106.43  int _shadow_mode_refcounts(struct domain *d);
  106.44  
  106.45  void cleanup_page_cacheattr(struct page_info *page);
  106.46 @@ -165,6 +180,8 @@ void put_page(struct page_info *page);
  106.47  int  get_page(struct page_info *page, struct domain *domain);
  106.48  void put_page_type(struct page_info *page);
  106.49  int  get_page_type(struct page_info *page, unsigned long type);
  106.50 +int  put_page_type_preemptible(struct page_info *page);
  106.51 +int  get_page_type_preemptible(struct page_info *page, unsigned long type);
  106.52  int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
  106.53  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
  106.54  
  106.55 @@ -174,6 +191,19 @@ static inline void put_page_and_type(str
  106.56      put_page(page);
  106.57  }
  106.58  
  106.59 +static inline int put_page_and_type_preemptible(struct page_info *page,
  106.60 +                                                int preemptible)
  106.61 +{
  106.62 +    int rc = 0;
  106.63 +
  106.64 +    if ( preemptible )
  106.65 +        rc = put_page_type_preemptible(page);
  106.66 +    else
  106.67 +        put_page_type(page);
  106.68 +    if ( likely(rc == 0) )
  106.69 +        put_page(page);
  106.70 +    return rc;
  106.71 +}
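
The preemptible variants exist because validating or freeing a deep pagetable tree can take far too long to run atomically; a nonzero return means the walk stopped partway (recorded via PGT_partial and nr_validated_ptes above) and the operation should be re-issued. A hedged sketch of a caller, assuming the -EAGAIN convention; map_pgtable() itself is purely illustrative:

    /* Illustrative only: not a function added by this changeset. */
    static int map_pgtable(struct page_info *page)
    {
        int rc = get_page_type_preemptible(page, PGT_l4_page_table);

        if ( rc == -EAGAIN )
            return rc;      /* partial progress: re-issue (continuation) */
        if ( rc )
            return rc;      /* genuine validation failure */

        /* ... type reference held: safe to install the mapping ... */

        return put_page_and_type_preemptible(page, 1 /* preemptible */);
    }
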
  106.72  
  106.73  static inline int get_page_and_type(struct page_info *page,
  106.74                                      struct domain *domain,
   107.1 --- a/xen/include/asm-x86/msr-index.h	Fri Sep 12 14:32:45 2008 +0900
   107.2 +++ b/xen/include/asm-x86/msr-index.h	Fri Sep 12 14:47:40 2008 +0900
   107.3 @@ -194,11 +194,23 @@
   107.4  #define _K8_VMCR_SVME_DISABLE		4
   107.5  #define K8_VMCR_SVME_DISABLE		(1 << _K8_VMCR_SVME_DISABLE)
   107.6  
   107.7 +/* AMD64 MSRs */
   107.8 +#define MSR_AMD64_NB_CFG		0xc001001f
   107.9 +#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT	46
  107.10 +
  107.11  /* AMD Family10h machine check MSRs */
  107.12  #define MSR_F10_MC4_MISC1		0xc0000408
  107.13  #define MSR_F10_MC4_MISC2		0xc0000409
  107.14  #define MSR_F10_MC4_MISC3		0xc000040A
  107.15  
  107.16 +/* Other AMD Fam10h MSRs */
  107.17 +#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
  107.18 +#define FAM10H_MMIO_CONF_ENABLE_BIT	0
  107.19 +#define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
  107.20 +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
  107.21 +#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffff
  107.22 +#define FAM10H_MMIO_CONF_BASE_SHIFT	20
  107.23 +
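
The Fam10h MMCONFIG MSR packs an enable bit, a log2 bus-range field, and the aligned base address into one register, which the masks above pick apart. A standalone decode with a made-up MSR value:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* enable=1, bus range = 2^8 = 256 buses, base = 0xe0000000 */
        uint64_t msr = (0xe00ULL << 20) | (8u << 2) | 1u;

        int      enabled = msr & 1;                            /* ENABLE_BIT */
        unsigned buses   = 1u << ((msr >> 2) & 0xf);           /* BUSRANGE   */
        uint64_t base    = ((msr >> 20) & 0xfffffffULL) << 20; /* BASE field */

        printf("enable=%d buses=%u base=%#llx\n",
               enabled, buses, (unsigned long long)base);
        return 0;
    }
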
  107.24  /* K6 MSRs */
  107.25  #define MSR_K6_EFER			0xc0000080
  107.26  #define MSR_K6_STAR			0xc0000081
   108.1 --- a/xen/include/asm-x86/shadow.h	Fri Sep 12 14:32:45 2008 +0900
   108.2 +++ b/xen/include/asm-x86/shadow.h	Fri Sep 12 14:47:40 2008 +0900
   108.3 @@ -115,8 +115,6 @@ static inline void shadow_remove_all_sha
   108.4      sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
   108.5  }
   108.6  
   108.7 -#define guest_physmap_max_mem_pages(d, n) (0)
   108.8 -
   108.9  #endif /* _XEN_SHADOW_H */
  108.10  
  108.11  /*
   109.1 --- a/xen/include/public/trace.h	Fri Sep 12 14:32:45 2008 +0900
   109.2 +++ b/xen/include/public/trace.h	Fri Sep 12 14:47:40 2008 +0900
   109.3 @@ -37,6 +37,7 @@
   109.4  #define TRC_HVM      0x0008f000    /* Xen HVM trace            */
   109.5  #define TRC_MEM      0x0010f000    /* Xen memory trace         */
   109.6  #define TRC_PV       0x0020f000    /* Xen PV traces            */
   109.7 +#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
   109.8  #define TRC_ALL      0x0ffff000
   109.9  #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
  109.10  #define TRC_HD_CYCLE_FLAG (1UL<<31)
  109.11 @@ -50,26 +51,30 @@
  109.12  #define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
  109.13  #define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
  109.14  
  109.15 +#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
  109.16 +#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
  109.17 +
  109.18  /* Trace events per class */
  109.19  #define TRC_LOST_RECORDS        (TRC_GEN + 1)
  109.20  #define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
  109.21  #define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
  109.22  
  109.23 -#define TRC_SCHED_DOM_ADD       (TRC_SCHED +  1)
  109.24 -#define TRC_SCHED_DOM_REM       (TRC_SCHED +  2)
  109.25 -#define TRC_SCHED_SLEEP         (TRC_SCHED +  3)
  109.26 -#define TRC_SCHED_WAKE          (TRC_SCHED +  4)
  109.27 -#define TRC_SCHED_YIELD         (TRC_SCHED +  5)
  109.28 -#define TRC_SCHED_BLOCK         (TRC_SCHED +  6)
  109.29 -#define TRC_SCHED_SHUTDOWN      (TRC_SCHED +  7)
  109.30 -#define TRC_SCHED_CTL           (TRC_SCHED +  8)
  109.31 -#define TRC_SCHED_ADJDOM        (TRC_SCHED +  9)
  109.32 -#define TRC_SCHED_SWITCH        (TRC_SCHED + 10)
  109.33 -#define TRC_SCHED_S_TIMER_FN    (TRC_SCHED + 11)
  109.34 -#define TRC_SCHED_T_TIMER_FN    (TRC_SCHED + 12)
  109.35 -#define TRC_SCHED_DOM_TIMER_FN  (TRC_SCHED + 13)
  109.36 -#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14)
  109.37 -#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15)
  109.38 +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
  109.39 +#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
  109.40 +#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
  109.41 +#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
  109.42 +#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
  109.43 +#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
  109.44 +#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
  109.45 +#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
  109.46 +#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
  109.47 +#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
  109.48 +#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
  109.49 +#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
  109.50 +#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
  109.51 +#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
  109.52 +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
  109.53 +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
  109.54  
  109.55  #define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
  109.56  #define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
  109.57 @@ -89,6 +94,22 @@
  109.58    /* Indicates that addresses in trace record are 64 bits */
  109.59  #define TRC_64_FLAG               (0x100) 
  109.60  
  109.61 +#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
  109.62 +#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
  109.63 +#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
  109.64 +#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
  109.65 +#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
  109.66 +#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
  109.67 +#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
  109.68 +#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
  109.69 +#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
  109.70 +#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
  109.71 +#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
  109.72 +#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
  109.73 +#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
  109.74 +#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
  109.75 +#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
  109.76 +
  109.77  /* trace events per subclass */
  109.78  #define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
  109.79  #define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
   110.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   110.2 +++ b/xen/include/xen/cpuidle.h	Fri Sep 12 14:47:40 2008 +0900
   110.3 @@ -0,0 +1,82 @@
   110.4 +/*
   110.5 + * cpuidle.h - xen idle state module derived from Linux 
   110.6 + *
   110.7 + * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
   110.8 + *          Shaohua Li <shaohua.li@intel.com>
   110.9 + *          Adam Belay <abelay@novell.com>
  110.10 + *  Copyright (C) 2008 Intel Corporation
  110.11 + *
  110.12 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  110.13 + *
  110.14 + *  This program is free software; you can redistribute it and/or modify
  110.15 + *  it under the terms of the GNU General Public License as published by
  110.16 + *  the Free Software Foundation; either version 2 of the License, or (at
  110.17 + *  your option) any later version.
  110.18 + *
  110.19 + *  This program is distributed in the hope that it will be useful, but
  110.20 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
  110.21 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  110.22 + *  General Public License for more details.
  110.23 + *
  110.24 + *  You should have received a copy of the GNU General Public License along
  110.25 + *  with this program; if not, write to the Free Software Foundation, Inc.,
  110.26 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  110.27 + *
  110.28 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  110.29 + */
  110.30 +#ifndef _XEN_CPUIDLE_H
  110.31 +#define _XEN_CPUIDLE_H
  110.32 +
  110.33 +#define ACPI_PROCESSOR_MAX_POWER        8
  110.34 +#define CPUIDLE_NAME_LEN                16
  110.35 +
  110.36 +struct acpi_processor_cx
  110.37 +{
  110.38 +    u8 valid;
  110.39 +    u8 type;
  110.40 +    u32 address;
  110.41 +    u8 space_id;
  110.42 +    u32 latency;
  110.43 +    u32 latency_ticks;
  110.44 +    u32 power;
  110.45 +    u32 usage;
  110.46 +    u64 time;
  110.47 +    u32 target_residency;
  110.48 +};
  110.49 +
  110.50 +struct acpi_processor_flags
  110.51 +{
  110.52 +    u8 bm_control:1;
  110.53 +    u8 bm_check:1;
  110.54 +    u8 has_cst:1;
  110.55 +    u8 power_setup_done:1;
  110.56 +    u8 bm_rld_set:1;
  110.57 +};
  110.58 +
  110.59 +struct acpi_processor_power
  110.60 +{
  110.61 +    unsigned int cpu;
  110.62 +    struct acpi_processor_flags flags;
  110.63 +    struct acpi_processor_cx *last_state;
  110.64 +    struct acpi_processor_cx *safe_state;
  110.65 +    u32 last_residency;
  110.66 +    void *gdata; /* governor specific data */
  110.67 +    u32 count;
  110.68 +    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
  110.69 +};
  110.70 +
  110.71 +struct cpuidle_governor
  110.72 +{
  110.73 +    char                    name[CPUIDLE_NAME_LEN];
  110.74 +    unsigned int            rating;
  110.75 +
  110.76 +    int  (*enable)          (struct acpi_processor_power *dev);
  110.77 +    void (*disable)         (struct acpi_processor_power *dev);
  110.78 +
  110.79 +    int  (*select)          (struct acpi_processor_power *dev);
  110.80 +    void (*reflect)         (struct acpi_processor_power *dev);
  110.81 +};
  110.82 +
  110.83 +extern struct cpuidle_governor *cpuidle_current_governor;
  110.84 +
  110.85 +#endif /* _XEN_CPUIDLE_H */
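
For reference, a governor plugs into this interface by filling in the callbacks; a minimal sketch whose policy (always pick the deepest valid state) is illustrative rather than anything this changeset ships:

    #include <xen/cpuidle.h>

    /* Assumes select() returns an index into dev->states[]. */
    static int deepest_select(struct acpi_processor_power *dev)
    {
        int i;

        for ( i = dev->count - 1; i > 0; i-- )
            if ( dev->states[i].valid )
                return i;
        return 0;                       /* shallowest state as a fallback */
    }

    static void deepest_reflect(struct acpi_processor_power *dev)
    {
        /* A real policy would feed dev->last_residency back into its model. */
    }

    static struct cpuidle_governor deepest_governor = {
        .name    = "deepest",
        .rating  = 1,
        .select  = deepest_select,
        .reflect = deepest_reflect,
    };
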
   111.1 --- a/xen/include/xen/iommu.h	Fri Sep 12 14:32:45 2008 +0900
   111.2 +++ b/xen/include/xen/iommu.h	Fri Sep 12 14:47:40 2008 +0900
   111.3 @@ -31,6 +31,7 @@ extern int vtd_enabled;
   111.4  extern int iommu_enabled;
   111.5  extern int iommu_pv_enabled;
   111.6  extern int force_iommu;
   111.7 +extern int iommu_passthrough;
   111.8  
   111.9  #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
  111.10  
   112.1 --- a/xen/include/xen/sched.h	Fri Sep 12 14:32:45 2008 +0900
   112.2 +++ b/xen/include/xen/sched.h	Fri Sep 12 14:47:40 2008 +0900
   112.3 @@ -106,8 +106,6 @@ struct vcpu
   112.4      bool_t           fpu_initialised;
   112.5      /* Has the FPU been used since it was last saved? */
   112.6      bool_t           fpu_dirtied;
   112.7 -    /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
   112.8 -    bool_t           is_polling;
   112.9      /* Initialization completed for this VCPU? */
  112.10      bool_t           is_initialised;
  112.11      /* Currently running on a CPU? */
  112.12 @@ -134,6 +132,13 @@ struct vcpu
  112.13      /* VCPU affinity is temporarily locked from controller changes? */
  112.14      bool_t           affinity_locked;
  112.15  
  112.16 +    /*
  112.17 +     * > 0: a single port is being polled;
  112.18 +     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
  112.19 +     * < 0: multiple ports may be being polled.
  112.20 +     */
  112.21 +    int              poll_evtchn;
  112.22 +
  112.23      unsigned long    pause_flags;
  112.24      atomic_t         pause_count;
  112.25  
  112.26 @@ -209,8 +214,6 @@ struct domain
  112.27      struct domain   *target;
  112.28      /* Is this guest being debugged by dom0? */
  112.29      bool_t           debugger_attached;
  112.30 -    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
  112.31 -    bool_t           is_polling;
  112.32      /* Is this guest dying (i.e., a zombie)? */
  112.33      enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
  112.34      /* Domain is paused by controller software? */
  112.35 @@ -218,6 +221,9 @@ struct domain
  112.36      /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
  112.37      bool_t           is_pinned;
  112.38  
  112.39 +    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
  112.40 +    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
  112.41 +
  112.42      /* Guest has shut down (inc. reason code)? */
  112.43      spinlock_t       shutdown_lock;
  112.44      bool_t           is_shutting_down; /* in process of shutting down? */
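
The tri-state poll_evtchn encoding is what lets evtchn_set_pending() above skip pollers known to be waiting on a different single port. Hypothetical helpers (not part of this changeset) spelling out the three cases:

    /* > 0: one specific port; == 0: not polling; < 0: many ports. */
    static inline int vcpu_is_polling(const struct vcpu *v)
    {
        return v->poll_evtchn != 0;
    }

    static inline int vcpu_may_poll_port(const struct vcpu *v, int port)
    {
        return (v->poll_evtchn < 0) || (v->poll_evtchn == port);
    }
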
  112.45 @@ -507,6 +513,7 @@ static inline int vcpu_runnable(struct v
  112.46               atomic_read(&v->domain->pause_count));
  112.47  }
  112.48  
  112.49 +void vcpu_unblock(struct vcpu *v);
  112.50  void vcpu_pause(struct vcpu *v);
  112.51  void vcpu_pause_nosync(struct vcpu *v);
  112.52  void domain_pause(struct domain *d);
  112.53 @@ -517,18 +524,13 @@ void domain_unpause_by_systemcontroller(
  112.54  void cpu_init(void);
  112.55  
  112.56  void vcpu_force_reschedule(struct vcpu *v);
  112.57 +void cpu_disable_scheduler(void);
  112.58  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
  112.59  int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
  112.60  void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
  112.61  
  112.62  void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
  112.63  
  112.64 -static inline void vcpu_unblock(struct vcpu *v)
  112.65 -{
  112.66 -    if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
  112.67 -        vcpu_wake(v);
  112.68 -}
  112.69 -
  112.70  #define IS_PRIV(_d) ((_d)->is_privileged)
  112.71  #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
  112.72  
   113.1 --- a/xen/include/xen/trace.h	Fri Sep 12 14:32:45 2008 +0900
   113.2 +++ b/xen/include/xen/trace.h	Fri Sep 12 14:47:40 2008 +0900
   113.3 @@ -34,6 +34,8 @@ void init_trace_bufs(void);
   113.4  /* used to retrieve the physical address of the trace buffers */
   113.5  int tb_control(struct xen_sysctl_tbuf_op *tbc);
   113.6  
   113.7 +int trace_will_trace_event(u32 event);
   113.8 +
   113.9  void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
  113.10  
  113.11  static inline void trace_var(u32 event, int cycles, int extra,
   114.1 --- a/xen/include/xsm/xsm.h	Fri Sep 12 14:32:45 2008 +0900
   114.2 +++ b/xen/include/xsm/xsm.h	Fri Sep 12 14:47:40 2008 +0900
   114.3 @@ -64,16 +64,17 @@ struct xsm_operations {
   114.4      int (*getvcpucontext) (struct domain *d);
   114.5      int (*getvcpuinfo) (struct domain *d);
   114.6      int (*domain_settime) (struct domain *d);
   114.7 +    int (*set_target) (struct domain *d, struct domain *e);
   114.8      int (*tbufcontrol) (void);
   114.9      int (*readconsole) (uint32_t clear);
  114.10      int (*sched_id) (void);
  114.11      int (*setdomainmaxmem) (struct domain *d);
  114.12      int (*setdomainhandle) (struct domain *d);
  114.13      int (*setdebugging) (struct domain *d);
  114.14 -    int (*irq_permission) (struct domain *d, uint8_t pirq, uint8_t access);
  114.15 -    int (*iomem_permission) (struct domain *d, unsigned long mfn, 
  114.16 -                                                                uint8_t access);
  114.17      int (*perfcontrol) (void);
  114.18 +    int (*debug_keys) (void);
  114.19 +    int (*getcpuinfo) (void);
  114.20 +    int (*availheap) (void);
  114.21  
  114.22      int (*evtchn_unbound) (struct domain *d, struct evtchn *chn, domid_t id2);
  114.23      int (*evtchn_interdomain) (struct domain *d1, struct evtchn *chn1,
  114.24 @@ -106,13 +107,13 @@ struct xsm_operations {
  114.25  
  114.26      int (*kexec) (void);
  114.27      int (*schedop_shutdown) (struct domain *d1, struct domain *d2);
  114.28 +    int (*add_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
  114.29 +    int (*remove_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
  114.30  
  114.31      long (*__do_xsm_op) (XEN_GUEST_HANDLE(xsm_op_t) op);
  114.32  
  114.33  #ifdef CONFIG_X86
  114.34      int (*shadow_control) (struct domain *d, uint32_t op);
  114.35 -    int (*ioport_permission) (struct domain *d, uint32_t ioport, 
  114.36 -                                                                uint8_t access);
  114.37      int (*getpageframeinfo) (struct page_info *page);
  114.38      int (*getmemlist) (struct domain *d);
  114.39      int (*hypercall_init) (struct domain *d);
  114.40 @@ -130,13 +131,26 @@ struct xsm_operations {
  114.41      int (*microcode) (void);
  114.42      int (*physinfo) (void);
  114.43      int (*platform_quirk) (uint32_t);
  114.44 +    int (*firmware_info) (void);
  114.45 +    int (*acpi_sleep) (void);
  114.46 +    int (*change_freq) (void);
  114.47 +    int (*getidletime) (void);
  114.48      int (*machine_memory_map) (void);
  114.49      int (*domain_memory_map) (struct domain *d);
  114.50 -    int (*mmu_normal_update) (struct domain *d, intpte_t fpte);
  114.51 +    int (*mmu_normal_update) (struct domain *d, struct domain *f, 
  114.52 +                                                                intpte_t fpte);
  114.53      int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
  114.54 -    int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
  114.55 +    int (*update_va_mapping) (struct domain *d, struct domain *f, 
  114.56 +                                                            l1_pgentry_t pte);
  114.57      int (*add_to_physmap) (struct domain *d1, struct domain *d2);
  114.58      int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
  114.59 +    int (*sendtrigger) (struct domain *d);
  114.60 +    int (*test_assign_device) (uint32_t machine_bdf);
  114.61 +    int (*assign_device) (struct domain *d, uint32_t machine_bdf);
  114.62 +    int (*deassign_device) (struct domain *d, uint32_t machine_bdf);
  114.63 +    int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
  114.64 +    int (*pin_mem_cacheattr) (struct domain *d);
  114.65 +    int (*ext_vcpucontext) (struct domain *d, uint32_t cmd);
  114.66  #endif
  114.67  };
  114.68  
  114.69 @@ -215,6 +229,11 @@ static inline int xsm_domain_settime (st
  114.70      return xsm_call(domain_settime(d));
  114.71  }
  114.72  
  114.73 +static inline int xsm_set_target (struct domain *d, struct domain *e)
  114.74 +{
  114.75 +    return xsm_call(set_target(d, e));
  114.76 +}
  114.77 +
  114.78  static inline int xsm_tbufcontrol (void)
  114.79  {
  114.80      return xsm_call(tbufcontrol());
  114.81 @@ -245,23 +264,26 @@ static inline int xsm_setdebugging (stru
  114.82      return xsm_call(setdebugging(d));
  114.83  }
  114.84  
  114.85 -static inline int xsm_irq_permission (struct domain *d, uint8_t pirq,
  114.86 -                                                                uint8_t access)
  114.87 -{
  114.88 -    return xsm_call(irq_permission(d, pirq, access));
  114.89 -} 
  114.90 -
  114.91 -static inline int xsm_iomem_permission (struct domain *d, unsigned long mfn,
  114.92 -                                                                uint8_t access)
  114.93 -{
  114.94 -    return xsm_call(iomem_permission(d, mfn, access));
  114.95 -}
  114.96 -
  114.97  static inline int xsm_perfcontrol (void)
  114.98  {
  114.99      return xsm_call(perfcontrol());
 114.100  }
 114.101  
 114.102 +static inline int xsm_debug_keys (void)
 114.103 +{
 114.104 +    return xsm_call(debug_keys());
 114.105 +}
 114.106 +
 114.107 +static inline int xsm_availheap (void)
 114.108 +{
 114.109 +    return xsm_call(availheap());
 114.110 +}
 114.111 +
 114.112 +static inline int xsm_getcpuinfo (void)
 114.113 +{
 114.114 +    return xsm_call(getcpuinfo());
 114.115 +}
 114.116 +
 114.117  static inline int xsm_evtchn_unbound (struct domain *d1, struct evtchn *chn,
 114.118                                                                      domid_t id2)
 114.119  {
 114.120 @@ -387,6 +409,18 @@ static inline int xsm_schedop_shutdown (
 114.121      return xsm_call(schedop_shutdown(d1, d2));
 114.122  }
 114.123  
 114.124 +static inline int xsm_add_range (struct domain *d, char *name, unsigned long s,
 114.125 +                                                                        unsigned long e)
 114.126 +{
 114.127 +    return xsm_call(add_range(d, name, s, e));
 114.128 +}
 114.129 + 
 114.130 +static inline int xsm_remove_range (struct domain *d, char *name, unsigned long s,
 114.131 +                                                                        unsigned long e)
 114.132 +{
 114.133 +    return xsm_call(remove_range(d, name, s, e));
 114.134 +}
 114.135 +
 114.136  static inline long __do_xsm_op (XEN_GUEST_HANDLE(xsm_op_t) op)
 114.137  {
 114.138      return xsm_call(__do_xsm_op(op));
 114.139 @@ -413,12 +447,6 @@ static inline int xsm_shadow_control (st
 114.140      return xsm_call(shadow_control(d, op));
 114.141  }
 114.142  
 114.143 -static inline int xsm_ioport_permission (struct domain *d, uint32_t ioport,
 114.144 -                                                                uint8_t access)
 114.145 -{
 114.146 -    return xsm_call(ioport_permission(d, ioport, access));
 114.147 -}
 114.148 -
 114.149  static inline int xsm_getpageframeinfo (struct page_info *page)
 114.150  {
 114.151      return xsm_call(getpageframeinfo(page));
 114.152 @@ -504,6 +532,26 @@ static inline int xsm_platform_quirk (ui
 114.153      return xsm_call(platform_quirk(quirk));
 114.154  }
 114.155  
 114.156 +static inline int xsm_firmware_info (void)
 114.157 +{
 114.158 +    return xsm_call(firmware_info());
 114.159 +}
 114.160 +
 114.161 +static inline int xsm_acpi_sleep (void)
 114.162 +{
 114.163 +    return xsm_call(acpi_sleep());
 114.164 +}
 114.165 +
 114.166 +static inline int xsm_change_freq (void)
 114.167 +{
 114.168 +    return xsm_call(change_freq());
 114.169 +}
 114.170 +
 114.171 +static inline int xsm_getidletime (void)
 114.172 +{
 114.173 +    return xsm_call(getidletime());
 114.174 +}
 114.175 +
 114.176  static inline int xsm_machine_memory_map(void)
 114.177  {
 114.178      return xsm_call(machine_memory_map());
 114.179 @@ -514,9 +562,10 @@ static inline int xsm_domain_memory_map(
 114.180      return xsm_call(domain_memory_map(d));
 114.181  }
 114.182  
 114.183 -static inline int xsm_mmu_normal_update (struct domain *d, intpte_t fpte)
 114.184 +static inline int xsm_mmu_normal_update (struct domain *d, struct domain *f, 
 114.185 +                                                                intpte_t fpte)
 114.186  {
 114.187 -    return xsm_call(mmu_normal_update(d, fpte));
 114.188 +    return xsm_call(mmu_normal_update(d, f, fpte));
 114.189  }
 114.190  
 114.191  static inline int xsm_mmu_machphys_update (struct domain *d, unsigned long mfn)
 114.192 @@ -524,9 +573,10 @@ static inline int xsm_mmu_machphys_updat
 114.193      return xsm_call(mmu_machphys_update(d, mfn));
 114.194  }
 114.195  
 114.196 -static inline int xsm_update_va_mapping(struct domain *d, l1_pgentry_t pte)
 114.197 +static inline int xsm_update_va_mapping(struct domain *d, struct domain *f, 
 114.198 +                                                            l1_pgentry_t pte)
 114.199  {
 114.200 -    return xsm_call(update_va_mapping(d, pte));
 114.201 +    return xsm_call(update_va_mapping(d, f, pte));
 114.202  }
 114.203  
 114.204  static inline int xsm_add_to_physmap(struct domain *d1, struct domain *d2)
 114.205 @@ -538,6 +588,42 @@ static inline int xsm_remove_from_physma
 114.206  {
 114.207      return xsm_call(remove_from_physmap(d1, d2));
 114.208  }
 114.209 +
 114.210 +static inline int xsm_sendtrigger(struct domain *d)
 114.211 +{
 114.212 +    return xsm_call(sendtrigger(d));
 114.213 +}
 114.214 +
 114.215 +static inline int xsm_test_assign_device(uint32_t machine_bdf)
 114.216 +{
 114.217 +    return xsm_call(test_assign_device(machine_bdf));
 114.218 +}
 114.219 +
 114.220 +static inline int xsm_assign_device(struct domain *d, uint32_t machine_bdf)
 114.221 +{
 114.222 +    return xsm_call(assign_device(d, machine_bdf));
 114.223 +}
 114.224 +
 114.225 +static inline int xsm_deassign_device(struct domain *d, uint32_t machine_bdf)
 114.226 +{
 114.227 +    return xsm_call(deassign_device(d, machine_bdf));
 114.228 +}
 114.229 +
 114.230 +static inline int xsm_bind_pt_irq(struct domain *d, 
 114.231 +                                                struct xen_domctl_bind_pt_irq *bind)
 114.232 +{
 114.233 +    return xsm_call(bind_pt_irq(d, bind));
 114.234 +}
 114.235 +
 114.236 +static inline int xsm_pin_mem_cacheattr(struct domain *d)
 114.237 +{
 114.238 +    return xsm_call(pin_mem_cacheattr(d));
 114.239 +}
 114.240 +
 114.241 +static inline int xsm_ext_vcpucontext(struct domain *d, uint32_t cmd)
 114.242 +{
 114.243 +    return xsm_call(ext_vcpucontext(d, cmd));
 114.244 +}
 114.245  #endif /* CONFIG_X86 */
 114.246  
 114.247  #endif /* __XSM_H */
   115.1 --- a/xen/xsm/dummy.c	Fri Sep 12 14:32:45 2008 +0900
   115.2 +++ b/xen/xsm/dummy.c	Fri Sep 12 14:47:40 2008 +0900
   115.3 @@ -84,6 +84,11 @@ static int dummy_domain_settime (struct 
   115.4      return 0;
   115.5  }
   115.6  
   115.7 +static int dummy_set_target (struct domain *d, struct domain *e)
   115.8 +{
   115.9 +    return 0;
  115.10 +}
  115.11 +
  115.12  static int dummy_tbufcontrol (void)
  115.13  {
  115.14      return 0;
  115.15 @@ -114,18 +119,22 @@ static int dummy_setdebugging (struct do
  115.16      return 0;
  115.17  }
  115.18  
  115.19 -static int dummy_irq_permission (struct domain *d, uint8_t pirq, uint8_t access)
  115.20 +static int dummy_perfcontrol (void)
  115.21  {
  115.22      return 0;
  115.23  }
  115.24  
  115.25 -static int dummy_iomem_permission (struct domain *d, unsigned long mfn,
  115.26 -                                                                uint8_t access)
  115.27 +static int dummy_debug_keys (void)
  115.28  {
  115.29      return 0;
  115.30  }
  115.31  
  115.32 -static int dummy_perfcontrol (void)
  115.33 +static int dummy_getcpuinfo (void)
  115.34 +{
  115.35 +    return 0;
  115.36 +}
  115.37 +
  115.38 +static int dummy_availheap (void)
  115.39  {
  115.40      return 0;
  115.41  }
  115.42 @@ -259,14 +268,19 @@ static long dummy___do_xsm_op(XEN_GUEST_
  115.43      return -ENOSYS;
  115.44  }
  115.45  
  115.46 -#ifdef CONFIG_X86
  115.47 -static int dummy_shadow_control (struct domain *d, uint32_t op)
  115.48 +static int dummy_add_range (struct domain *d, char *name, unsigned long s, unsigned long e)
  115.49  {
  115.50      return 0;
  115.51  }
  115.52  
  115.53 -static int dummy_ioport_permission (struct domain *d, uint32_t ioport, 
  115.54 -                                                                uint8_t access)
  115.55 +static int dummy_remove_range (struct domain *d, char *name, unsigned long s, 
  115.56 +                                                                        unsigned long e)
  115.57 +{
  115.58 +    return 0;
  115.59 +}
  115.60 +
  115.61 +#ifdef CONFIG_X86
  115.62 +static int dummy_shadow_control (struct domain *d, uint32_t op)
  115.63  {
  115.64      return 0;
  115.65  }
  115.66 @@ -356,6 +370,26 @@ static int dummy_platform_quirk (uint32_
  115.67      return 0;
  115.68  }
  115.69  
  115.70 +static int dummy_firmware_info (void)
  115.71 +{
  115.72 +    return 0;
  115.73 +}
  115.74 +
  115.75 +static int dummy_acpi_sleep (void)
  115.76 +{
  115.77 +    return 0;
  115.78 +}
  115.79 +
  115.80 +static int dummy_change_freq (void)
  115.81 +{
  115.82 +    return 0;
  115.83 +}
  115.84 +
  115.85 +static int dummy_getidletime (void)
  115.86 +{
  115.87 +    return 0;
  115.88 +}
  115.89 +
  115.90  static int dummy_machine_memory_map (void)
  115.91  {
  115.92      return 0;
  115.93 @@ -366,7 +400,8 @@ static int dummy_domain_memory_map (stru
  115.94      return 0;
  115.95  }
  115.96  
  115.97 -static int dummy_mmu_normal_update (struct domain *d, intpte_t fpte)
  115.98 +static int dummy_mmu_normal_update (struct domain *d, struct domain *f, 
  115.99 +                                                                intpte_t fpte)
 115.100  {
 115.101      return 0;
 115.102  }
 115.103 @@ -376,7 +411,8 @@ static int dummy_mmu_machphys_update (st
 115.104      return 0;
 115.105  }
 115.106  
 115.107 -static int dummy_update_va_mapping (struct domain *d, l1_pgentry_t pte)
 115.108 +static int dummy_update_va_mapping (struct domain *d, struct domain *f, 
 115.109 +                                                            l1_pgentry_t pte)
 115.110  {
 115.111      return 0;
 115.112  }
 115.113 @@ -386,6 +422,41 @@ static int dummy_add_to_physmap (struct 
 115.114      return 0;
 115.115  }
 115.116  
 115.117 +static int dummy_sendtrigger (struct domain *d)
 115.118 +{
 115.119 +    return 0;
 115.120 +}
 115.121 +
 115.122 +static int dummy_test_assign_device (uint32_t machine_bdf)
 115.123 +{
 115.124 +    return 0;
 115.125 +}
 115.126 +
 115.127 +static int dummy_assign_device (struct domain *d, uint32_t machine_bdf)
 115.128 +{
 115.129 +    return 0;
 115.130 +}
 115.131 +
 115.132 +static int dummy_deassign_device (struct domain *d, uint32_t machine_bdf)
 115.133 +{
 115.134 +    return 0;
 115.135 +}
 115.136 +
 115.137 +static int dummy_bind_pt_irq (struct domain *d, struct xen_domctl_bind_pt_irq *bind)
 115.138 +{
 115.139 +    return 0;
 115.140 +}
 115.141 +
 115.142 +static int dummy_pin_mem_cacheattr (struct domain *d)
 115.143 +{
 115.144 +    return 0;
 115.145 +}
 115.146 +
 115.147 +static int dummy_ext_vcpucontext (struct domain *d, uint32_t cmd)
 115.148 +{
 115.149 +    return 0;
 115.150 +}
 115.151 +
 115.152  static int dummy_remove_from_physmap (struct domain *d1, struct domain *d2)
 115.153  {
 115.154      return 0;
 115.155 @@ -420,15 +491,17 @@ void xsm_fixup_ops (struct xsm_operation
 115.156      set_to_dummy_if_null(ops, getvcpucontext);
 115.157      set_to_dummy_if_null(ops, getvcpuinfo);
 115.158      set_to_dummy_if_null(ops, domain_settime);
 115.159 +    set_to_dummy_if_null(ops, set_target);
 115.160      set_to_dummy_if_null(ops, tbufcontrol);
 115.161      set_to_dummy_if_null(ops, readconsole);
 115.162      set_to_dummy_if_null(ops, sched_id);
 115.163      set_to_dummy_if_null(ops, setdomainmaxmem);
 115.164      set_to_dummy_if_null(ops, setdomainhandle);
 115.165      set_to_dummy_if_null(ops, setdebugging);
 115.166 -    set_to_dummy_if_null(ops, irq_permission);
 115.167 -    set_to_dummy_if_null(ops, iomem_permission);
 115.168      set_to_dummy_if_null(ops, perfcontrol);
 115.169 +    set_to_dummy_if_null(ops, debug_keys);
 115.170 +    set_to_dummy_if_null(ops, getcpuinfo);
 115.171 +    set_to_dummy_if_null(ops, availheap);
 115.172  
 115.173      set_to_dummy_if_null(ops, evtchn_unbound);
 115.174      set_to_dummy_if_null(ops, evtchn_interdomain);
 115.175 @@ -461,11 +534,13 @@ void xsm_fixup_ops (struct xsm_operation
 115.176      set_to_dummy_if_null(ops, kexec);
 115.177      set_to_dummy_if_null(ops, schedop_shutdown);
 115.178  
 115.179 +    set_to_dummy_if_null(ops, add_range);
 115.180 +    set_to_dummy_if_null(ops, remove_range);
 115.181 +
 115.182      set_to_dummy_if_null(ops, __do_xsm_op);
 115.183  
 115.184  #ifdef CONFIG_X86
 115.185      set_to_dummy_if_null(ops, shadow_control);
 115.186 -    set_to_dummy_if_null(ops, ioport_permission);
 115.187      set_to_dummy_if_null(ops, getpageframeinfo);
 115.188      set_to_dummy_if_null(ops, getmemlist);
 115.189      set_to_dummy_if_null(ops, hypercall_init);
 115.190 @@ -483,6 +558,10 @@ void xsm_fixup_ops (struct xsm_operation
 115.191      set_to_dummy_if_null(ops, microcode);
 115.192      set_to_dummy_if_null(ops, physinfo);
 115.193      set_to_dummy_if_null(ops, platform_quirk);
 115.194 +    set_to_dummy_if_null(ops, firmware_info);
 115.195 +    set_to_dummy_if_null(ops, acpi_sleep);
 115.196 +    set_to_dummy_if_null(ops, change_freq);
 115.197 +    set_to_dummy_if_null(ops, getidletime);
 115.198      set_to_dummy_if_null(ops, machine_memory_map);
 115.199      set_to_dummy_if_null(ops, domain_memory_map);
 115.200      set_to_dummy_if_null(ops, mmu_normal_update);
 115.201 @@ -490,5 +569,12 @@ void xsm_fixup_ops (struct xsm_operation
 115.202      set_to_dummy_if_null(ops, update_va_mapping);
 115.203      set_to_dummy_if_null(ops, add_to_physmap);
 115.204      set_to_dummy_if_null(ops, remove_from_physmap);
 115.205 +    set_to_dummy_if_null(ops, sendtrigger);
 115.206 +    set_to_dummy_if_null(ops, test_assign_device);
 115.207 +    set_to_dummy_if_null(ops, assign_device);
 115.208 +    set_to_dummy_if_null(ops, deassign_device);
 115.209 +    set_to_dummy_if_null(ops, bind_pt_irq);
 115.210 +    set_to_dummy_if_null(ops, pin_mem_cacheattr);
 115.211 +    set_to_dummy_if_null(ops, ext_vcpucontext);
 115.212  #endif
 115.213  }
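
The dummy module exists so that no XSM hook is ever NULL: xsm_fixup_ops() points every hook the active security module leaves unset at the matching dummy_* stub, each of which simply permits the operation by returning 0. A minimal sketch of the fill-in macro, inferred from the pattern visible above (the real definition in xen/xsm/dummy.c may additionally log the override):

    #define set_to_dummy_if_null(ops, function)             \
        do {                                                \
            if ( ops->function == NULL )                    \
                ops->function = dummy_##function;           \
        } while (0)
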
   116.1 --- a/xen/xsm/flask/hooks.c	Fri Sep 12 14:32:45 2008 +0900
   116.2 +++ b/xen/xsm/flask/hooks.c	Fri Sep 12 14:47:40 2008 +0900
   116.3 @@ -11,6 +11,7 @@
   116.4  #include <xen/init.h>
   116.5  #include <xen/lib.h>
   116.6  #include <xen/sched.h>
   116.7 +#include <xen/paging.h>
   116.8  #include <xen/xmalloc.h>
   116.9  #include <xsm/xsm.h>
  116.10  #include <xen/spinlock.h>
  116.11 @@ -129,8 +130,7 @@ static int flask_evtchn_unbound(struct d
  116.12      if ( rc )
  116.13          goto out;
  116.14  
  116.15 -    rc = avc_has_perm(dsec->sid, newsid, SECCLASS_EVENT,
  116.16 -                                            EVENT__CREATE|EVENT__ALLOC, NULL);
  116.17 +    rc = avc_has_perm(dsec->sid, newsid, SECCLASS_EVENT, EVENT__CREATE, NULL);
  116.18      if ( rc )
  116.19          goto out;
  116.20  
  116.21 @@ -210,7 +210,22 @@ static void flask_evtchn_close_post(stru
  116.22  
  116.23  static int flask_evtchn_send(struct domain *d, struct evtchn *chn)
  116.24  {
  116.25 -    return domain_has_evtchn(d, chn, EVENT__SEND);
  116.26 +    int rc;
  116.27 +
  116.28 +    switch ( chn->state )
  116.29 +    {
  116.30 +    case ECS_INTERDOMAIN:
  116.31 +        rc = domain_has_evtchn(d, chn, EVENT__SEND);
  116.32 +        break;
  116.33 +    case ECS_IPI:
  116.34 +    case ECS_UNBOUND:
  116.35 +        rc = 0;
  116.36 +        break;
  116.37 +    default:
  116.38 +        rc = -EPERM;
  116.39 +    }
  116.40 +
  116.41 +    return rc;
  116.42  }
  116.43  
  116.44  static int flask_evtchn_status(struct domain *d, struct evtchn *chn)
  116.45 @@ -340,7 +355,7 @@ static int get_mfn_sid(unsigned long mfn
  116.46      if ( mfn_valid(mfn) )
  116.47      {
  116.48          /*mfn is valid if this is a page that Xen is tracking!*/
  116.49 -        page = mfn_to_page(mfn);        
  116.50 +        page = mfn_to_page(mfn);
  116.51          rc = get_page_sid(page, sid);
  116.52      }
  116.53      else
  116.54 @@ -390,23 +405,6 @@ static int flask_memory_pin_page(struct 
  116.55      return avc_has_perm(dsec->sid, sid, SECCLASS_MMU, MMU__PINPAGE, NULL);
  116.56  }
  116.57  
  116.58 -/* Used to defer flushing of memory structures. */
  116.59 -struct percpu_mm_info {
  116.60 -#define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
  116.61 -#define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
  116.62 -#define DOP_RELOAD_LDT     (1<<2) /* Reload the LDT shadow mapping.          */
  116.63 -    unsigned int   deferred_ops;
  116.64 -    /* If non-NULL, specifies a foreign subject domain for some operations. */
  116.65 -    struct domain *foreign;
  116.66 -};
  116.67 -static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
  116.68 -
  116.69 -/*
  116.70 - * Returns the current foreign domain; defaults to the currently-executing
  116.71 - * domain if a foreign override hasn't been specified.
  116.72 - */
  116.73 -#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
  116.74 -
  116.75  static int flask_console_io(struct domain *d, int cmd)
  116.76  {
  116.77      u32 perm;
  116.78 @@ -506,22 +504,22 @@ static int flask_domain_create(struct do
  116.79  
  116.80      dsec1 = current->domain->ssid;
  116.81  
  116.82 -    if ( dsec1->create_sid == SECSID_NULL )
  116.83 -        dsec1->create_sid = ssidref;
  116.84 +    if ( dsec1->create_sid == SECSID_NULL )
  116.85 +        dsec1->create_sid = ssidref;
  116.86  
  116.87 -    rc = avc_has_perm(dsec1->sid, dsec1->create_sid, SECCLASS_DOMAIN, 
  116.88 -                                                        DOMAIN__CREATE, NULL);
  116.89 -    if ( rc )
  116.90 +    rc = avc_has_perm(dsec1->sid, dsec1->create_sid, SECCLASS_DOMAIN,
  116.91 +                                                        DOMAIN__CREATE, NULL);
  116.92 +    if ( rc )
  116.93      {
  116.94 -        dsec1->create_sid = SECSID_NULL;
  116.95 -        return rc;
  116.96 +        dsec1->create_sid = SECSID_NULL;
  116.97 +        return rc;
  116.98      }
  116.99  
 116.100      dsec2 = d->ssid;
 116.101      dsec2->sid = dsec1->create_sid;
 116.102  
 116.103 -    dsec1->create_sid = SECSID_NULL;
 116.104 -    dsec2->create_sid = SECSID_NULL;
 116.105 +    dsec1->create_sid = SECSID_NULL;
 116.106 +    dsec2->create_sid = SECSID_NULL;
 116.107  
 116.108      return rc;
 116.109  }
 116.110 @@ -592,6 +590,11 @@ static int flask_domain_settime(struct d
 116.111      return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, DOMAIN__SETTIME);
 116.112  }
 116.113  
 116.114 +static int flask_set_target(struct domain *d, struct domain *e)
 116.115 +{
 116.116 +    return domain_has_perm(d, e, SECCLASS_DOMAIN, DOMAIN__SET_TARGET);
 116.117 +}
 116.118 +
 116.119  static int flask_tbufcontrol(void)
 116.120  {
 116.121      return domain_has_xen(current->domain, SECCLASS_XEN);
 116.122 @@ -630,6 +633,21 @@ static int flask_setdebugging(struct dom
 116.123                                                          DOMAIN__SETDEBUGGING);
 116.124  }
 116.125  
 116.126 +static int flask_debug_keys(void)
 116.127 +{
 116.128 +    return domain_has_xen(current->domain, XEN__DEBUG);
 116.129 +}
 116.130 +
 116.131 +static int flask_getcpuinfo(void)
 116.132 +{
 116.133 +    return domain_has_xen(current->domain, XEN__GETCPUINFO);
 116.134 +}
 116.135 +
 116.136 +static int flask_availheap(void)
 116.137 +{
 116.138 +    return domain_has_xen(current->domain, XEN__HEAP);
 116.139 +}
 116.140 +
 116.141  static inline u32 resource_to_perm(uint8_t access)
 116.142  {
 116.143      if ( access )
 116.144 @@ -638,7 +656,7 @@ static inline u32 resource_to_perm(uint8
 116.145          return RESOURCE__REMOVE;
 116.146  }
 116.147  
 116.148 -static int flask_irq_permission(struct domain *d, uint8_t pirq, uint8_t access)
 116.149 +static int irq_has_perm(struct domain *d, uint8_t pirq, uint8_t access)
 116.150  {
 116.151      u32 perm;
 116.152      u32 rsid;
 116.153 @@ -665,16 +683,17 @@ static int flask_irq_permission(struct d
 116.154          return rc;
 116.155  
 116.156      rc = avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE, perm, NULL);
 116.157 -
 116.158      if ( rc )
 116.159          return rc;
 116.160  
 116.161 -    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 116.162 +    if ( access )
 116.163 +        return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 116.164                                                          RESOURCE__USE, NULL);
 116.165 +    else
 116.166 +        return rc;
 116.167  }
 116.168  
 116.169 -static int flask_iomem_permission(struct domain *d, unsigned long mfn, 
 116.170 -                                                                uint8_t access)
 116.171 +static int iomem_has_perm(struct domain *d, unsigned long mfn, uint8_t access)
 116.172  {
 116.173      u32 perm;
 116.174      u32 rsid;
 116.175 @@ -684,7 +703,6 @@ static int flask_iomem_permission(struct
 116.176  
 116.177      rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE,
 116.178                                                      resource_to_perm(access));
 116.179 -
 116.180      if ( rc )
 116.181          return rc;
 116.182  
 116.183 @@ -743,8 +761,7 @@ static int flask_shadow_control(struct d
 116.184      return domain_has_perm(current->domain, d, SECCLASS_SHADOW, perm);
 116.185  }
 116.186  
 116.187 -static int flask_ioport_permission(struct domain *d, uint32_t ioport, 
 116.188 -                                                                uint8_t access)
 116.189 +static int ioport_has_perm(struct domain *d, uint32_t ioport, uint8_t access)
 116.190  {
 116.191      u32 perm;
 116.192      u32 rsid;
 116.193 @@ -774,8 +791,11 @@ static int flask_ioport_permission(struc
 116.194      if ( rc )
 116.195          return rc;
 116.196  
 116.197 -    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 116.198 +    if ( access )
 116.199 +        return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 116.200                                                          RESOURCE__USE, NULL);    
 116.201 +    else
 116.202 +        return rc;
 116.203  }
 116.204  
 116.205  static int flask_getpageframeinfo(struct page_info *page)
 116.206 @@ -953,6 +973,26 @@ static int flask_platform_quirk(uint32_t
 116.207                                                              XEN__QUIRK, NULL);
 116.208  }
 116.209  
 116.210 +static int flask_firmware_info(void)
 116.211 +{
 116.212 +    return domain_has_xen(current->domain, XEN__FIRMWARE);
 116.213 +}
 116.214 +
 116.215 +static int flask_acpi_sleep(void)
 116.216 +{
 116.217 +    return domain_has_xen(current->domain, XEN__SLEEP);
 116.218 +}
 116.219 +
 116.220 +static int flask_change_freq(void)
 116.221 +{
 116.222 +    return domain_has_xen(current->domain, XEN__FREQUENCY);
 116.223 +}
 116.224 +
 116.225 +static int flask_getidletime(void)
 116.226 +{
 116.227 +    return domain_has_xen(current->domain, XEN__GETIDLE);
 116.228 +}
 116.229 +
 116.230  static int flask_machine_memory_map(void)
 116.231  {
 116.232      struct domain_security_struct *dsec;
 116.233 @@ -967,7 +1007,8 @@ static int flask_domain_memory_map(struc
 116.234      return domain_has_perm(current->domain, d, SECCLASS_MMU, MMU__MEMORYMAP);
 116.235  }
 116.236  
 116.237 -static int flask_mmu_normal_update(struct domain *d, intpte_t fpte)
 116.238 +static int flask_mmu_normal_update(struct domain *d, struct domain *f, 
 116.239 +                                                                intpte_t fpte)
 116.240  {
 116.241      int rc = 0;
 116.242      u32 map_perms = MMU__MAP_READ;
 116.243 @@ -980,7 +1021,7 @@ static int flask_mmu_normal_update(struc
 116.244      if ( l1e_get_flags(l1e_from_intpte(fpte)) & _PAGE_RW )
 116.245          map_perms |= MMU__MAP_WRITE;
 116.246  
 116.247 -    fmfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(l1e_from_intpte(fpte)));
 116.248 +    fmfn = gmfn_to_mfn(f, l1e_get_pfn(l1e_from_intpte(fpte)));
 116.249  
 116.250      rc = get_mfn_sid(fmfn, &fsid);
 116.251      if ( rc )
 116.252 @@ -1003,7 +1044,8 @@ static int flask_mmu_machphys_update(str
 116.253      return avc_has_perm(dsec->sid, psid, SECCLASS_MMU, MMU__UPDATEMP, NULL);
 116.254  }
 116.255  
 116.256 -static int flask_update_va_mapping(struct domain *d, l1_pgentry_t pte)
 116.257 +static int flask_update_va_mapping(struct domain *d, struct domain *f, 
 116.258 +                                                            l1_pgentry_t pte)
 116.259  {
 116.260      int rc = 0;
 116.261      u32 psid;
 116.262 @@ -1013,7 +1055,7 @@ static int flask_update_va_mapping(struc
 116.263  
 116.264      dsec = d->ssid;
 116.265  
 116.266 -    mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(pte));        
 116.267 +    mfn = gmfn_to_mfn(f, l1e_get_pfn(pte));
 116.268      rc = get_mfn_sid(mfn, &psid);
 116.269      if ( rc )
 116.270          return rc;
 116.271 @@ -1033,8 +1075,163 @@ static int flask_remove_from_physmap(str
 116.272  {
 116.273      return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
 116.274  }
 116.275 +
 116.276 +static int flask_sendtrigger(struct domain *d)
 116.277 +{
 116.278 +    return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, DOMAIN__TRIGGER);
 116.279 +}
 116.280 +
 116.281 +static int flask_test_assign_device(uint32_t machine_bdf)
 116.282 +{
 116.283 +    u32 rsid;
 116.284 +    int rc = -EPERM;
 116.285 +    struct domain_security_struct *ssec = current->domain->ssid;
 116.286 +
 116.287 +    rc = security_device_sid(machine_bdf, &rsid);
 116.288 +    if ( rc )
 116.289 +        return rc;
 116.290 +
 116.291 +    return avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__STAT_DEVICE, NULL);
 116.292 +}
 116.293 +
 116.294 +static int flask_assign_device(struct domain *d, uint32_t machine_bdf)
 116.295 +{
 116.296 +    u32 rsid;
 116.297 +    int rc = -EPERM;
 116.298 +    struct domain_security_struct *ssec, *tsec;
 116.299 +
 116.300 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__ADD);
 116.301 +    if ( rc )
 116.302 +        return rc;
 116.303 +
 116.304 +    rc = security_device_sid(machine_bdf, &rsid);
 116.305 +    if ( rc )
 116.306 +        return rc;
 116.307 +
 116.308 +    ssec = current->domain->ssid;
 116.309 +    rc = avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__ADD_DEVICE, NULL);
 116.310 +    if ( rc )
 116.311 +        return rc;
 116.312 +
 116.313 +    tsec = d->ssid;
 116.314 +    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__USE, NULL);
 116.315 +}
 116.316 +
 116.317 +static int flask_deassign_device(struct domain *d, uint32_t machine_bdf)
 116.318 +{
 116.319 +    u32 rsid;
 116.320 +    int rc = -EPERM;
 116.321 +    struct domain_security_struct *ssec = current->domain->ssid;
 116.322 +
 116.323 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
 116.324 +    if ( rc )
 116.325 +        return rc;
 116.326 +
 116.327 +    rc = security_device_sid(machine_bdf, &rsid);
 116.328 +    if ( rc )
 116.329 +        return rc;
 116.330 +
 116.331 +    return avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__REMOVE_DEVICE, NULL);
 116.332 +}
 116.333 +
 116.334 +static int flask_bind_pt_irq (struct domain *d, struct xen_domctl_bind_pt_irq *bind)
 116.335 +{
 116.336 +    u32 rsid;
 116.337 +    int rc = -EPERM;
 116.338 +    struct domain_security_struct *ssec, *tsec;
 116.339 +
 116.340 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__ADD);
 116.341 +    if ( rc )
 116.342 +        return rc;
 116.343 +
 116.344 +    rc = security_pirq_sid(bind->machine_irq, &rsid);
 116.345 +    if ( rc )
 116.346 +        return rc;
 116.347 +
 116.348 +    ssec = current->domain->ssid;
 116.349 +    rc = avc_has_perm(ssec->sid, rsid, SECCLASS_HVM, HVM__BIND_IRQ, NULL);
 116.350 +    if ( rc )
 116.351 +        return rc;
 116.352 +
 116.353 +    tsec = d->ssid;
 116.354 +    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__USE, NULL);
 116.355 +}
 116.356 +
 116.357 +static int flask_pin_mem_cacheattr (struct domain *d)
 116.358 +{
 116.359 +    return domain_has_perm(current->domain, d, SECCLASS_HVM, HVM__CACHEATTR);
 116.360 +}
 116.361 +
 116.362 +static int flask_ext_vcpucontext (struct domain *d, uint32_t cmd)
 116.363 +{
 116.364 +    u32 perm;
 116.365 +
 116.366 +    switch ( cmd )
 116.367 +    {
 116.368 +        case XEN_DOMCTL_set_ext_vcpucontext:
 116.369 +            perm = DOMAIN__SETEXTVCPUCONTEXT;
 116.370 +            break;
 116.371 +        case XEN_DOMCTL_get_ext_vcpucontext:
 116.372 +            perm = DOMAIN__GETEXTVCPUCONTEXT;
 116.373 +            break;
 116.374 +        default:
 116.375 +            return -EPERM;
 116.376 +    }
 116.377 +
 116.378 +    return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, perm);
 116.379 +}
 116.380  #endif
 116.381  
 116.382 +static int io_has_perm(struct domain *d, char *name, unsigned long s, 
 116.383 +                                                    unsigned long e, u32 access)
 116.384 +{
 116.385 +    int rc = -EPERM;
 116.386 +
 116.387 +    if ( strcmp(name, "I/O Memory") == 0 )
 116.388 +    {
 116.389 +        rc = iomem_has_perm(d, s, access);
 116.390 +        if ( rc )
 116.391 +            return rc;
 116.392 +
 116.393 +        if ( s != e )
 116.394 +            rc = iomem_has_perm(d, e, access);
 116.395 +    }
 116.396 +    else if ( strcmp(name, "Interrupts") == 0 )
 116.397 +    {
 116.398 +        rc = irq_has_perm(d, s, access);
 116.399 +        if ( rc )
 116.400 +            return rc;
 116.401 +
 116.402 +        if ( s != e )
 116.403 +            rc = irq_has_perm(d, e, access);
 116.404 +    }
 116.405 +#ifdef CONFIG_X86
 116.406 +    else if ( strcmp(name, "I/O Ports") == 0 )
 116.407 +    {
 116.408 +        rc = ioport_has_perm(d, s, access);
 116.409 +        if ( rc )
 116.410 +            return rc;
 116.411 +
 116.412 +        if ( s != e )
 116.413 +            rc = ioport_has_perm(d, e, access);
 116.414 +    }
 116.415 +#endif
 116.416 +
 116.417 +    return rc;
 116.418 +}
 116.419 +
 116.420 +static int flask_add_range(struct domain *d, char *name, unsigned long s,
 116.421 +                                                                    unsigned long e)
 116.422 +{
 116.423 +    return io_has_perm(d, name, s, e, 1);
 116.424 +}
 116.425 +
 116.426 +static int flask_remove_range(struct domain *d, char *name, unsigned long s,
 116.427 +                                                                    unsigned long e)
 116.428 +{
 116.429 +    return io_has_perm(d, name, s, e, 0);
 116.430 +}
 116.431 +
 116.432  long do_flask_op(XEN_GUEST_HANDLE(xsm_op_t) u_flask_op);
 116.433  
 116.434  static struct xsm_operations flask_ops = {
 116.435 @@ -1052,15 +1249,17 @@ static struct xsm_operations flask_ops =
 116.436      .getvcpucontext = flask_getvcpucontext,
 116.437      .getvcpuinfo = flask_getvcpuinfo,
 116.438      .domain_settime = flask_domain_settime,
 116.439 +    .set_target = flask_set_target,
 116.440      .tbufcontrol = flask_tbufcontrol,
 116.441      .readconsole = flask_readconsole,
 116.442      .sched_id = flask_sched_id,
 116.443      .setdomainmaxmem = flask_setdomainmaxmem,
 116.444      .setdomainhandle = flask_setdomainhandle,
 116.445      .setdebugging = flask_setdebugging,
 116.446 -    .irq_permission = flask_irq_permission,
 116.447 -    .iomem_permission = flask_iomem_permission,
 116.448      .perfcontrol = flask_perfcontrol,
 116.449 +    .debug_keys = flask_debug_keys,
 116.450 +    .getcpuinfo = flask_getcpuinfo,
 116.451 +    .availheap = flask_availheap,
 116.452  
 116.453      .evtchn_unbound = flask_evtchn_unbound,
 116.454      .evtchn_interdomain = flask_evtchn_interdomain,
 116.455 @@ -1093,11 +1292,13 @@ static struct xsm_operations flask_ops =
 116.456      .kexec = flask_kexec,
 116.457      .schedop_shutdown = flask_schedop_shutdown,
 116.458  
 116.459 +    .add_range = flask_add_range,
 116.460 +    .remove_range = flask_remove_range,
 116.461 +
 116.462      .__do_xsm_op = do_flask_op,
 116.463  
 116.464  #ifdef CONFIG_X86
 116.465      .shadow_control = flask_shadow_control,
 116.466 -    .ioport_permission = flask_ioport_permission,
 116.467      .getpageframeinfo = flask_getpageframeinfo,
 116.468      .getmemlist = flask_getmemlist,
 116.469      .hypercall_init = flask_hypercall_init,
 116.470 @@ -1114,6 +1315,10 @@ static struct xsm_operations flask_ops =
 116.471      .microcode = flask_microcode,
 116.472      .physinfo = flask_physinfo,
 116.473      .platform_quirk = flask_platform_quirk,
 116.474 +    .firmware_info = flask_firmware_info,
 116.475 +    .acpi_sleep = flask_acpi_sleep,
 116.476 +    .change_freq = flask_change_freq,
 116.477 +    .getidletime = flask_getidletime,
 116.478      .machine_memory_map = flask_machine_memory_map,
 116.479      .domain_memory_map = flask_domain_memory_map,
 116.480      .mmu_normal_update = flask_mmu_normal_update,
 116.481 @@ -1121,6 +1326,13 @@ static struct xsm_operations flask_ops =
 116.482      .update_va_mapping = flask_update_va_mapping,
 116.483      .add_to_physmap = flask_add_to_physmap,
 116.484      .remove_from_physmap = flask_remove_from_physmap,
 116.485 +    .sendtrigger = flask_sendtrigger,
 116.486 +    .test_assign_device = flask_test_assign_device,
 116.487 +    .assign_device = flask_assign_device,
 116.488 +    .deassign_device = flask_deassign_device,
 116.489 +    .bind_pt_irq = flask_bind_pt_irq,
 116.490 +    .pin_mem_cacheattr = flask_pin_mem_cacheattr,
 116.491 +    .ext_vcpucontext = flask_ext_vcpucontext,
 116.492  #endif
 116.493  };
 116.494  
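
Hypervisor call sites never invoke these flask_* functions directly: each one is reached through a thin xsm_* wrapper that dispatches via the registered xsm_operations table, so either the Flask hooks above or the dummy stubs are selected when the XSM core initializes. A hedged sketch of the wrapper shape, assuming the xsm_ops pointer exported by the XSM core (the real inline wrappers live in xen/include/xsm/xsm.h):

    extern struct xsm_operations *xsm_ops;

    static inline int xsm_sendtrigger(struct domain *d)
    {
        return xsm_ops->sendtrigger(d);
    }
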
   117.1 --- a/xen/xsm/flask/include/av_perm_to_string.h	Fri Sep 12 14:32:45 2008 +0900
   117.2 +++ b/xen/xsm/flask/include/av_perm_to_string.h	Fri Sep 12 14:47:40 2008 +0900
   117.3 @@ -17,11 +17,19 @@
   117.4     S_(SECCLASS_XEN, XEN__PRIVPROFILE, "privprofile")
   117.5     S_(SECCLASS_XEN, XEN__NONPRIVPROFILE, "nonprivprofile")
   117.6     S_(SECCLASS_XEN, XEN__KEXEC, "kexec")
   117.7 +   S_(SECCLASS_XEN, XEN__FIRMWARE, "firmware")
   117.8 +   S_(SECCLASS_XEN, XEN__SLEEP, "sleep")
   117.9 +   S_(SECCLASS_XEN, XEN__FREQUENCY, "frequency")
  117.10 +   S_(SECCLASS_XEN, XEN__GETIDLE, "getidle")
  117.11 +   S_(SECCLASS_XEN, XEN__DEBUG, "debug")
  117.12 +   S_(SECCLASS_XEN, XEN__GETCPUINFO, "getcpuinfo")
  117.13 +   S_(SECCLASS_XEN, XEN__HEAP, "heap")
  117.14     S_(SECCLASS_DOMAIN, DOMAIN__SETVCPUCONTEXT, "setvcpucontext")
  117.15     S_(SECCLASS_DOMAIN, DOMAIN__PAUSE, "pause")
  117.16     S_(SECCLASS_DOMAIN, DOMAIN__UNPAUSE, "unpause")
  117.17     S_(SECCLASS_DOMAIN, DOMAIN__RESUME, "resume")
  117.18     S_(SECCLASS_DOMAIN, DOMAIN__CREATE, "create")
  117.19 +   S_(SECCLASS_DOMAIN, DOMAIN__TRANSITION, "transition")
  117.20     S_(SECCLASS_DOMAIN, DOMAIN__MAX_VCPUS, "max_vcpus")
  117.21     S_(SECCLASS_DOMAIN, DOMAIN__DESTROY, "destroy")
  117.22     S_(SECCLASS_DOMAIN, DOMAIN__SETVCPUAFFINITY, "setvcpuaffinity")
  117.23 @@ -34,11 +42,14 @@
  117.24     S_(SECCLASS_DOMAIN, DOMAIN__SETDOMAINHANDLE, "setdomainhandle")
  117.25     S_(SECCLASS_DOMAIN, DOMAIN__SETDEBUGGING, "setdebugging")
  117.26     S_(SECCLASS_DOMAIN, DOMAIN__HYPERCALL, "hypercall")
  117.27 -   S_(SECCLASS_DOMAIN, DOMAIN__TRANSITION, "transition")
  117.28     S_(SECCLASS_DOMAIN, DOMAIN__SETTIME, "settime")
  117.29 +   S_(SECCLASS_DOMAIN, DOMAIN__SET_TARGET, "set_target")
  117.30     S_(SECCLASS_DOMAIN, DOMAIN__SHUTDOWN, "shutdown")
  117.31     S_(SECCLASS_DOMAIN, DOMAIN__SETADDRSIZE, "setaddrsize")
  117.32     S_(SECCLASS_DOMAIN, DOMAIN__GETADDRSIZE, "getaddrsize")
  117.33 +   S_(SECCLASS_DOMAIN, DOMAIN__TRIGGER, "trigger")
  117.34 +   S_(SECCLASS_DOMAIN, DOMAIN__GETEXTVCPUCONTEXT, "getextvcpucontext")
  117.35 +   S_(SECCLASS_DOMAIN, DOMAIN__SETEXTVCPUCONTEXT, "setextvcpucontext")
  117.36     S_(SECCLASS_HVM, HVM__SETHVMC, "sethvmc")
  117.37     S_(SECCLASS_HVM, HVM__GETHVMC, "gethvmc")
  117.38     S_(SECCLASS_HVM, HVM__SETPARAM, "setparam")
  117.39 @@ -46,14 +57,13 @@
  117.40     S_(SECCLASS_HVM, HVM__PCILEVEL, "pcilevel")
  117.41     S_(SECCLASS_HVM, HVM__IRQLEVEL, "irqlevel")
  117.42     S_(SECCLASS_HVM, HVM__PCIROUTE, "pciroute")
  117.43 +   S_(SECCLASS_HVM, HVM__BIND_IRQ, "bind_irq")
  117.44 +   S_(SECCLASS_HVM, HVM__CACHEATTR, "cacheattr")
  117.45     S_(SECCLASS_EVENT, EVENT__BIND, "bind")
  117.46 -   S_(SECCLASS_EVENT, EVENT__CLOSE, "close")
  117.47     S_(SECCLASS_EVENT, EVENT__SEND, "send")
  117.48     S_(SECCLASS_EVENT, EVENT__STATUS, "status")
  117.49 -   S_(SECCLASS_EVENT, EVENT__UNMASK, "unmask")
  117.50     S_(SECCLASS_EVENT, EVENT__NOTIFY, "notify")
  117.51     S_(SECCLASS_EVENT, EVENT__CREATE, "create")
  117.52 -   S_(SECCLASS_EVENT, EVENT__ALLOC, "alloc")
  117.53     S_(SECCLASS_EVENT, EVENT__VECTOR, "vector")
  117.54     S_(SECCLASS_EVENT, EVENT__RESET, "reset")
  117.55     S_(SECCLASS_GRANT, GRANT__MAP_READ, "map_read")
  117.56 @@ -87,6 +97,9 @@
  117.57     S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_IOPORT, "remove_ioport")
  117.58     S_(SECCLASS_RESOURCE, RESOURCE__ADD_IOMEM, "add_iomem")
  117.59     S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_IOMEM, "remove_iomem")
  117.60 +   S_(SECCLASS_RESOURCE, RESOURCE__STAT_DEVICE, "stat_device")
  117.61 +   S_(SECCLASS_RESOURCE, RESOURCE__ADD_DEVICE, "add_device")
  117.62 +   S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_DEVICE, "remove_device")
  117.63     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_AV, "compute_av")
  117.64     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, "compute_create")
  117.65     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, "compute_member")
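
This header is consumed by the AVC audit code, which expands each S_() entry into a (class, bit, name) record used when printing access denials. Roughly, following the usual Flask X-macro pattern (the exact struct and array names here are assumptions):

    static struct av_perm_to_string
    {
        u16 tclass;
        u32 value;
        const char *name;
    } av_perm_to_string[] = {
    #define S_(c, v, s) { c, v, s },
    #include "av_perm_to_string.h"
    #undef S_
    };

Any permission added here must therefore also be defined, with the same bit value, in av_permissions.h below.
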
   118.1 --- a/xen/xsm/flask/include/av_permissions.h	Fri Sep 12 14:32:45 2008 +0900
   118.2 +++ b/xen/xsm/flask/include/av_permissions.h	Fri Sep 12 14:47:40 2008 +0900
   118.3 @@ -17,29 +17,40 @@
   118.4  #define XEN__PRIVPROFILE                          0x00008000UL
   118.5  #define XEN__NONPRIVPROFILE                       0x00010000UL
   118.6  #define XEN__KEXEC                                0x00020000UL
   118.7 +#define XEN__FIRMWARE                             0x00040000UL
   118.8 +#define XEN__SLEEP                                0x00080000UL
   118.9 +#define XEN__FREQUENCY                            0x00100000UL
  118.10 +#define XEN__GETIDLE                              0x00200000UL
  118.11 +#define XEN__DEBUG                                0x00400000UL
  118.12 +#define XEN__GETCPUINFO                           0x00800000UL
  118.13 +#define XEN__HEAP                                 0x01000000UL
  118.14  
  118.15  #define DOMAIN__SETVCPUCONTEXT                    0x00000001UL
  118.16  #define DOMAIN__PAUSE                             0x00000002UL
  118.17  #define DOMAIN__UNPAUSE                           0x00000004UL
  118.18  #define DOMAIN__RESUME                            0x00000008UL
  118.19  #define DOMAIN__CREATE                            0x00000010UL
  118.20 -#define DOMAIN__MAX_VCPUS                         0x00000020UL
  118.21 -#define DOMAIN__DESTROY                           0x00000040UL
  118.22 -#define DOMAIN__SETVCPUAFFINITY                   0x00000080UL
  118.23 -#define DOMAIN__GETVCPUAFFINITY                   0x00000100UL
  118.24 -#define DOMAIN__SCHEDULER                         0x00000200UL
  118.25 -#define DOMAIN__GETDOMAININFO                     0x00000400UL
  118.26 -#define DOMAIN__GETVCPUINFO                       0x00000800UL
  118.27 -#define DOMAIN__GETVCPUCONTEXT                    0x00001000UL
  118.28 -#define DOMAIN__SETDOMAINMAXMEM                   0x00002000UL
  118.29 -#define DOMAIN__SETDOMAINHANDLE                   0x00004000UL
  118.30 -#define DOMAIN__SETDEBUGGING                      0x00008000UL
  118.31 -#define DOMAIN__HYPERCALL                         0x00010000UL
  118.32 -#define DOMAIN__TRANSITION                        0x00020000UL
  118.33 +#define DOMAIN__TRANSITION                        0x00000020UL
  118.34 +#define DOMAIN__MAX_VCPUS                         0x00000040UL
  118.35 +#define DOMAIN__DESTROY                           0x00000080UL
  118.36 +#define DOMAIN__SETVCPUAFFINITY                   0x00000100UL
  118.37 +#define DOMAIN__GETVCPUAFFINITY                   0x00000200UL
  118.38 +#define DOMAIN__SCHEDULER                         0x00000400UL
  118.39 +#define DOMAIN__GETDOMAININFO                     0x00000800UL
  118.40 +#define DOMAIN__GETVCPUINFO                       0x00001000UL
  118.41 +#define DOMAIN__GETVCPUCONTEXT                    0x00002000UL
  118.42 +#define DOMAIN__SETDOMAINMAXMEM                   0x00004000UL
  118.43 +#define DOMAIN__SETDOMAINHANDLE                   0x00008000UL
  118.44 +#define DOMAIN__SETDEBUGGING                      0x00010000UL
  118.45 +#define DOMAIN__HYPERCALL                         0x00020000UL
  118.46  #define DOMAIN__SETTIME                           0x00040000UL
  118.47 -#define DOMAIN__SHUTDOWN                          0x00080000UL
  118.48 -#define DOMAIN__SETADDRSIZE                       0x00100000UL
  118.49 -#define DOMAIN__GETADDRSIZE                       0x00200000UL
  118.50 +#define DOMAIN__SET_TARGET                        0x00080000UL
  118.51 +#define DOMAIN__SHUTDOWN                          0x00100000UL
  118.52 +#define DOMAIN__SETADDRSIZE                       0x00200000UL
  118.53 +#define DOMAIN__GETADDRSIZE                       0x00400000UL
  118.54 +#define DOMAIN__TRIGGER                           0x00800000UL
  118.55 +#define DOMAIN__GETEXTVCPUCONTEXT                 0x01000000UL
  118.56 +#define DOMAIN__SETEXTVCPUCONTEXT                 0x02000000UL
  118.57  
  118.58  #define HVM__SETHVMC                              0x00000001UL
  118.59  #define HVM__GETHVMC                              0x00000002UL
  118.60 @@ -48,17 +59,16 @@
  118.61  #define HVM__PCILEVEL                             0x00000010UL
  118.62  #define HVM__IRQLEVEL                             0x00000020UL
  118.63  #define HVM__PCIROUTE                             0x00000040UL
  118.64 +#define HVM__BIND_IRQ                             0x00000080UL
  118.65 +#define HVM__CACHEATTR                            0x00000100UL
  118.66  
  118.67  #define EVENT__BIND                               0x00000001UL
  118.68 -#define EVENT__CLOSE                              0x00000002UL
  118.69 -#define EVENT__SEND                               0x00000004UL
  118.70 -#define EVENT__STATUS                             0x00000008UL
  118.71 -#define EVENT__UNMASK                             0x00000010UL
  118.72 -#define EVENT__NOTIFY                             0x00000020UL
  118.73 -#define EVENT__CREATE                             0x00000040UL
  118.74 -#define EVENT__ALLOC                              0x00000080UL
  118.75 -#define EVENT__VECTOR                             0x00000100UL
  118.76 -#define EVENT__RESET                              0x00000200UL
  118.77 +#define EVENT__SEND                               0x00000002UL
  118.78 +#define EVENT__STATUS                             0x00000004UL
  118.79 +#define EVENT__NOTIFY                             0x00000008UL
  118.80 +#define EVENT__CREATE                             0x00000010UL
  118.81 +#define EVENT__VECTOR                             0x00000020UL
  118.82 +#define EVENT__RESET                              0x00000040UL
  118.83  
  118.84  #define GRANT__MAP_READ                           0x00000001UL
  118.85  #define GRANT__MAP_WRITE                          0x00000002UL
  118.86 @@ -94,6 +104,9 @@
  118.87  #define RESOURCE__REMOVE_IOPORT                   0x00000040UL
  118.88  #define RESOURCE__ADD_IOMEM                       0x00000080UL
  118.89  #define RESOURCE__REMOVE_IOMEM                    0x00000100UL
  118.90 +#define RESOURCE__STAT_DEVICE                     0x00000200UL
  118.91 +#define RESOURCE__ADD_DEVICE                      0x00000400UL
  118.92 +#define RESOURCE__REMOVE_DEVICE                   0x00000800UL
  118.93  
  118.94  #define SECURITY__COMPUTE_AV                      0x00000001UL
  118.95  #define SECURITY__COMPUTE_CREATE                  0x00000002UL
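
Each permission is a single bit in a 32-bit access vector, which is why one avc_has_perm() call can test several permissions at once. A hypothetical illustration, with ssid/tsid standing in for arbitrary source and target SIDs:

    u32 requested = DOMAIN__TRIGGER | DOMAIN__SETEXTVCPUCONTEXT;
    int rc = avc_has_perm(ssid, tsid, SECCLASS_DOMAIN, requested, NULL);

Note that this change renumbers existing bits (e.g. DOMAIN__TRANSITION moves from 0x00020000UL to 0x00000020UL), so security policies must be rebuilt against the new headers; a hypervisor and a policy built from different revisions will not agree on what each bit means.
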
   119.1 --- a/xen/xsm/flask/include/flask.h	Fri Sep 12 14:32:45 2008 +0900
   119.2 +++ b/xen/xsm/flask/include/flask.h	Fri Sep 12 14:47:40 2008 +0900
   119.3 @@ -1,6 +1,6 @@
   119.4  /* This file is automatically generated.  Do not edit. */
   119.5 -#ifndef _FLASK_FLASK_H_
   119.6 -#define _FLASK_FLASK_H_
   119.7 +#ifndef _SELINUX_FLASK_H_
   119.8 +#define _SELINUX_FLASK_H_
   119.9  
  119.10  /*
  119.11   * Security object class definitions
  119.12 @@ -27,10 +27,9 @@
  119.13  #define SECINITSID_SECURITY                             7
  119.14  #define SECINITSID_IOPORT                               8
  119.15  #define SECINITSID_IOMEM                                9
  119.16 -#define SECINITSID_VCPU                                 10
  119.17 -#define SECINITSID_VIRQ                                 11
  119.18 -#define SECINITSID_PIRQ                                 12
  119.19 +#define SECINITSID_PIRQ                                 10
  119.20 +#define SECINITSID_DEVICE                               11
  119.21  
  119.22 -#define SECINITSID_NUM                                  12
  119.23 +#define SECINITSID_NUM                                  11
  119.24  
  119.25  #endif
   120.1 --- a/xen/xsm/flask/include/initial_sid_to_string.h	Fri Sep 12 14:32:45 2008 +0900
   120.2 +++ b/xen/xsm/flask/include/initial_sid_to_string.h	Fri Sep 12 14:47:40 2008 +0900
   120.3 @@ -11,8 +11,7 @@ static char *initial_sid_to_string[] =
   120.4      "security",
   120.5      "ioport",
   120.6      "iomem",
   120.7 -    "vcpu",
   120.8 -    "virq",
   120.9      "pirq",
  120.10 +    "device",
  120.11  };
  120.12  
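
initial_sid_to_string[] is indexed by the SECINITSID_* values in flask.h, so the two generated headers must stay in lockstep: after this change entry 10 is "pirq" and entry 11 is "device". A hypothetical compile-time check of that invariant, assuming index 0 holds a null placeholder since the SID values start at 1:

    BUILD_BUG_ON(ARRAY_SIZE(initial_sid_to_string) != SECINITSID_NUM + 1);
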
   121.1 --- a/xen/xsm/flask/include/security.h	Fri Sep 12 14:32:45 2008 +0900
   121.2 +++ b/xen/xsm/flask/include/security.h	Fri Sep 12 14:47:40 2008 +0900
   121.3 @@ -69,14 +69,12 @@ int security_get_user_sids(u32 callsid, 
   121.4  
   121.5  int security_pirq_sid(int pirq, u32 *out_sid);
   121.6  
   121.7 -int security_virq_sid(int virq, u32 *out_sid);
   121.8 -
   121.9 -int security_vcpu_sid(int vcpu, u32 *out_sid);
  121.10 -
  121.11  int security_iomem_sid(unsigned long, u32 *out_sid);
  121.12  
  121.13  int security_ioport_sid(u32 ioport, u32 *out_sid);
  121.14  
  121.15 +int security_device_sid(u32 device, u32 *out_sid);
  121.16 +
  121.17  int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
  121.18                                                                      u16 tclass);
  121.19  
   122.1 --- a/xen/xsm/flask/ss/policydb.h	Fri Sep 12 14:32:45 2008 +0900
   122.2 +++ b/xen/xsm/flask/ss/policydb.h	Fri Sep 12 14:47:40 2008 +0900
   122.3 @@ -133,10 +133,9 @@ struct ocontext {
   122.4      union {
   122.5          char *name;    /* name of initial SID, fs, netif, fstype, path */
   122.6          int pirq;
   122.7 -        int virq;
   122.8 -        int vcpu;
   122.9          u32 ioport;
  122.10          unsigned long iomem;
  122.11 +        u32 device;
  122.12      } u;
  122.13      struct context context[2];    /* security context(s) */
  122.14      u32 sid[2];    /* SID(s) */
  122.15 @@ -157,11 +156,11 @@ struct ocontext {
  122.16  /* object context array indices */
  122.17  #define OCON_ISID    0    /* initial SIDs */
  122.18  #define OCON_PIRQ    1    /* physical irqs */
  122.19 -#define OCON_VIRQ    2    /* virtual irqs */
  122.20 -#define OCON_VCPU    3    /* virtual cpus */
  122.21 -#define OCON_IOPORT  4    /* io ports */
  122.22 -#define OCON_IOMEM   5    /* io memory */
  122.23 -#define OCON_DUMMY   6
  122.24 +#define OCON_IOPORT  2    /* io ports */
  122.25 +#define OCON_IOMEM   3    /* io memory */
  122.26 +#define OCON_DEVICE  4    /* pci devices */
  122.27 +#define OCON_DUMMY1  5    /* reserved */
  122.28 +#define OCON_DUMMY2  6    /* reserved */
  122.29  #define OCON_NUM     7
  122.30  
  122.31  /* The policy database */
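
The new OCON_DEVICE slot gives the policy database a list of labeled PCI devices, keyed by the u.device field added to struct ocontext above. Lookups walk that list the same way the existing pirq/ioport/iomem lookups do; a sketch of the pattern, assuming the conventional next link between ocontext nodes (the security_device_sid() addition below begins exactly such a loop):

    c = policydb.ocontexts[OCON_DEVICE];
    while ( c )
    {
        if ( c->u.device == device )
            break;
        c = c->next;
    }
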
   123.1 --- a/xen/xsm/flask/ss/services.c	Fri Sep 12 14:32:45 2008 +0900
   123.2 +++ b/xen/xsm/flask/ss/services.c	Fri Sep 12 14:47:40 2008 +0900
   123.3 @@ -1418,6 +1418,46 @@ out:
   123.4      return rc;
   123.5  }
   123.6  
   123.7 +/**
    123.8 + * security_device_sid - Obtain the SID for a PCI device.
    123.9 + * @device: device identifier (BDF)
  123.10 + * @out_sid: security identifier
  123.11 + */
  123.12 +int security_device_sid(u32 device, u32 *out_sid)
  123.13 +{
  123.14 +    struct ocontext *c;
  123.15 +    int rc = 0;
  123.16 +
  123.17 +    POLICY_RDLOCK;
  123.18 +
  123.19 +    c = policydb.ocontexts[OCON_DEVICE];
  123.20 +    while ( c )
  123.21 +    {
  123.