ia64/xen-unstable

changeset 18483:ec8eaab557d8

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Fri Sep 12 14:47:40 2008 +0900 (2008-09-12)
parents 4ddd63b4be9b 346c073ed6a4
children 4a381ddc764a
     1.1 --- a/.hgignore	Fri Sep 12 14:32:45 2008 +0900
     1.2 +++ b/.hgignore	Fri Sep 12 14:47:40 2008 +0900
     1.3 @@ -185,7 +185,6 @@
     1.4  ^tools/misc/xenperf$
     1.5  ^tools/pygrub/build/.*$
     1.6  ^tools/python/build/.*$
     1.7 -^tools/python/xen/util/xsm/xsm\.py$
     1.8  ^tools/security/secpol_tool$
     1.9  ^tools/security/xen/.*$
    1.10  ^tools/security/xensec_tool$
     2.1 --- a/Config.mk	Fri Sep 12 14:32:45 2008 +0900
     2.2 +++ b/Config.mk	Fri Sep 12 14:47:40 2008 +0900
     2.3 @@ -86,11 +86,7 @@ QEMU_REMOTE=http://xenbits.xensource.com
     2.4  # Mercurial in-tree version, or a local directory, or a git URL.
     2.5  # CONFIG_QEMU   ?= ioemu
     2.6  # CONFIG_QEMU   ?= ../qemu-xen.git
     2.7 -ifeq ($(XEN_TARGET_ARCH),ia64)
     2.8 -CONFIG_QEMU   ?= ioemu
     2.9 -else
    2.10  CONFIG_QEMU   ?= $(QEMU_REMOTE)
    2.11 -endif
    2.12  
    2.13  # Optional components
    2.14  XENSTAT_XENTOP     ?= y
     3.1 --- a/docs/misc/vtd.txt	Fri Sep 12 14:32:45 2008 +0900
     3.2 +++ b/docs/misc/vtd.txt	Fri Sep 12 14:47:40 2008 +0900
     3.3 @@ -1,8 +1,9 @@
     3.4  Title   : How to do PCI Passthrough with VT-d
     3.5  Authors : Allen Kay    <allen.m.kay@intel.com>
     3.6            Weidong Han  <weidong.han@intel.com>
     3.7 +          Yuji Shimada <shimada-yxb@necst.nec.co.jp>
     3.8  Created : October-24-2007
     3.9 -Updated : August-06-2008
    3.10 +Updated : September-09-2008
    3.11  
    3.12  How to turn on VT-d in Xen
    3.13  --------------------------
    3.14 @@ -106,3 +107,27 @@ http://h10010.www1.hp.com/wwpc/us/en/en/
    3.15  
     3.16  For more information, please refer to http://wiki.xensource.com/xenwiki/VTdHowTo.
    3.17  
    3.18 +
    3.19 +Assigning devices to HVM domains
    3.20 +--------------------------------
    3.21 +
     3.22 +Most device types, such as NICs, HBAs, and EHCI/UHCI controllers,
     3.23 +can be assigned to an HVM domain.
    3.24 +
     3.25 +But some devices have design features that make them unsuitable
     3.26 +for assignment to an HVM domain. Examples include:
    3.27 +
     3.28 + * The device has an internal resource, such as private memory,
     3.29 +   mapped into the memory address space through a BAR (Base
     3.30 +   Address Register).
     3.31 + * The driver submits a command with a pointer to a buffer inside
     3.32 +   that internal resource; the device decodes the address and accesses the buffer.
    3.33 +
     3.34 +In an HVM domain the BAR is virtualized, so the host-BAR value and
     3.35 +the guest-BAR value differ. The address of the internal resource
     3.36 +therefore differs between the device's view and the driver's view,
     3.37 +and so does the address of any buffer within that resource. As a
     3.38 +result, the device cannot access the buffer specified by the
     3.39 +driver.
    3.40 +
     3.41 +Such devices currently do not work when assigned to an HVM domain.
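
For contrast, a device without such features is assigned to an HVM
guest through the pci line of the guest config file (syntax as in
tools/examples/xmexample.hvm; the BDF 01:00.0 below is only
illustrative):

    pci = [ '01:00.0' ]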
     4.1 --- a/docs/src/user.tex	Fri Sep 12 14:32:45 2008 +0900
     4.2 +++ b/docs/src/user.tex	Fri Sep 12 14:47:40 2008 +0900
     4.3 @@ -4252,7 +4252,7 @@ directory of the Xen source distribution
     4.4  \section{Online References}
     4.5  
     4.6  The official Xen web site can be found at:
     4.7 -\begin{quote} {\tt http://www.xensource.com}
     4.8 +\begin{quote} {\tt http://www.xen.org}
     4.9  \end{quote}
    4.10  
    4.11  This contains links to the latest versions of all online
    4.12 @@ -4282,7 +4282,7 @@ mailing lists and subscription informati
    4.13    Subscribe at: \\
    4.14    {\small {\tt http://lists.xensource.com/xen-announce}}
    4.15  \item[xen-changelog@lists.xensource.com] Changelog feed
    4.16 -  from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
    4.17 +  from the unstable and 3.x trees - developer oriented.  Subscribe at: \\
    4.18    {\small {\tt http://lists.xensource.com/xen-changelog}}
    4.19  \end{description}
    4.20  
     5.1 --- a/stubdom/README	Fri Sep 12 14:32:45 2008 +0900
     5.2 +++ b/stubdom/README	Fri Sep 12 14:47:40 2008 +0900
     5.3 @@ -27,7 +27,7 @@ device_model = '/usr/lib/xen/bin/stubdom
     5.4  - disable anything related to dom0, like pty serial assignments
     5.5  
     5.6  
     5.7 -Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
     5.8 +Create /etc/xen/hvmconfig-dm (where "hvmconfig" is the name of your HVM
     5.9  guest) with
    5.10  
    5.11  kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz"
     5.12 @@ -52,7 +52,7 @@ There are three possibilities
    5.13  vnc = 0
    5.14  sdl = 0
    5.15  
    5.16 -  - In stubdom-hvmconfig, set an sdl vfb:
    5.17 +  - In hvmconfig-dm, set an sdl vfb:
    5.18  
    5.19  vfb = [ 'type=sdl' ]
    5.20  
    5.21 @@ -65,7 +65,7 @@ then you will not be able to connect to 
    5.22  vnc = 1
    5.23  vnclisten = "172.30.206.1"
    5.24  
    5.25 -  - In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
    5.26 +  - In hvmconfig-dm, fill the reserved vif with the same IP, for instance:
    5.27  
    5.28  vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
    5.29  
    5.30 @@ -76,7 +76,7 @@ vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,
    5.31  vnc = 0
    5.32  sdl = 0
    5.33  
    5.34 -  - In stubdom-hvmconfig, set a vnc vfb:
    5.35 +  - In hvmconfig-dm, set a vnc vfb:
    5.36  
    5.37  vfb = [ 'type=vnc' ]
    5.38  
     6.1 --- a/tools/examples/init.d/xendomains	Fri Sep 12 14:32:45 2008 +0900
     6.2 +++ b/tools/examples/init.d/xendomains	Fri Sep 12 14:47:40 2008 +0900
     6.3 @@ -327,15 +327,17 @@ stop()
     6.4  	if test $id = 0; then continue; fi
     6.5  	echo -n " $name"
     6.6  	if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
     6.7 -	    case $name in
     6.8 +	    eval "
     6.9 +	    case \"\$name\" in
    6.10  		($NAMES)
    6.11  		    # nothing
    6.12  		    ;;
    6.13  		(*)
    6.14 -		    echo -n "(skip)"
    6.15 +		    echo -n '(skip)'
    6.16  		    continue
    6.17  		    ;;
    6.18  	    esac
    6.19 +	    "
    6.20  	fi
     6.21  	# XENDOMAINS_SYSRQ could be something like just "s"
    6.22  	# or "s e i u" or even "s e s i u o"
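
A minimal standalone sketch of the quoting problem this hunk fixes:
in POSIX sh, a "|" produced by parameter expansion is matched as a
literal character, not as a case-pattern separator, so the old
case $name in ($NAMES) could never match a multi-name list. Wrapping
the statement in eval makes the shell re-parse it with the pattern
already expanded:

    #!/bin/sh
    NAMES='vm1|vm2'
    name='vm2'
    case "$name" in
        ($NAMES) echo matched ;;   # no match: pattern is the literal string "vm1|vm2"
        (*)      echo skipped ;;
    esac
    eval "
    case \"\$name\" in
        ($NAMES) echo matched ;;   # matches vm2: | is now a real separator
        (*)      echo skipped ;;
    esac
    "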
     7.1 --- a/tools/examples/xend-config.sxp	Fri Sep 12 14:32:45 2008 +0900
     7.2 +++ b/tools/examples/xend-config.sxp	Fri Sep 12 14:47:40 2008 +0900
     7.3 @@ -14,6 +14,10 @@
     7.4  #(logfile /var/log/xen/xend.log)
     7.5  #(loglevel DEBUG)
     7.6  
     7.7 +# Uncomment the line below.  Set the value to flask, acm, or dummy to 
     7.8 +# select a security module.
     7.9 +
    7.10 +#(xsm_module_name dummy)
    7.11  
    7.12  # The Xen-API server configuration.
    7.13  #
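
For example, a host using the FLASK security module would uncomment
the line and set:

    (xsm_module_name flask)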
     8.1 --- a/tools/examples/xmexample.hvm	Fri Sep 12 14:32:45 2008 +0900
     8.2 +++ b/tools/examples/xmexample.hvm	Fri Sep 12 14:47:40 2008 +0900
     8.3 @@ -220,7 +220,7 @@ serial='pty'
     8.4  #   Configure guest CPUID responses:
     8.5  #
     8.6  #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
     8.7 -#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     8.8 +#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     8.9  # - Unset the SSE4 features (CPUID.1[ECX][20-19])
    8.10  # - Default behaviour for all other bits in ECX And EAX registers.
    8.11  # 
     9.1 --- a/tools/examples/xmexample.hvm-stubdom	Fri Sep 12 14:32:45 2008 +0900
     9.2 +++ b/tools/examples/xmexample.hvm-stubdom	Fri Sep 12 14:47:40 2008 +0900
     9.3 @@ -236,7 +236,7 @@ stdvga=0
     9.4  #   Configure guest CPUID responses:
     9.5  #
     9.6  #cpuid=[ '1:ecx=xxxxxxxxxxx00xxxxxxxxxxxxxxxxxxx,
     9.7 -#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     9.8 +#           eax=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' ]
     9.9  # - Unset the SSE4 features (CPUID.1[ECX][20-19])
    9.10  # - Default behaviour for all other bits in ECX And EAX registers.
    9.11  # 
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/tools/flask/policy/Makefile	Fri Sep 12 14:47:40 2008 +0900
    10.3 @@ -0,0 +1,234 @@
    10.4 +#
    10.5 +# Makefile for the security policy.
    10.6 +#
    10.7 +# Targets:
    10.8 +# 
     10.9 +# install       - compile and install the policy configuration and context files.
   10.10 +# load          - compile, install, and load the policy configuration.
   10.11 +# reload        - compile, install, and load/reload the policy configuration.
   10.12 +# policy        - compile the policy configuration locally for testing/development.
   10.13 +#
   10.14 +# The default target is 'policy'.
   10.15 +#
   10.16 +
   10.17 +########################################
   10.18 +#
   10.19 +# Configurable portions of the Makefile
   10.20 +#
   10.21 +
   10.22 +# Policy version
   10.23 +# By default, checkpolicy will create the highest
   10.24 +# version policy it supports.  Setting this will
   10.25 +# override the version.
   10.26 +OUTPUT_POLICY = 20
   10.27 +
   10.28 +# Policy Type
   10.29 +# strict, targeted,
   10.30 +# strict-mls, targeted-mls,
   10.31 +# strict-mcs, targeted-mcs
   10.32 +TYPE = strict
   10.33 +
   10.34 +# Policy Name
   10.35 +# If set, this will be used as the policy
   10.36 +# name.  Otherwise the policy type will be
   10.37 +# used for the name.
   10.38 +NAME = xenrefpolicy
   10.39 +
   10.40 +# Distribution
   10.41 +# Some distributions have portions of policy
   10.42 +# for programs or configurations specific to the
   10.43 +# distribution.  Setting this will enable options
   10.44 +# for the distribution.
   10.45 +# redhat, gentoo, debian, and suse are current options.
   10.46 +# Fedora users should enable redhat.
   10.47 +#DISTRO = 
   10.48 +
   10.49 +# Build monolithic policy.  Putting n here
   10.50 +# will build a loadable module policy.
   10.51 +MONOLITHIC=y
   10.52 +
   10.53 +# Uncomment this to disable command echoing
   10.54 +#QUIET:=@
   10.55 +
   10.56 +########################################
   10.57 +#
   10.58 +# NO OPTIONS BELOW HERE
   10.59 +#
   10.60 +
   10.61 +# executable paths
   10.62 +PREFIX := /usr
   10.63 +BINDIR := $(PREFIX)/bin
   10.64 +SBINDIR := $(PREFIX)/sbin
   10.65 +CHECKPOLICY := $(BINDIR)/checkpolicy
   10.66 +CHECKMODULE := $(BINDIR)/checkmodule
   10.67 +SEMOD_PKG := $(BINDIR)/semodule_package
   10.68 +LOADPOLICY := $(SBINDIR)/flask-loadpolicy
   10.69 +
   10.70 +CFLAGS := -Wall
   10.71 +
   10.72 +# policy source layout
   10.73 +POLDIR := policy
   10.74 +MODDIR := $(POLDIR)/modules
   10.75 +FLASKDIR := $(POLDIR)/flask
   10.76 +SECCLASS := $(FLASKDIR)/security_classes
   10.77 +ISIDS := $(FLASKDIR)/initial_sids
   10.78 +AVS := $(FLASKDIR)/access_vectors
   10.79 +
   10.80 +#policy building support tools
   10.81 +SUPPORT := support
   10.82 +FCSORT := tmp/fc_sort
   10.83 +
   10.84 +# config file paths
   10.85 +GLOBALTUN := $(POLDIR)/global_tunables
   10.86 +GLOBALBOOL := $(POLDIR)/global_booleans
   10.87 +MOD_CONF := $(POLDIR)/modules.conf
   10.88 +TUNABLES := $(POLDIR)/tunables.conf
   10.89 +BOOLEANS := $(POLDIR)/booleans.conf
   10.90 +
   10.91 +# install paths
   10.92 +TOPDIR = $(DESTDIR)/etc/xen/
   10.93 +INSTALLDIR = $(TOPDIR)/$(NAME)
   10.94 +SRCPATH = $(INSTALLDIR)/src
   10.95 +USERPATH = $(INSTALLDIR)/users
   10.96 +CONTEXTPATH = $(INSTALLDIR)/contexts
   10.97 +
   10.98 +# enable MLS if requested.
   10.99 +ifneq ($(findstring -mls,$(TYPE)),)
  10.100 +	override M4PARAM += -D enable_mls
  10.101 +	CHECKPOLICY += -M
  10.102 +	CHECKMODULE += -M
  10.103 +endif
  10.104 +
  10.105 +# enable MLS if MCS requested.
  10.106 +ifneq ($(findstring -mcs,$(TYPE)),)
  10.107 +	override M4PARAM += -D enable_mcs
  10.108 +	CHECKPOLICY += -M
  10.109 +	CHECKMODULE += -M
  10.110 +endif
  10.111 +
  10.112 +# compile targeted policy if requested.
  10.113 +ifneq ($(findstring targeted,$(TYPE)),)
  10.114 +	override M4PARAM += -D targeted_policy
  10.115 +endif
  10.116 +
  10.117 +# enable distribution-specific policy
  10.118 +ifneq ($(DISTRO),)
  10.119 +	override M4PARAM += -D distro_$(DISTRO)
  10.120 +endif
  10.121 +
  10.122 +ifneq ($(OUTPUT_POLICY),)
  10.123 +	CHECKPOLICY += -c $(OUTPUT_POLICY)
  10.124 +endif
  10.125 +
  10.126 +ifeq ($(NAME),)
  10.127 +	NAME := $(TYPE)
  10.128 +endif
  10.129 +
  10.130 +# determine the policy version and current kernel version if possible
  10.131 +PV := $(shell $(CHECKPOLICY) -V |cut -f 1 -d ' ')
  10.132 +KV := $(shell cat /selinux/policyvers)
  10.133 +
   10.134 +# don't print version warnings if we are unable to determine
  10.135 +# the currently running kernel's policy version
  10.136 +ifeq ($(KV),)
  10.137 +	KV := $(PV)
  10.138 +endif
  10.139 +
  10.140 +FC := file_contexts
  10.141 +POLVER := policy.$(PV)
  10.142 +
  10.143 +M4SUPPORT = $(wildcard $(POLDIR)/support/*.spt)
  10.144 +
  10.145 +APPCONF := config/appconfig-$(TYPE)
  10.146 +APPDIR := $(CONTEXTPATH)
  10.147 +APPFILES := $(INSTALLDIR)/booleans
  10.148 +CONTEXTFILES += $(wildcard $(APPCONF)/*_context*) $(APPCONF)/media
  10.149 +USER_FILES := $(POLDIR)/systemuser $(POLDIR)/users
  10.150 +
  10.151 +ALL_LAYERS := $(filter-out $(MODDIR)/CVS,$(shell find $(wildcard $(MODDIR)/*) -maxdepth 0 -type d))
  10.152 +
  10.153 +GENERATED_TE := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te.in)))
  10.154 +GENERATED_IF := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.if.in)))
  10.155 +GENERATED_FC := $(basename $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.fc.in)))
  10.156 +
  10.157 +# sort here since it removes duplicates, which can happen
  10.158 +# when a generated file is already generated
  10.159 +DETECTED_MODS := $(sort $(foreach dir,$(ALL_LAYERS),$(wildcard $(dir)/*.te)) $(GENERATED_TE))
  10.160 +
  10.161 +# modules.conf setting for base module
  10.162 +MODBASE := base
  10.163 +
  10.164 +# modules.conf setting for module
  10.165 +MODMOD := module
  10.166 +
  10.167 +# extract settings from modules.conf
  10.168 +BASE_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODBASE)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
  10.169 +MOD_MODS := $(foreach mod,$(shell awk '/^[[:blank:]]*[[:alpha:]]/{ if ($$3 == "$(MODMOD)") print $$1 }' $(MOD_CONF) 2> /dev/null),$(subst ./,,$(shell find -iname $(mod).te)))
  10.170 +
  10.171 +HOMEDIR_TEMPLATE = tmp/homedir_template
  10.172 +
  10.173 +########################################
  10.174 +#
  10.175 +# Load appropriate rules
  10.176 +#
  10.177 +
  10.178 +ifeq ($(MONOLITHIC),y)
  10.179 +	include Rules.monolithic
  10.180 +else
  10.181 +	include Rules.modular
  10.182 +endif
  10.183 +
  10.184 +########################################
  10.185 +#
  10.186 +# Create config files
  10.187 +#
  10.188 +conf: $(MOD_CONF) $(BOOLEANS) $(GENERATED_TE) $(GENERATED_IF) $(GENERATED_FC)
  10.189 +
  10.190 +$(MOD_CONF) $(BOOLEANS): $(POLXML)
  10.191 +	@echo "Updating $(MOD_CONF) and $(BOOLEANS)"
  10.192 +	$(QUIET) cd $(DOCS) && ../$(GENDOC) -t ../$(BOOLEANS) -m ../$(MOD_CONF) -x ../$(POLXML)
  10.193 +
  10.194 +########################################
  10.195 +#
  10.196 +# Appconfig files
  10.197 +#
  10.198 +install-appconfig: $(APPFILES)
  10.199 +
  10.200 +$(INSTALLDIR)/booleans: $(BOOLEANS)
  10.201 +	@mkdir -p $(INSTALLDIR)
  10.202 +	$(QUIET) egrep '^[[:blank:]]*[[:alpha:]]' $(BOOLEANS) \
  10.203 +		| sed -e 's/false/0/g' -e 's/true/1/g' > tmp/booleans
  10.204 +	$(QUIET) install -m 644 tmp/booleans $@
  10.205 +
  10.206 +########################################
  10.207 +#
  10.208 +# Install policy sources
  10.209 +#
  10.210 +install-src:
  10.211 +	rm -rf $(SRCPATH)/policy.old
  10.212 +	-mv $(SRCPATH)/policy $(SRCPATH)/policy.old
  10.213 +	mkdir -p $(SRCPATH)/policy
  10.214 +	cp -R . $(SRCPATH)/policy
  10.215 +
  10.216 +########################################
  10.217 +#
  10.218 +# Clean everything
  10.219 +#
  10.220 +bare: clean
  10.221 +	rm -f $(POLXML)
  10.222 +	rm -f $(SUPPORT)/*.pyc
  10.223 +	rm -f $(FCSORT)
  10.224 +	rm -f $(MOD_CONF)
  10.225 +	rm -f $(BOOLEANS)
  10.226 +	rm -fR $(HTMLDIR)
  10.227 +ifneq ($(GENERATED_TE),)
  10.228 +	rm -f $(GENERATED_TE)
  10.229 +endif
  10.230 +ifneq ($(GENERATED_IF),)
  10.231 +	rm -f $(GENERATED_IF)
  10.232 +endif
  10.233 +ifneq ($(GENERATED_FC),)
  10.234 +	rm -f $(GENERATED_FC)
  10.235 +endif
  10.236 +
  10.237 +.PHONY: install-src install-appconfig conf html bare
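
A sketch of how these targets are typically driven, using the
default paths configured above:

    cd tools/flask/policy
    make           # default target: compile the policy locally
    make install   # compile and install under /etc/xen/xenrefpolicy
    make load      # compile, install, and load the policy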
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/tools/flask/policy/Rules.modular	Fri Sep 12 14:47:40 2008 +0900
    11.3 @@ -0,0 +1,166 @@
    11.4 +########################################
    11.5 +#
    11.6 +# Rules and Targets for building modular policies
    11.7 +#
    11.8 +
    11.9 +ALL_MODULES := $(filter $(BASE_MODS) $(MOD_MODS),$(DETECTED_MODS))
   11.10 +ALL_INTERFACES := $(ALL_MODULES:.te=.if)
   11.11 +
   11.12 +BASE_PKG := base.pp
   11.13 +BASE_FC := base.fc
   11.14 +
   11.15 +BASE_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
   11.16 +
   11.17 +BASE_PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
   11.18 +BASE_TE_FILES := $(BASE_MODS)
   11.19 +BASE_POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/constraints
   11.20 +BASE_FC_FILES := $(BASE_MODS:.te=.fc)
   11.21 +
   11.22 +MOD_MODULES := $(MOD_MODS:.te=.mod)
   11.23 +MOD_PKGS := $(notdir $(MOD_MODS:.te=.pp))
   11.24 +
   11.25 +# search layer dirs for source files
   11.26 +vpath %.te $(ALL_LAYERS)
   11.27 +vpath %.if $(ALL_LAYERS)
   11.28 +vpath %.fc $(ALL_LAYERS)
   11.29 +
   11.30 +########################################
   11.31 +#
   11.32 +# default action: create all module packages
   11.33 +#
   11.34 +default: base
   11.35 +
   11.36 +base: $(BASE_PKG)
   11.37 +
   11.38 +modules: $(MOD_PKGS)
   11.39 +
   11.40 +#policy: $(POLVER)
   11.41 +#install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
   11.42 +#load: tmp/load
   11.43 +
   11.44 +########################################
   11.45 +#
   11.46 +# Create a base module package
   11.47 +#
   11.48 +$(BASE_PKG): tmp/base.mod $(BASE_FC)
   11.49 +	@echo "Creating $(NAME) base module package"
   11.50 +	$(QUIET) $(SEMOD_PKG) $@ $^
   11.51 +
   11.52 +########################################
   11.53 +#
   11.54 +# Compile a base module
   11.55 +#
   11.56 +tmp/base.mod: base.conf
   11.57 +	@echo "Compiling $(NAME) base module"
   11.58 +	$(QUIET) $(CHECKMODULE) $^ -o $@
   11.59 +
   11.60 +########################################
   11.61 +#
   11.62 +# Construct a base module policy.conf
   11.63 +#
   11.64 +base.conf: $(BASE_SECTIONS)
   11.65 +	@echo "Creating $(NAME) base module policy.conf"
   11.66 +# checkpolicy can use the #line directives provided by -s for error reporting:
   11.67 +	$(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
   11.68 +	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
   11.69 +# the ordering of these ocontexts matters:
   11.70 +	$(QUIET) grep ^portcon tmp/$@.tmp >> $@ || true
   11.71 +	$(QUIET) grep ^netifcon tmp/$@.tmp >> $@ || true
   11.72 +	$(QUIET) grep ^nodecon tmp/$@.tmp >> $@ || true
   11.73 +
   11.74 +tmp/pre_te_files.conf: $(BASE_PRE_TE_FILES)
   11.75 +	@test -d tmp || mkdir -p tmp
   11.76 +	$(QUIET) cat $^ > $@
   11.77 +
   11.78 +tmp/generated_definitions.conf: $(ALL_LAYERS) $(BASE_TE_FILES)
   11.79 +	@test -d tmp || mkdir -p tmp
   11.80 +# define all available object classes
   11.81 +	$(QUIET) $(GENPERM) $(AVS) $(SECCLASS) > $@
   11.82 +# per-userdomain templates
   11.83 +	$(QUIET) echo "define(\`per_userdomain_templates',\`" >> $@
   11.84 +	$(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
   11.85 +		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
   11.86 +			>> $@ ;\
   11.87 +	done
   11.88 +	$(QUIET) echo "')" >> $@
   11.89 +# define foo.te
   11.90 +	$(QUIET) for i in $(notdir $(BASE_TE_FILES)); do \
   11.91 +		echo "define(\`$$i')" >> $@ ;\
   11.92 +	done
   11.93 +	$(QUIET) $(SETTUN) $(BOOLEANS) >> $@
   11.94 +
   11.95 +tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
   11.96 +ifeq ($(ALL_INTERFACES),)
   11.97 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
   11.98 +endif
   11.99 +	@test -d tmp || mkdir -p tmp
  11.100 +	$(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
  11.101 +
  11.102 +tmp/all_te_files.conf: $(BASE_TE_FILES)
  11.103 +ifeq ($(BASE_TE_FILES),)
  11.104 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  11.105 +endif
  11.106 +	@test -d tmp || mkdir -p tmp
  11.107 +	$(QUIET) cat $^ > $@
  11.108 +
  11.109 +tmp/post_te_files.conf: $(BASE_POST_TE_FILES)
  11.110 +	@test -d tmp || mkdir -p tmp
  11.111 +	$(QUIET) cat $^ > $@
  11.112 +
   11.113 +# extract attributes and put them first. extract post-te stuff
   11.114 +# like genfscon and put it last.  portcon, nodecon, and netifcon
   11.115 +# are delayed since they are generated by m4
  11.116 +tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
  11.117 +	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
  11.118 +	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
  11.119 +	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
  11.120 +	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.121 +	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.122 +	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
  11.123 +	$(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
  11.124 +			-e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
  11.125 +			< tmp/all_te_files.conf > tmp/only_te_rules.conf
  11.126 +
  11.127 +########################################
  11.128 +#
  11.129 +# Construct base module file contexts
  11.130 +#
  11.131 +$(BASE_FC): $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) $(FCSORT)
  11.132 +ifeq ($(BASE_FC_FILES),)
  11.133 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  11.134 +endif
  11.135 +	@echo "Creating $(NAME) base module file contexts."
  11.136 +	@test -d tmp || mkdir -p tmp
  11.137 +	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(BASE_FC_FILES) > tmp/$@.tmp
  11.138 +	$(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
  11.139 +	$(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
  11.140 +	$(QUIET) $(FCSORT) tmp/$@.tmp $@
  11.141 +
  11.142 +########################################
  11.143 +#
  11.144 +# Build module packages
  11.145 +#
  11.146 +tmp/%.mod: $(M4SUPPORT) tmp/generated_definitions.conf tmp/all_interfaces.conf %.te
  11.147 +	@if test -z "$(filter $^,$(MOD_MODS))"; then \
   11.148 +		echo "The $(notdir $(basename $@)) module is not configured to be compiled as a loadable module." ;\
  11.149 +		false ;\
  11.150 +	fi
   11.151 +	@echo "Compiling $(NAME) $(@F) module"
  11.152 +	$(QUIET) m4 $(M4PARAM) -s $^ > $(@:.mod=.tmp)
  11.153 +	$(QUIET) $(CHECKMODULE) -m $(@:.mod=.tmp) -o $@
  11.154 +
  11.155 +%.pp: tmp/%.mod %.fc
  11.156 +	@echo "Creating $(NAME) $(@F) policy package"
  11.157 +	$(QUIET) $(SEMOD_PKG) $@ $^
  11.158 +
  11.159 +########################################
  11.160 +#
  11.161 +# Clean the sources
  11.162 +#
  11.163 +clean:
  11.164 +	rm -fR tmp
  11.165 +	rm -f base.conf
  11.166 +	rm -f *.pp
  11.167 +	rm -f $(BASE_FC)
  11.168 +
  11.169 +.PHONY: default base modules clean
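
Unrolled, the base-package rules above reduce to roughly the
following toolchain invocations (arguments exactly as the rules pass
them; shown only as a sketch):

    m4 -D self_contained_policy -s <base sections> > tmp/base.conf.tmp  # assemble base.conf
    checkmodule base.conf -o tmp/base.mod                               # compile the base module
    semodule_package base.pp tmp/base.mod base.fc                       # bundle module + file contexts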
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/tools/flask/policy/Rules.monolithic	Fri Sep 12 14:47:40 2008 +0900
    12.3 @@ -0,0 +1,196 @@
    12.4 +########################################
    12.5 +#
    12.6 +# Rules and Targets for building monolithic policies
    12.7 +#
    12.8 +
    12.9 +# install paths
   12.10 +POLICYPATH = $(INSTALLDIR)/policy
   12.11 +LOADPATH = $(POLICYPATH)/$(POLVER)
   12.12 +FCPATH = $(CONTEXTPATH)/files/file_contexts
   12.13 +HOMEDIRPATH = $(CONTEXTPATH)/files/homedir_template
   12.14 +
   12.15 +# for monolithic policy use all base and module to create policy
   12.16 +ENABLEMOD := $(BASE_MODS) $(MOD_MODS)
   12.17 +
   12.18 +ALL_MODULES := $(filter $(ENABLEMOD),$(DETECTED_MODS))
   12.19 +
   12.20 +ALL_INTERFACES := $(ALL_MODULES:.te=.if)
   12.21 +ALL_TE_FILES := $(ALL_MODULES)
   12.22 +ALL_FC_FILES := $(ALL_MODULES:.te=.fc)
   12.23 +
   12.24 +PRE_TE_FILES := $(SECCLASS) $(ISIDS) $(AVS) $(M4SUPPORT) $(POLDIR)/mls $(POLDIR)/mcs
   12.25 +POST_TE_FILES := $(POLDIR)/systemuser $(POLDIR)/users $(POLDIR)/constraints
   12.26 +
   12.27 +POLICY_SECTIONS := tmp/pre_te_files.conf tmp/generated_definitions.conf tmp/all_interfaces.conf tmp/all_attrs_types.conf $(GLOBALBOOL) $(GLOBALTUN) tmp/only_te_rules.conf tmp/all_post.conf
   12.28 +
   12.29 +########################################
   12.30 +#
   12.31 +# default action: build policy locally
   12.32 +#
   12.33 +default: policy
   12.34 +
   12.35 +policy: $(POLVER)
   12.36 +
   12.37 +install: $(LOADPATH) $(FCPATH) $(APPFILES) $(USERPATH)/local.users
   12.38 +
   12.39 +load: tmp/load
   12.40 +
   12.41 +########################################
   12.42 +#
   12.43 +# Build a binary policy locally
   12.44 +#
   12.45 +$(POLVER): policy.conf
   12.46 +	@echo "Compiling $(NAME) $(POLVER)"
   12.47 +ifneq ($(PV),$(KV))
   12.48 +	@echo
   12.49 +	@echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set correctly?"
   12.50 +	@echo
   12.51 +endif
   12.52 +	$(QUIET) $(CHECKPOLICY) $^ -o $@
   12.53 +
   12.54 +########################################
   12.55 +#
   12.56 +# Install a binary policy
   12.57 +#
   12.58 +$(LOADPATH): policy.conf
   12.59 +	@mkdir -p $(POLICYPATH)
   12.60 +	@echo "Compiling and installing $(NAME) $(LOADPATH)"
   12.61 +ifneq ($(PV),$(KV))
   12.62 +	@echo
   12.63 +	@echo "WARNING: Policy version mismatch!  Is your OUTPUT_POLICY set correctly?"
   12.64 +	@echo
   12.65 +endif
   12.66 +	$(QUIET) $(CHECKPOLICY) $^ -o $@
   12.67 +
   12.68 +########################################
   12.69 +#
   12.70 +# Load the binary policy
   12.71 +#
   12.72 +reload tmp/load: $(LOADPATH) $(FCPATH)
   12.73 +	@echo "Loading $(NAME) $(LOADPATH)"
   12.74 +	$(QUIET) $(LOADPOLICY) -q $(LOADPATH)
   12.75 +	@touch tmp/load
   12.76 +
   12.77 +########################################
   12.78 +#
   12.79 +# Construct a monolithic policy.conf
   12.80 +#
   12.81 +policy.conf: $(POLICY_SECTIONS)
   12.82 +	@echo "Creating $(NAME) policy.conf"
   12.83 +# checkpolicy can use the #line directives provided by -s for error reporting:
   12.84 +	$(QUIET) m4 -D self_contained_policy $(M4PARAM) -s $^ > tmp/$@.tmp
   12.85 +	$(QUIET) sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/$@.tmp > $@
   12.86 +
   12.87 +tmp/pre_te_files.conf: $(PRE_TE_FILES)
   12.88 +	@test -d tmp || mkdir -p tmp
   12.89 +	$(QUIET) cat $^ > $@
   12.90 +
   12.91 +tmp/generated_definitions.conf: $(ALL_LAYERS) $(ALL_TE_FILES)
   12.92 +# per-userdomain templates:
   12.93 +	@test -d tmp || mkdir -p tmp
   12.94 +	$(QUIET) echo "define(\`per_userdomain_templates',\`" > $@
   12.95 +	$(QUIET) for i in $(patsubst %.te,%,$(notdir $(ALL_MODULES))); do \
   12.96 +		echo "ifdef(\`""$$i""_per_userdomain_template',\`""$$i""_per_userdomain_template("'$$*'")')" \
   12.97 +			>> $@ ;\
   12.98 +	done
   12.99 +	$(QUIET) echo "')" >> $@
  12.100 +# define foo.te
  12.101 +	$(QUIET) for i in $(notdir $(ALL_MODULES)); do \
  12.102 +		echo "define(\`$$i')" >> $@ ;\
  12.103 +	done
  12.104 +#	$(QUIET) $(SETTUN) $(BOOLEANS) >> $@
  12.105 +
  12.106 +tmp/all_interfaces.conf: $(M4SUPPORT) $(ALL_INTERFACES)
  12.107 +ifeq ($(ALL_INTERFACES),)
  12.108 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.109 +endif
  12.110 +	@test -d tmp || mkdir -p tmp
  12.111 +	$(QUIET) m4 $^ | sed -e s/dollarsstar/\$$\*/g > $@
  12.112 +
  12.113 +tmp/all_te_files.conf: $(ALL_TE_FILES)
  12.114 +ifeq ($(ALL_TE_FILES),)
  12.115 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.116 +endif
  12.117 +	@test -d tmp || mkdir -p tmp
  12.118 +	$(QUIET) cat $^ > $@
  12.119 +
  12.120 +tmp/post_te_files.conf: $(POST_TE_FILES)
  12.121 +	@test -d tmp || mkdir -p tmp
  12.122 +	$(QUIET) cat $^ > $@
  12.123 +
   12.124 +# extract attributes and put them first. extract post-te stuff
   12.125 +# like genfscon and put it last.  portcon, nodecon, and netifcon
   12.126 +# are delayed since they are generated by m4
  12.127 +tmp/all_attrs_types.conf tmp/only_te_rules.conf tmp/all_post.conf: tmp/all_te_files.conf tmp/post_te_files.conf
  12.128 +	$(QUIET) grep ^attribute tmp/all_te_files.conf > tmp/all_attrs_types.conf || true
  12.129 +	$(QUIET) grep '^type ' tmp/all_te_files.conf >> tmp/all_attrs_types.conf
  12.130 +	$(QUIET) cat tmp/post_te_files.conf > tmp/all_post.conf
  12.131 +	$(QUIET) grep '^sid ' tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.132 +	$(QUIET) egrep '^fs_use_(xattr|task|trans)' tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.133 +	$(QUIET) grep ^genfscon tmp/all_te_files.conf >> tmp/all_post.conf || true
  12.134 +	$(QUIET) sed -r -e /^attribute/d -e '/^type /d' -e /^genfscon/d \
  12.135 +			-e '/^sid /d' -e '/^fs_use_(xattr|task|trans)/d' \
  12.136 +			< tmp/all_te_files.conf > tmp/only_te_rules.conf
  12.137 +
  12.138 +########################################
  12.139 +#
  12.140 +# Remove the dontaudit rules from the policy.conf
  12.141 +#
  12.142 +enableaudit: policy.conf
  12.143 +	@test -d tmp || mkdir -p tmp
  12.144 +	@echo "Removing dontaudit rules from policy.conf"
  12.145 +	$(QUIET) grep -v dontaudit policy.conf > tmp/policy.audit
  12.146 +	$(QUIET) mv tmp/policy.audit policy.conf
  12.147 +
  12.148 +########################################
  12.149 +#
  12.150 +# Construct file_contexts
  12.151 +#
  12.152 +$(FC): $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES)
  12.153 +ifeq ($(ALL_FC_FILES),)
  12.154 +	$(error No enabled modules! $(notdir $(MOD_CONF)) may need to be generated by using "make conf")
  12.155 +endif
  12.156 +	@echo "Creating $(NAME) file_contexts."
  12.157 +	@test -d tmp || mkdir -p tmp
  12.158 +	$(QUIET) m4 $(M4PARAM) $(M4SUPPORT) tmp/generated_definitions.conf $(ALL_FC_FILES) > tmp/$@.tmp
  12.159 +#	$(QUIET) grep -e HOME -e ROLE tmp/$@.tmp > $(HOMEDIR_TEMPLATE)
  12.160 +#	$(QUIET) sed -i -e /HOME/d -e /ROLE/d tmp/$@.tmp
  12.161 +#	$(QUIET) $(FCSORT) tmp/$@.tmp $@
  12.162 +	$(QUIET) touch $(HOMEDIR_TEMPLATE)
  12.163 +	$(QUIET) touch $@
  12.164 +
  12.165 +########################################
  12.166 +#
  12.167 +# Install file_contexts
  12.168 +#
  12.169 +$(FCPATH): $(FC) $(LOADPATH) $(USERPATH)/system.users
  12.170 +	@echo "Validating $(NAME) file_contexts."
  12.171 +#	$(QUIET) $(SETFILES) -q -c $(LOADPATH) $(FC)
  12.172 +	@echo "Installing file_contexts."
  12.173 +	@mkdir -p $(CONTEXTPATH)/files
  12.174 +	$(QUIET) install -m 644 $(FC) $(FCPATH)
  12.175 +	$(QUIET) install -m 644 $(HOMEDIR_TEMPLATE) $(HOMEDIRPATH)
  12.176 +#	$(QUIET) $(GENHOMEDIRCON) -d $(TOPDIR) -t $(NAME) $(USEPWD)
  12.177 +
  12.178 +########################################
  12.179 +#
  12.180 +# Run policy source checks
  12.181 +#
  12.182 +check: policy.conf $(FC)
  12.183 +	$(SECHECK) -s --profile=development --policy=policy.conf --fcfile=$(FC) > $@.res
  12.184 +
  12.185 +longcheck: policy.conf $(FC)
  12.186 +	$(SECHECK) -s --profile=all --policy=policy.conf --fcfile=$(FC) > $@.res
  12.187 +
  12.188 +########################################
  12.189 +#
  12.190 +# Clean the sources
  12.191 +#
  12.192 +clean:
  12.193 +	rm -fR tmp
  12.194 +	rm -f policy.conf
  12.195 +	rm -f policy.$(PV)
  12.196 +	rm -f $(FC)
  12.197 +	rm -f *.res
  12.198 +
  12.199 +.PHONY: default policy install load reload enableaudit checklabels restorelabels relabel check longcheck clean
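
For the default monolithic build, "make load" therefore expands to
roughly the following (OUTPUT_POLICY=20 and NAME=xenrefpolicy per the
top-level Makefile):

    m4 -D self_contained_policy -s <policy sections> > tmp/policy.conf.tmp
    sed -e /^portcon/d -e /^nodecon/d -e /^netifcon/d < tmp/policy.conf.tmp > policy.conf
    checkpolicy -c 20 policy.conf -o policy.20
    flask-loadpolicy -q /etc/xen/xenrefpolicy/policy/policy.20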
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/tools/flask/policy/policy/constraints	Fri Sep 12 14:47:40 2008 +0900
    13.3 @@ -0,0 +1,27 @@
    13.4 +
    13.5 +#
    13.6 +# Define the constraints
    13.7 +#
    13.8 +# constrain class_set perm_set expression ;
    13.9 +#
   13.10 +# expression : ( expression ) 
   13.11 +#	     | not expression
   13.12 +#	     | expression and expression
   13.13 +#	     | expression or expression
   13.14 +#	     | u1 op u2
   13.15 +#	     | r1 role_op r2
   13.16 +#	     | t1 op t2
   13.17 +#	     | u1 op names
   13.18 +#	     | u2 op names
   13.19 +#	     | r1 op names
   13.20 +#	     | r2 op names
   13.21 +#	     | t1 op names
   13.22 +#	     | t2 op names
   13.23 +#
   13.24 +# op : == | != 
   13.25 +# role_op : == | != | eq | dom | domby | incomp
   13.26 +#
   13.27 +# names : name | { name_list }
   13.28 +# name_list : name | name_list name		
   13.29 +#
   13.30 +
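
No constraints are defined yet. As a purely hypothetical instance of
the grammar (class and permission names taken from access_vectors),
a rule restricting domain creation to a single user identity would
read:

    # constrain domain create ( u1 == u2 );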
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/tools/flask/policy/policy/flask/Makefile	Fri Sep 12 14:47:40 2008 +0900
    14.3 @@ -0,0 +1,41 @@
    14.4 +# flask needs to know where to export the libselinux headers.
    14.5 +LIBSEL ?= ../../libselinux
    14.6 +
    14.7 +# flask needs to know where to export the kernel headers.
    14.8 +LINUXDIR ?= ../../../linux-2.6
    14.9 +
   14.10 +AWK = awk
   14.11 +
   14.12 +CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
   14.13 +          else if [ -x /bin/bash ]; then echo /bin/bash; \
   14.14 +          else echo sh; fi ; fi)
   14.15 +
   14.16 +FLASK_H_DEPEND = security_classes initial_sids
   14.17 +AV_H_DEPEND = access_vectors
   14.18 +
   14.19 +FLASK_H_FILES = class_to_string.h flask.h initial_sid_to_string.h
   14.20 +AV_H_FILES = av_inherit.h common_perm_to_string.h av_perm_to_string.h av_permissions.h
   14.21 +ALL_H_FILES = $(FLASK_H_FILES) $(AV_H_FILES)
   14.22 +
   14.23 +all:  $(ALL_H_FILES)
   14.24 +
   14.25 +$(FLASK_H_FILES): $(FLASK_H_DEPEND)
   14.26 +	$(CONFIG_SHELL) mkflask.sh $(AWK) $(FLASK_H_DEPEND)
   14.27 +
   14.28 +$(AV_H_FILES): $(AV_H_DEPEND)
   14.29 +	$(CONFIG_SHELL) mkaccess_vector.sh $(AWK) $(AV_H_DEPEND)
   14.30 +
   14.31 +tolib: all
   14.32 +	install -m 644 flask.h av_permissions.h $(LIBSEL)/include/selinux
   14.33 +	install -m 644 class_to_string.h av_inherit.h common_perm_to_string.h av_perm_to_string.h $(LIBSEL)/src
   14.34 +
   14.35 +tokern: all
   14.36 +	install -m 644 $(ALL_H_FILES) $(LINUXDIR)/security/selinux/include
   14.37 +
   14.38 +install: all
   14.39 +
   14.40 +relabel:
   14.41 +
   14.42 +clean:  
   14.43 +	rm -f $(FLASK_H_FILES)
   14.44 +	rm -f $(AV_H_FILES)
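
Typical use, per the targets above (LIBSEL and LINUXDIR only matter
for the tolib and tokern targets):

    make                                 # regenerate all headers via mkflask.sh / mkaccess_vector.sh
    make tolib LIBSEL=../../libselinux   # copy the headers into a libselinux tree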
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/tools/flask/policy/policy/flask/access_vectors	Fri Sep 12 14:47:40 2008 +0900
    15.3 @@ -0,0 +1,166 @@
    15.4 +#
    15.5 +# Define common prefixes for access vectors
    15.6 +#
    15.7 +# common common_name { permission_name ... }
    15.8 +
    15.9 +#
   15.10 +# Define a common prefix for file access vectors.
   15.11 +#
   15.12 +
   15.13 +
   15.14 +#
   15.15 +# Define the access vectors.
   15.16 +#
   15.17 +# class class_name [ inherits common_name ] { permission_name ... }
   15.18 +
   15.19 +
   15.20 +#
   15.21 +# Define the access vector interpretation for file-related objects.
   15.22 +#
   15.23 +
   15.24 +class xen
   15.25 +{
   15.26 +	scheduler
   15.27 +	settime
   15.28 +	tbufcontrol
   15.29 +	readconsole
   15.30 +	clearconsole
   15.31 +	perfcontrol
   15.32 +	mtrr_add
   15.33 +	mtrr_del
   15.34 +	mtrr_read
   15.35 +	microcode
   15.36 +	physinfo
   15.37 +	quirk
    15.38 +	writeconsole
    15.39 +	readapic
    15.40 +	writeapic
    15.41 +	privprofile
    15.42 +	nonprivprofile
    15.43 +	kexec
   15.44 +	firmware
   15.45 +	sleep
   15.46 +	frequency
   15.47 +	getidle
   15.48 +	debug
   15.49 +	getcpuinfo
   15.50 +	heap
   15.51 +}
   15.52 +
   15.53 +class domain
   15.54 +{
   15.55 +	setvcpucontext
   15.56 +	pause
   15.57 +	unpause
    15.58 +	resume
    15.59 +	create
    15.60 +	transition
    15.61 +	max_vcpus
    15.62 +	destroy
    15.63 +	setvcpuaffinity
   15.64 +	getvcpuaffinity
   15.65 +	scheduler
   15.66 +	getdomaininfo
   15.67 +	getvcpuinfo
   15.68 +	getvcpucontext
   15.69 +	setdomainmaxmem
   15.70 +	setdomainhandle
   15.71 +	setdebugging
   15.72 +	hypercall
    15.73 +	settime
    15.74 +	set_target
    15.75 +	shutdown
    15.76 +	setaddrsize
    15.77 +	getaddrsize
   15.78 +	trigger
   15.79 +	getextvcpucontext
   15.80 +	setextvcpucontext
   15.81 +}
   15.82 +
   15.83 +class hvm
   15.84 +{
    15.85 +	sethvmc
    15.86 +	gethvmc
    15.87 +	setparam
    15.88 +	getparam
    15.89 +	pcilevel
    15.90 +	irqlevel
    15.91 +	pciroute
   15.92 +	bind_irq
   15.93 +	cacheattr
   15.94 +}
   15.95 +
   15.96 +class event
   15.97 +{
   15.98 +	bind
   15.99 +	send
  15.100 +	status
  15.101 +	notify
  15.102 +	create
   15.103 +	vector
   15.104 +	reset
  15.105 +}
  15.106 +
  15.107 +class grant
  15.108 +{
  15.109 +	map_read
  15.110 +	map_write
  15.111 +	unmap
  15.112 +	transfer
  15.113 +	setup
   15.114 +	copy
   15.115 +	query
  15.116 +}
  15.117 +
  15.118 +class mmu
  15.119 +{
  15.120 +	map_read
  15.121 +	map_write
  15.122 +	pageinfo
  15.123 +	pagelist
   15.124 +	adjust
   15.125 +	stat
   15.126 +	translategp
   15.127 +	updatemp
   15.128 +	physmap
   15.129 +	pinpage
   15.130 +	mfnlist
   15.131 +	memorymap
  15.132 +}
  15.133 +
  15.134 +class shadow
  15.135 +{
  15.136 +	disable
  15.137 +	enable
   15.138 +	logdirty
  15.139 +}
  15.140 +
  15.141 +class resource
  15.142 +{
  15.143 +	add
  15.144 +	remove
  15.145 +	use
  15.146 +	add_irq
  15.147 +	remove_irq
  15.148 +	add_ioport
  15.149 +	remove_ioport
  15.150 +	add_iomem
  15.151 +	remove_iomem
  15.152 +	stat_device
  15.153 +	add_device
  15.154 +	remove_device
  15.155 +}
  15.156 +
  15.157 +class security
  15.158 +{
  15.159 +	compute_av
  15.160 +	compute_create
  15.161 +	compute_member
  15.162 +	check_context
  15.163 +	load_policy
  15.164 +	compute_relabel
  15.165 +	compute_user
  15.166 +	setenforce
  15.167 +	setbool
  15.168 +	setsecparam
  15.169 +}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/tools/flask/policy/policy/flask/initial_sids	Fri Sep 12 14:47:40 2008 +0900
    16.3 @@ -0,0 +1,17 @@
    16.4 +# FLASK
    16.5 +
    16.6 +#
    16.7 +# Define initial security identifiers 
    16.8 +#
    16.9 +sid xen
   16.10 +sid dom0
   16.11 +sid domU
   16.12 +sid domio
   16.13 +sid domxen
   16.14 +sid unlabeled
   16.15 +sid security
   16.16 +sid ioport
   16.17 +sid iomem
   16.18 +sid pirq
   16.19 +sid device
   16.20 +# FLASK
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/tools/flask/policy/policy/flask/mkaccess_vector.sh	Fri Sep 12 14:47:40 2008 +0900
    17.3 @@ -0,0 +1,227 @@
    17.4 +#!/bin/sh -
    17.5 +#
    17.6 +
    17.7 +# FLASK
    17.8 +
    17.9 +set -e
   17.10 +
   17.11 +awk=$1
   17.12 +shift
   17.13 +
   17.14 +# output files
   17.15 +av_permissions="av_permissions.h"
   17.16 +av_inherit="av_inherit.h"
   17.17 +common_perm_to_string="common_perm_to_string.h"
   17.18 +av_perm_to_string="av_perm_to_string.h"
   17.19 +
   17.20 +cat $* | $awk "
   17.21 +BEGIN	{
   17.22 +		outfile = \"$av_permissions\"
   17.23 +		inheritfile = \"$av_inherit\"
   17.24 +		cpermfile = \"$common_perm_to_string\"
   17.25 +		avpermfile = \"$av_perm_to_string\"
   17.26 +		"'
   17.27 +		nextstate = "COMMON_OR_AV";
   17.28 +		printf("/* This file is automatically generated.  Do not edit. */\n") > outfile;
   17.29 +		printf("/* This file is automatically generated.  Do not edit. */\n") > inheritfile;
   17.30 +		printf("/* This file is automatically generated.  Do not edit. */\n") > cpermfile;
   17.31 +		printf("/* This file is automatically generated.  Do not edit. */\n") > avpermfile;
   17.32 +;
   17.33 +	}
   17.34 +/^[ \t]*#/	{ 
   17.35 +			next;
   17.36 +		}
   17.37 +$1 == "common"	{ 
   17.38 +			if (nextstate != "COMMON_OR_AV")
   17.39 +			{
   17.40 +				printf("Parse error:  Unexpected COMMON definition on line %d\n", NR);
   17.41 +				next;	
   17.42 +			}
   17.43 +
   17.44 +			if ($2 in common_defined)
   17.45 +			{
   17.46 +				printf("Duplicate COMMON definition for %s on line %d.\n", $2, NR);
   17.47 +				next;
   17.48 +			}	
   17.49 +			common_defined[$2] = 1;
   17.50 +
   17.51 +			tclass = $2;
   17.52 +			common_name = $2; 
   17.53 +			permission = 1;
   17.54 +
   17.55 +			printf("TB_(common_%s_perm_to_string)\n", $2) > cpermfile;
   17.56 +
   17.57 +			nextstate = "COMMON-OPENBRACKET";
   17.58 +			next;
   17.59 +		}
   17.60 +$1 == "class"	{
   17.61 +			if (nextstate != "COMMON_OR_AV" &&
   17.62 +			    nextstate != "CLASS_OR_CLASS-OPENBRACKET")
   17.63 +			{
   17.64 +				printf("Parse error:  Unexpected class definition on line %d\n", NR);
   17.65 +				next;	
   17.66 +			}
   17.67 +
   17.68 +			tclass = $2;
   17.69 +
   17.70 +			if (tclass in av_defined)
   17.71 +			{
   17.72 +				printf("Duplicate access vector definition for %s on line %d\n", tclass, NR);
   17.73 +				next;
   17.74 +			} 
   17.75 +			av_defined[tclass] = 1;
   17.76 +
   17.77 +			inherits = "";
   17.78 +			permission = 1;
   17.79 +
   17.80 +			nextstate = "INHERITS_OR_CLASS-OPENBRACKET";
   17.81 +			next;
   17.82 +		}
   17.83 +$1 == "inherits" {			
   17.84 +			if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET")
   17.85 +			{
   17.86 +				printf("Parse error:  Unexpected INHERITS definition on line %d\n", NR);
   17.87 +				next;	
   17.88 +			}
   17.89 +
   17.90 +			if (!($2 in common_defined))
   17.91 +			{
   17.92 +				printf("COMMON %s is not defined (line %d).\n", $2, NR);
   17.93 +				next;
   17.94 +			}
   17.95 +
   17.96 +			inherits = $2;
   17.97 +			permission = common_base[$2];
   17.98 +
   17.99 +			for (combined in common_perms)
  17.100 +			{
  17.101 +				split(combined,separate, SUBSEP);
  17.102 +				if (separate[1] == inherits)
  17.103 +				{
  17.104 +					inherited_perms[common_perms[combined]] = separate[2];
  17.105 +				}
  17.106 +			}
  17.107 +
  17.108 +                        j = 1;
  17.109 +                        for (i in inherited_perms) {
  17.110 +                            ind[j] = i + 0;
  17.111 +                            j++;
  17.112 +                        }
  17.113 +                        n = asort(ind);
  17.114 +			for (i = 1; i <= n; i++) {
  17.115 +				perm = inherited_perms[ind[i]];
  17.116 +				printf("#define %s__%s", toupper(tclass), toupper(perm)) > outfile; 
  17.117 +				spaces = 40 - (length(perm) + length(tclass));
  17.118 +				if (spaces < 1)
  17.119 +				      spaces = 1;
  17.120 +				for (j = 0; j < spaces; j++) 
  17.121 +					printf(" ") > outfile; 
  17.122 +				printf("0x%08xUL\n", ind[i]) > outfile; 
  17.123 +			}
  17.124 +			printf("\n") > outfile;
  17.125 +                        for (i in ind) delete ind[i];
  17.126 +                        for (i in inherited_perms) delete inherited_perms[i];
  17.127 +
  17.128 +			printf("   S_(SECCLASS_%s, %s, 0x%08xUL)\n", toupper(tclass), inherits, permission) > inheritfile; 
  17.129 +
  17.130 +			nextstate = "CLASS_OR_CLASS-OPENBRACKET";
  17.131 +			next;
  17.132 +		}
  17.133 +$1 == "{"	{ 
  17.134 +			if (nextstate != "INHERITS_OR_CLASS-OPENBRACKET" &&
  17.135 +			    nextstate != "CLASS_OR_CLASS-OPENBRACKET" &&
  17.136 +			    nextstate != "COMMON-OPENBRACKET")
  17.137 +			{
  17.138 +				printf("Parse error:  Unexpected { on line %d\n", NR);
  17.139 +				next;
  17.140 +			}
  17.141 +
  17.142 +			if (nextstate == "INHERITS_OR_CLASS-OPENBRACKET")
  17.143 +				nextstate = "CLASS-CLOSEBRACKET";
  17.144 +
  17.145 +			if (nextstate == "CLASS_OR_CLASS-OPENBRACKET")
  17.146 +				nextstate = "CLASS-CLOSEBRACKET";
  17.147 +
  17.148 +			if (nextstate == "COMMON-OPENBRACKET")
  17.149 +				nextstate = "COMMON-CLOSEBRACKET";
  17.150 +		}
  17.151 +/[a-z][a-z_]*/	{
  17.152 +			if (nextstate != "COMMON-CLOSEBRACKET" &&
  17.153 +			    nextstate != "CLASS-CLOSEBRACKET")
  17.154 +			{
  17.155 +				printf("Parse error:  Unexpected symbol %s on line %d\n", $1, NR);		
  17.156 +				next;
  17.157 +			}
  17.158 +
  17.159 +			if (nextstate == "COMMON-CLOSEBRACKET")
  17.160 +			{
  17.161 +				if ((common_name,$1) in common_perms)
  17.162 +				{
  17.163 +					printf("Duplicate permission %s for common %s on line %d.\n", $1, common_name, NR);
  17.164 +					next;
  17.165 +				}
  17.166 +
  17.167 +				common_perms[common_name,$1] = permission;
  17.168 +
  17.169 +				printf("#define COMMON_%s__%s", toupper(common_name), toupper($1)) > outfile; 
  17.170 +
  17.171 +				printf("    S_(\"%s\")\n", $1) > cpermfile;
  17.172 +			}
  17.173 +			else
  17.174 +			{
  17.175 +				if ((tclass,$1) in av_perms)
  17.176 +				{
  17.177 +					printf("Duplicate permission %s for %s on line %d.\n", $1, tclass, NR);
  17.178 +					next;
  17.179 +				}
  17.180 +
  17.181 +				av_perms[tclass,$1] = permission;
  17.182 +		
  17.183 +				if (inherits != "")
  17.184 +				{
  17.185 +					if ((inherits,$1) in common_perms)
  17.186 +					{
   17.187 +						printf("Permission %s in %s conflicts with common permission in %s (line %d).\n", $1, tclass, inherits, NR);
  17.188 +						next;
  17.189 +					}
  17.190 +				}
  17.191 +
  17.192 +				printf("#define %s__%s", toupper(tclass), toupper($1)) > outfile; 
  17.193 +
  17.194 +				printf("   S_(SECCLASS_%s, %s__%s, \"%s\")\n", toupper(tclass), toupper(tclass), toupper($1), $1) > avpermfile; 
  17.195 +			}
  17.196 +
  17.197 +			spaces = 40 - (length($1) + length(tclass));
  17.198 +			if (spaces < 1)
  17.199 +			      spaces = 1;
  17.200 +
  17.201 +			for (i = 0; i < spaces; i++) 
  17.202 +				printf(" ") > outfile; 
  17.203 +			printf("0x%08xUL\n", permission) > outfile; 
  17.204 +			permission = permission * 2;
  17.205 +		}
  17.206 +$1 == "}"	{
  17.207 +			if (nextstate != "CLASS-CLOSEBRACKET" && 
  17.208 +			    nextstate != "COMMON-CLOSEBRACKET")
  17.209 +			{
  17.210 +				printf("Parse error:  Unexpected } on line %d\n", NR);
  17.211 +				next;
  17.212 +			}
  17.213 +
  17.214 +			if (nextstate == "COMMON-CLOSEBRACKET")
  17.215 +			{
  17.216 +				common_base[common_name] = permission;
  17.217 +				printf("TE_(common_%s_perm_to_string)\n\n", common_name) > cpermfile; 
  17.218 +			}
  17.219 +
  17.220 +			printf("\n") > outfile;
  17.221 +
  17.222 +			nextstate = "COMMON_OR_AV";
  17.223 +		}
  17.224 +END	{
  17.225 +		if (nextstate != "COMMON_OR_AV" && nextstate != "CLASS_OR_CLASS-OPENBRACKET")
  17.226 +			printf("Parse error:  Unexpected end of file\n");
  17.227 +
  17.228 +	}'
  17.229 +
  17.230 +# FLASK
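
As a rough sketch of the output: fed the "class xen" block from
access_vectors in this changeset, the script writes lines like the
following to av_permissions.h (permission bits double each time;
column widths approximate):

    #define XEN__SCHEDULER                          0x00000001UL
    #define XEN__SETTIME                            0x00000002UL
    #define XEN__TBUFCONTROL                        0x00000004UL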
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/tools/flask/policy/policy/flask/mkflask.sh	Fri Sep 12 14:47:40 2008 +0900
    18.3 @@ -0,0 +1,95 @@
    18.4 +#!/bin/sh -
    18.5 +#
    18.6 +
    18.7 +# FLASK
    18.8 +
    18.9 +set -e
   18.10 +
   18.11 +awk=$1
   18.12 +shift 1
   18.13 +
   18.14 +# output file
   18.15 +output_file="flask.h"
   18.16 +debug_file="class_to_string.h"
   18.17 +debug_file2="initial_sid_to_string.h"
   18.18 +
   18.19 +cat $* | $awk "
   18.20 +BEGIN	{
   18.21 +		outfile = \"$output_file\"
   18.22 +		debugfile = \"$debug_file\"
   18.23 +		debugfile2 = \"$debug_file2\"
   18.24 +		"'
   18.25 +		nextstate = "CLASS";
   18.26 +
   18.27 +		printf("/* This file is automatically generated.  Do not edit. */\n") > outfile;
   18.28 +
   18.29 +		printf("#ifndef _SELINUX_FLASK_H_\n") > outfile;
   18.30 +		printf("#define _SELINUX_FLASK_H_\n") > outfile;
   18.31 +		printf("\n/*\n * Security object class definitions\n */\n") > outfile;
   18.32 +		printf("/* This file is automatically generated.  Do not edit. */\n") > debugfile;
   18.33 +		printf("/*\n * Security object class definitions\n */\n") > debugfile;
   18.34 +		printf("    S_(\"null\")\n") > debugfile;
   18.35 +		printf("/* This file is automatically generated.  Do not edit. */\n") > debugfile2;
   18.36 +		printf("static char *initial_sid_to_string[] =\n{\n") > debugfile2;
   18.37 +		printf("    \"null\",\n") > debugfile2;
   18.38 +	}
   18.39 +/^[ \t]*#/	{ 
   18.40 +			next;
   18.41 +		}
   18.42 +$1 == "class"	{ 
   18.43 +			if (nextstate != "CLASS")
   18.44 +			{
   18.45 +				printf("Parse error:  Unexpected class definition on line %d\n", NR);
   18.46 +				next;	
   18.47 +			}
   18.48 +
   18.49 +			if ($2 in class_found)
   18.50 +			{
   18.51 +				printf("Duplicate class definition for %s on line %d.\n", $2, NR);
   18.52 +				next;
   18.53 +			}	
   18.54 +			class_found[$2] = 1;
   18.55 +
   18.56 +			class_value++;
   18.57 +
   18.58 +			printf("#define SECCLASS_%s", toupper($2)) > outfile;
   18.59 +			for (i = 0; i < 40 - length($2); i++) 
   18.60 +				printf(" ") > outfile; 
   18.61 +			printf("%d\n", class_value) > outfile; 
   18.62 +
   18.63 +			printf("    S_(\"%s\")\n", $2) > debugfile;
   18.64 +		}
   18.65 +$1 == "sid"	{ 
   18.66 +			if (nextstate == "CLASS")
   18.67 +			{
   18.68 +			    nextstate = "SID";
   18.69 +			    printf("\n/*\n * Security identifier indices for initial entities\n */\n") > outfile;			    
   18.70 +			}
   18.71 +
   18.72 +			if ($2 in sid_found)
   18.73 +			{
   18.74 +				printf("Duplicate SID definition for %s on line %d.\n", $2, NR);
   18.75 +				next;
   18.76 +			}	
   18.77 +			sid_found[$2] = 1;
   18.78 +			sid_value++;
   18.79 +
   18.80 +			printf("#define SECINITSID_%s", toupper($2)) > outfile;
   18.81 +			for (i = 0; i < 37 - length($2); i++) 
   18.82 +				printf(" ") > outfile; 
   18.83 +			printf("%d\n", sid_value) > outfile; 
   18.84 +			printf("    \"%s\",\n", $2) > debugfile2;
   18.85 +		}
   18.86 +END	{
   18.87 +		if (nextstate != "SID")
   18.88 +			printf("Parse error:  Unexpected end of file\n");
   18.89 +
   18.90 +		printf("\n#define SECINITSID_NUM") > outfile;
   18.91 +		for (i = 0; i < 34; i++) 
   18.92 +			printf(" ") > outfile; 
   18.93 +		printf("%d\n", sid_value) > outfile; 
   18.94 +		printf("\n#endif\n") > outfile;
   18.95 +		printf("};\n\n") > debugfile2;
   18.96 +	}'
   18.97 +
   18.98 +# FLASK
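
Fed the security_classes and initial_sids files in this changeset,
the script emits roughly the following into flask.h (class and SID
numbers are assigned in file order, starting at 1):

    #define SECCLASS_XEN                            1
    #define SECCLASS_DOMAIN                         2
    #define SECINITSID_XEN                          1
    #define SECINITSID_DOM0                         2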
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/tools/flask/policy/policy/flask/security_classes	Fri Sep 12 14:47:40 2008 +0900
    19.3 @@ -0,0 +1,20 @@
    19.4 +# FLASK
    19.5 +
    19.6 +#
    19.7 +# Define the security object classes 
    19.8 +#
    19.9 +
   19.10 +# Classes marked as userspace are classes
   19.11 +# for userspace object managers
   19.12 +
   19.13 +class xen
   19.14 +class domain
   19.15 +class hvm
   19.16 +class mmu
   19.17 +class resource
   19.18 +class shadow
   19.19 +class event
   19.20 +class grant
   19.21 +class security
   19.22 +
   19.23 +# FLASK
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/tools/flask/policy/policy/global_booleans	Fri Sep 12 14:47:40 2008 +0900
    20.3 @@ -0,0 +1,5 @@
    20.4 +#
    20.5 +# This file is for the declaration of global booleans.
    20.6 +# To change the default value at build time, the booleans.conf
    20.7 +# file should be used.
    20.8 +#
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/tools/flask/policy/policy/global_tunables	Fri Sep 12 14:47:40 2008 +0900
    21.3 @@ -0,0 +1,6 @@
    21.4 +#
    21.5 +# This file is for the declaration of global tunables.
    21.6 +# To change the default value at build time, the booleans.conf
    21.7 +# file should be used.
    21.8 +#
    21.9 +
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/tools/flask/policy/policy/mcs	Fri Sep 12 14:47:40 2008 +0900
    22.3 @@ -0,0 +1,324 @@
    22.4 +ifdef(`enable_mcs',`
    22.5 +#
    22.6 +# Define sensitivities 
    22.7 +#
    22.8 +# Each sensitivity has a name and zero or more aliases.
    22.9 +#
   22.10 +# MCS is single-sensitivity.
   22.11 +#
   22.12 +sensitivity s0;
   22.13 +
   22.14 +#
   22.15 +# Define the ordering of the sensitivity levels (least to greatest)
   22.16 +#
   22.17 +dominance { s0 }
   22.18 +
   22.19 +
   22.20 +#
   22.21 +# Define the categories
   22.22 +#
   22.23 +# Each category has a name and zero or more aliases.
   22.24 +#
   22.25 +category c0;
   22.26 +category c1;
   22.27 +category c2;
   22.28 +category c3;
   22.29 +category c4;
   22.30 +category c5;
   22.31 +category c6;
   22.32 +category c7;
   22.33 +category c8;
   22.34 +category c9;
   22.35 +category c10;
   22.36 +category c11;
   22.37 +category c12;
   22.38 +category c13;
   22.39 +category c14;
   22.40 +category c15;
   22.41 +category c16;
   22.42 +category c17;
   22.43 +category c18;
   22.44 +category c19;
   22.45 +category c20;
   22.46 +category c21;
   22.47 +category c22;
   22.48 +category c23;
   22.49 +category c24;
   22.50 +category c25;
   22.51 +category c26;
   22.52 +category c27;
   22.53 +category c28;
   22.54 +category c29;
   22.55 +category c30;
   22.56 +category c31;
   22.57 +category c32;
   22.58 +category c33;
   22.59 +category c34;
   22.60 +category c35;
   22.61 +category c36;
   22.62 +category c37;
   22.63 +category c38;
   22.64 +category c39;
   22.65 +category c40;
   22.66 +category c41;
   22.67 +category c42;
   22.68 +category c43;
   22.69 +category c44;
   22.70 +category c45;
   22.71 +category c46;
   22.72 +category c47;
   22.73 +category c48;
   22.74 +category c49;
   22.75 +category c50;
   22.76 +category c51;
   22.77 +category c52;
   22.78 +category c53;
   22.79 +category c54;
   22.80 +category c55;
   22.81 +category c56;
   22.82 +category c57;
   22.83 +category c58;
   22.84 +category c59;
   22.85 +category c60;
   22.86 +category c61;
   22.87 +category c62;
   22.88 +category c63;
   22.89 +category c64;
   22.90 +category c65;
   22.91 +category c66;
   22.92 +category c67;
   22.93 +category c68;
   22.94 +category c69;
   22.95 +category c70;
   22.96 +category c71;
   22.97 +category c72;
   22.98 +category c73;
   22.99 +category c74;
  22.100 +category c75;
  22.101 +category c76;
  22.102 +category c77;
  22.103 +category c78;
  22.104 +category c79;
  22.105 +category c80;
  22.106 +category c81;
  22.107 +category c82;
  22.108 +category c83;
  22.109 +category c84;
  22.110 +category c85;
  22.111 +category c86;
  22.112 +category c87;
  22.113 +category c88;
  22.114 +category c89;
  22.115 +category c90;
  22.116 +category c91;
  22.117 +category c92;
  22.118 +category c93;
  22.119 +category c94;
  22.120 +category c95;
  22.121 +category c96;
  22.122 +category c97;
  22.123 +category c98;
  22.124 +category c99;
  22.125 +category c100;
  22.126 +category c101;
  22.127 +category c102;
  22.128 +category c103;
  22.129 +category c104;
  22.130 +category c105;
  22.131 +category c106;
  22.132 +category c107;
  22.133 +category c108;
  22.134 +category c109;
  22.135 +category c110;
  22.136 +category c111;
  22.137 +category c112;
  22.138 +category c113;
  22.139 +category c114;
  22.140 +category c115;
  22.141 +category c116;
  22.142 +category c117;
  22.143 +category c118;
  22.144 +category c119;
  22.145 +category c120;
  22.146 +category c121;
  22.147 +category c122;
  22.148 +category c123;
  22.149 +category c124;
  22.150 +category c125;
  22.151 +category c126;
  22.152 +category c127;
  22.153 +category c128;
  22.154 +category c129;
  22.155 +category c130;
  22.156 +category c131;
  22.157 +category c132;
  22.158 +category c133;
  22.159 +category c134;
  22.160 +category c135;
  22.161 +category c136;
  22.162 +category c137;
  22.163 +category c138;
  22.164 +category c139;
  22.165 +category c140;
  22.166 +category c141;
  22.167 +category c142;
  22.168 +category c143;
  22.169 +category c144;
  22.170 +category c145;
  22.171 +category c146;
  22.172 +category c147;
  22.173 +category c148;
  22.174 +category c149;
  22.175 +category c150;
  22.176 +category c151;
  22.177 +category c152;
  22.178 +category c153;
  22.179 +category c154;
  22.180 +category c155;
  22.181 +category c156;
  22.182 +category c157;
  22.183 +category c158;
  22.184 +category c159;
  22.185 +category c160;
  22.186 +category c161;
  22.187 +category c162;
  22.188 +category c163;
  22.189 +category c164;
  22.190 +category c165;
  22.191 +category c166;
  22.192 +category c167;
  22.193 +category c168;
  22.194 +category c169;
  22.195 +category c170;
  22.196 +category c171;
  22.197 +category c172;
  22.198 +category c173;
  22.199 +category c174;
  22.200 +category c175;
  22.201 +category c176;
  22.202 +category c177;
  22.203 +category c178;
  22.204 +category c179;
  22.205 +category c180;
  22.206 +category c181;
  22.207 +category c182;
  22.208 +category c183;
  22.209 +category c184;
  22.210 +category c185;
  22.211 +category c186;
  22.212 +category c187;
  22.213 +category c188;
  22.214 +category c189;
  22.215 +category c190;
  22.216 +category c191;
  22.217 +category c192;
  22.218 +category c193;
  22.219 +category c194;
  22.220 +category c195;
  22.221 +category c196;
  22.222 +category c197;
  22.223 +category c198;
  22.224 +category c199;
  22.225 +category c200;
  22.226 +category c201;
  22.227 +category c202;
  22.228 +category c203;
  22.229 +category c204;
  22.230 +category c205;
  22.231 +category c206;
  22.232 +category c207;
  22.233 +category c208;
  22.234 +category c209;
  22.235 +category c210;
  22.236 +category c211;
  22.237 +category c212;
  22.238 +category c213;
  22.239 +category c214;
  22.240 +category c215;
  22.241 +category c216;
  22.242 +category c217;
  22.243 +category c218;
  22.244 +category c219;
  22.245 +category c220;
  22.246 +category c221;
  22.247 +category c222;
  22.248 +category c223;
  22.249 +category c224;
  22.250 +category c225;
  22.251 +category c226;
  22.252 +category c227;
  22.253 +category c228;
  22.254 +category c229;
  22.255 +category c230;
  22.256 +category c231;
  22.257 +category c232;
  22.258 +category c233;
  22.259 +category c234;
  22.260 +category c235;
  22.261 +category c236;
  22.262 +category c237;
  22.263 +category c238;
  22.264 +category c239;
  22.265 +category c240;
  22.266 +category c241;
  22.267 +category c242;
  22.268 +category c243;
  22.269 +category c244;
  22.270 +category c245;
  22.271 +category c246;
  22.272 +category c247;
  22.273 +category c248;
  22.274 +category c249;
  22.275 +category c250;
  22.276 +category c251;
  22.277 +category c252;
  22.278 +category c253;
  22.279 +category c254;
  22.280 +category c255;
  22.281 +
  22.282 +
  22.283 +#
  22.284 +# Each MCS level specifies a sensitivity and zero or more categories which may
  22.285 +# be associated with that sensitivity.
  22.286 +#
  22.287 +level s0:c0.c255;
  22.288 +
  22.289 +#
  22.290 +# Define the MCS policy
  22.291 +#
  22.292 +# mlsconstrain class_set perm_set expression ;
  22.293 +#
  22.294 +# mlsvalidatetrans class_set expression ;
  22.295 +#
  22.296 +# expression : ( expression )
  22.297 +#	     | not expression
  22.298 +#	     | expression and expression
  22.299 +#	     | expression or expression
  22.300 +#	     | u1 op u2
  22.301 +#	     | r1 role_mls_op r2
  22.302 +#	     | t1 op t2
  22.303 +#	     | l1 role_mls_op l2
  22.304 +#	     | l1 role_mls_op h2
  22.305 +#	     | h1 role_mls_op l2
  22.306 +#	     | h1 role_mls_op h2
  22.307 +#	     | l1 role_mls_op h1
  22.308 +#	     | l2 role_mls_op h2
  22.309 +#	     | u1 op names
  22.310 +#	     | u2 op names
  22.311 +#	     | r1 op names
  22.312 +#	     | r2 op names
  22.313 +#	     | t1 op names
  22.314 +#	     | t2 op names
  22.315 +#	     | u3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.316 +#	     | r3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.317 +#	     | t3 op names (NOTE: this is only available for mlsvalidatetrans)
  22.318 +#
  22.319 +# op : == | !=
  22.320 +# role_mls_op : == | != | eq | dom | domby | incomp
  22.321 +#
  22.322 +# names : name | { name_list }
  22.323 +# name_list : name | name_list name
  22.324 +#
  22.325 +
  22.326 +
  22.327 +') dnl end enable_mcs
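
For illustration, a constraint written against the grammar above could look
like this (hypothetical; the shipped policy defines only the level and leaves
the constraint set empty):

    # source level must dominate target level for getdomaininfo
    mlsconstrain domain { getdomaininfo } (l1 dom l2);
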
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/tools/flask/policy/policy/mls	Fri Sep 12 14:47:40 2008 +0900
    23.3 @@ -0,0 +1,354 @@
    23.4 +
    23.5 +ifdef(`enable_mls',`
    23.6 +#
    23.7 +# Define sensitivities 
    23.8 +#
    23.9 +# Each sensitivity has a name and zero or more aliases.
   23.10 +#
   23.11 +sensitivity s0;
   23.12 +sensitivity s1;
   23.13 +sensitivity s2;
   23.14 +sensitivity s3;
   23.15 +sensitivity s4;
   23.16 +sensitivity s5;
   23.17 +sensitivity s6;
   23.18 +sensitivity s7;
   23.19 +sensitivity s8;
   23.20 +sensitivity s9;
   23.21 +sensitivity s10;
   23.22 +sensitivity s11;
   23.23 +sensitivity s12;
   23.24 +sensitivity s13;
   23.25 +sensitivity s14;
   23.26 +sensitivity s15;
   23.27 +
   23.28 +#
   23.29 +# Define the ordering of the sensitivity levels (least to greatest)
   23.30 +#
   23.31 +dominance { s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 }
   23.32 +
   23.33 +
   23.34 +#
   23.35 +# Define the categories
   23.36 +#
   23.37 +# Each category has a name and zero or more aliases.
   23.38 +#
   23.39 +category c0;
   23.40 +category c1;
   23.41 +category c2;
   23.42 +category c3;
   23.43 +category c4;
   23.44 +category c5;
   23.45 +category c6;
   23.46 +category c7;
   23.47 +category c8;
   23.48 +category c9;
   23.49 +category c10;
   23.50 +category c11;
   23.51 +category c12;
   23.52 +category c13;
   23.53 +category c14;
   23.54 +category c15;
   23.55 +category c16;
   23.56 +category c17;
   23.57 +category c18;
   23.58 +category c19;
   23.59 +category c20;
   23.60 +category c21;
   23.61 +category c22;
   23.62 +category c23;
   23.63 +category c24;
   23.64 +category c25;
   23.65 +category c26;
   23.66 +category c27;
   23.67 +category c28;
   23.68 +category c29;
   23.69 +category c30;
   23.70 +category c31;
   23.71 +category c32;
   23.72 +category c33;
   23.73 +category c34;
   23.74 +category c35;
   23.75 +category c36;
   23.76 +category c37;
   23.77 +category c38;
   23.78 +category c39;
   23.79 +category c40;
   23.80 +category c41;
   23.81 +category c42;
   23.82 +category c43;
   23.83 +category c44;
   23.84 +category c45;
   23.85 +category c46;
   23.86 +category c47;
   23.87 +category c48;
   23.88 +category c49;
   23.89 +category c50;
   23.90 +category c51;
   23.91 +category c52;
   23.92 +category c53;
   23.93 +category c54;
   23.94 +category c55;
   23.95 +category c56;
   23.96 +category c57;
   23.97 +category c58;
   23.98 +category c59;
   23.99 +category c60;
  23.100 +category c61;
  23.101 +category c62;
  23.102 +category c63;
  23.103 +category c64;
  23.104 +category c65;
  23.105 +category c66;
  23.106 +category c67;
  23.107 +category c68;
  23.108 +category c69;
  23.109 +category c70;
  23.110 +category c71;
  23.111 +category c72;
  23.112 +category c73;
  23.113 +category c74;
  23.114 +category c75;
  23.115 +category c76;
  23.116 +category c77;
  23.117 +category c78;
  23.118 +category c79;
  23.119 +category c80;
  23.120 +category c81;
  23.121 +category c82;
  23.122 +category c83;
  23.123 +category c84;
  23.124 +category c85;
  23.125 +category c86;
  23.126 +category c87;
  23.127 +category c88;
  23.128 +category c89;
  23.129 +category c90;
  23.130 +category c91;
  23.131 +category c92;
  23.132 +category c93;
  23.133 +category c94;
  23.134 +category c95;
  23.135 +category c96;
  23.136 +category c97;
  23.137 +category c98;
  23.138 +category c99;
  23.139 +category c100;
  23.140 +category c101;
  23.141 +category c102;
  23.142 +category c103;
  23.143 +category c104;
  23.144 +category c105;
  23.145 +category c106;
  23.146 +category c107;
  23.147 +category c108;
  23.148 +category c109;
  23.149 +category c110;
  23.150 +category c111;
  23.151 +category c112;
  23.152 +category c113;
  23.153 +category c114;
  23.154 +category c115;
  23.155 +category c116;
  23.156 +category c117;
  23.157 +category c118;
  23.158 +category c119;
  23.159 +category c120;
  23.160 +category c121;
  23.161 +category c122;
  23.162 +category c123;
  23.163 +category c124;
  23.164 +category c125;
  23.165 +category c126;
  23.166 +category c127;
  23.167 +category c128;
  23.168 +category c129;
  23.169 +category c130;
  23.170 +category c131;
  23.171 +category c132;
  23.172 +category c133;
  23.173 +category c134;
  23.174 +category c135;
  23.175 +category c136;
  23.176 +category c137;
  23.177 +category c138;
  23.178 +category c139;
  23.179 +category c140;
  23.180 +category c141;
  23.181 +category c142;
  23.182 +category c143;
  23.183 +category c144;
  23.184 +category c145;
  23.185 +category c146;
  23.186 +category c147;
  23.187 +category c148;
  23.188 +category c149;
  23.189 +category c150;
  23.190 +category c151;
  23.191 +category c152;
  23.192 +category c153;
  23.193 +category c154;
  23.194 +category c155;
  23.195 +category c156;
  23.196 +category c157;
  23.197 +category c158;
  23.198 +category c159;
  23.199 +category c160;
  23.200 +category c161;
  23.201 +category c162;
  23.202 +category c163;
  23.203 +category c164;
  23.204 +category c165;
  23.205 +category c166;
  23.206 +category c167;
  23.207 +category c168;
  23.208 +category c169;
  23.209 +category c170;
  23.210 +category c171;
  23.211 +category c172;
  23.212 +category c173;
  23.213 +category c174;
  23.214 +category c175;
  23.215 +category c176;
  23.216 +category c177;
  23.217 +category c178;
  23.218 +category c179;
  23.219 +category c180;
  23.220 +category c181;
  23.221 +category c182;
  23.222 +category c183;
  23.223 +category c184;
  23.224 +category c185;
  23.225 +category c186;
  23.226 +category c187;
  23.227 +category c188;
  23.228 +category c189;
  23.229 +category c190;
  23.230 +category c191;
  23.231 +category c192;
  23.232 +category c193;
  23.233 +category c194;
  23.234 +category c195;
  23.235 +category c196;
  23.236 +category c197;
  23.237 +category c198;
  23.238 +category c199;
  23.239 +category c200;
  23.240 +category c201;
  23.241 +category c202;
  23.242 +category c203;
  23.243 +category c204;
  23.244 +category c205;
  23.245 +category c206;
  23.246 +category c207;
  23.247 +category c208;
  23.248 +category c209;
  23.249 +category c210;
  23.250 +category c211;
  23.251 +category c212;
  23.252 +category c213;
  23.253 +category c214;
  23.254 +category c215;
  23.255 +category c216;
  23.256 +category c217;
  23.257 +category c218;
  23.258 +category c219;
  23.259 +category c220;
  23.260 +category c221;
  23.261 +category c222;
  23.262 +category c223;
  23.263 +category c224;
  23.264 +category c225;
  23.265 +category c226;
  23.266 +category c227;
  23.267 +category c228;
  23.268 +category c229;
  23.269 +category c230;
  23.270 +category c231;
  23.271 +category c232;
  23.272 +category c233;
  23.273 +category c234;
  23.274 +category c235;
  23.275 +category c236;
  23.276 +category c237;
  23.277 +category c238;
  23.278 +category c239;
  23.279 +category c240;
  23.280 +category c241;
  23.281 +category c242;
  23.282 +category c243;
  23.283 +category c244;
  23.284 +category c245;
  23.285 +category c246;
  23.286 +category c247;
  23.287 +category c248;
  23.288 +category c249;
  23.289 +category c250;
  23.290 +category c251;
  23.291 +category c252;
  23.292 +category c253;
  23.293 +category c254;
  23.294 +category c255;
  23.295 +
  23.296 +
  23.297 +#
  23.298 +# Each MLS level specifies a sensitivity and zero or more categories which may
  23.299 +# be associated with that sensitivity.
  23.300 +#
  23.301 +level s0:c0.c255;
  23.302 +level s1:c0.c255;
  23.303 +level s2:c0.c255;
  23.304 +level s3:c0.c255;
  23.305 +level s4:c0.c255;
  23.306 +level s5:c0.c255;
  23.307 +level s6:c0.c255;
  23.308 +level s7:c0.c255;
  23.309 +level s8:c0.c255;
  23.310 +level s9:c0.c255;
  23.311 +level s10:c0.c255;
  23.312 +level s11:c0.c255;
  23.313 +level s12:c0.c255;
  23.314 +level s13:c0.c255;
  23.315 +level s14:c0.c255;
  23.316 +level s15:c0.c255;
  23.317 +
  23.318 +
  23.319 +#
  23.320 +# Define the MLS policy
  23.321 +#
  23.322 +# mlsconstrain class_set perm_set expression ;
  23.323 +#
  23.324 +# mlsvalidatetrans class_set expression ;
  23.325 +#
  23.326 +# expression : ( expression )
  23.327 +#	     | not expression
  23.328 +#	     | expression and expression
  23.329 +#	     | expression or expression
  23.330 +#	     | u1 op u2
  23.331 +#	     | r1 role_mls_op r2
  23.332 +#	     | t1 op t2
  23.333 +#	     | l1 role_mls_op l2
  23.334 +#	     | l1 role_mls_op h2
  23.335 +#	     | h1 role_mls_op l2
  23.336 +#	     | h1 role_mls_op h2
  23.337 +#	     | l1 role_mls_op h1
  23.338 +#	     | l2 role_mls_op h2
  23.339 +#	     | u1 op names
  23.340 +#	     | u2 op names
  23.341 +#	     | r1 op names
  23.342 +#	     | r2 op names
  23.343 +#	     | t1 op names
  23.344 +#	     | t2 op names
  23.345 +#	     | u3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.346 +#	     | r3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.347 +#	     | t3 op names (NOTE: this is only available for mlsvalidatetrans)
  23.348 +#
  23.349 +# op : == | !=
  23.350 +# role_mls_op : == | != | eq | dom | domby | incomp
  23.351 +#
  23.352 +# names : name | { name_list }
  23.353 +# name_list : name | name_list name
  23.354 +#
  23.355 +
  23.356 +
  23.357 +') dnl end enable_mls
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/flask/policy/policy/modules.conf	Fri Sep 12 14:47:40 2008 +0900
    24.3 @@ -0,0 +1,21 @@
    24.4 +#
    24.5 +# This file contains a listing of available modules.
    24.6 +# To prevent a module from being used in policy
    24.7 +# creation, set the module name to "off".
    24.8 +#
    24.9 +# For monolithic policies, modules set to "base" and "module"
   24.10 +# will be built into the policy.
   24.11 +#
   24.12 +# For modular policies, modules set to "base" will be
   24.13 +# included in the base module.  "module" will be compiled
   24.14 +# as individual loadable modules.
   24.15 +#
   24.16 +
   24.17 +# Layer: xen
   24.18 +# Module: xen
   24.19 +# Required in base
   24.20 +#
   24.21 +# Policy for xen.
   24.22 +# 
   24.23 +xen = base
   24.24 +
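
For illustration, entries for a hypothetical extra module would follow the
same pattern (no such module exists in this tree):

    # mymodule = module   -- built as a loadable module in modular policies
    # mymodule = off      -- excluded from policy creation entirely
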
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/flask/policy/policy/modules/xen/xen.if	Fri Sep 12 14:47:40 2008 +0900
    25.3 @@ -0,0 +1,1 @@
    25.4 +#
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/tools/flask/policy/policy/modules/xen/xen.te	Fri Sep 12 14:47:40 2008 +0900
    26.3 @@ -0,0 +1,135 @@
    26.4 +attribute xen_type;
    26.5 +attribute domain_type;
    26.6 +attribute resource_type;
    26.7 +attribute event_type;
    26.8 +
    26.9 +type xen_t, xen_type, domain_type;
   26.10 +
   26.11 +type dom0_t, domain_type;
   26.12 +
   26.13 +type domio_t, domain_type;
   26.14 +
   26.15 +type domxen_t, domain_type;
   26.16 +
   26.17 +type unlabeled_t, domain_type;
   26.18 +
   26.19 +type security_t, domain_type;
   26.20 +
   26.21 +type pirq_t, resource_type;
   26.22 +type ioport_t, resource_type;
   26.23 +type iomem_t, resource_type;
   26.24 +type device_t, resource_type;
   26.25 +
   26.26 +################################################################################
   26.27 +#
   26.28 +# create_domain(priv_dom, domain, channel)
   26.29 +#
   26.30 +################################################################################
   26.31 +define(`create_domain', `
   26.32 +	type $2, domain_type;
   26.33 +	allow $1 $2:domain {create max_vcpus setdomainmaxmem 
   26.34 +				setaddrsize getdomaininfo hypercall 
   26.35 +				setvcpucontext scheduler unpause 
   26.36 +				getvcpuinfo getaddrsize getvcpuaffinity};
   26.37 +	allow $1 $2:shadow {enable};
   26.38 +	allow $1 $2:mmu {map_read map_write memorymap adjust pinpage};
   26.39 +	allow $2 $2:mmu {map_read map_write pinpage};
   26.40 +	allow $2 domio_t:mmu {map_read};
   26.41 +	allow $2 $2:grant {query setup};
   26.42 +	allow $1 $2:grant {map_read unmap};
   26.43 +	allow $1 $3:event {create};
   26.44 +')
   26.45 +
   26.46 +################################################################################
   26.47 +#
   26.48 +# manage_domain(priv_dom, domain)
   26.49 +#
   26.50 +################################################################################
   26.51 +define(`manage_domain', `
   26.52 +	allow $1 $2:domain {pause destroy};
   26.53 +')
   26.54 +
   26.55 +################################################################################
   26.56 +#
   26.57 +# create_channel(caller, peer, channel)
   26.58 +#
   26.59 +################################################################################
   26.60 +define(`create_channel', `
   26.61 +	type $3, event_type;
   26.62 +	type_transition $1 $2:event $3;
   26.63 +	allow $1 $3:event {create};
   26.64 +	allow $3 $2:event {bind};
   26.65 +')
   26.66 +
   26.67 +################################################################################
   26.68 +#
   26.69 +# Boot the hypervisor and dom0
   26.70 +#
   26.71 +################################################################################
   26.72 +allow dom0_t xen_t:xen {kexec readapic writeapic mtrr_read mtrr_add mtrr_del 
   26.73 +scheduler physinfo heap quirk readconsole writeconsole settime microcode};
   26.74 +
   26.75 +allow dom0_t domio_t:mmu {map_read map_write};
   26.76 +allow dom0_t iomem_t:mmu {map_read map_write};
   26.77 +allow dom0_t pirq_t:event {vector};
   26.78 +allow dom0_t xen_t:mmu {memorymap};
   26.79 +
   26.80 +allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
   26.81 +allow dom0_t dom0_t:grant {query setup};
   26.82 +allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo getvcpuaffinity};
   26.83 +
   26.84 +allow xen_t dom0_t:domain {create};
   26.85 +allow xen_t dom0_t:resource {add remove};
   26.86 +allow xen_t ioport_t:resource {add_ioport remove_ioport};
   26.87 +allow dom0_t ioport_t:resource {use};
   26.88 +allow xen_t iomem_t:resource {add_iomem remove_iomem};
   26.89 +allow dom0_t iomem_t:resource {use};
   26.90 +allow xen_t pirq_t:resource {add_irq remove_irq};
   26.91 +allow dom0_t pirq_t:resource {use};
   26.92 +
   26.93 +allow dom0_t security_t:security {compute_av compute_create compute_member 
   26.94 +check_context load_policy compute_relabel compute_user setenforce setbool
   26.95 +setsecparam};
   26.96 +
   26.97 +create_channel(dom0_t, dom0_t, evchn0-0_t)
   26.98 +allow dom0_t evchn0-0_t:event {send};
   26.99 +
  26.100 +################################################################################
  26.101 +#
  26.102 +# Create and manage a domU w/ dom0 IO
  26.103 +#
  26.104 +################################################################################
  26.105 +create_domain(dom0_t, domU_t, evchnU-0_t)
  26.106 +
  26.107 +create_channel(domU_t, domU_t, evchnU-U_t)
  26.108 +allow domU_t evchnU-U_t:event {send};
  26.109 +
  26.110 +create_channel(dom0_t, domU_t, evchn0-U_t)
  26.111 +allow dom0_t evchn0-U_t:event {send};
  26.112 +
  26.113 +create_channel(domU_t, dom0_t, evchnU-0_t)
  26.114 +allow domU_t evchnU-0_t:event {send};
  26.115 +
  26.116 +manage_domain(dom0_t, domU_t)
  26.117 +
  26.118 +################################################################################
  26.119 +#
   26.120 +# Initial SIDs and role authorization
  26.121 +#
  26.122 +################################################################################
  26.123 +sid xen gen_context(system_u:system_r:xen_t,s0)
  26.124 +sid dom0 gen_context(system_u:system_r:dom0_t,s0)
  26.125 +sid domU gen_context(system_u:system_r:domU_t,s0)
  26.126 +sid domxen gen_context(system_u:system_r:domxen_t,s0)
  26.127 +sid domio gen_context(system_u:system_r:domio_t,s0)
  26.128 +sid unlabeled gen_context(system_u:system_r:unlabeled_t,s0)
  26.129 +sid security gen_context(system_u:system_r:security_t,s0)
  26.130 +sid pirq gen_context(system_u:object_r:pirq_t,s0)
  26.131 +sid iomem gen_context(system_u:object_r:iomem_t,s0)
  26.132 +sid ioport gen_context(system_u:object_r:ioport_t,s0)
  26.133 +sid device gen_context(system_u:object_r:device_t,s0)
  26.134 +
  26.135 +role system_r types { xen_type domain_type };
  26.136 +role user_r types { xen_type domain_type };
  26.137 +role sysadm_r types { xen_type domain_type };
  26.138 +role staff_r types { xen_type domain_type };
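
As a worked example of the m4 interfaces defined above, the call
create_channel(domU_t, dom0_t, evchnU-0_t) expands to:

    type evchnU-0_t, event_type;
    type_transition domU_t dom0_t:event evchnU-0_t;
    allow domU_t evchnU-0_t:event {create};
    allow evchnU-0_t dom0_t:event {bind};

so an event channel created by domU_t toward dom0_t is labeled evchnU-0_t,
and that channel type is permitted to bind to dom0_t's end.
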
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/tools/flask/policy/policy/support/loadable_module.spt	Fri Sep 12 14:47:40 2008 +0900
    27.3 @@ -0,0 +1,166 @@
    27.4 +########################################
    27.5 +#
    27.6 +# Macros for switching between source policy
    27.7 +# and loadable policy module support
    27.8 +#
    27.9 +
   27.10 +##############################
   27.11 +#
   27.12 +# For adding the module statement
   27.13 +#
   27.14 +define(`policy_module',`
   27.15 +	ifdef(`self_contained_policy',`',`
   27.16 +		module $1 $2;
   27.17 +
   27.18 +		require {
   27.19 +			role system_r;
   27.20 +			all_kernel_class_perms
   27.21 +		}
   27.22 +	')
   27.23 +')
   27.24 +
   27.25 +##############################
   27.26 +#
   27.27 +# For use in interfaces, to optionally insert a require block
   27.28 +#
   27.29 +define(`gen_require',`
   27.30 +	ifdef(`self_contained_policy',`',`
   27.31 +		define(`in_gen_require_block')
   27.32 +		require {
   27.33 +			$1
   27.34 +		}
   27.35 +		undefine(`in_gen_require_block')
   27.36 +	')
   27.37 +')
   27.38 +
   27.39 +##############################
   27.40 +#
   27.41 +# In the future interfaces should be in loadable modules
   27.42 +#
   27.43 +# template(name,rules)
   27.44 +#
   27.45 +define(`template',`
   27.46 +	`define(`$1',`
   27.47 +##### begin $1(dollarsstar)
   27.48 +		$2
   27.49 +##### end $1(dollarsstar)
   27.50 +	'')
   27.51 +')
   27.52 +
    27.53 +# helper function, since m4 won't expand macros
   27.54 +# if a line is a comment (#):
   27.55 +define(`policy_m4_comment',`dnl
   27.56 +##### $2 depth: $1
   27.57 +')dnl
   27.58 +
   27.59 +##############################
   27.60 +#
   27.61 +# In the future interfaces should be in loadable modules
   27.62 +#
   27.63 +# interface(name,rules)
   27.64 +#
   27.65 +define(`interface',`
   27.66 +	`define(`$1',`
   27.67 +
   27.68 +	define(`policy_temp',incr(policy_call_depth))
   27.69 +	pushdef(`policy_call_depth',policy_temp)
   27.70 +	undefine(`policy_temp')
   27.71 +
   27.72 +	policy_m4_comment(policy_call_depth,begin `$1'(dollarsstar))
   27.73 +
   27.74 +	$2
   27.75 +
   27.76 +	define(`policy_temp',decr(policy_call_depth))
   27.77 +	pushdef(`policy_call_depth',policy_temp)
   27.78 +	undefine(`policy_temp')
   27.79 +
   27.80 +	policy_m4_comment(policy_call_depth,end `$1'(dollarsstar))
   27.81 +
   27.82 +	'')
   27.83 +')
   27.84 +
   27.85 +define(`policy_call_depth',0)
   27.86 +
   27.87 +##############################
   27.88 +#
   27.89 +# Optional policy handling
   27.90 +#
   27.91 +define(`optional_policy',`
   27.92 +	ifdef(`self_contained_policy',`
   27.93 +		ifdef(`$1',`$2',`$3')
   27.94 +	',`
   27.95 +		optional {
   27.96 +			$2
   27.97 +		ifelse(`$3',`',`',`
   27.98 +		} else {
   27.99 +			$3
  27.100 +		')
  27.101 +		}
  27.102 +	')
  27.103 +')
  27.104 +
  27.105 +##############################
  27.106 +#
  27.107 +# Determine if we should use the default
  27.108 +# tunable value as specified by the policy
  27.109 +# or if the override value should be used
  27.110 +#
  27.111 +define(`dflt_or_overr',`ifdef(`$1',$1,$2)')
  27.112 +
  27.113 +##############################
  27.114 +#
  27.115 +# Extract booleans out of an expression.
  27.116 +# This needs to be reworked so expressions
  27.117 +# with parentheses can work.
  27.118 +
   27.119 +define(`declare_required_symbols',`
   27.120 +ifelse(regexp($1, `\w'), -1, `', `dnl
   27.121 +bool regexp($1, `\(\w+\)', `\1');
   27.122 +declare_required_symbols(regexp($1, `\w+\(.*\)', `\1'))dnl
   27.123 +') dnl
   27.124 +')
  27.125 +
  27.126 +##############################
  27.127 +#
  27.128 +# Tunable declaration
  27.129 +#
  27.130 +define(`gen_tunable',`
  27.131 +	ifdef(`self_contained_policy',`
  27.132 +		bool $1 dflt_or_overr(`$1'_conf,$2);
  27.133 +	',`
  27.134 +		# loadable module tunable
  27.135 +		# declaration will go here
  27.136 +		# instead of bool when
  27.137 +		# loadable modules support
  27.138 +		# tunables
  27.139 +		bool $1 dflt_or_overr(`$1'_conf,$2);
  27.140 +	')
  27.141 +')
  27.142 +
  27.143 +##############################
  27.144 +#
  27.145 +# Tunable policy handling
  27.146 +#
  27.147 +define(`tunable_policy',`
  27.148 +	ifdef(`self_contained_policy',`
  27.149 +		if (`$1') {
  27.150 +			$2
  27.151 +		} else {
  27.152 +			$3
  27.153 +		}
  27.154 +	',`
  27.155 +		# structure for tunables
  27.156 +		# will go here instead of a
  27.157 +		# conditional when loadable
  27.158 +		# modules support tunables
  27.159 +		gen_require(`
   27.160 +			declare_required_symbols(`$1')
  27.161 +		')
  27.162 +
  27.163 +		if (`$1') {
  27.164 +			$2
  27.165 +		} else {
  27.166 +			$3
  27.167 +		}
  27.168 +	')
  27.169 +')
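
For illustration, when self_contained_policy is defined the optional_policy
macro above degenerates to a plain ifdef, so a hypothetical call

    optional_policy(`enable_mls', `rule_if_mls', `rule_otherwise')

expands to rule_if_mls when enable_mls is defined and to rule_otherwise when
it is not; without self_contained_policy the same call emits an
optional { ... } else { ... } block instead.
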
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/tools/flask/policy/policy/support/misc_macros.spt	Fri Sep 12 14:47:40 2008 +0900
    28.3 @@ -0,0 +1,32 @@
    28.4 +
    28.5 +########################################
    28.6 +#
    28.7 +# Helper macros
    28.8 +#
    28.9 +
   28.10 +#
   28.11 +# shiftn(num,list...)
   28.12 +#
   28.13 +# shift the list num times
   28.14 +#
   28.15 +define(`shiftn',`ifelse($1,0,`shift($*)',`shiftn(decr($1),shift(shift($*)))')')
   28.16 +
   28.17 +########################################
   28.18 +#
   28.19 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   28.20 +#
   28.21 +define(`gen_user',`user $1 roles { $2 }`'ifdef(`enable_mls', ` level $3 range $4')`'ifdef(`enable_mcs',` level s0 range s0`'ifelse(`$5',,,` - s0:$5')');')
   28.22 +
   28.23 +########################################
   28.24 +#
   28.25 +# gen_context(context,mls_sensitivity,[mcs_categories])
   28.26 +#
   28.27 +define(`gen_context',`$1`'ifdef(`enable_mls',`:$2')`'ifdef(`enable_mcs',`:s0`'ifelse(`$3',,,`:$3')')') dnl
   28.28 +
   28.29 +########################################
   28.30 +#
   28.31 +# gen_bool(name,default_value)
   28.32 +#
   28.33 +define(`gen_bool',`
   28.34 +	bool $1 dflt_or_overr(`$1'_conf,$2);
   28.35 +')
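
As a worked example, assuming enable_mcs is defined and enable_mls is not,
the gen_user call in the systemuser file below,

    gen_user(system_u, system_r, s0, s0 - s9:c0.c127, c0.c127)

expands to:

    user system_u roles { system_r } level s0 range s0 - s0:c0.c127;
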
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/tools/flask/policy/policy/systemuser	Fri Sep 12 14:47:40 2008 +0900
    29.3 @@ -0,0 +1,19 @@
    29.4 +##################################
    29.5 +#
    29.6 +# System User configuration.
    29.7 +#
    29.8 +
    29.9 +#
   29.10 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   29.11 +#
   29.12 +
   29.13 +#
   29.14 +# system_u is the user identity for system processes and objects.
   29.15 +# There should be no corresponding Unix user identity for system,
   29.16 +# and a user process should never be assigned the system user
   29.17 +# identity.
   29.18 +#
   29.19 +gen_user(system_u, system_r, s0, s0 - s9:c0.c127, c0.c127)
   29.20 +
   29.21 +# Normal users should not be added to this file,
   29.22 +# but instead added to the users file.
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/flask/policy/policy/users	Fri Sep 12 14:47:40 2008 +0900
    30.3 @@ -0,0 +1,39 @@
    30.4 +
    30.5 +##################################
    30.6 +#
    30.7 +# Core User configuration.
    30.8 +#
    30.9 +
   30.10 +#
    30.11 +# gen_user(username, role_set, mls_defaultlevel, mls_range, [mcs_categories])
   30.12 +#
   30.13 +
   30.14 +#
   30.15 +# user_u is a generic user identity for Linux users who have no
   30.16 +# SELinux user identity defined.  The modified daemons will use
   30.17 +# this user identity in the security context if there is no matching
   30.18 +# SELinux user identity for a Linux user.  If you do not want to
   30.19 +# permit any access to such users, then remove this entry.
   30.20 +#
   30.21 +ifdef(`targeted_policy',`
   30.22 +gen_user(user_u, user_r sysadm_r system_r, s0, s0 - s9:c0.c127)
   30.23 +',`
   30.24 +gen_user(user_u, user_r, s0, s0 - s9:c0.c127)
   30.25 +')
   30.26 +
   30.27 +#
   30.28 +# The following users correspond to Unix identities.
   30.29 +# These identities are typically assigned as the user attribute
   30.30 +# when login starts the user shell.  Users with access to the sysadm_r
   30.31 +# role should use the staff_r role instead of the user_r role when
    30.32 +# not in the sysadm_r role.
   30.33 +#
   30.34 +ifdef(`targeted_policy',`
   30.35 +	gen_user(root, user_r sysadm_r system_r, s0, s0 - s9:c0.c127, c0.c127)
   30.36 +',`
   30.37 +	ifdef(`direct_sysadm_daemon',`
   30.38 +		gen_user(root, sysadm_r staff_r system_r, s0, s0 - s9:c0.c127, c0.c127)
   30.39 +	',`
   30.40 +		gen_user(root, sysadm_r staff_r, s0, s0 - s9:c0.c127, c0.c127)
   30.41 +	')
   30.42 +')
    31.1 --- a/tools/ioemu/hw/cirrus_vga.c	Fri Sep 12 14:32:45 2008 +0900
    31.2 +++ b/tools/ioemu/hw/cirrus_vga.c	Fri Sep 12 14:47:40 2008 +0900
    31.3 @@ -2554,6 +2554,9 @@ static void set_vram_mapping(CirrusVGASt
    31.4  
    31.5      fprintf(logfile,"mapping vram to %lx - %lx\n", begin, end);
    31.6  
    31.7 +    if (!s->vram_mfns)
    31.8 +        return;
    31.9 +
   31.10      xatp.domid = domid;
   31.11      xatp.space = XENMAPSPACE_mfn;
   31.12  
    32.1 --- a/tools/ioemu/hw/pass-through.c	Fri Sep 12 14:32:45 2008 +0900
    32.2 +++ b/tools/ioemu/hw/pass-through.c	Fri Sep 12 14:47:40 2008 +0900
    32.3 @@ -57,6 +57,10 @@ static uint32_t pt_irqpin_reg_init(struc
    32.4      struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.5  static uint32_t pt_bar_reg_init(struct pt_dev *ptdev,
    32.6      struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.7 +static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
    32.8 +    struct pt_reg_info_tbl *reg, uint32_t real_offset);
    32.9 +static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
   32.10 +    struct pt_reg_info_tbl *reg, uint32_t real_offset);
   32.11  static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
   32.12      struct pt_reg_info_tbl *reg, uint32_t real_offset);
   32.13  static uint32_t pt_msgctrl_reg_init(struct pt_dev *ptdev,
   32.14 @@ -77,6 +81,8 @@ static uint8_t pt_msix_size_init(struct 
   32.15      struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.16  static uint8_t pt_vendor_size_init(struct pt_dev *ptdev,
   32.17      struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.18 +static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
   32.19 +    struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset);
   32.20  static int pt_byte_reg_read(struct pt_dev *ptdev,
   32.21      struct pt_reg_tbl *cfg_entry,
   32.22      uint8_t *valueu, uint8_t valid_mask);
   32.23 @@ -438,7 +444,7 @@ static struct pt_reg_info_tbl pt_emu_reg
   32.24          .init_val   = 0x0000,
   32.25          .ro_mask    = 0x0000,
   32.26          .emu_mask   = 0xFFFF,
   32.27 -        .init       = pt_common_reg_init,
   32.28 +        .init       = pt_linkctrl_reg_init,
   32.29          .u.w.read   = pt_word_reg_read,
   32.30          .u.w.write  = pt_linkctrl_reg_write,
   32.31      },
   32.32 @@ -449,7 +455,7 @@ static struct pt_reg_info_tbl pt_emu_reg
   32.33          .init_val   = 0x0000,
   32.34          .ro_mask    = 0x0000,
   32.35          .emu_mask   = 0xFFFF,
   32.36 -        .init       = pt_common_reg_init,
   32.37 +        .init       = pt_devctrl2_reg_init,
   32.38          .u.w.read   = pt_word_reg_read,
   32.39          .u.w.write  = pt_devctrl2_reg_write,
   32.40      },
   32.41 @@ -666,8 +672,8 @@ static const struct pt_reg_grp_info_tbl 
   32.42      {
   32.43          .grp_id     = PCI_CAP_ID_EXP,
   32.44          .grp_type   = GRP_TYPE_EMU,
   32.45 -        .grp_size   = 0x3C,
   32.46 -        .size_init  = pt_reg_grp_size_init,
   32.47 +        .grp_size   = 0xFF,
   32.48 +        .size_init  = pt_pcie_size_init,
   32.49          .emu_reg_tbl= pt_emu_reg_pcie_tbl,
   32.50      },
   32.51      /* MSI-X Capability Structure reg group */
   32.52 @@ -1869,12 +1875,57 @@ static uint32_t pt_bar_reg_init(struct p
   32.53      return reg_field;
   32.54  }
   32.55  
   32.56 +/* initialize Link Control register */
   32.57 +static uint32_t pt_linkctrl_reg_init(struct pt_dev *ptdev,
   32.58 +        struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.59 +{
   32.60 +    uint8_t cap_ver = 0;
   32.61 +    uint8_t dev_type = 0;
   32.62 +
   32.63 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.64 +        (uint8_t)PCI_EXP_FLAGS_VERS);
   32.65 +    dev_type = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.66 +        (uint8_t)PCI_EXP_FLAGS_TYPE) >> 4;
   32.67 +    
   32.68 +    /* no need to initialize in case of Root Complex Integrated Endpoint
   32.69 +     * with cap_ver 1.x 
   32.70 +     */
   32.71 +    if ((dev_type == PCI_EXP_TYPE_ROOT_INT_EP) && (cap_ver == 1))
   32.72 +        return PT_INVALID_REG;
   32.73 +
   32.74 +    return reg->init_val;
   32.75 +}
   32.76 +
   32.77 +/* initialize Device Control 2 register */
   32.78 +static uint32_t pt_devctrl2_reg_init(struct pt_dev *ptdev,
   32.79 +        struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.80 +{
   32.81 +    uint8_t cap_ver = 0;
   32.82 +
   32.83 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
   32.84 +        (uint8_t)PCI_EXP_FLAGS_VERS);
   32.85 +    
   32.86 +    /* no need to initialize in case of cap_ver 1.x */
   32.87 +    if (cap_ver == 1)
   32.88 +        return PT_INVALID_REG;
   32.89 +
   32.90 +    return reg->init_val;
   32.91 +}
   32.92 +
   32.93  /* initialize Link Control 2 register */
   32.94  static uint32_t pt_linkctrl2_reg_init(struct pt_dev *ptdev,
   32.95          struct pt_reg_info_tbl *reg, uint32_t real_offset)
   32.96  {
   32.97      int reg_field = 0;
   32.98 -
   32.99 +    uint8_t cap_ver = 0;
  32.100 +
  32.101 +    cap_ver = (ptdev->dev.config[(real_offset - reg->offset) + PCI_EXP_FLAGS] &
  32.102 +        (uint8_t)PCI_EXP_FLAGS_VERS);
  32.103 +    
  32.104 +    /* no need to initialize in case of cap_ver 1.x */
  32.105 +    if (cap_ver == 1)
  32.106 +        return PT_INVALID_REG;
  32.107 +    
  32.108      /* set Supported Link Speed */
  32.109      reg_field |= 
  32.110          (0x0F & 
  32.111 @@ -2036,6 +2087,91 @@ static uint8_t pt_vendor_size_init(struc
  32.112      return ptdev->dev.config[base_offset + 0x02];
  32.113  }
  32.114  
  32.115 +/* get PCI Express Capability Structure register group size */
  32.116 +static uint8_t pt_pcie_size_init(struct pt_dev *ptdev,
  32.117 +        struct pt_reg_grp_info_tbl *grp_reg, uint32_t base_offset)
  32.118 +{
  32.119 +    PCIDevice *d = &ptdev->dev;
  32.120 +    uint16_t exp_flag = 0;
  32.121 +    uint16_t type = 0;
  32.122 +    uint16_t vers = 0;
  32.123 +    uint8_t pcie_size = 0;
  32.124 +
  32.125 +    exp_flag = *((uint16_t*)(d->config + (base_offset + PCI_EXP_FLAGS)));
  32.126 +    type = (exp_flag & PCI_EXP_FLAGS_TYPE) >> 4;
  32.127 +    vers = (exp_flag & PCI_EXP_FLAGS_VERS);
  32.128 +
   32.129 +    /* calculate size depending on capability version and device/port type */
  32.130 +    /* in case of PCI Express Base Specification Rev 1.x */
  32.131 +    if (vers == 1)
  32.132 +    {
  32.133 +        /* The PCI Express Capabilities, Device Capabilities, and Device 
  32.134 +         * Status/Control registers are required for all PCI Express devices. 
  32.135 +         * The Link Capabilities and Link Status/Control are required for all 
   32.136 +         * Endpoints that are not Root Complex Integrated Endpoints. Endpoints 
   32.137 +         * are not required to implement registers other than those listed 
   32.138 +         * above and may terminate the capability structure there.
  32.139 +         */
  32.140 +        switch (type) {
  32.141 +        case PCI_EXP_TYPE_ENDPOINT:
  32.142 +        case PCI_EXP_TYPE_LEG_END:
  32.143 +            pcie_size = 0x14;
  32.144 +            break;
  32.145 +        case PCI_EXP_TYPE_ROOT_INT_EP:
  32.146 +            /* has no link */
  32.147 +            pcie_size = 0x0C;
  32.148 +            break;
  32.149 +        /* only EndPoint passthrough is supported */
  32.150 +        case PCI_EXP_TYPE_ROOT_PORT:
  32.151 +        case PCI_EXP_TYPE_UPSTREAM:
  32.152 +        case PCI_EXP_TYPE_DOWNSTREAM:
  32.153 +        case PCI_EXP_TYPE_PCI_BRIDGE:
  32.154 +        case PCI_EXP_TYPE_PCIE_BRIDGE:
  32.155 +        case PCI_EXP_TYPE_ROOT_EC:
  32.156 +        default:
  32.157 +            /* exit I/O emulator */
  32.158 +            PT_LOG("Internal error: Unsupported device/port type[%d]. "
  32.159 +                "I/O emulator exit.\n", type);
  32.160 +            exit(1);
  32.161 +        }
  32.162 +    }
  32.163 +    /* in case of PCI Express Base Specification Rev 2.0 */
  32.164 +    else if (vers == 2)
  32.165 +    {
  32.166 +        switch (type) {
  32.167 +        case PCI_EXP_TYPE_ENDPOINT:
  32.168 +        case PCI_EXP_TYPE_LEG_END:
  32.169 +        case PCI_EXP_TYPE_ROOT_INT_EP:
  32.170 +            /* For Functions that do not implement the registers, 
  32.171 +             * these spaces must be hardwired to 0b.
  32.172 +             */
  32.173 +            pcie_size = 0x3C;
  32.174 +            break;
  32.175 +        /* only EndPoint passthrough is supported */
  32.176 +        case PCI_EXP_TYPE_ROOT_PORT:
  32.177 +        case PCI_EXP_TYPE_UPSTREAM:
  32.178 +        case PCI_EXP_TYPE_DOWNSTREAM:
  32.179 +        case PCI_EXP_TYPE_PCI_BRIDGE:
  32.180 +        case PCI_EXP_TYPE_PCIE_BRIDGE:
  32.181 +        case PCI_EXP_TYPE_ROOT_EC:
  32.182 +        default:
  32.183 +            /* exit I/O emulator */
  32.184 +            PT_LOG("Internal error: Unsupported device/port type[%d]. "
  32.185 +                "I/O emulator exit.\n", type);
  32.186 +            exit(1);
  32.187 +        }
  32.188 +    }
  32.189 +    else
  32.190 +    {
  32.191 +        /* exit I/O emulator */
  32.192 +        PT_LOG("Internal error: Unsupported capability version[%d]. "
  32.193 +            "I/O emulator exit.\n", vers);
  32.194 +        exit(1);
  32.195 +    }
  32.196 +
  32.197 +    return pcie_size;
  32.198 +}
  32.199 +
  32.200  /* read byte size emulate register */
  32.201  static int pt_byte_reg_read(struct pt_dev *ptdev,
  32.202          struct pt_reg_tbl *cfg_entry,
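
Note on the register-group table change above: the PCI Express capability
group no longer uses a fixed 0x3C size; grp_size is set to the 0xFF
placeholder and pt_pcie_size_init computes the real size at init time (0x14
or 0x0C for v1 devices, 0x3C for v2), matching what that function derives
from the PCI_EXP_FLAGS field.
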
    33.1 --- a/tools/ioemu/hw/pass-through.h	Fri Sep 12 14:32:45 2008 +0900
    33.2 +++ b/tools/ioemu/hw/pass-through.h	Fri Sep 12 14:47:40 2008 +0900
    33.3 @@ -62,6 +62,21 @@
    33.4  #define PCI_MSI_FLAGS_MASK_BIT  0x0100
    33.5  #endif
    33.6  
    33.7 +#ifndef PCI_EXP_TYPE_PCIE_BRIDGE
    33.8 +/* PCI/PCI-X to PCIE Bridge */
    33.9 +#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8
   33.10 +#endif
   33.11 +
   33.12 +#ifndef PCI_EXP_TYPE_ROOT_INT_EP
   33.13 +/* Root Complex Integrated Endpoint */
   33.14 +#define PCI_EXP_TYPE_ROOT_INT_EP 0x9
   33.15 +#endif
   33.16 +
   33.17 +#ifndef PCI_EXP_TYPE_ROOT_EC
   33.18 +/* Root Complex Event Collector */
   33.19 +#define PCI_EXP_TYPE_ROOT_EC     0xa
   33.20 +#endif
   33.21 +
   33.22  #define PT_INVALID_REG          0xFFFFFFFF      /* invalid register value */
   33.23  #define PT_BAR_ALLF             0xFFFFFFFF      /* BAR ALLF value */
   33.24  #define PT_BAR_MEM_RO_MASK      0x0000000F      /* BAR ReadOnly mask(Memory) */
    34.1 --- a/tools/ioemu/hw/pci.c	Fri Sep 12 14:32:45 2008 +0900
    34.2 +++ b/tools/ioemu/hw/pci.c	Fri Sep 12 14:47:40 2008 +0900
    34.3 @@ -45,7 +45,6 @@ struct PCIBus {
    34.4  static void pci_update_mappings(PCIDevice *d);
    34.5  
    34.6  target_phys_addr_t pci_mem_base;
    34.7 -static int pci_irq_index;
    34.8  static PCIBus *first_bus;
    34.9  
   34.10  PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
   34.11 @@ -114,9 +113,6 @@ PCIDevice *pci_register_device(PCIBus *b
   34.12  {
   34.13      PCIDevice *pci_dev;
   34.14  
   34.15 -    if (pci_irq_index >= PCI_DEVICES_MAX)
   34.16 -        return NULL;
   34.17 -    
   34.18      if (devfn < 0) {
   34.19          for(devfn = bus->devfn_min ; devfn < 256; devfn += 8) {
   34.20              if ( !bus->devices[devfn] &&
   34.21 @@ -140,7 +136,6 @@ PCIDevice *pci_register_device(PCIBus *b
   34.22          config_write = pci_default_write_config;
   34.23      pci_dev->config_read = config_read;
   34.24      pci_dev->config_write = config_write;
   34.25 -    pci_dev->irq_index = pci_irq_index++;
   34.26      bus->devices[devfn] = pci_dev;
   34.27      return pci_dev;
   34.28  }
    35.1 --- a/tools/ioemu/hw/pt-msi.c	Fri Sep 12 14:32:45 2008 +0900
    35.2 +++ b/tools/ioemu/hw/pt-msi.c	Fri Sep 12 14:47:40 2008 +0900
    35.3 @@ -313,7 +313,7 @@ int pt_msix_init(struct pt_dev *dev, int
    35.4  
    35.5      table_off = pci_read_long(pd, pos + PCI_MSIX_TABLE);
    35.6      bar_index = dev->msix->bar_index = table_off & PCI_MSIX_BIR;
    35.7 -    table_off &= table_off & ~PCI_MSIX_BIR;
    35.8 +    table_off = dev->msix->table_off = table_off & ~PCI_MSIX_BIR;
    35.9      dev->msix->table_base = dev->pci_dev->base_addr[bar_index];
   35.10      PT_LOG("get MSI-X table bar base %llx\n",
   35.11             (unsigned long long)dev->msix->table_base);
    36.1 --- a/tools/ioemu/hw/vga.c	Fri Sep 12 14:32:45 2008 +0900
    36.2 +++ b/tools/ioemu/hw/vga.c	Fri Sep 12 14:47:40 2008 +0900
    36.3 @@ -2080,7 +2080,13 @@ void xen_vga_vram_map(uint64_t vram_addr
    36.4  
    36.5      if (copy)
    36.6          memcpy(vram, xen_vga_state->vram_ptr, VGA_RAM_SIZE);
    36.7 -    qemu_free(xen_vga_state->vram_ptr);
    36.8 +    if (xen_vga_state->vram_mfns) {
    36.9 +        /* In case this function is called more than once */
   36.10 +        free(xen_vga_state->vram_mfns);
   36.11 +        munmap(xen_vga_state->vram_ptr, VGA_RAM_SIZE);
   36.12 +    } else {
   36.13 +        qemu_free(xen_vga_state->vram_ptr);
   36.14 +    }
   36.15      xen_vga_state->vram_ptr = vram;
   36.16      xen_vga_state->vram_mfns = pfn_list;
   36.17  #ifdef CONFIG_STUBDOM
    37.1 --- a/tools/ioemu/hw/xen_machine_fv.c	Fri Sep 12 14:32:45 2008 +0900
    37.2 +++ b/tools/ioemu/hw/xen_machine_fv.c	Fri Sep 12 14:47:40 2008 +0900
    37.3 @@ -139,8 +139,10 @@ uint8_t *qemu_map_cache(target_phys_addr
    37.4          !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
    37.5          qemu_remap_bucket(entry, address_index);
    37.6  
    37.7 -    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping))
    37.8 +    if (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) {
    37.9 +        last_address_index = ~0UL;
   37.10          return NULL;
   37.11 +    }
   37.12  
   37.13      last_address_index = address_index;
   37.14      last_address_vaddr = entry->vaddr_base;
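
Note on the fix above: setting last_address_index to ~0UL on the failure path
invalidates the one-entry lookup cache that the success path refreshes just
below, so a subsequent call cannot be satisfied from cached state that the
failed (re)mapping may have left stale.
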
    38.1 --- a/tools/ioemu/vl.h	Fri Sep 12 14:32:45 2008 +0900
    38.2 +++ b/tools/ioemu/vl.h	Fri Sep 12 14:47:40 2008 +0900
    38.3 @@ -812,8 +812,6 @@ struct PCIDevice {
    38.4      /* do not access the following fields */
    38.5      PCIConfigReadFunc *config_read;
    38.6      PCIConfigWriteFunc *config_write;
    38.7 -    /* ??? This is a PC-specific hack, and should be removed.  */
    38.8 -    int irq_index;
    38.9  
   38.10      /* Current IRQ levels.  Used internally by the generic PCI code.  */
   38.11      int irq_state[4];
    39.1 --- a/tools/libxc/ia64/xc_ia64_linux_save.c	Fri Sep 12 14:32:45 2008 +0900
    39.2 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c	Fri Sep 12 14:47:40 2008 +0900
    39.3 @@ -53,12 +53,12 @@ static inline void set_bit(int nr, volat
    39.4  }
    39.5  
    39.6  static int
    39.7 -suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
    39.8 +suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
    39.9                    int dom, xc_dominfo_t *info)
   39.10  {
   39.11      int i = 0;
   39.12  
   39.13 -    if (!(*suspend)(dom)) {
   39.14 +    if (!(*suspend)()) {
   39.15          ERROR("Suspend request failed");
   39.16          return -1;
   39.17      }
   39.18 @@ -406,7 +406,7 @@ out:
   39.19  
   39.20  int
   39.21  xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   39.22 -               uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   39.23 +               uint32_t max_factor, uint32_t flags, int (*suspend)(void),
   39.24                 int hvm, void *(*init_qemu_maps)(int, unsigned),
   39.25                 void (*qemu_flip_buffer)(int, int))
   39.26  {
    40.1 --- a/tools/libxc/xc_domain_save.c	Fri Sep 12 14:32:45 2008 +0900
    40.2 +++ b/tools/libxc/xc_domain_save.c	Fri Sep 12 14:47:40 2008 +0900
    40.3 @@ -338,72 +338,23 @@ static int analysis_phase(int xc_handle,
    40.4  }
    40.5  
    40.6  
    40.7 -static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
    40.8 +static int suspend_and_state(int (*suspend)(void), int xc_handle, int io_fd,
    40.9                               int dom, xc_dominfo_t *info)
   40.10  {
   40.11 -    int i = 0;
   40.12 -
   40.13 -    if ( !(*suspend)(dom) )
   40.14 +    if ( !(*suspend)() )
   40.15      {
   40.16          ERROR("Suspend request failed");
   40.17          return -1;
   40.18      }
   40.19  
   40.20 - retry:
   40.21 -
   40.22 -    if ( xc_domain_getinfo(xc_handle, dom, 1, info) != 1 )
   40.23 +    if ( (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) ||
   40.24 +         !info->shutdown || (info->shutdown_reason != SHUTDOWN_suspend) )
   40.25      {
   40.26 -        ERROR("Could not get domain info");
   40.27 -        return -1;
   40.28 -    }
   40.29 -
   40.30 -    if ( info->dying )
   40.31 -    {
   40.32 -        ERROR("domain is dying");
   40.33 -        return -1;
   40.34 -    }
   40.35 -
   40.36 -    if ( info->crashed )
   40.37 -    {
   40.38 -        ERROR("domain has crashed");
   40.39 +        ERROR("Domain not in suspended state");
   40.40          return -1;
   40.41      }
   40.42  
   40.43 -    if ( info->shutdown )
   40.44 -    {
   40.45 -        switch ( info->shutdown_reason )
   40.46 -        {
   40.47 -        case SHUTDOWN_poweroff:
   40.48 -        case SHUTDOWN_reboot:
   40.49 -            ERROR("domain has shut down");
   40.50 -            return -1;
   40.51 -        case SHUTDOWN_suspend:
   40.52 -            return 0;
   40.53 -        case SHUTDOWN_crash:
   40.54 -            ERROR("domain has crashed");
   40.55 -            return -1;
   40.56 -        }
   40.57 -    }
   40.58 -
   40.59 -    if ( info->paused )
   40.60 -    {
   40.61 -        /* Try unpausing domain, wait, and retest. */
   40.62 -        xc_domain_unpause( xc_handle, dom );
   40.63 -        ERROR("Domain was paused. Wait and re-test.");
   40.64 -        usleep(10000); /* 10ms */
   40.65 -        goto retry;
   40.66 -    }
   40.67 -
   40.68 -    if ( ++i < 100 )
   40.69 -    {
   40.70 -        ERROR("Retry suspend domain");
   40.71 -        usleep(10000); /* 10ms */
   40.72 -        goto retry;
   40.73 -    }
   40.74 -
   40.75 -    ERROR("Unable to suspend domain.");
   40.76 -
   40.77 -    return -1;
   40.78 +    return 0;
   40.79  }
   40.80  
   40.81  /*
   40.82 @@ -796,7 +747,7 @@ static xen_pfn_t *map_and_save_p2m_table
   40.83  
   40.84  
   40.85  int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
   40.86 -                   uint32_t max_factor, uint32_t flags, int (*suspend)(int),
   40.87 +                   uint32_t max_factor, uint32_t flags, int (*suspend)(void),
   40.88                     int hvm, void *(*init_qemu_maps)(int, unsigned), 
   40.89                     void (*qemu_flip_buffer)(int, int))
   40.90  {
    41.1 --- a/tools/libxc/xc_evtchn.c	Fri Sep 12 14:32:45 2008 +0900
    41.2 +++ b/tools/libxc/xc_evtchn.c	Fri Sep 12 14:47:40 2008 +0900
    41.3 @@ -59,17 +59,8 @@ int xc_evtchn_reset(int xc_handle,
    41.4      return do_evtchn_op(xc_handle, EVTCHNOP_reset, &arg, sizeof(arg), 0);
    41.5  }
    41.6  
    41.7 -int xc_evtchn_status(int xc_handle,
    41.8 -                     uint32_t dom,
    41.9 -                     uint32_t port)
   41.10 +int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status)
   41.11  {
   41.12 -    int rc;
   41.13 -    struct evtchn_status arg = { .dom = (domid_t)dom,
   41.14 -                                 .port = (evtchn_port_t)port };
   41.15 -
   41.16 -    rc = do_evtchn_op(xc_handle, EVTCHNOP_status, &arg, sizeof(arg), 1);
   41.17 -    if ( rc == 0 )
   41.18 -        rc = arg.status;
   41.19 -
   41.20 -    return rc;
   41.21 +    return do_evtchn_op(xc_handle, EVTCHNOP_status, status,
   41.22 +                        sizeof(*status), 1);
   41.23  }
    42.1 --- a/tools/libxc/xc_private.c	Fri Sep 12 14:32:45 2008 +0900
    42.2 +++ b/tools/libxc/xc_private.c	Fri Sep 12 14:47:40 2008 +0900
    42.3 @@ -307,6 +307,13 @@ int xc_memory_op(int xc_handle,
    42.4              goto out1;
    42.5          }
    42.6          break;
    42.7 +    case XENMEM_remove_from_physmap:
    42.8 +        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
    42.9 +        {
   42.10 +            PERROR("Could not lock");
   42.11 +            goto out1;
   42.12 +        }
   42.13 +        break;
   42.14      case XENMEM_current_reservation:
   42.15      case XENMEM_maximum_reservation:
   42.16      case XENMEM_maximum_gpfn:
   42.17 @@ -340,6 +347,9 @@ int xc_memory_op(int xc_handle,
   42.18      case XENMEM_add_to_physmap:
   42.19          unlock_pages(arg, sizeof(struct xen_add_to_physmap));
   42.20          break;
   42.21 +    case XENMEM_remove_from_physmap:
   42.22 +        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
   42.23 +        break;
   42.24      case XENMEM_current_reservation:
   42.25      case XENMEM_maximum_reservation:
   42.26      case XENMEM_maximum_gpfn:
    43.1 --- a/tools/libxc/xenctrl.h	Fri Sep 12 14:32:45 2008 +0900
    43.2 +++ b/tools/libxc/xenctrl.h	Fri Sep 12 14:47:40 2008 +0900
    43.3 @@ -502,9 +502,9 @@ xc_evtchn_alloc_unbound(int xc_handle,
    43.4  
    43.5  int xc_evtchn_reset(int xc_handle,
    43.6                      uint32_t dom);
    43.7 -int xc_evtchn_status(int xc_handle,
    43.8 -                     uint32_t dom,
    43.9 -                     uint32_t port);
   43.10 +
   43.11 +typedef struct evtchn_status xc_evtchn_status_t;
   43.12 +int xc_evtchn_status(int xc_handle, xc_evtchn_status_t *status);
   43.13  
   43.14  /*
   43.15   * Return a handle to the event channel driver, or -1 on failure, in which case
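
A minimal caller sketch for the reworked interface (illustrative only; the
domain and port values are arbitrary). The caller now fills in the same
struct evtchn_status fields the old wrapper set internally and reads the OUT
fields back directly:

    xc_evtchn_status_t status = { .dom = 0, .port = 1 };
    if ( xc_evtchn_status(xc_handle, &status) == 0 )
        printf("port 1 status: %d\n", status.status);
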
    44.1 --- a/tools/libxc/xenguest.h	Fri Sep 12 14:32:45 2008 +0900
    44.2 +++ b/tools/libxc/xenguest.h	Fri Sep 12 14:47:40 2008 +0900
    44.3 @@ -25,7 +25,7 @@
    44.4   */
    44.5  int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
    44.6                     uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */,
    44.7 -                   int (*suspend)(int domid), int hvm,
    44.8 +                   int (*suspend)(void), int hvm,
    44.9                     void *(*init_qemu_maps)(int, unsigned),  /* HVM only */
   44.10                     void (*qemu_flip_buffer)(int, int));     /* HVM only */
   44.11  
    45.1 --- a/tools/python/Makefile	Fri Sep 12 14:32:45 2008 +0900
    45.2 +++ b/tools/python/Makefile	Fri Sep 12 14:47:40 2008 +0900
    45.3 @@ -1,14 +1,6 @@
    45.4  XEN_ROOT = ../..
    45.5  include $(XEN_ROOT)/tools/Rules.mk
    45.6  
    45.7 -XEN_SECURITY_MODULE = dummy
    45.8 -ifeq ($(FLASK_ENABLE),y)
    45.9 -XEN_SECURITY_MODULE = flask
   45.10 -endif
   45.11 -ifeq ($(ACM_SECURITY),y)
   45.12 -XEN_SECURITY_MODULE = acm
   45.13 -endif
   45.14 -
   45.15  .PHONY: all
   45.16  all: build
   45.17  
   45.18 @@ -23,8 +15,8 @@ CATALOGS = $(patsubst %,xen/xm/messages/
   45.19  NLSDIR = /usr/share/locale
   45.20  
   45.21  .PHONY: build buildpy
   45.22 -buildpy: xsm.py
   45.23 -	CC="$(CC)" CFLAGS="$(CFLAGS)" XEN_SECURITY_MODULE="$(XEN_SECURITY_MODULE)" python setup.py build
   45.24 +buildpy: 
   45.25 +	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
   45.26  
   45.27  build: buildpy refresh-pot refresh-po $(CATALOGS)
   45.28  
   45.29 @@ -61,18 +53,6 @@ refresh-po: $(POTFILE)
   45.30  %.mo: %.po
   45.31  	$(MSGFMT) -c -o $@ $<
   45.32  
   45.33 -xsm.py:
   45.34 -	@(set -e; \
   45.35 -	  echo "XEN_SECURITY_MODULE = \""$(XEN_SECURITY_MODULE)"\""; \
   45.36 -	  echo "from xsm_core import *"; \
   45.37 -	  echo ""; \
   45.38 -	  echo "import xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" as xsm_module"; \
   45.39 -	  echo ""; \
   45.40 -	  echo "xsm_init(xsm_module)"; \
   45.41 -	  echo "from xen.util.xsm."$(XEN_SECURITY_MODULE)"."$(XEN_SECURITY_MODULE)" import *"; \
   45.42 -	  echo "del xsm_module"; \
   45.43 -	  echo "") >xen/util/xsm/$@
   45.44 -
   45.45  .PHONY: install
   45.46  ifndef XEN_PYTHON_NATIVE_INSTALL
   45.47  install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()")
   45.48 @@ -104,4 +84,4 @@ test:
   45.49  
   45.50  .PHONY: clean
   45.51  clean:
   45.52 -	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/xsm/xsm.py xen/util/auxbin.pyc
   45.53 +	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
    46.1 --- a/tools/python/xen/util/xsconstants.py	Fri Sep 12 14:32:45 2008 +0900
    46.2 +++ b/tools/python/xen/util/xsconstants.py	Fri Sep 12 14:47:40 2008 +0900
    46.3 @@ -20,8 +20,10 @@ XS_INST_NONE = 0
    46.4  XS_INST_BOOT = (1 << 0)
    46.5  XS_INST_LOAD = (1 << 1)
    46.6  
    46.7 -XS_POLICY_NONE  = 0
    46.8  XS_POLICY_ACM = (1 << 0)
    46.9 +XS_POLICY_FLASK = (1 << 1)
   46.10 +XS_POLICY_DUMMY  = (1 << 2)
   46.11 +XS_POLICY_USE = 0
   46.12  
   46.13  # Some internal variables used by the Xen-API
   46.14  ACM_LABEL_VM  = (1 << 0)
   46.15 @@ -107,6 +109,6 @@ ACM_POLICY_ID = 'ACM'
   46.16  
   46.17  INVALID_POLICY_PREFIX = 'INV_'
   46.18  
   46.19 -INVALID_SSIDREF = 0xFFFFFFFF
   46.20 +INVALID_SSIDREF = 0xFFFFFFFFL
   46.21  
   46.22  XS_INACCESSIBLE_LABEL = '__INACCESSIBLE__'
    47.1 --- a/tools/python/xen/util/xsm/flask/flask.py	Fri Sep 12 14:32:45 2008 +0900
    47.2 +++ b/tools/python/xen/util/xsm/flask/flask.py	Fri Sep 12 14:47:40 2008 +0900
    47.3 @@ -1,5 +1,6 @@
    47.4  import sys
    47.5  from xen.lowlevel import flask
    47.6 +from xen.util import xsconstants
    47.7  from xen.xend import sxp
    47.8  
    47.9  #Functions exported through XML-RPC
   47.10 @@ -12,7 +13,7 @@ def err(msg):
   47.11      raise XSMError(msg)
   47.12  
   47.13  def on():
   47.14 -    return 0 #xsconstants.XS_POLICY_FLASK
   47.15 +    return xsconstants.XS_POLICY_FLASK
   47.16  
   47.17  def ssidref2label(ssidref):
   47.18      try:
   47.19 @@ -37,8 +38,9 @@ def set_security_label(policy, label):
   47.20      return label
   47.21  
   47.22  def ssidref2security_label(ssidref):
   47.23 -    return ssidref2label(ssidref)
   47.24 +    label = ssidref2label(ssidref)
   47.25 +    return label
   47.26  
   47.27  def get_security_label(self, xspol=None):
   47.28 -    label = self.info.get('security_label', '')
   47.29 +    label = self.info['security_label']
   47.30      return label
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/tools/python/xen/util/xsm/xsm.py	Fri Sep 12 14:47:40 2008 +0900
    48.3 @@ -0,0 +1,20 @@
    48.4 +import sys
    48.5 +import string
    48.6 +from xen.xend import XendOptions
    48.7 +from xen.util import xsconstants
    48.8 +from xsm_core import xsm_init
    48.9 +
   48.10 +xoptions = XendOptions.instance()
   48.11 +xsm_module_name = xoptions.get_xsm_module_name()
   48.12 +
   48.13 +xsconstants.XS_POLICY_USE = eval("xsconstants.XS_POLICY_" +
   48.14 +                                 string.upper(xsm_module_name))
   48.15 +
   48.16 +xsm_module_path = "xen.util.xsm." + xsm_module_name + "." + xsm_module_name
   48.17 +xsm_module = __import__(xsm_module_path, globals(), locals(), ['*'])
   48.18 +
   48.19 +xsm_init(xsm_module)
   48.20 +
   48.21 +for op in dir(xsm_module):
   48.22 +    if not hasattr(sys.modules[__name__], op):
   48.23 +        setattr(sys.modules[__name__], op, getattr(xsm_module, op, None))
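
Net effect, mirroring the callers patched below (XendConfig.py,
XendDomainInfo.py, blkif.py): once this module is imported,
xsconstants.XS_POLICY_USE holds the flag of the configured security module,
so a caller tests for an active module like this (minimal sketch):

    import xen.util.xsm.xsm as security
    from xen.util import xsconstants

    if security.on() == xsconstants.XS_POLICY_USE:
        pass  # the configured module (dummy, acm or flask) is active
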
    49.1 --- a/tools/python/xen/xend/XendConfig.py	Fri Sep 12 14:32:45 2008 +0900
    49.2 +++ b/tools/python/xen/xend/XendConfig.py	Fri Sep 12 14:47:40 2008 +0900
    49.3 @@ -729,7 +729,7 @@ class XendConfig(dict):
    49.4              self.parse_cpuid(cfg, 'cpuid_check')
    49.5  
    49.6          import xen.util.xsm.xsm as security
    49.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    49.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    49.9              from xen.util.acmpolicy import ACM_LABEL_UNLABELED
   49.10              if not 'security' in cfg and sxp.child_value(sxp_cfg, 'security'):
   49.11                  cfg['security'] = sxp.child_value(sxp_cfg, 'security')
    50.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 12 14:32:45 2008 +0900
    50.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Sep 12 14:47:40 2008 +0900
    50.3 @@ -2069,7 +2069,7 @@ class XendDomainInfo:
    50.4          balloon.free(2*1024) # 2MB should be plenty
    50.5  
    50.6          ssidref = 0
    50.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    50.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    50.9              ssidref = security.calc_dom_ssidref_from_info(self.info)
   50.10              if security.has_authorization(ssidref) == False:
   50.11                  raise VmError("VM is not authorized to run.")
   50.12 @@ -2855,10 +2855,6 @@ class XendDomainInfo:
   50.13              info["maxmem_kb"] = XendNode.instance() \
   50.14                                  .physinfo_dict()['total_memory'] * 1024
   50.15  
   50.16 -        #ssidref field not used any longer
   50.17 -        if 'ssidref' in info:
   50.18 -            info.pop('ssidref')
   50.19 -
   50.20          # make sure state is reset for info
   50.21          # TODO: we should eventually get rid of old_dom_states
   50.22  
    51.1 --- a/tools/python/xen/xend/XendOptions.py	Fri Sep 12 14:32:45 2008 +0900
    51.2 +++ b/tools/python/xen/xend/XendOptions.py	Fri Sep 12 14:47:40 2008 +0900
    51.3 @@ -132,6 +132,9 @@ class XendOptions:
    51.4      """Default script to configure a backend network interface"""
    51.5      vif_script = osdep.vif_script
    51.6  
    51.7 +    """Default Xen Security Module"""
    51.8 +    xsm_module_default = 'dummy'
    51.9 +
   51.10      """Default rotation count of qemu-dm log file."""
   51.11      qemu_dm_logrotate_count = 10
   51.12  
   51.13 @@ -427,6 +430,11 @@ class XendOptionsFile(XendOptions):
   51.14          return self.get_config_value('xen-api-server',
   51.15                                       self.xen_api_server_default)
   51.16  
   51.17 +    def get_xsm_module_name(self):
   51.18 +        """Get the Xen Security Module name.
   51.19 +        """
   51.20 +        return self.get_config_string('xsm_module_name', self.xsm_module_default)
   51.21 +
   51.22  if os.uname()[0] == 'SunOS':
   51.23      class XendOptionsSMF(XendOptions):
   51.24  
    52.1 --- a/tools/python/xen/xend/server/blkif.py	Fri Sep 12 14:32:45 2008 +0900
    52.2 +++ b/tools/python/xen/xend/server/blkif.py	Fri Sep 12 14:47:40 2008 +0900
    52.3 @@ -78,7 +78,7 @@ class BlkifController(DevController):
    52.4          if uuid:
    52.5              back['uuid'] = uuid
    52.6  
    52.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    52.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    52.9              self.do_access_control(config, uname)
   52.10  
   52.11          (device_path, devid) = blkif.blkdev_name_to_number(dev)
    53.1 --- a/tools/python/xen/xend/server/netif.py	Fri Sep 12 14:32:45 2008 +0900
    53.2 +++ b/tools/python/xen/xend/server/netif.py	Fri Sep 12 14:47:40 2008 +0900
    53.3 @@ -156,7 +156,7 @@ class NetifController(DevController):
    53.4              front = { 'handle' : "%i" % devid,
    53.5                        'mac'    : mac }
    53.6  
    53.7 -        if security.on() == xsconstants.XS_POLICY_ACM:
    53.8 +        if security.on() == xsconstants.XS_POLICY_USE:
    53.9              self.do_access_control(config)
   53.10  
   53.11          return (devid, back, front)
    54.1 --- a/tools/python/xen/xend/server/pciif.py	Fri Sep 12 14:32:45 2008 +0900
    54.2 +++ b/tools/python/xen/xend/server/pciif.py	Fri Sep 12 14:47:40 2008 +0900
    54.3 @@ -286,7 +286,7 @@ class PciController(DevController):
    54.4                      )%(dev.name))
    54.5  
    54.6          if dev.has_non_page_aligned_bar and arch.type != "ia64":
    54.7 -            raise VmError("pci: %: non-page-aligned MMIO BAR found." % dev.name)
    54.8 +            raise VmError("pci: %s: non-page-aligned MMIO BAR found." % dev.name)
    54.9  
   54.10          self.CheckSiblingDevices(fe_domid, dev)
   54.11  
    55.1 --- a/tools/python/xen/xm/create.py	Fri Sep 12 14:32:45 2008 +0900
    55.2 +++ b/tools/python/xen/xm/create.py	Fri Sep 12 14:47:40 2008 +0900
    55.3 @@ -566,11 +566,11 @@ gopts.var('hap', val='HAP',
    55.4            use="""Hap status (0=hap is disabled;
    55.5            1=hap is enabled.""")
    55.6  
    55.7 -gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
    55.8 +gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
    55.9            fn=append_value, default=[],
   55.10            use="""Cpuid description.""")
   55.11  
   55.12 -gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,exc=ECX,edx=EDX",
   55.13 +gopts.var('cpuid_check', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
   55.14            fn=append_value, default=[],
   55.15            use="""Cpuid check description.""")
   55.16  
   55.17 @@ -971,7 +971,7 @@ def preprocess_cpuid(vals, attr_name):
   55.18                          "of the register %s for input %s\n"
   55.19                          % (res['reg'], input) )
   55.20                  cpuid[input][res['reg']] = res['val'] # new register
   55.21 -    setattr(vals, attr_name, cpuid)
   55.22 +            setattr(vals, attr_name, cpuid)
   55.23  
   55.24  def preprocess_pci(vals):
   55.25      if not vals.pci: return
    56.1 --- a/tools/python/xen/xm/main.py	Fri Sep 12 14:32:45 2008 +0900
    56.2 +++ b/tools/python/xen/xm/main.py	Fri Sep 12 14:47:40 2008 +0900
    56.3 @@ -1812,7 +1812,7 @@ def domain_name_to_domid(domain_name):
    56.4      else:
    56.5          dom = server.xend.domain(domain_name)
    56.6          domid = int(sxp.child_value(dom, 'domid', '-1'))
    56.7 -    return domid
    56.8 +    return int(domid)
    56.9  
   56.10  def xm_vncviewer(args):
   56.11      autopass = False;
    57.1 --- a/tools/xcutils/lsevtchn.c	Fri Sep 12 14:32:45 2008 +0900
    57.2 +++ b/tools/xcutils/lsevtchn.c	Fri Sep 12 14:47:40 2008 +0900
    57.3 @@ -8,49 +8,55 @@
    57.4  #include <xenctrl.h>
    57.5  #include <xenguest.h>
    57.6  
    57.7 -int
    57.8 -main(int argc, char **argv)
    57.9 +int main(int argc, char **argv)
   57.10  {
   57.11 -    int xc_fd;
   57.12 -    int domid = 0, port = 0, status;
   57.13 -    const char *msg;
   57.14 +    int xc_fd, domid, port, rc;
   57.15 +    xc_evtchn_status_t status;
   57.16  
   57.17 -    if ( argc > 1 )
   57.18 -        domid = strtol(argv[1], NULL, 10);
   57.19 +    domid = (argc > 1) ? strtol(argv[1], NULL, 10) : 0;
   57.20  
   57.21      xc_fd = xc_interface_open();
   57.22      if ( xc_fd < 0 )
   57.23          errx(1, "failed to open control interface");
   57.24  
   57.25 -    while ( (status = xc_evtchn_status(xc_fd, domid, port)) >= 0 )
   57.26 +    for ( port = 0; ; port++ )
   57.27      {
   57.28 -        switch ( status )
   57.29 +        status.dom = domid;
   57.30 +        status.port = port;
   57.31 +        rc = xc_evtchn_status(xc_fd, &status);
   57.32 +        if ( rc < 0 )
   57.33 +            break;
   57.34 +
   57.35 +        if ( status.status == EVTCHNSTAT_closed )
   57.36 +            continue;
   57.37 +
   57.38 +        printf("%4d: VCPU %u: ", port, status.vcpu);
   57.39 +
   57.40 +        switch ( status.status )
   57.41          {
   57.42 -        case EVTCHNSTAT_closed:
   57.43 -            msg = "Channel is not in use.";
   57.44 -            break;
   57.45          case EVTCHNSTAT_unbound:
   57.46 -            msg = "Channel is waiting interdom connection.";
   57.47 +            printf("Interdomain (Waiting connection) - Remote Domain %u",
   57.48 +                   status.u.unbound.dom);
   57.49              break;
   57.50          case EVTCHNSTAT_interdomain:
   57.51 -            msg = "Channel is connected to remote domain.";
   57.52 +            printf("Interdomain (Connected) - Remote Domain %u, Port %u",
   57.53 +                   status.u.interdomain.dom, status.u.interdomain.port);
   57.54              break;
   57.55          case EVTCHNSTAT_pirq:
   57.56 -            msg = "Channel is bound to a phys IRQ line.";
   57.57 +            printf("Physical IRQ %u", status.u.pirq);
   57.58              break;
   57.59          case EVTCHNSTAT_virq:
   57.60 -            msg = "Channel is bound to a virtual IRQ line.";
   57.61 +            printf("Virtual IRQ %u", status.u.virq);
   57.62              break;
   57.63          case EVTCHNSTAT_ipi:
   57.64 -            msg = "Channel is bound to a virtual IPI line.";
   57.65 +            printf("IPI");
   57.66              break;
   57.67          default:
   57.68 -            msg = "Unknown.";
   57.69 +            printf("Unknown");
   57.70              break;
   57.71 +        }
   57.72  
   57.73 -        }
   57.74 -        printf("%03d: %d: %s\n", port, status, msg);
   57.75 -        port++;
   57.76 +        printf("\n");
   57.77      }
   57.78  
   57.79      xc_interface_close(xc_fd);
    58.1 --- a/tools/xcutils/xc_save.c	Fri Sep 12 14:32:45 2008 +0900
    58.2 +++ b/tools/xcutils/xc_save.c	Fri Sep 12 14:47:40 2008 +0900
    58.3 @@ -32,7 +32,7 @@ static struct suspendinfo {
    58.4   * Issue a suspend request through stdout, and receive the acknowledgement
    58.5   * from stdin.  This is handled by XendCheckpoint in the Python layer.
    58.6   */
    58.7 -static int compat_suspend(int domid)
    58.8 +static int compat_suspend(void)
    58.9  {
   58.10      char ans[30];
   58.11  
   58.12 @@ -43,20 +43,39 @@ static int compat_suspend(int domid)
   58.13              !strncmp(ans, "done\n", 5));
   58.14  }
   58.15  
   58.16 -static int suspend_evtchn_release(int xc, int domid)
   58.17 +static int suspend_evtchn_release(void)
   58.18  {
   58.19      if (si.suspend_evtchn >= 0) {
   58.20 -	xc_evtchn_unbind(si.xce, si.suspend_evtchn);
   58.21 -	si.suspend_evtchn = -1;
   58.22 +        xc_evtchn_unbind(si.xce, si.suspend_evtchn);
   58.23 +        si.suspend_evtchn = -1;
   58.24      }
   58.25      if (si.xce >= 0) {
   58.26 -	xc_evtchn_close(si.xce);
   58.27 -	si.xce = -1;
   58.28 +        xc_evtchn_close(si.xce);
   58.29 +        si.xce = -1;
   58.30      }
   58.31  
   58.32      return 0;
   58.33  }
   58.34  
   58.35 +static int await_suspend(void)
   58.36 +{
   58.37 +    int rc;
   58.38 +
   58.39 +    do {
   58.40 +        rc = xc_evtchn_pending(si.xce);
   58.41 +        if (rc < 0) {
   58.42 +            warnx("error polling suspend notification channel: %d", rc);
   58.43 +            return -1;
   58.44 +        }
   58.45 +    } while (rc != si.suspend_evtchn);
   58.46 +
   58.47 +    /* harmless for one-off suspend */
   58.48 +    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
    58.49 +        warnx("failed to unmask suspend notification channel");
   58.50 +
   58.51 +    return 0;
   58.52 +}
   58.53 +
   58.54  static int suspend_evtchn_init(int xc, int domid)
   58.55  {
   58.56      struct xs_handle *xs;
   58.57 @@ -71,16 +90,16 @@ static int suspend_evtchn_init(int xc, i
   58.58  
   58.59      xs = xs_daemon_open();
   58.60      if (!xs) {
   58.61 -	errx(1, "failed to get xenstore handle");
   58.62 -	return -1;
   58.63 +        warnx("failed to get xenstore handle");
   58.64 +        return -1;
   58.65      }
   58.66      sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
   58.67      portstr = xs_read(xs, XBT_NULL, path, &plen);
   58.68      xs_daemon_close(xs);
   58.69  
   58.70      if (!portstr || !plen) {
   58.71 -	warnx("could not read suspend event channel");
   58.72 -	return -1;
   58.73 +        warnx("could not read suspend event channel");
   58.74 +        return -1;
   58.75      }
   58.76  
   58.77      port = atoi(portstr);
   58.78 @@ -88,27 +107,29 @@ static int suspend_evtchn_init(int xc, i
   58.79  
   58.80      si.xce = xc_evtchn_open();
   58.81      if (si.xce < 0) {
   58.82 -	errx(1, "failed to open event channel handle");
   58.83 -	goto cleanup;
   58.84 +        warnx("failed to open event channel handle");
   58.85 +        goto cleanup;
   58.86      }
   58.87  
   58.88      si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
   58.89      if (si.suspend_evtchn < 0) {
   58.90 -	errx(1, "failed to bind suspend event channel: %d",
   58.91 -	     si.suspend_evtchn);
   58.92 -	goto cleanup;
   58.93 +        warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
   58.94 +        goto cleanup;
   58.95      }
   58.96  
   58.97      rc = xc_domain_subscribe_for_suspend(xc, domid, port);
   58.98      if (rc < 0) {
   58.99 -	errx(1, "failed to subscribe to domain: %d", rc);
  58.100 -	goto cleanup;
  58.101 +        warnx("failed to subscribe to domain: %d", rc);
  58.102 +        goto cleanup;
  58.103      }
  58.104  
  58.105 +    /* event channel is pending immediately after binding */
  58.106 +    await_suspend();
  58.107 +
  58.108      return 0;
  58.109  
  58.110    cleanup:
  58.111 -    suspend_evtchn_release(xc, domid);
  58.112 +    suspend_evtchn_release();
  58.113  
  58.114      return -1;
  58.115  }
  58.116 @@ -116,29 +137,20 @@ static int suspend_evtchn_init(int xc, i
  58.117  /**
  58.118   * Issue a suspend request to a dedicated event channel in the guest, and
  58.119   * receive the acknowledgement from the subscribe event channel. */
  58.120 -static int evtchn_suspend(int domid)
  58.121 +static int evtchn_suspend(void)
  58.122  {
  58.123 -    int xcefd;
  58.124      int rc;
  58.125  
  58.126      rc = xc_evtchn_notify(si.xce, si.suspend_evtchn);
  58.127      if (rc < 0) {
  58.128 -	errx(1, "failed to notify suspend request channel: %d", rc);
  58.129 -	return 0;
  58.130 +        warnx("failed to notify suspend request channel: %d", rc);
  58.131 +        return 0;
  58.132      }
  58.133  
  58.134 -    xcefd = xc_evtchn_fd(si.xce);
  58.135 -    do {
  58.136 -      rc = xc_evtchn_pending(si.xce);
  58.137 -      if (rc < 0) {
  58.138 -	errx(1, "error polling suspend notification channel: %d", rc);
  58.139 -	return 0;
  58.140 -      }
  58.141 -    } while (rc != si.suspend_evtchn);
  58.142 -
  58.143 -    /* harmless for one-off suspend */
  58.144 -    if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
  58.145 -	errx(1, "failed to unmask suspend notification channel: %d", rc);
  58.146 +    if (await_suspend() < 0) {
  58.147 +        warnx("suspend failed");
  58.148 +        return 0;
  58.149 +    }
  58.150  
  58.151      /* notify xend that it can do device migration */
  58.152      printf("suspended\n");
  58.153 @@ -147,12 +159,12 @@ static int evtchn_suspend(int domid)
  58.154      return 1;
  58.155  }
  58.156  
  58.157 -static int suspend(int domid)
  58.158 +static int suspend(void)
  58.159  {
  58.160      if (si.suspend_evtchn >= 0)
  58.161 -	return evtchn_suspend(domid);
  58.162 +        return evtchn_suspend();
  58.163  
  58.164 -    return compat_suspend(domid);
  58.165 +    return compat_suspend();
  58.166  }
  58.167  
  58.168  /* For HVM guests, there are two sources of dirty pages: the Xen shadow
  58.169 @@ -195,11 +207,9 @@ static void qemu_flip_buffer(int domid, 
  58.170  
  58.171      /* Tell qemu that we want it to start writing log-dirty bits to the
  58.172       * other buffer */
  58.173 -    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1)) {
  58.174 +    if (!xs_write(xs, XBT_NULL, qemu_next_active_path, &digit, 1))
  58.175          errx(1, "can't write next-active to store path (%s)\n", 
  58.176 -              qemu_next_active_path);
  58.177 -        exit(1);
  58.178 -    }
  58.179 +             qemu_next_active_path);
  58.180  
  58.181      /* Wait a while for qemu to signal that it has switched to the new 
  58.182       * active buffer */
  58.183 @@ -208,10 +218,8 @@ static void qemu_flip_buffer(int domid, 
  58.184      tv.tv_usec = 0;
  58.185      FD_ZERO(&fdset);
  58.186      FD_SET(xs_fileno(xs), &fdset);
  58.187 -    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1) {
  58.188 +    if ((select(xs_fileno(xs) + 1, &fdset, NULL, NULL, &tv)) != 1)
  58.189          errx(1, "timed out waiting for qemu to switch buffers\n");
  58.190 -        exit(1);
  58.191 -    }
  58.192      watch = xs_read_watch(xs, &len);
  58.193      free(watch);
  58.194      
  58.195 @@ -221,7 +229,7 @@ static void qemu_flip_buffer(int domid, 
  58.196          goto read_again;
  58.197  }
  58.198  
  58.199 -static void * init_qemu_maps(int domid, unsigned int bitmap_size)
  58.200 +static void *init_qemu_maps(int domid, unsigned int bitmap_size)
  58.201  {
  58.202      key_t key;
  58.203      char key_ascii[17] = {0,};
  58.204 @@ -293,7 +301,7 @@ main(int argc, char **argv)
  58.205      int ret;
  58.206  
  58.207      if (argc != 6)
  58.208 -	errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
  58.209 +        errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
  58.210  
  58.211      xc_fd = xc_interface_open();
  58.212      if (xc_fd < 0)
  58.213 @@ -305,13 +313,14 @@ main(int argc, char **argv)
  58.214      max_f = atoi(argv[4]);
  58.215      flags = atoi(argv[5]);
  58.216  
  58.217 -    suspend_evtchn_init(xc_fd, domid);
  58.218 +    if (suspend_evtchn_init(xc_fd, domid) < 0)
  58.219 +        warnx("suspend event channel initialization failed, using slow path");
  58.220  
  58.221      ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags, 
  58.222                           &suspend, !!(flags & XCFLAGS_HVM),
  58.223                           &init_qemu_maps, &qemu_flip_buffer);
  58.224  
  58.225 -    suspend_evtchn_release(xc_fd, domid);
  58.226 +    suspend_evtchn_release();
  58.227  
  58.228      xc_interface_close(xc_fd);
  58.229  
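
The restructuring above factors the pending-event poll loop into
await_suspend() and calls it once right after binding, since a freshly
bound event channel is already pending; errors are also downgraded
from errx() to warnx() so that a failed fast-path setup falls back to
the stdout/stdin handshake instead of aborting the save.  A schematic
of the handshake, with notify/pending/unmask as hypothetical stand-ins
for the xc_evtchn_* calls:

    def await_suspend(chan, suspend_port):
        # Spin until the pending notification is for the suspend port;
        # notifications for other ports are simply ignored here.
        while True:
            port = chan.pending()
            if port < 0:
                return -1              # polling error
            if port == suspend_port:
                break
        chan.unmask(suspend_port)      # harmless for a one-off suspend
        return 0

    def evtchn_suspend(chan, suspend_port):
        chan.notify(suspend_port)      # ask the guest to suspend
        return await_suspend(chan, suspend_port) == 0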
    59.1 --- a/tools/xenstore/xs.c	Fri Sep 12 14:32:45 2008 +0900
    59.2 +++ b/tools/xenstore/xs.c	Fri Sep 12 14:47:40 2008 +0900
    59.3 @@ -795,8 +795,11 @@ char *xs_get_domain_path(struct xs_handl
    59.4  
    59.5  bool xs_is_domain_introduced(struct xs_handle *h, unsigned int domid)
    59.6  {
    59.7 -	return strcmp("F",
    59.8 -		      single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid));
    59.9 +	char *domain = single_with_domid(h, XS_IS_DOMAIN_INTRODUCED, domid);
   59.10 +	int rc = strcmp("F", domain);
   59.11 +
   59.12 +	free(domain);
   59.13 +	return rc;
   59.14  }
   59.15  
   59.16  /* Only useful for DEBUG versions */
    60.1 --- a/tools/xentrace/formats	Fri Sep 12 14:32:45 2008 +0900
    60.2 +++ b/tools/xentrace/formats	Fri Sep 12 14:47:40 2008 +0900
    60.3 @@ -4,56 +4,69 @@ 0x0001f001  CPU%(cpu)d  %(tsc)d (+%(relt
    60.4  0x0001f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  wrap_buffer       0x%(1)08x
    60.5  0x0001f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  cpu_change        0x%(1)08x
    60.6  
    60.7 -0x0002f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
    60.8 -0x0002f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
    60.9 -0x0002f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.10 -0x0002f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.11 -0x0002f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.12 -0x0002f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.13 -0x0002f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown	  [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
   60.14 -0x0002f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
   60.15 -0x0002f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 0x%(1)08x ]
   60.16 -0x0002f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
   60.17 -0x0002f00B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
   60.18 -0x0002f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
   60.19 -0x0002f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
   60.20 -0x0002f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid = 0x%(1)08x, runtime = %(2)d ]
   60.21 -0x0002f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
   60.22 +0x00021011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_runnable [ dom:vcpu = 0x%(1)08x ]
   60.23 +0x00021021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_blocked  [ dom:vcpu = 0x%(1)08x ]
   60.24 +0x00021031  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  running_to_offline  [ dom:vcpu = 0x%(1)08x ]
   60.25 +0x00021101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_running [ dom:vcpu = 0x%(1)08x ]
   60.26 +0x00021121  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_blocked [ dom:vcpu = 0x%(1)08x ]
   60.27 +0x00021131  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  runnable_to_offline [ dom:vcpu = 0x%(1)08x ]
   60.28 +0x00021201  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_running  [ dom:vcpu = 0x%(1)08x ]
   60.29 +0x00021211  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_runnable [ dom:vcpu = 0x%(1)08x ]
   60.30 +0x00021231  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  blocked_to_offline  [ dom:vcpu = 0x%(1)08x ]
   60.31 +0x00021301  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_running  [ dom:vcpu = 0x%(1)08x ]
   60.32 +0x00021311  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_runnable [ dom:vcpu = 0x%(1)08x ]
   60.33 +0x00021321  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  offline_to_blocked  [ dom:vcpu = 0x%(1)08x ]
   60.34  
   60.35 -0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY     [ dom:vcpu = 0x%(1)08x ]
   60.36 -0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)08x ]
   60.37 -0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ dom:vcpu = 0x%(1)08x, exitcode = 0x%(2)08x, rIP  = 0x%(3)016x ]
   60.38 -0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
   60.39 -0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
   60.40 -0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 0x%(1)08x, errorcode = 0x%(2)02x, virt = 0x%(3)08x ]
   60.41 -0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ dom:vcpu = 0x%(1)08x,  errorcode = 0x%(2)02x, virt = 0x%(3)016x ]
   60.42 -0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, errorcode = 0x%(3)04x ]
   60.43 -0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x, fake = %(3)d ]
   60.44 -0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
   60.45 -0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
   60.46 -0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ dom:vcpu = 0x%(1)08x, port = 0x%(2)04x, size = %(3)d ]
   60.47 -0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
   60.48 -0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
   60.49 -0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)08x ]
   60.50 -0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ dom:vcpu = 0x%(1)08x, CR# = %(2)d, value = 0x%(3)016x ]
   60.51 -0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ     [ dom:vcpu = 0x%(1)08x ]
   60.52 -0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE    [ dom:vcpu = 0x%(1)08x ]
   60.53 -0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
   60.54 -0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ dom:vcpu = 0x%(1)08x, MSR# = 0x%(2)08x, value = 0x%(3)016x ]
   60.55 -0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x, eax = 0x%(3)08x, ebx = 0x%(4)08x, ecx=0x%(5)08x, edx = 0x%(6)08x ]
   60.56 -0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ dom:vcpu = 0x%(1)08x, vector = 0x%(2)02x ]
   60.57 -0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI         [ dom:vcpu = 0x%(1)08x ]
   60.58 -0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI         [ dom:vcpu = 0x%(1)08x ]
   60.59 -0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ dom:vcpu = 0x%(1)08x, func = 0x%(2)08x ]
   60.60 -0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ dom:vcpu = 0x%(1)08x, intpending = %(2)d ]
   60.61 -0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)08x ]
   60.62 -0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ dom:vcpu = 0x%(1)08x, is invlpga? = %(2)d, virt = 0x%(3)016x ]
   60.63 -0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE         [ dom:vcpu = 0x%(1)08x ]
   60.64 -0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
   60.65 -0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ dom:vcpu = 0x%(1)08x, data = 0x%(2)04x ]
   60.66 -0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS        [ dom:vcpu = 0x%(1)08x ]
   60.67 -0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 0x%(1)08x, value = 0x%(2)08x ]
   60.68 -0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ dom:vcpu = 0x%(1)08x, value = 0x%(2)016x ]
   60.69 +0x00028001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_add_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.70 +0x00028002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_rem_domain  [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.71 +0x00028003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_sleep      [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.72 +0x00028004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_wake       [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.73 +0x00028005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_yield          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.74 +0x00028006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  do_block          [ domid = 0x%(1)08x, edomid = 0x%(2)08x ]
   60.75 +0x00028007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  domain_shutdown	  [ domid = 0x%(1)08x, edomid = 0x%(2)08x, reason = 0x%(3)08x ]
   60.76 +0x00028008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_ctl
   60.77 +0x00028009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  sched_adjdom      [ domid = 0x%(1)08x ]
   60.78 +0x0002800a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  __enter_scheduler [ prev<domid:edomid> = 0x%(1)08x : 0x%(2)08x, next<domid:edomid> = 0x%(3)08x : 0x%(4)08x ]
   60.79 +0x0002800b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  s_timer_fn
   60.80 +0x0002800c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  t_timer_fn
   60.81 +0x0002800d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  dom_timer_fn
   60.82 +0x0002800e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infprev    [ old_domid = 0x%(1)08x, runtime = %(2)d ]
   60.83 +0x0002800f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  switch_infnext    [ new_domid = 0x%(1)08x, time = %(2)d, r_time = %(3)d ]
   60.84 +
   60.85 +0x00081001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMENTRY
   60.86 +0x00081002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 0x%(1)08x, rIP  = 0x%(2)08x ]
   60.87 +0x00081102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMEXIT      [ exitcode = 0x%(1)08x, rIP  = 0x%(2)016x ]
   60.88 +0x00082001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 0x%(2)02x, virt = 0x%(1)08x ]
   60.89 +0x00082101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_XEN      [ errorcode = 0x%(2)02x, virt = 0x%(1)016x ]
   60.90 +0x00082002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 0x%(1)02x, virt = 0x%(2)08x ]
   60.91 +0x00082102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PF_INJECT   [ errorcode = 0x%(1)02x, virt = 0x%(2)016x ]
   60.92 +0x00082003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_EXC     [ vector = 0x%(1)02x, errorcode = 0x%(2)04x ]
   60.93 +0x00082004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INJ_VIRQ    [ vector = 0x%(1)02x, fake = %(2)d ]
   60.94 +0x00082005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  REINJ_VIRQ  [ vector = 0x%(1)02x ]
   60.95 +0x00082006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_READ     [ port = 0x%(1)04x, size = %(2)d ]
   60.96 +0x00082007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_WRITE    [ port = 0x%(1)04x, size = %(2)d ]
   60.97 +0x00082008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, value = 0x%(2)08x ]
   60.98 +0x00082108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_READ     [ CR# = %(1)d, value = 0x%(2)016x ]
   60.99 +0x00082009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, value = 0x%(2)08x ]
  60.100 +0x00082109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CR_WRITE    [ CR# = %(1)d, value = 0x%(2)016x ]
   60.101 +0x0008200A  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_READ
  60.102 +0x0008200B  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  DR_WRITE
  60.103 +0x0008200C  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_READ    [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
  60.104 +0x0008200D  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MSR_WRITE   [ MSR# = 0x%(1)08x, value = 0x%(2)016x ]
  60.105 +0x0008200E  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CPUID       [ func = 0x%(1)08x, eax = 0x%(2)08x, ebx = 0x%(3)08x, ecx=0x%(4)08x, edx = 0x%(5)08x ]
  60.106 +0x0008200F  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR        [ vector = 0x%(1)02x ]
  60.107 +0x00082010  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NMI
  60.108 +0x00082011  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  SMI
  60.109 +0x00082012  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  VMMCALL     [ func = 0x%(1)08x ]
  60.110 +0x00082013  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  HLT         [ intpending = %(1)d ]
  60.111 +0x00082014  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = %(1)d, virt = 0x%(2)08x ]
  60.112 +0x00082114  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INVLPG      [ is invlpga? = %(1)d, virt = 0x%(2)016x ]
  60.113 +0x00082015  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MCE
  60.114 +0x00082016  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  IO_ASSIST   [ data = 0x%(1)04x ]
  60.115 +0x00082017  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  MMIO_ASSIST [ data = 0x%(1)04x ]
  60.116 +0x00082018  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  CLTS
  60.117 +0x00082019  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 0x%(1)08x ]
  60.118 +0x00082119  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  LMSW        [ value = 0x%(1)016x ]
  60.119  
  60.120  0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = %(1)d ]
  60.121  0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = %(1)d ]
  60.122 @@ -65,3 +78,41 @@ 0x0020f003  CPU%(cpu)d  %(tsc)d (+%(relt
  60.123  0x0020f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  trap       [ rip = 0x%(1)016x, trapnr:error = 0x%(2)08x ]
  60.124  0x0020f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ eip = 0x%(1)08x, addr = 0x%(2)08x, error = 0x%(3)08x ]
  60.125  0x0020f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_fault [ rip = 0x%(1)16x, addr = 0x%(3)16x, error = 0x%(5)08x ]
  60.126 +
  60.127 +0x0020f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ eip = 0x%(1)08x ]
  60.128 +0x0020f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_privop      [ rip = 0x%(1)16x ]
  60.129 +0x0020f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ eip = 0x%(1)08x ]
  60.130 +0x0020f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  emulate_4G          [ rip = 0x%(1)16x ]
  60.131 +0x0020f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 0x%(2)08x, eip = 0x%(1)08x, npte = 0x%(1)16x ]
  60.132 +0x0020f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  ptwr_emulation_pae  [ addr = 0x%(2)16x, rip = 0x%(1)16x, npte = 0x%(1)16x ]
  60.133 +
  60.134 +0x0040f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow                 [ gl1e = 0x%(1)16x, va = 0x%(2)08x, flags = 0x%(3)08x ]
  60.135 +0x0040f101  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_not_shadow                 [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
  60.136 +0x0040f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate             [ va = 0x%(1)08x ]
  60.137 +0x0040f102  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_propagate             [ va = 0x%(1)16x ]
  60.138 +0x0040f003  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio                  [ va = 0x%(1)08x ]
  60.139 +0x0040f103  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fast_mmio                  [ va = 0x%(1)16x ]
  60.140 +0x0040f004  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path            [ va = 0x%(1)08x ]
  60.141 +0x0040f104  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_false_fast_path            [ va = 0x%(1)16x ]
  60.142 +0x0040f005  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                       [ va = 0x%(1)08x ]
  60.143 +0x0040f105  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_mmio                       [ va = 0x%(1)16x ]
  60.144 +0x0040f006  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                      [ gl1e = 0x%(1)08x, va = 0x%(2)08x, flags = 0x%(3)08x ]
  60.145 +0x0040f106  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_fixup                      [ gl1e = 0x%(1)16x, va = 0x%(2)16x, flags = 0x%(3)08x ]
  60.146 +0x0040f007  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying                 [ va = 0x%(1)08x ]
  60.147 +0x0040f107  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_domf_dying                 [ va = 0x%(1)16x ]
  60.148 +0x0040f008  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                    [ gl1e = 0x%(1)08x, write_val = 0x%(2)08x, va = 0x%(3)08x, flags = 0x%(4)08x, emulation_count = 0x%(5)08x]
  60.149 +0x0040f108  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate                    [ gl1e = 0x%(1)16x, write_val = 0x%(2)16x, va = 0x%(3)16x, flags = 0x%(4)08x, emulation_count = 0x%(5)08x]
  60.150 +0x0040f009  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user      [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  60.151 +0x0040f109  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_user      [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  60.152 +0x0040f00a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj    [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  60.153 +0x0040f10a  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_evtinj    [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  60.154 +0x0040f00b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_unhandled [ va = 0x%(1)08x, gfn = 0x%(2)08x ]
  60.155 +0x0040f10b  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_unshadow_unhandled [ va = 0x%(1)16x, gfn = 0x%(2)16x ]
  60.156 +0x0040f00c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf           [ gfn = 0x%(1)08x ]
  60.157 +0x0040f10c  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_wrmap_bf           [ gfn = 0x%(1)16x ]
  60.158 +0x0040f00d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin     [ gfn = 0x%(1)08x ]
  60.159 +0x0040f10d  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_prealloc_unpin     [ gfn = 0x%(1)16x ]
  60.160 +0x0040f00e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full        [ gfn = 0x%(1)08x ]
  60.161 +0x0040f10e  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_full        [ gfn = 0x%(1)16x ]
  60.162 +0x0040f00f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only        [ gfn = 0x%(1)08x ]
  60.163 +0x0040f10f  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  shadow_emulate_resync_only        [ gfn = 0x%(1)16x ]
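
Each line above pairs an event ID with a Python %-dict template: cpu,
tsc and reltsc come from the record header, while the numeric keys
1..5 are the record's extra data words.  Dropping the leading dom:vcpu
word is what shifts the remaining indices down by one in the HVM
entries above.  A quick illustration of how a template is applied
(values invented):

    # Render one formats line against a fake trace record.
    fmt = "CPU%(cpu)d  %(tsc)d  INTR        [ vector = 0x%(1)02x ]"
    record = {'cpu': 0, 'tsc': 123456789, '1': 0x31}
    print(fmt % record)   # CPU0  123456789  INTR        [ vector = 0x31 ]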
    61.1 --- a/tools/xentrace/xentrace.c	Fri Sep 12 14:32:45 2008 +0900
    61.2 +++ b/tools/xentrace/xentrace.c	Fri Sep 12 14:47:40 2008 +0900
    61.3 @@ -56,6 +56,7 @@ typedef struct settings_st {
    61.4      unsigned long tbuf_size;
    61.5      unsigned long disk_rsvd;
    61.6      unsigned long timeout;
    61.7 +    unsigned long memory_buffer;
    61.8      uint8_t discard:1,
    61.9          disable_tracing:1;
   61.10  } settings_t;
   61.11 @@ -67,12 +68,245 @@ int interrupted = 0; /* gets set if we g
   61.12  static int xc_handle = -1;
   61.13  static int event_fd = -1;
   61.14  static int virq_port = -1;
   61.15 +static int outfd = 1;
   61.16  
   61.17  static void close_handler(int signal)
   61.18  {
   61.19      interrupted = 1;
   61.20  }
   61.21  
   61.22 +static struct {
   61.23 +    char * buf;
   61.24 +    unsigned long prod, cons, size;
   61.25 +    unsigned long pending_size, pending_prod;
   61.26 +} membuf = { 0 };
   61.27 +
   61.28 +#define MEMBUF_INDEX_RESET_THRESHOLD (1<<29)
   61.29 +
   61.30 +/* FIXME -- make a power of 2 so we can mask instead. */
   61.31 +#define MEMBUF_POINTER(_i) (membuf.buf + ((_i) % membuf.size))
   61.32 +#define MEMBUF_CONS_INCREMENT(_n)               \
   61.33 +    do {                                        \
   61.34 +        membuf.cons += (_n);                    \
   61.35 +    } while(0)
   61.36 +#define MEMBUF_PROD_SET(_x)                                             \
   61.37 +    do {                                                                \
   61.38 +        if ( (_x) < membuf.prod ) {                                     \
    61.39 +            fprintf(stderr, "%s: INTERNAL ERROR: prod %lu, trying to set to %lu!\n", \
   61.40 +                    __func__, membuf.prod, (unsigned long)(_x));        \
   61.41 +            exit(1);                                                    \
   61.42 +        }                                                               \
   61.43 +        membuf.prod = (_x);                                             \
   61.44 +        if ( (_x) > MEMBUF_INDEX_RESET_THRESHOLD )                      \
   61.45 +        {                                                               \
   61.46 +            membuf.prod %= membuf.size;                                 \
   61.47 +            membuf.cons %= membuf.size;                                 \
   61.48 +            if( membuf.prod < membuf.cons )                             \
   61.49 +                membuf.prod += membuf.size;                             \
   61.50 +        }                                                               \
    61.51 +    } while(0)
   61.52 +
   61.53 +struct cpu_change_record {
   61.54 +    uint32_t header;
   61.55 +    struct {
   61.56 +        int cpu;
   61.57 +        unsigned window_size;
   61.58 +    } data;
   61.59 +};
   61.60 +
   61.61 +#define CPU_CHANGE_HEADER                                           \
   61.62 +    (TRC_TRACE_CPU_CHANGE                                           \
   61.63 +     | (((sizeof(struct cpu_change_record)/sizeof(uint32_t)) - 1)   \
   61.64 +        << TRACE_EXTRA_SHIFT) )
   61.65 +
   61.66 +void membuf_alloc(unsigned long size)
   61.67 +{
   61.68 +    membuf.buf = malloc(size);
   61.69 +
   61.70 +    if(!membuf.buf)
   61.71 +    {
   61.72 +        fprintf(stderr, "%s: Couldn't malloc %lu bytes!\n",
   61.73 +                __func__, size);
   61.74 +        exit(1);
   61.75 +    }
   61.76 +
   61.77 +    membuf.prod = membuf.cons = 0;
   61.78 +    membuf.size = size;
   61.79 +}
   61.80 +
   61.81 +/*
   61.82 + * Reserve a new window in the buffer.  Move the 'consumer' forward size
   61.83 + * bytes, re-adjusting the cpu window sizes as necessary, and insert a
   61.84 + * cpu_change record.
   61.85 + */
   61.86 +void membuf_reserve_window(unsigned cpu, unsigned long window_size)
   61.87 +{
   61.88 +    struct cpu_change_record *rec;
   61.89 +    long need_to_consume, free, freed;
   61.90 +
   61.91 +    if ( membuf.pending_size > 0 )
   61.92 +    {
    61.93 +        fprintf(stderr, "%s: INTERNAL ERROR: pending_size %lu\n",
   61.94 +                __func__, membuf.pending_size);
   61.95 +        exit(1);
   61.96 +    }
   61.97 +
   61.98 +    need_to_consume = window_size + sizeof(*rec);
   61.99 +
  61.100 +    if ( window_size > membuf.size )
  61.101 +    {
  61.102 +        fprintf(stderr, "%s: reserve size %lu larger than buffer size %lu!\n",
  61.103 +                __func__, window_size, membuf.size);
  61.104 +        exit(1);
  61.105 +    }
  61.106 +
  61.107 +    /* Subtract free space already in buffer. */
  61.108 +    free = membuf.size - (membuf.prod - membuf.cons);
  61.109 +    if( need_to_consume < free)
  61.110 +        goto start_window;
  61.111 +
  61.112 +    need_to_consume -= free;
  61.113 +
  61.114 +    /*
  61.115 +     * "Free" up full windows until we have enough for this window.
  61.116 +     * It's a bit wasteful to throw away partial buffers, but the only
   61.117 +     * other option is to scan through the buffer headers.  Since in
   61.118 +     * the common case the data is about to be thrown away anyway,
   61.119 +     * minimizing the overall impact matters more.
  61.120 +     */
  61.121 +    do {
  61.122 +        rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.cons);
  61.123 +        if( rec->header != CPU_CHANGE_HEADER )
  61.124 +        {
  61.125 +            fprintf(stderr, "%s: INTERNAL ERROR: no cpu_change record at consumer!\n",
  61.126 +                    __func__);
  61.127 +            exit(EXIT_FAILURE);
  61.128 +        }
  61.129 +
  61.130 +        freed = sizeof(*rec) + rec->data.window_size;
  61.131 +
  61.132 +        if ( need_to_consume > 0 )
  61.133 +        {
  61.134 +            MEMBUF_CONS_INCREMENT(freed);
  61.135 +            need_to_consume -= freed;
  61.136 +        }
  61.137 +    } while( need_to_consume > 0 );
  61.138 +
  61.139 +start_window:
  61.140 +    /*
  61.141 +     * Start writing "pending" data.  Update prod once all this data is
  61.142 +     * written.
  61.143 +     */
  61.144 +    membuf.pending_prod = membuf.prod;
  61.145 +    membuf.pending_size = window_size;
  61.146 +
  61.147 +    rec = (struct cpu_change_record *)MEMBUF_POINTER(membuf.pending_prod);
  61.148 +
  61.149 +    rec->header = CPU_CHANGE_HEADER;
  61.150 +    rec->data.cpu = cpu;
  61.151 +    rec->data.window_size = window_size;
  61.152 +
  61.153 +    membuf.pending_prod += sizeof(*rec);
  61.154 +}
  61.155 +
  61.156 +void membuf_write(void *start, unsigned long size) {
  61.157 +    char * p;
  61.158 +    unsigned long wsize;
  61.159 +
  61.160 +    if( (membuf.size - (membuf.prod - membuf.cons)) < size )
  61.161 +    {
  61.162 +        fprintf(stderr, "%s: INTERNAL ERROR: need %lu bytes, only have %lu!\n",
   61.163 +                __func__, size, membuf.size - (membuf.prod - membuf.cons));
  61.164 +        exit(1);
  61.165 +    }
  61.166 +
  61.167 +    if( size > membuf.pending_size )
  61.168 +    {
  61.169 +        fprintf(stderr, "%s: INTERNAL ERROR: size %lu, pending %lu!\n",
  61.170 +                __func__, size, membuf.pending_size);
  61.171 +        exit(1);
  61.172 +    }
  61.173 +
  61.174 +    wsize = size;
  61.175 +    p = MEMBUF_POINTER(membuf.pending_prod);
  61.176 +
  61.177 +    /* If the buffer overlaps the "wrap", do an extra write */
  61.178 +    if ( p + size > membuf.buf + membuf.size )
  61.179 +    {
  61.180 +        int usize = ( membuf.buf + membuf.size ) - p;
  61.181 +
  61.182 +        memcpy(p, start, usize);
  61.183 +
  61.184 +        start += usize;
  61.185 +        wsize -= usize;
  61.186 +        p = membuf.buf;
  61.187 +    }
  61.188 +
  61.189 +    memcpy(p, start, wsize);
  61.190 +
  61.191 +    membuf.pending_prod += size;
  61.192 +    membuf.pending_size -= size;
  61.193 +
  61.194 +    if ( membuf.pending_size == 0 )
  61.195 +    {
  61.196 +        MEMBUF_PROD_SET(membuf.pending_prod);
  61.197 +    }
  61.198 +}
  61.199 +
  61.200 +void membuf_dump(void) {
  61.201 +    /* Dump circular memory buffer */
  61.202 +    int cons, prod, wsize, written;
  61.203 +    char * wstart;
  61.204 +
  61.205 +    fprintf(stderr, "Dumping memory buffer.\n");
  61.206 +
  61.207 +    cons = membuf.cons % membuf.size; 
  61.208 +    prod = membuf.prod % membuf.size;
   61.209 +
  61.210 +    if(prod > cons)
  61.211 +    {
  61.212 +        /* Write in one go */
  61.213 +        wstart = membuf.buf + cons;
  61.214 +        wsize = prod - cons;
  61.215 +
  61.216 +        written = write(outfd, wstart, wsize);
  61.217 +        if ( written != wsize )
  61.218 +            goto fail;
  61.219 +    }
  61.220 +    else
  61.221 +    {
  61.222 +        /* Write in two pieces: cons->end, beginning->prod. */
  61.223 +        wstart = membuf.buf + cons;
  61.224 +        wsize = membuf.size - cons;
  61.225 +
  61.226 +        written = write(outfd, wstart, wsize);
  61.227 +        if ( written != wsize )
  61.228 +        {
  61.229 +            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
  61.230 +                    wsize, written);
  61.231 +            goto fail;
  61.232 +        }
  61.233 +
  61.234 +        wstart = membuf.buf;
  61.235 +        wsize = prod;
  61.236 +
  61.237 +        written = write(outfd, wstart, wsize);
  61.238 +        if ( written != wsize )
  61.239 +        {
  61.240 +            fprintf(stderr, "Write failed! (size %d, returned %d)\n",
  61.241 +                    wsize, written);
  61.242 +            goto fail;
  61.243 +        }
  61.244 +    }
  61.245 +
  61.246 +    membuf.cons = membuf.prod = 0;
   61.247 +
  61.248 +    return;
  61.249 +fail:
  61.250 +    exit(1);
  61.251 +    return;
  61.252 +}
  61.253 +
  61.254  /**
  61.255   * write_buffer - write a section of the trace buffer
  61.256   * @cpu      - source buffer CPU ID
  61.257 @@ -85,20 +319,20 @@ static void close_handler(int signal)
  61.258   * of the buffer write.
  61.259   */
  61.260  static void write_buffer(unsigned int cpu, unsigned char *start, int size,
  61.261 -               int total_size, int outfd)
  61.262 +                         int total_size)
  61.263  {
  61.264      struct statvfs stat;
  61.265      size_t written = 0;
  61.266      
  61.267 -    if ( opts.disk_rsvd != 0 )
  61.268 +    if ( opts.memory_buffer == 0 && opts.disk_rsvd != 0 )
  61.269      {
  61.270          unsigned long long freespace;
  61.271  
  61.272          /* Check that filesystem has enough space. */
  61.273          if ( fstatvfs (outfd, &stat) )
  61.274          {
  61.275 -                fprintf(stderr, "Statfs failed!\n");
  61.276 -                goto fail;
  61.277 +            fprintf(stderr, "Statfs failed!\n");
  61.278 +            goto fail;
  61.279          }
  61.280  
  61.281          freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
  61.282 @@ -112,8 +346,8 @@ static void write_buffer(unsigned int cp
  61.283  
  61.284          if ( freespace <= opts.disk_rsvd )
  61.285          {
  61.286 -                fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
  61.287 -                exit (EXIT_FAILURE);
  61.288 +            fprintf(stderr, "Disk space limit reached (free space: %lluMB, limit: %luMB).\n", freespace, opts.disk_rsvd);
  61.289 +            exit (EXIT_FAILURE);
  61.290          }
  61.291      }
  61.292  
  61.293 @@ -122,40 +356,46 @@ static void write_buffer(unsigned int cp
  61.294       * first write. */
  61.295      if ( total_size != 0 )
  61.296      {
  61.297 -        struct {
  61.298 -            uint32_t header;
  61.299 -            struct {
  61.300 -                unsigned cpu;
  61.301 -                unsigned byte_count;
  61.302 -            } extra;
  61.303 -        } rec;
  61.304 +        if ( opts.memory_buffer )
  61.305 +        {
  61.306 +            membuf_reserve_window(cpu, total_size);
  61.307 +        }
  61.308 +        else
  61.309 +        {
  61.310 +            struct cpu_change_record rec;
  61.311  
  61.312 -        rec.header = TRC_TRACE_CPU_CHANGE
  61.313 -            | ((sizeof(rec.extra)/sizeof(uint32_t)) << TRACE_EXTRA_SHIFT);
  61.314 -        rec.extra.cpu = cpu;
  61.315 -        rec.extra.byte_count = total_size;
  61.316 +            rec.header = CPU_CHANGE_HEADER;
  61.317 +            rec.data.cpu = cpu;
  61.318 +            rec.data.window_size = total_size;
  61.319  
  61.320 -        written = write(outfd, &rec, sizeof(rec));
  61.321 +            written = write(outfd, &rec, sizeof(rec));
  61.322 +            if ( written != sizeof(rec) )
  61.323 +            {
  61.324 +                fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
  61.325 +                        written);
  61.326 +                goto fail;
  61.327 +            }
  61.328 +        }
  61.329 +    }
  61.330  
  61.331 -        if ( written != sizeof(rec) )
  61.332 +    if ( opts.memory_buffer )
  61.333 +    {
  61.334 +        membuf_write(start, size);
  61.335 +    }
  61.336 +    else
  61.337 +    {
  61.338 +        written = write(outfd, start, size);
  61.339 +        if ( written != size )
  61.340          {
  61.341 -            fprintf(stderr, "Cannot write cpu change (write returned %zd)\n",
  61.342 -                    written);
  61.343 +            fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
  61.344 +                    size, written);
  61.345              goto fail;
  61.346          }
  61.347      }
  61.348  
  61.349 -    written = write(outfd, start, size);
  61.350 -    if ( written != size )
  61.351 -    {
  61.352 -        fprintf(stderr, "Write failed! (size %d, returned %zd)\n",
  61.353 -                size, written);
  61.354 -        goto fail;
  61.355 -    }
  61.356 -
  61.357      return;
  61.358  
  61.359 - fail:
  61.360 +fail:
  61.361      PERROR("Failed to write trace data");
  61.362      exit(EXIT_FAILURE);
  61.363  }
  61.364 @@ -394,7 +634,7 @@ static void wait_for_event_or_timeout(un
  61.365   * monitor_tbufs - monitor the contents of tbufs and output to a file
  61.366   * @logfile:       the FILE * representing the file to log to
  61.367   */
  61.368 -static int monitor_tbufs(int outfd)
  61.369 +static int monitor_tbufs(void)
  61.370  {
  61.371      int i;
  61.372  
  61.373 @@ -429,9 +669,9 @@ static int monitor_tbufs(int outfd)
  61.374              meta[i]->cons = meta[i]->prod;
  61.375  
  61.376      /* now, scan buffers for events */
  61.377 -    while ( !interrupted )
  61.378 +    while ( 1 )
  61.379      {
  61.380 -        for ( i = 0; (i < num) && !interrupted; i++ )
  61.381 +        for ( i = 0; i < num; i++ )
  61.382          {
  61.383              unsigned long start_offset, end_offset, window_size, cons, prod;
  61.384                  
  61.385 @@ -463,8 +703,7 @@ static int monitor_tbufs(int outfd)
  61.386                  /* If window does not wrap, write in one big chunk */
  61.387                  write_buffer(i, data[i]+start_offset,
  61.388                               window_size,
  61.389 -                             window_size,
  61.390 -                             outfd);
  61.391 +                             window_size);
  61.392              }
  61.393              else
  61.394              {
  61.395 @@ -474,24 +713,29 @@ static int monitor_tbufs(int outfd)
  61.396                   */
  61.397                  write_buffer(i, data[i] + start_offset,
  61.398                               data_size - start_offset,
  61.399 -                             window_size,
  61.400 -                             outfd);
  61.401 +                             window_size);
  61.402                  write_buffer(i, data[i],
  61.403                               end_offset,
  61.404 -                             0,
  61.405 -                             outfd);
  61.406 +                             0);
  61.407              }
  61.408  
  61.409              xen_mb(); /* read buffer, then update cons. */
  61.410              meta[i]->cons = prod;
  61.411 +
  61.412          }
  61.413  
  61.414 +        if ( interrupted )
  61.415 +            break;
  61.416 +
  61.417          wait_for_event_or_timeout(opts.poll_sleep);
  61.418      }
  61.419  
  61.420 -    if(opts.disable_tracing)
  61.421 +    if ( opts.disable_tracing )
  61.422          disable_tbufs();
  61.423  
  61.424 +    if ( opts.memory_buffer )
  61.425 +        membuf_dump();
  61.426 +
  61.427      /* cleanup */
  61.428      free(meta);
  61.429      free(data);
  61.430 @@ -538,6 +782,8 @@ static void usage(void)
  61.431  "  -T  --time-interval=s   Run xentrace for s seconds and quit.\n" \
  61.432  "  -?, --help              Show this message\n" \
  61.433  "  -V, --version           Print program version\n" \
  61.434 +"  -M, --memory-buffer=b   Copy trace records to a circular memory buffer.\n" \
  61.435 +"                          Dump to file on exit.\n" \
  61.436  "\n" \
  61.437  "This tool is used to capture trace buffer data from Xen. The\n" \
  61.438  "data is output in a binary format, in the following order:\n" \
  61.439 @@ -553,6 +799,53 @@ static void usage(void)
  61.440      exit(EXIT_FAILURE);
  61.441  }
  61.442  
  61.443 +/* convert the argument string pointed to by arg to a long int representation,
  61.444 + * including suffixes such as 'M' and 'k'. */
  61.445 +#define MB (1024*1024)
  61.446 +#define KB (1024)
  61.447 +long sargtol(const char *restrict arg, int base)
  61.448 +{
  61.449 +    char *endp;
  61.450 +    long val;
  61.451 +
  61.452 +    errno = 0;
  61.453 +    val = strtol(arg, &endp, base);
   61.454 +
  61.455 +    if ( errno != 0 )
  61.456 +    {
  61.457 +        fprintf(stderr, "Invalid option argument: %s\n", arg);
  61.458 +        fprintf(stderr, "Error: %s\n\n", strerror(errno));
  61.459 +        usage();
  61.460 +    }
  61.461 +    else if (endp == arg)
  61.462 +    {
  61.463 +        goto invalid;
  61.464 +    }
  61.465 +
  61.466 +    switch(*endp)
  61.467 +    {
  61.468 +    case '\0':
  61.469 +        break;
  61.470 +    case 'M':
  61.471 +        val *= MB;
  61.472 +        break;
  61.473 +    case 'K':
  61.474 +    case 'k':
  61.475 +        val *= KB;
  61.476 +        break;
  61.477 +    default:
  61.478 +        fprintf(stderr, "Unknown suffix %c\n", *endp);
  61.479 +        exit(1);
  61.480 +    }
  61.481 +
  61.482 +
  61.483 +    return val;
   61.484 +invalid:
   61.485 +    fprintf(stderr, "Invalid option argument: %s\n\n", arg);
   61.486 +    usage();
   61.487 +    return 0;
  61.488 +}
  61.489 +
  61.490  /* convert the argument string pointed to by arg to a long int representation */
  61.491  static long argtol(const char *restrict arg, int base)
  61.492  {
  61.493 @@ -606,6 +899,7 @@ static void parse_args(int argc, char **
  61.494          { "trace-buf-size", required_argument, 0, 'S' },
  61.495          { "reserve-disk-space", required_argument, 0, 'r' },
  61.496          { "time-interval",  required_argument, 0, 'T' },
  61.497 +        { "memory-buffer",  required_argument, 0, 'M' },
  61.498          { "discard-buffers", no_argument,      0, 'D' },
  61.499          { "dont-disable-tracing", no_argument, 0, 'x' },
  61.500          { "help",           no_argument,       0, '?' },
  61.501 @@ -613,7 +907,7 @@ static void parse_args(int argc, char **
  61.502          { 0, 0, 0, 0 }
  61.503      };
  61.504  
  61.505 -    while ( (option = getopt_long(argc, argv, "c:e:s:S:t:?V",
  61.506 +    while ( (option = getopt_long(argc, argv, "t:s:c:e:S:r:T:M:Dx?V",
  61.507                      long_options, NULL)) != -1) 
  61.508      {
  61.509          switch ( option )
  61.510 @@ -655,6 +949,10 @@ static void parse_args(int argc, char **
  61.511              opts.timeout = argtol(optarg, 0);
  61.512              break;
  61.513  
  61.514 +        case 'M':
  61.515 +            opts.memory_buffer = sargtol(optarg, 0);
  61.516 +            break;
  61.517 +
  61.518          default:
  61.519              usage();
  61.520          }
  61.521 @@ -674,7 +972,7 @@ static void parse_args(int argc, char **
  61.522  
  61.523  int main(int argc, char **argv)
  61.524  {
  61.525 -    int outfd = 1, ret;
  61.526 +    int ret;
  61.527      struct sigaction act;
  61.528  
  61.529      opts.outfile = 0;
  61.530 @@ -720,6 +1018,9 @@ int main(int argc, char **argv)
  61.531          exit(EXIT_FAILURE);
  61.532      }
  61.533  
  61.534 +    if ( opts.memory_buffer > 0 )
  61.535 +        membuf_alloc(opts.memory_buffer);
  61.536 +
  61.537      /* ensure that if we get a signal, we'll do cleanup, then exit */
  61.538      act.sa_handler = close_handler;
  61.539      act.sa_flags = 0;
  61.540 @@ -729,7 +1030,7 @@ int main(int argc, char **argv)
  61.541      sigaction(SIGINT,  &act, NULL);
  61.542      sigaction(SIGALRM, &act, NULL);
  61.543  
  61.544 -    ret = monitor_tbufs(outfd);
  61.545 +    ret = monitor_tbufs();
  61.546  
  61.547      return ret;
  61.548  }
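
Note on the xentrace changes above: the new -M/--memory-buffer option stages
trace records in a fixed-size in-memory ring and writes them to the output
file only on exit, which can keep trace-file disk I/O from perturbing the
workload being traced. sargtol() accepts 'M'/'k' size suffixes, so a
hypothetical invocation (output file argument assumed) would be:

    xentrace -M 32M trace.dat

The membuf_alloc()/membuf_dump() bodies are not shown in the hunks above; as
a rough sketch of the wrap-around copy such a ring implies (illustrative
names, not the actual membuf API):

    #include <stddef.h>
    #include <string.h>

    /* Copy one record of len bytes (len <= size assumed) into a ring of
     * 'size' bytes; *off tracks the write position and wraps at the end. */
    static void ring_put(char *ring, size_t size, size_t *off,
                         const void *rec, size_t len)
    {
        size_t head = size - *off;               /* room before the wrap */

        if ( len <= head )
            memcpy(ring + *off, rec, len);
        else
        {
            memcpy(ring + *off, rec, head);
            memcpy(ring, (const char *)rec + head, len - head);
        }
        *off = (*off + len) % size;
    }
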
    62.1 --- a/xen/arch/x86/acpi/Makefile	Fri Sep 12 14:32:45 2008 +0900
    62.2 +++ b/xen/arch/x86/acpi/Makefile	Fri Sep 12 14:47:40 2008 +0900
    62.3 @@ -1,5 +1,5 @@
    62.4  subdir-y += cpufreq
    62.5  
    62.6  obj-y += boot.o
    62.7 -obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
    62.8 +obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
    62.9  obj-y += pmstat.o
    63.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Fri Sep 12 14:32:45 2008 +0900
    63.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Fri Sep 12 14:47:40 2008 +0900
    63.3 @@ -39,6 +39,7 @@
    63.4  #include <xen/smp.h>
    63.5  #include <xen/guest_access.h>
    63.6  #include <xen/keyhandler.h>
    63.7 +#include <xen/cpuidle.h>
    63.8  #include <asm/cache.h>
    63.9  #include <asm/io.h>
   63.10  #include <asm/hpet.h>
   63.11 @@ -49,13 +50,10 @@
   63.12  #define DEBUG_PM_CX
   63.13  
   63.14  #define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
   63.15 +#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
   63.16  #define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
   63.17  #define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
   63.18  
   63.19 -#define ACPI_PROCESSOR_MAX_POWER        8
   63.20 -#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
   63.21 -#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000
   63.22 -
   63.23  static void (*lapic_timer_off)(void);
   63.24  static void (*lapic_timer_on)(void);
   63.25  
   63.26 @@ -65,66 +63,6 @@ extern void (*pm_idle) (void);
   63.27  static void (*pm_idle_save) (void) __read_mostly;
   63.28  unsigned int max_cstate __read_mostly = 2;
   63.29  integer_param("max_cstate", max_cstate);
   63.30 -/*
   63.31 - * bm_history -- bit-mask with a bit per jiffy of bus-master activity
   63.32 - * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
   63.33 - * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
   63.34 - * 100 HZ: 0x0000000F: 4 jiffies = 40ms
   63.35 - * reduce history for more aggressive entry into C3
   63.36 - */
   63.37 -unsigned int bm_history __read_mostly =
   63.38 -    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
   63.39 -integer_param("bm_history", bm_history);
   63.40 -
   63.41 -struct acpi_processor_cx;
   63.42 -
   63.43 -struct acpi_processor_cx_policy
   63.44 -{
   63.45 -    u32 count;
   63.46 -    struct acpi_processor_cx *state;
   63.47 -    struct
   63.48 -    {
   63.49 -        u32 time;
   63.50 -        u32 ticks;
   63.51 -        u32 count;
   63.52 -        u32 bm;
   63.53 -    } threshold;
   63.54 -};
   63.55 -
   63.56 -struct acpi_processor_cx
   63.57 -{
   63.58 -    u8 valid;
   63.59 -    u8 type;
   63.60 -    u32 address;
   63.61 -    u8 space_id;
   63.62 -    u32 latency;
   63.63 -    u32 latency_ticks;
   63.64 -    u32 power;
   63.65 -    u32 usage;
   63.66 -    u64 time;
   63.67 -    struct acpi_processor_cx_policy promotion;
   63.68 -    struct acpi_processor_cx_policy demotion;
   63.69 -};
   63.70 -
   63.71 -struct acpi_processor_flags
   63.72 -{
   63.73 -    u8 bm_control:1;
   63.74 -    u8 bm_check:1;
   63.75 -    u8 has_cst:1;
   63.76 -    u8 power_setup_done:1;
   63.77 -    u8 bm_rld_set:1;
   63.78 -};
   63.79 -
   63.80 -struct acpi_processor_power
   63.81 -{
   63.82 -    struct acpi_processor_flags flags;
   63.83 -    struct acpi_processor_cx *state;
   63.84 -    s_time_t bm_check_timestamp;
   63.85 -    u32 default_state;
   63.86 -    u32 bm_activity;
   63.87 -    u32 count;
   63.88 -    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
   63.89 -};
   63.90  
   63.91  static struct acpi_processor_power processor_powers[NR_CPUS];
   63.92  
   63.93 @@ -133,26 +71,21 @@ static void print_acpi_power(uint32_t cp
   63.94      uint32_t i;
   63.95  
   63.96      printk("==cpu%d==\n", cpu);
   63.97 -    printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
   63.98 +    printk("active state:\t\tC%d\n",
   63.99 +           (power->last_state) ? power->last_state->type : -1);
  63.100      printk("max_cstate:\t\tC%d\n", max_cstate);
  63.101 -    printk("bus master activity:\t%08x\n", power->bm_activity);
  63.102      printk("states:\n");
  63.103      
  63.104      for ( i = 1; i < power->count; i++ )
  63.105      {
  63.106 -        printk((power->states[i].type == power->state->type) ? "   *" : "    ");
  63.107 +        if ( power->last_state && 
  63.108 +             power->states[i].type == power->last_state->type )
  63.109 +            printk("   *");
  63.110 +        else
  63.111 +            printk("    ");
  63.112          printk("C%d:\t\t", i);
  63.113          printk("type[C%d] ", power->states[i].type);
  63.114 -        if ( power->states[i].promotion.state )
  63.115 -            printk("promotion[C%d] ", power->states[i].promotion.state->type);
  63.116 -        else
  63.117 -            printk("promotion[--] ");
  63.118 -        if ( power->states[i].demotion.state )
  63.119 -            printk("demotion[C%d] ", power->states[i].demotion.state->type);
  63.120 -        else
  63.121 -            printk("demotion[--] ");
  63.122 -        printk("latency[%03d]\n ", power->states[i].latency);
  63.123 -        printk("\t\t\t");
  63.124 +        printk("latency[%03d] ", power->states[i].latency);
  63.125          printk("usage[%08d] ", power->states[i].usage);
  63.126          printk("duration[%"PRId64"]\n", power->states[i].time);
  63.127      }
  63.128 @@ -182,48 +115,6 @@ static inline u32 ticks_elapsed(u32 t1, 
  63.129          return ((0xFFFFFFFF - t1) + t2);
  63.130  }
  63.131  
  63.132 -static void acpi_processor_power_activate(struct acpi_processor_power *power,
  63.133 -                                          struct acpi_processor_cx *new)
  63.134 -{
  63.135 -    struct acpi_processor_cx *old;
  63.136 -
  63.137 -    if ( !power || !new )
  63.138 -        return;
  63.139 -
  63.140 -    old = power->state;
  63.141 -
  63.142 -    if ( old )
  63.143 -        old->promotion.count = 0;
  63.144 -    new->demotion.count = 0;
  63.145 -
  63.146 -    /* Cleanup from old state. */
  63.147 -    if ( old )
  63.148 -    {
  63.149 -        switch ( old->type )
  63.150 -        {
  63.151 -        case ACPI_STATE_C3:
  63.152 -            /* Disable bus master reload */
  63.153 -            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
  63.154 -                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
  63.155 -            break;
  63.156 -        }
  63.157 -    }
  63.158 -
  63.159 -    /* Prepare to use new state. */
  63.160 -    switch ( new->type )
  63.161 -    {
  63.162 -    case ACPI_STATE_C3:
  63.163 -        /* Enable bus master reload */
  63.164 -        if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
  63.165 -            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
  63.166 -        break;
  63.167 -    }
  63.168 -
  63.169 -    power->state = new;
  63.170 -
  63.171 -    return;
  63.172 -}
  63.173 -
  63.174  static void acpi_safe_halt(void)
  63.175  {
  63.176      smp_mb__after_clear_bit();
  63.177 @@ -263,13 +154,50 @@ static void acpi_idle_do_entry(struct ac
  63.178      }
  63.179  }
  63.180  
  63.181 -static atomic_t c3_cpu_count;
  63.182 +static inline void acpi_idle_update_bm_rld(struct acpi_processor_power *power,
  63.183 +                                           struct acpi_processor_cx *target)
  63.184 +{
  63.185 +    if ( !power->flags.bm_check )
  63.186 +        return;
  63.187 +
  63.188 +    if ( power->flags.bm_rld_set && target->type != ACPI_STATE_C3 )
  63.189 +    {
  63.190 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
  63.191 +        power->flags.bm_rld_set = 0;
  63.192 +    }
  63.193 +
  63.194 +    if ( !power->flags.bm_rld_set && target->type == ACPI_STATE_C3 )
  63.195 +    {
  63.196 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
  63.197 +        power->flags.bm_rld_set = 1;
  63.198 +    }
  63.199 +}
  63.200 +
  63.201 +static int acpi_idle_bm_check(void)
  63.202 +{
  63.203 +    u32 bm_status = 0;
  63.204 +
  63.205 +    acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
  63.206 +    if ( bm_status )
  63.207 +        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
  63.208 +    /*
  63.209 +     * TBD: PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
  63.210 +     * the true state of bus mastering activity, forcing us to
  63.211 +     * manually check the BMIDEA bit of each IDE channel.
  63.212 +     */
  63.213 +    return bm_status;
  63.214 +}
  63.215 +
  63.216 +static struct {
  63.217 +    spinlock_t lock;
  63.218 +    unsigned int count;
  63.219 +} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
  63.220  
  63.221  static void acpi_processor_idle(void)
  63.222  {
  63.223      struct acpi_processor_power *power = NULL;
  63.224      struct acpi_processor_cx *cx = NULL;
  63.225 -    struct acpi_processor_cx *next_state = NULL;
  63.226 +    int next_state;
  63.227      int sleep_ticks = 0;
  63.228      u32 t1, t2 = 0;
  63.229  
  63.230 @@ -287,7 +215,16 @@ static void acpi_processor_idle(void)
  63.231          return;
  63.232      }
  63.233  
  63.234 -    cx = power->state;
  63.235 +    next_state = cpuidle_current_governor->select(power);
  63.236 +    if ( next_state > 0 )
  63.237 +    {
  63.238 +        cx = &power->states[next_state];
  63.239 +        if ( power->flags.bm_check && acpi_idle_bm_check()
  63.240 +             && cx->type == ACPI_STATE_C3 )
  63.241 +            cx = power->safe_state;
  63.242 +        if ( cx->type > max_cstate )
  63.243 +            cx = &power->states[max_cstate];
  63.244 +    }
  63.245      if ( !cx )
  63.246      {
  63.247          if ( pm_idle_save )
  63.248 @@ -303,69 +240,14 @@ static void acpi_processor_idle(void)
  63.249          return;
  63.250      }
  63.251  
  63.252 -    /*
  63.253 -     * Check BM Activity
  63.254 -     * -----------------
  63.255 -     * Check for bus mastering activity (if required), record, and check
  63.256 -     * for demotion.
  63.257 -     */
  63.258 -    if ( power->flags.bm_check )
  63.259 -    {
  63.260 -        u32 bm_status = 0;
  63.261 -        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
  63.262 -
  63.263 -        if ( diff > 31 )
  63.264 -            diff = 31;
  63.265 -
  63.266 -        power->bm_activity <<= diff;
  63.267 -
  63.268 -        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
  63.269 -        if ( bm_status )
  63.270 -        {
  63.271 -            power->bm_activity |= 0x1;
  63.272 -            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
  63.273 -        }
  63.274 -        /*
  63.275 -         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
  63.276 -         * the true state of bus mastering activity; forcing us to
  63.277 -         * manually check the BMIDEA bit of each IDE channel.
  63.278 -         */
  63.279 -        /*else if ( errata.piix4.bmisx )
  63.280 -        {
  63.281 -            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
  63.282 -                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
  63.283 -                pr->power.bm_activity |= 0x1;
  63.284 -        }*/
  63.285 -
  63.286 -        power->bm_check_timestamp = NOW();
  63.287 -
  63.288 -        /*
  63.289 -         * If bus mastering is or was active this jiffy, demote
  63.290 -         * to avoid a faulty transition.  Note that the processor
  63.291 -         * won't enter a low-power state during this call (to this
  63.292 -         * function) but should upon the next.
  63.293 -         *
  63.294 -         * TBD: A better policy might be to fallback to the demotion
  63.295 -         *      state (use it for this quantum only) istead of
  63.296 -         *      demoting -- and rely on duration as our sole demotion
  63.297 -         *      qualification.  This may, however, introduce DMA
  63.298 -         *      issues (e.g. floppy DMA transfer overrun/underrun).
  63.299 -         */
  63.300 -        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
  63.301 -        {
  63.302 -            local_irq_enable();
  63.303 -            next_state = cx->demotion.state;
  63.304 -            goto end;
  63.305 -        }
  63.306 -    }
  63.307 +    power->last_state = cx;
  63.308  
  63.309      /*
  63.310       * Sleep:
  63.311       * ------
  63.312       * Invoke the current Cx state to put the processor to sleep.
  63.313       */
  63.314 -    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
  63.315 -        smp_mb__after_clear_bit();
  63.316 +    acpi_idle_update_bm_rld(power, cx);
  63.317  
  63.318      switch ( cx->type )
  63.319      {
  63.320 @@ -399,8 +281,7 @@ static void acpi_processor_idle(void)
  63.321          /* Re-enable interrupts */
  63.322          local_irq_enable();
  63.323          /* Compute time (ticks) that we were actually asleep */
  63.324 -        sleep_ticks =
  63.325 -            ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
  63.326 +        sleep_ticks = ticks_elapsed(t1, t2);
  63.327          break;
  63.328  
  63.329      case ACPI_STATE_C3:
  63.330 @@ -416,8 +297,8 @@ static void acpi_processor_idle(void)
  63.331           */
  63.332          if ( power->flags.bm_check && power->flags.bm_control )
  63.333          {
  63.334 -            atomic_inc(&c3_cpu_count);
  63.335 -            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
  63.336 +            spin_lock(&c3_cpu_status.lock);
  63.337 +            if ( ++c3_cpu_status.count == num_online_cpus() )
  63.338              {
  63.339                  /*
  63.340                   * All CPUs are trying to go to C3
  63.341 @@ -425,6 +306,7 @@ static void acpi_processor_idle(void)
  63.342                   */
  63.343                  acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
  63.344              }
  63.345 +            spin_unlock(&c3_cpu_status.lock);
  63.346          }
  63.347          else if ( !power->flags.bm_check )
  63.348          {
  63.349 @@ -455,8 +337,10 @@ static void acpi_processor_idle(void)
  63.350          if ( power->flags.bm_check && power->flags.bm_control )
  63.351          {
  63.352              /* Enable bus master arbitration */
  63.353 -            atomic_dec(&c3_cpu_count);
  63.354 -            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
  63.355 +            spin_lock(&c3_cpu_status.lock);
  63.356 +            if ( c3_cpu_status.count-- == num_online_cpus() )
  63.357 +                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
  63.358 +            spin_unlock(&c3_cpu_status.lock);
  63.359          }
  63.360  
  63.361          /* Re-enable interrupts */
  63.362 @@ -465,8 +349,6 @@ static void acpi_processor_idle(void)
  63.363          lapic_timer_on();
  63.364          /* Compute time (ticks) that we were actually asleep */
  63.365          sleep_ticks = ticks_elapsed(t1, t2);
  63.366 -        /* Do not account our idle-switching overhead: */
  63.367 -        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
  63.368  
  63.369          break;
  63.370  
  63.371 @@ -476,163 +358,14 @@ static void acpi_processor_idle(void)
  63.372      }
  63.373  
  63.374      cx->usage++;
  63.375 -    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
  63.376 -        cx->time += sleep_ticks;
  63.377 -
  63.378 -    next_state = power->state;
  63.379 -
  63.380 -    /*
  63.381 -     * Promotion?
  63.382 -     * ----------
  63.383 -     * Track the number of longs (time asleep is greater than threshold)
  63.384 -     * and promote when the count threshold is reached.  Note that bus
  63.385 -     * mastering activity may prevent promotions.
  63.386 -     * Do not promote above max_cstate.
  63.387 -     */
  63.388 -    if ( cx->promotion.state &&
  63.389 -         ((cx->promotion.state - power->states) <= max_cstate) )
  63.390 +    if ( sleep_ticks > 0 )
  63.391      {
  63.392 -        if ( sleep_ticks > cx->promotion.threshold.ticks )
  63.393 -        {
  63.394 -            cx->promotion.count++;
  63.395 -            cx->demotion.count = 0;
  63.396 -            if ( cx->promotion.count >= cx->promotion.threshold.count )
  63.397 -            {
  63.398 -                if ( power->flags.bm_check )
  63.399 -                {
  63.400 -                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
  63.401 -                    {
  63.402 -                        next_state = cx->promotion.state;
  63.403 -                        goto end;
  63.404 -                    }
  63.405 -                }
  63.406 -                else
  63.407 -                {
  63.408 -                    next_state = cx->promotion.state;
  63.409 -                    goto end;
  63.410 -                }
  63.411 -            }
  63.412 -        }
  63.413 -    }
  63.414 -
  63.415 -    /*
  63.416 -     * Demotion?
  63.417 -     * ---------
  63.418 -     * Track the number of shorts (time asleep is less than time threshold)
  63.419 -     * and demote when the usage threshold is reached.
  63.420 -     */
  63.421 -    if ( cx->demotion.state )
  63.422 -    {
  63.423 -        if ( sleep_ticks < cx->demotion.threshold.ticks )
  63.424 -        {
  63.425 -            cx->demotion.count++;
  63.426 -            cx->promotion.count = 0;
  63.427 -            if ( cx->demotion.count >= cx->demotion.threshold.count )
  63.428 -            {
  63.429 -                next_state = cx->demotion.state;
  63.430 -                goto end;
  63.431 -            }
  63.432 -        }
  63.433 -    }
  63.434 -
  63.435 -end:
  63.436 -    /*
  63.437 -     * Demote if current state exceeds max_cstate
  63.438 -     */
  63.439 -    if ( (power->state - power->states) > max_cstate )
  63.440 -    {
  63.441 -        if ( cx->demotion.state )
  63.442 -            next_state = cx->demotion.state;
  63.443 +        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
  63.444 +        cx->time += sleep_ticks;
  63.445      }
  63.446  
  63.447 -    /*
  63.448 -     * New Cx State?
  63.449 -     * -------------
  63.450 -     * If we're going to start using a new Cx state we must clean up
  63.451 -     * from the previous and prepare to use the new.
  63.452 -     */
  63.453 -    if ( next_state != power->state )
  63.454 -        acpi_processor_power_activate(power, next_state);
  63.455 -}
  63.456 -
  63.457 -static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
  63.458 -{
  63.459 -    unsigned int i;
  63.460 -    unsigned int state_is_set = 0;
  63.461 -    struct acpi_processor_cx *lower = NULL;
  63.462 -    struct acpi_processor_cx *higher = NULL;
  63.463 -    struct acpi_processor_cx *cx;
  63.464 -
  63.465 -    if ( !power )
  63.466 -        return -EINVAL;
  63.467 -
  63.468 -    /*
  63.469 -     * This function sets the default Cx state policy (OS idle handler).
  63.470 -     * Our scheme is to promote quickly to C2 but more conservatively
  63.471 -     * to C3.  We're favoring C2  for its characteristics of low latency
  63.472 -     * (quick response), good power savings, and ability to allow bus
  63.473 -     * mastering activity.  Note that the Cx state policy is completely
  63.474 -     * customizable and can be altered dynamically.
  63.475 -     */
  63.476 -
  63.477 -    /* startup state */
  63.478 -    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
  63.479 -    {
  63.480 -        cx = &power->states[i];
  63.481 -        if ( !cx->valid )
  63.482 -            continue;
  63.483 -
  63.484 -        if ( !state_is_set )
  63.485 -            power->state = cx;
  63.486 -        state_is_set++;
  63.487 -        break;
  63.488 -    }
  63.489 -
  63.490 -    if ( !state_is_set )
  63.491 -        return -ENODEV;
  63.492 -
  63.493 -    /* demotion */
  63.494 -    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
  63.495 -    {
  63.496 -        cx = &power->states[i];
  63.497 -        if ( !cx->valid )
  63.498 -            continue;
  63.499 -
  63.500 -        if ( lower )
  63.501 -        {
  63.502 -            cx->demotion.state = lower;
  63.503 -            cx->demotion.threshold.ticks = cx->latency_ticks;
  63.504 -            cx->demotion.threshold.count = 1;
  63.505 -            if ( cx->type == ACPI_STATE_C3 )
  63.506 -                cx->demotion.threshold.bm = bm_history;
  63.507 -        }
  63.508 -
  63.509 -        lower = cx;
  63.510 -    }
  63.511 -
  63.512 -    /* promotion */
  63.513 -    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
  63.514 -    {
  63.515 -        cx = &power->states[i];
  63.516 -        if ( !cx->valid )
  63.517 -            continue;
  63.518 -
  63.519 -        if ( higher )
  63.520 -        {
  63.521 -            cx->promotion.state = higher;
  63.522 -            cx->promotion.threshold.ticks = cx->latency_ticks;
  63.523 -            if ( cx->type >= ACPI_STATE_C2 )
  63.524 -                cx->promotion.threshold.count = 4;
  63.525 -            else
  63.526 -                cx->promotion.threshold.count = 10;
  63.527 -            if ( higher->type == ACPI_STATE_C3 )
  63.528 -                cx->promotion.threshold.bm = bm_history;
  63.529 -        }
  63.530 -
  63.531 -        higher = cx;
  63.532 -    }
  63.533 -
  63.534 -    return 0;
  63.535 +    if ( cpuidle_current_governor->reflect )
  63.536 +        cpuidle_current_governor->reflect(power);
  63.537  }
  63.538  
  63.539  static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
  63.540 @@ -821,6 +554,8 @@ static int check_cx(struct acpi_processo
  63.541      return 0;
  63.542  }
  63.543  
  63.544 +static unsigned int latency_factor = 2;
  63.545 +
  63.546  static void set_cx(
  63.547      struct acpi_processor_power *acpi_power,
  63.548      xen_processor_cx_t *xen_cx)
  63.549 @@ -842,6 +577,9 @@ static void set_cx(
  63.550      cx->power    = xen_cx->power;
  63.551      
  63.552      cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
  63.553 +    cx->target_residency = cx->latency * latency_factor;
  63.554 +    if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
  63.555 +        acpi_power->safe_state = cx;
  63.556  }
  63.557  
  63.558  int get_cpu_id(u8 acpi_id)
  63.559 @@ -936,6 +674,7 @@ long set_cx_pminfo(uint32_t cpu, struct 
  63.560  
  63.561      init_cx_pminfo(acpi_power);
  63.562  
  63.563 +    acpi_power->cpu = cpu_id;
  63.564      acpi_power->flags.bm_check = power->flags.bm_check;
  63.565      acpi_power->flags.bm_control = power->flags.bm_control;
  63.566      acpi_power->flags.has_cst = power->flags.has_cst;
  63.567 @@ -950,10 +689,11 @@ long set_cx_pminfo(uint32_t cpu, struct 
  63.568          set_cx(acpi_power, &xen_cx);
  63.569      }
  63.570  
  63.571 +    if ( cpuidle_current_governor->enable &&
  63.572 +         cpuidle_current_governor->enable(acpi_power) )
  63.573 +        return -EFAULT;
  63.574 +
  63.575      /* FIXME: C-state dependency is not supported by far */
  63.576 -    
  63.577 -    /* initialize default policy */
  63.578 -    acpi_processor_set_power_policy(acpi_power);
  63.579  
  63.580      print_acpi_power(cpu_id, acpi_power);
  63.581  
  63.582 @@ -978,7 +718,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
  63.583      uint64_t usage;
  63.584      int i;
  63.585  
  63.586 -    stat->last = (power->state) ? power->state->type : 0;
  63.587 +    stat->last = (power->last_state) ? power->last_state->type : 0;
  63.588      stat->nr = processor_powers[cpuid].count;
  63.589      stat->idle_time = v->runstate.time[RUNSTATE_running];
  63.590      if ( v->is_running )
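
Note: the net effect of the cpu_idle.c rework above is that the built-in
promotion/demotion policy is removed and Cx selection is delegated to a
pluggable governor. Condensed to its essentials (identifiers as in the hunks
above; error and bus-master handling omitted), each idle pass is now:

    next_state = cpuidle_current_governor->select(power); /* pick an index */
    cx = &power->states[next_state];
    power->last_state = cx;
    /* ... enter cx, timing the sleep with the PM timer ... */
    power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
    cx->time += sleep_ticks;
    if ( cpuidle_current_governor->reflect )
        cpuidle_current_governor->reflect(power); /* feed the outcome back */
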
    64.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 12 14:32:45 2008 +0900
    64.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 12 14:47:40 2008 +0900
    64.3 @@ -48,7 +48,7 @@ struct processor_pminfo processor_pminfo
    64.4  struct cpufreq_policy xen_px_policy[NR_CPUS];
    64.5  
    64.6  static cpumask_t *cpufreq_dom_pt;
    64.7 -static cpumask_t cpufreq_dom_mask;
    64.8 +static unsigned long *cpufreq_dom_mask;
    64.9  static unsigned int cpufreq_dom_max;
   64.10  
   64.11  enum {
   64.12 @@ -562,7 +562,8 @@ static struct cpufreq_driver acpi_cpufre
   64.13  void cpufreq_dom_exit(void)
   64.14  {
   64.15      cpufreq_dom_max = 0;
   64.16 -    cpus_clear(cpufreq_dom_mask);
   64.17 +    if (cpufreq_dom_mask)
   64.18 +        xfree(cpufreq_dom_mask);
   64.19      if (cpufreq_dom_pt)
   64.20          xfree(cpufreq_dom_pt);
   64.21  }
   64.22 @@ -572,22 +573,28 @@ int cpufreq_dom_init(void)
   64.23      unsigned int i;
   64.24  
   64.25      cpufreq_dom_max = 0;
   64.26 -    cpus_clear(cpufreq_dom_mask);
   64.27  
   64.28      for_each_online_cpu(i) {
   64.29 -        cpu_set(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
   64.30          if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
   64.31              cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
   64.32      }
   64.33      cpufreq_dom_max++;
   64.34  
   64.35 +    cpufreq_dom_mask = xmalloc_array(unsigned long,
   64.36 +                                     BITS_TO_LONGS(cpufreq_dom_max));
   64.37 +    if (!cpufreq_dom_mask)
   64.38 +        return -ENOMEM;
   64.39 +    bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
   64.40 +
   64.41      cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
   64.42      if (!cpufreq_dom_pt)
   64.43          return -ENOMEM;
   64.44      memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
   64.45  
   64.46 -    for_each_online_cpu(i)
   64.47 +    for_each_online_cpu(i) {
   64.48 +        __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
   64.49          cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
   64.50 +    }
   64.51  
   64.52      for_each_online_cpu(i)
   64.53          processor_pminfo[i].perf.shared_cpu_map =
   64.54 @@ -616,10 +623,11 @@ static int cpufreq_cpu_init(void)
   64.55  
   64.56  int cpufreq_dom_dbs(unsigned int event)
   64.57  {
   64.58 -    int cpu, dom, ret = 0;
   64.59 +    unsigned int cpu, dom;
   64.60 +    int ret = 0;
   64.61  
   64.62 -    for (dom=0; dom<cpufreq_dom_max; dom++) {
   64.63 -        if (!cpu_isset(dom, cpufreq_dom_mask))
   64.64 +    for (dom = 0; dom < cpufreq_dom_max; dom++) {
   64.65 +        if (!test_bit(dom, cpufreq_dom_mask))
   64.66              continue;
   64.67          cpu = first_cpu(cpufreq_dom_pt[dom]);
   64.68          ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
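
Note: cpufreq_dom_mask is indexed by ACPI coordination-domain number, which
need not be bounded by NR_CPUS, so the previous fixed-width cpumask_t could
be overrun; the rework sizes the bitmap to the largest domain seen. The
allocation pattern, reduced to a sketch (Xen's xmalloc_array/bitmap helpers
as used in the hunk above; dom_max and dom stand in for the real variables):

    unsigned long *mask = xmalloc_array(unsigned long,
                                        BITS_TO_LONGS(dom_max));
    if ( !mask )
        return -ENOMEM;
    bitmap_zero(mask, dom_max);   /* all domains initially absent */
    __set_bit(dom, mask);         /* record each domain as it is found */
    /* ... later ... */
    if ( test_bit(dom, mask) )
        /* per-domain work */;
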
    65.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c	Fri Sep 12 14:32:45 2008 +0900
    65.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c	Fri Sep 12 14:47:40 2008 +0900
    65.3 @@ -197,8 +197,8 @@ static int powernow_cpufreq_cpu_init(str
    65.4  
    65.5      data->max_freq = perf->states[0].core_frequency * 1000;
    65.6      /* table init */
    65.7 -    for (i=0; i<perf->state_count && i<max_hw_pstate; i++) {
    65.8 -        if (i>0 && perf->states[i].core_frequency >=
    65.9 +    for (i = 0; i < perf->state_count && i <= max_hw_pstate; i++) {
   65.10 +        if (i > 0 && perf->states[i].core_frequency >=
   65.11              data->freq_table[valid_states-1].frequency / 1000)
   65.12              continue;
   65.13  
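
Note: the change from (i < max_hw_pstate) to (i <= max_hw_pstate) implies
max_hw_pstate is an inclusive upper bound, i.e. the highest valid hardware
P-state index; the old exclusive bound silently dropped the last P-state
from the frequency table.
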
    66.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    66.2 +++ b/xen/arch/x86/acpi/cpuidle_menu.c	Fri Sep 12 14:47:40 2008 +0900
    66.3 @@ -0,0 +1,132 @@
    66.4 +/*
    66.5 + * cpuidle_menu - menu governor for cpu idle; the main idea comes
    66.6 + *            from Linux's drivers/cpuidle/governors/menu.c
    66.7 + *
    66.8 + *  Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
    66.9 + *  Copyright (C) 2007, 2008 Intel Corporation
   66.10 + *
   66.11 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   66.12 + *
   66.13 + *  This program is free software; you can redistribute it and/or modify
   66.14 + *  it under the terms of the GNU General Public License as published by
   66.15 + *  the Free Software Foundation; either version 2 of the License, or (at
   66.16 + *  your option) any later version.
   66.17 + *
   66.18 + *  This program is distributed in the hope that it will be useful, but
   66.19 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
   66.20 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   66.21 + *  General Public License for more details.
   66.22 + *
   66.23 + *  You should have received a copy of the GNU General Public License along
   66.24 + *  with this program; if not, write to the Free Software Foundation, Inc.,
   66.25 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
   66.26 + *
   66.27 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   66.28 + */
   66.29 +#include <xen/config.h>
   66.30 +#include <xen/errno.h>
   66.31 +#include <xen/lib.h>
   66.32 +#include <xen/types.h>
   66.33 +#include <xen/acpi.h>
   66.34 +#include <xen/timer.h>
   66.35 +#include <xen/cpuidle.h>
   66.36 +
   66.37 +#define BREAK_FUZZ      4       /* 4 us */
   66.38 +#define USEC_PER_SEC 1000000
   66.39 +
   66.40 +struct menu_device
   66.41 +{
   66.42 +    int             last_state_idx;
   66.43 +    unsigned int    expected_us;
   66.44 +    unsigned int    predicted_us;
   66.45 +    unsigned int    last_measured_us;
   66.46 +    unsigned int    elapsed_us;
   66.47 +};
   66.48 +
   66.49 +static DEFINE_PER_CPU(struct menu_device, menu_devices);
   66.50 +
   66.51 +static s_time_t get_sleep_length_ns(void)
   66.52 +{
   66.53 +    return per_cpu(timer_deadline, smp_processor_id()) - NOW();
   66.54 +}
   66.55 +
   66.56 +static int menu_select(struct acpi_processor_power *power)
   66.57 +{
   66.58 +    struct menu_device *data = &__get_cpu_var(menu_devices);
   66.59 +    int i;
   66.60 +
   66.61 +    /* determine the expected residency time */
   66.62 +    data->expected_us = (u32) get_sleep_length_ns() / 1000;
   66.63 +
   66.64 +    /* find the deepest idle state that satisfies our constraints */
   66.65 +    for ( i = 1; i < power->count; i++ )
   66.66 +    {
   66.67 +        struct acpi_processor_cx *s = &power->states[i];
   66.68 +
   66.69 +        if ( s->target_residency > data->expected_us + s->latency )
   66.70 +            break;
   66.71 +        if ( s->target_residency > data->predicted_us )
   66.72 +            break;
   66.73 +        /* TBD: we need to check the QoS requirement in the future */
   66.74 +    }
   66.75 +
   66.76 +    data->last_state_idx = i - 1;
   66.77 +    return i - 1;
   66.78 +}
   66.79 +
   66.80 +static void menu_reflect(struct acpi_processor_power *power)
   66.81 +{
   66.82 +    struct menu_device *data = &__get_cpu_var(menu_devices);
   66.83 +    struct acpi_processor_cx *target = &power->states[data->last_state_idx];
   66.84 +    unsigned int last_residency; 
   66.85 +    unsigned int measured_us;
   66.86 +
   66.87 +    /*
   66.88 +     * Ugh, this idle state doesn't support residency measurements, so we
   66.89 +     * are basically lost in the dark.  As a compromise, assume we slept
   66.90 +     * for one full standard timer tick.  However, be aware that this
   66.91 +     * could potentially result in a suboptimal state transition.
   66.92 +     */
   66.93 +    if ( target->type == ACPI_STATE_C1 )
   66.94 +        last_residency = USEC_PER_SEC / HZ;
   66.95 +    else
   66.96 +        last_residency = power->last_residency;
   66.97 +
   66.98 +    measured_us = last_residency + data->elapsed_us;
   66.99 +
  66.100 +    /* if wrapping, set to max uint (-1) */
  66.101 +    measured_us = data->elapsed_us <= measured_us ? measured_us : -1;
  66.102 +
  66.103 +    /* Predict time remaining until next break event */
  66.104 +    data->predicted_us = max(measured_us, data->last_measured_us);
  66.105 +
  66.106 +    /* Distinguish between expected & non-expected events */
  66.107 +    if ( last_residency + BREAK_FUZZ
  66.108 +         < data->expected_us + target->latency )
  66.109 +    {
  66.110 +        data->last_measured_us = measured_us;
  66.111 +        data->elapsed_us = 0;
  66.112 +    }
  66.113 +    else
  66.114 +        data->elapsed_us = measured_us;
  66.115 +}
  66.116 +
  66.117 +static int menu_enable_device(struct acpi_processor_power *power)
  66.118 +{
  66.119 +    struct menu_device *data = &per_cpu(menu_devices, power->cpu);
  66.120 +
  66.121 +    memset(data, 0, sizeof(struct menu_device));
  66.122 +
  66.123 +    return 0;
  66.124 +}
  66.125 +
  66.126 +static struct cpuidle_governor menu_governor =
  66.127 +{
  66.128 +    .name =         "menu",
  66.129 +    .rating =       20,
  66.130 +    .enable =       menu_enable_device,
  66.131 +    .select =       menu_select,
  66.132 +    .reflect =      menu_reflect,
  66.133 +};
  66.134 +
  66.135 +struct cpuidle_governor *cpuidle_current_governor = &menu_governor;
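
Note: a worked example of menu_select(), assuming a hypothetical table and
latency_factor = 2 (so target_residency = 2 * latency): C1 with latency
1us / target 2us, C2 with 100us / 200us, C3 with 1000us / 2000us. Suppose
the next timer fires in 500us (expected_us = 500) and predicted_us = 400.
C1 passes both tests (2 <= 500 + 1 and 2 <= 400), C2 passes (200 <= 600 and
200 <= 400), but C3 trips the first test (2000 > 500 + 1000), so the loop
breaks at i = 3 and index 2 (C2) is chosen. menu_reflect() then folds the
residency actually observed back into predicted_us for the next decision.
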
    67.1 --- a/xen/arch/x86/domain.c	Fri Sep 12 14:32:45 2008 +0900
    67.2 +++ b/xen/arch/x86/domain.c	Fri Sep 12 14:47:40 2008 +0900
    67.3 @@ -31,6 +31,7 @@
    67.4  #include <xen/compat.h>
    67.5  #include <xen/acpi.h>
    67.6  #include <xen/pci.h>
    67.7 +#include <xen/paging.h>
    67.8  #include <asm/regs.h>
    67.9  #include <asm/mc146818rtc.h>
   67.10  #include <asm/system.h>
   67.11 @@ -40,7 +41,6 @@
   67.12  #include <asm/i387.h>
   67.13  #include <asm/mpspec.h>
   67.14  #include <asm/ldt.h>
   67.15 -#include <asm/paging.h>
   67.16  #include <asm/hypercall.h>
   67.17  #include <asm/hvm/hvm.h>
   67.18  #include <asm/hvm/support.h>
   67.19 @@ -302,7 +302,8 @@ int vcpu_initialise(struct vcpu *v)
   67.20      else
   67.21      {
   67.22          /* PV guests by default have a 100Hz ticker. */
   67.23 -        v->periodic_period = MILLISECS(10);
   67.24 +        if ( !is_idle_domain(d) )
   67.25 +            v->periodic_period = MILLISECS(10);
   67.26  
   67.27          /* PV guests get an emulated PIT too for video BIOSes to use. */
   67.28          if ( !is_idle_domain(d) && (v->vcpu_id == 0) )
   67.29 @@ -1645,23 +1646,26 @@ static int relinquish_memory(
   67.30  
   67.31          /*
   67.32           * Forcibly invalidate top-most, still valid page tables at this point
   67.33 -         * to break circular 'linear page table' references. This is okay
   67.34 -         * because MMU structures are not shared across domains and this domain
   67.35 -         * is now dead. Thus top-most valid tables are not in use so a non-zero
   67.36 -         * count means circular reference.
   67.37 +         * to break circular 'linear page table' references as well as clean up
   67.38 +         * partially validated pages. This is okay because MMU structures are
   67.39 +         * not shared across domains and this domain is now dead. Thus top-most
   67.40 +         * valid tables are not in use so a non-zero count means circular
   67.41 +         * reference or partially validated.
   67.42           */
   67.43          y = page->u.inuse.type_info;
   67.44          for ( ; ; )
   67.45          {
   67.46              x = y;
   67.47 -            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
   67.48 -                        (type|PGT_validated)) )
   67.49 +            if ( likely((x & PGT_type_mask) != type) ||
   67.50 +                 likely(!(x & (PGT_validated|PGT_partial))) )
   67.51                  break;
   67.52  
   67.53 -            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
   67.54 +            y = cmpxchg(&page->u.inuse.type_info, x,
   67.55 +                        x & ~(PGT_validated|PGT_partial));
   67.56              if ( likely(y == x) )
   67.57              {
   67.58 -                free_page_type(page, type);
   67.59 +                if ( free_page_type(page, x, 0) != 0 )
   67.60 +                    BUG();
   67.61                  break;
   67.62              }
   67.63          }
    68.1 --- a/xen/arch/x86/domain_build.c	Fri Sep 12 14:32:45 2008 +0900
    68.2 +++ b/xen/arch/x86/domain_build.c	Fri Sep 12 14:47:40 2008 +0900
    68.3 @@ -26,6 +26,7 @@
    68.4  #include <asm/desc.h>
    68.5  #include <asm/i387.h>
    68.6  #include <asm/paging.h>
    68.7 +#include <asm/p2m.h>
    68.8  #include <asm/e820.h>
    68.9  
   68.10  #include <public/version.h>
    69.1 --- a/xen/arch/x86/domctl.c	Fri Sep 12 14:32:45 2008 +0900
    69.2 +++ b/xen/arch/x86/domctl.c	Fri Sep 12 14:47:40 2008 +0900
    69.3 @@ -20,7 +20,7 @@
    69.4  #include <xen/trace.h>
    69.5  #include <xen/console.h>
    69.6  #include <xen/iocap.h>
    69.7 -#include <asm/paging.h>
    69.8 +#include <xen/paging.h>
    69.9  #include <asm/irq.h>
   69.10  #include <asm/hvm/hvm.h>
   69.11  #include <asm/hvm/support.h>
   69.12 @@ -68,14 +68,6 @@ long arch_do_domctl(
   69.13          if ( unlikely((d = rcu_lock_domain_by_id(domctl->domain)) == NULL) )
   69.14              break;
   69.15  
   69.16 -        ret = xsm_ioport_permission(d, fp, 
   69.17 -                                    domctl->u.ioport_permission.allow_access);
   69.18 -        if ( ret )
   69.19 -        {
   69.20 -            rcu_unlock_domain(d);
   69.21 -            break;
   69.22 -        }
   69.23 -
   69.24          if ( np == 0 )
   69.25              ret = 0;
   69.26          else if ( domctl->u.ioport_permission.allow_access )
   69.27 @@ -550,6 +542,10 @@ long arch_do_domctl(
   69.28          if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   69.29              break;
   69.30  
   69.31 +        ret = xsm_sendtrigger(d);
   69.32 +        if ( ret )
   69.33 +            goto sendtrigger_out;
   69.34 +
   69.35          ret = -EINVAL;
   69.36          if ( domctl->u.sendtrigger.vcpu >= MAX_VIRT_CPUS )
   69.37              goto sendtrigger_out;
   69.38 @@ -628,6 +624,10 @@ long arch_do_domctl(
   69.39          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   69.40          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   69.41  
   69.42 +        ret = xsm_test_assign_device(domctl->u.assign_device.machine_bdf);
   69.43 +        if ( ret )
   69.44 +            break;
   69.45 +
   69.46          if ( device_assigned(bus, devfn) )
   69.47          {
   69.48              gdprintk(XENLOG_ERR, "XEN_DOMCTL_test_assign_device: "
   69.49 @@ -655,6 +655,11 @@ long arch_do_domctl(
   69.50                  "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n");
   69.51              break;
   69.52          }
   69.53 +
   69.54 +        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
   69.55 +        if ( ret )
   69.56 +            goto assign_device_out;
   69.57 +
   69.58          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   69.59          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   69.60  
   69.61 @@ -680,6 +685,7 @@ long arch_do_domctl(
   69.62                       "assign device (%x:%x:%x) failed\n",
   69.63                       bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   69.64  
   69.65 +    assign_device_out:
   69.66          put_domain(d);
   69.67      }
   69.68      break;
   69.69 @@ -700,6 +706,11 @@ long arch_do_domctl(
   69.70                  "XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n"); 
   69.71              break;
   69.72          }
   69.73 +
   69.74 +        ret = xsm_assign_device(d, domctl->u.assign_device.machine_bdf);
   69.75 +        if ( ret )
   69.76 +            goto deassign_device_out;
   69.77 +
   69.78          bus = (domctl->u.assign_device.machine_bdf >> 16) & 0xff;
   69.79          devfn = (domctl->u.assign_device.machine_bdf >> 8) & 0xff;
   69.80  
   69.81 @@ -720,6 +731,8 @@ long arch_do_domctl(
   69.82          deassign_device(d, bus, devfn);
   69.83          gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
   69.84              bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
   69.85 +
   69.86 +    deassign_device_out:
   69.87          put_domain(d);
   69.88      }
   69.89      break;
   69.90 @@ -733,10 +746,17 @@ long arch_do_domctl(
   69.91          if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   69.92              break;
   69.93          bind = &(domctl->u.bind_pt_irq);
   69.94 +
   69.95 +        ret = xsm_bind_pt_irq(d, bind);
   69.96 +        if ( ret )
   69.97 +            goto bind_out;
   69.98 +
   69.99          if ( iommu_enabled )
  69.100              ret = pt_irq_create_bind_vtd(d, bind);
  69.101          if ( ret < 0 )
  69.102              gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
  69.103 +
  69.104 +    bind_out:
  69.105          rcu_unlock_domain(d);
  69.106      }
  69.107      break;    
  69.108 @@ -877,11 +897,16 @@ long arch_do_domctl(
  69.109          if ( d == NULL )
  69.110              break;
  69.111  
  69.112 +        ret = xsm_pin_mem_cacheattr(d);
  69.113 +        if ( ret )
  69.114 +            goto pin_out;
  69.115 +
  69.116          ret = hvm_set_mem_pinned_cacheattr(
  69.117              d, domctl->u.pin_mem_cacheattr.start,
  69.118              domctl->u.pin_mem_cacheattr.end,
  69.119              domctl->u.pin_mem_cacheattr.type);
  69.120  
  69.121 +    pin_out:
  69.122          rcu_unlock_domain(d);
  69.123      }
  69.124      break;
  69.125 @@ -900,6 +925,10 @@ long arch_do_domctl(
  69.126          if ( d == NULL )
  69.127              break;
  69.128  
  69.129 +        ret = xsm_ext_vcpucontext(d, domctl->cmd);
  69.130 +        if ( ret )
  69.131 +            goto ext_vcpucontext_out;
  69.132 +
  69.133          ret = -ESRCH;
  69.134          if ( (evc->vcpu >= MAX_VIRT_CPUS) ||
  69.135               ((v = d->vcpu[evc->vcpu]) == NULL) )
    70.1 --- a/xen/arch/x86/hpet.c	Fri Sep 12 14:32:45 2008 +0900
    70.2 +++ b/xen/arch/x86/hpet.c	Fri Sep 12 14:47:40 2008 +0900
    70.3 @@ -100,6 +100,13 @@ static int reprogram_hpet_evt_channel(
    70.4  
    70.5      ch->next_event = expire;
    70.6  
    70.7 +    if ( expire == STIME_MAX )
    70.8 +    {
    70.9 +        /* We assume it will take a long time for the timer to wrap. */
   70.10 +        hpet_write32(0, HPET_T0_CMP);
   70.11 +        return 0;
   70.12 +    }
   70.13 +
   70.14      delta = min_t(int64_t, delta, MAX_DELTA_NS);
   70.15      delta = max_t(int64_t, delta, MIN_DELTA_NS);
   70.16      delta = ns2ticks(delta, ch->shift, ch->mult);
   70.17 @@ -206,10 +213,12 @@ void hpet_broadcast_enter(void)
   70.18  {
   70.19      struct hpet_event_channel *ch = &hpet_event;
   70.20  
   70.21 +    spin_lock(&ch->lock);
   70.22 +
   70.23 +    disable_APIC_timer();
   70.24 +
   70.25      cpu_set(smp_processor_id(), ch->cpumask);
   70.26  
   70.27 -    spin_lock(&ch->lock);
   70.28 -
   70.29      /* reprogram if current cpu expire time is nearer */
   70.30      if ( this_cpu(timer_deadline) < ch->next_event )
   70.31          reprogram_hpet_evt_channel(ch, this_cpu(timer_deadline), NOW(), 1);
   70.32 @@ -222,8 +231,23 @@ void hpet_broadcast_exit(void)
   70.33      struct hpet_event_channel *ch = &hpet_event;
   70.34      int cpu = smp_processor_id();
   70.35  
   70.36 +    spin_lock_irq(&ch->lock);
   70.37 +
   70.38      if ( cpu_test_and_clear(cpu, ch->cpumask) )
   70.39 -        reprogram_timer(per_cpu(timer_deadline, cpu));
   70.40 +    {
   70.41 +        /* Cancel any outstanding LAPIC event and re-enable interrupts. */
   70.42 +        reprogram_timer(0);
   70.43 +        enable_APIC_timer();
   70.44 +        
   70.45 +        /* Reprogram the deadline; trigger timer work now if it has passed. */
   70.46 +        if ( !reprogram_timer(per_cpu(timer_deadline, cpu)) )
   70.47 +            raise_softirq(TIMER_SOFTIRQ);
   70.48 +
   70.49 +        if ( cpus_empty(ch->cpumask) && ch->next_event != STIME_MAX )
   70.50 +            reprogram_hpet_evt_channel(ch, STIME_MAX, 0, 0);
   70.51 +    }
   70.52 +
   70.53 +    spin_unlock_irq(&ch->lock);
   70.54  }
   70.55  
   70.56  int hpet_broadcast_is_available(void)
    71.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Sep 12 14:32:45 2008 +0900
    71.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Sep 12 14:47:40 2008 +0900
    71.3 @@ -31,10 +31,11 @@
    71.4  #include <xen/hypercall.h>
    71.5  #include <xen/guest_access.h>
    71.6  #include <xen/event.h>
    71.7 +#include <xen/paging.h>
    71.8 +#include <asm/shadow.h>
    71.9  #include <asm/current.h>
   71.10  #include <asm/e820.h>
   71.11  #include <asm/io.h>
   71.12 -#include <asm/paging.h>
   71.13  #include <asm/regs.h>
   71.14  #include <asm/cpufeature.h>
   71.15  #include <asm/processor.h>
   71.16 @@ -772,7 +773,7 @@ void hvm_hlt(unsigned long rflags)
   71.17  
   71.18      do_sched_op_compat(SCHEDOP_block, 0);
   71.19  
   71.20 -    HVMTRACE_1D(HLT, curr, /* pending = */ vcpu_runnable(curr));
   71.21 +    HVMTRACE_1D(HLT, /* pending = */ vcpu_runnable(curr));
   71.22  }
   71.23  
   71.24  void hvm_triple_fault(void)
    72.1 --- a/xen/arch/x86/hvm/svm/intr.c	Fri Sep 12 14:32:45 2008 +0900
    72.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Fri Sep 12 14:47:40 2008 +0900
    72.3 @@ -80,7 +80,7 @@ static void enable_intr_window(struct vc
    72.4  
    72.5      ASSERT(intack.source != hvm_intsrc_none);
    72.6  
    72.7 -    HVMTRACE_2D(INJ_VIRQ, v, 0x0, /*fake=*/ 1);
    72.8 +    HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
    72.9  
   72.10      /*
   72.11       * Create a dummy virtual interrupt to intercept as soon as the
   72.12 @@ -199,7 +199,7 @@ asmlinkage void svm_intr_assist(void)
   72.13      }
   72.14      else
   72.15      {
   72.16 -        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
   72.17 +        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
   72.18          svm_inject_extint(v, intack.vector);
   72.19          pt_intr_post(v, intack);
   72.20      }
    73.1 --- a/xen/arch/x86/hvm/svm/svm.c	Fri Sep 12 14:32:45 2008 +0900
    73.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Sep 12 14:47:40 2008 +0900
    73.3 @@ -759,11 +759,11 @@ static void svm_inject_exception(
    73.4      if ( trapnr == TRAP_page_fault )
    73.5      {
    73.6          vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
    73.7 -        HVMTRACE_LONG_2D(PF_INJECT, curr, errcode, TRC_PAR_LONG(cr2));
    73.8 +        HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
    73.9      }
   73.10      else
   73.11      {
   73.12 -        HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
   73.13 +        HVMTRACE_2D(INJ_EXC, trapnr, errcode);
   73.14      }
   73.15  
   73.16      if ( (trapnr == TRAP_debug) &&
   73.17 @@ -919,7 +919,7 @@ static void svm_cpuid_intercept(
   73.18              __clear_bit(X86_FEATURE_APIC & 31, edx);
   73.19      }
   73.20  
   73.21 -    HVMTRACE_5D (CPUID, v, input, *eax, *ebx, *ecx, *edx);
   73.22 +    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
   73.23  }
   73.24  
   73.25  static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
   73.26 @@ -946,7 +946,7 @@ static void svm_vmexit_do_cpuid(struct c
   73.27  
   73.28  static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
   73.29  {
   73.30 -    HVMTRACE_0D(DR_WRITE, v);
   73.31 +    HVMTRACE_0D(DR_WRITE);
   73.32      __restore_debug_registers(v);
   73.33  }
   73.34  
   73.35 @@ -1018,7 +1018,7 @@ static int svm_msr_read_intercept(struct
   73.36      regs->edx = msr_content >> 32;
   73.37  
   73.38   done:
   73.39 -    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
   73.40 +    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
   73.41      HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
   73.42                  ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
   73.43      return X86EMUL_OKAY;
   73.44 @@ -1037,7 +1037,7 @@ static int svm_msr_write_intercept(struc
   73.45  
   73.46      msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
   73.47  
   73.48 -    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
   73.49 +    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
   73.50  
   73.51      switch ( ecx )
   73.52      {
   73.53 @@ -1168,7 +1168,7 @@ static void svm_vmexit_do_invalidate_cac
   73.54  static void svm_invlpg_intercept(unsigned long vaddr)
   73.55  {
   73.56      struct vcpu *curr = current;
   73.57 -    HVMTRACE_LONG_2D(INVLPG, curr, 0, TRC_PAR_LONG(vaddr));
   73.58 +    HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
   73.59      paging_invlpg(curr, vaddr);
   73.60      svm_asid_g_invlpg(curr, vaddr);
   73.61  }
   73.62 @@ -1191,7 +1191,7 @@ asmlinkage void svm_vmexit_handler(struc
   73.63  
   73.64      exit_reason = vmcb->exitcode;
   73.65  
   73.66 -    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
   73.67 +    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
   73.68                  (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
   73.69                  0, 0, 0);
   73.70  
   73.71 @@ -1216,17 +1216,17 @@ asmlinkage void svm_vmexit_handler(struc
   73.72      {
   73.73      case VMEXIT_INTR:
   73.74          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   73.75 -        HVMTRACE_0D(INTR, v);
   73.76 +        HVMTRACE_0D(INTR);
   73.77          break;
   73.78  
   73.79      case VMEXIT_NMI:
   73.80          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   73.81 -        HVMTRACE_0D(NMI, v);
   73.82 +        HVMTRACE_0D(NMI);
   73.83          break;
   73.84  
   73.85      case VMEXIT_SMI:
   73.86          /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
   73.87 -        HVMTRACE_0D(SMI, v);
   73.88 +        HVMTRACE_0D(SMI);
   73.89          break;
   73.90  
   73.91      case VMEXIT_EXCEPTION_DB:
   73.92 @@ -1261,10 +1261,12 @@ asmlinkage void svm_vmexit_handler(struc
   73.93  
   73.94          if ( paging_fault(va, regs) )
   73.95          {
   73.96 -            if (hvm_long_mode_enabled(v))
   73.97 -                HVMTRACE_LONG_2D(PF_XEN, v, regs->error_code, TRC_PAR_LONG(va));
   73.98 +            if ( trace_will_trace_event(TRC_SHADOW) )
   73.99 +                break;
  73.100 +            if ( hvm_long_mode_enabled(v) )
  73.101 +                HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
  73.102              else
  73.103 -                HVMTRACE_2D(PF_XEN, v, regs->error_code, va);
  73.104 +                HVMTRACE_2D(PF_XEN, regs->error_code, va);
  73.105              break;
  73.106          }
  73.107  
  73.108 @@ -1274,7 +1276,7 @@ asmlinkage void svm_vmexit_handler(struc
  73.109  
  73.110      /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
  73.111      case VMEXIT_EXCEPTION_MC:
  73.112 -        HVMTRACE_0D(MCE, v);
  73.113 +        HVMTRACE_0D(MCE);
  73.114          break;
  73.115  
  73.116      case VMEXIT_VINTR:
  73.117 @@ -1331,7 +1333,7 @@ asmlinkage void svm_vmexit_handler(struc
  73.118      case VMEXIT_VMMCALL:
  73.119          if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
  73.120              break;
  73.121 -        HVMTRACE_1D(VMMCALL, v, regs->eax);
  73.122 +        HVMTRACE_1D(VMMCALL, regs->eax);
  73.123          rc = hvm_do_hypercall(regs);
  73.124          if ( rc != HVM_HCALL_preempted )
  73.125          {
  73.126 @@ -1406,7 +1408,7 @@ asmlinkage void svm_vmexit_handler(struc
  73.127  
  73.128  asmlinkage void svm_trace_vmentry(void)
  73.129  {
  73.130 -    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
  73.131 +    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
  73.132  }
  73.133    
  73.134  /*
    74.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Fri Sep 12 14:32:45 2008 +0900
    74.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Fri Sep 12 14:47:40 2008 +0900
    74.3 @@ -198,7 +198,7 @@ asmlinkage void vmx_intr_assist(void)
    74.4      }
    74.5      else
    74.6      {
    74.7 -        HVMTRACE_2D(INJ_VIRQ, v, intack.vector, /*fake=*/ 0);
    74.8 +        HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
    74.9          vmx_inject_extint(v, intack.vector);
   74.10          pt_intr_post(v, intack);
   74.11      }
    75.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 12 14:32:45 2008 +0900
    75.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Sep 12 14:47:40 2008 +0900
    75.3 @@ -1114,10 +1114,10 @@ static void __vmx_inject_exception(
    75.4      __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
    75.5  
    75.6      if ( trap == TRAP_page_fault )
    75.7 -        HVMTRACE_LONG_2D(PF_INJECT, v, error_code,
    75.8 +        HVMTRACE_LONG_2D(PF_INJECT, error_code,
    75.9              TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
   75.10      else
   75.11 -        HVMTRACE_2D(INJ_EXC, v, trap, error_code);
   75.12 +        HVMTRACE_2D(INJ_EXC, trap, error_code);
   75.13  }
   75.14  
   75.15  void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
   75.16 @@ -1345,7 +1345,7 @@ static void vmx_cpuid_intercept(
   75.17              break;
   75.18      }
   75.19  
   75.20 -    HVMTRACE_5D (CPUID, current, input, *eax, *ebx, *ecx, *edx);
   75.21 +    HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
   75.22  }
   75.23  
   75.24  static void vmx_do_cpuid(struct cpu_user_regs *regs)
   75.25 @@ -1370,7 +1370,7 @@ static void vmx_dr_access(unsigned long 
   75.26  {
   75.27      struct vcpu *v = current;
   75.28  
   75.29 -    HVMTRACE_0D(DR_WRITE, v);
   75.30 +    HVMTRACE_0D(DR_WRITE);
   75.31  
   75.32      if ( !v->arch.hvm_vcpu.flag_dr_dirty )
   75.33          __restore_debug_registers(v);
   75.34 @@ -1383,7 +1383,7 @@ static void vmx_dr_access(unsigned long 
   75.35  static void vmx_invlpg_intercept(unsigned long vaddr)
   75.36  {
   75.37      struct vcpu *curr = current;
   75.38 -    HVMTRACE_LONG_2D(INVLPG, curr, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
   75.39 +    HVMTRACE_LONG_2D(INVLPG, /*invlpga=*/ 0, TRC_PAR_LONG(vaddr));
   75.40      if ( paging_invlpg(curr, vaddr) )
   75.41          vpid_sync_vcpu_gva(curr, vaddr);
   75.42  }
   75.43 @@ -1434,7 +1434,7 @@ static int mov_to_cr(int gp, int cr, str
   75.44          goto exit_and_crash;
   75.45      }
   75.46  
   75.47 -    HVMTRACE_LONG_2D(CR_WRITE, v, cr, TRC_PAR_LONG(value));
   75.48 +    HVMTRACE_LONG_2D(CR_WRITE, cr, TRC_PAR_LONG(value));
   75.49  
   75.50      HVM_DBG_LOG(DBG_LEVEL_1, "CR%d, value = %lx", cr, value);
   75.51  
   75.52 @@ -1505,7 +1505,7 @@ static void mov_from_cr(int cr, int gp, 
   75.53          break;
   75.54      }
   75.55  
   75.56 -    HVMTRACE_LONG_2D(CR_READ, v, cr, TRC_PAR_LONG(value));
   75.57 +    HVMTRACE_LONG_2D(CR_READ, cr, TRC_PAR_LONG(value));
   75.58  
   75.59      HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR%d, value = %lx", cr, value);
   75.60  }
   75.61 @@ -1531,13 +1531,13 @@ static int vmx_cr_access(unsigned long e
   75.62      case VMX_CONTROL_REG_ACCESS_TYPE_CLTS:
   75.63          v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
   75.64          vmx_update_guest_cr(v, 0);
   75.65 -        HVMTRACE_0D(CLTS, current);
   75.66 +        HVMTRACE_0D(CLTS);
   75.67          break;
   75.68      case VMX_CONTROL_REG_ACCESS_TYPE_LMSW:
   75.69          value = v->arch.hvm_vcpu.guest_cr[0];
   75.70          /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
   75.71          value = (value & ~0xe) | ((exit_qualification >> 16) & 0xf);
   75.72 -        HVMTRACE_LONG_1D(LMSW, current, value);
   75.73 +        HVMTRACE_LONG_1D(LMSW, value);
   75.74          return !hvm_set_cr0(value);
   75.75      default:
   75.76          BUG();
   75.77 @@ -1692,7 +1692,7 @@ static int vmx_msr_read_intercept(struct
   75.78      regs->edx = (uint32_t)(msr_content >> 32);
   75.79  
   75.80  done:
   75.81 -    HVMTRACE_3D (MSR_READ, v, ecx, regs->eax, regs->edx);
   75.82 +    HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
   75.83      HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
   75.84                  ecx, (unsigned long)regs->eax,
   75.85                  (unsigned long)regs->edx);
   75.86 @@ -1803,7 +1803,7 @@ static int vmx_msr_write_intercept(struc
   75.87  
   75.88      msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
   75.89  
   75.90 -    HVMTRACE_3D (MSR_WRITE, v, ecx, regs->eax, regs->edx);
   75.91 +    HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
   75.92  
   75.93      switch ( ecx )
   75.94      {
   75.95 @@ -1894,7 +1894,7 @@ static void vmx_do_extint(struct cpu_use
   75.96      BUG_ON(!(vector & INTR_INFO_VALID_MASK));
   75.97  
   75.98      vector &= INTR_INFO_VECTOR_MASK;
   75.99 -    HVMTRACE_1D(INTR, current, vector);
  75.100 +    HVMTRACE_1D(INTR, vector);
  75.101  
  75.102      switch ( vector )
  75.103      {
  75.104 @@ -2010,7 +2010,7 @@ static void vmx_failed_vmentry(unsigned 
  75.105          break;
  75.106      case EXIT_REASON_MACHINE_CHECK:
  75.107          printk("caused by machine check.\n");
  75.108 -        HVMTRACE_0D(MCE, curr);
  75.109 +        HVMTRACE_0D(MCE);
  75.110          do_machine_check(regs);
  75.111          break;
  75.112      default:
  75.113 @@ -2037,7 +2037,7 @@ asmlinkage void vmx_vmexit_handler(struc
  75.114  
  75.115      exit_reason = __vmread(VM_EXIT_REASON);
  75.116  
  75.117 -    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, v, 3, exit_reason,
  75.118 +    HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
  75.119                  (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
  75.120                  0, 0, 0);
  75.121  
  75.122 @@ -2101,7 +2101,8 @@ asmlinkage void vmx_vmexit_handler(struc
  75.123               !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) &&
  75.124               (vector != TRAP_double_fault) )
  75.125              __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
  75.126 -                    __vmread(GUEST_INTERRUPTIBILITY_INFO)|VMX_INTR_SHADOW_NMI);
  75.127 +                      __vmread(GUEST_INTERRUPTIBILITY_INFO)
  75.128 +                      | VMX_INTR_SHADOW_NMI);
  75.129  
  75.130          perfc_incra(cause_vector, vector);
  75.131  
  75.132 @@ -2128,12 +2129,14 @@ asmlinkage void vmx_vmexit_handler(struc
  75.133  
  75.134              if ( paging_fault(exit_qualification, regs) )
  75.135              {
  75.136 +                if ( trace_will_trace_event(TRC_SHADOW) )
  75.137 +                    break;
  75.138                  if ( hvm_long_mode_enabled(v) )
  75.139 -                    HVMTRACE_LONG_2D (PF_XEN, v, regs->error_code,
  75.140 -                        TRC_PAR_LONG(exit_qualification) );
  75.141 +                    HVMTRACE_LONG_2D(PF_XEN, regs->error_code,
  75.142 +                                     TRC_PAR_LONG(exit_qualification) );
  75.143                  else
  75.144 -                    HVMTRACE_2D (PF_XEN, v,
  75.145 -                        regs->error_code, exit_qualification );
  75.146 +                    HVMTRACE_2D(PF_XEN,
  75.147 +                                regs->error_code, exit_qualification );
  75.148                  break;
  75.149              }
  75.150  
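The new trace_will_trace_event(TRC_SHADOW) guard above skips the generic PF_XEN record whenever the shadow trace class is live, since the shadow code then logs the same fault in more detail; emitting both would only duplicate records. The guard idiom, as used in the hunk:

    /* Skip a coarse summary event when a finer-grained class covers it. */
    if ( trace_will_trace_event(TRC_SHADOW) )
        break;               /* shadow code emits its own records */
    /* ... otherwise fall through to the generic PF_XEN trace ... */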
  75.151 @@ -2144,11 +2147,11 @@ asmlinkage void vmx_vmexit_handler(struc
  75.152              if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
  75.153                   (X86_EVENTTYPE_NMI << 8) )
  75.154                  goto exit_and_crash;
  75.155 -            HVMTRACE_0D(NMI, v);
  75.156 +            HVMTRACE_0D(NMI);
  75.157              do_nmi(regs); /* Real NMI, vector 2: normal processing. */
  75.158              break;
  75.159          case TRAP_machine_check:
  75.160 -            HVMTRACE_0D(MCE, v);
  75.161 +            HVMTRACE_0D(MCE);
  75.162              do_machine_check(regs);
  75.163              break;
  75.164          default:
  75.165 @@ -2213,7 +2216,7 @@ asmlinkage void vmx_vmexit_handler(struc
  75.166      case EXIT_REASON_VMCALL:
  75.167      {
  75.168          int rc;
  75.169 -        HVMTRACE_1D(VMMCALL, v, regs->eax);
  75.170 +        HVMTRACE_1D(VMMCALL, regs->eax);
  75.171          inst_len = __get_instruction_length(); /* Safe: VMCALL */
  75.172          rc = hvm_do_hypercall(regs);
  75.173          if ( rc != HVM_HCALL_preempted )
  75.174 @@ -2300,7 +2303,7 @@ asmlinkage void vmx_vmexit_handler(struc
  75.175  
  75.176  asmlinkage void vmx_trace_vmentry(void)
  75.177  {
  75.178 -    HVMTRACE_ND (VMENTRY, 1/*cycles*/, current, 0, 0, 0, 0, 0, 0, 0);
  75.179 +    HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
  75.180  }
  75.181  
  75.182  /*
    76.1 --- a/xen/arch/x86/io_apic.c	Fri Sep 12 14:32:45 2008 +0900
    76.2 +++ b/xen/arch/x86/io_apic.c	Fri Sep 12 14:47:40 2008 +0900
    76.3 @@ -45,23 +45,14 @@
    76.4  int (*ioapic_renumber_irq)(int ioapic, int irq);
    76.5  atomic_t irq_mis_count;
    76.6  
    76.7 -int msi_enable = 0;
    76.8 -boolean_param("msi", msi_enable);
    76.9 -
   76.10  int domain_irq_to_vector(struct domain *d, int irq)
   76.11  {
   76.12 -    if ( !msi_enable )
   76.13 -        return irq_to_vector(irq);
   76.14 -    else
   76.15 -        return d->arch.pirq_vector[irq];
   76.16 +    return d->arch.pirq_vector[irq];
   76.17  }
   76.18  
   76.19  int domain_vector_to_irq(struct domain *d, int vector)
   76.20  {
   76.21 -    if ( !msi_enable )
   76.22 -        return vector_to_irq(vector);
   76.23 -    else
   76.24 -        return d->arch.vector_pirq[vector];
   76.25 +    return d->arch.vector_pirq[vector];
   76.26  }
   76.27  
   76.28  /* Where if anywhere is the i8259 connect in external int mode */
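
With MSI support now unconditional, the "msi" boot parameter and the fallback through the global irq_to_vector()/vector_to_irq() tables are gone; the per-domain arrays are the only mapping. A sketch of the two lookup directions consulted above (the struct name and bounds are illustrative; the real fields live in struct arch_domain):

    /* Illustrative layout of the per-domain IRQ mapping. */
    struct arch_irq_mapping {
        int pirq_vector[NR_IRQS];      /* guest pirq  -> host vector */
        int vector_pirq[NR_VECTORS];   /* host vector -> guest pirq  */
    };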
    77.1 --- a/xen/arch/x86/irq.c	Fri Sep 12 14:32:45 2008 +0900
    77.2 +++ b/xen/arch/x86/irq.c	Fri Sep 12 14:47:40 2008 +0900
    77.3 @@ -737,9 +737,12 @@ static int __init setup_dump_irqs(void)
    77.4  
    77.5  void fixup_irqs(cpumask_t map)
    77.6  {
    77.7 -    unsigned int irq;
    77.8 +    unsigned int irq, sp;
    77.9      static int warned;
   77.10 +    irq_guest_action_t *action;
   77.11 +    struct pending_eoi *peoi;
   77.12  
   77.13 +    /* Direct all future interrupts away from this CPU. */
   77.14      for ( irq = 0; irq < NR_IRQS; irq++ )
   77.15      {
   77.16          cpumask_t mask;
   77.17 @@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
   77.18              printk("Cannot set affinity for irq %i\n", irq);
   77.19      }
   77.20  
   77.21 +    /* Service any interrupts that beat us in the re-direction race. */
   77.22      local_irq_enable();
   77.23      mdelay(1);
   77.24      local_irq_disable();
   77.25 +
   77.26 +    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
   77.27 +    for ( irq = 0; irq < NR_IRQS; irq++ )
   77.28 +    {
   77.29 +        if ( !(irq_desc[irq].status & IRQ_GUEST) )
   77.30 +            continue;
   77.31 +        action = (irq_guest_action_t *)irq_desc[irq].action;
   77.32 +        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
   77.33 +    }
   77.34 +
   77.35 +    /* Flush the interrupt EOI stack. */
   77.36 +    peoi = this_cpu(pending_eoi);
   77.37 +    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
   77.38 +        peoi[sp].ready = 1;
   77.39 +    flush_ready_eoi(NULL);
   77.40  }
   77.41  #endif
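
fixup_irqs() now runs three phases when a CPU goes offline: re-target every IRQ away from this CPU, briefly re-enable interrupts so anything that raced in gets serviced, then remove the CPU from every guest IRQ's cpu_eoi_map and drain the local pending-EOI stack by marking all entries ready before flushing. A hedged sketch of the caller on the CPU-offline path (hypothetical; the real caller is the CPU hotplug code, not shown in this changeset):

    cpumask_t map = cpu_online_map;
    cpu_clear(smp_processor_id(), map);   /* every CPU but this one */
    fixup_irqs(map);    /* re-target, drain races, flush owed EOIs */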
    78.1 --- a/xen/arch/x86/mm.c	Fri Sep 12 14:32:45 2008 +0900
    78.2 +++ b/xen/arch/x86/mm.c	Fri Sep 12 14:47:40 2008 +0900
    78.3 @@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag
    78.4              goto fail;
    78.5  
    78.6      unmap_domain_page(descs);
    78.7 -    return 1;
    78.8 +    return 0;
    78.9  
   78.10   fail:
   78.11      unmap_domain_page(descs);
   78.12 -    return 0;
   78.13 +    return -EINVAL;
   78.14  }
   78.15  
   78.16  
   78.17 @@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned
   78.18  
   78.19  static int get_page_and_type_from_pagenr(unsigned long page_nr, 
   78.20                                           unsigned long type,
   78.21 -                                         struct domain *d)
   78.22 +                                         struct domain *d,
   78.23 +                                         int preemptible)
   78.24  {
   78.25      struct page_info *page = mfn_to_page(page_nr);
   78.26 +    int rc;
   78.27  
   78.28      if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
   78.29 -        return 0;
   78.30 -
   78.31 -    if ( unlikely(!get_page_type(page, type)) )
   78.32 -    {
   78.33 +        return -EINVAL;
   78.34 +
   78.35 +    rc = (preemptible ?
   78.36 +          get_page_type_preemptible(page, type) :
   78.37 +          (get_page_type(page, type) ? 0 : -EINVAL));
   78.38 +
   78.39 +    if ( rc )
   78.40          put_page(page);
   78.41 -        return 0;
   78.42 -    }
   78.43 -
   78.44 -    return 1;
   78.45 +
   78.46 +    return rc;
   78.47  }
   78.48  
   78.49  /*
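
From this hunk on, the page-table get/put helpers move from boolean success/failure to kernel-style error codes: 0 on success, -EINVAL on hard failure, and -EAGAIN or -EINTR when a preemptible operation was interrupted and must be restarted via a hypercall continuation. The caller-side pattern this enables (the enclosing fragment is illustrative, not a real caller):

    int rc = get_page_and_type_from_pagenr(mfn, PGT_l3_page_table, d,
                                           1 /* preemptible */);
    switch ( rc )
    {
    case 0:               /* reference and type taken: proceed */
        break;
    case -EAGAIN:         /* partially validated: needs continuation */
    case -EINTR:          /* preempted before progress: restart too */
        return rc;
    default:              /* -EINVAL etc.: hard failure */
        return rc;
    }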
   78.50 @@ -754,22 +757,22 @@ get_page_from_l2e(
   78.51      if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
   78.52      {
   78.53          MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
   78.54 -        return 0;
   78.55 +        return -EINVAL;
   78.56      }
   78.57  
   78.58 -    rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
   78.59 -    if ( unlikely(!rc) )
   78.60 -        rc = get_l2_linear_pagetable(l2e, pfn, d);
   78.61 +    rc = get_page_and_type_from_pagenr(
   78.62 +        l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
   78.63 +    if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
   78.64 +        rc = 0;
   78.65  
   78.66      return rc;
   78.67  }
   78.68  
   78.69  
   78.70 -#if CONFIG_PAGING_LEVELS >= 3
   78.71  define_get_linear_pagetable(l3);
   78.72  static int
   78.73  get_page_from_l3e(
   78.74 -    l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
   78.75 +    l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
   78.76  {
   78.77      int rc;
   78.78  
   78.79 @@ -779,22 +782,22 @@ get_page_from_l3e(
   78.80      if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
   78.81      {
   78.82          MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
   78.83 -        return 0;
   78.84 +        return -EINVAL;
   78.85      }
   78.86  
   78.87 -    rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
   78.88 -    if ( unlikely(!rc) )
   78.89 -        rc = get_l3_linear_pagetable(l3e, pfn, d);
   78.90 +    rc = get_page_and_type_from_pagenr(
   78.91 +        l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
   78.92 +    if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
   78.93 +        rc = 0;
   78.94  
   78.95      return rc;
   78.96  }
   78.97 -#endif /* 3 level */
   78.98  
   78.99  #if CONFIG_PAGING_LEVELS >= 4
  78.100  define_get_linear_pagetable(l4);
  78.101  static int
  78.102  get_page_from_l4e(
  78.103 -    l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
  78.104 +    l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
  78.105  {
  78.106      int rc;
  78.107  
  78.108 @@ -804,12 +807,13 @@ get_page_from_l4e(
  78.109      if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
  78.110      {
  78.111          MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
  78.112 -        return 0;
  78.113 +        return -EINVAL;
  78.114      }
  78.115  
  78.116 -    rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
  78.117 -    if ( unlikely(!rc) )
  78.118 -        rc = get_l4_linear_pagetable(l4e, pfn, d);
  78.119 +    rc = get_page_and_type_from_pagenr(
  78.120 +        l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
  78.121 +    if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
  78.122 +        rc = 0;
  78.123  
  78.124      return rc;
  78.125  }
  78.126 @@ -946,29 +950,35 @@ void put_page_from_l1e(l1_pgentry_t l1e,
  78.127   * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
  78.128   * Note also that this automatically deals correctly with linear p.t.'s.
  78.129   */
  78.130 -static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
  78.131 +static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
  78.132  {
  78.133      if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
  78.134           (l2e_get_pfn(l2e) != pfn) )
  78.135 +    {
  78.136          put_page_and_type(l2e_get_page(l2e));
  78.137 +        return 0;
  78.138 +    }
  78.139 +    return 1;
  78.140  }
  78.141  
  78.142  
  78.143 -#if CONFIG_PAGING_LEVELS >= 3
  78.144 -static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
  78.145 +static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
  78.146 +                             int preemptible)
  78.147  {
  78.148      if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
  78.149           (l3e_get_pfn(l3e) != pfn) )
  78.150 -        put_page_and_type(l3e_get_page(l3e));
  78.151 +        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
  78.152 +    return 1;
  78.153  }
  78.154 -#endif
  78.155  
  78.156  #if CONFIG_PAGING_LEVELS >= 4
  78.157 -static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
  78.158 +static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
  78.159 +                             int preemptible)
  78.160  {
  78.161      if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && 
  78.162           (l4e_get_pfn(l4e) != pfn) )
  78.163 -        put_page_and_type(l4e_get_page(l4e));
  78.164 +        return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
  78.165 +    return 1;
  78.166  }
  78.167  #endif
  78.168  
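Note the tri-state convention on the put side: put_page_from_l2e/l3e/l4e() now return 1 when there was nothing to drop (entry not present, or a recursive reference to frame pfn itself), 0 when a reference really was released, and a negative errno when a preemptible put got interrupted. Callers treat the positive case as "skip this slot", as in free_l3_table() further down:

    rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
    if ( rc > 0 )          /* nothing dropped: no unadjust needed */
        continue;
    if ( rc )              /* -EAGAIN/-EINTR: record position, bail */
        break;
    unadjust_guest_l3e(pl3e[i], d);   /* rc == 0: really released */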
  78.169 @@ -977,7 +987,7 @@ static int alloc_l1_table(struct page_in
  78.170      struct domain *d = page_get_owner(page);
  78.171      unsigned long  pfn = page_to_mfn(page);
  78.172      l1_pgentry_t  *pl1e;
  78.173 -    int            i;
  78.174 +    unsigned int   i;
  78.175  
  78.176      pl1e = map_domain_page(pfn);
  78.177  
  78.178 @@ -991,7 +1001,7 @@ static int alloc_l1_table(struct page_in
  78.179      }
  78.180  
  78.181      unmap_domain_page(pl1e);
  78.182 -    return 1;
  78.183 +    return 0;
  78.184  
  78.185   fail:
  78.186      MEM_LOG("Failure in alloc_l1_table: entry %d", i);
  78.187 @@ -1000,7 +1010,7 @@ static int alloc_l1_table(struct page_in
  78.188              put_page_from_l1e(pl1e[i], d);
  78.189  
  78.190      unmap_domain_page(pl1e);
  78.191 -    return 0;
  78.192 +    return -EINVAL;
  78.193  }
  78.194  
  78.195  static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
  78.196 @@ -1128,47 +1138,53 @@ static void pae_flush_pgd(
  78.197  # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
  78.198  #endif
  78.199  
  78.200 -static int alloc_l2_table(struct page_info *page, unsigned long type)
  78.201 +static int alloc_l2_table(struct page_info *page, unsigned long type,
  78.202 +                          int preemptible)
  78.203  {
  78.204      struct domain *d = page_get_owner(page);
  78.205      unsigned long  pfn = page_to_mfn(page);
  78.206      l2_pgentry_t  *pl2e;
  78.207 -    int            i;
  78.208 +    unsigned int   i;
  78.209 +    int            rc = 0;
  78.210  
  78.211      pl2e = map_domain_page(pfn);
  78.212  
  78.213 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  78.214 +    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
  78.215      {
  78.216 -        if ( !is_guest_l2_slot(d, type, i) )
  78.217 +        if ( preemptible && i && hypercall_preempt_check() )
  78.218 +        {
  78.219 +            page->nr_validated_ptes = i;
  78.220 +            rc = -EAGAIN;
  78.221 +            break;
  78.222 +        }
  78.223 +
  78.224 +        if ( !is_guest_l2_slot(d, type, i) ||
  78.225 +             (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
  78.226              continue;
  78.227  
  78.228 -        if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
  78.229 -            goto fail;
  78.230 -        
  78.231 +        if ( rc < 0 )
  78.232 +        {
  78.233 +            MEM_LOG("Failure in alloc_l2_table: entry %d", i);
  78.234 +            while ( i-- > 0 )
  78.235 +                if ( is_guest_l2_slot(d, type, i) )
  78.236 +                    put_page_from_l2e(pl2e[i], pfn);
  78.237 +            break;
  78.238 +        }
  78.239 +
  78.240          adjust_guest_l2e(pl2e[i], d);
  78.241      }
  78.242  
  78.243      unmap_domain_page(pl2e);
  78.244 -    return 1;
  78.245 -
  78.246 - fail:
  78.247 -    MEM_LOG("Failure in alloc_l2_table: entry %d", i);
  78.248 -    while ( i-- > 0 )
  78.249 -        if ( is_guest_l2_slot(d, type, i) )
  78.250 -            put_page_from_l2e(pl2e[i], pfn);
  78.251 -
  78.252 -    unmap_domain_page(pl2e);
  78.253 -    return 0;
  78.254 +    return rc > 0 ? 0 : rc;
  78.255  }
  78.256  
  78.257 -
  78.258 -#if CONFIG_PAGING_LEVELS >= 3
  78.259 -static int alloc_l3_table(struct page_info *page)
  78.260 +static int alloc_l3_table(struct page_info *page, int preemptible)
  78.261  {
  78.262      struct domain *d = page_get_owner(page);
  78.263      unsigned long  pfn = page_to_mfn(page);
  78.264      l3_pgentry_t  *pl3e;
  78.265 -    int            i;
  78.266 +    unsigned int   i;
  78.267 +    int            rc = 0;
  78.268  
  78.269  #if CONFIG_PAGING_LEVELS == 3
  78.270      /*
  78.271 @@ -1181,7 +1197,7 @@ static int alloc_l3_table(struct page_in
  78.272           d->vcpu[0] && d->vcpu[0]->is_initialised )
  78.273      {
  78.274          MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
  78.275 -        return 0;
  78.276 +        return -EINVAL;
  78.277      }
  78.278  #endif
  78.279  
  78.280 @@ -1197,64 +1213,96 @@ static int alloc_l3_table(struct page_in
  78.281      if ( is_pv_32on64_domain(d) )
  78.282          memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
  78.283  
  78.284 -    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  78.285 +    for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
  78.286      {
  78.287          if ( is_pv_32bit_domain(d) && (i == 3) )
  78.288          {
  78.289              if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
  78.290 -                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
  78.291 -                 !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
  78.292 -                                                PGT_l2_page_table |
  78.293 -                                                PGT_pae_xen_l2,
  78.294 -                                                d) )
  78.295 -                goto fail;
  78.296 +                 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
  78.297 +                rc = -EINVAL;
  78.298 +            else
  78.299 +                rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
  78.300 +                                                   PGT_l2_page_table |
  78.301 +                                                   PGT_pae_xen_l2,
  78.302 +                                                   d, preemptible);
  78.303          }
  78.304 -        else if ( !is_guest_l3_slot(i) )
  78.305 +        else if ( !is_guest_l3_slot(i) ||
  78.306 +                  (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
  78.307              continue;
  78.308 -        else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
  78.309 -            goto fail;
  78.310 +
  78.311 +        if ( rc == -EAGAIN )
  78.312 +        {
  78.313 +            page->nr_validated_ptes = i;
  78.314 +            page->partial_pte = 1;
  78.315 +        }
  78.316 +        else if ( rc == -EINTR && i )
  78.317 +        {
  78.318 +            page->nr_validated_ptes = i;
  78.319 +            page->partial_pte = 0;
  78.320 +            rc = -EAGAIN;
  78.321 +        }
  78.322 +        if ( rc < 0 )
  78.323 +            break;
  78.324  
  78.325          adjust_guest_l3e(pl3e[i], d);
  78.326      }
  78.327  
  78.328 -    if ( !create_pae_xen_mappings(d, pl3e) )
  78.329 -        goto fail;
  78.330 -
  78.331 -    unmap_domain_page(pl3e);
  78.332 -    return 1;
  78.333 -
  78.334 - fail:
  78.335 -    MEM_LOG("Failure in alloc_l3_table: entry %d", i);
  78.336 -    while ( i-- > 0 )
  78.337 +    if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
  78.338 +        rc = -EINVAL;
  78.339 +    if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
  78.340      {
  78.341 -        if ( !is_guest_l3_slot(i) )
  78.342 -            continue;
  78.343 -        unadjust_guest_l3e(pl3e[i], d);
  78.344 -        put_page_from_l3e(pl3e[i], pfn);
  78.345 +        MEM_LOG("Failure in alloc_l3_table: entry %d", i);
  78.346 +        while ( i-- > 0 )
  78.347 +        {
  78.348 +            if ( !is_guest_l3_slot(i) )
  78.349 +                continue;
  78.350 +            unadjust_guest_l3e(pl3e[i], d);
  78.351 +            put_page_from_l3e(pl3e[i], pfn, 0);
  78.352 +        }
  78.353      }
  78.354  
  78.355      unmap_domain_page(pl3e);
  78.356 -    return 0;
  78.357 +    return rc > 0 ? 0 : rc;
  78.358  }
  78.359 -#else
  78.360 -#define alloc_l3_table(page) (0)
  78.361 -#endif
  78.362  
  78.363  #if CONFIG_PAGING_LEVELS >= 4
  78.364 -static int alloc_l4_table(struct page_info *page)
  78.365 +static int alloc_l4_table(struct page_info *page, int preemptible)
  78.366  {
  78.367      struct domain *d = page_get_owner(page);
  78.368      unsigned long  pfn = page_to_mfn(page);
  78.369      l4_pgentry_t  *pl4e = page_to_virt(page);
  78.370 -    int            i;
  78.371 -
  78.372 -    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
  78.373 +    unsigned int   i;
  78.374 +    int            rc = 0;
  78.375 +
  78.376 +    for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
  78.377      {
  78.378 -        if ( !is_guest_l4_slot(d, i) )
  78.379 +        if ( !is_guest_l4_slot(d, i) ||
  78.380 +             (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
  78.381              continue;
  78.382  
  78.383 -        if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
  78.384 -            goto fail;
  78.385 +        if ( rc == -EAGAIN )
  78.386 +        {
  78.387 +            page->nr_validated_ptes = i;
  78.388 +            page->partial_pte = 1;
  78.389 +        }
  78.390 +        else if ( rc == -EINTR )
  78.391 +        {
  78.392 +            if ( i )
  78.393 +            {
  78.394 +                page->nr_validated_ptes = i;
  78.395 +                page->partial_pte = 0;
  78.396 +                rc = -EAGAIN;
  78.397 +            }
  78.398 +        }
  78.399 +        else if ( rc < 0 )
  78.400 +        {
  78.401 +            MEM_LOG("Failure in alloc_l4_table: entry %d", i);
  78.402 +            while ( i-- > 0 )
  78.403 +                if ( is_guest_l4_slot(d, i) )
  78.404 +                    put_page_from_l4e(pl4e[i], pfn, 0);
  78.405 +        }
  78.406 +        if ( rc < 0 )
  78.407 +            return rc;
  78.408  
  78.409          adjust_guest_l4e(pl4e[i], d);
  78.410      }
  78.411 @@ -1269,18 +1317,10 @@ static int alloc_l4_table(struct page_in
  78.412          l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
  78.413                        __PAGE_HYPERVISOR);
  78.414  
  78.415 -    return 1;
  78.416 -
  78.417 - fail:
  78.418 -    MEM_LOG("Failure in alloc_l4_table: entry %d", i);
  78.419 -    while ( i-- > 0 )
  78.420 -        if ( is_guest_l4_slot(d, i) )
  78.421 -            put_page_from_l4e(pl4e[i], pfn);
  78.422 -
  78.423 -    return 0;
  78.424 +    return rc > 0 ? 0 : rc;
  78.425  }
  78.426  #else
  78.427 -#define alloc_l4_table(page) (0)
  78.428 +#define alloc_l4_table(page, preemptible) (-EINVAL)
  78.429  #endif
  78.430  
  78.431  
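alloc_l2/l3/l4_table() are now resumable: each loop starts at page->nr_validated_ptes rather than 0, and on preemption the current index is written back so the continuation resumes exactly where validation stopped; page->partial_pte additionally records whether the entry at that index holds a partial type reference. Reduced to its moving parts, the loop shape is:

    /* Skeleton of a resumable validation loop (illustrative). */
    for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
    {
        if ( preemptible && i && hypercall_preempt_check() )
        {
            page->nr_validated_ptes = i;   /* resume point */
            rc = -EAGAIN;                  /* caller builds continuation */
            break;
        }
        /* ... validate entry i; on hard error undo 0..i-1 and bail ... */
    }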
  78.432 @@ -1289,7 +1329,7 @@ static void free_l1_table(struct page_in
  78.433      struct domain *d = page_get_owner(page);
  78.434      unsigned long pfn = page_to_mfn(page);
  78.435      l1_pgentry_t *pl1e;
  78.436 -    int i;
  78.437 +    unsigned int  i;
  78.438  
  78.439      pl1e = map_domain_page(pfn);
  78.440  
  78.441 @@ -1301,74 +1341,114 @@ static void free_l1_table(struct page_in
  78.442  }
  78.443  
  78.444  
  78.445 -static void free_l2_table(struct page_info *page)
  78.446 +static int free_l2_table(struct page_info *page, int preemptible)
  78.447  {
  78.448  #ifdef CONFIG_COMPAT
  78.449      struct domain *d = page_get_owner(page);
  78.450  #endif
  78.451      unsigned long pfn = page_to_mfn(page);
  78.452      l2_pgentry_t *pl2e;
  78.453 -    int i;
  78.454 +    unsigned int  i = page->nr_validated_ptes - 1;
  78.455 +    int err = 0;
  78.456  
  78.457      pl2e = map_domain_page(pfn);
  78.458  
  78.459 -    for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
  78.460 -        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
  78.461 -            put_page_from_l2e(pl2e[i], pfn);
  78.462 +    ASSERT(page->nr_validated_ptes);
  78.463 +    do {
  78.464 +        if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
  78.465 +             put_page_from_l2e(pl2e[i], pfn) == 0 &&
  78.466 +             preemptible && i && hypercall_preempt_check() )
  78.467 +        {
   78.468 +            page->nr_validated_ptes = i;
   78.469 +            err = -EAGAIN;

  78.470 +        }
  78.471 +    } while ( !err && i-- );
  78.472  
  78.473      unmap_domain_page(pl2e);
  78.474  
  78.475 -    page->u.inuse.type_info &= ~PGT_pae_xen_l2;
  78.476 +    if ( !err )
  78.477 +        page->u.inuse.type_info &= ~PGT_pae_xen_l2;
  78.478 +
  78.479 +    return err;
  78.480  }
  78.481  
  78.482 -
  78.483 -#if CONFIG_PAGING_LEVELS >= 3
  78.484 -
  78.485 -static void free_l3_table(struct page_info *page)
  78.486 +static int free_l3_table(struct page_info *page, int preemptible)
  78.487  {
  78.488      struct domain *d = page_get_owner(page);
  78.489      unsigned long pfn = page_to_mfn(page);
  78.490      l3_pgentry_t *pl3e;
  78.491 -    int           i;
  78.492 +    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
  78.493 +    int rc = 0;
  78.494  
  78.495  #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
  78.496      if ( d->arch.relmem == RELMEM_l3 )
  78.497 -        return;
  78.498 +        return 0;
  78.499  #endif
  78.500  
  78.501      pl3e = map_domain_page(pfn);
  78.502  
  78.503 -    for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  78.504 +    do {
  78.505          if ( is_guest_l3_slot(i) )
  78.506          {
  78.507 -            put_page_from_l3e(pl3e[i], pfn);
  78.508 +            rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
  78.509 +            if ( rc > 0 )
  78.510 +                continue;
  78.511 +            if ( rc )
  78.512 +                break;
  78.513              unadjust_guest_l3e(pl3e[i], d);
  78.514          }
  78.515 +    } while ( i-- );
  78.516  
  78.517      unmap_domain_page(pl3e);
  78.518 +
  78.519 +    if ( rc == -EAGAIN )
  78.520 +    {
  78.521 +        page->nr_validated_ptes = i;
  78.522 +        page->partial_pte = 1;
  78.523 +    }
  78.524 +    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
  78.525 +    {
  78.526 +        page->nr_validated_ptes = i + 1;
  78.527 +        page->partial_pte = 0;
  78.528 +        rc = -EAGAIN;
  78.529 +    }
  78.530 +    return rc > 0 ? 0 : rc;
  78.531  }
  78.532  
  78.533 -#endif
  78.534 -
  78.535  #if CONFIG_PAGING_LEVELS >= 4
  78.536 -
  78.537 -static void free_l4_table(struct page_info *page)
  78.538 +static int free_l4_table(struct page_info *page, int preemptible)
  78.539  {
  78.540      struct domain *d = page_get_owner(page);
  78.541      unsigned long pfn = page_to_mfn(page);
  78.542      l4_pgentry_t *pl4e = page_to_virt(page);
  78.543 -    int           i;
  78.544 +    unsigned int  i = page->nr_validated_ptes - !page->partial_pte;
  78.545 +    int rc = 0;
  78.546  
  78.547  #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
  78.548      if ( d->arch.relmem == RELMEM_l4 )
  78.549 -        return;
  78.550 +        return 0;
  78.551  #endif
  78.552  
  78.553 -    for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
  78.554 +    do {
  78.555          if ( is_guest_l4_slot(d, i) )
  78.556 -            put_page_from_l4e(pl4e[i], pfn);
  78.557 +            rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
  78.558 +    } while ( rc >= 0 && i-- );
  78.559 +
  78.560 +    if ( rc == -EAGAIN )
  78.561 +    {
  78.562 +        page->nr_validated_ptes = i;
  78.563 +        page->partial_pte = 1;
  78.564 +    }
  78.565 +    else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
  78.566 +    {
  78.567 +        page->nr_validated_ptes = i + 1;
  78.568 +        page->partial_pte = 0;
  78.569 +        rc = -EAGAIN;
  78.570 +    }
  78.571 +    return rc > 0 ? 0 : rc;
  78.572  }
  78.573 -
  78.574 +#else
  78.575 +#define free_l4_table(page, preemptible) (-EINVAL)
  78.576  #endif
  78.577  
  78.578  static void page_lock(struct page_info *page)
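
The free side mirrors the allocation side, walking downwards from page->nr_validated_ptes - !page->partial_pte and distinguishing two restart cases: -EAGAIN from a nested put means entry i is itself half torn down (partial_pte = 1), while -EINTR means entry i was not yet touched, so the resume point is i + 1 with partial_pte = 0 and the error is promoted to -EAGAIN so callers see one uniform restart code. The epilogue, as in free_l3/l4_table above:

    if ( rc == -EAGAIN )
    {
        page->nr_validated_ptes = i;        /* entry i is half-freed */
        page->partial_pte = 1;
    }
    else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
    {
        page->nr_validated_ptes = i + 1;    /* entry i untouched */
        page->partial_pte = 0;
        rc = -EAGAIN;                       /* uniform restart signal */
    }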
  78.579 @@ -1560,7 +1640,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
  78.580              return rc;
  78.581          }
  78.582  
  78.583 -        if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
  78.584 +        if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
  78.585              return page_unlock(l2pg), 0;
  78.586  
  78.587          adjust_guest_l2e(nl2e, d);
  78.588 @@ -1583,24 +1663,23 @@ static int mod_l2_entry(l2_pgentry_t *pl
  78.589      return rc;
  78.590  }
  78.591  
  78.592 -#if CONFIG_PAGING_LEVELS >= 3
  78.593 -
  78.594  /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
  78.595  static int mod_l3_entry(l3_pgentry_t *pl3e, 
  78.596                          l3_pgentry_t nl3e, 
  78.597                          unsigned long pfn,
  78.598 -                        int preserve_ad)
  78.599 +                        int preserve_ad,
  78.600 +                        int preemptible)
  78.601  {
  78.602      l3_pgentry_t ol3e;
  78.603      struct vcpu *curr = current;
  78.604      struct domain *d = curr->domain;
  78.605      struct page_info *l3pg = mfn_to_page(pfn);
  78.606 -    int rc = 1;
  78.607 +    int rc = 0;
  78.608  
  78.609      if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
  78.610      {
  78.611          MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
  78.612 -        return 0;
  78.613 +        return -EINVAL;
  78.614      }
  78.615  
  78.616      /*
  78.617 @@ -1608,12 +1687,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
  78.618       * would be a pain to ensure they remain continuously valid throughout.
  78.619       */
  78.620      if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
  78.621 -        return 0;
  78.622 +        return -EINVAL;
  78.623  
  78.624      page_lock(l3pg);
  78.625  
  78.626      if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
  78.627 -        return page_unlock(l3pg), 0;
  78.628 +        return page_unlock(l3pg), -EFAULT;
  78.629  
  78.630      if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
  78.631      {
  78.632 @@ -1622,7 +1701,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
  78.633              page_unlock(l3pg);
  78.634              MEM_LOG("Bad L3 flags %x",
  78.635                      l3e_get_flags(nl3e) & l3_disallow_mask(d));
  78.636 -            return 0;
  78.637 +            return -EINVAL;
  78.638          }
  78.639  
  78.640          /* Fast path for identical mapping and presence. */
  78.641 @@ -1631,28 +1710,30 @@ static int mod_l3_entry(l3_pgentry_t *pl
  78.642              adjust_guest_l3e(nl3e, d);
  78.643              rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
  78.644              page_unlock(l3pg);
  78.645 -            return rc;
  78.646 +            return rc ? 0 : -EFAULT;
  78.647          }
  78.648  
  78.649 -        if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
  78.650 -            return page_unlock(l3pg), 0;
  78.651 +        rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
  78.652 +        if ( unlikely(rc < 0) )
  78.653 +            return page_unlock(l3pg), rc;
  78.654 +        rc = 0;
  78.655  
  78.656          adjust_guest_l3e(nl3e, d);
  78.657          if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
  78.658                                      preserve_ad)) )
  78.659          {
  78.660              ol3e = nl3e;
  78.661 -            rc = 0;
  78.662 +            rc = -EFAULT;
  78.663          }
  78.664      }
  78.665      else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
  78.666                                       preserve_ad)) )
  78.667      {
  78.668          page_unlock(l3pg);
  78.669 -        return 0;
  78.670 +        return -EFAULT;
  78.671      }
  78.672  
  78.673 -    if ( likely(rc) )
  78.674 +    if ( likely(rc == 0) )
  78.675      {
  78.676          if ( !create_pae_xen_mappings(d, pl3e) )
  78.677              BUG();
  78.678 @@ -1661,36 +1742,35 @@ static int mod_l3_entry(l3_pgentry_t *pl
  78.679      }
  78.680  
  78.681      page_unlock(l3pg);
  78.682 -    put_page_from_l3e(ol3e, pfn);
  78.683 +    put_page_from_l3e(ol3e, pfn, 0);
  78.684      return rc;
  78.685  }
  78.686  
  78.687 -#endif
  78.688 -
  78.689  #if CONFIG_PAGING_LEVELS >= 4
  78.690  
  78.691  /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
  78.692  static int mod_l4_entry(l4_pgentry_t *pl4e, 
  78.693                          l4_pgentry_t nl4e, 
  78.694                          unsigned long pfn,
  78.695 -                        int preserve_ad)
  78.696 +                        int preserve_ad,
  78.697 +                        int preemptible)
  78.698  {
  78.699      struct vcpu *curr = current;
  78.700      struct domain *d = curr->domain;
  78.701      l4_pgentry_t ol4e;
  78.702      struct page_info *l4pg = mfn_to_page(pfn);
  78.703 -    int rc = 1;
  78.704 +    int rc = 0;
  78.705  
  78.706      if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
  78.707      {
  78.708          MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
  78.709 -        return 0;
  78.710 +        return -EINVAL;
  78.711      }
  78.712  
  78.713      page_lock(l4pg);
  78.714  
  78.715      if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
  78.716 -        return page_unlock(l4pg), 0;
  78.717 +        return page_unlock(l4pg), -EFAULT;
  78.718  
  78.719      if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
  78.720      {
  78.721 @@ -1699,7 +1779,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
  78.722              page_unlock(l4pg);
  78.723              MEM_LOG("Bad L4 flags %x",
  78.724                      l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
  78.725 -            return 0;
  78.726 +            return -EINVAL;
  78.727          }
  78.728  
  78.729          /* Fast path for identical mapping and presence. */
  78.730 @@ -1708,29 +1788,31 @@ static int mod_l4_entry(l4_pgentry_t *pl
  78.731              adjust_guest_l4e(nl4e, d);
  78.732              rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
  78.733              page_unlock(l4pg);
  78.734 -            return rc;
  78.735 +            return rc ? 0 : -EFAULT;
  78.736          }
  78.737  
  78.738 -        if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
  78.739 -            return page_unlock(l4pg), 0;
  78.740 +        rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
  78.741 +        if ( unlikely(rc < 0) )
  78.742 +            return page_unlock(l4pg), rc;
  78.743 +        rc = 0;
  78.744  
  78.745          adjust_guest_l4e(nl4e, d);
  78.746          if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
  78.747                                      preserve_ad)) )
  78.748          {
  78.749              ol4e = nl4e;
  78.750 -            rc = 0;
  78.751 +            rc = -EFAULT;
  78.752          }
  78.753      }
  78.754      else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
  78.755                                       preserve_ad)) )
  78.756      {
  78.757          page_unlock(l4pg);
  78.758 -        return 0;
  78.759 +        return -EFAULT;
  78.760      }
  78.761  
  78.762      page_unlock(l4pg);
  78.763 -    put_page_from_l4e(ol4e, pfn);
  78.764 +    put_page_from_l4e(ol4e, pfn, 0);
  78.765      return rc;
  78.766  }
  78.767  
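mod_l3_entry() and mod_l4_entry() adopt the same errno scheme: -EINVAL for illegal slots or flags, -EFAULT when the guest entry cannot be read or atomically updated, and whatever get_page_from_l3e/l4e() returns (possibly -EAGAIN/-EINTR) for the reference grab; mod_l2_entry() keeps its boolean return for now, hence the "< 0" test in the hunk above. The boolean-to-errno conversion on the fast path looks like:

    /* UPDATE_ENTRY() is still boolean; map its failure to -EFAULT. */
    rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
    page_unlock(l3pg);
    return rc ? 0 : -EFAULT;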
  78.768 @@ -1788,9 +1870,11 @@ int get_page(struct page_info *page, str
  78.769  }
  78.770  
  78.771  
  78.772 -static int alloc_page_type(struct page_info *page, unsigned long type)
  78.773 +static int alloc_page_type(struct page_info *page, unsigned long type,
  78.774 +                           int preemptible)
  78.775  {
  78.776      struct domain *owner = page_get_owner(page);
  78.777 +    int rc;
  78.778  
  78.779      /* A page table is dirtied when its type count becomes non-zero. */
  78.780      if ( likely(owner != NULL) )
  78.781 @@ -1799,30 +1883,65 @@ static int alloc_page_type(struct page_i
  78.782      switch ( type & PGT_type_mask )
  78.783      {
  78.784      case PGT_l1_page_table:
  78.785 -        return alloc_l1_table(page);
  78.786 +        alloc_l1_table(page);
  78.787 +        rc = 0;
  78.788 +        break;
  78.789      case PGT_l2_page_table:
  78.790 -        return alloc_l2_table(page, type);
  78.791 +        rc = alloc_l2_table(page, type, preemptible);
  78.792 +        break;
  78.793      case PGT_l3_page_table:
  78.794 -        return alloc_l3_table(page);
  78.795 +        rc = alloc_l3_table(page, preemptible);
  78.796 +        break;
  78.797      case PGT_l4_page_table:
  78.798 -        return alloc_l4_table(page);
  78.799 +        rc = alloc_l4_table(page, preemptible);
  78.800 +        break;
  78.801      case PGT_seg_desc_page:
  78.802 -        return alloc_segdesc_page(page);
  78.803 +        rc = alloc_segdesc_page(page);
  78.804 +        break;
  78.805      default:
  78.806          printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
  78.807                 type, page->u.inuse.type_info,
  78.808                 page->count_info);
  78.809 +        rc = -EINVAL;
  78.810          BUG();
  78.811      }
  78.812  
  78.813 -    return 0;
   78.814 +    /* No need for atomic update of type_info here: no one else updates it. */
  78.815 +    wmb();
  78.816 +    if ( rc == -EAGAIN )
  78.817 +    {
  78.818 +        page->u.inuse.type_info |= PGT_partial;
  78.819 +    }
  78.820 +    else if ( rc == -EINTR )
  78.821 +    {
  78.822 +        ASSERT((page->u.inuse.type_info &
  78.823 +                (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
  78.824 +        page->u.inuse.type_info &= ~PGT_count_mask;
  78.825 +    }
  78.826 +    else if ( rc )
  78.827 +    {
  78.828 +        ASSERT(rc < 0);
  78.829 +        MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
  78.830 +                PRtype_info ": caf=%08x taf=%" PRtype_info,
  78.831 +                page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
  78.832 +                type, page->count_info, page->u.inuse.type_info);
  78.833 +        page->u.inuse.type_info = 0;
  78.834 +    }
  78.835 +    else
  78.836 +    {
  78.837 +        page->u.inuse.type_info |= PGT_validated;
  78.838 +    }
  78.839 +
  78.840 +    return rc;
  78.841  }
  78.842  
  78.843  
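alloc_page_type() now ends with a small state machine on type_info, published only after the wmb() so no other CPU can observe a validated bit before the table contents: -EAGAIN marks the page PGT_partial (validation resumes later), -EINTR drops the type count back to zero (nothing validated yet; this CPU held the only reference), any other error wipes type_info, and success sets PGT_validated. In outline:

    wmb();                                       /* table before flags */
    if ( rc == -EAGAIN )
        page->u.inuse.type_info |= PGT_partial;      /* resumable */
    else if ( rc == -EINTR )
        page->u.inuse.type_info &= ~PGT_count_mask;  /* back to untyped */
    else if ( rc )
        page->u.inuse.type_info = 0;                 /* hard failure */
    else
        page->u.inuse.type_info |= PGT_validated;    /* fully valid */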
  78.844 -void free_page_type(struct page_info *page, unsigned long type)
  78.845 +int free_page_type(struct page_info *page, unsigned long type,
  78.846 +                   int preemptible)
  78.847  {
  78.848      struct domain *owner = page_get_owner(page);
  78.849      unsigned long gmfn;
  78.850 +    int rc;
  78.851  
  78.852      if ( likely(owner != NULL) )
  78.853      {
  78.854 @@ -1842,7 +1961,7 @@ void free_page_type(struct page_info *pa
  78.855              paging_mark_dirty(owner, page_to_mfn(page));
  78.856  
  78.857              if ( shadow_mode_refcounts(owner) )
  78.858 -                return;
  78.859 +                return 0;
  78.860  
  78.861              gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
  78.862              ASSERT(VALID_M2P(gmfn));
  78.863 @@ -1850,42 +1969,80 @@ void free_page_type(struct page_info *pa
  78.864          }
  78.865      }
  78.866  
  78.867 +    if ( !(type & PGT_partial) )
  78.868 +    {
  78.869 +        page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
  78.870 +        page->partial_pte = 0;
  78.871 +    }
  78.872      switch ( type & PGT_type_mask )
  78.873      {
  78.874      case PGT_l1_page_table:
  78.875          free_l1_table(page);
  78.876 -        break;
  78.877 -
  78.878 -    case PGT_l2_page_table:
  78.879 -        free_l2_table(page);
  78.880 -        break;
  78.881 -
  78.882 -#if CONFIG_PAGING_LEVELS >= 3
  78.883 -    case PGT_l3_page_table:
  78.884 -        free_l3_table(page);
  78.885 +        rc = 0;
  78.886          break;
  78.887 +    case PGT_l2_page_table:
  78.888 +        rc = free_l2_table(page, preemptible);
  78.889 +        break;
  78.890 +    case PGT_l3_page_table:
  78.891 +#if CONFIG_PAGING_LEVELS == 3
  78.892 +        if ( !(type & PGT_partial) )
  78.893 +            page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
  78.894  #endif
  78.895 -
  78.896 -#if CONFIG_PAGING_LEVELS >= 4
  78.897 +        rc = free_l3_table(page, preemptible);
  78.898 +        break;
  78.899      case PGT_l4_page_table:
  78.900 -        free_l4_table(page);
  78.901 +        rc = free_l4_table(page, preemptible);
  78.902          break;
  78.903 -#endif
  78.904 -
  78.905      default:
  78.906 -        printk("%s: type %lx pfn %lx\n",__FUNCTION__,
  78.907 -               type, page_to_mfn(page));
   78.908 +        MEM_LOG("type %lx pfn %lx", type, page_to_mfn(page));
  78.909 +        rc = -EINVAL;
  78.910          BUG();
  78.911      }
  78.912 +
   78.913 +    /* No need for atomic update of type_info here: no one else updates it. */
  78.914 +    if ( rc == 0 )
  78.915 +    {
  78.916 +        /*
  78.917 +         * Record TLB information for flush later. We do not stamp page tables
  78.918 +         * when running in shadow mode:
  78.919 +         *  1. Pointless, since it's the shadow pt's which must be tracked.
  78.920 +         *  2. Shadow mode reuses this field for shadowed page tables to
  78.921 +         *     store flags info -- we don't want to conflict with that.
  78.922 +         */
  78.923 +        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
  78.924 +               (page->count_info & PGC_page_table)) )
  78.925 +            page->tlbflush_timestamp = tlbflush_current_time();
  78.926 +        wmb();
  78.927 +        page->u.inuse.type_info--;
  78.928 +    }
  78.929 +    else if ( rc == -EINTR )
  78.930 +    {
  78.931 +        ASSERT(!(page->u.inuse.type_info &
  78.932 +                 (PGT_count_mask|PGT_validated|PGT_partial)));
  78.933 +        if ( !(shadow_mode_enabled(page_get_owner(page)) &&
  78.934 +               (page->count_info & PGC_page_table)) )
  78.935 +            page->tlbflush_timestamp = tlbflush_current_time();
  78.936 +        wmb();
  78.937 +        page->u.inuse.type_info |= PGT_validated;
  78.938 +    }
  78.939 +    else
  78.940 +    {
  78.941 +        BUG_ON(rc != -EAGAIN);
  78.942 +        wmb();
  78.943 +        page->u.inuse.type_info |= PGT_partial;
  78.944 +    }
  78.945 +
  78.946 +    return rc;
  78.947  }
  78.948  
  78.949  
  78.950 -void put_page_type(struct page_info *page)
  78.951 +static int __put_page_type(struct page_info *page,
  78.952 +                           int preemptible)
  78.953  {
  78.954      unsigned long nx, x, y = page->u.inuse.type_info;
  78.955  
  78.956 - again:
  78.957 -    do {
  78.958 +    for ( ; ; )
  78.959 +    {
  78.960          x  = y;
  78.961          nx = x - 1;
  78.962  
  78.963 @@ -1894,21 +2051,19 @@ void put_page_type(struct page_info *pag
  78.964          if ( unlikely((nx & PGT_count_mask) == 0) )
  78.965          {
  78.966              if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
  78.967 -                 likely(nx & PGT_validated) )
  78.968 +                 likely(nx & (PGT_validated|PGT_partial)) )
  78.969              {
  78.970                  /*
  78.971                   * Page-table pages must be unvalidated when count is zero. The
  78.972                   * 'free' is safe because the refcnt is non-zero and validated
  78.973                   * bit is clear => other ops will spin or fail.
  78.974                   */
  78.975 -                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, 
  78.976 -                                           x & ~PGT_validated)) != x) )
  78.977 -                    goto again;
  78.978 +                nx = x & ~(PGT_validated|PGT_partial);
  78.979 +                if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
  78.980 +                                           x, nx)) != x) )
  78.981 +                    continue;
  78.982                  /* We cleared the 'valid bit' so we do the clean up. */
  78.983 -                free_page_type(page, x);
  78.984 -                /* Carry on, but with the 'valid bit' now clear. */
  78.985 -                x  &= ~PGT_validated;
  78.986 -                nx &= ~PGT_validated;
  78.987 +                return free_page_type(page, x, preemptible);
  78.988              }
  78.989  
  78.990              /*
  78.991 @@ -1922,25 +2077,33 @@ void put_page_type(struct page_info *pag
  78.992                     (page->count_info & PGC_page_table)) )
  78.993                  page->tlbflush_timestamp = tlbflush_current_time();
  78.994          }
  78.995 +
  78.996 +        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
  78.997 +            break;
  78.998 +
  78.999 +        if ( preemptible && hypercall_preempt_check() )
 78.1000 +            return -EINTR;
 78.1001      }
 78.1002 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 78.1003 +
 78.1004 +    return 0;
 78.1005  }
 78.1006  
 78.1007  
 78.1008 -int get_page_type(struct page_info *page, unsigned long type)
 78.1009 +static int __get_page_type(struct page_info *page, unsigned long type,
 78.1010 +                           int preemptible)
 78.1011  {
 78.1012      unsigned long nx, x, y = page->u.inuse.type_info;
 78.1013  
 78.1014      ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
 78.1015  
 78.1016 - again:
 78.1017 -    do {
 78.1018 +    for ( ; ; )
 78.1019 +    {
 78.1020          x  = y;
 78.1021          nx = x + 1;
 78.1022          if ( unlikely((nx & PGT_count_mask) == 0) )
 78.1023          {
 78.1024              MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
 78.1025 -            return 0;
 78.1026 +            return -EINVAL;
 78.1027          }
 78.1028          else if ( unlikely((x & PGT_count_mask) == 0) )
 78.1029          {
 78.1030 @@ -1993,28 +2156,43 @@ int get_page_type(struct page_info *page
 78.1031              /* Don't log failure if it could be a recursive-mapping attempt. */
 78.1032              if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
 78.1033                   (type == PGT_l1_page_table) )
 78.1034 -                return 0;
 78.1035 +                return -EINVAL;
 78.1036              if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
 78.1037                   (type == PGT_l2_page_table) )
 78.1038 -                return 0;
 78.1039 +                return -EINVAL;
 78.1040              if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
 78.1041                   (type == PGT_l3_page_table) )
 78.1042 -                return 0;
 78.1043 +                return -EINVAL;
 78.1044              MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
 78.1045                      "for mfn %lx (pfn %lx)",
 78.1046                      x, type, page_to_mfn(page),
 78.1047                      get_gpfn_from_mfn(page_to_mfn(page)));
 78.1048 -            return 0;
 78.1049 +            return -EINVAL;
 78.1050          }
 78.1051          else if ( unlikely(!(x & PGT_validated)) )
 78.1052          {
 78.1053 -            /* Someone else is updating validation of this page. Wait... */
 78.1054 -            while ( (y = page->u.inuse.type_info) == x )
 78.1055 -                cpu_relax();
 78.1056 -            goto again;
 78.1057 +            if ( !(x & PGT_partial) )
 78.1058 +            {
 78.1059 +                /* Someone else is updating validation of this page. Wait... */
 78.1060 +                while ( (y = page->u.inuse.type_info) == x )
 78.1061 +                {
 78.1062 +                    if ( preemptible && hypercall_preempt_check() )
 78.1063 +                        return -EINTR;
 78.1064 +                    cpu_relax();
 78.1065 +                }
 78.1066 +                continue;
 78.1067 +            }
 78.1068 +            /* Type ref count was left at 1 when PGT_partial got set. */
 78.1069 +            ASSERT((x & PGT_count_mask) == 1);
 78.1070 +            nx = x & ~PGT_partial;
 78.1071          }
 78.1072 +
 78.1073 +        if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
 78.1074 +            break;
 78.1075 +
 78.1076 +        if ( preemptible && hypercall_preempt_check() )
 78.1077 +            return -EINTR;
 78.1078      }
 78.1079 -    while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
 78.1080  
 78.1081      if ( unlikely((x & PGT_type_mask) != type) )
 78.1082      {
 78.1083 @@ -2032,25 +2210,42 @@ int get_page_type(struct page_info *page
 78.1084  
 78.1085      if ( unlikely(!(nx & PGT_validated)) )
 78.1086      {
 78.1087 -        /* Try to validate page type; drop the new reference on failure. */
 78.1088 -        if ( unlikely(!alloc_page_type(page, type)) )
 78.1089 +        if ( !(x & PGT_partial) )
 78.1090          {
 78.1091 -            MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
 78.1092 -                    PRtype_info ": caf=%08x taf=%" PRtype_info,
 78.1093 -                    page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
 78.1094 -                    type, page->count_info, page->u.inuse.type_info);
 78.1095 -            /* Noone else can get a reference. We hold the only ref. */
 78.1096 -            page->u.inuse.type_info = 0;
 78.1097 -            return 0;
 78.1098 +            page->nr_validated_ptes = 0;
 78.1099 +            page->partial_pte = 0;
 78.1100          }
 78.1101 -
 78.1102 -        /* Noone else is updating simultaneously. */
 78.1103 -        __set_bit(_PGT_validated, &page->u.inuse.type_info);
 78.1104 +        return alloc_page_type(page, type, preemptible);
 78.1105      }
 78.1106  
 78.1107 -    return 1;
 78.1108 +    return 0;
 78.1109  }
 78.1110  
 78.1111 +void put_page_type(struct page_info *page)
 78.1112 +{
 78.1113 +    int rc = __put_page_type(page, 0);
 78.1114 +    ASSERT(rc == 0);
 78.1115 +    (void)rc;
 78.1116 +}
 78.1117 +
 78.1118 +int get_page_type(struct page_info *page, unsigned long type)
 78.1119 +{
 78.1120 +    int rc = __get_page_type(page, type, 0);
 78.1121 +    if ( likely(rc == 0) )
 78.1122 +        return 1;
 78.1123 +    ASSERT(rc == -EINVAL);
 78.1124 +    return 0;
 78.1125 +}
 78.1126 +
 78.1127 +int put_page_type_preemptible(struct page_info *page)
 78.1128 +{
 78.1129 +    return __put_page_type(page, 1);
 78.1130 +}
 78.1131 +
 78.1132 +int get_page_type_preemptible(struct page_info *page, unsigned long type)
 78.1133 +{
 78.1134 +    return __get_page_type(page, type, 1);
 78.1135 +}
 78.1136  
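The public interface stays compatible: put_page_type()/get_page_type() wrap the new core with preemptible=0, asserting that no restart can occur, while the *_preemptible variants expose the errno form. A hedged sketch of how a hypercall handler is expected to drive the preemptible pair (the surrounding handler is hypothetical):

    rc = get_page_type_preemptible(page, PGT_l4_page_table);
    if ( rc == -EAGAIN || rc == -EINTR )
        return rc;          /* caller turns this into a continuation */
    if ( rc )
        return rc;          /* -EINVAL: reject the operation */
    /* ... operate on the typed page ... */
    rc = put_page_type_preemptible(page);
    if ( rc == -EAGAIN || rc == -EINTR )
        return rc;          /* resume the put from the continuation */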
 78.1137  void cleanup_page_cacheattr(struct page_info *page)
 78.1138  {
 78.1139 @@ -2087,7 +2282,7 @@ int new_guest_cr3(unsigned long mfn)
 78.1140                      l4e_from_pfn(
 78.1141                          mfn,
 78.1142                          (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
 78.1143 -                    pagetable_get_pfn(v->arch.guest_table), 0);
 78.1144 +                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
 78.1145          if ( unlikely(!okay) )
 78.1146          {
 78.1147              MEM_LOG("Error while installing new compat baseptr %lx", mfn);
 78.1148 @@ -2102,7 +2297,7 @@ int new_guest_cr3(unsigned long mfn)
 78.1149  #endif
 78.1150      okay = paging_mode_refcounts(d)
 78.1151          ? get_page_from_pagenr(mfn, d)
 78.1152 -        : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
 78.1153 +        : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
 78.1154      if ( unlikely(!okay) )
 78.1155      {
 78.1156          MEM_LOG("Error while installing new baseptr %lx", mfn);
 78.1157 @@ -2276,9 +2471,7 @@ int do_mmuext_op(
 78.1158      {
 78.1159          if ( hypercall_preempt_check() )
 78.1160          {
 78.1161 -            rc = hypercall_create_continuation(
 78.1162 -                __HYPERVISOR_mmuext_op, "hihi",
 78.1163 -                uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 78.1164 +            rc = -EAGAIN;
 78.1165              break;
 78.1166          }
 78.1167  
 78.1168 @@ -2325,10 +2518,14 @@ int do_mmuext_op(
 78.1169              if ( paging_mode_refcounts(FOREIGNDOM) )
 78.1170                  break;
 78.1171  
 78.1172 -            okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
 78.1173 +            rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
 78.1174 +            okay = !rc;
 78.1175              if ( unlikely(!okay) )
 78.1176              {
 78.1177 -                MEM_LOG("Error while pinning mfn %lx", mfn);
 78.1178 +                if ( rc == -EINTR )
 78.1179 +                    rc = -EAGAIN;
 78.1180 +                else if ( rc != -EAGAIN )
 78.1181 +                    MEM_LOG("Error while pinning mfn %lx", mfn);
 78.1182                  break;
 78.1183              }
 78.1184  
 78.1185 @@ -2373,8 +2570,11 @@ int do_mmuext_op(
 78.1186              {
 78.1187                  put_page_and_type(page);
 78.1188                  put_page(page);
 78.1189 -                /* A page is dirtied when its pin status is cleared. */
 78.1190 -                paging_mark_dirty(d, mfn);
 78.1191 +                if ( !rc )
 78.1192 +                {
 78.1193 +                    /* A page is dirtied when its pin status is cleared. */
 78.1194 +                    paging_mark_dirty(d, mfn);
 78.1195 +                }
 78.1196              }
 78.1197              else
 78.1198              {
 78.1199 @@ -2398,8 +2598,8 @@ int do_mmuext_op(
 78.1200                  if ( paging_mode_refcounts(d) )
 78.1201                      okay = get_page_from_pagenr(mfn, d);
 78.1202                  else
 78.1203 -                    okay = get_page_and_type_from_pagenr(
 78.1204 -                        mfn, PGT_root_page_table, d);
 78.1205 +                    okay = !get_page_and_type_from_pagenr(
 78.1206 +                        mfn, PGT_root_page_table, d, 0);
 78.1207                  if ( unlikely(!okay) )
 78.1208                  {
 78.1209                      MEM_LOG("Error while installing new mfn %lx", mfn);
 78.1210 @@ -2517,6 +2717,11 @@ int do_mmuext_op(
 78.1211          guest_handle_add_offset(uops, 1);
 78.1212      }
 78.1213  
 78.1214 +    if ( rc == -EAGAIN )
 78.1215 +        rc = hypercall_create_continuation(
 78.1216 +            __HYPERVISOR_mmuext_op, "hihi",
 78.1217 +            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 78.1218 +
 78.1219      process_deferred_ops();
 78.1220  
 78.1221      perfc_add(num_mmuext_ops, i);
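
do_mmuext_op() (and do_mmu_update() below) no longer build the continuation inside the loop: the preempt check merely sets rc = -EAGAIN and breaks, and a single site after the loop converts -EAGAIN into hypercall_create_continuation(). Preemption signalled from deep inside the page-table walkers (-EINTR, remapped to -EAGAIN above) thereby shares the same restart path. The shape of the pattern, with process_one_op() standing in for the switch body:

    for ( i = 0; i < count; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            rc = -EAGAIN;               /* explicit check ... */
            break;
        }
        rc = process_one_op(&op);       /* ... or a preempted walker */
        if ( rc == -EAGAIN )
            break;
    }
    if ( rc == -EAGAIN )
        rc = hypercall_create_continuation(
            __HYPERVISOR_mmuext_op, "hihi",
            uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);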
 78.1222 @@ -2576,9 +2781,7 @@ int do_mmu_update(
 78.1223      {
 78.1224          if ( hypercall_preempt_check() )
 78.1225          {
 78.1226 -            rc = hypercall_create_continuation(
 78.1227 -                __HYPERVISOR_mmu_update, "hihi",
 78.1228 -                ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 78.1229 +            rc = -EAGAIN;
 78.1230              break;
 78.1231          }
 78.1232  
 78.1233 @@ -2601,7 +2804,7 @@ int do_mmu_update(
 78.1234               */
 78.1235          case MMU_NORMAL_PT_UPDATE:
 78.1236          case MMU_PT_UPDATE_PRESERVE_AD:
 78.1237 -            rc = xsm_mmu_normal_update(d, req.val);
 78.1238 +            rc = xsm_mmu_normal_update(d, FOREIGNDOM, req.val);
 78.1239              if ( rc )
 78.1240                  break;
 78.1241  
 78.1242 @@ -2653,27 +2856,29 @@ int do_mmu_update(
 78.1243                                          cmd == MMU_PT_UPDATE_PRESERVE_AD);
 78.1244                  }
 78.1245                  break;
 78.1246 -#if CONFIG_PAGING_LEVELS >= 3
 78.1247                  case PGT_l3_page_table:
 78.1248                  {
 78.1249                      l3_pgentry_t l3e = l3e_from_intpte(req.val);
 78.1250 -                    okay = mod_l3_entry(va, l3e, mfn,
 78.1251 -                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
 78.1252 +                    rc = mod_l3_entry(va, l3e, mfn,
 78.1253 +                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
 78.1254 +                    okay = !rc;
 78.1255                  }
 78.1256                  break;
 78.1257 -#endif
 78.1258  #if CONFIG_PAGING_LEVELS >= 4
 78.1259                  case PGT_l4_page_table:
 78.1260                  {
 78.1261                      l4_pgentry_t l4e = l4e_from_intpte(req.val);
 78.1262 -                    okay = mod_l4_entry(va, l4e, mfn,
 78.1263 -                                        cmd == MMU_PT_UPDATE_PRESERVE_AD);
 78.1264 +                    rc = mod_l4_entry(va, l4e, mfn,
 78.1265 +                                      cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
 78.1266 +                    okay = !rc;
 78.1267                  }
 78.1268                  break;
 78.1269  #endif
 78.1270                  }
 78.1271  
 78.1272                  put_page_type(page);
 78.1273 +                if ( rc == -EINTR )
 78.1274 +                    rc = -EAGAIN;
 78.1275              }
 78.1276              break;
 78.1277  
 78.1278 @@ -2742,6 +2947,11 @@ int do_mmu_update(
 78.1279          guest_handle_add_offset(ureqs, 1);
 78.1280      }
 78.1281  
 78.1282 +    if ( rc == -EAGAIN )
 78.1283 +        rc = hypercall_create_continuation(
 78.1284 +            __HYPERVISOR_mmu_update, "hihi",
 78.1285 +            ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
 78.1286 +
 78.1287      process_deferred_ops();
 78.1288  
 78.1289      domain_mmap_cache_destroy(&mapcache);
 78.1290 @@ -3111,7 +3321,7 @@ int do_update_va_mapping(unsigned long v
 78.1291      if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
 78.1292          return -EINVAL;
 78.1293  
 78.1294 -    rc = xsm_update_va_mapping(d, val);
 78.1295 +    rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
 78.1296      if ( rc )
 78.1297          return rc;
 78.1298  
 78.1299 @@ -3695,9 +3905,8 @@ static int ptwr_emulated_update(
 78.1300      nl1e = l1e_from_intpte(val);
 78.1301      if ( unlikely(!get_page_from_l1e(nl1e, d)) )
 78.1302      {
 78.1303 -        if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
 78.1304 -             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
 78.1305 -             (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
 78.1306 +        if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
 78.1307 +             !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
 78.1308          {
 78.1309              /*
 78.1310               * If this is an upper-half write to a PAE PTE then we assume that
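
The mm.c hunks above all follow one refactoring: instead of creating a hypercall continuation at the preemption point inside the loop, the loop now just records rc = -EAGAIN (converting -EINTR from callees), breaks out, and a single hypercall_create_continuation() call after the loop re-encodes the remaining work. A stand-alone sketch of that shape; all names here are illustrative stand-ins, not Xen's real API:

#include <stdio.h>
#include <errno.h>

#define MMU_UPDATE_PREEMPTED (1u << 31)   /* stand-in flag bit */

static int need_preempt(unsigned int done)
{
    return done == 3;                     /* pretend a softirq is pending */
}

static int process_one(unsigned int i)
{
    (void)i;
    return 0;                             /* or -EINTR from a callee */
}

static int do_batched_op(unsigned int *count)
{
    unsigned int i;
    int rc = 0;

    for ( i = 0; i < *count; i++ )
    {
        if ( need_preempt(i) )
        {
            rc = -EAGAIN;                 /* record it; don't return yet */
            break;
        }
        rc = process_one(i);
        if ( rc == -EINTR )               /* callee was interrupted */
            rc = -EAGAIN;
        if ( rc )
            break;
    }

    if ( rc == -EAGAIN )
    {
        /* Single continuation point: stash remaining work and flag it. */
        *count = (*count - i) | MMU_UPDATE_PREEMPTED;
        printf("continuation: %u ops left\n", *count & ~MMU_UPDATE_PREEMPTED);
    }

    /* cleanup that must run on every exit path (process_deferred_ops()
     * in the real code) goes here */
    return rc;
}

int main(void)
{
    unsigned int count = 8;
    while ( do_batched_op(&count) == -EAGAIN )
        count &= ~MMU_UPDATE_PREEMPTED;   /* guest re-issues the hypercall */
    return 0;
}

The payoff is that cleanup such as process_deferred_ops() sits on a single exit path instead of being duplicated at every preemption site.
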
    79.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Sep 12 14:32:45 2008 +0900
    79.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Sep 12 14:47:40 2008 +0900
    79.3 @@ -37,6 +37,7 @@
    79.4  #include <asm/shared.h>
    79.5  #include <asm/hap.h>
    79.6  #include <asm/paging.h>
    79.7 +#include <asm/p2m.h>
    79.8  #include <asm/domain.h>
    79.9  #include <xen/numa.h>
   79.10  
    80.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Sep 12 14:32:45 2008 +0900
    80.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Sep 12 14:47:40 2008 +0900
    80.3 @@ -39,6 +39,7 @@
    80.4  #include <xen/numa.h>
    80.5  #include "private.h"
    80.6  
    80.7 +DEFINE_PER_CPU(uint32_t,trace_shadow_path_flags);
    80.8  
    80.9  /* Set up the shadow-specific parts of a domain struct at start of day.
   80.10   * Called for every domain from arch_domain_create() */
   80.11 @@ -630,6 +631,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
   80.12  
   80.13              if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )
   80.14              {
   80.15 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_EVICT);
   80.16 +
   80.17                  /* Reuse this slot and remove current writable mapping. */
   80.18                  sh_remove_write_access_from_sl1p(v, gmfn, 
   80.19                                                   oos_fixup[idx].smfn[next],
   80.20 @@ -645,6 +648,8 @@ void oos_fixup_add(struct vcpu *v, mfn_t
   80.21              oos_fixup[idx].smfn[next] = smfn;
   80.22              oos_fixup[idx].off[next] = off;
   80.23              oos_fixup[idx].next = (next + 1) % SHADOW_OOS_FIXUPS;
   80.24 +
   80.25 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_OOS_FIXUP_ADD);
   80.26              return;
   80.27          }
   80.28      }
   80.29 @@ -687,6 +692,16 @@ static int oos_remove_write_access(struc
   80.30  }
   80.31  
   80.32  
   80.33 +static inline void trace_resync(int event, mfn_t gmfn)
   80.34 +{
   80.35 +    if ( tb_init_done )
   80.36 +    {
   80.37 +        /* Convert gmfn to gfn */
   80.38 +        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
   80.39 +        __trace_var(event, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
   80.40 +    }
   80.41 +}
   80.42 +
   80.43  /* Pull all the entries on an out-of-sync page back into sync. */
   80.44  static void _sh_resync(struct vcpu *v, mfn_t gmfn,
   80.45                         struct oos_fixup *fixup, mfn_t snp)
   80.46 @@ -700,8 +715,8 @@ static void _sh_resync(struct vcpu *v, m
   80.47               & ~SHF_L1_ANY));
   80.48      ASSERT(!sh_page_has_multiple_shadows(mfn_to_page(gmfn)));
   80.49  
   80.50 -    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx, va=%lx\n",
   80.51 -                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   80.52 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
   80.53 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
   80.54  
   80.55      /* Need to pull write access so the page *stays* in sync. */
   80.56      if ( oos_remove_write_access(v, gmfn, fixup) )
   80.57 @@ -719,6 +734,7 @@ static void _sh_resync(struct vcpu *v, m
   80.58      /* Now we know all the entries are synced, and will stay that way */
   80.59      pg->shadow_flags &= ~SHF_out_of_sync;
   80.60      perfc_incr(shadow_resync);
   80.61 +    trace_resync(TRC_SHADOW_RESYNC_FULL, gmfn);
   80.62  }
   80.63  
   80.64  
   80.65 @@ -930,6 +946,7 @@ void sh_resync_all(struct vcpu *v, int s
   80.66                  /* Update the shadows and leave the page OOS. */
   80.67                  if ( sh_skip_sync(v, oos[idx]) )
   80.68                      continue;
   80.69 +                trace_resync(TRC_SHADOW_RESYNC_ONLY, oos[idx]);
   80.70                  _sh_resync_l1(other, oos[idx], oos_snapshot[idx]);
   80.71              }
   80.72              else
   80.73 @@ -945,15 +962,16 @@ void sh_resync_all(struct vcpu *v, int s
   80.74      }
   80.75  }
   80.76  
   80.77 -/* Allow a shadowed page to go out of sync */
   80.78 +/* Allow a shadowed page to go out of sync. Unsyncs are traced in
   80.79 + * multi.c:sh_page_fault() */
   80.80  int sh_unsync(struct vcpu *v, mfn_t gmfn)
   80.81  {
   80.82      struct page_info *pg;
   80.83      
   80.84      ASSERT(shadow_locked_by_me(v->domain));
   80.85  
   80.86 -    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx va %lx\n",
   80.87 -                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn), va);
   80.88 +    SHADOW_PRINTK("d=%d, v=%d, gmfn=%05lx\n",
   80.89 +                  v->domain->domain_id, v->vcpu_id, mfn_x(gmfn));
   80.90  
   80.91      pg = mfn_to_page(gmfn);
   80.92   
   80.93 @@ -970,6 +988,7 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
   80.94      pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
   80.95      oos_hash_add(v, gmfn);
   80.96      perfc_incr(shadow_unsync);
   80.97 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_UNSYNC);
   80.98      return 1;
   80.99  }
  80.100  
  80.101 @@ -1005,6 +1024,7 @@ void shadow_promote(struct vcpu *v, mfn_
  80.102  
  80.103      ASSERT(!test_bit(type, &page->shadow_flags));
  80.104      set_bit(type, &page->shadow_flags);
  80.105 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
  80.106  }
  80.107  
  80.108  void shadow_demote(struct vcpu *v, mfn_t gmfn, u32 type)
  80.109 @@ -1027,6 +1047,8 @@ void shadow_demote(struct vcpu *v, mfn_t
  80.110  #endif 
  80.111          clear_bit(_PGC_page_table, &page->count_info);
  80.112      }
  80.113 +
  80.114 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_DEMOTE);
  80.115  }
  80.116  
  80.117  /**************************************************************************/
  80.118 @@ -1094,6 +1116,7 @@ sh_validate_guest_entry(struct vcpu *v, 
  80.119      ASSERT((page->shadow_flags 
  80.120              & (SHF_L4_64|SHF_L3_64|SHF_L2H_64|SHF_L2_64|SHF_L1_64)) == 0);
  80.121  #endif
  80.122 +    this_cpu(trace_shadow_path_flags) |= (result<<(TRCE_SFLAG_SET_CHANGED)); 
  80.123  
  80.124      return result;
  80.125  }
  80.126 @@ -1295,6 +1318,18 @@ static void shadow_unhook_mappings(struc
  80.127      }
  80.128  }
  80.129  
  80.130 +static inline void trace_shadow_prealloc_unpin(struct domain *d, mfn_t smfn)
  80.131 +{
  80.132 +    if ( tb_init_done )
  80.133 +    {
  80.134 +        /* Convert smfn to gfn */
  80.135 +        unsigned long gfn;
  80.136 +        ASSERT(mfn_valid(smfn));
  80.137 +        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
  80.138 +        __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
  80.139 +                    sizeof(gfn), (unsigned char*)&gfn);
  80.140 +    }
  80.141 +}
  80.142  
  80.143  /* Make sure there are at least count order-sized pages
  80.144   * available in the shadow page pool. */
  80.145 @@ -1327,6 +1362,7 @@ static void _shadow_prealloc(
  80.146          smfn = shadow_page_to_mfn(sp);
  80.147  
  80.148          /* Unpin this top-level shadow */
  80.149 +        trace_shadow_prealloc_unpin(d, smfn);
  80.150          sh_unpin(v, smfn);
  80.151  
  80.152          /* See if that freed up enough space */
  80.153 @@ -1343,6 +1379,7 @@ static void _shadow_prealloc(
  80.154          {
  80.155              if ( !pagetable_is_null(v2->arch.shadow_table[i]) )
  80.156              {
  80.157 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PREALLOC_UNHOOK);
  80.158                  shadow_unhook_mappings(v, 
  80.159                                 pagetable_get_mfn(v2->arch.shadow_table[i]));
  80.160  
  80.161 @@ -2200,6 +2237,16 @@ void sh_destroy_shadow(struct vcpu *v, m
  80.162      }    
  80.163  }
  80.164  
  80.165 +static inline void trace_shadow_wrmap_bf(mfn_t gmfn)
  80.166 +{
  80.167 +    if ( tb_init_done )
  80.168 +    {
  80.169 +        /* Convert gmfn to gfn */
  80.170 +        unsigned long gfn = mfn_to_gfn(current->domain, gmfn);
  80.171 +        __trace_var(TRC_SHADOW_WRMAP_BF, 0/*!tsc*/, sizeof(gfn), (unsigned char*)&gfn);
  80.172 +    }
  80.173 +}
  80.174 +
  80.175  /**************************************************************************/
  80.176  /* Remove all writeable mappings of a guest frame from the shadow tables 
  80.177   * Returns non-zero if we need to flush TLBs. 
  80.178 @@ -2265,6 +2312,8 @@ int sh_remove_write_access(struct vcpu *
  80.179           || (pg->u.inuse.type_info & PGT_count_mask) == 0 )
  80.180          return 0;
  80.181  
  80.182 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP);
  80.183 +
  80.184      perfc_incr(shadow_writeable);
  80.185  
  80.186      /* If this isn't a "normal" writeable page, the domain is trying to 
  80.187 @@ -2285,11 +2334,14 @@ int sh_remove_write_access(struct vcpu *
  80.188           * and that mapping is likely to be in the current pagetable,
  80.189           * in the guest's linear map (on non-HIGHPTE linux and windows)*/
  80.190  
  80.191 -#define GUESS(_a, _h) do {                                                \
  80.192 +#define GUESS(_a, _h) do {                                              \
  80.193              if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \
  80.194 -                perfc_incr(shadow_writeable_h_ ## _h);                   \
  80.195 -            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )          \
  80.196 -                return 1;                                                 \
  80.197 +                perfc_incr(shadow_writeable_h_ ## _h);                  \
  80.198 +            if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 )        \
  80.199 +            {                                                           \
  80.200 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);   \
  80.201 +                return 1;                                               \
  80.202 +            }                                                           \
  80.203          } while (0)
  80.204  
  80.205          if ( level == 0 && fault_addr )
  80.206 @@ -2377,6 +2429,7 @@ int sh_remove_write_access(struct vcpu *
  80.207  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC */
  80.208      
  80.209      /* Brute-force search of all the shadows, by walking the hash */
  80.210 +    trace_shadow_wrmap_bf(gmfn);
  80.211      if ( level == 0 )
  80.212          perfc_incr(shadow_writeable_bf_1);
  80.213      else
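
All of the trace helpers added above share one shape: a single tb_init_done test keeps the disabled-tracing fast path to one branch, the mfn is converted to a gfn so the record is meaningful to tooling, and per-path detail accumulates in a per-CPU flags word instead of separate records. A minimal stand-alone sketch, with tb_init_done, __trace_var() and this_cpu() mocked:

#include <stdio.h>
#include <stdint.h>

static int tb_init_done;                    /* set when tracing is enabled */
static uint32_t trace_shadow_path_flags;    /* per-CPU in the real code */

#define TRACE_SHADOW_PATH_FLAG(bit) (trace_shadow_path_flags |= 1u << (bit))
#define TRACE_CLEAR_PATH_FLAGS      (trace_shadow_path_flags = 0)

static void __trace_var(uint32_t event, int tsc, size_t len, void *data)
{
    (void)tsc; (void)data;                  /* mock: just show the record */
    printf("event %#x, %zu byte(s), flags %#x\n",
           event, len, trace_shadow_path_flags);
}

static void trace_resync(uint32_t event, unsigned long gfn)
{
    if ( tb_init_done )                     /* one branch on the fast path */
        __trace_var(event, 0/*!tsc*/, sizeof(gfn), &gfn);
}

int main(void)
{
    tb_init_done = 1;
    TRACE_CLEAR_PATH_FLAGS;                 /* start of a page fault */
    TRACE_SHADOW_PATH_FLAG(5);              /* e.g. TRCE_SFLAG_SET_CHANGED */
    trace_resync(0xdead, 0x1234);           /* flags ride along in the record */
    return 0;
}
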
    81.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Sep 12 14:32:45 2008 +0900
    81.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Sep 12 14:47:40 2008 +0900
    81.3 @@ -225,6 +225,7 @@ static uint32_t mandatory_flags(struct v
    81.4  static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
    81.5  {
    81.6      guest_intpte_t old, new;
    81.7 +    int ret = 0;
    81.8  
    81.9      old = *(guest_intpte_t *)walk_p;
   81.10      new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
   81.11 @@ -234,10 +235,16 @@ static uint32_t set_ad_bits(void *guest_
   81.12           * into the guest table as well.  If the guest table has changed
 81.13           * under our feet then leave it alone. */
   81.14          *(guest_intpte_t *)walk_p = new;
   81.15 -        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) 
   81.16 -            return 1;
   81.17 +        if( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old ) 
   81.18 +            ret = 1;
   81.19 +
   81.20 +        /* FIXME -- this code is longer than necessary */
   81.21 +        if(set_dirty)
   81.22 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
   81.23 +        else
   81.24 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
   81.25      }
   81.26 -    return 0;
   81.27 +    return ret;
   81.28  }
   81.29  
   81.30  /* This validation is called with lock held, and after write permission
   81.31 @@ -1432,6 +1439,7 @@ static int shadow_set_l1e(struct vcpu *v
   81.32      {
   81.33          /* About to install a new reference */        
   81.34          if ( shadow_mode_refcounts(d) ) {
   81.35 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
   81.36              if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 ) 
   81.37              {
   81.38                  /* Doesn't look like a pagetable. */
   81.39 @@ -1461,6 +1469,7 @@ static int shadow_set_l1e(struct vcpu *v
   81.40          {
   81.41              shadow_vram_put_l1e(old_sl1e, sl1e, sl1mfn, d);
   81.42              shadow_put_page_from_l1e(old_sl1e, d);
   81.43 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
   81.44          } 
   81.45      }
   81.46      return flags;
   81.47 @@ -2896,6 +2905,7 @@ static inline void check_for_early_unsha
   81.48      {
   81.49          perfc_incr(shadow_early_unshadow);
   81.50          sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ );
   81.51 +        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EARLY_UNSHADOW);
   81.52      }
   81.53      v->arch.paging.shadow.last_emulated_mfn_for_unshadow = mfn_x(gmfn);
   81.54  #endif
   81.55 @@ -3012,6 +3022,132 @@ static void sh_prefetch(struct vcpu *v, 
   81.56  
   81.57  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
   81.58  
   81.59 +#if GUEST_PAGING_LEVELS == 4
   81.60 +typedef u64 guest_va_t;
   81.61 +typedef u64 guest_pa_t;
   81.62 +#elif GUEST_PAGING_LEVELS == 3
   81.63 +typedef u32 guest_va_t;
   81.64 +typedef u64 guest_pa_t;
   81.65 +#else
   81.66 +typedef u32 guest_va_t;
   81.67 +typedef u32 guest_pa_t;
   81.68 +#endif
   81.69 +
   81.70 +static inline void trace_shadow_gen(u32 event, guest_va_t va)
   81.71 +{
   81.72 +    if ( tb_init_done )
   81.73 +    {
   81.74 +        event |= (GUEST_PAGING_LEVELS-2)<<8;
   81.75 +        __trace_var(event, 0/*!tsc*/, sizeof(va), (unsigned char*)&va);
   81.76 +    }
   81.77 +}
   81.78 +
   81.79 +static inline void trace_shadow_fixup(guest_l1e_t gl1e,
   81.80 +                                      guest_va_t va)
   81.81 +{
   81.82 +    if ( tb_init_done )
   81.83 +    {
   81.84 +        struct {
   81.85 +            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
 81.86 +               so put it first for alignment's sake. */
   81.87 +            guest_l1e_t gl1e;
   81.88 +            guest_va_t va;
   81.89 +            u32 flags;
   81.90 +        } __attribute__((packed)) d;
   81.91 +        u32 event;
   81.92 +
   81.93 +        event = TRC_SHADOW_FIXUP | ((GUEST_PAGING_LEVELS-2)<<8);
   81.94 +
   81.95 +        d.gl1e = gl1e;
   81.96 +        d.va = va;
   81.97 +        d.flags = this_cpu(trace_shadow_path_flags);
   81.98 +
   81.99 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  81.100 +    }
  81.101 +}
  81.102 +                                          
  81.103 +static inline void trace_not_shadow_fault(guest_l1e_t gl1e,
  81.104 +                                          guest_va_t va)
  81.105 +{
  81.106 +    if ( tb_init_done )
  81.107 +    {
  81.108 +        struct {
  81.109 +            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
  81.110 +               so put it first for alignment's sake. */
  81.111 +            guest_l1e_t gl1e;
  81.112 +            guest_va_t va;
  81.113 +            u32 flags;
  81.114 +        } __attribute__((packed)) d;
  81.115 +        u32 event;
  81.116 +
  81.117 +        event = TRC_SHADOW_NOT_SHADOW | ((GUEST_PAGING_LEVELS-2)<<8);
  81.118 +
  81.119 +        d.gl1e = gl1e;
  81.120 +        d.va = va;
  81.121 +        d.flags = this_cpu(trace_shadow_path_flags);
  81.122 +
  81.123 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  81.124 +    }
  81.125 +}
  81.126 +                                          
  81.127 +static inline void trace_shadow_emulate_other(u32 event,
  81.128 +                                                 guest_va_t va,
  81.129 +                                                 gfn_t gfn)
  81.130 +{
  81.131 +    if ( tb_init_done )
  81.132 +    {
  81.133 +        struct {
  81.134 +            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
  81.135 +               so put it first for alignment's sake. */
  81.136 +#if GUEST_PAGING_LEVELS == 2
  81.137 +            u32 gfn;
  81.138 +#else
  81.139 +            u64 gfn;
  81.140 +#endif
  81.141 +            guest_va_t va;
  81.142 +        } __attribute__((packed)) d;
  81.143 +
  81.144 +        event |= ((GUEST_PAGING_LEVELS-2)<<8);
  81.145 +
  81.146 +        d.gfn=gfn_x(gfn);
  81.147 +        d.va = va;
  81.148 +
  81.149 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  81.150 +    }
  81.151 +}
  81.152 +
  81.153 +#if GUEST_PAGING_LEVELS == 3
  81.154 +static DEFINE_PER_CPU(guest_va_t,trace_emulate_initial_va);
  81.155 +static DEFINE_PER_CPU(int,trace_extra_emulation_count);
  81.156 +#endif
  81.157 +static DEFINE_PER_CPU(guest_pa_t,trace_emulate_write_val);
  81.158 +
  81.159 +static inline void trace_shadow_emulate(guest_l1e_t gl1e, unsigned long va)
  81.160 +{
  81.161 +    if ( tb_init_done )
  81.162 +    {
  81.163 +        struct {
  81.164 +            /* for PAE, guest_l1e may be 64 while guest_va may be 32;
  81.165 +               so put it first for alignment's sake. */
  81.166 +            guest_l1e_t gl1e, write_val;
  81.167 +            guest_va_t va;
  81.168 +            unsigned flags:29, emulation_count:3;
  81.169 +        } __attribute__((packed)) d;
  81.170 +        u32 event;
  81.171 +
  81.172 +        event = TRC_SHADOW_EMULATE | ((GUEST_PAGING_LEVELS-2)<<8);
  81.173 +
  81.174 +        d.gl1e = gl1e;
  81.175 +        d.write_val.l1 = this_cpu(trace_emulate_write_val);
  81.176 +        d.va = va;
  81.177 +#if GUEST_PAGING_LEVELS == 3
  81.178 +        d.emulation_count = this_cpu(trace_extra_emulation_count);
  81.179 +#endif
  81.180 +        d.flags = this_cpu(trace_shadow_path_flags);
  81.181 +
  81.182 +        __trace_var(event, 0/*!tsc*/, sizeof(d), (unsigned char*)&d);
  81.183 +    }
  81.184 +}
  81.185  
  81.186  /**************************************************************************/
  81.187  /* Entry points into the shadow code */
  81.188 @@ -3027,8 +3163,8 @@ static int sh_page_fault(struct vcpu *v,
  81.189  {
  81.190      struct domain *d = v->domain;
  81.191      walk_t gw;
  81.192 -    gfn_t gfn;
  81.193 -    mfn_t gmfn, sl1mfn=_mfn(0);
  81.194 +    gfn_t gfn = _gfn(0);
  81.195 +    mfn_t gmfn, sl1mfn = _mfn(0);
  81.196      shadow_l1e_t sl1e, *ptr_sl1e;
  81.197      paddr_t gpa;
  81.198      struct sh_emulate_ctxt emul_ctxt;
  81.199 @@ -3043,7 +3179,7 @@ static int sh_page_fault(struct vcpu *v,
  81.200  
  81.201      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u, rip=%lx\n",
  81.202                    v->domain->domain_id, v->vcpu_id, va, regs->error_code,
  81.203 -                  regs->rip);
  81.204 +                  regs->eip);
  81.205  
  81.206      perfc_incr(shadow_fault);
  81.207  
  81.208 @@ -3132,6 +3268,7 @@ static int sh_page_fault(struct vcpu *v,
  81.209                  reset_early_unshadow(v);
  81.210                  perfc_incr(shadow_fault_fast_gnp);
  81.211                  SHADOW_PRINTK("fast path not-present\n");
  81.212 +                trace_shadow_gen(TRC_SHADOW_FAST_PROPAGATE, va);
  81.213                  return 0;
  81.214              }
  81.215              else
  81.216 @@ -3145,6 +3282,7 @@ static int sh_page_fault(struct vcpu *v,
  81.217              perfc_incr(shadow_fault_fast_mmio);
  81.218              SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa);
  81.219              reset_early_unshadow(v);
  81.220 +            trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va);
  81.221              return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
  81.222                      ? EXCRET_fault_fixed : 0);
  81.223          }
  81.224 @@ -3155,6 +3293,7 @@ static int sh_page_fault(struct vcpu *v,
  81.225               * Retry and let the hardware give us the right fault next time. */
  81.226              perfc_incr(shadow_fault_fast_fail);
  81.227              SHADOW_PRINTK("fast path false alarm!\n");            
  81.228 +            trace_shadow_gen(TRC_SHADOW_FALSE_FAST_PATH, va);
  81.229              return EXCRET_fault_fixed;
  81.230          }
  81.231      }
  81.232 @@ -3190,7 +3329,7 @@ static int sh_page_fault(struct vcpu *v,
  81.233          perfc_incr(shadow_fault_bail_real_fault);
  81.234          SHADOW_PRINTK("not a shadow fault\n");
  81.235          reset_early_unshadow(v);
  81.236 -        return 0;
  81.237 +        goto propagate;
  81.238      }
  81.239  
  81.240      /* It's possible that the guest has put pagetables in memory that it has 
  81.241 @@ -3200,7 +3339,7 @@ static int sh_page_fault(struct vcpu *v,
  81.242      if ( unlikely(d->is_shutting_down) )
  81.243      {
  81.244          SHADOW_PRINTK("guest is shutting down\n");
  81.245 -        return 0;
  81.246 +        goto propagate;
  81.247      }
  81.248  
  81.249      /* What kind of access are we dealing with? */
  81.250 @@ -3218,7 +3357,7 @@ static int sh_page_fault(struct vcpu *v,
  81.251          SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
  81.252                        gfn_x(gfn), mfn_x(gmfn));
  81.253          reset_early_unshadow(v);
  81.254 -        return 0;
  81.255 +        goto propagate;
  81.256      }
  81.257  
  81.258  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
  81.259 @@ -3229,6 +3368,8 @@ static int sh_page_fault(struct vcpu *v,
  81.260  
  81.261      shadow_lock(d);
  81.262  
  81.263 +    TRACE_CLEAR_PATH_FLAGS;
  81.264 +    
  81.265      rc = gw_remove_write_accesses(v, va, &gw);
  81.266  
  81.267      /* First bit set: Removed write access to a page. */
  81.268 @@ -3281,6 +3422,7 @@ static int sh_page_fault(struct vcpu *v,
  81.269           * Get out of the fault handler immediately. */
  81.270          ASSERT(d->is_shutting_down);
  81.271          shadow_unlock(d);
  81.272 +        trace_shadow_gen(TRC_SHADOW_DOMF_DYING, va);
  81.273          return 0;
  81.274      }
  81.275  
  81.276 @@ -3383,6 +3525,7 @@ static int sh_page_fault(struct vcpu *v,
  81.277      d->arch.paging.log_dirty.fault_count++;
  81.278      reset_early_unshadow(v);
  81.279  
  81.280 +    trace_shadow_fixup(gw.l1e, va);
  81.281   done:
  81.282      sh_audit_gw(v, &gw);
  81.283      SHADOW_PRINTK("fixed\n");
  81.284 @@ -3405,6 +3548,8 @@ static int sh_page_fault(struct vcpu *v,
  81.285                        mfn_x(gmfn));
  81.286          perfc_incr(shadow_fault_emulate_failed);
  81.287          sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
  81.288 +        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_USER,
  81.289 +                                      va, gfn);
  81.290          goto done;
  81.291      }
  81.292  
  81.293 @@ -3421,6 +3566,8 @@ static int sh_page_fault(struct vcpu *v,
  81.294      shadow_audit_tables(v);
  81.295      shadow_unlock(d);
  81.296  
  81.297 +    this_cpu(trace_emulate_write_val) = 0;
  81.298 +
  81.299  #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
  81.300   early_emulation:
  81.301  #endif
  81.302 @@ -3446,6 +3593,8 @@ static int sh_page_fault(struct vcpu *v,
  81.303                       "injection: cr2=%#lx, mfn=%#lx\n", 
  81.304                       va, mfn_x(gmfn));
  81.305              sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
  81.306 +            trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ,
  81.307 +                                       va, gfn);
  81.308              return EXCRET_fault_fixed;
  81.309          }
  81.310      }
  81.311 @@ -3478,6 +3627,10 @@ static int sh_page_fault(struct vcpu *v,
  81.312           * to support more operations in the emulator.  More likely, 
  81.313           * though, this is a hint that this page should not be shadowed. */
  81.314          shadow_remove_all_shadows(v, gmfn);
  81.315 +
  81.316 +        trace_shadow_emulate_other(TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED,
  81.317 +                                   va, gfn);
  81.318 +        goto emulate_done;
  81.319      }
  81.320  
  81.321  #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
  81.322 @@ -3504,7 +3657,8 @@ static int sh_page_fault(struct vcpu *v,
  81.323  
  81.324  #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
  81.325      if ( r == X86EMUL_OKAY ) {
  81.326 -        int i;
  81.327 +        int i, emulation_count=0;
  81.328 +        this_cpu(trace_emulate_initial_va) = va;
  81.329          /* Emulate up to four extra instructions in the hope of catching 
  81.330           * the "second half" of a 64-bit pagetable write. */
  81.331          for ( i = 0 ; i < 4 ; i++ )
  81.332 @@ -3513,10 +3667,12 @@ static int sh_page_fault(struct vcpu *v,
  81.333              v->arch.paging.last_write_was_pt = 0;
  81.334              r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
  81.335              if ( r == X86EMUL_OKAY )
  81.336 -            {
  81.337 +            { 
  81.338 +                emulation_count++;
  81.339                  if ( v->arch.paging.last_write_was_pt )
  81.340                  {
  81.341                      perfc_incr(shadow_em_ex_pt);
  81.342 +                    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN);
  81.343                      break; /* Don't emulate past the other half of the write */
  81.344                  }
  81.345                  else 
  81.346 @@ -3525,12 +3681,16 @@ static int sh_page_fault(struct vcpu *v,
  81.347              else
  81.348              {
  81.349                  perfc_incr(shadow_em_ex_fail);
  81.350 +                TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATION_LAST_FAILED);
  81.351                  break; /* Don't emulate again if we failed! */
  81.352              }
  81.353          }
  81.354 +        this_cpu(trace_extra_emulation_count)=emulation_count;
  81.355      }
  81.356  #endif /* PAE guest */
  81.357  
  81.358 +    trace_shadow_emulate(gw.l1e, va);
  81.359 + emulate_done:
  81.360      SHADOW_PRINTK("emulated\n");
  81.361      return EXCRET_fault_fixed;
  81.362  
  81.363 @@ -3543,6 +3703,7 @@ static int sh_page_fault(struct vcpu *v,
  81.364      shadow_audit_tables(v);
  81.365      reset_early_unshadow(v);
  81.366      shadow_unlock(d);
  81.367 +    trace_shadow_gen(TRC_SHADOW_MMIO, va);
  81.368      return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT)
  81.369              ? EXCRET_fault_fixed : 0);
  81.370  
  81.371 @@ -3552,6 +3713,10 @@ static int sh_page_fault(struct vcpu *v,
  81.372      shadow_audit_tables(v);
  81.373      reset_early_unshadow(v);
  81.374      shadow_unlock(d);
  81.375 +
  81.376 +propagate:
  81.377 +    trace_not_shadow_fault(gw.l1e, va);
  81.378 +
  81.379      return 0;
  81.380  }
  81.381  
  81.382 @@ -3990,7 +4155,7 @@ sh_detach_old_tables(struct vcpu *v)
  81.383              sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable);
  81.384          v->arch.paging.shadow.guest_vtable = NULL;
  81.385      }
  81.386 -#endif
  81.387 +#endif // !NDEBUG
  81.388  
  81.389  
  81.390      ////
  81.391 @@ -4446,6 +4611,7 @@ static int sh_guess_wrmap(struct vcpu *v
  81.392      sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
  81.393      r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
  81.394      ASSERT( !(r & SHADOW_SET_ERROR) );
  81.395 +    TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
  81.396      return 1;
  81.397  }
  81.398  #endif
  81.399 @@ -4800,7 +4966,7 @@ static void emulate_unmap_dest(struct vc
  81.400  
  81.401  static int
  81.402  sh_x86_emulate_write(struct vcpu *v, unsigned long vaddr, void *src,
  81.403 -                      u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
  81.404 +                     u32 bytes, struct sh_emulate_ctxt *sh_ctxt)
  81.405  {
  81.406      void *addr;
  81.407  
  81.408 @@ -4815,6 +4981,22 @@ sh_x86_emulate_write(struct vcpu *v, uns
  81.409      shadow_lock(v->domain);
  81.410      memcpy(addr, src, bytes);
  81.411  
  81.412 +    if ( tb_init_done )
  81.413 +    {
  81.414 +#if GUEST_PAGING_LEVELS == 3
  81.415 +        if ( vaddr == this_cpu(trace_emulate_initial_va) )
  81.416 +            memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
  81.417 +        else if ( (vaddr & ~(0x7UL)) == this_cpu(trace_emulate_initial_va) )
  81.418 +        {
  81.419 +            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_EMULATE_FULL_PT);
  81.420 +            memcpy(&this_cpu(trace_emulate_write_val),
  81.421 +                   (void *)(((unsigned long) addr) & ~(0x7UL)), GUEST_PTE_SIZE);
  81.422 +        }
  81.423 +#else
  81.424 +        memcpy(&this_cpu(trace_emulate_write_val), src, bytes);
  81.425 +#endif
  81.426 +    }
  81.427 +
  81.428      emulate_unmap_dest(v, addr, bytes, sh_ctxt);
  81.429      shadow_audit_tables(v);
  81.430      shadow_unlock(v->domain);
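
The packed trace-record structs added in multi.c deliberately place the 64-bit gl1e ahead of the 32-bit va: under PAE the PTE is wider than the virtual address, so leading with the widest field keeps it naturally aligned even though the struct is packed. A small sketch of the layout, with field widths following the 3-level/PAE case:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

typedef uint64_t guest_l1e_t;   /* PAE PTEs are 64-bit */
typedef uint32_t guest_va_t;    /* PAE guest VAs are 32-bit */

struct fixup_rec {
    guest_l1e_t gl1e;           /* widest field first... */
    guest_va_t  va;
    uint32_t    flags;
} __attribute__((packed));      /* ...so packing loses no alignment */

int main(void)
{
    /* 8 + 4 + 4 with the 64-bit field at offset 0; were va first, the
     * packed gl1e would sit misaligned at offset 4. */
    printf("record size: %zu\n", sizeof(struct fixup_rec));
    printf("va at offset %zu\n", offsetof(struct fixup_rec, va));
    return 0;
}
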
    82.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Sep 12 14:32:45 2008 +0900
    82.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Sep 12 14:47:40 2008 +0900
    82.3 @@ -92,6 +92,43 @@ extern int shadow_audit_enable;
    82.4  #define SHADOW_DEBUG_LOGDIRTY          0
    82.5  
    82.6  /******************************************************************************
    82.7 + * Tracing
    82.8 + */
    82.9 +DECLARE_PER_CPU(uint32_t,trace_shadow_path_flags);
   82.10 +
   82.11 +#define TRACE_SHADOW_PATH_FLAG(_x)                      \
   82.12 +    do {                                                \
   82.13 +        this_cpu(trace_shadow_path_flags) |= (1<<(_x));      \
   82.14 +    } while(0)
   82.15 +
   82.16 +#define TRACE_CLEAR_PATH_FLAGS                  \
   82.17 +    this_cpu(trace_shadow_path_flags) = 0
   82.18 +
   82.19 +enum {
   82.20 +    TRCE_SFLAG_SET_AD,
   82.21 +    TRCE_SFLAG_SET_A,
   82.22 +    TRCE_SFLAG_SHADOW_L1_GET_REF,
   82.23 +    TRCE_SFLAG_SHADOW_L1_PUT_REF,
   82.24 +    TRCE_SFLAG_L2_PROPAGATE,
   82.25 +    TRCE_SFLAG_SET_CHANGED,
   82.26 +    TRCE_SFLAG_SET_FLUSH,
   82.27 +    TRCE_SFLAG_SET_ERROR,
   82.28 +    TRCE_SFLAG_DEMOTE,
   82.29 +    TRCE_SFLAG_PROMOTE,
   82.30 +    TRCE_SFLAG_WRMAP,
   82.31 +    TRCE_SFLAG_WRMAP_GUESS_FOUND,
   82.32 +    TRCE_SFLAG_WRMAP_BRUTE_FORCE,
   82.33 +    TRCE_SFLAG_EARLY_UNSHADOW,
   82.34 +    TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
   82.35 +    TRCE_SFLAG_EMULATION_LAST_FAILED,
   82.36 +    TRCE_SFLAG_EMULATE_FULL_PT,
   82.37 +    TRCE_SFLAG_PREALLOC_UNHOOK,
   82.38 +    TRCE_SFLAG_UNSYNC,
   82.39 +    TRCE_SFLAG_OOS_FIXUP_ADD,
   82.40 +    TRCE_SFLAG_OOS_FIXUP_EVICT,
   82.41 +};
   82.42 +
   82.43 +/******************************************************************************
   82.44   * The shadow lock.
   82.45   *
   82.46   * This lock is per-domain.  It is intended to allow us to make atomic
   82.47 @@ -143,6 +180,12 @@ extern int shadow_audit_enable;
   82.48      } while (0)
   82.49  
   82.50  
   82.51 +/* Size (in bytes) of a guest PTE */
   82.52 +#if GUEST_PAGING_LEVELS >= 3
   82.53 +# define GUEST_PTE_SIZE 8
   82.54 +#else
   82.55 +# define GUEST_PTE_SIZE 4
   82.56 +#endif
   82.57  
   82.58  /******************************************************************************
   82.59   * Auditing routines 
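
The TRCE_SFLAG_* enum above defines bit positions, not values: TRACE_SHADOW_PATH_FLAG() shifts 1 by the enum entry, so a trace consumer tests the flags field bit by bit. A hypothetical decoder, using only a subset of the bits above (the print strings are illustrative):

#include <stdio.h>
#include <stdint.h>

enum {                       /* subset of the TRCE_SFLAG_* bits above */
    TRCE_SFLAG_SET_AD,
    TRCE_SFLAG_SET_A,
    TRCE_SFLAG_SHADOW_L1_GET_REF,
    TRCE_SFLAG_SHADOW_L1_PUT_REF,
};

static void decode_path_flags(uint32_t flags)
{
    if ( flags & (1u << TRCE_SFLAG_SET_AD) )
        printf("  guest A/D bits set\n");
    if ( flags & (1u << TRCE_SFLAG_SHADOW_L1_GET_REF) )
        printf("  shadow L1 took a reference\n");
    if ( flags & (1u << TRCE_SFLAG_SHADOW_L1_PUT_REF) )
        printf("  shadow L1 dropped a reference\n");
}

int main(void)
{
    decode_path_flags((1u << TRCE_SFLAG_SET_AD) |
                      (1u << TRCE_SFLAG_SHADOW_L1_GET_REF));
    return 0;
}
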
    83.1 --- a/xen/arch/x86/physdev.c	Fri Sep 12 14:32:45 2008 +0900
    83.2 +++ b/xen/arch/x86/physdev.c	Fri Sep 12 14:47:40 2008 +0900
    83.3 @@ -58,9 +58,6 @@ static int get_free_pirq(struct domain *
    83.4      return i;
    83.5  }
    83.6  
    83.7 -/*
    83.8 - * Caller hold the irq_lock
    83.9 - */
   83.10  static int map_domain_pirq(struct domain *d, int pirq, int vector,
   83.11                             struct physdev_map_pirq *map)
   83.12  {
   83.13 @@ -136,13 +133,12 @@ done:
   83.14      return ret;
   83.15  }
   83.16  
   83.17 -/*
   83.18 - * The pirq should has been unbound before this call
   83.19 - */
   83.20 +/* The pirq should have been unbound before this call. */
   83.21  static int unmap_domain_pirq(struct domain *d, int pirq)
   83.22  {
   83.23 -    int ret = 0;
   83.24 -    int vector;
   83.25 +    unsigned long flags;
   83.26 +    irq_desc_t *desc;
   83.27 +    int vector, ret = 0;
   83.28  
   83.29      if ( d == NULL || pirq < 0 || pirq >= NR_PIRQS )
   83.30          return -EINVAL;
   83.31 @@ -159,33 +155,29 @@ static int unmap_domain_pirq(struct doma
   83.32          gdprintk(XENLOG_G_ERR, "domain %X: pirq %x not mapped still\n",
   83.33                   d->domain_id, pirq);
   83.34          ret = -EINVAL;
   83.35 -    }
   83.36 -    else
   83.37 -    {
   83.38 -        unsigned long flags;
   83.39 -        irq_desc_t *desc;
   83.40 -
   83.41 -        desc = &irq_desc[vector];
   83.42 -        spin_lock_irqsave(&desc->lock, flags);
   83.43 -        if ( desc->msi_desc )
   83.44 -            pci_disable_msi(vector);
   83.45 -
   83.46 -        if ( desc->handler == &pci_msi_type )
   83.47 -        {
   83.48 -            /* MSI is not shared, so should be released already */
   83.49 -            BUG_ON(desc->status & IRQ_GUEST);
   83.50 -            irq_desc[vector].handler = &no_irq_type;
   83.51 -        }
   83.52 -        spin_unlock_irqrestore(&desc->lock, flags);
   83.53 -
   83.54 -        d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
   83.55 +        goto done;
   83.56      }
   83.57  
   83.58 +    desc = &irq_desc[vector];
   83.59 +    spin_lock_irqsave(&desc->lock, flags);
   83.60 +    if ( desc->msi_desc )
   83.61 +        pci_disable_msi(vector);
   83.62 +
   83.63 +    if ( desc->handler == &pci_msi_type )
   83.64 +    {
   83.65 +        /* MSI is not shared, so should be released already */
   83.66 +        BUG_ON(desc->status & IRQ_GUEST);
   83.67 +        irq_desc[vector].handler = &no_irq_type;
   83.68 +    }
   83.69 +    spin_unlock_irqrestore(&desc->lock, flags);
   83.70 +
   83.71 +    d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0;
   83.72 +
   83.73      ret = irq_deny_access(d, pirq);
   83.74 -
   83.75      if ( ret )
   83.76          gdprintk(XENLOG_G_ERR, "deny irq %x access failed\n", pirq);
   83.77  
   83.78 + done:
   83.79      return ret;
   83.80  }
   83.81  
   83.82 @@ -195,10 +187,6 @@ static int physdev_map_pirq(struct physd
   83.83      int vector, pirq, ret = 0;
   83.84      unsigned long flags;
   83.85  
   83.86 -    /* if msi_enable is not enabled, map always succeeds */
   83.87 -    if ( !msi_enable )
   83.88 -        return 0;
   83.89 -
   83.90      if ( !IS_PRIV(current->domain) )
   83.91          return -EPERM;
   83.92  
   83.93 @@ -308,29 +296,21 @@ static int physdev_unmap_pirq(struct phy
   83.94      unsigned long flags;
   83.95      int ret;
   83.96  
   83.97 -    if ( !msi_enable )
   83.98 -        return 0;
   83.99 -
  83.100      if ( !IS_PRIV(current->domain) )
  83.101          return -EPERM;
  83.102  
  83.103 -    if ( !unmap )
  83.104 -        return -EINVAL;
  83.105 -
  83.106      if ( unmap->domid == DOMID_SELF )
  83.107          d = rcu_lock_domain(current->domain);
  83.108      else
  83.109          d = rcu_lock_domain_by_id(unmap->domid);
  83.110  
  83.111      if ( d == NULL )
  83.112 -    {
  83.113 -        rcu_unlock_domain(d);
  83.114          return -ESRCH;
  83.115 -    }
  83.116  
  83.117      spin_lock_irqsave(&d->arch.irq_lock, flags);
  83.118      ret = unmap_domain_pirq(d, unmap->pirq);
  83.119      spin_unlock_irqrestore(&d->arch.irq_lock, flags);
  83.120 +
  83.121      rcu_unlock_domain(d);
  83.122  
  83.123      return ret;
  83.124 @@ -452,20 +432,14 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
  83.125  
  83.126          irq = irq_op.irq;
  83.127          ret = -EINVAL;
  83.128 -        if ( ((irq < 0) && (irq != AUTO_ASSIGN)) || (irq >= NR_IRQS) )
  83.129 +        if ( (irq < 0) || (irq >= NR_IRQS) )
  83.130              break;
  83.131  
  83.132          irq_op.vector = assign_irq_vector(irq);
  83.133  
  83.134 -        ret = 0;
  83.135 -
  83.136 -        if ( msi_enable )
  83.137 -        {
  83.138 -            spin_lock_irqsave(&dom0->arch.irq_lock, flags);
  83.139 -            if ( irq != AUTO_ASSIGN )
  83.140 -                ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
  83.141 -            spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
  83.142 -        }
  83.143 +        spin_lock_irqsave(&dom0->arch.irq_lock, flags);
  83.144 +        ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL);
  83.145 +        spin_unlock_irqrestore(&dom0->arch.irq_lock, flags);
  83.146  
  83.147          if ( copy_to_guest(arg, &irq_op, 1) != 0 )
  83.148              ret = -EFAULT;
    84.1 --- a/xen/arch/x86/platform_hypercall.c	Fri Sep 12 14:32:45 2008 +0900
    84.2 +++ b/xen/arch/x86/platform_hypercall.c	Fri Sep 12 14:47:40 2008 +0900
    84.3 @@ -192,6 +192,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
    84.4      break;
    84.5  
    84.6      case XENPF_firmware_info:
    84.7 +        ret = xsm_firmware_info();
    84.8 +        if ( ret )
    84.9 +            break;
   84.10 +
   84.11          switch ( op->u.firmware_info.type )
   84.12          {
   84.13          case XEN_FW_DISK_INFO: {
   84.14 @@ -280,10 +284,18 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
   84.15          break;
   84.16  
   84.17      case XENPF_enter_acpi_sleep:
   84.18 +        ret = xsm_acpi_sleep();
   84.19 +        if ( ret )
   84.20 +            break;
   84.21 +
   84.22          ret = acpi_enter_sleep(&op->u.enter_acpi_sleep);
   84.23          break;
   84.24  
   84.25      case XENPF_change_freq:
   84.26 +        ret = xsm_change_freq();
   84.27 +        if ( ret )
   84.28 +            break;
   84.29 +
   84.30          ret = -ENOSYS;
   84.31          if ( cpufreq_controller != FREQCTL_dom0_kernel )
   84.32              break;
   84.33 @@ -306,6 +318,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
   84.34          XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
   84.35          XEN_GUEST_HANDLE(uint64) idletimes;
   84.36  
   84.37 +        ret = xsm_getidletime();
   84.38 +        if ( ret )
   84.39 +            break;
   84.40 +
   84.41          ret = -ENOSYS;
   84.42          if ( cpufreq_controller != FREQCTL_dom0_kernel )
   84.43              break;
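
Each platform op above gains the same guard: call the xsm_*() permission hook first and bail before any hardware or domain state is touched. A minimal sketch of that dispatch shape, with xsm_change_freq() as a stand-in policy function:

#include <stdio.h>
#include <errno.h>

static int xsm_change_freq(void) { return 0; }      /* 0 = allow, -EPERM = deny */

static long do_platform_op(int cmd)
{
    long ret = 0;

    switch ( cmd )
    {
    case 1 /* XENPF_change_freq */:
        ret = xsm_change_freq();
        if ( ret )
            break;                                  /* denied: no side effects */
        printf("frequency change performed\n");
        break;
    default:
        ret = -ENOSYS;
        break;
    }
    return ret;
}

int main(void)
{
    return do_platform_op(1) != 0;
}
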
    85.1 --- a/xen/arch/x86/smpboot.c	Fri Sep 12 14:32:45 2008 +0900
    85.2 +++ b/xen/arch/x86/smpboot.c	Fri Sep 12 14:47:40 2008 +0900
    85.3 @@ -1225,15 +1225,6 @@ int __cpu_disable(void)
    85.4  	if (cpu == 0)
    85.5  		return -EBUSY;
    85.6  
    85.7 -	/*
    85.8 -	 * Only S3 is using this path, and thus idle vcpus are running on all
    85.9 -	 * APs when we are called. To support full cpu hotplug, other 
   85.10 -	 * notification mechanisms should be introduced (e.g., migrate vcpus
   85.11 -	 * off this physical cpu before rendezvous point).
   85.12 -	 */
   85.13 -	if (!is_idle_vcpu(current))
   85.14 -		return -EINVAL;
   85.15 -
   85.16  	local_irq_disable();
   85.17  	clear_local_APIC();
   85.18  	/* Allow any queued timer interrupts to get serviced */
   85.19 @@ -1249,6 +1240,9 @@ int __cpu_disable(void)
   85.20  	fixup_irqs(map);
   85.21  	/* It's now safe to remove this processor from the online map */
   85.22  	cpu_clear(cpu, cpu_online_map);
   85.23 +
   85.24 +	cpu_disable_scheduler();
   85.25 +
   85.26  	return 0;
   85.27  }
   85.28  
   85.29 @@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
   85.30      return __cpu_disable();
   85.31  }
   85.32  
   85.33 -/* 
   85.34 - * XXX: One important thing missed here is to migrate vcpus
   85.35 - * from dead cpu to other online ones and then put whole
   85.36 - * system into a stop state. It assures a safe environment
   85.37 - * for a cpu hotplug/remove at normal running state.
   85.38 - *
   85.39 - * However for xen PM case, at this point:
   85.40 - * 	-> All other domains should be notified with PM event,
   85.41 - *	   and then in following states:
   85.42 - *		* Suspend state, or
   85.43 - *		* Paused state, which is a force step to all
   85.44 - *		  domains if they do nothing to suspend
   85.45 - *	-> All vcpus of dom0 (except vcpu0) have already beem
   85.46 - *	   hot removed
   85.47 - * with the net effect that all other cpus only have idle vcpu
   85.48 - * running. In this special case, we can avoid vcpu migration
   85.49 - * then and system can be considered in a stop state.
   85.50 - *
   85.51 - * So current cpu hotplug is a special version for PM specific
   85.52 - * usage, and need more effort later for full cpu hotplug.
   85.53 - * (ktian1)
   85.54 - */
   85.55  int cpu_down(unsigned int cpu)
   85.56  {
   85.57  	int err = 0;
   85.58 @@ -1307,6 +1279,12 @@ int cpu_down(unsigned int cpu)
   85.59  		goto out;
   85.60  	}
   85.61  
 85.62 +	/* Cannot offline the BSP */
   85.63 +	if (cpu == 0) {
   85.64 +		err = -EINVAL;
   85.65 +		goto out;
   85.66 +	}
   85.67 +
   85.68  	if (!cpu_online(cpu)) {
   85.69  		err = -EINVAL;
   85.70  		goto out;
    86.1 --- a/xen/arch/x86/time.c	Fri Sep 12 14:32:45 2008 +0900
    86.2 +++ b/xen/arch/x86/time.c	Fri Sep 12 14:47:40 2008 +0900
    86.3 @@ -993,15 +993,16 @@ static void local_time_calibration(void)
    86.4   * All CPUS snapshot their local TSC and extrapolation of system time.
    86.5   */
    86.6  struct calibration_rendezvous {
    86.7 +    cpumask_t cpu_calibration_map;
    86.8      atomic_t nr_cpus;
    86.9      s_time_t master_stime;
   86.10  };
   86.11  
   86.12  static void time_calibration_rendezvous(void *_r)
   86.13  {
   86.14 -    unsigned int total_cpus = num_online_cpus();
   86.15      struct cpu_calibration *c = &this_cpu(cpu_calibration);
   86.16      struct calibration_rendezvous *r = _r;
   86.17 +    unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
   86.18  
   86.19      if ( smp_processor_id() == 0 )
   86.20      {
   86.21 @@ -1029,11 +1030,13 @@ static void time_calibration_rendezvous(
   86.22  static void time_calibration(void *unused)
   86.23  {
   86.24      struct calibration_rendezvous r = {
   86.25 +        .cpu_calibration_map = cpu_online_map,
   86.26          .nr_cpus = ATOMIC_INIT(0)
   86.27      };
   86.28  
   86.29      /* @wait=1 because we must wait for all cpus before freeing @r. */
   86.30 -    on_each_cpu(time_calibration_rendezvous, &r, 0, 1);
   86.31 +    on_selected_cpus(r.cpu_calibration_map,
   86.32 +                     time_calibration_rendezvous, &r, 0, 1);
   86.33  }
   86.34  
   86.35  void init_percpu_time(void)
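
The time.c change snapshots cpu_online_map into the rendezvous structure, so every participant computes the same total_cpus and the count cannot change under the spinning CPUs if a CPU is unplugged mid-calibration; the previous live num_online_cpus() call had no such guarantee. A stand-alone sketch of the snapshot-then-rendezvous idea, with pthreads standing in for CPUs and a mutex standing in for the atomic counter:

#include <pthread.h>
#include <stdio.h>

struct rendezvous {
    int total_cpus;              /* snapshot, not the live online count */
    int nr_cpus;                 /* atomic_t in the real code */
    pthread_mutex_t lock;
};

static struct rendezvous r = { .lock = PTHREAD_MUTEX_INITIALIZER };

static void *calibrate(void *arg)
{
    long id = (long)arg;

    pthread_mutex_lock(&r.lock);
    r.nr_cpus++;                 /* atomic_inc(&r->nr_cpus) in Xen */
    pthread_mutex_unlock(&r.lock);

    /* spin until everyone in the snapshot has arrived */
    for ( ;; )
    {
        pthread_mutex_lock(&r.lock);
        int arrived = r.nr_cpus;
        pthread_mutex_unlock(&r.lock);
        if ( arrived == r.total_cpus )
            break;
    }
    printf("cpu %ld: all %d arrived, snapshotting local clock\n",
           id, r.total_cpus);
    return NULL;
}

int main(void)
{
    pthread_t t[4];
    r.total_cpus = 4;            /* cpus_weight(r.cpu_calibration_map) */
    for ( long i = 0; i < 4; i++ )
        pthread_create(&t[i], NULL, calibrate, (void *)i);
    for ( int i = 0; i < 4; i++ )
        pthread_join(t[i], NULL);
    return 0;
}
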
    87.1 --- a/xen/arch/x86/traps.c	Fri Sep 12 14:32:45 2008 +0900
    87.2 +++ b/xen/arch/x86/traps.c	Fri Sep 12 14:47:40 2008 +0900
    87.3 @@ -47,7 +47,7 @@
    87.4  #include <xen/version.h>
    87.5  #include <xen/kexec.h>
    87.6  #include <xen/trace.h>
    87.7 -#include <asm/paging.h>
    87.8 +#include <xen/paging.h>
    87.9  #include <asm/system.h>
   87.10  #include <asm/io.h>
   87.11  #include <asm/atomic.h>
   87.12 @@ -2116,6 +2116,36 @@ static int emulate_privileged_op(struct 
   87.13              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   87.14                  goto fail;
   87.15              break;
   87.16 +        case MSR_AMD64_NB_CFG:
   87.17 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
   87.18 +                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
   87.19 +                goto fail;
   87.20 +            if ( !IS_PRIV(v->domain) )
   87.21 +                break;
   87.22 +            if ( (rdmsr_safe(MSR_AMD64_NB_CFG, l, h) != 0) ||
   87.23 +                 (eax != l) ||
   87.24 +                 ((edx ^ h) & ~(1 << (AMD64_NB_CFG_CF8_EXT_ENABLE_BIT - 32))) )
   87.25 +                goto invalid;
   87.26 +            if ( wrmsr_safe(MSR_AMD64_NB_CFG, eax, edx) != 0 )
   87.27 +                goto fail;
   87.28 +            break;
   87.29 +        case MSR_FAM10H_MMIO_CONF_BASE:
   87.30 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
   87.31 +                 boot_cpu_data.x86 < 0x10 || boot_cpu_data.x86 > 0x11 )
   87.32 +                goto fail;
   87.33 +            if ( !IS_PRIV(v->domain) )
   87.34 +                break;
   87.35 +            if ( (rdmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, l, h) != 0) ||
   87.36 +                 (((((u64)h << 32) | l) ^ res) &
   87.37 +                  ~((1 << FAM10H_MMIO_CONF_ENABLE_BIT) |
   87.38 +                    (FAM10H_MMIO_CONF_BUSRANGE_MASK <<
   87.39 +                     FAM10H_MMIO_CONF_BUSRANGE_SHIFT) |
   87.40 +                    ((u64)FAM10H_MMIO_CONF_BASE_MASK <<
   87.41 +                     FAM10H_MMIO_CONF_BASE_SHIFT))) )
   87.42 +                goto invalid;
   87.43 +            if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
   87.44 +                goto fail;
   87.45 +            break;
   87.46          case MSR_IA32_PERF_CTL:
   87.47              if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   87.48                  goto fail;
   87.49 @@ -2124,11 +2154,18 @@ static int emulate_privileged_op(struct 
   87.50              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   87.51                  goto fail;
   87.52              break;
   87.53 +        case MSR_IA32_THERM_CONTROL:
   87.54 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   87.55 +                goto fail;
   87.56 +            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
   87.57 +                goto fail;
   87.58 +            break;
   87.59          default:
   87.60              if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
   87.61                  break;
   87.62              if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
   87.63                   (eax != l) || (edx != h) )
   87.64 +        invalid:
   87.65                  gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
   87.66                          "%08x:%08x to %08x:%08x.\n",
   87.67                          _p(regs->ecx), h, l, edx, eax);
   87.68 @@ -2199,6 +2236,12 @@ static int emulate_privileged_op(struct 
   87.69                           MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
   87.70                           MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
   87.71              break;
   87.72 +        case MSR_IA32_THERM_CONTROL:
   87.73 +            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
   87.74 +                goto fail;
   87.75 +            if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
   87.76 +                goto fail;
   87.77 +            break;
   87.78          default:
   87.79              if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
   87.80              {
    88.1 --- a/xen/common/domain.c	Fri Sep 12 14:32:45 2008 +0900
    88.2 +++ b/xen/common/domain.c	Fri Sep 12 14:47:40 2008 +0900
    88.3 @@ -651,9 +651,11 @@ void vcpu_reset(struct vcpu *v)
    88.4  
    88.5      set_bit(_VPF_down, &v->pause_flags);
    88.6  
    88.7 +    clear_bit(v->vcpu_id, d->poll_mask);
    88.8 +    v->poll_evtchn = 0;
    88.9 +
   88.10      v->fpu_initialised = 0;
   88.11      v->fpu_dirtied     = 0;
   88.12 -    v->is_polling      = 0;
   88.13      v->is_initialised  = 0;
   88.14      v->nmi_pending     = 0;
   88.15      v->mce_pending     = 0;
    89.1 --- a/xen/common/domctl.c	Fri Sep 12 14:32:45 2008 +0900
    89.2 +++ b/xen/common/domctl.c	Fri Sep 12 14:47:40 2008 +0900
    89.3 @@ -655,9 +655,6 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
    89.4          spin_lock(&d->page_alloc_lock);
    89.5          if ( new_max >= d->tot_pages )
    89.6          {
    89.7 -            ret = guest_physmap_max_mem_pages(d, new_max);
    89.8 -            if ( ret != 0 )
    89.9 -                break;
   89.10              d->max_pages = new_max;
   89.11              ret = 0;
   89.12          }
   89.13 @@ -729,16 +726,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   89.14          if ( d == NULL )
   89.15              break;
   89.16  
   89.17 -        ret = xsm_irq_permission(d, pirq, op->u.irq_permission.allow_access);
   89.18 -        if ( ret )
   89.19 -            goto irq_permission_out;
   89.20 -        
   89.21          if ( op->u.irq_permission.allow_access )
   89.22              ret = irq_permit_access(d, pirq);
   89.23          else
   89.24              ret = irq_deny_access(d, pirq);
   89.25  
   89.26 -    irq_permission_out:
   89.27          rcu_unlock_domain(d);
   89.28      }
   89.29      break;
   89.30 @@ -758,16 +750,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   89.31          if ( d == NULL )
   89.32              break;
   89.33  
   89.34 -        ret = xsm_iomem_permission(d, mfn, op->u.iomem_permission.allow_access);
   89.35 -        if ( ret )
   89.36 -            goto iomem_permission_out;
   89.37 -
   89.38          if ( op->u.iomem_permission.allow_access )
   89.39              ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
   89.40          else
   89.41              ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
   89.42  
   89.43 -    iomem_permission_out:
   89.44          rcu_unlock_domain(d);
   89.45      }
   89.46      break;
   89.47 @@ -815,6 +802,12 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
   89.48              goto set_target_out;
   89.49          }
   89.50  
   89.51 +        ret = xsm_set_target(d, e);
   89.52 +        if ( ret ) {
   89.53 +            put_domain(e);
   89.54 +            goto set_target_out;            
   89.55 +        }
   89.56 +
   89.57          /* Hold reference on @e until we destroy @d. */
   89.58          d->target = e;
   89.59  
    90.1 --- a/xen/common/event_channel.c	Fri Sep 12 14:32:45 2008 +0900
    90.2 +++ b/xen/common/event_channel.c	Fri Sep 12 14:47:40 2008 +0900
    90.3 @@ -545,6 +545,7 @@ out:
    90.4  static int evtchn_set_pending(struct vcpu *v, int port)
    90.5  {
    90.6      struct domain *d = v->domain;
    90.7 +    int vcpuid;
    90.8  
    90.9      /*
   90.10       * The following bit operations must happen in strict order.
   90.11 @@ -564,15 +565,19 @@ static int evtchn_set_pending(struct vcp
   90.12      }
   90.13      
   90.14      /* Check if some VCPU might be polling for this event. */
   90.15 -    if ( unlikely(d->is_polling) )
   90.16 +    if ( likely(bitmap_empty(d->poll_mask, MAX_VIRT_CPUS)) )
   90.17 +        return 0;
   90.18 +
   90.19 +    /* Wake any interested (or potentially interested) pollers. */
   90.20 +    for ( vcpuid = find_first_bit(d->poll_mask, MAX_VIRT_CPUS);
   90.21 +          vcpuid < MAX_VIRT_CPUS;
   90.22 +          vcpuid = find_next_bit(d->poll_mask, MAX_VIRT_CPUS, vcpuid+1) )
   90.23      {
   90.24 -        d->is_polling = 0;
   90.25 -        smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */
   90.26 -        for_each_vcpu ( d, v )
   90.27 +        v = d->vcpu[vcpuid];
   90.28 +        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
   90.29 +             test_and_clear_bit(vcpuid, d->poll_mask) )
   90.30          {
   90.31 -            if ( !v->is_polling )
   90.32 -                continue;
   90.33 -            v->is_polling = 0;
   90.34 +            v->poll_evtchn = 0;
   90.35              vcpu_unblock(v);
   90.36          }
   90.37      }
    91.1 --- a/xen/common/rangeset.c	Fri Sep 12 14:32:45 2008 +0900
    91.2 +++ b/xen/common/rangeset.c	Fri Sep 12 14:47:40 2008 +0900
    91.3 @@ -10,6 +10,7 @@
    91.4  #include <xen/sched.h>
    91.5  #include <xen/errno.h>
    91.6  #include <xen/rangeset.h>
    91.7 +#include <xsm/xsm.h>
    91.8  
    91.9  /* An inclusive range [s,e] and pointer to next range in ascending order. */
   91.10  struct range {
   91.11 @@ -96,6 +97,10 @@ int rangeset_add_range(
   91.12      struct range *x, *y;
   91.13      int rc = 0;
   91.14  
   91.15 +    rc = xsm_add_range(r->domain, r->name, s, e);
   91.16 +    if ( rc )
   91.17 +        return rc;
   91.18 +
   91.19      ASSERT(s <= e);
   91.20  
   91.21      spin_lock(&r->lock);
   91.22 @@ -164,6 +169,10 @@ int rangeset_remove_range(
   91.23      struct range *x, *y, *t;
   91.24      int rc = 0;
   91.25  
   91.26 +    rc = xsm_remove_range(r->domain, r->name, s, e);
   91.27 +    if ( rc )
   91.28 +        return rc;
   91.29 +
   91.30      ASSERT(s <= e);
   91.31  
   91.32      spin_lock(&r->lock);
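
Together with the domctl.c hunks above, this moves the irq/iomem permission checks out of the individual hypercall handlers and into rangeset_add_range()/rangeset_remove_range() themselves, so every caller passes through one chokepoint. A toy sketch of the idea; the policy function and its rule are stand-ins:

#include <stdio.h>
#include <errno.h>

static int xsm_add_range(const char *name, unsigned long s, unsigned long e)
{
    if ( e - s > 16 )                   /* toy policy: small ranges only */
    {
        fprintf(stderr, "%s: [%lu,%lu] denied\n", name, s, e);
        return -EPERM;
    }
    return 0;
}

static int rangeset_add_range(const char *name, unsigned long s,
                              unsigned long e)
{
    int rc = xsm_add_range(name, s, e); /* one hook covers all callers */
    if ( rc )
        return rc;
    printf("%s: added [%lu,%lu]\n", name, s, e);
    return 0;
}

int main(void)
{
    rangeset_add_range("irq", 3, 3);
    return rangeset_add_range("iomem", 0, 4096) == -EPERM ? 0 : 1;
}
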
    92.1 --- a/xen/common/sched_credit.c	Fri Sep 12 14:32:45 2008 +0900
    92.2 +++ b/xen/common/sched_credit.c	Fri Sep 12 14:47:40 2008 +0900
    92.3 @@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
    92.4  
    92.5      BUG_ON( cpu != snext->vcpu->processor );
    92.6  
    92.7 +    /* If this CPU is going offline we shouldn't steal work. */
    92.8 +    if ( unlikely(!cpu_online(cpu)) )
    92.9 +        goto out;
   92.10 +
   92.11      if ( snext->pri == CSCHED_PRI_IDLE )
   92.12          CSCHED_STAT_CRANK(load_balance_idle);
   92.13      else if ( snext->pri == CSCHED_PRI_TS_OVER )
   92.14 @@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
   92.15              return speer;
   92.16      }
   92.17  
   92.18 + out:
   92.19      /* Failed to find more important work elsewhere... */
   92.20      __runq_remove(snext);
   92.21      return snext;
    93.1 --- a/xen/common/schedule.c	Fri Sep 12 14:32:45 2008 +0900
    93.2 +++ b/xen/common/schedule.c	Fri Sep 12 14:47:40 2008 +0900
    93.3 @@ -63,12 +63,32 @@ static struct scheduler ops;
    93.4           (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
    93.5            : (typeof(ops.fn(__VA_ARGS__)))0 )
    93.6  
    93.7 +static inline void trace_runstate_change(struct vcpu *v, int new_state)
    93.8 +{
    93.9 +    struct { uint32_t vcpu:16, domain:16; } d;
   93.10 +    uint32_t event;
   93.11 +
   93.12 +    if ( likely(!tb_init_done) )
   93.13 +        return;
   93.14 +
   93.15 +    d.vcpu = v->vcpu_id;
   93.16 +    d.domain = v->domain->domain_id;
   93.17 +
   93.18 +    event = TRC_SCHED_RUNSTATE_CHANGE;
   93.19 +    event |= ( v->runstate.state & 0x3 ) << 8;
   93.20 +    event |= ( new_state & 0x3 ) << 4;
   93.21 +
   93.22 +    __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
   93.23 +}
   93.24 +
   93.25  static inline void vcpu_runstate_change(
   93.26      struct vcpu *v, int new_state, s_time_t new_entry_time)
   93.27  {
   93.28      ASSERT(v->runstate.state != new_state);
   93.29      ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
   93.30  
   93.31 +    trace_runstate_change(v, new_state);
   93.32 +
   93.33      v->runstate.time[v->runstate.state] +=
   93.34          new_entry_time - v->runstate.state_entry_time;
   93.35      v->runstate.state_entry_time = new_entry_time;
   93.36 @@ -198,6 +218,27 @@ void vcpu_wake(struct vcpu *v)
   93.37      TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
   93.38  }
   93.39  
   93.40 +void vcpu_unblock(struct vcpu *v)
   93.41 +{
   93.42 +    if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
   93.43 +        return;
   93.44 +
   93.45 +    /* Polling period ends when a VCPU is unblocked. */
   93.46 +    if ( unlikely(v->poll_evtchn != 0) )
   93.47 +    {
   93.48 +        v->poll_evtchn = 0;
   93.49 +        /*
   93.50 +         * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
   93.51 +         * this VCPU (and it then going back to sleep on poll_mask).
   93.52 +         * Test-and-clear is idiomatic and ensures clear_bit not reordered.
   93.53 +         */
   93.54 +        if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
   93.55 +            clear_bit(_VPF_blocked, &v->pause_flags);
   93.56 +    }
   93.57 +
   93.58 +    vcpu_wake(v);
   93.59 +}
   93.60 +
   93.61  static void vcpu_migrate(struct vcpu *v)
   93.62  {
   93.63      unsigned long flags;
   93.64 @@ -247,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
   93.65      }
   93.66  }
   93.67  
   93.68 +/*
   93.69 + * This function is used by cpu_hotplug code from stop_machine context.
    93.70 + * Hence we can avoid needing to take the domlist_read_lock.
   93.71 + */
   93.72 +void cpu_disable_scheduler(void)
   93.73 +{
   93.74 +    struct domain *d;
   93.75 +    struct vcpu *v;
   93.76 +    unsigned int cpu = smp_processor_id();
   93.77 +
   93.78 +    for_each_domain ( d )
   93.79 +    {
   93.80 +        for_each_vcpu ( d, v )
   93.81 +        {
   93.82 +            if ( is_idle_vcpu(v) )
   93.83 +                continue;
   93.84 +
   93.85 +            if ( (cpus_weight(v->cpu_affinity) == 1) &&
   93.86 +                 cpu_isset(cpu, v->cpu_affinity) )
   93.87 +            {
   93.88 +                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
   93.89 +                        v->domain->domain_id, v->vcpu_id);
   93.90 +                cpus_setall(v->cpu_affinity);
   93.91 +            }
   93.92 +
   93.93 +            /*
   93.94 +             * Migrate single-shot timers to CPU0. A new cpu will automatically
   93.95 +             * be chosen when the timer is next re-set.
   93.96 +             */
   93.97 +            if ( v->singleshot_timer.cpu == cpu )
   93.98 +                migrate_timer(&v->singleshot_timer, 0);
   93.99 +
  93.100 +            if ( v->processor == cpu )
  93.101 +            {
  93.102 +                set_bit(_VPF_migrating, &v->pause_flags);
  93.103 +                vcpu_sleep_nosync(v);
  93.104 +                vcpu_migrate(v);
  93.105 +            }
  93.106 +        }
  93.107 +    }
  93.108 +}
  93.109 +
  93.110  static int __vcpu_set_affinity(
  93.111      struct vcpu *v, cpumask_t *affinity,
  93.112      bool_t old_lock_status, bool_t new_lock_status)
  93.113 @@ -337,7 +420,7 @@ static long do_poll(struct sched_poll *s
  93.114      struct vcpu   *v = current;
  93.115      struct domain *d = v->domain;
  93.116      evtchn_port_t  port;
  93.117 -    long           rc = 0;
  93.118 +    long           rc;
  93.119      unsigned int   i;
  93.120  
  93.121      /* Fairly arbitrary limit. */
  93.122 @@ -348,11 +431,24 @@ static long do_poll(struct sched_poll *s
  93.123          return -EFAULT;
  93.124  
  93.125      set_bit(_VPF_blocked, &v->pause_flags);
  93.126 -    v->is_polling = 1;
  93.127 -    d->is_polling = 1;
  93.128 +    v->poll_evtchn = -1;
  93.129 +    set_bit(v->vcpu_id, d->poll_mask);
  93.130  
  93.131 +#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
  93.132      /* Check for events /after/ setting flags: avoids wakeup waiting race. */
  93.133 -    smp_wmb();
  93.134 +    smp_mb();
  93.135 +
  93.136 +    /*
  93.137 +     * Someone may have seen we are blocked but not that we are polling, or
  93.138 +     * vice versa. We are certainly being woken, so clean up and bail. Beyond
  93.139 +     * this point others can be guaranteed to clean up for us if they wake us.
  93.140 +     */
  93.141 +    rc = 0;
  93.142 +    if ( (v->poll_evtchn == 0) ||
  93.143 +         !test_bit(_VPF_blocked, &v->pause_flags) ||
  93.144 +         !test_bit(v->vcpu_id, d->poll_mask) )
  93.145 +        goto out;
  93.146 +#endif
  93.147  
  93.148      for ( i = 0; i < sched_poll->nr_ports; i++ )
  93.149      {
  93.150 @@ -369,6 +465,9 @@ static long do_poll(struct sched_poll *s
  93.151              goto out;
  93.152      }
  93.153  
  93.154 +    if ( sched_poll->nr_ports == 1 )
  93.155 +        v->poll_evtchn = port;
  93.156 +
  93.157      if ( sched_poll->timeout != 0 )
  93.158          set_timer(&v->poll_timer, sched_poll->timeout);
  93.159  
  93.160 @@ -378,7 +477,8 @@ static long do_poll(struct sched_poll *s
  93.161      return 0;
  93.162  
  93.163   out:
  93.164 -    v->is_polling = 0;
  93.165 +    v->poll_evtchn = 0;
  93.166 +    clear_bit(v->vcpu_id, d->poll_mask);
  93.167      clear_bit(_VPF_blocked, &v->pause_flags);
  93.168      return rc;
  93.169  }
  93.170 @@ -628,7 +728,9 @@ static void vcpu_periodic_timer_work(str
  93.171          return;
  93.172  
  93.173      periodic_next_event = v->periodic_last_event + v->periodic_period;
  93.174 -    if ( now > periodic_next_event )
  93.175 +
  93.176 +    /* The timer subsystem may call us up to TIME_SLOP ahead of deadline. */
  93.177 +    if ( (now + TIME_SLOP) > periodic_next_event )
  93.178      {
  93.179          send_timer_event(v);
  93.180          v->periodic_last_event = now;
  93.181 @@ -758,11 +860,8 @@ static void poll_timer_fn(void *data)
  93.182  {
  93.183      struct vcpu *v = data;
  93.184  
  93.185 -    if ( !v->is_polling )
  93.186 -        return;
  93.187 -
  93.188 -    v->is_polling = 0;
  93.189 -    vcpu_unblock(v);
  93.190 +    if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
  93.191 +        vcpu_unblock(v);
  93.192  }
  93.193  
  93.194  /* Initialise the data structures. */
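
The do_poll() changes optimise the common case of a guest polling exactly one event channel: with nr_ports == 1 the port is recorded in v->poll_evtchn, letting evtchn_set_pending() skip vcpus that are polling unrelated ports. A guest-side sketch of that case (a sketch only, assuming the usual Xen guest headers providing HYPERVISOR_sched_op() and set_xen_guest_handle(); error handling omitted):

    /* Assumes a Xen guest environment with the public sched.h interface. */
    #include <xen/interface/sched.h>

    static void wait_for_port(evtchn_port_t port, uint64_t timeout)
    {
        struct sched_poll poll = {
            .nr_ports = 1,       /* exactly one port: hits the new fast path */
            .timeout  = timeout  /* absolute Xen system time; 0 = no timeout */
        };

        set_xen_guest_handle(poll.ports, &port);
        (void)HYPERVISOR_sched_op(SCHEDOP_poll, &poll); /* blocks until pending */
    }
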
    94.1 --- a/xen/common/sysctl.c	Fri Sep 12 14:32:45 2008 +0900
    94.2 +++ b/xen/common/sysctl.c	Fri Sep 12 14:47:40 2008 +0900
    94.3 @@ -149,6 +149,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
    94.4          char c;
    94.5          uint32_t i;
    94.6  
    94.7 +        ret = xsm_debug_keys();
    94.8 +        if ( ret )
    94.9 +            break;
   94.10 +
   94.11          for ( i = 0; i < op->u.debug_keys.nr_keys; i++ )
   94.12          {
   94.13              if ( copy_from_guest_offset(&c, op->u.debug_keys.keys, i, 1) )
   94.14 @@ -166,6 +170,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   94.15  
   94.16          nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
   94.17  
   94.18 +        ret = xsm_getcpuinfo();
   94.19 +        if ( ret )
   94.20 +            break;
   94.21 +
   94.22          for ( i = 0; i < nr_cpus; i++ )
   94.23          {
   94.24              /* Assume no holes in idle-vcpu map. */
   94.25 @@ -188,6 +196,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   94.26  
   94.27      case XEN_SYSCTL_availheap:
   94.28      { 
   94.29 +        ret = xsm_availheap();
   94.30 +        if ( ret )
   94.31 +            break;
   94.32 +
   94.33          op->u.availheap.avail_bytes = avail_domheap_pages_region(
   94.34              op->u.availheap.node,
   94.35              op->u.availheap.min_bitwidth,
    95.1 --- a/xen/common/trace.c	Fri Sep 12 14:32:45 2008 +0900
    95.2 +++ b/xen/common/trace.c	Fri Sep 12 14:47:40 2008 +0900
    95.3 @@ -58,6 +58,7 @@ static int t_buf_highwater;
    95.4  
    95.5  /* Number of records lost due to per-CPU trace buffer being full. */
    95.6  static DEFINE_PER_CPU(unsigned long, lost_records);
    95.7 +static DEFINE_PER_CPU(unsigned long, lost_records_first_tsc);
    95.8  
    95.9  /* a flag recording whether initialization has been done */
   95.10  /* or more properly, if the tbuf subsystem is enabled right now */
   95.11 @@ -147,6 +148,31 @@ static int tb_set_size(int size)
   95.12      return 0;
   95.13  }
   95.14  
   95.15 +int trace_will_trace_event(u32 event)
   95.16 +{
   95.17 +    if ( !tb_init_done )
   95.18 +        return 0;
   95.19 +
   95.20 +    /*
   95.21 +     * Copied from __trace_var()
   95.22 +     */
   95.23 +    if ( (tb_event_mask & event) == 0 )
   95.24 +        return 0;
   95.25 +
   95.26 +    /* match class */
   95.27 +    if ( ((tb_event_mask >> TRC_CLS_SHIFT) & (event >> TRC_CLS_SHIFT)) == 0 )
   95.28 +        return 0;
   95.29 +
   95.30 +    /* then match subclass */
   95.31 +    if ( (((tb_event_mask >> TRC_SUBCLS_SHIFT) & 0xf )
   95.32 +                & ((event >> TRC_SUBCLS_SHIFT) & 0xf )) == 0 )
   95.33 +        return 0;
   95.34 +
   95.35 +    if ( !cpu_isset(smp_processor_id(), tb_cpu_mask) )
   95.36 +        return 0;
   95.37 +
   95.38 +    return 1;
   95.39 +}
   95.40  
   95.41  /**
   95.42   * init_trace_bufs - performs initialization of the per-cpu trace buffers.
   95.43 @@ -354,22 +380,27 @@ static inline int insert_wrap_record(str
   95.44                      NULL);
   95.45  }
   95.46  
   95.47 -#define LOST_REC_SIZE 8
   95.48 +#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
   95.49  
   95.50  static inline int insert_lost_records(struct t_buf *buf)
   95.51  {
   95.52      struct {
   95.53          u32 lost_records;
   95.54 -    } ed;
   95.55 +        u32 did:16, vid:16;
   95.56 +        u64 first_tsc;
   95.57 +    } __attribute__((packed)) ed;
   95.58  
   95.59 +    ed.vid = current->vcpu_id;
   95.60 +    ed.did = current->domain->domain_id;
   95.61      ed.lost_records = this_cpu(lost_records);
   95.62 +    ed.first_tsc = this_cpu(lost_records_first_tsc);
   95.63  
   95.64      this_cpu(lost_records) = 0;
   95.65  
   95.66      return __insert_record(buf,
   95.67                             TRC_LOST_RECORDS,
   95.68                             sizeof(ed),
   95.69 -                           0 /* !cycles */,
   95.70 +                           1 /* cycles */,
   95.71                             LOST_REC_SIZE,
   95.72                             (unsigned char *)&ed);
   95.73  }
   95.74 @@ -401,7 +432,8 @@ void __trace_var(u32 event, int cycles, 
   95.75      int extra_word;
   95.76      int started_below_highwater;
   95.77  
   95.78 -    ASSERT(tb_init_done);
    95.79 +    if ( !tb_init_done )
   95.80 +        return;
   95.81  
   95.82      /* Convert byte count into word count, rounding up */
   95.83      extra_word = (extra / sizeof(u32));
   95.84 @@ -479,7 +511,8 @@ void __trace_var(u32 event, int cycles, 
   95.85      /* Do we have enough space for everything? */
   95.86      if ( total_size > bytes_to_tail )
   95.87      {
   95.88 -        this_cpu(lost_records)++;
   95.89 +        if ( ++this_cpu(lost_records) == 1 )
    95.90 +            this_cpu(lost_records_first_tsc) = (u64)get_cycles();
   95.91          local_irq_restore(flags);
   95.92          return;
   95.93      }
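
The lost-records record grows from a bare count to count + domain/vcpu ids + the TSC of the first drop, and is now written with a timestamp (cycles == 1). A hypothetical consumer-side parser for the new 28-byte layout (assumes little-endian: a 4-byte header word, a 64-bit TSC, then the packed body; not xentrace code):

    #include <stdint.h>
    #include <stdio.h>

    struct lost_records_body {
        uint32_t lost_records;
        uint32_t did:16, vid:16;
        uint64_t first_tsc;        /* TSC when the first record was dropped */
    } __attribute__((packed));

    /* rec points at the record: u32 header, u64 tsc, 16-byte body. */
    static void show_lost(const uint32_t *rec)
    {
        uint64_t tsc = (uint64_t)rec[1] | ((uint64_t)rec[2] << 32);
        const struct lost_records_body *b =
            (const struct lost_records_body *)&rec[3];

        printf("d%uv%u lost %u records; first drop at tsc %llu (record tsc %llu)\n",
               (unsigned)b->did, (unsigned)b->vid, b->lost_records,
               (unsigned long long)b->first_tsc, (unsigned long long)tsc);
    }
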
    96.1 --- a/xen/drivers/acpi/hwregs.c	Fri Sep 12 14:32:45 2008 +0900
    96.2 +++ b/xen/drivers/acpi/hwregs.c	Fri Sep 12 14:47:40 2008 +0900
    96.3 @@ -239,11 +239,13 @@ acpi_status acpi_set_register(u32 regist
    96.4  
    96.5  	case ACPI_REGISTER_PM2_CONTROL:
    96.6  
    96.7 +#if 0 /* Redundant read in original Linux code. */
    96.8  		status = acpi_hw_register_read(ACPI_REGISTER_PM2_CONTROL,
    96.9  					       &register_value);
   96.10  		if (ACPI_FAILURE(status)) {
   96.11  			goto unlock_and_exit;
   96.12  		}
   96.13 +#endif
   96.14  
   96.15  		ACPI_DEBUG_PRINT((ACPI_DB_IO,
   96.16  				  "PM2 control: Read %X from %8.8X%8.8X\n",
    97.1 --- a/xen/drivers/passthrough/iommu.c	Fri Sep 12 14:32:45 2008 +0900
    97.2 +++ b/xen/drivers/passthrough/iommu.c	Fri Sep 12 14:47:40 2008 +0900
    97.3 @@ -33,11 +33,13 @@ int amd_iov_detect(void);
    97.4   *   pv                         Enable IOMMU for PV domains
    97.5   *   no-pv                      Disable IOMMU for PV domains (default)
    97.6   *   force|required             Don't boot unless IOMMU is enabled
    97.7 + *   passthrough                Bypass VT-d translation for Dom0
    97.8   */
    97.9  custom_param("iommu", parse_iommu_param);
   97.10  int iommu_enabled = 0;
   97.11  int iommu_pv_enabled = 0;
   97.12  int force_iommu = 0;
   97.13 +int iommu_passthrough = 0;
   97.14  
   97.15  static void __init parse_iommu_param(char *s)
   97.16  {
   97.17 @@ -58,6 +60,8 @@ static void __init parse_iommu_param(cha
   97.18              iommu_pv_enabled = 0;
   97.19          else if ( !strcmp(s, "force") || !strcmp(s, "required") )
   97.20              force_iommu = 1;
   97.21 +        else if ( !strcmp(s, "passthrough") )
   97.22 +            iommu_passthrough = 1;
   97.23  
   97.24          s = ss + 1;
   97.25      } while ( ss );
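
With this change, booting Xen with e.g. "iommu=force,passthrough" makes Dom0 DMA bypass VT-d translation while other options are still honoured. A standalone sketch of the comma-separated parsing that parse_iommu_param() performs (simplified and restructured slightly; not the Xen code):

    #include <stdio.h>
    #include <string.h>

    static void parse_options(char *s)
    {
        char *ss;

        do {
            ss = strchr(s, ',');
            if ( ss != NULL )
                *ss = '\0';          /* terminate the current token */

            if ( !strcmp(s, "passthrough") )
                printf("Dom0 DMA bypasses VT-d translation\n");
            else if ( !strcmp(s, "force") || !strcmp(s, "required") )
                printf("refuse to boot without a usable IOMMU\n");

            if ( ss != NULL )
                s = ss + 1;          /* advance past the comma */
        } while ( ss != NULL );
    }

    int main(void)
    {
        char opts[] = "force,passthrough";
        parse_options(opts);
        return 0;
    }
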
    98.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Fri Sep 12 14:32:45 2008 +0900
    98.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Sep 12 14:47:40 2008 +0900
    98.3 @@ -1090,12 +1090,13 @@ static int domain_context_mapping_one(
    98.4      }
    98.5  
    98.6      spin_lock_irqsave(&iommu->lock, flags);
    98.7 -
    98.8 -#ifdef CONTEXT_PASSTHRU
    98.9 -    if ( ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
   98.10 +    if ( iommu_passthrough &&
   98.11 +         ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
   98.12 +    {
   98.13          context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
   98.14 +        agaw = level_to_agaw(iommu->nr_pt_levels);
   98.15 +    }
   98.16      else
   98.17 -#endif
   98.18      {
   98.19          /* Ensure we have pagetables allocated down to leaf PTE. */
   98.20          if ( hd->pgd_maddr == 0 )
   98.21 @@ -1459,11 +1460,13 @@ int intel_iommu_map_page(
   98.22      u64 pg_maddr;
   98.23      int pte_present;
   98.24  
   98.25 -#ifdef CONTEXT_PASSTHRU
   98.26 +    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   98.27 +    iommu = drhd->iommu;
   98.28 +
   98.29      /* do nothing if dom0 and iommu supports pass thru */
   98.30 -    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   98.31 +    if ( iommu_passthrough &&
   98.32 +         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   98.33          return 0;
   98.34 -#endif
   98.35  
   98.36      pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
   98.37      if ( pg_maddr == 0 )
   98.38 @@ -1500,11 +1503,10 @@ int intel_iommu_unmap_page(struct domain
   98.39      drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   98.40      iommu = drhd->iommu;
   98.41  
   98.42 -#ifdef CONTEXT_PASSTHRU
   98.43      /* do nothing if dom0 and iommu supports pass thru */
   98.44 -    if ( ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   98.45 +    if ( iommu_passthrough &&
   98.46 +         ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
   98.47          return 0;
   98.48 -#endif
   98.49  
   98.50      dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
   98.51  
    99.1 --- a/xen/include/asm-ia64/shadow.h	Fri Sep 12 14:32:45 2008 +0900
    99.2 +++ b/xen/include/asm-ia64/shadow.h	Fri Sep 12 14:47:40 2008 +0900
    99.3 @@ -63,8 +63,6 @@ shadow_mark_page_dirty(struct domain *d,
    99.4          return 0;
    99.5  }
    99.6  
    99.7 -#define guest_physmap_max_mem_pages(d, n) (0)
    99.8 -
    99.9  #endif // _XEN_SHADOW_H
   99.10  
   99.11  /*
   100.1 --- a/xen/include/asm-x86/bitops.h	Fri Sep 12 14:32:45 2008 +0900
   100.2 +++ b/xen/include/asm-x86/bitops.h	Fri Sep 12 14:47:40 2008 +0900
   100.3 @@ -116,8 +116,8 @@ static inline void __clear_bit(int nr, v
   100.4      __clear_bit(nr, addr);                              \
   100.5  })
   100.6  
   100.7 -#define smp_mb__before_clear_bit() barrier()
   100.8 -#define smp_mb__after_clear_bit()  barrier()
   100.9 +#define smp_mb__before_clear_bit() ((void)0)
  100.10 +#define smp_mb__after_clear_bit()  ((void)0)
  100.11  
  100.12  /**
  100.13   * __change_bit - Toggle a bit in memory
   101.1 --- a/xen/include/asm-x86/guest_access.h	Fri Sep 12 14:32:45 2008 +0900
   101.2 +++ b/xen/include/asm-x86/guest_access.h	Fri Sep 12 14:47:40 2008 +0900
   101.3 @@ -8,7 +8,7 @@
   101.4  #define __ASM_X86_GUEST_ACCESS_H__
   101.5  
   101.6  #include <asm/uaccess.h>
   101.7 -#include <asm/shadow.h>
   101.8 +#include <asm/paging.h>
   101.9  #include <asm/hvm/support.h>
  101.10  #include <asm/hvm/guest_access.h>
  101.11  
  101.12 @@ -87,10 +87,10 @@
  101.13   * Allows use of faster __copy_* functions.
  101.14   */
  101.15  #define guest_handle_okay(hnd, nr)                      \
  101.16 -    (shadow_mode_external(current->domain) ||           \
  101.17 +    (paging_mode_external(current->domain) ||           \
  101.18       array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)))
  101.19  #define guest_handle_subrange_okay(hnd, first, last)    \
  101.20 -    (shadow_mode_external(current->domain) ||           \
  101.21 +    (paging_mode_external(current->domain) ||           \
  101.22       array_access_ok((hnd).p + (first),                 \
  101.23                       (last)-(first)+1,                  \
  101.24                       sizeof(*(hnd).p)))
   102.1 --- a/xen/include/asm-x86/hvm/trace.h	Fri Sep 12 14:32:45 2008 +0900
   102.2 +++ b/xen/include/asm-x86/hvm/trace.h	Fri Sep 12 14:47:40 2008 +0900
   102.3 @@ -56,16 +56,13 @@
   102.4  #define TRC_PAR_LONG(par) (par)
   102.5  #endif
   102.6  
   102.7 -#define HVMTRACE_ND(evt, cycles, vcpu, count, d1, d2, d3, d4, d5, d6)   \
   102.8 +#define HVMTRACE_ND(evt, cycles, count, d1, d2, d3, d4, d5, d6)         \
   102.9      do {                                                                \
  102.10          if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )             \
  102.11          {                                                               \
  102.12              struct {                                                    \
  102.13 -                u32 did:16, vid:16;                                     \
  102.14                  u32 d[6];                                               \
  102.15              } _d;                                                       \
  102.16 -            _d.did=(vcpu)->domain->domain_id;                           \
  102.17 -            _d.vid=(vcpu)->vcpu_id;                                     \
  102.18              _d.d[0]=(d1);                                               \
  102.19              _d.d[1]=(d2);                                               \
  102.20              _d.d[2]=(d3);                                               \
  102.21 @@ -77,32 +74,32 @@
  102.22          }                                                               \
  102.23      } while(0)
  102.24  
  102.25 -#define HVMTRACE_6D(evt, vcpu, d1, d2, d3, d4, d5, d6)    \
  102.26 -                      HVMTRACE_ND(evt, 0, vcpu, 6, d1, d2, d3,  d4, d5, d6)
  102.27 -#define HVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5)        \
  102.28 -                      HVMTRACE_ND(evt, 0, vcpu, 5, d1, d2, d3,  d4, d5, 0)
  102.29 -#define HVMTRACE_4D(evt, vcpu, d1, d2, d3, d4)               \
  102.30 -                      HVMTRACE_ND(evt, 0, vcpu, 4, d1, d2, d3,  d4, 0, 0)
  102.31 -#define HVMTRACE_3D(evt, vcpu, d1, d2, d3)                   \
  102.32 -                      HVMTRACE_ND(evt, 0, vcpu, 3, d1, d2, d3,  0, 0, 0)
  102.33 -#define HVMTRACE_2D(evt, vcpu, d1, d2)                       \
  102.34 -                      HVMTRACE_ND(evt, 0, vcpu, 2, d1, d2,  0,  0, 0, 0)
  102.35 -#define HVMTRACE_1D(evt, vcpu, d1)                           \
  102.36 -                      HVMTRACE_ND(evt, 0, vcpu, 1, d1,  0,  0,  0, 0, 0)
  102.37 -#define HVMTRACE_0D(evt, vcpu)                               \
  102.38 -                      HVMTRACE_ND(evt, 0, vcpu, 0, 0,  0,  0,  0, 0, 0)
  102.39 +#define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6)    \
  102.40 +                      HVMTRACE_ND(evt, 0, 6, d1, d2, d3,  d4, d5, d6)
  102.41 +#define HVMTRACE_5D(evt, d1, d2, d3, d4, d5)        \
  102.42 +                      HVMTRACE_ND(evt, 0, 5, d1, d2, d3,  d4, d5, 0)
  102.43 +#define HVMTRACE_4D(evt, d1, d2, d3, d4)               \
  102.44 +                      HVMTRACE_ND(evt, 0, 4, d1, d2, d3,  d4, 0, 0)
  102.45 +#define HVMTRACE_3D(evt, d1, d2, d3)                   \
  102.46 +                      HVMTRACE_ND(evt, 0, 3, d1, d2, d3,  0, 0, 0)
  102.47 +#define HVMTRACE_2D(evt, d1, d2)                       \
  102.48 +                      HVMTRACE_ND(evt, 0, 2, d1, d2,  0,  0, 0, 0)
  102.49 +#define HVMTRACE_1D(evt, d1)                           \
  102.50 +                      HVMTRACE_ND(evt, 0, 1, d1,  0,  0,  0, 0, 0)
  102.51 +#define HVMTRACE_0D(evt)                               \
  102.52 +                      HVMTRACE_ND(evt, 0, 0, 0,  0,  0,  0, 0, 0)
  102.53  
  102.54  
  102.55  
  102.56  #ifdef __x86_64__
  102.57 -#define HVMTRACE_LONG_1D(evt, vcpu, d1)                  \
  102.58 -                   HVMTRACE_2D(evt ## 64, vcpu, (d1) & 0xFFFFFFFF, (d1) >> 32)
  102.59 -#define HVMTRACE_LONG_2D(evt,vcpu,d1,d2, ...)              \
  102.60 -                   HVMTRACE_3D(evt ## 64, vcpu, d1, d2)
  102.61 -#define HVMTRACE_LONG_3D(evt, vcpu, d1, d2, d3, ...)      \
  102.62 -                   HVMTRACE_4D(evt ## 64, vcpu, d1, d2, d3)
  102.63 -#define HVMTRACE_LONG_4D(evt, vcpu, d1, d2, d3, d4, ...)  \
  102.64 -                   HVMTRACE_5D(evt ## 64, vcpu, d1, d2, d3, d4)
  102.65 +#define HVMTRACE_LONG_1D(evt, d1)                  \
  102.66 +                   HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
  102.67 +#define HVMTRACE_LONG_2D(evt, d1, d2, ...)              \
  102.68 +                   HVMTRACE_3D(evt ## 64, d1, d2)
  102.69 +#define HVMTRACE_LONG_3D(evt, d1, d2, d3, ...)      \
  102.70 +                   HVMTRACE_4D(evt ## 64, d1, d2, d3)
  102.71 +#define HVMTRACE_LONG_4D(evt, d1, d2, d3, d4, ...)  \
  102.72 +                   HVMTRACE_5D(evt ## 64, d1, d2, d3, d4)
  102.73  #else
  102.74  #define HVMTRACE_LONG_1D HVMTRACE_1D
  102.75  #define HVMTRACE_LONG_2D HVMTRACE_2D
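
Every HVMTRACE_* macro loses its vcpu parameter because the did/vid pair is dropped from HVM records; consumers are expected to recover the vcpu from the scheduling trace (e.g. the new runstate-change events) instead. A hypothetical call site before and after, where IO_READ stands for any event covered by a DO_TRC_HVM_* switch:

    /* before: */ HVMTRACE_2D(IO_READ, current, port, size);
    /* after:  */ HVMTRACE_2D(IO_READ, port, size);
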
   103.1 --- a/xen/include/asm-x86/io_apic.h	Fri Sep 12 14:32:45 2008 +0900
   103.2 +++ b/xen/include/asm-x86/io_apic.h	Fri Sep 12 14:47:40 2008 +0900
   103.3 @@ -162,8 +162,6 @@ static inline void io_apic_modify(unsign
   103.4  /* 1 if "noapic" boot option passed */
   103.5  extern int skip_ioapic_setup;
   103.6  
   103.7 -extern int msi_enable;
   103.8 -
   103.9  /*
  103.10   * If we use the IO-APIC for IRQ routing, disable automatic
  103.11   * assignment of PCI IRQ's.
   104.1 --- a/xen/include/asm-x86/mm.h	Fri Sep 12 14:32:45 2008 +0900
   104.2 +++ b/xen/include/asm-x86/mm.h	Fri Sep 12 14:47:40 2008 +0900
   104.3 @@ -59,6 +59,17 @@ struct page_info
   104.4          u32 tlbflush_timestamp;
   104.5  
   104.6          /*
   104.7 +         * When PGT_partial is true then this field is valid and indicates
   104.8 +         * that PTEs in the range [0, @nr_validated_ptes) have been validated.
   104.9 +         * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
  104.10 +         * partially validated.
  104.11 +         */
  104.12 +        struct {
  104.13 +            u16 nr_validated_ptes;
  104.14 +            bool_t partial_pte;
  104.15 +        };
  104.16 +
  104.17 +        /*
  104.18           * Guest pages with a shadow.  This does not conflict with
  104.19           * tlbflush_timestamp since page table pages are explicitly not
  104.20           * tracked for TLB-flush avoidance when a guest runs in shadow mode.
  104.21 @@ -86,9 +97,12 @@ struct page_info
  104.22   /* PAE only: is this an L2 page directory containing Xen-private mappings? */
  104.23  #define _PGT_pae_xen_l2     26
  104.24  #define PGT_pae_xen_l2      (1U<<_PGT_pae_xen_l2)
  104.25 +/* Has this page been *partially* validated for use as its current type? */
  104.26 +#define _PGT_partial        25
  104.27 +#define PGT_partial         (1U<<_PGT_partial)
  104.28  
  104.29 - /* 26-bit count of uses of this frame as its current type. */
  104.30 -#define PGT_count_mask      ((1U<<26)-1)
  104.31 + /* 25-bit count of uses of this frame as its current type. */
  104.32 +#define PGT_count_mask      ((1U<<25)-1)
  104.33  
  104.34   /* Cleared when the owning guest 'frees' this page. */
  104.35  #define _PGC_allocated      31
  104.36 @@ -154,7 +168,8 @@ extern unsigned long max_page;
  104.37  extern unsigned long total_pages;
  104.38  void init_frametable(void);
  104.39  
  104.40 -void free_page_type(struct page_info *page, unsigned long type);
  104.41 +int free_page_type(struct page_info *page, unsigned long type,
  104.42 +                   int preemptible);
  104.43  int _shadow_mode_refcounts(struct domain *d);
  104.44  
  104.45  void cleanup_page_cacheattr(struct page_info *page);
  104.46 @@ -165,6 +180,8 @@ void put_page(struct page_info *page);
  104.47  int  get_page(struct page_info *page, struct domain *domain);
  104.48  void put_page_type(struct page_info *page);
  104.49  int  get_page_type(struct page_info *page, unsigned long type);
  104.50 +int  put_page_type_preemptible(struct page_info *page);
  104.51 +int  get_page_type_preemptible(struct page_info *page, unsigned long type);
  104.52  int  get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
  104.53  void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
  104.54  
  104.55 @@ -174,6 +191,19 @@ static inline void put_page_and_type(str
  104.56      put_page(page);
  104.57  }
  104.58  
  104.59 +static inline int put_page_and_type_preemptible(struct page_info *page,
  104.60 +                                                int preemptible)
  104.61 +{
  104.62 +    int rc = 0;
  104.63 +
  104.64 +    if ( preemptible )
  104.65 +        rc = put_page_type_preemptible(page);
  104.66 +    else
  104.67 +        put_page_type(page);
  104.68 +    if ( likely(rc == 0) )
  104.69 +        put_page(page);
  104.70 +    return rc;
  104.71 +}
  104.72  
  104.73  static inline int get_page_and_type(struct page_info *page,
  104.74                                      struct domain *domain,
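
The *_preemptible type-count operations allow long page-table validation and teardown to be interrupted; a nonzero return tells the caller to stop and come back later. A sketch of the expected caller pattern (illustrative only; the real callers live in the companion mm.c changes, and the exact error codes, e.g. -EAGAIN/-EINTR, are an assumption here):

    /* Illustrative caller; relies only on the declarations in the hunk. */
    static int example_drop_page_table(struct page_info *page)
    {
        int rc = put_page_and_type_preemptible(page, 1 /* preemptible */);

        if ( rc )       /* assumed -EAGAIN/-EINTR: preempted mid-teardown */
            return rc;  /* caller arranges a hypercall continuation */

        return 0;
    }
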
   105.1 --- a/xen/include/asm-x86/msr-index.h	Fri Sep 12 14:32:45 2008 +0900
   105.2 +++ b/xen/include/asm-x86/msr-index.h	Fri Sep 12 14:47:40 2008 +0900
   105.3 @@ -194,11 +194,23 @@
   105.4  #define _K8_VMCR_SVME_DISABLE		4
   105.5  #define K8_VMCR_SVME_DISABLE		(1 << _K8_VMCR_SVME_DISABLE)
   105.6  
   105.7 +/* AMD64 MSRs */
   105.8 +#define MSR_AMD64_NB_CFG		0xc001001f
   105.9 +#define AMD64_NB_CFG_CF8_EXT_ENABLE_BIT	46
  105.10 +
  105.11  /* AMD Family10h machine check MSRs */
  105.12  #define MSR_F10_MC4_MISC1		0xc0000408
  105.13  #define MSR_F10_MC4_MISC2		0xc0000409
  105.14  #define MSR_F10_MC4_MISC3		0xc000040A
  105.15  
  105.16 +/* Other AMD Fam10h MSRs */
  105.17 +#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
  105.18 +#define FAM10H_MMIO_CONF_ENABLE_BIT	0
  105.19 +#define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
  105.20 +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
  105.21 +#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffff
  105.22 +#define FAM10H_MMIO_CONF_BASE_SHIFT	20
  105.23 +
  105.24  /* K6 MSRs */
  105.25  #define MSR_K6_EFER			0xc0000080
  105.26  #define MSR_K6_STAR			0xc0000081
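
The new FAM10H_MMIO_CONF_* constants describe the Fam10h MMIO config-space base MSR: an enable bit, a bus-range field (the segment spans 2^BusRange buses), and a 28-bit base address field at bit 20. A standalone decoder using those definitions (the example MSR value is made up):

    #include <stdint.h>
    #include <stdio.h>

    #define FAM10H_MMIO_CONF_ENABLE_BIT     0
    #define FAM10H_MMIO_CONF_BUSRANGE_MASK  0xf
    #define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
    #define FAM10H_MMIO_CONF_BASE_MASK      0xfffffff
    #define FAM10H_MMIO_CONF_BASE_SHIFT     20

    int main(void)
    {
        uint64_t msr = 0xe0000021ULL;  /* hypothetical MSR contents */
        int enabled = (msr >> FAM10H_MMIO_CONF_ENABLE_BIT) & 1;
        unsigned int busrange = (msr >> FAM10H_MMIO_CONF_BUSRANGE_SHIFT)
                                & FAM10H_MMIO_CONF_BUSRANGE_MASK;
        uint64_t base = ((msr >> FAM10H_MMIO_CONF_BASE_SHIFT)
                         & FAM10H_MMIO_CONF_BASE_MASK)
                        << FAM10H_MMIO_CONF_BASE_SHIFT;

        printf("MMIO cfg: %sabled, %u buses, base %#llx\n",
               enabled ? "en" : "dis", 1u << busrange,
               (unsigned long long)base);
        return 0;
    }
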
   106.1 --- a/xen/include/asm-x86/shadow.h	Fri Sep 12 14:32:45 2008 +0900
   106.2 +++ b/xen/include/asm-x86/shadow.h	Fri Sep 12 14:47:40 2008 +0900
   106.3 @@ -115,8 +115,6 @@ static inline void shadow_remove_all_sha
   106.4      sh_remove_shadows(v, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
   106.5  }
   106.6  
   106.7 -#define guest_physmap_max_mem_pages(d, n) (0)
   106.8 -
   106.9  #endif /* _XEN_SHADOW_H */
  106.10  
  106.11  /*
   107.1 --- a/xen/include/public/trace.h	Fri Sep 12 14:32:45 2008 +0900
   107.2 +++ b/xen/include/public/trace.h	Fri Sep 12 14:47:40 2008 +0900
   107.3 @@ -37,6 +37,7 @@
   107.4  #define TRC_HVM      0x0008f000    /* Xen HVM trace            */
   107.5  #define TRC_MEM      0x0010f000    /* Xen memory trace         */
   107.6  #define TRC_PV       0x0020f000    /* Xen PV traces            */
   107.7 +#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
   107.8  #define TRC_ALL      0x0ffff000
   107.9  #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
  107.10  #define TRC_HD_CYCLE_FLAG (1UL<<31)
  107.11 @@ -50,26 +51,30 @@
  107.12  #define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
  107.13  #define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
  107.14  
  107.15 +#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
  107.16 +#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
  107.17 +
  107.18  /* Trace events per class */
  107.19  #define TRC_LOST_RECORDS        (TRC_GEN + 1)
  107.20  #define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
  107.21  #define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
  107.22  
  107.23 -#define TRC_SCHED_DOM_ADD       (TRC_SCHED +  1)
  107.24 -#define TRC_SCHED_DOM_REM       (TRC_SCHED +  2)
  107.25 -#define TRC_SCHED_SLEEP         (TRC_SCHED +  3)
  107.26 -#define TRC_SCHED_WAKE          (TRC_SCHED +  4)
  107.27 -#define TRC_SCHED_YIELD         (TRC_SCHED +  5)
  107.28 -#define TRC_SCHED_BLOCK         (TRC_SCHED +  6)
  107.29 -#define TRC_SCHED_SHUTDOWN      (TRC_SCHED +  7)
  107.30 -#define TRC_SCHED_CTL           (TRC_SCHED +  8)
  107.31 -#define TRC_SCHED_ADJDOM        (TRC_SCHED +  9)
  107.32 -#define TRC_SCHED_SWITCH        (TRC_SCHED + 10)
  107.33 -#define TRC_SCHED_S_TIMER_FN    (TRC_SCHED + 11)
  107.34 -#define TRC_SCHED_T_TIMER_FN    (TRC_SCHED + 12)
  107.35 -#define TRC_SCHED_DOM_TIMER_FN  (TRC_SCHED + 13)
  107.36 -#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14)
  107.37 -#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15)
  107.38 +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
  107.39 +#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
  107.40 +#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
  107.41 +#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
  107.42 +#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
  107.43 +#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
  107.44 +#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
  107.45 +#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
  107.46 +#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
  107.47 +#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
  107.48 +#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
  107.49 +#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
  107.50 +#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
  107.51 +#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
  107.52 +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
  107.53 +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
  107.54  
  107.55  #define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
  107.56  #define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
  107.57 @@ -89,6 +94,22 @@
  107.58    /* Indicates that addresses in trace record are 64 bits */
  107.59  #define TRC_64_FLAG               (0x100) 
  107.60  
  107.61 +#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
  107.62 +#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
  107.63 +#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
  107.64 +#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
  107.65 +#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
  107.66 +#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
  107.67 +#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
  107.68 +#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
  107.69 +#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
  107.70 +#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
  107.71 +#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
  107.72 +#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
  107.73 +#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
  107.74 +#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
  107.75 +#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
  107.76 +
  107.77  /* trace events per subclass */
  107.78  #define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
  107.79  #define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
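
TRC_SCHED_RUNSTATE_CHANGE packs the old runstate into bits 8-9 of the event word and the new one into bits 4-5 (see trace_runstate_change() in the schedule.c hunk above). A hypothetical decoder, with runstate names per public/vcpu.h:

    #include <stdint.h>
    #include <stdio.h>

    /* RUNSTATE_* values 0-3 as defined in xen/include/public/vcpu.h. */
    static const char *runstate_name[] =
        { "running", "runnable", "blocked", "offline" };

    static void decode_runstate_change(uint32_t event,
                                       uint16_t dom, uint16_t vcpu)
    {
        printf("d%uv%u: %s -> %s\n", dom, vcpu,
               runstate_name[(event >> 8) & 0x3],  /* old state, bits 8-9 */
               runstate_name[(event >> 4) & 0x3]); /* new state, bits 4-5 */
    }
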
   108.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   108.2 +++ b/xen/include/xen/cpuidle.h	Fri Sep 12 14:47:40 2008 +0900
   108.3 @@ -0,0 +1,82 @@
   108.4 +/*
   108.5 + * cpuidle.h - xen idle state module derived from Linux 
   108.6 + *
   108.7 + * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
   108.8 + *          Shaohua Li <shaohua.li@intel.com>
   108.9 + *          Adam Belay <abelay@novell.com>
  108.10 + *  Copyright (C) 2008 Intel Corporation
  108.11 + *
  108.12 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  108.13 + *
  108.14 + *  This program is free software; you can redistribute it and/or modify
  108.15 + *  it under the terms of the GNU General Public License as published by
  108.16 + *  the Free Software Foundation; either version 2 of the License, or (at
  108.17 + *  your option) any later version.
  108.18 + *
  108.19 + *  This program is distributed in the hope that it will be useful, but
  108.20 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
  108.21 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  108.22 + *  General Public License for more details.
  108.23 + *
  108.24 + *  You should have received a copy of the GNU General Public License along
  108.25 + *  with this program; if not, write to the Free Software Foundation, Inc.,
  108.26 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
  108.27 + *
  108.28 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  108.29 + */
  108.30 +#ifndef _XEN_CPUIDLE_H
  108.31 +#define _XEN_CPUIDLE_H
  108.32 +
  108.33 +#define ACPI_PROCESSOR_MAX_POWER        8
  108.34 +#define CPUIDLE_NAME_LEN                16
  108.35 +
  108.36 +struct acpi_processor_cx
  108.37 +{
  108.38 +    u8 valid;
  108.39 +    u8 type;
  108.40 +    u32 address;
  108.41 +    u8 space_id;
  108.42 +    u32 latency;
  108.43 +    u32 latency_ticks;
  108.44 +    u32 power;
  108.45 +    u32 usage;
  108.46 +    u64 time;
  108.47 +    u32 target_residency;
  108.48 +};
  108.49 +
  108.50 +struct acpi_processor_flags
  108.51 +{
  108.52 +    u8 bm_control:1;
  108.53 +    u8 bm_check:1;
  108.54 +    u8 has_cst:1;
  108.55 +    u8 power_setup_done:1;
  108.56 +    u8 bm_rld_set:1;
  108.57 +};
  108.58 +
  108.59 +struct acpi_processor_power
  108.60 +{
  108.61 +    unsigned int cpu;
  108.62 +    struct acpi_processor_flags flags;
  108.63 +    struct acpi_processor_cx *last_state;
  108.64 +    struct acpi_processor_cx *safe_state;
  108.65 +    u32 last_residency;
  108.66 +    void *gdata; /* governor specific data */
  108.67 +    u32 count;
  108.68 +    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
  108.69 +};
  108.70 +
  108.71 +struct cpuidle_governor
  108.72 +{
  108.73 +    char                    name[CPUIDLE_NAME_LEN];
  108.74 +    unsigned int            rating;
  108.75 +
  108.76 +    int  (*enable)          (struct acpi_processor_power *dev);
  108.77 +    void (*disable)         (struct acpi_processor_power *dev);
  108.78 +
  108.79 +    int  (*select)          (struct acpi_processor_power *dev);
  108.80 +    void (*reflect)         (struct acpi_processor_power *dev);
  108.81 +};
  108.82 +
  108.83 +extern struct cpuidle_governor *cpuidle_current_governor;
  108.84 +
  108.85 +#endif /* _XEN_CPUIDLE_H */
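
A governor plugs into this interface via cpuidle_current_governor: select() picks the C-state to enter and reflect() feeds back what actually happened. A hedged sketch of a trivial governor against the struct above (the policy is hypothetical, and the registration mechanics are assumed from the surrounding code):

    /* Hypothetical policy: always pick the deepest valid C-state. */
    static int trivial_select(struct acpi_processor_power *power)
    {
        unsigned int i;
        int best = 0;

        for ( i = 0; i < power->count; i++ )
            if ( power->states[i].valid )
                best = (int)i;
        return best;
    }

    static struct cpuidle_governor trivial_governor = {
        .name   = "trivial",
        .rating = 1,
        .select = trivial_select,
        /* enable/disable/reflect left NULL in this sketch */
    };
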
   109.1 --- a/xen/include/xen/iommu.h	Fri Sep 12 14:32:45 2008 +0900
   109.2 +++ b/xen/include/xen/iommu.h	Fri Sep 12 14:47:40 2008 +0900
   109.3 @@ -31,6 +31,7 @@ extern int vtd_enabled;
   109.4  extern int iommu_enabled;
   109.5  extern int iommu_pv_enabled;
   109.6  extern int force_iommu;
   109.7 +extern int iommu_passthrough;
   109.8  
   109.9  #define domain_hvm_iommu(d)     (&d->arch.hvm_domain.hvm_iommu)
  109.10  
   110.1 --- a/xen/include/xen/sched.h	Fri Sep 12 14:32:45 2008 +0900
   110.2 +++ b/xen/include/xen/sched.h	Fri Sep 12 14:47:40 2008 +0900
   110.3 @@ -106,8 +106,6 @@ struct vcpu
   110.4      bool_t           fpu_initialised;
   110.5      /* Has the FPU been used since it was last saved? */
   110.6      bool_t           fpu_dirtied;
   110.7 -    /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
   110.8 -    bool_t           is_polling;
   110.9      /* Initialization completed for this VCPU? */
  110.10      bool_t           is_initialised;
  110.11      /* Currently running on a CPU? */
  110.12 @@ -134,6 +132,13 @@ struct vcpu
  110.13      /* VCPU affinity is temporarily locked from controller changes? */
  110.14      bool_t           affinity_locked;
  110.15  
  110.16 +    /*
  110.17 +     * > 0: a single port is being polled;
   110.18 +     * = 0: nothing is being polled (this vcpu's bit should be clear in d->poll_mask);
  110.19 +     * < 0: multiple ports may be being polled.
  110.20 +     */
  110.21 +    int              poll_evtchn;
  110.22 +
  110.23      unsigned long    pause_flags;
  110.24      atomic_t         pause_count;
  110.25  
  110.26 @@ -209,8 +214,6 @@ struct domain
  110.27      struct domain   *target;
  110.28      /* Is this guest being debugged by dom0? */
  110.29      bool_t           debugger_attached;
  110.30 -    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
  110.31 -    bool_t           is_polling;
  110.32      /* Is this guest dying (i.e., a zombie)? */
  110.33      enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
  110.34      /* Domain is paused by controller software? */
  110.35 @@ -218,6 +221,9 @@ struct domain
  110.36      /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
  110.37      bool_t           is_pinned;
  110.38  
  110.39 +    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
  110.40 +    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
  110.41 +
  110.42      /* Guest has shut down (inc. reason code)? */
  110.43      spinlock_t       shutdown_lock;
  110.44      bool_t           is_shutting_down; /* in process of shutting down? */
  110.45 @@ -507,6 +513,7 @@ static inline int vcpu_runnable(struct v
  110.46               atomic_read(&v->domain->pause_count));
  110.47  }
  110.48  
  110.49 +void vcpu_unblock(struct vcpu *v);
  110.50  void vcpu_pause(struct vcpu *v);
  110.51  void vcpu_pause_nosync(struct vcpu *v);
  110.52  void domain_pause(struct domain *d);
  110.53 @@ -517,18 +524,13 @@ void domain_unpause_by_systemcontroller(
  110.54  void cpu_init(void);
  110.55  
  110.56  void vcpu_force_reschedule(struct vcpu *v);
  110.57 +void cpu_disable_scheduler(void);
  110.58  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
  110.59  int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
  110.60  void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
  110.61  
  110.62  void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
  110.63  
  110.64 -static inline void vcpu_unblock(struct vcpu *v)
  110.65 -{
  110.66 -    if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
  110.67 -        vcpu_wake(v);
  110.68 -}
  110.69 -
  110.70  #define IS_PRIV(_d) ((_d)->is_privileged)
  110.71  #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
  110.72  
   111.1 --- a/xen/include/xen/trace.h	Fri Sep 12 14:32:45 2008 +0900
   111.2 +++ b/xen/include/xen/trace.h	Fri Sep 12 14:47:40 2008 +0900
   111.3 @@ -34,6 +34,8 @@ void init_trace_bufs(void);
   111.4  /* used to retrieve the physical address of the trace buffers */
   111.5  int tb_control(struct xen_sysctl_tbuf_op *tbc);
   111.6  
   111.7 +int trace_will_trace_event(u32 event);
   111.8 +
   111.9  void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
  111.10  
  111.11  static inline void trace_var(u32 event, int cycles, int extra,
   112.1 --- a/xen/include/xsm/xsm.h	Fri Sep 12 14:32:45 2008 +0900
   112.2 +++ b/xen/include/xsm/xsm.h	Fri Sep 12 14:47:40 2008 +0900
   112.3 @@ -64,16 +64,17 @@ struct xsm_operations {
   112.4      int (*getvcpucontext) (struct domain *d);
   112.5      int (*getvcpuinfo) (struct domain *d);
   112.6      int (*domain_settime) (struct domain *d);
   112.7 +    int (*set_target) (struct domain *d, struct domain *e);
   112.8      int (*tbufcontrol) (void);
   112.9      int (*readconsole) (uint32_t clear);
  112.10      int (*sched_id) (void);
  112.11      int (*setdomainmaxmem) (struct domain *d);
  112.12      int (*setdomainhandle) (struct domain *d);
  112.13      int (*setdebugging) (struct domain *d);
  112.14 -    int (*irq_permission) (struct domain *d, uint8_t pirq, uint8_t access);
  112.15 -    int (*iomem_permission) (struct domain *d, unsigned long mfn, 
  112.16 -                                                                uint8_t access);
  112.17      int (*perfcontrol) (void);
  112.18 +    int (*debug_keys) (void);
  112.19 +    int (*getcpuinfo) (void);
  112.20 +    int (*availheap) (void);
  112.21  
  112.22      int (*evtchn_unbound) (struct domain *d, struct evtchn *chn, domid_t id2);
  112.23      int (*evtchn_interdomain) (struct domain *d1, struct evtchn *chn1,
  112.24 @@ -106,13 +107,13 @@ struct xsm_operations {
  112.25  
  112.26      int (*kexec) (void);
  112.27      int (*schedop_shutdown) (struct domain *d1, struct domain *d2);
  112.28 +    int (*add_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
  112.29 +    int (*remove_range) (struct domain *d, char *name, unsigned long s, unsigned long e);
  112.30  
  112.31      long (*__do_xsm_op) (XEN_GUEST_HANDLE(xsm_op_t) op);
  112.32  
  112.33  #ifdef CONFIG_X86
  112.34      int (*shadow_control) (struct domain *d, uint32_t op);
  112.35 -    int (*ioport_permission) (struct domain *d, uint32_t ioport, 
  112.36 -                                                                uint8_t access);
  112.37      int (*getpageframeinfo) (struct page_info *page);
  112.38      int (*getmemlist) (struct domain *d);
  112.39      int (*hypercall_init) (struct domain *d);
  112.40 @@ -130,13 +131,26 @@ struct xsm_operations {
  112.41      int (*microcode) (void);
  112.42      int (*physinfo) (void);
  112.43      int (*platform_quirk) (uint32_t);
  112.44 +    int (*firmware_info) (void);
  112.45 +    int (*acpi_sleep) (void);
  112.46 +    int (*change_freq) (void);
  112.47 +    int (*getidletime) (void);
  112.48      int (*machine_memory_map) (void);
  112.49      int (*domain_memory_map) (struct domain *d);
  112.50 -    int (*mmu_normal_update) (struct domain *d, intpte_t fpte);
  112.51 +    int (*mmu_normal_update) (struct domain *d, struct domain *f, 
  112.52 +                                                                intpte_t fpte);
  112.53      int (*mmu_machphys_update) (struct domain *d, unsigned long mfn);
  112.54 -    int (*update_va_mapping) (struct domain *d, l1_pgentry_t pte);
  112.55 +    int (*update_va_mapping) (struct domain *d, struct domain *f, 
  112.56 +                                                            l1_pgentry_t pte);
  112.57      int (*add_to_physmap) (struct domain *d1, struct domain *d2);
  112.58      int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
  112.59 +    int (*sendtrigger) (struct domain *d);
  112.60 +    int (*test_assign_device) (uint32_t machine_bdf);
  112.61 +    int (*assign_device) (struct domain *d, uint32_t machine_bdf);
  112.62 +    int (*deassign_device) (struct domain *d, uint32_t machine_bdf);
  112.63 +    int (*bind_pt_irq) (struct domain *d, struct xen_domctl_bind_pt_irq *bind);
  112.64 +    int (*pin_mem_cacheattr) (struct domain *d);
  112.65 +    int (*ext_vcpucontext) (struct domain *d, uint32_t cmd);
  112.66  #endif
  112.67  };
  112.68  
  112.69 @@ -215,6 +229,11 @@ static inline int xsm_domain_settime (st
  112.70      return xsm_call(domain_settime(d));
  112.71  }
  112.72  
  112.73 +static inline int xsm_set_target (struct domain *d, struct domain *e)
  112.74 +{
  112.75 +    return xsm_call(set_target(d, e));
  112.76 +}
  112.77 +
  112.78  static inline int xsm_tbufcontrol (void)
  112.79  {
  112.80      return xsm_call(tbufcontrol());
  112.81 @@ -245,23 +264,26 @@ static inline int xsm_setdebugging (stru
  112.82      return xsm_call(setdebugging(d));
  112.83  }
  112.84  
  112.85 -static inline int xsm_irq_permission (struct domain *d, uint8_t pirq,
  112.86 -                                                                uint8_t access)
  112.87 -{
  112.88 -    return xsm_call(irq_permission(d, pirq, access));
  112.89 -} 
  112.90 -
  112.91 -static inline int xsm_iomem_permission (struct domain *d, unsigned long mfn,
  112.92 -                                                                uint8_t access)
  112.93 -{
  112.94 -    return xsm_call(iomem_permission(d, mfn, access));
  112.95 -}
  112.96 -
  112.97  static inline int xsm_perfcontrol (void)
  112.98  {
  112.99      return xsm_call(perfcontrol());
 112.100  }
 112.101  
 112.102 +static inline int xsm_debug_keys (void)
 112.103 +{
 112.104 +    return xsm_call(debug_keys());
 112.105 +}
 112.106 +
 112.107 +static inline int xsm_availheap (void)
 112.108 +{
 112.109 +    return xsm_call(availheap());
 112.110 +}
 112.111 +
 112.112 +static inline int xsm_getcpuinfo (void)
 112.113 +{
 112.114 +    return xsm_call(getcpuinfo());
 112.115 +}
 112.116 +
 112.117  static inline int xsm_evtchn_unbound (struct domain *d1, struct evtchn *chn,
 112.118                                                                      domid_t id2)
 112.119  {
 112.120 @@ -387,6 +409,18 @@ static inline int xsm_schedop_shutdown (
 112.121      return xsm_call(schedop_shutdown(d1, d2));
 112.122  }
 112.123  
 112.124 +static inline int xsm_add_range (struct domain *d, char *name, unsigned long s,
 112.125 +                                                                        unsigned long e)
 112.126 +{
 112.127 +    return xsm_call(add_range(d, name, s, e));
 112.128 +}
 112.129 + 
 112.130 +static inline int xsm_remove_range (struct domain *d, char *name, unsigned long s,
 112.131 +                                                                        unsigned long e)
 112.132 +{
 112.133 +    return xsm_call(remove_range(d, name, s, e));
 112.134 +}
 112.135 +
 112.136  static inline long __do_xsm_op (XEN_GUEST_HANDLE(xsm_op_t) op)
 112.137  {
 112.138      return xsm_call(__do_xsm_op(op));
 112.139 @@ -413,12 +447,6 @@ static inline int xsm_shadow_control (st
 112.140      return xsm_call(shadow_control(d, op));
 112.141  }
 112.142  
 112.143 -static inline int xsm_ioport_permission (struct domain *d, uint32_t ioport,
 112.144 -                                                                uint8_t access)
 112.145 -{
 112.146 -    return xsm_call(ioport_permission(d, ioport, access));
 112.147 -}
 112.148 -
 112.149  static inline int xsm_getpageframeinfo (struct page_info *page)
 112.150  {
 112.151      return xsm_call(getpageframeinfo(page));
 112.152 @@ -504,6 +532,26 @@ static inline int xsm_platform_quirk (ui
 112.153      return xsm_call(platform_quirk(quirk));
 112.154  }
 112.155  
 112.156 +static inline int xsm_firmware_info (void)
 112.157 +{
 112.158 +    return xsm_call(firmware_info());
 112.159 +}
 112.160 +
 112.161 +static inline int xsm_acpi_sleep (void)
 112.162 +{
 112.163 +    return xsm_call(acpi_sleep());
 112.164 +}
 112.165 +
 112.166 +static inline int xsm_change_freq (void)
 112.167 +{
 112.168 +    return xsm_call(change_freq());
 112.169 +}
 112.170 +
 112.171 +static inline int xsm_getidletime (void)
 112.172 +{
 112.173 +    return xsm_call(getidletime());
 112.174 +}
 112.175 +
 112.176  static inline int xsm_machine_memory_map(void)
 112.177  {
 112.178      return xsm_call(machine_memory_map());
 112.179 @@ -514,9 +562,10 @@ static inline int xsm_domain_memory_map(
 112.180      return xsm_call(domain_memory_map(d));
 112.181  }
 112.182  
 112.183 -static inline int xsm_mmu_normal_update (struct domain *d, intpte_t fpte)
 112.184 +static inline int xsm_mmu_normal_update (struct domain *d, struct domain *f, 
 112.185 +                                                                intpte_t fpte)
 112.186  {
 112.187 -    return xsm_call(mmu_normal_update(d, fpte));
 112.188 +    return xsm_call(mmu_normal_update(d, f, fpte));
 112.189  }
 112.190  
 112.191  static inline int xsm_mmu_machphys_update (struct domain *d, unsigned long mfn)
 112.192 @@ -524,9 +573,10 @@ static inline int xsm_mmu_machphys_updat
 112.193      return xsm_call(mmu_machphys_update(d, mfn));
 112.194  }
 112.195  
 112.196 -static inline int xsm_update_va_mapping(struct domain *d, l1_pgentry_t pte)
 112.197 +static inline int xsm_update_va_mapping(struct domain *d, struct domain *f, 
 112.198 +                                                            l1_pgentry_t pte)
 112.199  {
 112.200 -    return xsm_call(update_va_mapping(d, pte));
 112.201 +    return xsm_call(update_va_mapping(d, f, pte));
 112.202  }
 112.203  
 112.204  static inline int xsm_add_to_physmap(struct domain *d1, struct domain *d2)
 112.205 @@ -538,6 +588,42 @@ static inline int xsm_remove_from_physma
 112.206  {
 112.207      return xsm_call(remove_from_physmap(d1, d2));
 112.208  }
 112.209 +
 112.210 +static inline int xsm_sendtrigger(struct domain *d)
 112.211 +{
 112.212 +    return xsm_call(sendtrigger(d));
 112.213 +}
 112.214 +
 112.215 +static inline int xsm_test_assign_device(uint32_t machine_bdf)
 112.216 +{
 112.217 +    return xsm_call(test_assign_device(machine_bdf));
 112.218 +}
 112.219 +
 112.220 +static inline int xsm_assign_device(struct domain *d, uint32_t machine_bdf)
 112.221 +{
 112.222 +    return xsm_call(assign_device(d, machine_bdf));
 112.223 +}
 112.224 +
 112.225 +static inline int xsm_deassign_device(struct domain *d, uint32_t machine_bdf)
 112.226 +{
 112.227 +    return xsm_call(deassign_device(d, machine_bdf));
 112.228 +}
 112.229 +
 112.230 +static inline int xsm_bind_pt_irq(struct domain *d, 
 112.231 +                                                struct xen_domctl_bind_pt_irq *bind)
 112.232 +{
 112.233 +    return xsm_call(bind_pt_irq(d, bind));
 112.234 +}
 112.235 +
 112.236 +static inline int xsm_pin_mem_cacheattr(struct domain *d)
 112.237 +{
 112.238 +    return xsm_call(pin_mem_cacheattr(d));
 112.239 +}
 112.240 +
 112.241 +static inline int xsm_ext_vcpucontext(struct domain *d, uint32_t cmd)
 112.242 +{
 112.243 +    return xsm_call(ext_vcpucontext(d, cmd));
 112.244 +}
 112.245  #endif /* CONFIG_X86 */
 112.246  
 112.247  #endif /* __XSM_H */
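
A security module need only populate the hooks it cares about; xsm_fixup_ops() (extended in dummy.c below) fills the remaining slots with allow-everything defaults. A sketch with a hypothetical module restricting the new sendtrigger hook to Dom0 (registration itself, via register_xsm(), is outside this hunk):

    static int mymod_sendtrigger(struct domain *d)
    {
        return (d->domain_id == 0) ? 0 : -EPERM; /* hypothetical: dom0 only */
    }

    static struct xsm_operations mymod_ops = {
        .sendtrigger = mymod_sendtrigger,
        /* every hook left NULL is defaulted by xsm_fixup_ops() */
    };
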
   113.1 --- a/xen/xsm/dummy.c	Fri Sep 12 14:32:45 2008 +0900
   113.2 +++ b/xen/xsm/dummy.c	Fri Sep 12 14:47:40 2008 +0900
   113.3 @@ -84,6 +84,11 @@ static int dummy_domain_settime (struct 
   113.4      return 0;
   113.5  }
   113.6  
   113.7 +static int dummy_set_target (struct domain *d, struct domain *e)
   113.8 +{
   113.9 +    return 0;
  113.10 +}
  113.11 +
  113.12  static int dummy_tbufcontrol (void)
  113.13  {
  113.14      return 0;
  113.15 @@ -114,18 +119,22 @@ static int dummy_setdebugging (struct do
  113.16      return 0;
  113.17  }
  113.18  
  113.19 -static int dummy_irq_permission (struct domain *d, uint8_t pirq, uint8_t access)
  113.20 +static int dummy_perfcontrol (void)
  113.21  {
  113.22      return 0;
  113.23  }
  113.24  
  113.25 -static int dummy_iomem_permission (struct domain *d, unsigned long mfn,
  113.26 -                                                                uint8_t access)
  113.27 +static int dummy_debug_keys (void)
  113.28  {
  113.29      return 0;
  113.30  }
  113.31  
  113.32 -static int dummy_perfcontrol (void)
  113.33 +static int dummy_getcpuinfo (void)
  113.34 +{
  113.35 +    return 0;
  113.36 +}
  113.37 +
  113.38 +static int dummy_availheap (void)
  113.39  {
  113.40      return 0;
  113.41  }
  113.42 @@ -259,14 +268,19 @@ static long dummy___do_xsm_op(XEN_GUEST_
  113.43      return -ENOSYS;
  113.44  }
  113.45  
  113.46 -#ifdef CONFIG_X86
  113.47 -static int dummy_shadow_control (struct domain *d, uint32_t op)
  113.48 +static int dummy_add_range (struct domain *d, char *name, unsigned long s, unsigned long e)
  113.49  {
  113.50      return 0;
  113.51  }
  113.52  
  113.53 -static int dummy_ioport_permission (struct domain *d, uint32_t ioport, 
  113.54 -                                                                uint8_t access)
  113.55 +static int dummy_remove_range (struct domain *d, char *name, unsigned long s, 
  113.56 +                                                                        unsigned long e)
  113.57 +{
  113.58 +    return 0;
  113.59 +}
  113.60 +
  113.61 +#ifdef CONFIG_X86
  113.62 +static int dummy_shadow_control (struct domain *d, uint32_t op)
  113.63  {
  113.64      return 0;
  113.65  }
  113.66 @@ -356,6 +370,26 @@ static int dummy_platform_quirk (uint32_
  113.67      return 0;
  113.68  }
  113.69  
  113.70 +static int dummy_firmware_info (void)
  113.71 +{
  113.72 +    return 0;
  113.73 +}
  113.74 +
  113.75 +static int dummy_acpi_sleep (void)
  113.76 +{
  113.77 +    return 0;
  113.78 +}
  113.79 +
  113.80 +static int dummy_change_freq (void)
  113.81 +{
  113.82 +    return 0;
  113.83 +}
  113.84 +
  113.85 +static int dummy_getidletime (void)
  113.86 +{
  113.87 +    return 0;
  113.88 +}
  113.89 +
  113.90  static int dummy_machine_memory_map (void)
  113.91  {
  113.92      return 0;
  113.93 @@ -366,7 +400,8 @@ static int dummy_domain_memory_map (stru
  113.94      return 0;
  113.95  }
  113.96  
  113.97 -static int dummy_mmu_normal_update (struct domain *d, intpte_t fpte)
  113.98 +static int dummy_mmu_normal_update (struct domain *d, struct domain *f, 
  113.99 +                                                                intpte_t fpte)
 113.100  {
 113.101      return 0;
 113.102  }
 113.103 @@ -376,7 +411,8 @@ static int dummy_mmu_machphys_update (st
 113.104      return 0;
 113.105  }
 113.106  
 113.107 -static int dummy_update_va_mapping (struct domain *d, l1_pgentry_t pte)
 113.108 +static int dummy_update_va_mapping (struct domain *d, struct domain *f, 
 113.109 +                                                            l1_pgentry_t pte)
 113.110  {
 113.111      return 0;
 113.112  }
 113.113 @@ -386,6 +422,41 @@ static int dummy_add_to_physmap (struct 
 113.114      return 0;
 113.115  }
 113.116  
 113.117 +static int dummy_sendtrigger (struct domain *d)
 113.118 +{
 113.119 +    return 0;
 113.120 +}
 113.121 +
 113.122 +static int dummy_test_assign_device (uint32_t machine_bdf)
 113.123 +{
 113.124 +    return 0;
 113.125 +}
 113.126 +
 113.127 +static int dummy_assign_device (struct domain *d, uint32_t machine_bdf)
 113.128 +{
 113.129 +    return 0;
 113.130 +}
 113.131 +
 113.132 +static int dummy_deassign_device (struct domain *d, uint32_t machine_bdf)
 113.133 +{
 113.134 +    return 0;
 113.135 +}
 113.136 +
  113.137 +static int dummy_bind_pt_irq (struct domain *d,
          +                              struct xen_domctl_bind_pt_irq *bind)
 113.138 +{
 113.139 +    return 0;
 113.140 +}
 113.141 +
 113.142 +static int dummy_pin_mem_cacheattr (struct domain *d)
 113.143 +{
 113.144 +    return 0;
 113.145 +}
 113.146 +
 113.147 +static int dummy_ext_vcpucontext (struct domain *d, uint32_t cmd)
 113.148 +{
 113.149 +    return 0;
 113.150 +}
 113.151 +
 113.152  static int dummy_remove_from_physmap (struct domain *d1, struct domain *d2)
 113.153  {
 113.154      return 0;
 113.155 @@ -420,15 +491,17 @@ void xsm_fixup_ops (struct xsm_operation
 113.156      set_to_dummy_if_null(ops, getvcpucontext);
 113.157      set_to_dummy_if_null(ops, getvcpuinfo);
 113.158      set_to_dummy_if_null(ops, domain_settime);
 113.159 +    set_to_dummy_if_null(ops, set_target);
 113.160      set_to_dummy_if_null(ops, tbufcontrol);
 113.161      set_to_dummy_if_null(ops, readconsole);
 113.162      set_to_dummy_if_null(ops, sched_id);
 113.163      set_to_dummy_if_null(ops, setdomainmaxmem);
 113.164      set_to_dummy_if_null(ops, setdomainhandle);
 113.165      set_to_dummy_if_null(ops, setdebugging);
 113.166 -    set_to_dummy_if_null(ops, irq_permission);
 113.167 -    set_to_dummy_if_null(ops, iomem_permission);
 113.168      set_to_dummy_if_null(ops, perfcontrol);
 113.169 +    set_to_dummy_if_null(ops, debug_keys);
 113.170 +    set_to_dummy_if_null(ops, getcpuinfo);
 113.171 +    set_to_dummy_if_null(ops, availheap);
 113.172  
 113.173      set_to_dummy_if_null(ops, evtchn_unbound);
 113.174      set_to_dummy_if_null(ops, evtchn_interdomain);
 113.175 @@ -461,11 +534,13 @@ void xsm_fixup_ops (struct xsm_operation
 113.176      set_to_dummy_if_null(ops, kexec);
 113.177      set_to_dummy_if_null(ops, schedop_shutdown);
 113.178  
 113.179 +    set_to_dummy_if_null(ops, add_range);
 113.180 +    set_to_dummy_if_null(ops, remove_range);
 113.181 +
 113.182      set_to_dummy_if_null(ops, __do_xsm_op);
 113.183  
 113.184  #ifdef CONFIG_X86
 113.185      set_to_dummy_if_null(ops, shadow_control);
 113.186 -    set_to_dummy_if_null(ops, ioport_permission);
 113.187      set_to_dummy_if_null(ops, getpageframeinfo);
 113.188      set_to_dummy_if_null(ops, getmemlist);
 113.189      set_to_dummy_if_null(ops, hypercall_init);
 113.190 @@ -483,6 +558,10 @@ void xsm_fixup_ops (struct xsm_operation
 113.191      set_to_dummy_if_null(ops, microcode);
 113.192      set_to_dummy_if_null(ops, physinfo);
 113.193      set_to_dummy_if_null(ops, platform_quirk);
 113.194 +    set_to_dummy_if_null(ops, firmware_info);
 113.195 +    set_to_dummy_if_null(ops, acpi_sleep);
 113.196 +    set_to_dummy_if_null(ops, change_freq);
 113.197 +    set_to_dummy_if_null(ops, getidletime);
 113.198      set_to_dummy_if_null(ops, machine_memory_map);
 113.199      set_to_dummy_if_null(ops, domain_memory_map);
 113.200      set_to_dummy_if_null(ops, mmu_normal_update);
 113.201 @@ -490,5 +569,12 @@ void xsm_fixup_ops (struct xsm_operation
 113.202      set_to_dummy_if_null(ops, update_va_mapping);
 113.203      set_to_dummy_if_null(ops, add_to_physmap);
 113.204      set_to_dummy_if_null(ops, remove_from_physmap);
 113.205 +    set_to_dummy_if_null(ops, sendtrigger);
 113.206 +    set_to_dummy_if_null(ops, test_assign_device);
 113.207 +    set_to_dummy_if_null(ops, assign_device);
 113.208 +    set_to_dummy_if_null(ops, deassign_device);
 113.209 +    set_to_dummy_if_null(ops, bind_pt_irq);
 113.210 +    set_to_dummy_if_null(ops, pin_mem_cacheattr);
 113.211 +    set_to_dummy_if_null(ops, ext_vcpucontext);
 113.212  #endif
 113.213  }
   114.1 --- a/xen/xsm/flask/hooks.c	Fri Sep 12 14:32:45 2008 +0900
   114.2 +++ b/xen/xsm/flask/hooks.c	Fri Sep 12 14:47:40 2008 +0900
   114.3 @@ -11,6 +11,7 @@
   114.4  #include <xen/init.h>
   114.5  #include <xen/lib.h>
   114.6  #include <xen/sched.h>
   114.7 +#include <xen/paging.h>
   114.8  #include <xen/xmalloc.h>
   114.9  #include <xsm/xsm.h>
  114.10  #include <xen/spinlock.h>
  114.11 @@ -129,8 +130,7 @@ static int flask_evtchn_unbound(struct d
  114.12      if ( rc )
  114.13          goto out;
  114.14  
  114.15 -    rc = avc_has_perm(dsec->sid, newsid, SECCLASS_EVENT,
  114.16 -                                            EVENT__CREATE|EVENT__ALLOC, NULL);
  114.17 +    rc = avc_has_perm(dsec->sid, newsid, SECCLASS_EVENT, EVENT__CREATE, NULL);
  114.18      if ( rc )
  114.19          goto out;
  114.20  
  114.21 @@ -210,7 +210,22 @@ static void flask_evtchn_close_post(stru
  114.22  
  114.23  static int flask_evtchn_send(struct domain *d, struct evtchn *chn)
  114.24  {
  114.25 -    return domain_has_evtchn(d, chn, EVENT__SEND);
  114.26 +    int rc;
  114.27 +
  114.28 +    switch ( chn->state )
  114.29 +    {
  114.30 +    case ECS_INTERDOMAIN:
  114.31 +        rc = domain_has_evtchn(d, chn, EVENT__SEND);
   114.32 +        break;
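          +    /* IPIs and unbound channels never cross a domain boundary, so no
          +     * inter-domain send check is needed. */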
  114.33 +    case ECS_IPI:
  114.34 +    case ECS_UNBOUND:
  114.35 +        rc = 0;
   114.36 +        break;
  114.37 +    default:
  114.38 +        rc = -EPERM;
  114.39 +    }
  114.40 +
  114.41 +    return rc;
  114.42  }
  114.43  
  114.44  static int flask_evtchn_status(struct domain *d, struct evtchn *chn)
  114.45 @@ -340,7 +355,7 @@ static int get_mfn_sid(unsigned long mfn
  114.46      if ( mfn_valid(mfn) )
  114.47      {
  114.48          /*mfn is valid if this is a page that Xen is tracking!*/
  114.49 -        page = mfn_to_page(mfn);        
  114.50 +        page = mfn_to_page(mfn);
  114.51          rc = get_page_sid(page, sid);
  114.52      }
  114.53      else
  114.54 @@ -390,23 +405,6 @@ static int flask_memory_pin_page(struct 
  114.55      return avc_has_perm(dsec->sid, sid, SECCLASS_MMU, MMU__PINPAGE, NULL);
  114.56  }
  114.57  
  114.58 -/* Used to defer flushing of memory structures. */
  114.59 -struct percpu_mm_info {
  114.60 -#define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
  114.61 -#define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
  114.62 -#define DOP_RELOAD_LDT     (1<<2) /* Reload the LDT shadow mapping.          */
  114.63 -    unsigned int   deferred_ops;
  114.64 -    /* If non-NULL, specifies a foreign subject domain for some operations. */
  114.65 -    struct domain *foreign;
  114.66 -};
  114.67 -static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);
  114.68 -
  114.69 -/*
  114.70 - * Returns the current foreign domain; defaults to the currently-executing
  114.71 - * domain if a foreign override hasn't been specified.
  114.72 - */
  114.73 -#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)
  114.74 -
  114.75  static int flask_console_io(struct domain *d, int cmd)
  114.76  {
  114.77      u32 perm;
  114.78 @@ -506,22 +504,22 @@ static int flask_domain_create(struct do
  114.79  
  114.80      dsec1 = current->domain->ssid;
  114.81  
  114.82 -    if ( dsec1->create_sid == SECSID_NULL )
  114.83 -        dsec1->create_sid = ssidref;
   114.84 +    if ( dsec1->create_sid == SECSID_NULL )
   114.85 +        dsec1->create_sid = ssidref;
  114.86  
  114.87 -    rc = avc_has_perm(dsec1->sid, dsec1->create_sid, SECCLASS_DOMAIN, 
  114.88 -                                                        DOMAIN__CREATE, NULL);
  114.89 -    if ( rc )
   114.90 +    rc = avc_has_perm(dsec1->sid, dsec1->create_sid, SECCLASS_DOMAIN,
   114.91 +                                                        DOMAIN__CREATE, NULL);
   114.92 +    if ( rc )
  114.93      {
  114.94 -        dsec1->create_sid = SECSID_NULL;
  114.95 -        return rc;
   114.96 +        dsec1->create_sid = SECSID_NULL;
   114.97 +        return rc;
  114.98      }
  114.99  
 114.100      dsec2 = d->ssid;
 114.101      dsec2->sid = dsec1->create_sid;
 114.102  
 114.103 -    dsec1->create_sid = SECSID_NULL;
 114.104 -    dsec2->create_sid = SECSID_NULL;
  114.105 +    dsec1->create_sid = SECSID_NULL;
  114.106 +    dsec2->create_sid = SECSID_NULL;
 114.107  
 114.108      return rc;
 114.109  }
 114.110 @@ -592,6 +590,11 @@ static int flask_domain_settime(struct d
 114.111      return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, DOMAIN__SETTIME);
 114.112  }
 114.113  
 114.114 +static int flask_set_target(struct domain *d, struct domain *e)
 114.115 +{
 114.116 +    return domain_has_perm(d, e, SECCLASS_DOMAIN, DOMAIN__SET_TARGET);
 114.117 +}
 114.118 +
 114.119  static int flask_tbufcontrol(void)
 114.120  {
 114.121      return domain_has_xen(current->domain, SECCLASS_XEN);
 114.122 @@ -630,6 +633,21 @@ static int flask_setdebugging(struct dom
 114.123                                                          DOMAIN__SETDEBUGGING);
 114.124  }
 114.125  
 114.126 +static int flask_debug_keys(void)
 114.127 +{
 114.128 +    return domain_has_xen(current->domain, XEN__DEBUG);
 114.129 +}
 114.130 +
 114.131 +static int flask_getcpuinfo(void)
 114.132 +{
 114.133 +    return domain_has_xen(current->domain, XEN__GETCPUINFO);
 114.134 +}
 114.135 +
 114.136 +static int flask_availheap(void)
 114.137 +{
 114.138 +    return domain_has_xen(current->domain, XEN__HEAP);
 114.139 +}
 114.140 +
 114.141  static inline u32 resource_to_perm(uint8_t access)
 114.142  {
 114.143      if ( access )
 114.144 @@ -638,7 +656,7 @@ static inline u32 resource_to_perm(uint8
 114.145          return RESOURCE__REMOVE;
 114.146  }
 114.147  
 114.148 -static int flask_irq_permission(struct domain *d, uint8_t pirq, uint8_t access)
 114.149 +static int irq_has_perm(struct domain *d, uint8_t pirq, uint8_t access)
 114.150  {
 114.151      u32 perm;
 114.152      u32 rsid;
 114.153 @@ -665,16 +683,17 @@ static int flask_irq_permission(struct d
 114.154          return rc;
 114.155  
 114.156      rc = avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE, perm, NULL);
 114.157 -
 114.158      if ( rc )
 114.159          return rc;
 114.160  
 114.161 -    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
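          +    /* RESOURCE__USE is required only when granting access, not when
          +     * revoking it. */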
 114.162 +    if ( access )
 114.163 +        return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 114.164                                                          RESOURCE__USE, NULL);
 114.165 +    else
 114.166 +        return rc;
 114.167  }
 114.168  
 114.169 -static int flask_iomem_permission(struct domain *d, unsigned long mfn, 
 114.170 -                                                                uint8_t access)
 114.171 +static int iomem_has_perm(struct domain *d, unsigned long mfn, uint8_t access)
 114.172  {
 114.173      u32 perm;
 114.174      u32 rsid;
 114.175 @@ -684,7 +703,6 @@ static int flask_iomem_permission(struct
 114.176  
 114.177      rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE,
 114.178                                                      resource_to_perm(access));
 114.179 -
 114.180      if ( rc )
 114.181          return rc;
 114.182  
 114.183 @@ -743,8 +761,7 @@ static int flask_shadow_control(struct d
 114.184      return domain_has_perm(current->domain, d, SECCLASS_SHADOW, perm);
 114.185  }
 114.186  
 114.187 -static int flask_ioport_permission(struct domain *d, uint32_t ioport, 
 114.188 -                                                                uint8_t access)
 114.189 +static int ioport_has_perm(struct domain *d, uint32_t ioport, uint8_t access)
 114.190  {
 114.191      u32 perm;
 114.192      u32 rsid;
 114.193 @@ -774,8 +791,11 @@ static int flask_ioport_permission(struc
 114.194      if ( rc )
 114.195          return rc;
 114.196  
 114.197 -    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 114.198 +    if ( access )
 114.199 +        return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, 
 114.200                                                          RESOURCE__USE, NULL);    
 114.201 +    else
 114.202 +        return rc;
 114.203  }
 114.204  
 114.205  static int flask_getpageframeinfo(struct page_info *page)
 114.206 @@ -953,6 +973,26 @@ static int flask_platform_quirk(uint32_t
 114.207                                                              XEN__QUIRK, NULL);
 114.208  }
 114.209  
 114.210 +static int flask_firmware_info(void)
 114.211 +{
 114.212 +    return domain_has_xen(current->domain, XEN__FIRMWARE);
 114.213 +}
 114.214 +
 114.215 +static int flask_acpi_sleep(void)
 114.216 +{
 114.217 +    return domain_has_xen(current->domain, XEN__SLEEP);
 114.218 +}
 114.219 +
 114.220 +static int flask_change_freq(void)
 114.221 +{
 114.222 +    return domain_has_xen(current->domain, XEN__FREQUENCY);
 114.223 +}
 114.224 +
 114.225 +static int flask_getidletime(void)
 114.226 +{
 114.227 +    return domain_has_xen(current->domain, XEN__GETIDLE);
 114.228 +}
 114.229 +
 114.230  static int flask_machine_memory_map(void)
 114.231  {
 114.232      struct domain_security_struct *dsec;
 114.233 @@ -967,7 +1007,8 @@ static int flask_domain_memory_map(struc
 114.234      return domain_has_perm(current->domain, d, SECCLASS_MMU, MMU__MEMORYMAP);
 114.235  }
 114.236  
 114.237 -static int flask_mmu_normal_update(struct domain *d, intpte_t fpte)
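          +/* Note: the foreign domain f is now passed in explicitly, replacing
          + * the per-cpu FOREIGNDOM override removed above. */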
 114.238 +static int flask_mmu_normal_update(struct domain *d, struct domain *f, 
 114.239 +                                                                intpte_t fpte)
 114.240  {
 114.241      int rc = 0;
 114.242      u32 map_perms = MMU__MAP_READ;
 114.243 @@ -980,7 +1021,7 @@ static int flask_mmu_normal_update(struc
 114.244      if ( l1e_get_flags(l1e_from_intpte(fpte)) & _PAGE_RW )
 114.245          map_perms |= MMU__MAP_WRITE;
 114.246  
 114.247 -    fmfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(l1e_from_intpte(fpte)));
 114.248 +    fmfn = gmfn_to_mfn(f, l1e_get_pfn(l1e_from_intpte(fpte)));
 114.249  
 114.250      rc = get_mfn_sid(fmfn, &fsid);
 114.251      if ( rc )
 114.252 @@ -1003,7 +1044,8 @@ static int flask_mmu_machphys_update(str
 114.253      return avc_has_perm(dsec->sid, psid, SECCLASS_MMU, MMU__UPDATEMP, NULL);
 114.254  }
 114.255  
 114.256 -static int flask_update_va_mapping(struct domain *d, l1_pgentry_t pte)
 114.257 +static int flask_update_va_mapping(struct domain *d, struct domain *f, 
 114.258 +                                                            l1_pgentry_t pte)
 114.259  {
 114.260      int rc = 0;
 114.261      u32 psid;
 114.262 @@ -1013,7 +1055,7 @@ static int flask_update_va_mapping(struc
 114.263  
 114.264      dsec = d->ssid;
 114.265  
 114.266 -    mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(pte));        
 114.267 +    mfn = gmfn_to_mfn(f, l1e_get_pfn(pte));        
 114.268      rc = get_mfn_sid(mfn, &psid);
 114.269      if ( rc )
 114.270          return rc;
 114.271 @@ -1033,8 +1075,163 @@ static int flask_remove_from_physmap(str
 114.272  {
 114.273      return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
 114.274  }
 114.275 +
 114.276 +static int flask_sendtrigger(struct domain *d)
 114.277 +{
 114.278 +    return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, DOMAIN__TRIGGER);
 114.279 +}
 114.280 +
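          +/* machine_bdf identifies a physical PCI device (bus:device.function). */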
 114.281 +static int flask_test_assign_device(uint32_t machine_bdf)
 114.282 +{
 114.283 +    u32 rsid;
 114.284 +    int rc = -EPERM;
 114.285 +    struct domain_security_struct *ssec = current->domain->ssid;
 114.286 +
 114.287 +    rc = security_device_sid(machine_bdf, &rsid);
 114.288 +    if ( rc )
 114.289 +        return rc;
 114.290 +
  114.291 +    return avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE,
          +                        RESOURCE__STAT_DEVICE, NULL);
 114.292 +}
 114.293 +
 114.294 +static int flask_assign_device(struct domain *d, uint32_t machine_bdf)
 114.295 +{
 114.296 +    u32 rsid;
 114.297 +    int rc = -EPERM;
 114.298 +    struct domain_security_struct *ssec, *tsec;
 114.299 +
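          +    /* Three checks: the caller may alter d's resources, the caller
          +     * may hand out this device, and d itself may use it. */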
 114.300 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__ADD);
 114.301 +    if ( rc )
 114.302 +        return rc;
 114.303 +
 114.304 +    rc = security_device_sid(machine_bdf, &rsid);
 114.305 +    if ( rc )
 114.306 +        return rc;
 114.307 +
 114.308 +    ssec = current->domain->ssid;
  114.309 +    rc = avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE,
          +                      RESOURCE__ADD_DEVICE, NULL);
 114.310 +    if ( rc )
 114.311 +        return rc;
 114.312 +
 114.313 +    tsec = d->ssid;
 114.314 +    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__USE, NULL);
 114.315 +}
 114.316 +
 114.317 +static int flask_deassign_device(struct domain *d, uint32_t machine_bdf)
 114.318 +{
 114.319 +    u32 rsid;
 114.320 +    int rc = -EPERM;
 114.321 +    struct domain_security_struct *ssec = current->domain->ssid;
 114.322 +
 114.323 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__REMOVE);
 114.324 +    if ( rc )
 114.325 +        return rc;
 114.326 +
 114.327 +    rc = security_device_sid(machine_bdf, &rsid);
 114.328 +    if ( rc )
 114.329 +        return rc;
 114.330 +
  114.331 +    return avc_has_perm(ssec->sid, rsid, SECCLASS_RESOURCE,
          +                        RESOURCE__REMOVE_DEVICE, NULL);
 114.332 +}
 114.333 +
  114.334 +static int flask_bind_pt_irq(struct domain *d,
          +                             struct xen_domctl_bind_pt_irq *bind)
 114.335 +{
 114.336 +    u32 rsid;
 114.337 +    int rc = -EPERM;
 114.338 +    struct domain_security_struct *ssec, *tsec;
 114.339 +
 114.340 +    rc = domain_has_perm(current->domain, d, SECCLASS_RESOURCE, RESOURCE__ADD);
 114.341 +    if ( rc )
 114.342 +        return rc;
 114.343 +
 114.344 +    rc = security_pirq_sid(bind->machine_irq, &rsid);
 114.345 +    if ( rc )
 114.346 +        return rc;
 114.347 +
 114.348 +    ssec = current->domain->ssid;
 114.349 +    rc = avc_has_perm(ssec->sid, rsid, SECCLASS_HVM, HVM__BIND_IRQ, NULL);
 114.350 +    if ( rc )
 114.351 +        return rc;
 114.352 +
 114.353 +    tsec = d->ssid;
 114.354 +    return avc_has_perm(tsec->sid, rsid, SECCLASS_RESOURCE, RESOURCE__USE, NULL);
 114.355 +}
 114.356 +
  114.357 +static int flask_pin_mem_cacheattr(struct domain *d)
 114.358 +{
 114.359 +    return domain_has_perm(current->domain, d, SECCLASS_HVM, HVM__CACHEATTR);
 114.360 +}
 114.361 +
  114.362 +static int flask_ext_vcpucontext(struct domain *d, uint32_t cmd)
 114.363 +{
 114.364 +    u32 perm;
 114.365 +
  114.366 +    switch ( cmd )
  114.367 +    {
  114.368 +    case XEN_DOMCTL_set_ext_vcpucontext:
  114.369 +        perm = DOMAIN__SETEXTVCPUCONTEXT;
  114.370 +        break;
  114.371 +    case XEN_DOMCTL_get_ext_vcpucontext:
  114.372 +        perm = DOMAIN__GETEXTVCPUCONTEXT;
  114.373 +        break;
  114.374 +    default:
  114.375 +        return -EPERM;
  114.376 +    }
 114.377 +
 114.378 +    return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, perm);
 114.379 +}
 114.380  #endif
 114.381  
 114.382 +static int io_has_perm(struct domain *d, char *name, unsigned long s, 
 114.383 +                                                    unsigned long e, u32 access)
 114.384 +{
 114.385 +    int rc = -EPERM;
 114.386 +
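          +    /* Only the two range endpoints are checked; the rangeset label
          +     * selects the resource-specific permission check. */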
 114.387 +    if ( strcmp(name, "I/O Memory") == 0 )
 114.388 +    {
 114.389 +        rc = iomem_has_perm(d, s, access);
 114.390 +        if ( rc )
 114.391 +            return rc;
 114.392 +
 114.393 +        if ( s != e )
  114.394 +            rc = iomem_has_perm(d, e, access);
 114.395 +    }
 114.396 +    else if ( strcmp(name, "Interrupts") == 0 )
 114.397 +    {
 114.398 +        rc = irq_has_perm(d, s, access);
 114.399 +        if ( rc )
 114.400 +            return rc;
 114.401 +
 114.402 +        if ( s != e )
 114.403 +            rc = irq_has_perm(d, e, access);
 114.404 +    }
 114.405 +#ifdef CONFIG_X86
 114.406 +    else if ( strcmp(name, "I/O Ports") == 0 )
 114.407 +    {
 114.408 +        rc = ioport_has_perm(d, s, access);
 114.409 +        if ( rc )
 114.410 +            return rc;
 114.411 +
 114.412 +        if ( s != e )
 114.413 +            rc = ioport_has_perm(d, e, access);
 114.414 +    }
 114.415 +#endif
 114.416 +
  114.417 +    return rc;
 114.418 +}
 114.419 +
 114.420 +static int flask_add_range(struct domain *d, char *name, unsigned long s,
 114.421 +                                                                    unsigned long e)
 114.422 +{
 114.423 +    return io_has_perm(d, name, s, e, 1);
 114.424 +}
 114.425 +
 114.426 +static int flask_remove_range(struct domain *d, char *name, unsigned long s,
 114.427 +                                                                    unsigned long e)
 114.428 +{
 114.429 +    return io_has_perm(d, name, s, e, 0);
 114.430 +}
 114.431 +
 114.432  long do_flask_op(XEN_GUEST_HANDLE(xsm_op_t) u_flask_op);
 114.433  
 114.434  static struct xsm_operations flask_ops = {
 114.435 @@ -1052,15 +1249,17 @@ static struct xsm_operations flask_ops =
 114.436      .getvcpucontext = flask_getvcpucontext,
 114.437      .getvcpuinfo = flask_getvcpuinfo,
 114.438      .domain_settime = flask_domain_settime,
 114.439 +    .set_target = flask_set_target,
 114.440      .tbufcontrol = flask_tbufcontrol,
 114.441      .readconsole = flask_readconsole,
 114.442      .sched_id = flask_sched_id,
 114.443      .setdomainmaxmem = flask_setdomainmaxmem,
 114.444      .setdomainhandle = flask_setdomainhandle,
 114.445      .setdebugging = flask_setdebugging,
 114.446 -    .irq_permission = flask_irq_permission,
 114.447 -    .iomem_permission = flask_iomem_permission,
 114.448      .perfcontrol = flask_perfcontrol,
 114.449 +    .debug_keys = flask_debug_keys,
 114.450 +    .getcpuinfo = flask_getcpuinfo,
 114.451 +    .availheap = flask_availheap,
 114.452  
 114.453      .evtchn_unbound = flask_evtchn_unbound,
 114.454      .evtchn_interdomain = flask_evtchn_interdomain,
 114.455 @@ -1093,11 +1292,13 @@ static struct xsm_operations flask_ops =
 114.456      .kexec = flask_kexec,
 114.457      .schedop_shutdown = flask_schedop_shutdown,
 114.458  
 114.459 +    .add_range = flask_add_range,
 114.460 +    .remove_range = flask_remove_range,
 114.461 +
 114.462      .__do_xsm_op = do_flask_op,
 114.463  
 114.464  #ifdef CONFIG_X86
 114.465      .shadow_control = flask_shadow_control,
 114.466 -    .ioport_permission = flask_ioport_permission,
 114.467      .getpageframeinfo = flask_getpageframeinfo,
 114.468      .getmemlist = flask_getmemlist,
 114.469      .hypercall_init = flask_hypercall_init,
 114.470 @@ -1114,6 +1315,10 @@ static struct xsm_operations flask_ops =
 114.471      .microcode = flask_microcode,
 114.472      .physinfo = flask_physinfo,
 114.473      .platform_quirk = flask_platform_quirk,
 114.474 +    .firmware_info = flask_firmware_info,
 114.475 +    .acpi_sleep = flask_acpi_sleep,
 114.476 +    .change_freq = flask_change_freq,
 114.477 +    .getidletime = flask_getidletime,
 114.478      .machine_memory_map = flask_machine_memory_map,
 114.479      .domain_memory_map = flask_domain_memory_map,
 114.480      .mmu_normal_update = flask_mmu_normal_update,
 114.481 @@ -1121,6 +1326,13 @@ static struct xsm_operations flask_ops =
 114.482      .update_va_mapping = flask_update_va_mapping,
 114.483      .add_to_physmap = flask_add_to_physmap,
 114.484      .remove_from_physmap = flask_remove_from_physmap,
 114.485 +    .sendtrigger = flask_sendtrigger,
 114.486 +    .test_assign_device = flask_test_assign_device,
 114.487 +    .assign_device = flask_assign_device,
 114.488 +    .deassign_device = flask_deassign_device,
 114.489 +    .bind_pt_irq = flask_bind_pt_irq,
 114.490 +    .pin_mem_cacheattr = flask_pin_mem_cacheattr,
 114.491 +    .ext_vcpucontext = flask_ext_vcpucontext,
 114.492  #endif
 114.493  };
 114.494  
   115.1 --- a/xen/xsm/flask/include/av_perm_to_string.h	Fri Sep 12 14:32:45 2008 +0900
   115.2 +++ b/xen/xsm/flask/include/av_perm_to_string.h	Fri Sep 12 14:47:40 2008 +0900
   115.3 @@ -17,11 +17,19 @@
   115.4     S_(SECCLASS_XEN, XEN__PRIVPROFILE, "privprofile")
   115.5     S_(SECCLASS_XEN, XEN__NONPRIVPROFILE, "nonprivprofile")
   115.6     S_(SECCLASS_XEN, XEN__KEXEC, "kexec")
   115.7 +   S_(SECCLASS_XEN, XEN__FIRMWARE, "firmware")
   115.8 +   S_(SECCLASS_XEN, XEN__SLEEP, "sleep")
   115.9 +   S_(SECCLASS_XEN, XEN__FREQUENCY, "frequency")
  115.10 +   S_(SECCLASS_XEN, XEN__GETIDLE, "getidle")
  115.11 +   S_(SECCLASS_XEN, XEN__DEBUG, "debug")
  115.12 +   S_(SECCLASS_XEN, XEN__GETCPUINFO, "getcpuinfo")
  115.13 +   S_(SECCLASS_XEN, XEN__HEAP, "heap")
  115.14     S_(SECCLASS_DOMAIN, DOMAIN__SETVCPUCONTEXT, "setvcpucontext")
  115.15     S_(SECCLASS_DOMAIN, DOMAIN__PAUSE, "pause")
  115.16     S_(SECCLASS_DOMAIN, DOMAIN__UNPAUSE, "unpause")
  115.17     S_(SECCLASS_DOMAIN, DOMAIN__RESUME, "resume")
  115.18     S_(SECCLASS_DOMAIN, DOMAIN__CREATE, "create")
  115.19 +   S_(SECCLASS_DOMAIN, DOMAIN__TRANSITION, "transition")
  115.20     S_(SECCLASS_DOMAIN, DOMAIN__MAX_VCPUS, "max_vcpus")
  115.21     S_(SECCLASS_DOMAIN, DOMAIN__DESTROY, "destroy")
  115.22     S_(SECCLASS_DOMAIN, DOMAIN__SETVCPUAFFINITY, "setvcpuaffinity")
  115.23 @@ -34,11 +42,14 @@
  115.24     S_(SECCLASS_DOMAIN, DOMAIN__SETDOMAINHANDLE, "setdomainhandle")
  115.25     S_(SECCLASS_DOMAIN, DOMAIN__SETDEBUGGING, "setdebugging")
  115.26     S_(SECCLASS_DOMAIN, DOMAIN__HYPERCALL, "hypercall")
  115.27 -   S_(SECCLASS_DOMAIN, DOMAIN__TRANSITION, "transition")
  115.28     S_(SECCLASS_DOMAIN, DOMAIN__SETTIME, "settime")
  115.29 +   S_(SECCLASS_DOMAIN, DOMAIN__SET_TARGET, "set_target")
  115.30     S_(SECCLASS_DOMAIN, DOMAIN__SHUTDOWN, "shutdown")
  115.31     S_(SECCLASS_DOMAIN, DOMAIN__SETADDRSIZE, "setaddrsize")
  115.32     S_(SECCLASS_DOMAIN, DOMAIN__GETADDRSIZE, "getaddrsize")
  115.33 +   S_(SECCLASS_DOMAIN, DOMAIN__TRIGGER, "trigger")
  115.34 +   S_(SECCLASS_DOMAIN, DOMAIN__GETEXTVCPUCONTEXT, "getextvcpucontext")
  115.35 +   S_(SECCLASS_DOMAIN, DOMAIN__SETEXTVCPUCONTEXT, "setextvcpucontext")
  115.36     S_(SECCLASS_HVM, HVM__SETHVMC, "sethvmc")
  115.37     S_(SECCLASS_HVM, HVM__GETHVMC, "gethvmc")
  115.38     S_(SECCLASS_HVM, HVM__SETPARAM, "setparam")
  115.39 @@ -46,14 +57,13 @@
  115.40     S_(SECCLASS_HVM, HVM__PCILEVEL, "pcilevel")
  115.41     S_(SECCLASS_HVM, HVM__IRQLEVEL, "irqlevel")
  115.42     S_(SECCLASS_HVM, HVM__PCIROUTE, "pciroute")
  115.43 +   S_(SECCLASS_HVM, HVM__BIND_IRQ, "bind_irq")
  115.44 +   S_(SECCLASS_HVM, HVM__CACHEATTR, "cacheattr")
  115.45     S_(SECCLASS_EVENT, EVENT__BIND, "bind")
  115.46 -   S_(SECCLASS_EVENT, EVENT__CLOSE, "close")
  115.47     S_(SECCLASS_EVENT, EVENT__SEND, "send")
  115.48     S_(SECCLASS_EVENT, EVENT__STATUS, "status")
  115.49 -   S_(SECCLASS_EVENT, EVENT__UNMASK, "unmask")
  115.50     S_(SECCLASS_EVENT, EVENT__NOTIFY, "notify")
  115.51     S_(SECCLASS_EVENT, EVENT__CREATE, "create")
  115.52 -   S_(SECCLASS_EVENT, EVENT__ALLOC, "alloc")
  115.53     S_(SECCLASS_EVENT, EVENT__VECTOR, "vector")
  115.54     S_(SECCLASS_EVENT, EVENT__RESET, "reset")
  115.55     S_(SECCLASS_GRANT, GRANT__MAP_READ, "map_read")
  115.56 @@ -87,6 +97,9 @@
  115.57     S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_IOPORT, "remove_ioport")
  115.58     S_(SECCLASS_RESOURCE, RESOURCE__ADD_IOMEM, "add_iomem")
  115.59     S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_IOMEM, "remove_iomem")
  115.60 +   S_(SECCLASS_RESOURCE, RESOURCE__STAT_DEVICE, "stat_device")
  115.61 +   S_(SECCLASS_RESOURCE, RESOURCE__ADD_DEVICE, "add_device")
  115.62 +   S_(SECCLASS_RESOURCE, RESOURCE__REMOVE_DEVICE, "remove_device")
  115.63     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_AV, "compute_av")
  115.64     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, "compute_create")
  115.65     S_(SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, "compute_member")
   116.1 --- a/xen/xsm/flask/include/av_permissions.h	Fri Sep 12 14:32:45 2008 +0900
   116.2 +++ b/xen/xsm/flask/include/av_permissions.h	Fri Sep 12 14:47:40 2008 +0900
   116.3 @@ -17,29 +17,40 @@
   116.4  #define XEN__PRIVPROFILE                          0x00008000UL
   116.5  #define XEN__NONPRIVPROFILE                       0x00010000UL
   116.6  #define XEN__KEXEC                                0x00020000UL
   116.7 +#define XEN__FIRMWARE                             0x00040000UL
   116.8 +#define XEN__SLEEP                                0x00080000UL
   116.9 +#define XEN__FREQUENCY                            0x00100000UL
  116.10 +#define XEN__GETIDLE                              0x00200000UL
  116.11 +#define XEN__DEBUG                                0x00400000UL
  116.12 +#define XEN__GETCPUINFO                           0x00800000UL
  116.13 +#define XEN__HEAP                                 0x01000000UL
  116.14  
  116.15  #define DOMAIN__SETVCPUCONTEXT                    0x00000001UL
  116.16  #define DOMAIN__PAUSE                             0x00000002UL
  116.17  #define DOMAIN__UNPAUSE                           0x00000004UL
  116.18  #define DOMAIN__RESUME                            0x00000008UL
  116.19  #define DOMAIN__CREATE                            0x00000010UL
  116.20 -#define DOMAIN__MAX_VCPUS                         0x00000020UL
  116.21 -#define DOMAIN__DESTROY                           0x00000040UL
  116.22 -#define DOMAIN__SETVCPUAFFINITY                   0x00000080UL
  116.23 -#define DOMAIN__GETVCPUAFFINITY                   0x00000100UL
  116.24 -#define DOMAIN__SCHEDULER                         0x00000200UL
  116.25 -#define DOMAIN__GETDOMAININFO                     0x00000400UL
  116.26 -#define DOMAIN__GETVCPUINFO                       0x00000800UL
  116.27 -#define DOMAIN__GETVCPUCONTEXT                    0x00001000UL
  116.28 -#define DOMAIN__SETDOMAINMAXMEM                   0x00002000UL
  116.29 -#define DOMAIN__SETDOMAINHANDLE                   0x00004000UL
  116.30 -#define DOMAIN__SETDEBUGGING                      0x00008000UL
  116.31 -#define DOMAIN__HYPERCALL                         0x00010000UL
  116.32 -#define DOMAIN__TRANSITION                        0x00020000UL
  116.33 +#define DOMAIN__TRANSITION                        0x00000020UL
  116.34 +#define DOMAIN__MAX_VCPUS                         0x00000040UL
  116.35 +#define DOMAIN__DESTROY                           0x00000080UL
  116.36 +#define DOMAIN__SETVCPUAFFINITY                   0x00000100UL
  116.37 +#define DOMAIN__GETVCPUAFFINITY                   0x00000200UL
  116.38 +#define DOMAIN__SCHEDULER                         0x00000400UL
  116.39 +#define DOMAIN__GETDOMAININFO                     0x00000800UL
  116.40 +#define DOMAIN__GETVCPUINFO                       0x00001000UL
  116.41 +#define DOMAIN__GETVCPUCONTEXT                    0x00002000UL
  116.42 +#define DOMAIN__SETDOMAINMAXMEM                   0x00004000UL
  116.43 +#define DOMAIN__SETDOMAINHANDLE                   0x00008000UL
  116.44 +#define DOMAIN__SETDEBUGGING                      0x00010000UL
  116.45 +#define DOMAIN__HYPERCALL                         0x00020000UL
  116.46  #define DOMAIN__SETTIME                           0x00040000UL
  116.47 -#define DOMAIN__SHUTDOWN                          0x00080000UL
  116.48 -#define DOMAIN__SETADDRSIZE                       0x00100000UL
  116.49 -#define DOMAIN__GETADDRSIZE                       0x00200000UL
  116.50 +#define DOMAIN__SET_TARGET                        0x00080000UL
  116.51 +#define DOMAIN__SHUTDOWN                          0x00100000UL
  116.52 +#define DOMAIN__SETADDRSIZE                       0x00200000UL
  116.53 +#define DOMAIN__GETADDRSIZE                       0x00400000UL
  116.54 +#define DOMAIN__TRIGGER                           0x00800000UL
  116.55 +#define DOMAIN__GETEXTVCPUCONTEXT                 0x01000000UL
  116.56 +#define DOMAIN__SETEXTVCPUCONTEXT                 0x02000000UL
  116.57  
  116.58  #define HVM__SETHVMC                              0x00000001UL
  116.59  #define HVM__GETHVMC                              0x00000002UL
  116.60 @@ -48,17 +59,16 @@
  116.61  #define HVM__PCILEVEL                             0x00000010UL
  116.62  #define HVM__IRQLEVEL                             0x00000020UL
  116.63  #define HVM__PCIROUTE                             0x00000040UL
  116.64 +#define HVM__BIND_IRQ                             0x00000080UL
  116.65 +#define HVM__CACHEATTR                            0x00000100UL
  116.66  
  116.67  #define EVENT__BIND                               0x00000001UL
  116.68 -#define EVENT__CLOSE                              0x00000002UL
  116.69 -#define EVENT__SEND                               0x00000004UL
  116.70 -#define EVENT__STATUS                             0x00000008UL
  116.71 -#define EVENT__UNMASK                             0x00000010UL
  116.72 -#define EVENT__NOTIFY                             0x00000020UL
  116.73 -#define EVENT__CREATE                             0x00000040UL
  116.74 -#define EVENT__ALLOC                              0x00000080UL
  116.75 -#define EVENT__VECTOR                             0x00000100UL
  116.76 -#define EVENT__RESET                              0x00000200UL
  116.77 +#define EVENT__SEND                               0x00000002UL
  116.78 +#define EVENT__STATUS                             0x00000004UL
  116.79 +#define EVENT__NOTIFY                             0x00000008UL
  116.80 +#define EVENT__CREATE                             0x00000010UL
  116.81 +#define EVENT__VECTOR                             0x00000020UL
  116.82 +#define EVENT__RESET                              0x00000040UL
  116.83  
  116.84  #define GRANT__MAP_READ                           0x00000001UL
  116.85  #define GRANT__MAP_WRITE                          0x00000002UL
  116.86 @@ -94,6 +104,9 @@
  116.87  #define RESOURCE__REMOVE_IOPORT                   0x00000040UL
  116.88  #define RESOURCE__ADD_IOMEM                       0x00000080UL
  116.89  #define RESOURCE__REMOVE_IOMEM                    0x00000100UL
  116.90 +#define RESOURCE__STAT_DEVICE                     0x00000200UL
  116.91 +#define RESOURCE__ADD_DEVICE                      0x00000400UL
  116.92 +#define RESOURCE__REMOVE_DEVICE                   0x00000800UL
  116.93  
  116.94  #define SECURITY__COMPUTE_AV                      0x00000001UL
  116.95  #define SECURITY__COMPUTE_CREATE                  0x00000002UL
   117.1 --- a/xen/xsm/flask/include/flask.h	Fri Sep 12 14:32:45 2008 +0900
   117.2 +++ b/xen/xsm/flask/include/flask.h	Fri Sep 12 14:47:40 2008 +0900
   117.3 @@ -1,6 +1,6 @@
   117.4  /* This file is automatically generated.  Do not edit. */
   117.5 -#ifndef _FLASK_FLASK_H_
   117.6 -#define _FLASK_FLASK_H_
   117.7 +#ifndef _SELINUX_FLASK_H_
   117.8 +#define _SELINUX_FLASK_H_
   117.9  
  117.10  /*
  117.11   * Security object class definitions
  117.12 @@ -27,10 +27,9 @@
  117.13  #define SECINITSID_SECURITY                             7
  117.14  #define SECINITSID_IOPORT                               8
  117.15  #define SECINITSID_IOMEM                                9
  117.16 -#define SECINITSID_VCPU                                 10
  117.17 -#define SECINITSID_VIRQ                                 11
  117.18 -#define SECINITSID_PIRQ                                 12
  117.19 +#define SECINITSID_PIRQ                                 10
  117.20 +#define SECINITSID_DEVICE                               11
  117.21  
  117.22 -#define SECINITSID_NUM                                  12
  117.23 +#define SECINITSID_NUM                                  11
  117.24  
  117.25  #endif
   118.1 --- a/xen/xsm/flask/include/initial_sid_to_string.h	Fri Sep 12 14:32:45 2008 +0900
   118.2 +++ b/xen/xsm/flask/include/initial_sid_to_string.h	Fri Sep 12 14:47:40 2008 +0900
   118.3 @@ -11,8 +11,7 @@ static char *initial_sid_to_string[] =
   118.4      "security",
   118.5      "ioport",
   118.6      "iomem",
   118.7 -    "vcpu",
   118.8 -    "virq",
   118.9      "pirq",
  118.10 +    "device",
  118.11  };
  118.12  
   119.1 --- a/xen/xsm/flask/include/security.h	Fri Sep 12 14:32:45 2008 +0900
   119.2 +++ b/xen/xsm/flask/include/security.h	Fri Sep 12 14:47:40 2008 +0900
   119.3 @@ -69,14 +69,12 @@ int security_get_user_sids(u32 callsid, 
   119.4  
   119.5  int security_pirq_sid(int pirq, u32 *out_sid);
   119.6  
   119.7 -int security_virq_sid(int virq, u32 *out_sid);
   119.8 -
   119.9 -int security_vcpu_sid(int vcpu, u32 *out_sid);
  119.10 -
  119.11  int security_iomem_sid(unsigned long, u32 *out_sid);
  119.12  
  119.13  int security_ioport_sid(u32 ioport, u32 *out_sid);
  119.14  
  119.15 +int security_device_sid(u32 device, u32 *out_sid);
  119.16 +
  119.17  int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid,
  119.18                                                                      u16 tclass);
  119.19  
   120.1 --- a/xen/xsm/flask/ss/policydb.h	Fri Sep 12 14:32:45 2008 +0900
   120.2 +++ b/xen/xsm/flask/ss/policydb.h	Fri Sep 12 14:47:40 2008 +0900
   120.3 @@ -133,10 +133,9 @@ struct ocontext {
   120.4      union {
   120.5          char *name;    /* name of initial SID, fs, netif, fstype, path */
   120.6          int pirq;
   120.7 -        int virq;
   120.8 -        int vcpu;
   120.9          u32 ioport;
  120.10          unsigned long iomem;
  120.11 +        u32 device;
  120.12      } u;
  120.13      struct context context[2];    /* security context(s) */
  120.14      u32 sid[2];    /* SID(s) */
  120.15 @@ -157,11 +156,11 @@ struct ocontext {
  120.16  /* object context array indices */
  120.17  #define OCON_ISID    0    /* initial SIDs */
  120.18  #define OCON_PIRQ    1    /* physical irqs */
  120.19 -#define OCON_VIRQ    2    /* virtual irqs */
  120.20 -#define OCON_VCPU    3    /* virtual cpus */
  120.21 -#define OCON_IOPORT  4    /* io ports */
  120.22 -#define OCON_IOMEM   5    /* io memory */
  120.23 -#define OCON_DUMMY   6
  120.24 +#define OCON_IOPORT  2    /* io ports */
  120.25 +#define OCON_IOMEM   3    /* io memory */
  120.26 +#define OCON_DEVICE  4    /* pci devices */
  120.27 +#define OCON_DUMMY1  5    /* reserved */
  120.28 +#define OCON_DUMMY2  6    /* reserved */
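          +/* The reserved slots keep OCON_NUM, and hence the policy image
          +   layout, unchanged. */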
  120.29  #define OCON_NUM     7
  120.30  
  120.31  /* The policy database */
   121.1 --- a/xen/xsm/flask/ss/services.c	Fri Sep 12 14:32:45 2008 +0900
   121.2 +++ b/xen/xsm/flask/ss/services.c	Fri Sep 12 14:47:40 2008 +0900
   121.3 @@ -1418,6 +1418,46 @@ out:
   121.4      return rc;
   121.5  }
   121.6  
    121.7 +/**
    121.8 + * security_device_sid - Obtain the SID for a PCI device.
    121.9 + * @device: device identifier (bus:device.function)
   121.10 + * @out_sid: security identifier
   121.11 + */
  121.12 +int security_device_sid(u32 device, u32 *out_sid)
  121.13 +{
  121.14 +    struct ocontext *c;
  121.15 +    int rc = 0;
  121.16 +
  121.17 +    POLICY_RDLOCK;
  121.18 +
  121.19 +    c = policydb.ocontexts[OCON_DEVICE];
  121.20 +    while ( c )
  121.21 +    {
  121.22 +        if ( c->u.device == device )