ia64/xen-unstable

changeset 6979:f8e7af29daa1

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 20 09:43:46 2005 +0000 (2005-09-20)
parents a6b72464a042 750ad97f37b0
children 9776d03bf108 ee8226e15e9f
files Makefile docs/Makefile docs/src/interface.tex docs/src/interface/architecture.tex docs/src/interface/debugging.tex docs/src/interface/devices.tex docs/src/interface/further_info.tex docs/src/interface/hypercalls.tex docs/src/interface/memory.tex docs/src/interface/scheduling.tex docs/src/user.tex docs/src/user/build.tex docs/src/user/control_software.tex docs/src/user/debian.tex docs/src/user/domain_configuration.tex docs/src/user/domain_filesystem.tex docs/src/user/domain_mgmt.tex docs/src/user/glossary.tex docs/src/user/installation.tex docs/src/user/introduction.tex docs/src/user/redhat.tex docs/src/user/start_addl_dom.tex tools/firmware/acpi/acpi_madt.c tools/firmware/acpi/acpi_madt.h tools/firmware/vmxassist/Makefile tools/firmware/vmxassist/acpi_madt.c tools/firmware/vmxassist/vmxloader.c tools/libxc/xc_vmx_build.c tools/python/xen/xend/image.py tools/vtpm/Makefile tools/vtpm/README tools/vtpm/tpm_emulator.patch tools/vtpm/vtpm.patch tools/vtpm_manager/README tools/vtpm_manager/Rules.mk tools/vtpm_manager/crypto/Makefile tools/vtpm_manager/manager/Makefile tools/vtpm_manager/manager/dmictl.c tools/vtpm_manager/manager/securestorage.c tools/vtpm_manager/manager/vtpm_manager.c tools/vtpm_manager/manager/vtpmpriv.h tools/vtpm_manager/tcs/Makefile tools/vtpm_manager/tcs/contextmgr.c tools/vtpm_manager/tcs/contextmgr.h tools/vtpm_manager/tcs/tcs.c tools/vtpm_manager/tcs/tcs.h tools/vtpm_manager/tcs/transmit.c tools/vtpm_manager/util/Makefile tools/vtpm_manager/util/tcg.h xen/arch/x86/vmx_platform.c
line diff
     1.1 --- a/Makefile	Tue Sep 20 09:43:29 2005 +0000
     1.2 +++ b/Makefile	Tue Sep 20 09:43:46 2005 +0000
     1.3 @@ -166,27 +166,25 @@ uninstall: D=$(DESTDIR)
     1.4  uninstall:
     1.5  	[ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s`
     1.6  	rm -rf $(D)/etc/init.d/xend*
     1.7 -	rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
     1.8 -	rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
     1.9 -	rm -rf $(D)/usr/$(LIBDIR)/share/xen $(D)/usr/$(LIBDIR)/libxenstore*
    1.10 +	rm -rf $(D)/etc/hotplug/xen-backend.agent
    1.11  	rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
    1.12 -	rm -rf $(D)/usr/include/xcs_proto.h $(D)/usr/include/xc.h
    1.13 -	rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
    1.14 -	rm -rf $(D)/usr/sbin/xcs $(D)/usr/sbin/xcsdump $(D)/usr/sbin/xen*
    1.15 -	rm -rf $(D)/usr/sbin/netfix
    1.16 -	rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm
    1.17 -	rm -rf $(D)/usr/share/doc/xen  $(D)/usr/man/man*/xentrace*
    1.18 -	rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
    1.19  	rm -rf $(D)/boot/*xen*
    1.20  	rm -rf $(D)/lib/modules/*xen*
    1.21 +	rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/lomount
    1.22  	rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
    1.23  	rm -rf $(D)/usr/bin/xc_shadow
    1.24 -	rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
    1.25 +	rm -rf $(D)/usr/include/xenctrl.h
    1.26 +	rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
    1.27 +	rm -rf $(D)/usr/include/xen
    1.28 +	rm -rf $(D)/usr/$(LIBDIR)/libxenctrl* $(D)/usr/$(LIBDIR)/libxenguest*
    1.29 +	rm -rf $(D)/usr/$(LIBDIR)/libxenstore*
    1.30 +	rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/$(LIBDIR)/xen 
    1.31 +	rm -rf $(D)/usr/libexec/xen
    1.32 +	rm -rf $(D)/usr/sbin/xen* $(D)/usr/sbin/netfix $(D)/usr/sbin/xm
    1.33 +	rm -rf $(D)/usr/share/doc/xen
    1.34 +	rm -rf $(D)/usr/share/xen
    1.35  	rm -rf $(D)/usr/share/man/man1/xen*
    1.36  	rm -rf $(D)/usr/share/man/man8/xen*
    1.37 -	rm -rf $(D)/usr/lib/xen
    1.38 -	rm -rf $(D)/etc/hotplug.d/xen-backend
    1.39 -	rm -rf $(D)/etc/hotplug/xen-backend.agent
    1.40  
    1.41  # Legacy targets for compatibility
    1.42  linux24:
     2.1 --- a/docs/Makefile	Tue Sep 20 09:43:29 2005 +0000
     2.2 +++ b/docs/Makefile	Tue Sep 20 09:43:46 2005 +0000
     2.3 @@ -12,7 +12,7 @@ DOXYGEN		:= doxygen
     2.4  
     2.5  pkgdocdir	:= /usr/share/doc/xen
     2.6  
     2.7 -DOC_TEX		:= $(wildcard src/*.tex)
     2.8 +DOC_TEX		:= src/user.tex src/interface.tex
     2.9  DOC_PS		:= $(patsubst src/%.tex,ps/%.ps,$(DOC_TEX))
    2.10  DOC_PDF		:= $(patsubst src/%.tex,pdf/%.pdf,$(DOC_TEX))
    2.11  DOC_HTML	:= $(patsubst src/%.tex,html/%/index.html,$(DOC_TEX))
     3.1 --- a/docs/src/interface.tex	Tue Sep 20 09:43:29 2005 +0000
     3.2 +++ b/docs/src/interface.tex	Tue Sep 20 09:43:46 2005 +0000
     3.3 @@ -87,1084 +87,23 @@ itself, allows the Xen framework to sepa
     3.4  mechanism and policy within the system.
     3.5  
     3.6  
     3.7 -
     3.8 -\chapter{Virtual Architecture}
     3.9 -
    3.10 -On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
    3.11 -has full access to the physical memory available in the system and is
    3.12 -responsible for allocating portions of it to the domains.  Guest
    3.13 -operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
    3.14 -they see fit. Segmentation is used to prevent the guest OS from
    3.15 -accessing the portion of the address space that is reserved for
    3.16 -Xen. We expect most guest operating systems will use ring 1 for their
    3.17 -own operation and place applications in ring 3.
    3.18 -
    3.19 -In this chapter we consider the basic virtual architecture provided 
    3.20 -by Xen: the basic CPU state, exception and interrupt handling, and
    3.21 -time. Other aspects such as memory and device access are discussed 
    3.22 -in later chapters. 
    3.23 -
    3.24 -\section{CPU state}
    3.25 -
    3.26 -All privileged state must be handled by Xen.  The guest OS has no
    3.27 -direct access to CR3 and is not permitted to update privileged bits in
    3.28 -EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen; 
    3.29 -these are analogous to system calls but occur from ring 1 to ring 0. 
    3.30 -
    3.31 -A list of all hypercalls is given in Appendix~\ref{a:hypercalls}. 
    3.32 -
    3.33 -
    3.34 -
    3.35 -\section{Exceptions}
    3.36 -
    3.37 -A virtual IDT is provided --- a domain can submit a table of trap
    3.38 -handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
    3.39 -handlers are identical to native x86 handlers, although the page-fault
    3.40 -handler is somewhat different.
    3.41 -
    3.42 -
    3.43 -\section{Interrupts and events}
    3.44 -
    3.45 -Interrupts are virtualized by mapping them to \emph{events}, which are
    3.46 -delivered asynchronously to the target domain using a callback
    3.47 -supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
    3.48 -these events onto its standard interrupt dispatch mechanisms.  Xen is
    3.49 -responsible for determining the target domain that will handle each
    3.50 -physical interrupt source. For more details on the binding of event
    3.51 -sources to events, see Chapter~\ref{c:devices}. 
    3.52 -
    3.53 -
    3.54 -
    3.55 -\section{Time}
    3.56 -
    3.57 -Guest operating systems need to be aware of the passage of both real
    3.58 -(or wallclock) time and their own `virtual time' (the time for
    3.59 -which they have been executing). Furthermore, Xen has a notion of 
    3.60 -time which is used for scheduling. The following notions of 
    3.61 -time are provided: 
    3.62 -
    3.63 -\begin{description}
    3.64 -\item[Cycle counter time.]
    3.65 -
    3.66 -This provides a fine-grained time reference.  The cycle counter time is
    3.67 -used to accurately extrapolate the other time references.  On SMP machines
    3.68 -it is currently assumed that the cycle counter time is synchronized between
    3.69 -CPUs.  The current x86-based implementation achieves this within inter-CPU
    3.70 -communication latencies.
    3.71 -
    3.72 -\item[System time.]
    3.73 -
    3.74 -This is a 64-bit counter which holds the number of nanoseconds that
    3.75 -have elapsed since system boot.
    3.76 -
    3.77 -
    3.78 -\item[Wall clock time.]
    3.79 -
    3.80 -This is the time of day in a Unix-style {\tt struct timeval} (seconds
    3.81 -and microseconds since 1 January 1970, adjusted by leap seconds).  An
    3.82 -NTP client hosted by {\it domain 0} can keep this value accurate.  
    3.83 -
    3.84 -
    3.85 -\item[Domain virtual time.]
    3.86 -
    3.87 -This progresses at the same pace as system time, but only while a
    3.88 -domain is executing --- it stops while a domain is de-scheduled.
    3.89 -Therefore the share of the CPU that a domain receives is indicated by
    3.90 -the rate at which its virtual time increases.
    3.91 -
    3.92 -\end{description}
    3.93 -
    3.94 -
    3.95 -Xen exports timestamps for system time and wall-clock time to guest
    3.96 -operating systems through a shared page of memory.  Xen also provides
    3.97 -the cycle counter time at the instant the timestamps were calculated,
    3.98 -and the CPU frequency in Hertz.  This allows the guest to extrapolate
    3.99 -system and wall-clock times accurately based on the current cycle
   3.100 -counter time.
   3.101 -
    3.102 -Since all time stamps need to be updated and read \emph{atomically},
    3.103 -two version numbers are also stored in the shared info page. The
    3.104 -first is incremented prior to an update, while the second is only
    3.105 -incremented afterwards. Thus a guest can be sure that it read a consistent
    3.106 -state by checking that the two version numbers are equal.
   3.107 -
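As a concrete illustration of this consistent-read protocol, a guest might
read system time roughly as sketched below. This is a sketch only: the field
and barrier names ({\tt time\_version1}, {\tt time\_version2},
{\tt system\_time}, {\tt rmb()}) are assumptions standing in for the real
shared-info layout.

\begin{verbatim}
/* Sketch only: field and barrier names are assumptions, not the
 * exact shared-info layout. */
uint64_t read_system_time(volatile shared_info_t *si)
{
    uint32_t v;
    uint64_t stime;
    do {
        v = si->time_version2;    /* incremented after each update  */
        rmb();                    /* read barrier                   */
        stime = si->system_time;  /* ns since boot, per description */
        rmb();
    } while (v != si->time_version1); /* version1 is bumped before an
                                       * update, so inequality means
                                       * a writer raced us: retry    */
    return stime;
}
\end{verbatim}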
   3.108 -Xen includes a periodic ticker which sends a timer event to the
   3.109 -currently executing domain every 10ms.  The Xen scheduler also sends a
   3.110 -timer event whenever a domain is scheduled; this allows the guest OS
   3.111 -to adjust for the time that has passed while it has been inactive.  In
   3.112 -addition, Xen allows each domain to request that they receive a timer
   3.113 -event sent at a specified system time by using the {\tt
   3.114 -set\_timer\_op()} hypercall.  Guest OSes may use this timer to
   3.115 -implement timeout values when they block.
   3.116 -
   3.117 -
   3.118 -
   3.119 -%% % akw: demoting this to a section -- not sure if there is any point
   3.120 -%% % though, maybe just remove it.
   3.121 -
   3.122 -\section{Xen CPU Scheduling}
   3.123 -
   3.124 -Xen offers a uniform API for CPU schedulers.  It is possible to choose
   3.125 -from a number of schedulers at boot and it should be easy to add more.
   3.126 -The BVT, Atropos and Round Robin schedulers are part of the normal
   3.127 -Xen distribution.  BVT provides proportional fair shares of the CPU to
   3.128 -the running domains.  Atropos can be used to reserve absolute shares
   3.129 -of the CPU for each domain.  Round-robin is provided as an example of
   3.130 -Xen's internal scheduler API.
   3.131 -
   3.132 -\paragraph*{Note: SMP host support}
   3.133 -Xen has always supported SMP host systems.  Domains are statically assigned to
    3.134 -CPUs, either at creation time or when manually pinned to a particular CPU.
   3.135 -The current schedulers then run locally on each CPU to decide which of the
   3.136 -assigned domains should be run there. The user-level control software 
   3.137 -can be used to perform coarse-grain load-balancing between CPUs. 
   3.138 -
   3.139 -
   3.140 -%% More information on the characteristics and use of these schedulers is
   3.141 -%% available in {\tt Sched-HOWTO.txt}.
   3.142 -
   3.143 -
   3.144 -\section{Privileged operations}
   3.145 -
   3.146 -Xen exports an extended interface to privileged domains (viz.\ {\it
   3.147 -  Domain 0}). This allows such domains to build and boot other domains 
   3.148 -on the server, and provides control interfaces for managing 
   3.149 -scheduling, memory, networking, and block devices. 
   3.150 -
   3.151 -
   3.152 -\chapter{Memory}
   3.153 -\label{c:memory} 
   3.154 -
   3.155 -Xen is responsible for managing the allocation of physical memory to
   3.156 -domains, and for ensuring safe use of the paging and segmentation
   3.157 -hardware.
   3.158 -
   3.159 -
   3.160 -\section{Memory Allocation}
   3.161 -
   3.162 -
   3.163 -Xen resides within a small fixed portion of physical memory; it also
   3.164 -reserves the top 64MB of every virtual address space. The remaining
   3.165 -physical memory is available for allocation to domains at a page
   3.166 -granularity.  Xen tracks the ownership and use of each page, which
   3.167 -allows it to enforce secure partitioning between domains.
   3.168 -
   3.169 -Each domain has a maximum and current physical memory allocation. 
   3.170 -A guest OS may run a `balloon driver' to dynamically adjust its 
   3.171 -current memory allocation up to its limit. 
   3.172 -
   3.173 -
   3.174 -%% XXX SMH: I use machine and physical in the next section (which 
   3.175 -%% is kinda required for consistency with code); wonder if this 
   3.176 -%% section should use same terms? 
   3.177 -%%
   3.178 -%% Probably. 
   3.179 -%%
   3.180 -%% Merging this and below section at some point prob makes sense. 
   3.181 -
   3.182 -\section{Pseudo-Physical Memory}
   3.183 -
   3.184 -Since physical memory is allocated and freed on a page granularity,
   3.185 -there is no guarantee that a domain will receive a contiguous stretch
   3.186 -of physical memory. However most operating systems do not have good
   3.187 -support for operating in a fragmented physical address space. To aid
   3.188 -porting such operating systems to run on top of Xen, we make a
   3.189 -distinction between \emph{machine memory} and \emph{pseudo-physical
   3.190 -memory}.
   3.191 -
   3.192 -Put simply, machine memory refers to the entire amount of memory
   3.193 -installed in the machine, including that reserved by Xen, in use by
   3.194 -various domains, or currently unallocated. We consider machine memory
   3.195 -to comprise a set of 4K \emph{machine page frames} numbered
   3.196 -consecutively starting from 0. Machine frame numbers mean the same
   3.197 -within Xen or any domain.
   3.198 -
   3.199 -Pseudo-physical memory, on the other hand, is a per-domain
   3.200 -abstraction. It allows a guest operating system to consider its memory
   3.201 -allocation to consist of a contiguous range of physical page frames
   3.202 -starting at physical frame 0, despite the fact that the underlying
   3.203 -machine page frames may be sparsely allocated and in any order.
   3.204 -
   3.205 -To achieve this, Xen maintains a globally readable {\it
   3.206 -machine-to-physical} table which records the mapping from machine page
   3.207 -frames to pseudo-physical ones. In addition, each domain is supplied
   3.208 -with a {\it physical-to-machine} table which performs the inverse
   3.209 -mapping. Clearly the machine-to-physical table has size proportional
   3.210 -to the amount of RAM installed in the machine, while each
   3.211 -physical-to-machine table has size proportional to the memory
   3.212 -allocation of the given domain.
   3.213 -
   3.214 -Architecture dependent code in guest operating systems can then use
   3.215 -the two tables to provide the abstraction of pseudo-physical
   3.216 -memory. In general, only certain specialized parts of the operating
    3.217 -system (such as page table management) need to understand the
   3.218 -difference between machine and pseudo-physical addresses.
   3.219 -
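To make the use of the two tables concrete, architecture-dependent guest code
might wrap them as follows. The table and macro names below are assumptions
in the style of the XenLinux port, not a definitive interface.

\begin{verbatim}
/* Sketch only: table and macro names are assumptions. */
extern unsigned long *phys_to_machine_mapping; /* per-domain table */
extern unsigned long *machine_to_phys_mapping; /* global, readable */

#define pfn_to_mfn(pfn) (phys_to_machine_mapping[(pfn)])
#define mfn_to_pfn(mfn) (machine_to_phys_mapping[(mfn)])

/* Translate a pseudo-physical address to a machine address: */
#define phys_to_machine(pa)                                   \
    ((pfn_to_mfn((pa) >> PAGE_SHIFT) << PAGE_SHIFT) |         \
     ((pa) & (PAGE_SIZE - 1)))
\end{verbatim}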
   3.220 -\section{Page Table Updates}
   3.221 -
   3.222 -In the default mode of operation, Xen enforces read-only access to
   3.223 -page tables and requires guest operating systems to explicitly request
   3.224 -any modifications.  Xen validates all such requests and only applies
   3.225 -updates that it deems safe.  This is necessary to prevent domains from
   3.226 -adding arbitrary mappings to their page tables.
   3.227 -
   3.228 -To aid validation, Xen associates a type and reference count with each
   3.229 -memory page. A page has one of the following
   3.230 -mutually-exclusive types at any point in time: page directory ({\sf
   3.231 -PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
   3.232 -global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
   3.233 -a guest OS may always create readable mappings of its own memory 
   3.234 -regardless of its current type. 
   3.235 -%%% XXX: possibly explain more about ref count 'lifecyle' here?
   3.236 -This mechanism is used to
   3.237 -maintain the invariants required for safety; for example, a domain
   3.238 -cannot have a writable mapping to any part of a page table as this
   3.239 -would require the page concerned to simultaneously be of types {\sf
   3.240 -  PT} and {\sf RW}.
   3.241 -
   3.242 -
   3.243 -%\section{Writable Page Tables}
   3.244 -
    3.245 -Xen also provides an alternative mode of operation in which guests
   3.246 -have the illusion that their page tables are directly writable.  Of
   3.247 -course this is not really the case, since Xen must still validate
   3.248 -modifications to ensure secure partitioning. To this end, Xen traps
   3.249 -any write attempt to a memory page of type {\sf PT} (i.e., that is
   3.250 -currently part of a page table).  If such an access occurs, Xen
   3.251 -temporarily allows write access to that page while at the same time
   3.252 -{\em disconnecting} it from the page table that is currently in
   3.253 -use. This allows the guest to safely make updates to the page because
   3.254 -the newly-updated entries cannot be used by the MMU until Xen
   3.255 -revalidates and reconnects the page.
   3.256 -Reconnection occurs automatically in a number of situations: for
   3.257 -example, when the guest modifies a different page-table page, when the
   3.258 -domain is preempted, or whenever the guest uses Xen's explicit
   3.259 -page-table update interfaces.
   3.260 -
   3.261 -Finally, Xen also supports a form of \emph{shadow page tables} in
    3.262 -which the guest OS uses an independent copy of page tables which are
   3.263 -unknown to the hardware (i.e.\ which are never pointed to by {\tt
   3.264 -cr3}). Instead Xen propagates changes made to the guest's tables to the
   3.265 -real ones, and vice versa. This is useful for logging page writes
   3.266 -(e.g.\ for live migration or checkpoint). A full version of the shadow
   3.267 -page tables also allows guest OS porting with less effort.
   3.268 -
   3.269 -\section{Segment Descriptor Tables}
   3.270 +%% chapter Virtual Architecture moved to architecture.tex
   3.271 +\include{src/interface/architecture}
   3.272  
   3.273 -On boot a guest is supplied with a default GDT, which does not reside
   3.274 -within its own memory allocation.  If the guest wishes to use other
   3.275 -than the default `flat' ring-1 and ring-3 segments that this GDT
   3.276 -provides, it must register a custom GDT and/or LDT with Xen,
   3.277 -allocated from its own memory. Note that a number of GDT 
   3.278 -entries are reserved by Xen -- any custom GDT must also include
   3.279 -sufficient space for these entries. 
   3.280 -
   3.281 -For example, the following hypercall is used to specify a new GDT: 
   3.282 -
   3.283 -\begin{quote}
   3.284 -int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
   3.285 -
   3.286 -{\em frame\_list}: An array of up to 16 machine page frames within
   3.287 -which the GDT resides.  Any frame registered as a GDT frame may only
   3.288 -be mapped read-only within the guest's address space (e.g., no
   3.289 -writable mappings, no use as a page-table page, and so on).
   3.290 -
   3.291 -{\em entries}: The number of descriptor-entry slots in the GDT.  Note
   3.292 -that the table must be large enough to contain Xen's reserved entries;
   3.293 -thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
   3.294 -Note also that, after registering the GDT, slots {\em FIRST\_} through
   3.295 -{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
   3.296 -may be overwritten by Xen.
   3.297 -\end{quote}
   3.298 -
   3.299 -The LDT is updated via the generic MMU update mechanism (i.e., via 
    3.300 -the {\tt mmu\_update()} hypercall).
   3.301 -
   3.302 -\section{Start of Day} 
   3.303 -
   3.304 -The start-of-day environment for guest operating systems is rather
   3.305 -different to that provided by the underlying hardware. In particular,
   3.306 -the processor is already executing in protected mode with paging
   3.307 -enabled.
   3.308 -
   3.309 -{\it Domain 0} is created and booted by Xen itself. For all subsequent
   3.310 -domains, the analogue of the boot-loader is the {\it domain builder},
   3.311 -user-space software running in {\it domain 0}. The domain builder 
   3.312 -is responsible for building the initial page tables for a domain  
   3.313 -and loading its kernel image at the appropriate virtual address. 
   3.314 -
   3.315 -
   3.316 -
   3.317 -\chapter{Devices}
   3.318 -\label{c:devices}
   3.319 -
   3.320 -Devices such as network and disk are exported to guests using a
   3.321 -split device driver.  The device driver domain, which accesses the
    3.322 -physical device directly, also runs a {\em backend} driver, serving
    3.323 -requests to that device from guests.  Each guest will use a simple
    3.324 -{\em frontend} driver to access the backend.  Communication between these
   3.325 -domains is composed of two parts:  First, data is placed onto a shared
   3.326 -memory page between the domains.  Second, an event channel between the
   3.327 -two domains is used to pass notification that data is outstanding.
   3.328 -This separation of notification from data transfer allows message
   3.329 -batching, and results in very efficient device access.  
   3.330 -
   3.331 -Event channels are used extensively in device virtualization; each
   3.332 -domain has a number of end-points or \emph{ports} each of which
   3.333 -may be bound to one of the following \emph{event sources}:
   3.334 -\begin{itemize} 
   3.335 -  \item a physical interrupt from a real device, 
   3.336 -  \item a virtual interrupt (callback) from Xen, or 
   3.337 -  \item a signal from another domain 
   3.338 -\end{itemize}
   3.339 -
   3.340 -Events are lightweight and do not carry much information beyond 
   3.341 -the source of the notification. Hence when performing bulk data
   3.342 -transfer, events are typically used as synchronization primitives
   3.343 -over a shared memory transport. Event channels are managed via 
   3.344 -the {\tt event\_channel\_op()} hypercall; for more details see
   3.345 -Section~\ref{s:idc}. 
   3.346 -
   3.347 -This chapter focuses on some individual device interfaces
   3.348 -available to Xen guests. 
   3.349 -
   3.350 -\section{Network I/O}
   3.351 -
   3.352 -Virtual network device services are provided by shared memory
   3.353 -communication with a backend domain.  From the point of view of
   3.354 -other domains, the backend may be viewed as a virtual ethernet switch
   3.355 -element with each domain having one or more virtual network interfaces
   3.356 -connected to it.
   3.357 -
   3.358 -\subsection{Backend Packet Handling}
   3.359 -
   3.360 -The backend driver is responsible for a variety of actions relating to
   3.361 -the transmission and reception of packets from the physical device.
   3.362 -With regard to transmission, the backend performs these key actions:
   3.363 -
   3.364 -\begin{itemize}
   3.365 -\item {\bf Validation:} To ensure that domains do not attempt to
   3.366 -  generate invalid (e.g. spoofed) traffic, the backend driver may
   3.367 -  validate headers ensuring that source MAC and IP addresses match the
   3.368 -  interface that they have been sent from.
   3.369 -
   3.370 -  Validation functions can be configured using standard firewall rules
   3.371 -  ({\small{\tt iptables}} in the case of Linux).
   3.372 -  
   3.373 -\item {\bf Scheduling:} Since a number of domains can share a single
   3.374 -  physical network interface, the backend must mediate access when
   3.375 -  several domains each have packets queued for transmission.  This
   3.376 -  general scheduling function subsumes basic shaping or rate-limiting
   3.377 -  schemes.
   3.378 -  
   3.379 -\item {\bf Logging and Accounting:} The backend domain can be
   3.380 -  configured with classifier rules that control how packets are
   3.381 -  accounted or logged.  For example, log messages might be generated
   3.382 -  whenever a domain attempts to send a TCP packet containing a SYN.
   3.383 -\end{itemize}
   3.384 -
   3.385 -On receipt of incoming packets, the backend acts as a simple
   3.386 -demultiplexer:  Packets are passed to the appropriate virtual
   3.387 -interface after any necessary logging and accounting have been carried
   3.388 -out.
   3.389 -
   3.390 -\subsection{Data Transfer}
   3.391 -
   3.392 -Each virtual interface uses two ``descriptor rings'', one for transmit,
   3.393 -the other for receive.  Each descriptor identifies a block of contiguous
   3.394 -physical memory allocated to the domain.  
   3.395 -
   3.396 -The transmit ring carries packets to transmit from the guest to the
   3.397 -backend domain.  The return path of the transmit ring carries messages
   3.398 -indicating that the contents have been physically transmitted and the
   3.399 -backend no longer requires the associated pages of memory.
   3.400 +%% chapter Memory moved to memory.tex
   3.401 +\include{src/interface/memory}
   3.402  
   3.403 -To receive packets, the guest places descriptors of unused pages on
   3.404 -the receive ring.  The backend will return received packets by
   3.405 -exchanging these pages in the domain's memory with new pages
   3.406 -containing the received data, and passing back descriptors regarding
   3.407 -the new packets on the ring.  This zero-copy approach allows the
   3.408 -backend to maintain a pool of free pages to receive packets into, and
   3.409 -then deliver them to appropriate domains after examining their
   3.410 -headers.
   3.411 -
   3.412 -%
   3.413 -%Real physical addresses are used throughout, with the domain performing 
   3.414 -%translation from pseudo-physical addresses if that is necessary.
   3.415 -
   3.416 -If a domain does not keep its receive ring stocked with empty buffers then 
   3.417 -packets destined to it may be dropped.  This provides some defence against 
    3.418 -receive livelock problems because an overloaded domain will cease to receive
   3.419 -further data.  Similarly, on the transmit path, it provides the application
   3.420 -with feedback on the rate at which packets are able to leave the system.
   3.421 -
   3.422 -
   3.423 -Flow control on rings is achieved by including a pair of producer
   3.424 -indexes on the shared ring page.  Each side will maintain a private
   3.425 -consumer index indicating the next outstanding message.  In this
   3.426 -manner, the domains cooperate to divide the ring into two message
   3.427 -lists, one in each direction.  Notification is decoupled from the
   3.428 -immediate placement of new messages on the ring; the event channel
   3.429 -will be used to generate notification when {\em either} a certain
   3.430 -number of outstanding messages are queued, {\em or} a specified number
   3.431 -of nanoseconds have elapsed since the oldest message was placed on the
   3.432 -ring.
   3.433 -
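The following sketch shows this producer/consumer discipline from the
backend's side; every structure and field name here is hypothetical, chosen
only to mirror the description above.

\begin{verbatim}
/* Sketch only: all names are hypothetical. */
#define RING_SIZE    256
#define MASK_IDX(i)  ((i) & (RING_SIZE - 1))

typedef struct ring_entry ring_entry_t;  /* one descriptor */
extern void handle_request(ring_entry_t *);

typedef struct {
    unsigned int req_prod;        /* written by frontend only */
    unsigned int rsp_prod;        /* written by backend only  */
    ring_entry_t ring[RING_SIZE];
} shared_ring_t;

static unsigned int req_cons;     /* backend-private consumer index */

void backend_poll(shared_ring_t *r)
{
    /* The gap between the private consumer index and the shared
     * producer index is exactly the list of outstanding requests. */
    while (req_cons != r->req_prod)
        handle_request(&r->ring[MASK_IDX(req_cons++)]);
}
\end{verbatim}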
   3.434 -% Not sure if my version is any better -- here is what was here before:
   3.435 -%% Synchronization between the backend domain and the guest is achieved using 
   3.436 -%% counters held in shared memory that is accessible to both.  Each ring has
   3.437 -%% associated producer and consumer indices indicating the area in the ring
   3.438 -%% that holds descriptors that contain data.  After receiving {\it n} packets
   3.439 -%% or {\t nanoseconds} after receiving the first packet, the hypervisor sends
   3.440 -%% an event to the domain. 
   3.441 -
   3.442 -\section{Block I/O}
   3.443 -
   3.444 -All guest OS disk access goes through the virtual block device VBD
   3.445 -interface.  This interface allows domains access to portions of block
    3.446 -storage devices visible to the block backend device.  The VBD
   3.447 -interface is a split driver, similar to the network interface
   3.448 -described above.  A single shared memory ring is used between the
   3.449 -frontend and backend drivers, across which read and write messages are
   3.450 -sent.
   3.451 -
   3.452 -Any block device accessible to the backend domain, including
    3.453 -network-based block devices (iSCSI, *NBD, etc.), loopback and LVM/MD devices,
   3.454 -can be exported as a VBD.  Each VBD is mapped to a device node in the
   3.455 -guest, specified in the guest's startup configuration.
   3.456 -
   3.457 -Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
   3.458 -similar functionality can be achieved using the more complete LVM
   3.459 -system, which is already in widespread use.
   3.460 -
   3.461 -\subsection{Data Transfer}
   3.462 -
   3.463 -The single ring between the guest and the block backend supports three
   3.464 -messages:
   3.465 -
   3.466 -\begin{description}
   3.467 -\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to this guest
   3.468 -  from the backend.  The request includes a descriptor of a free page
   3.469 -  into which the reply will be written by the backend.
   3.470 +%% chapter Devices moved to devices.tex
   3.471 +\include{src/interface/devices}
   3.472  
   3.473 -\item [{\small {\tt READ}}:] Read data from the specified block device.  The
   3.474 -  front end identifies the device and location to read from and
   3.475 -  attaches pages for the data to be copied to (typically via DMA from
   3.476 -  the device).  The backend acknowledges completed read requests as
   3.477 -  they finish.
   3.478 -
   3.479 -\item [{\small {\tt WRITE}}:] Write data to the specified block device.  This
   3.480 -  functions essentially as {\small {\tt READ}}, except that the data moves to
   3.481 -  the device instead of from it.
   3.482 -\end{description}
   3.483 -
   3.484 -% um... some old text
   3.485 -%% In overview, the same style of descriptor-ring that is used for
   3.486 -%% network packets is used here.  Each domain has one ring that carries
   3.487 -%% operation requests to the hypervisor and carries the results back
   3.488 -%% again.
   3.489 -
   3.490 -%% Rather than copying data, the backend simply maps the domain's buffers
   3.491 -%% in order to enable direct DMA to them.  The act of mapping the buffers
   3.492 -%% also increases the reference counts of the underlying pages, so that
   3.493 -%% the unprivileged domain cannot try to return them to the hypervisor,
   3.494 -%% install them as page tables, or any other unsafe behaviour.
   3.495 -%% %block API here 
   3.496 -
   3.497 -
   3.498 -\chapter{Further Information} 
   3.499 -
   3.500 -
   3.501 -If you have questions that are not answered by this manual, the
   3.502 -sources of information listed below may be of interest to you.  Note
   3.503 -that bug reports, suggestions and contributions related to the
   3.504 -software (or the documentation) should be sent to the Xen developers'
   3.505 -mailing list (address below).
   3.506 -
   3.507 -\section{Other documentation}
   3.508 -
   3.509 -If you are mainly interested in using (rather than developing for)
   3.510 -Xen, the {\em Xen Users' Manual} is distributed in the {\tt docs/}
   3.511 -directory of the Xen source distribution.  
   3.512 -
   3.513 -% Various HOWTOs are also available in {\tt docs/HOWTOS}.
   3.514 -
   3.515 -\section{Online references}
   3.516 -
   3.517 -The official Xen web site is found at:
   3.518 -\begin{quote}
   3.519 -{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
   3.520 -\end{quote}
   3.521 -
   3.522 -This contains links to the latest versions of all on-line 
   3.523 -documentation. 
   3.524 -
   3.525 -\section{Mailing lists}
   3.526 -
   3.527 -There are currently four official Xen mailing lists:
   3.528 -
   3.529 -\begin{description}
   3.530 -\item[xen-devel@lists.xensource.com] Used for development
   3.531 -discussions and bug reports.  Subscribe at: \\
   3.532 -{\small {\tt http://lists.xensource.com/xen-devel}}
   3.533 -\item[xen-users@lists.xensource.com] Used for installation and usage
   3.534 -discussions and requests for help.  Subscribe at: \\
   3.535 -{\small {\tt http://lists.xensource.com/xen-users}}
   3.536 -\item[xen-announce@lists.xensource.com] Used for announcements only.
   3.537 -Subscribe at: \\
   3.538 -{\small {\tt http://lists.xensource.com/xen-announce}}
   3.539 -\item[xen-changelog@lists.xensource.com]  Changelog feed
   3.540 -from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
   3.541 -{\small {\tt http://lists.xensource.com/xen-changelog}}
   3.542 -\end{description}
   3.543 -
   3.544 -Of these, xen-devel is the most active.
   3.545 -
   3.546 -
   3.547 +%% chapter Further Information moved to further_info.tex
   3.548 +\include{src/interface/further_info}
   3.549  
   3.550  
   3.551  \appendix
   3.552  
   3.553 -%\newcommand{\hypercall}[1]{\vspace{5mm}{\large\sf #1}}
   3.554 -
   3.555 -
   3.556 -
   3.557 -
   3.558 -
   3.559 -\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
   3.560 -
   3.561 -
   3.562 -
   3.563 -
   3.564 -
   3.565 -
   3.566 -\chapter{Xen Hypercalls}
   3.567 -\label{a:hypercalls}
   3.568 -
   3.569 -Hypercalls represent the procedural interface to Xen; this appendix 
   3.570 -categorizes and describes the current set of hypercalls. 
   3.571 -
   3.572 -\section{Invoking Hypercalls} 
   3.573 -
   3.574 -Hypercalls are invoked in a manner analogous to system calls in a
   3.575 -conventional operating system; a software interrupt is issued which
   3.576 -vectors to an entry point within Xen. On x86\_32 machines the
    3.577 -instruction required is {\tt int \$82}; the (real) IDT is set up so
   3.578 -that this may only be issued from within ring 1. The particular 
   3.579 -hypercall to be invoked is contained in {\tt EAX} --- a list 
   3.580 -mapping these values to symbolic hypercall names can be found 
   3.581 -in {\tt xen/include/public/xen.h}. 
   3.582 -
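For illustration, a guest-side wrapper might look like the sketch below
(x86\_32, GCC inline assembly). It assumes the conventional vector
{\tt 0x82} and argument passing in {\tt EBX} onwards; only
{\tt \_\_HYPERVISOR\_xen\_version} is taken from
{\tt xen/include/public/xen.h}.

\begin{verbatim}
/* Sketch only: assumes vector 0x82 and args in EBX onwards. */
static inline int hypervisor_xen_version(int cmd)
{
    int ret;
    asm volatile ( "int $0x82"
                   : "=a" (ret)                      /* result in EAX   */
                   : "0" (__HYPERVISOR_xen_version), /* hypercall index */
                     "b" (cmd)                       /* first argument  */
                   : "memory" );
    return ret;
}
\end{verbatim}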
   3.583 -On some occasions a set of hypercalls will be required to carry
   3.584 -out a higher-level function; a good example is when a guest 
    3.585 -operating system wishes to context switch to a new process which
   3.586 -requires updating various privileged CPU state. As an optimization
   3.587 -for these cases, there is a generic mechanism to issue a set of 
   3.588 -hypercalls as a batch: 
   3.589 -
   3.590 -\begin{quote}
   3.591 -\hypercall{multicall(void *call\_list, int nr\_calls)}
   3.592 -
   3.593 -Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
    3.594 -the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
   3.595 -call\_list}. Each entry contains the hypercall operation code followed
   3.596 -by up to 7 word-sized arguments.
   3.597 -\end{quote}
   3.598 -
   3.599 -Note that multicalls are provided purely as an optimization; there is
   3.600 -no requirement to use them when first porting a guest operating
   3.601 -system.
   3.602 -
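As a sketch of how a batch might be assembled (the structure layout is
inferred from the description above; the {\tt HYPERVISOR\_multicall()}
wrapper name is an assumption):

\begin{verbatim}
/* Sketch only: layout inferred from the text above. */
typedef struct {
    unsigned long op;       /* hypercall operation code */
    unsigned long args[7];  /* up to 7 word-sized args  */
} multicall_entry_t;

extern long HYPERVISOR_multicall(void *call_list, int nr_calls);

void switch_stack_and_fpu(unsigned long new_ss, unsigned long new_esp)
{
    multicall_entry_t calls[2];

    calls[0].op      = __HYPERVISOR_stack_switch;
    calls[0].args[0] = new_ss;
    calls[0].args[1] = new_esp;

    calls[1].op      = __HYPERVISOR_fpu_taskswitch;

    HYPERVISOR_multicall(calls, 2);  /* one trap, two hypercalls */
}
\end{verbatim}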
   3.603 -
   3.604 -\section{Virtual CPU Setup} 
   3.605 -
    3.606 -At start of day, a guest operating system needs to set up the virtual
   3.607 -CPU it is executing on. This includes installing vectors for the
   3.608 -virtual IDT so that the guest OS can handle interrupts, page faults,
    3.609 -etc. However the very first thing a guest OS must set up is a pair
   3.610 -of hypervisor callbacks: these are the entry points which Xen will
   3.611 -use when it wishes to notify the guest OS of an occurrence. 
   3.612 -
   3.613 -\begin{quote}
   3.614 -\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
   3.615 -  event\_address, unsigned long failsafe\_selector, unsigned long
   3.616 -  failsafe\_address) }
   3.617 -
   3.618 -Register the normal (``event'') and failsafe callbacks for 
   3.619 -event processing. In each case the code segment selector and 
   3.620 -address within that segment are provided. The selectors must
   3.621 -have RPL 1; in XenLinux we simply use the kernel's CS for both 
   3.622 -{\tt event\_selector} and {\tt failsafe\_selector}.
   3.623 -
    3.624 -The value {\tt event\_address} specifies the address of the guest OS's
   3.625 -event handling and dispatch routine; the {\tt failsafe\_address}
   3.626 -specifies a separate entry point which is used only if a fault occurs
   3.627 -when Xen attempts to use the normal callback. 
   3.628 -\end{quote} 
   3.629 -
   3.630 -
   3.631 -After installing the hypervisor callbacks, the guest OS can 
   3.632 -install a `virtual IDT' by using the following hypercall: 
   3.633 -
   3.634 -\begin{quote} 
   3.635 -\hypercall{set\_trap\_table(trap\_info\_t *table)} 
   3.636 -
   3.637 -Install one or more entries into the per-domain 
   3.638 -trap handler table (essentially a software version of the IDT). 
   3.639 -Each entry in the array pointed to by {\tt table} includes the 
   3.640 -exception vector number with the corresponding segment selector 
   3.641 -and entry point. Most guest OSes can use the same handlers on 
   3.642 -Xen as when running on the real hardware; an exception is the 
   3.643 -page fault handler (exception vector 14) where a modified 
   3.644 -stack-frame layout is used. 
   3.645 -
   3.646 -
   3.647 -\end{quote} 
   3.648 -
   3.649 -
   3.650 -
   3.651 -\section{Scheduling and Timer}
   3.652 -
   3.653 -Domains are preemptively scheduled by Xen according to the 
   3.654 -parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
   3.655 -In addition, however, a domain may choose to explicitly 
   3.656 -control certain behavior with the following hypercall: 
   3.657 -
   3.658 -\begin{quote} 
   3.659 -\hypercall{sched\_op(unsigned long op)} 
   3.660 -
   3.661 -Request scheduling operation from hypervisor. The options are: {\it
   3.662 -yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
   3.663 -calling domain runnable but may cause a reschedule if other domains
   3.664 -are runnable.  {\it block} removes the calling domain from the run
    3.665 -queue and causes it to sleep until an event is delivered to it.  {\it
   3.666 -shutdown} is used to end the domain's execution; the caller can
   3.667 -additionally specify whether the domain should reboot, halt or
   3.668 -suspend.
   3.669 -\end{quote} 
   3.670 -
   3.671 -To aid the implementation of a process scheduler within a guest OS,
   3.672 -Xen provides a virtual programmable timer:
   3.673 -
   3.674 -\begin{quote}
   3.675 -\hypercall{set\_timer\_op(uint64\_t timeout)} 
   3.676 -
   3.677 -Request a timer event to be sent at the specified system time (time 
   3.678 -in nanoseconds since system boot). The hypercall actually passes the 
   3.679 -64-bit timeout value as a pair of 32-bit values. 
   3.680 -
   3.681 -\end{quote} 
   3.682 -
   3.683 -Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
   3.684 -allows block-with-timeout semantics. 
   3.685 -
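A sketch of this block-with-timeout idiom (the wrapper and constant names,
e.g. {\tt SCHEDOP\_block}, are assumed guest-side conventions):

\begin{verbatim}
/* Sketch only: wrapper and constant names are assumptions. */
extern uint64_t read_system_time(void); /* assumed helper: current
                                         * system time, ns since boot */

void block_with_timeout(uint64_t timeout_ns)
{
    uint64_t now = read_system_time();
    HYPERVISOR_set_timer_op(now + timeout_ns); /* arm one-shot timer */
    HYPERVISOR_sched_op(SCHEDOP_block);        /* sleep until event  */
}
\end{verbatim}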
   3.686 -
   3.687 -\section{Page Table Management} 
   3.688 -
   3.689 -Since guest operating systems have read-only access to their page 
   3.690 -tables, Xen must be involved when making any changes. The following
   3.691 -multi-purpose hypercall can be used to modify page-table entries, 
   3.692 -update the machine-to-physical mapping table, flush the TLB, install 
   3.693 -a new page-table base pointer, and more.
   3.694 -
   3.695 -\begin{quote} 
   3.696 -\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
   3.697 -
   3.698 -Update the page table for the domain; a set of {\tt count} updates are
   3.699 -submitted for processing in a batch, with {\tt success\_count} being 
   3.700 -updated to report the number of successful updates.  
   3.701 -
   3.702 -Each element of {\tt req[]} contains a pointer (address) and value; 
   3.703 -the least significant 2-bits of the pointer are used to distinguish 
   3.704 -the type of update requested as follows:
   3.705 -\begin{description} 
   3.706 -
   3.707 -\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
   3.708 -page table entry to the associated value; Xen will check that the
   3.709 -update is safe, as described in Chapter~\ref{c:memory}.
   3.710 -
   3.711 -\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
   3.712 -  machine-to-physical table. The calling domain must own the machine
   3.713 -  page in question (or be privileged).
   3.714 -
   3.715 -\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
   3.716 -The set of additional MMU operations is considerable, and includes
   3.717 -updating {\tt cr3} (or just re-installing it for a TLB flush),
   3.718 -flushing the cache, installing a new LDT, or pinning \& unpinning
   3.719 -page-table pages (to ensure their reference count doesn't drop to zero
   3.720 -which would require a revalidation of all entries).
   3.721 -
   3.722 -Further extended commands are used to deal with granting and 
   3.723 -acquiring page ownership; see Section~\ref{s:idc}. 
   3.724 -
   3.725 -
   3.726 -\end{description}
   3.727 -
   3.728 -More details on the precise format of all commands can be 
   3.729 -found in {\tt xen/include/public/xen.h}. 
   3.730 -
   3.731 -
   3.732 -\end{quote}
   3.733 -
   3.734 -Explicitly updating batches of page table entries is extremely
   3.735 -efficient, but can require a number of alterations to the guest
   3.736 -OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
   3.737 -recommended for new OS ports.
   3.738 -
   3.739 -Regardless of which page table update mode is being used, however,
   3.740 -there are some occasions (notably handling a demand page fault) where
   3.741 -a guest OS will wish to modify exactly one PTE rather than a
   3.742 -batch. This is catered for by the following:
   3.743 -
   3.744 -\begin{quote} 
   3.745 -\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
   3.746 -val, \\ unsigned long flags)}
   3.747 -
   3.748 -Update the currently installed PTE for the page {\tt page\_nr} to 
   3.749 -{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
   3.750 -is safe before applying it. The {\tt flags} determine which kind
   3.751 -of TLB flush, if any, should follow the update. 
   3.752 -
   3.753 -\end{quote} 
   3.754 -
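For example, a demand-fault handler might install the missing PTE as in the
sketch below ({\tt UVMF\_INVLPG} is assumed to name a flush-this-entry flag):

\begin{verbatim}
/* Sketch only: UVMF_INVLPG is an assumed flag name. */
void fixup_demand_fault(unsigned long va, unsigned long new_pte)
{
    /* Install the PTE and flush just this virtual address. */
    HYPERVISOR_update_va_mapping(va >> PAGE_SHIFT, new_pte,
                                 UVMF_INVLPG);
}
\end{verbatim}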
   3.755 -Finally, sufficiently privileged domains may occasionally wish to manipulate 
   3.756 -the pages of others: 
   3.757 -\begin{quote}
   3.758 -
   3.759 -\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
   3.760 -unsigned long val, unsigned long flags, uint16\_t domid)}
   3.761 -
   3.762 -Identical to {\tt update\_va\_mapping()} save that the pages being
   3.763 -mapped must belong to the domain {\tt domid}. 
   3.764 -
   3.765 -\end{quote}
   3.766 -
   3.767 -This privileged operation is currently used by backend virtual device
   3.768 -drivers to safely map pages containing I/O data. 
   3.769 -
   3.770 -
   3.771 -
   3.772 -\section{Segmentation Support}
   3.773 -
   3.774 -Xen allows guest OSes to install a custom GDT if they require it; 
   3.775 -this is context switched transparently whenever a domain is 
   3.776 -[de]scheduled.  The following hypercall is effectively a 
   3.777 -`safe' version of {\tt lgdt}: 
   3.778 -
   3.779 -\begin{quote}
   3.780 -\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
   3.781 -
   3.782 -Install a global descriptor table for a domain; {\tt frame\_list} is
   3.783 -an array of up to 16 machine page frames within which the GDT resides,
   3.784 -with {\tt entries} being the actual number of descriptor-entry
   3.785 -slots. All page frames must be mapped read-only within the guest's
   3.786 -address space, and the table must be large enough to contain Xen's
   3.787 -reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
   3.788 -
   3.789 -\end{quote}
   3.790 -
   3.791 -Many guest OSes will also wish to install LDTs; this is achieved by
   3.792 -using {\tt mmu\_update()} with an extended command, passing the
   3.793 -linear address of the LDT base along with the number of entries. No
   3.794 -special safety checks are required; Xen needs to perform this task
   3.795 -simply since {\tt lldt} requires CPL 0.
   3.796 -
   3.797 -
   3.798 -Xen also allows guest operating systems to update just an 
   3.799 -individual segment descriptor in the GDT or LDT:  
   3.800 -
   3.801 -\begin{quote}
   3.802 -\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
   3.803 -unsigned long word2)}
   3.804 -
   3.805 -Update the GDT/LDT entry at machine address {\tt ma}; the new
   3.806 -8-byte descriptor is stored in {\tt word1} and {\tt word2}.
   3.807 -Xen performs a number of checks to ensure the descriptor is 
   3.808 -valid. 
   3.809 -
   3.810 -\end{quote}
   3.811 -
   3.812 -Guest OSes can use the above in place of context switching entire 
   3.813 -LDTs (or the GDT) when the number of changing descriptors is small. 
   3.814 -
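A sketch of such a single-slot update, e.g. for a per-thread segment (the
wrapper name follows the hypercall above; the error handling is
hypothetical):

\begin{verbatim}
/* Sketch only: error handling is hypothetical. */
void set_one_descriptor(unsigned long slot_ma,   /* machine address */
                        unsigned long word1, unsigned long word2)
{
    /* Xen validates the 8-byte descriptor before installing it. */
    if (HYPERVISOR_update_descriptor(slot_ma, word1, word2) != 0)
        panic("descriptor rejected by Xen");
}
\end{verbatim}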
   3.815 -\section{Context Switching} 
   3.816 -
   3.817 -When a guest OS wishes to context switch between two processes, 
   3.818 -it can use the page table and segmentation hypercalls described
    3.819 -above to perform the bulk of the privileged work. In addition,
   3.820 -however, it will need to invoke Xen to switch the kernel (ring 1) 
   3.821 -stack pointer: 
   3.822 -
   3.823 -\begin{quote} 
   3.824 -\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
   3.825 -
   3.826 -Request kernel stack switch from hypervisor; {\tt ss} is the new 
    3.827 -stack segment, while {\tt esp} is the new stack pointer.
   3.828 -
   3.829 -\end{quote} 
   3.830 -
   3.831 -A final useful hypercall for context switching allows ``lazy'' 
   3.832 -save and restore of floating point state: 
   3.833 -
   3.834 -\begin{quote}
   3.835 -\hypercall{fpu\_taskswitch(void)} 
   3.836 -
   3.837 -This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
   3.838 -control register; this means that the next attempt to use floating
    3.839 -point will cause a trap which the guest OS can catch. Typically it will
   3.840 -then save/restore the FP state, and clear the {\tt TS} bit. 
   3.841 -\end{quote} 
   3.842 -
   3.843 -This is provided as an optimization only; guest OSes can also choose
   3.844 -to save and restore FP state on all context switches for simplicity. 
   3.845 -
   3.846 -
   3.847 -\section{Physical Memory Management}
   3.848 -
   3.849 -As mentioned previously, each domain has a maximum and current 
   3.850 -memory allocation. The maximum allocation, set at domain creation 
   3.851 -time, cannot be modified. However a domain can choose to reduce 
   3.852 -and subsequently grow its current allocation by using the
   3.853 -following call: 
   3.854 -
   3.855 -\begin{quote} 
   3.856 -\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
   3.857 -  unsigned long nr\_extents, unsigned int extent\_order)}
   3.858 -
   3.859 -Increase or decrease current memory allocation (as determined by 
   3.860 -the value of {\tt op}). Each invocation provides a list of 
   3.861 -extents each of which is $2^s$ pages in size, 
   3.862 -where $s$ is the value of {\tt extent\_order}. 
   3.863 -
   3.864 -\end{quote} 
   3.865 -
   3.866 -In addition to simply reducing or increasing the current memory
   3.867 -allocation via a `balloon driver', this call is also useful for 
   3.868 -obtaining contiguous regions of machine memory when required (e.g. 
   3.869 -for certain PCI devices, or if using superpages).  
   3.870 -
   3.871 -
   3.872 -\section{Inter-Domain Communication}
   3.873 -\label{s:idc} 
   3.874 -
   3.875 -Xen provides a simple asynchronous notification mechanism via
   3.876 -\emph{event channels}. Each domain has a set of end-points (or
   3.877 -\emph{ports}) which may be bound to an event source (e.g. a physical
    3.878 -IRQ, a virtual IRQ, or a port in another domain). When a pair of
   3.879 -end-points in two different domains are bound together, then a `send'
   3.880 -operation on one will cause an event to be received by the destination
   3.881 -domain.
   3.882 -
   3.883 -The control and use of event channels involves the following hypercall: 
   3.884 -
   3.885 -\begin{quote}
   3.886 -\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
   3.887 -
   3.888 -Inter-domain event-channel management; {\tt op} is a discriminated 
   3.889 -union which allows the following 7 operations: 
   3.890 -
   3.891 -\begin{description} 
   3.892 -
   3.893 -\item[\it alloc\_unbound:] allocate a free (unbound) local
   3.894 -  port and prepare for connection from a specified domain. 
   3.895 -\item[\it bind\_virq:] bind a local port to a virtual 
   3.896 -IRQ; any particular VIRQ can be bound to at most one port per domain. 
   3.897 -\item[\it bind\_pirq:] bind a local port to a physical IRQ;
   3.898 -once more, a given pIRQ can be bound to at most one port per
   3.899 -domain. Furthermore the calling domain must be sufficiently
   3.900 -privileged.
   3.901 -\item[\it bind\_interdomain:] construct an interdomain event 
   3.902 -channel; in general, the target domain must have previously allocated 
   3.903 -an unbound port for this channel, although this can be bypassed by 
   3.904 -privileged domains during domain setup. 
   3.905 -\item[\it close:] close an interdomain event channel. 
    3.906 -\item[\it send:] send an event to the remote end of an
   3.907 -interdomain event channel. 
   3.908 -\item[\it status:] determine the current status of a local port. 
   3.909 -\end{description} 
   3.910 -
   3.911 -For more details see
   3.912 -{\tt xen/include/public/event\_channel.h}. 
   3.913 -
   3.914 -\end{quote} 
   3.915 -
   3.916 -Event channels are the fundamental communication primitive between 
   3.917 -Xen domains and seamlessly support SMP. However they provide little
   3.918 -bandwidth for communication {\sl per se}, and hence are typically 
   3.919 -married with a piece of shared memory to produce effective and 
   3.920 -high-performance inter-domain communication. 
   3.921 -
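A sketch of the `send' operation (field names are assumptions; the
authoritative layout is in {\tt xen/include/public/event\_channel.h}):

\begin{verbatim}
/* Sketch only: field names are assumptions; see
 * xen/include/public/event_channel.h for the real layout. */
void notify_remote(int local_port)
{
    evtchn_op_t op;
    op.cmd               = EVTCHNOP_send;
    op.u.send.local_port = local_port;
    HYPERVISOR_event_channel_op(&op);
}
\end{verbatim}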
   3.922 -Safe sharing of memory pages between guest OSes is carried out by
   3.923 -granting access on a per page basis to individual domains. This is
   3.924 -achieved by using the {\tt grant\_table\_op()} hypercall.
   3.925 -
   3.926 -\begin{quote}
   3.927 -\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
   3.928 -
   3.929 -Grant or remove access to a particular page to a particular domain. 
   3.930 -
   3.931 -\end{quote} 
   3.932 -
   3.933 -This is not currently widely in use by guest operating systems, but 
   3.934 -we intend to integrate support more fully in the near future. 
   3.935 -
   3.936 -\section{PCI Configuration} 
   3.937 -
   3.938 -Domains with physical device access (i.e.\ driver domains) receive
   3.939 -limited access to certain PCI devices (bus address space and
   3.940 -interrupts). However many guest operating systems attempt to 
    3.941 -determine the PCI configuration by directly accessing the PCI BIOS,
   3.942 -which cannot be allowed for safety. 
   3.943 -
   3.944 -Instead, Xen provides the following hypercall: 
   3.945 -
   3.946 -\begin{quote}
   3.947 -\hypercall{physdev\_op(void *physdev\_op)}
   3.948 -
    3.949 -Perform a PCI configuration operation; depending on the value
   3.950 -of {\tt physdev\_op} this can be a PCI config read, a PCI config 
   3.951 -write, or a small number of other queries. 
   3.952 -
   3.953 -\end{quote} 
   3.954 -
   3.955 -
   3.956 -For examples of using {\tt physdev\_op()}, see the 
   3.957 -Xen-specific PCI code in the linux sparse tree. 
   3.958 -
   3.959 -\section{Administrative Operations}
   3.960 -\label{s:dom0ops}
   3.961 -
   3.962 -A large number of control operations are available to a sufficiently
   3.963 -privileged domain (typically domain 0). These allow the creation and
   3.964 -management of new domains, for example. A complete list is given 
   3.965 -below: for more details on any or all of these, please see 
   3.966 -{\tt xen/include/public/dom0\_ops.h} 
   3.967 -
   3.968 -
   3.969 -\begin{quote}
   3.970 -\hypercall{dom0\_op(dom0\_op\_t *op)} 
   3.971 -
   3.972 -Administrative domain operations for domain management. The options are:
   3.973 -
   3.974 -\begin{description} 
   3.975 -\item [\it DOM0\_CREATEDOMAIN:] create a new domain
   3.976 -
   3.977 -\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
   3.978 -queue. 
   3.979 -
   3.980 -\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
   3.981 -  once again. 
   3.982 -
   3.983 -\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
   3.984 -with a domain
   3.985 -
   3.986 -\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
   3.987 -
   3.988 -\item [\it DOM0\_SCHEDCTL:]
   3.989 -
   3.990 -\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
   3.991 -
   3.992 -\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
   3.993 -
   3.994 -\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
   3.995 -
   3.996 -\item [\it DOM0\_GETPAGEFRAMEINFO:] 
   3.997 -
   3.998 -\item [\it DOM0\_GETPAGEFRAMEINFO2:]
   3.999 -
  3.1000 -\item [\it DOM0\_IOPL:] set I/O privilege level
  3.1001 -
  3.1002 -\item [\it DOM0\_MSR:] read or write model specific registers
  3.1003 -
  3.1004 -\item [\it DOM0\_DEBUG:] interactively invoke the debugger
  3.1005 -
  3.1006 -\item [\it DOM0\_SETTIME:] set system time
  3.1007 -
  3.1008 -\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
  3.1009 -
  3.1010 -\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
  3.1011 -
  3.1012 -\item [\it DOM0\_GETTBUFS:] get information about the size and location of
  3.1013 -                      the trace buffers (only on trace-buffer enabled builds)
  3.1014 -
  3.1015 -\item [\it DOM0\_PHYSINFO:] get information about the host machine
  3.1016 -
  3.1017 -\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
  3.1018 -
  3.1019 -\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
  3.1020 -
  3.1021 -\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
  3.1022 -
  3.1023 -\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
  3.1024 -
  3.1025 -\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
  3.1026 -
  3.1027 -\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
  3.1028 -\end{description} 
  3.1029 -\end{quote} 
  3.1030 -
  3.1031 -Most of the above are best understood by looking at the code 
  3.1032 -implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
  3.1033 -the user-space tools that use them (mostly in {\tt tools/libxc}). 
  3.1034 -
  3.1035 -\section{Debugging Hypercalls} 
  3.1036 -
  3.1037 -A few additional hypercalls are mainly useful for debugging: 
  3.1038 -
  3.1039 -\begin{quote} 
  3.1040 -\hypercall{console\_io(int cmd, int count, char *str)}
  3.1041 -
  3.1042 -Use Xen to interact with the console; operations are:
  3.1043 -
  3.1044 -{\it CONSOLEIO\_write}: Output count characters from buffer str.
  3.1045 -
  3.1046 -{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
  3.1047 -\end{quote} 
  3.1048 -
  3.1049 -A pair of hypercalls allows access to the underlying debug registers: 
  3.1050 -\begin{quote}
  3.1051 -\hypercall{set\_debugreg(int reg, unsigned long value)}
  3.1052 -
  3.1053 -Set debug register {\tt reg} to {\tt value} 
  3.1054 -
  3.1055 -\hypercall{get\_debugreg(int reg)}
  3.1056 -
  3.1057 -Return the contents of the debug register {\tt reg}
  3.1058 -\end{quote}
  3.1059 -
  3.1060 -And finally: 
  3.1061 -\begin{quote}
  3.1062 -\hypercall{xen\_version(int cmd)}
  3.1063 -
  3.1064 -Request Xen version number.
  3.1065 -\end{quote} 
  3.1066 -
  3.1067 -This is useful to ensure that user-space tools are in sync 
  3.1068 -with the underlying hypervisor. 
  3.1069 -
  3.1070 -\section{Deprecated Hypercalls}
  3.1071 -
  3.1072 -Xen is under constant development and refinement; as such there 
  3.1073 -are plans to improve the way in which various pieces of functionality 
  3.1074 -are exposed to guest OSes. 
  3.1075 -
  3.1076 -\begin{quote} 
  3.1077 -\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
  3.1078 -
   3.1079 -Toggle various memory management modes (in particular writable page
   3.1080 -tables and superpage support). 
  3.1081 -
  3.1082 -\end{quote} 
  3.1083 -
  3.1084 -This is likely to be replaced with mode values in the shared 
  3.1085 -information page since this is more resilient for resumption 
  3.1086 -after migration or checkpoint. 
  3.1087 -
  3.1088 -
  3.1089 -
  3.1090 -
  3.1091 -
  3.1092 -
  3.1093 +%% chapter hypercalls moved to hypercalls.tex
  3.1094 +\include{src/interface/hypercalls}
  3.1095  
  3.1096  
  3.1097  %% 
  3.1098 @@ -1173,279 +112,9 @@ after migration or checkpoint.
  3.1099  %% new scheduler... not clear how many of them there are...
  3.1100  %%
  3.1101  
  3.1102 -\begin{comment}
  3.1103 -
  3.1104 -\chapter{Scheduling API}  
  3.1105 -
  3.1106 -The scheduling API is used by both the schedulers described above and should
  3.1107 -also be used by any new schedulers.  It provides a generic interface and also
  3.1108 -implements much of the ``boilerplate'' code.
  3.1109 -
  3.1110 -Schedulers conforming to this API are described by the following
  3.1111 -structure:
  3.1112 -
  3.1113 -\begin{verbatim}
  3.1114 -struct scheduler
  3.1115 -{
  3.1116 -    char *name;             /* full name for this scheduler      */
  3.1117 -    char *opt_name;         /* option name for this scheduler    */
  3.1118 -    unsigned int sched_id;  /* ID for this scheduler             */
  3.1119 -
  3.1120 -    int          (*init_scheduler) ();
  3.1121 -    int          (*alloc_task)     (struct task_struct *);
  3.1122 -    void         (*add_task)       (struct task_struct *);
  3.1123 -    void         (*free_task)      (struct task_struct *);
  3.1124 -    void         (*rem_task)       (struct task_struct *);
  3.1125 -    void         (*wake_up)        (struct task_struct *);
  3.1126 -    void         (*do_block)       (struct task_struct *);
  3.1127 -    task_slice_t (*do_schedule)    (s_time_t);
  3.1128 -    int          (*control)        (struct sched_ctl_cmd *);
  3.1129 -    int          (*adjdom)         (struct task_struct *,
  3.1130 -                                    struct sched_adjdom_cmd *);
  3.1131 -    s32          (*reschedule)     (struct task_struct *);
  3.1132 -    void         (*dump_settings)  (void);
  3.1133 -    void         (*dump_cpu_state) (int);
  3.1134 -    void         (*dump_runq_el)   (struct task_struct *);
  3.1135 -};
  3.1136 -\end{verbatim}
  3.1137 -
  3.1138 -The only method that {\em must} be implemented is
   3.1139 -{\tt do\_schedule()}.  However, unless the {\tt wake\_up()} method is also
   3.1140 -implemented, waking tasks will never be placed on the runqueue!
  3.1141 -
  3.1142 -The fields of the above structure are described in more detail below.
  3.1143 -
  3.1144 -\subsubsection{name}
  3.1145 -
  3.1146 -The name field should point to a descriptive ASCII string.
  3.1147 -
  3.1148 -\subsubsection{opt\_name}
  3.1149 -
  3.1150 -This field is the value of the {\tt sched=} boot-time option that will select
  3.1151 -this scheduler.
  3.1152 -
  3.1153 -\subsubsection{sched\_id}
  3.1154 -
  3.1155 -This is an integer that uniquely identifies this scheduler.  There should be a
   3.1156 -macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
  3.1157 -
  3.1158 -\subsubsection{init\_scheduler}
  3.1159 -
  3.1160 -\paragraph*{Purpose}
  3.1161 -
  3.1162 -This is a function for performing any scheduler-specific initialisation.  For
  3.1163 -instance, it might allocate memory for per-CPU scheduler data and initialise it
  3.1164 -appropriately.
  3.1165 -
  3.1166 -\paragraph*{Call environment}
  3.1167 -
  3.1168 -This function is called after the initialisation performed by the generic
  3.1169 -layer.  The function is called exactly once, for the scheduler that has been
  3.1170 -selected.
  3.1171 -
  3.1172 -\paragraph*{Return values}
  3.1173 -
  3.1174 -This should return negative on failure --- this will cause an
  3.1175 -immediate panic and the system will fail to boot.
  3.1176 -
  3.1177 -\subsubsection{alloc\_task}
  3.1178 -
  3.1179 -\paragraph*{Purpose}
  3.1180 -Called when a {\tt task\_struct} is allocated by the generic scheduler
  3.1181 -layer.  A particular scheduler implementation may use this method to
  3.1182 -allocate per-task data for this task.  It may use the {\tt
  3.1183 -sched\_priv} pointer in the {\tt task\_struct} to point to this data.
  3.1184 -
  3.1185 -\paragraph*{Call environment}
  3.1186 -The generic layer guarantees that the {\tt sched\_priv} field will
  3.1187 -remain intact from the time this method is called until the task is
  3.1188 -deallocated (so long as the scheduler implementation does not change
  3.1189 -it explicitly!).
  3.1190 -
  3.1191 -\paragraph*{Return values}
  3.1192 -Negative on failure.
  3.1193 -
  3.1194 -\subsubsection{add\_task}
  3.1195 -
  3.1196 -\paragraph*{Purpose}
  3.1197 -
  3.1198 -Called when a task is initially added by the generic layer.
  3.1199 -
  3.1200 -\paragraph*{Call environment}
  3.1201 -
  3.1202 -The fields in the {\tt task\_struct} are now filled out and available for use.
  3.1203 -Schedulers should implement appropriate initialisation of any per-task private
  3.1204 -information in this method.
  3.1205 -
  3.1206 -\subsubsection{free\_task}
  3.1207 -
  3.1208 -\paragraph*{Purpose}
  3.1209 -
  3.1210 -Schedulers should free the space used by any associated private data
  3.1211 -structures.
  3.1212 -
  3.1213 -\paragraph*{Call environment}
  3.1214 -
  3.1215 -This is called when a {\tt task\_struct} is about to be deallocated.
  3.1216 -The generic layer will have done generic task removal operations and
  3.1217 -(if implemented) called the scheduler's {\tt rem\_task} method before
  3.1218 -this method is called.
  3.1219 -
  3.1220 -\subsubsection{rem\_task}
  3.1221 -
  3.1222 -\paragraph*{Purpose}
  3.1223 -
  3.1224 -This is called when a task is being removed from scheduling (but is
  3.1225 -not yet being freed).
  3.1226 -
  3.1227 -\subsubsection{wake\_up}
  3.1228 -
  3.1229 -\paragraph*{Purpose}
  3.1230 -
  3.1231 -Called when a task is woken up, this method should put the task on the runqueue
  3.1232 -(or do the scheduler-specific equivalent action).
  3.1233 -
  3.1234 -\paragraph*{Call environment}
  3.1235 -
  3.1236 -The task is already set to state RUNNING.
  3.1237 -
  3.1238 -\subsubsection{do\_block}
  3.1239 -
  3.1240 -\paragraph*{Purpose}
  3.1241 -
  3.1242 -This function is called when a task is blocked.  This function should
  3.1243 -not remove the task from the runqueue.
  3.1244 -
  3.1245 -\paragraph*{Call environment}
  3.1246 -
  3.1247 -The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
  3.1248 -TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
  3.1249 -  do\_schedule} method will be made after this method returns, in
  3.1250 -order to select the next task to run.
  3.1251 -
  3.1252 -\subsubsection{do\_schedule}
  3.1253 -
  3.1254 -This method must be implemented.
  3.1255 -
  3.1256 -\paragraph*{Purpose}
  3.1257 -
  3.1258 -The method is called each time a new task must be chosen for scheduling on the
   3.1259 -current CPU.  The current time is passed as the single argument (the current
   3.1260 -task can be found using the {\tt current} macro).
   3.1261 -
   3.1262 -This method should select the next task to run on this CPU and set its minimum
  3.1263 -time to run as well as returning the data described below.
  3.1264 -
  3.1265 -This method should also take the appropriate action if the previous
  3.1266 -task has blocked, e.g. removing it from the runqueue.
  3.1267 -
  3.1268 -\paragraph*{Call environment}
  3.1269 -
  3.1270 -The other fields in the {\tt task\_struct} are updated by the generic layer,
  3.1271 -which also performs all Xen-specific tasks and performs the actual task switch
  3.1272 -(unless the previous task has been chosen again).
  3.1273 -
  3.1274 -This method is called with the {\tt schedule\_lock} held for the current CPU
  3.1275 -and local interrupts disabled.
  3.1276 -
  3.1277 -\paragraph*{Return values}
  3.1278 -
  3.1279 -Must return a {\tt struct task\_slice} describing what task to run and how long
  3.1280 -for (at maximum).
  3.1281 -
  3.1282 -\subsubsection{control}
  3.1283 -
  3.1284 -\paragraph*{Purpose}
  3.1285 -
  3.1286 -This method is called for global scheduler control operations.  It takes a
  3.1287 -pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
  3.1288 -source data from or populate with data, depending on the value of the
  3.1289 -{\tt direction} field.
  3.1290 -
  3.1291 -\paragraph*{Call environment}
  3.1292 -
  3.1293 -The generic layer guarantees that when this method is called, the
  3.1294 -caller selected the correct scheduler ID, hence the scheduler's
  3.1295 -implementation does not need to sanity-check these parts of the call.
  3.1296 -
  3.1297 -\paragraph*{Return values}
  3.1298 -
  3.1299 -This function should return the value to be passed back to user space, hence it
  3.1300 -should either be 0 or an appropriate errno value.
  3.1301 -
  3.1302 -\subsubsection{sched\_adjdom}
  3.1303 -
  3.1304 -\paragraph*{Purpose}
  3.1305 -
  3.1306 -This method is called to adjust the scheduling parameters of a particular
  3.1307 -domain, or to query their current values.  The function should check
  3.1308 -the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
  3.1309 -order to determine which of these operations is being performed.
  3.1310 -
  3.1311 -\paragraph*{Call environment}
  3.1312 -
  3.1313 -The generic layer guarantees that the caller has specified the correct
  3.1314 -control interface version and scheduler ID and that the supplied {\tt
  3.1315 -task\_struct} will not be deallocated during the call (hence it is not
  3.1316 -necessary to {\tt get\_task\_struct}).
  3.1317 -
  3.1318 -\paragraph*{Return values}
  3.1319 -
  3.1320 -This function should return the value to be passed back to user space, hence it
  3.1321 -should either be 0 or an appropriate errno value.
  3.1322 -
  3.1323 -\subsubsection{reschedule}
  3.1324 -
  3.1325 -\paragraph*{Purpose}
  3.1326 -
  3.1327 -This method is called to determine if a reschedule is required as a result of a
  3.1328 -particular task.
  3.1329 -
  3.1330 -\paragraph*{Call environment}
  3.1331 -The generic layer will cause a reschedule if the current domain is the idle
  3.1332 -task or it has exceeded its minimum time slice before a reschedule.  The
  3.1333 -generic layer guarantees that the task passed is not currently running but is
  3.1334 -on the runqueue.
  3.1335 -
  3.1336 -\paragraph*{Return values}
  3.1337 -
  3.1338 -Should return a mask of CPUs to cause a reschedule on.
  3.1339 -
  3.1340 -\subsubsection{dump\_settings}
  3.1341 -
  3.1342 -\paragraph*{Purpose}
  3.1343 -
  3.1344 -If implemented, this should dump any private global settings for this
  3.1345 -scheduler to the console.
  3.1346 -
  3.1347 -\paragraph*{Call environment}
  3.1348 -
  3.1349 -This function is called with interrupts enabled.
  3.1350 -
  3.1351 -\subsubsection{dump\_cpu\_state}
  3.1352 -
  3.1353 -\paragraph*{Purpose}
  3.1354 -
  3.1355 -This method should dump any private settings for the specified CPU.
  3.1356 -
  3.1357 -\paragraph*{Call environment}
  3.1358 -
  3.1359 -This function is called with interrupts disabled and the {\tt schedule\_lock}
  3.1360 -for the specified CPU held.
  3.1361 -
  3.1362 -\subsubsection{dump\_runq\_el}
  3.1363 -
  3.1364 -\paragraph*{Purpose}
  3.1365 -
  3.1366 -This method should dump any private settings for the specified task.
  3.1367 -
  3.1368 -\paragraph*{Call environment}
  3.1369 -
  3.1370 -This function is called with interrupts disabled and the {\tt schedule\_lock}
  3.1371 -for the task's CPU held.
  3.1372 -
  3.1373 -\end{comment} 
  3.1374 -
  3.1375 +%% \include{src/interface/scheduling}
  3.1376 +%% scheduling information moved to scheduling.tex
  3.1377 +%% still commented out
  3.1378  
  3.1379  
  3.1380  
  3.1381 @@ -1457,74 +126,9 @@ for the task's CPU held.
  3.1382  %% (and/or kip's stuff?) and write about that instead? 
  3.1383  %%
  3.1384  
  3.1385 -\begin{comment} 
  3.1386 -
  3.1387 -\chapter{Debugging}
  3.1388 -
  3.1389 -Xen provides tools for debugging both Xen and guest OSes.  Currently, the
  3.1390 -Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
  3.1391 -debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
  3.1392 -Buffer provides a lightweight means to log data about Xen's internal state and
  3.1393 -behaviour at runtime, for later analysis.
  3.1394 -
  3.1395 -\section{Pervasive Debugger}
  3.1396 -
  3.1397 -Information on using the pervasive debugger is available in pdb.txt.
  3.1398 -
  3.1399 -
  3.1400 -\section{Trace Buffer}
  3.1401 -
  3.1402 -The trace buffer provides a means to observe Xen's operation from domain 0.
  3.1403 -Trace events, inserted at key points in Xen's code, record data that can be
  3.1404 -read by the {\tt xentrace} tool.  Recording these events has a low overhead
  3.1405 -and hence the trace buffer may be useful for debugging timing-sensitive
  3.1406 -behaviours.
  3.1407 -
  3.1408 -\subsection{Internal API}
  3.1409 -
  3.1410 -To use the trace buffer functionality from within Xen, you must {\tt \#include
  3.1411 -<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
  3.1412 -events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
  3.1413 -2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
  3.1414 -(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
  3.1415 -will insert the event ID and data into the trace buffer, along with the current
  3.1416 -value of the CPU cycle-counter.  For builds without the trace buffer enabled,
  3.1417 -the macros expand to no-ops and thus can be left in place without incurring
  3.1418 -overheads.
  3.1419 -
  3.1420 -\subsection{Trace-enabled builds}
  3.1421 -
  3.1422 -By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
  3.1423 -is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
  3.1424 -either in {\tt <xen/config.h>} or on the gcc command line.
  3.1425 -
  3.1426 -The size (in pages) of the per-CPU trace buffers can be specified using the
  3.1427 -{\tt tbuf\_size=n } boot parameter to Xen.  If the size is set to 0, the trace
  3.1428 -buffers will be disabled.
  3.1429 -
  3.1430 -\subsection{Dumping trace data}
  3.1431 -
  3.1432 -When running a trace buffer build of Xen, trace data are written continuously
  3.1433 -into the buffer data areas, with newer data overwriting older data.  This data
  3.1434 -can be captured using the {\tt xentrace} program in domain 0.
  3.1435 -
  3.1436 -The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
  3.1437 -buffers into its address space.  It then periodically polls all the buffers for
  3.1438 -new data, dumping out any new records from each buffer in turn.  As a result,
  3.1439 -for machines with multiple (logical) CPUs, the trace buffer output will not be
  3.1440 -in overall chronological order.
  3.1441 -
  3.1442 -The output from {\tt xentrace} can be post-processed using {\tt
  3.1443 -xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
  3.1444 -{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
  3.1445 -trace points, there is an example format file in {\tt tools/xentrace/formats }.
  3.1446 -
  3.1447 -For more information, see the manual pages for {\tt xentrace}, {\tt
  3.1448 -xentrace\_format} and {\tt xentrace\_cpusplit}.
  3.1449 -
  3.1450 -\end{comment} 
  3.1451 -
  3.1452 -
  3.1453 +%% \include{src/interface/debugging}
  3.1454 +%% debugging information moved to debugging.tex
  3.1455 +%% still commented out
  3.1456  
  3.1457  
  3.1458  \end{document}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/docs/src/interface/architecture.tex	Tue Sep 20 09:43:46 2005 +0000
     4.3 @@ -0,0 +1,140 @@
     4.4 +\chapter{Virtual Architecture}
     4.5 +
     4.6 +On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
     4.7 +has full access to the physical memory available in the system and is
     4.8 +responsible for allocating portions of it to the domains.  Guest
     4.9 +operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
    4.10 +they see fit. Segmentation is used to prevent the guest OS from
    4.11 +accessing the portion of the address space that is reserved for Xen.
    4.12 +We expect most guest operating systems will use ring 1 for their own
    4.13 +operation and place applications in ring 3.
    4.14 +
    4.15 +In this chapter we consider the basic virtual architecture provided by
    4.16 +Xen: the basic CPU state, exception and interrupt handling, and time.
    4.17 +Other aspects such as memory and device access are discussed in later
    4.18 +chapters.
    4.19 +
    4.20 +
    4.21 +\section{CPU state}
    4.22 +
    4.23 +All privileged state must be handled by Xen.  The guest OS has no
    4.24 +direct access to CR3 and is not permitted to update privileged bits in
    4.25 +EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
    4.26 +these are analogous to system calls but occur from ring 1 to ring 0.
    4.27 +
    4.28 +A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
    4.29 +
    4.30 +
    4.31 +\section{Exceptions}
    4.32 +
    4.33 +A virtual IDT is provided --- a domain can submit a table of trap
    4.34 +handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
    4.35 +handlers are identical to native x86 handlers, although the page-fault
    4.36 +handler is somewhat different.
    4.37 +
    4.38 +
    4.39 +\section{Interrupts and events}
    4.40 +
    4.41 +Interrupts are virtualized by mapping them to \emph{events}, which are
    4.42 +delivered asynchronously to the target domain using a callback
    4.43 +supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
    4.44 +these events onto its standard interrupt dispatch mechanisms.  Xen is
    4.45 +responsible for determining the target domain that will handle each
    4.46 +physical interrupt source. For more details on the binding of event
    4.47 +sources to events, see Chapter~\ref{c:devices}.
    4.48 +
    4.49 +
    4.50 +\section{Time}
    4.51 +
    4.52 +Guest operating systems need to be aware of the passage of both real
    4.53 +(or wallclock) time and their own `virtual time' (the time for which
    4.54 +they have been executing). Furthermore, Xen has a notion of time which
    4.55 +is used for scheduling. The following notions of time are provided:
    4.56 +
    4.57 +\begin{description}
    4.58 +\item[Cycle counter time.]
    4.59 +
    4.60 +  This provides a fine-grained time reference.  The cycle counter time
    4.61 +  is used to accurately extrapolate the other time references.  On SMP
    4.62 +  machines it is currently assumed that the cycle counter time is
    4.63 +  synchronized between CPUs.  The current x86-based implementation
    4.64 +  achieves this within inter-CPU communication latencies.
    4.65 +
    4.66 +\item[System time.]
    4.67 +
    4.68 +  This is a 64-bit counter which holds the number of nanoseconds that
    4.69 +  have elapsed since system boot.
    4.70 +
    4.71 +\item[Wall clock time.]
    4.72 +
    4.73 +  This is the time of day in a Unix-style {\tt struct timeval}
    4.74 +  (seconds and microseconds since 1 January 1970, adjusted by leap
    4.75 +  seconds).  An NTP client hosted by {\it domain 0} can keep this
    4.76 +  value accurate.
    4.77 +
    4.78 +\item[Domain virtual time.]
    4.79 +
    4.80 +  This progresses at the same pace as system time, but only while a
    4.81 +  domain is executing --- it stops while a domain is de-scheduled.
    4.82 +  Therefore the share of the CPU that a domain receives is indicated
    4.83 +  by the rate at which its virtual time increases.
    4.84 +
    4.85 +\end{description}
    4.86 +
    4.87 +
    4.88 +Xen exports timestamps for system time and wall-clock time to guest
    4.89 +operating systems through a shared page of memory.  Xen also provides
    4.90 +the cycle counter time at the instant the timestamps were calculated,
    4.91 +and the CPU frequency in Hertz.  This allows the guest to extrapolate
    4.92 +system and wall-clock times accurately based on the current cycle
    4.93 +counter time.
    4.94 +
     4.95 +Since all timestamps need to be updated and read \emph{atomically},
     4.96 +two version numbers are also stored in the shared info page. The first
     4.97 +is incremented prior to an update, while the second is only
     4.98 +incremented afterwards. Thus a guest can be sure that it read a
     4.99 +consistent state by checking that the two version numbers are equal.
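As an illustration, a guest can read a consistent snapshot with a retry
loop of the following shape (a minimal sketch: the field and helper
names are illustrative, not the exact shared-info layout):

\begin{verbatim}
/* Sketch: read a consistent (system time, cycle count) pair.
 * Names are illustrative, not the exact shared_info layout. */
uint32_t v1, v2;
uint64_t sys, tsc, now;

do {
    v1  = shared->time_version1;   /* bumped before an update  */
    rmb();                         /* read memory barrier      */
    sys = shared->system_time;     /* ns since boot            */
    tsc = shared->tsc_timestamp;   /* cycles at last update    */
    rmb();
    v2  = shared->time_version2;   /* bumped after the update  */
} while (v1 != v2);                /* retry if an update raced */

now = sys + cycles_to_ns(rdtsc() - tsc);
\end{verbatim}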
   4.100 +
   4.101 +Xen includes a periodic ticker which sends a timer event to the
   4.102 +currently executing domain every 10ms.  The Xen scheduler also sends a
   4.103 +timer event whenever a domain is scheduled; this allows the guest OS
   4.104 +to adjust for the time that has passed while it has been inactive.  In
   4.105 +addition, Xen allows each domain to request that they receive a timer
   4.106 +event sent at a specified system time by using the {\tt
   4.107 +  set\_timer\_op()} hypercall.  Guest OSes may use this timer to
   4.108 +implement timeout values when they block.
   4.109 +
   4.110 +
   4.111 +
   4.112 +%% % akw: demoting this to a section -- not sure if there is any point
   4.113 +%% % though, maybe just remove it.
   4.114 +
   4.115 +\section{Xen CPU Scheduling}
   4.116 +
   4.117 +Xen offers a uniform API for CPU schedulers.  It is possible to choose
   4.118 +from a number of schedulers at boot and it should be easy to add more.
   4.119 +The BVT, Atropos and Round Robin schedulers are part of the normal Xen
   4.120 +distribution.  BVT provides proportional fair shares of the CPU to the
   4.121 +running domains.  Atropos can be used to reserve absolute shares of
   4.122 +the CPU for each domain.  Round-robin is provided as an example of
   4.123 +Xen's internal scheduler API.
   4.124 +
   4.125 +\paragraph*{Note: SMP host support}
   4.126 +Xen has always supported SMP host systems.  Domains are statically
   4.127 +assigned to CPUs, either at creation time or when manually pinning to
   4.128 +a particular CPU.  The current schedulers then run locally on each CPU
   4.129 +to decide which of the assigned domains should be run there. The
   4.130 +user-level control software can be used to perform coarse-grain
   4.131 +load-balancing between CPUs.
   4.132 +
   4.133 +
   4.134 +%% More information on the characteristics and use of these schedulers
   4.135 +%% is available in {\tt Sched-HOWTO.txt}.
   4.136 +
   4.137 +
   4.138 +\section{Privileged operations}
   4.139 +
   4.140 +Xen exports an extended interface to privileged domains (viz.\ {\it
   4.141 +  Domain 0}). This allows such domains to build and boot other domains
   4.142 +on the server, and provides control interfaces for managing
   4.143 +scheduling, memory, networking, and block devices.
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/docs/src/interface/debugging.tex	Tue Sep 20 09:43:46 2005 +0000
     5.3 @@ -0,0 +1,62 @@
     5.4 +\chapter{Debugging}
     5.5 +
     5.6 +Xen provides tools for debugging both Xen and guest OSes.  Currently, the
     5.7 +Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
     5.8 +debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
     5.9 +Buffer provides a lightweight means to log data about Xen's internal state and
    5.10 +behaviour at runtime, for later analysis.
    5.11 +
    5.12 +\section{Pervasive Debugger}
    5.13 +
    5.14 +Information on using the pervasive debugger is available in pdb.txt.
    5.15 +
    5.16 +
    5.17 +\section{Trace Buffer}
    5.18 +
    5.19 +The trace buffer provides a means to observe Xen's operation from domain 0.
    5.20 +Trace events, inserted at key points in Xen's code, record data that can be
    5.21 +read by the {\tt xentrace} tool.  Recording these events has a low overhead
    5.22 +and hence the trace buffer may be useful for debugging timing-sensitive
    5.23 +behaviours.
    5.24 +
    5.25 +\subsection{Internal API}
    5.26 +
    5.27 +To use the trace buffer functionality from within Xen, you must {\tt \#include
    5.28 +<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
    5.29 +events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
    5.30 +2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
    5.31 +(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
    5.32 +will insert the event ID and data into the trace buffer, along with the current
    5.33 +value of the CPU cycle-counter.  For builds without the trace buffer enabled,
    5.34 +the macros expand to no-ops and thus can be left in place without incurring
    5.35 +overheads.
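For example, a hypothetical trace point recording two data words might
look like this (the event identifier and surrounding function are made
up for illustration):

\begin{verbatim}
#include <xen/trace.h>

#define TRC_MY_EVENT 0x00090001    /* hypothetical event ID */

void my_function(unsigned int domid, unsigned int vector)
{
    /* Records event ID, cycle counter and two 32-bit words;
     * expands to a no-op when tracing is compiled out.     */
    TRACE_2D(TRC_MY_EVENT, domid, vector);
}
\end{verbatim}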
    5.36 +
    5.37 +\subsection{Trace-enabled builds}
    5.38 +
    5.39 +By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
    5.40 +is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
    5.41 +either in {\tt <xen/config.h>} or on the gcc command line.
    5.42 +
    5.43 +The size (in pages) of the per-CPU trace buffers can be specified using the
    5.44 +{\tt tbuf\_size=n } boot parameter to Xen.  If the size is set to 0, the trace
    5.45 +buffers will be disabled.
    5.46 +
    5.47 +\subsection{Dumping trace data}
    5.48 +
    5.49 +When running a trace buffer build of Xen, trace data are written continuously
    5.50 +into the buffer data areas, with newer data overwriting older data.  This data
    5.51 +can be captured using the {\tt xentrace} program in domain 0.
    5.52 +
    5.53 +The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
    5.54 +buffers into its address space.  It then periodically polls all the buffers for
    5.55 +new data, dumping out any new records from each buffer in turn.  As a result,
    5.56 +for machines with multiple (logical) CPUs, the trace buffer output will not be
    5.57 +in overall chronological order.
    5.58 +
    5.59 +The output from {\tt xentrace} can be post-processed using {\tt
    5.60 +xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
    5.61 +{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
    5.62 +trace points, there is an example format file in {\tt tools/xentrace/formats }.
    5.63 +
    5.64 +For more information, see the manual pages for {\tt xentrace}, {\tt
    5.65 +xentrace\_format} and {\tt xentrace\_cpusplit}.
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/docs/src/interface/devices.tex	Tue Sep 20 09:43:46 2005 +0000
     6.3 @@ -0,0 +1,178 @@
     6.4 +\chapter{Devices}
     6.5 +\label{c:devices}
     6.6 +
     6.7 +Devices such as network and disk are exported to guests using a split
      6.8 +device driver.  The device driver domain, which accesses the physical
      6.9 +device directly, also runs a \emph{backend} driver, serving requests to
     6.10 +that device from guests.  Each guest uses a simple \emph{frontend}
     6.11 +driver to access the backend.  Communication between these domains is
    6.12 +composed of two parts: First, data is placed onto a shared memory page
    6.13 +between the domains.  Second, an event channel between the two domains
    6.14 +is used to pass notification that data is outstanding.  This
    6.15 +separation of notification from data transfer allows message batching,
    6.16 +and results in very efficient device access.
    6.17 +
    6.18 +Event channels are used extensively in device virtualization; each
     6.19 +domain has a number of end-points or \emph{ports}, each of which may be
    6.20 +bound to one of the following \emph{event sources}:
    6.21 +\begin{itemize}
    6.22 +  \item a physical interrupt from a real device, 
    6.23 +  \item a virtual interrupt (callback) from Xen, or 
    6.24 +  \item a signal from another domain 
    6.25 +\end{itemize}
    6.26 +
    6.27 +Events are lightweight and do not carry much information beyond the
    6.28 +source of the notification. Hence when performing bulk data transfer,
    6.29 +events are typically used as synchronization primitives over a shared
    6.30 +memory transport. Event channels are managed via the {\tt
    6.31 +  event\_channel\_op()} hypercall; for more details see
    6.32 +Section~\ref{s:idc}.
    6.33 +
    6.34 +This chapter focuses on some individual device interfaces available to
    6.35 +Xen guests.
    6.36 +
    6.37 +
    6.38 +\section{Network I/O}
    6.39 +
    6.40 +Virtual network device services are provided by shared memory
    6.41 +communication with a backend domain.  From the point of view of other
    6.42 +domains, the backend may be viewed as a virtual ethernet switch
    6.43 +element with each domain having one or more virtual network interfaces
    6.44 +connected to it.
    6.45 +
    6.46 +\subsection{Backend Packet Handling}
    6.47 +
    6.48 +The backend driver is responsible for a variety of actions relating to
    6.49 +the transmission and reception of packets from the physical device.
    6.50 +With regard to transmission, the backend performs these key actions:
    6.51 +
    6.52 +\begin{itemize}
    6.53 +\item {\bf Validation:} To ensure that domains do not attempt to
    6.54 +  generate invalid (e.g. spoofed) traffic, the backend driver may
    6.55 +  validate headers ensuring that source MAC and IP addresses match the
    6.56 +  interface that they have been sent from.
    6.57 +
    6.58 +  Validation functions can be configured using standard firewall rules
    6.59 +  ({\small{\tt iptables}} in the case of Linux).
    6.60 +  
    6.61 +\item {\bf Scheduling:} Since a number of domains can share a single
    6.62 +  physical network interface, the backend must mediate access when
    6.63 +  several domains each have packets queued for transmission.  This
    6.64 +  general scheduling function subsumes basic shaping or rate-limiting
    6.65 +  schemes.
    6.66 +  
    6.67 +\item {\bf Logging and Accounting:} The backend domain can be
    6.68 +  configured with classifier rules that control how packets are
    6.69 +  accounted or logged.  For example, log messages might be generated
    6.70 +  whenever a domain attempts to send a TCP packet containing a SYN.
    6.71 +\end{itemize}
    6.72 +
    6.73 +On receipt of incoming packets, the backend acts as a simple
    6.74 +demultiplexer: Packets are passed to the appropriate virtual interface
    6.75 +after any necessary logging and accounting have been carried out.
    6.76 +
    6.77 +\subsection{Data Transfer}
    6.78 +
    6.79 +Each virtual interface uses two ``descriptor rings'', one for
    6.80 +transmit, the other for receive.  Each descriptor identifies a block
    6.81 +of contiguous physical memory allocated to the domain.
    6.82 +
    6.83 +The transmit ring carries packets to transmit from the guest to the
    6.84 +backend domain.  The return path of the transmit ring carries messages
    6.85 +indicating that the contents have been physically transmitted and the
    6.86 +backend no longer requires the associated pages of memory.
    6.87 +
    6.88 +To receive packets, the guest places descriptors of unused pages on
    6.89 +the receive ring.  The backend will return received packets by
    6.90 +exchanging these pages in the domain's memory with new pages
    6.91 +containing the received data, and passing back descriptors regarding
    6.92 +the new packets on the ring.  This zero-copy approach allows the
    6.93 +backend to maintain a pool of free pages to receive packets into, and
    6.94 +then deliver them to appropriate domains after examining their
    6.95 +headers.
    6.96 +
    6.97 +% Real physical addresses are used throughout, with the domain
    6.98 +% performing translation from pseudo-physical addresses if that is
    6.99 +% necessary.
   6.100 +
   6.101 +If a domain does not keep its receive ring stocked with empty buffers
   6.102 +then packets destined to it may be dropped.  This provides some
    6.103 +defence against receive livelock problems because an overloaded domain
   6.104 +will cease to receive further data.  Similarly, on the transmit path,
   6.105 +it provides the application with feedback on the rate at which packets
   6.106 +are able to leave the system.
   6.107 +
   6.108 +Flow control on rings is achieved by including a pair of producer
   6.109 +indexes on the shared ring page.  Each side will maintain a private
   6.110 +consumer index indicating the next outstanding message.  In this
   6.111 +manner, the domains cooperate to divide the ring into two message
   6.112 +lists, one in each direction.  Notification is decoupled from the
   6.113 +immediate placement of new messages on the ring; the event channel
   6.114 +will be used to generate notification when {\em either} a certain
   6.115 +number of outstanding messages are queued, {\em or} a specified number
   6.116 +of nanoseconds have elapsed since the oldest message was placed on the
   6.117 +ring.
   6.118 +
   6.119 +%% Not sure if my version is any better -- here is what was here
   6.120 +%% before: Synchronization between the backend domain and the guest is
   6.121 +%% achieved using counters held in shared memory that is accessible to
   6.122 +%% both.  Each ring has associated producer and consumer indices
   6.123 +%% indicating the area in the ring that holds descriptors that contain
    6.124 +%% data.  After receiving {\it n} packets or {\tt nanoseconds} after
   6.125 +%% receiving the first packet, the hypervisor sends an event to the
   6.126 +%% domain.
   6.127 +
   6.128 +
   6.129 +\section{Block I/O}
   6.130 +
   6.131 +All guest OS disk access goes through the virtual block device VBD
   6.132 +interface.  This interface allows domains access to portions of block
    6.133 +storage devices visible to the block backend device.  The VBD
   6.134 +interface is a split driver, similar to the network interface
   6.135 +described above.  A single shared memory ring is used between the
   6.136 +frontend and backend drivers, across which read and write messages are
   6.137 +sent.
   6.138 +
   6.139 +Any block device accessible to the backend domain, including
   6.140 +network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
   6.141 +can be exported as a VBD.  Each VBD is mapped to a device node in the
   6.142 +guest, specified in the guest's startup configuration.
   6.143 +
   6.144 +Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
   6.145 +similar functionality can be achieved using the more complete LVM
   6.146 +system, which is already in widespread use.
   6.147 +
   6.148 +\subsection{Data Transfer}
   6.149 +
   6.150 +The single ring between the guest and the block backend supports three
   6.151 +messages:
   6.152 +
   6.153 +\begin{description}
   6.154 +\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to
   6.155 +  this guest from the backend.  The request includes a descriptor of a
   6.156 +  free page into which the reply will be written by the backend.
   6.157 +
   6.158 +\item [{\small {\tt READ}}:] Read data from the specified block
   6.159 +  device.  The front end identifies the device and location to read
   6.160 +  from and attaches pages for the data to be copied to (typically via
   6.161 +  DMA from the device).  The backend acknowledges completed read
   6.162 +  requests as they finish.
   6.163 +
   6.164 +\item [{\small {\tt WRITE}}:] Write data to the specified block
   6.165 +  device.  This functions essentially as {\small {\tt READ}}, except
   6.166 +  that the data moves to the device instead of from it.
   6.167 +\end{description}
   6.168 +
   6.169 +%% um... some old text: In overview, the same style of descriptor-ring
   6.170 +%% that is used for network packets is used here.  Each domain has one
   6.171 +%% ring that carries operation requests to the hypervisor and carries
   6.172 +%% the results back again.
   6.173 +
   6.174 +%% Rather than copying data, the backend simply maps the domain's
   6.175 +%% buffers in order to enable direct DMA to them.  The act of mapping
   6.176 +%% the buffers also increases the reference counts of the underlying
   6.177 +%% pages, so that the unprivileged domain cannot try to return them to
   6.178 +%% the hypervisor, install them as page tables, or any other unsafe
   6.179 +%% behaviour.
   6.180 +%%
   6.181 +%% % block API here
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/docs/src/interface/further_info.tex	Tue Sep 20 09:43:46 2005 +0000
     7.3 @@ -0,0 +1,49 @@
     7.4 +\chapter{Further Information}
     7.5 +
     7.6 +If you have questions that are not answered by this manual, the
     7.7 +sources of information listed below may be of interest to you.  Note
     7.8 +that bug reports, suggestions and contributions related to the
     7.9 +software (or the documentation) should be sent to the Xen developers'
    7.10 +mailing list (address below).
    7.11 +
    7.12 +
    7.13 +\section{Other documentation}
    7.14 +
    7.15 +If you are mainly interested in using (rather than developing for)
    7.16 +Xen, the \emph{Xen Users' Manual} is distributed in the {\tt docs/}
    7.17 +directory of the Xen source distribution.
    7.18 +
    7.19 +% Various HOWTOs are also available in {\tt docs/HOWTOS}.
    7.20 +
    7.21 +
    7.22 +\section{Online references}
    7.23 +
    7.24 +The official Xen web site is found at:
    7.25 +\begin{quote}
    7.26 +{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
    7.27 +\end{quote}
    7.28 +
    7.29 +This contains links to the latest versions of all on-line
    7.30 +documentation.
    7.31 +
    7.32 +
    7.33 +\section{Mailing lists}
    7.34 +
    7.35 +There are currently four official Xen mailing lists:
    7.36 +
    7.37 +\begin{description}
    7.38 +\item[xen-devel@lists.xensource.com] Used for development
    7.39 +  discussions and bug reports.  Subscribe at: \\
    7.40 +  {\small {\tt http://lists.xensource.com/xen-devel}}
    7.41 +\item[xen-users@lists.xensource.com] Used for installation and usage
    7.42 +  discussions and requests for help.  Subscribe at: \\
    7.43 +  {\small {\tt http://lists.xensource.com/xen-users}}
    7.44 +\item[xen-announce@lists.xensource.com] Used for announcements only.
    7.45 +  Subscribe at: \\
    7.46 +  {\small {\tt http://lists.xensource.com/xen-announce}}
    7.47 +\item[xen-changelog@lists.xensource.com] Changelog feed
    7.48 +  from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
    7.49 +  {\small {\tt http://lists.xensource.com/xen-changelog}}
    7.50 +\end{description}
    7.51 +
    7.52 +Of these, xen-devel is the most active.
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/docs/src/interface/hypercalls.tex	Tue Sep 20 09:43:46 2005 +0000
     8.3 @@ -0,0 +1,524 @@
     8.4 +
     8.5 +\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
     8.6 +
     8.7 +\chapter{Xen Hypercalls}
     8.8 +\label{a:hypercalls}
     8.9 +
    8.10 +Hypercalls represent the procedural interface to Xen; this appendix 
    8.11 +categorizes and describes the current set of hypercalls. 
    8.12 +
    8.13 +\section{Invoking Hypercalls} 
    8.14 +
    8.15 +Hypercalls are invoked in a manner analogous to system calls in a
    8.16 +conventional operating system; a software interrupt is issued which
    8.17 +vectors to an entry point within Xen. On x86\_32 machines the
    8.18 +instruction required is {\tt int \$82}; the (real) IDT is setup so
    8.19 +that this may only be issued from within ring 1. The particular 
    8.20 +hypercall to be invoked is contained in {\tt EAX} --- a list 
    8.21 +mapping these values to symbolic hypercall names can be found 
    8.22 +in {\tt xen/include/public/xen.h}. 
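Guest kernels typically wrap this convention in small stubs. A minimal
sketch of a one-argument hypercall stub for x86\_32, in the style of
the XenLinux wrappers, might be:

\begin{verbatim}
/* Sketch: issue hypercall `op' with one argument on x86_32.
 * The hypercall number travels in EAX, the argument in EBX. */
static inline long hypercall1(unsigned long op, unsigned long arg)
{
    long ret;
    __asm__ __volatile__ ( "int $0x82"
                           : "=a" (ret)
                           : "0" (op), "b" (arg)
                           : "memory" );
    return ret;
}
\end{verbatim}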
    8.23 +
    8.24 +On some occasions a set of hypercalls will be required to carry
    8.25 +out a higher-level function; a good example is when a guest 
    8.26 +operating wishes to context switch to a new process which 
    8.27 +requires updating various privileged CPU state. As an optimization
    8.28 +for these cases, there is a generic mechanism to issue a set of 
    8.29 +hypercalls as a batch: 
    8.30 +
    8.31 +\begin{quote}
    8.32 +\hypercall{multicall(void *call\_list, int nr\_calls)}
    8.33 +
    8.34 +Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
     8.35 +the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
    8.36 +call\_list}. Each entry contains the hypercall operation code followed
    8.37 +by up to 7 word-sized arguments.
    8.38 +\end{quote}
    8.39 +
    8.40 +Note that multicalls are provided purely as an optimization; there is
    8.41 +no requirement to use them when first porting a guest operating
    8.42 +system.
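As a hedged sketch, batching a stack switch with a page-table update
could look like this ({\tt hypercall2} is assumed to be a two-argument
variant of the stub shown earlier; field names follow the description
above and should be checked against {\tt xen/include/public/xen.h}):

\begin{verbatim}
/* Sketch: one trap into Xen instead of two. */
multicall_entry_t calls[2];

calls[0].op      = __HYPERVISOR_stack_switch;
calls[0].args[0] = new_ss;
calls[0].args[1] = new_esp;

calls[1].op      = __HYPERVISOR_mmu_update;
calls[1].args[0] = (unsigned long)req;    /* mmu_update_t array */
calls[1].args[1] = nr_reqs;
calls[1].args[2] = (unsigned long)&done;  /* success count      */

hypercall2(__HYPERVISOR_multicall, (unsigned long)calls, 2);
\end{verbatim}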
    8.43 +
    8.44 +
    8.45 +\section{Virtual CPU Setup} 
    8.46 +
     8.47 +At start of day, a guest operating system needs to set up the virtual
     8.48 +CPU it is executing on. This includes installing vectors for the
     8.49 +virtual IDT so that the guest OS can handle interrupts, page faults,
     8.50 +etc. However, the very first thing a guest OS must set up is a pair 
    8.51 +of hypervisor callbacks: these are the entry points which Xen will
    8.52 +use when it wishes to notify the guest OS of an occurrence. 
    8.53 +
    8.54 +\begin{quote}
    8.55 +\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
    8.56 +  event\_address, unsigned long failsafe\_selector, unsigned long
    8.57 +  failsafe\_address) }
    8.58 +
    8.59 +Register the normal (``event'') and failsafe callbacks for 
    8.60 +event processing. In each case the code segment selector and 
    8.61 +address within that segment are provided. The selectors must
    8.62 +have RPL 1; in XenLinux we simply use the kernel's CS for both 
    8.63 +{\tt event\_selector} and {\tt failsafe\_selector}.
    8.64 +
     8.65 +The value {\tt event\_address} specifies the address of the guest OS's
    8.66 +event handling and dispatch routine; the {\tt failsafe\_address}
    8.67 +specifies a separate entry point which is used only if a fault occurs
    8.68 +when Xen attempts to use the normal callback. 
    8.69 +\end{quote} 
    8.70 +
    8.71 +
    8.72 +After installing the hypervisor callbacks, the guest OS can 
    8.73 +install a `virtual IDT' by using the following hypercall: 
    8.74 +
    8.75 +\begin{quote} 
    8.76 +\hypercall{set\_trap\_table(trap\_info\_t *table)} 
    8.77 +
    8.78 +Install one or more entries into the per-domain 
    8.79 +trap handler table (essentially a software version of the IDT). 
    8.80 +Each entry in the array pointed to by {\tt table} includes the 
    8.81 +exception vector number with the corresponding segment selector 
    8.82 +and entry point. Most guest OSes can use the same handlers on 
    8.83 +Xen as when running on the real hardware; an exception is the 
    8.84 +page fault handler (exception vector 14) where a modified 
    8.85 +stack-frame layout is used. 
    8.86 +
    8.87 +
    8.88 +\end{quote} 
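By way of illustration, a guest's trap table might be declared as
follows (a sketch: the handler names and {\tt KERNEL\_CS} are
placeholders, and {\tt HYPERVISOR\_set\_trap\_table} stands for a
guest-side hypercall stub):

\begin{verbatim}
/* Sketch: vector, flags, code selector, handler address;
 * a zeroed entry terminates the table. */
static trap_info_t trap_table[] = {
    {  0, 0, KERNEL_CS, (unsigned long)divide_error },
    { 14, 0, KERNEL_CS, (unsigned long)page_fault   },
    {  0, 0, 0, 0 }
};

HYPERVISOR_set_trap_table(trap_table);
\end{verbatim}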
    8.89 +
    8.90 +
    8.91 +
    8.92 +\section{Scheduling and Timer}
    8.93 +
    8.94 +Domains are preemptively scheduled by Xen according to the 
    8.95 +parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
    8.96 +In addition, however, a domain may choose to explicitly 
    8.97 +control certain behavior with the following hypercall: 
    8.98 +
    8.99 +\begin{quote} 
   8.100 +\hypercall{sched\_op(unsigned long op)} 
   8.101 +
   8.102 +Request scheduling operation from hypervisor. The options are: {\it
   8.103 +yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
   8.104 +calling domain runnable but may cause a reschedule if other domains
    8.105 +are runnable.  {\it block} removes the calling domain from the run
    8.106 +queue and causes it to sleep until an event is delivered to it.  {\it
   8.107 +shutdown} is used to end the domain's execution; the caller can
   8.108 +additionally specify whether the domain should reboot, halt or
   8.109 +suspend.
   8.110 +\end{quote} 
   8.111 +
   8.112 +To aid the implementation of a process scheduler within a guest OS,
   8.113 +Xen provides a virtual programmable timer:
   8.114 +
   8.115 +\begin{quote}
   8.116 +\hypercall{set\_timer\_op(uint64\_t timeout)} 
   8.117 +
   8.118 +Request a timer event to be sent at the specified system time (time 
   8.119 +in nanoseconds since system boot). The hypercall actually passes the 
   8.120 +64-bit timeout value as a pair of 32-bit values. 
   8.121 +
   8.122 +\end{quote} 
   8.123 +
   8.124 +Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
   8.125 +allows block-with-timeout semantics. 
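For example, a guest's idle loop might block for at most 10ms as
sketched below ({\tt HYPERVISOR\_*} denote guest-side hypercall stubs;
{\tt get\_system\_time\_ns()} is a hypothetical helper that
extrapolates system time as described earlier):

\begin{verbatim}
/* Sketch: block until an event arrives or 10ms elapses. */
uint64_t now = get_system_time_ns();          /* hypothetical */
HYPERVISOR_set_timer_op(now + 10000000ULL);   /* fire in 10ms */
HYPERVISOR_sched_op(SCHEDOP_block);           /* sleep        */
\end{verbatim}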
   8.126 +
   8.127 +
   8.128 +\section{Page Table Management} 
   8.129 +
   8.130 +Since guest operating systems have read-only access to their page 
   8.131 +tables, Xen must be involved when making any changes. The following
   8.132 +multi-purpose hypercall can be used to modify page-table entries, 
   8.133 +update the machine-to-physical mapping table, flush the TLB, install 
   8.134 +a new page-table base pointer, and more.
   8.135 +
   8.136 +\begin{quote} 
   8.137 +\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
   8.138 +
   8.139 +Update the page table for the domain; a set of {\tt count} updates are
   8.140 +submitted for processing in a batch, with {\tt success\_count} being 
   8.141 +updated to report the number of successful updates.  
   8.142 +
   8.143 +Each element of {\tt req[]} contains a pointer (address) and value; 
   8.144 +the least significant 2-bits of the pointer are used to distinguish 
   8.145 +the type of update requested as follows:
   8.146 +\begin{description} 
   8.147 +
   8.148 +\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
   8.149 +page table entry to the associated value; Xen will check that the
   8.150 +update is safe, as described in Chapter~\ref{c:memory}.
   8.151 +
   8.152 +\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
   8.153 +  machine-to-physical table. The calling domain must own the machine
   8.154 +  page in question (or be privileged).
   8.155 +
   8.156 +\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
   8.157 +The set of additional MMU operations is considerable, and includes
   8.158 +updating {\tt cr3} (or just re-installing it for a TLB flush),
   8.159 +flushing the cache, installing a new LDT, or pinning \& unpinning
   8.160 +page-table pages (to ensure their reference count doesn't drop to zero
   8.161 +which would require a revalidation of all entries).
   8.162 +
   8.163 +Further extended commands are used to deal with granting and 
   8.164 +acquiring page ownership; see Section~\ref{s:idc}. 
   8.165 +
   8.166 +
   8.167 +\end{description}
   8.168 +
   8.169 +More details on the precise format of all commands can be 
   8.170 +found in {\tt xen/include/public/xen.h}. 
   8.171 +
   8.172 +
   8.173 +\end{quote}
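As a sketch, a batch updating one PTE and the corresponding
machine-to-physical entry might be assembled as follows (names follow
the conventions above; the exact definitions live in
{\tt xen/include/public/xen.h}):

\begin{verbatim}
/* Sketch: the low 2 bits of `ptr' select the request type. */
mmu_update_t req[2];
int done;

req[0].ptr = pte_machine_addr | MMU_NORMAL_PT_UPDATE;
req[0].val = new_pte;

req[1].ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
req[1].val = pfn;                  /* pseudo-physical frame */

HYPERVISOR_mmu_update(req, 2, &done);
/* `done' reports the number of successful updates. */
\end{verbatim}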
   8.174 +
   8.175 +Explicitly updating batches of page table entries is extremely
   8.176 +efficient, but can require a number of alterations to the guest
   8.177 +OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
   8.178 +recommended for new OS ports.
   8.179 +
   8.180 +Regardless of which page table update mode is being used, however,
   8.181 +there are some occasions (notably handling a demand page fault) where
   8.182 +a guest OS will wish to modify exactly one PTE rather than a
   8.183 +batch. This is catered for by the following:
   8.184 +
   8.185 +\begin{quote} 
   8.186 +\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
   8.187 +val, \\ unsigned long flags)}
   8.188 +
   8.189 +Update the currently installed PTE for the page {\tt page\_nr} to 
   8.190 +{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
   8.191 +is safe before applying it. The {\tt flags} determine which kind
   8.192 +of TLB flush, if any, should follow the update. 
   8.193 +
   8.194 +\end{quote} 
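For instance, a demand-fault handler might install a single mapping and
invalidate just that TLB entry (a sketch; the flag name should be
verified against the public headers):

\begin{verbatim}
/* Sketch: map one page at linear address `va', invalidating
 * only its TLB entry. */
HYPERVISOR_update_va_mapping(va >> PAGE_SHIFT, new_pte,
                             UVMF_INVLPG);
\end{verbatim}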
   8.195 +
   8.196 +Finally, sufficiently privileged domains may occasionally wish to manipulate 
   8.197 +the pages of others: 
   8.198 +\begin{quote}
   8.199 +
   8.200 +\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
   8.201 +unsigned long val, unsigned long flags, uint16\_t domid)}
   8.202 +
   8.203 +Identical to {\tt update\_va\_mapping()} save that the pages being
   8.204 +mapped must belong to the domain {\tt domid}. 
   8.205 +
   8.206 +\end{quote}
   8.207 +
   8.208 +This privileged operation is currently used by backend virtual device
   8.209 +drivers to safely map pages containing I/O data. 
   8.210 +
   8.211 +
   8.212 +
   8.213 +\section{Segmentation Support}
   8.214 +
   8.215 +Xen allows guest OSes to install a custom GDT if they require it; 
   8.216 +this is context switched transparently whenever a domain is 
   8.217 +[de]scheduled.  The following hypercall is effectively a 
   8.218 +`safe' version of {\tt lgdt}: 
   8.219 +
   8.220 +\begin{quote}
   8.221 +\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
   8.222 +
   8.223 +Install a global descriptor table for a domain; {\tt frame\_list} is
   8.224 +an array of up to 16 machine page frames within which the GDT resides,
   8.225 +with {\tt entries} being the actual number of descriptor-entry
   8.226 +slots. All page frames must be mapped read-only within the guest's
   8.227 +address space, and the table must be large enough to contain Xen's
   8.228 +reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
   8.229 +
   8.230 +\end{quote}
   8.231 +
   8.232 +Many guest OSes will also wish to install LDTs; this is achieved by
   8.233 +using {\tt mmu\_update()} with an extended command, passing the
   8.234 +linear address of the LDT base along with the number of entries. No
    8.235 +special safety checks are required; Xen needs to perform this task
    8.236 +simply because {\tt lldt} requires CPL 0.
   8.237 +
   8.238 +
   8.239 +Xen also allows guest operating systems to update just an 
   8.240 +individual segment descriptor in the GDT or LDT:  
   8.241 +
   8.242 +\begin{quote}
   8.243 +\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
   8.244 +unsigned long word2)}
   8.245 +
   8.246 +Update the GDT/LDT entry at machine address {\tt ma}; the new
   8.247 +8-byte descriptor is stored in {\tt word1} and {\tt word2}.
   8.248 +Xen performs a number of checks to ensure the descriptor is 
   8.249 +valid. 
   8.250 +
   8.251 +\end{quote}
   8.252 +
   8.253 +Guest OSes can use the above in place of context switching entire 
   8.254 +LDTs (or the GDT) when the number of changing descriptors is small. 
   8.255 +
   8.256 +\section{Context Switching} 
   8.257 +
   8.258 +When a guest OS wishes to context switch between two processes, 
   8.259 +it can use the page table and segmentation hypercalls described
    8.260 +above to perform the bulk of the privileged work. In addition, 
   8.261 +however, it will need to invoke Xen to switch the kernel (ring 1) 
   8.262 +stack pointer: 
   8.263 +
   8.264 +\begin{quote} 
   8.265 +\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
   8.266 +
   8.267 +Request kernel stack switch from hypervisor; {\tt ss} is the new 
    8.268 +stack segment, while {\tt esp} is the new stack pointer. 
   8.269 +
   8.270 +\end{quote} 
   8.271 +
   8.272 +A final useful hypercall for context switching allows ``lazy'' 
   8.273 +save and restore of floating point state: 
   8.274 +
   8.275 +\begin{quote}
   8.276 +\hypercall{fpu\_taskswitch(void)} 
   8.277 +
   8.278 +This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
   8.279 +control register; this means that the next attempt to use floating
    8.280 +point will cause a fault which the guest OS can catch. Typically it will
   8.281 +then save/restore the FP state, and clear the {\tt TS} bit. 
   8.282 +\end{quote} 
   8.283 +
   8.284 +This is provided as an optimization only; guest OSes can also choose
   8.285 +to save and restore FP state on all context switches for simplicity. 
   8.286 +
   8.287 +
   8.288 +\section{Physical Memory Management}
   8.289 +
   8.290 +As mentioned previously, each domain has a maximum and current 
   8.291 +memory allocation. The maximum allocation, set at domain creation 
   8.292 +time, cannot be modified. However a domain can choose to reduce 
   8.293 +and subsequently grow its current allocation by using the
   8.294 +following call: 
   8.295 +
   8.296 +\begin{quote} 
   8.297 +\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
   8.298 +  unsigned long nr\_extents, unsigned int extent\_order)}
   8.299 +
   8.300 +Increase or decrease current memory allocation (as determined by 
   8.301 +the value of {\tt op}). Each invocation provides a list of 
   8.302 +extents each of which is $2^s$ pages in size, 
   8.303 +where $s$ is the value of {\tt extent\_order}. 
   8.304 +
   8.305 +\end{quote} 
   8.306 +
   8.307 +In addition to simply reducing or increasing the current memory
   8.308 +allocation via a `balloon driver', this call is also useful for 
   8.309 +obtaining contiguous regions of machine memory when required (e.g. 
   8.310 +for certain PCI devices, or if using superpages).  
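A balloon driver's release path might therefore look roughly as follows
(a sketch: the operation constant and stub name are assumptions to be
checked against the public headers):

\begin{verbatim}
/* Sketch: hand 16 single pages (extent_order 0) back to Xen. */
unsigned long mfns[16];

/* ... unmap 16 pages from the guest and record their machine
 *     frame numbers in mfns[] ... */

HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, mfns, 16, 0);
\end{verbatim}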
   8.311 +
   8.312 +
   8.313 +\section{Inter-Domain Communication}
   8.314 +\label{s:idc} 
   8.315 +
   8.316 +Xen provides a simple asynchronous notification mechanism via
   8.317 +\emph{event channels}. Each domain has a set of end-points (or
   8.318 +\emph{ports}) which may be bound to an event source (e.g. a physical
    8.319 +IRQ, a virtual IRQ, or a port in another domain). When a pair of
   8.320 +end-points in two different domains are bound together, then a `send'
   8.321 +operation on one will cause an event to be received by the destination
   8.322 +domain.
   8.323 +
   8.324 +The control and use of event channels involves the following hypercall: 
   8.325 +
   8.326 +\begin{quote}
   8.327 +\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
   8.328 +
   8.329 +Inter-domain event-channel management; {\tt op} is a discriminated 
   8.330 +union which allows the following 7 operations: 
   8.331 +
   8.332 +\begin{description} 
   8.333 +
   8.334 +\item[\it alloc\_unbound:] allocate a free (unbound) local
   8.335 +  port and prepare for connection from a specified domain. 
   8.336 +\item[\it bind\_virq:] bind a local port to a virtual 
   8.337 +IRQ; any particular VIRQ can be bound to at most one port per domain. 
   8.338 +\item[\it bind\_pirq:] bind a local port to a physical IRQ;
   8.339 +once more, a given pIRQ can be bound to at most one port per
   8.340 +domain. Furthermore the calling domain must be sufficiently
   8.341 +privileged.
   8.342 +\item[\it bind\_interdomain:] construct an interdomain event 
   8.343 +channel; in general, the target domain must have previously allocated 
   8.344 +an unbound port for this channel, although this can be bypassed by 
   8.345 +privileged domains during domain setup. 
   8.346 +\item[\it close:] close an interdomain event channel. 
    8.347 +\item[\it send:] send an event to the remote end of an 
    8.348 +interdomain event channel. 
   8.349 +\item[\it status:] determine the current status of a local port. 
   8.350 +\end{description} 
   8.351 +
   8.352 +For more details see
   8.353 +{\tt xen/include/public/event\_channel.h}. 
   8.354 +
   8.355 +\end{quote} 
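As a sketch, notifying the remote end of an established channel might
look like this (field names should be verified against
{\tt xen/include/public/event\_channel.h}):

\begin{verbatim}
/* Sketch: kick the remote end of an interdomain channel. */
evtchn_op_t op;

op.cmd               = EVTCHNOP_send;
op.u.send.local_port = port;       /* previously bound port */

(void)HYPERVISOR_event_channel_op(&op);
\end{verbatim}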
   8.356 +
   8.357 +Event channels are the fundamental communication primitive between 
    8.358 +Xen domains and seamlessly support SMP. However, they provide little
   8.359 +bandwidth for communication {\sl per se}, and hence are typically 
   8.360 +married with a piece of shared memory to produce effective and 
   8.361 +high-performance inter-domain communication. 
   8.362 +
   8.363 +Safe sharing of memory pages between guest OSes is carried out by
    8.364 +granting access on a per-page basis to individual domains. This is
   8.365 +achieved by using the {\tt grant\_table\_op()} hypercall.
   8.366 +
   8.367 +\begin{quote}
   8.368 +\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
   8.369 +
    8.370 +Grant or revoke access to a particular page for a particular domain. 
   8.371 +
   8.372 +\end{quote} 
   8.373 +
    8.374 +This mechanism is not yet in wide use by guest operating systems, but 
   8.375 +we intend to integrate support more fully in the near future. 
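          +
          +As a sketch, a guest would first ask Xen where its grant table
          +lives before issuing grant operations; the names below follow
          +{\tt xen/include/public/grant\_table.h} but may differ in detail:
          +
          +\begin{verbatim}
          +/* Sketch: ask Xen for the machine frame backing our grant table. */
          +gnttab_setup_table_t setup;
          +unsigned long frames[1];
          +
          +setup.dom        = DOMID_SELF;
          +setup.nr_frames  = 1;
          +setup.frame_list = frames;
          +HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
          +\end{verbatim}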
   8.376 +
   8.377 +\section{PCI Configuration} 
   8.378 +
   8.379 +Domains with physical device access (i.e.\ driver domains) receive
   8.380 +limited access to certain PCI devices (bus address space and
    8.381 +interrupts). However, many guest operating systems attempt to 
    8.382 +determine the PCI configuration by directly accessing the PCI BIOS, 
   8.383 +which cannot be allowed for safety. 
   8.384 +
   8.385 +Instead, Xen provides the following hypercall: 
   8.386 +
   8.387 +\begin{quote}
   8.388 +\hypercall{physdev\_op(void *physdev\_op)}
   8.389 +
    8.390 +Perform a PCI configuration operation; depending on the value 
   8.391 +of {\tt physdev\_op} this can be a PCI config read, a PCI config 
   8.392 +write, or a small number of other queries. 
   8.393 +
   8.394 +\end{quote} 
   8.395 +
   8.396 +
   8.397 +For examples of using {\tt physdev\_op()}, see the 
    8.398 +Xen-specific PCI code in the Linux sparse tree. 
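          +
          +As an illustrative sketch of the calling convention (the operation
          +and field names here are hypothetical; see the {\tt physdev\_op\_t}
          +definition in the public headers for the real interface):
          +
          +\begin{verbatim}
          +/* Sketch: read a config register of the device at bus 0, slot 3. */
          +physdev_op_t op;
          +op.cmd = PHYSDEVOP_PCI_CFGREG_READ;      /* hypothetical name */
          +op.u.pci_cfgreg_read.bus  = 0;
          +op.u.pci_cfgreg_read.dev  = 3;
          +op.u.pci_cfgreg_read.func = 0;
          +op.u.pci_cfgreg_read.reg  = 0;           /* vendor ID */
          +op.u.pci_cfgreg_read.len  = 2;
          +HYPERVISOR_physdev_op(&op);
          +\end{verbatim}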
   8.399 +
   8.400 +\section{Administrative Operations}
   8.401 +\label{s:dom0ops}
   8.402 +
   8.403 +A large number of control operations are available to a sufficiently
   8.404 +privileged domain (typically domain 0). These allow the creation and
   8.405 +management of new domains, for example. A complete list is given 
    8.406 +below; for more details on any or all of these, please see 
    8.407 +{\tt xen/include/public/dom0\_ops.h}. 
   8.408 +
   8.409 +
   8.410 +\begin{quote}
   8.411 +\hypercall{dom0\_op(dom0\_op\_t *op)} 
   8.412 +
    8.413 +Administrative operations for domain management. The options are:
   8.414 +
   8.415 +\begin{description} 
   8.416 +\item [\it DOM0\_CREATEDOMAIN:] create a new domain
   8.417 +
   8.418 +\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
   8.419 +queue. 
   8.420 +
   8.421 +\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
   8.422 +  once again. 
   8.423 +
   8.424 +\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
   8.425 +with a domain
   8.426 +
   8.427 +\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
   8.428 +
    8.429 +\item [\it DOM0\_SCHEDCTL:] perform global scheduler control operations
   8.430 +
   8.431 +\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
   8.432 +
   8.433 +\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
   8.434 +
   8.435 +\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
   8.436 +
    8.437 +\item [\it DOM0\_GETPAGEFRAMEINFO:] get information about a given machine page frame
   8.438 +
    8.439 +\item [\it DOM0\_GETPAGEFRAMEINFO2:] batched version of {\it DOM0\_GETPAGEFRAMEINFO}
   8.440 +
   8.441 +\item [\it DOM0\_IOPL:] set I/O privilege level
   8.442 +
   8.443 +\item [\it DOM0\_MSR:] read or write model specific registers
   8.444 +
   8.445 +\item [\it DOM0\_DEBUG:] interactively invoke the debugger
   8.446 +
   8.447 +\item [\it DOM0\_SETTIME:] set system time
   8.448 +
    8.449 +\item [\it DOM0\_READCONSOLE:] read console content from the hypervisor buffer ring
   8.450 +
   8.451 +\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
   8.452 +
   8.453 +\item [\it DOM0\_GETTBUFS:] get information about the size and location of
   8.454 +                      the trace buffers (only on trace-buffer enabled builds)
   8.455 +
   8.456 +\item [\it DOM0\_PHYSINFO:] get information about the host machine
   8.457 +
   8.458 +\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
   8.459 +
   8.460 +\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
   8.461 +
   8.462 +\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
   8.463 +
   8.464 +\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
   8.465 +
   8.466 +\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
   8.467 +
   8.468 +\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
   8.469 +\end{description} 
   8.470 +\end{quote} 
   8.471 +
   8.472 +Most of the above are best understood by looking at the code 
   8.473 +implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
   8.474 +the user-space tools that use them (mostly in {\tt tools/libxc}). 
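          +
          +A brief sketch, modelled on the usage in {\tt tools/libxc} (exact
          +field names may differ); pausing domain 7 looks like:
          +
          +\begin{verbatim}
          +dom0_op_t op;
          +op.cmd = DOM0_PAUSEDOMAIN;
          +op.u.pausedomain.domain = 7;   /* target domain id */
          +HYPERVISOR_dom0_op(&op);
          +\end{verbatim}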
   8.475 +
   8.476 +\section{Debugging Hypercalls} 
   8.477 +
   8.478 +A few additional hypercalls are mainly useful for debugging: 
   8.479 +
   8.480 +\begin{quote} 
   8.481 +\hypercall{console\_io(int cmd, int count, char *str)}
   8.482 +
   8.483 +Use Xen to interact with the console; operations are:
   8.484 +
    8.485 +{\it CONSOLEIO\_write}: Output {\tt count} characters from buffer {\tt str}.
    8.486 +
    8.487 +{\it CONSOLEIO\_read}: Input at most {\tt count} characters into buffer {\tt str}.
   8.488 +\end{quote} 
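          +
          +For example, a guest emitting a debug message might do (a minimal
          +sketch):
          +
          +\begin{verbatim}
          +static char msg[] = "hello from guest\n";
          +HYPERVISOR_console_io(CONSOLEIO_write, sizeof(msg) - 1, msg);
          +\end{verbatim}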
   8.489 +
   8.490 +A pair of hypercalls allows access to the underlying debug registers: 
   8.491 +\begin{quote}
   8.492 +\hypercall{set\_debugreg(int reg, unsigned long value)}
   8.493 +
    8.494 +Set debug register {\tt reg} to {\tt value}. 
   8.495 +
   8.496 +\hypercall{get\_debugreg(int reg)}
   8.497 +
    8.498 +Return the contents of the debug register {\tt reg}.
   8.499 +\end{quote}
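          +
          +For instance, a guest debugger arming a breakpoint in DR0 might do
          +(a sketch; {\tt breakpoint\_addr} and {\tt value} are illustrative
          +variables):
          +
          +\begin{verbatim}
          +HYPERVISOR_set_debugreg(0, breakpoint_addr);   /* arm DR0 */
          +value = HYPERVISOR_get_debugreg(0);            /* read it back */
          +\end{verbatim}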
   8.500 +
   8.501 +And finally: 
   8.502 +\begin{quote}
   8.503 +\hypercall{xen\_version(int cmd)}
   8.504 +
   8.505 +Request Xen version number.
   8.506 +\end{quote} 
   8.507 +
   8.508 +This is useful to ensure that user-space tools are in sync 
   8.509 +with the underlying hypervisor. 
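          +
          +For example (a sketch; the encoding of major/minor version in the
          +return value follows the convention used by the Linux sparse tree):
          +
          +\begin{verbatim}
          +int version = HYPERVISOR_xen_version(0);
          +printk("Running on Xen %d.%d\n", version >> 16, version & 0xffff);
          +\end{verbatim}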
   8.510 +
   8.511 +\section{Deprecated Hypercalls}
   8.512 +
   8.513 +Xen is under constant development and refinement; as such there 
   8.514 +are plans to improve the way in which various pieces of functionality 
   8.515 +are exposed to guest OSes. 
   8.516 +
   8.517 +\begin{quote} 
   8.518 +\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
   8.519 +
    8.520 +Toggle various memory management modes (in particular writable page
   8.521 +tables and superpage support). 
   8.522 +
   8.523 +\end{quote} 
   8.524 +
    8.525 +This hypercall is likely to be replaced with mode values in the 
    8.526 +shared information page, since these are more easily preserved 
    8.527 +across resumption after migration or checkpointing. 
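          +
          +For illustration, a guest enabling the writable page-table assist
          +would issue (a sketch using the constants from
          +{\tt xen/include/public/xen.h}):
          +
          +\begin{verbatim}
          +HYPERVISOR_vm_assist(VMASST_CMD_enable,
          +                     VMASST_TYPE_writable_pagetables);
          +\end{verbatim}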
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/docs/src/interface/memory.tex	Tue Sep 20 09:43:46 2005 +0000
     9.3 @@ -0,0 +1,162 @@
     9.4 +\chapter{Memory}
     9.5 +\label{c:memory} 
     9.6 +
     9.7 +Xen is responsible for managing the allocation of physical memory to
     9.8 +domains, and for ensuring safe use of the paging and segmentation
     9.9 +hardware.
    9.10 +
    9.11 +
    9.12 +\section{Memory Allocation}
    9.13 +
    9.14 +Xen resides within a small fixed portion of physical memory; it also
    9.15 +reserves the top 64MB of every virtual address space. The remaining
    9.16 +physical memory is available for allocation to domains at a page
    9.17 +granularity.  Xen tracks the ownership and use of each page, which
    9.18 +allows it to enforce secure partitioning between domains.
    9.19 +
    9.20 +Each domain has a maximum and current physical memory allocation.  A
    9.21 +guest OS may run a `balloon driver' to dynamically adjust its current
    9.22 +memory allocation up to its limit.
    9.23 +
    9.24 +
    9.25 +%% XXX SMH: I use machine and physical in the next section (which is
    9.26 +%% kinda required for consistency with code); wonder if this section
    9.27 +%% should use same terms?
    9.28 +%%
    9.29 +%% Probably. 
    9.30 +%%
    9.31 +%% Merging this and below section at some point prob makes sense.
    9.32 +
    9.33 +\section{Pseudo-Physical Memory}
    9.34 +
    9.35 +Since physical memory is allocated and freed on a page granularity,
    9.36 +there is no guarantee that a domain will receive a contiguous stretch
     9.37 +of physical memory. However, most operating systems do not have good
    9.38 +support for operating in a fragmented physical address space. To aid
    9.39 +porting such operating systems to run on top of Xen, we make a
    9.40 +distinction between \emph{machine memory} and \emph{pseudo-physical
    9.41 +  memory}.
    9.42 +
    9.43 +Put simply, machine memory refers to the entire amount of memory
    9.44 +installed in the machine, including that reserved by Xen, in use by
    9.45 +various domains, or currently unallocated. We consider machine memory
    9.46 +to comprise a set of 4K \emph{machine page frames} numbered
    9.47 +consecutively starting from 0. Machine frame numbers mean the same
    9.48 +within Xen or any domain.
    9.49 +
    9.50 +Pseudo-physical memory, on the other hand, is a per-domain
    9.51 +abstraction. It allows a guest operating system to consider its memory
    9.52 +allocation to consist of a contiguous range of physical page frames
    9.53 +starting at physical frame 0, despite the fact that the underlying
    9.54 +machine page frames may be sparsely allocated and in any order.
    9.55 +
    9.56 +To achieve this, Xen maintains a globally readable {\it
    9.57 +  machine-to-physical} table which records the mapping from machine
    9.58 +page frames to pseudo-physical ones. In addition, each domain is
    9.59 +supplied with a {\it physical-to-machine} table which performs the
    9.60 +inverse mapping. Clearly the machine-to-physical table has size
    9.61 +proportional to the amount of RAM installed in the machine, while each
    9.62 +physical-to-machine table has size proportional to the memory
    9.63 +allocation of the given domain.
    9.64 +
    9.65 +Architecture dependent code in guest operating systems can then use
    9.66 +the two tables to provide the abstraction of pseudo-physical memory.
    9.67 +In general, only certain specialized parts of the operating system
     9.68 +(such as page table management) need to understand the difference
    9.69 +between machine and pseudo-physical addresses.
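          +
          +As a sketch of how a guest uses the two tables (names as in the
          +XenLinux sparse tree):
          +
          +\begin{verbatim}
          +/* P2M: per-domain table, indexed by pseudo-physical frame. */
          +#define pfn_to_mfn(pfn)  (phys_to_machine_mapping[(pfn)])
          +/* M2P: globally readable table, indexed by machine frame.  */
          +#define mfn_to_pfn(mfn)  (machine_to_phys_mapping[(mfn)])
          +\end{verbatim}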
    9.70 +
    9.71 +
    9.72 +\section{Page Table Updates}
    9.73 +
    9.74 +In the default mode of operation, Xen enforces read-only access to
    9.75 +page tables and requires guest operating systems to explicitly request
    9.76 +any modifications.  Xen validates all such requests and only applies
    9.77 +updates that it deems safe.  This is necessary to prevent domains from
    9.78 +adding arbitrary mappings to their page tables.
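          +
          +A typical explicit update is sketched below; {\tt mmu\_update()} is
          +described in the hypercall chapter, and the helper names here follow
          +the Linux sparse tree:
          +
          +\begin{verbatim}
          +/* Sketch: ask Xen to install one new page-table entry. */
          +mmu_update_t req;
          +req.ptr = virt_to_machine(ptep);  /* machine address of the PTE */
          +req.val = new_pte_value;          /* desired PTE contents       */
          +HYPERVISOR_mmu_update(&req, 1, NULL);
          +\end{verbatim}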
    9.79 +
    9.80 +To aid validation, Xen associates a type and reference count with each
    9.81 +memory page. A page has one of the following mutually-exclusive types
    9.82 +at any point in time: page directory ({\sf PD}), page table ({\sf
    9.83 +  PT}), local descriptor table ({\sf LDT}), global descriptor table
    9.84 +({\sf GDT}), or writable ({\sf RW}). Note that a guest OS may always
    9.85 +create readable mappings of its own memory regardless of its current
    9.86 +type.
    9.87 +
     9.88 +%%% XXX: possibly explain more about ref count 'lifecycle' here?
    9.89 +This mechanism is used to maintain the invariants required for safety;
    9.90 +for example, a domain cannot have a writable mapping to any part of a
    9.91 +page table as this would require the page concerned to simultaneously
    9.92 +be of types {\sf PT} and {\sf RW}.
    9.93 +
    9.94 +
    9.95 +% \section{Writable Page Tables}
    9.96 +
     9.97 +Xen also provides an alternative mode of operation in which guests
     9.98 +have the illusion that their page tables are directly writable.  Of
    9.99 +course this is not really the case, since Xen must still validate
   9.100 +modifications to ensure secure partitioning. To this end, Xen traps
   9.101 +any write attempt to a memory page of type {\sf PT} (i.e., that is
   9.102 +currently part of a page table).  If such an access occurs, Xen
   9.103 +temporarily allows write access to that page while at the same time
   9.104 +\emph{disconnecting} it from the page table that is currently in use.
   9.105 +This allows the guest to safely make updates to the page because the
   9.106 +newly-updated entries cannot be used by the MMU until Xen revalidates
   9.107 +and reconnects the page.  Reconnection occurs automatically in a
   9.108 +number of situations: for example, when the guest modifies a different
   9.109 +page-table page, when the domain is preempted, or whenever the guest
   9.110 +uses Xen's explicit page-table update interfaces.
   9.111 +
   9.112 +Finally, Xen also supports a form of \emph{shadow page tables} in
    9.113 +which the guest OS uses an independent copy of page tables which are
   9.114 +unknown to the hardware (i.e.\ which are never pointed to by {\tt
   9.115 +  cr3}). Instead Xen propagates changes made to the guest's tables to
   9.116 +the real ones, and vice versa. This is useful for logging page writes
   9.117 +(e.g.\ for live migration or checkpoint). A full version of the shadow
   9.118 +page tables also allows guest OS porting with less effort.
   9.119 +
   9.120 +
   9.121 +\section{Segment Descriptor Tables}
   9.122 +
   9.123 +On boot a guest is supplied with a default GDT, which does not reside
   9.124 +within its own memory allocation.  If the guest wishes to use other
   9.125 +than the default `flat' ring-1 and ring-3 segments that this GDT
   9.126 +provides, it must register a custom GDT and/or LDT with Xen, allocated
   9.127 +from its own memory. Note that a number of GDT entries are reserved by
   9.128 +Xen -- any custom GDT must also include sufficient space for these
   9.129 +entries.
   9.130 +
   9.131 +For example, the following hypercall is used to specify a new GDT:
   9.132 +
   9.133 +\begin{quote}
   9.134 +  int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em
   9.135 +    entries})
   9.136 +
   9.137 +  \emph{frame\_list}: An array of up to 16 machine page frames within
   9.138 +  which the GDT resides.  Any frame registered as a GDT frame may only
   9.139 +  be mapped read-only within the guest's address space (e.g., no
   9.140 +  writable mappings, no use as a page-table page, and so on).
   9.141 +
   9.142 +  \emph{entries}: The number of descriptor-entry slots in the GDT.
   9.143 +  Note that the table must be large enough to contain Xen's reserved
   9.144 +  entries; thus we must have `{\em entries $>$
   9.145 +    LAST\_RESERVED\_GDT\_ENTRY}\ '.  Note also that, after registering
   9.146 +  the GDT, slots \emph{FIRST\_} through
   9.147 +  \emph{LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest
   9.148 +  and may be overwritten by Xen.
   9.149 +\end{quote}
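          +
          +A sketch of registering a single-frame custom GDT (the helper and
          +variable names here are illustrative):
          +
          +\begin{verbatim}
          +unsigned long frames[1];
          +frames[0] = virt_to_mfn(my_gdt);   /* illustrative helper */
          +HYPERVISOR_set_gdt(frames, nr_entries);
          +\end{verbatim}
          +
          +where {\em nr\_entries} must exceed {\em LAST\_RESERVED\_GDT\_ENTRY}
          +as described above.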
   9.150 +
   9.151 +The LDT is updated via the generic MMU update mechanism (i.e., via the
    9.152 +{\tt mmu\_update()} hypercall).
   9.153 +
   9.154 +\section{Start of Day}
   9.155 +
   9.156 +The start-of-day environment for guest operating systems is rather
   9.157 +different to that provided by the underlying hardware. In particular,
   9.158 +the processor is already executing in protected mode with paging
   9.159 +enabled.
   9.160 +
   9.161 +{\it Domain 0} is created and booted by Xen itself. For all subsequent
   9.162 +domains, the analogue of the boot-loader is the {\it domain builder},
   9.163 +user-space software running in {\it domain 0}. The domain builder is
   9.164 +responsible for building the initial page tables for a domain and
   9.165 +loading its kernel image at the appropriate virtual address.
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/docs/src/interface/scheduling.tex	Tue Sep 20 09:43:46 2005 +0000
    10.3 @@ -0,0 +1,268 @@
    10.4 +\chapter{Scheduling API}  
    10.5 +
    10.6 +The scheduling API is used by both the schedulers described above and should
    10.7 +also be used by any new schedulers.  It provides a generic interface and also
    10.8 +implements much of the ``boilerplate'' code.
    10.9 +
   10.10 +Schedulers conforming to this API are described by the following
   10.11 +structure:
   10.12 +
   10.13 +\begin{verbatim}
   10.14 +struct scheduler
   10.15 +{
   10.16 +    char *name;             /* full name for this scheduler      */
   10.17 +    char *opt_name;         /* option name for this scheduler    */
   10.18 +    unsigned int sched_id;  /* ID for this scheduler             */
   10.19 +
   10.20 +    int          (*init_scheduler) ();
   10.21 +    int          (*alloc_task)     (struct task_struct *);
   10.22 +    void         (*add_task)       (struct task_struct *);
   10.23 +    void         (*free_task)      (struct task_struct *);
   10.24 +    void         (*rem_task)       (struct task_struct *);
   10.25 +    void         (*wake_up)        (struct task_struct *);
   10.26 +    void         (*do_block)       (struct task_struct *);
   10.27 +    task_slice_t (*do_schedule)    (s_time_t);
   10.28 +    int          (*control)        (struct sched_ctl_cmd *);
   10.29 +    int          (*adjdom)         (struct task_struct *,
   10.30 +                                    struct sched_adjdom_cmd *);
   10.31 +    s32          (*reschedule)     (struct task_struct *);
   10.32 +    void         (*dump_settings)  (void);
   10.33 +    void         (*dump_cpu_state) (int);
   10.34 +    void         (*dump_runq_el)   (struct task_struct *);
   10.35 +};
   10.36 +\end{verbatim}
   10.37 +
   10.38 +The only method that {\em must} be implemented is
    10.39 +{\tt do\_schedule()}.  However, if the {\tt wake\_up()} method is not
    10.40 +implemented then waking tasks will never be placed on the runqueue!
   10.41 +
   10.42 +The fields of the above structure are described in more detail below.
   10.43 +
   10.44 +\subsubsection{name}
   10.45 +
   10.46 +The name field should point to a descriptive ASCII string.
   10.47 +
   10.48 +\subsubsection{opt\_name}
   10.49 +
   10.50 +This field is the value of the {\tt sched=} boot-time option that will select
   10.51 +this scheduler.
   10.52 +
   10.53 +\subsubsection{sched\_id}
   10.54 +
   10.55 +This is an integer that uniquely identifies this scheduler.  There should be a
    10.56 +macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
   10.57 +
   10.58 +\subsubsection{init\_scheduler}
   10.59 +
   10.60 +\paragraph*{Purpose}
   10.61 +
   10.62 +This is a function for performing any scheduler-specific initialisation.  For
   10.63 +instance, it might allocate memory for per-CPU scheduler data and initialise it
   10.64 +appropriately.
   10.65 +
   10.66 +\paragraph*{Call environment}
   10.67 +
   10.68 +This function is called after the initialisation performed by the generic
   10.69 +layer.  The function is called exactly once, for the scheduler that has been
   10.70 +selected.
   10.71 +
   10.72 +\paragraph*{Return values}
   10.73 +
   10.74 +This should return negative on failure --- this will cause an
   10.75 +immediate panic and the system will fail to boot.
   10.76 +
   10.77 +\subsubsection{alloc\_task}
   10.78 +
   10.79 +\paragraph*{Purpose}
   10.80 +Called when a {\tt task\_struct} is allocated by the generic scheduler
   10.81 +layer.  A particular scheduler implementation may use this method to
   10.82 +allocate per-task data for this task.  It may use the {\tt
   10.83 +sched\_priv} pointer in the {\tt task\_struct} to point to this data.
   10.84 +
   10.85 +\paragraph*{Call environment}
   10.86 +The generic layer guarantees that the {\tt sched\_priv} field will
   10.87 +remain intact from the time this method is called until the task is
   10.88 +deallocated (so long as the scheduler implementation does not change
   10.89 +it explicitly!).
   10.90 +
   10.91 +\paragraph*{Return values}
   10.92 +Negative on failure.
   10.93 +
   10.94 +\subsubsection{add\_task}
   10.95 +
   10.96 +\paragraph*{Purpose}
   10.97 +
   10.98 +Called when a task is initially added by the generic layer.
   10.99 +
  10.100 +\paragraph*{Call environment}
  10.101 +
  10.102 +The fields in the {\tt task\_struct} are now filled out and available for use.
  10.103 +Schedulers should implement appropriate initialisation of any per-task private
  10.104 +information in this method.
  10.105 +
  10.106 +\subsubsection{free\_task}
  10.107 +
  10.108 +\paragraph*{Purpose}
  10.109 +
  10.110 +Schedulers should free the space used by any associated private data
  10.111 +structures.
  10.112 +
  10.113 +\paragraph*{Call environment}
  10.114 +
  10.115 +This is called when a {\tt task\_struct} is about to be deallocated.
  10.116 +The generic layer will have done generic task removal operations and
  10.117 +(if implemented) called the scheduler's {\tt rem\_task} method before
  10.118 +this method is called.
  10.119 +
  10.120 +\subsubsection{rem\_task}
  10.121 +
  10.122 +\paragraph*{Purpose}
  10.123 +
  10.124 +This is called when a task is being removed from scheduling (but is
  10.125 +not yet being freed).
  10.126 +
  10.127 +\subsubsection{wake\_up}
  10.128 +
  10.129 +\paragraph*{Purpose}
  10.130 +
  10.131 +Called when a task is woken up, this method should put the task on the runqueue
  10.132 +(or do the scheduler-specific equivalent action).
  10.133 +
  10.134 +\paragraph*{Call environment}
  10.135 +
  10.136 +The task is already set to state RUNNING.
  10.137 +
  10.138 +\subsubsection{do\_block}
  10.139 +
  10.140 +\paragraph*{Purpose}
  10.141 +
  10.142 +This function is called when a task is blocked.  This function should
  10.143 +not remove the task from the runqueue.
  10.144 +
  10.145 +\paragraph*{Call environment}
  10.146 +
  10.147 +The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
  10.148 +TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
  10.149 +  do\_schedule} method will be made after this method returns, in
  10.150 +order to select the next task to run.
  10.151 +
  10.152 +\subsubsection{do\_schedule}
  10.153 +
  10.154 +This method must be implemented.
  10.155 +
  10.156 +\paragraph*{Purpose}
  10.157 +
  10.158 +The method is called each time a new task must be chosen for scheduling on the
   10.159 +current CPU.  The current time is passed as the single argument (the current
  10.160 +task can be found using the {\tt current} macro).
  10.161 +
   10.162 +This method should select the next task to run on this CPU and set its minimum
   10.163 +time to run, as well as returning the data described below.
  10.164 +
  10.165 +This method should also take the appropriate action if the previous
  10.166 +task has blocked, e.g. removing it from the runqueue.
  10.167 +
  10.168 +\paragraph*{Call environment}
  10.169 +
  10.170 +The other fields in the {\tt task\_struct} are updated by the generic layer,
  10.171 +which also performs all Xen-specific tasks and performs the actual task switch
  10.172 +(unless the previous task has been chosen again).
  10.173 +
  10.174 +This method is called with the {\tt schedule\_lock} held for the current CPU
  10.175 +and local interrupts disabled.
  10.176 +
  10.177 +\paragraph*{Return values}
  10.178 +
   10.179 +Must return a {\tt struct task\_slice} describing which task to run next and
   10.180 +the maximum length of time for which it should run.
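          +
          +A minimal sketch of a conforming implementation (runqueue handling
          +elided; {\tt MILLISECS()} and the {\tt task\_slice\_t} fields follow
          +{\tt <xen/sched-if.h>}):
          +
          +\begin{verbatim}
          +static task_slice_t fifo_do_schedule(s_time_t now)
          +{
          +    task_slice_t ret;
          +    ret.task = fifo_runq_head();   /* hypothetical helper */
          +    ret.time = MILLISECS(10);      /* run for at most 10ms */
          +    return ret;
          +}
          +\end{verbatim}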
  10.181 +
  10.182 +\subsubsection{control}
  10.183 +
  10.184 +\paragraph*{Purpose}
  10.185 +
  10.186 +This method is called for global scheduler control operations.  It takes a
  10.187 +pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
  10.188 +source data from or populate with data, depending on the value of the
  10.189 +{\tt direction} field.
  10.190 +
  10.191 +\paragraph*{Call environment}
  10.192 +
  10.193 +The generic layer guarantees that when this method is called, the
  10.194 +caller selected the correct scheduler ID, hence the scheduler's
  10.195 +implementation does not need to sanity-check these parts of the call.
  10.196 +
  10.197 +\paragraph*{Return values}
  10.198 +
  10.199 +This function should return the value to be passed back to user space, hence it
  10.200 +should either be 0 or an appropriate errno value.
  10.201 +
   10.202 +\subsubsection{adjdom}
  10.203 +
  10.204 +\paragraph*{Purpose}
  10.205 +
  10.206 +This method is called to adjust the scheduling parameters of a particular
  10.207 +domain, or to query their current values.  The function should check
  10.208 +the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
  10.209 +order to determine which of these operations is being performed.
  10.210 +
  10.211 +\paragraph*{Call environment}
  10.212 +
  10.213 +The generic layer guarantees that the caller has specified the correct
  10.214 +control interface version and scheduler ID and that the supplied {\tt
  10.215 +task\_struct} will not be deallocated during the call (hence it is not
  10.216 +necessary to {\tt get\_task\_struct}).
  10.217 +
  10.218 +\paragraph*{Return values}
  10.219 +
  10.220 +This function should return the value to be passed back to user space, hence it
  10.221 +should either be 0 or an appropriate errno value.
  10.222 +
  10.223 +\subsubsection{reschedule}
  10.224 +
  10.225 +\paragraph*{Purpose}
  10.226 +
  10.227 +This method is called to determine if a reschedule is required as a result of a
  10.228 +particular task.
  10.229 +
  10.230 +\paragraph*{Call environment}
  10.231 +The generic layer will cause a reschedule if the current domain is the idle
   10.232 +task or has already exceeded its minimum time slice.  The
  10.233 +generic layer guarantees that the task passed is not currently running but is
  10.234 +on the runqueue.
  10.235 +
  10.236 +\paragraph*{Return values}
  10.237 +
  10.238 +Should return a mask of CPUs to cause a reschedule on.
  10.239 +
  10.240 +\subsubsection{dump\_settings}
  10.241 +
  10.242 +\paragraph*{Purpose}
  10.243 +
  10.244 +If implemented, this should dump any private global settings for this
  10.245 +scheduler to the console.
  10.246 +
  10.247 +\paragraph*{Call environment}
  10.248 +
  10.249 +This function is called with interrupts enabled.
  10.250 +
  10.251 +\subsubsection{dump\_cpu\_state}
  10.252 +
  10.253 +\paragraph*{Purpose}
  10.254 +
  10.255 +This method should dump any private settings for the specified CPU.
  10.256 +
  10.257 +\paragraph*{Call environment}
  10.258 +
  10.259 +This function is called with interrupts disabled and the {\tt schedule\_lock}
  10.260 +for the specified CPU held.
  10.261 +
  10.262 +\subsubsection{dump\_runq\_el}
  10.263 +
  10.264 +\paragraph*{Purpose}
  10.265 +
  10.266 +This method should dump any private settings for the specified task.
  10.267 +
  10.268 +\paragraph*{Call environment}
  10.269 +
  10.270 +This function is called with interrupts disabled and the {\tt schedule\_lock}
  10.271 +for the task's CPU held.
    11.1 --- a/docs/src/user.tex	Tue Sep 20 09:43:29 2005 +0000
    11.2 +++ b/docs/src/user.tex	Tue Sep 20 09:43:46 2005 +0000
    11.3 @@ -59,1803 +59,36 @@ Contributions of material, suggestions a
    11.4  \renewcommand{\floatpagefraction}{.8}
    11.5  \setstretch{1.1}
    11.6  
    11.7 -\part{Introduction and Tutorial}
    11.8 -\chapter{Introduction}
    11.9 -
   11.10 -Xen is a {\em paravirtualising} virtual machine monitor (VMM), or
   11.11 -`hypervisor', for the x86 processor architecture.  Xen can securely
   11.12 -execute multiple virtual machines on a single physical system with
   11.13 -close-to-native performance.  The virtual machine technology
   11.14 -facilitates enterprise-grade functionality, including:
   11.15 -
   11.16 -\begin{itemize}
   11.17 -\item Virtual machines with performance close to native
   11.18 -  hardware.
   11.19 -\item Live migration of running virtual machines between physical hosts.
   11.20 -\item Excellent hardware support (supports most Linux device drivers).
   11.21 -\item Sandboxed, restartable device drivers.
   11.22 -\end{itemize}
   11.23 -
   11.24 -Paravirtualisation permits very high performance virtualisation,
   11.25 -even on architectures like x86 that are traditionally
   11.26 -very hard to virtualise.
   11.27 -The drawback of this approach is that it requires operating systems to
   11.28 -be {\em ported} to run on Xen.  Porting an OS to run on Xen is similar
   11.29 -to supporting a new hardware platform, however the process
   11.30 -is simplified because the paravirtual machine architecture is very
   11.31 -similar to the underlying native hardware. Even though operating system
   11.32 -kernels must explicitly support Xen, a key feature is that user space
   11.33 -applications and libraries {\em do not} require modification.
   11.34 -
   11.35 -Xen support is available for increasingly many operating systems:
   11.36 -right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
   11.37 -A FreeBSD port is undergoing testing and will be incorporated into the
   11.38 -release soon. Other OS ports, including Plan 9, are in progress.  We
   11.39 -hope that that arch-xen patches will be incorporated into the
   11.40 -mainstream releases of these operating systems in due course (as has
   11.41 -already happened for NetBSD).
   11.42 -
   11.43 -Possible usage scenarios for Xen include:
   11.44 -\begin{description}
   11.45 -\item [Kernel development.] Test and debug kernel modifications in a
   11.46 -      sandboxed virtual machine --- no need for a separate test
   11.47 -      machine.
   11.48 -\item [Multiple OS configurations.] Run multiple operating systems
   11.49 -      simultaneously, for instance for compatibility or QA purposes.
   11.50 -\item [Server consolidation.] Move multiple servers onto a single
   11.51 -      physical host with performance and fault isolation provided at
   11.52 -      virtual machine boundaries. 
   11.53 -\item [Cluster computing.] Management at VM granularity provides more
   11.54 -      flexibility than separately managing each physical host, but
   11.55 -      better control and isolation than single-system image solutions, 
   11.56 -      particularly by using live migration for load balancing. 
   11.57 -\item [Hardware support for custom OSes.] Allow development of new OSes
   11.58 -      while benefiting from the wide-ranging hardware support of
   11.59 -      existing OSes such as Linux.
   11.60 -\end{description}
   11.61 -
   11.62 -\section{Structure of a Xen-Based System}
   11.63 -
   11.64 -A Xen system has multiple layers, the lowest and most privileged of
   11.65 -which is Xen itself. 
   11.66 -Xen in turn may host multiple {\em guest} operating systems, each of
   11.67 -which is executed within a secure virtual machine (in Xen terminology,
   11.68 -a {\em domain}). Domains are scheduled by Xen to make effective use of
   11.69 -the available physical CPUs.  Each guest OS manages its own
   11.70 -applications, which includes responsibility for scheduling each
   11.71 -application within the time allotted to the VM by Xen.
   11.72 -
   11.73 -The first domain, {\em domain 0}, is created automatically when the
   11.74 -system boots and has special management privileges. Domain 0 builds
   11.75 -other domains and manages their virtual devices. It also performs
   11.76 -administrative tasks such as suspending, resuming and migrating other
   11.77 -virtual machines.
   11.78 -
   11.79 -Within domain 0, a process called \emph{xend} runs to manage the system.
   11.80 -\Xend is responsible for managing virtual machines and providing access
   11.81 -to their consoles.  Commands are issued to \xend over an HTTP
   11.82 -interface, either from a command-line tool or from a web browser.
   11.83 -
   11.84 -\section{Hardware Support}
   11.85 -
   11.86 -Xen currently runs only on the x86 architecture, requiring a `P6' or
   11.87 -newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
   11.88 -Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
   11.89 -supported, and we also have basic support for HyperThreading (SMT),
   11.90 -although this remains a topic for ongoing research. A port
   11.91 -specifically for x86/64 is in progress, although Xen already runs on
   11.92 -such systems in 32-bit legacy mode. In addition a port to the IA64
   11.93 -architecture is approaching completion. We hope to add other
   11.94 -architectures such as PPC and ARM in due course.
   11.95 -
   11.96 -
   11.97 -Xen can currently use up to 4GB of memory.  It is possible for x86
   11.98 -machines to address up to 64GB of physical memory but there are no
   11.99 -current plans to support these systems: The x86/64 port is the
  11.100 -planned route to supporting larger memory sizes.
  11.101 -
  11.102 -Xen offloads most of the hardware support issues to the guest OS
  11.103 -running in Domain~0.  Xen itself contains only the code required to
  11.104 -detect and start secondary processors, set up interrupt routing, and
  11.105 -perform PCI bus enumeration.  Device drivers run within a privileged
  11.106 -guest OS rather than within Xen itself. This approach provides
  11.107 -compatibility with the majority of device hardware supported by Linux.
  11.108 -The default XenLinux build contains support for relatively modern
  11.109 -server-class network and disk hardware, but you can add support for
  11.110 -other hardware by configuring your XenLinux kernel in the normal way.
  11.111 -
  11.112 -\section{History}
  11.113 -
  11.114 -Xen was originally developed by the Systems Research Group at the
  11.115 -University of Cambridge Computer Laboratory as part of the XenoServers
  11.116 -project, funded by the UK-EPSRC.
  11.117 -XenoServers aim to provide a `public infrastructure for
  11.118 -global distributed computing', and Xen plays a key part in that,
  11.119 -allowing us to efficiently partition a single machine to enable
  11.120 -multiple independent clients to run their operating systems and
  11.121 -applications in an environment providing protection, resource
  11.122 -isolation and accounting.  The project web page contains further
  11.123 -information along with pointers to papers and technical reports:
  11.124 -\path{http://www.cl.cam.ac.uk/xeno} 
  11.125 -
  11.126 -Xen has since grown into a fully-fledged project in its own right,
  11.127 -enabling us to investigate interesting research issues regarding the
  11.128 -best techniques for virtualising resources such as the CPU, memory,
  11.129 -disk and network.  The project has been bolstered by support from
  11.130 -Intel Research Cambridge, and HP Labs, who are now working closely
  11.131 -with us.
  11.132 -
  11.133 -Xen was first described in a paper presented at SOSP in
  11.134 -2003\footnote{\tt
  11.135 -http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the first
  11.136 -public release (1.0) was made that October.  Since then, Xen has
  11.137 -significantly matured and is now used in production scenarios on
  11.138 -many sites.
  11.139 -
  11.140 -Xen 2.0 features greatly enhanced hardware support, configuration
  11.141 -flexibility, usability and a larger complement of supported operating
  11.142 -systems. This latest release takes Xen a step closer to becoming the 
  11.143 -definitive open source solution for virtualisation.
  11.144 -
  11.145 -\chapter{Installation}
  11.146 -
  11.147 -The Xen distribution includes three main components: Xen itself, ports
  11.148 -of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the user-space
  11.149 -tools required to manage a Xen-based system.  This chapter describes
  11.150 -how to install the Xen 2.0 distribution from source.  Alternatively,
  11.151 -there may be pre-built packages available as part of your operating
  11.152 -system distribution.
  11.153 -
  11.154 -\section{Prerequisites}
  11.155 -\label{sec:prerequisites}
  11.156 -
  11.157 -The following is a full list of prerequisites.  Items marked `$\dag$'
  11.158 -are required by the \xend control tools, and hence required if you
  11.159 -want to run more than one virtual machine; items marked `$*$' are only
  11.160 -required if you wish to build from source.
  11.161 -\begin{itemize}
  11.162 -\item A working Linux distribution using the GRUB bootloader and
  11.163 -running on a P6-class (or newer) CPU.
  11.164 -\item [$\dag$] The \path{iproute2} package. 
  11.165 -\item [$\dag$] The Linux bridge-utils\footnote{Available from 
  11.166 -{\tt http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
  11.167 -\item [$\dag$] An installation of Twisted v1.3 or
  11.168 -above\footnote{Available from {\tt
  11.169 -http://www.twistedmatrix.com}}. There may be a binary package
  11.170 -available for your distribution; alternatively it can be installed by
  11.171 -running `{\sl make install-twisted}' in the root of the Xen source
  11.172 -tree.
  11.173 -\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
  11.174 -\item [$*$] Development installation of libcurl (e.g., libcurl-devel) 
  11.175 -\item [$*$] Development installation of zlib (e.g., zlib-dev).
  11.176 -\item [$*$] Development installation of Python v2.2 or later (e.g., python-dev).
  11.177 -\item [$*$] \LaTeX and transfig are required to build the documentation.
  11.178 -\end{itemize}
  11.179 -
  11.180 -Once you have satisfied the relevant prerequisites, you can 
  11.181 -now install either a binary or source distribution of Xen. 
  11.182 -
  11.183 -\section{Installing from Binary Tarball} 
  11.184 -
  11.185 -Pre-built tarballs are available for download from the Xen 
  11.186 -download page
  11.187 -\begin{quote} 
  11.188 -{\tt http://xen.sf.net}
  11.189 -\end{quote} 
  11.190 -
  11.191 -Once you've downloaded the tarball, simply unpack and install: 
  11.192 -\begin{verbatim}
  11.193 -# tar zxvf xen-2.0-install.tgz
  11.194 -# cd xen-2.0-install
  11.195 -# sh ./install.sh 
  11.196 -\end{verbatim} 
  11.197 -
  11.198 -Once you've installed the binaries you need to configure
  11.199 -your system as described in Section~\ref{s:configure}. 
  11.200 -
  11.201 -\section{Installing from Source} 
  11.202 -
  11.203 -This section describes how to obtain, build, and install 
  11.204 -Xen from source. 
  11.205 -
  11.206 -\subsection{Obtaining the Source} 
  11.207 -
  11.208 -The Xen source tree is available as either a compressed source tar
  11.209 -ball or as a clone of our master BitKeeper repository.
  11.210 -
  11.211 -\begin{description} 
  11.212 -\item[Obtaining the Source Tarball]\mbox{} \\  
  11.213 -Stable versions (and daily snapshots) of the Xen source tree are
  11.214 -available as compressed tarballs from the Xen download page
  11.215 -\begin{quote} 
  11.216 -{\tt http://xen.sf.net}
  11.217 -\end{quote} 
  11.218 -
  11.219 -\item[Using BitKeeper]\mbox{} \\  
  11.220 -If you wish to install Xen from a clone of our latest BitKeeper
  11.221 -repository then you will need to install the BitKeeper tools.
  11.222 -Download instructions for BitKeeper can be obtained by filling out the
  11.223 -form at:
  11.224 -
  11.225 -\begin{quote} 
  11.226 -{\tt http://www.bitmover.com/cgi-bin/download.cgi}
  11.227 -\end{quote}
  11.228 -The public master BK repository for the 2.0 release lives at: 
  11.229 -\begin{quote}
  11.230 -{\tt bk://xen.bkbits.net/xen-2.0.bk}  
  11.231 -\end{quote} 
  11.232 -You can use BitKeeper to
  11.233 -download it and keep it updated with the latest features and fixes.
  11.234 -
  11.235 -Change to the directory in which you want to put the source code, then
  11.236 -run:
  11.237 -\begin{verbatim}
  11.238 -# bk clone bk://xen.bkbits.net/xen-2.0.bk
  11.239 -\end{verbatim}
  11.240 -
  11.241 -Under your current directory, a new directory named \path{xen-2.0.bk}
  11.242 -has been created, which contains all the source code for Xen, the OS
  11.243 -ports, and the control tools. You can update your repository with the
  11.244 -latest changes at any time by running:
  11.245 -\begin{verbatim}
  11.246 -# cd xen-2.0.bk # to change into the local repository
  11.247 -# bk pull       # to update the repository
  11.248 -\end{verbatim}
  11.249 -\end{description} 
  11.250 -
  11.251 -%\section{The distribution}
  11.252 -%
  11.253 -%The Xen source code repository is structured as follows:
  11.254 -%
  11.255 -%\begin{description}
  11.256 -%\item[\path{tools/}] Xen node controller daemon (Xend), command line tools, 
  11.257 -%  control libraries
  11.258 -%\item[\path{xen/}] The Xen VMM.
  11.259 -%\item[\path{linux-*-xen-sparse/}] Xen support for Linux.
  11.260 -%\item[\path{linux-*-patches/}] Experimental patches for Linux.
  11.261 -%\item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
  11.262 -%\item[\path{docs/}] Various documentation files for users and developers.
  11.263 -%\item[\path{extras/}] Bonus extras.
  11.264 -%\end{description}
  11.265 -
  11.266 -\subsection{Building from Source} 
  11.267 -
  11.268 -The top-level Xen Makefile includes a target `world' that will do the
  11.269 -following:
  11.270 -
  11.271 -\begin{itemize}
  11.272 -\item Build Xen
  11.273 -\item Build the control tools, including \xend
  11.274 -\item Download (if necessary) and unpack the Linux 2.6 source code,
  11.275 -      and patch it for use with Xen
  11.276 -\item Build a Linux kernel to use in domain 0 and a smaller
  11.277 -      unprivileged kernel, which can optionally be used for
  11.278 -      unprivileged virtual machines.
  11.279 -\end{itemize}
  11.280 -
  11.281 -
  11.282 -After the build has completed you should have a top-level 
  11.283 -directory called \path{dist/} in which all resulting targets 
  11.284 -will be placed; of particular interest are the two kernels 
  11.285 -XenLinux kernel images, one with a `-xen0' extension
  11.286 -which contains hardware device drivers and drivers for Xen's virtual
  11.287 -devices, and one with a `-xenU' extension that just contains the
  11.288 -virtual ones. These are found in \path{dist/install/boot/} along
  11.289 -with the image for Xen itself and the configuration files used
  11.290 -during the build. 
  11.291 -
  11.292 -The NetBSD port can be built using: 
  11.293 -\begin{quote}
  11.294 -\begin{verbatim}
  11.295 -# make netbsd20
  11.296 -\end{verbatim} 
  11.297 -\end{quote} 
  11.298 -NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
  11.299 -The snapshot is downloaded as part of the build process, if it is not
  11.300 -yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
  11.301 -process also downloads a toolchain which includes all the tools
  11.302 -necessary to build the NetBSD kernel under Linux.
  11.303 -
  11.304 -To customize further the set of kernels built you need to edit
  11.305 -the top-level Makefile. Look for the line: 
  11.306 -
  11.307 -\begin{quote}
  11.308 -\begin{verbatim}
  11.309 -KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
  11.310 -\end{verbatim} 
  11.311 -\end{quote} 
  11.312 -
  11.313 -You can edit this line to include any set of operating system kernels
  11.314 -which have configurations in the top-level \path{buildconfigs/}
  11.315 -directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
  11.316 -kernel containing only virtual device drivers.
  11.317 -
  11.318 -%% Inspect the Makefile if you want to see what goes on during a build.
  11.319 -%% Building Xen and the tools is straightforward, but XenLinux is more
  11.320 -%% complicated.  The makefile needs a `pristine' Linux kernel tree to which
  11.321 -%% it will then add the Xen architecture files.  You can tell the
  11.322 -%% makefile the location of the appropriate Linux compressed tar file by
  11.323 -%% setting the LINUX\_SRC environment variable, e.g. \\
  11.324 -%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
  11.325 -%% placing the tar file somewhere in the search path of {\tt
  11.326 -%% LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the makefile
  11.327 -%% can't find a suitable kernel tar file it attempts to download it from
  11.328 -%% kernel.org (this won't work if you're behind a firewall).
  11.329 -
  11.330 -%% After untaring the pristine kernel tree, the makefile uses the {\tt
  11.331 -%% mkbuildtree} script to add the Xen patches to the kernel. 
  11.332 -
  11.333 -
  11.334 -%% The procedure is similar to build the Linux 2.4 port: \\
  11.335 -%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
  11.336 -
  11.337 -
  11.338 -%% \framebox{\parbox{5in}{
  11.339 -%% {\bf Distro specific:} \\
  11.340 -%% {\it Gentoo} --- if not using udev (most installations, currently), you'll need
  11.341 -%% to enable devfs and devfs mount at boot time in the xen0 config.
  11.342 -%% }}
  11.343 -
  11.344 -\subsection{Custom XenLinux Builds}
  11.345 -
  11.346 -% If you have an SMP machine you may wish to give the {\tt '-j4'}
  11.347 -% argument to make to get a parallel build.
  11.348 -
  11.349 -If you wish to build a customized XenLinux kernel (e.g. to support
  11.350 -additional devices or enable distribution-required features), you can
  11.351 -use the standard Linux configuration mechanisms, specifying that the
  11.352 -architecture being built for is \path{xen}, e.g:
  11.353 -\begin{quote}
  11.354 -\begin{verbatim} 
  11.355 -# cd linux-2.6.11-xen0 
  11.356 -# make ARCH=xen xconfig 
  11.357 -# cd ..
  11.358 -# make
  11.359 -\end{verbatim} 
  11.360 -\end{quote} 
  11.361 -
  11.362 -You can also copy an existing Linux configuration (\path{.config}) 
  11.363 -into \path{linux-2.6.11-xen0} and execute:  
  11.364 -\begin{quote}
  11.365 -\begin{verbatim} 
  11.366 -# make ARCH=xen oldconfig 
  11.367 -\end{verbatim} 
  11.368 -\end{quote} 
  11.369 -
  11.370 -You may be prompted with some Xen-specific options; we 
  11.371 -advise accepting the defaults for these options.
  11.372 -
  11.373 -Note that the only difference between the two types of Linux kernel
  11.374 -that are built is the configuration file used for each.  The "U"
  11.375 -suffixed (unprivileged) versions don't contain any of the physical
  11.376 -hardware device drivers, leading to a 30\% reduction in size; hence
  11.377 -you may prefer these for your non-privileged domains.  The `0'
  11.378 -suffixed privileged versions can be used to boot the system, as well
  11.379 -as in driver domains and unprivileged domains.
  11.380 -
  11.381 -
  11.382 -\subsection{Installing the Binaries}
  11.383 -
  11.384 -
  11.385 -The files produced by the build process are stored under the
  11.386 -\path{dist/install/} directory. To install them in their default
  11.387 -locations, do:
  11.388 -\begin{quote}
  11.389 -\begin{verbatim}
  11.390 -# make install
  11.391 -\end{verbatim} 
  11.392 -\end{quote}
  11.393 -
  11.394 -
  11.395 -Alternatively, users with special installation requirements may wish
  11.396 -to install them manually by copying the files to their appropriate
  11.397 -destinations.
  11.398 -
  11.399 -%% Files in \path{install/boot/} include:
  11.400 -%% \begin{itemize}
  11.401 -%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
  11.402 -%% \item \path{install/boot/vmlinuz-2.6-xen0}  Link to domain 0 XenLinux kernel
  11.403 -%% \item \path{install/boot/vmlinuz-2.6-xenU}  Link to unprivileged XenLinux kernel
  11.404 -%% \end{itemize}
  11.405 -
  11.406 -The \path{dist/install/boot} directory will also contain the config files
  11.407 -used for building the XenLinux kernels, and also versions of Xen and
  11.408 -XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6} and
  11.409 -\path{vmlinux-syms-2.6.11.11-xen0}) which are essential for interpreting crash
  11.410 -dumps.  Retain these files as the developers may wish to see them if
  11.411 -you post on the mailing list.
  11.412 -
  11.413 -
  11.414 -
  11.415 -
  11.416 -
  11.417 -\section{Configuration}
  11.418 -\label{s:configure}
  11.419 -Once you have built and installed the Xen distribution, it is 
  11.420 -simple to prepare the machine for booting and running Xen. 
  11.421 -
  11.422 -\subsection{GRUB Configuration}
  11.423 -
  11.424 -An entry should be added to \path{grub.conf} (often found under
  11.425 -\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
  11.426 -This file is sometimes called \path{menu.lst}, depending on your
  11.427 -distribution.  The entry should look something like the following:
  11.428 -
  11.429 -{\small
  11.430 -\begin{verbatim}
  11.431 -title Xen 2.0 / XenLinux 2.6
  11.432 -  kernel /boot/xen-2.0.gz dom0_mem=131072
  11.433 -  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
  11.434 -\end{verbatim}
  11.435 -}
  11.436 -
  11.437 -The kernel line tells GRUB where to find Xen itself and what boot
  11.438 -parameters should be passed to it (in this case, setting domain 0's
  11.439 -memory allocation in kilobytes and the settings for the serial port). For more
  11.440 -details on the various Xen boot parameters see Section~\ref{s:xboot}. 
  11.441 -
  11.442 -The module line of the configuration describes the location of the
  11.443 -XenLinux kernel that Xen should start and the parameters that should
  11.444 -be passed to it (these are standard Linux parameters, identifying the
  11.445 -root device and specifying it be initially mounted read only and
  11.446 -instructing that console output be sent to the screen).  Some
  11.447 -distributions such as SuSE do not require the \path{ro} parameter.
  11.448 -
  11.449 -%% \framebox{\parbox{5in}{
  11.450 -%% {\bf Distro specific:} \\
  11.451 -%% {\it SuSE} --- Omit the {\tt ro} option from the XenLinux kernel
  11.452 -%% command line, since the partition won't be remounted rw during boot.
  11.453 -%% }}
  11.454 -
  11.455 -
  11.456 -If you want to use an initrd, just add another \path{module} line to
  11.457 -the configuration, as usual:
  11.458 -{\small
  11.459 -\begin{verbatim}
  11.460 -  module /boot/my_initrd.gz
  11.461 -\end{verbatim}
  11.462 -}
  11.463 -
  11.464 -As always when installing a new kernel, it is recommended that you do
  11.465 -not delete existing menu options from \path{menu.lst} --- you may want
  11.466 -to boot your old Linux kernel in future, particularly if you
  11.467 -have problems.
  11.468 -
  11.469 -
  11.470 -\subsection{Serial Console (optional)}
  11.471 -
  11.472 -%%   kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
  11.473 -%%   module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro 
  11.474 -
  11.475 -
  11.476 -In order to configure Xen serial console output, it is necessary to add 
  11.477 -an boot option to your GRUB config; e.g. replace the above kernel line 
  11.478 -with: 
  11.479 -\begin{quote}
  11.480 -{\small
  11.481 -\begin{verbatim}
  11.482 -   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
  11.483 -\end{verbatim}}
  11.484 -\end{quote}
  11.485 -
  11.486 -This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 
  11.487 -1 stop bit and no parity. Modify these parameters for your set up. 
  11.488 -
  11.489 -One can also configure XenLinux to share the serial console; to 
  11.490 -achieve this append ``\path{console=ttyS0}'' to your 
  11.491 -module line. 
  11.492 -
  11.493 -
  11.494 -If you wish to be able to log in over the XenLinux serial console it
  11.495 -is necessary to add a line into \path{/etc/inittab}, just as per 
  11.496 -regular Linux. Simply add the line:
  11.497 -\begin{quote}
  11.498 -{\small 
  11.499 -{\tt c:2345:respawn:/sbin/mingetty ttyS0}
  11.500 -}
  11.501 -\end{quote} 
  11.502 -
  11.503 -and you should be able to log in. Note that to successfully log in 
  11.504 -as root over the serial line will require adding \path{ttyS0} to
  11.505 -\path{/etc/securetty} in most modern distributions. 
  11.506 -
  11.507 -\subsection{TLS Libraries}
  11.508 -
  11.509 -Users of the XenLinux 2.6 kernel should disable Thread Local Storage
  11.510 -(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
  11.511 -attempting to run with a XenLinux kernel\footnote{If you boot without first
  11.512 -disabling TLS, you will get a warning message during the boot
  11.513 -process. In this case, simply perform the rename after the machine is
  11.514 -up and then run \texttt{/sbin/ldconfig} to make it take effect.}.  You can
  11.515 -always reenable it by restoring the directory to its original location
  11.516 -(i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
  11.517 -
  11.518 -The reason for this is that the current TLS implementation uses
  11.519 -segmentation in a way that is not permissible under Xen.  If TLS is
  11.520 -not disabled, an emulation mode is used within Xen which reduces
  11.521 -performance substantially.
  11.522 -
  11.523 -We hope that this issue can be resolved by working with Linux
  11.524 -distribution vendors to implement a minor backward-compatible change
  11.525 -to the TLS library.
  11.526 -
  11.527 -\section{Booting Xen} 
  11.528 -
  11.529 -It should now be possible to restart the system and use Xen.  Reboot
  11.530 -as usual but choose the new Xen option when the Grub screen appears.
  11.531 -
  11.532 -What follows should look much like a conventional Linux boot.  The
  11.533 -first portion of the output comes from Xen itself, supplying low level
  11.534 -information about itself and the machine it is running on.  The
  11.535 -following portion of the output comes from XenLinux.
  11.536 -
  11.537 -You may see some errors during the XenLinux boot.  These are not
  11.538 -necessarily anything to worry about --- they may result from kernel
  11.539 -configuration differences between your XenLinux kernel and the one you
  11.540 -usually use.
  11.541 -
  11.542 -When the boot completes, you should be able to log into your system as
  11.543 -usual.  If you are unable to log in to your system running Xen, you
  11.544 -should still be able to reboot with your normal Linux kernel.
  11.545 -
  11.546 -
  11.547 -\chapter{Starting Additional Domains}
  11.548 -
  11.549 -The first step in creating a new domain is to prepare a root
  11.550 -filesystem for it to boot off.  Typically, this might be stored in a
  11.551 -normal partition, an LVM or other volume manager partition, a disk
  11.552 -file or on an NFS server.  A simple way to do this is simply to boot
  11.553 -from your standard OS install CD and install the distribution into
  11.554 -another partition on your hard drive.
  11.555 -
  11.556 -To start the \xend control daemon, type
  11.557 -\begin{quote}
  11.558 -\verb!# xend start!
  11.559 -\end{quote}
  11.560 -If you
  11.561 -wish the daemon to start automatically, see the instructions in
  11.562 -Section~\ref{s:xend}. Once the daemon is running, you can use the
  11.563 -\path{xm} tool to monitor and maintain the domains running on your
  11.564 -system. This chapter provides only a brief tutorial: we provide full
  11.565 -details of the \path{xm} tool in the next chapter. 
  11.566 -
  11.567 -%\section{From the web interface}
  11.568 -%
  11.569 -%Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
  11.570 -%more details) using the command: \\
  11.571 -%\verb_# xensv start_ \\
  11.572 -%This will also start Xend (see Chapter~\ref{cha:xend} for more information).
  11.573 -%
  11.574 -%The domain management interface will then be available at {\tt
  11.575 -%http://your\_machine:8080/}.  This provides a user friendly wizard for
  11.576 -%starting domains and functions for managing running domains.
  11.577 -%
  11.578 -%\section{From the command line}
  11.579 -
  11.580 -
  11.581 -\section{Creating a Domain Configuration File} 
  11.582  
  11.583 -Before you can start an additional domain, you must create a
  11.584 -configuration file. We provide two example files which you 
  11.585 -can use as a starting point: 
  11.586 -\begin{itemize} 
  11.587 -  \item \path{/etc/xen/xmexample1} is a simple template configuration file
  11.588 -    for describing a single VM.
  11.589 -
  11.590 -  \item \path{/etc/xen/xmexample2} file is a template description that
  11.591 -    is intended to be reused for multiple virtual machines.  Setting
  11.592 -    the value of the \path{vmid} variable on the \path{xm} command line
  11.593 -    fills in parts of this template.
  11.594 -\end{itemize} 
  11.595 -
  11.596 -Copy one of these files and edit it as appropriate.
  11.597 -Typical values you may wish to edit include: 
  11.598 -
  11.599 -\begin{quote}
  11.600 -\begin{description}
  11.601 -\item[kernel] Set this to the path of the kernel you compiled for use
  11.602 -              with Xen (e.g.\  \path{kernel = '/boot/vmlinuz-2.6-xenU'})
  11.603 -\item[memory] Set this to the size of the domain's memory in
  11.604 -megabytes (e.g.\ \path{memory = 64})
  11.605 -\item[disk] Set the first entry in this list to calculate the offset
  11.606 -of the domain's root partition, based on the domain ID.  Set the
  11.607 -second to the location of \path{/usr} if you are sharing it between
  11.608 -domains (e.g.\ \path{disk = ['phy:your\_hard\_drive\%d,sda1,w' \%
  11.609 -(base\_partition\_number + vmid), 'phy:your\_usr\_partition,sda6,r' ]})
  11.610 -\item[dhcp] Uncomment the dhcp variable, so that the domain will
  11.611 -receive its IP address from a DHCP server (e.g.\ \path{dhcp='dhcp'})
  11.612 -\end{description}
  11.613 -\end{quote}
  11.614 -
  11.615 -You may also want to edit the {\bf vif} variable in order to choose
  11.616 -the MAC address of the virtual ethernet interface yourself.  For
  11.617 -example: 
  11.618 -\begin{quote}
  11.619 -\verb_vif = ['mac=00:06:AA:F6:BB:B3']_
  11.620 -\end{quote}
  11.621 -If you do not set this variable, \xend will automatically generate a
  11.622 -random MAC address from an unused range.
  11.623 -
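
Putting these settings together, a complete \path{myvmconf} might look
something like the following sketch (the partition numbers, MAC address
and derived names here are illustrative only; adapt them to your own
system):
\begin{quote}
\begin{verbatim}
kernel = '/boot/vmlinuz-2.6-xenU'
memory = 64
name   = 'vm%d' % vmid
disk   = ['phy:hda%d,sda1,w' % (6 + vmid),
          'phy:hda2,sda6,r']
dhcp   = 'dhcp'
vif    = ['mac=00:06:AA:F6:BB:B3']
\end{verbatim}
\end{quote}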
  11.624 -
  11.625 -\section{Booting the Domain}
  11.626 -
  11.627 -The \path{xm} tool provides a variety of commands for managing domains.
  11.628 -Use the \path{create} command to start new domains. Assuming you've 
  11.629 -created a configuration file \path{myvmconf} based around
  11.630 -\path{/etc/xen/xmexample2}, to start a domain with virtual 
  11.631 -machine ID~1 you should type: 
  11.632 -
  11.633 -\begin{quote}
  11.634 -\begin{verbatim}
  11.635 -# xm create -c myvmconf vmid=1
  11.636 -\end{verbatim}
  11.637 -\end{quote}
  11.638 -
  11.639 -
  11.640 -The \path{-c} switch causes \path{xm} to connect to the domain's
  11.641 -console after creation.  The \path{vmid=1} sets the \path{vmid}
  11.642 -variable used in the \path{myvmconf} file. 
  11.643 -
  11.644 -
  11.645 -You should see the console boot messages from the new domain 
  11.646 -appearing in the terminal in which you typed the command, 
  11.647 -culminating in a login prompt. 
  11.648 -
  11.649 -
  11.650 -\section{Example: ttylinux}
  11.651 -
  11.652 -Ttylinux is a very small Linux distribution, designed to require very
  11.653 -few resources.  We will use it as a concrete example of how to start a
  11.654 -Xen domain.  Most users will probably want to install a full-featured
  11.655 -distribution once they have mastered the basics\footnote{ttylinux is
  11.656 -maintained by Pascal Schmidt. You can download source packages from
  11.657 -the distribution's home page: {\tt http://www.minimalinux.org/ttylinux/}}.
  11.658 -
  11.659 -\begin{enumerate}
  11.660 -\item Download and extract the ttylinux disk image from the Files
  11.661 -section of the project's SourceForge site (see 
  11.662 -\path{http://sf.net/projects/xen/}).
  11.663 -\item Create a configuration file like the following:
  11.664 -\begin{verbatim}
  11.665 -kernel = "/boot/vmlinuz-2.6-xenU"
  11.666 -memory = 64
  11.667 -name = "ttylinux"
  11.668 -nics = 1
  11.669 -ip = "1.2.3.4"
  11.670 -disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
  11.671 -root = "/dev/sda1 ro"
  11.672 -\end{verbatim}
  11.673 -\item Now start the domain and connect to its console:
  11.674 -\begin{verbatim}
  11.675 -xm create configfile -c
  11.676 -\end{verbatim}
  11.677 -\item Log in as root, password root.
  11.678 -\end{enumerate}
  11.679 -
  11.680 -
  11.681 -\section{Starting / Stopping Domains Automatically}
  11.682 -
  11.683 -It is possible to have certain domains start automatically at boot
  11.684 -time and to have dom0 wait for all running domains to shut down before
  11.685 -it shuts down the system.
  11.686 -
  11.687 -To specify that a domain should start at boot time, place its
  11.688 -configuration file (or a link to it) under \path{/etc/xen/auto/}.
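
For example, assuming the \path{myvmconf} file from the previous
chapter lives in \path{/etc/xen}, it could be enabled for automatic
start with:
\begin{quote}
\verb_# ln -s /etc/xen/myvmconf /etc/xen/auto/myvmconf_
\end{quote}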
  11.689 -
  11.690 -A Sys-V style init script for RedHat and LSB-compliant systems is
  11.691 -provided and will be automatically copied to \path{/etc/init.d/}
  11.692 -during install.  You can then enable it in the appropriate way for
  11.693 -your distribution.
  11.694 -
  11.695 -For instance, on RedHat:
  11.696 -
  11.697 -\begin{quote}
  11.698 -\verb_# chkconfig --add xendomains_
  11.699 -\end{quote}
  11.700 -
  11.701 -By default, this will start the boot-time domains in runlevels 3, 4
  11.702 -and 5.
  11.703 -
  11.704 -You can also use the \path{service} command to run this script
  11.705 -manually, e.g.:
  11.706 -
  11.707 -\begin{quote}
  11.708 -\verb_# service xendomains start_
  11.709 -
  11.710 -Starts all the domains with config files under /etc/xen/auto/.
  11.711 -\end{quote}
  11.712 -
  11.713 -
  11.714 -\begin{quote}
  11.715 -\verb_# service xendomains stop_
  11.716 -
  11.717 -Shuts down ALL running Xen domains.
  11.718 -\end{quote}
  11.719 -
  11.720 -\chapter{Domain Management Tools}
  11.721 -
  11.722 -The previous chapter described a simple example of how to configure
  11.723 -and start a domain.  This chapter summarises the tools available to
  11.724 -manage running domains.
  11.725 -
  11.726 -\section{Command-line Management}
  11.727 -
  11.728 -Command line management tasks are also performed using the \path{xm}
  11.729 -tool.  For online help for the commands available, type:
  11.730 -\begin{quote}
  11.731 -\verb_# xm help_
  11.732 -\end{quote}
  11.733 -
  11.734 -You can also type \path{xm help $<$command$>$} for more information 
  11.735 -on a given command. 
  11.736 -
  11.737 -\subsection{Basic Management Commands}
  11.738 -
  11.739 -The most important \path{xm} commands are: 
  11.740 -\begin{quote}
  11.741 -\verb_# xm list_: Lists all domains running.\\
  11.742 -\verb_# xm consoles_: Gives information about the domain consoles.\\
  11.743 -\verb_# xm console_: Opens a console to a domain (e.g.\
  11.744 -  \verb_# xm console myVM_)
  11.745 -\end{quote}
  11.746 -
  11.747 -\subsection{\tt xm list}
  11.748 -
  11.749 -The output of \path{xm list} is in rows of the following format:
  11.750 -\begin{center}
  11.751 -{\tt name domid memory cpu state cputime console}
  11.752 -\end{center}
  11.753 -
  11.754 -\begin{quote}
  11.755 -\begin{description}
  11.756 -\item[name]  The descriptive name of the virtual machine.
  11.757 -\item[domid] The domain ID this virtual machine is running in.
  11.758 -\item[memory] Memory size in megabytes.
  11.759 -\item[cpu]   The CPU this domain is running on.
  11.760 -\item[state] Domain state consists of 5 fields:
  11.761 -  \begin{description}
  11.762 -  \item[r] running
  11.763 -  \item[b] blocked
  11.764 -  \item[p] paused
  11.765 -  \item[s] shutdown
  11.766 -  \item[c] crashed
  11.767 -  \end{description}
  11.768 -\item[cputime] How much CPU time (in seconds) the domain has used so far.
  11.769 -\item[console] TCP port accepting connections to the domain's console.
  11.770 -\end{description}
  11.771 -\end{quote}
  11.772 -
  11.773 -The \path{xm list} command also supports a long output format when the
  11.774 -\path{-l} switch is used.  This outputs the full details of the
  11.775 -running domains in \xend's SXP configuration format.
  11.776 -
  11.777 -For example, suppose the system is running the ttylinux domain as
  11.778 -described earlier.  The list command should produce output somewhat
  11.779 -like the following:
  11.780 -\begin{verbatim}
  11.781 -# xm list
  11.782 -Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  11.783 -Domain-0           0      251    0  r----    172.2        
  11.784 -ttylinux           5       63    0  -b---      3.0    9605
  11.785 -\end{verbatim}
  11.786 -
  11.787 -Here we can see the details for the ttylinux domain, as well as for
  11.788 -domain 0 (which, of course, is always running).  Note that the console
  11.789 -port for the ttylinux domain is 9605.  You can connect to it over TCP
  11.790 -using a terminal program (e.g. \path{telnet} or, better,
  11.791 -\path{xencons}).  The simplest way to connect is to use the \path{xm console}
  11.792 -command, specifying the domain name or ID.  To connect to the console
  11.793 -of the ttylinux domain, we could use any of the following: 
  11.794 -\begin{verbatim}
  11.795 -# xm console ttylinux
  11.796 -# xm console 5
  11.797 -# xencons localhost 9605
  11.798 -\end{verbatim}
  11.799 -
  11.800 -\section{Domain Save and Restore}
  11.801 -
  11.802 -The administrator of a Xen system may suspend a virtual machine's
  11.803 -current state into a disk file in domain 0, allowing it to be resumed
  11.804 -at a later time.
  11.805 -
  11.806 -The ttylinux domain described earlier can be suspended to disk using
  11.807 -the command:
  11.808 -\begin{verbatim}
  11.809 -# xm save ttylinux ttylinux.xen
  11.810 -\end{verbatim}
  11.811 -
  11.812 -This will stop the domain named `ttylinux' and save its current state
  11.813 -into a file called \path{ttylinux.xen}.
  11.814 -
  11.815 -To resume execution of this domain, use the \path{xm restore} command:
  11.816 -\begin{verbatim}
  11.817 -# xm restore ttylinux.xen
  11.818 -\end{verbatim}
  11.819 -
  11.820 -This will restore the state of the domain and restart it.  The domain
  11.821 -will carry on as before and the console may be reconnected using the
  11.822 -\path{xm console} command, as above.
  11.823 -
  11.824 -\section{Live Migration}
  11.825 -
  11.826 -Live migration is used to transfer a domain between physical hosts
  11.827 -whilst that domain continues to perform its usual activities --- from
  11.828 -the user's perspective, the migration should be imperceptible.
  11.829 -
  11.830 -To perform a live migration, both hosts must be running Xen / \xend and
  11.831 -the destination host must have sufficient resources (e.g. memory
  11.832 -capacity) to accommodate the domain after the move. Furthermore, we
  11.833 -currently require both source and destination machines to be on the 
  11.834 -same L2 subnet. 
  11.835 -
  11.836 -Currently, there is no support for providing automatic remote access
  11.837 -to filesystems stored on local disk when a domain is migrated.
  11.838 -Administrators should choose an appropriate storage solution
  11.839 -(e.g. SAN, NAS, etc.) to ensure that domain filesystems are also
  11.840 -available on their destination node. GNBD is a good method for
  11.841 -exporting a volume from one machine to another. iSCSI can do a similar
  11.842 -job, but is more complex to set up.
  11.843 -
  11.844 -When a domain migrates, its MAC and IP addresses move with it, so it
  11.845 -is only possible to migrate VMs within the same layer-2 network and IP
  11.846 -subnet. If the destination node is on a different subnet, the
  11.847 -administrator would need to manually configure a suitable etherip or
  11.848 -IP tunnel in the domain 0 of the remote node. 
  11.849 -
  11.850 -A domain may be migrated using the \path{xm migrate} command.  To
  11.851 -live migrate a domain to another machine, we would use
  11.852 -the command:
  11.853 -
  11.854 -\begin{verbatim}
  11.855 -# xm migrate --live mydomain destination.ournetwork.com
  11.856 -\end{verbatim}
  11.857 -
  11.858 -Without the \path{--live} flag, \xend simply stops the domain,
  11.859 -copies the memory image over to the new node and restarts it. Since
  11.860 -domains can have large allocations this can be quite time consuming,
  11.861 -even on a Gigabit network. With the \path{--live} flag \xend attempts
  11.862 -to keep the domain running while the migration is in progress,
  11.863 -resulting in typical `downtimes' of just 60--300ms.
  11.864 -
  11.865 -For now, it is necessary to reconnect to the domain's console on
  11.866 -the new machine using the \path{xm console} command.  If a migrated
  11.867 -domain has any open network connections then they will be preserved,
  11.868 -so SSH connections do not have this limitation.
  11.869 -
  11.870 -\section{Managing Domain Memory}
  11.871 -
  11.872 -XenLinux domains have the ability to relinquish / reclaim machine
  11.873 -memory at the request of the administrator or the user of the domain.
  11.874 +\part{Introduction and Tutorial}
  11.875  
  11.876 -\subsection{Setting memory footprints from dom0}
  11.877 -
  11.878 -The machine administrator can request that a domain alter its memory
  11.879 -footprint using the \path{xm set-mem} command.  For instance, we can
  11.880 -request that our example ttylinux domain reduce its memory footprint
  11.881 -to 32 megabytes.
  11.882 -
  11.883 -\begin{verbatim}
  11.884 -# xm set-mem ttylinux 32
  11.885 -\end{verbatim}
  11.886 -
  11.887 -We can now see the result of this in the output of \path{xm list}:
  11.888 -
  11.889 -\begin{verbatim}
  11.890 -# xm list
  11.891 -Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  11.892 -Domain-0           0      251    0  r----    172.2        
  11.893 -ttylinux           5       31    0  -b---      4.3    9605
  11.894 -\end{verbatim}
  11.895 -
  11.896 -The domain has responded to the request by returning memory to Xen. We
  11.897 -can restore the domain to its original size using the command line:
  11.898 -
  11.899 -\begin{verbatim}
  11.900 -# xm set-mem ttylinux 64
  11.901 -\end{verbatim}
  11.902 -
  11.903 -\subsection{Setting memory footprints from within a domain}
  11.904 -
  11.905 -The virtual file \path{/proc/xen/balloon} allows the owner of a
  11.906 -domain to adjust their own memory footprint.  Reading the file
  11.907 -(e.g. \path{cat /proc/xen/balloon}) prints out the current
  11.908 -memory footprint of the domain.  Writing the file
  11.909 -(e.g. \path{echo new\_target > /proc/xen/balloon}) requests
  11.910 -that the kernel adjust the domain's memory footprint to a new value.
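
For example, a domain administrator might inspect and then adjust the
footprint as follows (a sketch only: take the units of the target value
from the output of the read, as they depend on the kernel):
\begin{quote}
\begin{verbatim}
# cat /proc/xen/balloon
# echo 32768 > /proc/xen/balloon
\end{verbatim}
\end{quote}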
  11.911 -
  11.912 -\subsection{Setting memory limits}
  11.913 -
  11.914 -Xen associates a memory size limit with each domain.  By default, this
  11.915 -is the amount of memory the domain is originally started with,
  11.916 -preventing the domain from ever growing beyond this size.  To permit a
  11.917 -domain to grow beyond its original allocation or to prevent a domain
  11.918 -you've shrunk from reclaiming the memory it relinquished, use the 
  11.919 -\path{xm maxmem} command.
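
For instance, to allow the ttylinux domain to grow back up to 128
megabytes, one might use a command along the following lines (check
\path{xm help maxmem} for the exact argument format):
\begin{quote}
\verb_# xm maxmem ttylinux 128_
\end{quote}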
  11.920 -
  11.921 -\chapter{Domain Filesystem Storage}
  11.922 -
  11.923 -It is possible to directly export any Linux block device in dom0 to
  11.924 -another domain, or to export filesystems / devices to virtual machines
  11.925 -using standard network protocols (e.g. NBD, iSCSI, NFS, etc.).  This
  11.926 -chapter covers some of the possibilities.
  11.927 -
  11.928 -
  11.929 -\section{Exporting Physical Devices as VBDs} 
  11.930 -\label{s:exporting-physical-devices-as-vbds}
  11.931 -
  11.932 -One of the simplest configurations is to directly export 
  11.933 -individual partitions from domain 0 to other domains. To 
  11.934 -achieve this, use the \path{phy:} specifier in your domain
  11.935 -configuration file. For example a line like
  11.936 -\begin{quote}
  11.937 -\verb_disk = ['phy:hda3,sda1,w']_
  11.938 -\end{quote}
  11.939 -specifies that the partition \path{/dev/hda3} in domain 0 
  11.940 -should be exported read-write to the new domain as \path{/dev/sda1}; 
  11.941 -one could equally well export it as \path{/dev/hda} or 
  11.942 -\path{/dev/sdb5} should one wish. 
  11.943 -
  11.944 -In addition to local disks and partitions, it is possible to export
  11.945 -any device that Linux considers to be ``a disk'' in the same manner.
  11.946 -For example, if you have iSCSI disks or GNBD volumes imported into
  11.947 -domain 0 you can export these to other domains using the \path{phy:}
  11.948 -disk syntax. E.g.:
  11.949 -\begin{quote}
  11.950 -\verb_disk = ['phy:vg/lvm1,sda2,w']_
  11.951 -\end{quote}
  11.952 -
  11.953 -
  11.954 -
  11.955 -\begin{center}
  11.956 -\framebox{\bf Warning: Block device sharing}
  11.957 -\end{center}
  11.958 -\begin{quote}
  11.959 -Block devices should typically only be shared between domains in a
  11.960 -read-only fashion; otherwise the Linux kernel's file systems will get
  11.961 -very confused, as the file system structure may change underneath them
  11.962 -(having the same ext3 partition mounted rw twice is a sure-fire way to
  11.963 -cause irreparable damage)!  \Xend will attempt to prevent you from
  11.964 -doing this by checking that the device is not mounted read-write in
  11.965 -domain 0, and hasn't already been exported read-write to another
  11.966 -domain.
  11.967 -If you want read-write sharing, export the directory to other domains
  11.968 -via NFS from domain0 (or use a cluster file system such as GFS or
  11.969 -ocfs2).
  11.970 -
  11.971 -\end{quote}
  11.972 -
  11.973 -
  11.974 -\section{Using File-backed VBDs}
  11.975 -
  11.976 -It is also possible to use a file in Domain 0 as the primary storage
  11.977 -for a virtual machine.  As well as being convenient, this also has the
  11.978 -advantage that the virtual block device will be {\em sparse} --- space
  11.979 -will only be allocated as parts of the file are used.  So if a
  11.980 -virtual machine uses only half of its disk space, then the file only
  11.981 -takes up half of the size allocated.
  11.982 -
  11.983 -For example, to create a 2GB sparse file-backed virtual block device
  11.984 -(actually only consumes 1KB of disk):
  11.985 -\begin{quote}
  11.986 -\verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
  11.987 -\end{quote}
  11.988 -
  11.989 -Make a file system in the disk file: 
  11.990 -\begin{quote}
  11.991 -\verb_# mkfs -t ext3 vm1disk_
  11.992 -\end{quote}
  11.993 -
  11.994 -(when the tool asks for confirmation, answer `y')
  11.995 -
  11.996 -Populate the file system, e.g. by copying from the current root:
  11.997 -\begin{quote}
  11.998 -\begin{verbatim}
  11.999 -# mount -o loop vm1disk /mnt
 11.1000 -# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
 11.1001 -# mkdir /mnt/{proc,sys,home,tmp}
 11.1002 -\end{verbatim}
 11.1003 -\end{quote}
 11.1004 -
 11.1005 -Tailor the file system by editing \path{/etc/fstab},
 11.1006 -\path{/etc/hostname}, etc.  Don't forget to edit the files in the
 11.1007 -mounted file system rather than your domain 0 filesystem: e.g. you
 11.1008 -would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}.  For
 11.1009 -this example, set the root device in fstab to \path{/dev/sda1}.
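
The root entry in \path{/mnt/etc/fstab} would then look something
like:
\begin{quote}
\begin{verbatim}
/dev/sda1    /    ext3    defaults    1    1
\end{verbatim}
\end{quote}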
 11.1010 -
 11.1011 -Now unmount (this is important!):
 11.1012 -\begin{quote}
 11.1013 -\verb_# umount /mnt_
 11.1014 -\end{quote}
 11.1015 -
 11.1016 -In the configuration file set:
 11.1017 -\begin{quote}
 11.1018 -\verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
 11.1019 -\end{quote}
 11.1020 +%% Chapter Introduction moved to introduction.tex
 11.1021 +\include{src/user/introduction}
 11.1022  
 11.1023 -As the virtual machine writes to its `disk', the sparse file will be
 11.1024 -filled in and consume more space up to the original 2GB.
 11.1025 -
 11.1026 -{\bf Note that file-backed VBDs may not be appropriate for backing
 11.1027 -I/O-intensive domains.}  File-backed VBDs are known to experience
 11.1028 -substantial slowdowns under heavy I/O workloads, due to the I/O handling
 11.1029 -by the loopback block device used to support file-backed VBDs in dom0.
 11.1030 -Better I/O performance can be achieved by using either LVM-backed VBDs
 11.1031 -(Section~\ref{s:using-lvm-backed-vbds}) or physical devices as VBDs
 11.1032 -(Section~\ref{s:exporting-physical-devices-as-vbds}).
 11.1033 -
 11.1034 -Linux supports a maximum of eight file-backed VBDs across all domains by
 11.1035 -default.  This limit can be statically increased by using the {\em
 11.1036 -max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is compiled as a
 11.1037 -module in the dom0 kernel, or by using the {\em max\_loop=n} boot option
 11.1038 -if CONFIG\_BLK\_DEV\_LOOP is compiled directly into the dom0 kernel.
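
For example, assuming the loopback driver is built as a module, the
limit could be raised to 64 devices with:
\begin{quote}
\verb!# modprobe loop max_loop=64!
\end{quote}
For a built-in driver, append \path{max\_loop=64} to the dom0 kernel's
boot parameters instead.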
 11.1039 -
 11.1040 -
 11.1041 -\section{Using LVM-backed VBDs}
 11.1042 -\label{s:using-lvm-backed-vbds}
 11.1043 -
 11.1044 -A particularly appealing solution is to use LVM volumes
 11.1045 -as backing for domain filesystems, since this allows dynamic
 11.1046 -growing/shrinking of volumes as well as snapshots and other
 11.1047 -features.
 11.1048 -
 11.1049 -To initialise a partition to support LVM volumes:
 11.1050 -\begin{quote}
 11.1051 -\begin{verbatim} 
 11.1052 -# pvcreate /dev/sda10           
 11.1053 -\end{verbatim} 
 11.1054 -\end{quote}
 11.1055 -
 11.1056 -Create a volume group named `vg' on the physical partition:
 11.1057 -\begin{quote}
 11.1058 -\begin{verbatim} 
 11.1059 -# vgcreate vg /dev/sda10
 11.1060 -\end{verbatim} 
 11.1061 -\end{quote}
 11.1062 -
 11.1063 -Create a logical volume of size 4GB named `myvmdisk1':
 11.1064 -\begin{quote}
 11.1065 -\begin{verbatim} 
 11.1066 -# lvcreate -L4096M -n myvmdisk1 vg
 11.1067 -\end{verbatim} 
 11.1068 -\end{quote}
 11.1069 -
 11.1070 -You should now see that you have a \path{/dev/vg/myvmdisk1} device.
 11.1071 -Make a filesystem on it, mount it and populate it, e.g.:
 11.1072 -\begin{quote}
 11.1073 -\begin{verbatim} 
 11.1074 -# mkfs -t ext3 /dev/vg/myvmdisk1
 11.1075 -# mount /dev/vg/myvmdisk1 /mnt
 11.1076 -# cp -ax / /mnt
 11.1077 -# umount /mnt
 11.1078 -\end{verbatim} 
 11.1079 -\end{quote}
 11.1080 -
 11.1081 -Now configure your VM with the following disk configuration:
 11.1082 -\begin{quote}
 11.1083 -\begin{verbatim} 
 11.1084 - disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
 11.1085 -\end{verbatim} 
 11.1086 -\end{quote}
 11.1087 -
 11.1088 -LVM enables you to grow the size of logical volumes, but you'll need
 11.1089 -to resize the corresponding file system to make use of the new
 11.1090 -space. Some file systems (e.g. ext3) now support on-line resize.  See
 11.1091 -the LVM manuals for more details.
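
For example, to add a gigabyte to the volume and then grow the
filesystem to match (a sketch only; whether the resize can be done
on-line, and which resize tool to use, depends on your kernel and
distribution):
\begin{quote}
\begin{verbatim}
# lvextend -L+1G /dev/vg/myvmdisk1
# resize2fs /dev/vg/myvmdisk1
\end{verbatim}
\end{quote}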
 11.1092 +%% Chapter Installation moved to installation.tex
 11.1093 +\include{src/user/installation}
 11.1094  
 11.1095 -You can also use LVM for creating copy-on-write clones of LVM
 11.1096 -volumes (known as writable persistent snapshots in LVM
 11.1097 -terminology). This facility is new in Linux 2.6.8, so isn't as
 11.1098 -stable as one might hope. In particular, using lots of CoW LVM
 11.1099 -disks consumes a lot of dom0 memory, and error conditions such as
 11.1100 -running out of disk space are not handled well. Hopefully this
 11.1101 -will improve in future.
 11.1102 -
 11.1103 -To create two copy-on-write clones of the above file system you
 11.1104 -would use the following commands:
 11.1105 -
 11.1106 -\begin{quote}
 11.1107 -\begin{verbatim} 
 11.1108 -# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
 11.1109 -# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
 11.1110 -\end{verbatim} 
 11.1111 -\end{quote}
 11.1112 -
 11.1113 -Each of these can grow to have 1GB of differences from the master
 11.1114 -volume. You can grow the amount of space for storing the
 11.1115 -differences using the lvextend command, e.g.:
 11.1116 -\begin{quote}
 11.1117 -\begin{verbatim} 
 11.1118 -# lvextend -L+100M /dev/vg/myclonedisk1
 11.1119 -\end{verbatim} 
 11.1120 -\end{quote}
 11.1121 -
 11.1122 -Don't let the `differences volume' ever fill up, otherwise LVM gets
 11.1123 -rather confused. It may be possible to automate the growing
 11.1124 -process by using \path{dmsetup wait} to spot the volume getting full
 11.1125 -and then issuing an \path{lvextend}.
 11.1126 -
 11.1127 -In principle, it is possible to continue writing to the volume
 11.1128 -that has been cloned (the changes will not be visible to the
 11.1129 -clones), but we wouldn't recommend this: have the cloned volume
 11.1130 -as a `pristine' file system install that isn't mounted directly
 11.1131 -by any of the virtual machines.
 11.1132 -
 11.1133 +%% Chapter Starting Additional Domains  moved to start_addl_dom.tex
 11.1134 +\include{src/user/start_addl_dom}
 11.1135  
 11.1136 -\section{Using NFS Root}
 11.1137 -
 11.1138 -First, populate a root filesystem in a directory on the server
 11.1139 -machine. This can be on a distinct physical machine, or simply 
 11.1140 -run within a virtual machine on the same node.
 11.1141 -
 11.1142 -Now configure the NFS server to export this filesystem over the
 11.1143 -network by adding a line to \path{/etc/exports}, for instance:
 11.1144 -
 11.1145 -\begin{quote}
 11.1146 -\begin{small}
 11.1147 -\begin{verbatim}
 11.1148 -/export/vm1root      1.2.3.4/24(rw,sync,no_root_squash)
 11.1149 -\end{verbatim}
 11.1150 -\end{small}
 11.1151 -\end{quote}
 11.1152 +%% Chapter Domain Management Tools moved to domain_mgmt.tex
 11.1153 +\include{src/user/domain_mgmt}
 11.1154  
 11.1155 -Finally, configure the domain to use NFS root.  In addition to the
 11.1156 -normal variables, you should make sure to set the following values in
 11.1157 -the domain's configuration file:
 11.1158 +%% Chapter Domain Filesystem Storage moved to domain_filesystem.tex
 11.1159 +\include{src/user/domain_filesystem}
 11.1160  
 11.1161 -\begin{quote}
 11.1162 -\begin{small}
 11.1163 -\begin{verbatim}
 11.1164 -root       = '/dev/nfs'
 11.1165 -nfs_server = '2.3.4.5'       # substitute IP address of server 
 11.1166 -nfs_root   = '/path/to/root' # path to root FS on the server
 11.1167 -\end{verbatim}
 11.1168 -\end{small}
 11.1169 -\end{quote}
 11.1170 -
 11.1171 -The domain will need network access at boot time, so either statically
 11.1172 -configure an IP address (using the config variables \path{ip},
 11.1173 -\path{netmask}, \path{gateway} and \path{hostname}) or enable DHCP
 11.1174 -(\path{dhcp='dhcp'}).
 11.1175 -
 11.1176 -Note that the Linux NFS root implementation is known to have stability
 11.1177 -problems under high load (this is not a Xen-specific problem), so this
 11.1178 -configuration may not be appropriate for critical servers.
 11.1179  
 11.1180  
 11.1181  \part{User Reference Documentation}
 11.1182  
 11.1183 -\chapter{Control Software} 
 11.1184 -
 11.1185 -The Xen control software includes the \xend node control daemon (which
 11.1186 -must be running), the \path{xm} command line tools and the prototype
 11.1187 -Xensv web interface.
 11.1188 -
 11.1189 -\section{\Xend (node control daemon)}
 11.1190 -\label{s:xend}
 11.1191 -
 11.1192 -The Xen Daemon (\Xend) performs system management functions related to
 11.1193 -virtual machines.  It forms a central point of control for a machine
 11.1194 -and can be controlled using an HTTP-based protocol.  \Xend must be
 11.1195 -running in order to start and manage virtual machines.
 11.1196 -
 11.1197 -\Xend must be run as root because it needs access to privileged system
 11.1198 -management functions.  A small set of commands may be issued on the
 11.1199 -\xend command line:
 11.1200 -
 11.1201 -\begin{tabular}{ll}
 11.1202 -\verb!# xend start! & start \xend, if not already running \\
 11.1203 -\verb!# xend stop!  & stop \xend if already running       \\
 11.1204 -\verb!# xend restart! & restart \xend if running, otherwise start it \\
 11.1205 -% \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
 11.1206 -\verb!# xend status! & indicates \xend status by its return code
 11.1207 -\end{tabular}
 11.1208 -
 11.1209 -A SysV init script called {\tt xend} is provided to start \xend at boot
 11.1210 -time.  {\tt make install} installs this script in \path{/etc/init.d}.
 11.1211 -To enable it, you have to make symbolic links in the appropriate
 11.1212 -runlevel directories or use the {\tt chkconfig} tool, where available.
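
For example, on a RedHat-style system:
\begin{quote}
\verb_# chkconfig --add xend_
\end{quote}
Alternatively, create the symbolic links by hand, e.g. something like
\path{ln -s /etc/init.d/xend /etc/rc3.d/S98xend} (the runlevel
directory and start priority shown are illustrative).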
 11.1213 -
 11.1214 -Once \xend is running, more sophisticated administration can be done
 11.1215 -using the xm tool (see Section~\ref{s:xm}) and the experimental
 11.1216 -Xensv web interface (see Section~\ref{s:xensv}).
 11.1217 -
 11.1218 -As \xend runs, events will be logged to \path{/var/log/xend.log} and, 
 11.1219 -if the migration assistant daemon (\path{xfrd}) has been started, 
 11.1220 -\path{/var/log/xfrd.log}. These may be of use for troubleshooting
 11.1221 -problems.
 11.1222 -
 11.1223 -\section{Xm (command line interface)}
 11.1224 -\label{s:xm}
 11.1225 -
 11.1226 -The xm tool is the primary tool for managing Xen from the console.
 11.1227 -The general format of an xm command line is:
 11.1228 -
 11.1229 -\begin{verbatim}
 11.1230 -# xm command [switches] [arguments] [variables]
 11.1231 -\end{verbatim}
 11.1232 -
 11.1233 -The available {\em switches} and {\em arguments} are dependent on the
 11.1234 -{\em command} chosen.  The {\em variables} may be set using
 11.1235 -declarations of the form {\tt variable=value}; command line
 11.1236 -declarations override any of the values in the configuration file
 11.1237 -being used, including the standard variables described above and any
 11.1238 -custom variables (for instance, the \path{xmdefconfig} file uses a
 11.1239 -{\tt vmid} variable).
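
For example, using the \path{myvmconf} file described earlier, both
the \path{vmid} and the memory allocation can be overridden at
creation time:
\begin{quote}
\verb_# xm create myvmconf vmid=2 memory=128_
\end{quote}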
 11.1240 -
 11.1241 -The available commands are as follows:
 11.1242 -
 11.1243 -\begin{description}
 11.1244 -\item[set-mem] Request a domain to adjust its memory footprint.
 11.1245 -\item[create] Create a new domain.
 11.1246 -\item[destroy] Kill a domain immediately.
 11.1247 -\item[list] List running domains.
 11.1248 -\item[shutdown] Ask a domain to shut down.
 11.1249 -\item[dmesg] Fetch the Xen (not Linux!) boot output.
 11.1250 -\item[consoles] List the available consoles.
 11.1251 -\item[console] Connect to the console for a domain.
 11.1252 -\item[help] Get help on xm commands.
 11.1253 -\item[save] Suspend a domain to disk.
 11.1254 -\item[restore] Restore a domain from disk.
 11.1255 -\item[pause] Pause a domain's execution.
 11.1256 -\item[unpause] Unpause a domain.
 11.1257 -\item[pincpu] Pin a domain to a CPU.
 11.1258 -\item[bvt] Set BVT scheduler parameters for a domain.
 11.1259 -\item[bvt\_ctxallow] Set the BVT context switching allowance for the system.
 11.1260 -\item[atropos] Set the atropos parameters for a domain.
 11.1261 -\item[rrobin] Set the round robin time slice for the system.
 11.1262 -\item[info] Get information about the Xen host.
 11.1263 -\item[call] Call a \xend HTTP API function directly.
 11.1264 -\end{description}
 11.1265 -
 11.1266 -For a detailed overview of switches, arguments and variables to each command
 11.1267 -try
 11.1268 -\begin{quote}
 11.1269 -\begin{verbatim}
 11.1270 -# xm help command
 11.1271 -\end{verbatim}
 11.1272 -\end{quote}
 11.1273 -
 11.1274 -\section{Xensv (web control interface)}
 11.1275 -\label{s:xensv}
 11.1276 -
 11.1277 -Xensv is the experimental web control interface for managing a Xen
 11.1278 -machine.  It can be used to perform some (but not yet all) of the
 11.1279 -management tasks that can be done using the xm tool.
 11.1280 -
 11.1281 -It can be started using:
 11.1282 -\begin{quote}
 11.1283 -\verb_# xensv start_
 11.1284 -\end{quote}
 11.1285 -and stopped using: 
 11.1286 -\begin{quote}
 11.1287 -\verb_# xensv stop_
 11.1288 -\end{quote}
 11.1289 -
 11.1290 -By default, Xensv will serve out the web interface on port 8080.  This
 11.1291 -can be changed by editing 
 11.1292 -\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
 11.1293 -
 11.1294 -Once Xensv is running, the web interface can be used to create and
 11.1295 -manage running domains.
 11.1296 -
 11.1297 -
 11.1298 -
 11.1299 -
 11.1300 -\chapter{Domain Configuration}
 11.1301 -\label{cha:config}
 11.1302 -
 11.1303 -This chapter describes the syntax of the domain configuration
 11.1304 -files and how to further specify networking,
 11.1305 -driver domain and general scheduling behaviour.
 11.1306 -
 11.1307 -\section{Configuration Files}
 11.1308 -\label{s:cfiles}
 11.1309 -
 11.1310 -Xen configuration files contain the following standard variables.
 11.1311 -Unless otherwise stated, configuration items should be enclosed in
 11.1312 -quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2} 
 11.1313 -for concrete examples of the syntax.
 11.1314 -
 11.1315 -\begin{description}
 11.1316 -\item[kernel] Path to the kernel image 
 11.1317 -\item[ramdisk] Path to a ramdisk image (optional).
 11.1318 -% \item[builder] The name of the domain build function (e.g. {\tt 'linux'} or {\tt 'netbsd'}).
 11.1319 -\item[memory] Memory size in megabytes.
 11.1320 -\item[cpu] CPU to run this domain on, or {\tt -1} for
 11.1321 -  auto-allocation. 
 11.1322 -\item[console] Port to export the domain console on (default 9600 + domain ID).
 11.1323 -\item[nics] Number of virtual network interfaces.
 11.1324 -\item[vif] List of MAC addresses (random addresses are assigned if not
 11.1325 -  given) and bridges to use for the domain's network interfaces, e.g.
 11.1326 -\begin{verbatim}
 11.1327 -vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
 11.1328 -        'bridge=xen-br1' ]
 11.1329 -\end{verbatim}
 11.1330 -  to assign a MAC address and bridge to the first interface and assign
 11.1331 -  a different bridge to the second interface, leaving \xend to choose
 11.1332 -  the MAC address.
 11.1333 -\item[disk] List of block devices to export to the domain,  e.g. \\
 11.1334 -  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
 11.1335 -  exports physical device \path{/dev/hda1} to the domain 
 11.1336 -  as \path{/dev/sda1} with read-only access. Exporting a disk read-write 
 11.1337 -  which is currently mounted is dangerous -- if you are \emph{certain}
 11.1338 -  you wish to do this, you can specify \path{w!} as the mode. 
 11.1339 -\item[dhcp] Set to {\tt 'dhcp'} if you want to use DHCP to configure
 11.1340 -  networking. 
 11.1341 -\item[netmask] Manually configured IP netmask.
 11.1342 -\item[gateway] Manually configured IP gateway. 
 11.1343 -\item[hostname] Set the hostname for the virtual machine.
 11.1344 -\item[root] Specify the root device parameter on the kernel command
 11.1345 -  line. 
 11.1346 -\item[nfs\_server] IP address for the NFS server (if any). 
 11.1347 -\item[nfs\_root] Path of the root filesystem on the NFS server (if any).
 11.1348 -\item[extra] Extra string to append to the kernel command line (if
 11.1349 -  any).
 11.1350 -\item[restart] Three possible options:
 11.1351 -  \begin{description}
 11.1352 -  \item[always] Always restart the domain, no matter what
 11.1353 -                its exit code is.
 11.1354 -  \item[never]  Never restart the domain.
 11.1355 -  \item[onreboot] Restart the domain iff it requests reboot.
 11.1356 -  \end{description}
 11.1357 -\end{description}
 11.1358 -
 11.1359 -For additional flexibility, it is also possible to include Python
 11.1360 -scripting commands in configuration files.  An example of this is the
 11.1361 -\path{xmexample2} file, which uses Python code to handle the 
 11.1362 -\path{vmid} variable.
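
As a sketch of the idea (modelled on, but not copied from,
\path{xmexample2}), a configuration file might derive per-VM settings
from a \path{vmid} supplied on the \path{xm} command line:
\begin{quote}
\begin{verbatim}
# `vmid' is expected on the xm command line, e.g. vmid=3
vmid = int(vmid)
name = 'vm%d' % vmid
disk = ['phy:hda%d,sda1,w' % (6 + vmid)]
\end{verbatim}
\end{quote}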
 11.1363 -
 11.1364 -
 11.1365 -%\part{Advanced Topics}
 11.1366 -
 11.1367 -\section{Network Configuration}
 11.1368 -
 11.1369 -For many users, the default installation should work `out of the box'.
 11.1370 -More complicated network setups, for instance with multiple ethernet
 11.1371 -interfaces and/or existing bridging setups, will require some
 11.1372 -special configuration.
 11.1373 -
 11.1374 -The purpose of this section is to describe the mechanisms provided by
 11.1375 -\xend to allow a flexible configuration for Xen's virtual networking.
 11.1376 -
 11.1377 -\subsection{Xen virtual network topology}
 11.1378 -
 11.1379 -Each domain network interface is connected to a virtual network
 11.1380 -interface in dom0 by a point-to-point link (effectively a `virtual
 11.1381 -crossover cable').  These devices are named {\tt
 11.1382 -vif$<$domid$>$.$<$vifid$>$} (e.g. {\tt vif1.0} for the first interface
 11.1383 -in domain 1, {\tt vif3.1} for the second interface in domain 3).
 11.1384 -
 11.1385 -Traffic on these virtual interfaces is handled in domain 0 using
 11.1386 -standard Linux mechanisms for bridging, routing, rate limiting, etc.
 11.1387 -Xend calls on two shell scripts to perform initial configuration of
 11.1388 -the network and configuration of new virtual interfaces.  By default,
 11.1389 -these scripts configure a single bridge for all the virtual
 11.1390 -interfaces.  Arbitrary routing / bridging configurations can be
 11.1391 -configured by customising the scripts, as described in the following
 11.1392 -section.
 11.1393 -
 11.1394 -\subsection{Xen networking scripts}
 11.1395 -
 11.1396 -Xen's virtual networking is configured by two shell scripts (by
 11.1397 -default \path{network} and \path{vif-bridge}).  These are
 11.1398 -called automatically by \xend when certain events occur, with
 11.1399 -arguments to the scripts providing further contextual information.
 11.1400 -These scripts are found by default in \path{/etc/xen/scripts}.  The
 11.1401 -names and locations of the scripts can be configured in
 11.1402 -\path{/etc/xen/xend-config.sxp}.
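
For instance, entries along the following lines in
\path{xend-config.sxp} select the scripts to use (a sketch; check the
comments in your installed file for the exact option names):
\begin{quote}
\begin{verbatim}
(network-script network)
(vif-script    vif-bridge)
\end{verbatim}
\end{quote}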
 11.1403 -
 11.1404 -\begin{description} 
 11.1405 -
 11.1406 -\item[network:] This script is called whenever \xend is started or
 11.1407 -stopped to respectively initialise or tear down the Xen virtual
 11.1408 -network. In the default configuration initialisation creates the
 11.1409 -bridge `xen-br0' and moves eth0 onto that bridge, modifying the
 11.1410 -routing accordingly. When \xend exits, it deletes the Xen bridge and
 11.1411 -removes eth0, restoring the normal IP and routing configuration.
 11.1412 -
 11.1413 -%% In configurations where the bridge already exists, this script could
 11.1414 -%% be replaced with a link to \path{/bin/true} (for instance).
 11.1415 -
 11.1416 -\item[vif-bridge:] This script is called for every domain virtual
 11.1417 -interface and can configure firewalling rules and add the vif 
 11.1418 -to the appropriate bridge. By default, this adds and removes 
 11.1419 -VIFs on the default Xen bridge.
 11.1420 -
 11.1421 -\end{description} 
 11.1422 -
 11.1423 -For more complex network setups (e.g. where routing is required or
 11.1424 -integration with existing bridges is needed) these scripts may be
 11.1425 -replaced with customised variants for your site's preferred configuration.
 11.1426 -
 11.1427 -%% There are two possible types of privileges:  IO privileges and
 11.1428 -%% administration privileges.
 11.1429 -
 11.1430 -\section{Driver Domain Configuration} 
 11.1431 -
 11.1432 -I/O privileges can be assigned to allow a domain to directly access
 11.1433 -PCI devices itself.  This is used to support driver domains.
 11.1434 -
 11.1435 -Setting backend privileges is currently only supported in SXP format
 11.1436 -config files.  To allow a domain to function as a backend for others,
 11.1437 -somewhere within the {\tt vm} element of its configuration file must
 11.1438 -be a {\tt backend} element of the form {\tt (backend ({\em type}))}
 11.1439 -where {\tt \em type} may be either {\tt netif} or {\tt blkif},
 11.1440 -according to the type of virtual device this domain will service.
 11.1441 -%% After this domain has been built, \xend will connect all new and
 11.1442 -%% existing {\em virtual} devices (of the appropriate type) to that
 11.1443 -%% backend.
 11.1444 -
 11.1445 -Note that a block backend cannot currently import virtual block
 11.1446 -devices from other domains, and a network backend cannot import
 11.1447 -virtual network devices from other domains.  Thus (particularly in the
 11.1448 -case of block backends, which cannot import a virtual block device as
 11.1449 -their root filesystem), you may need to boot a backend domain from a
 11.1450 -ramdisk or a network device.
 11.1451 -
 11.1452 -Access to PCI devices may be configured on a per-device basis.  Xen
 11.1453 -will assign a domain the minimal set of hardware privileges
 11.1454 -required to control its devices.  This can be configured in either
 11.1455 -format of configuration file:
 11.1456 -
 11.1457 -\begin{itemize}
 11.1458 -\item SXP Format: Include device elements of the form: \\
 11.1459 -\centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
 11.1460 -  inside the top-level {\tt vm} element.  Each one specifies the address
 11.1461 -  of a device this domain is allowed to access ---
 11.1462 -  the numbers {\em x},{\em y} and {\em z} may be in either decimal or
 11.1463 -  hexadecimal format.
 11.1464 -\item Flat Format: Include a list of PCI device addresses of the
 11.1465 -  format: \\ 
 11.1466 -\centerline{{\tt pci = ['x,y,z', ...]}} \\ 
 11.1467 -where each element in the
 11.1468 -  list is a string specifying the components of the PCI device
 11.1469 -  address, separated by commas.  The components ({\tt \em x}, {\tt \em
 11.1470 -  y} and {\tt \em z}) of the list may be formatted as either decimal
 11.1471 -  or hexadecimal.
 11.1472 -\end{itemize}
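
As a concrete illustration, granting a domain access to the device at
bus 0, device 4, function 0 could be written in the SXP format as:
\begin{quote}
\verb_(device (pci (bus 0x0) (dev 0x4) (func 0x0)))_
\end{quote}
or in the flat format as:
\begin{quote}
\verb_pci = ['0x0,0x4,0x0']_
\end{quote}
(the addresses here are purely illustrative).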
 11.1473 -
 11.1474 -%% \section{Administration Domains}
 11.1475 -
 11.1476 -%% Administration privileges allow a domain to use the `dom0
 11.1477 -%% operations' (so called because they are usually available only to
 11.1478 -%% domain 0).  A privileged domain can build other domains, set scheduling
 11.1479 -%% parameters, etc.
 11.1480 -
 11.1481 -% Support for other administrative domains is not yet available...  perhaps
 11.1482 -% we should plumb it in some time
 11.1483 -
 11.1484 -
 11.1485 -
 11.1486 -
 11.1487 -
 11.1488 -\section{Scheduler Configuration}
 11.1489 -\label{s:sched} 
 11.1490 -
 11.1491 -
 11.1492 -Xen offers a boot time choice between multiple schedulers.  To select
 11.1493 -a scheduler, pass the boot parameter {\em sched=sched\_name} to Xen,
 11.1494 -substituting the appropriate scheduler name.  Details of the schedulers
 11.1495 -and their parameters are included below; future versions of the tools
 11.1496 -will provide a higher-level interface to these settings.
 11.1497 +%% Chapter Control Software moved to control_software.tex
 11.1498 +\include{src/user/control_software}
 11.1499  
 11.1500 -It is expected that system administrators configure their system to
 11.1501 -use the scheduler most appropriate to their needs.  Currently, the BVT
 11.1502 -scheduler is the recommended choice. 
 11.1503 -
 11.1504 -\subsection{Borrowed Virtual Time}
 11.1505 -
 11.1506 -{\tt sched=bvt} (the default) \\ 
 11.1507 -
 11.1508 -BVT provides proportional fair shares of the CPU time.  It has been
 11.1509 -observed to penalise domains that block frequently (e.g. I/O intensive
 11.1510 -domains), but this can be compensated for by using warping. 
 11.1511 -
 11.1512 -\subsubsection{Global Parameters}
 11.1513 -
 11.1514 -\begin{description}
 11.1515 -\item[ctx\_allow]
 11.1516 -  the context switch allowance is similar to the `quantum'
 11.1517 -  in traditional schedulers.  It is the minimum time that
 11.1518 -  a scheduled domain will be allowed to run before being
 11.1519 -  pre-empted. 
 11.1520 -\end{description}
 11.1521 -
 11.1522 -\subsubsection{Per-domain parameters}
 11.1523 -
 11.1524 -\begin{description}
 11.1525 -\item[mcuadv]
 11.1526 -  the MCU (Minimum Charging Unit) advance determines the
 11.1527 -  proportional share of the CPU that a domain receives.  It
 11.1528 -  is set inversely proportionally to a domain's sharing weight.
 11.1529 -\item[warp]
 11.1530 -  the amount of `virtual time' the domain is allowed to warp
 11.1531 -  backwards
 11.1532 -\item[warpl]
 11.1533 -  the warp limit is the maximum time a domain can run warped for
 11.1534 -\item[warpu]
 11.1535 -  the unwarp requirement is the minimum time a domain must
 11.1536 -  run unwarped for before it can warp again
 11.1537 -\end{description}
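
These parameters can be adjusted at runtime with the \path{xm bvt} and
\path{xm bvt\_ctxallow} commands listed in the previous chapter; the
argument order shown below is a sketch only, so consult
\path{xm help bvt} for the exact syntax:
\begin{quote}
\begin{verbatim}
# xm bvt_ctxallow <allowance>
# xm bvt <domid> <mcuadv> <warp> <warpl> <warpu>
\end{verbatim}
\end{quote}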
 11.1538 -
 11.1539 -\subsection{Atropos}
 11.1540 -
 11.1541 -{\tt sched=atropos} \\
 11.1542 -
 11.1543 -Atropos is a soft real-time scheduler.  It provides guarantees about
 11.1544 -absolute shares of the CPU, with a facility for sharing
 11.1545 -slack CPU time on a best-effort basis. It can provide timeliness
 11.1546 -guarantees for latency-sensitive domains.
 11.1547 -
 11.1548 -Every domain has an associated period and slice.  The domain should
 11.1549 -receive `slice' nanoseconds every `period' nanoseconds.  This allows
 11.1550 -the administrator to configure both the absolute share of the CPU a
 11.1551 -domain receives and the frequency with which it is scheduled. 
 11.1552 -
 11.1553 -%%  When
 11.1554 -%% domains unblock, their period is reduced to the value of the latency
 11.1555 -%% hint (the slice is scaled accordingly so that they still get the same
 11.1556 -%% proportion of the CPU).  For each subsequent period, the slice and
 11.1557 -%% period times are doubled until they reach their original values.
 11.1558 -
 11.1559 -Note: don't overcommit the CPU when using Atropos (i.e. don't reserve
 11.1560 -more CPU than is available --- the utilisation should be kept to
 11.1561 -slightly less than 100\% in order to ensure predictable behaviour).
 11.1562 -
 11.1563 -\subsubsection{Per-domain parameters}
 11.1564 -
 11.1565 -\begin{description}
 11.1566 -\item[period] The regular time interval during which a domain is
 11.1567 -  guaranteed to receive its allocation of CPU time.
 11.1568 -\item[slice]
 11.1569 -  The length of time per period that a domain is guaranteed to run
 11.1570 -  for (in the absence of voluntary yielding of the CPU). 
 11.1571 -\item[latency]
 11.1572 -  The latency hint is used to control how soon after
 11.1573 -  waking up a domain it should be scheduled.
 11.1574 -\item[xtratime] This is a boolean flag that specifies whether a domain
 11.1575 -  should be allowed a share of the system slack time.
 11.1576 -\end{description}
 11.1577 -
 11.1578 -\subsection{Round Robin}
 11.1579 -
 11.1580 -{\tt sched=rrobin} \\
 11.1581 -
 11.1582 -The round robin scheduler is included as a simple demonstration of
 11.1583 -Xen's internal scheduler API.  It is not intended for production use. 
 11.1584 -
 11.1585 -\subsubsection{Global Parameters}
 11.1586 -
 11.1587 -\begin{description}
 11.1588 -\item[rr\_slice]
 11.1589 -  The maximum time each domain runs before the next
 11.1590 -  scheduling decision is made.
 11.1591 -\end{description}
 11.1592 -
 11.1593 -
 11.1594 -
 11.1595 -
 11.1596 -
 11.1597 -
 11.1598 -
 11.1599 -
 11.1600 -
 11.1601 -
 11.1602 -
 11.1603 -
 11.1604 -\chapter{Build, Boot and Debug options} 
 11.1605 -
 11.1606 -This chapter describes the build- and boot-time options 
 11.1607 -which may be used to tailor your Xen system. 
 11.1608 -
 11.1609 -\section{Xen Build Options}
 11.1610 -
 11.1611 -Xen provides a number of build-time options which should be 
 11.1612 -set as environment variables or passed on make's command-line.  
 11.1613 -
 11.1614 -\begin{description} 
 11.1615 -\item[verbose=y] Enable debugging messages when Xen detects an unexpected condition.
 11.1616 -Also enables console output from all domains.
 11.1617 -\item[debug=y] 
 11.1618 -Enable debug assertions.  Implies {\bf verbose=y}.
 11.1619 -(Primarily useful for tracing bugs in Xen).       
 11.1620 -\item[debugger=y] 
 11.1621 -Enable the in-Xen debugger. This can be used to debug 
 11.1622 -Xen, guest OSes, and applications.
 11.1623 -\item[perfc=y] 
 11.1624 -Enable performance counters for significant events
 11.1625 -within Xen. The counts can be reset or displayed
 11.1626 -on Xen's console via console control keys.
 11.1627 -\item[trace=y] 
 11.1628 -Enable per-cpu trace buffers which log a range of
 11.1629 -events within Xen for collection by control
 11.1630 -software. 
 11.1631 -\end{description} 
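
For example, a debug build with performance counters enabled might be
produced with (using the \path{world} target here as a stand-in for
your usual build target):
\begin{quote}
\verb!# make debug=y perfc=y world!
\end{quote}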
 11.1632 -
 11.1633 -\section{Xen Boot Options}
 11.1634 -\label{s:xboot}
 11.1635 -
 11.1636 -These options are used to configure Xen's behaviour at runtime.  They
 11.1637 -should be appended to Xen's command line, either manually or by
 11.1638 -editing \path{grub.conf}.
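
A \path{grub.conf} entry might then look something like the following
(the paths, memory size and root device are illustrative; adapt them
to your installation):
\begin{quote}
\begin{verbatim}
title Xen / XenLinux 2.6
  kernel /boot/xen.gz dom0_mem=262144 console=vga
  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
\end{verbatim}
\end{quote}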
 11.1639 -
 11.1640 -\begin{description}
 11.1641 -\item [noreboot ] 
 11.1642 - Don't reboot the machine automatically on errors.  This is
 11.1643 - useful to catch debug output if you aren't catching console messages
 11.1644 - via the serial line. 
 11.1645 -
 11.1646 -\item [nosmp ] 
 11.1647 - Disable SMP support.
 11.1648 - This option is implied by `ignorebiostables'. 
 11.1649 -
 11.1650 -\item [watchdog ] 
 11.1651 - Enable NMI watchdog which can report certain failures. 
 11.1652 -
 11.1653 -\item [noirqbalance ] 
 11.1654 - Disable software IRQ balancing and affinity. This can be used on
 11.1655 - systems such as Dell 1850/2850 that have workarounds in hardware for
 11.1656 - IRQ-routing issues.
 11.1657 +%% Chapter Domain Configuration moved to domain_configuration.tex
 11.1658 +\include{src/user/domain_configuration}
 11.1659  
 11.1660 -\item [badpage=$<$page number$>$,$<$page number$>$, \ldots ] 
 11.1661 - Specify a list of pages not to be allocated for use 
 11.1662 - because they contain bad bytes. For example, if your
 11.1663 - memory tester says that byte 0x12345678 is bad, you would
 11.1664 - place `badpage=0x12345' on Xen's command line. 
 11.1665 -
 11.1666 -\item [com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
 11.1667 - com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\ 
 11.1668 - Xen supports up to two 16550-compatible serial ports.
 11.1669 - For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
 11.1670 - 9600-baud port, 8 data bits, no parity, 1 stop bit,
 11.1671 - I/O port base 0x408, IRQ 5.
 11.1672 - If some configuration options are standard (e.g., I/O base and IRQ),
 11.1673 - then only a prefix of the full configuration string need be
 11.1674 - specified. If the baud rate is pre-configured (e.g., by the
 11.1675 - bootloader) then you can specify `auto' in place of a numeric baud
 11.1676 - rate. 
 11.1677 -
 11.1678 -\item [console=$<$specifier list$>$ ] 
 11.1679 - Specify the destination for Xen console I/O.
 11.1680 - This is a comma-separated list of, for example:
 11.1681 -\begin{description}
 11.1682 - \item[vga]  use VGA console and allow keyboard input
 11.1683 - \item[com1] use serial port com1
 11.1684 - \item[com2H] use serial port com2. Transmitted chars will
 11.1685 -   have the MSB set. Received chars must have
 11.1686 -   MSB set.
 11.1687 - \item[com2L] use serial port com2. Transmitted chars will
 11.1688 -   have the MSB cleared. Received chars must
 11.1689 -   have MSB cleared.
 11.1690 -\end{description}
 11.1691 - The latter two examples allow a single port to be
 11.1692 - shared by two subsystems (e.g. console and
 11.1693 - debugger). Sharing is controlled by MSB of each
 11.1694 - transmitted/received character.
 11.1695 - [NB. Default for this option is `com1,vga'] 
 11.1696 -
 11.1697 -\item [sync\_console ]
 11.1698 - Force synchronous console output. This is useful if your system fails
 11.1699 - unexpectedly before it has sent all available output to the
 11.1700 - console. In most cases Xen will automatically enter synchronous mode
 11.1701 - when an exceptional event occurs, but this option provides a manual
 11.1702 - fallback.
 11.1703 -
 11.1704 -\item [conswitch=$<$switch-char$><$auto-switch-char$>$ ] 
 11.1705 - Specify how to switch serial-console input between
 11.1706 - Xen and DOM0. The required sequence is CTRL-$<$switch-char$>$
 11.1707 - pressed three times. Specifying the backtick character 
 11.1708 - disables switching.
 11.1709 - The $<$auto-switch-char$>$ specifies whether Xen should
 11.1710 - auto-switch input to DOM0 when it boots --- if it is `x'
 11.1711 - then auto-switching is disabled.  Any other value, or
 11.1712 - omitting the character, enables auto-switching.
 11.1713 - [NB. default switch-char is `a'] 
 11.1714 -
 11.1715 -\item [nmi=xxx ] 
 11.1716 - Specify what to do with an NMI parity or I/O error. \\
 11.1717 - `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
 11.1718 - `nmi=dom0':   Inform DOM0 of the NMI. \\
 11.1719 - `nmi=ignore': Ignore the NMI. 
 11.1720 -
 11.1721 -\item [mem=xxx ]
 11.1722 - Set the physical RAM address limit. Any RAM appearing beyond this
 11.1723 - physical address in the memory map will be ignored. This parameter
 11.1724 - may be specified with a B, K, M or G suffix, representing bytes,
 11.1725 - kilobytes, megabytes and gigabytes respectively. The
 11.1726 - default unit, if no suffix is specified, is kilobytes.
 11.1727 -
 11.1728 -\item [dom0\_mem=xxx ] 
 11.1729 - Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
 11.1730 - may be specified with a B, K, M or G suffix, representing bytes,
 11.1731 - kilobytes, megabytes and gigabytes respectively; if no suffix is specified, 
 11.1732 - the parameter defaults to kilobytes. In previous versions of Xen, suffixes
 11.1733 - were not supported and the value is always interpreted as kilobytes. 
 11.1734 -
 11.1735 -\item [tbuf\_size=xxx ] 
 11.1736 - Set the size of the per-cpu trace buffers, in pages
 11.1737 - (default 1).  Note that the trace buffers are only
 11.1738 - enabled in debug builds.  Most users can ignore
 11.1739 - this feature completely. 
 11.1740 -
 11.1741 -\item [sched=xxx ] 
 11.1742 - Select the CPU scheduler Xen should use.  The current
 11.1743 - possibilities are `bvt' (default), `atropos' and `rrobin'. 
 11.1744 - For more information see Section~\ref{s:sched}. 
 11.1745 -
 11.1746 -\item [apic\_verbosity=debug,verbose ]
 11.1747 - Print more detailed information about local APIC and IOAPIC configuration.
 11.1748 -
 11.1749 -\item [lapic ]
 11.1750 - Force use of local APIC even when left disabled by uniprocessor BIOS.
 11.1751 -
 11.1752 -\item [nolapic ]
 11.1753 - Ignore local APIC in a uniprocessor system, even if enabled by the BIOS.
 11.1754 -
 11.1755 -\item [apic=bigsmp,default,es7000,summit ]
 11.1756 - Specify NUMA platform. This can usually be probed automatically.
 11.1757 -
 11.1758 -\end{description} 
 11.1759 -
 11.1760 -In addition, the following options may be specified on the Xen command
 11.1761 -line. Since domain 0 shares responsibility for booting the platform,
 11.1762 -Xen will automatically propagate these options to its command
 11.1763 -line. These options are taken from Linux's command-line syntax with
 11.1764 -unchanged semantics.
 11.1765 -
 11.1766 -\begin{description}
 11.1767 -\item [acpi=off,force,strict,ht,noirq,\ldots ] 
 11.1768 - Modify how Xen (and domain 0) parses the BIOS ACPI tables.
 11.1769 -
 11.1770 -\item [acpi\_skip\_timer\_override ]
 11.1771 - Instruct Xen (and domain 0) to ignore timer-interrupt override
 11.1772 - instructions specified by the BIOS ACPI tables.
 11.1773 -
 11.1774 -\item [noapic ]
 11.1775 - Instruct Xen (and domain 0) to ignore any IOAPICs that are present in
 11.1776 - the system, and instead continue to use the legacy PIC.
 11.1777 -
 11.1778 -\end{description} 
 11.1779 -
 11.1780 -\section{XenLinux Boot Options}
 11.1781 -
 11.1782 -In addition to the standard Linux kernel boot options, we support: 
 11.1783 -\begin{description} 
 11.1784 -\item[xencons=xxx ] Specify the device node to which the Xen virtual
 11.1785 -console driver is attached. The following options are supported:
 11.1786 -\begin{center}
 11.1787 -\begin{tabular}{l}
 11.1788 -`xencons=off': disable virtual console \\ 
 11.1789 -`xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
 11.1790 -`xencons=ttyS': attach console to /dev/ttyS0
 11.1791 -\end{tabular}
 11.1792 -\end{center}
 11.1793 -The default is ttyS for dom0 and tty for all other domains.
 11.1794 -\end{description} 
 11.1795 -
 11.1796 -
 11.1797 -
 11.1798 -\section{Debugging}
 11.1799 -\label{s:keys} 
 11.1800 -
 11.1801 -Xen has a set of debugging features that can be useful for figuring
 11.1802 -out what's going on. Hit 'h' on the serial line (if you
 11.1803 -specified a baud rate on the Xen command line) or ScrollLock-h on the
 11.1804 -keyboard to get a list of supported commands.
 11.1805 -
 11.1806 -If you have a crash you'll likely get a crash dump containing an EIP
 11.1807 -(PC) which, along with an \path{objdump -d image}, can be useful in
 11.1808 -figuring out what's happened.  Debug a XenLinux image just as you
 11.1809 -would any other Linux kernel.
 11.1810 -
 11.1811 -%% We supply a handy debug terminal program which you can find in
 11.1812 -%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/}
 11.1813 -%% This should be built and executed on another machine that is connected
 11.1814 -%% via a null modem cable. Documentation is included.
 11.1815 -%% Alternatively, if the Xen machine is connected to a serial-port server
 11.1816 -%% then we supply a dumb TCP terminal client, {\tt xencons}.
 11.1817 -
 11.1818 -
 11.1819 +%% Chapter Build, Boot and Debug Options moved to build.tex
 11.1820 +\include{src/user/build}
 11.1821  
 11.1822  
 11.1823  \chapter{Further Support}
 11.1824 @@ -1875,6 +108,7 @@ directory of the Xen source distribution
 11.1825  %Various HOWTOs are available in \path{docs/HOWTOS} but this content is
 11.1826  %being integrated into this manual.
 11.1827  
 11.1828 +
 11.1829  \section{Online References}
 11.1830  
 11.1831  The official Xen web site is found at:
 11.1832 @@ -1885,6 +119,7 @@ The official Xen web site is found at:
 11.1833  This contains links to the latest versions of all on-line 
 11.1834  documentation (including the latest version of the FAQ). 
 11.1835  
 11.1836 +
 11.1837  \section{Mailing Lists}
 11.1838  
 11.1839  There are currently four official Xen mailing lists:
 11.1840 @@ -1905,326 +140,18 @@ from the unstable and 2.0 trees - develo
 11.1841  \end{description}
 11.1842  
 11.1843  
 11.1844 +
 11.1845  \appendix
 11.1846  
 11.1847 +%% Chapter Installing Xen / XenLinux on Debian moved to debian.tex
 11.1848 +\include{src/user/debian}
 11.1849 +
 11.1850 +%% Chapter Installing Xen on Red Hat moved to redhat.tex
 11.1851 +\include{src/user/redhat}
 11.1852 +
 11.1853  
 11.1854 -\chapter{Installing Xen / XenLinux on Debian}
 11.1855 -
 11.1856 -The Debian project provides a tool called \path{debootstrap} which
 11.1857 -allows a base Debian system to be installed into a filesystem without
 11.1858 -requiring the host system to have any Debian-specific software (such
 11.1859 -as \path{apt}. 
 11.1860 -
 11.1861 -Here's some info how to install Debian 3.1 (Sarge) for an unprivileged
 11.1862 -Xen domain:
 11.1863 -
 11.1864 -\begin{enumerate}
 11.1865 -\item Set up Xen 2.0 and test that it's working, as described earlier in
 11.1866 -      this manual.
 11.1867 -
 11.1868 -\item Create disk images for root-fs and swap (alternatively, you
 11.1869 -      might create dedicated partitions, LVM logical volumes, etc. if
 11.1870 -      that suits your setup).
 11.1871 -\begin{small}\begin{verbatim}  
 11.1872 -dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
 11.1873 -dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
 11.1874 -\end{verbatim}\end{small}
 11.1875 -      If you're going to use this filesystem / disk image only as a
 11.1876 -      `template' for other vm disk images, something like 300 MB should
 11.1877 -      be enough.. (of course it depends what kind of packages you are
 11.1878 -      planning to install to the template)
 11.1879 -
 11.1880 -\item Create the filesystem and initialise the swap image
 11.1881 -\begin{small}\begin{verbatim}
 11.1882 -mkfs.ext3 /path/diskimage
 11.1883 -mkswap /path/swapimage
 11.1884 -\end{verbatim}\end{small}
 11.1885 -
 11.1886 -\item Mount the disk image for installation
 11.1887 -\begin{small}\begin{verbatim}
 11.1888 -mount -o loop /path/diskimage /mnt/disk
 11.1889 -\end{verbatim}\end{small}
 11.1890 -
 11.1891 -\item Install \path{debootstrap}
 11.1892 -
 11.1893 -Make sure you have debootstrap installed on the host.  If you are
 11.1894 -running Debian sarge (3.1 / testing) or unstable you can install it by
 11.1895 -running \path{apt-get install debootstrap}.  Otherwise, it can be
 11.1896 -downloaded from the Debian project website.
 11.1897 -
 11.1898 -\item Install Debian base to the disk image:
 11.1899 -\begin{small}\begin{verbatim}
 11.1900 -debootstrap --arch i386 sarge /mnt/disk  \
 11.1901 -            http://ftp.<countrycode>.debian.org/debian
 11.1902 -\end{verbatim}\end{small}
 11.1903 -
 11.1904 -You can use any other Debian http/ftp mirror you want.
 11.1905 -
 11.1906 -\item When debootstrap completes successfully, modify settings:
 11.1907 -\begin{small}\begin{verbatim}
 11.1908 -chroot /mnt/disk /bin/bash
 11.1909 -\end{verbatim}\end{small}
 11.1910 -
 11.1911 -Edit the following files using vi or nano and make needed changes:
 11.1912 -\begin{small}\begin{verbatim}
 11.1913 -/etc/hostname
 11.1914 -/etc/hosts
 11.1915 -/etc/resolv.conf
 11.1916 -/etc/network/interfaces
 11.1917 -/etc/networks
 11.1918 -\end{verbatim}\end{small}
 11.1919 -
 11.1920 -Set up access to the services, edit:
 11.1921 -\begin{small}\begin{verbatim}
 11.1922 -/etc/hosts.deny
 11.1923 -/etc/hosts.allow
 11.1924 -/etc/inetd.conf
 11.1925 -\end{verbatim}\end{small}
 11.1926 -
 11.1927 -Add Debian mirror to:   
 11.1928 -\begin{small}\begin{verbatim}
 11.1929 -/etc/apt/sources.list
 11.1930 -\end{verbatim}\end{small}
 11.1931 -
 11.1932 -Create fstab like this:
 11.1933 -\begin{small}\begin{verbatim}
 11.1934 -/dev/sda1       /       ext3    errors=remount-ro       0       1
 11.1935 -/dev/sda2       none    swap    sw                      0       0
 11.1936 -proc            /proc   proc    defaults                0       0
 11.1937 -\end{verbatim}\end{small}
 11.1938 -
 11.1939 -Logout
 11.1940 -
 11.1941 -\item      Unmount the disk image
 11.1942 -\begin{small}\begin{verbatim}
 11.1943 -umount /mnt/disk
 11.1944 -\end{verbatim}\end{small}
 11.1945 -
 11.1946 -\item Create Xen 2.0 configuration file for the new domain. You can
 11.1947 -        use the example-configurations coming with Xen as a template.
 11.1948 -
 11.1949 -        Make sure you have the following set up:
 11.1950 -\begin{small}\begin{verbatim}
 11.1951 -disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
 11.1952 -root = "/dev/sda1 ro"
 11.1953 -\end{verbatim}\end{small}
 11.1954 -
 11.1955 -\item Start the new domain
 11.1956 -\begin{small}\begin{verbatim}
 11.1957 -xm create -f domain_config_file
 11.1958 -\end{verbatim}\end{small}
 11.1959 -
 11.1960 -Check that the new domain is running:
 11.1961 -\begin{small}\begin{verbatim}
 11.1962 -xm list
 11.1963 -\end{verbatim}\end{small}
 11.1964 -
 11.1965 -\item   Attach to the console of the new domain.
 11.1966 -        You should see something like this when starting the new domain:
 11.1967 -
 11.1968 -\begin{small}\begin{verbatim}
 11.1969 -Started domain testdomain2, console on port 9626
 11.1970 -\end{verbatim}\end{small}
 11.1971 -        
 11.1972 -        There you can see the ID of the console: 26. You can also list
 11.1973 -        the consoles with \path{xm consoles} (ID is the last two
 11.1974 -        digits of the port number.)
 11.1975 -
 11.1976 -        Attach to the console:
 11.1977 -
 11.1978 -\begin{small}\begin{verbatim}
 11.1979 -xm console 26
 11.1980 -\end{verbatim}\end{small}
 11.1981 -
 11.1982 -        or by telnetting to the port 9626 of localhost (the xm console
 11.1983 -        program works better).
 11.1984 -
 11.1985 -\item   Log in and run base-config
 11.1986 -
 11.1987 -        As a default there's no password for the root.
 11.1988 -
 11.1989 -        Check that everything looks OK, and the system started without
 11.1990 -        errors.  Check that the swap is active, and the network settings are
 11.1991 -        correct.
 11.1992 -
 11.1993 -        Run \path{/usr/sbin/base-config} to set up the Debian settings.
 11.1994 -
 11.1995 -        Set up the password for root using passwd.
 11.1996 -
 11.1997 -\item     Done. You can exit the console by pressing \path{Ctrl + ]}
 11.1998 -
 11.1999 -\end{enumerate}
 11.2000 -
 11.2001 -If you need to create new domains, you can just copy the contents of
 11.2002 -the `template'-image to the new disk images, either by mounting the
 11.2003 -template and the new image, and using \path{cp -a} or \path{tar} or by
 11.2004 -simply copying the image file.  Once this is done, modify the
 11.2005 -image-specific settings (hostname, network settings, etc).
 11.2006 -
 11.2007 -\chapter{Installing Xen / XenLinux on Redhat or Fedora Core}
 11.2008 -
 11.2009 -When using Xen / XenLinux on a standard Linux distribution there are
 11.2010 -a couple of things to watch out for:
 11.2011 -
 11.2012 -Note that, because domains>0 don't have any privileged access at all,
 11.2013 -certain commands in the default boot sequence will fail e.g. attempts
 11.2014 -to update the hwclock, change the console font, update the keytable
 11.2015 -map, start apmd (power management), or gpm (mouse cursor).  Either
 11.2016 -ignore the errors (they should be harmless), or remove them from the
 11.2017 -startup scripts.  Deleting the following links are a good start:
 11.2018 -{\path{S24pcmcia}}, {\path{S09isdn}},
 11.2019 -{\path{S17keytable}}, {\path{S26apmd}},
 11.2020 -{\path{S85gpm}}.
 11.2021 -
 11.2022 -If you want to use a single root file system that works cleanly for
 11.2023 -both domain 0 and unprivileged domains, a useful trick is to use
 11.2024 -different 'init' run levels. For example, use
 11.2025 -run level 3 for domain 0, and run level 4 for other domains. This
 11.2026 -enables different startup scripts to be run in depending on the run
 11.2027 -level number passed on the kernel command line.
 11.2028 -
 11.2029 -If using NFS root files systems mounted either from an
 11.2030 -external server or from domain0 there are a couple of other gotchas.
 11.2031 -The default {\path{/etc/sysconfig/iptables}} rules block NFS, so part
 11.2032 -way through the boot sequence things will suddenly go dead.
 11.2033 -
 11.2034 -If you're planning on having a separate NFS {\path{/usr}} partition, the
 11.2035 -RH9 boot scripts don't make life easy - they attempt to mount NFS file
 11.2036 -systems way to late in the boot process. The easiest way I found to do
 11.2037 -this was to have a {\path{/linuxrc}} script run ahead of
 11.2038 -{\path{/sbin/init}} that mounts {\path{/usr}}:
 11.2039 -
 11.2040 -\begin{quote}
 11.2041 -\begin{small}\begin{verbatim}
 11.2042 - #!/bin/bash
 11.2043 - /sbin/ipconfig lo 127.0.0.1
 11.2044 - /sbin/portmap
 11.2045 - /bin/mount /usr
 11.2046 - exec /sbin/init "$@" <>/dev/console 2>&1
 11.2047 -\end{verbatim}\end{small}
 11.2048 -\end{quote}
 11.2049 -
 11.2050 -%$ XXX SMH: font lock fix :-)  
 11.2051 -
 11.2052 -The one slight complication with the above is that
 11.2053 -{\path{/sbin/portmap}} is dynamically linked against
 11.2054 -{\path{/usr/lib/libwrap.so.0}} Since this is in
 11.2055 -{\path{/usr}}, it won't work. This can be solved by copying the
 11.2056 -file (and link) below the /usr mount point, and just let the file be
 11.2057 -'covered' when the mount happens.
 11.2058 -
 11.2059 -In some installations, where a shared read-only {\path{/usr}} is
 11.2060 -being used, it may be desirable to move other large directories over
 11.2061 -into the read-only {\path{/usr}}. For example, you might replace
 11.2062 -{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with
 11.2063 -links into {\path{/usr/root/bin}}, {\path{/usr/root/lib}}
 11.2064 -and {\path{/usr/root/sbin}} respectively. This creates other
 11.2065 -problems for running the {\path{/linuxrc}} script, requiring
 11.2066 -bash, portmap, mount, ifconfig, and a handful of other shared
 11.2067 -libraries to be copied below the mount point --- a simple
 11.2068 -statically-linked C program would solve this problem.
 11.2069 -
 11.2070 -
 11.2071 -
 11.2072 -
 11.2073 -\chapter{Glossary of Terms}
 11.2074 -
 11.2075 -\begin{description}
 11.2076 -\item[Atropos]             One of the CPU schedulers provided by Xen.
 11.2077 -                           Atropos provides domains with absolute shares
 11.2078 -                           of the CPU, with timeliness guarantees and a
 11.2079 -                           mechanism for sharing out `slack time'.
 11.2080 -
 11.2081 -\item[BVT]                 The BVT scheduler is used to give proportional
 11.2082 -                           fair shares of the CPU to domains.
 11.2083 -
 11.2084 -\item[Exokernel]           A minimal piece of privileged code, similar to
 11.2085 -                           a {\bf microkernel} but providing a more
 11.2086 -                           `hardware-like' interface to the tasks it
 11.2087 -                           manages.  This is similar to a paravirtualising
 11.2088 -                           VMM like {\bf Xen} but was designed as a new
 11.2089 -                           operating system structure, rather than
 11.2090 -                           specifically to run multiple conventional OSs.
 11.2091 -
 11.2092 -\item[Domain]              A domain is the execution context that
 11.2093 -                           contains a running {\bf virtual machine}.
 11.2094 -                           The relationship between virtual machines
 11.2095 -                           and domains on Xen is similar to that between
 11.2096 -                           programs and processes in an operating
 11.2097 -                           system: a virtual machine is a persistent
 11.2098 -                           entity that resides on disk (somewhat like
 11.2099 -                           a program).  When it is loaded for execution,
 11.2100 -                           it runs in a domain.  Each domain has a
 11.2101 -                           {\bf domain ID}.
 11.2102 -
 11.2103 -\item[Domain 0]            The first domain to be started on a Xen
 11.2104 -                           machine.  Domain 0 is responsible for managing
 11.2105 -                           the system.
 11.2106 -
 11.2107 -\item[Domain ID]           A unique identifier for a {\bf domain},
 11.2108 -                           analogous to a process ID in an operating
 11.2109 -                           system.
 11.2110 -
 11.2111 -\item[Full virtualisation] An approach to virtualisation which
 11.2112 -                           requires no modifications to the hosted
 11.2113 -                           operating system, providing the illusion of
 11.2114 -                           a complete system of real hardware devices.
 11.2115 -
 11.2116 -\item[Hypervisor]          An alternative term for {\bf VMM}, used
 11.2117 -                           because it means `beyond supervisor',
 11.2118 -                           since it is responsible for managing multiple
 11.2119 -                           `supervisor' kernels.
 11.2120 -
 11.2121 -\item[Live migration]      A technique for moving a running virtual
 11.2122 -                           machine to another physical host, without
 11.2123 -                           stopping it or the services running on it.
 11.2124 -
 11.2125 -\item[Microkernel]         A small base of code running at the highest
 11.2126 -                           hardware privilege level.  A microkernel is
 11.2127 -                           responsible for sharing CPU and memory (and
 11.2128 -                           sometimes other devices) between less
 11.2129 -                           privileged tasks running on the system.
 11.2130 -                           This is similar to a VMM, particularly a
 11.2131 -                           {\bf paravirtualising} VMM but typically
 11.2132 -                           addressing a different problem space and
 11.2133 -                           providing different kind of interface.
 11.2134 -
 11.2135 -\item[NetBSD/Xen]          A port of NetBSD to the Xen architecture.
 11.2136 -
 11.2137 -\item[Paravirtualisation]  An approach to virtualisation which requires
 11.2138 -                           modifications to the operating system in
 11.2139 -                           order to run in a virtual machine.  Xen
 11.2140 -                           uses paravirtualisation but preserves
 11.2141 -                           binary compatibility for user space
 11.2142 -                           applications.
 11.2143 -
 11.2144 -\item[Shadow pagetables]   A technique for hiding the layout of machine
 11.2145 -                           memory from a virtual machine's operating
 11.2146 -                           system.  Used in some {\bf VMMs} to provide
 11.2147 -                           the illusion of contiguous physical memory,
 11.2148 -                           in Xen this is used during
 11.2149 -                           {\bf live migration}.
 11.2150 -
 11.2151 -\item[Virtual Machine]     The environment in which a hosted operating
 11.2152 -                           system runs, providing the abstraction of a
 11.2153 -                           dedicated machine.  A virtual machine may
 11.2154 -                           be identical to the underlying hardware (as
 11.2155 -                           in {\bf full virtualisation}, or it may
 11.2156 -                           differ, as in {\bf paravirtualisation}.
 11.2157 -
 11.2158 -\item[VMM]                 Virtual Machine Monitor - the software that
 11.2159 -                           allows multiple virtual machines to be
 11.2160 -                           multiplexed on a single physical machine.
 11.2161 -
 11.2162 -\item[Xen]                 Xen is a paravirtualising virtual machine
 11.2163 -                           monitor, developed primarily by the
 11.2164 -                           Systems Research Group at the University
 11.2165 -                           of Cambridge Computer Laboratory.
 11.2166 -
 11.2167 -\item[XenLinux]            Official name for the port of the Linux kernel
 11.2168 -                           that runs on Xen.
 11.2169 -
 11.2170 -\end{description}
 11.2171 +%% Chapter Glossary of Terms moved to glossary.tex
 11.2172 +\include{src/user/glossary}
 11.2173  
 11.2174  
 11.2175  \end{document}
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/docs/src/user/build.tex	Tue Sep 20 09:43:46 2005 +0000
    12.3 @@ -0,0 +1,170 @@
    12.4 +\chapter{Build, Boot and Debug Options} 
    12.5 +
    12.6 +This chapter describes the build- and boot-time options which may be
    12.7 +used to tailor your Xen system.
    12.8 +
    12.9 +
   12.10 +\section{Xen Build Options}
   12.11 +
   12.12 +Xen provides a number of build-time options which should be set as
   12.13 +environment variables or passed on make's command-line.
   12.14 +
   12.15 +\begin{description}
   12.16 +\item[verbose=y] Enable debugging messages when Xen detects an
   12.17 +  unexpected condition.  Also enables console output from all domains.
   12.18 +\item[debug=y] Enable debug assertions.  Implies {\bf verbose=y}.
   12.19 +  (Primarily useful for tracing bugs in Xen).
   12.20 +\item[debugger=y] Enable the in-Xen debugger. This can be used to
   12.21 +  debug Xen, guest OSes, and applications.
   12.22 +\item[perfc=y] Enable performance counters for significant events
   12.23 +  within Xen. The counts can be reset or displayed on Xen's console
   12.24 +  via console control keys.
   12.25 +\item[trace=y] Enable per-cpu trace buffers which log a range of
   12.26 +  events within Xen for collection by control software.
   12.27 +\end{description}
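         +
         +For example, to produce a debug build with performance counters
         +enabled, these options can be passed directly on make's command
         +line (a sketch; `dist' is the usual full-build target, but adjust
         +this to however you normally build your tree):
         +\begin{verbatim}
         +# make debug=y perfc=y dist
         +\end{verbatim}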
   12.28 +
   12.29 +
   12.30 +\section{Xen Boot Options}
   12.31 +\label{s:xboot}
   12.32 +
   12.33 +These options are used to configure Xen's behaviour at runtime.  They
   12.34 +should be appended to Xen's command line, either manually or by
   12.35 +editing \path{grub.conf}.
   12.36 +
   12.37 +\begin{description}
   12.38 +\item [ noreboot ] Don't reboot the machine automatically on errors.
   12.39 +  This is useful to catch debug output if you aren't catching console
   12.40 +  messages via the serial line.
   12.41 +\item [ nosmp ] Disable SMP support.  This option is implied by
   12.42 +  `ignorebiostables'.
   12.43 +\item [ watchdog ] Enable NMI watchdog which can report certain
   12.44 +  failures.
   12.45 +\item [ noirqbalance ] Disable software IRQ balancing and affinity.
   12.46 +  This can be used on systems such as Dell 1850/2850 that have
   12.47 +  workarounds in hardware for IRQ-routing issues.
   12.48 +\item [ badpage=$<$page number$>$,$<$page number$>$, \ldots ] Specify
   12.49 +  a list of pages not to be allocated for use because they contain bad
   12.50 +  bytes. For example, if your memory tester says that byte 0x12345678
   12.51 +  is bad, you would place `badpage=0x12345' on Xen's command line.
   12.52 +\item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
   12.53 +  com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
   12.54 +  Xen supports up to two 16550-compatible serial ports.  For example:
   12.55 +  `com1=9600,8n1,0x408,5' maps COM1 to a 9600-baud port, 8 data
   12.56 +  bits, no parity, 1 stop bit, I/O port base 0x408, IRQ 5.  If some
   12.57 +  configuration options are standard (e.g., I/O base and IRQ), then
   12.58 +  only a prefix of the full configuration string need be specified. If
   12.59 +  the baud rate is pre-configured (e.g., by the bootloader) then you
   12.60 +  can specify `auto' in place of a numeric baud rate.  (A boot-loader example using this option follows this list.)
   12.61 +\item [ console=$<$specifier list$>$ ] Specify the destination for Xen
   12.62 +  console I/O.  This is a comma-separated list of, for example:
   12.63 +  \begin{description}
   12.64 +  \item[ vga ] Use VGA console and allow keyboard input.
   12.65 +  \item[ com1 ] Use serial port com1.
   12.66 +  \item[ com2H ] Use serial port com2. Transmitted chars will have the
   12.67 +    MSB set. Received chars must have MSB set.
   12.68 +  \item[ com2L] Use serial port com2. Transmitted chars will have the
   12.69 +    MSB cleared. Received chars must have MSB cleared.
   12.70 +  \end{description}
   12.71 +  The latter two examples allow a single port to be shared by two
   12.72 +  subsystems (e.g.\ console and debugger). Sharing is controlled by
   12.73 +  MSB of each transmitted/received character.  [NB. Default for this
   12.74 +  option is `com1,vga']
   12.75 +\item [ sync\_console ] Force synchronous console output. This is
   12.76 +  useful if your system fails unexpectedly before it has sent all
   12.77 +  available output to the console. In most cases Xen will
   12.78 +  automatically enter synchronous mode when an exceptional event
   12.79 +  occurs, but this option provides a manual fallback.
   12.80 +\item [ conswitch=$<$switch-char$><$auto-switch-char$>$ ] Specify how
   12.81 +  to switch serial-console input between Xen and DOM0. The required
   12.82 +  sequence is CTRL-$<$switch-char$>$ pressed three times. Specifying
   12.83 +  the backtick character disables switching.  The
   12.84 +  $<$auto-switch-char$>$ specifies whether Xen should auto-switch
   12.85 +  input to DOM0 when it boots --- if it is `x' then auto-switching is
   12.86 +  disabled.  Any other value, or omitting the character, enables
   12.87 +  auto-switching.  [NB. Default switch-char is `a'.]
   12.88 +\item [ nmi=xxx ]
   12.89 +  Specify what to do with an NMI parity or I/O error. \\
   12.90 +  `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
   12.91 +  `nmi=dom0':   Inform DOM0 of the NMI. \\
   12.92 +  `nmi=ignore': Ignore the NMI.
   12.93 +\item [ mem=xxx ] Set the physical RAM address limit. Any RAM
   12.94 +  appearing beyond this physical address in the memory map will be
   12.95 +  ignored. This parameter may be specified with a B, K, M or G suffix,
   12.96 +  representing bytes, kilobytes, megabytes and gigabytes respectively.
   12.97 +  The default unit, if no suffix is specified, is kilobytes.
   12.98 +\item [ dom0\_mem=xxx ] Set the amount of memory to be allocated to
   12.99 +  domain0. In Xen 3.x the parameter may be specified with a B, K, M or
  12.100 +  G suffix, representing bytes, kilobytes, megabytes and gigabytes
  12.101 +  respectively; if no suffix is specified, the parameter defaults to
  12.102 +  kilobytes. In previous versions of Xen, suffixes were not supported
  12.103 +  and the value is always interpreted as kilobytes.
  12.104 +\item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
  12.105 +  pages (default 1).  Note that the trace buffers are only enabled in
  12.106 +  debug builds.  Most users can ignore this feature completely.
  12.107 +\item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
  12.108 +  current possibilities are `bvt' (default), `atropos' and `rrobin'.
  12.109 +  For more information see Section~\ref{s:sched}.
  12.110 +\item [ apic\_verbosity=debug,verbose ] Print more detailed
  12.111 +  information about local APIC and IOAPIC configuration.
  12.112 +\item [ lapic ] Force use of local APIC even when left disabled by
  12.113 +  uniprocessor BIOS.
  12.114 +\item [ nolapic ] Ignore local APIC in a uniprocessor system, even if
  12.115 +  enabled by the BIOS.
  12.116 +\item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
  12.117 +  This can usually be probed automatically.
  12.118 +\end{description}
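         +
         +As an illustration, several of these options are typically combined
         +on the Xen line of a \path{grub.conf} entry.  The following stanza
         +is a sketch only --- the paths, memory size and serial settings are
         +examples, and a real entry will also need the usual root line for
         +your boot partition:
         +\begin{verbatim}
         +title Xen / XenLinux
         +    kernel /boot/xen.gz dom0_mem=262144 com1=115200,8n1 console=com1,vga
         +    module /boot/vmlinuz-2.6-xen0 root=/dev/hda3 ro console=ttyS0
         +\end{verbatim}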
  12.119 +
  12.120 +In addition, the following options may be specified on the Xen command
  12.121 +line. Since domain 0 shares responsibility for booting the platform,
  12.122 +Xen will automatically propagate these options to its command line.
  12.123 +These options are taken from Linux's command-line syntax with
  12.124 +unchanged semantics.
  12.125 +
  12.126 +\begin{description}
  12.127 +\item [ acpi=off,force,strict,ht,noirq,\ldots ] Modify how Xen (and
  12.128 +  domain 0) parses the BIOS ACPI tables.
  12.129 +\item [ acpi\_skip\_timer\_override ] Instruct Xen (and domain~0) to
  12.130 +  ignore timer-interrupt override instructions specified by the BIOS
  12.131 +  ACPI tables.
  12.132 +\item [ noapic ] Instruct Xen (and domain~0) to ignore any IOAPICs
  12.133 +  that are present in the system, and instead continue to use the
  12.134 +  legacy PIC.
  12.135 +\end{description} 
  12.136 +
  12.137 +
  12.138 +\section{XenLinux Boot Options}
  12.139 +
  12.140 +In addition to the standard Linux kernel boot options, we support:
  12.141 +\begin{description}
  12.142 +\item[ xencons=xxx ] Specify the device node to which the Xen virtual
  12.143 +  console driver is attached. The following options are supported:
  12.144 +  \begin{center}
  12.145 +    \begin{tabular}{l}
  12.146 +      `xencons=off': disable virtual console \\
  12.147 +      `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
  12.148 +      `xencons=ttyS': attach console to /dev/ttyS0
  12.149 +    \end{tabular}
  12.150 +\end{center}
  12.151 +The default is ttyS for dom0 and tty for all other domains.
  12.152 +\end{description}
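         +
         +For example, to attach a guest's virtual console to
         +\path{/dev/tty1}, the option can be appended to the domain's kernel
         +command line via the \path{extra} setting of its configuration file
         +(a sketch):
         +\begin{verbatim}
         +extra = "xencons=tty"
         +\end{verbatim}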
  12.153 +
  12.154 +
  12.155 +\section{Debugging}
  12.156 +\label{s:keys}
  12.157 +
  12.158 +Xen has a set of debugging features that can be useful to try and
  12.159 +figure out what's going on. Hit `h' on the serial line (if you
  12.160 +specified a baud rate on the Xen command line) or ScrollLock-h on the
  12.161 +keyboard to get a list of supported commands.
  12.162 +
  12.163 +If you have a crash you'll likely get a crash dump containing an EIP
  12.164 +(PC) which, along with an \path{objdump -d image}, can be useful in
  12.165 +figuring out what's happened.  Debug a Xenlinux image just as you
  12.166 +would any other Linux kernel.
  12.167 +
  12.168 +%% We supply a handy debug terminal program which you can find in
  12.169 +%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/} This should
  12.170 +%% be built and executed on another machine that is connected via a
  12.171 +%% null modem cable. Documentation is included.  Alternatively, if the
  12.172 +%% Xen machine is connected to a serial-port server then we supply a
  12.173 +%% dumb TCP terminal client, {\tt xencons}.
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/docs/src/user/control_software.tex	Tue Sep 20 09:43:46 2005 +0000
    13.3 @@ -0,0 +1,115 @@
    13.4 +\chapter{Control Software} 
    13.5 +
    13.6 +The Xen control software includes the \xend\ node control daemon
    13.7 +(which must be running), the xm command line tools, and the prototype
    13.8 +xensv web interface.
    13.9 +
   13.10 +\section{\Xend\ (node control daemon)}
   13.11 +\label{s:xend}
   13.12 +
   13.13 +The Xen Daemon (\Xend) performs system management functions related to
   13.14 +virtual machines.  It forms a central point of control for a machine
   13.15 +and can be controlled using an HTTP-based protocol.  \Xend\ must be
   13.16 +running in order to start and manage virtual machines.
   13.17 +
   13.18 +\Xend\ must be run as root because it needs access to privileged
   13.19 +system management functions.  A small set of commands may be issued on
   13.20 +the \xend\ command line:
   13.21 +
   13.22 +\begin{tabular}{ll}
   13.23 +  \verb!# xend start! & start \xend, if not already running \\
   13.24 +  \verb!# xend stop!  & stop \xend\ if already running       \\
   13.25 +  \verb!# xend restart! & restart \xend\ if running, otherwise start it \\
   13.26 +  % \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
   13.27 +  \verb!# xend status! & indicates \xend\ status by its return code
   13.28 +\end{tabular}
   13.29 +
   13.30 +A SysV init script called {\tt xend} is provided to start \xend\ at
   13.31 +boot time.  {\tt make install} installs this script in
   13.32 +\path{/etc/init.d}.  To enable it, you have to make symbolic links in
   13.33 +the appropriate runlevel directories or use the {\tt chkconfig} tool,
   13.34 +where available.
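         +
         +For example, on distributions providing the {\tt chkconfig} tool,
         +something like the following should arrange for \xend\ to start at
         +boot (a sketch; runlevel handling varies between distributions):
         +\begin{verbatim}
         +# chkconfig --add xend
         +# chkconfig xend on
         +\end{verbatim}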
   13.35 +
   13.36 +Once \xend\ is running, more sophisticated administration can be done
   13.37 +using the xm tool (see Section~\ref{s:xm}) and the experimental Xensv
   13.38 +web interface (see Section~\ref{s:xensv}).
   13.39 +
   13.40 +As \xend\ runs, events will be logged to \path{/var/log/xend.log} and,
   13.41 +if the migration assistant daemon (\path{xfrd}) has been started,
   13.42 +\path{/var/log/xfrd.log}. These may be of use for troubleshooting
   13.43 +problems.
   13.44 +
   13.45 +\section{Xm (command line interface)}
   13.46 +\label{s:xm}
   13.47 +
   13.48 +The xm tool is the primary tool for managing Xen from the console.
   13.49 +The general format of an xm command line is:
   13.50 +
   13.51 +\begin{verbatim}
   13.52 +# xm command [switches] [arguments] [variables]
   13.53 +\end{verbatim}
   13.54 +
   13.55 +The available \emph{switches} and \emph{arguments} are dependent on
   13.56 +the \emph{command} chosen.  The \emph{variables} may be set using
   13.57 +declarations of the form {\tt variable=value} and command line
   13.58 +declarations override any of the values in the configuration file
   13.59 +being used, including the standard variables described above and any
   13.60 +custom variables (for instance, the \path{xmdefconfig} file uses a
   13.61 +{\tt vmid} variable).
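         +
         +For instance, the standard example configuration can be started
         +with its {\tt vmid} variable overridden on the command line (a
         +sketch):
         +\begin{verbatim}
         +# xm create -f /etc/xen/xmexample2 vmid=3
         +\end{verbatim}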
   13.62 +
   13.63 +The available commands are as follows:
   13.64 +
   13.65 +\begin{description}
   13.66 +\item[set-mem] Request a domain to adjust its memory footprint.
   13.67 +\item[create] Create a new domain.
   13.68 +\item[destroy] Kill a domain immediately.
   13.69 +\item[list] List running domains.
   13.70 +\item[shutdown] Ask a domain to shut down.
   13.71 +\item[dmesg] Fetch the Xen (not Linux!) boot output.
   13.72 +\item[consoles] List the available consoles.
   13.73 +\item[console] Connect to the console for a domain.
   13.74 +\item[help] Get help on xm commands.
   13.75 +\item[save] Suspend a domain to disk.
   13.76 +\item[restore] Restore a domain from disk.
   13.77 +\item[pause] Pause a domain's execution.
   13.78 +\item[unpause] Un-pause a domain.
   13.79 +\item[pincpu] Pin a domain to a CPU.
   13.80 +\item[bvt] Set BVT scheduler parameters for a domain.
   13.81 +\item[bvt\_ctxallow] Set the BVT context switching allowance for the
   13.82 +  system.
   13.83 +\item[atropos] Set the atropos parameters for a domain.
   13.84 +\item[rrobin] Set the round robin time slice for the system.
   13.85 +\item[info] Get information about the Xen host.
   13.86 +\item[call] Call a \xend\ HTTP API function directly.
   13.87 +\end{description}
   13.88 +
   13.89 +For a detailed overview of switches, arguments and variables to each
   13.90 +command try
   13.91 +\begin{quote}
   13.92 +\begin{verbatim}
   13.93 +# xm help command
   13.94 +\end{verbatim}
   13.95 +\end{quote}
   13.96 +
   13.97 +\section{Xensv (web control interface)}
   13.98 +\label{s:xensv}
   13.99 +
  13.100 +Xensv is the experimental web control interface for managing a Xen
  13.101 +machine.  It can be used to perform some (but not yet all) of the
  13.102 +management tasks that can be done using the xm tool.
  13.103 +
  13.104 +It can be started using:
  13.105 +\begin{quote}
  13.106 +  \verb_# xensv start_
  13.107 +\end{quote}
  13.108 +and stopped using:
  13.109 +\begin{quote}
  13.110 +  \verb_# xensv stop_
  13.111 +\end{quote}
  13.112 +
  13.113 +By default, Xensv will serve out the web interface on port 8080.  This
  13.114 +can be changed by editing
  13.115 +\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
  13.116 +
  13.117 +Once Xensv is running, the web interface can be used to create and
  13.118 +manage running domains.
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/docs/src/user/debian.tex	Tue Sep 20 09:43:46 2005 +0000
    14.3 @@ -0,0 +1,154 @@
    14.4 +\chapter{Installing Xen / XenLinux on Debian}
    14.5 +
    14.6 +The Debian project provides a tool called \path{debootstrap} which
    14.7 +allows a base Debian system to be installed into a filesystem without
    14.8 +requiring the host system to have any Debian-specific software (such
    14.9 +as \path{apt}).
   14.10 +
   14.11 +Here is how to install Debian 3.1 (Sarge) for an unprivileged
   14.12 +Xen domain:
   14.13 +
   14.14 +\begin{enumerate}
   14.15 +
   14.16 +\item Set up Xen and test that it's working, as described earlier in
   14.17 +  this manual.
   14.18 +
   14.19 +\item Create disk images for rootfs and swap. Alternatively, you might
   14.20 +  create dedicated partitions, LVM logical volumes, etc.\ if that
   14.21 +  suits your setup.
   14.22 +\begin{verbatim}
   14.23 +dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
   14.24 +dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
   14.25 +\end{verbatim}
   14.26 +
   14.27 +  If you're going to use this filesystem / disk image only as a
   14.28 +  `template' for other VM disk images, something like 300 MB should
   14.29 +  be enough; it depends, of course, on what kind of packages you are
   14.30 +  planning to install in the template.
   14.31 +
   14.32 +\item Create the filesystem and initialise the swap image
   14.33 +\begin{verbatim}
   14.34 +mkfs.ext3 /path/diskimage
   14.35 +mkswap /path/swapimage
   14.36 +\end{verbatim}
   14.37 +
   14.38 +\item Mount the disk image for installation
   14.39 +\begin{verbatim}
   14.40 +mount -o loop /path/diskimage /mnt/disk
   14.41 +\end{verbatim}
   14.42 +
   14.43 +\item Install \path{debootstrap}. Make sure you have debootstrap
   14.44 +  installed on the host.  If you are running Debian Sarge (3.1 /
   14.45 +  testing) or unstable you can install it by running \path{apt-get
   14.46 +    install debootstrap}.  Otherwise, it can be downloaded from the
   14.47 +  Debian project website.
   14.48 +
   14.49 +\item Install Debian base to the disk image:
   14.50 +\begin{verbatim}
   14.51 +debootstrap --arch i386 sarge /mnt/disk  \
   14.52 +            http://ftp.<countrycode>.debian.org/debian
   14.53 +\end{verbatim}
   14.54 +
   14.55 +  You can use any other Debian http/ftp mirror you want.
   14.56 +
   14.57 +\item When debootstrap completes successfully, modify settings:
   14.58 +\begin{verbatim}
   14.59 +chroot /mnt/disk /bin/bash
   14.60 +\end{verbatim}
   14.61 +
   14.62 +Edit the following files using vi or nano and make needed changes:
   14.63 +\begin{verbatim}
   14.64 +/etc/hostname
   14.65 +/etc/hosts
   14.66 +/etc/resolv.conf
   14.67 +/etc/network/interfaces
   14.68 +/etc/networks
   14.69 +\end{verbatim}
   14.70 +
   14.71 +Set up access to the services, edit:
   14.72 +\begin{verbatim}
   14.73 +/etc/hosts.deny
   14.74 +/etc/hosts.allow
   14.75 +/etc/inetd.conf
   14.76 +\end{verbatim}
   14.77 +
   14.78 +Add Debian mirror to:   
   14.79 +\begin{verbatim}
   14.80 +/etc/apt/sources.list
   14.81 +\end{verbatim}
   14.82 +
   14.83 +Create fstab like this:
   14.84 +\begin{verbatim}
   14.85 +/dev/sda1       /       ext3    errors=remount-ro       0       1
   14.86 +/dev/sda2       none    swap    sw                      0       0
   14.87 +proc            /proc   proc    defaults                0       0
   14.88 +\end{verbatim}
   14.89 +
   14.90 +Logout
   14.91 +
   14.92 +\item Unmount the disk image
   14.93 +\begin{verbatim}
   14.94 +umount /mnt/disk
   14.95 +\end{verbatim}
   14.96 +
   14.97 +\item Create a Xen configuration file for the new domain. You can
   14.98 +  use the example-configurations coming with Xen as a template.
   14.99 +
  14.100 +  Make sure you have the following set up:
  14.101 +\begin{verbatim}
  14.102 +disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
  14.103 +root = "/dev/sda1 ro"
  14.104 +\end{verbatim}
  14.105 +
  14.106 +\item Start the new domain
  14.107 +\begin{verbatim}
  14.108 +xm create -f domain_config_file
  14.109 +\end{verbatim}
  14.110 +
  14.111 +Check that the new domain is running:
  14.112 +\begin{verbatim}
  14.113 +xm list
  14.114 +\end{verbatim}
  14.115 +
  14.116 +\item Attach to the console of the new domain.  You should see
  14.117 +  something like this when starting the new domain:
  14.118 +
  14.119 +\begin{verbatim}
  14.120 +Started domain testdomain2, console on port 9626
  14.121 +\end{verbatim}
  14.122 +        
  14.123 +  There you can see the ID of the console: 26. You can also list the
  14.124 +  consoles with \path{xm consoles} (the ID is the last two digits of
  14.125 +  the port number).
  14.126 +
  14.127 +  Attach to the console:
  14.128 +
  14.129 +\begin{verbatim}
  14.130 +xm console 26
  14.131 +\end{verbatim}
  14.132 +
  14.133 +  or by telnetting to port 9626 on localhost (though the xm console
  14.134 +  program works better).
  14.135 +
  14.136 +\item Log in and run base-config
  14.137 +
  14.138 +  By default there is no root password.
  14.139 +
  14.140 +  Check that everything looks OK and that the system started without
  14.141 +  errors.  Check that the swap is active and that the network
  14.142 +  settings are correct.
  14.143 +
  14.144 +  Run \path{/usr/sbin/base-config} to set up the Debian settings.
  14.145 +
  14.146 +  Set up the password for root using passwd.
  14.147 +
  14.148 +\item Done. You can exit the console by pressing {\path{Ctrl + ]}}
  14.149 +
  14.150 +\end{enumerate}
  14.151 +
  14.152 +
  14.153 +If you need to create new domains, you can just copy the contents of
  14.154 +the `template' image to the new disk images, either by mounting both
  14.155 +the template and the new image and using \path{cp -a} or \path{tar},
  14.156 +or by simply copying the image file, as sketched below.  Once this is
  14.157 +done, modify the image-specific settings (hostname, network settings, etc.).
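         +
         +A sketch of the mount-and-copy approach (all paths are
         +illustrative):
         +\begin{verbatim}
         +mount -o loop /path/template-image /mnt/template
         +mount -o loop /path/new-image /mnt/new
         +cp -a /mnt/template/. /mnt/new/
         +umount /mnt/template
         +umount /mnt/new
         +\end{verbatim}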
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/docs/src/user/domain_configuration.tex	Tue Sep 20 09:43:46 2005 +0000
    15.3 @@ -0,0 +1,281 @@
    15.4 +\chapter{Domain Configuration}
    15.5 +\label{cha:config}
    15.6 +
    15.7 +This chapter describes the syntax of the domain configuration files
    15.8 +and how to further specify networking, driver domain and general
    15.9 +scheduling behavior.
   15.10 +
   15.11 +
   15.12 +\section{Configuration Files}
   15.13 +\label{s:cfiles}
   15.14 +
   15.15 +Xen configuration files contain the following standard variables.
   15.16 +Unless otherwise stated, configuration items should be enclosed in
   15.17 +quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2}
   15.18 +for concrete examples of the syntax.
   15.19 +
   15.20 +\begin{description}
   15.21 +\item[kernel] Path to the kernel image.
   15.22 +\item[ramdisk] Path to a ramdisk image (optional).
   15.23 +  % \item[builder] The name of the domain build function (e.g.
   15.24 +  %   {\tt'linux'} or {\tt'netbsd'}.
   15.25 +\item[memory] Memory size in megabytes.
   15.26 +\item[cpu] CPU to run this domain on, or {\tt -1} for auto-allocation.
   15.27 +\item[console] Port to export the domain console on (default 9600 +
   15.28 +  domain ID).
   15.29 +\item[nics] Number of virtual network interfaces.
   15.30 +\item[vif] List of MAC addresses (random addresses are assigned if not
   15.31 +  given) and bridges to use for the domain's network interfaces, e.g.\ 
   15.32 +\begin{verbatim}
   15.33 +vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
   15.34 +        'bridge=xen-br1' ]
   15.35 +\end{verbatim}
   15.36 +  to assign a MAC address and bridge to the first interface and assign
   15.37 +  a different bridge to the second interface, leaving \xend\ to choose
   15.38 +  the MAC address.
   15.39 +\item[disk] List of block devices to export to the domain, e.g.\ \\
   15.40 +  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
   15.41 +  exports physical device \path{/dev/hda1} to the domain as
   15.42 +  \path{/dev/sda1} with read-only access. Exporting a disk read-write
   15.43 +  which is currently mounted is dangerous -- if you are \emph{certain}
   15.44 +  you wish to do this, you can specify \path{w!} as the mode.
   15.45 +\item[dhcp] Set to {\tt `dhcp'} if you want to use DHCP to configure
   15.46 +  networking.
   15.47 +\item[netmask] Manually configured IP netmask.
   15.48 +\item[gateway] Manually configured IP gateway.
   15.49 +\item[hostname] Set the hostname for the virtual machine.
   15.50 +\item[root] Specify the root device parameter on the kernel command
   15.51 +  line.
   15.52 +\item[nfs\_server] IP address for the NFS server (if any).
   15.53 +\item[nfs\_root] Path of the root filesystem on the NFS server (if
   15.54 +  any).
   15.55 +\item[extra] Extra string to append to the kernel command line (if
   15.56 +  any)
   15.57 +\item[restart] Three possible options:
   15.58 +  \begin{description}
   15.59 +  \item[always] Always restart the domain, no matter what its exit
   15.60 +    code is.
   15.61 +  \item[never] Never restart the domain.
   15.62 +  \item[onreboot] Restart the domain iff it requests reboot.
   15.63 +  \end{description}
   15.64 +\end{description}
   15.65 +
   15.66 +For additional flexibility, it is also possible to include Python
   15.67 +scripting commands in configuration files.  An example of this is the
   15.68 +\path{xmexample2} file, which uses Python code to handle the
   15.69 +\path{vmid} variable.
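         +
         +Putting several of these variables together, a minimal
         +configuration file might look like the following sketch (the kernel
         +path, memory size and devices are examples and must match your own
         +system):
         +\begin{verbatim}
         +kernel   = "/boot/vmlinuz-2.6-xen"
         +memory   = 64
         +hostname = "exampledomain"
         +disk     = [ 'phy:hda7,sda1,w' ]
         +root     = "/dev/sda1 ro"
         +\end{verbatim}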
   15.70 +
   15.71 +
   15.72 +%\part{Advanced Topics}
   15.73 +
   15.74 +
   15.75 +\section{Network Configuration}
   15.76 +
   15.77 +For many users, the default installation should work ``out of the
   15.78 +box''.  More complicated network setups, for instance with multiple
   15.79 +Ethernet interfaces and/or existing bridging setups will require some
   15.80 +special configuration.
   15.81 +
   15.82 +The purpose of this section is to describe the mechanisms provided by
   15.83 +\xend\ to allow a flexible configuration for Xen's virtual networking.
   15.84 +
   15.85 +\subsection{Xen virtual network topology}
   15.86 +
   15.87 +Each domain network interface is connected to a virtual network
   15.88 +interface in dom0 by a point to point link (effectively a ``virtual
   15.89 +crossover cable'').  These devices are named {\tt
   15.90 +  vif$<$domid$>$.$<$vifid$>$} (e.g.\ {\tt vif1.0} for the first
   15.91 +interface in domain~1, {\tt vif3.1} for the second interface in
   15.92 +domain~3).
   15.93 +
   15.94 +Traffic on these virtual interfaces is handled in domain~0 using
   15.95 +standard Linux mechanisms for bridging, routing, rate limiting, etc.
   15.96 +Xend calls on two shell scripts to perform initial configuration of
   15.97 +the network and configuration of new virtual interfaces.  By default,
   15.98 +these scripts configure a single bridge for all the virtual
   15.99 +interfaces.  Arbitrary routing / bridging configurations can be
  15.100 +configured by customizing the scripts, as described in the following
  15.101 +section.
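         +
         +In the default configuration the standard Linux bridging tools can
         +be used from domain~0 to inspect this topology; for example (a
         +sketch --- which vif interfaces exist depends on the running
         +domains):
         +\begin{verbatim}
         +# brctl show
         +# ifconfig vif1.0
         +\end{verbatim}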
  15.102 +
  15.103 +\subsection{Xen networking scripts}
  15.104 +
  15.105 +Xen's virtual networking is configured by two shell scripts (by
  15.106 +default \path{network} and \path{vif-bridge}).  These are called
  15.107 +automatically by \xend\ when certain events occur, with arguments to
  15.108 +the scripts providing further contextual information.  These scripts
  15.109 +are found by default in \path{/etc/xen/scripts}.  The names and
  15.110 +locations of the scripts can be configured in
  15.111 +\path{/etc/xen/xend-config.sxp}.
  15.112 +
  15.113 +\begin{description}
  15.114 +\item[network:] This script is called whenever \xend\ is started or
  15.115 +  stopped to respectively initialize or tear down the Xen virtual
  15.116 +  network. In the default configuration initialization creates the
  15.117 +  bridge `xen-br0' and moves eth0 onto that bridge, modifying the
  15.118 +  routing accordingly. When \xend\ exits, it deletes the Xen bridge
  15.119 +  and removes eth0, restoring the normal IP and routing configuration.
  15.120 +
  15.121 +  %% In configurations where the bridge already exists, this script
  15.122 +  %% could be replaced with a link to \path{/bin/true} (for instance).
  15.123 +
  15.124 +\item[vif-bridge:] This script is called for every domain virtual
  15.125 +  interface and can configure firewalling rules and add the vif to the
  15.126 +  appropriate bridge. By default, this adds and removes VIFs on the
  15.127 +  default Xen bridge.
  15.128 +\end{description}
  15.129 +
  15.130 +For more complex network setups (e.g.\ where routing or integration
  15.131 +with existing bridges is required) these scripts may be replaced with
  15.132 +customized variants for your site's preferred configuration.
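         +
         +For example, site-specific replacements can be configured in
         +\path{/etc/xen/xend-config.sxp} with entries along these lines (a
         +sketch --- the exact key names are an assumption here; check the
         +comments in your installed configuration file):
         +\begin{verbatim}
         +(network-script my-network)
         +(vif-script    my-vif-bridge)
         +\end{verbatim}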
  15.133 +
  15.134 +%% There are two possible types of privileges: IO privileges and
  15.135 +%% administration privileges.
  15.136 +
  15.137 +
  15.138 +\section{Driver Domain Configuration}
  15.139 +
  15.140 +I/O privileges can be assigned to allow a domain to directly access
  15.141 +PCI devices itself.  This is used to support driver domains.
  15.142 +
  15.143 +Setting back-end privileges is currently only supported in SXP format
  15.144 +config files.  To allow a domain to function as a back-end for others,
  15.145 +somewhere within the {\tt vm} element of its configuration file must
  15.146 +be a {\tt back-end} element of the form {\tt (back-end ({\em type}))}
  15.147 +where {\tt \em type} may be either {\tt netif} or {\tt blkif},
  15.148 +according to the type of virtual device this domain will service.
  15.149 +%% After this domain has been built, \xend will connect all new and
  15.150 +%% existing {\em virtual} devices (of the appropriate type) to that
  15.151 +%% back-end.
  15.152 +
  15.153 +Note that a block back-end cannot currently import virtual block
  15.154 +devices from other domains, and a network back-end cannot import
  15.155 +virtual network devices from other domains.  Thus (particularly in the
  15.156 +case of block back-ends, which cannot import a virtual block device as
  15.157 +their root filesystem), you may need to boot a back-end domain from a
  15.158 +ramdisk or a network device.
  15.159 +
  15.160 +Access to PCI devices may be configured on a per-device basis.  Xen
  15.161 +will assign the minimal set of hardware privileges to a domain that
  15.162 +are required to control its devices.  This can be configured in either
  15.163 +format of configuration file:
  15.164 +
  15.165 +\begin{itemize}
  15.166 +\item SXP Format: Include device elements of the form: \\
  15.167 +  \centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
  15.168 +  inside the top-level {\tt vm} element.  Each one specifies the
  15.169 +  address of a device this domain is allowed to access --- the numbers
  15.170 +  \emph{x},\emph{y} and \emph{z} may be in either decimal or
  15.171 +  hexadecimal format.
  15.172 +\item Flat Format: Include a list of PCI device addresses of the
  15.173 +  format: \\
  15.174 +  \centerline{{\tt pci = ['x,y,z', \ldots]}} \\
  15.175 +  where each element in the list is a string specifying the components
  15.176 +  of the PCI device address, separated by commas.  The components
  15.177 +  ({\tt \em x}, {\tt \em y} and {\tt \em z}) of the list may be
  15.178 +  formatted as either decimal or hexadecimal; see the example below.
  15.179 +\end{itemize}
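         +
         +For example, to grant a domain access to the single device at PCI
         +address 01:00.0 using the flat format (a sketch):
         +\begin{verbatim}
         +pci = [ '0x01,0x00,0x0' ]
         +\end{verbatim}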
  15.180 +
  15.181 +%% \section{Administration Domains}
  15.182 +
  15.183 +%% Administration privileges allow a domain to use the `dom0
  15.184 +%% operations' (so called because they are usually available only to
  15.185 +%% domain 0).  A privileged domain can build other domains, set
  15.186 +%% scheduling parameters, etc.
  15.187 +
  15.188 +% Support for other administrative domains is not yet available...
  15.189 +% perhaps we should plumb it in some time
  15.190 +
  15.191 +
  15.192 +\section{Scheduler Configuration}
  15.193 +\label{s:sched}
  15.194 +
  15.195 +Xen offers a boot-time choice between multiple schedulers.  To select
  15.196 +a scheduler, pass the boot parameter \emph{sched=sched\_name} to Xen,
  15.197 +substituting the appropriate scheduler name.  Details of the
  15.198 +schedulers and their parameters are included below; future versions
  15.199 +of the tools will provide a higher-level interface to them.
  15.200 +
  15.201 +It is expected that system administrators configure their system to
  15.202 +use the scheduler most appropriate to their needs.  Currently, the BVT
  15.203 +scheduler is the recommended choice.
  15.204 +
  15.205 +\subsection{Borrowed Virtual Time}
  15.206 +
  15.207 +{\tt sched=bvt} (the default) \\
  15.208 +
  15.209 +BVT provides proportional fair shares of the CPU time.  It has been
  15.210 +observed to penalize domains that block frequently (e.g.\ I/O
  15.211 +intensive domains), but this can be compensated for by using warping.
  15.212 +
  15.213 +\subsubsection{Global Parameters}
  15.214 +
  15.215 +\begin{description}
  15.216 +\item[ctx\_allow] The context switch allowance is similar to the
  15.217 +  ``quantum'' in traditional schedulers.  It is the minimum time that
  15.218 +  a scheduled domain will be allowed to run before being preempted.
  15.219 +\end{description}
  15.220 +
  15.221 +\subsubsection{Per-domain parameters}
  15.222 +
  15.223 +\begin{description}
  15.224 +\item[mcuadv] The MCU (Minimum Charging Unit) advance determines the
  15.225 +  proportional share of the CPU that a domain receives.  It is set
  15.226 +  inversely proportional to a domain's sharing weight.
  15.227 +\item[warp] The amount of ``virtual time'' the domain is allowed to
  15.228 +  warp backwards.
  15.229 +\item[warpl] The warp limit is the maximum time a domain can run
  15.230 +  warped for.
  15.231 +\item[warpu] The unwarp requirement is the minimum time a domain must
  15.232 +  run unwarped for before it can warp again.
  15.233 +\end{description}
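         +
         +These per-domain parameters can be adjusted at runtime with the
         +{\tt xm bvt} command; the following sketch uses the parameter names
         +above, but the exact argument order is an assumption --- consult
         +{\tt xm help bvt} on your system:
         +\begin{verbatim}
         +# xm bvt <domain> <mcuadv> <warp> <warpl> <warpu>
         +\end{verbatim}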
  15.234 +
  15.235 +\subsection{Atropos}
  15.236 +
  15.237 +{\tt sched=atropos} \\
  15.238 +
  15.239 +Atropos is a soft real time scheduler.  It provides guarantees about
  15.240 +absolute shares of the CPU, with a facility for sharing slack CPU time
  15.241 +on a best-effort basis. It can provide timeliness guarantees for
  15.242 +latency-sensitive domains.
  15.243 +
  15.244 +Every domain has an associated period and slice.  The domain should
  15.245 +receive `slice' nanoseconds every `period' nanoseconds.  This allows
  15.246 +the administrator to configure both the absolute share of the CPU a
  15.247 +domain receives and the frequency with which it is scheduled.
  15.248 +
  15.249 +%% When domains unblock, their period is reduced to the value of the
  15.250 +%% latency hint (the slice is scaled accordingly so that they still
  15.251 +%% get the same proportion of the CPU).  For each subsequent period,
  15.252 +%% the slice and period times are doubled until they reach their
  15.253 +%% original values.
  15.254 +
  15.255 +Note: don't over-commit the CPU when using Atropos (i.e.\ don't reserve
  15.256 +more CPU than is available --- the utilization should be kept to
  15.257 +slightly less than 100\% in order to ensure predictable behavior).
  15.258 +
  15.259 +\subsubsection{Per-domain parameters}
  15.260 +
  15.261 +\begin{description}
  15.262 +\item[period] The regular time interval during which a domain is
  15.263 +  guaranteed to receive its allocation of CPU time.
  15.264 +\item[slice] The length of time per period that a domain is guaranteed
  15.265 +  to run for (in the absence of voluntary yielding of the CPU).
  15.266 +\item[latency] The latency hint is used to control how soon after
  15.267 +  waking up a domain it should be scheduled.
  15.268 +\item[xtratime] This is a boolean flag that specifies whether a domain
  15.269 +  should be allowed a share of the system slack time.
  15.270 +\end{description}
  15.271 +
  15.272 +\subsection{Round Robin}
  15.273 +
  15.274 +{\tt sched=rrobin} \\
  15.275 +
  15.276 +The round robin scheduler is included as a simple demonstration of
  15.277 +Xen's internal scheduler API.  It is not intended for production use.
  15.278 +
  15.279 +\subsubsection{Global Parameters}
  15.280 +
  15.281 +\begin{description}
  15.282 +\item[rr\_slice] The maximum time each domain runs before the next
  15.283 +  scheduling decision is made.
  15.284 +\end{description}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/docs/src/user/domain_filesystem.tex	Tue Sep 20 09:43:46 2005 +0000
    16.3 @@ -0,0 +1,243 @@
    16.4 +\chapter{Domain Filesystem Storage}
    16.5 +
    16.6 +It is possible to directly export any Linux block device in dom0 to
    16.7 +another domain, or to export filesystems / devices to virtual machines
    16.8 +using standard network protocols (e.g.\ NBD, iSCSI, NFS, etc.).  This
    16.9 +chapter covers some of the possibilities.
   16.10 +
   16.11 +
   16.12 +\section{Exporting Physical Devices as VBDs}
   16.13 +\label{s:exporting-physical-devices-as-vbds}
   16.14 +
   16.15 +One of the simplest configurations is to directly export individual
   16.16 +partitions from domain~0 to other domains. To achieve this use the
   16.17 +\path{phy:} specifier in your domain configuration file. For example a
   16.18 +line like
   16.19 +\begin{quote}
   16.20 +  \verb_disk = ['phy:hda3,sda1,w']_
   16.21 +\end{quote}
   16.22 +specifies that the partition \path{/dev/hda3} in domain~0 should be
   16.23 +exported read-write to the new domain as \path{/dev/sda1}; one could
   16.24 +equally well export it as \path{/dev/hda} or \path{/dev/sdb5} should
   16.25 +one wish.
   16.26 +
   16.27 +In addition to local disks and partitions, it is possible to export
   16.28 +any device that Linux considers to be ``a disk'' in the same manner.
   16.29 +For example, if you have iSCSI disks or GNBD volumes imported into
   16.30 +domain~0 you can export these to other domains using the \path{phy:}
   16.31 +disk syntax. E.g.:
   16.32 +\begin{quote}
   16.33 +  \verb_disk = ['phy:vg/lvm1,sda2,w']_
   16.34 +\end{quote}
   16.35 +
   16.36 +\begin{center}
   16.37 +  \framebox{\bf Warning: Block device sharing}
   16.38 +\end{center}
   16.39 +\begin{quote}
   16.40 +  Block devices should typically only be shared between domains in a
   16.41 +  read-only fashion; otherwise the Linux kernel's file systems will
   16.42 +  get very confused, as the file system structure may change
   16.43 +  underneath them (having the same ext3 partition mounted \path{rw}
   16.44 +  twice is a sure-fire way to cause irreparable damage)!
   16.45 +  prevent you from doing this by checking that the device is not
   16.46 +  mounted read-write in domain~0, and hasn't already been exported
   16.47 +  read-write to another domain.  If you want read-write sharing,
   16.48 +  export the directory to other domains via NFS from domain~0 (or use
   16.49 +  a cluster file system such as GFS or ocfs2).
   16.50 +\end{quote}
   16.51 +
   16.52 +
   16.53 +\section{Using File-backed VBDs}
   16.54 +
   16.55 +It is also possible to use a file in Domain~0 as the primary storage
   16.56 +for a virtual machine.  As well as being convenient, this also has the
   16.57 +advantage that the virtual block device will be \emph{sparse} ---
   16.58 +space will only really be allocated as parts of the file are used.  So
   16.59 +if a virtual machine uses only half of its disk space then the file
   16.60 +really takes up half of the size allocated.
   16.61 +
   16.62 +For example, to create a 2GB sparse file-backed virtual block device
   16.63 +(actually only consumes 1KB of disk):
   16.64 +\begin{quote}
   16.65 +  \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
   16.66 +\end{quote}
   16.67 +
   16.68 +Make a file system in the disk file:
   16.69 +\begin{quote}
   16.70 +  \verb_# mkfs -t ext3 vm1disk_
   16.71 +\end{quote}
   16.72 +
   16.73 +(when the tool asks for confirmation, answer `y')
   16.74 +
   16.75 +Populate the file system e.g.\ by copying from the current root:
   16.76 +\begin{quote}
   16.77 +\begin{verbatim}
   16.78 +# mount -o loop vm1disk /mnt
   16.79 +# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
   16.80 +# mkdir /mnt/{proc,sys,home,tmp}
   16.81 +\end{verbatim}
   16.82 +\end{quote}
   16.83 +
   16.84 +Tailor the file system by editing \path{/etc/fstab},
   16.85 +\path{/etc/hostname}, etc.\ Don't forget to edit the files in the
   16.86 +mounted file system, instead of your domain~0 filesystem, e.g.\ you
   16.87 +would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}.  For
   16.88 +this example, set the root device in fstab to \path{/dev/sda1}.
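         +
         +A matching entry in \path{/mnt/etc/fstab} might look like the
         +following (illustrative; adjust the file system type if you did
         +not use ext3 above):
         +\begin{quote}
         +\begin{verbatim}
         +/dev/sda1    /    ext3    defaults    1 1
         +\end{verbatim}
         +\end{quote}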
   16.89 +
   16.90 +Now unmount (this is important!):
   16.91 +\begin{quote}
   16.92 +  \verb_# umount /mnt_
   16.93 +\end{quote}
   16.94 +
   16.95 +In the configuration file set:
   16.96 +\begin{quote}
   16.97 +  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
   16.98 +\end{quote}
   16.99 +
  16.100 +As the virtual machine writes to its `disk', the sparse file will be
  16.101 +filled in and consume more space up to the original 2GB.
  16.102 +
  16.103 +{\bf Note that file-backed VBDs may not be appropriate for backing
  16.104 +  I/O-intensive domains.}  File-backed VBDs are known to experience
  16.105 +substantial slowdowns under heavy I/O workloads, due to the I/O
  16.106 +handling by the loopback block device used to support file-backed VBDs
  16.107 +in dom0.  Better I/O performance can be achieved by using either
  16.108 +LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical
  16.109 +devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}).
  16.110 +
  16.111 +Linux supports a maximum of eight file-backed VBDs across all domains
  16.112 +by default.  This limit can be statically increased by using the
  16.113 +\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is
  16.114 +compiled as a module in the dom0 kernel, or by using the
  16.115 +\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled
  16.116 +directly into the dom0 kernel.
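         +
         +For example, to raise the limit to 64 when the loop driver is built
         +as a module (the value is illustrative):
         +\begin{quote}
         +\begin{verbatim}
         +# modprobe loop max_loop=64
         +\end{verbatim}
         +\end{quote}
         +If the driver is compiled in, append \path{max\_loop=64} to the
         +module line for the dom0 kernel in your GRUB configuration instead.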
  16.117 +
  16.118 +
  16.119 +\section{Using LVM-backed VBDs}
  16.120 +\label{s:using-lvm-backed-vbds}
  16.121 +
  16.122 +A particularly appealing solution is to use LVM volumes as backing for
  16.123 +domain file-systems since this allows dynamic growing/shrinking of
  16.124 +volumes as well as snapshots and other features.
  16.125 +
  16.126 +To initialize a partition to support LVM volumes:
  16.127 +\begin{quote}
  16.128 +\begin{verbatim}
  16.129 +# pvcreate /dev/sda10           
  16.130 +\end{verbatim} 
  16.131 +\end{quote}
  16.132 +
  16.133 +Create a volume group named `vg' on the physical partition:
  16.134 +\begin{quote}
  16.135 +\begin{verbatim}
  16.136 +# vgcreate vg /dev/sda10
  16.137 +\end{verbatim} 
  16.138 +\end{quote}
  16.139 +
  16.140 +Create a logical volume of size 4GB named `myvmdisk1':
  16.141 +\begin{quote}
  16.142 +\begin{verbatim}
  16.143 +# lvcreate -L4096M -n myvmdisk1 vg
  16.144 +\end{verbatim}
  16.145 +\end{quote}
  16.146 +
  16.147 +You should now see that you have a \path{/dev/vg/myvmdisk1} device.
  16.148 +Make a filesystem, mount it and populate it, e.g.:
  16.149 +\begin{quote}
  16.150 +\begin{verbatim}
  16.151 +# mkfs -t ext3 /dev/vg/myvmdisk1
  16.152 +# mount /dev/vg/myvmdisk1 /mnt
  16.153 +# cp -ax / /mnt
  16.154 +# umount /mnt
  16.155 +\end{verbatim}
  16.156 +\end{quote}
  16.157 +
  16.158 +Now configure your VM with the following disk configuration:
  16.159 +\begin{quote}
  16.160 +\begin{verbatim}
  16.161 + disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
  16.162 +\end{verbatim}
  16.163 +\end{quote}
  16.164 +
  16.165 +LVM enables you to grow the size of logical volumes, but you'll need
  16.166 +to resize the corresponding file system to make use of the new space.
  16.167 +Some file systems (e.g.\ ext3) now support online resize.  See the LVM
  16.168 +manuals for more details.
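         +
         +For example, to add a gigabyte to the volume and then grow the ext3
         +file system into it (a sketch; \path{ext2online} is one online
         +resize utility, and your distribution may provide a different
         +tool):
         +\begin{quote}
         +\begin{verbatim}
         +# lvextend -L+1G /dev/vg/myvmdisk1
         +# ext2online /dev/vg/myvmdisk1
         +\end{verbatim}
         +\end{quote}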
  16.169 +
  16.170 +You can also use LVM for creating copy-on-write (CoW) clones of LVM
  16.171 +volumes (known as writable persistent snapshots in LVM terminology).
  16.172 +This facility is new in Linux 2.6.8, so isn't as stable as one might
  16.173 +hope.  In particular, using lots of CoW LVM disks consumes a lot of
  16.174 +dom0 memory, and error conditions such as running out of disk space
  16.175 +are not handled well. Hopefully this will improve in future.
  16.176 +
  16.177 +To create two copy-on-write clones of the above file system you would
  16.178 +use the following commands:
  16.179 +
  16.180 +\begin{quote}
  16.181 +\begin{verbatim}
  16.182 +# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
  16.183 +# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
  16.184 +\end{verbatim}
  16.185 +\end{quote}
  16.186 +
  16.187 +Each of these can grow to have 1GB of differences from the master
  16.188 +volume. You can grow the amount of space for storing the differences
  16.189 +using the lvextend command, e.g.:
  16.190 +\begin{quote}
  16.191 +\begin{verbatim}
  16.192 +# lvextend -L+100M /dev/vg/myclonedisk1
  16.193 +\end{verbatim}
  16.194 +\end{quote}
  16.195 +
  16.196 +Don't let the `differences volume' ever fill up, otherwise LVM gets
  16.197 +rather confused. It may be possible to automate the growing process
  16.198 +by using \path{dmsetup wait} to spot the volume getting full and
  16.199 +then issuing an \path{lvextend}.
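         +
         +A minimal, untested sketch of such a loop (assuming the snapshot's
         +device-mapper name is \path{vg-myclonedisk1}):
         +\begin{quote}
         +\begin{verbatim}
         +# while dmsetup wait vg-myclonedisk1; do
         +>     lvextend -L+100M /dev/vg/myclonedisk1
         +> done
         +\end{verbatim}
         +\end{quote}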
  16.200 +
  16.201 +In principle, it is possible to continue writing to the volume that
  16.202 +has been cloned (the changes will not be visible to the clones), but
  16.203 +we wouldn't recommend this: keep the cloned volume as a `pristine'
  16.204 +file system install that isn't mounted directly by any of the virtual
  16.205 +machines.
  16.206 +
  16.207 +
  16.208 +\section{Using NFS Root}
  16.209 +
  16.210 +First, populate a root filesystem in a directory on the server
  16.211 +machine. This can be on a distinct physical machine, or simply run
  16.212 +within a virtual machine on the same node.
  16.213 +
  16.214 +Now configure the NFS server to export this filesystem over the
  16.215 +network by adding a line to \path{/etc/exports}, for instance:
  16.216 +
  16.217 +\begin{quote}
  16.218 +  \begin{small}
  16.219 +\begin{verbatim}
  16.220 +/export/vm1root      1.2.3.0/24(rw,sync,no_root_squash)
  16.221 +\end{verbatim}
  16.222 +  \end{small}
  16.223 +\end{quote}
  16.224 +
  16.225 +Finally, configure the domain to use NFS root.  In addition to the
  16.226 +normal variables, you should make sure to set the following values in
  16.227 +the domain's configuration file:
  16.228 +
  16.229 +\begin{quote}
  16.230 +  \begin{small}
  16.231 +\begin{verbatim}
  16.232 +root       = '/dev/nfs'
  16.233 +nfs_server = '2.3.4.5'       # substitute IP address of server
  16.234 +nfs_root   = '/path/to/root' # path to root FS on the server
  16.235 +\end{verbatim}
  16.236 +  \end{small}
  16.237 +\end{quote}
  16.238 +
  16.239 +The domain will need network access at boot time, so either statically
  16.240 +configure an IP address using the config variables \path{ip},
  16.241 +\path{netmask}, \path{gateway}, \path{hostname}; or enable DHCP
  16.242 +(\path{dhcp='dhcp'}).
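         +
         +For a static configuration, the relevant lines might look like the
         +following (addresses are illustrative):
         +
         +\begin{quote}
         +  \begin{small}
         +\begin{verbatim}
         +ip       = '1.2.3.4'
         +netmask  = '255.255.255.0'
         +gateway  = '1.2.3.1'
         +hostname = 'vm1'
         +\end{verbatim}
         +  \end{small}
         +\end{quote}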
  16.243 +
  16.244 +Note that the Linux NFS root implementation is known to have stability
  16.245 +problems under high load (this is not a Xen-specific problem), so this
  16.246 +configuration may not be appropriate for critical servers.
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/docs/src/user/domain_mgmt.tex	Tue Sep 20 09:43:46 2005 +0000
    17.3 @@ -0,0 +1,203 @@
    17.4 +\chapter{Domain Management Tools}
    17.5 +
    17.6 +The previous chapter described a simple example of how to configure
    17.7 +and start a domain.  This chapter summarises the tools available to
    17.8 +manage running domains.
    17.9 +
   17.10 +
   17.11 +\section{Command-line Management}
   17.12 +
   17.13 +Command line management tasks are also performed using the \path{xm}
   17.14 +tool.  For online help for the commands available, type:
   17.15 +\begin{quote}
   17.16 +  \verb_# xm help_
   17.17 +\end{quote}
   17.18 +
   17.19 +You can also type \path{xm help $<$command$>$} for more information on
   17.20 +a given command.
   17.21 +
   17.22 +\subsection{Basic Management Commands}
   17.23 +
   17.24 +The most important \path{xm} commands are:
   17.25 +\begin{quote}
   17.26 +  \verb_# xm list_: Lists all domains running.\\
   17.27 +  \verb_# xm consoles_: Gives information about the domain consoles.\\
   17.28 +  \verb_# xm console_: Opens a console to a domain (e.g.\
   17.29 +  \verb_# xm console myVM_)
   17.30 +\end{quote}
   17.31 +
   17.32 +\subsection{\tt xm list}
   17.33 +
   17.34 +The output of \path{xm list} is in rows of the following format:
   17.35 +\begin{center} {\tt name domid memory cpu state cputime console}
   17.36 +\end{center}
   17.37 +
   17.38 +\begin{quote}
   17.39 +  \begin{description}
   17.40 +  \item[name] The descriptive name of the virtual machine.
   17.41 +  \item[domid] The domain ID this virtual machine is
   17.42 +    running in.
   17.43 +  \item[memory] Memory size in megabytes.
   17.44 +  \item[cpu] The CPU this domain is running on.
   17.45 +  \item[state] Domain state consists of 5 fields:
   17.46 +    \begin{description}
   17.47 +    \item[r] running
   17.48 +    \item[b] blocked
   17.49 +    \item[p] paused
   17.50 +    \item[s] shutdown
   17.51 +    \item[c] crashed
   17.52 +    \end{description}
   17.53 +  \item[cputime] How much CPU time (in seconds) the domain has used so
   17.54 +    far.
   17.55 +  \item[console] TCP port accepting connections to the domain's
   17.56 +    console.
   17.57 +  \end{description}
   17.58 +\end{quote}
   17.59 +
   17.60 +The \path{xm list} command also supports a long output format when the
   17.61 +\path{-l} switch is used.  This outputs the full details of the
   17.62 +running domains in \xend's SXP configuration format.
   17.63 +
   17.64 +For example, suppose the system is running the ttylinux domain as
   17.65 +described earlier.  The list command should produce output somewhat
   17.66 +like the following:
   17.67 +\begin{verbatim}
   17.68 +# xm list
   17.69 +Name              Id  Mem(MB)  CPU  State  Time(s)  Console
   17.70 +Domain-0           0      251    0  r----    172.2        
   17.71 +ttylinux           5       63    0  -b---      3.0    9605
   17.72 +\end{verbatim}
   17.73 +
   17.74 +Here we can see the details for the ttylinux domain, as well as for
   17.75 +domain~0 (which, of course, is always running).  Note that the console
   17.76 +port for the ttylinux domain is 9605.  This can be connected to over
   17.77 +TCP using a terminal program (e.g.\ \path{telnet} or, better,
   17.78 +\path{xencons}).  The simplest way to connect is to use the
   17.79 +\path{xm~console} command, specifying the domain name or ID.  To
   17.80 +connect to the console of the ttylinux domain, we could use any of the
   17.81 +following:
   17.82 +\begin{verbatim}
   17.83 +# xm console ttylinux
   17.84 +# xm console 5
   17.85 +# xencons localhost 9605
   17.86 +\end{verbatim}
   17.87 +
   17.88 +\section{Domain Save and Restore}
   17.89 +
   17.90 +The administrator of a Xen system may suspend a virtual machine's
   17.91 +current state into a disk file in domain~0, allowing it to be resumed
   17.92 +at a later time.
   17.93 +
   17.94 +The ttylinux domain described earlier can be suspended to disk using
   17.95 +the command:
   17.96 +\begin{verbatim}
   17.97 +# xm save ttylinux ttylinux.xen
   17.98 +\end{verbatim}
   17.99 +
  17.100 +This will stop the domain named `ttylinux' and save its current state
  17.101 +into a file called \path{ttylinux.xen}.
  17.102 +
  17.103 +To resume execution of this domain, use the \path{xm restore} command:
  17.104 +\begin{verbatim}
  17.105 +# xm restore ttylinux.xen
  17.106 +\end{verbatim}
  17.107 +
  17.108 +This will restore the state of the domain and restart it.  The domain
  17.109 +will carry on as before and the console may be reconnected using the
  17.110 +\path{xm console} command, as above.
  17.111 +
  17.112 +\section{Live Migration}
  17.113 +
  17.114 +Live migration is used to transfer a domain between physical hosts
  17.115 +whilst that domain continues to perform its usual activities --- from
  17.116 +the user's perspective, the migration should be imperceptible.
  17.117 +
  17.118 +To perform a live migration, both hosts must be running Xen / \xend\
  17.119 +and the destination host must have sufficient resources (e.g.\ memory
  17.120 +capacity) to accommodate the domain after the move. Furthermore, we
  17.121 +currently require both source and destination machines to be on the
  17.122 +same L2 subnet.
  17.123 +
  17.124 +Currently, there is no support for providing automatic remote access
  17.125 +to filesystems stored on local disk when a domain is migrated.
  17.126 +Administrators should choose an appropriate storage solution (i.e.\
  17.127 +SAN, NAS, etc.) to ensure that domain filesystems are also available
  17.128 +on their destination node. GNBD is a good method for exporting a
  17.129 +volume from one machine to another. iSCSI can do a similar job, but is
  17.130 +more complex to set up.
  17.131 +
  17.132 +When a domain migrates, its MAC and IP addresses move with it; thus
  17.133 +it is only possible to migrate VMs within the same layer-2 network
  17.134 +and IP subnet. If the destination node is on a different subnet, the
  17.135 +administrator would need to manually configure a suitable etherip or
  17.136 +IP tunnel in domain~0 of the remote node.
  17.137 +
  17.138 +A domain may be migrated using the \path{xm migrate} command.  To live
  17.139 +migrate a domain to another machine, we would use the command:
  17.140 +
  17.141 +\begin{verbatim}
  17.142 +# xm migrate --live mydomain destination.ournetwork.com
  17.143 +\end{verbatim}
  17.144 +
  17.145 +Without the \path{--live} flag, \xend\ simply stops the domain and
  17.146 +copies the memory image over to the new node and restarts it. Since
  17.147 +domains can have large allocations this can be quite time consuming,
  17.148 +even on a Gigabit network. With the \path{--live} flag \xend\ attempts
  17.149 +to keep the domain running while the migration is in progress,
  17.150 +resulting in typical `downtimes' of just 60--300ms.
  17.151 +
  17.152 +For now it will be necessary to reconnect to the domain's console on
  17.153 +the new machine using the \path{xm console} command.  If a migrated
  17.154 +domain has any open network connections then they will be preserved,
  17.155 +so SSH connections do not have this limitation.
  17.156 +
  17.157 +
  17.158 +\section{Managing Domain Memory}
  17.159 +
  17.160 +XenLinux domains have the ability to relinquish / reclaim machine
  17.161 +memory at the request of the administrator or the user of the domain.
  17.162 +
  17.163 +\subsection{Setting memory footprints from dom0}
  17.164 +
  17.165 +The machine administrator can request that a domain alter its memory
  17.166 +footprint using the \path{xm set-mem} command.  For instance, we can
  17.167 +request that our example ttylinux domain reduce its memory footprint
  17.168 +to 32 megabytes.
  17.169 +
  17.170 +\begin{verbatim}
  17.171 +# xm set-mem ttylinux 32
  17.172 +\end{verbatim}
  17.173 +
  17.174 +We can now see the result of this in the output of \path{xm list}:
  17.175 +
  17.176 +\begin{verbatim}
  17.177 +# xm list
  17.178 +Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  17.179 +Domain-0           0      251    0  r----    172.2        
  17.180 +ttylinux           5       31    0  -b---      4.3    9605
  17.181 +\end{verbatim}
  17.182 +
  17.183 +The domain has responded to the request by returning memory to Xen. We
  17.184 +can restore the domain to its original size using the command line:
  17.185 +
  17.186 +\begin{verbatim}
  17.187 +# xm set-mem ttylinux 64
  17.188 +\end{verbatim}
  17.189 +
  17.190 +\subsection{Setting memory footprints from within a domain}
  17.191 +
  17.192 +The virtual file \path{/proc/xen/balloon} allows the owner of a domain
  17.193 +to adjust their own memory footprint.  Reading the file (e.g.\
  17.194 +\path{cat /proc/xen/balloon}) prints out the current memory footprint
  17.195 +of the domain.  Writing the file (e.g.\ \path{echo new\_target >
  17.196 +  /proc/xen/balloon}) requests that the kernel adjust the domain's
  17.197 +memory footprint to a new value.
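         +
         +For example (a sketch; the target value here assumes the balloon
         +driver interprets it in kilobytes, so check your kernel's
         +documentation for the exact format it accepts):
         +
         +\begin{verbatim}
         +# cat /proc/xen/balloon
         +# echo 65536 > /proc/xen/balloon
         +\end{verbatim}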
  17.198 +
  17.199 +\subsection{Setting memory limits}
  17.200 +
  17.201 +Xen associates a memory size limit with each domain.  By default, this
  17.202 +is the amount of memory the domain is originally started with,
  17.203 +preventing the domain from ever growing beyond this size.  To permit a
  17.204 +domain to grow beyond its original allocation or to prevent a domain
  17.205 +you've shrunk from reclaiming the memory it relinquished, use the
  17.206 +\path{xm maxmem} command.
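         +
         +For example, to allow the ttylinux domain to grow to 128MB one
         +might use the following (a sketch; see \path{xm help maxmem} for
         +the exact syntax on your system):
         +
         +\begin{verbatim}
         +# xm maxmem ttylinux 128
         +\end{verbatim}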
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/docs/src/user/glossary.tex	Tue Sep 20 09:43:46 2005 +0000
    18.3 @@ -0,0 +1,79 @@
    18.4 +\chapter{Glossary of Terms}
    18.5 +
    18.6 +\begin{description}
    18.7 +
    18.8 +\item[Atropos] One of the CPU schedulers provided by Xen.  Atropos
    18.9 +  provides domains with absolute shares of the CPU, with timeliness
   18.10 +  guarantees and a mechanism for sharing out `slack time'.
   18.11 +
   18.12 +\item[BVT] The BVT scheduler is used to give proportional fair shares
   18.13 +  of the CPU to domains.
   18.14 +
   18.15 +\item[Exokernel] A minimal piece of privileged code, similar to a {\bf
   18.16 +    microkernel} but providing a more `hardware-like' interface to the
   18.17 +  tasks it manages.  This is similar to a paravirtualising VMM like
   18.18 +  {\bf Xen} but was designed as a new operating system structure,
   18.19 +  rather than specifically to run multiple conventional OSs.
   18.20 +
   18.21 +\item[Domain] A domain is the execution context that contains a
   18.22 +  running {\bf virtual machine}.  The relationship between virtual
   18.23 +  machines and domains on Xen is similar to that between programs and
   18.24 +  processes in an operating system: a virtual machine is a persistent
   18.25 +  entity that resides on disk (somewhat like a program).  When it is
   18.26 +  loaded for execution, it runs in a domain.  Each domain has a {\bf
   18.27 +    domain ID}.
   18.28 +
   18.29 +\item[Domain 0] The first domain to be started on a Xen machine.
   18.30 +  Domain 0 is responsible for managing the system.
   18.31 +
   18.32 +\item[Domain ID] A unique identifier for a {\bf domain}, analogous to
   18.33 +  a process ID in an operating system.
   18.34 +
   18.35 +\item[Full virtualisation] An approach to virtualisation which
   18.36 +  requires no modifications to the hosted operating system, providing
   18.37 +  the illusion of a complete system of real hardware devices.
   18.38 +
   18.39 +\item[Hypervisor] An alternative term for {\bf VMM}, used because it
   18.40 +  means `beyond supervisor', since it is responsible for managing
   18.41 +  multiple `supervisor' kernels.
   18.42 +
   18.43 +\item[Live migration] A technique for moving a running virtual machine
   18.44 +  to another physical host, without stopping it or the services
   18.45 +  running on it.
   18.46 +
   18.47 +\item[Microkernel] A small base of code running at the highest
   18.48 +  hardware privilege level.  A microkernel is responsible for sharing
   18.49 +  CPU and memory (and sometimes other devices) between less privileged
   18.50 +  tasks running on the system.  This is similar to a VMM, particularly
   18.51 +  a {\bf paravirtualising} VMM, but typically addressing a different
   18.52 +  problem space and providing a different kind of interface.
   18.53 +
   18.54 +\item[NetBSD/Xen] A port of NetBSD to the Xen architecture.
   18.55 +
   18.56 +\item[Paravirtualisation] An approach to virtualisation which requires
   18.57 +  modifications to the operating system in order to run in a virtual
   18.58 +  machine.  Xen uses paravirtualisation but preserves binary
   18.59 +  compatibility for user space applications.
   18.60 +
   18.61 +\item[Shadow pagetables] A technique for hiding the layout of machine
   18.62 +  memory from a virtual machine's operating system.  Used in some {\bf
   18.63 +    VMMs} to provide the illusion of contiguous physical memory, in
   18.64 +  Xen this is used during {\bf live migration}.
   18.65 +
   18.66 +\item[Virtual Machine] The environment in which a hosted operating
   18.67 +  system runs, providing the abstraction of a dedicated machine.  A
   18.68 +  virtual machine may be identical to the underlying hardware (as in
   18.69 +  {\bf full virtualisation}), or it may differ (as in {\bf
   18.70 +    paravirtualisation}).
   18.71 +
   18.72 +\item[VMM] Virtual Machine Monitor - the software that allows multiple
   18.73 +  virtual machines to be multiplexed on a single physical machine.
   18.74 +
   18.75 +\item[Xen] Xen is a paravirtualising virtual machine monitor,
   18.76 +  developed primarily by the Systems Research Group at the University
   18.77 +  of Cambridge Computer Laboratory.
   18.78 +
   18.79 +\item[XenLinux] Official name for the port of the Linux kernel that
   18.80 +  runs on Xen.
   18.81 +
   18.82 +\end{description}
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/docs/src/user/installation.tex	Tue Sep 20 09:43:46 2005 +0000
    19.3 @@ -0,0 +1,394 @@
    19.4 +\chapter{Installation}
    19.5 +
    19.6 +The Xen distribution includes three main components: Xen itself, ports
    19.7 +of Linux 2.4, Linux 2.6 and NetBSD to run on Xen, and the userspace
    19.8 +tools required to manage a Xen-based system.  This chapter describes
    19.9 +how to install the Xen~2.0 distribution from source.  Alternatively,
   19.10 +there may be pre-built packages available as part of your operating
   19.11 +system distribution.
   19.12 +
   19.13 +
   19.14 +\section{Prerequisites}
   19.15 +\label{sec:prerequisites}
   19.16 +
   19.17 +The following is a full list of prerequisites.  Items marked `$\dag$'
   19.18 +are required by the \xend\ control tools, and hence required if you
   19.19 +want to run more than one virtual machine; items marked `$*$' are only
   19.20 +required if you wish to build from source.
   19.21 +\begin{itemize}
   19.22 +\item A working Linux distribution using the GRUB bootloader and
   19.23 +  running on a P6-class (or newer) CPU.
   19.24 +\item [$\dag$] The \path{iproute2} package.
   19.25 +\item [$\dag$] The Linux bridge-utils\footnote{Available from {\tt
   19.26 +      http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl}).
   19.27 +\item [$\dag$] An installation of Twisted~v1.3 or
   19.28 +  above\footnote{Available from {\tt http://www.twistedmatrix.com}}.
   19.29 +  There may be a binary package available for your distribution;
   19.30 +  alternatively it can be installed by running `{\sl make
   19.31 +    install-twisted}' in the root of the Xen source tree.
   19.32 +\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
   19.33 +\item [$*$] Development installation of libcurl (e.g., libcurl-devel).
   19.34 +\item [$*$] Development installation of zlib (e.g., zlib-dev).
   19.35 +\item [$*$] Development installation of Python v2.2 or later (e.g.,
   19.36 +  python-dev).
   19.37 +\item [$*$] \LaTeX\ and transfig are required to build the
   19.38 +  documentation.
   19.39 +\end{itemize}
   19.40 +
   19.41 +Once you have satisfied the relevant prerequisites, you can now
   19.42 +install either a binary or source distribution of Xen.
   19.43 +
   19.44 +
   19.45 +\section{Installing from Binary Tarball}
   19.46 +
   19.47 +Pre-built tarballs are available for download from the Xen download
   19.48 +page
   19.49 +\begin{quote} {\tt http://xen.sf.net}
   19.50 +\end{quote}
   19.51 +
   19.52 +Once you've downloaded the tarball, simply unpack and install:
   19.53 +\begin{verbatim}
   19.54 +# tar zxvf xen-2.0-install.tgz
   19.55 +# cd xen-2.0-install
   19.56 +# sh ./install.sh
   19.57 +\end{verbatim}
   19.58 +
   19.59 +Once you've installed the binaries you need to configure your system
   19.60 +as described in Section~\ref{s:configure}.
   19.61 +
   19.62 +
   19.63 +\section{Installing from Source}
   19.64 +
   19.65 +This section describes how to obtain, build, and install Xen from
   19.66 +source.
   19.67 +
   19.68 +\subsection{Obtaining the Source}
   19.69 +
   19.70 +The Xen source tree is available as either a compressed source
   19.71 +tarball or as a clone of our master BitKeeper repository.
   19.72 +
   19.73 +\begin{description}
   19.74 +\item[Obtaining the Source Tarball]\mbox{} \\
   19.75 +  Stable versions (and daily snapshots) of the Xen source tree are
   19.76 +  available as compressed tarballs from the Xen download page
   19.77 +  \begin{quote} {\tt http://xen.sf.net}
   19.78 +  \end{quote}
   19.79 +
   19.80 +\item[Using BitKeeper]\mbox{} \\
   19.81 +  If you wish to install Xen from a clone of our latest BitKeeper
   19.82 +  repository then you will need to install the BitKeeper tools.
   19.83 +  Download instructions for BitKeeper can be obtained by filling out
   19.84 +  the form at:
   19.85 +  \begin{quote} {\tt http://www.bitmover.com/cgi-bin/download.cgi}
   19.86 +\end{quote}
   19.87 +The public master BK repository for the 2.0 release lives at:
   19.88 +\begin{quote} {\tt bk://xen.bkbits.net/xen-2.0.bk}
   19.89 +\end{quote} 
   19.90 +You can use BitKeeper to download it and keep it updated with the
   19.91 +latest features and fixes.
   19.92 +
   19.93 +Change to the directory in which you want to put the source code, then
   19.94 +run:
   19.95 +\begin{verbatim}
   19.96 +# bk clone bk://xen.bkbits.net/xen-2.0.bk
   19.97 +\end{verbatim}
   19.98 +
   19.99 +Under your current directory, a new directory named \path{xen-2.0.bk}
  19.100 +has been created, which contains all the source code for Xen, the OS
  19.101 +ports, and the control tools. You can update your repository with the
  19.102 +latest changes at any time by running:
  19.103 +\begin{verbatim}
  19.104 +# cd xen-2.0.bk # to change into the local repository
  19.105 +# bk pull       # to update the repository
  19.106 +\end{verbatim}
  19.107 +\end{description}
  19.108 +
  19.109 +% \section{The distribution}
  19.110 +%
  19.111 +% The Xen source code repository is structured as follows:
  19.112 +%
  19.113 +% \begin{description}
  19.114 +% \item[\path{tools/}] Xen node controller daemon (Xend), command line
  19.115 +%   tools, control libraries
  19.116 +% \item[\path{xen/}] The Xen VMM.
  19.117 +% \item[\path{linux-*-xen-sparse/}] Xen support for Linux.
  19.118 +% \item[\path{linux-*-patches/}] Experimental patches for Linux.
  19.119 +% \item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
  19.120 +% \item[\path{docs/}] Various documentation files for users and
  19.121 +%   developers.
  19.122 +% \item[\path{extras/}] Bonus extras.
  19.123 +% \end{description}
  19.124 +
  19.125 +\subsection{Building from Source}
  19.126 +
  19.127 +The top-level Xen Makefile includes a target `world' that will do the
  19.128 +following:
  19.129 +
  19.130 +\begin{itemize}
  19.131 +\item Build Xen.
  19.132 +\item Build the control tools, including \xend.
  19.133 +\item Download (if necessary) and unpack the Linux 2.6 source code,
  19.134 +  and patch it for use with Xen.
  19.135 +\item Build a Linux kernel to use in domain 0 and a smaller
  19.136 +  unprivileged kernel, which can optionally be used for unprivileged
  19.137 +  virtual machines.
  19.138 +\end{itemize}
  19.139 +
  19.140 +After the build has completed you should have a top-level directory
  19.141 +called \path{dist/} in which all resulting targets will be placed; of
  19.142 +particular interest are the two XenLinux kernel images, one
  19.143 +with a `-xen0' extension which contains hardware device drivers and
  19.144 +drivers for Xen's virtual devices, and one with a `-xenU' extension
  19.145 +that just contains the virtual ones. These are found in
  19.146 +\path{dist/install/boot/} along with the image for Xen itself and the
  19.147 +configuration files used during the build.
  19.148 +
  19.149 +The NetBSD port can be built using:
  19.150 +\begin{quote}
  19.151 +\begin{verbatim}
  19.152 +# make netbsd20
  19.153 +\end{verbatim}
  19.154 +\end{quote}
  19.155 +The NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
  19.156 +The snapshot is downloaded as part of the build process, if it is not
  19.157 +yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
  19.158 +process also downloads a toolchain which includes all the tools
  19.159 +necessary to build the NetBSD kernel under Linux.
  19.160 +
  19.161 +To further customize the set of kernels built, you need to edit the
  19.162 +top-level Makefile. Look for the line:
  19.163 +
  19.164 +\begin{quote}
  19.165 +\begin{verbatim}
  19.166 +KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
  19.167 +\end{verbatim}
  19.168 +\end{quote}
  19.169 +
  19.170 +You can edit this line to include any set of operating system kernels
  19.171 +which have configurations in the top-level \path{buildconfigs/}
  19.172 +directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
  19.173 +kernel containing only virtual device drivers.
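         +
         +For example, to add that Linux 2.4 kernel to the build, the line
         +would become:
         +\begin{quote}
         +\begin{verbatim}
         +KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU mk.linux-2.4-xenU
         +\end{verbatim}
         +\end{quote}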
  19.174 +
  19.175 +%% Inspect the Makefile if you want to see what goes on during a
  19.176 +%% build.  Building Xen and the tools is straightforward, but XenLinux
  19.177 +%% is more complicated.  The makefile needs a `pristine' Linux kernel
  19.178 +%% tree to which it will then add the Xen architecture files.  You can
  19.179 +%% tell the makefile the location of the appropriate Linux compressed
  19.180 +%% tar file by
  19.181 +%% setting the LINUX\_SRC environment variable, e.g. \\
  19.182 +%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
  19.183 +%% placing the tar file somewhere in the search path of {\tt
  19.184 +%%   LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the
  19.185 +%% makefile can't find a suitable kernel tar file it attempts to
  19.186 +%% download it from kernel.org (this won't work if you're behind a
  19.187 +%% firewall).
  19.188 +
  19.189 +%% After untaring the pristine kernel tree, the makefile uses the {\tt
  19.190 +%%   mkbuildtree} script to add the Xen patches to the kernel.
  19.191 +
  19.192 +
  19.193 +%% The procedure is similar to build the Linux 2.4 port: \\
  19.194 +%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
  19.195 +
  19.196 +
  19.197 +%% \framebox{\parbox{5in}{
  19.198 +%%     {\bf Distro specific:} \\
  19.199 +%%     {\it Gentoo} --- if not using udev (most installations,
  19.200 +%%     currently), you'll need to enable devfs and devfs mount at boot
  19.201 +%%     time in the xen0 config.  }}
  19.202 +
  19.203 +\subsection{Custom XenLinux Builds}
  19.204 +
  19.205 +% If you have an SMP machine you may wish to give the {\tt '-j4'}
  19.206 +% argument to make to get a parallel build.
  19.207 +
  19.208 +If you wish to build a customized XenLinux kernel (e.g.\ to support
  19.209 +additional devices or enable distribution-required features), you can
  19.210 +use the standard Linux configuration mechanisms, specifying that the
  19.211 +architecture being built for is \path{xen}, e.g.:
  19.212 +\begin{quote}
  19.213 +\begin{verbatim}
  19.214 +# cd linux-2.6.11-xen0
  19.215 +# make ARCH=xen xconfig
  19.216 +# cd ..
  19.217 +# make
  19.218 +\end{verbatim}
  19.219 +\end{quote}
  19.220 +
  19.221 +You can also copy an existing Linux configuration (\path{.config})
  19.222 +into \path{linux-2.6.11-xen0} and execute:
  19.223 +\begin{quote}
  19.224 +\begin{verbatim}
  19.225 +# make ARCH=xen oldconfig
  19.226 +\end{verbatim}
  19.227 +\end{quote}
  19.228 +
  19.229 +You may be prompted with some Xen-specific options; we advise
  19.230 +accepting the defaults for these options.
  19.231 +
  19.232 +Note that the only difference between the two types of Linux kernel
  19.233 +that are built is the configuration file used for each.  The `U'
  19.234 +suffixed (unprivileged) versions don't contain any of the physical
  19.235 +hardware device drivers, leading to a 30\% reduction in size; hence
  19.236 +you may prefer these for your non-privileged domains.  The `0'
  19.237 +suffixed privileged versions can be used to boot the system, as well
  19.238 +as in driver domains and unprivileged domains.
  19.239 +
  19.240 +\subsection{Installing the Binaries}
  19.241 +
  19.242 +The files produced by the build process are stored under the
  19.243 +\path{dist/install/} directory. To install them in their default
  19.244 +locations, do:
  19.245 +\begin{quote}
  19.246 +\begin{verbatim}
  19.247 +# make install
  19.248 +\end{verbatim}
  19.249 +\end{quote}
  19.250 +
  19.251 +Alternatively, users with special installation requirements may wish
  19.252 +to install them manually by copying the files to their appropriate
  19.253 +destinations.
  19.254 +
  19.255 +%% Files in \path{install/boot/} include:
  19.256 +%% \begin{itemize}
  19.257 +%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
  19.258 +%% \item \path{install/boot/vmlinuz-2.6-xen0} Link to domain 0
  19.259 +%%   XenLinux kernel
  19.260 +%% \item \path{install/boot/vmlinuz-2.6-xenU} Link to unprivileged
  19.261 +%%   XenLinux kernel
  19.262 +%% \end{itemize}
  19.263 +
  19.264 +The \path{dist/install/boot} directory will also contain the config
  19.265 +files used for building the XenLinux kernels, and also versions of Xen
  19.266 +and XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6}
  19.267 +and \path{vmlinux-syms-2.6.11.11-xen0}) which are essential for
  19.268 +interpreting crash dumps.  Retain these files as the developers may
  19.269 +wish to see them if you post on the mailing list.
  19.270 +
  19.271 +
  19.272 +\section{Configuration}
  19.273 +\label{s:configure}
  19.274 +
  19.275 +Once you have built and installed the Xen distribution, it is simple
  19.276 +to prepare the machine for booting and running Xen.
  19.277 +
  19.278 +\subsection{GRUB Configuration}
  19.279 +
  19.280 +An entry should be added to \path{grub.conf} (often found under
  19.281 +\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
  19.282 +This file is sometimes called \path{menu.lst}, depending on your
  19.283 +distribution.  The entry should look something like the following:
  19.284 +
  19.285 +{\small
  19.286 +\begin{verbatim}
  19.287 +title Xen 2.0 / XenLinux 2.6
  19.288 +  kernel /boot/xen-2.0.gz dom0_mem=131072
  19.289 +  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
  19.290 +\end{verbatim}
  19.291 +}
  19.292 +
  19.293 +The kernel line tells GRUB where to find Xen itself and what boot
  19.294 +parameters should be passed to it (in this case, setting domain 0's
  19.295 +memory allocation in kilobytes; serial console settings are covered
  19.296 +below).  For more details on the various Xen boot parameters see
  19.297 +Section~\ref{s:xboot}.
  19.298 +
  19.299 +The module line of the configuration describes the location of the
  19.300 +XenLinux kernel that Xen should start and the parameters that should
  19.301 +be passed to it (these are standard Linux parameters, identifying the
  19.302 +root device and specifying it be initially mounted read only and
  19.303 +instructing that console output be sent to the screen).  Some
  19.304 +distributions such as SuSE do not require the \path{ro} parameter.
  19.305 +
  19.306 +%% \framebox{\parbox{5in}{
  19.307 +%%     {\bf Distro specific:} \\
  19.308 +%%     {\it SuSE} --- Omit the {\tt ro} option from the XenLinux
  19.309 +%%     kernel command line, since the partition won't be remounted rw
  19.310 +%%     during boot.  }}
  19.311 +
  19.312 +
  19.313 +If you want to use an initrd, just add another \path{module} line to
  19.314 +the configuration, as usual:
  19.315 +
  19.316 +{\small
  19.317 +\begin{verbatim}
  19.318 +  module /boot/my_initrd.gz
  19.319 +\end{verbatim}
  19.320 +}
  19.321 +
  19.322 +As always when installing a new kernel, it is recommended that you do
  19.323 +not delete existing menu options from \path{menu.lst} --- you may want
  19.324 +to boot your old Linux kernel in future, particularly if you have
  19.325 +problems.
  19.326 +
  19.327 +\subsection{Serial Console (optional)}
  19.328 +
  19.329 +%% kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
  19.330 +%% module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
  19.331 +
  19.332 +
  19.333 +In order to configure Xen serial console output, it is necessary to
  19.334 +add a boot option to your GRUB config; e.g.\ replace the above kernel
  19.335 +line with:
  19.336 +\begin{quote}
  19.337 +{\small
  19.338 +\begin{verbatim}
  19.339 +   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
  19.340 +\end{verbatim}}
  19.341 +\end{quote}
  19.342 +
  19.343 +This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 1
  19.344 +stop bit and no parity. Modify these parameters for your setup.
  19.345 +
  19.346 +One can also configure XenLinux to share the serial console; to
  19.347 +achieve this append ``\path{console=ttyS0}'' to your module line.
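         +
         +For example, extending the module line from the GRUB entry above:
         +\begin{quote}
         +{\small
         +\begin{verbatim}
         +  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0 console=ttyS0
         +\end{verbatim}}
         +\end{quote}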
  19.348 +
  19.349 +If you wish to be able to log in over the XenLinux serial console it
  19.350 +is necessary to add a line into \path{/etc/inittab}, just as per
  19.351 +regular Linux. Simply add the line:
  19.352 +\begin{quote} {\small {\tt c:2345:respawn:/sbin/mingetty ttyS0}}
  19.353 +\end{quote}
  19.354 +
  19.355 +and you should be able to log in. Note that successfully logging in
  19.356 +as root over the serial line will require adding \path{ttyS0} to
  19.357 +\path{/etc/securetty} in most modern distributions.
  19.358 +
  19.359 +\subsection{TLS Libraries}
  19.360 +
  19.361 +Users of the XenLinux 2.6 kernel should disable Thread Local Storage
  19.362 +(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
  19.363 +attempting to run with a XenLinux kernel\footnote{If you boot without
  19.364 +  first disabling TLS, you will get a warning message during the boot
  19.365 +  process. In this case, simply perform the rename after the machine
  19.366 +  is up and then run \texttt{/sbin/ldconfig} to make it take effect.}.
  19.367 +You can always reenable it by restoring the directory to its original
  19.368 +location (i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
  19.369 +
  19.370 +The reason for this is that the current TLS implementation uses
  19.371 +segmentation in a way that is not permissible under Xen.  If TLS is
  19.372 +not disabled, an emulation mode is used within Xen which reduces
  19.373 +performance substantially.
  19.374 +
  19.375 +We hope that this issue can be resolved by working with Linux
  19.376 +distribution vendors to implement a minor backward-compatible change
  19.377 +to the TLS library.
  19.378 +
  19.379 +
  19.380 +\section{Booting Xen}
  19.381 +
  19.382 +It should now be possible to restart the system and use Xen.  Reboot
  19.383 +as usual but choose the new Xen option when the GRUB screen appears.
  19.384 +
  19.385 +What follows should look much like a conventional Linux boot.  The
  19.386 +first portion of the output comes from Xen itself, supplying low level
  19.387 +information about itself and the machine it is running on.  The
  19.388 +following portion of the output comes from XenLinux.
  19.389 +
  19.390 +You may see some errors during the XenLinux boot.  These are not
  19.391 +necessarily anything to worry about --- they may result from kernel
  19.392 +configuration differences between your XenLinux kernel and the one you
  19.393 +usually use.
  19.394 +
  19.395 +When the boot completes, you should be able to log into your system as
  19.396 +usual.  If you are unable to log in to your system running Xen, you
  19.397 +should still be able to reboot with your normal Linux kernel.
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/docs/src/user/introduction.tex	Tue Sep 20 09:43:46 2005 +0000
    20.3 @@ -0,0 +1,143 @@
    20.4 +\chapter{Introduction}
    20.5 +
    20.6 +
    20.7 +Xen is a \emph{paravirtualising} virtual machine monitor (VMM), or
    20.8 +`hypervisor', for the x86 processor architecture.  Xen can securely
    20.9 +execute multiple virtual machines on a single physical system with
   20.10 +close-to-native performance.  The virtual machine technology
   20.11 +facilitates enterprise-grade functionality, including:
   20.12 +
   20.13 +\begin{itemize}
   20.14 +\item Virtual machines with performance close to native hardware.
   20.15 +\item Live migration of running virtual machines between physical
   20.16 +  hosts.
   20.17 +\item Excellent hardware support (supports most Linux device drivers).
   20.18 +\item Sandboxed, re-startable device drivers.
   20.19 +\end{itemize}
   20.20 +
   20.21 +Paravirtualisation permits very high performance virtualisation, even
   20.22 +on architectures like x86 that are traditionally very hard to
   20.23 +virtualise.
   20.24 +
   20.25 +The drawback of this approach is that it requires operating systems to
   20.26 +be \emph{ported} to run on Xen.  Porting an OS to run on Xen is
   20.27 +similar to supporting a new hardware platform; however, the process is
   20.28 +simplified because the paravirtual machine architecture is very
   20.29 +similar to the underlying native hardware. Even though operating
   20.30 +system kernels must explicitly support Xen, a key feature is that user
   20.31 +space applications and libraries \emph{do not} require modification.
   20.32 +
   20.33 +Xen support is available for increasingly many operating systems:
   20.34 +right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
   20.35 +A FreeBSD port is undergoing testing and will be incorporated into the
   20.36 +release soon. Other OS ports, including Plan 9, are in progress.  We
   20.37 +hope that the arch-xen patches will be incorporated into the
   20.38 +mainstream releases of these operating systems in due course (as has
   20.39 +already happened for NetBSD).
   20.40 +
   20.41 +Possible usage scenarios for Xen include:
   20.42 +
   20.43 +\begin{description}
   20.44 +\item [Kernel development.] Test and debug kernel modifications in a
   20.45 +  sandboxed virtual machine --- no need for a separate test machine.
   20.46 +\item [Multiple OS configurations.] Run multiple operating systems
   20.47 +  simultaneously, for instance for compatibility or QA purposes.
   20.48 +\item [Server consolidation.] Move multiple servers onto a single
   20.49 +  physical host with performance and fault isolation provided at
   20.50 +  virtual machine boundaries.
   20.51 +\item [Cluster computing.] Management at VM granularity provides more
   20.52 +  flexibility than separately managing each physical host, but better
   20.53 +  control and isolation than single-system image solutions,
   20.54 +  particularly by using live migration for load balancing.
   20.55 +\item [Hardware support for custom OSes.] Allow development of new
   20.56 +  OSes while benefiting from the wide-ranging hardware support of
   20.57 +  existing OSes such as Linux.
   20.58 +\end{description}
   20.59 +
   20.60 +
   20.61 +\section{Structure of a Xen-Based System}
   20.62 +
   20.63 +A Xen system has multiple layers, the lowest and most privileged of
   20.64 +which is Xen itself. 
   20.65 +
   20.66 +Xen in turn may host multiple \emph{guest} operating systems, each of
   20.67 +which is executed within a secure virtual machine (in Xen terminology,
   20.68 +a \emph{domain}). Domains are scheduled by Xen to make effective use
   20.69 +of the available physical CPUs.  Each guest OS manages its own
   20.70 +applications, which includes responsibility for scheduling each
   20.71 +application within the time allotted to the VM by Xen.
   20.72 +
   20.73 +The first domain, \emph{domain 0}, is created automatically when the
   20.74 +system boots and has special management privileges. Domain 0 builds
   20.75 +other domains and manages their virtual devices. It also performs
   20.76 +administrative tasks such as suspending, resuming and migrating other
   20.77 +virtual machines.
   20.78 +
   20.79 +Within domain 0, a process called \emph{xend} runs to manage the
   20.80 +system.  \Xend\ is responsible for managing virtual machines and
   20.81 +providing access to their consoles.  Commands are issued to \xend\ over
   20.82 +an HTTP interface, either from a command-line tool or from a web
   20.83 +browser.
   20.84 +
   20.85 +
   20.86 +\section{Hardware Support}
   20.87 +
   20.88 +Xen currently runs only on the x86 architecture, requiring a `P6' or
   20.89 +newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
   20.90 +Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
   20.91 +supported, and we also have basic support for HyperThreading (SMT),
   20.92 +although this remains a topic for ongoing research. A port
   20.93 +specifically for x86/64 is in progress, although Xen already runs on
   20.94 +such systems in 32-bit legacy mode. In addition a port to the IA64
   20.95 +architecture is approaching completion. We hope to add other
   20.96 +architectures such as PPC and ARM in due course.
   20.97 +
   20.98 +Xen can currently use up to 4GB of memory.  It is possible for x86
   20.99 +machines to address up to 64GB of physical memory but there are no
  20.100 +current plans to support these systems: the x86/64 port is the planned
  20.101 +route to supporting larger memory sizes.
  20.102 +
  20.103 +Xen offloads most of the hardware support issues to the guest OS
  20.104 +running in Domain~0.  Xen itself contains only the code required to
  20.105 +detect and start secondary processors, set up interrupt routing, and
  20.106 +perform PCI bus enumeration.  Device drivers run within a privileged
  20.107 +guest OS rather than within Xen itself. This approach provides
  20.108 +compatibility with the majority of device hardware supported by Linux.
  20.109 +The default XenLinux build contains support for relatively modern
  20.110 +server-class network and disk hardware, but you can add support for
  20.111 +other hardware by configuring your XenLinux kernel in the normal way.
  20.112 +
  20.113 +
  20.114 +\section{History}
  20.115 +
  20.116 +Xen was originally developed by the Systems Research Group at the
  20.117 +University of Cambridge Computer Laboratory as part of the XenoServers
  20.118 +project, funded by the UK-EPSRC.
  20.119 +
  20.120 +XenoServers aim to provide a `public infrastructure for global
  20.121 +distributed computing', and Xen plays a key part in that, allowing us
  20.122 +to efficiently partition a single machine to enable multiple
  20.123 +independent clients to run their operating systems and applications in
  20.124 +an environment providing protection, resource isolation and
  20.125 +accounting.  The project web page contains further information along
  20.126 +with pointers to papers and technical reports:
  20.127 +\path{http://www.cl.cam.ac.uk/xeno}
  20.128 +
  20.129 +Xen has since grown into a fully-fledged project in its own right,
  20.130 +enabling us to investigate interesting research issues regarding the
  20.131 +best techniques for virtualising resources such as the CPU, memory,
  20.132 +disk and network.  The project has been bolstered by support from
  20.133 +Intel Research Cambridge, and HP Labs, who are now working closely
  20.134 +with us.
  20.135 +
  20.136 +Xen was first described in a paper presented at SOSP in
  20.137 +2003\footnote{\tt
  20.138 +  http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the
  20.139 +first public release (1.0) was made that October.  Since then, Xen has
  20.140 +significantly matured and is now used in production scenarios on many
  20.141 +sites.
  20.142 +
  20.143 +Xen 2.0 features greatly enhanced hardware support, configuration
  20.144 +flexibility, usability and a larger complement of supported operating
  20.145 +systems. This latest release takes Xen a step closer to becoming the
  20.146 +definitive open source solution for virtualisation.
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/docs/src/user/redhat.tex	Tue Sep 20 09:43:46 2005 +0000
    21.3 @@ -0,0 +1,61 @@
    21.4 +\chapter{Installing Xen / XenLinux on Red~Hat or Fedora Core}
    21.5 +
    21.6 +When using Xen / XenLinux on a standard Linux distribution there are a
    21.7 +couple of things to watch out for:
    21.8 +
    21.9 +Note that, because domains greater than 0 don't have any privileged
   21.10 +access at all, certain commands in the default boot sequence will fail,
   21.11 +e.g.\ attempts to update the hwclock, change the console font, update
   21.12 +the keytable map, start apmd (power management), or gpm (mouse
   21.13 +cursor).  Either ignore the errors (they should be harmless), or
   21.14 +remove them from the startup scripts.  Deleting the following links
   21.15 +is a good start: {\path{S24pcmcia}}, {\path{S09isdn}},
   21.16 +{\path{S17keytable}}, {\path{S26apmd}}, {\path{S85gpm}}.
   21.17 +
   21.18 +If you want to use a single root file system that works cleanly for
   21.19 +both domain~0 and unprivileged domains, a useful trick is to use
   21.20 +different `init' run levels. For example, use run level 3 for
   21.21 +domain~0, and run level 4 for other domains. This enables different
   21.22 +startup scripts to be run depending on the run level number passed
   21.23 +on the kernel command line.
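         +
         +For example (assuming the standard \path{extra} configuration
         +variable is used to append arguments to the domain's kernel command
         +line), an unprivileged domain's configuration file might contain:
         +
         +\begin{quote}
         +  \begin{small}\begin{verbatim}
         + extra = "4"
         +\end{verbatim}\end{small}
         +\end{quote}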
   21.24 +
   21.25 +If using NFS root file systems mounted either from an external server
   21.26 +or from domain~0 there are a couple of other gotchas.  The default
   21.27 +{\path{/etc/sysconfig/iptables}} rules block NFS, so part way through
   21.28 +the boot sequence things will suddenly go dead.
   21.29 +
   21.30 +If you're planning on having a separate NFS {\path{/usr}} partition,
   21.31 +the RH9 boot scripts don't make life easy - they attempt to mount NFS
   21.32 +file systems way too late in the boot process. The easiest way I found
   21.33 +to do this was to have a {\path{/linuxrc}} script run ahead of
   21.34 +{\path{/sbin/init}} that mounts {\path{/usr}}:
   21.35 +
   21.36 +\begin{quote}
   21.37 +  \begin{small}\begin{verbatim}
   21.38 + #!/bin/bash
   21.39 + /sbin/ifconfig lo 127.0.0.1
   21.40 + /sbin/portmap
   21.41 + /bin/mount /usr
   21.42 + exec /sbin/init "$@" <>/dev/console 2>&1
   21.43 +\end{verbatim}\end{small}
   21.44 +\end{quote}
   21.45 +
   21.46 +%% $ XXX SMH: font lock fix :-)
   21.47 +
   21.48 +The one slight complication with the above is that
   21.49 +{\path{/sbin/portmap}} is dynamically linked against
   21.50 +{\path{/usr/lib/libwrap.so.0}}.  Since this is in {\path{/usr}}, it
   21.51 +won't work. This can be solved by copying the file (and link) below
   21.52 +the {\path{/usr}} mount point, and just letting the file be `covered'
   21.53 +when the mount happens.
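         +
         +A sketch of the copy (paths are illustrative; perform it while the
         +NFS {\path{/usr}} is not mounted, so that the files land on the
         +underlying root file system):
         +
         +\begin{quote}
         +  \begin{small}\begin{verbatim}
         + # mkdir -p /usr/lib
         + # cp -a /path/to/libwrap.so.0* /usr/lib/
         +\end{verbatim}\end{small}
         +\end{quote}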
   21.54 +
   21.55 +In some installations, where a shared read-only {\path{/usr}} is being
   21.56 +used, it may be desirable to move other large directories over into
   21.57 +the read-only {\path{/usr}}. For example, you might replace
   21.58 +{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with links into
   21.59 +{\path{/usr/root/bin}}, {\path{/usr/root/lib}} and
   21.60 +{\path{/usr/root/sbin}} respectively. This creates other problems for
   21.61 +running the {\path{/linuxrc}} script, requiring bash, portmap, mount,
   21.62 +ifconfig, and a handful of other shared libraries to be copied below
   21.63 +the mount point --- a simple statically-linked C program would solve
   21.64 +this problem.
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/docs/src/user/start_addl_dom.tex	Tue Sep 20 09:43:46 2005 +0000
    22.3 @@ -0,0 +1,172 @@
    22.4 +\chapter{Starting Additional Domains}
    22.5 +
    22.6 +The first step in creating a new domain is to prepare a root
    22.7 +filesystem for it to boot from.  Typically, this might be stored in a
    22.8 +normal partition, an LVM or other volume manager partition, a disk
    22.9 +file or on an NFS server.  A simple way to do this is to boot
   22.10 +from your standard OS install CD and install the distribution into
   22.11 +another partition on your hard drive.
   22.12 +
   22.13 +To start the \xend\ control daemon, type
   22.14 +\begin{quote}
   22.15 +  \verb!# xend start!
   22.16 +\end{quote}
   22.17 +
   22.18 +If you wish the daemon to start automatically, see the instructions in
   22.19 +Section~\ref{s:xend}. Once the daemon is running, you can use the
   22.20 +\path{xm} tool to monitor and maintain the domains running on your
   22.21 +system. This chapter provides only a brief tutorial. We provide full
   22.22 +details of the \path{xm} tool in the next chapter.
   22.23 +
   22.24 +% \section{From the web interface}
   22.25 +%
   22.26 +% Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv}
   22.27 +% for more details) using the command: \\
   22.28 +% \verb_# xensv start_ \\
   22.29 +% This will also start Xend (see Chapter~\ref{cha:xend} for more
   22.30 +% information).
   22.31 +%
   22.32 +% The domain management interface will then be available at {\tt
   22.33 +%   http://your\_machine:8080/}.  This provides a user friendly wizard
   22.34 +% for starting domains and functions for managing running domains.
   22.35 +%
   22.36 +% \section{From the command line}
   22.37 +
   22.38 +
   22.39 +\section{Creating a Domain Configuration File}
   22.40 +
   22.41 +Before you can start an additional domain, you must create a
   22.42 +configuration file. We provide two example files which you can use as
   22.43 +a starting point:
   22.44 +\begin{itemize}
   22.45 +\item \path{/etc/xen/xmexample1} is a simple template configuration
   22.46 +  file for describing a single VM.
   22.47 +
   22.48 +\item \path{/etc/xen/xmexample2} is a template description that
   22.49 +  is intended to be reused for multiple virtual machines.  Setting the
   22.50 +  value of the \path{vmid} variable on the \path{xm} command line
   22.51 +  fills in parts of this template.
   22.52 +\end{itemize}
   22.53 +
   22.54 +Copy one of these files and edit it as appropriate.  Typical values
   22.55 +you may wish to edit include:
   22.56 +
   22.57 +\begin{quote}
   22.58 +\begin{description}
   22.59 +\item[kernel] Set this to the path of the kernel you compiled for use
   22.60 +  with Xen (e.g.\ \path{kernel = `/boot/vmlinuz-2.6-xenU'})
   22.61 +\item[memory] Set this to the size of the domain's memory in megabytes
   22.62 +  (e.g.\ \path{memory = 64})
   22.63 +\item[disk] Set the first entry in this list to calculate the offset
   22.64 +  of the domain's root partition, based on the domain ID.  Set the
   22.65 +  second to the location of \path{/usr} if you are sharing it between
   22.66 +  domains (e.g.\ \path{disk = [`phy:your\_hard\_drive\%d,sda1,w' \%
   22.67 +    (base\_partition\_number + vmid),
   22.68 +    `phy:your\_usr\_partition,sda6,r' ]})
   22.69 +\item[dhcp] Uncomment the dhcp variable, so that the domain will
   22.70 +  receive its IP address from a DHCP server (e.g.\ \path{dhcp=`dhcp'})
   22.71 +\end{description}
   22.72 +\end{quote}
   22.73 +
   22.74 +You may also want to edit the {\bf vif} variable in order to choose
   22.75 +the MAC address of the virtual ethernet interface yourself.  For
   22.76 +example:
   22.77 +\begin{quote}
   22.78 +\verb_vif = [`mac=00:06:AA:F6:BB:B3']_
   22.79 +\end{quote}
   22.80 +If you do not set this variable, \xend\ will automatically generate a
   22.81 +random MAC address from an unused range.
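
Putting these settings together, a complete configuration file might
look like the following sketch.  The kernel path, disk specification
and MAC address here are illustrative only and must be adapted to your
own system:
\begin{quote}
\begin{verbatim}
kernel = "/boot/vmlinuz-2.6-xenU"
memory = 64
name   = "ExampleDomain"
disk   = ['phy:hda3,sda1,w']
dhcp   = "dhcp"
vif    = ['mac=00:06:AA:F6:BB:B3']
\end{verbatim}
\end{quote}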
   22.82 +
   22.83 +
   22.84 +\section{Booting the Domain}
   22.85 +
   22.86 +The \path{xm} tool provides a variety of commands for managing
   22.87 +domains.  Use the \path{create} command to start new domains. Assuming
   22.88 +you've created a configuration file \path{myvmconf} based around
   22.89 +\path{/etc/xen/xmexample2}, to start a domain with virtual machine
   22.90 +ID~1 you should type:
   22.91 +
   22.92 +\begin{quote}
   22.93 +\begin{verbatim}
   22.94 +# xm create -c myvmconf vmid=1
   22.95 +\end{verbatim}
   22.96 +\end{quote}
   22.97 +
    22.98 +The \path{-c} switch causes \path{xm} to attach to the domain's
    22.99 +console after creation.  The \path{vmid=1} sets the \path{vmid}
  22.100 +variable used in the \path{myvmconf} file.
  22.101 +
  22.102 +You should see the console boot messages from the new domain appearing
  22.103 +in the terminal in which you typed the command, culminating in a login
  22.104 +prompt.
  22.105 +
  22.106 +
  22.107 +\section{Example: ttylinux}
  22.108 +
  22.109 +Ttylinux is a very small Linux distribution, designed to require very
  22.110 +few resources.  We will use it as a concrete example of how to start a
  22.111 +Xen domain.  Most users will probably want to install a full-featured
  22.112 +distribution once they have mastered the basics\footnote{ttylinux is
  22.113 +  maintained by Pascal Schmidt. You can download source packages from
  22.114 +  the distribution's home page: {\tt
  22.115 +    http://www.minimalinux.org/ttylinux/}}.
  22.116 +
  22.117 +\begin{enumerate}
  22.118 +\item Download and extract the ttylinux disk image from the Files
  22.119 +  section of the project's SourceForge site (see
  22.120 +  \path{http://sf.net/projects/xen/}).
  22.121 +\item Create a configuration file like the following:
  22.122 +\begin{verbatim}
  22.123 +kernel = "/boot/vmlinuz-2.6-xenU"
  22.124 +memory = 64
  22.125 +name = "ttylinux"
  22.126 +nics = 1
  22.127 +ip = "1.2.3.4"
  22.128 +disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
  22.129 +root = "/dev/sda1 ro"
  22.130 +\end{verbatim}
  22.131 +\item Now start the domain and connect to its console:
  22.132 +\begin{verbatim}
  22.133 +xm create configfile -c
  22.134 +\end{verbatim}
   22.135 +\item Log in as root; the password is root.
  22.136 +\end{enumerate}
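
Once logged in, you can detach from the domain's console by typing
\verb!Ctrl-]!, and shut the domain down again from domain~0 (using the
\path{name} from the configuration file above):
\begin{quote}
  \verb!# xm shutdown ttylinux!
\end{quote}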
  22.137 +
  22.138 +
  22.139 +\section{Starting / Stopping Domains Automatically}
  22.140 +
  22.141 +It is possible to have certain domains start automatically at boot
   22.142 +time and to have dom0 wait for all running domains to shut down before
  22.143 +it shuts down the system.
  22.144 +
   22.145 +To specify that a domain should start at boot time, place its
   22.146 +configuration file (or a link to it) under \path{/etc/xen/auto/}.
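
For example, assuming the \path{myvmconf} file from earlier in this
chapter was saved as \path{/etc/xen/myvmconf}:
\begin{quote}
  \verb!# ln -s /etc/xen/myvmconf /etc/xen/auto/!
\end{quote}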
  22.147 +
  22.148 +A Sys-V style init script for Red Hat and LSB-compliant systems is
  22.149 +provided and will be automatically copied to \path{/etc/init.d/}
  22.150 +during install.  You can then enable it in the appropriate way for
  22.151 +your distribution.
  22.152 +
  22.153 +For instance, on Red Hat:
  22.154 +
  22.155 +\begin{quote}
  22.156 +  \verb_# chkconfig --add xendomains_
  22.157 +\end{quote}
  22.158 +
  22.159 +By default, this will start the boot-time domains in runlevels 3, 4
  22.160 +and 5.
  22.161 +
  22.162 +You can also use the \path{service} command to run this script
   22.163 +manually, e.g.:
  22.164 +
  22.165 +\begin{quote}
  22.166 +  \verb_# service xendomains start_
  22.167 +
  22.168 +  Starts all the domains with config files under /etc/xen/auto/.
  22.169 +\end{quote}
  22.170 +
  22.171 +\begin{quote}
  22.172 +  \verb_# service xendomains stop_
  22.173 +
  22.174 +  Shuts down ALL running Xen domains.
  22.175 +\end{quote}
    23.1 --- a/tools/firmware/acpi/acpi_madt.c	Tue Sep 20 09:43:29 2005 +0000
    23.2 +++ b/tools/firmware/acpi/acpi_madt.c	Tue Sep 20 09:43:46 2005 +0000
    23.3 @@ -37,44 +37,7 @@ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE Mad
    23.4  				ACPI_LOCAL_APIC_ADDRESS,
    23.5  				ACPI_MULTIPLE_APIC_FLAGS,
    23.6  		},
    23.7 -		//
    23.8 -		// LOCAL APIC Entries for 4 processors.
    23.9 -		//
   23.10 -		{
   23.11 -				{
   23.12 -						ACPI_PROCESSOR_LOCAL_APIC,                          
   23.13 -						sizeof (ACPI_LOCAL_APIC_STRUCTURE),     
   23.14 -						0x00,                                                     
   23.15 -						0x00,                                                     
   23.16 -						0x00000001,                                               
   23.17 -				},
   23.18 -
   23.19 -				{
   23.20 -						ACPI_PROCESSOR_LOCAL_APIC,                          
   23.21 -						sizeof (ACPI_LOCAL_APIC_STRUCTURE),     
   23.22 -						0x01,                                                     
   23.23 -						0x00,                                                     
   23.24 -						0x00000000
   23.25 -				},                                               
   23.26 -
   23.27 -				{
   23.28 -						ACPI_PROCESSOR_LOCAL_APIC,                          
   23.29 -						sizeof (ACPI_LOCAL_APIC_STRUCTURE),     
   23.30 -						0x02,                                                     
   23.31 -						0x00,                                                     
   23.32 -						0x00000000
   23.33 -				},                                               
   23.34 -
   23.35 -				{
   23.36 -						ACPI_PROCESSOR_LOCAL_APIC,                          
   23.37 -						sizeof (ACPI_LOCAL_APIC_STRUCTURE),     
   23.38 -						0x03,                                                     
   23.39 -						0x00,                                                     
   23.40 -						0x00000000
   23.41 -				}
   23.42 -		}
   23.43 -		,
   23.44 -
   23.45 +	
   23.46  		//
   23.47  		// IO APIC
   23.48  		// 
   23.49 @@ -87,5 +50,19 @@ ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE Mad
   23.50  						ACPI_IO_APIC_ADDRESS_1,
   23.51  						0x0000
   23.52  				}
   23.53 +		},
   23.54 +
   23.55 +		//
   23.56 +		// LOCAL APIC Entries for up to 32 processors.
   23.57 +		//
   23.58 +		{
   23.59 +				{
   23.60 +						ACPI_PROCESSOR_LOCAL_APIC,
   23.61 +						sizeof (ACPI_LOCAL_APIC_STRUCTURE),
   23.62 +						0x00,
   23.63 +						0x00,
   23.64 +						0x00000001,
   23.65 +				}
   23.66 +
   23.67  		}
   23.68  };
    24.1 --- a/tools/firmware/acpi/acpi_madt.h	Tue Sep 20 09:43:29 2005 +0000
    24.2 +++ b/tools/firmware/acpi/acpi_madt.h	Tue Sep 20 09:43:46 2005 +0000
    24.3 @@ -35,9 +35,9 @@
    24.4  //
    24.5  #pragma pack (1)
    24.6  typedef struct {
    24.7 -  ACPI_2_0_MADT   				Header;
    24.8 -  ACPI_LOCAL_APIC_STRUCTURE     LocalApic[4];
    24.9 -  ACPI_IO_APIC_STRUCTURE        IoApic[1];
   24.10 +	ACPI_2_0_MADT			Header;
   24.11 +	ACPI_IO_APIC_STRUCTURE		IoApic[1];
   24.12 +	ACPI_LOCAL_APIC_STRUCTURE	LocalApic[32];
   24.13  } ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE;
   24.14  #pragma pack ()
   24.15  
    25.1 --- a/tools/firmware/vmxassist/Makefile	Tue Sep 20 09:43:29 2005 +0000
    25.2 +++ b/tools/firmware/vmxassist/Makefile	Tue Sep 20 09:43:46 2005 +0000
    25.3 @@ -41,9 +41,9 @@ OBJECTS = head.o trap.o vm86.o setup.o u
    25.4  
    25.5  all: vmxloader
    25.6  
    25.7 -vmxloader: roms.h vmxloader.c acpi.h
    25.8 -	${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c
    25.9 -	$(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o
   25.10 +vmxloader: roms.h vmxloader.c acpi.h acpi_madt.c
   25.11 +	${CC} ${CFLAGS} ${DEFINES} -c vmxloader.c -c acpi_madt.c
   25.12 +	$(CC) -o vmxloader.tmp -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,0x100000 vmxloader.o acpi_madt.o
   25.13  	objcopy --change-addresses=0xC0000000 vmxloader.tmp vmxloader
   25.14  	rm -f vmxloader.tmp
   25.15  
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/tools/firmware/vmxassist/acpi_madt.c	Tue Sep 20 09:43:46 2005 +0000
    26.3 @@ -0,0 +1,145 @@
    26.4 +/*
    26.5 + * acpi_madt.c: Update ACPI MADT table for multiple processor guest.
    26.6 + *
    26.7 + * Yu Ke, ke.yu@intel.com
    26.8 + * Copyright (c) 2005, Intel Corporation.
    26.9 + *
   26.10 + * This program is free software; you can redistribute it and/or modify it
   26.11 + * under the terms and conditions of the GNU General Public License,
   26.12 + * version 2, as published by the Free Software Foundation.
   26.13 + *
   26.14 + * This program is distributed in the hope it will be useful, but WITHOUT
   26.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   26.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   26.17 + * more details.
   26.18 + *
   26.19 + * You should have received a copy of the GNU General Public License along with
   26.20 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   26.21 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   26.22 + */
   26.23 +#include "../acpi/acpi2_0.h"
   26.24 +#include "../acpi/acpi_madt.h"
   26.25 +
   26.26 +#define NULL ((void*)0)
   26.27 +
   26.28 +extern int puts(const char *s);
   26.29 +
   26.30 +#define VCPU_MAGIC 0x76637075 /* "vcpu" */
   26.31 +
    26.32 +/* xc_vmx_build wrote the vcpu block at 0x9F800; return the vcpu count. */
   26.33 +static int 
   26.34 +get_vcpus(void)
   26.35 +{
   26.36 +	unsigned long *vcpus;
   26.37 +
   26.38 +	vcpus = (unsigned long *)0x9F800;
   26.39 +	if (vcpus[0] != VCPU_MAGIC) {
   26.40 +		puts("Bad vcpus magic, set vcpu number=1\n");
   26.41 +		return 1;
   26.42 +	}
   26.43 +
   26.44 +	return vcpus[1];
   26.45 +}
   26.46 +
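/*
 * Locate the MADT by walking RSDP -> RSDT within the ACPI blob the
 * firmware tables were built into; returns NULL if any table signature
 * check fails.
 */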
   26.47 +static void *
   26.48 +acpi_madt_get_madt(unsigned char *acpi_start)
   26.49 +{
   26.50 +	ACPI_2_0_RSDP *rsdp=NULL;
   26.51 +	ACPI_2_0_RSDT *rsdt=NULL;
   26.52 +	ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
   26.53 +
   26.54 +	rsdp = (ACPI_2_0_RSDP *)(acpi_start + sizeof(ACPI_2_0_FACS));
   26.55 +	if (rsdp->Signature != ACPI_2_0_RSDP_SIGNATURE) {
   26.56 +		puts("Bad RSDP signature\n");
   26.57 +		return NULL;
   26.58 +	}
   26.59 +
    26.60 +	rsdt = (ACPI_2_0_RSDT *)
   26.61 +		(acpi_start + rsdp->RsdtAddress - ACPI_PHYSICAL_ADDRESS);
   26.62 +	if (rsdt->Header.Signature != ACPI_2_0_RSDT_SIGNATURE) {
   26.63 +		puts("Bad RSDT signature\n");
   26.64 +		return NULL;
   26.65 +	}
   26.66 +
   26.67 +	madt = (ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *)
    26.68 +		(acpi_start + rsdt->Entry[1] - ACPI_PHYSICAL_ADDRESS);
   26.69 +	if (madt->Header.Header.Signature !=
   26.70 +	    ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE_SIGNATURE) {
    26.71 +		puts("Bad MADT signature\n");
   26.72 +		return NULL;
   26.73 +	}
   26.74 +
   26.75 +	return madt;
   26.76 +}
   26.77 +
   26.78 +static void 
   26.79 +set_checksum(void *start, int checksum_offset, int len)
   26.80 +{
   26.81 +	unsigned char sum = 0;  
   26.82 +	unsigned char *ptr;
   26.83 +
   26.84 +	ptr = start;
   26.85 +	ptr[checksum_offset] = 0;
   26.86 +	while (len--)
   26.87 +		sum += *ptr++;
   26.88 +
   26.89 +	ptr = start;
   26.90 +	ptr[checksum_offset] = -sum;
   26.91 +}
   26.92 +
   26.93 +static int 
   26.94 +acpi_madt_set_local_apics(
   26.95 +	int nr_vcpu, 
   26.96 +	ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt)
   26.97 +{
   26.98 +	int i;
   26.99 +
  26.100 +	if ((nr_vcpu > MAX_VIRT_CPUS) || (nr_vcpu < 0) || !madt)
  26.101 +		return -1;
  26.102 +
  26.103 +	for (i = 0; i < nr_vcpu; i++) {
  26.104 +		madt->LocalApic[i].Type            = ACPI_PROCESSOR_LOCAL_APIC;
  26.105 +		madt->LocalApic[i].Length          = sizeof (ACPI_LOCAL_APIC_STRUCTURE);
  26.106 +		madt->LocalApic[i].AcpiProcessorId = i;
  26.107 +		madt->LocalApic[i].ApicId          = i;
  26.108 +		madt->LocalApic[i].Flags           = 1; 
  26.109 +	}
  26.110 +
  26.111 +	madt->Header.Header.Length =
  26.112 +		sizeof(ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE) - 
  26.113 +		(MAX_VIRT_CPUS - nr_vcpu)* sizeof(ACPI_LOCAL_APIC_STRUCTURE);
  26.114 +
  26.115 +	return 0;                            
  26.116 +}
  26.117 +
  26.118 +#define FIELD_OFFSET(TYPE,Field) ((unsigned int)(&(((TYPE *) 0)->Field)))
  26.119 +
  26.120 +int acpi_madt_update(unsigned char *acpi_start)
  26.121 +{
  26.122 +	int rc;
  26.123 +	ACPI_MULTIPLE_APIC_DESCRIPTION_TABLE *madt;
  26.124 +
  26.125 +	madt = acpi_madt_get_madt(acpi_start);
  26.126 +	if (!madt)
  26.127 +		return -1;
  26.128 +
  26.129 +	rc = acpi_madt_set_local_apics(get_vcpus(), madt);
  26.130 +	if (rc != 0)
  26.131 +		return rc;
  26.132 +
  26.133 +	set_checksum(
  26.134 +		madt, FIELD_OFFSET(ACPI_TABLE_HEADER, Checksum),
  26.135 +		madt->Header.Header.Length);
  26.136 +
  26.137 +	return 0;              
  26.138 +}
  26.139 +
  26.140 +/*
  26.141 + * Local variables:
  26.142 + *  c-file-style: "linux"
  26.143 + *  indent-tabs-mode: t
  26.144 + *  c-indent-level: 8
  26.145 + *  c-basic-offset: 8
  26.146 + *  tab-width: 8
  26.147 + * End:
  26.148 + */
    27.1 --- a/tools/firmware/vmxassist/vmxloader.c	Tue Sep 20 09:43:29 2005 +0000
    27.2 +++ b/tools/firmware/vmxassist/vmxloader.c	Tue Sep 20 09:43:46 2005 +0000
    27.3 @@ -27,6 +27,7 @@
    27.4  #ifdef _ACPI_
    27.5  #include "acpi.h"
    27.6  #include "../acpi/acpi2_0.h"  // for ACPI_PHYSICAL_ADDRESS
    27.7 +int acpi_madt_update(unsigned char* acpi_start);
    27.8  #endif
    27.9  
   27.10  
   27.11 @@ -110,7 +111,10 @@ main()
   27.12  	}
   27.13  #ifdef _ACPI_
   27.14  	puts("Loading ACPI ...\n");
   27.15 -	if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
   27.16 +
   27.17 +	acpi_madt_update(acpi);
   27.18 +
   27.19 +	if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000) {
   27.20  		/* make sure acpi table does not overlap rombios
   27.21   		 * currently acpi less than 8K will be OK.
   27.22  		 */
    28.1 --- a/tools/libxc/xc_vmx_build.c	Tue Sep 20 09:43:29 2005 +0000
    28.2 +++ b/tools/libxc/xc_vmx_build.c	Tue Sep 20 09:43:46 2005 +0000
    28.3 @@ -107,6 +107,33 @@ static void build_e820map(struct mem_map
    28.4      mem_mapp->nr_map = nr_map;
    28.5  }
    28.6  
    28.7 +/*
     28.8 + * Use the E820-reserved memory at 0x9F800 to pass the number of vcpus to
     28.9 + * vmxloader, which uses it to configure the ACPI MADT table.
   28.10 + */
   28.11 +#define VCPU_MAGIC 0x76637075 /* "vcpu" */
   28.12 +static int 
   28.13 +set_nr_vcpus(int xc_handle, u32 dom, unsigned long *pfn_list, 
   28.14 +             struct domain_setup_info *dsi, unsigned long vcpus)
   28.15 +{
   28.16 +    char          *va_map;
   28.17 +    unsigned long *va_vcpus;
   28.18 +    
   28.19 +    va_map = xc_map_foreign_range(
   28.20 +        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
   28.21 +        pfn_list[(0x9F000 - dsi->v_start) >> PAGE_SHIFT]);    
   28.22 +    if ( va_map == NULL )
   28.23 +        return -1;
   28.24 +    
   28.25 +    va_vcpus = (unsigned long *)(va_map + 0x800);
   28.26 +    *va_vcpus++ = VCPU_MAGIC;
   28.27 +    *va_vcpus++ = vcpus;
   28.28 +
   28.29 +    munmap(va_map, PAGE_SIZE);
   28.30 +
   28.31 +    return 0;
   28.32 +}
   28.33 +
   28.34  #ifdef __i386__
   28.35  static int zap_mmio_range(int xc_handle, u32 dom,
   28.36                            l2_pgentry_32_t *vl2tab,
   28.37 @@ -496,7 +523,8 @@ static int setup_guest(int xc_handle,
   28.38                                 MMU_MACHPHYS_UPDATE, count) )
   28.39              goto error_out;
   28.40      }
   28.41 -    
   28.42 +
   28.43 +    set_nr_vcpus(xc_handle, dom, page_array, &dsi, vcpus);
   28.44  
   28.45      if ((boot_paramsp = xc_map_foreign_range(
   28.46          xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
    29.1 --- a/tools/vtpm/Makefile	Tue Sep 20 09:43:29 2005 +0000
    29.2 +++ b/tools/vtpm/Makefile	Tue Sep 20 09:43:46 2005 +0000
    29.3 @@ -4,7 +4,7 @@ XEN_ROOT = ../..
    29.4  include $(XEN_ROOT)/tools/vtpm/Rules.mk
    29.5  
    29.6  # Dir name for emulator (as dom0 tpm driver)
    29.7 -TPM_EMULATOR_DIR = tpm_emulator-0.2
    29.8 +TPM_EMULATOR_DIR = tpm_emulator
    29.9  # Dir name for vtpm instance
   29.10  VTPM_DIR = vtpm
   29.11  
   29.12 @@ -13,7 +13,7 @@ TPM_EMULATOR_TARFILE = tpm_emulator-0.2b
   29.13  
   29.14  all: build
   29.15  
   29.16 -build: $(TPM_EMULATOR_TARFILE) extract patch build_sub
   29.17 +build: $(TPM_EMULATOR_DIR) $(VTPM_DIR) build_sub
   29.18  
   29.19  install: build
   29.20  	$(MAKE) -C $(TPM_EMULATOR_DIR) $@
   29.21 @@ -26,36 +26,32 @@ clean:
   29.22  	if [ -d $(VTPM_DIR) ]; \
   29.23  		then $(MAKE) -C $(VTPM_DIR) clean; \
   29.24  	fi
   29.25 +
   29.26 +mrproper:
   29.27 +	rm -f $(TPM_EMULATOR_TARFILE)
   29.28  	rm -rf $(TPM_EMULATOR_DIR)
   29.29  	rm -rf $(VTPM_DIR)
   29.30  
   29.31 -mrproper: clean
   29.32 -	rm -f $(TPM_EMULATOR_TARFILE)
   29.33 -
   29.34  # Download Swiss emulator
   29.35  $(TPM_EMULATOR_TARFILE):
   29.36  	wget http://download.berlios.de/tpm-emulator/$(TPM_EMULATOR_TARFILE)
   29.37  
   29.38  # Create vtpm and TPM emulator dirs
   29.39 -extract: $(TPM_EMULATOR_DIR)/README $(VTPM_DIR)/README
   29.40 -
   29.41 -$(TPM_EMULATOR_DIR)/README:
   29.42 -	-rm -rf $(TPM_EMULATOR_DIR)
   29.43 -	tar -xzf $(TPM_EMULATOR_TARFILE)
   29.44 -
   29.45 -$(VTPM_DIR)/README:
   29.46 -	-rm -rf $(VTPM_DIR)
   29.47 -	cp -r --preserve $(TPM_EMULATOR_DIR) $(VTPM_DIR)
   29.48 -
    29.49  # apply patches: 1) for use as the dom0 tpm driver, 2) for use as a vtpm device instance
   29.50 -patch: $(TPM_EMULATOR_DIR)/Makefile $(VTPM_DIR)/Makefile
   29.51 -
   29.52 -$(TPM_EMULATOR_DIR)/Makefile: tpm_emulator.patch
   29.53 +$(TPM_EMULATOR_DIR): $(TPM_EMULATOR_TARFILE) 
   29.54 +	tar -xzf $(TPM_EMULATOR_TARFILE);  
   29.55 +	mv tpm_emulator-0.2 $(TPM_EMULATOR_DIR); 
   29.56 +	
   29.57  	-cd $(TPM_EMULATOR_DIR); \
   29.58 +	patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
   29.59  	patch -p1 <../tpm_emulator.patch
   29.60  
   29.61 -$(VTPM_DIR)/Makefile: vtpm.patch
   29.62 +$(VTPM_DIR): $(TPM_EMULATOR_TARFILE)
   29.63 +	tar -xzf $(TPM_EMULATOR_TARFILE);  
   29.64 +	mv tpm_emulator-0.2 $(VTPM_DIR); 
   29.65 +
   29.66  	-cd $(VTPM_DIR); \
   29.67 +	patch -p1 < ../tpm_emulator-0.2b-x86_64.patch; \
   29.68  	patch -p1 <../vtpm.patch
   29.69  
   29.70  build_sub:
    30.1 --- a/tools/vtpm/README	Tue Sep 20 09:43:29 2005 +0000
    30.2 +++ b/tools/vtpm/README	Tue Sep 20 09:43:46 2005 +0000
    30.3 @@ -23,6 +23,7 @@ Requirements
    30.4  - xen-unstable 
    30.5  - IBM frontend/backend vtpm driver patch
    30.6  - vtpm_managerd
    30.7 +- GNU MP Big number library (GMP)
    30.8  
    30.9  vtpmd Flow (for vtpm_manager; vtpmd is never run by default)
   30.10  ============================
    31.1 --- a/tools/vtpm/tpm_emulator.patch	Tue Sep 20 09:43:29 2005 +0000
    31.2 +++ b/tools/vtpm/tpm_emulator.patch	Tue Sep 20 09:43:46 2005 +0000
    31.3 @@ -1,12 +1,12 @@
    31.4 -diff -uprN orig/tpm_emulator-0.2/AUTHORS tpm_emulator-0.2/AUTHORS
    31.5 ---- orig/tpm_emulator-0.2/AUTHORS	2005-08-17 10:58:36.000000000 -0700
    31.6 -+++ tpm_emulator-0.2/AUTHORS	2005-08-17 10:55:52.000000000 -0700
    31.7 +diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS tpm_emulator/AUTHORS
    31.8 +--- orig/tpm_emulator-0.2-x86_64/AUTHORS	2005-08-15 00:58:57.000000000 -0700
    31.9 ++++ tpm_emulator/AUTHORS	2005-09-14 20:27:22.000000000 -0700
   31.10  @@ -1 +1,2 @@
   31.11   Mario Strasser <mast@gmx.net>
   31.12  +INTEL Corp <>
   31.13 -diff -uprN orig/tpm_emulator-0.2/ChangeLog tpm_emulator-0.2/ChangeLog
   31.14 ---- orig/tpm_emulator-0.2/ChangeLog	2005-08-17 10:58:36.000000000 -0700
   31.15 -+++ tpm_emulator-0.2/ChangeLog	2005-08-17 10:55:52.000000000 -0700
   31.16 +diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog tpm_emulator/ChangeLog
   31.17 +--- orig/tpm_emulator-0.2-x86_64/ChangeLog	2005-08-15 00:58:57.000000000 -0700
   31.18 ++++ tpm_emulator/ChangeLog	2005-09-14 20:27:22.000000000 -0700
   31.19  @@ -1,3 +1,7 @@
   31.20  +2005-08-16: INTEL Corp
   31.21  +	* Set default permissions to PCRs
   31.22 @@ -15,10 +15,29 @@ diff -uprN orig/tpm_emulator-0.2/ChangeL
   31.23   2005-08-15  Mario Strasser <mast@gmx.net>
   31.24   	* all: some typos corrected
   31.25   	* tpm_integrity.c: bug in TPM_Extend fixed
   31.26 -diff -uprN orig/tpm_emulator-0.2/Makefile tpm_emulator-0.2/Makefile
   31.27 ---- orig/tpm_emulator-0.2/Makefile	2005-08-17 10:58:36.000000000 -0700
   31.28 -+++ tpm_emulator-0.2/Makefile	2005-08-17 10:55:52.000000000 -0700
   31.29 -@@ -1,15 +1,19 @@
   31.30 +diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h tpm_emulator/linux_module.h
   31.31 +--- orig/tpm_emulator-0.2-x86_64/linux_module.h	2005-09-15 19:21:14.844078720 -0700
   31.32 ++++ tpm_emulator/linux_module.h	2005-09-14 20:27:22.000000000 -0700
   31.33 +@@ -1,5 +1,6 @@
   31.34 + /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
   31.35 +  * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
   31.36 ++ * Copyright (C) 2005 INTEL Corp.
   31.37 +  *
   31.38 +  * This module is free software; you can redistribute it and/or modify
   31.39 +  * it under the terms of the GNU General Public License as published
   31.40 +@@ -35,7 +36,7 @@
   31.41 + #include "tpm_version.h"
   31.42 + 
   31.43 + #define TPM_DEVICE_MINOR	224
   31.44 +-#define TPM_DEVICE_NAME         "tpm"
   31.45 ++#define TPM_DEVICE_NAME         "tpm0"
   31.46 + #define TPM_MODULE_NAME 	"tpm_emulator"
   31.47 + 
   31.48 + /* debug and log output functions */
   31.49 +diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile tpm_emulator/Makefile
   31.50 +--- orig/tpm_emulator-0.2-x86_64/Makefile	2005-09-15 19:21:14.845078568 -0700
   31.51 ++++ tpm_emulator/Makefile	2005-09-14 20:27:22.000000000 -0700
   31.52 +@@ -1,16 +1,20 @@
   31.53   # Software-Based Trusted Platform Module (TPM) Emulator for Linux
   31.54   # Copyright (C) 2004 Mario Strasser <mast@gmx.net>
   31.55  +# Copyright (C) 2005 INTEL Corp.
   31.56 @@ -33,6 +52,7 @@ diff -uprN orig/tpm_emulator-0.2/Makefil
   31.57  -KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
   31.58  +KERNEL_BUILD   := $(XEN_ROOT)/linux-2.6.12-xen0
   31.59   MOD_SUBDIR     := misc
   31.60 + COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
   31.61   
   31.62   # module settings
   31.63  -MODULE_NAME    := tpm_emulator
   31.64 @@ -40,7 +60,7 @@ diff -uprN orig/tpm_emulator-0.2/Makefil
   31.65   VERSION_MAJOR  := 0
   31.66   VERSION_MINOR  := 2
   31.67   VERSION_BUILD  := $(shell date +"%s")
   31.68 -@@ -27,11 +30,9 @@ DIRS           := . crypto tpm 
   31.69 +@@ -34,11 +38,9 @@ DIRS           := . crypto tpm 
   31.70   SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
   31.71   OBJS           := $(patsubst %.c, %.o, $(SRCS))
   31.72   SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
   31.73 @@ -54,7 +74,7 @@ diff -uprN orig/tpm_emulator-0.2/Makefil
   31.74   
   31.75   EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
   31.76   
   31.77 -@@ -42,23 +43,17 @@ all:	$(src)/crypto/gmp.h $(src)/crypto/l
   31.78 +@@ -49,23 +51,17 @@ all:	$(src)/crypto/gmp.h $(src)/crypto/l
   31.79   	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
   31.80   
   31.81   install:
   31.82 @@ -84,9 +104,9 @@ diff -uprN orig/tpm_emulator-0.2/Makefil
   31.83   
   31.84   $(src)/crypto/libgmp.a:
   31.85   	test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) $(src)/crypto/libgmp.a
   31.86 -diff -uprN orig/tpm_emulator-0.2/README tpm_emulator-0.2/README
   31.87 ---- orig/tpm_emulator-0.2/README	2005-08-17 10:58:36.000000000 -0700
   31.88 -+++ tpm_emulator-0.2/README	2005-08-17 10:55:52.000000000 -0700
   31.89 +diff -uprN orig/tpm_emulator-0.2-x86_64/README tpm_emulator/README
   31.90 +--- orig/tpm_emulator-0.2-x86_64/README	2005-08-15 00:58:57.000000000 -0700
   31.91 ++++ tpm_emulator/README	2005-09-14 20:27:22.000000000 -0700
   31.92  @@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
   31.93   Copyright
   31.94   --------------------------------------------------------------------------
   31.95 @@ -97,28 +117,9 @@ diff -uprN orig/tpm_emulator-0.2/README 
   31.96                 
   31.97   This program is free software; you can redistribute it and/or modify
   31.98   it under the terms of the GNU General Public License as published by
   31.99 -diff -uprN orig/tpm_emulator-0.2/linux_module.h tpm_emulator-0.2/linux_module.h
  31.100 ---- orig/tpm_emulator-0.2/linux_module.h	2005-08-17 10:58:36.000000000 -0700
  31.101 -+++ tpm_emulator-0.2/linux_module.h	2005-08-17 10:55:52.000000000 -0700
  31.102 -@@ -1,5 +1,6 @@
  31.103 - /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  31.104 -  * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  31.105 -+ * Copyright (C) 2005 INTEL Corp.
  31.106 -  *
  31.107 -  * This module is free software; you can redistribute it and/or modify
  31.108 -  * it under the terms of the GNU General Public License as published
  31.109 -@@ -33,7 +34,7 @@
  31.110 - #include "tpm_version.h"
  31.111 - 
  31.112 - #define TPM_DEVICE_MINOR	224
  31.113 --#define TPM_DEVICE_NAME         "tpm"
  31.114 -+#define TPM_DEVICE_NAME         "tpm0"
  31.115 - #define TPM_MODULE_NAME 	"tpm_emulator"
  31.116 - 
  31.117 - /* debug and log output functions */
  31.118 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c tpm_emulator-0.2/tpm/tpm_data.c
  31.119 ---- orig/tpm_emulator-0.2/tpm/tpm_data.c	2005-08-17 10:58:36.000000000 -0700
  31.120 -+++ tpm_emulator-0.2/tpm/tpm_data.c	2005-08-17 10:55:52.000000000 -0700
  31.121 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c tpm_emulator/tpm/tpm_data.c
  31.122 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c	2005-09-15 19:21:14.847078264 -0700
  31.123 ++++ tpm_emulator/tpm/tpm_data.c	2005-09-14 20:27:22.000000000 -0700
  31.124  @@ -1,6 +1,7 @@
  31.125   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  31.126    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  31.127 @@ -139,13 +140,3 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  31.128       tpmData.permanent.data.pcrAttrib[i].pcrReset = TRUE;
  31.129     }
  31.130     /* set tick type */
  31.131 -diff -uprN orig/tpm_emulator-0.2/tpm_version.h tpm_emulator-0.2/tpm_version.h
  31.132 ---- orig/tpm_emulator-0.2/tpm_version.h	2005-08-17 10:58:36.000000000 -0700
  31.133 -+++ tpm_emulator-0.2/tpm_version.h	2005-08-17 10:55:53.000000000 -0700
  31.134 -@@ -2,5 +2,5 @@
  31.135 - #define _TPM_VERSION_H_
  31.136 - #define VERSION_MAJOR 0
  31.137 - #define VERSION_MINOR 2
  31.138 --#define VERSION_BUILD 1123950310
  31.139 -+#define VERSION_BUILD 1124301353
  31.140 - #endif /* _TPM_VERSION_H_ */
    32.1 --- a/tools/vtpm/vtpm.patch	Tue Sep 20 09:43:29 2005 +0000
    32.2 +++ b/tools/vtpm/vtpm.patch	Tue Sep 20 09:43:46 2005 +0000
    32.3 @@ -1,12 +1,12 @@
    32.4 -diff -uprN orig/tpm_emulator-0.2/AUTHORS vtpm/AUTHORS
    32.5 ---- orig/tpm_emulator-0.2/AUTHORS	2005-08-17 10:58:36.000000000 -0700
    32.6 -+++ vtpm/AUTHORS	2005-08-17 10:55:52.000000000 -0700
    32.7 +diff -uprN orig/tpm_emulator-0.2-x86_64/AUTHORS vtpm/AUTHORS
    32.8 +--- orig/tpm_emulator-0.2-x86_64/AUTHORS	2005-08-15 00:58:57.000000000 -0700
    32.9 ++++ vtpm/AUTHORS	2005-09-14 20:27:22.000000000 -0700
   32.10  @@ -1 +1,2 @@
   32.11   Mario Strasser <mast@gmx.net>
   32.12  +INTEL Corp <>
   32.13 -diff -uprN orig/tpm_emulator-0.2/ChangeLog vtpm/ChangeLog
   32.14 ---- orig/tpm_emulator-0.2/ChangeLog	2005-08-17 10:58:36.000000000 -0700
   32.15 -+++ vtpm/ChangeLog	2005-08-17 10:55:52.000000000 -0700
   32.16 +diff -uprN orig/tpm_emulator-0.2-x86_64/ChangeLog vtpm/ChangeLog
   32.17 +--- orig/tpm_emulator-0.2-x86_64/ChangeLog	2005-08-15 00:58:57.000000000 -0700
   32.18 ++++ vtpm/ChangeLog	2005-09-14 20:27:22.000000000 -0700
   32.19  @@ -1,3 +1,7 @@
   32.20  +2005-08-16 Intel Corp
   32.21  +	Moved module out of kernel to run as a ring 3 app
   32.22 @@ -15,115 +15,9 @@ diff -uprN orig/tpm_emulator-0.2/ChangeL
   32.23   2005-08-15  Mario Strasser <mast@gmx.net>
   32.24   	* all: some typos corrected
   32.25   	* tpm_integrity.c: bug in TPM_Extend fixed
   32.26 -diff -uprN orig/tpm_emulator-0.2/Makefile vtpm/Makefile
   32.27 ---- orig/tpm_emulator-0.2/Makefile	2005-08-17 10:58:36.000000000 -0700
   32.28 -+++ vtpm/Makefile	2005-08-17 10:55:52.000000000 -0700
   32.29 -@@ -1,21 +1,29 @@
   32.30 - # Software-Based Trusted Platform Module (TPM) Emulator for Linux
   32.31 - # Copyright (C) 2004 Mario Strasser <mast@gmx.net>
   32.32 -+# Copyright (C) 2005 INTEL Corp.
   32.33 - #
   32.34 - # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
   32.35 - 
   32.36 --# kernel settings
   32.37 --KERNEL_RELEASE := $(shell uname -r)
   32.38 --KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
   32.39 --MOD_SUBDIR     := misc
   32.40 --
   32.41 - # module settings
   32.42 --MODULE_NAME    := tpm_emulator
   32.43 -+BIN            := vtpmd
   32.44 - VERSION_MAJOR  := 0
   32.45 - VERSION_MINOR  := 2
   32.46 - VERSION_BUILD  := $(shell date +"%s")
   32.47 - 
   32.48 --# enable/disable DEBUG messages
   32.49 --EXTRA_CFLAGS   += -DDEBUG -g  
   32.50 -+# Installation program and options
   32.51 -+INSTALL         = install
   32.52 -+INSTALL_PROG    = $(INSTALL) -m0755
   32.53 -+INSTALL_DIR     = $(INSTALL) -d -m0755
   32.54 -+
   32.55 -+# Xen tools installation directory
   32.56 -+TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
   32.57 -+
   32.58 -+CC      := gcc
   32.59 -+CFLAGS  += -g -Wall $(INCLUDE) -DDEBUG
   32.60 -+CFLAGS  += -I. -Itpm
   32.61 -+
   32.62 -+# Is the simulator running in it's own vm?
   32.63 -+#CFLAGS += -DVTPM_MULTI_VM
   32.64 - 
   32.65 - # GNU MP configuration
   32.66 - GMP_LIB        := /usr/lib/libgmp.a
   32.67 -@@ -27,38 +35,31 @@ DIRS           := . crypto tpm 
   32.68 - SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
   32.69 - OBJS           := $(patsubst %.c, %.o, $(SRCS))
   32.70 - SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
   32.71 --DISTSRC        := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
   32.72 --DISTDIR        := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
   32.73 - 
   32.74 --obj-m               := $(MODULE_NAME).o
   32.75 --$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
   32.76 -+obj-m               := $(BIN)
   32.77 -+$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
   32.78 - 
   32.79 - EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
   32.80 - 
   32.81 - # do not print "Entering directory ..."
   32.82 - MAKEFLAGS      += --no-print-directory
   32.83 - 
   32.84 --all:	$(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
   32.85 --	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
   32.86 -+all: $(BIN)
   32.87 -+
   32.88 -+$(BIN):	$(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS) $(OBJS)
   32.89 -+	$(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
   32.90 -+
   32.91 -+%.o: %.c
   32.92 -+	$(CC) $(CFLAGS) -c $< -o $@
   32.93 - 
   32.94 - install:
   32.95 --	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
   32.96 --	test -d /var/tpm || mkdir /var/tpm
   32.97 --	test -c /dev/tpm || mknod /dev/tpm c 10 224
   32.98 --	chmod 666 /dev/tpm
   32.99 --	depmod -a
  32.100 -+	$(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
  32.101 - 
  32.102 - clean:
  32.103 --	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
  32.104 --	rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
  32.105 -+	rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
  32.106 - 
  32.107 --dist:	$(DISTSRC)
  32.108 --	rm -rf $(DISTDIR)
  32.109 --	mkdir $(DISTDIR)
  32.110 --	cp --parents $(DISTSRC) $(DISTDIR)/
  32.111 --	rm -f $(DISTDIR)/crypto/gmp.h 
  32.112 --	tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
  32.113 --	rm -rf $(DISTDIR)
  32.114 -+mrproper: clean
  32.115 -+	rm -f $(BIN)
  32.116 - 
  32.117 - $(src)/crypto/libgmp.a:
  32.118 - 	test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) $(src)/crypto/libgmp.a
  32.119 -diff -uprN orig/tpm_emulator-0.2/README vtpm/README
  32.120 ---- orig/tpm_emulator-0.2/README	2005-08-17 10:58:36.000000000 -0700
  32.121 -+++ vtpm/README	2005-08-17 10:55:52.000000000 -0700
  32.122 -@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
  32.123 - Copyright
  32.124 - --------------------------------------------------------------------------
  32.125 - Copyright (C) 2004 Mario Strasser <mast@gmx.net> and Swiss Federal 
  32.126 --Institute of Technology (ETH) Zurich.
  32.127 -+                   Institute of Technology (ETH) Zurich.
  32.128 -+Copyright (C) 2005 INTEL Corp 
  32.129 -               
  32.130 - This program is free software; you can redistribute it and/or modify
  32.131 - it under the terms of the GNU General Public License as published by
  32.132 -diff -uprN orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c vtpm/crypto/gmp_kernel_wrapper.c
  32.133 ---- orig/tpm_emulator-0.2/crypto/gmp_kernel_wrapper.c	2005-08-17 10:58:36.000000000 -0700
  32.134 -+++ vtpm/crypto/gmp_kernel_wrapper.c	2005-08-17 10:55:52.000000000 -0700
  32.135 +diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c vtpm/crypto/gmp_kernel_wrapper.c
  32.136 +--- orig/tpm_emulator-0.2-x86_64/crypto/gmp_kernel_wrapper.c	2005-09-15 19:21:42.508873032 -0700
  32.137 ++++ vtpm/crypto/gmp_kernel_wrapper.c	2005-09-15 19:25:37.319176440 -0700
  32.138  @@ -1,5 +1,6 @@
  32.139   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.140    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.141 @@ -154,9 +48,9 @@ diff -uprN orig/tpm_emulator-0.2/crypto/
  32.142   {
  32.143  -  void *ret  = (void*)kmalloc(size, GFP_KERNEL);
  32.144  -  if (!ret) panic(KERN_CRIT TPM_MODULE_NAME 
  32.145 --    "GMP: cannot allocate memory (size=%u)\n", size);
  32.146 +-    "GMP: cannot allocate memory (size=%Zu)\n", size);
  32.147  +  void *ret  = (void*)malloc(size);
  32.148 -+  if (!ret) error("GMP: cannot allocate memory (size=%u)\n", size);
  32.149 ++  if (!ret) error("GMP: cannot allocate memory (size=%Zu)\n", size);
  32.150     return ret;
  32.151   }
  32.152   
  32.153 @@ -165,9 +59,10 @@ diff -uprN orig/tpm_emulator-0.2/crypto/
  32.154   {
  32.155  -  void *ret = (void*)kmalloc(new_size, GFP_KERNEL);
  32.156  -  if (!ret) panic(KERN_CRIT TPM_MODULE_NAME "GMP: Cannot reallocate memory "
  32.157 +-    "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
  32.158  +  void *ret = (void*)malloc(new_size);
  32.159  +  if (!ret) error("GMP: Cannot reallocate memory "
  32.160 -     "(old_size=%u new_size=%u)\n", old_size, new_size);
  32.161 ++    "(old_size=%Zu new_size=%Zu)\n", old_size, new_size);
  32.162     memcpy(ret, oldptr, old_size);
  32.163  -  kfree(oldptr);
  32.164  +  free(oldptr);
  32.165 @@ -183,9 +78,9 @@ diff -uprN orig/tpm_emulator-0.2/crypto/
  32.166     }
  32.167   }
  32.168   
  32.169 -diff -uprN orig/tpm_emulator-0.2/crypto/rsa.c vtpm/crypto/rsa.c
  32.170 ---- orig/tpm_emulator-0.2/crypto/rsa.c	2005-08-17 10:58:36.000000000 -0700
  32.171 -+++ vtpm/crypto/rsa.c	2005-08-17 10:55:52.000000000 -0700
  32.172 +diff -uprN orig/tpm_emulator-0.2-x86_64/crypto/rsa.c vtpm/crypto/rsa.c
  32.173 +--- orig/tpm_emulator-0.2-x86_64/crypto/rsa.c	2005-08-15 00:58:57.000000000 -0700
  32.174 ++++ vtpm/crypto/rsa.c	2005-09-14 20:27:22.000000000 -0700
  32.175  @@ -1,5 +1,6 @@
  32.176   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.177    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.178 @@ -211,8 +106,8 @@ diff -uprN orig/tpm_emulator-0.2/crypto/
  32.179         sha1_final(&ctx, &msg[1]);
  32.180         if (memcmp(&msg[1], &msg[1 + SHA1_DIGEST_LENGTH], 
  32.181             SHA1_DIGEST_LENGTH) != 0) return -1;
  32.182 -diff -uprN orig/tpm_emulator-0.2/linux_module.c vtpm/linux_module.c
  32.183 ---- orig/tpm_emulator-0.2/linux_module.c	2005-08-17 10:58:36.000000000 -0700
  32.184 +diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.c vtpm/linux_module.c
  32.185 +--- orig/tpm_emulator-0.2-x86_64/linux_module.c	2005-09-15 19:22:40.343080896 -0700
  32.186  +++ vtpm/linux_module.c	1969-12-31 16:00:00.000000000 -0800
  32.187  @@ -1,163 +0,0 @@
  32.188  -/* Software-Based Trusted Platform Module (TPM) Emulator for Linux 
  32.189 @@ -283,7 +178,7 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.190  -
  32.191  -static ssize_t tpm_read(struct file *file, char *buf, size_t count, loff_t *ppos)
  32.192  -{
  32.193 --  debug("%s(%d)", __FUNCTION__, count);
  32.194 +-  debug("%s(%Zu)", __FUNCTION__, count);
  32.195  -  down(&tpm_mutex);
  32.196  -  if (tpm_response.data != NULL) {
  32.197  -    count = min(count, (size_t)tpm_response.size - (size_t)*ppos);
  32.198 @@ -298,7 +193,7 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.199  -
  32.200  -static ssize_t tpm_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
  32.201  -{
  32.202 --  debug("%s(%d)", __FUNCTION__, count);
  32.203 +-  debug("%s(%Zu)", __FUNCTION__, count);
  32.204  -  down(&tpm_mutex);
  32.205  -  *ppos = 0;
  32.206  -  if (tpm_response.data != NULL) kfree(tpm_response.data);
  32.207 @@ -378,9 +273,9 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.208  -  return (ticks > 0) ? ticks : 1;
  32.209  -}
  32.210  -
  32.211 -diff -uprN orig/tpm_emulator-0.2/linux_module.h vtpm/linux_module.h
  32.212 ---- orig/tpm_emulator-0.2/linux_module.h	2005-08-17 10:58:36.000000000 -0700
  32.213 -+++ vtpm/linux_module.h	2005-08-17 10:55:52.000000000 -0700
  32.214 +diff -uprN orig/tpm_emulator-0.2-x86_64/linux_module.h vtpm/linux_module.h
  32.215 +--- orig/tpm_emulator-0.2-x86_64/linux_module.h	2005-09-15 19:21:14.844078720 -0700
  32.216 ++++ vtpm/linux_module.h	2005-09-14 20:27:22.000000000 -0700
  32.217  @@ -1,5 +1,6 @@
  32.218   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.219    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.220 @@ -416,17 +311,20 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.221   
  32.222  +/* module settings */
  32.223  +#define min(A,B) ((A)<(B)?(A):(B))
  32.224 + #ifndef STR
  32.225   #define STR(s) __STR__(s)
  32.226   #define __STR__(s) #s
  32.227 - #include "tpm_version.h"
  32.228 -@@ -39,32 +45,35 @@
  32.229 +@@ -39,34 +45,38 @@
  32.230 + #define TPM_MODULE_NAME 	"tpm_emulator"
  32.231 + 
  32.232   /* debug and log output functions */
  32.233 ++extern int dmi_id; 
  32.234   
  32.235   #ifdef DEBUG
  32.236  -#define debug(fmt, ...) printk(KERN_DEBUG "%s %s:%d: Debug: " fmt "\n", \
  32.237  -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
  32.238 -+#define debug(fmt, ...) printf("%s:%d: Debug: " fmt "\n", \
  32.239 -+                        __FILE__, __LINE__, ## __VA_ARGS__)
  32.240 ++#define debug(fmt, ...) printf("TPMD[%d]: %s:%d: Debug: " fmt "\n", \
  32.241 ++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  32.242   #else
  32.243   #define debug(fmt, ...) 
  32.244   #endif
  32.245 @@ -436,12 +334,12 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.246  -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
  32.247  -#define alert(fmt, ...) printk(KERN_ALERT "%s %s:%d: Alert: " fmt "\n", \
  32.248  -                        TPM_MODULE_NAME, __FILE__, __LINE__, ## __VA_ARGS__)
  32.249 -+#define info(fmt, ...)  printf("%s:%d: Info: " fmt "\n", \
  32.250 -+                        __FILE__, __LINE__, ## __VA_ARGS__)
  32.251 -+#define error(fmt, ...) printf("%s:%d: Error: " fmt "\n", \
  32.252 -+                        __FILE__, __LINE__, ## __VA_ARGS__)
  32.253 -+#define alert(fmt, ...) printf("%s:%d: Alert: " fmt "\n", \
  32.254 -+                        __FILE__, __LINE__, ## __VA_ARGS__)
  32.255 ++#define info(fmt, ...)  printf("TPMD[%d]: %s:%d: Info: " fmt "\n", \
  32.256 ++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  32.257 ++#define error(fmt, ...) printf("TPMD[%d]: %s:%d: Error: " fmt "\n", \
  32.258 ++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  32.259 ++#define alert(fmt, ...) printf("TPMD[%d]: %s:%d: Alert: " fmt "\n", \
  32.260 ++                        dmi_id, __FILE__, __LINE__, ## __VA_ARGS__)
  32.261   
  32.262   /* memory allocation */
  32.263   
  32.264 @@ -465,7 +363,7 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.265   static inline void tpm_get_random_bytes(void *buf, int nbytes)
  32.266   {
  32.267     get_random_bytes(buf, nbytes);
  32.268 -@@ -84,9 +93,9 @@ uint64_t tpm_get_ticks(void);
  32.269 +@@ -86,9 +96,9 @@ uint64_t tpm_get_ticks(void);
  32.270   #define CPU_TO_LE16(x) __cpu_to_le16(x)
  32.271   
  32.272   #define BE64_TO_CPU(x) __be64_to_cpu(x)
  32.273 @@ -477,9 +375,116 @@ diff -uprN orig/tpm_emulator-0.2/linux_m
  32.274   #define BE16_TO_CPU(x) __be16_to_cpu(x)
  32.275   #define LE16_TO_CPU(x) __le16_to_cpu(x)
  32.276   
  32.277 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
  32.278 ---- orig/tpm_emulator-0.2/tpm/tpm_audit.c	2005-08-17 10:58:36.000000000 -0700
  32.279 -+++ vtpm/tpm/tpm_audit.c	2005-08-17 10:55:52.000000000 -0700
  32.280 +diff -uprN orig/tpm_emulator-0.2-x86_64/Makefile vtpm/Makefile
  32.281 +--- orig/tpm_emulator-0.2-x86_64/Makefile	2005-09-15 19:21:14.845078568 -0700
  32.282 ++++ vtpm/Makefile	2005-09-14 20:27:22.000000000 -0700
  32.283 +@@ -1,22 +1,31 @@
  32.284 + # Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.285 + # Copyright (C) 2004 Mario Strasser <mast@gmx.net>
  32.286 ++# Copyright (C) 2005 INTEL Corp.
  32.287 + #
  32.288 + # $Id: Makefile 10 2005-04-26 20:59:50Z mast $
  32.289 + 
  32.290 +-# kernel settings
  32.291 +-KERNEL_RELEASE := $(shell uname -r)
  32.292 +-KERNEL_BUILD   := /lib/modules/$(KERNEL_RELEASE)/build
  32.293 +-MOD_SUBDIR     := misc
  32.294 + COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/)
  32.295 + 
  32.296 + # module settings
  32.297 +-MODULE_NAME    := tpm_emulator
  32.298 ++BIN            := vtpmd
  32.299 + VERSION_MAJOR  := 0
  32.300 + VERSION_MINOR  := 2
  32.301 + VERSION_BUILD  := $(shell date +"%s")
  32.302 + 
  32.303 +-# enable/disable DEBUG messages
  32.304 +-EXTRA_CFLAGS   += -DDEBUG -g  
  32.305 ++# Installation program and options
  32.306 ++INSTALL         = install
  32.307 ++INSTALL_PROG    = $(INSTALL) -m0755
  32.308 ++INSTALL_DIR     = $(INSTALL) -d -m0755
  32.309 ++
  32.310 ++# Xen tools installation directory
  32.311 ++TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
  32.312 ++
  32.313 ++CC      := gcc
  32.314 ++CFLAGS  += -g -Wall $(INCLUDE) -DDEBUG
  32.315 ++CFLAGS  += -I. -Itpm
  32.316 ++
   32.317 ++# Is the simulator running in its own vm?
  32.318 ++#CFLAGS += -DVTPM_MULTI_VM
  32.319 + 
  32.320 + ifeq ($(COMPILE_ARCH),x86_64)
  32.321 + LIBDIR = lib64
  32.322 +@@ -34,38 +43,31 @@ DIRS           := . crypto tpm 
  32.323 + SRCS           := $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.c))
  32.324 + OBJS           := $(patsubst %.c, %.o, $(SRCS))
  32.325 + SRCS           += $(foreach dir, $(DIRS), $(wildcard $(src)/$(dir)/*.h))
  32.326 +-DISTSRC        := ./README ./AUTHORS ./ChangeLog ./Makefile $(SRCS)
  32.327 +-DISTDIR        := tpm_emulator-$(VERSION_MAJOR).$(VERSION_MINOR)
  32.328 + 
  32.329 +-obj-m               := $(MODULE_NAME).o
  32.330 +-$(MODULE_NAME)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
  32.331 ++obj-m               := $(BIN)
  32.332 ++$(BIN)-objs := $(patsubst $(src)/%.o, %.o, $(OBJS)) crypto/libgmp.a
  32.333 + 
  32.334 + EXTRA_CFLAGS   += -I$(src) -I$(src)/crypto -I$(src)/tpm 
  32.335 + 
  32.336 + # do not print "Entering directory ..."
  32.337 + MAKEFLAGS      += --no-print-directory
  32.338 + 
  32.339 +-all:	$(src)/crypto/gmp.h $(src)/crypto/libgmp.a version
  32.340 +-	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules
  32.341 ++all: $(BIN)
  32.342 ++
  32.343 ++$(BIN):	$(src)/crypto/gmp.h $(src)/crypto/libgmp.a version $(SRCS) $(OBJS)
  32.344 ++	$(CC) $(CFLAGS) $(OBJS) $(src)/crypto/libgmp.a -o $(BIN)
  32.345 ++
  32.346 ++%.o: %.c
  32.347 ++	$(CC) $(CFLAGS) -c $< -o $@
  32.348 + 
  32.349 + install:
  32.350 +-	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) modules_install
  32.351 +-	test -d /var/tpm || mkdir /var/tpm
  32.352 +-	test -c /dev/tpm || mknod /dev/tpm c 10 224
  32.353 +-	chmod 666 /dev/tpm
  32.354 +-	depmod -a
  32.355 ++	$(INSTALL_PROG) $(BIN) $(TOOLS_INSTALL_DIR)
  32.356 + 
  32.357 + clean:
  32.358 +-	@$(MAKE) -C $(KERNEL_BUILD) M=$(CURDIR) clean
  32.359 +-	rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a
  32.360 ++	rm -f $(src)/crypto/gmp.h $(src)/crypto/libgmp.a $(OBJS)
  32.361 + 
  32.362 +-dist:	$(DISTSRC)
  32.363 +-	rm -rf $(DISTDIR)
  32.364 +-	mkdir $(DISTDIR)
  32.365 +-	cp --parents $(DISTSRC) $(DISTDIR)/
  32.366 +-	rm -f $(DISTDIR)/crypto/gmp.h 
  32.367 +-	tar -chzf $(DISTDIR).tar.gz $(DISTDIR)
  32.368 +-	rm -rf $(DISTDIR)
  32.369 ++mrproper: clean
  32.370 ++	rm -f $(BIN) tpm_version.h
  32.371 + 
  32.372 + $(src)/crypto/libgmp.a:
  32.373 + 	test -f $(src)/crypto/libgmp.a || ln -s $(GMP_LIB) $(src)/crypto/libgmp.a
  32.374 +diff -uprN orig/tpm_emulator-0.2-x86_64/README vtpm/README
  32.375 +--- orig/tpm_emulator-0.2-x86_64/README	2005-08-15 00:58:57.000000000 -0700
  32.376 ++++ vtpm/README	2005-09-14 20:27:22.000000000 -0700
  32.377 +@@ -13,7 +13,8 @@ $Id: README 8 2005-01-25 21:11:45Z jmoli
  32.378 + Copyright
  32.379 + --------------------------------------------------------------------------
  32.380 + Copyright (C) 2004 Mario Strasser <mast@gmx.net> and Swiss Federal 
  32.381 +-Institute of Technology (ETH) Zurich.
  32.382 ++                   Institute of Technology (ETH) Zurich.
  32.383 ++Copyright (C) 2005 INTEL Corp 
  32.384 +               
  32.385 + This program is free software; you can redistribute it and/or modify
  32.386 + it under the terms of the GNU General Public License as published by
  32.387 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c vtpm/tpm/tpm_audit.c
  32.388 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_audit.c	2005-08-15 00:58:57.000000000 -0700
  32.389 ++++ vtpm/tpm/tpm_audit.c	2005-09-14 20:27:22.000000000 -0700
  32.390  @@ -1,6 +1,7 @@
  32.391   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.392    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.393 @@ -542,9 +547,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.394     return TPM_SUCCESS;
  32.395   }
  32.396  -
  32.397 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_authorization.c vtpm/tpm/tpm_authorization.c
  32.398 ---- orig/tpm_emulator-0.2/tpm/tpm_authorization.c	2005-08-17 10:58:36.000000000 -0700
  32.399 -+++ vtpm/tpm/tpm_authorization.c	2005-08-17 10:55:52.000000000 -0700
  32.400 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c vtpm/tpm/tpm_authorization.c
  32.401 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_authorization.c	2005-08-15 00:58:57.000000000 -0700
  32.402 ++++ vtpm/tpm/tpm_authorization.c	2005-09-14 20:27:22.000000000 -0700
  32.403  @@ -1,6 +1,7 @@
  32.404   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.405    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.406 @@ -568,9 +573,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.407   }
  32.408  -
  32.409  -
  32.410 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_capability.c vtpm/tpm/tpm_capability.c
  32.411 ---- orig/tpm_emulator-0.2/tpm/tpm_capability.c	2005-08-17 10:58:36.000000000 -0700
  32.412 -+++ vtpm/tpm/tpm_capability.c	2005-08-17 10:55:52.000000000 -0700
  32.413 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c vtpm/tpm/tpm_capability.c
  32.414 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_capability.c	2005-08-15 00:58:57.000000000 -0700
  32.415 ++++ vtpm/tpm/tpm_capability.c	2005-09-14 20:27:22.000000000 -0700
  32.416  @@ -1,6 +1,7 @@
  32.417   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.418    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.419 @@ -593,9 +598,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.420     }
  32.421   }
  32.422  -
  32.423 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c vtpm/tpm/tpm_cmd_handler.c
  32.424 ---- orig/tpm_emulator-0.2/tpm/tpm_cmd_handler.c	2005-08-17 10:58:36.000000000 -0700
  32.425 -+++ vtpm/tpm/tpm_cmd_handler.c	2005-08-17 10:55:52.000000000 -0700
  32.426 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c vtpm/tpm/tpm_cmd_handler.c
  32.427 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_cmd_handler.c	2005-08-15 00:58:57.000000000 -0700
  32.428 ++++ vtpm/tpm/tpm_cmd_handler.c	2005-09-14 20:27:22.000000000 -0700
  32.429  @@ -1,6 +1,7 @@
  32.430   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.431    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.432 @@ -658,9 +663,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.433     return 0;
  32.434   }
  32.435  -
  32.436 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
  32.437 ---- orig/tpm_emulator-0.2/tpm/tpm_crypto.c	2005-08-17 10:58:36.000000000 -0700
  32.438 -+++ vtpm/tpm/tpm_crypto.c	2005-08-17 10:55:52.000000000 -0700
  32.439 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c vtpm/tpm/tpm_crypto.c
  32.440 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_crypto.c	2005-09-15 19:21:14.846078416 -0700
  32.441 ++++ vtpm/tpm/tpm_crypto.c	2005-09-14 20:27:22.000000000 -0700
  32.442  @@ -1,6 +1,7 @@
  32.443   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.444    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.445 @@ -678,14 +683,14 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.446       memcpy(&buf[30], areaToSign, areaToSignSize);
  32.447       if (rsa_sign(&key->key, RSA_SSA_PKCS1_SHA1, 
  32.448           buf, areaToSignSize + 30, *sig)) {
  32.449 -@@ -379,4 +380,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
  32.450 +@@ -383,4 +384,3 @@ TPM_RESULT TPM_CertifyKey2(TPM_KEY_HANDL
  32.451     }  
  32.452     return TPM_SUCCESS;
  32.453   }
  32.454  -
  32.455 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_data.c vtpm/tpm/tpm_data.c
  32.456 ---- orig/tpm_emulator-0.2/tpm/tpm_data.c	2005-08-17 10:58:36.000000000 -0700
  32.457 -+++ vtpm/tpm/tpm_data.c	2005-08-17 10:55:52.000000000 -0700
  32.458 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c vtpm/tpm/tpm_data.c
  32.459 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_data.c	2005-09-15 19:21:14.847078264 -0700
  32.460 ++++ vtpm/tpm/tpm_data.c	2005-09-14 20:27:22.000000000 -0700
  32.461  @@ -1,6 +1,7 @@
  32.462   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.463    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.464 @@ -1005,7 +1010,7 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.465   }
  32.466   
  32.467   #else
  32.468 -@@ -231,7 +431,6 @@ int tpm_restore_permanent_data(void)
  32.469 +@@ -232,7 +432,6 @@ int tpm_restore_permanent_data(void)
  32.470   
  32.471   int tpm_erase_permanent_data(void)
  32.472   {
  32.473 @@ -1014,9 +1019,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.474     return res;
  32.475   }
  32.476  -
  32.477 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_deprecated.c vtpm/tpm/tpm_deprecated.c
  32.478 ---- orig/tpm_emulator-0.2/tpm/tpm_deprecated.c	2005-08-17 10:58:36.000000000 -0700
  32.479 -+++ vtpm/tpm/tpm_deprecated.c	2005-08-17 10:55:52.000000000 -0700
  32.480 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c vtpm/tpm/tpm_deprecated.c
  32.481 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_deprecated.c	2005-08-15 00:58:57.000000000 -0700
  32.482 ++++ vtpm/tpm/tpm_deprecated.c	2005-09-14 20:27:22.000000000 -0700
  32.483  @@ -1,6 +1,7 @@
  32.484   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.485    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.486 @@ -1043,9 +1048,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.487                           authContextSize, &contextBlob);
  32.488     if (res != TPM_SUCCESS) return res;
  32.489     len = *authContextSize;
  32.490 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_emulator.h vtpm/tpm/tpm_emulator.h
  32.491 ---- orig/tpm_emulator-0.2/tpm/tpm_emulator.h	2005-08-17 10:58:36.000000000 -0700
  32.492 -+++ vtpm/tpm/tpm_emulator.h	2005-08-17 10:55:52.000000000 -0700
  32.493 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h vtpm/tpm/tpm_emulator.h
  32.494 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_emulator.h	2005-08-15 00:58:57.000000000 -0700
  32.495 ++++ vtpm/tpm/tpm_emulator.h	2005-09-14 20:27:22.000000000 -0700
  32.496  @@ -1,5 +1,6 @@
  32.497   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.498    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.499 @@ -1063,9 +1068,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.500   
  32.501   /**
  32.502    * tpm_emulator_init - initialises and starts the TPM emulator
  32.503 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_integrity.c vtpm/tpm/tpm_integrity.c
  32.504 ---- orig/tpm_emulator-0.2/tpm/tpm_integrity.c	2005-08-17 10:58:36.000000000 -0700
  32.505 -+++ vtpm/tpm/tpm_integrity.c	2005-08-17 10:55:52.000000000 -0700
  32.506 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c vtpm/tpm/tpm_integrity.c
  32.507 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_integrity.c	2005-08-15 00:58:57.000000000 -0700
  32.508 ++++ vtpm/tpm/tpm_integrity.c	2005-09-14 20:27:22.000000000 -0700
  32.509  @@ -1,6 +1,7 @@
  32.510   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.511    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.512 @@ -1079,9 +1084,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.513     return TPM_SUCCESS;
  32.514   }
  32.515  -
  32.516 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_structures.h vtpm/tpm/tpm_structures.h
  32.517 ---- orig/tpm_emulator-0.2/tpm/tpm_structures.h	2005-08-17 10:58:36.000000000 -0700
  32.518 -+++ vtpm/tpm/tpm_structures.h	2005-08-17 10:55:52.000000000 -0700
  32.519 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h vtpm/tpm/tpm_structures.h
  32.520 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_structures.h	2005-08-15 00:58:57.000000000 -0700
  32.521 ++++ vtpm/tpm/tpm_structures.h	2005-09-14 20:27:22.000000000 -0700
  32.522  @@ -1,6 +1,7 @@
  32.523   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.524    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.525 @@ -1099,9 +1104,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.526   #include "crypto/rsa.h"
  32.527   
  32.528   /*
  32.529 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_testing.c vtpm/tpm/tpm_testing.c
  32.530 ---- orig/tpm_emulator-0.2/tpm/tpm_testing.c	2005-08-17 10:58:36.000000000 -0700
  32.531 -+++ vtpm/tpm/tpm_testing.c	2005-08-17 10:55:52.000000000 -0700
  32.532 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c vtpm/tpm/tpm_testing.c
  32.533 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_testing.c	2005-08-15 00:58:57.000000000 -0700
  32.534 ++++ vtpm/tpm/tpm_testing.c	2005-09-14 20:27:22.000000000 -0700
  32.535  @@ -1,6 +1,7 @@
  32.536   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.537    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.538 @@ -1217,9 +1222,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.539     rsa_private_key_t priv_key;
  32.540     rsa_public_key_t pub_key;
  32.541   
  32.542 -diff -uprN orig/tpm_emulator-0.2/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
  32.543 ---- orig/tpm_emulator-0.2/tpm/tpm_ticks.c	2005-08-17 10:58:36.000000000 -0700
  32.544 -+++ vtpm/tpm/tpm_ticks.c	2005-08-17 10:55:52.000000000 -0700
  32.545 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c vtpm/tpm/tpm_ticks.c
  32.546 +--- orig/tpm_emulator-0.2-x86_64/tpm/tpm_ticks.c	2005-08-15 00:58:57.000000000 -0700
  32.547 ++++ vtpm/tpm/tpm_ticks.c	2005-09-14 20:27:22.000000000 -0700
  32.548  @@ -1,6 +1,7 @@
  32.549   /* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.550    * Copyright (C) 2004 Mario Strasser <mast@gmx.net>,
  32.551 @@ -1302,9 +1307,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/tpm
  32.552   }
  32.553     
  32.554   
  32.555 -diff -uprN orig/tpm_emulator-0.2/tpm/vtpm_manager.h vtpm/tpm/vtpm_manager.h
  32.556 ---- orig/tpm_emulator-0.2/tpm/vtpm_manager.h	1969-12-31 16:00:00.000000000 -0800
  32.557 -+++ vtpm/tpm/vtpm_manager.h	2005-08-17 10:55:52.000000000 -0700
  32.558 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h vtpm/tpm/vtpm_manager.h
  32.559 +--- orig/tpm_emulator-0.2-x86_64/tpm/vtpm_manager.h	1969-12-31 16:00:00.000000000 -0800
  32.560 ++++ vtpm/tpm/vtpm_manager.h	2005-09-14 20:27:22.000000000 -0700
  32.561  @@ -0,0 +1,126 @@
  32.562  +// ===================================================================
  32.563  +// 
  32.564 @@ -1432,9 +1437,9 @@ diff -uprN orig/tpm_emulator-0.2/tpm/vtp
  32.565  +*********************************************************************/
  32.566  +
  32.567  +#endif //_VTPM_MANAGER_H_
  32.568 -diff -uprN orig/tpm_emulator-0.2/tpmd.c vtpm/tpmd.c
  32.569 ---- orig/tpm_emulator-0.2/tpmd.c	1969-12-31 16:00:00.000000000 -0800
  32.570 -+++ vtpm/tpmd.c	2005-08-17 10:55:52.000000000 -0700
  32.571 +diff -uprN orig/tpm_emulator-0.2-x86_64/tpmd.c vtpm/tpmd.c
  32.572 +--- orig/tpm_emulator-0.2-x86_64/tpmd.c	1969-12-31 16:00:00.000000000 -0800
  32.573 ++++ vtpm/tpmd.c	2005-09-15 19:28:55.783005352 -0700
  32.574  @@ -0,0 +1,207 @@
  32.575  +/* Software-Based Trusted Platform Module (TPM) Emulator for Linux
  32.576  + * Copyright (C) 2005 INTEL Corp
  32.577 @@ -1468,9 +1473,9 @@ diff -uprN orig/tpm_emulator-0.2/tpmd.c 
  32.578  +#else
  32.579  + #define GUEST_RX_FIFO_D "/var/vtpm/fifos/guest-to-%d.fifo"
  32.580  + #define GUEST_TX_FIFO "/var/vtpm/fifos/guest-from-all.fifo"
  32.581 ++#endif
  32.582  +
  32.583  + int dmi_id;
  32.584 -+#endif
  32.585  +						
  32.586  +#define BUFFER_SIZE 2048
  32.587  +
  32.588 @@ -1506,7 +1511,7 @@ diff -uprN orig/tpm_emulator-0.2/tpmd.c 
  32.589  +{
  32.590  +  uint8_t in[BUFFER_SIZE], *out, *addressed_out;
  32.591  +  uint32_t out_size;
  32.592 -+  int in_size, written ;
  32.593 ++  int in_size, written;
  32.594  +  int i, guest_id=-1;
  32.595  + 
  32.596  +  int vtpm_tx_fh=-1, vtpm_rx_fh=-1;
  32.597 @@ -1602,7 +1607,7 @@ diff -uprN orig/tpm_emulator-0.2/tpmd.c 
  32.598  +      written = write(vtpm_tx_fh, ctrl_msg, sizeof(ctrl_msg));
  32.599  +
  32.600  +      if (written != sizeof(ctrl_msg)) {
  32.601 -+        printf("ERROR: Part of response not written %d/%d.\n", written, sizeof(ctrl_msg));
  32.602 ++        printf("ERROR: Part of response not written %d/%Zu.\n", written, sizeof(ctrl_msg));
  32.603  +      } else {
  32.604  +        printf("Send Ctrl Message confermation\n");
  32.605  +        printf("Send Ctrl Message confirmation\n");
  32.606 @@ -1623,7 +1628,7 @@ diff -uprN orig/tpm_emulator-0.2/tpmd.c 
  32.607  +          printf("%x ", addressed_out[i]);
  32.608  +        printf("\n");
  32.609  +      } else {
  32.610 -+        printf("Sent[%d]: ", out_size + sizeof(uint32_t));
  32.611 ++        printf("Sent[%Zu]: ", out_size + sizeof(uint32_t));
  32.612  +        for (i=0; i< out_size+ sizeof(uint32_t); i++)
  32.613  +          printf("%x ", addressed_out[i]);
  32.614  +        printf("\n");
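
Note on the %Zu hunks above: sizeof yields a size_t, so printing it with %d
misreads the argument on 64-bit builds where int and size_t differ in width.
%Zu is the older glibc spelling of the length modifier; C99 standardizes it
as %zu. A minimal sketch (ctrl_msg here is just an illustrative buffer):

    #include <stdio.h>

    int main(void)
    {
      char ctrl_msg[16];
      /* size_t takes the z length modifier; %d would read the wrong
       * width on LP64 ABIs. Casting to unsigned long with %lu also works. */
      printf("sizeof(ctrl_msg) = %zu\n", sizeof(ctrl_msg));
      return 0;
    }
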
    33.1 --- a/tools/vtpm_manager/README	Tue Sep 20 09:43:29 2005 +0000
    33.2 +++ b/tools/vtpm_manager/README	Tue Sep 20 09:43:46 2005 +0000
    33.3 @@ -51,14 +51,24 @@ VTPM_MULTI_VM                -> Defined:
    33.4  DUMMY_BACKEND                -> vtpm_manager listens on /tmp/in.fifo and 
    33.5                                  /tmp/out.fifo rather than backend
    33.6  
    33.7 -MANUAL_DM_LAUNCH             -> User must manually launch & kill VTPMs
    33.8 +MANUAL_DM_LAUNCH             -> Must manually launch & kill VTPMs
    33.9  
   33.10 -USE_FIXED_SRK_AUTH           -> Do not randomly generate a random SRK & Owner auth
   33.11 +WELL_KNOWN_SRK_AUTH          -> Rather than randomly generating the password for the SRK,
   33.12 +                                use a well-known value. This is necessary for sharing use
   33.13 +                                of the SRK across applications, such as VTPM and Dom0
   33.14 +                                measurement software.
   33.15 +
   33.16 +WELL_KNOWN_OWNER_AUTH        -> Rather than randomly generating the password for the owner,
   33.17 +                                use a well-known value. This is useful for debugging and for
   33.18 +                                poor BIOSes which do not support clearing the TPM if OwnerAuth
   33.19 +                                is lost. However, this offers no protection from a malicious
   33.20 +                                app issuing a TPM_OwnerClear to wipe the TPM.
   33.21  
   33.22  Requirements
   33.23  ============
   33.24  - xen-unstable 
   33.25 -- IBM frontend/backend vtpm driver patch
   33.26 +- vtpm frontend/backend driver patch
   33.27 +- OpenSSL Library
   33.28  
   33.29  Single-VM Flow
   33.30  ============================
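
As a reference for how these build flags are consumed, here is a minimal
sketch of the well-known-versus-random choice; the helper names below are
illustrative, not the manager's actual API. The TSS convention for a
well-known secret is an all-zero 20-byte value, which is what lets other
Dom0 software authorize use of the same SRK; a randomly generated secret
stays private to the manager. Rules.mk selects between the two with -D
defines, as the hunk below shows.

    #include <string.h>

    #define TPM_DIGEST_SIZE 20
    typedef unsigned char BYTE;

    /* Illustrative stub: the manager uses its own crypto RNG helpers. */
    extern void rng_bytes(BYTE *buf, unsigned int len);

    static void init_srk_auth(BYTE auth[TPM_DIGEST_SIZE])
    {
    #ifdef WELL_KNOWN_SRK_AUTH
      /* Fixed, publicly known secret (all zeros by TSS convention), so
       * other applications can share use of the same SRK. */
      memset(auth, 0, TPM_DIGEST_SIZE);
    #else
      /* Fresh random secret: no other application can use this SRK. */
      rng_bytes(auth, TPM_DIGEST_SIZE);
    #endif
    }
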
    34.1 --- a/tools/vtpm_manager/Rules.mk	Tue Sep 20 09:43:29 2005 +0000
    34.2 +++ b/tools/vtpm_manager/Rules.mk	Tue Sep 20 09:43:46 2005 +0000
    34.3 @@ -57,7 +57,8 @@ CFLAGS += -DLOGGING_MODULES="(BITMASK(VT
    34.4  #CFLAGS += -DMANUAL_DM_LAUNCH
    34.5  
    34.6  # Fixed SRK
    34.7 -CFLAGS += -DUSE_FIXED_SRK_AUTH
    34.8 +CFLAGS += -DWELL_KNOWN_SRK_AUTH
    34.9 +#CFLAGS += -DWELL_KNOWN_OWNER_AUTH
   34.10  
   34.11  # TPM Hardware Device or TPM Simulator
   34.12  #CFLAGS += -DTPM_HWDEV
    35.1 --- a/tools/vtpm_manager/crypto/Makefile	Tue Sep 20 09:43:29 2005 +0000
    35.2 +++ b/tools/vtpm_manager/crypto/Makefile	Tue Sep 20 09:43:46 2005 +0000
    35.3 @@ -13,6 +13,7 @@ clean:
    35.4  	rm -f *.a *.so *.o *.rpm $(DEP_FILES)
    35.5  
    35.6  mrproper: clean
    35.7 +	rm -f *~
    35.8  
    35.9  $(BIN): $(OBJS)
   35.10  	$(AR) rcs $(BIN) $(OBJS)
    36.1 --- a/tools/vtpm_manager/manager/Makefile	Tue Sep 20 09:43:29 2005 +0000
    36.2 +++ b/tools/vtpm_manager/manager/Makefile	Tue Sep 20 09:43:46 2005 +0000
    36.3 @@ -17,7 +17,7 @@ clean:
    36.4  	rm -f *.a *.so *.o *.rpm $(DEP_FILES)
    36.5  
    36.6  mrproper: clean
    36.7 -	rm -f $(BIN)
    36.8 +	rm -f $(BIN) *~
    36.9  
   36.10  $(BIN): $(OBJS)
   36.11  	$(CC) $(LDFLAGS) $^ $(LIBS) -o $@
    37.1 --- a/tools/vtpm_manager/manager/dmictl.c	Tue Sep 20 09:43:29 2005 +0000
    37.2 +++ b/tools/vtpm_manager/manager/dmictl.c	Tue Sep 20 09:43:46 2005 +0000
    37.3 @@ -1,339 +1,344 @@
    37.4 -// ===================================================================
    37.5 -// 
    37.6 -// Copyright (c) 2005, Intel Corp.
    37.7 -// All rights reserved.
    37.8 -//
    37.9 -// Redistribution and use in source and binary forms, with or without 
   37.10 -// modification, are permitted provided that the following conditions 
   37.11 -// are met:
   37.12 -//
   37.13 -//   * Redistributions of source code must retain the above copyright 
   37.14 -//     notice, this list of conditions and the following disclaimer.
   37.15 -//   * Redistributions in binary form must reproduce the above 
   37.16 -//     copyright notice, this list of conditions and the following 
   37.17 -//     disclaimer in the documentation and/or other materials provided 
   37.18 -//     with the distribution.
   37.19 -//   * Neither the name of Intel Corporation nor the names of its 
   37.20 -//     contributors may be used to endorse or promote products derived
   37.21 -//     from this software without specific prior written permission.
   37.22 -//
   37.23 -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
   37.24 -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
   37.25 -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
   37.26 -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
   37.27 -// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   37.28 -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   37.29 -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
   37.30 -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   37.31 -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
   37.32 -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   37.33 -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
   37.34 -// OF THE POSSIBILITY OF SUCH DAMAGE.
   37.35 -// ===================================================================
   37.36 -// 
   37.37 -//   dmictl.c
   37.38 -// 
   37.39 -//     Functions for creating and destroying DMIs
   37.40 -//
   37.41 -// ==================================================================
   37.42 -
   37.43 -#include <stdio.h>
   37.44 -#include <unistd.h>
   37.45 -#include <string.h>
   37.46 -
   37.47 -#ifndef VTPM_MUTLI_VM
   37.48 - #include <sys/types.h>
   37.49 - #include <sys/stat.h>
   37.50 - #include <fcntl.h>
   37.51 - #include <signal.h>
   37.52 - #include <wait.h>
   37.53 -#endif
   37.54 -
   37.55 -#include "vtpmpriv.h"
   37.56 -#include "bsg.h"
   37.57 -#include "buffer.h"
   37.58 -#include "log.h"
   37.59 -#include "hashtable.h"
   37.60 -#include "hashtable_itr.h"
   37.61 -
   37.62 -#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
   37.63 -
   37.64 -TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
   37.65 -	TPM_RESULT status = TPM_FAIL;
   37.66 -	
   37.67 -	if (dmi_res == NULL) 
   37.68 -		return TPM_SUCCESS;
   37.69 -	
   37.70 -	status = TCS_CloseContext(dmi_res->TCSContext);
   37.71 -	free ( dmi_res->NVMLocation );
   37.72 -	dmi_res->connected = FALSE;
   37.73 -
   37.74 -#ifndef VTPM_MULTI_VM	
   37.75 -	free(dmi_res->guest_tx_fname);
   37.76 -	free(dmi_res->vtpm_tx_fname);
   37.77 -		
   37.78 -	close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1;
   37.79 -	close(dmi_res->vtpm_tx_fh);  dmi_res->vtpm_tx_fh = -1; 
   37.80 -	
   37.81 -		
   37.82 - #ifndef MANUAL_DM_LAUNCH
   37.83 -  if (dmi_res->dmi_id != VTPM_CTL_DM) {
   37.84 -    if (dmi_res->dmi_pid != 0) {
   37.85 -      vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid);
   37.86 -      if ((kill(dmi_res->dmi_pid, SIGKILL) !=0) ||
   37.87 -         (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid)){
   37.88 -        vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi on pid %d.\n", dmi_res->dmi_pid);
   37.89 -        status = TPM_FAIL;
   37.90 -      }
   37.91 -    } else 
   37.92 -      vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because it's pid was 0.\n");
   37.93 -  }
   37.94 - #endif
   37.95 -#endif
   37.96 -
   37.97 -	return status;
   37.98 -}
   37.99 -	
  37.100 -TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
  37.101 -  
  37.102 -  VTPM_DMI_RESOURCE *new_dmi=NULL;
  37.103 -  TPM_RESULT status=TPM_FAIL;
  37.104 -  BYTE type;
  37.105 -  UINT32 dmi_id, domain_id, *dmi_id_key; 
  37.106 -  int fh;
  37.107 -
  37.108 -#ifndef VTPM_MUTLI_VM
  37.109 -  char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
  37.110 -  struct stat file_info;
  37.111 -#endif
  37.112 -  
  37.113 -  if (param_buf == NULL) { // Assume creation of Dom 0 control
  37.114 -    type = 0;
  37.115 -    domain_id = VTPM_CTL_DM;
  37.116 -    dmi_id = VTPM_CTL_DM;
  37.117 -  } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
  37.118 -    vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n", buffer_len(param_buf));
  37.119 -    status = TPM_BAD_PARAMETER;
  37.120 -    goto abort_egress;
  37.121 -  } else {
  37.122 -    BSG_UnpackList( param_buf->bytes, 3,
  37.123 -		    BSG_TYPE_BYTE, &type,
  37.124 -		    BSG_TYPE_UINT32, &domain_id,
  37.125 -		    BSG_TYPE_UINT32,  &dmi_id);
  37.126 -  }
  37.127 -  
  37.128 -  new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id);
  37.129 -  if (new_dmi == NULL) { 
  37.130 -    vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on domain %d.\n", dmi_id, domain_id);
  37.131 -    // Brand New DMI. Initialize the persistent pieces
  37.132 -    if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE))) == NULL) {
  37.133 -      status = TPM_RESOURCES;
  37.134 -      goto abort_egress;
  37.135 -    }
  37.136 -    memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
  37.137 -    new_dmi->dmi_id = dmi_id;
  37.138 -    new_dmi->connected = FALSE;
  37.139 -    
  37.140 -    if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
  37.141 -      status = TPM_RESOURCES;
  37.142 -      goto abort_egress;
  37.143 -    }      
  37.144 -    *dmi_id_key = new_dmi->dmi_id;
  37.145 -    
  37.146 -    // install into map
  37.147 -    if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
  37.148 -      free(new_dmi);
  37.149 -      free(dmi_id_key);
  37.150 -      status = TPM_FAIL;
  37.151 -      goto egress;
  37.152 -    }
  37.153 -    
  37.154 -  } else 
  37.155 -    vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d .\n", dmi_id, domain_id);
  37.156 -  
  37.157 -  if (new_dmi->connected) {
  37.158 -    vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach, currently attached instance %d. Ignoring\n", dmi_id);
  37.159 -    status = TPM_BAD_PARAMETER;
  37.160 -    goto egress;
  37.161 -  }
  37.162 -  
  37.163 -  // Initialize the Non-persistent pieces
  37.164 -  new_dmi->dmi_domain_id = domain_id;
  37.165 -  new_dmi->NVMLocation = NULL;
  37.166 -  
  37.167 -  new_dmi->TCSContext = 0;
  37.168 -  TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
  37.169 -  
  37.170 -  new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
  37.171 -  sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
  37.172 -  
  37.173 -  // Measure DMI
  37.174 -  // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
  37.175 -  /*
  37.176 -  fh = open(TPM_EMULATOR_PATH, O_RDONLY);
  37.177 -  stat_ret = fstat(fh, &file_stat);
  37.178 -  if (stat_ret == 0) 
  37.179 -    dmi_size = file_stat.st_size;
  37.180 -  else {
  37.181 -	vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
  37.182 -    status = TPM_IOERROR;
  37.183 -    goto abort_egress;
  37.184 -  }
  37.185 -  dmi_buffer
  37.186 -  */
  37.187 -  memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
  37.188 -  
  37.189 -#ifndef VTPM_MULTI_VM
  37.190 -  if (dmi_id != VTPM_CTL_DM) {
  37.191 -    // Create a pair of fifo pipes
  37.192 -		if( (new_dmi->guest_tx_fname = (char *) malloc(11 + strlen(GUEST_TX_FIFO))) == NULL){ 
  37.193 -			status = TPM_RESOURCES;
  37.194 -			goto abort_egress;
  37.195 -		}
  37.196 -		sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) dmi_id);
  37.197 -    
  37.198 -		if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 + strlen(VTPM_TX_FIFO))) == NULL) {
  37.199 -			status = TPM_RESOURCES;
  37.200 -			goto abort_egress;
  37.201 -		}
  37.202 -		sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) dmi_id);
  37.203 -    
  37.204 -    new_dmi->guest_tx_fh = -1;
  37.205 -    new_dmi->vtpm_tx_fh= -1;
  37.206 -    
  37.207 -    if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
  37.208 -      if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
  37.209 -				status = TPM_FAIL;
  37.210 -				goto abort_egress;
  37.211 -      }
  37.212 -    }
  37.213 -            
  37.214 -    if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
  37.215 -      if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
  37.216 -	status = TPM_FAIL;
  37.217 -	goto abort_egress;
  37.218 -      }
  37.219 -    }
  37.220 -                
  37.221 -    // Launch DMI
  37.222 -    sprintf(dmi_id_str, "%d", (int) dmi_id);
  37.223 -#ifdef MANUAL_DM_LAUNCH
  37.224 -    vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n", dmi_id_str);
  37.225 -    new_dmi->dmi_pid = 0;
  37.226 -#else
  37.227 -    pid_t pid = fork();
  37.228 -    
  37.229 -    if (pid == -1) {
  37.230 -			vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch vtpm\n");
  37.231 -		  status = TPM_RESOURCES;
  37.232 -      goto abort_egress;
  37.233 -		} else if (pid == 0) {
  37.234 -		  if ( stat(new_dmi->NVMLocation, &file_info) == -1)
  37.235 -				execl (TPM_EMULATOR_PATH, "vtmpd", "clear", dmi_id_str, NULL);
  37.236 -			else 
  37.237 -				execl (TPM_EMULATOR_PATH, "vtpmd", "save", dmi_id_str, NULL);
  37.238 -			
  37.239 -			// Returning from these at all is an error.
  37.240 -			vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch vtpm\n");
  37.241 -    } else {
  37.242 -      new_dmi->dmi_pid = pid;
  37.243 -      vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
  37.244 -    }
  37.245 -#endif // MANUAL_DM_LAUNCH
  37.246 -  }
  37.247 -#else // VTPM_MUTLI_VM
  37.248 -  // FIXME: Measure DMI through call to Measurement agent in platform.
  37.249 -#endif 
  37.250 -	
  37.251 -  vtpm_globals->DMI_table_dirty = TRUE;
  37.252 -  new_dmi->connected = TRUE;  
  37.253 -  status=TPM_SUCCESS;
  37.254 -  goto egress;
  37.255 -  
  37.256 - abort_egress:
  37.257 -	close_dmi( new_dmi );
  37.258 -	
  37.259 - egress:
  37.260 -  return status;
  37.261 -}
  37.262 -
  37.263 -TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
  37.264 -  
  37.265 -  TPM_RESULT status=TPM_FAIL;
  37.266 -  VTPM_DMI_RESOURCE *dmi_res=NULL;
  37.267 -  UINT32 dmi_id;
  37.268 -  
  37.269 -  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
  37.270 -    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.");
  37.271 -    status = TPM_BAD_PARAMETER;
  37.272 -    goto abort_egress;
  37.273 -  }
  37.274 -  
  37.275 -  BSG_UnpackList( param_buf->bytes, 1,
  37.276 -		  BSG_TYPE_UINT32, &dmi_id);
  37.277 -  
  37.278 -  vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
  37.279 -  
  37.280 -  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id);
  37.281 -  if (dmi_res == NULL ) {
  37.282 -    vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
  37.283 -    status = TPM_BAD_PARAMETER;
  37.284 -    goto abort_egress;
  37.285 -  }
  37.286 -	
  37.287 -	if (!dmi_res->connected) {
  37.288 -    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
  37.289 -    status = TPM_BAD_PARAMETER;
  37.290 -    goto abort_egress;
  37.291 -  }
  37.292 -  
  37.293 -  // Close Dmi
  37.294 -	TPMTRYRETURN(close_dmi( dmi_res ));
  37.295 -  
  37.296 -  status=TPM_SUCCESS;    
  37.297 -  goto egress;
  37.298 -  
  37.299 - abort_egress:
  37.300 - egress:
  37.301 -  
  37.302 -  return status;
  37.303 -}
  37.304 -
  37.305 -TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
  37.306 -  
  37.307 -  TPM_RESULT status=TPM_FAIL;
  37.308 -  VTPM_DMI_RESOURCE *dmi_res=NULL;
  37.309 -  UINT32 dmi_id;
  37.310 -    
  37.311 -  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
  37.312 -    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
  37.313 -    status = TPM_BAD_PARAMETER;
  37.314 -    goto abort_egress;
  37.315 -  }
  37.316 -  
  37.317 -  BSG_UnpackList( param_buf->bytes, 1,
  37.318 -		  BSG_TYPE_UINT32, &dmi_id);
  37.319 -  
  37.320 -  vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);    
  37.321 -  
  37.322 -  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map, &dmi_id);
  37.323 -  if (dmi_res == NULL) {
  37.324 -    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-existent DMI.\n");
  37.325 -    status = TPM_BAD_PARAMETER;
  37.326 -    goto abort_egress;
  37.327 -  }
  37.328 -  
  37.329 -	//TODO: Automatically delete file dmi_res->NVMLocation
  37.330 -  
  37.331 -  // Close DMI first
  37.332 -  TPMTRYRETURN(close_dmi( dmi_res ));
  37.333 -	free ( dmi_res );
  37.334 -	
  37.335 -  status=TPM_SUCCESS;    
  37.336 -  goto egress;
  37.337 -  
  37.338 - abort_egress:
  37.339 - egress:
  37.340 -  
  37.341 -  return status;
  37.342 -}
  37.343 +// ===================================================================
  37.344 +// 
  37.345 +// Copyright (c) 2005, Intel Corp.
  37.346 +// All rights reserved.
  37.347 +//
  37.348 +// Redistribution and use in source and binary forms, with or without 
  37.349 +// modification, are permitted provided that the following conditions 
  37.350 +// are met:
  37.351 +//
  37.352 +//   * Redistributions of source code must retain the above copyright 
  37.353 +//     notice, this list of conditions and the following disclaimer.
  37.354 +//   * Redistributions in binary form must reproduce the above 
  37.355 +//     copyright notice, this list of conditions and the following 
  37.356 +//     disclaimer in the documentation and/or other materials provided 
  37.357 +//     with the distribution.
  37.358 +//   * Neither the name of Intel Corporation nor the names of its 
  37.359 +//     contributors may be used to endorse or promote products derived
  37.360 +//     from this software without specific prior written permission.
  37.361 +//
  37.362 +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
  37.363 +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
  37.364 +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
  37.365 +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
  37.366 +// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  37.367 +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  37.368 +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
  37.369 +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  37.370 +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
  37.371 +// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  37.372 +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  37.373 +// OF THE POSSIBILITY OF SUCH DAMAGE.
  37.374 +// ===================================================================
  37.375 +// 
  37.376 +//   dmictl.c
  37.377 +// 
  37.378 +//     Functions for creating and destroying DMIs
  37.379 +//
  37.380 +// ==================================================================
  37.381 +
  37.382 +#include <stdio.h>
  37.383 +#include <unistd.h>
  37.384 +#include <string.h>
  37.385 +
  37.386 +#ifndef VTPM_MUTLI_VM
  37.387 + #include <sys/types.h>
  37.388 + #include <sys/stat.h>
  37.389 + #include <fcntl.h>
  37.390 + #include <signal.h>
  37.391 + #include <wait.h>
  37.392 +#endif
  37.393 +
  37.394 +#include "vtpmpriv.h"
  37.395 +#include "bsg.h"
  37.396 +#include "buffer.h"
  37.397 +#include "log.h"
  37.398 +#include "hashtable.h"
  37.399 +#include "hashtable_itr.h"
  37.400 +
  37.401 +#define TPM_EMULATOR_PATH "/usr/bin/vtpmd"
  37.402 +
  37.403 +TPM_RESULT close_dmi( VTPM_DMI_RESOURCE *dmi_res) {
  37.404 +  TPM_RESULT status = TPM_FAIL;
  37.405 +  
  37.406 +  if (dmi_res == NULL) 
  37.407 +    return TPM_SUCCESS;
  37.408 +
  37.409 +  status = TCS_CloseContext(dmi_res->TCSContext);
  37.410 +  free ( dmi_res->NVMLocation );
  37.411 +  dmi_res->connected = FALSE;
  37.412 +
  37.413 +#ifndef VTPM_MULTI_VM	
  37.414 +  free(dmi_res->guest_tx_fname);
  37.415 +  free(dmi_res->vtpm_tx_fname);
  37.416 +	  
  37.417 +  close(dmi_res->guest_tx_fh); dmi_res->guest_tx_fh = -1;
  37.418 +  close(dmi_res->vtpm_tx_fh);  dmi_res->vtpm_tx_fh = -1; 
  37.419 +		
  37.420 + #ifndef MANUAL_DM_LAUNCH
  37.421 +  if (dmi_res->dmi_id != VTPM_CTL_DM) {
  37.422 +    if (dmi_res->dmi_pid != 0) {
  37.423 +      vtpmloginfo(VTPM_LOG_VTPM, "Killing dmi on pid %d.\n", dmi_res->dmi_pid);
  37.424 +      if (kill(dmi_res->dmi_pid, SIGKILL) !=0) {
  37.425 +        vtpmloginfo(VTPM_LOG_VTPM, "DMI on pid %d is already dead.\n", dmi_res->dmi_pid);
  37.426 +      } else if (waitpid(dmi_res->dmi_pid, NULL, 0) != dmi_res->dmi_pid) {
  37.427 +        vtpmlogerror(VTPM_LOG_VTPM, "DMI on pid %d failed to stop.\n", dmi_res->dmi_pid);
  37.428 +        status = TPM_FAIL;
  37.429 +      }
  37.430 +    } else { 
   37.431 +      vtpmlogerror(VTPM_LOG_VTPM, "Could not kill dmi because its pid was 0.\n");
  37.432 +      status = TPM_FAIL;
  37.433 +    }
  37.434 +  }
  37.435 + #endif
  37.436 +#endif
  37.437 +
  37.438 +  return status;
  37.439 +}
  37.440 +	
  37.441 +TPM_RESULT VTPM_Handle_New_DMI( const buffer_t *param_buf) {
  37.442 +  
  37.443 +  VTPM_DMI_RESOURCE *new_dmi=NULL;
  37.444 +  TPM_RESULT status=TPM_FAIL;
  37.445 +  BYTE type;
  37.446 +  UINT32 dmi_id, domain_id, *dmi_id_key; 
  37.447 +
  37.448 +#ifndef VTPM_MULTI_VM
  37.449 +  int fh;
  37.450 +  char dmi_id_str[11]; // UINT32s are up to 10 digits + NULL
  37.451 +  struct stat file_info;
  37.452 +#endif
  37.453 +  
  37.454 +  if (param_buf == NULL) { // Assume creation of Dom 0 control
  37.455 +    type = 0;
  37.456 +    domain_id = VTPM_CTL_DM;
  37.457 +    dmi_id = VTPM_CTL_DM;
  37.458 +  } else if (buffer_len(param_buf) != sizeof(BYTE) + sizeof(UINT32) *2) {
  37.459 +    vtpmloginfo(VTPM_LOG_VTPM, "New DMI command wrong length: %d.\n", buffer_len(param_buf));
  37.460 +    status = TPM_BAD_PARAMETER;
  37.461 +    goto abort_egress;
  37.462 +  } else {
  37.463 +    BSG_UnpackList( param_buf->bytes, 3,
  37.464 +		    BSG_TYPE_BYTE, &type,
  37.465 +		    BSG_TYPE_UINT32, &domain_id,
  37.466 +		    BSG_TYPE_UINT32,  &dmi_id);
  37.467 +  }
  37.468 +  
  37.469 +  new_dmi = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id);
  37.470 +  if (new_dmi == NULL) { 
  37.471 +    vtpmloginfo(VTPM_LOG_VTPM, "Creating new DMI instance %d attached on domain %d.\n", dmi_id, domain_id);
  37.472 +    // Brand New DMI. Initialize the persistent pieces
  37.473 +    if ((new_dmi = (VTPM_DMI_RESOURCE *) malloc (sizeof(VTPM_DMI_RESOURCE))) == NULL) {
  37.474 +      status = TPM_RESOURCES;
  37.475 +      goto abort_egress;
  37.476 +    }
  37.477 +    memset(new_dmi, 0, sizeof(VTPM_DMI_RESOURCE));
  37.478 +    new_dmi->dmi_id = dmi_id;
  37.479 +    new_dmi->connected = FALSE;
  37.480 +    
  37.481 +    if ((dmi_id_key = (UINT32 *) malloc (sizeof(UINT32))) == NULL) {
  37.482 +      status = TPM_RESOURCES;
  37.483 +      goto abort_egress;
  37.484 +    }      
  37.485 +    *dmi_id_key = new_dmi->dmi_id;
  37.486 +    
  37.487 +    // install into map
  37.488 +    if (!hashtable_insert(vtpm_globals->dmi_map, dmi_id_key, new_dmi)){
  37.489 +      free(new_dmi);
  37.490 +      free(dmi_id_key);
  37.491 +      status = TPM_FAIL;
  37.492 +      goto egress;
  37.493 +    }
  37.494 +    
  37.495 +  } else 
   37.496 +    vtpmloginfo(VTPM_LOG_VTPM, "Re-attaching DMI instance %d on domain %d.\n", dmi_id, domain_id);
  37.497 +  
  37.498 +  if (new_dmi->connected) {
   37.499 +    vtpmlogerror(VTPM_LOG_VTPM, "Attempt to re-attach currently attached instance %d. Ignoring.\n", dmi_id);
  37.500 +    status = TPM_BAD_PARAMETER;
  37.501 +    goto egress;
  37.502 +  }
  37.503 +  
  37.504 +  // Initialize the Non-persistent pieces
  37.505 +  new_dmi->dmi_domain_id = domain_id;
  37.506 +  new_dmi->NVMLocation = NULL;
  37.507 +  
  37.508 +  new_dmi->TCSContext = 0;
  37.509 +  TPMTRYRETURN( TCS_OpenContext(&new_dmi->TCSContext) );
  37.510 +  
  37.511 +  new_dmi->NVMLocation = (char *) malloc(11 + strlen(DMI_NVM_FILE));
  37.512 +  sprintf(new_dmi->NVMLocation, DMI_NVM_FILE, (uint32_t) new_dmi->dmi_id);
  37.513 +  
  37.514 +  // Measure DMI
  37.515 +  // FIXME: This will measure DMI. Until then use a fixed DMI_Measurement value
  37.516 +  /*
  37.517 +  fh = open(TPM_EMULATOR_PATH, O_RDONLY);
  37.518 +  stat_ret = fstat(fh, &file_stat);
  37.519 +  if (stat_ret == 0) 
  37.520 +    dmi_size = file_stat.st_size;
  37.521 +  else {
  37.522 +      vtpmlogerror(VTPM_LOG_VTPM, "Could not open tpm_emulator!!\n");
  37.523 +    status = TPM_IOERROR;
  37.524 +    goto abort_egress;
  37.525 +  }
  37.526 +  dmi_buffer
  37.527 +  */
  37.528 +  memset(&new_dmi->DMI_measurement, 0xcc, sizeof(TPM_DIGEST));
  37.529 +  
  37.530 +#ifndef VTPM_MULTI_VM
  37.531 +  if (dmi_id != VTPM_CTL_DM) {
  37.532 +    // Create a pair of fifo pipes
  37.533 +    if( (new_dmi->guest_tx_fname = (char *) malloc(11 + strlen(GUEST_TX_FIFO))) == NULL){ 
  37.534 +      status = TPM_RESOURCES;
  37.535 +      goto abort_egress;
  37.536 +    }
  37.537 +    sprintf(new_dmi->guest_tx_fname, GUEST_TX_FIFO, (uint32_t) dmi_id);
  37.538 +    
  37.539 +    if ((new_dmi->vtpm_tx_fname = (char *) malloc(11 + strlen(VTPM_TX_FIFO))) == NULL) {
  37.540 +      status = TPM_RESOURCES;
  37.541 +      goto abort_egress;
  37.542 +    }
  37.543 +    sprintf(new_dmi->vtpm_tx_fname, VTPM_TX_FIFO, (uint32_t) dmi_id);
  37.544 +    
  37.545 +    new_dmi->guest_tx_fh = -1;
  37.546 +    new_dmi->vtpm_tx_fh= -1;
  37.547 +    
  37.548 +    if ( stat(new_dmi->guest_tx_fname, &file_info) == -1) {
  37.549 +      if ( mkfifo(new_dmi->guest_tx_fname, S_IWUSR | S_IRUSR ) ){
  37.550 +	vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
  37.551 +	status = TPM_IOERROR;
  37.552 +	goto abort_egress;
  37.553 +      }
  37.554 +    }
  37.555 +            
  37.556 +    if ( (fh = open(new_dmi->vtpm_tx_fname, O_RDWR)) == -1) {
  37.557 +      if ( mkfifo(new_dmi->vtpm_tx_fname, S_IWUSR | S_IRUSR ) ) {
  37.558 +	vtpmlogerror(VTPM_LOG_VTPM, "Failed to create dmi fifo.\n");
  37.559 +	status = TPM_IOERROR;
  37.560 +	goto abort_egress;
  37.561 +      }
  37.562 +    }
  37.563 +                
  37.564 +    // Launch DMI
  37.565 +    sprintf(dmi_id_str, "%d", (int) dmi_id);
  37.566 +#ifdef MANUAL_DM_LAUNCH
  37.567 +    vtpmlogerror(VTPM_LOG_VTPM, "FAKING starting vtpm with dmi=%s\n", dmi_id_str);
  37.568 +    new_dmi->dmi_pid = 0;
  37.569 +#else
  37.570 +    pid_t pid = fork();
  37.571 +    
  37.572 +    if (pid == -1) {
  37.573 +      vtpmlogerror(VTPM_LOG_VTPM, "Could not fork to launch vtpm\n");
  37.574 +      status = TPM_RESOURCES;
  37.575 +      goto abort_egress;
  37.576 +    } else if (pid == 0) {
  37.577 +      if ( stat(new_dmi->NVMLocation, &file_info) == -1)
   37.578 +	execl (TPM_EMULATOR_PATH, "vtpmd", "clear", dmi_id_str, NULL);
  37.579 +      else 
  37.580 +	execl (TPM_EMULATOR_PATH, "vtpmd", "save", dmi_id_str, NULL);
  37.581 +			
  37.582 +      // Returning from these at all is an error.
  37.583 +      vtpmlogerror(VTPM_LOG_VTPM, "Could not exec to launch vtpm\n");
  37.584 +    } else {
  37.585 +      new_dmi->dmi_pid = pid;
  37.586 +      vtpmloginfo(VTPM_LOG_VTPM, "Launching DMI on PID = %d\n", pid);
  37.587 +    }
  37.588 +#endif // MANUAL_DM_LAUNCH
  37.589 +  }
  37.590 +#else // VTPM_MUTLI_VM
  37.591 +  // FIXME: Measure DMI through call to Measurement agent in platform.
  37.592 +#endif 
  37.593 +	
  37.594 +  vtpm_globals->DMI_table_dirty = TRUE;
  37.595 +  new_dmi->connected = TRUE;  
  37.596 +  status=TPM_SUCCESS;
  37.597 +  goto egress;
  37.598 +  
  37.599 + abort_egress:
  37.600 +  vtpmlogerror(VTPM_LOG_VTPM, "Failed to create DMI id=%d due to status=%s. Cleaning.\n", dmi_id, tpm_get_error_name(status));
  37.601 +  close_dmi( new_dmi );
  37.602 +	
  37.603 + egress:
  37.604 +  return status;
  37.605 +}
  37.606 +
  37.607 +TPM_RESULT VTPM_Handle_Close_DMI( const buffer_t *param_buf) {
  37.608 +  
  37.609 +  TPM_RESULT status=TPM_FAIL;
  37.610 +  VTPM_DMI_RESOURCE *dmi_res=NULL;
  37.611 +  UINT32 dmi_id;
  37.612 +  
  37.613 +  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
   37.614 +    vtpmlogerror(VTPM_LOG_VTPM, "Closing DMI has bad size.\n");
  37.615 +    status = TPM_BAD_PARAMETER;
  37.616 +    goto abort_egress;
  37.617 +  }
  37.618 +  
  37.619 +  BSG_UnpackList( param_buf->bytes, 1,
  37.620 +		  BSG_TYPE_UINT32, &dmi_id);
  37.621 +  
  37.622 +  vtpmloginfo(VTPM_LOG_VTPM, "Closing DMI %d.\n", dmi_id);
  37.623 +  
  37.624 +  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_search(vtpm_globals->dmi_map, &dmi_id);
  37.625 +  if (dmi_res == NULL ) {
  37.626 +    vtpmlogerror(VTPM_LOG_VTPM, "Trying to close nonexistent DMI.\n");
  37.627 +    status = TPM_BAD_PARAMETER;
  37.628 +    goto abort_egress;
  37.629 +  }
   37.630 +
   37.631 +  if (!dmi_res->connected) {
  37.632 +    vtpmlogerror(VTPM_LOG_VTPM, "Closing non-connected DMI.\n");
  37.633 +    status = TPM_BAD_PARAMETER;
  37.634 +    goto abort_egress;
  37.635 +  }
  37.636 +  
   37.637 +  // Close DMI
   37.638 +  TPMTRYRETURN(close_dmi( dmi_res ));
  37.639 +  
  37.640 +  status=TPM_SUCCESS;    
  37.641 +  goto egress;
  37.642 +  
  37.643 + abort_egress:
  37.644 + egress:
  37.645 +  
  37.646 +  return status;
  37.647 +}
  37.648 +
  37.649 +TPM_RESULT VTPM_Handle_Delete_DMI( const buffer_t *param_buf) {
  37.650 +  
  37.651 +  TPM_RESULT status=TPM_FAIL;
  37.652 +  VTPM_DMI_RESOURCE *dmi_res=NULL;
  37.653 +  UINT32 dmi_id;
  37.654 +    
  37.655 +  if ((param_buf == NULL) || (buffer_len(param_buf) != sizeof(UINT32)) ) {
   37.656 +    vtpmlogerror(VTPM_LOG_VTPM, "Deleting DMI has bad size.\n");
  37.657 +    status = TPM_BAD_PARAMETER;
  37.658 +    goto abort_egress;
  37.659 +  }
  37.660 +  
  37.661 +  BSG_UnpackList( param_buf->bytes, 1,
  37.662 +		  BSG_TYPE_UINT32, &dmi_id);
  37.663 +  
  37.664 +  vtpmloginfo(VTPM_LOG_VTPM, "Deleting DMI %d.\n", dmi_id);    
  37.665 +  
  37.666 +  dmi_res = (VTPM_DMI_RESOURCE *) hashtable_remove(vtpm_globals->dmi_map, &dmi_id);
  37.667 +  if (dmi_res == NULL) {
   37.668 +    vtpmlogerror(VTPM_LOG_VTPM, "Deleting non-existent DMI.\n");
  37.669 +    status = TPM_BAD_PARAMETER;
  37.670 +    goto abort_egress;
  37.671 +  }
  37.672 +  
   37.673 +  //TODO: Automatically delete file dmi_res->NVMLocation
  37.674 +  
  37.675 +  // Close DMI first
  37.676 +  TPMTRYRETURN(close_dmi( dmi_res ));
   37.677 +  free ( dmi_res );
   37.678 +
  37.679 +  status=TPM_SUCCESS;    
  37.680 +  goto egress;
  37.681 +  
  37.682 + abort_egress:
  37.683 + egress:
  37.684 +  
  37.685 +  return status;
  37.686 +}
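
The reworked close_dmi() above splits what the old code reported as a
single failure: kill() returning nonzero usually just means the DMI child
already exited, whereas a waitpid() mismatch means a live child could not
be reaped. A minimal sketch of the same shutdown pattern, assuming a POSIX
host (names illustrative):

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/wait.h>

    /* Returns 0 if the child is known to be gone, -1 otherwise. */
    static int stop_child(pid_t pid)
    {
      if (kill(pid, SIGKILL) != 0) {
        /* Typically errno == ESRCH: the process is already dead. */
        fprintf(stderr, "child %d already dead (errno %d)\n", (int) pid, errno);
        return 0;
      }
      if (waitpid(pid, NULL, 0) != pid) {
        fprintf(stderr, "child %d failed to stop\n", (int) pid);
        return -1;
      }
      return 0;
    }
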
    38.1 --- a/tools/vtpm_manager/manager/securestorage.c	Tue Sep 20 09:43:29 2005 +0000
    38.2 +++ b/tools/vtpm_manager/manager/securestorage.c	Tue Sep 20 09:43:46 2005 +0000
    38.3 @@ -1,401 +1,401 @@
    38.4 -// ===================================================================
    38.5 -// 
    38.6 -// Copyright (c) 2005, Intel Corp.
    38.7 -// All rights reserved.
    38.8 -//
    38.9 -// Redistribution and use in source and binary forms, with or without 
   38.10 -// modification, are permitted provided that the following conditions 
   38.11 -// are met:
   38.12 -//
   38.13 -//   * Redistributions of source code must retain the above copyright 
   38.14 -//     notice, this list of conditions and the following disclaimer.
   38.15 -//   * Redistributions in binary form must reproduce the above 
   38.16 -//     copyright notice, this list of conditions and the following 
   38.17 -//     disclaimer in the documentation and/or other materials provided 
   38.18 -//     with the distribution.
   38.19 -//   * Neither the name of Intel Corporation nor the names of its 
   38.20 -//     contributors may be used to endorse or promote products derived
   38.21 -//     from this software without specific prior written permission.
   38.22 -//
   38.23 -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
   38.24 -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
   38.25 -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
   38.26 -// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
   38.27 -// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   38.28 -// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   38.29 -// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
   38.30 -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38.31 -// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
   38.32 -// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   38.33 -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
   38.34 -// OF THE POSSIBILITY OF SUCH DAMAGE.
   38.35 -// ===================================================================
   38.36 -// 
   38.37 -// securestorage.c
   38.38 -// 
   38.39 -//  Functions regarding securely storing DMI secrets.
   38.40 -//
   38.41 -// ==================================================================
   38.42 -
   38.43 -#include <sys/types.h>
   38.44 -#include <sys/stat.h>
   38.45 -#include <fcntl.h>
   38.46 -#include <unistd.h>
   38.47 -#include <string.h>
   38.48 -
   38.49 -#include "tcg.h"
   38.50 -#include "vtpm_manager.h"
   38.51 -#include "vtpmpriv.h"
   38.52 -#include "vtsp.h"
   38.53 -#include "bsg.h"
   38.54 -#include "crypto.h"
   38.55 -#include "hashtable.h"
   38.56 -#include "hashtable_itr.h"
   38.57 -#include "buffer.h"
   38.58 -#include "log.h"
   38.59 -
   38.60 -TPM_RESULT VTPM_Handle_Save_NVM(VTPM_DMI_RESOURCE *myDMI, 
   38.61 -				const buffer_t *inbuf, 
   38.62 -				buffer_t *outbuf) {
   38.63 -  
   38.64 -  TPM_RESULT status = TPM_SUCCESS;
   38.65 -  symkey_t    symkey;
   38.66 -  buffer_t    state_cipher = NULL_BUF,
   38.67 -              symkey_cipher = NULL_BUF;
   38.68 -  int fh;
   38.69 -  long bytes_written;
   38.70 -  BYTE *sealed_NVM=NULL;
   38.71 -  UINT32 sealed_NVM_size, i;
   38.72 -  struct pack_constbuf_t symkey_cipher32, state_cipher32;
   38.73 -  
   38.74 -  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Save_NVMing[%d]: 0x", buffer_len(inbuf));
   38.75 -  for (i=0; i< buffer_len(inbuf); i++)
   38.76 -    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", inbuf->bytes[i]);
   38.77 -  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
   38.78 -  
   38.79 -  // Generate a sym key and encrypt state with it
   38.80 -  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_genkey (&symkey) );
   38.81 -  TPMTRY(TPM_ENCRYPT_ERROR, Crypto_symcrypto_encrypt (&symkey, inbuf, &state_cipher) );
   38.82 -  
   38.83 -  // Encrypt symmetric key
   38.84 -  TPMTRYRETURN( VTSP_Bind(    &vtpm_globals->storageKey, 
   38.85 -			      &symkey.key, 
   38.86 -			      &symkey_cipher) );
   38.87 -  
   38.88 -  // Create output blob: symkey_size + symkey_cipher + state_cipher_size + state_cipher
   38.89 -  
   38.90 -  symkey_cipher32.size = buffer_len(&symkey_cipher);
   38.91 -  symkey_cipher32.data = symkey_cipher.bytes;
   38.92 -  
   38.93 -  state_cipher32.size = buffer_len(&state_cipher);
   38.94 -  state_cipher32.data = state_cipher.bytes;
   38.95 -  
   38.96 -  sealed_NVM = (BYTE *) malloc( 2 * sizeof(UINT32) + symkey_cipher32.size + state_cipher32.size);
   38.97 -  
   38.98 -  sealed_NVM_size = BSG_PackList(sealed_NVM, 2,
   38.99 -				 BSG_TPM_SIZE32_DATA, &symkey_cipher32,
  38.100 -				 BSG_TPM_SIZE32_DATA, &state_cipher32);
  38.101 -  
  38.102 -  // Mark DMI Table so new save state info will get pushed to disk on return.
  38.103 -  vtpm_globals->DMI_table_dirty = TRUE;
  38.104 -  
  38.105 -  // Write sealed blob off disk from NVMLocation
  38.106 -  // TODO: How to properly return from these. Do we care if we return failure
  38.107 -  //       after writing the file? We can't get the old one back.
  38.108 -  // TODO: Backup old file and try and recover that way.
  38.109 -  fh = open(myDMI->NVMLocation, O_WRONLY | O_CREAT, S_IREAD | S_IWRITE);
  38.110 -  if ( (bytes_written = write(fh, sealed_NVM, sealed_NVM_size) ) != (long) sealed_NVM_size) {
  38.111 -    vtpmlogerror(VTPM_LOG_VTPM, "We just overwrote a DMI_NVM and failed to finish. %ld/%ld bytes.\n", bytes_written, (long)sealed_NVM_size);
  38.112 -    status = TPM_IOERROR;
  38.113 -    goto abort_egress;
  38.114 -  }
  38.115 -  close(fh);
  38.116 -  
  38.117 -  Crypto_SHA1Full (sealed_NVM, sealed_NVM_size, (BYTE *) &myDMI->NVM_measurement);   
  38.118 -  
  38.119 -  vtpmloginfo(VTPM_LOG_VTPM, "Saved %d bytes of E(symkey) + %d bytes of E(NVM)\n", buffer_len(&symkey_cipher), buffer_len(&state_cipher));
  38.120 -  goto egress;
  38.121 -  
  38.122 - abort_egress:
  38.123 -  vtpmlogerror(VTPM_LOG_VTPM, "Failed to load NVM\n.");
  38.124 -  
  38.125 - egress:
  38.126 -  
  38.127 -  buffer_free ( &state_cipher);
  38.128 -  buffer_free ( &symkey_cipher);
  38.129 -  free(sealed_NVM);
  38.130 -  Crypto_symcrypto_freekey (&symkey);
  38.131 -  
  38.132 -  return status;
  38.133 -}
  38.134 -
  38.135 -
  38.136 -/* inbuf = null outbuf = sealed blob size, sealed blob.*/
  38.137 -TPM_RESULT VTPM_Handle_Load_NVM(VTPM_DMI_RESOURCE *myDMI, 
  38.138 -				const buffer_t *inbuf, 
  38.139 -				buffer_t *outbuf) {
  38.140 -  
  38.141 -  TPM_RESULT status = TPM_SUCCESS;
  38.142 -  symkey_t    symkey;
  38.143 -  buffer_t    state_cipher = NULL_BUF, 
  38.144 -              symkey_clear = NULL_BUF, 
  38.145 -              symkey_cipher = NULL_BUF;
  38.146 -  struct pack_buf_t symkey_cipher32, state_cipher32;
  38.147 -  
  38.148 -  UINT32 sealed_NVM_size;
  38.149 -  BYTE *sealed_NVM = NULL;
  38.150 -  long fh_size;
  38.151 -  int fh, stat_ret, i;
  38.152 -  struct stat file_stat;
  38.153 -  TPM_DIGEST sealedNVMHash;
  38.154 -  
  38.155 -  memset(&symkey, 0, sizeof(symkey_t));
  38.156 -  
  38.157 -  if (myDMI->NVMLocation == NULL) {
  38.158 -    vtpmlogerror(VTPM_LOG_VTPM, "Unable to load NVM because the file name NULL.\n");
  38.159 -    status = TPM_AUTHFAIL;
  38.160 -    goto abort_egress;
  38.161 -  }
  38.162 -  
  38.163 -  //Read sealed blob off disk from NVMLocation
  38.164 -  fh = open(myDMI->NVMLocation, O_RDONLY);
  38.165 -  stat_ret = fstat(fh, &file_stat);
  38.166 -  if (stat_ret == 0) 
  38.167 -    fh_size = file_stat.st_size;
  38.168 -  else {
  38.169 -    status = TPM_IOERROR;
  38.170 -    goto abort_egress;
  38.171 -  }
  38.172 -  
  38.173 -  sealed_NVM = (BYTE *) malloc(fh_size);
  38.174 -  if (read(fh, sealed_NVM, fh_size) != fh_size) {
  38.175 -    status = TPM_IOERROR;
  38.176 -    goto abort_egress;
  38.177 -  }
  38.178 -  close(fh);
  38.179 -  
  38.180 -  vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Load_NVMing[%ld]: 0x", fh_size);
  38.181 -  for (i=0; i< fh_size; i++)
  38.182 -    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", sealed_NVM[i]);
  38.183 -  vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
  38.184 -  
  38.185 -  sealed_NVM_size = BSG_UnpackList(sealed_NVM, 2,
  38.186 -				   BSG_TPM_SIZE32_DATA, &symkey_cipher32,
  38.187 -				   BSG_TPM_SIZE32_DATA, &state_cipher32);
  38.188 -  
  38.189 -  TPMTRYRETURN( buffer_init_convert (&symkey_cipher, 
  38.190 -				     symkey_cipher32.size, 
  38.191 -				     symkey_cipher32.data) );
  38.192 -  
  38.193 -  TPMTRYRETURN( buffer_init_convert (&state_cipher, 
  38.194 -				     state_cipher32.size, 
  38.195 -				     state_cipher32.data) );
  38.196 -  
  38.197 -  Crypto_SHA1Full(sealed_NVM, sealed_NVM_size, (BYTE *) &sealedNVMHash);    
  38.198 -  
  38.199 -  // Verify measurement of sealed blob.
  38.200 -  if (memcmp(&sealedNVMHash, &myDMI->NVM_measurement, sizeof(TPM_DIGEST)) ) {
  38.201 -    vtpmlogerror(VTPM_LOG_VTPM, "VTPM LoadNVM NVM measurement check failed.\n");
  38.202 -    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Correct hash: ");
  38.203 -    for (i=0; i< sizeof(TPM_DIGEST); i++)
  38.204 -      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&myDMI->NVM_measurement)[i]);
  38.205 -    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
  38.206 -
  38.207 -    vtpmloginfo(VTPM_LOG_VTPM_DEEP, "Measured hash: ");
  38.208 -    for (i=0; i< sizeof(TPM_DIGEST); i++)
  38.209 -      vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "%x ", ((BYTE*)&sealedNVMHash)[i]);
  38.210 -    vtpmloginfomore(VTPM_LOG_VTPM_DEEP, "\n");
  38.211 -    
  38.212 -    status = TPM_AUTHFAIL;
  38.213 -    goto abort_egress;
  38.214 -  }
  38.215 -  
  38.216 -  // Decrypt Symmetric Key
  38.217 -  TPMTRYRETURN( VTSP_Unbind(  myDMI->TCSContext,
  38.218 -			      vtpm_globals->storageKeyHandle,
  38.219 -			      &symkey_cipher,
  38.220 -			      (const TPM_AUTHDATA*)&vtpm_globals->storage_key_usage_auth,
  38.221 -			      &symkey_clear,
  38.222 -			      &(vtpm_globals->keyAuth) ) );
  38.223 -  
  38.224 -  //