direct-io.hg

changeset 7027:06d84bf87159

Merge latest xen-unstable into xen-ia64-unstable
author djm@kirby.fc.hp.com
date Thu Sep 22 11:42:01 2005 -0600 (2005-09-22)
parents 97dbd9524a7e 2f83ff9f6bd2
children b6ee1d1cdc93
files .hgignore Config.mk Makefile buildconfigs/Rules.mk buildconfigs/mk.linux-2.4-xenU buildconfigs/mk.linux-2.6-xen buildconfigs/mk.linux-2.6-xen0 buildconfigs/mk.linux-2.6-xenU docs/Doxyfile docs/Doxyfilter docs/Makefile docs/misc/sedf_scheduler_mini-HOWTO.txt docs/misc/vtpm.txt docs/pythfilter.py docs/src/interface.tex docs/src/interface/architecture.tex docs/src/interface/debugging.tex docs/src/interface/devices.tex docs/src/interface/further_info.tex docs/src/interface/hypercalls.tex docs/src/interface/memory.tex docs/src/interface/scheduling.tex docs/src/user.tex docs/src/user/build.tex docs/src/user/control_software.tex docs/src/user/debian.tex docs/src/user/domain_configuration.tex docs/src/user/domain_filesystem.tex docs/src/user/domain_mgmt.tex docs/src/user/glossary.tex docs/src/user/installation.tex docs/src/user/introduction.tex docs/src/user/redhat.tex docs/src/user/start_addl_dom.tex extras/mini-os/Makefile extras/mini-os/README extras/mini-os/domain_config extras/mini-os/events.c extras/mini-os/include/ctype.h extras/mini-os/include/err.h extras/mini-os/include/errno-base.h extras/mini-os/include/errno.h extras/mini-os/include/events.h extras/mini-os/include/fcntl.h extras/mini-os/include/hypervisor.h extras/mini-os/include/lib.h extras/mini-os/include/list.h extras/mini-os/include/mm.h extras/mini-os/include/os.h extras/mini-os/include/sched.h extras/mini-os/include/semaphore.h extras/mini-os/include/time.h extras/mini-os/include/traps.h extras/mini-os/include/types.h extras/mini-os/include/wait.h extras/mini-os/include/xenbus.h extras/mini-os/include/xmalloc.h extras/mini-os/kernel.c extras/mini-os/lib/printf.c extras/mini-os/lib/string.c extras/mini-os/lib/xmalloc.c extras/mini-os/mm.c extras/mini-os/sched.c extras/mini-os/time.c extras/mini-os/traps.c extras/mini-os/xenbus/Makefile extras/mini-os/xenbus/xenbus_comms.c extras/mini-os/xenbus/xenbus_comms.h extras/mini-os/xenbus/xenbus_xs.c linux-2.4-xen-sparse/Makefile linux-2.4-xen-sparse/arch/xen/Makefile linux-2.4-xen-sparse/arch/xen/boot/Makefile linux-2.4-xen-sparse/arch/xen/config.in linux-2.4-xen-sparse/arch/xen/defconfig-xen0 linux-2.4-xen-sparse/arch/xen/defconfig-xenU linux-2.4-xen-sparse/arch/xen/drivers/balloon/Makefile linux-2.4-xen-sparse/arch/xen/drivers/blkif/Makefile linux-2.4-xen-sparse/arch/xen/drivers/blkif/backend/Makefile linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/Makefile linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/common.h linux-2.4-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c linux-2.4-xen-sparse/arch/xen/drivers/console/Makefile linux-2.4-xen-sparse/arch/xen/drivers/dom0/Makefile linux-2.4-xen-sparse/arch/xen/drivers/evtchn/Makefile linux-2.4-xen-sparse/arch/xen/drivers/netif/Makefile linux-2.4-xen-sparse/arch/xen/drivers/netif/backend/Makefile linux-2.4-xen-sparse/arch/xen/drivers/netif/frontend/Makefile linux-2.4-xen-sparse/arch/xen/kernel/Makefile linux-2.4-xen-sparse/arch/xen/kernel/entry.S linux-2.4-xen-sparse/arch/xen/kernel/head.S linux-2.4-xen-sparse/arch/xen/kernel/i386_ksyms.c linux-2.4-xen-sparse/arch/xen/kernel/irq.c linux-2.4-xen-sparse/arch/xen/kernel/ldt.c linux-2.4-xen-sparse/arch/xen/kernel/pci-pc.c linux-2.4-xen-sparse/arch/xen/kernel/process.c linux-2.4-xen-sparse/arch/xen/kernel/setup.c linux-2.4-xen-sparse/arch/xen/kernel/signal.c linux-2.4-xen-sparse/arch/xen/kernel/time.c linux-2.4-xen-sparse/arch/xen/kernel/traps.c linux-2.4-xen-sparse/arch/xen/lib/Makefile linux-2.4-xen-sparse/arch/xen/lib/delay.c linux-2.4-xen-sparse/arch/xen/mm/Makefile 
linux-2.4-xen-sparse/arch/xen/mm/fault.c linux-2.4-xen-sparse/arch/xen/mm/init.c linux-2.4-xen-sparse/arch/xen/mm/ioremap.c linux-2.4-xen-sparse/arch/xen/vmlinux.lds linux-2.4-xen-sparse/drivers/block/ll_rw_blk.c linux-2.4-xen-sparse/drivers/char/Makefile linux-2.4-xen-sparse/drivers/char/mem.c linux-2.4-xen-sparse/drivers/char/tty_io.c linux-2.4-xen-sparse/drivers/scsi/aic7xxx/Makefile linux-2.4-xen-sparse/include/asm-xen/bugs.h linux-2.4-xen-sparse/include/asm-xen/desc.h linux-2.4-xen-sparse/include/asm-xen/fixmap.h linux-2.4-xen-sparse/include/asm-xen/highmem.h linux-2.4-xen-sparse/include/asm-xen/hw_irq.h linux-2.4-xen-sparse/include/asm-xen/io.h linux-2.4-xen-sparse/include/asm-xen/irq.h linux-2.4-xen-sparse/include/asm-xen/keyboard.h linux-2.4-xen-sparse/include/asm-xen/mmu_context.h linux-2.4-xen-sparse/include/asm-xen/module.h linux-2.4-xen-sparse/include/asm-xen/page.h linux-2.4-xen-sparse/include/asm-xen/pci.h linux-2.4-xen-sparse/include/asm-xen/pgalloc.h linux-2.4-xen-sparse/include/asm-xen/pgtable-2level.h linux-2.4-xen-sparse/include/asm-xen/pgtable.h linux-2.4-xen-sparse/include/asm-xen/processor.h linux-2.4-xen-sparse/include/asm-xen/queues.h linux-2.4-xen-sparse/include/asm-xen/segment.h linux-2.4-xen-sparse/include/asm-xen/smp.h linux-2.4-xen-sparse/include/asm-xen/system.h linux-2.4-xen-sparse/include/asm-xen/vga.h linux-2.4-xen-sparse/include/asm-xen/xor.h linux-2.4-xen-sparse/include/linux/blk.h linux-2.4-xen-sparse/include/linux/highmem.h linux-2.4-xen-sparse/include/linux/irq.h linux-2.4-xen-sparse/include/linux/mm.h linux-2.4-xen-sparse/include/linux/sched.h linux-2.4-xen-sparse/include/linux/skbuff.h linux-2.4-xen-sparse/include/linux/timer.h linux-2.4-xen-sparse/kernel/time.c linux-2.4-xen-sparse/kernel/timer.c linux-2.4-xen-sparse/mkbuildtree linux-2.4-xen-sparse/mm/highmem.c linux-2.4-xen-sparse/mm/memory.c linux-2.4-xen-sparse/mm/mprotect.c linux-2.4-xen-sparse/mm/mremap.c linux-2.4-xen-sparse/mm/page_alloc.c linux-2.4-xen-sparse/net/core/skbuff.c linux-2.6-xen-sparse/arch/xen/Kconfig linux-2.6-xen-sparse/arch/xen/Kconfig.drivers linux-2.6-xen-sparse/arch/xen/Makefile linux-2.6-xen-sparse/arch/xen/boot/Makefile linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/i386/Kconfig linux-2.6-xen-sparse/arch/xen/i386/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/apic.c linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S linux-2.6-xen-sparse/arch/xen/i386/kernel/head.S linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c linux-2.6-xen-sparse/arch/xen/i386/kernel/init_task.c linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c linux-2.6-xen-sparse/arch/xen/i386/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c linux-2.6-xen-sparse/arch/xen/i386/kernel/ldt.c linux-2.6-xen-sparse/arch/xen/i386/kernel/microcode.c 
linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6-xen-sparse/arch/xen/i386/kernel/vsyscall.S linux-2.6-xen-sparse/arch/xen/i386/mach-default/Makefile linux-2.6-xen-sparse/arch/xen/i386/mm/Makefile linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6-xen-sparse/arch/xen/i386/mm/init.c linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c linux-2.6-xen-sparse/arch/xen/kernel/Makefile linux-2.6-xen-sparse/arch/xen/kernel/devmem.c linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6-xen-sparse/arch/xen/kernel/fixup.c linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6-xen-sparse/arch/xen/kernel/reboot.c linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c linux-2.6-xen-sparse/arch/xen/kernel/smp.c linux-2.6-xen-sparse/arch/xen/kernel/xen_proc.c linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig linux-2.6-xen-sparse/arch/xen/x86_64/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c linux-2.6-xen-sparse/arch/xen/x86_64/ia32/vsyscall-int80.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/acpi/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/pci-nommu.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S linux-2.6-xen-sparse/arch/xen/x86_64/mm/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile-BUS linux-2.6-xen-sparse/drivers/Makefile 
linux-2.6-xen-sparse/drivers/acpi/tables.c linux-2.6-xen-sparse/drivers/char/mem.c linux-2.6-xen-sparse/drivers/char/tpm/Kconfig.domU linux-2.6-xen-sparse/drivers/char/tpm/Makefile linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.c linux-2.6-xen-sparse/drivers/char/tpm/tpm_nopci.h linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c linux-2.6-xen-sparse/drivers/char/tty_io.c linux-2.6-xen-sparse/drivers/xen/Makefile linux-2.6-xen-sparse/drivers/xen/balloon/Makefile linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6-xen-sparse/drivers/xen/blkback/Makefile linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6-xen-sparse/drivers/xen/blkback/common.h linux-2.6-xen-sparse/drivers/xen/blkback/interface.c linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c linux-2.6-xen-sparse/drivers/xen/blkfront/Kconfig linux-2.6-xen-sparse/drivers/xen/blkfront/Makefile linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6-xen-sparse/drivers/xen/blkfront/block.h linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6-xen-sparse/drivers/xen/blktap/Makefile linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c linux-2.6-xen-sparse/drivers/xen/blktap/common.h linux-2.6-xen-sparse/drivers/xen/blktap/interface.c linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c linux-2.6-xen-sparse/drivers/xen/console/Makefile linux-2.6-xen-sparse/drivers/xen/console/console.c linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.h linux-2.6-xen-sparse/drivers/xen/evtchn/Makefile linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c linux-2.6-xen-sparse/drivers/xen/netback/Makefile linux-2.6-xen-sparse/drivers/xen/netback/common.h linux-2.6-xen-sparse/drivers/xen/netback/interface.c linux-2.6-xen-sparse/drivers/xen/netback/loopback.c linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c linux-2.6-xen-sparse/drivers/xen/netfront/Kconfig linux-2.6-xen-sparse/drivers/xen/netfront/Makefile linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6-xen-sparse/drivers/xen/privcmd/Makefile linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6-xen-sparse/drivers/xen/tpmback/Makefile linux-2.6-xen-sparse/drivers/xen/tpmback/common.h linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c linux-2.6-xen-sparse/drivers/xen/tpmfront/Makefile linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.c linux-2.6-xen-sparse/drivers/xen/tpmfront/tpmfront.h linux-2.6-xen-sparse/drivers/xen/util.c linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c linux-2.6-xen-sparse/include/asm-generic/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/agp.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/fixmap.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/floppy.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/hw_irq.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypervisor.h 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/kmap_types.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/page.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/param.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pci.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-3level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/ptrace.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/scatterlist.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/spinlock.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/swiotlb.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/tlbflush.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/vga.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/arch_hooks.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/desc.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/dma-mapping.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/fixmap.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/floppy.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hw_irq.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypervisor.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/irq.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/io_ports.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/irq_vectors.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/mach_timer.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_pre.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/param.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pci.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/synch_bitops.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/timer.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/vga.h 
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/xor.h linux-2.6-xen-sparse/include/asm-xen/balloon.h linux-2.6-xen-sparse/include/asm-xen/driver_util.h linux-2.6-xen-sparse/include/asm-xen/evtchn.h linux-2.6-xen-sparse/include/asm-xen/foreign_page.h linux-2.6-xen-sparse/include/asm-xen/gnttab.h linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h linux-2.6-xen-sparse/include/asm-xen/queues.h linux-2.6-xen-sparse/include/asm-xen/xen_proc.h linux-2.6-xen-sparse/include/asm-xen/xenbus.h linux-2.6-xen-sparse/include/linux/gfp.h linux-2.6-xen-sparse/include/linux/highmem.h linux-2.6-xen-sparse/include/linux/irq.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/include/linux/skbuff.h linux-2.6-xen-sparse/include/linux/tpmfe.h linux-2.6-xen-sparse/kernel/irq/manage.c linux-2.6-xen-sparse/mkbuildtree linux-2.6-xen-sparse/mm/highmem.c linux-2.6-xen-sparse/mm/memory.c linux-2.6-xen-sparse/mm/mmap.c linux-2.6-xen-sparse/mm/page_alloc.c linux-2.6-xen-sparse/net/core/dev.c linux-2.6-xen-sparse/net/core/skbuff.c patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch patches/linux-2.6.12/net-csum.patch patches/linux-2.6.12/patch-2.6.12.5 patches/linux-2.6.12/rcu-nohz.patch patches/linux-2.6.12/smp-alts.patch patches/linux-2.6.12/tpm_partial_read.patch tools/Makefile tools/Rules.mk tools/blktap/Makefile tools/blktap/README.sept05 tools/blktap/blkdump.c tools/blktap/blkif.c tools/blktap/blktaplib.c tools/blktap/blktaplib.h tools/blktap/list.h tools/blktap/parallax/Makefile tools/blktap/parallax/block-async.h tools/blktap/parallax/blockstore.h tools/blktap/ublkback/Makefile tools/blktap/ublkback/ublkback.c tools/blktap/ublkback/ublkbacklib.c tools/blktap/ublkback/ublkbacklib.h tools/blktap/xenbus.c tools/check/check_brctl tools/check/check_iproute tools/check/check_logging tools/check/check_python tools/check/check_zlib_devel tools/check/check_zlib_lib tools/check/chk tools/console/Makefile tools/console/client/main.c tools/console/daemon/io.c tools/console/daemon/io.h tools/console/daemon/main.c tools/console/daemon/utils.c tools/console/daemon/utils.h tools/console/testsuite/Makefile tools/console/testsuite/README tools/console/testsuite/console-dom0.c tools/console/testsuite/console-domU.c tools/console/testsuite/procpipe.c tools/debugger/gdb/README tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/configure.in tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/linux-xen-low.c tools/debugger/gdb/gdbbuild tools/debugger/libxendebug/Makefile tools/debugger/libxendebug/xendebug.c tools/debugger/libxendebug/xendebug.h tools/debugger/pdb/Domain.ml tools/debugger/pdb/Domain.mli tools/debugger/pdb/Makefile tools/debugger/pdb/PDB.ml tools/debugger/pdb/Process.ml tools/debugger/pdb/Process.mli tools/debugger/pdb/Util.ml tools/debugger/pdb/Xen_domain.ml tools/debugger/pdb/Xen_domain.mli tools/debugger/pdb/debugger.ml tools/debugger/pdb/linux-2.6-module/Makefile tools/debugger/pdb/linux-2.6-module/debug.c tools/debugger/pdb/linux-2.6-module/module.c tools/debugger/pdb/linux-2.6-module/pdb_debug.h tools/debugger/pdb/linux-2.6-module/pdb_module.h tools/debugger/pdb/linux-2.6-patches/Makefile tools/debugger/pdb/linux-2.6-patches/i386_ksyms.patch tools/debugger/pdb/linux-2.6-patches/kdebug.patch tools/debugger/pdb/linux-2.6-patches/makefile.patch tools/debugger/pdb/linux-2.6-patches/ptrace.patch tools/debugger/pdb/linux-2.6-patches/traps.patch tools/debugger/pdb/pdb_caml_domain.c tools/debugger/pdb/pdb_caml_evtchn.c 
tools/debugger/pdb/pdb_caml_process.c tools/debugger/pdb/pdb_caml_xc.c tools/debugger/pdb/pdb_caml_xcs.c tools/debugger/pdb/pdb_caml_xen.h tools/debugger/pdb/pdb_xen.c tools/debugger/pdb/readme tools/debugger/pdb/server.ml tools/examples/Makefile tools/examples/README tools/examples/block-enbd tools/examples/block-file tools/examples/block-phy tools/examples/init.d/xend tools/examples/network-bridge tools/examples/vif-bridge tools/examples/vif-route tools/examples/xen-backend.agent tools/examples/xend-config.sxp tools/examples/xmexample.vmx tools/examples/xmexample1 tools/examples/xmexample2 tools/examples/xmexample3 tools/firmware/Makefile tools/firmware/acpi/acpi2_0.h tools/firmware/acpi/acpi_madt.c tools/firmware/acpi/acpi_madt.h tools/firmware/rombios/rombios.c tools/firmware/vmxassist/Makefile tools/firmware/vmxassist/acpi_madt.c tools/firmware/vmxassist/setup.c tools/firmware/vmxassist/vm86.c tools/firmware/vmxassist/vmxloader.c tools/ioemu/cpu-all.h tools/ioemu/exec.c tools/ioemu/hw/i8254.c tools/ioemu/hw/i8259.c tools/ioemu/hw/ide.c tools/ioemu/hw/ioapic.h tools/ioemu/hw/pc.c tools/ioemu/hw/pckbd.c tools/ioemu/hw/pcnet.c tools/ioemu/hw/pcnet.h tools/ioemu/hw/vga.c tools/ioemu/monitor.c tools/ioemu/target-i386-dm/Makefile tools/ioemu/target-i386-dm/helper2.c tools/ioemu/target-i386-dm/qemu-dm.debug tools/ioemu/vl.c tools/ioemu/vl.h tools/ioemu/vnc.c tools/libxc/Makefile tools/libxc/linux_boot_params.h tools/libxc/xc_core.c tools/libxc/xc_domain.c tools/libxc/xc_gnttab.c tools/libxc/xc_ia64_stubs.c tools/libxc/xc_linux_build.c tools/libxc/xc_linux_restore.c tools/libxc/xc_linux_save.c tools/libxc/xc_load_aout9.c tools/libxc/xc_load_bin.c tools/libxc/xc_load_elf.c tools/libxc/xc_misc.c tools/libxc/xc_private.c tools/libxc/xc_private.h tools/libxc/xc_ptrace.c tools/libxc/xc_ptrace_core.c tools/libxc/xc_vmx_build.c tools/libxc/xenctrl.h tools/libxc/xenguest.h tools/libxc/xg_private.c tools/libxc/xg_private.h tools/misc/Makefile tools/misc/cpuperf/Makefile tools/misc/cpuperf/cpuperf.c tools/misc/cpuperf/cpuperf_xeno.h tools/misc/mbootpack/Makefile tools/misc/mbootpack/buildimage.c tools/misc/mbootpack/mbootpack.c tools/misc/mbootpack/mbootpack.h tools/misc/xc_shadow.c tools/misc/xend tools/misc/xenperf.c tools/python/Makefile tools/python/pylintrc tools/python/setup.py tools/python/xen/lowlevel/xc/xc.c tools/python/xen/lowlevel/xs/xs.c tools/python/xen/sv/CreateDomain.py tools/python/xen/sv/DomInfo.py tools/python/xen/sv/GenTabbed.py tools/python/xen/sv/HTMLBase.py tools/python/xen/sv/Main.py tools/python/xen/sv/NodeInfo.py tools/python/xen/sv/RestoreDomain.py tools/python/xen/sv/Wizard.py tools/python/xen/sv/__init__.py tools/python/xen/sv/util.py tools/python/xen/util/Brctl.py tools/python/xen/util/process.py tools/python/xen/web/SrvBase.py tools/python/xen/web/SrvDir.py tools/python/xen/web/__init__.py tools/python/xen/web/connection.py tools/python/xen/web/httpserver.py tools/python/xen/web/protocol.py tools/python/xen/web/reactor.py tools/python/xen/web/resource.py tools/python/xen/web/static.py tools/python/xen/web/tcp.py tools/python/xen/web/unix.py tools/python/xen/xend/Args.py tools/python/xen/xend/EventServer.py tools/python/xen/xend/PrettyPrint.py tools/python/xen/xend/Vifctl.py tools/python/xen/xend/XendBootloader.py tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDB.py tools/python/xen/xend/XendDmesg.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/XendError.py 
tools/python/xen/xend/XendLogging.py tools/python/xen/xend/XendNode.py tools/python/xen/xend/XendProtocol.py tools/python/xen/xend/XendRoot.py tools/python/xen/xend/XendVnet.py tools/python/xen/xend/encode.py tools/python/xen/xend/image.py tools/python/xen/xend/scheduler.py tools/python/xen/xend/server/DevController.py tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvDmesg.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/SrvDomainDir.py tools/python/xen/xend/server/SrvNode.py tools/python/xen/xend/server/SrvRoot.py tools/python/xen/xend/server/SrvServer.py tools/python/xen/xend/server/SrvVnetDir.py tools/python/xen/xend/server/SrvXendLog.py tools/python/xen/xend/server/blkif.py tools/python/xen/xend/server/channel.py tools/python/xen/xend/server/event.py tools/python/xen/xend/server/netif.py tools/python/xen/xend/server/params.py tools/python/xen/xend/server/pciif.py tools/python/xen/xend/server/relocate.py tools/python/xen/xend/server/tpmif.py tools/python/xen/xend/server/usbif.py tools/python/xen/xend/sxp.py tools/python/xen/xend/uuid.py tools/python/xen/xend/xenstore/__init__.py tools/python/xen/xend/xenstore/xsnode.py tools/python/xen/xend/xenstore/xsobj.py tools/python/xen/xend/xenstore/xsresource.py tools/python/xen/xend/xenstore/xstransact.py tools/python/xen/xend/xenstore/xsutil.py tools/python/xen/xend/xenstore/xswatch.py tools/python/xen/xm/create.py tools/python/xen/xm/destroy.py tools/python/xen/xm/help.py tools/python/xen/xm/main.py tools/python/xen/xm/migrate.py tools/python/xen/xm/opts.py tools/python/xen/xm/shutdown.py tools/python/xen/xm/sysrq.py tools/security/Makefile tools/security/example.txt tools/security/getlabel.sh tools/security/install.txt tools/security/labelfuncs.sh tools/security/policies/chwall/chwall-security_label_template.xml tools/security/policies/chwall/chwall-security_policy.xml tools/security/policies/chwall_ste/chwall_ste-security_label_template.xml tools/security/policies/chwall_ste/chwall_ste-security_policy.xml tools/security/policies/null/null-security_label_template.xml tools/security/policies/null/null-security_policy.xml tools/security/policies/security_policy.xsd tools/security/policies/ste/ste-security_label_template.xml tools/security/policies/ste/ste-security_policy.xml tools/security/policy.txt tools/security/readme.txt tools/security/secpol_compat.h tools/security/secpol_tool.c tools/security/secpol_xml2bin.c tools/security/secpol_xml2bin.h tools/security/setlabel.sh tools/security/updategrub.sh tools/sv/Makefile tools/sv/images/destroy.png tools/sv/images/finish.png tools/sv/images/next.png tools/sv/images/pause.png tools/sv/images/previous.png tools/sv/images/reboot.png tools/sv/images/shutdown.png tools/sv/images/small-destroy.png tools/sv/images/small-pause.png tools/sv/images/small-unpause.png tools/sv/images/unpause.png tools/sv/images/xen.png tools/sv/inc/script.js tools/sv/inc/style.css tools/sv/index.psp tools/vnet/00INSTALL tools/vnet/Make.env tools/vnet/Makefile tools/vnet/doc/vnet-module.txt tools/vnet/doc/vnet-xend.txt tools/vnet/examples/Makefile tools/vnet/examples/network-vnet tools/vnet/examples/vnet-insert tools/vnet/examples/vnet97.sxp tools/vnet/examples/vnet98.sxp tools/vnet/examples/vnet99.sxp tools/vnet/libxutil/Makefile tools/vnet/libxutil/debug.h tools/vnet/libxutil/mem_stream.c tools/vnet/libxutil/mem_stream.h tools/vnet/libxutil/sxpr.c tools/vnet/libxutil/sxpr.h tools/vnet/libxutil/sxpr_parser.c tools/vnet/libxutil/sxpr_parser.h tools/vnet/libxutil/sys_string.c 
tools/vnet/libxutil/sys_string.h tools/vnet/vnet-module/00README tools/vnet/vnet-module/Makefile tools/vnet/vnet-module/Makefile-2.4 tools/vnet/vnet-module/Makefile-2.6 tools/vnet/vnet-module/Makefile.ver tools/vnet/vnet-module/Makefile.vnet tools/vnet/vnet-module/etherip.c tools/vnet/vnet-module/if_etherip.h tools/vnet/vnet-module/if_varp.h tools/vnet/vnet-module/skb_util.h tools/vnet/vnet-module/tunnel.c tools/vnet/vnet-module/tunnel.h tools/vnet/vnet-module/varp.c tools/vnet/vnet-module/varp.h tools/vnet/vnet-module/varp_socket.c tools/vnet/vnet-module/varp_util.c tools/vnet/vnet-module/varp_util.h tools/vnet/vnet-module/vif.c tools/vnet/vnet-module/vif.h tools/vnet/vnet-module/vnet.c tools/vnet/vnet-module/vnet.h tools/vnet/vnet-module/vnet_dev.c tools/vnet/vnet-module/vnet_dev.h tools/vnet/vnet-module/vnet_ioctl.c tools/vnet/vnetd/Makefile tools/vnet/vnetd/vcache.c tools/vnet/vnetd/vcache.h tools/vnet/vnetd/vnetd.c tools/vnet/vnetd/vnetd.h tools/vtpm/Makefile tools/vtpm/README tools/vtpm/Rules.mk tools/vtpm/tpm_emulator-0.2b-x86_64.patch tools/vtpm/tpm_emulator.patch tools/vtpm/vtpm.patch tools/vtpm_manager/COPYING tools/vtpm_manager/Makefile tools/vtpm_manager/README tools/vtpm_manager/Rules.mk tools/vtpm_manager/crypto/Makefile tools/vtpm_manager/crypto/crypto.c tools/vtpm_manager/crypto/crypto.h tools/vtpm_manager/crypto/hash.c tools/vtpm_manager/crypto/rsa.c tools/vtpm_manager/crypto/sym_crypto.c tools/vtpm_manager/crypto/sym_crypto.h tools/vtpm_manager/manager/Makefile tools/vtpm_manager/manager/dmictl.c tools/vtpm_manager/manager/securestorage.c tools/vtpm_manager/manager/tpmpassthrough.c tools/vtpm_manager/manager/vtpm_manager.c tools/vtpm_manager/manager/vtpm_manager.h tools/vtpm_manager/manager/vtpmd.c tools/vtpm_manager/manager/vtpmpriv.h tools/vtpm_manager/manager/vtsp.c tools/vtpm_manager/manager/vtsp.h tools/vtpm_manager/tcs/Makefile tools/vtpm_manager/tcs/contextmgr.c tools/vtpm_manager/tcs/contextmgr.h tools/vtpm_manager/tcs/tcs.c tools/vtpm_manager/tcs/tcs.h tools/vtpm_manager/tcs/tpmddl.h tools/vtpm_manager/tcs/transmit.c tools/vtpm_manager/util/Makefile tools/vtpm_manager/util/bsg.c tools/vtpm_manager/util/bsg.h tools/vtpm_manager/util/buffer.c tools/vtpm_manager/util/buffer.h tools/vtpm_manager/util/depend tools/vtpm_manager/util/hashtable.c tools/vtpm_manager/util/hashtable.h tools/vtpm_manager/util/hashtable_itr.c tools/vtpm_manager/util/hashtable_itr.h tools/vtpm_manager/util/hashtable_private.h tools/vtpm_manager/util/log.c tools/vtpm_manager/util/log.h tools/vtpm_manager/util/tcg.h tools/xcutils/Makefile tools/xcutils/xc_restore.c tools/xcutils/xc_save.c tools/xenstat/Makefile tools/xenstat/libxenstat/COPYING tools/xenstat/libxenstat/Makefile tools/xenstat/libxenstat/bindings/swig/perl/.empty tools/xenstat/libxenstat/bindings/swig/python/.empty tools/xenstat/libxenstat/bindings/swig/xenstat.i tools/xenstat/libxenstat/src/xen-interface.c tools/xenstat/libxenstat/src/xen-interface.h tools/xenstat/libxenstat/src/xenstat.c tools/xenstat/libxenstat/src/xenstat.h tools/xenstat/xentop/Makefile tools/xenstat/xentop/TODO tools/xenstat/xentop/xentop.1 tools/xenstat/xentop/xentop.c tools/xenstore/COPYING tools/xenstore/Makefile tools/xenstore/TODO tools/xenstore/fake_libxc.c tools/xenstore/testsuite/01simple.test tools/xenstore/testsuite/02directory.test tools/xenstore/testsuite/03write.test tools/xenstore/testsuite/04rm.test tools/xenstore/testsuite/05filepermissions.test tools/xenstore/testsuite/06dirpermissions.test tools/xenstore/testsuite/07watch.test 
tools/xenstore/testsuite/08transaction.slowtest tools/xenstore/testsuite/08transaction.test tools/xenstore/testsuite/09domain.test tools/xenstore/testsuite/10domain-homedir.test tools/xenstore/testsuite/11domain-watch.test tools/xenstore/testsuite/12readonly.test tools/xenstore/testsuite/13watch-ack.test tools/xenstore/testsuite/14complexperms.test tools/xenstore/testsuite/15nowait.test tools/xenstore/testsuite/16block-watch-crash.test tools/xenstore/testsuite/test.sh tools/xenstore/testsuite/vg-suppressions tools/xenstore/utils.c tools/xenstore/utils.h tools/xenstore/xenstore_client.c tools/xenstore/xenstored.h tools/xenstore/xenstored_core.c tools/xenstore/xenstored_core.h tools/xenstore/xenstored_domain.c tools/xenstore/xenstored_domain.h tools/xenstore/xenstored_transaction.c tools/xenstore/xenstored_transaction.h tools/xenstore/xenstored_watch.c tools/xenstore/xenstored_watch.h tools/xenstore/xs.c tools/xenstore/xs.h tools/xenstore/xs_crashme.c tools/xenstore/xs_dom0_test.c tools/xenstore/xs_lib.c tools/xenstore/xs_lib.h tools/xenstore/xs_random.c tools/xenstore/xs_stress.c tools/xenstore/xs_test.c tools/xentrace/Makefile tools/xentrace/formats tools/xentrace/xenctx.c tools/xentrace/xentrace.c xen/Makefile xen/Rules.mk xen/acm/acm_chinesewall_hooks.c xen/acm/acm_core.c xen/acm/acm_null_hooks.c xen/acm/acm_policy.c xen/acm/acm_simple_type_enforcement_hooks.c xen/arch/ia64/Makefile xen/arch/ia64/Rules.mk xen/arch/ia64/asm-offsets.c xen/arch/ia64/asm-xsi-offsets.c xen/arch/ia64/linux-xen/efi.c xen/arch/ia64/linux-xen/entry.S xen/arch/ia64/linux-xen/entry.h xen/arch/ia64/linux-xen/head.S xen/arch/ia64/linux-xen/irq_ia64.c xen/arch/ia64/linux-xen/minstate.h xen/arch/ia64/linux-xen/mm_contig.c xen/arch/ia64/linux-xen/setup.c xen/arch/ia64/linux-xen/unaligned.c xen/arch/ia64/linux/extable.c xen/arch/ia64/linux/ia64_ksyms.c xen/arch/ia64/linux/irq_lsapic.c xen/arch/ia64/linux/pcdp.h xen/arch/ia64/tools/README.xenia64 xen/arch/ia64/tools/README.xenia64linux xen/arch/ia64/vmx/mm.c xen/arch/ia64/vmx/mmio.c xen/arch/ia64/vmx/pal_emul.c xen/arch/ia64/vmx/vmmu.c xen/arch/ia64/vmx/vmx_entry.S xen/arch/ia64/vmx/vmx_hypercall.c xen/arch/ia64/vmx/vmx_init.c xen/arch/ia64/vmx/vmx_interrupt.c xen/arch/ia64/vmx/vmx_irq_ia64.c xen/arch/ia64/vmx/vmx_ivt.S xen/arch/ia64/vmx/vmx_minstate.h xen/arch/ia64/vmx/vmx_process.c xen/arch/ia64/vmx/vmx_vcpu.c xen/arch/ia64/vmx/vmx_virt.c xen/arch/ia64/vmx/vtlb.c xen/arch/ia64/xen/dom_fw.c xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/grant_table.c xen/arch/ia64/xen/hypercall.c xen/arch/ia64/xen/hyperprivop.S xen/arch/ia64/xen/ivt.S xen/arch/ia64/xen/privop.c xen/arch/ia64/xen/process.c xen/arch/ia64/xen/regionreg.c xen/arch/ia64/xen/vcpu.c xen/arch/ia64/xen/vhpt.c xen/arch/ia64/xen/xenmem.c xen/arch/ia64/xen/xenmisc.c xen/arch/x86/Makefile xen/arch/x86/Rules.mk xen/arch/x86/acpi/boot.c xen/arch/x86/apic.c xen/arch/x86/audit.c xen/arch/x86/boot/x86_32.S xen/arch/x86/boot/x86_64.S xen/arch/x86/cdb.c xen/arch/x86/cpu/amd.c xen/arch/x86/cpu/common.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/genapic/es7000plat.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/mm.c xen/arch/x86/mpparse.c xen/arch/x86/physdev.c xen/arch/x86/setup.c xen/arch/x86/shadow.c xen/arch/x86/shadow32.c xen/arch/x86/shadow_guest32.c xen/arch/x86/shadow_public.c xen/arch/x86/smpboot.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/vmx.c xen/arch/x86/vmx_intercept.c xen/arch/x86/vmx_io.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c 
xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/traps.c xen/common/Makefile xen/common/ac_timer.c xen/common/acm_ops.c xen/common/dom0_ops.c xen/common/domain.c xen/common/event_channel.c xen/common/grant_table.c xen/common/kernel.c xen/common/lib.c xen/common/memory.c xen/common/multicall.c xen/common/page_alloc.c xen/common/perfc.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/symbols.c xen/common/trace.c xen/common/xmalloc.c xen/drivers/char/Makefile xen/drivers/char/console.c xen/drivers/char/ns16550.c xen/drivers/char/serial.c xen/include/acm/acm_core.h xen/include/acm/acm_hooks.h xen/include/asm-ia64/config.h xen/include/asm-ia64/domain.h xen/include/asm-ia64/event.h xen/include/asm-ia64/ia64_int.h xen/include/asm-ia64/linux-xen/asm/gcc_intrin.h xen/include/asm-ia64/linux-xen/asm/ia64regs.h xen/include/asm-ia64/linux-xen/asm/io.h xen/include/asm-ia64/linux-xen/asm/kregs.h xen/include/asm-ia64/linux-xen/asm/mca_asm.h xen/include/asm-ia64/linux-xen/asm/page.h xen/include/asm-ia64/linux-xen/asm/pal.h xen/include/asm-ia64/linux-xen/asm/pgalloc.h xen/include/asm-ia64/linux-xen/asm/processor.h xen/include/asm-ia64/linux-xen/asm/system.h xen/include/asm-ia64/linux-xen/asm/types.h xen/include/asm-ia64/linux-xen/asm/uaccess.h xen/include/asm-ia64/linux-xen/linux/cpumask.h xen/include/asm-ia64/linux-xen/linux/hardirq.h xen/include/asm-ia64/linux-xen/linux/interrupt.h xen/include/asm-ia64/linux/asm-generic/bug.h xen/include/asm-ia64/linux/asm-generic/errno.h xen/include/asm-ia64/linux/asm-generic/iomap.h xen/include/asm-ia64/linux/asm-generic/pci.h xen/include/asm-ia64/linux/asm-generic/pgtable-nopud.h xen/include/asm-ia64/linux/asm-generic/pgtable.h xen/include/asm-ia64/linux/asm-generic/sections.h xen/include/asm-ia64/linux/asm-generic/topology.h xen/include/asm-ia64/linux/asm-generic/vmlinux.lds.h xen/include/asm-ia64/linux/asm/acpi.h xen/include/asm-ia64/linux/asm/bitops.h xen/include/asm-ia64/linux/asm/break.h xen/include/asm-ia64/linux/asm/bug.h xen/include/asm-ia64/linux/asm/cacheflush.h xen/include/asm-ia64/linux/asm/param.h xen/include/asm-ia64/linux/asm/pci.h xen/include/asm-ia64/linux/asm/percpu.h xen/include/asm-ia64/linux/asm/sal.h xen/include/asm-ia64/linux/asm/sections.h xen/include/asm-ia64/linux/asm/signal.h xen/include/asm-ia64/linux/asm/smp.h xen/include/asm-ia64/linux/asm/thread_info.h xen/include/asm-ia64/linux/asm/topology.h xen/include/asm-ia64/linux/asm/unaligned.h xen/include/asm-ia64/linux/asm/unistd.h xen/include/asm-ia64/linux/bitmap.h xen/include/asm-ia64/linux/bitops.h xen/include/asm-ia64/linux/dma-mapping.h xen/include/asm-ia64/linux/efi.h xen/include/asm-ia64/linux/err.h xen/include/asm-ia64/linux/gfp.h xen/include/asm-ia64/linux/mmzone.h xen/include/asm-ia64/linux/numa.h xen/include/asm-ia64/linux/page-flags.h xen/include/asm-ia64/linux/slab.h xen/include/asm-ia64/linux/threads.h xen/include/asm-ia64/linux/timex.h xen/include/asm-ia64/linux/topology.h xen/include/asm-ia64/linux/wait.h xen/include/asm-ia64/mm.h xen/include/asm-ia64/mmu_context.h xen/include/asm-ia64/privop.h xen/include/asm-ia64/regionreg.h xen/include/asm-ia64/regs.h xen/include/asm-ia64/time.h xen/include/asm-ia64/tlb.h xen/include/asm-ia64/vcpu.h xen/include/asm-ia64/vhpt.h xen/include/asm-ia64/vmmu.h xen/include/asm-ia64/vmx.h xen/include/asm-ia64/vmx_uaccess.h xen/include/asm-ia64/vmx_vcpu.h 
xen/include/asm-ia64/vmx_vpd.h xen/include/asm-ia64/xenkregs.h xen/include/asm-ia64/xenprocessor.h xen/include/asm-ia64/xensystem.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/asm_defns.h xen/include/asm-x86/bitops.h xen/include/asm-x86/config.h xen/include/asm-x86/e820.h xen/include/asm-x86/event.h xen/include/asm-x86/fixmap.h xen/include/asm-x86/genapic.h xen/include/asm-x86/hpet.h xen/include/asm-x86/io.h xen/include/asm-x86/mach-bigsmp/mach_apic.h xen/include/asm-x86/mach-default/mach_apic.h xen/include/asm-x86/mach-es7000/mach_apic.h xen/include/asm-x86/mach-generic/mach_apic.h xen/include/asm-x86/mach-summit/mach_apic.h xen/include/asm-x86/mach-summit/mach_mpparse.h xen/include/asm-x86/mm.h xen/include/asm-x86/page-guest32.h xen/include/asm-x86/page.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/shadow_64.h xen/include/asm-x86/shadow_ops.h xen/include/asm-x86/shadow_public.h xen/include/asm-x86/time.h xen/include/asm-x86/types.h xen/include/asm-x86/uaccess.h xen/include/asm-x86/vmx.h xen/include/asm-x86/vmx_platform.h xen/include/asm-x86/vmx_virpit.h xen/include/asm-x86/vmx_vmcs.h xen/include/asm-x86/x86_32/asm_defns.h xen/include/asm-x86/x86_32/page-3level.h xen/include/asm-x86/x86_32/uaccess.h xen/include/asm-x86/x86_64/asm_defns.h xen/include/asm-x86/x86_64/page.h xen/include/public/acm.h xen/include/public/acm_ops.h xen/include/public/arch-ia64.h xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h xen/include/public/dom0_ops.h xen/include/public/grant_table.h xen/include/public/io/blkif.h xen/include/public/io/ioreq.h xen/include/public/io/netif.h xen/include/public/io/ring.h xen/include/public/io/tpmif.h xen/include/public/memory.h xen/include/public/physdev.h xen/include/public/trace.h xen/include/public/version.h xen/include/public/xen.h xen/include/xen/ac_timer.h xen/include/xen/config.h xen/include/xen/domain.h xen/include/xen/event.h xen/include/xen/grant_table.h xen/include/xen/mm.h xen/include/xen/perfc.h xen/include/xen/perfc_defn.h xen/include/xen/sched.h xen/include/xen/serial.h xen/include/xen/symbols.h xen/include/xen/time.h xen/include/xen/trace.h xen/tools/Makefile xen/tools/symbols.c
     1.1 --- a/.hgignore	Thu Sep 22 11:34:14 2005 -0600
     1.2 +++ b/.hgignore	Thu Sep 22 11:42:01 2005 -0600
     1.3 @@ -86,6 +86,9 @@
     1.4  ^tools/check/\..*$
     1.5  ^tools/console/xenconsoled$
     1.6  ^tools/console/xenconsole$
     1.7 +^tools/debugger/gdb/gdb-6\.2\.1\.tar\.bz2$
     1.8 +^tools/debugger/gdb/gdb-6\.2\.1/.*$
     1.9 +^tools/debugger/gdb/gdb-6\.2\.1-linux-i386-xen/.*$
    1.10  ^tools/debugger/pdb/pdb$
    1.11  ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.ko$
    1.12  ^tools/debugger/pdb/linux-[0-9.]*-module/.*\.mod.c$
    1.13 @@ -136,9 +139,10 @@
    1.14  ^tools/vnet/vnet-module/\..*\.cmd$
    1.15  ^tools/vnet/vnet-module/\.tmp_versions/.*$
    1.16  ^tools/vnet/vnet-module/vnet_module\.mod\..*$
    1.17 -^tools/vtpm/vtpm*
    1.18 -^tools/vtpm/tpm_emulator-*
    1.19 -^tools/vtpm_manager/manager/vtpm_managerd
    1.20 +^tools/vtpm/tpm_emulator/.*$
    1.21 +^tools/vtpm/tpm_emulator-.*\.tar\.gz$
    1.22 +^tools/vtpm/vtpm/.*$
    1.23 +^tools/vtpm_manager/manager/vtpm_managerd$
    1.24  ^tools/xcutils/xc_restore$
    1.25  ^tools/xcutils/xc_save$
    1.26  ^tools/xenstat/xentop/xentop$
    1.27 @@ -156,6 +160,7 @@
    1.28  ^tools/xenstore/xs_stress$
    1.29  ^tools/xenstore/xs_test$
    1.30  ^tools/xenstore/xs_watch_stress$
    1.31 +^tools/xentrace/xenctx$
    1.32  ^tools/xentrace/xentrace$
    1.33  ^xen/BLOG$
    1.34  ^xen/TAGS$
     3.1 --- a/Makefile	Thu Sep 22 11:34:14 2005 -0600
     3.2 +++ b/Makefile	Thu Sep 22 11:42:01 2005 -0600
     3.3 @@ -98,12 +98,15 @@ clean::
     3.4  	$(MAKE) -C tools clean
     3.5  	$(MAKE) -C docs clean
     3.6  
     3.7 -# clean, but blow away kernel build tree plus tar balls
     3.8 -mrproper: clean
     3.9 +# clean, but blow away kernel build tree plus tarballs
    3.10 +distclean: clean
    3.11  	rm -rf dist patches/tmp
    3.12  	for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
    3.13  	for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
    3.14  
    3.15 +# Linux name for GNU distclean
    3.16 +mrproper: distclean
    3.17 +
    3.18  install-logging: LOGGING=logging-0.4.9.2
    3.19  install-logging:
    3.20  	[ -f $(LOGGING).tar.gz ] || wget http://www.red-dove.com/$(LOGGING).tar.gz
    3.21 @@ -142,7 +145,7 @@ help:
    3.22  	@echo 'Cleaning targets:'
    3.23  	@echo '  clean            - clean the Xen, tools and docs (but not'
    3.24  	@echo '                     guest kernel) trees'
    3.25 -	@echo '  mrproper         - clean plus delete kernel tarballs and kernel'
    3.26 +	@echo '  distclean        - clean plus delete kernel tarballs and kernel'
    3.27  	@echo '                     build trees'
    3.28  	@echo '  kdelete          - delete guest kernel build trees'
    3.29  	@echo '  kclean           - clean guest kernel build trees'
    3.30 @@ -163,27 +166,25 @@ uninstall: D=$(DESTDIR)
    3.31  uninstall:
    3.32  	[ -d $(D)/etc/xen ] && mv -f $(D)/etc/xen $(D)/etc/xen.old-`date +%s`
    3.33  	rm -rf $(D)/etc/init.d/xend*
    3.34 -	rm -rf $(D)/usr/$(LIBDIR)/libxc* $(D)/usr/$(LIBDIR)/libxutil*
    3.35 -	rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/include/xen
    3.36 -	rm -rf $(D)/usr/$(LIBDIR)/share/xen $(D)/usr/$(LIBDIR)/libxenstore*
    3.37 +	rm -rf $(D)/etc/hotplug/xen-backend.agent
    3.38  	rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
    3.39 -	rm -rf $(D)/usr/include/xcs_proto.h $(D)/usr/include/xc.h
    3.40 -	rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
    3.41 -	rm -rf $(D)/usr/sbin/xcs $(D)/usr/sbin/xcsdump $(D)/usr/sbin/xen*
    3.42 -	rm -rf $(D)/usr/sbin/netfix
    3.43 -	rm -rf $(D)/usr/sbin/xfrd $(D)/usr/sbin/xm
    3.44 -	rm -rf $(D)/usr/share/doc/xen  $(D)/usr/man/man*/xentrace*
    3.45 -	rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm
    3.46  	rm -rf $(D)/boot/*xen*
    3.47  	rm -rf $(D)/lib/modules/*xen*
    3.48 +	rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/lomount
    3.49  	rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen
    3.50  	rm -rf $(D)/usr/bin/xc_shadow
    3.51 -	rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen
    3.52 +	rm -rf $(D)/usr/include/xenctrl.h
    3.53 +	rm -rf $(D)/usr/include/xs_lib.h $(D)/usr/include/xs.h
    3.54 +	rm -rf $(D)/usr/include/xen
    3.55 +	rm -rf $(D)/usr/$(LIBDIR)/libxenctrl* $(D)/usr/$(LIBDIR)/libxenguest*
    3.56 +	rm -rf $(D)/usr/$(LIBDIR)/libxenstore*
    3.57 +	rm -rf $(D)/usr/$(LIBDIR)/python/xen $(D)/usr/$(LIBDIR)/xen 
    3.58 +	rm -rf $(D)/usr/libexec/xen
    3.59 +	rm -rf $(D)/usr/sbin/xen* $(D)/usr/sbin/netfix $(D)/usr/sbin/xm
    3.60 +	rm -rf $(D)/usr/share/doc/xen
    3.61 +	rm -rf $(D)/usr/share/xen
    3.62  	rm -rf $(D)/usr/share/man/man1/xen*
    3.63  	rm -rf $(D)/usr/share/man/man8/xen*
    3.64 -	rm -rf $(D)/usr/lib/xen
    3.65 -	rm -rf $(D)/etc/hotplug.d/xen-backend
    3.66 -	rm -rf $(D)/etc/hotplug/xen-backend.agent
    3.67  
    3.68  # Legacy targets for compatibility
    3.69  linux24:
    11.1 --- a/docs/Makefile	Thu Sep 22 11:34:14 2005 -0600
    11.2 +++ b/docs/Makefile	Thu Sep 22 11:42:01 2005 -0600
    11.3 @@ -12,7 +12,7 @@ DOXYGEN		:= doxygen
    11.4  
    11.5  pkgdocdir	:= /usr/share/doc/xen
    11.6  
    11.7 -DOC_TEX		:= $(wildcard src/*.tex)
    11.8 +DOC_TEX		:= src/user.tex src/interface.tex
    11.9  DOC_PS		:= $(patsubst src/%.tex,ps/%.ps,$(DOC_TEX))
   11.10  DOC_PDF		:= $(patsubst src/%.tex,pdf/%.pdf,$(DOC_TEX))
   11.11  DOC_HTML	:= $(patsubst src/%.tex,html/%/index.html,$(DOC_TEX))
   11.12 @@ -36,11 +36,12 @@ html:
   11.13  	$(MAKE) $(DOC_HTML); fi
   11.14  
   11.15  python-dev-docs:
   11.16 -	mkdir -p api/tools/python
   11.17 +	@mkdir -v -p api/tools/python
   11.18  	@if which $(DOXYGEN) 1>/dev/null 2>/dev/null; then         \
   11.19          echo "Running doxygen to generate Python tools APIs ... "; \
   11.20  	$(DOXYGEN) Doxyfile;                                       \
   11.21 -	$(MAKE) -C api/tools/python/latex ; fi
   11.22 +	$(MAKE) -C api/tools/python/latex ; else                   \
   11.23 +        echo "Doxygen not installed; skipping python-dev-docs."; fi
   11.24  
   11.25  clean:
   11.26  	rm -rf .word_count *.aux *.dvi *.bbl *.blg *.glo *.idx *~ 
    15.1 --- a/docs/src/interface.tex	Thu Sep 22 11:34:14 2005 -0600
    15.2 +++ b/docs/src/interface.tex	Thu Sep 22 11:42:01 2005 -0600
    15.3 @@ -87,1084 +87,23 @@ itself, allows the Xen framework to sepa
    15.4  mechanism and policy within the system.
    15.5  
    15.6  
    15.7 -
    15.8 -\chapter{Virtual Architecture}
    15.9 -
   15.10 -On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
   15.11 -has full access to the physical memory available in the system and is
   15.12 -responsible for allocating portions of it to the domains.  Guest
   15.13 -operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
   15.14 -they see fit. Segmentation is used to prevent the guest OS from
   15.15 -accessing the portion of the address space that is reserved for
   15.16 -Xen. We expect most guest operating systems will use ring 1 for their
   15.17 -own operation and place applications in ring 3.
   15.18 -
   15.19 -In this chapter we consider the basic virtual architecture provided 
   15.20 -by Xen: the basic CPU state, exception and interrupt handling, and
   15.21 -time. Other aspects such as memory and device access are discussed 
   15.22 -in later chapters. 
   15.23 -
   15.24 -\section{CPU state}
   15.25 -
   15.26 -All privileged state must be handled by Xen.  The guest OS has no
   15.27 -direct access to CR3 and is not permitted to update privileged bits in
   15.28 -EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen; 
   15.29 -these are analogous to system calls but occur from ring 1 to ring 0. 
   15.30 -
   15.31 -A list of all hypercalls is given in Appendix~\ref{a:hypercalls}. 
   15.32 -
   15.33 -
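As a concrete illustration of the hypercall mechanism described above: on 32-bit x86 of this era the guest loads the hypercall number into %eax and arguments into %ebx, %ecx, etc., then executes a software interrupt that transfers from ring 1 to ring 0, as in the guest header asm-xen/asm-i386/hypercall.h carried in this tree. The wrapper below is a minimal sketch, not a copy of that header.

    /* Minimal sketch of a two-argument hypercall stub (32-bit x86). */
    #define TRAP_INSTR "int $0x82"      /* Xen's hypercall trap gate */

    static inline long _hypercall2(int op, long a1, long a2)
    {
        long ret;
        __asm__ __volatile__ (TRAP_INSTR
                              : "=a" (ret)            /* result in %eax    */
                              : "0" (op),             /* number in %eax    */
                                "b" (a1), "c" (a2)    /* args in %ebx/%ecx */
                              : "memory");
        return ret;
    }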
   15.34 -
   15.35 -\section{Exceptions}
   15.36 -
   15.37 -A virtual IDT is provided --- a domain can submit a table of trap
   15.38 -handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
   15.39 -handlers are identical to native x86 handlers, although the page-fault
   15.40 -handler is somewhat different.
   15.41 -
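To make the registration step concrete, here is a sketch of a guest submitting its virtual IDT; the trap_info layout follows the public arch-x86_32.h header in this tree, while divide_error() and GUEST_KERNEL_CS are hypothetical guest-side details.

    typedef struct trap_info {
        unsigned char  vector;    /* exception vector number      */
        unsigned char  flags;     /* privilege level required     */
        unsigned short cs;        /* code selector of the handler */
        unsigned long  address;   /* handler entry point          */
    } trap_info_t;

    #define GUEST_KERNEL_CS 0x61             /* hypothetical selector */
    extern void divide_error(void);          /* hypothetical handler  */
    extern long HYPERVISOR_set_trap_table(trap_info_t *table);

    static trap_info_t trap_table[] = {
        { 0, 0, GUEST_KERNEL_CS, (unsigned long)divide_error },
        { 0, 0, 0, 0 }           /* table ends with a zeroed entry */
    };

    void install_virtual_idt(void)
    {
        HYPERVISOR_set_trap_table(trap_table);
    }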
   15.42 -
   15.43 -\section{Interrupts and events}
   15.44 -
   15.45 -Interrupts are virtualized by mapping them to \emph{events}, which are
   15.46 -delivered asynchronously to the target domain using a callback
   15.47 -supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
   15.48 -these events onto its standard interrupt dispatch mechanisms.  Xen is
   15.49 -responsible for determining the target domain that will handle each
   15.50 -physical interrupt source. For more details on the binding of event
   15.51 -sources to events, see Chapter~\ref{c:devices}. 
   15.52 -
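A sketch of the guest side of this arrangement: the callback entry points are registered once via set_callbacks(), and the callback then scans the pending-event bitmap and feeds each set bit into the guest's own interrupt dispatch. The structure and names below are simplified stand-ins, not the exact shared-info layout.

    extern long HYPERVISOR_set_callbacks(
        unsigned long event_selector,    unsigned long event_address,
        unsigned long failsafe_selector, unsigned long failsafe_address);

    struct shared_info_stub {                /* simplified bitmap layout */
        unsigned long evtchn_pending[8];
        unsigned long evtchn_mask[8];
    };

    extern void guest_handle_irq(int irq);   /* guest's own dispatcher */

    void evtchn_do_upcall(struct shared_info_stub *s)
    {
        for (int w = 0; w < 8; w++) {
            unsigned long pending = s->evtchn_pending[w] & ~s->evtchn_mask[w];
            while (pending) {
                int bit = __builtin_ctzl(pending);   /* lowest set bit */
                pending &= pending - 1;              /* clear it       */
                guest_handle_irq(w * (int)(8 * sizeof(long)) + bit);
            }
        }
    }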
   15.53 -
   15.54 -
   15.55 -\section{Time}
   15.56 -
   15.57 -Guest operating systems need to be aware of the passage of both real
   15.58 -(or wallclock) time and their own `virtual time' (the time for
   15.59 -which they have been executing). Furthermore, Xen has a notion of 
   15.60 -time which is used for scheduling. The following notions of 
   15.61 -time are provided: 
   15.62 -
   15.63 -\begin{description}
   15.64 -\item[Cycle counter time.]
   15.65 -
   15.66 -This provides a fine-grained time reference.  The cycle counter time is
   15.67 -used to accurately extrapolate the other time references.  On SMP machines
   15.68 -it is currently assumed that the cycle counter time is synchronized between
   15.69 -CPUs.  The current x86-based implementation achieves this within inter-CPU
   15.70 -communication latencies.
   15.71 -
   15.72 -\item[System time.]
   15.73 -
   15.74 -This is a 64-bit counter which holds the number of nanoseconds that
   15.75 -have elapsed since system boot.
   15.76 -
   15.77 -
   15.78 -\item[Wall clock time.]
   15.79 -
   15.80 -This is the time of day in a Unix-style {\tt struct timeval} (seconds
   15.81 -and microseconds since 1 January 1970, adjusted by leap seconds).  An
   15.82 -NTP client hosted by {\it domain 0} can keep this value accurate.  
   15.83 -
   15.84 -
   15.85 -\item[Domain virtual time.]
   15.86 -
   15.87 -This progresses at the same pace as system time, but only while a
   15.88 -domain is executing --- it stops while a domain is de-scheduled.
   15.89 -Therefore the share of the CPU that a domain receives is indicated by
   15.90 -the rate at which its virtual time increases.
   15.91 -
   15.92 -\end{description}
   15.93 -
   15.94 -
   15.95 -Xen exports timestamps for system time and wall-clock time to guest
   15.96 -operating systems through a shared page of memory.  Xen also provides
   15.97 -the cycle counter time at the instant the timestamps were calculated,
   15.98 -and the CPU frequency in Hertz.  This allows the guest to extrapolate
   15.99 -system and wall-clock times accurately based on the current cycle
  15.100 -counter time.
  15.101 -
   15.102 -Since all timestamps need to be updated and read \emph{atomically},
   15.103 -two version numbers are also stored in the shared info page. The 
   15.104 -first is incremented prior to an update, while the second is only
   15.105 -incremented afterwards. Thus a guest can be sure that it read a consistent 
   15.106 -state by checking that the two version numbers are equal. 
  15.107 -
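The consistency rule above is the familiar sequence-counter pattern; a sketch with hypothetical field names (the real layout lives in the shared-info page): retry the reads until the two version numbers observed around them match.

    struct time_info_stub {
        unsigned long      version1;      /* bumped before an update   */
        unsigned long long system_time;   /* ns since boot             */
        unsigned long long tsc_at_stamp;  /* cycle counter at stamping */
        unsigned long      version2;      /* bumped after an update    */
    };

    void read_time(volatile struct time_info_stub *t,
                   unsigned long long *sys, unsigned long long *tsc)
    {
        unsigned long v1, v2;
        do {
            v1 = t->version1;
            __sync_synchronize();     /* keep reads between the checks */
            *sys = t->system_time;
            *tsc = t->tsc_at_stamp;
            __sync_synchronize();
            v2 = t->version2;
        } while (v1 != v2);           /* mismatch => raced with update */
    }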
  15.108 -Xen includes a periodic ticker which sends a timer event to the
  15.109 -currently executing domain every 10ms.  The Xen scheduler also sends a
  15.110 -timer event whenever a domain is scheduled; this allows the guest OS
  15.111 -to adjust for the time that has passed while it has been inactive.  In
  15.112 -addition, Xen allows each domain to request that they receive a timer
  15.113 -event sent at a specified system time by using the {\tt
  15.114 -set\_timer\_op()} hypercall.  Guest OSes may use this timer to
  15.115 -implement timeout values when they block.
  15.116 -
  15.117 -
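For example, a guest that wants to block with a timeout can arm a one-shot timer event first. The sketch assumes set_timer_op takes an absolute deadline in system-time nanoseconds, and that now_ns() and block_vcpu() are guest-side helpers (both hypothetical here).

    extern long HYPERVISOR_set_timer_op(unsigned long long deadline_ns);
    extern unsigned long long now_ns(void);   /* extrapolated system time */
    extern void block_vcpu(void);             /* e.g. yield via sched_op  */

    void block_with_timeout(unsigned long long delta_ns)
    {
        HYPERVISOR_set_timer_op(now_ns() + delta_ns);  /* absolute deadline */
        block_vcpu();                 /* the timer event wakes the domain */
    }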
  15.118 -
  15.119 -%% % akw: demoting this to a section -- not sure if there is any point
  15.120 -%% % though, maybe just remove it.
  15.121 -
  15.122 -\section{Xen CPU Scheduling}
  15.123 -
  15.124 -Xen offers a uniform API for CPU schedulers.  It is possible to choose
  15.125 -from a number of schedulers at boot and it should be easy to add more.
  15.126 -The BVT, Atropos and Round Robin schedulers are part of the normal
  15.127 -Xen distribution.  BVT provides proportional fair shares of the CPU to
  15.128 -the running domains.  Atropos can be used to reserve absolute shares
  15.129 -of the CPU for each domain.  Round-robin is provided as an example of
  15.130 -Xen's internal scheduler API.
  15.131 -
  15.132 -\paragraph*{Note: SMP host support}
   15.133 -Xen has always supported SMP host systems.  Domains are statically assigned to
   15.134 -CPUs, either at creation time or when manually pinned to a particular CPU.
  15.135 -The current schedulers then run locally on each CPU to decide which of the
  15.136 -assigned domains should be run there. The user-level control software 
  15.137 -can be used to perform coarse-grain load-balancing between CPUs. 
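Purely as an illustration of what a uniform scheduler API might look like, here is a hypothetical ops table in the spirit of xen/common/schedule.c; the hook names are invented for this sketch and not quoted from the source.

    struct domain_stub;                        /* opaque per-domain state */

    struct scheduler_ops {
        const char *name;                      /* e.g. "bvt", "atropos"   */
        void (*init)(void);
        void (*wake) (struct domain_stub *d);  /* domain became runnable  */
        void (*sleep)(struct domain_stub *d);  /* domain blocked          */
        /* pick the next domain for this CPU and its timeslice in ns */
        struct domain_stub *(*do_schedule)(long long now_ns,
                                           long long *slice_ns);
    };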
  15.138 +%% chapter Virtual Architecture moved to architecture.tex
  15.139 +\include{src/interface/architecture}
  15.140  
  15.141 -
  15.142 -%% More information on the characteristics and use of these schedulers is
  15.143 -%% available in {\tt Sched-HOWTO.txt}.
  15.144 -
  15.145 -
  15.146 -\section{Privileged operations}
  15.147 -
  15.148 -Xen exports an extended interface to privileged domains (viz.\ {\it
  15.149 -  Domain 0}). This allows such domains to build and boot other domains 
  15.150 -on the server, and provides control interfaces for managing 
  15.151 -scheduling, memory, networking, and block devices. 
  15.152 -
  15.153 -
  15.154 -\chapter{Memory}
  15.155 -\label{c:memory} 
  15.156 -
  15.157 -Xen is responsible for managing the allocation of physical memory to
  15.158 -domains, and for ensuring safe use of the paging and segmentation
  15.159 -hardware.
  15.160 -
  15.161 -
  15.162 -\section{Memory Allocation}
  15.163 -
  15.164 -
  15.165 -Xen resides within a small fixed portion of physical memory; it also
  15.166 -reserves the top 64MB of every virtual address space. The remaining
  15.167 -physical memory is available for allocation to domains at a page
  15.168 -granularity.  Xen tracks the ownership and use of each page, which
  15.169 -allows it to enforce secure partitioning between domains.
  15.170 -
  15.171 -Each domain has a maximum and current physical memory allocation. 
  15.172 -A guest OS may run a `balloon driver' to dynamically adjust its 
  15.173 -current memory allocation up to its limit. 
  15.174 -
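A sketch of the balloon idea in terms of the memory_op interface whose public header (xen/include/public/memory.h) appears in this changeset: to shrink, the guest frees pages and hands the underlying frames back with a decrease-reservation call; to grow again, up to its maximum, it requests frames with the matching increase call. The struct below is a simplified layout, not the exact header.

    #define XENMEM_increase_reservation 0
    #define XENMEM_decrease_reservation 1
    #define DOMID_SELF 0x7FF0

    struct xen_memory_reservation_stub {   /* simplified layout    */
        unsigned long *extent_start;       /* frame list (in/out)  */
        unsigned int   nr_extents;
        unsigned int   extent_order;       /* 0 => single 4K pages */
        unsigned short domid;
    };

    extern long HYPERVISOR_memory_op(int cmd, void *arg);

    /* Hand n frames back to Xen (balloon inflate). */
    long balloon_out(unsigned long *frames, unsigned int n)
    {
        struct xen_memory_reservation_stub r = {
            .extent_start = frames, .nr_extents = n,
            .extent_order = 0, .domid = DOMID_SELF
        };
        return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &r);
    }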
  15.175 -
  15.176 -%% XXX SMH: I use machine and physical in the next section (which 
  15.177 -%% is kinda required for consistency with code); wonder if this 
  15.178 -%% section should use same terms? 
  15.179 -%%
  15.180 -%% Probably. 
  15.181 -%%
  15.182 -%% Merging this and below section at some point prob makes sense. 
  15.183 -
  15.184 -\section{Pseudo-Physical Memory}
  15.185 -
  15.186 -Since physical memory is allocated and freed on a page granularity,
  15.187 -there is no guarantee that a domain will receive a contiguous stretch
   15.188 -of physical memory. However, most operating systems do not have good
  15.189 -support for operating in a fragmented physical address space. To aid
  15.190 -porting such operating systems to run on top of Xen, we make a
  15.191 -distinction between \emph{machine memory} and \emph{pseudo-physical
  15.192 -memory}.
  15.193 -
  15.194 -Put simply, machine memory refers to the entire amount of memory
  15.195 -installed in the machine, including that reserved by Xen, in use by
  15.196 -various domains, or currently unallocated. We consider machine memory
  15.197 -to comprise a set of 4K \emph{machine page frames} numbered
  15.198 -consecutively starting from 0. Machine frame numbers have the same
  15.199 -meaning within Xen and within any domain.
  15.200 -
  15.201 -Pseudo-physical memory, on the other hand, is a per-domain
  15.202 -abstraction. It allows a guest operating system to consider its memory
  15.203 -allocation to consist of a contiguous range of physical page frames
  15.204 -starting at physical frame 0, despite the fact that the underlying
  15.205 -machine page frames may be sparsely allocated and in any order.
  15.206 -
  15.207 -To achieve this, Xen maintains a globally readable {\it
  15.208 -machine-to-physical} table which records the mapping from machine page
  15.209 -frames to pseudo-physical ones. In addition, each domain is supplied
  15.210 -with a {\it physical-to-machine} table which performs the inverse
  15.211 -mapping. Clearly the machine-to-physical table has size proportional
  15.212 -to the amount of RAM installed in the machine, while each
  15.213 -physical-to-machine table has size proportional to the memory
  15.214 -allocation of the given domain.
  15.215 -
  15.216 -Architecture dependent code in guest operating systems can then use
  15.217 -the two tables to provide the abstraction of pseudo-physical
  15.218 -memory. In general, only certain specialized parts of the operating
  15.219 -system (such as page table management) need to understand the
  15.220 -difference between machine and pseudo-physical addresses.
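As a concrete illustration, the architecture-dependent translation
helpers in a guest port might be sketched as below.  The table names
and the fixed 4K page size are assumptions for illustration, not
definitions taken from any particular port.

\begin{verbatim}
/* Sketch only: table and macro names are illustrative. */

/* Global, read-only table maintained by Xen: indexed by machine
 * frame number (MFN), yields the pseudo-physical frame (PFN). */
extern unsigned long *machine_to_phys_mapping;

/* Per-domain table supplied to the guest: indexed by PFN,
 * yields the corresponding MFN. */
extern unsigned long *phys_to_machine_mapping;

#define pfn_to_mfn(pfn) (phys_to_machine_mapping[(pfn)])
#define mfn_to_pfn(mfn) (machine_to_phys_mapping[(mfn)])

/* Example: convert a pseudo-physical address into the machine
 * address that must actually be written into a page-table entry. */
static unsigned long phys_to_machine(unsigned long phys)
{
    return (pfn_to_mfn(phys >> 12) << 12) | (phys & 0xfff);
}
\end{verbatim}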
  15.221 -
  15.222 -\section{Page Table Updates}
  15.223 -
  15.224 -In the default mode of operation, Xen enforces read-only access to
  15.225 -page tables and requires guest operating systems to explicitly request
  15.226 -any modifications.  Xen validates all such requests and only applies
  15.227 -updates that it deems safe.  This is necessary to prevent domains from
  15.228 -adding arbitrary mappings to their page tables.
  15.229 -
  15.230 -To aid validation, Xen associates a type and reference count with each
  15.231 -memory page. A page has one of the following
  15.232 -mutually-exclusive types at any point in time: page directory ({\sf
  15.233 -PD}), page table ({\sf PT}), local descriptor table ({\sf LDT}),
  15.234 -global descriptor table ({\sf GDT}), or writable ({\sf RW}). Note that
  15.235 -a guest OS may always create readable mappings of its own memory 
  15.236 -regardless of its current type. 
  15.237 -%%% XXX: possibly explain more about ref count 'lifecyle' here?
  15.238 -This mechanism is used to
  15.239 -maintain the invariants required for safety; for example, a domain
  15.240 -cannot have a writable mapping to any part of a page table as this
  15.241 -would require the page concerned to simultaneously be of types {\sf
  15.242 -  PT} and {\sf RW}.
  15.243 -
  15.244 -
  15.245 -%\section{Writable Page Tables}
  15.246 -
  15.247 -Xen also provides an alternative mode of operation in which guests
  15.248 -have the illusion that their page tables are directly writable.  Of
  15.249 -course this is not really the case, since Xen must still validate
  15.250 -modifications to ensure secure partitioning. To this end, Xen traps
  15.251 -any write attempt to a memory page of type {\sf PT} (i.e., that is
  15.252 -currently part of a page table).  If such an access occurs, Xen
  15.253 -temporarily allows write access to that page while at the same time
  15.254 -{\em disconnecting} it from the page table that is currently in
  15.255 -use. This allows the guest to safely make updates to the page because
  15.256 -the newly-updated entries cannot be used by the MMU until Xen
  15.257 -revalidates and reconnects the page.
  15.258 -Reconnection occurs automatically in a number of situations: for
  15.259 -example, when the guest modifies a different page-table page, when the
  15.260 -domain is preempted, or whenever the guest uses Xen's explicit
  15.261 -page-table update interfaces.
  15.262 -
  15.263 -Finally, Xen also supports a form of \emph{shadow page tables} in
  15.264 -which the guest OS uses an independent copy of page tables which are
  15.265 -unknown to the hardware (i.e.\ which are never pointed to by {\tt
  15.266 -cr3}). Instead Xen propagates changes made to the guest's tables to the
  15.267 -real ones, and vice versa. This is useful for logging page writes
  15.268 -(e.g.\ for live migration or checkpoint). A full version of the shadow
  15.269 -page tables also allows guest OS porting with less effort.
  15.270 -
  15.271 -\section{Segment Descriptor Tables}
  15.272 +%% chapter Memory moved to memory.tex
  15.273 +\include{src/interface/memory}
  15.274  
  15.275 -On boot a guest is supplied with a default GDT, which does not reside
  15.276 -within its own memory allocation.  If the guest wishes to use segments
  15.277 -other than the default `flat' ring-1 and ring-3 segments that this GDT
  15.278 -provides, it must register a custom GDT and/or LDT with Xen,
  15.279 -allocated from its own memory. Note that a number of GDT 
  15.280 -entries are reserved by Xen -- any custom GDT must also include
  15.281 -sufficient space for these entries. 
  15.282 -
  15.283 -For example, the following hypercall is used to specify a new GDT: 
  15.284 -
  15.285 -\begin{quote}
  15.286 -int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em entries})
  15.287 -
  15.288 -{\em frame\_list}: An array of up to 16 machine page frames within
  15.289 -which the GDT resides.  Any frame registered as a GDT frame may only
  15.290 -be mapped read-only within the guest's address space (e.g., no
  15.291 -writable mappings, no use as a page-table page, and so on).
  15.292 -
  15.293 -{\em entries}: The number of descriptor-entry slots in the GDT.  Note
  15.294 -that the table must be large enough to contain Xen's reserved entries;
  15.295 -thus we must have `{\em entries $>$ LAST\_RESERVED\_GDT\_ENTRY}\ '.
  15.296 -Note also that, after registering the GDT, slots {\em FIRST\_} through
  15.297 -{\em LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest and
  15.298 -may be overwritten by Xen.
  15.299 -\end{quote}
  15.300 -
  15.301 -The LDT is updated via the generic MMU update mechanism (i.e., via 
  15.302 -the {\tt mmu\_update()} hypercall). 
  15.303 -
  15.304 -\section{Start of Day} 
  15.305 -
  15.306 -The start-of-day environment for guest operating systems is rather
  15.307 -different to that provided by the underlying hardware. In particular,
  15.308 -the processor is already executing in protected mode with paging
  15.309 -enabled.
  15.310 -
  15.311 -{\it Domain 0} is created and booted by Xen itself. For all subsequent
  15.312 -domains, the analogue of the boot-loader is the {\it domain builder},
  15.313 -user-space software running in {\it domain 0}. The domain builder 
  15.314 -is responsible for building the initial page tables for a domain  
  15.315 -and loading its kernel image at the appropriate virtual address. 
  15.316 -
  15.317 -
  15.318 -
  15.319 -\chapter{Devices}
  15.320 -\label{c:devices}
  15.321 -
  15.322 -Devices such as network and disk are exported to guests using a
  15.323 -split device driver.  The device driver domain, which accesses the
  15.324 -physical device directly, also runs a {\em backend} driver, serving
  15.325 -requests to that device from guests.  Each guest uses a simple
  15.326 -{\em frontend} driver to access the backend.  Communication between these
  15.327 -domains is composed of two parts:  First, data is placed onto a shared
  15.328 -memory page between the domains.  Second, an event channel between the
  15.329 -two domains is used to pass notification that data is outstanding.
  15.330 -This separation of notification from data transfer allows message
  15.331 -batching, and results in very efficient device access.  
  15.332 -
  15.333 -Event channels are used extensively in device virtualization; each
  15.334 -domain has a number of end-points or \emph{ports} each of which
  15.335 -may be bound to one of the following \emph{event sources}:
  15.336 -\begin{itemize} 
  15.337 -  \item a physical interrupt from a real device, 
  15.338 -  \item a virtual interrupt (callback) from Xen, or 
  15.339 -  \item a signal from another domain 
  15.340 -\end{itemize}
  15.341 -
  15.342 -Events are lightweight and do not carry much information beyond 
  15.343 -the source of the notification. Hence when performing bulk data
  15.344 -transfer, events are typically used as synchronization primitives
  15.345 -over a shared memory transport. Event channels are managed via 
  15.346 -the {\tt event\_channel\_op()} hypercall; for more details see
  15.347 -Section~\ref{s:idc}. 
  15.348 -
  15.349 -This chapter focuses on some individual device interfaces
  15.350 -available to Xen guests. 
  15.351 -
  15.352 -\section{Network I/O}
  15.353 -
  15.354 -Virtual network device services are provided by shared memory
  15.355 -communication with a backend domain.  From the point of view of
  15.356 -other domains, the backend may be viewed as a virtual ethernet switch
  15.357 -element with each domain having one or more virtual network interfaces
  15.358 -connected to it.
  15.359 -
  15.360 -\subsection{Backend Packet Handling}
  15.361 -
  15.362 -The backend driver is responsible for a variety of actions relating to
  15.363 -the transmission and reception of packets from the physical device.
  15.364 -With regard to transmission, the backend performs these key actions:
  15.365 -
  15.366 -\begin{itemize}
  15.367 -\item {\bf Validation:} To ensure that domains do not attempt to
  15.368 -  generate invalid (e.g. spoofed) traffic, the backend driver may
  15.369 -  validate headers, ensuring that source MAC and IP addresses match the
  15.370 -  interface that they have been sent from.
  15.371 -
  15.372 -  Validation functions can be configured using standard firewall rules
  15.373 -  ({\small{\tt iptables}} in the case of Linux).
  15.374 -  
  15.375 -\item {\bf Scheduling:} Since a number of domains can share a single
  15.376 -  physical network interface, the backend must mediate access when
  15.377 -  several domains each have packets queued for transmission.  This
  15.378 -  general scheduling function subsumes basic shaping or rate-limiting
  15.379 -  schemes.
  15.380 -  
  15.381 -\item {\bf Logging and Accounting:} The backend domain can be
  15.382 -  configured with classifier rules that control how packets are
  15.383 -  accounted or logged.  For example, log messages might be generated
  15.384 -  whenever a domain attempts to send a TCP packet containing a SYN.
  15.385 -\end{itemize}
  15.386 -
  15.387 -On receipt of incoming packets, the backend acts as a simple
  15.388 -demultiplexer:  Packets are passed to the appropriate virtual
  15.389 -interface after any necessary logging and accounting have been carried
  15.390 -out.
  15.391 -
  15.392 -\subsection{Data Transfer}
  15.393 -
  15.394 -Each virtual interface uses two ``descriptor rings'', one for transmit,
  15.395 -the other for receive.  Each descriptor identifies a block of contiguous
  15.396 -physical memory allocated to the domain.  
  15.397 -
  15.398 -The transmit ring carries packets to transmit from the guest to the
  15.399 -backend domain.  The return path of the transmit ring carries messages
  15.400 -indicating that the contents have been physically transmitted and the
  15.401 -backend no longer requires the associated pages of memory.
  15.402 +%% chapter Devices moved to devices.tex
  15.403 +\include{src/interface/devices}
  15.404  
  15.405 -To receive packets, the guest places descriptors of unused pages on
  15.406 -the receive ring.  The backend will return received packets by
  15.407 -exchanging these pages in the domain's memory with new pages
  15.408 -containing the received data, and passing back descriptors regarding
  15.409 -the new packets on the ring.  This zero-copy approach allows the
  15.410 -backend to maintain a pool of free pages to receive packets into, and
  15.411 -then deliver them to appropriate domains after examining their
  15.412 -headers.
  15.413 -
  15.414 -%
  15.415 -%Real physical addresses are used throughout, with the domain performing 
  15.416 -%translation from pseudo-physical addresses if that is necessary.
  15.417 -
  15.418 -If a domain does not keep its receive ring stocked with empty buffers then 
  15.419 -packets destined to it may be dropped.  This provides some defence against 
  15.420 -receive livelock problems because an overloaded domain will cease to receive
  15.421 -further data.  Similarly, on the transmit path, it provides the application
  15.422 -with feedback on the rate at which packets are able to leave the system.
  15.423 -
  15.424 -
  15.425 -Flow control on rings is achieved by including a pair of producer
  15.426 -indexes on the shared ring page.  Each side will maintain a private
  15.427 -consumer index indicating the next outstanding message.  In this
  15.428 -manner, the domains cooperate to divide the ring into two message
  15.429 -lists, one in each direction.  Notification is decoupled from the
  15.430 -immediate placement of new messages on the ring; the event channel
  15.431 -will be used to generate notification when {\em either} a certain
  15.432 -number of outstanding messages are queued, {\em or} a specified number
  15.433 -of nanoseconds have elapsed since the oldest message was placed on the
  15.434 -ring.
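To make this discipline concrete, the sketch below shows one
direction of such a ring.  The structure layout, names and ring size
are illustrative assumptions rather than the actual shared-ring
definitions from the Xen headers.

\begin{verbatim}
#define RING_SIZE 256           /* assumed power of two */

typedef struct {
    unsigned int req_prod;      /* advanced by the request producer  */
    unsigned int resp_prod;     /* advanced by the response producer */
    /* ... descriptor slots follow ... */
} shared_ring_t;

/* Each side keeps a private consumer index; here the frontend
 * consumes responses produced by the backend. */
static unsigned int resp_cons;

static void drain_responses(shared_ring_t *ring)
{
    while (resp_cons != ring->resp_prod) {
        /* process the descriptor in slot (resp_cons % RING_SIZE) */
        resp_cons++;
    }
}
\end{verbatim}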
  15.435 -
  15.436 -% Not sure if my version is any better -- here is what was here before:
  15.437 -%% Synchronization between the backend domain and the guest is achieved using 
  15.438 -%% counters held in shared memory that is accessible to both.  Each ring has
  15.439 -%% associated producer and consumer indices indicating the area in the ring
  15.440 -%% that holds descriptors that contain data.  After receiving {\it n} packets
  15.441 -%% or {\t nanoseconds} after receiving the first packet, the hypervisor sends
  15.442 -%% an event to the domain. 
  15.443 -
  15.444 -\section{Block I/O}
  15.445 -
  15.446 -All guest OS disk access goes through the virtual block device VBD
  15.447 -interface.  This interface allows domains access to portions of block
  15.448 -storage devices visible to the block backend device.  The VBD
  15.449 -interface is a split driver, similar to the network interface
  15.450 -described above.  A single shared memory ring is used between the
  15.451 -frontend and backend drivers, across which read and write messages are
  15.452 -sent.
  15.453 -
  15.454 -Any block device accessible to the backend domain, including
  15.455 -network-based block (iSCSI, *NBD, etc), loopback and LVM/MD devices,
  15.456 -can be exported as a VBD.  Each VBD is mapped to a device node in the
  15.457 -guest, specified in the guest's startup configuration.
  15.458 -
  15.459 -Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
  15.460 -similar functionality can be achieved using the more complete LVM
  15.461 -system, which is already in widespread use.
  15.462 -
  15.463 -\subsection{Data Transfer}
  15.464 -
  15.465 -The single ring between the guest and the block backend supports three
  15.466 -messages (a sketch of a request descriptor follows the list):
  15.467 -
  15.468 -\begin{description}
  15.469 -\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to this guest
  15.470 -  from the backend.  The request includes a descriptor of a free page
  15.471 -  into which the reply will be written by the backend.
  15.472 -
  15.473 -\item [{\small {\tt READ}}:] Read data from the specified block device.  The
  15.474 -  front end identifies the device and location to read from and
  15.475 -  attaches pages for the data to be copied to (typically via DMA from
  15.476 -  the device).  The backend acknowledges completed read requests as
  15.477 -  they finish.
  15.478 -
  15.479 -\item [{\small {\tt WRITE}}:] Write data to the specified block device.  This
  15.480 -  functions essentially as {\small {\tt READ}}, except that the data moves to
  15.481 -  the device instead of from it.
  15.482 -\end{description}
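Purely as an illustration of the information such a request must
carry, a read/write descriptor might resemble the sketch below; the
field names, widths and segment count are assumptions, not the
actual block-interface definitions.

\begin{verbatim}
typedef struct {
    unsigned char  operation;         /* READ or WRITE              */
    unsigned short device;            /* which VBD to access        */
    unsigned long  id;                /* echoed back in the reply   */
    unsigned long  sector_number;     /* start sector on the VBD    */
    unsigned long  buffer_frames[11]; /* guest pages for the data   */
} blk_ring_req_t;
\end{verbatim}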
  15.483 -
  15.484 -% um... some old text
  15.485 -%% In overview, the same style of descriptor-ring that is used for
  15.486 -%% network packets is used here.  Each domain has one ring that carries
  15.487 -%% operation requests to the hypervisor and carries the results back
  15.488 -%% again.
  15.489 -
  15.490 -%% Rather than copying data, the backend simply maps the domain's buffers
  15.491 -%% in order to enable direct DMA to them.  The act of mapping the buffers
  15.492 -%% also increases the reference counts of the underlying pages, so that
  15.493 -%% the unprivileged domain cannot try to return them to the hypervisor,
  15.494 -%% install them as page tables, or any other unsafe behaviour.
  15.495 -%% %block API here 
  15.496 -
  15.497 -
  15.498 -\chapter{Further Information} 
  15.499 -
  15.500 -
  15.501 -If you have questions that are not answered by this manual, the
  15.502 -sources of information listed below may be of interest to you.  Note
  15.503 -that bug reports, suggestions and contributions related to the
  15.504 -software (or the documentation) should be sent to the Xen developers'
  15.505 -mailing list (address below).
  15.506 -
  15.507 -\section{Other documentation}
  15.508 -
  15.509 -If you are mainly interested in using (rather than developing for)
  15.510 -Xen, the {\em Xen Users' Manual} is distributed in the {\tt docs/}
  15.511 -directory of the Xen source distribution.  
  15.512 -
  15.513 -% Various HOWTOs are also available in {\tt docs/HOWTOS}.
  15.514 -
  15.515 -\section{Online references}
  15.516 -
  15.517 -The official Xen web site is found at:
  15.518 -\begin{quote}
  15.519 -{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
  15.520 -\end{quote}
  15.521 -
  15.522 -This contains links to the latest versions of all on-line 
  15.523 -documentation. 
  15.524 -
  15.525 -\section{Mailing lists}
  15.526 -
  15.527 -There are currently four official Xen mailing lists:
  15.528 -
  15.529 -\begin{description}
  15.530 -\item[xen-devel@lists.xensource.com] Used for development
  15.531 -discussions and bug reports.  Subscribe at: \\
  15.532 -{\small {\tt http://lists.xensource.com/xen-devel}}
  15.533 -\item[xen-users@lists.xensource.com] Used for installation and usage
  15.534 -discussions and requests for help.  Subscribe at: \\
  15.535 -{\small {\tt http://lists.xensource.com/xen-users}}
  15.536 -\item[xen-announce@lists.xensource.com] Used for announcements only.
  15.537 -Subscribe at: \\
  15.538 -{\small {\tt http://lists.xensource.com/xen-announce}}
  15.539 -\item[xen-changelog@lists.xensource.com]  Changelog feed
  15.540 -from the unstable and 2.0 trees - developer oriented.  Subscribe at: \\
  15.541 -{\small {\tt http://lists.xensource.com/xen-changelog}}
  15.542 -\end{description}
  15.543 -
  15.544 -Of these, xen-devel is the most active.
  15.545 -
  15.546 -
  15.547 +%% chapter Further Information moved to further_info.tex
  15.548 +\include{src/interface/further_info}
  15.549  
  15.550  
  15.551  \appendix
  15.552  
  15.553 -%\newcommand{\hypercall}[1]{\vspace{5mm}{\large\sf #1}}
  15.554 -
  15.555 -
  15.556 -
  15.557 -
  15.558 -
  15.559 -\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
  15.560 -
  15.561 -
  15.562 -
  15.563 -
  15.564 -
  15.565 -
  15.566 -\chapter{Xen Hypercalls}
  15.567 -\label{a:hypercalls}
  15.568 -
  15.569 -Hypercalls represent the procedural interface to Xen; this appendix 
  15.570 -categorizes and describes the current set of hypercalls. 
  15.571 -
  15.572 -\section{Invoking Hypercalls} 
  15.573 -
  15.574 -Hypercalls are invoked in a manner analogous to system calls in a
  15.575 -conventional operating system; a software interrupt is issued which
  15.576 -vectors to an entry point within Xen. On x86\_32 machines the
  15.577 -instruction required is {\tt int \$0x82}; the (real) IDT is set up so
  15.578 -that this may only be issued from within ring 1. The particular 
  15.579 -hypercall to be invoked is contained in {\tt EAX} --- a list 
  15.580 -mapping these values to symbolic hypercall names can be found 
  15.581 -in {\tt xen/include/public/xen.h}. 
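For illustration, a minimal x86\_32 wrapper for a one-argument
hypercall might be written as below.  The wrapper name and the use
of {\tt EBX} for the first argument are assumptions; the trap vector
and the use of {\tt EAX} for the operation code follow the
description above.

\begin{verbatim}
/* Sketch: issue a hypercall with a single argument. */
static inline int hypercall1(int op, unsigned long arg)
{
    int ret;
    __asm__ __volatile__ (
        "int $0x82"                 /* trap to Xen             */
        : "=a" (ret)                /* result returned in EAX  */
        : "0" (op), "b" (arg)       /* op in EAX, arg in EBX   */
        : "memory" );
    return ret;
}
\end{verbatim}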
  15.582 -
  15.583 -On some occasions a set of hypercalls will be required to carry
  15.584 -out a higher-level function; a good example is when a guest 
  15.585 -operating system wishes to context switch to a new process, which 
  15.586 -requires updating various privileged CPU state. As an optimization
  15.587 -for these cases, there is a generic mechanism to issue a set of 
  15.588 -hypercalls as a batch: 
  15.589 -
  15.590 -\begin{quote}
  15.591 -\hypercall{multicall(void *call\_list, int nr\_calls)}
  15.592 -
  15.593 -Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
  15.594 -the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
  15.595 -call\_list}. Each entry contains the hypercall operation code followed
  15.596 -by up to 7 word-sized arguments.
  15.597 -\end{quote}
  15.598 -
  15.599 -Note that multicalls are provided purely as an optimization; there is
  15.600 -no requirement to use them when first porting a guest operating
  15.601 -system.
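As a hedged sketch, batching the two context-switch hypercalls
described later in this appendix might look as follows.  The entry
layout mirrors the description above; the wrapper and constant names
are assumptions.

\begin{verbatim}
typedef struct {
    unsigned long op;       /* hypercall operation code     */
    unsigned long args[7];  /* up to 7 word-sized arguments */
} multicall_entry_t;

static void switch_stack_and_fpu(unsigned long ss, unsigned long esp)
{
    multicall_entry_t batch[2];

    batch[0].op      = __HYPERVISOR_stack_switch;
    batch[0].args[0] = ss;
    batch[0].args[1] = esp;

    batch[1].op      = __HYPERVISOR_fpu_taskswitch;

    HYPERVISOR_multicall(batch, 2);  /* one trap instead of two */
}
\end{verbatim}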
  15.602 -
  15.603 -
  15.604 -\section{Virtual CPU Setup} 
  15.605 -
  15.606 -At start of day, a guest operating system needs to set up the virtual
  15.607 -CPU it is executing on. This includes installing vectors for the
  15.608 -virtual IDT so that the guest OS can handle interrupts, page faults,
  15.609 -etc. However the very first thing a guest OS must set up is a pair 
  15.610 -of hypervisor callbacks: these are the entry points which Xen will
  15.611 -use when it wishes to notify the guest OS of an occurrence. 
  15.612 -
  15.613 -\begin{quote}
  15.614 -\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
  15.615 -  event\_address, unsigned long failsafe\_selector, unsigned long
  15.616 -  failsafe\_address) }
  15.617 -
  15.618 -Register the normal (``event'') and failsafe callbacks for 
  15.619 -event processing. In each case the code segment selector and 
  15.620 -address within that segment are provided. The selectors must
  15.621 -have RPL 1; in XenLinux we simply use the kernel's CS for both 
  15.622 -{\tt event\_selector} and {\tt failsafe\_selector}.
  15.623 -
  15.624 -The value {\tt event\_address} specifies the address of the guest OS's
  15.625 -event handling and dispatch routine; the {\tt failsafe\_address}
  15.626 -specifies a separate entry point which is used only if a fault occurs
  15.627 -when Xen attempts to use the normal callback. 
  15.628 -\end{quote} 
  15.629 -
  15.630 -
  15.631 -After installing the hypervisor callbacks, the guest OS can 
  15.632 -install a `virtual IDT' by using the following hypercall: 
  15.633 -
  15.634 -\begin{quote} 
  15.635 -\hypercall{set\_trap\_table(trap\_info\_t *table)} 
  15.636 -
  15.637 -Install one or more entries into the per-domain 
  15.638 -trap handler table (essentially a software version of the IDT). 
  15.639 -Each entry in the array pointed to by {\tt table} includes the 
  15.640 -exception vector number with the corresponding segment selector 
  15.641 -and entry point. Most guest OSes can use the same handlers on 
  15.642 -Xen as when running on the real hardware; an exception is the 
  15.643 -page fault handler (exception vector 14) where a modified 
  15.644 -stack-frame layout is used. 
  15.645 -
  15.646 -
  15.647 -\end{quote} 
  15.648 -
  15.649 -
  15.650 -
  15.651 -\section{Scheduling and Timer}
  15.652 -
  15.653 -Domains are preemptively scheduled by Xen according to the 
  15.654 -parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
  15.655 -In addition, however, a domain may choose to explicitly 
  15.656 -control certain behavior with the following hypercall: 
  15.657 -
  15.658 -\begin{quote} 
  15.659 -\hypercall{sched\_op(unsigned long op)} 
  15.660 -
  15.661 -Request scheduling operation from hypervisor. The options are: {\it
  15.662 -yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
  15.663 -calling domain runnable but may cause a reschedule if other domains
  15.664 -are runnable.  {\it block} removes the calling domain from the run
  15.665 -queue and causes it to sleep until an event is delivered to it.  {\it
  15.666 -shutdown} is used to end the domain's execution; the caller can
  15.667 -additionally specify whether the domain should reboot, halt or
  15.668 -suspend.
  15.669 -\end{quote} 
  15.670 -
  15.671 -To aid the implementation of a process scheduler within a guest OS,
  15.672 -Xen provides a virtual programmable timer:
  15.673 -
  15.674 -\begin{quote}
  15.675 -\hypercall{set\_timer\_op(uint64\_t timeout)} 
  15.676 -
  15.677 -Request a timer event to be sent at the specified system time (time 
  15.678 -in nanoseconds since system boot). The hypercall actually passes the 
  15.679 -64-bit timeout value as a pair of 32-bit values. 
  15.680 -
  15.681 -\end{quote} 
  15.682 -
  15.683 -Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
  15.684 -allows block-with-timeout semantics. 
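For example, a guest's idle loop might implement a 10ms
block-with-timeout as sketched below; the wrapper names and the
{\tt NOW()} time source are assumptions for illustration.

\begin{verbatim}
static void block_for_10ms(void)
{
    /* Request a timer event 10ms (in nanoseconds) from now... */
    HYPERVISOR_set_timer_op(NOW() + 10000000ULL);

    /* ...then block: the timer event (or any earlier event)
     * will wake the domain. */
    HYPERVISOR_sched_op(SCHEDOP_block);
}
\end{verbatim}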
  15.685 -
  15.686 -
  15.687 -\section{Page Table Management} 
  15.688 -
  15.689 -Since guest operating systems have read-only access to their page 
  15.690 -tables, Xen must be involved when making any changes. The following
  15.691 -multi-purpose hypercall can be used to modify page-table entries, 
  15.692 -update the machine-to-physical mapping table, flush the TLB, install 
  15.693 -a new page-table base pointer, and more.
  15.694 -
  15.695 -\begin{quote} 
  15.696 -\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
  15.697 -
  15.698 -Update the page table for the domain; a set of {\tt count} updates are
  15.699 -submitted for processing in a batch, with {\tt success\_count} being 
  15.700 -updated to report the number of successful updates.  
  15.701 -
  15.702 -Each element of {\tt req[]} contains a pointer (address) and value; 
  15.703 -the least significant 2-bits of the pointer are used to distinguish 
  15.704 -the type of update requested as follows:
  15.705 -\begin{description} 
  15.706 -
  15.707 -\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
  15.708 -page table entry to the associated value; Xen will check that the
  15.709 -update is safe, as described in Chapter~\ref{c:memory}.
  15.710 -
  15.711 -\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
  15.712 -  machine-to-physical table. The calling domain must own the machine
  15.713 -  page in question (or be privileged).
  15.714 -
  15.715 -\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
  15.716 -The set of additional MMU operations is considerable, and includes
  15.717 -updating {\tt cr3} (or just re-installing it for a TLB flush),
  15.718 -flushing the cache, installing a new LDT, or pinning \& unpinning
  15.719 -page-table pages (to ensure their reference count doesn't drop to zero
  15.720 -which would require a revalidation of all entries).
  15.721 -
  15.722 -Further extended commands are used to deal with granting and 
  15.723 -acquiring page ownership; see Section~\ref{s:idc}. 
  15.724 -
  15.725 -
  15.726 -\end{description}
  15.727 -
  15.728 -More details on the precise format of all commands can be 
  15.729 -found in {\tt xen/include/public/xen.h}. 
  15.730 -
  15.731 -
  15.732 -\end{quote}
  15.733 -
  15.734 -Explicitly updating batches of page table entries is extremely
  15.735 -efficient, but can require a number of alterations to the guest
  15.736 -OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
  15.737 -recommended for new OS ports.
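The sketch below batches a PTE write together with the matching
machine-to-physical update.  The request layout follows the
description above; the wrapper and helper names are assumptions.

\begin{verbatim}
typedef struct {
    unsigned long ptr;  /* machine address; low 2 bits encode type */
    unsigned long val;
} mmu_update_t;

static void remap_frame(unsigned long pte_ma, unsigned long new_pte,
                        unsigned long mfn, unsigned long pfn)
{
    mmu_update_t req[2];
    int done;

    /* Write the PTE at machine address pte_ma. */
    req[0].ptr = pte_ma | MMU_NORMAL_PT_UPDATE;
    req[0].val = new_pte;

    /* Record that machine frame mfn now backs pseudo-physical
     * frame pfn. */
    req[1].ptr = (mfn << 12) | MMU_MACHPHYS_UPDATE;
    req[1].val = pfn;

    HYPERVISOR_mmu_update(req, 2, &done);
}
\end{verbatim}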
  15.738 -
  15.739 -Regardless of which page table update mode is being used, however,
  15.740 -there are some occasions (notably handling a demand page fault) where
  15.741 -a guest OS will wish to modify exactly one PTE rather than a
  15.742 -batch. This is catered for by the following:
  15.743 -
  15.744 -\begin{quote} 
  15.745 -\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
  15.746 -val, \\ unsigned long flags)}
  15.747 -
  15.748 -Update the currently installed PTE for the page {\tt page\_nr} to 
  15.749 -{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
  15.750 -is safe before applying it. The {\tt flags} determine which kind
  15.751 -of TLB flush, if any, should follow the update. 
  15.752 -
  15.753 -\end{quote} 
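A typical use is sketched below: fixing up a single PTE from a
demand-fault handler.  The wrapper and flag names are assumptions
for illustration.

\begin{verbatim}
/* Install new_pte for the faulting virtual page and flush only
 * the affected TLB entry. */
static void fixup_demand_fault(unsigned long fault_addr,
                               unsigned long new_pte)
{
    HYPERVISOR_update_va_mapping(fault_addr >> 12, new_pte,
                                 UVMF_INVLPG);
}
\end{verbatim}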
  15.754 -
  15.755 -Finally, sufficiently privileged domains may occasionally wish to manipulate 
  15.756 -the pages of others: 
  15.757 -\begin{quote}
  15.758 -
  15.759 -\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
  15.760 -unsigned long val, unsigned long flags, uint16\_t domid)}
  15.761 -
  15.762 -Identical to {\tt update\_va\_mapping()} save that the pages being
  15.763 -mapped must belong to the domain {\tt domid}. 
  15.764 -
  15.765 -\end{quote}
  15.766 -
  15.767 -This privileged operation is currently used by backend virtual device
  15.768 -drivers to safely map pages containing I/O data. 
  15.769 -
  15.770 -
  15.771 -
  15.772 -\section{Segmentation Support}
  15.773 -
  15.774 -Xen allows guest OSes to install a custom GDT if they require it; 
  15.775 -this is context switched transparently whenever a domain is 
  15.776 -[de]scheduled.  The following hypercall is effectively a 
  15.777 -`safe' version of {\tt lgdt}: 
  15.778 -
  15.779 -\begin{quote}
  15.780 -\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
  15.781 -
  15.782 -Install a global descriptor table for a domain; {\tt frame\_list} is
  15.783 -an array of up to 16 machine page frames within which the GDT resides,
  15.784 -with {\tt entries} being the actual number of descriptor-entry
  15.785 -slots. All page frames must be mapped read-only within the guest's
  15.786 -address space, and the table must be large enough to contain Xen's
  15.787 -reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
  15.788 -
  15.789 -\end{quote}
  15.790 -
  15.791 -Many guest OSes will also wish to install LDTs; this is achieved by
  15.792 -using {\tt mmu\_update()} with an extended command, passing the
  15.793 -linear address of the LDT base along with the number of entries. No
  15.794 -special safety checks are required; Xen needs to perform this task
  15.795 -simply since {\tt lldt} requires CPL 0.
  15.796 -
  15.797 -
  15.798 -Xen also allows guest operating systems to update just an 
  15.799 -individual segment descriptor in the GDT or LDT:  
  15.800 -
  15.801 -\begin{quote}
  15.802 -\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
  15.803 -unsigned long word2)}
  15.804 -
  15.805 -Update the GDT/LDT entry at machine address {\tt ma}; the new
  15.806 -8-byte descriptor is stored in {\tt word1} and {\tt word2}.
  15.807 -Xen performs a number of checks to ensure the descriptor is 
  15.808 -valid. 
  15.809 -
  15.810 -\end{quote}
  15.811 -
  15.812 -Guest OSes can use the above in place of context switching entire 
  15.813 -LDTs (or the GDT) when the number of changing descriptors is small. 
  15.814 -
  15.815 -\section{Context Switching} 
  15.816 -
  15.817 -When a guest OS wishes to context switch between two processes, 
  15.818 -it can use the page table and segmentation hypercalls described
  15.819 -above to perform the bulk of the privileged work. In addition, 
  15.820 -however, it will need to invoke Xen to switch the kernel (ring 1) 
  15.821 -stack pointer: 
  15.822 -
  15.823 -\begin{quote} 
  15.824 -\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
  15.825 -
  15.826 -Request kernel stack switch from hypervisor; {\tt ss} is the new 
  15.827 -stack segment, while {\tt esp} is the new stack pointer. 
  15.828 -
  15.829 -\end{quote} 
  15.830 -
  15.831 -A final useful hypercall for context switching allows ``lazy'' 
  15.832 -save and restore of floating point state: 
  15.833 -
  15.834 -\begin{quote}
  15.835 -\hypercall{fpu\_taskswitch(void)} 
  15.836 -
  15.837 -This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
  15.838 -control register; this means that the next attempt to use floating
  15.839 -point will cause a fault which the guest OS can catch. Typically it will
  15.840 -then save/restore the FP state, and clear the {\tt TS} bit. 
  15.841 -\end{quote} 
  15.842 -
  15.843 -This is provided as an optimization only; guest OSes can also choose
  15.844 -to save and restore FP state on all context switches for simplicity. 
  15.845 -
  15.846 -
  15.847 -\section{Physical Memory Management}
  15.848 -
  15.849 -As mentioned previously, each domain has a maximum and current 
  15.850 -memory allocation. The maximum allocation, set at domain creation 
  15.851 -time, cannot be modified. However a domain can choose to reduce 
  15.852 -and subsequently grow its current allocation by using the
  15.853 -following call: 
  15.854 -
  15.855 -\begin{quote} 
  15.856 -\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
  15.857 -  unsigned long nr\_extents, unsigned int extent\_order)}
  15.858 -
  15.859 -Increase or decrease current memory allocation (as determined by 
  15.860 -the value of {\tt op}). Each invocation provides a list of 
  15.861 -extents each of which is $2^s$ pages in size, 
  15.862 -where $s$ is the value of {\tt extent\_order}. 
  15.863 -
  15.864 -\end{quote} 
  15.865 -
  15.866 -In addition to simply reducing or increasing the current memory
  15.867 -allocation via a `balloon driver', this call is also useful for 
  15.868 -obtaining contiguous regions of machine memory when required (e.g. 
  15.869 -for certain PCI devices, or if using superpages).  
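A balloon driver's inner loop might therefore be sketched as below;
the {\tt MEMOP\_*} names and the wrapper are assumptions, and error
handling is omitted.

\begin{verbatim}
/* Give one 4K page (a single extent of order 0) back to Xen,
 * then later reclaim one page. */
static void balloon_cycle(unsigned long mfn)
{
    HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, &mfn, 1, 0);

    /* ... some time later: Xen writes the MFN of the newly
     * allocated page back into the extent list. */
    HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, &mfn, 1, 0);
}
\end{verbatim}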
  15.870 -
  15.871 -
  15.872 -\section{Inter-Domain Communication}
  15.873 -\label{s:idc} 
  15.874 -
  15.875 -Xen provides a simple asynchronous notification mechanism via
  15.876 -\emph{event channels}. Each domain has a set of end-points (or
  15.877 -\emph{ports}) which may be bound to an event source (e.g. a physical
  15.878 -IRQ, a virtual IRQ, or a port in another domain). When a pair of
  15.879 -end-points in two different domains are bound together, then a `send'
  15.880 -operation on one will cause an event to be received by the destination
  15.881 -domain.
  15.882 -
  15.883 -The control and use of event channels involves the following hypercall: 
  15.884 -
  15.885 -\begin{quote}
  15.886 -\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
  15.887 -
  15.888 -Inter-domain event-channel management; {\tt op} is a discriminated 
  15.889 -union which allows the following 7 operations: 
  15.890 -
  15.891 -\begin{description} 
  15.892 -
  15.893 -\item[\it alloc\_unbound:] allocate a free (unbound) local
  15.894 -  port and prepare for connection from a specified domain. 
  15.895 -\item[\it bind\_virq:] bind a local port to a virtual 
  15.896 -IRQ; any particular VIRQ can be bound to at most one port per domain. 
  15.897 -\item[\it bind\_pirq:] bind a local port to a physical IRQ;
  15.898 -once more, a given pIRQ can be bound to at most one port per
  15.899 -domain. Furthermore the calling domain must be sufficiently
  15.900 -privileged.
  15.901 -\item[\it bind\_interdomain:] construct an interdomain event 
  15.902 -channel; in general, the target domain must have previously allocated 
  15.903 -an unbound port for this channel, although this can be bypassed by 
  15.904 -privileged domains during domain setup. 
  15.905 -\item[\it close:] close an interdomain event channel. 
  15.906 -\item[\it send:] send an event to the remote end of an 
  15.907 -interdomain event channel. 
  15.908 -\item[\it status:] determine the current status of a local port. 
  15.909 -\end{description} 
  15.910 -
  15.911 -For more details see
  15.912 -{\tt xen/include/public/event\_channel.h}. 
  15.913 -
  15.914 -\end{quote} 
  15.915 -
  15.916 -Event channels are the fundamental communication primitive between 
  15.917 -Xen domains and seamlessly support SMP. However they provide little
  15.918 -bandwidth for communication {\sl per se}, and hence are typically 
  15.919 -married with a piece of shared memory to produce effective and 
  15.920 -high-performance inter-domain communication. 
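Putting the pieces together, the sketch below allocates an unbound
port and later signals the channel once the peer has bound to it.
The union field names are assumptions in the spirit of
{\tt event\_channel.h} rather than its exact layout.

\begin{verbatim}
static int notify_peer(domid_t peer)
{
    static int local_port = -1;
    evtchn_op_t op;

    if (local_port < 0) {
        /* Allocate a port that 'peer' may later bind to. */
        op.cmd = EVTCHNOP_alloc_unbound;
        op.u.alloc_unbound.dom = peer;
        if (HYPERVISOR_event_channel_op(&op) != 0)
            return -1;
        local_port = op.u.alloc_unbound.port;
    }

    /* Signal the remote end (assumes the peer has bound its end). */
    op.cmd = EVTCHNOP_send;
    op.u.send.local_port = local_port;
    return HYPERVISOR_event_channel_op(&op);
}
\end{verbatim}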
  15.921 -
  15.922 -Safe sharing of memory pages between guest OSes is carried out by
  15.923 -granting access on a per page basis to individual domains. This is
  15.924 -achieved by using the {\tt grant\_table\_op()} hypercall.
  15.925 -
  15.926 -\begin{quote}
  15.927 -\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
  15.928 -
  15.929 -Grant or remove access to a particular page to a particular domain. 
  15.930 -
  15.931 -\end{quote} 
  15.932 -
  15.933 -This is not currently widely in use by guest operating systems, but 
  15.934 -we intend to integrate support more fully in the near future. 
  15.935 -
  15.936 -\section{PCI Configuration} 
  15.937 -
  15.938 -Domains with physical device access (i.e.\ driver domains) receive
  15.939 -limited access to certain PCI devices (bus address space and
  15.940 -interrupts). However many guest operating systems attempt to 
  15.941 -determine the PCI configuration by directly accessing the PCI BIOS, 
  15.942 -which cannot be allowed for safety. 
  15.943 -
  15.944 -Instead, Xen provides the following hypercall: 
  15.945 -
  15.946 -\begin{quote}
  15.947 -\hypercall{physdev\_op(void *physdev\_op)}
  15.948 -
  15.949 -Perform a PCI configuration operation; depending on the value 
  15.950 -of {\tt physdev\_op} this can be a PCI config read, a PCI config 
  15.951 -write, or a small number of other queries. 
  15.952 -
  15.953 -\end{quote} 
  15.954 -
  15.955 -
  15.956 -For examples of using {\tt physdev\_op()}, see the 
  15.957 -Xen-specific PCI code in the linux sparse tree. 
  15.958 -
  15.959 -\section{Administrative Operations}
  15.960 -\label{s:dom0ops}
  15.961 -
  15.962 -A large number of control operations are available to a sufficiently
  15.963 -privileged domain (typically domain 0). These allow the creation and
  15.964 -management of new domains, for example. A complete list is given 
  15.965 -below: for more details on any or all of these, please see 
  15.966 -{\tt xen/include/public/dom0\_ops.h}. 
  15.967 -
  15.968 -
  15.969 -\begin{quote}
  15.970 -\hypercall{dom0\_op(dom0\_op\_t *op)} 
  15.971 -
  15.972 -Administrative domain operations for domain management. The options are:
  15.973 -
  15.974 -\begin{description} 
  15.975 -\item [\it DOM0\_CREATEDOMAIN:] create a new domain
  15.976 -
  15.977 -\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
  15.978 -queue. 
  15.979 -
  15.980 -\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
  15.981 -  once again. 
  15.982 -
  15.983 -\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
  15.984 -with a domain
  15.985 -
  15.986 -\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
  15.987 -
  15.988 -\item [\it DOM0\_SCHEDCTL:]
  15.989 -
  15.990 -\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
  15.991 -
  15.992 -\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
  15.993 -
  15.994 -\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
  15.995 -
  15.996 -\item [\it DOM0\_GETPAGEFRAMEINFO:] 
  15.997 -
  15.998 -\item [\it DOM0\_GETPAGEFRAMEINFO2:]
  15.999 -
 15.1000 -\item [\it DOM0\_IOPL:] set I/O privilege level
 15.1001 -
 15.1002 -\item [\it DOM0\_MSR:] read or write model specific registers
 15.1003 -
 15.1004 -\item [\it DOM0\_DEBUG:] interactively invoke the debugger
 15.1005 -
 15.1006 -\item [\it DOM0\_SETTIME:] set system time
 15.1007 -
 15.1008 -\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
 15.1009 -
 15.1010 -\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
 15.1011 -
 15.1012 -\item [\it DOM0\_GETTBUFS:] get information about the size and location of
 15.1013 -                      the trace buffers (only on trace-buffer enabled builds)
 15.1014 -
 15.1015 -\item [\it DOM0\_PHYSINFO:] get information about the host machine
 15.1016 -
 15.1017 -\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
 15.1018 -
 15.1019 -\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
 15.1020 -
 15.1021 -\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
 15.1022 -
 15.1023 -\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
 15.1024 -
 15.1025 -\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
 15.1026 -
 15.1027 -\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
 15.1028 -\end{description} 
 15.1029 -\end{quote} 
 15.1030 -
 15.1031 -Most of the above are best understood by looking at the code 
 15.1032 -implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
 15.1033 -the user-space tools that use them (mostly in {\tt tools/libxc}). 
 15.1034 -
 15.1035 -\section{Debugging Hypercalls} 
 15.1036 -
 15.1037 -A few additional hypercalls are mainly useful for debugging: 
 15.1038 -
 15.1039 -\begin{quote} 
 15.1040 -\hypercall{console\_io(int cmd, int count, char *str)}
 15.1041 -
 15.1042 -Use Xen to interact with the console; operations are:
 15.1043 -
 15.1044 -{\it CONSOLEIO\_write}: Output count characters from buffer str.
 15.1045 -
 15.1046 -{\it CONSOLEIO\_read}: Input at most count characters into buffer str.
 15.1047 -\end{quote} 
 15.1048 -
 15.1049 -A pair of hypercalls allows access to the underlying debug registers: 
 15.1050 -\begin{quote}
 15.1051 -\hypercall{set\_debugreg(int reg, unsigned long value)}
 15.1052 -
 15.1053 -Set debug register {\tt reg} to {\tt value} 
 15.1054 -
 15.1055 -\hypercall{get\_debugreg(int reg)}
 15.1056 -
 15.1057 -Return the contents of the debug register {\tt reg}
 15.1058 -\end{quote}
 15.1059 -
 15.1060 -And finally: 
 15.1061 -\begin{quote}
 15.1062 -\hypercall{xen\_version(int cmd)}
 15.1063 -
 15.1064 -Request Xen version number.
 15.1065 -\end{quote} 
 15.1066 -
 15.1067 -This is useful to ensure that user-space tools are in sync 
 15.1068 -with the underlying hypervisor. 
 15.1069 -
 15.1070 -\section{Deprecated Hypercalls}
 15.1071 -
 15.1072 -Xen is under constant development and refinement; as such there 
 15.1073 -are plans to improve the way in which various pieces of functionality 
 15.1074 -are exposed to guest OSes. 
 15.1075 -
 15.1076 -\begin{quote} 
 15.1077 -\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
 15.1078 -
 15.1079 -Toggle various memory management modes (in particular writable page
 15.1080 -tables and superpage support). 
 15.1081 -
 15.1082 -\end{quote} 
 15.1083 -
 15.1084 -This is likely to be replaced with mode values in the shared 
 15.1085 -information page since this is more resilient for resumption 
 15.1086 -after migration or checkpoint. 
 15.1087 -
 15.1088 -
 15.1089 -
 15.1090 -
 15.1091 -
 15.1092 -
 15.1093 +%% chapter hypercalls moved to hypercalls.tex
 15.1094 +\include{src/interface/hypercalls}
 15.1095  
 15.1096  
 15.1097  %% 
 15.1098 @@ -1173,279 +112,9 @@ after migration or checkpoint.
 15.1099  %% new scheduler... not clear how many of them there are...
 15.1100  %%
 15.1101  
 15.1102 -\begin{comment}
 15.1103 -
 15.1104 -\chapter{Scheduling API}  
 15.1105 -
 15.1106 -The scheduling API is used by both the schedulers described above and should
 15.1107 -also be used by any new schedulers.  It provides a generic interface and also
 15.1108 -implements much of the ``boilerplate'' code.
 15.1109 -
 15.1110 -Schedulers conforming to this API are described by the following
 15.1111 -structure:
 15.1112 -
 15.1113 -\begin{verbatim}
 15.1114 -struct scheduler
 15.1115 -{
 15.1116 -    char *name;             /* full name for this scheduler      */
 15.1117 -    char *opt_name;         /* option name for this scheduler    */
 15.1118 -    unsigned int sched_id;  /* ID for this scheduler             */
 15.1119 -
 15.1120 -    int          (*init_scheduler) ();
 15.1121 -    int          (*alloc_task)     (struct task_struct *);
 15.1122 -    void         (*add_task)       (struct task_struct *);
 15.1123 -    void         (*free_task)      (struct task_struct *);
 15.1124 -    void         (*rem_task)       (struct task_struct *);
 15.1125 -    void         (*wake_up)        (struct task_struct *);
 15.1126 -    void         (*do_block)       (struct task_struct *);
 15.1127 -    task_slice_t (*do_schedule)    (s_time_t);
 15.1128 -    int          (*control)        (struct sched_ctl_cmd *);
 15.1129 -    int          (*adjdom)         (struct task_struct *,
 15.1130 -                                    struct sched_adjdom_cmd *);
 15.1131 -    s32          (*reschedule)     (struct task_struct *);
 15.1132 -    void         (*dump_settings)  (void);
 15.1133 -    void         (*dump_cpu_state) (int);
 15.1134 -    void         (*dump_runq_el)   (struct task_struct *);
 15.1135 -};
 15.1136 -\end{verbatim}
 15.1137 -
 15.1138 -The only method that {\em must} be implemented is
 15.1139 -{\tt do\_schedule()}.  However, if the {\tt wake\_up()} method is not
 15.1140 -implemented then waking tasks will not be put on the runqueue!
 15.1141 -
 15.1142 -The fields of the above structure are described in more detail below.
 15.1143 -
 15.1144 -\subsubsection{name}
 15.1145 -
 15.1146 -The name field should point to a descriptive ASCII string.
 15.1147 -
 15.1148 -\subsubsection{opt\_name}
 15.1149 -
 15.1150 -This field is the value of the {\tt sched=} boot-time option that will select
 15.1151 -this scheduler.
 15.1152 -
 15.1153 -\subsubsection{sched\_id}
 15.1154 -
 15.1155 -This is an integer that uniquely identifies this scheduler.  There should be a
 15.1156 -macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
 15.1157 -
 15.1158 -\subsubsection{init\_scheduler}
 15.1159 -
 15.1160 -\paragraph*{Purpose}
 15.1161 -
 15.1162 -This is a function for performing any scheduler-specific initialisation.  For
 15.1163 -instance, it might allocate memory for per-CPU scheduler data and initialise it
 15.1164 -appropriately.
 15.1165 -
 15.1166 -\paragraph*{Call environment}
 15.1167 -
 15.1168 -This function is called after the initialisation performed by the generic
 15.1169 -layer.  The function is called exactly once, for the scheduler that has been
 15.1170 -selected.
 15.1171 -
 15.1172 -\paragraph*{Return values}
 15.1173 -
 15.1174 -This should return negative on failure --- this will cause an
 15.1175 -immediate panic and the system will fail to boot.
 15.1176 -
 15.1177 -\subsubsection{alloc\_task}
 15.1178 -
 15.1179 -\paragraph*{Purpose}
 15.1180 -Called when a {\tt task\_struct} is allocated by the generic scheduler
 15.1181 -layer.  A particular scheduler implementation may use this method to
 15.1182 -allocate per-task data for this task.  It may use the {\tt
 15.1183 -sched\_priv} pointer in the {\tt task\_struct} to point to this data.
 15.1184 -
 15.1185 -\paragraph*{Call environment}
 15.1186 -The generic layer guarantees that the {\tt sched\_priv} field will
 15.1187 -remain intact from the time this method is called until the task is
 15.1188 -deallocated (so long as the scheduler implementation does not change
 15.1189 -it explicitly!).
 15.1190 -
 15.1191 -\paragraph*{Return values}
 15.1192 -Negative on failure.
 15.1193 -
 15.1194 -\subsubsection{add\_task}
 15.1195 -
 15.1196 -\paragraph*{Purpose}
 15.1197 -
 15.1198 -Called when a task is initially added by the generic layer.
 15.1199 -
 15.1200 -\paragraph*{Call environment}
 15.1201 -
 15.1202 -The fields in the {\tt task\_struct} are now filled out and available for use.
 15.1203 -Schedulers should implement appropriate initialisation of any per-task private
 15.1204 -information in this method.
 15.1205 -
 15.1206 -\subsubsection{free\_task}
 15.1207 -
 15.1208 -\paragraph*{Purpose}
 15.1209 -
 15.1210 -Schedulers should free the space used by any associated private data
 15.1211 -structures.
 15.1212 -
 15.1213 -\paragraph*{Call environment}
 15.1214 -
 15.1215 -This is called when a {\tt task\_struct} is about to be deallocated.
 15.1216 -The generic layer will have done generic task removal operations and
 15.1217 -(if implemented) called the scheduler's {\tt rem\_task} method before
 15.1218 -this method is called.
 15.1219 -
 15.1220 -\subsubsection{rem\_task}
 15.1221 -
 15.1222 -\paragraph*{Purpose}
 15.1223 -
 15.1224 -This is called when a task is being removed from scheduling (but is
 15.1225 -not yet being freed).
 15.1226 -
 15.1227 -\subsubsection{wake\_up}
 15.1228 -
 15.1229 -\paragraph*{Purpose}
 15.1230 -
 15.1231 -Called when a task is woken up, this method should put the task on the runqueue
 15.1232 -(or do the scheduler-specific equivalent action).
 15.1233 -
 15.1234 -\paragraph*{Call environment}
 15.1235 -
 15.1236 -The task is already set to state RUNNING.
 15.1237 -
 15.1238 -\subsubsection{do\_block}
 15.1239 -
 15.1240 -\paragraph*{Purpose}
 15.1241 -
 15.1242 -This function is called when a task is blocked.  This function should
 15.1243 -not remove the task from the runqueue.
 15.1244 -
 15.1245 -\paragraph*{Call environment}
 15.1246 -
 15.1247 -The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state changed to
 15.1248 -TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
 15.1249 -  do\_schedule} method will be made after this method returns, in
 15.1250 -order to select the next task to run.
 15.1251 -
 15.1252 -\subsubsection{do\_schedule}
 15.1253 -
 15.1254 -This method must be implemented.
 15.1255 -
 15.1256 -\paragraph*{Purpose}
 15.1257 -
 15.1258 -The method is called each time a new task must be chosen for scheduling on the
 15.1259 -current CPU.  The current time is passed as the single argument (the current
 15.1260 -task can be found using the {\tt current} macro).
 15.1261 -
 15.1262 -This method should select the next task to run on this CPU and set its minimum
 15.1263 -time to run as well as returning the data described below.
 15.1264 -
 15.1265 -This method should also take the appropriate action if the previous
 15.1266 -task has blocked, e.g. removing it from the runqueue.
 15.1267 -
 15.1268 -\paragraph*{Call environment}
 15.1269 -
 15.1270 -The other fields in the {\tt task\_struct} are updated by the generic layer,
 15.1271 -which also performs all Xen-specific tasks and performs the actual task switch
 15.1272 -(unless the previous task has been chosen again).
 15.1273 -
 15.1274 -This method is called with the {\tt schedule\_lock} held for the current CPU
 15.1275 -and local interrupts disabled.
 15.1276 -
 15.1277 -\paragraph*{Return values}
 15.1278 -
 15.1279 -Must return a {\tt struct task\_slice} describing what task to run and how long
 15.1280 -for (at maximum).
 15.1281 -
 15.1282 -\subsubsection{control}
 15.1283 -
 15.1284 -\paragraph*{Purpose}
 15.1285 -
 15.1286 -This method is called for global scheduler control operations.  It takes a
 15.1287 -pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
 15.1288 -source data from or populate with data, depending on the value of the
 15.1289 -{\tt direction} field.
 15.1290 -
 15.1291 -\paragraph*{Call environment}
 15.1292 -
 15.1293 -The generic layer guarantees that when this method is called, the
 15.1294 -caller selected the correct scheduler ID, hence the scheduler's
 15.1295 -implementation does not need to sanity-check these parts of the call.
 15.1296 -
 15.1297 -\paragraph*{Return values}
 15.1298 -
 15.1299 -This function should return the value to be passed back to user space, hence it
 15.1300 -should either be 0 or an appropriate errno value.
 15.1301 -
 15.1302 -\subsubsection{sched\_adjdom}
 15.1303 -
 15.1304 -\paragraph*{Purpose}
 15.1305 -
 15.1306 -This method is called to adjust the scheduling parameters of a particular
 15.1307 -domain, or to query their current values.  The function should check
 15.1308 -the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
 15.1309 -order to determine which of these operations is being performed.
 15.1310 -
 15.1311 -\paragraph*{Call environment}
 15.1312 -
 15.1313 -The generic layer guarantees that the caller has specified the correct
 15.1314 -control interface version and scheduler ID and that the supplied {\tt
 15.1315 -task\_struct} will not be deallocated during the call (hence it is not
 15.1316 -necessary to {\tt get\_task\_struct}).
 15.1317 -
 15.1318 -\paragraph*{Return values}
 15.1319 -
 15.1320 -This function should return the value to be passed back to user space, hence it
 15.1321 -should either be 0 or an appropriate errno value.
 15.1322 -
 15.1323 -\subsubsection{reschedule}
 15.1324 -
 15.1325 -\paragraph*{Purpose}
 15.1326 -
 15.1327 -This method is called to determine if a reschedule is required as a result of a
 15.1328 -particular task.
 15.1329 -
 15.1330 -\paragraph*{Call environment}
 15.1331 -The generic layer will cause a reschedule if the current domain is the idle
 15.1332 -task or has exceeded its minimum time slice.  The
 15.1333 -generic layer guarantees that the task passed is not currently running but is
 15.1334 -on the runqueue.
 15.1335 -
 15.1336 -\paragraph*{Return values}
 15.1337 -
 15.1338 -Should return a mask of CPUs to cause a reschedule on.
 15.1339 -
 15.1340 -\subsubsection{dump\_settings}
 15.1341 -
 15.1342 -\paragraph*{Purpose}
 15.1343 -
 15.1344 -If implemented, this should dump any private global settings for this
 15.1345 -scheduler to the console.
 15.1346 -
 15.1347 -\paragraph*{Call environment}
 15.1348 -
 15.1349 -This function is called with interrupts enabled.
 15.1350 -
 15.1351 -\subsubsection{dump\_cpu\_state}
 15.1352 -
 15.1353 -\paragraph*{Purpose}
 15.1354 -
 15.1355 -This method should dump any private settings for the specified CPU.
 15.1356 -
 15.1357 -\paragraph*{Call environment}
 15.1358 -
 15.1359 -This function is called with interrupts disabled and the {\tt schedule\_lock}
 15.1360 -for the specified CPU held.
 15.1361 -
 15.1362 -\subsubsection{dump\_runq\_el}
 15.1363 -
 15.1364 -\paragraph*{Purpose}
 15.1365 -
 15.1366 -This method should dump any private settings for the specified task.
 15.1367 -
 15.1368 -\paragraph*{Call environment}
 15.1369 -
 15.1370 -This function is called with interrupts disabled and the {\tt schedule\_lock}
 15.1371 -for the task's CPU held.
 15.1372 -
 15.1373 -\end{comment} 
 15.1374 -
 15.1375 +%% \include{src/interface/scheduling}
 15.1376 +%% scheduling information moved to scheduling.tex
 15.1377 +%% still commented out
 15.1378  
 15.1379  
 15.1380  
 15.1381 @@ -1457,74 +126,9 @@ for the task's CPU held.
 15.1382  %% (and/or kip's stuff?) and write about that instead? 
 15.1383  %%
 15.1384  
 15.1385 -\begin{comment} 
 15.1386 -
 15.1387 -\chapter{Debugging}
 15.1388 -
 15.1389 -Xen provides tools for debugging both Xen and guest OSes.  Currently, the
 15.1390 -Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
 15.1391 -debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
 15.1392 -Buffer provides a lightweight means to log data about Xen's internal state and
 15.1393 -behaviour at runtime, for later analysis.
 15.1394 -
 15.1395 -\section{Pervasive Debugger}
 15.1396 -
 15.1397 -Information on using the pervasive debugger is available in pdb.txt.
 15.1398 -
 15.1399 -
 15.1400 -\section{Trace Buffer}
 15.1401 -
 15.1402 -The trace buffer provides a means to observe Xen's operation from domain 0.
 15.1403 -Trace events, inserted at key points in Xen's code, record data that can be
 15.1404 -read by the {\tt xentrace} tool.  Recording these events has a low overhead
 15.1405 -and hence the trace buffer may be useful for debugging timing-sensitive
 15.1406 -behaviours.
 15.1407 -
 15.1408 -\subsection{Internal API}
 15.1409 -
 15.1410 -To use the trace buffer functionality from within Xen, you must {\tt \#include
 15.1411 -<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
 15.1412 -events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
 15.1413 -2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
 15.1414 -(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
 15.1415 -will insert the event ID and data into the trace buffer, along with the current
 15.1416 -value of the CPU cycle-counter.  For builds without the trace buffer enabled,
 15.1417 -the macros expand to no-ops and thus can be left in place without incurring
 15.1418 -overheads.
 15.1419 -
 15.1420 -\subsection{Trace-enabled builds}
 15.1421 -
 15.1422 -By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
 15.1423 -is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
 15.1424 -either in {\tt <xen/config.h>} or on the gcc command line.
 15.1425 -
 15.1426 -The size (in pages) of the per-CPU trace buffers can be specified using the
 15.1427 -{\tt tbuf\_size=n } boot parameter to Xen.  If the size is set to 0, the trace
 15.1428 -buffers will be disabled.
 15.1429 -
 15.1430 -\subsection{Dumping trace data}
 15.1431 -
 15.1432 -When running a trace buffer build of Xen, trace data are written continuously
 15.1433 -into the buffer data areas, with newer data overwriting older data.  This data
 15.1434 -can be captured using the {\tt xentrace} program in domain 0.
 15.1435 -
 15.1436 -The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
 15.1437 -buffers into its address space.  It then periodically polls all the buffers for
 15.1438 -new data, dumping out any new records from each buffer in turn.  As a result,
 15.1439 -for machines with multiple (logical) CPUs, the trace buffer output will not be
 15.1440 -in overall chronological order.
 15.1441 -
 15.1442 -The output from {\tt xentrace} can be post-processed using {\tt
 15.1443 -xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
 15.1444 -{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
 15.1445 -trace points, there is an example format file in {\tt tools/xentrace/formats }.
 15.1446 -
 15.1447 -For more information, see the manual pages for {\tt xentrace}, {\tt
 15.1448 -xentrace\_format} and {\tt xentrace\_cpusplit}.
 15.1449 -
 15.1450 -\end{comment} 
 15.1451 -
 15.1452 -
 15.1453 +%% \include{src/interface/debugging}
 15.1454 +%% debugging information moved to debugging.tex
 15.1455 +%% still commented out
 15.1456  
 15.1457  
 15.1458  \end{document}
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/docs/src/interface/architecture.tex	Thu Sep 22 11:42:01 2005 -0600
    16.3 @@ -0,0 +1,140 @@
    16.4 +\chapter{Virtual Architecture}
    16.5 +
    16.6 +On a Xen-based system, the hypervisor itself runs in {\it ring 0}.  It
    16.7 +has full access to the physical memory available in the system and is
    16.8 +responsible for allocating portions of it to the domains.  Guest
    16.9 +operating systems run in and use {\it rings 1}, {\it 2} and {\it 3} as
   16.10 +they see fit. Segmentation is used to prevent the guest OS from
   16.11 +accessing the portion of the address space that is reserved for Xen.
   16.12 +We expect most guest operating systems will use ring 1 for their own
   16.13 +operation and place applications in ring 3.
   16.14 +
   16.15 +In this chapter we consider the basic virtual architecture provided by
   16.16 +Xen: the basic CPU state, exception and interrupt handling, and time.
   16.17 +Other aspects such as memory and device access are discussed in later
   16.18 +chapters.
   16.19 +
   16.20 +
   16.21 +\section{CPU state}
   16.22 +
   16.23 +All privileged state must be handled by Xen.  The guest OS has no
   16.24 +direct access to CR3 and is not permitted to update privileged bits in
   16.25 +EFLAGS. Guest OSes use \emph{hypercalls} to invoke operations in Xen;
   16.26 +these are analogous to system calls but occur from ring 1 to ring 0.
   16.27 +
   16.28 +A list of all hypercalls is given in Appendix~\ref{a:hypercalls}.
   16.29 +
   16.30 +
   16.31 +\section{Exceptions}
   16.32 +
   16.33 +A virtual IDT is provided --- a domain can submit a table of trap
   16.34 +handlers to Xen via the {\tt set\_trap\_table()} hypercall.  Most trap
   16.35 +handlers are identical to native x86 handlers, although the page-fault
   16.36 +handler is somewhat different.
   16.37 +
   16.38 +
   16.39 +\section{Interrupts and events}
   16.40 +
   16.41 +Interrupts are virtualized by mapping them to \emph{events}, which are
   16.42 +delivered asynchronously to the target domain using a callback
   16.43 +supplied via the {\tt set\_callbacks()} hypercall.  A guest OS can map
   16.44 +these events onto its standard interrupt dispatch mechanisms.  Xen is
   16.45 +responsible for determining the target domain that will handle each
   16.46 +physical interrupt source. For more details on the binding of event
   16.47 +sources to events, see Chapter~\ref{c:devices}.
   16.48 +
   16.49 +
   16.50 +\section{Time}
   16.51 +
   16.52 +Guest operating systems need to be aware of the passage of both real
   16.53 +(or wallclock) time and their own `virtual time' (the time for which
   16.54 +they have been executing). Furthermore, Xen has a notion of time which
   16.55 +is used for scheduling. The following notions of time are provided:
   16.56 +
   16.57 +\begin{description}
   16.58 +\item[Cycle counter time.]
   16.59 +
   16.60 +  This provides a fine-grained time reference.  The cycle counter time
   16.61 +  is used to accurately extrapolate the other time references.  On SMP
   16.62 +  machines it is currently assumed that the cycle counter time is
   16.63 +  synchronized between CPUs.  The current x86-based implementation
   16.64 +  achieves this within inter-CPU communication latencies.
   16.65 +
   16.66 +\item[System time.]
   16.67 +
   16.68 +  This is a 64-bit counter which holds the number of nanoseconds that
   16.69 +  have elapsed since system boot.
   16.70 +
   16.71 +\item[Wall clock time.]
   16.72 +
   16.73 +  This is the time of day in a Unix-style {\tt struct timeval}
   16.74 +  (seconds and microseconds since 1 January 1970, adjusted by leap
   16.75 +  seconds).  An NTP client hosted by {\it domain 0} can keep this
   16.76 +  value accurate.
   16.77 +
   16.78 +\item[Domain virtual time.]
   16.79 +
   16.80 +  This progresses at the same pace as system time, but only while a
   16.81 +  domain is executing --- it stops while a domain is de-scheduled.
   16.82 +  Therefore the share of the CPU that a domain receives is indicated
   16.83 +  by the rate at which its virtual time increases.
   16.84 +
   16.85 +\end{description}
   16.86 +
   16.87 +
   16.88 +Xen exports timestamps for system time and wall-clock time to guest
   16.89 +operating systems through a shared page of memory.  Xen also provides
   16.90 +the cycle counter time at the instant the timestamps were calculated,
   16.91 +and the CPU frequency in Hertz.  This allows the guest to extrapolate
   16.92 +system and wall-clock times accurately based on the current cycle
   16.93 +counter time.
   16.94 +
   16.95 +Since all timestamps need to be updated and read \emph{atomically},
   16.96 +two version numbers are also stored in the shared info page. The first
   16.97 +is incremented prior to an update, while the second is only
   16.98 +incremented afterwards. Thus a guest can be sure that it read a
   16.99 +consistent state by checking the two version numbers are equal.
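For illustration, a guest might read the time information with a retry
loop of the following shape.  This is a minimal C sketch: the structure
and field names are placeholders rather than the real shared info
layout, and production code would also need compiler barriers around
the copies.

\begin{verbatim}
/* Illustrative only: stands in for the real shared info time fields. */
typedef struct {
    volatile unsigned long version_pre;   /* incremented before update */
    volatile unsigned long version_post;  /* incremented after update  */
    unsigned long long     system_time;   /* ns since boot             */
    unsigned long long     tsc_stamp;     /* cycle counter at update   */
} time_info_t;

void read_time(const time_info_t *t,
               unsigned long long *sys, unsigned long long *tsc)
{
    unsigned long v;
    do {
        v    = t->version_post;     /* sample the 'after' counter      */
        *sys = t->system_time;      /* copy out the data words         */
        *tsc = t->tsc_stamp;
    } while (v != t->version_pre);  /* unequal => an update intervened */
}
\end{verbatim}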
  16.100 +
  16.101 +Xen includes a periodic ticker which sends a timer event to the
  16.102 +currently executing domain every 10ms.  The Xen scheduler also sends a
  16.103 +timer event whenever a domain is scheduled; this allows the guest OS
  16.104 +to adjust for the time that has passed while it has been inactive.  In
  16.105 +addition, Xen allows each domain to request that it receive a timer
  16.106 +event sent at a specified system time by using the {\tt
  16.107 +  set\_timer\_op()} hypercall.  Guest OSes may use this timer to
  16.108 +implement timeout values when they block.
  16.109 +
  16.110 +
  16.111 +
  16.112 +%% % akw: demoting this to a section -- not sure if there is any point
  16.113 +%% % though, maybe just remove it.
  16.114 +
  16.115 +\section{Xen CPU Scheduling}
  16.116 +
  16.117 +Xen offers a uniform API for CPU schedulers.  It is possible to choose
  16.118 +from a number of schedulers at boot and it should be easy to add more.
  16.119 +The BVT, Atropos and Round Robin schedulers are part of the normal Xen
  16.120 +distribution.  BVT provides proportional fair shares of the CPU to the
  16.121 +running domains.  Atropos can be used to reserve absolute shares of
  16.122 +the CPU for each domain.  Round-robin is provided as an example of
  16.123 +Xen's internal scheduler API.
  16.124 +
  16.125 +\paragraph*{Note: SMP host support}
  16.126 +Xen has always supported SMP host systems.  Domains are statically
  16.127 +assigned to CPUs, either at creation time or when manually pinning to
  16.128 +a particular CPU.  The current schedulers then run locally on each CPU
  16.129 +to decide which of the assigned domains should be run there. The
  16.130 +user-level control software can be used to perform coarse-grain
  16.131 +load-balancing between CPUs.
  16.132 +
  16.133 +
  16.134 +%% More information on the characteristics and use of these schedulers
  16.135 +%% is available in {\tt Sched-HOWTO.txt}.
  16.136 +
  16.137 +
  16.138 +\section{Privileged operations}
  16.139 +
  16.140 +Xen exports an extended interface to privileged domains (viz.\ {\it
  16.141 +  Domain 0}). This allows such domains to build and boot other domains
  16.142 +on the server, and provides control interfaces for managing
  16.143 +scheduling, memory, networking, and block devices.
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/docs/src/interface/debugging.tex	Thu Sep 22 11:42:01 2005 -0600
    17.3 @@ -0,0 +1,62 @@
    17.4 +\chapter{Debugging}
    17.5 +
    17.6 +Xen provides tools for debugging both Xen and guest OSes.  Currently, the
    17.7 +Pervasive Debugger provides a GDB stub, which provides facilities for symbolic
    17.8 +debugging of Xen itself and of OS kernels running on top of Xen.  The Trace
    17.9 +Buffer provides a lightweight means to log data about Xen's internal state and
   17.10 +behaviour at runtime, for later analysis.
   17.11 +
   17.12 +\section{Pervasive Debugger}
   17.13 +
   17.14 +Information on using the pervasive debugger is available in pdb.txt.
   17.15 +
   17.16 +
   17.17 +\section{Trace Buffer}
   17.18 +
   17.19 +The trace buffer provides a means to observe Xen's operation from domain 0.
   17.20 +Trace events, inserted at key points in Xen's code, record data that can be
   17.21 +read by the {\tt xentrace} tool.  Recording these events has a low overhead
   17.22 +and hence the trace buffer may be useful for debugging timing-sensitive
   17.23 +behaviours.
   17.24 +
   17.25 +\subsection{Internal API}
   17.26 +
   17.27 +To use the trace buffer functionality from within Xen, you must {\tt \#include
   17.28 +<xen/trace.h>}, which contains definitions related to the trace buffer.  Trace
   17.29 +events are inserted into the buffer using the {\tt TRACE\_xD} ({\tt x} = 0, 1,
   17.30 +2, 3, 4 or 5) macros.  These all take an event number, plus {\tt x} additional
   17.31 +(32-bit) data as their arguments.  For trace buffer-enabled builds of Xen these
   17.32 +will insert the event ID and data into the trace buffer, along with the current
   17.33 +value of the CPU cycle-counter.  For builds without the trace buffer enabled,
   17.34 +the macros expand to no-ops and thus can be left in place without incurring
   17.35 +overheads.
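As a purely illustrative example, a two-word event might be recorded as
follows; the event ID shown here is hypothetical, not a real trace
point.

\begin{verbatim}
#include <xen/trace.h>

#define TRC_MY_EVENT 0x0001   /* hypothetical event ID */

void record_wakeup(unsigned int dom, unsigned int port)
{
    /* Logs the event ID, two 32-bit data words and the current cycle
     * counter; compiles to a no-op in non-trace-buffer builds.       */
    TRACE_2D(TRC_MY_EVENT, dom, port);
}
\end{verbatim}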
   17.36 +
   17.37 +\subsection{Trace-enabled builds}
   17.38 +
   17.39 +By default, the trace buffer is enabled only in debug builds (i.e. {\tt NDEBUG}
   17.40 +is not defined).  It can be enabled separately by defining {\tt TRACE\_BUFFER},
   17.41 +either in {\tt <xen/config.h>} or on the gcc command line.
   17.42 +
   17.43 +The size (in pages) of the per-CPU trace buffers can be specified using the
   17.44 +{\tt tbuf\_size=n } boot parameter to Xen.  If the size is set to 0, the trace
   17.45 +buffers will be disabled.
   17.46 +
   17.47 +\subsection{Dumping trace data}
   17.48 +
   17.49 +When running a trace buffer build of Xen, trace data are written continuously
   17.50 +into the buffer data areas, with newer data overwriting older data.  This data
   17.51 +can be captured using the {\tt xentrace} program in domain 0.
   17.52 +
   17.53 +The {\tt xentrace} tool uses {\tt /dev/mem} in domain 0 to map the trace
   17.54 +buffers into its address space.  It then periodically polls all the buffers for
   17.55 +new data, dumping out any new records from each buffer in turn.  As a result,
   17.56 +for machines with multiple (logical) CPUs, the trace buffer output will not be
   17.57 +in overall chronological order.
   17.58 +
   17.59 +The output from {\tt xentrace} can be post-processed using {\tt
   17.60 +xentrace\_cpusplit} (used to split trace data out into per-cpu log files) and
   17.61 +{\tt xentrace\_format} (used to pretty-print trace data).  For the predefined
   17.62 +trace points, there is an example format file in {\tt tools/xentrace/formats }.
   17.63 +
   17.64 +For more information, see the manual pages for {\tt xentrace}, {\tt
   17.65 +xentrace\_format} and {\tt xentrace\_cpusplit}.
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/docs/src/interface/devices.tex	Thu Sep 22 11:42:01 2005 -0600
    18.3 @@ -0,0 +1,178 @@
    18.4 +\chapter{Devices}
    18.5 +\label{c:devices}
    18.6 +
    18.7 +Devices such as network and disk are exported to guests using a split
    18.8 +device driver.  The device driver domain, which accesses the physical
    18.9 +device directly, also runs a \emph{backend} driver, serving requests to
   18.10 +that device from guests.  Each guest uses a simple \emph{frontend}
   18.11 +driver to access the backend.  Communication between these domains is
   18.12 +composed of two parts: First, data is placed onto a shared memory page
   18.13 +between the domains.  Second, an event channel between the two domains
   18.14 +is used to pass notification that data is outstanding.  This
   18.15 +separation of notification from data transfer allows message batching,
   18.16 +and results in very efficient device access.
   18.17 +
   18.18 +Event channels are used extensively in device virtualization; each
   18.19 +domain has a number of end-points or \emph{ports} each of which may be
   18.20 +bound to one of the following \emph{event sources}:
   18.21 +\begin{itemize}
   18.22 +  \item a physical interrupt from a real device, 
   18.23 +  \item a virtual interrupt (callback) from Xen, or 
   18.24 +  \item a signal from another domain 
   18.25 +\end{itemize}
   18.26 +
   18.27 +Events are lightweight and do not carry much information beyond the
   18.28 +source of the notification. Hence when performing bulk data transfer,
   18.29 +events are typically used as synchronization primitives over a shared
   18.30 +memory transport. Event channels are managed via the {\tt
   18.31 +  event\_channel\_op()} hypercall; for more details see
   18.32 +Section~\ref{s:idc}.
   18.33 +
   18.34 +This chapter focuses on some individual device interfaces available to
   18.35 +Xen guests.
   18.36 +
   18.37 +
   18.38 +\section{Network I/O}
   18.39 +
   18.40 +Virtual network device services are provided by shared memory
   18.41 +communication with a backend domain.  From the point of view of other
   18.42 +domains, the backend may be viewed as a virtual ethernet switch
   18.43 +element with each domain having one or more virtual network interfaces
   18.44 +connected to it.
   18.45 +
   18.46 +\subsection{Backend Packet Handling}
   18.47 +
   18.48 +The backend driver is responsible for a variety of actions relating to
   18.49 +the transmission and reception of packets from the physical device.
   18.50 +With regard to transmission, the backend performs these key actions:
   18.51 +
   18.52 +\begin{itemize}
   18.53 +\item {\bf Validation:} To ensure that domains do not attempt to
   18.54 +  generate invalid (e.g. spoofed) traffic, the backend driver may
   18.55 +  validate headers ensuring that source MAC and IP addresses match the
   18.56 +  interface that they have been sent from.
   18.57 +
   18.58 +  Validation functions can be configured using standard firewall rules
   18.59 +  ({\small{\tt iptables}} in the case of Linux).
   18.60 +  
   18.61 +\item {\bf Scheduling:} Since a number of domains can share a single
   18.62 +  physical network interface, the backend must mediate access when
   18.63 +  several domains each have packets queued for transmission.  This
   18.64 +  general scheduling function subsumes basic shaping or rate-limiting
   18.65 +  schemes.
   18.66 +  
   18.67 +\item {\bf Logging and Accounting:} The backend domain can be
   18.68 +  configured with classifier rules that control how packets are
   18.69 +  accounted or logged.  For example, log messages might be generated
   18.70 +  whenever a domain attempts to send a TCP packet containing a SYN.
   18.71 +\end{itemize}
   18.72 +
   18.73 +On receipt of incoming packets, the backend acts as a simple
   18.74 +demultiplexer: Packets are passed to the appropriate virtual interface
   18.75 +after any necessary logging and accounting have been carried out.
   18.76 +
   18.77 +\subsection{Data Transfer}
   18.78 +
   18.79 +Each virtual interface uses two ``descriptor rings'', one for
   18.80 +transmit, the other for receive.  Each descriptor identifies a block
   18.81 +of contiguous physical memory allocated to the domain.
   18.82 +
   18.83 +The transmit ring carries packets to transmit from the guest to the
   18.84 +backend domain.  The return path of the transmit ring carries messages
   18.85 +indicating that the contents have been physically transmitted and the
   18.86 +backend no longer requires the associated pages of memory.
   18.87 +
   18.88 +To receive packets, the guest places descriptors of unused pages on
   18.89 +the receive ring.  The backend will return received packets by
   18.90 +exchanging these pages in the domain's memory with new pages
   18.91 +containing the received data, and passing back descriptors regarding
   18.92 +the new packets on the ring.  This zero-copy approach allows the
   18.93 +backend to maintain a pool of free pages to receive packets into, and
   18.94 +then deliver them to appropriate domains after examining their
   18.95 +headers.
   18.96 +
   18.97 +% Real physical addresses are used throughout, with the domain
   18.98 +% performing translation from pseudo-physical addresses if that is
   18.99 +% necessary.
  18.100 +
  18.101 +If a domain does not keep its receive ring stocked with empty buffers
  18.102 +then packets destined to it may be dropped.  This provides some
  18.103 +defence against receive livelock problems because an overloaded domain
  18.104 +will cease to receive further data.  Similarly, on the transmit path,
  18.105 +it provides the application with feedback on the rate at which packets
  18.106 +are able to leave the system.
  18.107 +
  18.108 +Flow control on rings is achieved by including a pair of producer
  18.109 +indexes on the shared ring page.  Each side will maintain a private
  18.110 +consumer index indicating the next outstanding message.  In this
  18.111 +manner, the domains cooperate to divide the ring into two message
  18.112 +lists, one in each direction.  Notification is decoupled from the
  18.113 +immediate placement of new messages on the ring; the event channel
  18.114 +will be used to generate notification when {\em either} a certain
  18.115 +number of outstanding messages are queued, {\em or} a specified number
  18.116 +of nanoseconds have elapsed since the oldest message was placed on the
  18.117 +ring.
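The index arithmetic can be sketched as follows.  This is an
illustrative fragment only; the structure layout and names do not
correspond to the actual ring definitions.

\begin{verbatim}
#define RING_SIZE 256          /* illustrative ring capacity          */

typedef struct {
    unsigned int req_prod;     /* shared: advanced by the frontend    */
    unsigned int resp_prod;    /* shared: advanced by the backend     */
    /* ... descriptor slots ...                                       */
} shared_ring_t;

static unsigned int req_cons;  /* backend-private consumer index      */

void backend_consume(shared_ring_t *ring)
{
    /* Slots between req_cons and req_prod hold outstanding requests. */
    while (req_cons != ring->req_prod) {
        /* process the descriptor in slot (req_cons % RING_SIZE) ...  */
        req_cons++;
    }
}
\end{verbatim}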
  18.118 +
  18.119 +%% Not sure if my version is any better -- here is what was here
  18.120 +%% before: Synchronization between the backend domain and the guest is
  18.121 +%% achieved using counters held in shared memory that is accessible to
  18.122 +%% both.  Each ring has associated producer and consumer indices
  18.123 +%% indicating the area in the ring that holds descriptors that contain
  18.124 +%% data.  After receiving {\it n} packets or {\t nanoseconds} after
  18.125 +%% receiving the first packet, the hypervisor sends an event to the
  18.126 +%% domain.
  18.127 +
  18.128 +
  18.129 +\section{Block I/O}
  18.130 +
  18.131 +All guest OS disk access goes through the virtual block device (VBD)
  18.132 +interface.  This interface allows domains access to portions of block
  18.133 +storage devices visible to the block backend device.  The VBD
  18.134 +interface is a split driver, similar to the network interface
  18.135 +described above.  A single shared memory ring is used between the
  18.136 +frontend and backend drivers, across which read and write messages are
  18.137 +sent.
  18.138 +
  18.139 +Any block device accessible to the backend domain, including
  18.140 +network-based block (iSCSI, *NBD, etc.), loopback and LVM/MD devices,
  18.141 +can be exported as a VBD.  Each VBD is mapped to a device node in the
  18.142 +guest, specified in the guest's startup configuration.
  18.143 +
  18.144 +Old (Xen 1.2) virtual disks are not supported under Xen 2.0, since
  18.145 +similar functionality can be achieved using the more complete LVM
  18.146 +system, which is already in widespread use.
  18.147 +
  18.148 +\subsection{Data Transfer}
  18.149 +
  18.150 +The single ring between the guest and the block backend supports three
  18.151 +messages:
  18.152 +
  18.153 +\begin{description}
  18.154 +\item [{\small {\tt PROBE}}:] Return a list of the VBDs available to
  18.155 +  this guest from the backend.  The request includes a descriptor of a
  18.156 +  free page into which the reply will be written by the backend.
  18.157 +
  18.158 +\item [{\small {\tt READ}}:] Read data from the specified block
  18.159 +  device.  The frontend identifies the device and location to read
  18.160 +  from and attaches pages for the data to be copied to (typically via
  18.161 +  DMA from the device).  The backend acknowledges completed read
  18.162 +  requests as they finish.
  18.163 +
  18.164 +\item [{\small {\tt WRITE}}:] Write data to the specified block
  18.165 +  device.  This functions essentially as {\small {\tt READ}}, except
  18.166 +  that the data moves to the device instead of from it.
  18.167 +\end{description}
  18.168 +
  18.169 +%% um... some old text: In overview, the same style of descriptor-ring
  18.170 +%% that is used for network packets is used here.  Each domain has one
  18.171 +%% ring that carries operation requests to the hypervisor and carries
  18.172 +%% the results back again.
  18.173 +
  18.174 +%% Rather than copying data, the backend simply maps the domain's
  18.175 +%% buffers in order to enable direct DMA to them.  The act of mapping
  18.176 +%% the buffers also increases the reference counts of the underlying
  18.177 +%% pages, so that the unprivileged domain cannot try to return them to
  18.178 +%% the hypervisor, install them as page tables, or any other unsafe
  18.179 +%% behaviour.
  18.180 +%%
  18.181 +%% % block API here
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/docs/src/interface/further_info.tex	Thu Sep 22 11:42:01 2005 -0600
    19.3 @@ -0,0 +1,49 @@
    19.4 +\chapter{Further Information}
    19.5 +
    19.6 +If you have questions that are not answered by this manual, the
    19.7 +sources of information listed below may be of interest to you.  Note
    19.8 +that bug reports, suggestions and contributions related to the
    19.9 +software (or the documentation) should be sent to the Xen developers'
   19.10 +mailing list (address below).
   19.11 +
   19.12 +
   19.13 +\section{Other documentation}
   19.14 +
   19.15 +If you are mainly interested in using (rather than developing for)
   19.16 +Xen, the \emph{Xen Users' Manual} is distributed in the {\tt docs/}
   19.17 +directory of the Xen source distribution.
   19.18 +
   19.19 +% Various HOWTOs are also available in {\tt docs/HOWTOS}.
   19.20 +
   19.21 +
   19.22 +\section{Online references}
   19.23 +
   19.24 +The official Xen web site is found at:
   19.25 +\begin{quote}
   19.26 +{\tt http://www.cl.cam.ac.uk/Research/SRG/netos/xen/}
   19.27 +\end{quote}
   19.28 +
   19.29 +This contains links to the latest versions of all on-line
   19.30 +documentation.
   19.31 +
   19.32 +
   19.33 +\section{Mailing lists}
   19.34 +
   19.35 +There are currently four official Xen mailing lists:
   19.36 +
   19.37 +\begin{description}
   19.38 +\item[xen-devel@lists.xensource.com] Used for development
   19.39 +  discussions and bug reports.  Subscribe at: \\
   19.40 +  {\small {\tt http://lists.xensource.com/xen-devel}}
   19.41 +\item[xen-users@lists.xensource.com] Used for installation and usage
   19.42 +  discussions and requests for help.  Subscribe at: \\
   19.43 +  {\small {\tt http://lists.xensource.com/xen-users}}
   19.44 +\item[xen-announce@lists.xensource.com] Used for announcements only.
   19.45 +  Subscribe at: \\
   19.46 +  {\small {\tt http://lists.xensource.com/xen-announce}}
   19.47 +\item[xen-changelog@lists.xensource.com] Changelog feed
   19.48 +  from the unstable and 2.0 trees; developer-oriented.  Subscribe at: \\
   19.49 +  {\small {\tt http://lists.xensource.com/xen-changelog}}
   19.50 +\end{description}
   19.51 +
   19.52 +Of these, xen-devel is the most active.
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/docs/src/interface/hypercalls.tex	Thu Sep 22 11:42:01 2005 -0600
    20.3 @@ -0,0 +1,524 @@
    20.4 +
    20.5 +\newcommand{\hypercall}[1]{\vspace{2mm}{\sf #1}}
    20.6 +
    20.7 +\chapter{Xen Hypercalls}
    20.8 +\label{a:hypercalls}
    20.9 +
   20.10 +Hypercalls represent the procedural interface to Xen; this appendix 
   20.11 +categorizes and describes the current set of hypercalls. 
   20.12 +
   20.13 +\section{Invoking Hypercalls} 
   20.14 +
   20.15 +Hypercalls are invoked in a manner analogous to system calls in a
   20.16 +conventional operating system; a software interrupt is issued which
   20.17 +vectors to an entry point within Xen. On x86\_32 machines the
   20.18 +instruction required is {\tt int \$0x82}; the (real) IDT is set up so
   20.19 +that this may only be issued from within ring 1. The particular 
   20.20 +hypercall to be invoked is contained in {\tt EAX} --- a list 
   20.21 +mapping these values to symbolic hypercall names can be found 
   20.22 +in {\tt xen/include/public/xen.h}. 
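A one-argument wrapper might therefore look as follows.  This is a
sketch in the style of the XenLinux helpers; consult the sparse tree
for the definitive macros (the register convention shown, with the
first argument in {\tt EBX}, is an assumption of this example).

\begin{verbatim}
/* x86_32 hypercall sketch: number in EAX, first argument in EBX. */
static inline int hypercall1(int op, unsigned long arg)
{
    int ret;
    __asm__ __volatile__ ( "int $0x82"
                           : "=a" (ret)
                           : "0" (op), "b" (arg)
                           : "memory" );
    return ret;
}
\end{verbatim}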
   20.23 +
   20.24 +On some occasions a set of hypercalls will be required to carry
   20.25 +out a higher-level function; a good example is when a guest 
   20.26 +operating system wishes to context switch to a new process which 
   20.27 +requires updating various privileged CPU state. As an optimization
   20.28 +for these cases, there is a generic mechanism to issue a set of 
   20.29 +hypercalls as a batch: 
   20.30 +
   20.31 +\begin{quote}
   20.32 +\hypercall{multicall(void *call\_list, int nr\_calls)}
   20.33 +
   20.34 +Execute a series of hypervisor calls; {\tt nr\_calls} is the length of
   20.35 +the array of {\tt multicall\_entry\_t} structures pointed to by {\tt
   20.36 +call\_list}. Each entry contains the hypercall operation code followed
   20.37 +by up to 7 word-sized arguments.
   20.38 +\end{quote}
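For example, a guest might batch the two context-switch hypercalls
described later in this appendix.  This sketch assumes an entry layout
matching the description above and takes the {\tt \_\_HYPERVISOR\_*}
operation numbers from {\tt xen/include/public/xen.h}.

\begin{verbatim}
typedef struct {
    unsigned long op;       /* hypercall operation code */
    unsigned long args[7];  /* word-sized arguments     */
} multicall_entry_t;

void switch_stack_and_fpu(unsigned long ss, unsigned long esp)
{
    multicall_entry_t calls[2];

    calls[0].op      = __HYPERVISOR_stack_switch;
    calls[0].args[0] = ss;
    calls[0].args[1] = esp;

    calls[1].op      = __HYPERVISOR_fpu_taskswitch;

    multicall(calls, 2);    /* one trap into Xen instead of two */
}
\end{verbatim}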
   20.39 +
   20.40 +Note that multicalls are provided purely as an optimization; there is
   20.41 +no requirement to use them when first porting a guest operating
   20.42 +system.
   20.43 +
   20.44 +
   20.45 +\section{Virtual CPU Setup} 
   20.46 +
   20.47 +At start of day, a guest operating system needs to set up the virtual
   20.48 +CPU it is executing on. This includes installing vectors for the
   20.49 +virtual IDT so that the guest OS can handle interrupts, page faults,
   20.50 +etc. However, the very first thing a guest OS must set up is a pair 
   20.51 +of hypervisor callbacks: these are the entry points which Xen will
   20.52 +use when it wishes to notify the guest OS of an occurrence. 
   20.53 +
   20.54 +\begin{quote}
   20.55 +\hypercall{set\_callbacks(unsigned long event\_selector, unsigned long
   20.56 +  event\_address, unsigned long failsafe\_selector, unsigned long
   20.57 +  failsafe\_address) }
   20.58 +
   20.59 +Register the normal (``event'') and failsafe callbacks for 
   20.60 +event processing. In each case the code segment selector and 
   20.61 +address within that segment are provided. The selectors must
   20.62 +have RPL 1; in XenLinux we simply use the kernel's CS for both 
   20.63 +{\tt event\_selector} and {\tt failsafe\_selector}.
   20.64 +
   20.65 +The value {\tt event\_address} specifies the address of the guest OS's
   20.66 +event handling and dispatch routine; the {\tt failsafe\_address}
   20.67 +specifies a separate entry point which is used only if a fault occurs
   20.68 +when Xen attempts to use the normal callback. 
   20.69 +\end{quote} 
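In a XenLinux-style guest the registration can be as simple as the
following sketch, where the two callback symbols are assembly entry
points supplied by the guest and {\tt \_\_KERNEL\_CS} is the kernel's
code segment selector.

\begin{verbatim}
extern void hypervisor_callback(void);  /* normal event entry point */
extern void failsafe_callback(void);    /* used if the above faults */

void register_callbacks(void)
{
    set_callbacks(__KERNEL_CS, (unsigned long)hypervisor_callback,
                  __KERNEL_CS, (unsigned long)failsafe_callback);
}
\end{verbatim}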
   20.70 +
   20.71 +
   20.72 +After installing the hypervisor callbacks, the guest OS can 
   20.73 +install a `virtual IDT' by using the following hypercall: 
   20.74 +
   20.75 +\begin{quote} 
   20.76 +\hypercall{set\_trap\_table(trap\_info\_t *table)} 
   20.77 +
   20.78 +Install one or more entries into the per-domain 
   20.79 +trap handler table (essentially a software version of the IDT). 
   20.80 +Each entry in the array pointed to by {\tt table} includes the 
   20.81 +exception vector number with the corresponding segment selector 
   20.82 +and entry point. Most guest OSes can use the same handlers on 
   20.83 +Xen as when running on the real hardware; an exception is the 
   20.84 +page fault handler (exception vector 14) where a modified 
   20.85 +stack-frame layout is used. 
   20.86 +
   20.87 +
   20.88 +\end{quote} 
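A trap table might be populated as in the sketch below.  The
{\tt trap\_info\_t} layout (vector, privilege flags, code segment,
handler address) and the zero-terminated table convention are
assumptions here; see {\tt xen/include/public/xen.h} for the
authoritative definition.

\begin{verbatim}
extern void divide_error(void), page_fault(void), system_call(void);

static trap_info_t trap_table[] = {
    {    0, 0, __KERNEL_CS, (unsigned long)divide_error },
    {   14, 0, __KERNEL_CS, (unsigned long)page_fault   },
    { 0x80, 3, __KERNEL_CS, (unsigned long)system_call  }, /* DPL 3 */
    {    0, 0, 0, 0 }                                       /* end  */
};

void install_virtual_idt(void)
{
    set_trap_table(trap_table);
}
\end{verbatim}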
   20.89 +
   20.90 +
   20.91 +
   20.92 +\section{Scheduling and Timer}
   20.93 +
   20.94 +Domains are preemptively scheduled by Xen according to the 
   20.95 +parameters installed by domain 0 (see Section~\ref{s:dom0ops}). 
   20.96 +In addition, however, a domain may choose to explicitly 
   20.97 +control certain behaviour with the following hypercall: 
   20.98 +
   20.99 +\begin{quote} 
  20.100 +\hypercall{sched\_op(unsigned long op)} 
  20.101 +
  20.102 +Request scheduling operation from hypervisor. The options are: {\it
  20.103 +yield}, {\it block}, and {\it shutdown}.  {\it yield} keeps the
  20.104 +calling domain runnable but may cause a reschedule if other domains
  20.105 +are runnable.  {\it block} removes the calling domain from the run
  20.106 +queue and causes it to sleep until an event is delivered to it.  {\it
  20.107 +shutdown} is used to end the domain's execution; the caller can
  20.108 +additionally specify whether the domain should reboot, halt or
  20.109 +suspend.
  20.110 +\end{quote} 
  20.111 +
  20.112 +To aid the implementation of a process scheduler within a guest OS,
  20.113 +Xen provides a virtual programmable timer:
  20.114 +
  20.115 +\begin{quote}
  20.116 +\hypercall{set\_timer\_op(uint64\_t timeout)} 
  20.117 +
  20.118 +Request a timer event to be sent at the specified system time (time 
  20.119 +in nanoseconds since system boot). The hypercall actually passes the 
  20.120 +64-bit timeout value as a pair of 32-bit values. 
  20.121 +
  20.122 +\end{quote} 
  20.123 +
  20.124 +Note that calling {\tt set\_timer\_op()} prior to {\tt sched\_op} 
  20.125 +allows block-with-timeout semantics. 
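For instance (a sketch; {\tt NOW()} and {\tt SCHEDOP\_block} are
assumed helpers/constants):

\begin{verbatim}
void block_for(uint64_t ns)
{
    set_timer_op(NOW() + ns);  /* wake after 'ns' nanoseconds...        */
    sched_op(SCHEDOP_block);   /* ...unless another event arrives first */
}
\end{verbatim}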
  20.126 +
  20.127 +
  20.128 +\section{Page Table Management} 
  20.129 +
  20.130 +Since guest operating systems have read-only access to their page 
  20.131 +tables, Xen must be involved when making any changes. The following
  20.132 +multi-purpose hypercall can be used to modify page-table entries, 
  20.133 +update the machine-to-physical mapping table, flush the TLB, install 
  20.134 +a new page-table base pointer, and more.
  20.135 +
  20.136 +\begin{quote} 
  20.137 +\hypercall{mmu\_update(mmu\_update\_t *req, int count, int *success\_count)} 
  20.138 +
  20.139 +Update the page table for the domain; {\tt count} updates are
  20.140 +submitted for processing as a batch, with {\tt success\_count} being 
  20.141 +updated to report the number of successful updates.  
  20.142 +
  20.143 +Each element of {\tt req[]} contains a pointer (address) and value; 
  20.144 +the two least significant bits of the pointer are used to distinguish 
  20.145 +the type of update requested as follows:
  20.146 +\begin{description} 
  20.147 +
  20.148 +\item[\it MMU\_NORMAL\_PT\_UPDATE:] update a page directory entry or
  20.149 +page table entry to the associated value; Xen will check that the
  20.150 +update is safe, as described in Chapter~\ref{c:memory}.
  20.151 +
  20.152 +\item[\it MMU\_MACHPHYS\_UPDATE:] update an entry in the
  20.153 +  machine-to-physical table. The calling domain must own the machine
  20.154 +  page in question (or be privileged).
  20.155 +
  20.156 +\item[\it MMU\_EXTENDED\_COMMAND:] perform additional MMU operations.
  20.157 +The set of additional MMU operations is considerable, and includes
  20.158 +updating {\tt cr3} (or just re-installing it for a TLB flush),
  20.159 +flushing the cache, installing a new LDT, or pinning \& unpinning
  20.160 +page-table pages (to ensure their reference count doesn't drop to zero,
  20.161 +which would require a revalidation of all entries).
  20.162 +
  20.163 +Further extended commands are used to deal with granting and 
  20.164 +acquiring page ownership; see Section~\ref{s:idc}. 
  20.165 +
  20.166 +
  20.167 +\end{description}
  20.168 +
  20.169 +More details on the precise format of all commands can be 
  20.170 +found in {\tt xen/include/public/xen.h}. 
  20.171 +
  20.172 +
  20.173 +\end{quote}
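A batch might be constructed as follows.  The request layout matches
the description above; {\tt MMU\_NORMAL\_PT\_UPDATE} is taken from the
public headers.  This is a sketch, not a complete page-table path.

\begin{verbatim}
typedef struct {
    unsigned long ptr;  /* machine address of PTE; type in bits 0-1 */
    unsigned long val;  /* new contents for the entry               */
} mmu_update_t;

void update_two_ptes(unsigned long pte_ma0, unsigned long new0,
                     unsigned long pte_ma1, unsigned long new1)
{
    mmu_update_t req[2];
    int done;

    req[0].ptr = pte_ma0 | MMU_NORMAL_PT_UPDATE;
    req[0].val = new0;
    req[1].ptr = pte_ma1 | MMU_NORMAL_PT_UPDATE;
    req[1].val = new1;

    mmu_update(req, 2, &done);  /* Xen validates each update */
}
\end{verbatim}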
  20.174 +
  20.175 +Explicitly updating batches of page table entries is extremely
  20.176 +efficient, but can require a number of alterations to the guest
  20.177 +OS. Using the writable page table mode (Chapter~\ref{c:memory}) is
  20.178 +recommended for new OS ports.
  20.179 +
  20.180 +Regardless of which page table update mode is being used, however,
  20.181 +there are some occasions (notably handling a demand page fault) where
  20.182 +a guest OS will wish to modify exactly one PTE rather than a
  20.183 +batch. This is catered for by the following:
  20.184 +
  20.185 +\begin{quote} 
  20.186 +\hypercall{update\_va\_mapping(unsigned long page\_nr, unsigned long
  20.187 +val, \\ unsigned long flags)}
  20.188 +
  20.189 +Update the currently installed PTE for the page {\tt page\_nr} to 
  20.190 +{\tt val}. As with {\tt mmu\_update()}, Xen checks the modification 
  20.191 +is safe before applying it. The {\tt flags} determine which kind
  20.192 +of TLB flush, if any, should follow the update. 
  20.193 +
  20.194 +\end{quote} 
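A demand-fault handler might use it as sketched below; the
{\tt UVMF\_INVLPG} flag (flush only the affected TLB entry) is assumed
from the public headers.

\begin{verbatim}
void map_faulting_page(unsigned long va, unsigned long pte_val)
{
    /* Install one PTE and invalidate just that TLB entry. */
    update_va_mapping(va >> PAGE_SHIFT, pte_val, UVMF_INVLPG);
}
\end{verbatim}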
  20.195 +
  20.196 +Finally, sufficiently privileged domains may occasionally wish to manipulate 
  20.197 +the pages of others: 
  20.198 +\begin{quote}
  20.199 +
  20.200 +\hypercall{update\_va\_mapping\_otherdomain(unsigned long page\_nr,
  20.201 +unsigned long val, unsigned long flags, uint16\_t domid)}
  20.202 +
  20.203 +Identical to {\tt update\_va\_mapping()} save that the pages being
  20.204 +mapped must belong to the domain {\tt domid}. 
  20.205 +
  20.206 +\end{quote}
  20.207 +
  20.208 +This privileged operation is currently used by backend virtual device
  20.209 +drivers to safely map pages containing I/O data. 
  20.210 +
  20.211 +
  20.212 +
  20.213 +\section{Segmentation Support}
  20.214 +
  20.215 +Xen allows guest OSes to install a custom GDT if they require it; 
  20.216 +this is context switched transparently whenever a domain is 
  20.217 +[de]scheduled.  The following hypercall is effectively a 
  20.218 +`safe' version of {\tt lgdt}: 
  20.219 +
  20.220 +\begin{quote}
  20.221 +\hypercall{set\_gdt(unsigned long *frame\_list, int entries)} 
  20.222 +
  20.223 +Install a global descriptor table for a domain; {\tt frame\_list} is
  20.224 +an array of up to 16 machine page frames within which the GDT resides,
  20.225 +with {\tt entries} being the actual number of descriptor-entry
  20.226 +slots. All page frames must be mapped read-only within the guest's
  20.227 +address space, and the table must be large enough to contain Xen's
  20.228 +reserved entries (see {\tt xen/include/public/arch-x86\_32.h}).
  20.229 +
  20.230 +\end{quote}
  20.231 +
  20.232 +Many guest OSes will also wish to install LDTs; this is achieved by
  20.233 +using {\tt mmu\_update()} with an extended command, passing the
  20.234 +linear address of the LDT base along with the number of entries. No
  20.235 +special safety checks are required; Xen needs to perform this task
  20.236 +simply because {\tt lldt} requires CPL 0.
  20.237 +
  20.238 +
  20.239 +Xen also allows guest operating systems to update just an 
  20.240 +individual segment descriptor in the GDT or LDT:  
  20.241 +
  20.242 +\begin{quote}
  20.243 +\hypercall{update\_descriptor(unsigned long ma, unsigned long word1,
  20.244 +unsigned long word2)}
  20.245 +
  20.246 +Update the GDT/LDT entry at machine address {\tt ma}; the new
  20.247 +8-byte descriptor is stored in {\tt word1} and {\tt word2}.
  20.248 +Xen performs a number of checks to ensure the descriptor is 
  20.249 +valid. 
  20.250 +
  20.251 +\end{quote}
  20.252 +
  20.253 +Guest OSes can use the above in place of context switching entire 
  20.254 +LDTs (or the GDT) when the number of changing descriptors is small. 
  20.255 +
  20.256 +\section{Context Switching} 
  20.257 +
  20.258 +When a guest OS wishes to context switch between two processes, 
  20.259 +it can use the page table and segmentation hypercalls described
  20.260 +above to perform the bulk of the privileged work. In addition, 
  20.261 +however, it will need to invoke Xen to switch the kernel (ring 1) 
  20.262 +stack pointer: 
  20.263 +
  20.264 +\begin{quote} 
  20.265 +\hypercall{stack\_switch(unsigned long ss, unsigned long esp)} 
  20.266 +
  20.267 +Request kernel stack switch from hypervisor; {\tt ss} is the new 
  20.268 +stack segment, while {\tt esp} is the new stack pointer. 
  20.269 +
  20.270 +\end{quote} 
  20.271 +
  20.272 +A final useful hypercall for context switching allows ``lazy'' 
  20.273 +save and restore of floating point state: 
  20.274 +
  20.275 +\begin{quote}
  20.276 +\hypercall{fpu\_taskswitch(void)} 
  20.277 +
  20.278 +This call instructs Xen to set the {\tt TS} bit in the {\tt cr0}
  20.279 +control register; this means that the next attempt to use floating
  20.280 +point will cause a trap which the guest OS can catch. Typically it will
  20.281 +then save/restore the FP state, and clear the {\tt TS} bit. 
  20.282 +\end{quote} 
  20.283 +
  20.284 +This is provided as an optimization only; guest OSes can also choose
  20.285 +to save and restore FP state on all context switches for simplicity. 
  20.286 +
  20.287 +
  20.288 +\section{Physical Memory Management}
  20.289 +
  20.290 +As mentioned previously, each domain has a maximum and current 
  20.291 +memory allocation. The maximum allocation, set at domain creation 
  20.292 +time, cannot be modified. However a domain can choose to reduce 
  20.293 +and subsequently grow its current allocation by using the
  20.294 +following call: 
  20.295 +
  20.296 +\begin{quote} 
  20.297 +\hypercall{dom\_mem\_op(unsigned int op, unsigned long *extent\_list,
  20.298 +  unsigned long nr\_extents, unsigned int extent\_order)}
  20.299 +
  20.300 +Increase or decrease current memory allocation (as determined by 
  20.301 +the value of {\tt op}). Each invocation provides a list of 
  20.302 +extents each of which is $2^s$ pages in size, 
  20.303 +where $s$ is the value of {\tt extent\_order}. 
  20.304 +
  20.305 +\end{quote} 
  20.306 +
  20.307 +In addition to simply reducing or increasing the current memory
  20.308 +allocation via a `balloon driver', this call is also useful for 
  20.309 +obtaining contiguous regions of machine memory when required (e.g. 
  20.310 +for certain PCI devices, or if using superpages).  
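A balloon driver's `deflate' path might therefore look like the sketch
below; the {\tt MEMOP\_decrease\_reservation} constant is an assumption
based on the public headers of this era.

\begin{verbatim}
/* Return n single pages (extent_order 0) to Xen.  mfn_list holds the
 * machine frame numbers of pages the guest has already unmapped.    */
long balloon_out(unsigned long *mfn_list, unsigned long n)
{
    return dom_mem_op(MEMOP_decrease_reservation, mfn_list, n, 0);
}
\end{verbatim}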
  20.311 +
  20.312 +
  20.313 +\section{Inter-Domain Communication}
  20.314 +\label{s:idc} 
  20.315 +
  20.316 +Xen provides a simple asynchronous notification mechanism via
  20.317 +\emph{event channels}. Each domain has a set of end-points (or
  20.318 +\emph{ports}) which may be bound to an event source (e.g. a physical
  20.319 +IRQ, a virtual IRQ, or a port in another domain). When a pair of
  20.320 +end-points in two different domains are bound together, then a `send'
  20.321 +operation on one will cause an event to be received by the destination
  20.322 +domain.
  20.323 +
  20.324 +The control and use of event channels involves the following hypercall: 
  20.325 +
  20.326 +\begin{quote}
  20.327 +\hypercall{event\_channel\_op(evtchn\_op\_t *op)} 
  20.328 +
  20.329 +Inter-domain event-channel management; {\tt op} is a discriminated 
  20.330 +union which allows the following 7 operations: 
  20.331 +
  20.332 +\begin{description} 
  20.333 +
  20.334 +\item[\it alloc\_unbound:] allocate a free (unbound) local
  20.335 +  port and prepare for connection from a specified domain. 
  20.336 +\item[\it bind\_virq:] bind a local port to a virtual 
  20.337 +IRQ; any particular VIRQ can be bound to at most one port per domain. 
  20.338 +\item[\it bind\_pirq:] bind a local port to a physical IRQ;
  20.339 +once more, a given pIRQ can be bound to at most one port per
  20.340 +domain. Furthermore the calling domain must be sufficiently
  20.341 +privileged.
  20.342 +\item[\it bind\_interdomain:] construct an interdomain event 
  20.343 +channel; in general, the target domain must have previously allocated 
  20.344 +an unbound port for this channel, although this can be bypassed by 
  20.345 +privileged domains during domain setup. 
  20.346 +\item[\it close:] close an interdomain event channel. 
  20.347 +\item[\it send:] send an event to the remote end of an
  20.348 +interdomain event channel. 
  20.349 +\item[\it status:] determine the current status of a local port. 
  20.350 +\end{description} 
  20.351 +
  20.352 +For more details see
  20.353 +{\tt xen/include/public/event\_channel.h}. 
  20.354 +
  20.355 +\end{quote} 
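For example, notifying the remote end of a bound channel is a single
operation.  In this sketch the field names within the discriminated
union are assumptions; see {\tt xen/include/public/event\_channel.h}
for the authoritative layout.

\begin{verbatim}
void notify_remote(unsigned int local_port)
{
    evtchn_op_t op;

    op.cmd               = EVTCHNOP_send;  /* select 'send'          */
    op.u.send.local_port = local_port;     /* our end of the channel */

    event_channel_op(&op);
}
\end{verbatim}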
  20.356 +
  20.357 +Event channels are the fundamental communication primitive between 
  20.358 +Xen domains and seamlessly support SMP. However they provide little
  20.359 +bandwidth for communication {\sl per se}, and hence are typically 
  20.360 +married with a piece of shared memory to produce effective and 
  20.361 +high-performance inter-domain communication. 
  20.362 +
  20.363 +Safe sharing of memory pages between guest OSes is carried out by
  20.364 +granting access on a per page basis to individual domains. This is
  20.365 +achieved by using the {\tt grant\_table\_op()} hypercall.
  20.366 +
  20.367 +\begin{quote}
  20.368 +\hypercall{grant\_table\_op(unsigned int cmd, void *uop, unsigned int count)}
  20.369 +
  20.370 +Grant or remove access to a particular page to a particular domain. 
  20.371 +
  20.372 +\end{quote} 
  20.373 +
  20.374 +This is not currently widely in use by guest operating systems, but 
  20.375 +we intend to integrate support more fully in the near future. 
  20.376 +
  20.377 +\section{PCI Configuration} 
  20.378 +
  20.379 +Domains with physical device access (i.e.\ driver domains) receive
  20.380 +limited access to certain PCI devices (bus address space and
  20.381 +interrupts). However many guest operating systems attempt to 
  20.382 +determine the PCI configuration by directly accessing the PCI BIOS, 
  20.383 +which cannot be allowed for safety. 
  20.384 +
  20.385 +Instead, Xen provides the following hypercall: 
  20.386 +
  20.387 +\begin{quote}
  20.388 +\hypercall{physdev\_op(void *physdev\_op)}
  20.389 +
  20.390 +Perform a PCI configuration operation; depending on the value 
  20.391 +of {\tt physdev\_op} this can be a PCI config read, a PCI config 
  20.392 +write, or a small number of other queries. 
  20.393 +
  20.394 +\end{quote} 
  20.395 +
  20.396 +
  20.397 +For examples of using {\tt physdev\_op()}, see the 
  20.398 +Xen-specific PCI code in the Linux sparse tree. 
  20.399 +
  20.400 +\section{Administrative Operations}
  20.401 +\label{s:dom0ops}
  20.402 +
  20.403 +A large number of control operations are available to a sufficiently
  20.404 +privileged domain (typically domain 0). These allow the creation and
  20.405 +management of new domains, for example. A complete list is given 
  20.406 +below; for more details on any or all of these, please see 
  20.407 +{\tt xen/include/public/dom0\_ops.h}. 
  20.408 +
  20.409 +
  20.410 +\begin{quote}
  20.411 +\hypercall{dom0\_op(dom0\_op\_t *op)} 
  20.412 +
  20.413 +Administrative domain operations for domain management. The options are:
  20.414 +
  20.415 +\begin{description} 
  20.416 +\item [\it DOM0\_CREATEDOMAIN:] create a new domain
  20.417 +
  20.418 +\item [\it DOM0\_PAUSEDOMAIN:] remove a domain from the scheduler run 
  20.419 +queue. 
  20.420 +
  20.421 +\item [\it DOM0\_UNPAUSEDOMAIN:] mark a paused domain as schedulable
  20.422 +  once again. 
  20.423 +
  20.424 +\item [\it DOM0\_DESTROYDOMAIN:] deallocate all resources associated
  20.425 +with a domain
  20.426 +
  20.427 +\item [\it DOM0\_GETMEMLIST:] get list of pages used by the domain
  20.428 +
  20.429 +\item [\it DOM0\_SCHEDCTL:]
  20.430 +
  20.431 +\item [\it DOM0\_ADJUSTDOM:] adjust scheduling priorities for domain
  20.432 +
  20.433 +\item [\it DOM0\_BUILDDOMAIN:] do final guest OS setup for domain
  20.434 +
  20.435 +\item [\it DOM0\_GETDOMAINFO:] get statistics about the domain
  20.436 +
  20.437 +\item [\it DOM0\_GETPAGEFRAMEINFO:] 
  20.438 +
  20.439 +\item [\it DOM0\_GETPAGEFRAMEINFO2:]
  20.440 +
  20.441 +\item [\it DOM0\_IOPL:] set I/O privilege level
  20.442 +
  20.443 +\item [\it DOM0\_MSR:] read or write model specific registers
  20.444 +
  20.445 +\item [\it DOM0\_DEBUG:] interactively invoke the debugger
  20.446 +
  20.447 +\item [\it DOM0\_SETTIME:] set system time
  20.448 +
  20.449 +\item [\it DOM0\_READCONSOLE:] read console content from hypervisor buffer ring
  20.450 +
  20.451 +\item [\it DOM0\_PINCPUDOMAIN:] pin domain to a particular CPU
  20.452 +
  20.453 +\item [\it DOM0\_GETTBUFS:] get information about the size and location of
  20.454 +                      the trace buffers (only on trace-buffer enabled builds)
  20.455 +
  20.456 +\item [\it DOM0\_PHYSINFO:] get information about the host machine
  20.457 +
  20.458 +\item [\it DOM0\_PCIDEV\_ACCESS:] modify PCI device access permissions
  20.459 +
  20.460 +\item [\it DOM0\_SCHED\_ID:] get the ID of the current Xen scheduler
  20.461 +
  20.462 +\item [\it DOM0\_SHADOW\_CONTROL:] switch between shadow page-table modes
  20.463 +
  20.464 +\item [\it DOM0\_SETDOMAININITIALMEM:] set initial memory allocation of a domain
  20.465 +
  20.466 +\item [\it DOM0\_SETDOMAINMAXMEM:] set maximum memory allocation of a domain
  20.467 +
  20.468 +\item [\it DOM0\_SETDOMAINVMASSIST:] set domain VM assist options
  20.469 +\end{description} 
  20.470 +\end{quote} 
  20.471 +
  20.472 +Most of the above are best understood by looking at the code 
  20.473 +implementing them (in {\tt xen/common/dom0\_ops.c}) and in 
  20.474 +the user-space tools that use them (mostly in {\tt tools/libxc}). 
  20.475 +
  20.476 +\section{Debugging Hypercalls} 
  20.477 +
  20.478 +A few additional hypercalls are mainly useful for debugging: 
  20.479 +
  20.480 +\begin{quote} 
  20.481 +\hypercall{console\_io(int cmd, int count, char *str)}
  20.482 +
  20.483 +Use Xen to interact with the console; operations are:
  20.484 +
  20.485 +{\it CONSOLEIO\_write}: Output {\tt count} characters from buffer {\tt str}.
  20.486 +
  20.487 +{\it CONSOLEIO\_read}: Input at most {\tt count} characters into buffer {\tt str}.
  20.488 +\end{quote} 
  20.489 +
  20.490 +A pair of hypercalls allows access to the underlying debug registers: 
  20.491 +\begin{quote}
  20.492 +\hypercall{set\_debugreg(int reg, unsigned long value)}
  20.493 +
  20.494 +Set debug register {\tt reg} to {\tt value}. 
  20.495 +
  20.496 +\hypercall{get\_debugreg(int reg)}
  20.497 +
  20.498 +Return the contents of the debug register {\tt reg}.
  20.499 +\end{quote}
  20.500 +
  20.501 +And finally: 
  20.502 +\begin{quote}
  20.503 +\hypercall{xen\_version(int cmd)}
  20.504 +
  20.505 +Request Xen version number.
  20.506 +\end{quote} 
  20.507 +
  20.508 +This is useful to ensure that user-space tools are in sync 
  20.509 +with the underlying hypervisor. 
  20.510 +
  20.511 +\section{Deprecated Hypercalls}
  20.512 +
  20.513 +Xen is under constant development and refinement; as such there 
  20.514 +are plans to improve the way in which various pieces of functionality 
  20.515 +are exposed to guest OSes. 
  20.516 +
  20.517 +\begin{quote} 
  20.518 +\hypercall{vm\_assist(unsigned int cmd, unsigned int type)}
  20.519 +
  20.520 +Toggle various memory management modes (in particular writable page
  20.521 +tables and superpage support). 
  20.522 +
  20.523 +\end{quote} 
  20.524 +
  20.525 +This is likely to be replaced with mode values in the shared 
  20.526 +information page since this is more resilient for resumption 
  20.527 +after migration or checkpoint. 
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/docs/src/interface/memory.tex	Thu Sep 22 11:42:01 2005 -0600
    21.3 @@ -0,0 +1,162 @@
    21.4 +\chapter{Memory}
    21.5 +\label{c:memory} 
    21.6 +
    21.7 +Xen is responsible for managing the allocation of physical memory to
    21.8 +domains, and for ensuring safe use of the paging and segmentation
    21.9 +hardware.
   21.10 +
   21.11 +
   21.12 +\section{Memory Allocation}
   21.13 +
   21.14 +Xen resides within a small fixed portion of physical memory; it also
   21.15 +reserves the top 64MB of every virtual address space. The remaining
   21.16 +physical memory is available for allocation to domains at a page
   21.17 +granularity.  Xen tracks the ownership and use of each page, which
   21.18 +allows it to enforce secure partitioning between domains.
   21.19 +
   21.20 +Each domain has a maximum and current physical memory allocation.  A
   21.21 +guest OS may run a `balloon driver' to dynamically adjust its current
   21.22 +memory allocation up to its limit.
   21.23 +
   21.24 +
   21.25 +%% XXX SMH: I use machine and physical in the next section (which is
   21.26 +%% kinda required for consistency with code); wonder if this section
   21.27 +%% should use same terms?
   21.28 +%%
   21.29 +%% Probably. 
   21.30 +%%
   21.31 +%% Merging this and below section at some point prob makes sense.
   21.32 +
   21.33 +\section{Pseudo-Physical Memory}
   21.34 +
   21.35 +Since physical memory is allocated and freed on a page granularity,
   21.36 +there is no guarantee that a domain will receive a contiguous stretch
   21.37 +of physical memory. However most operating systems do not have good
   21.38 +support for operating in a fragmented physical address space. To aid
   21.39 +porting such operating systems to run on top of Xen, we make a
   21.40 +distinction between \emph{machine memory} and \emph{pseudo-physical
   21.41 +  memory}.
   21.42 +
   21.43 +Put simply, machine memory refers to the entire amount of memory
   21.44 +installed in the machine, including that reserved by Xen, in use by
   21.45 +various domains, or currently unallocated. We consider machine memory
   21.46 +to comprise a set of 4K \emph{machine page frames} numbered
   21.47 +consecutively starting from 0. Machine frame numbers have the same
   21.48 +meaning within Xen and within any domain.
   21.49 +
   21.50 +Pseudo-physical memory, on the other hand, is a per-domain
   21.51 +abstraction. It allows a guest operating system to consider its memory
   21.52 +allocation to consist of a contiguous range of physical page frames
   21.53 +starting at physical frame 0, despite the fact that the underlying
   21.54 +machine page frames may be sparsely allocated and in any order.
   21.55 +
   21.56 +To achieve this, Xen maintains a globally readable {\it
   21.57 +  machine-to-physical} table which records the mapping from machine
   21.58 +page frames to pseudo-physical ones. In addition, each domain is
   21.59 +supplied with a {\it physical-to-machine} table which performs the
   21.60 +inverse mapping. Clearly the machine-to-physical table has size
   21.61 +proportional to the amount of RAM installed in the machine, while each
   21.62 +physical-to-machine table has size proportional to the memory
   21.63 +allocation of the given domain.
   21.64 +
   21.65 +Architecture dependent code in guest operating systems can then use
   21.66 +the two tables to provide the abstraction of pseudo-physical memory.
   21.67 +In general, only certain specialized parts of the operating system
   21.68 +(such as page table management) need to understand the difference
   21.69 +between machine and pseudo-physical addresses.
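For instance, a guest might wrap the two tables in a pair of
conversion macros.  The table names below follow the XenLinux
convention but are assumptions of this sketch.

\begin{verbatim}
extern unsigned long *phys_to_machine_mapping;  /* per-domain table  */
extern unsigned long *machine_to_phys_mapping;  /* global, read-only */

/* pseudo-physical frame number <-> machine frame number */
#define pfn_to_mfn(pfn) (phys_to_machine_mapping[(pfn)])
#define mfn_to_pfn(mfn) (machine_to_phys_mapping[(mfn)])
\end{verbatim}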
   21.70 +
   21.71 +
   21.72 +\section{Page Table Updates}
   21.73 +
   21.74 +In the default mode of operation, Xen enforces read-only access to
   21.75 +page tables and requires guest operating systems to explicitly request
   21.76 +any modifications.  Xen validates all such requests and only applies
   21.77 +updates that it deems safe.  This is necessary to prevent domains from
   21.78 +adding arbitrary mappings to their page tables.
   21.79 +
   21.80 +To aid validation, Xen associates a type and reference count with each
   21.81 +memory page. A page has one of the following mutually-exclusive types
   21.82 +at any point in time: page directory ({\sf PD}), page table ({\sf
   21.83 +  PT}), local descriptor table ({\sf LDT}), global descriptor table
   21.84 +({\sf GDT}), or writable ({\sf RW}). Note that a guest OS may always
   21.85 +create readable mappings of its own memory regardless of its current
   21.86 +type.
   21.87 +
   21.88 +%%% XXX: possibly explain more about ref count 'lifecyle' here?
   21.89 +This mechanism is used to maintain the invariants required for safety;
   21.90 +for example, a domain cannot have a writable mapping to any part of a
   21.91 +page table as this would require the page concerned to simultaneously
   21.92 +be of types {\sf PT} and {\sf RW}.
   21.93 +
   21.94 +
   21.95 +% \section{Writable Page Tables}
   21.96 +
   21.97 +Xen also provides an alternative mode of operation in which guests
   21.98 +have the illusion that their page tables are directly writable.  Of
   21.99 +course this is not really the case, since Xen must still validate
  21.100 +modifications to ensure secure partitioning. To this end, Xen traps
  21.101 +any write attempt to a memory page of type {\sf PT} (i.e., that is
  21.102 +currently part of a page table).  If such an access occurs, Xen
  21.103 +temporarily allows write access to that page while at the same time
  21.104 +\emph{disconnecting} it from the page table that is currently in use.
  21.105 +This allows the guest to safely make updates to the page because the
  21.106 +newly-updated entries cannot be used by the MMU until Xen revalidates
  21.107 +and reconnects the page.  Reconnection occurs automatically in a
  21.108 +number of situations: for example, when the guest modifies a different
  21.109 +page-table page, when the domain is preempted, or whenever the guest
  21.110 +uses Xen's explicit page-table update interfaces.
  21.111 +
  21.112 +Finally, Xen also supports a form of \emph{shadow page tables} in
  21.113 +which the guest OS uses an independent copy of page tables which are
  21.114 +unknown to the hardware (i.e.\ which are never pointed to by {\tt
  21.115 +  cr3}). Instead Xen propagates changes made to the guest's tables to
  21.116 +the real ones, and vice versa. This is useful for logging page writes
  21.117 +(e.g.\ for live migration or checkpoint). A full version of the shadow
  21.118 +page tables also allows guest OS porting with less effort.
  21.119 +
  21.120 +
  21.121 +\section{Segment Descriptor Tables}
  21.122 +
  21.123 +On boot a guest is supplied with a default GDT, which does not reside
  21.124 +within its own memory allocation.  If the guest wishes to use segments
  21.125 +other than the default `flat' ring-1 and ring-3 segments that this GDT
  21.126 +provides, it must register a custom GDT and/or LDT with Xen, allocated
  21.127 +from its own memory. Note that a number of GDT entries are reserved by
  21.128 +Xen -- any custom GDT must also include sufficient space for these
  21.129 +entries.
  21.130 +
  21.131 +For example, the following hypercall is used to specify a new GDT:
  21.132 +
  21.133 +\begin{quote}
  21.134 +  int {\bf set\_gdt}(unsigned long *{\em frame\_list}, int {\em
  21.135 +    entries})
  21.136 +
  21.137 +  \emph{frame\_list}: An array of up to 16 machine page frames within
  21.138 +  which the GDT resides.  Any frame registered as a GDT frame may only
  21.139 +  be mapped read-only within the guest's address space (e.g., no
  21.140 +  writable mappings, no use as a page-table page, and so on).
  21.141 +
  21.142 +  \emph{entries}: The number of descriptor-entry slots in the GDT.
  21.143 +  Note that the table must be large enough to contain Xen's reserved
  21.144 +  entries; thus we must have `{\em entries $>$
  21.145 +    LAST\_RESERVED\_GDT\_ENTRY}\ '.  Note also that, after registering
  21.146 +  the GDT, slots \emph{FIRST\_} through
  21.147 +  \emph{LAST\_RESERVED\_GDT\_ENTRY} are no longer usable by the guest
  21.148 +  and may be overwritten by Xen.
  21.149 +\end{quote}
  21.150 +
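          +For illustration, registering a GDT that occupies two of the
          +guest's own frames might look as follows.  This is a sketch: the
          +{\tt HYPERVISOR\_set\_gdt()} wrapper name is the XenLinux
          +convention, and the machine frame numbers are assumed to be
          +already known to the guest.
          +
          +\begin{verbatim}
          +/* Sketch: register a custom GDT held in two machine frames
          + * owned by the guest (neither may be writably mapped).     */
          +unsigned long frames[2];
          +frames[0] = gdt_mfn0;    /* machine frame numbers, assumed */
          +frames[1] = gdt_mfn1;    /*   to be in hand                */
          +if ( HYPERVISOR_set_gdt(frames, nr_entries) < 0 )
          +    BUG();  /* too few entries, or a frame is writable */
          +\end{verbatim}
          +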
  21.151 +The LDT is updated via the generic MMU update mechanism (i.e., via the
  21.152 +{\tt mmu\_update()} hypercall).
  21.153 +
  21.154 +\section{Start of Day}
  21.155 +
  21.156 +The start-of-day environment for guest operating systems is rather
  21.157 +different to that provided by the underlying hardware. In particular,
  21.158 +the processor is already executing in protected mode with paging
  21.159 +enabled.
  21.160 +
  21.161 +{\it Domain 0} is created and booted by Xen itself. For all subsequent
  21.162 +domains, the analogue of the boot-loader is the {\it domain builder},
  21.163 +user-space software running in {\it domain 0}. The domain builder is
  21.164 +responsible for building the initial page tables for a domain and
  21.165 +loading its kernel image at the appropriate virtual address.
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/docs/src/interface/scheduling.tex	Thu Sep 22 11:42:01 2005 -0600
    22.3 @@ -0,0 +1,268 @@
    22.4 +\chapter{Scheduling API}  
    22.5 +
    22.6 +The scheduling API is used by both the schedulers described above and should
    22.7 +also be used by any new schedulers.  It provides a generic interface and also
    22.8 +implements much of the ``boilerplate'' code.
    22.9 +
   22.10 +Schedulers conforming to this API are described by the following
   22.11 +structure:
   22.12 +
   22.13 +\begin{verbatim}
   22.14 +struct scheduler
   22.15 +{
   22.16 +    char *name;             /* full name for this scheduler      */
   22.17 +    char *opt_name;         /* option name for this scheduler    */
   22.18 +    unsigned int sched_id;  /* ID for this scheduler             */
   22.19 +
   22.20 +    int          (*init_scheduler) ();
   22.21 +    int          (*alloc_task)     (struct task_struct *);
   22.22 +    void         (*add_task)       (struct task_struct *);
   22.23 +    void         (*free_task)      (struct task_struct *);
   22.24 +    void         (*rem_task)       (struct task_struct *);
   22.25 +    void         (*wake_up)        (struct task_struct *);
   22.26 +    void         (*do_block)       (struct task_struct *);
   22.27 +    task_slice_t (*do_schedule)    (s_time_t);
   22.28 +    int          (*control)        (struct sched_ctl_cmd *);
   22.29 +    int          (*adjdom)         (struct task_struct *,
   22.30 +                                    struct sched_adjdom_cmd *);
   22.31 +    s32          (*reschedule)     (struct task_struct *);
   22.32 +    void         (*dump_settings)  (void);
   22.33 +    void         (*dump_cpu_state) (int);
   22.34 +    void         (*dump_runq_el)   (struct task_struct *);
   22.35 +};
   22.36 +\end{verbatim}
   22.37 +
   22.38 +The only method that {\em must} be implemented is
   22.39 +{\tt do\_schedule()}.  However, if the {\tt wake\_up()} method is not
   22.40 +implemented then waking tasks will never be placed on the runqueue!
   22.41 +
   22.42 +The fields of the above structure are described in more detail below.
   22.43 +
   22.44 +\subsubsection{name}
   22.45 +
   22.46 +The name field should point to a descriptive ASCII string.
   22.47 +
   22.48 +\subsubsection{opt\_name}
   22.49 +
   22.50 +This field is the value of the {\tt sched=} boot-time option that will select
   22.51 +this scheduler.
   22.52 +
   22.53 +\subsubsection{sched\_id}
   22.54 +
   22.55 +This is an integer that uniquely identifies this scheduler.  There should be a
   22.56 +macro corresponding to this scheduler ID in {\tt <xen/sched-if.h>}.
   22.57 +
   22.58 +\subsubsection{init\_scheduler}
   22.59 +
   22.60 +\paragraph*{Purpose}
   22.61 +
   22.62 +This is a function for performing any scheduler-specific initialisation.  For
   22.63 +instance, it might allocate memory for per-CPU scheduler data and initialise it
   22.64 +appropriately.
   22.65 +
   22.66 +\paragraph*{Call environment}
   22.67 +
   22.68 +This function is called after the initialisation performed by the generic
   22.69 +layer.  The function is called exactly once, for the scheduler that has been
   22.70 +selected.
   22.71 +
   22.72 +\paragraph*{Return values}
   22.73 +
   22.74 +This should return a negative value on failure --- this will cause an
   22.75 +immediate panic and the system will fail to boot.
   22.76 +
   22.77 +\subsubsection{alloc\_task}
   22.78 +
   22.79 +\paragraph*{Purpose}
   22.80 +Called when a {\tt task\_struct} is allocated by the generic scheduler
   22.81 +layer.  A particular scheduler implementation may use this method to
   22.82 +allocate per-task data for this task.  It may use the {\tt
   22.83 +sched\_priv} pointer in the {\tt task\_struct} to point to this data.
   22.84 +
   22.85 +\paragraph*{Call environment}
   22.86 +The generic layer guarantees that the {\tt sched\_priv} field will
   22.87 +remain intact from the time this method is called until the task is
   22.88 +deallocated (so long as the scheduler implementation does not change
   22.89 +it explicitly!).
   22.90 +
   22.91 +\paragraph*{Return values}
   22.92 +Negative on failure.
   22.93 +
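          +As a sketch, a scheduler might allocate its per-task state in this
          +method and hang it off {\tt sched\_priv}.  The private structure
          +and the {\tt xmalloc()} call below are illustrative assumptions,
          +not part of the API.
          +
          +\begin{verbatim}
          +/* Hypothetical per-task data for an illustrative scheduler. */
          +struct example_task_data {
          +    struct list_head runq_elem;   /* runqueue linkage     */
          +    s_time_t         min_slice;   /* minimum time slice   */
          +};
          +
          +static int example_alloc_task(struct task_struct *t)
          +{
          +    t->sched_priv = xmalloc(struct example_task_data);
          +    if ( t->sched_priv == NULL )
          +        return -1;                /* negative on failure  */
          +    return 0;
          +}
          +\end{verbatim}
          +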
   22.94 +\subsubsection{add\_task}
   22.95 +
   22.96 +\paragraph*{Purpose}
   22.97 +
   22.98 +Called when a task is initially added by the generic layer.
   22.99 +
  22.100 +\paragraph*{Call environment}
  22.101 +
  22.102 +The fields in the {\tt task\_struct} are now filled out and available for use.
  22.103 +Schedulers should implement appropriate initialisation of any per-task private
  22.104 +information in this method.
  22.105 +
  22.106 +\subsubsection{free\_task}
  22.107 +
  22.108 +\paragraph*{Purpose}
  22.109 +
  22.110 +Schedulers should free the space used by any associated private data
  22.111 +structures.
  22.112 +
  22.113 +\paragraph*{Call environment}
  22.114 +
  22.115 +This is called when a {\tt task\_struct} is about to be deallocated.
  22.116 +The generic layer will have done generic task removal operations and
  22.117 +(if implemented) called the scheduler's {\tt rem\_task} method before
  22.118 +this method is called.
  22.119 +
  22.120 +\subsubsection{rem\_task}
  22.121 +
  22.122 +\paragraph*{Purpose}
  22.123 +
  22.124 +This is called when a task is being removed from scheduling (but is
  22.125 +not yet being freed).
  22.126 +
  22.127 +\subsubsection{wake\_up}
  22.128 +
  22.129 +\paragraph*{Purpose}
  22.130 +
  22.131 +Called when a task is woken up, this method should put the task on the runqueue
  22.132 +(or do the scheduler-specific equivalent action).
  22.133 +
  22.134 +\paragraph*{Call environment}
  22.135 +
  22.136 +The task is already set to state RUNNING.
  22.137 +
  22.138 +\subsubsection{do\_block}
  22.139 +
  22.140 +\paragraph*{Purpose}
  22.141 +
  22.142 +This function is called when a task is blocked.  It should not remove
  22.143 +the task from the runqueue.
  22.144 +
  22.145 +\paragraph*{Call environment}
  22.146 +
  22.147 +The EVENTS\_MASTER\_ENABLE\_BIT is already set and the task state has
  22.148 +been changed to TASK\_INTERRUPTIBLE on entry to this method.  A call to the {\tt
  22.149 +  do\_schedule} method will be made after this method returns, in
  22.150 +order to select the next task to run.
  22.151 +
  22.152 +\subsubsection{do\_schedule}
  22.153 +
  22.154 +This method must be implemented.
  22.155 +
  22.156 +\paragraph*{Purpose}
  22.157 +
  22.158 +The method is called each time a new task must be chosen for scheduling on the
  22.159 +current CPU.  The current time is passed as the single argument (the current
  22.160 +task can be found using the {\tt current} macro).
  22.161 +
  22.162 +This method should select the next task to run on this CPU and set its minimum
  22.163 +time to run as well as returning the data described below.
  22.164 +
  22.165 +This method should also take the appropriate action if the previous
  22.166 +task has blocked, e.g. removing it from the runqueue.
  22.167 +
  22.168 +\paragraph*{Call environment}
  22.169 +
  22.170 +The other fields in the {\tt task\_struct} are updated by the generic layer,
  22.171 +which also performs all Xen-specific tasks and carries out the actual task switch
  22.172 +(unless the previous task has been chosen again).
  22.173 +
  22.174 +This method is called with the {\tt schedule\_lock} held for the current CPU
  22.175 +and local interrupts disabled.
  22.176 +
  22.177 +\paragraph*{Return values}
  22.178 +
  22.179 +Must return a {\tt struct task\_slice} describing what task to run and how long
  22.180 +for (at maximum).
  22.181 +
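          +For concreteness, the shape of an implementation might be as
          +follows, assuming {\tt task\_slice\_t} carries a task pointer and
          +a slice length; the {\tt example\_*} helpers are invented purely
          +for illustration.
          +
          +\begin{verbatim}
          +/* Sketch: pick the next task and bound its time slice. */
          +static task_slice_t example_do_schedule(s_time_t now)
          +{
          +    task_slice_t ret;
          +
          +    if ( example_task_blocked(current) )   /* invented test   */
          +        example_runq_remove(current);      /* invented helper */
          +
          +    ret.task = example_runq_pick(smp_processor_id());
          +    ret.time = MILLISECS(10);   /* run for at most 10ms   */
          +    return ret;
          +}
          +\end{verbatim}
          +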
  22.182 +\subsubsection{control}
  22.183 +
  22.184 +\paragraph*{Purpose}
  22.185 +
  22.186 +This method is called for global scheduler control operations.  It takes a
  22.187 +pointer to a {\tt struct sched\_ctl\_cmd}, which it should either
  22.188 +source data from or populate with data, depending on the value of the
  22.189 +{\tt direction} field.
  22.190 +
  22.191 +\paragraph*{Call environment}
  22.192 +
  22.193 +The generic layer guarantees that when this method is called, the
  22.194 +caller has selected the correct scheduler ID; hence the scheduler's
  22.195 +implementation does not need to sanity-check these parts of the call.
  22.196 +
  22.197 +\paragraph*{Return values}
  22.198 +
  22.199 +This function should return the value to be passed back to user space, hence it
  22.200 +should either be 0 or an appropriate errno value.
  22.201 +
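          +As an illustrative sketch (the {\tt SCHED\_INFO\_GET}/{\tt
          +SCHED\_INFO\_PUT} direction values and the payload field are
          +assumptions about the control structure):
          +
          +\begin{verbatim}
          +/* Sketch: report or update a single global setting. */
          +static int example_control(struct sched_ctl_cmd *cmd)
          +{
          +    if ( cmd->direction == SCHED_INFO_GET )
          +        cmd->u.example.setting = example_setting;  /* report */
          +    else
          +        example_setting = cmd->u.example.setting;  /* update */
          +    return 0;   /* passed back to user space */
          +}
          +\end{verbatim}
          +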
  22.202 +\subsubsection{adjdom}
  22.203 +
  22.204 +\paragraph*{Purpose}
  22.205 +
  22.206 +This method is called to adjust the scheduling parameters of a particular
  22.207 +domain, or to query their current values.  The function should check
  22.208 +the {\tt direction} field of the {\tt sched\_adjdom\_cmd} it receives in
  22.209 +order to determine which of these operations is being performed.
  22.210 +
  22.211 +\paragraph*{Call environment}
  22.212 +
  22.213 +The generic layer guarantees that the caller has specified the correct
  22.214 +control interface version and scheduler ID and that the supplied {\tt
  22.215 +task\_struct} will not be deallocated during the call (hence it is not
  22.216 +necessary to call {\tt get\_task\_struct}).
  22.217 +
  22.218 +\paragraph*{Return values}
  22.219 +
  22.220 +This function should return the value to be passed back to user space, hence it
  22.221 +should either be 0 or an appropriate errno value.
  22.222 +
  22.223 +\subsubsection{reschedule}
  22.224 +
  22.225 +\paragraph*{Purpose}
  22.226 +
  22.227 +This method is called to determine if a reschedule is required as a result of a
  22.228 +particular task.
  22.229 +
  22.230 +\paragraph*{Call environment}
  22.231 +The generic layer will cause a reschedule if the current domain is the idle
  22.232 +task or if it has already exceeded its minimum time slice.  The
  22.233 +generic layer guarantees that the task passed is not currently running but is
  22.234 +on the runqueue.
  22.235 +
  22.236 +\paragraph*{Return values}
  22.237 +
  22.238 +Should return a mask of CPUs to cause a reschedule on.
  22.239 +
  22.240 +\subsubsection{dump\_settings}
  22.241 +
  22.242 +\paragraph*{Purpose}
  22.243 +
  22.244 +If implemented, this should dump any private global settings for this
  22.245 +scheduler to the console.
  22.246 +
  22.247 +\paragraph*{Call environment}
  22.248 +
  22.249 +This function is called with interrupts enabled.
  22.250 +
  22.251 +\subsubsection{dump\_cpu\_state}
  22.252 +
  22.253 +\paragraph*{Purpose}
  22.254 +
  22.255 +This method should dump any private settings for the specified CPU.
  22.256 +
  22.257 +\paragraph*{Call environment}
  22.258 +
  22.259 +This function is called with interrupts disabled and the {\tt schedule\_lock}
  22.260 +for the specified CPU held.
  22.261 +
  22.262 +\subsubsection{dump\_runq\_el}
  22.263 +
  22.264 +\paragraph*{Purpose}
  22.265 +
  22.266 +This method should dump any private settings for the specified task.
  22.267 +
  22.268 +\paragraph*{Call environment}
  22.269 +
  22.270 +This function is called with interrupts disabled and the {\tt schedule\_lock}
  22.271 +for the task's CPU held.
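          +Putting the pieces together, a new scheduler is plugged in by
          +filling out a {\tt struct scheduler} instance along the lines of
          +the entirely hypothetical sketch below (registration of the
          +instance with the generic layer is not shown):
          +
          +\begin{verbatim}
          +/* Sketch: an illustrative scheduler declaration.  SCHED_EXAMPLE
          + * stands for an ID macro that would live in <xen/sched-if.h>. */
          +static struct scheduler sched_example_def = {
          +    .name        = "Example Round-Robin Scheduler",
          +    .opt_name    = "example",       /* selected by sched=example */
          +    .sched_id    = SCHED_EXAMPLE,
          +
          +    .alloc_task  = example_alloc_task,
          +    .wake_up     = example_wake_up,      /* else tasks never run */
          +    .do_schedule = example_do_schedule,  /* mandatory            */
          +    .control     = example_control,
          +};
          +\end{verbatim}
          +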
    23.1 --- a/docs/src/user.tex	Thu Sep 22 11:34:14 2005 -0600
    23.2 +++ b/docs/src/user.tex	Thu Sep 22 11:42:01 2005 -0600
    23.3 @@ -59,1803 +59,36 @@ Contributions of material, suggestions a
    23.4  \renewcommand{\floatpagefraction}{.8}
    23.5  \setstretch{1.1}
    23.6  
    23.7 +
    23.8  \part{Introduction and Tutorial}
    23.9 -\chapter{Introduction}
   23.10 -
   23.11 -Xen is a {\em paravirtualising} virtual machine monitor (VMM), or
   23.12 -`hypervisor', for the x86 processor architecture.  Xen can securely
   23.13 -execute multiple virtual machines on a single physical system with
   23.14 -close-to-native performance.  The virtual machine technology
   23.15 -facilitates enterprise-grade functionality, including:
   23.16 -
   23.17 -\begin{itemize}
   23.18 -\item Virtual machines with performance close to native
   23.19 -  hardware.
   23.20 -\item Live migration of running virtual machines between physical hosts.
   23.21 -\item Excellent hardware support (supports most Linux device drivers).
   23.22 -\item Sandboxed, restartable device drivers.
   23.23 -\end{itemize}
   23.24 -
   23.25 -Paravirtualisation permits very high performance virtualisation,
   23.26 -even on architectures like x86 that are traditionally
   23.27 -very hard to virtualise.
   23.28 -The drawback of this approach is that it requires operating systems to
   23.29 -be {\em ported} to run on Xen.  Porting an OS to run on Xen is similar
   23.30 -to supporting a new hardware platform; however, the process
   23.31 -is simplified because the paravirtual machine architecture is very
   23.32 -similar to the underlying native hardware. Even though operating system
   23.33 -kernels must explicitly support Xen, a key feature is that user space
   23.34 -applications and libraries {\em do not} require modification.
   23.35 -
   23.36 -Xen support is available for increasingly many operating systems:
   23.37 -right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
   23.38 -A FreeBSD port is undergoing testing and will be incorporated into the
   23.39 -release soon. Other OS ports, including Plan 9, are in progress.  We
   23.40 -hope that that arch-xen patches will be incorporated into the
   23.41 -mainstream releases of these operating systems in due course (as has
   23.42 -already happened for NetBSD).
   23.43 -
   23.44 -Possible usage scenarios for Xen include:
   23.45 -\begin{description}
   23.46 -\item [Kernel development.] Test and debug kernel modifications in a
   23.47 -      sandboxed virtual machine --- no need for a separate test
   23.48 -      machine.
   23.49 -\item [Multiple OS configurations.] Run multiple operating systems
   23.50 -      simultaneously, for instance for compatibility or QA purposes.
   23.51 -\item [Server consolidation.] Move multiple servers onto a single
   23.52 -      physical host with performance and fault isolation provided at
   23.53 -      virtual machine boundaries. 
   23.54 -\item [Cluster computing.] Management at VM granularity provides more
   23.55 -      flexibility than separately managing each physical host, but
   23.56 -      better control and isolation than single-system image solutions, 
   23.57 -      particularly by using live migration for load balancing. 
   23.58 -\item [Hardware support for custom OSes.] Allow development of new OSes
   23.59 -      while benefiting from the wide-ranging hardware support of
   23.60 -      existing OSes such as Linux.
   23.61 -\end{description}
   23.62 -
   23.63 -\section{Structure of a Xen-Based System}
   23.64 -
   23.65 -A Xen system has multiple layers, the lowest and most privileged of
   23.66 -which is Xen itself. 
   23.67 -Xen in turn may host multiple {\em guest} operating systems, each of
   23.68 -which is executed within a secure virtual machine (in Xen terminology,
   23.69 -a {\em domain}). Domains are scheduled by Xen to make effective use of
   23.70 -the available physical CPUs.  Each guest OS manages its own
   23.71 -applications, which includes responsibility for scheduling each
   23.72 -application within the time allotted to the VM by Xen.
   23.73 -
   23.74 -The first domain, {\em domain 0}, is created automatically when the
   23.75 -system boots and has special management privileges. Domain 0 builds
   23.76 -other domains and manages their virtual devices. It also performs
   23.77 -administrative tasks such as suspending, resuming and migrating other
   23.78 -virtual machines.
   23.79 -
   23.80 -Within domain 0, a process called \emph{xend} runs to manage the system.
   23.81 -\Xend is responsible for managing virtual machines and providing access
   23.82 -to their consoles.  Commands are issued to \xend over an HTTP
   23.83 -interface, either from a command-line tool or from a web browser.
   23.84 -
   23.85 -\section{Hardware Support}
   23.86 -
   23.87 -Xen currently runs only on the x86 architecture, requiring a `P6' or
   23.88 -newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
   23.89 -Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
   23.90 -supported, and we also have basic support for HyperThreading (SMT),
   23.91 -although this remains a topic for ongoing research. A port
   23.92 -specifically for x86/64 is in progress, although Xen already runs on
   23.93 -such systems in 32-bit legacy mode. In addition a port to the IA64
   23.94 -architecture is approaching completion. We hope to add other
   23.95 -architectures such as PPC and ARM in due course.
   23.96 -
   23.97 -
   23.98 -Xen can currently use up to 4GB of memory.  It is possible for x86
   23.99 -machines to address up to 64GB of physical memory but there are no
  23.100 -current plans to support these systems: the x86/64 port is the
  23.101 -planned route to supporting larger memory sizes.
  23.102 -
  23.103 -Xen offloads most of the hardware support issues to the guest OS
  23.104 -running in Domain~0.  Xen itself contains only the code required to
  23.105 -detect and start secondary processors, set up interrupt routing, and
  23.106 -perform PCI bus enumeration.  Device drivers run within a privileged
  23.107 -guest OS rather than within Xen itself. This approach provides
  23.108 -compatibility with the majority of device hardware supported by Linux.
  23.109 -The default XenLinux build contains support for relatively modern
  23.110 -server-class network and disk hardware, but you can add support for
  23.111 -other hardware by configuring your XenLinux kernel in the normal way.
  23.112 -
  23.113 -\section{History}
  23.114 -
  23.115 -Xen was originally developed by the Systems Research Group at the
  23.116 -University of Cambridge Computer Laboratory as part of the XenoServers
  23.117 -project, funded by the UK-EPSRC.
  23.118 -XenoServers aim to provide a `public infrastructure for
  23.119 -global distributed computing', and Xen plays a key part in that,
  23.120 -allowing us to efficiently partition a single machine to enable
  23.121 -multiple independent clients to run their operating systems and
  23.122 -applications in an environment providing protection, resource
  23.123 -isolation and accounting.  The project web page contains further
  23.124 -information along with pointers to papers and technical reports:
  23.125 -\path{http://www.cl.cam.ac.uk/xeno} 
  23.126 -
  23.127 -Xen has since grown into a fully-fledged project in its own right,
  23.128 -enabling us to investigate interesting research issues regarding the
  23.129 -best techniques for virtualising resources such as the CPU, memory,
  23.130 -disk and network.  The project has been bolstered by support from
  23.131 -Intel Research Cambridge, and HP Labs, who are now working closely
  23.132 -with us.
  23.133 -
  23.134 -Xen was first described in a paper presented at SOSP in
  23.135 -2003\footnote{\tt
  23.136 -http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the first
  23.137 -public release (1.0) was made that October.  Since then, Xen has
  23.138 -significantly matured and is now used in production scenarios on
  23.139 -many sites.
  23.140 -
  23.141 -Xen 2.0 features greatly enhanced hardware support, configuration
  23.142 -flexibility, usability and a larger complement of supported operating
  23.143 -systems. This latest release takes Xen a step closer to becoming the 
  23.144 -definitive open source solution for virtualisation.
  23.145 -
  23.146 -\chapter{Installation}
  23.147 -
  23.148 -The Xen distribution includes three main components: Xen itself, ports
  23.149 -of Linux 2.4 and 2.6 and NetBSD to run on Xen, and the user-space
  23.150 -tools required to manage a Xen-based system.  This chapter describes
  23.151 -how to install the Xen 2.0 distribution from source.  Alternatively,
  23.152 -there may be pre-built packages available as part of your operating
  23.153 -system distribution.
  23.154 -
  23.155 -\section{Prerequisites}
  23.156 -\label{sec:prerequisites}
  23.157 -
  23.158 -The following is a full list of prerequisites.  Items marked `$\dag$'
  23.159 -are required by the \xend control tools, and hence required if you
  23.160 -want to run more than one virtual machine; items marked `$*$' are only
  23.161 -required if you wish to build from source.
  23.162 -\begin{itemize}
  23.163 -\item A working Linux distribution using the GRUB bootloader and
  23.164 -running on a P6-class (or newer) CPU.
  23.165 -\item [$\dag$] The \path{iproute2} package. 
  23.166 -\item [$\dag$] The Linux bridge-utils\footnote{Available from 
  23.167 -{\tt http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl})
  23.168 -\item [$\dag$] An installation of Twisted v1.3 or
  23.169 -above\footnote{Available from {\tt
  23.170 -http://www.twistedmatrix.com}}. There may be a binary package
  23.171 -available for your distribution; alternatively it can be installed by
  23.172 -running `{\sl make install-twisted}' in the root of the Xen source
  23.173 -tree.
  23.174 -\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
  23.175 -\item [$*$] Development installation of libcurl (e.g., libcurl-devel) 
  23.176 -\item [$*$] Development installation of zlib (e.g., zlib-dev).
  23.177 -\item [$*$] Development installation of Python v2.2 or later (e.g., python-dev).
  23.178 -\item [$*$] \LaTeX and transfig are required to build the documentation.
  23.179 -\end{itemize}
  23.180 -
  23.181 -Once you have satisfied the relevant prerequisites, you can 
  23.182 -now install either a binary or source distribution of Xen. 
  23.183 -
  23.184 -\section{Installing from Binary Tarball} 
  23.185 -
  23.186 -Pre-built tarballs are available for download from the Xen 
  23.187 -download page
  23.188 -\begin{quote} 
  23.189 -{\tt http://xen.sf.net}
  23.190 -\end{quote} 
  23.191 -
  23.192 -Once you've downloaded the tarball, simply unpack and install: 
  23.193 -\begin{verbatim}
  23.194 -# tar zxvf xen-2.0-install.tgz
  23.195 -# cd xen-2.0-install
  23.196 -# sh ./install.sh 
  23.197 -\end{verbatim} 
  23.198 -
  23.199 -Once you've installed the binaries you need to configure
  23.200 -your system as described in Section~\ref{s:configure}. 
  23.201 -
  23.202 -\section{Installing from Source} 
  23.203 -
  23.204 -This section describes how to obtain, build, and install 
  23.205 -Xen from source. 
  23.206 -
  23.207 -\subsection{Obtaining the Source} 
  23.208 -
  23.209 -The Xen source tree is available as either a compressed source
  23.210 -tarball or as a clone of our master BitKeeper repository.
  23.211 -
  23.212 -\begin{description} 
  23.213 -\item[Obtaining the Source Tarball]\mbox{} \\  
  23.214 -Stable versions (and daily snapshots) of the Xen source tree are
  23.215 -available as compressed tarballs from the Xen download page
  23.216 -\begin{quote} 
  23.217 -{\tt http://xen.sf.net}
  23.218 -\end{quote} 
  23.219 -
  23.220 -\item[Using BitKeeper]\mbox{} \\  
  23.221 -If you wish to install Xen from a clone of our latest BitKeeper
  23.222 -repository then you will need to install the BitKeeper tools.
  23.223 -Download instructions for BitKeeper can be obtained by filling out the
  23.224 -form at:
  23.225 -
  23.226 -\begin{quote} 
  23.227 -{\tt http://www.bitmover.com/cgi-bin/download.cgi}
  23.228 -\end{quote}
  23.229 -The public master BK repository for the 2.0 release lives at: 
  23.230 -\begin{quote}
  23.231 -{\tt bk://xen.bkbits.net/xen-2.0.bk}  
  23.232 -\end{quote} 
  23.233 -You can use BitKeeper to
  23.234 -download it and keep it updated with the latest features and fixes.
  23.235 -
  23.236 -Change to the directory in which you want to put the source code, then
  23.237 -run:
  23.238 -\begin{verbatim}
  23.239 -# bk clone bk://xen.bkbits.net/xen-2.0.bk
  23.240 -\end{verbatim}
  23.241 -
  23.242 -Under your current directory, a new directory named \path{xen-2.0.bk}
  23.243 -has been created, which contains all the source code for Xen, the OS
  23.244 -ports, and the control tools. You can update your repository with the
  23.245 -latest changes at any time by running:
  23.246 -\begin{verbatim}
  23.247 -# cd xen-2.0.bk # to change into the local repository
  23.248 -# bk pull       # to update the repository
  23.249 -\end{verbatim}
  23.250 -\end{description} 
  23.251 -
  23.252 -%\section{The distribution}
  23.253 -%
  23.254 -%The Xen source code repository is structured as follows:
  23.255 -%
  23.256 -%\begin{description}
  23.257 -%\item[\path{tools/}] Xen node controller daemon (Xend), command line tools, 
  23.258 -%  control libraries
  23.259 -%\item[\path{xen/}] The Xen VMM.
  23.260 -%\item[\path{linux-*-xen-sparse/}] Xen support for Linux.
  23.261 -%\item[\path{linux-*-patches/}] Experimental patches for Linux.
  23.262 -%\item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
  23.263 -%\item[\path{docs/}] Various documentation files for users and developers.
  23.264 -%\item[\path{extras/}] Bonus extras.
  23.265 -%\end{description}
  23.266 -
  23.267 -\subsection{Building from Source} 
  23.268 -
  23.269 -The top-level Xen Makefile includes a target `world' that will do the
  23.270 -following:
  23.271 -
  23.272 -\begin{itemize}
  23.273 -\item Build Xen
  23.274 -\item Build the control tools, including \xend
  23.275 -\item Download (if necessary) and unpack the Linux 2.6 source code,
  23.276 -      and patch it for use with Xen
  23.277 -\item Build a Linux kernel to use in domain 0 and a smaller
  23.278 -      unprivileged kernel, which can optionally be used for
  23.279 -      unprivileged virtual machines.
  23.280 -\end{itemize}
  23.281 -
  23.282 -
  23.283 -After the build has completed you should have a top-level 
  23.284 -directory called \path{dist/} in which all resulting targets 
  23.285 -will be placed; of particular interest are the two kernels 
  23.286 -XenLinux kernel images, one with a `-xen0' extension
  23.287 -which contains hardware device drivers and drivers for Xen's virtual
  23.288 -devices, and one with a `-xenU' extension that just contains the
  23.289 -virtual ones. These are found in \path{dist/install/boot/} along
  23.290 -with the image for Xen itself and the configuration files used
  23.291 -during the build. 
  23.292  
  23.293 -The NetBSD port can be built using: 
  23.294 -\begin{quote}
  23.295 -\begin{verbatim}
  23.296 -# make netbsd20
  23.297 -\end{verbatim} 
  23.298 -\end{quote} 
  23.299 -The NetBSD port is built using a snapshot of the netbsd-2-0 CVS branch.
  23.300 -The snapshot is downloaded as part of the build process, if it is not
  23.301 -yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
  23.302 -process also downloads a toolchain which includes all the tools
  23.303 -necessary to build the NetBSD kernel under Linux.
  23.304 -
  23.305 -To further customize the set of kernels built, you need to edit
  23.306 -the top-level Makefile. Look for the line: 
  23.307 -
  23.308 -\begin{quote}
  23.309 -\begin{verbatim}
  23.310 -KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
  23.311 -\end{verbatim} 
  23.312 -\end{quote} 
  23.313 -
  23.314 -You can edit this line to include any set of operating system kernels
  23.315 -which have configurations in the top-level \path{buildconfigs/}
  23.316 -directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
  23.317 -kernel containing only virtual device drivers.
  23.318 -
  23.319 -%% Inspect the Makefile if you want to see what goes on during a build.
  23.320 -%% Building Xen and the tools is straightforward, but XenLinux is more
  23.321 -%% complicated.  The makefile needs a `pristine' Linux kernel tree to which
  23.322 -%% it will then add the Xen architecture files.  You can tell the
  23.323 -%% makefile the location of the appropriate Linux compressed tar file by
  23.324 -%% setting the LINUX\_SRC environment variable, e.g. \\
  23.325 -%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
  23.326 -%% placing the tar file somewhere in the search path of {\tt
  23.327 -%% LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the makefile
  23.328 -%% can't find a suitable kernel tar file it attempts to download it from
  23.329 -%% kernel.org (this won't work if you're behind a firewall).
  23.330 -
  23.331 -%% After untaring the pristine kernel tree, the makefile uses the {\tt
  23.332 -%% mkbuildtree} script to add the Xen patches to the kernel. 
  23.333 -
  23.334 -
  23.335 -%% The procedure is similar to build the Linux 2.4 port: \\
  23.336 -%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
  23.337 -
  23.338 -
  23.339 -%% \framebox{\parbox{5in}{
  23.340 -%% {\bf Distro specific:} \\
  23.341 -%% {\it Gentoo} --- if not using udev (most installations, currently), you'll need
  23.342 -%% to enable devfs and devfs mount at boot time in the xen0 config.
  23.343 -%% }}
  23.344 -
  23.345 -\subsection{Custom XenLinux Builds}
  23.346 -
  23.347 -% If you have an SMP machine you may wish to give the {\tt '-j4'}
  23.348 -% argument to make to get a parallel build.
  23.349 -
  23.350 -If you wish to build a customized XenLinux kernel (e.g. to support
  23.351 -additional devices or enable distribution-required features), you can
  23.352 -use the standard Linux configuration mechanisms, specifying that the
  23.353 -architecture being built for is \path{xen}, e.g.:
  23.354 -\begin{quote}
  23.355 -\begin{verbatim} 
  23.356 -# cd linux-2.6.11-xen0 
  23.357 -# make ARCH=xen xconfig 
  23.358 -# cd ..
  23.359 -# make
  23.360 -\end{verbatim} 
  23.361 -\end{quote} 
  23.362 -
  23.363 -You can also copy an existing Linux configuration (\path{.config}) 
  23.364 -into \path{linux-2.6.11-xen0} and execute:  
  23.365 -\begin{quote}
  23.366 -\begin{verbatim} 
  23.367 -# make ARCH=xen oldconfig 
  23.368 -\end{verbatim} 
  23.369 -\end{quote} 
  23.370 -
  23.371 -You may be prompted with some Xen-specific options; we 
  23.372 -advise accepting the defaults for these options.
  23.373 -
  23.374 -Note that the only difference between the two types of Linux kernel
  23.375 -that are built is the configuration file used for each.  The `U'
  23.376 -suffixed (unprivileged) versions don't contain any of the physical
  23.377 -hardware device drivers, leading to a 30\% reduction in size; hence
  23.378 -you may prefer these for your non-privileged domains.  The `0'
  23.379 -suffixed privileged versions can be used to boot the system, as well
  23.380 -as in driver domains and unprivileged domains.
  23.381 -
  23.382 -
  23.383 -\subsection{Installing the Binaries}
  23.384 -
  23.385 -
  23.386 -The files produced by the build process are stored under the
  23.387 -\path{dist/install/} directory. To install them in their default
  23.388 -locations, do:
  23.389 -\begin{quote}
  23.390 -\begin{verbatim}
  23.391 -# make install
  23.392 -\end{verbatim} 
  23.393 -\end{quote}
  23.394 -
  23.395 -
  23.396 -Alternatively, users with special installation requirements may wish
  23.397 -to install them manually by copying the files to their appropriate
  23.398 -destinations.
  23.399 -
  23.400 -%% Files in \path{install/boot/} include:
  23.401 -%% \begin{itemize}
  23.402 -%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
  23.403 -%% \item \path{install/boot/vmlinuz-2.6-xen0}  Link to domain 0 XenLinux kernel
  23.404 -%% \item \path{install/boot/vmlinuz-2.6-xenU}  Link to unprivileged XenLinux kernel
  23.405 -%% \end{itemize}
  23.406 -
  23.407 -The \path{dist/install/boot} directory will also contain the config files
  23.408 -used for building the XenLinux kernels, and also versions of Xen and
  23.409 -XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6} and
  23.410 -\path{vmlinux-syms-2.6.11.11-xen0}) which are essential for interpreting crash
  23.411 -dumps.  Retain these files as the developers may wish to see them if
  23.412 -you post on the mailing list.
  23.413 -
  23.414 -
  23.415 -
  23.416 -
  23.417 -
  23.418 -\section{Configuration}
  23.419 -\label{s:configure}
  23.420 -Once you have built and installed the Xen distribution, it is 
  23.421 -simple to prepare the machine for booting and running Xen. 
  23.422 -
  23.423 -\subsection{GRUB Configuration}
  23.424 -
  23.425 -An entry should be added to \path{grub.conf} (often found under
  23.426 -\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
  23.427 -This file is sometimes called \path{menu.lst}, depending on your
  23.428 -distribution.  The entry should look something like the following:
  23.429 -
  23.430 -{\small
  23.431 -\begin{verbatim}
  23.432 -title Xen 2.0 / XenLinux 2.6
  23.433 -  kernel /boot/xen-2.0.gz dom0_mem=131072
  23.434 -  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
  23.435 -\end{verbatim}
  23.436 -}
  23.437 +%% Chapter Introduction moved to introduction.tex
  23.438 +\include{src/user/introduction}
  23.439  
  23.440 -The kernel line tells GRUB where to find Xen itself and what boot
  23.441 -parameters should be passed to it (in this case, setting domain 0's
  23.442 -memory allocation in kilobytes). For more
  23.443 -details on the various Xen boot parameters see Section~\ref{s:xboot}. 
  23.444 -
  23.445 -The module line of the configuration describes the location of the
  23.446 -XenLinux kernel that Xen should start and the parameters that should
  23.447 -be passed to it (these are standard Linux parameters, identifying the
  23.448 -root device and specifying it be initially mounted read only and
  23.449 -instructing that console output be sent to the screen).  Some
  23.450 -distributions such as SuSE do not require the \path{ro} parameter.
  23.451 -
  23.452 -%% \framebox{\parbox{5in}{
  23.453 -%% {\bf Distro specific:} \\
  23.454 -%% {\it SuSE} --- Omit the {\tt ro} option from the XenLinux kernel
  23.455 -%% command line, since the partition won't be remounted rw during boot.
  23.456 -%% }}
  23.457 -
  23.458 -
  23.459 -If you want to use an initrd, just add another \path{module} line to
  23.460 -the configuration, as usual:
  23.461 -{\small
  23.462 -\begin{verbatim}
  23.463 -  module /boot/my_initrd.gz
  23.464 -\end{verbatim}
  23.465 -}
  23.466 -
  23.467 -As always when installing a new kernel, it is recommended that you do
  23.468 -not delete existing menu options from \path{menu.lst} --- you may want
  23.469 -to boot your old Linux kernel in future, particularly if you
  23.470 -have problems.
  23.471 -
  23.472 -
  23.473 -\subsection{Serial Console (optional)}
  23.474 -
  23.475 -%%   kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
  23.476 -%%   module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro 
  23.477 -
  23.478 -
  23.479 -In order to configure Xen serial console output, it is necessary to add 
  23.480 -a boot option to your GRUB config; e.g. replace the above kernel line 
  23.481 -with: 
  23.482 -\begin{quote}
  23.483 -{\small
  23.484 -\begin{verbatim}
  23.485 -   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
  23.486 -\end{verbatim}}
  23.487 -\end{quote}
  23.488 -
  23.489 -This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 
  23.490 -1 stop bit and no parity. Modify these parameters for your setup. 
  23.491 -
  23.492 -One can also configure XenLinux to share the serial console; to 
  23.493 -achieve this append ``\path{console=ttyS0}'' to your 
  23.494 -module line. 
  23.495 -
  23.496 -
  23.497 -If you wish to be able to log in over the XenLinux serial console it
  23.498 -is necessary to add a line into \path{/etc/inittab}, just as per 
  23.499 -regular Linux. Simply add the line:
  23.500 -\begin{quote}
  23.501 -{\small 
  23.502 -{\tt c:2345:respawn:/sbin/mingetty ttyS0}
  23.503 -}
  23.504 -\end{quote} 
  23.505 -
  23.506 -and you should be able to log in. Note that in most modern
  23.507 -distributions, logging in as root over the serial line requires
  23.508 -adding \path{ttyS0} to \path{/etc/securetty}. 
  23.509 -
  23.510 -\subsection{TLS Libraries}
  23.511 -
  23.512 -Users of the XenLinux 2.6 kernel should disable Thread Local Storage
  23.513 -(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
  23.514 -attempting to run with a XenLinux kernel\footnote{If you boot without first
  23.515 -disabling TLS, you will get a warning message during the boot
  23.516 -process. In this case, simply perform the rename after the machine is
  23.517 -up and then run \texttt{/sbin/ldconfig} to make it take effect.}.  You can
  23.518 -always reenable it by restoring the directory to its original location
  23.519 -(i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
  23.520 -
  23.521 -The reason for this is that the current TLS implementation uses
  23.522 -segmentation in a way that is not permissible under Xen.  If TLS is
  23.523 -not disabled, an emulation mode is used within Xen which reduces
  23.524 -performance substantially.
  23.525 -
  23.526 -We hope that this issue can be resolved by working with Linux
  23.527 -distribution vendors to implement a minor backward-compatible change
  23.528 -to the TLS library.
  23.529 -
  23.530 -\section{Booting Xen} 
  23.531 -
  23.532 -It should now be possible to restart the system and use Xen.  Reboot
  23.533 -as usual but choose the new Xen option when the GRUB screen appears.
  23.534 -
  23.535 -What follows should look much like a conventional Linux boot.  The
  23.536 -first portion of the output comes from Xen itself, supplying low level
  23.537 -information about itself and the machine it is running on.  The
  23.538 -following portion of the output comes from XenLinux.
  23.539 -
  23.540 -You may see some errors during the XenLinux boot.  These are not
  23.541 -necessarily anything to worry about --- they may result from kernel
  23.542 -configuration differences between your XenLinux kernel and the one you
  23.543 -usually use.
  23.544 -
  23.545 -When the boot completes, you should be able to log into your system as
  23.546 -usual.  If you are unable to log in to your system running Xen, you
  23.547 -should still be able to reboot with your normal Linux kernel.
  23.548 -
  23.549 -
  23.550 -\chapter{Starting Additional Domains}
  23.551 -
  23.552 -The first step in creating a new domain is to prepare a root
  23.553 -filesystem for it to boot from.  Typically, this might be stored in a
  23.554 -normal partition, an LVM or other volume manager partition, a disk
  23.555 -file or on an NFS server.  A simple way to do this is to boot
  23.556 -from your standard OS install CD and install the distribution into
  23.557 -another partition on your hard drive.
  23.558 -
  23.559 -To start the \xend control daemon, type
  23.560 -\begin{quote}
  23.561 -\verb!# xend start!
  23.562 -\end{quote}
  23.563 -If you
  23.564 -wish the daemon to start automatically, see the instructions in
  23.565 -Section~\ref{s:xend}. Once the daemon is running, you can use the
  23.566 -\path{xm} tool to monitor and maintain the domains running on your
  23.567 -system. This chapter provides only a brief tutorial: we provide full
  23.568 -details of the \path{xm} tool in the next chapter. 
  23.569 -
  23.570 -%\section{From the web interface}
  23.571 -%
  23.572 -%Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv} for
  23.573 -%more details) using the command: \\
  23.574 -%\verb_# xensv start_ \\
  23.575 -%This will also start Xend (see Chapter~\ref{cha:xend} for more information).
  23.576 -%
  23.577 -%The domain management interface will then be available at {\tt
  23.578 -%http://your\_machine:8080/}.  This provides a user friendly wizard for
  23.579 -%starting domains and functions for managing running domains.
  23.580 -%
  23.581 -%\section{From the command line}
  23.582 -
  23.583 -
  23.584 -\section{Creating a Domain Configuration File} 
  23.585 +%% Chapter Installation moved to installation.tex
  23.586 +\include{src/user/installation}
  23.587  
  23.588 -Before you can start an additional domain, you must create a
  23.589 -configuration file. We provide two example files which you 
  23.590 -can use as a starting point: 
  23.591 -\begin{itemize} 
  23.592 -  \item \path{/etc/xen/xmexample1} is a simple template configuration file
  23.593 -    for describing a single VM.
  23.594 -
  23.595 -  \item The \path{/etc/xen/xmexample2} file is a template description that
  23.596 -    is intended to be reused for multiple virtual machines.  Setting
  23.597 -    the value of the \path{vmid} variable on the \path{xm} command line
  23.598 -    fills in parts of this template.
  23.599 -\end{itemize} 
  23.600 -
  23.601 -Copy one of these files and edit it as appropriate.
  23.602 -Typical values you may wish to edit include: 
  23.603 -
  23.604 -\begin{quote}
  23.605 -\begin{description}
  23.606 -\item[kernel] Set this to the path of the kernel you compiled for use
  23.607 -              with Xen (e.g.\  \path{kernel = '/boot/vmlinuz-2.6-xenU'})
  23.608 -\item[memory] Set this to the size of the domain's memory in
  23.609 -megabytes (e.g.\ \path{memory = 64})
  23.610 -\item[disk] Set the first entry in this list to calculate the offset
  23.611 -of the domain's root partition, based on the domain ID.  Set the
  23.612 -second to the location of \path{/usr} if you are sharing it between
  23.613 -domains (e.g.\ \path{disk = ['phy:your\_hard\_drive\%d,sda1,w' \%
  23.614 -(base\_partition\_number + vmid), 'phy:your\_usr\_partition,sda6,r' ]})
  23.615 -\item[dhcp] Uncomment the dhcp variable, so that the domain will
  23.616 -receive its IP address from a DHCP server (e.g.\ \path{dhcp='dhcp'})
  23.617 -\end{description}
  23.618 -\end{quote}
  23.619 -
  23.620 -You may also want to edit the {\bf vif} variable in order to choose
  23.621 -the MAC address of the virtual ethernet interface yourself.  For
  23.622 -example: 
  23.623 -\begin{quote}
  23.624 -\verb_vif = ['mac=00:06:AA:F6:BB:B3']_
  23.625 -\end{quote}
  23.626 -If you do not set this variable, \xend will automatically generate a
  23.627 -random MAC address from an unused range.
  23.628 -
  23.629 -
  23.630 -\section{Booting the Domain}
  23.631 -
  23.632 -The \path{xm} tool provides a variety of commands for managing domains.
  23.633 -Use the \path{create} command to start new domains. Assuming you've 
  23.634 -created a configuration file \path{myvmconf} based around
  23.635 -\path{/etc/xen/xmexample2}, to start a domain with virtual 
  23.636 -machine ID~1 you should type: 
  23.637 -
  23.638 -\begin{quote}
  23.639 -\begin{verbatim}
  23.640 -# xm create -c myvmconf vmid=1
  23.641 -\end{verbatim}
  23.642 -\end{quote}
  23.643 -
  23.644 -
  23.645 -The \path{-c} switch causes \path{xm} to attach to the domain's
  23.646 -console after creation.  The \path{vmid=1} sets the \path{vmid}
  23.647 -variable used in the \path{myvmconf} file. 
  23.648 -
  23.649 -
  23.650 -You should see the console boot messages from the new domain 
  23.651 -appearing in the terminal in which you typed the command, 
  23.652 -culminating in a login prompt. 
  23.653 -
  23.654 -
  23.655 -\section{Example: ttylinux}
  23.656 -
  23.657 -Ttylinux is a very small Linux distribution, designed to require very
  23.658 -few resources.  We will use it as a concrete example of how to start a
  23.659 -Xen domain.  Most users will probably want to install a full-featured
  23.660 -distribution once they have mastered the basics\footnote{ttylinux is
  23.661 -maintained by Pascal Schmidt. You can download source packages from
  23.662 -the distribution's home page: {\tt http://www.minimalinux.org/ttylinux/}}.
  23.663 -
  23.664 -\begin{enumerate}
  23.665 -\item Download and extract the ttylinux disk image from the Files
  23.666 -section of the project's SourceForge site (see 
  23.667 -\path{http://sf.net/projects/xen/}).
  23.668 -\item Create a configuration file like the following:
  23.669 -\begin{verbatim}
  23.670 -kernel = "/boot/vmlinuz-2.6-xenU"
  23.671 -memory = 64
  23.672 -name = "ttylinux"
  23.673 -nics = 1
  23.674 -ip = "1.2.3.4"
  23.675 -disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
  23.676 -root = "/dev/sda1 ro"
  23.677 -\end{verbatim}
  23.678 -\item Now start the domain and connect to its console:
  23.679 -\begin{verbatim}
  23.680 -xm create configfile -c
  23.681 -\end{verbatim}
  23.682 -\item Log in as root, password root.
  23.683 -\end{enumerate}
  23.684 -
  23.685 -
  23.686 -\section{Starting / Stopping Domains Automatically}
  23.687 -
  23.688 -It is possible to have certain domains start automatically at boot
  23.689 -time and to have dom0 wait for all running domains to shut down before
  23.690 -it shuts down the system.
  23.691 -
  23.692 -To specify that a domain should start at boot-time, place its
  23.693 -configuration file (or a link to it) under \path{/etc/xen/auto/}.
  23.694 -
  23.695 -A Sys-V style init script for RedHat and LSB-compliant systems is
  23.696 -provided and will be automatically copied to \path{/etc/init.d/}
  23.697 -during install.  You can then enable it in the appropriate way for
  23.698 -your distribution.
  23.699 -
  23.700 -For instance, on RedHat:
  23.701 -
  23.702 -\begin{quote}
  23.703 -\verb_# chkconfig --add xendomains_
  23.704 -\end{quote}
  23.705 -
  23.706 -By default, this will start the boot-time domains in runlevels 3, 4
  23.707 -and 5.
  23.708 -
  23.709 -You can also use the \path{service} command to run this script
  23.710 -manually, e.g.:
  23.711 -
  23.712 -\begin{quote}
  23.713 -\verb_# service xendomains start_
  23.714 -
  23.715 -Starts all the domains with config files under /etc/xen/auto/.
  23.716 -\end{quote}
  23.717 -
  23.718 -
  23.719 -\begin{quote}
  23.720 -\verb_# service xendomains stop_
  23.721 -
  23.722 -Shuts down ALL running Xen domains.
  23.723 -\end{quote}
  23.724 -
  23.725 -\chapter{Domain Management Tools}
  23.726 -
  23.727 -The previous chapter described a simple example of how to configure
  23.728 -and start a domain.  This chapter summarises the tools available to
  23.729 -manage running domains.
  23.730 -
  23.731 -\section{Command-line Management}
  23.732 -
  23.733 -Command line management tasks are also performed using the \path{xm}
  23.734 -tool.  For online help for the commands available, type:
  23.735 -\begin{quote}
  23.736 -\verb_# xm help_
  23.737 -\end{quote}
  23.738 -
  23.739 -You can also type \path{xm help $<$command$>$} for more information 
  23.740 -on a given command. 
  23.741 -
  23.742 -\subsection{Basic Management Commands}
  23.743 -
  23.744 -The most important \path{xm} commands are: 
  23.745 -\begin{quote}
  23.746 -\verb_# xm list_: Lists all domains running.\\
  23.747 -\verb_# xm consoles_: Gives information about the domain consoles.\\
  23.748 -\verb_# xm console_: Opens a console to a domain (e.g.\
  23.749 -  \verb_# xm console myVM_)
  23.750 -\end{quote}
  23.751 -
  23.752 -\subsection{\tt xm list}
  23.753 -
  23.754 -The output of \path{xm list} is in rows of the following format:
  23.755 -\begin{center}
  23.756 -{\tt name domid memory cpu state cputime console}
  23.757 -\end{center}
  23.758 -
  23.759 -\begin{quote}
  23.760 -\begin{description}
  23.761 -\item[name]  The descriptive name of the virtual machine.
  23.762 -\item[domid] The domain ID number under which this virtual machine is running.
  23.763 -\item[memory] Memory size in megabytes.
  23.764 -\item[cpu]   The CPU this domain is running on.
  23.765 -\item[state] Domain state consists of 5 fields:
  23.766 -  \begin{description}
  23.767 -  \item[r] running
  23.768 -  \item[b] blocked
  23.769 -  \item[p] paused
  23.770 -  \item[s] shutdown
  23.771 -  \item[c] crashed
  23.772 -  \end{description}
  23.773 -\item[cputime] How much CPU time (in seconds) the domain has used so far.
  23.774 -\item[console] TCP port accepting connections to the domain's console.
  23.775 -\end{description}
  23.776 -\end{quote}
  23.777 -
  23.778 -The \path{xm list} command also supports a long output format when the
  23.779 -\path{-l} switch is used.  This outputs the full details of the
  23.780 -running domains in \xend's SXP configuration format.
  23.781 -
  23.782 -For example, suppose the system is running the ttylinux domain as
  23.783 -described earlier.  The list command should produce output somewhat
  23.784 -like the following:
  23.785 -\begin{verbatim}
  23.786 -# xm list
  23.787 -Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  23.788 -Domain-0           0      251    0  r----    172.2        
  23.789 -ttylinux           5       63    0  -b---      3.0    9605
  23.790 -\end{verbatim}
  23.791 -
  23.792 -Here we can see the details for the ttylinux domain, as well as for
  23.793 -domain 0 (which, of course, is always running).  Note that the console
  23.794 -port for the ttylinux domain is 9605.  This can be connected to over TCP
  23.795 -using a terminal program (e.g. \path{telnet} or, better, 
  23.796 -\path{xencons}).  The simplest way to connect is to use the \path{xm console}
  23.797 -command, specifying the domain name or ID.  To connect to the console
  23.798 -of the ttylinux domain, we could use any of the following: 
  23.799 -\begin{verbatim}
  23.800 -# xm console ttylinux
  23.801 -# xm console 5
  23.802 -# xencons localhost 9605
  23.803 -\end{verbatim}
  23.804 -
  23.805 -\section{Domain Save and Restore}
  23.806 -
  23.807 -The administrator of a Xen system may suspend a virtual machine's
  23.808 -current state into a disk file in domain 0, allowing it to be resumed
  23.809 -at a later time.
  23.810 -
  23.811 -The ttylinux domain described earlier can be suspended to disk using
  23.812 -the command:
  23.813 -\begin{verbatim}
  23.814 -# xm save ttylinux ttylinux.xen
  23.815 -\end{verbatim}
  23.816 -
  23.817 -This will stop the domain named `ttylinux' and save its current state
  23.818 -into a file called \path{ttylinux.xen}.
  23.819 -
  23.820 -To resume execution of this domain, use the \path{xm restore} command:
  23.821 -\begin{verbatim}
  23.822 -# xm restore ttylinux.xen
  23.823 -\end{verbatim}
  23.824 -
  23.825 -This will restore the state of the domain and restart it.  The domain
  23.826 -will carry on as before and the console may be reconnected using the
  23.827 -\path{xm console} command, as above.
  23.828 -
  23.829 -\section{Live Migration}
  23.830 -
  23.831 -Live migration is used to transfer a domain between physical hosts
  23.832 -whilst that domain continues to perform its usual activities --- from
  23.833 -the user's perspective, the migration should be imperceptible.
  23.834 -
  23.835 -To perform a live migration, both hosts must be running Xen / \xend and
  23.836 -the destination host must have sufficient resources (e.g. memory
  23.837 -capacity) to accommodate the domain after the move. Furthermore we
  23.838 -currently require both source and destination machines to be on the 
  23.839 -same L2 subnet. 
  23.840 -
  23.841 -Currently, there is no support for providing automatic remote access
  23.842 -to filesystems stored on local disk when a domain is migrated.
  23.843 -Administrators should choose an appropriate storage solution
  23.844 -(e.g.\ SAN or NAS) to ensure that domain filesystems are also
  23.845 -available on their destination node. GNBD is a good method for
  23.846 -exporting a volume from one machine to another. iSCSI can do a similar
  23.847 -job, but is more complex to set up.
  23.848 -
  23.849 -When a domain migrates, its MAC and IP address move with it; thus it
  23.850 -is only possible to migrate VMs within the same layer-2 network and IP
  23.851 -subnet. If the destination node is on a different subnet, the
  23.852 -administrator would need to manually configure a suitable etherip or
  23.853 -IP tunnel in the domain 0 of the remote node. 
  23.854 -
  23.855 -A domain may be migrated using the \path{xm migrate} command.  To
  23.856 -live migrate a domain to another machine, we would use
  23.857 -the command:
  23.858 -
  23.859 -\begin{verbatim}
  23.860 -# xm migrate --live mydomain destination.ournetwork.com
  23.861 -\end{verbatim}
  23.862 -
  23.863 -Without the \path{--live} flag, \xend simply stops the domain and
  23.864 -copies the memory image over to the new node and restarts it. Since
  23.865 -domains can have large allocations this can be quite time-consuming,
  23.866 -even on a Gigabit network. With the \path{--live} flag \xend attempts
  23.867 -to keep the domain running while the migration is in progress,
  23.868 -resulting in typical `downtimes' of just 60--300ms.
  23.869 -
  23.870 -For now it will be necessary to reconnect to the domain's console on
  23.871 -the new machine using the \path{xm console} command.  If a migrated
  23.872 -domain has any open network connections then they will be preserved,
  23.873 -so SSH connections do not have this limitation.
  23.874 -
  23.875 -\section{Managing Domain Memory}
  23.876 -
  23.877 -XenLinux domains have the ability to relinquish / reclaim machine
  23.878 -memory at the request of the administrator or the user of the domain.
  23.879 +%% Chapter Starting Additional Domains  moved to start_addl_dom.tex
  23.880 +\include{src/user/start_addl_dom}
  23.881  
  23.882 -\subsection{Setting memory footprints from dom0}
  23.883 -
  23.884 -The machine administrator can request that a domain alter its memory
  23.885 -footprint using the \path{xm set-mem} command.  For instance, we can
  23.886 -request that our example ttylinux domain reduce its memory footprint
  23.887 -to 32 megabytes.
  23.888 -
  23.889 -\begin{verbatim}
  23.890 -# xm set-mem ttylinux 32
  23.891 -\end{verbatim}
  23.892 -
  23.893 -We can now see the result of this in the output of \path{xm list}:
  23.894 -
  23.895 -\begin{verbatim}
  23.896 -# xm list
  23.897 -Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  23.898 -Domain-0           0      251    0  r----    172.2        
  23.899 -ttylinux           5       31    0  -b---      4.3    9605
  23.900 -\end{verbatim}
  23.901 -
  23.902 -The domain has responded to the request by returning memory to Xen. We
  23.903 -can restore the domain to its original size using the command:
  23.904 -
  23.905 -\begin{verbatim}
  23.906 -# xm set-mem ttylinux 64
  23.907 -\end{verbatim}
  23.908 -
  23.909 -\subsection{Setting memory footprints from within a domain}
  23.910 -
  23.911 -The virtual file \path{/proc/xen/balloon} allows the owner of a
  23.912 -domain to adjust their own memory footprint.  Reading the file
  23.913 -(e.g. \path{cat /proc/xen/balloon}) prints out the current
  23.914 -memory footprint of the domain.  Writing the file
  23.915 -(e.g. \path{echo new\_target > /proc/xen/balloon}) requests
  23.916 -that the kernel adjust the domain's memory footprint to a new value.
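         -
         -For example, a session inside the domain might look like the
         -following (a sketch only --- the exact output format, and the units
         -in which the new target is expressed, depend on the XenLinux
         -version in use):
         -
         -\begin{verbatim}
         -# cat /proc/xen/balloon
         -# echo 65536 > /proc/xen/balloon
         -\end{verbatim}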
  23.917 -
  23.918 -\subsection{Setting memory limits}
  23.919 -
  23.920 -Xen associates a memory size limit with each domain.  By default, this
  23.921 -is the amount of memory the domain is originally started with,
  23.922 -preventing the domain from ever growing beyond this size.  To permit a
  23.923 -domain to grow beyond its original allocation or to prevent a domain
  23.924 -you've shrunk from reclaiming the memory it relinquished, use the 
  23.925 -\path{xm maxmem} command.
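         -
         -For example, to allow the ttylinux domain above to grow back to (or
         -beyond) its original 64 megabytes (a sketch --- see \path{xm help
         -maxmem} for the exact argument syntax on your installation):
         -
         -\begin{verbatim}
         -# xm maxmem ttylinux 128
         -\end{verbatim}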
  23.926 -
  23.927 -\chapter{Domain Filesystem Storage}
  23.928 -
  23.929 -It is possible to directly export any Linux block device in dom0 to
  23.930 -another domain, or to export filesystems / devices to virtual machines
  23.931 -using standard network protocols (e.g. NBD, iSCSI, NFS, etc).  This
  23.932 -chapter covers some of the possibilities.
  23.933 -
  23.934 -
  23.935 -\section{Exporting Physical Devices as VBDs} 
  23.936 -\label{s:exporting-physical-devices-as-vbds}
  23.937 -
  23.938 -One of the simplest configurations is to directly export 
  23.939 -individual partitions from domain 0 to other domains. To 
  23.940 -achieve this use the \path{phy:} specifier in your domain 
  23.941 -configuration file. For example a line like
  23.942 -\begin{quote}
  23.943 -\verb_disk = ['phy:hda3,sda1,w']_
  23.944 -\end{quote}
  23.945 -specifies that the partition \path{/dev/hda3} in domain 0 
  23.946 -should be exported read-write to the new domain as \path{/dev/sda1}; 
  23.947 -one could equally well export it as \path{/dev/hda} or 
  23.948 -\path{/dev/sdb5} should one wish. 
  23.949 -
  23.950 -In addition to local disks and partitions, it is possible to export
  23.951 -any device that Linux considers to be ``a disk'' in the same manner.
  23.952 -For example, if you have iSCSI disks or GNBD volumes imported into
  23.953 -domain 0 you can export these to other domains using the \path{phy:}
  23.954 -disk syntax. E.g.:
  23.955 -\begin{quote}
  23.956 -\verb_disk = ['phy:vg/lvm1,sda2,w']_
  23.957 -\end{quote}
  23.958 -
  23.959 -
  23.960 -
  23.961 -\begin{center}
  23.962 -\framebox{\bf Warning: Block device sharing}
  23.963 -\end{center}
  23.964 -\begin{quote}
  23.965 -Block devices should typically only be shared between domains in a
  23.966 -read-only fashion otherwise the Linux kernel's file systems will get
  23.967 -very confused as the file system structure may change underneath them
  23.968 -(having the same ext3 partition mounted rw twice is a sure-fire way to
  23.969 -cause irreparable damage)!  \Xend will attempt to prevent you from
  23.970 -doing this by checking that the device is not mounted read-write in
  23.971 -domain 0, and hasn't already been exported read-write to another
  23.972 -domain.
  23.973 -If you want read-write sharing, export the directory to other domains
  23.974 -via NFS from domain0 (or use a cluster file system such as GFS or
  23.975 -ocfs2).
  23.976 -
  23.977 -\end{quote}
  23.978 -
  23.979 -
  23.980 -\section{Using File-backed VBDs}
  23.981 -
  23.982 -It is also possible to use a file in Domain 0 as the primary storage
  23.983 -for a virtual machine.  As well as being convenient, this also has the
  23.984 -advantage that the virtual block device will be {\em sparse} --- space
  23.985 -will only really be allocated as parts of the file are used.  So if a
  23.986 -virtual machine uses only half of its disk space then the file really
  23.987 -takes up half of the size allocated.
  23.988 -
  23.989 -For example, to create a 2GB sparse file-backed virtual block device
  23.990 -(which actually consumes only about 1KB of disk space):
  23.991 -\begin{quote}
  23.992 -\verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
  23.993 -\end{quote}
  23.994 -
  23.995 -Make a file system in the disk file: 
  23.996 -\begin{quote}
  23.997 -\verb_# mkfs -t ext3 vm1disk_
  23.998 -\end{quote}
  23.999 -
 23.1000 -(when the tool asks for confirmation, answer `y')
 23.1001 -
 23.1002 -Populate the file system e.g. by copying from the current root:
 23.1003 -\begin{quote}
 23.1004 -\begin{verbatim}
 23.1005 -# mount -o loop vm1disk /mnt
 23.1006 -# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
 23.1007 -# mkdir /mnt/{proc,sys,home,tmp}
 23.1008 -\end{verbatim}
 23.1009 -\end{quote}
 23.1010 -
 23.1011 -Tailor the file system by editing \path{/etc/fstab},
 23.1012 -\path{/etc/hostname}, etc. Don't forget to edit the files in the
 23.1013 -mounted file system rather than those of your domain 0 filesystem,
 23.1014 -e.g. you would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}.
 23.1015 -For this example, use \path{/dev/sda1} as the root device in fstab.
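         -
         -For instance, the root entry in \path{/mnt/etc/fstab} might read:
         -
         -\begin{quote}
         -\begin{verbatim}
         -/dev/sda1       /       ext3    errors=remount-ro       0       1
         -\end{verbatim}
         -\end{quote}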
 23.1016 -
 23.1017 -Now unmount (this is important!):
 23.1018 -\begin{quote}
 23.1019 -\verb_# umount /mnt_
 23.1020 -\end{quote}
 23.1021 -
 23.1022 -In the configuration file set:
 23.1023 -\begin{quote}
 23.1024 -\verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
 23.1025 -\end{quote}
 23.1026 +%% Chapter Domain Management Tools moved to domain_mgmt.tex
 23.1027 +\include{src/user/domain_mgmt}
 23.1028  
 23.1029 -As the virtual machine writes to its `disk', the sparse file will be
 23.1030 -filled in and consume more space up to the original 2GB.
 23.1031 -
 23.1032 -{\bf Note that file-backed VBDs may not be appropriate for backing
 23.1033 -I/O-intensive domains.}  File-backed VBDs are known to experience
 23.1034 -substantial slowdowns under heavy I/O workloads, due to the I/O handling
 23.1035 -by the loopback block device used to support file-backed VBDs in dom0.
 23.1036 -Better I/O performance can be achieved by using either LVM-backed VBDs
 23.1037 -(Section~\ref{s:using-lvm-backed-vbds}) or physical devices as VBDs
 23.1038 -(Section~\ref{s:exporting-physical-devices-as-vbds}).
 23.1039 -
 23.1040 -Linux supports a maximum of eight file-backed VBDs across all domains by
 23.1041 -default.  This limit can be statically increased by using the {\em
 23.1042 -max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is compiled as a
 23.1043 -module in the dom0 kernel, or by using the {\em max\_loop=n} boot option
 23.1044 -if CONFIG\_BLK\_DEV\_LOOP is compiled directly into the dom0 kernel.
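         -
         -For example, to raise the limit to 64 loop devices (an illustrative
         -value), either load the module with the parameter:
         -
         -\begin{quote}
         -\begin{verbatim}
         -# modprobe loop max_loop=64
         -\end{verbatim}
         -\end{quote}
         -
         -or append {\em max\_loop=64} to the dom0 kernel command line,
         -as appropriate.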
 23.1045 -
 23.1046 -
 23.1047 -\section{Using LVM-backed VBDs}
 23.1048 -\label{s:using-lvm-backed-vbds}
 23.1049 -
 23.1050 -A particularly appealing solution is to use LVM volumes 
 23.1051 -as backing for domain file-systems since this allows dynamic
 23.1052 -growing/shrinking of volumes as well as snapshot and other 
 23.1053 -features. 
 23.1054 -
 23.1055 -To initialise a partition to support LVM volumes:
 23.1056 -\begin{quote}
 23.1057 -\begin{verbatim} 
 23.1058 -# pvcreate /dev/sda10           
 23.1059 -\end{verbatim} 
 23.1060 -\end{quote}
 23.1061 -
 23.1062 -Create a volume group named `vg' on the physical partition:
 23.1063 -\begin{quote}
 23.1064 -\begin{verbatim} 
 23.1065 -# vgcreate vg /dev/sda10
 23.1066 -\end{verbatim} 
 23.1067 -\end{quote}
 23.1068 -
 23.1069 -Create a logical volume of size 4GB named `myvmdisk1':
 23.1070 -\begin{quote}
 23.1071 -\begin{verbatim} 
 23.1072 -# lvcreate -L4096M -n myvmdisk1 vg
 23.1073 -\end{verbatim} 
 23.1074 -\end{quote}
 23.1075 -
 23.1076 -You should now see that you have a \path{/dev/vg/myvmdisk1} device.
 23.1077 -Make a filesystem, mount it and populate it, e.g.:
 23.1078 -\begin{quote}
 23.1079 -\begin{verbatim} 
 23.1080 -# mkfs -t ext3 /dev/vg/myvmdisk1
 23.1081 -# mount /dev/vg/myvmdisk1 /mnt
 23.1082 -# cp -ax / /mnt
 23.1083 -# umount /mnt
 23.1084 -\end{verbatim} 
 23.1085 -\end{quote}
 23.1086 -
 23.1087 -Now configure your VM with the following disk configuration:
 23.1088 -\begin{quote}
 23.1089 -\begin{verbatim} 
 23.1090 - disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
 23.1091 -\end{verbatim} 
 23.1092 -\end{quote}
 23.1093 -
 23.1094 -LVM enables you to grow the size of logical volumes, but you'll need
 23.1095 -to resize the corresponding file system to make use of the new
 23.1096 -space. Some file systems (e.g. ext3) now support on-line resize.  See
 23.1097 -the LVM manuals for more details.
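         -
         -For example, to grow `myvmdisk1' by a further 1GB and then enlarge
         -the ext3 filesystem to match (a sketch --- this resizes the
         -filesystem offline with resize2fs, so unmount it first):
         -
         -\begin{quote}
         -\begin{verbatim}
         -# lvextend -L+1024M /dev/vg/myvmdisk1
         -# e2fsck -f /dev/vg/myvmdisk1
         -# resize2fs /dev/vg/myvmdisk1
         -\end{verbatim}
         -\end{quote}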
 23.1098 +%% Chapter Domain Filesystem Storage moved to domain_filesystem.tex
 23.1099 +\include{src/user/domain_filesystem}
 23.1100  
 23.1101 -You can also use LVM for creating copy-on-write clones of LVM
 23.1102 -volumes (known as writable persistent snapshots in LVM
 23.1103 -terminology). This facility is new in Linux 2.6.8, so isn't as
 23.1104 -stable as one might hope. In particular, using lots of CoW LVM
 23.1105 -disks consumes a lot of dom0 memory, and error conditions such as
 23.1106 -running out of disk space are not handled well. Hopefully this
 23.1107 -will improve in future.
 23.1108 -
 23.1109 -To create two copy-on-write clones of the above file system you
 23.1110 -would use the following commands:
 23.1111 -
 23.1112 -\begin{quote}
 23.1113 -\begin{verbatim} 
 23.1114 -# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
 23.1115 -# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
 23.1116 -\end{verbatim} 
 23.1117 -\end{quote}
 23.1118 -
 23.1119 -Each of these can grow to have 1GB of differences from the master
 23.1120 -volume. You can grow the amount of space for storing the
 23.1121 -differences using the lvextend command, e.g.:
 23.1122 -\begin{quote}
 23.1123 -\begin{verbatim} 
 23.1124 -# lvextend -L+100M /dev/vg/myclonedisk1
 23.1125 -\end{verbatim} 
 23.1126 -\end{quote}
 23.1127 -
 23.1128 -Don't ever let the `differences volume' fill up, otherwise LVM gets
 23.1129 -rather confused. It may be possible to automate the growing
 23.1130 -process by using \path{dmsetup wait} to spot the volume getting full
 23.1131 -and then issuing an \path{lvextend}.
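         -
         -A minimal sketch of such a watcher follows.  The device-mapper name
         -`vg-myclonedisk1' is an assumption, and any device event is taken
         -as a cue to grow the volume; a real script should check the
         -snapshot usage (e.g. via \path{lvdisplay}) before extending.
         -
         -\begin{quote}
         -\begin{verbatim}
         -#!/bin/sh
         -# Hypothetical snapshot auto-grow loop (names assumed, see above).
         -while true; do
         -    dmsetup wait vg-myclonedisk1
         -    lvextend -L+100M /dev/vg/myclonedisk1
         -done
         -\end{verbatim}
         -\end{quote}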
 23.1132 -
 23.1133 -In principle, it is possible to continue writing to the volume
 23.1134 -that has been cloned (the changes will not be visible to the
 23.1135 -clones), but we wouldn't recommend this: have the cloned volume
 23.1136 -as a `pristine' file system install that isn't mounted directly
 23.1137 -by any of the virtual machines.
 23.1138 -
 23.1139 -
 23.1140 -\section{Using NFS Root}
 23.1141 -
 23.1142 -First, populate a root filesystem in a directory on the server
 23.1143 -machine. This can be on a distinct physical machine, or simply 
 23.1144 -run within a virtual machine on the same node.
 23.1145 -
 23.1146 -Now configure the NFS server to export this filesystem over the
 23.1147 -network by adding a line to \path{/etc/exports}, for instance:
 23.1148 -
 23.1149 -\begin{quote}
 23.1150 -\begin{small}
 23.1151 -\begin{verbatim}
 23.1152 -/export/vm1root      1.2.3.4/24(rw,sync,no_root_squash)
 23.1153 -\end{verbatim}
 23.1154 -\end{small}
 23.1155 -\end{quote}
 23.1156 -
 23.1157 -Finally, configure the domain to use NFS root.  In addition to the
 23.1158 -normal variables, you should make sure to set the following values in
 23.1159 -the domain's configuration file:
 23.1160 -
 23.1161 -\begin{quote}
 23.1162 -\begin{small}
 23.1163 -\begin{verbatim}
 23.1164 -root       = '/dev/nfs'
 23.1165 -nfs_server = '2.3.4.5'       # substitute IP address of server 
 23.1166 -nfs_root   = '/path/to/root' # path to root FS on the server
 23.1167 -\end{verbatim}
 23.1168 -\end{small}
 23.1169 -\end{quote}
 23.1170 -
 23.1171 -The domain will need network access at boot time, so either statically
 23.1172 -configure an IP address (using the config variables \path{ip},
 23.1173 -\path{netmask}, \path{gateway}, \path{hostname}) or enable DHCP
 23.1174 -(\path{dhcp='dhcp'}).
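         -
         -For example, a static configuration might add the following to the
         -domain's configuration file (all addresses illustrative):
         -
         -\begin{quote}
         -\begin{small}
         -\begin{verbatim}
         -ip       = '2.3.4.10'
         -netmask  = '255.255.255.0'
         -gateway  = '2.3.4.1'
         -hostname = 'vm1'
         -\end{verbatim}
         -\end{small}
         -\end{quote}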
 23.1175 -
 23.1176 -Note that the Linux NFS root implementation is known to have stability
 23.1177 -problems under high load (this is not a Xen-specific problem), so this
 23.1178 -configuration may not be appropriate for critical servers.
 23.1179  
 23.1180  
 23.1181  \part{User Reference Documentation}
 23.1182  
 23.1183 -\chapter{Control Software} 
 23.1184 -
 23.1185 -The Xen control software includes the \xend node control daemon (which 
 23.1186 -must be running), the xm command line tools, and the prototype 
 23.1187 -xensv web interface. 
 23.1188 -
 23.1189 -\section{\Xend (node control daemon)}
 23.1190 -\label{s:xend}
 23.1191 -
 23.1192 -The Xen Daemon (\Xend) performs system management functions related to
 23.1193 -virtual machines.  It forms a central point of control for a machine
 23.1194 -and can be controlled using an HTTP-based protocol.  \Xend must be
 23.1195 -running in order to start and manage virtual machines.
 23.1196 -
 23.1197 -\Xend must be run as root because it needs access to privileged system
 23.1198 -management functions.  A small set of commands may be issued on the
 23.1199 -\xend command line:
 23.1200 -
 23.1201 -\begin{tabular}{ll}
 23.1202 -\verb!# xend start! & start \xend, if not already running \\
 23.1203 -\verb!# xend stop!  & stop \xend if already running       \\
 23.1204 -\verb!# xend restart! & restart \xend if running, otherwise start it \\
 23.1205 -% \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
 23.1206 -\verb!# xend status! & indicates \xend status by its return code
 23.1207 -\end{tabular}
 23.1208 -
 23.1209 -A SysV init script called {\tt xend} is provided to start \xend at boot
 23.1210 -time.  {\tt make install} installs this script in \path{/etc/init.d}.
 23.1211 -To enable it, you have to make symbolic links in the appropriate
 23.1212 -runlevel directories or use the {\tt chkconfig} tool, where available.
 23.1213 -
 23.1214 -Once \xend is running, more sophisticated administration can be done
 23.1215 -using the xm tool (see Section~\ref{s:xm}) and the experimental
 23.1216 -Xensv web interface (see Section~\ref{s:xensv}).
 23.1217 -
 23.1218 -As \xend runs, events will be logged to \path{/var/log/xend.log} and, 
 23.1219 -if the migration assistant daemon (\path{xfrd}) has been started, 
 23.1220 -\path{/var/log/xfrd.log}. These may be of use for troubleshooting
 23.1221 -problems.
 23.1222 -
 23.1223 -\section{Xm (command line interface)}
 23.1224 -\label{s:xm}
 23.1225 -
 23.1226 -The xm tool is the primary tool for managing Xen from the console.
 23.1227 -The general format of an xm command line is:
 23.1228 -
 23.1229 -\begin{verbatim}
 23.1230 -# xm command [switches] [arguments] [variables]
 23.1231 -\end{verbatim}
 23.1232 -
 23.1233 -The available {\em switches} and {\em arguments} are dependent on the
 23.1234 -{\em command} chosen.  The {\em variables} may be set using
 23.1235 -declarations of the form {\tt variable=value} and command line
 23.1236 -declarations override any of the values in the configuration file
 23.1237 -being used, including the standard variables described above and any
 23.1238 -custom variables (for instance, the \path{xmdefconfig} file uses a
 23.1239 -{\tt vmid} variable).
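         -
         -For example, one might override the {\tt vmid} variable when
         -creating a domain from the example configuration file (an
         -illustrative invocation):
         -
         -\begin{verbatim}
         -# xm create -f /etc/xen/xmexample2 vmid=5
         -\end{verbatim}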
 23.1240 -
 23.1241 -The available commands are as follows:
 23.1242 -
 23.1243 -\begin{description}
 23.1244 -\item[set-mem] Request a domain to adjust its memory footprint.
 23.1245 -\item[create] Create a new domain.
 23.1246 -\item[destroy] Kill a domain immediately.
 23.1247 -\item[list] List running domains.
 23.1248 -\item[shutdown] Ask a domain to shut down.
 23.1249 -\item[dmesg] Fetch the Xen (not Linux!) boot output.
 23.1250 -\item[consoles] List the available consoles.
 23.1251 -\item[console] Connect to the console for a domain.
 23.1252 -\item[help] Get help on xm commands.
 23.1253 -\item[save] Suspend a domain to disk.
 23.1254 -\item[restore] Restore a domain from disk.
 23.1255 -\item[pause] Pause a domain's execution.
 23.1256 -\item[unpause] Unpause a domain.
 23.1257 -\item[pincpu] Pin a domain to a CPU.
 23.1258 -\item[bvt] Set BVT scheduler parameters for a domain.
 23.1259 -\item[bvt\_ctxallow] Set the BVT context switching allowance for the system.
 23.1260 -\item[atropos] Set the atropos parameters for a domain.
 23.1261 -\item[rrobin] Set the round robin time slice for the system.
 23.1262 -\item[info] Get information about the Xen host.
 23.1263 -\item[call] Call a \xend HTTP API function directly.
 23.1264 -\end{description}
 23.1265 -
 23.1266 -For a detailed overview of switches, arguments and variables to each command
 23.1267 -try
 23.1268 -\begin{quote}
 23.1269 -\begin{verbatim}
 23.1270 -# xm help command
 23.1271 -\end{verbatim}
 23.1272 -\end{quote}
 23.1273 -
 23.1274 -\section{Xensv (web control interface)}
 23.1275 -\label{s:xensv}
 23.1276 -
 23.1277 -Xensv is the experimental web control interface for managing a Xen
 23.1278 -machine.  It can be used to perform some (but not yet all) of the
 23.1279 -management tasks that can be done using the xm tool.
 23.1280 -
 23.1281 -It can be started using:
 23.1282 -\begin{quote}
 23.1283 -\verb_# xensv start_
 23.1284 -\end{quote}
 23.1285 -and stopped using: 
 23.1286 -\begin{quote}
 23.1287 -\verb_# xensv stop_
 23.1288 -\end{quote}
 23.1289 -
 23.1290 -By default, Xensv will serve out the web interface on port 8080.  This
 23.1291 -can be changed by editing 
 23.1292 -\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
 23.1293 -
 23.1294 -Once Xensv is running, the web interface can be used to create and
 23.1295 -manage running domains.
 23.1296 -
 23.1297 -
 23.1298 -
 23.1299 -
 23.1300 -\chapter{Domain Configuration}
 23.1301 -\label{cha:config}
 23.1302 -
 23.1303 -The following contains the syntax of the domain configuration 
 23.1304 -files and a description of how to further specify networking,
 23.1305 -driver domain and general scheduling behaviour. 
 23.1306 -
 23.1307 -\section{Configuration Files}
 23.1308 -\label{s:cfiles}
 23.1309 -
 23.1310 -Xen configuration files contain the following standard variables.
 23.1311 -Unless otherwise stated, configuration items should be enclosed in
 23.1312 -quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2} 
 23.1313 -for concrete examples of the syntax.
 23.1314 -
 23.1315 -\begin{description}
 23.1316 -\item[kernel] Path to the kernel image.
 23.1317 -\item[ramdisk] Path to a ramdisk image (optional).
 23.1318 -% \item[builder] The name of the domain build function (e.g. {\tt'linux'} or {\tt'netbsd'}.
 23.1319 -\item[memory] Memory size in megabytes.
 23.1320 -\item[cpu] CPU to run this domain on, or {\tt -1} for
 23.1321 -  auto-allocation. 
 23.1322 -\item[console] Port to export the domain console on (default 9600 + domain ID).
 23.1323 -\item[nics] Number of virtual network interfaces.
 23.1324 -\item[vif] List of MAC addresses (random addresses are assigned if not
 23.1325 -  given) and bridges to use for the domain's network interfaces, e.g.
 23.1326 -\begin{verbatim}
 23.1327 -vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
 23.1328 -        'bridge=xen-br1' ]
 23.1329 -\end{verbatim}
 23.1330 -  to assign a MAC address and bridge to the first interface and assign
 23.1331 -  a different bridge to the second interface, leaving \xend to choose
 23.1332 -  the MAC address.
 23.1333 -\item[disk] List of block devices to export to the domain,  e.g. \\
 23.1334 -  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
 23.1335 -  exports physical device \path{/dev/hda1} to the domain 
 23.1336 -  as \path{/dev/sda1} with read-only access. Exporting a disk read-write 
 23.1337 -  which is currently mounted is dangerous -- if you are \emph{certain}
 23.1338 -  you wish to do this, you can specify \path{w!} as the mode. 
 23.1339 -\item[dhcp] Set to {\tt 'dhcp'} if you want to use DHCP to configure
 23.1340 -  networking. 
 23.1341 -\item[netmask] Manually configured IP netmask.
 23.1342 -\item[gateway] Manually configured IP gateway. 
 23.1343 -\item[hostname] Set the hostname for the virtual machine.
 23.1344 -\item[root] Specify the root device parameter on the kernel command
 23.1345 -  line. 
 23.1346 -\item[nfs\_server] IP address for the NFS server (if any). 
 23.1347 -\item[nfs\_root] Path of the root filesystem on the NFS server (if any).
 23.1348 -\item[extra] Extra string to append to the kernel command line (if
 23.1349 -  any).
 23.1350 -\item[restart] Three possible options:
 23.1351 -  \begin{description}
 23.1352 -  \item[always] Always restart the domain, no matter what
 23.1353 -                its exit code is.
 23.1354 -  \item[never]  Never restart the domain.
 23.1355 -  \item[onreboot] Restart the domain if and only if it requests a reboot.
 23.1356 -  \end{description}
 23.1357 -\end{description}
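         -
         -For instance, a minimal configuration file built from the variables
         -above might look like the following (all values illustrative):
         -
         -\begin{quote}
         -\begin{small}
         -\begin{verbatim}
         -kernel  = '/boot/vmlinuz-2.6-xenU'
         -memory  = 64
         -disk    = [ 'phy:hda3,sda1,w' ]
         -vif     = [ 'bridge=xen-br0' ]
         -dhcp    = 'dhcp'
         -root    = '/dev/sda1 ro'
         -\end{verbatim}
         -\end{small}
         -\end{quote}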
 23.1358 -
 23.1359 -For additional flexibility, it is also possible to include Python
 23.1360 -scripting commands in configuration files.  An example of this is the
 23.1361 -\path{xmexample2} file, which uses Python code to handle the 
 23.1362 -\path{vmid} variable.
 23.1363 -
 23.1364 -
 23.1365 -%\part{Advanced Topics}
 23.1366 -
 23.1367 -\section{Network Configuration}
 23.1368 -
 23.1369 -For many users, the default installation should work `out of the box'.
 23.1370 -More complicated network setups, for instance with multiple ethernet
 23.1371 -interfaces and/or existing bridging setups will require some
 23.1372 -special configuration.
 23.1373 -
 23.1374 -The purpose of this section is to describe the mechanisms provided by
 23.1375 -\xend to allow a flexible configuration for Xen's virtual networking.
 23.1376 -
 23.1377 -\subsection{Xen virtual network topology}
 23.1378 -
 23.1379 -Each domain network interface is connected to a virtual network
 23.1380 -interface in dom0 by a point-to-point link (effectively a `virtual
 23.1381 -crossover cable').  These devices are named {\tt
 23.1382 -vif$<$domid$>$.$<$vifid$>$} (e.g. {\tt vif1.0} for the first interface
 23.1383 -in domain 1, {\tt vif3.1} for the second interface in domain 3).
 23.1384 -
 23.1385 -Traffic on these virtual interfaces is handled in domain 0 using
 23.1386 -standard Linux mechanisms for bridging, routing, rate limiting, etc.
 23.1387 -Xend calls on two shell scripts to perform initial configuration of
 23.1388 -the network and configuration of new virtual interfaces.  By default,
 23.1389 -these scripts configure a single bridge for all the virtual
 23.1390 -interfaces.  Arbitrary routing / bridging configurations can be
 23.1391 -configured by customising the scripts, as described in the following
 23.1392 -section.
 23.1393 -
 23.1394 -\subsection{Xen networking scripts}
 23.1395 -
 23.1396 -Xen's virtual networking is configured by two shell scripts (by
 23.1397 -default \path{network} and \path{vif-bridge}).  These are
 23.1398 -called automatically by \xend when certain events occur, with
 23.1399 -arguments to the scripts providing further contextual information.
 23.1400 -These scripts are found by default in \path{/etc/xen/scripts}.  The
 23.1401 -names and locations of the scripts can be configured in
 23.1402 -\path{/etc/xen/xend-config.sxp}.
 23.1403 -
 23.1404 -\begin{description} 
 23.1405 -
 23.1406 -\item[network:] This script is called whenever \xend is started or
 23.1407 -stopped to respectively initialise or tear down the Xen virtual
 23.1408 -network. In the default configuration initialisation creates the
 23.1409 -bridge `xen-br0' and moves eth0 onto that bridge, modifying the
 23.1410 -routing accordingly. When \xend exits, it deletes the Xen bridge and
 23.1411 -removes eth0, restoring the normal IP and routing configuration.
 23.1412 -
 23.1413 -%% In configurations where the bridge already exists, this script could
 23.1414 -%% be replaced with a link to \path{/bin/true} (for instance).
 23.1415 -
 23.1416 -\item[vif-bridge:] This script is called for every domain virtual
 23.1417 -interface and can configure firewalling rules and add the vif 
 23.1418 -to the appropriate bridge. By default, this adds and removes 
 23.1419 -VIFs on the default Xen bridge.
 23.1420 -
 23.1421 -\end{description} 
 23.1422 -
 23.1423 -For more complex network setups (e.g. where routing is required, or
 23.1424 -integration with existing bridges is needed) these scripts may be
 23.1425 -replaced with customised variants for your site's preferred configuration.
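         -
         -For instance, pointing \xend at replacement scripts might involve
         -entries of the following form in \path{/etc/xen/xend-config.sxp}
         -(a sketch --- the exact option names depend on your Xen version;
         -check the comments in the shipped configuration file):
         -
         -\begin{quote}
         -\begin{verbatim}
         -(network-script my-network-script)
         -(vif-script    my-vif-script)
         -\end{verbatim}
         -\end{quote}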
 23.1426 -
 23.1427 -%% There are two possible types of privileges:  IO privileges and
 23.1428 -%% administration privileges.
 23.1429 -
 23.1430 -\section{Driver Domain Configuration} 
 23.1431 -
 23.1432 -I/O privileges can be assigned to allow a domain to access
 23.1433 -PCI devices directly.  This is used to support driver domains.
 23.1434 -
 23.1435 -Setting backend privileges is currently only supported in SXP format
 23.1436 -config files.  To allow a domain to function as a backend for others,
 23.1437 -somewhere within the {\tt vm} element of its configuration file must
 23.1438 -be a {\tt backend} element of the form {\tt (backend ({\em type}))}
 23.1439 -where {\tt \em type} may be either {\tt netif} or {\tt blkif},
 23.1440 -according to the type of virtual device this domain will service.
 23.1441 -%% After this domain has been built, \xend will connect all new and
 23.1442 -%% existing {\em virtual} devices (of the appropriate type) to that
 23.1443 -%% backend.
 23.1444 -
 23.1445 -Note that a block backend cannot currently import virtual block
 23.1446 -devices from other domains, and a network backend cannot import
 23.1447 -virtual network devices from other domains.  Thus (particularly in the
 23.1448 -case of block backends, which cannot import a virtual block device as
 23.1449 -their root filesystem), you may need to boot a backend domain from a
 23.1450 -ramdisk or a network device.
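         -
         -For example, the configuration of a domain intended to act as a
         -block backend would contain something like the following (a sketch,
         -eliding all unrelated fields):
         -
         -\begin{quote}
         -\begin{verbatim}
         -(vm
         -  ...
         -  (backend (blkif))
         -)
         -\end{verbatim}
         -\end{quote}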
 23.1451 -
 23.1452 -Access to PCI devices may be configured on a per-device basis.  Xen
 23.1453 -will assign the minimal set of hardware privileges to a domain that
 23.1454 -are required to control its devices.  This can be configured in either
 23.1455 -format of configuration file:
 23.1456 -
 23.1457 -\begin{itemize}
 23.1458 -\item SXP Format: Include device elements of the form: \\
 23.1459 -\centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
 23.1460 -  inside the top-level {\tt vm} element.  Each one specifies the address
 23.1461 -  of a device this domain is allowed to access ---
 23.1462 -  the numbers {\em x},{\em y} and {\em z} may be in either decimal or
 23.1463 -  hexadecimal format.
 23.1464 -\item Flat Format: Include a list of PCI device addresses of the
 23.1465 -  format: \\ 
 23.1466 -\centerline{{\tt pci = ['x,y,z', ...]}} \\ 
 23.1467 -where each element in the
 23.1468 -  list is a string specifying the components of the PCI device
 23.1469 -  address, separated by commas.  The components ({\tt \em x}, {\tt \em
 23.1470 -  y} and {\tt \em z}) of the list may be formatted as either decimal
 23.1471 -  or hexadecimal.
 23.1472 -\end{itemize}
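         -
         -For example, to grant a domain access to the device at bus 1,
         -device 2, function 0 (an illustrative address), the flat format
         -entry would be:
         -
         -\begin{quote}
         -\begin{verbatim}
         -pci = [ '0x1,0x2,0x0' ]
         -\end{verbatim}
         -\end{quote}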
 23.1473 -
 23.1474 -%% \section{Administration Domains}
 23.1475 -
 23.1476 -%% Administration privileges allow a domain to use the `dom0
 23.1477 -%% operations' (so called because they are usually available only to
 23.1478 -%% domain 0).  A privileged domain can build other domains, set scheduling
 23.1479 -%% parameters, etc.
 23.1480 -
 23.1481 -% Support for other administrative domains is not yet available...  perhaps
 23.1482 -% we should plumb it in some time
 23.1483 -
 23.1484 -
 23.1485 -
 23.1486 -
 23.1487 -
 23.1488 -\section{Scheduler Configuration}
 23.1489 -\label{s:sched} 
 23.1490 -
 23.1491 -
 23.1492 -Xen offers a boot time choice between multiple schedulers.  To select
 23.1493 -a scheduler, pass the boot parameter {\em sched=sched\_name} to Xen,
 23.1494 -substituting the appropriate scheduler name.  Details of the schedulers
 23.1495 -and their parameters are included below; future versions of the tools
 23.1496 -will provide a higher-level interface to scheduler configuration.
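         -
         -For example, a minimal GRUB kernel line selecting the Atropos
         -scheduler (path illustrative; see Section~\ref{s:xboot} for other
         -boot options):
         -
         -\begin{quote}
         -\begin{verbatim}
         -kernel /boot/xen.gz sched=atropos
         -\end{verbatim}
         -\end{quote}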
 23.1497 +%% Chapter Control Software moved to control_software.tex
 23.1498 +\include{src/user/control_software}
 23.1499  
 23.1500 -It is expected that system administrators configure their system to
 23.1501 -use the scheduler most appropriate to their needs.  Currently, the BVT
 23.1502 -scheduler is the recommended choice. 
 23.1503 -
 23.1504 -\subsection{Borrowed Virtual Time}
 23.1505 -
 23.1506 -{\tt sched=bvt} (the default) \\ 
 23.1507 -
 23.1508 -BVT provides proportional fair shares of the CPU time.  It has been
 23.1509 -observed to penalise domains that block frequently (e.g. I/O intensive
 23.1510 -domains), but this can be compensated for by using warping. 
 23.1511 -
 23.1512 -\subsubsection{Global Parameters}
 23.1513 -
 23.1514 -\begin{description}
 23.1515 -\item[ctx\_allow]
 23.1516 -  the context switch allowance is similar to the `quantum'
 23.1517 -  in traditional schedulers.  It is the minimum time that
 23.1518 -  a scheduled domain will be allowed to run before being
 23.1519 -  pre-empted. 
 23.1520 -\end{description}
 23.1521 -
 23.1522 -\subsubsection{Per-domain parameters}
 23.1523 -
 23.1524 -\begin{description}
 23.1525 -\item[mcuadv]
 23.1526 -  the MCU (Minimum Charging Unit) advance determines the
 23.1527 -  proportional share of the CPU that a domain receives.  It
 23.1528 -  is set in inverse proportion to a domain's sharing weight.
 23.1529 -\item[warp]
 23.1530 -  the amount of `virtual time' the domain is allowed to warp
 23.1531 -  backwards.
 23.1532 -\item[warpl]
 23.1533 -  the warp limit is the maximum time a domain can run warped for.
 23.1534 -\item[warpu]
 23.1535 -  the unwarp requirement is the minimum time a domain must
 23.1536 -  run unwarped for before it can warp again.
 23.1537 -\end{description}
 23.1538 -
 23.1539 -\subsection{Atropos}
 23.1540 -
 23.1541 -{\tt sched=atropos} \\
 23.1542 -
 23.1543 -Atropos is a soft real time scheduler.  It provides guarantees about
 23.1544 -absolute shares of the CPU, with a facility for sharing
 23.1545 -slack CPU time on a best-effort basis. It can provide timeliness
 23.1546 -guarantees for latency-sensitive domains.
 23.1547 -
 23.1548 -Every domain has an associated period and slice.  The domain should
 23.1549 -receive `slice' nanoseconds every `period' nanoseconds.  This allows
 23.1550 -the administrator to configure both the absolute share of the CPU a
 23.1551 -domain receives and the frequency with which it is scheduled. 
 23.1552 -
 23.1553 -%%  When
 23.1554 -%% domains unblock, their period is reduced to the value of the latency
 23.1555 -%% hint (the slice is scaled accordingly so that they still get the same
 23.1556 -%% proportion of the CPU).  For each subsequent period, the slice and
 23.1557 -%% period times are doubled until they reach their original values.
 23.1558 -
 23.1559 -Note: don't overcommit the CPU when using Atropos (i.e. don't reserve
 23.1560 -more CPU than is available --- the utilisation should be kept to
 23.1561 -slightly less than 100\% in order to ensure predictable behaviour).
 23.1562 -
 23.1563 -\subsubsection{Per-domain parameters}
 23.1564 -
 23.1565 -\begin{description}
 23.1566 -\item[period] The regular time interval during which a domain is
 23.1567 -  guaranteed to receive its allocation of CPU time.
 23.1568 -\item[slice]
 23.1569 -  The length of time per period that a domain is guaranteed to run
 23.1570 -  for (in the absence of voluntary yielding of the CPU). 
 23.1571 -\item[latency]
 23.1572 -  The latency hint is used to control how soon after
 23.1573 -  waking up a domain it should be scheduled.
 23.1574 -\item[xtratime] This is a boolean flag that specifies whether a domain
 23.1575 -  should be allowed a share of the system slack time.
 23.1576 -\end{description}
 23.1577 -
 23.1578 -\subsection{Round Robin}
 23.1579 -
 23.1580 -{\tt sched=rrobin} \\
 23.1581 -
 23.1582 -The round robin scheduler is included as a simple demonstration of
 23.1583 -Xen's internal scheduler API.  It is not intended for production use. 
 23.1584 -
 23.1585 -\subsubsection{Global Parameters}
 23.1586 -
 23.1587 -\begin{description}
 23.1588 -\item[rr\_slice]
 23.1589 -  The maximum time each domain runs before the next
 23.1590 -  scheduling decision is made.
 23.1591 -\end{description}
 23.1592 -
 23.1593 -
 23.1594 -
 23.1595 -
 23.1596 -
 23.1597 -
 23.1598 -
 23.1599 -
 23.1600 -
 23.1601 -
 23.1602 -
 23.1603 -
 23.1604 -\chapter{Build, Boot and Debug options} 
 23.1605 -
 23.1606 -This chapter describes the build- and boot-time options 
 23.1607 -which may be used to tailor your Xen system. 
 23.1608 -
 23.1609 -\section{Xen Build Options}
 23.1610 -
 23.1611 -Xen provides a number of build-time options which should be 
 23.1612 -set as environment variables or passed on make's command-line.  
 23.1613 -
 23.1614 -\begin{description} 
 23.1615 -\item[verbose=y] Enable debugging messages when Xen detects an unexpected condition.
 23.1616 -Also enables console output from all domains.
 23.1617 -\item[debug=y] 
 23.1618 -Enable debug assertions.  Implies {\bf verbose=y}.
 23.1619 -(Primarily useful for tracing bugs in Xen).       
 23.1620 -\item[debugger=y] 
 23.1621 -Enable the in-Xen debugger. This can be used to debug 
 23.1622 -Xen, guest OSes, and applications.
 23.1623 -\item[perfc=y] 
 23.1624 -Enable performance counters for significant events
 23.1625 -within Xen. The counts can be reset or displayed
 23.1626 -on Xen's console via console control keys.
 23.1627 -\item[trace=y] 
 23.1628 -Enable per-cpu trace buffers which log a range of
 23.1629 -events within Xen for collection by control
 23.1630 -software. 
 23.1631 -\end{description} 
 23.1632 -
 23.1633 -\section{Xen Boot Options}
 23.1634 -\label{s:xboot}
 23.1635 -
 23.1636 -These options are used to configure Xen's behaviour at runtime.  They
 23.1637 -should be appended to Xen's command line, either manually or by
 23.1638 -editing \path{grub.conf}.
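         -
         -For example, a complete \path{grub.conf} entry might look like the
         -following (paths and values illustrative):
         -
         -\begin{quote}
         -\begin{verbatim}
         -title Xen / XenLinux
         -  kernel /boot/xen.gz dom0_mem=262144 console=vga noreboot
         -  module /boot/vmlinuz-2.6-xen0 root=/dev/hda3 ro
         -\end{verbatim}
         -\end{quote}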
 23.1639 -
 23.1640 -\begin{description}
 23.1641 -\item [noreboot ] 
 23.1642 - Don't reboot the machine automatically on errors.  This is
 23.1643 - useful to catch debug output if you aren't catching console messages
 23.1644 - via the serial line. 
 23.1645 -
 23.1646 -\item [nosmp ] 
 23.1647 - Disable SMP support.
 23.1648 - This option is implied by `ignorebiostables'. 
 23.1649 -
 23.1650 -\item [watchdog ] 
 23.1651 - Enable NMI watchdog which can report certain failures. 
 23.1652 -
 23.1653 -\item [noirqbalance ] 
 23.1654 - Disable software IRQ balancing and affinity. This can be used on
 23.1655 - systems such as Dell 1850/2850 that have workarounds in hardware for
 23.1656 - IRQ-routing issues.
 23.1657 +%% Chapter Domain Configuration moved to domain_configuration.tex
 23.1658 +\include{src/user/domain_configuration}
 23.1659  
 23.1660 -\item [badpage=$<$page number$>$,$<$page number$>$, \ldots ] 
 23.1661 - Specify a list of pages not to be allocated for use 
 23.1662 - because they contain bad bytes. For example, if your
 23.1663 - memory tester says that byte 0x12345678 is bad, you would
 23.1664 - place `badpage=0x12345' on Xen's command line (i.e. the byte address shifted right by 12 bits to give its 4KB page number).
 23.1665 -
 23.1666 -\item [com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
 23.1667 - com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\ 
 23.1668 - Xen supports up to two 16550-compatible serial ports.
 23.1669 - For example: `com1=9600, 8n1, 0x408, 5' maps COM1 to a
 23.1670 - 9600-baud port, 8 data bits, no parity, 1 stop bit,
 23.1671 - I/O port base 0x408, IRQ 5.
 23.1672 - If some configuration options are standard (e.g., I/O base and IRQ),
 23.1673 - then only a prefix of the full configuration string need be
 23.1674 - specified. If the baud rate is pre-configured (e.g., by the
 23.1675 - bootloader) then you can specify `auto' in place of a numeric baud
 23.1676 - rate. 
 23.1677 -
 23.1678 -\item [console=$<$specifier list$>$ ] 
 23.1679 - Specify the destination for Xen console I/O.
 23.1680 - This is a comma-separated list of destinations; for example:
 23.1681 -\begin{description}
 23.1682 - \item[vga]  use VGA console and allow keyboard input
 23.1683 - \item[com1] use serial port com1
 23.1684 - \item[com2H] use serial port com2. Transmitted chars will
 23.1685 -   have the MSB set. Received chars must have
 23.1686 -   MSB set.
 23.1687 - \item[com2L] use serial port com2. Transmitted chars will
 23.1688 -   have the MSB cleared. Received chars must
 23.1689 -   have MSB cleared.
 23.1690 -\end{description}
 23.1691 - The latter two examples allow a single port to be
 23.1692 - shared by two subsystems (e.g. console and
 23.1693 - debugger). Sharing is controlled by the MSB of each
 23.1694 - transmitted/received character.
 23.1695 - [NB. Default for this option is `com1,vga'] 
 23.1696 -
 23.1697 -\item [sync\_console ]
 23.1698 - Force synchronous console output. This is useful if your system fails
 23.1699 - unexpectedly before it has sent all available output to the
 23.1700 - console. In most cases Xen will automatically enter synchronous mode
 23.1701 - when an exceptional event occurs, but this option provides a manual
 23.1702 - fallback.
 23.1703 -
 23.1704 -\item [conswitch=$<$switch-char$><$auto-switch-char$>$ ] 
 23.1705 - Specify how to switch serial-console input between
 23.1706 - Xen and DOM0. The required sequence is CTRL-$<$switch-char$>$
 23.1707 - pressed three times. Specifying the backtick character 
 23.1708 - disables switching.
 23.1709 - The $<$auto-switch-char$>$ specifies whether Xen should
 23.1710 - auto-switch input to DOM0 when it boots --- if it is `x'
 23.1711 - then auto-switching is disabled.  Any other value, or
 23.1712 - omitting the character, enables auto-switching.
 23.1713 - [NB. default switch-char is `a'] 
 23.1714 -
 23.1715 -\item [nmi=xxx ] 
 23.1716 - Specify what to do with an NMI parity or I/O error. \\
 23.1717 - `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
 23.1718 - `nmi=dom0':   Inform DOM0 of the NMI. \\
 23.1719 - `nmi=ignore': Ignore the NMI. 
 23.1720 -
 23.1721 -\item [mem=xxx ]
 23.1722 - Set the physical RAM address limit. Any RAM appearing beyond this
 23.1723 - physical address in the memory map will be ignored. This parameter
 23.1724 - may be specified with a B, K, M or G suffix, representing bytes,
 23.1725 - kilobytes, megabytes and gigabytes respectively. The
 23.1726 - default unit, if no suffix is specified, is kilobytes.
 23.1727 -
 23.1728 -\item [dom0\_mem=xxx ] 
 23.1729 - Set the amount of memory to be allocated to domain0. In Xen 3.x the parameter
 23.1730 - may be specified with a B, K, M or G suffix, representing bytes,
 23.1731 - kilobytes, megabytes and gigabytes respectively; if no suffix is specified, 
 23.1732 - the parameter defaults to kilobytes. In previous versions of Xen, suffixes
 23.1733 - were not supported and the value was always interpreted as kilobytes.
 23.1734 -
 23.1735 -\item [tbuf\_size=xxx ] 
 23.1736 - Set the size of the per-cpu trace buffers, in pages
 23.1737 - (default 1).  Note that the trace buffers are only
 23.1738 - enabled in debug builds.  Most users can ignore
 23.1739 - this feature completely. 
 23.1740 -
 23.1741 -\item [sched=xxx ] 
 23.1742 - Select the CPU scheduler Xen should use.  The current
 23.1743 - possibilities are `bvt' (default), `atropos' and `rrobin'. 
 23.1744 - For more information see Section~\ref{s:sched}. 
 23.1745 -
 23.1746 -\item [apic\_verbosity=debug,verbose ]
 23.1747 - Print more detailed information about local APIC and IOAPIC configuration.
 23.1748 -
 23.1749 -\item [lapic ]
 23.1750 - Force use of local APIC even when left disabled by uniprocessor BIOS.
 23.1751 -
 23.1752 -\item [nolapic ]
 23.1753 - Ignore local APIC in a uniprocessor system, even if enabled by the BIOS.
 23.1754 -
 23.1755 -\item [apic=bigsmp,default,es7000,summit ]
 23.1756 - Specify NUMA platform. This can usually be probed automatically.
 23.1757 -
 23.1758 -\end{description} 
 23.1759 -
 23.1760 -In addition, the following options may be specified on the Xen command
 23.1761 -line. Since domain 0 shares responsibility for booting the platform,
 23.1762 -Xen will automatically propagate these options to domain 0's command
 23.1763 -line. These options are taken from Linux's command-line syntax with
 23.1764 -unchanged semantics.
 23.1765 -
 23.1766 -\begin{description}
 23.1767 -\item [acpi=off,force,strict,ht,noirq,\ldots ] 
 23.1768 - Modify how Xen (and domain 0) parses the BIOS ACPI tables.
 23.1769 -
 23.1770 -\item [acpi\_skip\_timer\_override ]
 23.1771 - Instruct Xen (and domain 0) to ignore timer-interrupt override
 23.1772 - instructions specified by the BIOS ACPI tables.
 23.1773 -
 23.1774 -\item [noapic ]
 23.1775 - Instruct Xen (and domain 0) to ignore any IOAPICs that are present in
 23.1776 - the system, and instead continue to use the legacy PIC.
 23.1777 -
 23.1778 -\end{description} 
 23.1779 -
 23.1780 -\section{XenLinux Boot Options}
 23.1781 -
 23.1782 -In addition to the standard Linux kernel boot options, we support: 
 23.1783 -\begin{description} 
 23.1784 -\item[xencons=xxx ] Specify the device node to which the Xen virtual
 23.1785 -console driver is attached. The following options are supported:
 23.1786 -\begin{center}
 23.1787 -\begin{tabular}{l}
 23.1788 -`xencons=off': disable virtual console \\ 
 23.1789 -`xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
 23.1790 -`xencons=ttyS': attach console to /dev/ttyS0
 23.1791 -\end{tabular}
 23.1792 -\end{center}
 23.1793 -The default is ttyS for dom0 and tty for all other domains.
 23.1794 -\end{description} 
 23.1795 -
 23.1796 -
 23.1797 -
 23.1798 -\section{Debugging}
 23.1799 -\label{s:keys} 
 23.1800 -
 23.1801 -Xen has a set of debugging features that can be useful to try and
 23.1802 -figure out what's going on. Hit 'h' on the serial line (if you
 23.1803 -specified a baud rate on the Xen command line) or ScrollLock-h on the
 23.1804 -keyboard to get a list of supported commands.
 23.1805 -
 23.1806 -If you have a crash you'll likely get a crash dump containing an EIP
 23.1807 -(PC) which, along with an \path{objdump -d image}, can be useful in
 23.1808 -figuring out what's happened.  Debug a XenLinux image just as you
 23.1809 -would any other Linux kernel.
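         -
         -For example, given a crash dump reporting a hypothetical EIP of
         -0xc0123456, the faulting line can be looked up in an image built
         -with debugging symbols (image name illustrative):
         -
         -\begin{quote}
         -\begin{verbatim}
         -# addr2line -e vmlinux c0123456
         -\end{verbatim}
         -\end{quote}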
 23.1810 -
 23.1811 -%% We supply a handy debug terminal program which you can find in
 23.1812 -%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/}
 23.1813 -%% This should be built and executed on another machine that is connected
 23.1814 -%% via a null modem cable. Documentation is included.
 23.1815 -%% Alternatively, if the Xen machine is connected to a serial-port server
 23.1816 -%% then we supply a dumb TCP terminal client, {\tt xencons}.
 23.1817 -
 23.1818 -
 23.1819 +%% Chapter Build, Boot and Debug Options moved to build.tex
 23.1820 +\include{src/user/build}
 23.1821  
 23.1822  
 23.1823  \chapter{Further Support}
 23.1824 @@ -1875,6 +108,7 @@ directory of the Xen source distribution
 23.1825  %Various HOWTOs are available in \path{docs/HOWTOS} but this content is
 23.1826  %being integrated into this manual.
 23.1827  
 23.1828 +
 23.1829  \section{Online References}
 23.1830  
 23.1831  The official Xen web site is found at:
 23.1832 @@ -1885,6 +119,7 @@ The official Xen web site is found at:
 23.1833  This contains links to the latest versions of all on-line 
 23.1834  documentation (including the latest version of the FAQ). 
 23.1835  
 23.1836 +
 23.1837  \section{Mailing Lists}
 23.1838  
 23.1839  There are currently four official Xen mailing lists:
 23.1840 @@ -1905,326 +140,18 @@ from the unstable and 2.0 trees - develo
 23.1841  \end{description}
 23.1842  
 23.1843  
 23.1844 +
 23.1845  \appendix
 23.1846  
 23.1847 +%% Chapter Installing Xen / XenLinux on Debian moved to debian.tex
 23.1848 +\include{src/user/debian}
 23.1849 +
 23.1850 +%% Chapter Installing Xen on Red Hat moved to redhat.tex
 23.1851 +\include{src/user/redhat}
 23.1852 +
 23.1853  
 23.1854 -\chapter{Installing Xen / XenLinux on Debian}
 23.1855 -
 23.1856 -The Debian project provides a tool called \path{debootstrap} which
 23.1857 -allows a base Debian system to be installed into a filesystem without
 23.1858 -requiring the host system to have any Debian-specific software (such
 23.1859 -as \path{apt}).
 23.1860 -
 23.1861 -Here is how to install Debian 3.1 (Sarge) for use in an unprivileged
 23.1862 -Xen domain:
 23.1863 -
 23.1864 -\begin{enumerate}
 23.1865 -\item Set up Xen 2.0 and test that it's working, as described earlier in
 23.1866 -      this manual.
 23.1867 -
 23.1868 -\item Create disk images for root-fs and swap (alternatively, you
 23.1869 -      might create dedicated partitions, LVM logical volumes, etc. if
 23.1870 -      that suits your setup).
 23.1871 -\begin{small}\begin{verbatim}  
 23.1872 -dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
 23.1873 -dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
 23.1874 -\end{verbatim}\end{small}
 23.1875 -      If you're going to use this filesystem / disk image only as a
 23.1876 -      `template' for other VM disk images, something like 300 MB should
 23.1877 -      be enough (of course, it depends on what kind of packages you are
 23.1878 -      planning to install in the template).
 23.1879 -
 23.1880 -\item Create the filesystem and initialise the swap image
 23.1881 -\begin{small}\begin{verbatim}
 23.1882 -mkfs.ext3 /path/diskimage
 23.1883 -mkswap /path/swapimage
 23.1884 -\end{verbatim}\end{small}
 23.1885 -
 23.1886 -\item Mount the disk image for installation
 23.1887 -\begin{small}\begin{verbatim}
 23.1888 -mount -o loop /path/diskimage /mnt/disk
 23.1889 -\end{verbatim}\end{small}
 23.1890 -
 23.1891 -\item Install \path{debootstrap}
 23.1892 -
 23.1893 -Make sure you have debootstrap installed on the host.  If you are
 23.1894 -running Debian sarge (3.1 / testing) or unstable you can install it by
 23.1895 -running \path{apt-get install debootstrap}.  Otherwise, it can be
 23.1896 -downloaded from the Debian project website.
 23.1897 -
 23.1898 -\item Install Debian base to the disk image:
 23.1899 -\begin{small}\begin{verbatim}
 23.1900 -debootstrap --arch i386 sarge /mnt/disk  \
 23.1901 -            http://ftp.<countrycode>.debian.org/debian
 23.1902 -\end{verbatim}\end{small}
 23.1903 -
 23.1904 -You can use any other Debian http/ftp mirror you want.
 23.1905 -
 23.1906 -\item When debootstrap completes successfully, modify settings:
 23.1907 -\begin{small}\begin{verbatim}
 23.1908 -chroot /mnt/disk /bin/bash
 23.1909 -\end{verbatim}\end{small}
 23.1910 -
 23.1911 -Edit the following files using vi or nano and make needed changes:
 23.1912 -\begin{small}\begin{verbatim}
 23.1913 -/etc/hostname
 23.1914 -/etc/hosts
 23.1915 -/etc/resolv.conf
 23.1916 -/etc/network/interfaces
 23.1917 -/etc/networks
 23.1918 -\end{verbatim}\end{small}
 23.1919 -
 23.1920 -Set up access to the services, edit:
 23.1921 -\begin{small}\begin{verbatim}
 23.1922 -/etc/hosts.deny
 23.1923 -/etc/hosts.allow
 23.1924 -/etc/inetd.conf
 23.1925 -\end{verbatim}\end{small}
 23.1926 -
 23.1927 -Add a Debian mirror to:
 23.1928 -\begin{small}\begin{verbatim}
 23.1929 -/etc/apt/sources.list
 23.1930 -\end{verbatim}\end{small}
 23.1931 -
 23.1932 -Create \path{/etc/fstab} like this:
 23.1933 -\begin{small}\begin{verbatim}
 23.1934 -/dev/sda1       /       ext3    errors=remount-ro       0       1
 23.1935 -/dev/sda2       none    swap    sw                      0       0
 23.1936 -proc            /proc   proc    defaults                0       0
 23.1937 -\end{verbatim}\end{small}
 23.1938 -
 23.1939 -Then log out of the chroot.
 23.1940 -
 23.1941 -\item      Unmount the disk image
 23.1942 -\begin{small}\begin{verbatim}
 23.1943 -umount /mnt/disk
 23.1944 -\end{verbatim}\end{small}
 23.1945 -
 23.1946 -\item Create a Xen 2.0 configuration file for the new domain. You can
 23.1947 -        use the example configurations shipped with Xen as a template.
 23.1948 -
 23.1949 -        Make sure you have the following set up:
 23.1950 -\begin{small}\begin{verbatim}
 23.1951 -disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
 23.1952 -root = "/dev/sda1 ro"
 23.1953 -\end{verbatim}\end{small}
 23.1954 -
 23.1955 -\item Start the new domain
 23.1956 -\begin{small}\begin{verbatim}
 23.1957 -xm create -f domain_config_file
 23.1958 -\end{verbatim}\end{small}
 23.1959 -
 23.1960 -Check that the new domain is running:
 23.1961 -\begin{small}\begin{verbatim}
 23.1962 -xm list
 23.1963 -\end{verbatim}\end{small}
 23.1964 -
 23.1965 -\item   Attach to the console of the new domain.
 23.1966 -        You should see something like this when starting the new domain:
 23.1967 -
 23.1968 -\begin{small}\begin{verbatim}
 23.1969 -Started domain testdomain2, console on port 9626
 23.1970 -\end{verbatim}\end{small}
 23.1971 -        
 23.1972 -        There you can see the ID of the console: 26. You can also list
 23.1973 -        the consoles with \path{xm consoles} (the ID is the last two
 23.1974 -        digits of the port number).
 23.1975 -
 23.1976 -        Attach to the console:
 23.1977 -
 23.1978 -\begin{small}\begin{verbatim}
 23.1979 -xm console 26
 23.1980 -\end{verbatim}\end{small}
 23.1981 -
 23.1982 -        or by telnetting to port 9626 on localhost (though the
 23.1983 -        \path{xm console} program works better).
 23.1984 -
 23.1985 -\item   Log in and run base-config
 23.1986 -
 23.1987 -        By default there is no root password.
 23.1988 -
 23.1989 -        Check that everything looks OK, and the system started without
 23.1990 -        errors.  Check that the swap is active, and the network settings are
 23.1991 -        correct.
 23.1992 -
 23.1993 -        Run \path{/usr/sbin/base-config} to set up the Debian settings.
 23.1994 -
 23.1995 -        Set up the password for root using passwd.
 23.1996 -
 23.1997 -\item     Done. You can exit the console by pressing \path{Ctrl + ]}.
 23.1998 -
 23.1999 -\end{enumerate}
 23.2000 -
 23.2001 -If you need to create new domains, you can just copy the contents of
 23.2002 -the `template'-image to the new disk images, either by mounting the
 23.2003 -template and the new image, and using \path{cp -a} or \path{tar} or by
 23.2004 -simply copying the image file.  Once this is done, modify the
 23.2005 -image-specific settings (hostname, network settings, etc).
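         -
         -For example, to clone the template by copying the image file and
         -then adjusting its settings (paths illustrative):
         -
         -\begin{small}\begin{verbatim}
         -cp /path/diskimage /path/vm2disk
         -mount -o loop /path/vm2disk /mnt/disk
         -# edit /mnt/disk/etc/hostname, /mnt/disk/etc/network/interfaces, ...
         -umount /mnt/disk
         -\end{verbatim}\end{small}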
 23.2006 -
 23.2007 -\chapter{Installing Xen / XenLinux on Red Hat or Fedora Core}
 23.2008 -
 23.2009 -When using Xen / XenLinux on a standard Linux distribution there are
 23.2010 -a couple of things to watch out for:
 23.2011 -
 23.2012 -Note that, because domains other than domain 0 don't have any
 23.2013 -privileged access at all, certain commands in the default boot
 23.2014 -sequence will fail, e.g. attempts to update the hwclock, change the
 23.2015 -console font, update the keytable map, start apmd (power management),
 23.2016 -or gpm (mouse cursor).  Either ignore the errors (they should be
 23.2017 -harmless), or remove them from the startup scripts.  Deleting the
 23.2018 -following links is a good start: {\path{S24pcmcia}},
 23.2019 -{\path{S09isdn}}, {\path{S17keytable}}, {\path{S26apmd}},
 23.2020 -{\path{S85gpm}}.
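         -
         -For instance, a few of these services can be disabled with the
         -{\tt chkconfig} tool, where available (service names assumed to
         -correspond to the links above):
         -
         -\begin{quote}
         -\begin{small}\begin{verbatim}
         -chkconfig pcmcia off
         -chkconfig apmd off
         -chkconfig gpm off
         -\end{verbatim}\end{small}
         -\end{quote}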
 23.2021 -
 23.2022 -If you want to use a single root file system that works cleanly for
 23.2023 -both domain 0 and unprivileged domains, a useful trick is to use
 23.2024 -different 'init' run levels. For example, use
 23.2025 -run level 3 for domain 0, and run level 4 for other domains. This
 23.2026 -enables different startup scripts to be run depending on the run
 23.2027 -level number passed on the kernel command line.
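         -
         -For example, run level 4 can be passed to an unprivileged domain's
         -kernel via the {\tt extra} variable in its configuration file
         -(\path{init} interprets a bare number on the kernel command line
         -as the run level):
         -
         -\begin{quote}
         -\begin{small}\begin{verbatim}
         -extra = "4"
         -\end{verbatim}\end{small}
         -\end{quote}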
 23.2028 -
 23.2029 -If using NFS root file systems mounted either from an
 23.2030 -external server or from domain 0, there are a couple of other gotchas.
 23.2031 -The default {\path{/etc/sysconfig/iptables}} rules block NFS, so part
 23.2032 -way through the boot sequence things will suddenly go dead.
 23.2033 -
 23.2034 -If you're planning on having a separate NFS {\path{/usr}} partition, the
 23.2035 -RH9 boot scripts don't make life easy --- they attempt to mount NFS file
 23.2036 -systems way too late in the boot process. The easiest way I found to do
 23.2037 -this was to have a {\path{/linuxrc}} script run ahead of
 23.2038 -{\path{/sbin/init}} that mounts {\path{/usr}}:
 23.2039 -
 23.2040 -\begin{quote}
 23.2041 -\begin{small}\begin{verbatim}
 23.2042 - #!/bin/bash
 23.2043 - /sbin/ifconfig lo 127.0.0.1
 23.2044 - /sbin/portmap
 23.2045 - /bin/mount /usr
 23.2046 - exec /sbin/init "$@" <>/dev/console 2>&1
 23.2047 -\end{verbatim}\end{small}
 23.2048 -\end{quote}
 23.2049 -
 23.2050 -%$ XXX SMH: font lock fix :-)  
 23.2051 -
 23.2052 -The one slight complication with the above is that
 23.2053 -{\path{/sbin/portmap}} is dynamically linked against
 23.2054 -{\path{/usr/lib/libwrap.so.0}}.  Since this is in
 23.2055 -{\path{/usr}}, it won't work. This can be solved by copying the
 23.2056 -file (and link) below the \path{/usr} mount point, and just letting
 23.2057 -the file be `covered' when the mount happens.
 23.2058 -
 23.2059 -In some installations, where a shared read-only {\path{/usr}} is
 23.2060 -being used, it may be desirable to move other large directories over
 23.2061 -into the read-only {\path{/usr}}. For example, you might replace
 23.2062 -{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with
 23.2063 -links into {\path{/usr/root/bin}}, {\path{/usr/root/lib}}
 23.2064 -and {\path{/usr/root/sbin}} respectively. This creates other
 23.2065 -problems for running the {\path{/linuxrc}} script, requiring
 23.2066 -bash, portmap, mount, ifconfig, and a handful of other binaries and
 23.2067 -shared libraries to be copied below the mount point --- a simple
 23.2068 -statically-linked C program would solve this problem.
 23.2069 -
 23.2070 -
 23.2071 -
 23.2072 -
 23.2073 -\chapter{Glossary of Terms}
 23.2074 -
 23.2075 -\begin{description}
 23.2076 -\item[Atropos]             One of the CPU schedulers provided by Xen.
 23.2077 -                           Atropos provides domains with absolute shares
 23.2078 -                           of the CPU, with timeliness guarantees and a
 23.2079 -                           mechanism for sharing out `slack time'.
 23.2080 -
 23.2081 -\item[BVT]                 The BVT scheduler is used to give proportional
 23.2082 -                           fair shares of the CPU to domains.
 23.2083 -
 23.2084 -\item[Exokernel]           A minimal piece of privileged code, similar to
 23.2085 -                           a {\bf microkernel} but providing a more
 23.2086 -                           `hardware-like' interface to the tasks it
 23.2087 -                           manages.  This is similar to a paravirtualising
 23.2088 -                           VMM like {\bf Xen} but was designed as a new
 23.2089 -                           operating system structure, rather than
 23.2090 -                           specifically to run multiple conventional OSs.
 23.2091 -
 23.2092 -\item[Domain]              A domain is the execution context that
 23.2093 -                           contains a running {\bf virtual machine}.
 23.2094 -                           The relationship between virtual machines
 23.2095 -                           and domains on Xen is similar to that between
 23.2096 -                           programs and processes in an operating
 23.2097 -                           system: a virtual machine is a persistent
 23.2098 -                           entity that resides on disk (somewhat like
 23.2099 -                           a program).  When it is loaded for execution,
 23.2100 -                           it runs in a domain.  Each domain has a
 23.2101 -                           {\bf domain ID}.
 23.2102 -
 23.2103 -\item[Domain 0]            The first domain to be started on a Xen
 23.2104 -                           machine.  Domain 0 is responsible for managing
 23.2105 -                           the system.
 23.2106 -
 23.2107 -\item[Domain ID]           A unique identifier for a {\bf domain},
 23.2108 -                           analogous to a process ID in an operating
 23.2109 -                           system.
 23.2110 -
 23.2111 -\item[Full virtualisation] An approach to virtualisation which
 23.2112 -                           requires no modifications to the hosted
 23.2113 -                           operating system, providing the illusion of
 23.2114 -                           a complete system of real hardware devices.
 23.2115 -
 23.2116 -\item[Hypervisor]          An alternative term for {\bf VMM}, used
 23.2117 -                           because it means `beyond supervisor',
 23.2118 -                           since it is responsible for managing multiple
 23.2119 -                           `supervisor' kernels.
 23.2120 -
 23.2121 -\item[Live migration]      A technique for moving a running virtual
 23.2122 -                           machine to another physical host, without
 23.2123 -                           stopping it or the services running on it.
 23.2124 -
 23.2125 -\item[Microkernel]         A small base of code running at the highest
 23.2126 -                           hardware privilege level.  A microkernel is
 23.2127 -                           responsible for sharing CPU and memory (and
 23.2128 -                           sometimes other devices) between less
 23.2129 -                           privileged tasks running on the system.
 23.2130 -                           This is similar to a VMM, particularly a
 23.2131 -                           {\bf paravirtualising} VMM but typically
 23.2132 -                           addressing a different problem space and
 23.2133 -                           providing a different kind of interface.
 23.2134 -
 23.2135 -\item[NetBSD/Xen]          A port of NetBSD to the Xen architecture.
 23.2136 -
 23.2137 -\item[Paravirtualisation]  An approach to virtualisation which requires
 23.2138 -                           modifications to the operating system in
 23.2139 -                           order to run in a virtual machine.  Xen
 23.2140 -                           uses paravirtualisation but preserves
 23.2141 -                           binary compatibility for user space
 23.2142 -                           applications.
 23.2143 -
 23.2144 -\item[Shadow pagetables]   A technique for hiding the layout of machine
 23.2145 -                           memory from a virtual machine's operating
 23.2146 -                           system.  Used in some {\bf VMMs} to provide
 23.2147 -                           the illusion of contiguous physical memory,
 23.2148 -                           in Xen this is used during
 23.2149 -                           {\bf live migration}.
 23.2150 -
 23.2151 -\item[Virtual Machine]     The environment in which a hosted operating
 23.2152 -                           system runs, providing the abstraction of a
 23.2153 -                           dedicated machine.  A virtual machine may
 23.2154 -                           be identical to the underlying hardware (as
 23.2155 -                           in {\bf full virtualisation}), or it may
 23.2156 -                           differ (as in {\bf paravirtualisation}).
 23.2157 -
 23.2158 -\item[VMM]                 Virtual Machine Monitor - the software that
 23.2159 -                           allows multiple virtual machines to be
 23.2160 -                           multiplexed on a single physical machine.
 23.2161 -
 23.2162 -\item[Xen]                 Xen is a paravirtualising virtual machine
 23.2163 -                           monitor, developed primarily by the
 23.2164 -                           Systems Research Group at the University
 23.2165 -                           of Cambridge Computer Laboratory.
 23.2166 -
 23.2167 -\item[XenLinux]            Official name for the port of the Linux kernel
 23.2168 -                           that runs on Xen.
 23.2169 -
 23.2170 -\end{description}
 23.2171 +%% Chapter Glossary of Terms moved to glossary.tex
 23.2172 +\include{src/user/glossary}
 23.2173  
 23.2174  
 23.2175  \end{document}
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/docs/src/user/build.tex	Thu Sep 22 11:42:01 2005 -0600
    24.3 @@ -0,0 +1,170 @@
    24.4 +\chapter{Build, Boot and Debug Options} 
    24.5 +
    24.6 +This chapter describes the build- and boot-time options which may be
    24.7 +used to tailor your Xen system.
    24.8 +
    24.9 +
   24.10 +\section{Xen Build Options}
   24.11 +
   24.12 +Xen provides a number of build-time options which should be set as
   24.13 +environment variables or passed on make's command-line.
   24.14 +
   24.15 +\begin{description}
   24.16 +\item[verbose=y] Enable debugging messages when Xen detects an
   24.17 +  unexpected condition.  Also enables console output from all domains.
   24.18 +\item[debug=y] Enable debug assertions.  Implies {\bf verbose=y}.
   24.19 +  (Primarily useful for tracing bugs in Xen).
   24.20 +\item[debugger=y] Enable the in-Xen debugger. This can be used to
   24.21 +  debug Xen, guest OSes, and applications.
   24.22 +\item[perfc=y] Enable performance counters for significant events
   24.23 +  within Xen. The counts can be reset or displayed on Xen's console
   24.24 +  via console control keys.
   24.25 +\item[trace=y] Enable per-cpu trace buffers which log a range of
   24.26 +  events within Xen for collection by control software.
   24.27 +\end{description}
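         +
         +For example, to build a debugging-enabled hypervisor with performance
         +counters you might run something like the following (a sketch; it
         +assumes the usual top-level `world' make target):
         +\begin{verbatim}
         +# make debug=y perfc=y world
         +\end{verbatim}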
   24.28 +
   24.29 +
   24.30 +\section{Xen Boot Options}
   24.31 +\label{s:xboot}
   24.32 +
   24.33 +These options are used to configure Xen's behaviour at runtime.  They
   24.34 +should be appended to Xen's command line, either manually or by
   24.35 +editing \path{grub.conf}.
   24.36 +
   24.37 +\begin{description}
   24.38 +\item [ noreboot ] Don't reboot the machine automatically on errors.
   24.39 +  This is useful to catch debug output if you aren't catching console
   24.40 +  messages via the serial line.
   24.41 +\item [ nosmp ] Disable SMP support.  This option is implied by
   24.42 +  `ignorebiostables'.
   24.43 +\item [ watchdog ] Enable the NMI watchdog, which can report certain
   24.44 +  failures.
   24.45 +\item [ noirqbalance ] Disable software IRQ balancing and affinity.
   24.46 +  This can be used on systems such as Dell 1850/2850 that have
   24.47 +  workarounds in hardware for IRQ-routing issues.
   24.48 +\item [ badpage=$<$page number$>$,$<$page number$>$, \ldots ] Specify
   24.49 +  a list of pages not to be allocated for use because they contain bad
   24.50 +  bytes. For example, if your memory tester says that byte 0x12345678
   24.51 +  is bad, you would place `badpage=0x12345' on Xen's command line.
   24.52 +\item [ com1=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$
   24.53 +  com2=$<$baud$>$,DPS,$<$io\_base$>$,$<$irq$>$ ] \mbox{}\\
   24.54 +  Xen supports up to two 16550-compatible serial ports.  For example:
   24.55 +  `com1=9600,8n1,0x408,5' maps COM1 to a 9600-baud port, 8 data
   24.56 +  bits, no parity, 1 stop bit, I/O port base 0x408, IRQ 5.  If some
   24.57 +  configuration options are standard (e.g., I/O base and IRQ), then
   24.58 +  only a prefix of the full configuration string need be specified. If
   24.59 +  the baud rate is pre-configured (e.g., by the bootloader) then you
   24.60 +  can specify `auto' in place of a numeric baud rate.
   24.61 +\item [ console=$<$specifier list$>$ ] Specify the destination for Xen
   24.62 +  console I/O.  This is a comma-separated list of destinations, for example:
   24.63 +  \begin{description}
   24.64 +  \item[ vga ] Use VGA console and allow keyboard input.
   24.65 +  \item[ com1 ] Use serial port com1.
   24.66 +  \item[ com2H ] Use serial port com2. Transmitted chars will have the
   24.67 +    MSB set. Received chars must have MSB set.
   24.68 +  \item[ com2L] Use serial port com2. Transmitted chars will have the
   24.69 +    MSB cleared. Received chars must have MSB cleared.
   24.70 +  \end{description}
   24.71 +  The latter two examples allow a single port to be shared by two
   24.72 +  subsystems (e.g.\ console and debugger). Sharing is controlled by
   24.73 +  the MSB of each transmitted/received character.  [NB. The default
   24.74 +  for this option is `com1,vga'.]
   24.75 +\item [ sync\_console ] Force synchronous console output. This is
   24.76 +  useful if your system fails unexpectedly before it has sent all
   24.77 +  available output to the console. In most cases Xen will
   24.78 +  automatically enter synchronous mode when an exceptional event
   24.79 +  occurs, but this option provides a manual fallback.
   24.80 +\item [ conswitch=$<$switch-char$><$auto-switch-char$>$ ] Specify how
   24.81 +  to switch serial-console input between Xen and DOM0. The required
   24.82 +  sequence is CTRL-$<$switch-char$>$ pressed three times. Specifying
   24.83 +  the backtick character disables switching.  The
   24.84 +  $<$auto-switch-char$>$ specifies whether Xen should auto-switch
   24.85 +  input to DOM0 when it boots --- if it is `x' then auto-switching is
   24.86 +  disabled.  Any other value, or omitting the character, enables
   24.87 +  auto-switching.  [NB. Default switch-char is `a'.]
   24.88 +\item [ nmi=xxx ]
   24.89 +  Specify what to do with an NMI parity or I/O error. \\
   24.90 +  `nmi=fatal':  Xen prints a diagnostic and then hangs. \\
   24.91 +  `nmi=dom0':   Inform DOM0 of the NMI. \\
   24.92 +  `nmi=ignore': Ignore the NMI.
   24.93 +\item [ mem=xxx ] Set the physical RAM address limit. Any RAM
   24.94 +  appearing beyond this physical address in the memory map will be
   24.95 +  ignored. This parameter may be specified with a B, K, M or G suffix,
   24.96 +  representing bytes, kilobytes, megabytes and gigabytes respectively.
   24.97 +  The default unit, if no suffix is specified, is kilobytes.
   24.98 +\item [ dom0\_mem=xxx ] Set the amount of memory to be allocated to
   24.99 +  domain0. In Xen 3.x the parameter may be specified with a B, K, M or
  24.100 +  G suffix, representing bytes, kilobytes, megabytes and gigabytes
  24.101 +  respectively; if no suffix is specified, the parameter defaults to
  24.102 +  kilobytes. In previous versions of Xen, suffixes were not supported
  24.103 +  and the value was always interpreted as kilobytes.
  24.104 +\item [ tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in
  24.105 +  pages (default 1).  Note that the trace buffers are only enabled in
  24.106 +  debug builds.  Most users can ignore this feature completely.
  24.107 +\item [ sched=xxx ] Select the CPU scheduler Xen should use.  The
  24.108 +  current possibilities are `bvt' (default), `atropos' and `rrobin'.
  24.109 +  For more information see Section~\ref{s:sched}.
  24.110 +\item [ apic\_verbosity=debug,verbose ] Print more detailed
  24.111 +  information about local APIC and IOAPIC configuration.
  24.112 +\item [ lapic ] Force use of local APIC even when left disabled by
  24.113 +  uniprocessor BIOS.
  24.114 +\item [ nolapic ] Ignore local APIC in a uniprocessor system, even if
  24.115 +  enabled by the BIOS.
  24.116 +\item [ apic=bigsmp,default,es7000,summit ] Specify NUMA platform.
  24.117 +  This can usually be probed automatically.
  24.118 +\end{description}
  24.119 +
  24.120 +In addition, the following options may be specified on the Xen command
  24.121 +line. Since domain 0 shares responsibility for booting the platform,
  24.122 +Xen will automatically propagate these options to its command line.
  24.123 +These options are taken from Linux's command-line syntax with
  24.124 +unchanged semantics.
  24.125 +
  24.126 +\begin{description}
  24.127 +\item [ acpi=off,force,strict,ht,noirq,\ldots ] Modify how Xen (and
  24.128 +  domain 0) parses the BIOS ACPI tables.
  24.129 +\item [ acpi\_skip\_timer\_override ] Instruct Xen (and domain~0) to
  24.130 +  ignore timer-interrupt override instructions specified by the BIOS
  24.131 +  ACPI tables.
  24.132 +\item [ noapic ] Instruct Xen (and domain~0) to ignore any IOAPICs
  24.133 +  that are present in the system, and instead continue to use the
  24.134 +  legacy PIC.
  24.135 +\end{description} 
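         +
         +As an illustration, a \path{grub.conf} entry combining several of the
         +options above might look like the following sketch (the kernel and
         +module paths are illustrative and depend on your installation):
         +\begin{verbatim}
         +title Xen / XenLinux
         +  kernel /boot/xen.gz dom0_mem=262144 com1=115200,8n1 console=com1,vga
         +  module /boot/vmlinuz-2.6-xen0 root=/dev/sda1 ro
         +\end{verbatim}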
  24.136 +
  24.137 +
  24.138 +\section{XenLinux Boot Options}
  24.139 +
  24.140 +In addition to the standard Linux kernel boot options, we support:
  24.141 +\begin{description}
  24.142 +\item[ xencons=xxx ] Specify the device node to which the Xen virtual
  24.143 +  console driver is attached. The following options are supported:
  24.144 +  \begin{center}
  24.145 +    \begin{tabular}{l}
  24.146 +      `xencons=off': disable virtual console \\
  24.147 +      `xencons=tty': attach console to /dev/tty1 (tty0 at boot-time) \\
  24.148 +      `xencons=ttyS': attach console to /dev/ttyS0
  24.149 +    \end{tabular}
  24.150 +\end{center}
  24.151 +The default is ttyS for dom0 and tty for all other domains.
  24.152 +\end{description}
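         +
         +For example, to attach the console of an unprivileged domain to
         +\path{/dev/tty1}, you might append the option via the \path{extra}
         +variable in its configuration file (a sketch):
         +\begin{verbatim}
         +extra = "xencons=tty"
         +\end{verbatim}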
  24.153 +
  24.154 +
  24.155 +\section{Debugging}
  24.156 +\label{s:keys}
  24.157 +
  24.158 +Xen has a set of debugging features that can be useful for figuring
  24.159 +out what's going on. Hit `h' on the serial line (if you
  24.160 +specified a baud rate on the Xen command line) or ScrollLock-h on the
  24.161 +keyboard to get a list of supported commands.
  24.162 +
  24.163 +If you have a crash you'll likely get a crash dump containing an EIP
  24.164 +(PC) which, along with an \path{objdump -d image}, can be useful in
  24.165 +figuring out what's happened.  Debug a XenLinux image just as you
  24.166 +would any other Linux kernel.
  24.167 +
  24.168 +%% We supply a handy debug terminal program which you can find in
  24.169 +%% \path{/usr/local/src/xen-2.0.bk/tools/misc/miniterm/} This should
  24.170 +%% be built and executed on another machine that is connected via a
  24.171 +%% null modem cable. Documentation is included.  Alternatively, if the
  24.172 +%% Xen machine is connected to a serial-port server then we supply a
  24.173 +%% dumb TCP terminal client, {\tt xencons}.
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/docs/src/user/control_software.tex	Thu Sep 22 11:42:01 2005 -0600
    25.3 @@ -0,0 +1,115 @@
    25.4 +\chapter{Control Software} 
    25.5 +
    25.6 +The Xen control software includes the \xend\ node control daemon
    25.7 +(which must be running), the xm command line tools, and the prototype
    25.8 +xensv web interface.
    25.9 +
   25.10 +\section{\Xend\ (node control daemon)}
   25.11 +\label{s:xend}
   25.12 +
   25.13 +The Xen Daemon (\Xend) performs system management functions related to
   25.14 +virtual machines.  It forms a central point of control for a machine
   25.15 +and can be controlled using an HTTP-based protocol.  \Xend\ must be
   25.16 +running in order to start and manage virtual machines.
   25.17 +
   25.18 +\Xend\ must be run as root because it needs access to privileged
   25.19 +system management functions.  A small set of commands may be issued on
   25.20 +the \xend\ command line:
   25.21 +
   25.22 +\begin{tabular}{ll}
   25.23 +  \verb!# xend start! & start \xend, if not already running \\
   25.24 +  \verb!# xend stop!  & stop \xend\ if already running       \\
   25.25 +  \verb!# xend restart! & restart \xend\ if running, otherwise start it \\
   25.26 +  % \verb!# xend trace_start! & start \xend, with very detailed debug logging \\
   25.27 +  \verb!# xend status! & indicates \xend\ status by its return code
   25.28 +\end{tabular}
   25.29 +
   25.30 +A SysV init script called {\tt xend} is provided to start \xend\ at
   25.31 +boot time.  {\tt make install} installs this script in
   25.32 +\path{/etc/init.d}.  To enable it, you have to make symbolic links in
   25.33 +the appropriate runlevel directories or use the {\tt chkconfig} tool,
   25.34 +where available.
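         +
         +For example, on a Red Hat-style system one plausible way to enable the
         +script is (a sketch):
         +\begin{verbatim}
         +# chkconfig --add xend
         +# chkconfig xend on
         +\end{verbatim}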
   25.35 +
   25.36 +Once \xend\ is running, more sophisticated administration can be done
   25.37 +using the xm tool (see Section~\ref{s:xm}) and the experimental Xensv
   25.38 +web interface (see Section~\ref{s:xensv}).
   25.39 +
   25.40 +As \xend\ runs, events will be logged to \path{/var/log/xend.log} and,
   25.41 +if the migration assistant daemon (\path{xfrd}) has been started,
   25.42 +\path{/var/log/xfrd.log}. These may be of use for troubleshooting
   25.43 +problems.
   25.44 +
   25.45 +\section{Xm (command line interface)}
   25.46 +\label{s:xm}
   25.47 +
   25.48 +The xm tool is the primary tool for managing Xen from the console.
   25.49 +The general format of an xm command line is:
   25.50 +
   25.51 +\begin{verbatim}
   25.52 +# xm command [switches] [arguments] [variables]
   25.53 +\end{verbatim}
   25.54 +
   25.55 +The available \emph{switches} and \emph{arguments} are dependent on
   25.56 +the \emph{command} chosen.  The \emph{variables} may be set using
   25.57 +declarations of the form {\tt variable=value} and command line
   25.58 +declarations override any of the values in the configuration file
   25.59 +being used, including the standard variables described above and any
   25.60 +custom variables (for instance, the \path{xmdefconfig} file uses a
   25.61 +{\tt vmid} variable).
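         +
         +For instance, the following sketch creates a domain from an example
         +configuration file while overriding its {\tt vmid} variable on the
         +command line:
         +\begin{verbatim}
         +# xm create -f /etc/xen/xmexample2 vmid=3
         +\end{verbatim}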
   25.62 +
   25.63 +The available commands are as follows:
   25.64 +
   25.65 +\begin{description}
   25.66 +\item[set-mem] Request a domain to adjust its memory footprint.
   25.67 +\item[create] Create a new domain.
   25.68 +\item[destroy] Kill a domain immediately.
   25.69 +\item[list] List running domains.
   25.70 +\item[shutdown] Ask a domain to shut down.
   25.71 +\item[dmesg] Fetch the Xen (not Linux!) boot output.
   25.72 +\item[consoles] List the available consoles.
   25.73 +\item[console] Connect to the console for a domain.
   25.74 +\item[help] Get help on xm commands.
   25.75 +\item[save] Suspend a domain to disk.
   25.76 +\item[restore] Restore a domain from disk.
   25.77 +\item[pause] Pause a domain's execution.
   25.78 +\item[unpause] Un-pause a domain.
   25.79 +\item[pincpu] Pin a domain to a CPU.
   25.80 +\item[bvt] Set BVT scheduler parameters for a domain.
   25.81 +\item[bvt\_ctxallow] Set the BVT context switching allowance for the
   25.82 +  system.
   25.83 +\item[atropos] Set the atropos parameters for a domain.
   25.84 +\item[rrobin] Set the round robin time slice for the system.
   25.85 +\item[info] Get information about the Xen host.
   25.86 +\item[call] Call a \xend\ HTTP API function directly.
   25.87 +\end{description}
   25.88 +
   25.89 +For a detailed overview of switches, arguments and variables to each
   25.90 +command try
   25.91 +\begin{quote}
   25.92 +\begin{verbatim}
   25.93 +# xm help command
   25.94 +\end{verbatim}
   25.95 +\end{quote}
   25.96 +
   25.97 +\section{Xensv (web control interface)}
   25.98 +\label{s:xensv}
   25.99 +
  25.100 +Xensv is the experimental web control interface for managing a Xen
  25.101 +machine.  It can be used to perform some (but not yet all) of the
  25.102 +management tasks that can be done using the xm tool.
  25.103 +
  25.104 +It can be started using:
  25.105 +\begin{quote}
  25.106 +  \verb_# xensv start_
  25.107 +\end{quote}
  25.108 +and stopped using:
  25.109 +\begin{quote}
  25.110 +  \verb_# xensv stop_
  25.111 +\end{quote}
  25.112 +
  25.113 +By default, Xensv will serve the web interface on port 8080.  This
  25.114 +can be changed by editing
  25.115 +\path{/usr/lib/python2.3/site-packages/xen/sv/params.py}.
  25.116 +
  25.117 +Once Xensv is running, the web interface can be used to create and
  25.118 +manage running domains.
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/docs/src/user/debian.tex	Thu Sep 22 11:42:01 2005 -0600
    26.3 @@ -0,0 +1,154 @@
    26.4 +\chapter{Installing Xen / XenLinux on Debian}
    26.5 +
    26.6 +The Debian project provides a tool called \path{debootstrap} which
    26.7 +allows a base Debian system to be installed into a filesystem without
    26.8 +requiring the host system to have any Debian-specific software (such
    26.9 +as \path{apt}).
   26.10 +
   26.11 +Here is how to install Debian 3.1 (Sarge) for use in an unprivileged
   26.12 +Xen domain:
   26.13 +
   26.14 +\begin{enumerate}
   26.15 +
   26.16 +\item Set up Xen and test that it's working, as described earlier in
   26.17 +  this manual.
   26.18 +
   26.19 +\item Create disk images for rootfs and swap. Alternatively, you might
   26.20 +  create dedicated partitions, LVM logical volumes, etc.\ if that
   26.21 +  suits your setup.
   26.22 +\begin{verbatim}
   26.23 +dd if=/dev/zero of=/path/diskimage bs=1024k count=size_in_mbytes
   26.24 +dd if=/dev/zero of=/path/swapimage bs=1024k count=size_in_mbytes
   26.25 +\end{verbatim}
   26.26 +
   26.27 +  If you're going to use this filesystem / disk image only as a
   26.28 +  `template' for other VM disk images, something like 300 MB should
   26.29 +  be enough.  (Of course, it depends on what kind of packages you are
   26.30 +  planning to install in the template.)
   26.31 +
   26.32 +\item Create the filesystem and initialise the swap image
   26.33 +\begin{verbatim}
   26.34 +mkfs.ext3 /path/diskimage
   26.35 +mkswap /path/swapimage
   26.36 +\end{verbatim}
   26.37 +
   26.38 +\item Mount the disk image for installation
   26.39 +\begin{verbatim}
   26.40 +mount -o loop /path/diskimage /mnt/disk
   26.41 +\end{verbatim}
   26.42 +
   26.43 +\item Install \path{debootstrap} on the host.  If you are running
   26.44 +  Debian Sarge (3.1 / testing) or unstable you can install it by
   26.45 +  running \path{apt-get install debootstrap}.  Otherwise, it can be
   26.46 +  downloaded from the Debian project website.
   26.48 +
   26.49 +\item Install Debian base to the disk image:
   26.50 +\begin{verbatim}
   26.51 +debootstrap --arch i386 sarge /mnt/disk  \
   26.52 +            http://ftp.<countrycode>.debian.org/debian
   26.53 +\end{verbatim}
   26.54 +
   26.55 +  You can use any other Debian http/ftp mirror you want.
   26.56 +
   26.57 +\item When debootstrap completes successfully, modify settings:
   26.58 +\begin{verbatim}
   26.59 +chroot /mnt/disk /bin/bash
   26.60 +\end{verbatim}
   26.61 +
   26.62 +Edit the following files using vi or nano and make the needed changes:
   26.63 +\begin{verbatim}
   26.64 +/etc/hostname
   26.65 +/etc/hosts
   26.66 +/etc/resolv.conf
   26.67 +/etc/network/interfaces
   26.68 +/etc/networks
   26.69 +\end{verbatim}
   26.70 +
   26.71 +Set up access to the services by editing:
   26.72 +\begin{verbatim}
   26.73 +/etc/hosts.deny
   26.74 +/etc/hosts.allow
   26.75 +/etc/inetd.conf
   26.76 +\end{verbatim}
   26.77 +
   26.78 +Add a Debian mirror to:
   26.79 +\begin{verbatim}
   26.80 +/etc/apt/sources.list
   26.81 +\end{verbatim}
   26.82 +
   26.83 +Create an fstab like this:
   26.84 +\begin{verbatim}
   26.85 +/dev/sda1       /       ext3    errors=remount-ro       0       1
   26.86 +/dev/sda2       none    swap    sw                      0       0
   26.87 +proc            /proc   proc    defaults                0       0
   26.88 +\end{verbatim}
   26.89 +
   26.90 +Log out.
   26.91 +
   26.92 +\item Unmount the disk image
   26.93 +\begin{verbatim}
   26.94 +umount /mnt/disk
   26.95 +\end{verbatim}
   26.96 +
   26.97 +\item Create a Xen 2.0 configuration file for the new domain. You can
   26.98 +  use the example configurations shipped with Xen as a template.
   26.99 +
  26.100 +  Make sure you have the following set up:
  26.101 +\begin{verbatim}
  26.102 +disk = [ 'file:/path/diskimage,sda1,w', 'file:/path/swapimage,sda2,w' ]
  26.103 +root = "/dev/sda1 ro"
  26.104 +\end{verbatim}
  26.105 +
  26.106 +\item Start the new domain
  26.107 +\begin{verbatim}
  26.108 +xm create -f domain_config_file
  26.109 +\end{verbatim}
  26.110 +
  26.111 +Check that the new domain is running:
  26.112 +\begin{verbatim}
  26.113 +xm list
  26.114 +\end{verbatim}
  26.115 +
  26.116 +\item Attach to the console of the new domain.  You should see
  26.117 +  something like this when starting the new domain:
  26.118 +
  26.119 +\begin{verbatim}
  26.120 +Started domain testdomain2, console on port 9626
  26.121 +\end{verbatim}
  26.122 +        
  26.123 +  There you can see the ID of the console: 26. You can also list the
  26.124 +  consoles with \path{xm consoles} (the ID is the last two digits of
  26.125 +  the port number).
  26.126 +
  26.127 +  Attach to the console:
  26.128 +
  26.129 +\begin{verbatim}
  26.130 +xm console 26
  26.131 +\end{verbatim}
  26.132 +
  26.133 +  or by telnetting to port 9626 on localhost (the xm console
  26.134 +  program works better).
  26.135 +
  26.136 +\item Log in and run base-config
  26.137 +
  26.138 +  By default there is no root password.
  26.139 +
  26.140 +  Check that everything looks OK, and that the system started without
  26.141 +  errors.  Check that the swap is active, and that the network
  26.142 +  settings are correct.
  26.143 +
  26.144 +  Run \path{/usr/sbin/base-config} to set up the Debian settings.
  26.145 +
  26.146 +  Set up the password for root using passwd.
  26.147 +
  26.148 +\item Done. You can exit the console by pressing {\path{Ctrl + ]}}.
  26.149 +
  26.150 +\end{enumerate}
  26.151 +
  26.152 +
  26.153 +If you need to create new domains, you can just copy the contents of
  26.154 +the `template' image to the new disk images, either by mounting both
  26.155 +the template and the new image and using \path{cp -a} or \path{tar},
  26.156 +or by simply copying the image file.  Once this is done, modify the
  26.157 +image-specific settings (hostname, network settings, etc.).
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/docs/src/user/domain_configuration.tex	Thu Sep 22 11:42:01 2005 -0600
    27.3 @@ -0,0 +1,281 @@
    27.4 +\chapter{Domain Configuration}
    27.5 +\label{cha:config}
    27.6 +
    27.7 +This chapter describes the syntax of the domain configuration files
    27.8 +and how to further specify networking, driver domain and general
    27.9 +scheduling behaviour.
   27.10 +
   27.11 +
   27.12 +\section{Configuration Files}
   27.13 +\label{s:cfiles}
   27.14 +
   27.15 +Xen configuration files contain the following standard variables.
   27.16 +Unless otherwise stated, configuration items should be enclosed in
   27.17 +quotes: see \path{/etc/xen/xmexample1} and \path{/etc/xen/xmexample2}
   27.18 +for concrete examples of the syntax.
   27.19 +
   27.20 +\begin{description}
   27.21 +\item[kernel] Path to the kernel image.
   27.22 +\item[ramdisk] Path to a ramdisk image (optional).
   27.23 +  % \item[builder] The name of the domain build function (e.g.
   27.24 +  %   {\tt'linux'} or {\tt'netbsd'}.
   27.25 +\item[memory] Memory size in megabytes.
   27.26 +\item[cpu] CPU to run this domain on, or {\tt -1} for auto-allocation.
   27.27 +\item[console] Port to export the domain console on (default 9600 +
   27.28 +  domain ID).
   27.29 +\item[nics] Number of virtual network interfaces.
   27.30 +\item[vif] List of MAC addresses (random addresses are assigned if not
   27.31 +  given) and bridges to use for the domain's network interfaces, e.g.\ 
   27.32 +\begin{verbatim}
   27.33 +vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0',
   27.34 +        'bridge=xen-br1' ]
   27.35 +\end{verbatim}
   27.36 +  to assign a MAC address and bridge to the first interface and assign
   27.37 +  a different bridge to the second interface, leaving \xend\ to choose
   27.38 +  the MAC address.
   27.39 +\item[disk] List of block devices to export to the domain, e.g.\ \\
   27.40 +  \verb_disk = [ 'phy:hda1,sda1,r' ]_ \\
   27.41 +  exports physical device \path{/dev/hda1} to the domain as
   27.42 +  \path{/dev/sda1} with read-only access. Exporting a disk read-write
   27.43 +  which is currently mounted is dangerous -- if you are \emph{certain}
   27.44 +  you wish to do this, you can specify \path{w!} as the mode.
   27.45 +\item[dhcp] Set to {\tt `dhcp'} if you want to use DHCP to configure
   27.46 +  networking.
   27.47 +\item[netmask] Manually configured IP netmask.
   27.48 +\item[gateway] Manually configured IP gateway.
   27.49 +\item[hostname] Set the hostname for the virtual machine.
   27.50 +\item[root] Specify the root device parameter on the kernel command
   27.51 +  line.
   27.52 +\item[nfs\_server] IP address for the NFS server (if any).
   27.53 +\item[nfs\_root] Path of the root filesystem on the NFS server (if
   27.54 +  any).
   27.55 +\item[extra] Extra string to append to the kernel command line (if
   27.56 +  any)
   27.57 +\item[restart] Three possible options:
   27.58 +  \begin{description}
   27.59 +  \item[always] Always restart the domain, no matter what its exit
   27.60 +    code is.
   27.61 +  \item[never] Never restart the domain.
   27.62 +  \item[onreboot] Restart the domain if and only if it requests reboot.
   27.63 +  \end{description}
   27.64 +\end{description}
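         +
         +Putting several of these variables together, a minimal configuration
         +file might look like the following sketch (all paths, names and sizes
         +are illustrative):
         +\begin{verbatim}
         +kernel   = "/boot/vmlinuz-2.6-xenU"
         +memory   = 64
         +disk     = [ 'phy:hda3,sda1,w' ]
         +root     = "/dev/sda1 ro"
         +hostname = "vm1"
         +\end{verbatim}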
   27.65 +
   27.66 +For additional flexibility, it is also possible to include Python
   27.67 +scripting commands in configuration files.  An example of this is the
   27.68 +\path{xmexample2} file, which uses Python code to handle the
   27.69 +\path{vmid} variable.
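         +
         +As a sketch of the idea (hypothetical; see \path{xmexample2} itself
         +for the real code), such a file might derive per-VM settings from a
         +{\tt vmid} variable passed on the xm command line:
         +\begin{verbatim}
         +# Derive per-VM settings from the vmid variable.
         +vmid = int(vmid)
         +hostname = "vm%d" % vmid
         +disk = [ 'phy:vg/vm%d,sda1,w' % vmid ]
         +\end{verbatim}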
   27.70 +
   27.71 +
   27.72 +%\part{Advanced Topics}
   27.73 +
   27.74 +
   27.75 +\section{Network Configuration}
   27.76 +
   27.77 +For many users, the default installation should work ``out of the
   27.78 +box''.  More complicated network setups, for instance with multiple
   27.79 +Ethernet interfaces and/or existing bridging setups, will require some
   27.80 +special configuration.
   27.81 +
   27.82 +The purpose of this section is to describe the mechanisms provided by
   27.83 +\xend\ to allow a flexible configuration for Xen's virtual networking.
   27.84 +
   27.85 +\subsection{Xen virtual network topology}
   27.86 +
   27.87 +Each domain network interface is connected to a virtual network
   27.88 +interface in dom0 by a point-to-point link (effectively a ``virtual
   27.89 +crossover cable'').  These devices are named {\tt
   27.90 +  vif$<$domid$>$.$<$vifid$>$} (e.g.\ {\tt vif1.0} for the first
   27.91 +interface in domain~1, {\tt vif3.1} for the second interface in
   27.92 +domain~3).
   27.93 +
   27.94 +Traffic on these virtual interfaces is handled in domain~0 using
   27.95 +standard Linux mechanisms for bridging, routing, rate limiting, etc.
   27.96 +Xend calls on two shell scripts to perform initial configuration of
   27.97 +the network and configuration of new virtual interfaces.  By default,
   27.98 +these scripts configure a single bridge for all the virtual
   27.99 +interfaces.  Arbitrary routing / bridging configurations can be
  27.100 +configured by customizing the scripts, as described in the following
  27.101 +section.
  27.102 +
  27.103 +\subsection{Xen networking scripts}
  27.104 +
  27.105 +Xen's virtual networking is configured by two shell scripts (by
  27.106 +default \path{network} and \path{vif-bridge}).  These are called
  27.107 +automatically by \xend\ when certain events occur, with arguments to
  27.108 +the scripts providing further contextual information.  These scripts
  27.109 +are found by default in \path{/etc/xen/scripts}.  The names and
  27.110 +locations of the scripts can be configured in
  27.111 +\path{/etc/xen/xend-config.sxp}.
  27.112 +
  27.113 +\begin{description}
  27.114 +\item[network:] This script is called whenever \xend\ is started or
  27.115 +  stopped to respectively initialize or tear down the Xen virtual
  27.116 +  network. In the default configuration initialization creates the
  27.117 +  bridge `xen-br0' and moves eth0 onto that bridge, modifying the
  27.118 +  routing accordingly. When \xend\ exits, it deletes the Xen bridge
  27.119 +  and removes eth0, restoring the normal IP and routing configuration.
  27.120 +
  27.121 +  %% In configurations where the bridge already exists, this script
  27.122 +  %% could be replaced with a link to \path{/bin/true} (for instance).
  27.123 +
  27.124 +\item[vif-bridge:] This script is called for every domain virtual
  27.125 +  interface and can configure firewalling rules and add the vif to the
  27.126 +  appropriate bridge. By default, this adds and removes VIFs on the
  27.127 +  default Xen bridge.
  27.128 +\end{description}
  27.129 +
  27.130 +For more complex network setups (e.g.\ where routing is required
  27.131 +or integration with existing bridges is needed) these scripts may
  27.132 +be replaced with customized variants for your site's preferred configuration.
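         +
         +As a sketch of the kind of customization possible (hypothetical; the
         +exact arguments and environment passed to the script depend on your
         +\xend\ version), a minimal \path{vif-bridge} replacement that adds
         +each new vif to an existing bridge \path{br0} might look like:
         +\begin{verbatim}
         +#!/bin/sh
         +# $1 is the operation (up/down); $vif names the virtual interface.
         +case "$1" in
         +  up)   brctl addif br0 "$vif" && ifconfig "$vif" up ;;
         +  down) ifconfig "$vif" down; brctl delif br0 "$vif" ;;
         +esac
         +\end{verbatim}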
  27.133 +
  27.134 +%% There are two possible types of privileges: IO privileges and
  27.135 +%% administration privileges.
  27.136 +
  27.137 +
  27.138 +\section{Driver Domain Configuration}
  27.139 +
  27.140 +I/O privileges can be assigned to allow a domain to directly access
  27.141 +PCI devices itself.  This is used to support driver domains.
  27.142 +
  27.143 +Setting back-end privileges is currently only supported in SXP format
  27.144 +config files.  To allow a domain to function as a back-end for others,
  27.145 +somewhere within the {\tt vm} element of its configuration file must
  27.146 +be a {\tt back-end} element of the form {\tt (back-end ({\em type}))}
  27.147 +where {\tt \em type} may be either {\tt netif} or {\tt blkif},
  27.148 +according to the type of virtual device this domain will service.
  27.149 +%% After this domain has been built, \xend will connect all new and
  27.150 +%% existing {\em virtual} devices (of the appropriate type) to that
  27.151 +%% back-end.
  27.152 +
  27.153 +Note that a block back-end cannot currently import virtual block
  27.154 +devices from other domains, and a network back-end cannot import
  27.155 +virtual network devices from other domains.  Thus (particularly in the
  27.156 +case of block back-ends, which cannot import a virtual block device as
  27.157 +their root filesystem), you may need to boot a back-end domain from a
  27.158 +ramdisk or a network device.
  27.159 +
  27.160 +Access to PCI devices may be configured on a per-device basis.  Xen
  27.161 +will assign to a domain the minimal set of hardware privileges
  27.162 +required to control its devices.  This can be configured in either
  27.163 +format of configuration file:
  27.164 +
  27.165 +\begin{itemize}
  27.166 +\item SXP Format: Include device elements of the form: \\
  27.167 +  \centerline{  {\tt (device (pci (bus {\em x}) (dev {\em y}) (func {\em z})))}} \\
  27.168 +  inside the top-level {\tt vm} element.  Each one specifies the
  27.169 +  address of a device this domain is allowed to access --- the numbers
  27.170 +  \emph{x},\emph{y} and \emph{z} may be in either decimal or
  27.171 +  hexadecimal format.
  27.172 +\item Flat Format: Include a list of PCI device addresses of the
  27.173 +  format: \\
  27.174 +  \centerline{{\tt pci = ['x,y,z', \ldots]}} \\
  27.175 +  where each element in the list is a string specifying the components
  27.176 +  of the PCI device address, separated by commas.  The components
  27.177 +  ({\tt \em x}, {\tt \em y} and {\tt \em z}) of the list may be
  27.178 +  formatted as either decimal or hexadecimal.
  27.179 +\end{itemize}
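         +
         +For example, to grant a domain access to the device at bus 0,
         +device 2, function 0 (illustrative values), you would write either:
         +\begin{verbatim}
         +(device (pci (bus 0) (dev 2) (func 0)))
         +\end{verbatim}
         +in SXP format, or:
         +\begin{verbatim}
         +pci = [ '0,2,0' ]
         +\end{verbatim}
         +in the flat format.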
  27.180 +
  27.181 +%% \section{Administration Domains}
  27.182 +
  27.183 +%% Administration privileges allow a domain to use the `dom0
  27.184 +%% operations' (so called because they are usually available only to
  27.185 +%% domain 0).  A privileged domain can build other domains, set
  27.186 +%% scheduling parameters, etc.
  27.187 +
  27.188 +% Support for other administrative domains is not yet available...
  27.189 +% perhaps we should plumb it in some time
  27.190 +
  27.191 +
  27.192 +\section{Scheduler Configuration}
  27.193 +\label{s:sched}
  27.194 +
  27.195 +Xen offers a boot-time choice between multiple schedulers.  To select
  27.196 +a scheduler, pass the boot parameter \emph{sched=sched\_name} to Xen,
  27.197 +substituting the appropriate scheduler name.  Details of the
  27.198 +schedulers and their parameters are included below; future versions of
  27.199 +the tools will provide a higher-level interface to them.
  27.200 +
  27.201 +It is expected that system administrators will configure their systems
  27.202 +to use the scheduler most appropriate to their needs.  Currently, the
  27.203 +BVT scheduler is the recommended choice.
  27.204 +
  27.205 +\subsection{Borrowed Virtual Time}
  27.206 +
  27.207 +{\tt sched=bvt} (the default) \\
  27.208 +
  27.209 +BVT provides proportional fair shares of CPU time.  It has been
  27.210 +observed to penalize domains that block frequently (e.g.\ I/O
  27.211 +intensive domains), but this can be compensated for by using warping.
  27.212 +
  27.213 +\subsubsection{Global Parameters}
  27.214 +
  27.215 +\begin{description}
  27.216 +\item[ctx\_allow] The context switch allowance is similar to the
  27.217 +  ``quantum'' in traditional schedulers.  It is the minimum time that
  27.218 +  a scheduled domain will be allowed to run before being preempted.
  27.219 +\end{description}
  27.220 +
  27.221 +\subsubsection{Per-domain parameters}
  27.222 +
  27.223 +\begin{description}
  27.224 +\item[mcuadv] The MCU (Minimum Charging Unit) advance determines the
  27.225 +  proportional share of the CPU that a domain receives.  It is
  27.226 +  inversely proportional to a domain's sharing weight.
  27.227 +\item[warp] The amount of ``virtual time'' the domain is allowed to
  27.228 +  warp backwards.
  27.229 +\item[warpl] The warp limit is the maximum time a domain can run
  27.230 +  warped for.
  27.231 +\item[warpu] The unwarp requirement is the minimum time a domain must
  27.232 +  run unwarped for before it can warp again.
  27.233 +\end{description}
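         +
         +These per-domain parameters are adjusted at runtime with the
         +\path{xm bvt} command.  A sketch of an invocation (see \path{xm help
         +bvt} for the authoritative argument order):
         +\begin{verbatim}
         +# xm bvt DOM mcuadv warp warpl warpu
         +\end{verbatim}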
  27.234 +
  27.235 +\subsection{Atropos}
  27.236 +
  27.237 +{\tt sched=atropos} \\
  27.238 +
  27.239 +Atropos is a soft real time scheduler.  It provides guarantees about
  27.240 +absolute shares of the CPU, with a facility for sharing slack CPU time
  27.241 +on a best-effort basis. It can provide timeliness guarantees for
  27.242 +latency-sensitive domains.
  27.243 +
  27.244 +Every domain has an associated period and slice.  The domain should
  27.245 +receive `slice' nanoseconds every `period' nanoseconds.  This allows
  27.246 +the administrator to configure both the absolute share of the CPU a
  27.247 +domain receives and the frequency with which it is scheduled.
  27.248 +
  27.249 +%% When domains unblock, their period is reduced to the value of the
  27.250 +%% latency hint (the slice is scaled accordingly so that they still
  27.251 +%% get the same proportion of the CPU).  For each subsequent period,
  27.252 +%% the slice and period times are doubled until they reach their
  27.253 +%% original values.
  27.254 +
  27.255 +Note: don't over-commit the CPU when using Atropos (i.e.\ don't reserve
  27.256 +more CPU than is available --- the utilization should be kept to
  27.257 +slightly less than 100\% in order to ensure predictable behaviour).
  27.258 +
  27.259 +\subsubsection{Per-domain parameters}
  27.260 +
  27.261 +\begin{description}
  27.262 +\item[period] The regular time interval during which a domain is
  27.263 +  guaranteed to receive its allocation of CPU time.
  27.264 +\item[slice] The length of time per period that a domain is guaranteed
  27.265 +  to run for (in the absence of voluntary yielding of the CPU).
  27.266 +\item[latency] The latency hint is used to control how soon after
  27.267 +  waking up a domain it should be scheduled.
  27.268 +\item[xtratime] This is a boolean flag that specifies whether a domain
  27.269 +  should be allowed a share of the system slack time.
  27.270 +\end{description}
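         +
         +These may similarly be set with the \path{xm atropos} command.  A
         +sketch (see \path{xm help atropos} for the authoritative argument
         +order; slice and period are in nanoseconds):
         +\begin{verbatim}
         +# xm atropos DOM period slice latency xtratime
         +\end{verbatim}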
  27.271 +
  27.272 +\subsection{Round Robin}
  27.273 +
  27.274 +{\tt sched=rrobin} \\
  27.275 +
  27.276 +The round robin scheduler is included as a simple demonstration of
  27.277 +Xen's internal scheduler API.  It is not intended for production use.
  27.278 +
  27.279 +\subsubsection{Global Parameters}
  27.280 +
  27.281 +\begin{description}
  27.282 +\item[rr\_slice] The maximum time each domain runs before the next
  27.283 +  scheduling decision is made.
  27.284 +\end{description}
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/docs/src/user/domain_filesystem.tex	Thu Sep 22 11:42:01 2005 -0600
    28.3 @@ -0,0 +1,243 @@
    28.4 +\chapter{Domain Filesystem Storage}
    28.5 +
    28.6 +It is possible to directly export any Linux block device in dom0 to
    28.7 +another domain, or to export filesystems / devices to virtual machines
    28.8 +using standard network protocols (e.g.\ NBD, iSCSI, NFS, etc.).  This
    28.9 +chapter covers some of the possibilities.
   28.10 +
   28.11 +
   28.12 +\section{Exporting Physical Devices as VBDs}
   28.13 +\label{s:exporting-physical-devices-as-vbds}
   28.14 +
   28.15 +One of the simplest configurations is to directly export individual
   28.16 +partitions from domain~0 to other domains. To achieve this use the
   28.17 +\path{phy:} specifier in your domain configuration file. For example a
   28.18 +line like
   28.19 +\begin{quote}
   28.20 +  \verb_disk = ['phy:hda3,sda1,w']_
   28.21 +\end{quote}
   28.22 +specifies that the partition \path{/dev/hda3} in domain~0 should be
   28.23 +exported read-write to the new domain as \path{/dev/sda1}; one could
   28.24 +equally well export it as \path{/dev/hda} or \path{/dev/sdb5} should
   28.25 +one wish.
   28.26 +
   28.27 +In addition to local disks and partitions, it is possible to export
   28.28 +any device that Linux considers to be ``a disk'' in the same manner.
   28.29 +For example, if you have iSCSI disks or GNBD volumes imported into
   28.30 +domain~0 you can export these to other domains using the \path{phy:}
   28.31 +disk syntax. E.g.:
   28.32 +\begin{quote}
   28.33 +  \verb_disk = ['phy:vg/lvm1,sda2,w']_
   28.34 +\end{quote}
   28.35 +
   28.36 +\begin{center}
   28.37 +  \framebox{\bf Warning: Block device sharing}
   28.38 +\end{center}
   28.39 +\begin{quote}
   28.40 +  Block devices should typically only be shared between domains in a
   28.41 +  read-only fashion; otherwise the Linux kernel's file systems will
   28.42 +  get very confused, as the file system structure may change underneath
   28.43 +  them (having the same ext3 partition mounted \path{rw} twice is a
   28.44 +  sure-fire way to cause irreparable damage)!  \Xend\ will attempt to
   28.45 +  prevent you from doing this by checking that the device is not
   28.46 +  mounted read-write in domain~0, and hasn't already been exported
   28.47 +  read-write to another domain.  If you want read-write sharing,
   28.48 +  export the directory to other domains via NFS from domain~0 (or use
   28.49 +  a cluster file system such as GFS or ocfs2).
   28.50 +\end{quote}
   28.51 +
   28.52 +
   28.53 +\section{Using File-backed VBDs}
   28.54 +
   28.55 +It is also possible to use a file in Domain~0 as the primary storage
   28.56 +for a virtual machine.  As well as being convenient, this also has the
   28.57 +advantage that the virtual block device will be \emph{sparse} ---
   28.58 +space will only really be allocated as parts of the file are used.  So
   28.59 +if a virtual machine uses only half of its disk space then the file
   28.60 +really takes up only half of the size allocated.
   28.61 +
   28.62 +For example, to create a 2GB sparse file-backed virtual block device
   28.63 +(actually only consumes 1KB of disk):
   28.64 +\begin{quote}
   28.65 +  \verb_# dd if=/dev/zero of=vm1disk bs=1k seek=2048k count=1_
   28.66 +\end{quote}
   28.67 +
   28.68 +Make a file system in the disk file:
   28.69 +\begin{quote}
   28.70 +  \verb_# mkfs -t ext3 vm1disk_
   28.71 +\end{quote}
   28.72 +
   28.73 +(when the tool asks for confirmation, answer `y')
   28.74 +
   28.75 +Populate the file system e.g.\ by copying from the current root:
   28.76 +\begin{quote}
   28.77 +\begin{verbatim}
   28.78 +# mount -o loop vm1disk /mnt
   28.79 +# cp -ax /{root,dev,var,etc,usr,bin,sbin,lib} /mnt
   28.80 +# mkdir /mnt/{proc,sys,home,tmp}
   28.81 +\end{verbatim}
   28.82 +\end{quote}
   28.83 +
   28.84 +Tailor the file system by editing \path{/etc/fstab},
   28.85 +\path{/etc/hostname}, etc.\ Don't forget to edit the files in the
   28.86 +mounted file system, instead of your domain~0 filesystem, e.g.\ you
   28.87 +would edit \path{/mnt/etc/fstab} instead of \path{/etc/fstab}.  For
   28.88 +this example, use \path{/dev/sda1} as the root device in fstab.
   28.89 +
   28.90 +Now unmount (this is important!):
   28.91 +\begin{quote}
   28.92 +  \verb_# umount /mnt_
   28.93 +\end{quote}
   28.94 +
   28.95 +In the configuration file set:
   28.96 +\begin{quote}
   28.97 +  \verb_disk = ['file:/full/path/to/vm1disk,sda1,w']_
   28.98 +\end{quote}
   28.99 +
  28.100 +As the virtual machine writes to its `disk', the sparse file will be
  28.101 +filled in and consume more space up to the original 2GB.
  28.102 +
  28.103 +{\bf Note that file-backed VBDs may not be appropriate for backing
  28.104 +  I/O-intensive domains.}  File-backed VBDs are known to experience
  28.105 +substantial slowdowns under heavy I/O workloads, due to the I/O
  28.106 +handling by the loopback block device used to support file-backed VBDs
  28.107 +in dom0.  Better I/O performance can be achieved by using either
  28.108 +LVM-backed VBDs (Section~\ref{s:using-lvm-backed-vbds}) or physical
  28.109 +devices as VBDs (Section~\ref{s:exporting-physical-devices-as-vbds}).
  28.110 +
  28.111 +Linux supports a maximum of eight file-backed VBDs across all domains
  28.112 +by default.  This limit can be statically increased by using the
  28.113 +\emph{max\_loop} module parameter if CONFIG\_BLK\_DEV\_LOOP is
  28.114 +compiled as a module in the dom0 kernel, or by using the
  28.115 +\emph{max\_loop=n} boot option if CONFIG\_BLK\_DEV\_LOOP is compiled
  28.116 +directly into the dom0 kernel.
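         +
         +For example, to raise the limit to 64 loop devices when the loopback
         +driver is built as a module (a sketch):
         +\begin{verbatim}
         +# modprobe loop max_loop=64
         +\end{verbatim}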
  28.117 +
  28.118 +
  28.119 +\section{Using LVM-backed VBDs}
  28.120 +\label{s:using-lvm-backed-vbds}
  28.121 +
  28.122 +A particularly appealing solution is to use LVM volumes as backing for
  28.123 +domain file-systems since this allows dynamic growing/shrinking of
  28.124 +volumes as well as snapshots and other features.
  28.125 +
  28.126 +To initialize a partition to support LVM volumes:
  28.127 +\begin{quote}
  28.128 +\begin{verbatim}
  28.129 +# pvcreate /dev/sda10           
  28.130 +\end{verbatim} 
  28.131 +\end{quote}
  28.132 +
  28.133 +Create a volume group named `vg' on the physical partition:
  28.134 +\begin{quote}
  28.135 +\begin{verbatim}
  28.136 +# vgcreate vg /dev/sda10
  28.137 +\end{verbatim} 
  28.138 +\end{quote}
  28.139 +
  28.140 +Create a logical volume of size 4GB named `myvmdisk1':
  28.141 +\begin{quote}
  28.142 +\begin{verbatim}
  28.143 +# lvcreate -L4096M -n myvmdisk1 vg
  28.144 +\end{verbatim}
  28.145 +\end{quote}
  28.146 +
  28.147 +You should now see that you have a \path{/dev/vg/myvmdisk1}.  Make a
  28.148 +filesystem, mount it and populate it, e.g.:
  28.149 +\begin{quote}
  28.150 +\begin{verbatim}
  28.151 +# mkfs -t ext3 /dev/vg/myvmdisk1
  28.152 +# mount /dev/vg/myvmdisk1 /mnt
  28.153 +# cp -ax / /mnt
  28.154 +# umount /mnt
  28.155 +\end{verbatim}
  28.156 +\end{quote}
  28.157 +
  28.158 +Now configure your VM with the following disk configuration:
  28.159 +\begin{quote}
  28.160 +\begin{verbatim}
  28.161 + disk = [ 'phy:vg/myvmdisk1,sda1,w' ]
  28.162 +\end{verbatim}
  28.163 +\end{quote}
  28.164 +
  28.165 +LVM enables you to grow the size of logical volumes, but you'll need
  28.166 +to resize the corresponding file system to make use of the new space.
  28.167 +Some file systems (e.g.\ ext3) now support online resize.  See the LVM
  28.168 +manuals for more details.
  28.169 +
  28.170 +You can also use LVM for creating copy-on-write (CoW) clones of LVM
  28.171 +volumes (known as writable persistent snapshots in LVM terminology).
  28.172 +This facility is new in Linux 2.6.8, so it isn't as stable as one
  28.173 +might hope.  In particular, using lots of CoW LVM disks consumes a lot
  28.174 +of dom0 memory, and error conditions such as running out of disk space
  28.175 +are not handled well. Hopefully this will improve in the future.
  28.176 +
  28.177 +To create two copy-on-write clones of the above file system you would
  28.178 +use the following commands:
  28.179 +
  28.180 +\begin{quote}
  28.181 +\begin{verbatim}
  28.182 +# lvcreate -s -L1024M -n myclonedisk1 /dev/vg/myvmdisk1
  28.183 +# lvcreate -s -L1024M -n myclonedisk2 /dev/vg/myvmdisk1
  28.184 +\end{verbatim}
  28.185 +\end{quote}
  28.186 +
  28.187 +Each of these can grow to have 1GB of differences from the master
  28.188 +volume. You can grow the amount of space for storing the differences
  28.189 +using the lvextend command, e.g.:
  28.190 +\begin{quote}
  28.191 +\begin{verbatim}
  28.192 +# lvextend -L+100M /dev/vg/myclonedisk1
  28.193 +\end{verbatim}
  28.194 +\end{quote}
  28.195 +
  28.196 +Don't ever let the `differences volume' fill up, otherwise LVM gets
  28.197 +rather confused. It may be possible to automate the growing process by
  28.198 +using \path{dmsetup wait} to spot the volume getting full and then
  28.199 +issuing an \path{lvextend}.
  28.200 +
  28.201 +In principle, it is possible to continue writing to the volume that
  28.202 +has been cloned (the changes will not be visible to the clones), but
  28.203 +we wouldn't recommend this: keep the cloned volume as a `pristine'
  28.204 +file system install that isn't mounted directly by any of the virtual
  28.205 +machines.
  28.206 +
  28.207 +
  28.208 +\section{Using NFS Root}
  28.209 +
  28.210 +First, populate a root filesystem in a directory on the server
  28.211 +machine. This can be on a distinct physical machine, or simply run
  28.212 +within a virtual machine on the same node.
  28.213 +
  28.214 +Now configure the NFS server to export this filesystem over the
  28.215 +network by adding a line to \path{/etc/exports}, for instance:
  28.216 +
  28.217 +\begin{quote}
  28.218 +  \begin{small}
  28.219 +\begin{verbatim}
  28.220 +/export/vm1root      1.2.3.4/24(rw,sync,no_root_squash)
  28.221 +\end{verbatim}
  28.222 +  \end{small}
  28.223 +\end{quote}
  28.224 +
  28.225 +Finally, configure the domain to use NFS root.  In addition to the
  28.226 +normal variables, you should make sure to set the following values in
  28.227 +the domain's configuration file:
  28.228 +
  28.229 +\begin{quote}
  28.230 +  \begin{small}
  28.231 +\begin{verbatim}
  28.232 +root       = '/dev/nfs'
  28.233 +nfs_server = '2.3.4.5'       # substitute IP address of server
  28.234 +nfs_root   = '/path/to/root' # path to root FS on the server
  28.235 +\end{verbatim}
  28.236 +  \end{small}
  28.237 +\end{quote}
  28.238 +
  28.239 +The domain will need network access at boot time, so either statically
  28.240 +configure an IP address using the config variables \path{ip},
  28.241 +\path{netmask}, \path{gateway}, \path{hostname}; or enable DHCP
  28.242 +(\path{dhcp='dhcp'}).
  28.243 +
  28.244 +Note that the Linux NFS root implementation is known to have stability
  28.245 +problems under high load (this is not a Xen-specific problem), so this
  28.246 +configuration may not be appropriate for critical servers.
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/docs/src/user/domain_mgmt.tex	Thu Sep 22 11:42:01 2005 -0600
    29.3 @@ -0,0 +1,203 @@
    29.4 +\chapter{Domain Management Tools}
    29.5 +
    29.6 +The previous chapter described a simple example of how to configure
    29.7 +and start a domain.  This chapter summarises the tools available to
    29.8 +manage running domains.
    29.9 +
   29.10 +
   29.11 +\section{Command-line Management}
   29.12 +
   29.13 +Command line management tasks are also performed using the \path{xm}
   29.14 +tool.  For online help for the commands available, type:
   29.15 +\begin{quote}
   29.16 +  \verb_# xm help_
   29.17 +\end{quote}
   29.18 +
   29.19 +You can also type \path{xm help $<$command$>$} for more information on
   29.20 +a given command.
   29.21 +
   29.22 +\subsection{Basic Management Commands}
   29.23 +
   29.24 +The most important \path{xm} commands are:
   29.25 +\begin{quote}
   29.26 +  \verb_# xm list_: Lists all domains running.\\
   29.27 +  \verb_# xm consoles_: Gives information about the domain consoles.\\
   29.28 +  \verb_# xm console_: Opens a console to a domain (e.g.\
   29.29 +  \verb_# xm console myVM_)
   29.30 +\end{quote}
   29.31 +
   29.32 +\subsection{\tt xm list}
   29.33 +
   29.34 +The output of \path{xm list} is in rows of the following format:
   29.35 +\begin{center} {\tt name domid memory cpu state cputime console}
   29.36 +\end{center}
   29.37 +
   29.38 +\begin{quote}
   29.39 +  \begin{description}
   29.40 +  \item[name] The descriptive name of the virtual machine.
   29.41 +  \item[domid] The number of the domain ID this virtual machine is
   29.42 +    running in.
   29.43 +  \item[memory] Memory size in megabytes.
   29.44 +  \item[cpu] The CPU this domain is running on.
   29.45 +  \item[state] Domain state consists of 5 fields:
   29.46 +    \begin{description}
   29.47 +    \item[r] running
   29.48 +    \item[b] blocked
   29.49 +    \item[p] paused
   29.50 +    \item[s] shutdown
   29.51 +    \item[c] crashed
   29.52 +    \end{description}
   29.53 +  \item[cputime] How much CPU time (in seconds) the domain has used so
   29.54 +    far.
   29.55 +  \item[console] TCP port accepting connections to the domain's
   29.56 +    console.
   29.57 +  \end{description}
   29.58 +\end{quote}
   29.59 +
   29.60 +The \path{xm list} command also supports a long output format when the
   29.61 +\path{-l} switch is used.  This outputs the full details of the
   29.62 +running domains in \xend's SXP configuration format.
   29.63 +
   29.64 +For example, suppose the system is running the ttylinux domain as
   29.65 +described earlier.  The list command should produce output somewhat
   29.66 +like the following:
   29.67 +\begin{verbatim}
   29.68 +# xm list
   29.69 +Name              Id  Mem(MB)  CPU  State  Time(s)  Console
   29.70 +Domain-0           0      251    0  r----    172.2        
   29.71 +ttylinux           5       63    0  -b---      3.0    9605
   29.72 +\end{verbatim}
   29.73 +
   29.74 +Here we can see the details for the ttylinux domain, as well as for
   29.75 +domain~0 (which, of course, is always running).  Note that the console
   29.76 +port for the ttylinux domain is 9605.  This can be connected to by TCP
   29.77 +using a terminal program (e.g. \path{telnet} or, better,
   29.78 +\path{xencons}).  The simplest way to connect is to use the
   29.79 +\path{xm~console} command, specifying the domain name or ID.  To
   29.80 +connect to the console of the ttylinux domain, we could use any of the
   29.81 +following:
   29.82 +\begin{verbatim}
   29.83 +# xm console ttylinux
   29.84 +# xm console 5
   29.85 +# xencons localhost 9605
   29.86 +\end{verbatim}
   29.87 +
   29.88 +\section{Domain Save and Restore}
   29.89 +
   29.90 +The administrator of a Xen system may suspend a virtual machine's
   29.91 +current state into a disk file in domain~0, allowing it to be resumed
   29.92 +at a later time.
   29.93 +
   29.94 +The ttylinux domain described earlier can be suspended to disk using
   29.95 +the command:
   29.96 +\begin{verbatim}
   29.97 +# xm save ttylinux ttylinux.xen
   29.98 +\end{verbatim}
   29.99 +
  29.100 +This will stop the domain named `ttylinux' and save its current state
  29.101 +into a file called \path{ttylinux.xen}.
  29.102 +
  29.103 +To resume execution of this domain, use the \path{xm restore} command:
  29.104 +\begin{verbatim}
  29.105 +# xm restore ttylinux.xen
  29.106 +\end{verbatim}
  29.107 +
  29.108 +This will restore the state of the domain and restart it.  The domain
  29.109 +will carry on as before and the console may be reconnected using the
  29.110 +\path{xm console} command, as above.
  29.111 +
  29.112 +\section{Live Migration}
  29.113 +
  29.114 +Live migration is used to transfer a domain between physical hosts
  29.115 +whilst that domain continues to perform its usual activities --- from
  29.116 +the user's perspective, the migration should be imperceptible.
  29.117 +
  29.118 +To perform a live migration, both hosts must be running Xen / \xend\
  29.119 +and the destination host must have sufficient resources (e.g.\ memory
  29.120 +capacity) to accommodate the domain after the move. Furthermore we
  29.121 +currently require both source and destination machines to be on the
  29.122 +same L2 subnet.
  29.123 +
  29.124 +Currently, there is no support for providing automatic remote access
  29.125 +to filesystems stored on local disk when a domain is migrated.
  29.126 +Administrators should choose an appropriate storage solution (i.e.\
  29.127 +SAN, NAS, etc.) to ensure that domain filesystems are also available
  29.128 +on their destination node. GNBD is a good method for exporting a
  29.129 +volume from one machine to another. iSCSI can do a similar job, but is
  29.130 +more complex to set up.
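        +
        +As a rough sketch of the GNBD approach (assuming the GNBD tools from
        +the GFS suite; the volume path, export name and host name below are
        +purely illustrative):
        +\begin{verbatim}
        +# gnbd_serv                                (on the storage server)
        +# gnbd_export -d /dev/vg0/domainroot -e domainroot
        +# gnbd_import -i storage.example.com       (on each Xen host)
        +\end{verbatim}
        +The imported device should then appear under \path{/dev/gnbd/} on
        +each host, where it can be referenced from a domain's \path{disk}
        +configuration line.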
  29.131 +
  29.132 +When a domain migrates, its MAC and IP address move with it; thus it
  29.133 +is only possible to migrate VMs within the same layer-2 network and IP
  29.134 +subnet. If the destination node is on a different subnet, the
  29.135 +administrator would need to manually configure a suitable etherip or
  29.136 +IP tunnel in the domain~0 of the remote node.
  29.137 +
  29.138 +A domain may be migrated using the \path{xm migrate} command.  To live
  29.139 +migrate a domain to another machine, we would use the command:
  29.140 +
  29.141 +\begin{verbatim}
  29.142 +# xm migrate --live mydomain destination.ournetwork.com
  29.143 +\end{verbatim}
  29.144 +
  29.145 +Without the \path{--live} flag, \xend\ simply stops the domain,
  29.146 +copies the memory image over to the new node, and restarts it. Since
  29.147 +domains can have large allocations this can be quite time consuming,
  29.148 +even on a Gigabit network. With the \path{--live} flag \xend\ attempts
  29.149 +to keep the domain running while the migration is in progress,
  29.150 +resulting in typical `downtimes' of just 60--300ms.
  29.151 +
  29.152 +For now it will be necessary to reconnect to the domain's console on
  29.153 +the new machine using the \path{xm console} command.  If a migrated
  29.154 +domain has any open network connections then they will be preserved,
  29.155 +so SSH connections do not have this limitation.
  29.156 +
  29.157 +
  29.158 +\section{Managing Domain Memory}
  29.159 +
  29.160 +XenLinux domains have the ability to relinquish / reclaim machine
  29.161 +memory at the request of the administrator or the user of the domain.
  29.162 +
  29.163 +\subsection{Setting memory footprints from dom0}
  29.164 +
  29.165 +The machine administrator can request that a domain alter its memory
  29.166 +footprint using the \path{xm set-mem} command.  For instance, we can
  29.167 +request that our example ttylinux domain reduce its memory footprint
  29.168 +to 32 megabytes.
  29.169 +
  29.170 +\begin{verbatim}
  29.171 +# xm set-mem ttylinux 32
  29.172 +\end{verbatim}
  29.173 +
  29.174 +We can now see the result of this in the output of \path{xm list}:
  29.175 +
  29.176 +\begin{verbatim}
  29.177 +# xm list
  29.178 +Name              Id  Mem(MB)  CPU  State  Time(s)  Console
  29.179 +Domain-0           0      251    0  r----    172.2        
  29.180 +ttylinux           5       31    0  -b---      4.3    9605
  29.181 +\end{verbatim}
  29.182 +
  29.183 +The domain has responded to the request by returning memory to Xen. We
  29.184 +can restore the domain to its original size using the command:
  29.185 +
  29.186 +\begin{verbatim}
  29.187 +# xm set-mem ttylinux 64
  29.188 +\end{verbatim}
  29.189 +
  29.190 +\subsection{Setting memory footprints from within a domain}
  29.191 +
  29.192 +The virtual file \path{/proc/xen/balloon} allows the owner of a domain
  29.193 +to adjust their own memory footprint.  Reading the file (e.g.\
  29.194 +\path{cat /proc/xen/balloon}) prints out the current memory footprint
  29.195 +of the domain.  Writing the file (e.g.\ \path{echo new\_target >
  29.196 +  /proc/xen/balloon}) requests that the kernel adjust the domain's
  29.197 +memory footprint to a new value.
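        +
        +For example, to read the current footprint and then request a new
        +target (a hedged example; the value's units match those reported
        +when the file is read):
        +\begin{verbatim}
        +# cat /proc/xen/balloon
        +# echo 32768 > /proc/xen/balloon
        +\end{verbatim}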
  29.198 +
  29.199 +\subsection{Setting memory limits}
  29.200 +
  29.201 +Xen associates a memory size limit with each domain.  By default, this
  29.202 +is the amount of memory the domain is originally started with,
  29.203 +preventing the domain from ever growing beyond this size.  To permit a
  29.204 +domain to grow beyond its original allocation or to prevent a domain
  29.205 +you've shrunk from reclaiming the memory it relinquished, use the
  29.206 +\path{xm maxmem} command.
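        +
        +For instance, to allow the ttylinux domain to grow to at most 128MB
        +(a hedged example; see \path{xm help} for the exact syntax of your
        +version of the tools):
        +\begin{verbatim}
        +# xm maxmem ttylinux 128
        +\end{verbatim}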
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/docs/src/user/glossary.tex	Thu Sep 22 11:42:01 2005 -0600
    30.3 @@ -0,0 +1,79 @@
    30.4 +\chapter{Glossary of Terms}
    30.5 +
    30.6 +\begin{description}
    30.7 +
    30.8 +\item[Atropos] One of the CPU schedulers provided by Xen.  Atropos
    30.9 +  provides domains with absolute shares of the CPU, with timeliness
   30.10 +  guarantees and a mechanism for sharing out `slack time'.
   30.11 +
   30.12 +\item[BVT] The BVT scheduler is used to give proportional fair shares
   30.13 +  of the CPU to domains.
   30.14 +
   30.15 +\item[Exokernel] A minimal piece of privileged code, similar to a {\bf
   30.16 +    microkernel} but providing a more `hardware-like' interface to the
   30.17 +  tasks it manages.  This is similar to a paravirtualising VMM like
   30.18 +  {\bf Xen} but was designed as a new operating system structure,
   30.19 +  rather than specifically to run multiple conventional OSs.
   30.20 +
   30.21 +\item[Domain] A domain is the execution context that contains a
   30.22 +  running {\bf virtual machine}.  The relationship between virtual
   30.23 +  machines and domains on Xen is similar to that between programs and
   30.24 +  processes in an operating system: a virtual machine is a persistent
   30.25 +  entity that resides on disk (somewhat like a program).  When it is
   30.26 +  loaded for execution, it runs in a domain.  Each domain has a {\bf
   30.27 +    domain ID}.
   30.28 +
   30.29 +\item[Domain 0] The first domain to be started on a Xen machine.
   30.30 +  Domain 0 is responsible for managing the system.
   30.31 +
   30.32 +\item[Domain ID] A unique identifier for a {\bf domain}, analogous to
   30.33 +  a process ID in an operating system.
   30.34 +
   30.35 +\item[Full virtualisation] An approach to virtualisation which
   30.36 +  requires no modifications to the hosted operating system, providing
   30.37 +  the illusion of a complete system of real hardware devices.
   30.38 +
   30.39 +\item[Hypervisor] An alternative term for {\bf VMM}, used because it
   30.40 +  means `beyond supervisor', since it is responsible for managing
   30.41 +  multiple `supervisor' kernels.
   30.42 +
   30.43 +\item[Live migration] A technique for moving a running virtual machine
   30.44 +  to another physical host, without stopping it or the services
   30.45 +  running on it.
   30.46 +
   30.47 +\item[Microkernel] A small base of code running at the highest
   30.48 +  hardware privilege level.  A microkernel is responsible for sharing
   30.49 +  CPU and memory (and sometimes other devices) between less privileged
   30.50 +  tasks running on the system.  This is similar to a VMM (particularly
   30.51 +  a {\bf paravirtualising} VMM), but typically addresses a different
   30.52 +  problem space and provides a different kind of interface.
   30.53 +
   30.54 +\item[NetBSD/Xen] A port of NetBSD to the Xen architecture.
   30.55 +
   30.56 +\item[Paravirtualisation] An approach to virtualisation which requires
   30.57 +  modifications to the operating system in order to run in a virtual
   30.58 +  machine.  Xen uses paravirtualisation but preserves binary
   30.59 +  compatibility for user space applications.
   30.60 +
   30.61 +\item[Shadow pagetables] A technique for hiding the layout of machine
   30.62 +  memory from a virtual machine's operating system.  Used in some {\bf
   30.63 +    VMMs} to provide the illusion of contiguous physical memory, in
   30.64 +  Xen this is used during {\bf live migration}.
   30.65 +
   30.66 +\item[Virtual Machine] The environment in which a hosted operating
   30.67 +  system runs, providing the abstraction of a dedicated machine.  A
   30.68 +  virtual machine may be identical to the underlying hardware (as in
   30.69 +  {\bf full virtualisation}), or it may differ (as in {\bf
   30.70 +    paravirtualisation}).
   30.71 +
   30.72 +\item[VMM] Virtual Machine Monitor - the software that allows multiple
   30.73 +  virtual machines to be multiplexed on a single physical machine.
   30.74 +
   30.75 +\item[Xen] Xen is a paravirtualising virtual machine monitor,
   30.76 +  developed primarily by the Systems Research Group at the University
   30.77 +  of Cambridge Computer Laboratory.
   30.78 +
   30.79 +\item[XenLinux] Official name for the port of the Linux kernel that
   30.80 +  runs on Xen.
   30.81 +
   30.82 +\end{description}
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/docs/src/user/installation.tex	Thu Sep 22 11:42:01 2005 -0600
    31.3 @@ -0,0 +1,394 @@
    31.4 +\chapter{Installation}
    31.5 +
    31.6 +The Xen distribution includes three main components: Xen itself, ports
    31.7 +of Linux 2.4, Linux 2.6, and NetBSD to run on Xen, and the userspace
    31.8 +tools required to manage a Xen-based system.  This chapter describes
    31.9 +how to install the Xen~2.0 distribution from source.  Alternatively,
   31.10 +there may be pre-built packages available as part of your operating
   31.11 +system distribution.
   31.12 +
   31.13 +
   31.14 +\section{Prerequisites}
   31.15 +\label{sec:prerequisites}
   31.16 +
   31.17 +The following is a full list of prerequisites.  Items marked `$\dag$'
   31.18 +are required by the \xend\ control tools, and hence required if you
   31.19 +want to run more than one virtual machine; items marked `$*$' are only
   31.20 +required if you wish to build from source.
   31.21 +\begin{itemize}
   31.22 +\item A working Linux distribution using the GRUB bootloader and
   31.23 +  running on a P6-class (or newer) CPU.
   31.24 +\item [$\dag$] The \path{iproute2} package.
   31.25 +\item [$\dag$] The Linux bridge-utils\footnote{Available from {\tt
   31.26 +      http://bridge.sourceforge.net}} (e.g., \path{/sbin/brctl}).
   31.27 +\item [$\dag$] An installation of Twisted~v1.3 or
   31.28 +  above\footnote{Available from {\tt http://www.twistedmatrix.com}}.
   31.29 +  There may be a binary package available for your distribution;
   31.30 +  alternatively it can be installed by running `{\sl make
   31.31 +    install-twisted}' in the root of the Xen source tree.
   31.32 +\item [$*$] Build tools (gcc v3.2.x or v3.3.x, binutils, GNU make).
   31.33 +\item [$*$] Development installation of libcurl (e.g., libcurl-devel).
   31.34 +\item [$*$] Development installation of zlib (e.g., zlib-dev).
   31.35 +\item [$*$] Development installation of Python v2.2 or later (e.g.,
   31.36 +  python-dev).
   31.37 +\item [$*$] \LaTeX\ and transfig are required to build the
   31.38 +  documentation.
   31.39 +\end{itemize}
   31.40 +
   31.41 +Once you have satisfied the relevant prerequisites, you can now
   31.42 +install either a binary or source distribution of Xen.
   31.43 +
   31.44 +
   31.45 +\section{Installing from Binary Tarball}
   31.46 +
   31.47 +Pre-built tarballs are available for download from the Xen download
   31.48 +page
   31.49 +\begin{quote} {\tt http://xen.sf.net}
   31.50 +\end{quote}
   31.51 +
   31.52 +Once you've downloaded the tarball, simply unpack and install:
   31.53 +\begin{verbatim}
   31.54 +# tar zxvf xen-2.0-install.tgz
   31.55 +# cd xen-2.0-install
   31.56 +# sh ./install.sh
   31.57 +\end{verbatim}
   31.58 +
   31.59 +Once you've installed the binaries you need to configure your system
   31.60 +as described in Section~\ref{s:configure}.
   31.61 +
   31.62 +
   31.63 +\section{Installing from Source}
   31.64 +
   31.65 +This section describes how to obtain, build, and install Xen from
   31.66 +source.
   31.67 +
   31.68 +\subsection{Obtaining the Source}
   31.69 +
   31.70 +The Xen source tree is available as either a compressed source
   31.71 +tarball or as a clone of our master BitKeeper repository.
   31.72 +
   31.73 +\begin{description}
   31.74 +\item[Obtaining the Source Tarball]\mbox{} \\
   31.75 +  Stable versions (and daily snapshots) of the Xen source tree are
   31.76 +  available as compressed tarballs from the Xen download page
   31.77 +  \begin{quote} {\tt http://xen.sf.net}
   31.78 +  \end{quote}
   31.79 +
   31.80 +\item[Using BitKeeper]\mbox{} \\
   31.81 +  If you wish to install Xen from a clone of our latest BitKeeper
   31.82 +  repository then you will need to install the BitKeeper tools.
   31.83 +  Download instructions for BitKeeper can be obtained by filling out
   31.84 +  the form at:
   31.85 +  \begin{quote} {\tt http://www.bitmover.com/cgi-bin/download.cgi}
   31.86 +\end{quote}
   31.87 +The public master BK repository for the 2.0 release lives at:
   31.88 +\begin{quote} {\tt bk://xen.bkbits.net/xen-2.0.bk}
   31.89 +\end{quote} 
   31.90 +You can use BitKeeper to download it and keep it updated with the
   31.91 +latest features and fixes.
   31.92 +
   31.93 +Change to the directory in which you want to put the source code, then
   31.94 +run:
   31.95 +\begin{verbatim}
   31.96 +# bk clone bk://xen.bkbits.net/xen-2.0.bk
   31.97 +\end{verbatim}
   31.98 +
   31.99 +This creates a new directory named \path{xen-2.0.bk} under your
  31.100 +current directory, containing all the source code for Xen, the OS
  31.101 +ports, and the control tools. You can update your repository with the
  31.102 +latest changes at any time by running:
  31.103 +\begin{verbatim}
  31.104 +# cd xen-2.0.bk # to change into the local repository
  31.105 +# bk pull       # to update the repository
  31.106 +\end{verbatim}
  31.107 +\end{description}
  31.108 +
  31.109 +% \section{The distribution}
  31.110 +%
  31.111 +% The Xen source code repository is structured as follows:
  31.112 +%
  31.113 +% \begin{description}
  31.114 +% \item[\path{tools/}] Xen node controller daemon (Xend), command line
  31.115 +%   tools, control libraries
  31.116 +% \item[\path{xen/}] The Xen VMM.
  31.117 +% \item[\path{linux-*-xen-sparse/}] Xen support for Linux.
  31.118 +% \item[\path{linux-*-patches/}] Experimental patches for Linux.
  31.119 +% \item[\path{netbsd-*-xen-sparse/}] Xen support for NetBSD.
  31.120 +% \item[\path{docs/}] Various documentation files for users and
  31.121 +%   developers.
  31.122 +% \item[\path{extras/}] Bonus extras.
  31.123 +% \end{description}
  31.124 +
  31.125 +\subsection{Building from Source}
  31.126 +
  31.127 +The top-level Xen Makefile includes a target `world' that will do the
  31.128 +following:
  31.129 +
  31.130 +\begin{itemize}
  31.131 +\item Build Xen.
  31.132 +\item Build the control tools, including \xend.
  31.133 +\item Download (if necessary) and unpack the Linux 2.6 source code,
  31.134 +  and patch it for use with Xen.
  31.135 +\item Build a Linux kernel to use in domain 0 and a smaller
  31.136 +  unprivileged kernel, which can optionally be used for unprivileged
  31.137 +  virtual machines.
  31.138 +\end{itemize}
  31.139 +
  31.140 +After the build has completed you should have a top-level directory
  31.141 +called \path{dist/} in which all resulting targets will be placed; of
  31.142 +particular interest are the two XenLinux kernel images, one
  31.143 +with a `-xen0' extension which contains hardware device drivers and
  31.144 +drivers for Xen's virtual devices, and one with a `-xenU' extension
  31.145 +that just contains the virtual ones. These are found in
  31.146 +\path{dist/install/boot/} along with the image for Xen itself and the
  31.147 +configuration files used during the build.
  31.148 +
  31.149 +The NetBSD port can be built using:
  31.150 +\begin{quote}
  31.151 +\begin{verbatim}
  31.152 +# make netbsd20
  31.153 +\end{verbatim}
  31.154 +\end{quote}
  31.155 +The NetBSD port is built using a snapshot of the netbsd-2-0 cvs branch.
  31.156 +The snapshot is downloaded as part of the build process, if it is not
  31.157 +yet present in the \path{NETBSD\_SRC\_PATH} search path.  The build
  31.158 +process also downloads a toolchain which includes all the tools
  31.159 +necessary to build the NetBSD kernel under Linux.
  31.160 +
  31.161 +To further customize the set of kernels built, you need to edit the
  31.162 +top-level Makefile. Look for the line:
  31.163 +
  31.164 +\begin{quote}
  31.165 +\begin{verbatim}
  31.166 +KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU
  31.167 +\end{verbatim}
  31.168 +\end{quote}
  31.169 +
  31.170 +You can edit this line to include any set of operating system kernels
  31.171 +which have configurations in the top-level \path{buildconfigs/}
  31.172 +directory, for example \path{mk.linux-2.4-xenU} to build a Linux 2.4
  31.173 +kernel containing only virtual device drivers.
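        +
        +For example, to additionally build the Linux 2.4 unprivileged
        +kernel, the line could be edited to read:
        +\begin{quote}
        +\begin{verbatim}
        +KERNELS ?= mk.linux-2.6-xen0 mk.linux-2.6-xenU mk.linux-2.4-xenU
        +\end{verbatim}
        +\end{quote}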
  31.174 +
  31.175 +%% Inspect the Makefile if you want to see what goes on during a
  31.176 +%% build.  Building Xen and the tools is straightforward, but XenLinux
  31.177 +%% is more complicated.  The makefile needs a `pristine' Linux kernel
  31.178 +%% tree to which it will then add the Xen architecture files.  You can
  31.179 +%% tell the makefile the location of the appropriate Linux compressed
  31.180 +%% tar file by
  31.181 +%% setting the LINUX\_SRC environment variable, e.g. \\
  31.182 +%% \verb!# LINUX_SRC=/tmp/linux-2.6.11.tar.bz2 make world! \\ or by
  31.183 +%% placing the tar file somewhere in the search path of {\tt
  31.184 +%%   LINUX\_SRC\_PATH} which defaults to `{\tt .:..}'.  If the
  31.185 +%% makefile can't find a suitable kernel tar file it attempts to
  31.186 +%% download it from kernel.org (this won't work if you're behind a
  31.187 +%% firewall).
  31.188 +
  31.189 +%% After untaring the pristine kernel tree, the makefile uses the {\tt
  31.190 +%%   mkbuildtree} script to add the Xen patches to the kernel.
  31.191 +
  31.192 +
  31.193 +%% The procedure is similar to build the Linux 2.4 port: \\
  31.194 +%% \verb!# LINUX_SRC=/path/to/linux2.4/source make linux24!
  31.195 +
  31.196 +
  31.197 +%% \framebox{\parbox{5in}{
  31.198 +%%     {\bf Distro specific:} \\
  31.199 +%%     {\it Gentoo} --- if not using udev (most installations,
  31.200 +%%     currently), you'll need to enable devfs and devfs mount at boot
  31.201 +%%     time in the xen0 config.  }}
  31.202 +
  31.203 +\subsection{Custom XenLinux Builds}
  31.204 +
  31.205 +% If you have an SMP machine you may wish to give the {\tt '-j4'}
  31.206 +% argument to make to get a parallel build.
  31.207 +
  31.208 +If you wish to build a customized XenLinux kernel (e.g. to support
  31.209 +additional devices or enable distribution-required features), you can
  31.210 +use the standard Linux configuration mechanisms, specifying that the
  31.211 +architecture being built for is \path{xen}, e.g.:
  31.212 +\begin{quote}
  31.213 +\begin{verbatim}
  31.214 +# cd linux-2.6.11-xen0
  31.215 +# make ARCH=xen xconfig
  31.216 +# cd ..
  31.217 +# make
  31.218 +\end{verbatim}
  31.219 +\end{quote}
  31.220 +
  31.221 +You can also copy an existing Linux configuration (\path{.config})
  31.222 +into \path{linux-2.6.11-xen0} and execute:
  31.223 +\begin{quote}
  31.224 +\begin{verbatim}
  31.225 +# make ARCH=xen oldconfig
  31.226 +\end{verbatim}
  31.227 +\end{quote}
  31.228 +
  31.229 +You may be prompted with some Xen-specific options; we advise
  31.230 +accepting the defaults for these options.
  31.231 +
  31.232 +Note that the only difference between the two types of Linux kernel
  31.233 +that are built is the configuration file used for each.  The `U'
  31.234 +suffixed (unprivileged) versions don't contain any of the physical
  31.235 +hardware device drivers, leading to a 30\% reduction in size; hence
  31.236 +you may prefer these for your non-privileged domains.  The `0'
  31.237 +suffixed privileged versions can be used to boot the system, as well
  31.238 +as in driver domains and unprivileged domains.
  31.239 +
  31.240 +\subsection{Installing the Binaries}
  31.241 +
  31.242 +The files produced by the build process are stored under the
  31.243 +\path{dist/install/} directory. To install them in their default
  31.244 +locations, do:
  31.245 +\begin{quote}
  31.246 +\begin{verbatim}
  31.247 +# make install
  31.248 +\end{verbatim}
  31.249 +\end{quote}
  31.250 +
  31.251 +Alternatively, users with special installation requirements may wish
  31.252 +to install them manually by copying the files to their appropriate
  31.253 +destinations.
  31.254 +
  31.255 +%% Files in \path{install/boot/} include:
  31.256 +%% \begin{itemize}
  31.257 +%% \item \path{install/boot/xen-2.0.gz} Link to the Xen 'kernel'
  31.258 +%% \item \path{install/boot/vmlinuz-2.6-xen0} Link to domain 0
  31.259 +%%   XenLinux kernel
  31.260 +%% \item \path{install/boot/vmlinuz-2.6-xenU} Link to unprivileged
  31.261 +%%   XenLinux kernel
  31.262 +%% \end{itemize}
  31.263 +
  31.264 +The \path{dist/install/boot} directory will also contain the config
  31.265 +files used for building the XenLinux kernels, as well as versions of
  31.266 +Xen and XenLinux kernels that contain debug symbols (\path{xen-syms-2.0.6}
  31.267 +and \path{vmlinux-syms-2.6.11.11-xen0}) which are essential for
  31.268 +interpreting crash dumps.  Retain these files as the developers may
  31.269 +wish to see them if you post on the mailing list.
  31.270 +
  31.271 +
  31.272 +\section{Configuration}
  31.273 +\label{s:configure}
  31.274 +
  31.275 +Once you have built and installed the Xen distribution, it is simple
  31.276 +to prepare the machine for booting and running Xen.
  31.277 +
  31.278 +\subsection{GRUB Configuration}
  31.279 +
  31.280 +An entry should be added to \path{grub.conf} (often found under
  31.281 +\path{/boot/} or \path{/boot/grub/}) to allow Xen / XenLinux to boot.
  31.282 +This file is sometimes called \path{menu.lst}, depending on your
  31.283 +distribution.  The entry should look something like the following:
  31.284 +
  31.285 +{\small
  31.286 +\begin{verbatim}
  31.287 +title Xen 2.0 / XenLinux 2.6
  31.288 +  kernel /boot/xen-2.0.gz dom0_mem=131072
  31.289 +  module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro console=tty0
  31.290 +\end{verbatim}
  31.291 +}
  31.292 +
  31.293 +The kernel line tells GRUB where to find Xen itself and what boot
  31.294 +parameters should be passed to it (in this case, setting domain 0's
  31.295 +memory allocation in kilobytes).
  31.296 +For more details on the various Xen boot parameters see
  31.297 +Section~\ref{s:xboot}.
  31.298 +
  31.299 +The module line of the configuration describes the location of the
  31.300 +XenLinux kernel that Xen should start and the parameters that should
  31.301 +be passed to it (these are standard Linux parameters, identifying the
  31.302 +root device and specifying it be initially mounted read only and
  31.303 +instructing that console output be sent to the screen).  Some
  31.304 +distributions such as SuSE do not require the \path{ro} parameter.
  31.305 +
  31.306 +%% \framebox{\parbox{5in}{
  31.307 +%%     {\bf Distro specific:} \\
  31.308 +%%     {\it SuSE} --- Omit the {\tt ro} option from the XenLinux
  31.309 +%%     kernel command line, since the partition won't be remounted rw
  31.310 +%%     during boot.  }}
  31.311 +
  31.312 +
  31.313 +If you want to use an initrd, just add another \path{module} line to
  31.314 +the configuration, as usual:
  31.315 +
  31.316 +{\small
  31.317 +\begin{verbatim}
  31.318 +  module /boot/my_initrd.gz
  31.319 +\end{verbatim}
  31.320 +}
  31.321 +
  31.322 +As always when installing a new kernel, it is recommended that you do
  31.323 +not delete existing menu options from \path{menu.lst} --- you may want
  31.324 +to boot your old Linux kernel in future, particularly if you have
  31.325 +problems.
  31.326 +
  31.327 +\subsection{Serial Console (optional)}
  31.328 +
  31.329 +%% kernel /boot/xen-2.0.gz dom0_mem=131072 com1=115200,8n1
  31.330 +%% module /boot/vmlinuz-2.6-xen0 root=/dev/sda4 ro
  31.331 +
  31.332 +
  31.333 +In order to configure Xen serial console output, it is necessary to
  31.334 +add a boot option to your GRUB config; e.g.\ replace the above kernel
  31.335 +line with:
  31.336 +\begin{quote}
  31.337 +{\small
  31.338 +\begin{verbatim}
  31.339 +   kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1
  31.340 +\end{verbatim}}
  31.341 +\end{quote}
  31.342 +
  31.343 +This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 1
  31.344 +stop bit and no parity. Modify these parameters to suit your setup.
  31.345 +
  31.346 +One can also configure XenLinux to share the serial console; to
  31.347 +achieve this append ``\path{console=ttyS0}'' to your module line.
  31.348 +
  31.349 +If you wish to be able to log in over the XenLinux serial console it
  31.350 +is necessary to add a line into \path{/etc/inittab}, just as per
  31.351 +regular Linux. Simply add the line:
  31.352 +\begin{quote} {\small {\tt c:2345:respawn:/sbin/mingetty ttyS0}}
  31.353 +\end{quote}
  31.354 +
  31.355 +and you should be able to log in. Note that logging in as root over
  31.356 +the serial line requires adding \path{ttyS0} to
  31.357 +\path{/etc/securetty} in most modern distributions.
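        +
        +For example:
        +\begin{quote} {\small {\tt echo ttyS0 >> /etc/securetty}}
        +\end{quote}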
  31.358 +
  31.359 +\subsection{TLS Libraries}
  31.360 +
  31.361 +Users of the XenLinux 2.6 kernel should disable Thread Local Storage
  31.362 +(e.g.\ by doing a \path{mv /lib/tls /lib/tls.disabled}) before
  31.363 +attempting to run with a XenLinux kernel\footnote{If you boot without
  31.364 +  first disabling TLS, you will get a warning message during the boot
  31.365 +  process. In this case, simply perform the rename after the machine
  31.366 +  is up and then run \texttt{/sbin/ldconfig} to make it take effect.}.
  31.367 +You can always reenable it by restoring the directory to its original
  31.368 +location (i.e.\ \path{mv /lib/tls.disabled /lib/tls}).
  31.369 +
  31.370 +The reason for this is that the current TLS implementation uses
  31.371 +segmentation in a way that is not permissible under Xen.  If TLS is
  31.372 +not disabled, an emulation mode is used within Xen which reduces
  31.373 +performance substantially.
  31.374 +
  31.375 +We hope that this issue can be resolved by working with Linux
  31.376 +distribution vendors to implement a minor backward-compatible change
  31.377 +to the TLS library.
  31.378 +
  31.379 +
  31.380 +\section{Booting Xen}
  31.381 +
  31.382 +It should now be possible to restart the system and use Xen.  Reboot
  31.383 +as usual but choose the new Xen option when the GRUB screen appears.
  31.384 +
  31.385 +What follows should look much like a conventional Linux boot.  The
  31.386 +first portion of the output comes from Xen itself, supplying low level
  31.387 +information about itself and the machine it is running on.  The
  31.388 +following portion of the output comes from XenLinux.
  31.389 +
  31.390 +You may see some errors during the XenLinux boot.  These are not
  31.391 +necessarily anything to worry about --- they may result from kernel
  31.392 +configuration differences between your XenLinux kernel and the one you
  31.393 +usually use.
  31.394 +
  31.395 +When the boot completes, you should be able to log into your system as
  31.396 +usual.  If you are unable to log in to your system running Xen, you
  31.397 +should still be able to reboot with your normal Linux kernel.
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/docs/src/user/introduction.tex	Thu Sep 22 11:42:01 2005 -0600
    32.3 @@ -0,0 +1,143 @@
    32.4 +\chapter{Introduction}
    32.5 +
    32.6 +
    32.7 +Xen is a \emph{paravirtualising} virtual machine monitor (VMM), or
    32.8 +`hypervisor', for the x86 processor architecture.  Xen can securely
    32.9 +execute multiple virtual machines on a single physical system with
   32.10 +close-to-native performance.  The virtual machine technology
   32.11 +facilitates enterprise-grade functionality, including:
   32.12 +
   32.13 +\begin{itemize}
   32.14 +\item Virtual machines with performance close to native hardware.
   32.15 +\item Live migration of running virtual machines between physical
   32.16 +  hosts.
   32.17 +\item Excellent hardware support (supports most Linux device drivers).
   32.18 +\item Sandboxed, re-startable device drivers.
   32.19 +\end{itemize}
   32.20 +
   32.21 +Paravirtualisation permits very high performance virtualisation, even
   32.22 +on architectures like x86 that are traditionally very hard to
   32.23 +virtualise.
   32.24 +
   32.25 +The drawback of this approach is that it requires operating systems to
   32.26 +be \emph{ported} to run on Xen.  Porting an OS to run on Xen is
   32.27 +similar to supporting a new hardware platform; however, the process is
   32.28 +simplified because the paravirtual machine architecture is very
   32.29 +similar to the underlying native hardware. Even though operating
   32.30 +system kernels must explicitly support Xen, a key feature is that user
   32.31 +space applications and libraries \emph{do not} require modification.
   32.32 +
   32.33 +Xen support is available for a growing number of operating systems:
   32.34 +right now, Linux 2.4, Linux 2.6 and NetBSD are available for Xen 2.0.
   32.35 +A FreeBSD port is undergoing testing and will be incorporated into the
   32.36 +release soon. Other OS ports, including Plan 9, are in progress.  We
   32.37 +hope that the arch-xen patches will be incorporated into the
   32.38 +mainstream releases of these operating systems in due course (as has
   32.39 +already happened for NetBSD).
   32.40 +
   32.41 +Possible usage scenarios for Xen include:
   32.42 +
   32.43 +\begin{description}
   32.44 +\item [Kernel development.] Test and debug kernel modifications in a
   32.45 +  sandboxed virtual machine --- no need for a separate test machine.
   32.46 +\item [Multiple OS configurations.] Run multiple operating systems
   32.47 +  simultaneously, for instance for compatibility or QA purposes.
   32.48 +\item [Server consolidation.] Move multiple servers onto a single
   32.49 +  physical host with performance and fault isolation provided at
   32.50 +  virtual machine boundaries.
   32.51 +\item [Cluster computing.] Management at VM granularity provides more
   32.52 +  flexibility than separately managing each physical host, but better
   32.53 +  control and isolation than single-system image solutions,
   32.54 +  particularly by using live migration for load balancing.
   32.55 +\item [Hardware support for custom OSes.] Allow development of new
   32.56 +  OSes while benefiting from the wide-ranging hardware support of
   32.57 +  existing OSes such as Linux.
   32.58 +\end{description}
   32.59 +
   32.60 +
   32.61 +\section{Structure of a Xen-Based System}
   32.62 +
   32.63 +A Xen system has multiple layers, the lowest and most privileged of
   32.64 +which is Xen itself. 
   32.65 +
   32.66 +Xen in turn may host multiple \emph{guest} operating systems, each of
   32.67 +which is executed within a secure virtual machine (in Xen terminology,
   32.68 +a \emph{domain}). Domains are scheduled by Xen to make effective use
   32.69 +of the available physical CPUs.  Each guest OS manages its own
   32.70 +applications, which includes responsibility for scheduling each
   32.71 +application within the time allotted to the VM by Xen.
   32.72 +
   32.73 +The first domain, \emph{domain 0}, is created automatically when the
   32.74 +system boots and has special management privileges. Domain 0 builds
   32.75 +other domains and manages their virtual devices. It also performs
   32.76 +administrative tasks such as suspending, resuming and migrating other
   32.77 +virtual machines.
   32.78 +
   32.79 +Within domain 0, a process called \emph{xend} runs to manage the
   32.80 +system.  \Xend\ is responsible for managing virtual machines and
   32.81 +providing access to their consoles.  Commands are issued to \xend\ over
   32.82 +an HTTP interface, either from a command-line tool or from a web
   32.83 +browser.
   32.84 +
   32.85 +
   32.86 +\section{Hardware Support}
   32.87 +
   32.88 +Xen currently runs only on the x86 architecture, requiring a `P6' or
   32.89 +newer processor (e.g. Pentium Pro, Celeron, Pentium II, Pentium III,
   32.90 +Pentium IV, Xeon, AMD Athlon, AMD Duron).  Multiprocessor machines are
   32.91 +supported, and we also have basic support for HyperThreading (SMT),
   32.92 +although this remains a topic for ongoing research. A port
   32.93 +specifically for x86/64 is in progress, although Xen already runs on
   32.94 +such systems in 32-bit legacy mode. In addition a port to the IA64
   32.95 +architecture is approaching completion. We hope to add other
   32.96 +architectures such as PPC and ARM in due course.
   32.97 +
   32.98 +Xen can currently use up to 4GB of memory.  It is possible for x86
   32.99 +machines to address up to 64GB of physical memory but there are no
  32.100 +current plans to support these systems: the x86/64 port is the planned
  32.101 +route to supporting larger memory sizes.
  32.102 +
  32.103 +Xen offloads most of the hardware support issues to the guest OS
  32.104 +running in Domain~0.  Xen itself contains only the code required to
  32.105 +detect and start secondary processors, set up interrupt routing, and
  32.106 +perform PCI bus enumeration.  Device drivers run within a privileged
  32.107 +guest OS rather than within Xen itself. This approach provides
  32.108 +compatibility with the majority of device hardware supported by Linux.
  32.109 +The default XenLinux build contains support for relatively modern
  32.110 +server-class network and disk hardware, but you can add support for
  32.111 +other hardware by configuring your XenLinux kernel in the normal way.
  32.112 +
  32.113 +
  32.114 +\section{History}
  32.115 +
  32.116 +Xen was originally developed by the Systems Research Group at the
  32.117 +University of Cambridge Computer Laboratory as part of the XenoServers
  32.118 +project, funded by the UK-EPSRC.
  32.119 +
  32.120 +XenoServers aim to provide a `public infrastructure for global
  32.121 +distributed computing', and Xen plays a key part in that, allowing us
  32.122 +to efficiently partition a single machine to enable multiple
  32.123 +independent clients to run their operating systems and applications in
  32.124 +an environment providing protection, resource isolation and
  32.125 +accounting.  The project web page contains further information along
  32.126 +with pointers to papers and technical reports:
  32.127 +\path{http://www.cl.cam.ac.uk/xeno}
  32.128 +
  32.129 +Xen has since grown into a fully-fledged project in its own right,
  32.130 +enabling us to investigate interesting research issues regarding the
  32.131 +best techniques for virtualising resources such as the CPU, memory,
  32.132 +disk and network.  The project has been bolstered by support from
  32.133 +Intel Research Cambridge, and HP Labs, who are now working closely
  32.134 +with us.
  32.135 +
  32.136 +Xen was first described in a paper presented at SOSP in
  32.137 +2003\footnote{\tt
  32.138 +  http://www.cl.cam.ac.uk/netos/papers/2003-xensosp.pdf}, and the
  32.139 +first public release (1.0) was made that October.  Since then, Xen has
  32.140 +significantly matured and is now used in production scenarios on many
  32.141 +sites.
  32.142 +
  32.143 +Xen 2.0 features greatly enhanced hardware support, configuration
  32.144 +flexibility, usability and a larger complement of supported operating
  32.145 +systems. This latest release takes Xen a step closer to becoming the
  32.146 +definitive open source solution for virtualisation.
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/docs/src/user/redhat.tex	Thu Sep 22 11:42:01 2005 -0600
    33.3 @@ -0,0 +1,61 @@
    33.4 +\chapter{Installing Xen / XenLinux on Red~Hat or Fedora Core}
    33.5 +
    33.6 +When using Xen / XenLinux on a standard Linux distribution there are a
    33.7 +couple of things to watch out for:
    33.8 +
    33.9 +Note that, because domains other than domain~0 don't have any
   33.10 +privileged access at all, certain commands in the default boot
   33.11 +sequence will fail, e.g.\ attempts to update the hwclock, change the
   33.12 +console font, update the keytable map, start apmd (power management),
   33.13 +or gpm (mouse cursor).  Either ignore the errors (they should be
   33.14 +harmless), or remove them from the startup scripts.  Deleting the
   33.15 +following links is a good start: {\path{S24pcmcia}}, {\path{S09isdn}},
   33.16 +{\path{S17keytable}}, {\path{S26apmd}}, {\path{S85gpm}}.
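        +
        +For instance, under the run level 3 directory (the exact path varies
        +by distribution):
        +\begin{quote}
        +  \begin{small}\begin{verbatim}
        + # cd /etc/rc.d/rc3.d
        + # rm S24pcmcia S09isdn S17keytable S26apmd S85gpm
        +\end{verbatim}\end{small}
        +\end{quote}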
   33.17 +
   33.18 +If you want to use a single root file system that works cleanly for
   33.19 +both domain~0 and unprivileged domains, a useful trick is to use
   33.20 +different `init' run levels. For example, use run level 3 for
   33.21 +domain~0, and run level 4 for other domains. This enables different
   33.22 +startup scripts to be run depending on the run level number passed
   33.23 +on the kernel command line.
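        +
        +For example, to boot an unprivileged domain in run level 4, the run
        +level can be appended to its kernel command line from the domain
        +config file (a hedged example using the \path{extra} setting found
        +in the sample config files):
        +\begin{quote}
        +  \begin{small}\begin{verbatim}
        + extra = "4"
        +\end{verbatim}\end{small}
        +\end{quote}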
   33.24 +
   33.25 +If using NFS root file systems mounted either from an external server
   33.26 +or from domain~0, there are a couple of other gotchas.  The default
   33.27 +{\path{/etc/sysconfig/iptables}} rules block NFS, so part way through
   33.28 +the boot sequence things will suddenly go dead.
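        +
        +One blunt fix is simply to disable the firewall in the unprivileged
        +domain (a hedged suggestion; alternatively, edit the rules to permit
        +portmap and NFS traffic):
        +\begin{quote}
        +  \begin{small}\begin{verbatim}
        + # chkconfig iptables off
        +\end{verbatim}\end{small}
        +\end{quote}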
   33.29 +
   33.30 +If you're planning on having a separate NFS {\path{/usr}} partition,
   33.31 +the RH9 boot scripts don't make life easy: they attempt to mount NFS
   33.32 +file systems far too late in the boot process. The easiest way I found
   33.33 +to work around this was to have a {\path{/linuxrc}} script run ahead of
   33.34 +{\path{/sbin/init}} that mounts {\path{/usr}}:
   33.35 +
   33.36 +\begin{quote}
   33.37 +  \begin{small}\begin{verbatim}
   33.38 + #!/bin/bash
   33.39 + /sbin/ifconfig lo 127.0.0.1
   33.40 + /sbin/portmap
   33.41 + /bin/mount /usr
   33.42 + exec /sbin/init "$@" <>/dev/console 2>&1
   33.43 +\end{verbatim}\end{small}
   33.44 +\end{quote}
   33.45 +
   33.46 +%% $ XXX SMH: font lock fix :-)
   33.47 +
   33.48 +The one slight complication with the above is that
   33.49 +{\path{/sbin/portmap}} is dynamically linked against
   33.50 +{\path{/usr/lib/libwrap.so.0}}.  Since this is in {\path{/usr}}, it
   33.51 +won't work. This can be solved by copying the file (and link) below
   33.52 +the {\path{/usr}} mount point, and letting the file be `covered' when
   33.53 +the mount happens.
   33.54 +
   33.55 +In some installations, where a shared read-only {\path{/usr}} is being
   33.56 +used, it may be desirable to move other large directories over into
   33.57 +the read-only {\path{/usr}}. For example, you might replace
   33.58 +{\path{/bin}}, {\path{/lib}} and {\path{/sbin}} with links into
   33.59 +{\path{/usr/root/bin}}, {\path{/usr/root/lib}} and
   33.60 +{\path{/usr/root/sbin}} respectively. This creates other problems for
   33.61 +running the {\path{/linuxrc}} script, requiring bash, portmap, mount,
   33.62 +ifconfig, and a handful of other shared libraries to be copied below
   33.63 +the mount point --- a simple statically-linked C program would solve
   33.64 +this problem.
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/docs/src/user/start_addl_dom.tex	Thu Sep 22 11:42:01 2005 -0600
    34.3 @@ -0,0 +1,172 @@
    34.4 +\chapter{Starting Additional Domains}
    34.5 +
    34.6 +The first step in creating a new domain is to prepare a root
    34.7 +filesystem for it to boot from.  Typically, this might be stored in a
    34.8 +normal partition, an LVM or other volume manager partition, a disk
    34.9 +file or on an NFS server.  A simple way to do this is to boot
   34.10 +from your standard OS install CD and install the distribution into
   34.11 +another partition on your hard drive.
   34.12 +
   34.13 +To start the \xend\ control daemon, type
   34.14 +\begin{quote}
   34.15 +  \verb!# xend start!
   34.16 +\end{quote}
   34.17 +
   34.18 +If you wish the daemon to start automatically, see the instructions in
   34.19 +Section~\ref{s:xend}. Once the daemon is running, you can use the
   34.20 +\path{xm} tool to monitor and maintain the domains running on your
   34.21 +system. This chapter provides only a brief tutorial. We provide full
   34.22 +details of the \path{xm} tool in the next chapter.
   34.23 +
   34.24 +% \section{From the web interface}
   34.25 +%
   34.26 +% Boot the Xen machine and start Xensv (see Chapter~\ref{cha:xensv}
   34.27 +% for more details) using the command: \\
   34.28 +% \verb_# xensv start_ \\
   34.29 +% This will also start Xend (see Chapter~\ref{cha:xend} for more
   34.30 +% information).
   34.31 +%
   34.32 +% The domain management interface will then be available at {\tt
   34.33 +%   http://your\_machine:8080/}.  This provides a user friendly wizard
   34.34 +% for starting domains and functions for managing running domains.
   34.35 +%
   34.36 +% \section{From the command line}
   34.37 +
   34.38 +
   34.39 +\section{Creating a Domain Configuration File}
   34.40 +
   34.41 +Before you can start an additional domain, you must create a
   34.42 +configuration file. We provide two example files which you can use as
   34.43 +a starting point:
   34.44 +\begin{itemize}
   34.45 +\item \path{/etc/xen/xmexample1} is a simple template configuration
   34.46 +  file for describing a single VM.
   34.47 +
   34.48 +\item \path{/etc/xen/xmexample2} is a template description that
   34.49 +  is intended to be reused for multiple virtual machines.  Setting the
   34.50 +  value of the \path{vmid} variable on the \path{xm} command line
   34.51 +  fills in parts of this template.
   34.52 +\end{itemize}
   34.53 +
   34.54 +Copy one of these files and edit it as appropriate.  Typical values
   34.55 +you may wish to edit include:
   34.56 +
   34.57 +\begin{quote}
   34.58 +\begin{description}
   34.59 +\item[kernel] Set this to the path of the kernel you compiled for use
   34.60 +  with Xen (e.g.\ \path{kernel = `/boot/vmlinuz-2.6-xenU'})
   34.61 +\item[memory] Set this to the size of the domain's memory in megabytes
   34.62 +  (e.g.\ \path{memory = 64})
   34.63 +\item[disk] Set the first entry in this list to calculate the offset
   34.64 +  of the domain's root partition, based on the domain ID.  Set the
   34.65 +  second to the location of \path{/usr} if you are sharing it between
   34.66 +  domains (e.g.\ \path{disk = [`phy:your\_hard\_drive\%d,sda1,w' \%
   34.67 +    (base\_partition\_number + vmid),
   34.68 +    `phy:your\_usr\_partition,sda6,r' ]})
   34.69 +\item[dhcp] Uncomment the dhcp variable, so that the domain will
   34.70 +  receive its IP address from a DHCP server (e.g.\ \path{dhcp=`dhcp'})
   34.71 +\end{description}
   34.72 +\end{quote}
   34.73 +
   34.74 +You may also want to edit the {\bf vif} variable in order to choose
   34.75 +the MAC address of the virtual ethernet interface yourself.  For
   34.76 +example:
   34.77 +\begin{quote}
   34.78 +\verb_vif = [`mac=00:06:AA:F6:BB:B3']_
   34.79 +\end{quote}
   34.80 +If you do not set this variable, \xend\ will automatically generate a
   34.81 +random MAC address from an unused range.
   34.82 +
   34.83 +
   34.84 +\section{Booting the Domain}
   34.85 +
   34.86 +The \path{xm} tool provides a variety of commands for managing
   34.87 +domains.  Use the \path{create} command to start new domains. Assuming
   34.88 +you've created a configuration file \path{myvmconf} based around
   34.89 +\path{/etc/xen/xmexample2}, to start a domain with virtual machine
   34.90 +ID~1 you should type:
   34.91 +
   34.92 +\begin{quote}
   34.93 +\begin{verbatim}
   34.94 +# xm create -c myvmconf vmid=1
   34.95 +\end{verbatim}
   34.96 +\end{quote}
   34.97 +
   34.98 +The \path{-c} switch causes \path{xm} to attach to the domain's
   34.99 +console after creation.  The \path{vmid=1} sets the \path{vmid}
  34.100 +variable used in the \path{myvmconf} file.
  34.101 +
  34.102 +You should see the console boot messages from the new domain appearing
  34.103 +in the terminal in which you typed the command, culminating in a login
  34.104 +prompt.
  34.105 +
  34.106 +
  34.107 +\section{Example: ttylinux}
  34.108 +
  34.109 +Ttylinux is a very small Linux distribution, designed to require very
  34.110 +few resources.  We will use it as a concrete example of how to start a
  34.111 +Xen domain.  Most users will probably want to install a full-featured
  34.112 +distribution once they have mastered the basics\footnote{ttylinux is
  34.113 +  maintained by Pascal Schmidt. You can download source packages from
  34.114 +  the distribution's home page: {\tt
  34.115 +    http://www.minimalinux.org/ttylinux/}}.
  34.116 +
  34.117 +\begin{enumerate}
  34.118 +\item Download and extract the ttylinux disk image from the Files
  34.119 +  section of the project's SourceForge site (see
  34.120 +  \path{http://sf.net/projects/xen/}).
  34.121 +\item Create a configuration file like the following:
  34.122 +\begin{verbatim}
  34.123 +kernel = "/boot/vmlinuz-2.6-xenU"
  34.124 +memory = 64
  34.125 +name = "ttylinux"
  34.126 +nics = 1
  34.127 +ip = "1.2.3.4"
  34.128 +disk = ['file:/path/to/ttylinux/rootfs,sda1,w']
  34.129 +root = "/dev/sda1 ro"
  34.130 +\end{verbatim}
  34.131 +\item Now start the domain and connect to its console:
  34.132 +\begin{verbatim}
  34.133 +# xm create configfile -c
  34.134 +\end{verbatim}
  34.135 +\item Log in as root (password: root).
  34.136 +\end{enumerate}
  34.137 +
  34.138 +
  34.139 +\section{Starting / Stopping Domains Automatically}
  34.140 +
  34.141 +It is possible to have certain domains start automatically at boot
  34.142 +time and to have dom0 wait for all running domains to shut down before
  34.143 +it shuts down the system.
  34.144 +
  34.145 +To specify that a domain should start at boot time, place its
  34.146 +configuration file (or a link to it) under \path{/etc/xen/auto/}.
  34.147 +
  34.148 +A Sys-V style init script for Red Hat and LSB-compliant systems is
  34.149 +provided and will be automatically copied to \path{/etc/init.d/}
  34.150 +during install.  You can then enable it in the appropriate way for
  34.151 +your distribution.
  34.152 +
  34.153 +For instance, on Red Hat:
  34.154 +
  34.155 +\begin{quote}
  34.156 +  \verb_# chkconfig --add xendomains_
  34.157 +\end{quote}
  34.158 +
  34.159 +By default, this will start the boot-time domains in runlevels 3, 4
  34.160 +and 5.
  34.161 +
  34.162 +You can also use the \path{service} command to run this script
  34.163 +manually, e.g.:
  34.164 +
  34.165 +\begin{quote}
  34.166 +  \verb_# service xendomains start_
  34.167 +
  34.168 +  Starts all the domains with config files under /etc/xen/auto/.
  34.169 +\end{quote}
  34.170 +
  34.171 +\begin{quote}
  34.172 +  \verb_# service xendomains stop_
  34.173 +
  34.174 +  Shuts down ALL running Xen domains.
  34.175 +\end{quote}
    69.1 --- a/extras/mini-os/xenbus/xenbus_xs.c	Thu Sep 22 11:34:14 2005 -0600
    69.2 +++ b/extras/mini-os/xenbus/xenbus_xs.c	Thu Sep 22 11:42:01 2005 -0600
    69.3 @@ -127,7 +127,7 @@ static void *xs_talkv(enum xsd_sockmsg_t
    69.4  		return ERR_PTR(err);
    69.5  
    69.6  	for (i = 0; i < num_vecs; i++) {
    69.7 -		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
    69.8 +		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
    69.9  		if (err)
   69.10  			return ERR_PTR(err);
   69.11  	}
   150.1 --- a/linux-2.6-xen-sparse/arch/xen/Kconfig	Thu Sep 22 11:34:14 2005 -0600
   150.2 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig	Thu Sep 22 11:42:01 2005 -0600
   150.3 @@ -73,6 +73,8 @@ config XEN_NETDEV_BACKEND
   150.4  config XEN_TPMDEV_FRONTEND
   150.5          bool "TPM-device frontend driver"
   150.6          default n
   150.7 +	select TCG_TPM
   150.8 +	select TCG_XEN
   150.9          help
  150.10            The TPM-device frontend driver.
  150.11  
  150.12 @@ -109,13 +111,6 @@ config XEN_NETDEV_FRONTEND
  150.13  	  dedicated device-driver domain, or your master control domain
  150.14  	  (domain 0), then you almost certainly want to say Y here.
  150.15  
  150.16 -config XEN_NETDEV_GRANT
  150.17 -        bool "Grant table substrate for network drivers (DANGEROUS)"
  150.18 -        default n
  150.19 -        help
  150.20 -          This introduces the use of grant tables as a data exhange mechanism
  150.21 -          between the frontend and backend network drivers.
  150.22 -
  150.23  config XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
  150.24  	bool "Pipelined transmitter (DANGEROUS)"
  150.25  	depends on XEN_NETDEV_FRONTEND
   154.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Thu Sep 22 11:34:14 2005 -0600
   154.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Thu Sep 22 11:42:01 2005 -0600
   154.3 @@ -19,7 +19,6 @@ CONFIG_XEN_NETDEV_BACKEND=y
   154.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   154.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   154.6  CONFIG_XEN_NETDEV_FRONTEND=y
   154.7 -CONFIG_XEN_NETDEV_GRANT=y
   154.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   154.9  # CONFIG_XEN_BLKDEV_TAP is not set
  154.10  # CONFIG_XEN_SHADOW_MODE is not set
   155.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Thu Sep 22 11:34:14 2005 -0600
   155.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Thu Sep 22 11:42:01 2005 -0600
   155.3 @@ -19,7 +19,6 @@ CONFIG_XEN_NETDEV_BACKEND=y
   155.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   155.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   155.6  CONFIG_XEN_NETDEV_FRONTEND=y
   155.7 -CONFIG_XEN_NETDEV_GRANT=y
   155.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   155.9  # CONFIG_XEN_BLKDEV_TAP is not set
  155.10  # CONFIG_XEN_SHADOW_MODE is not set
   156.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Thu Sep 22 11:34:14 2005 -0600
   156.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Thu Sep 22 11:42:01 2005 -0600
   156.3 @@ -16,7 +16,6 @@ CONFIG_NO_IDLE_HZ=y
   156.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   156.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   156.6  CONFIG_XEN_NETDEV_FRONTEND=y
   156.7 -CONFIG_XEN_NETDEV_GRANT=y
   156.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   156.9  # CONFIG_XEN_BLKDEV_TAP is not set
  156.10  # CONFIG_XEN_SHADOW_MODE is not set
   157.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Thu Sep 22 11:34:14 2005 -0600
   157.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Thu Sep 22 11:42:01 2005 -0600
   157.3 @@ -16,7 +16,6 @@ CONFIG_NO_IDLE_HZ=y
   157.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   157.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   157.6  CONFIG_XEN_NETDEV_FRONTEND=y
   157.7 -CONFIG_XEN_NETDEV_GRANT=y
   157.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   157.9  # CONFIG_XEN_BLKDEV_TAP is not set
  157.10  # CONFIG_XEN_SHADOW_MODE is not set
   158.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Thu Sep 22 11:34:14 2005 -0600
   158.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32	Thu Sep 22 11:42:01 2005 -0600
   158.3 @@ -19,7 +19,6 @@ CONFIG_XEN_NETDEV_BACKEND=y
   158.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   158.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   158.6  CONFIG_XEN_NETDEV_FRONTEND=y
   158.7 -CONFIG_XEN_NETDEV_GRANT=y
   158.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   158.9  # CONFIG_XEN_BLKDEV_TAP is not set
  158.10  # CONFIG_XEN_SHADOW_MODE is not set
  158.11 @@ -372,7 +371,7 @@ CONFIG_PNP=y
  158.12  #
  158.13  CONFIG_ISAPNP=y
  158.14  # CONFIG_PNPBIOS is not set
  158.15 -CONFIG_PNPACPI=y
  158.16 +# CONFIG_PNPACPI is not set
  158.17  
  158.18  #
  158.19  # Block devices
   159.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64	Thu Sep 22 11:34:14 2005 -0600
   159.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64	Thu Sep 22 11:42:01 2005 -0600
   159.3 @@ -19,7 +19,6 @@ CONFIG_XEN_NETDEV_BACKEND=y
   159.4  # CONFIG_XEN_TPMDEV_BACKEND is not set
   159.5  CONFIG_XEN_BLKDEV_FRONTEND=y
   159.6  CONFIG_XEN_NETDEV_FRONTEND=y
   159.7 -CONFIG_XEN_NETDEV_GRANT=y
   159.8  # CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
   159.9  # CONFIG_XEN_BLKDEV_TAP is not set
  159.10  # CONFIG_XEN_SHADOW_MODE is not set
   196.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c	Thu Sep 22 11:34:14 2005 -0600
   196.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c	Thu Sep 22 11:42:01 2005 -0600
   196.3 @@ -45,12 +45,12 @@ static int direct_remap_area_pte_fn(pte_
   196.4  	return 0;
   196.5  }
   196.6  
   196.7 -int direct_remap_pfn_range(struct mm_struct *mm,
   196.8 -			    unsigned long address, 
   196.9 -			    unsigned long mfn,
  196.10 -			    unsigned long size, 
  196.11 -			    pgprot_t prot,
  196.12 -			    domid_t  domid)
  196.13 +static int __direct_remap_pfn_range(struct mm_struct *mm,
  196.14 +				    unsigned long address, 
  196.15 +				    unsigned long mfn,
  196.16 +				    unsigned long size, 
  196.17 +				    pgprot_t prot,
  196.18 +				    domid_t  domid)
  196.19  {
  196.20  	int i;
  196.21  	unsigned long start_address;
  196.22 @@ -98,6 +98,20 @@ int direct_remap_pfn_range(struct mm_str
  196.23  	return 0;
  196.24  }
  196.25  
  196.26 +int direct_remap_pfn_range(struct vm_area_struct *vma,
  196.27 +			   unsigned long address, 
  196.28 +			   unsigned long mfn,
  196.29 +			   unsigned long size, 
  196.30 +			   pgprot_t prot,
  196.31 +			   domid_t  domid)
  196.32 +{
  196.33 +	/* Same as remap_pfn_range(). */
  196.34 +	vma->vm_flags |= VM_IO | VM_RESERVED;
  196.35 +
  196.36 +	return __direct_remap_pfn_range(
  196.37 +		vma->vm_mm, address, mfn, size, prot, domid);
  196.38 +}
  196.39 +
  196.40  EXPORT_SYMBOL(direct_remap_pfn_range);
  196.41  
  196.42  
  196.43 @@ -221,8 +235,9 @@ void __iomem * __ioremap(unsigned long p
  196.44  #ifdef __x86_64__
  196.45  	flags |= _PAGE_USER;
  196.46  #endif
  196.47 -	if (direct_remap_pfn_range(&init_mm, (unsigned long) addr, phys_addr>>PAGE_SHIFT,
  196.48 -				    size, __pgprot(flags), domid)) {
  196.49 +	if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
  196.50 +				     phys_addr>>PAGE_SHIFT,
  196.51 +				     size, __pgprot(flags), domid)) {
  196.52  		vunmap((void __force *) addr);
  196.53  		return NULL;
  196.54  	}
   199.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c	Thu Sep 22 11:34:14 2005 -0600
   199.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/i386.c	Thu Sep 22 11:42:01 2005 -0600
   199.3 @@ -295,7 +295,7 @@ int pci_mmap_page_range(struct pci_dev *
   199.4  	/* Write-combine setting is ignored, it is changed via the mtrr
   199.5  	 * interfaces on this platform.
   199.6  	 */
   199.7 -	if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
   199.8 +	if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
   199.9  				   vma->vm_end - vma->vm_start,
  199.10  				   vma->vm_page_prot, DOMID_IO))
  199.11  		return -EAGAIN;
   202.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c	Thu Sep 22 11:34:14 2005 -0600
   202.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/devmem.c	Thu Sep 22 11:42:01 2005 -0600
   202.3 @@ -90,22 +90,10 @@ out:
   202.4  
   202.5  static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   202.6  {
   202.7 -	int uncached;
   202.8 -
   202.9 -	uncached = uncached_access(file);
  202.10 -	if (uncached)
  202.11 +	if (uncached_access(file))
  202.12  		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  202.13  
  202.14 -	/* Don't try to swap out physical pages.. */
  202.15 -	vma->vm_flags |= VM_RESERVED;
  202.16 -
  202.17 -	/*
  202.18 -	 * Don't dump addresses that are not real memory to a core file.
  202.19 -	 */
  202.20 -	if (uncached)
  202.21 -		vma->vm_flags |= VM_IO;
  202.22 -
  202.23 -	if (direct_remap_pfn_range(vma->vm_mm, vma->vm_start, vma->vm_pgoff,
  202.24 +	if (direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
  202.25  				   vma->vm_end - vma->vm_start,
  202.26  				   vma->vm_page_prot, DOMID_IO))
  202.27  		return -EAGAIN;
   205.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c	Thu Sep 22 11:34:14 2005 -0600
   205.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/gnttab.c	Thu Sep 22 11:42:01 2005 -0600
   205.3 @@ -182,14 +182,14 @@ gnttab_end_foreign_access(grant_ref_t re
   205.4  }
   205.5  
   205.6  int
   205.7 -gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
   205.8 +gnttab_grant_foreign_transfer(domid_t domid)
   205.9  {
  205.10      int ref;
  205.11  
  205.12      if ( unlikely((ref = get_free_entry()) == -1) )
  205.13          return -ENOSPC;
  205.14  
  205.15 -    shared[ref].frame = pfn;
  205.16 +    shared[ref].frame = 0;
  205.17      shared[ref].domid = domid;
  205.18      wmb();
  205.19      shared[ref].flags = GTF_accept_transfer;
  205.20 @@ -198,10 +198,9 @@ gnttab_grant_foreign_transfer(domid_t do
  205.21  }
  205.22  
  205.23  void
  205.24 -gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
  205.25 -				  unsigned long pfn)
  205.26 +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid)
  205.27  {
  205.28 -    shared[ref].frame = pfn;
  205.29 +    shared[ref].frame = 0;
  205.30      shared[ref].domid = domid;
  205.31      wmb();
  205.32      shared[ref].flags = GTF_accept_transfer;
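The pfn parameter disappears because the frame field of a transfer grant carries no information when the grant is created: the hunks above now clear it, and it is overwritten with the transferred machine frame once the remote domain completes the transfer. A hedged sketch of the updated call sequence, assuming the companion gnttab_end_foreign_transfer() accessor that reads the frame back (error handling abbreviated):

    /* Sketch: offer a page-transfer slot to `domid', then collect the
     * machine frame written back when the peer completes the transfer. */
    static unsigned long receive_transferred_frame(domid_t domid)
    {
        int ref = gnttab_grant_foreign_transfer(domid); /* no pfn now */

        if (ref < 0)
            return 0;   /* -ENOSPC: no free grant entries */
        /* ... advertise `ref' to the peer, wait for the transfer ... */
        return gnttab_end_foreign_transfer(ref);
    }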
   206.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Thu Sep 22 11:34:14 2005 -0600
   206.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Thu Sep 22 11:42:01 2005 -0600
   206.3 @@ -334,7 +334,7 @@ static void shutdown_handler(struct xenb
   206.4  	return;
   206.5      }
   206.6  
   206.7 -    xenbus_write("control", "shutdown", "", O_CREAT);
   206.8 +    xenbus_write("control", "shutdown", "");
   206.9  
  206.10      err = xenbus_transaction_end(0);
  206.11      if (err == -ETIMEDOUT) {
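xenbus_write() loses its flags argument here (the O_CREAT in the old call); nodes are simply created if absent. A minimal sketch of the surrounding pattern, following shutdown_handler() above -- the retry label is illustrative:

    /* Inside an open xenbus transaction, as in shutdown_handler(): */
    xenbus_write("control", "shutdown", "");   /* dir, node, value */

    err = xenbus_transaction_end(0);           /* 0 = commit, nonzero = abort */
    if (err == -ETIMEDOUT)
        goto again;                            /* lost a race: retry */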
   253.1 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm.c	Thu Sep 22 11:34:14 2005 -0600
   253.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
   253.3 @@ -1,627 +0,0 @@
   253.4 -/*
   253.5 - * Copyright (C) 2004 IBM Corporation
   253.6 - *
   253.7 - * Authors:
   253.8 - * Leendert van Doorn <leendert@watson.ibm.com>
   253.9 - * Dave Safford <safford@watson.ibm.com>
  253.10 - * Reiner Sailer <sailer@watson.ibm.com>
  253.11 - * Kylene Hall <kjhall@us.ibm.com>
  253.12 - *
  253.13 - * Maintained by: <tpmdd_devel@lists.sourceforge.net>
  253.14 - *
  253.15 - * Device driver for TCG/TCPA TPM (trusted platform module).
  253.16 - * Specifications at www.trustedcomputinggroup.org
  253.17 - *
  253.18 - * This program is free software; you can redistribute it and/or
  253.19 - * modify it under the terms of the GNU General Public License as
  253.20 - * published by the Free Software Foundation, version 2 of the
  253.21 - * License.
  253.22 - *
  253.23 - * Note, the TPM chip is not interrupt driven (only polling)
  253.24 - * and can have very long timeouts (minutes!). Hence the unusual
  253.25 - * calls to schedule_timeout.
  253.26 - *
  253.27 - */
  253.28 -
  253.29 -#include <linux/sched.h>
  253.30 -#include <linux/poll.h>
  253.31 -#include <linux/spinlock.h>
  253.32 -#include "tpm.h"
  253.33 -
  253.34 -#define	TPM_MINOR			224	/* officially assigned */
  253.35 -
  253.36 -#define	TPM_BUFSIZE			2048
  253.37 -
  253.38 -static LIST_HEAD(tpm_chip_list);
  253.39 -static DEFINE_SPINLOCK(driver_lock);
  253.40 -static int dev_mask[32];
  253.41 -
  253.42 -static void user_reader_timeout(unsigned long ptr)
  253.43 -{
  253.44 -	struct tpm_chip *chip = (struct tpm_chip *) ptr;
  253.45 -
  253.46 -	down(&chip->buffer_mutex);
  253.47 -	atomic_set(&chip->data_pending, 0);
  253.48 -	memset(chip->data_buffer, 0, TPM_BUFSIZE);
  253.49 -	up(&chip->buffer_mutex);
  253.50 -}
  253.51 -
  253.52 -void tpm_time_expired(unsigned long ptr)
  253.53 -{
  253.54 -	int *exp = (int *) ptr;
  253.55 -	*exp = 1;
  253.56 -}
  253.57 -
  253.58 -EXPORT_SYMBOL_GPL(tpm_time_expired);
  253.59 -
  253.60 -/*
  253.61 - * Internal kernel interface to transmit TPM commands
  253.62 - */
  253.63 -static ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
  253.64 -			    size_t bufsiz)
  253.65 -{
  253.66 -	ssize_t len;
  253.67 -	u32 count;
  253.68 -	__be32 *native_size;
  253.69 -
  253.70 -	native_size = (__force __be32 *) (buf + 2);
  253.71 -	count = be32_to_cpu(*native_size);
  253.72 -
  253.73 -	if (count == 0)
  253.74 -		return -ENODATA;
  253.75 -	if (count > bufsiz) {
  253.76 -		dev_err(&chip->pci_dev->dev,
  253.77 -			"invalid count value %x %zx \n", count, bufsiz);
  253.78 -		return -E2BIG;
  253.79 -	}
  253.80 -
  253.81 -	down(&chip->tpm_mutex);
  253.82 -
  253.83 -	if ((len = chip->vendor->send(chip, (u8 *) buf, count)) < 0) {
  253.84 -		dev_err(&chip->pci_dev->dev, "tpm_transmit: tpm_send: error %zd\n", len);
  253.85 -		up(&chip->tpm_mutex);	/* don't return with the mutex held */
  253.86 -		return len;
  253.87 -	}
  253.88 -
  253.89 -	down(&chip->timer_manipulation_mutex);
  253.90 -	chip->time_expired = 0;
  253.91 -	init_timer(&chip->device_timer);
  253.92 -	chip->device_timer.function = tpm_time_expired;
  253.93 -	chip->device_timer.expires = jiffies + 2 * 60 * HZ;
  253.94 -	chip->device_timer.data = (unsigned long) &chip->time_expired;
  253.95 -	add_timer(&chip->device_timer);
  253.96 -	up(&chip->timer_manipulation_mutex);
  253.97 -
  253.98 -	do {
  253.99 -		u8 status = inb(chip->vendor->base + 1);
 253.100 -		if ((status & chip->vendor->req_complete_mask) ==
 253.101 -		    chip->vendor->req_complete_val) {
 253.102 -			down(&chip->timer_manipulation_mutex);
 253.103 -			del_singleshot_timer_sync(&chip->device_timer);
 253.104 -			up(&chip->timer_manipulation_mutex);
 253.105 -			goto out_recv;
 253.106 -		}
 253.107 -		set_current_state(TASK_UNINTERRUPTIBLE);
 253.108 -		schedule_timeout(TPM_TIMEOUT);
 253.109 -		rmb();
 253.110 -	} while (!chip->time_expired);
 253.111 -
 253.112 -
 253.113 -	chip->vendor->cancel(chip);
 253.114 -	dev_err(&chip->pci_dev->dev, "Time expired\n");
 253.115 -	up(&chip->tpm_mutex);
 253.116 -	return -EIO;
 253.117 -
 253.118 -out_recv:
 253.119 -	len = chip->vendor->recv(chip, (u8 *) buf, bufsiz);
 253.120 -	if (len < 0)
 253.121 -		dev_err(&chip->pci_dev->dev,
 253.122 -			"tpm_transmit: tpm_recv: error %zd\n", len);
 253.123 -	up(&chip->tpm_mutex);
 253.124 -	return len;
 253.125 -}
 253.126 -
 253.127 -#define TPM_DIGEST_SIZE 20
 253.128 -#define CAP_PCR_RESULT_SIZE 18
 253.129 -static u8 cap_pcr[] = {
 253.130 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.131 -	0, 0, 0, 22,		/* length */
 253.132 -	0, 0, 0, 101,		/* TPM_ORD_GetCapability */
 253.133 -	0, 0, 0, 5,
 253.134 -	0, 0, 0, 4,
 253.135 -	0, 0, 1, 1
 253.136 -};
 253.137 -
 253.138 -#define READ_PCR_RESULT_SIZE 30
 253.139 -static u8 pcrread[] = {
 253.140 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.141 -	0, 0, 0, 14,		/* length */
 253.142 -	0, 0, 0, 21,		/* TPM_ORD_PcrRead */
 253.143 -	0, 0, 0, 0		/* PCR index */
 253.144 -};
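Both command blobs above share the same framing: a two-byte tag (0, 193 = 0x00C1, TPM_TAG_RQU_COMMAND), a four-byte big-endian total length that must equal the size of the whole blob (22 bytes for cap_pcr, 14 for pcrread), and a four-byte ordinal, followed by ordinal-specific operands. A hedged sketch of filling in that common 10-byte header programmatically (build_tpm_header() is hypothetical):

	/* Hypothetical helper: fill the common 10-byte TPM request header.
	 * Length and ordinal are big-endian on the wire. */
	static void build_tpm_header(u8 *buf, u32 total_len, u32 ordinal)
	{
		__be32 be;

		buf[0] = 0x00;			/* 0, 193 in the tables above */
		buf[1] = 0xC1;			/* TPM_TAG_RQU_COMMAND */
		be = cpu_to_be32(total_len);	/* length covers the whole blob */
		memcpy(buf + 2, &be, 4);
		be = cpu_to_be32(ordinal);	/* e.g. 21 = TPM_ORD_PcrRead */
		memcpy(buf + 6, &be, 4);
	}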
 253.145 -
 253.146 -static ssize_t show_pcrs(struct device *dev, char *buf)
 253.147 -{
 253.148 -	u8 data[READ_PCR_RESULT_SIZE];
 253.149 -	ssize_t len;
 253.150 -	int i, j, index, num_pcrs;
 253.151 -	char *str = buf;
 253.152 -
 253.153 -	struct tpm_chip *chip =
 253.154 -	    pci_get_drvdata(container_of(dev, struct pci_dev, dev));
 253.155 -	if (chip == NULL)
 253.156 -		return -ENODEV;
 253.157 -
 253.158 -	memcpy(data, cap_pcr, sizeof(cap_pcr));
 253.159 -	if ((len = tpm_transmit(chip, data, sizeof(data)))
 253.160 -	    < CAP_PCR_RESULT_SIZE)
 253.161 -		return len;
 253.162 -
 253.163 -	num_pcrs = be32_to_cpu(*((__force __be32 *) (data + 14)));
 253.164 -
 253.165 -	for (i = 0; i < num_pcrs; i++) {
 253.166 -		memcpy(data, pcrread, sizeof(pcrread));
 253.167 -		index = cpu_to_be32(i);
 253.168 -		memcpy(data + 10, &index, 4);
 253.169 -		if ((len = tpm_transmit(chip, data, sizeof(data)))
 253.170 -		    < READ_PCR_RESULT_SIZE)
 253.171 -			return len;
 253.172 -		str += sprintf(str, "PCR-%02d: ", i);
 253.173 -		for (j = 0; j < TPM_DIGEST_SIZE; j++)
 253.174 -			str += sprintf(str, "%02X ", *(data + 10 + j));
 253.175 -		str += sprintf(str, "\n");
 253.176 -	}
 253.177 -	return str - buf;
 253.178 -}
 253.179 -
 253.180 -static DEVICE_ATTR(pcrs, S_IRUGO, show_pcrs, NULL);
 253.181 -
 253.182 -#define  READ_PUBEK_RESULT_SIZE 314
 253.183 -static u8 readpubek[] = {
 253.184 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.185 -	0, 0, 0, 30,		/* length */
 253.186 -	0, 0, 0, 124,		/* TPM_ORD_ReadPubek */
 253.187 -};
 253.188 -
 253.189 -static ssize_t show_pubek(struct device *dev, char *buf)
 253.190 -{
 253.191 -	u8 data[READ_PUBEK_RESULT_SIZE];
 253.192 -	ssize_t len;
 253.193 -	__be32 *native_val;
 253.194 -	int i;
 253.195 -	char *str = buf;
 253.196 -
 253.197 -	struct tpm_chip *chip =
 253.198 -	    pci_get_drvdata(container_of(dev, struct pci_dev, dev));
 253.199 -	if (chip == NULL)
 253.200 -		return -ENODEV;
 253.201 -
 253.202 -	memcpy(data, readpubek, sizeof(readpubek));
 253.203 -	memset(data + sizeof(readpubek), 0, 20);	/* zero nonce */
 253.204 -
 253.205 -	if ((len = tpm_transmit(chip, data, sizeof(data))) <
 253.206 -	    READ_PUBEK_RESULT_SIZE)
 253.207 -		return len;
 253.208 -
 253.209 -	/*
 253.210 -	   ignore header 10 bytes
 253.211 -	   algorithm 32 bits (1 == RSA )
 253.212 -	   encscheme 16 bits
 253.213 -	   sigscheme 16 bits
  253.214 -	   parameters (RSA, 12 bytes: keybit, #primes, expbit)
 253.215 -	   keylenbytes 32 bits
 253.216 -	   256 byte modulus
 253.217 -	   ignore checksum 20 bytes
 253.218 -	 */
 253.219 -
 253.220 -	native_val = (__force __be32 *) (data + 34);
 253.221 -
 253.222 -	str +=
 253.223 -	    sprintf(str,
 253.224 -		    "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n"
 253.225 -		    "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X"
 253.226 -		    " %02X %02X %02X %02X %02X %02X %02X %02X\n"
 253.227 -		    "Modulus length: %d\nModulus: \n",
 253.228 -		    data[10], data[11], data[12], data[13], data[14],
 253.229 -		    data[15], data[16], data[17], data[22], data[23],
 253.230 -		    data[24], data[25], data[26], data[27], data[28],
 253.231 -		    data[29], data[30], data[31], data[32], data[33],
 253.232 -		    be32_to_cpu(*native_val)
 253.233 -	    );
 253.234 -
 253.235 -	for (i = 0; i < 256; i++) {
 253.236 -		str += sprintf(str, "%02X ", data[i + 39]);
 253.237 -		if ((i + 1) % 16 == 0)
 253.238 -			str += sprintf(str, "\n");
 253.239 -	}
 253.240 -	return str - buf;
 253.241 -}
 253.242 -
 253.243 -static DEVICE_ATTR(pubek, S_IRUGO, show_pubek, NULL);
 253.244 -
 253.245 -#define CAP_VER_RESULT_SIZE 18
 253.246 -static u8 cap_version[] = {
 253.247 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.248 -	0, 0, 0, 18,		/* length */
 253.249 -	0, 0, 0, 101,		/* TPM_ORD_GetCapability */
 253.250 -	0, 0, 0, 6,
 253.251 -	0, 0, 0, 0
 253.252 -};
 253.253 -
 253.254 -#define CAP_MANUFACTURER_RESULT_SIZE 18
 253.255 -static u8 cap_manufacturer[] = {
 253.256 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.257 -	0, 0, 0, 22,		/* length */
 253.258 -	0, 0, 0, 101,		/* TPM_ORD_GetCapability */
 253.259 -	0, 0, 0, 5,
 253.260 -	0, 0, 0, 4,
 253.261 -	0, 0, 1, 3
 253.262 -};
 253.263 -
 253.264 -static ssize_t show_caps(struct device *dev, char *buf)
 253.265 -{
 253.266 -	u8 data[READ_PUBEK_RESULT_SIZE];
 253.267 -	ssize_t len;
 253.268 -	char *str = buf;
 253.269 -
 253.270 -	struct tpm_chip *chip =
 253.271 -	    pci_get_drvdata(container_of(dev, struct pci_dev, dev));
 253.272 -	if (chip == NULL)
 253.273 -		return -ENODEV;
 253.274 -
 253.275 -	memcpy(data, cap_manufacturer, sizeof(cap_manufacturer));
 253.276 -
 253.277 -	if ((len = tpm_transmit(chip, data, sizeof(data))) <
 253.278 -	    CAP_MANUFACTURER_RESULT_SIZE)
 253.279 -		return len;
 253.280 -
 253.281 -	str += sprintf(str, "Manufacturer: 0x%x\n",
  253.282 -		       be32_to_cpu(*((__force __be32 *) (data + 14))));
 253.283 -
 253.284 -	memcpy(data, cap_version, sizeof(cap_version));
 253.285 -
 253.286 -	if ((len = tpm_transmit(chip, data, sizeof(data))) <
 253.287 -	    CAP_VER_RESULT_SIZE)
 253.288 -		return len;
 253.289 -
 253.290 -	str +=
 253.291 -	    sprintf(str, "TCG version: %d.%d\nFirmware version: %d.%d\n",
 253.292 -		    (int) data[14], (int) data[15], (int) data[16],
 253.293 -		    (int) data[17]);
 253.294 -
 253.295 -	return str - buf;
 253.296 -}
 253.297 -
 253.298 -static DEVICE_ATTR(caps, S_IRUGO, show_caps, NULL);
 253.299 -
 253.300 -/*
 253.301 - * Device file system interface to the TPM
 253.302 - */
 253.303 -int tpm_open(struct inode *inode, struct file *file)
 253.304 -{
 253.305 -	int rc = 0, minor = iminor(inode);
 253.306 -	struct tpm_chip *chip = NULL, *pos;
 253.307 -
 253.308 -	spin_lock(&driver_lock);
 253.309 -
 253.310 -	list_for_each_entry(pos, &tpm_chip_list, list) {
 253.311 -		if (pos->vendor->miscdev.minor == minor) {
 253.312 -			chip = pos;
 253.313 -			break;
 253.314 -		}
 253.315 -	}
 253.316 -
 253.317 -	if (chip == NULL) {
 253.318 -		rc = -ENODEV;
 253.319 -		goto err_out;
 253.320 -	}
 253.321 -
 253.322 -	if (chip->num_opens) {
 253.323 -		dev_dbg(&chip->pci_dev->dev,
 253.324 -			"Another process owns this TPM\n");
 253.325 -		rc = -EBUSY;
 253.326 -		goto err_out;
 253.327 -	}
 253.328 -
 253.329 -	chip->num_opens++;
 253.330 -	pci_dev_get(chip->pci_dev);
 253.331 -
 253.332 -	spin_unlock(&driver_lock);
 253.333 -
 253.334 -	chip->data_buffer = kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL);
 253.335 -	if (chip->data_buffer == NULL) {
 253.336 -		chip->num_opens--;
 253.337 -		pci_dev_put(chip->pci_dev);
 253.338 -		return -ENOMEM;
 253.339 -	}
 253.340 -
 253.341 -	atomic_set(&chip->data_pending, 0);
 253.342 -
 253.343 -	file->private_data = chip;
 253.344 -	return 0;
 253.345 -
 253.346 -err_out:
 253.347 -	spin_unlock(&driver_lock);
 253.348 -	return rc;
 253.349 -}
 253.350 -
 253.351 -EXPORT_SYMBOL_GPL(tpm_open);
 253.352 -
 253.353 -int tpm_release(struct inode *inode, struct file *file)
 253.354 -{
 253.355 -	struct tpm_chip *chip = file->private_data;
 253.356 -
 253.357 -	file->private_data = NULL;
 253.358 -
 253.359 -	spin_lock(&driver_lock);
 253.360 -	chip->num_opens--;
 253.361 -	spin_unlock(&driver_lock);
 253.362 -
 253.363 -	down(&chip->timer_manipulation_mutex);
 253.364 -	if (timer_pending(&chip->user_read_timer))
 253.365 -		del_singleshot_timer_sync(&chip->user_read_timer);
 253.366 -	else if (timer_pending(&chip->device_timer))
 253.367 -		del_singleshot_timer_sync(&chip->device_timer);
 253.368 -	up(&chip->timer_manipulation_mutex);
 253.369 -
 253.370 -	kfree(chip->data_buffer);
 253.371 -	atomic_set(&chip->data_pending, 0);
 253.372 -
 253.373 -	pci_dev_put(chip->pci_dev);
 253.374 -	return 0;
 253.375 -}
 253.376 -
 253.377 -EXPORT_SYMBOL_GPL(tpm_release);
 253.378 -
 253.379 -ssize_t tpm_write(struct file * file, const char __user * buf,
 253.380 -		  size_t size, loff_t * off)
 253.381 -{
 253.382 -	struct tpm_chip *chip = file->private_data;
 253.383 -	int in_size = size, out_size;
 253.384 -
 253.385 -	/* cannot perform a write until the read has cleared
 253.386 -	   either via tpm_read or a user_read_timer timeout */
 253.387 -	while (atomic_read(&chip->data_pending) != 0) {
 253.388 -		set_current_state(TASK_UNINTERRUPTIBLE);
 253.389 -		schedule_timeout(TPM_TIMEOUT);
 253.390 -	}
 253.391 -
 253.392 -	down(&chip->buffer_mutex);
 253.393 -
 253.394 -	if (in_size > TPM_BUFSIZE)
 253.395 -		in_size = TPM_BUFSIZE;
 253.396 -
 253.397 -	if (copy_from_user
 253.398 -	    (chip->data_buffer, (void __user *) buf, in_size)) {
 253.399 -		up(&chip->buffer_mutex);
 253.400 -		return -EFAULT;
 253.401 -	}
 253.402 -
 253.403 -	/* atomic tpm command send and result receive */
 253.404 -	out_size = tpm_transmit(chip, chip->data_buffer, TPM_BUFSIZE);
 253.405 -
 253.406 -	atomic_set(&chip->data_pending, out_size);
 253.407 -	atomic_set(&chip->data_position, 0);
 253.408 -	up(&chip->buffer_mutex);
 253.409 -
  253.410 -	/* Set a timeout by which the reader must claim the result */
 253.411 -	down(&chip->timer_manipulation_mutex);
 253.412 -	init_timer(&chip->user_read_timer);
 253.413 -	chip->user_read_timer.function = user_reader_timeout;
 253.414 -	chip->user_read_timer.data = (unsigned long) chip;
 253.415 -	chip->user_read_timer.expires = jiffies + (60 * HZ);
 253.416 -	add_timer(&chip->user_read_timer);
 253.417 -	up(&chip->timer_manipulation_mutex);
 253.418 -
 253.419 -	return in_size;
 253.420 -}
 253.421 -
 253.422 -EXPORT_SYMBOL_GPL(tpm_write);
 253.423 -
 253.424 -ssize_t tpm_read(struct file * file, char __user * buf,
 253.425 -		 size_t size, loff_t * off)
 253.426 -{
 253.427 -	struct tpm_chip *chip = file->private_data;
 253.428 -	int ret_size = -ENODATA;
 253.429 -	int pos, pending = 0;
 253.430 -
 253.431 -	down(&chip->buffer_mutex);
 253.432 -	ret_size = atomic_read(&chip->data_pending);
 253.433 -	if ( ret_size > 0 ) {	/* Result available */
 253.434 -		if (size < ret_size)
 253.435 -			ret_size = size;
 253.436 -
 253.437 -		pos = atomic_read(&chip->data_position);
 253.438 -
 253.439 -		if (copy_to_user((void __user *) buf,
 253.440 -				 &chip->data_buffer[pos], ret_size)) {
 253.441 -			ret_size = -EFAULT;
 253.442 -		} else {
 253.443 -			pending = atomic_read(&chip->data_pending) - ret_size;
 253.444 -			if ( pending ) {
 253.445 -				atomic_set( &chip->data_pending, pending );
 253.446 -				atomic_set( &chip->data_position, pos+ret_size );
 253.447 -			}
 253.448 -		}
 253.449 -	}
 253.450 -	up(&chip->buffer_mutex);
 253.451 -
 253.452 -	if ( ret_size <= 0 || pending == 0 ) {
 253.453 -		atomic_set( &chip->data_pending, 0 );
 253.454 -		down(&chip->timer_manipulation_mutex);
 253.455 -		del_singleshot_timer_sync(&chip->user_read_timer);
 253.456 -		up(&chip->timer_manipulation_mutex);
 253.457 -	}
 253.458 -
 253.459 -	return ret_size;
 253.460 -}
 253.461 -
 253.462 -EXPORT_SYMBOL_GPL(tpm_read);
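Together, tpm_write() and tpm_read() implement a strict one-shot request/response protocol: a write blocks until any previous result has been claimed, transmits the command atomically, and arms a 60-second timer (user_reader_timeout() above) by which the reader must collect the result before it is discarded. A hedged user-space sketch of that protocol -- the device node name /dev/tpm0 and the PcrRead blob are assumptions for illustration:

	/* User-space sketch: submit one TPM command and read its result. */
	#include <fcntl.h>
	#include <unistd.h>

	static unsigned char req[] = {
		0, 193,			/* TPM_TAG_RQU_COMMAND */
		0, 0, 0, 14,		/* length */
		0, 0, 0, 21,		/* TPM_ORD_PcrRead */
		0, 0, 0, 0		/* PCR index 0 */
	};

	int read_pcr0(unsigned char *resp, int resplen)
	{
		int fd = open("/dev/tpm0", O_RDWR);
		int n;

		if (fd < 0)
			return -1;
		/* The matching read must follow within 60 seconds, or
		 * user_reader_timeout() clears the pending result. */
		if (write(fd, req, sizeof(req)) != sizeof(req)) {
			close(fd);
			return -1;
		}
		n = read(fd, resp, resplen);
		close(fd);
		return n;
	}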
 253.463 -
 253.464 -void __devexit tpm_remove(struct pci_dev *pci_dev)
 253.465 -{
 253.466 -	struct tpm_chip *chip = pci_get_drvdata(pci_dev);
 253.467 -
 253.468 -	if (chip == NULL) {
 253.469 -		dev_err(&pci_dev->dev, "No device data found\n");
 253.470 -		return;
 253.471 -	}
 253.472 -
 253.473 -	spin_lock(&driver_lock);
 253.474 -
 253.475 -	list_del(&chip->list);
 253.476 -
 253.477 -	spin_unlock(&driver_lock);
 253.478 -
 253.479 -	pci_set_drvdata(pci_dev, NULL);
 253.480 -	misc_deregister(&chip->vendor->miscdev);
 253.481 -
 253.482 -	device_remove_file(&pci_dev->dev, &dev_attr_pubek);
 253.483 -	device_remove_file(&pci_dev->dev, &dev_attr_pcrs);
 253.484 -	device_remove_file(&pci_dev->dev, &dev_attr_caps);
 253.485 -
 253.486 -	pci_disable_device(pci_dev);
 253.487 -
  253.488 -	dev_mask[chip->dev_num / 32] &= ~(1 << (chip->dev_num % 32));
 253.489 -
 253.490 -	kfree(chip);
 253.491 -
 253.492 -	pci_dev_put(pci_dev);
 253.493 -}
 253.494 -
 253.495 -EXPORT_SYMBOL_GPL(tpm_remove);
 253.496 -
 253.497 -static u8 savestate[] = {
 253.498 -	0, 193,			/* TPM_TAG_RQU_COMMAND */
 253.499 -	0, 0, 0, 10,		/* blob length (in bytes) */
 253.500 -	0, 0, 0, 152		/* TPM_ORD_SaveState */
 253.501 -};
 253.502 -
 253.503 -/*
 253.504 - * We are about to suspend. Save the TPM state
 253.505 - * so that it can be restored.
 253.506 - */
 253.507 -int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
 253.508 -{
 253.509 -	struct tpm_chip *chip = pci_get_drvdata(pci_dev);
 253.510 -	if (chip == NULL)
 253.511 -		return -ENODEV;
 253.512 -
 253.513 -	tpm_transmit(chip, savestate, sizeof(savestate));
 253.514 -	return 0;
 253.515 -}
 253.516 -
 253.517 -EXPORT_SYMBOL_GPL(tpm_pm_suspend);
 253.518 -
 253.519 -/*
  253.520 - * Resume from a power save. The BIOS has already restored
 253.521 - * the TPM state.
 253.522 - */
 253.523 -int tpm_pm_resume(struct pci_dev *pci_dev)
 253.524 -{
 253.525 -	struct tpm_chip *chip = pci_get_drvdata(pci_dev);
 253.526 -
 253.527 -	if (chip == NULL)
 253.528 -		return -ENODEV;
 253.529 -
 253.530 -	return 0;
 253.531 -}
 253.532 -
 253.533 -EXPORT_SYMBOL_GPL(tpm_pm_resume);
 253.534 -
 253.535 -/*
 253.536 - * Called from tpm_<specific>.c probe function only for devices
 253.537 - * the driver has determined it should claim.  Prior to calling
  253.538 - * this function, the specific probe function has called pci_enable_device;
  253.539 - * upon errant exit from this function, the specific probe function should
  253.540 - * call pci_disable_device.
 253.541 - */
 253.542 -int tpm_register_hardware(struct pci_dev *pci_dev,
 253.543 -			  struct tpm_vendor_specific *entry)
 253.544 -{
 253.545 -	char devname[7];
 253.546 -	struct tpm_chip *chip;
 253.547 -	int i, j;
 253.548 -
 253.549 -	/* Driver specific per-device data */
 253.550 -	chip = kmalloc(sizeof(*chip), GFP_KERNEL);
 253.551 -	if (chip == NULL)
 253.552 -		return -ENOMEM;
 253.553 -
 253.554 -	memset(chip, 0, sizeof(struct tpm_chip));
 253.555 -
 253.556 -	init_MUTEX(&chip->buffer_mutex);
 253.557 -	init_MUTEX(&chip->tpm_mutex);
 253.558 -	init_MUTEX(&chip->timer_manipulation_mutex);
 253.559 -	INIT_LIST_HEAD(&chip->list);
 253.560 -
 253.561 -	chip->vendor = entry;
 253.562 -
 253.563 -	chip->dev_num = -1;
 253.564 -
 253.565 -	for (i = 0; i < 32; i++)
 253.566 -		for (j = 0; j < 8; j++)
 253.567 -			if ((dev_mask[i] & (1 << j)) == 0) {
 253.568 -				chip->dev_num = i * 32 + j;
 253.569 -				dev_mask[i] |= 1 << j;
 253.570 -				goto dev_num_search_complete;
 253.571 -			}
 253.572 -
 253.573 -dev_num_search_complete:
 253.574 -	if (chip->dev_num < 0) {
 253.575 -		dev_err(&pci_dev->dev,
 253.576 -			"No available tpm device numbers\n");
 253.577 -		kfree(chip);
 253.578 -		return -ENODEV;
 253.579 -	} else if (chip->dev_num == 0)
 253.580 -		chip->vendor->miscdev.minor = TPM_MINOR;
 253.581 -	else
 253.582 -		chip->vendor->miscdev.minor = MISC_DYNAMIC_MINOR;
 253.583 -
 253.584 -	snprintf(devname, sizeof(devname), "%s%d", "tpm", chip->dev_num);
 253.585 -	chip->vendor->miscdev.name = devname;
 253.586 -
 253.587 -	chip->vendor->miscdev.dev = &(pci_dev->dev);
 253.588 -	chip->pci_dev = pci_