ia64/xen-unstable

changeset 4198:8ba8ae4eba9b

bitkeeper revision 1.1236.1.79 (423a4d121851oHDLywIdWZDzRRlchA)

Merge ssh://freefall.cl.cam.ac.uk//auto/groups/xeno/BK/xeno.bk
into equilibrium.research:/export/scratch/xeno-usbfixes.bk
author mwilli2@equilibrium.research
date Fri Mar 18 03:37:54 2005 +0000 (2005-03-18)
parents f234096eb41e fe8583803c7d
children 83ac50ad8b2d
files .rootkeys BitKeeper/etc/ignore BitKeeper/etc/logging_ok linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h patches/linux-2.6.11/agpgart.patch patches/linux-2.6.11/iomap.patch tools/blktap/Makefile tools/blktap/blockstore.c tools/blktap/blockstore.h tools/blktap/blockstored.c tools/blktap/bstest.c tools/examples/xend-config.sxp tools/libxc/Makefile tools/libxc/xc_linux_build.c tools/libxc/xc_linux_restore.c tools/libxc/xc_plan9_build.c tools/libxc/xc_ptrace.c tools/libxc/xc_vmx_build.c tools/misc/xend tools/python/xen/xend/Args.py tools/python/xen/xend/XendRoot.py tools/python/xen/xend/server/SrvBase.py tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvDir.py tools/python/xen/xend/server/console.py tools/python/xen/xend/server/params.py tools/tests/test_x86_emulator.c tools/xcs/evtchn.c tools/xcs/xcs.c xen/Rules.mk xen/arch/ia64/domain.c xen/arch/x86/domain.c xen/arch/x86/mm.c xen/arch/x86/smp.c xen/arch/x86/traps.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_emulate.c xen/common/dom0_ops.c xen/common/dom_mem_ops.c xen/common/domain.c xen/common/elf.c xen/common/page_alloc.c xen/common/xmalloc.c xen/include/asm-x86/desc.h xen/include/asm-x86/mm.h xen/include/asm-x86/processor.h xen/include/asm-x86/x86_emulate.h xen/include/public/arch-x86_32.h xen/include/public/arch-x86_64.h xen/include/public/dom0_ops.h xen/include/public/io/domain_controller.h xen/include/public/xen.h xen/include/xen/domain.h xen/include/xen/irq_cpustat.h xen/include/xen/perfc_defn.h xen/include/xen/sched.h xen/include/xen/softirq.h
line diff
     1.1 --- a/.rootkeys	Sat Mar 12 21:43:58 2005 +0000
     1.2 +++ b/.rootkeys	Fri Mar 18 03:37:54 2005 +0000
     1.3 @@ -172,7 +172,6 @@ 4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6
     1.4  40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
     1.5  40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c
     1.6  41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c
     1.7 -413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c
     1.8  40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c
     1.9  4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile
    1.10  4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c
    1.11 @@ -316,6 +315,7 @@ 422e4430vKaHLOOGS7X-SUUe3EBCgw netbsd-2.
    1.12  422e4430-gOD358H8nGGnNWes08Nng netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c
    1.13  413cb3b53nyOv1OIeDSsCXhBFDXvJA netbsd-2.0-xen-sparse/sys/nfs/files.nfs
    1.14  413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.11/agpgart.patch
    1.15 +42372652KCUP-IOH9RN19YQmGhs4aA patches/linux-2.6.11/iomap.patch
    1.16  3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
    1.17  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk
    1.18  4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile
    1.19 @@ -341,6 +341,8 @@ 42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blk
    1.20  42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h
    1.21  42277b02WrfP1meTDPv1M5swFq8oHQ tools/blktap/blockstore.c
    1.22  42277b02P1C0FYj3gqwTZUD8sxKCug tools/blktap/blockstore.h
    1.23 +42371b8aL1JsxAXOd4bBhmZKDyjiJg tools/blktap/blockstored.c
    1.24 +42371b8aD_x3L9MKsXciMNqkuk58eQ tools/blktap/bstest.c
    1.25  42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile
    1.26  42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c
    1.27  42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c
    1.28 @@ -572,6 +574,7 @@ 4051bce6CHAsYh8P5t2OHDtRWOP9og tools/lib
    1.29  41cc934aO1m6NxEh_8eDr9bJIMoLFA tools/libxc/xc_plan9_build.c
    1.30  3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/libxc/xc_private.c
    1.31  3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h
    1.32 +42337174PxyzzPk62raDiYCIsfStDg tools/libxc/xc_ptrace.c
    1.33  40589968UQFnJeOMn8UIFLbXBuwXjw tools/libxc/xc_rrobin.c
    1.34  41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c
    1.35  40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile
     2.1 --- a/BitKeeper/etc/ignore	Sat Mar 12 21:43:58 2005 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Fri Mar 18 03:37:54 2005 +0000
     2.3 @@ -124,3 +124,5 @@ tools/blktap/vdi_validate
     2.4  tools/blktap/xen/*
     2.5  tools/cmdline/*
     2.6  tools/tests/test_x86_emulator
     2.7 +tools/blktap/blockstored
     2.8 +tools/blktap/bstest
     3.1 --- a/BitKeeper/etc/logging_ok	Sat Mar 12 21:43:58 2005 +0000
     3.2 +++ b/BitKeeper/etc/logging_ok	Fri Mar 18 03:37:54 2005 +0000
     3.3 @@ -33,6 +33,7 @@ iap10@pb001.cl.cam.ac.uk
     3.4  iap10@pb007.cl.cam.ac.uk
     3.5  iap10@striker.cl.cam.ac.uk
     3.6  iap10@tetris.cl.cam.ac.uk
     3.7 +jrb44@plym.cl.cam.ac.uk
     3.8  jws22@gauntlet.cl.cam.ac.uk
     3.9  jws@cairnwell.research
    3.10  kaf24@camelot.eng.3leafnetworks.com
    3.11 @@ -46,6 +47,7 @@ kaf24@plym.cl.cam.ac.uk
    3.12  kaf24@scramble.cl.cam.ac.uk
    3.13  kaf24@striker.cl.cam.ac.uk
    3.14  kaf24@viper.(none)
    3.15 +kmacy@shemp.lab.netapp.com
    3.16  laudney@eclipse.(none)
    3.17  lynx@idefix.cl.cam.ac.uk
    3.18  maf46@burn.cl.cam.ac.uk
    3.19 @@ -80,5 +82,6 @@ tlh20@labyrinth.cl.cam.ac.uk
    3.20  tw275@labyrinth.cl.cam.ac.uk
    3.21  tw275@striker.cl.cam.ac.uk
    3.22  vh249@airwolf.cl.cam.ac.uk
    3.23 +vh249@arcadians.cl.cam.ac.uk
    3.24  xen-ia64.adm@bkbits.net
    3.25  xenbk@gandalf.hpl.hp.com
     4.1 --- a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Sat Mar 12 21:43:58 2005 +0000
     4.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Fri Mar 18 03:37:54 2005 +0000
     4.3 @@ -442,11 +442,11 @@ void xlvbd_update_vbds(void)
     4.4      old_nr   = nr_vbds;
     4.5  
     4.6      new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
     4.7 +    if (!new_info)
     4.8 +        return;
     4.9 +
    4.10      if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
    4.11 -    {
    4.12 -        kfree(new_info);
    4.13 -        return;
    4.14 -    }
    4.15 +        goto out;
    4.16  
    4.17      /*
    4.18       * Final list maximum size is old list + new list. This occurs only when
    4.19 @@ -454,6 +454,8 @@ void xlvbd_update_vbds(void)
    4.20       * VBDs in the old list because the usage counts are busy.
    4.21       */
    4.22      merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
    4.23 +    if (!merged_info)
    4.24 +        goto out;
    4.25  
    4.26      /* @i tracks old list; @j tracks new list; @k tracks merged list. */
    4.27      i = j = k = 0;
    4.28 @@ -500,6 +502,7 @@ void xlvbd_update_vbds(void)
    4.29      nr_vbds  = k;
    4.30  
    4.31      kfree(old_info);
    4.32 +out:
    4.33      kfree(new_info);
    4.34  }
    4.35  
    4.36 @@ -543,6 +546,9 @@ int xlvbd_init(void)
    4.37      }
    4.38  
    4.39      vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
    4.40 +    if (!vbd_info)
    4.41 +        return -ENOMEM;
    4.42 +
    4.43      nr_vbds  = xlvbd_get_vbd_info(vbd_info);
    4.44  
    4.45      if ( nr_vbds < 0 )
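
[Note on the vbd.c hunks above] Both kmalloc() calls in xlvbd_update_vbds() now get explicit failure checks, and the error paths funnel through a single "out:" label so new_info is always freed. A minimal user-space sketch of the same goto-based cleanup shape (the function and variable names here are illustrative, not from the driver):

    #include <stdlib.h>

    /* Illustrative only: mirrors the allocate/check/goto-out shape above. */
    static int update_lists(size_t max_items, size_t item_size)
    {
        int rc = -1;
        void *new_info = malloc(max_items * item_size);
        if (!new_info)
            return -1;               /* nothing allocated yet: plain return */

        void *merged_info = malloc(2 * max_items * item_size);
        if (!merged_info)
            goto out;                /* new_info still needs freeing */

        /* ... merge step would go here ... */
        rc = 0;
        free(merged_info);
    out:
        free(new_info);              /* single exit path owns new_info */
        return rc;
    }
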
     5.1 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c	Sat Mar 12 21:43:58 2005 +0000
     5.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c	Fri Mar 18 03:37:54 2005 +0000
     5.3 @@ -377,15 +377,31 @@ void __init setup_arch(char **cmdline_p)
     5.4  
     5.5      paging_init();
     5.6  
     5.7 -    /* Make sure we have a large enough P->M table. */
     5.8 -    if ( max_pfn > xen_start_info.nr_pages )
     5.9 +    /* Make sure we have a correctly sized P->M table. */
    5.10 +    if ( max_pfn != xen_start_info.nr_pages )
    5.11      {
    5.12          phys_to_machine_mapping = alloc_bootmem_low_pages(
    5.13              max_pfn * sizeof(unsigned long));
    5.14 -        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned int));
    5.15 -        memcpy(phys_to_machine_mapping,
    5.16 -               (unsigned long *)xen_start_info.mfn_list,
    5.17 -               xen_start_info.nr_pages * sizeof(unsigned long));
    5.18 +        if ( max_pfn > xen_start_info.nr_pages )
    5.19 +        {
    5.20 +            memset(phys_to_machine_mapping, ~0,
    5.21 +                   max_pfn * sizeof(unsigned long));
    5.22 +            memcpy(phys_to_machine_mapping,
    5.23 +                   (unsigned long *)xen_start_info.mfn_list,
    5.24 +                   xen_start_info.nr_pages * sizeof(unsigned long));
    5.25 +        }
    5.26 +        else
    5.27 +        {
    5.28 +            memcpy(phys_to_machine_mapping,
    5.29 +                   (unsigned long *)xen_start_info.mfn_list,
    5.30 +                   max_pfn * sizeof(unsigned long));
    5.31 +            if (HYPERVISOR_dom_mem_op(
    5.32 +                MEMOP_decrease_reservation,
    5.33 +                (unsigned long *)xen_start_info.mfn_list + max_pfn,
    5.34 +                xen_start_info.nr_pages - max_pfn, 0) !=
    5.35 +                (xen_start_info.nr_pages - max_pfn))
    5.36 +                BUG();
    5.37 +        }
    5.38          free_bootmem(__pa(xen_start_info.mfn_list), 
    5.39                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    5.40                                       sizeof(unsigned long))));
     6.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Sat Mar 12 21:43:58 2005 +0000
     6.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Fri Mar 18 03:37:54 2005 +0000
     6.3 @@ -41,6 +41,7 @@
     6.4  #include <linux/init.h>
     6.5  #include <linux/edd.h>
     6.6  #include <linux/percpu.h>
     6.7 +#include <linux/notifier.h>
     6.8  #include <video/edid.h>
     6.9  #include <asm/e820.h>
    6.10  #include <asm/mpspec.h>
    6.11 @@ -57,6 +58,15 @@
    6.12  /* Allows setting of maximum possible memory size  */
    6.13  static unsigned long xen_override_max_pfn;
    6.14  
    6.15 +extern struct notifier_block *panic_notifier_list;
    6.16 +static int xen_panic_event(struct notifier_block *, unsigned long, void *);
    6.17 +static struct notifier_block xen_panic_block = {
    6.18 +	xen_panic_event,
    6.19 +        NULL,
    6.20 +        0 /* try to go last */
    6.21 +};
    6.22 +
    6.23 +
    6.24  int disable_pse __initdata = 0;
    6.25  
    6.26  /*
    6.27 @@ -1398,6 +1408,9 @@ void __init setup_arch(char **cmdline_p)
    6.28  	if ( panic_timeout == 0 )
    6.29  		panic_timeout = 1;
    6.30  
    6.31 +	/* Register a call for panic conditions. */
    6.32 +	notifier_chain_register(&panic_notifier_list, &xen_panic_block);
    6.33 +
    6.34  	HYPERVISOR_vm_assist(VMASST_CMD_enable,
    6.35  			     VMASST_TYPE_4gb_segments);
    6.36  
    6.37 @@ -1492,15 +1505,26 @@ void __init setup_arch(char **cmdline_p)
    6.38  #endif
    6.39  	paging_init();
    6.40  
    6.41 -	/* Make sure we have a large enough P->M table. */
    6.42 -	if (max_pfn > xen_start_info.nr_pages) {
    6.43 +	/* Make sure we have a correctly sized P->M table. */
    6.44 +	if (max_pfn != xen_start_info.nr_pages) {
    6.45  		phys_to_machine_mapping = alloc_bootmem_low_pages(
    6.46  			max_pfn * sizeof(unsigned long));
    6.47 -		memset(phys_to_machine_mapping, ~0,
    6.48 -			max_pfn * sizeof(unsigned long));
    6.49 -		memcpy(phys_to_machine_mapping,
    6.50 -			(unsigned long *)xen_start_info.mfn_list,
    6.51 -			xen_start_info.nr_pages * sizeof(unsigned long));
    6.52 +		if (max_pfn > xen_start_info.nr_pages) {
    6.53 +			memset(phys_to_machine_mapping, ~0,
    6.54 +				max_pfn * sizeof(unsigned long));
    6.55 +			memcpy(phys_to_machine_mapping,
    6.56 +				(unsigned long *)xen_start_info.mfn_list,
    6.57 +				xen_start_info.nr_pages * sizeof(unsigned long));
    6.58 +		} else {
    6.59 +			memcpy(phys_to_machine_mapping,
    6.60 +				(unsigned long *)xen_start_info.mfn_list,
    6.61 +				max_pfn * sizeof(unsigned long));
    6.62 +			if (HYPERVISOR_dom_mem_op(
    6.63 +				MEMOP_decrease_reservation,
    6.64 +				(unsigned long *)xen_start_info.mfn_list + max_pfn,
    6.65 +				xen_start_info.nr_pages - max_pfn, 0) !=
    6.66 +			    (xen_start_info.nr_pages - max_pfn)) BUG();
    6.67 +		}
    6.68  		free_bootmem(
    6.69  			__pa(xen_start_info.mfn_list), 
    6.70  			PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    6.71 @@ -1598,6 +1622,16 @@ void __init setup_arch(char **cmdline_p)
    6.72  	}
    6.73  }
    6.74  
    6.75 +
    6.76 +static int
    6.77 +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
    6.78 +{
    6.79 +     HYPERVISOR_crash();    
    6.80 +     /* we're never actually going to get here... */
    6.81 +     return NOTIFY_DONE;
    6.82 +}
    6.83 +
    6.84 +
    6.85  #include "setup_arch_post.h"
    6.86  /*
    6.87   * Local Variables:
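
[Note on the setup.c hunks above] In both the 2.4 and 2.6 boot paths the P->M table is now rebuilt whenever max_pfn differs from xen_start_info.nr_pages: if the domain has fewer pages than max_pfn the table is padded with ~0 entries as before, and if it has more, the surplus machine frames are returned to Xen with a MEMOP_decrease_reservation dom_mem_op. A hedged sketch of just that surplus-return step, using the same call shape as the hunks (the helper name is illustrative):

    /* Sketch: hand back machine frames beyond max_pfn, as in the hunks above.
     * mfn_list points at the frame list supplied in xen_start_info. */
    static void return_surplus_frames(unsigned long *mfn_list,
                                      unsigned long nr_pages,
                                      unsigned long max_pfn)
    {
        if (nr_pages <= max_pfn)
            return;
        /* Extent order 0: each list entry names a single page frame. */
        if (HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                  mfn_list + max_pfn,
                                  nr_pages - max_pfn, 0) !=
            (nr_pages - max_pfn))
            BUG();   /* a partial release would leave the P->M table wrong */
    }
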
     7.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile	Sat Mar 12 21:43:58 2005 +0000
     7.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile	Fri Mar 18 03:37:54 2005 +0000
     7.3 @@ -6,8 +6,8 @@ XENARCH	:= $(subst ",,$(CONFIG_XENARCH))
     7.4  
     7.5  CFLAGS	+= -Iarch/$(XENARCH)/mm
     7.6  
     7.7 -obj-y	:= init.o pgtable.o fault.o ioremap.o pageattr.o hypervisor.o
     7.8 -c-obj-y	:= extable.o mmap.o
     7.9 +obj-y	:= init.o pgtable.o fault.o ioremap.o hypervisor.o
    7.10 +c-obj-y	:= extable.o mmap.o pageattr.o
    7.11  
    7.12  c-obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
    7.13  c-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
     8.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c	Sat Mar 12 21:43:58 2005 +0000
     8.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c	Fri Mar 18 03:37:54 2005 +0000
     8.3 @@ -25,7 +25,7 @@ void kunmap(struct page *page)
     8.4   * However when holding an atomic kmap is is not legal to sleep, so atomic
     8.5   * kmaps are appropriate for short, tight code paths only.
     8.6   */
     8.7 -void *kmap_atomic(struct page *page, enum km_type type)
     8.8 +static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
     8.9  {
    8.10  	enum fixed_addresses idx;
    8.11  	unsigned long vaddr;
    8.12 @@ -41,33 +41,21 @@ void *kmap_atomic(struct page *page, enu
    8.13  	if (!pte_none(*(kmap_pte-idx)))
    8.14  		BUG();
    8.15  #endif
    8.16 -	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
    8.17 +	set_pte(kmap_pte-idx, mk_pte(page, prot));
    8.18  	__flush_tlb_one(vaddr);
    8.19  
    8.20  	return (void*) vaddr;
    8.21  }
    8.22  
    8.23 -/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection */
    8.24 +void *kmap_atomic(struct page *page, enum km_type type)
    8.25 +{
    8.26 +	return __kmap_atomic(page, type, kmap_prot);
    8.27 +}
    8.28 +
    8.29 +/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
    8.30  void *kmap_atomic_pte(struct page *page, enum km_type type)
    8.31  {
    8.32 -	enum fixed_addresses idx;
    8.33 -	unsigned long vaddr;
    8.34 -
    8.35 -	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
    8.36 -	inc_preempt_count();
    8.37 -	if (page < highmem_start_page)
    8.38 -		return page_address(page);
    8.39 -
    8.40 -	idx = type + KM_TYPE_NR*smp_processor_id();
    8.41 -	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
    8.42 -#ifdef CONFIG_DEBUG_HIGHMEM
    8.43 -	if (!pte_none(*(kmap_pte-idx)))
    8.44 -		BUG();
    8.45 -#endif
    8.46 -	set_pte(kmap_pte-idx, mk_pte(page, PAGE_KERNEL_RO));
    8.47 -	__flush_tlb_one(vaddr);
    8.48 -
    8.49 -	return (void*) vaddr;
    8.50 +	return __kmap_atomic(page, type, PAGE_KERNEL_RO);
    8.51  }
    8.52  
    8.53  void kunmap_atomic(void *kvaddr, enum km_type type)
    8.54 @@ -97,31 +85,6 @@ void kunmap_atomic(void *kvaddr, enum km
    8.55  	preempt_check_resched();
    8.56  }
    8.57  
    8.58 -void kunmap_atomic_force(void *kvaddr, enum km_type type)
    8.59 -{
    8.60 -	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
    8.61 -	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
    8.62 -
    8.63 -	if (vaddr < FIXADDR_START) { // FIXME
    8.64 -		dec_preempt_count();
    8.65 -		preempt_check_resched();
    8.66 -		return;
    8.67 -	}
    8.68 -
    8.69 -	if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
    8.70 -		BUG();
    8.71 -
    8.72 -	/*
    8.73 -	 * force other mappings to Oops if they'll try to access
    8.74 -	 * this pte without first remap it
    8.75 -	 */
    8.76 -	pte_clear(kmap_pte-idx);
    8.77 -	__flush_tlb_one(vaddr);
    8.78 -
    8.79 -	dec_preempt_count();
    8.80 -	preempt_check_resched();
    8.81 -}
    8.82 -
    8.83  struct page *kmap_atomic_to_page(void *ptr)
    8.84  {
    8.85  	unsigned long idx, vaddr = (unsigned long)ptr;
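
[Note on the highmem.c hunk above] kmap_atomic() and kmap_atomic_pte() now share one __kmap_atomic() helper parameterised by the page protection; only the _pte variant differs, mapping with PAGE_KERNEL_RO because Xen does not allow the guest to hold a writable mapping of a live page-table page. A sketch of the resulting shape (this mirrors the hunk; the body of the helper is elided):

    static void *__kmap_atomic(struct page *page, enum km_type type,
                               pgprot_t prot)
    {
        /* ... fixmap slot selection and set_pte(), as in the hunk ... */
    }

    void *kmap_atomic(struct page *page, enum km_type type)
    {
        return __kmap_atomic(page, type, kmap_prot);        /* writable */
    }

    /* Page-table pages: a read-only mapping preserves Xen's invariant that
     * no writable mapping of a pinned page table exists in the guest. */
    void *kmap_atomic_pte(struct page *page, enum km_type type)
    {
        return __kmap_atomic(page, type, PAGE_KERNEL_RO);
    }
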
     9.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Sat Mar 12 21:43:58 2005 +0000
     9.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Mar 18 03:37:54 2005 +0000
     9.3 @@ -60,9 +60,9 @@ static spinlock_t update_lock = SPIN_LOC
     9.4  #define QUEUE_SIZE 1
     9.5  #else
     9.6  #define QUEUE_SIZE 128
     9.7 +#endif
     9.8  #define pmd_val_ma(v) (v).pud.pgd.pgd;
     9.9  #endif
    9.10 -#endif
    9.11  
    9.12  DEFINE_PER_CPU(mmu_update_t, update_queue[QUEUE_SIZE]);
    9.13  DEFINE_PER_CPU(unsigned int, mmu_update_queue_idx);
    10.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c	Sat Mar 12 21:43:58 2005 +0000
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,226 +0,0 @@
    10.4 -/* 
    10.5 - * Copyright 2002 Andi Kleen, SuSE Labs. 
    10.6 - * Thanks to Ben LaHaise for precious feedback.
    10.7 - */ 
    10.8 -
    10.9 -#include <linux/config.h>
   10.10 -#include <linux/mm.h>
   10.11 -#include <linux/sched.h>
   10.12 -#include <linux/highmem.h>
   10.13 -#include <linux/module.h>
   10.14 -#include <linux/slab.h>
   10.15 -#include <asm/uaccess.h>
   10.16 -#include <asm/processor.h>
   10.17 -#include <asm/tlbflush.h>
   10.18 -
   10.19 -static DEFINE_SPINLOCK(cpa_lock);
   10.20 -static struct list_head df_list = LIST_HEAD_INIT(df_list);
   10.21 -
   10.22 -
   10.23 -pte_t *lookup_address(unsigned long address) 
   10.24 -{ 
   10.25 -	pgd_t *pgd = pgd_offset_k(address);
   10.26 -	pud_t *pud;
   10.27 -	pmd_t *pmd;
   10.28 -	if (pgd_none(*pgd))
   10.29 -		return NULL;
   10.30 -	pud = pud_offset(pgd, address);
   10.31 -	if (pud_none(*pud))
   10.32 -		return NULL;
   10.33 -	pmd = pmd_offset(pud, address);
   10.34 -	if (pmd_none(*pmd))
   10.35 -		return NULL;
   10.36 -	if (pmd_large(*pmd))
   10.37 -		return (pte_t *)pmd;
   10.38 -        return pte_offset_kernel(pmd, address);
   10.39 -} 
   10.40 -
   10.41 -static struct page *split_large_page(unsigned long address, pgprot_t prot)
   10.42 -{ 
   10.43 -	int i; 
   10.44 -	unsigned long addr;
   10.45 -	struct page *base;
   10.46 -	pte_t *pbase;
   10.47 -
   10.48 -	spin_unlock_irq(&cpa_lock);
   10.49 -	base = alloc_pages(GFP_KERNEL, 0);
   10.50 -	spin_lock_irq(&cpa_lock);
   10.51 -	if (!base) 
   10.52 -		return NULL;
   10.53 -
   10.54 -	address = __pa(address);
   10.55 -	addr = address & LARGE_PAGE_MASK; 
   10.56 -	pbase = (pte_t *)page_address(base);
   10.57 -	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
   10.58 -		pbase[i] = pfn_pte(addr >> PAGE_SHIFT, 
   10.59 -				   addr == address ? prot : PAGE_KERNEL);
   10.60 -	}
   10.61 -	return base;
   10.62 -} 
   10.63 -
   10.64 -static void flush_kernel_map(void *dummy) 
   10.65 -{ 
   10.66 -	/* Could use CLFLUSH here if the CPU supports it (Hammer,P4) */
   10.67 -	if (boot_cpu_data.x86_model >= 4) 
   10.68 -		wbinvd();
   10.69 -	/* Flush all to work around Errata in early athlons regarding 
   10.70 -	 * large page flushing. 
   10.71 -	 */
   10.72 -	__flush_tlb_all(); 	
   10.73 -}
   10.74 -
   10.75 -static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
   10.76 -{ 
   10.77 -	struct page *page;
   10.78 -	unsigned long flags;
   10.79 -
   10.80 -	set_pte_atomic(kpte, pte); 	/* change init_mm */
   10.81 -	if (PTRS_PER_PMD > 1)
   10.82 -		return;
   10.83 -
   10.84 -	spin_lock_irqsave(&pgd_lock, flags);
   10.85 -	for (page = pgd_list; page; page = (struct page *)page->index) {
   10.86 -		pgd_t *pgd;
   10.87 -		pud_t *pud;
   10.88 -		pmd_t *pmd;
   10.89 -		pgd = (pgd_t *)page_address(page) + pgd_index(address);
   10.90 -		pud = pud_offset(pgd, address);
   10.91 -		pmd = pmd_offset(pud, address);
   10.92 -		set_pte_atomic((pte_t *)pmd, pte);
   10.93 -	}
   10.94 -	spin_unlock_irqrestore(&pgd_lock, flags);
   10.95 -}
   10.96 -
   10.97 -/* 
   10.98 - * No more special protections in this 2/4MB area - revert to a
   10.99 - * large page again. 
  10.100 - */
  10.101 -static inline void revert_page(struct page *kpte_page, unsigned long address)
  10.102 -{
  10.103 -	pte_t *linear = (pte_t *) 
  10.104 -		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
  10.105 -	set_pmd_pte(linear,  address,
  10.106 -		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
  10.107 -			    PAGE_KERNEL_LARGE));
  10.108 -}
  10.109 -
  10.110 -static int
  10.111 -__change_page_attr(struct page *page, pgprot_t prot)
  10.112 -{ 
  10.113 -	pte_t *kpte; 
  10.114 -	unsigned long address;
  10.115 -	struct page *kpte_page;
  10.116 -
  10.117 -	BUG_ON(PageHighMem(page));
  10.118 -	address = (unsigned long)page_address(page);
  10.119 -
  10.120 -	kpte = lookup_address(address);
  10.121 -	if (!kpte)
  10.122 -		return -EINVAL;
  10.123 -	kpte_page = virt_to_page(kpte);
  10.124 -	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { 
  10.125 -		if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
  10.126 -			set_pte_batched(kpte, mk_pte(page, prot)); 
  10.127 -		} else {
  10.128 -			struct page *split = split_large_page(address, prot); 
  10.129 -			if (!split)
  10.130 -				return -ENOMEM;
  10.131 -			set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL));
  10.132 -			kpte_page = split;
  10.133 -		}	
  10.134 -		get_page(kpte_page);
  10.135 -	} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { 
  10.136 -		set_pte_batched(kpte, mk_pte(page, PAGE_KERNEL));
  10.137 -		__put_page(kpte_page);
  10.138 -	} else
  10.139 -		BUG();
  10.140 -
  10.141 -	/*
  10.142 -	 * If the pte was reserved, it means it was created at boot
  10.143 -	 * time (not via split_large_page) and in turn we must not
  10.144 -	 * replace it with a largepage.
  10.145 -	 */
  10.146 -	if (!PageReserved(kpte_page)) {
  10.147 -		/* memleak and potential failed 2M page regeneration */
  10.148 -		BUG_ON(!page_count(kpte_page));
  10.149 -
  10.150 -		if (cpu_has_pse && (page_count(kpte_page) == 1)) {
  10.151 -			list_add(&kpte_page->lru, &df_list);
  10.152 -			revert_page(kpte_page, address);
  10.153 -		}
  10.154 -	}
  10.155 -	return 0;
  10.156 -} 
  10.157 -
  10.158 -static inline void flush_map(void)
  10.159 -{
  10.160 -	on_each_cpu(flush_kernel_map, NULL, 1, 1);
  10.161 -}
  10.162 -
  10.163 -/*
  10.164 - * Change the page attributes of an page in the linear mapping.
  10.165 - *
  10.166 - * This should be used when a page is mapped with a different caching policy
  10.167 - * than write-back somewhere - some CPUs do not like it when mappings with
  10.168 - * different caching policies exist. This changes the page attributes of the
  10.169 - * in kernel linear mapping too.
  10.170 - * 
  10.171 - * The caller needs to ensure that there are no conflicting mappings elsewhere.
  10.172 - * This function only deals with the kernel linear map.
  10.173 - * 
  10.174 - * Caller must call global_flush_tlb() after this.
  10.175 - */
  10.176 -int change_page_attr(struct page *page, int numpages, pgprot_t prot)
  10.177 -{
  10.178 -	int err = 0; 
  10.179 -	int i; 
  10.180 -	unsigned long flags;
  10.181 -
  10.182 -	spin_lock_irqsave(&cpa_lock, flags);
  10.183 -	for (i = 0; i < numpages; i++, page++) { 
  10.184 -		err = __change_page_attr(page, prot);
  10.185 -		if (err) 
  10.186 -			break; 
  10.187 -	} 	
  10.188 -	flush_page_update_queue();
  10.189 -	spin_unlock_irqrestore(&cpa_lock, flags);
  10.190 -	return err;
  10.191 -}
  10.192 -
  10.193 -void global_flush_tlb(void)
  10.194 -{ 
  10.195 -	LIST_HEAD(l);
  10.196 -	struct list_head* n;
  10.197 -
  10.198 -	BUG_ON(irqs_disabled());
  10.199 -
  10.200 -	spin_lock_irq(&cpa_lock);
  10.201 -	list_splice_init(&df_list, &l);
  10.202 -	spin_unlock_irq(&cpa_lock);
  10.203 -	flush_map();
  10.204 -	n = l.next;
  10.205 -	while (n != &l) {
  10.206 -		struct page *pg = list_entry(n, struct page, lru);
  10.207 -		n = n->next;
  10.208 -		__free_page(pg);
  10.209 -	}
  10.210 -} 
  10.211 -
  10.212 -#ifdef CONFIG_DEBUG_PAGEALLOC
  10.213 -void kernel_map_pages(struct page *page, int numpages, int enable)
  10.214 -{
  10.215 -	if (PageHighMem(page))
  10.216 -		return;
  10.217 -	/* the return value is ignored - the calls cannot fail,
  10.218 -	 * large pages are disabled at boot time.
  10.219 -	 */
  10.220 -	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
  10.221 -	/* we should perform an IPI and flush all tlbs,
  10.222 -	 * but that can deadlock->flush only current cpu.
  10.223 -	 */
  10.224 -	__flush_tlb_all();
  10.225 -}
  10.226 -#endif
  10.227 -
  10.228 -EXPORT_SYMBOL(change_page_attr);
  10.229 -EXPORT_SYMBOL(global_flush_tlb);
    11.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Sat Mar 12 21:43:58 2005 +0000
    11.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c	Fri Mar 18 03:37:54 2005 +0000
    11.3 @@ -232,7 +232,7 @@ struct page *pte_alloc_one(struct mm_str
    11.4  	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
    11.5  	if (pte == NULL)
    11.6  		return pte;
    11.7 -	if (pte >= highmem_start_page)
    11.8 +	if (PageHighMem(pte))
    11.9  		return pte;
   11.10  	/* not a highmem page -- free page and grab one from the cache */
   11.11  	__free_page(pte);
   11.12 @@ -247,7 +247,7 @@ void pte_free(struct page *pte)
   11.13  {
   11.14  	set_page_count(pte, 1);
   11.15  #ifdef CONFIG_HIGHPTE
   11.16 -	if (pte < highmem_start_page)
   11.17 +	if (!PageHighMem(pte))
   11.18  #endif
   11.19  		kmem_cache_free(pte_cache,
   11.20  				phys_to_virt(page_to_pseudophys(pte)));
    12.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Sat Mar 12 21:43:58 2005 +0000
    12.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Fri Mar 18 03:37:54 2005 +0000
    12.3 @@ -165,8 +165,15 @@ static inline void flush_requests(void)
    12.4  module_init(xlblk_init);
    12.5  
    12.6  #if ENABLE_VBD_UPDATE
    12.7 +static void update_vbds_task(void *unused)
    12.8 +{ 
    12.9 +    xlvbd_update_vbds();
   12.10 +}
   12.11 +
   12.12  static void vbd_update(void)
   12.13  {
   12.14 +    static DECLARE_WORK(update_tq, update_vbds_task, NULL);
   12.15 +    schedule_work(&update_tq);
   12.16  }
   12.17  #endif /* ENABLE_VBD_UPDATE */
   12.18  
    13.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c	Sat Mar 12 21:43:58 2005 +0000
    13.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c	Fri Mar 18 03:37:54 2005 +0000
    13.3 @@ -354,7 +354,6 @@ static int xlvbd_init_device(vdisk_t *xd
    13.4      return err;
    13.5  }
    13.6  
    13.7 -#if 0
    13.8  /*
    13.9   * xlvbd_remove_device - remove a device node if possible
   13.10   * @device:       numeric device ID
   13.11 @@ -364,14 +363,16 @@ static int xlvbd_init_device(vdisk_t *xd
   13.12   * This is OK for now but in future, should perhaps consider where this should
   13.13   * deallocate gendisks / unregister devices.
   13.14   */
   13.15 -static int xlvbd_remove_device(int device)
   13.16 +static int xlvbd_remove_device(int dev16)
   13.17  {
   13.18 -    int i, rc = 0, minor = MINOR(device);
   13.19 +    int i, rc = 0, minor = MINOR(dev16);
   13.20      struct gendisk *gd;
   13.21      struct block_device *bd;
   13.22 -    xen_block_t *disk = NULL;
   13.23 +    struct xlbd_disk_info *di;
   13.24 +    dev_t device = MKDEV(MAJOR_XEN(dev16), MINOR_XEN(dev16));
   13.25  
   13.26 -    if ( (bd = bdget(device)) == NULL )
   13.27 +    bd = bdget(device);
   13.28 +    if (!bd)
   13.29          return -1;
   13.30  
   13.31      /*
   13.32 @@ -380,67 +381,25 @@ static int xlvbd_remove_device(int devic
   13.33       */
   13.34      down(&bd->bd_sem);
   13.35  
   13.36 -    if ( ((gd = get_gendisk(device)) == NULL) ||
   13.37 -         ((disk = xldev_to_xldisk(device)) == NULL) )
   13.38 -        BUG();
   13.39 +    gd = get_gendisk(device, &i);
   13.40 +    BUG_ON(gd == NULL);
   13.41 +    di = (struct xlbd_disk_info *) gd->private_data;
   13.42 +    BUG_ON(di == NULL);
   13.43  
   13.44 -    if ( disk->usage != 0 )
   13.45 +    if ( di->mi->usage != 0 )
   13.46      {
   13.47          printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
   13.48          rc = -1;
   13.49          goto out;
   13.50      }
   13.51 - 
   13.52 -    if ( (minor & (gd->max_p-1)) != 0 )
   13.53 -    {
   13.54 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
   13.55 -        invalidate_device(device, 1);
   13.56 -        gd->part[minor].start_sect = 0;
   13.57 -        gd->part[minor].nr_sects   = 0;
   13.58 -        gd->sizes[minor]           = 0;
   13.59 -
   13.60 -        /* Clear the consists-of-virtual-partitions flag if possible. */
   13.61 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
   13.62 -        for ( i = 1; i < gd->max_p; i++ )
   13.63 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
   13.64 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
   13.65  
   13.66 -        /*
   13.67 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
   13.68 -         * present, then we can try to grok the unit's real partition table.
   13.69 -         */
   13.70 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
   13.71 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
   13.72 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
   13.73 -        {
   13.74 -            register_disk(gd,
   13.75 -                          device&~(gd->max_p-1), 
   13.76 -                          gd->max_p, 
   13.77 -                          &xlvbd_block_fops,
   13.78 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
   13.79 -        }
   13.80 -    }
   13.81 -    else
   13.82 -    {
   13.83 -        /*
   13.84 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
   13.85 -         * NB. The partition entries are only cleared if there are no VBDs
   13.86 -         * mapped to individual partitions on this unit.
   13.87 -         */
   13.88 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
   13.89 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
   13.90 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
   13.91 -        while ( i >= 0 )
   13.92 -        {
   13.93 -            invalidate_device(device+i, 1);
   13.94 -            gd->part[minor+i].start_sect = 0;
   13.95 -            gd->part[minor+i].nr_sects   = 0;
   13.96 -            gd->sizes[minor+i]           = 0;
   13.97 -            i--;
   13.98 -        }
   13.99 -    }
  13.100 +    BUG_ON(minor != gd->first_minor);
  13.101 +    /* The VBD is mapped to an entire unit. */
  13.102 +    
  13.103 +    invalidate_partition(gd, 0);
  13.104 +    set_capacity(gd, 0);
  13.105  
  13.106 - out:
  13.107 +out:
  13.108      up(&bd->bd_sem);
  13.109      bdput(bd);
  13.110      return rc;
  13.111 @@ -460,11 +419,11 @@ void xlvbd_update_vbds(void)
  13.112      old_nr   = nr_vbds;
  13.113  
  13.114      new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  13.115 +    if (!new_info)
  13.116 +        return;
  13.117 +
  13.118      if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  13.119 -    {
  13.120 -        kfree(new_info);
  13.121 -        return;
  13.122 -    }
  13.123 +        goto out;
  13.124  
  13.125      /*
  13.126       * Final list maximum size is old list + new list. This occurs only when
  13.127 @@ -472,6 +431,8 @@ void xlvbd_update_vbds(void)
  13.128       * VBDs in the old list because the usage counts are busy.
  13.129       */
  13.130      merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
  13.131 +    if (!merged_info)
  13.132 +        goto out;
  13.133  
  13.134      /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  13.135      i = j = k = 0;
  13.136 @@ -518,9 +479,9 @@ void xlvbd_update_vbds(void)
  13.137      nr_vbds  = k;
  13.138  
  13.139      kfree(old_info);
  13.140 +out:
  13.141      kfree(new_info);
  13.142  }
  13.143 -#endif
  13.144  
  13.145  /*
  13.146   * Set up all the linux device goop for the virtual block devices
    14.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Sat Mar 12 21:43:58 2005 +0000
    14.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Fri Mar 18 03:37:54 2005 +0000
    14.3 @@ -77,7 +77,7 @@ void active_reqs_init(void)
    14.4  
    14.5  static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx)
    14.6  {
    14.7 -    return ( (fe_dom << 16) | idx );
    14.8 +    return ( (fe_dom << 16) | MASK_ACTIVE_IDX(idx) );
    14.9  }
   14.10  
   14.11  /*-----[ Ring helpers ]---------------------------------------------------*/
    15.1 --- a/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Sat Mar 12 21:43:58 2005 +0000
    15.2 +++ b/linux-2.6.11-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Fri Mar 18 03:37:54 2005 +0000
    15.3 @@ -355,24 +355,6 @@ int blktap_write_be_ring(blkif_response_
    15.4      return 0;
    15.5  }
    15.6  
    15.7 -static void blktap_fast_flush_area(int idx, int nr_pages)
    15.8 -{
    15.9 -    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
   15.10 -    int               i;
   15.11 -
   15.12 -    for ( i = 0; i < nr_pages; i++ )
   15.13 -    {
   15.14 -        mcl[i].op = __HYPERVISOR_update_va_mapping;
   15.15 -        mcl[i].args[0] = MMAP_VADDR(idx, i);
   15.16 -        mcl[i].args[1] = 0;
   15.17 -        mcl[i].args[2] = 0;
   15.18 -    }
   15.19 -
   15.20 -    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
   15.21 -    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   15.22 -        BUG();
   15.23 -}
   15.24 -
   15.25  static int blktap_read_fe_ring(void)
   15.26  {
   15.27      /* This is called to read responses from the UFE ring. */
   15.28 @@ -398,7 +380,8 @@ static int blktap_read_fe_ring(void)
   15.29              DPRINTK("resp->fe_ring\n");
   15.30              ar = lookup_active_req(ID_TO_IDX(resp_s->id));
   15.31              blkif = ar->blkif;
   15.32 -            blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
   15.33 +            zap_page_range(blktap_vma, MMAP_VADDR(ID_TO_IDX(resp_s->id), 0), 
   15.34 +                    ar->nr_pages << PAGE_SHIFT, NULL);
   15.35              write_resp_to_fe_ring(blkif, resp_s);
   15.36              kick_fe_domain(blkif);
   15.37          }
    16.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h	Sat Mar 12 21:43:58 2005 +0000
    16.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h	Fri Mar 18 03:37:54 2005 +0000
    16.3 @@ -73,7 +73,6 @@ void kunmap(struct page *page);
    16.4  void *kmap_atomic(struct page *page, enum km_type type);
    16.5  void *kmap_atomic_pte(struct page *page, enum km_type type);
    16.6  void kunmap_atomic(void *kvaddr, enum km_type type);
    16.7 -void kunmap_atomic_force(void *kvaddr, enum km_type type);
    16.8  struct page *kmap_atomic_to_page(void *ptr);
    16.9  
   16.10  #define flush_cache_kmaps()	do { } while (0)
    17.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Sat Mar 12 21:43:58 2005 +0000
    17.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Fri Mar 18 03:37:54 2005 +0000
    17.3 @@ -485,7 +485,10 @@ int __direct_remap_area_pages(struct mm_
    17.4  			      mmu_update_t *v);
    17.5  
    17.6  #define io_remap_page_range(vma,from,phys,size,prot) \
    17.7 -	direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
    17.8 +direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
    17.9 +
   17.10 +#define io_remap_pfn_range(vma,from,pfn,size,prot) \
   17.11 +direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
   17.12  
   17.13  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
   17.14  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
    18.1 --- a/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Sat Mar 12 21:43:58 2005 +0000
    18.2 +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h	Fri Mar 18 03:37:54 2005 +0000
    18.3 @@ -311,6 +311,22 @@ HYPERVISOR_suspend(
    18.4      return ret;
    18.5  }
    18.6  
    18.7 +static inline int
    18.8 +HYPERVISOR_crash(
    18.9 +    void)
   18.10 +{
   18.11 +    int ret;
   18.12 +    unsigned long ign1;
   18.13 +    __asm__ __volatile__ (
   18.14 +        TRAP_INSTR
   18.15 +        : "=a" (ret), "=b" (ign1)
   18.16 +	: "0" (__HYPERVISOR_sched_op),
   18.17 +	  "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
   18.18 +        : "memory" );
   18.19 +
   18.20 +    return ret;
   18.21 +}
   18.22 +
   18.23  static inline long
   18.24  HYPERVISOR_set_timer_op(
   18.25      u64 timeout)
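
[Note on the hypervisor.h hunk above] HYPERVISOR_crash() is a thin wrapper around the sched_op hypercall: the argument packs SCHEDOP_shutdown in the low bits with SHUTDOWN_crash as the shutdown reason, so Xen records the domain as crashed rather than cleanly shut down. Combined with the panic notifier added to setup.c earlier in this changeset, a guest panic now surfaces to the tools as a crash. A sketch of that wiring, assuming the notifier-chain interface of the era:

    /* Sketch: let a guest panic report itself to Xen as a crash. */
    static int xen_panic_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
    {
        HYPERVISOR_crash();    /* SCHEDOP_shutdown with reason SHUTDOWN_crash */
        return NOTIFY_DONE;    /* not reached: the domain is torn down */
    }

    static struct notifier_block xen_panic_block = {
        .notifier_call = xen_panic_event,
        .priority      = 0,    /* try to run after other panic handlers */
    };

    /* During setup_arch(): */
    notifier_chain_register(&panic_notifier_list, &xen_panic_block);
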
    19.1 --- a/patches/linux-2.6.11/agpgart.patch	Sat Mar 12 21:43:58 2005 +0000
    19.2 +++ b/patches/linux-2.6.11/agpgart.patch	Fri Mar 18 03:37:54 2005 +0000
    19.3 @@ -350,22 +350,3 @@ diff -rup pristine-linux-2.6.11/drivers/
    19.4   
    19.5   	for (i = 0; i < num_entries; i++) {
    19.6   		agp_bridge->gatt_table[i] =
    19.7 -diff -rup pristine-linux-2.6.11/include/asm-i386/agp.h linux-2.6.11/include/asm-i386/agp.h
    19.8 ---- pristine-linux-2.6.11/include/asm-i386/agp.h	2005-03-02 07:37:31.000000000 +0000
    19.9 -+++ linux-2.6.11/include/asm-i386/agp.h	2005-03-11 00:35:21.331193069 +0000
   19.10 -@@ -3,6 +3,7 @@
   19.11 - 
   19.12 - #include <asm/pgtable.h>
   19.13 - #include <asm/cacheflush.h>
   19.14 -+#include <asm/system.h>
   19.15 - 
   19.16 - /* 
   19.17 -  * Functions to keep the agpgart mappings coherent with the MMU.
   19.18 -@@ -19,6 +20,6 @@ int unmap_page_from_agp(struct page *pag
   19.19 - /* Could use CLFLUSH here if the cpu supports it. But then it would
   19.20 -    need to be called for each cacheline of the whole page so it may not be 
   19.21 -    worth it. Would need a page for it. */
   19.22 --#define flush_agp_cache() asm volatile("wbinvd":::"memory")
   19.23 -+#define flush_agp_cache() wbinvd()
   19.24 - 
   19.25 - #endif
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/patches/linux-2.6.11/iomap.patch	Fri Mar 18 03:37:54 2005 +0000
    20.3 @@ -0,0 +1,120 @@
    20.4 +diff -ur linux-2.6.11/drivers/char/agp/frontend.c linux-2.6.11-io/drivers/char/agp/frontend.c
    20.5 +--- linux-2.6.11/drivers/char/agp/frontend.c	2005-03-02 07:37:49.000000000 +0000
    20.6 ++++ linux-2.6.11-io/drivers/char/agp/frontend.c	2005-03-15 17:38:30.000000000 +0000
    20.7 +@@ -627,7 +627,7 @@
    20.8 + 		DBG("client vm_ops=%p", kerninfo.vm_ops);
    20.9 + 		if (kerninfo.vm_ops) {
   20.10 + 			vma->vm_ops = kerninfo.vm_ops;
   20.11 +-		} else if (remap_pfn_range(vma, vma->vm_start,
   20.12 ++		} else if (io_remap_pfn_range(vma, vma->vm_start,
   20.13 + 				(kerninfo.aper_base + offset) >> PAGE_SHIFT,
   20.14 + 					    size, vma->vm_page_prot)) {
   20.15 + 			goto out_again;
   20.16 +@@ -643,7 +643,7 @@
   20.17 + 		DBG("controller vm_ops=%p", kerninfo.vm_ops);
   20.18 + 		if (kerninfo.vm_ops) {
   20.19 + 			vma->vm_ops = kerninfo.vm_ops;
   20.20 +-		} else if (remap_pfn_range(vma, vma->vm_start,
   20.21 ++		} else if (io_remap_pfn_range(vma, vma->vm_start,
   20.22 + 					    kerninfo.aper_base >> PAGE_SHIFT,
   20.23 + 					    size, vma->vm_page_prot)) {
   20.24 + 			goto out_again;
   20.25 +diff -ur linux-2.6.11/drivers/char/drm/drm_vm.c linux-2.6.11-io/drivers/char/drm/drm_vm.c
   20.26 +--- linux-2.6.11/drivers/char/drm/drm_vm.c	2005-03-02 07:38:33.000000000 +0000
   20.27 ++++ linux-2.6.11-io/drivers/char/drm/drm_vm.c	2005-03-15 17:43:26.000000000 +0000
   20.28 +@@ -630,7 +630,7 @@
   20.29 + 					vma->vm_end - vma->vm_start,
   20.30 + 					vma->vm_page_prot, 0))
   20.31 + #else
   20.32 +-		if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
   20.33 ++		if (io_remap_pfn_range(vma, vma->vm_start,
   20.34 + 				     (VM_OFFSET(vma) + offset) >> PAGE_SHIFT,
   20.35 + 				     vma->vm_end - vma->vm_start,
   20.36 + 				     vma->vm_page_prot))
   20.37 +diff -ur linux-2.6.11/drivers/char/drm/i810_dma.c linux-2.6.11-io/drivers/char/drm/i810_dma.c
   20.38 +--- linux-2.6.11/drivers/char/drm/i810_dma.c	2005-03-02 07:37:55.000000000 +0000
   20.39 ++++ linux-2.6.11-io/drivers/char/drm/i810_dma.c	2005-03-15 17:53:36.000000000 +0000
   20.40 +@@ -139,7 +139,7 @@
   20.41 +    	buf_priv->currently_mapped = I810_BUF_MAPPED;
   20.42 + 	unlock_kernel();
   20.43 + 
   20.44 +-	if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
   20.45 ++	if (io_remap_pfn_range(vma, vma->vm_start,
   20.46 + 			     VM_OFFSET(vma) >> PAGE_SHIFT,
   20.47 + 			     vma->vm_end - vma->vm_start,
   20.48 + 			     vma->vm_page_prot)) return -EAGAIN;
   20.49 +diff -ur linux-2.6.11/drivers/char/drm/i830_dma.c linux-2.6.11-io/drivers/char/drm/i830_dma.c
   20.50 +--- linux-2.6.11/drivers/char/drm/i830_dma.c	2005-03-02 07:37:48.000000000 +0000
   20.51 ++++ linux-2.6.11-io/drivers/char/drm/i830_dma.c	2005-03-15 17:53:46.000000000 +0000
   20.52 +@@ -157,7 +157,7 @@
   20.53 +    	buf_priv->currently_mapped = I830_BUF_MAPPED;
   20.54 + 	unlock_kernel();
   20.55 + 
   20.56 +-	if (remap_pfn_range(DRM_RPR_ARG(vma) vma->vm_start,
   20.57 ++	if (io_remap_pfn_range(vma, vma->vm_start,
   20.58 + 			     VM_OFFSET(vma) >> PAGE_SHIFT,
   20.59 + 			     vma->vm_end - vma->vm_start,
   20.60 + 			     vma->vm_page_prot)) return -EAGAIN;
   20.61 +diff -ur linux-2.6.11/drivers/char/hpet.c linux-2.6.11-io/drivers/char/hpet.c
   20.62 +--- linux-2.6.11/drivers/char/hpet.c	2005-03-02 07:38:10.000000000 +0000
   20.63 ++++ linux-2.6.11-io/drivers/char/hpet.c	2005-03-15 17:37:22.000000000 +0000
   20.64 +@@ -76,6 +76,7 @@
   20.65 + struct hpets {
   20.66 + 	struct hpets *hp_next;
   20.67 + 	struct hpet __iomem *hp_hpet;
   20.68 ++	unsigned long hp_hpet_phys;
   20.69 + 	struct time_interpolator *hp_interpolator;
   20.70 + 	unsigned long hp_period;
   20.71 + 	unsigned long hp_delta;
   20.72 +@@ -265,7 +266,7 @@
   20.73 + 		return -EINVAL;
   20.74 + 
   20.75 + 	devp = file->private_data;
   20.76 +-	addr = (unsigned long)devp->hd_hpet;
   20.77 ++	addr = devp->hd_hpets->hp_hpet_phys;
   20.78 + 
   20.79 + 	if (addr & (PAGE_SIZE - 1))
   20.80 + 		return -ENOSYS;
   20.81 +@@ -274,7 +275,7 @@
   20.82 + 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   20.83 + 	addr = __pa(addr);
   20.84 + 
   20.85 +-	if (remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
   20.86 ++	if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
   20.87 + 					PAGE_SIZE, vma->vm_page_prot)) {
   20.88 + 		printk(KERN_ERR "remap_pfn_range failed in hpet.c\n");
   20.89 + 		return -EAGAIN;
   20.90 +@@ -795,6 +796,7 @@
   20.91 + 
   20.92 + 	hpetp->hp_which = hpet_nhpet++;
   20.93 + 	hpetp->hp_hpet = hdp->hd_address;
   20.94 ++	hpetp->hp_hpet_phys = hdp->hd_phys_address;
   20.95 + 
   20.96 + 	hpetp->hp_ntimer = hdp->hd_nirqs;
   20.97 + 
   20.98 +diff -ur linux-2.6.11/drivers/sbus/char/flash.c linux-2.6.11-io/drivers/sbus/char/flash.c
   20.99 +--- linux-2.6.11/drivers/sbus/char/flash.c	2005-03-02 07:38:10.000000000 +0000
  20.100 ++++ linux-2.6.11-io/drivers/sbus/char/flash.c	2005-03-15 17:20:22.000000000 +0000
  20.101 +@@ -75,7 +75,7 @@
  20.102 + 	pgprot_val(vma->vm_page_prot) |= _PAGE_E;
  20.103 + 	vma->vm_flags |= (VM_SHM | VM_LOCKED);
  20.104 + 
  20.105 +-	if (remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
  20.106 ++	if (io_remap_pfn_range(vma, vma->vm_start, addr, size, vma->vm_page_prot))
  20.107 + 		return -EAGAIN;
  20.108 + 		
  20.109 + 	return 0;
  20.110 +diff -ur linux-2.6.11/include/linux/mm.h linux-2.6.11-io/include/linux/mm.h
  20.111 +--- linux-2.6.11/include/linux/mm.h	2005-03-02 07:37:47.000000000 +0000
  20.112 ++++ linux-2.6.11-io/include/linux/mm.h	2005-03-15 17:03:46.000000000 +0000
  20.113 +@@ -815,6 +815,10 @@
  20.114 + extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
  20.115 + int remap_pfn_range(struct vm_area_struct *, unsigned long,
  20.116 + 		unsigned long, unsigned long, pgprot_t);
  20.117 ++/* Allow arch override for mapping of device and I/O (non-RAM) pages. */
  20.118 ++#ifndef io_remap_pfn_range
  20.119 ++#define io_remap_pfn_range remap_pfn_range
  20.120 ++#endif
  20.121 + 
  20.122 + #ifdef CONFIG_PROC_FS
  20.123 + void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
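
[Note on iomap.patch above] The new patch converts device-memory mmap() paths (AGP frontend, DRM, HPET, sbus flash) from remap_pfn_range() to io_remap_pfn_range() and gives linux/mm.h a default that falls back to remap_pfn_range(). That lets an architecture override the macro, as the asm-xen pgtable.h hunk earlier in this changeset does, so I/O mappings are routed through direct_remap_area_pages() and tagged DOMID_IO. A sketch of the override pattern; the two definitions below live in different headers, as noted in the comments:

    /* Generic fallback (include/linux/mm.h): arches that need nothing
     * special simply get remap_pfn_range(). */
    #ifndef io_remap_pfn_range
    #define io_remap_pfn_range remap_pfn_range
    #endif

    /* Xen override (asm-xen/asm-i386/pgtable.h): route I/O mappings through
     * the hypervisor-aware path, tagged as DOMID_IO. */
    #define io_remap_pfn_range(vma, from, pfn, size, prot) \
            direct_remap_area_pages((vma)->vm_mm, from, (pfn) << PAGE_SHIFT, \
                                    size, prot, DOMID_IO)
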
    21.1 --- a/tools/blktap/Makefile	Sat Mar 12 21:43:58 2005 +0000
    21.2 +++ b/tools/blktap/Makefile	Fri Mar 18 03:37:54 2005 +0000
    21.3 @@ -141,6 +141,10 @@ vdi_fill: $(LIB) vdi_fill.c $(VDI_SRCS)
    21.4  vdi_validate: $(LIB) vdi_validate.c $(VDI_SRCS)
    21.5  	$(CC) $(CFLAGS) -g3 -o vdi_validate vdi_validate.c $(VDI_SRCS)
    21.6  
    21.7 +blockstored: blockstored.c
    21.8 +	$(CC) $(CFLAGS) -g3 -o blockstored blockstored.c
    21.9 +bstest: bstest.c blockstore.c
   21.10 +	$(CC) $(CFLAGS) -g3 -o bstest bstest.c blockstore.c
   21.11  
   21.12  rdx_cmp: $(LIB) rdx_cmp.c $(VDI_SRCS)
   21.13  	$(CC) $(CFLAGS) -g3 -o rdx_cmp rdx_cmp.c $(VDI_SRCS)
    22.1 --- a/tools/blktap/blockstore.c	Sat Mar 12 21:43:58 2005 +0000
    22.2 +++ b/tools/blktap/blockstore.c	Fri Mar 18 03:37:54 2005 +0000
    22.3 @@ -15,6 +15,408 @@
    22.4  #include <sys/stat.h>
    22.5  #include "blockstore.h"
    22.6  
    22.7 +#define BLOCKSTORE_REMOTE
    22.8 +
    22.9 +#ifdef BLOCKSTORE_REMOTE
   22.10 +
   22.11 +//#define BSDEBUG
   22.12 +
   22.13 +#include <sys/socket.h>
   22.14 +#include <sys/ioctl.h>
   22.15 +#include <netinet/in.h>
   22.16 +#include <netdb.h>
   22.17 +
   22.18 +#define ENTER_QUEUE_CR (void)0
   22.19 +#define LEAVE_QUEUE_CR (void)0
   22.20 +
   22.21 +bsserver_t bsservers[MAX_SERVERS];
   22.22 +bscluster_t bsclusters[MAX_CLUSTERS];
   22.23 +
   22.24 +struct sockaddr_in sin_local;
   22.25 +int bssock = 0;
   22.26 +
   22.27 +typedef struct bsq_t_struct {
   22.28 +    struct bsq_t_struct *prev;
   22.29 +    struct bsq_t_struct *next;
   22.30 +    int server;
   22.31 +    int length;
   22.32 +    struct msghdr msghdr;
   22.33 +    struct iovec iov[2];
   22.34 +    bshdr_t message;
   22.35 +    void *block;
   22.36 +} bsq_t;
   22.37 +
   22.38 +bsq_t *bs_head = NULL;
   22.39 +bsq_t *bs_tail = NULL;
   22.40 +
   22.41 +int send_message(bsq_t *qe) {
   22.42 +    int rc;
   22.43 +
   22.44 +    qe->msghdr.msg_name = (void *)&(bsservers[qe->server].sin);
   22.45 +    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
   22.46 +    qe->msghdr.msg_iov = qe->iov;
   22.47 +    if (qe->block)
   22.48 +        qe->msghdr.msg_iovlen = 2;
   22.49 +    else
   22.50 +        qe->msghdr.msg_iovlen = 1;
   22.51 +    qe->msghdr.msg_control = NULL;
   22.52 +    qe->msghdr.msg_controllen = 0;
   22.53 +    qe->msghdr.msg_flags = 0;
   22.54 +
   22.55 +    qe->iov[0].iov_base = (void *)&(qe->message);
   22.56 +    qe->iov[0].iov_len = MSGBUFSIZE_ID;
   22.57 +
   22.58 +    if (qe->block) {
   22.59 +        qe->iov[1].iov_base = qe->block;
   22.60 +        qe->iov[1].iov_len = BLOCK_SIZE;
   22.61 +    }
   22.62 +
   22.63 +    rc = sendmsg(bssock, &(qe->msghdr), 0);
   22.64 +    //rc = sendto(bssock, (void *)&(qe->message), qe->length, 0,
   22.65 +    //           (struct sockaddr *)&(bsservers[qe->server].sin),
   22.66 +    //           sizeof(struct sockaddr_in));
   22.67 +    if (rc < 0)
   22.68 +        return rc;
   22.69 +    
   22.70 +    ENTER_QUEUE_CR;
   22.71 +    
   22.72 +    LEAVE_QUEUE_CR;
   22.73 +
   22.74 +    return rc;
   22.75 +}
   22.76 +
   22.77 +int recv_message(bsq_t *qe) {
   22.78 +    struct sockaddr_in from;
   22.79 +    //int flen = sizeof(from);
   22.80 +    int rc;
   22.81 +
   22.82 +    qe->msghdr.msg_name = &from;
   22.83 +    qe->msghdr.msg_namelen = sizeof(struct sockaddr_in);
   22.84 +    qe->msghdr.msg_iov = qe->iov;
   22.85 +    if (qe->block)
   22.86 +        qe->msghdr.msg_iovlen = 2;
   22.87 +    else
   22.88 +        qe->msghdr.msg_iovlen = 1;
   22.89 +    qe->msghdr.msg_control = NULL;
   22.90 +    qe->msghdr.msg_controllen = 0;
   22.91 +    qe->msghdr.msg_flags = 0;
   22.92 +
   22.93 +    qe->iov[0].iov_base = (void *)&(qe->message);
   22.94 +    qe->iov[0].iov_len = MSGBUFSIZE_ID;
   22.95 +    if (qe->block) {
   22.96 +        qe->iov[1].iov_base = qe->block;
   22.97 +        qe->iov[1].iov_len = BLOCK_SIZE;
   22.98 +    }
   22.99 +
  22.100 +    rc = recvmsg(bssock, &(qe->msghdr), 0);
  22.101 +
  22.102 +    //return recvfrom(bssock, (void *)&(qe->message), sizeof(bsmsg_t), 0,
  22.103 +    //               (struct sockaddr *)&from, &flen);
  22.104 +    return rc;
  22.105 +}
  22.106 +
  22.107 +void *readblock_indiv(int server, u64 id) {
  22.108 +    void *block;
  22.109 +    bsq_t *qe;
  22.110 +    int len;
  22.111 +
  22.112 +    qe = (bsq_t *)malloc(sizeof(bsq_t));
  22.113 +    if (!qe) {
  22.114 +        perror("readblock qe malloc");
  22.115 +        return NULL;
  22.116 +    }
  22.117 +    qe->block = malloc(BLOCK_SIZE);
  22.118 +    if (!qe->block) {
  22.119 +        perror("readblock qe malloc");
  22.120 +        free((void *)qe);
  22.121 +        return NULL;
  22.122 +    }
  22.123 +
  22.124 +    qe->server = server;
  22.125 +
  22.126 +    qe->message.operation = BSOP_READBLOCK;
  22.127 +    qe->message.flags = 0;
  22.128 +    qe->message.id = id;
  22.129 +    qe->length = MSGBUFSIZE_ID;
  22.130 +
  22.131 +    if (send_message(qe) < 0) {
  22.132 +        perror("readblock sendto");
  22.133 +        goto err;
  22.134 +    }
  22.135 +    
  22.136 +    len = recv_message(qe);
  22.137 +    if (len < 0) {
  22.138 +        perror("readblock recv");
  22.139 +        goto err;
  22.140 +    }
  22.141 +    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
  22.142 +        fprintf(stderr, "readblock server error\n");
  22.143 +        goto err;
  22.144 +    }
  22.145 +    if (len < MSGBUFSIZE_BLOCK) {
  22.146 +        fprintf(stderr, "readblock recv short (%u)\n", len);
  22.147 +        goto err;
  22.148 +    }
  22.149 +    if ((block = malloc(BLOCK_SIZE)) == NULL) {
  22.150 +        perror("readblock malloc");
  22.151 +        goto err;
  22.152 +    }
  22.153 +    //memcpy(block, qe->message.block, BLOCK_SIZE);
  22.154 +    block = qe->block;
  22.155 +
  22.156 +    free((void *)qe);
  22.157 +    return block;
  22.158 +
  22.159 +    err:
  22.160 +    free(qe->block);
  22.161 +    free((void *)qe);
  22.162 +    return NULL;
  22.163 +}
  22.164 +
  22.165 +/**
  22.166 + * readblock: read a block from disk
  22.167 + *   @id: block id to read
  22.168 + *
  22.169 + *   @return: pointer to block, NULL on error
  22.170 + */
  22.171 +void *readblock(u64 id) {
  22.172 +    int map = (int)BSID_MAP(id);
  22.173 +    u64 xid;
  22.174 +    static int i = CLUSTER_MAX_REPLICAS - 1;
  22.175 +    void *block = NULL;
  22.176 +
  22.177 +    /* special case for the "superblock" just use the first block on the
  22.178 +     * first replica. (extend to blocks < 6 for vdi bug)
  22.179 +     */
  22.180 +    if (id < 6) {
  22.181 +        block = readblock_indiv(bsclusters[map].servers[0], id);
  22.182 +        goto out;
  22.183 +    }
  22.184 +
  22.185 +    i++;
  22.186 +    if (i >= CLUSTER_MAX_REPLICAS)
  22.187 +        i = 0;
  22.188 +    switch (i) {
  22.189 +    case 0:
  22.190 +        xid = BSID_REPLICA0(id);
  22.191 +        break;
  22.192 +    case 1:
  22.193 +        xid = BSID_REPLICA1(id);
  22.194 +        break;
  22.195 +    case 2:
  22.196 +        xid = BSID_REPLICA2(id);
  22.197 +        break;
  22.198 +    }
  22.199 +    
  22.200 +    block = readblock_indiv(bsclusters[map].servers[i], xid);
  22.201 +
  22.202 +    out:
  22.203 +#ifdef BSDEBUG
  22.204 +    if (block)
  22.205 +        fprintf(stderr, "READ:  %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  22.206 +                id,
  22.207 +                (unsigned int)((unsigned char *)block)[0],
  22.208 +                (unsigned int)((unsigned char *)block)[1],
  22.209 +                (unsigned int)((unsigned char *)block)[2],
  22.210 +                (unsigned int)((unsigned char *)block)[3],
  22.211 +                (unsigned int)((unsigned char *)block)[4],
  22.212 +                (unsigned int)((unsigned char *)block)[5],
  22.213 +                (unsigned int)((unsigned char *)block)[6],
  22.214 +                (unsigned int)((unsigned char *)block)[7]);
  22.215 +    else
  22.216 +        fprintf(stderr, "READ:  %016llx NULL\n", id);
  22.217 +#endif
  22.218 +    return block;
  22.219 +}
  22.220 +
  22.221 +int writeblock_indiv(int server, u64 id, void *block) {
  22.222 +    bsq_t *qe;
  22.223 +    int len;
  22.224 +
  22.225 +    qe = (bsq_t *)malloc(sizeof(bsq_t));
  22.226 +    if (!qe) {
  22.227 +        perror("writeblock qe malloc");
  22.228 +        goto err;
  22.229 +    }
  22.230 +    qe->server = server;
  22.231 +
  22.232 +    qe->message.operation = BSOP_WRITEBLOCK;
  22.233 +    qe->message.flags = 0;
  22.234 +    qe->message.id = id;
  22.235 +    //memcpy(qe->message.block, block, BLOCK_SIZE);
  22.236 +    qe->block = block;
  22.237 +    qe->length = MSGBUFSIZE_BLOCK;
  22.238 +
  22.239 +    if (send_message(qe) < 0) {
  22.240 +        perror("writeblock sendto");
  22.241 +        goto err;
  22.242 +    }
  22.243 +    
  22.244 +    len = recv_message(qe);
  22.245 +    if (len < 0) {
  22.246 +        perror("writeblock recv");
  22.247 +        goto err;
  22.248 +    }
  22.249 +    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
  22.250 +        fprintf(stderr, "writeblock server error\n");
  22.251 +        goto err;
  22.252 +    }
  22.253 +    if (len < MSGBUFSIZE_ID) {
  22.254 +        fprintf(stderr, "writeblock recv short (%u)\n", len);
  22.255 +        goto err;
  22.256 +    }
  22.257 +
  22.258 +    free((void *)qe);
  22.259 +    return 0;
  22.260 +
  22.261 +    err:
  22.262 +    free((void *)qe);
  22.263 +    return -1;
  22.264 +}
  22.265 +
  22.266 +/**
  22.267 + * writeblock: write an existing block to disk
  22.268 + *   @id: block id
  22.269 + *   @block: pointer to block
  22.270 + *
  22.271 + *   @return: zero on success, -1 on failure
  22.272 + */
  22.273 +int writeblock(u64 id, void *block) {
  22.274 +    int map = (int)BSID_MAP(id);
  22.275 +    
  22.276 +    int rep0 = bsclusters[map].servers[0];
  22.277 +    int rep1 = bsclusters[map].servers[1];
  22.278 +    int rep2 = bsclusters[map].servers[2];
  22.279 +
  22.280 +#ifdef BSDEBUG
  22.281 +    fprintf(stderr,
  22.282 +            "WRITE: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  22.283 +            id,
  22.284 +            (unsigned int)((unsigned char *)block)[0],
  22.285 +            (unsigned int)((unsigned char *)block)[1],
  22.286 +            (unsigned int)((unsigned char *)block)[2],
  22.287 +            (unsigned int)((unsigned char *)block)[3],
  22.288 +            (unsigned int)((unsigned char *)block)[4],
  22.289 +            (unsigned int)((unsigned char *)block)[5],
  22.290 +            (unsigned int)((unsigned char *)block)[6],
  22.291 +            (unsigned int)((unsigned char *)block)[7]);
  22.292 +#endif
  22.293 +
  22.294 +    /* Special case for the "superblock": just use the first block on the
  22.295 +     * first replica. (Extended to all blocks < 6 to work around a vdi bug.)
  22.296 +     */
  22.297 +    if (id < 6) {
  22.298 +        return writeblock_indiv(rep0, id, block);
  22.299 +    }
  22.300 +
  22.301 +    if (writeblock_indiv(rep0, BSID_REPLICA0(id), block) < 0)
  22.302 +        return -1;
  22.303 +    if (writeblock_indiv(rep1, BSID_REPLICA1(id), block) < 0)
  22.304 +        return -1;
  22.305 +    if (writeblock_indiv(rep2, BSID_REPLICA2(id), block) < 0)
  22.306 +        return -1;
  22.307 +    return 0;
  22.308 +}
  22.309 +
  22.310 +/**
  22.311 + * allocblock: write a new block to disk
  22.312 + *   @block: pointer to block
  22.313 + *
  22.314 + *   @return: new id of block on disk
  22.315 + */
  22.316 +u64 allocblock(void *block) {
  22.317 +    return allocblock_hint(block, 0);
  22.318 +}
  22.319 +
  22.320 +u64 allocblock_hint_indiv(int server, void *block, u64 hint) {
  22.321 +    bsq_t *qe;
  22.322 +    int len;  u64 newid;
  22.323 +
  22.324 +    qe = (bsq_t *)malloc(sizeof(bsq_t));
  22.325 +    if (!qe) {
  22.326 +        perror("allocblock_hint qe malloc");
  22.327 +        goto err;
  22.328 +    }
  22.329 +    qe->server = server;
  22.330 +
  22.331 +    qe->message.operation = BSOP_ALLOCBLOCK;
  22.332 +    qe->message.flags = 0;
  22.333 +    qe->message.id = hint;
  22.334 +    //memcpy(qe->message.block, block, BLOCK_SIZE);
  22.335 +    qe->block = block;
  22.336 +    qe->length = MSGBUFSIZE_BLOCK;
  22.337 +
  22.338 +    if (send_message(qe) < 0) {
  22.339 +        perror("allocblock_hint sendto");
  22.340 +        goto err;
  22.341 +    }
  22.342 +    
  22.343 +    len = recv_message(qe);
  22.344 +    if (len < 0) {
  22.345 +        perror("allocblock_hint recv");
  22.346 +        goto err;
  22.347 +    }
  22.348 +    if ((qe->message.flags & BSOP_FLAG_ERROR)) {
  22.349 +        fprintf(stderr, "allocblock_hint server error\n");
  22.350 +        goto err;
  22.351 +    }
  22.352 +    if (len < MSGBUFSIZE_ID) {
  22.353 +        fprintf(stderr, "allocblock_hint recv short (%u)\n", len);
  22.354 +        goto err;
  22.355 +    }
  22.356 +
  22.357 +    newid = qe->message.id;  free((void *)qe);   /* save the id before freeing qe */
  22.358 +    return newid;
  22.359 +
  22.360 +    err:
  22.361 +    free((void *)qe);
  22.362 +    return 0;
  22.363 +}
  22.364 +
  22.365 +/**
  22.366 + * allocblock_hint: write a new block to disk
  22.367 + *   @block: pointer to block
  22.368 + *   @hint: allocation hint
  22.369 + *
  22.370 + *   @return: new id of block on disk
  22.371 + */
  22.372 +u64 allocblock_hint(void *block, u64 hint) {
  22.373 +    int map = (int)hint;
  22.374 +    
  22.375 +    int rep0 = bsclusters[map].servers[0];
  22.376 +    int rep1 = bsclusters[map].servers[1];
  22.377 +    int rep2 = bsclusters[map].servers[2];
  22.378 +
  22.379 +    u64 id0, id1, id2;
  22.380 +
  22.381 +    id0 = allocblock_hint_indiv(rep0, block, 0);
  22.382 +    if (id0 == 0)
  22.383 +        return 0;
  22.384 +    id1 = allocblock_hint_indiv(rep1, block, 0);
  22.385 +    if (id1 == 0)
  22.386 +        return 0;
  22.387 +    id2 = allocblock_hint_indiv(rep2, block, 0);
  22.388 +    if (id2 == 0)
  22.389 +        return 0;
  22.390 +
  22.391 +#ifdef BSDEBUG
  22.392 +    fprintf(stderr, "ALLOC: %016llx %02x%02x %02x%02x %02x%02x %02x%02x\n",
  22.393 +            BSID(map, id0, id1, id2),
  22.394 +            (unsigned int)((unsigned char *)block)[0],
  22.395 +            (unsigned int)((unsigned char *)block)[1],
  22.396 +            (unsigned int)((unsigned char *)block)[2],
  22.397 +            (unsigned int)((unsigned char *)block)[3],
  22.398 +            (unsigned int)((unsigned char *)block)[4],
  22.399 +            (unsigned int)((unsigned char *)block)[5],
  22.400 +            (unsigned int)((unsigned char *)block)[6],
  22.401 +            (unsigned int)((unsigned char *)block)[7]);
  22.402 +#endif
  22.403 +
  22.404 +    return BSID(map, id0, id1, id2);
  22.405 +}
  22.406 +
  22.407 +#else /* /BLOCKSTORE_REMOTE */
  22.408 +
  22.409  static int block_fp = -1;
  22.410   
  22.411  /**
  22.412 @@ -94,6 +496,18 @@ u64 allocblock(void *block) {
  22.413      return lb;
  22.414  }
  22.415  
  22.416 +/**
  22.417 + * allocblock_hint: write a new block to disk
  22.418 + *   @block: pointer to block
  22.419 + *   @hint: allocation hint
  22.420 + *
  22.421 + *   @return: new id of block on disk
  22.422 + */
  22.423 +u64 allocblock_hint(void *block, u64 hint) {
  22.424 +    return allocblock(block);
  22.425 +}
  22.426 +
  22.427 +#endif /* BLOCKSTORE_REMOTE */
  22.428  
  22.429  /**
  22.430   * newblock: get a new in-memory block set to zeros
  22.431 @@ -124,12 +538,92 @@ void freeblock(void *block) {
  22.432  
  22.433  int __init_blockstore(void)
  22.434  {
  22.435 +#ifdef BLOCKSTORE_REMOTE
  22.436 +    struct hostent *addr;
  22.437 +    int i;
  22.438 +
  22.439 +    bsservers[0].hostname = "firebug.cl.cam.ac.uk";
  22.440 +    bsservers[1].hostname = "tetris.cl.cam.ac.uk";
  22.441 +    bsservers[2].hostname = "donkeykong.cl.cam.ac.uk";
  22.442 +    bsservers[3].hostname = "gunfighter.cl.cam.ac.uk";
  22.443 +    bsservers[4].hostname = "galaxian.cl.cam.ac.uk";
  22.444 +    bsservers[5].hostname = "firetrack.cl.cam.ac.uk";
  22.445 +    bsservers[6].hostname = "funfair.cl.cam.ac.uk";
  22.446 +    bsservers[7].hostname = "felix.cl.cam.ac.uk";
  22.447 +    bsservers[8].hostname = NULL;
  22.448 +    bsservers[9].hostname = NULL;
  22.449 +    bsservers[10].hostname = NULL;
  22.450 +    bsservers[11].hostname = NULL;
  22.451 +    bsservers[12].hostname = NULL;
  22.452 +    bsservers[13].hostname = NULL;
  22.453 +    bsservers[14].hostname = NULL;
  22.454 +    bsservers[15].hostname = NULL;
  22.455 +
  22.456 +    for (i = 0; i < MAX_SERVERS; i++) {
  22.457 +        if (!bsservers[i].hostname)
  22.458 +            continue;
  22.459 +        addr = gethostbyname(bsservers[i].hostname);
  22.460 +        if (!addr) {
  22.461 +            perror("bad hostname");
  22.462 +            return -1;
  22.463 +        }
  22.464 +        bsservers[i].sin.sin_family = addr->h_addrtype;
  22.465 +        bsservers[i].sin.sin_port = htons(BLOCKSTORED_PORT);
  22.466 +        bsservers[i].sin.sin_addr.s_addr = 
  22.467 +            ((struct in_addr *)(addr->h_addr))->s_addr;
  22.468 +    }
  22.469 +
  22.470 +    /* Cluster map
  22.471 +     */
  22.472 +    bsclusters[0].servers[0] = 0;
  22.473 +    bsclusters[0].servers[1] = 1;
  22.474 +    bsclusters[0].servers[2] = 2;
  22.475 +    bsclusters[1].servers[0] = 1;
  22.476 +    bsclusters[1].servers[1] = 2;
  22.477 +    bsclusters[1].servers[2] = 3;
  22.478 +    bsclusters[2].servers[0] = 2;
  22.479 +    bsclusters[2].servers[1] = 3;
  22.480 +    bsclusters[2].servers[2] = 4;
  22.481 +    bsclusters[3].servers[0] = 3;
  22.482 +    bsclusters[3].servers[1] = 4;
  22.483 +    bsclusters[3].servers[2] = 5;
  22.484 +    bsclusters[4].servers[0] = 4;
  22.485 +    bsclusters[4].servers[1] = 5;
  22.486 +    bsclusters[4].servers[2] = 6;
  22.487 +    bsclusters[5].servers[0] = 5;
  22.488 +    bsclusters[5].servers[1] = 6;
  22.489 +    bsclusters[5].servers[2] = 7;
  22.490 +    bsclusters[6].servers[0] = 6;
  22.491 +    bsclusters[6].servers[1] = 7;
  22.492 +    bsclusters[6].servers[2] = 0;
  22.493 +    bsclusters[7].servers[0] = 7;
  22.494 +    bsclusters[7].servers[1] = 0;
  22.495 +    bsclusters[7].servers[2] = 1;
  22.496 +
  22.497 +    /* Local socket set up
  22.498 +     */
  22.499 +    bssock = socket(AF_INET, SOCK_DGRAM, 0);
  22.500 +    if (bssock < 0) {
  22.501 +        perror("Bad socket");
  22.502 +        return -1;
  22.503 +    }
  22.504 +    memset(&sin_local, 0, sizeof(sin_local));
  22.505 +    sin_local.sin_family = AF_INET;
  22.506 +    sin_local.sin_port = htons(BLOCKSTORED_PORT);
  22.507 +    sin_local.sin_addr.s_addr = htonl(INADDR_ANY);
  22.508 +    if (bind(bssock, (struct sockaddr *)&sin_local, sizeof(sin_local)) < 0) {
  22.509 +        perror("bind");
  22.510 +        close(bssock);
  22.511 +        return -1;
  22.512 +    }
  22.513 +
  22.514 +#else /* /BLOCKSTORE_REMOTE */
  22.515      block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  22.516  
  22.517      if (block_fp < 0) {
  22.518          perror("open");
  22.519          return -1;
  22.520      }
  22.521 -    
  22.522 +#endif /* BLOCKSTORE_REMOTE */
  22.523      return 0;
  22.524  }
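
For orientation, here is a minimal sketch of how a blktap client might drive the
blockstore interface that this file now provides in both its local and remote
(BLOCKSTORE_REMOTE) builds. The calls are the ones exported by blockstore.h below;
the main() wrapper, the sample payload and the error handling are illustrative
only, and the "id == 0 means failure" check follows bstest.c.

    #include <stdio.h>
    #include <string.h>
    #include "blockstore.h"

    int main(void)
    {
        void *buf;
        u64 id;

        if (__init_blockstore() < 0)     /* opens blockstore.dat, or resolves the remote servers */
            return 1;

        buf = newblock();                /* zeroed BLOCK_SIZE buffer */
        if (buf == NULL)
            return 1;
        strcpy((char *)buf, "hello, blockstore");

        id = allocblock_hint(buf, 0);    /* new block; the hint picks the cluster in the remote case */
        if (id == 0)
            return 1;

        memset(buf, 0xff, BLOCK_SIZE);
        if (writeblock(id, buf) < 0)     /* remote case: rewrites every replica */
            return 1;

        freeblock(buf);
        buf = readblock(id);             /* remote case: reads back from a single replica */
        if (buf != NULL)
            freeblock(buf);
        return 0;
    }
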
    23.1 --- a/tools/blktap/blockstore.h	Sat Mar 12 21:43:58 2005 +0000
    23.2 +++ b/tools/blktap/blockstore.h	Fri Mar 18 03:37:54 2005 +0000
    23.3 @@ -9,6 +9,7 @@
    23.4  #ifndef __BLOCKSTORE_H__
    23.5  #define __BLOCKSTORE_H__
    23.6  
    23.7 +#include <netinet/in.h>
    23.8  #include <xc.h>
    23.9  
   23.10  #define BLOCK_SIZE  4096
   23.11 @@ -24,8 +25,83 @@
   23.12  extern void *newblock();
   23.13  extern void *readblock(u64 id);
   23.14  extern u64 allocblock(void *block);
   23.15 +extern u64 allocblock_hint(void *block, u64 hint);
   23.16  extern int writeblock(u64 id, void *block);
   23.17  extern void freeblock(void *block);
   23.18  extern int __init_blockstore(void);
   23.19  
   23.20 +#define ALLOCFAIL (((u64)(-1)))
   23.21 +
   23.22 +/* Distribution
   23.23 + */
   23.24 +#define BLOCKSTORED_PORT 9346
   23.25 +
   23.26 +struct bshdr_t_struct {
   23.27 +    u32            operation;
   23.28 +    u32            flags;
   23.29 +    u64            id;
   23.30 +} __attribute__ ((packed));
   23.31 +typedef struct bshdr_t_struct bshdr_t;
   23.32 +
   23.33 +struct bsmsg_t_struct {
   23.34 +    bshdr_t        hdr;
   23.35 +    unsigned char  block[BLOCK_SIZE];
   23.36 +} __attribute__ ((packed));
   23.37 +
   23.38 +typedef struct bsmsg_t_struct bsmsg_t;
   23.39 +
   23.40 +#define MSGBUFSIZE_OP    sizeof(u32)
   23.41 +#define MSGBUFSIZE_FLAGS (sizeof(u32) + sizeof(u32))
   23.42 +#define MSGBUFSIZE_ID    (sizeof(u32) + sizeof(u32) + sizeof(u64))
   23.43 +#define MSGBUFSIZE_BLOCK sizeof(bsmsg_t)
   23.44 +
   23.45 +#define BSOP_READBLOCK  0x01
   23.46 +#define BSOP_WRITEBLOCK 0x02
   23.47 +#define BSOP_ALLOCBLOCK 0x03
   23.48 +
   23.49 +#define BSOP_FLAG_ERROR 0x01
   23.50 +
   23.51 +#define BS_ALLOC_SKIP 10
   23.52 +#define BS_ALLOC_HACK
   23.53 +
   23.54 +/* Remote hosts and cluster map - XXX need to generalise
   23.55 + */
   23.56 +
   23.57 +/*
   23.58 +
   23.59 +  Interim ID format is
   23.60 +
   23.61 +  63 60 59                40 39                20 19                 0
   23.62 +  +----+--------------------+--------------------+--------------------+
   23.63 +  |map | replica 2          | replica 1          | replica 0          |
   23.64 +  +----+--------------------+--------------------+--------------------+
   23.65 +
   23.66 +  The map is an index into a table detailing which machines form the
   23.67 +  cluster.
   23.68 +
   23.69 + */
   23.70 +
   23.71 +#define BSID_REPLICA0(_id) ((_id)&0xfffffULL)
   23.72 +#define BSID_REPLICA1(_id) (((_id)>>20)&0xfffffULL)
   23.73 +#define BSID_REPLICA2(_id) (((_id)>>40)&0xfffffULL)
   23.74 +#define BSID_MAP(_id)      (((_id)>>60)&0xfULL)
   23.75 +
   23.76 +#define BSID(_map, _rep0, _rep1, _rep2) ((((u64)(_map))<<60) | \
   23.77 +                                         (((u64)(_rep2))<<40) | \
   23.78 +                                         (((u64)(_rep1))<<20) | ((u64)(_rep0)))
   23.79 +
   23.80 +typedef struct bsserver_t_struct {
   23.81 +    char              *hostname;
   23.82 +    struct sockaddr_in sin;
   23.83 +} bsserver_t;
   23.84 +
   23.85 +#define MAX_SERVERS 16
   23.86 +
   23.87 +#define CLUSTER_MAX_REPLICAS 3
   23.88 +typedef struct bscluster_t_struct {
   23.89 +    int servers[CLUSTER_MAX_REPLICAS];
   23.90 +} bscluster_t;
   23.91 +
   23.92 +#define MAX_CLUSTERS 16
   23.93 +
   23.94  #endif /* __BLOCKSTORE_H__ */
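
To make the interim id layout above concrete, the following hedged example packs
and unpacks one block id with the BSID macros; the map index and the three
per-replica block numbers are arbitrary values chosen purely for illustration.

    #include <assert.h>
    #include "blockstore.h"

    int main(void)
    {
        /* cluster map 2; replica block numbers 0x111, 0x222 and 0x333 (20 bits each) */
        u64 id = BSID(2, 0x111, 0x222, 0x333);

        assert(id == 0x2003330022200111ULL);    /* map in bits 63-60, then replica 2, 1, 0 */
        assert(BSID_MAP(id)      == 2);
        assert(BSID_REPLICA0(id) == 0x111);
        assert(BSID_REPLICA1(id) == 0x222);
        assert(BSID_REPLICA2(id) == 0x333);
        return 0;
    }
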
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/tools/blktap/blockstored.c	Fri Mar 18 03:37:54 2005 +0000
    24.3 @@ -0,0 +1,276 @@
    24.4 +/**************************************************************************
    24.5 + * 
    24.6 + * blockstored.c
    24.7 + *
    24.8 + * Block store daemon.
    24.9 + *
   24.10 + */
   24.11 +
   24.12 +#include <fcntl.h>
   24.13 +#include <unistd.h>
   24.14 +#include <stdio.h>
   24.15 +#include <stdlib.h>
   24.16 +#include <string.h>
   24.17 +#include <sys/types.h>
   24.18 +#include <sys/stat.h>
   24.19 +#include <sys/socket.h>
   24.20 +#include <sys/ioctl.h>
   24.21 +#include <netinet/in.h>
   24.22 +#include <errno.h>
   24.23 +#include "blockstore.h"
   24.24 +
   24.25 +//#define BSDEBUG
   24.26 +
   24.27 +int readblock_into(u64 id, void *block);
   24.28 +
   24.29 +int open_socket(u16 port) {
   24.30 +    
   24.31 +    struct sockaddr_in sn;
   24.32 +    int sock;
   24.33 +
   24.34 +    sock = socket(AF_INET, SOCK_DGRAM, 0);
   24.35 +    if (sock < 0) {
   24.36 +        perror("Bad socket");
   24.37 +        return -1;
   24.38 +    }
   24.39 +    memset(&sn, 0, sizeof(sn));
   24.40 +    sn.sin_family = AF_INET;
   24.41 +    sn.sin_port = htons(port);
   24.42 +    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   24.43 +    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   24.44 +        perror("bind");
   24.45 +        close(sock);
   24.46 +        return -1;
   24.47 +    }
   24.48 +
   24.49 +    return sock;
   24.50 +}
   24.51 +
   24.52 +static int block_fp = -1;
   24.53 +static int bssock = -1;
   24.54 +
   24.55 +int send_reply(struct sockaddr_in *peer, void *buffer, int len) {
   24.56 +
   24.57 +    int rc;
   24.58 +    
   24.59 +#ifdef BSDEBUG
   24.60 +    fprintf(stdout, "TX: %u bytes op=%u id=0x%llx\n",
   24.61 +            len, ((bsmsg_t *)buffer)->hdr.operation, ((bsmsg_t *)buffer)->hdr.id);
   24.62 +#endif
   24.63 +    rc = sendto(bssock, buffer, len, 0, (struct sockaddr *)peer, sizeof(*peer));
   24.64 +    if (rc < 0) {
   24.65 +        perror("send_reply");
   24.66 +        return 1;
   24.67 +    }
   24.68 +
   24.69 +
   24.70 +    return 0;
   24.71 +}
   24.72 +
   24.73 +static bsmsg_t msgbuf;
   24.74 +
   24.75 +void service_loop(void) {
   24.76 +
   24.77 +    for (;;) {
   24.78 +        int rc, len;
   24.79 +        struct sockaddr_in from;
   24.80 +        socklen_t slen = sizeof(from);
   24.81 +        u64 bid;
   24.82 +
   24.83 +        len = recvfrom(bssock, (void *)&msgbuf, sizeof(msgbuf), 0,
   24.84 +                       (struct sockaddr *)&from, &slen);
   24.85 +
   24.86 +        if (len < 0) {
   24.87 +            perror("recvfrom");
   24.88 +            continue;
   24.89 +        }
   24.90 +
   24.91 +        if (len < MSGBUFSIZE_OP) {
   24.92 +            fprintf(stderr, "Short packet.\n");
   24.93 +            continue;
   24.94 +        }
   24.95 +
   24.96 +#ifdef BSDEBUG
   24.97 +        fprintf(stdout, "RX: %u bytes op=%u id=0x%llx\n",
   24.98 +                len, msgbuf.hdr.operation, msgbuf.hdr.id);
   24.99 +#endif
  24.100 +
  24.101 +        switch (msgbuf.hdr.operation) {
  24.102 +        case BSOP_READBLOCK:
  24.103 +            if (len < MSGBUFSIZE_ID) {
  24.104 +                fprintf(stderr, "Short packet (readblock %u).\n", len);
  24.105 +                continue;
  24.106 +            }
  24.107 +            rc = readblock_into(msgbuf.hdr.id, msgbuf.block);
  24.108 +            if (rc < 0) {
  24.109 +                fprintf(stderr, "readblock error\n");
  24.110 +                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  24.111 +                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  24.112 +                continue;
  24.113 +            }
  24.114 +            msgbuf.hdr.flags = 0;
  24.115 +            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_BLOCK);
  24.116 +            break;
  24.117 +        case BSOP_WRITEBLOCK:
  24.118 +            if (len < MSGBUFSIZE_BLOCK) {
  24.119 +                fprintf(stderr, "Short packet (writeblock %u).\n", len);
  24.120 +                continue;
  24.121 +            }
  24.122 +            rc = writeblock(msgbuf.hdr.id, msgbuf.block);
  24.123 +            if (rc < 0) {
  24.124 +                fprintf(stderr, "writeblock error\n");
  24.125 +                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  24.126 +                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  24.127 +                continue;
  24.128 +            }
  24.129 +            msgbuf.hdr.flags = 0;
  24.130 +            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  24.131 +            break;
  24.132 +        case BSOP_ALLOCBLOCK:
  24.133 +            if (len < MSGBUFSIZE_BLOCK) {
  24.134 +                fprintf(stderr, "Short packet (allocblock %u).\n", len);
  24.135 +                continue;
  24.136 +            }
  24.137 +            bid = allocblock(msgbuf.block);
  24.138 +            if (bid == ALLOCFAIL) {
  24.139 +                fprintf(stderr, "allocblock error\n");
  24.140 +                msgbuf.hdr.flags = BSOP_FLAG_ERROR;
  24.141 +                send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  24.142 +                continue;
  24.143 +            }
  24.144 +            msgbuf.hdr.id = bid;
  24.145 +            msgbuf.hdr.flags = 0;
  24.146 +            send_reply(&from, (void *)&msgbuf, MSGBUFSIZE_ID);
  24.147 +            break;
  24.148 +        }
  24.149 +
  24.150 +    }
  24.151 +}
  24.152 + 
  24.153 +/**
  24.154 + * readblock: read a block from disk
  24.155 + *   @id: block id to read
  24.156 + *   @block: pointer to buffer to receive block
  24.157 + *
  24.158 + *   @return: 0 if OK, other on error
  24.159 + */
  24.160 +
  24.161 +int readblock_into(u64 id, void *block) {
  24.162 +    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  24.163 +        fprintf(stderr, "readblock lseek (offset %llu)\n", (id - 1) * BLOCK_SIZE);
  24.164 +        perror("readblock lseek");
  24.165 +        return -1;
  24.166 +    }
  24.167 +    if (read(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  24.168 +        perror("readblock read");
  24.169 +        return -1;
  24.170 +    }
  24.171 +    return 0;
  24.172 +}
  24.173 +
  24.174 +/**
  24.175 + * writeblock: write an existing block to disk
  24.176 + *   @id: block id
  24.177 + *   @block: pointer to block
  24.178 + *
  24.179 + *   @return: zero on success, -1 on failure
  24.180 + */
  24.181 +int writeblock(u64 id, void *block) {
  24.182 +    if (lseek64(block_fp, ((off64_t) id - 1LL) * BLOCK_SIZE, SEEK_SET) < 0) {
  24.183 +        perror("writeblock lseek");
  24.184 +        return -1;
  24.185 +    }
  24.186 +    if (write(block_fp, block, BLOCK_SIZE) < 0) {
  24.187 +        perror("writeblock write");
  24.188 +        return -1;
  24.189 +    }
  24.190 +    return 0;
  24.191 +}
  24.192 +
  24.193 +/**
  24.194 + * allocblock: write a new block to disk
  24.195 + *   @block: pointer to block
  24.196 + *
  24.197 + *   @return: new id of block on disk
  24.198 + */
  24.199 +static u64 lastblock = 0;
  24.200 +
  24.201 +u64 allocblock(void *block) {
  24.202 +    u64 lb;
  24.203 +    off64_t pos;
  24.204 +
  24.205 +    retry:
  24.206 +    pos = lseek64(block_fp, 0, SEEK_END);
  24.207 +    if (pos == (off64_t)-1) {
  24.208 +        perror("allocblock lseek");
  24.209 +        return ALLOCFAIL;
  24.210 +    }
  24.211 +    if (pos % BLOCK_SIZE != 0) {
  24.212 +        fprintf(stderr, "file size not multiple of %d\n", BLOCK_SIZE);
  24.213 +        return ALLOCFAIL;
  24.214 +    }
  24.215 +    if (write(block_fp, block, BLOCK_SIZE) != BLOCK_SIZE) {
  24.216 +        perror("allocblock write");
  24.217 +        return ALLOCFAIL;
  24.218 +    }
  24.219 +    lb = pos / BLOCK_SIZE + 1;
  24.220 +
  24.221 +#ifdef BS_ALLOC_HACK
  24.222 +    if (lb < BS_ALLOC_SKIP)
  24.223 +        goto retry;
  24.224 +#endif
  24.225 +    
  24.226 +    if (lb <= lastblock)
  24.227 +        printf("[*** %llu already allocated! ***]\n", lb);
  24.228 +    
  24.229 +    lastblock = lb;
  24.230 +    return lb;
  24.231 +}
  24.232 +
  24.233 +/**
  24.234 + * newblock: get a new in-memory block set to zeros
  24.235 + *
  24.236 + *   @return: pointer to new block, NULL on error
  24.237 + */
  24.238 +void *newblock() {
  24.239 +    void *block = malloc(BLOCK_SIZE);
  24.240 +    if (block == NULL) {
  24.241 +        perror("newblock");
  24.242 +        return NULL;
  24.243 +    }
  24.244 +    memset(block, 0, BLOCK_SIZE);
  24.245 +    return block;
  24.246 +}
  24.247 +
  24.248 +
  24.249 +/**
  24.250 + * freeblock: unallocate an in-memory block
  24.251 + *   @id: block id (zero if this is only in-memory)
  24.252 + *   @block: block to be freed
  24.253 + */
  24.254 +void freeblock(void *block) {
  24.255 +    if (block != NULL)
  24.256 +        free(block);
  24.257 +}
  24.258 +
  24.259 +
  24.260 +int main(int argc, char **argv)
  24.261 +{
  24.262 +    block_fp = open("blockstore.dat", O_RDWR | O_CREAT | O_LARGEFILE, 0644);
  24.263 +
  24.264 +    if (block_fp < 0) {
  24.265 +        perror("open");
  24.266 +        return -1;
  24.267 +    }
  24.268 +
  24.269 +    bssock = open_socket(BLOCKSTORED_PORT);
  24.270 +    if (bssock < 0) {
  24.271 +        return -1;
  24.272 +    }
  24.273 +
  24.274 +    service_loop();
  24.275 +    
  24.276 +    close(bssock);
  24.277 +
  24.278 +    return 0;
  24.279 +}
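
One detail of the daemon that is easy to miss: block ids are 1-based (an id of 0
denotes an in-memory-only block, as noted at freeblock()), so readblock_into()
and writeblock() both seek to (id - 1) * BLOCK_SIZE within blockstore.dat. A
hedged sketch of that mapping; the helper name is invented for illustration.

    #include <stdint.h>

    #define BLOCK_SIZE 4096    /* as in blockstore.h */

    /* Byte offset in blockstore.dat of a 1-based block id. */
    static uint64_t block_offset(uint64_t id)
    {
        return (id - 1) * (uint64_t)BLOCK_SIZE;    /* id 1 -> 0, id 3 -> 8192 */
    }
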
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/tools/blktap/bstest.c	Fri Mar 18 03:37:54 2005 +0000
    25.3 @@ -0,0 +1,191 @@
    25.4 +/**************************************************************************
    25.5 + * 
    25.6 + * bstest.c
    25.7 + *
    25.8 + * Block store daemon test program.
    25.9 + *
   25.10 + * usage: bstest <host>|X {r|w|a} ID 
   25.11 + *
   25.12 + */
   25.13 +
   25.14 +#include <fcntl.h>
   25.15 +#include <unistd.h>
   25.16 +#include <stdio.h>
   25.17 +#include <stdlib.h>
   25.18 +#include <string.h>
   25.19 +#include <sys/types.h>
   25.20 +#include <sys/stat.h>
   25.21 +#include <sys/socket.h>
   25.22 +#include <sys/ioctl.h>
   25.23 +#include <netinet/in.h>
   25.24 +#include <netdb.h>
   25.25 +#include <errno.h>
   25.26 +#include "blockstore.h"
   25.27 +
   25.28 +int direct(char *host, u32 op, u64 id, int len) {
   25.29 +    struct sockaddr_in sn, peer;
   25.30 +    int sock;
   25.31 +    bsmsg_t msgbuf;
   25.32 +    int rc;  socklen_t slen;
   25.33 +    struct hostent *addr;
   25.34 +
   25.35 +    addr = gethostbyname(host);
   25.36 +    if (!addr) {
   25.37 +        perror("bad hostname");
   25.38 +        exit(1);
   25.39 +    }
   25.40 +    peer.sin_family = addr->h_addrtype;
   25.41 +    peer.sin_port = htons(BLOCKSTORED_PORT);
   25.42 +    peer.sin_addr.s_addr =  ((struct in_addr *)(addr->h_addr))->s_addr;
   25.43 +    fprintf(stderr, "Sending to: %u.%u.%u.%u\n",
   25.44 +            (unsigned int)(unsigned char)addr->h_addr[0],
   25.45 +            (unsigned int)(unsigned char)addr->h_addr[1],
   25.46 +            (unsigned int)(unsigned char)addr->h_addr[2],
   25.47 +            (unsigned int)(unsigned char)addr->h_addr[3]);
   25.48 +
   25.49 +    sock = socket(AF_INET, SOCK_DGRAM, 0);
   25.50 +    if (sock < 0) {
   25.51 +        perror("Bad socket");
   25.52 +        exit(1);
   25.53 +    }
   25.54 +    memset(&sn, 0, sizeof(sn));
   25.55 +    sn.sin_family = AF_INET;
   25.56 +    sn.sin_port = htons(BLOCKSTORED_PORT);
   25.57 +    sn.sin_addr.s_addr = htonl(INADDR_ANY);
   25.58 +    if (bind(sock, (struct sockaddr *)&sn, sizeof(sn)) < 0) {
   25.59 +        perror("bind");
   25.60 +        close(sock);
   25.61 +        exit(1);
   25.62 +    }
   25.63 +
   25.64 +    memset((void *)&msgbuf, 0, sizeof(msgbuf));
   25.65 +    msgbuf.hdr.operation = op;
   25.66 +    msgbuf.hdr.id = id;
   25.67 +
   25.68 +    rc = sendto(sock, (void *)&msgbuf, len, 0,
   25.69 +                (struct sockaddr *)&peer, sizeof(peer));
   25.70 +    if (rc < 0) {
   25.71 +        perror("sendto");
   25.72 +        exit(1);
   25.73 +    }
   25.74 +
   25.75 +    slen = sizeof(peer);
   25.76 +    len = recvfrom(sock, (void *)&msgbuf, sizeof(msgbuf), 0,
   25.77 +                   (struct sockaddr *)&peer, &slen);
   25.78 +    if (len < 0) {
   25.79 +        perror("recvfrom");
   25.80 +        exit(1);
   25.81 +    }
   25.82 +
   25.83 +    printf("Reply %u bytes:\n", len);
   25.84 +    if (len >= MSGBUFSIZE_OP)
   25.85 +        printf("  operation: %u\n", msgbuf.hdr.operation);
   25.86 +    if (len >= MSGBUFSIZE_FLAGS)
   25.87 +        printf("  flags: 0x%x\n", msgbuf.hdr.flags);
   25.88 +    if (len >= MSGBUFSIZE_ID)
   25.89 +        printf("  id: %llu\n", msgbuf.hdr.id);
   25.90 +    if (len >= (MSGBUFSIZE_ID + 4))
   25.91 +        printf("  data: %02x %02x %02x %02x...\n",
   25.92 +               (unsigned int)msgbuf.block[0],
   25.93 +               (unsigned int)msgbuf.block[1],
   25.94 +               (unsigned int)msgbuf.block[2],
   25.95 +               (unsigned int)msgbuf.block[3]);
   25.96 +    
   25.97 +    if (sock > 0)
   25.98 +        close(sock);
   25.99 +   
  25.100 +    return 0;
  25.101 +}
  25.102 +
  25.103 +int main (int argc, char **argv) {
  25.104 +
  25.105 +    u32 op = 0;
  25.106 +    u64 id = 0;
  25.107 +    int len = 0, rc;
  25.108 +    void *block;
  25.109 +
  25.110 +    if (argc < 3) {
  25.111 +        fprintf(stderr, "usage: bstest <host>|X {r|w|a} ID\n");
  25.112 +        return 1;
  25.113 +    }
  25.114 +
  25.115 +    switch (argv[2][0]) {
  25.116 +    case 'r':
  25.117 +    case 'R':
  25.118 +        op = BSOP_READBLOCK;
  25.119 +        len = MSGBUFSIZE_ID;
  25.120 +        break;
  25.121 +    case 'w':
  25.122 +    case 'W':
  25.123 +        op = BSOP_WRITEBLOCK;
  25.124 +        len = MSGBUFSIZE_BLOCK;
  25.125 +        break;
  25.126 +    case 'a':
  25.127 +    case 'A':
  25.128 +        op = BSOP_ALLOCBLOCK;
  25.129 +        len = MSGBUFSIZE_BLOCK;
  25.130 +        break;
  25.131 +    default:
  25.132 +        fprintf(stderr, "Unknown action '%s'.\n", argv[2]);
  25.133 +        return 1;
  25.134 +    }
  25.135 +
  25.136 +    if (argc >= 4)
  25.137 +        id = atoll(argv[3]);
  25.138 +
  25.139 +    if (strcmp(argv[1], "X") == 0) {
  25.140 +        rc = __init_blockstore();
  25.141 +        if (rc < 0) {
  25.142 +            fprintf(stderr, "blockstore init failed.\n");
  25.143 +            return 1;
  25.144 +        }
  25.145 +        switch(op) {
  25.146 +        case BSOP_READBLOCK:
  25.147 +            block = readblock(id);
  25.148 +            if (block) {
  25.149 +                printf("data: %02x %02x %02x %02x...\n",
  25.150 +                       (unsigned int)((unsigned char*)block)[0],
  25.151 +                       (unsigned int)((unsigned char*)block)[1],
  25.152 +                       (unsigned int)((unsigned char*)block)[2],
  25.153 +                       (unsigned int)((unsigned char*)block)[3]);
  25.154 +            }
  25.155 +            break;
  25.156 +        case BSOP_WRITEBLOCK:
  25.157 +            block = malloc(BLOCK_SIZE);
  25.158 +            if (!block) {
  25.159 +                perror("bstest malloc");
  25.160 +                return 1;
  25.161 +            }
  25.162 +            memset(block, 0, BLOCK_SIZE);
  25.163 +            rc = writeblock(id, block);
  25.164 +            if (rc != 0) {
  25.165 +                printf("error\n");
  25.166 +            }
  25.167 +            else {
  25.168 +                printf("OK\n");
  25.169 +            }
  25.170 +            break;
  25.171 +        case BSOP_ALLOCBLOCK:
  25.172 +            block = malloc(BLOCK_SIZE);
  25.173 +            if (!block) {
  25.174 +                perror("bstest malloc");
  25.175 +                return 1;
  25.176 +            }
  25.177 +            memset(block, 0, BLOCK_SIZE);
  25.178 +            id = allocblock_hint(block, id);
  25.179 +            if (id == 0) {
  25.180 +                printf("error\n");
  25.181 +            }
  25.182 +            else {
  25.183 +                printf("ID: %llu\n", id);
  25.184 +            }
  25.185 +            break;
  25.186 +        }
  25.187 +    }
  25.188 +    else {
  25.189 +        direct(argv[1], op, id, len);
  25.190 +    }
  25.191 +
  25.192 +
  25.193 +    return 0;
  25.194 +}
    26.1 --- a/tools/examples/xend-config.sxp	Sat Mar 12 21:43:58 2005 +0000
    26.2 +++ b/tools/examples/xend-config.sxp	Fri Mar 18 03:37:54 2005 +0000
    26.3 @@ -3,10 +3,22 @@
    26.4  # Port xend should use for the HTTP interface.
    26.5  (xend-port         8000)
    26.6  
    26.7 -# Address xend should listen on.
    26.8 +# Port xend should use for the event interface.
    26.9 +(xend-event-port   8001)
   26.10 +
   26.11 +# Address xend should listen on for HTTP connections.
   26.12  # Specifying 'localhost' prevents remote connections.
   26.13  # Specifying the empty string '' allows all connections.
   26.14 -(xend-address      '')
   26.15 +(xend-address      'localhost')
   26.16 +
   26.17 +# The port xend should start from when allocating a port
   26.18 +# for a domain console.
   26.19 +(console-port-base 9600)
   26.20 +
   26.21 +# Address xend should listen on for console connections.
   26.22 +# Specifying 'localhost' prevents remote connections.
   26.23 +# Specifying the empty string '' allows all connections.
   26.24 +(console-address   'localhost')
   26.25  
   26.26  ## Use the following if VIF traffic is routed.
   26.27  # The script used to start/stop networking for xend.
    27.1 --- a/tools/libxc/Makefile	Sat Mar 12 21:43:58 2005 +0000
    27.2 +++ b/tools/libxc/Makefile	Fri Mar 18 03:37:54 2005 +0000
    27.3 @@ -29,6 +29,7 @@ SRCS     += xc_linux_save.c
    27.4  SRCS     += xc_misc.c
    27.5  SRCS     += xc_physdev.c
    27.6  SRCS     += xc_private.c
    27.7 +SRCS     += xc_ptrace.c
    27.8  SRCS     += xc_rrobin.c
    27.9  SRCS     += xc_vmx_build.c
   27.10  
    28.1 --- a/tools/libxc/xc_linux_build.c	Sat Mar 12 21:43:58 2005 +0000
    28.2 +++ b/tools/libxc/xc_linux_build.c	Fri Mar 18 03:37:54 2005 +0000
    28.3 @@ -459,10 +459,11 @@ int xc_linux_build(int xc_handle,
    28.4  
    28.5      memset( &launch_op, 0, sizeof(launch_op) );
    28.6  
    28.7 -    launch_op.u.builddomain.domain   = (domid_t)domid;
    28.8 -    launch_op.u.builddomain.ctxt = ctxt;
    28.9 +    launch_op.u.setdomaininfo.domain   = (domid_t)domid;
   28.10 +    launch_op.u.setdomaininfo.exec_domain = 0;
   28.11 +    launch_op.u.setdomaininfo.ctxt = ctxt;
   28.12  
   28.13 -    launch_op.cmd = DOM0_BUILDDOMAIN;
   28.14 +    launch_op.cmd = DOM0_SETDOMAININFO;
   28.15      rc = do_dom0_op(xc_handle, &launch_op);
   28.16      
   28.17      return rc;
    29.1 --- a/tools/libxc/xc_linux_restore.c	Sat Mar 12 21:43:58 2005 +0000
    29.2 +++ b/tools/libxc/xc_linux_restore.c	Fri Mar 18 03:37:54 2005 +0000
    29.3 @@ -638,9 +638,10 @@ int xc_linux_restore(int xc_handle, XcIO
    29.4  
    29.5      xcio_info(ioctxt, "Domain ready to be built.\n");
    29.6  
    29.7 -    op.cmd = DOM0_BUILDDOMAIN;
    29.8 -    op.u.builddomain.domain   = (domid_t)dom;
    29.9 -    op.u.builddomain.ctxt = &ctxt;
   29.10 +    op.cmd = DOM0_SETDOMAININFO;
   29.11 +    op.u.setdomaininfo.domain   = (domid_t)dom;
   29.12 +    op.u.setdomaininfo.exec_domain   = 0;
   29.13 +    op.u.setdomaininfo.ctxt = &ctxt;
   29.14      rc = do_dom0_op(xc_handle, &op);
   29.15  
   29.16      if ( rc != 0 )
    30.1 --- a/tools/libxc/xc_plan9_build.c	Sat Mar 12 21:43:58 2005 +0000
    30.2 +++ b/tools/libxc/xc_plan9_build.c	Fri Mar 18 03:37:54 2005 +0000
    30.3 @@ -533,10 +533,11 @@ xc_plan9_build(int xc_handle,
    30.4  
    30.5  	memset(&launch_op, 0, sizeof (launch_op));
    30.6  
    30.7 -	launch_op.u.builddomain.domain = (domid_t) domid;
    30.8 -	//  launch_op.u.builddomain.num_vifs = 1;
    30.9 -	launch_op.u.builddomain.ctxt = ctxt;
   30.10 -	launch_op.cmd = DOM0_BUILDDOMAIN;
   30.11 +	launch_op.u.setdomaininfo.domain = (domid_t) domid;
   30.12 +	launch_op.u.setdomaininfo.exec_domain = 0;
   30.13 +	//  launch_op.u.setdomaininfo.num_vifs = 1;
   30.14 +	launch_op.u.setdomaininfo.ctxt = ctxt;
   30.15 +	launch_op.cmd = DOM0_SETDOMAININFO;
   30.16  	rc = do_dom0_op(xc_handle, &launch_op);
   30.17  
   30.18  	fprintf(stderr, "RC is %d\n", rc);
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/tools/libxc/xc_ptrace.c	Fri Mar 18 03:37:54 2005 +0000
    31.3 @@ -0,0 +1,354 @@
    31.4 +#include <sys/ptrace.h>
    31.5 +#include <sys/wait.h>
    31.6 +#include "xc_private.h"
    31.7 +#include <asm/elf.h>
    31.8 +#include <time.h>
    31.9 +
   31.10 +
   31.11 +#define BSD_PAGE_MASK	(PAGE_SIZE-1)
   31.12 +#define	PG_FRAME	(~((unsigned long)BSD_PAGE_MASK))
   31.13 +#define PDRSHIFT        22
   31.14 +#define	PSL_T		0x00000100	/* trace enable bit */
   31.15 +
   31.16 +
   31.17 +/*
   31.18 + * long  
   31.19 + * ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
   31.20 + */
   31.21 +
   31.22 +long xc_ptrace(enum __ptrace_request request, 
   31.23 +	       pid_t pid, void *addr, void *data);
   31.24 +int waitdomain(int domain, int *status, int options);
   31.25 +
   31.26 +char * ptrace_names[] = {
   31.27 +    "PTRACE_TRACEME",
   31.28 +    "PTRACE_PEEKTEXT",
   31.29 +    "PTRACE_PEEKDATA",
   31.30 +    "PTRACE_PEEKUSER",
   31.31 +    "PTRACE_POKETEXT",
   31.32 +    "PTRACE_POKEDATA",
   31.33 +    "PTRACE_POKEUSER",
   31.34 +    "PTRACE_CONT",
   31.35 +    "PTRACE_KILL",
   31.36 +    "PTRACE_SINGLESTEP",
   31.37 +    "PTRACE_INVALID",
   31.38 +    "PTRACE_INVALID",
   31.39 +    "PTRACE_GETREGS",
   31.40 +    "PTRACE_SETREGS",
   31.41 +    "PTRACE_GETFPREGS",
   31.42 +    "PTRACE_SETFPREGS",
   31.43 +    "PTRACE_ATTACH",
   31.44 +    "PTRACE_DETACH",
   31.45 +    "PTRACE_GETFPXREGS",
   31.46 +    "PTRACE_SETFPXREGS",
   31.47 +    "PTRACE_INVALID",
   31.48 +    "PTRACE_INVALID",
   31.49 +    "PTRACE_INVALID",
   31.50 +    "PTRACE_INVALID",
   31.51 +    "PTRACE_SYSCALL",
   31.52 +};
   31.53 +
   31.54 +struct gdb_regs {
   31.55 +    long ebx; /* 0 */
   31.56 +    long ecx; /* 4 */
   31.57 +    long edx; /* 8 */
   31.58 +    long esi; /* 12 */
   31.59 +    long edi; /* 16 */
   31.60 +    long ebp; /* 20 */
   31.61 +    long eax; /* 24 */ 
   31.62 +    int  xds; /* 28 */
   31.63 +    int  xes; /* 32 */
   31.64 +    int  xfs; /* 36 */
   31.65 +    int  xgs; /* 40 */
   31.66 +    long orig_eax; /* 44 */
   31.67 +    long eip;    /* 48 */
   31.68 +    int  xcs;    /* 52 */
   31.69 +    long eflags; /* 56 */
   31.70 +    long esp;    /* 60 */     
   31.71 +    int  xss;    /* 64 */
   31.72 +};
   31.73 +#define printval(x) printf("%s = %lx\n", #x, (long)x);
   31.74 +#define SET_PT_REGS(pt, xc) \
   31.75 +{ \
   31.76 +    pt.ebx = xc.ebx; \
   31.77 +    pt.ecx = xc.ecx; \
   31.78 +    pt.edx = xc.edx; \
   31.79 +    pt.esi = xc.esi; \
   31.80 +    pt.edi = xc.edi; \
   31.81 +    pt.ebp = xc.ebp; \
   31.82 +    pt.eax = xc.eax; \
   31.83 +    pt.eip = xc.eip; \
   31.84 +    pt.xcs = xc.cs; \
   31.85 +    pt.eflags = xc.eflags; \
   31.86 +    pt.esp = xc.esp; \
   31.87 +    pt.xss = xc.ss; \
   31.88 +    pt.xes = xc.es; \
   31.89 +    pt.xds = xc.ds; \
   31.90 +    pt.xfs = xc.fs; \
   31.91 +    pt.xgs = xc.gs; \
   31.92 +}
   31.93 +
   31.94 +#define SET_XC_REGS(pt, xc) \
   31.95 +{ \
   31.96 +    xc.ebx = pt->ebx; \
   31.97 +    xc.ecx = pt->ecx; \
   31.98 +    xc.edx = pt->edx; \
   31.99 +    xc.esi = pt->esi; \
  31.100 +    xc.edi = pt->edi; \
  31.101 +    xc.ebp = pt->ebp; \
  31.102 +    xc.eax = pt->eax; \
  31.103 +    xc.eip = pt->eip; \
  31.104 +    xc.cs = pt->xcs; \
  31.105 +    xc.eflags = pt->eflags; \
  31.106 +    xc.esp = pt->esp; \
  31.107 +    xc.ss = pt->xss; \
  31.108 +    xc.es = pt->xes; \
  31.109 +    xc.ds = pt->xds; \
  31.110 +    xc.fs = pt->xfs; \
  31.111 +    xc.gs = pt->xgs; \
  31.112 +}
  31.113 +
  31.114 +
  31.115 +#define vtopdi(va) ((va) >> PDRSHIFT)
  31.116 +#define vtopti(va) (((va) >> PAGE_SHIFT) & 0x3ff)
  31.117 +
  31.118 +/* XXX application state */
  31.119 +
  31.120 +
  31.121 +static int xc_handle;
  31.122 +static int regs_valid;
  31.123 +static unsigned long cr3;
  31.124 +static full_execution_context_t ctxt;
  31.125 +
  31.126 +/* --------------------- */
  31.127 +
  31.128 +static void *
  31.129 +map_domain_va(unsigned long domid, void * guest_va)
  31.130 +{
  31.131 +    unsigned long pde, page;
  31.132 +    unsigned long va = (unsigned long)guest_va;
  31.133 +
  31.134 +    static unsigned long cr3_phys;
  31.135 +    static unsigned long *cr3_virt;
  31.136 +    static unsigned long pde_phys;
  31.137 +    static unsigned long *pde_virt;
  31.138 +    static unsigned long page_phys;
  31.139 +    static unsigned long *page_virt;
  31.140 +    if (!regs_valid) 
  31.141 +    {
  31.142 +	int retval = xc_domain_getfullinfo(xc_handle, domid, 0, NULL, &ctxt);
  31.143 +	if (retval)
  31.144 +	    goto error_out;
  31.145 +	cr3 = ctxt.pt_base;
  31.146 +	regs_valid = 1;
  31.147 +    }
  31.148 +    if (cr3 != cr3_phys) 
  31.149 +    {
  31.150 +	cr3_phys = cr3;
  31.151 +	if (cr3_virt)
  31.152 +	    munmap(cr3_virt, PAGE_SIZE);
  31.153 +	if ((cr3_virt = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
  31.154 +					     PROT_READ,
  31.155 +					     cr3_phys >> PAGE_SHIFT)) == NULL)
  31.156 +	    goto error_out;
  31.157 +    } 
  31.158 +    if ((pde = cr3_virt[vtopdi(va)]) == 0)
  31.159 +	goto error_out;
  31.160 +    if (pde != pde_phys) 
  31.161 +    {
  31.162 +	pde_phys = pde;
  31.163 +	if (pde_virt)
  31.164 +	    munmap(pde_virt, PAGE_SIZE);
  31.165 +	if ((pde_virt = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
  31.166 +					     PROT_READ,
  31.167 +					     pde_phys >> PAGE_SHIFT)) == NULL)
  31.168 +	    goto error_out;
  31.169 +    }
  31.170 +    if ((page = pde_virt[vtopti(va)]) == 0)
  31.171 +	goto error_out;
  31.172 +    if (page != page_phys) 
  31.173 +    {
  31.174 +	page_phys = page;
  31.175 +	if (page_virt)
  31.176 +	    munmap(page_virt, PAGE_SIZE);
  31.177 +	if ((page_virt = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
  31.178 +					     PROT_READ|PROT_WRITE,
  31.179 +					      page_phys >> PAGE_SHIFT)) == NULL) {
  31.180 +	    printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3, pde, page, vtopti(va));
  31.181 +	    goto error_out;
  31.182 +	}
  31.183 +    }	
  31.184 +    return (void *)(((unsigned long)page_virt) | (va & BSD_PAGE_MASK));
  31.185 +
  31.186 + error_out:
  31.187 +    return 0;
  31.188 +}
  31.189 +
  31.190 +int 
  31.191 +waitdomain(int domain, int *status, int options)
  31.192 +{
  31.193 +    dom0_op_t op;
  31.194 +    int retval;
  31.195 +    full_execution_context_t ctxt;
  31.196 +    struct timespec ts;
  31.197 +    ts.tv_sec = 0;
  31.198 +    ts.tv_nsec = 10*1000*1000;
  31.199 +
  31.200 +    if (!xc_handle)
  31.201 +	if ((xc_handle = xc_interface_open()) < 0) 
  31.202 +	{
  31.203 +	    printf("xc_interface_open failed\n");
  31.204 +	    return -1;
  31.205 +	}
  31.206 +    op.cmd = DOM0_GETDOMAININFO;
  31.207 +    op.u.getdomaininfo.domain = domain;
  31.208 +    op.u.getdomaininfo.exec_domain = 0;
  31.209 +    op.u.getdomaininfo.ctxt = &ctxt;
  31.210 + retry:
  31.211 +
  31.212 +    retval = do_dom0_op(xc_handle, &op);
  31.213 +    if (retval) {
  31.214 +	printf("getdomaininfo failed\n");
  31.215 +	goto done;
  31.216 +    }
  31.217 +    *status = op.u.getdomaininfo.flags;
  31.218 +    
  31.219 +    if (options & WNOHANG)
  31.220 +	goto done;
  31.221 +	
  31.222 +
  31.223 +    if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED)) {	
  31.224 +	nanosleep(&ts,NULL);
  31.225 +	goto retry;
  31.226 +    }
  31.227 + done:
  31.228 +    return retval;
  31.229 +
  31.230 +}
  31.231 +
  31.232 +long
  31.233 +xc_ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data)
  31.234 +{
  31.235 +    dom0_op_t op;
  31.236 +    int status = 0;
  31.237 +    xc_domaininfo_t info;
  31.238 +    struct gdb_regs pt;
  31.239 +    long retval = 0;
  31.240 +    unsigned long *guest_va;
  31.241 +
  31.242 +    op.interface_version = DOM0_INTERFACE_VERSION;
  31.243 +    
  31.244 +    if (!xc_handle)
  31.245 +	if ((xc_handle = xc_interface_open()) < 0)
  31.246 +	    return -1;
  31.247 +#if 0
  31.248 +    printf("%20s %d, %p, %p \n", ptrace_names[request], pid, addr, data);
  31.249 +#endif
  31.250 +    switch (request) {	
  31.251 +    case PTRACE_PEEKTEXT:
  31.252 +    case PTRACE_PEEKDATA:
  31.253 +    case PTRACE_POKETEXT:
  31.254 +    case PTRACE_POKEDATA:
  31.255 +	if ((guest_va = (unsigned long *)map_domain_va(pid, addr)) == NULL) {
  31.256 +	    status = EFAULT;
  31.257 +	    goto done;
  31.258 +	}
  31.259 +
  31.260 +	if (request == PTRACE_PEEKTEXT || request == PTRACE_PEEKDATA)
  31.261 +	    retval = *guest_va;
  31.262 +	else
  31.263 +	    *guest_va = (unsigned long)data;
  31.264 +	break;
  31.265 +    case PTRACE_GETREGS:
  31.266 +    case PTRACE_GETFPREGS:
  31.267 +    case PTRACE_GETFPXREGS:
  31.268 +	/* XXX hard-coding UP */
  31.269 +	retval = xc_domain_getfullinfo(xc_handle, pid, 0, &info, &ctxt);
  31.270 +
  31.271 +	if (retval) {
  31.272 +	    printf("getfullinfo failed\n");
  31.273 +	    goto done;
  31.274 +	}
  31.275 +	if (request == PTRACE_GETREGS) {
  31.276 +		SET_PT_REGS(pt, ctxt.cpu_ctxt); 
  31.277 +		memcpy(data, &pt, sizeof(elf_gregset_t));
  31.278 +	} else if (request == PTRACE_GETFPREGS)
  31.279 +	    memcpy(data, &ctxt.fpu_ctxt, sizeof(elf_fpregset_t));
  31.280 +	else /*if (request == PTRACE_GETFPXREGS)*/
  31.281 +	    memcpy(data, &ctxt.fpu_ctxt, sizeof(elf_fpxregset_t));
  31.282 +	cr3 = ctxt.pt_base;
  31.283 +	regs_valid = 1;
  31.284 +	break;
  31.285 +    case PTRACE_SETREGS:
  31.286 +	op.cmd = DOM0_SETDOMAININFO;
  31.287 +	SET_XC_REGS(((struct gdb_regs *)data), ctxt.cpu_ctxt);
  31.288 +	op.u.setdomaininfo.domain = pid;
  31.289 +	/* XXX need to understand multiple exec_domains */
  31.290 +	op.u.setdomaininfo.exec_domain = 0;
  31.291 +	op.u.setdomaininfo.ctxt = &ctxt;
  31.292 +	retval = do_dom0_op(xc_handle, &op);
  31.293 +	if (retval)
  31.294 +	    goto done;
  31.295 +
  31.296 +	break;
  31.297 +    case PTRACE_ATTACH:
  31.298 +	op.cmd = DOM0_GETDOMAININFO;
  31.299 +	op.u.getdomaininfo.domain = pid;
  31.300 +	op.u.getdomaininfo.exec_domain = 0;
  31.301 +	op.u.getdomaininfo.ctxt = &ctxt;
  31.302 +	retval = do_dom0_op(xc_handle, &op);
  31.303 +	if (retval) {
  31.304 +	    perror("dom0 op failed");
  31.305 +	    goto done;
  31.306 +	}
  31.307 +	if (op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) {
  31.308 +	    printf("domain currently paused\n");
  31.309 +	    goto done;
  31.310 +	}
  31.311 +	printf("domain not currently paused\n");
  31.312 +	op.cmd = DOM0_PAUSEDOMAIN;
  31.313 +	op.u.pausedomain.domain = pid;
  31.314 +	retval = do_dom0_op(xc_handle, &op);
  31.315 +	break;
  31.316 +    case PTRACE_SINGLESTEP:
  31.317 +	ctxt.cpu_ctxt.eflags |= PSL_T;
  31.318 +	op.cmd = DOM0_SETDOMAININFO;
  31.319 +	op.u.setdomaininfo.domain = pid;
  31.320 +	op.u.setdomaininfo.exec_domain = 0;
  31.321 +	op.u.setdomaininfo.ctxt = &ctxt;
  31.322 +	retval = do_dom0_op(xc_handle, &op);	
  31.323 +	if (retval) {
  31.324 +	    perror("dom0 op failed");
  31.325 +	    goto done;
  31.326 +	}	/* fall through: unpause the domain, as for PTRACE_CONT */
  31.327 +    case PTRACE_CONT:
  31.328 +    case PTRACE_DETACH:
  31.329 +	regs_valid = 0;
  31.330 +	op.cmd = DOM0_UNPAUSEDOMAIN;
  31.331 +	op.u.unpausedomain.domain = pid > 0 ? pid : -pid;
  31.332 +	retval = do_dom0_op(xc_handle, &op);
  31.333 +	break;
  31.334 +    case PTRACE_SETFPREGS:
  31.335 +    case PTRACE_SETFPXREGS:
  31.336 +    case PTRACE_PEEKUSER:
  31.337 +    case PTRACE_POKEUSER:
  31.338 +    case PTRACE_SYSCALL:
  31.339 +    case PTRACE_KILL:
  31.340 +#ifdef DEBUG
  31.341 +	printf("unsupported xc_ptrace request %s\n", ptrace_names[request]);
  31.342 +#endif
  31.343 +	/* XXX not yet supported */
  31.344 +	status = ENOSYS;
  31.345 +	break;
  31.346 +    case PTRACE_TRACEME:
  31.347 +	printf("PTRACE_TRACEME is an invalid request under Xen\n");
  31.348 +	status = EINVAL;
  31.349 +    }
  31.350 +    
  31.351 +    if (status) {
  31.352 +	errno = status;
  31.353 +	retval = -1;
  31.354 +    }
  31.355 + done:
  31.356 +    return retval;
  31.357 +}
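
map_domain_va() above walks the guest's two-level x86-32 page tables by hand, so
the vtopdi()/vtopti() split of a virtual address is worth spelling out. A hedged
sketch with an arbitrary example address, assuming the usual non-PAE layout
(PAGE_SHIFT of 12 together with the PDRSHIFT of 22 defined above).

    #include <assert.h>

    #define PDRSHIFT   22
    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long va = 0xc0103abcUL;                 /* example guest virtual address */

        assert((va >> PDRSHIFT) == 0x300);               /* vtopdi(): page-directory index */
        assert(((va >> PAGE_SHIFT) & 0x3ff) == 0x103);   /* vtopti(): page-table index */
        assert((va & 0xfff) == 0xabc);                   /* byte offset within the 4KB page */
        return 0;
    }
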
    32.1 --- a/tools/libxc/xc_vmx_build.c	Sat Mar 12 21:43:58 2005 +0000
    32.2 +++ b/tools/libxc/xc_vmx_build.c	Fri Mar 18 03:37:54 2005 +0000
    32.3 @@ -603,10 +603,11 @@ int xc_vmx_build(int xc_handle,
    32.4  
    32.5      memset( &launch_op, 0, sizeof(launch_op) );
    32.6  
    32.7 -    launch_op.u.builddomain.domain   = (domid_t)domid;
    32.8 -    launch_op.u.builddomain.ctxt = ctxt;
    32.9 +    launch_op.u.setdomaininfo.domain   = (domid_t)domid;
   32.10 +    launch_op.u.setdomaininfo.exec_domain = 0;
   32.11 +    launch_op.u.setdomaininfo.ctxt = ctxt;
   32.12  
   32.13 -    launch_op.cmd = DOM0_BUILDDOMAIN;
   32.14 +    launch_op.cmd = DOM0_SETDOMAININFO;
   32.15      rc = do_dom0_op(xc_handle, &launch_op);
   32.16      return rc;
   32.17  
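
With DOM0_BUILDDOMAIN gone, the builders touched above (xc_linux_build,
xc_linux_restore, xc_plan9_build and xc_vmx_build) all finish domain construction
with the same final hypercall. A hedged sketch of that shared step, assuming the
usual libxc private headers for dom0_op_t, full_execution_context_t and
do_dom0_op(); the wrapper function itself is illustrative and not part of libxc.

    #include <string.h>
    #include "xc_private.h"

    /* Load the assembled execution context into VCPU (exec_domain) 0 of a new domain. */
    static int set_initial_context(int xc_handle, domid_t domid,
                                   full_execution_context_t *ctxt)
    {
        dom0_op_t op;

        memset(&op, 0, sizeof(op));
        op.cmd = DOM0_SETDOMAININFO;
        op.u.setdomaininfo.domain      = domid;
        op.u.setdomaininfo.exec_domain = 0;    /* the builders currently hard-code VCPU 0 */
        op.u.setdomaininfo.ctxt        = ctxt;

        return do_dom0_op(xc_handle, &op);
    }
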
    33.1 --- a/tools/misc/xend	Sat Mar 12 21:43:58 2005 +0000
    33.2 +++ b/tools/misc/xend	Fri Mar 18 03:37:54 2005 +0000
    33.3 @@ -22,11 +22,13 @@
    33.4  import os
    33.5  import sys
    33.6  import socket
    33.7 +import signal
    33.8  import time
    33.9  
   33.10 -XCS_PATH = "/var/lib/xen/xcs_socket"
   33.11 -XCS_EXEC = "/usr/sbin/xcs"
   33.12 -XCS_LOGFILE = "/var/log/xcs.log"
   33.13 +XCS_PATH    = "/var/lib/xen/xcs_socket"
   33.14 +XCS_EXEC    = "/usr/sbin/xcs"
   33.15 +XCS_PIDFILE = "/var/run/xcs.pid"
   33.16 +XCS_ARGS    = (XCS_EXEC, "-p", XCS_PIDFILE)
   33.17  
   33.18  # Default install path for Xen binary packages.
   33.19  sys.path = [ '/usr/lib/python' ] + sys.path
   33.20 @@ -98,37 +100,28 @@ def check_user():
   33.21  
   33.22  def xcs_running():
   33.23      """ See if the control switch is running.
   33.24 -    """
   33.25 -    ret = 1
   33.26 +    """	
   33.27      s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
   33.28      try:
   33.29          s.connect( (XCS_PATH) )
   33.30 +        s.close()
   33.31      except:
   33.32 -        ret = 0
   33.33 -    s.close()
   33.34 -    return (ret)
   33.35 +        try:
   33.36 +            os.remove(XCS_PIDFILE)
   33.37 +        except:
   33.38 +            pass
   33.39 +        return 0
   33.40 +    return 1
   33.41      
   33.42 -def main():
   33.43 -    try:
   33.44 -        check_logging()
   33.45 -        check_twisted_version()
   33.46 -        check_user()
   33.47 -    except CheckError:
   33.48 -        sys.exit(1)
   33.49 -    
   33.50 +def start_xcs():
   33.51      if (not xcs_running()):
   33.52          if os.fork():
   33.53 -            time.sleep(0.5) # let xcs start
   33.54 +            time.sleep(0.1) # let xcs start
   33.55          else:
   33.56 +            if not os.path.isdir(os.path.dirname(XCS_PATH)):
   33.57 +                os.makedirs(os.path.dirname(XCS_PATH))
   33.58              try:
   33.59 -                logfile = os.open(XCS_LOGFILE, 
   33.60 -                                  os.O_WRONLY|os.O_APPEND|os.O_CREAT)
   33.61 -                os.close(1)
   33.62 -                os.dup(logfile)
   33.63 -                os.close(2)
   33.64 -                os.dup(logfile)
   33.65 -                os.close(logfile)
   33.66 -                os.execlp(XCS_EXEC, XCS_EXEC)
   33.67 +                os.execvp(XCS_EXEC, XCS_ARGS)
   33.68              except:
   33.69                  hline()
   33.70                  msg("Tried to start xcs, but failed. Is it installed?")
   33.71 @@ -139,7 +132,24 @@ def main():
   33.72              msg("Failed to start the control interface switch.")
   33.73              hline()
   33.74              raise CheckError("xcs not running")
   33.75 -        
   33.76 +            
   33.77 +def stop_xcs():
   33.78 +    try:
   33.79 +        xcs_pidfile = open(XCS_PIDFILE)
   33.80 +        xcs_pid = int(xcs_pidfile.read().strip())
   33.81 +        os.kill(xcs_pid, signal.SIGTERM)
   33.82 +        xcs_pidfile.close()
   33.83 +    except:
   33.84 +        return
   33.85 +            
   33.86 +def main():
   33.87 +    try:
   33.88 +        check_logging()
   33.89 +        check_twisted_version()
   33.90 +        check_user()
   33.91 +    except CheckError:
   33.92 +        sys.exit(1)
   33.93 +    
   33.94      daemon = SrvDaemon.instance()
   33.95      if not sys.argv[1:]:
   33.96          print 'usage: %s {start|stop|restart}' % sys.argv[0]
   33.97 @@ -147,12 +157,17 @@ def main():
   33.98          pid, status = os.wait()
   33.99          return status >> 8
  33.100      elif sys.argv[1] == 'start':
  33.101 +        start_xcs()
  33.102          return daemon.start()
  33.103      elif sys.argv[1] == 'trace_start':
  33.104 +        start_xcs()
  33.105          return daemon.start(trace=1)
  33.106      elif sys.argv[1] == 'stop':
  33.107 +        stop_xcs()
  33.108          return daemon.stop()
  33.109      elif sys.argv[1] == 'restart':
  33.110 +        stop_xcs()
  33.111 +        start_xcs()
  33.112          return daemon.stop() or daemon.start()
  33.113      elif sys.argv[1] == 'status':
  33.114          return daemon.status()
    34.1 --- a/tools/python/xen/xend/Args.py	Sat Mar 12 21:43:58 2005 +0000
    34.2 +++ b/tools/python/xen/xend/Args.py	Fri Mar 18 03:37:54 2005 +0000
    34.3 @@ -78,6 +78,8 @@ class Args:
    34.4                  val = str(v)
    34.5              elif type == 'sxpr':
    34.6                  val = self.sxpr(v)
    34.7 +            elif type == 'bool':
    34.8 +                val = self.bool(v)
    34.9              else:
   34.10                  raise ArgError('invalid type:' + str(type))
   34.11              return val
   34.12 @@ -86,6 +88,9 @@ class Args:
   34.13          except StandardError, ex:
   34.14              raise ArgError(str(ex))
   34.15  
   34.16 +    def bool(self, v):
   34.17 +        return (v.lower() in ['on', 'yes', '1', 'true'])
   34.18 +
   34.19      def sxpr(self, v):
   34.20          if isinstance(v, types.ListType):
   34.21              val = v
    35.1 --- a/tools/python/xen/xend/XendRoot.py	Sat Mar 12 21:43:58 2005 +0000
    35.2 +++ b/tools/python/xen/xend/XendRoot.py	Fri Mar 18 03:37:54 2005 +0000
    35.3 @@ -2,6 +2,11 @@
    35.4  
    35.5  """Xend root class.
    35.6  Creates the event server and handles configuration.
    35.7 +
    35.8 +Other classes get config variables by importing this module,
    35.9 +using instance() to get a XendRoot instance, and then
   35.10 +the config functions (e.g. get_xend_port()) to get
   35.11 +configured values.
   35.12  """
   35.13  
   35.14  import os
   35.15 @@ -34,17 +39,33 @@ class XendRoot:
   35.16      """Where block control scripts live."""
   35.17      block_script_dir = "/etc/xen/scripts"
   35.18  
   35.19 +    """Default path to the log file. """
   35.20      logfile_default = "/var/log/xend.log"
   35.21  
   35.22      loglevel_default = 'DEBUG'
   35.23  
   35.24 +    """Default interface address xend listens at. """
   35.25 +    xend_address_default      = ''
   35.26 +
   35.27 +    """Default port xend serves HTTP at. """
   35.28 +    xend_port_default         = '8000'
   35.29 +
   35.30 +    """Default port xend serves events at. """
   35.31 +    xend_event_port_default   = '8001'
   35.32 +
   35.33 +    """Default interface address xend listens at for consoles."""
   35.34 +    console_address_default   = ''
   35.35 +
   35.36 +    """Default port xend serves consoles at. """
   35.37 +    console_port_base_default = '9600'
   35.38 +
   35.39      components = {}
   35.40  
   35.41      def __init__(self):
   35.42          self.dbroot = None
   35.43          self.config_path = None
   35.44          self.config = None
   35.45 -        self.logger = None
   35.46 +        self.logging = None
   35.47          self.configure()
   35.48          eserver.subscribe('xend.*', self.event_handler)
   35.49          #eserver.subscribe('xend.domain.created', self.event_handler)
   35.50 @@ -73,9 +94,9 @@ class XendRoot:
   35.51  
   35.52      def _format(self, msg, args):
   35.53          if args:
   35.54 +            return str(msg) % args
   35.55 +        else:
   35.56              return str(msg)
   35.57 -        else:
   35.58 -            return str(msg) % args
   35.59  
   35.60      def _log(self, mode, fmt, args):
   35.61          """Logging function that uses the logger if it exists, otherwise
   35.62 @@ -90,7 +111,7 @@ class XendRoot:
   35.63          if log:
   35.64              getattr(log, mode)(fmt, *args)
   35.65          else:
   35.66 -            print >>stderr, "xend", "[%s]" % level, self._format(msg, args)
   35.67 +            print >>sys.stderr, "xend", "[%s]" % mode, self._format(fmt, args)
   35.68  
   35.69      def logDebug(self, fmt, *args):
   35.70          """Log a debug message.
   35.71 @@ -132,7 +153,6 @@ class XendRoot:
   35.72          self.configure_logger()
   35.73          self.dbroot = self.get_config_value("dbroot", self.dbroot_default)
   35.74  
   35.75 -
   35.76      def configure_logger(self):
   35.77          logfile = self.get_config_value("logfile", self.logfile_default)
   35.78          loglevel = self.get_config_value("loglevel", self.loglevel_default)
   35.79 @@ -146,7 +166,7 @@ class XendRoot:
   35.80      def get_logger(self):
   35.81          """Get the logger.
   35.82          """
   35.83 -        return self.logging.getLogger()
   35.84 +        return self.logging and self.logging.getLogger()
   35.85  
   35.86      def get_dbroot(self):
   35.87          """Get the path to the database root.
   35.88 @@ -160,14 +180,20 @@ class XendRoot:
   35.89          """
   35.90          self.config_path = os.getenv(self.config_var, self.config_default)
   35.91          if os.path.exists(self.config_path):
   35.92 -            fin = file(self.config_path, 'rb')
   35.93 +            #self.logInfo('Reading config file %s', self.config_path)
   35.94              try:
   35.95 -                config = sxp.parse(fin)
   35.96 +                fin = file(self.config_path, 'rb')
   35.97 +                try:
   35.98 +                    config = sxp.parse(fin)
   35.99 +                finally:
  35.100 +                    fin.close()
  35.101                  config.insert(0, 'xend-config')
  35.102                  self.config = config
  35.103 -            finally:
  35.104 -                fin.close()
  35.105 +            except Exception, ex:
  35.106 +                self.logError('Reading config file %s: %s', self.config_path, str(ex))
  35.107 +                raise
  35.108          else:
  35.109 +            self.logError('Config file does not exist: %s', self.config_path)
  35.110              self.config = ['xend-config']
  35.111  
  35.112      def get_config(self, name=None):
  35.113 @@ -193,10 +219,35 @@ class XendRoot:
  35.114          return sxp.child_value(self.config, name, val=val)
  35.115  
  35.116      def get_xend_port(self):
  35.117 -        return int(self.get_config_value('xend-port', '8000'))
  35.118 +        """Get the port xend listens at for its HTTP interface.
  35.119 +        """
  35.120 +        return int(self.get_config_value('xend-port', self.xend_port_default))
  35.121 +
  35.122 +    def get_xend_event_port(self):
  35.123 +        """Get the port xend listens at for connection to its event server.
  35.124 +        """
  35.125 +        return int(self.get_config_value('xend-event-port', self.xend_event_port_default))
  35.126  
  35.127      def get_xend_address(self):
  35.128 -        return self.get_config_value('xend-address', '')
  35.129 +        """Get the address xend listens at for its HTTP and event ports.
  35.130 +        This defaults to the empty string which allows all hosts to connect.
   35.131 +        If this is set to 'localhost', only the local host will be able to connect
  35.132 +        to the HTTP and event ports.
  35.133 +        """
  35.134 +        return self.get_config_value('xend-address', self.xend_address_default)
  35.135 +
  35.136 +    def get_console_address(self):
  35.137 +        """Get the address xend listens at for its console ports.
  35.138 +        This defaults to the empty string which allows all hosts to connect.
   35.139 +        If this is set to 'localhost', only the local host will be able to connect
  35.140 +        to the console ports.
  35.141 +        """
  35.142 +        return self.get_config_value('console-address', self.console_address_default)
  35.143 +
  35.144 +    def get_console_port_base(self):
  35.145 +        """Get the base port number used to generate console ports for domains.
  35.146 +        """
  35.147 +        return int(self.get_config_value('console-port-base', self.console_port_base_default))
  35.148  
  35.149      def get_block_script(self, type):
  35.150          return self.get_config_value('block-%s' % type, '')
    36.1 --- a/tools/python/xen/xend/server/SrvBase.py	Sat Mar 12 21:43:58 2005 +0000
    36.2 +++ b/tools/python/xen/xend/server/SrvBase.py	Fri Mar 18 03:37:54 2005 +0000
    36.3 @@ -106,7 +106,8 @@ class SrvBase(resource.Resource):
    36.4          try:
    36.5              val = op_method(op, req)
    36.6          except Exception, err:
    36.7 -            return self._perform_err(err, op, req)
    36.8 +            self._perform_err(err, op, req)
    36.9 +            return ''
   36.10              
   36.11          if isinstance(val, defer.Deferred):
   36.12              val.addCallback(self._perform_cb, op, req, dfr=1)
    37.1 --- a/tools/python/xen/xend/server/SrvDaemon.py	Sat Mar 12 21:43:58 2005 +0000
    37.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py	Fri Mar 18 03:37:54 2005 +0000
    37.3 @@ -598,10 +598,10 @@ class Daemon:
    37.4      def set_user(self):
    37.5          # Set the UID.
    37.6          try:
    37.7 -            os.setuid(pwd.getpwnam(USER)[2])
    37.8 +            os.setuid(pwd.getpwnam(XEND_USER)[2])
    37.9              return 0
   37.10          except KeyError, error:
   37.11 -            print "Error: no such user '%s'" % USER
   37.12 +            print "Error: no such user '%s'" % XEND_USER
   37.13              return 1
   37.14  
   37.15      def stop(self):
   37.16 @@ -611,7 +611,7 @@ class Daemon:
   37.17          xroot = XendRoot.instance()
   37.18          log.info("Xend Daemon started")
   37.19          self.createFactories()
   37.20 -        self.listenEvent()
   37.21 +        self.listenEvent(xroot)
   37.22          self.listenNotifier()
   37.23          self.listenVirq()
   37.24          SrvServer.create(bridge=1)
   37.25 @@ -625,9 +625,11 @@ class Daemon:
   37.26          self.usbifCF = usbif.UsbifControllerFactory()
   37.27          self.consoleCF = console.ConsoleControllerFactory()
   37.28  
   37.29 -    def listenEvent(self):
   37.30 +    def listenEvent(self, xroot):
   37.31          protocol = EventFactory(self)
   37.32 -        return reactor.listenTCP(EVENT_PORT, protocol)
   37.33 +        port = xroot.get_xend_event_port()
   37.34 +        interface = xroot.get_xend_address()
   37.35 +        return reactor.listenTCP(port, protocol, interface=interface)
   37.36  
   37.37      def listenNotifier(self):
   37.38          protocol = NotifierProtocol(self.channelF)
    38.1 --- a/tools/python/xen/xend/server/SrvDir.py	Sat Mar 12 21:43:58 2005 +0000
    38.2 +++ b/tools/python/xen/xend/server/SrvDir.py	Fri Mar 18 03:37:54 2005 +0000
    38.3 @@ -88,7 +88,7 @@ class SrvDir(SrvBase):
    38.4                  req.write('</body></html>')
    38.5              return ''
    38.6          except Exception, ex:
    38.7 -            self._perform_err(ex, req)
    38.8 +            self._perform_err(ex, "GET", req)
    38.9              
   38.10      def ls(self, req, use_sxp=0):
   38.11          url = req.prePathURL()
    39.1 --- a/tools/python/xen/xend/server/console.py	Sat Mar 12 21:43:58 2005 +0000
    39.2 +++ b/tools/python/xen/xend/server/console.py	Fri Mar 18 03:37:54 2005 +0000
    39.3 @@ -11,6 +11,8 @@ from xen.xend.XendError import XendError
    39.4  from xen.xend import EventServer
    39.5  eserver = EventServer.instance()
    39.6  from xen.xend.XendLogging import log
    39.7 +from xen.xend import XendRoot
    39.8 +xroot = XendRoot.instance()
    39.9  
   39.10  import controller
   39.11  from messages import *
   39.12 @@ -82,7 +84,7 @@ class ConsoleControllerFactory(controlle
   39.13  
   39.14      def createController(self, dom, console_port=None):
   39.15          if console_port is None:
   39.16 -            console_port = CONSOLE_PORT_BASE + dom
   39.17 +            console_port = xroot.get_console_port_base() + dom
   39.18          for c in self.getControllers():
   39.19              if c.console_port == console_port:
   39.20                  raise XendError('console port in use: ' + str(console_port))
   39.21 @@ -191,7 +193,8 @@ class ConsoleController(controller.Contr
   39.22              pass
   39.23          else:
   39.24              f = ConsoleFactory(self, self.idx)
   39.25 -            self.listener = reactor.listenTCP(self.console_port, f)
   39.26 +            interface = xroot.get_console_address()
   39.27 +            self.listener = reactor.listenTCP(self.console_port, f, interface=interface)
   39.28  
   39.29      def connect(self, addr, conn):
   39.30          """Connect a TCP connection to the console.
    40.1 --- a/tools/python/xen/xend/server/params.py	Sat Mar 12 21:43:58 2005 +0000
    40.2 +++ b/tools/python/xen/xend/server/params.py	Fri Mar 18 03:37:54 2005 +0000
    40.3 @@ -3,9 +3,5 @@ XEND_PID_FILE = '/var/run/xend.pid'
    40.4  XFRD_PID_FILE = '/var/run/xfrd.pid'
    40.5  XEND_TRACE_FILE = '/var/log/xend.trace'
    40.6  
    40.7 -USER = 'root'
    40.8 +XEND_USER = 'root'
    40.9  
   40.10 -EVENT_PORT = 8001
   40.11 -
   40.12 -CONSOLE_PORT_BASE = 9600
   40.13 -
    41.1 --- a/tools/tests/test_x86_emulator.c	Sat Mar 12 21:43:58 2005 +0000
    41.2 +++ b/tools/tests/test_x86_emulator.c	Fri Mar 18 03:37:54 2005 +0000
    41.3 @@ -26,7 +26,7 @@ static int read_any(
    41.4      case 4: *val = *(u32 *)addr; break;
    41.5      case 8: *val = *(unsigned long *)addr; break;
    41.6      }
    41.7 -    return 0;
    41.8 +    return X86EMUL_CONTINUE;
    41.9  }
   41.10  
   41.11  static int write_any(
   41.12 @@ -41,17 +41,15 @@ static int write_any(
   41.13      case 4: *(u32 *)addr = (u32)val; break;
   41.14      case 8: *(unsigned long *)addr = val; break;
   41.15      }
   41.16 -    return 0;
   41.17 +    return X86EMUL_CONTINUE;
   41.18  }
   41.19  
   41.20  static int cmpxchg_any(
   41.21      unsigned long addr,
   41.22      unsigned long old,
   41.23      unsigned long new,
   41.24 -    unsigned long *seen,
   41.25      unsigned int bytes)
   41.26  {
   41.27 -    *seen = old;
   41.28      switch ( bytes )
   41.29      {
   41.30      case 1: *(u8 *)addr = (u8)new; break;
   41.31 @@ -59,7 +57,7 @@ static int cmpxchg_any(
   41.32      case 4: *(u32 *)addr = (u32)new; break;
   41.33      case 8: *(unsigned long *)addr = new; break;
   41.34      }
   41.35 -    return 0;
   41.36 +    return X86EMUL_CONTINUE;
   41.37  }
   41.38  
   41.39  static struct x86_mem_emulator emulops = {
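
Note: the test harness callbacks above now report success as X86EMUL_CONTINUE rather than a bare 0, and the cmpxchg handler loses its old 'seen' out-parameter. A small self-contained sketch of the new cmpxchg callback shape, with compare failure reported via a distinct code as ptwr_emulated_update does later in this changeset; the macro values and struct here are stand-ins for the real definitions in asm-x86/x86_emulate.h:

    #include <stdio.h>
    #include <string.h>

    /* Stand-ins for the emulator's return codes and ops table. */
    #define X86EMUL_CONTINUE        0
    #define X86EMUL_CMPXCHG_FAILED  3

    struct x86_mem_emulator_sketch {
        int (*cmpxchg_emulated)(unsigned long addr, unsigned long old,
                                unsigned long new, unsigned int bytes);
    };

    /* New-style cmpxchg callback: four arguments, no 'seen' out-parameter. */
    static int demo_cmpxchg(unsigned long addr, unsigned long old,
                            unsigned long new, unsigned int bytes)
    {
        unsigned long cur = 0;
        memcpy(&cur, (void *)addr, bytes);       /* little-endian host assumed */
        if (cur != old)
            return X86EMUL_CMPXCHG_FAILED;       /* compare failed, no write */
        memcpy((void *)addr, &new, bytes);
        return X86EMUL_CONTINUE;                 /* success */
    }

    static struct x86_mem_emulator_sketch demo_ops = {
        .cmpxchg_emulated = demo_cmpxchg,
    };

    int main(void)
    {
        unsigned long word = 0x1234;
        int rc = demo_ops.cmpxchg_emulated((unsigned long)&word,
                                           0x1234, 0x5678, sizeof(word));
        printf("rc=%d word=%#lx\n", rc, word);
        return 0;
    }

An ops table in this shape is what x86_emulate_memop() is handed; the writable-pagetable emulation in xen/arch/x86/mm.c below wires one up as ptwr_mem_emulator.
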
    42.1 --- a/tools/xcs/evtchn.c	Sat Mar 12 21:43:58 2005 +0000
    42.2 +++ b/tools/xcs/evtchn.c	Fri Mar 18 03:37:54 2005 +0000
    42.3 @@ -95,8 +95,6 @@ int evtchn_open(void)
    42.4              goto reopen;
    42.5          return -errno;
    42.6      }
    42.7 -    /*set_cloexec(evtchn_fd); -- no longer required*/
    42.8 -printf("Eventchan_fd is %d\n", evtchn_fd);
    42.9      return evtchn_fd;
   42.10  }
   42.11  
    43.1 --- a/tools/xcs/xcs.c	Sat Mar 12 21:43:58 2005 +0000
    43.2 +++ b/tools/xcs/xcs.c	Fri Mar 18 03:37:54 2005 +0000
    43.3 @@ -75,6 +75,7 @@
    43.4  #include <errno.h>
    43.5  #include <malloc.h>
    43.6  #include <fcntl.h>
    43.7 +#include <ctype.h>
    43.8  #include "xcs.h"
    43.9  
   43.10  #undef fd_max
   43.11 @@ -97,7 +98,7 @@ static void map_dom_to_port(u32 dom, int
   43.12              exit(1);
   43.13          }
   43.14  
   43.15 -        for (; dom_port_map_size < dom + 10; dom_port_map_size++) {
   43.16 +        for (; dom_port_map_size < dom + 256; dom_port_map_size++) {
   43.17              dom_port_map[dom_port_map_size] = -1;
   43.18          }
   43.19      }
   43.20 @@ -123,7 +124,7 @@ static control_channel_t *add_interface(
   43.21      control_channel_t *cc=NULL, *oldcc;
   43.22      int ret;
   43.23      
   43.24 -    if (cc_list[dom_to_port(dom)] != NULL)
   43.25 +    if ((dom_to_port(dom) >= 0) && (cc_list[dom_to_port(dom)] != NULL))
   43.26      {
   43.27          return(cc_list[dom_to_port(dom)]);
   43.28      }
   43.29 @@ -155,10 +156,13 @@ static control_channel_t *add_interface(
   43.30          if ((oldcc->remote_dom != cc->remote_dom) ||
   43.31              (oldcc->remote_port != cc->remote_port))
   43.32          {
   43.33 -            DPRINTF("CC conflict! (port: %d, old dom: %u, new dom: %u)\n",
   43.34 -                    cc->local_port, oldcc->remote_dom, cc->remote_dom);
   43.35 +            DPRINTF("CC conflict! (port: %d, old dom: %u, new dom: %u, "
   43.36 +                    "old ref_count: %d)\n",
   43.37 +                    cc->local_port, oldcc->remote_dom, cc->remote_dom, 
   43.38 +                    oldcc->ref_count);
   43.39              map_dom_to_port(oldcc->remote_dom, -1);
   43.40              ctrl_chan_free(cc_list[cc->local_port]);
   43.41 +            cc_list[cc->local_port] = NULL;
   43.42          }
   43.43      }
   43.44       
   43.45 @@ -210,6 +214,8 @@ void put_interface(control_channel_t *cc
   43.46          {
   43.47              DPRINTF("Freeing cc on port %d.\n", cc->local_port);
   43.48              (void)evtchn_unbind(cc->local_port);
   43.49 +            cc_list[cc->local_port] = NULL;
   43.50 +            map_dom_to_port(cc->remote_dom, -1);
   43.51              ctrl_chan_free(cc);
   43.52          }
   43.53      }
   43.54 @@ -245,7 +251,7 @@ static int listen_socket (char *listen_p
   43.55          close (s);
   43.56          return -1;
   43.57      }
   43.58 -    printf ("accepting connections on path %s\n", listen_path);
   43.59 +    DPRINTF ("accepting connections on path %s\n", listen_path);
   43.60      listen (s, 10);
   43.61      return s;
   43.62  }
   43.63 @@ -623,13 +629,66 @@ void gc_ufd_list( unbound_fd_t **ufd )
   43.64      }
   43.65  }
   43.66  
   43.67 +void daemonize_xcs(void)
   43.68 +{
   43.69 +    
    43.70 +    /* detach from our controlling tty so that a shell does not hang waiting for
   43.71 +       stopped jobs. */
   43.72 +    
   43.73 +    pid_t pid = fork();
   43.74 +    int fd;
   43.75 +
   43.76 +    if (pid == -1) {
   43.77 +	    perror("fork()");
   43.78 +    } else if (pid) {
   43.79 +	    exit(0);
   43.80 +    }
   43.81 +
    43.82 +    fd = open("/var/log/xcs.log", O_WRONLY | O_APPEND | O_CREAT, 0644);
   43.83 +    if ( fd == -1 ) {
   43.84 +        fprintf(stderr, "xcs couldn't open logfile.  Directing all output to "
   43.85 +                "/dev/null instead.\n");
   43.86 +        fd = open("/dev/null", O_WRONLY);
   43.87 +    }
   43.88 +    
   43.89 +    setsid();
   43.90 +    close(2);
   43.91 +    close(1);
   43.92 +    close(0);
    43.93 +    dup2(fd, 1);    /* stdout -> logfile */
    43.94 +    dup2(fd, 2);    /* stderr -> logfile */
   43.95 +}
   43.96 +
   43.97 +
   43.98 +static char *pidfilename = NULL;
   43.99 +void cleanup(int sig)
  43.100 +{
  43.101 +    /* throw away our pidfile if we created one. */
  43.102 +    if ( pidfilename != NULL ) 
  43.103 +        unlink(pidfilename);
  43.104 +    exit(0);
  43.105 +}
  43.106 +
  43.107  int main (int argc, char *argv[])
  43.108  {
  43.109      int listen_fd, evtchn_fd;
  43.110      unbound_fd_t *unbound_fd_list = NULL, **ufd;
  43.111      struct timeval timeout = { XCS_GC_INTERVAL, 0 };
  43.112      connection_t **con;
  43.113 +    int c, daemonize;
  43.114 +    FILE *pidfile;
  43.115 +    struct stat s;
  43.116 +    
  43.117 +    daemonize = 1;
  43.118 +    pidfile = NULL;
  43.119  
  43.120 +    signal(SIGHUP, cleanup);
  43.121 +    signal(SIGTERM, cleanup);
  43.122 +    signal(SIGINT, cleanup);
  43.123 +    
  43.124 +    /* Do a bunch of stuff before potentially daemonizing so we can 
  43.125 +     * print error messages sanely before redirecting output. */
  43.126 +    
  43.127      /* Initialize xc and event connections. */
  43.128      if (ctrl_chan_init() != 0)
  43.129      {
  43.130 @@ -643,35 +702,61 @@ int main (int argc, char *argv[])
  43.131          exit(-1);
  43.132      }
  43.133     
  43.134 +    /* Bind listen_fd to the client socket. */
  43.135 +    listen_fd = listen_socket(XCS_SUN_PATH);
  43.136 +     
  43.137 +    while ((c = getopt (argc, argv, "ip:")) != -1)
  43.138 +    {
  43.139 +        switch (c)
  43.140 +        {
  43.141 +        case 'i': /* interactive */
  43.142 +            daemonize = 0;
  43.143 +            break;
  43.144 +        case 'p': /* pid file */
  43.145 +            pidfilename = optarg;
  43.146 +            break;          
  43.147 +        case '?':
  43.148 +            if (isprint (optopt))
  43.149 +                fprintf (stderr, "Unknown option `-%c'.\n", optopt);
  43.150 +            else
  43.151 +                fprintf (stderr,
  43.152 +                    "Bad option character `\\x%x'.\n", optopt);
  43.153 +            break;
  43.154 +        }    
  43.155 +    }
  43.156 +    
  43.157 +    if ( pidfilename != NULL )
  43.158 +    {
  43.159 +        if ( stat(pidfilename, &s) == 0 )
  43.160 +        {
   43.161 +            fprintf(stderr, "The specified pid file (%s) already exists.\n"
  43.162 +                    "Is another instance of xcs running?\n", pidfilename);
  43.163 +            exit(-1);
  43.164 +        }
  43.165 +
  43.166 +        pidfile = fopen(pidfilename, "w");
  43.167 +        if (pidfile == NULL)
  43.168 +        {
   43.169 +            fprintf(stderr, "Error opening pidfile (%s).\n", pidfilename);
  43.170 +            exit(-1);
  43.171 +        }
  43.172 +    }
  43.173 +        
  43.174 +    if (daemonize == 1) 
  43.175 +        daemonize_xcs();
  43.176 +    
  43.177 +    if (pidfile != NULL)
  43.178 +    {
  43.179 +        fprintf(pidfile, "%d", getpid());
  43.180 +        fclose(pidfile); 
  43.181 +    }
  43.182 +    
  43.183 +    
  43.184      /* Initialize control interfaces, bindings. */
  43.185      init_interfaces();
  43.186      init_bindings();
  43.187      
  43.188 -    listen_fd = listen_socket(XCS_SUN_PATH);
  43.189     
  43.190 -    /* detach from our controlling tty so that a shell does hang waiting for
  43.191 -       stopped jobs. */
  43.192 -    /* we should use getopt() here */
  43.193 -
  43.194 -    if (!(argc == 2 && !strcmp(argv[1], "-i"))) {
  43.195 -	pid_t pid = fork();
  43.196 -	int fd;
  43.197 -
  43.198 -	if (pid == -1) {
  43.199 -		perror("fork()");
  43.200 -	} else if (pid) {
  43.201 -		exit(0);
  43.202 -	}
  43.203 -
  43.204 -    	setsid();
  43.205 -	close(2);
  43.206 -	close(1);
  43.207 -	close(0);
  43.208 -	fd = open("/dev/null", O_RDWR);
  43.209 -	dup(fd);
  43.210 -	dup(fd);
  43.211 -    }
  43.212 - 
  43.213      for (;;)
  43.214      {
  43.215          int n = 0, ret;
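
Note: xcs now parses -i (stay interactive) and -p <pidfile>, installs signal handlers that unlink the pid file on exit, and daemonizes by forking, calling setsid() and pointing its output at /var/log/xcs.log. A small self-contained sketch of that daemonize-and-log pattern, using dup2() and an explicit creation mode; only the log path is taken from the patch, the rest is illustrative:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Sketch: put the process in the background and send its output to a
     * logfile, falling back to /dev/null if the logfile cannot be opened. */
    static void daemonize_with_log(const char *logpath)
    {
        pid_t pid = fork();
        int fd;

        if (pid == -1) {
            perror("fork()");
            return;                      /* stay in the foreground on error */
        }
        if (pid != 0)
            exit(0);                     /* parent exits; child carries on */

        fd = open(logpath, O_WRONLY | O_APPEND | O_CREAT, 0644);
        if (fd == -1)
            fd = open("/dev/null", O_WRONLY);

        setsid();                        /* drop the controlling tty */
        dup2(fd, STDOUT_FILENO);         /* stdout -> logfile */
        dup2(fd, STDERR_FILENO);         /* stderr -> logfile */
        close(STDIN_FILENO);             /* daemons do not read stdin */
        if (fd > STDERR_FILENO)
            close(fd);
    }

    int main(void)
    {
        daemonize_with_log("/var/log/xcs.log");
        printf("running in the background\n");   /* lands in the logfile */
        return 0;
    }
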
    44.1 --- a/xen/Rules.mk	Sat Mar 12 21:43:58 2005 +0000
    44.2 +++ b/xen/Rules.mk	Fri Mar 18 03:37:54 2005 +0000
    44.3 @@ -4,6 +4,7 @@ debug       ?= n
    44.4  perfc       ?= n
    44.5  trace       ?= n
    44.6  optimize    ?= y
    44.7 +domu_debug  ?= n
    44.8  crash_debug ?= n
    44.9  
   44.10  # Currently supported architectures: x86_32, x86_64
   44.11 @@ -54,6 +55,10 @@ else
   44.12  CFLAGS += -DVERBOSE
   44.13  endif
   44.14  
   44.15 +ifeq ($(domu_debug),y)
   44.16 +CFLAGS += -DDOMU_DEBUG
   44.17 +endif
   44.18 +
   44.19  ifeq ($(crash_debug),y)
   44.20  CFLAGS += -g -DCRASH_DEBUG
   44.21  endif
    45.1 --- a/xen/arch/ia64/domain.c	Sat Mar 12 21:43:58 2005 +0000
    45.2 +++ b/xen/arch/ia64/domain.c	Fri Mar 18 03:37:54 2005 +0000
    45.3 @@ -200,7 +200,7 @@ void arch_do_boot_vcpu(struct exec_domai
    45.4  	return;
    45.5  }
    45.6  
    45.7 -int arch_final_setup_guest(struct exec_domain *p, full_execution_context_t *c)
    45.8 +int arch_set_info_guest(struct exec_domain *p, full_execution_context_t *c)
    45.9  {
   45.10  	dummy();
   45.11  	return 1;
    46.1 --- a/xen/arch/x86/domain.c	Sat Mar 12 21:43:58 2005 +0000
    46.2 +++ b/xen/arch/x86/domain.c	Fri Mar 18 03:37:54 2005 +0000
    46.3 @@ -423,13 +423,23 @@ out:
    46.4  
    46.5  
    46.6  /* This is called by arch_final_setup_guest and do_boot_vcpu */
    46.7 -int arch_final_setup_guest(
    46.8 +int arch_set_info_guest(
    46.9      struct exec_domain *ed, full_execution_context_t *c)
   46.10  {
   46.11      struct domain *d = ed->domain;
   46.12      unsigned long phys_basetab;
   46.13      int i, rc;
   46.14  
   46.15 +    /*
   46.16 +     * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
   46.17 +     * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
   46.18 +     * If SS RPL or DPL differs from CS RPL then we'll #GP.
   46.19 +     */
   46.20 +    if (!(c->flags & ECF_VMX_GUEST)) 
   46.21 +        if ( ((c->cpu_ctxt.cs & 3) == 0) ||
   46.22 +             ((c->cpu_ctxt.ss & 3) == 0) )
   46.23 +                return -EINVAL;
   46.24 +
   46.25      clear_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   46.26      if ( c->flags & ECF_I387_VALID )
   46.27          set_bit(EDF_DONEFPUINIT, &ed->ed_flags);
   46.28 @@ -441,6 +451,11 @@ int arch_final_setup_guest(
   46.29      memcpy(&ed->arch.user_ctxt,
   46.30             &c->cpu_ctxt,
   46.31             sizeof(ed->arch.user_ctxt));
   46.32 +
   46.33 +    memcpy(&ed->arch.i387,
   46.34 +           &c->fpu_ctxt,
   46.35 +           sizeof(ed->arch.i387));
   46.36 +
   46.37      /* IOPL privileges are virtualised. */
   46.38      ed->arch.iopl = (ed->arch.user_ctxt.eflags >> 12) & 3;
   46.39      ed->arch.user_ctxt.eflags &= ~EF_IOPL;
   46.40 @@ -449,19 +464,8 @@ int arch_final_setup_guest(
   46.41      if (!IS_PRIV(d))
   46.42          ed->arch.user_ctxt.eflags &= 0xffffcfff;
   46.43  
   46.44 -    /*
   46.45 -     * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
   46.46 -     * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
   46.47 -     * If SS RPL or DPL differs from CS RPL then we'll #GP.
   46.48 -     */
   46.49 -    if (!(c->flags & ECF_VMX_GUEST)) 
   46.50 -        if ( ((ed->arch.user_ctxt.cs & 3) == 0) ||
   46.51 -             ((ed->arch.user_ctxt.ss & 3) == 0) )
   46.52 -                return -EINVAL;
   46.53 -
   46.54 -    memcpy(&ed->arch.i387,
   46.55 -           &c->fpu_ctxt,
   46.56 -           sizeof(ed->arch.i387));
   46.57 +    if (test_bit(EDF_DONEINIT, &ed->ed_flags))
   46.58 +        return 0;
   46.59  
   46.60      memcpy(ed->arch.traps,
   46.61             &c->trap_ctxt,
   46.62 @@ -509,10 +513,14 @@ int arch_final_setup_guest(
   46.63  #endif
   46.64  
   46.65      update_pagetables(ed);
   46.66 +    
   46.67 +    /* Don't redo final setup */
   46.68 +    set_bit(EDF_DONEINIT, &ed->ed_flags);
   46.69  
   46.70      return 0;
   46.71  }
   46.72  
   46.73 +
   46.74  void new_thread(struct exec_domain *d,
   46.75                  unsigned long start_pc,
   46.76                  unsigned long start_stack,
   46.77 @@ -647,6 +655,11 @@ static void switch_segments(
   46.78              (unsigned long *)regs->rsp : 
   46.79              (unsigned long *)n->arch.kernel_sp;
   46.80  
   46.81 +        if ( !(n->arch.flags & TF_kernel_mode) )
   46.82 +            toggle_guest_mode(n);
   46.83 +        else
   46.84 +            regs->cs &= ~3;
   46.85 +
   46.86          if ( put_user(regs->ss,     rsp- 1) |
   46.87               put_user(regs->rsp,    rsp- 2) |
   46.88               put_user(regs->rflags, rsp- 3) |
   46.89 @@ -663,9 +676,6 @@ static void switch_segments(
   46.90              domain_crash();
   46.91          }
   46.92  
   46.93 -        if ( !(n->arch.flags & TF_kernel_mode) )
   46.94 -            toggle_guest_mode(n);
   46.95 -
   46.96          regs->entry_vector  = TRAP_syscall;
   46.97          regs->rflags       &= 0xFFFCBEFFUL;
   46.98          regs->ss            = __GUEST_SS;
   46.99 @@ -688,10 +698,10 @@ long do_switch_to_user(void)
  46.100      toggle_guest_mode(ed);
  46.101  
  46.102      regs->rip    = stu.rip;
  46.103 -    regs->cs     = stu.cs;
  46.104 +    regs->cs     = stu.cs | 3; /* force guest privilege */
  46.105      regs->rflags = stu.rflags;
  46.106      regs->rsp    = stu.rsp;
  46.107 -    regs->ss     = stu.ss;
  46.108 +    regs->ss     = stu.ss | 3; /* force guest privilege */
  46.109  
  46.110      if ( !(stu.flags & ECF_IN_SYSCALL) )
  46.111      {
  46.112 @@ -754,20 +764,14 @@ void context_switch(struct exec_domain *
  46.113              loaddebug(&next_p->arch, 7);
  46.114          }
  46.115  
  46.116 -#ifdef CONFIG_VMX
  46.117          if ( VMX_DOMAIN(next_p) )
  46.118          {
  46.119 -            /* Switch page tables. */
  46.120              write_ptbase(next_p);
  46.121 - 
  46.122              set_current(next_p);
  46.123 -            /* Switch GDT and LDT. */
  46.124              __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
  46.125 -
  46.126              __sti();
  46.127              goto done;
  46.128          }
  46.129 -#endif
  46.130   
  46.131          SET_FAST_TRAP(&next_p->arch);
  46.132  
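
Note: arch_final_setup_guest is renamed arch_set_info_guest; it now rejects a context whose CS or SS selector has RPL 0 before any state is copied, and uses the EDF_DONEINIT flag so repeat calls only refresh the per-vcpu register context. A tiny self-contained sketch of the RPL check itself (the selector values below are made up for illustration):

    #include <stdio.h>

    /* Sketch: the requested privilege level (RPL) is the low two bits of a
     * segment selector.  Guest kernels run above ring 0, so a context whose
     * CS or SS selector has RPL 0 is rejected with -EINVAL. */
    static int guest_selectors_ok(unsigned int cs, unsigned int ss)
    {
        return ((cs & 3) != 0) && ((ss & 3) != 0);
    }

    int main(void)
    {
        printf("cs=0x0819 ss=0x0821: %s\n",
               guest_selectors_ok(0x0819, 0x0821) ? "accepted" : "rejected");
        printf("cs=0x0818 ss=0x0820: %s\n",       /* RPL 0 in both */
               guest_selectors_ok(0x0818, 0x0820) ? "accepted" : "rejected");
        return 0;
    }
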
    47.1 --- a/xen/arch/x86/mm.c	Sat Mar 12 21:43:58 2005 +0000
    47.2 +++ b/xen/arch/x86/mm.c	Fri Mar 18 03:37:54 2005 +0000
    47.3 @@ -101,6 +101,7 @@
    47.4  #include <asm/uaccess.h>
    47.5  #include <asm/domain_page.h>
    47.6  #include <asm/ldt.h>
    47.7 +#include <asm/x86_emulate.h>
    47.8  
    47.9  #ifdef VERBOSE
   47.10  #define MEM_LOG(_f, _a...)                           \
   47.11 @@ -256,11 +257,21 @@ int map_ldt_shadow_page(unsigned int off
   47.12      struct domain *d = ed->domain;
   47.13      unsigned long l1e;
   47.14  
   47.15 -    if ( unlikely(in_irq()) )
   47.16 -        BUG();
   47.17 -
   47.18 +#if defined(__x86_64__)
   47.19 +    /* If in user mode, switch to kernel mode just to read LDT mapping. */
   47.20 +    extern void toggle_guest_mode(struct exec_domain *);
   47.21 +    int user_mode = !(ed->arch.flags & TF_kernel_mode);
   47.22 +#define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(ed)
   47.23 +#elif defined(__i386__)
   47.24 +#define TOGGLE_MODE() ((void)0)
   47.25 +#endif
   47.26 +
   47.27 +    BUG_ON(unlikely(in_irq()));
   47.28 +
   47.29 +    TOGGLE_MODE();
   47.30      __get_user(l1e, (unsigned long *)
   47.31                 &linear_pg_table[l1_linear_offset(ed->arch.ldt_base) + off]);
   47.32 +    TOGGLE_MODE();
   47.33  
   47.34      if ( unlikely(!(l1e & _PAGE_PRESENT)) ||
   47.35           unlikely(!get_page_and_type(
   47.36 @@ -1928,12 +1939,13 @@ void update_shadow_va_mapping(unsigned l
   47.37          &shadow_linear_pg_table[l1_linear_offset(va)])))) )
   47.38      {
   47.39          /*
   47.40 -         * Since L2's are guranteed RW, failure indicates either that the
   47.41 +         * Since L2's are guaranteed RW, failure indicates either that the
   47.42           * page was not shadowed, or that the L2 entry has not yet been
   47.43           * updated to reflect the shadow.
   47.44           */
   47.45 -        if ( shadow_mode_external(current->domain) )
   47.46 -            BUG(); // can't use linear_l2_table with external tables.
   47.47 +
   47.48 +        /* Can't use linear_l2_table with external tables. */
   47.49 +        BUG_ON(shadow_mode_external(current->domain));
   47.50  
   47.51          l2_pgentry_t gpde = linear_l2_table[l2_table_offset(va)];
   47.52          unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT;
   47.53 @@ -2283,9 +2295,7 @@ void ptwr_flush(const int which)
   47.54      int            i, cpu = smp_processor_id();
   47.55      struct exec_domain *ed = current;
   47.56      struct domain *d = ed->domain;
   47.57 -#ifdef PERF_COUNTERS
   47.58      unsigned int   modified = 0;
   47.59 -#endif
   47.60  
   47.61      l1va = ptwr_info[cpu].ptinfo[which].l1va;
   47.62      ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
   47.63 @@ -2333,11 +2343,7 @@ void ptwr_flush(const int which)
   47.64  
   47.65      /* Ensure that there are no stale writable mappings in any TLB. */
   47.66      /* NB. INVLPG is a serialising instruction: flushes pending updates. */
   47.67 -#if 1
   47.68      __flush_tlb_one(l1va); /* XXX Multi-CPU guests? */
   47.69 -#else
   47.70 -    flush_tlb_all();
   47.71 -#endif
   47.72      PTWR_PRINTK("[%c] disconnected_l1va at %p now %p\n",
   47.73                  PTWR_PRINT_WHICH, ptep, pte);
   47.74  
   47.75 @@ -2354,10 +2360,8 @@ void ptwr_flush(const int which)
   47.76          if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) )
   47.77              continue;
   47.78  
   47.79 -#ifdef PERF_COUNTERS
   47.80          /* Update number of entries modified. */
   47.81          modified++;
   47.82 -#endif
   47.83  
   47.84          /*
   47.85           * Fast path for PTEs that have merely been write-protected
   47.86 @@ -2400,6 +2404,8 @@ void ptwr_flush(const int which)
   47.87      unmap_domain_mem(pl1e);
   47.88  
   47.89      perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
   47.90 +    ptwr_info[cpu].ptinfo[which].prev_exec_domain = ed;
   47.91 +    ptwr_info[cpu].ptinfo[which].prev_nr_updates  = modified;
   47.92  
   47.93      /*
   47.94       * STEP 3. Reattach the L1 p.t. page into the current address space.
   47.95 @@ -2424,6 +2430,133 @@ void ptwr_flush(const int which)
   47.96      }
   47.97  }
   47.98  
   47.99 +static int ptwr_emulated_update(
  47.100 +    unsigned long addr,
  47.101 +    unsigned long old,
  47.102 +    unsigned long val,
  47.103 +    unsigned int bytes,
  47.104 +    unsigned int do_cmpxchg)
  47.105 +{
  47.106 +    unsigned long sstat, pte, pfn;
  47.107 +    struct pfn_info *page;
  47.108 +    l1_pgentry_t ol1e, nl1e, *pl1e, *sl1e;
  47.109 +    struct domain *d = current->domain;
  47.110 +
  47.111 +    /* Aligned access only, thank you. */
  47.112 +    if ( (addr & (bytes-1)) != 0 )
  47.113 +    {
  47.114 +        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %p)\n",
  47.115 +                bytes, addr);
  47.116 +        return X86EMUL_UNHANDLEABLE;
  47.117 +    }
  47.118 +
  47.119 +    /* Turn a sub-word access into a full-word access. */
  47.120 +    if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
  47.121 +    {
  47.122 +        int           rc;
  47.123 +        unsigned long full;
  47.124 +        unsigned int  mask = addr & ((BITS_PER_LONG/8)-1);
  47.125 +        /* Align address; read full word. */
  47.126 +        addr &= ~((BITS_PER_LONG/8)-1);
  47.127 +        if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) )
  47.128 +            return rc;
  47.129 +        /* Mask out bits provided by caller. */
   47.130 +        full &= ~(((1UL << (bytes*8)) - 1UL) << (mask*8));
  47.131 +        /* Shift the caller value and OR in the missing bits. */
  47.132 +        val  &= (1UL << (bytes*8)) - 1UL;
  47.133 +        val <<= mask*8;
  47.134 +        val  |= full;
  47.135 +    }
  47.136 +
  47.137 +    /* Read the PTE that maps the page being updated. */
  47.138 +    if ( __get_user(pte, (unsigned long *)
  47.139 +                    &linear_pg_table[l1_linear_offset(addr)]) )
  47.140 +    {
  47.141 +        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table\n");
  47.142 +        return X86EMUL_UNHANDLEABLE;
  47.143 +    }
  47.144 +
  47.145 +    pfn  = pte >> PAGE_SHIFT;
  47.146 +    page = &frame_table[pfn];
  47.147 +
  47.148 +    /* We are looking only for read-only mappings of p.t. pages. */
  47.149 +    if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
  47.150 +         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) )
  47.151 +    {
  47.152 +        MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%p, %x)\n",
  47.153 +                pte, page->u.inuse.type_info);
  47.154 +        return X86EMUL_UNHANDLEABLE;
  47.155 +    }
  47.156 +
  47.157 +    /* Check the new PTE. */
  47.158 +    nl1e = mk_l1_pgentry(val);
  47.159 +    if ( unlikely(!get_page_from_l1e(nl1e, d)) )
  47.160 +        return X86EMUL_UNHANDLEABLE;
  47.161 +
  47.162 +    /* Checked successfully: do the update (write or cmpxchg). */
  47.163 +    pl1e = map_domain_mem(page_to_phys(page) + (addr & ~PAGE_MASK));
  47.164 +    if ( do_cmpxchg )
  47.165 +    {
  47.166 +        ol1e = mk_l1_pgentry(old);
  47.167 +        if ( cmpxchg((unsigned long *)pl1e, old, val) != old )
  47.168 +        {
  47.169 +            unmap_domain_mem(pl1e);
  47.170 +            return X86EMUL_CMPXCHG_FAILED;
  47.171 +        }
  47.172 +    }
  47.173 +    else
  47.174 +    {
  47.175 +        ol1e  = *pl1e;
  47.176 +        *pl1e = nl1e;
  47.177 +    }
  47.178 +    unmap_domain_mem(pl1e);
  47.179 +
  47.180 +    /* Propagate update to shadow cache. */
  47.181 +    if ( unlikely(shadow_mode_enabled(d)) )
  47.182 +    {
  47.183 +        sstat = get_shadow_status(d, page_to_pfn(page));
  47.184 +        if ( sstat & PSH_shadowed )
  47.185 +        {
  47.186 +            sl1e = map_domain_mem(
  47.187 +                ((sstat & PSH_pfn_mask) << PAGE_SHIFT) + (addr & ~PAGE_MASK));
  47.188 +            l1pte_propagate_from_guest(
  47.189 +                d, &l1_pgentry_val(nl1e), &l1_pgentry_val(*sl1e));
  47.190 +            unmap_domain_mem(sl1e);
  47.191 +        }
  47.192 +    }
  47.193 +
  47.194 +    /* Finally, drop the old PTE. */
  47.195 +    if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) )
  47.196 +        put_page_from_l1e(ol1e, d);
  47.197 +
  47.198 +    return X86EMUL_CONTINUE;
  47.199 +}
  47.200 +
  47.201 +static int ptwr_emulated_write(
  47.202 +    unsigned long addr,
  47.203 +    unsigned long val,
  47.204 +    unsigned int bytes)
  47.205 +{
  47.206 +    return ptwr_emulated_update(addr, 0, val, bytes, 0);
  47.207 +}
  47.208 +
  47.209 +static int ptwr_emulated_cmpxchg(
  47.210 +    unsigned long addr,
  47.211 +    unsigned long old,
  47.212 +    unsigned long new,
  47.213 +    unsigned int bytes)
  47.214 +{
  47.215 +    return ptwr_emulated_update(addr, old, new, bytes, 1);
  47.216 +}
  47.217 +
  47.218 +static struct x86_mem_emulator ptwr_mem_emulator = {
  47.219 +    .read_std         = x86_emulate_read_std,
  47.220 +    .write_std        = x86_emulate_write_std,
  47.221 +    .read_emulated    = x86_emulate_read_std,
  47.222 +    .write_emulated   = ptwr_emulated_write,
  47.223 +    .cmpxchg_emulated = ptwr_emulated_cmpxchg
  47.224 +};
  47.225 +
  47.226  /* Write page fault handler: check if guest is trying to modify a PTE. */
  47.227  int ptwr_do_page_fault(unsigned long addr)
  47.228  {
  47.229 @@ -2437,13 +2570,13 @@ int ptwr_do_page_fault(unsigned long add
  47.230      return 0; /* Writable pagetables need fixing for x86_64. */
  47.231  #endif
  47.232  
  47.233 +    /* Can't use linear_l2_table with external tables. */
  47.234 +    BUG_ON(shadow_mode_external(current->domain));
  47.235 +
  47.236      /*
  47.237       * Attempt to read the PTE that maps the VA being accessed. By checking for
  47.238       * PDE validity in the L2 we avoid many expensive fixups in __get_user().
  47.239       */
  47.240 -    if ( shadow_mode_external(current->domain) )
  47.241 -        BUG(); // can't use linear_l2_table with external tables.
  47.242 -
  47.243      if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) &
  47.244             _PAGE_PRESENT) ||
  47.245           __get_user(pte, (unsigned long *)
  47.246 @@ -2461,47 +2594,35 @@ int ptwr_do_page_fault(unsigned long add
  47.247      {
  47.248          return 0;
  47.249      }
  47.250 -    
  47.251 +
  47.252      /* Get the L2 index at which this L1 p.t. is always mapped. */
  47.253      l2_idx = page->u.inuse.type_info & PGT_va_mask;
  47.254      if ( unlikely(l2_idx >= PGT_va_unknown) )
  47.255 -    {
  47.256 -        domain_crash(); /* Urk! This L1 is mapped in multiple L2 slots! */
  47.257 -    }
  47.258 +        goto emulate; /* Urk! This L1 is mapped in multiple L2 slots! */
  47.259      l2_idx >>= PGT_va_shift;
  47.260  
  47.261 -    if ( l2_idx == (addr >> L2_PAGETABLE_SHIFT) )
  47.262 -    {
  47.263 -        MEM_LOG("PTWR failure! Pagetable maps itself at %p\n", addr);
  47.264 -        domain_crash();
  47.265 -    }
  47.266 +    if ( unlikely(l2_idx == (addr >> L2_PAGETABLE_SHIFT)) )
  47.267 +        goto emulate; /* Urk! Pagetable maps itself! */
  47.268  
  47.269      /*
  47.270       * Is the L1 p.t. mapped into the current address space? If so we call it
  47.271       * an ACTIVE p.t., otherwise it is INACTIVE.
  47.272       */
  47.273 -    if ( shadow_mode_external(current->domain) )
  47.274 -        BUG(); // can't use linear_l2_table with external tables.
  47.275 -
  47.276      pl2e = &linear_l2_table[l2_idx];
  47.277      l2e  = l2_pgentry_val(*pl2e);
  47.278      which = PTWR_PT_INACTIVE;
  47.279      if ( (l2e >> PAGE_SHIFT) == pfn )
  47.280      {
  47.281 -        /* Check the PRESENT bit to set ACTIVE. */
  47.282 -        if ( likely(l2e & _PAGE_PRESENT) )
  47.283 +        /*
  47.284 +         * Check the PRESENT bit to set ACTIVE mode.
  47.285 +         * If the PRESENT bit is clear, we may be conflicting with the current 
  47.286 +         * ACTIVE p.t. (it may be the same p.t. mapped at another virt addr).
  47.287 +         * The ptwr_flush call below will restore the PRESENT bit.
  47.288 +         */
  47.289 +        if ( likely(l2e & _PAGE_PRESENT) ||
  47.290 +             (ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
  47.291 +              (l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx)) )
  47.292              which = PTWR_PT_ACTIVE;
  47.293 -        else {
  47.294 -            /*
  47.295 -             * If the PRESENT bit is clear, we may be conflicting with
  47.296 -             * the current ACTIVE p.t. (it may be the same p.t. mapped
  47.297 -             * at another virt addr).
  47.298 -             * The ptwr_flush call below will restore the PRESENT bit.
  47.299 -             */
  47.300 -            if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
  47.301 -                 l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx )
  47.302 -                which = PTWR_PT_ACTIVE;
  47.303 -        }
  47.304      }
  47.305      
  47.306      PTWR_PRINTK("[%c] page_fault on l1 pt at va %p, pt for %08x, "
  47.307 @@ -2515,6 +2636,18 @@ int ptwr_do_page_fault(unsigned long add
  47.308      if ( ptwr_info[cpu].ptinfo[which].l1va )
  47.309          ptwr_flush(which);
  47.310  
  47.311 +    /*
  47.312 +     * If last batch made no updates then we are probably stuck. Emulate this 
  47.313 +     * update to ensure we make progress.
  47.314 +     */
  47.315 +    if ( (ptwr_info[cpu].ptinfo[which].prev_exec_domain == current) &&
  47.316 +         (ptwr_info[cpu].ptinfo[which].prev_nr_updates  == 0) )
  47.317 +    {
  47.318 +        /* Force non-emul next time, or we can get stuck emulating forever. */
  47.319 +        ptwr_info[cpu].ptinfo[which].prev_exec_domain = NULL;
  47.320 +        goto emulate;
  47.321 +    }
  47.322 +
  47.323      ptwr_info[cpu].ptinfo[which].l1va   = addr | 1;
  47.324      ptwr_info[cpu].ptinfo[which].l2_idx = l2_idx;
  47.325      
  47.326 @@ -2523,11 +2656,7 @@ int ptwr_do_page_fault(unsigned long add
  47.327           likely(!shadow_mode_enabled(current->domain)) )
  47.328      {
  47.329          *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
  47.330 -#if 1
  47.331          flush_tlb(); /* XXX Multi-CPU guests? */
  47.332 -#else
  47.333 -        flush_tlb_all();
  47.334 -#endif
  47.335      }
  47.336      
  47.337      /* Temporarily map the L1 page, and make a copy of it. */
  47.338 @@ -2552,6 +2681,13 @@ int ptwr_do_page_fault(unsigned long add
  47.339      }
  47.340      
  47.341      return EXCRET_fault_fixed;
  47.342 +
  47.343 + emulate:
  47.344 +    if ( x86_emulate_memop(get_execution_context(), addr,
  47.345 +                           &ptwr_mem_emulator, BITS_PER_LONG/8) )
  47.346 +        return 0;
  47.347 +    perfc_incrc(ptwr_emulations);
  47.348 +    return EXCRET_fault_fixed;
  47.349  }
  47.350  
  47.351  static __init int ptwr_init(void)
  47.352 @@ -2751,8 +2887,7 @@ void audit_domain(struct domain *d)
  47.353          pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;       
  47.354          page = &frame_table[pfn];
  47.355  
  47.356 -        if ( page_get_owner(page) != d )
  47.357 -            BUG();
  47.358 +        BUG_ON(page_get_owner(page) != d);
  47.359  
  47.360          if ( (page->u.inuse.type_info & PGT_count_mask) >
  47.361               (page->count_info & PGC_count_mask) )
  47.362 @@ -2798,8 +2933,7 @@ void audit_domain(struct domain *d)
  47.363          pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;       
  47.364          page = &frame_table[pfn];
  47.365  
  47.366 -        if ( page_get_owner(page) != d )
  47.367 -            BUG();
  47.368 +        BUG_ON(page_get_owner(page) != d);
  47.369  
  47.370          switch ( page->u.inuse.type_info & PGT_type_mask )
  47.371          {
  47.372 @@ -3049,7 +3183,10 @@ void audit_domain(struct domain *d)
  47.373              d->exec_domain[0]->arch.guest_table)>>PAGE_SHIFT], 1, 1);
  47.374  
  47.375      spin_unlock(&d->page_alloc_lock);
  47.376 -    printk("Audit %d: Done. ref=%d xenpages=%d pages=%d l1=%d l2=%d ctot=%d ttot=%d\n", d->id, atomic_read(&d->refcnt), d->xenheap_pages, d->tot_pages, l1, l2, ctot, ttot );
  47.377 +    printk("Audit %d: Done. ref=%d xenpages=%d pages=%d l1=%d"
  47.378 +           " l2=%d ctot=%d ttot=%d\n", 
  47.379 +           d->id, atomic_read(&d->refcnt), d->xenheap_pages, d->tot_pages,
  47.380 +           l1, l2, ctot, ttot );
  47.381  
  47.382      if ( d != current->domain )
  47.383          domain_unpause(d);
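
Note: ptwr_emulated_update always validates and installs a whole PTE, so a misaligned 1- or 2-byte guest write is first widened by reading the existing word and splicing the written bytes into it before the PTE checks run. A self-contained sketch of that splice, with the clearing mask fully parenthesised; offsets and values are illustrative, and only sub-word sizes are covered, as the patch's alignment check guarantees:

    #include <stdio.h>

    /* Sketch: splice a 'bytes'-wide value, written at byte offset 'off'
     * within a word, into the existing full-word value 'full'. */
    static unsigned long merge_subword(unsigned long full, unsigned long val,
                                       unsigned int off, unsigned int bytes)
    {
        /* Byte positions being overwritten (bytes < sizeof(long), so there
         * is no undefined full-width shift). */
        unsigned long written = ((1UL << (bytes * 8)) - 1UL) << (off * 8);

        val = (val << (off * 8)) & written;   /* shift new bytes into place */
        return (full & ~written) | val;       /* keep the rest, OR in new   */
    }

    int main(void)
    {
        /* Overwrite byte 1 of 0x11223344 with 0xab: expect 0x1122ab44. */
        printf("%#lx\n", merge_subword(0x11223344UL, 0xabUL, 1, 1));
        return 0;
    }
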
    48.1 --- a/xen/arch/x86/smp.c	Sat Mar 12 21:43:58 2005 +0000
    48.2 +++ b/xen/arch/x86/smp.c	Fri Mar 18 03:37:54 2005 +0000
    48.3 @@ -8,9 +8,11 @@
    48.4   *	later.
    48.5   */
    48.6  
    48.7 +#include <xen/config.h>
    48.8  #include <xen/irq.h>
    48.9  #include <xen/sched.h>
   48.10  #include <xen/delay.h>
   48.11 +#include <xen/perfc.h>
   48.12  #include <xen/spinlock.h>
   48.13  #include <asm/smp.h>
   48.14  #include <asm/mc146818rtc.h>
   48.15 @@ -18,8 +20,6 @@
   48.16  #include <asm/smpboot.h>
   48.17  #include <asm/hardirq.h>
   48.18  
   48.19 -#ifdef CONFIG_SMP
   48.20 -
   48.21  /*
   48.22   *	Some notes on x86 processor bugs affecting SMP operation:
   48.23   *
   48.24 @@ -420,5 +420,3 @@ asmlinkage void smp_call_function_interr
   48.25          atomic_inc(&call_data->finished);
   48.26      }
   48.27  }
   48.28 -
   48.29 -#endif /* CONFIG_SMP */
    49.1 --- a/xen/arch/x86/traps.c	Sat Mar 12 21:43:58 2005 +0000
    49.2 +++ b/xen/arch/x86/traps.c	Fri Mar 18 03:37:54 2005 +0000
    49.3 @@ -222,8 +222,19 @@ asmlinkage int do_int3(struct xen_regs *
    49.4          DEBUGGER_trap_fatal(TRAP_int3, regs);
    49.5          show_registers(regs);
    49.6          panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    49.7 +    } 
    49.8 +#ifdef DOMU_DEBUG
    49.9 +    else if ( KERNEL_MODE(ed, regs) && ed->domain->id != 0 ) 
   49.10 +    {
   49.11 +        if ( !test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) ) {
   49.12 +            while (ed == current)
   49.13 +                __enter_scheduler();
   49.14 +            domain_pause_by_systemcontroller(ed->domain);
   49.15 +        }
   49.16 +        
   49.17 +        return 0;
   49.18      }
   49.19 -
   49.20 +#endif /* DOMU_DEBUG */
   49.21      ti = current->arch.traps + 3;
   49.22      tb->flags = TBF_EXCEPTION;
   49.23      tb->cs    = ti->cs;
   49.24 @@ -886,8 +897,8 @@ asmlinkage int math_state_restore(struct
   49.25  asmlinkage int do_debug(struct xen_regs *regs)
   49.26  {
   49.27      unsigned long condition;
   49.28 -    struct exec_domain *d = current;
   49.29 -    struct trap_bounce *tb = &d->arch.trap_bounce;
   49.30 +    struct exec_domain *ed = current;
   49.31 +    struct trap_bounce *tb = &ed->arch.trap_bounce;
   49.32  
   49.33      DEBUGGER_trap_entry(TRAP_debug, regs);
   49.34  
   49.35 @@ -895,7 +906,7 @@ asmlinkage int do_debug(struct xen_regs 
   49.36  
   49.37      /* Mask out spurious debug traps due to lazy DR7 setting */
   49.38      if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
   49.39 -         (d->arch.debugreg[7] == 0) )
   49.40 +         (ed->arch.debugreg[7] == 0) )
   49.41      {
   49.42          __asm__("mov %0,%%db7" : : "r" (0UL));
   49.43          goto out;
   49.44 @@ -912,14 +923,26 @@ asmlinkage int do_debug(struct xen_regs 
   49.45           * breakpoint, which can't happen to us.
   49.46           */
   49.47          goto out;
   49.48 -    }
   49.49 +    } 
   49.50 +#ifdef DOMU_DEBUG
   49.51 +    else if ( KERNEL_MODE(ed, regs) && ed->domain->id != 0 ) 
   49.52 +    {
   49.53 +        regs->eflags &= ~EF_TF;
   49.54 +        if ( !test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) ) {
   49.55 +            while (ed == current)
   49.56 +                __enter_scheduler();
   49.57 +            domain_pause_by_systemcontroller(ed->domain);
   49.58 +        }
   49.59  
   49.60 +        goto out;
   49.61 +    }    
   49.62 +#endif /* DOMU_DEBUG */
   49.63      /* Save debug status register where guest OS can peek at it */
   49.64 -    d->arch.debugreg[6] = condition;
   49.65 +    ed->arch.debugreg[6] = condition;
   49.66  
   49.67      tb->flags = TBF_EXCEPTION;
   49.68 -    tb->cs    = d->arch.traps[1].cs;
   49.69 -    tb->eip   = d->arch.traps[1].address;
   49.70 +    tb->cs    = ed->arch.traps[1].cs;
   49.71 +    tb->eip   = ed->arch.traps[1].address;
   49.72  
   49.73   out:
   49.74      return EXCRET_not_a_fault;
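
Note: when the hypervisor is built with domu_debug=y (see the Rules.mk hunk above), int3 and single-step traps taken in a non-dom0 guest kernel are no longer bounced back into the guest; the vcpu spins out of the scheduler and the domain is paused for an external debugger. The test_and_set_bit(EDF_CTRLPAUSE, ...) guard makes the pause happen at most once. A rough self-contained sketch of that pause-once idiom, with the atomic bit op and the pause call stubbed out and purely illustrative:

    #include <stdio.h>

    /* Sketch: pause a domain at most once per debug event.  In Xen the flag
     * test is the atomic test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) and
     * the pause is domain_pause_by_systemcontroller(). */
    static unsigned long ed_flags;

    static int test_and_set_ctrlpause(void)
    {
        int was_set = ed_flags & 1;       /* not atomic; illustration only */
        ed_flags |= 1;
        return was_set;
    }

    static void pause_domain(void)
    {
        printf("domain paused for external debugger\n");
    }

    static void on_guest_breakpoint(void)
    {
        if (!test_and_set_ctrlpause())
            pause_domain();               /* first trap pauses the domain */
        /* later traps while paused fall through without re-pausing */
    }

    int main(void)
    {
        on_guest_breakpoint();
        on_guest_breakpoint();
        return 0;
    }
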
    50.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Sat Mar 12 21:43:58 2005 +0000
    50.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Fri Mar 18 03:37:54 2005 +0000
    50.3 @@ -5,6 +5,7 @@
    50.4   */
    50.5  
    50.6  #include <xen/config.h>
    50.7 +#include <xen/perfc.h>
    50.8  #include <xen/sched.h>
    50.9  
   50.10  #define DEFINE(_sym, _val) \
    51.1 --- a/xen/arch/x86/x86_32/entry.S	Sat Mar 12 21:43:58 2005 +0000
    51.2 +++ b/xen/arch/x86/x86_32/entry.S	Fri Mar 18 03:37:54 2005 +0000
    51.3 @@ -134,7 +134,7 @@ ENTRY(vmx_asm_do_launch)
    51.4  ENTRY(vmx_asm_do_resume)
    51.5  vmx_test_all_events:
    51.6          GET_CURRENT(%ebx)
    51.7 -/* test_all_events: */
    51.8 +/*test_all_events:*/
    51.9          xorl %ecx,%ecx
   51.10          notl %ecx
   51.11          cli                             # tests must not race interrupts
   51.12 @@ -174,8 +174,6 @@ vmx_process_softirqs:
   51.13  
   51.14          ALIGN
   51.15  restore_all_guest:
   51.16 -        btr  $_TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
   51.17 -        jc   failsafe_callback
   51.18          testl $X86_EFLAGS_VM,XREGS_eflags(%esp)
   51.19          jnz  restore_all_vm86
   51.20  FLT1:   movl XREGS_ds(%esp),%ds
   51.21 @@ -216,10 +214,20 @@ FIX1:   SET_XEN_SEGMENTS(a)
   51.22          jmp   error_code
   51.23  DBLFLT1:GET_CURRENT(%ebx)
   51.24          jmp   test_all_events
   51.25 -DBLFIX1:GET_CURRENT(%ebx)
   51.26 -        bts   $_TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
   51.27 -        jc    domain_crash             # cannot reenter failsafe code
   51.28 -        jmp   test_all_events          # will return via failsafe code
   51.29 +failsafe_callback:
   51.30 +        GET_CURRENT(%ebx)
   51.31 +        leal  EDOMAIN_trap_bounce(%ebx),%edx
   51.32 +        movl  EDOMAIN_failsafe_addr(%ebx),%eax
   51.33 +        movl  EDOMAIN_failsafe_sel(%ebx),%eax
   51.34 +        movw  %ax,TRAPBOUNCE_cs(%edx)
   51.35 +        movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
   51.36 +        call  create_bounce_frame
   51.37 +        xorl  %eax,%eax
   51.38 +        movl  %eax,XREGS_ds(%esp)
   51.39 +        movl  %eax,XREGS_es(%esp)
   51.40 +        movl  %eax,XREGS_fs(%esp)
   51.41 +        movl  %eax,XREGS_gs(%esp)
   51.42 +        jmp   test_all_events
   51.43  .previous
   51.44  .section __pre_ex_table,"a"
   51.45  	.long FLT1,FIX1
   51.46 @@ -229,37 +237,7 @@ DBLFIX1:GET_CURRENT(%ebx)
   51.47  	.long FLT5,FIX5
   51.48  .previous
   51.49  .section __ex_table,"a"
   51.50 -        .long DBLFLT1,DBLFIX1
   51.51 -.previous
   51.52 -
   51.53 -/* No special register assumptions */
   51.54 -failsafe_callback:
   51.55 -        GET_CURRENT(%ebx)
   51.56 -        leal EDOMAIN_trap_bounce(%ebx),%edx
   51.57 -        movl EDOMAIN_failsafe_addr(%ebx),%eax
   51.58 -        movl %eax,TRAPBOUNCE_eip(%edx)
   51.59 -        movl EDOMAIN_failsafe_sel(%ebx),%eax
   51.60 -        movw %ax,TRAPBOUNCE_cs(%edx)
   51.61 -        movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%edx)
   51.62 -        call create_bounce_frame
   51.63 -        popl %ebx
   51.64 -        popl %ecx
   51.65 -        popl %edx
   51.66 -        popl %esi
   51.67 -        popl %edi
   51.68 -        popl %ebp
   51.69 -        popl %eax
   51.70 -        addl $4,%esp
   51.71 -FLT6:   iret 
   51.72 -.section .fixup,"ax"
   51.73 -FIX6:   pushl %ebx
   51.74 -        GET_CURRENT(%ebx)
   51.75 -        orb   $TF_failsafe_return,EDOMAIN_thread_flags(%ebx)
   51.76 -        pop   %ebx
   51.77 -        jmp   FIX5
   51.78 -.previous
   51.79 -.section __pre_ex_table,"a"
   51.80 -	.long FLT6,FIX6
   51.81 +        .long DBLFLT1,failsafe_callback
   51.82  .previous
   51.83  
   51.84          ALIGN
   51.85 @@ -301,6 +279,7 @@ test_all_events:
   51.86          testb $0xFF,VCPUINFO_upcall_pending(%eax)
   51.87          jz   restore_all_guest
   51.88  /*process_guest_events:*/
   51.89 +        sti
   51.90          leal EDOMAIN_trap_bounce(%ebx),%edx
   51.91          movl EDOMAIN_event_addr(%ebx),%eax
   51.92          movl %eax,TRAPBOUNCE_eip(%edx)
   51.93 @@ -310,7 +289,7 @@ test_all_events:
   51.94          call create_bounce_frame
   51.95          movl EDOMAIN_vcpu_info(%ebx),%eax
   51.96          movb $1,VCPUINFO_upcall_mask(%eax) # Upcalls are masked during delivery
   51.97 -        jmp  restore_all_guest
   51.98 +        jmp  test_all_events
   51.99  
  51.100          ALIGN
  51.101  process_softirqs:
  51.102 @@ -336,67 +315,66 @@ create_bounce_frame:
  51.103          addl %ecx,%eax
  51.104          addl $init_tss + 12,%eax
  51.105          movl (%eax),%esi /* tss->esp1 */
  51.106 -FLT7:   movl 4(%eax),%gs /* tss->ss1  */
  51.107 +FLT6:   movl 4(%eax),%gs /* tss->ss1  */
  51.108          testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
  51.109          jz   nvm86_1
  51.110 -	subl $16,%esi       /* push ES/DS/FS/GS (VM86 stack frame) */
  51.111 -	movl XREGS_es+4(%esp),%eax
  51.112 -FLT8:   movl %eax,%gs:(%esi)
  51.113 -	movl XREGS_ds+4(%esp),%eax
  51.114 -FLT9:   movl %eax,%gs:4(%esi)
  51.115 -	movl XREGS_fs+4(%esp),%eax
  51.116 -FLT10:  movl %eax,%gs:8(%esi)
  51.117 -	movl XREGS_gs+4(%esp),%eax
  51.118 -FLT11:  movl %eax,%gs:12(%esi)
  51.119 +        subl $16,%esi       /* push ES/DS/FS/GS (VM86 stack frame) */
  51.120 +        movl XREGS_es+4(%esp),%eax
  51.121 +FLT7:   movl %eax,%gs:(%esi)
  51.122 +        movl XREGS_ds+4(%esp),%eax
  51.123 +FLT8:   movl %eax,%gs:4(%esi)
  51.124 +        movl XREGS_fs+4(%esp),%eax
  51.125 +FLT9:   movl %eax,%gs:8(%esi)
  51.126 +        movl XREGS_gs+4(%esp),%eax
  51.127 +FLT10:  movl %eax,%gs:12(%esi)
  51.128  nvm86_1:subl $8,%esi        /* push SS/ESP (inter-priv iret) */
  51.129          movl XREGS_esp+4(%esp),%eax
  51.130 -FLT12:  movl %eax,%gs:(%esi) 
  51.131 +FLT11:  movl %eax,%gs:(%esi) 
  51.132          movl XREGS_ss+4(%esp),%eax
  51.133 -FLT13:  movl %eax,%gs:4(%esi) 
  51.134 +FLT12:  movl %eax,%gs:4(%esi) 
  51.135          jmp 1f
  51.136  ring1:  /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */
  51.137          movl XREGS_esp+4(%esp),%esi
  51.138 -FLT14:  movl XREGS_ss+4(%esp),%gs 
  51.139 +FLT13:  movl XREGS_ss+4(%esp),%gs 
  51.140  1:      /* Construct a stack frame: EFLAGS, CS/EIP */
  51.141          subl $12,%esi
  51.142          movl XREGS_eip+4(%esp),%eax
  51.143 -FLT15:  movl %eax,%gs:(%esi) 
  51.144 +FLT14:  movl %eax,%gs:(%esi) 
  51.145          movl XREGS_cs+4(%esp),%eax
  51.146 -FLT16:  movl %eax,%gs:4(%esi) 
  51.147 +FLT15:  movl %eax,%gs:4(%esi) 
  51.148          movl XREGS_eflags+4(%esp),%eax
  51.149 -FLT17:  movl %eax,%gs:8(%esi)
  51.150 +FLT16:  movl %eax,%gs:8(%esi)
  51.151          movb TRAPBOUNCE_flags(%edx),%cl
  51.152          test $TBF_EXCEPTION_ERRCODE,%cl
  51.153          jz   1f
  51.154          subl $4,%esi                    # push error_code onto guest frame
  51.155          movl TRAPBOUNCE_error_code(%edx),%eax
  51.156 -FLT18:  movl %eax,%gs:(%esi)
  51.157 +FLT17:  movl %eax,%gs:(%esi)
  51.158          testb $TBF_EXCEPTION_CR2,%cl
  51.159          jz   2f
  51.160          subl $4,%esi                    # push %cr2 onto guest frame
  51.161          movl TRAPBOUNCE_cr2(%edx),%eax
  51.162 -FLT19:  movl %eax,%gs:(%esi)
  51.163 +FLT18:  movl %eax,%gs:(%esi)
  51.164  1:      testb $TBF_FAILSAFE,%cl
  51.165          jz   2f
  51.166          subl $16,%esi                # add DS/ES/FS/GS to failsafe stack frame
  51.167          testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
  51.168          jz   nvm86_2
  51.169          xorl %eax,%eax               # VM86: we write zero selector values
  51.170 -FLT20:  movl %eax,%gs:(%esi) 
  51.171 -FLT21:  movl %eax,%gs:4(%esi)
  51.172 -FLT22:  movl %eax,%gs:8(%esi) 
  51.173 -FLT23:  movl %eax,%gs:12(%esi)
  51.174 +FLT19:  movl %eax,%gs:(%esi) 
  51.175 +FLT20:  movl %eax,%gs:4(%esi)
  51.176 +FLT21:  movl %eax,%gs:8(%esi) 
  51.177 +FLT22:  movl %eax,%gs:12(%esi)
  51.178          jmp  2f
  51.179  nvm86_2:movl XREGS_ds+4(%esp),%eax   # non-VM86: write real selector values
  51.180 -FLT24:  movl %eax,%gs:(%esi) 
  51.181 +FLT23:  movl %eax,%gs:(%esi) 
  51.182          movl XREGS_es+4(%esp),%eax
  51.183 -FLT25:  movl %eax,%gs:4(%esi)
  51.184 +FLT24:  movl %eax,%gs:4(%esi)
  51.185          movl XREGS_fs+4(%esp),%eax
  51.186 -FLT26:  movl %eax,%gs:8(%esi) 
  51.187 +FLT25:  movl %eax,%gs:8(%esi) 
  51.188          movl XREGS_gs+4(%esp),%eax
  51.189 -FLT27:  movl %eax,%gs:12(%esi)
  51.190 -2:      movb $0,TRAPBOUNCE_flags(%edx)
  51.191 -        testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
  51.192 +FLT26:  movl %eax,%gs:12(%esi)
  51.193 +2:      testl $X86_EFLAGS_VM,XREGS_eflags+4(%esp)
  51.194          jz   nvm86_3
  51.195          xorl %eax,%eax      /* zero DS-GS, just as a real CPU would */
  51.196          movl %eax,XREGS_ds+4(%esp)
  51.197 @@ -412,29 +390,16 @@ nvm86_3:/* Rewrite our stack frame and r
  51.198          movl %eax,XREGS_cs+4(%esp)
  51.199          movl TRAPBOUNCE_eip(%edx),%eax
  51.200          movl %eax,XREGS_eip+4(%esp)
  51.201 +        movb $0,TRAPBOUNCE_flags(%edx)
  51.202          ret
  51.203 -.section .fixup,"ax"
  51.204 -FIX7:   sti
  51.205 -        popl  %esi
  51.206 -        addl  $4,%esp                  # Discard create_b_frame return address
  51.207 -        pushfl                         # EFLAGS
  51.208 -        movl  $__HYPERVISOR_CS,%eax
  51.209 -        pushl %eax                     # CS
  51.210 -        movl  $DBLFLT2,%eax
  51.211 -        pushl %eax                     # EIP
  51.212 -        pushl %esi                     # error_code/entry_vector
  51.213 -        jmp   error_code
  51.214 -DBLFLT2:jmp   process_guest_exception_and_events
  51.215 -.previous
  51.216 -.section __pre_ex_table,"a"
  51.217 -	.long  FLT7,FIX7 ,  FLT8,FIX7 ,  FLT9,FIX7 , FLT10,FIX7
  51.218 -	.long FLT11,FIX7 , FLT12,FIX7 , FLT13,FIX7 , FLT14,FIX7
  51.219 -	.long FLT15,FIX7 , FLT16,FIX7 , FLT17,FIX7 , FLT18,FIX7
  51.220 -	.long FLT19,FIX7 , FLT20,FIX7 , FLT21,FIX7 , FLT22,FIX7
  51.221 -	.long FLT23,FIX7 , FLT24,FIX7 , FLT25,FIX7 , FLT26,FIX7 , FLT27,FIX7
  51.222 -.previous
  51.223  .section __ex_table,"a"
  51.224 -        .long DBLFLT2,domain_crash
  51.225 +	.long  FLT6,domain_crash ,  FLT7,domain_crash ,  FLT8,domain_crash
  51.226 +        .long  FLT9,domain_crash , FLT10,domain_crash , FLT11,domain_crash
  51.227 +        .long FLT12,domain_crash , FLT13,domain_crash , FLT14,domain_crash
  51.228 +        .long FLT15,domain_crash , FLT16,domain_crash , FLT17,domain_crash
  51.229 +	.long FLT18,domain_crash , FLT19,domain_crash , FLT20,domain_crash
  51.230 +        .long FLT21,domain_crash , FLT22,domain_crash , FLT23,domain_crash
  51.231 +        .long FLT24,domain_crash , FLT25,domain_crash , FLT26,domain_crash
  51.232  .previous
  51.233  
  51.234          ALIGN
  51.235 @@ -442,7 +407,6 @@ process_guest_exception_and_events:
  51.236          leal EDOMAIN_trap_bounce(%ebx),%edx
  51.237          testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
  51.238          jz   test_all_events
  51.239 -        cli  # create_bounce_frame needs CLI for pre-exceptions to work
  51.240          call create_bounce_frame
  51.241          jmp  test_all_events
  51.242  
    52.1 --- a/xen/arch/x86/x86_32/mm.c	Sat Mar 12 21:43:58 2005 +0000
    52.2 +++ b/xen/arch/x86/x86_32/mm.c	Fri Mar 18 03:37:54 2005 +0000
    52.3 @@ -202,8 +202,7 @@ long do_stack_switch(unsigned long ss, u
    52.4      int nr = smp_processor_id();
    52.5      struct tss_struct *t = &init_tss[nr];
    52.6  
    52.7 -    /* We need to do this check as we load and use SS on guest's behalf. */
    52.8 -    if ( (ss & 3) == 0 )
    52.9 +    if ( (ss & 3) != 1 )
   52.10          return -EPERM;
   52.11  
   52.12      current->arch.kernel_ss = ss;
   52.13 @@ -278,6 +277,7 @@ int check_descriptor(struct desc_struct 
   52.14      if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
   52.15      {
   52.16          /*
   52.17 +         * DATA, GROWS-DOWN.
   52.18           * Grows-down limit check. 
   52.19           * NB. limit == 0xFFFFF provides no access      (if G=1).
   52.20           *     limit == 0x00000 provides 4GB-4kB access (if G=1).
   52.21 @@ -291,6 +291,8 @@ int check_descriptor(struct desc_struct 
   52.22      else
   52.23      {
   52.24          /*
   52.25 +         * DATA, GROWS-UP. 
   52.26 +         * CODE (CONFORMING AND NON-CONFORMING).
   52.27           * Grows-up limit check.
   52.28           * NB. limit == 0xFFFFF provides 4GB access (if G=1).
   52.29           *     limit == 0x00000 provides 4kB access (if G=1).
    53.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Sat Mar 12 21:43:58 2005 +0000
    53.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Fri Mar 18 03:37:54 2005 +0000
    53.3 @@ -5,6 +5,7 @@
    53.4   */
    53.5  
    53.6  #include <xen/config.h>
    53.7 +#include <xen/perfc.h>
    53.8  #include <xen/sched.h>
    53.9  
   53.10  #define DEFINE(_sym, _val) \
    54.1 --- a/xen/arch/x86/x86_64/entry.S	Sat Mar 12 21:43:58 2005 +0000
    54.2 +++ b/xen/arch/x86/x86_64/entry.S	Fri Mar 18 03:37:54 2005 +0000
    54.3 @@ -21,8 +21,6 @@
    54.4          ALIGN
    54.5  /* %rbx: struct exec_domain */
    54.6  restore_all_guest:
    54.7 -        btr   $_TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
    54.8 -        jc    failsafe_callback
    54.9          RESTORE_ALL
   54.10          testw $TRAP_syscall,4(%rsp)
   54.11          jz    iret_exit_to_guest
   54.12 @@ -59,39 +57,20 @@ FIX1:   popq  -15*8-8(%rsp)            #
   54.13          jmp   error_code
   54.14  DBLFLT1:GET_CURRENT(%rbx)
   54.15          jmp   test_all_events
   54.16 -DBLFIX1:GET_CURRENT(%rbx)
   54.17 -        bts   $_TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
   54.18 -        jc    domain_crash             # cannot reenter failsafe code
   54.19 -        jmp   test_all_events          # will return via failsafe code
   54.20 +failsafe_callback:
   54.21 +        GET_CURRENT(%rbx)
   54.22 +        leaq  EDOMAIN_trap_bounce(%rbx),%rdx
   54.23 +        movq  EDOMAIN_failsafe_addr(%rbx),%rax
   54.24 +        movq  %rax,TRAPBOUNCE_eip(%rdx)
   54.25 +        movw  $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
   54.26 +        call  create_bounce_frame
   54.27 +        jmp   test_all_events
   54.28  .previous
   54.29  .section __pre_ex_table,"a"
   54.30  	.quad FLT1,FIX1
   54.31  .previous
   54.32  .section __ex_table,"a"
   54.33 -        .quad DBLFLT1,DBLFIX1
   54.34 -.previous
   54.35 -
   54.36 -/* No special register assumptions */
   54.37 -failsafe_callback:
   54.38 -        GET_CURRENT(%rbx)
   54.39 -        leaq EDOMAIN_trap_bounce(%rbx),%rdx
   54.40 -        movq EDOMAIN_failsafe_addr(%rbx),%rax
   54.41 -        movq %rax,TRAPBOUNCE_eip(%rdx)
   54.42 -        movw $TBF_FAILSAFE,TRAPBOUNCE_flags(%rdx)
   54.43 -        call create_bounce_frame
   54.44 -        RESTORE_ALL
   54.45 -        addq $8,%rsp
   54.46 -FLT2:   iret 
   54.47 -
   54.48 -.section .fixup,"ax"
   54.49 -FIX2:   pushq %rbx
   54.50 -        GET_CURRENT(%rbx)
   54.51 -        orb   $TF_failsafe_return,EDOMAIN_thread_flags(%rbx)
   54.52 -        popq  %rbx
   54.53 -        jmp   FIX1
   54.54 -.previous
   54.55 -.section __pre_ex_table,"a"
   54.56 -	.quad FLT2,FIX2
   54.57 +        .quad DBLFLT1,failsafe_callback
   54.58  .previous
   54.59  
   54.60          ALIGN
   54.61 @@ -124,6 +103,7 @@ ENTRY(syscall_enter)
   54.62          pushq $0
   54.63          movl  $TRAP_syscall,4(%rsp)
   54.64          SAVE_ALL
   54.65 +        sti
   54.66          GET_CURRENT(%rbx)
   54.67          testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
   54.68          jnz   hypercall
   54.69 @@ -137,7 +117,6 @@ ENTRY(syscall_enter)
   54.70  
   54.71  /* %rbx: struct exec_domain */
   54.72  hypercall:
   54.73 -        sti
   54.74          movq  %r10,%rcx
   54.75          andq  $(NR_hypercalls-1),%rax
   54.76          leaq  SYMBOL_NAME(hypercall_table)(%rip),%r10
   54.77 @@ -161,6 +140,7 @@ test_all_events:
   54.78          testb $0xFF,VCPUINFO_upcall_pending(%rax)
   54.79          jz    restore_all_guest
   54.80  /*process_guest_events:*/
   54.81 +        sti
   54.82          leaq  EDOMAIN_trap_bounce(%rbx),%rdx
   54.83          movq  EDOMAIN_event_addr(%rbx),%rax
   54.84          movq  %rax,TRAPBOUNCE_eip(%rdx)
   54.85 @@ -168,7 +148,7 @@ test_all_events:
   54.86          call  create_bounce_frame
   54.87          movq  EDOMAIN_vcpu_info(%rbx),%rax
   54.88          movb  $1,VCPUINFO_upcall_mask(%rax) # Upcalls masked during delivery
   54.89 -        jmp   restore_all_guest
   54.90 +        jmp   test_all_events
   54.91  
   54.92          ALIGN
   54.93  /* %rbx: struct exec_domain */
   54.94 @@ -176,19 +156,25 @@ process_softirqs:
   54.95          sti       
   54.96          call SYMBOL_NAME(do_softirq)
   54.97          jmp  test_all_events
   54.98 -                
   54.99 +
  54.100  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
  54.101  /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
  54.102  /* %rdx: trap_bounce, %rbx: struct exec_domain                           */
  54.103  /* On return only %rbx is guaranteed non-clobbered.                      */
  54.104  create_bounce_frame:
  54.105 -        /* Push new frame at existing %rsp if already in guest-OS mode. */
  54.106 -        movq  XREGS_rsp+8(%rsp),%rsi
  54.107          testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
  54.108          jnz   1f
  54.109          /* Push new frame at registered guest-OS stack base. */
  54.110          movq  EDOMAIN_kernel_sp(%rbx),%rsi
  54.111 -1:      movq  $HYPERVISOR_VIRT_START,%rax
  54.112 +        pushq %rdx
  54.113 +        movq  %rbx,%rdi
  54.114 +        call  SYMBOL_NAME(toggle_guest_mode)
  54.115 +        popq  %rdx
  54.116 +        jmp   2f
  54.117 +1:      /* In kernel context already: push new frame at existing %rsp. */
  54.118 +        movq  XREGS_rsp+8(%rsp),%rsi
  54.119 +        andb  $0xfc,XREGS_cs+8(%rsp)    # Indicate kernel context to guest.
  54.120 +2:      movq  $HYPERVISOR_VIRT_START,%rax
  54.121          cmpq  %rax,%rsi
  54.122          jb    1f                        # In +ve address space? Then okay.
  54.123          movq  $HYPERVISOR_VIRT_END+60,%rax
  54.124 @@ -196,78 +182,59 @@ 1:      movq  $HYPERVISOR_VIRT_START,%ra
  54.125          jb    domain_crash              # Above Xen private area? Then okay.
  54.126  1:      subq  $40,%rsi
  54.127          movq  XREGS_ss+8(%rsp),%rax
  54.128 -FLT3:   movq  %rax,32(%rsi)             # SS
  54.129 +FLT2:   movq  %rax,32(%rsi)             # SS
  54.130          movq  XREGS_rsp+8(%rsp),%rax
  54.131 -FLT4:   movq  %rax,24(%rsi)             # RSP
  54.132 +FLT3:   movq  %rax,24(%rsi)             # RSP
  54.133          movq  XREGS_eflags+8(%rsp),%rax
  54.134 -FLT5:   movq  %rax,16(%rsi)             # RFLAGS
  54.135 +FLT4:   movq  %rax,16(%rsi)             # RFLAGS
  54.136          movq  XREGS_cs+8(%rsp),%rax
  54.137 -FLT6:   movq  %rax,8(%rsi)              # CS
  54.138 +FLT5:   movq  %rax,8(%rsi)              # CS
  54.139          movq  XREGS_rip+8(%rsp),%rax
  54.140 -FLT7:   movq  %rax,(%rsi)               # RIP
  54.141 +FLT6:   movq  %rax,(%rsi)               # RIP
  54.142          movb  TRAPBOUNCE_flags(%rdx),%cl
  54.143          testb $TBF_EXCEPTION_ERRCODE,%cl
  54.144          jz    1f
  54.145          subq  $8,%rsi
  54.146 -        movq  TRAPBOUNCE_error_code(%rdx),%rax
  54.147 -FLT8:   movq  %rax,(%rsi)               # ERROR CODE
  54.148 +        movl  TRAPBOUNCE_error_code(%rdx),%eax
  54.149 +FLT7:   movq  %rax,(%rsi)               # ERROR CODE
  54.150          testb $TBF_EXCEPTION_CR2,%cl
  54.151          jz    2f
  54.152          subq  $8,%rsi
  54.153          movq  TRAPBOUNCE_cr2(%rdx),%rax
  54.154 -FLT9:   movq  %rax,(%rsi)               # CR2
  54.155 +FLT8:   movq  %rax,(%rsi)               # CR2
  54.156  1:      testb $TBF_FAILSAFE,%cl
  54.157          jz    2f
  54.158          subq  $32,%rsi
  54.159          movl  %gs,%eax
  54.160 -FLT10:  movq  %rax,24(%rsi)             # GS
  54.161 +FLT9:   movq  %rax,24(%rsi)             # GS
  54.162          movl  %fs,%eax
  54.163 -FLT11:  movq  %rax,16(%rsi)             # FS
  54.164 +FLT10:  movq  %rax,16(%rsi)             # FS
  54.165          movl  %es,%eax
  54.166 -FLT12:  movq  %rax,8(%rsi)              # ES
  54.167 +FLT11:  movq  %rax,8(%rsi)              # ES
  54.168          movl  %ds,%eax
  54.169 -FLT13:  movq  %rax,(%rsi)               # DS
  54.170 +FLT12:  movq  %rax,(%rsi)               # DS
  54.171  2:      subq  $16,%rsi
  54.172          movq  XREGS_r11+8(%rsp),%rax
  54.173 -FLT14:  movq  %rax,(%rsi)               # R11
  54.174 +FLT13:  movq  %rax,(%rsi)               # R11
  54.175          movq  XREGS_rcx+8(%rsp),%rax
  54.176 -FLT15:  movq  %rax,(%rsi)               # RCX
  54.177 +FLT14:  movq  %rax,(%rsi)               # RCX
  54.178          /* Rewrite our stack frame and return to guest-OS mode. */
  54.179          /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. */
  54.180 -        movl  $TRAP_syscall,XREGS_entry_vector+8(%rsp)
  54.181 +        movq  $TRAP_syscall,XREGS_entry_vector+8(%rsp)
  54.182          andl  $0xfffcbeff,XREGS_eflags+8(%rsp)
  54.183 -        movl  $__GUEST_SS,XREGS_ss+8(%rsp)
  54.184 +        movq  $__GUEST_SS,XREGS_ss+8(%rsp)
  54.185          movq  %rsi,XREGS_rsp+8(%rsp)
  54.186 -        movl  $__GUEST_CS,XREGS_cs+8(%rsp)
  54.187 +        movq  $__GUEST_CS,XREGS_cs+8(%rsp)
  54.188          movq  TRAPBOUNCE_eip(%rdx),%rax
  54.189          movq  %rax,XREGS_rip+8(%rsp)
  54.190          movb  $0,TRAPBOUNCE_flags(%rdx)
  54.191 -        testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
  54.192 -        movq  %rbx,%rdi # toggle_guest_mode(current)
  54.193 -        jz    SYMBOL_NAME(toggle_guest_mode)
  54.194          ret
  54.195 -.section .fixup,"ax"
  54.196 -FIX3:   sti
  54.197 -        popq  %rsi
  54.198 -        addq  $8,%rsp                  # Discard create_b_frame return address
  54.199 -        pushq $__HYPERVISOR_DS         # SS
  54.200 -        leaq  8(%rsp),%rax
  54.201 -        pushq %rax                     # RSP
  54.202 -        pushf                          # RFLAGS
  54.203 -        pushq $__HYPERVISOR_CS         # CS
  54.204 -        leaq  DBLFLT2(%rip),%rax
  54.205 -        pushq %rax                     # RIP
  54.206 -        pushq %rsi                     # error_code/entry_vector
  54.207 -        jmp   error_code
  54.208 -DBLFLT2:jmp   process_guest_exception_and_events
  54.209 -.previous
  54.210 -.section __pre_ex_table,"a"
  54.211 -	.quad  FLT3,FIX3 ,  FLT4,FIX3 ,  FLT5,FIX3 ,  FLT6,FIX3
  54.212 -	.quad  FLT7,FIX3 ,  FLT8,FIX3 ,  FLT9,FIX3 , FLT10,FIX3
  54.213 -	.quad FLT11,FIX3 , FLT12,FIX3 , FLT13,FIX3 , FLT14,FIX3 , FLT15,FIX3
  54.214 -.previous
  54.215  .section __ex_table,"a"
  54.216 -        .quad DBLFLT2,domain_crash
  54.217 +        .quad  FLT2,domain_crash ,  FLT3,domain_crash ,  FLT4,domain_crash
  54.218 +        .quad  FLT5,domain_crash ,  FLT6,domain_crash ,  FLT7,domain_crash
  54.219 +        .quad  FLT8,domain_crash ,  FLT9,domain_crash , FLT10,domain_crash
  54.220 +        .quad FLT11,domain_crash , FLT12,domain_crash , FLT13,domain_crash
  54.221 +        .quad FLT14,domain_crash
  54.222  .previous
  54.223  
  54.224          ALIGN
  54.225 @@ -276,7 +243,6 @@ process_guest_exception_and_events:
  54.226          leaq  EDOMAIN_trap_bounce(%rbx),%rdx
  54.227          testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
  54.228          jz    test_all_events
  54.229 -        cli   # create_bounce_frame needs CLI for pre-exceptions to work
  54.230          call  create_bounce_frame
  54.231          jmp   test_all_events
  54.232  
    55.1 --- a/xen/arch/x86/x86_64/mm.c	Sat Mar 12 21:43:58 2005 +0000
    55.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Mar 18 03:37:54 2005 +0000
    55.3 @@ -287,7 +287,7 @@ int check_descriptor(struct desc_struct 
    55.4      if ( (b & _SEGMENT_DPL) != 3 )
    55.5          goto bad;
    55.6  
    55.7 -    /* Any code or data segment is okay. No base/limit checking. */
    55.8 +    /* All code and data segments are okay. No base/limit checking. */
    55.9      if ( (b & _SEGMENT_S) )
   55.10          goto good;
   55.11  
    56.1 --- a/xen/arch/x86/x86_emulate.c	Sat Mar 12 21:43:58 2005 +0000
    56.2 +++ b/xen/arch/x86/x86_emulate.c	Fri Mar 18 03:37:54 2005 +0000
    56.3 @@ -22,6 +22,7 @@ typedef int64_t            s64;
    56.4  #include <xen/config.h>
    56.5  #include <xen/types.h>
    56.6  #include <xen/lib.h>
    56.7 +#include <xen/mm.h>
    56.8  #include <asm/regs.h>
    56.9  #endif
   56.10  #include <asm-x86/x86_emulate.h>
   56.11 @@ -363,7 +364,7 @@ do{ __asm__ __volatile__ (              
   56.12  /* Fetch next part of the instruction being emulated. */
   56.13  #define insn_fetch(_type, _size, _eip) \
   56.14  ({ unsigned long _x; \
   56.15 -   if ( ops->read_std((unsigned long)(_eip), &_x, (_size)) ) \
   56.16 +   if ( (rc = ops->read_std((unsigned long)(_eip), &_x, (_size))) != 0 ) \
   56.17         goto done; \
   56.18     (_eip) += (_size); \
   56.19     (_type)_x; \
   56.20 @@ -422,6 +423,7 @@ x86_emulate_memop(
   56.21      u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
   56.22      unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
   56.23      unsigned int lock_prefix = 0, rep_prefix = 0, i;
   56.24 +    int rc = 0;
   56.25      struct operand src, dst;
   56.26  
   56.27      /* Shadow copy of register state. Committed on successful emulation. */
   56.28 @@ -556,7 +558,8 @@ x86_emulate_memop(
   56.29          dst.ptr   = (unsigned long *)cr2;
   56.30          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   56.31          if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
   56.32 -             ops->read_emulated((unsigned long)dst.ptr, &dst.val, dst.bytes) )
   56.33 +             ((rc = ops->read_emulated((unsigned long)dst.ptr,
   56.34 +                                       &dst.val, dst.bytes)) != 0) )
   56.35               goto done;
   56.36          break;
   56.37      }
   56.38 @@ -590,7 +593,8 @@ x86_emulate_memop(
   56.39          src.type  = OP_MEM;
   56.40          src.ptr   = (unsigned long *)cr2;
   56.41          src.bytes = (d & ByteOp) ? 1 : op_bytes;
   56.42 -        if ( ops->read_emulated((unsigned long)src.ptr, &src.val, src.bytes) )
   56.43 +        if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
   56.44 +                                      &src.val, src.bytes)) != 0 )
   56.45              goto done;
   56.46          src.orig_val = src.val;
   56.47          break;
   56.48 @@ -664,6 +668,7 @@ x86_emulate_memop(
   56.49          src.val ^= dst.val;
   56.50          dst.val ^= src.val;
   56.51          src.val ^= dst.val;
   56.52 +        lock_prefix = 1;
   56.53          break;
   56.54      case 0xa0 ... 0xa1: /* mov */
   56.55          dst.ptr = (unsigned long *)&_regs.eax;
   56.56 @@ -682,7 +687,7 @@ x86_emulate_memop(
   56.57          /* 64-bit mode: POP defaults to 64-bit operands. */
   56.58          if ( (mode == 8) && (dst.bytes == 4) )
   56.59              dst.bytes = 8;
   56.60 -        if ( ops->read_std(_regs.esp, &dst.val, dst.bytes) )
   56.61 +        if ( (rc = ops->read_std(_regs.esp, &dst.val, dst.bytes)) != 0 )
   56.62              goto done;
   56.63          _regs.esp += dst.bytes;
   56.64          break;
   56.65 @@ -759,11 +764,12 @@ x86_emulate_memop(
   56.66              if ( (mode == 8) && (dst.bytes == 4) )
   56.67              {
   56.68                  dst.bytes = 8;
   56.69 -                if ( ops->read_std((unsigned long)dst.ptr, &dst.val, 8) )
   56.70 +                if ( (rc = ops->read_std((unsigned long)dst.ptr,
   56.71 +                                         &dst.val, 8)) != 0 )
   56.72                      goto done;
   56.73              }
   56.74              _regs.esp -= dst.bytes;
   56.75 -            if ( ops->write_std(_regs.esp, dst.val, dst.bytes) )
   56.76 +            if ( (rc = ops->write_std(_regs.esp, dst.val, dst.bytes)) != 0 )
   56.77                  goto done;
   56.78              dst.val = dst.orig_val; /* skanky: disable writeback */
   56.79              break;
   56.80 @@ -790,22 +796,13 @@ x86_emulate_memop(
   56.81              break;
   56.82          case OP_MEM:
   56.83              if ( lock_prefix )
   56.84 -            {
   56.85 -                unsigned long seen;
   56.86 -                if ( ops->cmpxchg_emulated((unsigned long)dst.ptr,
   56.87 -                                           dst.orig_val, dst.val,
   56.88 -                                           &seen, dst.bytes) )
   56.89 -                    goto done;
   56.90 -                if ( seen != dst.orig_val )
   56.91 -                    goto done; /* Try again... */
   56.92 -            }
   56.93 +                rc = ops->cmpxchg_emulated(
   56.94 +                    (unsigned long)dst.ptr, dst.orig_val, dst.val, dst.bytes);
   56.95              else
   56.96 -            {
   56.97 -                if ( ops->write_emulated((unsigned long)dst.ptr,
   56.98 -                                         dst.val, dst.bytes) )
   56.99 -                    goto done;
  56.100 -            }
  56.101 -            break;
  56.102 +                rc = ops->write_emulated(
  56.103 +                    (unsigned long)dst.ptr, dst.val, dst.bytes);
  56.104 +            if ( rc != 0 )
  56.105 +                goto done;
  56.106          default:
  56.107              break;
  56.108          }
  56.109 @@ -815,7 +812,7 @@ x86_emulate_memop(
  56.110      *regs = _regs;
  56.111  
  56.112   done:
  56.113 -    return 0;
  56.114 +    return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
  56.115  
  56.116   special_insn:
  56.117      if ( twobyte )
  56.118 @@ -839,15 +836,15 @@ x86_emulate_memop(
  56.119          {
  56.120              /* Write fault: destination is special memory. */
  56.121              dst.ptr = (unsigned long *)cr2;
  56.122 -            if ( ops->read_std(_regs.esi - _regs.edi + cr2, 
  56.123 -                               &dst.val, dst.bytes) )
  56.124 +            if ( (rc = ops->read_std(_regs.esi - _regs.edi + cr2, 
  56.125 +                                     &dst.val, dst.bytes)) != 0 )
  56.126                  goto done;
  56.127          }
  56.128          else
  56.129          {
  56.130              /* Read fault: source is special memory. */
  56.131              dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
  56.132 -            if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
  56.133 +            if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
  56.134                  goto done;
  56.135          }
  56.136          _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  56.137 @@ -867,7 +864,7 @@ x86_emulate_memop(
  56.138          dst.type  = OP_REG;
  56.139          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  56.140          dst.ptr   = (unsigned long *)&_regs.eax;
  56.141 -        if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
  56.142 +        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes)) != 0 )
  56.143              goto done;
  56.144          _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  56.145          break;
  56.146 @@ -971,3 +968,39 @@ x86_emulate_memop(
  56.147      DPRINTF("Cannot emulate %02x\n", b);
  56.148      return -1;
  56.149  }
  56.150 +
  56.151 +#ifndef __TEST_HARNESS__
  56.152 +
  56.153 +#include <asm/mm.h>
  56.154 +#include <asm/uaccess.h>
  56.155 +
  56.156 +int
  56.157 +x86_emulate_read_std(
  56.158 +    unsigned long addr,
  56.159 +    unsigned long *val,
  56.160 +    unsigned int bytes)
  56.161 +{
  56.162 +    *val = 0;
  56.163 +    if ( copy_from_user((void *)val, (void *)addr, bytes) )
  56.164 +    {
  56.165 +        propagate_page_fault(addr, 4); /* user mode, read fault */
  56.166 +        return X86EMUL_PROPAGATE_FAULT;
  56.167 +    }
  56.168 +    return X86EMUL_CONTINUE;
  56.169 +}
  56.170 +
  56.171 +int
  56.172 +x86_emulate_write_std(
  56.173 +    unsigned long addr,
  56.174 +    unsigned long val,
  56.175 +    unsigned int bytes)
  56.176 +{
  56.177 +    if ( copy_to_user((void *)addr, (void *)&val, bytes) )
  56.178 +    {
  56.179 +        propagate_page_fault(addr, 6); /* user mode, write fault */
  56.180 +        return X86EMUL_PROPAGATE_FAULT;
  56.181 +    }
  56.182 +    return X86EMUL_CONTINUE;
  56.183 +}
  56.184 +
  56.185 +#endif
    57.1 --- a/xen/common/dom0_ops.c	Sat Mar 12 21:43:58 2005 +0000
    57.2 +++ b/xen/common/dom0_ops.c	Fri Mar 18 03:37:54 2005 +0000
    57.3 @@ -111,13 +111,13 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    57.4      switch ( op->cmd )
    57.5      {
    57.6  
    57.7 -    case DOM0_BUILDDOMAIN:
    57.8 +    case DOM0_SETDOMAININFO:
    57.9      {
   57.10 -        struct domain *d = find_domain_by_id(op->u.builddomain.domain);
   57.11 +        struct domain *d = find_domain_by_id(op->u.setdomaininfo.domain);
   57.12          ret = -ESRCH;
   57.13          if ( d != NULL )
   57.14          {
   57.15 -            ret = final_setup_guest(d, &op->u.builddomain);
   57.16 +            ret = set_info_guest(d, &op->u.setdomaininfo);
   57.17              put_domain(d);
   57.18          }
   57.19      }
    58.1 --- a/xen/common/dom_mem_ops.c	Sat Mar 12 21:43:58 2005 +0000
    58.2 +++ b/xen/common/dom_mem_ops.c	Fri Mar 18 03:37:54 2005 +0000
    58.3 @@ -142,8 +142,6 @@ do_dom_mem_op(unsigned long  op,
    58.4      else if ( unlikely((d = find_domain_by_id(domid)) == NULL) )
    58.5          return -ESRCH;
    58.6  
    58.7 -    LOCK_BIGLOCK(d);
    58.8 -
    58.9      switch ( op )
   58.10      {
   58.11      case MEMOP_increase_reservation:
   58.12 @@ -162,8 +160,6 @@ do_dom_mem_op(unsigned long  op,
   58.13      if ( unlikely(domid != DOMID_SELF) )
   58.14          put_domain(d);
   58.15  
   58.16 -    UNLOCK_BIGLOCK(d);
   58.17 -
   58.18      return rc;
   58.19  }
   58.20  
    59.1 --- a/xen/common/domain.c	Sat Mar 12 21:43:58 2005 +0000
    59.2 +++ b/xen/common/domain.c	Fri Mar 18 03:37:54 2005 +0000
    59.3 @@ -172,7 +172,7 @@ void domain_shutdown(u8 reason)
    59.4  
    59.5          debugger_trap_immediate();
    59.6  
    59.7 -        if ( reason == 0 ) 
    59.8 +        if ( reason == SHUTDOWN_poweroff ) 
    59.9          {
   59.10              printk("Domain 0 halted: halting machine.\n");
   59.11              machine_halt();
   59.12 @@ -184,6 +184,12 @@ void domain_shutdown(u8 reason)
   59.13          }
   59.14      }
   59.15  
   59.16 +    if ( reason == SHUTDOWN_crash )
   59.17 +    {
   59.18 +        domain_crash();
   59.19 +        BUG();
   59.20 +    }
   59.21 +
   59.22      d->shutdown_code = reason;
   59.23      set_bit(DF_SHUTDOWN, &d->d_flags);
   59.24  
   59.25 @@ -256,31 +262,34 @@ void domain_destruct(struct domain *d)
   59.26  
   59.27  
   59.28  /*
   59.29 - * final_setup_guest is used for final setup and launching of domains other
   59.30 - * than domain 0. ie. the domains that are being built by the userspace dom0
   59.31 - * domain builder.
   59.32 + * set_info_guest is used for final setup, launching, and state modification 
    59.33 + * of domains other than domain 0, i.e. the domains that are being built by 
   59.34 + * the userspace dom0 domain builder.
   59.35   */
   59.36 -int final_setup_guest(struct domain *p, dom0_builddomain_t *builddomain)
   59.37 +int set_info_guest(struct domain *p, dom0_setdomaininfo_t *setdomaininfo)
   59.38  {
   59.39      int rc = 0;
   59.40 -    full_execution_context_t *c;
   59.41 +    full_execution_context_t *c = NULL;
   59.42 +    unsigned long vcpu = setdomaininfo->exec_domain;
   59.43 +    struct exec_domain *ed; 
   59.44 +
   59.45 +    if ( (vcpu >= MAX_VIRT_CPUS) || ((ed = p->exec_domain[vcpu]) == NULL) )
   59.46 +        return -EINVAL;
   59.47 +    
   59.48 +    if (test_bit(DF_CONSTRUCTED, &p->d_flags) && 
   59.49 +        !test_bit(EDF_CTRLPAUSE, &ed->ed_flags))
   59.50 +        return -EINVAL;
   59.51  
   59.52      if ( (c = xmalloc(full_execution_context_t)) == NULL )
   59.53          return -ENOMEM;
   59.54  
   59.55 -    if ( test_bit(DF_CONSTRUCTED, &p->d_flags) )
   59.56 -    {
   59.57 -        rc = -EINVAL;
   59.58 -        goto out;
   59.59 -    }
   59.60 -
   59.61 -    if ( copy_from_user(c, builddomain->ctxt, sizeof(*c)) )
   59.62 +    if ( copy_from_user(c, setdomaininfo->ctxt, sizeof(*c)) )
   59.63      {
   59.64          rc = -EFAULT;
   59.65          goto out;
   59.66      }
   59.67      
   59.68 -    if ( (rc = arch_final_setup_guest(p->exec_domain[0],c)) != 0 )
   59.69 +    if ( (rc = arch_set_info_guest(ed, c)) != 0 )
   59.70          goto out;
   59.71  
   59.72      set_bit(DF_CONSTRUCTED, &p->d_flags);
   59.73 @@ -331,7 +340,7 @@ long do_boot_vcpu(unsigned long vcpu, fu
   59.74  
   59.75      sched_add_domain(ed);
   59.76  
   59.77 -    if ( (rc = arch_final_setup_guest(ed, c)) != 0 ) {
   59.78 +    if ( (rc = arch_set_info_guest(ed, c)) != 0 ) {
   59.79          sched_rem_domain(ed);
   59.80          goto out;
   59.81      }
    60.1 --- a/xen/common/elf.c	Sat Mar 12 21:43:58 2005 +0000
    60.2 +++ b/xen/common/elf.c	Fri Mar 18 03:37:54 2005 +0000
    60.3 @@ -9,6 +9,7 @@
    60.4  #include <xen/lib.h>
    60.5  #include <xen/mm.h>
    60.6  #include <xen/elf.h>
    60.7 +#include <xen/sched.h>
    60.8  
    60.9  #ifdef CONFIG_X86
   60.10  #define FORCE_XENELF_IMAGE 1
    61.1 --- a/xen/common/page_alloc.c	Sat Mar 12 21:43:58 2005 +0000
    61.2 +++ b/xen/common/page_alloc.c	Fri Mar 18 03:37:54 2005 +0000
    61.3 @@ -24,11 +24,12 @@
    61.4  #include <xen/init.h>
    61.5  #include <xen/types.h>
    61.6  #include <xen/lib.h>
    61.7 -#include <asm/page.h>
    61.8 +#include <xen/perfc.h>
    61.9  #include <xen/spinlock.h>
   61.10  #include <xen/slab.h>
   61.11  #include <xen/irq.h>
   61.12  #include <asm/domain_page.h>
   61.13 +#include <asm/page.h>
   61.14  
   61.15  /*
   61.16   * Comma-separated list of hexadecimal page numbers containing bad bytes.
    62.1 --- a/xen/common/xmalloc.c	Sat Mar 12 21:43:58 2005 +0000
    62.2 +++ b/xen/common/xmalloc.c	Fri Mar 18 03:37:54 2005 +0000
    62.3 @@ -27,10 +27,12 @@
    62.4   *     (Disadvantage is potentially greater internal fragmentation).
    62.5   */
    62.6  
    62.7 +#include <xen/config.h>
    62.8  #include <xen/mm.h>
    62.9  #include <xen/spinlock.h>
   62.10  #include <xen/ac_timer.h>
   62.11  #include <xen/cache.h>
   62.12 +#include <xen/prefetch.h>
   62.13  
   62.14  static LIST_HEAD(freelist);
   62.15  static spinlock_t freelist_lock = SPIN_LOCK_UNLOCKED;
    63.1 --- a/xen/include/asm-x86/desc.h	Sat Mar 12 21:43:58 2005 +0000
    63.2 +++ b/xen/include/asm-x86/desc.h	Fri Mar 18 03:37:54 2005 +0000
    63.3 @@ -6,18 +6,23 @@
    63.4  
    63.5  #define load_TR(n)  __asm__ __volatile__ ("ltr  %%ax" : : "a" (__TSS(n)<<3) )
    63.6  
    63.7 +#if defined(__x86_64__)
    63.8 +#define GUEST_KERNEL_RPL 3
    63.9 +#elif defined(__i386__)
   63.10 +#define GUEST_KERNEL_RPL 1
   63.11 +#endif
   63.12 +
   63.13  /*
   63.14 - * Guest OS must provide its own code selectors, or use the one we provide. The
   63.15 - * RPL must be 1, as we only create bounce frames to ring 1. Any LDT selector
   63.16 - * value is okay. Note that checking only the RPL is insufficient: if the
   63.17 - * selector is poked into an interrupt, trap or call gate then the RPL is
   63.18 - * ignored when the gate is accessed.
   63.19 + * Guest OS must provide its own code selectors, or use the one we provide. Any
   63.20 + * LDT selector value is okay. Note that checking only the RPL is insufficient:
   63.21 + * if the selector is poked into an interrupt, trap or call gate then the RPL
   63.22 + * is ignored when the gate is accessed.
   63.23   */
   63.24  #define VALID_SEL(_s)                                                      \
   63.25      (((((_s)>>3) < FIRST_RESERVED_GDT_ENTRY) ||                            \
   63.26        (((_s)>>3) >  LAST_RESERVED_GDT_ENTRY) ||                            \
   63.27        ((_s)&4)) &&                                                         \
   63.28 -     (((_s)&3) == 1))
   63.29 +     (((_s)&3) == GUEST_KERNEL_RPL))
   63.30  #define VALID_CODESEL(_s) ((_s) == FLAT_KERNEL_CS || VALID_SEL(_s))
   63.31  
   63.32  /* These are bitmasks for the high 32 bits of a descriptor table entry. */
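
For reference, VALID_SEL accepts any LDT selector and any GDT selector outside Xen's reserved range, provided the RPL equals GUEST_KERNEL_RPL (ring 1 for x86_32 guests, ring 3 for x86_64 guests, per the new #ifdef). A C restatement of the macro, a sketch only, with the reserved-GDT bounds passed in because their values are defined elsewhere in the Xen headers:

    /* Sketch only: the shape of the VALID_SEL check. */
    static int valid_guest_kernel_sel(unsigned int sel, unsigned int guest_rpl,
                                      unsigned int first_resv, unsigned int last_resv)
    {
        unsigned int index = sel >> 3;
        int in_ldt = sel & 4;   /* TI bit set: selector indexes the LDT, always OK */
        int outside_reserved = (index < first_resv) || (index > last_resv);
        return (outside_reserved || in_ldt) && ((sel & 3) == guest_rpl);
    }
    /* guest_rpl is GUEST_KERNEL_RPL: 1 on x86_32, 3 on x86_64. */
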
    64.1 --- a/xen/include/asm-x86/mm.h	Sat Mar 12 21:43:58 2005 +0000
    64.2 +++ b/xen/include/asm-x86/mm.h	Fri Mar 18 03:37:54 2005 +0000
    64.3 @@ -4,18 +4,7 @@
    64.4  
    64.5  #include <xen/config.h>
    64.6  #include <xen/list.h>
    64.7 -#include <xen/spinlock.h>
    64.8 -#include <xen/perfc.h>
    64.9 -#include <xen/sched.h>
   64.10 -
   64.11 -#include <asm/processor.h>
   64.12 -#include <asm/atomic.h>
   64.13 -#include <asm/desc.h>
   64.14 -#include <asm/flushtlb.h>
   64.15  #include <asm/io.h>
   64.16 -#include <asm/uaccess.h>
   64.17 -
   64.18 -#include <public/xen.h>
   64.19  
   64.20  /*
   64.21   * Per-page-frame information.
   64.22 @@ -241,19 +230,11 @@ void synchronise_pagetables(unsigned lon
   64.23   */
   64.24  #define __phys_to_machine_mapping ((unsigned long *)RO_MPT_VIRT_START)
   64.25  
   64.26 -/* Returns the machine physical */
   64.27 -static inline unsigned long phys_to_machine_mapping(unsigned long pfn) 
   64.28 -{
   64.29 -    unsigned long mfn;
   64.30 -    l1_pgentry_t pte;
   64.31 -
   64.32 -   if (__get_user(l1_pgentry_val(pte), (__phys_to_machine_mapping + pfn)))
   64.33 -       mfn = 0;
   64.34 -   else
   64.35 -       mfn = l1_pgentry_to_phys(pte) >> PAGE_SHIFT;
   64.36 -
   64.37 -   return mfn; 
   64.38 -}
   64.39 +#define phys_to_machine_mapping(_pfn)                                      \
   64.40 +({ l1_pgentry_t l1e; unsigned long mfn;                                    \
   64.41 +   mfn = __get_user(l1_pgentry_val(l1e), &__phys_to_machine_mapping[_pfn]) \
   64.42 +       ? 0 : l1_pgentry_to_pfn(l1e);                                       \
   64.43 +   mfn; })
   64.44  #define set_machinetophys(_mfn, _pfn) machine_to_phys_mapping[(_mfn)] = (_pfn)
   64.45  
   64.46  #define DEFAULT_GDT_ENTRIES     (LAST_RESERVED_GDT_ENTRY+1)
   64.47 @@ -289,6 +270,9 @@ typedef struct {
   64.48      l1_pgentry_t *pl1e;
   64.49      /* Index in L2 page table where this L1 p.t. is always hooked. */
   64.50      unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
   64.51 +    /* Info about last ptwr update batch. */
   64.52 +    struct exec_domain *prev_exec_domain; /* domain making the update */
   64.53 +    unsigned int        prev_nr_updates;  /* size of update batch */
   64.54  } ptwr_ptinfo_t;
   64.55  
   64.56  typedef struct {
   64.57 @@ -336,9 +320,10 @@ void audit_domains(void);
   64.58  
   64.59  void propagate_page_fault(unsigned long addr, u16 error_code);
   64.60  
   64.61 -/* update_grant_va_mapping
   64.62 - * Caller must own d's BIGLOCK, is responsible for flushing the TLB,
   64.63 - * and have already get_page'd */
   64.64 +/*
   64.65 + * Caller must own d's BIGLOCK, is responsible for flushing the TLB, and must 
   64.66 + * hold a reference to the page.
   64.67 + */
   64.68  int update_grant_va_mapping(unsigned long va,
   64.69                              unsigned long val,
   64.70                              struct domain *d,
    65.1 --- a/xen/include/asm-x86/processor.h	Sat Mar 12 21:43:58 2005 +0000
    65.2 +++ b/xen/include/asm-x86/processor.h	Fri Mar 18 03:37:54 2005 +0000
    65.3 @@ -110,7 +110,8 @@
    65.4  #define TRAP_deferred_nmi     31
    65.5  
    65.6  /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
    65.7 -#define TRAP_syscall         256 /* NB. Same as ECF_IN_SYSCALL */
    65.8 +/* NB. Same as ECF_IN_SYSCALL. No bits in common with any other TRAP_* defn. */
    65.9 +#define TRAP_syscall         256
   65.10  
   65.11  /*
   65.12   * Non-fatal fault/trap handlers return an error code to the caller. If the
   65.13 @@ -128,10 +129,8 @@
   65.14  #define TBF_INTERRUPT          8
   65.15  #define TBF_FAILSAFE          16
   65.16  
   65.17 -/* arch_exec_domain' flags values */
   65.18 -#define _TF_failsafe_return    0
   65.19 -#define _TF_kernel_mode        1
   65.20 -#define TF_failsafe_return     (1<<_TF_failsafe_return)
   65.21 +/* 'arch_exec_domain' flags values */
   65.22 +#define _TF_kernel_mode        0
   65.23  #define TF_kernel_mode         (1<<_TF_kernel_mode)
   65.24  
   65.25  #ifndef __ASSEMBLY__
    66.1 --- a/xen/include/asm-x86/x86_emulate.h	Sat Mar 12 21:43:58 2005 +0000
    66.2 +++ b/xen/include/asm-x86/x86_emulate.h	Fri Mar 18 03:37:54 2005 +0000
    66.3 @@ -32,9 +32,17 @@
    66.4   *  2. If the access fails (cannot emulate, or a standard access faults) then
    66.5   *     it is up to the memop to propagate the fault to the guest VM via
    66.6   *     some out-of-band mechanism, unknown to the emulator. The memop signals
    66.7 - *     failure by returning a non-zero value to the emulator, which will then
    66.8 - *     immediately bail.
    66.9 + *     failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
   66.10 + *     then immediately bail.
   66.11   */
   66.12 +/* Access completed successfully: continue emulation as normal. */
   66.13 +#define X86EMUL_CONTINUE        0
   66.14 +/* Access is unhandleable: bail from emulation and return error to caller. */
   66.15 +#define X86EMUL_UNHANDLEABLE    1
   66.16 +/* Terminate emulation but return success to the caller. */
   66.17 +#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
   66.18 +#define X86EMUL_RETRY_INSTR     2 /* retry the instruction for some reason */
   66.19 +#define X86EMUL_CMPXCHG_FAILED  2 /* cmpxchg did not see expected value */
   66.20  struct x86_mem_emulator
   66.21  {
   66.22      /*
   66.23 @@ -89,17 +97,26 @@ struct x86_mem_emulator
   66.24       *  @addr:  [IN ] Linear address to access.
   66.25       *  @old:   [IN ] Value expected to be current at @addr.
   66.26       *  @new:   [IN ] Value to write to @addr.
   66.27 -     *  @seen:  [OUT] Value actually seen at @addr, zero-extended to 'u_long'.
   66.28       *  @bytes: [IN ] Number of bytes to access using CMPXCHG.
   66.29       */
   66.30      int (*cmpxchg_emulated)(
   66.31          unsigned long addr,
   66.32 -        unsigned long old, 
   66.33 +        unsigned long old,
   66.34          unsigned long new,
   66.35 -        unsigned long *seen,
   66.36          unsigned int bytes);
   66.37  };
   66.38  
   66.39 +/* Standard reader/writer functions that callers may wish to use. */
   66.40 +extern int
   66.41 +x86_emulate_read_std(
   66.42 +    unsigned long addr,
   66.43 +    unsigned long *val,
   66.44 +    unsigned int bytes);
   66.45 +extern int
   66.46 +x86_emulate_write_std(
   66.47 +    unsigned long addr,
   66.48 +    unsigned long val,
   66.49 +    unsigned int bytes);
   66.50  
   66.51  struct xen_regs;
   66.52  
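
A minimal sketch of a memory operation following the return-code protocol above. Only the X86EMUL_* values, the x86_mem_emulator field names and the standard reader/writer declarations come from this header; the MMIO region, its constants and my_mmio_read are hypothetical:

    /* Hypothetical device model: a small register bank at MMIO_BASE. */
    #define MMIO_BASE  0xfee00000UL
    #define MMIO_SIZE  0x1000UL
    static unsigned char mmio_regs[MMIO_SIZE];

    static int my_mmio_read(
        unsigned long addr, unsigned long *val, unsigned int bytes)
    {
        if ( (addr < MMIO_BASE) || ((addr + bytes) > (MMIO_BASE + MMIO_SIZE)) )
            return X86EMUL_UNHANDLEABLE;        /* emulator bails out      */
        *val = 0;
        memcpy(val, &mmio_regs[addr - MMIO_BASE], bytes);
        return X86EMUL_CONTINUE;                /* continue emulation      */
    }

    static struct x86_mem_emulator my_emulops = {
        .read_std       = x86_emulate_read_std,   /* declared above */
        .write_std      = x86_emulate_write_std,  /* declared above */
        .read_emulated  = my_mmio_read,
        /* .write_emulated and .cmpxchg_emulated would be filled in similarly. */
    };
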
    67.1 --- a/xen/include/public/arch-x86_32.h	Sat Mar 12 21:43:58 2005 +0000
    67.2 +++ b/xen/include/public/arch-x86_32.h	Fri Mar 18 03:37:54 2005 +0000
    67.3 @@ -103,11 +103,8 @@ typedef struct xen_regs
    67.4      u32 edi;
    67.5      u32 ebp;
    67.6      u32 eax;
    67.7 -    u16 error_code;        /* private */
    67.8 -    union { 
    67.9 -        u16 entry_vector;  /* private */
   67.10 -        u16 flags;
   67.11 -    } PACKED;
   67.12 +    u16 error_code;    /* private */
   67.13 +    u16 entry_vector;  /* private */
   67.14      u32 eip;
   67.15      u32 cs;
   67.16      u32 eflags;
    68.1 --- a/xen/include/public/arch-x86_64.h	Sat Mar 12 21:43:58 2005 +0000
    68.2 +++ b/xen/include/public/arch-x86_64.h	Fri Mar 18 03:37:54 2005 +0000
    68.3 @@ -104,6 +104,8 @@
    68.4   *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
    68.5   * All other registers are saved on hypercall entry and restored to user.
    68.6   */
    68.7 +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
    68.8 +#define ECF_IN_SYSCALL (1<<8)
    68.9  struct switch_to_user {
   68.10      /* Top of stack (%rsp at point of hypercall). */
   68.11      u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
   68.12 @@ -153,12 +155,8 @@ typedef struct xen_regs
   68.13      union { u64 rdx, edx; } PACKED;
   68.14      union { u64 rsi, esi; } PACKED;
   68.15      union { u64 rdi, edi; } PACKED;
   68.16 -    u32 error_code;        /* private */
   68.17 -    union { 
   68.18 -        u32 entry_vector;  /* private */
   68.19 -#define ECF_IN_SYSCALL (1<<8) /* Guest synchronously interrupted by SYSCALL? */
   68.20 -        u32 flags;
   68.21 -    } PACKED;
   68.22 +    u32 error_code;    /* private */
   68.23 +    u32 entry_vector;  /* private */
   68.24      union { u64 rip, eip; } PACKED;
   68.25      u64 cs;
   68.26      union { u64 rflags, eflags; } PACKED;
    69.1 --- a/xen/include/public/dom0_ops.h	Sat Mar 12 21:43:58 2005 +0000
    69.2 +++ b/xen/include/public/dom0_ops.h	Fri Mar 18 03:37:54 2005 +0000
    69.3 @@ -109,16 +109,16 @@ typedef struct {
    69.4      u64      cpu_time;                /* 40 */
    69.5  } PACKED dom0_getdomaininfo_t; /* 48 bytes */
    69.6  
    69.7 -#define DOM0_BUILDDOMAIN      13
    69.8 +#define DOM0_SETDOMAININFO      13
    69.9  typedef struct {
   69.10      /* IN variables. */
   69.11 -    domid_t                 domain;   /*  0 */
   69.12 -    u16                     __pad0;   /*  2 */
   69.13 -    u32                     __pad1;   /*  4 */
   69.14 +    domid_t                   domain;       /*  0 */
   69.15 +    u16                       exec_domain;  /*  2 */
   69.16 +    u32                       __pad0;       /*  4 */
   69.17      /* IN/OUT parameters */
   69.18 -    full_execution_context_t *ctxt;   /*  8 */
   69.19 +    full_execution_context_t *ctxt;         /*  8 */
   69.20      MEMORY_PADDING;
   69.21 -} PACKED dom0_builddomain_t; /* 16 bytes */
   69.22 +} PACKED dom0_setdomaininfo_t;              /* 16 bytes */
   69.23  
   69.24  #define DOM0_IOPL             14
   69.25  typedef struct {
   69.26 @@ -426,7 +426,7 @@ typedef struct {
   69.27          dom0_getmemlist_t        getmemlist;
   69.28          dom0_schedctl_t          schedctl;
   69.29          dom0_adjustdom_t         adjustdom;
   69.30 -        dom0_builddomain_t       builddomain;
   69.31 +        dom0_setdomaininfo_t     setdomaininfo;
   69.32          dom0_getdomaininfo_t     getdomaininfo;
   69.33          dom0_getpageframeinfo_t  getpageframeinfo;
   69.34          dom0_iopl_t              iopl;
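
A rough sketch of how a dom0 tool might fill in the renamed operation. Only the command name and the setdomaininfo field names come from this header; the transport used to deliver the dom0_op (issue_dom0_op below) is a hypothetical stand-in for whatever mechanism the tool actually uses (e.g. libxc via privcmd):

    /* Hypothetical helper: modify the register state of one VCPU. */
    static int set_vcpu_context(domid_t dom, u16 vcpu,
                                full_execution_context_t *ctxt)
    {
        dom0_op_t op;

        memset(&op, 0, sizeof(op));
        op.cmd                         = DOM0_SETDOMAININFO;
        op.u.setdomaininfo.domain      = dom;
        op.u.setdomaininfo.exec_domain = vcpu;
        op.u.setdomaininfo.ctxt        = ctxt;

        return issue_dom0_op(&op);   /* hypothetical transport */
    }
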
    70.1 --- a/xen/include/public/io/domain_controller.h	Sat Mar 12 21:43:58 2005 +0000
    70.2 +++ b/xen/include/public/io/domain_controller.h	Fri Mar 18 03:37:54 2005 +0000
    70.3 @@ -13,16 +13,6 @@
    70.4  #include "ring.h"
    70.5  
    70.6  /*
    70.7 - * Reason codes for SCHEDOP_shutdown. These are opaque to Xen but may be
    70.8 - * interpreted by control software to determine the appropriate action. These 
    70.9 - * are only really advisories: the controller can actually do as it likes.
   70.10 - */
   70.11 -#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
   70.12 -#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
   70.13 -#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
   70.14 -
   70.15 -
   70.16 -/*
   70.17   * CONTROLLER MESSAGING INTERFACE.
   70.18   */
   70.19  
    71.1 --- a/xen/include/public/xen.h	Sat Mar 12 21:43:58 2005 +0000
    71.2 +++ b/xen/include/public/xen.h	Fri Mar 18 03:37:54 2005 +0000
    71.3 @@ -191,6 +191,16 @@
    71.4  #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
    71.5  
    71.6  /*
    71.7 + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control 
    71.8 + * software to determine the appropriate action. For the most part, Xen does
    71.9 + * not care about the shutdown code (SHUTDOWN_crash excepted).
   71.10 + */
   71.11 +#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
   71.12 +#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
   71.13 +#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
   71.14 +#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
   71.15 +
   71.16 +/*
   71.17   * Commands to HYPERVISOR_console_io().
   71.18   */
   71.19  #define CONSOLEIO_write         0
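
The shutdown reason travels in the upper bits of the SCHEDOP_shutdown argument (see SCHEDOP_reasonshift above). A guest-side sketch; the HYPERVISOR_sched_op wrapper name and its exact form are assumed rather than taken from this changeset:

    /* Sketch: ask Xen to shut this domain down, reporting SHUTDOWN_crash so
     * the controller knows the guest died rather than exited cleanly. */
    static void report_crash_and_stop(void)
    {
        for ( ;; )
            HYPERVISOR_sched_op(SCHEDOP_shutdown |
                                (SHUTDOWN_crash << SCHEDOP_reasonshift));
    }
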
    72.1 --- a/xen/include/xen/domain.h	Sat Mar 12 21:43:58 2005 +0000
    72.2 +++ b/xen/include/xen/domain.h	Fri Mar 18 03:37:54 2005 +0000
    72.3 @@ -18,7 +18,7 @@ extern void arch_do_createdomain(struct 
    72.4  
    72.5  extern void arch_do_boot_vcpu(struct exec_domain *ed);
    72.6  
    72.7 -extern int  arch_final_setup_guest(
    72.8 +extern int  arch_set_info_guest(
    72.9      struct exec_domain *d, full_execution_context_t *c);
   72.10  
   72.11  extern void free_perdomain_pt(struct domain *d);
    73.1 --- a/xen/include/xen/irq_cpustat.h	Sat Mar 12 21:43:58 2005 +0000
    73.2 +++ b/xen/include/xen/irq_cpustat.h	Fri Mar 18 03:37:54 2005 +0000
    73.3 @@ -20,11 +20,7 @@
    73.4  
    73.5  extern irq_cpustat_t irq_stat[];
    73.6  
    73.7 -#ifdef CONFIG_SMP
    73.8  #define __IRQ_STAT(cpu, member)	(irq_stat[cpu].member)
    73.9 -#else
   73.10 -#define __IRQ_STAT(cpu, member)	((void)(cpu), irq_stat[0].member)
   73.11 -#endif	
   73.12  
   73.13    /* arch independent irq_stat fields */
   73.14  #define softirq_pending(cpu)	__IRQ_STAT((cpu), __softirq_pending)
    74.1 --- a/xen/include/xen/perfc_defn.h	Sat Mar 12 21:43:58 2005 +0000
    74.2 +++ b/xen/include/xen/perfc_defn.h	Fri Mar 18 03:37:54 2005 +0000
    74.3 @@ -20,6 +20,7 @@ PERFCOUNTER_CPU( calls_to_update_va, "ca
    74.4  PERFCOUNTER_CPU( page_faults, "page faults" )
    74.5  PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
    74.6  PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
    74.7 +PERFCOUNTER_CPU( ptwr_emulations, "writable pt emulations" )
    74.8  
    74.9  PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
   74.10  PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )
    75.1 --- a/xen/include/xen/sched.h	Sat Mar 12 21:43:58 2005 +0000
    75.2 +++ b/xen/include/xen/sched.h	Fri Mar 18 03:37:54 2005 +0000
    75.3 @@ -22,6 +22,7 @@
    75.4  #include <xen/grant_table.h>
    75.5  #include <asm/hardirq.h>
    75.6  #include <asm/domain.h>
    75.7 +#include <asm/bitops.h>
    75.8  
    75.9  extern unsigned long volatile jiffies;
   75.10  extern rwlock_t domlist_lock;
   75.11 @@ -88,19 +89,9 @@ struct exec_domain
   75.12      struct arch_exec_domain arch;
   75.13  };
   75.14  
   75.15 -/*
   75.16 -** SMH: do_mmu_update() grabs big_lock and subsequently can fault 
   75.17 -** on map_ldt_shadow_page(), enter do_page_fault() and then deadlock 
   75.18 -** trying to reacquire big_lock. A temporary fix is to make big_lock
   75.19 -** recursive; overall probably needs more thought. 
   75.20 -*/
   75.21 -#if 0
   75.22 -#define LOCK_BIGLOCK(_d) spin_lock(&(_d)->big_lock)
   75.23 -#define UNLOCK_BIGLOCK(_d) spin_unlock(&(_d)->big_lock)
   75.24 -#else
   75.25 +/* Per-domain lock can be recursively acquired in fault handlers. */
   75.26  #define LOCK_BIGLOCK(_d) spin_lock_recursive(&(_d)->big_lock)
   75.27  #define UNLOCK_BIGLOCK(_d) spin_unlock_recursive(&(_d)->big_lock)
   75.28 -#endif
   75.29  
   75.30  struct domain
   75.31  {
   75.32 @@ -220,7 +211,7 @@ extern int construct_dom0(
   75.33      unsigned long image_start, unsigned long image_len, 
   75.34      unsigned long initrd_start, unsigned long initrd_len,
   75.35      char *cmdline);
   75.36 -extern int final_setup_guest(struct domain *d, dom0_builddomain_t *);
   75.37 +extern int set_info_guest(struct domain *d, dom0_setdomaininfo_t *);
   75.38  
   75.39  struct domain *find_domain_by_id(domid_t dom);
   75.40  struct domain *find_last_domain(void);
   75.41 @@ -317,6 +308,7 @@ extern struct domain *domain_list;
   75.42  #define EDF_RUNNING     12 /* Currently running on a CPU.                    */
   75.43  #define EDF_CPUPINNED   13 /* Disables auto-migration.                       */
   75.44  #define EDF_MIGRATED    14 /* Domain migrated between CPUs.                  */
    75.45 +#define EDF_DONEINIT    15 /* Initialization completed.                      */
   75.46  
   75.47  static inline int domain_runnable(struct exec_domain *d)
   75.48  {