ia64/xen-unstable

changeset 11256:79afceca9065

merge with xen-unstable.hg
author awilliam@xenbuild.aw
date Wed Aug 23 11:11:27 2006 -0600 (2006-08-23)
parents 91169603a8e8 0bb18319b8a0
children 8293018c1a24
files extras/mini-os/include/hypercall-x86_32.h extras/mini-os/include/hypercall-x86_64.h extras/mini-os/include/os.h extras/mini-os/include/spinlock.h extras/mini-os/include/traps.h extras/mini-os/traps.c linux-2.6-xen-sparse/arch/ia64/kernel/setup.c linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c tools/examples/xmexample.vti tools/ioemu/patches/qemu-fix-write-to-disk-synchronous tools/libxc/xc_ppc_linux_build.c xen/arch/ia64/vmx/mmio.c xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/hyperprivop.S xen/arch/ia64/xen/mm.c xen/arch/ia64/xen/vcpu.c xen/arch/ia64/xen/xenasm.S xen/arch/x86/audit.c xen/arch/x86/shadow.c xen/arch/x86/shadow32.c xen/arch/x86/shadow_guest32.c xen/arch/x86/shadow_guest32pae.c xen/arch/x86/shadow_public.c xen/include/asm-ia64/config.h xen/include/asm-ia64/domain.h xen/include/asm-ia64/linux-null/linux/kallsyms.h xen/include/asm-ia64/linux-null/linux/workqueue.h xen/include/asm-ia64/privop_stat.h xen/include/asm-ia64/vmx_vpd.h xen/include/asm-x86/shadow_64.h xen/include/asm-x86/shadow_ops.h xen/include/asm-x86/shadow_public.h
line diff
     1.1 --- a/.hgignore	Tue Aug 22 14:45:49 2006 -0600
     1.2 +++ b/.hgignore	Wed Aug 23 11:11:27 2006 -0600
     1.3 @@ -151,7 +151,7 @@
     1.4  ^tools/vtpm_manager/manager/vtpm_managerd$
     1.5  ^tools/xcutils/xc_restore$
     1.6  ^tools/xcutils/xc_save$
     1.7 -^tools/xenmon/setmask$
     1.8 +^tools/xenmon/xentrace_setmask$
     1.9  ^tools/xenmon/xenbaked$
    1.10  ^tools/xenstat/xentop/xentop$
    1.11  ^tools/xenstore/testsuite/tmp/.*$
    1.12 @@ -172,7 +172,7 @@
    1.13  ^tools/xenstore/xs_tdb_dump$
    1.14  ^tools/xenstore/xs_test$
    1.15  ^tools/xenstore/xs_watch_stress$
    1.16 -^tools/xentrace/setsize$
    1.17 +^tools/xentrace/xentrace_setsize$
    1.18  ^tools/xentrace/tbctl$
    1.19  ^tools/xentrace/xenctx$
    1.20  ^tools/xentrace/xentrace$
    1.21 @@ -197,7 +197,12 @@
    1.22  ^xen/xen$
    1.23  ^xen/xen-syms$
    1.24  ^xen/xen\..*$
    1.25 -^xen/arch/ppc/dom0\.bin$
    1.26 -^xen/arch/ppc/asm-offsets\.s$
    1.27 -^xen/arch/ppc/firmware
    1.28 -^xen/arch/ppc/firmware_image
    1.29 +^xen/arch/powerpc/dom0\.bin$
    1.30 +^xen/arch/powerpc/asm-offsets\.s$
    1.31 +^xen/arch/powerpc/firmware$
    1.32 +^xen/arch/powerpc/firmware_image$
    1.33 +^xen/arch/powerpc/xen\.lds$
    1.34 +^unmodified_drivers/linux-2.6/\.tmp_versions
    1.35 +^unmodified_drivers/linux-2.6/.*\.cmd$
    1.36 +^unmodified_drivers/linux-2.6/.*\.ko$
    1.37 +^unmodified_drivers/linux-2.6/.*\.mod\.c$
     2.1 --- a/buildconfigs/Rules.mk	Tue Aug 22 14:45:49 2006 -0600
     2.2 +++ b/buildconfigs/Rules.mk	Wed Aug 23 11:11:27 2006 -0600
     2.3 @@ -63,8 +63,12 @@ ref-%/.valid-ref: pristine-%/.valid-pris
     2.4  	set -e
     2.5  	rm -rf $(@D)
     2.6  	cp -al $(<D) $(@D)
     2.7 -	if [ -d patches/$* ] ; then \
     2.8 -	    for i in patches/$*/*.patch ; do patch -d $(@D) -p1 <$$i || exit 1 ; done ; \
     2.9 +	if [ -d patches/$* ] ; then                                    \
    2.10 +	    echo Applying patches from patches/$*... ;                 \
    2.11 +	    for i in $$(cat patches/$*/series) ; do                    \
    2.12 +	        echo ... $$i ;                                         \
    2.13 +	        patch -d $(@D) -p1 --quiet <patches/$*/$$i || exit 1 ; \
    2.14 +	     done ;                                                    \
    2.15  	fi
    2.16  	touch $@ # update timestamp to avoid rebuild
    2.17  endif
     3.1 --- a/docs/src/user.tex	Tue Aug 22 14:45:49 2006 -0600
     3.2 +++ b/docs/src/user.tex	Wed Aug 23 11:11:27 2006 -0600
     3.3 @@ -1090,6 +1090,9 @@ The \path{xm list} command also supports
     3.4  \path{-l} switch is used.  This outputs the full details of the
     3.5  running domains in \xend's SXP configuration format.
     3.6  
     3.7 +If you want to know how long your domains have been running for, then 
     3.8 +you can use the \verb_# xm uptime_ command.
     3.9 +
    3.10  
    3.11  You can get access to the console of a particular domain using 
    3.12  the \verb_# xm console_ command  (e.g.\ \verb_# xm console myVM_). 
    3.13 @@ -3126,8 +3129,8 @@ editing \path{grub.conf}.
    3.14  \item [ console=$<$specifier list$>$ ] Specify the destination for Xen
    3.15    console I/O.  This is a comma-separated list of, for example:
    3.16    \begin{description}
    3.17 -  \item[ vga ] Use VGA console (only until domain 0 boots, unless {\bf
    3.18 -  vga[keep] } is specified).
    3.19 +  \item[ vga ] Use VGA console (until domain 0 boots, unless {\bf
    3.20 +  vga=keep } is specified).
    3.21    \item[ com1 ] Use serial port com1.
    3.22    \item[ com2H ] Use serial port com2. Transmitted chars will have the
    3.23      MSB set. Received chars must have MSB set.
    3.24 @@ -3138,6 +3141,12 @@ editing \path{grub.conf}.
    3.25    subsystems (e.g.\ console and debugger). Sharing is controlled by
    3.26    MSB of each transmitted/received character.  [NB. Default for this
    3.27    option is `com1,vga']
    3.28 +\item [ vga=$<$options$>$ ] This is a comma-separated list of options:
    3.29 +  \begin{description}
    3.30 +  \item[ text-$<$mode$>$ ] Select text-mode resolution, where mode is
    3.31 +  one of 80x25, 80x28, 80x30, 80x34, 80x43, 80x50, 80x60.
    3.32 +  \item[ keep ] Keep the VGA console even after domain 0 boots.
    3.33 +  \end{description}
    3.34  \item [ sync\_console ] Force synchronous console output. This is
    3.35    useful if your system fails unexpectedly before it has sent all
    3.36    available output to the console. In most cases Xen will
     4.1 --- a/extras/mini-os/Makefile	Tue Aug 22 14:45:49 2006 -0600
     4.2 +++ b/extras/mini-os/Makefile	Wed Aug 23 11:11:27 2006 -0600
     4.3 @@ -11,26 +11,54 @@ override TARGET_ARCH     := $(XEN_TARGET
     4.4  CFLAGS := -fno-builtin -Wall -Werror -Wredundant-decls -Wno-format
     4.5  CFLAGS += -Wstrict-prototypes -Wnested-externs -Wpointer-arith -Winline
     4.6  
     4.7 -override CPPFLAGS := -Iinclude $(CPPFLAGS)
     4.8  ASFLAGS = -D__ASSEMBLY__
     4.9  
    4.10  LDLIBS =  -L. -lminios
    4.11  LDFLAGS := -N -T minios-$(TARGET_ARCH).lds
    4.12  
    4.13 +# For possible special source directories.
    4.14 +EXTRA_SRC =
    4.15 +# For possible special header directories.
    4.16 +EXTRA_INC =
    4.17 +
    4.18 +# Standard name for architecture specific subdirectories.
    4.19 +TARGET_ARCH_DIR = $(TARGET_ARCH)
    4.20 +# This is used for architecture specific links.
    4.21 +ARCH_LINKS =
    4.22 +
    4.23  ifeq ($(TARGET_ARCH),x86_32)
    4.24  CFLAGS += -m32 -march=i686
    4.25  LDFLAGS += -m elf_i386
    4.26 +TARGET_ARCH_DIR = x86
    4.27 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH)
    4.28 +EXTRA_SRC += arch/$(EXTRA_INC)
    4.29  endif
    4.30  
    4.31  ifeq ($(TARGET_ARCH)$(pae),x86_32y)
    4.32  CFLAGS  += -DCONFIG_X86_PAE=1
    4.33  ASFLAGS += -DCONFIG_X86_PAE=1
    4.34 +TARGET_ARCH_DIR = x86
    4.35 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH)
    4.36 +EXTRA_SRC += arch/$(EXTRA_INC)
    4.37  endif
    4.38  
    4.39  ifeq ($(TARGET_ARCH),x86_64)
    4.40  CFLAGS += -m64 -mno-red-zone -fpic -fno-reorder-blocks
    4.41  CFLAGS += -fno-asynchronous-unwind-tables
    4.42  LDFLAGS += -m elf_x86_64
    4.43 +TARGET_ARCH_DIR = x86
    4.44 +EXTRA_INC += $(TARGET_ARCH_DIR)/$(TARGET_ARCH)
    4.45 +EXTRA_SRC += arch/$(EXTRA_INC)
    4.46 +endif
    4.47 +
    4.48 +ifeq ($(TARGET_ARCH),ia64)
    4.49 +CFLAGS += -mfixed-range=f12-f15,f32-f127
    4.50 +ASFLAGS += -x assembler-with-cpp -ansi -Wall
    4.51 +ASFLAGS += -mfixed-range=f12-f15,f32-f127
    4.52 +ARCH_LINKS = IA64_LINKS		# Special link on ia64 needed
    4.53 +define arch_links
    4.54 +[ -e include/ia64/asm-xsi-offsets.h ] || ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/ia64/asm-xsi-offsets.h
    4.55 +endef
    4.56  endif
    4.57  
    4.58  ifeq ($(debug),y)
    4.59 @@ -39,6 +67,10 @@ else
    4.60  CFLAGS += -O3
    4.61  endif
    4.62  
    4.63 +# Add the special header directories to the include paths.
    4.64 +extra_incl := $(foreach dir,$(EXTRA_INC),-Iinclude/$(dir))
    4.65 +override CPPFLAGS := -Iinclude $(CPPFLAGS) -Iinclude/$(TARGET_ARCH_DIR)	$(extra_incl)
    4.66 +
    4.67  TARGET := mini-os
    4.68  
    4.69  HEAD := $(TARGET_ARCH).o
    4.70 @@ -46,15 +78,32 @@ OBJS := $(patsubst %.c,%.o,$(wildcard *.
    4.71  OBJS += $(patsubst %.c,%.o,$(wildcard lib/*.c))
    4.72  OBJS += $(patsubst %.c,%.o,$(wildcard xenbus/*.c))
    4.73  OBJS += $(patsubst %.c,%.o,$(wildcard console/*.c))
    4.74 +OBJS += $(patsubst %.S,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.S))
    4.75 +OBJS += $(patsubst %.c,%.o,$(wildcard arch/$(TARGET_ARCH_DIR)/*.c))
    4.76 +# For special wanted source directories.
    4.77 +extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
    4.78 +OBJS += $(extra_objs)
    4.79 +extra_objs := $(foreach dir,$(EXTRA_SRC),$(patsubst %.S,%.o,$(wildcard $(dir)/*.S)))
    4.80 +OBJS += $(extra_objs)
    4.81  
    4.82  HDRS := $(wildcard include/*.h)
    4.83  HDRS += $(wildcard include/xen/*.h)
    4.84 +HDRS += $(wildcard include/$(TARGET_ARCH_DIR)/*.h)
    4.85 +# For special wanted header directories.
    4.86 +extra_heads := $(foreach dir,$(EXTRA_INC),$(wildcard $(dir)/*.h))
    4.87 +HDRS += $(extra_heads)
    4.88  
    4.89  .PHONY: default
    4.90  default: $(TARGET)
    4.91  
    4.92 +# Create special architecture specific links.
    4.93 +ifneq ($(ARCH_LINKS),)
    4.94 +$(ARCH_LINKS):
    4.95 +	$(arch_links)
    4.96 +endif
    4.97 +
    4.98  .PHONY: links
    4.99 -links:
   4.100 +links:	$(ARCH_LINKS)
   4.101  	[ -e include/xen ] || ln -sf ../../../xen/include/public include/xen
   4.102  
   4.103  libminios.a: links $(OBJS) $(HEAD)
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/extras/mini-os/arch/x86/traps.c	Wed Aug 23 11:11:27 2006 -0600
     5.3 @@ -0,0 +1,229 @@
     5.4 +
     5.5 +#include <os.h>
     5.6 +#include <traps.h>
     5.7 +#include <hypervisor.h>
     5.8 +#include <mm.h>
     5.9 +#include <lib.h>
    5.10 +#include <sched.h>
    5.11 +
    5.12 +/*
    5.13 + * These are assembler stubs in entry.S.
    5.14 + * They are the actual entry points for virtual exceptions.
    5.15 + */
    5.16 +void divide_error(void);
    5.17 +void debug(void);
    5.18 +void int3(void);
    5.19 +void overflow(void);
    5.20 +void bounds(void);
    5.21 +void invalid_op(void);
    5.22 +void device_not_available(void);
    5.23 +void coprocessor_segment_overrun(void);
    5.24 +void invalid_TSS(void);
    5.25 +void segment_not_present(void);
    5.26 +void stack_segment(void);
    5.27 +void general_protection(void);
    5.28 +void page_fault(void);
    5.29 +void coprocessor_error(void);
    5.30 +void simd_coprocessor_error(void);
    5.31 +void alignment_check(void);
    5.32 +void spurious_interrupt_bug(void);
    5.33 +void machine_check(void);
    5.34 +
    5.35 +
    5.36 +void dump_regs(struct pt_regs *regs)
    5.37 +{
    5.38 +    printk("Thread: %s\n", current->name);
    5.39 +#ifdef __i386__    
    5.40 +    printk("EIP: %x, EFLAGS %x.\n", regs->eip, regs->eflags);
    5.41 +    printk("EBX: %08x ECX: %08x EDX: %08x\n",
    5.42 +	   regs->ebx, regs->ecx, regs->edx);
    5.43 +    printk("ESI: %08x EDI: %08x EBP: %08x EAX: %08x\n",
    5.44 +	   regs->esi, regs->edi, regs->ebp, regs->eax);
    5.45 +    printk("DS: %04x ES: %04x orig_eax: %08x, eip: %08x\n",
    5.46 +	   regs->xds, regs->xes, regs->orig_eax, regs->eip);
    5.47 +    printk("CS: %04x EFLAGS: %08x esp: %08x ss: %04x\n",
    5.48 +	   regs->xcs, regs->eflags, regs->esp, regs->xss);
    5.49 +#else
    5.50 +    printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
    5.51 +    printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", 
    5.52 +           regs->ss, regs->rsp, regs->eflags);
    5.53 +    printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
    5.54 +           regs->rax, regs->rbx, regs->rcx);
    5.55 +    printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
    5.56 +           regs->rdx, regs->rsi, regs->rdi); 
    5.57 +    printk("RBP: %016lx R08: %016lx R09: %016lx\n",
    5.58 +           regs->rbp, regs->r8, regs->r9); 
    5.59 +    printk("R10: %016lx R11: %016lx R12: %016lx\n",
    5.60 +           regs->r10, regs->r11, regs->r12); 
    5.61 +    printk("R13: %016lx R14: %016lx R15: %016lx\n",
    5.62 +           regs->r13, regs->r14, regs->r15); 
    5.63 +#endif
    5.64 +}
    5.65 +
    5.66 +static void do_trap(int trapnr, char *str, struct pt_regs * regs, unsigned long error_code)
    5.67 +{
    5.68 +    printk("FATAL:  Unhandled Trap %d (%s), error code=0x%lx\n", trapnr, str, error_code);
    5.69 +    printk("Regs address %p\n", regs);
    5.70 +    dump_regs(regs);
    5.71 +    do_exit();
    5.72 +}
    5.73 +
    5.74 +#define DO_ERROR(trapnr, str, name) \
    5.75 +void do_##name(struct pt_regs * regs, unsigned long error_code) \
    5.76 +{ \
    5.77 +	do_trap(trapnr, str, regs, error_code); \
    5.78 +}
    5.79 +
    5.80 +#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \
    5.81 +void do_##name(struct pt_regs * regs, unsigned long error_code) \
    5.82 +{ \
    5.83 +	do_trap(trapnr, str, regs, error_code); \
    5.84 +}
    5.85 +
    5.86 +DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip)
    5.87 +DO_ERROR( 3, "int3", int3)
    5.88 +DO_ERROR( 4, "overflow", overflow)
    5.89 +DO_ERROR( 5, "bounds", bounds)
    5.90 +DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
    5.91 +DO_ERROR( 7, "device not available", device_not_available)
    5.92 +DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
    5.93 +DO_ERROR(10, "invalid TSS", invalid_TSS)
    5.94 +DO_ERROR(11, "segment not present", segment_not_present)
    5.95 +DO_ERROR(12, "stack segment", stack_segment)
    5.96 +DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0)
    5.97 +DO_ERROR(18, "machine check", machine_check)
    5.98 +
    5.99 +void page_walk(unsigned long virt_address)
   5.100 +{
   5.101 +        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
   5.102 +        unsigned long addr = virt_address;
   5.103 +        printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, start_info.pt_base);
   5.104 +    
   5.105 +#if defined(__x86_64__)
   5.106 +        page = tab[l4_table_offset(addr)];
   5.107 +        tab = pte_to_virt(page);
   5.108 +        printk(" L4 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l4_table_offset(addr));
   5.109 +#endif
   5.110 +#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
   5.111 +        page = tab[l3_table_offset(addr)];
   5.112 +        tab = pte_to_virt(page);
   5.113 +        printk("  L3 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l3_table_offset(addr));
   5.114 +#endif
   5.115 +        page = tab[l2_table_offset(addr)];
   5.116 +        tab = pte_to_virt(page);
   5.117 +        printk("   L2 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l2_table_offset(addr));
   5.118 +        
   5.119 +        page = tab[l1_table_offset(addr)];
   5.120 +        printk("    L1 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l1_table_offset(addr));
   5.121 +
   5.122 +}
   5.123 +
   5.124 +#define read_cr2() \
   5.125 +        (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
   5.126 +
   5.127 +static int handling_pg_fault = 0;
   5.128 +
   5.129 +void do_page_fault(struct pt_regs *regs, unsigned long error_code)
   5.130 +{
   5.131 +    unsigned long addr = read_cr2();
   5.132 +    /* If we are already handling a page fault, and got another one
   5.133 +       that means we faulted in pagetable walk. Continuing here would cause
   5.134 +       a recursive fault */       
   5.135 +    if(handling_pg_fault) 
   5.136 +    {
   5.137 +        printk("Page fault in pagetable walk (access to invalid memory?).\n"); 
   5.138 +        do_exit();
   5.139 +    }
   5.140 +    handling_pg_fault = 1;
   5.141 +
   5.142 +#if defined(__x86_64__)
   5.143 +    printk("Page fault at linear address %p, rip %p, code %lx\n",
   5.144 +           addr, regs->rip, error_code);
   5.145 +#else
   5.146 +    printk("Page fault at linear address %p, eip %p, code %lx\n",
   5.147 +           addr, regs->eip, error_code);
   5.148 +#endif
   5.149 +
   5.150 +    dump_regs(regs);
   5.151 +    page_walk(addr);
   5.152 +    do_exit();
   5.153 +    /* We should never get here ... but still */
   5.154 +    handling_pg_fault = 0;
   5.155 +}
   5.156 +
   5.157 +void do_general_protection(struct pt_regs *regs, long error_code)
   5.158 +{
   5.159 +#ifdef __i386__
   5.160 +    printk("GPF eip: %p, error_code=%lx\n", regs->eip, error_code);
   5.161 +#else    
   5.162 +    printk("GPF rip: %p, error_code=%lx\n", regs->rip, error_code);
   5.163 +#endif
   5.164 +    dump_regs(regs);
   5.165 +    do_exit();
   5.166 +}
   5.167 +
   5.168 +
   5.169 +void do_debug(struct pt_regs * regs)
   5.170 +{
   5.171 +    printk("Debug exception\n");
   5.172 +#define TF_MASK 0x100
   5.173 +    regs->eflags &= ~TF_MASK;
   5.174 +    dump_regs(regs);
   5.175 +    do_exit();
   5.176 +}
   5.177 +
   5.178 +void do_coprocessor_error(struct pt_regs * regs)
   5.179 +{
   5.180 +    printk("Copro error\n");
   5.181 +    dump_regs(regs);
   5.182 +    do_exit();
   5.183 +}
   5.184 +
   5.185 +void simd_math_error(void *eip)
   5.186 +{
   5.187 +    printk("SIMD error\n");
   5.188 +}
   5.189 +
   5.190 +void do_simd_coprocessor_error(struct pt_regs * regs)
   5.191 +{
   5.192 +    printk("SIMD copro error\n");
   5.193 +}
   5.194 +
   5.195 +void do_spurious_interrupt_bug(struct pt_regs * regs)
   5.196 +{
   5.197 +}
   5.198 +
   5.199 +/*
   5.200 + * Submit a virtual IDT to the hypervisor. This consists of tuples
   5.201 + * (interrupt vector, privilege ring, CS:EIP of handler).
   5.202 + * The 'privilege ring' field specifies the least-privileged ring that
   5.203 + * can trap to that vector using a software-interrupt instruction (INT).
   5.204 + */
   5.205 +static trap_info_t trap_table[] = {
   5.206 +    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
   5.207 +    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
   5.208 +    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
   5.209 +    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
   5.210 +    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
   5.211 +    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
   5.212 +    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
   5.213 +    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
   5.214 +    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
   5.215 +    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
   5.216 +    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
   5.217 +    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
   5.218 +    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
   5.219 +    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug      },
   5.220 +    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
   5.221 +    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
   5.222 +    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
   5.223 +    {  0, 0,           0, 0                           }
   5.224 +};
   5.225 +    
   5.226 +
   5.227 +
   5.228 +void trap_init(void)
   5.229 +{
   5.230 +    HYPERVISOR_set_trap_table(trap_table);    
   5.231 +}
   5.232 +
     6.1 --- a/extras/mini-os/console/xencons_ring.c	Tue Aug 22 14:45:49 2006 -0600
     6.2 +++ b/extras/mini-os/console/xencons_ring.c	Wed Aug 23 11:11:27 2006 -0600
     6.3 @@ -14,13 +14,13 @@
     6.4  
     6.5  static inline struct xencons_interface *xencons_interface(void)
     6.6  {
     6.7 -    return mfn_to_virt(start_info.console_mfn);
     6.8 +    return mfn_to_virt(start_info.console.domU.mfn);
     6.9  }
    6.10  
    6.11  static inline void notify_daemon(void)
    6.12  {
    6.13      /* Use evtchn: this is called early, before irq is set up. */
    6.14 -    notify_remote_via_evtchn(start_info.console_evtchn);
    6.15 +    notify_remote_via_evtchn(start_info.console.domU.evtchn);
    6.16  }
    6.17  
    6.18  int xencons_ring_send_no_notify(const char *data, unsigned len)
    6.19 @@ -80,10 +80,10 @@ int xencons_ring_init(void)
    6.20  {
    6.21  	int err;
    6.22  
    6.23 -	if (!start_info.console_evtchn)
    6.24 +	if (!start_info.console.domU.evtchn)
    6.25  		return 0;
    6.26  
    6.27 -	err = bind_evtchn(start_info.console_evtchn, handle_input,
    6.28 +	err = bind_evtchn(start_info.console.domU.evtchn, handle_input,
    6.29  			  NULL);
    6.30  	if (err <= 0) {
    6.31  		printk("XEN console request chn bind failed %i\n", err);
     7.1 --- a/extras/mini-os/include/hypercall-x86_32.h	Tue Aug 22 14:45:49 2006 -0600
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,326 +0,0 @@
     7.4 -/******************************************************************************
     7.5 - * hypercall-x86_32.h
     7.6 - * 
     7.7 - * Copied from XenLinux.
     7.8 - * 
     7.9 - * Copyright (c) 2002-2004, K A Fraser
    7.10 - * 
    7.11 - * This file may be distributed separately from the Linux kernel, or
    7.12 - * incorporated into other software packages, subject to the following license:
    7.13 - * 
    7.14 - * Permission is hereby granted, free of charge, to any person obtaining a copy
    7.15 - * of this source file (the "Software"), to deal in the Software without
    7.16 - * restriction, including without limitation the rights to use, copy, modify,
    7.17 - * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    7.18 - * and to permit persons to whom the Software is furnished to do so, subject to
    7.19 - * the following conditions:
    7.20 - * 
    7.21 - * The above copyright notice and this permission notice shall be included in
    7.22 - * all copies or substantial portions of the Software.
    7.23 - * 
    7.24 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    7.25 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    7.26 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    7.27 - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    7.28 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    7.29 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    7.30 - * IN THE SOFTWARE.
    7.31 - */
    7.32 -
    7.33 -#ifndef __HYPERCALL_X86_32_H__
    7.34 -#define __HYPERCALL_X86_32_H__
    7.35 -
    7.36 -#include <xen/xen.h>
    7.37 -#include <xen/sched.h>
    7.38 -#include <xen/nmi.h>
    7.39 -#include <mm.h>
    7.40 -
    7.41 -#define __STR(x) #x
    7.42 -#define STR(x) __STR(x)
    7.43 -
    7.44 -extern char hypercall_page[PAGE_SIZE];
    7.45 -
    7.46 -#define _hypercall0(type, name)			\
    7.47 -({						\
    7.48 -	long __res;				\
    7.49 -	asm volatile (				\
    7.50 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    7.51 -		: "=a" (__res)			\
    7.52 -		:				\
    7.53 -		: "memory" );			\
    7.54 -	(type)__res;				\
    7.55 -})
    7.56 -
    7.57 -#define _hypercall1(type, name, a1)				\
    7.58 -({								\
    7.59 -	long __res, __ign1;					\
    7.60 -	asm volatile (						\
    7.61 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    7.62 -		: "=a" (__res), "=b" (__ign1)			\
    7.63 -		: "1" ((long)(a1))				\
    7.64 -		: "memory" );					\
    7.65 -	(type)__res;						\
    7.66 -})
    7.67 -
    7.68 -#define _hypercall2(type, name, a1, a2)				\
    7.69 -({								\
    7.70 -	long __res, __ign1, __ign2;				\
    7.71 -	asm volatile (						\
    7.72 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    7.73 -		: "=a" (__res), "=b" (__ign1), "=c" (__ign2)	\
    7.74 -		: "1" ((long)(a1)), "2" ((long)(a2))		\
    7.75 -		: "memory" );					\
    7.76 -	(type)__res;						\
    7.77 -})
    7.78 -
    7.79 -#define _hypercall3(type, name, a1, a2, a3)			\
    7.80 -({								\
    7.81 -	long __res, __ign1, __ign2, __ign3;			\
    7.82 -	asm volatile (						\
    7.83 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    7.84 -		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), 	\
    7.85 -		"=d" (__ign3)					\
    7.86 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
    7.87 -		"3" ((long)(a3))				\
    7.88 -		: "memory" );					\
    7.89 -	(type)__res;						\
    7.90 -})
    7.91 -
    7.92 -#define _hypercall4(type, name, a1, a2, a3, a4)			\
    7.93 -({								\
    7.94 -	long __res, __ign1, __ign2, __ign3, __ign4;		\
    7.95 -	asm volatile (						\
    7.96 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    7.97 -		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
    7.98 -		"=d" (__ign3), "=S" (__ign4)			\
    7.99 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   7.100 -		"3" ((long)(a3)), "4" ((long)(a4))		\
   7.101 -		: "memory" );					\
   7.102 -	(type)__res;						\
   7.103 -})
   7.104 -
   7.105 -#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
   7.106 -({								\
   7.107 -	long __res, __ign1, __ign2, __ign3, __ign4, __ign5;	\
   7.108 -	asm volatile (						\
   7.109 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   7.110 -		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
   7.111 -		"=d" (__ign3), "=S" (__ign4), "=D" (__ign5)	\
   7.112 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   7.113 -		"3" ((long)(a3)), "4" ((long)(a4)),		\
   7.114 -		"5" ((long)(a5))				\
   7.115 -		: "memory" );					\
   7.116 -	(type)__res;						\
   7.117 -})
   7.118 -
   7.119 -static inline int
   7.120 -HYPERVISOR_set_trap_table(
   7.121 -	trap_info_t *table)
   7.122 -{
   7.123 -	return _hypercall1(int, set_trap_table, table);
   7.124 -}
   7.125 -
   7.126 -static inline int
   7.127 -HYPERVISOR_mmu_update(
   7.128 -	mmu_update_t *req, int count, int *success_count, domid_t domid)
   7.129 -{
   7.130 -	return _hypercall4(int, mmu_update, req, count, success_count, domid);
   7.131 -}
   7.132 -
   7.133 -static inline int
   7.134 -HYPERVISOR_mmuext_op(
   7.135 -	struct mmuext_op *op, int count, int *success_count, domid_t domid)
   7.136 -{
   7.137 -	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
   7.138 -}
   7.139 -
   7.140 -static inline int
   7.141 -HYPERVISOR_set_gdt(
   7.142 -	unsigned long *frame_list, int entries)
   7.143 -{
   7.144 -	return _hypercall2(int, set_gdt, frame_list, entries);
   7.145 -}
   7.146 -
   7.147 -static inline int
   7.148 -HYPERVISOR_stack_switch(
   7.149 -	unsigned long ss, unsigned long esp)
   7.150 -{
   7.151 -	return _hypercall2(int, stack_switch, ss, esp);
   7.152 -}
   7.153 -
   7.154 -static inline int
   7.155 -HYPERVISOR_set_callbacks(
   7.156 -	unsigned long event_selector, unsigned long event_address,
   7.157 -	unsigned long failsafe_selector, unsigned long failsafe_address)
   7.158 -{
   7.159 -	return _hypercall4(int, set_callbacks,
   7.160 -			   event_selector, event_address,
   7.161 -			   failsafe_selector, failsafe_address);
   7.162 -}
   7.163 -
   7.164 -static inline int
   7.165 -HYPERVISOR_fpu_taskswitch(
   7.166 -	int set)
   7.167 -{
   7.168 -	return _hypercall1(int, fpu_taskswitch, set);
   7.169 -}
   7.170 -
   7.171 -static inline int
   7.172 -HYPERVISOR_sched_op(
   7.173 -	int cmd, unsigned long arg)
   7.174 -{
   7.175 -	return _hypercall2(int, sched_op, cmd, arg);
   7.176 -}
   7.177 -
   7.178 -static inline long
   7.179 -HYPERVISOR_set_timer_op(
   7.180 -	u64 timeout)
   7.181 -{
   7.182 -	unsigned long timeout_hi = (unsigned long)(timeout>>32);
   7.183 -	unsigned long timeout_lo = (unsigned long)timeout;
   7.184 -	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
   7.185 -}
   7.186 -
   7.187 -static inline int
   7.188 -HYPERVISOR_dom0_op(
   7.189 -	dom0_op_t *dom0_op)
   7.190 -{
   7.191 -	dom0_op->interface_version = DOM0_INTERFACE_VERSION;
   7.192 -	return _hypercall1(int, dom0_op, dom0_op);
   7.193 -}
   7.194 -
   7.195 -static inline int
   7.196 -HYPERVISOR_set_debugreg(
   7.197 -	int reg, unsigned long value)
   7.198 -{
   7.199 -	return _hypercall2(int, set_debugreg, reg, value);
   7.200 -}
   7.201 -
   7.202 -static inline unsigned long
   7.203 -HYPERVISOR_get_debugreg(
   7.204 -	int reg)
   7.205 -{
   7.206 -	return _hypercall1(unsigned long, get_debugreg, reg);
   7.207 -}
   7.208 -
   7.209 -static inline int
   7.210 -HYPERVISOR_update_descriptor(
   7.211 -	u64 ma, u64 desc)
   7.212 -{
   7.213 -	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
   7.214 -}
   7.215 -
   7.216 -static inline int
   7.217 -HYPERVISOR_memory_op(
   7.218 -	unsigned int cmd, void *arg)
   7.219 -{
   7.220 -	return _hypercall2(int, memory_op, cmd, arg);
   7.221 -}
   7.222 -
   7.223 -static inline int
   7.224 -HYPERVISOR_multicall(
   7.225 -	void *call_list, int nr_calls)
   7.226 -{
   7.227 -	return _hypercall2(int, multicall, call_list, nr_calls);
   7.228 -}
   7.229 -
   7.230 -static inline int
   7.231 -HYPERVISOR_update_va_mapping(
   7.232 -	unsigned long va, pte_t new_val, unsigned long flags)
   7.233 -{
   7.234 -	unsigned long pte_hi = 0;
   7.235 -#ifdef CONFIG_X86_PAE
   7.236 -	pte_hi = new_val.pte_high;
   7.237 -#endif
   7.238 -	return _hypercall4(int, update_va_mapping, va,
   7.239 -			   new_val.pte_low, pte_hi, flags);
   7.240 -}
   7.241 -
   7.242 -static inline int
   7.243 -HYPERVISOR_event_channel_op(
   7.244 -	void *op)
   7.245 -{
   7.246 -	return _hypercall1(int, event_channel_op, op);
   7.247 -}
   7.248 -
   7.249 -static inline int
   7.250 -HYPERVISOR_xen_version(
   7.251 -	int cmd, void *arg)
   7.252 -{
   7.253 -	return _hypercall2(int, xen_version, cmd, arg);
   7.254 -}
   7.255 -
   7.256 -static inline int
   7.257 -HYPERVISOR_console_io(
   7.258 -	int cmd, int count, char *str)
   7.259 -{
   7.260 -	return _hypercall3(int, console_io, cmd, count, str);
   7.261 -}
   7.262 -
   7.263 -static inline int
   7.264 -HYPERVISOR_physdev_op(
   7.265 -	void *physdev_op)
   7.266 -{
   7.267 -	return _hypercall1(int, physdev_op, physdev_op);
   7.268 -}
   7.269 -
   7.270 -static inline int
   7.271 -HYPERVISOR_grant_table_op(
   7.272 -	unsigned int cmd, void *uop, unsigned int count)
   7.273 -{
   7.274 -	return _hypercall3(int, grant_table_op, cmd, uop, count);
   7.275 -}
   7.276 -
   7.277 -static inline int
   7.278 -HYPERVISOR_update_va_mapping_otherdomain(
   7.279 -	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
   7.280 -{
   7.281 -	unsigned long pte_hi = 0;
   7.282 -#ifdef CONFIG_X86_PAE
   7.283 -	pte_hi = new_val.pte_high;
   7.284 -#endif
   7.285 -	return _hypercall5(int, update_va_mapping_otherdomain, va,
   7.286 -			   new_val.pte_low, pte_hi, flags, domid);
   7.287 -}
   7.288 -
   7.289 -static inline int
   7.290 -HYPERVISOR_vm_assist(
   7.291 -	unsigned int cmd, unsigned int type)
   7.292 -{
   7.293 -	return _hypercall2(int, vm_assist, cmd, type);
   7.294 -}
   7.295 -
   7.296 -static inline int
   7.297 -HYPERVISOR_vcpu_op(
   7.298 -	int cmd, int vcpuid, void *extra_args)
   7.299 -{
   7.300 -	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
   7.301 -}
   7.302 -
   7.303 -static inline int
   7.304 -HYPERVISOR_suspend(
   7.305 -	unsigned long srec)
   7.306 -{
   7.307 -	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
   7.308 -			   SHUTDOWN_suspend, srec);
   7.309 -}
   7.310 -
   7.311 -static inline int
   7.312 -HYPERVISOR_nmi_op(
   7.313 -	unsigned long op,
   7.314 -	unsigned long arg)
   7.315 -{
   7.316 -	return _hypercall2(int, nmi_op, op, arg);
   7.317 -}
   7.318 -
   7.319 -#endif /* __HYPERCALL_X86_32_H__ */
   7.320 -
   7.321 -/*
   7.322 - * Local variables:
   7.323 - *  c-file-style: "linux"
   7.324 - *  indent-tabs-mode: t
   7.325 - *  c-indent-level: 8
   7.326 - *  c-basic-offset: 8
   7.327 - *  tab-width: 8
   7.328 - * End:
   7.329 - */
     8.1 --- a/extras/mini-os/include/hypercall-x86_64.h	Tue Aug 22 14:45:49 2006 -0600
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,326 +0,0 @@
     8.4 -/******************************************************************************
     8.5 - * hypercall-x86_64.h
     8.6 - * 
     8.7 - * Copied from XenLinux.
     8.8 - * 
     8.9 - * Copyright (c) 2002-2004, K A Fraser
    8.10 - * 
    8.11 - * 64-bit updates:
    8.12 - *   Benjamin Liu <benjamin.liu@intel.com>
    8.13 - *   Jun Nakajima <jun.nakajima@intel.com>
    8.14 - * 
    8.15 - * This file may be distributed separately from the Linux kernel, or
    8.16 - * incorporated into other software packages, subject to the following license:
    8.17 - * 
    8.18 - * Permission is hereby granted, free of charge, to any person obtaining a copy
    8.19 - * of this source file (the "Software"), to deal in the Software without
    8.20 - * restriction, including without limitation the rights to use, copy, modify,
    8.21 - * merge, publish, distribute, sublicense, and/or sell copies of the Software,
    8.22 - * and to permit persons to whom the Software is furnished to do so, subject to
    8.23 - * the following conditions:
    8.24 - * 
    8.25 - * The above copyright notice and this permission notice shall be included in
    8.26 - * all copies or substantial portions of the Software.
    8.27 - * 
    8.28 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    8.29 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    8.30 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    8.31 - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    8.32 - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    8.33 - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
    8.34 - * IN THE SOFTWARE.
    8.35 - */
    8.36 -
    8.37 -#ifndef __HYPERCALL_X86_64_H__
    8.38 -#define __HYPERCALL_X86_64_H__
    8.39 -
    8.40 -#include <xen/xen.h>
    8.41 -#include <xen/sched.h>
    8.42 -#include <mm.h>
    8.43 -
    8.44 -#define __STR(x) #x
    8.45 -#define STR(x) __STR(x)
    8.46 -
    8.47 -extern char hypercall_page[PAGE_SIZE];
    8.48 -
    8.49 -#define _hypercall0(type, name)			\
    8.50 -({						\
    8.51 -	long __res;				\
    8.52 -	asm volatile (				\
    8.53 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    8.54 -		: "=a" (__res)			\
    8.55 -		:				\
    8.56 -		: "memory" );			\
    8.57 -	(type)__res;				\
    8.58 -})
    8.59 -
    8.60 -#define _hypercall1(type, name, a1)				\
    8.61 -({								\
    8.62 -	long __res, __ign1;					\
    8.63 -	asm volatile (						\
    8.64 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    8.65 -		: "=a" (__res), "=D" (__ign1)			\
    8.66 -		: "1" ((long)(a1))				\
    8.67 -		: "memory" );					\
    8.68 -	(type)__res;						\
    8.69 -})
    8.70 -
    8.71 -#define _hypercall2(type, name, a1, a2)				\
    8.72 -({								\
    8.73 -	long __res, __ign1, __ign2;				\
    8.74 -	asm volatile (						\
    8.75 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    8.76 -		: "=a" (__res), "=D" (__ign1), "=S" (__ign2)	\
    8.77 -		: "1" ((long)(a1)), "2" ((long)(a2))		\
    8.78 -		: "memory" );					\
    8.79 -	(type)__res;						\
    8.80 -})
    8.81 -
    8.82 -#define _hypercall3(type, name, a1, a2, a3)			\
    8.83 -({								\
    8.84 -	long __res, __ign1, __ign2, __ign3;			\
    8.85 -	asm volatile (						\
    8.86 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
    8.87 -		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), 	\
    8.88 -		"=d" (__ign3)					\
    8.89 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
    8.90 -		"3" ((long)(a3))				\
    8.91 -		: "memory" );					\
    8.92 -	(type)__res;						\
    8.93 -})
    8.94 -
    8.95 -#define _hypercall4(type, name, a1, a2, a3, a4)			\
    8.96 -({								\
    8.97 -	long __res, __ign1, __ign2, __ign3;			\
    8.98 -	asm volatile (						\
    8.99 -		"movq %7,%%r10; "				\
   8.100 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   8.101 -		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
   8.102 -		"=d" (__ign3)					\
   8.103 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   8.104 -		"3" ((long)(a3)), "g" ((long)(a4))		\
   8.105 -		: "memory", "r10" );				\
   8.106 -	(type)__res;						\
   8.107 -})
   8.108 -
   8.109 -#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
   8.110 -({								\
   8.111 -	long __res, __ign1, __ign2, __ign3;			\
   8.112 -	asm volatile (						\
   8.113 -		"movq %7,%%r10; movq %8,%%r8; "			\
   8.114 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   8.115 -		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
   8.116 -		"=d" (__ign3)					\
   8.117 -		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   8.118 -		"3" ((long)(a3)), "g" ((long)(a4)),		\
   8.119 -		"g" ((long)(a5))				\
   8.120 -		: "memory", "r10", "r8" );			\
   8.121 -	(type)__res;						\
   8.122 -})
   8.123 -
   8.124 -static inline int
   8.125 -HYPERVISOR_set_trap_table(
   8.126 -	trap_info_t *table)
   8.127 -{
   8.128 -	return _hypercall1(int, set_trap_table, table);
   8.129 -}
   8.130 -
   8.131 -static inline int
   8.132 -HYPERVISOR_mmu_update(
   8.133 -	mmu_update_t *req, int count, int *success_count, domid_t domid)
   8.134 -{
   8.135 -	return _hypercall4(int, mmu_update, req, count, success_count, domid);
   8.136 -}
   8.137 -
   8.138 -static inline int
   8.139 -HYPERVISOR_mmuext_op(
   8.140 -	struct mmuext_op *op, int count, int *success_count, domid_t domid)
   8.141 -{
   8.142 -	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
   8.143 -}
   8.144 -
   8.145 -static inline int
   8.146 -HYPERVISOR_set_gdt(
   8.147 -	unsigned long *frame_list, int entries)
   8.148 -{
   8.149 -	return _hypercall2(int, set_gdt, frame_list, entries);
   8.150 -}
   8.151 -
   8.152 -static inline int
   8.153 -HYPERVISOR_stack_switch(
   8.154 -	unsigned long ss, unsigned long esp)
   8.155 -{
   8.156 -	return _hypercall2(int, stack_switch, ss, esp);
   8.157 -}
   8.158 -
   8.159 -static inline int
   8.160 -HYPERVISOR_set_callbacks(
   8.161 -	unsigned long event_address, unsigned long failsafe_address, 
   8.162 -	unsigned long syscall_address)
   8.163 -{
   8.164 -	return _hypercall3(int, set_callbacks,
   8.165 -			   event_address, failsafe_address, syscall_address);
   8.166 -}
   8.167 -
   8.168 -static inline int
   8.169 -HYPERVISOR_fpu_taskswitch(
   8.170 -	int set)
   8.171 -{
   8.172 -	return _hypercall1(int, fpu_taskswitch, set);
   8.173 -}
   8.174 -
   8.175 -static inline int
   8.176 -HYPERVISOR_sched_op(
   8.177 -	int cmd, unsigned long arg)
   8.178 -{
   8.179 -	return _hypercall2(int, sched_op, cmd, arg);
   8.180 -}
   8.181 -
   8.182 -static inline long
   8.183 -HYPERVISOR_set_timer_op(
   8.184 -	u64 timeout)
   8.185 -{
   8.186 -	return _hypercall1(long, set_timer_op, timeout);
   8.187 -}
   8.188 -
   8.189 -static inline int
   8.190 -HYPERVISOR_dom0_op(
   8.191 -	dom0_op_t *dom0_op)
   8.192 -{
   8.193 -	dom0_op->interface_version = DOM0_INTERFACE_VERSION;
   8.194 -	return _hypercall1(int, dom0_op, dom0_op);
   8.195 -}
   8.196 -
   8.197 -static inline int
   8.198 -HYPERVISOR_set_debugreg(
   8.199 -	int reg, unsigned long value)
   8.200 -{
   8.201 -	return _hypercall2(int, set_debugreg, reg, value);
   8.202 -}
   8.203 -
   8.204 -static inline unsigned long
   8.205 -HYPERVISOR_get_debugreg(
   8.206 -	int reg)
   8.207 -{
   8.208 -	return _hypercall1(unsigned long, get_debugreg, reg);
   8.209 -}
   8.210 -
   8.211 -static inline int
   8.212 -HYPERVISOR_update_descriptor(
   8.213 -	unsigned long ma, unsigned long word)
   8.214 -{
   8.215 -	return _hypercall2(int, update_descriptor, ma, word);
   8.216 -}
   8.217 -
   8.218 -static inline int
   8.219 -HYPERVISOR_memory_op(
   8.220 -	unsigned int cmd, void *arg)
   8.221 -{
   8.222 -	return _hypercall2(int, memory_op, cmd, arg);
   8.223 -}
   8.224 -
   8.225 -static inline int
   8.226 -HYPERVISOR_multicall(
   8.227 -	void *call_list, int nr_calls)
   8.228 -{
   8.229 -	return _hypercall2(int, multicall, call_list, nr_calls);
   8.230 -}
   8.231 -
   8.232 -static inline int
   8.233 -HYPERVISOR_update_va_mapping(
   8.234 -	unsigned long va, pte_t new_val, unsigned long flags)
   8.235 -{
   8.236 -	return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
   8.237 -}
   8.238 -
   8.239 -static inline int
   8.240 -HYPERVISOR_event_channel_op(
   8.241 -	void *op)
   8.242 -{
   8.243 -	return _hypercall1(int, event_channel_op, op);
   8.244 -}
   8.245 -
   8.246 -static inline int
   8.247 -HYPERVISOR_xen_version(
   8.248 -	int cmd, void *arg)
   8.249 -{
   8.250 -	return _hypercall2(int, xen_version, cmd, arg);
   8.251 -}
   8.252 -
   8.253 -static inline int
   8.254 -HYPERVISOR_console_io(
   8.255 -	int cmd, int count, char *str)
   8.256 -{
   8.257 -	return _hypercall3(int, console_io, cmd, count, str);
   8.258 -}
   8.259 -
   8.260 -static inline int
   8.261 -HYPERVISOR_physdev_op(
   8.262 -	void *physdev_op)
   8.263 -{
   8.264 -	return _hypercall1(int, physdev_op, physdev_op);
   8.265 -}
   8.266 -
   8.267 -static inline int
   8.268 -HYPERVISOR_grant_table_op(
   8.269 -	unsigned int cmd, void *uop, unsigned int count)
   8.270 -{
   8.271 -	return _hypercall3(int, grant_table_op, cmd, uop, count);
   8.272 -}
   8.273 -
   8.274 -static inline int
   8.275 -HYPERVISOR_update_va_mapping_otherdomain(
   8.276 -	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
   8.277 -{
   8.278 -	return _hypercall4(int, update_va_mapping_otherdomain, va,
   8.279 -			   new_val.pte, flags, domid);
   8.280 -}
   8.281 -
   8.282 -static inline int
   8.283 -HYPERVISOR_vm_assist(
   8.284 -	unsigned int cmd, unsigned int type)
   8.285 -{
   8.286 -	return _hypercall2(int, vm_assist, cmd, type);
   8.287 -}
   8.288 -
   8.289 -static inline int
   8.290 -HYPERVISOR_vcpu_op(
   8.291 -	int cmd, int vcpuid, void *extra_args)
   8.292 -{
   8.293 -	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
   8.294 -}
   8.295 -
   8.296 -static inline int
   8.297 -HYPERVISOR_set_segment_base(
   8.298 -	int reg, unsigned long value)
   8.299 -{
   8.300 -	return _hypercall2(int, set_segment_base, reg, value);
   8.301 -}
   8.302 -
   8.303 -static inline int
   8.304 -HYPERVISOR_suspend(
   8.305 -	unsigned long srec)
   8.306 -{
   8.307 -	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
   8.308 -			   SHUTDOWN_suspend, srec);
   8.309 -}
   8.310 -
   8.311 -static inline int
   8.312 -HYPERVISOR_nmi_op(
   8.313 -	unsigned long op,
   8.314 -	unsigned long arg)
   8.315 -{
   8.316 -	return _hypercall2(int, nmi_op, op, arg);
   8.317 -}
   8.318 -
   8.319 -#endif /* __HYPERCALL_X86_64_H__ */
   8.320 -
   8.321 -/*
   8.322 - * Local variables:
   8.323 - *  c-file-style: "linux"
   8.324 - *  indent-tabs-mode: t
   8.325 - *  c-indent-level: 8
   8.326 - *  c-basic-offset: 8
   8.327 - *  tab-width: 8
   8.328 - * End:
   8.329 - */
     9.1 --- a/extras/mini-os/include/hypervisor.h	Tue Aug 22 14:45:49 2006 -0600
     9.2 +++ b/extras/mini-os/include/hypervisor.h	Wed Aug 23 11:11:27 2006 -0600
     9.3 @@ -7,6 +7,7 @@
     9.4   * Copyright (c) 2002, K A Fraser
     9.5   * Copyright (c) 2005, Grzegorz Milos
     9.6   * Updates: Aravindh Puthiyaparambil <aravindh.puthiyaparambil@unisys.com>
     9.7 + * Updates: Dietmar Hahn <dietmar.hahn@fujitsu-siemens.com> for ia64
     9.8   */
     9.9  
    9.10  #ifndef _HYPERVISOR_H_
    9.11 @@ -19,6 +20,8 @@
    9.12  #include <hypercall-x86_32.h>
    9.13  #elif defined(__x86_64__)
    9.14  #include <hypercall-x86_64.h>
    9.15 +#elif defined(__ia64__)
    9.16 +#include <hypercall-ia64.h>
    9.17  #else
    9.18  #error "Unsupported architecture"
    9.19  #endif
    10.1 --- a/extras/mini-os/include/os.h	Tue Aug 22 14:45:49 2006 -0600
    10.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.3 @@ -1,561 +0,0 @@
    10.4 -/******************************************************************************
    10.5 - * os.h
    10.6 - * 
    10.7 - * random collection of macros and definition
    10.8 - */
    10.9 -
   10.10 -#ifndef _OS_H_
   10.11 -#define _OS_H_
   10.12 -
   10.13 -#if __GNUC__ == 2 && __GNUC_MINOR__ < 96
   10.14 -#define __builtin_expect(x, expected_value) (x)
   10.15 -#endif
   10.16 -#define unlikely(x)  __builtin_expect((x),0)
   10.17 -
   10.18 -#define smp_processor_id() 0
   10.19 -
   10.20 -
   10.21 -#ifndef __ASSEMBLY__
   10.22 -#include <types.h>
   10.23 -#include <hypervisor.h>
   10.24 -
   10.25 -extern void do_exit(void);
   10.26 -#define BUG do_exit
   10.27 -
   10.28 -#endif
   10.29 -#include <xen/xen.h>
   10.30 -
   10.31 -
   10.32 -#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0))
   10.33 -
   10.34 -#define __KERNEL_CS  FLAT_KERNEL_CS
   10.35 -#define __KERNEL_DS  FLAT_KERNEL_DS
   10.36 -#define __KERNEL_SS  FLAT_KERNEL_SS
   10.37 -
   10.38 -#define TRAP_divide_error      0
   10.39 -#define TRAP_debug             1
   10.40 -#define TRAP_nmi               2
   10.41 -#define TRAP_int3              3
   10.42 -#define TRAP_overflow          4
   10.43 -#define TRAP_bounds            5
   10.44 -#define TRAP_invalid_op        6
   10.45 -#define TRAP_no_device         7
   10.46 -#define TRAP_double_fault      8
   10.47 -#define TRAP_copro_seg         9
   10.48 -#define TRAP_invalid_tss      10
   10.49 -#define TRAP_no_segment       11
   10.50 -#define TRAP_stack_error      12
   10.51 -#define TRAP_gp_fault         13
   10.52 -#define TRAP_page_fault       14
   10.53 -#define TRAP_spurious_int     15
   10.54 -#define TRAP_copro_error      16
   10.55 -#define TRAP_alignment_check  17
   10.56 -#define TRAP_machine_check    18
   10.57 -#define TRAP_simd_error       19
   10.58 -#define TRAP_deferred_nmi     31
   10.59 -
   10.60 -/* Everything below this point is not included by assembler (.S) files. */
   10.61 -#ifndef __ASSEMBLY__
   10.62 -
   10.63 -extern shared_info_t *HYPERVISOR_shared_info;
   10.64 -
   10.65 -void trap_init(void);
   10.66 -
   10.67 -
   10.68 -
   10.69 -/* 
   10.70 - * The use of 'barrier' in the following reflects their use as local-lock
   10.71 - * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
   10.72 - * critical operations are executed. All critical operations must complete
   10.73 - * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
   10.74 - * includes these barriers, for example.
   10.75 - */
   10.76 -
   10.77 -#define __cli()								\
   10.78 -do {									\
   10.79 -	vcpu_info_t *_vcpu;						\
   10.80 -	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
   10.81 -	_vcpu->evtchn_upcall_mask = 1;					\
   10.82 -	barrier();							\
   10.83 -} while (0)
   10.84 -
   10.85 -#define __sti()								\
   10.86 -do {									\
   10.87 -	vcpu_info_t *_vcpu;						\
   10.88 -	barrier();							\
   10.89 -	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
   10.90 -	_vcpu->evtchn_upcall_mask = 0;					\
   10.91 -	barrier(); /* unmask then check (avoid races) */		\
   10.92 -	if ( unlikely(_vcpu->evtchn_upcall_pending) )			\
   10.93 -		force_evtchn_callback();				\
   10.94 -} while (0)
   10.95 -
   10.96 -#define __save_flags(x)							\
   10.97 -do {									\
   10.98 -	vcpu_info_t *_vcpu;						\
   10.99 -	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  10.100 -	(x) = _vcpu->evtchn_upcall_mask;				\
  10.101 -} while (0)
  10.102 -
  10.103 -#define __restore_flags(x)						\
  10.104 -do {									\
  10.105 -	vcpu_info_t *_vcpu;						\
  10.106 -	barrier();							\
  10.107 -	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  10.108 -	if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {			\
  10.109 -		barrier(); /* unmask then check (avoid races) */	\
  10.110 -		if ( unlikely(_vcpu->evtchn_upcall_pending) )		\
  10.111 -			force_evtchn_callback();			\
  10.112 -	}\
  10.113 -} while (0)
  10.114 -
  10.115 -#define safe_halt()		((void)0)
  10.116 -
  10.117 -#define __save_and_cli(x)						\
  10.118 -do {									\
  10.119 -	vcpu_info_t *_vcpu;						\
  10.120 -	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  10.121 -	(x) = _vcpu->evtchn_upcall_mask;				\
  10.122 -	_vcpu->evtchn_upcall_mask = 1;					\
  10.123 -	barrier();							\
  10.124 -} while (0)
  10.125 -
  10.126 -#define local_irq_save(x)	__save_and_cli(x)
  10.127 -#define local_irq_restore(x)	__restore_flags(x)
  10.128 -#define local_save_flags(x)	__save_flags(x)
  10.129 -#define local_irq_disable()	__cli()
  10.130 -#define local_irq_enable()	__sti()
  10.131 -
  10.132 -#define irqs_disabled()			\
  10.133 -    HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask
  10.134 -
  10.135 -/* This is a barrier for the compiler only, NOT the processor! */
  10.136 -#define barrier() __asm__ __volatile__("": : :"memory")
  10.137 -
  10.138 -#if defined(__i386__)
  10.139 -#define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
  10.140 -#define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
  10.141 -#define wmb()	__asm__ __volatile__ ("": : :"memory")
  10.142 -#elif defined(__x86_64__)
  10.143 -#define mb()    __asm__ __volatile__ ("mfence":::"memory")
  10.144 -#define rmb()   __asm__ __volatile__ ("lfence":::"memory")
  10.145 -#define wmb()	__asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */
  10.146 -#endif
  10.147 -
  10.148 -
  10.149 -#define LOCK_PREFIX ""
  10.150 -#define LOCK ""
  10.151 -#define ADDR (*(volatile long *) addr)
  10.152 -/*
  10.153 - * Make sure gcc doesn't try to be clever and move things around
  10.154 - * on us. We need to use _exactly_ the address the user gave us,
  10.155 - * not some alias that contains the same information.
  10.156 - */
  10.157 -typedef struct { volatile int counter; } atomic_t;
  10.158 -
  10.159 -
  10.160 -/************************** i386 *******************************/
  10.161 -#if defined (__i386__)
  10.162 -
  10.163 -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
  10.164 -struct __xchg_dummy { unsigned long a[100]; };
  10.165 -#define __xg(x) ((struct __xchg_dummy *)(x))
  10.166 -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
  10.167 -{
  10.168 -	switch (size) {
  10.169 -		case 1:
  10.170 -			__asm__ __volatile__("xchgb %b0,%1"
  10.171 -				:"=q" (x)
  10.172 -				:"m" (*__xg(ptr)), "0" (x)
  10.173 -				:"memory");
  10.174 -			break;
  10.175 -		case 2:
  10.176 -			__asm__ __volatile__("xchgw %w0,%1"
  10.177 -				:"=r" (x)
  10.178 -				:"m" (*__xg(ptr)), "0" (x)
  10.179 -				:"memory");
  10.180 -			break;
  10.181 -		case 4:
  10.182 -			__asm__ __volatile__("xchgl %0,%1"
  10.183 -				:"=r" (x)
  10.184 -				:"m" (*__xg(ptr)), "0" (x)
  10.185 -				:"memory");
  10.186 -			break;
  10.187 -	}
  10.188 -	return x;
  10.189 -}
  10.190 -
  10.191 -/**
  10.192 - * test_and_clear_bit - Clear a bit and return its old value
  10.193 - * @nr: Bit to clear
  10.194 - * @addr: Address to count from
  10.195 - *
  10.196 - * This operation is atomic and cannot be reordered.
  10.197 - * It can be reorderdered on other architectures other than x86.
  10.198 - * It also implies a memory barrier.
  10.199 - */
  10.200 -static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
  10.201 -{
  10.202 -	int oldbit;
  10.203 -
  10.204 -	__asm__ __volatile__( LOCK
  10.205 -		"btrl %2,%1\n\tsbbl %0,%0"
  10.206 -		:"=r" (oldbit),"=m" (ADDR)
  10.207 -		:"Ir" (nr) : "memory");
  10.208 -	return oldbit;
  10.209 -}
  10.210 -
  10.211 -static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
  10.212 -{
  10.213 -	return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
  10.214 -}
  10.215 -
  10.216 -static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
  10.217 -{
  10.218 -	int oldbit;
  10.219 -
  10.220 -	__asm__ __volatile__(
  10.221 -		"btl %2,%1\n\tsbbl %0,%0"
  10.222 -		:"=r" (oldbit)
  10.223 -		:"m" (ADDR),"Ir" (nr));
  10.224 -	return oldbit;
  10.225 -}
  10.226 -
  10.227 -#define test_bit(nr,addr) \
  10.228 -(__builtin_constant_p(nr) ? \
  10.229 - constant_test_bit((nr),(addr)) : \
  10.230 - variable_test_bit((nr),(addr)))
  10.231 -
  10.232 -/**
  10.233 - * set_bit - Atomically set a bit in memory
  10.234 - * @nr: the bit to set
  10.235 - * @addr: the address to start counting from
  10.236 - *
  10.237 - * This function is atomic and may not be reordered.  See __set_bit()
  10.238 - * if you do not require the atomic guarantees.
  10.239 - *
  10.240 - * Note: there are no guarantees that this function will not be reordered
  10.241 - * on non x86 architectures, so if you are writting portable code,
  10.242 - * make sure not to rely on its reordering guarantees.
  10.243 - *
  10.244 - * Note that @nr may be almost arbitrarily large; this function is not
  10.245 - * restricted to acting on a single-word quantity.
  10.246 - */
  10.247 -static inline void set_bit(int nr, volatile unsigned long * addr)
  10.248 -{
  10.249 -	__asm__ __volatile__( LOCK
  10.250 -		"btsl %1,%0"
  10.251 -		:"=m" (ADDR)
  10.252 -		:"Ir" (nr));
  10.253 -}
  10.254 -
  10.255 -/**
  10.256 - * clear_bit - Clears a bit in memory
  10.257 - * @nr: Bit to clear
  10.258 - * @addr: Address to start counting from
  10.259 - *
  10.260 - * clear_bit() is atomic and may not be reordered.  However, it does
  10.261 - * not contain a memory barrier, so if it is used for locking purposes,
  10.262 - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  10.263 - * in order to ensure changes are visible on other processors.
  10.264 - */
  10.265 -static inline void clear_bit(int nr, volatile unsigned long * addr)
  10.266 -{
  10.267 -	__asm__ __volatile__( LOCK
  10.268 -		"btrl %1,%0"
  10.269 -		:"=m" (ADDR)
  10.270 -		:"Ir" (nr));
  10.271 -}
  10.272 -
  10.273 -/**
  10.274 - * __ffs - find first bit in word.
  10.275 - * @word: The word to search
  10.276 - *
  10.277 - * Undefined if no bit exists, so code should check against 0 first.
  10.278 - */
  10.279 -static inline unsigned long __ffs(unsigned long word)
  10.280 -{
  10.281 -	__asm__("bsfl %1,%0"
  10.282 -		:"=r" (word)
  10.283 -		:"rm" (word));
  10.284 -	return word;
  10.285 -}
  10.286 -
  10.287 -
  10.288 -/*
  10.289 - * These have to be done with inline assembly: that way the bit-setting
  10.290 - * is guaranteed to be atomic. All bit operations return 0 if the bit
  10.291 - * was cleared before the operation and != 0 if it was not.
  10.292 - *
  10.293 - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  10.294 - */
  10.295 -#define ADDR (*(volatile long *) addr)
  10.296 -
  10.297 -#define rdtscll(val) \
  10.298 -     __asm__ __volatile__("rdtsc" : "=A" (val))
  10.299 -
  10.300 -
  10.301 -
  10.302 -#elif defined(__x86_64__)/* ifdef __i386__ */
  10.303 -/************************** x86_84 *******************************/
  10.304 -
  10.305 -#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
  10.306 -#define __xg(x) ((volatile long *)(x))
  10.307 -static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
  10.308 -{
  10.309 -	switch (size) {
  10.310 -		case 1:
  10.311 -			__asm__ __volatile__("xchgb %b0,%1"
  10.312 -				:"=q" (x)
  10.313 -				:"m" (*__xg(ptr)), "0" (x)
  10.314 -				:"memory");
  10.315 -			break;
  10.316 -		case 2:
  10.317 -			__asm__ __volatile__("xchgw %w0,%1"
  10.318 -				:"=r" (x)
  10.319 -				:"m" (*__xg(ptr)), "0" (x)
  10.320 -				:"memory");
  10.321 -			break;
  10.322 -		case 4:
  10.323 -			__asm__ __volatile__("xchgl %k0,%1"
  10.324 -				:"=r" (x)
  10.325 -				:"m" (*__xg(ptr)), "0" (x)
  10.326 -				:"memory");
  10.327 -			break;
  10.328 -		case 8:
  10.329 -			__asm__ __volatile__("xchgq %0,%1"
  10.330 -				:"=r" (x)
  10.331 -				:"m" (*__xg(ptr)), "0" (x)
  10.332 -				:"memory");
  10.333 -			break;
  10.334 -	}
  10.335 -	return x;
  10.336 -}
  10.337 -
  10.338 -/**
  10.339 - * test_and_clear_bit - Clear a bit and return its old value
  10.340 - * @nr: Bit to clear
  10.341 - * @addr: Address to count from
  10.342 - *
  10.343 - * This operation is atomic and cannot be reordered.  
  10.344 - * It also implies a memory barrier.
  10.345 - */
  10.346 -static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
  10.347 -{
  10.348 -	int oldbit;
  10.349 -
  10.350 -	__asm__ __volatile__( LOCK_PREFIX
  10.351 -		"btrl %2,%1\n\tsbbl %0,%0"
  10.352 -		:"=r" (oldbit),"=m" (ADDR)
  10.353 -		:"dIr" (nr) : "memory");
  10.354 -	return oldbit;
  10.355 -}
  10.356 -
  10.357 -static __inline__ int constant_test_bit(int nr, const volatile void * addr)
  10.358 -{
  10.359 -	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  10.360 -}
  10.361 -
  10.362 -static __inline__ int variable_test_bit(int nr, volatile const void * addr)
  10.363 -{
  10.364 -	int oldbit;
  10.365 -
  10.366 -	__asm__ __volatile__(
  10.367 -		"btl %2,%1\n\tsbbl %0,%0"
  10.368 -		:"=r" (oldbit)
  10.369 -		:"m" (ADDR),"dIr" (nr));
  10.370 -	return oldbit;
  10.371 -}
  10.372 -
  10.373 -#define test_bit(nr,addr) \
  10.374 -(__builtin_constant_p(nr) ? \
  10.375 - constant_test_bit((nr),(addr)) : \
  10.376 - variable_test_bit((nr),(addr)))
  10.377 -
  10.378 -
  10.379 -/**
  10.380 - * set_bit - Atomically set a bit in memory
  10.381 - * @nr: the bit to set
  10.382 - * @addr: the address to start counting from
  10.383 - *
  10.384 - * This function is atomic and may not be reordered.  See __set_bit()
  10.385 - * if you do not require the atomic guarantees.
  10.386 - * Note that @nr may be almost arbitrarily large; this function is not
  10.387 - * restricted to acting on a single-word quantity.
  10.388 - */
  10.389 -static __inline__ void set_bit(int nr, volatile void * addr)
  10.390 -{
  10.391 -	__asm__ __volatile__( LOCK_PREFIX
  10.392 -		"btsl %1,%0"
  10.393 -		:"=m" (ADDR)
  10.394 -		:"dIr" (nr) : "memory");
  10.395 -}
  10.396 -
  10.397 -/**
  10.398 - * clear_bit - Clears a bit in memory
  10.399 - * @nr: Bit to clear
  10.400 - * @addr: Address to start counting from
  10.401 - *
  10.402 - * clear_bit() is atomic and may not be reordered.  However, it does
  10.403 - * not contain a memory barrier, so if it is used for locking purposes,
  10.404 - * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  10.405 - * in order to ensure changes are visible on other processors.
  10.406 - */
  10.407 -static __inline__ void clear_bit(int nr, volatile void * addr)
  10.408 -{
  10.409 -	__asm__ __volatile__( LOCK_PREFIX
  10.410 -		"btrl %1,%0"
  10.411 -		:"=m" (ADDR)
  10.412 -		:"dIr" (nr));
  10.413 -}
  10.414 -
  10.415 -/**
  10.416 - * __ffs - find first bit in word.
  10.417 - * @word: The word to search
  10.418 - *
  10.419 - * Undefined if no bit exists, so code should check against 0 first.
  10.420 - */
  10.421 -static __inline__ unsigned long __ffs(unsigned long word)
  10.422 -{
  10.423 -	__asm__("bsfq %1,%0"
  10.424 -		:"=r" (word)
  10.425 -		:"rm" (word));
  10.426 -	return word;
  10.427 -}
  10.428 -
  10.429 -#define ADDR (*(volatile long *) addr)
  10.430 -
  10.431 -#define rdtscll(val) do { \
  10.432 -     unsigned int __a,__d; \
  10.433 -     asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
  10.434 -     (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
  10.435 -} while(0)
  10.436 -
  10.437 -#define wrmsr(msr,val1,val2) \
  10.438 -      __asm__ __volatile__("wrmsr" \
  10.439 -                           : /* no outputs */ \
  10.440 -                           : "c" (msr), "a" (val1), "d" (val2))
  10.441 -
  10.442 -#define wrmsrl(msr,val) wrmsr(msr,(u32)((u64)(val)),((u64)(val))>>32)
  10.443 -
  10.444 -
  10.445 -#else /* ifdef __x86_64__ */
  10.446 -#error "Unsupported architecture"
  10.447 -#endif
  10.448 -
  10.449 -
  10.450 -/********************* common i386 and x86_64  ****************************/
  10.451 -struct __synch_xchg_dummy { unsigned long a[100]; };
  10.452 -#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x))
  10.453 -
  10.454 -#define synch_cmpxchg(ptr, old, new) \
  10.455 -((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
  10.456 -                                     (unsigned long)(old), \
  10.457 -                                     (unsigned long)(new), \
  10.458 -                                     sizeof(*(ptr))))
  10.459 -
  10.460 -static inline unsigned long __synch_cmpxchg(volatile void *ptr,
  10.461 -        unsigned long old,
  10.462 -        unsigned long new, int size)
  10.463 -{
  10.464 -    unsigned long prev;
  10.465 -    switch (size) {
  10.466 -        case 1:
  10.467 -            __asm__ __volatile__("lock; cmpxchgb %b1,%2"
  10.468 -                    : "=a"(prev)
  10.469 -                    : "q"(new), "m"(*__synch_xg(ptr)),
  10.470 -                    "0"(old)
  10.471 -                    : "memory");
  10.472 -            return prev;
  10.473 -        case 2:
  10.474 -            __asm__ __volatile__("lock; cmpxchgw %w1,%2"
  10.475 -                    : "=a"(prev)
  10.476 -                    : "r"(new), "m"(*__synch_xg(ptr)),
  10.477 -                    "0"(old)
  10.478 -                    : "memory");
  10.479 -            return prev;
  10.480 -#ifdef __x86_64__
  10.481 -        case 4:
  10.482 -            __asm__ __volatile__("lock; cmpxchgl %k1,%2"
  10.483 -                    : "=a"(prev)
  10.484 -                    : "r"(new), "m"(*__synch_xg(ptr)),
  10.485 -                    "0"(old)
  10.486 -                    : "memory");
  10.487 -            return prev;
  10.488 -        case 8:
  10.489 -            __asm__ __volatile__("lock; cmpxchgq %1,%2"
  10.490 -                    : "=a"(prev)
  10.491 -                    : "r"(new), "m"(*__synch_xg(ptr)),
  10.492 -                    "0"(old)
  10.493 -                    : "memory");
  10.494 -            return prev;
  10.495 -#else
  10.496 -        case 4:
  10.497 -            __asm__ __volatile__("lock; cmpxchgl %1,%2"
  10.498 -                    : "=a"(prev)
  10.499 -                    : "r"(new), "m"(*__synch_xg(ptr)),
  10.500 -                    "0"(old)
  10.501 -                    : "memory");
  10.502 -            return prev;
  10.503 -#endif
  10.504 -    }
  10.505 -    return old;
  10.506 -}
  10.507 -
  10.508 -
  10.509 -static __inline__ void synch_set_bit(int nr, volatile void * addr)
  10.510 -{
  10.511 -    __asm__ __volatile__ ( 
  10.512 -        "lock btsl %1,%0"
  10.513 -        : "=m" (ADDR) : "Ir" (nr) : "memory" );
  10.514 -}
  10.515 -
  10.516 -static __inline__ void synch_clear_bit(int nr, volatile void * addr)
  10.517 -{
  10.518 -    __asm__ __volatile__ (
  10.519 -        "lock btrl %1,%0"
  10.520 -        : "=m" (ADDR) : "Ir" (nr) : "memory" );
  10.521 -}
  10.522 -
  10.523 -static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
  10.524 -{
  10.525 -    int oldbit;
  10.526 -    __asm__ __volatile__ (
  10.527 -        "lock btsl %2,%1\n\tsbbl %0,%0"
  10.528 -        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
  10.529 -    return oldbit;
  10.530 -}
  10.531 -
  10.532 -static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
  10.533 -{
  10.534 -    int oldbit;
  10.535 -    __asm__ __volatile__ (
  10.536 -        "lock btrl %2,%1\n\tsbbl %0,%0"
  10.537 -        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
  10.538 -    return oldbit;
  10.539 -}
  10.540 -
  10.541 -static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
  10.542 -{
  10.543 -    return ((1UL << (nr & 31)) & 
  10.544 -            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  10.545 -}
  10.546 -
  10.547 -static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
  10.548 -{
  10.549 -    int oldbit;
  10.550 -    __asm__ __volatile__ (
  10.551 -        "btl %2,%1\n\tsbbl %0,%0"
  10.552 -        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
  10.553 -    return oldbit;
  10.554 -}
  10.555 -
  10.556 -#define synch_test_bit(nr,addr) \
  10.557 -(__builtin_constant_p(nr) ? \
  10.558 - synch_const_test_bit((nr),(addr)) : \
  10.559 - synch_var_test_bit((nr),(addr)))
  10.560 -
  10.561 -
  10.562 -
  10.563 -#endif /* not assembly */
  10.564 -#endif /* _OS_H_ */
    11.1 --- a/extras/mini-os/include/spinlock.h	Tue Aug 22 14:45:49 2006 -0600
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,121 +0,0 @@
    11.4 -#ifndef __ASM_SPINLOCK_H
    11.5 -#define __ASM_SPINLOCK_H
    11.6 -
    11.7 -#include <lib.h>
    11.8 -
    11.9 -/*
   11.10 - * Your basic SMP spinlocks, allowing only a single CPU anywhere
   11.11 - */
   11.12 -
   11.13 -typedef struct {
   11.14 -	volatile unsigned int slock;
   11.15 -} spinlock_t;
   11.16 -
   11.17 -#define SPINLOCK_MAGIC	0xdead4ead
   11.18 -
   11.19 -#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 }
   11.20 -
   11.21 -#define spin_lock_init(x)	do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
   11.22 -
   11.23 -/*
   11.24 - * Simple spin lock operations.  There are two variants, one clears IRQ's
   11.25 - * on the local processor, one does not.
   11.26 - *
   11.27 - * We make no fairness assumptions. They have a cost.
   11.28 - */
   11.29 -
   11.30 -#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
   11.31 -#define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
   11.32 -
   11.33 -#define spin_lock_string \
   11.34 -        "1:\n" \
   11.35 -	LOCK \
   11.36 -	"decb %0\n\t" \
   11.37 -	"jns 3f\n" \
   11.38 -	"2:\t" \
   11.39 -	"rep;nop\n\t" \
   11.40 -	"cmpb $0,%0\n\t" \
   11.41 -	"jle 2b\n\t" \
   11.42 -	"jmp 1b\n" \
   11.43 -	"3:\n\t"
   11.44 -
   11.45 -#define spin_lock_string_flags \
   11.46 -        "1:\n" \
   11.47 -	LOCK \
   11.48 -	"decb %0\n\t" \
   11.49 -	"jns 4f\n\t" \
   11.50 -	"2:\t" \
   11.51 -	"testl $0x200, %1\n\t" \
   11.52 -	"jz 3f\n\t" \
   11.53 -	"#sti\n\t" \
   11.54 -	"3:\t" \
   11.55 -	"rep;nop\n\t" \
   11.56 -	"cmpb $0, %0\n\t" \
   11.57 -	"jle 3b\n\t" \
   11.58 -	"#cli\n\t" \
   11.59 -	"jmp 1b\n" \
   11.60 -	"4:\n\t"
   11.61 -
   11.62 -/*
   11.63 - * This works. Despite all the confusion.
   11.64 - * (except on PPro SMP or if we are using OOSTORE)
   11.65 - * (PPro errata 66, 92)
   11.66 - */
   11.67 -
   11.68 -#define spin_unlock_string \
   11.69 -	"xchgb %b0, %1" \
   11.70 -		:"=q" (oldval), "=m" (lock->slock) \
   11.71 -		:"0" (oldval) : "memory"
   11.72 -
   11.73 -static inline void _raw_spin_unlock(spinlock_t *lock)
   11.74 -{
   11.75 -	char oldval = 1;
   11.76 -	__asm__ __volatile__(
   11.77 -		spin_unlock_string
   11.78 -	);
   11.79 -}
   11.80 -
   11.81 -static inline int _raw_spin_trylock(spinlock_t *lock)
   11.82 -{
   11.83 -	char oldval;
   11.84 -	__asm__ __volatile__(
   11.85 -		"xchgb %b0,%1\n"
   11.86 -		:"=q" (oldval), "=m" (lock->slock)
   11.87 -		:"0" (0) : "memory");
   11.88 -	return oldval > 0;
   11.89 -}
   11.90 -
   11.91 -static inline void _raw_spin_lock(spinlock_t *lock)
   11.92 -{
   11.93 -	__asm__ __volatile__(
   11.94 -		spin_lock_string
   11.95 -		:"=m" (lock->slock) : : "memory");
   11.96 -}
   11.97 -
   11.98 -static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
   11.99 -{
  11.100 -	__asm__ __volatile__(
  11.101 -		spin_lock_string_flags
  11.102 -		:"=m" (lock->slock) : "r" (flags) : "memory");
  11.103 -}
  11.104 -
  11.105 -#define _spin_trylock(lock)     ({_raw_spin_trylock(lock) ? \
  11.106 -                                1 : ({ 0;});})
  11.107 -
  11.108 -#define _spin_lock(lock)        \
  11.109 -do {                            \
  11.110 -        _raw_spin_lock(lock);   \
  11.111 -} while(0)
  11.112 -
  11.113 -#define _spin_unlock(lock)      \
  11.114 -do {                            \
  11.115 -        _raw_spin_unlock(lock); \
  11.116 -} while (0)
  11.117 -
  11.118 -
  11.119 -#define spin_lock(lock)       _spin_lock(lock)
  11.120 -#define spin_unlock(lock)       _spin_unlock(lock)
  11.121 -
  11.122 -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
  11.123 -
  11.124 -#endif
    12.1 --- a/extras/mini-os/include/traps.h	Tue Aug 22 14:45:49 2006 -0600
    12.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.3 @@ -1,73 +0,0 @@
    12.4 -/* 
    12.5 - ****************************************************************************
    12.6 - * (C) 2005 - Grzegorz Milos - Intel Reseach Cambridge
    12.7 - ****************************************************************************
    12.8 - *
    12.9 - *        File: traps.h
   12.10 - *      Author: Grzegorz Milos (gm281@cam.ac.uk)
   12.11 - *              
   12.12 - *        Date: Jun 2005
   12.13 - * 
   12.14 - * Environment: Xen Minimal OS
   12.15 - * Description: Deals with traps
   12.16 - *
   12.17 - ****************************************************************************
   12.18 - */
   12.19 -
   12.20 -#ifndef _TRAPS_H_
   12.21 -#define _TRAPS_H_
   12.22 -
   12.23 -#ifdef __i386__
   12.24 -struct pt_regs {
   12.25 -	long ebx;
   12.26 -	long ecx;
   12.27 -	long edx;
   12.28 -	long esi;
   12.29 -	long edi;
   12.30 -	long ebp;
   12.31 -	long eax;
   12.32 -	int  xds;
   12.33 -	int  xes;
   12.34 -	long orig_eax;
   12.35 -	long eip;
   12.36 -	int  xcs;
   12.37 -	long eflags;
   12.38 -	long esp;
   12.39 -	int  xss;
   12.40 -};
   12.41 -#elif __x86_64__
   12.42 -
   12.43 -struct pt_regs {
   12.44 -	unsigned long r15;
   12.45 -	unsigned long r14;
   12.46 -	unsigned long r13;
   12.47 -	unsigned long r12;
   12.48 -	unsigned long rbp;
   12.49 -	unsigned long rbx;
   12.50 -/* arguments: non interrupts/non tracing syscalls only save upto here*/
   12.51 - 	unsigned long r11;
   12.52 -	unsigned long r10;	
   12.53 -	unsigned long r9;
   12.54 -	unsigned long r8;
   12.55 -	unsigned long rax;
   12.56 -	unsigned long rcx;
   12.57 -	unsigned long rdx;
   12.58 -	unsigned long rsi;
   12.59 -	unsigned long rdi;
   12.60 -	unsigned long orig_rax;
   12.61 -/* end of arguments */ 	
   12.62 -/* cpu exception frame or undefined */
   12.63 -	unsigned long rip;
   12.64 -	unsigned long cs;
   12.65 -	unsigned long eflags; 
   12.66 -	unsigned long rsp; 
   12.67 -	unsigned long ss;
   12.68 -/* top of stack page */ 
   12.69 -};
   12.70 -
   12.71 -
   12.72 -#endif
   12.73 -
   12.74 -void dump_regs(struct pt_regs *regs);
   12.75 -
   12.76 -#endif /* _TRAPS_H_ */
    13.1 --- a/extras/mini-os/include/types.h	Tue Aug 22 14:45:49 2006 -0600
    13.2 +++ b/extras/mini-os/include/types.h	Wed Aug 23 11:11:27 2006 -0600
    13.3 @@ -29,7 +29,7 @@ typedef unsigned int        u32;
    13.4  #ifdef __i386__
    13.5  typedef signed long long    s64;
    13.6  typedef unsigned long long  u64;
    13.7 -#elif defined(__x86_64__)
    13.8 +#elif defined(__x86_64__) || defined(__ia64__)
    13.9  typedef signed long         s64;
   13.10  typedef unsigned long       u64;
   13.11  #endif
   13.12 @@ -49,7 +49,7 @@ typedef struct { unsigned long pte_low; 
   13.13  typedef struct { unsigned long pte_low, pte_high; } pte_t;
   13.14  #endif /* CONFIG_X86_PAE */
   13.15  
   13.16 -#elif defined(__x86_64__)
   13.17 +#elif defined(__x86_64__) || defined(__ia64__)
   13.18  typedef long                quad_t;
   13.19  typedef unsigned long       u_quad_t;
   13.20  typedef unsigned long       uintptr_t;
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/extras/mini-os/include/x86/os.h	Wed Aug 23 11:11:27 2006 -0600
    14.3 @@ -0,0 +1,561 @@
    14.4 +/******************************************************************************
    14.5 + * os.h
    14.6 + * 
    14.7 + * random collection of macros and definitions
    14.8 + */
    14.9 +
   14.10 +#ifndef _OS_H_
   14.11 +#define _OS_H_
   14.12 +
   14.13 +#if __GNUC__ == 2 && __GNUC_MINOR__ < 96
   14.14 +#define __builtin_expect(x, expected_value) (x)
   14.15 +#endif
   14.16 +#define unlikely(x)  __builtin_expect((x),0)
   14.17 +
   14.18 +#define smp_processor_id() 0
   14.19 +
   14.20 +
   14.21 +#ifndef __ASSEMBLY__
   14.22 +#include <types.h>
   14.23 +#include <hypervisor.h>
   14.24 +
   14.25 +extern void do_exit(void);
   14.26 +#define BUG do_exit
   14.27 +
   14.28 +#endif
   14.29 +#include <xen/xen.h>
   14.30 +
   14.31 +
   14.32 +#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0))
   14.33 +
   14.34 +#define __KERNEL_CS  FLAT_KERNEL_CS
   14.35 +#define __KERNEL_DS  FLAT_KERNEL_DS
   14.36 +#define __KERNEL_SS  FLAT_KERNEL_SS
   14.37 +
   14.38 +#define TRAP_divide_error      0
   14.39 +#define TRAP_debug             1
   14.40 +#define TRAP_nmi               2
   14.41 +#define TRAP_int3              3
   14.42 +#define TRAP_overflow          4
   14.43 +#define TRAP_bounds            5
   14.44 +#define TRAP_invalid_op        6
   14.45 +#define TRAP_no_device         7
   14.46 +#define TRAP_double_fault      8
   14.47 +#define TRAP_copro_seg         9
   14.48 +#define TRAP_invalid_tss      10
   14.49 +#define TRAP_no_segment       11
   14.50 +#define TRAP_stack_error      12
   14.51 +#define TRAP_gp_fault         13
   14.52 +#define TRAP_page_fault       14
   14.53 +#define TRAP_spurious_int     15
   14.54 +#define TRAP_copro_error      16
   14.55 +#define TRAP_alignment_check  17
   14.56 +#define TRAP_machine_check    18
   14.57 +#define TRAP_simd_error       19
   14.58 +#define TRAP_deferred_nmi     31
   14.59 +
   14.60 +/* Everything below this point is not included by assembler (.S) files. */
   14.61 +#ifndef __ASSEMBLY__
   14.62 +
   14.63 +extern shared_info_t *HYPERVISOR_shared_info;
   14.64 +
   14.65 +void trap_init(void);
   14.66 +
   14.67 +
   14.68 +
   14.69 +/* 
   14.70 + * The use of 'barrier' in the following reflects their use as local-lock
   14.71 + * operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
   14.72 + * critical operations are executed. All critical operations must complete
   14.73 + * /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
   14.74 + * includes these barriers, for example.
   14.75 + */
   14.76 +
   14.77 +#define __cli()								\
   14.78 +do {									\
   14.79 +	vcpu_info_t *_vcpu;						\
   14.80 +	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
   14.81 +	_vcpu->evtchn_upcall_mask = 1;					\
   14.82 +	barrier();							\
   14.83 +} while (0)
   14.84 +
   14.85 +#define __sti()								\
   14.86 +do {									\
   14.87 +	vcpu_info_t *_vcpu;						\
   14.88 +	barrier();							\
   14.89 +	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
   14.90 +	_vcpu->evtchn_upcall_mask = 0;					\
   14.91 +	barrier(); /* unmask then check (avoid races) */		\
   14.92 +	if ( unlikely(_vcpu->evtchn_upcall_pending) )			\
   14.93 +		force_evtchn_callback();				\
   14.94 +} while (0)
   14.95 +
   14.96 +#define __save_flags(x)							\
   14.97 +do {									\
   14.98 +	vcpu_info_t *_vcpu;						\
   14.99 +	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  14.100 +	(x) = _vcpu->evtchn_upcall_mask;				\
  14.101 +} while (0)
  14.102 +
  14.103 +#define __restore_flags(x)						\
  14.104 +do {									\
  14.105 +	vcpu_info_t *_vcpu;						\
  14.106 +	barrier();							\
  14.107 +	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  14.108 +	if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {			\
  14.109 +		barrier(); /* unmask then check (avoid races) */	\
  14.110 +		if ( unlikely(_vcpu->evtchn_upcall_pending) )		\
  14.111 +			force_evtchn_callback();			\
  14.112 +	}\
  14.113 +} while (0)
  14.114 +
  14.115 +#define safe_halt()		((void)0)
  14.116 +
  14.117 +#define __save_and_cli(x)						\
  14.118 +do {									\
  14.119 +	vcpu_info_t *_vcpu;						\
  14.120 +	_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()];	\
  14.121 +	(x) = _vcpu->evtchn_upcall_mask;				\
  14.122 +	_vcpu->evtchn_upcall_mask = 1;					\
  14.123 +	barrier();							\
  14.124 +} while (0)
  14.125 +
  14.126 +#define local_irq_save(x)	__save_and_cli(x)
  14.127 +#define local_irq_restore(x)	__restore_flags(x)
  14.128 +#define local_save_flags(x)	__save_flags(x)
  14.129 +#define local_irq_disable()	__cli()
  14.130 +#define local_irq_enable()	__sti()
  14.131 +
  14.132 +#define irqs_disabled()			\
  14.133 +    HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask
  14.134 +
  14.135 +/* This is a barrier for the compiler only, NOT the processor! */
  14.136 +#define barrier() __asm__ __volatile__("": : :"memory")
  14.137 +
  14.138 +#if defined(__i386__)
  14.139 +#define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
  14.140 +#define rmb()   __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
  14.141 +#define wmb()	__asm__ __volatile__ ("": : :"memory")
  14.142 +#elif defined(__x86_64__)
  14.143 +#define mb()    __asm__ __volatile__ ("mfence":::"memory")
  14.144 +#define rmb()   __asm__ __volatile__ ("lfence":::"memory")
  14.145 +#define wmb()	__asm__ __volatile__ ("sfence" ::: "memory") /* From CONFIG_UNORDERED_IO (linux) */
  14.146 +#endif
  14.147 +
  14.148 +
  14.149 +#define LOCK_PREFIX ""
  14.150 +#define LOCK ""
  14.151 +#define ADDR (*(volatile long *) addr)
  14.152 +/*
  14.153 + * Make sure gcc doesn't try to be clever and move things around
  14.154 + * on us. We need to use _exactly_ the address the user gave us,
  14.155 + * not some alias that contains the same information.
  14.156 + */
  14.157 +typedef struct { volatile int counter; } atomic_t;
  14.158 +
  14.159 +
  14.160 +/************************** i386 *******************************/
  14.161 +#if defined (__i386__)
  14.162 +
  14.163 +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
  14.164 +struct __xchg_dummy { unsigned long a[100]; };
  14.165 +#define __xg(x) ((struct __xchg_dummy *)(x))
  14.166 +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
  14.167 +{
  14.168 +	switch (size) {
  14.169 +		case 1:
  14.170 +			__asm__ __volatile__("xchgb %b0,%1"
  14.171 +				:"=q" (x)
  14.172 +				:"m" (*__xg(ptr)), "0" (x)
  14.173 +				:"memory");
  14.174 +			break;
  14.175 +		case 2:
  14.176 +			__asm__ __volatile__("xchgw %w0,%1"
  14.177 +				:"=r" (x)
  14.178 +				:"m" (*__xg(ptr)), "0" (x)
  14.179 +				:"memory");
  14.180 +			break;
  14.181 +		case 4:
  14.182 +			__asm__ __volatile__("xchgl %0,%1"
  14.183 +				:"=r" (x)
  14.184 +				:"m" (*__xg(ptr)), "0" (x)
  14.185 +				:"memory");
  14.186 +			break;
  14.187 +	}
  14.188 +	return x;
  14.189 +}
  14.190 +
  14.191 +/**
  14.192 + * test_and_clear_bit - Clear a bit and return its old value
  14.193 + * @nr: Bit to clear
  14.194 + * @addr: Address to count from
  14.195 + *
  14.196 + * This operation is atomic and cannot be reordered.
  14.197 + * It can be reordered on architectures other than x86.
  14.198 + * It also implies a memory barrier.
  14.199 + */
  14.200 +static inline int test_and_clear_bit(int nr, volatile unsigned long * addr)
  14.201 +{
  14.202 +	int oldbit;
  14.203 +
  14.204 +	__asm__ __volatile__( LOCK
  14.205 +		"btrl %2,%1\n\tsbbl %0,%0"
  14.206 +		:"=r" (oldbit),"=m" (ADDR)
  14.207 +		:"Ir" (nr) : "memory");
  14.208 +	return oldbit;
  14.209 +}
  14.210 +
  14.211 +static inline int constant_test_bit(int nr, const volatile unsigned long *addr)
  14.212 +{
  14.213 +	return ((1UL << (nr & 31)) & (addr[nr >> 5])) != 0;
  14.214 +}
  14.215 +
  14.216 +static inline int variable_test_bit(int nr, const volatile unsigned long * addr)
  14.217 +{
  14.218 +	int oldbit;
  14.219 +
  14.220 +	__asm__ __volatile__(
  14.221 +		"btl %2,%1\n\tsbbl %0,%0"
  14.222 +		:"=r" (oldbit)
  14.223 +		:"m" (ADDR),"Ir" (nr));
  14.224 +	return oldbit;
  14.225 +}
  14.226 +
  14.227 +#define test_bit(nr,addr) \
  14.228 +(__builtin_constant_p(nr) ? \
  14.229 + constant_test_bit((nr),(addr)) : \
  14.230 + variable_test_bit((nr),(addr)))
  14.231 +
  14.232 +/**
  14.233 + * set_bit - Atomically set a bit in memory
  14.234 + * @nr: the bit to set
  14.235 + * @addr: the address to start counting from
  14.236 + *
  14.237 + * This function is atomic and may not be reordered.  See __set_bit()
  14.238 + * if you do not require the atomic guarantees.
  14.239 + *
  14.240 + * Note: there are no guarantees that this function will not be reordered
  14.241 + * on non-x86 architectures, so if you are writing portable code,
  14.242 + * make sure not to rely on its reordering guarantees.
  14.243 + *
  14.244 + * Note that @nr may be almost arbitrarily large; this function is not
  14.245 + * restricted to acting on a single-word quantity.
  14.246 + */
  14.247 +static inline void set_bit(int nr, volatile unsigned long * addr)
  14.248 +{
  14.249 +	__asm__ __volatile__( LOCK
  14.250 +		"btsl %1,%0"
  14.251 +		:"=m" (ADDR)
  14.252 +		:"Ir" (nr));
  14.253 +}
  14.254 +
  14.255 +/**
  14.256 + * clear_bit - Clears a bit in memory
  14.257 + * @nr: Bit to clear
  14.258 + * @addr: Address to start counting from
  14.259 + *
  14.260 + * clear_bit() is atomic and may not be reordered.  However, it does
  14.261 + * not contain a memory barrier, so if it is used for locking purposes,
  14.262 + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  14.263 + * in order to ensure changes are visible on other processors.
  14.264 + */
  14.265 +static inline void clear_bit(int nr, volatile unsigned long * addr)
  14.266 +{
  14.267 +	__asm__ __volatile__( LOCK
  14.268 +		"btrl %1,%0"
  14.269 +		:"=m" (ADDR)
  14.270 +		:"Ir" (nr));
  14.271 +}
  14.272 +
  14.273 +/**
  14.274 + * __ffs - find first bit in word.
  14.275 + * @word: The word to search
  14.276 + *
  14.277 + * Undefined if no bit exists, so code should check against 0 first.
  14.278 + */
  14.279 +static inline unsigned long __ffs(unsigned long word)
  14.280 +{
  14.281 +	__asm__("bsfl %1,%0"
  14.282 +		:"=r" (word)
  14.283 +		:"rm" (word));
  14.284 +	return word;
  14.285 +}
  14.286 +
  14.287 +
  14.288 +/*
  14.289 + * These have to be done with inline assembly: that way the bit-setting
  14.290 + * is guaranteed to be atomic. All bit operations return 0 if the bit
  14.291 + * was cleared before the operation and != 0 if it was not.
  14.292 + *
  14.293 + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
  14.294 + */
  14.295 +#define ADDR (*(volatile long *) addr)
  14.296 +
  14.297 +#define rdtscll(val) \
  14.298 +     __asm__ __volatile__("rdtsc" : "=A" (val))
  14.299 +
  14.300 +
  14.301 +
  14.302 +#elif defined(__x86_64__)/* ifdef __i386__ */
  14.303 +/************************** x86_64 *******************************/
  14.304 +
  14.305 +#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
  14.306 +#define __xg(x) ((volatile long *)(x))
  14.307 +static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
  14.308 +{
  14.309 +	switch (size) {
  14.310 +		case 1:
  14.311 +			__asm__ __volatile__("xchgb %b0,%1"
  14.312 +				:"=q" (x)
  14.313 +				:"m" (*__xg(ptr)), "0" (x)
  14.314 +				:"memory");
  14.315 +			break;
  14.316 +		case 2:
  14.317 +			__asm__ __volatile__("xchgw %w0,%1"
  14.318 +				:"=r" (x)
  14.319 +				:"m" (*__xg(ptr)), "0" (x)
  14.320 +				:"memory");
  14.321 +			break;
  14.322 +		case 4:
  14.323 +			__asm__ __volatile__("xchgl %k0,%1"
  14.324 +				:"=r" (x)
  14.325 +				:"m" (*__xg(ptr)), "0" (x)
  14.326 +				:"memory");
  14.327 +			break;
  14.328 +		case 8:
  14.329 +			__asm__ __volatile__("xchgq %0,%1"
  14.330 +				:"=r" (x)
  14.331 +				:"m" (*__xg(ptr)), "0" (x)
  14.332 +				:"memory");
  14.333 +			break;
  14.334 +	}
  14.335 +	return x;
  14.336 +}
  14.337 +
  14.338 +/**
  14.339 + * test_and_clear_bit - Clear a bit and return its old value
  14.340 + * @nr: Bit to clear
  14.341 + * @addr: Address to count from
  14.342 + *
  14.343 + * This operation is atomic and cannot be reordered.  
  14.344 + * It also implies a memory barrier.
  14.345 + */
  14.346 +static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
  14.347 +{
  14.348 +	int oldbit;
  14.349 +
  14.350 +	__asm__ __volatile__( LOCK_PREFIX
  14.351 +		"btrl %2,%1\n\tsbbl %0,%0"
  14.352 +		:"=r" (oldbit),"=m" (ADDR)
  14.353 +		:"dIr" (nr) : "memory");
  14.354 +	return oldbit;
  14.355 +}
  14.356 +
  14.357 +static __inline__ int constant_test_bit(int nr, const volatile void * addr)
  14.358 +{
  14.359 +	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  14.360 +}
  14.361 +
  14.362 +static __inline__ int variable_test_bit(int nr, volatile const void * addr)
  14.363 +{
  14.364 +	int oldbit;
  14.365 +
  14.366 +	__asm__ __volatile__(
  14.367 +		"btl %2,%1\n\tsbbl %0,%0"
  14.368 +		:"=r" (oldbit)
  14.369 +		:"m" (ADDR),"dIr" (nr));
  14.370 +	return oldbit;
  14.371 +}
  14.372 +
  14.373 +#define test_bit(nr,addr) \
  14.374 +(__builtin_constant_p(nr) ? \
  14.375 + constant_test_bit((nr),(addr)) : \
  14.376 + variable_test_bit((nr),(addr)))
  14.377 +
  14.378 +
  14.379 +/**
  14.380 + * set_bit - Atomically set a bit in memory
  14.381 + * @nr: the bit to set
  14.382 + * @addr: the address to start counting from
  14.383 + *
  14.384 + * This function is atomic and may not be reordered.  See __set_bit()
  14.385 + * if you do not require the atomic guarantees.
  14.386 + * Note that @nr may be almost arbitrarily large; this function is not
  14.387 + * restricted to acting on a single-word quantity.
  14.388 + */
  14.389 +static __inline__ void set_bit(int nr, volatile void * addr)
  14.390 +{
  14.391 +	__asm__ __volatile__( LOCK_PREFIX
  14.392 +		"btsl %1,%0"
  14.393 +		:"=m" (ADDR)
  14.394 +		:"dIr" (nr) : "memory");
  14.395 +}
  14.396 +
  14.397 +/**
  14.398 + * clear_bit - Clears a bit in memory
  14.399 + * @nr: Bit to clear
  14.400 + * @addr: Address to start counting from
  14.401 + *
  14.402 + * clear_bit() is atomic and may not be reordered.  However, it does
  14.403 + * not contain a memory barrier, so if it is used for locking purposes,
  14.404 + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
  14.405 + * in order to ensure changes are visible on other processors.
  14.406 + */
  14.407 +static __inline__ void clear_bit(int nr, volatile void * addr)
  14.408 +{
  14.409 +	__asm__ __volatile__( LOCK_PREFIX
  14.410 +		"btrl %1,%0"
  14.411 +		:"=m" (ADDR)
  14.412 +		:"dIr" (nr));
  14.413 +}
  14.414 +
  14.415 +/**
  14.416 + * __ffs - find first bit in word.
  14.417 + * @word: The word to search
  14.418 + *
  14.419 + * Undefined if no bit exists, so code should check against 0 first.
  14.420 + */
  14.421 +static __inline__ unsigned long __ffs(unsigned long word)
  14.422 +{
  14.423 +	__asm__("bsfq %1,%0"
  14.424 +		:"=r" (word)
  14.425 +		:"rm" (word));
  14.426 +	return word;
  14.427 +}
  14.428 +
  14.429 +#define ADDR (*(volatile long *) addr)
  14.430 +
  14.431 +#define rdtscll(val) do { \
  14.432 +     unsigned int __a,__d; \
  14.433 +     asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
  14.434 +     (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
  14.435 +} while(0)
  14.436 +
  14.437 +#define wrmsr(msr,val1,val2) \
  14.438 +      __asm__ __volatile__("wrmsr" \
  14.439 +                           : /* no outputs */ \
  14.440 +                           : "c" (msr), "a" (val1), "d" (val2))
  14.441 +
  14.442 +#define wrmsrl(msr,val) wrmsr(msr,(u32)((u64)(val)),((u64)(val))>>32)
  14.443 +
  14.444 +
  14.445 +#else /* ifdef __x86_64__ */
  14.446 +#error "Unsupported architecture"
  14.447 +#endif
  14.448 +
  14.449 +
  14.450 +/********************* common i386 and x86_64  ****************************/
  14.451 +struct __synch_xchg_dummy { unsigned long a[100]; };
  14.452 +#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x))
  14.453 +
  14.454 +#define synch_cmpxchg(ptr, old, new) \
  14.455 +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
  14.456 +                                     (unsigned long)(old), \
  14.457 +                                     (unsigned long)(new), \
  14.458 +                                     sizeof(*(ptr))))
  14.459 +
  14.460 +static inline unsigned long __synch_cmpxchg(volatile void *ptr,
  14.461 +        unsigned long old,
  14.462 +        unsigned long new, int size)
  14.463 +{
  14.464 +    unsigned long prev;
  14.465 +    switch (size) {
  14.466 +        case 1:
  14.467 +            __asm__ __volatile__("lock; cmpxchgb %b1,%2"
  14.468 +                    : "=a"(prev)
  14.469 +                    : "q"(new), "m"(*__synch_xg(ptr)),
  14.470 +                    "0"(old)
  14.471 +                    : "memory");
  14.472 +            return prev;
  14.473 +        case 2:
  14.474 +            __asm__ __volatile__("lock; cmpxchgw %w1,%2"
  14.475 +                    : "=a"(prev)
  14.476 +                    : "r"(new), "m"(*__synch_xg(ptr)),
  14.477 +                    "0"(old)
  14.478 +                    : "memory");
  14.479 +            return prev;
  14.480 +#ifdef __x86_64__
  14.481 +        case 4:
  14.482 +            __asm__ __volatile__("lock; cmpxchgl %k1,%2"
  14.483 +                    : "=a"(prev)
  14.484 +                    : "r"(new), "m"(*__synch_xg(ptr)),
  14.485 +                    "0"(old)
  14.486 +                    : "memory");
  14.487 +            return prev;
  14.488 +        case 8:
  14.489 +            __asm__ __volatile__("lock; cmpxchgq %1,%2"
  14.490 +                    : "=a"(prev)
  14.491 +                    : "r"(new), "m"(*__synch_xg(ptr)),
  14.492 +                    "0"(old)
  14.493 +                    : "memory");
  14.494 +            return prev;
  14.495 +#else
  14.496 +        case 4:
  14.497 +            __asm__ __volatile__("lock; cmpxchgl %1,%2"
  14.498 +                    : "=a"(prev)
  14.499 +                    : "r"(new), "m"(*__synch_xg(ptr)),
  14.500 +                    "0"(old)
  14.501 +                    : "memory");
  14.502 +            return prev;
  14.503 +#endif
  14.504 +    }
  14.505 +    return old;
  14.506 +}
  14.507 +
  14.508 +
  14.509 +static __inline__ void synch_set_bit(int nr, volatile void * addr)
  14.510 +{
  14.511 +    __asm__ __volatile__ ( 
  14.512 +        "lock btsl %1,%0"
  14.513 +        : "=m" (ADDR) : "Ir" (nr) : "memory" );
  14.514 +}
  14.515 +
  14.516 +static __inline__ void synch_clear_bit(int nr, volatile void * addr)
  14.517 +{
  14.518 +    __asm__ __volatile__ (
  14.519 +        "lock btrl %1,%0"
  14.520 +        : "=m" (ADDR) : "Ir" (nr) : "memory" );
  14.521 +}
  14.522 +
  14.523 +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
  14.524 +{
  14.525 +    int oldbit;
  14.526 +    __asm__ __volatile__ (
  14.527 +        "lock btsl %2,%1\n\tsbbl %0,%0"
  14.528 +        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
  14.529 +    return oldbit;
  14.530 +}
  14.531 +
  14.532 +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
  14.533 +{
  14.534 +    int oldbit;
  14.535 +    __asm__ __volatile__ (
  14.536 +        "lock btrl %2,%1\n\tsbbl %0,%0"
  14.537 +        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
  14.538 +    return oldbit;
  14.539 +}
  14.540 +
  14.541 +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
  14.542 +{
  14.543 +    return ((1UL << (nr & 31)) & 
  14.544 +            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
  14.545 +}
  14.546 +
  14.547 +static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
  14.548 +{
  14.549 +    int oldbit;
  14.550 +    __asm__ __volatile__ (
  14.551 +        "btl %2,%1\n\tsbbl %0,%0"
  14.552 +        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
  14.553 +    return oldbit;
  14.554 +}
  14.555 +
  14.556 +#define synch_test_bit(nr,addr) \
  14.557 +(__builtin_constant_p(nr) ? \
  14.558 + synch_const_test_bit((nr),(addr)) : \
  14.559 + synch_var_test_bit((nr),(addr)))
  14.560 +
  14.561 +
  14.562 +
  14.563 +#endif /* not assembly */
  14.564 +#endif /* _OS_H_ */
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/extras/mini-os/include/x86/spinlock.h	Wed Aug 23 11:11:27 2006 -0600
    15.3 @@ -0,0 +1,121 @@
    15.4 +#ifndef __ASM_SPINLOCK_H
    15.5 +#define __ASM_SPINLOCK_H
    15.6 +
    15.7 +#include <lib.h>
    15.8 +
    15.9 +/*
   15.10 + * Your basic SMP spinlocks, allowing only a single CPU anywhere
   15.11 + */
   15.12 +
   15.13 +typedef struct {
   15.14 +	volatile unsigned int slock;
   15.15 +} spinlock_t;
   15.16 +
   15.17 +#define SPINLOCK_MAGIC	0xdead4ead
   15.18 +
   15.19 +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 }
   15.20 +
   15.21 +#define spin_lock_init(x)	do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
   15.22 +
   15.23 +/*
   15.24 + * Simple spin lock operations.  There are two variants, one clears IRQ's
   15.25 + * on the local processor, one does not.
   15.26 + *
   15.27 + * We make no fairness assumptions. They have a cost.
   15.28 + */
   15.29 +
   15.30 +#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->slock) <= 0)
   15.31 +#define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
   15.32 +
   15.33 +#define spin_lock_string \
   15.34 +        "1:\n" \
   15.35 +	LOCK \
   15.36 +	"decb %0\n\t" \
   15.37 +	"jns 3f\n" \
   15.38 +	"2:\t" \
   15.39 +	"rep;nop\n\t" \
   15.40 +	"cmpb $0,%0\n\t" \
   15.41 +	"jle 2b\n\t" \
   15.42 +	"jmp 1b\n" \
   15.43 +	"3:\n\t"
   15.44 +
   15.45 +#define spin_lock_string_flags \
   15.46 +        "1:\n" \
   15.47 +	LOCK \
   15.48 +	"decb %0\n\t" \
   15.49 +	"jns 4f\n\t" \
   15.50 +	"2:\t" \
   15.51 +	"testl $0x200, %1\n\t" \
   15.52 +	"jz 3f\n\t" \
   15.53 +	"#sti\n\t" \
   15.54 +	"3:\t" \
   15.55 +	"rep;nop\n\t" \
   15.56 +	"cmpb $0, %0\n\t" \
   15.57 +	"jle 3b\n\t" \
   15.58 +	"#cli\n\t" \
   15.59 +	"jmp 1b\n" \
   15.60 +	"4:\n\t"
   15.61 +
   15.62 +/*
   15.63 + * This works. Despite all the confusion.
   15.64 + * (except on PPro SMP or if we are using OOSTORE)
   15.65 + * (PPro errata 66, 92)
   15.66 + */
   15.67 +
   15.68 +#define spin_unlock_string \
   15.69 +	"xchgb %b0, %1" \
   15.70 +		:"=q" (oldval), "=m" (lock->slock) \
   15.71 +		:"0" (oldval) : "memory"
   15.72 +
   15.73 +static inline void _raw_spin_unlock(spinlock_t *lock)
   15.74 +{
   15.75 +	char oldval = 1;
   15.76 +	__asm__ __volatile__(
   15.77 +		spin_unlock_string
   15.78 +	);
   15.79 +}
   15.80 +
   15.81 +static inline int _raw_spin_trylock(spinlock_t *lock)
   15.82 +{
   15.83 +	char oldval;
   15.84 +	__asm__ __volatile__(
   15.85 +		"xchgb %b0,%1\n"
   15.86 +		:"=q" (oldval), "=m" (lock->slock)
   15.87 +		:"0" (0) : "memory");
   15.88 +	return oldval > 0;
   15.89 +}
   15.90 +
   15.91 +static inline void _raw_spin_lock(spinlock_t *lock)
   15.92 +{
   15.93 +	__asm__ __volatile__(
   15.94 +		spin_lock_string
   15.95 +		:"=m" (lock->slock) : : "memory");
   15.96 +}
   15.97 +
   15.98 +static inline void _raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
   15.99 +{
  15.100 +	__asm__ __volatile__(
  15.101 +		spin_lock_string_flags
  15.102 +		:"=m" (lock->slock) : "r" (flags) : "memory");
  15.103 +}
  15.104 +
  15.105 +#define _spin_trylock(lock)     ({_raw_spin_trylock(lock) ? \
  15.106 +                                1 : ({ 0;});})
  15.107 +
  15.108 +#define _spin_lock(lock)        \
  15.109 +do {                            \
  15.110 +        _raw_spin_lock(lock);   \
  15.111 +} while(0)
  15.112 +
  15.113 +#define _spin_unlock(lock)      \
  15.114 +do {                            \
  15.115 +        _raw_spin_unlock(lock); \
  15.116 +} while (0)
  15.117 +
  15.118 +
  15.119 +#define spin_lock(lock)       _spin_lock(lock)
  15.120 +#define spin_unlock(lock)       _spin_unlock(lock)
  15.121 +
  15.122 +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
  15.123 +
  15.124 +#endif
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/extras/mini-os/include/x86/traps.h	Wed Aug 23 11:11:27 2006 -0600
    16.3 @@ -0,0 +1,73 @@
    16.4 +/* 
    16.5 + ****************************************************************************
    16.6 + * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
    16.7 + ****************************************************************************
    16.8 + *
    16.9 + *        File: traps.h
   16.10 + *      Author: Grzegorz Milos (gm281@cam.ac.uk)
   16.11 + *              
   16.12 + *        Date: Jun 2005
   16.13 + * 
   16.14 + * Environment: Xen Minimal OS
   16.15 + * Description: Deals with traps
   16.16 + *
   16.17 + ****************************************************************************
   16.18 + */
   16.19 +
   16.20 +#ifndef _TRAPS_H_
   16.21 +#define _TRAPS_H_
   16.22 +
   16.23 +#ifdef __i386__
   16.24 +struct pt_regs {
   16.25 +	long ebx;
   16.26 +	long ecx;
   16.27 +	long edx;
   16.28 +	long esi;
   16.29 +	long edi;
   16.30 +	long ebp;
   16.31 +	long eax;
   16.32 +	int  xds;
   16.33 +	int  xes;
   16.34 +	long orig_eax;
   16.35 +	long eip;
   16.36 +	int  xcs;
   16.37 +	long eflags;
   16.38 +	long esp;
   16.39 +	int  xss;
   16.40 +};
   16.41 +#elif __x86_64__
   16.42 +
   16.43 +struct pt_regs {
   16.44 +	unsigned long r15;
   16.45 +	unsigned long r14;
   16.46 +	unsigned long r13;
   16.47 +	unsigned long r12;
   16.48 +	unsigned long rbp;
   16.49 +	unsigned long rbx;
   16.50 +/* arguments: non interrupts/non tracing syscalls only save up to here */
   16.51 + 	unsigned long r11;
   16.52 +	unsigned long r10;	
   16.53 +	unsigned long r9;
   16.54 +	unsigned long r8;
   16.55 +	unsigned long rax;
   16.56 +	unsigned long rcx;
   16.57 +	unsigned long rdx;
   16.58 +	unsigned long rsi;
   16.59 +	unsigned long rdi;
   16.60 +	unsigned long orig_rax;
   16.61 +/* end of arguments */ 	
   16.62 +/* cpu exception frame or undefined */
   16.63 +	unsigned long rip;
   16.64 +	unsigned long cs;
   16.65 +	unsigned long eflags; 
   16.66 +	unsigned long rsp; 
   16.67 +	unsigned long ss;
   16.68 +/* top of stack page */ 
   16.69 +};
   16.70 +
   16.71 +
   16.72 +#endif
   16.73 +
   16.74 +void dump_regs(struct pt_regs *regs);
   16.75 +
   16.76 +#endif /* _TRAPS_H_ */
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/extras/mini-os/include/x86/x86_32/hypercall-x86_32.h	Wed Aug 23 11:11:27 2006 -0600
    17.3 @@ -0,0 +1,326 @@
    17.4 +/******************************************************************************
    17.5 + * hypercall-x86_32.h
    17.6 + * 
    17.7 + * Copied from XenLinux.
    17.8 + * 
    17.9 + * Copyright (c) 2002-2004, K A Fraser
   17.10 + * 
   17.11 + * This file may be distributed separately from the Linux kernel, or
   17.12 + * incorporated into other software packages, subject to the following license:
   17.13 + * 
   17.14 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   17.15 + * of this source file (the "Software"), to deal in the Software without
   17.16 + * restriction, including without limitation the rights to use, copy, modify,
   17.17 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   17.18 + * and to permit persons to whom the Software is furnished to do so, subject to
   17.19 + * the following conditions:
   17.20 + * 
   17.21 + * The above copyright notice and this permission notice shall be included in
   17.22 + * all copies or substantial portions of the Software.
   17.23 + * 
   17.24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   17.25 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17.26 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   17.27 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   17.28 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   17.29 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   17.30 + * IN THE SOFTWARE.
   17.31 + */
   17.32 +
   17.33 +#ifndef __HYPERCALL_X86_32_H__
   17.34 +#define __HYPERCALL_X86_32_H__
   17.35 +
   17.36 +#include <xen/xen.h>
   17.37 +#include <xen/sched.h>
   17.38 +#include <xen/nmi.h>
   17.39 +#include <mm.h>
   17.40 +
   17.41 +#define __STR(x) #x
   17.42 +#define STR(x) __STR(x)
   17.43 +
   17.44 +extern char hypercall_page[PAGE_SIZE];
   17.45 +
   17.46 +#define _hypercall0(type, name)			\
   17.47 +({						\
   17.48 +	long __res;				\
   17.49 +	asm volatile (				\
   17.50 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   17.51 +		: "=a" (__res)			\
   17.52 +		:				\
   17.53 +		: "memory" );			\
   17.54 +	(type)__res;				\
   17.55 +})
   17.56 +
   17.57 +#define _hypercall1(type, name, a1)				\
   17.58 +({								\
   17.59 +	long __res, __ign1;					\
   17.60 +	asm volatile (						\
   17.61 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   17.62 +		: "=a" (__res), "=b" (__ign1)			\
   17.63 +		: "1" ((long)(a1))				\
   17.64 +		: "memory" );					\
   17.65 +	(type)__res;						\
   17.66 +})
   17.67 +
   17.68 +#define _hypercall2(type, name, a1, a2)				\
   17.69 +({								\
   17.70 +	long __res, __ign1, __ign2;				\
   17.71 +	asm volatile (						\
   17.72 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   17.73 +		: "=a" (__res), "=b" (__ign1), "=c" (__ign2)	\
   17.74 +		: "1" ((long)(a1)), "2" ((long)(a2))		\
   17.75 +		: "memory" );					\
   17.76 +	(type)__res;						\
   17.77 +})
   17.78 +
   17.79 +#define _hypercall3(type, name, a1, a2, a3)			\
   17.80 +({								\
   17.81 +	long __res, __ign1, __ign2, __ign3;			\
   17.82 +	asm volatile (						\
   17.83 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   17.84 +		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), 	\
   17.85 +		"=d" (__ign3)					\
   17.86 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   17.87 +		"3" ((long)(a3))				\
   17.88 +		: "memory" );					\
   17.89 +	(type)__res;						\
   17.90 +})
   17.91 +
   17.92 +#define _hypercall4(type, name, a1, a2, a3, a4)			\
   17.93 +({								\
   17.94 +	long __res, __ign1, __ign2, __ign3, __ign4;		\
   17.95 +	asm volatile (						\
   17.96 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   17.97 +		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
   17.98 +		"=d" (__ign3), "=S" (__ign4)			\
   17.99 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
  17.100 +		"3" ((long)(a3)), "4" ((long)(a4))		\
  17.101 +		: "memory" );					\
  17.102 +	(type)__res;						\
  17.103 +})
  17.104 +
  17.105 +#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
  17.106 +({								\
  17.107 +	long __res, __ign1, __ign2, __ign3, __ign4, __ign5;	\
  17.108 +	asm volatile (						\
  17.109 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
  17.110 +		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
  17.111 +		"=d" (__ign3), "=S" (__ign4), "=D" (__ign5)	\
  17.112 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
  17.113 +		"3" ((long)(a3)), "4" ((long)(a4)),		\
  17.114 +		"5" ((long)(a5))				\
  17.115 +		: "memory" );					\
  17.116 +	(type)__res;						\
  17.117 +})
  17.118 +
  17.119 +static inline int
  17.120 +HYPERVISOR_set_trap_table(
  17.121 +	trap_info_t *table)
  17.122 +{
  17.123 +	return _hypercall1(int, set_trap_table, table);
  17.124 +}
  17.125 +
  17.126 +static inline int
  17.127 +HYPERVISOR_mmu_update(
  17.128 +	mmu_update_t *req, int count, int *success_count, domid_t domid)
  17.129 +{
  17.130 +	return _hypercall4(int, mmu_update, req, count, success_count, domid);
  17.131 +}
  17.132 +
  17.133 +static inline int
  17.134 +HYPERVISOR_mmuext_op(
  17.135 +	struct mmuext_op *op, int count, int *success_count, domid_t domid)
  17.136 +{
  17.137 +	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
  17.138 +}
  17.139 +
  17.140 +static inline int
  17.141 +HYPERVISOR_set_gdt(
  17.142 +	unsigned long *frame_list, int entries)
  17.143 +{
  17.144 +	return _hypercall2(int, set_gdt, frame_list, entries);
  17.145 +}
  17.146 +
  17.147 +static inline int
  17.148 +HYPERVISOR_stack_switch(
  17.149 +	unsigned long ss, unsigned long esp)
  17.150 +{
  17.151 +	return _hypercall2(int, stack_switch, ss, esp);
  17.152 +}
  17.153 +
  17.154 +static inline int
  17.155 +HYPERVISOR_set_callbacks(
  17.156 +	unsigned long event_selector, unsigned long event_address,
  17.157 +	unsigned long failsafe_selector, unsigned long failsafe_address)
  17.158 +{
  17.159 +	return _hypercall4(int, set_callbacks,
  17.160 +			   event_selector, event_address,
  17.161 +			   failsafe_selector, failsafe_address);
  17.162 +}
  17.163 +
  17.164 +static inline int
  17.165 +HYPERVISOR_fpu_taskswitch(
  17.166 +	int set)
  17.167 +{
  17.168 +	return _hypercall1(int, fpu_taskswitch, set);
  17.169 +}
  17.170 +
  17.171 +static inline int
  17.172 +HYPERVISOR_sched_op(
  17.173 +	int cmd, unsigned long arg)
  17.174 +{
  17.175 +	return _hypercall2(int, sched_op, cmd, arg);
  17.176 +}
  17.177 +
  17.178 +static inline long
  17.179 +HYPERVISOR_set_timer_op(
  17.180 +	u64 timeout)
  17.181 +{
  17.182 +	unsigned long timeout_hi = (unsigned long)(timeout>>32);
  17.183 +	unsigned long timeout_lo = (unsigned long)timeout;
  17.184 +	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
  17.185 +}
  17.186 +
  17.187 +static inline int
  17.188 +HYPERVISOR_dom0_op(
  17.189 +	dom0_op_t *dom0_op)
  17.190 +{
  17.191 +	dom0_op->interface_version = DOM0_INTERFACE_VERSION;
  17.192 +	return _hypercall1(int, dom0_op, dom0_op);
  17.193 +}
  17.194 +
  17.195 +static inline int
  17.196 +HYPERVISOR_set_debugreg(
  17.197 +	int reg, unsigned long value)
  17.198 +{
  17.199 +	return _hypercall2(int, set_debugreg, reg, value);
  17.200 +}
  17.201 +
  17.202 +static inline unsigned long
  17.203 +HYPERVISOR_get_debugreg(
  17.204 +	int reg)
  17.205 +{
  17.206 +	return _hypercall1(unsigned long, get_debugreg, reg);
  17.207 +}
  17.208 +
  17.209 +static inline int
  17.210 +HYPERVISOR_update_descriptor(
  17.211 +	u64 ma, u64 desc)
  17.212 +{
  17.213 +	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
  17.214 +}
  17.215 +
  17.216 +static inline int
  17.217 +HYPERVISOR_memory_op(
  17.218 +	unsigned int cmd, void *arg)
  17.219 +{
  17.220 +	return _hypercall2(int, memory_op, cmd, arg);
  17.221 +}
  17.222 +
  17.223 +static inline int
  17.224 +HYPERVISOR_multicall(
  17.225 +	void *call_list, int nr_calls)
  17.226 +{
  17.227 +	return _hypercall2(int, multicall, call_list, nr_calls);
  17.228 +}
  17.229 +
  17.230 +static inline int
  17.231 +HYPERVISOR_update_va_mapping(
  17.232 +	unsigned long va, pte_t new_val, unsigned long flags)
  17.233 +{
  17.234 +	unsigned long pte_hi = 0;
  17.235 +#ifdef CONFIG_X86_PAE
  17.236 +	pte_hi = new_val.pte_high;
  17.237 +#endif
  17.238 +	return _hypercall4(int, update_va_mapping, va,
  17.239 +			   new_val.pte_low, pte_hi, flags);
  17.240 +}
  17.241 +
  17.242 +static inline int
  17.243 +HYPERVISOR_event_channel_op(
  17.244 +	void *op)
  17.245 +{
  17.246 +	return _hypercall1(int, event_channel_op, op);
  17.247 +}
  17.248 +
  17.249 +static inline int
  17.250 +HYPERVISOR_xen_version(
  17.251 +	int cmd, void *arg)
  17.252 +{
  17.253 +	return _hypercall2(int, xen_version, cmd, arg);
  17.254 +}
  17.255 +
  17.256 +static inline int
  17.257 +HYPERVISOR_console_io(
  17.258 +	int cmd, int count, char *str)
  17.259 +{
  17.260 +	return _hypercall3(int, console_io, cmd, count, str);
  17.261 +}
  17.262 +
  17.263 +static inline int
  17.264 +HYPERVISOR_physdev_op(
  17.265 +	void *physdev_op)
  17.266 +{
  17.267 +	return _hypercall1(int, physdev_op, physdev_op);
  17.268 +}
  17.269 +
  17.270 +static inline int
  17.271 +HYPERVISOR_grant_table_op(
  17.272 +	unsigned int cmd, void *uop, unsigned int count)
  17.273 +{
  17.274 +	return _hypercall3(int, grant_table_op, cmd, uop, count);
  17.275 +}
  17.276 +
  17.277 +static inline int
  17.278 +HYPERVISOR_update_va_mapping_otherdomain(
  17.279 +	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
  17.280 +{
  17.281 +	unsigned long pte_hi = 0;
  17.282 +#ifdef CONFIG_X86_PAE
  17.283 +	pte_hi = new_val.pte_high;
  17.284 +#endif
  17.285 +	return _hypercall5(int, update_va_mapping_otherdomain, va,
  17.286 +			   new_val.pte_low, pte_hi, flags, domid);
  17.287 +}
  17.288 +
  17.289 +static inline int
  17.290 +HYPERVISOR_vm_assist(
  17.291 +	unsigned int cmd, unsigned int type)
  17.292 +{
  17.293 +	return _hypercall2(int, vm_assist, cmd, type);
  17.294 +}
  17.295 +
  17.296 +static inline int
  17.297 +HYPERVISOR_vcpu_op(
  17.298 +	int cmd, int vcpuid, void *extra_args)
  17.299 +{
  17.300 +	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
  17.301 +}
  17.302 +
  17.303 +static inline int
  17.304 +HYPERVISOR_suspend(
  17.305 +	unsigned long srec)
  17.306 +{
  17.307 +	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
  17.308 +			   SHUTDOWN_suspend, srec);
  17.309 +}
  17.310 +
  17.311 +static inline int
  17.312 +HYPERVISOR_nmi_op(
  17.313 +	unsigned long op,
  17.314 +	unsigned long arg)
  17.315 +{
  17.316 +	return _hypercall2(int, nmi_op, op, arg);
  17.317 +}
  17.318 +
  17.319 +#endif /* __HYPERCALL_X86_32_H__ */
  17.320 +
  17.321 +/*
  17.322 + * Local variables:
  17.323 + *  c-file-style: "linux"
  17.324 + *  indent-tabs-mode: t
  17.325 + *  c-indent-level: 8
  17.326 + *  c-basic-offset: 8
  17.327 + *  tab-width: 8
  17.328 + * End:
  17.329 + */
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/extras/mini-os/include/x86/x86_64/hypercall-x86_64.h	Wed Aug 23 11:11:27 2006 -0600
    18.3 @@ -0,0 +1,326 @@
    18.4 +/******************************************************************************
    18.5 + * hypercall-x86_64.h
    18.6 + * 
    18.7 + * Copied from XenLinux.
    18.8 + * 
    18.9 + * Copyright (c) 2002-2004, K A Fraser
   18.10 + * 
   18.11 + * 64-bit updates:
   18.12 + *   Benjamin Liu <benjamin.liu@intel.com>
   18.13 + *   Jun Nakajima <jun.nakajima@intel.com>
   18.14 + * 
   18.15 + * This file may be distributed separately from the Linux kernel, or
   18.16 + * incorporated into other software packages, subject to the following license:
   18.17 + * 
   18.18 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   18.19 + * of this source file (the "Software"), to deal in the Software without
   18.20 + * restriction, including without limitation the rights to use, copy, modify,
   18.21 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   18.22 + * and to permit persons to whom the Software is furnished to do so, subject to
   18.23 + * the following conditions:
   18.24 + * 
   18.25 + * The above copyright notice and this permission notice shall be included in
   18.26 + * all copies or substantial portions of the Software.
   18.27 + * 
   18.28 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   18.29 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   18.30 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   18.31 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   18.32 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   18.33 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   18.34 + * IN THE SOFTWARE.
   18.35 + */
   18.36 +
   18.37 +#ifndef __HYPERCALL_X86_64_H__
   18.38 +#define __HYPERCALL_X86_64_H__
   18.39 +
   18.40 +#include <xen/xen.h>
   18.41 +#include <xen/sched.h>
   18.42 +#include <mm.h>
   18.43 +
   18.44 +#define __STR(x) #x
   18.45 +#define STR(x) __STR(x)
   18.46 +
   18.47 +extern char hypercall_page[PAGE_SIZE];
   18.48 +
   18.49 +#define _hypercall0(type, name)			\
   18.50 +({						\
   18.51 +	long __res;				\
   18.52 +	asm volatile (				\
   18.53 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   18.54 +		: "=a" (__res)			\
   18.55 +		:				\
   18.56 +		: "memory" );			\
   18.57 +	(type)__res;				\
   18.58 +})
   18.59 +
   18.60 +#define _hypercall1(type, name, a1)				\
   18.61 +({								\
   18.62 +	long __res, __ign1;					\
   18.63 +	asm volatile (						\
   18.64 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   18.65 +		: "=a" (__res), "=D" (__ign1)			\
   18.66 +		: "1" ((long)(a1))				\
   18.67 +		: "memory" );					\
   18.68 +	(type)__res;						\
   18.69 +})
   18.70 +
   18.71 +#define _hypercall2(type, name, a1, a2)				\
   18.72 +({								\
   18.73 +	long __res, __ign1, __ign2;				\
   18.74 +	asm volatile (						\
   18.75 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   18.76 +		: "=a" (__res), "=D" (__ign1), "=S" (__ign2)	\
   18.77 +		: "1" ((long)(a1)), "2" ((long)(a2))		\
   18.78 +		: "memory" );					\
   18.79 +	(type)__res;						\
   18.80 +})
   18.81 +
   18.82 +#define _hypercall3(type, name, a1, a2, a3)			\
   18.83 +({								\
   18.84 +	long __res, __ign1, __ign2, __ign3;			\
   18.85 +	asm volatile (						\
   18.86 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   18.87 +		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), 	\
   18.88 +		"=d" (__ign3)					\
   18.89 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   18.90 +		"3" ((long)(a3))				\
   18.91 +		: "memory" );					\
   18.92 +	(type)__res;						\
   18.93 +})
   18.94 +
   18.95 +#define _hypercall4(type, name, a1, a2, a3, a4)			\
   18.96 +({								\
   18.97 +	long __res, __ign1, __ign2, __ign3;			\
   18.98 +	asm volatile (						\
   18.99 +		"movq %7,%%r10; "				\
  18.100 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
  18.101 +		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
  18.102 +		"=d" (__ign3)					\
  18.103 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
  18.104 +		"3" ((long)(a3)), "g" ((long)(a4))		\
  18.105 +		: "memory", "r10" );				\
  18.106 +	(type)__res;						\
  18.107 +})
  18.108 +
  18.109 +#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
  18.110 +({								\
  18.111 +	long __res, __ign1, __ign2, __ign3;			\
  18.112 +	asm volatile (						\
  18.113 +		"movq %7,%%r10; movq %8,%%r8; "			\
  18.114 +		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
  18.115 +		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
  18.116 +		"=d" (__ign3)					\
  18.117 +		: "1" ((long)(a1)), "2" ((long)(a2)),		\
  18.118 +		"3" ((long)(a3)), "g" ((long)(a4)),		\
  18.119 +		"g" ((long)(a5))				\
  18.120 +		: "memory", "r10", "r8" );			\
  18.121 +	(type)__res;						\
  18.122 +})
  18.123 +
  18.124 +static inline int
  18.125 +HYPERVISOR_set_trap_table(
  18.126 +	trap_info_t *table)
  18.127 +{
  18.128 +	return _hypercall1(int, set_trap_table, table);
  18.129 +}
  18.130 +
  18.131 +static inline int
  18.132 +HYPERVISOR_mmu_update(
  18.133 +	mmu_update_t *req, int count, int *success_count, domid_t domid)
  18.134 +{
  18.135 +	return _hypercall4(int, mmu_update, req, count, success_count, domid);
  18.136 +}
  18.137 +
  18.138 +static inline int
  18.139 +HYPERVISOR_mmuext_op(
  18.140 +	struct mmuext_op *op, int count, int *success_count, domid_t domid)
  18.141 +{
  18.142 +	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
  18.143 +}
  18.144 +
  18.145 +static inline int
  18.146 +HYPERVISOR_set_gdt(
  18.147 +	unsigned long *frame_list, int entries)
  18.148 +{
  18.149 +	return _hypercall2(int, set_gdt, frame_list, entries);
  18.150 +}
  18.151 +
  18.152 +static inline int
  18.153 +HYPERVISOR_stack_switch(
  18.154 +	unsigned long ss, unsigned long esp)
  18.155 +{
  18.156 +	return _hypercall2(int, stack_switch, ss, esp);
  18.157 +}
  18.158 +
  18.159 +static inline int
  18.160 +HYPERVISOR_set_callbacks(
  18.161 +	unsigned long event_address, unsigned long failsafe_address, 
  18.162 +	unsigned long syscall_address)
  18.163 +{
  18.164 +	return _hypercall3(int, set_callbacks,
  18.165 +			   event_address, failsafe_address, syscall_address);
  18.166 +}
  18.167 +
  18.168 +static inline int
  18.169 +HYPERVISOR_fpu_taskswitch(
  18.170 +	int set)
  18.171 +{
  18.172 +	return _hypercall1(int, fpu_taskswitch, set);
  18.173 +}
  18.174 +
  18.175 +static inline int
  18.176 +HYPERVISOR_sched_op(
  18.177 +	int cmd, unsigned long arg)
  18.178 +{
  18.179 +	return _hypercall2(int, sched_op, cmd, arg);
  18.180 +}
  18.181 +
  18.182 +static inline long
  18.183 +HYPERVISOR_set_timer_op(
  18.184 +	u64 timeout)
  18.185 +{
  18.186 +	return _hypercall1(long, set_timer_op, timeout);
  18.187 +}
  18.188 +
  18.189 +static inline int
  18.190 +HYPERVISOR_dom0_op(
  18.191 +	dom0_op_t *dom0_op)
  18.192 +{
  18.193 +	dom0_op->interface_version = DOM0_INTERFACE_VERSION;
  18.194 +	return _hypercall1(int, dom0_op, dom0_op);
  18.195 +}
  18.196 +
  18.197 +static inline int
  18.198 +HYPERVISOR_set_debugreg(
  18.199 +	int reg, unsigned long value)
  18.200 +{
  18.201 +	return _hypercall2(int, set_debugreg, reg, value);
  18.202 +}
  18.203 +
  18.204 +static inline unsigned long
  18.205 +HYPERVISOR_get_debugreg(
  18.206 +	int reg)
  18.207 +{
  18.208 +	return _hypercall1(unsigned long, get_debugreg, reg);
  18.209 +}
  18.210 +
  18.211 +static inline int
  18.212 +HYPERVISOR_update_descriptor(
  18.213 +	unsigned long ma, unsigned long word)
  18.214 +{
  18.215 +	return _hypercall2(int, update_descriptor, ma, word);
  18.216 +}
  18.217 +
  18.218 +static inline int
  18.219 +HYPERVISOR_memory_op(
  18.220 +	unsigned int cmd, void *arg)
  18.221 +{
  18.222 +	return _hypercall2(int, memory_op, cmd, arg);
  18.223 +}
  18.224 +
  18.225 +static inline int
  18.226 +HYPERVISOR_multicall(
  18.227 +	void *call_list, int nr_calls)
  18.228 +{
  18.229 +	return _hypercall2(int, multicall, call_list, nr_calls);
  18.230 +}
  18.231 +
  18.232 +static inline int
  18.233 +HYPERVISOR_update_va_mapping(
  18.234 +	unsigned long va, pte_t new_val, unsigned long flags)
  18.235 +{
  18.236 +	return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
  18.237 +}
  18.238 +
  18.239 +static inline int
  18.240 +HYPERVISOR_event_channel_op(
  18.241 +	void *op)
  18.242 +{
  18.243 +	return _hypercall1(int, event_channel_op, op);
  18.244 +}
  18.245 +
  18.246 +static inline int
  18.247 +HYPERVISOR_xen_version(
  18.248 +	int cmd, void *arg)
  18.249 +{
  18.250 +	return _hypercall2(int, xen_version, cmd, arg);
  18.251 +}
  18.252 +
  18.253 +static inline int
  18.254 +HYPERVISOR_console_io(
  18.255 +	int cmd, int count, char *str)
  18.256 +{
  18.257 +	return _hypercall3(int, console_io, cmd, count, str);
  18.258 +}
  18.259 +
  18.260 +static inline int
  18.261 +HYPERVISOR_physdev_op(
  18.262 +	void *physdev_op)
  18.263 +{
  18.264 +	return _hypercall1(int, physdev_op, physdev_op);
  18.265 +}
  18.266 +
  18.267 +static inline int
  18.268 +HYPERVISOR_grant_table_op(
  18.269 +	unsigned int cmd, void *uop, unsigned int count)
  18.270 +{
  18.271 +	return _hypercall3(int, grant_table_op, cmd, uop, count);
  18.272 +}
  18.273 +
  18.274 +static inline int
  18.275 +HYPERVISOR_update_va_mapping_otherdomain(
  18.276 +	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
  18.277 +{
  18.278 +	return _hypercall4(int, update_va_mapping_otherdomain, va,
  18.279 +			   new_val.pte, flags, domid);
  18.280 +}
  18.281 +
  18.282 +static inline int
  18.283 +HYPERVISOR_vm_assist(
  18.284 +	unsigned int cmd, unsigned int type)
  18.285 +{
  18.286 +	return _hypercall2(int, vm_assist, cmd, type);
  18.287 +}
  18.288 +
  18.289 +static inline int
  18.290 +HYPERVISOR_vcpu_op(
  18.291 +	int cmd, int vcpuid, void *extra_args)
  18.292 +{
  18.293 +	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
  18.294 +}
  18.295 +
  18.296 +static inline int
  18.297 +HYPERVISOR_set_segment_base(
  18.298 +	int reg, unsigned long value)
  18.299 +{
  18.300 +	return _hypercall2(int, set_segment_base, reg, value);
  18.301 +}
  18.302 +
  18.303 +static inline int
  18.304 +HYPERVISOR_suspend(
  18.305 +	unsigned long srec)
  18.306 +{
  18.307 +	return _hypercall3(int, sched_op, SCHEDOP_shutdown,
  18.308 +			   SHUTDOWN_suspend, srec);
  18.309 +}
  18.310 +
  18.311 +static inline int
  18.312 +HYPERVISOR_nmi_op(
  18.313 +	unsigned long op,
  18.314 +	unsigned long arg)
  18.315 +{
  18.316 +	return _hypercall2(int, nmi_op, op, arg);
  18.317 +}
  18.318 +
  18.319 +#endif /* __HYPERCALL_X86_64_H__ */
  18.320 +
  18.321 +/*
  18.322 + * Local variables:
  18.323 + *  c-file-style: "linux"
  18.324 + *  indent-tabs-mode: t
  18.325 + *  c-indent-level: 8
  18.326 + *  c-basic-offset: 8
  18.327 + *  tab-width: 8
  18.328 + * End:
  18.329 + */
    19.1 --- a/extras/mini-os/traps.c	Tue Aug 22 14:45:49 2006 -0600
    19.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.3 @@ -1,229 +0,0 @@
    19.4 -
    19.5 -#include <os.h>
    19.6 -#include <traps.h>
    19.7 -#include <hypervisor.h>
    19.8 -#include <mm.h>
    19.9 -#include <lib.h>
   19.10 -#include <sched.h>
   19.11 -
   19.12 -/*
   19.13 - * These are assembler stubs in entry.S.
   19.14 - * They are the actual entry points for virtual exceptions.
   19.15 - */
   19.16 -void divide_error(void);
   19.17 -void debug(void);
   19.18 -void int3(void);
   19.19 -void overflow(void);
   19.20 -void bounds(void);
   19.21 -void invalid_op(void);
   19.22 -void device_not_available(void);
   19.23 -void coprocessor_segment_overrun(void);
   19.24 -void invalid_TSS(void);
   19.25 -void segment_not_present(void);
   19.26 -void stack_segment(void);
   19.27 -void general_protection(void);
   19.28 -void page_fault(void);
   19.29 -void coprocessor_error(void);
   19.30 -void simd_coprocessor_error(void);
   19.31 -void alignment_check(void);
   19.32 -void spurious_interrupt_bug(void);
   19.33 -void machine_check(void);
   19.34 -
   19.35 -
   19.36 -void dump_regs(struct pt_regs *regs)
   19.37 -{
   19.38 -    printk("Thread: %s\n", current->name);
   19.39 -#ifdef __i386__    
   19.40 -    printk("EIP: %x, EFLAGS %x.\n", regs->eip, regs->eflags);
   19.41 -    printk("EBX: %08x ECX: %08x EDX: %08x\n",
   19.42 -	   regs->ebx, regs->ecx, regs->edx);
   19.43 -    printk("ESI: %08x EDI: %08x EBP: %08x EAX: %08x\n",
   19.44 -	   regs->esi, regs->edi, regs->ebp, regs->eax);
   19.45 -    printk("DS: %04x ES: %04x orig_eax: %08x, eip: %08x\n",
   19.46 -	   regs->xds, regs->xes, regs->orig_eax, regs->eip);
   19.47 -    printk("CS: %04x EFLAGS: %08x esp: %08x ss: %04x\n",
   19.48 -	   regs->xcs, regs->eflags, regs->esp, regs->xss);
   19.49 -#else
   19.50 -    printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
   19.51 -    printk("\nRSP: %04lx:%016lx  EFLAGS: %08lx\n", 
   19.52 -           regs->ss, regs->rsp, regs->eflags);
   19.53 -    printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
   19.54 -           regs->rax, regs->rbx, regs->rcx);
   19.55 -    printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
   19.56 -           regs->rdx, regs->rsi, regs->rdi); 
   19.57 -    printk("RBP: %016lx R08: %016lx R09: %016lx\n",
   19.58 -           regs->rbp, regs->r8, regs->r9); 
   19.59 -    printk("R10: %016lx R11: %016lx R12: %016lx\n",
   19.60 -           regs->r10, regs->r11, regs->r12); 
   19.61 -    printk("R13: %016lx R14: %016lx R15: %016lx\n",
   19.62 -           regs->r13, regs->r14, regs->r15); 
   19.63 -#endif
   19.64 -}
   19.65 -
   19.66 -static void do_trap(int trapnr, char *str, struct pt_regs * regs, unsigned long error_code)
   19.67 -{
   19.68 -    printk("FATAL:  Unhandled Trap %d (%s), error code=0x%lx\n", trapnr, str, error_code);
   19.69 -    printk("Regs address %p\n", regs);
   19.70 -    dump_regs(regs);
   19.71 -    do_exit();
   19.72 -}
   19.73 -
   19.74 -#define DO_ERROR(trapnr, str, name) \
   19.75 -void do_##name(struct pt_regs * regs, unsigned long error_code) \
   19.76 -{ \
   19.77 -	do_trap(trapnr, str, regs, error_code); \
   19.78 -}
   19.79 -
   19.80 -#define DO_ERROR_INFO(trapnr, str, name, sicode, siaddr) \
   19.81 -void do_##name(struct pt_regs * regs, unsigned long error_code) \
   19.82 -{ \
   19.83 -	do_trap(trapnr, str, regs, error_code); \
   19.84 -}
   19.85 -
   19.86 -DO_ERROR_INFO( 0, "divide error", divide_error, FPE_INTDIV, regs->eip)
   19.87 -DO_ERROR( 3, "int3", int3)
   19.88 -DO_ERROR( 4, "overflow", overflow)
   19.89 -DO_ERROR( 5, "bounds", bounds)
   19.90 -DO_ERROR_INFO( 6, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip)
   19.91 -DO_ERROR( 7, "device not available", device_not_available)
   19.92 -DO_ERROR( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
   19.93 -DO_ERROR(10, "invalid TSS", invalid_TSS)
   19.94 -DO_ERROR(11, "segment not present", segment_not_present)
   19.95 -DO_ERROR(12, "stack segment", stack_segment)
   19.96 -DO_ERROR_INFO(17, "alignment check", alignment_check, BUS_ADRALN, 0)
   19.97 -DO_ERROR(18, "machine check", machine_check)
   19.98 -
   19.99 -void page_walk(unsigned long virt_address)
  19.100 -{
  19.101 -        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
  19.102 -        unsigned long addr = virt_address;
  19.103 -        printk("Pagetable walk from virt %lx, base %lx:\n", virt_address, start_info.pt_base);
  19.104 -    
  19.105 -#if defined(__x86_64__)
  19.106 -        page = tab[l4_table_offset(addr)];
  19.107 -        tab = pte_to_virt(page);
  19.108 -        printk(" L4 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l4_table_offset(addr));
  19.109 -#endif
  19.110 -#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
  19.111 -        page = tab[l3_table_offset(addr)];
  19.112 -        tab = pte_to_virt(page);
  19.113 -        printk("  L3 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l3_table_offset(addr));
  19.114 -#endif
  19.115 -        page = tab[l2_table_offset(addr)];
  19.116 -        tab = pte_to_virt(page);
  19.117 -        printk("   L2 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l2_table_offset(addr));
  19.118 -        
  19.119 -        page = tab[l1_table_offset(addr)];
  19.120 -        printk("    L1 = %"PRIpte" (%p)  [offset = %lx]\n", page, tab, l1_table_offset(addr));
  19.121 -
  19.122 -}
  19.123 -
  19.124 -#define read_cr2() \
  19.125 -        (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
  19.126 -
  19.127 -static int handling_pg_fault = 0;
  19.128 -
  19.129 -void do_page_fault(struct pt_regs *regs, unsigned long error_code)
  19.130 -{
  19.131 -    unsigned long addr = read_cr2();
  19.132 -    /* If we are already handling a page fault, and got another one
  19.133 -       that means we faulted in pagetable walk. Continuing here would cause
  19.134 -       a recursive fault */       
  19.135 -    if(handling_pg_fault) 
  19.136 -    {
  19.137 -        printk("Page fault in pagetable walk (access to invalid memory?).\n"); 
  19.138 -        do_exit();
  19.139 -    }
  19.140 -    handling_pg_fault = 1;
  19.141 -
  19.142 -#if defined(__x86_64__)
  19.143 -    printk("Page fault at linear address %p, rip %p, code %lx\n",
  19.144 -           addr, regs->rip, error_code);
  19.145 -#else
  19.146 -    printk("Page fault at linear address %p, eip %p, code %lx\n",
  19.147 -           addr, regs->eip, error_code);
  19.148 -#endif
  19.149 -
  19.150 -    dump_regs(regs);
  19.151 -    page_walk(addr);
  19.152 -    do_exit();
  19.153 -    /* We should never get here ... but still */
  19.154 -    handling_pg_fault = 0;
  19.155 -}
  19.156 -
  19.157 -void do_general_protection(struct pt_regs *regs, long error_code)
  19.158 -{
  19.159 -#ifdef __i386__
  19.160 -    printk("GPF eip: %p, error_code=%lx\n", regs->eip, error_code);
  19.161 -#else    
  19.162 -    printk("GPF rip: %p, error_code=%lx\n", regs->rip, error_code);
  19.163 -#endif
  19.164 -    dump_regs(regs);
  19.165 -    do_exit();
  19.166 -}
  19.167 -
  19.168 -
  19.169 -void do_debug(struct pt_regs * regs)
  19.170 -{
  19.171 -    printk("Debug exception\n");
  19.172 -#define TF_MASK 0x100
  19.173 -    regs->eflags &= ~TF_MASK;
  19.174 -    dump_regs(regs);
  19.175 -    do_exit();
  19.176 -}
  19.177 -
  19.178 -void do_coprocessor_error(struct pt_regs * regs)
  19.179 -{
  19.180 -    printk("Copro error\n");
  19.181 -    dump_regs(regs);
  19.182 -    do_exit();
  19.183 -}
  19.184 -
  19.185 -void simd_math_error(void *eip)
  19.186 -{
  19.187 -    printk("SIMD error\n");
  19.188 -}
  19.189 -
  19.190 -void do_simd_coprocessor_error(struct pt_regs * regs)
  19.191 -{
  19.192 -    printk("SIMD copro error\n");
  19.193 -}
  19.194 -
  19.195 -void do_spurious_interrupt_bug(struct pt_regs * regs)
  19.196 -{
  19.197 -}
  19.198 -
  19.199 -/*
  19.200 - * Submit a virtual IDT to teh hypervisor. This consists of tuples
  19.201 - * (interrupt vector, privilege ring, CS:EIP of handler).
  19.202 - * The 'privilege ring' field specifies the least-privileged ring that
  19.203 - * can trap to that vector using a software-interrupt instruction (INT).
  19.204 - */
  19.205 -static trap_info_t trap_table[] = {
  19.206 -    {  0, 0, __KERNEL_CS, (unsigned long)divide_error                },
  19.207 -    {  1, 0, __KERNEL_CS, (unsigned long)debug                       },
  19.208 -    {  3, 3, __KERNEL_CS, (unsigned long)int3                        },
  19.209 -    {  4, 3, __KERNEL_CS, (unsigned long)overflow                    },
  19.210 -    {  5, 3, __KERNEL_CS, (unsigned long)bounds                      },
  19.211 -    {  6, 0, __KERNEL_CS, (unsigned long)invalid_op                  },
  19.212 -    {  7, 0, __KERNEL_CS, (unsigned long)device_not_available        },
  19.213 -    {  9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
  19.214 -    { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS                 },
  19.215 -    { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present         },
  19.216 -    { 12, 0, __KERNEL_CS, (unsigned long)stack_segment               },
  19.217 -    { 13, 0, __KERNEL_CS, (unsigned long)general_protection          },
  19.218 -    { 14, 0, __KERNEL_CS, (unsigned long)page_fault                  },
  19.219 -    { 15, 0, __KERNEL_CS, (unsigned long)spurious_interrupt_bug      },
  19.220 -    { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error           },
  19.221 -    { 17, 0, __KERNEL_CS, (unsigned long)alignment_check             },
  19.222 -    { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error      },
  19.223 -    {  0, 0,           0, 0                           }
  19.224 -};
  19.225 -    
  19.226 -
  19.227 -
  19.228 -void trap_init(void)
  19.229 -{
  19.230 -    HYPERVISOR_set_trap_table(trap_table);    
  19.231 -}
  19.232 -
    20.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c	Tue Aug 22 14:45:49 2006 -0600
    20.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/cpu/mtrr/main-xen.c	Wed Aug 23 11:11:27 2006 -0600
    20.3 @@ -178,7 +178,7 @@ static int __init mtrr_init(void)
    20.4  {
    20.5  	struct cpuinfo_x86 *c = &boot_cpu_data;
    20.6  
    20.7 -	if (!(xen_start_info->flags & SIF_PRIVILEGED))
    20.8 +	if (!is_initial_xendomain())
    20.9  		return -ENODEV;
   20.10  
   20.11  	if ((!cpu_has(c, X86_FEATURE_MTRR)) &&
    21.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c	Tue Aug 22 14:45:49 2006 -0600
    21.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/io_apic-xen.c	Wed Aug 23 11:11:27 2006 -0600
    21.3 @@ -2480,7 +2480,7 @@ static int __init io_apic_bug_finalize(v
    21.4  {
    21.5  	if(sis_apic_bug == -1)
    21.6  		sis_apic_bug = 0;
    21.7 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
    21.8 +	if (is_initial_xendomain()) {
    21.9  		dom0_op_t op = { .cmd = DOM0_PLATFORM_QUIRK };
   21.10  		op.u.platform_quirk.quirk_id = sis_apic_bug ?
   21.11  			QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
    22.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Tue Aug 22 14:45:49 2006 -0600
    22.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Wed Aug 23 11:11:27 2006 -0600
    22.3 @@ -184,7 +184,6 @@ static struct resource code_resource = {
    22.4  	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
    22.5  };
    22.6  
    22.7 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
    22.8  static struct resource system_rom_resource = {
    22.9  	.name	= "System ROM",
   22.10  	.start	= 0xf0000,
   22.11 @@ -240,7 +239,6 @@ static struct resource video_rom_resourc
   22.12  	.end	= 0xc7fff,
   22.13  	.flags	= IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
   22.14  };
   22.15 -#endif
   22.16  
   22.17  static struct resource video_ram_resource = {
   22.18  	.name	= "Video RAM area",
   22.19 @@ -299,7 +297,6 @@ static struct resource standard_io_resou
   22.20  #define STANDARD_IO_RESOURCES \
   22.21  	(sizeof standard_io_resources / sizeof standard_io_resources[0])
   22.22  
   22.23 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   22.24  #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
   22.25  
   22.26  static int __init romchecksum(unsigned char *rom, unsigned long length)
   22.27 @@ -317,9 +314,11 @@ static void __init probe_roms(void)
   22.28  	unsigned char *rom;
   22.29  	int	      i;
   22.30  
   22.31 +#ifdef CONFIG_XEN
   22.32  	/* Nothing to do if not running in dom0. */
   22.33 -	if (!(xen_start_info->flags & SIF_INITDOMAIN))
   22.34 +	if (!is_initial_xendomain())
   22.35  		return;
   22.36 +#endif
   22.37  
   22.38  	/* video rom */
   22.39  	upper = adapter_rom_resources[0].start;
   22.40 @@ -379,7 +378,6 @@ static void __init probe_roms(void)
   22.41  		start = adapter_rom_resources[i++].end & ~2047UL;
   22.42  	}
   22.43  }
   22.44 -#endif
   22.45  
   22.46  /*
   22.47   * Point at the empty zero page to start with. We map the real shared_info
   22.48 @@ -1359,9 +1357,7 @@ legacy_init_iomem_resources(struct e820e
   22.49  {
   22.50  	int i;
   22.51  
   22.52 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
   22.53  	probe_roms();
   22.54 -#endif
   22.55  
   22.56  	for (i = 0; i < nr_map; i++) {
   22.57  		struct resource *res;
   22.58 @@ -1458,7 +1454,7 @@ static void __init register_memory(void)
   22.59  	int	      i;
   22.60  
   22.61  	/* Nothing to do if not running in dom0. */
   22.62 -	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
   22.63 +	if (!is_initial_xendomain()) {
   22.64  		legacy_init_iomem_resources(e820.map, e820.nr_map,
   22.65  					    &code_resource, &data_resource);
   22.66  		return;
   22.67 @@ -1618,7 +1614,7 @@ void __init setup_arch(char **cmdline_p)
   22.68  
   22.69  	/* Force a quick death if the kernel panics (not domain 0). */
   22.70  	extern int panic_timeout;
   22.71 -	if (!panic_timeout && !(xen_start_info->flags & SIF_INITDOMAIN))
   22.72 +	if (!panic_timeout && !is_initial_xendomain())
   22.73  		panic_timeout = 1;
   22.74  
   22.75  	/* Register a call for panic conditions. */
   22.76 @@ -1661,7 +1657,7 @@ void __init setup_arch(char **cmdline_p)
   22.77  	}
   22.78  	bootloader_type = LOADER_TYPE;
   22.79  
   22.80 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
   22.81 +	if (is_initial_xendomain()) {
   22.82  		/* This is drawn from a dump from vgacon:startup in
   22.83  		 * standard Linux. */
   22.84  		screen_info.orig_video_mode = 3; 
   22.85 @@ -1670,6 +1666,35 @@ void __init setup_arch(char **cmdline_p)
   22.86  		screen_info.orig_video_cols = 80;
   22.87  		screen_info.orig_video_ega_bx = 3;
   22.88  		screen_info.orig_video_points = 16;
   22.89 +		if (xen_start_info->console.dom0.info_size >=
   22.90 +		    sizeof(struct dom0_vga_console_info)) {
   22.91 +			const struct dom0_vga_console_info *info =
   22.92 +				(struct dom0_vga_console_info *)(
   22.93 +					(char *)xen_start_info +
   22.94 +					xen_start_info->console.dom0.info_off);
   22.95 +			screen_info.orig_video_mode = info->txt_mode;
   22.96 +			screen_info.orig_video_isVGA = info->video_type;
   22.97 +			screen_info.orig_video_lines = info->video_height;
   22.98 +			screen_info.orig_video_cols = info->video_width;
   22.99 +			screen_info.orig_video_points = info->txt_points;
  22.100 +			screen_info.lfb_width = info->video_width;
  22.101 +			screen_info.lfb_height = info->video_height;
  22.102 +			screen_info.lfb_depth = info->lfb_depth;
  22.103 +			screen_info.lfb_base = info->lfb_base;
  22.104 +			screen_info.lfb_size = info->lfb_size;
  22.105 +			screen_info.lfb_linelength = info->lfb_linelen;
  22.106 +			screen_info.red_size = info->red_size;
  22.107 +			screen_info.red_pos = info->red_pos;
  22.108 +			screen_info.green_size = info->green_size;
  22.109 +			screen_info.green_pos = info->green_pos;
  22.110 +			screen_info.blue_size = info->blue_size;
  22.111 +			screen_info.blue_pos = info->blue_pos;
  22.112 +			screen_info.rsvd_size = info->rsvd_size;
  22.113 +			screen_info.rsvd_pos = info->rsvd_pos;
  22.114 +		}
  22.115 +		screen_info.orig_y = screen_info.orig_video_lines - 1;
  22.116 +		xen_start_info->console.domU.mfn = 0;
  22.117 +		xen_start_info->console.domU.evtchn = 0;
  22.118  	} else
  22.119  		screen_info.orig_video_isVGA = 0;
  22.120  
  22.121 @@ -1788,7 +1813,7 @@ void __init setup_arch(char **cmdline_p)
  22.122  	}
  22.123  #endif
  22.124  
  22.125 -	if (xen_start_info->flags & SIF_INITDOMAIN)
  22.126 +	if (is_initial_xendomain())
  22.127  		dmi_scan_machine();
  22.128  
  22.129  #ifdef CONFIG_X86_GENERICARCH
  22.130 @@ -1805,7 +1830,7 @@ void __init setup_arch(char **cmdline_p)
  22.131  #endif
  22.132  
  22.133  #ifdef CONFIG_ACPI
  22.134 -	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
  22.135 +	if (!is_initial_xendomain()) {
  22.136  		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
  22.137  		acpi_disabled = 1;
  22.138  		acpi_ht = 0;
  22.139 @@ -1831,11 +1856,7 @@ void __init setup_arch(char **cmdline_p)
  22.140  
  22.141  	register_memory();
  22.142  
  22.143 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
  22.144 -		if (!(xen_start_info->flags & SIF_PRIVILEGED))
  22.145 -			panic("Xen granted us console access "
  22.146 -			      "but not privileged status");
  22.147 -
  22.148 +	if (is_initial_xendomain()) {
  22.149  #ifdef CONFIG_VT
  22.150  #if defined(CONFIG_VGA_CONSOLE)
  22.151  		if (!efi_enabled ||
    23.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Tue Aug 22 14:45:49 2006 -0600
    23.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/swiotlb.c	Wed Aug 23 11:11:27 2006 -0600
    23.3 @@ -199,7 +199,7 @@ swiotlb_init(void)
    23.4  		swiotlb = 1;
    23.5  	} else if ((swiotlb_force != -1) &&
    23.6  		   is_running_on_xen() &&
    23.7 -		   (xen_start_info->flags & SIF_INITDOMAIN)) {
    23.8 +		   is_initial_xendomain()) {
    23.9  		/* Domain 0 always has a swiotlb. */
   23.10  		ram_end = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
   23.11  		if (ram_end <= 0x7ffff)
    24.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Tue Aug 22 14:45:49 2006 -0600
    24.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Wed Aug 23 11:11:27 2006 -0600
    24.3 @@ -470,8 +470,7 @@ int do_settimeofday(struct timespec *tv)
    24.4  	sec = tv->tv_sec;
    24.5  	__normalize_time(&sec, &nsec);
    24.6  
    24.7 -	if ((xen_start_info->flags & SIF_INITDOMAIN) &&
    24.8 -	    !independent_wallclock) {
    24.9 +	if (is_initial_xendomain() && !independent_wallclock) {
   24.10  		op.cmd = DOM0_SETTIME;
   24.11  		op.u.settime.secs        = sec;
   24.12  		op.u.settime.nsecs       = nsec;
   24.13 @@ -502,8 +501,7 @@ static void sync_xen_wallclock(unsigned 
   24.14  	s64 nsec;
   24.15  	dom0_op_t op;
   24.16  
   24.17 -	if (!ntp_synced() || independent_wallclock ||
   24.18 -	    !(xen_start_info->flags & SIF_INITDOMAIN))
   24.19 +	if (!ntp_synced() || independent_wallclock || !is_initial_xendomain())
   24.20  		return;
   24.21  
   24.22  	write_seqlock_irq(&xtime_lock);
   24.23 @@ -532,7 +530,7 @@ static int set_rtc_mmss(unsigned long no
   24.24  
   24.25  	WARN_ON(irqs_disabled());
   24.26  
   24.27 -	if (independent_wallclock || !(xen_start_info->flags & SIF_INITDOMAIN))
   24.28 +	if (independent_wallclock || !is_initial_xendomain())
   24.29  		return 0;
   24.30  
   24.31  	/* gets recalled with irq locally disabled */
    25.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Tue Aug 22 14:45:49 2006 -0600
    25.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/vmlinux.lds.S	Wed Aug 23 11:11:27 2006 -0600
    25.3 @@ -12,6 +12,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
    25.4  OUTPUT_ARCH(i386)
    25.5  ENTRY(phys_startup_32)
    25.6  jiffies = jiffies_64;
    25.7 +
    25.8 +PHDRS {
    25.9 +	text PT_LOAD FLAGS(5);	/* R_E */
   25.10 +	data PT_LOAD FLAGS(7);	/* RWE */
   25.11 +	note PT_NOTE FLAGS(4);	/* R__ */
   25.12 +}
   25.13  SECTIONS
   25.14  {
   25.15    . = __KERNEL_START;
   25.16 @@ -25,7 +31,7 @@ SECTIONS
   25.17  	KPROBES_TEXT
   25.18  	*(.fixup)
   25.19  	*(.gnu.warning)
   25.20 -	} = 0x9090
   25.21 +	} :text = 0x9090
   25.22  
   25.23    _etext = .;			/* End of text section */
   25.24  
   25.25 @@ -47,7 +53,7 @@ SECTIONS
   25.26    .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
   25.27  	*(.data)
   25.28  	CONSTRUCTORS
   25.29 -	}
   25.30 +	} :data
   25.31  
   25.32    . = ALIGN(4096);
   25.33    __nosave_begin = .;
   25.34 @@ -154,4 +160,6 @@ SECTIONS
   25.35    STABS_DEBUG
   25.36  
   25.37    DWARF_DEBUG
   25.38 +
   25.39 +  NOTES
   25.40  }
    26.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Tue Aug 22 14:45:49 2006 -0600
    26.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Wed Aug 23 11:11:27 2006 -0600
    26.3 @@ -566,7 +566,7 @@ void __init paging_init(void)
    26.4  
    26.5  	/* Setup mapping of lower 1st MB */
    26.6  	for (i = 0; i < NR_FIX_ISAMAPS; i++)
    26.7 -		if (xen_start_info->flags & SIF_PRIVILEGED)
    26.8 +		if (is_initial_xendomain())
    26.9  			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   26.10  		else
   26.11  			__set_fixmap(FIX_ISAMAP_BEGIN - i,
    27.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Tue Aug 22 14:45:49 2006 -0600
    27.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Wed Aug 23 11:11:27 2006 -0600
    27.3 @@ -121,7 +121,7 @@ int direct_remap_pfn_range(struct vm_are
    27.4  			   domid_t  domid)
    27.5  {
    27.6  	/* Same as remap_pfn_range(). */
    27.7 -	vma->vm_flags |= VM_IO | VM_RESERVED;
    27.8 +	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
    27.9  
   27.10  	if (domid == DOMID_SELF)
   27.11  		return -EINVAL;
   27.12 @@ -245,7 +245,7 @@ void __iomem * __ioremap(unsigned long p
   27.13  	/*
   27.14  	 * Don't remap the low PCI/ISA area, it's always mapped..
   27.15  	 */
   27.16 -	if (xen_start_info->flags & SIF_PRIVILEGED &&
   27.17 +	if (is_initial_xendomain() &&
   27.18  	    phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
   27.19  		return (void __iomem *) isa_bus_to_virt(phys_addr);
   27.20  
   27.21 @@ -282,9 +282,6 @@ void __iomem * __ioremap(unsigned long p
   27.22  	area->phys_addr = phys_addr;
   27.23  	addr = (void __iomem *) area->addr;
   27.24  	flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
   27.25 -#ifdef __x86_64__
   27.26 -	flags |= _PAGE_USER;
   27.27 -#endif
   27.28  	if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
   27.29  				     phys_addr>>PAGE_SHIFT,
   27.30  				     size, __pgprot(flags), domid)) {
   27.31 @@ -425,7 +422,7 @@ void __init *bt_ioremap(unsigned long ph
   27.32  	/*
   27.33  	 * Don't remap the low PCI/ISA area, it's always mapped..
   27.34  	 */
   27.35 -	if (xen_start_info->flags & SIF_PRIVILEGED &&
   27.36 +	if (is_initial_xendomain() &&
   27.37  	    phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS)
   27.38  		return isa_bus_to_virt(phys_addr);
   27.39  
    28.1 --- a/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c	Tue Aug 22 14:45:49 2006 -0600
    28.2 +++ b/linux-2.6-xen-sparse/arch/i386/pci/irq-xen.c	Wed Aug 23 11:11:27 2006 -0600
    28.3 @@ -95,7 +95,10 @@ static struct irq_routing_table * __init
    28.4  	u8 *addr;
    28.5  	struct irq_routing_table *rt;
    28.6  
    28.7 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
    28.8 +#ifdef CONFIG_XEN
    28.9 +	if (!is_initial_xendomain())
   28.10 +		return NULL;
   28.11 +#endif
   28.12  	if (pirq_table_addr) {
   28.13  		rt = pirq_check_routing_table((u8 *) isa_bus_to_virt(pirq_table_addr));
   28.14  		if (rt)
   28.15 @@ -107,7 +110,6 @@ static struct irq_routing_table * __init
   28.16  		if (rt)
   28.17  			return rt;
   28.18  	}
   28.19 -#endif
   28.20  	
   28.21  	return NULL;
   28.22  }
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c	Wed Aug 23 11:11:27 2006 -0600
    29.3 @@ -0,0 +1,110 @@
    29.4 +/*
    29.5 + * Platform dependent support for DIG64 platforms.
    29.6 + *
    29.7 + * Copyright (C) 1999 Intel Corp.
    29.8 + * Copyright (C) 1999, 2001 Hewlett-Packard Co
    29.9 + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
   29.10 + * Copyright (C) 1999 VA Linux Systems
   29.11 + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
   29.12 + * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
   29.13 + */
   29.14 +#include <linux/config.h>
   29.15 +
   29.16 +#include <linux/init.h>
   29.17 +#include <linux/delay.h>
   29.18 +#include <linux/kernel.h>
   29.19 +#include <linux/kdev_t.h>
   29.20 +#include <linux/string.h>
   29.21 +#include <linux/tty.h>
   29.22 +#include <linux/console.h>
   29.23 +#include <linux/timex.h>
   29.24 +#include <linux/sched.h>
   29.25 +#include <linux/root_dev.h>
   29.26 +
   29.27 +#include <asm/io.h>
   29.28 +#include <asm/machvec.h>
   29.29 +#include <asm/system.h>
   29.30 +
   29.31 +void __init
   29.32 +dig_setup (char **cmdline_p)
   29.33 +{
   29.34 +	unsigned int orig_x, orig_y, num_cols, num_rows, font_height;
   29.35 +
   29.36 +	/*
   29.37 +	 * Default to /dev/sda2.  This assumes that the EFI partition
   29.38 +	 * is physical disk 1 partition 1 and the Linux root disk is
   29.39 +	 * physical disk 1 partition 2.
   29.40 +	 */
   29.41 +	ROOT_DEV = Root_SDA2;		/* default to second partition on first drive */
   29.42 +
   29.43 +#ifdef CONFIG_SMP
   29.44 +	init_smp_config();
   29.45 +#endif
   29.46 +
   29.47 +	memset(&screen_info, 0, sizeof(screen_info));
   29.48 +
   29.49 +	if (!ia64_boot_param->console_info.num_rows
   29.50 +	    || !ia64_boot_param->console_info.num_cols)
   29.51 +	{
   29.52 +		printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n");
   29.53 +		orig_x = 0;
   29.54 +		orig_y = 0;
   29.55 +		num_cols = 80;
   29.56 +		num_rows = 25;
   29.57 +		font_height = 16;
   29.58 +	} else {
   29.59 +		orig_x = ia64_boot_param->console_info.orig_x;
   29.60 +		orig_y = ia64_boot_param->console_info.orig_y;
   29.61 +		num_cols = ia64_boot_param->console_info.num_cols;
   29.62 +		num_rows = ia64_boot_param->console_info.num_rows;
   29.63 +		font_height = 400 / num_rows;
   29.64 +	}
   29.65 +
   29.66 +	screen_info.orig_x = orig_x;
   29.67 +	screen_info.orig_y = orig_y;
   29.68 +	screen_info.orig_video_cols  = num_cols;
   29.69 +	screen_info.orig_video_lines = num_rows;
   29.70 +	screen_info.orig_video_points = font_height;
   29.71 +	screen_info.orig_video_mode = 3;	/* XXX fake */
   29.72 +	screen_info.orig_video_isVGA = 1;	/* XXX fake */
   29.73 +	screen_info.orig_video_ega_bx = 3;	/* XXX fake */
   29.74 +#ifdef CONFIG_XEN
   29.75 +	if (!is_running_on_xen())
   29.76 +		return;
   29.77 +
   29.78 +	if (xen_start_info->console.dom0.info_size >=
   29.79 +	    sizeof(struct dom0_vga_console_info)) {
   29.80 +		const struct dom0_vga_console_info *info =
   29.81 +		        (struct dom0_vga_console_info *)(
   29.82 +		                (char *)xen_start_info +
   29.83 +		                xen_start_info->console.dom0.info_off);
   29.84 +		screen_info.orig_video_mode = info->txt_mode;
   29.85 +		screen_info.orig_video_isVGA = info->video_type;
   29.86 +		screen_info.orig_video_lines = info->video_height;
   29.87 +		screen_info.orig_video_cols = info->video_width;
   29.88 +		screen_info.orig_video_points = info->txt_points;
   29.89 +		screen_info.lfb_width = info->video_width;
   29.90 +		screen_info.lfb_height = info->video_height;
   29.91 +		screen_info.lfb_depth = info->lfb_depth;
   29.92 +		screen_info.lfb_base = info->lfb_base;
   29.93 +		screen_info.lfb_size = info->lfb_size;
   29.94 +		screen_info.lfb_linelength = info->lfb_linelen;
   29.95 +		screen_info.red_size = info->red_size;
   29.96 +		screen_info.red_pos = info->red_pos;
   29.97 +		screen_info.green_size = info->green_size;
   29.98 +		screen_info.green_pos = info->green_pos;
   29.99 +		screen_info.blue_size = info->blue_size;
  29.100 +		screen_info.blue_pos = info->blue_pos;
  29.101 +		screen_info.rsvd_size = info->rsvd_size;
  29.102 +		screen_info.rsvd_pos = info->rsvd_pos;
  29.103 +	}
  29.104 +	screen_info.orig_y = screen_info.orig_video_lines - 1;
  29.105 +	xen_start_info->console.domU.mfn = 0;
  29.106 +	xen_start_info->console.domU.evtchn = 0;
  29.107 +#endif
  29.108 +}
  29.109 +
  29.110 +void __init
  29.111 +dig_irq_init (void)
  29.112 +{
  29.113 +}
    30.1 --- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c	Tue Aug 22 14:45:49 2006 -0600
    30.2 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c	Wed Aug 23 11:11:27 2006 -0600
    30.3 @@ -545,12 +545,7 @@ setup_arch (char **cmdline_p)
    30.4  		       "flags=0x%x\n", s->arch.start_info_pfn,
    30.5  		       xen_start_info->nr_pages, xen_start_info->flags);
    30.6  
    30.7 -		/* xen_start_info isn't setup yet, get the flags manually */
    30.8 -		if (xen_start_info->flags & SIF_INITDOMAIN) {
    30.9 -			if (!(xen_start_info->flags & SIF_PRIVILEGED))
   30.10 -				panic("Xen granted us console access "
   30.11 -				      "but not privileged status");
   30.12 -		} else {
   30.13 +		if (!is_initial_xendomain()) {
   30.14  			extern int console_use_vt;
   30.15  			conswitchp = NULL;
   30.16  			console_use_vt = 0;
    31.1 --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Tue Aug 22 14:45:49 2006 -0600
    31.2 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Wed Aug 23 11:11:27 2006 -0600
    31.3 @@ -82,7 +82,7 @@ ia64_xenmem_reservation_op(unsigned long
    31.4  			//    of a non-privileged domain, 
    31.5  			if ((op == XENMEM_increase_reservation ||
    31.6  			     op == XENMEM_populate_physmap) &&
    31.7 -			    !(xen_start_info->flags & SIF_PRIVILEGED) &&
    31.8 +			    !is_initial_xendomain() &&
    31.9  			    reservation.extent_order > 0)
   31.10  				return ret;
   31.11  		}
    32.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Tue Aug 22 14:45:49 2006 -0600
    32.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Wed Aug 23 11:11:27 2006 -0600
    32.3 @@ -271,7 +271,7 @@ sysret_careful:
    32.4  	CFI_RESTORE_STATE
    32.5  	bt $TIF_NEED_RESCHED,%edx
    32.6  	jnc sysret_signal
    32.7 -        XEN_BLOCK_EVENTS(%rsi)        
    32.8 +	XEN_UNBLOCK_EVENTS(%rsi)
    32.9  	pushq %rdi
   32.10  	CFI_ADJUST_CFA_OFFSET 8
   32.11  	call schedule
   32.12 @@ -295,7 +295,7 @@ sysret_signal:
   32.13  1:	movl $_TIF_NEED_RESCHED,%edi
   32.14  	/* Use IRET because user could have changed frame. This
   32.15  	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
   32.16 -	cli
   32.17 +	XEN_BLOCK_EVENTS(%rsi)
   32.18  	jmp int_with_check
   32.19  	
   32.20  badsys:
   32.21 @@ -377,7 +377,7 @@ int_careful:
   32.22  	call schedule
   32.23  	popq %rdi
   32.24  	CFI_ADJUST_CFA_OFFSET -8
   32.25 -	cli
   32.26 +	XEN_BLOCK_EVENTS(%rsi)
   32.27  	jmp int_with_check
   32.28  
   32.29  	/* handle signals and tracing -- both require a full stack frame */
   32.30 @@ -395,7 +395,7 @@ int_very_careful:
   32.31  	popq %rdi
   32.32  	CFI_ADJUST_CFA_OFFSET -8
   32.33  	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
   32.34 -	cli
   32.35 +	XEN_BLOCK_EVENTS(%rsi)
   32.36  	jmp int_restore_rest
   32.37  	
   32.38  int_signal:
   32.39 @@ -407,7 +407,7 @@ int_signal:
   32.40  1:	movl $_TIF_NEED_RESCHED,%edi	
   32.41  int_restore_rest:
   32.42  	RESTORE_REST
   32.43 -	cli
   32.44 +	XEN_BLOCK_EVENTS(%rsi)
   32.45  	jmp int_with_check
   32.46  	CFI_ENDPROC
   32.47  		
   32.48 @@ -535,8 +535,8 @@ retint_careful:
   32.49  	call  schedule
   32.50  	popq %rdi		
   32.51  	CFI_ADJUST_CFA_OFFSET	-8
   32.52 +	GET_THREAD_INFO(%rcx)
   32.53  	XEN_BLOCK_EVENTS(%rsi)		
   32.54 -	GET_THREAD_INFO(%rcx)
   32.55  /*	cli */
   32.56  	jmp retint_check
   32.57  	
    33.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Tue Aug 22 14:45:49 2006 -0600
    33.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Wed Aug 23 11:11:27 2006 -0600
    33.3 @@ -189,7 +189,6 @@ struct resource code_resource = {
    33.4  
    33.5  #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM)
    33.6  
    33.7 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
    33.8  static struct resource system_rom_resource = {
    33.9  	.name = "System ROM",
   33.10  	.start = 0xf0000,
   33.11 @@ -218,19 +217,16 @@ static struct resource adapter_rom_resou
   33.12  	{ .name = "Adapter ROM", .start = 0, .end = 0,
   33.13  		.flags = IORESOURCE_ROM }
   33.14  };
   33.15 -#endif
   33.16  
   33.17  #define ADAPTER_ROM_RESOURCES \
   33.18  	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
   33.19  
   33.20 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
   33.21  static struct resource video_rom_resource = {
   33.22  	.name = "Video ROM",
   33.23  	.start = 0xc0000,
   33.24  	.end = 0xc7fff,
   33.25  	.flags = IORESOURCE_ROM,
   33.26  };
   33.27 -#endif
   33.28  
   33.29  static struct resource video_ram_resource = {
   33.30  	.name = "Video RAM area",
   33.31 @@ -239,7 +235,6 @@ static struct resource video_ram_resourc
   33.32  	.flags = IORESOURCE_RAM,
   33.33  };
   33.34  
   33.35 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST) || !defined(CONFIG_XEN)
   33.36  #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
   33.37  
   33.38  static int __init romchecksum(unsigned char *rom, unsigned long length)
   33.39 @@ -257,6 +252,12 @@ static void __init probe_roms(void)
   33.40  	unsigned char *rom;
   33.41  	int	      i;
   33.42  
   33.43 +#ifdef CONFIG_XEN
   33.44 +	/* Nothing to do if not running in dom0. */
   33.45 +	if (!is_initial_xendomain())
   33.46 +		return;
   33.47 +#endif
   33.48 +
   33.49  	/* video rom */
   33.50  	upper = adapter_rom_resources[0].start;
   33.51  	for (start = video_rom_resource.start; start < upper; start += 2048) {
   33.52 @@ -315,7 +316,6 @@ static void __init probe_roms(void)
   33.53  		start = adapter_rom_resources[i++].end & ~2047UL;
   33.54  	}
   33.55  }
   33.56 -#endif
   33.57  
   33.58  static __init void parse_cmdline_early (char ** cmdline_p)
   33.59  {
   33.60 @@ -625,11 +625,8 @@ static void __init reserve_ebda_region(v
   33.61  void __init setup_arch(char **cmdline_p)
   33.62  {
   33.63  	unsigned long kernel_end;
   33.64 -
   33.65 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
   33.66  	struct e820entry *machine_e820;
   33.67  	struct xen_memory_map memmap;
   33.68 -#endif
   33.69  
   33.70  #ifdef CONFIG_XEN
   33.71  	/* Register a call for panic conditions. */
   33.72 @@ -639,7 +636,7 @@ void __init setup_arch(char **cmdline_p)
   33.73  	kernel_end = 0;		/* dummy */
   33.74   	screen_info = SCREEN_INFO;
   33.75  
   33.76 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
   33.77 +	if (is_initial_xendomain()) {
   33.78  		/* This is drawn from a dump from vgacon:startup in
   33.79  		 * standard Linux. */
   33.80  		screen_info.orig_video_mode = 3;
   33.81 @@ -648,6 +645,35 @@ void __init setup_arch(char **cmdline_p)
   33.82  		screen_info.orig_video_cols = 80;
   33.83  		screen_info.orig_video_ega_bx = 3;
   33.84  		screen_info.orig_video_points = 16;
   33.85 +		if (xen_start_info->console.dom0.info_size >=
   33.86 +		    sizeof(struct dom0_vga_console_info)) {
   33.87 +			const struct dom0_vga_console_info *info =
   33.88 +				(struct dom0_vga_console_info *)(
   33.89 +					(char *)xen_start_info +
   33.90 +					xen_start_info->console.dom0.info_off);
   33.91 +			screen_info.orig_video_mode = info->txt_mode;
   33.92 +			screen_info.orig_video_isVGA = info->video_type;
   33.93 +			screen_info.orig_video_lines = info->video_height;
   33.94 +			screen_info.orig_video_cols = info->video_width;
   33.95 +			screen_info.orig_video_points = info->txt_points;
   33.96 +			screen_info.lfb_width = info->video_width;
   33.97 +			screen_info.lfb_height = info->video_height;
   33.98 +			screen_info.lfb_depth = info->lfb_depth;
   33.99 +			screen_info.lfb_base = info->lfb_base;
  33.100 +			screen_info.lfb_size = info->lfb_size;
  33.101 +			screen_info.lfb_linelength = info->lfb_linelen;
  33.102 +			screen_info.red_size = info->red_size;
  33.103 +			screen_info.red_pos = info->red_pos;
  33.104 +			screen_info.green_size = info->green_size;
  33.105 +			screen_info.green_pos = info->green_pos;
  33.106 +			screen_info.blue_size = info->blue_size;
  33.107 +			screen_info.blue_pos = info->blue_pos;
  33.108 +			screen_info.rsvd_size = info->rsvd_size;
  33.109 +			screen_info.rsvd_pos = info->rsvd_pos;
  33.110 +		}
  33.111 +		screen_info.orig_y = screen_info.orig_video_lines - 1;
  33.112 +		xen_start_info->console.domU.mfn = 0;
  33.113 +		xen_start_info->console.domU.evtchn = 0;
  33.114  	} else
  33.115  		screen_info.orig_video_isVGA = 0;
  33.116  
  33.117 @@ -860,8 +886,7 @@ void __init setup_arch(char **cmdline_p)
  33.118  
  33.119  	}
  33.120  
  33.121 -	if ( ! (xen_start_info->flags & SIF_INITDOMAIN))
  33.122 -	{
  33.123 +	if (!is_initial_xendomain()) {
  33.124  		acpi_disabled = 1;
  33.125  #ifdef  CONFIG_ACPI
  33.126  		acpi_ht = 0;
  33.127 @@ -908,9 +933,9 @@ void __init setup_arch(char **cmdline_p)
  33.128  	 * Request address space for all standard RAM and ROM resources
  33.129  	 * and also for regions reported as reserved by the e820.
  33.130  	 */
  33.131 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
  33.132  	probe_roms();
  33.133 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
  33.134 +#ifdef CONFIG_XEN
  33.135 +	if (is_initial_xendomain()) {
  33.136  		machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
  33.137  
  33.138  		memmap.nr_entries = E820MAX;
  33.139 @@ -919,14 +944,9 @@ void __init setup_arch(char **cmdline_p)
  33.140  		BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
  33.141  
  33.142  		e820_reserve_resources(machine_e820, memmap.nr_entries);
  33.143 -	} else if (!(xen_start_info->flags & SIF_INITDOMAIN))
  33.144 -		e820_reserve_resources(e820.map, e820.nr_map);
  33.145 -#elif defined(CONFIG_XEN)
  33.146 +	} else
  33.147 +#endif
  33.148  	e820_reserve_resources(e820.map, e820.nr_map);
  33.149 -#else
  33.150 -	probe_roms();
  33.151 -	e820_reserve_resources(e820.map, e820.nr_map);
  33.152 -#endif
  33.153  
  33.154  	request_resource(&iomem_resource, &video_ram_resource);
  33.155  
  33.156 @@ -937,12 +957,12 @@ void __init setup_arch(char **cmdline_p)
  33.157  		request_resource(&ioport_resource, &standard_io_resources[i]);
  33.158  	}
  33.159  
  33.160 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
  33.161 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
  33.162 +#ifdef CONFIG_XEN
  33.163 +	if (is_initial_xendomain()) {
  33.164  		e820_setup_gap(machine_e820, memmap.nr_entries);
  33.165  		free_bootmem(__pa(machine_e820), PAGE_SIZE);
  33.166  	}
  33.167 -#elif !defined(CONFIG_XEN)
  33.168 +#else
  33.169  	e820_setup_gap(e820.map, e820.nr_map);
  33.170  #endif
  33.171  
  33.172 @@ -957,11 +977,7 @@ void __init setup_arch(char **cmdline_p)
  33.173  		set_iopl.iopl = 1;
  33.174  		HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
  33.175  
  33.176 -		if (xen_start_info->flags & SIF_INITDOMAIN) {
  33.177 -			if (!(xen_start_info->flags & SIF_PRIVILEGED))
  33.178 -				panic("Xen granted us console access "
  33.179 -				      "but not privileged status");
  33.180 -		       
  33.181 +		if (is_initial_xendomain()) {
  33.182  #ifdef CONFIG_VT
  33.183  #if defined(CONFIG_VGA_CONSOLE)
  33.184  			conswitchp = &vga_con;
    34.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Tue Aug 22 14:45:49 2006 -0600
    34.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Wed Aug 23 11:11:27 2006 -0600
    34.3 @@ -529,7 +529,7 @@ void __init xen_init_pt(void)
    34.4  		mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
    34.5  	level3_kernel_pgt[pud_index(__START_KERNEL_map)] = 
    34.6  		__pud(__pa_symbol(level2_kernel_pgt) |
    34.7 -		      _KERNPG_TABLE | _PAGE_USER);
    34.8 +		      _KERNPG_TABLE);
    34.9  	memcpy((void *)level2_kernel_pgt, page, PAGE_SIZE);
   34.10  
   34.11  	early_make_page_readonly(init_level4_pgt,
   34.12 @@ -578,7 +578,7 @@ void __init extend_init_mapping(unsigned
   34.13  			pte_page = alloc_static_page(&phys);
   34.14  			early_make_page_readonly(
   34.15  				pte_page, XENFEAT_writable_page_tables);
   34.16 -			set_pmd(pmd, __pmd(phys | _KERNPG_TABLE | _PAGE_USER));
   34.17 +			set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
   34.18  		} else {
   34.19  			addr = page[pmd_index(va)];
   34.20  			addr_to_page(addr, pte_page);
   34.21 @@ -587,7 +587,7 @@ void __init extend_init_mapping(unsigned
   34.22  		if (pte_none(*pte)) {
   34.23  			new_pte = pfn_pte(
   34.24  				(va - __START_KERNEL_map) >> PAGE_SHIFT, 
   34.25 -				__pgprot(_KERNPG_TABLE | _PAGE_USER));
   34.26 +				__pgprot(_KERNPG_TABLE));
   34.27  			xen_l1_entry_update(pte, new_pte);
   34.28  		}
   34.29  		va += PAGE_SIZE;
   34.30 @@ -789,7 +789,7 @@ void __init paging_init(void)
   34.31  
   34.32  	/* Setup mapping of lower 1st MB */
   34.33  	for (i = 0; i < NR_FIX_ISAMAPS; i++)
   34.34 -		if (xen_start_info->flags & SIF_PRIVILEGED)
   34.35 +		if (is_initial_xendomain())
   34.36  			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
   34.37  		else
   34.38  			__set_fixmap(FIX_ISAMAP_BEGIN - i,
    35.1 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Tue Aug 22 14:45:49 2006 -0600
    35.2 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Wed Aug 23 11:11:27 2006 -0600
    35.3 @@ -343,7 +343,6 @@ static void backend_changed(struct xenbu
    35.4  	case XenbusStateInitialising:
    35.5  	case XenbusStateInitWait:
    35.6  	case XenbusStateInitialised:
    35.7 -	case XenbusStateUnknown:
    35.8  		break;
    35.9  
   35.10  	case XenbusStateConnected:
   35.11 @@ -354,10 +353,10 @@ static void backend_changed(struct xenbu
   35.12  		tpmif_set_connected_state(tp, 0);
   35.13  		break;
   35.14  
   35.15 +	case XenbusStateUnknown:
   35.16  	case XenbusStateClosed:
   35.17 -		if (tp->is_suspended == 0) {
   35.18 +		if (tp->is_suspended == 0)
   35.19  			device_unregister(&dev->dev);
   35.20 -		}
   35.21  		xenbus_switch_state(dev, XenbusStateClosed);
   35.22  		break;
   35.23  	}
   35.24 @@ -718,9 +717,8 @@ static int __init tpmif_init(void)
   35.25  	long rc = 0;
   35.26  	struct tpm_private *tp;
   35.27  
   35.28 -	if ((xen_start_info->flags & SIF_INITDOMAIN)) {
   35.29 +	if (is_initial_xendomain())
   35.30  		return -EPERM;
   35.31 -	}
   35.32  
   35.33  	tp = tpm_private_get();
   35.34  	if (!tp) {
    36.1 --- a/linux-2.6-xen-sparse/drivers/xen/Kconfig	Tue Aug 22 14:45:49 2006 -0600
    36.2 +++ b/linux-2.6-xen-sparse/drivers/xen/Kconfig	Wed Aug 23 11:11:27 2006 -0600
    36.3 @@ -13,7 +13,7 @@ config XEN
    36.4  if XEN
    36.5  config XEN_INTERFACE_VERSION
    36.6  	hex
    36.7 -	default 0x00030202
    36.8 +	default 0x00030203
    36.9  
   36.10  menu "XEN"
   36.11  
    37.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Tue Aug 22 14:45:49 2006 -0600
    37.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c	Wed Aug 23 11:11:27 2006 -0600
    37.3 @@ -76,7 +76,7 @@ static unsigned long current_pages;
    37.4  static unsigned long target_pages;
    37.5  
    37.6  /* We increase/decrease in batches which fit in a page */
    37.7 -static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 
    37.8 +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
    37.9  
   37.10  /* VM /proc information for memory */
   37.11  extern unsigned long totalram_pages;
   37.12 @@ -440,20 +440,16 @@ static int balloon_read(char *page, char
   37.13  		"Requested target:   %8lu kB\n"
   37.14  		"Low-mem balloon:    %8lu kB\n"
   37.15  		"High-mem balloon:   %8lu kB\n"
   37.16 +		"Driver pages:       %8lu kB\n"
   37.17  		"Xen hard limit:     ",
   37.18  		PAGES2KB(current_pages), PAGES2KB(target_pages), 
   37.19 -		PAGES2KB(balloon_low), PAGES2KB(balloon_high));
   37.20 +		PAGES2KB(balloon_low), PAGES2KB(balloon_high),
   37.21 +		PAGES2KB(driver_pages));
   37.22  
   37.23 -	if (hard_limit != ~0UL) {
   37.24 -		len += sprintf(
   37.25 -			page + len, 
   37.26 -			"%8lu kB (inc. %8lu kB driver headroom)\n",
   37.27 -			PAGES2KB(hard_limit), PAGES2KB(driver_pages));
   37.28 -	} else {
   37.29 -		len += sprintf(
   37.30 -			page + len,
   37.31 -			"     ??? kB\n");
   37.32 -	}
   37.33 +	if (hard_limit != ~0UL)
   37.34 +		len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit));
   37.35 +	else
   37.36 +		len += sprintf(page + len, "     ??? kB\n");
   37.37  
   37.38  	*eof = 1;
   37.39  	return len;
   37.40 @@ -610,8 +606,21 @@ void balloon_dealloc_empty_page_range(
   37.41  	schedule_work(&balloon_worker);
   37.42  }
   37.43  
   37.44 +void balloon_release_driver_page(struct page *page)
   37.45 +{
   37.46 +	unsigned long flags;
   37.47 +
   37.48 +	balloon_lock(flags);
   37.49 +	balloon_append(page);
   37.50 +	driver_pages--;
   37.51 +	balloon_unlock(flags);
   37.52 +
   37.53 +	schedule_work(&balloon_worker);
   37.54 +}
   37.55 +
   37.56  EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
   37.57  EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
   37.58  EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
   37.59 +EXPORT_SYMBOL_GPL(balloon_release_driver_page);
   37.60  
   37.61  MODULE_LICENSE("Dual BSD/GPL");
    38.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Tue Aug 22 14:45:49 2006 -0600
    38.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c	Wed Aug 23 11:11:27 2006 -0600
    38.3 @@ -341,7 +341,7 @@ static void dispatch_rw_block_io(blkif_t
    38.4  				 blkif_request_t *req,
    38.5  				 pending_req_t *pending_req)
    38.6  {
    38.7 -	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
    38.8 +	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
    38.9  	int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
   38.10  	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   38.11  	struct phys_req preq;
   38.12 @@ -409,7 +409,7 @@ static void dispatch_rw_block_io(blkif_t
   38.13  		DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
   38.14  			operation == READ ? "read" : "write",
   38.15  			preq.sector_number,
   38.16 -			preq.sector_number + preq.nr_sects, preq.dev); 
   38.17 +			preq.sector_number + preq.nr_sects, preq.dev);
   38.18  		goto fail_flush;
   38.19  	}
   38.20  
    39.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Tue Aug 22 14:45:49 2006 -0600
    39.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h	Wed Aug 23 11:11:27 2006 -0600
    39.3 @@ -55,9 +55,9 @@ struct vbd {
    39.4  	unsigned char  type;        /* VDISK_xxx */
    39.5  	u32            pdevice;     /* phys device that this vbd maps to */
    39.6  	struct block_device *bdev;
    39.7 -}; 
    39.8 +};
    39.9  
   39.10 -struct backend_info; 
   39.11 +struct backend_info;
   39.12  
   39.13  typedef struct blkif_st {
   39.14  	/* Unique identifier for this interface. */
   39.15 @@ -72,7 +72,7 @@ typedef struct blkif_st {
   39.16  	/* The VBD attached to this interface. */
   39.17  	struct vbd        vbd;
   39.18  	/* Back pointer to the backend_info. */
   39.19 -	struct backend_info *be; 
   39.20 +	struct backend_info *be;
   39.21  	/* Private fields. */
   39.22  	spinlock_t       blk_ring_lock;
   39.23  	atomic_t         refcnt;
   39.24 @@ -122,7 +122,7 @@ struct phys_req {
   39.25  	blkif_sector_t       sector_number;
   39.26  };
   39.27  
   39.28 -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
   39.29 +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
   39.30  
   39.31  void blkif_interface_init(void);
   39.32  
    40.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    40.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    40.3 @@ -194,7 +194,7 @@ static int blkback_probe(struct xenbus_d
    40.4  	}
    40.5  
    40.6  	/* setup back pointer */
    40.7 -	be->blkif->be = be; 
    40.8 +	be->blkif->be = be;
    40.9  
   40.10  	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
   40.11  				 &be->backend_watch, backend_changed);
   40.12 @@ -287,7 +287,7 @@ static void backend_changed(struct xenbu
   40.13  		}
   40.14  
   40.15  		/* We're potentially connected now */
   40.16 -		update_blkif_status(be->blkif); 
   40.17 +		update_blkif_status(be->blkif);
   40.18  	}
   40.19  }
   40.20  
   40.21 @@ -305,6 +305,11 @@ static void frontend_changed(struct xenb
   40.22  
   40.23  	switch (frontend_state) {
   40.24  	case XenbusStateInitialising:
   40.25 +		if (dev->state == XenbusStateClosing) {
   40.26 +			printk("%s: %s: prepare for reconnect\n",
   40.27 +			       __FUNCTION__, dev->nodename);
   40.28 +			xenbus_switch_state(dev, XenbusStateInitWait);
   40.29 +		}
   40.30  		break;
   40.31  
   40.32  	case XenbusStateInitialised:
   40.33 @@ -326,12 +331,11 @@ static void frontend_changed(struct xenb
   40.34  		xenbus_switch_state(dev, XenbusStateClosing);
   40.35  		break;
   40.36  
   40.37 +	case XenbusStateUnknown:
   40.38  	case XenbusStateClosed:
   40.39  		device_unregister(&dev->dev);
   40.40  		break;
   40.41  
   40.42 -	case XenbusStateUnknown:
   40.43 -	case XenbusStateInitWait:
   40.44  	default:
   40.45  		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
   40.46  				 frontend_state);
    41.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Tue Aug 22 14:45:49 2006 -0600
    41.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Aug 23 11:11:27 2006 -0600
    41.3 @@ -46,6 +46,7 @@
    41.4  #include <xen/interface/grant_table.h>
    41.5  #include <xen/gnttab.h>
    41.6  #include <asm/hypervisor.h>
    41.7 +#include <asm/maddr.h>
    41.8  
    41.9  #define BLKIF_STATE_DISCONNECTED 0
   41.10  #define BLKIF_STATE_CONNECTED    1
   41.11 @@ -255,10 +256,10 @@ static void backend_changed(struct xenbu
   41.12  	DPRINTK("blkfront:backend_changed.\n");
   41.13  
   41.14  	switch (backend_state) {
   41.15 -	case XenbusStateUnknown:
   41.16  	case XenbusStateInitialising:
   41.17  	case XenbusStateInitWait:
   41.18  	case XenbusStateInitialised:
   41.19 +	case XenbusStateUnknown:
   41.20  	case XenbusStateClosed:
   41.21  		break;
   41.22  
    42.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Tue Aug 22 14:45:49 2006 -0600
    42.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Wed Aug 23 11:11:27 2006 -0600
    42.3 @@ -157,7 +157,7 @@ static int alloc_pending_reqs;
    42.4  typedef unsigned int PEND_RING_IDX;
    42.5  
    42.6  static inline int MASK_PEND_IDX(int i) { 
    42.7 -	return (i & (MAX_PENDING_REQS-1)); 
    42.8 +	return (i & (MAX_PENDING_REQS-1));
    42.9  }
   42.10  
   42.11  static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
   42.12 @@ -754,7 +754,7 @@ static int req_increase(void)
   42.13  	if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
   42.14  		kfree(pending_reqs[mmap_alloc]);
   42.15  		kfree(pending_addrs[mmap_alloc]);
   42.16 -		WPRINTK("%s: out of memory\n", __FUNCTION__); 
   42.17 +		WPRINTK("%s: out of memory\n", __FUNCTION__);
   42.18  		ret = -ENOMEM;
   42.19  		goto done;
   42.20  	}
   42.21 @@ -1051,7 +1051,7 @@ static int blktap_read_ufe_ring(int idx)
   42.22  			unsigned long kvaddr, uvaddr;
   42.23  			struct page **map = info->vma->vm_private_data;
   42.24  			struct page *pg;
   42.25 -			int offset; 
   42.26 +			int offset;
   42.27  
   42.28  			uvaddr  = MMAP_VADDR(info->user_vstart, usr_idx, j);
   42.29  			kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
   42.30 @@ -1063,7 +1063,7 @@ static int blktap_read_ufe_ring(int idx)
   42.31  				>> PAGE_SHIFT;
   42.32  			map[offset] = NULL;
   42.33  		}
   42.34 -		fast_flush_area(pending_req, pending_idx, usr_idx, idx); 
   42.35 +		fast_flush_area(pending_req, pending_idx, usr_idx, idx);
   42.36  		make_response(blkif, pending_req->id, resp->operation,
   42.37  			      resp->status);
   42.38  		info->idx_map[usr_idx] = INVALID_REQ;
   42.39 @@ -1118,7 +1118,7 @@ static int do_block_io_op(blkif_t *blkif
   42.40  			       "ring does not exist!\n");
   42.41  			print_dbug = 0; /*We only print this message once*/
   42.42  		}
   42.43 -		return 1; 
   42.44 +		return 1;
   42.45  	}
   42.46  
   42.47  	info = tapfds[blkif->dev_num];
   42.48 @@ -1185,7 +1185,7 @@ static void dispatch_rw_block_io(blkif_t
   42.49  				 blkif_request_t *req,
   42.50  				 pending_req_t *pending_req)
   42.51  {
   42.52 -	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
   42.53 +	extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
   42.54  	int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
   42.55  	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
   42.56  	unsigned int nseg;
    43.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Tue Aug 22 14:45:49 2006 -0600
    43.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/common.h	Wed Aug 23 11:11:27 2006 -0600
    43.3 @@ -49,7 +49,7 @@
    43.4  
    43.5  #define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
    43.6  
    43.7 -struct backend_info; 
    43.8 +struct backend_info;
    43.9  
   43.10  typedef struct blkif_st {
   43.11  	/* Unique identifier for this interface. */
   43.12 @@ -62,7 +62,7 @@ typedef struct blkif_st {
   43.13  	blkif_back_ring_t blk_ring;
   43.14  	struct vm_struct *blk_ring_area;
   43.15  	/* Back pointer to the backend_info. */
   43.16 -	struct backend_info *be; 
   43.17 +	struct backend_info *be;
   43.18  	/* Private fields. */
   43.19  	spinlock_t       blk_ring_lock;
   43.20  	atomic_t         refcnt;
    44.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    44.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    44.3 @@ -174,7 +174,7 @@ static int blktap_probe(struct xenbus_de
    44.4  	}
    44.5  
    44.6  	/* setup back pointer */
    44.7 -	be->blkif->be = be; 
    44.8 +	be->blkif->be = be;
    44.9  	be->blkif->sectors = 0;
   44.10  
   44.11  	/* set a watch on disk info, waiting for userspace to update details*/
   44.12 @@ -267,12 +267,11 @@ static void tap_frontend_changed(struct 
   44.13  		xenbus_switch_state(dev, XenbusStateClosing);
   44.14  		break;
   44.15  
   44.16 +	case XenbusStateUnknown:
   44.17  	case XenbusStateClosed:
   44.18  		device_unregister(&dev->dev);
   44.19  		break;
   44.20  
   44.21 -	case XenbusStateUnknown:
   44.22 -	case XenbusStateInitWait:
   44.23  	default:
   44.24  		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
   44.25  				 frontend_state);
    45.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c	Tue Aug 22 14:45:49 2006 -0600
    45.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c	Wed Aug 23 11:11:27 2006 -0600
    45.3 @@ -178,7 +178,7 @@ static struct tty_driver *kcons_device(s
    45.4  
    45.5  static struct console kcons_info = {
    45.6  	.device	= kcons_device,
    45.7 -	.flags	= CON_PRINTBUFFER,
    45.8 +	.flags	= CON_PRINTBUFFER | CON_ENABLED,
    45.9  	.index	= -1,
   45.10  };
   45.11  
   45.12 @@ -188,12 +188,10 @@ static int __init xen_console_init(void)
   45.13  	if (!is_running_on_xen())
   45.14  		return __RETCODE;
   45.15  
   45.16 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
   45.17 +	if (is_initial_xendomain()) {
   45.18  		if (xc_mode == XC_DEFAULT)
   45.19  			xc_mode = XC_SERIAL;
   45.20  		kcons_info.write = kcons_write_dom0;
   45.21 -		if (xc_mode == XC_SERIAL)
   45.22 -			kcons_info.flags |= CON_ENABLED;
   45.23  	} else {
   45.24  		if (xc_mode == XC_DEFAULT)
   45.25  			xc_mode = XC_TTY;
   45.26 @@ -249,7 +247,7 @@ void xencons_force_flush(void)
   45.27  	int sz;
   45.28  
   45.29  	/* Emergency console is synchronous, so there's nothing to flush. */
   45.30 -	if (xen_start_info->flags & SIF_INITDOMAIN)
   45.31 +	if (is_initial_xendomain())
   45.32  		return;
   45.33  
   45.34  	/* Spin until console data is flushed through to the daemon. */
   45.35 @@ -320,7 +318,7 @@ static void __xencons_tx_flush(void)
   45.36  	int sent, sz, work_done = 0;
   45.37  
   45.38  	if (x_char) {
   45.39 -		if (xen_start_info->flags & SIF_INITDOMAIN)
   45.40 +		if (is_initial_xendomain())
   45.41  			kcons_write_dom0(NULL, &x_char, 1);
   45.42  		else
   45.43  			while (x_char)
   45.44 @@ -334,7 +332,7 @@ static void __xencons_tx_flush(void)
   45.45  		sz = wp - wc;
   45.46  		if (sz > (wbuf_size - WBUF_MASK(wc)))
   45.47  			sz = wbuf_size - WBUF_MASK(wc);
   45.48 -		if (xen_start_info->flags & SIF_INITDOMAIN) {
   45.49 +		if (is_initial_xendomain()) {
   45.50  			kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
   45.51  			wc += sz;
   45.52  		} else {
   45.53 @@ -624,7 +622,7 @@ static int __init xencons_init(void)
   45.54  		return rc;
   45.55  	}
   45.56  
   45.57 -	if (xen_start_info->flags & SIF_INITDOMAIN) {
   45.58 +	if (is_initial_xendomain()) {
   45.59  		xencons_priv_irq = bind_virq_to_irqhandler(
   45.60  			VIRQ_CONSOLE,
   45.61  			0,
    46.1 --- a/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Tue Aug 22 14:45:49 2006 -0600
    46.2 +++ b/linux-2.6-xen-sparse/drivers/xen/console/xencons_ring.c	Wed Aug 23 11:11:27 2006 -0600
    46.3 @@ -52,13 +52,13 @@ static int xencons_irq;
    46.4  
    46.5  static inline struct xencons_interface *xencons_interface(void)
    46.6  {
    46.7 -	return mfn_to_virt(xen_start_info->console_mfn);
    46.8 +	return mfn_to_virt(xen_start_info->console.domU.mfn);
    46.9  }
   46.10  
   46.11  static inline void notify_daemon(void)
   46.12  {
   46.13  	/* Use evtchn: this is called early, before irq is set up. */
   46.14 -	notify_remote_via_evtchn(xen_start_info->console_evtchn);
   46.15 +	notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
   46.16  }
   46.17  
   46.18  int xencons_ring_send(const char *data, unsigned len)
   46.19 @@ -116,11 +116,11 @@ int xencons_ring_init(void)
   46.20  		unbind_from_irqhandler(xencons_irq, NULL);
   46.21  	xencons_irq = 0;
   46.22  
   46.23 -	if (!xen_start_info->console_evtchn)
   46.24 +	if (!xen_start_info->console.domU.evtchn)
   46.25  		return 0;
   46.26  
   46.27  	err = bind_evtchn_to_irqhandler(
   46.28 -		xen_start_info->console_evtchn,
   46.29 +		xen_start_info->console.domU.evtchn,
   46.30  		handle_input, 0, "xencons", NULL);
   46.31  	if (err <= 0) {
   46.32  		printk(KERN_ERR "XEN console request irq failed %i\n", err);
    47.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c	Tue Aug 22 14:45:49 2006 -0600
    47.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/cpu_hotplug.c	Wed Aug 23 11:11:27 2006 -0600
    47.3 @@ -92,7 +92,7 @@ static int setup_cpu_watcher(struct noti
    47.4  		.flags = XBWF_new_thread };
    47.5  	(void)register_xenbus_watch(&cpu_watch);
    47.6  
    47.7 -	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
    47.8 +	if (!is_initial_xendomain()) {
    47.9  		for_each_cpu(i)
   47.10  			vcpu_hotplug(i);
   47.11  		printk(KERN_INFO "Brought up %ld CPUs\n",
    48.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Tue Aug 22 14:45:49 2006 -0600
    48.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/evtchn.c	Wed Aug 23 11:11:27 2006 -0600
    48.3 @@ -840,8 +840,7 @@ void __init xen_init_IRQ(void)
    48.4  
    48.5  #ifdef RTC_IRQ
    48.6  		/* If not domain 0, force our RTC driver to fail its probe. */
    48.7 -		if ((i == RTC_IRQ) &&
    48.8 -		    !(xen_start_info->flags & SIF_INITDOMAIN))
    48.9 +		if ((i == RTC_IRQ) && !is_initial_xendomain())
   48.10  			continue;
   48.11  #endif
   48.12  
    49.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Tue Aug 22 14:45:49 2006 -0600
    49.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c	Wed Aug 23 11:11:27 2006 -0600
    49.3 @@ -41,6 +41,8 @@
    49.4  #include <asm/pgtable.h>
    49.5  #include <asm/uaccess.h>
    49.6  #include <asm/synch_bitops.h>
    49.7 +#include <asm/io.h>
    49.8 +#include <xen/interface/memory.h>
    49.9  
   49.10  /* External tools reserve first few grant table entries. */
   49.11  #define NR_RESERVED_ENTRIES 8
   49.12 @@ -350,6 +352,8 @@ void gnttab_cancel_free_callback(struct 
   49.13  }
   49.14  EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
   49.15  
   49.16 +#ifdef CONFIG_XEN
   49.17 +
   49.18  #ifndef __ia64__
   49.19  static int map_pte_fn(pte_t *pte, struct page *pmd_page,
   49.20  		      unsigned long addr, void *data)
   49.21 @@ -410,17 +414,53 @@ int gnttab_resume(void)
   49.22  
   49.23  int gnttab_suspend(void)
   49.24  {
   49.25 -
   49.26  #ifndef __ia64__
   49.27  	apply_to_page_range(&init_mm, (unsigned long)shared,
   49.28  			    PAGE_SIZE * NR_GRANT_FRAMES,
   49.29  			    unmap_pte_fn, NULL);
   49.30  #endif
   49.31 +	return 0;
   49.32 +}
   49.33 +
   49.34 +#else /* !CONFIG_XEN */
   49.35 +
   49.36 +#include <platform-pci.h>
   49.37 +
   49.38 +int gnttab_resume(void)
   49.39 +{
   49.40 +	unsigned long frames;
   49.41 +	struct xen_add_to_physmap xatp;
   49.42 +	unsigned int i;
   49.43 +
   49.44 +	frames = alloc_xen_mmio(PAGE_SIZE * NR_GRANT_FRAMES);
   49.45 +
   49.46 +	for (i = 0; i < NR_GRANT_FRAMES; i++) {
   49.47 +		xatp.domid = DOMID_SELF;
   49.48 +		xatp.idx = i;
   49.49 +		xatp.space = XENMAPSPACE_grant_table;
   49.50 +		xatp.gpfn = (frames >> PAGE_SHIFT) + i;
   49.51 +		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
   49.52 +			BUG();
   49.53 +	}
   49.54 +
   49.55 +	shared = ioremap(frames, PAGE_SIZE * NR_GRANT_FRAMES);
   49.56 +	if (shared == NULL) {
   49.57 +		printk("error to ioremap gnttab share frames\n");
   49.58 +		return -1;
   49.59 +	}
   49.60  
   49.61  	return 0;
   49.62  }
   49.63  
   49.64 -static int __init gnttab_init(void)
   49.65 +int gnttab_suspend(void)
   49.66 +{
   49.67 +	iounmap(shared);
   49.68 +	return 0;
   49.69 +}
   49.70 +
   49.71 +#endif /* !CONFIG_XEN */
   49.72 +
   49.73 +int __init gnttab_init(void)
   49.74  {
   49.75  	int i;
   49.76  
   49.77 @@ -439,4 +479,6 @@ static int __init gnttab_init(void)
   49.78  	return 0;
   49.79  }
   49.80  
   49.81 +#ifdef CONFIG_XEN
   49.82  core_initcall(gnttab_init);
   49.83 +#endif
    50.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Tue Aug 22 14:45:49 2006 -0600
    50.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c	Wed Aug 23 11:11:27 2006 -0600
    50.3 @@ -106,7 +106,8 @@ static void pre_suspend(void)
    50.4  	clear_fixmap(FIX_SHARED_INFO);
    50.5  
    50.6  	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
    50.7 -	xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);
    50.8 +	xen_start_info->console.domU.mfn =
    50.9 +		mfn_to_pfn(xen_start_info->console.domU.mfn);
   50.10  }
   50.11  
   50.12  static void post_suspend(void)
    51.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c	Tue Aug 22 14:45:49 2006 -0600
    51.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c	Wed Aug 23 11:11:27 2006 -0600
    51.3 @@ -121,8 +121,7 @@ static int __init skbuff_init(void)
    51.4  	for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
    51.5  		size = PAGE_SIZE << order;
    51.6  		sprintf(name[order], "xen-skb-%lu", size);
    51.7 -		if (is_running_on_xen() &&
    51.8 -		    (xen_start_info->flags & SIF_PRIVILEGED))
    51.9 +		if (is_running_on_xen() && is_initial_xendomain())
   51.10  			skbuff_order_cachep[order] = kmem_cache_create(
   51.11  				name[order], size, size, 0,
   51.12  				skbuff_ctor, skbuff_dtor);
    52.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Tue Aug 22 14:45:49 2006 -0600
    52.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c	Wed Aug 23 11:11:27 2006 -0600
    52.3 @@ -255,7 +255,14 @@ void __init smp_prepare_cpus(unsigned in
    52.4  
    52.5  	xen_smp_intr_init(0);
    52.6  
    52.7 -	for_each_cpu_mask (cpu, cpu_possible_map) {
    52.8 +	/* Restrict the possible_map according to max_cpus. */
    52.9 +	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
   52.10 +		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
   52.11 +			continue;
   52.12 +		cpu_clear(cpu, cpu_possible_map);
   52.13 +	}
   52.14 +
   52.15 +	for_each_cpu (cpu) {
   52.16  		if (cpu == 0)
   52.17  			continue;
   52.18  
   52.19 @@ -266,7 +273,8 @@ void __init smp_prepare_cpus(unsigned in
   52.20  #endif
   52.21  		gdt_descr->address = get_zeroed_page(GFP_KERNEL);
   52.22  		if (unlikely(!gdt_descr->address)) {
   52.23 -			printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
   52.24 +			printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
   52.25 +			       cpu);
   52.26  			continue;
   52.27  		}
   52.28  		gdt_descr->size = GDT_SIZE;
   52.29 @@ -294,7 +302,7 @@ void __init smp_prepare_cpus(unsigned in
   52.30  		irq_ctx_init(cpu);
   52.31  
   52.32  #ifdef CONFIG_HOTPLUG_CPU
   52.33 -		if (xen_start_info->flags & SIF_INITDOMAIN)
   52.34 +		if (is_initial_xendomain())
   52.35  			cpu_set(cpu, cpu_present_map);
   52.36  #else
   52.37  		cpu_set(cpu, cpu_present_map);
   52.38 @@ -305,12 +313,6 @@ void __init smp_prepare_cpus(unsigned in
   52.39  
   52.40  	init_xenbus_allowed_cpumask();
   52.41  
   52.42 -	/* Currently, Xen gives no dynamic NUMA/HT info. */
   52.43 -	for (cpu = 1; cpu < NR_CPUS; cpu++) {
   52.44 -		cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
   52.45 -		cpu_core_map[cpu]    = cpumask_of_cpu(cpu);
   52.46 -	}
   52.47 -
   52.48  #ifdef CONFIG_X86_IO_APIC
   52.49  	/*
   52.50  	 * Here we can be sure that there is an IO-APIC in the system. Let's
    53.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Tue Aug 22 14:45:49 2006 -0600
    53.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Wed Aug 23 11:11:27 2006 -0600
    53.3 @@ -64,9 +64,9 @@ typedef struct netif_st {
    53.4  
    53.5  	/* Physical parameters of the comms window. */
    53.6  	grant_handle_t   tx_shmem_handle;
    53.7 -	grant_ref_t      tx_shmem_ref; 
    53.8 +	grant_ref_t      tx_shmem_ref;
    53.9  	grant_handle_t   rx_shmem_handle;
   53.10 -	grant_ref_t      rx_shmem_ref; 
   53.11 +	grant_ref_t      rx_shmem_ref;
   53.12  	unsigned int     evtchn;
   53.13  	unsigned int     irq;
   53.14  
   53.15 @@ -78,7 +78,10 @@ typedef struct netif_st {
   53.16  
   53.17  	/* Set of features that can be turned on in dev->features. */
   53.18  	int features;
   53.19 -	int can_queue;
   53.20 +
   53.21 +	/* Internal feature information. */
   53.22 +	int can_queue:1;	/* can queue packets for receiver? */
   53.23 +	int copying_receiver:1;	/* copy packets to receiver?       */
   53.24  
   53.25  	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
   53.26  	RING_IDX rx_req_cons_peek;
    54.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c	Tue Aug 22 14:45:49 2006 -0600
    54.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c	Wed Aug 23 11:11:27 2006 -0600
    54.3 @@ -218,7 +218,7 @@ static int __init make_loopback(int i)
    54.4  	return err;
    54.5  }
    54.6  
    54.7 -static void __init clean_loopback(int i)
    54.8 +static void __exit clean_loopback(int i)
    54.9  {
   54.10  	struct net_device *dev1, *dev2;
   54.11  	char dev_name[IFNAMSIZ];
    55.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Tue Aug 22 14:45:49 2006 -0600
    55.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed Aug 23 11:11:27 2006 -0600
    55.3 @@ -43,6 +43,7 @@
    55.4  struct netbk_rx_meta {
    55.5  	skb_frag_t frag;
    55.6  	int id;
    55.7 +	int copy:1;
    55.8  };
    55.9  
   55.10  static void netif_idx_release(u16 pending_idx);
   55.11 @@ -68,14 +69,12 @@ static struct timer_list net_timer;
   55.12  #define MAX_PENDING_REQS 256
   55.13  
   55.14  static struct sk_buff_head rx_queue;
   55.15 -static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
   55.16 -static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
   55.17 -static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
   55.18 -static unsigned char rx_notify[NR_IRQS];
   55.19  
   55.20  static unsigned long mmap_vstart;
   55.21  #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
   55.22  
   55.23 +static void *rx_mmap_area;
   55.24 +
   55.25  #define PKT_PROT_LEN 64
   55.26  
   55.27  static struct {
   55.28 @@ -147,6 +146,31 @@ static inline int is_xen_skb(struct sk_b
   55.29  	return (cp == skbuff_cachep);
   55.30  }
   55.31  
   55.32 +/*
   55.33 + * We can flip without copying the packet unless:
   55.34 + *  1. The data is not allocated from our special cache; or
   55.35 + *  2. The main data area is shared; or
   55.36 + *  3. One or more fragments are shared; or
   55.37 + *  4. There are chained fragments.
   55.38 + */
   55.39 +static inline int is_flippable_skb(struct sk_buff *skb)
   55.40 +{
   55.41 +	int frag;
   55.42 +
   55.43 +	if (!is_xen_skb(skb) || skb_cloned(skb))
   55.44 +		return 0;
   55.45 +
   55.46 +	for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
   55.47 +		if (page_count(skb_shinfo(skb)->frags[frag].page) > 1)
   55.48 +			return 0;
   55.49 +	}
   55.50 +
   55.51 +	if (skb_shinfo(skb)->frag_list != NULL)
   55.52 +		return 0;
   55.53 +
   55.54 +	return 1;
   55.55 +}
   55.56 +
   55.57  static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
   55.58  {
   55.59  	struct skb_shared_info *ninfo;
   55.60 @@ -156,6 +180,8 @@ static struct sk_buff *netbk_copy_skb(st
   55.61  	int len;
   55.62  	int headlen;
   55.63  
   55.64 +	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
   55.65 +
   55.66  	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
   55.67  	if (unlikely(!nskb))
   55.68  		goto err;
   55.69 @@ -254,13 +280,11 @@ int netif_be_start_xmit(struct sk_buff *
   55.70  		goto drop;
   55.71  	}
   55.72  
   55.73 -	/*
   55.74 -	 * We do not copy the packet unless:
   55.75 -	 *  1. The data is shared; or
   55.76 -	 *  2. The data is not allocated from our special cache.
   55.77 -	 *  3. The data is fragmented.
   55.78 -	 */
   55.79 -	if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
   55.80 +	/* Copy the packet here if it's destined for a flipping
   55.81 +	   interface but isn't flippable (e.g. extra references to
   55.82 +	   data)
   55.83 +	*/
   55.84 +	if (!netif->copying_receiver && !is_flippable_skb(skb)) {
   55.85  		struct sk_buff *nskb = netbk_copy_skb(skb);
   55.86  		if ( unlikely(nskb == NULL) )
   55.87  			goto drop;
   55.88 @@ -306,7 +330,7 @@ static void xen_network_done_notify(void
   55.89  /* 
   55.90   * Add following to poll() function in NAPI driver (Tigon3 is example):
   55.91   *  if ( xen_network_done() )
   55.92 - *      tg3_enable_ints(tp); 
   55.93 + *      tg3_enable_ints(tp);
   55.94   */
   55.95  int xen_network_done(void)
   55.96  {
   55.97 @@ -314,66 +338,113 @@ int xen_network_done(void)
   55.98  }
   55.99  #endif
  55.100  
  55.101 -static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
  55.102 +struct netrx_pending_operations {
  55.103 +	unsigned trans_prod, trans_cons;
  55.104 +	unsigned mmu_prod, mmu_cons;
  55.105 +	unsigned mcl_prod, mcl_cons;
  55.106 +	unsigned copy_prod, copy_cons;
  55.107 +	unsigned meta_prod, meta_cons;
  55.108 +	mmu_update_t *mmu;
  55.109 +	gnttab_transfer_t *trans;
  55.110 +	gnttab_copy_t *copy;
  55.111 +	multicall_entry_t *mcl;
  55.112 +	struct netbk_rx_meta *meta;
  55.113 +};
  55.114 +
  55.115 +/* Set up the grant operations for this fragment.  If it's a flipping
  55.116 +   interface, we also set up the unmap request from here. */
  55.117 +static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
  55.118 +			  int i, struct netrx_pending_operations *npo,
  55.119 +			  struct page *page, unsigned long size,
  55.120 +			  unsigned long offset)
  55.121  {
  55.122 -	multicall_entry_t *mcl = rx_mcl + count;
  55.123 -	mmu_update_t *mmu = rx_mmu + count;
  55.124 -	gnttab_transfer_t *gop = grant_rx_op + count;
  55.125 +	mmu_update_t *mmu;
  55.126 +	gnttab_transfer_t *gop;
  55.127 +	gnttab_copy_t *copy_gop;
  55.128 +	multicall_entry_t *mcl;
  55.129  	netif_rx_request_t *req;
  55.130  	unsigned long old_mfn, new_mfn;
  55.131  
  55.132  	old_mfn = virt_to_mfn(page_address(page));
  55.133  
  55.134 -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  55.135 -		new_mfn = alloc_mfn();
  55.136 -
  55.137 -		/*
  55.138 -		 * Set the new P2M table entry before reassigning
  55.139 -		 * the old data page. Heed the comment in
  55.140 -		 * pgtable-2level.h:pte_page(). :-)
  55.141 -		 */
  55.142 -		set_phys_to_machine(page_to_pfn(page), new_mfn);
  55.143 +	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
  55.144 +	if (netif->copying_receiver) {
  55.145 +		/* The fragment needs to be copied rather than
  55.146 +		   flipped. */
  55.147 +		meta->copy = 1;
  55.148 +		copy_gop = npo->copy + npo->copy_prod++;
  55.149 +		copy_gop->source.domid = DOMID_SELF;
  55.150 +		copy_gop->source.offset = offset;
  55.151 +		copy_gop->source.u.gmfn = old_mfn;
  55.152 +		copy_gop->dest.domid = netif->domid;
  55.153 +		copy_gop->dest.offset = 0;
  55.154 +		copy_gop->dest.u.ref = req->gref;
  55.155 +		copy_gop->len = size;
  55.156 +		copy_gop->flags = GNTCOPY_dest_gref;
  55.157 +	} else {
  55.158 +		meta->copy = 0;
  55.159 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  55.160 +			new_mfn = alloc_mfn();
  55.161  
  55.162 -		MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
  55.163 -					pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
  55.164 +			/*
  55.165 +			 * Set the new P2M table entry before
  55.166 +			 * reassigning the old data page. Heed the
  55.167 +			 * comment in pgtable-2level.h:pte_page(). :-)
  55.168 +			 */
  55.169 +			set_phys_to_machine(page_to_pfn(page), new_mfn);
  55.170  
  55.171 -		mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
  55.172 -			MMU_MACHPHYS_UPDATE;
  55.173 -		mmu->val = page_to_pfn(page);
  55.174 +			mcl = npo->mcl + npo->mcl_prod++;
  55.175 +			MULTI_update_va_mapping(mcl,
  55.176 +					     (unsigned long)page_address(page),
  55.177 +					     pfn_pte_ma(new_mfn, PAGE_KERNEL),
  55.178 +					     0);
  55.179 +
  55.180 +			mmu = npo->mmu + npo->mmu_prod++;
  55.181 +			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
  55.182 +				MMU_MACHPHYS_UPDATE;
  55.183 +			mmu->val = page_to_pfn(page);
  55.184 +		}
  55.185 +
  55.186 +		gop = npo->trans + npo->trans_prod++;
  55.187 +		gop->mfn = old_mfn;
  55.188 +		gop->domid = netif->domid;
  55.189 +		gop->ref = req->gref;
  55.190  	}
  55.191 -
  55.192 -	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
  55.193 -	gop->mfn = old_mfn;
  55.194 -	gop->domid = netif->domid;
  55.195 -	gop->ref = req->gref;
  55.196  	return req->id;
  55.197  }
  55.198  
  55.199 -static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
  55.200 -			  int count)
  55.201 +static void netbk_gop_skb(struct sk_buff *skb,
  55.202 +			  struct netrx_pending_operations *npo)
  55.203  {
  55.204  	netif_t *netif = netdev_priv(skb->dev);
  55.205  	int nr_frags = skb_shinfo(skb)->nr_frags;
  55.206  	int i;
  55.207  	int extra;
  55.208 +	struct netbk_rx_meta *head_meta, *meta;
  55.209  
  55.210 -	meta[count].frag.page_offset = skb_shinfo(skb)->gso_type;
  55.211 -	meta[count].frag.size = skb_shinfo(skb)->gso_size;
  55.212 -	extra = !!meta[count].frag.size + 1;
  55.213 +	head_meta = npo->meta + npo->meta_prod++;
  55.214 +	head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
  55.215 +	head_meta->frag.size = skb_shinfo(skb)->gso_size;
  55.216 +	extra = !!head_meta->frag.size + 1;
  55.217  
  55.218  	for (i = 0; i < nr_frags; i++) {
  55.219 -		meta[++count].frag = skb_shinfo(skb)->frags[i];
  55.220 -		meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
  55.221 -						count, i + extra);
  55.222 +		meta = npo->meta + npo->meta_prod++;
  55.223 +		meta->frag = skb_shinfo(skb)->frags[i];
  55.224 +		meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
  55.225 +					  meta->frag.page,
  55.226 +					  meta->frag.size,
  55.227 +					  meta->frag.page_offset);
  55.228  	}
  55.229  
  55.230  	/*
  55.231  	 * This must occur at the end to ensure that we don't trash
  55.232  	 * skb_shinfo until we're done.
  55.233  	 */
  55.234 -	meta[count - nr_frags].id = netbk_gop_frag(netif,
  55.235 -						   virt_to_page(skb->data),
  55.236 -						   count - nr_frags, 0);
  55.237 +	head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
  55.238 +				       virt_to_page(skb->data),
  55.239 +				       skb_headlen(skb),
  55.240 +				       offset_in_page(skb->data));
  55.241 +
  55.242  	netif->rx.req_cons += nr_frags + extra;
  55.243  }
  55.244  
  55.245 @@ -385,32 +456,48 @@ static inline void netbk_free_pages(int 
  55.246  		put_page(meta[i].frag.page);
  55.247  }
  55.248  
  55.249 -static int netbk_check_gop(int nr_frags, domid_t domid, int count)
  55.250 +/* This is a twin to netbk_gop_skb.  Assume that netbk_gop_skb was
  55.251 +   used to set up the operations on the top of
  55.252 +   netrx_pending_operations, which have since been done.  Check that
  55.253 +   they didn't give any errors and advance over them. */
  55.254 +static int netbk_check_gop(int nr_frags, domid_t domid,
  55.255 +			   struct netrx_pending_operations *npo)
  55.256  {
  55.257 -	multicall_entry_t *mcl = rx_mcl + count;
  55.258 -	gnttab_transfer_t *gop = grant_rx_op + count;
  55.259 +	multicall_entry_t *mcl;
  55.260 +	gnttab_transfer_t *gop;
  55.261 +	gnttab_copy_t     *copy_op;
  55.262  	int status = NETIF_RSP_OKAY;
  55.263  	int i;
  55.264  
  55.265  	for (i = 0; i <= nr_frags; i++) {
  55.266 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  55.267 -			/* The update_va_mapping() must not fail. */
  55.268 -			BUG_ON(mcl->result != 0);
  55.269 -			mcl++;
  55.270 -		}
  55.271 +		if (npo->meta[npo->meta_cons + i].copy) {
  55.272 +			copy_op = npo->copy + npo->copy_cons++;
  55.273 +			if (copy_op->status != GNTST_okay) {
  55.274 +				DPRINTK("Bad status %d from copy to DOM%d.\n",
  55.275 +					copy_op->status, domid);
  55.276 +				status = NETIF_RSP_ERROR;
  55.277 +			}
  55.278 +		} else {
  55.279 +			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  55.280 +				mcl = npo->mcl + npo->mcl_cons++;
  55.281 +				/* The update_va_mapping() must not fail. */
  55.282 +				BUG_ON(mcl->result != 0);
  55.283 +			}
  55.284  
  55.285 -		/* Check the reassignment error code. */
  55.286 -		if (gop->status != 0) { 
  55.287 -			DPRINTK("Bad status %d from grant transfer to DOM%u\n",
  55.288 -				gop->status, domid);
  55.289 -			/*
  55.290 -			 * Page no longer belongs to us unless GNTST_bad_page,
  55.291 -			 * but that should be a fatal error anyway.
  55.292 -			 */
  55.293 -			BUG_ON(gop->status == GNTST_bad_page);
  55.294 -			status = NETIF_RSP_ERROR; 
  55.295 +			gop = npo->trans + npo->trans_cons++;
  55.296 +			/* Check the reassignment error code. */
  55.297 +			if (gop->status != 0) {
  55.298 +				DPRINTK("Bad status %d from grant transfer to DOM%u\n",
  55.299 +					gop->status, domid);
  55.300 +				/*
  55.301 +				 * Page no longer belongs to us unless
  55.302 +				 * GNTST_bad_page, but that should be
  55.303 +				 * a fatal error anyway.
  55.304 +				 */
  55.305 +				BUG_ON(gop->status == GNTST_bad_page);
  55.306 +				status = NETIF_RSP_ERROR;
  55.307 +			}
  55.308  		}
  55.309 -		gop++;
  55.310  	}
  55.311  
  55.312  	return status;
  55.313 @@ -420,23 +507,27 @@ static void netbk_add_frag_responses(net
  55.314  				     struct netbk_rx_meta *meta, int nr_frags)
  55.315  {
  55.316  	int i;
  55.317 +	unsigned long offset;
  55.318  
  55.319  	for (i = 0; i < nr_frags; i++) {
  55.320  		int id = meta[i].id;
  55.321  		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
  55.322  
  55.323 -		make_rx_response(netif, id, status, meta[i].frag.page_offset,
  55.324 +		if (meta[i].copy)
  55.325 +			offset = 0;
  55.326 +		else
  55.327 +			offset = meta[i].frag.page_offset;
  55.328 +		make_rx_response(netif, id, status, offset,
  55.329  				 meta[i].frag.size, flags);
  55.330  	}
  55.331  }
  55.332  
  55.333  static void net_rx_action(unsigned long unused)
  55.334  {
  55.335 -	netif_t *netif = NULL; 
  55.336 +	netif_t *netif = NULL;
  55.337  	s8 status;
  55.338  	u16 id, irq, flags;
  55.339  	netif_rx_response_t *resp;
  55.340 -	struct netif_extra_info *extra;
  55.341  	multicall_entry_t *mcl;
  55.342  	struct sk_buff_head rxq;
  55.343  	struct sk_buff *skb;
  55.344 @@ -444,14 +535,27 @@ static void net_rx_action(unsigned long 
  55.345  	int ret;
  55.346  	int nr_frags;
  55.347  	int count;
  55.348 +	unsigned long offset;
  55.349  
  55.350  	/*
  55.351  	 * Putting hundreds of bytes on the stack is considered rude.
  55.352  	 * Static works because a tasklet can only be on one CPU at any time.
  55.353  	 */
  55.354 +	static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
  55.355 +	static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
  55.356 +	static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
  55.357 +	static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
  55.358 +	static unsigned char rx_notify[NR_IRQS];
  55.359  	static u16 notify_list[NET_RX_RING_SIZE];
  55.360  	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
  55.361  
  55.362 +	struct netrx_pending_operations npo = {
  55.363 +		mmu: rx_mmu,
  55.364 +		trans: grant_trans_op,
  55.365 +		copy: grant_copy_op,
  55.366 +		mcl: rx_mcl,
  55.367 +		meta: meta};
  55.368 +
  55.369  	skb_queue_head_init(&rxq);
  55.370  
  55.371  	count = 0;
  55.372 @@ -471,7 +575,7 @@ static void net_rx_action(unsigned long 
  55.373  			break;
  55.374  		}
  55.375  
  55.376 -		netbk_gop_skb(skb, meta, count);
  55.377 +		netbk_gop_skb(skb, &npo);
  55.378  
  55.379  		count += nr_frags + 1;
  55.380  
  55.381 @@ -482,43 +586,74 @@ static void net_rx_action(unsigned long 
  55.382  			break;
  55.383  	}
  55.384  
  55.385 -	if (!count)
  55.386 -		return;
  55.387 +	if (npo.mcl_prod &&
  55.388 +	    !xen_feature(XENFEAT_auto_translated_physmap)) {
  55.389 +		mcl = npo.mcl + npo.mcl_prod++;
  55.390  
  55.391 -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  55.392 -		mcl = rx_mcl + count;
  55.393 -
  55.394 +		BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
  55.395  		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
  55.396  
  55.397  		mcl->op = __HYPERVISOR_mmu_update;
  55.398  		mcl->args[0] = (unsigned long)rx_mmu;
  55.399 -		mcl->args[1] = count;
  55.400 +		mcl->args[1] = npo.mmu_prod;
  55.401  		mcl->args[2] = 0;
  55.402  		mcl->args[3] = DOMID_SELF;
  55.403 -
  55.404 -		ret = HYPERVISOR_multicall(rx_mcl, count + 1);
  55.405 -		BUG_ON(ret != 0);
  55.406  	}
  55.407  
  55.408 -	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
  55.409 +	if (npo.trans_prod) {
  55.410 +		mcl = npo.mcl + npo.mcl_prod++;
  55.411 +		mcl->op = __HYPERVISOR_grant_table_op;
  55.412 +		mcl->args[0] = GNTTABOP_transfer;
  55.413 +		mcl->args[1] = (unsigned long)grant_trans_op;
  55.414 +		mcl->args[2] = npo.trans_prod;
  55.415 +	}
  55.416 +
  55.417 +	if (npo.copy_prod) {
  55.418 +		mcl = npo.mcl + npo.mcl_prod++;
  55.419 +		mcl->op = __HYPERVISOR_grant_table_op;
  55.420 +		mcl->args[0] = GNTTABOP_copy;
  55.421 +		mcl->args[1] = (unsigned long)grant_copy_op;
  55.422 +		mcl->args[2] = npo.copy_prod;
  55.423 +	}
  55.424 +
  55.425 +	/* Nothing to do? */
  55.426 +	if (!npo.mcl_prod)
  55.427 +		return;
  55.428 +
  55.429 +	BUG_ON(npo.copy_prod > NET_RX_RING_SIZE);
  55.430 +	BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE);
  55.431 +	BUG_ON(npo.trans_prod > NET_RX_RING_SIZE);
  55.432 +	BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3);
  55.433 +	BUG_ON(npo.meta_prod > NET_RX_RING_SIZE);
  55.434 +
  55.435 +	ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
  55.436  	BUG_ON(ret != 0);
  55.437  
  55.438 -	count = 0;
  55.439  	while ((skb = __skb_dequeue(&rxq)) != NULL) {
  55.440  		nr_frags = *(int *)skb->cb;
  55.441  
  55.442 -		atomic_set(&(skb_shinfo(skb)->dataref), 1);
  55.443 -		skb_shinfo(skb)->nr_frags = 0;
  55.444 -		skb_shinfo(skb)->frag_list = NULL;
  55.445 +		netif = netdev_priv(skb->dev);
  55.446 +		/* We can't rely on skb_release_data to release the
  55.447 +		   pages used by fragments for us, since it tries to
  55.448 +		   touch the pages in the fraglist.  If we're in
  55.449 +		   flipping mode, that doesn't work.  In copying mode,
  55.450 +		   we still have access to all of the pages, and so
  55.451 +		   it's safe to let release_data deal with it. */
  55.452 +		/* (Freeing the fragments is safe since we copy
  55.453 +		   non-linear skbs destined for flipping interfaces) */
  55.454 +		if (!netif->copying_receiver) {
  55.455 +			atomic_set(&(skb_shinfo(skb)->dataref), 1);
  55.456 +			skb_shinfo(skb)->frag_list = NULL;
  55.457 +			skb_shinfo(skb)->nr_frags = 0;
  55.458 +			netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
  55.459 +		}
  55.460  
  55.461 -		netif = netdev_priv(skb->dev);
  55.462  		netif->stats.tx_bytes += skb->len;
  55.463  		netif->stats.tx_packets++;
  55.464  
  55.465 -		netbk_free_pages(nr_frags, meta + count + 1);
  55.466 -		status = netbk_check_gop(nr_frags, netif->domid, count);
  55.467 +		status = netbk_check_gop(nr_frags, netif->domid, &npo);
  55.468  
  55.469 -		id = meta[count].id;
  55.470 +		id = meta[npo.meta_cons].id;
  55.471  		flags = nr_frags ? NETRXF_more_data : 0;
  55.472  
  55.473  		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
  55.474 @@ -526,34 +661,32 @@ static void net_rx_action(unsigned long 
  55.475  		else if (skb->proto_data_valid) /* remote but checksummed? */
  55.476  			flags |= NETRXF_data_validated;
  55.477  
  55.478 -		resp = make_rx_response(netif, id, status,
  55.479 -					offset_in_page(skb->data),
  55.480 +		if (meta[npo.meta_cons].copy)
  55.481 +			offset = 0;
  55.482 +		else
  55.483 +			offset = offset_in_page(skb->data);
  55.484 +		resp = make_rx_response(netif, id, status, offset,
  55.485  					skb_headlen(skb), flags);
  55.486  
  55.487 -		extra = NULL;
  55.488 -
  55.489 -		if (meta[count].frag.size) {
  55.490 +		if (meta[npo.meta_cons].frag.size) {
  55.491  			struct netif_extra_info *gso =
  55.492  				(struct netif_extra_info *)
  55.493  				RING_GET_RESPONSE(&netif->rx,
  55.494  						  netif->rx.rsp_prod_pvt++);
  55.495  
  55.496 -			if (extra)
  55.497 -				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
  55.498 -			else
  55.499 -				resp->flags |= NETRXF_extra_info;
  55.500 +			resp->flags |= NETRXF_extra_info;
  55.501  
  55.502 -			gso->u.gso.size = meta[count].frag.size;
  55.503 +			gso->u.gso.size = meta[npo.meta_cons].frag.size;
  55.504  			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
  55.505  			gso->u.gso.pad = 0;
  55.506  			gso->u.gso.features = 0;
  55.507  
  55.508  			gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
  55.509  			gso->flags = 0;
  55.510 -			extra = gso;
  55.511  		}
  55.512  
  55.513 -		netbk_add_frag_responses(netif, status, meta + count + 1,
  55.514 +		netbk_add_frag_responses(netif, status,
  55.515 +					 meta + npo.meta_cons + 1,
  55.516  					 nr_frags);
  55.517  
  55.518  		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
  55.519 @@ -569,7 +702,7 @@ static void net_rx_action(unsigned long 
  55.520  
  55.521  		netif_put(netif);
  55.522  		dev_kfree_skb(skb);
  55.523 -		count += nr_frags + 1;
  55.524 +		npo.meta_cons += nr_frags + 1;
  55.525  	}
  55.526  
  55.527  	while (notify_nr != 0) {
  55.528 @@ -1029,7 +1162,7 @@ static void net_tx_action(unsigned long 
  55.529  		if (unlikely(txreq.size < ETH_HLEN)) {
  55.530  			DPRINTK("Bad packet size: %d\n", txreq.size);
  55.531  			netbk_tx_err(netif, &txreq, i);
  55.532 -			continue; 
  55.533 +			continue;
  55.534  		}
  55.535  
  55.536  		/* No crossing a page as the payload mustn't fragment. */
  55.537 @@ -1085,6 +1218,9 @@ static void net_tx_action(unsigned long 
  55.538  			skb_shinfo(skb)->nr_frags++;
  55.539  			skb_shinfo(skb)->frags[0].page =
  55.540  				(void *)(unsigned long)pending_idx;
  55.541 +		} else {
  55.542 +			/* Discriminate from any valid pending_idx value. */
  55.543 +			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
  55.544  		}
  55.545  
  55.546  		__skb_queue_tail(&tx_queue, skb);
  55.547 @@ -1187,6 +1323,12 @@ static void netif_page_release(struct pa
  55.548  	netif_idx_release(pending_idx);
  55.549  }
  55.550  
  55.551 +static void netif_rx_page_release(struct page *page)
  55.552 +{
  55.553 +	/* Ready for next use. */
  55.554 +	set_page_count(page, 1);
  55.555 +}
  55.556 +
  55.557  irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
  55.558  {
  55.559  	netif_t *netif = dev_id;
  55.560 @@ -1317,6 +1459,16 @@ static int __init netback_init(void)
  55.561  		SetPageForeign(page, netif_page_release);
  55.562  	}
  55.563  
  55.564 +	page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
  55.565 +	BUG_ON(page == NULL);
  55.566 +	rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
  55.567 +
  55.568 +	for (i = 0; i < NET_RX_RING_SIZE; i++) {
  55.569 +		page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
  55.570 +		set_page_count(page, 1);
  55.571 +		SetPageForeign(page, netif_rx_page_release);
  55.572 +	}
  55.573 +
  55.574  	pending_cons = 0;
  55.575  	pending_prod = MAX_PENDING_REQS;
  55.576  	for (i = 0; i < MAX_PENDING_REQS; i++)
    56.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    56.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    56.3 @@ -108,6 +108,12 @@ static int netback_probe(struct xenbus_d
    56.4  			goto abort_transaction;
    56.5  		}
    56.6  
    56.7 +		err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1);
    56.8 +		if (err) {
    56.9 +			message = "writing feature-rx-copy";
   56.10 +			goto abort_transaction;
   56.11 +		}
   56.12 +
   56.13  		err = xenbus_transaction_end(xbt, 0);
   56.14  	} while (err == -EAGAIN);
   56.15  
   56.16 @@ -228,10 +234,25 @@ static void frontend_changed(struct xenb
   56.17  
   56.18  	switch (frontend_state) {
   56.19  	case XenbusStateInitialising:
   56.20 +		if (dev->state == XenbusStateClosing) {
   56.21 +			printk("%s: %s: prepare for reconnect\n",
   56.22 +			       __FUNCTION__, dev->nodename);
   56.23 +			if (be->netif) {
   56.24 +				netif_disconnect(be->netif);
   56.25 +				be->netif = NULL;
   56.26 +			}
   56.27 +			xenbus_switch_state(dev, XenbusStateInitWait);
   56.28 +		}
   56.29 +		break;
   56.30 +
   56.31  	case XenbusStateInitialised:
   56.32  		break;
   56.33  
   56.34  	case XenbusStateConnected:
   56.35 +		if (!be->netif) {
   56.36 +			/* reconnect: setup be->netif */
   56.37 +			backend_changed(&be->backend_watch, NULL, 0);
   56.38 +		}
   56.39  		maybe_connect(be);
   56.40  		break;
   56.41  
   56.42 @@ -239,14 +260,13 @@ static void frontend_changed(struct xenb
   56.43  		xenbus_switch_state(dev, XenbusStateClosing);
   56.44  		break;
   56.45  
   56.46 +	case XenbusStateUnknown:
   56.47  	case XenbusStateClosed:
   56.48  		if (be->netif != NULL)
   56.49  			kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
   56.50  		device_unregister(&dev->dev);
   56.51  		break;
   56.52  
   56.53 -	case XenbusStateUnknown:
   56.54 -	case XenbusStateInitWait:
   56.55  	default:
   56.56  		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
   56.57  				 frontend_state);
   56.58 @@ -349,7 +369,7 @@ static int connect_rings(struct backend_
   56.59  {
   56.60  	struct xenbus_device *dev = be->dev;
   56.61  	unsigned long tx_ring_ref, rx_ring_ref;
   56.62 -	unsigned int evtchn;
   56.63 +	unsigned int evtchn, rx_copy;
   56.64  	int err;
   56.65  	int val;
   56.66  
   56.67 @@ -366,6 +386,19 @@ static int connect_rings(struct backend_
   56.68  		return err;
   56.69  	}
   56.70  
   56.71 +	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
   56.72 +			   &rx_copy);
   56.73 +	if (err == -ENOENT) {
   56.74 +		err = 0;
   56.75 +		rx_copy = 0;
   56.76 +	}
   56.77 +	if (err < 0) {
   56.78 +		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
   56.79 +				 dev->otherend);
   56.80 +		return err;
   56.81 +	}
   56.82 +	be->netif->copying_receiver = !!rx_copy;
   56.83 +
   56.84  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
   56.85  			 &val) < 0)
   56.86  		val = 0;
    57.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Tue Aug 22 14:45:49 2006 -0600
    57.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Aug 23 11:11:27 2006 -0600
    57.3 @@ -58,12 +58,27 @@
    57.4  #include <xen/interface/memory.h>
    57.5  #include <xen/balloon.h>
    57.6  #include <asm/page.h>
    57.7 +#include <asm/maddr.h>
    57.8  #include <asm/uaccess.h>
    57.9  #include <xen/interface/grant_table.h>
   57.10  #include <xen/gnttab.h>
   57.11  
   57.12  #define RX_COPY_THRESHOLD 256
   57.13  
   57.14 +/* If we don't have GSO, fake things up so that we never try to use it. */
   57.15 +#ifndef NETIF_F_GSO
   57.16 +#define netif_needs_gso(dev, skb)	0
   57.17 +#define dev_disable_gso_features(dev)	((void)0)
   57.18 +#else
   57.19 +#define HAVE_GSO			1
   57.20 +static inline void dev_disable_gso_features(struct net_device *dev)
   57.21 +{
   57.22 +	/* Turn off all GSO bits except ROBUST. */
   57.23 +	dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
   57.24 +	dev->features |= NETIF_F_GSO_ROBUST;
   57.25 +}
   57.26 +#endif
   57.27 +
   57.28  #define GRANT_INVALID_REF	0
   57.29  
   57.30  #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
   57.31 @@ -83,6 +98,7 @@ struct netfront_info {
   57.32  
   57.33  	unsigned int handle;
   57.34  	unsigned int evtchn, irq;
   57.35 +	unsigned int copying_receiver;
   57.36  
   57.37  	/* Receive-ring batched refills. */
   57.38  #define RX_MIN_TARGET 8
   57.39 @@ -171,7 +187,7 @@ static inline grant_ref_t xennet_get_rx_
   57.40  
   57.41  static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
   57.42  static int setup_device(struct xenbus_device *, struct netfront_info *);
   57.43 -static struct net_device *create_netdev(int, struct xenbus_device *);
   57.44 +static struct net_device *create_netdev(int, int, struct xenbus_device *);
   57.45  
   57.46  static void netfront_closing(struct xenbus_device *);
   57.47  
   57.48 @@ -213,6 +229,7 @@ static int __devinit netfront_probe(stru
   57.49  	struct net_device *netdev;
   57.50  	struct netfront_info *info;
   57.51  	unsigned int handle;
   57.52 +	unsigned feature_rx_copy;
   57.53  
   57.54  	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
   57.55  	if (err != 1) {
   57.56 @@ -220,7 +237,22 @@ static int __devinit netfront_probe(stru
   57.57  		return err;
   57.58  	}
   57.59  
   57.60 -	netdev = create_netdev(handle, dev);
   57.61 +#ifndef CONFIG_XEN
   57.62 +	err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
   57.63 +			   &feature_rx_copy);
   57.64 +	if (err != 1) {
   57.65 +		xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
   57.66 +		return err;
   57.67 +	}
   57.68 +	if (!feature_rx_copy) {
   57.69 +		xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
   57.70 +		return -EINVAL;
   57.71 +	}
   57.72 +#else
   57.73 +	feature_rx_copy = 0;
   57.74 +#endif
   57.75 +
   57.76 +	netdev = create_netdev(handle, feature_rx_copy, dev);
   57.77  	if (IS_ERR(netdev)) {
   57.78  		err = PTR_ERR(netdev);
   57.79  		xenbus_dev_fatal(dev, err, "creating netdev");
   57.80 @@ -326,6 +358,13 @@ again:
   57.81  		goto abort_transaction;
   57.82  	}
   57.83  
   57.84 +	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
   57.85 +			    info->copying_receiver);
   57.86 +	if (err) {
   57.87 +		message = "writing request-rx-copy";
   57.88 +		goto abort_transaction;
   57.89 +	}
   57.90 +
   57.91  	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
   57.92  	if (err) {
   57.93  		message = "writing feature-rx-notify";
   57.94 @@ -338,11 +377,13 @@ again:
   57.95  		goto abort_transaction;
   57.96  	}
   57.97  
   57.98 +#ifdef HAVE_GSO
   57.99  	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
  57.100  	if (err) {
  57.101  		message = "writing feature-gso-tcpv4";
  57.102  		goto abort_transaction;
  57.103  	}
  57.104 +#endif
  57.105  
  57.106  	err = xenbus_transaction_end(xbt, 0);
  57.107  	if (err) {
  57.108 @@ -415,7 +456,8 @@ static int setup_device(struct xenbus_de
  57.109  
  57.110  	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
  57.111  	err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
  57.112 -					SA_SAMPLE_RANDOM, netdev->name, netdev);
  57.113 +					SA_SAMPLE_RANDOM, netdev->name,
  57.114 +					netdev);
  57.115  	if (err < 0)
  57.116  		goto fail;
  57.117  	info->irq = err;
  57.118 @@ -494,11 +536,14 @@ static int network_open(struct net_devic
  57.119  
  57.120  	memset(&np->stats, 0, sizeof(np->stats));
  57.121  
  57.122 -	network_alloc_rx_buffers(dev);
  57.123 -	np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
  57.124 -
  57.125 -	if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
  57.126 -		netif_rx_schedule(dev);
  57.127 +	spin_lock(&np->rx_lock);
  57.128 +	if (netif_carrier_ok(dev)) {
  57.129 +		network_alloc_rx_buffers(dev);
  57.130 +		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
  57.131 +		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
  57.132 +			netif_rx_schedule(dev);
  57.133 +	}
  57.134 +	spin_unlock(&np->rx_lock);
  57.135  
  57.136  	netif_start_queue(dev);
  57.137  
  57.138 @@ -527,8 +572,7 @@ static void network_tx_buf_gc(struct net
  57.139  	struct netfront_info *np = netdev_priv(dev);
  57.140  	struct sk_buff *skb;
  57.141  
  57.142 -	if (unlikely(!netif_carrier_ok(dev)))
  57.143 -		return;
  57.144 +	BUG_ON(!netif_carrier_ok(dev));
  57.145  
  57.146  	do {
  57.147  		prod = np->tx.sring->rsp_prod;
  57.148 @@ -597,6 +641,8 @@ static void network_alloc_rx_buffers(str
  57.149  	grant_ref_t ref;
  57.150   	unsigned long pfn;
  57.151   	void *vaddr;
  57.152 +	int nr_flips;
  57.153 +	netif_rx_request_t *req;
  57.154  
  57.155  	if (unlikely(!netif_carrier_ok(dev)))
  57.156  		return;
  57.157 @@ -609,9 +655,14 @@ static void network_alloc_rx_buffers(str
  57.158  	 */
  57.159  	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
  57.160  	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
  57.161 -		/* Allocate an skb and a page. */
  57.162 -		skb = __dev_alloc_skb(RX_COPY_THRESHOLD,
  57.163 -				      GFP_ATOMIC | __GFP_NOWARN);
  57.164 +		/*
  57.165 +		 * Allocate an skb and a page. Do not use __dev_alloc_skb as
  57.166 +		 * that will allocate page-sized buffers which is not
  57.167 +		 * necessary here.
  57.168 +		 * 16 bytes added as necessary headroom for netif_receive_skb.
  57.169 +		 */
  57.170 +		skb = alloc_skb(RX_COPY_THRESHOLD + 16,
  57.171 +				GFP_ATOMIC | __GFP_NOWARN);
  57.172  		if (unlikely(!skb))
  57.173  			goto no_skb;
  57.174  
  57.175 @@ -628,6 +679,7 @@ no_skb:
  57.176  			break;
  57.177  		}
  57.178  
  57.179 +		skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
  57.180  		skb_shinfo(skb)->frags[0].page = page;
  57.181  		skb_shinfo(skb)->nr_frags = 1;
  57.182  		__skb_queue_tail(&np->rx_batch, skb);
  57.183 @@ -646,7 +698,7 @@ no_skb:
  57.184  		np->rx_target = np->rx_max_target;
  57.185  
  57.186   refill:
  57.187 -	for (i = 0; ; i++) {
  57.188 +	for (nr_flips = i = 0; ; i++) {
  57.189  		if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
  57.190  			break;
  57.191  
  57.192 @@ -657,7 +709,6 @@ no_skb:
  57.193  		BUG_ON(np->rx_skbs[id]);
  57.194  		np->rx_skbs[id] = skb;
  57.195  
  57.196 -		RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
  57.197  		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
  57.198  		BUG_ON((signed short)ref < 0);
  57.199  		np->grant_rx_ref[id] = ref;
  57.200 @@ -665,49 +716,68 @@ no_skb:
  57.201  		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
  57.202  		vaddr = page_address(skb_shinfo(skb)->frags[0].page);
  57.203  
  57.204 -		gnttab_grant_foreign_transfer_ref(ref,
  57.205 -						  np->xbdev->otherend_id, pfn);
  57.206 -		RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
  57.207 -		np->rx_pfn_array[i] = pfn_to_mfn(pfn);
  57.208 +		req = RING_GET_REQUEST(&np->rx, req_prod + i);
  57.209 +		if (!np->copying_receiver) {
  57.210 +			gnttab_grant_foreign_transfer_ref(ref,
  57.211 +							  np->xbdev->otherend_id,
  57.212 +							  pfn);
  57.213 +			np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
  57.214 +			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.215 +				/* Remove this page before passing
  57.216 +				 * back to Xen. */
  57.217 +				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
  57.218 +				MULTI_update_va_mapping(np->rx_mcl+i,
  57.219 +							(unsigned long)vaddr,
  57.220 +							__pte(0), 0);
  57.221 +			}
  57.222 +			nr_flips++;
  57.223 +		} else {
  57.224 +			gnttab_grant_foreign_access_ref(ref,
  57.225 +							np->xbdev->otherend_id,
  57.226 +							pfn,
  57.227 +							0);
  57.228 +		}
  57.229  
  57.230 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.231 -			/* Remove this page before passing back to Xen. */
  57.232 -			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
  57.233 -			MULTI_update_va_mapping(np->rx_mcl+i,
  57.234 -						(unsigned long)vaddr,
  57.235 -						__pte(0), 0);
  57.236 -		}
  57.237 +		req->id = id;
  57.238 +		req->gref = ref;
  57.239  	}
  57.240  
  57.241 -	/* Tell the ballon driver what is going on. */
  57.242 -	balloon_update_driver_allowance(i);
  57.243 -
  57.244 -	set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
  57.245 -	reservation.nr_extents   = i;
  57.246 -	reservation.extent_order = 0;
  57.247 -	reservation.address_bits = 0;
  57.248 -	reservation.domid        = DOMID_SELF;
  57.249 -
  57.250 -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.251 -		/* After all PTEs have been zapped, flush the TLB. */
  57.252 -		np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
  57.253 -			UVMF_TLB_FLUSH|UVMF_ALL;
  57.254 +	if ( nr_flips != 0 ) {
  57.255 +		/* Tell the balloon driver what is going on. */
  57.256 +		balloon_update_driver_allowance(i);
  57.257  
  57.258 -		/* Give away a batch of pages. */
  57.259 -		np->rx_mcl[i].op = __HYPERVISOR_memory_op;
  57.260 -		np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
  57.261 -		np->rx_mcl[i].args[1] = (unsigned long)&reservation;
  57.262 +		set_xen_guest_handle(reservation.extent_start,
  57.263 +				     np->rx_pfn_array);
  57.264 +		reservation.nr_extents   = nr_flips;
  57.265 +		reservation.extent_order = 0;
  57.266 +		reservation.address_bits = 0;
  57.267 +		reservation.domid        = DOMID_SELF;
  57.268  
  57.269 -		/* Zap PTEs and give away pages in one big multicall. */
  57.270 -		(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
  57.271 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.272 +			/* After all PTEs have been zapped, flush the TLB. */
  57.273 +			np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
  57.274 +				UVMF_TLB_FLUSH|UVMF_ALL;
  57.275  
  57.276 -		/* Check return status of HYPERVISOR_memory_op(). */
  57.277 -		if (unlikely(np->rx_mcl[i].result != i))
  57.278 -			panic("Unable to reduce memory reservation\n");
  57.279 -	} else
  57.280 -		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
  57.281 -					 &reservation) != i)
  57.282 -			panic("Unable to reduce memory reservation\n");
  57.283 +			/* Give away a batch of pages. */
  57.284 +			np->rx_mcl[i].op = __HYPERVISOR_memory_op;
  57.285 +			np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
  57.286 +			np->rx_mcl[i].args[1] = (unsigned long)&reservation;
  57.287 +
  57.288 +			/* Zap PTEs and give away pages in one big
  57.289 +			 * multicall. */
  57.290 +			(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
  57.291 +
  57.292 +			/* Check return status of HYPERVISOR_memory_op(). */
  57.293 +			if (unlikely(np->rx_mcl[i].result != i))
  57.294 +				panic("Unable to reduce memory reservation\n");
  57.295 +		} else {
  57.296 +			if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
  57.297 +						 &reservation) != i)
  57.298 +				panic("Unable to reduce memory reservation\n");
  57.299 +		}
  57.300 +	} else {
  57.301 +		wmb();
  57.302 +	}
  57.303  
  57.304  	/* Above is a suitable barrier to ensure backend will see requests. */
  57.305  	np->rx.req_prod_pvt = req_prod + i;
  57.306 @@ -834,9 +904,12 @@ static int network_start_xmit(struct sk_
  57.307  
  57.308  	if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
  57.309  		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
  57.310 +#ifdef CONFIG_XEN
  57.311  	if (skb->proto_data_valid) /* remote but checksummed? */
  57.312  		tx->flags |= NETTXF_data_validated;
  57.313 +#endif
  57.314  
  57.315 +#ifdef HAVE_GSO
  57.316  	if (skb_shinfo(skb)->gso_size) {
  57.317  		struct netif_extra_info *gso = (struct netif_extra_info *)
  57.318  			RING_GET_REQUEST(&np->tx, ++i);
  57.319 @@ -855,6 +928,7 @@ static int network_start_xmit(struct sk_
  57.320  		gso->flags = 0;
  57.321  		extra = gso;
  57.322  	}
  57.323 +#endif
  57.324  
  57.325  	np->tx.req_prod_pvt = i + 1;
  57.326  
  57.327 @@ -890,12 +964,15 @@ static irqreturn_t netif_int(int irq, vo
  57.328  	unsigned long flags;
  57.329  
  57.330  	spin_lock_irqsave(&np->tx_lock, flags);
  57.331 -	network_tx_buf_gc(dev);
  57.332 -	spin_unlock_irqrestore(&np->tx_lock, flags);
  57.333  
  57.334 -	if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
  57.335 -	    likely(netif_running(dev)))
  57.336 -		netif_rx_schedule(dev);
  57.337 +	if (likely(netif_carrier_ok(dev))) {
  57.338 +		network_tx_buf_gc(dev);
  57.339 +		/* Under tx_lock: protects access to rx shared-ring indexes. */
  57.340 +		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
  57.341 +			netif_rx_schedule(dev);
  57.342 +	}
  57.343 +
  57.344 +	spin_unlock_irqrestore(&np->tx_lock, flags);
  57.345  
  57.346  	return IRQ_HANDLED;
  57.347  }
  57.348 @@ -941,8 +1018,10 @@ int xennet_get_extras(struct netfront_in
  57.349  				WPRINTK("Invalid extra type: %d\n",
  57.350  					extra->type);
  57.351  			err = -EINVAL;
  57.352 -		} else
  57.353 -			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
  57.354 +		} else {
  57.355 +			memcpy(&extras[extra->type - 1], extra,
  57.356 +			       sizeof(*extra));
  57.357 +		}
  57.358  
  57.359  		skb = xennet_get_rx_skb(np, cons);
  57.360  		ref = xennet_get_rx_ref(np, cons);
  57.361 @@ -955,10 +1034,12 @@ int xennet_get_extras(struct netfront_in
  57.362  
  57.363  static int xennet_get_responses(struct netfront_info *np,
  57.364  				struct netfront_rx_info *rinfo, RING_IDX rp,
  57.365 -				struct sk_buff_head *list, int count)
  57.366 +				struct sk_buff_head *list,
  57.367 +				int *pages_flipped_p)
  57.368  {
  57.369 -	struct mmu_update *mmu = np->rx_mmu + count;
  57.370 -	struct multicall_entry *mcl = np->rx_mcl + count;
  57.371 +	int pages_flipped = *pages_flipped_p;
  57.372 +	struct mmu_update *mmu;
  57.373 +	struct multicall_entry *mcl;
  57.374  	struct netif_rx_response *rx = &rinfo->rx;
  57.375  	struct netif_extra_info *extras = rinfo->extras;
  57.376  	RING_IDX cons = np->rx.rsp_cons;
  57.377 @@ -967,6 +1048,7 @@ static int xennet_get_responses(struct n
  57.378  	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
  57.379  	int frags = 1;
  57.380  	int err = 0;
  57.381 +	unsigned long ret;
  57.382  
  57.383  	if (rx->flags & NETRXF_extra_info) {
  57.384  		err = xennet_get_extras(np, extras, rp);
  57.385 @@ -982,6 +1064,7 @@ static int xennet_get_responses(struct n
  57.386  				WPRINTK("rx->offset: %x, size: %u\n",
  57.387  					rx->offset, rx->status);
  57.388  			err = -EINVAL;
  57.389 +			goto next;
  57.390  		}
  57.391  
  57.392  		/*
  57.393 @@ -995,36 +1078,48 @@ static int xennet_get_responses(struct n
  57.394  			goto next;
  57.395  		}
  57.396  
  57.397 -		/* Memory pressure, insufficient buffer headroom, ... */
  57.398 -		if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
  57.399 -			if (net_ratelimit())
  57.400 -				WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
  57.401 -					rx->id, rx->status);
  57.402 -			xennet_move_rx_slot(np, skb, ref);
  57.403 -			err = -ENOMEM;
  57.404 -			goto next;
  57.405 +		if (!np->copying_receiver) {
  57.406 +			/* Memory pressure, insufficient buffer
  57.407 +			 * headroom, ... */
  57.408 +			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
  57.409 +				if (net_ratelimit())
  57.410 +					WPRINTK("Unfulfilled rx req "
  57.411 +						"(id=%d, st=%d).\n",
  57.412 +						rx->id, rx->status);
  57.413 +				xennet_move_rx_slot(np, skb, ref);
  57.414 +				err = -ENOMEM;
  57.415 +				goto next;
  57.416 +			}
  57.417 +
  57.418 +			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.419 +				/* Remap the page. */
  57.420 +				struct page *page =
  57.421 +					skb_shinfo(skb)->frags[0].page;
  57.422 +				unsigned long pfn = page_to_pfn(page);
  57.423 +				void *vaddr = page_address(page);
  57.424 +
  57.425 +				mcl = np->rx_mcl + pages_flipped;
  57.426 +				mmu = np->rx_mmu + pages_flipped;
  57.427 +
  57.428 +				MULTI_update_va_mapping(mcl,
  57.429 +							(unsigned long)vaddr,
  57.430 +							pfn_pte_ma(mfn,
  57.431 +								   PAGE_KERNEL),
  57.432 +							0);
  57.433 +				mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
  57.434 +					| MMU_MACHPHYS_UPDATE;
  57.435 +				mmu->val = pfn;
  57.436 +
  57.437 +				set_phys_to_machine(pfn, mfn);
  57.438 +			}
  57.439 +			pages_flipped++;
  57.440 +		} else {
  57.441 +			ret = gnttab_end_foreign_access_ref(ref, 0);
  57.442 +			BUG_ON(!ret);
  57.443  		}
  57.444  
  57.445  		gnttab_release_grant_reference(&np->gref_rx_head, ref);
  57.446  
  57.447 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.448 -			/* Remap the page. */
  57.449 -			struct page *page = skb_shinfo(skb)->frags[0].page;
  57.450 -			unsigned long pfn = page_to_pfn(page);
  57.451 -			void *vaddr = page_address(page);
  57.452 -
  57.453 -			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
  57.454 -						pfn_pte_ma(mfn, PAGE_KERNEL),
  57.455 -						0);
  57.456 -			mcl++;
  57.457 -			mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
  57.458 -				| MMU_MACHPHYS_UPDATE;
  57.459 -			mmu->val = pfn;
  57.460 -			mmu++;
  57.461 -
  57.462 -			set_phys_to_machine(pfn, mfn);
  57.463 -		}
  57.464 -
  57.465  		__skb_queue_tail(list, skb);
  57.466  
  57.467  next:
  57.468 @@ -1050,6 +1145,8 @@ next:
  57.469  		err = -E2BIG;
  57.470  	}
  57.471  
  57.472 +	*pages_flipped_p = pages_flipped;
  57.473 +
  57.474  	return err;
  57.475  }
  57.476  
  57.477 @@ -1084,7 +1181,8 @@ static RING_IDX xennet_fill_frags(struct
  57.478  	return cons;
  57.479  }
  57.480  
  57.481 -static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
  57.482 +static int xennet_set_skb_gso(struct sk_buff *skb,
  57.483 +			      struct netif_extra_info *gso)
  57.484  {
  57.485  	if (!gso->u.gso.size) {
  57.486  		if (net_ratelimit())
  57.487 @@ -1099,6 +1197,7 @@ static int xennet_set_skb_gso(struct sk_
  57.488  		return -EINVAL;
  57.489  	}
  57.490  
  57.491 +#ifdef HAVE_GSO
  57.492  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
  57.493  	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
  57.494  
  57.495 @@ -1107,6 +1206,11 @@ static int xennet_set_skb_gso(struct sk_
  57.496  	skb_shinfo(skb)->gso_segs = 0;
  57.497  
  57.498  	return 0;
  57.499 +#else
  57.500 +	if (net_ratelimit())
  57.501 +		WPRINTK("GSO unsupported by this kernel.\n");
  57.502 +	return -EINVAL;
  57.503 +#endif
  57.504  }
  57.505  
  57.506  static int netif_poll(struct net_device *dev, int *pbudget)
  57.507 @@ -1124,7 +1228,7 @@ static int netif_poll(struct net_device 
  57.508  	struct sk_buff_head tmpq;
  57.509  	unsigned long flags;
  57.510  	unsigned int len;
  57.511 -	int pages_done;
  57.512 +	int pages_flipped = 0;
  57.513  	int err;
  57.514  
  57.515  	spin_lock(&np->rx_lock);
  57.516 @@ -1143,14 +1247,14 @@ static int netif_poll(struct net_device 
  57.517  	rp = np->rx.sring->rsp_prod;
  57.518  	rmb(); /* Ensure we see queued responses up to 'rp'. */
  57.519  
  57.520 -	for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
  57.521 +	for (i = np->rx.rsp_cons, work_done = 0;
  57.522  	     (i != rp) && (work_done < budget);
  57.523  	     np->rx.rsp_cons = ++i, work_done++) {
  57.524  		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
  57.525  		memset(extras, 0, sizeof(extras));
  57.526  
  57.527 -		err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
  57.528 -		pages_done += skb_queue_len(&tmpq);
  57.529 +		err = xennet_get_responses(np, &rinfo, rp, &tmpq,
  57.530 +					   &pages_flipped);
  57.531  
  57.532  		if (unlikely(err)) {
  57.533  err:
  57.534 @@ -1193,40 +1297,63 @@ err:
  57.535  		}
  57.536  
  57.537  		i = xennet_fill_frags(np, skb, &tmpq);
  57.538 -		skb->truesize += skb->data_len;
  57.539 +
  57.540 +		/*
  57.541 +		 * Truesize must approximate the size of true data plus
  57.542 +		 * any supervisor overheads. Adding hypervisor overheads
  57.543 +		 * has been shown to significantly reduce achievable
  57.544 +		 * bandwidth with the default receive buffer size. It is
  57.545 +		 * therefore not wise to account for it here.
  57.546 +		 *
  57.547 +		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
  57.548 +		 * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
  57.549 +		 * add the size of the data pulled in xennet_fill_frags().
  57.550 +		 *
  57.551 +		 * We also adjust for any unused space in the main data
  57.552 +		 * area by subtracting (RX_COPY_THRESHOLD - len). This is
  57.553 +		 * especially important with drivers which split incoming
  57.554 +		 * packets into header and data, using only 66 bytes of
  57.555 +		 * the main data area (see the e1000 driver for example.)
  57.556 +		 * On such systems, without this last adjustment, our
  57.557 +		 * achievable receive throughput using the standard receive
  57.558 +		 * buffer size was cut by 25%(!!!).
  57.559 +		 */
  57.560 +		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
  57.561  		skb->len += skb->data_len;
  57.562  
  57.563  		/*
  57.564  		 * Old backends do not assert data_validated but we
  57.565  		 * can infer it from csum_blank so test both flags.
  57.566  		 */
  57.567 -		if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
  57.568 +		if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
  57.569  			skb->ip_summed = CHECKSUM_UNNECESSARY;
  57.570 -			skb->proto_data_valid = 1;
  57.571 -		} else {
  57.572 +		else
  57.573  			skb->ip_summed = CHECKSUM_NONE;
  57.574 -			skb->proto_data_valid = 0;
  57.575 -		}
  57.576 +#ifdef CONFIG_XEN
  57.577 +		skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
  57.578  		skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
  57.579 -
  57.580 +#endif
  57.581  		np->stats.rx_packets++;
  57.582  		np->stats.rx_bytes += skb->len;
  57.583  
  57.584  		__skb_queue_tail(&rxq, skb);
  57.585  	}
  57.586  
  57.587 -	/* Some pages are no longer absent... */
  57.588 -	balloon_update_driver_allowance(-pages_done);
  57.589 +	if (pages_flipped) {
  57.590 +		/* Some pages are no longer absent... */
  57.591 +		balloon_update_driver_allowance(-pages_flipped);
  57.592  
  57.593 -	/* Do all the remapping work, and M2P updates, in one big hypercall. */
  57.594 -	if (likely(pages_done)) {
  57.595 -		mcl = np->rx_mcl + pages_done;
  57.596 -		mcl->op = __HYPERVISOR_mmu_update;
  57.597 -		mcl->args[0] = (unsigned long)np->rx_mmu;
  57.598 -		mcl->args[1] = pages_done;
  57.599 -		mcl->args[2] = 0;
  57.600 -		mcl->args[3] = DOMID_SELF;
  57.601 -		(void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
  57.602 +		/* Do all the remapping work and M2P updates. */
  57.603 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.604 +			mcl = np->rx_mcl + pages_flipped;
  57.605 +			mcl->op = __HYPERVISOR_mmu_update;
  57.606 +			mcl->args[0] = (unsigned long)np->rx_mmu;
  57.607 +			mcl->args[1] = pages_flipped;
  57.608 +			mcl->args[2] = 0;
  57.609 +			mcl->args[3] = DOMID_SELF;
  57.610 +			(void)HYPERVISOR_multicall(np->rx_mcl,
  57.611 +						   pages_flipped + 1);
  57.612 +		}
  57.613  	}
  57.614  
  57.615  	while ((skb = __skb_dequeue(&errq)))
  57.616 @@ -1277,6 +1404,111 @@ err:
  57.617  	return more_to_do;
  57.618  }
  57.619  
  57.620 +static void netif_release_tx_bufs(struct netfront_info *np)
  57.621 +{
  57.622 +	struct sk_buff *skb;
  57.623 +	int i;
  57.624 +
  57.625 +	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
  57.626 +		if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
  57.627 +			continue;
  57.628 +
  57.629 +		skb = np->tx_skbs[i];
  57.630 +		gnttab_end_foreign_access_ref(
  57.631 +			np->grant_tx_ref[i], GNTMAP_readonly);
  57.632 +		gnttab_release_grant_reference(
  57.633 +			&np->gref_tx_head, np->grant_tx_ref[i]);
  57.634 +		np->grant_tx_ref[i] = GRANT_INVALID_REF;
  57.635 +		add_id_to_freelist(np->tx_skbs, i);
  57.636 +		dev_kfree_skb_irq(skb);
  57.637 +	}
  57.638 +}
  57.639 +
  57.640 +static void netif_release_rx_bufs(struct netfront_info *np)
  57.641 +{
  57.642 +	struct mmu_update      *mmu = np->rx_mmu;
  57.643 +	struct multicall_entry *mcl = np->rx_mcl;
  57.644 +	struct sk_buff_head free_list;
  57.645 +	struct sk_buff *skb;
  57.646 +	unsigned long mfn;
  57.647 +	int xfer = 0, noxfer = 0, unused = 0;
  57.648 +	int id, ref;
  57.649 +
  57.650 +	if (np->copying_receiver) {
  57.651 +		printk("%s: fix me for copying receiver.\n", __FUNCTION__);
  57.652 +		return;
  57.653 +	}
  57.654 +
  57.655 +	skb_queue_head_init(&free_list);
  57.656 +
  57.657 +	spin_lock(&np->rx_lock);
  57.658 +
  57.659 +	for (id = 0; id < NET_RX_RING_SIZE; id++) {
  57.660 +		if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
  57.661 +			unused++;
  57.662 +			continue;
  57.663 +		}
  57.664 +
  57.665 +		skb = np->rx_skbs[id];
  57.666 +		mfn = gnttab_end_foreign_transfer_ref(ref);
  57.667 +		gnttab_release_grant_reference(&np->gref_rx_head, ref);
  57.668 +		np->grant_rx_ref[id] = GRANT_INVALID_REF;
  57.669 +		add_id_to_freelist(np->rx_skbs, id);
  57.670 +
  57.671 +		if (0 == mfn) {
  57.672 +			struct page *page = skb_shinfo(skb)->frags[0].page;
  57.673 +			balloon_release_driver_page(page);
  57.674 +			skb_shinfo(skb)->nr_frags = 0;
  57.675 +			dev_kfree_skb(skb);
  57.676 +			noxfer++;
  57.677 +			continue;
  57.678 +		}
  57.679 +
  57.680 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.681 +			/* Remap the page. */
  57.682 +			struct page *page = skb_shinfo(skb)->frags[0].page;
  57.683 +			unsigned long pfn = page_to_pfn(page);
  57.684 +			void *vaddr = page_address(page);
  57.685 +
  57.686 +			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
  57.687 +						pfn_pte_ma(mfn, PAGE_KERNEL),
  57.688 +						0);
  57.689 +			mcl++;
  57.690 +			mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
  57.691 +				| MMU_MACHPHYS_UPDATE;
  57.692 +			mmu->val = pfn;
  57.693 +			mmu++;
  57.694 +
  57.695 +			set_phys_to_machine(pfn, mfn);
  57.696 +		}
  57.697 +		__skb_queue_tail(&free_list, skb);
  57.698 +		xfer++;
  57.699 +	}
  57.700 +
  57.701 +	printk("%s: %d xfer, %d noxfer, %d unused\n",
  57.702 +	       __FUNCTION__, xfer, noxfer, unused);
  57.703 +
  57.704 +	if (xfer) {
  57.705 +		/* Some pages are no longer absent... */
  57.706 +		balloon_update_driver_allowance(-xfer);
  57.707 +
  57.708 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  57.709 +			/* Do all the remapping work and M2P updates. */
  57.710 +			mcl->op = __HYPERVISOR_mmu_update;
  57.711 +			mcl->args[0] = (unsigned long)np->rx_mmu;
  57.712 +			mcl->args[1] = mmu - np->rx_mmu;
  57.713 +			mcl->args[2] = 0;
  57.714 +			mcl->args[3] = DOMID_SELF;
  57.715 +			mcl++;
  57.716 +			HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
  57.717 +		}
  57.718 +	}
  57.719 +
  57.720 +	while ((skb = __skb_dequeue(&free_list)) != NULL)
  57.721 +		dev_kfree_skb(skb);
  57.722 +
  57.723 +	spin_unlock(&np->rx_lock);
  57.724 +}
  57.725  
  57.726  static int network_close(struct net_device *dev)
  57.727  {
  57.728 @@ -1321,6 +1553,7 @@ static int xennet_set_sg(struct net_devi
  57.729  
  57.730  static int xennet_set_tso(struct net_device *dev, u32 data)
  57.731  {
  57.732 +#ifdef HAVE_GSO
  57.733  	if (data) {
  57.734  		struct netfront_info *np = netdev_priv(dev);
  57.735  		int val;
  57.736 @@ -1333,15 +1566,20 @@ static int xennet_set_tso(struct net_dev
  57.737  	}
  57.738  
  57.739  	return ethtool_op_set_tso(dev, data);
  57.740 +#else
  57.741 +	return -ENOSYS;
  57.742 +#endif
  57.743  }
  57.744  
  57.745  static void xennet_set_features(struct net_device *dev)
  57.746  {
  57.747 -	/* Turn off all GSO bits except ROBUST. */
  57.748 -	dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
  57.749 -	dev->features |= NETIF_F_GSO_ROBUST;
  57.750 +	dev_disable_gso_features(dev);
  57.751  	xennet_set_sg(dev, 0);
  57.752  
  57.753 +	/* We need checksum offload to enable scatter/gather and TSO. */
  57.754 +	if (!(dev->features & NETIF_F_IP_CSUM))
  57.755 +		return;
  57.756 +
  57.757  	if (!xennet_set_sg(dev, 1))
  57.758  		xennet_set_tso(dev, 1);
  57.759  }
  57.760 @@ -1352,6 +1590,7 @@ static void network_connect(struct net_d
  57.761  	int i, requeue_idx;
  57.762  	struct sk_buff *skb;
  57.763  	grant_ref_t ref;
  57.764 +	netif_rx_request_t *req;
  57.765  
  57.766  	xennet_set_features(dev);
  57.767  
  57.768 @@ -1359,27 +1598,15 @@ static void network_connect(struct net_d
  57.769  	spin_lock(&np->rx_lock);
  57.770  
  57.771  	/*
  57.772 -         * Recovery procedure:
  57.773 +	 * Recovery procedure:
  57.774  	 *  NB. Freelist index entries are always going to be less than
  57.775  	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
  57.776  	 *  greater than PAGE_OFFSET: we use this property to distinguish
  57.777  	 *  them.
  57.778 -         */
  57.779 +	 */
  57.780  
  57.781  	/* Step 1: Discard all pending TX packet fragments. */
  57.782 -	for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
  57.783 -		if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
  57.784 -			continue;
  57.785 -
  57.786 -		skb = np->tx_skbs[i];
  57.787 -		gnttab_end_foreign_access_ref(
  57.788 -			np->grant_tx_ref[i], GNTMAP_readonly);
  57.789 -		gnttab_release_grant_reference(
  57.790 -			&np->gref_tx_head, np->grant_tx_ref[i]);
  57.791 -		np->grant_tx_ref[i] = GRANT_INVALID_REF;
  57.792 -		add_id_to_freelist(np->tx_skbs, i);
  57.793 -		dev_kfree_skb_irq(skb);
  57.794 -	}
  57.795 +	netif_release_tx_bufs(np);
  57.796  
  57.797  	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
  57.798  	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
  57.799 @@ -1388,13 +1615,20 @@ static void network_connect(struct net_d
  57.800  
  57.801  		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
  57.802  		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
  57.803 +		req = RING_GET_REQUEST(&np->rx, requeue_idx);
  57.804  
  57.805 -		gnttab_grant_foreign_transfer_ref(
  57.806 -			ref, np->xbdev->otherend_id,
  57.807 -			page_to_pfn(skb_shinfo(skb)->frags->page));
  57.808 -
  57.809 -		RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
  57.810 -		RING_GET_REQUEST(&np->rx, requeue_idx)->id   = requeue_idx;
  57.811 +		if (!np->copying_receiver) {
  57.812 +			gnttab_grant_foreign_transfer_ref(
  57.813 +				ref, np->xbdev->otherend_id,
  57.814 +				page_to_pfn(skb_shinfo(skb)->frags->page));
  57.815 +		} else {
  57.816 +			gnttab_grant_foreign_access_ref(
  57.817 +				ref, np->xbdev->otherend_id,
  57.818 +				page_to_pfn(skb_shinfo(skb)->frags->page),
  57.819 +				0);
  57.820 +		}
  57.821 +		req->gref = ref;
  57.822 +		req->id   = requeue_idx;
  57.823  
  57.824  		requeue_idx++;
  57.825  	}
  57.826 @@ -1419,6 +1653,8 @@ static void network_connect(struct net_d
  57.827  static void netif_uninit(struct net_device *dev)
  57.828  {
  57.829  	struct netfront_info *np = netdev_priv(dev);
  57.830 +	netif_release_tx_bufs(np);
  57.831 +	netif_release_rx_bufs(np);
  57.832  	gnttab_free_grant_references(np->gref_tx_head);
  57.833  	gnttab_free_grant_references(np->gref_rx_head);
  57.834  }
  57.835 @@ -1577,13 +1813,8 @@ static void network_set_multicast_list(s
  57.836  {
  57.837  }
  57.838  
  57.839 -/** Create a network device.
  57.840 - * @param handle device handle
  57.841 - * @param val return parameter for created device
  57.842 - * @return 0 on success, error code otherwise
  57.843 - */
  57.844 -static struct net_device * __devinit create_netdev(int handle,
  57.845 -						   struct xenbus_device *dev)
  57.846 +static struct net_device * __devinit
  57.847 +create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
  57.848  {
  57.849  	int i, err = 0;
  57.850  	struct net_device *netdev = NULL;
  57.851 @@ -1596,9 +1827,10 @@ static struct net_device * __devinit cre
  57.852  		return ERR_PTR(-ENOMEM);
  57.853  	}
  57.854  
  57.855 -	np                = netdev_priv(netdev);
  57.856 -	np->handle        = handle;
  57.857 -	np->xbdev         = dev;
  57.858 +	np                   = netdev_priv(netdev);
  57.859 +	np->handle           = handle;
  57.860 +	np->xbdev            = dev;
  57.861 +	np->copying_receiver = copying_receiver;
  57.862  
  57.863  	netif_carrier_off(netdev);
  57.864  
  57.865 @@ -1812,7 +2044,7 @@ static int __init netif_init(void)
  57.866  	if (!is_running_on_xen())
  57.867  		return -ENODEV;
  57.868  
  57.869 -	if (xen_start_info->flags & SIF_INITDOMAIN)
  57.870 +	if (is_initial_xendomain())
  57.871  		return 0;
  57.872  
  57.873  	IPRINTK("Initialising virtual ethernet driver.\n");
    58.1 --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    58.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    58.3 @@ -181,6 +181,7 @@ static void pciback_frontend_changed(str
    58.4  		xenbus_switch_state(xdev, XenbusStateClosing);
    58.5  		break;
    58.6  
    58.7 +	case XenbusStateUnknown:
    58.8  	case XenbusStateClosed:
    58.9  		dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
   58.10  		device_unregister(&xdev->dev);
    59.1 --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    59.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    59.3 @@ -225,6 +225,7 @@ static void pcifront_backend_changed(str
    59.4  		pcifront_try_disconnect(pdev);
    59.5  		break;
    59.6  
    59.7 +	case XenbusStateUnknown:
    59.8  	case XenbusStateClosed:
    59.9  		dev_warn(&xdev->dev, "backend went away!\n");
   59.10  		pcifront_try_disconnect(pdev);
    60.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue Aug 22 14:45:49 2006 -0600
    60.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Aug 23 11:11:27 2006 -0600
    60.3 @@ -108,7 +108,6 @@ static int privcmd_ioctl(struct inode *i
    60.4  	}
    60.5  	break;
    60.6  
    60.7 -#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
    60.8  	case IOCTL_PRIVCMD_MMAP: {
    60.9  #define PRIVCMD_MMAP_SZ 32
   60.10  		privcmd_mmap_t mmapcmd;
   60.11 @@ -116,6 +115,9 @@ static int privcmd_ioctl(struct inode *i
   60.12  		privcmd_mmap_entry_t __user *p;
   60.13  		int i, rc;
   60.14  
   60.15 +		if (!is_initial_xendomain())
   60.16 +			return -EPERM;
   60.17 +
   60.18  		if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
   60.19  			return -EFAULT;
   60.20  
   60.21 @@ -162,9 +164,12 @@ static int privcmd_ioctl(struct inode *i
   60.22  		privcmd_mmapbatch_t m;
   60.23  		struct vm_area_struct *vma = NULL;
   60.24  		xen_pfn_t __user *p;
   60.25 -		unsigned long addr, mfn; 
   60.26 +		unsigned long addr, mfn;
   60.27  		int i;
   60.28  
   60.29 +		if (!is_initial_xendomain())
   60.30 +			return -EPERM;
   60.31 +
   60.32  		if (copy_from_user(&m, udata, sizeof(m))) {
   60.33  			ret = -EFAULT;
   60.34  			goto batch_err;
   60.35 @@ -215,7 +220,6 @@ static int privcmd_ioctl(struct inode *i
   60.36  		break;
   60.37  	}
   60.38  	break;
   60.39 -#endif
   60.40  
   60.41  	default:
   60.42  		ret = -EINVAL;
   60.43 @@ -246,7 +250,7 @@ static int capabilities_read(char *page,
   60.44  	int len = 0;
   60.45  	*page = 0;
   60.46  
   60.47 -	if (xen_start_info->flags & SIF_INITDOMAIN)
   60.48 +	if (is_initial_xendomain())
   60.49  		len = sprintf( page, "control_d\n" );
   60.50  
   60.51  	*eof = 1;
    61.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Tue Aug 22 14:45:49 2006 -0600
    61.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c	Wed Aug 23 11:11:27 2006 -0600
    61.3 @@ -34,7 +34,6 @@ struct backend_info
    61.4  
    61.5  	/* watch front end for changes */
    61.6  	struct xenbus_watch backend_watch;
    61.7 -	enum xenbus_state frontend_state;
    61.8  };
    61.9  
   61.10  static void maybe_connect(struct backend_info *be);
   61.11 @@ -143,8 +142,6 @@ static void frontend_changed(struct xenb
   61.12  	struct backend_info *be = dev->dev.driver_data;
   61.13  	int err;
   61.14  
   61.15 -	be->frontend_state = frontend_state;
   61.16 -
   61.17  	switch (frontend_state) {
   61.18  	case XenbusStateInitialising:
   61.19  	case XenbusStateInitialised:
   61.20 @@ -162,13 +159,12 @@ static void frontend_changed(struct xenb
   61.21  		be->instance = -1;
   61.22  		break;
   61.23  
   61.24 +	case XenbusStateUnknown:
   61.25  	case XenbusStateClosed:
   61.26  		device_unregister(&be->dev->dev);
   61.27  		tpmback_remove(dev);
   61.28  		break;
   61.29  
   61.30 -	case XenbusStateUnknown:
   61.31 -	case XenbusStateInitWait:
   61.32  	default:
   61.33  		xenbus_dev_fatal(dev, -EINVAL,
   61.34  				 "saw state %d at frontend",
    62.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Tue Aug 22 14:45:49 2006 -0600
    62.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c	Wed Aug 23 11:11:27 2006 -0600
    62.3 @@ -274,7 +274,7 @@ enum xenbus_state xenbus_read_driver_sta
    62.4  	enum xenbus_state result;
    62.5  	int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
    62.6  	if (err)
    62.7 -		result = XenbusStateClosed;
    62.8 +		result = XenbusStateUnknown;
    62.9  
   62.10  	return result;
   62.11  }
    63.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c	Tue Aug 22 14:45:49 2006 -0600
    63.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c	Wed Aug 23 11:11:27 2006 -0600
    63.3 @@ -47,11 +47,6 @@ static DECLARE_WORK(probe_work, xenbus_p
    63.4  
    63.5  DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
    63.6  
    63.7 -static inline struct xenstore_domain_interface *xenstore_domain_interface(void)
    63.8 -{
    63.9 -	return mfn_to_virt(xen_start_info->store_mfn);
   63.10 -}
   63.11 -
   63.12  static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
   63.13  {
   63.14  	if (unlikely(xenstored_ready == 0)) {
   63.15 @@ -90,7 +85,7 @@ static const void *get_input_chunk(XENST
   63.16  
   63.17  int xb_write(const void *data, unsigned len)
   63.18  {
   63.19 -	struct xenstore_domain_interface *intf = xenstore_domain_interface();
   63.20 +	struct xenstore_domain_interface *intf = xen_store_interface;
   63.21  	XENSTORE_RING_IDX cons, prod;
   63.22  	int rc;
   63.23  
   63.24 @@ -129,7 +124,7 @@ int xb_write(const void *data, unsigned 
   63.25  		intf->req_prod += avail;
   63.26  
   63.27  		/* This implies mb() before other side sees interrupt. */
   63.28 -		notify_remote_via_evtchn(xen_start_info->store_evtchn);
   63.29 +		notify_remote_via_evtchn(xen_store_evtchn);
   63.30  	}
   63.31  
   63.32  	return 0;
   63.33 @@ -137,7 +132,7 @@ int xb_write(const void *data, unsigned 
   63.34  
   63.35  int xb_read(void *data, unsigned len)
   63.36  {
   63.37 -	struct xenstore_domain_interface *intf = xenstore_domain_interface();
   63.38 +	struct xenstore_domain_interface *intf = xen_store_interface;
   63.39  	XENSTORE_RING_IDX cons, prod;
   63.40  	int rc;
   63.41  
   63.42 @@ -180,7 +175,7 @@ int xb_read(void *data, unsigned len)
   63.43  		pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
   63.44  
   63.45  		/* Implies mb(): they will see new header. */
   63.46 -		notify_remote_via_evtchn(xen_start_info->store_evtchn);
   63.47 +		notify_remote_via_evtchn(xen_store_evtchn);
   63.48  	}
   63.49  
   63.50  	return 0;
   63.51 @@ -195,7 +190,7 @@ int xb_init_comms(void)
   63.52  		unbind_from_irqhandler(xenbus_irq, &xb_waitq);
   63.53  
   63.54  	err = bind_evtchn_to_irqhandler(
   63.55 -		xen_start_info->store_evtchn, wake_waiting,
   63.56 +		xen_store_evtchn, wake_waiting,
   63.57  		0, "xenbus", &xb_waitq);
   63.58  	if (err <= 0) {
   63.59  		printk(KERN_ERR "XENBUS request irq failed %i\n", err);
    64.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h	Tue Aug 22 14:45:49 2006 -0600
    64.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h	Wed Aug 23 11:11:27 2006 -0600
    64.3 @@ -39,5 +39,7 @@ int xb_write(const void *data, unsigned 
    64.4  int xb_read(void *data, unsigned len);
    64.5  int xs_input_avail(void);
    64.6  extern wait_queue_head_t xb_waitq;
    64.7 +extern struct xenstore_domain_interface *xen_store_interface;
    64.8 +extern int xen_store_evtchn;
    64.9  
   64.10  #endif /* _XENBUS_COMMS_H */
    65.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Tue Aug 22 14:45:49 2006 -0600
    65.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Wed Aug 23 11:11:27 2006 -0600
    65.3 @@ -285,7 +285,7 @@ static int xenbus_dev_open(struct inode 
    65.4  {
    65.5  	struct xenbus_dev_data *u;
    65.6  
    65.7 -	if (xen_start_info->store_evtchn == 0)
    65.8 +	if (xen_store_evtchn == 0)
    65.9  		return -ENOENT;
   65.10  
   65.11  	nonseekable_open(inode, filp);
   65.12 @@ -346,7 +346,7 @@ static struct file_operations xenbus_dev
   65.13  	.poll = xenbus_dev_poll,
   65.14  };
   65.15  
   65.16 -static int __init
   65.17 +int __init
   65.18  xenbus_dev_init(void)
   65.19  {
   65.20  	xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
   65.21 @@ -355,5 +355,3 @@ xenbus_dev_init(void)
   65.22  
   65.23  	return 0;
   65.24  }
   65.25 -
   65.26 -__initcall(xenbus_dev_init);
    66.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Tue Aug 22 14:45:49 2006 -0600
    66.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Wed Aug 23 11:11:27 2006 -0600
    66.3 @@ -45,19 +45,35 @@
    66.4  
    66.5  #include <asm/io.h>
    66.6  #include <asm/page.h>
    66.7 +#include <asm/maddr.h>
    66.8  #include <asm/pgtable.h>
    66.9  #include <asm/hypervisor.h>
   66.10  #include <xen/xenbus.h>
   66.11  #include <xen/xen_proc.h>
   66.12  #include <xen/evtchn.h>
   66.13  #include <xen/features.h>
   66.14 +#include <xen/hvm.h>
   66.15  
   66.16  #include "xenbus_comms.h"
   66.17  
   66.18 +int xen_store_evtchn;
   66.19 +struct xenstore_domain_interface *xen_store_interface;
   66.20 +static unsigned long xen_store_mfn;
   66.21 +
   66.22  extern struct mutex xenwatch_mutex;
   66.23  
   66.24  static struct notifier_block *xenstore_chain;
   66.25  
   66.26 +static void wait_for_devices(struct xenbus_driver *xendrv);
   66.27 +
   66.28 +static int xenbus_probe_frontend(const char *type, const char *name);
   66.29 +static int xenbus_uevent_backend(struct device *dev, char **envp,
   66.30 +				 int num_envp, char *buffer, int buffer_size);
   66.31 +static int xenbus_probe_backend(const char *type, const char *domid);
   66.32 +
   66.33 +static int xenbus_dev_probe(struct device *_dev);
   66.34 +static int xenbus_dev_remove(struct device *_dev);
   66.35 +
   66.36  /* If something in array of ids matches this device, return it. */
   66.37  static const struct xenbus_device_id *
   66.38  match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
   66.39 @@ -166,15 +182,16 @@ static int read_frontend_details(struct 
   66.40  
   66.41  
   66.42  /* Bus type for frontend drivers. */
   66.43 -static int xenbus_probe_frontend(const char *type, const char *name);
   66.44  static struct xen_bus_type xenbus_frontend = {
   66.45  	.root = "device",
   66.46  	.levels = 2, 		/* device/type/<id> */
   66.47  	.get_bus_id = frontend_bus_id,
   66.48  	.probe = xenbus_probe_frontend,
   66.49  	.bus = {
   66.50 -		.name  = "xen",
   66.51 -		.match = xenbus_match,
   66.52 +		.name     = "xen",
   66.53 +		.match    = xenbus_match,
   66.54 +		.probe    = xenbus_dev_probe,
   66.55 +		.remove   = xenbus_dev_remove,
   66.56  	},
   66.57  	.dev = {
   66.58  		.bus_id = "xen",
   66.59 @@ -219,18 +236,17 @@ static int backend_bus_id(char bus_id[BU
   66.60  	return 0;
   66.61  }
   66.62  
   66.63 -static int xenbus_uevent_backend(struct device *dev, char **envp,
   66.64 -				 int num_envp, char *buffer, int buffer_size);
   66.65 -static int xenbus_probe_backend(const char *type, const char *domid);
   66.66  static struct xen_bus_type xenbus_backend = {
   66.67  	.root = "backend",
   66.68  	.levels = 3, 		/* backend/type/<frontend>/<id> */
   66.69  	.get_bus_id = backend_bus_id,
   66.70  	.probe = xenbus_probe_backend,
   66.71  	.bus = {
   66.72 -		.name  = "xen-backend",
   66.73 -		.match = xenbus_match,
   66.74 -		.uevent = xenbus_uevent_backend,
   66.75 +		.name     = "xen-backend",
   66.76 +		.match    = xenbus_match,
   66.77 +		.probe    = xenbus_dev_probe,
   66.78 +		.remove   = xenbus_dev_remove,
   66.79 +		.uevent   = xenbus_uevent_backend,
   66.80  	},
   66.81  	.dev = {
   66.82  		.bus_id = "xen-backend",
   66.83 @@ -397,8 +413,6 @@ static int xenbus_register_driver_common
   66.84  	drv->driver.name = drv->name;
   66.85  	drv->driver.bus = &bus->bus;
   66.86  	drv->driver.owner = drv->owner;
   66.87 -	drv->driver.probe = xenbus_dev_probe;
   66.88 -	drv->driver.remove = xenbus_dev_remove;
   66.89  
   66.90  	mutex_lock(&xenwatch_mutex);
   66.91  	ret = driver_register(&drv->driver);
   66.92 @@ -408,9 +422,18 @@ static int xenbus_register_driver_common
   66.93  
   66.94  int xenbus_register_frontend(struct xenbus_driver *drv)
   66.95  {
   66.96 +	int ret;
   66.97 +
   66.98  	drv->read_otherend_details = read_backend_details;
   66.99  
  66.100 -	return xenbus_register_driver_common(drv, &xenbus_frontend);
  66.101 +	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
  66.102 +	if (ret)
  66.103 +		return ret;
  66.104 +
  66.105 +	/* If this driver is loaded as a module wait for devices to attach. */
  66.106 +	wait_for_devices(drv);
  66.107 +
  66.108 +	return 0;
  66.109  }
  66.110  EXPORT_SYMBOL_GPL(xenbus_register_frontend);
  66.111  
  66.112 @@ -829,7 +852,7 @@ static int resume_dev(struct device *dev
  66.113  			printk(KERN_WARNING
  66.114  			       "xenbus: resume %s failed: %i\n", 
  66.115  			       dev->bus_id, err);
  66.116 -			return err; 
  66.117 +			return err;
  66.118  		}
  66.119  	}
  66.120  
  66.121 @@ -841,7 +864,7 @@ static int resume_dev(struct device *dev
  66.122  		return err;
  66.123  	}
  66.124  
  66.125 -	return 0; 
  66.126 +	return 0;
  66.127  }
  66.128  
  66.129  void xenbus_suspend(void)
  66.130 @@ -917,8 +940,7 @@ static int xsd_kva_mmap(struct file *fil
  66.131  	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
  66.132  		return -EINVAL;
  66.133  
  66.134 -	if (remap_pfn_range(vma, vma->vm_start,
  66.135 -			    mfn_to_pfn(xen_start_info->store_mfn),
  66.136 +	if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
  66.137  			    size, vma->vm_page_prot))
  66.138  		return -EAGAIN;
  66.139  
  66.140 @@ -930,7 +952,7 @@ static int xsd_kva_read(char *page, char
  66.141  {
  66.142  	int len;
  66.143  
  66.144 -	len  = sprintf(page, "0x%p", mfn_to_virt(xen_start_info->store_mfn));
  66.145 +	len  = sprintf(page, "0x%p", xen_store_interface);
  66.146  	*eof = 1;
  66.147  	return len;
  66.148  }
  66.149 @@ -940,16 +962,15 @@ static int xsd_port_read(char *page, cha
  66.150  {
  66.151  	int len;
  66.152  
  66.153 -	len  = sprintf(page, "%d", xen_start_info->store_evtchn);
  66.154 +	len  = sprintf(page, "%d", xen_store_evtchn);
  66.155  	*eof = 1;
  66.156  	return len;
  66.157  }
  66.158  #endif
  66.159  
  66.160 -
  66.161  static int __init xenbus_probe_init(void)
  66.162  {
  66.163 -	int err = 0, dom0;
  66.164 +	int err = 0;
  66.165  	unsigned long page = 0;
  66.166  
  66.167  	DPRINTK("");
  66.168 @@ -964,9 +985,7 @@ static int __init xenbus_probe_init(void
  66.169  	/*
  66.170  	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
  66.171  	 */
  66.172 -	dom0 = (xen_start_info->store_evtchn == 0);
  66.173 -
  66.174 -	if (dom0) {
  66.175 +	if (is_initial_xendomain()) {
  66.176  		struct evtchn_alloc_unbound alloc_unbound;
  66.177  
  66.178  		/* Allocate page. */
  66.179 @@ -974,7 +993,7 @@ static int __init xenbus_probe_init(void
  66.180  		if (!page)
  66.181  			return -ENOMEM;
  66.182  
  66.183 -		xen_start_info->store_mfn =
  66.184 +		xen_store_mfn = xen_start_info->store_mfn =
  66.185  			pfn_to_mfn(virt_to_phys((void *)page) >>
  66.186  				   PAGE_SHIFT);
  66.187  
  66.188 @@ -987,7 +1006,8 @@ static int __init xenbus_probe_init(void
  66.189  		if (err == -ENOSYS)
  66.190  			goto err;
  66.191  		BUG_ON(err);
  66.192 -		xen_start_info->store_evtchn = alloc_unbound.port;
  66.193 +		xen_store_evtchn = xen_start_info->store_evtchn =
  66.194 +			alloc_unbound.port;
  66.195  
  66.196  #ifdef CONFIG_PROC_FS
  66.197  		/* And finally publish the above info in /proc/xen */
  66.198 @@ -1003,8 +1023,23 @@ static int __init xenbus_probe_init(void
  66.199  		if (xsd_port_intf)
  66.200  			xsd_port_intf->read_proc = xsd_port_read;
  66.201  #endif
  66.202 -	} else
  66.203 +		xen_store_interface = mfn_to_virt(xen_store_mfn);
  66.204 +	} else {
  66.205  		xenstored_ready = 1;
  66.206 +#ifdef CONFIG_XEN
  66.207 +		xen_store_evtchn = xen_start_info->store_evtchn;
  66.208 +		xen_store_mfn = xen_start_info->store_mfn;
  66.209 +		xen_store_interface = mfn_to_virt(xen_store_mfn);
  66.210 +#else
  66.211 +		xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
  66.212 +		xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
  66.213 +		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
  66.214 +					      PAGE_SIZE);
  66.215 +#endif
  66.216 +	}
  66.217 +
  66.218 +
  66.219 +	xenbus_dev_init();
  66.220  
  66.221  	/* Initialize the interface to xenstore. */
  66.222  	err = xs_init();
  66.223 @@ -1018,7 +1053,7 @@ static int __init xenbus_probe_init(void
  66.224  	device_register(&xenbus_frontend.dev);
  66.225  	device_register(&xenbus_backend.dev);
  66.226  
  66.227 -	if (!dom0)
  66.228 +	if (!is_initial_xendomain())
  66.229  		xenbus_probe(NULL);
  66.230  
  66.231  	return 0;
  66.232 @@ -1038,10 +1073,13 @@ static int __init xenbus_probe_init(void
  66.233  
  66.234  postcore_initcall(xenbus_probe_init);
  66.235  
  66.236 +MODULE_LICENSE("Dual BSD/GPL");
  66.237 +
  66.238  
  66.239  static int is_disconnected_device(struct device *dev, void *data)
  66.240  {
  66.241  	struct xenbus_device *xendev = to_xenbus_device(dev);
  66.242 +	struct device_driver *drv = data;
  66.243  
  66.244  	/*
  66.245  	 * A device with no driver will never connect. We care only about
  66.246 @@ -1050,18 +1088,27 @@ static int is_disconnected_device(struct
  66.247  	if (!dev->driver)
  66.248  		return 0;
  66.249  
  66.250 +	/* Is this search limited to a particular driver? */
  66.251 +	if (drv && (dev->driver != drv))
  66.252 +		return 0;
  66.253 +
  66.254  	return (xendev->state != XenbusStateConnected);
  66.255  }
  66.256  
  66.257 -static int exists_disconnected_device(void)
  66.258 +static int exists_disconnected_device(struct device_driver *drv)
  66.259  {
  66.260 -	return bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL,
  66.261 +	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
  66.262  				is_disconnected_device);
  66.263  }
  66.264  
  66.265  static int print_device_status(struct device *dev, void *data)
  66.266  {
  66.267  	struct xenbus_device *xendev = to_xenbus_device(dev);
  66.268 +	struct device_driver *drv = data;
  66.269 +
  66.270 +	/* Is this operation limited to a particular driver? */
  66.271 +	if (drv && (dev->driver != drv))
  66.272 +		return 0;
  66.273  
  66.274  	if (!dev->driver) {
  66.275  		/* Information only: is this too noisy? */
  66.276 @@ -1076,6 +1123,9 @@ static int print_device_status(struct de
  66.277  	return 0;
  66.278  }
  66.279  
  66.280 +/* We only wait for device setup after most initcalls have run. */
  66.281 +static int ready_to_wait_for_devices;
  66.282 +
  66.283  /*
  66.284   * On a 10 second timeout, wait for all devices currently configured.  We need
  66.285   * to do this to guarantee that the filesystems and / or network devices
  66.286 @@ -1090,20 +1140,31 @@ static int print_device_status(struct de
  66.287   * boot slightly, but of course needs tools or manual intervention to set up
  66.288   * those flags correctly.
  66.289   */
  66.290 -static int __init wait_for_devices(void)
  66.291 +static void wait_for_devices(struct xenbus_driver *xendrv)
  66.292  {
  66.293  	unsigned long timeout = jiffies + 10*HZ;
  66.294 -
  66.295 -	if (!is_running_on_xen())
  66.296 -		return -ENODEV;
  66.297 +	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
  66.298  
  66.299 -	while (time_before(jiffies, timeout) && exists_disconnected_device())
  66.300 -		schedule_timeout_interruptible(HZ/10);
  66.301 +	if (!ready_to_wait_for_devices || !is_running_on_xen())
  66.302 +		return;
  66.303  
  66.304 -	bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL,
  66.305 +	while (exists_disconnected_device(drv)) {
  66.306 +		if (time_after(jiffies, timeout))
  66.307 +			break;
  66.308 +		schedule_timeout_interruptible(HZ/10);
  66.309 +	}
  66.310 +
  66.311 +	bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
  66.312  			 print_device_status);
  66.313 +}
  66.314  
  66.315 +#ifndef MODULE
  66.316 +static int __init boot_wait_for_devices(void)
  66.317 +{
  66.318 +	ready_to_wait_for_devices = 1;
  66.319 +	wait_for_devices(NULL);
  66.320  	return 0;
  66.321  }
  66.322  
  66.323 -late_initcall(wait_for_devices);
  66.324 +late_initcall(boot_wait_for_devices);
  66.325 +#endif
    67.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Tue Aug 22 14:45:49 2006 -0600
    67.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Wed Aug 23 11:11:27 2006 -0600
    67.3 @@ -665,7 +665,17 @@ EXPORT_SYMBOL_GPL(unregister_xenbus_watc
    67.4  
    67.5  void xs_suspend(void)
    67.6  {
    67.7 +	struct xenbus_watch *watch;
    67.8 +	char token[sizeof(watch) * 2 + 1];
    67.9 +
   67.10  	down_write(&xs_state.suspend_mutex);
   67.11 +
   67.12 +	/* No need for watches_lock: the suspend_mutex is sufficient. */
   67.13 +	list_for_each_entry(watch, &watches, list) {
   67.14 +		sprintf(token, "%lX", (long)watch);
   67.15 +		xs_unwatch(watch->node, token);
   67.16 +	}
   67.17 +
   67.18  	mutex_lock(&xs_state.request_mutex);
   67.19  }
   67.20  
    68.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Tue Aug 22 14:45:49 2006 -0600
    68.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h	Wed Aug 23 11:11:27 2006 -0600
    68.3 @@ -42,11 +42,21 @@
    68.4  #define __STR(x) #x
    68.5  #define STR(x) __STR(x)
    68.6  
    68.7 +#ifdef CONFIG_XEN
    68.8 +#define HYPERCALL_STR(name)					\
    68.9 +	"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
   68.10 +#else
   68.11 +#define HYPERCALL_STR(name)					\
   68.12 +	"mov hypercall_stubs,%%eax; "				\
   68.13 +	"add $("STR(__HYPERVISOR_##name)" * 32),%%eax; "	\
   68.14 +	"call *%%eax"
   68.15 +#endif
   68.16 +
   68.17  #define _hypercall0(type, name)			\
   68.18  ({						\
   68.19  	long __res;				\
   68.20  	asm volatile (				\
   68.21 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.22 +		HYPERCALL_STR(name)		\
   68.23  		: "=a" (__res)			\
   68.24  		:				\
   68.25  		: "memory" );			\
   68.26 @@ -57,7 +67,7 @@
   68.27  ({								\
   68.28  	long __res, __ign1;					\
   68.29  	asm volatile (						\
   68.30 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.31 +		HYPERCALL_STR(name)				\
   68.32  		: "=a" (__res), "=b" (__ign1)			\
   68.33  		: "1" ((long)(a1))				\
   68.34  		: "memory" );					\
   68.35 @@ -68,7 +78,7 @@
   68.36  ({								\
   68.37  	long __res, __ign1, __ign2;				\
   68.38  	asm volatile (						\
   68.39 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.40 +		HYPERCALL_STR(name)				\
   68.41  		: "=a" (__res), "=b" (__ign1), "=c" (__ign2)	\
   68.42  		: "1" ((long)(a1)), "2" ((long)(a2))		\
   68.43  		: "memory" );					\
   68.44 @@ -79,7 +89,7 @@
   68.45  ({								\
   68.46  	long __res, __ign1, __ign2, __ign3;			\
   68.47  	asm volatile (						\
   68.48 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.49 +		HYPERCALL_STR(name)				\
   68.50  		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), 	\
   68.51  		"=d" (__ign3)					\
   68.52  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   68.53 @@ -92,7 +102,7 @@
   68.54  ({								\
   68.55  	long __res, __ign1, __ign2, __ign3, __ign4;		\
   68.56  	asm volatile (						\
   68.57 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.58 +		HYPERCALL_STR(name)				\
   68.59  		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
   68.60  		"=d" (__ign3), "=S" (__ign4)			\
   68.61  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   68.62 @@ -105,7 +115,7 @@
   68.63  ({								\
   68.64  	long __res, __ign1, __ign2, __ign3, __ign4, __ign5;	\
   68.65  	asm volatile (						\
   68.66 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   68.67 +		HYPERCALL_STR(name)				\
   68.68  		: "=a" (__res), "=b" (__ign1), "=c" (__ign2),	\
   68.69  		"=d" (__ign3), "=S" (__ign4), "=D" (__ign5)	\
   68.70  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   68.71 @@ -354,6 +364,13 @@ HYPERVISOR_nmi_op(
   68.72  	return _hypercall2(int, nmi_op, op, arg);
   68.73  }
   68.74  
   68.75 +static inline unsigned long
   68.76 +HYPERVISOR_hvm_op(
   68.77 +    int op, void *arg)
   68.78 +{
   68.79 +    return _hypercall2(unsigned long, hvm_op, op, arg);
   68.80 +}
   68.81 +
   68.82  static inline int
   68.83  HYPERVISOR_callback_op(
   68.84  	int cmd, void *arg)
    69.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h	Tue Aug 22 14:45:49 2006 -0600
    69.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h	Wed Aug 23 11:11:27 2006 -0600
    69.3 @@ -58,6 +58,11 @@ extern shared_info_t *HYPERVISOR_shared_
    69.4  
    69.5  /* arch/xen/i386/kernel/setup.c */
    69.6  extern start_info_t *xen_start_info;
    69.7 +#ifdef CONFIG_XEN_PRIVILEGED_GUEST
    69.8 +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
    69.9 +#else
   69.10 +#define is_initial_xendomain() 0
   69.11 +#endif
   69.12  
   69.13  /* arch/xen/kernel/evtchn.c */
   69.14  /* Force a proper event-channel callback from Xen. */
   69.15 @@ -198,6 +203,16 @@ MULTI_update_va_mapping(
   69.16  }
   69.17  
   69.18  static inline void
   69.19 +MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd,
   69.20 +		     void *uop, unsigned int count)
   69.21 +{
   69.22 +    mcl->op = __HYPERVISOR_grant_table_op;
   69.23 +    mcl->args[0] = cmd;
   69.24 +    mcl->args[1] = (unsigned long)uop;
   69.25 +    mcl->args[2] = count;
   69.26 +}
   69.27 +
   69.28 +static inline void
   69.29  MULTI_update_va_mapping_otherdomain(
   69.30      multicall_entry_t *mcl, unsigned long va,
   69.31      pte_t new_val, unsigned long flags, domid_t domid)
    70.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    70.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h	Wed Aug 23 11:11:27 2006 -0600
    70.3 @@ -0,0 +1,160 @@
    70.4 +#ifndef _I386_MADDR_H
    70.5 +#define _I386_MADDR_H
    70.6 +
    70.7 +#include <xen/features.h>
    70.8 +#include <xen/interface/xen.h>
    70.9 +
   70.10 +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
   70.11 +#define INVALID_P2M_ENTRY	(~0UL)
   70.12 +#define FOREIGN_FRAME_BIT	(1UL<<31)
   70.13 +#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
   70.14 +
   70.15 +#ifdef CONFIG_XEN
   70.16 +
   70.17 +extern unsigned long *phys_to_machine_mapping;
   70.18 +
   70.19 +#undef machine_to_phys_mapping
   70.20 +extern unsigned long *machine_to_phys_mapping;
   70.21 +extern unsigned int   machine_to_phys_order;
   70.22 +
   70.23 +static inline unsigned long pfn_to_mfn(unsigned long pfn)
   70.24 +{
   70.25 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   70.26 +		return pfn;
   70.27 +	return phys_to_machine_mapping[(unsigned int)(pfn)] &
   70.28 +		~FOREIGN_FRAME_BIT;
   70.29 +}
   70.30 +
   70.31 +static inline int phys_to_machine_mapping_valid(unsigned long pfn)
   70.32 +{
   70.33 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   70.34 +		return 1;
   70.35 +	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
   70.36 +}
   70.37 +
   70.38 +static inline unsigned long mfn_to_pfn(unsigned long mfn)
   70.39 +{
   70.40 +	extern unsigned long max_mapnr;
   70.41 +	unsigned long pfn;
   70.42 +
   70.43 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   70.44 +		return mfn;
   70.45 +
   70.46 +	if (unlikely((mfn >> machine_to_phys_order) != 0))
   70.47 +		return max_mapnr;
   70.48 +
   70.49 +	/* The array access can fail (e.g., device space beyond end of RAM). */
   70.50 +	asm (
   70.51 +		"1:	movl %1,%0\n"
   70.52 +		"2:\n"
   70.53 +		".section .fixup,\"ax\"\n"
   70.54 +		"3:	movl %2,%0\n"
   70.55 +		"	jmp  2b\n"
   70.56 +		".previous\n"
   70.57 +		".section __ex_table,\"a\"\n"
   70.58 +		"	.align 4\n"
   70.59 +		"	.long 1b,3b\n"
   70.60 +		".previous"
   70.61 +		: "=r" (pfn)
   70.62 +		: "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
   70.63 +
   70.64 +	return pfn;
   70.65 +}
   70.66 +
   70.67 +/*
   70.68 + * We detect special mappings in one of two ways:
   70.69 + *  1. If the MFN is an I/O page then Xen will set the m2p entry
   70.70 + *     to be outside our maximum possible pseudophys range.
   70.71 + *  2. If the MFN belongs to a different domain then we will certainly
   70.72 + *     not have MFN in our p2m table. Conversely, if the page is ours,
   70.73 + *     then we'll have p2m(m2p(MFN))==MFN.
   70.74 + * If we detect a special mapping then it doesn't have a 'struct page'.
   70.75 + * We force !pfn_valid() by returning an out-of-range pointer.
   70.76 + *
   70.77 + * NB. These checks require that, for any MFN that is not in our reservation,
   70.78 + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
    70.79 + * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
   70.80 + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
   70.81 + *
   70.82 + * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
   70.83 + *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
   70.84 + *      require. In all the cases we care about, the FOREIGN_FRAME bit is
   70.85 + *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
   70.86 + */
   70.87 +static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
   70.88 +{
   70.89 +	extern unsigned long max_mapnr;
   70.90 +	unsigned long pfn = mfn_to_pfn(mfn);
   70.91 +	if ((pfn < max_mapnr)
   70.92 +	    && !xen_feature(XENFEAT_auto_translated_physmap)
   70.93 +	    && (phys_to_machine_mapping[pfn] != mfn))
   70.94 +		return max_mapnr; /* force !pfn_valid() */
   70.95 +	return pfn;
   70.96 +}
   70.97 +
   70.98 +static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
   70.99 +{
  70.100 +	if (xen_feature(XENFEAT_auto_translated_physmap)) {
  70.101 +		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
  70.102 +		return;
  70.103 +	}
  70.104 +	phys_to_machine_mapping[pfn] = mfn;
  70.105 +}
  70.106 +
  70.107 +
  70.108 +#else /* !CONFIG_XEN */
  70.109 +
  70.110 +#define pfn_to_mfn(pfn) (pfn)
  70.111 +#define mfn_to_pfn(mfn) (mfn)
  70.112 +#define mfn_to_local_pfn(mfn) (mfn)
  70.113 +#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
  70.114 +#define phys_to_machine_mapping_valid(pfn) (1)
  70.115 +
  70.116 +#endif /* !CONFIG_XEN */
  70.117 +
  70.118 +/* Definitions for machine and pseudophysical addresses. */
  70.119 +#ifdef CONFIG_X86_PAE
  70.120 +typedef unsigned long long paddr_t;
  70.121 +typedef unsigned long long maddr_t;
  70.122 +#else
  70.123 +typedef unsigned long paddr_t;
  70.124 +typedef unsigned long maddr_t;
  70.125 +#endif
  70.126 +
  70.127 +static inline maddr_t phys_to_machine(paddr_t phys)
  70.128 +{
  70.129 +	maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
  70.130 +	machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
  70.131 +	return machine;
  70.132 +}
  70.133 +static inline paddr_t machine_to_phys(maddr_t machine)
  70.134 +{
  70.135 +	paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
  70.136 +	phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
  70.137 +	return phys;
  70.138 +}
  70.139 +
  70.140 +/* VIRT <-> MACHINE conversion */
  70.141 +#define virt_to_machine(v)	(phys_to_machine(__pa(v)))
  70.142 +#define virt_to_mfn(v)		(pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
  70.143 +#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
  70.144 +
  70.145 +#ifdef CONFIG_X86_PAE
  70.146 +static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot)
  70.147 +{
  70.148 +	pte_t pte;
  70.149 +
  70.150 +	pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
  70.151 +					(pgprot_val(pgprot) >> 32);
  70.152 +	pte.pte_high &= (__supported_pte_mask >> 32);
  70.153 +	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
  70.154 +							__supported_pte_mask;
  70.155 +	return pte;
  70.156 +}
  70.157 +#else
  70.158 +#define pfn_pte_ma(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
  70.159 +#endif
  70.160 +
  70.161 +#define __pte_ma(x)	((pte_t) { (x) } )
  70.162 +
  70.163 +#endif /* _I386_MADDR_H */
    71.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Tue Aug 22 14:45:49 2006 -0600
    71.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Wed Aug 23 11:11:27 2006 -0600
    71.3 @@ -60,123 +60,6 @@
    71.4  #define clear_user_page(page, vaddr, pg)	clear_page(page)
    71.5  #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
    71.6  
    71.7 -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
    71.8 -#define INVALID_P2M_ENTRY	(~0UL)
    71.9 -#define FOREIGN_FRAME_BIT	(1UL<<31)
   71.10 -#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
   71.11 -
   71.12 -extern unsigned long *phys_to_machine_mapping;
   71.13 -
   71.14 -#undef machine_to_phys_mapping
   71.15 -extern unsigned long *machine_to_phys_mapping;
   71.16 -extern unsigned int   machine_to_phys_order;
   71.17 -
   71.18 -static inline unsigned long pfn_to_mfn(unsigned long pfn)
   71.19 -{
   71.20 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   71.21 -		return pfn;
   71.22 -	return phys_to_machine_mapping[(unsigned int)(pfn)] &
   71.23 -		~FOREIGN_FRAME_BIT;
   71.24 -}
   71.25 -
   71.26 -static inline int phys_to_machine_mapping_valid(unsigned long pfn)
   71.27 -{
   71.28 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   71.29 -		return 1;
   71.30 -	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
   71.31 -}
   71.32 -
   71.33 -static inline unsigned long mfn_to_pfn(unsigned long mfn)
   71.34 -{
   71.35 -	extern unsigned long max_mapnr;
   71.36 -	unsigned long pfn;
   71.37 -
   71.38 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   71.39 -		return mfn;
   71.40 -
   71.41 -	if (unlikely((mfn >> machine_to_phys_order) != 0))
   71.42 -		return max_mapnr;
   71.43 -
   71.44 -	/* The array access can fail (e.g., device space beyond end of RAM). */
   71.45 -	asm (
   71.46 -		"1:	movl %1,%0\n"
   71.47 -		"2:\n"
   71.48 -		".section .fixup,\"ax\"\n"
   71.49 -		"3:	movl %2,%0\n"
   71.50 -		"	jmp  2b\n"
   71.51 -		".previous\n"
   71.52 -		".section __ex_table,\"a\"\n"
   71.53 -		"	.align 4\n"
   71.54 -		"	.long 1b,3b\n"
   71.55 -		".previous"
   71.56 -		: "=r" (pfn)
   71.57 -		: "m" (machine_to_phys_mapping[mfn]), "m" (max_mapnr) );
   71.58 -
   71.59 -	return pfn;
   71.60 -}
   71.61 -
   71.62 -/*
   71.63 - * We detect special mappings in one of two ways:
   71.64 - *  1. If the MFN is an I/O page then Xen will set the m2p entry
   71.65 - *     to be outside our maximum possible pseudophys range.
   71.66 - *  2. If the MFN belongs to a different domain then we will certainly
   71.67 - *     not have MFN in our p2m table. Conversely, if the page is ours,
   71.68 - *     then we'll have p2m(m2p(MFN))==MFN.
   71.69 - * If we detect a special mapping then it doesn't have a 'struct page'.
   71.70 - * We force !pfn_valid() by returning an out-of-range pointer.
   71.71 - *
   71.72 - * NB. These checks require that, for any MFN that is not in our reservation,
   71.73 - * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
   71.74 - * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
   71.75 - * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
   71.76 - *
   71.77 - * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
   71.78 - *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
   71.79 - *      require. In all the cases we care about, the FOREIGN_FRAME bit is
   71.80 - *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
   71.81 - */
   71.82 -static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
   71.83 -{
   71.84 -	extern unsigned long max_mapnr;
   71.85 -	unsigned long pfn = mfn_to_pfn(mfn);
   71.86 -	if ((pfn < max_mapnr)
   71.87 -	    && !xen_feature(XENFEAT_auto_translated_physmap)
   71.88 -	    && (phys_to_machine_mapping[pfn] != mfn))
   71.89 -		return max_mapnr; /* force !pfn_valid() */
   71.90 -	return pfn;
   71.91 -}
   71.92 -
   71.93 -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
   71.94 -{
   71.95 -	if (xen_feature(XENFEAT_auto_translated_physmap)) {
   71.96 -		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
   71.97 -		return;
   71.98 -	}
   71.99 -	phys_to_machine_mapping[pfn] = mfn;
  71.100 -}
  71.101 -
  71.102 -/* Definitions for machine and pseudophysical addresses. */
  71.103 -#ifdef CONFIG_X86_PAE
  71.104 -typedef unsigned long long paddr_t;
  71.105 -typedef unsigned long long maddr_t;
  71.106 -#else
  71.107 -typedef unsigned long paddr_t;
  71.108 -typedef unsigned long maddr_t;
  71.109 -#endif
  71.110 -
  71.111 -static inline maddr_t phys_to_machine(paddr_t phys)
  71.112 -{
  71.113 -	maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
  71.114 -	machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
  71.115 -	return machine;
  71.116 -}
  71.117 -static inline paddr_t machine_to_phys(maddr_t machine)
  71.118 -{
  71.119 -	paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
  71.120 -	phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
  71.121 -	return phys;
  71.122 -}
  71.123 -
  71.124  /*
  71.125   * These are used to make use of C type-checking..
  71.126   */
  71.127 @@ -187,6 +70,8 @@ typedef struct { unsigned long pte_low, 
  71.128  typedef struct { unsigned long long pmd; } pmd_t;
  71.129  typedef struct { unsigned long long pgd; } pgd_t;
  71.130  typedef struct { unsigned long long pgprot; } pgprot_t;
  71.131 +#define pgprot_val(x)	((x).pgprot)
  71.132 +#include <asm/maddr.h>
  71.133  #define __pte(x) ({ unsigned long long _x = (x);        \
  71.134      if (_x & 1) _x = phys_to_machine(_x);               \
  71.135      ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
  71.136 @@ -227,6 +112,8 @@ static inline unsigned long long pte_val
  71.137  typedef struct { unsigned long pte_low; } pte_t;
  71.138  typedef struct { unsigned long pgd; } pgd_t;
  71.139  typedef struct { unsigned long pgprot; } pgprot_t;
  71.140 +#define pgprot_val(x)	((x).pgprot)
  71.141 +#include <asm/maddr.h>
  71.142  #define boot_pte_t pte_t /* or would you rather have a typedef */
  71.143  #define pte_val(x)	(((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
  71.144  			 (x).pte_low)
  71.145 @@ -252,9 +139,6 @@ static inline unsigned long pgd_val(pgd_
  71.146  #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
  71.147  #endif
  71.148  
  71.149 -#define pgprot_val(x)	((x).pgprot)
  71.150 -
  71.151 -#define __pte_ma(x)	((pte_t) { (x) } )
  71.152  #define __pgprot(x)	((pgprot_t) { (x) } )
  71.153  
  71.154  #endif /* !__ASSEMBLY__ */
  71.155 @@ -323,11 +207,6 @@ extern int page_is_ram(unsigned long pag
  71.156  	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
  71.157  		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
  71.158  
  71.159 -/* VIRT <-> MACHINE conversion */
  71.160 -#define virt_to_machine(v)	(phys_to_machine(__pa(v)))
  71.161 -#define virt_to_mfn(v)		(pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
  71.162 -#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
  71.163 -
  71.164  #define __HAVE_ARCH_GATE_AREA 1
  71.165  
  71.166  #endif /* __KERNEL__ */
    72.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Tue Aug 22 14:45:49 2006 -0600
    72.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level.h	Wed Aug 23 11:11:27 2006 -0600
    72.3 @@ -45,7 +45,6 @@
    72.4  
    72.5  #define pte_none(x)		(!(x).pte_low)
    72.6  #define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
    72.7 -#define pfn_pte_ma(pfn, prot)	__pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
    72.8  #define pfn_pmd(pfn, prot)	__pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
    72.9  
   72.10  /*
    73.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Tue Aug 22 14:45:49 2006 -0600
    73.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level.h	Wed Aug 23 11:11:27 2006 -0600
    73.3 @@ -151,18 +151,6 @@ static inline int pte_none(pte_t pte)
    73.4  
    73.5  extern unsigned long long __supported_pte_mask;
    73.6  
    73.7 -static inline pte_t pfn_pte_ma(unsigned long page_nr, pgprot_t pgprot)
    73.8 -{
    73.9 -	pte_t pte;
   73.10 -
   73.11 -	pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
   73.12 -					(pgprot_val(pgprot) >> 32);
   73.13 -	pte.pte_high &= (__supported_pte_mask >> 32);
   73.14 -	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
   73.15 -							__supported_pte_mask;
   73.16 -	return pte;
   73.17 -}
   73.18 -
   73.19  static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
   73.20  {
   73.21  	return pfn_pte_ma(pfn_to_mfn(page_nr), pgprot);
    74.1 --- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Tue Aug 22 14:45:49 2006 -0600
    74.2 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h	Wed Aug 23 11:11:27 2006 -0600
    74.3 @@ -59,6 +59,8 @@ extern int running_on_xen;
    74.4  extern shared_info_t *HYPERVISOR_shared_info;
    74.5  extern start_info_t *xen_start_info;
    74.6  
    74.7 +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
    74.8 +
    74.9  void force_evtchn_callback(void);
   74.10  
   74.11  /* Turn jiffies into Xen system time. XXX Implement me. */
    75.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Tue Aug 22 14:45:49 2006 -0600
    75.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h	Wed Aug 23 11:11:27 2006 -0600
    75.3 @@ -46,11 +46,21 @@
    75.4  #define __STR(x) #x
    75.5  #define STR(x) __STR(x)
    75.6  
    75.7 +#ifdef CONFIG_XEN
    75.8 +#define HYPERCALL_STR(name)					\
    75.9 +	"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
   75.10 +#else
   75.11 +#define HYPERCALL_STR(name)					\
   75.12 +	"mov hypercall_stubs,%%rax; "				\
   75.13 +	"add $("STR(__HYPERVISOR_##name)" * 32),%%rax; "	\
   75.14 +	"call *%%rax"
   75.15 +#endif
   75.16 +
   75.17  #define _hypercall0(type, name)			\
   75.18  ({						\
   75.19  	long __res;				\
   75.20  	asm volatile (				\
   75.21 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.22 +		HYPERCALL_STR(name)		\
   75.23  		: "=a" (__res)			\
   75.24  		:				\
   75.25  		: "memory" );			\
   75.26 @@ -61,7 +71,7 @@
   75.27  ({								\
   75.28  	long __res, __ign1;					\
   75.29  	asm volatile (						\
   75.30 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.31 +		HYPERCALL_STR(name)				\
   75.32  		: "=a" (__res), "=D" (__ign1)			\
   75.33  		: "1" ((long)(a1))				\
   75.34  		: "memory" );					\
   75.35 @@ -72,7 +82,7 @@
   75.36  ({								\
   75.37  	long __res, __ign1, __ign2;				\
   75.38  	asm volatile (						\
   75.39 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.40 +		HYPERCALL_STR(name)				\
   75.41  		: "=a" (__res), "=D" (__ign1), "=S" (__ign2)	\
   75.42  		: "1" ((long)(a1)), "2" ((long)(a2))		\
   75.43  		: "memory" );					\
   75.44 @@ -83,7 +93,7 @@
   75.45  ({								\
   75.46  	long __res, __ign1, __ign2, __ign3;			\
   75.47  	asm volatile (						\
   75.48 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.49 +		HYPERCALL_STR(name)				\
   75.50  		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), 	\
   75.51  		"=d" (__ign3)					\
   75.52  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   75.53 @@ -97,7 +107,7 @@
   75.54  	long __res, __ign1, __ign2, __ign3;			\
   75.55  	asm volatile (						\
   75.56  		"movq %7,%%r10; "				\
   75.57 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.58 +		HYPERCALL_STR(name)				\
   75.59  		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
   75.60  		"=d" (__ign3)					\
   75.61  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   75.62 @@ -111,7 +121,7 @@
   75.63  	long __res, __ign1, __ign2, __ign3;			\
   75.64  	asm volatile (						\
   75.65  		"movq %7,%%r10; movq %8,%%r8; "			\
   75.66 -		"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"\
   75.67 +		HYPERCALL_STR(name)				\
   75.68  		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
   75.69  		"=d" (__ign3)					\
   75.70  		: "1" ((long)(a1)), "2" ((long)(a2)),		\
   75.71 @@ -355,6 +365,13 @@ HYPERVISOR_nmi_op(
   75.72  	return _hypercall2(int, nmi_op, op, arg);
   75.73  }
   75.74  
   75.75 +static inline unsigned long
   75.76 +HYPERVISOR_hvm_op(
   75.77 +    int op, void *arg)
   75.78 +{
   75.79 +    return _hypercall2(unsigned long, hvm_op, op, arg);
   75.80 +}
   75.81 +
   75.82  static inline int
   75.83  HYPERVISOR_callback_op(
   75.84  	int cmd, void *arg)
    76.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    76.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h	Wed Aug 23 11:11:27 2006 -0600
    76.3 @@ -0,0 +1,139 @@
    76.4 +#ifndef _X86_64_MADDR_H
    76.5 +#define _X86_64_MADDR_H
    76.6 +
    76.7 +#include <xen/features.h>
    76.8 +#include <xen/interface/xen.h>
    76.9 +
   76.10 +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
   76.11 +#define INVALID_P2M_ENTRY	(~0UL)
   76.12 +#define FOREIGN_FRAME_BIT	(1UL<<63)
   76.13 +#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
   76.14 +
   76.15 +#ifdef CONFIG_XEN
   76.16 +
   76.17 +extern unsigned long *phys_to_machine_mapping;
   76.18 +
   76.19 +#undef machine_to_phys_mapping
   76.20 +extern unsigned long *machine_to_phys_mapping;
   76.21 +extern unsigned int   machine_to_phys_order;
   76.22 +
   76.23 +static inline unsigned long pfn_to_mfn(unsigned long pfn)
   76.24 +{
   76.25 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   76.26 +		return pfn;
   76.27 +	return phys_to_machine_mapping[(unsigned int)(pfn)] &
   76.28 +		~FOREIGN_FRAME_BIT;
   76.29 +}
   76.30 +
   76.31 +static inline int phys_to_machine_mapping_valid(unsigned long pfn)
   76.32 +{
   76.33 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   76.34 +		return 1;
   76.35 +	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
   76.36 +}
   76.37 +
   76.38 +static inline unsigned long mfn_to_pfn(unsigned long mfn)
   76.39 +{
   76.40 +	unsigned long pfn;
   76.41 +
   76.42 +	if (xen_feature(XENFEAT_auto_translated_physmap))
   76.43 +		return mfn;
   76.44 +
   76.45 +	if (unlikely((mfn >> machine_to_phys_order) != 0))
   76.46 +		return end_pfn;
   76.47 +
   76.48 +	/* The array access can fail (e.g., device space beyond end of RAM). */
   76.49 +	asm (
   76.50 +		"1:	movq %1,%0\n"
   76.51 +		"2:\n"
   76.52 +		".section .fixup,\"ax\"\n"
   76.53 +		"3:	movq %2,%0\n"
   76.54 +		"	jmp  2b\n"
   76.55 +		".previous\n"
   76.56 +		".section __ex_table,\"a\"\n"
   76.57 +		"	.align 8\n"
   76.58 +		"	.quad 1b,3b\n"
   76.59 +		".previous"
   76.60 +		: "=r" (pfn)
   76.61 +		: "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
   76.62 +
   76.63 +	return pfn;
   76.64 +}
   76.65 +
   76.66 +/*
   76.67 + * We detect special mappings in one of two ways:
   76.68 + *  1. If the MFN is an I/O page then Xen will set the m2p entry
   76.69 + *     to be outside our maximum possible pseudophys range.
   76.70 + *  2. If the MFN belongs to a different domain then we will certainly
   76.71 + *     not have MFN in our p2m table. Conversely, if the page is ours,
   76.72 + *     then we'll have p2m(m2p(MFN))==MFN.
   76.73 + * If we detect a special mapping then it doesn't have a 'struct page'.
   76.74 + * We force !pfn_valid() by returning an out-of-range PFN.
   76.75 + *
   76.76 + * NB. These checks require that, for any MFN that is not in our reservation,
   76.77 + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
   76.78 + * we are foreign-mapping the MFN, and the other domain has m2p(MFN) == PFN.
   76.79 + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
   76.80 + *
   76.81 + * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
   76.82 + *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
   76.83 + *      require. In all the cases we care about, the FOREIGN_FRAME bit is
   76.84 + *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
   76.85 + */
   76.86 +static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
   76.87 +{
   76.88 +	unsigned long pfn = mfn_to_pfn(mfn);
   76.89 +	if ((pfn < end_pfn)
   76.90 +	    && !xen_feature(XENFEAT_auto_translated_physmap)
   76.91 +	    && (phys_to_machine_mapping[pfn] != mfn))
   76.92 +		return end_pfn; /* force !pfn_valid() */
   76.93 +	return pfn;
   76.94 +}
   76.95 +
   76.96 +static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
   76.97 +{
   76.98 +	if (xen_feature(XENFEAT_auto_translated_physmap)) {
   76.99 +		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
  76.100 +		return;
  76.101 +	}
  76.102 +	phys_to_machine_mapping[pfn] = mfn;
  76.103 +}
  76.104 +
  76.105 +#else /* !CONFIG_XEN */
  76.106 +
  76.107 +#define pfn_to_mfn(pfn) (pfn)
  76.108 +#define mfn_to_pfn(mfn) (mfn)
  76.109 +#define mfn_to_local_pfn(mfn) (mfn)
  76.110 +#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
  76.111 +#define phys_to_machine_mapping_valid(pfn) (1)
  76.112 +
  76.113 +#endif /* !CONFIG_XEN */
  76.114 +
  76.115 +/* Definitions for machine and pseudophysical addresses. */
  76.116 +typedef unsigned long paddr_t;
  76.117 +typedef unsigned long maddr_t;
  76.118 +
  76.119 +static inline maddr_t phys_to_machine(paddr_t phys)
  76.120 +{
  76.121 +	maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
  76.122 +	machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
  76.123 +	return machine;
  76.124 +}
  76.125 +
  76.126 +static inline paddr_t machine_to_phys(maddr_t machine)
  76.127 +{
  76.128 +	paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
  76.129 +	phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
  76.130 +	return phys;
  76.131 +}
  76.132 +
  76.133 +/* VIRT <-> MACHINE conversion */
  76.134 +#define virt_to_machine(v)	(phys_to_machine(__pa(v)))
  76.135 +#define virt_to_mfn(v)		(pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
  76.136 +#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
  76.137 +
  76.138 +#define __pte_ma(x)     ((pte_t) { (x) } )
  76.139 +#define pfn_pte_ma(pfn, prot)	__pte_ma((((pfn) << PAGE_SHIFT) | pgprot_val(prot)) & __supported_pte_mask)
  76.140 +
  76.141 +#endif /* _X86_64_MADDR_H */
  76.142 +
    77.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Tue Aug 22 14:45:49 2006 -0600
    77.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h	Wed Aug 23 11:11:27 2006 -0600
    77.3 @@ -7,7 +7,6 @@
    77.4  #include <linux/kernel.h>
    77.5  #include <linux/types.h>
    77.6  #include <asm/bug.h>
    77.7 -#include <xen/features.h>
    77.8  #endif
    77.9  #include <xen/interface/xen.h> 
   77.10  #include <xen/foreign_page.h>
   77.11 @@ -69,6 +68,8 @@
   77.12  
   77.13  extern unsigned long end_pfn;
   77.14  
   77.15 +#include <asm/maddr.h>
   77.16 +
   77.17  void clear_page(void *);
   77.18  void copy_page(void *, void *);
   77.19  
   77.20 @@ -78,118 +79,6 @@ void copy_page(void *, void *);
   77.21  #define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
   77.22  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
   77.23  
   77.24 -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/
   77.25 -#define INVALID_P2M_ENTRY	(~0UL)
   77.26 -#define FOREIGN_FRAME_BIT	(1UL<<63)
   77.27 -#define FOREIGN_FRAME(m)	((m) | FOREIGN_FRAME_BIT)
   77.28 -
   77.29 -extern unsigned long *phys_to_machine_mapping;
   77.30 -
   77.31 -#undef machine_to_phys_mapping
   77.32 -extern unsigned long *machine_to_phys_mapping;
   77.33 -extern unsigned int   machine_to_phys_order;
   77.34 -
   77.35 -static inline unsigned long pfn_to_mfn(unsigned long pfn)
   77.36 -{
   77.37 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   77.38 -		return pfn;
   77.39 -	return phys_to_machine_mapping[(unsigned int)(pfn)] &
   77.40 -		~FOREIGN_FRAME_BIT;
   77.41 -}
   77.42 -
   77.43 -static inline int phys_to_machine_mapping_valid(unsigned long pfn)
   77.44 -{
   77.45 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   77.46 -		return 1;
   77.47 -	return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);
   77.48 -}
   77.49 -
   77.50 -static inline unsigned long mfn_to_pfn(unsigned long mfn)
   77.51 -{
   77.52 -	unsigned long pfn;
   77.53 -
   77.54 -	if (xen_feature(XENFEAT_auto_translated_physmap))
   77.55 -		return mfn;
   77.56 -
   77.57 -	if (unlikely((mfn >> machine_to_phys_order) != 0))
   77.58 -		return end_pfn;
   77.59 -
   77.60 -	/* The array access can fail (e.g., device space beyond end of RAM). */
   77.61 -	asm (
   77.62 -		"1:	movq %1,%0\n"
   77.63 -		"2:\n"
   77.64 -		".section .fixup,\"ax\"\n"
   77.65 -		"3:	movq %2,%0\n"
   77.66 -		"	jmp  2b\n"
   77.67 -		".previous\n"
   77.68 -		".section __ex_table,\"a\"\n"
   77.69 -		"	.align 8\n"
   77.70 -		"	.quad 1b,3b\n"
   77.71 -		".previous"
   77.72 -		: "=r" (pfn)
   77.73 -		: "m" (machine_to_phys_mapping[mfn]), "m" (end_pfn) );
   77.74 -
   77.75 -	return pfn;
   77.76 -}
   77.77 -
   77.78 -/*
   77.79 - * We detect special mappings in one of two ways:
   77.80 - *  1. If the MFN is an I/O page then Xen will set the m2p entry
   77.81 - *     to be outside our maximum possible pseudophys range.
   77.82 - *  2. If the MFN belongs to a different domain then we will certainly
   77.83 - *     not have MFN in our p2m table. Conversely, if the page is ours,
   77.84 - *     then we'll have p2m(m2p(MFN))==MFN.
   77.85 - * If we detect a special mapping then it doesn't have a 'struct page'.
   77.86 - * We force !pfn_valid() by returning an out-of-range pointer.
   77.87 - *
   77.88 - * NB. These checks require that, for any MFN that is not in our reservation,
   77.89 - * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
   77.90 - * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
   77.91 - * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
   77.92 - *
   77.93 - * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
   77.94 - *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
   77.95 - *      require. In all the cases we care about, the FOREIGN_FRAME bit is
   77.96 - *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
   77.97 - */
   77.98 -static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
   77.99 -{
  77.100 -	unsigned long pfn = mfn_to_pfn(mfn);
  77.101 -	if ((pfn < end_pfn)
  77.102 -	    && !xen_feature(XENFEAT_auto_translated_physmap)
  77.103 -	    && (phys_to_machine_mapping[pfn] != mfn))
  77.104 -		return end_pfn; /* force !pfn_valid() */
  77.105 -	return pfn;
  77.106 -}
  77.107 -
  77.108 -
  77.109 -static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
  77.110 -{
  77.111 -	if (xen_feature(XENFEAT_auto_translated_physmap)) {
  77.112 -		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
  77.113 -		return;
  77.114 -	}
  77.115 -	phys_to_machine_mapping[pfn] = mfn;
  77.116 -}
  77.117 -
  77.118 -/* Definitions for machine and pseudophysical addresses. */
  77.119 -typedef unsigned long paddr_t;
  77.120 -typedef unsigned long maddr_t;
  77.121 -
  77.122 -static inline maddr_t phys_to_machine(paddr_t phys)
  77.123 -{
  77.124 -	maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
  77.125 -	machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
  77.126 -	return machine;
  77.127 -}
  77.128 -
  77.129 -static inline paddr_t machine_to_phys(maddr_t machine)
  77.130 -{
  77.131 -	paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
  77.132 -	phys = (phys << PAGE_SHIFT) | (machine & ~PAGE_MASK);
  77.133 -	return phys;
  77.134 -}
  77.135 -
  77.136  /*
  77.137   * These are used to make use of C type-checking..
  77.138   */
  77.139 @@ -228,8 +117,6 @@ static inline unsigned long pgd_val(pgd_
  77.140  
  77.141  #define pgprot_val(x)	((x).pgprot)
  77.142  
  77.143 -#define __pte_ma(x)     ((pte_t) { (x) } )
  77.144 -
  77.145  static inline pte_t __pte(unsigned long x)
  77.146  {
  77.147  	if (x & 1) x = phys_to_machine(x);
  77.148 @@ -310,11 +197,6 @@ static inline pgd_t __pgd(unsigned long 
  77.149  #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
  77.150  #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
  77.151  
  77.152 -/* VIRT <-> MACHINE conversion */
  77.153 -#define virt_to_machine(v)	(phys_to_machine(__pa(v)))
  77.154 -#define virt_to_mfn(v)		(pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
  77.155 -#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
  77.156 -
  77.157  #define VM_DATA_DEFAULT_FLAGS \
  77.158  	(((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
  77.159  	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
    78.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Tue Aug 22 14:45:49 2006 -0600
    78.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Wed Aug 23 11:11:27 2006 -0600
    78.3 @@ -206,7 +206,7 @@ static inline pte_t ptep_get_and_clear_f
    78.4  #define _PAGE_NX        (1UL<<_PAGE_BIT_NX)
    78.5  
    78.6  #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
    78.7 -#define _KERNPG_TABLE	_PAGE_TABLE
    78.8 +#define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
    78.9  
   78.10  #define _PAGE_CHG_MASK	(PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
   78.11  
   78.12 @@ -219,22 +219,21 @@ static inline pte_t ptep_get_and_clear_f
   78.13  #define PAGE_READONLY	__pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
   78.14  #define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
   78.15  #define __PAGE_KERNEL \
   78.16 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
   78.17 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
   78.18  #define __PAGE_KERNEL_EXEC \
   78.19 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_USER )
   78.20 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
   78.21  #define __PAGE_KERNEL_NOCACHE \
   78.22 -	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
   78.23 +	(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
   78.24  #define __PAGE_KERNEL_RO \
   78.25 -	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | _PAGE_USER )
   78.26 +	(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
   78.27  #define __PAGE_KERNEL_VSYSCALL \
   78.28 -	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_USER )
   78.29 +	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
   78.30  #define __PAGE_KERNEL_VSYSCALL_NOCACHE \
   78.31 -	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD | _PAGE_USER )
   78.32 +	(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PCD)
   78.33  #define __PAGE_KERNEL_LARGE \
   78.34 -	(__PAGE_KERNEL | _PAGE_PSE | _PAGE_USER )
   78.35 +	(__PAGE_KERNEL | _PAGE_PSE)
   78.36  #define __PAGE_KERNEL_LARGE_EXEC \
   78.37 -	(__PAGE_KERNEL_EXEC | _PAGE_PSE | _PAGE_USER )
   78.38 -
   78.39 +	(__PAGE_KERNEL_EXEC | _PAGE_PSE)
   78.40  
   78.41  /*
   78.42   * We don't support GLOBAL page in xenolinux64
   78.43 @@ -312,7 +311,6 @@ static inline pte_t pfn_pte(unsigned lon
   78.44  	return pte;
   78.45  }
   78.46  
   78.47 -#define pfn_pte_ma(pfn, prot)	__pte_ma((((pfn) << PAGE_SHIFT) | pgprot_val(prot)) & __supported_pte_mask)
   78.48  /*
   78.49   * The following only work if pte_present() is true.
   78.50   * Undefined behaviour if not..
   78.51 @@ -424,7 +422,7 @@ static inline pud_t *pud_offset_k(pgd_t 
   78.52     can temporarily clear it. */
   78.53  #define pmd_present(x)	(pmd_val(x))
   78.54  #define pmd_clear(xp)	do { set_pmd(xp, __pmd(0)); } while (0)
   78.55 -#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
   78.56 +#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
   78.57  #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
   78.58  #define pmd_pfn(x)  ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
   78.59  
    79.1 --- a/linux-2.6-xen-sparse/include/xen/balloon.h	Tue Aug 22 14:45:49 2006 -0600
    79.2 +++ b/linux-2.6-xen-sparse/include/xen/balloon.h	Wed Aug 23 11:11:27 2006 -0600
    79.3 @@ -38,20 +38,24 @@
    79.4   * Inform the balloon driver that it should allow some slop for device-driver
    79.5   * memory activities.
    79.6   */
    79.7 -extern void
    79.8 +void
    79.9  balloon_update_driver_allowance(
   79.10  	long delta);
   79.11  
   79.12  /* Allocate an empty low-memory page range. */
   79.13 -extern struct page *
   79.14 +struct page *
   79.15  balloon_alloc_empty_page_range(
   79.16  	unsigned long nr_pages);
   79.17  
   79.18  /* Deallocate an empty page range, adding to the balloon. */
   79.19 -extern void
   79.20 +void
   79.21  balloon_dealloc_empty_page_range(
   79.22  	struct page *page, unsigned long nr_pages);
   79.23  
   79.24 +void
   79.25 +balloon_release_driver_page(
   79.26 +	struct page *page);
   79.27 +
   79.28  /*
   79.29   * Prevent the balloon driver from changing the memory reservation during
   79.30   * a driver critical region.
    80.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    80.2 +++ b/linux-2.6-xen-sparse/include/xen/hvm.h	Wed Aug 23 11:11:27 2006 -0600
    80.3 @@ -0,0 +1,24 @@
    80.4 +/* Simple wrappers around HVM functions */
    80.5 +#ifndef XEN_HVM_H__
    80.6 +#define XEN_HVM_H__
    80.7 +
    80.8 +#include <xen/interface/hvm/params.h>
    80.9 +#include <asm/hypercall.h>
   80.10 +
   80.11 +static inline unsigned long hvm_get_parameter(int idx)
   80.12 +{
   80.13 +	struct xen_hvm_param xhv;
   80.14 +	int r;
   80.15 +
   80.16 +	xhv.domid = DOMID_SELF;
   80.17 +	xhv.index = idx;
   80.18 +	r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
   80.19 +	if (r < 0) {
   80.20 +		printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
   80.21 +		       idx, r);
   80.22 +		return 0;
   80.23 +	}
   80.24 +	return xhv.value;
   80.25 +}
   80.26 +
   80.27 +#endif /* XEN_HVM_H__ */
    81.1 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h	Tue Aug 22 14:45:49 2006 -0600
    81.2 +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h	Wed Aug 23 11:11:27 2006 -0600
    81.3 @@ -274,7 +274,7 @@ int xenbus_free_evtchn(struct xenbus_dev
    81.4  
    81.5  /**
    81.6   * Return the state of the driver rooted at the given store path, or
    81.7 - * XenbusStateClosed if no state can be read.
    81.8 + * XenbusStateUnknown if no state can be read.
    81.9   */
   81.10  enum xenbus_state xenbus_read_driver_state(const char *path);
   81.11  
   81.12 @@ -295,5 +295,6 @@ void xenbus_dev_error(struct xenbus_devi
   81.13  void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
   81.14  		      ...);
   81.15  
   81.16 +int __init xenbus_dev_init(void);
   81.17  
   81.18  #endif /* _XEN_XENBUS_H */
    82.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Tue Aug 22 14:45:49 2006 -0600
    82.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Wed Aug 23 11:11:27 2006 -0600
    82.3 @@ -390,7 +390,7 @@ struct page *vm_normal_page(struct vm_ar
    82.4  
    82.5  	if (vma->vm_flags & VM_PFNMAP) {
    82.6  		unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
    82.7 -		if (pfn == vma->vm_pgoff + off)
    82.8 +		if ((pfn == vma->vm_pgoff + off) || !pfn_valid(pfn))
    82.9  			return NULL;
   82.10  		if (!is_cow_mapping(vma->vm_flags))
   82.11  			return NULL;
   82.12 @@ -405,8 +405,7 @@ struct page *vm_normal_page(struct vm_ar
   82.13  	 * Remove this test eventually!
   82.14  	 */
   82.15  	if (unlikely(!pfn_valid(pfn))) {
   82.16 -		if (!(vma->vm_flags & VM_RESERVED))
   82.17 -			print_bad_pte(vma, pte, addr);
   82.18 +		print_bad_pte(vma, pte, addr);
   82.19  		return NULL;
   82.20  	}
   82.21  
    83.1 --- a/linux-2.6-xen-sparse/net/core/dev.c	Tue Aug 22 14:45:49 2006 -0600
    83.2 +++ b/linux-2.6-xen-sparse/net/core/dev.c	Wed Aug 23 11:11:27 2006 -0600
    83.3 @@ -1093,11 +1093,6 @@ int skb_checksum_help(struct sk_buff *sk
    83.4  		goto out_set_summed;
    83.5  
    83.6  	if (unlikely(skb_shinfo(skb)->gso_size)) {
    83.7 -		static int warned;
    83.8 -
    83.9 -		WARN_ON(!warned);
   83.10 -		warned = 1;
   83.11 -
   83.12  		/* Let GSO fix up the checksum. */
   83.13  		goto out_set_summed;
   83.14  	}
   83.15 @@ -1147,11 +1142,6 @@ struct sk_buff *skb_gso_segment(struct s
   83.16  	__skb_pull(skb, skb->mac_len);
   83.17  
   83.18  	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
   83.19 -		static int warned;
   83.20 -
   83.21 -		WARN_ON(!warned);
   83.22 -		warned = 1;
   83.23 -
   83.24  		if (skb_header_cloned(skb) &&
   83.25  		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
   83.26  			return ERR_PTR(err);
    84.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    84.2 +++ b/patches/linux-2.6.16.13/net-gso-4-kill-warnon.patch	Wed Aug 23 11:11:27 2006 -0600
    84.3 @@ -0,0 +1,29 @@
    84.4 +508c578140642a641bb9b888369719c510ae2a00
    84.5 +diff --git a/net/core/dev.c b/net/core/dev.c
    84.6 +index e814a89..240773b 100644
    84.7 +--- a/net/core/dev.c
    84.8 ++++ b/net/core/dev.c
    84.9 +@@ -1087,11 +1087,6 @@ int skb_checksum_help(struct sk_buff *sk
   84.10 + 		goto out_set_summed;
   84.11 + 
   84.12 + 	if (unlikely(skb_shinfo(skb)->gso_size)) {
   84.13 +-		static int warned;
   84.14 +-
   84.15 +-		WARN_ON(!warned);
   84.16 +-		warned = 1;
   84.17 +-
   84.18 + 		/* Let GSO fix up the checksum. */
   84.19 + 		goto out_set_summed;
   84.20 + 	}
   84.21 +@@ -1141,11 +1136,6 @@ struct sk_buff *skb_gso_segment(struct s
   84.22 + 	__skb_pull(skb, skb->mac_len);
   84.23 + 
   84.24 + 	if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
   84.25 +-		static int warned;
   84.26 +-
   84.27 +-		WARN_ON(!warned);
   84.28 +-		warned = 1;
   84.29 +-
   84.30 + 		if (skb_header_cloned(skb) &&
   84.31 + 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
   84.32 + 			return ERR_PTR(err);
    85.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    85.2 +++ b/patches/linux-2.6.16.13/series	Wed Aug 23 11:11:27 2006 -0600
    85.3 @@ -0,0 +1,22 @@
    85.4 +blktap-aio-16_03_06.patch
    85.5 +device_bind.patch
    85.6 +fix-hz-suspend.patch
    85.7 +fix-ide-cd-pio-mode.patch
    85.8 +i386-mach-io-check-nmi.patch
    85.9 +ipv6-no-autoconf.patch
   85.10 +net-csum.patch
   85.11 +net-gso-0-base.patch
   85.12 +net-gso-1-check-dodgy.patch
   85.13 +net-gso-2-checksum-fix.patch
   85.14 +net-gso-3-fix-errorcheck.patch
   85.15 +net-gso-4-kill-warnon.patch
   85.16 +pmd-shared.patch
   85.17 +rcu_needs_cpu.patch
   85.18 +rename-TSS_sysenter_esp0-SYSENTER_stack_esp0.patch
   85.19 +smp-alts.patch
   85.20 +tpm_plugin_2.6.17.patch
   85.21 +x86-increase-interrupt-vector-range.patch
   85.22 +xen-hotplug.patch
   85.23 +xenoprof-generic.patch
   85.24 +x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
   85.25 +x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch
    86.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    86.2 +++ b/patches/linux-2.6.16.13/x86-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch	Wed Aug 23 11:11:27 2006 -0600
    86.3 @@ -0,0 +1,174 @@
    86.4 +Taken from 2.6.18-rc4-mm1.
    86.5 +
    86.6 +From: Jeremy Fitzhardinge <jeremy@xensource.com>
    86.7 +
    86.8 +This patch will pack any .note.* section into a PT_NOTE segment in the output
    86.9 +file.
   86.10 +
   86.11 +To do this, we tell ld that we need a PT_NOTE segment.  This requires us to
   86.12 +start explicitly mapping sections to segments, so we also need to explicitly
   86.13 +create PT_LOAD segments for text and data, and map the sections to them
   86.14 +appropriately.  Fortunately, each section will default to its previous
   86.15 +section's segment, so it doesn't take many changes to vmlinux.lds.S.
   86.16 +
   86.17 +This only changes i386 for now, but I presume the corresponding changes for
   86.18 +other architectures will be as simple.
   86.19 +
   86.20 +This change also adds <linux/elfnote.h>, which defines C and Assembler macros
   86.21 +for actually creating ELF notes.
   86.22 +
   86.23 +Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
   86.24 +Cc: Eric W. Biederman <ebiederm@xmission.com>
   86.25 +Cc: Hollis Blanchard <hollisb@us.ibm.com>
   86.26 +Signed-off-by: Andrew Morton <akpm@osdl.org>
   86.27 +---
   86.28 +
   86.29 + arch/i386/kernel/vmlinux.lds.S    |   12 +++
   86.30 + include/asm-generic/vmlinux.lds.h |    3 
   86.31 + include/linux/elfnote.h           |   88 ++++++++++++++++++++++++++++
   86.32 + 3 files changed, 101 insertions(+), 2 deletions(-)
   86.33 +
   86.34 +diff -puN arch/i386/kernel/vmlinux.lds.S~x86-put-note-sections-into-a-pt_note-segment-in-vmlinux arch/i386/kernel/vmlinux.lds.S
   86.35 +--- a/arch/i386/kernel/vmlinux.lds.S~x86-put-note-sections-into-a-pt_note-segment-in-vmlinux
   86.36 ++++ a/arch/i386/kernel/vmlinux.lds.S
   86.37 +@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386"
   86.38 + OUTPUT_ARCH(i386)
   86.39 + ENTRY(phys_startup_32)
   86.40 + jiffies = jiffies_64;
   86.41 ++
   86.42 ++PHDRS {
   86.43 ++	text PT_LOAD FLAGS(5);	/* R_E */
   86.44 ++	data PT_LOAD FLAGS(7);	/* RWE */
   86.45 ++	note PT_NOTE FLAGS(4);	/* R__ */
   86.46 ++}
   86.47 + SECTIONS
   86.48 + {
   86.49 +   . = __KERNEL_START;
   86.50 +@@ -26,7 +32,7 @@ SECTIONS
   86.51 + 	KPROBES_TEXT
   86.52 + 	*(.fixup)
   86.53 + 	*(.gnu.warning)
   86.54 +-	} = 0x9090
   86.55 ++	} :text = 0x9090
   86.56 + 
   86.57 +   _etext = .;			/* End of text section */
   86.58 + 
   86.59 +@@ -50,7 +56,7 @@ SECTIONS
   86.60 +   .data : AT(ADDR(.data) - LOAD_OFFSET) {	/* Data */
   86.61 + 	*(.data)
   86.62 + 	CONSTRUCTORS
   86.63 +-	}
   86.64 ++	} :data
   86.65 + 
   86.66 +   . = ALIGN(4096);
   86.67 +   __nosave_begin = .;
   86.68 +@@ -186,4 +192,6 @@ SECTIONS
   86.69 +   STABS_DEBUG
   86.70 + 
   86.71 +   DWARF_DEBUG
   86.72 ++
   86.73 ++  NOTES
   86.74 + }
   86.75 +diff -puN include/asm-generic/vmlinux.lds.h~x86-put-note-sections-into-a-pt_note-segment-in-vmlinux include/asm-generic/vmlinux.lds.h
   86.76 +--- a/include/asm-generic/vmlinux.lds.h~x86-put-note-sections-into-a-pt_note-segment-in-vmlinux
   86.77 ++++ a/include/asm-generic/vmlinux.lds.h
   86.78 +@@ -194,3 +194,6 @@
   86.79 + 		.stab.index 0 : { *(.stab.index) }			\
   86.80 + 		.stab.indexstr 0 : { *(.stab.indexstr) }		\
   86.81 + 		.comment 0 : { *(.comment) }
   86.82 ++
   86.83 ++#define NOTES								\
   86.84 ++		.notes : { *(.note.*) } :note
   86.85 +diff -puN /dev/null include/linux/elfnote.h
   86.86 +--- /dev/null
   86.87 ++++ a/include/linux/elfnote.h
   86.88 +@@ -0,0 +1,88 @@
   86.89 ++#ifndef _LINUX_ELFNOTE_H
   86.90 ++#define _LINUX_ELFNOTE_H
   86.91 ++/*
   86.92 ++ * Helper macros to generate ELF Note structures, which are put into a
   86.93 ++ * PT_NOTE segment of the final vmlinux image.  These are useful for
   86.94 ++ * including name-value pairs of metadata into the kernel binary (or
   86.95 ++ * modules?) for use by external programs.
   86.96 ++ *
   86.97 ++ * Each note has three parts: a name, a type and a desc.  The name is
   86.98 ++ * intended to distinguish the note's originator, so it would be a
   86.99 ++ * company, project, subsystem, etc; it must be in a suitable form for
  86.100 ++ * use in a section name.  The type is an integer which is used to tag
  86.101 ++ * the data, and is considered to be within the "name" namespace (so
  86.102 ++ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42).  The
  86.103 ++ * "desc" field is the actual data.  There are no constraints on the
  86.104 ++ * desc field's contents, though typically they're fairly small.
  86.105 ++ *
  86.106 ++ * All notes from a given NAME are put into a section named
  86.107 ++ * .note.NAME.  When the kernel image is finally linked, all the notes
  86.108 ++ * are packed into a single .notes section, which is mapped into the
  86.109 ++ * PT_NOTE segment.  Because notes for a given name are grouped into
   86.110 ++ * the same section, they'll all be adjacent in the output file.
  86.111 ++ *
  86.112 ++ * This file defines macros for both C and assembler use.  Their
  86.113 ++ * syntax is slightly different, but they're semantically similar.
  86.114 ++ *
  86.115 ++ * See the ELF specification for more detail about ELF notes.
  86.116 ++ */
  86.117 ++
  86.118 ++#ifdef __ASSEMBLER__
  86.119 ++/*
  86.120 ++ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
  86.121 ++ * turn out to be the same size and shape), followed by the name and
  86.122 ++ * desc data with appropriate padding.  The 'desc' argument includes
  86.123 ++ * the assembler pseudo op defining the type of the data: .asciz
  86.124 ++ * "hello, world"
  86.125 ++ */
  86.126 ++.macro ELFNOTE name type desc:vararg
  86.127 ++.pushsection ".note.\name"
  86.128 ++  .align 4
  86.129 ++  .long 2f - 1f			/* namesz */
  86.130 ++  .long 4f - 3f			/* descsz */
  86.131 ++  .long \type
  86.132 ++1:.asciz "\name"
  86.133 ++2:.align 4
  86.134 ++3:\desc
  86.135 ++4:.align 4
  86.136 ++.popsection
  86.137 ++.endm
  86.138 ++#else	/* !__ASSEMBLER__ */
  86.139 ++#include <linux/elf.h>
  86.140 ++/*
  86.141 ++ * Use an anonymous structure which matches the shape of
  86.142 ++ * Elf{32,64}_Nhdr, but includes the name and desc data.  The size and
  86.143 ++ * type of name and desc depend on the macro arguments.  "name" must
  86.144 ++ * be a literal string, and "desc" must be passed by value.  You may
  86.145 ++ * only define one note per line, since __LINE__ is used to generate
  86.146 ++ * unique symbols.
  86.147 ++ */
  86.148 ++#define _ELFNOTE_PASTE(a,b)	a##b
  86.149 ++#define _ELFNOTE(size, name, unique, type, desc)			\
  86.150 ++	static const struct {						\
  86.151 ++		struct elf##size##_note _nhdr;				\
  86.152 ++		unsigned char _name[sizeof(name)]			\
  86.153 ++		__attribute__((aligned(sizeof(Elf##size##_Word))));	\
  86.154 ++		typeof(desc) _desc					\
  86.155 ++			     __attribute__((aligned(sizeof(Elf##size##_Word)))); \
  86.156 ++	} _ELFNOTE_PASTE(_note_, unique)				\
  86.157 ++		__attribute_used__					\
  86.158 ++		__attribute__((section(".note." name),			\
  86.159 ++			       aligned(sizeof(Elf##size##_Word)),	\
  86.160 ++			       unused)) = {				\
  86.161 ++		{							\
  86.162 ++			sizeof(name),					\
  86.163 ++			sizeof(desc),					\
  86.164 ++			type,						\
  86.165 ++		},							\
  86.166 ++		name,							\
  86.167 ++		desc							\
  86.168 ++	}
  86.169 ++#define ELFNOTE(size, name, type, desc)		\
  86.170 ++	_ELFNOTE(size, name, __LINE__, type, desc)
  86.171 ++
  86.172 ++#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
  86.173 ++#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
  86.174 ++#endif	/* __ASSEMBLER__ */
  86.175 ++
  86.176 ++#endif /* _LINUX_ELFNOTE_H */
  86.177 +_
    87.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    87.2 +++ b/patches/linux-2.6.16.13/x86_64-put-note-sections-into-a-pt_note-segment-in-vmlinux.patch	Wed Aug 23 11:11:27 2006 -0600
    87.3 @@ -0,0 +1,60 @@
    87.4 +diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
    87.5 +index 7c4de31..ef418b3 100644
    87.6 +--- a/arch/x86_64/kernel/vmlinux.lds.S
    87.7 ++++ b/arch/x86_64/kernel/vmlinux.lds.S
    87.8 +@@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86
    87.9 + OUTPUT_ARCH(i386:x86-64)
   87.10 + ENTRY(phys_startup_64)
   87.11 + jiffies_64 = jiffies;
   87.12 ++PHDRS {
   87.13 ++	text PT_LOAD FLAGS(5);	/* R_E */
   87.14 ++	data PT_LOAD FLAGS(7);	/* RWE */
   87.15 ++	user PT_LOAD FLAGS(7);	/* RWE */
   87.16 ++	note PT_NOTE FLAGS(4);	/* R__ */
   87.17 ++}
   87.18 + SECTIONS
   87.19 + {
   87.20 +   . = __START_KERNEL;
   87.21 +@@ -31,7 +37,7 @@ SECTIONS
   87.22 + 	KPROBES_TEXT
   87.23 + 	*(.fixup)
   87.24 + 	*(.gnu.warning)
   87.25 +-	} = 0x9090
   87.26 ++	} :text = 0x9090
   87.27 +   				/* out-of-line lock text */
   87.28 +   .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) { *(.text.lock) }
   87.29 + 
   87.30 +@@ -57,7 +63,7 @@ #endif
   87.31 +   .data : AT(ADDR(.data) - LOAD_OFFSET) {
   87.32 + 	*(.data)
   87.33 + 	CONSTRUCTORS
   87.34 +-	}
   87.35 ++	} :data
   87.36 + 
   87.37 +   _edata = .;			/* End of data section */
   87.38 + 
   87.39 +@@ -89,7 +95,7 @@ #define VVIRT_OFFSET (VSYSCALL_ADDR - VS
   87.40 + #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
   87.41 + 
   87.42 +   . = VSYSCALL_ADDR;
   87.43 +-  .vsyscall_0 :	 AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
   87.44 ++  .vsyscall_0 :	 AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
   87.45 +   __vsyscall_0 = VSYSCALL_VIRT_ADDR;
   87.46 + 
   87.47 +   . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
   87.48 +@@ -132,7 +138,7 @@ #undef VVIRT
   87.49 +   . = ALIGN(8192);		/* init_task */
   87.50 +   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
   87.51 + 	*(.data.init_task)
   87.52 +-  }
   87.53 ++  } :data
   87.54 + 
   87.55 +   . = ALIGN(4096);
   87.56 +   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
   87.57 +@@ -235,4 +241,6 @@ #endif
   87.58 +   STABS_DEBUG
   87.59 + 
   87.60 +   DWARF_DEBUG
   87.61 ++
   87.62 ++  NOTES
   87.63 + }
    88.1 --- a/tools/blktap/lib/Makefile	Tue Aug 22 14:45:49 2006 -0600
    88.2 +++ b/tools/blktap/lib/Makefile	Wed Aug 23 11:11:27 2006 -0600
    88.3 @@ -61,7 +61,7 @@ libblktap.a: $(OBJS)
    88.4  	      -o libblktap.so.$(MAJOR).$(MINOR) $^ $(LIBS)
    88.5  	ln -sf libblktap.so.$(MAJOR).$(MINOR) libblktap.so.$(MAJOR)
    88.6  	ln -sf libblktap.so.$(MAJOR) libblktap.so
    88.7 -	ar rc $@ libblktap.so
    88.8 +	$(AR) rc $@ libblktap.so
    88.9  
   88.10  .PHONY: TAGS all build clean install libblktap
   88.11  
    89.1 --- a/tools/examples/xen-network-common.sh	Tue Aug 22 14:45:49 2006 -0600
    89.2 +++ b/tools/examples/xen-network-common.sh	Wed Aug 23 11:11:27 2006 -0600
    89.3 @@ -143,6 +143,7 @@ add_to_bridge () {
    89.4  
    89.5      # Don't add $dev to $bridge if it's already on a bridge.
    89.6      if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
    89.7 +	ip link set ${dev} up || true
    89.8  	return
    89.9      fi
   89.10      brctl addif ${bridge} ${dev}
    90.1 --- a/tools/examples/xend-config.sxp	Tue Aug 22 14:45:49 2006 -0600
    90.2 +++ b/tools/examples/xend-config.sxp	Wed Aug 23 11:11:27 2006 -0600
    90.3 @@ -54,7 +54,7 @@
    90.4  #  (xend-relocation-hosts-allow '^localhost$ ^.*\.example\.org$')
    90.5  #
    90.6  #(xend-relocation-hosts-allow '')
    90.7 -(xend-relocation-hosts-allow '^localhost$ ^localhost\.localdomain$')
    90.8 +(xend-relocation-hosts-allow '^localhost$ ^localhost\\.localdomain$')
    90.9  
   90.10  # The limit (in kilobytes) on the size of the console buffer
   90.11  #(console-limit 1024)
    91.1 --- a/tools/examples/xmexample.hvm	Tue Aug 22 14:45:49 2006 -0600
    91.2 +++ b/tools/examples/xmexample.hvm	Wed Aug 23 11:11:27 2006 -0600
    91.3 @@ -27,6 +27,10 @@ builder='hvm'
    91.4  #          and modules. Allocating less than 32MBs is not recommended.
    91.5  memory = 128
    91.6  
    91.7 +# Shadow pagetable memory for the domain, in MB.
    91.8 +# Should be at least 2KB per MB of domain memory, plus a few MB per vcpu.
    91.9 +shadow_memory = 8
   91.10 +
   91.11  # A name for your domain. All domains must have different names.
   91.12  name = "ExampleHVMDomain"
   91.13  
   91.14 @@ -112,7 +116,9 @@ device_model = '/usr/' + arch_libdir + '
   91.15  
   91.16  #-----------------------------------------------------------------------------
   91.17  # boot on floppy (a), hard disk (c) or CD-ROM (d) 
   91.18 -#boot=[a|c|d]
   91.19 +# default: hard disk, cd-rom, floppy
   91.20 +#boot="cda"
   91.21 +
   91.22  #-----------------------------------------------------------------------------
   91.23  #  write to temporary files instead of disk image files
   91.24  #snapshot=1
    92.1 --- a/tools/examples/xmexample.vti	Tue Aug 22 14:45:49 2006 -0600
    92.2 +++ b/tools/examples/xmexample.vti	Wed Aug 23 11:11:27 2006 -0600
    92.3 @@ -66,7 +66,9 @@ device_model = '/usr/' + arch_libdir + '
    92.4  
    92.5  #-----------------------------------------------------------------------------
    92.6  # boot on floppy (a), hard disk (c) or CD-ROM (d) 
    92.7 -#boot=[a|c|d]
    92.8 +# default: hard disk, cd-rom, floppy
    92.9 +#boot="cda"
   92.10 +
   92.11  #-----------------------------------------------------------------------------
   92.12  #  write to temporary files instead of disk image files
   92.13  #snapshot=1
    93.1 --- a/tools/firmware/acpi/acpi_fadt.h	Tue Aug 22 14:45:49 2006 -0600
    93.2 +++ b/tools/firmware/acpi/acpi_fadt.h	Wed Aug 23 11:11:27 2006 -0600
    93.3 @@ -59,8 +59,7 @@
    93.4  #define ACPI_PM1A_EVT_BLK_ADDRESS_SPACE_ID  ACPI_SYSTEM_IO
    93.5  #define ACPI_PM1A_EVT_BLK_BIT_WIDTH         0x20
    93.6  #define ACPI_PM1A_EVT_BLK_BIT_OFFSET        0x00
    93.7 -//#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
    93.8 -#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c040
    93.9 +#define ACPI_PM1A_EVT_BLK_ADDRESS           0x000000000000c010
   93.10  
   93.11  //
   93.12  // PM1B Event Register Block Generic Address Information
    94.1 --- a/tools/firmware/hvmloader/Makefile	Tue Aug 22 14:45:49 2006 -0600
    94.2 +++ b/tools/firmware/hvmloader/Makefile	Wed Aug 23 11:11:27 2006 -0600
    94.3 @@ -31,8 +31,6 @@ LOADADDR = 0x100000
    94.4  DEFINES  =-DDEBUG
    94.5  XENINC   =-I$(XEN_ROOT)/tools/libxc
    94.6  
    94.7 -OBJECTS	 = hvmloader.o acpi_madt.o 
    94.8 -
    94.9  # Disable PIE/SSP if GCC supports them. They can break us.
   94.10  CFLAGS  += $(call test-gcc-flag,$(CC),-nopie)
   94.11  CFLAGS  += $(call test-gcc-flag,$(CC),-fno-stack-protector)
   94.12 @@ -42,7 +40,7 @@ OBJCOPY  = objcopy
   94.13  CFLAGS  += $(DEFINES) -I. $(XENINC) -fno-builtin -O2 -msoft-float
   94.14  LDFLAGS  = -m32 -nostdlib -Wl,-N -Wl,-Ttext -Wl,$(LOADADDR)
   94.15  
   94.16 -SRCS = hvmloader.c acpi_madt.c mp_tables.c util.c
   94.17 +SRCS = hvmloader.c acpi_madt.c mp_tables.c util.c smbios.c
   94.18  OBJS = $(patsubst %.c,%.o,$(SRCS))
   94.19  
   94.20  .PHONY: all
   94.21 @@ -54,7 +52,7 @@ hvmloader: roms.h $(SRCS)
   94.22  	$(OBJCOPY) hvmloader.tmp hvmloader
   94.23  	rm -f hvmloader.tmp
   94.24  
   94.25 -roms.h:	../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin
   94.26 +roms.h:	../rombios/BIOS-bochs-latest ../vgabios/VGABIOS-lgpl-latest.bin ../vgabios/VGABIOS-lgpl-latest.cirrus.bin ../vmxassist/vmxassist.bin ../acpi/acpi.bin
   94.27  	sh ./mkhex rombios ../rombios/BIOS-bochs-latest > roms.h
   94.28  	sh ./mkhex vgabios_stdvga ../vgabios/VGABIOS-lgpl-latest.bin >> roms.h
   94.29  	sh ./mkhex vgabios_cirrusvga ../vgabios/VGABIOS-lgpl-latest.cirrus.bin >> roms.h
   94.30 @@ -64,5 +62,5 @@ roms.h:	../rombios/BIOS-bochs-latest ../
   94.31  .PHONY: clean
   94.32  clean:
   94.33  	rm -f roms.h acpi.h
   94.34 -	rm -f hvmloader hvmloader.tmp hvmloader.o $(OBJECTS)
   94.35 +	rm -f hvmloader hvmloader.tmp hvmloader.o $(OBJS)
   94.36  
    95.1 --- a/tools/firmware/hvmloader/hvmloader.c	Tue Aug 22 14:45:49 2006 -0600
    95.2 +++ b/tools/firmware/hvmloader/hvmloader.c	Wed Aug 23 11:11:27 2006 -0600
    95.3 @@ -25,8 +25,9 @@
    95.4  #include "../acpi/acpi2_0.h"  /* for ACPI_PHYSICAL_ADDRESS */
    95.5  #include "hypercall.h"
    95.6  #include "util.h"
    95.7 +#include "smbios.h"
    95.8  #include <xen/version.h>
    95.9 -#include <xen/hvm/hvm_info_table.h>
   95.10 +#include <xen/hvm/params.h>
   95.11  
   95.12  /* memory map */
   95.13  #define HYPERCALL_PHYSICAL_ADDRESS	0x00080000
   95.14 @@ -116,15 +117,6 @@ check_amd(void)
   95.15  }
   95.16  
   95.17  static void
   95.18 -cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
   95.19 -{
   95.20 -	__asm__ __volatile__(
   95.21 -		"cpuid"
   95.22 -		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
   95.23 -		: "0" (idx) );
   95.24 -}
   95.25 -
   95.26 -static void
   95.27  wrmsr(uint32_t idx, uint64_t v)
   95.28  {
   95.29  	__asm__ __volatile__(
   95.30 @@ -172,7 +164,7 @@ init_hypercalls(void)
   95.31  int
   95.32  main(void)
   95.33  {
   95.34 -	struct hvm_info_table *t = get_hvm_info_table();
   95.35 +	struct xen_hvm_param hvm_param;
   95.36  
   95.37  	puts("HVM Loader\n");
   95.38  
   95.39 @@ -180,7 +172,10 @@ main(void)
   95.40  
   95.41  	puts("Loading ROMBIOS ...\n");
   95.42  	memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, sizeof(rombios));
   95.43 -	if (t->apic_enabled)
   95.44 +
   95.45 +	hvm_param.domid = DOMID_SELF;
   95.46 +	hvm_param.index = HVM_PARAM_APIC_ENABLED;
   95.47 +	if (!hypercall_hvm_op(HVMOP_get_param, &hvm_param) && hvm_param.value)
   95.48  		create_mp_tables();
   95.49  	
   95.50  	if (cirrus_check()) {
   95.51 @@ -206,6 +201,9 @@ main(void)
   95.52  		}
   95.53  	}
   95.54  
   95.55 +	puts("Writing SMBIOS tables ...\n");
   95.56 +	hvm_write_smbios_tables();
   95.57 +
   95.58  	if (check_amd()) {
   95.59  		/* AMD implies this is SVM */
   95.60                  puts("SVM go ...\n");
    96.1 --- a/tools/firmware/hvmloader/hypercall.h	Tue Aug 22 14:45:49 2006 -0600
    96.2 +++ b/tools/firmware/hvmloader/hypercall.h	Wed Aug 23 11:11:27 2006 -0600
    96.3 @@ -31,6 +31,8 @@
    96.4  #ifndef __HVMLOADER_HYPERCALL_H__
    96.5  #define __HVMLOADER_HYPERCALL_H__
    96.6  
    96.7 +#include <xen/xen.h>
    96.8 +
    96.9  /*
   96.10   * NB. Hypercall address needs to be relative to a linkage symbol for
   96.11   * some version of ld to relocate the relative calls properly.
    97.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    97.2 +++ b/tools/firmware/hvmloader/smbios.c	Wed Aug 23 11:11:27 2006 -0600
    97.3 @@ -0,0 +1,606 @@
    97.4 +/*
    97.5 + * smbios.c - Generate SMBIOS tables for Xen HVM domU's.
    97.6 + *
    97.7 + * This program is free software; you can redistribute it and/or modify
    97.8 + * it under the terms of the GNU General Public License as published by
    97.9 + * the Free Software Foundation; either version 2 of the License, or
   97.10 + * (at your option) any later version.
   97.11 + *
   97.12 + * This program is distributed in the hope that it will be useful,
   97.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   97.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   97.15 + * GNU General Public License for more details.
   97.16 + *
   97.17 + * You should have received a copy of the GNU General Public License
   97.18 + * along with this program; if not, write to the Free Software
   97.19 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   97.20 + *
   97.21 + * Copyright (C) IBM Corporation, 2006
   97.22 + *
   97.23 + * Authors: Andrew D. Ball <aball@us.ibm.com>
   97.24 + */
   97.25 +
   97.26 +#include <stdint.h>
   97.27 +#include <xen/version.h>
   97.28 +#include <xen/hvm/e820.h>
   97.29 +#include "smbios.h"
   97.30 +#include "smbios_types.h"
   97.31 +#include "util.h"
   97.32 +#include "hypercall.h"
   97.33 +
   97.34 +/* write SMBIOS tables starting at 'start', without writing more
   97.35 +   than 'max_size' bytes.
   97.36 +
   97.37 +   Return the number of bytes written
   97.38 +*/
   97.39 +static size_t
   97.40 +write_smbios_tables(void *start, size_t max_size,
   97.41 +		    uint32_t vcpus, uint64_t memsize,
   97.42 +		    uint8_t uuid[16], char *xen_version,
   97.43 +		    uint32_t xen_major_version, uint32_t xen_minor_version);
   97.44 +
   97.45 +static void
   97.46 +get_cpu_manufacturer(char *buf, int len);
   97.47 +static size_t
   97.48 +smbios_table_size(uint32_t vcpus, const char *xen_version,
   97.49 +		  const char *processor_manufacturer);
   97.50 +static void *
   97.51 +smbios_entry_point_init(void *start,
   97.52 +			uint16_t max_structure_size,
   97.53 +			uint16_t structure_table_length,
   97.54 +			uint32_t structure_table_address,
   97.55 +			uint16_t number_of_structures);
   97.56 +static void *
   97.57 +smbios_type_0_init(void *start, const char *xen_version,
   97.58 +		   uint32_t xen_major_version, uint32_t xen_minor_version);
   97.59 +static void *
   97.60 +smbios_type_1_init(void *start, const char *xen_version, 
   97.61 +		   uint8_t uuid[16]);
   97.62 +static void *
   97.63 +smbios_type_3_init(void *start);
   97.64 +static void *
   97.65 +smbios_type_4_init(void *start, unsigned int cpu_number,
   97.66 +		   char *cpu_manufacturer);
   97.67 +static void *
   97.68 +smbios_type_16_init(void *start, uint32_t memory_size_mb);
   97.69 +static void *
   97.70 +smbios_type_17_init(void *start, uint32_t memory_size_mb);
   97.71 +static void *
   97.72 +smbios_type_19_init(void *start, uint32_t memory_size_mb);
   97.73 +static void *
   97.74 +smbios_type_20_init(void *start, uint32_t memory_size_mb);
   97.75 +static void *
   97.76 +smbios_type_32_init(void *start);
   97.77 +void *
   97.78 +smbios_type_127_init(void *start);
   97.79 +
   97.80 +static void
   97.81 +get_cpu_manufacturer(char *buf, int len)
   97.82 +{
   97.83 +	char id[12];
   97.84 +	uint32_t eax = 0;
   97.85 +
   97.86 +	cpuid(0, &eax, (uint32_t *)&id[0], (uint32_t *)&id[8], (uint32_t *)&id[4]);
   97.87 +
   97.88 +	if (memcmp(id, "GenuineIntel", 12) == 0)
   97.89 +		strncpy(buf, "Intel", len);
   97.90 +	else if (memcmp(id, "AuthenticAMD", 12) == 0)
   97.91 +		strncpy(buf, "AMD", len);
   97.92 +	else
   97.93 +		strncpy(buf, "unknown", len);
   97.94 +}
   97.95 +
   97.96 +
   97.97 +/* Calculate the size of the SMBIOS structure table.
   97.98 +*/
   97.99 +static size_t
  97.100 +smbios_table_size(uint32_t vcpus, const char *xen_version,
  97.101 +		  const char *processor_manufacturer)
  97.102 +{
  97.103 +	size_t size;
  97.104 +
  97.105 +	/* first compute size without strings or terminating 0 bytes */
  97.106 +	size =  sizeof(struct smbios_type_0) + sizeof(struct smbios_type_1) +
  97.107 +		sizeof(struct smbios_type_3) + sizeof(struct smbios_type_4)*vcpus +
  97.108 +		sizeof(struct smbios_type_16) + sizeof(struct smbios_type_17) +
  97.109 +		sizeof(struct smbios_type_19) + sizeof(struct smbios_type_20) +
  97.110 +		sizeof(struct smbios_type_32) + sizeof(struct smbios_type_127);
  97.111 +
  97.112 +	/* 5 structures with no strings, 2 null bytes each */
  97.113 +	size += 10;
  97.114 +
  97.115 +	/* Need to include 1 null byte per structure with strings (first
  97.116 +	   terminating null byte comes from the string terminator of the
  97.117 +	   last string). */
  97.118 +	size += 4 + vcpus;
  97.119 +
   97.120 +	/* type 0: "Xen" and xen_version (no release_date string is written) */
  97.121 +	size += strlen("Xen") + strlen(xen_version) + 2;
  97.122 +	/* type 1: "Xen", xen_version, "HVM domU" */
  97.123 +	size += strlen("Xen") + strlen("HVM domU") + strlen(xen_version) + 3;
  97.124 +	/* type 3: "Xen" */
  97.125 +	size += strlen("Xen") + 1;
  97.126 +	/* type 4: socket designation ("CPU n"), processor_manufacturer */
  97.127 +	size += vcpus * (strlen("CPU n") + strlen(processor_manufacturer) + 2);
  97.128 +	/* Make room for two-digit CPU numbers if necessary -- doesn't handle
  97.129 +	   vcpus > 99 */
  97.130 +	if (vcpus > 9)
  97.131 +		size += vcpus - 9;
  97.132 +	/* type 17: device locator string ("DIMM 1") */
  97.133 +	size += strlen("DIMM 1") + 1;
  97.134 +
  97.135 +	return size;
  97.136 +}
  97.137 +
  97.138 +static size_t
  97.139 +write_smbios_tables(void *start, size_t max_size,
  97.140 +		    uint32_t vcpus, uint64_t memsize,
  97.141 +		    uint8_t uuid[16], char *xen_version,
  97.142 +		    uint32_t xen_major_version, uint32_t xen_minor_version)
  97.143 +{
  97.144 +	unsigned cpu_num;
  97.145 +	void *p = start;
  97.146 +	char cpu_manufacturer[15];
  97.147 +	size_t structure_table_length;
  97.148 +
  97.149 +	get_cpu_manufacturer(cpu_manufacturer, 15);
  97.150 +
  97.151 +
  97.152 +	structure_table_length = smbios_table_size(vcpus, xen_version,
  97.153 +						   cpu_manufacturer);
  97.154 +
  97.155 +	if (structure_table_length + sizeof(struct smbios_entry_point) > max_size)
  97.156 +		return 0;
  97.157 +
  97.158 +	p = smbios_entry_point_init(p, sizeof(struct smbios_type_4), 
  97.159 +				    structure_table_length,
  97.160 +				    (uint32_t)start + 
  97.161 +				    sizeof(struct smbios_entry_point),
  97.162 +				    9 + vcpus);
  97.163 +
  97.164 +	p = smbios_type_0_init(p, xen_version, xen_major_version,
  97.165 +			       xen_minor_version);
  97.166 +	p = smbios_type_1_init(p, xen_version, uuid);
  97.167 +	p = smbios_type_3_init(p);
  97.168 +	for (cpu_num = 1; cpu_num <= vcpus; ++cpu_num)
  97.169 +		p = smbios_type_4_init(p, cpu_num, cpu_manufacturer);
  97.170 +	p = smbios_type_16_init(p, memsize);
  97.171 +	p = smbios_type_17_init(p, memsize);
  97.172 +	p = smbios_type_19_init(p, memsize);
  97.173 +	p = smbios_type_20_init(p, memsize);
  97.174 +	p = smbios_type_32_init(p);
  97.175 +	p = smbios_type_127_init(p);
  97.176 +
  97.177 +	return (size_t)((char*)p - (char*)start);
  97.178 +}
  97.179 +
  97.180 +/* This tries to figure out how much pseudo-physical memory (in MB)
  97.181 +   is allocated to the current domU.
  97.182 +
  97.183 +   It iterates through the e820 table, adding up the 'usable' and
  97.184 +   'reserved' entries and rounding up to the nearest MB.
  97.185 +
  97.186 +   The e820map is not at e820 in hvmloader, so this uses the
  97.187 +   E820_MAP_* constants from e820.h to pick it up where libxenguest
  97.188 +   left it.
  97.189 + */
  97.190 +static uint64_t
  97.191 +get_memsize(void)
  97.192 +{
  97.193 +	struct e820entry *map = NULL;
  97.194 +	uint8_t num_entries = 0;
  97.195 +	uint64_t memsize = 0;
  97.196 +	uint8_t i;
  97.197 +
  97.198 +	map = (struct e820entry *) (E820_MAP_PAGE + E820_MAP_OFFSET);
  97.199 +	num_entries = *((uint8_t *) (E820_MAP_PAGE + E820_MAP_NR_OFFSET));
  97.200 +
  97.201 +	/* walk through e820map, ignoring any entries that aren't marked
  97.202 +	   as usable or reserved. */
  97.203 +
  97.204 +	for (i = 0; i < num_entries; i++) {
  97.205 +		if (map->type == E820_RAM || map->type == E820_RESERVED)
  97.206 +			memsize += map->size;
  97.207 +		map++;
  97.208 +	}
  97.209 +
  97.210 +	/* Round up to the nearest MB.  The user specifies domU
  97.211 +	   pseudo-physical memory in megabytes, so not doing this
  97.212 +	   could easily lead to reporting one less MB than the user
  97.213 +	   specified. */
  97.214 +	if (memsize & ((1<<20)-1))
  97.215 +		memsize = (memsize >> 20) + 1;
  97.216 +	else
  97.217 +		memsize = (memsize >> 20);
  97.218 +
  97.219 +	return memsize;
  97.220 +}
  97.221 +
  97.222 +void
  97.223 +hvm_write_smbios_tables(void)
  97.224 +{
  97.225 +	uint8_t uuid[16]; /* ** This will break if xen_domain_handle_t is
  97.226 +			     not uint8_t[16]. ** */
  97.227 +	uint16_t xen_major_version, xen_minor_version;
  97.228 +	uint32_t xen_version;
  97.229 +	char xen_extra_version[XEN_EXTRAVERSION_LEN];
  97.230 +	/* guess conservatively on buffer length for Xen version string */
  97.231 +	char xen_version_str[80];
  97.232 +	/* temporary variables used to build up Xen version string */
  97.233 +	char *p = NULL; /* points to next point of insertion */
  97.234 +	unsigned len = 0; /* length of string already composed */
  97.235 +	char *tmp = NULL; /* holds result of itoa() */
  97.236 +	unsigned tmp_len; /* length of next string to add */
  97.237 +
  97.238 +	hypercall_xen_version(XENVER_guest_handle, uuid);
  97.239 +
  97.240 +	/* xen_version major and minor */
  97.241 +	xen_version = hypercall_xen_version(XENVER_version, NULL);
  97.242 +	xen_major_version = (uint16_t) (xen_version >> 16);
  97.243 +	xen_minor_version = (uint16_t) xen_version;
  97.244 +
  97.245 +	hypercall_xen_version(XENVER_extraversion, xen_extra_version);
  97.246 +
  97.247 +	/* build up human-readable Xen version string */
  97.248 +	p = xen_version_str;
  97.249 +	len = 0;
  97.250 +
  97.251 +	itoa(tmp, xen_major_version);
  97.252 +	tmp_len = strlen(tmp);
  97.253 +	len += tmp_len;
  97.254 +	if (len >= sizeof(xen_version_str))
  97.255 +		goto error_out;
  97.256 +	strcpy(p, tmp);
  97.257 +	p += tmp_len;
  97.258 +
  97.259 +	len++;
  97.260 +	if (len >= sizeof(xen_version_str))
  97.261 +		goto error_out;
  97.262 +	*p = '.';
  97.263 +	p++;
  97.264 +
  97.265 +	itoa(tmp, xen_minor_version);
  97.266 +	tmp_len = strlen(tmp);
  97.267 +	len += tmp_len;
  97.268 +	if (len >= sizeof(xen_version_str))
  97.269 +		goto error_out;
  97.270 +	strcpy(p, tmp);
  97.271 +	p += tmp_len;
  97.272 +
  97.273 +	tmp_len = strlen(xen_extra_version);
  97.274 +	len += tmp_len;
  97.275 +	if (len >= sizeof(xen_version_str))
  97.276 +		goto error_out;
  97.277 +	strcpy(p, xen_extra_version);
  97.278 +	p += tmp_len;
  97.279 +
  97.280 +	xen_version_str[sizeof(xen_version_str)-1] = '\0';
  97.281 +
  97.282 +	write_smbios_tables((void *) SMBIOS_PHYSICAL_ADDRESS,
  97.283 +			    SMBIOS_SIZE_LIMIT, get_vcpu_nr(), get_memsize(),
  97.284 +			    uuid, xen_version_str,
  97.285 +			    xen_major_version, xen_minor_version);
  97.286 +	return;
  97.287 +
  97.288 + error_out:
  97.289 +	puts("Could not write SMBIOS tables, error in hvmloader.c:"
  97.290 +	     "hvm_write_smbios_tables()\n");
  97.291 +}
  97.292 +
  97.293 +
  97.294 +static void *
  97.295 +smbios_entry_point_init(void *start,
  97.296 +			uint16_t max_structure_size,
  97.297 +			uint16_t structure_table_length,
  97.298 +			uint32_t structure_table_address,
  97.299 +			uint16_t number_of_structures)
  97.300 +{
  97.301 +	uint8_t sum;
  97.302 +	int i;
  97.303 +	struct smbios_entry_point *ep = (struct smbios_entry_point *)start;
  97.304 +
  97.305 +	strncpy(ep->anchor_string, "_SM_", 4);
  97.306 +	ep->length = 0x1f;
  97.307 +	ep->smbios_major_version = 2;
  97.308 +	ep->smbios_minor_version = 4;
  97.309 +	ep->max_structure_size = max_structure_size;
  97.310 +	ep->entry_point_revision = 0;
  97.311 +	memset(ep->formatted_area, 0, 5);
  97.312 +	strncpy(ep->intermediate_anchor_string, "_DMI_", 5);
  97.313 +    
  97.314 +	ep->structure_table_length = structure_table_length;
  97.315 +	ep->structure_table_address = structure_table_address;
  97.316 +	ep->number_of_structures = number_of_structures;
  97.317 +	ep->smbios_bcd_revision = 0x24;
  97.318 +
  97.319 +	ep->checksum = 0;
  97.320 +	ep->intermediate_checksum = 0;
  97.321 +    
  97.322 +	sum = 0;
  97.323 +	for (i = 0; i < 0x10; ++i)
  97.324 +		sum += ((int8_t *)start)[i];
  97.325 +	ep->checksum = -sum;
  97.326 +
  97.327 +	sum = 0;
  97.328 +	for (i = 0x10; i < ep->length; ++i)
  97.329 +		sum += ((int8_t *)start)[i];
  97.330 +	ep->intermediate_checksum = -sum;
  97.331 +
  97.332 +	return (char *)start + sizeof(struct smbios_entry_point);
  97.333 +}
  97.334 +
  97.335 +/* Type 0 -- BIOS Information */
  97.336 +static void *
  97.337 +smbios_type_0_init(void *start, const char *xen_version,
  97.338 +		   uint32_t xen_major_version, uint32_t xen_minor_version)
  97.339 +{
  97.340 +	struct smbios_type_0 *p = (struct smbios_type_0 *)start;
  97.341 +    
  97.342 +	p->header.type = 0;
  97.343 +	p->header.length = sizeof(struct smbios_type_0);
  97.344 +	p->header.handle = 0;
  97.345 +    
  97.346 +	p->vendor_str = 1;
  97.347 +	p->version_str = 2;
  97.348 +	p->starting_address_segment = 0xe800;
  97.349 +	p->release_date_str = 0;
  97.350 +	p->rom_size = 0;
  97.351 +    
  97.352 +	memset(p->characteristics, 0, 8);
  97.353 +	p->characteristics[7] = 0x08; /* BIOS characteristics not supported */
  97.354 +	p->characteristics_extension_bytes[0] = 0;
  97.355 +	p->characteristics_extension_bytes[1] = 0;
  97.356 +    
  97.357 +	p->major_release = (uint8_t) xen_major_version;
  97.358 +	p->minor_release = (uint8_t) xen_minor_version;
  97.359 +	p->embedded_controller_major = 0xff;
  97.360 +	p->embedded_controller_minor = 0xff;
  97.361 +
  97.362 +	start += sizeof(struct smbios_type_0);
  97.363 +	strcpy((char *)start, "Xen");
  97.364 +	start += strlen("Xen") + 1;
  97.365 +	strcpy((char *)start, xen_version);
  97.366 +	start += strlen(xen_version) + 1;
  97.367 +
  97.368 +	*((uint8_t *)start) = 0;
  97.369 +	return start + 1;
  97.370 +}
  97.371 +
  97.372 +/* Type 1 -- System Information */
  97.373 +static void *
  97.374 +smbios_type_1_init(void *start, const char *xen_version, 
  97.375 +		   uint8_t uuid[16])
  97.376 +{
  97.377 +	struct smbios_type_1 *p = (struct smbios_type_1 *)start;
  97.378 +	p->header.type = 1;
  97.379 +	p->header.length = sizeof(struct smbios_type_1);
  97.380 +	p->header.handle = 0x100;
  97.381 +
  97.382 +	p->manufacturer_str = 1;
  97.383 +	p->product_name_str = 2;
  97.384 +	p->version_str = 3;
  97.385 +	p->serial_number_str = 0;
  97.386 +    
  97.387 +	memcpy(p->uuid, uuid, 16);
  97.388 +
  97.389 +	p->wake_up_type = 0x06; /* power switch */
  97.390 +	p->sku_str = 0;
  97.391 +	p->family_str = 0;
  97.392 +
  97.393 +	start += sizeof(struct smbios_type_1);
  97.394 +    
  97.395 +	strcpy((char *)start, "Xen");
  97.396 +	start += strlen("Xen") + 1;
  97.397 +	strcpy((char *)start, "HVM domU");
  97.398 +	start += strlen("HVM domU") + 1;
  97.399 +	strcpy((char *)start, xen_version);
  97.400 +	start += strlen(xen_version) + 1;
  97.401 +	*((uint8_t *)start) = 0;
  97.402 +    
  97.403 +	return start+1; 
  97.404 +}
  97.405 +
  97.406 +/* Type 3 -- System Enclosure */
  97.407 +static void *
  97.408 +smbios_type_3_init(void *start)
  97.409 +{
  97.410 +	struct smbios_type_3 *p = (struct smbios_type_3 *)start;
  97.411 +    
  97.412 +	p->header.type = 3;
  97.413 +	p->header.length = sizeof(struct smbios_type_3);
  97.414 +	p->header.handle = 0x300;
  97.415 +
  97.416 +	p->manufacturer_str = 1;
  97.417 +	p->type = 0x01; /* other */
  97.418 +	p->version_str = 0;
  97.419 +	p->serial_number_str = 0;
  97.420 +	p->asset_tag_str = 0;
  97.421 +	p->boot_up_state = 0x03; /* safe */
  97.422 +	p->power_supply_state = 0x03; /* safe */
  97.423 +	p->thermal_state = 0x03; /* safe */
  97.424 +	p->security_status = 0x02; /* unknown */
  97.425 +
  97.426 +	start += sizeof(struct smbios_type_3);
  97.427 +    
  97.428 +	strcpy((char *)start, "Xen");
  97.429 +	start += strlen("Xen") + 1;
  97.430 +	*((uint8_t *)start) = 0;
  97.431 +	return start+1;
  97.432 +}
  97.433 +
  97.434 +/* Type 4 -- Processor Information */
  97.435 +static void *
  97.436 +smbios_type_4_init(void *start, unsigned int cpu_number, char *cpu_manufacturer)
  97.437 +{
  97.438 +	char buf[80]; 
  97.439 +	struct smbios_type_4 *p = (struct smbios_type_4 *)start;
  97.440 +	uint32_t eax, ebx, ecx, edx;
  97.441 +
  97.442 +	p->header.type = 4;
  97.443 +	p->header.length = sizeof(struct smbios_type_4);
  97.444 +	p->header.handle = 0x400 + cpu_number;
  97.445 +
  97.446 +	p->socket_designation_str = 1;
  97.447 +	p->processor_type = 0x03; /* CPU */
  97.448 +	p->processor_family = 0x01; /* other */
  97.449 +	p->manufacturer_str = 2;
  97.450 +
  97.451 +	cpuid(1, &eax, &ebx, &ecx, &edx);
  97.452 +
  97.453 +	p->cpuid[0] = eax;
  97.454 +	p->cpuid[1] = edx;
  97.455 +
  97.456 +	p->version_str = 0;
  97.457 +	p->voltage = 0;
  97.458 +	p->external_clock = 0;
  97.459 +
  97.460 +	p->max_speed = 0; /* unknown */
  97.461 +	p->current_speed = 0; /* unknown */
  97.462 +
  97.463 +	p->status = 0x41; /* socket populated, CPU enabled */
  97.464 +	p->upgrade = 0x01; /* other */
  97.465 +
  97.466 +	start += sizeof(struct smbios_type_4);
  97.467 +
  97.468 +	strncpy(buf, "CPU ", sizeof(buf));
  97.469 +	if ((sizeof(buf) - strlen("CPU ")) >= 3)
  97.470 +		itoa(buf + strlen("CPU "), cpu_number);
  97.471 +
  97.472 +	strcpy((char *)start, buf);
  97.473 +	start += strlen(buf) + 1;
  97.474 +
  97.475 +	strcpy((char *)start, cpu_manufacturer);
  97.476 +	start += strlen(cpu_manufacturer) + 1; /* skip the manufacturer string just copied, not buf */
  97.477 +
  97.478 +	*((uint8_t *)start) = 0;
  97.479 +	return start+1;
  97.480 +}
  97.481 +
  97.482 +/* Type 16 -- Physical Memory Array */
  97.483 +static void *
  97.484 +smbios_type_16_init(void *start, uint32_t memsize)
  97.485 +{
  97.486 +	struct smbios_type_16 *p = (struct smbios_type_16*)start;
  97.487 +
  97.488 +	p->header.type = 16;
  97.489 +	p->header.handle = 0x1000;
  97.490 +	p->header.length = sizeof(struct smbios_type_16);
  97.491 +    
  97.492 +	p->location = 0x01; /* other */
  97.493 +	p->use = 0x03; /* system memory */
  97.494 +	p->error_correction = 0x01; /* other */
  97.495 +	p->maximum_capacity = memsize * 1024;
  97.496 +	p->memory_error_information_handle = 0xfffe; /* none provided */
  97.497 +	p->number_of_memory_devices = 1;
  97.498 +
  97.499 +	start += sizeof(struct smbios_type_16);
  97.500 +	*((uint16_t *)start) = 0;
  97.501 +	return start + 2;
  97.502 +}
  97.503 +
  97.504 +/* Type 17 -- Memory Device */
  97.505 +static void *
  97.506 +smbios_type_17_init(void *start, uint32_t memory_size_mb)
  97.507 +{
  97.508 +	struct smbios_type_17 *p = (struct smbios_type_17 *)start;
  97.509 +    
  97.510 +	p->header.type = 17;
  97.511 +	p->header.length = sizeof(struct smbios_type_17);
  97.512 +	p->header.handle = 0x1100;
  97.513 +
  97.514 +	p->physical_memory_array_handle = 0x1000;
  97.515 +	p->total_width = 64;
  97.516 +	p->data_width = 64;
  97.517 +	/* truncate memory_size_mb to 16 bits and clear most significant
  97.518 +	   bit [indicates size in MB] */
  97.519 +	p->size = (uint16_t) memory_size_mb & 0x7fff;
  97.520 +	p->form_factor = 0x09; /* DIMM */
  97.521 +	p->device_set = 0;
  97.522 +	p->device_locator_str = 1;
  97.523 +	p->bank_locator_str = 0;
  97.524 +	p->memory_type = 0x07; /* RAM */
  97.525 +	p->type_detail = 0;
  97.526 +
  97.527 +	start += sizeof(struct smbios_type_17);
  97.528 +	strcpy((char *)start, "DIMM 1");
  97.529 +	start += strlen("DIMM 1") + 1;
  97.530 +	*((uint8_t *)start) = 0;
  97.531 +
  97.532 +	return start+1;
  97.533 +}
  97.534 +
  97.535 +/* Type 19 -- Memory Array Mapped Address */
  97.536 +static void *
  97.537 +smbios_type_19_init(void *start, uint32_t memory_size_mb)
  97.538 +{
  97.539 +	struct smbios_type_19 *p = (struct smbios_type_19 *)start;
  97.540 +    
  97.541 +	p->header.type = 19;
  97.542 +	p->header.length = sizeof(struct smbios_type_19);
  97.543 +	p->header.handle = 0x1300;
  97.544 +
  97.545 +	p->starting_address = 0;
  97.546 +	p->ending_address = (memory_size_mb-1) * 1024;
  97.547 +	p->memory_array_handle = 0x1000;
  97.548 +	p->partition_width = 1;
  97.549 +
  97.550 +	start += sizeof(struct smbios_type_19);
  97.551 +	*((uint16_t *)start) = 0;
  97.552 +	return start + 2;
  97.553 +}
  97.554 +
  97.555 +/* Type 20 -- Memory Device Mapped Address */
  97.556 +static void *
  97.557 +smbios_type_20_init(void *start, uint32_t memory_size_mb)
  97.558 +{
  97.559 +	struct smbios_type_20 *p = (struct smbios_type_20 *)start;
  97.560 +
  97.561 +	p->header.type = 20;
  97.562 +	p->header.length = sizeof(struct smbios_type_20);
  97.563 +	p->header.handle = 0x1400;
  97.564 +
  97.565 +	p->starting_address = 0;
  97.566 +	p->ending_address = (memory_size_mb-1)*1024;
  97.567 +	p->memory_device_handle = 0x1100;
  97.568 +	p->memory_array_mapped_address_handle = 0x1300;
  97.569 +	p->partition_row_position = 1;
  97.570 +	p->interleave_position = 0;
  97.571 +	p->interleaved_data_depth = 0;
  97.572 +
  97.573 +	start += sizeof(struct smbios_type_20);
  97.574 +
  97.575 +	*((uint16_t *)start) = 0;
  97.576 +	return start+2;
  97.577 +}
  97.578 +
  97.579 +/* Type 32 -- System Boot Information */
  97.580 +static void *
  97.581 +smbios_type_32_init(void *start)
  97.582 +{
  97.583 +	struct smbios_type_32 *p = (struct smbios_type_32 *)start;
  97.584 +
  97.585 +	p->header.type = 32;
  97.586 +	p->header.length = sizeof(struct smbios_type_32);
  97.587 +	p->header.handle = 0x2000;
  97.588 +	memset(p->reserved, 0, 6);
  97.589 +	p->boot_status = 0; /* no errors detected */
  97.590 +    
  97.591 +	start += sizeof(struct smbios_type_32);
  97.592 +	*((uint16_t *)start) = 0;
  97.593 +	return start+2;
  97.594 +}
  97.595 +
  97.596 +/* Type 127 -- End of Table */
  97.597 +void *
  97.598 +smbios_type_127_init(void *start)
  97.599 +{
  97.600 +	struct smbios_type_127 *p = (struct smbios_type_127 *)start;
  97.601 +
  97.602 +	p->header.type = 127;
  97.603 +	p->header.length = sizeof(struct smbios_type_127);
  97.604 +	p->header.handle = 0x7f00;
  97.605 +
  97.606 +	start += sizeof(struct smbios_type_127);
  97.607 +	*((uint16_t *)start) = 0;
  97.608 +	return start + 2;
  97.609 +}
    98.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    98.2 +++ b/tools/firmware/hvmloader/smbios.h	Wed Aug 23 11:11:27 2006 -0600
    98.3 @@ -0,0 +1,38 @@
    98.4 +/*
    98.5 + * smbios.h - interface for Xen HVM SMBIOS generation
    98.6 + *
    98.7 + * This program is free software; you can redistribute it and/or modify
    98.8 + * it under the terms of the GNU General Public License as published by
    98.9 + * the Free Software Foundation; either version 2 of the License, or
   98.10 + * (at your option) any later version.
   98.11 + *
   98.12 + * This program is distributed in the hope that it will be useful,
   98.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   98.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   98.15 + * GNU General Public License for more details.
   98.16 + *
   98.17 + * You should have received a copy of the GNU General Public License
   98.18 + * along with this program; if not, write to the Free Software
   98.19 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   98.20 + *
   98.21 + * Copyright (C) IBM Corporation, 2006
   98.22 + *
   98.23 + * Authors: Andrew D. Ball <aball@us.ibm.com>
   98.24 + */
   98.25 +
   98.26 +#ifndef SMBIOS_H
   98.27 +#define SMBIOS_H
   98.28 +
   98.29 +#include <stdint.h>
   98.30 +#include <stdlib.h>
   98.31 +
   98.32 +/* These constants must agree with the ACPI e820 memory map as defined
   98.33 +   in tools/libxc/xc_hvm_build.c and the address the ROMBIOS pulls the
   98.34 +   SMBIOS entry point from in the smbios_init subroutine.
   98.35 + */
   98.36 +#define SMBIOS_PHYSICAL_ADDRESS 0x9f000
   98.37 +#define SMBIOS_SIZE_LIMIT 0x800
   98.38 +
   98.39 +void hvm_write_smbios_tables(void);
   98.40 +
   98.41 +#endif /* SMBIOS_H */
    99.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    99.2 +++ b/tools/firmware/hvmloader/smbios_types.h	Wed Aug 23 11:11:27 2006 -0600
    99.3 @@ -0,0 +1,182 @@
    99.4 +/*
    99.5 + * smbios_types.h - data structure definitions for Xen HVM SMBIOS support
    99.6 + *
    99.7 + * This program is free software; you can redistribute it and/or modify
    99.8 + * it under the terms of the GNU General Public License as published by
    99.9 + * the Free Software Foundation; either version 2 of the License, or
   99.10 + * (at your option) any later version.
   99.11 + *
   99.12 + * This program is distributed in the hope that it will be useful,
   99.13 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   99.14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   99.15 + * GNU General Public License for more details.
   99.16 + *
   99.17 + * You should have received a copy of the GNU General Public License
   99.18 + * along with this program; if not, write to the Free Software
   99.19 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   99.20 + *
   99.21 + * Copyright (C) IBM Corporation, 2006
   99.22 + *
   99.23 + * Authors: Andrew D. Ball <aball@us.ibm.com>
   99.24 + *
   99.25 + * See the SMBIOS 2.4 spec for more detail:
   99.26 + *   http://www.dmtf.org/standards/smbios/
   99.27 + */
   99.28 +
   99.29 +#ifndef SMBIOS_TYPES_H
   99.30 +#define SMBIOS_TYPES_H
   99.31 +
   99.32 +#include <stdint.h>
   99.33 +
   99.34 +/* SMBIOS entry point -- must be written to a 16-byte aligned address
   99.35 +   between 0xf0000 and 0xfffff. 
   99.36 + */
   99.37 +struct smbios_entry_point {
   99.38 +	char anchor_string[4];
   99.39 +	uint8_t checksum;
   99.40 +	uint8_t length;
   99.41 +	uint8_t smbios_major_version;
   99.42 +	uint8_t smbios_minor_version;
   99.43 +	uint16_t max_structure_size;
   99.44 +	uint8_t entry_point_revision;
   99.45 +	uint8_t formatted_area[5];
   99.46 +	char intermediate_anchor_string[5];
   99.47 +	uint8_t intermediate_checksum;
   99.48 +	uint16_t structure_table_length;
   99.49 +	uint32_t structure_table_address;
   99.50 +	uint16_t number_of_structures;
   99.51 +	uint8_t smbios_bcd_revision;
   99.52 +} __attribute__ ((packed));
   99.53 +
   99.54 +/* This goes at the beginning of every SMBIOS structure. */
   99.55 +struct smbios_structure_header {
   99.56 +	uint8_t type;
   99.57 +	uint8_t length;
   99.58 +	uint16_t handle;
   99.59 +} __attribute__ ((packed));
   99.60 +
   99.61 +/* SMBIOS type 0 - BIOS Information */
   99.62 +struct smbios_type_0 {
   99.63 +	struct smbios_structure_header header;
   99.64 +	uint8_t vendor_str;
   99.65 +	uint8_t version_str;
   99.66 +	uint16_t starting_address_segment;
   99.67 +	uint8_t release_date_str;
   99.68 +	uint8_t rom_size; 
   99.69 +	uint8_t characteristics[8];
   99.70 +	uint8_t characteristics_extension_bytes[2];
   99.71 +	uint8_t major_release;
   99.72 +	uint8_t minor_release;
   99.73 +	uint8_t embedded_controller_major;
   99.74 +	uint8_t embedded_controller_minor;
   99.75 +} __attribute__ ((packed));
   99.76 +
   99.77 +/* SMBIOS type 1 - System Information */
   99.78 +struct smbios_type_1 {
   99.79 +	struct smbios_structure_header header;
   99.80 +	uint8_t manufacturer_str;
   99.81 +	uint8_t product_name_str;
   99.82 +	uint8_t version_str;
   99.83 +	uint8_t serial_number_str;
   99.84 +	uint8_t uuid[16];
   99.85 +	uint8_t wake_up_type;
   99.86 +	uint8_t sku_str;
   99.87 +	uint8_t family_str;
   99.88 +} __attribute__ ((packed));
   99.89 +
   99.90 +/* SMBIOS type 3 - System Enclosure */
   99.91 +struct smbios_type_3 {
   99.92 +	struct smbios_structure_header header;
   99.93 +	uint8_t manufacturer_str;
   99.94 +	uint8_t type;
   99.95 +	uint8_t version_str;
   99.96 +	uint8_t serial_number_str;
   99.97 +	uint8_t asset_tag_str;
   99.98 +	uint8_t boot_up_state;
   99.99 +	uint8_t power_supply_state;
  99.100 +	uint8_t thermal_state;
  99.101 +	uint8_t security_status;
  99.102 +} __attribute__ ((packed));
  99.103 +
  99.104 +/* SMBIOS type 4 - Processor Information */
  99.105 +struct smbios_type_4 {
  99.106 +	struct smbios_structure_header header;
  99.107 +	uint8_t socket_designation_str;
  99.108 +	uint8_t processor_type;
  99.109 +	uint8_t processor_family;
  99.110 +	uint8_t manufacturer_str;
  99.111 +	uint32_t cpuid[2];
  99.112 +	uint8_t version_str;
  99.113 +	uint8_t voltage;
  99.114 +	uint16_t external_clock;
  99.115 +	uint16_t max_speed;
  99.116 +	uint16_t current_speed;
  99.117 +	uint8_t status;
  99.118 +	uint8_t upgrade;
  99.119 +} __attribute__ ((packed));
  99.120 +
  99.121 +/* SMBIOS type 16 - Physical Memory Array
  99.122 + *   Associated with one type 17 (Memory Device).
  99.123 + */
  99.124 +struct smbios_type_16 {
  99.125 +	struct smbios_structure_header header;
  99.126 +	uint8_t location;
  99.127 +	uint8_t use;
  99.128 +	uint8_t error_correction;
  99.129 +	uint32_t maximum_capacity;
  99.130 +	uint16_t memory_error_information_handle;
  99.131 +	uint16_t number_of_memory_devices;
  99.132 +} __attribute__ ((packed));
  99.133 +
  99.134 +/* SMBIOS type 17 - Memory Device 
  99.135 + *   Associated with one type 19
  99.136 + */
  99.137 +struct smbios_type_17 {
  99.138 +	struct smbios_structure_header header;
  99.139 +	uint16_t physical_memory_array_handle;
  99.140 +	uint16_t memory_error_information_handle;
  99.141 +	uint16_t total_width;
  99.142 +	uint16_t data_width;
  99.143 +	uint16_t size;
  99.144 +	uint8_t form_factor;
  99.145 +	uint8_t device_set;
  99.146 +	uint8_t device_locator_str;
  99.147 +	uint8_t bank_locator_str;
  99.148 +	uint8_t memory_type;
  99.149 +	uint16_t type_detail;
  99.150 +} __attribute__ ((packed));
  99.151 +
  99.152 +/* SMBIOS type 19 - Memory Array Mapped Address */
  99.153 +struct smbios_type_19 {
  99.154 +	struct smbios_structure_header header;
  99.155 +	uint32_t starting_address;
  99.156 +	uint32_t ending_address;
  99.157 +	uint16_t memory_array_handle;
  99.158 +	uint8_t partition_width;
  99.159 +} __attribute__ ((packed));
  99.160 +
  99.161 +/* SMBIOS type 20 - Memory Device Mapped Address */
  99.162 +struct smbios_type_20 {
  99.163 +	struct smbios_structure_header header;
  99.164 +	uint32_t starting_address;
  99.165 +	uint32_t ending_address;
  99.166 +	uint16_t memory_device_handle;
  99.167 +	uint16_t memory_array_mapped_address_handle;
  99.168 +	uint8_t partition_row_position;
  99.169 +	uint8_t interleave_position;
  99.170 +	uint8_t interleaved_data_depth;
  99.171 +} __attribute__ ((packed));
  99.172 +
  99.173 +/* SMBIOS type 32 - System Boot Information */
  99.174 +struct smbios_type_32 {
  99.175 +	struct smbios_structure_header header;
  99.176 +	uint8_t reserved[6];
  99.177 +	uint8_t boot_status;
  99.178 +} __attribute__ ((packed));
  99.179 +
  99.180 +/* SMBIOS type 127 -- End-of-table */
  99.181 +struct smbios_type_127 {
  99.182 +	struct smbios_structure_header header;
  99.183 +} __attribute__ ((packed));
  99.184 +
  99.185 +#endif /* SMBIOS_TYPES_H */
   100.1 --- a/tools/firmware/hvmloader/util.c	Tue Aug 22 14:45:49 2006 -0600
   100.2 +++ b/tools/firmware/hvmloader/util.c	Wed Aug 23 11:11:27 2006 -0600
   100.3 @@ -20,6 +20,7 @@
   100.4  
   100.5  #include "../acpi/acpi2_0.h"  /* for ACPI_PHYSICAL_ADDRESS */
   100.6  #include "util.h"
   100.7 +#include <stdint.h>
   100.8  
   100.9  void outw(uint16_t addr, uint16_t val)
  100.10  {
  100.11 @@ -94,3 +95,82 @@ void puts(const char *s)
  100.12  	while (*s)
  100.13  		outb(0xE9, *s++);
  100.14  }
  100.15 +
  100.16 +char *
  100.17 +strcpy(char *dest, const char *src)
  100.18 +{
  100.19 +	char *p = dest;
  100.20 +	while (*src)
  100.21 +		*p++ = *src++;
  100.22 +	*p = 0;
  100.23 +	return dest;
  100.24 +}
  100.25 +
  100.26 +char *
  100.27 +strncpy(char *dest, const char *src, unsigned n)
  100.28 +{
  100.29 +	int i = 0;
  100.30 +	char *p = dest;
  100.31 +
  100.32 +	/* write non-NUL characters from src into dest until we run
  100.33 +	   out of room in dest or encounter a NUL in src */
  100.34 +	while (i < n && *src) {
  100.35 +		*p++ = *src++;
  100.36 +		++i;
  100.37 +	}
  100.38 +
  100.39 +	/* pad remaining bytes of dest with NUL bytes */
  100.40 +	while (i < n) {
  100.41 +		*p++ = 0;
  100.42 +		++i;
  100.43 +	}
  100.44 +
  100.45 +	return dest;
  100.46 +}
  100.47 +
  100.48 +unsigned
  100.49 +strlen(const char *s)
  100.50 +{
  100.51 +	int i = 0;
  100.52 +	while (*s++)
  100.53 +		++i;
  100.54 +	return i;
  100.55 +}
  100.56 +
  100.57 +void *
  100.58 +memset(void *s, int c, unsigned n)
  100.59 +{
  100.60 +	uint8_t b = (uint8_t) c;
  100.61 +	uint8_t *p = (uint8_t *)s;
  100.62 +	int i;
  100.63 +	for (i = 0; i < n; ++i)
  100.64 +		*p++ = b;
  100.65 +	return s;
  100.66 +}
  100.67 +
  100.68 +int
  100.69 +memcmp(const void *s1, const void *s2, unsigned n)
  100.70 +{
  100.71 +	unsigned i;
  100.72 +	uint8_t *p1 = (uint8_t *) s1;
  100.73 +	uint8_t *p2 = (uint8_t *) s2;
  100.74 +
  100.75 +	for (i = 0; i < n; ++i) {
  100.76 +		if (p1[i] < p2[i])
  100.77 +			return -1;
  100.78 +		else if (p1[i] > p2[i])
  100.79 +			return 1;
  100.80 +	}
  100.81 +
  100.82 +	return 0;
  100.83 +}
  100.84 +
  100.85 +void
  100.86 +cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
  100.87 +{
  100.88 +	__asm__ __volatile__(
  100.89 +		"cpuid"
  100.90 +		: "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
  100.91 +		: "0" (idx) );
  100.92 +}
  100.93 +
   101.1 --- a/tools/firmware/hvmloader/util.h	Tue Aug 22 14:45:49 2006 -0600
   101.2 +++ b/tools/firmware/hvmloader/util.h	Wed Aug 23 11:11:27 2006 -0600
   101.3 @@ -8,9 +8,21 @@ void outb(uint16_t addr, uint8_t val);
   101.4  /* I/O input */
   101.5  uint8_t inb(uint16_t addr);
   101.6  
   101.7 +/* Do cpuid instruction, with operation 'idx' */
   101.8 +void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
   101.9 +           uint32_t *ecx, uint32_t *edx);
  101.10 +
  101.11 +/* Return number of vcpus. */
  101.12 +int get_vcpu_nr(void);
  101.13 +
  101.14  /* String and memory functions */
  101.15  int strcmp(const char *cs, const char *ct);
  101.16 +char *strcpy(char *dest, const char *src);
  101.17 +char *strncpy(char *dest, const char *src, unsigned n);
  101.18 +unsigned strlen(const char *s);
  101.19 +int memcmp(const void *s1, const void *s2, unsigned n);
  101.20  void *memcpy(void *dest, const void *src, unsigned n);
  101.21 +void *memset(void *s, int c, unsigned n);
  101.22  char *itoa(char *a, unsigned int i);
  101.23  
  101.24  /* Debug output */
   102.1 --- a/tools/firmware/rombios/apmbios.S	Tue Aug 22 14:45:49 2006 -0600
   102.2 +++ b/tools/firmware/rombios/apmbios.S	Wed Aug 23 11:11:27 2006 -0600
   102.3 @@ -225,7 +225,10 @@ APMSYM(04):
   102.4  APMSYM(05):
   102.5    cmp al, #0x05
   102.6    jne APMSYM(07)
   102.7 +  pushf ; XEN
   102.8 +  sti   ; XEN: OS calls us with ints disabled -- better re-enable here!
   102.9    hlt
  102.10 +  popf  ; XEN
  102.11    jmp APMSYM(ok)
  102.12  
  102.13  ;-----------------
   103.1 --- a/tools/firmware/rombios/rombios.c	Tue Aug 22 14:45:49 2006 -0600
   103.2 +++ b/tools/firmware/rombios/rombios.c	Wed Aug 23 11:11:27 2006 -0600
   103.3 @@ -9443,6 +9443,43 @@ rom_scan_increment:
   103.4    mov  ds, ax
   103.5    ret
   103.6  
   103.7 +#ifdef HVMASSIST
   103.8 +
   103.9 +; Copy the SMBIOS entry point over from 0x9f000, where hvmloader left it.
  103.10 +; The entry point must be somewhere in 0xf0000-0xfffff on a 16-byte boundary,
  103.11 +; but the tables themselves can be elsewhere.
  103.12 +smbios_init:
  103.13 +  push ax
  103.14 +  push cx
  103.15 +  push es
  103.16 +  push ds
  103.17 +  push di
  103.18 +  push si
  103.19 +
  103.20 +  mov cx, #0x001f ; 0x1f bytes to copy
  103.21 +  mov ax, #0xf000
  103.22 +  mov es, ax      ; destination segment is 0xf0000
  103.23 +  mov di, #smbios_entry_point ; destination offset
  103.24 +  mov ax, #0x9f00
  103.25 +  mov ds, ax      ; source segment is 0x9f000
  103.26 +  mov si, #0x0000 ; source offset is 0
  103.27 +  cld
  103.28 +  rep
  103.29 +    movsb
  103.30 +
  103.31 +  pop si
  103.32 +  pop di
  103.33 +  pop ds
  103.34 +  pop es
  103.35 +  pop cx
  103.36 +  pop ax
  103.37 +
  103.38 +  ret
  103.39 +
  103.40 +#endif
  103.41 +
  103.42 +
  103.43 +
  103.44  ;; for 'C' strings and other data, insert them here with
  103.45  ;; a the following hack:
  103.46  ;; DATA_SEG_DEFS_HERE
  103.47 @@ -9724,6 +9761,7 @@ post_default_ints:
  103.48  
  103.49  #ifdef HVMASSIST
  103.50    call _copy_e820_table
  103.51 +  call smbios_init
  103.52  #endif
  103.53  
  103.54    call rom_scan
  103.55 @@ -10538,6 +10576,13 @@ dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  103.56  dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 768 bytes
  103.57  dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 832 bytes
  103.58  dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 896 bytes
  103.59 +
  103.60 +.align 16
  103.61 +smbios_entry_point:
  103.62 +db 0,0,0,0,0,0,0,0 ; 8 bytes
  103.63 +db 0,0,0,0,0,0,0,0 ; 16 bytes
  103.64 +db 0,0,0,0,0,0,0,0 ; 24 bytes
  103.65 +db 0,0,0,0,0,0,0   ; 31 bytes
  103.66  ASM_END
  103.67  
  103.68  #else // !HVMASSIST
   104.1 --- a/tools/firmware/vmxassist/head.S	Tue Aug 22 14:45:49 2006 -0600
   104.2 +++ b/tools/firmware/vmxassist/head.S	Wed Aug 23 11:11:27 2006 -0600
   104.3 @@ -114,8 +114,6 @@ 1:
   104.4  #ifdef TEST
   104.5  	xorl	%edx, %edx
   104.6  #endif
   104.7 -	movl	%edx, booting_cpu
   104.8 -	movl	%ebx, booting_vector
   104.9  
  104.10  	/* clear bss */
  104.11  	cld
  104.12 @@ -125,6 +123,9 @@ 1:
  104.13  	subl	%edi, %ecx
  104.14  	rep	stosb
  104.15  
  104.16 +	movl	%edx, booting_cpu
  104.17 +	movl	%ebx, booting_vector
  104.18 +
  104.19  	/* make sure we are in a sane world */
  104.20  	clts
  104.21  
   105.1 --- a/tools/firmware/vmxassist/setup.c	Tue Aug 22 14:45:49 2006 -0600
   105.2 +++ b/tools/firmware/vmxassist/setup.c	Wed Aug 23 11:11:27 2006 -0600
   105.3 @@ -56,7 +56,7 @@ struct e820entry e820map[] = {
   105.4  	{ 0x00000000000A0000ULL, 0x0000000000020000ULL, E820_IO },
   105.5  	{ 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
   105.6  	{ 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
   105.7 -	{ 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED },
   105.8 +	{ 0x0000000000000000ULL, 0x0000000000001000ULL, E820_SHARED_PAGE },
   105.9  	{ 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
  105.10  	{ 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
  105.11  	{ 0x00000000FEC00000ULL, 0x0000000001400000ULL, E820_IO },
  105.12 @@ -94,13 +94,13 @@ banner(void)
  105.13  	e820map[6].addr = memory_size;
  105.14  	e820map[7].addr += memory_size;
  105.15  
  105.16 -	*LINUX_E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]);
  105.17 -	memcpy(LINUX_E820_MAP, e820map, sizeof(e820map));
  105.18 +	*E820_MAP_NR = sizeof(e820map)/sizeof(e820map[0]);
  105.19 +	memcpy(E820_MAP, e820map, sizeof(e820map));
  105.20  #endif
  105.21  
  105.22  	printf("Memory size %ld MB\n", memory_size >> 20);
  105.23  	printf("E820 map:\n");
  105.24 -	print_e820_map(LINUX_E820_MAP, *LINUX_E820_MAP_NR);
  105.25 +	print_e820_map(E820_MAP, *E820_MAP_NR);
  105.26  	printf("\n");
  105.27  }
  105.28  
   106.1 --- a/tools/firmware/vmxassist/util.h	Tue Aug 22 14:45:49 2006 -0600
   106.2 +++ b/tools/firmware/vmxassist/util.h	Wed Aug 23 11:11:27 2006 -0600
   106.3 @@ -23,23 +23,9 @@
   106.4  #include <stdarg.h>
   106.5  #include <vm86.h>
   106.6  
   106.7 -
   106.8 -#define	LINUX_E820_MAP_NR	((unsigned char *)0x901E8)
   106.9 -#define	LINUX_E820_MAP		((struct e820entry *)0x902D0)
  106.10 -
  106.11 -#define	E820_RAM	1
  106.12 -#define	E820_RESERVED	2
  106.13 -#define	E820_ACPI	3
  106.14 -#define	E820_NVS	4
  106.15 -#define	E820_IO		16
  106.16 -#define	E820_SHARED	17
  106.17 -
  106.18 -struct e820entry {
  106.19 -	unsigned long long	addr;
  106.20 -	unsigned long long	size;
  106.21 -	unsigned long		type;
  106.22 -} __attribute__((packed));
  106.23 -
  106.24 +#include <xen/hvm/e820.h>
  106.25 +#define E820_MAP_NR ((unsigned char *)E820_MAP_PAGE + E820_MAP_NR_OFFSET)
  106.26 +#define E820_MAP    ((struct e820entry *)(E820_MAP_PAGE + E820_MAP_OFFSET))
  106.27  
  106.28  #define	offsetof(type, member)	((unsigned) &((type *)0)->member)
  106.29  
   107.1 --- a/tools/firmware/vmxassist/vm86.c	Tue Aug 22 14:45:49 2006 -0600
   107.2 +++ b/tools/firmware/vmxassist/vm86.c	Wed Aug 23 11:11:27 2006 -0600
   107.3 @@ -983,7 +983,9 @@ set_mode(struct regs *regs, enum vm86_mo
   107.4  	case VM86_PROTECTED:
   107.5  		if (mode == VM86_REAL_TO_PROTECTED) {
   107.6  			protected_mode(regs);
   107.7 -			break;
   107.8 +//			printf("<VM86_PROTECTED>\n");
   107.9 +			mode = newmode;
  107.10 +			return;
  107.11  		} else
  107.12  			panic("unexpected protected mode transition");
  107.13  		break;
  107.14 @@ -1170,6 +1172,26 @@ inbyte(struct regs *regs, unsigned prefi
  107.15  	return 1;
  107.16  }
  107.17  
  107.18 +static void
  107.19 +pushrm(struct regs *regs, int prefix, unsigned modrm)
  107.20 +{
  107.21 +	unsigned n = regs->eip;
  107.22 +	unsigned addr;
  107.23 +	unsigned data;
  107.24 +
  107.25 +	addr  = operand(prefix, regs, modrm);
  107.26 +	
  107.27 +	if (prefix & DATA32) {
  107.28 +		data = read32(addr);
  107.29 +		push32(regs, data);
  107.30 +	} else {
  107.31 +		data = read16(addr);
  107.32 +		push16(regs, data);
  107.33 +	}
  107.34 +
  107.35 +	TRACE((regs, (regs->eip - n) + 1, "push *0x%x", addr));
  107.36 +}
  107.37 +
  107.38  enum { OPC_INVALID, OPC_EMULATED };
  107.39  
  107.40  /*
  107.41 @@ -1186,6 +1208,14 @@ opcode(struct regs *regs)
  107.42  
  107.43  	for (;;) {
  107.44  		switch ((opc = fetch8(regs))) {
  107.45 +		case 0x07:
  107.46 +			if (prefix & DATA32)
  107.47 +				regs->ves = pop32(regs);
  107.48 +			else
  107.49 +				regs->ves = pop16(regs);
  107.50 +			TRACE((regs, regs->eip - eip, "pop %%es"));
  107.51 +			return OPC_EMULATED;
  107.52 +
  107.53  		case 0x0F: /* two byte opcode */
  107.54  			if (mode == VM86_PROTECTED)
  107.55  				goto invalid;
  107.56 @@ -1288,6 +1318,22 @@ opcode(struct regs *regs)
  107.57                          return OPC_EMULATED;
  107.58  
  107.59  		case 0x89: /* addr32 mov r16, r/m16 */
  107.60 +			if (mode == VM86_PROTECTED_TO_REAL) {
  107.61 +				unsigned modrm = fetch8(regs);
  107.62 +				unsigned addr = operand(prefix, regs, modrm);
  107.63 +				unsigned val, r = (modrm >> 3) & 7;
  107.64 +				
  107.65 +				if (prefix & DATA32) {
  107.66 +					val = getreg32(regs, r); /* 32-bit operand: store the full register */
  107.67 +					write32(addr, val);
  107.68 +				} else {
  107.69 +					val = getreg16(regs, r); /* 16-bit operand: low word only */
  107.70 +					write16(addr, MASK16(val));
  107.71 +				}
  107.72 +				TRACE((regs, regs->eip - eip,
  107.73 +					"mov %%%s, *0x%x", rnames[r], addr));
  107.74 +				return OPC_EMULATED;
  107.75 +			}
  107.76  		case 0x8B: /* addr32 mov r/m16, r16 */
  107.77  			if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
  107.78  				goto invalid;
  107.79 @@ -1326,6 +1372,37 @@ opcode(struct regs *regs)
  107.80  			regs->eflags |= EFLAGS_VM;
  107.81  			return OPC_EMULATED;
  107.82  
  107.83 +		case 0xA1: /* mov moffs16, %ax */
  107.84 +			{
  107.85 +				int addr, data;
  107.86 +				int seg = segment(prefix, regs, regs->vds);
  107.87 +				if (prefix & DATA32) {
  107.88 +					addr = address(regs, seg, fetch32(regs));
  107.89 +					data = read32(addr);
  107.90 +					setreg32(regs, 0, data);
  107.91 +				} else {
  107.92 +					addr = address(regs, seg, fetch16(regs));
  107.93 +					data = read16(addr);
  107.94 +					setreg16(regs, 0, data);
  107.95 +				}
  107.96 +				TRACE((regs, regs->eip - eip, "mov *0x%x, %%ax", addr));
  107.97 +			}
  107.98 +			return OPC_EMULATED;
  107.99 +
 107.100 +		case 0xBB: /* mov bx, imm16 */
 107.101 +			{
 107.102 +				int data;
 107.103 +				if (prefix & DATA32) {
 107.104 +					data = fetch32(regs);
 107.105 +					setreg32(regs, 3, data);
 107.106 +				} else {
 107.107 +					data = fetch16(regs);
 107.108 +					setreg16(regs, 3, data);
 107.109 +				}
 107.110 +				TRACE((regs, regs->eip - eip, "mov $0x%x, %%bx", data));
 107.111 +			}
 107.112 +			return OPC_EMULATED;
 107.113 +
 107.114  		case 0xC6: /* addr32 movb $imm, r/m8 */
 107.115                          if ((prefix & ADDR32) == 0)
 107.116                                  goto invalid;
 107.117 @@ -1380,21 +1457,25 @@ opcode(struct regs *regs)
 107.118  			goto invalid;
 107.119  
 107.120  		case 0xFF: /* jmpl (indirect) */
 107.121 -			if ((mode == VM86_REAL_TO_PROTECTED) ||
 107.122 -			    (mode == VM86_PROTECTED_TO_REAL)) {
 107.123 -			 	unsigned modrm = fetch8(regs);
 107.124 -				
 107.125 +			{
 107.126 +				unsigned modrm = fetch8(regs);
 107.127  				switch((modrm >> 3) & 7) {
 107.128 -				case 5:
 107.129 -				  jmpl_indirect(regs, prefix, modrm);
 107.130 -				  return OPC_INVALID;
 107.131 +				case 5: /* jmpl (indirect) */
 107.132 +					if ((mode == VM86_REAL_TO_PROTECTED) ||
 107.133 +					    (mode == VM86_PROTECTED_TO_REAL)) {
 107.134 +						jmpl_indirect(regs, prefix, modrm);
 107.135 +						return OPC_INVALID;
 107.136 +					}
 107.137 +					goto invalid;
 107.138 +
 107.139 +				case 6: /* push r/m16 */
 107.140 +					pushrm(regs, prefix, modrm);
 107.141 +					return OPC_EMULATED;
 107.142  
 107.143  				default:
 107.144 -				  break;
 107.145 +					goto invalid;
 107.146  				}
 107.147 -
 107.148  			}
 107.149 -			goto invalid;
 107.150  
 107.151  		case 0xEB: /* short jump */
 107.152  			if ((mode == VM86_REAL_TO_PROTECTED) ||
   108.1 --- a/tools/firmware/vmxassist/vmxassist.ld	Tue Aug 22 14:45:49 2006 -0600
   108.2 +++ b/tools/firmware/vmxassist/vmxassist.ld	Wed Aug 23 11:11:27 2006 -0600
   108.3 @@ -6,27 +6,27 @@ ENTRY(_start)
   108.4  
   108.5  SECTIONS
   108.6  {
   108.7 +	_btext = .;
   108.8  	.text TEXTADDR : 
   108.9  	{
  108.10 -		_btext = .;
  108.11  		*(.text)
  108.12  		*(.rodata)
  108.13  		*(.rodata.*)
  108.14 -		_etext = .;
  108.15  	}
  108.16 +	_etext = .;
  108.17  
  108.18 +	_bdata = .;
  108.19  	.data :
  108.20  	{
  108.21 -		_bdata = .;
  108.22  		*(.data)
  108.23 -		_edata = .;
  108.24  	}
  108.25 +	_edata = .;
  108.26  
  108.27 +	_bbss = .;
  108.28  	.bss :
  108.29  	{
  108.30 -		_bbss = .;
  108.31  		*(.bss)
  108.32 -		_ebss = .;
  108.33  	}
  108.34 +	_ebss = .;
  108.35  }
  108.36  
   109.1 --- a/tools/ioemu/Makefile.target	Tue Aug 22 14:45:49 2006 -0600
   109.2 +++ b/tools/ioemu/Makefile.target	Wed Aug 23 11:11:27 2006 -0600
   109.3 @@ -359,6 +359,7 @@ VL_OBJS+= cirrus_vga.o mixeng.o parallel
   109.4  VL_OBJS+= usb-uhci.o
   109.5  VL_OBJS+= piix4acpi.o
   109.6  VL_OBJS+= xenstore.o
   109.7 +VL_OBJS+= xen_platform.o
   109.8  DEFINES += -DHAS_AUDIO
   109.9  endif
  109.10  ifeq ($(TARGET_BASE_ARCH), ppc)
   110.1 --- a/tools/ioemu/block-bochs.c	Tue Aug 22 14:45:49 2006 -0600
   110.2 +++ b/tools/ioemu/block-bochs.c	Wed Aug 23 11:11:27 2006 -0600
   110.3 @@ -91,7 +91,7 @@ static int bochs_open(BlockDriverState *
   110.4      int fd, i;
   110.5      struct bochs_header bochs;
   110.6  
   110.7 -    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE | O_SYNC);
   110.8 +    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
   110.9      if (fd < 0) {
  110.10          fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
  110.11          if (fd < 0)
   111.1 --- a/tools/ioemu/block-cloop.c	Tue Aug 22 14:45:49 2006 -0600
   111.2 +++ b/tools/ioemu/block-cloop.c	Wed Aug 23 11:11:27 2006 -0600
   111.3 @@ -55,7 +55,7 @@ static int cloop_open(BlockDriverState *
   111.4      BDRVCloopState *s = bs->opaque;
   111.5      uint32_t offsets_size,max_compressed_block_size=1,i;
   111.6  
   111.7 -    s->fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE | O_SYNC);
   111.8 +    s->fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
   111.9      if (s->fd < 0)
  111.10          return -1;
  111.11      bs->read_only = 1;
   112.1 --- a/tools/ioemu/block-cow.c	Tue Aug 22 14:45:49 2006 -0600
   112.2 +++ b/tools/ioemu/block-cow.c	Wed Aug 23 11:11:27 2006 -0600
   112.3 @@ -69,7 +69,7 @@ static int cow_open(BlockDriverState *bs
   112.4      struct cow_header_v2 cow_header;
   112.5      int64_t size;
   112.6  
   112.7 -    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE | O_SYNC);
   112.8 +    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
   112.9      if (fd < 0) {
  112.10          fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
  112.11          if (fd < 0)
   113.1 --- a/tools/ioemu/block-qcow.c	Tue Aug 22 14:45:49 2006 -0600
   113.2 +++ b/tools/ioemu/block-qcow.c	Wed Aug 23 11:11:27 2006 -0600
   113.3 @@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b
   113.4      int fd, len, i, shift;
   113.5      QCowHeader header;
   113.6      
   113.7 -    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE | O_SYNC);
   113.8 +    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
   113.9      if (fd < 0) {
  113.10          fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
  113.11          if (fd < 0)
   114.1 --- a/tools/ioemu/block-vmdk.c	Tue Aug 22 14:45:49 2006 -0600
   114.2 +++ b/tools/ioemu/block-vmdk.c	Wed Aug 23 11:11:27 2006 -0600
   114.3 @@ -96,7 +96,7 @@ static int vmdk_open(BlockDriverState *b
   114.4      uint32_t magic;
   114.5      int l1_size;
   114.6  
   114.7 -    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE | O_SYNC);
   114.8 +    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
   114.9      if (fd < 0) {
  114.10          fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
  114.11          if (fd < 0)
   115.1 --- a/tools/ioemu/block.c	Tue Aug 22 14:45:49 2006 -0600
   115.2 +++ b/tools/ioemu/block.c	Wed Aug 23 11:11:27 2006 -0600
   115.3 @@ -685,7 +685,7 @@ static int raw_open(BlockDriverState *bs
   115.4      int rv;
   115.5  #endif
   115.6  
   115.7 -    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE | O_SYNC);
   115.8 +    fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
   115.9      if (fd < 0) {
  115.10          fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
  115.11          if (fd < 0)
   116.1 --- a/tools/ioemu/hw/ide.c	Tue Aug 22 14:45:49 2006 -0600
   116.2 +++ b/tools/ioemu/hw/ide.c	Wed Aug 23 11:11:27 2006 -0600
   116.3 @@ -305,6 +305,7 @@ typedef struct IDEState {
   116.4      PCIDevice *pci_dev;
   116.5      struct BMDMAState *bmdma;
   116.6      int drive_serial;
   116.7 +    int write_cache;
   116.8      /* ide regs */
   116.9      uint8_t feature;
  116.10      uint8_t error;
  116.11 @@ -789,6 +790,9 @@ static void ide_sector_write(IDEState *s
  116.12      }
  116.13      ide_set_sector(s, sector_num + n);
  116.14      
  116.15 +    if (!s->write_cache)
  116.16 +        bdrv_flush(s->bs);
  116.17 +    
  116.18  #ifdef TARGET_I386
  116.19      if (win2k_install_hack && ((++s->irq_count % 16) == 0)) {
  116.20          /* It seems there is a bug in the Windows 2000 installer HDD
  116.21 @@ -863,6 +867,10 @@ static int ide_write_dma_cb(IDEState *s,
  116.22          transfer_size -= len;
  116.23          phys_addr += len;
  116.24      }
  116.25 +    /* Ensure the data hit disk before telling the guest OS so. */
  116.26 +    if (!s->write_cache)
  116.27 +        bdrv_flush(s->bs);
  116.28 +
  116.29      return transfer_size1 - transfer_size;
  116.30  }
  116.31  
  116.32 @@ -1672,7 +1680,15 @@ static void ide_ioport_write(void *opaqu
  116.33              /* XXX: valid for CDROM ? */
  116.34              switch(s->feature) {
  116.35              case 0x02: /* write cache enable */
  116.36 +                s->write_cache = 1;
  116.37 +                s->status = READY_STAT | SEEK_STAT;
  116.38 +                ide_set_irq(s);
  116.39 +                break;
  116.40              case 0x82: /* write cache disable */
  116.41 +                s->write_cache = 0;
  116.42 +                s->status = READY_STAT | SEEK_STAT;
  116.43 +                ide_set_irq(s);
  116.44 +                break;
  116.45              case 0xaa: /* read look-ahead enable */
  116.46              case 0x55: /* read look-ahead disable */
  116.47                  s->status = READY_STAT | SEEK_STAT;
  116.48 @@ -2090,6 +2106,7 @@ static void ide_init2(IDEState *ide_stat
  116.49          s->irq = irq;
  116.50          s->sector_write_timer = qemu_new_timer(vm_clock, 
  116.51                                                 ide_sector_write_timer_cb, s);
  116.52 +        s->write_cache = 0;
  116.53          ide_reset(s);
  116.54      }
  116.55  }
   117.1 --- a/tools/ioemu/hw/pc.c	Tue Aug 22 14:45:49 2006 -0600
   117.2 +++ b/tools/ioemu/hw/pc.c	Wed Aug 23 11:11:27 2006 -0600
   117.3 @@ -158,8 +158,23 @@ static void cmos_init_hd(int type_ofs, i
   117.4      rtc_set_memory(s, info_ofs + 8, sectors);
   117.5  }
   117.6  
   117.7 +static int get_bios_disk(char *boot_device, int index) {
   117.8 +
   117.9 +    if (index < strlen(boot_device)) {
  117.10 +        switch (boot_device[index]) {
  117.11 +        case 'a':
  117.12 +            return 0x01;            /* floppy */
  117.13 +        case 'c':
  117.14 +            return 0x02;            /* hard drive */
  117.15 +        case 'd':
  117.16 +            return 0x03;            /* cdrom */
  117.17 +        }
  117.18 +    }
  117.19 +    return 0x00;                /* no device */
  117.20 +}
  117.21 +
  117.22  /* hd_table must contain 4 block drivers */
  117.23 -static void cmos_init(uint64_t ram_size, int boot_device, BlockDriverState **hd_table, time_t timeoffset)
  117.24 +static void cmos_init(uint64_t ram_size, char *boot_device, BlockDriverState **hd_table, time_t timeoffset)
  117.25  {
  117.26      RTCState *s = rtc_state;
  117.27      int val;
  117.28 @@ -205,21 +220,14 @@ static void cmos_init(uint64_t ram_size,
  117.29      rtc_set_memory(s, 0x34, val);
  117.30      rtc_set_memory(s, 0x35, val >> 8);
  117.31      
  117.32 -    switch(boot_device) {
  117.33 -    case 'a':
  117.34 -    case 'b':
  117.35 -        rtc_set_memory(s, 0x3d, 0x01); /* floppy boot */
  117.36 -        if (!fd_bootchk)
  117.37 -            rtc_set_memory(s, 0x38, 0x01); /* disable signature check */
  117.38 -        break;
  117.39 -    default:
  117.40 -    case 'c':
  117.41 -        rtc_set_memory(s, 0x3d, 0x02); /* hard drive boot */
  117.42 -        break;
  117.43 -    case 'd':
  117.44 -        rtc_set_memory(s, 0x3d, 0x03); /* CD-ROM boot */
  117.45 -        break;
  117.46 +    if (boot_device == NULL) {
  117.47 +        /* default to hd, then cd, then floppy. */
  117.48 +        boot_device = "cda";
  117.49      }
  117.50 +    rtc_set_memory(s, 0x3d, get_bios_disk(boot_device, 0) |
  117.51 +                   (get_bios_disk(boot_device, 1) << 4));
  117.52 +    rtc_set_memory(s, 0x38, (get_bios_disk(boot_device, 2) << 4) |
  117.53 +                   (!fd_bootchk ? 0x01 : 0x00));
  117.54  
  117.55      /* floppy type */
  117.56  
  117.57 @@ -572,9 +580,6 @@ static int serial_irq[MAX_SERIAL_PORTS] 
  117.58  static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
  117.59  static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
  117.60  
  117.61 -/* PIIX4 acpi pci configuration space, func 3 */
  117.62 -extern void pci_piix4_acpi_init(PCIBus *bus, int devfn);
  117.63 -
  117.64  #ifdef HAS_AUDIO
  117.65  static void audio_init (PCIBus *pci_bus)
  117.66  {
  117.67 @@ -620,7 +625,7 @@ static void pc_init_ne2k_isa(NICInfo *nd
  117.68  #define NOBIOS 1
  117.69  
  117.70  /* PC hardware initialisation */
  117.71 -static void pc_init1(uint64_t ram_size, int vga_ram_size, int boot_device,
  117.72 +static void pc_init1(uint64_t ram_size, int vga_ram_size, char *boot_device,
  117.73                       DisplayState *ds, const char **fd_filename, int snapshot,
  117.74                       const char *kernel_filename, const char *kernel_cmdline,
  117.75                       const char *initrd_filename, time_t timeoffset,
  117.76 @@ -826,6 +831,9 @@ static void pc_init1(uint64_t ram_size, 
  117.77      }
  117.78  #endif /* !CONFIG_DM */
  117.79  
  117.80 +    if (pci_enabled)
  117.81 +        pci_xen_platform_init(pci_bus);
  117.82 +
  117.83      for(i = 0; i < MAX_SERIAL_PORTS; i++) {
  117.84          if (serial_hds[i]) {
  117.85              serial_init(&pic_set_irq_new, isa_pic,
  117.86 @@ -879,15 +887,17 @@ static void pc_init1(uint64_t ram_size, 
  117.87  
  117.88      /* using PIIX4 acpi model */
  117.89      if (pci_enabled && acpi_enabled)
  117.90 -        pci_piix4_acpi_init(pci_bus, piix3_devfn + 3);
  117.91 +        pci_piix4_acpi_init(pci_bus, piix3_devfn + 2);
  117.92  
  117.93      if (pci_enabled && usb_enabled) {
  117.94 -        usb_uhci_init(pci_bus, piix3_devfn + 2);
  117.95 +        usb_uhci_init(pci_bus, piix3_devfn + (acpi_enabled ? 3 : 2));
  117.96      }
  117.97  
  117.98 -    if (pci_enabled && acpi_enabled && 0) {
  117.99 +#ifndef CONFIG_DM
 117.100 +    if (pci_enabled && acpi_enabled) {
 117.101          piix4_pm_init(pci_bus, piix3_devfn + 3);
 117.102      }
 117.103 +#endif /* !CONFIG_DM */
 117.104  
 117.105  #if 0
 117.106      /* ??? Need to figure out some way for the user to
 117.107 @@ -910,12 +920,14 @@ static void pc_init1(uint64_t ram_size, 
 117.108      /* XXX: should be done in the Bochs BIOS */
 117.109      if (pci_enabled) {
 117.110          pci_bios_init();
 117.111 +#ifndef CONFIG_DM
 117.112          if (acpi_enabled)
 117.113              acpi_bios_init();
 117.114 +#endif /* !CONFIG_DM */
 117.115      }
 117.116  }
 117.117  
 117.118 -static void pc_init_pci(uint64_t ram_size, int vga_ram_size, int boot_device,
 117.119 +static void pc_init_pci(uint64_t ram_size, int vga_ram_size, char *boot_device,
 117.120                          DisplayState *ds, const char **fd_filename, 
 117.121                          int snapshot, 
 117.122                          const char *kernel_filename, 
 117.123 @@ -929,7 +941,7 @@ static void pc_init_pci(uint64_t ram_siz
 117.124               initrd_filename, timeoffset, 1);
 117.125  }
 117.126  
 117.127 -static void pc_init_isa(uint64_t ram_size, int vga_ram_size, int boot_device,
 117.128 +static void pc_init_isa(uint64_t ram_size, int vga_ram_size, char *boot_device,
 117.129                          DisplayState *ds, const char **fd_filename, 
 117.130                          int snapshot, 
 117.131                          const char *kernel_filename, 
   118.1 --- a/tools/ioemu/hw/piix4acpi.c	Tue Aug 22 14:45:49 2006 -0600
   118.2 +++ b/tools/ioemu/hw/piix4acpi.c	Wed Aug 23 11:11:27 2006 -0600
   118.3 @@ -374,13 +374,13 @@ static void acpi_map(PCIDevice *pci_dev,
   118.4      register_ioport_read(addr + 8, 4, 4, acpiPm1Timer_readl, d);
   118.5  }
   118.6  
   118.7 -/* PIIX4 acpi pci configuration space, func 3 */
   118.8 +/* PIIX4 acpi pci configuration space, func 2 */
   118.9  void pci_piix4_acpi_init(PCIBus *bus, int devfn)
  118.10  {
  118.11      PCIAcpiState *d;
  118.12      uint8_t *pci_conf;
  118.13  
  118.14 -    /* register a function 3 of PIIX4 */
  118.15 +    /* register a function 2 of PIIX4 */
  118.16      d = (PCIAcpiState *)pci_register_device(
  118.17          bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
  118.18          devfn, NULL, NULL);
   119.1 --- a/tools/ioemu/hw/piix_pci.c	Tue Aug 22 14:45:49 2006 -0600
   119.2 +++ b/tools/ioemu/hw/piix_pci.c	Wed Aug 23 11:11:27 2006 -0600
   119.3 @@ -415,7 +415,7 @@ void pci_bios_init(void)
   119.4      uint8_t elcr[2];
   119.5  
   119.6      pci_bios_io_addr = 0xc000;
   119.7 -    pci_bios_mem_addr = 0xf0000000;
   119.8 +    pci_bios_mem_addr = HVM_BELOW_4G_MMIO_START;
   119.9  
  119.10      /* activate IRQ mappings */
  119.11      elcr[0] = 0x00;
   120.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   120.2 +++ b/tools/ioemu/hw/xen_platform.c	Wed Aug 23 11:11:27 2006 -0600
   120.3 @@ -0,0 +1,138 @@
   120.4 +/*
   120.5 + * XEN platform fake pci device, formerly known as the event channel device
   120.6 + * 
   120.7 + * Copyright (c) 2003-2004 Intel Corp.
   120.8 + * Copyright (c) 2006 XenSource
   120.9 + * 
  120.10 + * Permission is hereby granted, free of charge, to any person obtaining a copy
  120.11 + * of this software and associated documentation files (the "Software"), to deal
  120.12 + * in the Software without restriction, including without limitation the rights
  120.13 + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  120.14 + * copies of the Software, and to permit persons to whom the Software is
  120.15 + * furnished to do so, subject to the following conditions:
  120.16 + *
  120.17 + * The above copyright notice and this permission notice shall be included in
  120.18 + * all copies or substantial portions of the Software.
  120.19 + *
  120.20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  120.21 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  120.22 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  120.23 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  120.24 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  120.25 + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  120.26 + * THE SOFTWARE.
  120.27 + */
  120.28 +#include "vl.h"
  120.29 +
  120.30 +#include <xenguest.h>
  120.31 +#include <xc_private.h>
  120.32 +
  120.33 +extern FILE *logfile;
  120.34 +
  120.35 +static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
  120.36 +{
  120.37 +    return;
  120.38 +}
  120.39 +
  120.40 +static uint32_t platform_ioport_read(void *opaque, uint32_t addr)
  120.41 +{
  120.42 +    return 0;
  120.43 +}
  120.44 +
  120.45 +static void platform_ioport_map(PCIDevice *pci_dev, int region_num,
  120.46 +                                uint32_t addr, uint32_t size, int type)
  120.47 +{
  120.48 +    register_ioport_write(addr, 16, 4, platform_ioport_write, NULL);
  120.49 +    register_ioport_read(addr, 16, 1, platform_ioport_read, NULL);
  120.50 +}
  120.51 +
  120.52 +static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)
  120.53 +{
  120.54 +    fprintf(logfile, "Warning: try read from xen platform mmio space\n");
  120.55 +    return 0;
  120.56 +}
  120.57 +
  120.58 +static void platform_mmio_write(void *opaque, target_phys_addr_t addr,
  120.59 +			       uint32_t val)
  120.60 +{
  120.61 +    fprintf(logfile, "Warning: try write to xen platform mmio space\n");
  120.62 +    return;
  120.63 +}
  120.64 +
  120.65 +static CPUReadMemoryFunc *platform_mmio_read_funcs[3] = {
  120.66 +    platform_mmio_read,
  120.67 +    platform_mmio_read,
  120.68 +    platform_mmio_read,
  120.69 +};
  120.70 +
  120.71 +static CPUWriteMemoryFunc *platform_mmio_write_funcs[3] = {
  120.72 +    platform_mmio_write,
  120.73 +    platform_mmio_write,
  120.74 +    platform_mmio_write,
  120.75 +};
  120.76 +
  120.77 +static void platform_mmio_map(PCIDevice *d, int region_num,
  120.78 +                              uint32_t addr, uint32_t size, int type)
  120.79 +{
  120.80 +    int mmio_io_addr;
  120.81 +
  120.82 +    mmio_io_addr = cpu_register_io_memory(0, platform_mmio_read_funcs,
  120.83 +                                          platform_mmio_write_funcs, NULL);
  120.84 +
  120.85 +    cpu_register_physical_memory(addr, 0x1000000, mmio_io_addr);
  120.86 +}
  120.87 +
  120.88 +struct pci_config_header {
  120.89 +    uint16_t vendor_id;
  120.90 +    uint16_t device_id;
  120.91 +    uint16_t command;
  120.92 +    uint16_t status;
  120.93 +    uint8_t  revision;
  120.94 +    uint8_t  api;
  120.95 +    uint8_t  subclass;
  120.96 +    uint8_t  class;
  120.97 +    uint8_t  cache_line_size; /* Units of 32 bit words */
  120.98 +    uint8_t  latency_timer; /* In units of bus cycles */
  120.99 +    uint8_t  header_type; /* Should be 0 */
 120.100 +    uint8_t  bist; /* Built in self test */
 120.101 +    uint32_t base_address_regs[6];
 120.102 +    uint32_t reserved1;
 120.103 +    uint32_t reserved2;
 120.104 +    uint32_t rom_addr;
 120.105 +    uint32_t reserved3;
 120.106 +    uint32_t reserved4;
 120.107 +    uint8_t  interrupt_line;
 120.108 +    uint8_t  interrupt_pin;
 120.109 +    uint8_t  min_gnt;
 120.110 +    uint8_t  max_lat;
 120.111 +};
 120.112 +
 120.113 +void pci_xen_platform_init(PCIBus *bus)
 120.114 +{
 120.115 +    PCIDevice *d;
 120.116 +    struct pci_config_header *pch;
 120.117 +
 120.118 +    printf("Register xen platform.\n");
 120.119 +    d = pci_register_device(bus, "xen-platform", sizeof(PCIDevice), -1, NULL,
 120.120 +			    NULL);
 120.121 +    pch = (struct pci_config_header *)d->config;
 120.122 +    pch->vendor_id = 0xfffd;
 120.123 +    pch->device_id = 0x0101;
 120.124 +    pch->command = 3; /* IO and memory access */
 120.125 +    pch->revision = 0;
 120.126 +    pch->api = 0;
 120.127 +    pch->subclass = 0x80; /* Other */
 120.128 +    pch->class = 0xff; /* Unclassified device class */
 120.129 +    pch->header_type = 0;
 120.130 +    pch->interrupt_pin = 1;
 120.131 +
 120.132 +    pci_register_io_region(d, 0, 0x100, PCI_ADDRESS_SPACE_IO,
 120.133 +                           platform_ioport_map);
 120.134 +
 120.135 +    /* reserve 16MB mmio address for share memory*/
 120.136 +    pci_register_io_region(d, 1, 0x1000000, PCI_ADDRESS_SPACE_MEM_PREFETCH,
 120.137 +			   platform_mmio_map);
 120.138 +
 120.139 +    register_savevm("platform", 0, 1, generic_pci_save, generic_pci_load, d);
 120.140 +    printf("Done register platform.\n");
 120.141 +}
   121.1 --- a/tools/ioemu/patches/acpi-poweroff-support	Tue Aug 22 14:45:49 2006 -0600
   121.2 +++ b/tools/ioemu/patches/acpi-poweroff-support	Wed Aug 23 11:11:27 2006 -0600
   121.3 @@ -1,7 +1,7 @@
   121.4  Index: ioemu/hw/piix4acpi.c
   121.5  ===================================================================
   121.6 ---- ioemu.orig/hw/piix4acpi.c	2006-08-06 02:30:29.288761563 +0100
   121.7 -+++ ioemu/hw/piix4acpi.c	2006-08-06 02:30:42.131331446 +0100
   121.8 +--- ioemu.orig/hw/piix4acpi.c	2006-08-17 19:50:05.060576667 +0100
   121.9 ++++ ioemu/hw/piix4acpi.c	2006-08-17 19:50:07.563300039 +0100
  121.10  @@ -45,6 +45,10 @@
  121.11   #define GBL_RLS           (1 << 2)
  121.12   #define SLP_EN            (1 << 13)
   122.1 --- a/tools/ioemu/patches/acpi-support	Tue Aug 22 14:45:49 2006 -0600
   122.2 +++ b/tools/ioemu/patches/acpi-support	Wed Aug 23 11:11:27 2006 -0600
   122.3 @@ -1,7 +1,7 @@
   122.4  Index: ioemu/Makefile.target
   122.5  ===================================================================
   122.6 ---- ioemu.orig/Makefile.target	2006-08-06 02:23:23.000000000 +0100
   122.7 -+++ ioemu/Makefile.target	2006-08-07 17:38:47.698306442 +0100
   122.8 +--- ioemu.orig/Makefile.target	2006-08-17 19:49:50.228216099 +0100
   122.9 ++++ ioemu/Makefile.target	2006-08-17 19:50:02.405870095 +0100
  122.10  @@ -357,6 +357,7 @@
  122.11   VL_OBJS+= fdc.o mc146818rtc.o serial.o pc.o
  122.12   VL_OBJS+= cirrus_vga.o mixeng.o parallel.o acpi.o piix_pci.o
  122.13 @@ -12,39 +12,44 @@ Index: ioemu/Makefile.target
  122.14   ifeq ($(TARGET_BASE_ARCH), ppc)
  122.15  Index: ioemu/hw/pc.c
  122.16  ===================================================================
  122.17 ---- ioemu.orig/hw/pc.c	2006-08-06 02:23:45.000000000 +0100
  122.18 -+++ ioemu/hw/pc.c	2006-08-07 17:42:00.939426374 +0100
  122.19 -@@ -572,6 +572,9 @@
  122.20 - static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
  122.21 - static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
  122.22 - 
  122.23 -+/* PIIX4 acpi pci configuration space, func 3 */
  122.24 -+extern void pci_piix4_acpi_init(PCIBus *bus, int devfn);
  122.25 -+
  122.26 - #ifdef HAS_AUDIO
  122.27 - static void audio_init (PCIBus *pci_bus)
  122.28 - {
  122.29 -@@ -874,11 +877,15 @@
  122.30 +--- ioemu.orig/hw/pc.c	2006-08-17 19:49:59.312212039 +0100
  122.31 ++++ ioemu/hw/pc.c	2006-08-17 19:50:02.406869984 +0100
  122.32 +@@ -874,13 +874,19 @@
  122.33   
  122.34       cmos_init(ram_size, boot_device, bs_table, timeoffset);
  122.35   
  122.36  +    /* using PIIX4 acpi model */
  122.37  +    if (pci_enabled && acpi_enabled)
  122.38 -+        pci_piix4_acpi_init(pci_bus, piix3_devfn + 3);
  122.39 ++        pci_piix4_acpi_init(pci_bus, piix3_devfn + 2);
  122.40  +
  122.41       if (pci_enabled && usb_enabled) {
  122.42 -         usb_uhci_init(pci_bus, piix3_devfn + 2);
  122.43 +-        usb_uhci_init(pci_bus, piix3_devfn + 2);
  122.44 ++        usb_uhci_init(pci_bus, piix3_devfn + (acpi_enabled ? 3 : 2));
  122.45       }
  122.46   
  122.47 --    if (pci_enabled && acpi_enabled) {
  122.48 -+    if (pci_enabled && acpi_enabled && 0) {
  122.49 ++#ifndef CONFIG_DM
  122.50 +     if (pci_enabled && acpi_enabled) {
  122.51           piix4_pm_init(pci_bus, piix3_devfn + 3);
  122.52       }
  122.53 ++#endif /* !CONFIG_DM */
  122.54 + 
  122.55 + #if 0
  122.56 +     /* ??? Need to figure out some way for the user to
  122.57 +@@ -903,8 +909,10 @@
  122.58 +     /* XXX: should be done in the Bochs BIOS */
  122.59 +     if (pci_enabled) {
  122.60 +         pci_bios_init();
  122.61 ++#ifndef CONFIG_DM
  122.62 +         if (acpi_enabled)
  122.63 +             acpi_bios_init();
  122.64 ++#endif /* !CONFIG_DM */
  122.65 +     }
  122.66 + }
  122.67   
  122.68  Index: ioemu/hw/piix4acpi.c
  122.69  ===================================================================
  122.70  --- /dev/null	1970-01-01 00:00:00.000000000 +0000
  122.71 -+++ ioemu/hw/piix4acpi.c	2006-08-07 17:41:41.932577728 +0100
  122.72 ++++ ioemu/hw/piix4acpi.c	2006-08-17 19:50:02.407869874 +0100
  122.73  @@ -0,0 +1,388 @@
  122.74  +/*
  122.75  + * PIIX4 ACPI controller emulation
  122.76 @@ -405,13 +410,13 @@ Index: ioemu/hw/piix4acpi.c
  122.77  +}
  122.78  +													
  122.79  +
  122.80 -+/* PIIX4 acpi pci configuration space, func 3 */
  122.81 ++/* PIIX4 acpi pci configuration space, func 2 */
  122.82  +void pci_piix4_acpi_init(PCIBus *bus, int devfn)
  122.83  +{
  122.84  +    PCIAcpiState *d;
  122.85  +    uint8_t *pci_conf;
  122.86  +
  122.87 -+    /* register a function 3 of PIIX4 */
  122.88 ++    /* register a function 2 of PIIX4 */
  122.89  +    d = (PCIAcpiState *)pci_register_device(
  122.90  +        bus, "PIIX4 ACPI", sizeof(PCIAcpiState),
  122.91  +        devfn, NULL, NULL);
  122.92 @@ -436,8 +441,8 @@ Index: ioemu/hw/piix4acpi.c
  122.93  +}
  122.94  Index: ioemu/vl.c
  122.95  ===================================================================
  122.96 ---- ioemu.orig/vl.c	2006-08-06 02:23:45.000000000 +0100
  122.97 -+++ ioemu/vl.c	2006-08-07 17:41:40.613727012 +0100
  122.98 +--- ioemu.orig/vl.c	2006-08-17 19:49:59.315211708 +0100
  122.99 ++++ ioemu/vl.c	2006-08-17 19:50:02.410869542 +0100
 122.100  @@ -156,7 +156,7 @@
 122.101   #else
 122.102   #define MAX_CPUS 1
 122.103 @@ -483,9 +488,9 @@ Index: ioemu/vl.c
 122.104       }
 122.105  Index: ioemu/vl.h
 122.106  ===================================================================
 122.107 ---- ioemu.orig/vl.h	2006-08-06 02:23:45.000000000 +0100
 122.108 -+++ ioemu/vl.h	2006-08-07 17:38:47.847289567 +0100
 122.109 -@@ -167,6 +167,7 @@
 122.110 +--- ioemu.orig/vl.h	2006-08-17 19:49:59.316211597 +0100
 122.111 ++++ ioemu/vl.h	2006-08-17 19:50:02.411869432 +0100
 122.112 +@@ -168,6 +168,7 @@
 122.113   extern int kqemu_allowed;
 122.114   extern int win2k_install_hack;
 122.115   extern int usb_enabled;
 122.116 @@ -493,10 +498,20 @@ Index: ioemu/vl.h
 122.117   extern int smp_cpus;
 122.118   
 122.119   /* XXX: make it dynamic */
 122.120 +@@ -923,6 +924,9 @@
 122.121 + void piix4_pm_init(PCIBus *bus, int devfn);
 122.122 + void acpi_bios_init(void);
 122.123 + 
 122.124 ++/* piix4acpi.c */
 122.125 ++extern void pci_piix4_acpi_init(PCIBus *bus, int devfn);
 122.126 ++
 122.127 + /* pc.c */
 122.128 + extern QEMUMachine pc_machine;
 122.129 + extern QEMUMachine isapc_machine;
 122.130  Index: ioemu/hw/piix_pci.c
 122.131  ===================================================================
 122.132 ---- ioemu.orig/hw/piix_pci.c	2006-08-06 02:29:41.000000000 +0100
 122.133 -+++ ioemu/hw/piix_pci.c	2006-08-07 17:38:57.480198468 +0100
 122.134 +--- ioemu.orig/hw/piix_pci.c	2006-08-17 19:38:05.806252180 +0100
 122.135 ++++ ioemu/hw/piix_pci.c	2006-08-17 19:50:02.411869432 +0100
 122.136  @@ -241,7 +241,7 @@
 122.137   static uint32_t pci_bios_io_addr;
 122.138   static uint32_t pci_bios_mem_addr;
   123.1 --- a/tools/ioemu/patches/acpi-timer-support	Tue Aug 22 14:45:49 2006 -0600
   123.2 +++ b/tools/ioemu/patches/acpi-timer-support	Wed Aug 23 11:11:27 2006 -0600
   123.3 @@ -1,7 +1,7 @@
   123.4  Index: ioemu/hw/piix4acpi.c
   123.5  ===================================================================
   123.6 ---- ioemu.orig/hw/piix4acpi.c	2006-08-06 02:24:54.262068457 +0100
   123.7 -+++ ioemu/hw/piix4acpi.c	2006-08-06 02:30:29.288761563 +0100
   123.8 +--- ioemu.orig/hw/piix4acpi.c	2006-08-17 19:50:02.407869874 +0100
   123.9 ++++ ioemu/hw/piix4acpi.c	2006-08-17 19:50:05.060576667 +0100
  123.10  @@ -24,31 +24,30 @@
  123.11    */
  123.12   
  123.13 @@ -184,8 +184,8 @@ Index: ioemu/hw/piix4acpi.c
  123.14   }
  123.15  -													
  123.16   
  123.17 - /* PIIX4 acpi pci configuration space, func 3 */
  123.18 - void pci_piix4_acpi_init(PCIBus *bus)
  123.19 + /* PIIX4 acpi pci configuration space, func 2 */
  123.20 + void pci_piix4_acpi_init(PCIBus *bus, int devfn)
  123.21  @@ -384,5 +383,5 @@
  123.22       pci_register_io_region((PCIDevice *)d, 4, 0x10,
  123.23                              PCI_ADDRESS_SPACE_IO, acpi_map);
   124.1 --- a/tools/ioemu/patches/domain-destroy	Tue Aug 22 14:45:49 2006 -0600
   124.2 +++ b/tools/ioemu/patches/domain-destroy	Wed Aug 23 11:11:27 2006 -0600
   124.3 @@ -1,7 +1,7 @@
   124.4  Index: ioemu/monitor.c