ia64/xen-unstable

changeset 19106:79f259a26a11

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Wed Jan 28 13:06:45 2009 +0900 (2009-01-28)
parents 4fd4dcf2f891 31983c30c460
children 254021201b1b
files tools/firmware/rombios/32bitgateway.h xen/arch/ia64/tools/p2m_foreign/Makefile xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c xen/include/public/elfstructs.h xen/include/public/libelf.h
line diff
     1.1 --- a/Config.mk	Wed Jan 28 12:22:58 2009 +0900
     1.2 +++ b/Config.mk	Wed Jan 28 13:06:45 2009 +0900
     1.3 @@ -1,7 +1,7 @@
     1.4  # -*- mode: Makefile; -*-
     1.5  
     1.6  # A debug build of Xen and tools?
     1.7 -debug ?= n
     1.8 +debug ?= y
     1.9  
    1.10  XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
    1.11                           -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
    1.12 @@ -38,6 +38,15 @@ endif
    1.13  cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \
    1.14                /dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;)
    1.15  
    1.16 +# cc-option-add: Add an option to compilation flags, but only if supported.
    1.17 +# Usage: $(call cc-option-add CFLAGS,CC,-march=winchip-c6)
    1.18 +cc-option-add = $(eval $(call cc-option-add-closure,$(1),$(2),$(3)))
    1.19 +define cc-option-add-closure
    1.20 +    ifneq ($$(call cc-option,$$($(2)),$(3),n),n)
    1.21 +        $(1) += $(3)
    1.22 +    endif
    1.23 +endef
    1.24 +
    1.25  # cc-ver: Check compiler is at least specified version. Return boolean 'y'/'n'.
    1.26  # Usage: ifeq ($(call cc-ver,$(CC),0x030400),y)
    1.27  cc-ver = $(shell if [ $$((`$(1) -dumpversion | awk -F. \
    1.28 @@ -84,8 +93,8 @@ CFLAGS += -Wall -Wstrict-prototypes
    1.29  # result of any casted expression causes a warning.
    1.30  CFLAGS += -Wno-unused-value
    1.31  
    1.32 -HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,)
    1.33 -CFLAGS     += $(call cc-option,$(CC),-Wdeclaration-after-statement,)
    1.34 +$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
    1.35 +$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
    1.36  
    1.37  LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i)) 
    1.38  CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
     2.1 --- a/buildconfigs/mk.linux-2.6-common	Wed Jan 28 12:22:58 2009 +0900
     2.2 +++ b/buildconfigs/mk.linux-2.6-common	Wed Jan 28 13:06:45 2009 +0900
     2.3 @@ -100,10 +100,10 @@ ifneq ($(EXTRAVERSION),)
     2.4  endif
     2.5  	$(__NONINT_CONFIG) $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR)
     2.6  	@set -e ; if [ ! -f $(LINUX_DIR)/Makefile ] ; then \
     2.7 -	    echo "***********************************"; \
     2.8 +	    echo "==================================="; \
     2.9  	    echo "oldconfig did not create a Makefile"; \
    2.10  	    echo "Generating $(LINUX_DIR)/Makefile   "; \
    2.11 -	    echo "***********************************"; \
    2.12 +	    echo "==================================="; \
    2.13  	    ( echo "# Automatically generated: don't edit"; \
    2.14  	      echo ""; \
    2.15  	      echo "VERSION = 2"; \
     3.1 --- a/docs/check_pkgs	Wed Jan 28 12:22:58 2009 +0900
     3.2 +++ b/docs/check_pkgs	Wed Jan 28 13:06:45 2009 +0900
     3.3 @@ -2,12 +2,12 @@
     3.4  silent_which ()
     3.5  {
     3.6          which $1 1>/dev/null 2>/dev/null || {
     3.7 -                echo "*************************************************"
     3.8 -                echo "*************************************************"
     3.9 -                echo "* WARNING: Package '$1' is required"
    3.10 -                echo "*          to build Xen documentation"
    3.11 -                echo "*************************************************"
    3.12 -                echo "*************************************************"
    3.13 +                echo "================================================="
    3.14 +                echo "================================================="
    3.15 +                echo "= WARNING: Package '$1' is required"
    3.16 +                echo "=          to build Xen documentation"
    3.17 +                echo "================================================="
    3.18 +                echo "================================================="
    3.19          }
    3.20          which $1 1>/dev/null 2>/dev/null
    3.21  }
     4.1 --- a/docs/misc/dump-core-format.txt	Wed Jan 28 12:22:58 2009 +0900
     4.2 +++ b/docs/misc/dump-core-format.txt	Wed Jan 28 13:06:45 2009 +0900
     4.3 @@ -30,8 +30,13 @@ The elf header members are set as follow
     4.4          e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
     4.5          e_type = ET_CORE = 4
     4.6  ELFCLASS64 is always used independent of architecture.
     4.7 -e_ident[EI_DATA] and e_flags are set according to the dumping system's
     4.8 -architecture. Other members are set as usual.
     4.9 +e_machine is set as follows
    4.10 +  For x86 PV domain case, it is set according to the guest configuration
    4.11 +  (i.e. if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.)
    4.12 +  For other domain case (x86 HVM domain case and ia64 domain case),
    4.13 +  it is set according to the dumping system's architecture.
    4.14 +e_ident[EI_DATA] and e_flags are set according to the dumping system's architecture.
    4.15 +Other members are set as usual.
    4.16  
    4.17  Sections
    4.18  --------
    4.19 @@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i
    4.20    The format version isn't bumped because analysis tools can distinguish it.
    4.21  - .xen_ia64_mapped_regs section was made only for ia64 PV domain.
    4.22    In case of IA64 HVM domain, this section doesn't exist.
    4.23 +- elf header e_machine
    4.24 +  On x86 PV domain case, it is set according to the guest configuration.
    4.25 +  I.e. in the 32-on-64 case, it will be set to EM_386 instead of EM_X86_64.
    4.26 +  This is the same as 32-on-32 case, so there is no impact on analysis tools.
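
A minimal standalone sketch (not part of this changeset) of how an analysis tool might read the header of a dump-core file and tell a 32-bit guest image from a 64-bit one. It only assumes the standard <elf.h> constants and takes the dump file path as its argument; per the documentation above, the class is always ELFCLASS64 while e_machine reflects the guest's own width for x86 PV domains.

/* Hypothetical helper, for illustration only. */
#include <elf.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
    Elf64_Ehdr ehdr;
    FILE *f;

    if ( argc < 2 || (f = fopen(argv[1], "rb")) == NULL )
        return 1;

    /* Read only the ELF header; dump-core files are always ELFCLASS64. */
    if ( fread(&ehdr, sizeof(ehdr), 1, f) != 1 ||
         memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
         ehdr.e_ident[EI_CLASS] != ELFCLASS64 )
    {
        fclose(f);
        return 1;
    }

    /* For an x86 PV guest, e_machine follows the guest configuration. */
    printf("e_machine = %u (%s)\n", (unsigned)ehdr.e_machine,
           ehdr.e_machine == EM_386    ? "32-bit x86 guest" :
           ehdr.e_machine == EM_X86_64 ? "64-bit x86 guest" : "other");
    fclose(f);
    return 0;
}
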
     5.1 --- a/docs/misc/vtd.txt	Wed Jan 28 12:22:58 2009 +0900
     5.2 +++ b/docs/misc/vtd.txt	Wed Jan 28 13:06:45 2009 +0900
     5.3 @@ -38,6 +38,30 @@ Enable MSI/MSI-x for assigned devices
     5.4  Add "msi=1" option in kernel line of host grub.
     5.5  
     5.6  
     5.7 +MSI-INTx translation for passthrough devices in HVM
     5.8 +---------------------------------------------------
     5.9 +
    5.10 +If the assigned device uses a physical IRQ that is shared by more than
    5.11 +one device among multiple domains, there may be significant impact on
    5.12 +device performance. Unfortunately, this is quite a common case if the
    5.13 +IO-APIC (INTx) IRQ is used. MSI can avoid this issue, but was only
    5.14 +available if the guest enables it.
    5.15 +
    5.16 +With MSI-INTx translation turned on, Xen enables device MSI if it's
    5.17 +available, regardless of whether the guest uses INTx or MSI. If the
    5.18 +guest uses INTx IRQ, Xen will inject a translated INTx IRQ to guest's
    5.19 +virtual ioapic whenever an MSI message is received. This reduces the
    5.20 +interrupt sharing of the system. If the guest OS enables MSI or MSI-X,
    5.21 +the translation is automatically turned off.
    5.22 +
    5.23 +To enable or disable MSI-INTx translation globally, add "pci_msitranslate"
    5.24 +in the config file:
    5.25 +	pci_msitranslate = 1         (default is 1)
    5.26 +
    5.27 +To override for a specific device:
    5.28 +	pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
    5.29 +
    5.30 +
    5.31  Caveat on Conventional PCI Device Passthrough
    5.32  ---------------------------------------------
    5.33  
    5.34 @@ -80,6 +104,11 @@ 2 virtual PCI slots (6~7) are reserved i
    5.35  
    5.36  	[root@vt-vtd ~]# xm pci-attach HVMDomainVtd 0:2:0.0 7
    5.37  
    5.38 +    To specify options for the device, use -o or --options=. The following command would disable MSI-INTx translation for the device:
    5.39 +
    5.40 +	[root@vt-vtd ~]# xm pci-attach -o msitranslate=0 0:2:0.0 7
    5.41 +
    5.42 +
    5.43  VTd hotplug usage model:
    5.44  ------------------------
    5.45  
     6.1 --- a/stubdom/Makefile	Wed Jan 28 12:22:58 2009 +0900
     6.2 +++ b/stubdom/Makefile	Wed Jan 28 13:06:45 2009 +0900
     6.3 @@ -194,6 +194,7 @@ mk-headers-$(XEN_TARGET_ARCH): ioemu/lin
     6.4            ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
     6.5            ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) include/xen && \
     6.6            ( [ -h include/xen/sys ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen-sys/MiniOS include/xen/sys ) && \
     6.7 +          ( [ -h include/xen/libelf ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
     6.8  	  mkdir -p include/xen-foreign && \
     6.9  	  ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*)) include/xen-foreign/ && \
    6.10  	  $(MAKE) -C include/xen-foreign/ && \
     7.1 --- a/stubdom/README	Wed Jan 28 12:22:58 2009 +0900
     7.2 +++ b/stubdom/README	Wed Jan 28 13:06:45 2009 +0900
     7.3 @@ -56,6 +56,11 @@ sdl = 0
     7.4  
     7.5  vfb = [ 'type=sdl' ]
     7.6  
     7.7 +    By default qemu will use sdl together with opengl for rendering; if
     7.8 +    you do not want qemu to use opengl, then also pass opengl=0:
     7.9 +
    7.10 +vfb = [ 'type=sdl, opengl=0' ]
    7.11 +
    7.12  * Using a VNC server in the stub domain
    7.13  
    7.14    - In hvmconfig, set vnclisten to "172.30.206.1" for instance.  Do not use a
     8.1 --- a/tools/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
     8.2 +++ b/tools/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
     8.3 @@ -29,6 +29,10 @@ X11_LDPATH = -L/usr/X11R6/$(LIBLEAFDIR)
     8.4  
     8.5  CFLAGS += -D__XEN_TOOLS__
     8.6  
     8.7 +# Get gcc to generate the dependencies for us.
     8.8 +CFLAGS += -MMD -MF .$(@F).d
     8.9 +DEPS = .*.d
    8.10 +
    8.11  # Enable implicit LFS support *and* explicit LFS names.
    8.12  CFLAGS  += $(shell getconf LFS_CFLAGS)
    8.13  CFLAGS  += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
    8.14 @@ -59,4 +63,3 @@ subdirs-all subdirs-clean subdirs-instal
    8.15  
    8.16  subdir-all-% subdir-clean-% subdir-install-%: .phony
    8.17  	$(MAKE) -C $* $(patsubst subdir-%-$*,%,$@)
    8.18 -
     9.1 --- a/tools/blktap/drivers/Makefile	Wed Jan 28 12:22:58 2009 +0900
     9.2 +++ b/tools/blktap/drivers/Makefile	Wed Jan 28 13:06:45 2009 +0900
     9.3 @@ -13,16 +13,12 @@ CFLAGS   += $(CFLAGS_libxenstore)
     9.4  CFLAGS   += -I $(LIBAIO_DIR)
     9.5  CFLAGS   += -D_GNU_SOURCE
     9.6  
     9.7 -# Get gcc to generate the dependencies for us.
     9.8 -CFLAGS   += -Wp,-MD,.$(@F).d
     9.9 -DEPS      = .*.d
    9.10 -
    9.11  ifeq ($(shell . ./check_gcrypt),"yes")
    9.12  CFLAGS += -DUSE_GCRYPT
    9.13  CRYPT_LIB := -lgcrypt
    9.14  else
    9.15  CRYPT_LIB := -lcrypto
    9.16 -$(warning *** libgcrypt not installed: falling back to libcrypto ***)
    9.17 +$(warning === libgcrypt not installed: falling back to libcrypto ===)
    9.18  endif
    9.19  
    9.20  LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap
    10.1 --- a/tools/console/Makefile	Wed Jan 28 12:22:58 2009 +0900
    10.2 +++ b/tools/console/Makefile	Wed Jan 28 13:06:45 2009 +0900
    10.3 @@ -16,7 +16,7 @@ all: $(BIN)
    10.4  
    10.5  .PHONY: clean
    10.6  clean:
    10.7 -	$(RM) *.a *.so *.o *.rpm $(BIN)
    10.8 +	$(RM) *.a *.so *.o *.rpm $(BIN) $(DEPS)
    10.9  	$(RM) client/*.o daemon/*.o
   10.10  
   10.11  xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
   10.12 @@ -33,3 +33,5 @@ install: $(BIN)
   10.13  	$(INSTALL_PROG) xenconsoled $(DESTDIR)/$(SBINDIR)
   10.14  	$(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR)
   10.15  	$(INSTALL_PROG) xenconsole $(DESTDIR)$(PRIVATE_BINDIR)
   10.16 +
   10.17 +-include $(DEPS)
    11.1 --- a/tools/examples/xmexample.hvm	Wed Jan 28 12:22:58 2009 +0900
    11.2 +++ b/tools/examples/xmexample.hvm	Wed Jan 28 13:06:45 2009 +0900
    11.3 @@ -288,6 +288,39 @@ serial='pty'
    11.4  #  'x' -> we don't care (do not check)
    11.5  #  's' -> the bit must be the same as on the host that started this VM
    11.6  
    11.7 +#-----------------------------------------------------------------------------
    11.8 +#   Configure passthrough PCI{,-X,e} devices:
    11.9 +#
   11.10 +#   pci=[ '[SSSS:]BB:DD.F[,option1[,option2[...]]]', ... ]
   11.11 +#
   11.12 +#   [SSSS]:BB:DD.F  "bus segment:bus:device.function"(1) of the device to
   11.13 +#                   be assigned, bus segment is optional. All fields are
   11.14 +#                   in hexadecimal and no field should be longer than that
   11.15 +#                   as shown in the pattern. Successful assignment may need
   11.16 +#                   certain hardware support and additional configurations
   11.17 +#                   (e.g. VT-d, see docs/misc/vtd.txt for more details).
   11.18 +#
   11.19 +#       (1) bus segment is sometimes also referred to as the PCI "domain",
   11.20 +#           not to be confused with Xen domain.
   11.21 +#
   11.22 +#
   11.23 +#   optionN         per-device options in "key=val" format. Current
   11.24 +#                   available options are:
   11.25 +#                   - msitranslate=0|1
    11.26 +#                      per-device override of pci_msitranslate, see below
   11.27 +#
   11.28 +#pci=[ '07:00.0', '07:00.1' ]
   11.29 +
   11.30 +#   MSI-INTx translation for MSI capable devices:
   11.31 +#
   11.32 +#   If it's set, Xen will enable MSI for the device that supports it even
    11.33 +# if the guest doesn't use MSI. In that case, an IO-APIC type interrupt will
    11.34 +# be injected into the guest every time a corresponding MSI message is
   11.35 +# received.
   11.36 +#   If the guest enables MSI or MSI-X, the translation is automatically
   11.37 +# turned off.
   11.38 +# 
   11.39 +#pci_msitranslate=1
   11.40  
   11.41  #-----------------------------------------------------------------------------
   11.42  #   Configure PVSCSI devices:
    12.1 --- a/tools/firmware/Makefile	Wed Jan 28 12:22:58 2009 +0900
    12.2 +++ b/tools/firmware/Makefile	Wed Jan 28 13:06:45 2009 +0900
    12.3 @@ -15,10 +15,10 @@ SUBDIRS += hvmloader
    12.4  .PHONY: all
    12.5  all:
    12.6  	@set -e; if [ $$((`( bcc -v 2>&1 | grep version || echo 0.0.0 ) | cut -d' ' -f 3 | awk -F. '{ printf "0x%02x%02x%02x", $$1, $$2, $$3}'`)) -lt $$((0x00100e)) ] ; then \
    12.7 -	echo "***********************************************************"; \
    12.8 +	echo "==========================================================="; \
    12.9  	echo "Require dev86 package version >= 0.16.14 to build firmware!"; \
   12.10  	echo "(visit http://www.cix.co.uk/~mayday for more information)"; \
   12.11 -	echo "***********************************************************"; \
   12.12 +	echo "==========================================================="; \
   12.13  	else \
   12.14  	$(MAKE) subdirs-$@; \
   12.15  	fi
    13.1 --- a/tools/firmware/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    13.2 +++ b/tools/firmware/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    13.3 @@ -2,7 +2,7 @@
    13.4  override XEN_TARGET_ARCH = x86_32
    13.5  
    13.6  # User-supplied CFLAGS are not useful here.
    13.7 -CFLAGS :=
    13.8 +CFLAGS =
    13.9  
   13.10  include $(XEN_ROOT)/tools/Rules.mk
   13.11  
   13.12 @@ -13,9 +13,9 @@ endif
   13.13  CFLAGS += -Werror
   13.14  
   13.15  # Disable PIE/SSP if GCC supports them. They can break us.
   13.16 -CFLAGS += $(call cc-option,$(CC),-nopie,)
   13.17 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
   13.18 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
   13.19 +$(call cc-option-add,CFLAGS,CC,-nopie)
   13.20 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
   13.21 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
   13.22  
   13.23  # Extra CFLAGS suitable for an embedded type of environment.
   13.24  CFLAGS += -fno-builtin -msoft-float
    14.1 --- a/tools/firmware/hvmloader/32bitbios_support.c	Wed Jan 28 12:22:58 2009 +0900
    14.2 +++ b/tools/firmware/hvmloader/32bitbios_support.c	Wed Jan 28 13:06:45 2009 +0900
    14.3 @@ -32,15 +32,13 @@
    14.4  
    14.5  #include "../rombios/32bit/32bitbios_flat.h"
    14.6  
    14.7 -static void relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
    14.8 +static uint32_t relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
    14.9  {
   14.10      Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfarray;
   14.11      Elf32_Shdr *shdr = (Elf32_Shdr *)&elfarray[ehdr->e_shoff];
   14.12 -    char *secstrings = &elfarray[shdr[ehdr->e_shstrndx].sh_offset];
   14.13 -    char *jump_table;
   14.14      uint32_t reloc_off, reloc_size;
   14.15      char *highbiosarea;
   14.16 -    int i, jump_sec_idx = 0;
   14.17 +    int i;
   14.18  
   14.19      /*
   14.20       * Step 1. General elf cleanup, and compute total relocation size.
   14.21 @@ -51,13 +49,6 @@ static void relocate_32bitbios(char *elf
   14.22          /* By default all section data points into elf image data array. */
   14.23          shdr[i].sh_addr = (Elf32_Addr)&elfarray[shdr[i].sh_offset];
   14.24  
   14.25 -        if ( !strcmp(".biosjumptable", secstrings + shdr[i].sh_name) )
   14.26 -        {
   14.27 -            /* We do not relocate the BIOS jump table to high memory. */
   14.28 -            shdr[i].sh_flags &= ~SHF_ALLOC;
   14.29 -            jump_sec_idx = i;
   14.30 -        }
   14.31 -
   14.32          /* Fix up a corner case of address alignment. */
   14.33          if ( shdr[i].sh_addralign == 0 )
   14.34              shdr[i].sh_addralign = 1;
   14.35 @@ -76,7 +67,7 @@ static void relocate_32bitbios(char *elf
   14.36       */
   14.37      reloc_size = reloc_off;
   14.38      printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size);
   14.39 -    highbiosarea = (char *)(long)e820_malloc(reloc_size, 0);
   14.40 +    highbiosarea = mem_alloc(reloc_size, 0);
   14.41      BUG_ON(highbiosarea == NULL);
   14.42      printf("  Relocating to 0x%x-0x%x ... ",
   14.43             (uint32_t)&highbiosarea[0],
   14.44 @@ -148,21 +139,12 @@ static void relocate_32bitbios(char *elf
   14.45          }
   14.46      }
   14.47  
   14.48 -    /* Step 5. Find the ROMBIOS jump-table stub and copy in the real table. */
   14.49 -    for ( jump_table = (char *)ROMBIOS_BEGIN;
   14.50 -          jump_table != (char *)ROMBIOS_END;
   14.51 -          jump_table++ )
   14.52 -        if ( !strncmp(jump_table, "___JMPT", 7) )
   14.53 -            break;
   14.54 -    BUG_ON(jump_table == NULL);
   14.55 -    BUG_ON(jump_sec_idx == 0);
   14.56 -    memcpy(jump_table, (char *)shdr[jump_sec_idx].sh_addr,
   14.57 -           shdr[jump_sec_idx].sh_size);
   14.58 +    printf("done\n");
   14.59  
   14.60 -    printf("done\n");
   14.61 +    return (uint32_t)highbiosarea;
   14.62  }
   14.63  
   14.64 -void highbios_setup(void)
   14.65 +uint32_t highbios_setup(void)
   14.66  {
   14.67 -    relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
   14.68 +    return relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
   14.69  }
    15.1 --- a/tools/firmware/hvmloader/Makefile	Wed Jan 28 12:22:58 2009 +0900
    15.2 +++ b/tools/firmware/hvmloader/Makefile	Wed Jan 28 13:06:45 2009 +0900
    15.3 @@ -58,4 +58,6 @@ roms.h: ../rombios/BIOS-bochs-latest ../
    15.4  .PHONY: clean
    15.5  clean: subdirs-clean
    15.6  	rm -f roms.h acpi.h
    15.7 -	rm -f hvmloader hvmloader.tmp *.o
    15.8 +	rm -f hvmloader hvmloader.tmp *.o $(DEPS)
    15.9 +
   15.10 +-include $(DEPS)
    16.1 --- a/tools/firmware/hvmloader/acpi/Makefile	Wed Jan 28 12:22:58 2009 +0900
    16.2 +++ b/tools/firmware/hvmloader/acpi/Makefile	Wed Jan 28 13:06:45 2009 +0900
    16.3 @@ -22,10 +22,6 @@ C_SRC = build.c dsdt.c static_tables.c
    16.4  H_SRC = $(wildcard *.h)
    16.5  OBJS  = $(patsubst %.c,%.o,$(C_SRC))
    16.6  
    16.7 -IASL_VER = acpica-unix-20080729
    16.8 -#IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz
    16.9 -IASL_URL = $(XEN_EXTFILES_URL)/$(IASL_VER).tar.gz
   16.10 -
   16.11  CFLAGS += -I. -I.. $(CFLAGS_include)
   16.12  
   16.13  vpath iasl $(PATH)
   16.14 @@ -46,15 +42,11 @@ dsdt.c: dsdt.asl
   16.15  
   16.16  iasl:
   16.17  	@echo
   16.18 -	@echo "ACPI ASL compiler(iasl) is needed"
   16.19 -	@echo "Download Intel ACPI CA"
   16.20 -	@echo "If wget failed, please download and compile manually from"
   16.21 +	@echo "ACPI ASL compiler (iasl) is needed"
   16.22 +	@echo "Download and install Intel ACPI CA from"
   16.23  	@echo "http://acpica.org/downloads/"
   16.24  	@echo 
   16.25 -	wget $(IASL_URL)
   16.26 -	tar xzf $(IASL_VER).tar.gz
   16.27 -	make -C $(IASL_VER)/compiler
   16.28 -	$(INSTALL_PROG) $(IASL_VER)/compiler/iasl $(DESTDIR)$(BINDIR)/iasl
   16.29 +	@exit 1
   16.30  
   16.31  acpi.a: $(OBJS)
   16.32  	$(AR) rc $@ $(OBJS)
   16.33 @@ -63,6 +55,8 @@ acpi.a: $(OBJS)
   16.34  	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
   16.35  
   16.36  clean:
   16.37 -	rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz
   16.38 +	rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS)
   16.39  
   16.40  install: all
   16.41 +
   16.42 +-include $(DEPS)
    17.1 --- a/tools/firmware/hvmloader/acpi/build.c	Wed Jan 28 12:22:58 2009 +0900
    17.2 +++ b/tools/firmware/hvmloader/acpi/build.c	Wed Jan 28 13:06:45 2009 +0900
    17.3 @@ -48,50 +48,11 @@ static void set_checksum(
    17.4      p[checksum_offset] = -sum;
    17.5  }
    17.6  
    17.7 -static int uart_exists(uint16_t uart_base)
    17.8 -{
    17.9 -    uint16_t ier = uart_base + 1;
   17.10 -    uint8_t a, b, c;
   17.11 -
   17.12 -    a = inb(ier);
   17.13 -    outb(ier, 0);
   17.14 -    b = inb(ier);
   17.15 -    outb(ier, 0xf);
   17.16 -    c = inb(ier);
   17.17 -    outb(ier, a);
   17.18 -
   17.19 -    return ((b == 0) && (c == 0xf));
   17.20 -}
   17.21 -
   17.22 -static int hpet_exists(unsigned long hpet_base)
   17.23 -{
   17.24 -    uint32_t hpet_id = *(uint32_t *)hpet_base;
   17.25 -    return ((hpet_id >> 16) == 0x8086);
   17.26 -}
   17.27 -
   17.28  static uint8_t battery_port_exists(void)
   17.29  {
   17.30      return (inb(0x88) == 0x1F);
   17.31  }
   17.32  
   17.33 -static int construct_bios_info_table(uint8_t *buf)
   17.34 -{
   17.35 -    struct bios_info *bios_info = (struct bios_info *)buf;
   17.36 -
   17.37 -    memset(bios_info, 0, sizeof(*bios_info));
   17.38 -
   17.39 -    bios_info->com1_present = uart_exists(0x3f8);
   17.40 -    bios_info->com2_present = uart_exists(0x2f8);
   17.41 -
   17.42 -    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
   17.43 -
   17.44 -    bios_info->pci_min = PCI_MEMBASE;
   17.45 -    bios_info->pci_len = PCI_MEMSIZE;
   17.46 -    bios_info->xen_pfiob = 0xdead;
   17.47 -
   17.48 -    return align16(sizeof(*bios_info));
   17.49 -}
   17.50 -
   17.51  static int construct_madt(struct acpi_20_madt *madt)
   17.52  {
   17.53      struct acpi_20_madt_intsrcovr *intsrcovr;
   17.54 @@ -150,7 +111,7 @@ static int construct_madt(struct acpi_20
   17.55      offset += sizeof(*io_apic);
   17.56  
   17.57      lapic = (struct acpi_20_madt_lapic *)(io_apic + 1);
   17.58 -    for ( i = 0; i < get_vcpu_nr(); i++ )
   17.59 +    for ( i = 0; i < hvm_info->nr_vcpus; i++ )
   17.60      {
   17.61          memset(lapic, 0, sizeof(*lapic));
   17.62          lapic->type    = ACPI_PROCESSOR_LOCAL_APIC;
   17.63 @@ -199,9 +160,10 @@ static int construct_secondary_tables(ui
   17.64      struct acpi_20_tcpa *tcpa;
   17.65      static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
   17.66      uint16_t *tis_hdr;
   17.67 +    void *lasa;
   17.68  
   17.69      /* MADT. */
   17.70 -    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
   17.71 +    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
   17.72      {
   17.73          madt = (struct acpi_20_madt *)&buf[offset];
   17.74          offset += construct_madt(madt);
   17.75 @@ -246,11 +208,11 @@ static int construct_secondary_tables(ui
   17.76          tcpa->header.oem_revision = ACPI_OEM_REVISION;
   17.77          tcpa->header.creator_id   = ACPI_CREATOR_ID;
   17.78          tcpa->header.creator_revision = ACPI_CREATOR_REVISION;
   17.79 -        tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0);
   17.80 -        if ( tcpa->lasa )
   17.81 +        if ( (lasa = mem_alloc(ACPI_2_0_TCPA_LAML_SIZE, 0)) != NULL )
   17.82          {
   17.83 +            tcpa->lasa = virt_to_phys(lasa);
   17.84              tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE;
   17.85 -            memset((char *)(unsigned long)tcpa->lasa, 0, tcpa->laml);
   17.86 +            memset(lasa, 0, tcpa->laml);
   17.87              set_checksum(tcpa,
   17.88                           offsetof(struct acpi_header, checksum),
   17.89                           tcpa->header.length);
   17.90 @@ -348,9 +310,7 @@ static void __acpi_build_tables(uint8_t 
   17.91      buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
   17.92      offset = 0;
   17.93  
   17.94 -    offset += construct_bios_info_table(&buf[offset]);
   17.95      rsdp = (struct acpi_20_rsdp *)&buf[offset];
   17.96 -
   17.97      memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
   17.98      offset += align16(sizeof(struct acpi_20_rsdp));
   17.99      rsdp->rsdt_address = (unsigned long)rsdt;
  17.100 @@ -376,7 +336,7 @@ void acpi_build_tables(void)
  17.101      memset(buf, 0, high_sz);
  17.102  
  17.103      /* Allocate data area and set up ACPI tables there. */
  17.104 -    buf = (uint8_t *)e820_malloc(high_sz, 0);
  17.105 +    buf = mem_alloc(high_sz, 0);
  17.106      __acpi_build_tables(buf, &low_sz, &high_sz);
  17.107  
  17.108      printf(" - Lo data: %08lx-%08lx\n"
    18.1 --- a/tools/firmware/hvmloader/acpi/dsdt.asl	Wed Jan 28 12:22:58 2009 +0900
    18.2 +++ b/tools/firmware/hvmloader/acpi/dsdt.asl	Wed Jan 28 13:06:45 2009 +0900
    18.3 @@ -86,7 +86,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
    18.4  
    18.5      Scope (\_SB)
    18.6      {
    18.7 -       /* ACPI_PHYSICAL_ADDRESS == 0xEA000 */
    18.8 +       /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */
    18.9         OperationRegion(BIOS, SystemMemory, 0xEA000, 16)
   18.10         Field(BIOS, ByteAcc, NoLock, Preserve) {
   18.11             UAR1, 1,
    19.1 --- a/tools/firmware/hvmloader/acpi/dsdt.c	Wed Jan 28 12:22:58 2009 +0900
    19.2 +++ b/tools/firmware/hvmloader/acpi/dsdt.c	Wed Jan 28 13:06:45 2009 +0900
    19.3 @@ -1,11 +1,11 @@
    19.4  /*
    19.5   * 
    19.6   * Intel ACPI Component Architecture
    19.7 - * ASL Optimizing Compiler version 20080729 [Dec 25 2008]
    19.8 + * ASL Optimizing Compiler version 20081204 [Jan 23 2009]
    19.9   * Copyright (C) 2000 - 2008 Intel Corporation
   19.10   * Supports ACPI Specification Revision 3.0a
   19.11   * 
   19.12 - * Compilation of "dsdt.asl" - Thu Dec 25 17:00:32 2008
   19.13 + * Compilation of "dsdt.asl" - Fri Jan 23 14:30:29 2009
   19.14   * 
   19.15   * C source code output
   19.16   *
   19.17 @@ -13,10 +13,10 @@
   19.18  unsigned char AmlCode[] =
   19.19  {
   19.20      0x44,0x53,0x44,0x54,0x5E,0x11,0x00,0x00,  /* 00000000    "DSDT^..." */
   19.21 -    0x02,0xD1,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
   19.22 +    0x02,0xEB,0x58,0x65,0x6E,0x00,0x00,0x00,  /* 00000008    "..Xen..." */
   19.23      0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00,  /* 00000010    "HVM....." */
   19.24      0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C,  /* 00000018    "....INTL" */
   19.25 -    0x29,0x07,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    ").. .PMB" */
   19.26 +    0x04,0x12,0x08,0x20,0x08,0x50,0x4D,0x42,  /* 00000020    "... .PMB" */
   19.27      0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C,  /* 00000028    "S....PML" */
   19.28      0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31,  /* 00000030    "N...IOB1" */
   19.29      0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08,  /* 00000038    "..IOL1.." */
    20.1 --- a/tools/firmware/hvmloader/cacheattr.c	Wed Jan 28 12:22:58 2009 +0900
    20.2 +++ b/tools/firmware/hvmloader/cacheattr.c	Wed Jan 28 13:06:45 2009 +0900
    20.3 @@ -88,11 +88,25 @@ void cacheattr_init(void)
    20.4      nr_var_ranges = (uint8_t)mtrr_cap;
    20.5      if ( nr_var_ranges != 0 )
    20.6      {
    20.7 -        /* A single UC range covering PCI space. */
    20.8 -        wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
    20.9 -        wrmsr(MSR_MTRRphysMask(0),
   20.10 -              ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
   20.11 -        printf("var MTRRs ... ");
   20.12 +        unsigned long base = pci_mem_start, size;
   20.13 +        int i;
   20.14 +
   20.15 +        for ( i = 0; (base != pci_mem_end) && (i < nr_var_ranges); i++ )
   20.16 +        {
   20.17 +            size = PAGE_SIZE;
   20.18 +            while ( !(base & size) )
   20.19 +                size <<= 1;
   20.20 +            while ( ((base + size) < base) || ((base + size) > pci_mem_end) )
   20.21 +                size >>= 1;
   20.22 +
   20.23 +            wrmsr(MSR_MTRRphysBase(i), base);
   20.24 +            wrmsr(MSR_MTRRphysMask(i),
   20.25 +                  (~(uint64_t)(size-1) & addr_mask) | (1u << 11));
   20.26 +
   20.27 +            base += size;
   20.28 +        }
   20.29 +
   20.30 +        printf("var MTRRs [%d/%d] ... ", i, nr_var_ranges);
   20.31      }
   20.32  
   20.33      wrmsr(MSR_MTRRdefType, mtrr_def);
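
Not part of the changeset: a standalone sketch of the range-splitting arithmetic used by the new MTRR loop above. It runs the same logic against the PCI_MEM_START/PCI_MEM_END defaults from config.h and simply prints the ranges; the real loop uses pci_mem_start/pci_mem_end (which pci_setup() may have widened) and programs MSR_MTRRphysBase/MSR_MTRRphysMask instead.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 0x1000ull

int main(void)
{
    /* Assumed MMIO hole boundaries, mirroring the config.h defaults. */
    uint64_t base = 0xf0000000ull, end = 0xfc000000ull, size;
    int i, nr_var_ranges = 8;   /* assumed number of variable-range MTRRs */

    for ( i = 0; (base != end) && (i < nr_var_ranges); i++ )
    {
        /* Largest power-of-two block naturally aligned at 'base'... */
        size = PAGE_SIZE;
        while ( !(base & size) )
            size <<= 1;
        /* ...clipped so it does not extend past the end of the hole. */
        while ( (base + size) > end )
            size >>= 1;

        printf("UC range %d: 0x%llx-0x%llx\n", i,
               (unsigned long long)base, (unsigned long long)(base + size));
        base += size;
    }
    return 0;
}
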
    21.1 --- a/tools/firmware/hvmloader/config.h	Wed Jan 28 12:22:58 2009 +0900
    21.2 +++ b/tools/firmware/hvmloader/config.h	Wed Jan 28 13:06:45 2009 +0900
    21.3 @@ -1,6 +1,9 @@
    21.4  #ifndef __HVMLOADER_CONFIG_H__
    21.5  #define __HVMLOADER_CONFIG_H__
    21.6  
    21.7 +#define PAGE_SHIFT 12
    21.8 +#define PAGE_SIZE  (1ul << PAGE_SHIFT)
    21.9 +
   21.10  #define IOAPIC_BASE_ADDRESS 0xfec00000
   21.11  #define IOAPIC_ID           0x01
   21.12  #define IOAPIC_VERSION      0x11
   21.13 @@ -11,8 +14,14 @@
   21.14  #define PCI_ISA_DEVFN       0x08    /* dev 1, fn 0 */
   21.15  #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
   21.16  
   21.17 -#define PCI_MEMBASE         0xf0000000
   21.18 -#define PCI_MEMSIZE         0x0c000000
   21.19 +/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
   21.20 +#define PCI_MEM_START       0xf0000000
   21.21 +#define PCI_MEM_END         0xfc000000
   21.22 +extern unsigned long pci_mem_start, pci_mem_end;
   21.23 +
   21.24 +/* We reserve 16MB for special BIOS mappings, etc. */
   21.25 +#define RESERVED_MEMBASE    0xfc000000
   21.26 +#define RESERVED_MEMSIZE    0x01000000
   21.27  
   21.28  #define ROMBIOS_SEG            0xF000
   21.29  #define ROMBIOS_BEGIN          0x000F0000
   21.30 @@ -21,16 +30,17 @@
   21.31  #define ROMBIOS_END            (ROMBIOS_BEGIN + ROMBIOS_SIZE)
   21.32  
   21.33  /* Memory map. */
   21.34 +#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
   21.35  #define HYPERCALL_PHYSICAL_ADDRESS    0x00080000
   21.36  #define VGABIOS_PHYSICAL_ADDRESS      0x000C0000
   21.37  #define OPTIONROM_PHYSICAL_ADDRESS    0x000C8000
   21.38  #define OPTIONROM_PHYSICAL_END        0x000EA000
   21.39 -#define ACPI_PHYSICAL_ADDRESS         0x000EA000
   21.40 +#define BIOS_INFO_PHYSICAL_ADDRESS    0x000EA000
   21.41 +#define ACPI_PHYSICAL_ADDRESS         0x000EA020
   21.42  #define E820_PHYSICAL_ADDRESS         0x000EA100
   21.43  #define SMBIOS_PHYSICAL_ADDRESS       0x000EB000
   21.44  #define SMBIOS_MAXIMUM_SIZE           0x00005000
   21.45  #define ROMBIOS_PHYSICAL_ADDRESS      0x000F0000
   21.46 -#define SCRATCH_PHYSICAL_ADDRESS      0x00010000
   21.47  
   21.48  /* Offsets from E820_PHYSICAL_ADDRESS. */
   21.49  #define E820_NR_OFFSET                0x0
   21.50 @@ -39,12 +49,16 @@
   21.51  /* Xen Platform Device */
   21.52  #define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */
   21.53  
   21.54 +/* Located at BIOS_INFO_PHYSICAL_ADDRESS. */
   21.55  struct bios_info {
   21.56 -    uint8_t  com1_present:1;
   21.57 -    uint8_t  com2_present:1;
   21.58 -    uint8_t  hpet_present:1;
   21.59 -    uint32_t pci_min, pci_len;
   21.60 -    uint16_t xen_pfiob;
   21.61 +    uint8_t  com1_present:1;    /* 0[0] - System has COM1? */
   21.62 +    uint8_t  com2_present:1;    /* 0[1] - System has COM2? */
   21.63 +    uint8_t  hpet_present:1;    /* 0[2] - System has HPET? */
   21.64 +    uint32_t pci_min, pci_len;  /* 4, 8 - PCI I/O hole boundaries */
   21.65 +    uint32_t bios32_entry;      /* 12   - Entry point for 32-bit BIOS */
   21.66 +    uint16_t xen_pfiob;         /* 16   - Xen platform device I/O ports */
   21.67  };
   21.68 +#define BIOSINFO_OFF_bios32_entry 12
   21.69 +#define BIOSINFO_OFF_xen_pfiob    16
   21.70  
   21.71  #endif /* __HVMLOADER_CONFIG_H__ */
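
Not part of the changeset: the BIOSINFO_OFF_* constants above have to stay in sync with the C layout of struct bios_info, since the ROMBIOS side addresses the table by numeric offset rather than through the struct. A small host-side check, assuming the usual x86 alignment rules that hvmloader is built with:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Mirror of struct bios_info from config.h, for layout checking only. */
struct bios_info {
    uint8_t  com1_present:1;
    uint8_t  com2_present:1;
    uint8_t  hpet_present:1;
    uint32_t pci_min, pci_len;
    uint32_t bios32_entry;
    uint16_t xen_pfiob;
};

int main(void)
{
    /* Must match BIOSINFO_OFF_bios32_entry / BIOSINFO_OFF_xen_pfiob. */
    assert(offsetof(struct bios_info, bios32_entry) == 12);
    assert(offsetof(struct bios_info, xen_pfiob)    == 16);
    return 0;
}
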
    22.1 --- a/tools/firmware/hvmloader/hvmloader.c	Wed Jan 28 12:22:58 2009 +0900
    22.2 +++ b/tools/firmware/hvmloader/hvmloader.c	Wed Jan 28 13:06:45 2009 +0900
    22.3 @@ -31,6 +31,7 @@
    22.4  #include "option_rom.h"
    22.5  #include <xen/version.h>
    22.6  #include <xen/hvm/params.h>
    22.7 +#include <xen/memory.h>
    22.8  
    22.9  asm (
   22.10      "    .text                       \n"
   22.11 @@ -99,6 +100,9 @@ asm (
   22.12      "    .text                       \n"
   22.13      );
   22.14  
   22.15 +unsigned long pci_mem_start = PCI_MEM_START;
   22.16 +unsigned long pci_mem_end = PCI_MEM_END;
   22.17 +
   22.18  static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
   22.19  
   22.20  static void init_hypercalls(void)
   22.21 @@ -148,16 +152,14 @@ static void apic_setup(void)
   22.22  
   22.23  static void pci_setup(void)
   22.24  {
   22.25 -    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd;
   22.26 +    uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
   22.27      uint16_t class, vendor_id, device_id;
   22.28      unsigned int bar, pin, link, isa_irq;
   22.29  
   22.30      /* Resources assignable to PCI devices via BARs. */
   22.31      struct resource {
   22.32          uint32_t base, max;
   22.33 -    } *resource;
   22.34 -    struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
   22.35 -    struct resource io_resource  = { 0xc000, 0x10000 };
   22.36 +    } *resource, mem_resource, io_resource;
   22.37  
   22.38      /* Create a list of device BARs in descending order of size. */
   22.39      struct bars {
   22.40 @@ -248,6 +250,10 @@ static void pci_setup(void)
   22.41              bars[i].bar_reg = bar_reg;
   22.42              bars[i].bar_sz  = bar_sz;
   22.43  
   22.44 +            if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
   22.45 +                 PCI_BASE_ADDRESS_SPACE_MEMORY )
   22.46 +                mmio_total += bar_sz;
   22.47 +
   22.48              nr_bars++;
   22.49  
   22.50              /* Skip the upper-half of the address for a 64-bit BAR. */
   22.51 @@ -276,6 +282,28 @@ static void pci_setup(void)
   22.52          pci_writew(devfn, PCI_COMMAND, cmd);
   22.53      }
   22.54  
   22.55 +    while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
   22.56 +            ((pci_mem_start << 1) != 0) )
   22.57 +        pci_mem_start <<= 1;
   22.58 +
   22.59 +    while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
   22.60 +    {
   22.61 +        struct xen_add_to_physmap xatp;
   22.62 +        if ( hvm_info->high_mem_pgend == 0 )
   22.63 +            hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
   22.64 +        xatp.domid = DOMID_SELF;
   22.65 +        xatp.space = XENMAPSPACE_gmfn;
   22.66 +        xatp.idx   = --hvm_info->low_mem_pgend;
   22.67 +        xatp.gpfn  = hvm_info->high_mem_pgend++;
   22.68 +        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
   22.69 +            BUG();
   22.70 +    }
   22.71 +
   22.72 +    mem_resource.base = pci_mem_start;
   22.73 +    mem_resource.max = pci_mem_end;
   22.74 +    io_resource.base = 0xc000;
   22.75 +    io_resource.max = 0x10000;
   22.76 +
   22.77      /* Assign iomem and ioport resources in descending order of size. */
   22.78      for ( i = 0; i < nr_bars; i++ )
   22.79      {
   22.80 @@ -488,22 +516,13 @@ static int pci_load_option_roms(uint32_t
   22.81  /* Replace possibly erroneous memory-size CMOS fields with correct values. */
   22.82  static void cmos_write_memory_size(void)
   22.83  {
   22.84 -    struct e820entry *map = E820;
   22.85 -    int i, nr = *E820_NR;
   22.86 -    uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0;
   22.87 -
   22.88 -    for ( i = 0; i < nr; i++ )
   22.89 -        if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) )
   22.90 -            break;
   22.91 +    uint32_t base_mem = 640, ext_mem, alt_mem;
   22.92  
   22.93 -    if ( i != nr )
   22.94 -    {
   22.95 -        alt_mem = ext_mem = map[i].addr + map[i].size;
   22.96 -        ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
   22.97 -        if ( ext_mem > 0xffff )
   22.98 -            ext_mem = 0xffff;
   22.99 -        alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
  22.100 -    }
  22.101 +    alt_mem = ext_mem = hvm_info->low_mem_pgend << PAGE_SHIFT;
  22.102 +    ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
  22.103 +    if ( ext_mem > 0xffff )
  22.104 +        ext_mem = 0xffff;
  22.105 +    alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
  22.106  
  22.107      /* All BIOSes: conventional memory (CMOS *always* reports 640kB). */
  22.108      cmos_outb(0x15, (uint8_t)(base_mem >> 0));
  22.109 @@ -520,25 +539,23 @@ static void cmos_write_memory_size(void)
  22.110      cmos_outb(0x35, (uint8_t)( alt_mem >> 8));
  22.111  }
  22.112  
  22.113 -static uint16_t init_xen_platform_io_base(void)
  22.114 +static uint16_t xen_platform_io_base(void)
  22.115  {
  22.116 -    struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS;
  22.117      uint32_t devfn, bar_data;
  22.118      uint16_t vendor_id, device_id;
  22.119  
  22.120 -    bios_info->xen_pfiob = 0;
  22.121 -
  22.122      for ( devfn = 0; devfn < 128; devfn++ )
  22.123      {
  22.124          vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
  22.125          device_id = pci_readw(devfn, PCI_DEVICE_ID);
  22.126 -        if ( (vendor_id != 0x5853) || (device_id != 0x0001) )
  22.127 -            continue;
  22.128 -        bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
  22.129 -        bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK;
  22.130 +        if ( (vendor_id == 0x5853) && (device_id == 0x0001) )
  22.131 +        {
  22.132 +            bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
  22.133 +            return bar_data & PCI_BASE_ADDRESS_IO_MASK;
  22.134 +        }
  22.135      }
  22.136  
  22.137 -    return bios_info->xen_pfiob;
  22.138 +    return 0;
  22.139  }
  22.140  
  22.141  /*
  22.142 @@ -548,27 +565,80 @@ static uint16_t init_xen_platform_io_bas
  22.143   */
  22.144  static void init_vm86_tss(void)
  22.145  {
  22.146 -    uint32_t tss;
  22.147 +    void *tss;
  22.148      struct xen_hvm_param p;
  22.149  
  22.150 -    tss = e820_malloc(128, 128);
  22.151 -    memset((char *)tss, 0, 128);
  22.152 +    tss = mem_alloc(128, 128);
  22.153 +    memset(tss, 0, 128);
  22.154      p.domid = DOMID_SELF;
  22.155      p.index = HVM_PARAM_VM86_TSS;
  22.156 -    p.value = tss;
  22.157 +    p.value = virt_to_phys(tss);
  22.158      hypercall_hvm_op(HVMOP_set_param, &p);
  22.159 -    printf("vm86 TSS at %08x\n", tss);
  22.160 +    printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
  22.161  }
  22.162  
  22.163 -/*
  22.164 - * Copy the E820 table provided by the HVM domain builder into the correct
  22.165 - * place in the memory map we share with the rombios.
  22.166 - */
  22.167 -static void copy_e820_table(void)
  22.168 +/* Create an E820 table based on memory parameters provided in hvm_info. */
  22.169 +static void build_e820_table(void)
  22.170  {
  22.171 -    uint8_t nr = *(uint8_t *)(HVM_E820_PAGE + HVM_E820_NR_OFFSET);
  22.172 -    BUG_ON(nr > 16);
  22.173 -    memcpy(E820, (char *)HVM_E820_PAGE + HVM_E820_OFFSET, nr * sizeof(*E820));
  22.174 +    struct e820entry *e820 = E820;
  22.175 +    unsigned int nr = 0;
  22.176 +
  22.177 +    /* 0x0-0x9FC00: Ordinary RAM. */
  22.178 +    e820[nr].addr = 0x0;
  22.179 +    e820[nr].size = 0x9FC00;
  22.180 +    e820[nr].type = E820_RAM;
  22.181 +    nr++;
  22.182 +
  22.183 +    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
  22.184 +    e820[nr].addr = 0x9FC00;
  22.185 +    e820[nr].size = 0x400;
  22.186 +    e820[nr].type = E820_RESERVED;
  22.187 +    nr++;
  22.188 +
  22.189 +    /*
  22.190 +     * Following regions are standard regions of the PC memory map.
  22.191 +     * They are not covered by e820 regions. OSes will not use as RAM.
  22.192 +     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
  22.193 +     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
  22.194 +     * TODO: free pages which turn out to be unused.
  22.195 +     */
  22.196 +
  22.197 +    /*
  22.198 +     * 0xE0000-0x0F0000: PC-specific area. We place various tables here.
  22.199 +     * 0xF0000-0x100000: System BIOS.
  22.200 +     * TODO: free pages which turn out to be unused.
  22.201 +     */
  22.202 +    e820[nr].addr = 0xE0000;
  22.203 +    e820[nr].size = 0x20000;
  22.204 +    e820[nr].type = E820_RESERVED;
  22.205 +    nr++;
  22.206 +
  22.207 +    /* Low RAM goes here. Reserve space for special pages. */
  22.208 +    BUG_ON((hvm_info->low_mem_pgend << PAGE_SHIFT) < (2u << 20));
  22.209 +    e820[nr].addr = 0x100000;
  22.210 +    e820[nr].size = (hvm_info->low_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
  22.211 +    e820[nr].type = E820_RAM;
  22.212 +    nr++;
  22.213 +
  22.214 +    /*
  22.215 +     * Explicitly reserve space for special pages.
   22.216 +     * This space starts at RESERVED_MEMBASE and extends to cover various
  22.217 +     * fixed hardware mappings (e.g., LAPIC, IOAPIC, default SVGA framebuffer).
  22.218 +     */
  22.219 +    e820[nr].addr = RESERVED_MEMBASE;
  22.220 +    e820[nr].size = (uint32_t)-e820[nr].addr;
  22.221 +    e820[nr].type = E820_RESERVED;
  22.222 +    nr++;
  22.223 +
  22.224 +    if ( hvm_info->high_mem_pgend )
  22.225 +    {
  22.226 +        e820[nr].addr = ((uint64_t)1 << 32);
  22.227 +        e820[nr].size =
  22.228 +            ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
  22.229 +        e820[nr].type = E820_RAM;
  22.230 +        nr++;
  22.231 +    }
  22.232 +
  22.233      *E820_NR = nr;
  22.234  }
  22.235  
  22.236 @@ -576,17 +646,18 @@ int main(void)
  22.237  {
  22.238      int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
  22.239      int rombios_sz, smbios_sz;
  22.240 -    uint32_t etherboot_phys_addr, option_rom_phys_addr, vga_ram = 0;
  22.241 -    uint16_t xen_pfiob;
  22.242 +    uint32_t etherboot_phys_addr, option_rom_phys_addr, bios32_addr;
  22.243 +    struct bios_info *bios_info;
  22.244  
  22.245      printf("HVM Loader\n");
  22.246  
  22.247 -    copy_e820_table();
  22.248 -
  22.249      init_hypercalls();
  22.250  
  22.251      printf("CPU speed is %u MHz\n", get_cpu_mhz());
  22.252  
  22.253 +    apic_setup();
  22.254 +    pci_setup();
  22.255 +
  22.256      smp_initialise();
  22.257  
  22.258      perform_tests();
  22.259 @@ -599,12 +670,9 @@ int main(void)
  22.260      if ( rombios_sz > 0x10000 )
  22.261          rombios_sz = 0x10000;
  22.262      memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz);
  22.263 -    highbios_setup();
  22.264 +    bios32_addr = highbios_setup();
  22.265  
  22.266 -    apic_setup();
  22.267 -    pci_setup();
  22.268 -
  22.269 -    if ( (get_vcpu_nr() > 1) || get_apic_mode() )
  22.270 +    if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
  22.271          create_mp_tables();
  22.272  
  22.273      switch ( virtual_vga )
  22.274 @@ -626,12 +694,6 @@ int main(void)
  22.275          break;
  22.276      }
  22.277  
  22.278 -    if ( virtual_vga != VGA_none )
  22.279 -    {
  22.280 -        vga_ram = e820_malloc(8 << 20, 4096);
  22.281 -        printf("VGA RAM at %08x\n", vga_ram);
  22.282 -    }
  22.283 -
  22.284      etherboot_phys_addr = VGABIOS_PHYSICAL_ADDRESS + vgabios_sz;
  22.285      if ( etherboot_phys_addr < OPTIONROM_PHYSICAL_ADDRESS )
  22.286          etherboot_phys_addr = OPTIONROM_PHYSICAL_ADDRESS;
  22.287 @@ -640,7 +702,7 @@ int main(void)
  22.288      option_rom_phys_addr = etherboot_phys_addr + etherboot_sz;
  22.289      option_rom_sz = pci_load_option_roms(option_rom_phys_addr);
  22.290  
  22.291 -    if ( get_acpi_enabled() )
  22.292 +    if ( hvm_info->acpi_enabled )
  22.293      {
  22.294          printf("Loading ACPI ...\n");
  22.295          acpi_build_tables();
  22.296 @@ -672,9 +734,17 @@ int main(void)
  22.297                 ROMBIOS_PHYSICAL_ADDRESS,
  22.298                 ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1);
  22.299  
  22.300 -    xen_pfiob = init_xen_platform_io_base();
  22.301 -    if ( xen_pfiob && vga_ram )
  22.302 -        outl(xen_pfiob + 4, vga_ram);
  22.303 +    build_e820_table();
  22.304 +
  22.305 +    bios_info = (struct bios_info *)BIOS_INFO_PHYSICAL_ADDRESS;
  22.306 +    memset(bios_info, 0, sizeof(*bios_info));
  22.307 +    bios_info->com1_present = uart_exists(0x3f8);
  22.308 +    bios_info->com2_present = uart_exists(0x2f8);
  22.309 +    bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
  22.310 +    bios_info->pci_min = pci_mem_start;
  22.311 +    bios_info->pci_len = pci_mem_end - pci_mem_start;
  22.312 +    bios_info->bios32_entry = bios32_addr;
  22.313 +    bios_info->xen_pfiob = xen_platform_io_base();
  22.314  
  22.315      printf("Invoking ROMBIOS ...\n");
  22.316      return 0;
    23.1 --- a/tools/firmware/hvmloader/mp_tables.c	Wed Jan 28 12:22:58 2009 +0900
    23.2 +++ b/tools/firmware/hvmloader/mp_tables.c	Wed Jan 28 13:06:45 2009 +0900
    23.3 @@ -155,7 +155,7 @@ static void fill_mp_config_table(struct 
    23.4      int vcpu_nr, i;
    23.5      uint8_t checksum;
    23.6  
    23.7 -    vcpu_nr = get_vcpu_nr();
    23.8 +    vcpu_nr = hvm_info->nr_vcpus;
    23.9  
   23.10      /* fill in the MP configuration table signature, "PCMP" */
   23.11      mpct->signature[0] = 'P';
   23.12 @@ -317,7 +317,7 @@ void create_mp_tables(void)
   23.13      char *p;
   23.14      int vcpu_nr, i, length;
   23.15  
   23.16 -    vcpu_nr = get_vcpu_nr();
   23.17 +    vcpu_nr = hvm_info->nr_vcpus;
   23.18  
   23.19      printf("Creating MP tables ...\n");
   23.20  
    24.1 --- a/tools/firmware/hvmloader/smbios.c	Wed Jan 28 12:22:58 2009 +0900
    24.2 +++ b/tools/firmware/hvmloader/smbios.c	Wed Jan 28 13:06:45 2009 +0900
    24.3 @@ -118,8 +118,9 @@ write_smbios_tables(void *start,
    24.4      do_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
    24.5      for ( i = 0; i < nr_mem_devs; i++ )
    24.6      {
    24.7 -        uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
    24.8 -                                ? (memsize & 0x3fff) : 0x4000);
    24.9 +        uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */
   24.10 +        if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) )
   24.11 +            dev_memsize = memsize & 0x3fff; /* last dev is <16GB */
   24.12          do_struct(smbios_type_17_init(p, dev_memsize, i));
   24.13          do_struct(smbios_type_19_init(p, dev_memsize, i));
   24.14          do_struct(smbios_type_20_init(p, dev_memsize, i));
   24.15 @@ -143,28 +144,18 @@ write_smbios_tables(void *start,
   24.16  static uint64_t
   24.17  get_memsize(void)
   24.18  {
   24.19 -    struct e820entry *map = E820;
   24.20 -    uint8_t num_entries = *E820_NR;
   24.21 -    uint64_t memsize = 0;
   24.22 -    int i;
   24.23 +    uint64_t sz;
   24.24  
   24.25 -    /*
   24.26 -     * Walk through e820map, ignoring any entries that aren't marked
   24.27 -     * as usable or reserved.
   24.28 -     */
   24.29 -    for ( i = 0; i < num_entries; i++ )
   24.30 -    {
   24.31 -        if ( (map->type == E820_RAM) || (map->type == E820_RESERVED) )
   24.32 -            memsize += map->size;
   24.33 -        map++;
   24.34 -    }
   24.35 +    sz = (uint64_t)hvm_info->low_mem_pgend << PAGE_SHIFT;
   24.36 +    if ( hvm_info->high_mem_pgend )
    24.37 +        sz += ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - (1ull << 32);
   24.38  
   24.39      /*
   24.40       * Round up to the nearest MB.  The user specifies domU pseudo-physical 
   24.41       * memory in megabytes, so not doing this could easily lead to reporting 
   24.42       * one less MB than the user specified.
   24.43       */
   24.44 -    return (memsize + (1 << 20) - 1) >> 20;
   24.45 +    return (sz + (1ul << 20) - 1) >> 20;
   24.46  }
   24.47  
   24.48  int
   24.49 @@ -229,7 +220,7 @@ hvm_write_smbios_tables(void)
   24.50  
   24.51      /* SCRATCH_PHYSICAL_ADDRESS is a safe large memory area for scratch. */
   24.52      len = write_smbios_tables((void *)SCRATCH_PHYSICAL_ADDRESS,
   24.53 -                              get_vcpu_nr(), get_memsize(),
   24.54 +                              hvm_info->nr_vcpus, get_memsize(),
   24.55                                uuid, xen_version_str,
   24.56                                xen_major_version, xen_minor_version);
   24.57      if ( len > SMBIOS_MAXIMUM_SIZE )
    25.1 --- a/tools/firmware/hvmloader/smp.c	Wed Jan 28 12:22:58 2009 +0900
    25.2 +++ b/tools/firmware/hvmloader/smp.c	Wed Jan 28 13:06:45 2009 +0900
    25.3 @@ -121,7 +121,7 @@ static void boot_cpu(unsigned int cpu)
    25.4  
    25.5  void smp_initialise(void)
    25.6  {
    25.7 -    unsigned int i, nr_cpus = get_vcpu_nr();
    25.8 +    unsigned int i, nr_cpus = hvm_info->nr_vcpus;
    25.9  
   25.10      memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
   25.11  
    26.1 --- a/tools/firmware/hvmloader/util.c	Wed Jan 28 12:22:58 2009 +0900
    26.2 +++ b/tools/firmware/hvmloader/util.c	Wed Jan 28 13:06:45 2009 +0900
    26.3 @@ -25,7 +25,6 @@
    26.4  #include <stdint.h>
    26.5  #include <xen/xen.h>
    26.6  #include <xen/memory.h>
    26.7 -#include <xen/hvm/hvm_info_table.h>
    26.8  
    26.9  void wrmsr(uint32_t idx, uint64_t v)
   26.10  {
   26.11 @@ -304,63 +303,63 @@ uuid_to_string(char *dest, uint8_t *uuid
   26.12      *p = '\0';
   26.13  }
   26.14  
   26.15 -static void e820_collapse(void)
   26.16 +void *mem_alloc(uint32_t size, uint32_t align)
   26.17  {
   26.18 -    int i = 0;
   26.19 -    struct e820entry *ent = E820;
   26.20 -
   26.21 -    while ( i < (*E820_NR-1) )
   26.22 -    {
   26.23 -        if ( (ent[i].type == ent[i+1].type) &&
   26.24 -             ((ent[i].addr + ent[i].size) == ent[i+1].addr) )
   26.25 -        {
   26.26 -            ent[i].size += ent[i+1].size;
   26.27 -            memcpy(&ent[i+1], &ent[i+2], (*E820_NR-i-2) * sizeof(*ent));
   26.28 -            (*E820_NR)--;
   26.29 -        }
   26.30 -        else
   26.31 -        {
   26.32 -            i++;
   26.33 -        }
   26.34 -    }
   26.35 -}
   26.36 -
   26.37 -uint32_t e820_malloc(uint32_t size, uint32_t align)
   26.38 -{
   26.39 -    uint32_t addr;
   26.40 -    int i;
   26.41 -    struct e820entry *ent = E820;
   26.42 +    static uint32_t reserve = RESERVED_MEMBASE - 1;
   26.43 +    static int over_allocated;
   26.44 +    struct xen_add_to_physmap xatp;
   26.45 +    struct xen_memory_reservation xmr;
   26.46 +    xen_pfn_t mfn;
   26.47 +    uint32_t s, e;
   26.48  
   26.49      /* Align to at least one kilobyte. */
   26.50      if ( align < 1024 )
   26.51          align = 1024;
   26.52  
   26.53 -    for ( i = *E820_NR - 1; i >= 0; i-- )
   26.54 +    s = (reserve + align) & ~(align - 1);
   26.55 +    e = s + size - 1;
   26.56 +
   26.57 +    BUG_ON((e < s) || (e >> PAGE_SHIFT) >= hvm_info->reserved_mem_pgstart);
   26.58 +
   26.59 +    while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
   26.60      {
   26.61 -        addr = (ent[i].addr + ent[i].size - size) & ~(align-1);
   26.62 -        if ( (ent[i].type != E820_RAM) || /* not ram? */
   26.63 -             (addr < ent[i].addr) ||      /* too small or starts above 4gb? */
   26.64 -             ((addr + size) < addr) )     /* ends above 4gb? */
   26.65 -            continue;
   26.66 +        reserve += PAGE_SIZE;
   26.67 +        mfn = reserve >> PAGE_SHIFT;
   26.68  
   26.69 -        if ( addr != ent[i].addr )
   26.70 +        /* Try to allocate a brand new page in the reserved area. */
   26.71 +        if ( !over_allocated )
   26.72          {
   26.73 -            memmove(&ent[i+1], &ent[i], (*E820_NR-i) * sizeof(*ent));
   26.74 -            (*E820_NR)++;
   26.75 -            ent[i].size = addr - ent[i].addr;
   26.76 -            ent[i+1].addr = addr;
   26.77 -            ent[i+1].size -= ent[i].size;
   26.78 -            i++;
   26.79 +            xmr.domid = DOMID_SELF;
   26.80 +            xmr.mem_flags = 0;
   26.81 +            xmr.extent_order = 0;
   26.82 +            xmr.nr_extents = 1;
   26.83 +            set_xen_guest_handle(xmr.extent_start, &mfn);
   26.84 +            if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
   26.85 +                continue;
   26.86 +            over_allocated = 1;
   26.87          }
   26.88  
   26.89 -        ent[i].type = E820_RESERVED;
   26.90 -
   26.91 -        e820_collapse();
   26.92 -
   26.93 -        return addr;
   26.94 +        /* Otherwise, relocate a page from the ordinary RAM map. */
   26.95 +        if ( hvm_info->high_mem_pgend )
   26.96 +        {
   26.97 +            xatp.idx = --hvm_info->high_mem_pgend;
   26.98 +            if ( xatp.idx == (1ull << (32 - PAGE_SHIFT)) )
   26.99 +                hvm_info->high_mem_pgend = 0;
  26.100 +        }
  26.101 +        else
  26.102 +        {
  26.103 +            xatp.idx = --hvm_info->low_mem_pgend;
  26.104 +        }
  26.105 +        xatp.domid = DOMID_SELF;
  26.106 +        xatp.space = XENMAPSPACE_gmfn;
  26.107 +        xatp.gpfn  = mfn;
  26.108 +        if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
  26.109 +            BUG();
  26.110      }
  26.111  
  26.112 -    return 0;
  26.113 +    reserve = e;
  26.114 +
  26.115 +    return (void *)(unsigned long)s;
  26.116  }
  26.117  
  26.118  uint32_t ioapic_read(uint32_t reg)
  26.119 @@ -543,30 +542,35 @@ void __bug(char *file, int line)
  26.120          asm volatile ( "ud2" );
  26.121  }
  26.122  
  26.123 -static int validate_hvm_info(struct hvm_info_table *t)
  26.124 +static void validate_hvm_info(struct hvm_info_table *t)
  26.125  {
  26.126 -    char signature[] = "HVM INFO";
  26.127      uint8_t *ptr = (uint8_t *)t;
  26.128      uint8_t sum = 0;
  26.129      int i;
  26.130  
  26.131 -    /* strncmp(t->signature, "HVM INFO", 8) */
  26.132 -    for ( i = 0; i < 8; i++ )
  26.133 +    if ( strncmp(t->signature, "HVM INFO", 8) )
  26.134      {
  26.135 -        if ( signature[i] != t->signature[i] )
  26.136 -        {
  26.137 -            printf("Bad hvm info signature\n");
  26.138 -            return 0;
  26.139 -        }
  26.140 +        printf("Bad hvm info signature\n");
  26.141 +        BUG();
  26.142 +    }
  26.143 +
  26.144 +    if ( t->length < sizeof(struct hvm_info_table) )
  26.145 +    {
  26.146 +        printf("Bad hvm info length\n");
  26.147 +        BUG();
  26.148      }
  26.149  
  26.150      for ( i = 0; i < t->length; i++ )
  26.151          sum += ptr[i];
  26.152  
  26.153 -    return (sum == 0);
  26.154 +    if ( sum != 0 )
  26.155 +    {
  26.156 +        printf("Bad hvm info checksum\n");
  26.157 +        BUG();
  26.158 +    }
  26.159  }
  26.160  
  26.161 -static struct hvm_info_table *get_hvm_info_table(void)
  26.162 +struct hvm_info_table *get_hvm_info_table(void)
  26.163  {
  26.164      static struct hvm_info_table *table;
  26.165      struct hvm_info_table *t;
  26.166 @@ -576,35 +580,13 @@ static struct hvm_info_table *get_hvm_in
  26.167  
  26.168      t = (struct hvm_info_table *)HVM_INFO_PADDR;
  26.169  
  26.170 -    if ( !validate_hvm_info(t) )
  26.171 -    {
  26.172 -        printf("Bad hvm info table\n");
  26.173 -        return NULL;
  26.174 -    }
  26.175 +    validate_hvm_info(t);
  26.176  
  26.177      table = t;
  26.178  
  26.179      return table;
  26.180  }
  26.181  
  26.182 -int get_vcpu_nr(void)
  26.183 -{
  26.184 -    struct hvm_info_table *t = get_hvm_info_table();
  26.185 -    return (t ? t->nr_vcpus : 1);
  26.186 -}
  26.187 -
  26.188 -int get_acpi_enabled(void)
  26.189 -{
  26.190 -    struct hvm_info_table *t = get_hvm_info_table();
  26.191 -    return (t ? t->acpi_enabled : 1);
  26.192 -}
  26.193 -
  26.194 -int get_apic_mode(void)
  26.195 -{
  26.196 -    struct hvm_info_table *t = get_hvm_info_table();
  26.197 -    return (t ? t->apic_mode : 1);
  26.198 -}
  26.199 -
  26.200  uint16_t get_cpu_mhz(void)
  26.201  {
  26.202      struct xen_add_to_physmap xatp;
  26.203 @@ -647,6 +629,27 @@ uint16_t get_cpu_mhz(void)
  26.204      return cpu_mhz;
  26.205  }
  26.206  
  26.207 +int uart_exists(uint16_t uart_base)
  26.208 +{
  26.209 +    uint16_t ier = uart_base + 1;
  26.210 +    uint8_t a, b, c;
  26.211 +
  26.212 +    a = inb(ier);
  26.213 +    outb(ier, 0);
  26.214 +    b = inb(ier);
  26.215 +    outb(ier, 0xf);
  26.216 +    c = inb(ier);
  26.217 +    outb(ier, a);
  26.218 +
  26.219 +    return ((b == 0) && (c == 0xf));
  26.220 +}
  26.221 +
  26.222 +int hpet_exists(unsigned long hpet_base)
  26.223 +{
  26.224 +    uint32_t hpet_id = *(uint32_t *)hpet_base;
  26.225 +    return ((hpet_id >> 16) == 0x8086);
  26.226 +}
  26.227 +
  26.228  /*
  26.229   * Local variables:
  26.230   * mode: C
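
The new uart_exists() and hpet_exists() helpers above let hvmloader probe for
emulated hardware before describing it to the guest: the UART check is a
scratch write/read loopback on the Interrupt Enable Register, and the HPET
check looks for vendor ID 0x8086 in the upper half of the capability register
at the HPET base. A hypothetical caller, assuming the conventional COM1 and
HPET base addresses (neither value is taken from this changeset):

    /* Illustrative caller only: 0x3f8 is the usual COM1 base and
     * 0xfed00000 the usual HPET MMIO base. */
    if ( uart_exists(0x3f8) )
        printf("COM1 UART present\n");
    if ( hpet_exists(0xfed00000) )
        printf("HPET present\n");
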
    27.1 --- a/tools/firmware/hvmloader/util.h	Wed Jan 28 12:22:58 2009 +0900
    27.2 +++ b/tools/firmware/hvmloader/util.h	Wed Jan 28 13:06:45 2009 +0900
    27.3 @@ -3,6 +3,7 @@
    27.4  
    27.5  #include <stdarg.h>
    27.6  #include <stdint.h>
    27.7 +#include <xen/hvm/hvm_info_table.h>
    27.8  
    27.9  #undef offsetof
   27.10  #define offsetof(t, m) ((unsigned long)&((t *)0)->m)
   27.11 @@ -56,6 +57,10 @@ void pci_write(uint32_t devfn, uint32_t 
   27.12  /* Get CPU speed in MHz. */
   27.13  uint16_t get_cpu_mhz(void);
   27.14  
   27.15 +/* Hardware detection. */
   27.16 +int uart_exists(uint16_t uart_base);
   27.17 +int hpet_exists(unsigned long hpet_base);
   27.18 +
   27.19  /* Do cpuid instruction, with operation 'idx' */
   27.20  void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
   27.21             uint32_t *ecx, uint32_t *edx);
   27.22 @@ -103,9 +108,8 @@ static inline void cpu_relax(void)
   27.23  })
   27.24  
   27.25  /* HVM-builder info. */
   27.26 -int get_vcpu_nr(void);
   27.27 -int get_acpi_enabled(void);
   27.28 -int get_apic_mode(void);
   27.29 +struct hvm_info_table *get_hvm_info_table(void);
   27.30 +#define hvm_info (get_hvm_info_table())
   27.31  
   27.32  /* String and memory functions */
   27.33  int strcmp(const char *cs, const char *ct);
   27.34 @@ -131,11 +135,12 @@ void uuid_to_string(char *dest, uint8_t 
   27.35  int printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
   27.36  int vprintf(const char *fmt, va_list ap);
   27.37  
   27.38 -/* Reserve a RAM region in the e820 table. */
   27.39 -uint32_t e820_malloc(uint32_t size, uint32_t align);
   27.40 +/* Allocate memory in a reserved region below 4GB. */
   27.41 +void *mem_alloc(uint32_t size, uint32_t align);
   27.42 +#define virt_to_phys(v) ((unsigned long)(v))
   27.43  
   27.44  /* Prepare the 32bit BIOS */
   27.45 -void highbios_setup(void);
   27.46 +uint32_t highbios_setup(void);
   27.47  
   27.48  /* Miscellaneous. */
   27.49  void cacheattr_init(void);
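
With e820_malloc() replaced by mem_alloc(), callers receive a pointer into the
reserved region below 4GB and convert it to a physical address with
virt_to_phys(), which as the macro above shows is an identity mapping inside
hvmloader. A minimal sketch of the new convention (the size and alignment
values are illustrative, not taken from any real caller):

    /* Allocate a page-aligned 4kB scratch buffer below 4GB and obtain
     * its physical address for code that wants a plain 32-bit number. */
    void *buf = mem_alloc(4096, 4096);
    uint32_t buf_pa = (uint32_t)virt_to_phys(buf);
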
    28.1 --- a/tools/firmware/rombios/32bit/32bitbios.c	Wed Jan 28 12:22:58 2009 +0900
    28.2 +++ b/tools/firmware/rombios/32bit/32bitbios.c	Wed Jan 28 13:06:45 2009 +0900
    28.3 @@ -19,35 +19,16 @@
    28.4   *
    28.5   * Author: Stefan Berger <stefanb@us.ibm.com>
    28.6   */
    28.7 +
    28.8  #include "rombios_compat.h"
    28.9 -#include "32bitprotos.h"
   28.10 -
   28.11 -/*
   28.12 -   the jumptable that will be copied into the rombios in the 0xf000 segment
   28.13 -   for every function that is to be called from the lower BIOS, make an entry
   28.14 -   here.
   28.15 - */
   28.16 -#define TABLE_ENTRY(idx, func) [idx] = (uint32_t)func
   28.17 -uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) =
   28.18 -{
   28.19 -	TABLE_ENTRY(IDX_TCPA_ACPI_INIT, tcpa_acpi_init),
   28.20 -	TABLE_ENTRY(IDX_TCPA_EXTEND_ACPI_LOG, tcpa_extend_acpi_log),
   28.21 -
   28.22 -	TABLE_ENTRY(IDX_TCGINTERRUPTHANDLER, TCGInterruptHandler),
   28.23  
   28.24 -	TABLE_ENTRY(IDX_TCPA_CALLING_INT19H, tcpa_calling_int19h),
   28.25 -	TABLE_ENTRY(IDX_TCPA_RETURNED_INT19H, tcpa_returned_int19h),
   28.26 -	TABLE_ENTRY(IDX_TCPA_ADD_EVENT_SEPARATORS, tcpa_add_event_separators),
   28.27 -	TABLE_ENTRY(IDX_TCPA_WAKE_EVENT, tcpa_wake_event),
   28.28 -	TABLE_ENTRY(IDX_TCPA_ADD_BOOTDEVICE, tcpa_add_bootdevice),
   28.29 -	TABLE_ENTRY(IDX_TCPA_START_OPTION_ROM_SCAN, tcpa_start_option_rom_scan),
   28.30 -	TABLE_ENTRY(IDX_TCPA_OPTION_ROM, tcpa_option_rom),
   28.31 -	TABLE_ENTRY(IDX_TCPA_IPL, tcpa_ipl),
   28.32 -	TABLE_ENTRY(IDX_TCPA_MEASURE_POST, tcpa_measure_post),
   28.33 -
   28.34 -	TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
   28.35 -
   28.36 -	TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
   28.37 -
   28.38 -	TABLE_ENTRY(IDX_LAST       , 0)     /* keep last */
   28.39 -};
   28.40 +asm (
   28.41 +    "    .text                       \n"
   28.42 +    "     movzwl %bx,%eax            \n"
   28.43 +    "     jmp *jumptable(,%eax,4)    \n"
   28.44 +    "    .data                       \n"
   28.45 +    "jumptable:                      \n"
   28.46 +#define X(idx, ret, fn, args...) " .long "#fn"\n"
   28.47 +#include "32bitprotos.h"
   28.48 +#undef X
   28.49 +    );
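
The hand-maintained TABLE_ENTRY jumptable is gone. The 32-bit entry stub above
expects the function index in bx (placed there by the 16-bit DoUpcall stubs
later in this changeset, with the arguments already copied onto the 32-bit
stack), zero-extends it, and jumps through a table of 4-byte function pointers
generated directly from 32bitprotos.h. Roughly the C analogue of those two
instructions, shown for illustration only:

    /* Illustrative C analogue of "movzwl %bx,%eax; jmp *jumptable(,%eax,4)". */
    extern uint32_t (*jumptable[])(void);  /* one slot per X() entry, in index order */
    uint32_t dispatch(uint16_t idx) { return jumptable[idx](); }
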
    29.1 --- a/tools/firmware/rombios/32bit/Makefile	Wed Jan 28 12:22:58 2009 +0900
    29.2 +++ b/tools/firmware/rombios/32bit/Makefile	Wed Jan 28 13:06:45 2009 +0900
    29.3 @@ -1,24 +1,24 @@
    29.4  XEN_ROOT = ../../../..
    29.5  include $(XEN_ROOT)/tools/firmware/Rules.mk
    29.6  
    29.7 -SOURCES = util.c
    29.8  TARGET = 32bitbios_flat.h
    29.9  
   29.10 -CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
   29.11 +CFLAGS += $(CFLAGS_include) -I..
   29.12  
   29.13  SUBDIRS = tcgbios
   29.14  
   29.15 -MODULES = tcgbios/tcgbiosext.o
   29.16 -
   29.17  .PHONY: all
   29.18  all: subdirs-all
   29.19  	$(MAKE) $(TARGET)
   29.20  
   29.21  .PHONY: clean
   29.22  clean: subdirs-clean
   29.23 -	rm -rf *.o $(TARGET)
   29.24 +	rm -rf *.o $(TARGET) $(DEPS)
   29.25  
   29.26 -$(TARGET): 32bitbios.o $(MODULES) util.o
   29.27 +$(TARGET): 32bitbios_all.o
   29.28 +	sh mkhex highbios_array 32bitbios_all.o > $@
   29.29 +
   29.30 +32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
   29.31  	$(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
   29.32  	@nm 32bitbios_all.o |                                \
   29.33  	  egrep '^ +U ' >/dev/null && {                      \
   29.34 @@ -26,4 +26,5 @@ clean: subdirs-clean
   29.35  	    nm -u 32bitbios_all.o;                           \
   29.36  	    exit 11;                                         \
   29.37  	  } || :
   29.38 -	sh mkhex highbios_array 32bitbios_all.o > $@
   29.39 +
   29.40 +-include $(DEPS)
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/tools/firmware/rombios/32bit/pmm.c	Wed Jan 28 13:06:45 2009 +0900
    30.3 @@ -0,0 +1,531 @@
    30.4 +/*
     30.5 + *  pmm.c - POST (Power On Self Test) Memory Manager
    30.6 + *  according to the specification described in
    30.7 + *  http://www.phoenix.com/NR/rdonlyres/873A00CF-33AC-4775-B77E-08E7B9754993/0/specspmm101.pdf
    30.8 + *
    30.9 + *  This library is free software; you can redistribute it and/or
   30.10 + *  modify it under the terms of the GNU Lesser General Public
   30.11 + *  License as published by the Free Software Foundation; either
   30.12 + *  version 2 of the License, or (at your option) any later version.
   30.13 + *
   30.14 + *  This library is distributed in the hope that it will be useful,
   30.15 + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   30.16 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   30.17 + *  Lesser General Public License for more details.
   30.18 + *
   30.19 + *  You should have received a copy of the GNU Lesser General Public
   30.20 + *  License along with this library; if not, write to the Free Software
   30.21 + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   30.22 + *
   30.23 + *  Copyright (C) 2009 FUJITSU LIMITED
   30.24 + *
   30.25 + *  Author: Kouya Shimura <kouya@jp.fujitsu.com>
   30.26 + */
   30.27 +
   30.28 +/*
   30.29 + * Algorithm:
   30.30 + *
    30.31 + * This is not a fast storage allocator but a simple one.  There is no
    30.32 + * segregated management by block size, and nothing special is done to
    30.33 + * avoid fragmentation.
    30.34 + *
    30.35 + * The allocation algorithm is first-fit. All memory blocks are managed
    30.36 + * in a singly linked list ordered by address (i.e. there is no backward
    30.37 + * pointer). The search starts from the head (lowest address) of the
    30.38 + * heap and returns the first available block of equal or larger size.
    30.39 + * A larger block is split into two blocks unless one side would become
    30.40 + * too small.
    30.41 + *
    30.42 + * For de-allocation, the specified block is simply marked as available;
    30.43 + * nothing else is done, so fragmentation will occur. Contiguous
    30.44 + * available blocks are coalesced during the search phase of a later
    30.45 + * allocation.
    30.46 + *
    30.47 + * The following is an abstract of the algorithm. The actual code is more
    30.48 + * complicated because of alignment handling and handle checking.
   30.49 + *
   30.50 + *     static memblk_t *
   30.51 + *     alloc(heap_t *heap, uint32_t size)
   30.52 + *     {
   30.53 + *         static memblk_t *mb;
   30.54 + *         for_each_memblk(heap, mb) // search memory blocks
   30.55 + *             if (memblk_is_avail(mb))
   30.56 + *             {
   30.57 + *                 collect_avail_memblks(heap, mb);
   30.58 + *                 if (size <= memblk_bufsize(mb))
   30.59 + *                 {
   30.60 + *                     split_memblk(mb, size);
   30.61 + *                     set_inuse(mb);
   30.62 + *                     return mb;
   30.63 + *                 }
   30.64 + *             }
   30.65 + *         return NULL;
   30.66 + *     }
   30.67 + */
   30.68 +
   30.69 +#include <stdint.h>
   30.70 +#include <stddef.h>
   30.71 +#include <../hvmloader/config.h>
   30.72 +#include <../hvmloader/e820.h>
   30.73 +#include "util.h"
   30.74 +
   30.75 +#define DEBUG_PMM 0
   30.76 +
   30.77 +#define ASSERT(_expr, _action)                                  \
   30.78 +    if (!(_expr)) {                                             \
   30.79 +        printf("ASSERTION FAIL: %s %s:%d %s()\n",               \
   30.80 +               __STRING(_expr), __FILE__, __LINE__, __func__);  \
   30.81 +        _action;                                                \
   30.82 +    } else
   30.83 +
   30.84 +#if DEBUG_PMM
   30.85 +# define PMM_DEBUG(format, p...) printf("PMM " format, ##p)
   30.86 +#else
   30.87 +# define PMM_DEBUG(format, p...)
   30.88 +#endif
   30.89 +
   30.90 +struct pmmAllocArgs {
   30.91 +    uint16_t function;
   30.92 +    uint32_t length;
   30.93 +    uint32_t handle;
   30.94 +    uint16_t flags;
   30.95 +} __attribute__ ((packed));
   30.96 +
   30.97 +struct pmmFindArgs {
   30.98 +    uint16_t function;
   30.99 +    uint32_t handle;
  30.100 +} __attribute__ ((packed));
  30.101 +
  30.102 +struct pmmDeallocateArgs {
  30.103 +    uint16_t function;
  30.104 +    uint32_t buffer;
  30.105 +} __attribute__ ((packed));
  30.106 +
  30.107 +#define PMM_FUNCTION_ALLOCATE   0
  30.108 +#define PMM_FUNCTION_FIND       1         
  30.109 +#define PMM_FUNCTION_DEALLOC    2
  30.110 +
  30.111 +#define PARAGRAPH_LENGTH        16  // unit of length
  30.112 +
  30.113 +#define PMM_HANDLE_ANONYMOUS    0xffffffff
  30.114 +
  30.115 +#define PMM_FLAGS_MEMORY_TYPE_MASK      0x0003
  30.116 +#define PMM_FLAGS_MEMORY_INVALID        0
  30.117 +#define PMM_FLAGS_MEMORY_CONVENTIONAL   1  // 0 to 1MB
  30.118 +#define PMM_FLAGS_MEMORY_EXTENDED       2  // 1MB to 4GB
  30.119 +#define PMM_FLAGS_MEMORY_ANY            3  // whichever is available
  30.120 +#define PMM_FLAGS_ALIGINMENT            0x0004
  30.121 +
  30.122 +/* Error code */
  30.123 +#define PMM_ENOMEM      (0)     // Out of memory, duplicate handle
  30.124 +#define PMM_EINVAL      (-1)    // Invalid argument
  30.125 +
  30.126 +#define ALIGN_UP(addr, size)    (((addr)+((size)-1))&(~((size)-1)))
  30.127 +#define ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
  30.128 +
  30.129 +typedef struct memblk {
  30.130 +    uint32_t magic;      // inuse or available
   30.131 +    struct memblk *next; // points to the memblk immediately after this one
   30.132 +    uint32_t handle;     // identifier of this block
   30.133 +    uint32_t __fill;     // for 16-byte alignment, not used
  30.134 +    uint8_t buffer[0];
  30.135 +} memblk_t;
  30.136 +
  30.137 +typedef struct heap {
  30.138 +    memblk_t *head;     // start address of heap
  30.139 +    memblk_t *end;      // end address of heap
  30.140 +} heap_t;
  30.141 +
  30.142 +#define HEAP_NOT_INITIALIZED    (memblk_t *)-1
  30.143 +#define HEAP_ALIGNMENT          16
  30.144 +
  30.145 +/*
  30.146 + * PMM handles two memory heaps, the caller chooses either.
  30.147 + *
   30.148 + * - conventional memory (below 1MB)
  30.149 + *    In HVM, the area is fixed. 0x00010000-0x0007FFFF
  30.150 + *    (from SCRATCH_PHYSICAL_ADDRESS to HYPERCALL_PHYSICAL_ADDRESS)
  30.151 + *
  30.152 + * - extended memory (start at 1MB, below 4GB)
  30.153 + *    In HVM, the area starts at memory address 0x00100000.
   30.154 + *    The end address is variable; it is read from the RAM entry above 1MB in the e820 table.
  30.155 + *
   30.156 + * The following struct must be located in the data segment, since the
   30.157 + * bss section in 32bitbios is not relocated.
  30.158 + */
  30.159 +static struct {
  30.160 +    heap_t heap;     // conventional memory
  30.161 +    heap_t ext_heap; // extended memory
  30.162 +} pmm_data = { {HEAP_NOT_INITIALIZED, NULL}, {NULL, NULL} };
  30.163 +
  30.164 +/* These values are private use, not a spec in PMM */
  30.165 +#define MEMBLK_MAGIC_INUSE   0x2A4D4D50  // 'PMM*'
  30.166 +#define MEMBLK_MAGIC_AVAIL   0x5F4D4D50  // 'PMM_'
  30.167 +
  30.168 +#define memblk_is_inuse(_mb)  ((_mb)->magic == MEMBLK_MAGIC_INUSE)
  30.169 +#define memblk_is_avail(_mb)  ((_mb)->magic == MEMBLK_MAGIC_AVAIL)
  30.170 +
  30.171 +static void set_inuse(memblk_t *mb, uint32_t handle)
  30.172 +{
  30.173 +    mb->magic = MEMBLK_MAGIC_INUSE;
  30.174 +    mb->handle = handle;
  30.175 +}
  30.176 +
  30.177 +static void set_avail(memblk_t *mb)
  30.178 +{
  30.179 +    mb->magic = MEMBLK_MAGIC_AVAIL;
  30.180 +    mb->handle = PMM_HANDLE_ANONYMOUS;
  30.181 +}
  30.182 +
  30.183 +#define MEMBLK_HEADER_SIZE   ((int)(&((memblk_t *)0)->buffer))
  30.184 +#define MIN_MEMBLK_SIZE      (MEMBLK_HEADER_SIZE + PARAGRAPH_LENGTH)
  30.185 +
  30.186 +#define memblk_size(_mb)     ((void *)((_mb)->next) - (void *)(_mb))
  30.187 +#define memblk_buffer(_mb)   ((uint32_t)(&(_mb)->buffer))
  30.188 +#define memblk_bufsize(_mb)  (memblk_size(_mb) - MEMBLK_HEADER_SIZE)
  30.189 +
  30.190 +#define buffer_memblk(_buf)  (memblk_t *)((_buf) - MEMBLK_HEADER_SIZE)
  30.191 +
  30.192 +#define memblk_loop_mbondition(_h, _mb) \
  30.193 +    (((_mb) < (_h)->end) && (/* avoid infinite loop */ (_mb) < (_mb)->next))
  30.194 +
  30.195 +#define for_each_memblk(_h, _mb)                \
  30.196 +    for ((_mb) = (_h)->head;                    \
  30.197 +         memblk_loop_mbondition(_h, _mb);       \
  30.198 +         (_mb) = (_mb)->next)
  30.199 +
  30.200 +#define for_remain_memblk(_h, _mb)              \
  30.201 +    for (;                                      \
  30.202 +         memblk_loop_mbondition(_h, _mb);       \
  30.203 +         (_mb) = (_mb)->next)
  30.204 +
  30.205 +/*
  30.206 + *                                       <-size->
  30.207 + *    +==================+======+       +========+========+======+
  30.208 + *    |      avail       |      |       | avail  | avail  |      |
  30.209 + *    |      memblk      |memblk|...    | memblk | memblk |memblk|...
  30.210 + *    +==================+======+   =>  +========+========+======+
  30.211 + *    ^ |                ^ |    ^         |      ^ |      ^ |    ^
  30.212 + *    | |next            | |next|         |next  | |next  | |next|
  30.213 + *    | \________________/ \____/         \______/ \______/ \____/
  30.214 + *    |                                          ^
  30.215 + *    |                                          |
  30.216 + *    mb                                         +- sb(return value)
  30.217 + */
  30.218 +static memblk_t *
  30.219 +split_memblk(memblk_t *mb, uint32_t size)
  30.220 +{
  30.221 +    memblk_t *sb = (void *)memblk_buffer(mb) + size;
  30.222 +
  30.223 +    /* Only split if the remaining fragment is big enough. */
   30.224 +    if ( (memblk_bufsize(mb) - size) < MIN_MEMBLK_SIZE )
  30.225 +        return mb;
  30.226 +
  30.227 +    sb->next = mb->next;
  30.228 +    set_avail(sb);
  30.229 +
  30.230 +    mb->next = sb;
  30.231 +    return sb;
  30.232 +}
  30.233 +
  30.234 +/*
  30.235 + *    +======+======+======+======+       +=================+======+
  30.236 + *    |avail |avail |avail |inuse |       |      avail      |inuse |   
  30.237 + *    |memblk|memblk|memblk|memblk|...    |      memblk     |memblk|...
  30.238 + *    +======+======+======+======+   =>  +=================+======+
  30.239 + *    ^ |    ^ |    ^ |    ^ |    ^         |               ^ |    ^
  30.240 + *    | |next| |next| |next| |next|         |next           | |next|
  30.241 + *    | \____/ \____/ \____/ \____/         \_______________/ \____/
  30.242 + *    |
  30.243 + *    mb
  30.244 + */
  30.245 +static void
  30.246 +collect_avail_memblks(heap_t *heap, memblk_t *mb)
  30.247 +{
  30.248 +    memblk_t *nb = mb->next;
  30.249 +
  30.250 +    for_remain_memblk ( heap, nb )
  30.251 +        if ( memblk_is_inuse(nb) )
  30.252 +            break;
  30.253 +    mb->next = nb;
  30.254 +}
  30.255 +
  30.256 +static void
  30.257 +pmm_init_heap(heap_t *heap, uint32_t from_addr, uint32_t to_addr)
  30.258 +{
  30.259 +    memblk_t *mb = (memblk_t *)ALIGN_UP(from_addr, HEAP_ALIGNMENT);
  30.260 +
  30.261 +    mb->next = (memblk_t *)ALIGN_DOWN(to_addr, HEAP_ALIGNMENT);
  30.262 +    set_avail(mb);
  30.263 +
  30.264 +    heap->head = mb;
  30.265 +    heap->end = mb->next;
  30.266 +}
  30.267 +
  30.268 +static void
  30.269 +pmm_initalize(void)
  30.270 +{
  30.271 +    int i, e820_nr = *E820_NR;
  30.272 +    struct e820entry *e820 = E820;
  30.273 +
  30.274 +    /* Extended memory: RAM below 4GB, 0x100000-0xXXXXXXXX */
  30.275 +    for ( i = 0; i < e820_nr; i++ )
  30.276 +    {
  30.277 +        if ( (e820[i].type == E820_RAM) && (e820[i].addr >= 0x00100000) )
  30.278 +        {
  30.279 +            pmm_init_heap(&pmm_data.ext_heap, e820[i].addr, 
  30.280 +                          e820[i].addr + e820[i].size);
  30.281 +            break;
  30.282 +        }
  30.283 +    }
  30.284 +
   30.285 +    /* conventional memory: RAM below 1MB, 0x10000-0x7FFFF */
  30.286 +    pmm_init_heap(&pmm_data.heap, SCRATCH_PHYSICAL_ADDRESS,
  30.287 +                  HYPERCALL_PHYSICAL_ADDRESS);
  30.288 +}
  30.289 +
  30.290 +static uint32_t
  30.291 +pmm_max_avail_length(heap_t *heap)
  30.292 +{
  30.293 +    memblk_t *mb;
  30.294 +    uint32_t size, max = 0;
  30.295 +
  30.296 +    for_each_memblk ( heap, mb )
  30.297 +    {
  30.298 +        if ( !memblk_is_avail(mb) )
  30.299 +            continue;
  30.300 +        collect_avail_memblks(heap, mb);
  30.301 +        size = memblk_bufsize(mb);
  30.302 +        if ( size > max )
  30.303 +            max = size;
  30.304 +    }
  30.305 +
  30.306 +    return (max / PARAGRAPH_LENGTH);
  30.307 +}
  30.308 +
  30.309 +static memblk_t *
  30.310 +first_fit(heap_t *heap, uint32_t size, uint32_t handle, uint32_t flags)
  30.311 +{
  30.312 +    memblk_t *mb;
  30.313 +    int32_t align = 0;
  30.314 +
  30.315 +    if ( flags & PMM_FLAGS_ALIGINMENT )
  30.316 +        align = ((size ^ (size - 1)) >> 1) + 1;
  30.317 +
  30.318 +    for_each_memblk ( heap, mb )
  30.319 +    {
  30.320 +        if ( memblk_is_avail(mb) )
  30.321 +        {
  30.322 +            collect_avail_memblks(heap, mb);
  30.323 +
  30.324 +            if ( align )
  30.325 +            {
  30.326 +                uint32_t addr = memblk_buffer(mb);
  30.327 +                uint32_t offset = ALIGN_UP(addr, align) - addr;
  30.328 +
  30.329 +                if ( offset > 0 )
  30.330 +                {
  30.331 +                    ASSERT(offset >= MEMBLK_HEADER_SIZE, continue);
  30.332 +
  30.333 +                    if ( (offset + size) > memblk_bufsize(mb) )
  30.334 +                        continue;
  30.335 +
  30.336 +                    mb = split_memblk(mb, offset - MEMBLK_HEADER_SIZE);
  30.337 +                    return mb;
  30.338 +                }
  30.339 +            }
  30.340 +
  30.341 +            if ( size <= memblk_bufsize(mb) )
  30.342 +                return mb;
  30.343 +        }
  30.344 +        else
  30.345 +        {
  30.346 +            ASSERT(memblk_is_inuse(mb), return NULL);
  30.347 +
  30.348 +            /* Duplication check for handle. */
  30.349 +            if ( (handle != PMM_HANDLE_ANONYMOUS) && (mb->handle == handle) )
  30.350 +                return NULL;
  30.351 +        }
  30.352 +    }
  30.353 +
  30.354 +    return NULL;
  30.355 +}
  30.356 +
  30.357 +static memblk_t *
  30.358 +pmm_find_handle(heap_t *heap, uint32_t handle)
  30.359 +{
  30.360 +    memblk_t *mb;
  30.361 +
  30.362 +    if ( handle == PMM_HANDLE_ANONYMOUS )
  30.363 +        return NULL;
  30.364 +
  30.365 +    for_each_memblk ( heap, mb )
  30.366 +        if ( mb->handle == handle )
  30.367 +            return mb;
  30.368 +
  30.369 +    return NULL;
  30.370 +}
  30.371 +
  30.372 +/*
   30.373 + * Allocates a memory block of the specified type and size, and returns
   30.374 + * the address of the memory block.
   30.375 + *
   30.376 + * 'handle' is a client-specified identifier to be associated with the
   30.377 + * allocated memory block. A handle of 0xFFFFFFFF indicates that no
   30.378 + * identifier should be associated with the block. Such a memory block
   30.379 + * is known as an "anonymous" memory block and cannot be found using
   30.380 + * the pmmFind function. If the specified handle is already in use by
   30.381 + * a currently allocated memory block, the error value of 0x00000000
   30.382 + * is returned.
  30.383 + *
  30.384 + * If length is 0x00000000, no memory is allocated and the value
  30.385 + * returned is the size of the largest memory block available for the
  30.386 + * memory type specified in the flags parameter. The alignment bit in
  30.387 + * the flags register is ignored when calculating the largest memory
  30.388 + * block available.
  30.389 + *
  30.390 + * If a specified handle for a requested memory block is already used
  30.391 + * in a currently allocated memory block, the error value of
  30.392 + * 0x00000000 is returned.
  30.393 + * 
  30.394 + * A return value of 0x00000000 indicates that an error occurred and
  30.395 + * no memory has been allocated. 
  30.396 + */
  30.397 +static uint32_t
  30.398 +pmmAllocate(uint32_t length, uint32_t handle, uint16_t flags)
  30.399 +{
  30.400 +    heap_t *heap;
  30.401 +    memblk_t *mb;
  30.402 +    uint32_t size;
  30.403 +
  30.404 +    switch ( flags & PMM_FLAGS_MEMORY_TYPE_MASK )
  30.405 +    {
  30.406 +    case PMM_FLAGS_MEMORY_CONVENTIONAL:
  30.407 +        heap = &pmm_data.heap;
  30.408 +        break;
  30.409 +
  30.410 +    case PMM_FLAGS_MEMORY_EXTENDED:
  30.411 +    case PMM_FLAGS_MEMORY_ANY: /* XXX: ignore conventional memory for now */
  30.412 +        heap = &pmm_data.ext_heap;
  30.413 +        break;
  30.414 +
  30.415 +    default:
  30.416 +        return PMM_EINVAL;
  30.417 +    }
  30.418 +
  30.419 +    /* return the largest memory block available */
  30.420 +    if ( length == 0 )
  30.421 +        return pmm_max_avail_length(heap);
  30.422 +
  30.423 +    size = length * PARAGRAPH_LENGTH;
  30.424 +    mb = first_fit(heap, size, handle, flags);
  30.425 +
  30.426 +    if ( mb == NULL )
  30.427 +        return PMM_ENOMEM;
  30.428 +
  30.429 +    /* duplication check for handle */
  30.430 +    if ( handle != PMM_HANDLE_ANONYMOUS )
  30.431 +    {
  30.432 +        memblk_t *nb = mb->next;
  30.433 +
  30.434 +        for_remain_memblk(heap, nb)
  30.435 +            if (nb->handle == handle)
  30.436 +                return PMM_ENOMEM;
  30.437 +    }
  30.438 +
  30.439 +    split_memblk(mb, size);
  30.440 +    set_inuse(mb, handle);
  30.441 +
  30.442 +    return memblk_buffer(mb);
  30.443 +}
  30.444 +
  30.445 +/*
   30.446 + * Returns the address of the memory block associated with the
  30.447 + * specified handle.  
  30.448 + *
  30.449 + * A return value of 0x00000000 indicates that the handle does not
  30.450 + * correspond to a currently allocated memory block.
  30.451 + */
  30.452 +static uint32_t
  30.453 +pmmFind(uint32_t handle)
  30.454 +{
  30.455 +    memblk_t *mb;
  30.456 +
  30.457 +    if ( handle == PMM_HANDLE_ANONYMOUS )
  30.458 +        return 0;
  30.459 +
  30.460 +    mb = pmm_find_handle(&pmm_data.heap, handle);
  30.461 +    if ( mb == NULL )
  30.462 +        mb = pmm_find_handle(&pmm_data.ext_heap, handle);
  30.463 +
  30.464 +    return mb ? memblk_buffer(mb) : 0;
  30.465 +}
  30.466 +
  30.467 +/* 
   30.468 + * Frees the specified memory block that was previously allocated by
  30.469 + * pmmAllocate.
  30.470 + *
  30.471 + * If the memory block was deallocated correctly, the return value is
  30.472 + * 0x00000000. If there was an error, the return value is non-zero.
  30.473 + */
  30.474 +static uint32_t
  30.475 +pmmDeallocate(uint32_t buffer)
  30.476 +{
  30.477 +    memblk_t *mb = buffer_memblk(buffer);
  30.478 +
  30.479 +    if ( !memblk_is_inuse(mb) )
  30.480 +        return PMM_EINVAL;
  30.481 +
  30.482 +    set_avail(mb);
  30.483 +    return 0;
  30.484 +}
  30.485 +
  30.486 +
  30.487 +union pmm_args {
  30.488 +    uint16_t function;
  30.489 +    struct pmmAllocArgs alloc;
  30.490 +    struct pmmFindArgs find;
  30.491 +    struct pmmDeallocateArgs dealloc;
  30.492 +} __attribute__ ((packed));
  30.493 +
  30.494 +/*
   30.495 + * Entry function for all PMM services.
  30.496 + *
  30.497 + * Values returned to the caller are placed in the DX:AX register
  30.498 + * pair. The flags and all registers, other than DX and AX, are
  30.499 + * preserved across calls to PMM services.
  30.500 + */
  30.501 +uint32_t
  30.502 +pmm(void *argp)
  30.503 +{
  30.504 +    union pmm_args *ap = argp;
  30.505 +    uint32_t ret = PMM_EINVAL;
  30.506 +
  30.507 +    if ( pmm_data.heap.head == HEAP_NOT_INITIALIZED )
  30.508 +        pmm_initalize();
  30.509 +
  30.510 +    switch ( ap->function )
  30.511 +    {
  30.512 +    case PMM_FUNCTION_ALLOCATE:
  30.513 +        ret = pmmAllocate(ap->alloc.length, ap->alloc.handle, ap->alloc.flags);
  30.514 +        PMM_DEBUG("Alloc length=%x handle=%x flags=%x ret=%x\n", 
  30.515 +                  ap->alloc.length, ap->alloc.handle, ap->alloc.flags, ret);
  30.516 +        break;
  30.517 +
  30.518 +    case PMM_FUNCTION_FIND:
  30.519 +        ret = pmmFind(ap->find.handle);
  30.520 +        PMM_DEBUG("Find handle=%x ret=%x\n", ap->find.handle, ret);
  30.521 +        break;
  30.522 +
  30.523 +    case PMM_FUNCTION_DEALLOC:
  30.524 +        ret = pmmDeallocate(ap->dealloc.buffer);
  30.525 +        PMM_DEBUG("Dealloc buffer=%x ret=%x\n", ap->dealloc.buffer, ret);
  30.526 +        break;
  30.527 +
  30.528 +    default:
  30.529 +        PMM_DEBUG("Invalid function:%d\n", ap->function);
  30.530 +        break;
  30.531 +    }
  30.532 +
  30.533 +    return ret;
  30.534 +}
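
One detail of first_fit() above that is easy to misread: when
PMM_FLAGS_ALIGINMENT is set, the expression ((size ^ (size - 1)) >> 1) + 1
evaluates to the least-significant set bit of the request size, and that value
is then used as the required buffer alignment. A worked example with an
illustrative request size:

    /* size              = 0x3000  (a 12kB request)                    */
    /* size - 1          = 0x2fff                                      */
    /* size ^ (size - 1) = 0x1fff  (ones from the lowest set bit down) */
    /* >> 1              = 0x0fff                                      */
    /* + 1               = 0x1000  => buffer aligned on a 4kB boundary */
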
    31.1 --- a/tools/firmware/rombios/32bit/rombios_compat.h	Wed Jan 28 12:22:58 2009 +0900
    31.2 +++ b/tools/firmware/rombios/32bit/rombios_compat.h	Wed Jan 28 13:06:45 2009 +0900
    31.3 @@ -89,4 +89,8 @@ static inline void write_byte(Bit16u seg
    31.4  	*addr = val;
    31.5  }
    31.6  
    31.7 +#define X(idx, ret, fn, args...) ret fn (args);
    31.8 +#include "32bitprotos.h"
    31.9 +#undef X
   31.10 +
   31.11  #endif
    32.1 --- a/tools/firmware/rombios/32bit/tcgbios/Makefile	Wed Jan 28 12:22:58 2009 +0900
    32.2 +++ b/tools/firmware/rombios/32bit/tcgbios/Makefile	Wed Jan 28 13:06:45 2009 +0900
    32.3 @@ -2,17 +2,17 @@ XEN_ROOT = ../../../../..
    32.4  include $(XEN_ROOT)/tools/firmware/Rules.mk
    32.5  
    32.6  TARGET  = tcgbiosext.o
    32.7 -FILES   = tcgbios tpm_drivers
    32.8 -OBJECTS = $(foreach f,$(FILES),$(f).o)
    32.9  
   32.10 -CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
   32.11 +CFLAGS += $(CFLAGS_include) -I.. -I../..
   32.12  
   32.13 -.PHONY: all clean
   32.14 -
   32.15 +.PHONY: all
   32.16  all: $(TARGET)
   32.17  
   32.18 +.PHONY: clean
   32.19  clean:
   32.20 -	rm -rf *.o $(TARGET)
   32.21 +	rm -rf *.o $(TARGET) $(DEPS)
   32.22  
   32.23 -$(TARGET): $(OBJECTS)
   32.24 +$(TARGET): tcgbios.o tpm_drivers.o
   32.25  	$(LD) $(LDFLAGS_DIRECT) -r $^ -o $@
   32.26 +
   32.27 +-include $(DEPS)
    33.1 --- a/tools/firmware/rombios/32bit/tcgbios/tcgbios.c	Wed Jan 28 12:22:58 2009 +0900
    33.2 +++ b/tools/firmware/rombios/32bit/tcgbios/tcgbios.c	Wed Jan 28 13:06:45 2009 +0900
    33.3 @@ -26,7 +26,6 @@
    33.4  
    33.5  #include "util.h"
    33.6  #include "tcgbios.h"
    33.7 -#include "32bitprotos.h"
    33.8  
    33.9  /* local structure and variables */
   33.10  struct ptti_cust {
   33.11 @@ -259,6 +258,10 @@ uint8_t acpi_validate_entry(struct acpi_
   33.12  }
   33.13  
   33.14  
   33.15 +/*
   33.16 +   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
   33.17 +   where the TCPA table is.
   33.18 + */
   33.19  void tcpa_acpi_init(void)
   33.20  {
   33.21  	struct acpi_20_rsdt *rsdt;
   33.22 @@ -313,6 +316,16 @@ static void tcpa_reset_acpi_log(void)
   33.23  }
   33.24  
   33.25  
   33.26 +/*
   33.27 + * Extend the ACPI log with the given entry by copying the
   33.28 + * entry data into the log.
   33.29 + * Input
   33.30 + *  Pointer to the structure to be copied into the log
   33.31 + *
   33.32 + * Output:
   33.33 + *  lower 16 bits of return code contain entry number
   33.34 + *  if entry number is '0', then upper 16 bits contain error code.
   33.35 + */
   33.36  uint32_t tcpa_extend_acpi_log(uint32_t entry_ptr)
   33.37  {
   33.38  	uint32_t res = 0;
   33.39 @@ -622,7 +635,8 @@ void tcpa_wake_event()
   33.40  }
   33.41  
   33.42  /*
   33.43 - * add the boot device to the measurement log
   33.44 + * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
   33.45 + * the list of measurements.
   33.46   */
   33.47  void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
   33.48  {
    34.1 --- a/tools/firmware/rombios/32bitgateway.c	Wed Jan 28 12:22:58 2009 +0900
    34.2 +++ b/tools/firmware/rombios/32bitgateway.c	Wed Jan 28 13:06:45 2009 +0900
    34.3 @@ -19,8 +19,10 @@
    34.4   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
    34.5   *
    34.6   * Copyright (C) IBM Corporation, 2006
    34.7 + * Copyright (c) 2008, Citrix Systems, Inc.
    34.8   *
    34.9   * Author: Stefan Berger <stefanb@us.ibm.com>
   34.10 + * Author: Keir Fraser <keir.fraser@citrix.com>
   34.11   */
   34.12  
   34.13  /*
   34.14 @@ -34,389 +36,142 @@
   34.15   *  (4 bytes) even for uint16_t, so casting to 32bit from bcc is a good idea.
   34.16   */
   34.17  
   34.18 -#define SEGMENT_OFFSET  0xf0000
   34.19 -#define REAL_MODE_CODE_SEGMENT  0xf000
   34.20 +/* At most 32 bytes in argument list to a 32-bit function. */
   34.21 +#define MAX_ARG_BYTES 32
   34.22  
   34.23 -#define START_PM_CODE  USE32
   34.24 -#define END_PM_CODE    USE16
   34.25 +#define REAL_MODE_CODE_OFFSET  0xf0000
   34.26  
   34.27 -/* definition of used code/data segment descriptors */
   34.28 -#define PM_NORMAL_CS (gdt_entry_pm_cs       - gdt_base)
   34.29 +/* Definitions of code/data segment descriptors. */
   34.30 +#define PM_32BIT_CS  (gdt_entry_pm_32bit_cs - gdt_base)
   34.31  #define PM_16BIT_CS  (gdt_entry_pm_16bit_cs - gdt_base)
   34.32  #define PM_32BIT_DS  (gdt_entry_pm_32bit_ds - gdt_base)
   34.33 -
   34.34 -  ASM_START
   34.35 -
   34.36 -    ; Switch into protected mode to allow access to 32 bit addresses.
   34.37 -    ; This function allows switching into protected mode.
   34.38 -    ; (the specs says big real mode, but that will not work)
   34.39 -    ;
   34.40 -    ; preserves all registers and prepares cs, ds, es, ss for usage
   34.41 -    ; in protected mode; while in prot.mode interrupts remain disabled
   34.42 -switch_to_protmode:
   34.43 -    cli
   34.44 -
   34.45 -    ; have to fix the stack for proper return address in 32 bit mode
   34.46 -    push WORD #(REAL_MODE_CODE_SEGMENT>>12)	;extended return address
   34.47 -    push bp					;pop@A1
   34.48 -    mov bp, sp
   34.49 -    push eax					;pop@A2
   34.50 -    mov eax, 2[bp]				; fix return address
   34.51 -    rol eax, #16
   34.52 -    mov 2[bp], eax
   34.53 -
   34.54 -    mov eax, esp
   34.55 -    ror eax, #16				; hi(esp)
   34.56 -
   34.57 -    push bx					; preserve before function call
   34.58 -    push cx
   34.59 -    push dx
   34.60 -
   34.61 -    push ax					; prepare stack for
   34.62 -    push es					; call
   34.63 -    push ds
   34.64 -    push cs
   34.65 -    push ss
   34.66 -    call _store_segment_registers
   34.67 -    add sp, #10					; pop ax,es-ss
   34.68 -
   34.69 -    pop dx					; restore after function call
   34.70 -    pop cx
   34.71 -    pop bx
   34.72 -
   34.73 -    ; calculate protected-mode esp from ss:sp
   34.74 -    and esp, #0xffff
   34.75 -    xor eax, eax
   34.76 -    mov ax, ss
   34.77 -    rol eax, #4
   34.78 -    add eax, esp
   34.79 -    mov esp, eax
   34.80 -
   34.81 -    seg cs
   34.82 -    lgdt my_gdtdesc				; switch to own table
   34.83 -
   34.84 -    mov eax, cr0
   34.85 -    or	al, #0x1				; protected mode 'on'
   34.86 -    mov cr0, eax
   34.87 -
   34.88 -    jmpf DWORD (SEGMENT_OFFSET | switch_to_protmode_goon_1), #PM_NORMAL_CS
   34.89 -
   34.90 -    START_PM_CODE
   34.91 -
   34.92 -switch_to_protmode_goon_1:
   34.93 -    mov ax, #PM_32BIT_DS			; 32 bit segment that allows
   34.94 -    mov ds, ax					; to reach all 32 bit
   34.95 -    mov es, ax					; addresses
   34.96 -    mov ss, ax
   34.97 -
   34.98 -    pop eax					;@A2
   34.99 -    pop bp					;@A1
  34.100 -    ret
  34.101 -
  34.102 -    END_PM_CODE
  34.103 -
  34.104 -
  34.105 +#define PM_16BIT_DS  (gdt_entry_pm_16bit_ds - gdt_base)
  34.106  
  34.107      .align 16
  34.108  gdt_base:
  34.109 -    ; see Intel SW Dev. Manuals section 3.4.5, Volume 3 for meaning of bits
  34.110      .word 0,0
  34.111      .byte 0,0,0,0
  34.112 -
  34.113 -gdt_entry_pm_cs:
  34.114 -    ; 32 bit code segment for protected mode
  34.115 +gdt_entry_pm_32bit_cs:
  34.116      .word 0xffff, 0x0000
  34.117 -    .byte 0x00, 0x9a, 0xcf, 0x00
  34.118 -
  34.119 +    .byte 0x00, 0x9b, 0xcf, 0x00
  34.120  gdt_entry_pm_16bit_cs:
  34.121 -    ; temp. 16 bit code segment used while in protected mode
  34.122      .word 0xffff, 0x0000
  34.123 -    .byte SEGMENT_OFFSET >> 16, 0x9a, 0x0, 0x0
  34.124 -
  34.125 +    .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0
  34.126  gdt_entry_pm_32bit_ds:
  34.127 -    ; (32 bit) data segment (r/w) reaching all possible areas in 32bit memory
  34.128 -    ; 4kb granularity
  34.129      .word 0xffff, 0x0000
  34.130 -    .byte 0x0, 0x92, 0xcf, 0x0
  34.131 +    .byte 0x0, 0x93, 0xcf, 0x0
  34.132 +gdt_entry_pm_16bit_ds:
  34.133 +    .word 0xffff, 0x0000
  34.134 +    .byte 0x0, 0x93, 0x0, 0x0
  34.135  gdt_entry_end:
  34.136  
  34.137 -my_gdtdesc:
  34.138 +protmode_gdtdesc:
  34.139      .word (gdt_entry_end - gdt_base) - 1
  34.140 -    .long gdt_base | SEGMENT_OFFSET
  34.141 +    .long gdt_base | REAL_MODE_CODE_OFFSET
  34.142  
  34.143 -
  34.144 -realmode_gdtdesc:				;to be used in real mode
  34.145 +realmode_gdtdesc:
  34.146      .word 0xffff
  34.147      .long 0x0
  34.148  
  34.149 -
  34.150 -
  34.151 -switch_to_realmode:
  34.152 -    ; Implementation of switching from protected mode to real mode
  34.153 -    ; prepares cs, es, ds, ss to be used in real mode
  34.154 -    ; spills   eax
  34.155 -    START_PM_CODE
  34.156 -
  34.157 -    ; need to fix up the stack to return in 16 bit mode
  34.158 -    ; currently the 32 bit return address is on the stack
  34.159 -    pop eax
  34.160 -    push ax
  34.161 -
  34.162 -    push bx					;pop@1
  34.163 -    push si					;pop@2
  34.164 -
  34.165 -    call _ebda_ss_offset32			; get the offset of the ss
  34.166 -    mov bx, ax					; entry within the ebda.
  34.167 -
  34.168 -    jmpf switch_to_realmode_goon_1, #PM_16BIT_CS
  34.169 +Upcall:
  34.170 +    ; Do an upcall into 32 bit space
  34.171 +    ;
  34.172 +    ; Input:
  34.173 +    ; bx: index of function to call
   34.174 +    ; Output:
  34.175 +    ; dx, ax: 32 bit result of call (even if 'void' is expected)
  34.176  
  34.177 -    END_PM_CODE
  34.178 -
  34.179 -switch_to_realmode_goon_1:
  34.180 -    mov eax, cr0
  34.181 -    and al, #0xfe				; protected mode 'off'
  34.182 -    mov cr0, eax
  34.183 -
  34.184 -    jmpf switch_to_realmode_goon_2, #REAL_MODE_CODE_SEGMENT
  34.185 -
  34.186 -switch_to_realmode_goon_2:
  34.187 +    ; Save caller state, stack frame offsets listed below
  34.188 +#define esp_off     0
  34.189 +#define ss_off      4
  34.190 +#define es_off      6
  34.191 +#define ds_off      8
  34.192 +#define flags_off   10
  34.193 +#define retaddr_off 12
  34.194 +#define args_off    14
  34.195 +    pushf
  34.196 +    cli
  34.197 +    push ds
  34.198 +    push es
  34.199 +    push ss
  34.200 +    push esp
  34.201  
  34.202 -    ; get orig. 'ss' without using the stack (no 'call'!)
  34.203 -    xor eax, eax			; clear upper 16 bits (and lower)
  34.204 -    mov ax, #0x40			; where is the ebda located?
  34.205 +    ; Calculate protected-mode esp from ss:sp
  34.206 +    and esp, #0xffff
  34.207 +    xor eax, eax
  34.208 +    mov ax, ss
  34.209 +    shl eax, #4
  34.210 +    add esp, eax
  34.211 +
  34.212 +    ; Switch to protected mode
  34.213 +    seg cs
  34.214 +    lgdt protmode_gdtdesc
  34.215 +    mov eax, cr0
  34.216 +    or al, #0x1  ; protected mode on
  34.217 +    mov cr0, eax
  34.218 +    jmpf DWORD (REAL_MODE_CODE_OFFSET|upcall1), #PM_32BIT_CS
  34.219 +upcall1:
  34.220 +    USE32
  34.221 +    mov ax, #PM_32BIT_DS
  34.222      mov ds, ax
  34.223 -    mov si, #0xe
  34.224 -    seg ds
  34.225 -    mov ax, [si]			; ax = segment of ebda
  34.226 -
  34.227 -    mov ds, ax				; segment of ebda
  34.228 -    seg ds
  34.229 -    mov ax, [bx]			; stack segment - bx has been set above
  34.230 +    mov es, ax
  34.231      mov ss, ax
  34.232  
  34.233 -    ; from esp and ss calculate real-mode sp
  34.234 -    rol eax, #4
  34.235 +    ; Marshal arguments and call 32-bit function
  34.236 +    mov ecx, #MAX_ARG_BYTES/4
  34.237 +upcall2:
  34.238 +    push MAX_ARG_BYTES-4+args_off[esp]
  34.239 +    loop upcall2
  34.240 +    mov eax, [BIOS_INFO_PHYSICAL_ADDRESS + BIOSINFO_OFF_bios32_entry]
  34.241 +    call eax
  34.242 +    add esp, #MAX_ARG_BYTES
  34.243 +    mov ecx, eax  ; Result in ecx
  34.244 +
  34.245 +    ; Restore real-mode stack pointer
  34.246 +    xor eax, eax
  34.247 +    mov ax, ss_off[esp]
  34.248 +    mov bx, ax    ; Real-mode ss in bx
  34.249 +    shl eax, 4
  34.250      sub esp, eax
  34.251  
  34.252 -    push dx				;preserve before call(s)
  34.253 -    push cx
  34.254 -    push bx
  34.255 -
  34.256 -    call _get_register_ds		; get orig. 'ds'
  34.257 +    ; Return to real mode
  34.258 +    jmpf upcall3, #PM_16BIT_CS
  34.259 +upcall3:
  34.260 +    USE16
  34.261 +    mov ax, #PM_16BIT_DS
  34.262      mov ds, ax
  34.263 -    call _get_register_es		; get orig. 'es'
  34.264      mov es, ax
  34.265 -    call _get_register_esp_hi		; fix the upper 16 bits of esp
  34.266 -    ror esp, #16
  34.267 -    mov sp, ax
  34.268 -    rol esp, #16
  34.269 -
  34.270 -    pop bx
  34.271 -    pop cx
  34.272 -    pop dx
  34.273 -
  34.274 +    mov ss, ax
  34.275 +    mov eax, cr0
  34.276 +    and al, #0xfe ; protected mode off
  34.277 +    mov cr0, eax
  34.278 +    jmpf upcall4, #REAL_MODE_CODE_OFFSET>>4
  34.279 +upcall4:
  34.280      seg cs
  34.281      lgdt realmode_gdtdesc
  34.282  
  34.283 -    sti						; allow interrupts
  34.284 +    ; Restore real-mode ss
  34.285 +    mov ss, bx
  34.286  
  34.287 -    pop si					;@2
  34.288 -    pop bx					;@1
  34.289 +    ; Convert result into dx:ax format
  34.290 +    mov eax, ecx
  34.291 +    ror eax, #16
  34.292 +    mov dx, ax
  34.293 +    ror eax, #16
  34.294  
  34.295 +    ; Restore caller state and return
  34.296 +    pop esp
  34.297 +    pop bx ; skip ss
  34.298 +    pop es
  34.299 +    pop ds
  34.300 +    popf
  34.301      ret
  34.302  
  34.303 -    ASM_END
  34.304 -
  34.305 -/*
  34.306 - * Helper function to get the offset of the reg_ss within the ebda struct
  34.307 - * Only 'C' can tell the offset.
  34.308 - */
  34.309 -Bit16u
  34.310 -ebda_ss_offset32()
  34.311 -{
  34.312 -    ASM_START
  34.313 -    START_PM_CODE				// need to have this
  34.314 -    ASM_END					// compiled for protected mode
  34.315 -    return &EbdaData->upcall.reg_ss;		// 'C' knows the offset!
  34.316 -    ASM_START
  34.317 -    END_PM_CODE
  34.318 -    ASM_END
  34.319 -}
  34.320 -
  34.321 -/*
  34.322 - * Two often-used functions
  34.323 - */
  34.324 -Bit16u
  34.325 -read_word_from_ebda(offset)
  34.326 -    Bit16u offset;
  34.327 -{
  34.328 -	Bit16u ebda_seg = read_word(0x0040, 0x000E);
  34.329 -	return read_word(ebda_seg, offset);
  34.330 -}
  34.331 -
  34.332 -Bit32u
  34.333 -read_dword_from_ebda(offset)
  34.334 -    Bit16u offset;
  34.335 -{
  34.336 -	Bit16u ebda_seg = read_word(0x0040, 0x000E);
  34.337 -	return read_dword(ebda_seg, offset);
  34.338 -}
  34.339 -
  34.340 -/*
  34.341 - * Store registers in the EBDA; used to keep the registers'
  34.342 - * content in a well-defined place during protected mode execution
  34.343 - */
  34.344 -  void
  34.345 -store_segment_registers(ss, cs, ds, es, esp_hi)
  34.346 -  Bit16u ss, cs, ds, es, esp_hi;
  34.347 -{
  34.348 -	Bit16u ebda_seg = read_word(0x0040, 0x000E);
  34.349 -	write_word(ebda_seg, &EbdaData->upcall.reg_ss, ss);
  34.350 -	write_word(ebda_seg, &EbdaData->upcall.reg_cs, cs);
  34.351 -	write_word(ebda_seg, &EbdaData->upcall.reg_ds, ds);
  34.352 -	write_word(ebda_seg, &EbdaData->upcall.reg_es, es);
  34.353 -	write_word(ebda_seg, &EbdaData->upcall.esp_hi, esp_hi);
  34.354 -}
  34.355 -
  34.356 -
  34.357 -  void
  34.358 -store_returnaddress(retaddr)
  34.359 -   Bit16u retaddr;
  34.360 -{
  34.361 -	Bit16u ebda_seg = read_word(0x0040, 0x000E);
  34.362 -	write_word(ebda_seg, &EbdaData->upcall.retaddr, retaddr);
  34.363 -}
  34.364 -
  34.365 -Bit16u
  34.366 -get_returnaddress()
  34.367 -{
  34.368 -	return read_word_from_ebda(&EbdaData->upcall.retaddr);
  34.369 -}
  34.370 -
  34.371 -/*
  34.372 - * get the segment register 'cs' value from the EBDA
  34.373 - */
  34.374 -Bit16u
  34.375 -get_register_cs()
  34.376 -{
  34.377 -	return read_word_from_ebda(&EbdaData->upcall.reg_cs);
  34.378 -}
  34.379 -
  34.380 -/*
  34.381 - * get the segment register 'ds' value from the EBDA
  34.382 - */
  34.383 -Bit16u
  34.384 -get_register_ds()
  34.385 -{
  34.386 -	return read_word_from_ebda(&EbdaData->upcall.reg_ds);
  34.387 -}
  34.388 -
  34.389 -/*
  34.390 - * get the segment register 'es' value from the EBDA
  34.391 - */
  34.392 -Bit16u
  34.393 -get_register_es()
  34.394 -{
  34.395 -	return read_word_from_ebda(&EbdaData->upcall.reg_es);
  34.396 -}
  34.397 -
  34.398 -/*
  34.399 - * get the upper 16 bits of the esp from the EBDA
  34.400 - */
  34.401 -Bit16u
  34.402 -get_register_esp_hi()
  34.403 -{
  34.404 -	return read_word_from_ebda(&EbdaData->upcall.esp_hi);
  34.405 -}
  34.406 -
  34.407 -
  34.408 -
  34.409 -/********************************************************/
  34.410 -
  34.411 -
  34.412 -ASM_START
  34.413 -
  34.414 -Upcall:
  34.415 -	; do the upcall into 32 bit space
  34.416 -	; clear the stack frame so that 32 bit space sees all the parameters
  34.417 -	; on the stack as if they were prepared for it
  34.418 -	; ---> take the 16 bit return address off the stack and remember it
  34.419 -	;
  34.420 -	; Input:
  34.421 -	; bx: index of function to call
  34.422 -	; Ouput:
  34.423 -	; dx, ax: 32 bit result of call (even if 'void' is expected)
  34.424 -
  34.425 -	push bp				;pop @1
  34.426 -	mov bp, sp
  34.427 -	push si				;pop @2
  34.428 -
  34.429 -	mov ax, 2[bp]			; 16 bit return address
  34.430 -	push ax
  34.431 -	call _store_returnaddress	; store away
  34.432 -	pop ax
  34.433 -
  34.434 -	; XXX GDT munging requires ROM to be writable!
  34.435 -	call _enable_rom_write_access
  34.436 -
  34.437 -	rol bx, #2
  34.438 -	mov si, #jmptable
  34.439 -	seg cs
  34.440 -	mov eax, dword ptr [si+bx]	; address to call from table
  34.441 -
  34.442 -	pop si				;@2
  34.443 -	pop bp				;@1
  34.444 -
  34.445 -	add sp, #2			; remove 16bit return address from stack
  34.446 -
  34.447 -	call switch_to_protmode
  34.448 -	START_PM_CODE
  34.449 -
  34.450 -	call eax			; call 32bit function
  34.451 -	push eax			; preserve result
  34.452 -
  34.453 -	call switch_to_realmode		; back to realmode
  34.454 -	END_PM_CODE
  34.455 -
  34.456 -	pop eax				; get result
  34.457 -
  34.458 -	push word 0x0000		; placeholder for 16 bit return address
  34.459 -	push bp
  34.460 -	mov bp,sp
  34.461 -	push eax			; preserve work register
  34.462 -
  34.463 -	call _disable_rom_write_access
  34.464 -
  34.465 -	call _get_returnaddress
  34.466 -	mov 2[bp], ax			; 16bit return address onto stack
  34.467 -
  34.468 -	pop eax
  34.469 -	pop bp
  34.470 -
  34.471 -	ror eax, #16			; result into dx/ax
  34.472 -	mov dx, ax			; hi(res) -> dx
  34.473 -	ror eax, #16
  34.474 -
  34.475 -	ret
  34.476 -
  34.477 -
  34.478 -/* macro for functions to declare their call into 32bit space */
  34.479  MACRO DoUpcall
  34.480 -	mov bx, #?1
  34.481 -	jmp Upcall
  34.482 +    mov bx, #?1
  34.483 +    jmp Upcall
  34.484  MEND
  34.485  
  34.486 -
  34.487 -ASM_END
  34.488 -
  34.489 +#define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx)
  34.490  #include "32bitprotos.h"
  34.491 -#include "32bitgateway.h"
  34.492 -
  34.493 -#include "tcgbios.c"
  34.494 -
  34.495 -Bit32u get_s3_waking_vector()
  34.496 -{
  34.497 -	ASM_START
  34.498 -	DoUpcall(IDX_GET_S3_WAKING_VECTOR)
  34.499 -	ASM_END
  34.500 -}
  34.501 +#undef X
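
After this rewrite every 32-bit service is reached through a generated stub:
the X() expansion above emits an _fn label that loads the function index into
bx and jumps to Upcall, which switches to protected mode, copies at most
MAX_ARG_BYTES (32 bytes, i.e. eight 32-bit arguments) from the caller's stack,
calls through the 32-bit entry published in the BIOS info table, and returns
the result in dx:ax. From bcc-compiled 16-bit code the mechanism is invisible;
a hypothetical wrapper, shown only to illustrate the calling convention:

    /* Hypothetical bcc-side caller: entry_ptr is a flat 32-bit pointer to
     * a log entry; the 32-bit result arrives in dx:ax, which bcc already
     * treats as a Bit32u return value. */
    Bit32u log_one_entry(entry_ptr)
        Bit32u entry_ptr;
    {
        return tcpa_extend_acpi_log(entry_ptr);
    }
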
    35.1 --- a/tools/firmware/rombios/32bitgateway.h	Wed Jan 28 12:22:58 2009 +0900
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,18 +0,0 @@
    35.4 -#ifndef GATEWAY
    35.5 -#define GATEWAY
    35.6 -
    35.7 -#include "32bitprotos.h"
    35.8 -
    35.9 -void test_gateway();
   35.10 -
   35.11 -/* extension for the EBDA */
   35.12 -typedef struct {
   35.13 -  Bit16u reg_ss;
   35.14 -  Bit16u reg_cs;
   35.15 -  Bit16u reg_ds;
   35.16 -  Bit16u reg_es;
   35.17 -  Bit16u esp_hi;
   35.18 -  Bit16u retaddr;
   35.19 -} upcall_t;
   35.20 -
   35.21 -#endif
    36.1 --- a/tools/firmware/rombios/32bitprotos.h	Wed Jan 28 12:22:58 2009 +0900
    36.2 +++ b/tools/firmware/rombios/32bitprotos.h	Wed Jan 28 13:06:45 2009 +0900
    36.3 @@ -1,47 +1,16 @@
    36.4 -#ifndef PROTOS_HIGHBIOS
    36.5 -#define PROTOS_HIGHBIOS
    36.6 -
    36.7 -/* shared include file for bcc and gcc */
    36.8 -
    36.9 -/* bcc does not like 'enum' */
   36.10 -#define IDX_TCGINTERRUPTHANDLER            0
   36.11 -#define IDX_TCPA_ACPI_INIT                 1
   36.12 -#define IDX_TCPA_EXTEND_ACPI_LOG           2
   36.13 -#define IDX_TCPA_CALLING_INT19H            3
   36.14 -#define IDX_TCPA_RETURNED_INT19H           4
   36.15 -#define IDX_TCPA_ADD_EVENT_SEPARATORS      5
   36.16 -#define IDX_TCPA_WAKE_EVENT                6
   36.17 -#define IDX_TCPA_ADD_BOOTDEVICE            7
   36.18 -#define IDX_TCPA_START_OPTION_ROM_SCAN     8
   36.19 -#define IDX_TCPA_OPTION_ROM                9
   36.20 -#define IDX_TCPA_IPL                       10
   36.21 -#define IDX_TCPA_INITIALIZE_TPM            11
   36.22 -#define IDX_TCPA_MEASURE_POST              12
   36.23 -#define IDX_GET_S3_WAKING_VECTOR           13
   36.24 -#define IDX_LAST                           14 /* keep last! */
   36.25 -
   36.26 -#ifdef GCC_PROTOS
   36.27 -  #define PARMS(x...) x
   36.28 -#else
   36.29 -  /* bcc doesn't want any parameter types in prototypes */
   36.30 -  #define PARMS(x...)
   36.31 -#endif
   36.32 -
   36.33 -Bit32u TCGInterruptHandler( PARMS(pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr));
   36.34 -
   36.35 -void tcpa_acpi_init( PARMS(void) );
   36.36 -Bit32u tcpa_extend_acpi_log( PARMS(Bit32u entry_ptr) );
   36.37 -void tcpa_calling_int19h( PARMS(void) );
   36.38 -void tcpa_returned_int19h( PARMS(void) );
   36.39 -void tcpa_add_event_separators( PARMS(void) );
   36.40 -void tcpa_wake_event( PARMS(void) );
   36.41 -void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) );
   36.42 -void tcpa_start_option_rom_scan( PARMS(void) );
   36.43 -void tcpa_option_rom( PARMS(Bit32u seg) );
   36.44 -void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
   36.45 -void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
   36.46 -Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
   36.47 -
   36.48 -Bit32u get_s3_waking_vector( PARMS(void) );
   36.49 -
   36.50 -#endif
   36.51 +X(0,  Bit32u, TCGInterruptHandler,
   36.52 +  pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr)
   36.53 +X(1,  void,   tcpa_acpi_init, void)
   36.54 +X(2,  Bit32u, tcpa_extend_acpi_log, Bit32u entry_ptr)
   36.55 +X(3,  void,   tcpa_calling_int19h,void)
   36.56 +X(4,  void,   tcpa_returned_int19h, void)
   36.57 +X(5,  void,   tcpa_add_event_separators, void)
   36.58 +X(6,  void,   tcpa_wake_event, void)
   36.59 +X(7,  void,   tcpa_add_bootdevice, Bit32u bootcd, Bit32u bootdrv)
   36.60 +X(8,  void,   tcpa_start_option_rom_scan, void)
   36.61 +X(9,  void,   tcpa_option_rom, Bit32u seg)
   36.62 +X(10, void,   tcpa_ipl, Bit32u bootcd, Bit32u seg, Bit32u off, Bit32u count)
   36.63 +X(11, void,   tcpa_measure_post, Bit32u from, Bit32u to)
   36.64 +X(12, Bit32u, tcpa_initialize_tpm, Bit32u physpres)
   36.65 +X(13, Bit32u, get_s3_waking_vector, void)
   36.66 +X(14, Bit32u, pmm, void *argp)
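
This table is now the single point of maintenance for the 16-bit/32-bit
interface: each consumer defines X before including the header. As a sketch,
the tcpa_extend_acpi_log entry (index 2) expands roughly as follows at three
of its inclusion sites (rombios.c adds a fourth definition that emits bare,
parameterless bcc prototypes):

    /* rombios_compat.h: #define X(idx, ret, fn, args...) ret fn (args);       */
    Bit32u tcpa_extend_acpi_log (Bit32u entry_ptr);

    /* 32bitbios.c: #define X(idx, ret, fn, args...) " .long "#fn"\n"          */
    /*   contributes one jumptable slot:  .long tcpa_extend_acpi_log           */

    /* 32bitgateway.c: #define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx) */
    /*   emits the 16-bit stub:           _tcpa_extend_acpi_log: DoUpcall(2)   */
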
    37.1 --- a/tools/firmware/rombios/Makefile	Wed Jan 28 12:22:58 2009 +0900
    37.2 +++ b/tools/firmware/rombios/Makefile	Wed Jan 28 13:06:45 2009 +0900
    37.3 @@ -13,6 +13,7 @@ clean: subdirs-clean
    37.4  	rm -f  as86-sym.txt ld86-sym.txt 
    37.5  	rm -f  rombios*.txt rombios*.sym usage biossums
    37.6  	rm -f  BIOS-bochs-*
    37.7 +	rm -f  $(DEPS)
    37.8  
    37.9  BIOS-bochs-latest: rombios.c biossums 32bitgateway.c tcgbios.c
   37.10  	gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c
   37.11 @@ -27,3 +28,4 @@ BIOS-bochs-latest: rombios.c biossums 32
   37.12  biossums: biossums.c
   37.13  	gcc -o biossums biossums.c
   37.14  
   37.15 +-include $(DEPS)
    38.1 --- a/tools/firmware/rombios/rombios.c	Wed Jan 28 12:22:58 2009 +0900
    38.2 +++ b/tools/firmware/rombios/rombios.c	Wed Jan 28 13:06:45 2009 +0900
    38.3 @@ -161,6 +161,8 @@
    38.4  
    38.5  #define BX_TCGBIOS       0   /* main switch for TCG BIOS ext. */
    38.6  
    38.7 +#define BX_PMM           1   /* POST Memory Manager */
    38.8 +
    38.9  #define BX_MAX_ATA_INTERFACES   4
   38.10  #define BX_MAX_ATA_DEVICES      (BX_MAX_ATA_INTERFACES*2)
   38.11  
   38.12 @@ -726,7 +728,9 @@ typedef struct {
   38.13      } cdemu_t;
   38.14  #endif // BX_ELTORITO_BOOT
   38.15  
   38.16 -#include "32bitgateway.h"
   38.17 +#define X(idx, ret, fn, arg...) ret fn ();
   38.18 +#include "32bitprotos.h"
   38.19 +#undef X
   38.20  
   38.21    // for access to EBDA area
   38.22    //     The EBDA structure should conform to
   38.23 @@ -752,8 +756,6 @@ typedef struct {
   38.24      // El Torito Emulation data
   38.25      cdemu_t cdemu;
   38.26  #endif // BX_ELTORITO_BOOT
   38.27 -
   38.28 -    upcall_t upcall;
   38.29      } ebda_data_t;
   38.30  
   38.31    #define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
   38.32 @@ -1416,31 +1418,24 @@ fixup_base_mem_in_k()
   38.33    write_word(0x40, 0x13, base_mem >> 10);
   38.34  }
   38.35  
   38.36 -void
   38.37 -set_rom_write_access(action)
   38.38 -  Bit16u action;
   38.39 -{
   38.40 -    Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
   38.41  ASM_START
   38.42 -    mov si,.set_rom_write_access.off[bp]
   38.43 +_rom_write_access_control:
   38.44      push ds
   38.45 -    mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
   38.46 +    mov ax,#(BIOS_INFO_PHYSICAL_ADDRESS >> 4)
   38.47      mov ds,ax
   38.48 -    mov dx,[si]
   38.49 +    mov ax,[BIOSINFO_OFF_xen_pfiob]
   38.50      pop ds
   38.51 -    mov ax,.set_rom_write_access.action[bp]
   38.52 -    out dx,al
   38.53 +    ret
   38.54  ASM_END
   38.55 -}
   38.56  
   38.57  void enable_rom_write_access()
   38.58  {
   38.59 -    set_rom_write_access(0);
   38.60 +    outb(rom_write_access_control(), 0);
   38.61  }
   38.62  
   38.63  void disable_rom_write_access()
   38.64  {
   38.65 -    set_rom_write_access(PFFLAG_ROM_LOCK);
   38.66 +    outb(rom_write_access_control(), PFFLAG_ROM_LOCK);
   38.67  }
   38.68      
   38.69  #endif /* HVMASSIST */
   38.70 @@ -2054,7 +2049,10 @@ print_bios_banner()
   38.71    "rombios32 "
   38.72  #endif
   38.73  #if BX_TCGBIOS
   38.74 -  "TCG-enabled"
   38.75 +  "TCG-enabled "
   38.76 +#endif
   38.77 +#if BX_PMM
   38.78 +  "PMM "
   38.79  #endif
   38.80    "\n\n");
   38.81  }
   38.82 @@ -9499,8 +9497,9 @@ use16 386
   38.83  
   38.84  #endif
   38.85  
   38.86 +#include "32bitgateway.c"
   38.87  ASM_END
   38.88 -#include "32bitgateway.c"
   38.89 +#include "tcgbios.c"
   38.90  ASM_START
   38.91  
   38.92  ;--------------------
   38.93 @@ -10355,6 +10354,48 @@ rombios32_gdt:
   38.94    dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
   38.95  #endif // BX_ROMBIOS32
   38.96  
   38.97 +#if BX_PMM
   38.98 +; according to POST Memory Manager Specification Version 1.01
   38.99 +.align 16
  38.100 +pmm_structure:
  38.101 +  db 0x24,0x50,0x4d,0x4d ;; "$PMM" signature
  38.102 +  db 0x01 ;; revision
  38.103 +  db 16 ;; length
  38.104 +  db (-((pmm_entry_point>>8)+pmm_entry_point+0x20f))&0xff;; checksum
  38.105 +  dw pmm_entry_point,0xf000 ;; far call entrypoint
  38.106 +  db 0,0,0,0,0 ;; reserved
  38.107 +
  38.108 +pmm_entry_point:
  38.109 +  pushf
  38.110 +  pushad
  38.111 +; Calculate protected-mode address of PMM function args
  38.112 +  xor	eax, eax
  38.113 +  mov	ax, sp
  38.114 +  xor	ebx, ebx
  38.115 +  mov	bx, ss
  38.116 +  shl	ebx, 4
  38.117 +  lea	ebx, [eax+ebx+38] ;; ebx=(ss<<4)+sp+4(far call)+2(pushf)+32(pushad)
  38.118 +  push	ebx
  38.119 +;
  38.120 +; Stack layout at this point:
  38.121 +;
  38.122 +;        : +0x0    +0x2    +0x4    +0x6    +0x8    +0xa    +0xc    +0xe
  38.123 +; -----------------------------------------------------------------------
  38.124 +; sp     : [&arg1         ][edi           ][esi           ][ebp           ]
  38.125 +; sp+0x10: [esp           ][ebx           ][edx           ][ecx           ]
  38.126 +; sp+0x20: [eax           ][flags ][ip    ][cs    ][arg1  ][arg2, ...
  38.127 +;
  38.128 +  call _pmm
  38.129 +  mov	bx, sp
  38.130 +SEG SS
  38.131 +  mov	[bx+0x20], ax
  38.132 +SEG SS
  38.133 +  mov	[bx+0x18], dx
  38.134 +  pop	ebx
  38.135 +  popad
  38.136 +  popf
  38.137 +  retf
  38.138 +#endif // BX_PMM
  38.139  
  38.140  ; parallel port detection: base address in DX, index in BX, timeout in CL
  38.141  detect_parport:
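
Per the POST Memory Manager spec 1.01 referenced above, the 16-byte "$PMM" header must sum to zero modulo 256. The constant 0x20f in the checksum expression is the sum of the fixed header bytes -- "$PMM" (0x24+0x50+0x4d+0x4d), revision 0x01, length 16, and the two bytes of segment 0xf000 -- so only the two bytes of pmm_entry_point vary. A self-contained C check of that arithmetic (names here are illustrative, not part of the BIOS):

    #include <stdint.h>
    #include <stdio.h>

    /* Checksum byte as computed by the assembly above (all arithmetic is mod 256). */
    static uint8_t pmm_checksum(uint16_t entry_off)
    {
        unsigned fixed = 0x24 + 0x50 + 0x4d + 0x4d   /* "$PMM" signature          */
                       + 0x01 + 16                   /* revision, structure length */
                       + 0x00 + 0xf0;                /* segment 0xf000             */
        /* fixed == 0x20f; the five reserved bytes contribute nothing. */
        return (uint8_t)-(fixed + (entry_off >> 8) + (entry_off & 0xff));
    }

    int main(void)
    {
        uint16_t off = 0x1234;                       /* stand-in for pmm_entry_point */
        uint8_t sum = 0x20f + (off >> 8) + (off & 0xff) + pmm_checksum(off);
        printf("header byte sum mod 256 = %u\n", (unsigned)sum);   /* prints 0 */
        return 0;
    }
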
  38.142 @@ -10447,7 +10488,9 @@ rom_scan:
  38.143    ;;   3         ROM initialization entry point (FAR CALL)
  38.144  
  38.145  #if BX_TCGBIOS
  38.146 +  push ax
  38.147    call _tcpa_start_option_rom_scan    /* specs: 3.2.3.3 + 10.4.3 */
  38.148 +  pop ax
  38.149  #endif
  38.150  
  38.151  rom_scan_loop:
  38.152 @@ -11790,15 +11833,6 @@ static Bit8u vgafont8[128*8]=
  38.153  #ifdef HVMASSIST
  38.154  ASM_START
  38.155  
  38.156 -// space for addresses in 32bit BIOS area; currently 256/4 entries
  38.157 -// are allocated
  38.158 -.org 0xcb00
  38.159 -jmptable:
  38.160 -db 0x5F, 0x5F, 0x5F, 0x4A, 0x4D, 0x50, 0x54 ;; ___JMPT
  38.161 -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;;  64 bytes
  38.162 -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 128 bytes
  38.163 -dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 192 bytes
  38.164 -
  38.165  //
  38.166  // MP Tables
  38.167  // just carve out some blank space for HVMLOADER to write the MP tables to
    39.1 --- a/tools/firmware/rombios/tcgbios.c	Wed Jan 28 12:22:58 2009 +0900
    39.2 +++ b/tools/firmware/rombios/tcgbios.c	Wed Jan 28 13:06:45 2009 +0900
    39.3 @@ -25,162 +25,6 @@
    39.4    Support for TCPA ACPI logging
    39.5   ******************************************************************/
    39.6  
    39.7 -/*
    39.8 - * Extend the ACPI log with the given entry by copying the
    39.9 - * entry data into the log.
   39.10 - * Input
   39.11 - *  Pointer to the structure to be copied into the log
   39.12 - *
   39.13 - * Output:
   39.14 - *  lower 16 bits of return code contain entry number
   39.15 - *  if entry number is '0', then upper 16 bits contain error code.
   39.16 - */
   39.17 -Bit32u tcpa_extend_acpi_log(entry_ptr)
   39.18 -    Bit32u entry_ptr;
   39.19 -{
   39.20 -	ASM_START
   39.21 -	DoUpcall(IDX_TCPA_EXTEND_ACPI_LOG)
   39.22 -	ASM_END
   39.23 -}
   39.24 -
   39.25 -
   39.26 -/*
   39.27 -   initialize the TCPA ACPI subsystem; find the ACPI tables and determine
   39.28 -   where the TCPA table is.
   39.29 - */
   39.30 - void
   39.31 -tcpa_acpi_init()
   39.32 -{
   39.33 -	ASM_START
   39.34 -	DoUpcall(IDX_TCPA_ACPI_INIT)
   39.35 -	ASM_END
   39.36 -}
   39.37 -
   39.38 -
   39.39 -/*
   39.40 - * Add measurement to log about call of int 19h
   39.41 - */
   39.42 - void
   39.43 -tcpa_calling_int19h()
   39.44 -{
   39.45 -	ASM_START
   39.46 -	DoUpcall(IDX_TCPA_CALLING_INT19H)
   39.47 -	ASM_END
   39.48 -}
   39.49 -
   39.50 -/*
   39.51 - * Add measurement to log about retuning from int 19h
   39.52 - */
   39.53 - void
   39.54 -tcpa_returned_int19h()
   39.55 -{
   39.56 -	ASM_START
   39.57 -	DoUpcall(IDX_TCPA_RETURNED_INT19H)
   39.58 -	ASM_END
   39.59 -}
   39.60 -
   39.61 -/*
   39.62 - * Add event separators for PCRs 0 to 7; specs 8.2.3
   39.63 - */
   39.64 - void
   39.65 -tcpa_add_event_separators()
   39.66 -{
   39.67 -	ASM_START
   39.68 -	DoUpcall(IDX_TCPA_ADD_EVENT_SEPARATORS)
   39.69 -	ASM_END
   39.70 -}
   39.71 -
   39.72 -
   39.73 -/*
   39.74 - * Add a wake event to the log
   39.75 - */
   39.76 - void
   39.77 -tcpa_wake_event()
   39.78 -{
   39.79 -	ASM_START
   39.80 -	DoUpcall(IDX_TCPA_WAKE_EVENT)
   39.81 -	ASM_END
   39.82 -}
   39.83 -
   39.84 -
   39.85 -/*
   39.86 - * Add measurement to the log about option rom scan
   39.87 - * 10.4.3 : action 14
   39.88 - */
   39.89 - void
   39.90 -tcpa_start_option_rom_scan()
   39.91 -{
   39.92 -	ASM_START
   39.93 -	DoUpcall(IDX_TCPA_START_OPTION_ROM_SCAN)
   39.94 -	ASM_END
   39.95 -}
   39.96 -
   39.97 -
   39.98 -/*
   39.99 - * Add measurement to the log about an option rom
  39.100 - */
  39.101 - void
  39.102 -tcpa_option_rom(seg)
  39.103 -    Bit32u seg;
  39.104 -{
  39.105 -	ASM_START
  39.106 -	DoUpcall(IDX_TCPA_OPTION_ROM)
  39.107 -	ASM_END
  39.108 -}
  39.109 -
  39.110 -/*
  39.111 - * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
  39.112 - * the list of measurements.
  39.113 - */
  39.114 -void
  39.115 - tcpa_add_bootdevice(bootcd, bootdrv)
  39.116 -  Bit32u bootcd;
  39.117 -  Bit32u bootdrv;
  39.118 -{
  39.119 -	ASM_START
  39.120 -	DoUpcall(IDX_TCPA_ADD_BOOTDEVICE)
  39.121 -	ASM_END
  39.122 -}
  39.123 -
  39.124 -/*
  39.125 - * Add a measurement to the log in support of 8.2.5.3
  39.126 - * Creates two log entries
  39.127 - *
  39.128 - * Input parameter:
  39.129 - *  seg    : segment where the IPL data are located
  39.130 - */
  39.131 - void
  39.132 - tcpa_ipl(bootcd,seg,off,count)
  39.133 -    Bit32u bootcd;
  39.134 -    Bit32u seg;
  39.135 -    Bit32u off;
  39.136 -    Bit32u count;
  39.137 -{
  39.138 -	ASM_START
  39.139 -	DoUpcall(IDX_TCPA_IPL)
  39.140 -	ASM_END
  39.141 -}
  39.142 -
  39.143 -
  39.144 -Bit32u
  39.145 -tcpa_initialize_tpm(physpres)
  39.146 -  Bit32u physpres;
  39.147 -{
  39.148 -	ASM_START
  39.149 -	DoUpcall(IDX_TCPA_INITIALIZE_TPM)
  39.150 -	ASM_END
  39.151 -}
  39.152 -
  39.153 -void
  39.154 -tcpa_measure_post(from, to)
  39.155 -   Bit32u from;
  39.156 -   Bit32u to;
  39.157 -{
  39.158 -	ASM_START
  39.159 -	DoUpcall(IDX_TCPA_MEASURE_POST)
  39.160 -	ASM_END
  39.161 -}
  39.162 -
  39.163  ASM_START
  39.164  MACRO POST_MEASURE
  39.165  	push word #0x000f
  39.166 @@ -208,18 +52,6 @@ tcpa_do_measure_POSTs()
  39.167  	ASM_END
  39.168  }
  39.169  
  39.170 -Bit32u
  39.171 -TCGInterruptHandler(regs_ptr, es, ds, flags_ptr)
  39.172 -   Bit32u regs_ptr;
  39.173 -   Bit16u es;
  39.174 -   Bit16u ds;
  39.175 -   Bit32u flags_ptr;
  39.176 -{
  39.177 -	ASM_START
  39.178 -	DoUpcall(IDX_TCGINTERRUPTHANDLER)
  39.179 -	ASM_END
  39.180 -}
  39.181 -
  39.182  /*
  39.183   * C-dispatcher for the TCG BIOS functions
  39.184   */
    40.1 --- a/tools/firmware/vgabios/vbe.c	Wed Jan 28 12:22:58 2009 +0900
    40.2 +++ b/tools/firmware/vgabios/vbe.c	Wed Jan 28 13:06:45 2009 +0900
    40.3 @@ -38,8 +38,6 @@
    40.4  #include "vbe.h"
    40.5  #include "vbetables.h"
    40.6  
    40.7 -#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
    40.8 -
    40.9  // The current OEM Software Revision of this VBE Bios
   40.10  #define VBE_OEM_SOFTWARE_REV 0x0002;
   40.11  
   40.12 @@ -821,7 +819,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
   40.13          vbe_info_block.VideoModePtr_Off= DI + 34;
   40.14  
   40.15          // VBE Total Memory (in 64b blocks)
   40.16 -        vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
   40.17 +        outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
   40.18 +        vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA);
   40.19  
   40.20          if (vbe2_info)
   40.21  	{
   40.22 @@ -846,7 +845,8 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
   40.23          do
   40.24          {
   40.25                  if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
   40.26 -                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp())) {
   40.27 +                    (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) &&
   40.28 +                    (cur_info->info.XResolution * cur_info->info.XResolution * cur_info->info.BitsPerPixel <= vbe_info_block.TotalMemory << 19 )) {
   40.29  #ifdef DEBUG
   40.30                    printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
   40.31  #endif
   40.32 @@ -855,7 +855,7 @@ Bit16u *AX;Bit16u ES;Bit16u DI;
   40.33                    cur_ptr+=2;
   40.34                  } else {
   40.35  #ifdef DEBUG
   40.36 -                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported by display\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
   40.37 +                  printf("VBE mode %x (xres=%x / bpp=%02x) not supported \n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
   40.38  #endif
   40.39                  }
   40.40                  cur_info++;
   40.41 @@ -913,7 +913,13 @@ Bit16u *AX;Bit16u CX; Bit16u ES;Bit16u D
   40.42                    info.WinFuncPtr = 0xC0000000UL;
   40.43                    *(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall);
   40.44                  }
   40.45 -                
   40.46 +                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_H);
   40.47 +                info.PhysBasePtr = inw(VBE_DISPI_IOPORT_DATA);
   40.48 +                info.PhysBasePtr = info.PhysBasePtr << 16;
   40.49 +#if 0					
   40.50 +                outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_L);
   40.51 +                info.PhysBasePtr |= inw(VBE_DISPI_IOPORT_DATA);
   40.52 +#endif 							
   40.53                  result = 0x4f;
   40.54          }
   40.55          else
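
The patched VBE handlers stop trusting a compile-time memory constant and instead query the display engine through the Bochs DISPI register pair: write a register index to port 0x01CE, then read the value from port 0x01CF. TotalMemory is reported in 64 KiB units (so available bytes are TotalMemory << 16; the mode filter above works in bits, hence the << 19), and the LFB base is taken from the new LFB_ADDRESS_H register shifted into the high word. A hedged sketch of the same queries from plain C, with illustrative outw/inw stubs standing in for the BIOS's own port primitives:

    #include <stdint.h>

    #define VBE_DISPI_IOPORT_INDEX           0x01CE
    #define VBE_DISPI_IOPORT_DATA            0x01CF
    #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0x0a
    #define VBE_DISPI_INDEX_LFB_ADDRESS_H    0x0b

    extern void outw(uint16_t port, uint16_t val);   /* illustrative port I/O stubs */
    extern uint16_t inw(uint16_t port);

    static uint16_t dispi_read(uint16_t index)
    {
        outw(VBE_DISPI_IOPORT_INDEX, index);         /* select the DISPI register */
        return inw(VBE_DISPI_IOPORT_DATA);           /* read its current value    */
    }

    /* Would a mode fit in the advertised video memory? (width * height * bytes/pixel) */
    static int mode_fits(uint32_t xres, uint32_t yres, uint32_t bpp)
    {
        uint32_t vram_bytes = (uint32_t)dispi_read(VBE_DISPI_INDEX_VIDEO_MEMORY_64K) << 16;
        return xres * yres * ((bpp + 7) / 8) <= vram_bytes;
    }

    static uint32_t lfb_base(void)
    {
        return (uint32_t)dispi_read(VBE_DISPI_INDEX_LFB_ADDRESS_H) << 16;
    }
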
    41.1 --- a/tools/firmware/vgabios/vbe.h	Wed Jan 28 12:22:58 2009 +0900
    41.2 +++ b/tools/firmware/vgabios/vbe.h	Wed Jan 28 13:06:45 2009 +0900
    41.3 @@ -275,39 +275,41 @@ typedef struct ModeInfoListItem
    41.4  //        like 0xE0000000
    41.5  
    41.6  
    41.7 -  #define VBE_DISPI_BANK_ADDRESS          0xA0000
    41.8 -  #define VBE_DISPI_BANK_SIZE_KB          64
    41.9 +  #define VBE_DISPI_BANK_ADDRESS           0xA0000
   41.10 +  #define VBE_DISPI_BANK_SIZE_KB           64
   41.11    
   41.12 -  #define VBE_DISPI_MAX_XRES              1024
   41.13 -  #define VBE_DISPI_MAX_YRES              768
   41.14 +  #define VBE_DISPI_MAX_XRES               2560
   41.15 +  #define VBE_DISPI_MAX_YRES               1600
   41.16    
   41.17 -  #define VBE_DISPI_IOPORT_INDEX          0x01CE
   41.18 -  #define VBE_DISPI_IOPORT_DATA           0x01CF
   41.19 +  #define VBE_DISPI_IOPORT_INDEX           0x01CE
   41.20 +  #define VBE_DISPI_IOPORT_DATA            0x01CF
   41.21    
   41.22 -  #define VBE_DISPI_INDEX_ID              0x0
   41.23 -  #define VBE_DISPI_INDEX_XRES            0x1
   41.24 -  #define VBE_DISPI_INDEX_YRES            0x2
   41.25 -  #define VBE_DISPI_INDEX_BPP             0x3
   41.26 -  #define VBE_DISPI_INDEX_ENABLE          0x4
   41.27 -  #define VBE_DISPI_INDEX_BANK            0x5
   41.28 -  #define VBE_DISPI_INDEX_VIRT_WIDTH      0x6
   41.29 -  #define VBE_DISPI_INDEX_VIRT_HEIGHT     0x7
   41.30 -  #define VBE_DISPI_INDEX_X_OFFSET        0x8
   41.31 -  #define VBE_DISPI_INDEX_Y_OFFSET        0x9
   41.32 -      
   41.33 -  #define VBE_DISPI_ID0                   0xB0C0
   41.34 -  #define VBE_DISPI_ID1                   0xB0C1
   41.35 -  #define VBE_DISPI_ID2                   0xB0C2
   41.36 -  #define VBE_DISPI_ID3                   0xB0C3
   41.37 -  #define VBE_DISPI_ID4                   0xB0C4
   41.38 -  
   41.39 -  #define VBE_DISPI_DISABLED              0x00
   41.40 -  #define VBE_DISPI_ENABLED               0x01
   41.41 -  #define VBE_DISPI_GETCAPS               0x02
   41.42 -  #define VBE_DISPI_8BIT_DAC              0x20
   41.43 -  #define VBE_DISPI_LFB_ENABLED           0x40
   41.44 -  #define VBE_DISPI_NOCLEARMEM            0x80
   41.45 -  
   41.46 -  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE0000000
   41.47 +  #define VBE_DISPI_INDEX_ID               0x0
   41.48 +  #define VBE_DISPI_INDEX_XRES             0x1
   41.49 +  #define VBE_DISPI_INDEX_YRES             0x2
   41.50 +  #define VBE_DISPI_INDEX_BPP              0x3
   41.51 +  #define VBE_DISPI_INDEX_ENABLE           0x4
   41.52 +  #define VBE_DISPI_INDEX_BANK             0x5
   41.53 +  #define VBE_DISPI_INDEX_VIRT_WIDTH       0x6
   41.54 +  #define VBE_DISPI_INDEX_VIRT_HEIGHT      0x7
   41.55 +  #define VBE_DISPI_INDEX_X_OFFSET         0x8
   41.56 +  #define VBE_DISPI_INDEX_Y_OFFSET         0x9
   41.57 +  #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
   41.58 +  #define VBE_DISPI_INDEX_LFB_ADDRESS_H    0xb
   41.59 +  #define VBE_DISPI_INDEX_LFB_ADDRESS_L    0xc
   41.60 +
   41.61 +  #define VBE_DISPI_LFB_PHYSICAL_ADDRESS   0xE0000000
   41.62 +  #define VBE_DISPI_ID0                    0xB0C0
   41.63 +  #define VBE_DISPI_ID1                    0xB0C1
   41.64 +  #define VBE_DISPI_ID2                    0xB0C2
   41.65 +  #define VBE_DISPI_ID3                    0xB0C3
   41.66 +  #define VBE_DISPI_ID4                    0xB0C4
   41.67 +
   41.68 +  #define VBE_DISPI_DISABLED               0x00
   41.69 +  #define VBE_DISPI_ENABLED                0x01
   41.70 +  #define VBE_DISPI_GETCAPS                0x02
   41.71 +  #define VBE_DISPI_8BIT_DAC               0x20
   41.72 +  #define VBE_DISPI_LFB_ENABLED            0x40
   41.73 +  #define VBE_DISPI_NOCLEARMEM             0x80
   41.74  
   41.75  #endif
    42.1 --- a/tools/firmware/vgabios/vbetables-gen.c	Wed Jan 28 12:22:58 2009 +0900
    42.2 +++ b/tools/firmware/vgabios/vbetables-gen.c	Wed Jan 28 13:06:45 2009 +0900
    42.3 @@ -2,7 +2,7 @@
    42.4  #include <stdlib.h>
    42.5  #include <stdio.h>
    42.6  
    42.7 -#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 8
    42.8 +#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 16
    42.9  
   42.10  typedef struct {
   42.11      int width;
   42.12 @@ -42,19 +42,40 @@ ModeInfo modes[] = {
   42.13  { 1600, 1200, 24                      , 0x11F},
   42.14  
    42.15        /* BOCHS/PLEX86 'own' mode numbers */
   42.16 -{ 320, 200, 32                        , 0x140},
   42.17 -{ 640, 400, 32                        , 0x141},
   42.18 -{ 640, 480, 32                        , 0x142},
   42.19 -{ 800, 600, 32                        , 0x143},
   42.20 -{ 1024, 768, 32                       , 0x144},
   42.21 -{ 1280, 1024, 32                      , 0x145},
   42.22 -{ 320, 200, 8                           , 0x146},
   42.23 -{ 1600, 1200, 32                      , 0x147},
   42.24 -{ 1152, 864, 8                      , 0x148},
   42.25 +{ 320, 200, 32                       , 0x140},
   42.26 +{ 640, 400, 32                       , 0x141},
   42.27 +{ 640, 480, 32                       , 0x142},
   42.28 +{ 800, 600, 32                       , 0x143},
   42.29 +{ 1024, 768, 32                      , 0x144},
   42.30 +{ 1280, 1024, 32                     , 0x145},
   42.31 +{ 320, 200, 8                        , 0x146},
   42.32 +{ 1600, 1200, 32                     , 0x147},
   42.33 +{ 1152, 864, 8                       , 0x148},
   42.34  { 1152, 864, 15                      , 0x149},
   42.35  { 1152, 864, 16                      , 0x14a},
   42.36  { 1152, 864, 24                      , 0x14b},
   42.37  { 1152, 864, 32                      , 0x14c},
   42.38 +{ 1280, 800, 16                      , 0x178},
   42.39 +{ 1280, 800, 24                      , 0x179},
   42.40 +{ 1280, 800, 32                      , 0x17a},
   42.41 +{ 1280, 960, 16                      , 0x17b},
   42.42 +{ 1280, 960, 24                      , 0x17c},
   42.43 +{ 1280, 960, 32                      , 0x17d},
   42.44 +{ 1440, 900, 16                      , 0x17e},
   42.45 +{ 1440, 900, 24                      , 0x17f},
   42.46 +{ 1440, 900, 32                      , 0x180},
   42.47 +{ 1400, 1050, 16                     , 0x181},
   42.48 +{ 1400, 1050, 24                     , 0x182},
   42.49 +{ 1400, 1050, 32                     , 0x183},
   42.50 +{ 1680, 1050, 16                     , 0x184},
   42.51 +{ 1680, 1050, 24                     , 0x185},
   42.52 +{ 1680, 1050, 32                     , 0x186},
   42.53 +{ 1920, 1200, 16                     , 0x187},
   42.54 +{ 1920, 1200, 24                     , 0x188},
   42.55 +{ 1920, 1200, 32                     , 0x189},
   42.56 +{ 2560, 1600, 16                     , 0x18a},
   42.57 +{ 2560, 1600, 24                     , 0x18b},
   42.58 +{ 2560, 1600, 32                     , 0x18c},
   42.59  { 0, },
   42.60  };
   42.61  
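
Doubling VBE_DISPI_TOTAL_VIDEO_MEMORY_MB to 16 goes hand in hand with the new wide modes: the largest one, 2560x1600 at 32 bpp, needs 2560 * 1600 * 4 = 16,384,000 bytes (about 15.6 MiB), which overflows the old 8 MiB framebuffer but fits in 16 MiB. A trivial check, not part of the patch:

    #include <stdio.h>

    int main(void)
    {
        unsigned long need = 2560UL * 1600 * 4;              /* largest new mode, 32 bpp */
        printf("2560x1600x32 needs %lu bytes (%.1f MiB)\n", need, need / 1048576.0);
        printf("fits in  8 MiB: %s\n", need <=  8UL * 1048576 ? "yes" : "no");   /* no  */
        printf("fits in 16 MiB: %s\n", need <= 16UL * 1048576 ? "yes" : "no");   /* yes */
        return 0;
    }
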
    43.1 --- a/tools/firmware/vgabios/vgabios.c	Wed Jan 28 12:22:58 2009 +0900
    43.2 +++ b/tools/firmware/vgabios/vgabios.c	Wed Jan 28 13:06:45 2009 +0900
    43.3 @@ -3811,9 +3811,9 @@ void printf(s)
    43.4          for (i=0; i<format_width; i++) {
    43.5            nibble = (arg >> (4 * digit)) & 0x000f;
    43.6            if (nibble <= 9)
    43.7 -            outb(0x0500, nibble + '0');
    43.8 +            outb(0xe9, nibble + '0');
    43.9            else
   43.10 -            outb(0x0500, (nibble - 10) + 'A');
   43.11 +            outb(0xe9, (nibble - 10) + 'A');
   43.12            digit--;
   43.13            }
   43.14          in_format = 0;
   43.15 @@ -3823,7 +3823,7 @@ void printf(s)
   43.16        //  }
   43.17        }
   43.18      else {
   43.19 -      outb(0x0500, c);
   43.20 +      outb(0xe9, c);
   43.21        }
   43.22      s ++;
   43.23      }
    44.1 --- a/tools/flask/libflask/Makefile	Wed Jan 28 12:22:58 2009 +0900
    44.2 +++ b/tools/flask/libflask/Makefile	Wed Jan 28 13:06:45 2009 +0900
    44.3 @@ -16,7 +16,6 @@ CFLAGS   += $(INCLUDES) -I./include -I$(
    44.4  # Get gcc to generate the dependencies for us.
    44.5  CFLAGS   += -Wp,-MD,.$(@F).d
    44.6  LDFLAGS  += -L.
    44.7 -DEPS     = .*.d
    44.8  
    44.9  LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
   44.10  PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
    45.1 --- a/tools/flask/loadpolicy/Makefile	Wed Jan 28 12:22:58 2009 +0900
    45.2 +++ b/tools/flask/loadpolicy/Makefile	Wed Jan 28 13:06:45 2009 +0900
    45.3 @@ -7,9 +7,6 @@ LIBFLASK_ROOT = $(XEN_ROOT)/tools/flask/
    45.4  
    45.5  PROFILE=#-pg
    45.6  BASECFLAGS=-Wall -g -Werror
    45.7 -# Make gcc generate dependencies.
    45.8 -BASECFLAGS += -Wp,-MD,.$(@F).d
    45.9 -PROG_DEP = .*.d
   45.10  BASECFLAGS+= $(PROFILE)
   45.11  #BASECFLAGS+= -I$(XEN_ROOT)/tools
   45.12  BASECFLAGS+= $(CFLAGS_libxenctrl)
   45.13 @@ -39,7 +36,7 @@ all: $(CLIENTS)
   45.14  clean: 
   45.15  	rm -f *.o *.opic *.so
   45.16  	rm -f $(CLIENTS)
   45.17 -	$(RM) $(PROG_DEP)
   45.18 +	$(RM) $(DEPS)
   45.19  
   45.20  .PHONY: print-dir
   45.21  print-dir:
   45.22 @@ -54,7 +51,7 @@ install: all
   45.23  	$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
   45.24  	$(INSTALL_PROG) $(CLIENTS) $(DESTDIR)$(SBINDIR)
   45.25  
   45.26 --include $(PROG_DEP)
   45.27 +-include $(DEPS)
   45.28  
   45.29  # never delete any intermediate files.
   45.30  .SECONDARY:
    46.1 --- a/tools/fs-back/Makefile	Wed Jan 28 12:22:58 2009 +0900
    46.2 +++ b/tools/fs-back/Makefile	Wed Jan 28 13:06:45 2009 +0900
    46.3 @@ -13,10 +13,6 @@ CFLAGS   += $(CFLAGS_libxenstore)
    46.4  CFLAGS   += $(INCLUDES) -I.
    46.5  CFLAGS   += -D_GNU_SOURCE
    46.6  
    46.7 -# Get gcc to generate the dependencies for us.
    46.8 -CFLAGS   += -Wp,-MD,.$(@F).d
    46.9 -DEPS      = .*.d
   46.10 -
   46.11  LIBS      := -L. -L.. -L../lib
   46.12  LIBS      += $(LDFLAGS_libxenctrl)
   46.13  LIBS      += $(LDFLAGS_libxenstore)
    47.1 --- a/tools/include/Makefile	Wed Jan 28 12:22:58 2009 +0900
    47.2 +++ b/tools/include/Makefile	Wed Jan 28 13:06:45 2009 +0900
    47.3 @@ -10,11 +10,12 @@ xen-foreign:
    47.4  
    47.5  xen/.dir:
    47.6  	@rm -rf xen
    47.7 -	mkdir xen
    47.8 +	mkdir -p xen/libelf
    47.9  	ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
   47.10  	ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
   47.11  	ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
   47.12  	ln -sf ../xen-sys/$(XEN_OS) xen/sys
   47.13 +	ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
   47.14  	ln -s ../xen-foreign xen/foreign
   47.15  	touch $@
   47.16  
    48.1 --- a/tools/include/xen-foreign/reference.size	Wed Jan 28 12:22:58 2009 +0900
    48.2 +++ b/tools/include/xen-foreign/reference.size	Wed Jan 28 13:06:45 2009 +0900
    48.3 @@ -1,7 +1,7 @@
    48.4  
    48.5  structs                   |  x86_32  x86_64    ia64
    48.6  
    48.7 -start_info                |    1104    1152    1152
    48.8 +start_info                |    1112    1168    1168
    48.9  trap_info                 |       8      16       -
   48.10  pt_fpreg                  |       -       -      16
   48.11  cpu_user_regs             |      68     200       -
    49.1 --- a/tools/libaio/src/Makefile	Wed Jan 28 12:22:58 2009 +0900
    49.2 +++ b/tools/libaio/src/Makefile	Wed Jan 28 13:06:45 2009 +0900
    49.3 @@ -6,7 +6,7 @@ includedir=$(prefix)/include
    49.4  libdir=$(prefix)/lib
    49.5  
    49.6  ARCH := $(shell uname -m | sed -e s/i.86/i386/)
    49.7 -CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
    49.8 +CFLAGS = -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
    49.9  SO_CFLAGS=-shared $(CFLAGS)
   49.10  L_CFLAGS=$(CFLAGS)
   49.11  LINK_FLAGS=
    50.1 --- a/tools/libfsimage/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    50.2 +++ b/tools/libfsimage/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    50.3 @@ -1,8 +1,6 @@
    50.4  include $(XEN_ROOT)/tools/Rules.mk
    50.5  
    50.6 -DEPS = .*.d
    50.7 -
    50.8 -CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror -Wp,-MD,.$(@F).d
    50.9 +CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror
   50.10  LDFLAGS += -L../common/
   50.11  
   50.12  PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y))
    51.1 --- a/tools/libfsimage/common/Makefile	Wed Jan 28 12:22:58 2009 +0900
    51.2 +++ b/tools/libfsimage/common/Makefile	Wed Jan 28 13:06:45 2009 +0900
    51.3 @@ -4,9 +4,6 @@ include $(XEN_ROOT)/tools/Rules.mk
    51.4  MAJOR = 1.0
    51.5  MINOR = 0
    51.6  
    51.7 -CFLAGS += -Werror -Wp,-MD,.$(@F).d
    51.8 -DEPS = .*.d
    51.9 -
   51.10  LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS
   51.11  LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU
   51.12  LDFLAGS = $(LDFLAGS-y)
    52.1 --- a/tools/libxc/Makefile	Wed Jan 28 12:22:58 2009 +0900
    52.2 +++ b/tools/libxc/Makefile	Wed Jan 28 13:06:45 2009 +0900
    52.3 @@ -1,7 +1,7 @@
    52.4  XEN_ROOT = ../..
    52.5  include $(XEN_ROOT)/tools/Rules.mk
    52.6  
    52.7 -MAJOR    = 3.2
    52.8 +MAJOR    = 3.4
    52.9  MINOR    = 0
   52.10  
   52.11  CTRL_SRCS-y       :=
   52.12 @@ -62,10 +62,7 @@ CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
   52.13  # libraries.
   52.14  #CFLAGS   += -DVALGRIND -O0 -ggdb3
   52.15  
   52.16 -# Get gcc to generate the dependencies for us.
   52.17 -CFLAGS   += -Wp,-MD,.$(@F).d
   52.18  LDFLAGS  += -L.
   52.19 -DEPS     = .*.d
   52.20  
   52.21  CTRL_LIB_OBJS := $(patsubst %.c,%.o,$(CTRL_SRCS-y))
   52.22  CTRL_PIC_OBJS := $(patsubst %.c,%.opic,$(CTRL_SRCS-y))
    53.1 --- a/tools/libxc/xc_core.c	Wed Jan 28 12:22:58 2009 +0900
    53.2 +++ b/tools/libxc/xc_core.c	Wed Jan 28 13:06:45 2009 +0900
    53.3 @@ -58,9 +58,6 @@
    53.4  /* number of pages to write at a time */
    53.5  #define DUMP_INCREMENT (4 * 1024)
    53.6  
    53.7 -/* Don't yet support cross-address-size core dump */
    53.8 -#define guest_width (sizeof (unsigned long))
    53.9 -
   53.10  /* string table */
   53.11  struct xc_core_strtab {
   53.12      char       *strings;
   53.13 @@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr)
   53.14      ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT;
   53.15  
   53.16      ehdr->e_type = ET_CORE;
   53.17 -    ehdr->e_machine = ELF_ARCH_MACHINE;
   53.18 +    /* e_machine will be filled in later */
   53.19      ehdr->e_version = EV_CURRENT;
   53.20      ehdr->e_entry = 0;
   53.21      ehdr->e_phoff = 0;
   53.22 @@ -359,7 +356,8 @@ elfnote_dump_core_header(
   53.23  }
   53.24  
   53.25  static int
   53.26 -elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle)
   53.27 +elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle,
   53.28 +                         unsigned int guest_width)
   53.29  {
   53.30      int sts;
   53.31      struct elfnote elfnote;
   53.32 @@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum
   53.33      elfnote.descsz = sizeof(xen_version);
   53.34      elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION;
   53.35      elfnote_fill_xen_version(xc_handle, &xen_version);
   53.36 +    if (guest_width < sizeof(unsigned long))
   53.37 +    {
   53.38 +        // 32 bit elf file format differs in pagesize's alignment
   53.39 +        char *p = (char *)&xen_version.pagesize;
   53.40 +        memmove(p - 4, p, sizeof(xen_version.pagesize));
   53.41 +    }
   53.42      sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote));
   53.43      if ( sts != 0 )
   53.44          return sts;
   53.45 @@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args, 
   53.46      return dump_rtn(args, (char*)&format_version, sizeof(format_version));
   53.47  }
   53.48  
   53.49 +static int
   53.50 +get_guest_width(int xc_handle,
   53.51 +                uint32_t domid,
   53.52 +                unsigned int *guest_width)
   53.53 +{
   53.54 +    DECLARE_DOMCTL;
   53.55 +
   53.56 +    memset(&domctl, 0, sizeof(domctl));
   53.57 +    domctl.domain = domid;
   53.58 +    domctl.cmd = XEN_DOMCTL_get_address_size;
   53.59 +
   53.60 +    if ( do_domctl(xc_handle, &domctl) != 0 )
   53.61 +        return 1;
   53.62 +        
   53.63 +    *guest_width = domctl.u.address_size.size / 8;
   53.64 +    return 0;
   53.65 +}
   53.66 +
   53.67  int
   53.68  xc_domain_dumpcore_via_callback(int xc_handle,
   53.69                                  uint32_t domid,
   53.70 @@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h
   53.71                                  dumpcore_rtn_t dump_rtn)
   53.72  {
   53.73      xc_dominfo_t info;
   53.74 -    shared_info_t *live_shinfo = NULL;
   53.75 +    shared_info_any_t *live_shinfo = NULL;
   53.76 +    unsigned int guest_width; 
   53.77  
   53.78      int nr_vcpus = 0;
   53.79      char *dump_mem, *dump_mem_start = NULL;
   53.80 @@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h
   53.81      uint16_t strtab_idx;
   53.82      struct xc_core_section_headers *sheaders = NULL;
   53.83      Elf64_Shdr *shdr;
   53.84 + 
   53.85 +    if ( get_guest_width(xc_handle, domid, &guest_width) != 0 )
   53.86 +    {
   53.87 +        PERROR("Could not get address size for domain");
   53.88 +        return sts;
   53.89 +    }
   53.90  
   53.91      xc_core_arch_context_init(&arch_ctxt);
   53.92      if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
   53.93 @@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   53.94              goto out;
   53.95          }
   53.96  
   53.97 -        sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
   53.98 +        sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo,
   53.99                                     &p2m, &p2m_size);
  53.100          if ( sts != 0 )
  53.101              goto out;
  53.102 @@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h
  53.103      /* write out elf header */
  53.104      ehdr.e_shnum = sheaders->num;
  53.105      ehdr.e_shstrndx = strtab_idx;
  53.106 +    ehdr.e_machine = ELF_ARCH_MACHINE;
  53.107      sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr));
  53.108      if ( sts != 0 )
  53.109          goto out;
  53.110 @@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h
  53.111          goto out;
  53.112  
  53.113      /* elf note section: xen version */
  53.114 -    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle);
  53.115 +    sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width);
  53.116      if ( sts != 0 )
  53.117          goto out;
  53.118  
  53.119 @@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h
  53.120  
  53.121              if ( !auto_translated_physmap )
  53.122              {
  53.123 -                gmfn = p2m[i];
  53.124 -                if ( gmfn == INVALID_P2M_ENTRY )
  53.125 -                    continue;
  53.126 +                if ( guest_width >= sizeof(unsigned long) )
  53.127 +                {
  53.128 +                    if ( guest_width == sizeof(unsigned long) )
  53.129 +                        gmfn = p2m[i];
  53.130 +                    else
  53.131 +                        gmfn = ((uint64_t *)p2m)[i];
  53.132 +                    if ( gmfn == INVALID_P2M_ENTRY )
  53.133 +                        continue;
  53.134 +                }
  53.135 +                else
  53.136 +                {
  53.137 +                    gmfn = ((uint32_t *)p2m)[i];
  53.138 +                    if ( gmfn == (uint32_t)INVALID_P2M_ENTRY )
  53.139 +                       continue;
  53.140 +                }
  53.141  
  53.142                  p2m_array[j].pfn = i;
  53.143                  p2m_array[j].gmfn = gmfn;
  53.144 @@ -802,7 +844,7 @@ copy_done:
  53.145          /* When live dump-mode (-L option) is specified,
  53.146           * guest domain may reduce memory. pad with zero pages.
  53.147           */
  53.148 -        IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages);
  53.149 +        IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages);
  53.150          memset(dump_mem_start, 0, PAGE_SIZE);
  53.151          for (; j < nr_pages; j++) {
  53.152              sts = dump_rtn(args, dump_mem_start, PAGE_SIZE);
  53.153 @@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle,
  53.154      struct dump_args da;
  53.155      int sts;
  53.156  
  53.157 -    if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
  53.158 +    if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 )
  53.159      {
  53.160          PERROR("Could not open corefile %s", corename);
  53.161          return -errno;
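
xc_core.c no longer assumes the guest and the tools share a pointer size: it asks Xen for the guest's address size via XEN_DOMCTL_get_address_size, carries the result around as guest_width (in bytes), and reads p2m entries as 32-bit or 64-bit values accordingly; for narrow guests the xen_version note's pagesize field is also shifted so it lands where a 32-bit layout expects it. A minimal sketch of the entry-size dispatch, not part of the patch, with illustrative names (the real code uses xen_pfn_t and INVALID_P2M_ENTRY):

    #include <stdint.h>

    #define INVALID_P2M_ENTRY_64 (~0ULL)
    #define INVALID_P2M_ENTRY_32 (~0U)

    /*
     * Read p2m entry i from a mapped guest p2m table whose entries are
     * guest_width bytes wide (4 for a 32-bit guest, 8 for a 64-bit one).
     * Returns 1 and stores the frame number, or 0 if the slot is unpopulated.
     */
    static int read_p2m_entry(const void *p2m, unsigned int guest_width,
                              unsigned long i, uint64_t *gmfn)
    {
        if (guest_width == 8) {
            *gmfn = ((const uint64_t *)p2m)[i];
            return *gmfn != INVALID_P2M_ENTRY_64;
        }
        *gmfn = ((const uint32_t *)p2m)[i];
        return *gmfn != INVALID_P2M_ENTRY_32;
    }
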
    54.1 --- a/tools/libxc/xc_core.h	Wed Jan 28 12:22:58 2009 +0900
    54.2 +++ b/tools/libxc/xc_core.h	Wed Jan 28 13:06:45 2009 +0900
    54.3 @@ -23,7 +23,7 @@
    54.4  
    54.5  #include "xen/version.h"
    54.6  #include "xg_private.h"
    54.7 -#include "xen/elfstructs.h"
    54.8 +#include "xen/libelf/elfstructs.h"
    54.9  
   54.10  /* section names */
   54.11  #define XEN_DUMPCORE_SEC_NOTE                   ".note.Xen"
   54.12 @@ -136,12 +136,12 @@ int xc_core_arch_auto_translated_physmap
   54.13  struct xc_core_arch_context;
   54.14  int xc_core_arch_memory_map_get(int xc_handle,
   54.15                                  struct xc_core_arch_context *arch_ctxt,
   54.16 -                                xc_dominfo_t *info, shared_info_t *live_shinfo,
   54.17 +                                xc_dominfo_t *info, shared_info_any_t *live_shinfo,
   54.18                                  xc_core_memory_map_t **mapp,
   54.19                                  unsigned int *nr_entries);
   54.20 -int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
   54.21 -                         shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
   54.22 -                         unsigned long *pfnp);
   54.23 +int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width,
   54.24 +                         xc_dominfo_t *info, shared_info_any_t *live_shinfo,
   54.25 +                         xen_pfn_t **live_p2m, unsigned long *pfnp);
   54.26  
   54.27  
   54.28  #if defined (__i386__) || defined (__x86_64__)
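
The xc_core interfaces now take shared_info_any_t, which in the Xen foreign-interface headers is a union over the guest's possible shared_info layouts (plus a native-layout member, used by the ia64 code above as live_shinfo->s), together with the guest_width needed to pick the right view. An illustrative sketch of that access pattern -- the types below are abbreviated stand-ins, and the x86 code later in this changeset uses an equivalent GET_FIELD macro keyed on guest_width:

    #include <stdint.h>

    /* Abbreviated stand-ins for the 32-bit and 64-bit shared_info layouts. */
    typedef struct { struct { uint32_t pfn_to_mfn_frame_list_list; } arch; } shared_info_x86_32_t;
    typedef struct { struct { uint64_t pfn_to_mfn_frame_list_list; } arch; } shared_info_x86_64_t;

    typedef union {
        shared_info_x86_32_t x32;
        shared_info_x86_64_t x64;
    } shared_info_any_t;

    /* Pick the field from whichever layout matches the guest's word size. */
    #define GET_FIELD(_p, _f, _w) ((_w) == 8 ? (_p)->x64._f : (_p)->x32._f)

    static uint64_t frame_list_list_pfn(const shared_info_any_t *si, unsigned int guest_width)
    {
        return GET_FIELD(si, arch.pfn_to_mfn_frame_list_list, guest_width);
    }
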
    55.1 --- a/tools/libxc/xc_core_ia64.c	Wed Jan 28 12:22:58 2009 +0900
    55.2 +++ b/tools/libxc/xc_core_ia64.c	Wed Jan 28 13:06:45 2009 +0900
    55.3 @@ -68,7 +68,7 @@ xc_core_arch_auto_translated_physmap(con
    55.4  /* see setup_guest() @ xc_linux_build.c */
    55.5  static int
    55.6  memory_map_get_old_domu(int xc_handle, xc_dominfo_t *info,
    55.7 -                        shared_info_t *live_shinfo,
    55.8 +                        shared_info_any_t *live_shinfo,
    55.9                          xc_core_memory_map_t **mapp, unsigned int *nr_entries)
   55.10  {
   55.11      xc_core_memory_map_t *map = NULL;
   55.12 @@ -96,7 +96,7 @@ out:
   55.13  /* see setup_guest() @ xc_ia64_hvm_build.c */
   55.14  static int
   55.15  memory_map_get_old_hvm(int xc_handle, xc_dominfo_t *info, 
   55.16 -                       shared_info_t *live_shinfo,
   55.17 +                       shared_info_any_t *live_shinfo,
   55.18                         xc_core_memory_map_t **mapp, unsigned int *nr_entries)
   55.19  {
   55.20      const xc_core_memory_map_t gfw_map[] = {
   55.21 @@ -155,7 +155,7 @@ out:
   55.22  
   55.23  static int
   55.24  memory_map_get_old(int xc_handle, xc_dominfo_t *info, 
   55.25 -                   shared_info_t *live_shinfo,
   55.26 +                   shared_info_any_t *live_shinfo,
   55.27                     xc_core_memory_map_t **mapp, unsigned int *nr_entries)
   55.28  {
   55.29      if ( info->hvm )
   55.30 @@ -170,7 +170,8 @@ memory_map_get_old(int xc_handle, xc_dom
   55.31  int
   55.32  xc_core_arch_memory_map_get(int xc_handle,
   55.33                              struct xc_core_arch_context *arch_ctxt,
   55.34 -                            xc_dominfo_t *info, shared_info_t *live_shinfo,
   55.35 +                            xc_dominfo_t *info,
   55.36 +                            shared_info_any_t *live_shinfo,
   55.37                              xc_core_memory_map_t **mapp,
   55.38                              unsigned int *nr_entries)
   55.39  {
   55.40 @@ -190,8 +191,8 @@ xc_core_arch_memory_map_get(int xc_handl
   55.41      }
   55.42  
   55.43      /* copy before use in case someone updating them */
   55.44 -    if (xc_ia64_copy_memmap(xc_handle, info->domid, live_shinfo, &memmap_info,
   55.45 -                            NULL)) {
   55.46 +    if (xc_ia64_copy_memmap(xc_handle, info->domid, &live_shinfo->s,
   55.47 +                            &memmap_info, NULL)) {
   55.48          goto old;
   55.49      }
   55.50  
   55.51 @@ -235,8 +236,8 @@ old:
   55.52  }
   55.53  
   55.54  int
   55.55 -xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
   55.56 -                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
   55.57 +xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
   55.58 +                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
   55.59                       unsigned long *pfnp)
   55.60  {
   55.61      /*
    56.1 --- a/tools/libxc/xc_core_x86.c	Wed Jan 28 12:22:58 2009 +0900
    56.2 +++ b/tools/libxc/xc_core_x86.c	Wed Jan 28 13:06:45 2009 +0900
    56.3 @@ -20,9 +20,25 @@
    56.4  
    56.5  #include "xg_private.h"
    56.6  #include "xc_core.h"
    56.7 +#include "xc_e820.h"
    56.8  
    56.9 -/* Don't yet support cross-address-size core dump */
   56.10 -#define guest_width (sizeof (unsigned long))
   56.11 +#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f))
   56.12 +
   56.13 +#ifndef MAX
   56.14 +#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b))
   56.15 +#endif
   56.16 +
   56.17 +int
   56.18 +xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
   56.19 +                              unsigned long pfn)
   56.20 +{
   56.21 +    if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */
   56.22 +        || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
   56.23 +            && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */
   56.24 +        return 0;
   56.25 +    return 1;
   56.26 +}
   56.27 +
   56.28  
   56.29  static int nr_gpfns(int xc_handle, domid_t domid)
   56.30  {
   56.31 @@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con
   56.32  
   56.33  int
   56.34  xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused,
   56.35 -                            xc_dominfo_t *info, shared_info_t *live_shinfo,
   56.36 +                            xc_dominfo_t *info, shared_info_any_t *live_shinfo,
   56.37                              xc_core_memory_map_t **mapp,
   56.38                              unsigned int *nr_entries)
   56.39  {
   56.40 @@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl
   56.41  }
   56.42  
   56.43  int
   56.44 -xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
   56.45 -                     shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
   56.46 +xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
   56.47 +                     shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
   56.48                       unsigned long *pfnp)
   56.49  {
   56.50      /* Double and single indirect references to the live P2M table */
   56.51      xen_pfn_t *live_p2m_frame_list_list = NULL;
   56.52      xen_pfn_t *live_p2m_frame_list = NULL;
   56.53 +    /* Copies of the above. */
   56.54 +    xen_pfn_t *p2m_frame_list_list = NULL;
   56.55 +    xen_pfn_t *p2m_frame_list = NULL;
   56.56 +
   56.57      uint32_t dom = info->domid;
   56.58      unsigned long p2m_size = nr_gpfns(xc_handle, info->domid);
   56.59      int ret = -1;
   56.60      int err;
   56.61 +    int i;
   56.62  
   56.63      if ( p2m_size < info->nr_pages  )
   56.64      {
   56.65 @@ -80,7 +101,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
   56.66  
   56.67      live_p2m_frame_list_list =
   56.68          xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
   56.69 -                             live_shinfo->arch.pfn_to_mfn_frame_list_list);
   56.70 +                             GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list));
   56.71  
   56.72      if ( !live_p2m_frame_list_list )
   56.73      {
   56.74 @@ -88,9 +109,28 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
   56.75          goto out;
   56.76      }
   56.77  
   56.78 +    /* Get a local copy of the live_P2M_frame_list_list */
   56.79 +    if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
   56.80 +    {
   56.81 +        ERROR("Couldn't allocate p2m_frame_list_list array");
   56.82 +        goto out;
   56.83 +    }
   56.84 +    memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
   56.85 +
   56.86 +    /* Canonicalize guest's unsigned long vs ours */
   56.87 +    if ( guest_width > sizeof(unsigned long) )
   56.88 +        for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
   56.89 +            if ( i < PAGE_SIZE/guest_width )
   56.90 +                p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
   56.91 +            else
   56.92 +                p2m_frame_list_list[i] = 0;
   56.93 +    else if ( guest_width < sizeof(unsigned long) )
   56.94 +        for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
   56.95 +            p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
   56.96 +
   56.97      live_p2m_frame_list =
   56.98          xc_map_foreign_pages(xc_handle, dom, PROT_READ,
   56.99 -                             live_p2m_frame_list_list,
  56.100 +                             p2m_frame_list_list,
  56.101                               P2M_FLL_ENTRIES);
  56.102  
  56.103      if ( !live_p2m_frame_list )
  56.104 @@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d
  56.105          goto out;
  56.106      }
  56.107  
  56.108 +    /* Get a local copy of the live_P2M_frame_list */
  56.109 +    if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
  56.110 +    {
  56.111 +        ERROR("Couldn't allocate p2m_frame_list array");
  56.112 +        goto out;
  56.113 +    }
  56.114 +    memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
  56.115 +    memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
  56.116 +
  56.117 +    /* Canonicalize guest's unsigned long vs ours */
  56.118 +    if ( guest_width > sizeof(unsigned long) )
  56.119 +        for ( i = 0; i < P2M_FL_ENTRIES; i++ )
  56.120 +            p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
  56.121 +    else if ( guest_width < sizeof(unsigned long) )
  56.122 +        for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
  56.123 +            p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
  56.124 +
  56.125      *live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
  56.126 -                                    live_p2m_frame_list,
  56.127 +                                    p2m_frame_list,
  56.128                                      P2M_FL_ENTRIES);
  56.129  
  56.130      if ( !*live_p2m )
  56.131 @@ -122,6 +179,12 @@ out:
  56.132      if ( live_p2m_frame_list )
  56.133          munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
  56.134  
  56.135 +    if ( p2m_frame_list_list )
  56.136 +        free(p2m_frame_list_list);
  56.137 +
  56.138 +    if ( p2m_frame_list )
  56.139 +        free(p2m_frame_list);
  56.140 +
  56.141      errno = err;
  56.142      return ret;
  56.143  }
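
Both frame-list pages are copied out of the guest and then canonicalized to the tools' word size in place. The loop direction is the subtle part: when the guest's entries are narrower than the tools' (32-bit guest, 64-bit dom0) every entry grows as it is rewritten, so the loop runs from the top index downward to avoid overwriting 32-bit source slots that have not been read yet; in the opposite case it runs forward, keeping only the entries that still fit in the page and zeroing the rest. A standalone sketch of the widening case (buffer size and names are illustrative, not the patch's own):

    #include <stdint.h>

    #define PAGE_SIZE 4096

    /*
     * Widen a page of 32-bit guest frame numbers into 64-bit tools entries,
     * in place.  Walking backwards guarantees each 32-bit source slot is
     * read before the growing 64-bit destination slot overwrites it.
     */
    static void widen_frame_list(void *page)
    {
        uint64_t *dst = page;
        const uint32_t *src = page;
        long i;

        for (i = PAGE_SIZE / sizeof(uint64_t) - 1; i >= 0; i--)
            dst[i] = src[i];
    }
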
    57.1 --- a/tools/libxc/xc_core_x86.h	Wed Jan 28 12:22:58 2009 +0900
    57.2 +++ b/tools/libxc/xc_core_x86.h	Wed Jan 28 13:06:45 2009 +0900
    57.3 @@ -21,15 +21,8 @@
    57.4  #ifndef XC_CORE_X86_H
    57.5  #define XC_CORE_X86_H
    57.6  
    57.7 -#if defined(__i386__) || defined(__x86_64__)
    57.8  #define ELF_ARCH_DATA           ELFDATA2LSB
    57.9 -#if defined (__i386__)
   57.10 -# define ELF_ARCH_MACHINE       EM_386
   57.11 -#else
   57.12 -# define ELF_ARCH_MACHINE       EM_X86_64
   57.13 -#endif
   57.14 -#endif /* __i386__ or __x86_64__ */
   57.15 -
   57.16 +#define ELF_ARCH_MACHINE       (guest_width == 8 ? EM_X86_64 : EM_386)
   57.17  
   57.18  struct xc_core_arch_context {
   57.19      /* nothing */
   57.20 @@ -40,8 +33,10 @@ struct xc_core_arch_context {
   57.21  #define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \
   57.22                                                                  (0)
   57.23  #define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn)    (0)
   57.24 -#define xc_core_arch_gpfn_may_present(arch_ctxt, i)             (1)
   57.25  
   57.26 +int
   57.27 +xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
   57.28 +                              unsigned long pfn);
   57.29  static inline int
   57.30  xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
   57.31                                struct xc_core_section_headers *sheaders,
    58.1 --- a/tools/libxc/xc_dom.h	Wed Jan 28 12:22:58 2009 +0900
    58.2 +++ b/tools/libxc/xc_dom.h	Wed Jan 28 13:06:45 2009 +0900
    58.3 @@ -1,4 +1,4 @@
    58.4 -#include <xen/libelf.h>
    58.5 +#include <xen/libelf/libelf.h>
    58.6  
    58.7  #define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
    58.8  
    59.1 --- a/tools/libxc/xc_domain.c	Wed Jan 28 12:22:58 2009 +0900
    59.2 +++ b/tools/libxc/xc_domain.c	Wed Jan 28 13:06:45 2009 +0900
    59.3 @@ -537,33 +537,6 @@ int xc_domain_memory_populate_physmap(in
    59.4      return err;
    59.5  }
    59.6  
    59.7 -int xc_domain_memory_translate_gpfn_list(int xc_handle,
    59.8 -                                         uint32_t domid,
    59.9 -                                         unsigned long nr_gpfns,
   59.10 -                                         xen_pfn_t *gpfn_list,
   59.11 -                                         xen_pfn_t *mfn_list)
   59.12 -{
   59.13 -    int err;
   59.14 -    struct xen_translate_gpfn_list translate_gpfn_list = {
   59.15 -        .domid    = domid,
   59.16 -        .nr_gpfns = nr_gpfns,
   59.17 -    };
   59.18 -    set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list);
   59.19 -    set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list);
   59.20 -
   59.21 -    err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list);
   59.22 -
   59.23 -    if ( err != 0 )
   59.24 -    {
   59.25 -        DPRINTF("Failed translation for dom %d (%ld PFNs)\n",
   59.26 -                domid, nr_gpfns);
   59.27 -        errno = -err;
   59.28 -        err = -1;
   59.29 -    }
   59.30 -
   59.31 -    return err;
   59.32 -}
   59.33 -
   59.34  static int xc_domain_memory_pod_target(int xc_handle,
   59.35                                         int op,
   59.36                                         uint32_t domid,
   59.37 @@ -958,7 +931,8 @@ int xc_domain_bind_pt_irq(
   59.38      bind->hvm_domid = domid;
   59.39      bind->irq_type = irq_type;
   59.40      bind->machine_irq = machine_irq;
   59.41 -    if ( irq_type == PT_IRQ_TYPE_PCI )
   59.42 +    if ( irq_type == PT_IRQ_TYPE_PCI ||
   59.43 +         irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
   59.44      {
   59.45          bind->u.pci.bus = bus;
   59.46          bind->u.pci.device = device;    
    60.1 --- a/tools/libxc/xc_elf.h	Wed Jan 28 12:22:58 2009 +0900
    60.2 +++ b/tools/libxc/xc_elf.h	Wed Jan 28 13:06:45 2009 +0900
    60.3 @@ -1,1 +1,1 @@
    60.4 -#include <xen/elfstructs.h>
    60.5 +#include <xen/libelf/elfstructs.h>
    61.1 --- a/tools/libxc/xc_hvm_build.c	Wed Jan 28 12:22:58 2009 +0900
    61.2 +++ b/tools/libxc/xc_hvm_build.c	Wed Jan 28 13:06:45 2009 +0900
    61.3 @@ -15,100 +15,55 @@
    61.4  #include <xen/foreign/x86_64.h>
    61.5  #include <xen/hvm/hvm_info_table.h>
    61.6  #include <xen/hvm/params.h>
    61.7 -#include "xc_e820.h"
    61.8 +#include <xen/hvm/e820.h>
    61.9  
   61.10 -#include <xen/libelf.h>
   61.11 +#include <xen/libelf/libelf.h>
   61.12  
   61.13  #define SUPERPAGE_PFN_SHIFT  9
   61.14  #define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
   61.15  
   61.16 -#define SCRATCH_PFN 0xFFFFF
   61.17 +#define SPECIALPAGE_BUFIOREQ 0
   61.18 +#define SPECIALPAGE_XENSTORE 1
   61.19 +#define SPECIALPAGE_IOREQ    2
   61.20 +#define SPECIALPAGE_IDENT_PT 3
   61.21 +#define SPECIALPAGE_SHINFO   4
   61.22 +#define NR_SPECIAL_PAGES     5
   61.23 +#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
   61.24  
   61.25 -#define SPECIALPAGE_GUARD    0
   61.26 -#define SPECIALPAGE_BUFIOREQ 1
   61.27 -#define SPECIALPAGE_XENSTORE 2
   61.28 -#define SPECIALPAGE_IOREQ    3
   61.29 -#define SPECIALPAGE_IDENT_PT 4
   61.30 -#define NR_SPECIAL_PAGES     5
   61.31 -
   61.32 -static void build_e820map(void *e820_page, unsigned long long mem_size)
   61.33 +static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
   61.34  {
   61.35 -    struct e820entry *e820entry =
   61.36 -        (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET);
   61.37 -    unsigned long long extra_mem_size = 0;
   61.38 -    unsigned char nr_map = 0;
   61.39 +    struct hvm_info_table *hvm_info = (struct hvm_info_table *)
   61.40 +        (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
   61.41 +    uint64_t lowmem_end = mem_size, highmem_end = 0;
   61.42 +    uint8_t sum;
   61.43 +    int i;
   61.44  
   61.45 -    /*
   61.46 -     * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
   61.47 -     * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
   61.48 -     * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
   61.49 -     */
   61.50 -    if ( mem_size > HVM_BELOW_4G_RAM_END )
   61.51 +    if ( lowmem_end > HVM_BELOW_4G_RAM_END )
   61.52      {
   61.53 -        extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
   61.54 -        mem_size = HVM_BELOW_4G_RAM_END;
   61.55 +        highmem_end = lowmem_end + (1ull<<32) - HVM_BELOW_4G_RAM_END;
   61.56 +        lowmem_end = HVM_BELOW_4G_RAM_END;
   61.57      }
   61.58  
   61.59 -    /* 0x0-0x9FC00: Ordinary RAM. */
   61.60 -    e820entry[nr_map].addr = 0x0;
   61.61 -    e820entry[nr_map].size = 0x9FC00;
   61.62 -    e820entry[nr_map].type = E820_RAM;
   61.63 -    nr_map++;
   61.64 +    memset(hvm_info_page, 0, PAGE_SIZE);
   61.65  
   61.66 -    /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
   61.67 -    e820entry[nr_map].addr = 0x9FC00;
   61.68 -    e820entry[nr_map].size = 0x400;
   61.69 -    e820entry[nr_map].type = E820_RESERVED;
   61.70 -    nr_map++;
   61.71 -
   61.72 -    /*
   61.73 -     * Following regions are standard regions of the PC memory map.
   61.74 -     * They are not covered by e820 regions. OSes will not use as RAM.
   61.75 -     * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
   61.76 -     * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
   61.77 -     * TODO: hvmloader should free pages which turn out to be unused.
   61.78 -     */
   61.79 +    /* Fill in the header. */
   61.80 +    strncpy(hvm_info->signature, "HVM INFO", 8);
   61.81 +    hvm_info->length = sizeof(struct hvm_info_table);
   61.82  
   61.83 -    /*
   61.84 -     * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
   61.85 -     *                   We *cannot* mark as E820_ACPI, for two reasons:
   61.86 -     *                    1. ACPI spec. says that E820_ACPI regions below
   61.87 -     *                       16MB must clip INT15h 0x88 and 0xe801 queries.
   61.88 -     *                       Our rombios doesn't do this.
   61.89 -     *                    2. The OS is allowed to reclaim ACPI memory after
   61.90 -     *                       parsing the tables. But our FACS is in this
   61.91 -     *                       region and it must not be reclaimed (it contains
   61.92 -     *                       the ACPI global lock!).
   61.93 -     * 0xF0000-0x100000: System BIOS.
   61.94 -     * TODO: hvmloader should free pages which turn out to be unused.
   61.95 -     */
   61.96 -    e820entry[nr_map].addr = 0xE0000;
   61.97 -    e820entry[nr_map].size = 0x20000;
   61.98 -    e820entry[nr_map].type = E820_RESERVED;
   61.99 -    nr_map++;
  61.100 +    /* Sensible defaults: these can be overridden by the caller. */
  61.101 +    hvm_info->acpi_enabled = 1;
  61.102 +    hvm_info->apic_mode = 1;
  61.103 +    hvm_info->nr_vcpus = 1;
  61.104  
  61.105 -    /* Low RAM goes here. Reserve space for special pages. */
  61.106 -    e820entry[nr_map].addr = 0x100000;
  61.107 -    e820entry[nr_map].size = (mem_size - 0x100000 -
  61.108 -                              PAGE_SIZE * NR_SPECIAL_PAGES);
  61.109 -    e820entry[nr_map].type = E820_RAM;
  61.110 -    nr_map++;
  61.111 +    /* Memory parameters. */
  61.112 +    hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT;
  61.113 +    hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
  61.114 +    hvm_info->reserved_mem_pgstart = special_pfn(0);
  61.115  
  61.116 -    /* Explicitly reserve space for special pages (excluding guard page). */
  61.117 -    e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
  61.118 -    e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
  61.119 -    e820entry[nr_map].type = E820_RESERVED;
  61.120 -    nr_map++;
  61.121 -
  61.122 -    if ( extra_mem_size )
  61.123 -    {
  61.124 -        e820entry[nr_map].addr = (1ULL << 32);
  61.125 -        e820entry[nr_map].size = extra_mem_size;
  61.126 -        e820entry[nr_map].type = E820_RAM;
  61.127 -        nr_map++;
  61.128 -    }
  61.129 -
  61.130 -    *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map;
  61.131 +    /* Finish with the checksum. */
  61.132 +    for ( i = 0, sum = 0; i < hvm_info->length; i++ )
  61.133 +        sum += ((uint8_t *)hvm_info)[i];
  61.134 +    hvm_info->checksum = -sum;
  61.135  }
  61.136  
  61.137  static int loadelfimage(
  61.138 @@ -153,10 +108,10 @@ static int setup_guest(int xc_handle,
  61.139      unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
  61.140      unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT);
  61.141      unsigned long pod_pages = 0;
  61.142 -    unsigned long special_page_nr, entry_eip, cur_pages;
  61.143 +    unsigned long entry_eip, cur_pages;
  61.144      struct xen_add_to_physmap xatp;
  61.145      struct shared_info *shared_info;
  61.146 -    void *e820_page;
  61.147 +    void *hvm_info_page;
  61.148      uint32_t *ident_pt;
  61.149      struct elf_binary elf;
  61.150      uint64_t v_start, v_end;
  61.151 @@ -289,23 +244,22 @@ static int setup_guest(int xc_handle,
  61.152      if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
  61.153          goto error_out;
  61.154  
  61.155 -    if ( (e820_page = xc_map_foreign_range(
  61.156 +    if ( (hvm_info_page = xc_map_foreign_range(
  61.157                xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
  61.158 -              HVM_E820_PAGE >> PAGE_SHIFT)) == NULL )
  61.159 +              HVM_INFO_PFN)) == NULL )
  61.160          goto error_out;
  61.161 -    memset(e820_page, 0, PAGE_SIZE);
  61.162 -    build_e820map(e820_page, v_end);
  61.163 -    munmap(e820_page, PAGE_SIZE);
  61.164 +    build_hvm_info(hvm_info_page, v_end);
  61.165 +    munmap(hvm_info_page, PAGE_SIZE);
  61.166  
  61.167      /* Map and initialise shared_info page. */
  61.168      xatp.domid = dom;
  61.169      xatp.space = XENMAPSPACE_shared_info;
  61.170      xatp.idx   = 0;
  61.171 -    xatp.gpfn  = SCRATCH_PFN;
  61.172 +    xatp.gpfn  = special_pfn(SPECIALPAGE_SHINFO);
  61.173      if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
  61.174           ((shared_info = xc_map_foreign_range(
  61.175               xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
  61.176 -             SCRATCH_PFN)) == NULL) )
  61.177 +             special_pfn(SPECIALPAGE_SHINFO))) == NULL) )
  61.178          goto error_out;
  61.179      memset(shared_info, 0, PAGE_SIZE);
  61.180      /* NB. evtchn_upcall_mask is unused: leave as zero. */
  61.181 @@ -313,31 +267,28 @@ static int setup_guest(int xc_handle,
  61.182             sizeof(shared_info->evtchn_mask));
  61.183      munmap(shared_info, PAGE_SIZE);
  61.184  
  61.185 -    special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END)
  61.186 -                        ? (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT)
  61.187 -                        : (v_end >> PAGE_SHIFT))
  61.188 -                       - NR_SPECIAL_PAGES);
  61.189 -
  61.190 -    /* Paranoia: clean special pages. */
  61.191 +    /* Allocate and clear special pages. */
  61.192      for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
  61.193 -        if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) )
  61.194 +    {
  61.195 +        xen_pfn_t pfn = special_pfn(i);
  61.196 +        if ( i == SPECIALPAGE_SHINFO )
  61.197 +            continue;
  61.198 +        rc = xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &pfn);
  61.199 +        if ( rc != 0 )
  61.200 +        {
  61.201 +            PERROR("Could not allocate %d'th special page.\n", i);
  61.202              goto error_out;
  61.203 -
  61.204 -    /* Free the guard page that separates low RAM from special pages. */
  61.205 -    rc = xc_domain_memory_decrease_reservation(
  61.206 -        xc_handle, dom, 1, 0, &page_array[special_page_nr]);
  61.207 -    if ( rc != 0 )
  61.208 -    {
  61.209 -        PERROR("Could not deallocate guard page for HVM guest.\n");
  61.210 -        goto error_out;
  61.211 +        }
  61.212 +        if ( xc_clear_domain_page(xc_handle, dom, special_pfn(i)) )
  61.213 +            goto error_out;
  61.214      }
  61.215  
  61.216      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
  61.217 -                     special_page_nr + SPECIALPAGE_XENSTORE);
  61.218 +                     special_pfn(SPECIALPAGE_XENSTORE));
  61.219      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
  61.220 -                     special_page_nr + SPECIALPAGE_BUFIOREQ);
  61.221 +                     special_pfn(SPECIALPAGE_BUFIOREQ));
  61.222      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
  61.223 -                     special_page_nr + SPECIALPAGE_IOREQ);
  61.224 +                     special_pfn(SPECIALPAGE_IOREQ));
  61.225  
  61.226      /*
  61.227       * Identity-map page table is required for running with CR0.PG=0 when
  61.228 @@ -345,14 +296,14 @@ static int setup_guest(int xc_handle,
  61.229       */
  61.230      if ( (ident_pt = xc_map_foreign_range(
  61.231                xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
  61.232 -              special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL )
  61.233 +              special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
  61.234          goto error_out;
  61.235      for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
  61.236          ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
  61.237                         _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
  61.238      munmap(ident_pt, PAGE_SIZE);
  61.239      xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
  61.240 -                     (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
  61.241 +                     special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
  61.242  
  61.243      /* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
  61.244      entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
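
The domain builder now hands the firmware a hvm_info_table (low/high memory ends, reserved-PFN start, vcpu count and ACPI/APIC defaults) instead of a pre-built E820 map, and the special pages move to fixed PFNs just below 0xff000 via special_pfn(). The table carries a simple additive checksum: summing all length bytes, including the checksum field itself, must give zero modulo 256, which is what hvm_info->checksum = -sum above arranges. A hedged verification sketch, not part of the patch; the struct is abbreviated to the fields the check needs, and the authoritative layout lives in xen/hvm/hvm_info_table.h:

    #include <stdint.h>
    #include <string.h>

    /* Abbreviated stand-in for struct hvm_info_table. */
    struct hvm_info_stub {
        char     signature[8];     /* "HVM INFO" */
        uint32_t length;           /* number of bytes covered by the checksum */
        uint8_t  checksum;         /* chosen so the byte sum is 0 mod 256 */
        /* ... remaining fields elided ... */
    };

    static int hvm_info_valid(const void *table)
    {
        const struct hvm_info_stub *t = table;
        const uint8_t *p = table;
        uint8_t sum = 0;
        uint32_t i;

        if (memcmp(t->signature, "HVM INFO", 8) != 0)
            return 0;
        for (i = 0; i < t->length; i++)
            sum += p[i];           /* includes the checksum byte itself */
        return sum == 0;
    }
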
    62.1 --- a/tools/libxc/xc_private.c	Wed Jan 28 12:22:58 2009 +0900
    62.2 +++ b/tools/libxc/xc_private.c	Wed Jan 28 13:06:45 2009 +0900
    62.3 @@ -307,13 +307,6 @@ int xc_memory_op(int xc_handle,
    62.4              goto out1;
    62.5          }
    62.6          break;
    62.7 -    case XENMEM_remove_from_physmap:
    62.8 -        if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
    62.9 -        {
   62.10 -            PERROR("Could not lock");
   62.11 -            goto out1;
   62.12 -        }
   62.13 -        break;
   62.14      case XENMEM_current_reservation:
   62.15      case XENMEM_maximum_reservation:
   62.16      case XENMEM_maximum_gpfn:
   62.17 @@ -355,9 +348,6 @@ int xc_memory_op(int xc_handle,
   62.18      case XENMEM_add_to_physmap:
   62.19          unlock_pages(arg, sizeof(struct xen_add_to_physmap));
   62.20          break;
   62.21 -    case XENMEM_remove_from_physmap:
   62.22 -        unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
   62.23 -        break;
   62.24      case XENMEM_current_reservation:
   62.25      case XENMEM_maximum_reservation:
   62.26      case XENMEM_maximum_gpfn:
    63.1 --- a/tools/libxc/xc_ptrace_core.c	Wed Jan 28 12:22:58 2009 +0900
    63.2 +++ b/tools/libxc/xc_ptrace_core.c	Wed Jan 28 13:06:45 2009 +0900
    63.3 @@ -540,7 +540,9 @@ xc_waitdomain_core_elf(
    63.4                               XEN_ELFNOTE_DUMPCORE_XEN_VERSION,
    63.5                               (void**)&xen_version) < 0)
    63.6          goto out;
    63.7 -    if (xen_version->xen_version.pagesize != PAGE_SIZE)
    63.8 +    /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */
    63.9 +    if (xen_version->xen_version.pagesize != PAGE_SIZE &&
   63.10 +        (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE)
   63.11          goto out;
   63.12  
   63.13      /* .note.Xen: format_version */
    64.1 --- a/tools/libxc/xenctrl.h	Wed Jan 28 12:22:58 2009 +0900
    64.2 +++ b/tools/libxc/xenctrl.h	Wed Jan 28 13:06:45 2009 +0900
    64.3 @@ -628,12 +628,6 @@ int xc_domain_memory_populate_physmap(in
    64.4                                        unsigned int mem_flags,
    64.5                                        xen_pfn_t *extent_start);
    64.6  
    64.7 -int xc_domain_memory_translate_gpfn_list(int xc_handle,
    64.8 -                                         uint32_t domid,
    64.9 -                                         unsigned long nr_gpfns,
   64.10 -                                         xen_pfn_t *gpfn_list,
   64.11 -                                         xen_pfn_t *mfn_list);
   64.12 -
   64.13  int xc_domain_memory_set_pod_target(int xc_handle,
   64.14                                      uint32_t domid,
   64.15                                      uint64_t target_pages,
    65.1 --- a/tools/misc/Makefile	Wed Jan 28 12:22:58 2009 +0900
    65.2 +++ b/tools/misc/Makefile	Wed Jan 28 13:06:45 2009 +0900
    65.3 @@ -47,7 +47,7 @@ install: build
    65.4  
    65.5  .PHONY: clean
    65.6  clean:
    65.7 -	$(RM) *.o $(TARGETS) *~
    65.8 +	$(RM) *.o $(TARGETS) *~ $(DEPS)
    65.9  	set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done
   65.10  
   65.11  %.o: %.c $(HDRS) Makefile
   65.12 @@ -55,3 +55,5 @@ clean:
   65.13  
   65.14  xenperf xenpm: %: %.o Makefile
   65.15  	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl)
   65.16 +
   65.17 +-include $(DEPS)
    66.1 --- a/tools/misc/xenpm.c	Wed Jan 28 12:22:58 2009 +0900
    66.2 +++ b/tools/misc/xenpm.c	Wed Jan 28 13:06:45 2009 +0900
    66.3 @@ -21,83 +21,56 @@
    66.4  
    66.5  #include <stdio.h>
    66.6  #include <stdlib.h>
    66.7 +#include <unistd.h>
    66.8  #include <string.h>
    66.9  #include <getopt.h>
   66.10  #include <errno.h>
   66.11 +#include <signal.h>
   66.12  
   66.13  #include <xenctrl.h>
   66.14  #include <inttypes.h>
   66.15 +#include <sys/time.h>
   66.16  
   66.17  #define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
   66.18  
   66.19 +static int xc_fd;
   66.20 +static int max_cpu_nr;
   66.21 +
   66.22  /* help message */
   66.23  void show_help(void)
   66.24  {
   66.25      fprintf(stderr,
   66.26 -            "Usage:\n"
   66.27 -            "       xenpm get-cpuidle-states [cpuid]: list cpu idle information on CPU cpuid or all CPUs.\n"
   66.28 -            "       xenpm get-cpufreq-states [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
   66.29 -            "       xenpm get-cpufreq-para [cpuid]: list cpu frequency information on CPU cpuid or all CPUs.\n"
   66.30 -            "       xenpm set-scaling-maxfreq <cpuid> <HZ>: set max cpu frequency <HZ> on CPU <cpuid>.\n"
   66.31 -            "       xenpm set-scaling-minfreq <cpuid> <HZ>: set min cpu frequency <HZ> on CPU <cpuid>.\n"
   66.32 -            "       xenpm set-scaling-governor <cpuid> <name>: set scaling governor on CPU <cpuid>.\n"
   66.33 -            "       xenpm set-scaling-speed <cpuid> <num>: set scaling speed on CPU <cpuid>.\n"
   66.34 -            "       xenpm set-sampling-rate <cpuid> <num>: set sampling rate on CPU <cpuid>.\n"
   66.35 -            "       xenpm set-up-threshold <cpuid> <num>: set up threshold on CPU <cpuid>.\n");
    66.36 +            "Xen power management control tool\n\n"
   66.37 +            "usage: xenpm <command> [args]\n\n"
   66.38 +            "xenpm command list:\n\n"
   66.39 +            " get-cpuidle-states    [cpuid]       list cpu idle info of CPU <cpuid> or all\n"
   66.40 +            " get-cpufreq-states    [cpuid]       list cpu freq info of CPU <cpuid> or all\n"
   66.41 +            " get-cpufreq-para      [cpuid]       list cpu freq parameter of CPU <cpuid> or all\n"
   66.42 +            " set-scaling-maxfreq   [cpuid] <HZ>  set max cpu frequency <HZ> on CPU <cpuid>\n"
   66.43 +            "                                     or all CPUs\n"
   66.44 +            " set-scaling-minfreq   [cpuid] <HZ>  set min cpu frequency <HZ> on CPU <cpuid>\n"
   66.45 +            "                                     or all CPUs\n"
   66.46 +            " set-scaling-speed     [cpuid] <num> set scaling speed on CPU <cpuid> or all\n"
    66.47 +            "                                     it is used by the userspace governor.\n"
   66.48 +            " set-scaling-governor  [cpuid] <gov> set scaling governor on CPU <cpuid> or all\n"
   66.49 +            "                                     as userspace/performance/powersave/ondemand\n"
   66.50 +            " set-sampling-rate     [cpuid] <num> set sampling rate on CPU <cpuid> or all\n"
    66.51 +            "                                     it is used by the ondemand governor.\n"
   66.52 +            " set-up-threshold      [cpuid] <num> set up threshold on CPU <cpuid> or all\n"
    66.53 +            "                                     it is used by the ondemand governor.\n"
    66.54 +            " start                               start collecting Cx/Px statistics,\n"
   66.55 +            "                                     output after CTRL-C or SIGINT.\n"
   66.56 +            );
   66.57  }
   66.58 -
   66.59  /* wrapper function */
   66.60 -int help_func(int xc_fd, int cpuid, uint32_t value)
   66.61 +void help_func(int argc, char *argv[])
   66.62  {
   66.63      show_help();
   66.64 -    return 0;
   66.65  }
   66.66  
   66.67 -/* show cpu idle information on CPU cpuid */
   66.68 -static int show_cx_cpuid(int xc_fd, int cpuid)
   66.69 +static void print_cxstat(int cpuid, struct xc_cx_stat *cxstat)
   66.70  {
   66.71 -    int i, ret = 0;
   66.72 -    int max_cx_num = 0;
   66.73 -    struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
   66.74 -
   66.75 -    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
   66.76 -    if ( ret )
   66.77 -    {
   66.78 -        if ( errno == ENODEV )
   66.79 -        {
   66.80 -            fprintf(stderr, "Xen cpuidle is not enabled!\n");
   66.81 -            return -ENODEV;
   66.82 -        }
   66.83 -        else
   66.84 -        {
   66.85 -            fprintf(stderr, "[CPU%d] failed to get max C-state\n", cpuid);
   66.86 -            return -EINVAL;
   66.87 -        }
   66.88 -    }
   66.89 -
   66.90 -    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
   66.91 -    if ( !cxstat->triggers )
   66.92 -    {
   66.93 -        fprintf(stderr, "[CPU%d] failed to malloc for C-states triggers\n", cpuid);
   66.94 -        return -ENOMEM;
   66.95 -    }
   66.96 -    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
   66.97 -    if ( !cxstat->residencies )
   66.98 -    {
   66.99 -        fprintf(stderr, "[CPU%d] failed to malloc for C-states residencies\n", cpuid);
  66.100 -        free(cxstat->triggers);
  66.101 -        return -ENOMEM;
  66.102 -    }
  66.103 -
  66.104 -    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
  66.105 -    if( ret )
  66.106 -    {
  66.107 -        fprintf(stderr, "[CPU%d] failed to get C-states statistics "
  66.108 -                "information\n", cpuid);
  66.109 -        free(cxstat->triggers);
  66.110 -        free(cxstat->residencies);
  66.111 -        return -EINVAL;
  66.112 -    }
  66.113 +    int i;
  66.114  
  66.115      printf("cpu id               : %d\n", cpuid);
  66.116      printf("total C-states       : %d\n", cxstat->nr);
  66.117 @@ -110,88 +83,87 @@ static int show_cx_cpuid(int xc_fd, int 
  66.118          printf("                       residency  [%020"PRIu64" ms]\n",
  66.119                 cxstat->residencies[i]/1000000UL);
  66.120      }
  66.121 +    printf("\n");
  66.122 +}
  66.123  
  66.124 -    free(cxstat->triggers);
  66.125 -    free(cxstat->residencies);
  66.126 +/* show cpu idle information on CPU cpuid */
  66.127 +static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat)
  66.128 +{
  66.129 +    int ret = 0;
  66.130 +    int max_cx_num = 0;
  66.131 +
  66.132 +    ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
  66.133 +    if ( ret )
  66.134 +        return errno;
  66.135 +
  66.136 +    if ( !cxstat )
  66.137 +        return -EINVAL;
  66.138  
  66.139 -    printf("\n");
  66.140 +    cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
  66.141 +    if ( !cxstat->triggers )
  66.142 +        return -ENOMEM;
  66.143 +    cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
  66.144 +    if ( !cxstat->residencies )
  66.145 +    {
  66.146 +        free(cxstat->triggers);
  66.147 +        return -ENOMEM;
  66.148 +    }
  66.149 +
  66.150 +    ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
  66.151 +    if( ret )
  66.152 +    {
  66.153 +        int temp = errno;
  66.154 +        free(cxstat->triggers);
  66.155 +        free(cxstat->residencies);
  66.156 +        cxstat->triggers = NULL;
  66.157 +        cxstat->residencies = NULL;
  66.158 +        return temp;
  66.159 +    }
  66.160 +
  66.161      return 0;
  66.162  }
  66.163  
  66.164 -int cxstates_func(int xc_fd, int cpuid, uint32_t value)
  66.165 +static int show_cxstat_by_cpuid(int xc_fd, int cpuid)
  66.166  {
  66.167      int ret = 0;
  66.168 -    xc_physinfo_t physinfo = { 0 };
  66.169 +    struct xc_cx_stat cxstatinfo;
  66.170 +
  66.171 +    ret = get_cxstat_by_cpuid(xc_fd, cpuid, &cxstatinfo);
  66.172 +    if ( ret )
  66.173 +        return ret;
  66.174 +
  66.175 +    print_cxstat(cpuid, &cxstatinfo);
  66.176 +
  66.177 +    free(cxstatinfo.triggers);
  66.178 +    free(cxstatinfo.residencies);
  66.179 +    return 0;
  66.180 +}
  66.181 +
  66.182 +void cxstat_func(int argc, char *argv[])
  66.183 +{
  66.184 +    int cpuid = -1;
  66.185 +
  66.186 +    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
  66.187 +        cpuid = -1;
  66.188 +
  66.189 +    if ( cpuid >= max_cpu_nr )
  66.190 +        cpuid = -1;
  66.191  
  66.192      if ( cpuid < 0 )
  66.193      {
  66.194 -        /* show cxstates on all cpu */
  66.195 -        ret = xc_physinfo(xc_fd, &physinfo);
  66.196 -        if ( ret )
  66.197 -        {
  66.198 -            fprintf(stderr, "failed to get the processor information\n");
  66.199 -        }
  66.200 -        else
  66.201 -        {
  66.202 -            int i;
  66.203 -            for ( i = 0; i < physinfo.nr_cpus; i++ )
  66.204 -            {
  66.205 -                if ( (ret = show_cx_cpuid(xc_fd, i)) == -ENODEV )
  66.206 -                    break;
  66.207 -            }
  66.208 -        }
  66.209 +        /* show cxstates on all cpus */
  66.210 +        int i;
  66.211 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.212 +            if ( show_cxstat_by_cpuid(xc_fd, i) == -ENODEV )
  66.213 +                break;
  66.214      }
  66.215      else
  66.216 -        ret = show_cx_cpuid(xc_fd, cpuid);
  66.217 -
  66.218 -    return ret;
  66.219 +        show_cxstat_by_cpuid(xc_fd, cpuid);
  66.220  }
  66.221  
  66.222 -/* show cpu frequency information on CPU cpuid */
  66.223 -static int show_px_cpuid(int xc_fd, int cpuid)
  66.224 +static void print_pxstat(int cpuid, struct xc_px_stat *pxstat)
  66.225  {
  66.226 -    int i, ret = 0;
  66.227 -    int max_px_num = 0;
  66.228 -    struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
  66.229 -
  66.230 -    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
  66.231 -    if ( ret )
  66.232 -    {
  66.233 -        if ( errno == ENODEV )
  66.234 -        {
  66.235 -            printf("Xen cpufreq is not enabled!\n");
  66.236 -            return -ENODEV;
  66.237 -        }
  66.238 -        else
  66.239 -        {
  66.240 -            fprintf(stderr, "[CPU%d] failed to get max P-state\n", cpuid);
  66.241 -            return -EINVAL;
  66.242 -        }
  66.243 -    }
  66.244 -
  66.245 -    pxstat->trans_pt = malloc(max_px_num * max_px_num *
  66.246 -                              sizeof(uint64_t));
  66.247 -    if ( !pxstat->trans_pt )
  66.248 -    {
  66.249 -        fprintf(stderr, "[CPU%d] failed to malloc for P-states transition table\n", cpuid);
  66.250 -        return -ENOMEM;
  66.251 -    }
  66.252 -    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
  66.253 -    if ( !pxstat->pt )
  66.254 -    {
  66.255 -        fprintf(stderr, "[CPU%d] failed to malloc for P-states table\n", cpuid);
  66.256 -        free(pxstat->trans_pt);
  66.257 -        return -ENOMEM;
  66.258 -    }
  66.259 -
  66.260 -    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
  66.261 -    if( ret )
  66.262 -    {
  66.263 -        fprintf(stderr, "[CPU%d] failed to get P-states statistics information\n", cpuid);
  66.264 -        free(pxstat->trans_pt);
  66.265 -        free(pxstat->pt);
  66.266 -        return -ENOMEM;
  66.267 -    }
  66.268 +    int i;
  66.269  
  66.270      printf("cpu id               : %d\n", cpuid);
  66.271      printf("total P-states       : %d\n", pxstat->total);
  66.272 @@ -211,40 +183,233 @@ static int show_px_cpuid(int xc_fd, int 
  66.273          printf("                       residency  [%020"PRIu64" ms]\n",
  66.274                 pxstat->pt[i].residency/1000000UL);
  66.275      }
  66.276 +    printf("\n");
  66.277 +}
  66.278  
  66.279 -    free(pxstat->trans_pt);
  66.280 -    free(pxstat->pt);
  66.281 +/* show cpu frequency information on CPU cpuid */
  66.282 +static int get_pxstat_by_cpuid(int xc_fd, int cpuid, struct xc_px_stat *pxstat)
  66.283 +{
  66.284 +    int ret = 0;
  66.285 +    int max_px_num = 0;
  66.286 +
  66.287 +    ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
  66.288 +    if ( ret )
  66.289 +        return errno;
  66.290 +
   66.291 +    if ( !pxstat )
  66.292 +        return -EINVAL;
  66.293  
  66.294 -    printf("\n");
  66.295 +    pxstat->trans_pt = malloc(max_px_num * max_px_num *
  66.296 +                              sizeof(uint64_t));
  66.297 +    if ( !pxstat->trans_pt )
  66.298 +        return -ENOMEM;
  66.299 +    pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
  66.300 +    if ( !pxstat->pt )
  66.301 +    {
  66.302 +        free(pxstat->trans_pt);
  66.303 +        return -ENOMEM;
  66.304 +    }
  66.305 +
  66.306 +    ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
  66.307 +    if( ret )
  66.308 +    {
  66.309 +        int temp = errno;
  66.310 +        free(pxstat->trans_pt);
  66.311 +        free(pxstat->pt);
  66.312 +        pxstat->trans_pt = NULL;
  66.313 +        pxstat->pt = NULL;
  66.314 +        return temp;
  66.315 +    }
  66.316 +
  66.317      return 0;
  66.318  }
  66.319  
  66.320 -int pxstates_func(int xc_fd, int cpuid, uint32_t value)
  66.321 +static int show_pxstat_by_cpuid(int xc_fd, int cpuid)
  66.322  {
  66.323      int ret = 0;
  66.324 -    xc_physinfo_t physinfo = { 0 };
  66.325 +    struct xc_px_stat pxstatinfo;
  66.326 +
  66.327 +    ret = get_pxstat_by_cpuid(xc_fd, cpuid, &pxstatinfo);
  66.328 +    if ( ret )
  66.329 +        return ret;
  66.330 +
  66.331 +    print_pxstat(cpuid, &pxstatinfo);
  66.332 +
  66.333 +    free(pxstatinfo.trans_pt);
  66.334 +    free(pxstatinfo.pt);
  66.335 +    return 0;
  66.336 +}
  66.337 +
  66.338 +void pxstat_func(int argc, char *argv[])
  66.339 +{
  66.340 +    int cpuid = -1;
  66.341 +
  66.342 +    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
  66.343 +        cpuid = -1;
  66.344 +
  66.345 +    if ( cpuid >= max_cpu_nr )
  66.346 +        cpuid = -1;
  66.347  
  66.348      if ( cpuid < 0 )
  66.349      {
  66.350 -        ret = xc_physinfo(xc_fd, &physinfo);
  66.351 -        if ( ret )
  66.352 +        /* show pxstates on all cpus */
  66.353 +        int i;
  66.354 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.355 +            if ( show_pxstat_by_cpuid(xc_fd, i) == -ENODEV )
  66.356 +                break;
  66.357 +    }
  66.358 +    else
  66.359 +        show_pxstat_by_cpuid(xc_fd, cpuid);
  66.360 +}
  66.361 +
  66.362 +static uint64_t usec_start, usec_end;
  66.363 +static struct xc_cx_stat *cxstat, *cxstat_start, *cxstat_end;
  66.364 +static struct xc_px_stat *pxstat, *pxstat_start, *pxstat_end;
  66.365 +static uint64_t *sum, *sum_cx, *sum_px;
  66.366 +
  66.367 +static void signal_int_handler(int signo)
  66.368 +{
  66.369 +    int i, j;
  66.370 +    struct timeval tv;
  66.371 +    int cx_cap = 0, px_cap = 0;
  66.372 +
  66.373 +    if ( gettimeofday(&tv, NULL) == -1 )
  66.374 +    {
  66.375 +        fprintf(stderr, "failed to get timeofday\n");
  66.376 +        return ;
  66.377 +    }
  66.378 +    usec_end = tv.tv_sec * 1000000UL + tv.tv_usec;
  66.379 +
  66.380 +    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
  66.381 +    {
  66.382 +        cx_cap = 1;
  66.383 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.384 +            if ( !get_cxstat_by_cpuid(xc_fd, i, &cxstat_end[i]) )
  66.385 +                for ( j = 0; j < cxstat_end[i].nr; j++ )
  66.386 +                    sum_cx[i] += cxstat_end[i].residencies[j] -
  66.387 +                                 cxstat_start[i].residencies[j];
  66.388 +    }
  66.389 +
  66.390 +    if ( get_pxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
  66.391 +    {
  66.392 +        px_cap = 1;
  66.393 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.394 +            if ( !get_pxstat_by_cpuid(xc_fd, i , &pxstat_end[i]) )
  66.395 +                for ( j = 0; j < pxstat_end[i].total; j++ )
  66.396 +                    sum_px[i] += pxstat_end[i].pt[j].residency -
  66.397 +                                 pxstat_start[i].pt[j].residency;
  66.398 +    }
  66.399 +
  66.400 +    printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL);
  66.401 +    for ( i = 0; i < max_cpu_nr; i++ )
  66.402 +    {
  66.403 +        uint64_t temp;
  66.404 +        printf("CPU%d:\n\tresidency\tpercentage\n", i);
  66.405 +        if ( cx_cap )
  66.406          {
  66.407 -            fprintf(stderr, "failed to get the processor information\n");
  66.408 +            for ( j = 0; j < cxstat_end[i].nr; j++ )
  66.409 +            {
  66.410 +                if ( sum_cx[i] > 0 )
  66.411 +                {
  66.412 +                    temp = cxstat_end[i].residencies[j] -
  66.413 +                           cxstat_start[i].residencies[j];
  66.414 +                    printf("  C%d\t%"PRIu64" ms\t%.2f%%\n", j,
  66.415 +                           temp / 1000000UL, 100UL * temp / (double)sum_cx[i]);
  66.416 +                }
  66.417 +            }
  66.418          }
  66.419 -        else
  66.420 +        if ( px_cap )
  66.421          {
  66.422 -            int i;
  66.423 -            for ( i = 0; i < physinfo.nr_cpus; i++ )
  66.424 +            for ( j = 0; j < pxstat_end[i].total; j++ )
  66.425              {
  66.426 -                if ( (ret = show_px_cpuid(xc_fd, i)) == -ENODEV )
  66.427 -                    break;
  66.428 +                if ( sum_px[i] > 0 )
  66.429 +                {
  66.430 +                    temp = pxstat_end[i].pt[j].residency -
  66.431 +                           pxstat_start[i].pt[j].residency;
  66.432 +                    printf("  P%d\t%"PRIu64" ms\t%.2f%%\n", j,
  66.433 +                           temp / 1000000UL, 100UL * temp / (double)sum_px[i]);
  66.434 +                }
  66.435              }
  66.436          }
  66.437 +        printf("\n");
  66.438      }
  66.439 -    else
  66.440 -        ret = show_px_cpuid(xc_fd, cpuid);
  66.441 +
   66.442 +    /* clean up and then exit */
  66.443 +    for ( i = 0; i < 2 * max_cpu_nr; i++ )
  66.444 +    {
  66.445 +        free(cxstat[i].triggers);
  66.446 +        free(cxstat[i].residencies);
  66.447 +        free(pxstat[i].trans_pt);
  66.448 +        free(pxstat[i].pt);
  66.449 +    }
  66.450 +    free(cxstat);
  66.451 +    free(pxstat);
  66.452 +    free(sum);
  66.453 +    xc_interface_close(xc_fd);
  66.454 +    exit(0);
  66.455 +}
  66.456 +
  66.457 +void start_gather_func(int argc, char *argv[])
  66.458 +{
  66.459 +    int i;
  66.460 +    struct timeval tv;
  66.461 +
  66.462 +    if ( gettimeofday(&tv, NULL) == -1 )
  66.463 +    {
  66.464 +        fprintf(stderr, "failed to get timeofday\n");
  66.465 +        return ;
  66.466 +    }
  66.467 +    usec_start = tv.tv_sec * 1000000UL + tv.tv_usec;
  66.468  
  66.469 -    return ret;
  66.470 +    sum = malloc(sizeof(uint64_t) * 2 * max_cpu_nr);
  66.471 +    if ( sum == NULL )
  66.472 +        return ;
  66.473 +    cxstat = malloc(sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
  66.474 +    if ( cxstat == NULL )
  66.475 +    {
  66.476 +        free(sum);
  66.477 +        return ;
  66.478 +    }
  66.479 +    pxstat = malloc(sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
  66.480 +    if ( pxstat == NULL )
  66.481 +    {
  66.482 +        free(sum);
  66.483 +        free(cxstat);
  66.484 +        return ;
  66.485 +    }
  66.486 +    memset(sum, 0, sizeof(uint64_t) * 2 * max_cpu_nr);
  66.487 +    memset(cxstat, 0, sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
  66.488 +    memset(pxstat, 0, sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
  66.489 +    sum_cx = sum;
  66.490 +    sum_px = sum + max_cpu_nr;
  66.491 +    cxstat_start = cxstat;
  66.492 +    cxstat_end = cxstat + max_cpu_nr;
  66.493 +    pxstat_start = pxstat;
  66.494 +    pxstat_end = pxstat + max_cpu_nr;
  66.495 +
  66.496 +    if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV &&
  66.497 +         get_pxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV )
  66.498 +    {
   66.499 +        fprintf(stderr, "Xen cpu idle and frequency are disabled!\n");
  66.500 +        return ;
  66.501 +    }
  66.502 +
  66.503 +    for ( i = 0; i < max_cpu_nr; i++ )
  66.504 +    {
  66.505 +        get_cxstat_by_cpuid(xc_fd, i, &cxstat_start[i]);
  66.506 +        get_pxstat_by_cpuid(xc_fd, i, &pxstat_start[i]);
  66.507 +    }
  66.508 +
  66.509 +    if (signal(SIGINT, signal_int_handler) == SIG_ERR)
  66.510 +    {
  66.511 +        fprintf(stderr, "failed to set signal int handler\n");
  66.512 +        free(sum);
  66.513 +        free(pxstat);
  66.514 +        free(cxstat);
  66.515 +        return ;
  66.516 +    }
  66.517 +
  66.518 +    pause();
  66.519  }
  66.520  
  66.521  /* print out parameters about cpu frequency */
  66.522 @@ -294,7 +459,8 @@ static void print_cpufreq_para(int cpuid
  66.523  
  66.524      printf("scaling_avail_freq   :");
  66.525      for ( i = 0; i < p_cpufreq->freq_num; i++ )
  66.526 -        if ( p_cpufreq->scaling_available_frequencies[i] == p_cpufreq->scaling_cur_freq )
  66.527 +        if ( p_cpufreq->scaling_available_frequencies[i] ==
  66.528 +             p_cpufreq->scaling_cur_freq )
  66.529              printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
  66.530          else
  66.531              printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
  66.532 @@ -308,7 +474,7 @@ static void print_cpufreq_para(int cpuid
  66.533  }
  66.534  
  66.535  /* show cpu frequency parameters information on CPU cpuid */
  66.536 -static int show_cpufreq_para_cpuid(int xc_fd, int cpuid)
  66.537 +static int show_cpufreq_para_by_cpuid(int xc_fd, int cpuid)
  66.538  {
  66.539      int ret = 0;
  66.540      struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
  66.541 @@ -381,159 +547,221 @@ out:
  66.542      return ret;
  66.543  }
  66.544  
  66.545 -int cpufreq_para_func(int xc_fd, int cpuid, uint32_t value)
  66.546 +void cpufreq_para_func(int argc, char *argv[])
  66.547  {
  66.548 -    int ret = 0;
  66.549 -    xc_physinfo_t physinfo = { 0 };
  66.550 +    int cpuid = -1;
  66.551 +
  66.552 +    if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
  66.553 +        cpuid = -1;
  66.554 +
  66.555 +    if ( cpuid >= max_cpu_nr )
  66.556 +        cpuid = -1;
  66.557 +
  66.558 +    if ( cpuid < 0 )
  66.559 +    {
   66.560 +        /* show cpu frequency information on all cpus */
  66.561 +        int i;
  66.562 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.563 +            if ( show_cpufreq_para_by_cpuid(xc_fd, i) == -ENODEV )
  66.564 +                break;
  66.565 +    }
  66.566 +    else
  66.567 +        show_cpufreq_para_by_cpuid(xc_fd, cpuid);
  66.568 +}
  66.569 +
  66.570 +void scaling_max_freq_func(int argc, char *argv[])
  66.571 +{
  66.572 +    int cpuid = -1, freq = -1;
  66.573 +
  66.574 +    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
  66.575 +                        sscanf(argv[0], "%d", &cpuid) != 1)) ||
  66.576 +         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
  66.577 +         argc == 0 )
  66.578 +    {
  66.579 +        fprintf(stderr, "failed to set scaling max freq\n");
  66.580 +        return ;
  66.581 +    }
  66.582  
  66.583      if ( cpuid < 0 )
  66.584      {
  66.585 -        ret = xc_physinfo(xc_fd, &physinfo);
  66.586 -        if ( ret )
  66.587 -        {
  66.588 -            fprintf(stderr, "failed to get the processor information\n");
  66.589 -        }
  66.590 -        else
  66.591 -        {
  66.592 -            int i;
  66.593 -            for ( i = 0; i < physinfo.nr_cpus; i++ )
  66.594 -            {
  66.595 -                if ( (ret = show_cpufreq_para_cpuid(xc_fd, i)) == -ENODEV )
  66.596 -                    break;
  66.597 -            }
  66.598 -        }
  66.599 +        int i;
  66.600 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.601 +            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MAX_FREQ, freq) )
  66.602 +                fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", i);
  66.603      }
  66.604      else
  66.605 -        ret = show_cpufreq_para_cpuid(xc_fd, cpuid);
  66.606 -
  66.607 -    return ret;
  66.608 +    {
  66.609 +        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, freq) )
  66.610 +            fprintf(stderr, "failed to set scaling max freq\n");
  66.611 +    }
  66.612  }
  66.613  
  66.614 -int scaling_max_freq_func(int xc_fd, int cpuid, uint32_t value)
  66.615 +void scaling_min_freq_func(int argc, char *argv[])
  66.616  {
  66.617 -    int ret = 0;
  66.618 +    int cpuid = -1, freq = -1;
  66.619 +
  66.620 +    if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
  66.621 +                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
  66.622 +         (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
  66.623 +         argc == 0 )
  66.624 +    {
  66.625 +        fprintf(stderr, "failed to set scaling min freq\n");
  66.626 +        return ;
  66.627 +    }
  66.628  
  66.629      if ( cpuid < 0 )
  66.630      {
  66.631 -        show_help();
  66.632 -        return -EINVAL;
  66.633 +        int i;
  66.634 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.635 +            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MIN_FREQ, freq) )
  66.636 +                fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", i);
  66.637      }
  66.638 -
  66.639 -    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, value);
  66.640 -    if ( ret )
  66.641 +    else
  66.642      {
  66.643 -        fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", cpuid);
  66.644 +        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, freq) )
  66.645 +            fprintf(stderr, "failed to set scaling min freq\n");
  66.646      }
  66.647 -
  66.648 -    return ret;
  66.649  }
  66.650  
  66.651 -int scaling_min_freq_func(int xc_fd, int cpuid, uint32_t value)
  66.652 +void scaling_speed_func(int argc, char *argv[])
  66.653  {
  66.654 -    int ret;
  66.655 +    int cpuid = -1, speed = -1;
  66.656 +
  66.657 +    if ( (argc >= 2 && (sscanf(argv[1], "%d", &speed) != 1 ||
  66.658 +                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
  66.659 +         (argc == 1 && sscanf(argv[0], "%d", &speed) != 1 ) ||
  66.660 +         argc == 0 )
  66.661 +    {
  66.662 +        fprintf(stderr, "failed to set scaling speed\n");
  66.663 +        return ;
  66.664 +    }
  66.665  
  66.666      if ( cpuid < 0 )
  66.667      {
  66.668 -        show_help();
  66.669 -        return -EINVAL;
  66.670 +        int i;
  66.671 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.672 +            if ( xc_set_cpufreq_para(xc_fd, i, SCALING_SETSPEED, speed) )
  66.673 +                fprintf(stderr, "[CPU%d] failed to set scaling speed\n", i);
  66.674      }
  66.675 -
  66.676 -    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, value);
  66.677 -    if ( ret )
  66.678 +    else
  66.679      {
  66.680 -        fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", cpuid);
  66.681 +        if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, speed) )
  66.682 +            fprintf(stderr, "failed to set scaling speed\n");
  66.683      }
  66.684 -
  66.685 -    return ret;
  66.686  }
  66.687  
  66.688 -int scaling_speed_func(int xc_fd, int cpuid, uint32_t value)
  66.689 +void scaling_sampling_rate_func(int argc, char *argv[])
  66.690  {
  66.691 -    int ret;
  66.692 +    int cpuid = -1, rate = -1;
  66.693 +
  66.694 +    if ( (argc >= 2 && (sscanf(argv[1], "%d", &rate) != 1 ||
  66.695 +                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
  66.696 +         (argc == 1 && sscanf(argv[0], "%d", &rate) != 1 ) ||
  66.697 +         argc == 0 )
  66.698 +    {
  66.699 +        fprintf(stderr, "failed to set scaling sampling rate\n");
  66.700 +        return ;
  66.701 +    }
  66.702  
  66.703      if ( cpuid < 0 )
  66.704      {
  66.705 -        show_help();
  66.706 -        return -EINVAL;
  66.707 +        int i;
  66.708 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.709 +            if ( xc_set_cpufreq_para(xc_fd, i, SAMPLING_RATE, rate) )
  66.710 +                fprintf(stderr,
  66.711 +                        "[CPU%d] failed to set scaling sampling rate\n", i);
  66.712      }
  66.713 -
  66.714 -    ret = xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, value);
  66.715 -    if ( ret )
  66.716 +    else
  66.717      {
  66.718 -        fprintf(stderr, "[CPU%d] failed to set scaling speed\n", cpuid);
  66.719 +        if ( xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, rate) )
  66.720 +            fprintf(stderr, "failed to set scaling sampling rate\n");
  66.721      }
  66.722 -
  66.723 -    return ret;
  66.724  }
  66.725  
  66.726 -int scaling_sampling_rate_func(int xc_fd, int cpuid, uint32_t value)
  66.727 +void scaling_up_threshold_func(int argc, char *argv[])
  66.728  {
  66.729 -    int ret;
  66.730 +    int cpuid = -1, threshold = -1;
  66.731 +
  66.732 +    if ( (argc >= 2 && (sscanf(argv[1], "%d", &threshold) != 1 ||
  66.733 +                        sscanf(argv[0], "%d", &cpuid) != 1) ) ||
  66.734 +         (argc == 1 && sscanf(argv[0], "%d", &threshold) != 1 ) ||
  66.735 +         argc == 0 )
  66.736 +    {
   66.737 +        fprintf(stderr, "failed to set up-threshold\n");
  66.738 +        return ;
  66.739 +    }
  66.740  
  66.741      if ( cpuid < 0 )
  66.742      {
  66.743 -        show_help();
  66.744 -        return -EINVAL;
  66.745 +        int i;
  66.746 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.747 +            if ( xc_set_cpufreq_para(xc_fd, i, UP_THRESHOLD, threshold) )
  66.748 +                fprintf(stderr,
   66.749 +                        "[CPU%d] failed to set up-threshold\n", i);
  66.750      }
  66.751 -
  66.752 -    ret = xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, value);
  66.753 -    if ( ret ) 
  66.754 +    else
  66.755      {
  66.756 -        fprintf(stderr, "[CPU%d] failed to set scaling sampling rate\n", cpuid);
  66.757 +        if ( xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, threshold) )
   66.758 +            fprintf(stderr, "failed to set up-threshold\n");
  66.759      }
  66.760 -
  66.761 -    return ret;
  66.762  }
  66.763  
  66.764 -int scaling_up_threshold_func(int xc_fd, int cpuid, uint32_t value)
  66.765 +void scaling_governor_func(int argc, char *argv[])
  66.766  {
  66.767 -    int ret;
  66.768 +    int cpuid = -1;
  66.769 +    char *name = NULL;
  66.770 +
  66.771 +    if ( argc >= 2 )
  66.772 +    {
  66.773 +        name = strdup(argv[1]);
  66.774 +        if ( name == NULL )
  66.775 +            goto out;
  66.776 +        if ( sscanf(argv[0], "%d", &cpuid) != 1 )
  66.777 +        {
  66.778 +            free(name);
  66.779 +            goto out;
  66.780 +        }
  66.781 +    }
  66.782 +    else if ( argc > 0 )
  66.783 +    {
  66.784 +        name = strdup(argv[0]);
  66.785 +        if ( name == NULL )
  66.786 +            goto out;
  66.787 +    }
  66.788 +    else
  66.789 +        goto out;
  66.790  
  66.791      if ( cpuid < 0 )
  66.792      {
  66.793 -        show_help();
  66.794 -        return -EINVAL;
  66.795 +        int i;
  66.796 +        for ( i = 0; i < max_cpu_nr; i++ )
  66.797 +            if ( xc_set_cpufreq_gov(xc_fd, i, name) )
  66.798 +                fprintf(stderr, "[CPU%d] failed to set governor name\n", i);
  66.799      }
  66.800 -
  66.801 -    ret = xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, value);
  66.802 -    if ( ret )
  66.803 +    else
  66.804      {
  66.805 -        fprintf(stderr, "[CPU%d] failed to set scaling threshold\n", cpuid);
  66.806 +        if ( xc_set_cpufreq_gov(xc_fd, cpuid, name) )
  66.807 +            fprintf(stderr, "failed to set governor name\n");
  66.808      }
  66.809  
  66.810 -    return ret;
  66.811 -}
  66.812 -
  66.813 -int scaling_governor_func(int xc_fd, int cpuid, char *name)
  66.814 -{
  66.815 -    int ret = 0;
  66.816 -
  66.817 -    if ( cpuid < 0 )
  66.818 -    {
  66.819 -        show_help();
  66.820 -        return -EINVAL;
  66.821 -    }
  66.822 -
  66.823 -    ret = xc_set_cpufreq_gov(xc_fd, cpuid, name);
  66.824 -    if ( ret )
  66.825 -    {
  66.826 -        fprintf(stderr, "failed to set cpufreq governor to %s\n", name);
  66.827 -    }
  66.828 -
  66.829 -    return ret;
  66.830 +    free(name);
  66.831 +    return ;
  66.832 +out:
  66.833 +    fprintf(stderr, "failed to set governor name\n");
  66.834  }
  66.835  
  66.836  struct {
  66.837      const char *name;
  66.838 -    int (*function)(int xc_fd, int cpuid, uint32_t value);
  66.839 +    void (*function)(int argc, char *argv[]);
  66.840  } main_options[] = {
  66.841      { "help", help_func },
  66.842 -    { "get-cpuidle-states", cxstates_func },
  66.843 -    { "get-cpufreq-states", pxstates_func },
  66.844 +    { "get-cpuidle-states", cxstat_func },
  66.845 +    { "get-cpufreq-states", pxstat_func },
  66.846 +    { "start", start_gather_func },
  66.847      { "get-cpufreq-para", cpufreq_para_func },
  66.848      { "set-scaling-maxfreq", scaling_max_freq_func },
  66.849      { "set-scaling-minfreq", scaling_min_freq_func },
  66.850 -    { "set-scaling-governor", NULL },
  66.851 +    { "set-scaling-governor", scaling_governor_func },
  66.852      { "set-scaling-speed", scaling_speed_func },
  66.853      { "set-sampling-rate", scaling_sampling_rate_func },
  66.854      { "set-up-threshold", scaling_up_threshold_func },
  66.855 @@ -541,38 +769,37 @@ struct {
  66.856  
  66.857  int main(int argc, char *argv[])
  66.858  {
  66.859 -    int i, ret = -EINVAL;
  66.860 -    int xc_fd;
  66.861 -    int cpuid = -1;
  66.862 -    uint32_t value = 0;
  66.863 +    int i, ret = 0;
  66.864 +    xc_physinfo_t physinfo = { 0 };
  66.865      int nr_matches = 0;
  66.866      int matches_main_options[ARRAY_SIZE(main_options)];
  66.867  
  66.868      if ( argc < 2 )
  66.869      {
  66.870          show_help();
  66.871 -        return ret;
  66.872 -    }
  66.873 -
  66.874 -    if ( argc > 2 )
  66.875 -    {
  66.876 -        if ( sscanf(argv[2], "%d", &cpuid) != 1 )
  66.877 -            cpuid = -1;
  66.878 +        return 0;
  66.879      }
  66.880  
  66.881      xc_fd = xc_interface_open();
  66.882      if ( xc_fd < 0 )
  66.883      {
  66.884          fprintf(stderr, "failed to get the handler\n");
  66.885 +        return 0;
  66.886      }
  66.887  
  66.888 +    ret = xc_physinfo(xc_fd, &physinfo);
  66.889 +    if ( ret )
  66.890 +    {
  66.891 +        fprintf(stderr, "failed to get the processor information\n");
  66.892 +        xc_interface_close(xc_fd);
  66.893 +        return 0;
  66.894 +    }
  66.895 +    max_cpu_nr = physinfo.nr_cpus;
  66.896 +
   66.897 +    /* calculate how many options match the user's input */
  66.898      for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
  66.899 -    {
  66.900          if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
  66.901 -        {
  66.902              matches_main_options[nr_matches++] = i;
  66.903 -        }
  66.904 -    }
  66.905  
  66.906      if ( nr_matches > 1 )
  66.907      {
  66.908 @@ -582,27 +809,12 @@ int main(int argc, char *argv[])
  66.909          fprintf(stderr, "\n");
  66.910      }
  66.911      else if ( nr_matches == 1 )
  66.912 -    {
  66.913 -        if ( !strcmp("set-scaling-governor", main_options[matches_main_options[0]].name) )
  66.914 -        {
  66.915 -            char *name = strdup(argv[3]);
  66.916 -            ret = scaling_governor_func(xc_fd, cpuid, name);
  66.917 -            free(name);
  66.918 -        }
  66.919 -        else
  66.920 -        {
  66.921 -            if ( argc > 3 )
  66.922 -            {
  66.923 -                if ( sscanf(argv[3], "%d", &value) != 1 )
  66.924 -                    value = 0;
  66.925 -            }
  66.926 -            ret = main_options[matches_main_options[0]].function(xc_fd, cpuid, value);
  66.927 -        }
  66.928 -    }
  66.929 +        /* dispatch to the corresponding function handler */
  66.930 +        main_options[matches_main_options[0]].function(argc - 2, argv + 2);
  66.931      else
  66.932          show_help();
  66.933  
  66.934      xc_interface_close(xc_fd);
  66.935 -    return ret;
  66.936 +    return 0;
  66.937  }
  66.938  
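
Note on the xenpm rework above: every subcommand handler now shares the signature void (*)(int argc, char *argv[]) and is selected by prefix match over main_options[], receiving only the arguments that follow the command name. The following self-contained sketch illustrates that dispatch convention; the handler and table names are invented and are not part of the changeset.

    /* Minimal, self-contained sketch of the prefix-match dispatch used above.
     * All names below are illustrative only. */
    #include <stdio.h>
    #include <string.h>

    static void hello_func(int argc, char *argv[])
    {
        (void)argv;  /* unused in this toy handler */
        printf("hello handler, %d extra arg(s)\n", argc);
    }

    static struct {
        const char *name;
        void (*function)(int argc, char *argv[]);
    } options[] = {
        { "hello", hello_func },
    };

    int main(int argc, char *argv[])
    {
        unsigned int i;

        if ( argc < 2 )
            return 0;
        for ( i = 0; i < sizeof(options) / sizeof(options[0]); i++ )
            if ( !strncmp(options[i].name, argv[1], strlen(argv[1])) )
                /* Handlers see only the arguments after the command name. */
                options[i].function(argc - 2, argv + 2);
        return 0;
    }
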
    67.1 --- a/tools/pygrub/Makefile	Wed Jan 28 12:22:58 2009 +0900
    67.2 +++ b/tools/pygrub/Makefile	Wed Jan 28 13:06:45 2009 +0900
    67.3 @@ -22,4 +22,6 @@ endif
    67.4  
    67.5  .PHONY: clean
    67.6  clean:
    67.7 -	rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
    67.8 +	rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out $(DEPS)
    67.9 +
   67.10 +-include $(DEPS)
    68.1 --- a/tools/python/Makefile	Wed Jan 28 12:22:58 2009 +0900
    68.2 +++ b/tools/python/Makefile	Wed Jan 28 13:06:45 2009 +0900
    68.3 @@ -85,3 +85,6 @@ test:
    68.4  .PHONY: clean
    68.5  clean:
    68.6  	rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
    68.7 +	rm -f $(DEPS)
    68.8 +
    68.9 +-include $(DEPS)
    69.1 --- a/tools/python/xen/lowlevel/xc/xc.c	Wed Jan 28 12:22:58 2009 +0900
    69.2 +++ b/tools/python/xen/lowlevel/xc/xc.c	Wed Jan 28 13:06:45 2009 +0900
    69.3 @@ -903,26 +903,24 @@ static PyObject *pyxc_hvm_build(XcObject
    69.4      if ( target == -1 )
    69.5          target = memsize;
    69.6  
    69.7 -    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize, target, image) != 0 )
    69.8 +    if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
    69.9 +                                 target, image) != 0 )
   69.10          return pyxc_error_to_exception();
   69.11  
   69.12  #if !defined(__ia64__)
   69.13 -    /* Set up the HVM info table. */
   69.14 +    /* Fix up the HVM info table. */
   69.15      va_map = xc_map_foreign_range(self->xc_handle, dom, XC_PAGE_SIZE,
   69.16                                    PROT_READ | PROT_WRITE,
   69.17                                    HVM_INFO_PFN);
   69.18      if ( va_map == NULL )
   69.19          return PyErr_SetFromErrno(xc_error_obj);
   69.20      va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
   69.21 -    memset(va_hvm, 0, sizeof(*va_hvm));
   69.22 -    strncpy(va_hvm->signature, "HVM INFO", 8);
   69.23 -    va_hvm->length       = sizeof(struct hvm_info_table);
   69.24      va_hvm->acpi_enabled = acpi;
   69.25      va_hvm->apic_mode    = apic;
   69.26      va_hvm->nr_vcpus     = vcpus;
   69.27      for ( i = 0, sum = 0; i < va_hvm->length; i++ )
   69.28          sum += ((uint8_t *)va_hvm)[i];
   69.29 -    va_hvm->checksum = -sum;
   69.30 +    va_hvm->checksum -= sum;
   69.31      munmap(va_map, XC_PAGE_SIZE);
   69.32  #endif
   69.33  
    70.1 --- a/tools/python/xen/util/oshelp.py	Wed Jan 28 12:22:58 2009 +0900
    70.2 +++ b/tools/python/xen/util/oshelp.py	Wed Jan 28 13:06:45 2009 +0900
    70.3 @@ -5,7 +5,7 @@ def fcntl_setfd_cloexec(file, bool):
    70.4          f = fcntl.fcntl(file, fcntl.F_GETFD)
    70.5          if bool: f |= fcntl.FD_CLOEXEC
    70.6          else: f &= ~fcntl.FD_CLOEXEC
    70.7 -        fcntl.fcntl(file, fcntl.F_SETFD)
    70.8 +        fcntl.fcntl(file, fcntl.F_SETFD, f)
    70.9  
   70.10  def waitstatus_description(st):
   70.11          if os.WIFEXITED(st):
    71.1 --- a/tools/python/xen/xend/XendConfig.py	Wed Jan 28 12:22:58 2009 +0900
    71.2 +++ b/tools/python/xen/xend/XendConfig.py	Wed Jan 28 13:06:45 2009 +0900
    71.3 @@ -149,6 +149,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
    71.4      'opengl': int,
    71.5      'soundhw': str,
    71.6      'stdvga': int,
    71.7 +    'videoram': int,
    71.8      'usb': int,
    71.9      'usbdevice': str,
   71.10      'hpet': int,
   71.11 @@ -166,6 +167,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
   71.12      'guest_os_type': str,
   71.13      'hap': int,
   71.14      'xen_extended_power_mgmt': int,
   71.15 +    'pci_msitranslate': int,
   71.16  }
   71.17  
   71.18  # Xen API console 'other_config' keys.
   71.19 @@ -1247,6 +1249,11 @@ class XendConfig(dict):
   71.20                          'PPCI': ppci_uuid,
   71.21                          'hotplug_slot': pci_dev.get('vslot', 0)
   71.22                      }
   71.23 +
   71.24 +                    dpci_opts = pci_dev.get('opts')
   71.25 +                    if dpci_opts and len(dpci_opts) > 0:
   71.26 +                        dpci_record['options'] = dpci_opts
   71.27 +
   71.28                      XendDPCI(dpci_uuid, dpci_record)
   71.29  
   71.30                  target['devices'][pci_devs_uuid] = (dev_type,
   71.31 @@ -1762,6 +1769,11 @@ class XendConfig(dict):
   71.32                          'PPCI': ppci_uuid,
   71.33                          'hotplug_slot': pci_dev.get('vslot', 0)
   71.34                      }
   71.35 +
   71.36 +                    dpci_opts = pci_dev.get('opts')
   71.37 +                    if dpci_opts and len(dpci_opts) > 0:
   71.38 +                        dpci_record['options'] = dpci_opts
   71.39 +
   71.40                      XendDPCI(dpci_uuid, dpci_record)
   71.41  
   71.42                  self['devices'][dev_uuid] = (dev_type,
    72.1 --- a/tools/python/xen/xend/XendDPCI.py	Wed Jan 28 12:22:58 2009 +0900
    72.2 +++ b/tools/python/xen/xend/XendDPCI.py	Wed Jan 28 13:06:45 2009 +0900
    72.3 @@ -41,7 +41,8 @@ class XendDPCI(XendBase):
    72.4                    'virtual_name',
    72.5                    'VM',
    72.6                    'PPCI',
    72.7 -                  'hotplug_slot']
    72.8 +                  'hotplug_slot',
    72.9 +                  'options']
   72.10          return XendBase.getAttrRO() + attrRO
   72.11  
   72.12      def getAttrRW(self):
   72.13 @@ -119,6 +120,8 @@ class XendDPCI(XendBase):
   72.14          self.VM = record['VM']
   72.15          self.PPCI = record['PPCI']
   72.16          self.hotplug_slot = record['hotplug_slot']
   72.17 +        if 'options' in record.keys():
   72.18 +            self.options = record['options']
   72.19  
   72.20      def destroy(self):
   72.21          xendom = XendDomain.instance()
   72.22 @@ -152,3 +155,5 @@ class XendDPCI(XendBase):
   72.23      def get_hotplug_slot(self):
   72.24          return self.hotplug_slot
   72.25  
   72.26 +    def get_options(self):
   72.27 +        return self.options
    73.1 --- a/tools/python/xen/xend/XendDomain.py	Wed Jan 28 12:22:58 2009 +0900
    73.2 +++ b/tools/python/xen/xend/XendDomain.py	Wed Jan 28 13:06:45 2009 +0900
    73.3 @@ -423,7 +423,7 @@ class XendDomain:
    73.4                      log.exception("Unable to recreate domain")
    73.5                      try:
    73.6                          xc.domain_pause(domid)
    73.7 -                        do_FLR(domid)
    73.8 +                        XendDomainInfo.do_FLR(domid)
    73.9                          xc.domain_destroy(domid)
   73.10                      except:
   73.11                          log.exception("Hard destruction of domain failed: %d" %
   73.12 @@ -1264,7 +1264,7 @@ class XendDomain:
   73.13          else:
   73.14              try:
   73.15                  xc.domain_pause(int(domid))
   73.16 -                do_FLR(int(domid))
   73.17 +                XendDomainInfo.do_FLR(int(domid))
   73.18                  val = xc.domain_destroy(int(domid))
   73.19              except ValueError:
   73.20                  raise XendInvalidDomain(domid)
    74.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Wed Jan 28 12:22:58 2009 +0900
    74.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Jan 28 13:06:45 2009 +0900
    74.3 @@ -696,10 +696,17 @@ class XendDomainInfo:
    74.4                      " assigned to other domain.' \
    74.5                      )% (pci_device.name, self.domid, pci_str))
    74.6  
    74.7 -        bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
    74.8 +        opts = ''
    74.9 +        if 'opts' in new_dev and len(new_dev['opts']) > 0:
   74.10 +            config_opts = new_dev['opts']
   74.11 +            config_opts = map(lambda (x, y): x+'='+y, config_opts)
   74.12 +            opts = ',' + reduce(lambda x, y: x+','+y, config_opts)
   74.13 +
   74.14 +        bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'],
   74.15                  new_dev['bus'],
   74.16                  new_dev['slot'],
   74.17                  new_dev['func'],
   74.18 +                opts,
   74.19                  new_dev['vslt'])
   74.20          self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str)
   74.21  
   74.22 @@ -1192,7 +1199,7 @@ class XendDomainInfo:
   74.23  
   74.24          if self.domid >= 0:
   74.25              if target > memory_cur:
   74.26 -                balloon.free( (target-memory_cur)*1024 )
   74.27 +                balloon.free((target - memory_cur) * 1024, self)
   74.28              self.storeVm("memory", target)
   74.29              self.storeDom("memory/target", target << 10)
   74.30              xc.domain_set_target_mem(self.domid,
   74.31 @@ -2234,7 +2241,11 @@ class XendDomainInfo:
   74.32          xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
   74.33  
   74.34          # Test whether the devices can be assigned with VT-d
   74.35 -        pci_str = str(self.info["platform"].get("pci"))
   74.36 +        pci = self.info["platform"].get("pci")
   74.37 +        pci_str = ''
   74.38 +        if pci and len(pci) > 0:
   74.39 +            pci = map(lambda x: x[0:4], pci)  # strip options 
   74.40 +            pci_str = str(pci)
   74.41          if hvm and pci_str:
   74.42              bdf = xc.test_assign_device(self.domid, pci_str)
   74.43              if bdf != 0:
   74.44 @@ -3527,6 +3538,11 @@ class XendDomainInfo:
   74.45  
   74.46          dpci_uuid = uuid.createString()
   74.47  
   74.48 +        dpci_opts = []
   74.49 +        opts_dict = xenapi_pci.get('options')
   74.50 +        for k in opts_dict.keys():
   74.51 +            dpci_opts.append([k, opts_dict[k]])
   74.52 +
   74.53          # Convert xenapi to sxp
   74.54          ppci = XendAPIStore.get(xenapi_pci.get('PPCI'), 'PPCI')
   74.55  
   74.56 @@ -3538,6 +3554,7 @@ class XendDomainInfo:
   74.57                      ['slot', '0x%02x' % ppci.get_slot()],
   74.58                      ['func', '0x%1x' % ppci.get_func()],
   74.59                      ['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')],
   74.60 +                    ['opts', dpci_opts],
   74.61                      ['uuid', dpci_uuid]
   74.62                  ],
   74.63                  ['state', 'Initialising']
    75.1 --- a/tools/python/xen/xend/balloon.py	Wed Jan 28 12:22:58 2009 +0900
    75.2 +++ b/tools/python/xen/xend/balloon.py	Wed Jan 28 13:06:45 2009 +0900
    75.3 @@ -67,7 +67,7 @@ def get_dom0_target_alloc():
    75.4          raise VmError('Failed to query target memory allocation of dom0.')
    75.5      return kb
    75.6  
    75.7 -def free(need_mem ,self):
    75.8 +def free(need_mem, dominfo):
    75.9      """Balloon out memory from the privileged domain so that there is the
   75.10      specified required amount (in KiB) free.
   75.11      """
   75.12 @@ -130,7 +130,7 @@ def free(need_mem ,self):
   75.13          if physinfo['nr_nodes'] > 1 and retries == 0:
   75.14              oldnode = -1
   75.15              waitscrub = 1
   75.16 -            vcpus = self.info['cpus'][0]
   75.17 +            vcpus = dominfo.info['cpus'][0]
   75.18              for vcpu in vcpus:
   75.19                  nodenum = 0
   75.20                  for node in physinfo['node_to_cpu']:
    76.1 --- a/tools/python/xen/xend/image.py	Wed Jan 28 12:22:58 2009 +0900
    76.2 +++ b/tools/python/xen/xend/image.py	Wed Jan 28 13:06:45 2009 +0900
    76.3 @@ -265,6 +265,10 @@ class ImageHandler:
    76.4              ret.append('-nographic')
    76.5              return ret
    76.6  
    76.7 +        vram = str(vmConfig['platform'].get('videoram',4))
    76.8 +        ret.append('-videoram')
    76.9 +        ret.append(vram)
   76.10 +
   76.11          vnc_config = {}
   76.12          has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0
   76.13          has_sdl = int(vmConfig['platform'].get('sdl', 0)) != 0
   76.14 @@ -833,6 +837,7 @@ class IA64_HVM_ImageHandler(HVMImageHand
   76.15      def configure(self, vmConfig):
   76.16          HVMImageHandler.configure(self, vmConfig)
   76.17          self.vhpt = int(vmConfig['platform'].get('vhpt',  0))
   76.18 +        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
   76.19  
   76.20      def buildDomain(self):
   76.21          xc.nvram_init(self.vm.getName(), self.vm.getDomid())
   76.22 @@ -847,8 +852,8 @@ class IA64_HVM_ImageHandler(HVMImageHand
   76.23          # buffer io page, buffer pio page and memmap info page
   76.24          extra_pages = 1024 + 5
   76.25          mem_kb += extra_pages * page_kb
   76.26 -        # Add 8 MiB overhead for QEMU's video RAM.
   76.27 -        return mem_kb + 8192
   76.28 +        mem_kb += self.vramsize
   76.29 +        return mem_kb
   76.30  
   76.31      def getRequiredInitialReservation(self):
   76.32          return self.vm.getMemoryTarget()
   76.33 @@ -882,6 +887,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
   76.34      def configure(self, vmConfig):
   76.35          HVMImageHandler.configure(self, vmConfig)
   76.36          self.pae = int(vmConfig['platform'].get('pae',  0))
   76.37 +        self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
   76.38  
   76.39      def buildDomain(self):
   76.40          xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
   76.41 @@ -890,8 +896,7 @@ class X86_HVM_ImageHandler(HVMImageHandl
   76.42          return rc
   76.43  
   76.44      def getRequiredAvailableMemory(self, mem_kb):
   76.45 -        # Add 8 MiB overhead for QEMU's video RAM.
   76.46 -        return mem_kb + 8192
   76.47 +        return mem_kb + self.vramsize
   76.48  
   76.49      def getRequiredInitialReservation(self):
   76.50          return self.vm.getMemoryTarget()
    77.1 --- a/tools/python/xen/xend/server/pciif.py	Wed Jan 28 12:22:58 2009 +0900
    77.2 +++ b/tools/python/xen/xend/server/pciif.py	Wed Jan 28 13:06:45 2009 +0900
    77.3 @@ -75,6 +75,12 @@ class PciController(DevController):
    77.4              slot = parse_hex(pci_config.get('slot', 0))
    77.5              func = parse_hex(pci_config.get('func', 0))            
    77.6  
    77.7 +            opts = pci_config.get('opts', '')
    77.8 +            if len(opts) > 0:
    77.9 +                opts = map(lambda (x, y): x+'='+y, opts)
   77.10 +                opts = reduce(lambda x, y: x+','+y, opts)
   77.11 +                back['opts-%i' % pcidevid] = opts
   77.12 +
   77.13              vslt = pci_config.get('vslt')
   77.14              if vslt is not None:
   77.15                  vslots = vslots + vslt + ";"
   77.16 @@ -89,6 +95,9 @@ class PciController(DevController):
   77.17  
   77.18          back['num_devs']=str(pcidevid)
   77.19          back['uuid'] = config.get('uuid','')
   77.20 +        if 'pci_msitranslate' in self.vm.info['platform']:
   77.21 +            back['msitranslate']=str(self.vm.info['platform']['pci_msitranslate'])
   77.22 +
   77.23          return (0, back, {})
   77.24  
   77.25  
   77.26 @@ -108,6 +117,9 @@ class PciController(DevController):
   77.27                  dev = back['dev-%i' % i]
   77.28                  state = states[i]
   77.29                  uuid = back['uuid-%i' %i]
   77.30 +                opts = ''
   77.31 +                if 'opts-%i' % i in back:
   77.32 +                    opts = back['opts-%i' % i]
   77.33              except:
   77.34                  raise XendError('Error reading config')
   77.35  
   77.36 @@ -129,6 +141,8 @@ class PciController(DevController):
   77.37                  self.writeBackend(devid, 'state-%i' % (num_olddevs + i),
   77.38                                    str(xenbusState['Initialising']))
   77.39                  self.writeBackend(devid, 'uuid-%i' % (num_olddevs + i), uuid)
   77.40 +                if len(opts) > 0:
   77.41 +                    self.writeBackend(devid, 'opts-%i' % (num_olddevs + i), opts)
   77.42                  self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1))
   77.43  
   77.44                  # Update vslots
   77.45 @@ -540,6 +554,9 @@ class PciController(DevController):
   77.46                  self.removeBackend(devid, 'vdev-%i' % i)
   77.47                  self.removeBackend(devid, 'state-%i' % i)
   77.48                  self.removeBackend(devid, 'uuid-%i' % i)
   77.49 +                tmpopts = self.readBackend(devid, 'opts-%i' % i)
   77.50 +                if tmpopts is not None:
   77.51 +                    self.removeBackend(devid, 'opts-%i' % i)
   77.52              else:
   77.53                  if new_num_devs != i:
   77.54                      tmpdev = self.readBackend(devid, 'dev-%i' % i)
   77.55 @@ -556,6 +573,9 @@ class PciController(DevController):
   77.56                      tmpuuid = self.readBackend(devid, 'uuid-%i' % i)
   77.57                      self.writeBackend(devid, 'uuid-%i' % new_num_devs, tmpuuid)
   77.58                      self.removeBackend(devid, 'uuid-%i' % i)
   77.59 +                    tmpopts = self.readBackend(devid, 'opts-%i' % i)
   77.60 +                    if tmpopts is not None:
   77.61 +                        self.removeBackend(devid, 'opts-%i' % i)
   77.62                  new_num_devs = new_num_devs + 1
   77.63  
   77.64          self.writeBackend(devid, 'num_devs', str(new_num_devs))
    78.1 --- a/tools/python/xen/xend/server/relocate.py	Wed Jan 28 12:22:58 2009 +0900
    78.2 +++ b/tools/python/xen/xend/server/relocate.py	Wed Jan 28 13:06:45 2009 +0900
    78.3 @@ -122,6 +122,8 @@ class RelocationProtocol(protocol.Protoc
    78.4          if self.transport:
    78.5              self.send_reply(["ready", name])
    78.6              p2cread, p2cwrite = os.pipe()
    78.7 +            from xen.util import oshelp
    78.8 +            oshelp.fcntl_setfd_cloexec(p2cwrite, True)
    78.9              threading.Thread(target=connection.SSLSocketServerConnection.recv2fd,
   78.10                               args=(self.transport.sock, p2cwrite)).start()
   78.11              try:
    79.1 --- a/tools/python/xen/xm/create.dtd	Wed Jan 28 12:22:58 2009 +0900
    79.2 +++ b/tools/python/xen/xm/create.dtd	Wed Jan 28 13:06:45 2009 +0900
    79.3 @@ -82,11 +82,12 @@
    79.4  <!ELEMENT vtpm   (name*)>
    79.5  <!ATTLIST vtpm   backend         CDATA #REQUIRED>
    79.6  
    79.7 -<!ELEMENT pci    EMPTY>
    79.8 +<!ELEMENT pci    (pci_opt*)>
    79.9  <!ATTLIST pci    domain          CDATA #REQUIRED
   79.10                   bus             CDATA #REQUIRED
   79.11                   slot            CDATA #REQUIRED
   79.12                   func            CDATA #REQUIRED
   79.13 +                 opts_str        CDATA #IMPLIED
   79.14                   vslt            CDATA #IMPLIED>
   79.15  
   79.16  <!ELEMENT vscsi  EMPTY>
   79.17 @@ -138,6 +139,10 @@
   79.18  <!ATTLIST vcpu_param key   CDATA #REQUIRED
   79.19                       value CDATA #REQUIRED>
   79.20  
   79.21 +<!ELEMENT pci_opt    EMPTY>
   79.22 +<!ATTLIST pci_opt    key   CDATA #REQUIRED
   79.23 +                     value CDATA #REQUIRED>
   79.24 +
   79.25  <!ELEMENT other_config EMPTY>
   79.26  <!ATTLIST other_config key   CDATA #REQUIRED
   79.27                         value CDATA #REQUIRED>
    80.1 --- a/tools/python/xen/xm/create.py	Wed Jan 28 12:22:58 2009 +0900
    80.2 +++ b/tools/python/xen/xm/create.py	Wed Jan 28 13:06:45 2009 +0900
    80.3 @@ -318,11 +318,14 @@ gopts.var('disk', val='phy:DEV,VDEV,MODE
    80.4            backend driver domain to use for the disk.
    80.5            The option may be repeated to add more than one disk.""")
    80.6  
    80.7 -gopts.var('pci', val='BUS:DEV.FUNC',
    80.8 +gopts.var('pci', val='BUS:DEV.FUNC[,msitranslate=0|1]',
    80.9            fn=append_value, default=[],
   80.10            use="""Add a PCI device to a domain, using given params (in hex).
   80.11 -         For example 'pci=c0:02.1'.
   80.12 -         The option may be repeated to add more than one pci device.""")
   80.13 +          For example 'pci=c0:02.1'.
   80.14 +          If msitranslate is set, MSI-INTx translation is enabled if possible.
    80.15 +          A guest that doesn't support MSI will get IO-APIC type IRQs
    80.16 +          translated from physical MSI (HVM only). Default is 1.
   80.17 +          The option may be repeated to add more than one pci device.""")
   80.18  
   80.19  gopts.var('vscsi', val='PDEV,VDEV[,DOM]',
   80.20            fn=append_value, default=[],
   80.21 @@ -523,9 +526,9 @@ gopts.var('vncunused', val='',
   80.22            use="""Try to find an unused port for the VNC server.
   80.23            Only valid when vnc=1.""")
   80.24  
   80.25 -gopts.var('videoram', val='',
   80.26 -          fn=set_value, default=None,
   80.27 -          use="""Maximum amount of videoram PV guest can allocate
   80.28 +gopts.var('videoram', val='MEMORY',
   80.29 +          fn=set_int, default=4,
   80.30 +          use="""Maximum amount of videoram a guest can allocate
   80.31            for frame buffer.""")
   80.32  
   80.33  gopts.var('sdl', val='',
   80.34 @@ -588,6 +591,11 @@ gopts.var('suppress_spurious_page_faults
   80.35            fn=set_bool, default=None,
   80.36            use="""Do not inject spurious page faults into this guest""")
   80.37  
   80.38 +gopts.var('pci_msitranslate', val='TRANSLATE',
   80.39 +          fn=set_int, default=1,
   80.40 +          use="""Global PCI MSI-INTx translation flag (0=disable;
    80.41 +          1=enable).""")
   80.42 +
   80.43  def err(msg):
   80.44      """Print an error to stderr and exit.
   80.45      """
   80.46 @@ -667,9 +675,23 @@ def configure_pci(config_devs, vals):
   80.47      """Create the config for pci devices.
   80.48      """
   80.49      config_pci = []
   80.50 -    for (domain, bus, slot, func) in vals.pci:
   80.51 -        config_pci.append(['dev', ['domain', domain], ['bus', bus], \
   80.52 -                        ['slot', slot], ['func', func]])
   80.53 +    for (domain, bus, slot, func, opts) in vals.pci:
   80.54 +        config_pci_opts = []
   80.55 +        d = comma_sep_kv_to_dict(opts)
   80.56 +
   80.57 +        def f(k):
   80.58 +            if k not in ['msitranslate']:
   80.59 +                err('Invalid pci option: ' + k)
   80.60 +
   80.61 +            config_pci_opts.append([k, d[k]])
   80.62 +
   80.63 +        config_pci_bdf = ['dev', ['domain', domain], ['bus', bus], \
   80.64 +                          ['slot', slot], ['func', func]]
   80.65 +        map(f, d.keys())
   80.66 +        if len(config_pci_opts)>0:
   80.67 +            config_pci_bdf.append(['opts', config_pci_opts])
   80.68 +
   80.69 +        config_pci.append(config_pci_bdf)
   80.70  
   80.71      if len(config_pci)>0:
   80.72          config_pci.insert(0, 'pci')
   80.73 @@ -862,12 +884,12 @@ def configure_hvm(config_image, vals):
   80.74      """Create the config for HVM devices.
   80.75      """
   80.76      args = [ 'device_model', 'pae', 'vcpus', 'boot', 'fda', 'fdb', 'timer_mode',
   80.77 -             'localtime', 'serial', 'stdvga', 'isa', 'nographic', 'soundhw',
   80.78 +             'localtime', 'serial', 'stdvga', 'videoram', 'isa', 'nographic', 'soundhw',
   80.79               'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
   80.80               'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
   80.81               'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
   80.82               'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
   80.83 -             'viridian', 'xen_extended_power_mgmt' ]
   80.84 +             'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate' ]
   80.85  
   80.86      for a in args:
   80.87          if a in vals.__dict__ and vals.__dict__[a] is not None:
   80.88 @@ -991,14 +1013,18 @@ def preprocess_pci(vals):
   80.89          pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
   80.90                  r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
   80.91                  r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
   80.92 -                r"(?P<func>[0-7])$", pci_dev_str)
   80.93 +                r"(?P<func>[0-7])" + \
   80.94 +                r"(,(?P<opts>.*))?$", pci_dev_str)
   80.95          if pci_match!=None:
   80.96 -            pci_dev_info = pci_match.groupdict('0')
   80.97 +            pci_dev_info = pci_match.groupdict('')
   80.98 +            if pci_dev_info['domain']=='':
   80.99 +                pci_dev_info['domain']='0'
  80.100              try:
  80.101                  pci.append( ('0x'+pci_dev_info['domain'], \
  80.102                          '0x'+pci_dev_info['bus'], \
  80.103                          '0x'+pci_dev_info['slot'], \
  80.104 -                        '0x'+pci_dev_info['func']))
  80.105 +                        '0x'+pci_dev_info['func'], \
  80.106 +                        pci_dev_info['opts']))
  80.107              except IndexError:
  80.108                  err('Error in PCI slot syntax "%s"'%(pci_dev_str))
  80.109      vals.pci = pci
    81.1 --- a/tools/python/xen/xm/main.py	Wed Jan 28 12:22:58 2009 +0900
    81.2 +++ b/tools/python/xen/xm/main.py	Wed Jan 28 13:06:45 2009 +0900
    81.3 @@ -187,7 +187,7 @@ SUBCOMMAND_HELP = {
    81.4      'vnet-delete'   :  ('<VnetId>', 'Delete a Vnet.'),
    81.5      'vnet-list'     :  ('[-l|--long]', 'List Vnets.'),
    81.6      'vtpm-list'     :  ('<Domain> [--long]', 'List virtual TPM devices.'),
    81.7 -    'pci-attach'    :  ('<Domain> <domain:bus:slot.func> [virtual slot]',
    81.8 +    'pci-attach'    :  ('[-o|--options=<opt>] <Domain> <domain:bus:slot.func> [virtual slot]',
    81.9                          'Insert a new pass-through pci device.'),
   81.10      'pci-detach'    :  ('<Domain> <domain:bus:slot.func>',
   81.11                          'Remove a domain\'s pass-through pci device.'),
   81.12 @@ -2428,7 +2428,7 @@ def xm_network_attach(args):
   81.13              vif.append(vif_param)
   81.14          server.xend.domain.device_create(dom, vif)
   81.15  
   81.16 -def parse_pci_configuration(args, state):
   81.17 +def parse_pci_configuration(args, state, opts = ''):
   81.18      dom = args[0]
   81.19      pci_dev_str = args[1]
   81.20      if len(args) == 3:
   81.21 @@ -2443,12 +2443,17 @@ def parse_pci_configuration(args, state)
   81.22      if pci_match == None:
   81.23          raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
   81.24      pci_dev_info = pci_match.groupdict('0')
   81.25 +
   81.26      try:
   81.27 -        pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \
   81.28 +        pci_bdf =['dev', ['domain', '0x'+ pci_dev_info['domain']], \
   81.29                  ['bus', '0x'+ pci_dev_info['bus']],
   81.30                  ['slot', '0x'+ pci_dev_info['slot']],
   81.31                  ['func', '0x'+ pci_dev_info['func']],
   81.32 -                ['vslt', '0x%x' % int(vslt, 16)]])
   81.33 +                ['vslt', '0x%x' % int(vslt, 16)]]
   81.34 +        if len(opts) > 0:
   81.35 +            pci_bdf.append(['opts', opts])
   81.36 +        pci.append(pci_bdf)
   81.37 +
   81.38      except:
   81.39          raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
   81.40      pci.append(['state', state])
   81.41 @@ -2456,8 +2461,22 @@ def parse_pci_configuration(args, state)
   81.42      return (dom, pci)
   81.43  
   81.44  def xm_pci_attach(args):
   81.45 -    arg_check(args, 'pci-attach', 2, 3)
   81.46 -    (dom, pci) = parse_pci_configuration(args, 'Initialising')
   81.47 +    config_pci_opts = []
   81.48 +    (options, params) = getopt.gnu_getopt(args, 'o:', ['options='])
   81.49 +    for (k, v) in options:
   81.50 +        if k in ('-o', '--options'):
   81.51 +            if len(v.split('=')) != 2:
   81.52 +                err("Invalid pci attach option: %s" % v)
   81.53 +                usage('pci-attach')
   81.54 +            config_pci_opts.append(v.split('='))
   81.55 +
   81.56 +    n = len([i for i in params if i != '--'])
   81.57 +    if n < 2 or n > 3:
   81.58 +        err("Invalid argument for 'xm pci-attach'")
   81.59 +        usage('pci-attach')
   81.60 +
   81.61 +    (dom, pci) = parse_pci_configuration(params, 'Initialising',
   81.62 +                     config_pci_opts)
   81.63  
   81.64      if serverType == SERVER_XEN_API:
   81.65  
   81.66 @@ -2480,7 +2499,8 @@ def xm_pci_attach(args):
   81.67          dpci_record = {
   81.68              "VM":           get_single_vm(dom),
   81.69              "PPCI":         target_ref,
   81.70 -            "hotplug_slot": vslt
   81.71 +            "hotplug_slot": vslt,
   81.72 +            "options":      dict(config_pci_opts)
   81.73          }
   81.74          server.xenapi.DPCI.create(dpci_record)
   81.75  
    82.1 --- a/tools/python/xen/xm/xenapi_create.py	Wed Jan 28 12:22:58 2009 +0900
    82.2 +++ b/tools/python/xen/xm/xenapi_create.py	Wed Jan 28 13:06:45 2009 +0900
    82.3 @@ -533,7 +533,10 @@ class xenapi_create:
    82.4              "PPCI":
    82.5                  target_ref,
    82.6              "hotplug_slot":
    82.7 -                int(pci.attributes["func"].value, 16)
    82.8 +                int(pci.attributes["func"].value, 16),
    82.9 +            "options":
   82.10 +                get_child_nodes_as_dict(pci,
   82.11 +                  "pci_opt", "key", "value")
   82.12          }
   82.13  
   82.14          return server.xenapi.DPCI.create(dpci_record)
   82.15 @@ -931,6 +934,12 @@ class sxp2xml:
   82.16                      = get_child_by_name(dev_sxp, "func", "0")
   82.17                  pci.attributes["vslt"] \
   82.18                      = get_child_by_name(dev_sxp, "vslt", "0")
   82.19 +                for opt in get_child_by_name(dev_sxp, "opts", ""):
   82.20 +                    if len(opt) > 0:
   82.21 +                        pci_opt = document.createElement("pci_opt")
   82.22 +                        pci_opt.attributes["key"] = opt[0]
   82.23 +                        pci_opt.attributes["value"] = opt[1]
   82.24 +                        pci.appendChild(pci_opt)
   82.25  
   82.26                  pcis.append(pci)
   82.27  
   82.28 @@ -1032,6 +1041,7 @@ class sxp2xml:
   82.29              'vhpt',
   82.30              'guest_os_type',
   82.31              'hap',
   82.32 +            'pci_msitranslate',
   82.33          ]
   82.34  
   82.35          platform_configs = []
    83.1 --- a/tools/tests/blowfish.mk	Wed Jan 28 12:22:58 2009 +0900
    83.2 +++ b/tools/tests/blowfish.mk	Wed Jan 28 13:06:45 2009 +0900
    83.3 @@ -1,13 +1,13 @@
    83.4  
    83.5  override XEN_TARGET_ARCH = x86_32
    83.6  XEN_ROOT = ../..
    83.7 -CFLAGS :=
    83.8 +CFLAGS =
    83.9  include $(XEN_ROOT)/tools/Rules.mk
   83.10  
   83.11  # Disable PIE/SSP if GCC supports them. They can break us.
   83.12 -CFLAGS += $(call cc-option,$(CC),-nopie,)
   83.13 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
   83.14 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
   83.15 +$(call cc-option-add,CFLAGS,CC,-nopie)
   83.16 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
   83.17 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
   83.18  
   83.19  CFLAGS += -fno-builtin -msoft-float
   83.20  
    84.1 --- a/tools/vnet/libxutil/Makefile	Wed Jan 28 12:22:58 2009 +0900
    84.2 +++ b/tools/vnet/libxutil/Makefile	Wed Jan 28 13:06:45 2009 +0900
    84.3 @@ -24,14 +24,11 @@ LIB_SRCS += util.c
    84.4  LIB_OBJS := $(LIB_SRCS:.c=.o)
    84.5  PIC_OBJS := $(LIB_SRCS:.c=.opic)
    84.6  
    84.7 -CFLAGS   += -Werror -fno-strict-aliasing $(call cc-option,$(CC),-fgnu89-inline,)
    84.8 +$(call cc-option-add,CFLAGS,CC,-fgnu89-inline)
    84.9 +CFLAGS   += -Werror -fno-strict-aliasing
   84.10  CFLAGS   += -O3
   84.11  #CFLAGS   += -g
   84.12  
   84.13 -# Get gcc to generate the dependencies for us.
   84.14 -CFLAGS   += -Wp,-MD,.$(@F).d
   84.15 -DEPS     = .*.d
   84.16 -
   84.17  MAJOR    := 3.0
   84.18  MINOR    := 0
   84.19  LIB      := libxutil.so 
    85.1 --- a/tools/vtpm/Makefile	Wed Jan 28 12:22:58 2009 +0900
    85.2 +++ b/tools/vtpm/Makefile	Wed Jan 28 13:06:45 2009 +0900
    85.3 @@ -89,6 +89,6 @@ build_sub:
    85.4  			$(MAKE) -C $(TPM_EMULATOR_DIR); \
    85.5  		fi \
    85.6  	else \
    85.7 -		echo "*** Unable to build VTPMs. libgmp could not be found."; \
    85.8 +		echo "=== Unable to build VTPMs. libgmp could not be found."; \
    85.9  	fi
   85.10  
    86.1 --- a/tools/vtpm/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    86.2 +++ b/tools/vtpm/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    86.3 @@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
    86.4  # General compiler flags
    86.5  CFLAGS   = -Werror -g3 -I.
    86.6  
    86.7 -# For generating dependencies
    86.8 -CFLAGS	+= -Wp,-MD,.$(@F).d
    86.9 -
   86.10 -DEP_FILES	= .*.d
   86.11 -
   86.12  # Generic project files
   86.13  HDRS	= $(wildcard *.h)
   86.14  SRCS	= $(wildcard *.c)
   86.15 @@ -26,7 +21,7 @@ OBJS	= $(patsubst %.c,%.o,$(SRCS))
   86.16  
   86.17  $(OBJS): $(SRCS)
   86.18  
   86.19 --include $(DEP_FILES)
   86.20 +-include $(DEPS)
   86.21  
   86.22  BUILD_EMULATOR = y
   86.23  
    87.1 --- a/tools/vtpm_manager/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    87.2 +++ b/tools/vtpm_manager/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    87.3 @@ -11,11 +11,6 @@ TOOLS_INSTALL_DIR = $(DESTDIR)/usr/bin
    87.4  # General compiler flags
    87.5  CFLAGS	= -Werror -g3 -I.
    87.6  
    87.7 -# For generating dependencies
    87.8 -CFLAGS	+= -Wp,-MD,.$(@F).d
    87.9 -
   87.10 -DEP_FILES	= .*.d
   87.11 -
   87.12  # Generic project files
   87.13  HDRS	= $(wildcard *.h)
   87.14  SRCS	= $(wildcard *.c)
   87.15 @@ -26,7 +21,7 @@ OBJS	= $(patsubst %.c,%.o,$(SRCS))
   87.16  
   87.17  $(OBJS): $(SRCS)
   87.18  
   87.19 --include $(DEP_FILES)
    87.20 +-include $(DEPS)
   87.21  
   87.22  # Make sure these are just rules
   87.23  .PHONY : all build install clean
    88.1 --- a/tools/xcutils/Makefile	Wed Jan 28 12:22:58 2009 +0900
    88.2 +++ b/tools/xcutils/Makefile	Wed Jan 28 13:06:45 2009 +0900
    88.3 @@ -14,10 +14,6 @@ include $(XEN_ROOT)/tools/Rules.mk
    88.4  CFLAGS += -Werror
    88.5  CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore)
    88.6  
    88.7 -# Make gcc generate dependencies.
    88.8 -CFLAGS += -Wp,-MD,.$(@F).d
    88.9 -PROG_DEP = .*.d
   88.10 -
   88.11  PROGRAMS = xc_restore xc_save readnotes lsevtchn
   88.12  
   88.13  LDLIBS   = $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
   88.14 @@ -40,6 +36,6 @@ install: build
   88.15  .PHONY: clean
   88.16  clean:
   88.17  	$(RM) *.o $(PROGRAMS)
   88.18 -	$(RM) $(PROG_DEP)
   88.19 +	$(RM) $(DEPS)
   88.20  
   88.21 --include $(PROG_DEP)
   88.22 +-include $(DEPS)
    89.1 --- a/tools/xcutils/readnotes.c	Wed Jan 28 12:22:58 2009 +0900
    89.2 +++ b/tools/xcutils/readnotes.c	Wed Jan 28 13:06:45 2009 +0900
    89.3 @@ -13,7 +13,7 @@
    89.4  #include <xg_private.h>
    89.5  #include <xc_dom.h> /* gunzip bits */
    89.6  
    89.7 -#include <xen/libelf.h>
    89.8 +#include <xen/libelf/libelf.h>
    89.9  
   89.10  static void print_string_note(const char *prefix, struct elf_binary *elf,
   89.11  			      const elf_note *note)
    90.1 --- a/tools/xenmon/Makefile	Wed Jan 28 12:22:58 2009 +0900
    90.2 +++ b/tools/xenmon/Makefile	Wed Jan 28 13:06:45 2009 +0900
    90.3 @@ -38,10 +38,12 @@ install: build
    90.4  
    90.5  .PHONY: clean
    90.6  clean:
    90.7 -	rm -f $(BIN)
    90.8 +	rm -f $(BIN) $(DEPS)
    90.9  
   90.10  
   90.11  %: %.c Makefile
   90.12  	$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
   90.13  xentrace_%: %.c Makefile
   90.14  	$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
   90.15 +
   90.16 +-include $(DEPS)
    91.1 --- a/tools/xenpmd/Makefile	Wed Jan 28 12:22:58 2009 +0900
    91.2 +++ b/tools/xenpmd/Makefile	Wed Jan 28 13:06:45 2009 +0900
    91.3 @@ -17,4 +17,6 @@ install: all
    91.4  
    91.5  .PHONY: clean
    91.6  clean:
    91.7 -	$(RM) -f $(BIN)
    91.8 +	$(RM) -f $(BIN) $(DEPS)
    91.9 +
   91.10 +-include $(DEPS)
    92.1 --- a/tools/xenstat/libxenstat/Makefile	Wed Jan 28 12:22:58 2009 +0900
    92.2 +++ b/tools/xenstat/libxenstat/Makefile	Wed Jan 28 13:06:45 2009 +0900
    92.3 @@ -155,4 +155,6 @@ endif
    92.4  .PHONY: clean
    92.5  clean:
    92.6  	rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \
    92.7 -	      $(BINDINGS) $(BINDINGSRC)
    92.8 +	      $(BINDINGS) $(BINDINGSRC) $(DEPS)
    92.9 +
   92.10 +-include $(DEPS)
    93.1 --- a/tools/xenstat/xentop/Makefile	Wed Jan 28 12:22:58 2009 +0900
    93.2 +++ b/tools/xenstat/xentop/Makefile	Wed Jan 28 13:06:45 2009 +0900
    93.3 @@ -37,4 +37,6 @@ endif
    93.4  
    93.5  .PHONY: clean
    93.6  clean:
    93.7 -	rm -f xentop xentop.o
    93.8 +	rm -f xentop xentop.o $(DEPS)
    93.9 +
   93.10 +-include $(DEPS)
    94.1 --- a/tools/xenstore/Makefile	Wed Jan 28 12:22:58 2009 +0900
    94.2 +++ b/tools/xenstore/Makefile	Wed Jan 28 13:06:45 2009 +0900
    94.3 @@ -8,10 +8,6 @@ CFLAGS += -Werror
    94.4  CFLAGS += -I.
    94.5  CFLAGS += $(CFLAGS_libxenctrl)
    94.6  
    94.7 -# Make gcc generate dependencies.
    94.8 -CFLAGS += -Wp,-MD,.$(@F).d
    94.9 -DEP    = .*.d
   94.10 -
   94.11  CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
   94.12  CLIENTS += xenstore-write xenstore-ls
   94.13  
   94.14 @@ -82,7 +78,7 @@ clean:
   94.15  	rm -f xenstored xs_random xs_stress xs_crashme
   94.16  	rm -f xs_tdb_dump xenstore-control
   94.17  	rm -f xenstore $(CLIENTS)
   94.18 -	$(RM) $(DEP)
   94.19 +	$(RM) $(DEPS)
   94.20  
   94.21  .PHONY: TAGS
   94.22  TAGS:
   94.23 @@ -113,7 +109,7 @@ install: all
   94.24  	$(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR)
   94.25  	$(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR)
   94.26  
   94.27 --include $(DEP)
   94.28 +-include $(DEPS)
   94.29  
   94.30  # never delete any intermediate files.
   94.31  .SECONDARY:
    95.1 --- a/tools/xentrace/Makefile	Wed Jan 28 12:22:58 2009 +0900
    95.2 +++ b/tools/xentrace/Makefile	Wed Jan 28 13:06:45 2009 +0900
    95.3 @@ -46,9 +46,12 @@ install: build
    95.4  
    95.5  .PHONY: clean
    95.6  clean:
    95.7 -	$(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
    95.8 +	$(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) $(DEPS)
    95.9  
   95.10  %: %.c $(HDRS) Makefile
   95.11  	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
   95.12  xentrace_%: %.c $(HDRS) Makefile
   95.13  	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
   95.14 +
   95.15 +-include $(DEPS)
   95.16 +
    96.1 --- a/xen/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    96.2 +++ b/xen/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    96.3 @@ -23,9 +23,6 @@ endif
    96.4  ifeq ($(perfc_arrays),y)
    96.5  perfc := y
    96.6  endif
    96.7 -ifeq ($(frame_pointer),y)
    96.8 -CFLAGS := $(shell echo $(CFLAGS) | sed -e 's/-f[^ ]*omit-frame-pointer//g')
    96.9 -endif
   96.10  
   96.11  # Set ARCH/SUBARCH appropriately.
   96.12  override TARGET_SUBARCH  := $(XEN_TARGET_ARCH)
   96.13 @@ -34,21 +31,8 @@ override TARGET_ARCH     := $(shell echo
   96.14  
   96.15  TARGET := $(BASEDIR)/xen
   96.16  
   96.17 -HDRS := $(wildcard *.h)
   96.18 -HDRS += $(wildcard $(BASEDIR)/include/xen/*.h)
   96.19 -HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h)
   96.20 -HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
   96.21 -HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h)
   96.22 -HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
   96.23 -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
   96.24 -HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
   96.25 -
   96.26  include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
   96.27  
   96.28 -# Do not depend on auto-generated header files.
   96.29 -AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
   96.30 -HDRS  := $(filter-out %/asm-offsets.h,$(AHDRS))
   96.31 -
   96.32  # Note that link order matters!
   96.33  ALL_OBJS-y               += $(BASEDIR)/common/built_in.o
   96.34  ALL_OBJS-y               += $(BASEDIR)/drivers/built_in.o
   96.35 @@ -77,15 +61,18 @@ AFLAGS-y                += -D__ASSEMBLY_
   96.36  
   96.37  ALL_OBJS := $(ALL_OBJS-y)
   96.38  
   96.39 -CFLAGS   := $(strip $(CFLAGS) $(CFLAGS-y))
   96.40 +# Get gcc to generate the dependencies for us.
   96.41 +CFLAGS-y += -MMD -MF .$(@F).d
   96.42 +DEPS = .*.d
   96.43 +
   96.44 +CFLAGS += $(CFLAGS-y)
   96.45  
   96.46  # Most CFLAGS are safe for assembly files:
   96.47  #  -std=gnu{89,99} gets confused by #-prefixed end-of-line comments
   96.48 -AFLAGS   := $(strip $(AFLAGS) $(AFLAGS-y))
   96.49 -AFLAGS   += $(patsubst -std=gnu%,,$(CFLAGS))
   96.50 +AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS))
   96.51  
   96.52  # LDFLAGS are only passed directly to $(LD)
   96.53 -LDFLAGS  := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT))
   96.54 +LDFLAGS += $(LDFLAGS_DIRECT)
   96.55  
   96.56  include Makefile
   96.57  
   96.58 @@ -115,19 +102,21 @@ FORCE:
   96.59  
   96.60  .PHONY: clean
   96.61  clean:: $(addprefix _clean_, $(subdir-all))
   96.62 -	rm -f *.o *~ core
   96.63 +	rm -f *.o *~ core $(DEPS)
   96.64  _clean_%/: FORCE
   96.65  	$(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean
   96.66  
   96.67 -%.o: %.c $(HDRS) Makefile
   96.68 +%.o: %.c Makefile
   96.69  	$(CC) $(CFLAGS) -c $< -o $@
   96.70  
   96.71 -%.o: %.S $(AHDRS) Makefile
   96.72 +%.o: %.S Makefile
   96.73  	$(CC) $(AFLAGS) -c $< -o $@
   96.74  
   96.75 -%.i: %.c $(HDRS) Makefile
   96.76 +%.i: %.c Makefile
   96.77  	$(CPP) $(CFLAGS) $< -o $@
   96.78  
   96.79  # -std=gnu{89,99} gets confused by # as an end-of-line comment marker
   96.80 -%.s: %.S $(AHDRS) Makefile
   96.81 +%.s: %.S Makefile
   96.82  	$(CPP) $(AFLAGS) $< -o $@
   96.83 +
   96.84 +-include $(DEPS)
    97.1 --- a/xen/arch/ia64/Makefile	Wed Jan 28 12:22:58 2009 +0900
    97.2 +++ b/xen/arch/ia64/Makefile	Wed Jan 28 13:06:45 2009 +0900
    97.3 @@ -29,11 +29,11 @@ subdir-y += linux-xen
    97.4  # Headers do not depend on auto-generated header, but object files do.
    97.5  $(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
    97.6  
    97.7 -asm-offsets.s: asm-offsets.c $(HDRS) \
    97.8 +asm-offsets.s: asm-offsets.c \
    97.9      $(BASEDIR)/include/asm-ia64/.offsets.h.stamp 
   97.10  	$(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $<
   97.11  
   97.12 -asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS)
   97.13 +asm-xsi-offsets.s: asm-xsi-offsets.c
   97.14  	$(CC) $(CFLAGS) -S -o $@ $<
   97.15  
   97.16  $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h: asm-xsi-offsets.s
   97.17 @@ -61,7 +61,7 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H
   97.18  	touch $@
   97.19  
   97.20  # I'm sure a Makefile wizard would know a better way to do this
   97.21 -xen.lds.s: xen/xen.lds.S $(HDRS)
   97.22 +xen.lds.s: xen/xen.lds.S
   97.23  	$(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \
   97.24  		-o xen.lds.s xen/xen.lds.S
   97.25  
    98.1 --- a/xen/arch/ia64/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
    98.2 +++ b/xen/arch/ia64/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
    98.3 @@ -72,19 +72,4 @@ ifeq ($(xen_ia64_disable_optvfault),y)
    98.4  CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTVFAULT
    98.5  endif
    98.6  
    98.7 -LDFLAGS := -g
    98.8 -
    98.9 -# Additionnal IA64 include dirs.
   98.10 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h)
   98.11 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h)
   98.12 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h)
   98.13 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h)
   98.14 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h)
   98.15 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h)
   98.16 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h)
   98.17 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h)
   98.18 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h)
   98.19 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h)
   98.20 -HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h)
   98.21 -
   98.22 -HDRS := $(filter-out %/include/asm-ia64/asm-xsi-offsets.h,$(HDRS))
   98.23 +LDFLAGS = -g
    99.1 --- a/xen/arch/ia64/tools/p2m_foreign/Makefile	Wed Jan 28 12:22:58 2009 +0900
    99.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    99.3 @@ -1,52 +0,0 @@
    99.4 -#
    99.5 -# xen/arch/ia64/tools/p2m_foreign
    99.6 -#
    99.7 -# Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
    99.8 -#                    VA Linux Systems Japan K.K.
    99.9 -#
   99.10 -# This program is free software; you can redistribute it and/or modify
   99.11 -# it under the terms of the GNU General Public License as published by
   99.12 -# the Free Software Foundation; either version 2 of the License, or
   99.13 -# (at your option) any later version.
   99.14 -#
   99.15 -# This program is distributed in the hope that it will be useful,
   99.16 -# but WITHOUT ANY WARRANTY; without even the implied warranty of
   99.17 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   99.18 -# GNU General Public License for more details.
   99.19 -#
   99.20 -# You should have received a copy of the GNU General Public License
   99.21 -# along with this program; if not, write to the Free Software
   99.22 -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   99.23 -
   99.24 -XEN_ROOT	= ../../../../..
   99.25 -include $(XEN_ROOT)/tools/Rules.mk
   99.26 -
   99.27 -CFLAGS += -Werror -ggdb3
   99.28 -CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
   99.29 -
   99.30 -# Make gcc generate dependencies.
   99.31 -CFLAGS += -Wp,-MD,.$(@F).d
   99.32 -DEPS = .*.d
   99.33 -
   99.34 -PROGRAMS = p2m_foreign
   99.35 -LDLIBS   = -L$(XEN_LIBXC) -L$(XEN_XENSTORE) -lxenguest -lxenctrl
   99.36 -
   99.37 -.PHONY: all
   99.38 -all: build
   99.39 -
   99.40 -.PHONY: build
   99.41 -build: $(PROGRAMS)
   99.42 -
   99.43 -$(PROGRAMS): %: %.o
   99.44 -	$(CC) $(CFLAGS) $^ $(LDLIBS) -o $@
   99.45 -
   99.46 -
   99.47 -.PHONY: install
   99.48 -install:
   99.49 -
   99.50 -.PHONY: clean
   99.51 -clean:
   99.52 -	$(RM) *.o $(PROGRAMS)
   99.53 -	$(RM) $(DEPS)
   99.54 -
   99.55 --include $(DEPS)
   100.1 --- a/xen/arch/ia64/tools/p2m_foreign/p2m_foreign.c	Wed Jan 28 12:22:58 2009 +0900
   100.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
   100.3 @@ -1,233 +0,0 @@
   100.4 -/*
   100.5 - * Foreign p2m exposure test.
   100.6 - *
   100.7 - * This program is free software; you can redistribute it and/or modify
   100.8 - * it under the terms of the GNU General Public License as published by
   100.9 - * the Free Software Foundation; either version 2 of the License, or
  100.10 - * (at your option) any later version.
  100.11 - *
  100.12 - * This program is distributed in the hope that it will be useful,
  100.13 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
  100.14 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  100.15 - * GNU General Public License for more details.
  100.16 - *
  100.17 - * You should have received a copy of the GNU General Public License
  100.18 - * along with this program; if not, write to the Free Software
  100.19 - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  100.20 - *
  100.21 - * Copyright (c) 2007 Isaku Yamahata <yamahata at valinux co jp>
  100.22 - *                    VA Linux Systems Japan K.K.
  100.23 - *
  100.24 - */
  100.25 -
  100.26 -#include <sys/mman.h>
  100.27 -#include <err.h>
  100.28 -#include <errno.h>
  100.29 -#include <assert.h>
  100.30 -
  100.31 -#include <xc_private.h>
  100.32 -#include <xenctrl.h>
  100.33 -#include <xenguest.h>
  100.34 -#include <xc_efi.h>
  100.35 -#include <ia64/xc_ia64.h>
  100.36 -
  100.37 -#if 1
  100.38 -# define printd(fmt, args...)	printf(fmt, ##args)
  100.39 -#else
  100.40 -# define printd(fmt, args...)	((void)0)
  100.41 -#endif
  100.42 -
  100.43 -/* xc_memory_op() in xc_private.c doesn't support translate_gpfn_list */
  100.44 -static int
  100.45 -__xc_memory_op(int xc_handle, int cmd, void *arg)
  100.46 -{
  100.47 -	DECLARE_HYPERCALL;
  100.48 -	struct xen_translate_gpfn_list* translate = arg;
  100.49 -
  100.50 -	xen_ulong_t* gpfns;
  100.51 -	xen_ulong_t* mfns;
  100.52 -	size_t len;
  100.53 -
  100.54 -	long ret = -EINVAL;
  100.55 -
  100.56 -	hypercall.op     = __HYPERVISOR_memory_op;
  100.57 -	hypercall.arg[0] = (unsigned long)cmd;
  100.58 -	hypercall.arg[1] = (unsigned long)arg;
  100.59 -
  100.60 -	assert(cmd == XENMEM_translate_gpfn_list);
  100.61 -
  100.62 -	get_xen_guest_handle(gpfns, translate->gpfn_list);
  100.63 -	get_xen_guest_handle(mfns, translate->mfn_list);
  100.64 -	len = sizeof(gpfns[0]) * translate->nr_gpfns;
  100.65 -	if (lock_pages(translate, sizeof(*translate)) ||
  100.66 -	    lock_pages(gpfns, len) ||
  100.67 -	    lock_pages(mfns, len))
  100.68 -		goto out;
  100.69 -
  100.70 -	ret = do_xen_hypercall(xc_handle, &hypercall);
  100.71 -
  100.72 -out:
  100.73 -	unlock_pages(mfns, len);
  100.74 -	unlock_pages(gpfns, len);
  100.75 -	unlock_pages(translate, sizeof(*translate));
  100.76 -
  100.77 -	return ret;
  100.78 -}
  100.79 -
  100.80 -int
  100.81 -xc_translate_gpfn_list(int xc_handle, uint32_t domid, xen_ulong_t nr_gpfns,
  100.82 -		       xen_ulong_t* gpfns, xen_ulong_t* mfns)
  100.83 -{
  100.84 -	struct xen_translate_gpfn_list translate = {
  100.85 -		.domid = domid,
  100.86 -		.nr_gpfns = nr_gpfns,
  100.87 -	};
  100.88 -	set_xen_guest_handle(translate.gpfn_list, gpfns);
  100.89 -	set_xen_guest_handle(translate.mfn_list, mfns);
  100.90 -
  100.91 -	return __xc_memory_op(xc_handle,
  100.92 -			      XENMEM_translate_gpfn_list, &translate);
  100.93 -}
  100.94 -
  100.95 -int
  100.96 -main(int argc, char** argv)
  100.97 -{
  100.98 -	uint32_t domid;
  100.99 -	int xc_handle;
 100.100 -
 100.101 -	xc_dominfo_t info;
 100.102 -	shared_info_t* shinfo;
 100.103 -
 100.104 -	unsigned long map_size;
 100.105 -	xen_ia64_memmap_info_t* memmap_info;
 100.106 -	struct xen_ia64_p2m_table p2m_table;
 100.107 -
 100.108 -	char* p;
 100.109 -	char* start;
 100.110 -	char* end;
 100.111 -	xen_ulong_t nr_gpfns;
 100.112 -
 100.113 -	xen_ulong_t* gpfns;
 100.114 -	xen_ulong_t* mfns;
 100.115 -
 100.116 -	unsigned long i;
 100.117 -
 100.118 -	if (argc != 2)
 100.119 -		errx(EXIT_FAILURE, "usage: %s <domid>", argv[0]);
 100.120 -	domid = atol(argv[1]);
 100.121 -
 100.122 -	printd("xc_interface_open()\n");
 100.123 -	xc_handle = xc_interface_open();
 100.124 -	if (xc_handle < 0)
 100.125 -		errx(EXIT_FAILURE, "can't open control interface");
 100.126 -
 100.127 -	printd("xc_domain_getinfo\n");
 100.128 -	if (xc_domain_getinfo(xc_handle, domid, 1, &info) != 1)
 100.129 -		errx(EXIT_FAILURE, "Could not get info for domain");
 100.130 -
 100.131 -
 100.132 -	printd("shared info\n");
 100.133 -	shinfo = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
 100.134 -				      PROT_READ, info.shared_info_frame);
 100.135 -	if (shinfo == NULL)
 100.136 -		errx(EXIT_FAILURE, "can't map shared info");
 100.137 -
 100.138 -	printd("memmap_info\n");
 100.139 -	map_size = PAGE_SIZE * shinfo->arch.memmap_info_num_pages;
 100.140 -	memmap_info = xc_map_foreign_range(xc_handle, info.domid,
 100.141 -					   map_size, PROT_READ,
 100.142 -					   shinfo->arch.memmap_info_pfn);
 100.143 -	if (memmap_info == NULL)
 100.144 -		errx(EXIT_FAILURE, "can't map memmap_info");
 100.145 -
 100.146 -#if 1
 100.147 -	start = (char*)&memmap_info->memdesc;
 100.148 -	end = start + memmap_info->efi_memmap_size;
 100.149 -	i = 0;
 100.150 -	for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
 100.151 -		efi_memory_desc_t* md = (efi_memory_desc_t*)p;
 100.152 -		printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
 100.153 -		       i, md->phys_addr,
 100.154 -		       md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 100.155 -		       md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
 100.156 -		i++;
 100.157 -	}
 100.158 -#endif
 100.159 -
 100.160 -
 100.161 -	printd("p2m map\n");
 100.162 -	if (xc_ia64_p2m_map(&p2m_table, xc_handle, domid, memmap_info, 0) < 0)
 100.163 -		errx(EXIT_FAILURE, "can't map foreign p2m table");
 100.164 -	printd("p2m map done\n");
 100.165 -
 100.166 -	start = (char*)&memmap_info->memdesc;
 100.167 -	end = start + memmap_info->efi_memmap_size;
 100.168 -	nr_gpfns = 0;
 100.169 -	i = 0;
 100.170 -	for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
 100.171 -		efi_memory_desc_t* md = (efi_memory_desc_t*)p;
 100.172 -		if ( md->type != EFI_CONVENTIONAL_MEMORY ||
 100.173 -		     md->attribute != EFI_MEMORY_WB ||
 100.174 -		     md->num_pages == 0 )
 100.175 -			continue;
 100.176 -
 100.177 -		printd("%ld [0x%lx, 0x%lx) 0x%lx pages\n",
 100.178 -		       i, md->phys_addr,
 100.179 -		       md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
 100.180 -		       md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT));
 100.181 -		nr_gpfns += md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
 100.182 -		i++;
 100.183 -	}
 100.184 -
 100.185 -	printd("total 0x%lx gpfns\n", nr_gpfns);
 100.186 -	gpfns = malloc(sizeof(gpfns[0]) * nr_gpfns);
 100.187 -	mfns = malloc(sizeof(mfns[0]) * nr_gpfns);
 100.188 -	if (gpfns == NULL || mfns == NULL)
 100.189 -		err(EXIT_FAILURE, "can't allocate memory for gpfns/mfns");
 100.190 -
 100.191 -	i = 0;
 100.192 -	for (p = start; p < end; p += memmap_info->efi_memdesc_size) {
 100.193 -		efi_memory_desc_t* md = (efi_memory_desc_t*)p;
 100.194 -		unsigned long j;
 100.195 -		if ( md->type != EFI_CONVENTIONAL_MEMORY ||
 100.196 -		     md->attribute != EFI_MEMORY_WB ||
 100.197 -		     md->num_pages == 0 )
 100.198 -			continue;
 100.199 -
 100.200 -		for (j = 0;
 100.201 -		     j < md->num_pages >> (PAGE_SHIFT - EFI_PAGE_SHIFT);
 100.202 -		     j++) {
 100.203 -			gpfns[i] = (md->phys_addr >> PAGE_SHIFT) + j;
 100.204 -			i++;
 100.205 -		}
 100.206 -	}
 100.207 -	for (i = 0; i < nr_gpfns; i++)
 100.208 -		mfns[i] = INVALID_MFN;
 100.209 -
 100.210 -	printd("issue translate gpfn list hypercall. "
 100.211 -	       "this may take a while\n");
 100.212 -	if (xc_translate_gpfn_list(xc_handle,
 100.213 -				   domid, nr_gpfns, gpfns, mfns) < 0)
 100.214 -		err(EXIT_FAILURE, "translate gpfn list hypercall failure");
 100.215 -	printd("translate gpfn list hypercall done\n");
 100.216 -
 100.217 -	printd("checking p2m table\n");
 100.218 -	for (i = 0; i < nr_gpfns; i++) {
 100.219 -		unsigned long mfn_by_translated = mfns[i];
 100.220 -		unsigned long mfn_by_p2m =
 100.221 -			xc_ia64_p2m_mfn(&p2m_table, gpfns[i]);
 100.222 -		if (mfn_by_translated != mfn_by_p2m &&
 100.223 -		    !(mfn_by_translated == 0 && mfn_by_p2m == INVALID_MFN)) {
 100.224 -			printf("ERROR! i 0x%lx gpfn "
 100.225 -			       "0x%lx trnslated 0x%lx p2m 0x%lx\n",
 100.226 -			       i, gpfns[i], mfn_by_translated, mfn_by_p2m);
 100.227 -		}
 100.228 -	}
 100.229 -	printd("checking p2m table done\n");
 100.230 -
 100.231 -	xc_ia64_p2m_unmap(&p2m_table);
 100.232 -	munmap(memmap_info, map_size);
 100.233 -	munmap(shinfo, PAGE_SIZE);
 100.234 -
 100.235 -	return EXIT_SUCCESS;
 100.236 -}
   101.1 --- a/xen/arch/ia64/xen/domain.c	Wed Jan 28 12:22:58 2009 +0900
   101.2 +++ b/xen/arch/ia64/xen/domain.c	Wed Jan 28 13:06:45 2009 +0900
   101.3 @@ -31,7 +31,7 @@
   101.4  #include <xen/event.h>
   101.5  #include <xen/console.h>
   101.6  #include <xen/version.h>
   101.7 -#include <public/libelf.h>
   101.8 +#include <xen/libelf.h>
   101.9  #include <asm/pgalloc.h>
  101.10  #include <asm/offsets.h>  /* for IA64_THREAD_INFO_SIZE */
  101.11  #include <asm/vcpu.h>   /* for function declarations */
   102.1 --- a/xen/arch/ia64/xen/irq.c	Wed Jan 28 12:22:58 2009 +0900
   102.2 +++ b/xen/arch/ia64/xen/irq.c	Wed Jan 28 13:06:45 2009 +0900
   102.3 @@ -402,7 +402,7 @@ void __do_IRQ_guest(int irq)
   102.4  	}
   102.5  }
   102.6  
   102.7 -int pirq_acktype(int irq)
   102.8 +static int pirq_acktype(int irq)
   102.9  {
  102.10      irq_desc_t *desc = &irq_desc[irq];
  102.11  
   103.1 --- a/xen/arch/ia64/xen/machine_kexec.c	Wed Jan 28 12:22:58 2009 +0900
   103.2 +++ b/xen/arch/ia64/xen/machine_kexec.c	Wed Jan 28 13:06:45 2009 +0900
   103.3 @@ -195,6 +195,7 @@ int machine_kexec_get(xen_kexec_range_t 
   103.4  
   103.5  void arch_crash_save_vmcoreinfo(void)
   103.6  {
    103.7 +	VMCOREINFO_SYMBOL(xenheap_phys_end);
   103.8  	VMCOREINFO_SYMBOL(dom_xen);
   103.9  	VMCOREINFO_SYMBOL(dom_io);
  103.10  	VMCOREINFO_SYMBOL(xen_pstart);
   104.1 --- a/xen/arch/ia64/xen/mm.c	Wed Jan 28 12:22:58 2009 +0900
   104.2 +++ b/xen/arch/ia64/xen/mm.c	Wed Jan 28 13:06:45 2009 +0900
   104.3 @@ -3246,9 +3246,9 @@ int get_page_type(struct page_info *page
   104.4      return 1;
   104.5  }
   104.6  
   104.7 -int memory_is_conventional_ram(paddr_t p)
   104.8 +int page_is_conventional_ram(unsigned long mfn)
   104.9  {
  104.10 -    return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
  104.11 +    return (efi_mem_type(pfn_to_paddr(mfn)) == EFI_CONVENTIONAL_MEMORY);
  104.12  }
  104.13  
  104.14  
  104.15 @@ -3295,38 +3295,39 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
  104.16  
  104.17              spin_unlock(&d->grant_table->lock);
  104.18              break;
  104.19 -        case XENMAPSPACE_mfn:
  104.20 -        {
  104.21 -            if ( get_page_from_pagenr(xatp.idx, d) ) {
  104.22 -                struct xen_ia64_memmap_info memmap_info;
  104.23 -                efi_memory_desc_t md;
  104.24 -                int ret;
  104.25 -
  104.26 -                mfn = xatp.idx;
  104.27 -                page = mfn_to_page(mfn);
  104.28 -
  104.29 -                memmap_info.efi_memmap_size = sizeof(md);
  104.30 -                memmap_info.efi_memdesc_size = sizeof(md);
  104.31 -                memmap_info.efi_memdesc_version =
  104.32 -                    EFI_MEMORY_DESCRIPTOR_VERSION;
  104.33 -
  104.34 -                md.type = EFI_CONVENTIONAL_MEMORY;
  104.35 -                md.pad = 0;
  104.36 -                md.phys_addr = xatp.gpfn << PAGE_SHIFT;
  104.37 -                md.virt_addr = 0;
  104.38 -                md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
  104.39 -                md.attribute = EFI_MEMORY_WB;
  104.40 -
  104.41 -                ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
  104.42 -                if (ret != 0) {
  104.43 -                    put_page(page);
  104.44 -                    rcu_unlock_domain(d);
  104.45 -                    gdprintk(XENLOG_DEBUG,
  104.46 -                             "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
  104.47 -                             __func__, __LINE__,
  104.48 -                             d->domain_id, xatp.gpfn, xatp.idx, ret);
  104.49 -                    return ret;
  104.50 -                }
  104.51 +        case XENMAPSPACE_gmfn: {
  104.52 +            struct xen_ia64_memmap_info memmap_info;
  104.53 +            efi_memory_desc_t md;
  104.54 +            int ret;
  104.55 +
  104.56 +            xatp.idx = gmfn_to_mfn(d, xatp.idx);
  104.57 +            if ( !get_page_from_pagenr(xatp.idx, d) )
  104.58 +                break;
  104.59 +
  104.60 +            mfn = xatp.idx;
  104.61 +            page = mfn_to_page(mfn);
  104.62 +
  104.63 +            memmap_info.efi_memmap_size = sizeof(md);
  104.64 +            memmap_info.efi_memdesc_size = sizeof(md);
  104.65 +            memmap_info.efi_memdesc_version =
  104.66 +                EFI_MEMORY_DESCRIPTOR_VERSION;
  104.67 +
  104.68 +            md.type = EFI_CONVENTIONAL_MEMORY;
  104.69 +            md.pad = 0;
  104.70 +            md.phys_addr = xatp.gpfn << PAGE_SHIFT;
  104.71 +            md.virt_addr = 0;
  104.72 +            md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
  104.73 +            md.attribute = EFI_MEMORY_WB;
  104.74 +
  104.75 +            ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
  104.76 +            if (ret != 0) {
  104.77 +                put_page(page);
  104.78 +                rcu_unlock_domain(d);
  104.79 +                gdprintk(XENLOG_DEBUG,
  104.80 +                         "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
  104.81 +                         __func__, __LINE__,
  104.82 +                         d->domain_id, xatp.gpfn, xatp.idx, ret);
  104.83 +                return ret;
  104.84              }
  104.85              break;
  104.86          }
  104.87 @@ -3378,34 +3379,6 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
  104.88          break;
  104.89      }
  104.90  
  104.91 -    case XENMEM_remove_from_physmap:
  104.92 -    {
  104.93 -        struct xen_remove_from_physmap xrfp;
  104.94 -        unsigned long mfn;
  104.95 -        struct domain *d;
  104.96 -
  104.97 -        if ( copy_from_guest(&xrfp, arg, 1) )
  104.98 -            return -EFAULT;
  104.99 -
 104.100 -        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
 104.101 -        if ( rc != 0 )
 104.102 -            return rc;
 104.103 -
 104.104 -        domain_lock(d);
 104.105 -
 104.106 -        mfn = gmfn_to_mfn(d, xrfp.gpfn);
 104.107 -
 104.108 -        if ( mfn_valid(mfn) )
 104.109 -            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
 104.110 -
 104.111 -        domain_unlock(d);
 104.112 -
 104.113 -        rcu_unlock_domain(d);
 104.114 -
 104.115 -        break;
 104.116 -    }
 104.117 -
 104.118 -
 104.119      case XENMEM_machine_memory_map:
 104.120      {
 104.121          struct xen_memory_map memmap;
   105.1 --- a/xen/arch/ia64/xen/xensetup.c	Wed Jan 28 12:22:58 2009 +0900
   105.2 +++ b/xen/arch/ia64/xen/xensetup.c	Wed Jan 28 13:06:45 2009 +0900
   105.3 @@ -747,8 +747,3 @@ int xen_in_range(paddr_t start, paddr_t 
   105.4  
   105.5      return start < end;
   105.6  }
   105.7 -
   105.8 -int tboot_in_range(paddr_t start, paddr_t end)
   105.9 -{
  105.10 -    return 0;
  105.11 -}
   106.1 --- a/xen/arch/x86/Makefile	Wed Jan 28 12:22:58 2009 +0900
   106.2 +++ b/xen/arch/x86/Makefile	Wed Jan 28 13:06:45 2009 +0900
   106.3 @@ -53,6 +53,7 @@ obj-y += machine_kexec.o
   106.4  obj-y += crash.o
   106.5  obj-y += tboot.o
   106.6  obj-y += hpet.o
   106.7 +obj-y += bzimage.o
   106.8  
   106.9  obj-$(crash_debug) += gdbstub.o
  106.10  
  106.11 @@ -78,10 +79,10 @@ ALL_OBJS := $(BASEDIR)/arch/x86/boot/bui
  106.12  	    $(@D)/.$(@F).1.o -o $@
  106.13  	rm -f $(@D)/.$(@F).[0-9]*
  106.14  
  106.15 -asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
  106.16 +asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
  106.17  	$(CC) $(CFLAGS) -S -o $@ $<
  106.18  
  106.19 -xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
  106.20 +xen.lds: $(TARGET_SUBARCH)/xen.lds.S
  106.21  	$(CC) -P -E -Ui386 $(AFLAGS) -o $@ $<
  106.22  
  106.23  boot/mkelf32: boot/mkelf32.c
  106.24 @@ -90,4 +91,4 @@ boot/mkelf32: boot/mkelf32.c
  106.25  .PHONY: clean
  106.26  clean::
  106.27  	rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
  106.28 -	rm -f $(BASEDIR)/.xen-syms.[0-9]*
  106.29 +	rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
   107.1 --- a/xen/arch/x86/Rules.mk	Wed Jan 28 12:22:58 2009 +0900
   107.2 +++ b/xen/arch/x86/Rules.mk	Wed Jan 28 13:06:45 2009 +0900
   107.3 @@ -26,9 +26,9 @@ CFLAGS += -I$(BASEDIR)/include/asm-x86/m
   107.4  CFLAGS += -msoft-float
   107.5  
   107.6  # Disable PIE/SSP if GCC supports them. They can break us.
   107.7 -CFLAGS += $(call cc-option,$(CC),-nopie,)
   107.8 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
   107.9 -CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
  107.10 +$(call cc-option-add,CFLAGS,CC,-nopie)
  107.11 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
  107.12 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
  107.13  
  107.14  ifeq ($(supervisor_mode_kernel),y)
  107.15  CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
  107.16 @@ -45,16 +45,12 @@ ifeq ($(TARGET_SUBARCH),x86_64)
  107.17  CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks
  107.18  CFLAGS += -fno-asynchronous-unwind-tables
  107.19  # -fvisibility=hidden reduces -fpic cost, if it's available
  107.20 -CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,)
  107.21 -CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS))
  107.22 +ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
  107.23 +CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
  107.24 +endif
  107.25  x86_32 := n
  107.26  x86_64 := y
  107.27  endif
  107.28  
  107.29 -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h)
  107.30 -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h)
  107.31 -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h)
  107.32 -HDRS += $(wildcard $(BASEDIR)/include/asm-x86/mach-*/*.h)
  107.33 -
  107.34  # Require GCC v3.4+ (to avoid issues with alignment constraints in Xen headers)
  107.35  $(call cc-ver-check,CC,0x030400,"Xen requires at least gcc-3.4")
   108.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Wed Jan 28 12:22:58 2009 +0900
   108.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Wed Jan 28 13:06:45 2009 +0900
   108.3 @@ -50,11 +50,6 @@
   108.4  
   108.5  #define DEBUG_PM_CX
   108.6  
   108.7 -#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
   108.8 -#define PM_TIMER_TICKS_TO_US(t)     ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
   108.9 -#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
  108.10 -#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
  108.11 -
  108.12  static void (*lapic_timer_off)(void);
  108.13  static void (*lapic_timer_on)(void);
  108.14  
  108.15 @@ -366,7 +361,7 @@ static void acpi_processor_idle(void)
  108.16      cx->usage++;
  108.17      if ( sleep_ticks > 0 )
  108.18      {
  108.19 -        power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
  108.20 +        power->last_residency = acpi_pm_tick_to_ns(sleep_ticks) / 1000UL;
  108.21          cx->time += sleep_ticks;
  108.22      }
  108.23  
  108.24 @@ -611,7 +606,7 @@ static void set_cx(
  108.25      cx->latency  = xen_cx->latency;
  108.26      cx->power    = xen_cx->power;
  108.27      
  108.28 -    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
  108.29 +    cx->latency_ticks = ns_to_acpi_pm_tick(cx->latency * 1000UL);
  108.30      cx->target_residency = cx->latency * latency_factor;
  108.31      if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
  108.32          acpi_power->safe_state = cx;
   109.1 --- a/xen/arch/x86/acpi/power.c	Wed Jan 28 12:22:58 2009 +0900
   109.2 +++ b/xen/arch/x86/acpi/power.c	Wed Jan 28 13:06:45 2009 +0900
   109.3 @@ -221,6 +221,7 @@ static int enter_state(u32 state)
   109.4  
   109.5   enable_cpu:
   109.6      cpufreq_add_cpu(0);
   109.7 +    microcode_resume_cpu(0);
   109.8      enable_nonboot_cpus();
   109.9      thaw_domains();
  109.10      spin_unlock(&pm_lock);
   110.1 --- a/xen/arch/x86/apic.c	Wed Jan 28 12:22:58 2009 +0900
   110.2 +++ b/xen/arch/x86/apic.c	Wed Jan 28 13:06:45 2009 +0900
   110.3 @@ -40,7 +40,7 @@
   110.4  /*
   110.5   * Knob to control our willingness to enable the local APIC.
   110.6   */
   110.7 -int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
   110.8 +static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
   110.9  
  110.10  /*
  110.11   * Debug level
  110.12 @@ -742,7 +742,7 @@ static void apic_pm_activate(void)
  110.13  static void __init lapic_disable(char *str)
  110.14  {
  110.15      enable_local_apic = -1;
  110.16 -    clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
  110.17 +    setup_clear_cpu_cap(X86_FEATURE_APIC);
  110.18  }
  110.19  custom_param("nolapic", lapic_disable);
  110.20  
   111.1 --- a/xen/arch/x86/boot/Makefile	Wed Jan 28 12:22:58 2009 +0900
   111.2 +++ b/xen/arch/x86/boot/Makefile	Wed Jan 28 13:06:45 2009 +0900
   111.3 @@ -1,4 +1,1 @@
   111.4  obj-y += head.o
   111.5 -
   111.6 -head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S \
   111.7 -	cmdline.S edd.S wakeup.S
   112.1 --- a/xen/arch/x86/boot/mkelf32.c	Wed Jan 28 12:22:58 2009 +0900
   112.2 +++ b/xen/arch/x86/boot/mkelf32.c	Wed Jan 28 13:06:45 2009 +0900
   112.3 @@ -25,7 +25,7 @@
   112.4  #define s16 int16_t
   112.5  #define s32 int32_t
   112.6  #define s64 int64_t
   112.7 -#include "../../../include/public/elfstructs.h"
   112.8 +#include "../../../include/xen/elfstructs.h"
   112.9  
  112.10  #define DYNAMICALLY_FILLED   0
  112.11  #define RAW_OFFSET         128
   113.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   113.2 +++ b/xen/arch/x86/bzimage.c	Wed Jan 28 13:06:45 2009 +0900
   113.3 @@ -0,0 +1,242 @@
   113.4 +#include <xen/cache.h>
   113.5 +#include <xen/errno.h>
   113.6 +#include <xen/lib.h>
   113.7 +#include <xen/mm.h>
   113.8 +#include <xen/string.h>
   113.9 +#include <xen/types.h>
  113.10 +
  113.11 +#define HEAPORDER 3
  113.12 +
  113.13 +static unsigned char *window;
  113.14 +#define memptr long
  113.15 +static memptr free_mem_ptr;
  113.16 +static memptr free_mem_end_ptr;
  113.17 +
  113.18 +#define WSIZE           0x80000000
  113.19 +
  113.20 +static unsigned char    *inbuf;
  113.21 +static unsigned         insize;
  113.22 +
  113.23 +/* Index of next byte to be processed in inbuf: */
  113.24 +static unsigned         inptr;
  113.25 +
  113.26 +/* Bytes in output buffer: */
  113.27 +static unsigned         outcnt;
  113.28 +
  113.29 +#define OF(args)        args
  113.30 +#define STATIC          static
  113.31 +
  113.32 +#define memzero(s, n)   memset((s), 0, (n))
  113.33 +
  113.34 +typedef unsigned char   uch;
  113.35 +typedef unsigned short  ush;
  113.36 +typedef unsigned long   ulg;
  113.37 +
  113.38 +#define INIT __init
  113.39 +
  113.40 +#define get_byte()      (inptr < insize ? inbuf[inptr++] : fill_inbuf())
  113.41 +
  113.42 +/* Diagnostic functions */
  113.43 +#ifdef DEBUG
  113.44 +#  define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
  113.45 +#  define Trace(x)      do { fprintf x; } while (0)
  113.46 +#  define Tracev(x)     do { if (verbose) fprintf x ; } while (0)
  113.47 +#  define Tracevv(x)    do { if (verbose > 1) fprintf x ; } while (0)
  113.48 +#  define Tracec(c, x)  do { if (verbose && (c)) fprintf x ; } while (0)
  113.49 +#  define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
  113.50 +#else
  113.51 +#  define Assert(cond, msg)
  113.52 +#  define Trace(x)
  113.53 +#  define Tracev(x)
  113.54 +#  define Tracevv(x)
  113.55 +#  define Tracec(c, x)
  113.56 +#  define Tracecv(c, x)
  113.57 +#endif
  113.58 +
  113.59 +static long bytes_out;
  113.60 +static void flush_window(void);
  113.61 +
  113.62 +static __init void error(char *x)
  113.63 +{
  113.64 +    printk("%s\n", x);
  113.65 +    BUG();
  113.66 +}
  113.67 +
  113.68 +static __init int fill_inbuf(void)
  113.69 +{
  113.70 +        error("ran out of input data");
  113.71 +        return 0;
  113.72 +}
  113.73 +
  113.74 +
  113.75 +#include "../../common/inflate.c"
  113.76 +
  113.77 +static __init void flush_window(void)
  113.78 +{
  113.79 +    /*
  113.80 +     * The window is equal to the output buffer, therefore we only need
  113.81 +     * to compute the CRC.
  113.82 +     */
  113.83 +    unsigned long c = crc;
  113.84 +    unsigned n;
  113.85 +    unsigned char *in, ch;
  113.86 +
  113.87 +    in = window;
  113.88 +    for ( n = 0; n < outcnt; n++ )
  113.89 +    {
  113.90 +        ch = *in++;
  113.91 +        c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
  113.92 +    }
  113.93 +    crc = c;
  113.94 +
  113.95 +    bytes_out += (unsigned long)outcnt;
  113.96 +    outcnt = 0;
  113.97 +}
  113.98 +
  113.99 +static __init int gzip_length(char *image, unsigned long image_len)
 113.100 +{
 113.101 +    return *(uint32_t *)&image[image_len - 4];
 113.102 +}
 113.103 +
 113.104 +static  __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len)
 113.105 +{
 113.106 +    char *image = *_image_start;
 113.107 +    int rc;
 113.108 +    unsigned char magic0 = (unsigned char)image[0];
 113.109 +    unsigned char magic1 = (unsigned char)image[1];
 113.110 +
 113.111 +    if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) )
 113.112 +        return 0;
 113.113 +
 113.114 +    window = (unsigned char *)output;
 113.115 +
 113.116 +    free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER);
 113.117 +    free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER);
 113.118 +
 113.119 +    inbuf = (unsigned char *)image;
 113.120 +    insize = *image_len;
 113.121 +    inptr = 0;
 113.122 +
 113.123 +    makecrc();
 113.124 +
 113.125 +    if ( gunzip() < 0 )
 113.126 +    {
 113.127 +        rc = -EINVAL;
 113.128 +    }
 113.129 +    else
 113.130 +    {
 113.131 +        *_image_start = (char *)window;
 113.132 +        *image_len = gzip_length(image, *image_len);
 113.133 +        rc = 0;
 113.134 +    }
 113.135 +
 113.136 +    free_xenheap_pages((void *)free_mem_ptr, HEAPORDER);
 113.137 +
 113.138 +    return rc;
 113.139 +}
 113.140 +
 113.141 +struct setup_header {
 113.142 +        uint8_t         _pad0[0x1f1];           /* skip uninteresting stuff */
 113.143 +        uint8_t         setup_sects;
 113.144 +        uint16_t        root_flags;
 113.145 +        uint32_t        syssize;
 113.146 +        uint16_t        ram_size;
 113.147 +        uint16_t        vid_mode;
 113.148 +        uint16_t        root_dev;
 113.149 +        uint16_t        boot_flag;
 113.150 +        uint16_t        jump;
 113.151 +        uint32_t        header;
 113.152 +#define HDR_MAGIC               "HdrS"
 113.153 +#define HDR_MAGIC_SZ    4
 113.154 +        uint16_t        version;
 113.155 +#define VERSION(h,l)    (((h)<<8) | (l))
 113.156 +        uint32_t        realmode_swtch;
 113.157 +        uint16_t        start_sys;
 113.158 +        uint16_t        kernel_version;
 113.159 +        uint8_t         type_of_loader;
 113.160 +        uint8_t         loadflags;
 113.161 +        uint16_t        setup_move_size;
 113.162 +        uint32_t        code32_start;
 113.163 +        uint32_t        ramdisk_image;
 113.164 +        uint32_t        ramdisk_size;
 113.165 +        uint32_t        bootsect_kludge;
 113.166 +        uint16_t        heap_end_ptr;
 113.167 +        uint16_t        _pad1;
 113.168 +        uint32_t        cmd_line_ptr;
 113.169 +        uint32_t        initrd_addr_max;
 113.170 +        uint32_t        kernel_alignment;
 113.171 +        uint8_t         relocatable_kernel;
 113.172 +        uint8_t         _pad2[3];
 113.173 +        uint32_t        cmdline_size;
 113.174 +        uint32_t        hardware_subarch;
 113.175 +        uint64_t        hardware_subarch_data;
 113.176 +        uint32_t        payload_offset;
 113.177 +        uint32_t        payload_length;
 113.178 +    } __attribute__((packed));
 113.179 +
 113.180 +static __init int bzimage_check(struct setup_header *hdr, unsigned long len)
 113.181 +{
 113.182 +    if ( len < sizeof(struct setup_header) )
 113.183 +        return 0;
 113.184 +
 113.185 +    if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
 113.186 +        return 0;
 113.187 +
 113.188 +    if ( hdr->version < VERSION(2,8) ) {
  113.189 +        printk("Cannot load bzImage v%d.%02d; at least v2.08 is required\n",
 113.190 +           hdr->version >> 8, hdr->version & 0xff);
 113.191 +        return -EINVAL;
 113.192 +    }
 113.193 +    return 1;
 113.194 +}
 113.195 +
 113.196 +int __init bzimage_headroom(char *image_start, unsigned long image_length)
 113.197 +{
 113.198 +    struct setup_header *hdr = (struct setup_header *)image_start;
 113.199 +    char *img;
 113.200 +    int err, headroom;
 113.201 +
 113.202 +    err = bzimage_check(hdr, image_length);
 113.203 +    if (err < 1)
 113.204 +        return err;
 113.205 +
 113.206 +    img = image_start + (hdr->setup_sects+1) * 512;
 113.207 +    img += hdr->payload_offset;
 113.208 +
 113.209 +    headroom = gzip_length(img, hdr->payload_length);
 113.210 +    headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
 113.211 +    headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
 113.212 +    headroom = (headroom + 4095) & ~4095;
 113.213 +
 113.214 +    return headroom;
 113.215 +}
 113.216 +
 113.217 +int __init bzimage_parse(char *image_base, char **image_start, unsigned long *image_len)
 113.218 +{
 113.219 +    struct setup_header *hdr = (struct setup_header *)(*image_start);
 113.220 +    int err = bzimage_check(hdr, *image_len);
 113.221 +
 113.222 +    if (err < 1)
 113.223 +        return err;
 113.224 +
 113.225 +    BUG_ON(!(image_base < *image_start));
 113.226 +
 113.227 +    *image_start += (hdr->setup_sects+1) * 512;
 113.228 +    *image_start += hdr->payload_offset;
 113.229 +    *image_len = hdr->payload_length;
 113.230 +
 113.231 +    if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 )
 113.232 +        return err;
 113.233 +
 113.234 +    return 0;
 113.235 +}
 113.236 +
 113.237 +/*
 113.238 + * Local variables:
 113.239 + * mode: C
 113.240 + * c-set-style: "BSD"
 113.241 + * c-basic-offset: 4
 113.242 + * tab-width: 4
 113.243 + * indent-tabs-mode: nil
 113.244 + * End:
 113.245 + */
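
The decompressor above sizes its scratch area from the gzip trailer: the last four bytes of a gzip stream hold the uncompressed length (little-endian), which gzip_length() reads and bzimage_headroom() then pads for worst-case in-place inflation. A minimal sketch of the same arithmetic, assuming an x86 target where the unaligned 32-bit load is safe (the helper name is illustrative, not part of the patch):

    /* Sketch only: mirrors the headroom arithmetic in bzimage_headroom(). */
    static unsigned long estimate_headroom(const char *payload, unsigned long payload_len)
    {
        /* gzip trailer: uncompressed length in the final 4 bytes, little-endian. */
        unsigned long headroom = *(const uint32_t *)&payload[payload_len - 4];

        headroom += headroom >> 12;          /* deflate overhead: 8 bytes per 32K block  */
        headroom += 32768 + 18;              /* one sliding window + gzip header/trailer */
        return (headroom + 4095) & ~4095UL;  /* round up to a 4K page                    */
    }
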
   114.1 --- a/xen/arch/x86/cpu/common.c	Wed Jan 28 12:22:58 2009 +0900
   114.2 +++ b/xen/arch/x86/cpu/common.c	Wed Jan 28 13:06:45 2009 +0900
   114.3 @@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
   114.4   */
   114.5  u64 host_pat = 0x050100070406;
   114.6  
   114.7 +static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
   114.8 +
   114.9 +void __init setup_clear_cpu_cap(unsigned int cap)
  114.10 +{
  114.11 +	__clear_bit(cap, boot_cpu_data.x86_capability);
  114.12 +	__set_bit(cap, cleared_caps);
  114.13 +}
  114.14 +
  114.15  static void default_init(struct cpuinfo_x86 * c)
  114.16  {
  114.17  	/* Not much we can do here... */
  114.18 @@ -235,6 +243,7 @@ static void __init early_cpu_detect(void
  114.19  		if (c->x86 >= 0x6)
  114.20  			c->x86_model += ((tfms >> 16) & 0xF) << 4;
  114.21  		c->x86_mask = tfms & 15;
  114.22 +		cap0 &= ~cleared_caps[0];
  114.23  		if (cap0 & (1<<19))
  114.24  			c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
  114.25  		c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
  114.26 @@ -329,6 +338,7 @@ void __cpuinit identify_cpu(struct cpuin
  114.27  	c->x86_vendor_id[0] = '\0'; /* Unset */
  114.28  	c->x86_model_id[0] = '\0';  /* Unset */
  114.29  	c->x86_max_cores = 1;
  114.30 +	c->x86_num_siblings = 1;
  114.31  	c->x86_clflush_size = 0;
  114.32  	memset(&c->x86_capability, 0, sizeof c->x86_capability);
  114.33  
  114.34 @@ -395,6 +405,9 @@ void __cpuinit identify_cpu(struct cpuin
  114.35  	if (disable_pse)
  114.36  		clear_bit(X86_FEATURE_PSE, c->x86_capability);
  114.37  
  114.38 +	for (i = 0 ; i < NCAPINTS ; ++i)
  114.39 +		c->x86_capability[i] &= ~cleared_caps[i];
  114.40 +
  114.41  	/* If the model name is still unset, do table lookup. */
  114.42  	if ( !c->x86_model_id[0] ) {
  114.43  		char *p;
  114.44 @@ -468,27 +481,27 @@ void __cpuinit detect_ht(struct cpuinfo_
  114.45  	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
  114.46  		return;
  114.47  
  114.48 -	smp_num_siblings = (ebx & 0xff0000) >> 16;
  114.49 +	c->x86_num_siblings = (ebx & 0xff0000) >> 16;
  114.50  
  114.51 -	if (smp_num_siblings == 1) {
  114.52 +	if (c->x86_num_siblings == 1) {
  114.53  		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
  114.54 -	} else if (smp_num_siblings > 1 ) {
  114.55 +	} else if (c->x86_num_siblings > 1 ) {
  114.56  
  114.57 -		if (smp_num_siblings > NR_CPUS) {
  114.58 -			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
  114.59 -			smp_num_siblings = 1;
  114.60 +		if (c->x86_num_siblings > NR_CPUS) {
  114.61 +			printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", c->x86_num_siblings);
  114.62 +			c->x86_num_siblings = 1;
  114.63  			return;
  114.64  		}
  114.65  
  114.66 -		index_msb = get_count_order(smp_num_siblings);
  114.67 +		index_msb = get_count_order(c->x86_num_siblings);
  114.68  		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
  114.69  
  114.70  		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
  114.71  		       phys_proc_id[cpu]);
  114.72  
  114.73 -		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
  114.74 +		c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
  114.75  
  114.76 -		index_msb = get_count_order(smp_num_siblings) ;
  114.77 +		index_msb = get_count_order(c->x86_num_siblings) ;
  114.78  
  114.79  		core_bits = get_count_order(c->x86_max_cores);
  114.80  
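
setup_clear_cpu_cap() above records a feature bit in cleared_caps[] as well as clearing it on the boot CPU; the mask is then re-applied in early_cpu_detect() and at the end of identify_cpu(), so the bit stays hidden on every CPU brought up later. A hedged usage sketch (the wrapper is illustrative; X86_FEATURE_PSE is reused only because it already appears in this hunk):

    /* Sketch: globally mask a CPUID feature before APs are identified,
     * e.g. from an early command-line handler. */
    static void __init maybe_disable_pse(int nopse)
    {
        if ( nopse )
            setup_clear_cpu_cap(X86_FEATURE_PSE);  /* recorded in cleared_caps[] */
    }
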
   115.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Jan 28 12:22:58 2009 +0900
   115.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Wed Jan 28 13:06:45 2009 +0900
   115.3 @@ -14,7 +14,6 @@ DEFINE_PER_CPU(cpu_banks_t, mce_banks_ow
   115.4  
   115.5  static int nr_intel_ext_msrs = 0;
   115.6  static int cmci_support = 0;
   115.7 -extern int firstbank;
   115.8  
   115.9  #ifdef CONFIG_X86_MCE_THERMAL
  115.10  static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
  115.11 @@ -121,7 +120,7 @@ static inline void intel_get_extended_ms
  115.12      if (nr_intel_ext_msrs == 0)
  115.13          return;
  115.14  
  115.15 -    /*this function will called when CAP(9).MCG_EXT_P = 1*/
   115.16 +    /* this function will be called when CAP(9).MCG_EXT_P = 1 */
  115.17      memset(mc_ext, 0, sizeof(struct mcinfo_extended));
  115.18      mc_ext->common.type = MC_TYPE_EXTENDED;
  115.19      mc_ext->common.size = sizeof(mc_ext);
  115.20 @@ -157,7 +156,7 @@ static inline void intel_get_extended_ms
  115.21   * 3. called in polling handler
  115.22   * It will generate a new mc_info item if found CE/UC errors. DOM0 is the 
  115.23   * consumer.
  115.24 -*/
  115.25 + */
  115.26  static struct mc_info *machine_check_poll(int calltype)
  115.27  {
  115.28      struct mc_info *mi = NULL;
  115.29 @@ -174,9 +173,9 @@ static struct mc_info *machine_check_pol
  115.30      memset(&mcg, 0, sizeof(mcg));
  115.31      mcg.common.type = MC_TYPE_GLOBAL;
  115.32      mcg.common.size = sizeof(mcg);
  115.33 -    /*If called from cpu-reset check, don't need to fill them.
  115.34 -     *If called from cmci context, we'll try to fill domid by memory addr
  115.35 -    */
   115.36 +    /* If called from the cpu-reset check, we don't need to fill them.
   115.37 +     * If called from CMCI context, we'll try to fill the domid from the memory address.
  115.38 +     */
  115.39      mcg.mc_domid = -1;
  115.40      mcg.mc_vcpuid = -1;
  115.41      if (calltype == MC_FLAG_POLLED || calltype == MC_FLAG_RESET)
  115.42 @@ -186,12 +185,13 @@ static struct mc_info *machine_check_pol
  115.43      mcg.mc_socketid = phys_proc_id[cpu];
  115.44      mcg.mc_coreid = cpu_core_id[cpu];
  115.45      mcg.mc_apicid = cpu_physical_id(cpu);
  115.46 -    mcg.mc_core_threadid = mcg.mc_apicid & ( 1 << (smp_num_siblings - 1)); 
  115.47 +    mcg.mc_core_threadid =
  115.48 +        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
  115.49      rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
  115.50  
  115.51      for ( i = 0; i < nr_mce_banks; i++ ) {
  115.52          struct mcinfo_bank mcb;
  115.53 -        /*For CMCI, only owners checks the owned MSRs*/
   115.54 +        /* For CMCI, only the owner checks the MSRs it owns */
  115.55          if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
  115.56               (calltype & MC_FLAG_CMCI) )
  115.57              continue;
  115.58 @@ -240,7 +240,7 @@ static struct mc_info *machine_check_pol
  115.59          x86_mcinfo_add(mi, &mcb);
  115.60          nr_unit++;
  115.61          add_taint(TAINT_MACHINE_CHECK);
  115.62 -        /*Clear state for this bank */
  115.63 +        /* Clear state for this bank */
  115.64          wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0);
  115.65          printk(KERN_DEBUG "mcheck_poll: bank%i CPU%d status[%"PRIx64"]\n", 
  115.66                  i, cpu, status);
  115.67 @@ -249,12 +249,12 @@ static struct mc_info *machine_check_pol
  115.68                  mcg.mc_coreid, mcg.mc_apicid, mcg.mc_core_threadid);
  115.69   
  115.70      }
  115.71 -    /*if pcc = 1, uc must be 1*/
  115.72 +    /* if pcc = 1, uc must be 1 */
  115.73      if (pcc)
  115.74          mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
  115.75      else if (uc)
  115.76          mcg.mc_flags |= MC_FLAG_RECOVERABLE;
  115.77 -    else /*correctable*/
  115.78 +    else /* correctable */
  115.79          mcg.mc_flags |= MC_FLAG_CORRECTABLE;
  115.80  
  115.81      if (nr_unit && nr_intel_ext_msrs && 
  115.82 @@ -264,7 +264,7 @@ static struct mc_info *machine_check_pol
  115.83      }
  115.84      if (nr_unit) 
  115.85          x86_mcinfo_add(mi, &mcg);
  115.86 -    /*Clear global state*/
  115.87 +    /* Clear global state */
  115.88      return mi;
  115.89  }
  115.90  
  115.91 @@ -541,8 +541,7 @@ static void mce_init(void)
  115.92       * This also clears all registers*/
  115.93  
  115.94      mi = machine_check_poll(MC_FLAG_RESET);
  115.95 -    /*in the boot up stage, not expect inject to DOM0, but go print out
  115.96 -    */
   115.97 +    /* in the boot-up stage, don't inject into DOM0; just print it out */
  115.98      if (mi)
  115.99          x86_mcinfo_dump(mi);
 115.100  
 115.101 @@ -553,22 +552,22 @@ static void mce_init(void)
 115.102  
 115.103      for (i = firstbank; i < nr_mce_banks; i++)
 115.104      {
 115.105 -        /*Some banks are shared across cores, use MCi_CTRL to judge whether
 115.106 -         * this bank has been initialized by other cores already.*/
  115.107 +        /* Some banks are shared across cores; use MCi_CTL to judge whether
  115.108 +         * this bank has already been initialized by another core. */
 115.109          rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
 115.110 -        if (!l & !h)
 115.111 +        if (!(l | h))
 115.112          {
 115.113 -            /*if ctl is 0, this bank is never initialized*/
 115.114 +            /* if ctl is 0, this bank is never initialized */
 115.115              printk(KERN_DEBUG "mce_init: init bank%d\n", i);
 115.116              wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
 115.117              wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
 115.118 -       }
 115.119 +        }
 115.120      }
 115.121 -    if (firstbank) /*if cmci enabled, firstbank = 0*/
 115.122 +    if (firstbank) /* if cmci enabled, firstbank = 0 */
 115.123          wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
 115.124  }
 115.125  
 115.126 -/*p4/p6 faimily has similar MCA initialization process*/
  115.127 +/* The P4 and P6 families have a similar MCA initialization process */
 115.128  void intel_mcheck_init(struct cpuinfo_x86 *c)
 115.129  {
 115.130      mce_cap_init(c);
   116.1 --- a/xen/arch/x86/domain.c	Wed Jan 28 12:22:58 2009 +0900
   116.2 +++ b/xen/arch/x86/domain.c	Wed Jan 28 13:06:45 2009 +0900
   116.3 @@ -143,7 +143,7 @@ void dump_pageframe_info(struct domain *
   116.4      {
   116.5          list_for_each_entry ( page, &d->page_list, list )
   116.6          {
   116.7 -            printk("    DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
   116.8 +            printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
   116.9                     _p(page_to_mfn(page)),
  116.10                     page->count_info, page->u.inuse.type_info);
  116.11          }
  116.12 @@ -156,7 +156,7 @@ void dump_pageframe_info(struct domain *
  116.13  
  116.14      list_for_each_entry ( page, &d->xenpage_list, list )
  116.15      {
  116.16 -        printk("    XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
  116.17 +        printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
  116.18                 _p(page_to_mfn(page)),
  116.19                 page->count_info, page->u.inuse.type_info);
  116.20      }
  116.21 @@ -405,8 +405,17 @@ int arch_domain_create(struct domain *d,
  116.22          if ( d->arch.ioport_caps == NULL )
  116.23              goto fail;
  116.24  
  116.25 +#ifdef __i386__
  116.26          if ( (d->shared_info = alloc_xenheap_page()) == NULL )
  116.27              goto fail;
  116.28 +#else
  116.29 +        pg = alloc_domheap_page(
  116.30 +            NULL, MEMF_node(domain_to_node(d)) | MEMF_bits(32));
  116.31 +        if ( pg == NULL )
  116.32 +            goto fail;
  116.33 +        pg->count_info |= PGC_xen_heap;
  116.34 +        d->shared_info = page_to_virt(pg);
  116.35 +#endif
  116.36  
  116.37          clear_page(d->shared_info);
  116.38          share_xen_page_with_guest(
   117.1 --- a/xen/arch/x86/domain_build.c	Wed Jan 28 12:22:58 2009 +0900
   117.2 +++ b/xen/arch/x86/domain_build.c	Wed Jan 28 13:06:45 2009 +0900
   117.3 @@ -19,6 +19,7 @@
   117.4  #include <xen/iocap.h>
   117.5  #include <xen/bitops.h>
   117.6  #include <xen/compat.h>
   117.7 +#include <xen/libelf.h>
   117.8  #include <asm/regs.h>
   117.9  #include <asm/system.h>
  117.10  #include <asm/io.h>
  117.11 @@ -30,7 +31,9 @@
  117.12  #include <asm/e820.h>
  117.13  
  117.14  #include <public/version.h>
  117.15 -#include <public/libelf.h>
  117.16 +
  117.17 +int __init bzimage_parse(
  117.18 +    char *output, char **image_start, unsigned long *image_len);
  117.19  
  117.20  extern unsigned long initial_images_nrpages(void);
  117.21  extern void discard_initial_images(void);
  117.22 @@ -196,7 +199,8 @@ static void __init process_dom0_ioports_
  117.23  
  117.24  int __init construct_dom0(
  117.25      struct domain *d,
  117.26 -    unsigned long _image_start, unsigned long image_len, 
  117.27 +    unsigned long _image_base,
  117.28 +    unsigned long _image_start, unsigned long image_len,
  117.29      unsigned long _initrd_start, unsigned long initrd_len,
  117.30      char *cmdline)
  117.31  {
  117.32 @@ -213,9 +217,11 @@ int __init construct_dom0(
  117.33      struct vcpu *v = d->vcpu[0];
  117.34      unsigned long long value;
  117.35  #if defined(__i386__)
  117.36 +    char *image_base   = (char *)_image_base;   /* use lowmem mappings */
  117.37      char *image_start  = (char *)_image_start;  /* use lowmem mappings */
  117.38      char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
  117.39  #elif defined(__x86_64__)
  117.40 +    char *image_base   = __va(_image_base);
  117.41      char *image_start  = __va(_image_start);
  117.42      char *initrd_start = __va(_initrd_start);
  117.43  #endif
  117.44 @@ -262,6 +268,9 @@ int __init construct_dom0(
  117.45  
  117.46      nr_pages = compute_dom0_nr_pages();
  117.47  
  117.48 +    if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
  117.49 +        return rc;
  117.50 +
  117.51      if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
  117.52          return rc;
  117.53  #ifdef VERBOSE
  117.54 @@ -341,6 +350,12 @@ int __init construct_dom0(
  117.55  #endif
  117.56      }
  117.57  
  117.58 +    if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) )
  117.59 +    {
  117.60 +        printk(XENLOG_WARNING "P2M table base ignored\n");
  117.61 +        parms.p2m_base = UNSET_ADDR;
  117.62 +    }
  117.63 +
  117.64      domain_set_alloc_bitsize(d);
  117.65  
  117.66      /*
  117.67 @@ -359,6 +374,8 @@ int __init construct_dom0(
  117.68      vphysmap_end     = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
  117.69                                                       sizeof(unsigned long) :
  117.70                                                       sizeof(unsigned int)));
  117.71 +    if ( parms.p2m_base != UNSET_ADDR )
  117.72 +        vphysmap_end = vphysmap_start;
  117.73      vstartinfo_start = round_pgup(vphysmap_end);
  117.74      vstartinfo_end   = (vstartinfo_start +
  117.75                          sizeof(struct start_info) +
  117.76 @@ -400,6 +417,11 @@ int __init construct_dom0(
  117.77      /* Ensure that our low-memory 1:1 mapping covers the allocation. */
  117.78      page = alloc_domheap_pages(d, order, MEMF_bits(30));
  117.79  #else
  117.80 +    if ( parms.p2m_base != UNSET_ADDR )
  117.81 +    {
  117.82 +        vphysmap_start = parms.p2m_base;
  117.83 +        vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
  117.84 +    }
  117.85      page = alloc_domheap_pages(d, order, 0);
  117.86  #endif
  117.87      if ( page == NULL )
  117.88 @@ -430,14 +452,6 @@ int __init construct_dom0(
  117.89             _p(v_start), _p(v_end));
  117.90      printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
  117.91  
  117.92 -    if ( ((v_end - v_start)>>PAGE_SHIFT) > nr_pages )
  117.93 -    {
  117.94 -        printk("Initial guest OS requires too much space\n"
  117.95 -               "(%luMB is greater than %luMB limit)\n",
  117.96 -               (v_end-v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
  117.97 -        return -ENOMEM;
  117.98 -    }
  117.99 -
 117.100      mpt_alloc = (vpt_start - v_start) +
 117.101          (unsigned long)pfn_to_paddr(alloc_spfn);
 117.102  
 117.103 @@ -748,8 +762,109 @@ int __init construct_dom0(
 117.104      snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",
 117.105               elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
 117.106  
 117.107 +    count = d->tot_pages;
 117.108 +#ifdef __x86_64__
 117.109 +    /* Set up the phys->machine table if not part of the initial mapping. */
 117.110 +    if ( parms.p2m_base != UNSET_ADDR )
 117.111 +    {
 117.112 +        unsigned long va = vphysmap_start;
 117.113 +
 117.114 +        if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
 117.115 +            panic("DOM0 P->M table overlaps initial mapping");
 117.116 +
 117.117 +        while ( va < vphysmap_end )
 117.118 +        {
 117.119 +            if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
 117.120 +                                 >> PAGE_SHIFT) + 3 > nr_pages )
 117.121 +                panic("Dom0 allocation too small for initial P->M table.\n");
 117.122 +
 117.123 +            l4tab = l4start + l4_table_offset(va);
 117.124 +            if ( !l4e_get_intpte(*l4tab) )
 117.125 +            {
 117.126 +                page = alloc_domheap_page(d, 0);
 117.127 +                if ( !page )
 117.128 +                    break;
 117.129 +                /* No mapping, PGC_allocated + page-table page. */
 117.130 +                page->count_info = PGC_allocated | 2;
 117.131 +                page->u.inuse.type_info =
 117.132 +                    PGT_l3_page_table | PGT_validated | 1;
 117.133 +                clear_page(page_to_virt(page));
 117.134 +                *l4tab = l4e_from_page(page, L4_PROT);
 117.135 +            }
 117.136 +            l3tab = page_to_virt(l4e_get_page(*l4tab));
 117.137 +            l3tab += l3_table_offset(va);
 117.138 +            if ( !l3e_get_intpte(*l3tab) )
 117.139 +            {
 117.140 +                if ( cpu_has_page1gb &&
 117.141 +                     !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
 117.142 +                     vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
 117.143 +                     (page = alloc_domheap_pages(d,
 117.144 +                                                 L3_PAGETABLE_SHIFT -
 117.145 +                                                     PAGE_SHIFT,
 117.146 +                                                 0)) != NULL )
 117.147 +                {
 117.148 +                    *l3tab = l3e_from_page(page,
 117.149 +                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
 117.150 +                    va += 1UL << L3_PAGETABLE_SHIFT;
 117.151 +                    continue;
 117.152 +                }
 117.153 +                if ( (page = alloc_domheap_page(d, 0)) == NULL )
 117.154 +                    break;
 117.155 +                else
 117.156 +                {
 117.157 +                    /* No mapping, PGC_allocated + page-table page. */
 117.158 +                    page->count_info = PGC_allocated | 2;
 117.159 +                    page->u.inuse.type_info =
 117.160 +                        PGT_l2_page_table | PGT_validated | 1;
 117.161 +                    clear_page(page_to_virt(page));
 117.162 +                    *l3tab = l3e_from_page(page, L3_PROT);
 117.163 +                }
 117.164 +            }
 117.165 +            l2tab = page_to_virt(l3e_get_page(*l3tab));
 117.166 +            l2tab += l2_table_offset(va);
 117.167 +            if ( !l2e_get_intpte(*l2tab) )
 117.168 +            {
 117.169 +                if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
 117.170 +                     vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
 117.171 +                     (page = alloc_domheap_pages(d,
 117.172 +                                                 L2_PAGETABLE_SHIFT -
 117.173 +                                                     PAGE_SHIFT,
 117.174 +                                                 0)) != NULL )
 117.175 +                {
 117.176 +                    *l2tab = l2e_from_page(page,
 117.177 +                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
 117.178 +                    va += 1UL << L2_PAGETABLE_SHIFT;
 117.179 +                    continue;
 117.180 +                }
 117.181 +                if ( (page = alloc_domheap_page(d, 0)) == NULL )
 117.182 +                    break;
 117.183 +                else
 117.184 +                {
 117.185 +                    /* No mapping, PGC_allocated + page-table page. */
 117.186 +                    page->count_info = PGC_allocated | 2;
 117.187 +                    page->u.inuse.type_info =
 117.188 +                        PGT_l1_page_table | PGT_validated | 1;
 117.189 +                    clear_page(page_to_virt(page));
 117.190 +                    *l2tab = l2e_from_page(page, L2_PROT);
 117.191 +                }
 117.192 +            }
 117.193 +            l1tab = page_to_virt(l2e_get_page(*l2tab));
 117.194 +            l1tab += l1_table_offset(va);
 117.195 +            BUG_ON(l1e_get_intpte(*l1tab));
 117.196 +            page = alloc_domheap_page(d, 0);
 117.197 +            if ( !page )
 117.198 +                break;
 117.199 +            *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
 117.200 +            va += PAGE_SIZE;
 117.201 +            va &= PAGE_MASK;
 117.202 +        }
 117.203 +        if ( !page )
 117.204 +            panic("Not enough RAM for DOM0 P->M table.\n");
 117.205 +    }
 117.206 +#endif
 117.207 +
 117.208      /* Write the phys->machine and machine->phys table entries. */
 117.209 -    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
 117.210 +    for ( pfn = 0; pfn < count; pfn++ )
 117.211      {
 117.212          mfn = pfn + alloc_spfn;
 117.213  #ifndef NDEBUG
 117.214 @@ -763,6 +878,26 @@ int __init construct_dom0(
 117.215              ((unsigned int *)vphysmap_start)[pfn] = mfn;
 117.216          set_gpfn_from_mfn(mfn, pfn);
 117.217      }
 117.218 +    si->first_p2m_pfn = pfn;
 117.219 +    si->nr_p2m_frames = d->tot_pages - count;
 117.220 +    list_for_each_entry ( page, &d->page_list, list )
 117.221 +    {
 117.222 +        mfn = page_to_mfn(page);
 117.223 +        if ( get_gpfn_from_mfn(mfn) >= count )
 117.224 +        {
 117.225 +            BUG_ON(is_pv_32bit_domain(d));
 117.226 +            if ( !page->u.inuse.type_info &&
 117.227 +                 !get_page_and_type(page, d, PGT_writable_page) )
 117.228 +                BUG();
 117.229 +            ((unsigned long *)vphysmap_start)[pfn] = mfn;
 117.230 +            set_gpfn_from_mfn(mfn, pfn);
 117.231 +            ++pfn;
 117.232 +#ifndef NDEBUG
 117.233 +            ++alloc_epfn;
 117.234 +#endif
 117.235 +        }
 117.236 +    }
 117.237 +    BUG_ON(pfn != d->tot_pages);
 117.238      while ( pfn < nr_pages )
 117.239      {
 117.240          if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
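
With the hunks above, construct_dom0() first lets bzimage_parse() strip the real-mode setup sectors and gunzip the payload in place at image_base, and only then hands the result to elf_init(); a plain ELF dom0 image passes through unchanged because bzimage_check() returns 0 for it. A paraphrased call-flow sketch (error handling trimmed, names as in the hunk; this is not the literal function body):

    /* Sketch of the new dom0 image pipeline. */
    rc = bzimage_parse(image_base, &image_start, &image_len); /* no-op for plain ELF */
    if ( rc != 0 )
        return rc;
    rc = elf_init(&elf, image_start, image_len);              /* payload is now ELF  */
    if ( rc != 0 )
        return rc;
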
   118.1 --- a/xen/arch/x86/hvm/hvm.c	Wed Jan 28 12:22:58 2009 +0900
   118.2 +++ b/xen/arch/x86/hvm/hvm.c	Wed Jan 28 13:06:45 2009 +0900
   118.3 @@ -20,6 +20,7 @@
   118.4   */
   118.5  
   118.6  #include <xen/config.h>
   118.7 +#include <xen/ctype.h>
   118.8  #include <xen/init.h>
   118.9  #include <xen/lib.h>
  118.10  #include <xen/trace.h>
  118.11 @@ -273,6 +274,10 @@ static int hvm_print_line(
  118.12  
  118.13      BUG_ON(bytes != 1);
  118.14  
  118.15 +    /* Accept only printable characters, newline, and horizontal tab. */
  118.16 +    if ( !isprint(c) && (c != '\n') && (c != '\t') )
  118.17 +        return X86EMUL_OKAY;
  118.18 +
  118.19      spin_lock(&hd->pbuf_lock);
  118.20      hd->pbuf[hd->pbuf_idx++] = c;
  118.21      if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
  118.22 @@ -1503,7 +1508,15 @@ static enum hvm_copy_result __hvm_copy(
  118.23  
  118.24          if ( flags & HVMCOPY_to_guest )
  118.25          {
  118.26 -            if ( p2mt != p2m_ram_ro )
  118.27 +            if ( p2mt == p2m_ram_ro )
  118.28 +            {
  118.29 +                static unsigned long lastpage;
  118.30 +                if ( xchg(&lastpage, gfn) != gfn )
  118.31 +                    gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
  118.32 +                             " memory page. gfn=%#lx, mfn=%#lx\n",
  118.33 +                             gfn, mfn);
  118.34 +            }
  118.35 +            else
  118.36              {
  118.37                  memcpy(p, buf, count);
  118.38                  paging_mark_dirty(curr->domain, mfn);
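
The read-only-write path above throttles its diagnostic with a single static word: xchg() swaps in the current gfn and only logs when it differs from the previous one, so a guest hammering the same page produces one message instead of a flood. The same idiom in isolation (a sketch; the message text is illustrative):

    /* Sketch: log at most once per distinct page, race-tolerant via xchg(). */
    static void log_ro_write(unsigned long gfn)
    {
        static unsigned long lastpage;

        if ( xchg(&lastpage, gfn) != gfn )
            gdprintk(XENLOG_DEBUG, "write to read-only gfn %#lx\n", gfn);
    }
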
   119.1 --- a/xen/arch/x86/hvm/mtrr.c	Wed Jan 28 12:22:58 2009 +0900
   119.2 +++ b/xen/arch/x86/hvm/mtrr.c	Wed Jan 28 13:06:45 2009 +0900
   119.3 @@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save
   119.4                            1, HVMSR_PER_VCPU);
   119.5  
   119.6  uint8_t epte_get_entry_emt(
   119.7 -    struct domain *d, unsigned long gfn, unsigned long mfn)
   119.8 +    struct domain *d, unsigned long gfn, 
   119.9 +    unsigned long mfn, uint8_t *igmt, int direct_mmio)
  119.10  {
  119.11      uint8_t gmtrr_mtype, hmtrr_mtype;
  119.12      uint32_t type;
  119.13      struct vcpu *v = current;
  119.14  
  119.15 +    *igmt = 0;
  119.16 +
  119.17      if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
  119.18          return MTRR_TYPE_WRBACK;
  119.19  
  119.20 @@ -723,6 +726,21 @@ uint8_t epte_get_entry_emt(
  119.21      if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
  119.22          return type;
  119.23  
  119.24 +    if ( !iommu_enabled )
  119.25 +    {
  119.26 +        *igmt = 1;
  119.27 +        return MTRR_TYPE_WRBACK;
  119.28 +    }
  119.29 +
  119.30 +    if ( direct_mmio )
  119.31 +        return MTRR_TYPE_UNCACHABLE;
  119.32 +
  119.33 +    if ( iommu_snoop )
  119.34 +    {
  119.35 +        *igmt = 1;
  119.36 +        return MTRR_TYPE_WRBACK;
  119.37 +    }
  119.38 +
  119.39      gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
  119.40      hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
  119.41      return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
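
epte_get_entry_emt() now also tells the caller, through *igmt, when the guest MTRR type was ignored, and forces UC for direct MMIO mappings. After the pinned-cacheattr lookup the effective order is: no IOMMU, write-back and ignore guest MTRRs; direct MMIO, uncachable; IOMMU with snooping, write-back and ignore guest MTRRs; otherwise the stricter of the guest and host MTRR types. A condensed sketch of that decision order (not the literal function):

    /* Sketch of the decision order only; the real function also checks the
     * target domain's vcpu and pinned cache attributes first. */
    static uint8_t emt_sketch(int iommu_on, int snoop, int direct_mmio,
                              uint8_t gmtrr, uint8_t hmtrr, uint8_t *igmt)
    {
        *igmt = 0;
        if ( !iommu_on )   { *igmt = 1; return MTRR_TYPE_WRBACK; }
        if ( direct_mmio ) return MTRR_TYPE_UNCACHABLE;
        if ( snoop )       { *igmt = 1; return MTRR_TYPE_WRBACK; }
        return (gmtrr <= hmtrr) ? gmtrr : hmtrr;
    }
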
   120.1 --- a/xen/arch/x86/hvm/vmsi.c	Wed Jan 28 12:22:58 2009 +0900
   120.2 +++ b/xen/arch/x86/hvm/vmsi.c	Wed Jan 28 13:06:45 2009 +0900
   120.3 @@ -134,7 +134,7 @@ int vmsi_deliver(struct domain *d, int p
   120.4                  "vector=%x trig_mode=%x\n",
   120.5                  dest, dest_mode, delivery_mode, vector, trig_mode);
   120.6  
   120.7 -    if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
   120.8 +    if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) )
   120.9      {
  120.10          gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
  120.11          return 0;
   121.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Wed Jan 28 12:22:58 2009 +0900
   121.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Wed Jan 28 13:06:45 2009 +0900
   121.3 @@ -167,14 +167,15 @@ static void vmx_init_vmcs_config(void)
   121.4  #endif
   121.5  
   121.6      min = VM_EXIT_ACK_INTR_ON_EXIT;
   121.7 -    opt = 0;
   121.8 +    opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
   121.9  #ifdef __x86_64__
  121.10      min |= VM_EXIT_IA32E_MODE;
  121.11  #endif
  121.12      _vmx_vmexit_control = adjust_vmx_controls(
  121.13          min, opt, MSR_IA32_VMX_EXIT_CTLS);
  121.14  
  121.15 -    min = opt = 0;
  121.16 +    min = 0;
  121.17 +    opt = VM_ENTRY_LOAD_GUEST_PAT;
  121.18      _vmx_vmentry_control = adjust_vmx_controls(
  121.19          min, opt, MSR_IA32_VMX_ENTRY_CTLS);
  121.20  
  121.21 @@ -519,8 +520,6 @@ static int construct_vmcs(struct vcpu *v
  121.22  
  121.23      /* VMCS controls. */
  121.24      __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
  121.25 -    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
  121.26 -    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
  121.27  
  121.28      v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
  121.29      v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
  121.30 @@ -534,12 +533,18 @@ static int construct_vmcs(struct vcpu *v
  121.31      else
  121.32      {
  121.33          v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
  121.34 +        vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
  121.35 +                                VM_EXIT_LOAD_HOST_PAT);
  121.36 +        vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
  121.37      }
  121.38  
  121.39      /* Do not enable Monitor Trap Flag unless start single step debug */
  121.40      v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
  121.41  
  121.42      __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
  121.43 +    __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
  121.44 +    __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
  121.45 +
  121.46      if ( cpu_has_vmx_secondary_exec_control )
  121.47          __vmwrite(SECONDARY_VM_EXEC_CONTROL,
  121.48                    v->arch.hvm_vmx.secondary_exec_control);
  121.49 @@ -561,6 +566,8 @@ static int construct_vmcs(struct vcpu *v
  121.50          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
  121.51          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
  121.52          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
  121.53 +        if ( cpu_has_vmx_pat && paging_mode_hap(d) )
  121.54 +            vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
  121.55      }
  121.56  
  121.57      /* I/O access bitmap. */
  121.58 @@ -692,6 +699,21 @@ static int construct_vmcs(struct vcpu *v
  121.59          __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
  121.60      }
  121.61  
  121.62 +    if ( cpu_has_vmx_pat && paging_mode_hap(d) )
  121.63 +    {
  121.64 +        u64 host_pat, guest_pat;
  121.65 +
  121.66 +        rdmsrl(MSR_IA32_CR_PAT, host_pat);
  121.67 +        guest_pat = 0x7040600070406ULL;
  121.68 +
  121.69 +        __vmwrite(HOST_PAT, host_pat);
  121.70 +        __vmwrite(GUEST_PAT, guest_pat);
  121.71 +#ifdef __i386__
  121.72 +        __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
  121.73 +        __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
  121.74 +#endif
  121.75 +    }
  121.76 +
  121.77      vmx_vmcs_exit(v);
  121.78  
  121.79      paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
  121.80 @@ -989,6 +1011,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
  121.81      vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
  121.82      vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
  121.83      vmx_dump_sel("TR", GUEST_TR_SELECTOR);
  121.84 +    printk("Guest PAT = 0x%08x%08x\n",
  121.85 +           (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
  121.86      x  = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
  121.87      x |= (uint32_t)vmr(TSC_OFFSET);
  121.88      printk("TSC Offset = %016llx\n", x);
  121.89 @@ -1027,6 +1051,8 @@ void vmcs_dump_vcpu(struct vcpu *v)
  121.90             (unsigned long long)vmr(HOST_SYSENTER_ESP),
  121.91             (int)vmr(HOST_SYSENTER_CS),
  121.92             (unsigned long long)vmr(HOST_SYSENTER_EIP));
  121.93 +    printk("Host PAT = 0x%08x%08x\n",
  121.94 +           (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
  121.95  
  121.96      printk("*** Control State ***\n");
  121.97      printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
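
With EPT in use and PAT controls available, the guest PAT is now loaded on VM entry and saved on VM exit, the host PAT is reloaded on exit, and the PAT MSR intercept is dropped. The default guest value 0x7040600070406 reproduces the architectural power-on PAT, one memory-type byte per PAT entry. A small decoding sketch (the helper is illustrative):

    /* Sketch: decode a PAT MSR value byte by byte (SDM memory-type encoding). */
    static const char *pat_entry_name(uint64_t pat, unsigned int idx)
    {
        switch ( (pat >> (idx * 8)) & 7 )
        {
        case 0: return "UC";   case 1: return "WC";
        case 4: return "WT";   case 5: return "WP";
        case 6: return "WB";   case 7: return "UC-";
        default: return "reserved";
        }
    }
    /* 0x0007040600070406 => PAT0..PAT7 = WB, WT, UC-, UC, WB, WT, UC-, UC */
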
   122.1 --- a/xen/arch/x86/io_apic.c	Wed Jan 28 12:22:58 2009 +0900
   122.2 +++ b/xen/arch/x86/io_apic.c	Wed Jan 28 13:06:45 2009 +0900
   122.3 @@ -84,7 +84,9 @@ int disable_timer_pin_1 __initdata;
   122.4  
   122.5  static struct irq_pin_list {
   122.6      int apic, pin, next;
   122.7 -} irq_2_pin[PIN_MAP_SIZE];
   122.8 +} irq_2_pin[PIN_MAP_SIZE] = {
   122.9 +    [0 ... PIN_MAP_SIZE-1].pin = -1
  122.10 +};
  122.11  static int irq_2_pin_free_entry = NR_IRQS;
  122.12  
  122.13  int vector_irq[NR_VECTORS] __read_mostly = {
  122.14 @@ -1018,11 +1020,6 @@ static void __init enable_IO_APIC(void)
  122.15      int i, apic;
  122.16      unsigned long flags;
  122.17  
  122.18 -    for (i = 0; i < PIN_MAP_SIZE; i++) {
  122.19 -        irq_2_pin[i].pin = -1;
  122.20 -        irq_2_pin[i].next = 0;
  122.21 -    }
  122.22 -
  122.23      /* Initialise dynamic irq_2_pin free list. */
  122.24      for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
  122.25          irq_2_pin[i].next = i + 1;
  122.26 @@ -1557,11 +1554,14 @@ static unsigned int startup_msi_vector(u
  122.27  
  122.28  static void ack_msi_vector(unsigned int vector)
  122.29  {
  122.30 -    ack_APIC_irq();
  122.31 +    if ( msi_maskable_irq(irq_desc[vector].msi_desc) )
  122.32 +        ack_APIC_irq(); /* ACKTYPE_NONE */
  122.33  }
  122.34  
  122.35  static void end_msi_vector(unsigned int vector)
  122.36  {
  122.37 +    if ( !msi_maskable_irq(irq_desc[vector].msi_desc) )
  122.38 +        ack_APIC_irq(); /* ACKTYPE_EOI */
  122.39  }
  122.40  
  122.41  static void shutdown_msi_vector(unsigned int vector)
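
The irq_2_pin change above replaces a runtime initialisation loop with a GNU C range designator, so every entry's .pin field already holds -1 when the table is linked into the image; the MSI ack/end split then issues exactly one ack_APIC_irq() per interrupt, at ack time for maskable MSIs and at end time otherwise. A standalone sketch of the range-designator form (array size is illustrative):

    /* Sketch: GNU C range designator gives compile-time initialisation. */
    struct pin_entry { int apic, pin, next; };
    static struct pin_entry table[8] = {
        [0 ... 7].pin = -1   /* .apic and .next default to 0 */
    };
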
   123.1 --- a/xen/arch/x86/irq.c	Wed Jan 28 12:22:58 2009 +0900
   123.2 +++ b/xen/arch/x86/irq.c	Wed Jan 28 13:06:45 2009 +0900
   123.3 @@ -491,7 +491,7 @@ int pirq_guest_unmask(struct domain *d)
   123.4  }
   123.5  
   123.6  extern int ioapic_ack_new;
   123.7 -int pirq_acktype(struct domain *d, int irq)
   123.8 +static int pirq_acktype(struct domain *d, int irq)
   123.9  {
  123.10      irq_desc_t  *desc;
  123.11      unsigned int vector;
  123.12 @@ -705,6 +705,10 @@ static irq_guest_action_t *__pirq_guest_
  123.13              spin_lock_irq(&desc->lock);
  123.14          }
  123.15          break;
  123.16 +    case ACKTYPE_NONE:
  123.17 +        stop_timer(&irq_guest_eoi_timer[vector]);
  123.18 +        _irq_guest_eoi(desc);
  123.19 +        break;
  123.20      }
  123.21  
  123.22      /*
  123.23 @@ -853,10 +857,6 @@ int map_domain_pirq(
  123.24      ASSERT(spin_is_locked(&pcidevs_lock));
  123.25      ASSERT(spin_is_locked(&d->event_lock));
  123.26  
  123.27 -    /* XXX Until pcidev and msi locking is fixed. */
  123.28 -    if ( type == MAP_PIRQ_TYPE_MSI )
  123.29 -        return -EINVAL;
  123.30 -
  123.31      if ( !IS_PRIV(current->domain) )
  123.32          return -EPERM;
  123.33  
  123.34 @@ -867,8 +867,8 @@ int map_domain_pirq(
  123.35          return -EINVAL;
  123.36      }
  123.37  
  123.38 -    old_vector = d->arch.pirq_vector[pirq];
  123.39 -    old_pirq = d->arch.vector_pirq[vector];
  123.40 +    old_vector = domain_irq_to_vector(d, pirq);
  123.41 +    old_pirq = domain_vector_to_irq(d, vector);
  123.42  
  123.43      if ( (old_vector && (old_vector != vector) ) ||
  123.44           (old_pirq && (old_pirq != pirq)) )
  123.45 @@ -892,6 +892,10 @@ int map_domain_pirq(
  123.46      {
  123.47          struct msi_info *msi = (struct msi_info *)data;
  123.48  
  123.49 +        ret = -ENODEV;
  123.50 +        if ( !cpu_has_apic )
  123.51 +            goto done;
  123.52 +
  123.53          pdev = pci_get_pdev(msi->bus, msi->devfn);
  123.54          ret = pci_enable_msi(msi, &msi_desc);
  123.55          if ( ret )
  123.56 @@ -937,7 +941,7 @@ int unmap_domain_pirq(struct domain *d, 
  123.57      ASSERT(spin_is_locked(&pcidevs_lock));
  123.58      ASSERT(spin_is_locked(&d->event_lock));
  123.59  
  123.60 -    vector = d->arch.pirq_vector[pirq];
  123.61 +    vector = domain_irq_to_vector(d, pirq);
  123.62      if ( vector <= 0 )
  123.63      {
  123.64          dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
  123.65 @@ -958,7 +962,7 @@ int unmap_domain_pirq(struct domain *d, 
  123.66  
  123.67      spin_lock_irqsave(&desc->lock, flags);
  123.68  
  123.69 -    BUG_ON(vector != d->arch.pirq_vector[pirq]);
  123.70 +    BUG_ON(vector != domain_irq_to_vector(d, pirq));
  123.71  
  123.72      if ( msi_desc )
  123.73          teardown_msi_vector(vector);
   124.1 --- a/xen/arch/x86/machine_kexec.c	Wed Jan 28 12:22:58 2009 +0900
   124.2 +++ b/xen/arch/x86/machine_kexec.c	Wed Jan 28 13:06:45 2009 +0900
   124.3 @@ -150,6 +150,9 @@ void arch_crash_save_vmcoreinfo(void)
   124.4  	VMCOREINFO_SYMBOL(dom_xen);
   124.5  	VMCOREINFO_SYMBOL(dom_io);
   124.6  
   124.7 +#ifdef CONFIG_X86_32
   124.8 +    VMCOREINFO_SYMBOL(xenheap_phys_end);
   124.9 +#endif
  124.10  #ifdef CONFIG_X86_PAE
  124.11  	VMCOREINFO_SYMBOL_ALIAS(pgd_l3, idle_pg_table);
  124.12  #endif
   125.1 --- a/xen/arch/x86/microcode.c	Wed Jan 28 12:22:58 2009 +0900
   125.2 +++ b/xen/arch/x86/microcode.c	Wed Jan 28 13:06:45 2009 +0900
   125.3 @@ -49,31 +49,22 @@ struct microcode_info {
   125.4      char buffer[1];
   125.5  };
   125.6  
   125.7 -static void microcode_fini_cpu(int cpu)
   125.8 +static void __microcode_fini_cpu(int cpu)
   125.9  {
  125.10      struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
  125.11  
  125.12 +    xfree(uci->mc.mc_valid);
  125.13 +    memset(uci, 0, sizeof(*uci));
  125.14 +}
  125.15 +
  125.16 +static void microcode_fini_cpu(int cpu)
  125.17 +{
  125.18      spin_lock(&microcode_mutex);
  125.19 -    xfree(uci->mc.valid_mc);
  125.20 -    uci->mc.valid_mc = NULL;
  125.21 -    uci->valid = 0;
  125.22 +    __microcode_fini_cpu(cpu);
  125.23      spin_unlock(&microcode_mutex);
  125.24  }
  125.25  
  125.26 -static int collect_cpu_info(int cpu)
  125.27 -{
  125.28 -    int err = 0;
  125.29 -    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
  125.30 -
  125.31 -    memset(uci, 0, sizeof(*uci));
  125.32 -    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
  125.33 -    if ( !err )
  125.34 -        uci->valid = 1;
  125.35 -
  125.36 -    return err;
  125.37 -}
  125.38 -
  125.39 -static int microcode_resume_cpu(int cpu)
  125.40 +int microcode_resume_cpu(int cpu)
  125.41  {
  125.42      int err = 0;
  125.43      struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
  125.44 @@ -81,7 +72,7 @@ static int microcode_resume_cpu(int cpu)
  125.45  
  125.46      gdprintk(XENLOG_INFO, "microcode: CPU%d resumed\n", cpu);
  125.47  
  125.48 -    if ( !uci->mc.valid_mc )
  125.49 +    if ( !uci->mc.mc_valid )
  125.50          return -EIO;
  125.51  
  125.52      /*
  125.53 @@ -95,16 +86,15 @@ static int microcode_resume_cpu(int cpu)
  125.54          return err;
  125.55      }
  125.56  
  125.57 -    if ( memcmp(&nsig, &uci->cpu_sig, sizeof(nsig)) )
  125.58 +    if ( microcode_ops->microcode_resume_match(cpu, &nsig) )
  125.59 +    {
  125.60 +        return microcode_ops->apply_microcode(cpu);
  125.61 +    }
  125.62 +    else
  125.63      {
  125.64          microcode_fini_cpu(cpu);
  125.65 -        /* Should we look for a new ucode here? */
  125.66          return -EIO;
  125.67      }
  125.68 -
  125.69 -    err = microcode_ops->apply_microcode(cpu);
  125.70 -
  125.71 -    return err;
  125.72  }
  125.73  
  125.74  static int microcode_update_cpu(const void *buf, size_t size)
  125.75 @@ -115,20 +105,11 @@ static int microcode_update_cpu(const vo
  125.76  
  125.77      spin_lock(&microcode_mutex);
  125.78  
  125.79 -    /*
  125.80 -     * Check if the system resume is in progress (uci->valid != NULL),
  125.81 -     * otherwise just request a firmware:
  125.82 -     */
  125.83 -    if ( uci->valid )
  125.84 -    {
  125.85 -        err = microcode_resume_cpu(cpu);
  125.86 -    }
  125.87 +    err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
  125.88 +    if ( likely(!err) )
  125.89 +        err = microcode_ops->cpu_request_microcode(cpu, buf, size);
  125.90      else
  125.91 -    {
  125.92 -        err = collect_cpu_info(cpu);
  125.93 -        if ( !err && uci->valid )
  125.94 -            err = microcode_ops->cpu_request_microcode(cpu, buf, size);
  125.95 -    }
  125.96 +        __microcode_fini_cpu(cpu);
  125.97  
  125.98      spin_unlock(&microcode_mutex);
  125.99  
 125.100 @@ -153,7 +134,6 @@ static long do_microcode_update(void *_i
 125.101      error = info->error;
 125.102      xfree(info);
 125.103      return error;
 125.104 -
 125.105  }
 125.106  
 125.107  int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
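
After this rework the resume path no longer compares raw signatures itself: it re-reads the CPU signature, asks the vendor driver through microcode_ops->microcode_resume_match() whether the cached patch still fits, and either re-applies it or drops the stale state. A paraphrased sketch of that flow (names shortened, error handling trimmed; not the literal function):

    /* Sketch of the reworked resume path. */
    int resume_sketch(int cpu)
    {
        struct cpu_signature nsig;

        if ( microcode_ops->collect_cpu_info(cpu, &nsig) )
            return -EIO;                                     /* cannot read signature */
        if ( microcode_ops->microcode_resume_match(cpu, &nsig) )
            return microcode_ops->apply_microcode(cpu);      /* cached patch reused   */
        microcode_fini_cpu(cpu);                             /* stale: drop the patch */
        return -EIO;
    }
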
   126.1 --- a/xen/arch/x86/microcode_amd.c	Wed Jan 28 12:22:58 2009 +0900
   126.2 +++ b/xen/arch/x86/microcode_amd.c	Wed Jan 28 13:06:45 2009 +0900
   126.3 @@ -38,21 +38,16 @@
   126.4  #define MC_HEADER_SIZE          (sizeof(struct microcode_header_amd))
   126.5  #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
   126.6  #define DWSIZE                  (sizeof(uint32_t))
   126.7 -/* For now we support a fixed ucode total size only */
   126.8 -#define get_totalsize(mc) \
   126.9 -        ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
  126.10 -         + MC_HEADER_SIZE)
  126.11  
  126.12  /* serialize access to the physical write */
  126.13  static DEFINE_SPINLOCK(microcode_update_lock);
  126.14  
  126.15  struct equiv_cpu_entry *equiv_cpu_table;
  126.16  
  126.17 -static long install_equiv_cpu_table(const void *, uint32_t, long);
  126.18 -
  126.19  static int collect_cpu_info(int cpu, struct cpu_signature *csig)
  126.20  {
  126.21      struct cpuinfo_x86 *c = &cpu_data[cpu];
  126.22 +    uint32_t dummy;
  126.23  
  126.24      memset(csig, 0, sizeof(*csig));
  126.25  
  126.26 @@ -60,13 +55,10 @@ static int collect_cpu_info(int cpu, str
  126.27      {
  126.28          printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
  126.29                 cpu);
  126.30 -        return -1;
  126.31 +        return -EINVAL;
  126.32      }
  126.33  
  126.34 -    asm volatile (
  126.35 -        "movl %1, %%ecx; rdmsr"
  126.36 -        : "=a" (csig->rev)
  126.37 -        : "i" (MSR_AMD_PATCHLEVEL) : "ecx" );
  126.38 +    rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy);
  126.39  
  126.40      printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
  126.41             csig->rev);
  126.42 @@ -74,29 +66,17 @@ static int collect_cpu_info(int cpu, str
  126.43      return 0;
  126.44  }
  126.45  
  126.46 -static int get_matching_microcode(void *mc, int cpu)
  126.47 +static int microcode_fits(void *mc, int cpu)
  126.48  {
  126.49      struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
  126.50      struct microcode_header_amd *mc_header = mc;
  126.51 -    unsigned long total_size = get_totalsize(mc_header);
  126.52 -    void *new_mc;
  126.53      unsigned int current_cpu_id;
  126.54 -    unsigned int equiv_cpu_id = 0x00;
  126.55 +    unsigned int equiv_cpu_id = 0x0;
  126.56      unsigned int i;
  126.57  
  126.58      /* We should bind the task to the CPU */
  126.59      BUG_ON(cpu != raw_smp_processor_id());
  126.60  
  126.61 -    /* This is a tricky part. We might be called from a write operation
  126.62 -     * to the device file instead of the usual process of firmware
  126.63 -     * loading. This routine needs to be able to distinguish both
  126.64 -     * cases. This is done by checking if there already is a equivalent
  126.65 -     * CPU table installed. If not, we're written through
  126.66 -     * /dev/cpu/microcode.
  126.67 -     * Since we ignore all checks. The error case in which going through
  126.68 -     * firmware loading and that table is not loaded has already been
  126.69 -     * checked earlier.
  126.70 -     */
  126.71      if ( equiv_cpu_table == NULL )
  126.72      {
  126.73          printk(KERN_INFO "microcode: CPU%d microcode update with "
  126.74 @@ -111,7 +91,7 @@ static int get_matching_microcode(void *
  126.75      {
  126.76          if ( current_cpu_id == equiv_cpu_table[i].installed_cpu )
  126.77          {
  126.78 -            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
  126.79 +            equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
  126.80              break;
  126.81          }
  126.82      }
  126.83 @@ -119,171 +99,136 @@ static int get_matching_microcode(void *
  126.84      if ( !equiv_cpu_id )
  126.85      {
  126.86          printk(KERN_ERR "microcode: CPU%d cpu_id "
  126.87 -               "not found in equivalent cpu table \n", cpu);
  126.88 -        return 0;
  126.89 +               "not found in equivalent cpu table\n", cpu);
  126.90 +        return -EINVAL;
  126.91      }
  126.92  
  126.93 -    if ( (mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff) )
  126.94 -    {
  126.95 -        printk(KERN_INFO
  126.96 -               "microcode: CPU%d patch does not match "
  126.97 -               "(patch is %x, cpu extended is %x) \n",
  126.98 -               cpu, mc_header->processor_rev_id[0],
  126.99 -               (equiv_cpu_id & 0xff));
 126.100 -        return 0;
 126.101 -    }
 126.102 -
 126.103 -    if ( (mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff) )
 126.104 +    if ( (mc_header->processor_rev_id) != equiv_cpu_id )
 126.105      {
 126.106          printk(KERN_INFO "microcode: CPU%d patch does not match "
 126.107                 "(patch is %x, cpu base id is %x) \n",
 126.108 -               cpu, mc_header->processor_rev_id[1],
 126.109 -               ((equiv_cpu_id >> 16) & 0xff));
 126.110 -        return 0;
 126.111 +               cpu, mc_header->processor_rev_id, equiv_cpu_id);
 126.112 +        return -EINVAL;
 126.113      }
 126.114  
 126.115      if ( mc_header->patch_id <= uci->cpu_sig.rev )
 126.116 -        return 0;
 126.117 +        return -EINVAL;
 126.118  
 126.119      printk(KERN_INFO "microcode: CPU%d found a matching microcode "
 126.120             "update with version 0x%x (current=0x%x)\n",
 126.121             cpu, mc_header->patch_id, uci->cpu_sig.rev);
 126.122  
 126.123 - out:
 126.124 -    new_mc = xmalloc_bytes(UCODE_MAX_SIZE);
 126.125 -    if ( new_mc == NULL )
 126.126 -    {
 126.127 -        printk(KERN_ERR "microcode: error, can't allocate memory\n");
 126.128 -        return -ENOMEM;
 126.129 -    }
 126.130 -    memset(new_mc, 0, UCODE_MAX_SIZE);
 126.131 -
 126.132 -    /* free previous update file */
 126.133 -    xfree(uci->mc.mc_amd);
 126.134 -
 126.135 -    memcpy(new_mc, mc, total_size);
 126.136 -
 126.137 -    uci->mc.mc_amd = new_mc;
 126.138 -    return 1;
 126.139 +out:
 126.140 +    return 0;
 126.141  }
 126.142  
 126.143  static int apply_microcode(int cpu)
 126.144  {
 126.145      unsigned long flags;
 126.146 -    uint32_t eax, edx, rev;
 126.147 -    int cpu_num = raw_smp_processor_id();
 126.148 -    struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 126.149 -    uint64_t addr;
 126.150 +    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 126.151 +    uint32_t rev, dummy;
 126.152 +    struct microcode_amd *mc_amd = uci->mc.mc_amd;
 126.153  
 126.154      /* We should bind the task to the CPU */
 126.155 -    BUG_ON(cpu_num != cpu);
 126.156 +    BUG_ON(raw_smp_processor_id() != cpu);
 126.157  
 126.158 -    if ( uci->mc.mc_amd == NULL )
 126.159 +    if ( mc_amd == NULL )
 126.160          return -EINVAL;
 126.161  
 126.162      spin_lock_irqsave(&microcode_update_lock, flags);
 126.163  
 126.164 -    addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
 126.165 -    edx = (uint32_t)(addr >> 32);
 126.166 -    eax = (uint32_t)addr;
 126.167 -
 126.168 -    asm volatile (
 126.169 -        "movl %0, %%ecx; wrmsr" :
 126.170 -        : "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx" );
 126.171 +    wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code);
 126.172  
 126.173      /* get patch id after patching */
 126.174 -    asm volatile (
 126.175 -        "movl %1, %%ecx; rdmsr"
 126.176 -        : "=a" (rev)
 126.177 -        : "i" (MSR_AMD_PATCHLEVEL) : "ecx");
 126.178 +    rdmsr(MSR_AMD_PATCHLEVEL, rev, dummy);
 126.179  
 126.180      spin_unlock_irqrestore(&microcode_update_lock, flags);
 126.181  
 126.182      /* check current patch id and patch's id for match */
 126.183 -    if ( rev != uci->mc.mc_amd->hdr.patch_id )
 126.184 +    if ( rev != mc_amd->hdr.patch_id )
 126.185      {
 126.186          printk(KERN_ERR "microcode: CPU%d update from revision "
 126.187 -               "0x%x to 0x%x failed\n", cpu_num,
 126.188 -               uci->mc.mc_amd->hdr.patch_id, rev);
 126.189 +               "0x%x to 0x%x failed\n", cpu,
 126.190 +               mc_amd->hdr.patch_id, rev);
 126.191          return -EIO;
 126.192      }
 126.193  
 126.194      printk("microcode: CPU%d updated from revision "
 126.195             "0x%x to 0x%x \n",
 126.196 -           cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
 126.197 +           cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
 126.198  
 126.199      uci->cpu_sig.rev = rev;
 126.200  
 126.201      return 0;
 126.202  }
 126.203  
 126.204 -static long get_next_ucode_from_buffer_amd(void **mc, const void *buf,
 126.205 -                                           unsigned long size, long offset)
 126.206 +static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
 126.207 +                                         size_t size, unsigned long *offset)
 126.208  {
 126.209      struct microcode_header_amd *mc_header;
 126.210 -    unsigned long total_size;
 126.211 -    const uint8_t *buf_pos = buf;
 126.212 +    size_t total_size;
 126.213 +    const uint8_t *bufp = buf;
 126.214 +    unsigned long off;
 126.215 +
 126.216 +    off = *offset;
 126.217  
 126.218      /* No more data */
 126.219 -    if ( offset >= size )
 126.220 -        return 0;
 126.221 +    if ( off >= size )
 126.222 +        return 1;
 126.223  
 126.224 -    if ( buf_pos[offset] != UCODE_UCODE_TYPE )
 126.225 +    if ( bufp[off] != UCODE_UCODE_TYPE )
 126.226      {
 126.227          printk(KERN_ERR "microcode: error! "
 126.228                 "Wrong microcode payload type field\n");
 126.229          return -EINVAL;
 126.230      }
 126.231  
 126.232 -    mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
 126.233 +    mc_header = (struct microcode_header_amd *)(&bufp[off+8]);
 126.234  
 126.235 -    total_size = (unsigned long) (buf_pos[offset+4] +
 126.236 -                                  (buf_pos[offset+5] << 8));
 126.237 +    total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
 126.238  
 126.239      printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
 126.240 -           size, total_size, offset);
 126.241 +           (unsigned long)size, total_size, off);
 126.242  
 126.243 -    if ( (offset + total_size) > size )
 126.244 +    if ( (off + total_size) > size )
 126.245      {
 126.246          printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
 126.247          return -EINVAL;
 126.248      }
 126.249  
 126.250 -    *mc = xmalloc_bytes(UCODE_MAX_SIZE);
 126.251 -    if ( *mc == NULL )
 126.252 -    {
 126.253 -        printk(KERN_ERR "microcode: error! "
 126.254 -               "Can not allocate memory for microcode patch\n");
 126.255 -        return -ENOMEM;
 126.256 -    }
 126.257 +    memset(mc, 0, UCODE_MAX_SIZE);
 126.258 +    memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
 126.259  
 126.260 -    memset(*mc, 0, UCODE_MAX_SIZE);
 126.261 -    memcpy(*mc, (const void *)(buf + offset + 8), total_size);
 126.262 +    *offset = off + total_size + 8;
 126.263  
 126.264 -    return offset + total_size + 8;
 126.265 +    return 0;
 126.266  }
 126.267  
 126.268 -static long install_equiv_cpu_table(const void *buf,
 126.269 -                                    uint32_t size, long offset)
 126.270 +static int install_equiv_cpu_table(const void *buf, uint32_t size,
 126.271 +                                   unsigned long *offset)
 126.272  {
 126.273      const uint32_t *buf_pos = buf;
 126.274 +    unsigned long off;
 126.275 +
 126.276 +    off = *offset;
 126.277 +    *offset = 0;
 126.278  
 126.279      /* No more data */
 126.280 -    if ( offset >= size )
 126.281 -        return 0;
 126.282 +    if ( off >= size )
 126.283 +        return -EINVAL;
 126.284  
 126.285      if ( buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE )
 126.286      {
 126.287          printk(KERN_ERR "microcode: error! "
 126.288 -               "Wrong microcode equivalnet cpu table type field\n");
 126.289 -        return 0;
 126.290 +               "Wrong microcode equivalent cpu table type field\n");
 126.291 +        return -EINVAL;
 126.292      }
 126.293  
 126.294      if ( size == 0 )
 126.295      {
 126.296          printk(KERN_ERR "microcode: error! "
  126.297                 "Wrong microcode equivalent cpu table length\n");
 126.298 -        return 0;
 126.299 +        return -EINVAL;
 126.300      }
 126.301  
 126.302      equiv_cpu_table = xmalloc_bytes(size);
 126.303 @@ -291,20 +236,24 @@ static long install_equiv_cpu_table(cons
 126.304      {
 126.305          printk(KERN_ERR "microcode: error, can't allocate "
 126.306                 "memory for equiv CPU table\n");
 126.307 -        return 0;
 126.308 +        return -ENOMEM;
 126.309      }
 126.310  
 126.311      memset(equiv_cpu_table, 0, size);
 126.312      memcpy(equiv_cpu_table, (const void *)&buf_pos[3], size);
 126.313  
 126.314 -    return size + 12; /* add header length */
 126.315 +    *offset = size + 12;	/* add header length */
 126.316 +
 126.317 +    return 0;
 126.318  }
 126.319  
 126.320  static int cpu_request_microcode(int cpu, const void *buf, size_t size)
 126.321  {
 126.322      const uint32_t *buf_pos;
 126.323 -    long offset = 0;
 126.324 +    unsigned long offset = 0;
 126.325      int error = 0;
 126.326 +    int ret;
 126.327 +    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 126.328      void *mc;
 126.329  
 126.330      /* We should bind the task to the CPU */
 126.331 @@ -319,41 +268,63 @@ static int cpu_request_microcode(int cpu
 126.332          return -EINVAL;
 126.333      }
 126.334  
 126.335 -    offset = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), offset);
 126.336 -    if ( !offset )
 126.337 +    error = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), &offset);
 126.338 +    if ( error )
 126.339      {
 126.340          printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
 126.341          return -EINVAL;
 126.342      }
 126.343  
 126.344 -    while ( (offset =
 126.345 -             get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0 )
 126.346 +    mc = xmalloc_bytes(UCODE_MAX_SIZE);
 126.347 +    if ( mc == NULL )
 126.348      {
 126.349 -        error = get_matching_microcode(mc, cpu);
 126.350 -        if ( error < 0 )
 126.351 +        printk(KERN_ERR "microcode: error! "
 126.352 +               "Can not allocate memory for microcode patch\n");
 126.353 +        error = -ENOMEM;
 126.354 +        goto out;
 126.355 +    }
 126.356 +
 126.357 +    /* implicitly validates uci->mc.mc_valid */
 126.358 +    uci->mc.mc_amd = mc;
 126.359 +
 126.360 +    /*
 126.361 +     * It's possible the data file has multiple matching ucode entries;
 126.362 +     * keep searching until the latest version is found.
 126.363 +     */
 126.364 +    while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0)
 126.365 +    {
 126.366 +        error = microcode_fits(mc, cpu);
 126.367 +        if (error != 0)
 126.368 +            continue;
 126.369 +
 126.370 +        error = apply_microcode(cpu);
 126.371 +        if (error == 0)
 126.372              break;
 126.373 -        /*
 126.374 -         * It's possible the data file has multiple matching ucode,
 126.375 -         * lets keep searching till the latest version
 126.376 -         */
 126.377 -        if ( error == 1 )
 126.378 -            error = apply_microcode(cpu);
 126.379 -        xfree(mc);
 126.380      }
 126.381 -    if ( offset > 0 )
 126.382 -    {
 126.383 +
 126.384 +    /* On success, keep the microcode patch so that it can be
 126.385 +     * re-applied on resume.
 126.386 +     */
 126.387 +    if (error) {
 126.388          xfree(mc);
 126.389 -        xfree(equiv_cpu_table);
 126.390 -        equiv_cpu_table = NULL;
 126.391 +        mc = NULL;
 126.392      }
 126.393 -    if ( offset < 0 )
 126.394 -        error = offset;
 126.395 +    uci->mc.mc_amd = mc;
 126.396 +
 126.397 +out:
 126.398 +    xfree(equiv_cpu_table);
 126.399 +    equiv_cpu_table = NULL;
 126.400  
 126.401      return error;
 126.402  }
 126.403  
 126.404 +static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
 126.405 +{
 126.406 +    return 0;
 126.407 +}
 126.408 +
 126.409  static struct microcode_ops microcode_amd_ops = {
 126.410 -    .get_matching_microcode           = get_matching_microcode,
 126.411 +    .microcode_resume_match           = microcode_resume_match,
 126.412      .cpu_request_microcode            = cpu_request_microcode,
 126.413      .collect_cpu_info                 = collect_cpu_info,
 126.414      .apply_microcode                  = apply_microcode,
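
The reworked AMD path above replaces the "return next offset or negative error"
convention with a plain 0/-EINVAL return plus an offset out-parameter, and it
reuses one pre-allocated UCODE_MAX_SIZE buffer across loop iterations instead of
allocating per blob. The following is a minimal standalone sketch of that
iteration pattern only, with an illustrative 4-byte length prefix; it is not the
Xen parser and the names are invented for the example.

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>

    /* Copy the record at *offset into dst and advance *offset past it.
     * Returns 0 on success, -EINVAL when no further complete record exists. */
    static int next_record(void *dst, size_t dst_size, const uint8_t *buf,
                           size_t size, unsigned long *offset)
    {
        unsigned long off = *offset;
        uint32_t len;

        if ( off + sizeof(len) > size )
            return -EINVAL;                     /* no more data */
        memcpy(&len, &buf[off], sizeof(len));
        if ( (len > dst_size) || (off + sizeof(len) + len > size) )
            return -EINVAL;                     /* truncated or oversized record */

        memcpy(dst, &buf[off + sizeof(len)], len);
        *offset = off + sizeof(len) + len;      /* tell the caller where to resume */
        return 0;
    }

A caller then allocates one buffer of the maximum record size and loops
"while ( next_record(mc, max, buf, size, &offset) == 0 )", which mirrors the
structure of the new cpu_request_microcode() loop.
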
   127.1 --- a/xen/arch/x86/microcode_intel.c	Wed Jan 28 12:22:58 2009 +0900
   127.2 +++ b/xen/arch/x86/microcode_intel.c	Wed Jan 28 13:06:45 2009 +0900
   127.3 @@ -64,6 +64,8 @@ static int collect_cpu_info(int cpu_num,
   127.4      struct cpuinfo_x86 *c = &cpu_data[cpu_num];
   127.5      unsigned int val[2];
   127.6  
   127.7 +    BUG_ON(cpu_num != smp_processor_id());
   127.8 +
   127.9      memset(csig, 0, sizeof(*csig));
  127.10  
  127.11      if ( (c->x86_vendor != X86_VENDOR_INTEL) || (c->x86 < 6) ||
  127.12 @@ -323,6 +325,7 @@ static int cpu_request_microcode(int cpu
  127.13      long offset = 0;
  127.14      int error = 0;
  127.15      void *mc;
  127.16 +    unsigned int matching_count = 0;
  127.17  
  127.18      /* We should bind the task to the CPU */
  127.19      BUG_ON(cpu != raw_smp_processor_id());
  127.20 @@ -341,7 +344,7 @@ static int cpu_request_microcode(int cpu
  127.21           */
  127.22          if ( error == 1 )
  127.23          {
  127.24 -            apply_microcode(cpu);
  127.25 +            matching_count++;
  127.26              error = 0;
  127.27          }
  127.28          xfree(mc);
  127.29 @@ -351,11 +354,22 @@ static int cpu_request_microcode(int cpu
  127.30      if ( offset < 0 )
  127.31          error = offset;
  127.32  
  127.33 +    if ( !error && matching_count )
  127.34 +        apply_microcode(cpu);
  127.35 +
  127.36      return error;
  127.37  }
  127.38  
  127.39 +static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
  127.40 +{
  127.41 +    struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
  127.42 +
  127.43 +    return (sigmatch(nsig->sig, uci->cpu_sig.sig, nsig->pf, uci->cpu_sig.pf) &&
  127.44 +            (uci->cpu_sig.rev > nsig->rev));
  127.45 +}
  127.46 +
  127.47  static struct microcode_ops microcode_intel_ops = {
  127.48 -    .get_matching_microcode           = get_matching_microcode,
  127.49 +    .microcode_resume_match           = microcode_resume_match,
  127.50      .cpu_request_microcode            = cpu_request_microcode,
  127.51      .collect_cpu_info                 = collect_cpu_info,
  127.52      .apply_microcode                  = apply_microcode,
   128.1 --- a/xen/arch/x86/mm.c	Wed Jan 28 12:22:58 2009 +0900
   128.2 +++ b/xen/arch/x86/mm.c	Wed Jan 28 13:06:45 2009 +0900
   128.3 @@ -205,11 +205,6 @@ void __init init_frametable(void)
   128.4      }
   128.5  
   128.6      memset(frame_table, 0, nr_pages << PAGE_SHIFT);
   128.7 -
   128.8 -#if defined(__x86_64__)
   128.9 -    for ( i = 0; i < max_page; i ++ )
  128.10 -        spin_lock_init(&frame_table[i].lock);
  128.11 -#endif
  128.12  }
  128.13  
  128.14  void __init arch_init_memory(void)
  128.15 @@ -290,15 +285,16 @@ void __init arch_init_memory(void)
  128.16      subarch_init_memory();
  128.17  }
  128.18  
  128.19 -int memory_is_conventional_ram(paddr_t p)
  128.20 +int page_is_conventional_ram(unsigned long mfn)
  128.21  {
  128.22 +    uint64_t maddr = pfn_to_paddr(mfn);
  128.23      int i;
  128.24  
  128.25      for ( i = 0; i < e820.nr_map; i++ )
  128.26      {
  128.27          if ( (e820.map[i].type == E820_RAM) &&
  128.28 -             (e820.map[i].addr <= p) &&
  128.29 -             (e820.map[i].size > p) )
  128.30 +             (e820.map[i].addr <= maddr) &&
  128.31 +             ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) )
  128.32              return 1;
  128.33      }
  128.34  
  128.35 @@ -329,7 +325,7 @@ void share_xen_page_with_guest(
  128.36  
  128.37      page_set_owner(page, d);
  128.38      wmb(); /* install valid domain ptr before updating refcnt. */
  128.39 -    ASSERT(page->count_info == 0);
  128.40 +    ASSERT((page->count_info & ~PGC_xen_heap) == 0);
  128.41  
  128.42      /* Only add to the allocation list if the domain isn't dying. */
  128.43      if ( !d->is_dying )
  128.44 @@ -738,8 +734,8 @@ get_page_from_l1e(
  128.45      else if ( pte_flags_to_cacheattr(l1f) !=
  128.46                ((page->count_info >> PGC_cacheattr_base) & 7) )
  128.47      {
  128.48 -        uint32_t x, nx, y = page->count_info;
  128.49 -        uint32_t cacheattr = pte_flags_to_cacheattr(l1f);
  128.50 +        unsigned long x, nx, y = page->count_info;
  128.51 +        unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
  128.52  
  128.53          if ( is_xen_heap_page(page) )
  128.54          {
  128.55 @@ -1013,7 +1009,8 @@ static int put_page_from_l2e(l2_pgentry_
  128.56      {
  128.57          unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
  128.58          int writeable = l2e_get_flags(l2e) & _PAGE_RW;
  128.59 -        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
  128.60 +
  128.61 +        ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
  128.62          do {
  128.63              put_data_page(mfn_to_page(m), writeable);
  128.64          } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
  128.65 @@ -1031,14 +1028,28 @@ static int __put_page_type(struct page_i
  128.66  static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
  128.67                               int partial, int preemptible)
  128.68  {
  128.69 -    if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && 
  128.70 -         (l3e_get_pfn(l3e) != pfn) )
  128.71 +    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
  128.72 +        return 1;
  128.73 +
  128.74 +#ifdef __x86_64__
  128.75 +    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
  128.76      {
  128.77 -        if ( unlikely(partial > 0) )
  128.78 -            return __put_page_type(l3e_get_page(l3e), preemptible);
  128.79 -        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
  128.80 +        unsigned long mfn = l3e_get_pfn(l3e);
  128.81 +        int writeable = l3e_get_flags(l3e) & _PAGE_RW;
  128.82 +
  128.83 +        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
  128.84 +        do {
  128.85 +            put_data_page(mfn_to_page(mfn), writeable);
  128.86 +        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
  128.87 +
  128.88 +        return 0;
  128.89      }
  128.90 -    return 1;
  128.91 +#endif
  128.92 +
  128.93 +    if ( unlikely(partial > 0) )
  128.94 +        return __put_page_type(l3e_get_page(l3e), preemptible);
  128.95 +
  128.96 +    return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
  128.97  }
  128.98  
  128.99  #if CONFIG_PAGING_LEVELS >= 4
 128.100 @@ -1523,24 +1534,31 @@ static int free_l4_table(struct page_inf
 128.101  #define free_l4_table(page, preemptible) (-EINVAL)
 128.102  #endif
 128.103  
 128.104 -static void page_lock(struct page_info *page)
 128.105 +static int page_lock(struct page_info *page)
 128.106  {
 128.107 -#if defined(__i386__)
 128.108 -    while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) )
 128.109 -        while ( test_bit(_PGC_locked, &page->count_info) )
 128.110 +    unsigned long x, nx;
 128.111 +
 128.112 +    do {
 128.113 +        while ( (x = page->u.inuse.type_info) & PGT_locked )
 128.114              cpu_relax();
 128.115 -#else
 128.116 -    spin_lock(&page->lock);
 128.117 -#endif
 128.118 +        nx = x + (1 | PGT_locked);
 128.119 +        if ( !(x & PGT_validated) ||
 128.120 +             !(x & PGT_count_mask) ||
 128.121 +             !(nx & PGT_count_mask) )
 128.122 +            return 0;
 128.123 +    } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
 128.124 +
 128.125 +    return 1;
 128.126  }
 128.127  
 128.128  static void page_unlock(struct page_info *page)
 128.129  {
 128.130 -#if defined(__i386__)
 128.131 -    clear_bit(_PGC_locked, &page->count_info);
 128.132 -#else
 128.133 -    spin_unlock(&page->lock);
 128.134 -#endif
 128.135 +    unsigned long x, nx, y = page->u.inuse.type_info;
 128.136 +
 128.137 +    do {
 128.138 +        x = y;
 128.139 +        nx = x - (1 | PGT_locked);
 128.140 +    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
 128.141  }
 128.142  
 128.143  /* How to write an entry to the guest pagetables.
 128.144 @@ -1603,19 +1621,15 @@ static int mod_l1_entry(l1_pgentry_t *pl
 128.145      struct vcpu *curr = current;
 128.146      struct domain *d = curr->domain;
 128.147      unsigned long mfn;
 128.148 -    struct page_info *l1pg = mfn_to_page(gl1mfn);
 128.149      p2m_type_t p2mt;
 128.150      int rc = 1;
 128.151  
 128.152 -    page_lock(l1pg);
 128.153 -
 128.154      if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
 128.155 -        return page_unlock(l1pg), 0;
 128.156 +        return 0;
 128.157  
 128.158      if ( unlikely(paging_mode_refcounts(d)) )
 128.159      {
 128.160          rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
 128.161 -        page_unlock(l1pg);
 128.162          return rc;
 128.163      }
 128.164  
 128.165 @@ -1624,13 +1638,12 @@ static int mod_l1_entry(l1_pgentry_t *pl
 128.166          /* Translate foreign guest addresses. */
 128.167          mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
 128.168          if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
 128.169 -            return page_unlock(l1pg), 0;
 128.170 +            return 0;
 128.171          ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
 128.172          nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
 128.173  
 128.174          if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
 128.175          {
 128.176 -            page_unlock(l1pg);
 128.177              MEM_LOG("Bad L1 flags %x",
 128.178                      l1e_get_flags(nl1e) & l1_disallow_mask(d));
 128.179              return 0;
 128.180 @@ -1642,12 +1655,11 @@ static int mod_l1_entry(l1_pgentry_t *pl
 128.181              adjust_guest_l1e(nl1e, d);
 128.182              rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
 128.183                                preserve_ad);
 128.184 -            page_unlock(l1pg);
 128.185              return rc;
 128.186          }
 128.187  
 128.188          if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
 128.189 -            return page_unlock(l1pg), 0;
 128.190 +            return 0;
 128.191          
 128.192          adjust_guest_l1e(nl1e, d);
 128.193          if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
 128.194 @@ -1660,11 +1672,9 @@ static int mod_l1_entry(l1_pgentry_t *pl
 128.195      else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
 128.196                                       preserve_ad)) )
 128.197      {
 128.198 -        page_unlock(l1pg);
 128.199          return 0;
 128.200      }
 128.201  
 128.202 -    page_unlock(l1pg);
 128.203      put_page_from_l1e(ol1e, d);
 128.204      return rc;
 128.205  }
 128.206 @@ -1674,13 +1684,13 @@ static int mod_l1_entry(l1_pgentry_t *pl
 128.207  static int mod_l2_entry(l2_pgentry_t *pl2e, 
 128.208                          l2_pgentry_t nl2e, 
 128.209                          unsigned long pfn,
 128.210 -                        unsigned long type,
 128.211                          int preserve_ad)
 128.212  {
 128.213      l2_pgentry_t ol2e;
 128.214      struct vcpu *curr = current;
 128.215      struct domain *d = curr->domain;
 128.216      struct page_info *l2pg = mfn_to_page(pfn);
 128.217 +    unsigned long type = l2pg->u.inuse.type_info;
 128.218      int rc = 1;
 128.219  
 128.220      if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
 128.221 @@ -1689,16 +1699,13 @@ static int mod_l2_entry(l2_pgentry_t *pl
 128.222          return 0;
 128.223      }
 128.224  
 128.225 -    page_lock(l2pg);
 128.226 -
 128.227      if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
 128.228 -        return page_unlock(l2pg), 0;
 128.229 +        return 0;
 128.230  
 128.231      if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
 128.232      {
 128.233          if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
 128.234          {
 128.235 -            page_unlock(l2pg);
 128.236              MEM_LOG("Bad L2 flags %x",
 128.237                      l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
 128.238              return 0;
 128.239 @@ -1709,12 +1716,11 @@ static int mod_l2_entry(l2_pgentry_t *pl
 128.240          {
 128.241              adjust_guest_l2e(nl2e, d);
 128.242              rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
 128.243 -            page_unlock(l2pg);
 128.244              return rc;
 128.245          }
 128.246  
 128.247          if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
 128.248 -            return page_unlock(l2pg), 0;
 128.249 +            return 0;
 128.250  
 128.251          adjust_guest_l2e(nl2e, d);
 128.252          if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
 128.253 @@ -1727,11 +1733,9 @@ static int mod_l2_entry(l2_pgentry_t *pl
 128.254      else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
 128.255                                       preserve_ad)) )
 128.256      {
 128.257 -        page_unlock(l2pg);
 128.258          return 0;
 128.259      }
 128.260  
 128.261 -    page_unlock(l2pg);
 128.262      put_page_from_l2e(ol2e, pfn);
 128.263      return rc;
 128.264  }
 128.265 @@ -1746,7 +1750,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
 128.266      l3_pgentry_t ol3e;
 128.267      struct vcpu *curr = current;
 128.268      struct domain *d = curr->domain;
 128.269 -    struct page_info *l3pg = mfn_to_page(pfn);
 128.270      int rc = 0;
 128.271  
 128.272      if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
 128.273 @@ -1762,16 +1765,13 @@ static int mod_l3_entry(l3_pgentry_t *pl
 128.274      if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
 128.275          return -EINVAL;
 128.276  
 128.277 -    page_lock(l3pg);
 128.278 -
 128.279      if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
 128.280 -        return page_unlock(l3pg), -EFAULT;
 128.281 +        return -EFAULT;
 128.282  
 128.283      if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
 128.284      {
 128.285          if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
 128.286          {
 128.287 -            page_unlock(l3pg);
 128.288              MEM_LOG("Bad L3 flags %x",
 128.289                      l3e_get_flags(nl3e) & l3_disallow_mask(d));
 128.290              return -EINVAL;
 128.291 @@ -1782,13 +1782,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
 128.292          {
 128.293              adjust_guest_l3e(nl3e, d);
 128.294              rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
 128.295 -            page_unlock(l3pg);
 128.296              return rc ? 0 : -EFAULT;
 128.297          }
 128.298  
 128.299          rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
 128.300          if ( unlikely(rc < 0) )
 128.301 -            return page_unlock(l3pg), rc;
 128.302 +            return rc;
 128.303          rc = 0;
 128.304  
 128.305          adjust_guest_l3e(nl3e, d);
 128.306 @@ -1802,7 +1801,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
 128.307      else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
 128.308                                       preserve_ad)) )
 128.309      {
 128.310 -        page_unlock(l3pg);
 128.311          return -EFAULT;
 128.312      }
 128.313  
 128.314 @@ -1814,7 +1812,6 @@ static int mod_l3_entry(l3_pgentry_t *pl
 128.315          pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
 128.316      }
 128.317  
 128.318 -    page_unlock(l3pg);
 128.319      put_page_from_l3e(ol3e, pfn, 0, 0);
 128.320      return rc;
 128.321  }
 128.322 @@ -1831,7 +1828,6 @@ static int mod_l4_entry(l4_pgentry_t *pl
 128.323      struct vcpu *curr = current;
 128.324      struct domain *d = curr->domain;
 128.325      l4_pgentry_t ol4e;
 128.326 -    struct page_info *l4pg = mfn_to_page(pfn);
 128.327      int rc = 0;
 128.328  
 128.329      if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
 128.330 @@ -1840,16 +1836,13 @@ static int mod_l4_entry(l4_pgentry_t *pl
 128.331          return -EINVAL;
 128.332      }
 128.333  
 128.334 -    page_lock(l4pg);
 128.335 -
 128.336      if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
 128.337 -        return page_unlock(l4pg), -EFAULT;
 128.338 +        return -EFAULT;
 128.339  
 128.340      if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
 128.341      {
 128.342          if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
 128.343          {
 128.344 -            page_unlock(l4pg);
 128.345              MEM_LOG("Bad L4 flags %x",
 128.346                      l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
 128.347              return -EINVAL;
 128.348 @@ -1860,13 +1853,12 @@ static int mod_l4_entry(l4_pgentry_t *pl
 128.349          {
 128.350              adjust_guest_l4e(nl4e, d);
 128.351              rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
 128.352 -            page_unlock(l4pg);
 128.353              return rc ? 0 : -EFAULT;
 128.354          }
 128.355  
 128.356          rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
 128.357          if ( unlikely(rc < 0) )
 128.358 -            return page_unlock(l4pg), rc;
 128.359 +            return rc;
 128.360          rc = 0;
 128.361  
 128.362          adjust_guest_l4e(nl4e, d);
 128.363 @@ -1880,11 +1872,9 @@ static int mod_l4_entry(l4_pgentry_t *pl
 128.364      else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
 128.365                                       preserve_ad)) )
 128.366      {
 128.367 -        page_unlock(l4pg);
 128.368          return -EFAULT;
 128.369      }
 128.370  
 128.371 -    page_unlock(l4pg);
 128.372      put_page_from_l4e(ol4e, pfn, 0, 0);
 128.373      return rc;
 128.374  }
 128.375 @@ -1893,7 +1883,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
 128.376  
 128.377  void put_page(struct page_info *page)
 128.378  {
 128.379 -    u32 nx, x, y = page->count_info;
 128.380 +    unsigned long nx, x, y = page->count_info;
 128.381  
 128.382      do {
 128.383          x  = y;
 128.384 @@ -1911,36 +1901,30 @@ void put_page(struct page_info *page)
 128.385  
 128.386  int get_page(struct page_info *page, struct domain *domain)
 128.387  {
 128.388 -    u32 x, nx, y = page->count_info;
 128.389 -    u32 d, nd = page->u.inuse._domain;
 128.390 -    u32 _domain = pickle_domptr(domain);
 128.391 +    unsigned long x, y = page->count_info;
 128.392  
 128.393      do {
 128.394 -        x  = y;
 128.395 -        nx = x + 1;
 128.396 -        d  = nd;
 128.397 +        x = y;
 128.398          if ( unlikely((x & PGC_count_mask) == 0) ||  /* Not allocated? */
 128.399               /* Keep one spare reference to be acquired by get_page_light(). */
 128.400 -             unlikely(((nx + 1) & PGC_count_mask) <= 1) || /* Overflow? */
 128.401 -             unlikely(d != _domain) )                /* Wrong owner? */
 128.402 -        {
 128.403 -            if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
 128.404 -                gdprintk(XENLOG_INFO,
 128.405 -                         "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
 128.406 -                         PRtype_info "\n",
 128.407 -                         page_to_mfn(page), domain, unpickle_domptr(d),
 128.408 -                         x, page->u.inuse.type_info);
 128.409 -            return 0;
 128.410 -        }
 128.411 -        asm volatile (
 128.412 -            LOCK_PREFIX "cmpxchg8b %2"
 128.413 -            : "=d" (nd), "=a" (y),
 128.414 -            "=m" (*(volatile u64 *)(&page->count_info))
 128.415 -            : "0" (d), "1" (x), "c" (d), "b" (nx) );
 128.416 +             unlikely(((x + 2) & PGC_count_mask) <= 1) ) /* Overflow? */
 128.417 +            goto fail;
 128.418      }
 128.419 -    while ( unlikely(nd != d) || unlikely(y != x) );
 128.420 -
 128.421 -    return 1;
 128.422 +    while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
 128.423 +
 128.424 +    if ( likely(page_get_owner(page) == domain) )
 128.425 +        return 1;
 128.426 +
 128.427 +    put_page(page);
 128.428 +
 128.429 + fail:
 128.430 +    if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
 128.431 +        gdprintk(XENLOG_INFO,
 128.432 +                 "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%"
 128.433 +                 PRtype_info "\n",
 128.434 +                 page_to_mfn(page), domain, page_get_owner(page),
 128.435 +                 y, page->u.inuse.type_info);
 128.436 +    return 0;
 128.437  }
 128.438  
 128.439  /*
 128.440 @@ -1953,7 +1937,7 @@ int get_page(struct page_info *page, str
 128.441   */
 128.442  static void get_page_light(struct page_info *page)
 128.443  {
 128.444 -    u32 x, nx, y = page->count_info;
 128.445 +    unsigned long x, nx, y = page->count_info;
 128.446  
 128.447      do {
 128.448          x  = y;
 128.449 @@ -1994,7 +1978,7 @@ static int alloc_page_type(struct page_i
 128.450          rc = alloc_segdesc_page(page);
 128.451          break;
 128.452      default:
 128.453 -        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", 
 128.454 +        printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n", 
 128.455                 type, page->u.inuse.type_info,
 128.456                 page->count_info);
 128.457          rc = -EINVAL;
 128.458 @@ -2018,7 +2002,7 @@ static int alloc_page_type(struct page_i
 128.459      {
 128.460          ASSERT(rc < 0);
 128.461          MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
 128.462 -                PRtype_info ": caf=%08x taf=%" PRtype_info,
 128.463 +                PRtype_info ": caf=%08lx taf=%" PRtype_info,
 128.464                  page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
 128.465                  type, page->count_info, page->u.inuse.type_info);
 128.466          page->u.inuse.type_info = 0;
 128.467 @@ -2949,7 +2933,6 @@ int do_mmu_update(
 128.468      unsigned int cmd, done = 0;
 128.469      struct vcpu *v = current;
 128.470      struct domain *d = v->domain;
 128.471 -    unsigned long type_info;
 128.472      struct domain_mmap_cache mapcache;
 128.473  
 128.474      if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
 128.475 @@ -3021,24 +3004,9 @@ int do_mmu_update(
 128.476                            (unsigned long)(req.ptr & ~PAGE_MASK));
 128.477              page = mfn_to_page(mfn);
 128.478  
 128.479 -            switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
 128.480 -            {
 128.481 -            case PGT_l1_page_table:
 128.482 -            case PGT_l2_page_table:
 128.483 -            case PGT_l3_page_table:
 128.484 -            case PGT_l4_page_table:
 128.485 +            if ( page_lock(page) )
 128.486              {
 128.487 -                if ( paging_mode_refcounts(d) )
 128.488 -                {
 128.489 -                    MEM_LOG("mmu update on auto-refcounted domain!");
 128.490 -                    break;
 128.491 -                }
 128.492 -
 128.493 -                if ( unlikely(!get_page_type(
 128.494 -                    page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
 128.495 -                    goto not_a_pt;
 128.496 -
 128.497 -                switch ( type_info & PGT_type_mask )
 128.498 +                switch ( page->u.inuse.type_info & PGT_type_mask )
 128.499                  {
 128.500                  case PGT_l1_page_table:
 128.501                  {
 128.502 @@ -3050,7 +3018,7 @@ int do_mmu_update(
 128.503                  case PGT_l2_page_table:
 128.504                  {
 128.505                      l2_pgentry_t l2e = l2e_from_intpte(req.val);
 128.506 -                    okay = mod_l2_entry(va, l2e, mfn, type_info,
 128.507 +                    okay = mod_l2_entry(va, l2e, mfn,
 128.508                                          cmd == MMU_PT_UPDATE_PRESERVE_AD);
 128.509                  }
 128.510                  break;
 128.511 @@ -3072,31 +3040,23 @@ int do_mmu_update(
 128.512                  }
 128.513                  break;
 128.514  #endif
 128.515 +                case PGT_writable_page:
 128.516 +                    perfc_incr(writable_mmu_updates);
 128.517 +                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
 128.518 +                    break;
 128.519                  }
 128.520 -
 128.521 -                put_page_type(page);
 128.522 +                page_unlock(page);
 128.523                  if ( rc == -EINTR )
 128.524                      rc = -EAGAIN;
 128.525              }
 128.526 -            break;
 128.527 -
 128.528 -            default:
 128.529 -            not_a_pt:
 128.530 +            else if ( get_page_type(page, PGT_writable_page) )
 128.531              {
 128.532 -                if ( unlikely(!get_page_type(page, PGT_writable_page)) )
 128.533 -                    break;
 128.534 -
 128.535                  perfc_incr(writable_mmu_updates);
 128.536 -
 128.537                  okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
 128.538 -
 128.539                  put_page_type(page);
 128.540              }
 128.541 -            break;
 128.542 -            }
 128.543  
 128.544              unmap_domain_page_with_cache(va, &mapcache);
 128.545 -
 128.546              put_page(page);
 128.547              break;
 128.548  
 128.549 @@ -3175,7 +3135,6 @@ static int create_grant_pte_mapping(
 128.550      void *va;
 128.551      unsigned long gmfn, mfn;
 128.552      struct page_info *page;
 128.553 -    u32 type;
 128.554      l1_pgentry_t ol1e;
 128.555      struct domain *d = v->domain;
 128.556  
 128.557 @@ -3196,21 +3155,23 @@ static int create_grant_pte_mapping(
 128.558      va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
 128.559      page = mfn_to_page(mfn);
 128.560  
 128.561 -    type = page->u.inuse.type_info & PGT_type_mask;
 128.562 -    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
 128.563 +    if ( !page_lock(page) )
 128.564      {
 128.565 -        MEM_LOG("Grant map attempted to update a non-L1 page");
 128.566          rc = GNTST_general_error;
 128.567          goto failed;
 128.568      }
 128.569  
 128.570 -    page_lock(page);
 128.571 +    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.572 +    {
 128.573 +        page_unlock(page);
 128.574 +        rc = GNTST_general_error;
 128.575 +        goto failed;
 128.576 +    }
 128.577  
 128.578      ol1e = *(l1_pgentry_t *)va;
 128.579      if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
 128.580      {
 128.581          page_unlock(page);
 128.582 -        put_page_type(page);
 128.583          rc = GNTST_general_error;
 128.584          goto failed;
 128.585      } 
 128.586 @@ -3220,8 +3181,6 @@ static int create_grant_pte_mapping(
 128.587      if ( !paging_mode_refcounts(d) )
 128.588          put_page_from_l1e(ol1e, d);
 128.589  
 128.590 -    put_page_type(page);
 128.591 - 
 128.592   failed:
 128.593      unmap_domain_page(va);
 128.594      put_page(page);
 128.595 @@ -3236,7 +3195,6 @@ static int destroy_grant_pte_mapping(
 128.596      void *va;
 128.597      unsigned long gmfn, mfn;
 128.598      struct page_info *page;
 128.599 -    u32 type;
 128.600      l1_pgentry_t ol1e;
 128.601  
 128.602      gmfn = addr >> PAGE_SHIFT;
 128.603 @@ -3252,15 +3210,18 @@ static int destroy_grant_pte_mapping(
 128.604      va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
 128.605      page = mfn_to_page(mfn);
 128.606  
 128.607 -    type = page->u.inuse.type_info & PGT_type_mask;
 128.608 -    if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
 128.609 +    if ( !page_lock(page) )
 128.610      {
 128.611 -        MEM_LOG("Grant map attempted to update a non-L1 page");
 128.612          rc = GNTST_general_error;
 128.613          goto failed;
 128.614      }
 128.615  
 128.616 -    page_lock(page);
 128.617 +    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.618 +    {
 128.619 +        page_unlock(page);
 128.620 +        rc = GNTST_general_error;
 128.621 +        goto failed;
 128.622 +    }
 128.623  
 128.624      ol1e = *(l1_pgentry_t *)va;
 128.625      
 128.626 @@ -3270,7 +3231,6 @@ static int destroy_grant_pte_mapping(
 128.627          page_unlock(page);
 128.628          MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
 128.629                  (unsigned long)l1e_get_intpte(ol1e), addr, frame);
 128.630 -        put_page_type(page);
 128.631          rc = GNTST_general_error;
 128.632          goto failed;
 128.633      }
 128.634 @@ -3284,13 +3244,11 @@ static int destroy_grant_pte_mapping(
 128.635      {
 128.636          page_unlock(page);
 128.637          MEM_LOG("Cannot delete PTE entry at %p", va);
 128.638 -        put_page_type(page);
 128.639          rc = GNTST_general_error;
 128.640          goto failed;
 128.641      }
 128.642  
 128.643      page_unlock(page);
 128.644 -    put_page_type(page);
 128.645  
 128.646   failed:
 128.647      unmap_domain_page(va);
 128.648 @@ -3318,21 +3276,40 @@ static int create_grant_va_mapping(
 128.649          MEM_LOG("Could not find L1 PTE for address %lx", va);
 128.650          return GNTST_general_error;
 128.651      }
 128.652 +
 128.653 +    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
 128.654 +    {
 128.655 +        guest_unmap_l1e(v, pl1e);
 128.656 +        return GNTST_general_error;
 128.657 +    }
 128.658 +
 128.659      l1pg = mfn_to_page(gl1mfn);
 128.660 -    page_lock(l1pg);
 128.661 +    if ( !page_lock(l1pg) )
 128.662 +    {
 128.663 +        put_page(l1pg);
 128.664 +        guest_unmap_l1e(v, pl1e);
 128.665 +        return GNTST_general_error;
 128.666 +    }
 128.667 +
 128.668 +    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.669 +    {
 128.670 +        page_unlock(l1pg);
 128.671 +        put_page(l1pg);
 128.672 +        guest_unmap_l1e(v, pl1e);
 128.673 +        return GNTST_general_error;
 128.674 +    }
 128.675 +
 128.676      ol1e = *pl1e;
 128.677      okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
 128.678 +
 128.679      page_unlock(l1pg);
 128.680 +    put_page(l1pg);
 128.681      guest_unmap_l1e(v, pl1e);
 128.682 -    pl1e = NULL;
 128.683 -
 128.684 -    if ( !okay )
 128.685 -            return GNTST_general_error;
 128.686 -
 128.687 -    if ( !paging_mode_refcounts(d) )
 128.688 +
 128.689 +    if ( okay && !paging_mode_refcounts(d) )
 128.690          put_page_from_l1e(ol1e, d);
 128.691  
 128.692 -    return GNTST_okay;
 128.693 +    return okay ? GNTST_okay : GNTST_general_error;
 128.694  }
 128.695  
 128.696  static int replace_grant_va_mapping(
 128.697 @@ -3350,31 +3327,48 @@ static int replace_grant_va_mapping(
 128.698          return GNTST_general_error;
 128.699      }
 128.700  
 128.701 +    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
 128.702 +    {
 128.703 +        rc = GNTST_general_error;
 128.704 +        goto out;
 128.705 +    }
 128.706 +
 128.707      l1pg = mfn_to_page(gl1mfn);
 128.708 -    page_lock(l1pg);
 128.709 +    if ( !page_lock(l1pg) )
 128.710 +    {
 128.711 +        rc = GNTST_general_error;
 128.712 +        put_page(l1pg);
 128.713 +        goto out;
 128.714 +    }
 128.715 +
 128.716 +    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.717 +    {
 128.718 +        rc = GNTST_general_error;
 128.719 +        goto unlock_and_out;
 128.720 +    }
 128.721 +
 128.722      ol1e = *pl1e;
 128.723  
 128.724      /* Check that the virtual address supplied is actually mapped to frame. */
 128.725      if ( unlikely(l1e_get_pfn(ol1e) != frame) )
 128.726      {
 128.727 -        page_unlock(l1pg);
 128.728          MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
 128.729                  l1e_get_pfn(ol1e), addr, frame);
 128.730          rc = GNTST_general_error;
 128.731 -        goto out;
 128.732 +        goto unlock_and_out;
 128.733      }
 128.734  
 128.735      /* Delete pagetable entry. */
 128.736      if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
 128.737      {
 128.738 -        page_unlock(l1pg);
 128.739          MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
 128.740          rc = GNTST_general_error;
 128.741 -        goto out;
 128.742 +        goto unlock_and_out;
 128.743      }
 128.744  
 128.745 + unlock_and_out:
 128.746      page_unlock(l1pg);
 128.747 -
 128.748 +    put_page(l1pg);
 128.749   out:
 128.750      guest_unmap_l1e(v, pl1e);
 128.751      return rc;
 128.752 @@ -3436,20 +3430,42 @@ int replace_grant_host_mapping(
 128.753          return GNTST_general_error;
 128.754      }
 128.755  
 128.756 +    if ( !get_page_from_pagenr(gl1mfn, current->domain) )
 128.757 +    {
 128.758 +        guest_unmap_l1e(curr, pl1e);
 128.759 +        return GNTST_general_error;
 128.760 +    }
 128.761 +
 128.762      l1pg = mfn_to_page(gl1mfn);
 128.763 -    page_lock(l1pg);
 128.764 +    if ( !page_lock(l1pg) )
 128.765 +    {
 128.766 +        put_page(l1pg);
 128.767 +        guest_unmap_l1e(curr, pl1e);
 128.768 +        return GNTST_general_error;
 128.769 +    }
 128.770 +
 128.771 +    if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.772 +    {
 128.773 +        page_unlock(l1pg);
 128.774 +        put_page(l1pg);
 128.775 +        guest_unmap_l1e(curr, pl1e);
 128.776 +        return GNTST_general_error;
 128.777 +    }
 128.778 +
 128.779      ol1e = *pl1e;
 128.780  
 128.781      if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
 128.782                                  gl1mfn, curr, 0)) )
 128.783      {
 128.784          page_unlock(l1pg);
 128.785 +        put_page(l1pg);
 128.786          MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
 128.787          guest_unmap_l1e(curr, pl1e);
 128.788          return GNTST_general_error;
 128.789      }
 128.790  
 128.791      page_unlock(l1pg);
 128.792 +    put_page(l1pg);
 128.793      guest_unmap_l1e(curr, pl1e);
 128.794  
 128.795      rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
 128.796 @@ -3462,49 +3478,47 @@ int replace_grant_host_mapping(
 128.797  int steal_page(
 128.798      struct domain *d, struct page_info *page, unsigned int memflags)
 128.799  {
 128.800 -    u32 _d, _nd, x, y;
 128.801 +    unsigned long x, y;
 128.802  
 128.803      spin_lock(&d->page_alloc_lock);
 128.804  
 128.805 +    if ( is_xen_heap_page(page) || (page_get_owner(page) != d) )
 128.806 +        goto fail;
 128.807 +
 128.808      /*
 128.809 -     * The tricky bit: atomically release ownership while there is just one 
 128.810 -     * benign reference to the page (PGC_allocated). If that reference 
 128.811 -     * disappears then the deallocation routine will safely spin.
 128.812 +     * We require there is just one reference (PGC_allocated). We temporarily
 128.813 +     * drop this reference now so that we can safely swizzle the owner.
 128.814       */
 128.815 -    _d  = pickle_domptr(d);
 128.816 -    _nd = page->u.inuse._domain;
 128.817 -    y   = page->count_info;
 128.818 +    y = page->count_info;
 128.819      do {
 128.820          x = y;
 128.821 -        if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
 128.822 -                      (1 | PGC_allocated)) || unlikely(_nd != _d) )
 128.823 -        { 
 128.824 -            MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
 128.825 -                    " caf=%08x, taf=%" PRtype_info "\n", 
 128.826 -                    (void *) page_to_mfn(page),
 128.827 -                    d, d->domain_id, unpickle_domptr(_nd), x, 
 128.828 -                    page->u.inuse.type_info);
 128.829 -            spin_unlock(&d->page_alloc_lock);
 128.830 -            return -1;
 128.831 -        }
 128.832 -        asm volatile (
 128.833 -            LOCK_PREFIX "cmpxchg8b %2"
 128.834 -            : "=d" (_nd), "=a" (y),
 128.835 -            "=m" (*(volatile u64 *)(&page->count_info))
 128.836 -            : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
 128.837 -    } while (unlikely(_nd != _d) || unlikely(y != x));
 128.838 -
 128.839 -    /*
 128.840 -     * Unlink from 'd'. At least one reference remains (now anonymous), so 
 128.841 -     * noone else is spinning to try to delete this page from 'd'.
 128.842 -     */
 128.843 +        if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
 128.844 +            goto fail;
 128.845 +        y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
 128.846 +    } while ( y != x );
 128.847 +
 128.848 +    /* Swizzle the owner then reinstate the PGC_allocated reference. */
 128.849 +    page_set_owner(page, NULL);
 128.850 +    y = page->count_info;
 128.851 +    do {
 128.852 +        x = y;
 128.853 +        BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
 128.854 +    } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
 128.855 +
 128.856 +    /* Unlink from original owner. */
 128.857      if ( !(memflags & MEMF_no_refcount) )
 128.858          d->tot_pages--;
 128.859      list_del(&page->list);
 128.860  
 128.861      spin_unlock(&d->page_alloc_lock);
 128.862 -
 128.863      return 0;
 128.864 +
 128.865 + fail:
 128.866 +    spin_unlock(&d->page_alloc_lock);
 128.867 +    MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info,
 128.868 +            (void *)page_to_mfn(page), d, d->domain_id,
 128.869 +            page_get_owner(page), page->count_info, page->u.inuse.type_info);
 128.870 +    return -1;
 128.871  }
 128.872  
 128.873  int do_update_va_mapping(unsigned long va, u64 val64,
 128.874 @@ -3513,28 +3527,45 @@ int do_update_va_mapping(unsigned long v
 128.875      l1_pgentry_t   val = l1e_from_intpte(val64);
 128.876      struct vcpu   *v   = current;
 128.877      struct domain *d   = v->domain;
 128.878 +    struct page_info *gl1pg;
 128.879      l1_pgentry_t  *pl1e;
 128.880      unsigned long  vmask, bmap_ptr, gl1mfn;
 128.881      cpumask_t      pmask;
 128.882 -    int            rc  = 0;
 128.883 +    int            rc;
 128.884  
 128.885      perfc_incr(calls_to_update_va);
 128.886  
 128.887 -    if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
 128.888 -        return -EINVAL;
 128.889 -
 128.890      rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
 128.891      if ( rc )
 128.892          return rc;
 128.893  
 128.894 +    rc = -EINVAL;
 128.895      pl1e = guest_map_l1e(v, va, &gl1mfn);
 128.896 -
 128.897 -    if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
 128.898 -        rc = -EINVAL;
 128.899 -
 128.900 +    if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) )
 128.901 +        goto out;
 128.902 +
 128.903 +    gl1pg = mfn_to_page(gl1mfn);
 128.904 +    if ( !page_lock(gl1pg) )
 128.905 +    {
 128.906 +        put_page(gl1pg);
 128.907 +        goto out;
 128.908 +    }
 128.909 +
 128.910 +    if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
 128.911 +    {
 128.912 +        page_unlock(gl1pg);
 128.913 +        put_page(gl1pg);
 128.914 +        goto out;
 128.915 +    }
 128.916 +
 128.917 +    rc = mod_l1_entry(pl1e, val, gl1mfn, 0) ? 0 : -EINVAL;
 128.918 +
 128.919 +    page_unlock(gl1pg);
 128.920 +    put_page(gl1pg);
 128.921 +
 128.922 + out:
 128.923      if ( pl1e )
 128.924          guest_unmap_l1e(v, pl1e);
 128.925 -    pl1e = NULL;
 128.926  
 128.927      process_deferred_ops();
 128.928  
 128.929 @@ -3793,14 +3824,13 @@ long arch_memory_op(int op, XEN_GUEST_HA
 128.930  
 128.931              spin_unlock(&d->grant_table->lock);
 128.932              break;
 128.933 -        case XENMAPSPACE_mfn:
 128.934 -        {
 128.935 -            if ( get_page_from_pagenr(xatp.idx, d) ) {
 128.936 -                mfn = xatp.idx;
 128.937 -                page = mfn_to_page(mfn);
 128.938 -            }
 128.939 +        case XENMAPSPACE_gmfn:
 128.940 +            xatp.idx = gmfn_to_mfn(d, xatp.idx);
 128.941 +            if ( !get_page_from_pagenr(xatp.idx, d) )
 128.942 +                break;
 128.943 +            mfn = xatp.idx;
 128.944 +            page = mfn_to_page(mfn);
 128.945              break;
 128.946 -        }
 128.947          default:
 128.948              break;
 128.949          }
 128.950 @@ -3845,39 +3875,6 @@ long arch_memory_op(int op, XEN_GUEST_HA
 128.951          break;
 128.952      }
 128.953  
 128.954 -    case XENMEM_remove_from_physmap:
 128.955 -    {
 128.956 -        struct xen_remove_from_physmap xrfp;
 128.957 -        unsigned long mfn;
 128.958 -        struct domain *d;
 128.959 -
 128.960 -        if ( copy_from_guest(&xrfp, arg, 1) )
 128.961 -            return -EFAULT;
 128.962 -
 128.963 -        rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
 128.964 -        if ( rc != 0 )
 128.965 -            return rc;
 128.966 -
 128.967 -        if ( xsm_remove_from_physmap(current->domain, d) )
 128.968 -        {
 128.969 -            rcu_unlock_domain(d);
 128.970 -            return -EPERM;
 128.971 -        }
 128.972 -
 128.973 -        domain_lock(d);
 128.974 -
 128.975 -        mfn = gmfn_to_mfn(d, xrfp.gpfn);
 128.976 -
 128.977 -        if ( mfn_valid(mfn) )
 128.978 -            guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
 128.979 -
 128.980 -        domain_unlock(d);
 128.981 -
 128.982 -        rcu_unlock_domain(d);
 128.983 -
 128.984 -        break;
 128.985 -    }
 128.986 -
 128.987      case XENMEM_set_memory_map:
 128.988      {
 128.989          struct xen_foreign_memory_map fmap;
 128.990 @@ -4245,15 +4242,25 @@ int ptwr_do_page_fault(struct vcpu *v, u
 128.991  
 128.992      /* Attempt to read the PTE that maps the VA being accessed. */
 128.993      guest_get_eff_l1e(v, addr, &pte);
 128.994 -    page = l1e_get_page(pte);
 128.995  
 128.996      /* We are looking only for read-only mappings of p.t. pages. */
 128.997      if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
 128.998 -         !mfn_valid(l1e_get_pfn(pte)) ||
 128.999 -         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
128.1000 -         ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
128.1001 -         (page_get_owner(page) != d) )
128.1002 +         !get_page_from_pagenr(l1e_get_pfn(pte), d) )
128.1003 +        goto bail;
128.1004 +
128.1005 +    page = l1e_get_page(pte);
128.1006 +    if ( !page_lock(page) )
128.1007 +    {
128.1008 +        put_page(page);
128.1009          goto bail;
128.1010 +    }
128.1011 +
128.1012 +    if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
128.1013 +    {
128.1014 +        page_unlock(page);
128.1015 +        put_page(page);
128.1016 +        goto bail;
128.1017 +    }
128.1018  
128.1019      ptwr_ctxt.ctxt.regs = regs;
128.1020      ptwr_ctxt.ctxt.force_writeback = 0;
128.1021 @@ -4262,9 +4269,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
128.1022      ptwr_ctxt.cr2 = addr;
128.1023      ptwr_ctxt.pte = pte;
128.1024  
128.1025 -    page_lock(page);
128.1026      rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
128.1027 +
128.1028      page_unlock(page);
128.1029 +    put_page(page);
128.1030 +
128.1031      if ( rc == X86EMUL_UNHANDLEABLE )
128.1032          goto bail;
128.1033  
128.1034 @@ -4741,12 +4750,18 @@ void __set_fixmap(
128.1035  void memguard_init(void)
128.1036  {
128.1037      unsigned long start = max_t(unsigned long, xen_phys_start, 1UL << 20);
128.1038 +#ifdef __i386__
128.1039      map_pages_to_xen(
128.1040          (unsigned long)__va(start),
128.1041          start >> PAGE_SHIFT,
128.1042          (xenheap_phys_end - start) >> PAGE_SHIFT,
128.1043          __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
128.1044 -#ifdef __x86_64__
128.1045 +#else
128.1046 +    map_pages_to_xen(
128.1047 +        (unsigned long)__va(start),
128.1048 +        start >> PAGE_SHIFT,
128.1049 +        (__pa(&_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
128.1050 +        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
128.1051      BUG_ON(start != xen_phys_start);
128.1052      map_pages_to_xen(
128.1053          XEN_VIRT_START,
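
The mm.c changes above drop the per-page spinlock (and the i386 count_info bit
spin) in favour of a lock flag kept inside the page's type_info word, taken and
released with cmpxchg together with a type-count reference, and they convert the
count_info arithmetic to unsigned long with plain cmpxchg loops. The sketch
below shows only that "lock bit plus reference count in one word" idea in
portable C11 atomics; it is not the Xen implementation, and LOCKED_FLAG,
word_lock() and word_unlock() are invented names.

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Most significant bit of the word acts as the lock flag. */
    #define LOCKED_FLAG (1UL << (sizeof(unsigned long) * 8 - 1))

    static bool word_lock(_Atomic unsigned long *w)
    {
        unsigned long x, nx;

        do {
            /* Spin until the lock bit is clear, then snapshot the word. */
            while ( (x = atomic_load(w)) & LOCKED_FLAG )
                ;
            if ( (x & ~LOCKED_FLAG) == 0 )
                return false;             /* refuse to lock an unreferenced word */
            nx = (x + 1) | LOCKED_FLAG;   /* take a reference and the lock */
        } while ( !atomic_compare_exchange_weak(w, &x, nx) );

        return true;
    }

    static void word_unlock(_Atomic unsigned long *w)
    {
        /* Drop the reference and the lock flag in one atomic subtraction. */
        atomic_fetch_sub(w, 1UL | LOCKED_FLAG);
    }

As in the patch, a failed word_lock() tells the caller to fall back to another
path (do_mmu_update treats that case as a writable-page update) rather than to
block.
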
   129.1 --- a/xen/arch/x86/mm/Makefile	Wed Jan 28 12:22:58 2009 +0900
   129.2 +++ b/xen/arch/x86/mm/Makefile	Wed Jan 28 13:06:45 2009 +0900
   129.3 @@ -7,5 +7,5 @@ obj-y += guest_walk_2.o
   129.4  obj-y += guest_walk_3.o
   129.5  obj-$(x86_64) += guest_walk_4.o
   129.6  
   129.7 -guest_walk_%.o: guest_walk.c $(HDRS) Makefile
   129.8 +guest_walk_%.o: guest_walk.c Makefile
   129.9  	$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
   130.1 --- a/xen/arch/x86/mm/hap/Makefile	Wed Jan 28 12:22:58 2009 +0900
   130.2 +++ b/xen/arch/x86/mm/hap/Makefile	Wed Jan 28 13:06:45 2009 +0900
   130.3 @@ -7,5 +7,5 @@ obj-y += p2m-ept.o
   130.4  guest_levels  = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
   130.5  guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
   130.6  
   130.7 -guest_walk_%level.o: guest_walk.c $(HDRS) Makefile
   130.8 +guest_walk_%level.o: guest_walk.c Makefile
   130.9  	$(CC) $(CFLAGS) $(call guest_walk_defns,$(@F)) -c $< -o $@
   131.1 --- a/xen/arch/x86/mm/hap/hap.c	Wed Jan 28 12:22:58 2009 +0900
   131.2 +++ b/xen/arch/x86/mm/hap/hap.c	Wed Jan 28 13:06:45 2009 +0900
   131.3 @@ -166,7 +166,7 @@ void hap_free_p2m_page(struct domain *d,
   131.4      ASSERT(page_get_owner(pg) == d);
   131.5      /* Should have just the one ref we gave it in alloc_p2m_page() */
   131.6      if ( (pg->count_info & PGC_count_mask) != 1 )
   131.7 -        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
   131.8 +        HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
   131.9                    pg->count_info, pg->u.inuse.type_info);
  131.10      pg->count_info = 0;
  131.11      /* Free should not decrement domain's total allocation, since
   132.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Wed Jan 28 12:22:58 2009 +0900
   132.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Wed Jan 28 13:06:45 2009 +0900
   132.3 @@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d
   132.4      list_add_tail(&pg->list, &d->arch.p2m->pages);
   132.5  
   132.6      ept_entry->emt = 0;
   132.7 +    ept_entry->igmt = 0;
   132.8      ept_entry->sp_avail = 0;
   132.9      ept_entry->avail1 = 0;
  132.10      ept_entry->mfn = page_to_mfn(pg);
  132.11 @@ -114,9 +115,13 @@ static int ept_next_level(struct domain 
  132.12      }
  132.13  }
  132.14  
  132.15 +/*
  132.16 + * TODO: have ept_set_entry() compute 'need_modify_vtd_table' itself,
  132.17 + * by observing whether any gfn->mfn translations are actually modified.
  132.18 + */
  132.19  static int
  132.20 -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
  132.21 -              unsigned int order, p2m_type_t p2mt)
  132.22 +_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
  132.23 +              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
  132.24  {
  132.25      ept_entry_t *table = NULL;
  132.26      unsigned long gfn_remainder = gfn, offset = 0;
  132.27 @@ -124,6 +129,8 @@ ept_set_entry(struct domain *d, unsigned
  132.28      u32 index;
  132.29      int i, rv = 0, ret = 0;
  132.30      int walk_level = order / EPT_TABLE_ORDER;
  132.31 +    int direct_mmio = (p2mt == p2m_mmio_direct);
  132.32 +    uint8_t igmt = 0;
  132.33  
  132.34      /* we only support 4k and 2m pages now */
  132.35  
  132.36 @@ -157,7 +164,9 @@ ept_set_entry(struct domain *d, unsigned
  132.37      {
  132.38          if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
  132.39          {
  132.40 -            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
  132.41 +            ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
  132.42 +                                &igmt, direct_mmio);
  132.43 +            ept_entry->igmt = igmt;
  132.44              ept_entry->sp_avail = walk_level ? 1 : 0;
  132.45  
  132.46              if ( ret == GUEST_TABLE_SUPER_PAGE )
  132.47 @@ -208,7 +217,10 @@ ept_set_entry(struct domain *d, unsigned
  132.48          {
  132.49              split_ept_entry = split_table + i;
  132.50              split_ept_entry->emt = epte_get_entry_emt(d,
  132.51 -                                        gfn-offset+i, split_mfn+i);
  132.52 +                                        gfn-offset+i, split_mfn+i, 
  132.53 +                                        &igmt, direct_mmio);
  132.54 +            split_ept_entry->igmt = igmt;
  132.55 +
  132.56              split_ept_entry->sp_avail =  0;
  132.57  
  132.58              split_ept_entry->mfn = split_mfn+i;
  132.59 @@ -223,7 +235,10 @@ ept_set_entry(struct domain *d, unsigned
  132.60  
  132.61          /* Set the destinated 4k page as normal */
  132.62          split_ept_entry = split_table + offset;
  132.63 -        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
  132.64 +        split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), 
  132.65 +                                                &igmt, direct_mmio);
  132.66 +        split_ept_entry->igmt = igmt;
  132.67 +
  132.68          split_ept_entry->mfn = mfn_x(mfn);
  132.69          split_ept_entry->avail1 = p2mt;
  132.70          ept_p2m_type_to_flags(split_ept_entry, p2mt);
  132.71 @@ -246,7 +261,8 @@ out:
  132.72  
  132.73      /* Now the p2m table is not shared with vt-d page table */
  132.74  
  132.75 -    if ( iommu_enabled && is_hvm_domain(d) )
  132.76 +    if ( iommu_enabled && is_hvm_domain(d)  
  132.77 +             && need_modify_vtd_table )
  132.78      {
  132.79          if ( p2mt == p2m_ram_rw )
  132.80          {
  132.81 @@ -273,6 +289,17 @@ out:
  132.82      return rv;
  132.83  }
  132.84  
  132.85 +static int
  132.86 +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
  132.87 +              unsigned int order, p2m_type_t p2mt)
  132.88 +{
  132.89 +    /* ept_set_entry() is called from set_entry(),
  132.90 +     * so we should always update the VT-d page table according
  132.91 +     * to changes in the gfn-to-mfn translations.
  132.92 +     */
  132.93 +    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
  132.94 +}
  132.95 +
  132.96  /* Read ept p2m entries */
  132.97  static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t,
  132.98      p2m_query_t q)
  132.99 @@ -395,18 +422,30 @@ void ept_change_entry_emt_with_range(str
 132.100                   * Set emt for super page.
 132.101                   */
 132.102                  order = EPT_TABLE_ORDER;
 132.103 -                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
 132.104 +                /* vmx_set_uc_mode() doesn't touch the gfn-to-mfn
 132.105 +                 * translations; it only modifies the emt field of the EPT
 132.106 +                 * entries, so we need not modify the current VT-d page tables.
 132.107 +                 */
 132.108 +                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
 132.109                  gfn += 0x1FF;
 132.110              }
 132.111              else
 132.112              {
 132.113 -                /* change emt for partial entries of the 2m area */
 132.114 -                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
 132.115 +                /* 1) Change emt for partial entries of the 2M area.
 132.116 +                 * 2) vmx_set_uc_mode() doesn't touch the gfn-to-mfn
 132.117 +                 * translations; it only modifies the emt field of the EPT
 132.118 +                 * entries, so we need not modify the current VT-d page tables.
 132.119 +                 */
 132.120 +                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
 132.121                  gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
 132.122              }
 132.123          }
 132.124 -        else /* gfn assigned with 4k */
 132.125 -            ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
 132.126 +        else /* 1) gfn assigned with a 4k page.
 132.127 +              * 2) vmx_set_uc_mode() doesn't touch the gfn-to-mfn
 132.128 +              * translations; it only modifies the emt field of the EPT
 132.129 +              * entries, so we need not modify the current VT-d page tables.
 132.130 +              */
 132.131 +            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
 132.132      }
 132.133  }
 132.134  
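
The p2m-ept.c change above turns ept_set_entry() into a thin wrapper around
_ept_set_entry(), which takes an extra need_modify_vtd_table flag so that
ept_change_entry_emt_with_range() can rewrite only the memory-type bits without
re-syncing the VT-d tables. A condensed sketch of that wrapper pattern follows;
it is not the Xen code, and every name in it is illustrative.

    struct p2m_entry { unsigned long mfn; unsigned int attrs; };

    static void iommu_sync(unsigned long gfn, unsigned long mfn)
    {
        /* ...mirror the new gfn->mfn translation into the IOMMU tables... */
    }

    static int set_entry_internal(struct p2m_entry *e, unsigned long gfn,
                                  unsigned long mfn, unsigned int attrs,
                                  int sync_iommu)
    {
        e->mfn = mfn;
        e->attrs = attrs;            /* e.g. only the memory-type bits change */
        if ( sync_iommu )
            iommu_sync(gfn, mfn);    /* needed whenever gfn->mfn changed */
        return 0;
    }

    /* Public entry point: external callers always change gfn->mfn mappings,
     * so the IOMMU tables must always be kept in sync. */
    static int set_entry(struct p2m_entry *e, unsigned long gfn,
                         unsigned long mfn, unsigned int attrs)
    {
        return set_entry_internal(e, gfn, mfn, attrs, 1);
    }

Callers that only adjust caching attributes (the vmx_set_uc_mode() analogue)
would call set_entry_internal(..., 0) directly, which is the split the patch
introduces between ept_set_entry() and _ept_set_entry().
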
   133.1 --- a/xen/arch/x86/mm/p2m.c	Wed Jan 28 12:22:58 2009 +0900
   133.2 +++ b/xen/arch/x86/mm/p2m.c	Wed Jan 28 13:06:45 2009 +0900
   133.3 @@ -1890,6 +1890,30 @@ guest_physmap_remove_page(struct domain 
   133.4      p2m_unlock(d->arch.p2m);
   133.5  }
   133.6  
   133.7 +#if CONFIG_PAGING_LEVELS == 3
   133.8 +static int gfn_check_limit(
   133.9 +    struct domain *d, unsigned long gfn, unsigned int order)
  133.10 +{
  133.11 +    /*
  133.12 +     * 32bit AMD nested paging does not support guests above 4GB due to a
  133.13 +     * hardware translation limit. This limitation is checked by comparing
  133.14 +     * the end of the gfn range against 0x100000UL.
  133.15 +     */
  133.16 +    if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
  133.17 +         (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
  133.18 +        return 0;
  133.19 +
  133.20 +    if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
  133.21 +        dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
  133.22 +                " 4GB: specify 'hap=0' domain config option.\n",
  133.23 +                d->domain_id);
  133.24 +
  133.25 +    return -EINVAL;
  133.26 +}
  133.27 +#else
  133.28 +#define gfn_check_limit(d, g, o) 0
  133.29 +#endif
  133.30 +
  133.31  int
  133.32  guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
  133.33                                        unsigned int order)
  133.34 @@ -1903,21 +1927,9 @@ guest_physmap_mark_populate_on_demand(st
  133.35  
  133.36      BUG_ON(!paging_mode_translate(d));
  133.37  
  133.38 -#if CONFIG_PAGING_LEVELS == 3
  133.39 -    /*
  133.40 -     * 32bit PAE nested paging does not support over 4GB guest due to 
  133.41 -     * hardware translation limit. This limitation is checked by comparing
  133.42 -     * gfn with 0xfffffUL.
  133.43 -     */
  133.44 -    if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
  133.45 -    {
  133.46 -        if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
  133.47 -            dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
  133.48 -                    " 4GB: specify 'hap=0' domain config option.\n",
  133.49 -                    d->domain_id);
  133.50 -        return -EINVAL;
  133.51 -    }
  133.52 -#endif
  133.53 +    rc = gfn_check_limit(d, gfn, order);
  133.54 +    if ( rc != 0 )
  133.55 +        return rc;
  133.56  
  133.57      p2m_lock(p2md);
  133.58      audit_p2m(d);
  133.59 @@ -1987,22 +1999,9 @@ guest_physmap_add_entry(struct domain *d
  133.60          return 0;
  133.61      }
  133.62  
  133.63 -#if CONFIG_PAGING_LEVELS == 3
  133.64 -    /*
  133.65 -     * 32bit AMD nested paging does not support over 4GB guest due to 
  133.66 -     * hardware translation limit. This limitation is checked by comparing
  133.67 -     * gfn with 0xfffffUL.
  133.68 -     */
  133.69 -    if ( paging_mode_hap(d) && (gfn > 0xfffffUL) &&
  133.70 -         (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) )
  133.71 -    {
  133.72 -        if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
  133.73 -            dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
  133.74 -                    " 4GB: specify 'hap=0' domain config option.\n",
  133.75 -                    d->domain_id);
  133.76 -        return -EINVAL;
  133.77 -    }
  133.78 -#endif
  133.79 +    rc = gfn_check_limit(d, gfn, page_order);
  133.80 +    if ( rc != 0 )
  133.81 +        return rc;
  133.82  
  133.83      p2m_lock(d->arch.p2m);
  133.84      audit_p2m(d);
   134.1 --- a/xen/arch/x86/mm/shadow/Makefile	Wed Jan 28 12:22:58 2009 +0900
   134.2 +++ b/xen/arch/x86/mm/shadow/Makefile	Wed Jan 28 13:06:45 2009 +0900
   134.3 @@ -1,5 +1,5 @@
   134.4  obj-$(x86_32) += common.o guest_2.o guest_3.o
   134.5  obj-$(x86_64) += common.o guest_2.o guest_3.o guest_4.o
   134.6  
   134.7 -guest_%.o: multi.c $(HDRS) Makefile
   134.8 +guest_%.o: multi.c Makefile
   134.9  	$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
   135.1 --- a/xen/arch/x86/mm/shadow/common.c	Wed Jan 28 12:22:58 2009 +0900
   135.2 +++ b/xen/arch/x86/mm/shadow/common.c	Wed Jan 28 13:06:45 2009 +0900
   135.3 @@ -1715,16 +1715,13 @@ shadow_free_p2m_page(struct domain *d, s
   135.4      /* Should have just the one ref we gave it in alloc_p2m_page() */
   135.5      if ( (pg->count_info & PGC_count_mask) != 1 )
   135.6      {
   135.7 -        SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
   135.8 +        SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
   135.9                       pg->count_info, pg->u.inuse.type_info);
  135.10      }
  135.11      pg->count_info = 0;
  135.12      /* Free should not decrement domain's total allocation, since 
  135.13       * these pages were allocated without an owner. */
  135.14      page_set_owner(pg, NULL); 
  135.15 -#if defined(__x86_64__)
  135.16 -    spin_lock_init(&pg->lock);
  135.17 -#endif
  135.18      free_domheap_pages(pg, 0);
  135.19      d->arch.paging.shadow.p2m_pages--;
  135.20      perfc_decr(shadow_alloc_count);
  135.21 @@ -1833,14 +1830,6 @@ static unsigned int sh_set_allocation(st
  135.22              sp = list_entry(d->arch.paging.shadow.freelists[order].next,
  135.23                              struct shadow_page_info, list);
  135.24              list_del(&sp->list);
  135.25 -#if defined(__x86_64__)
  135.26 -            /*
  135.27 -             * Re-instate lock field which we overwrite with shadow_page_info.
  135.28 -             * This was safe, since the lock is only used on guest pages.
  135.29 -             */
  135.30 -            for ( j = 0; j < 1U << order; j++ )
  135.31 -                spin_lock_init(&((struct page_info *)sp)[j].lock);
  135.32 -#endif
  135.33              d->arch.paging.shadow.free_pages -= 1 << order;
  135.34              d->arch.paging.shadow.total_pages -= 1 << order;
  135.35              free_domheap_pages((struct page_info *)sp, order);
  135.36 @@ -2593,7 +2582,7 @@ int sh_remove_all_mappings(struct vcpu *
  135.37                 && (page->u.inuse.type_info & PGT_count_mask) == 0) )
  135.38          {
  135.39              SHADOW_ERROR("can't find all mappings of mfn %lx: "
  135.40 -                          "c=%08x t=%08lx\n", mfn_x(gmfn), 
  135.41 +                          "c=%08lx t=%08lx\n", mfn_x(gmfn), 
  135.42                            page->count_info, page->u.inuse.type_info);
  135.43          }
  135.44      }
   136.1 --- a/xen/arch/x86/mm/shadow/multi.c	Wed Jan 28 12:22:58 2009 +0900
   136.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Wed Jan 28 13:06:45 2009 +0900
   136.3 @@ -3230,7 +3230,14 @@ static int sh_page_fault(struct vcpu *v,
   136.4  
   136.5      /* Ignore attempts to write to read-only memory. */
   136.6      if ( (p2mt == p2m_ram_ro) && (ft == ft_demand_write) )
   136.7 +    {
   136.8 +        static unsigned long lastpage;
   136.9 +        if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) )
  136.10 +            gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory"
  136.11 +                     " page. va page=%#lx, mfn=%#lx\n",
  136.12 +                     va & PAGE_MASK, mfn_x(gmfn));
  136.13          goto emulate_readonly; /* skip over the instruction */
  136.14 +    }
  136.15  
  136.16      /* In HVM guests, we force CR0.WP always to be set, so that the
  136.17       * pagetables are always write-protected.  If the guest thinks
   137.1 --- a/xen/arch/x86/mm/shadow/private.h	Wed Jan 28 12:22:58 2009 +0900
   137.2 +++ b/xen/arch/x86/mm/shadow/private.h	Wed Jan 28 13:06:45 2009 +0900
   137.3 @@ -244,11 +244,10 @@ struct shadow_page_info
   137.4                  u32 tlbflush_timestamp;
   137.5              };
   137.6              struct {
   137.7 -                unsigned int type:5;   /* What kind of shadow is this? */
   137.8 -                unsigned int pinned:1; /* Is the shadow pinned? */
   137.9 -                unsigned int count:26; /* Reference count */
  137.10 -                u32 mbz;               /* Must be zero: this is where the
  137.11 -                                        * owner field lives in page_info */
  137.12 +                unsigned long mbz;     /* Must be zero: count_info is here. */
  137.13 +                unsigned long type:5;   /* What kind of shadow is this? */
  137.14 +                unsigned long pinned:1; /* Is the shadow pinned? */
  137.15 +                unsigned long count:26; /* Reference count */
  137.16              } __attribute__((packed));
  137.17              union {
  137.18                  /* For unused shadow pages, a list of pages of this order; for 
  137.19 @@ -266,13 +265,13 @@ struct shadow_page_info
  137.20  
  137.21  /* The structure above *must* be no larger than a struct page_info
  137.22   * from mm.h, since we'll be using the same space in the frametable. 
  137.23 - * Also, the mbz field must line up with the owner field of normal 
  137.24 - * pages, so they look properly like anonymous/xen pages. */
  137.25 + * Also, the mbz field must line up with the count_info field of normal 
  137.26 + * pages, so they cannot be successfully get_page()d. */
  137.27  static inline void shadow_check_page_struct_offsets(void) {
  137.28      BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
  137.29                   sizeof (struct page_info));
  137.30      BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
  137.31 -                 offsetof(struct page_info, u.inuse._domain));
  137.32 +                 offsetof(struct page_info, count_info));
  137.33  };
  137.34  
  137.35  /* Shadow type codes */
   138.1 --- a/xen/arch/x86/msi.c	Wed Jan 28 12:22:58 2009 +0900
   138.2 +++ b/xen/arch/x86/msi.c	Wed Jan 28 13:06:45 2009 +0900
   138.3 @@ -671,7 +671,7 @@ static void __pci_disable_msix(struct ms
   138.4  
   138.5      pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
   138.6      control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
   138.7 -    msi_set_enable(dev, 0);
   138.8 +    msix_set_enable(dev, 0);
   138.9  
  138.10      BUG_ON(list_empty(&dev->msi_list));
  138.11  
  138.12 @@ -770,11 +770,20 @@ int pci_restore_msi_state(struct pci_dev
  138.13              return -EINVAL;
  138.14          }
  138.15  
  138.16 -        msi_set_enable(pdev, 0);
  138.17 +        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
  138.18 +            msi_set_enable(pdev, 0);
  138.19 +        else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
  138.20 +            msix_set_enable(pdev, 0);
  138.21 +
  138.22          write_msi_msg(entry, &entry->msg);
  138.23  
  138.24 -        msi_set_enable(pdev, 1);
  138.25          msi_set_mask_bit(vector, entry->msi_attrib.masked);
  138.26 +
  138.27 +        if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
  138.28 +            msi_set_enable(pdev, 1);
  138.29 +        else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
  138.30 +            msix_set_enable(pdev, 1);
  138.31 +
  138.32          spin_unlock_irqrestore(&desc->lock, flags);
  138.33      }
  138.34  
   139.1 --- a/xen/arch/x86/nmi.c	Wed Jan 28 12:22:58 2009 +0900
   139.2 +++ b/xen/arch/x86/nmi.c	Wed Jan 28 13:06:45 2009 +0900
   139.3 @@ -286,7 +286,7 @@ static int __pminit setup_p4_watchdog(vo
   139.4  
   139.5      nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
   139.6      nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
   139.7 -    if ( smp_num_siblings == 2 )
   139.8 +    if ( boot_cpu_data.x86_num_siblings == 2 )
   139.9          nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
  139.10  
  139.11      if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
   140.1 --- a/xen/arch/x86/oprofile/nmi_int.c	Wed Jan 28 12:22:58 2009 +0900
   140.2 +++ b/xen/arch/x86/oprofile/nmi_int.c	Wed Jan 28 13:06:45 2009 +0900
   140.3 @@ -38,19 +38,29 @@ static char *cpu_type;
   140.4  extern int is_active(struct domain *d);
   140.5  extern int is_passive(struct domain *d);
   140.6  
   140.7 +static int passive_domain_msr_op_checks(struct cpu_user_regs *regs, int *typep, int *indexp)
   140.8 +{
   140.9 +	struct vpmu_struct *vpmu = vcpu_vpmu(current);
  140.10 +	if ( model == NULL )
  140.11 +		return 0;
  140.12 +	if ( model->is_arch_pmu_msr == NULL )
  140.13 +		return 0;
  140.14 +	if ( !model->is_arch_pmu_msr((u64)regs->ecx, typep, indexp) )
  140.15 +		return 0;
  140.16 +
  140.17 +	if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
  140.18 +		if ( ! model->allocated_msr(current) )
  140.19 +			return 0;
  140.20 +	return 1;
  140.21 +}
  140.22 +
  140.23  int passive_domain_do_rdmsr(struct cpu_user_regs *regs)
  140.24  {
  140.25  	u64 msr_content;
  140.26  	int type, index;
  140.27 -	struct vpmu_struct *vpmu = vcpu_vpmu(current);
  140.28  
  140.29 -	if ( model->is_arch_pmu_msr == NULL )
  140.30 +	if ( !passive_domain_msr_op_checks(regs, &type, &index) )
  140.31  		return 0;
  140.32 -	if ( !model->is_arch_pmu_msr((u64)regs->ecx, &type, &index) )
  140.33 -		return 0;
  140.34 -	if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
  140.35 -		if ( ! model->allocated_msr(current) )
  140.36 -			return 0;
  140.37  
  140.38  	model->load_msr(current, type, index, &msr_content);
  140.39  	regs->eax = msr_content & 0xFFFFFFFF;
  140.40 @@ -58,23 +68,13 @@ int passive_domain_do_rdmsr(struct cpu_u
  140.41  	return 1;
  140.42  }
  140.43  
  140.44 -
  140.45  int passive_domain_do_wrmsr(struct cpu_user_regs *regs)
  140.46  {
  140.47  	u64 msr_content;
  140.48  	int type, index;
  140.49 -	struct vpmu_struct *vpmu = vcpu_vpmu(current);
  140.50  
  140.51 -	if ( model == NULL )
  140.52 -		return 0;
  140.53 -	if ( model->is_arch_pmu_msr == NULL )
  140.54 +	if ( !passive_domain_msr_op_checks(regs, &type, &index) )
  140.55  		return 0;
  140.56 -	if ( !model->is_arch_pmu_msr((u64)regs->ecx, &type, &index) )
  140.57 -		return 0;
  140.58 -
  140.59 -	if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
  140.60 -		if ( ! model->allocated_msr(current) )
  140.61 -			return 0;
  140.62  
  140.63  	msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
  140.64  	model->save_msr(current, type, index, msr_content);
  140.65 @@ -326,7 +326,7 @@ static int __init p4_init(char ** cpu_ty
  140.66  	model = &op_p4_spec;
  140.67  	return 1;
  140.68  #else
  140.69 -	switch (smp_num_siblings) {
  140.70 +	switch (current_cpu_data.x86_num_siblings) {
  140.71  		case 1:
  140.72  			*cpu_type = "i386/p4";
  140.73  			model = &op_p4_spec;
   141.1 --- a/xen/arch/x86/oprofile/op_model_p4.c	Wed Jan 28 12:22:58 2009 +0900
   141.2 +++ b/xen/arch/x86/oprofile/op_model_p4.c	Wed Jan 28 13:06:45 2009 +0900
   141.3 @@ -41,7 +41,7 @@ static unsigned int num_counters = NUM_C
   141.4  static inline void setup_num_counters(void)
   141.5  {
   141.6  #ifdef CONFIG_SMP
   141.7 -	if (smp_num_siblings == 2)
   141.8 +	if (boot_cpu_data.x86_num_siblings == 2) 	/* XXX */
   141.9  		num_counters = NUM_COUNTERS_HT2;
  141.10  #endif
  141.11  }
  141.12 @@ -49,7 +49,7 @@ static inline void setup_num_counters(vo
  141.13  static int inline addr_increment(void)
  141.14  {
  141.15  #ifdef CONFIG_SMP
  141.16 -	return smp_num_siblings == 2 ? 2 : 1;
  141.17 +	return boot_cpu_data.x86_num_siblings == 2 ? 2 : 1;
  141.18  #else
  141.19  	return 1;
  141.20  #endif
   142.1 --- a/xen/arch/x86/physdev.c	Wed Jan 28 12:22:58 2009 +0900
   142.2 +++ b/xen/arch/x86/physdev.c	Wed Jan 28 13:06:45 2009 +0900
   142.3 @@ -103,14 +103,14 @@ static int physdev_map_pirq(struct physd
   142.4      spin_lock(&pcidevs_lock);
   142.5      /* Verify or get pirq. */
   142.6      spin_lock(&d->event_lock);
   142.7 +    pirq = domain_vector_to_irq(d, vector);
   142.8      if ( map->pirq < 0 )
   142.9      {
  142.10 -        if ( d->arch.vector_pirq[vector] )
  142.11 +        if ( pirq )
  142.12          {
  142.13              dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
  142.14                      d->domain_id, map->index, map->pirq,
  142.15 -                    d->arch.vector_pirq[vector]);
  142.16 -            pirq = d->arch.vector_pirq[vector];
  142.17 +                    pirq);
  142.18              if ( pirq < 0 )
  142.19              {
  142.20                  ret = -EBUSY;
  142.21 @@ -130,8 +130,7 @@ static int physdev_map_pirq(struct physd
  142.22      }
  142.23      else
  142.24      {
  142.25 -        if ( d->arch.vector_pirq[vector] &&
  142.26 -             d->arch.vector_pirq[vector] != map->pirq )
  142.27 +        if ( pirq && pirq != map->pirq )
  142.28          {
  142.29              dprintk(XENLOG_G_ERR, "dom%d: vector %d conflicts with irq %d\n",
  142.30                      d->domain_id, map->index, map->pirq);
  142.31 @@ -258,8 +257,15 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
  142.32          if ( (irq < 0) || (irq >= NR_IRQS) )
  142.33              break;
  142.34          irq_status_query.flags = 0;
  142.35 -        if ( pirq_acktype(v->domain, irq) != 0 )
  142.36 -            irq_status_query.flags |= XENIRQSTAT_needs_eoi;
  142.37 +        /*
  142.38 +         * Even edge-triggered or message-based IRQs can need masking from
  142.39 +         * time to time. If the guest is not dynamically checking for this
  142.40 +         * via the new pirq_eoi_map mechanism, it must conservatively always
  142.41 +         * execute the EOI hypercall. In practice, this only really makes a
  142.42 +         * difference for maskable MSI sources, and if those are supported
  142.43 +         * then dom0 is probably modern anyway.
  142.44 +         */
  142.45 +        irq_status_query.flags |= XENIRQSTAT_needs_eoi;
  142.46          if ( pirq_shared(v->domain, irq) )
  142.47              irq_status_query.flags |= XENIRQSTAT_shared;
  142.48          ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
   143.1 --- a/xen/arch/x86/setup.c	Wed Jan 28 12:22:58 2009 +0900
   143.2 +++ b/xen/arch/x86/setup.c	Wed Jan 28 13:06:45 2009 +0900
   143.3 @@ -39,6 +39,8 @@
   143.4  #include <xsm/xsm.h>
   143.5  #include <asm/tboot.h>
   143.6  
   143.7 +int __init bzimage_headroom(char *image_start, unsigned long image_length);
   143.8 +
   143.9  #if defined(CONFIG_X86_64)
  143.10  #define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
  143.11  #define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
  143.12 @@ -54,15 +56,6 @@ extern u16 boot_edid_caps;
  143.13  extern u8 boot_edid_info[128];
  143.14  extern struct boot_video_info boot_vid_info;
  143.15  
  143.16 -/*
  143.17 - * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
  143.18 - * page_info table and allocation bitmap.
  143.19 - */
  143.20 -static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
  143.21 -#if defined(CONFIG_X86_64)
  143.22 -integer_param("xenheap_megabytes", opt_xenheap_megabytes);
  143.23 -#endif
  143.24 -
  143.25  /* opt_nosmp: If true, secondary processors are ignored. */
  143.26  static int opt_nosmp = 0;
  143.27  boolean_param("nosmp", opt_nosmp);
  143.28 @@ -105,8 +98,10 @@ cpumask_t cpu_present_map;
  143.29  
  143.30  unsigned long xen_phys_start;
  143.31  
  143.32 +#ifdef CONFIG_X86_32
  143.33  /* Limits of Xen heap, used to initialise the allocator. */
  143.34 -unsigned long xenheap_phys_start, xenheap_phys_end;
  143.35 +unsigned long xenheap_initial_phys_start, xenheap_phys_end;
  143.36 +#endif
  143.37  
  143.38  extern void arch_init_memory(void);
  143.39  extern void init_IRQ(void);
  143.40 @@ -178,19 +173,21 @@ static void __init do_initcalls(void)
  143.41      for ( ; ; ) halt();                         \
  143.42  } while (0)
  143.43  
  143.44 -static unsigned long __initdata initial_images_start, initial_images_end;
  143.45 +static unsigned long __initdata initial_images_base;
  143.46 +static unsigned long __initdata initial_images_start;
  143.47 +static unsigned long __initdata initial_images_end;
  143.48  
  143.49  unsigned long __init initial_images_nrpages(void)
  143.50  {
  143.51 -    ASSERT(!(initial_images_start & ~PAGE_MASK));
  143.52 +    ASSERT(!(initial_images_base & ~PAGE_MASK));
  143.53      ASSERT(!(initial_images_end   & ~PAGE_MASK));
  143.54      return ((initial_images_end >> PAGE_SHIFT) -
  143.55 -            (initial_images_start >> PAGE_SHIFT));
  143.56 +            (initial_images_base >> PAGE_SHIFT));
  143.57  }
  143.58  
  143.59  void __init discard_initial_images(void)
  143.60  {
  143.61 -    init_domheap_pages(initial_images_start, initial_images_end);
  143.62 +    init_domheap_pages(initial_images_base, initial_images_end);
  143.63  }
  143.64  
  143.65  extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
  143.66 @@ -420,7 +417,8 @@ void __init __start_xen(unsigned long mb
  143.67      unsigned int initrdidx = 1;
  143.68      multiboot_info_t *mbi = __va(mbi_p);
  143.69      module_t *mod = (module_t *)__va(mbi->mods_addr);
  143.70 -    unsigned long nr_pages, modules_length;
  143.71 +    unsigned long nr_pages, modules_length, modules_headroom;
  143.72 +    unsigned long allocator_bitmap_end;
  143.73      int i, e820_warn = 0, bytes = 0;
  143.74      struct ns16550_defaults ns16550 = {
  143.75          .data_bits = 8,
  143.76 @@ -599,23 +597,6 @@ void __init __start_xen(unsigned long mb
  143.77      /* Sanitise the raw E820 map to produce a final clean version. */
  143.78      max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);
  143.79  
  143.80 -#ifdef CONFIG_X86_64
  143.81 -    /*
  143.82 -     * On x86/64 we are able to account for the allocation bitmap
  143.83 -     * (allocated in common/page_alloc.c:init_boot_allocator()) stealing
  143.84 -     * from the Xen heap. Here we make the Xen heap appropriately larger.
  143.85 -     */
  143.86 -    opt_xenheap_megabytes += (max_page / 8) >> 20;
  143.87 -#endif
  143.88 -
  143.89 -    /*
  143.90 -     * Since there are some stubs getting built on the stacks which use
  143.91 -     * direct calls/jumps, the heap must be confined to the lower 2G so
  143.92 -     * that those branches can reach their targets.
  143.93 -     */
  143.94 -    if ( opt_xenheap_megabytes > 2048 )
  143.95 -        opt_xenheap_megabytes = 2048;
  143.96 -
  143.97      /* Create a temporary copy of the E820 map. */
  143.98      memcpy(&boot_e820, &e820, sizeof(e820));
  143.99  
 143.100 @@ -636,6 +617,10 @@ void __init __start_xen(unsigned long mb
 143.101       * x86/64, we relocate Xen to higher memory.
 143.102       */
 143.103      modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
 143.104 +    modules_headroom = bzimage_headroom(
 143.105 +        (char *)(unsigned long)mod[0].mod_start,
 143.106 +        (unsigned long)(mod[0].mod_end - mod[0].mod_start));
 143.107 +
 143.108      for ( i = boot_e820.nr_map-1; i >= 0; i-- )
 143.109      {
 143.110          uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
 143.111 @@ -654,8 +639,9 @@ void __init __start_xen(unsigned long mb
 143.112              s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
 143.113  
 143.114  #if defined(CONFIG_X86_64)
 143.115 +#define reloc_size ((__pa(&_end) + mask) & ~mask)
 143.116          /* Is the region suitable for relocating Xen? */
 143.117 -        if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
 143.118 +        if ( !xen_phys_start && ((e-s) >= reloc_size) )
 143.119          {
 143.120              extern l2_pgentry_t l2_xenmap[];
 143.121              l4_pgentry_t *pl4e;
 143.122 @@ -664,7 +650,7 @@ void __init __start_xen(unsigned long mb
 143.123              int i, j, k;
 143.124  
 143.125              /* Select relocation address. */
 143.126 -            e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
 143.127 +            e -= reloc_size;
 143.128              xen_phys_start = e;
 143.129              bootsym(trampoline_xen_phys_start) = e;
 143.130  
 143.131 @@ -739,12 +725,15 @@ void __init __start_xen(unsigned long mb
 143.132  #endif
 143.133  
 143.134          /* Is the region suitable for relocating the multiboot modules? */
 143.135 -        if ( !initial_images_start && (s < e) && ((e-s) >= modules_length) )
 143.136 +        if ( !initial_images_start && (s < e) &&
 143.137 +             ((e-s) >= (modules_length+modules_headroom)) )
 143.138          {
 143.139              initial_images_end = e;
 143.140              e = (e - modules_length) & PAGE_MASK;
 143.141              initial_images_start = e;
 143.142 -            move_memory(initial_images_start, 
 143.143 +            e -= modules_headroom;
 143.144 +            initial_images_base = e;
 143.145 +            move_memory(initial_images_start,
 143.146                          mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
 143.147          }
 143.148  
 143.149 @@ -758,17 +747,17 @@ void __init __start_xen(unsigned long mb
 143.150  
 143.151      if ( !initial_images_start )
 143.152          EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
 143.153 -    reserve_e820_ram(&boot_e820, initial_images_start, initial_images_end);
 143.154 +    reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
 143.155  
 143.156 -    /* Initialise Xen heap and boot heap. */
 143.157 -    xenheap_phys_start = init_boot_allocator(__pa(&_end));
 143.158 -    xenheap_phys_end   = opt_xenheap_megabytes << 20;
 143.159 -#if defined(CONFIG_X86_64)
 143.160 +    /* Initialise boot heap. */
 143.161 +    allocator_bitmap_end = init_boot_allocator(__pa(&_end));
 143.162 +#if defined(CONFIG_X86_32)
 143.163 +    xenheap_initial_phys_start = allocator_bitmap_end;
 143.164 +    xenheap_phys_end = DIRECTMAP_MBYTES << 20;
 143.165 +#else
 143.166      if ( !xen_phys_start )
 143.167          EARLY_FAIL("Not enough memory to relocate Xen.\n");
 143.168 -    xenheap_phys_end += xen_phys_start;
 143.169 -    reserve_e820_ram(&boot_e820, xen_phys_start,
 143.170 -                     xen_phys_start + (opt_xenheap_megabytes<<20));
 143.171 +    reserve_e820_ram(&boot_e820, __pa(&_start), allocator_bitmap_end);
 143.172  #endif
 143.173  
 143.174      /* Late kexec reservation (dynamic start address). */
 143.175 @@ -861,22 +850,21 @@ void __init __start_xen(unsigned long mb
 143.176  
 143.177      numa_initmem_init(0, max_page);
 143.178  
 143.179 -    /* Initialise the Xen heap, skipping RAM holes. */
 143.180 -    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
 143.181 -    nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
 143.182 -#ifdef __x86_64__
 143.183 -    init_xenheap_pages(xen_phys_start, __pa(&_start));
 143.184 -    nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
 143.185 -    vesa_init();
 143.186 -#endif
 143.187 -    xenheap_phys_start = xen_phys_start;
 143.188 +#if defined(CONFIG_X86_32)
 143.189 +    /* Initialise the Xen heap. */
 143.190 +    init_xenheap_pages(xenheap_initial_phys_start, xenheap_phys_end);
 143.191 +    nr_pages = (xenheap_phys_end - xenheap_initial_phys_start) >> PAGE_SHIFT;
 143.192      printk("Xen heap: %luMB (%lukB)\n", 
 143.193             nr_pages >> (20 - PAGE_SHIFT),
 143.194             nr_pages << (PAGE_SHIFT - 10));
 143.195 +#endif
 143.196  
 143.197      end_boot_allocator();
 143.198 +    early_boot = 0;
 143.199  
 143.200 -    early_boot = 0;
 143.201 +#if defined(CONFIG_X86_64)
 143.202 +    vesa_init();
 143.203 +#endif
 143.204  
 143.205      softirq_init();
 143.206  
 143.207 @@ -1050,7 +1038,8 @@ void __init __start_xen(unsigned long mb
 143.208       * above our heap. The second module, if present, is an initrd ramdisk.
 143.209       */
 143.210      if ( construct_dom0(dom0,
 143.211 -                        initial_images_start, 
 143.212 +                        initial_images_base,
 143.213 +                        initial_images_start,
 143.214                          mod[0].mod_end-mod[0].mod_start,
 143.215                          _initrd_start,
 143.216                          _initrd_len,
 143.217 @@ -1115,10 +1104,15 @@ void arch_get_xen_caps(xen_capabilities_
 143.218  
 143.219  int xen_in_range(paddr_t start, paddr_t end)
 143.220  {
 143.221 -    start = max_t(paddr_t, start, xenheap_phys_start);
 143.222 -    end = min_t(paddr_t, end, xenheap_phys_end);
 143.223 - 
 143.224 -    return start < end; 
 143.225 +#if defined(CONFIG_X86_32)
 143.226 +    paddr_t xs = 0;
 143.227 +    paddr_t xe = xenheap_phys_end;
 143.228 +#else
 143.229 +    paddr_t xs = __pa(&_stext);
 143.230 +    paddr_t xe = __pa(&_etext);
 143.231 +#endif
 143.232 +
 143.233 +    return (start < xe) && (end > xs);
 143.234  }
 143.235  
 143.236  /*
   144.1 --- a/xen/arch/x86/smpboot.c	Wed Jan 28 12:22:58 2009 +0900
   144.2 +++ b/xen/arch/x86/smpboot.c	Wed Jan 28 13:06:45 2009 +0900
   144.3 @@ -63,12 +63,6 @@
   144.4  /* Set if we find a B stepping CPU */
   144.5  static int __devinitdata smp_b_stepping;
   144.6  
   144.7 -/* Number of siblings per CPU package */
   144.8 -int smp_num_siblings = 1;
   144.9 -#ifdef CONFIG_X86_HT
  144.10 -EXPORT_SYMBOL(smp_num_siblings);
  144.11 -#endif
  144.12 -
  144.13  /* Package ID of each logical CPU */
  144.14  int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
  144.15  
  144.16 @@ -423,7 +417,7 @@ set_cpu_sibling_map(int cpu)
  144.17  
  144.18  	cpu_set(cpu, cpu_sibling_setup_map);
  144.19  
  144.20 -	if (smp_num_siblings > 1) {
  144.21 +	if (c[cpu].x86_num_siblings > 1) {
  144.22  		for_each_cpu_mask(i, cpu_sibling_setup_map) {
  144.23  			if (phys_proc_id[cpu] == phys_proc_id[i] &&
  144.24  			    cpu_core_id[cpu] == cpu_core_id[i]) {
  144.25 @@ -437,7 +431,7 @@ set_cpu_sibling_map(int cpu)
  144.26  		cpu_set(cpu, cpu_sibling_map[cpu]);
  144.27  	}
  144.28  
  144.29 -	if (current_cpu_data.x86_max_cores == 1) {
  144.30 +	if (c[cpu].x86_max_cores == 1) {
  144.31  		cpu_core_map[cpu] = cpu_sibling_map[cpu];
  144.32  		c[cpu].booted_cores = 1;
  144.33  		return;
  144.34 @@ -531,6 +525,8 @@ void __devinit start_secondary(void *unu
  144.35  	/* We can take interrupts now: we're officially "up". */
  144.36  	local_irq_enable();
  144.37  
  144.38 +	microcode_resume_cpu(cpu);
  144.39 +
  144.40  	wmb();
  144.41  	startup_cpu_idle_loop();
  144.42  }
   145.1 --- a/xen/arch/x86/tboot.c	Wed Jan 28 12:22:58 2009 +0900
   145.2 +++ b/xen/arch/x86/tboot.c	Wed Jan 28 13:06:45 2009 +0900
   145.3 @@ -96,18 +96,6 @@ int tboot_in_measured_env(void)
   145.4      return (g_tboot_shared != NULL);
   145.5  }
   145.6  
   145.7 -int tboot_in_range(paddr_t start, paddr_t end)
   145.8 -{
   145.9 -    if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 )
  145.10 -        return 0;
  145.11 -
  145.12 -    start = max_t(paddr_t, start, g_tboot_shared->tboot_base);
  145.13 -    end = min_t(paddr_t, end, 
  145.14 -                g_tboot_shared->tboot_base + g_tboot_shared->tboot_size);
  145.15 - 
  145.16 -    return start < end; 
  145.17 -}
  145.18 -
  145.19  /*
  145.20   * Local variables:
  145.21   * mode: C
   146.1 --- a/xen/arch/x86/time.c	Wed Jan 28 12:22:58 2009 +0900
   146.2 +++ b/xen/arch/x86/time.c	Wed Jan 28 13:06:45 2009 +0900
   146.3 @@ -532,9 +532,11 @@ static struct platform_timesource plt_pm
   146.4  };
   146.5  
   146.6  static struct time_scale pmt_scale;
   146.7 +static struct time_scale pmt_scale_r;
   146.8  static __init int init_pmtmr_scale(void)
   146.9  {
  146.10      set_time_scale(&pmt_scale, ACPI_PM_FREQUENCY);
  146.11 +    pmt_scale_r = scale_reciprocal(pmt_scale);
  146.12      return 0;
  146.13  }
  146.14  __initcall(init_pmtmr_scale);
  146.15 @@ -544,6 +546,11 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
  146.16      return scale_delta(ticks, &pmt_scale);
  146.17  }
  146.18  
  146.19 +uint64_t ns_to_acpi_pm_tick(uint64_t ns)
  146.20 +{
  146.21 +    return scale_delta(ns, &pmt_scale_r);
  146.22 +}
  146.23 +
  146.24  /************************************************************
  146.25   * GENERIC PLATFORM TIMER INFRASTRUCTURE
  146.26   */
   147.1 --- a/xen/arch/x86/traps.c	Wed Jan 28 12:22:58 2009 +0900
   147.2 +++ b/xen/arch/x86/traps.c	Wed Jan 28 13:06:45 2009 +0900
   147.3 @@ -723,6 +723,8 @@ static void pv_cpuid(struct cpu_user_reg
   147.4      {
   147.5          /* Modify Feature Information. */
   147.6          __clear_bit(X86_FEATURE_VME, &d);
   147.7 +        if ( !cpu_has_apic )
   147.8 +            __clear_bit(X86_FEATURE_APIC, &d);
   147.9          if ( !opt_allow_hugepage )
  147.10              __clear_bit(X86_FEATURE_PSE, &d);
  147.11          __clear_bit(X86_FEATURE_PGE, &d);
  147.12 @@ -755,6 +757,8 @@ static void pv_cpuid(struct cpu_user_reg
  147.13          __clear_bit(X86_FEATURE_XTPR % 32, &c);
  147.14          __clear_bit(X86_FEATURE_PDCM % 32, &c);
  147.15          __clear_bit(X86_FEATURE_DCA % 32, &c);
  147.16 +        if ( !cpu_has_apic )
  147.17 +           __clear_bit(X86_FEATURE_X2APIC % 32, &c);
  147.18          __set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
  147.19          break;
  147.20      case 0x80000001:
  147.21 @@ -773,6 +777,8 @@ static void pv_cpuid(struct cpu_user_reg
  147.22          __clear_bit(X86_FEATURE_RDTSCP % 32, &d);
  147.23  
  147.24          __clear_bit(X86_FEATURE_SVME % 32, &c);
  147.25 +        if ( !cpu_has_apic )
  147.26 +           __clear_bit(X86_FEATURE_EXTAPICSPACE % 32, &c);
  147.27          __clear_bit(X86_FEATURE_OSVW % 32, &c);
  147.28          __clear_bit(X86_FEATURE_IBS % 32, &c);
  147.29          __clear_bit(X86_FEATURE_SKINIT % 32, &c);
  147.30 @@ -1626,6 +1632,12 @@ void (*pv_post_outb_hook)(unsigned int p
  147.31  # define read_sreg(regs, sr) read_segment_register(sr)
  147.32  #endif
  147.33  
  147.34 +static int is_cpufreq_controller(struct domain *d)
  147.35 +{
  147.36 +    return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
  147.37 +            (d->domain_id == 0));
  147.38 +}
  147.39 +
  147.40  static int emulate_privileged_op(struct cpu_user_regs *regs)
  147.41  {
  147.42      struct vcpu *v = current;
  147.43 @@ -2137,7 +2149,7 @@ static int emulate_privileged_op(struct 
  147.44          case MSR_K8_PSTATE7:
  147.45              if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
  147.46                  goto fail;
  147.47 -            if ( cpufreq_controller != FREQCTL_dom0_kernel )
  147.48 +            if ( !is_cpufreq_controller(v->domain) )
  147.49                  break;
  147.50              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
  147.51                  goto fail;
  147.52 @@ -2172,17 +2184,14 @@ static int emulate_privileged_op(struct 
  147.53              if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
  147.54                  goto fail;
  147.55              break;
  147.56 +        case MSR_IA32_MPERF:
  147.57 +        case MSR_IA32_APERF:
  147.58          case MSR_IA32_PERF_CTL:
  147.59 -            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
  147.60 -                goto fail;
  147.61 -            if ( cpufreq_controller != FREQCTL_dom0_kernel )
  147.62 -                break;
  147.63 -            if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
  147.64 -                goto fail;
  147.65 -            break;
  147.66          case MSR_IA32_THERM_CONTROL:
  147.67              if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
  147.68                  goto fail;
  147.69 +            if ( !is_cpufreq_controller(v->domain) )
  147.70 +                break;
  147.71              if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
  147.72                  goto fail;
  147.73              break;
  147.74 @@ -2241,7 +2250,7 @@ static int emulate_privileged_op(struct 
  147.75          case MSR_K8_PSTATE7:
  147.76              if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
  147.77                  goto fail;
  147.78 -            if ( cpufreq_controller != FREQCTL_dom0_kernel )
  147.79 +            if ( !is_cpufreq_controller(v->domain) )
  147.80              {
  147.81                  regs->eax = regs->edx = 0;
  147.82                  break;
  147.83 @@ -2259,7 +2268,6 @@ static int emulate_privileged_op(struct 
  147.84                           MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
  147.85              break;
  147.86          case MSR_EFER:
  147.87 -        case MSR_IA32_THERM_CONTROL:
  147.88          case MSR_AMD_PATCHLEVEL:
  147.89          default:
  147.90              if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
   148.1 --- a/xen/arch/x86/x86_32/machine_kexec.c	Wed Jan 28 12:22:58 2009 +0900
   148.2 +++ b/xen/arch/x86/x86_32/machine_kexec.c	Wed Jan 28 13:06:45 2009 +0900
   148.3 @@ -6,8 +6,6 @@
   148.4   * - Magnus Damm <magnus@valinux.co.jp>
   148.5   */
   148.6  
   148.7 -#ifndef CONFIG_COMPAT
   148.8 -
   148.9  #include <xen/types.h>
  148.10  #include <xen/kernel.h>
  148.11  #include <asm/page.h>
  148.12 @@ -20,7 +18,6 @@ int machine_kexec_get_xen(xen_kexec_rang
  148.13                        (unsigned long)range->start;
  148.14          return 0;
  148.15  }
  148.16 -#endif
  148.17  
  148.18  /*
  148.19   * Local variables:
   149.1 --- a/xen/arch/x86/x86_32/mm.c	Wed Jan 28 12:22:58 2009 +0900
   149.2 +++ b/xen/arch/x86/x86_32/mm.c	Wed Jan 28 13:06:45 2009 +0900
   149.3 @@ -43,7 +43,7 @@ static unsigned long mpt_size;
   149.4  void *alloc_xen_pagetable(void)
   149.5  {
   149.6      extern int early_boot;
   149.7 -    extern unsigned long xenheap_phys_start;
   149.8 +    extern unsigned long xenheap_initial_phys_start;
   149.9      unsigned long mfn;
  149.10  
  149.11      if ( !early_boot )
  149.12 @@ -53,8 +53,8 @@ void *alloc_xen_pagetable(void)
  149.13          return v;
  149.14      }
  149.15  
  149.16 -    mfn = xenheap_phys_start >> PAGE_SHIFT;
  149.17 -    xenheap_phys_start += PAGE_SIZE;
  149.18 +    mfn = xenheap_initial_phys_start >> PAGE_SHIFT;
  149.19 +    xenheap_initial_phys_start += PAGE_SIZE;
  149.20      return mfn_to_virt(mfn);
  149.21  }
  149.22  
  149.23 @@ -159,15 +159,6 @@ void __init subarch_init_memory(void)
  149.24      unsigned long m2p_start_mfn;
  149.25      unsigned int i, j;
  149.26  
  149.27 -    /*
  149.28 -     * We are rather picky about the layout of 'struct page_info'. The
  149.29 -     * count_info and domain fields must be adjacent, as we perform atomic
  149.30 -     * 64-bit operations on them. Also, just for sanity, we assert the size
  149.31 -     * of the structure here.
  149.32 -     */
  149.33 -    BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) != 
  149.34 -                 (offsetof(struct page_info, count_info) + sizeof(u32)));
  149.35 -    BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
  149.36      BUILD_BUG_ON(sizeof(struct page_info) != 24);
  149.37  
  149.38      /* M2P table is mappable read-only by privileged domains. */
   150.1 --- a/xen/arch/x86/x86_64/Makefile	Wed Jan 28 12:22:58 2009 +0900
   150.2 +++ b/xen/arch/x86/x86_64/Makefile	Wed Jan 28 13:06:45 2009 +0900
   150.3 @@ -14,16 +14,3 @@ obj-$(CONFIG_COMPAT) += physdev.o
   150.4  obj-$(CONFIG_COMPAT) += platform_hypercall.o
   150.5  obj-$(CONFIG_COMPAT) += cpu_idle.o
   150.6  obj-$(CONFIG_COMPAT) += cpufreq.o
   150.7 -
   150.8 -ifeq ($(CONFIG_COMPAT),y)
   150.9 -# extra dependencies
  150.10 -compat.o:	../compat.c
  150.11 -domctl.o:	../domctl.c
  150.12 -mm.o:		compat/mm.c
  150.13 -physdev.o:	../physdev.c
  150.14 -platform_hypercall.o: ../platform_hypercall.c
  150.15 -sysctl.o:	../sysctl.c
  150.16 -traps.o:	compat/traps.c
  150.17 -cpu_idle.o:	../acpi/cpu_idle.c
  150.18 -cpufreq.o:	../../../drivers/cpufreq/cpufreq.c
  150.19 -endif
   151.1 --- a/xen/arch/x86/x86_64/compat/mm.c	Wed Jan 28 12:22:58 2009 +0900
   151.2 +++ b/xen/arch/x86/x86_64/compat/mm.c	Wed Jan 28 13:06:45 2009 +0900
   151.3 @@ -69,20 +69,6 @@ int compat_arch_memory_op(int op, XEN_GU
   151.4          break;
   151.5      }
   151.6  
   151.7 -    case XENMEM_remove_from_physmap:
   151.8 -    {
   151.9 -        struct compat_remove_from_physmap cmp;
  151.10 -        struct xen_remove_from_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
  151.11 -
  151.12 -        if ( copy_from_guest(&cmp, arg, 1) )
  151.13 -            return -EFAULT;
  151.14 -
  151.15 -        XLAT_remove_from_physmap(nat, &cmp);
  151.16 -        rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
  151.17 -
  151.18 -        break;
  151.19 -    }
  151.20 -
  151.21      case XENMEM_set_memory_map:
  151.22      {
  151.23          struct compat_foreign_memory_map cmp;
   152.1 --- a/xen/arch/x86/x86_64/machine_kexec.c	Wed Jan 28 12:22:58 2009 +0900
   152.2 +++ b/xen/arch/x86/x86_64/machine_kexec.c	Wed Jan 28 13:06:45 2009 +0900
   152.3 @@ -6,20 +6,17 @@
   152.4   * - Magnus Damm <magnus@valinux.co.jp>
   152.5   */
   152.6  
   152.7 -#ifndef CONFIG_COMPAT
   152.8 -
   152.9  #include <xen/types.h>
  152.10 +#include <xen/kernel.h>
  152.11  #include <asm/page.h>
  152.12  #include <public/kexec.h>
  152.13  
  152.14  int machine_kexec_get_xen(xen_kexec_range_t *range)
  152.15  {
  152.16 -        range->start = xenheap_phys_start;
  152.17 -        range->size = (unsigned long)xenheap_phys_end -
  152.18 -                      (unsigned long)range->start;
  152.19 +        range->start = virt_to_maddr(_start);
  152.20 +        range->size = virt_to_maddr(_end) - (unsigned long)range->start;
  152.21          return 0;
  152.22  }
  152.23 -#endif
  152.24  
  152.25  /*
  152.26   * Local variables:
   153.1 --- a/xen/arch/x86/x86_64/mm.c	Wed Jan 28 12:22:58 2009 +0900
   153.2 +++ b/xen/arch/x86/x86_64/mm.c	Wed Jan 28 13:06:45 2009 +0900
   153.3 @@ -30,6 +30,7 @@
   153.4  #include <asm/fixmap.h>
   153.5  #include <asm/hypercall.h>
   153.6  #include <asm/msr.h>
   153.7 +#include <asm/numa.h>
   153.8  #include <public/memory.h>
   153.9  
  153.10  #ifdef CONFIG_COMPAT
  153.11 @@ -105,6 +106,7 @@ l2_pgentry_t *virt_to_xen_l2e(unsigned l
  153.12  void __init paging_init(void)
  153.13  {
  153.14      unsigned long i, mpt_size, va;
  153.15 +    unsigned int memflags;
  153.16      l3_pgentry_t *l3_ro_mpt;
  153.17      l2_pgentry_t *l2_ro_mpt = NULL;
  153.18      struct page_info *l1_pg, *l2_pg, *l3_pg;
  153.19 @@ -125,7 +127,36 @@ void __init paging_init(void)
  153.20      mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
  153.21      for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
  153.22      {
  153.23 -        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
  153.24 +        BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
  153.25 +        va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
  153.26 +        memflags = MEMF_node(phys_to_nid(i <<
  153.27 +            (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT)));
  153.28 +
  153.29 +        if ( cpu_has_page1gb &&
  153.30 +             !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
  153.31 +             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) &&
  153.32 +             (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
  153.33 +                                          memflags)) != NULL )
  153.34 +        {
  153.35 +            map_pages_to_xen(
  153.36 +                RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
  153.37 +                page_to_mfn(l1_pg),
  153.38 +                1UL << (2 * PAGETABLE_ORDER),
  153.39 +                PAGE_HYPERVISOR);
  153.40 +            memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
  153.41 +                   0x77, 1UL << L3_PAGETABLE_SHIFT);
  153.42 +
  153.43 +            ASSERT(!l2_table_offset(va));
  153.44 +            /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
  153.45 +            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
  153.46 +                l3e_from_page(l1_pg,
  153.47 +                    /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
  153.48 +            i += (1UL << PAGETABLE_ORDER) - 1;
  153.49 +            continue;
  153.50 +        }
  153.51 +
  153.52 +        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
  153.53 +                                          memflags)) == NULL )
  153.54              goto nomem;
  153.55          map_pages_to_xen(
  153.56              RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
  153.57 @@ -136,14 +167,13 @@ void __init paging_init(void)
  153.58                 1UL << L2_PAGETABLE_SHIFT);
  153.59          if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
  153.60          {
  153.61 -            if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
  153.62 +            if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
  153.63                  goto nomem;
  153.64 -            va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
  153.65              l2_ro_mpt = page_to_virt(l2_pg);
  153.66              clear_page(l2_ro_mpt);
  153.67              l3e_write(&l3_ro_mpt[l3_table_offset(va)],
  153.68                        l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
  153.69 -            l2_ro_mpt += l2_table_offset(va);
  153.70 +            ASSERT(!l2_table_offset(va));
  153.71          }
  153.72          /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
  153.73          l2e_write(l2_ro_mpt, l2e_from_page(
  153.74 @@ -172,7 +202,10 @@ void __init paging_init(void)
  153.75          m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
  153.76      for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
  153.77      {
  153.78 -        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
  153.79 +        memflags = MEMF_node(phys_to_nid(i <<
  153.80 +            (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
  153.81 +        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
  153.82 +                                          memflags)) == NULL )
  153.83              goto nomem;
  153.84          map_pages_to_xen(
  153.85              RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
  153.86 @@ -221,34 +254,36 @@ void __init zap_low_mappings(void)
  153.87  
  153.88  void __init subarch_init_memory(void)