xenbits.xensource.com Git - people/jgross/xen.git/commitdiff
tools/libxc: move libxenguest to tools/libs/guest
authorJuergen Gross <jgross@suse.com>
Sun, 23 Aug 2020 08:00:16 +0000 (10:00 +0200)
committerJuergen Gross <jgross@suse.com>
Fri, 28 Aug 2020 15:00:49 +0000 (17:00 +0200)
tools/libxc now contains libxenguest only. Move it to tools/libs/guest.

When generating the pkg-config file for libxenguest a filter is now
required for replacing "xenctrl" by "xencontrol" in the
"Requires.private:" entry. Add this filter to tools/libs/libs.mk.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org> (stubdom parts)
93 files changed:
.gitignore
stubdom/Makefile
stubdom/grub/Makefile
stubdom/mini-os.mk
tools/Makefile
tools/Rules.mk
tools/libs/Makefile
tools/libs/guest/COPYING [new file with mode: 0644]
tools/libs/guest/Makefile [new file with mode: 0644]
tools/libs/guest/include/xenguest.h [new file with mode: 0644]
tools/libs/guest/xg_cpuid_x86.c [new file with mode: 0644]
tools/libs/guest/xg_dom_arm.c [new file with mode: 0644]
tools/libs/guest/xg_dom_armzimageloader.c [new file with mode: 0644]
tools/libs/guest/xg_dom_binloader.c [new file with mode: 0644]
tools/libs/guest/xg_dom_boot.c [new file with mode: 0644]
tools/libs/guest/xg_dom_bzimageloader.c [new file with mode: 0644]
tools/libs/guest/xg_dom_compat_linux.c [new file with mode: 0644]
tools/libs/guest/xg_dom_core.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress.h [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_lz4.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe.h [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe_bzip2.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe_lzma.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe_lzo1x.c [new file with mode: 0644]
tools/libs/guest/xg_dom_decompress_unsafe_xz.c [new file with mode: 0644]
tools/libs/guest/xg_dom_elfloader.c [new file with mode: 0644]
tools/libs/guest/xg_dom_hvmloader.c [new file with mode: 0644]
tools/libs/guest/xg_dom_x86.c [new file with mode: 0644]
tools/libs/guest/xg_domain.c [new file with mode: 0644]
tools/libs/guest/xg_nomigrate.c [new file with mode: 0644]
tools/libs/guest/xg_offline_page.c [new file with mode: 0644]
tools/libs/guest/xg_private.c [new file with mode: 0644]
tools/libs/guest/xg_private.h [new file with mode: 0644]
tools/libs/guest/xg_save_restore.h [new file with mode: 0644]
tools/libs/guest/xg_sr_common.c [new file with mode: 0644]
tools/libs/guest/xg_sr_common.h [new file with mode: 0644]
tools/libs/guest/xg_sr_common_x86.c [new file with mode: 0644]
tools/libs/guest/xg_sr_common_x86.h [new file with mode: 0644]
tools/libs/guest/xg_sr_common_x86_pv.c [new file with mode: 0644]
tools/libs/guest/xg_sr_common_x86_pv.h [new file with mode: 0644]
tools/libs/guest/xg_sr_restore.c [new file with mode: 0644]
tools/libs/guest/xg_sr_restore_x86_hvm.c [new file with mode: 0644]
tools/libs/guest/xg_sr_restore_x86_pv.c [new file with mode: 0644]
tools/libs/guest/xg_sr_save.c [new file with mode: 0644]
tools/libs/guest/xg_sr_save_x86_hvm.c [new file with mode: 0644]
tools/libs/guest/xg_sr_save_x86_pv.c [new file with mode: 0644]
tools/libs/guest/xg_sr_stream_format.h [new file with mode: 0644]
tools/libs/guest/xg_suspend.c [new file with mode: 0644]
tools/libs/libs.mk
tools/libs/uselibs.mk
tools/libxc/COPYING [deleted file]
tools/libxc/Makefile [deleted file]
tools/libxc/include/xenguest.h [deleted file]
tools/libxc/xg_cpuid_x86.c [deleted file]
tools/libxc/xg_dom_arm.c [deleted file]
tools/libxc/xg_dom_armzimageloader.c [deleted file]
tools/libxc/xg_dom_binloader.c [deleted file]
tools/libxc/xg_dom_boot.c [deleted file]
tools/libxc/xg_dom_bzimageloader.c [deleted file]
tools/libxc/xg_dom_compat_linux.c [deleted file]
tools/libxc/xg_dom_core.c [deleted file]
tools/libxc/xg_dom_decompress.h [deleted file]
tools/libxc/xg_dom_decompress_lz4.c [deleted file]
tools/libxc/xg_dom_decompress_unsafe.c [deleted file]
tools/libxc/xg_dom_decompress_unsafe.h [deleted file]
tools/libxc/xg_dom_decompress_unsafe_bzip2.c [deleted file]
tools/libxc/xg_dom_decompress_unsafe_lzma.c [deleted file]
tools/libxc/xg_dom_decompress_unsafe_lzo1x.c [deleted file]
tools/libxc/xg_dom_decompress_unsafe_xz.c [deleted file]
tools/libxc/xg_dom_elfloader.c [deleted file]
tools/libxc/xg_dom_hvmloader.c [deleted file]
tools/libxc/xg_dom_x86.c [deleted file]
tools/libxc/xg_domain.c [deleted file]
tools/libxc/xg_nomigrate.c [deleted file]
tools/libxc/xg_offline_page.c [deleted file]
tools/libxc/xg_private.c [deleted file]
tools/libxc/xg_private.h [deleted file]
tools/libxc/xg_save_restore.h [deleted file]
tools/libxc/xg_sr_common.c [deleted file]
tools/libxc/xg_sr_common.h [deleted file]
tools/libxc/xg_sr_common_x86.c [deleted file]
tools/libxc/xg_sr_common_x86.h [deleted file]
tools/libxc/xg_sr_common_x86_pv.c [deleted file]
tools/libxc/xg_sr_common_x86_pv.h [deleted file]
tools/libxc/xg_sr_restore.c [deleted file]
tools/libxc/xg_sr_restore_x86_hvm.c [deleted file]
tools/libxc/xg_sr_restore_x86_pv.c [deleted file]
tools/libxc/xg_sr_save.c [deleted file]
tools/libxc/xg_sr_save_x86_hvm.c [deleted file]
tools/libxc/xg_sr_save_x86_pv.c [deleted file]
tools/libxc/xg_sr_stream_format.h [deleted file]
tools/libxc/xg_suspend.c [deleted file]

index d22b031ed2e01053b80d02ee1cda2eb56c3003bc..eb637a98e950a51f99e0bd4a94700237b652e73b 100644 (file)
@@ -71,7 +71,6 @@ stubdom/include
 stubdom/ioemu
 stubdom/ioemu/
 stubdom/libs-*
-stubdom/libxc-*
 stubdom/libxencall-*
 stubdom/libxenevtchn-*
 stubdom/libxenforeignmemory-*
@@ -121,6 +120,14 @@ tools/libs/foreignmemory/headers.chk
 tools/libs/foreignmemory/xenforeignmemory.pc
 tools/libs/devicemodel/headers.chk
 tools/libs/devicemodel/xendevicemodel.pc
+tools/libs/guest/_*.[ch]
+tools/libs/guest/libxenguest.map
+tools/libs/guest/xenguest.pc
+tools/libs/guest/xc_bitops.h
+tools/libs/guest/xc_core.h
+tools/libs/guest/xc_core_arm.h
+tools/libs/guest/xc_core_x86.h
+tools/libs/guest/xc_private.h
 tools/console/xenconsole
 tools/console/xenconsoled
 tools/console/client/_paths.h
@@ -197,12 +204,6 @@ tools/include/xen-xsm/*
 tools/include/xen-foreign/*.(c|h|size)
 tools/include/xen-foreign/checker
 tools/libvchan/xenvchan.pc
-tools/libxc/*.pc
-tools/libxc/xc_bitops.h
-tools/libxc/xc_core.h
-tools/libxc/xc_core_arm.h
-tools/libxc/xc_core_x86.h
-tools/libxc/xc_private.h
 tools/libxl/_libxl.api-for-check
 tools/libxl/*.api-ok
 tools/libxl/*.pc
@@ -370,7 +371,6 @@ tools/include/xen-foreign/arm64.h
 tools/misc/xen-hptool
 tools/misc/xen-mfndump
 tools/libs/toolcore/include/_*.h
-tools/libxc/_*.[ch]
 tools/libxl/_*.[ch]
 tools/libxl/testidl
 tools/libxl/testidl.c
index 961a9f87048ffabb27f4798380c7c3162699f764..de10281ca333564a486ad58b066c01c2421f1a01 100644 (file)
@@ -331,7 +331,9 @@ endif
 # libraries under tools/libs
 #######
 
-STUB_LIBS := toolcore toollog evtchn gnttab call foreignmemory devicemodel ctrl
+STUB_LIBS := toolcore toollog evtchn gnttab call foreignmemory devicemodel ctrl guest
+
+LIBDEP_guest := cross-zlib
 
 #######
 # common handling
@@ -362,13 +364,10 @@ endef
 
 $(foreach lib,$(STUB_LIBS),$(eval $(call BUILD_lib,$(lib))))
 
-libxc-$(XEN_TARGET_ARCH)/stamp: $(XEN_ROOT)/tools/libxc/Makefile
-       $(do_links)
-
 xenstore/stamp: $(XEN_ROOT)/tools/xenstore/Makefile
        $(do_links)
 
-LINK_DIRS := libxc-$(XEN_TARGET_ARCH) xenstore $(foreach dir,$(STUB_LIBS),libs-$(XEN_TARGET_ARCH)/$(dir))
+LINK_DIRS := xenstore $(foreach dir,$(STUB_LIBS),libs-$(XEN_TARGET_ARCH)/$(dir))
 LINK_STAMPS := $(foreach dir,$(LINK_DIRS),$(dir)/stamp)
 
 mk-headers-$(XEN_TARGET_ARCH): $(IOEMU_LINKFARM_TARGET) $(LINK_STAMPS)
@@ -391,16 +390,6 @@ $(TARGETS_MINIOS): mini-os-%:
                 mkdir -p $@/$$i ; \
        done
 
-#######
-# libxc
-#######
-
-.PHONY: libxc
-libxc: libxc-$(XEN_TARGET_ARCH)/libxenguest.a
-libxc-$(XEN_TARGET_ARCH)/libxenguest.a: libxenevtchn libxenctrl cross-zlib
-libxc-$(XEN_TARGET_ARCH)/libxenguest.a: mk-headers-$(XEN_TARGET_ARCH) $(NEWLIB_STAMPFILE)
-       CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) DESTDIR= CONFIG_LIBXC_MINIOS=y -C libxc-$(XEN_TARGET_ARCH)
-
 #######
 # ioemu
 #######
@@ -409,7 +398,7 @@ ioemu-minios-config.mk: $(CURDIR)/ioemu-minios.cfg
        MINIOS_CONFIG="$<" CONFIG_FILE="$(CURDIR)/$@" $(MAKE) DESTDIR= -C $(MINI_OS) config
 
 .PHONY: ioemu
-ioemu: cross-zlib cross-libpci libxc ioemu-minios-config.mk
+ioemu: cross-zlib cross-libpci libxenguest ioemu-minios-config.mk
        [ -f ioemu/config-host.mak ] || \
          ( $(buildmakevars2shellvars); \
            cd ioemu ; \
@@ -503,15 +492,15 @@ xenstore: $(CROSS_ROOT) xenstore-minios-config.mk
 
 .PHONY: ioemu-stubdom
 ioemu-stubdom: APP_OBJS=$(CURDIR)/ioemu/i386-stubdom/qemu.a $(CURDIR)/ioemu/i386-stubdom/libqemu.a $(CURDIR)/ioemu/libqemu_common.a
-ioemu-stubdom: mini-os-$(XEN_TARGET_ARCH)-ioemu lwip-$(XEN_TARGET_ARCH) libxc ioemu
+ioemu-stubdom: mini-os-$(XEN_TARGET_ARCH)-ioemu lwip-$(XEN_TARGET_ARCH) libxenguest ioemu
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/ioemu-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(APP_OBJS)"
 
 .PHONY: caml-stubdom
-caml-stubdom: mini-os-$(XEN_TARGET_ARCH)-caml lwip-$(XEN_TARGET_ARCH) libxc cross-ocaml caml
+caml-stubdom: mini-os-$(XEN_TARGET_ARCH)-caml lwip-$(XEN_TARGET_ARCH) libxenguest cross-ocaml caml
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/caml/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS="$(CURDIR)/caml/main-caml.o $(CURDIR)/caml/caml.o $(CAMLLIB)/libasmrun.a"
 
 .PHONY: c-stubdom
-c-stubdom: mini-os-$(XEN_TARGET_ARCH)-c lwip-$(XEN_TARGET_ARCH) libxc c
+c-stubdom: mini-os-$(XEN_TARGET_ARCH)-c lwip-$(XEN_TARGET_ARCH) libxenguest c
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/c/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) APP_OBJS=$(CURDIR)/c/main.a
 
 .PHONY: vtpm-stubdom
@@ -523,11 +512,11 @@ vtpmmgr-stubdom: mini-os-$(XEN_TARGET_ARCH)-vtpmmgr vtpmmgr
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/vtpmmgr/minios.cfg" $(MAKE) -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS="$(CURDIR)/vtpmmgr/vtpmmgr.a" APP_LDLIBS="-lm -lpolarssl"
 
 .PHONY: pv-grub
-pv-grub: mini-os-$(XEN_TARGET_ARCH)-grub libxc grub
+pv-grub: mini-os-$(XEN_TARGET_ARCH)-grub libxenguest grub
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/grub/minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/grub-$(XEN_TARGET_ARCH)/main.a
 
 .PHONY: xenstore-stubdom
-xenstore-stubdom: mini-os-$(XEN_TARGET_ARCH)-xenstore libxc xenstore
+xenstore-stubdom: mini-os-$(XEN_TARGET_ARCH)-xenstore libxenguest xenstore
        DEF_CPPFLAGS="$(TARGET_CPPFLAGS)" DEF_CFLAGS="$(TARGET_CFLAGS)" DEF_LDFLAGS="$(TARGET_LDFLAGS)" MINIOS_CONFIG="$(CURDIR)/xenstore-minios.cfg" $(MAKE) DESTDIR= -C $(MINI_OS) OBJ_DIR=$(CURDIR)/$< APP_OBJS=$(CURDIR)/xenstore/xenstored.a
 
 #########
@@ -621,7 +610,6 @@ clean:
        rm -f $(STUBDOMPATH)
        rm -f *-minios-config.mk
        rm -fr pkg-config
-       [ ! -e libxc-$(XEN_TARGET_ARCH)/Makefile ] || $(MAKE) DESTDIR= -C libxc-$(XEN_TARGET_ARCH) clean
        -[ ! -d ioemu ] || $(MAKE) DESTDIR= -C ioemu clean
        -[ ! -d xenstore ] || $(MAKE) DESTDIR= -C xenstore clean
 
@@ -632,7 +620,7 @@ crossclean: clean
        rm -fr newlib-$(XEN_TARGET_ARCH)
        rm -fr zlib-$(XEN_TARGET_ARCH) pciutils-$(XEN_TARGET_ARCH)
        rm -fr libs-$(XEN_TARGET_ARCH)
-       rm -fr libxc-$(XEN_TARGET_ARCH) ioemu xenstore
+       rm -fr ioemu xenstore
        rm -fr gmp-$(XEN_TARGET_ARCH)
        rm -fr polarssl-$(XEN_TARGET_ARCH)
        rm -fr openssl-$(XEN_TARGET_ARCH)
index d33fa2f71ea89f80523d3b49b7b5c209bc8993f6..7397661c9b435de9afbc399e8fead567cf7d41a5 100644 (file)
@@ -7,7 +7,7 @@ BOOT=$(OBJ_DIR)/boot-$(XEN_TARGET_ARCH).o
 
 DEF_CPPFLAGS += -I$(XEN_ROOT)/tools/libs/toollog/include
 DEF_CPPFLAGS += -I$(XEN_ROOT)/tools/libs/ctrl/include
-DEF_CPPFLAGS += -I$(XEN_ROOT)/tools/libxc/include
+DEF_CPPFLAGS += -I$(XEN_ROOT)/tools/libs/guest/include
 DEF_CPPFLAGS += -I$(XEN_ROOT)/tools/include -I.
 DEF_CPPFLAGS += -I../grub-upstream/stage1
 DEF_CPPFLAGS += -I../grub-upstream/stage2
index b1387df3f8e9abfe915fa2fc06562f1d87621650..e1640a7cbc863a3c91267ff251028b84b3b4eed0 100644 (file)
@@ -14,4 +14,4 @@ CALL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/call
 FOREIGNMEMORY_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/foreignmemory
 DEVICEMODEL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/devicemodel
 CTRL_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/ctrl
-GUEST_PATH = $(XEN_ROOT)/stubdom/libxc-$(MINIOS_TARGET_ARCH)
+GUEST_PATH = $(XEN_ROOT)/stubdom/libs-$(MINIOS_TARGET_ARCH)/guest
index 7c9f9fc9004b0e2f6f87a302deb747e60e2c1bb8..f9b40122909223a94a3d3c20ae2729983a644c2f 100644 (file)
@@ -6,7 +6,6 @@ include $(XEN_ROOT)/tools/Rules.mk
 
 SUBDIRS-y :=
 SUBDIRS-y += libs
-SUBDIRS-y += libxc
 SUBDIRS-y += flask
 SUBDIRS-y += fuzz
 SUBDIRS-y += xenstore
@@ -44,7 +43,7 @@ SUBDIRS-y += pygrub
 SUBDIRS-$(OCAML_TOOLS) += ocaml
 
 ifeq ($(CONFIG_RUMP),y)
-SUBDIRS-y := libs libxc xenstore
+SUBDIRS-y := libs xenstore
 endif
 
 # For the sake of linking, set the sys-root
index 1cc56e9ab83a9e96e9ab7dd870fab7c1d267abb1..914c79bfb91e583384163198ddd07624d4fbcfd5 100644 (file)
@@ -15,7 +15,6 @@ XEN_INCLUDE        = $(XEN_ROOT)/tools/include
 
 include $(XEN_ROOT)/tools/libs/uselibs.mk
 
-XEN_libxenguest    = $(XEN_ROOT)/tools/libxc
 XEN_libxenlight    = $(XEN_ROOT)/tools/libxl
 # Currently libxlutil lives in the same directory as libxenlight
 XEN_libxlutil      = $(XEN_libxenlight)
@@ -105,11 +104,7 @@ $(foreach lib,$(LIBS_LIBS),$(eval $(call LIB_defs,$(lib))))
 # code which compiles against libxenctrl get __XEN_TOOLS__ and
 # therefore sees the unstable hypercall interfaces.
 CFLAGS_libxenctrl += $(CFLAGS_libxentoollog) $(CFLAGS_libxenforeignmemory) $(CFLAGS_libxendevicemodel) -D__XEN_TOOLS__
-
-CFLAGS_libxenguest = -I$(XEN_libxenguest)/include $(CFLAGS_libxenevtchn) $(CFLAGS_libxenforeignmemory) $(CFLAGS_xeninclude)
-SHDEPS_libxenguest = $(SHLIB_libxenevtchn) $(SHLIB_libxenctrl)
-LDLIBS_libxenguest = $(SHDEPS_libxenguest) $(XEN_libxenguest)/libxenguest$(libextension)
-SHLIB_libxenguest  = $(SHDEPS_libxenguest) -Wl,-rpath-link=$(XEN_libxenguest)
+CFLAGS_libxenguest += $(CFLAGS_libxenevtchn) $(CFLAGS_libxenforeignmemory)
 
 CFLAGS_libxenstore = -I$(XEN_libxenstore)/include $(CFLAGS_xeninclude)
 SHDEPS_libxenstore = $(SHLIB_libxentoolcore) $(SHLIB_libxenctrl)
index 7648ea0e4cfc96f0831cc8078914e9a8f1ad4769..f15c1688f782107a58953eb76fd31b3f1c095a6c 100644 (file)
@@ -10,6 +10,7 @@ SUBDIRS-y += call
 SUBDIRS-y += foreignmemory
 SUBDIRS-y += devicemodel
 SUBDIRS-y += ctrl
+SUBDIRS-y += guest
 SUBDIRS-y += hypfs
 
 ifeq ($(CONFIG_RUMP),y)
diff --git a/tools/libs/guest/COPYING b/tools/libs/guest/COPYING
new file mode 100644 (file)
index 0000000..7ca8702
--- /dev/null
@@ -0,0 +1,467 @@
+Note that the only valid version of the LGPL as far as the files in
+this directory (and its subdirectories) are concerned is _this_
+particular version of the license (i.e., *only* v2.1, not v2.2 or v3.x
+or whatever), unless explicitly otherwise stated.
+
+Where clause 3 is invoked in order to relicense under the GPL then
+this shall be considered to be GPL v2 only for files which have
+specified LGPL v2.1 only.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+\f
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+\f
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+\f
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+\f
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+\f
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+\f
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+\f
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+\f
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
diff --git a/tools/libs/guest/Makefile b/tools/libs/guest/Makefile
new file mode 100644 (file)
index 0000000..e53aeab
--- /dev/null
@@ -0,0 +1,121 @@
+XEN_ROOT = $(CURDIR)/../../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+ifeq ($(CONFIG_LIBXC_MINIOS),y)
+# Save/restore of a domain is currently incompatible with a stubdom environment
+override CONFIG_MIGRATE := n
+endif
+
+LINK_FILES := xc_private.h xc_core.h xc_core_x86.h xc_core_arm.h xc_bitops.h
+
+$(LINK_FILES):
+       ln -sf $(XEN_ROOT)/tools/libs/ctrl/$(notdir $@) $@
+
+SRCS-y += xg_private.c
+SRCS-y += xg_domain.c
+SRCS-y += xg_suspend.c
+ifeq ($(CONFIG_MIGRATE),y)
+SRCS-y += xg_sr_common.c
+SRCS-$(CONFIG_X86) += xg_sr_common_x86.c
+SRCS-$(CONFIG_X86) += xg_sr_common_x86_pv.c
+SRCS-$(CONFIG_X86) += xg_sr_restore_x86_pv.c
+SRCS-$(CONFIG_X86) += xg_sr_restore_x86_hvm.c
+SRCS-$(CONFIG_X86) += xg_sr_save_x86_pv.c
+SRCS-$(CONFIG_X86) += xg_sr_save_x86_hvm.c
+SRCS-y += xg_sr_restore.c
+SRCS-y += xg_sr_save.c
+SRCS-y += xg_offline_page.c
+else
+SRCS-y += xg_nomigrate.c
+endif
+
+vpath %.c ../../../xen/common/libelf
+CFLAGS += -I../../../xen/common/libelf
+
+ELF_SRCS-y += libelf-tools.c libelf-loader.c
+ELF_SRCS-y += libelf-dominfo.c
+
+SRCS-y += $(ELF_SRCS-y)
+
+$(patsubst %.c,%.o,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
+$(patsubst %.c,%.opic,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
+
+ifeq ($(CONFIG_X86),y) # Add libx86 to the build
+vpath %.c ../../../xen/lib/x86
+
+SRCS-y                 += cpuid.c msr.c
+endif
+
+# new domain builder
+SRCS-y                 += xg_dom_core.c
+SRCS-y                 += xg_dom_boot.c
+SRCS-y                 += xg_dom_elfloader.c
+SRCS-$(CONFIG_X86)     += xg_dom_bzimageloader.c
+SRCS-$(CONFIG_X86)     += xg_dom_decompress_lz4.c
+SRCS-$(CONFIG_X86)     += xg_dom_hvmloader.c
+SRCS-$(CONFIG_ARM)     += xg_dom_armzimageloader.c
+SRCS-y                 += xg_dom_binloader.c
+SRCS-y                 += xg_dom_compat_linux.c
+
+SRCS-$(CONFIG_X86)     += xg_dom_x86.c
+SRCS-$(CONFIG_X86)     += xg_cpuid_x86.c
+SRCS-$(CONFIG_ARM)     += xg_dom_arm.c
+
+ifeq ($(CONFIG_LIBXC_MINIOS),y)
+SRCS-y                 += xg_dom_decompress_unsafe.c
+SRCS-y                 += xg_dom_decompress_unsafe_bzip2.c
+SRCS-y                 += xg_dom_decompress_unsafe_lzma.c
+SRCS-y                 += xg_dom_decompress_unsafe_lzo1x.c
+SRCS-y                 += xg_dom_decompress_unsafe_xz.c
+endif
+
+-include $(XEN_TARGET_ARCH)/Makefile
+
+CFLAGS   += -Werror -Wmissing-prototypes
+CFLAGS   += -I. -I./include $(CFLAGS_xeninclude)
+CFLAGS   += -D__XEN_TOOLS__
+CFLAGS   += -include $(XEN_ROOT)/tools/config.h
+
+# Needed for posix_fadvise64() in xc_linux.c
+CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
+
+CFLAGS += $(PTHREAD_CFLAGS)
+CFLAGS += $(CFLAGS_libxentoollog)
+CFLAGS += $(CFLAGS_libxenevtchn)
+CFLAGS += $(CFLAGS_libxendevicemodel)
+
+# libxenguest includes xc_private.h, so needs this despite not using
+# this functionality directly.
+CFLAGS += $(CFLAGS_libxencall) $(CFLAGS_libxenforeignmemory)
+
+ifeq ($(CONFIG_MiniOS),y)
+zlib-options =
+else
+zlib-options = $(ZLIB)
+endif
+
+xg_dom_bzimageloader.o: CFLAGS += $(filter -D%,$(zlib-options))
+xg_dom_bzimageloader.opic: CFLAGS += $(filter -D%,$(zlib-options))
+
+LIBHEADER := xenguest.h
+
+NO_HEADERS_CHK := y
+
+include $(XEN_ROOT)/tools/libs/libs.mk
+
+libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(filter -l%,$(zlib-options))
+libxenguest.so.$(MAJOR).$(MINOR): APPEND_LDFLAGS += $(COMPRESSION_LIBS) -lz
+
+genpath-target = $(call buildmakevars2header,_paths.h)
+$(eval $(genpath-target))
+
+xc_private.h: _paths.h
+
+$(LIB_OBJS) $(PIC_OBJS): $(LINK_FILES)
+
+$(PKG_CONFIG_LOCAL): PKG_CONFIG_INCDIR = $(XEN_libxenctrl)/include
+$(PKG_CONFIG_LOCAL): PKG_CONFIG_CFLAGS_LOCAL = $(CFLAGS_xeninclude)
+
+.PHONY: cleanlocal
+cleanlocal:
+       rm -f libxenguest.map
diff --git a/tools/libs/guest/include/xenguest.h b/tools/libs/guest/include/xenguest.h
new file mode 100644 (file)
index 0000000..4643384
--- /dev/null
@@ -0,0 +1,327 @@
+/******************************************************************************
+ * xenguest.h
+ *
+ * A library for guest domain management in Xen.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XENGUEST_H
+#define XENGUEST_H
+
+#include <xenctrl_dom.h>
+
+#define XC_NUMA_NO_NODE   (~0U)
+
+#define XCFLAGS_LIVE      (1 << 0)
+#define XCFLAGS_DEBUG     (1 << 1)
+
+#define X86_64_B_SIZE   64
+#define X86_32_B_SIZE   32
+
+/*
+ * Users not using xc_suspend_* / xc_await_suspend may not want to
+ * include the full libxenevtchn API here.
+ */
+struct xenevtchn_handle;
+
+/* For save's precopy_policy(). */
+struct precopy_stats
+{
+    unsigned int iteration;
+    unsigned int total_written;
+    long dirty_count; /* -1 if unknown */
+};
+
+/*
+ * A precopy_policy callback may not be running in the same address
+ * space as libxc and so precopy_stats is passed by value.
+ */
+typedef int (*precopy_policy_t)(struct precopy_stats, void *);
+
+/* callbacks provided by xc_domain_save */
+struct save_callbacks {
+    /*
+     * Called after expiration of checkpoint interval,
+     * to suspend the guest.
+     */
+    int (*suspend)(void *data);
+
+    /*
+     * Called before and after every batch of page data sent during
+     * the precopy phase of a live migration to ask the caller what
+     * to do next based on the current state of the precopy migration.
+     *
+     * Should return one of the values listed below:
+     */
+#define XGS_POLICY_ABORT          (-1) /* Abandon the migration entirely
+                                        * and tidy up. */
+#define XGS_POLICY_CONTINUE_PRECOPY 0  /* Remain in the precopy phase. */
+#define XGS_POLICY_STOP_AND_COPY    1  /* Immediately suspend and transmit the
+                                        * remaining dirty pages. */
+    precopy_policy_t precopy_policy;
+
+    /*
+     * Called after the guest's dirty pages have been
+     *  copied into an output buffer.
+     * Callback function resumes the guest & the device model,
+     *  returns to xc_domain_save.
+     * xc_domain_save then flushes the output buffer, while the
+     *  guest continues to run.
+     */
+    int (*postcopy)(void *data);
+
+    /*
+     * Called after the memory checkpoint has been flushed
+     * out into the network. Typical actions performed in this
+     * callback include:
+     *   (a) send the saved device model state (for HVM guests),
+     *   (b) wait for checkpoint ack
+     *   (c) release the network output buffer pertaining to the acked checkpoint.
+     *   (d) sleep for the checkpoint interval.
+     *
+     * returns:
+     * 0: terminate checkpointing gracefully
+     * 1: take another checkpoint
+     */
+    int (*checkpoint)(void *data);
+
+    /*
+     * Called after the checkpoint callback.
+     *
+     * returns:
+     * 0: terminate checkpointing gracefully
+     * 1: take another checkpoint
+     */
+    int (*wait_checkpoint)(void *data);
+
+    /* Enable qemu-dm logging dirty pages to xen */
+    int (*switch_qemu_logdirty)(uint32_t domid, unsigned enable, void *data); /* HVM only */
+
+    /* to be provided as the last argument to each callback function */
+    void *data;
+};
+
+/* Type of stream.  Plain, or using a continuous replication protocol? */
+typedef enum {
+    XC_STREAM_PLAIN,
+    XC_STREAM_REMUS,
+    XC_STREAM_COLO,
+} xc_stream_type_t;
+
+/**
+ * This function will save a running domain.
+ *
+ * @param xch a handle to an open hypervisor interface
+ * @param io_fd the file descriptor to save a domain to
+ * @param dom the id of the domain
+ * @param flags XCFLAGS_xxx
+ * @param stream_type XC_STREAM_PLAIN if the far end of the stream
+ *        doesn't use checkpointing
+ * @param recv_fd Only used for XC_STREAM_COLO.  Contains backchannel from
+ *        the destination side.
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
+                   uint32_t flags, struct save_callbacks *callbacks,
+                   xc_stream_type_t stream_type, int recv_fd);
+
+/* callbacks provided by xc_domain_restore */
+struct restore_callbacks {
+    /*
+     * Called once the STATIC_DATA_END record has been received/inferred.
+     *
+     * For compatibility with older streams, provides a list of static data
+     * expected to be found in the stream, which was missing.  A higher level
+ * toolstack is responsible for providing any necessary compatibility.
+     */
+#define XGR_SDD_MISSING_CPUID (1 << 0)
+#define XGR_SDD_MISSING_MSR   (1 << 1)
+    int (*static_data_done)(unsigned int missing, void *data);
+
+    /* Called after a new checkpoint to suspend the guest. */
+    int (*suspend)(void *data);
+
+    /*
+     * Called after the secondary vm is ready to resume.
+     * Callback function resumes the guest & the device model,
+     * returns to xc_domain_restore.
+     */
+    int (*postcopy)(void *data);
+
+    /*
+     * A checkpoint record has been found in the stream.
+     * returns:
+     */
+#define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
+#define XGR_CHECKPOINT_SUCCESS  1 /* Continue reading more data from the stream */
+#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
+    int (*checkpoint)(void *data);
+
+    /*
+     * Called after the checkpoint callback.
+     *
+     * returns:
+     * 0: terminate checkpointing gracefully
+     * 1: take another checkpoint
+     */
+    int (*wait_checkpoint)(void *data);
+
+    /*
+     * callback to send store gfn and console gfn to xl
+     * if we want to resume vm before xc_domain_save()
+     * exits.
+     */
+    void (*restore_results)(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
+                            void *data);
+
+    /* to be provided as the last argument to each callback function */
+    void *data;
+};
+
+/**
+ * This function will restore a saved domain.
+ *
+ * Domain is restored in a suspended state ready to be unpaused.
+ *
+ * @param xch a handle to an open hypervisor interface
+ * @param io_fd the file descriptor to restore a domain from
+ * @param dom the id of the domain
+ * @param store_evtchn the xenstore event channel for this domain to use
+ * @param store_mfn filled with the gfn of the store page
+ * @param store_domid the backend domain for xenstore
+ * @param console_evtchn the console event channel for this domain to use
+ * @param console_mfn filled with the gfn of the console page
+ * @param console_domid the backend domain for xenconsole
+ * @param stream_type XC_STREAM_PLAIN if the far end of the stream
+ *        doesn't use checkpointing
+ * @param callbacks non-NULL to receive a callback to restore toolstack
+ *        specific data
+ * @param send_back_fd Only used for XC_STREAM_COLO.  Contains backchannel to
+ *        the source side.
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      uint32_t store_domid, unsigned int console_evtchn,
+                      unsigned long *console_mfn, uint32_t console_domid,
+                      xc_stream_type_t stream_type,
+                      struct restore_callbacks *callbacks, int send_back_fd);
+
+/**
+ * This function will create a domain for a paravirtualized Linux
+ * using file names pointing to kernel and ramdisk
+ *
+ * @parm xch a handle to an open hypervisor interface
+ * @parm domid the id of the domain
+ * @parm mem_mb memory size in megabytes
+ * @parm image_name name of the kernel image file
+ * @parm ramdisk_name name of the ramdisk image file
+ * @parm cmdline command line string
+ * @parm flags domain creation flags
+ * @parm store_evtchn the store event channel for this domain to use
+ * @parm store_mfn returned with the mfn of the store page
+ * @parm console_evtchn the console event channel for this domain to use
+ * @parm console_mfn returned with the mfn of the console page
+ * @return 0 on success, -1 on failure
+ */
+int xc_linux_build(xc_interface *xch,
+                   uint32_t domid,
+                   unsigned int mem_mb,
+                   const char *image_name,
+                   const char *ramdisk_name,
+                   const char *cmdline,
+                   const char *features,
+                   unsigned long flags,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn);
+
+/*
+ * Sets *lockfd to -1.
+ * Has deallocated everything even on error.
+ */
+int xc_suspend_evtchn_release(xc_interface *xch,
+                              struct xenevtchn_handle *xce,
+                              uint32_t domid, int suspend_evtchn, int *lockfd);
+
+/**
+ * This function eats the initial notification.
+ * xce must not be used for anything else
+ * See xc_suspend_evtchn_init_sane re lockfd.
+ */
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch,
+                                     struct xenevtchn_handle *xce,
+                                     uint32_t domid, int port, int *lockfd);
+
+/* xce must not be used for anything else */
+int xc_await_suspend(xc_interface *xch, struct xenevtchn_handle *xce,
+                     int suspend_evtchn);
+
+/**
+ * The port will be signaled immediately after this call
+ * The caller should check the domain status and look for the next event
+ * On success, *lockfd will be set to >=0 and *lockfd must be preserved
+ * and fed to xc_suspend_evtchn_release.  (On error *lockfd is
+ * undefined and xc_suspend_evtchn_release is not allowed.)
+ */
+int xc_suspend_evtchn_init_sane(xc_interface *xch,
+                                struct xenevtchn_handle *xce,
+                                uint32_t domid, int port, int *lockfd);
+
+int xc_mark_page_online(xc_interface *xch, unsigned long start,
+                        unsigned long end, uint32_t *status);
+
+int xc_mark_page_offline(xc_interface *xch, unsigned long start,
+                          unsigned long end, uint32_t *status);
+
+int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
+                                 unsigned long end, uint32_t *status);
+
+int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn);
+
+
+/**
+ * Memory related information, such as PFN types, the P2M table,
+ * the guest word width and the guest page table levels.
+ */
+struct xc_domain_meminfo {
+    unsigned int pt_levels;
+    unsigned int guest_width;
+    xen_pfn_t *pfn_type;
+    xen_pfn_t *p2m_table;
+    unsigned long p2m_size;
+};
+
+int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
+                          struct xc_domain_meminfo *minfo);
+
+int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *mem);
+
+/**
+ * This function map m2p table
+ * @parm xch a handle to an open hypervisor interface
+ * @parm max_mfn the max pfn
+ * @parm prot the flags to map, such as read/write etc
+ * @parm mfn0 return the first mfn, can be NULL
+ * @return mapped m2p table on success, NULL on failure
+ */
+xen_pfn_t *xc_map_m2p(xc_interface *xch,
+                      unsigned long max_mfn,
+                      int prot,
+                      unsigned long *mfn0);
+#endif /* XENGUEST_H */
diff --git a/tools/libs/guest/xg_cpuid_x86.c b/tools/libs/guest/xg_cpuid_x86.c
new file mode 100644 (file)
index 0000000..0f24d6d
--- /dev/null
@@ -0,0 +1,665 @@
+/******************************************************************************
+ * xg_cpuid_x86.c
+ *
+ * Compute cpuid of a domain.
+ *
+ * Copyright (c) 2008, Citrix Systems, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <limits.h>
+#include "xc_private.h"
+#include "xc_bitops.h"
+#include <xen/hvm/params.h>
+#include <xen-tools/libs.h>
+
+enum {
+#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
+#include <xen/arch-x86/cpufeatureset.h>
+};
+
+#include <xen/asm/x86-vendors.h>
+
+#include <xen/lib/x86/cpu-policy.h>
+
+#define bitmaskof(idx)      (1u << ((idx) & 31))
+#define featureword_of(idx) ((idx) >> 5)
+
+int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps)
+{
+    DECLARE_SYSCTL;
+    int ret;
+
+    sysctl.cmd = XEN_SYSCTL_get_cpu_levelling_caps;
+    ret = do_sysctl(xch, &sysctl);
+
+    if ( !ret )
+        *caps = sysctl.u.cpu_levelling_caps.caps;
+
+    return ret;
+}
+
+int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
+                          uint32_t *nr_features, uint32_t *featureset)
+{
+    DECLARE_SYSCTL;
+    DECLARE_HYPERCALL_BOUNCE(featureset,
+                             *nr_features * sizeof(*featureset),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int ret;
+
+    if ( xc_hypercall_bounce_pre(xch, featureset) )
+        return -1;
+
+    sysctl.cmd = XEN_SYSCTL_get_cpu_featureset;
+    sysctl.u.cpu_featureset.index = index;
+    sysctl.u.cpu_featureset.nr_features = *nr_features;
+    set_xen_guest_handle(sysctl.u.cpu_featureset.features, featureset);
+
+    ret = do_sysctl(xch, &sysctl);
+
+    xc_hypercall_bounce_post(xch, featureset);
+
+    if ( !ret )
+        *nr_features = sysctl.u.cpu_featureset.nr_features;
+
+    return ret;
+}
+
+uint32_t xc_get_cpu_featureset_size(void)
+{
+    return FEATURESET_NR_ENTRIES;
+}
+
+const uint32_t *xc_get_static_cpu_featuremask(
+    enum xc_static_cpu_featuremask mask)
+{
+    static const uint32_t masks[][FEATURESET_NR_ENTRIES] = {
+#define MASK(x) [XC_FEATUREMASK_ ## x] = INIT_ ## x ## _FEATURES
+
+        MASK(KNOWN),
+        MASK(SPECIAL),
+        MASK(PV_MAX),
+        MASK(PV_DEF),
+        MASK(HVM_SHADOW_MAX),
+        MASK(HVM_SHADOW_DEF),
+        MASK(HVM_HAP_MAX),
+        MASK(HVM_HAP_DEF),
+
+#undef MASK
+    };
+
+    if ( (unsigned int)mask >= ARRAY_SIZE(masks) )
+        return NULL;
+
+    return masks[mask];
+}
+
+int xc_get_cpu_policy_size(xc_interface *xch, uint32_t *nr_leaves,
+                           uint32_t *nr_msrs)
+{
+    struct xen_sysctl sysctl = {};
+    int ret;
+
+    sysctl.cmd = XEN_SYSCTL_get_cpu_policy;
+
+    ret = do_sysctl(xch, &sysctl);
+
+    if ( !ret )
+    {
+        *nr_leaves = sysctl.u.cpu_policy.nr_leaves;
+        *nr_msrs = sysctl.u.cpu_policy.nr_msrs;
+    }
+
+    return ret;
+}
+
+int xc_get_system_cpu_policy(xc_interface *xch, uint32_t index,
+                             uint32_t *nr_leaves, xen_cpuid_leaf_t *leaves,
+                             uint32_t *nr_msrs, xen_msr_entry_t *msrs)
+{
+    struct xen_sysctl sysctl = {};
+    DECLARE_HYPERCALL_BOUNCE(leaves,
+                             *nr_leaves * sizeof(*leaves),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    DECLARE_HYPERCALL_BOUNCE(msrs,
+                             *nr_msrs * sizeof(*msrs),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int ret;
+
+    if ( xc_hypercall_bounce_pre(xch, leaves) ||
+         xc_hypercall_bounce_pre(xch, msrs) )
+        return -1;
+
+    sysctl.cmd = XEN_SYSCTL_get_cpu_policy;
+    sysctl.u.cpu_policy.index = index;
+    sysctl.u.cpu_policy.nr_leaves = *nr_leaves;
+    set_xen_guest_handle(sysctl.u.cpu_policy.cpuid_policy, leaves);
+    sysctl.u.cpu_policy.nr_msrs = *nr_msrs;
+    set_xen_guest_handle(sysctl.u.cpu_policy.msr_policy, msrs);
+
+    ret = do_sysctl(xch, &sysctl);
+
+    xc_hypercall_bounce_post(xch, leaves);
+    xc_hypercall_bounce_post(xch, msrs);
+
+    if ( !ret )
+    {
+        *nr_leaves = sysctl.u.cpu_policy.nr_leaves;
+        *nr_msrs = sysctl.u.cpu_policy.nr_msrs;
+    }
+
+    return ret;
+}
+
+int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
+                             uint32_t *nr_leaves, xen_cpuid_leaf_t *leaves,
+                             uint32_t *nr_msrs, xen_msr_entry_t *msrs)
+{
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(leaves,
+                             *nr_leaves * sizeof(*leaves),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    DECLARE_HYPERCALL_BOUNCE(msrs,
+                             *nr_msrs * sizeof(*msrs),
+                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
+    int ret;
+
+    if ( xc_hypercall_bounce_pre(xch, leaves) ||
+         xc_hypercall_bounce_pre(xch, msrs) )
+        return -1;
+
+    domctl.cmd = XEN_DOMCTL_get_cpu_policy;
+    domctl.domain = domid;
+    domctl.u.cpu_policy.nr_leaves = *nr_leaves;
+    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
+    domctl.u.cpu_policy.nr_msrs = *nr_msrs;
+    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
+
+    ret = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, leaves);
+    xc_hypercall_bounce_post(xch, msrs);
+
+    if ( !ret )
+    {
+        *nr_leaves = domctl.u.cpu_policy.nr_leaves;
+        *nr_msrs = domctl.u.cpu_policy.nr_msrs;
+    }
+
+    return ret;
+}
+
+/*
+ * Push a CPUID/MSR policy into Xen for @domid via XEN_DOMCTL_set_cpu_policy.
+ *
+ * @leaves/@nr_leaves and @msrs/@nr_msrs are the serialised policy to apply
+ * (either may be NULL/0).  On failure, *err_leaf_p / *err_subleaf_p /
+ * *err_msr_p (each optional) identify the entry Xen rejected, or ~0 if Xen
+ * provided no indication.  Returns 0 on success, -1 on error with errno set.
+ */
+int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
+                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
+                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
+                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
+                             uint32_t *err_msr_p)
+{
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BOUNCE(leaves,
+                             nr_leaves * sizeof(*leaves),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    DECLARE_HYPERCALL_BOUNCE(msrs,
+                             nr_msrs * sizeof(*msrs),
+                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
+    int ret;
+
+    /* Default the error-locator outputs so they are well-defined on failure. */
+    if ( err_leaf_p )
+        *err_leaf_p = -1;
+    if ( err_subleaf_p )
+        *err_subleaf_p = -1;
+    if ( err_msr_p )
+        *err_msr_p = -1;
+
+    if ( xc_hypercall_bounce_pre(xch, leaves) )
+        return -1;
+
+    if ( xc_hypercall_bounce_pre(xch, msrs) )
+    {
+        /* Unwind the already-bounced leaves buffer rather than leaking it. */
+        xc_hypercall_bounce_post(xch, leaves);
+        return -1;
+    }
+
+    domctl.cmd = XEN_DOMCTL_set_cpu_policy;
+    domctl.domain = domid;
+    domctl.u.cpu_policy.nr_leaves = nr_leaves;
+    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
+    domctl.u.cpu_policy.nr_msrs = nr_msrs;
+    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
+    domctl.u.cpu_policy.err_leaf = -1;
+    domctl.u.cpu_policy.err_subleaf = -1;
+    domctl.u.cpu_policy.err_msr = -1;
+
+    ret = do_domctl(xch, &domctl);
+
+    xc_hypercall_bounce_post(xch, leaves);
+    xc_hypercall_bounce_post(xch, msrs);
+
+    if ( err_leaf_p )
+        *err_leaf_p = domctl.u.cpu_policy.err_leaf;
+    if ( err_subleaf_p )
+        *err_subleaf_p = domctl.u.cpu_policy.err_subleaf;
+    if ( err_msr_p )
+        *err_msr_p = domctl.u.cpu_policy.err_msr;
+
+    return ret;
+}
+
+/*
+ * qsort()/bsearch() comparator for xen_cpuid_leaf_t: order by leaf first,
+ * then by subleaf.  Returns <0, 0 or >0 in the usual comparator fashion.
+ */
+static int compare_leaves(const void *l, const void *r)
+{
+    const xen_cpuid_leaf_t *a = l;
+    const xen_cpuid_leaf_t *b = r;
+
+    if ( a->leaf < b->leaf )
+        return -1;
+    if ( a->leaf > b->leaf )
+        return 1;
+
+    if ( a->subleaf < b->subleaf )
+        return -1;
+    if ( a->subleaf > b->subleaf )
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Locate the (leaf, subleaf) entry named by @xend in a sorted @leaves array,
+ * or return NULL if absent.  @leaves must be sorted per compare_leaves().
+ */
+static xen_cpuid_leaf_t *find_leaf(
+    xen_cpuid_leaf_t *leaves, unsigned int nr_leaves,
+    const struct xc_xend_cpuid *xend)
+{
+    const xen_cpuid_leaf_t key = { .leaf = xend->leaf, .subleaf = xend->subleaf };
+
+    return bsearch(&key, leaves, nr_leaves, sizeof(leaves[0]), compare_leaves);
+}
+
+/*
+ * Apply xend-style CPUID overrides to a domain's current CPUID policy.
+ *
+ * Each override entry carries one policy string per register (a..d), one
+ * character per bit, MSB first: '1'/'0' force the bit, 'x' inherits the
+ * domain-max policy value, 'k' or 's' inherits the host policy value.
+ *
+ * Returns 0 on success, or a negative errno-style value on failure.
+ */
+static int xc_cpuid_xend_policy(
+    xc_interface *xch, uint32_t domid, const struct xc_xend_cpuid *xend)
+{
+    int rc;
+    xc_dominfo_t di;
+    unsigned int nr_leaves, nr_msrs;
+    uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1;
+    /*
+     * Three full policies.  The host, domain max, and domain current for the
+     * domain type.
+     */
+    xen_cpuid_leaf_t *host = NULL, *max = NULL, *cur = NULL;
+    unsigned int nr_host, nr_max, nr_cur;
+
+    if ( xc_domain_getinfo(xch, domid, 1, &di) != 1 ||
+         di.domid != domid )
+    {
+        ERROR("Failed to obtain d%d info", domid);
+        rc = -ESRCH;
+        goto fail;
+    }
+
+    /* Upper bound on the number of leaves/MSRs any policy can contain. */
+    rc = xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs);
+    if ( rc )
+    {
+        PERROR("Failed to obtain policy info size");
+        rc = -errno;
+        goto fail;
+    }
+
+    rc = -ENOMEM;
+    if ( (host = calloc(nr_leaves, sizeof(*host))) == NULL ||
+         (max  = calloc(nr_leaves, sizeof(*max)))  == NULL ||
+         (cur  = calloc(nr_leaves, sizeof(*cur)))  == NULL )
+    {
+        ERROR("Unable to allocate memory for %u CPUID leaves", nr_leaves);
+        goto fail;
+    }
+
+    /* Get the domain's current policy. */
+    nr_msrs = 0;
+    nr_cur = nr_leaves;
+    rc = xc_get_domain_cpu_policy(xch, domid, &nr_cur, cur, &nr_msrs, NULL);
+    if ( rc )
+    {
+        PERROR("Failed to obtain d%d current policy", domid);
+        rc = -errno;
+        goto fail;
+    }
+
+    /* Get the domain's max policy. */
+    nr_msrs = 0;
+    nr_max = nr_leaves;
+    rc = xc_get_system_cpu_policy(xch, di.hvm ? XEN_SYSCTL_cpu_policy_hvm_max
+                                              : XEN_SYSCTL_cpu_policy_pv_max,
+                                  &nr_max, max, &nr_msrs, NULL);
+    if ( rc )
+    {
+        PERROR("Failed to obtain %s max policy", di.hvm ? "hvm" : "pv");
+        rc = -errno;
+        goto fail;
+    }
+
+    /* Get the host policy. */
+    nr_msrs = 0;
+    nr_host = nr_leaves;
+    rc = xc_get_system_cpu_policy(xch, XEN_SYSCTL_cpu_policy_host,
+                                  &nr_host, host, &nr_msrs, NULL);
+    if ( rc )
+    {
+        PERROR("Failed to obtain host policy");
+        rc = -errno;
+        goto fail;
+    }
+
+    rc = -EINVAL;
+    for ( ; xend->leaf != XEN_CPUID_INPUT_UNUSED; ++xend )
+    {
+        xen_cpuid_leaf_t *cur_leaf = find_leaf(cur, nr_cur, xend);
+        const xen_cpuid_leaf_t *max_leaf = find_leaf(max, nr_max, xend);
+        const xen_cpuid_leaf_t *host_leaf = find_leaf(host, nr_host, xend);
+
+        if ( cur_leaf == NULL || max_leaf == NULL || host_leaf == NULL )
+        {
+            ERROR("Missing leaf %#x, subleaf %#x", xend->leaf, xend->subleaf);
+            goto fail;
+        }
+
+        for ( unsigned int i = 0; i < ARRAY_SIZE(xend->policy); i++ )
+        {
+            /* Registers a..d addressed as an array from &leaf->a. */
+            uint32_t *cur_reg = &cur_leaf->a + i;
+            const uint32_t *max_reg = &max_leaf->a + i;
+            const uint32_t *host_reg = &host_leaf->a + i;
+
+            if ( xend->policy[i] == NULL )
+                continue;
+
+            /* policy[i] is a 32-char string, MSB (bit 31) first. */
+            for ( unsigned int j = 0; j < 32; j++ )
+            {
+                bool val;
+
+                if ( xend->policy[i][j] == '1' )
+                    val = true;
+                else if ( xend->policy[i][j] == '0' )
+                    val = false;
+                else if ( xend->policy[i][j] == 'x' )
+                    val = test_bit(31 - j, max_reg);
+                else if ( xend->policy[i][j] == 'k' ||
+                          xend->policy[i][j] == 's' )
+                    val = test_bit(31 - j, host_reg);
+                else
+                {
+                    ERROR("Bad character '%c' in policy[%d] string '%s'",
+                          xend->policy[i][j], i, xend->policy[i]);
+                    goto fail;
+                }
+
+                clear_bit(31 - j, cur_reg);
+                if ( val )
+                    set_bit(31 - j, cur_reg);
+            }
+        }
+    }
+
+    /* Feed the transformed current policy back up to Xen. */
+    rc = xc_set_domain_cpu_policy(xch, domid, nr_cur, cur, 0, NULL,
+                                  &err_leaf, &err_subleaf, &err_msr);
+    if ( rc )
+    {
+        PERROR("Failed to set d%d's policy (err leaf %#x, subleaf %#x, msr %#x)",
+               domid, err_leaf, err_subleaf, err_msr);
+        rc = -errno;
+        goto fail;
+    }
+
+    /* Success! */
+
+ fail:
+    free(cur);
+    free(max);
+    free(host);
+
+    return rc;
+}
+
+/*
+ * Construct and apply a CPUID policy for @domid.
+ *
+ * Starts from the system default policy for the domain type, then:
+ *  - on migration-in (@restore), re-enables features defaulted off since
+ *    Xen 4.13 so a migrated-in guest doesn't see features vanish,
+ *  - applies @featureset/@nr_features if given (with deep-dependency
+ *    clean-up), otherwise just sets PAE for HVM per @pae,
+ *  - adjusts topology-related leaves (PV mirrors host htt/cmp; HVM gets
+ *    vLAPIC_ID = vCPU_ID * 2),
+ *  - finally applies any xend-style overrides in @xend.
+ *
+ * Returns 0 on success, negative errno-style value on failure.
+ */
+int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore,
+                          const uint32_t *featureset, unsigned int nr_features,
+                          bool pae,
+                          const struct xc_xend_cpuid *xend)
+{
+    int rc;
+    xc_dominfo_t di;
+    unsigned int i, nr_leaves, nr_msrs;
+    xen_cpuid_leaf_t *leaves = NULL;
+    struct cpuid_policy *p = NULL;
+    uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1;
+    uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {};
+    uint32_t len = ARRAY_SIZE(host_featureset);
+
+    if ( xc_domain_getinfo(xch, domid, 1, &di) != 1 ||
+         di.domid != domid )
+    {
+        ERROR("Failed to obtain d%d info", domid);
+        rc = -ESRCH;
+        goto out;
+    }
+
+    rc = xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs);
+    if ( rc )
+    {
+        PERROR("Failed to obtain policy info size");
+        rc = -errno;
+        goto out;
+    }
+
+    rc = -ENOMEM;
+    if ( (leaves = calloc(nr_leaves, sizeof(*leaves))) == NULL ||
+         (p = calloc(1, sizeof(*p))) == NULL )
+        goto out;
+
+    /* Get the host policy. */
+    rc = xc_get_cpu_featureset(xch, XEN_SYSCTL_cpu_featureset_host,
+                               &len, host_featureset);
+    if ( rc )
+    {
+        /* Tolerate "buffer too small", as we've got the bits we need. */
+        if ( errno == ENOBUFS )
+            rc = 0;
+        else
+        {
+            PERROR("Failed to obtain host featureset");
+            rc = -errno;
+            goto out;
+        }
+    }
+
+    /* Get the domain's default policy. */
+    nr_msrs = 0;
+    rc = xc_get_system_cpu_policy(xch, di.hvm ? XEN_SYSCTL_cpu_policy_hvm_default
+                                              : XEN_SYSCTL_cpu_policy_pv_default,
+                                  &nr_leaves, leaves, &nr_msrs, NULL);
+    if ( rc )
+    {
+        PERROR("Failed to obtain %s default policy", di.hvm ? "hvm" : "pv");
+        rc = -errno;
+        goto out;
+    }
+
+    rc = x86_cpuid_copy_from_buffer(p, leaves, nr_leaves,
+                                    &err_leaf, &err_subleaf);
+    if ( rc )
+    {
+        ERROR("Failed to deserialise CPUID (err leaf %#x, subleaf %#x) (%d = %s)",
+              err_leaf, err_subleaf, -rc, strerror(-rc));
+        goto out;
+    }
+
+    /*
+     * Account for features which have been disabled by default since Xen
+     * 4.13, so migrated-in VMs don't risk seeing features disappearing.
+     */
+    if ( restore )
+    {
+        p->basic.rdrand = test_bit(X86_FEATURE_RDRAND, host_featureset);
+
+        if ( di.hvm )
+        {
+            p->feat.mpx = test_bit(X86_FEATURE_MPX, host_featureset);
+        }
+    }
+
+    if ( featureset )
+    {
+        uint32_t disabled_features[FEATURESET_NR_ENTRIES],
+            feat[FEATURESET_NR_ENTRIES] = {};
+        static const uint32_t deep_features[] = INIT_DEEP_FEATURES;
+        unsigned int b;
+
+        /*
+         * The user supplied featureset may be shorter or longer than
+         * FEATURESET_NR_ENTRIES.  Shorter is fine, and we will zero-extend.
+         * Longer is fine, so long as it only padded with zeros.
+         */
+        unsigned int user_len = min(FEATURESET_NR_ENTRIES + 0u, nr_features);
+
+        /* Check for truncated set bits. */
+        rc = -EOPNOTSUPP;
+        for ( i = user_len; i < nr_features; ++i )
+            if ( featureset[i] != 0 )
+                goto out;
+
+        memcpy(feat, featureset, sizeof(*featureset) * user_len);
+
+        /* Disable deep dependencies of disabled features. */
+        for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
+            disabled_features[i] = ~feat[i] & deep_features[i];
+
+        for ( b = 0; b < sizeof(disabled_features) * CHAR_BIT; ++b )
+        {
+            const uint32_t *dfs;
+
+            if ( !test_bit(b, disabled_features) ||
+                 !(dfs = x86_cpuid_lookup_deep_deps(b)) )
+                continue;
+
+            for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
+            {
+                feat[i] &= ~dfs[i];
+                disabled_features[i] &= ~dfs[i];
+            }
+        }
+
+        cpuid_featureset_to_policy(feat, p);
+    }
+    else
+    {
+        if ( di.hvm )
+            p->basic.pae = pae;
+    }
+
+    if ( !di.hvm )
+    {
+        /*
+         * On hardware without CPUID Faulting, PV guests see real topology.
+         * As a consequence, they also need to see the host htt/cmp fields.
+         */
+        p->basic.htt       = test_bit(X86_FEATURE_HTT, host_featureset);
+        p->extd.cmp_legacy = test_bit(X86_FEATURE_CMP_LEGACY, host_featureset);
+    }
+    else
+    {
+        /*
+         * Topology for HVM guests is entirely controlled by Xen.  For now, we
+         * hardcode APIC_ID = vcpu_id * 2 to give the illusion of no SMT.
+         */
+        p->basic.htt = true;
+        p->extd.cmp_legacy = false;
+
+        /*
+         * Leaf 1 EBX[23:16] is Maximum Logical Processors Per Package.
+         * Update to reflect vLAPIC_ID = vCPU_ID * 2, but make sure to avoid
+         * overflow.
+         */
+        if ( !(p->basic.lppp & 0x80) )
+            p->basic.lppp *= 2;
+
+        switch ( p->x86_vendor )
+        {
+        case X86_VENDOR_INTEL:
+            /*
+             * Check the array bound before dereferencing subleaf[i]; the
+             * original order read one element past the end when every
+             * subleaf had a non-zero type.
+             */
+            for ( i = 0; (i < ARRAY_SIZE(p->cache.raw) &&
+                          p->cache.subleaf[i].type); ++i )
+            {
+                p->cache.subleaf[i].cores_per_package =
+                    (p->cache.subleaf[i].cores_per_package << 1) | 1;
+                p->cache.subleaf[i].threads_per_cache = 0;
+            }
+            break;
+
+        case X86_VENDOR_AMD:
+        case X86_VENDOR_HYGON:
+            /*
+             * Leaf 0x80000008 ECX[15:12] is ApicIdCoreSize.
+             * Leaf 0x80000008 ECX[7:0] is NumberOfCores (minus one).
+             * Update to reflect vLAPIC_ID = vCPU_ID * 2.  But avoid
+             * - overflow,
+             * - going out of sync with leaf 1 EBX[23:16],
+             * - incrementing ApicIdCoreSize when it's zero (which changes the
+             *   meaning of bits 7:0).
+             *
+             * UPDATE: In addition to avoiding overflow, some
+             * proprietary operating systems have trouble with
+             * apic_id_size values greater than 7.  Limit the value to
+             * 7 for now.
+             */
+            if ( p->extd.nc < 0x7f )
+            {
+                if ( p->extd.apic_id_size != 0 && p->extd.apic_id_size < 0x7 )
+                    p->extd.apic_id_size++;
+
+                p->extd.nc = (p->extd.nc << 1) | 1;
+            }
+            break;
+        }
+
+        /*
+         * These settings are necessary to cause earlier HVM_PARAM_NESTEDHVM /
+         * XEN_DOMCTL_disable_migrate settings to be reflected correctly in
+         * CPUID.  Xen will discard these bits if configuration hasn't been
+         * set for the domain.
+         */
+        p->extd.itsc = true;
+        p->basic.vmx = true;
+        p->extd.svm = true;
+    }
+
+    rc = x86_cpuid_copy_to_buffer(p, leaves, &nr_leaves);
+    if ( rc )
+    {
+        ERROR("Failed to serialise CPUID (%d = %s)", -rc, strerror(-rc));
+        goto out;
+    }
+
+    rc = xc_set_domain_cpu_policy(xch, domid, nr_leaves, leaves, 0, NULL,
+                                  &err_leaf, &err_subleaf, &err_msr);
+    if ( rc )
+    {
+        PERROR("Failed to set d%d's policy (err leaf %#x, subleaf %#x, msr %#x)",
+               domid, err_leaf, err_subleaf, err_msr);
+        rc = -errno;
+        goto out;
+    }
+
+    if ( xend && (rc = xc_cpuid_xend_policy(xch, domid, xend)) )
+        goto out;
+
+    rc = 0;
+
+out:
+    free(p);
+    free(leaves);
+
+    return rc;
+}
diff --git a/tools/libs/guest/xg_dom_arm.c b/tools/libs/guest/xg_dom_arm.c
new file mode 100644 (file)
index 0000000..3f66f1d
--- /dev/null
@@ -0,0 +1,552 @@
+/*
+ * Xen domain builder -- ARM
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2011, Citrix Systems
+ */
+#include <inttypes.h>
+#include <assert.h>
+
+#include <xen/xen.h>
+#include <xen/io/protocols.h>
+#include <xen-tools/libs.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+
+#define NR_MAGIC_PAGES 4
+#define CONSOLE_PFN_OFFSET 0
+#define XENSTORE_PFN_OFFSET 1
+#define MEMACCESS_PFN_OFFSET 2
+#define VUART_PFN_OFFSET 3
+
+#define LPAE_SHIFT 9
+
+#define PFN_4K_SHIFT  (0)
+#define PFN_2M_SHIFT  (PFN_4K_SHIFT+LPAE_SHIFT)
+#define PFN_1G_SHIFT  (PFN_2M_SHIFT+LPAE_SHIFT)
+#define PFN_512G_SHIFT (PFN_1G_SHIFT+LPAE_SHIFT)
+
+/* get guest IO ABI protocol */
+const char *xc_domain_get_native_protocol(xc_interface *xch,
+                                          uint32_t domid)
+{
+    return XEN_IO_PROTO_ABI_ARM;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Populate and initialise the NR_MAGIC_PAGES "magic" pages at
+ * GUEST_MAGIC_BASE (console, xenstore, monitor ring, vuart), clear them,
+ * and publish their locations plus the toolstack-allocated event channels
+ * via HVM params.  Returns 0 on success, negative on populate failure.
+ */
+static int alloc_magic_pages(struct xc_dom_image *dom)
+{
+    int rc, i;
+    const xen_pfn_t base = GUEST_MAGIC_BASE >> XC_PAGE_SHIFT;
+    xen_pfn_t p2m[NR_MAGIC_PAGES];
+
+    /* The magic region must be large enough to hold all magic pages. */
+    BUILD_BUG_ON(NR_MAGIC_PAGES > GUEST_MAGIC_SIZE >> XC_PAGE_SHIFT);
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    for (i = 0; i < NR_MAGIC_PAGES; i++)
+        p2m[i] = base + i;
+
+    rc = xc_domain_populate_physmap_exact(
+            dom->xch, dom->guest_domid, NR_MAGIC_PAGES,
+            0, 0, p2m);
+    if ( rc < 0 )
+        return rc;
+
+    dom->console_pfn = base + CONSOLE_PFN_OFFSET;
+    dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
+    dom->vuart_gfn = base + VUART_PFN_OFFSET;
+
+    /* NOTE(review): return values of the clear/param-set calls below are
+     * ignored; failures here would go unreported — confirm intentional. */
+    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
+    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
+    xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
+    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
+
+    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
+            dom->console_pfn);
+    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN,
+            dom->xenstore_pfn);
+    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MONITOR_RING_PFN,
+            base + MEMACCESS_PFN_OFFSET);
+    /* allocated by toolstack */
+    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_EVTCHN,
+            dom->console_evtchn);
+    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_EVTCHN,
+            dom->xenstore_evtchn);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* ARM has no start_info page; hook is a logging no-op. */
+static int start_info_arm(struct xc_dom_image *dom)
+{
+    DOMPRINTF_CALLED(dom->xch);
+    return 0;
+}
+
+/* Shared info needs no builder-side setup on ARM; hook is a logging no-op. */
+static int shared_info_arm(struct xc_dom_image *dom, void *ptr)
+{
+    DOMPRINTF_CALLED(dom->xch);
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Set up the boot vCPU context for a 32-bit (armv7l) guest, following the
+ * Linux ARM boot protocol: r0 = 0, r1 = invalid machine ID (DTB is used),
+ * r2 = DTB address (or poison), PC = kernel entry.  Returns the
+ * SETVCPUCONTEXT result (0 on success).
+ */
+static int vcpu_arm32(struct xc_dom_image *dom)
+{
+    vcpu_guest_context_any_t any_ctx;
+    vcpu_guest_context_t *ctxt = &any_ctx.c;
+    int rc;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->user_regs.pc32 = dom->parms.virt_entry;
+
+    /* Linux boot protocol. See linux.Documentation/arm/Booting. */
+    ctxt->user_regs.r0_usr = 0; /* SBZ */
+    /* Machine ID: We use DTB therefore no machine id */
+    ctxt->user_regs.r1_usr = 0xffffffff;
+    /* ATAGS/DTB: We currently require that the guest kernel to be
+     * using CONFIG_ARM_APPENDED_DTB. Ensure that r2 does not look
+     * like a valid pointer to a set of ATAGS or a DTB.
+     */
+    ctxt->user_regs.r2_usr = dom->devicetree_blob ?
+        dom->devicetree_seg.vstart : 0xffffffff;
+
+    ctxt->sctlr = SCTLR_GUEST_INIT;
+
+    /* MMU off at boot: no translation tables yet. */
+    ctxt->ttbr0 = 0;
+    ctxt->ttbr1 = 0;
+    ctxt->ttbcr = 0; /* Defined Reset Value */
+
+    ctxt->user_regs.cpsr = PSR_GUEST32_INIT;
+
+    ctxt->flags = VGCF_online;
+
+    DOMPRINTF("Initial state CPSR %#"PRIx32" PC %#"PRIx32,
+           ctxt->user_regs.cpsr, ctxt->user_regs.pc32);
+
+    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
+    if ( rc != 0 )
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
+
+    return rc;
+}
+
+/*
+ * Set up the boot vCPU context for a 64-bit (aarch64) guest, following the
+ * Linux arm64 boot protocol: x0 = DTB address (or poison), x1-x3 = 0,
+ * PC = kernel entry.  Returns the SETVCPUCONTEXT result (0 on success).
+ */
+static int vcpu_arm64(struct xc_dom_image *dom)
+{
+    vcpu_guest_context_any_t any_ctx;
+    vcpu_guest_context_t *ctxt = &any_ctx.c;
+    int rc;
+
+    DOMPRINTF_CALLED(dom->xch);
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->user_regs.pc64 = dom->parms.virt_entry;
+
+    /* Linux boot protocol. See linux.Documentation/arm64/booting.txt. */
+    ctxt->user_regs.x0 = dom->devicetree_blob ?
+        dom->devicetree_seg.vstart : 0xffffffff;
+    ctxt->user_regs.x1 = 0;
+    ctxt->user_regs.x2 = 0;
+    ctxt->user_regs.x3 = 0;
+
+    DOMPRINTF("DTB %"PRIx64, ctxt->user_regs.x0);
+
+    ctxt->sctlr = SCTLR_GUEST_INIT;
+
+    /* MMU off at boot: no translation tables yet. */
+    ctxt->ttbr0 = 0;
+    ctxt->ttbr1 = 0;
+    ctxt->ttbcr = 0; /* Defined Reset Value */
+
+    ctxt->user_regs.cpsr = PSR_GUEST64_INIT;
+
+    ctxt->flags = VGCF_online;
+
+    DOMPRINTF("Initial state CPSR %#"PRIx32" PC %#"PRIx64,
+           ctxt->user_regs.cpsr, ctxt->user_regs.pc64);
+
+    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
+    if ( rc != 0 )
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
+
+    return rc;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Translate the guest-type string to an address size (32/64) and tell Xen
+ * via XEN_DOMCTL_set_address_size.  Returns -EINVAL for an unknown type,
+ * otherwise the domctl result.
+ */
+static int set_mode(xc_interface *xch, uint32_t domid, char *guest_type)
+{
+    static const struct {
+        char           *guest;
+        uint32_t        size;
+    } types[] = {
+        { "xen-3.0-aarch64", 64 },
+        { "xen-3.0-armv7l",  32 },
+    };
+    DECLARE_DOMCTL;
+    int i,rc;
+
+    domctl.domain = domid;
+    domctl.cmd    = XEN_DOMCTL_set_address_size;
+    domctl.u.address_size.size = 0;
+
+    /* 0 is not a valid size, so it doubles as the "not found" marker. */
+    for ( i = 0; i < ARRAY_SIZE(types); i++ )
+        if ( !strcmp(types[i].guest, guest_type) )
+            domctl.u.address_size.size = types[i].size;
+    if ( domctl.u.address_size.size == 0 )
+    {
+        xc_dom_printf(xch, "%s: warning: unknown guest type %s",
+                      __FUNCTION__, guest_type);
+        return -EINVAL;
+    }
+
+    xc_dom_printf(xch, "%s: guest %s, address size %" PRId32 "", __FUNCTION__,
+                  guest_type, domctl.u.address_size.size);
+    rc = do_domctl(xch, &domctl);
+    if ( rc != 0 )
+        xc_dom_printf(xch, "%s: warning: failed (rc=%d)",
+                      __FUNCTION__, rc);
+    return rc;
+}
+
+/* Attempt to populate guest RAM using extents of order pfn_shift
+ * (4K/2M/1G pages), aligning up to the next level where possible.
+ *
+ *  >0: success, *nr_pfns set to number actually populated
+ *   0: didn't try with this pfn shift (e.g. misaligned base etc)
+ *  <0: ERROR
+ */
+static int populate_one_size(struct xc_dom_image *dom, int pfn_shift,
+                             xen_pfn_t base_pfn, xen_pfn_t *nr_pfns,
+                             xen_pfn_t *extents)
+{
+    /* The mask for this level */
+    const uint64_t mask = ((uint64_t)1<<(pfn_shift))-1;
+    /* The shift, mask and next boundary for the level above this one */
+    const int next_shift = pfn_shift + LPAE_SHIFT;
+    const uint64_t next_mask = ((uint64_t)1<<next_shift)-1;
+    const xen_pfn_t next_boundary
+        = (base_pfn + ((uint64_t)1<<next_shift)) & ~next_mask;
+
+    int nr, i, count;
+    xen_pfn_t end_pfn = base_pfn + *nr_pfns;
+
+    /* No level zero super pages with current hardware */
+    if ( pfn_shift == PFN_512G_SHIFT )
+        return 0;
+
+    /* base is misaligned for this level */
+    if ( mask & base_pfn )
+        return 0;
+
+    /*
+     * If base is not aligned at the next level up then try and make
+     * it so for next time around.
+     */
+    if ( (base_pfn & next_mask) && end_pfn > next_boundary )
+        end_pfn = next_boundary;
+
+    /* Number of extents of 2^pfn_shift pages that fit in the range. */
+    count = ( end_pfn - base_pfn ) >> pfn_shift;
+
+    /* Nothing to allocate */
+    if ( !count )
+        return 0;
+
+    for ( i = 0 ; i < count ; i ++ )
+        extents[i] = base_pfn + (i<<pfn_shift);
+
+    nr = xc_domain_populate_physmap(dom->xch, dom->guest_domid, count,
+                                    pfn_shift, 0, extents);
+    if ( nr <= 0 ) return nr;
+    DOMPRINTF("%s: populated %#x/%#x entries with shift %d",
+              __FUNCTION__, nr, count, pfn_shift);
+
+    /* Report back in units of 4K pages; nr may be less than count. */
+    *nr_pfns = nr << pfn_shift;
+
+    return 1;
+}
+
+/*
+ * Populate nr_pfns pages of guest RAM starting at base_pfn, trying the
+ * largest usable superpage size first (512G is always skipped by
+ * populate_one_size) and falling back level by level to 4K pages.
+ * Returns 0 on success, negative on error (ENOMEM if 4K pages also fail).
+ */
+static int populate_guest_memory(struct xc_dom_image *dom,
+                                 xen_pfn_t base_pfn, xen_pfn_t nr_pfns)
+{
+    int rc = 0;
+    xen_pfn_t allocsz, pfn, *extents;
+
+    /* Scratch array for up to 1M extents per populate call. */
+    extents = calloc(1024*1024,sizeof(xen_pfn_t));
+    if ( extents == NULL )
+    {
+        DOMPRINTF("%s: Unable to allocate extent array", __FUNCTION__);
+        return -1;
+    }
+
+    DOMPRINTF("%s: populating RAM @ %016"PRIx64"-%016"PRIx64" (%"PRId64"MB)",
+              __FUNCTION__,
+              (uint64_t)base_pfn << XC_PAGE_SHIFT,
+              (uint64_t)(base_pfn + nr_pfns) << XC_PAGE_SHIFT,
+              (uint64_t)nr_pfns >> (20-XC_PAGE_SHIFT));
+
+    /* allocsz is set by populate_one_size to what was actually populated. */
+    for ( pfn = 0; pfn < nr_pfns; pfn += allocsz )
+    {
+        allocsz = min_t(int, 1024*1024, nr_pfns - pfn);
+#if 0 /* Enable this to exercise/debug the code which tries to realign
+       * to a superpage boundary, by misaligning at the start. */
+        if ( pfn == 0 )
+        {
+            allocsz = 1;
+            rc = populate_one_size(dom, PFN_4K_SHIFT,
+                                   base_pfn + pfn, &allocsz, extents);
+            if (rc < 0) break;
+            if (rc > 0) continue;
+            /* Failed to allocate a single page? */
+            break;
+        }
+#endif
+
+        rc = populate_one_size(dom, PFN_512G_SHIFT,
+                               base_pfn + pfn, &allocsz, extents);
+        if ( rc < 0 ) break;
+        if ( rc > 0 ) continue;
+
+        rc = populate_one_size(dom, PFN_1G_SHIFT,
+                               base_pfn + pfn, &allocsz, extents);
+        if ( rc < 0 ) break;
+        if ( rc > 0 ) continue;
+
+        rc = populate_one_size(dom, PFN_2M_SHIFT,
+                               base_pfn + pfn, &allocsz, extents);
+        if ( rc < 0 ) break;
+        if ( rc > 0 ) continue;
+
+        /* Last resort: plain 4K pages.  rc == 0 here means out of memory. */
+        rc = populate_one_size(dom, PFN_4K_SHIFT,
+                               base_pfn + pfn, &allocsz, extents);
+        if ( rc < 0 ) break;
+        if ( rc == 0 )
+        {
+            DOMPRINTF("%s: Not enough RAM", __FUNCTION__);
+            errno = ENOMEM;
+            rc = -1;
+            goto out;
+        }
+    }
+
+out:
+    free(extents);
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * Lay out and populate guest RAM across the ARM guest RAM banks, then
+ * choose load addresses for the boot modules (initrd, then DTB): at 128MB
+ * if it fits below bank 0's end, else as high as possible in bank 0, else
+ * just below the kernel.  Returns 0 on success, non-zero on error.
+ */
+static int meminit(struct xc_dom_image *dom)
+{
+    int i, rc;
+    uint64_t modbase;
+
+    uint64_t ramsize = (uint64_t)dom->total_pages << XC_PAGE_SHIFT;
+
+    const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
+    const uint64_t bankmax[] = GUEST_RAM_BANK_SIZES;
+
+    /* Convenient */
+    const uint64_t kernbase = dom->kernel_seg.vstart;
+    const uint64_t kernend = ROUNDUP(dom->kernel_seg.vend, 21/*2MB*/);
+    const uint64_t kernsize = kernend - kernbase;
+    const uint64_t dtb_size = dom->devicetree_blob ?
+        ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT) : 0;
+    const uint64_t ramdisk_size = dom->modules[0].blob ?
+        ROUNDUP(dom->modules[0].size, XC_PAGE_SHIFT) : 0;
+    const uint64_t modsize = dtb_size + ramdisk_size;
+    const uint64_t ram128mb = bankbase[0] + (128<<20);
+
+    xen_pfn_t p2m_size;
+    uint64_t bank0end;
+
+    assert(dom->rambase_pfn << XC_PAGE_SHIFT == bankbase[0]);
+
+    /* Kernel, DTB and initrd must all fit in the first bank. */
+    if ( modsize + kernsize > bankmax[0] )
+    {
+        DOMPRINTF("%s: Not enough memory for the kernel+dtb+initrd",
+                  __FUNCTION__);
+        return -1;
+    }
+
+    if ( ramsize == 0 )
+    {
+        DOMPRINTF("%s: ram size is 0", __FUNCTION__);
+        return -1;
+    }
+
+    if ( ramsize > GUEST_RAM_MAX )
+    {
+        DOMPRINTF("%s: ram size is too large for guest address space: "
+                  "%"PRIx64" > %llx",
+                  __FUNCTION__, ramsize, GUEST_RAM_MAX);
+        return -1;
+    }
+
+    rc = set_mode(dom->xch, dom->guest_domid, dom->guest_type);
+    if ( rc )
+        return rc;
+
+    /* Carve ramsize into per-bank sizes; p2m spans up to the last used bank. */
+    for ( i = 0; ramsize && i < GUEST_RAM_BANKS; i++ )
+    {
+        uint64_t banksize = ramsize > bankmax[i] ? bankmax[i] : ramsize;
+
+        ramsize -= banksize;
+
+        p2m_size = ( bankbase[i] + banksize - bankbase[0] ) >> XC_PAGE_SHIFT;
+
+        dom->rambank_size[i] = banksize >> XC_PAGE_SHIFT;
+    }
+
+    assert(dom->rambank_size[0] != 0);
+    assert(ramsize == 0); /* Too much RAM is rejected above */
+
+    dom->p2m_size = p2m_size;
+
+    /* setup initial p2m and allocate guest memory */
+    for ( i = 0; i < GUEST_RAM_BANKS && dom->rambank_size[i]; i++ )
+    {
+        if ((rc = populate_guest_memory(dom,
+                                        bankbase[i] >> XC_PAGE_SHIFT,
+                                        dom->rambank_size[i])))
+            return rc;
+    }
+
+    /*
+     * We try to place dtb+initrd at 128MB or if we have less RAM
+     * as high as possible. If there is no space then fallback to
+     * just before the kernel.
+     *
+     * If changing this then consider
+     * xen/arch/arm/kernel.c:place_modules as well.
+     */
+    bank0end = bankbase[0] + ((uint64_t)dom->rambank_size[0] << XC_PAGE_SHIFT);
+
+    if ( bank0end >= ram128mb + modsize && kernend < ram128mb )
+        modbase = ram128mb;
+    else if ( bank0end - modsize > kernend )
+        modbase = bank0end - modsize;
+    else if (kernbase - bankbase[0] > modsize )
+        modbase = kernbase - modsize;
+    else
+        return -1;
+
+    DOMPRINTF("%s: placing boot modules at 0x%" PRIx64, __FUNCTION__, modbase);
+
+    /*
+     * Must map DTB *after* initrd, to satisfy order of calls to
+     * xc_dom_alloc_segment in xc_dom_build_image, which must map
+     * things at monotonolically increasing addresses.
+     */
+    if ( ramdisk_size )
+    {
+        dom->modules[0].seg.vstart = modbase;
+        dom->modules[0].seg.vend = modbase + ramdisk_size;
+
+        DOMPRINTF("%s: ramdisk: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+                  __FUNCTION__,
+                  dom->modules[0].seg.vstart, dom->modules[0].seg.vend);
+
+        modbase += ramdisk_size;
+    }
+
+    if ( dtb_size )
+    {
+        dom->devicetree_seg.vstart = modbase;
+        dom->devicetree_seg.vend = modbase + dtb_size;
+
+        DOMPRINTF("%s: devicetree: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+                  __FUNCTION__,
+                  dom->devicetree_seg.vstart, dom->devicetree_seg.vend);
+
+        modbase += dtb_size;
+    }
+
+    return 0;
+}
+
+/* ARM guests are always HAP/translated. */
+bool xc_dom_translated(const struct xc_dom_image *dom)
+{
+    return true;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* No early boot work is needed on ARM; hook is a logging no-op. */
+static int bootearly(struct xc_dom_image *dom)
+{
+    DOMPRINTF("%s: doing nothing", __FUNCTION__);
+    return 0;
+}
+
+/* Late boot hook: currently a no-op (see XXX list below for future work). */
+static int bootlate(struct xc_dom_image *dom)
+{
+    /* XXX
+     *   map shared info
+     *   map grant tables
+     *   setup shared info
+     */
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Arch hook table for 32-bit ARM guests. */
+static struct xc_dom_arch xc_dom_32 = {
+    .guest_type = "xen-3.0-armv7l",
+    .native_protocol = XEN_IO_PROTO_ABI_ARM,
+    .page_shift = PAGE_SHIFT_ARM,
+    .sizeof_pfn = 8,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info_arm,
+    .shared_info = shared_info_arm,
+    .vcpu = vcpu_arm32,
+    .meminit = meminit,
+    .bootearly = bootearly,
+    .bootlate = bootlate,
+};
+
+/* Arch hook table for 64-bit ARM guests; differs only in type and vcpu. */
+static struct xc_dom_arch xc_dom_64 = {
+    .guest_type = "xen-3.0-aarch64",
+    .native_protocol = XEN_IO_PROTO_ABI_ARM,
+    .page_shift = PAGE_SHIFT_ARM,
+    .sizeof_pfn = 8,
+    .alloc_magic_pages = alloc_magic_pages,
+    .start_info = start_info_arm,
+    .shared_info = shared_info_arm,
+    .vcpu = vcpu_arm64,
+    .meminit = meminit,
+    .bootearly = bootearly,
+    .bootlate = bootlate,
+};
+
+/* Constructor: register both ARM hook tables with the domain builder core. */
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_32);
+    xc_dom_register_arch_hooks(&xc_dom_64);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_armzimageloader.c b/tools/libs/guest/xg_dom_armzimageloader.c
new file mode 100644 (file)
index 0000000..4246c8e
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ * Xen domain builder -- ARM zImage bits
+ *
+ * Parse and load ARM zImage kernel images.
+ *
+ * Copyright (C) 2012, Citrix Systems.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+
+#include <arpa/inet.h> /* XXX ntohl is not the right function... */
+
+/* Leading fields of a flattened device tree (DTB) blob header; per the
+ * ntohl note above, values are stored big-endian on disk. */
+struct minimal_dtb_header {
+    uint32_t magic;
+    uint32_t total_size;
+    /* There are other fields but we don't use them yet. */
+};
+
+#define DTB_MAGIC 0xd00dfeed
+
+/* ------------------------------------------------------------ */
+/* 32-bit zImage Support                                        */
+/* ------------------------------------------------------------ */
+
+#define ZIMAGE32_MAGIC_OFFSET 0x24
+#define ZIMAGE32_START_OFFSET 0x28
+#define ZIMAGE32_END_OFFSET   0x2c
+
+#define ZIMAGE32_MAGIC 0x016f2818
+
+/*
+ * Probe hook: return 0 iff the loaded kernel blob looks like an ARM32
+ * zImage (32-bit magic word at byte offset 0x24), -EINVAL otherwise.
+ */
+static int xc_dom_probe_zimage32_kernel(struct xc_dom_image *dom)
+{
+    uint32_t *zimage;
+
+    if ( dom->kernel_blob == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: no kernel image loaded", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Must be large enough to hold the magic word at 0x24. */
+    if ( dom->kernel_size < 0x30 /*sizeof(struct setup_header)*/ )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    zimage = (uint32_t *)dom->kernel_blob;
+    if ( zimage[ZIMAGE32_MAGIC_OFFSET/4] != ZIMAGE32_MAGIC )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel is not an arm32 zImage", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Parser hook for ARM32 zImages: compute the kernel segment placement
+ * (RAM base + 0x8000) and the guest entry point, and set the guest
+ * type.  Returns 0 on success, -EINVAL if the image would overflow
+ * the 64-bit address space.
+ */
+static int xc_dom_parse_zimage32_kernel(struct xc_dom_image *dom)
+{
+    uint32_t *zimage;
+    uint32_t start, entry_addr;
+    uint64_t v_start, v_end;
+    uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    zimage = (uint32_t *)dom->kernel_blob;
+
+    /* Do not load kernel at the very first RAM address */
+    v_start = rambase + 0x8000;
+
+    /* Overflow check before computing v_end below. */
+    if ( dom->kernel_size > UINT64_MAX - v_start )
+    {
+        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    v_end = v_start + dom->kernel_size;
+
+    /*
+     * If start is invalid then the guest will start at some invalid
+     * address and crash, but this happens in guest context so doesn't
+     * concern us here.
+     */
+    start = zimage[ZIMAGE32_START_OFFSET/4];
+
+    /* A zero start word means "enter at the load address". */
+    if (start == 0)
+        entry_addr = v_start;
+    else
+        entry_addr = start;
+
+    /* find kernel segment */
+    dom->kernel_seg.vstart = v_start;
+    dom->kernel_seg.vend   = v_end;
+
+    dom->parms.virt_entry = entry_addr;
+    dom->parms.virt_base = rambase;
+
+    dom->guest_type = "xen-3.0-armv7l";
+    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+              __FUNCTION__, dom->guest_type,
+              dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    return 0;
+}
+
+/* ------------------------------------------------------------ */
+/* 64-bit zImage Support                                        */
+/* ------------------------------------------------------------ */
+
+#define ZIMAGE64_MAGIC_V0 0x14000008
+#define ZIMAGE64_MAGIC_V1 0x644d5241 /* "ARM\x64" */
+
+/* linux/Documentation/arm64/booting.txt */
+struct zimage64_hdr {
+    uint32_t magic0;
+    uint32_t res0;
+    uint64_t text_offset;  /* Image load offset */
+    uint64_t res1;
+    uint64_t res2;
+    /* zImage V1 only from here */
+    uint64_t res3;
+    uint64_t res4;
+    uint64_t res5;
+    uint32_t magic1;
+    uint32_t res6;
+};
+/*
+ * Probe hook: return 0 iff the kernel blob carries either the V0 or V1
+ * ARM64 Image magic, -EINVAL otherwise.
+ */
+static int xc_dom_probe_zimage64_kernel(struct xc_dom_image *dom)
+{
+    struct zimage64_hdr *zimage;
+
+    if ( dom->kernel_blob == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: no kernel image loaded", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( dom->kernel_size < sizeof(*zimage) )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Accept either magic: V0 images lack magic1, V1 lack magic0. */
+    zimage =  dom->kernel_blob;
+    if ( zimage->magic0 != ZIMAGE64_MAGIC_V0 &&
+         zimage->magic1 != ZIMAGE64_MAGIC_V1 )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel is not an arm64 Image", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Parser hook for ARM64 Images: place the kernel at RAM base plus the
+ * header's text_offset and enter it at offset 0.  Returns 0 on
+ * success, -EINVAL on 64-bit address overflow.
+ */
+static int xc_dom_parse_zimage64_kernel(struct xc_dom_image *dom)
+{
+    struct zimage64_hdr *zimage;
+    uint64_t v_start, v_end;
+    uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    zimage = dom->kernel_blob;
+
+    /* Guard the two additions below against uint64_t wraparound. */
+    if ( zimage->text_offset > UINT64_MAX - rambase )
+    {
+        DOMPRINTF("%s: kernel text offset is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    v_start = rambase + zimage->text_offset;
+
+    if ( dom->kernel_size > UINT64_MAX - v_start )
+    {
+        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    v_end = v_start + dom->kernel_size;
+
+    dom->kernel_seg.vstart = v_start;
+    dom->kernel_seg.vend   = v_end;
+
+    /* Call the kernel at offset 0 */
+    dom->parms.virt_entry = v_start;
+    dom->parms.virt_base = rambase;
+
+    dom->guest_type = "xen-3.0-aarch64";
+    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+              __FUNCTION__, dom->guest_type,
+              dom->kernel_seg.vstart, dom->kernel_seg.vend);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------ */
+/* Common zImage Support                                        */
+/* ------------------------------------------------------------ */
+
+/*
+ * Loader hook shared by both zImage variants: copy the raw kernel blob
+ * into the guest mapping of the previously computed kernel segment.
+ * Returns 0 on success, -1 if the segment cannot be mapped.
+ */
+static int xc_dom_load_zimage_kernel(struct xc_dom_image *dom)
+{
+    void *dst;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    dst = xc_dom_seg_to_ptr(dom, &dom->kernel_seg);
+    if ( dst == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->kernel_seg) => NULL",
+                  __func__);
+        return -1;
+    }
+
+    DOMPRINTF("%s: kernel seg %#"PRIx64"-%#"PRIx64,
+              __func__, dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    DOMPRINTF("%s: copy %zd bytes from blob %p to dst %p",
+              __func__, dom->kernel_size, dom->kernel_blob, dst);
+
+    memcpy(dst, dom->kernel_blob, dom->kernel_size);
+
+    return 0;
+}
+
+/* Loader descriptor for ARM32 zImage kernels. */
+static struct xc_dom_loader zimage32_loader = {
+    .name = "Linux zImage (ARM32)",
+    .probe = xc_dom_probe_zimage32_kernel,
+    .parser = xc_dom_parse_zimage32_kernel,
+    .loader = xc_dom_load_zimage_kernel,
+};
+
+/* Loader descriptor for ARM64 Image kernels (shares the load hook). */
+static struct xc_dom_loader zimage64_loader = {
+    .name = "Linux zImage (ARM64)",
+    .probe = xc_dom_probe_zimage64_kernel,
+    .parser = xc_dom_parse_zimage64_kernel,
+    .loader = xc_dom_load_zimage_kernel,
+};
+
+/* Library-init constructor: register both zImage loaders. */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&zimage32_loader);
+    xc_dom_register_loader(&zimage64_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_binloader.c b/tools/libs/guest/xg_dom_binloader.c
new file mode 100644 (file)
index 0000000..870a921
--- /dev/null
@@ -0,0 +1,329 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Some of the field descriptions were copied from "The Multiboot
+ * Specification", Copyright 1995, 96 Bryan Ford <baford@cs.utah.edu>,
+ * Erich Stefan Boleyn <erich@uruk.org> Copyright 1999, 2000, 2001, 2002
+ * Free Software Foundation, Inc.
+ */
+
+/******************************************************************************
+ *
+ * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
+ * present. The only requirement is that it must have a xen_bin_image table
+ * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
+ * Those familiar with the multiboot specification should recognize this, it's
+ * (almost) the same as the multiboot header.
+ * The layout of the xen_bin_image table is:
+ *
+ * Offset Type Name          Note
+ * 0      uint32_t  magic         required
+ * 4      uint32_t  flags         required
+ * 8      uint32_t  checksum      required
+ * 12     uint32_t  header_addr   required
+ * 16     uint32_t  load_addr     required
+ * 20     uint32_t  load_end_addr required
+ * 24     uint32_t  bss_end_addr  required
+ * 28     uint32_t  entry_addr    required
+ *
+ * - magic
+ *   Magic number identifying the table. For images to be loaded by Xen 3, the
+ *   magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
+ * - flags
+ *   bit 0: indicates whether the image needs to be loaded on a page boundary
+ *   bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that memory info should be passed to the image)
+ *   bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
+ *          that the bootloader should pass video mode info to the image)
+ *   bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
+ *           that the values in the fields header_addr - entry_addr are
+ *           valid)
+ *   All other bits should be set to 0.
+ * - checksum
+ *   When added to "magic" and "flags", the resulting value should be 0.
+ * - header_addr
+ *   Contains the virtual address corresponding to the beginning of the
+ *   table - the memory location at which the magic value is supposed to be
+ *   loaded. This field serves to synchronize the mapping between OS image
+ *   offsets and virtual memory addresses.
+ * - load_addr
+ *   Contains the virtual address of the beginning of the text segment. The
+ *   offset in the OS image file at which to start loading is defined by the
+ *   offset at which the table was found, minus (header addr - load addr).
+ *   load addr must be less than or equal to header addr.
+ * - load_end_addr
+ *   Contains the virtual address of the end of the data segment.
+ *   (load_end_addr - load_addr) specifies how much data to load. This implies
+ *   that the text and data segments must be consecutive in the OS image. If
+ *   this field is zero, the domain builder assumes that the text and data
+ *   segments occupy the whole OS image file.
+ * - bss_end_addr
+ *   Contains the virtual address of the end of the bss segment. The domain
+ *   builder initializes this area to zero, and reserves the memory it occupies
+ *   to avoid placing boot modules and other data relevant to the loaded image
+ *   in that area. If this field is zero, the domain builder assumes that no bss
+ *   segment is present.
+ * - entry_addr
+ *   The virtual address at which to start execution of the loaded image.
+ *
+ */
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+
+#define round_pgup(_p)    (((_p)+(PAGE_SIZE_X86-1))&PAGE_MASK_X86)
+#define round_pgdown(_p)  ((_p)&PAGE_MASK_X86)
+
+/* In-image header, multiboot-like; field semantics are described in
+ * the long comment at the top of this file. */
+struct xen_bin_image_table
+{
+    uint32_t magic;
+    uint32_t flags;
+    uint32_t checksum;
+    uint32_t header_addr;
+    uint32_t load_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t entry_addr;
+};
+
+#define XEN_MULTIBOOT_MAGIC3 0x336ec578
+
+#define XEN_MULTIBOOT_FLAG_ALIGN4K     0x00000001
+#define XEN_MULTIBOOT_FLAG_NEEDMEMINFO 0x00000002
+#define XEN_MULTIBOOT_FLAG_NEEDVIDINFO 0x00000004
+#define XEN_MULTIBOOT_FLAG_ADDRSVALID  0x00010000
+#define XEN_MULTIBOOT_FLAG_PAE_SHIFT   14
+#define XEN_MULTIBOOT_FLAG_PAE_MASK    (3 << XEN_MULTIBOOT_FLAG_PAE_SHIFT)
+
+/* Flags we test for */
+#define FLAGS_MASK     ((~ 0) & (~ XEN_MULTIBOOT_FLAG_ALIGN4K) & \
+    (~ XEN_MULTIBOOT_FLAG_PAE_MASK))
+#define FLAGS_REQUIRED XEN_MULTIBOOT_FLAG_ADDRSVALID
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Scan the first 8192 bytes of the kernel blob, at 32-bit aligned
+ * offsets, for a xen_bin_image_table whose magic matches and whose
+ * magic+flags+checksum sum to 0.  Returns the table or NULL.
+ */
+static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *table;
+    uint32_t *probe_ptr;
+    uint32_t *probe_end;
+
+    if ( dom->kernel_size < sizeof(*table) )
+        return NULL;
+    probe_ptr = dom->kernel_blob;
+    /* Never probe beyond the blob; a full table must fit at the hit. */
+    if ( dom->kernel_size > (8192 + sizeof(*table)) )
+        probe_end = dom->kernel_blob + 8192;
+    else
+        probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
+
+    for ( table = NULL; probe_ptr < probe_end; probe_ptr++ )
+    {
+        if ( *probe_ptr == XEN_MULTIBOOT_MAGIC3 )
+        {
+            table = (struct xen_bin_image_table *) probe_ptr;
+            /* Checksum correct? */
+            if ( (table->magic + table->flags + table->checksum) == 0 )
+                return table;
+        }
+    }
+    return NULL;
+}
+
+/* Probe hook: 0 if a valid xen_bin_image_table exists, else -EINVAL. */
+static int xc_dom_probe_bin_kernel(struct xc_dom_image *dom)
+{
+    return find_table(dom) ? 0 : -EINVAL;
+}
+
+/*
+ * Parser hook for multiboot-style flat binaries: validate the header
+ * flags and addresses, derive the kernel segment and entry point, and
+ * pick the guest type from the PAE flag bits.  Returns 0 on success,
+ * -EINVAL on any validation failure.
+ */
+static int xc_dom_parse_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    size_t image_size = dom->kernel_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t pae_flags;
+
+    image_info = find_table(dom);
+    if ( !image_info )
+        return -EINVAL;
+
+    DOMPRINTF("%s: multiboot header fields", __FUNCTION__);
+    DOMPRINTF("  flags:         0x%" PRIx32 "", image_info->flags);
+    DOMPRINTF("  header_addr:   0x%" PRIx32 "", image_info->header_addr);
+    DOMPRINTF("  load_addr:     0x%" PRIx32 "", image_info->load_addr);
+    DOMPRINTF("  load_end_addr: 0x%" PRIx32 "", image_info->load_end_addr);
+    DOMPRINTF("  bss_end_addr:  0x%" PRIx32 "", image_info->bss_end_addr);
+    DOMPRINTF("  entry_addr:    0x%" PRIx32 "", image_info->entry_addr);
+
+    /* Check the flags */
+    if ( (image_info->flags & FLAGS_MASK) != FLAGS_REQUIRED )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "%s: xen_bin_image_table flags required "
+                     "0x%08" PRIx32 " found 0x%08" PRIx32 "",
+                     __FUNCTION__, FLAGS_REQUIRED, image_info->flags & FLAGS_MASK);
+        return -EINVAL;
+    }
+
+    /* Sanity check on the addresses */
+    if ( (image_info->header_addr < image_info->load_addr) ||
+         ((char *) image_info - image) <
+         (image_info->header_addr - image_info->load_addr) )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid header_addr.",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* A zero load_end/bss_end field means "use the computed default"
+     * (whole image / no bss), per the header spec in the file comment. */
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    DOMPRINTF("%s: calculated addresses", __FUNCTION__);
+    DOMPRINTF("  start_addr:    0x%" PRIx32 "", start_addr);
+    DOMPRINTF("  load_end_addr: 0x%" PRIx32 "", load_end_addr);
+    DOMPRINTF("  bss_end_addr:  0x%" PRIx32 "", bss_end_addr);
+
+    if ( (start_addr + image_size) < load_end_addr )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid load_end_addr.",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( bss_end_addr < load_end_addr)
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid bss_end_addr.",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    dom->kernel_seg.vstart = image_info->load_addr;
+    dom->kernel_seg.vend   = bss_end_addr;
+    dom->parms.virt_base   = start_addr;
+    dom->parms.virt_entry  = image_info->entry_addr;
+
+    /* PAE flag bits 14-15 select the x86 guest flavour. */
+    pae_flags = image_info->flags & XEN_MULTIBOOT_FLAG_PAE_MASK;
+    switch (pae_flags >> XEN_MULTIBOOT_FLAG_PAE_SHIFT) {
+    case 0:
+        dom->guest_type = "xen-3.0-x86_32";
+        break;
+    case 1:
+        dom->guest_type = "xen-3.0-x86_32p";
+        break;
+    case 2:
+        dom->guest_type = "xen-3.0-x86_64";
+        break;
+    case 3:
+        /* Kernel detects PAE at runtime.  So try to figure whenever
+         * xen supports PAE and advertise a PAE-capable kernel in case
+         * it does. */
+        dom->guest_type = "xen-3.0-x86_32";
+        if ( strstr(dom->xen_caps, "xen-3.0-x86_32p") )
+        {
+            DOMPRINTF("%s: PAE fixup", __FUNCTION__);
+            dom->guest_type = "xen-3.0-x86_32p";
+            dom->parms.pae  = XEN_PAE_EXTCR3;
+        }
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Loader hook for multiboot-style flat binaries: copy the text/data
+ * portion into the mapped kernel segment and zero the bss, with bounds
+ * checks on both source blob and destination mapping.  Returns 0 on
+ * success, -EINVAL on any failure.
+ */
+static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
+{
+    struct xen_bin_image_table *image_info;
+    char *image = dom->kernel_blob;
+    char *dest;
+    size_t image_size = dom->kernel_size;
+    size_t dest_size;
+    uint32_t start_addr;
+    uint32_t load_end_addr;
+    uint32_t bss_end_addr;
+    uint32_t skip, text_size, bss_size;
+
+    image_info = find_table(dom);
+    if ( !image_info )
+        return -EINVAL;
+
+    /* Recompute the same defaults as the parse hook above. */
+    start_addr = image_info->header_addr - ((char *)image_info - image);
+    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
+    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
+
+    /* It's possible that we need to skip the first part of the image */
+    skip = image_info->load_addr - start_addr;
+    text_size = load_end_addr - image_info->load_addr;
+    bss_size = bss_end_addr - load_end_addr;
+
+    DOMPRINTF("%s: calculated sizes", __FUNCTION__);
+    DOMPRINTF("  skip:      0x%" PRIx32 "", skip);
+    DOMPRINTF("  text_size: 0x%" PRIx32 "", text_size);
+    DOMPRINTF("  bss_size:  0x%" PRIx32 "", bss_size);
+
+    dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart, &dest_size);
+    if ( dest == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart)"
+                  " => NULL", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Destination mapping must hold text plus zeroed bss. */
+    if ( dest_size < text_size ||
+         dest_size - text_size < bss_size )
+    {
+        DOMPRINTF("%s: mapped region is too small for image", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Source blob must actually contain the declared text range. */
+    if ( image_size < skip ||
+         image_size - skip < text_size )
+    {
+        DOMPRINTF("%s: image is too small for declared text size",
+                  __FUNCTION__);
+        return -EINVAL;
+    }
+
+    memcpy(dest, image + skip, text_size);
+    memset(dest + text_size, 0, bss_size);
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Loader descriptor for flat multiboot-style binaries. */
+static struct xc_dom_loader bin_loader = {
+    .name = "multiboot-binary",
+    .probe = xc_dom_probe_bin_kernel,
+    .parser = xc_dom_parse_bin_kernel,
+    .loader = xc_dom_load_bin_kernel,
+};
+
+/* Library-init constructor: register the flat-binary loader. */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&bin_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_boot.c b/tools/libs/guest/xg_dom_boot.c
new file mode 100644 (file)
index 0000000..1e31e92
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * Xen domain builder -- xen booter.
+ *
+ * This is the code which actually boots a fresh
+ * prepared domain image as xen guest domain.
+ *
+ * ==>  this is the only domain builder code piece
+ *          where xen hypercalls are allowed        <==
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+#include "xc_core.h"
+#include <xen/hvm/params.h>
+#include <xen/grant_table.h>
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Ask Xen to initialise the guest's hypercall page via
+ * XEN_DOMCTL_hypercall_init.  A no-op (returns 0) when the image
+ * declares no hypercall page (virt_hypercall == -1).  Returns the
+ * domctl result; panics the builder log on failure.
+ */
+static int setup_hypercall_page(struct xc_dom_image *dom)
+{
+    DECLARE_DOMCTL;
+    xen_pfn_t pfn;
+    int rc;
+
+    if ( dom->parms.virt_hypercall == -1 )
+        return 0;
+    /* Convert the virtual address to a guest pfn. */
+    pfn = (dom->parms.virt_hypercall - dom->parms.virt_base)
+        >> XC_DOM_PAGE_SHIFT(dom);
+
+    DOMPRINTF("%s: vaddr=0x%" PRIx64 " pfn=0x%" PRIpfn "", __FUNCTION__,
+                  dom->parms.virt_hypercall, pfn);
+    domctl.cmd = XEN_DOMCTL_hypercall_init;
+    domctl.domain = dom->guest_domid;
+    domctl.u.hypercall_init.gmfn = xc_dom_p2m(dom, pfn);
+    rc = do_domctl(dom->xch, &domctl);
+    if ( rc != 0 )
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: HYPERCALL_INIT failed: %d - %s)",
+                     __FUNCTION__, errno, strerror(errno));
+    return rc;
+}
+
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Check whether the image's guest_type appears in the hypervisor's
+ * space-separated capabilities string.  Returns the number of matches
+ * (i.e. non-zero on success, 0 with a panic log entry on failure).
+ */
+int xc_dom_compat_check(struct xc_dom_image *dom)
+{
+    xen_capabilities_info_t xen_caps;
+    char *item, *ptr;
+    int match, found = 0;
+
+    /* Work on a NUL-terminated copy: strtok_r modifies its input. */
+    strncpy(xen_caps, dom->xen_caps, XEN_CAPABILITIES_INFO_LEN - 1);
+    xen_caps[XEN_CAPABILITIES_INFO_LEN - 1] = '\0';
+
+    for ( item = strtok_r(xen_caps, " ", &ptr);
+          item != NULL ; item = strtok_r(NULL, " ", &ptr) )
+    {
+        match = !strcmp(dom->guest_type, item);
+        DOMPRINTF("%s: supported guest type: %s%s", __FUNCTION__,
+                  item, match ? " <= matches" : "");
+        if ( match )
+            found++;
+    }
+    if ( !found )
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "%s: guest type %s not supported by xen kernel, sorry",
+                     __FUNCTION__, dom->guest_type);
+
+    return found;
+}
+
+/*
+ * Bind the domain image to a xc interface handle and target domid, and
+ * cache the hypervisor version and capabilities string.  Returns 0 on
+ * success, -1 if the capabilities cannot be fetched.
+ */
+int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch, uint32_t domid)
+{
+    dom->xch = xch;
+    dom->guest_domid = domid;
+
+    dom->xen_version = xc_version(xch, XENVER_version, NULL);
+    if ( xc_version(xch, XENVER_capabilities, &dom->xen_caps) < 0 )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR, "can't get xen capabilities");
+        return -1;
+    }
+    DOMPRINTF("%s: ver %d.%d, caps %s", __FUNCTION__,
+              dom->xen_version >> 16, dom->xen_version & 0xff,
+              dom->xen_caps);
+    return 0;
+}
+
+/*
+ * Run the arch-specific meminit hook to populate guest memory.
+ * Returns 0 on success, the hook's error code (with a panic log
+ * entry) on failure.
+ */
+int xc_dom_boot_mem_init(struct xc_dom_image *dom)
+{
+    long rc;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    rc = dom->arch_hooks->meminit(dom);
+    if ( rc != 0 )
+    {
+        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+                     "%s: can't allocate low memory for domain",
+                     __FUNCTION__);
+        return rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Map 'count' guest pages starting at guest pfn 'pfn' into the builder
+ * process, read/write.  Returns the mapping or NULL on failure (with a
+ * panic log entry).  The entries array is builder-owned (xc_dom_malloc)
+ * and freed with the dom image, not here.
+ */
+void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
+                           xen_pfn_t count)
+{
+    int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    privcmd_mmap_entry_t *entries;
+    void *ptr;
+    int i;
+    int err;
+
+    entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
+    if ( entries == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [malloc]", __FUNCTION__, pfn, count);
+        return NULL;
+    }
+
+    /* Translate each guest pfn to a machine frame for the mmap. */
+    for ( i = 0; i < count; i++ )
+        entries[i].mfn = xc_dom_p2m(dom, pfn + i);
+
+    ptr = xc_map_foreign_ranges(dom->xch, dom->guest_domid,
+                count << page_shift, PROT_READ | PROT_WRITE, 1 << page_shift,
+                entries, count);
+    if ( ptr == NULL )
+    {
+        err = errno;
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
+                     " [mmap, errno=%i (%s)]", __FUNCTION__, pfn, count,
+                     err, strerror(err));
+        return NULL;
+    }
+
+    return ptr;
+}
+
+/*
+ * Final boot sequence for a built domain: run the arch hooks in order
+ * (bootearly, pagetable setup, start info, hypercall page, bootlate,
+ * vcpu init) with a compatibility check against the hypervisor caps.
+ * Returns 0 on success, non-zero on the first failing step.
+ */
+int xc_dom_boot_image(struct xc_dom_image *dom)
+{
+    xc_dominfo_t info;
+    int rc;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    /* misc stuff*/
+    if ( (rc = dom->arch_hooks->bootearly(dom)) != 0 )
+        return rc;
+
+    /* collect some info */
+    rc = xc_domain_getinfo(dom->xch, dom->guest_domid, 1, &info);
+    if ( rc < 0 )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: getdomaininfo failed (rc=%d)", __FUNCTION__, rc);
+        return rc;
+    }
+    /* getinfo returns the first domain >= domid; verify it is ours. */
+    if ( rc == 0 || info.domid != dom->guest_domid )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: Huh? No domains found (nr_domains=%d) "
+                     "or domid mismatch (%d != %d)", __FUNCTION__,
+                     rc, info.domid, dom->guest_domid);
+        return -1;
+    }
+    dom->shared_info_mfn = info.shared_info_frame;
+
+    /* sanity checks */
+    if ( !xc_dom_compat_check(dom) )
+        return -1;
+
+    /* initial mm setup */
+    if ( dom->arch_hooks->setup_pgtables &&
+         (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 )
+        return rc;
+
+    /* start info page */
+    if ( dom->arch_hooks->start_info )
+        dom->arch_hooks->start_info(dom);
+
+    /* hypercall page */
+    if ( (rc = setup_hypercall_page(dom)) != 0 )
+        return rc;
+    xc_dom_log_memory_footprint(dom);
+
+    /* misc x86 stuff */
+    if ( (rc = dom->arch_hooks->bootlate(dom)) != 0 )
+        return rc;
+
+    /* let the vm run */
+    if ( (rc = dom->arch_hooks->vcpu(dom)) != 0 )
+        return rc;
+    xc_dom_unmap_all(dom);
+
+    return rc;
+}
+
+/*
+ * Issue GNTTABOP_setup_table for one frame of the domain's grant table
+ * and return the resulting guest frame number, or (xen_pfn_t)-1 on
+ * failure.
+ */
+static xen_pfn_t xc_dom_gnttab_setup(xc_interface *xch, uint32_t domid)
+{
+    gnttab_setup_table_t setup;
+    DECLARE_HYPERCALL_BUFFER(xen_pfn_t, gmfnp);
+    int rc;
+    xen_pfn_t gmfn;
+
+    gmfnp = xc_hypercall_buffer_alloc(xch, gmfnp, sizeof(*gmfnp));
+    if (gmfnp == NULL)
+        return -1;
+
+    setup.dom = domid;
+    setup.nr_frames = 1;
+    set_xen_guest_handle(setup.frame_list, gmfnp);
+    setup.status = 0;
+
+    rc = xc_gnttab_op(xch, GNTTABOP_setup_table, &setup, sizeof(setup), 1);
+    /* Copy the frame out before releasing the hypercall buffer. */
+    gmfn = *gmfnp;
+    xc_hypercall_buffer_free(xch, gmfnp);
+
+    if ( rc != 0 || setup.status != GNTST_okay )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to setup domU grant table "
+                     "[errno=%d, status=%" PRId16 "]\n",
+                     __FUNCTION__, rc != 0 ? errno : 0, setup.status);
+        return -1;
+    }
+
+    return gmfn;
+}
+
+/*
+ * Fill one v1 grant table entry granting 'backend_domid' access to
+ * 'guest_gfn'.  Skipped when the backend is the guest itself or the
+ * frame is invalid (-1).
+ */
+static void xc_dom_set_gnttab_entry(xc_interface *xch,
+                                    grant_entry_v1_t *gnttab,
+                                    unsigned int idx,
+                                    uint32_t guest_domid,
+                                    uint32_t backend_domid,
+                                    xen_pfn_t guest_gfn)
+{
+    if ( guest_domid == backend_domid || guest_gfn == -1 )
+        return;
+
+    xc_dom_printf(xch, "%s: d%d gnt[%u] -> d%d 0x%"PRI_xen_pfn,
+                  __func__, guest_domid, idx, backend_domid, guest_gfn);
+
+    gnttab[idx].flags = GTF_permit_access;
+    gnttab[idx].domid = backend_domid;
+    gnttab[idx].frame = guest_gfn;
+}
+
+/*
+ * Legacy path for seeding the console/xenstore grant entries: set up
+ * one grant table frame, map it, write the two reserved entries, then
+ * unmap and flush the guest's view.  Returns 0 on success, -1 on
+ * failure.
+ */
+static int compat_gnttab_seed(xc_interface *xch, uint32_t domid,
+                              xen_pfn_t console_gfn,
+                              xen_pfn_t xenstore_gfn,
+                              uint32_t console_domid,
+                              uint32_t xenstore_domid)
+{
+
+    xen_pfn_t gnttab_gfn;
+    grant_entry_v1_t *gnttab;
+
+    gnttab_gfn = xc_dom_gnttab_setup(xch, domid);
+    if ( gnttab_gfn == -1 )
+        return -1;
+
+    gnttab = xc_map_foreign_range(xch,
+                                  domid,
+                                  PAGE_SIZE,
+                                  PROT_READ|PROT_WRITE,
+                                  gnttab_gfn);
+    if ( gnttab == NULL )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to map d%d grant table "
+                     "[errno=%d]\n",
+                     __func__, domid, errno);
+        return -1;
+    }
+
+    xc_dom_set_gnttab_entry(xch, gnttab, GNTTAB_RESERVED_CONSOLE,
+                            domid, console_domid, console_gfn);
+    xc_dom_set_gnttab_entry(xch, gnttab, GNTTAB_RESERVED_XENSTORE,
+                            domid, xenstore_domid, xenstore_gfn);
+
+    if ( munmap(gnttab, PAGE_SIZE) == -1 )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to unmap d%d grant table "
+                     "[errno=%d]\n",
+                     __func__, domid, errno);
+        return -1;
+    }
+
+    /* Guest shouldn't really touch its grant table until it has
+     * enabled its caches. But lets be nice. */
+    xc_domain_cacheflush(xch, domid, gnttab_gfn, 1);
+
+    return 0;
+}
+
+/*
+ * HVM variant of the legacy seeding path: temporarily map the grant
+ * table frame into a scratch gfn of the guest's physmap, seed the
+ * reserved entries via compat_gnttab_seed(), then remove the frame
+ * from the physmap again.  Returns 0 on success, -1 on failure.
+ */
+static int compat_gnttab_hvm_seed(xc_interface *xch, uint32_t domid,
+                                  xen_pfn_t console_gfn,
+                                  xen_pfn_t xenstore_gfn,
+                                  uint32_t console_domid,
+                                  uint32_t xenstore_domid)
+{
+    int rc;
+    xen_pfn_t scratch_gfn;
+    struct xen_add_to_physmap xatp = {
+        .domid = domid,
+        .space = XENMAPSPACE_grant_table,
+        .idx   = 0,
+    };
+    struct xen_remove_from_physmap xrfp = {
+        .domid = domid,
+    };
+
+    rc = xc_core_arch_get_scratch_gpfn(xch, domid, &scratch_gfn);
+    if ( rc < 0 )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to get a scratch gfn from d%d"
+                     "[errno=%d]\n",
+                     __func__, domid, errno);
+        return -1;
+    }
+    xatp.gpfn = scratch_gfn;
+    xrfp.gpfn = scratch_gfn;
+
+    xc_dom_printf(xch, "%s: d%d: pfn=0x%"PRI_xen_pfn, __func__,
+                  domid, scratch_gfn);
+
+    rc = do_memory_op(xch, XENMEM_add_to_physmap, &xatp, sizeof(xatp));
+    if ( rc != 0 )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to add gnttab to d%d physmap "
+                     "[errno=%d]\n",
+                     __func__, domid, errno);
+        return -1;
+    }
+
+    rc = compat_gnttab_seed(xch, domid,
+                            console_gfn, xenstore_gfn,
+                            console_domid, xenstore_domid);
+    if (rc != 0)
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to seed gnttab entries for d%d\n",
+                     __func__, domid);
+        /* Best-effort cleanup: undo the physmap change before bailing. */
+        (void) do_memory_op(xch, XENMEM_remove_from_physmap, &xrfp,
+                            sizeof(xrfp));
+        return -1;
+    }
+
+    rc = do_memory_op(xch, XENMEM_remove_from_physmap, &xrfp, sizeof(xrfp));
+    if (rc != 0)
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to remove gnttab from d%d physmap "
+                     "[errno=%d]\n",
+                     __func__, domid, errno);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Seed the reserved console/xenstore grant entries of a new guest.
+ * Preferred path: acquire the shared grant table directly via the
+ * foreignmemory resource interface.  If the hypervisor reports
+ * EOPNOTSUPP, fall back to the legacy (PV or HVM) compat paths above.
+ * Returns 0 on success, -1 on failure.
+ */
+int xc_dom_gnttab_seed(xc_interface *xch, uint32_t guest_domid,
+                       bool is_hvm, xen_pfn_t console_gfn,
+                       xen_pfn_t xenstore_gfn, uint32_t console_domid,
+                       uint32_t xenstore_domid)
+{
+    xenforeignmemory_handle* fmem = xch->fmem;
+    xenforeignmemory_resource_handle *fres;
+    void *addr = NULL;
+
+    fres = xenforeignmemory_map_resource(
+        fmem, guest_domid, XENMEM_resource_grant_table,
+        XENMEM_resource_grant_table_id_shared, 0, 1, &addr,
+        PROT_READ | PROT_WRITE, 0);
+    if ( !fres )
+    {
+        if ( errno == EOPNOTSUPP )
+            return is_hvm ?
+                compat_gnttab_hvm_seed(xch, guest_domid,
+                                       console_gfn, xenstore_gfn,
+                                       console_domid, xenstore_domid) :
+                compat_gnttab_seed(xch, guest_domid,
+                                   console_gfn, xenstore_gfn,
+                                   console_domid, xenstore_domid);
+
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: failed to acquire d%d grant table [errno=%d]\n",
+                     __func__, guest_domid, errno);
+        return -1;
+    }
+
+    xc_dom_set_gnttab_entry(xch, addr, GNTTAB_RESERVED_CONSOLE,
+                            guest_domid, console_domid, console_gfn);
+    xc_dom_set_gnttab_entry(xch, addr, GNTTAB_RESERVED_XENSTORE,
+                            guest_domid, xenstore_domid, xenstore_gfn);
+
+    xenforeignmemory_unmap_resource(fmem, fres);
+
+    return 0;
+}
+
+/*
+ * Convenience wrapper: seed the grant table for a built dom image,
+ * translating the console/xenstore pfns and choosing the HVM path
+ * based on whether the guest is auto-translated.
+ */
+int xc_dom_gnttab_init(struct xc_dom_image *dom)
+{
+    bool is_hvm = xc_dom_translated(dom);
+    xen_pfn_t console_gfn = xc_dom_p2m(dom, dom->console_pfn);
+    xen_pfn_t xenstore_gfn = xc_dom_p2m(dom, dom->xenstore_pfn);
+
+    return xc_dom_gnttab_seed(dom->xch, dom->guest_domid, is_hvm,
+                              console_gfn, xenstore_gfn,
+                              dom->console_domid, dom->xenstore_domid);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_bzimageloader.c b/tools/libs/guest/xg_dom_bzimageloader.c
new file mode 100644 (file)
index 0000000..f959a77
--- /dev/null
@@ -0,0 +1,812 @@
+/*
+ * Xen domain builder -- bzImage bits
+ *
+ * Parse and load bzImage kernel images.
+ *
+ * This relies on version 2.08 of the boot protocol, which contains an
+ * ELF file embedded in the bzImage.  The loader extracts this ELF
+ * image and passes it off to the standard ELF loader.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ * written 2007 by Jeremy Fitzhardinge <jeremy@xensource.com>
+ * written 2008 by Ian Campbell <ijc@hellion.org.uk>
+ * written 2009 by Chris Lalancette <clalance@redhat.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress.h"
+
+#include <xen-tools/libs.h>
+
+#ifndef __MINIOS__
+
+#if defined(HAVE_BZLIB)
+
+#include <bzlib.h>
+
+/*
+ * Decompress a BZIP2 compressed kernel payload.
+ *
+ * The output buffer starts out at the size of the input and is doubled
+ * whenever libbz2 runs out of output space, bounded by
+ * xc_dom_kernel_check_size().  On success the buffer is registered with
+ * the domain builder and *blob / *size are updated to describe it.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int xc_try_bzip2_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    bz_stream stream;
+    int ret;
+    char *out_buf;
+    char *tmp_buf;
+    int retval = -1;
+    unsigned int outsize;
+    uint64_t total;
+
+    stream.bzalloc = NULL;
+    stream.bzfree = NULL;
+    stream.opaque = NULL;
+
+    if ( dom->kernel_size == 0 )
+    {
+        DOMPRINTF("BZIP2: Input is 0 size");
+        return -1;
+    }
+
+    ret = BZ2_bzDecompressInit(&stream, 0, 0);
+    if ( ret != BZ_OK )
+    {
+        DOMPRINTF("BZIP2: Error initting stream");
+        return -1;
+    }
+
+    /* sigh.  We don't know up-front how much memory we are going to need
+     * for the output buffer.  Allocate the output buffer to be equal
+     * the input buffer to start, and we'll realloc as needed.
+     */
+    outsize = dom->kernel_size;
+
+    /*
+     * stream.avail_in and outsize are unsigned int, while kernel_size
+     * is a size_t. Check we aren't overflowing.
+     */
+    if ( outsize != dom->kernel_size )
+    {
+        DOMPRINTF("BZIP2: Input too large");
+        goto bzip2_cleanup;
+    }
+
+    out_buf = malloc(outsize);
+    if ( out_buf == NULL )
+    {
+        DOMPRINTF("BZIP2: Failed to alloc memory");
+        goto bzip2_cleanup;
+    }
+
+    stream.next_in = dom->kernel_blob;
+    stream.avail_in = dom->kernel_size;
+
+    stream.next_out = out_buf;
+    stream.avail_out = dom->kernel_size;
+
+    for ( ; ; )
+    {
+        ret = BZ2_bzDecompress(&stream);
+        if ( ret == BZ_STREAM_END )
+        {
+            DOMPRINTF("BZIP2: Saw data stream end");
+            break;
+        }
+        if ( ret != BZ_OK )
+        {
+            DOMPRINTF("BZIP2: error %d", ret);
+            free(out_buf);
+            goto bzip2_cleanup;
+        }
+
+        if ( stream.avail_out == 0 )
+        {
+            /* Protect against output buffer overflow */
+            if ( outsize > UINT_MAX / 2 )
+            {
+                DOMPRINTF("BZIP2: output buffer overflow");
+                free(out_buf);
+                goto bzip2_cleanup;
+            }
+
+            if ( xc_dom_kernel_check_size(dom, outsize * 2) )
+            {
+                DOMPRINTF("BZIP2: output too large");
+                free(out_buf);
+                goto bzip2_cleanup;
+            }
+
+            tmp_buf = realloc(out_buf, outsize * 2);
+            if ( tmp_buf == NULL )
+            {
+                DOMPRINTF("BZIP2: Failed to realloc memory");
+                free(out_buf);
+                goto bzip2_cleanup;
+            }
+            out_buf = tmp_buf;
+
+            stream.next_out = out_buf + outsize;
+            stream.avail_out = (outsize * 2) - outsize;
+            outsize *= 2;
+        }
+        else if ( stream.avail_in == 0 )
+        {
+            /*
+             * If there is output buffer available then this indicates
+             * that BZ2_bzDecompress would like more input data to be
+             * provided.  However our complete input buffer is in
+             * memory and provided upfront so if avail_in is zero this
+             * actually indicates a truncated input.
+             */
+            DOMPRINTF("BZIP2: not enough input");
+            free(out_buf);
+            goto bzip2_cleanup;
+        }
+    }
+
+    total = (((uint64_t)stream.total_out_hi32) << 32) | stream.total_out_lo32;
+
+    if ( xc_dom_register_external(dom, out_buf, total) )
+    {
+        DOMPRINTF("BZIP2: Error registering stream output");
+        free(out_buf);
+        goto bzip2_cleanup;
+    }
+
+    DOMPRINTF("%s: BZIP2 decompress OK, 0x%zx -> 0x%lx",
+              __FUNCTION__, *size, (long unsigned int) total);
+
+    *blob = out_buf;
+    *size = total;
+    /*
+     * Only report success once the output has been registered and handed
+     * over.  Previously retval was already set to 0 at BZ_STREAM_END, so
+     * a xc_dom_register_external() failure was still reported as success
+     * while *blob/*size were left untouched and out_buf had been freed.
+     */
+    retval = 0;
+
+ bzip2_cleanup:
+    BZ2_bzDecompressEnd(&stream);
+
+    return retval;
+}
+
+#else /* !defined(HAVE_BZLIB) */
+
+/* Build-time stub: libbz2 was unavailable, so BZIP2 kernels always fail. */
+static int xc_try_bzip2_decode(struct xc_dom_image *dom,
+                               void **blob, size_t *size)
+{
+    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                 "%s: BZIP2 decompress support unavailable", __FUNCTION__);
+    return -1;
+}
+
+#endif
+
+#if defined(HAVE_LZMA)
+
+#include <lzma.h>
+
+/*
+ * Common decode loop shared by the XZ and legacy-LZMA front ends.
+ *
+ * The caller passes an already initialised decoder in @stream; ownership
+ * is taken here and the stream is always finalised with lzma_end()
+ * before returning.  On success the decompressed image is registered
+ * with the domain builder and *blob / *size are updated.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int _xc_try_lzma_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size,
+    lzma_stream *stream, const char *what)
+{
+    lzma_ret ret;
+    lzma_action action = LZMA_RUN;
+    unsigned char *out_buf;
+    unsigned char *tmp_buf;
+    int retval = -1;
+    size_t outsize;
+    const char *msg;
+
+    if ( dom->kernel_size == 0 )
+    {
+        DOMPRINTF("%s: Input is 0 size", what);
+        /*
+         * Go through cleanup rather than returning directly: the caller
+         * has already initialised the decoder, and returning without
+         * lzma_end() would leak its allocations.
+         */
+        goto lzma_cleanup;
+    }
+
+    /* sigh.  We don't know up-front how much memory we are going to need
+     * for the output buffer.  Allocate the output buffer to be equal
+     * the input buffer to start, and we'll realloc as needed.
+     */
+    outsize = dom->kernel_size;
+    out_buf = malloc(outsize);
+    if ( out_buf == NULL )
+    {
+        DOMPRINTF("%s: Failed to alloc memory", what);
+        goto lzma_cleanup;
+    }
+
+    stream->next_in = dom->kernel_blob;
+    stream->avail_in = dom->kernel_size;
+
+    stream->next_out = out_buf;
+    stream->avail_out = dom->kernel_size;
+
+    for ( ; ; )
+    {
+        ret = lzma_code(stream, action);
+        if ( ret == LZMA_STREAM_END )
+        {
+            DOMPRINTF("%s: Saw data stream end", what);
+            break;
+        }
+        if ( ret != LZMA_OK )
+        {
+            /* Map the liblzma error code to a human-readable message. */
+            switch ( ret )
+            {
+            case LZMA_MEM_ERROR:
+                msg = strerror(ENOMEM);
+                break;
+
+            case LZMA_MEMLIMIT_ERROR:
+                msg = "Memory usage limit reached";
+                break;
+
+            case LZMA_FORMAT_ERROR:
+                msg = "File format not recognized";
+                break;
+
+            case LZMA_OPTIONS_ERROR:
+                // FIXME: Better message?
+                msg = "Unsupported compression options";
+                break;
+
+            case LZMA_DATA_ERROR:
+                msg = "File is corrupt";
+                break;
+
+            case LZMA_BUF_ERROR:
+                msg = "Unexpected end of input";
+                break;
+
+            default:
+                msg = "Internal program error (bug)";
+                break;
+            }
+            DOMPRINTF("%s: %s decompression error: %s",
+                      __FUNCTION__, what, msg);
+            free(out_buf);
+            goto lzma_cleanup;
+        }
+
+        if ( stream->avail_out == 0 )
+        {
+            /* Protect against output buffer overflow */
+            if ( outsize > SIZE_MAX / 2 )
+            {
+                DOMPRINTF("%s: output buffer overflow", what);
+                free(out_buf);
+                goto lzma_cleanup;
+            }
+
+            if ( xc_dom_kernel_check_size(dom, outsize * 2) )
+            {
+                DOMPRINTF("%s: output too large", what);
+                free(out_buf);
+                goto lzma_cleanup;
+            }
+
+            tmp_buf = realloc(out_buf, outsize * 2);
+            if ( tmp_buf == NULL )
+            {
+                DOMPRINTF("%s: Failed to realloc memory", what);
+                free(out_buf);
+                goto lzma_cleanup;
+            }
+            out_buf = tmp_buf;
+
+            stream->next_out = out_buf + outsize;
+            stream->avail_out = (outsize * 2) - outsize;
+            outsize *= 2;
+        }
+    }
+
+    if ( xc_dom_register_external(dom, out_buf, stream->total_out) )
+    {
+        DOMPRINTF("%s: Error registering stream output", what);
+        free(out_buf);
+        goto lzma_cleanup;
+    }
+
+    DOMPRINTF("%s: %s decompress OK, 0x%zx -> 0x%zx",
+              __FUNCTION__, what, *size, (size_t)stream->total_out);
+
+    *blob = out_buf;
+    *size = stream->total_out;
+    /*
+     * Only report success after a successful hand-over.  Previously
+     * retval was set to 0 at LZMA_STREAM_END, so a registration failure
+     * was still reported as success.
+     */
+    retval = 0;
+
+ lzma_cleanup:
+    lzma_end(stream);
+
+    return retval;
+}
+
+/* 128 Mb is the minimum size (half-way) documented to work for all inputs. */
+#define LZMA_BLOCK_SIZE (128*1024*1024)
+
+/* Set up an .xz container decoder and hand off to the common decode path. */
+static int xc_try_xz_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    lzma_stream stream = LZMA_STREAM_INIT;
+    lzma_ret rc = lzma_stream_decoder(&stream, LZMA_BLOCK_SIZE, 0);
+
+    if ( rc != LZMA_OK )
+    {
+        DOMPRINTF("XZ: Failed to init decoder");
+        return -1;
+    }
+
+    return _xc_try_lzma_decode(dom, blob, size, &stream, "XZ");
+}
+
+/* Set up a legacy .lzma (lzma_alone) decoder and use the common path. */
+static int xc_try_lzma_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    lzma_stream stream = LZMA_STREAM_INIT;
+    lzma_ret rc = lzma_alone_decoder(&stream, LZMA_BLOCK_SIZE);
+
+    if ( rc != LZMA_OK )
+    {
+        DOMPRINTF("LZMA: Failed to init decoder");
+        return -1;
+    }
+
+    return _xc_try_lzma_decode(dom, blob, size, &stream, "LZMA");
+}
+
+#else /* !defined(HAVE_LZMA) */
+
+/* Build-time stub: liblzma was unavailable, so XZ kernels always fail. */
+static int xc_try_xz_decode(struct xc_dom_image *dom,
+                            void **blob, size_t *size)
+{
+    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                 "%s: XZ decompress support unavailable", __FUNCTION__);
+    return -1;
+}
+
+/* Build-time stub: liblzma was unavailable, so LZMA kernels always fail. */
+static int xc_try_lzma_decode(struct xc_dom_image *dom,
+                              void **blob, size_t *size)
+{
+    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                 "%s: LZMA decompress support unavailable", __FUNCTION__);
+    return -1;
+}
+
+#endif
+
+#if defined(HAVE_LZO1X)
+
+#include <lzo/lzo1x.h>
+
+#define LZOP_HEADER_HAS_FILTER 0x00000800
+#define LZOP_MAX_BLOCK_SIZE (64*1024*1024)
+
+/* Read a 16-bit big-endian value from a raw byte buffer. */
+static inline uint_fast16_t lzo_read_16(const unsigned char *buf)
+{
+    uint_fast16_t hi = buf[0], lo = buf[1];
+
+    return (hi << 8) | lo;
+}
+
+/* Read a 32-bit big-endian value from a raw byte buffer. */
+static inline uint_fast32_t lzo_read_32(const unsigned char *buf)
+{
+    uint_fast32_t hi16 = lzo_read_16(buf);
+    uint_fast32_t lo16 = lzo_read_16(buf + 2);
+
+    return (hi16 << 16) | lo16;
+}
+
+/*
+ * Decompress an lzop-packaged LZO1X kernel image.
+ *
+ * Parses the lzop file header by hand, then decompresses the stream one
+ * block at a time, growing the output buffer as needed and bounding it
+ * with xc_dom_kernel_check_size().  On success the result is registered
+ * with the domain builder and returned via *blob / *size; returns 0.
+ * On any failure a message is logged and -1 returned.
+ */
+static int xc_try_lzo1x_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    int ret;
+    const unsigned char *cur = dom->kernel_blob;
+    unsigned char *out_buf = NULL;
+    size_t left = dom->kernel_size;
+    const char *msg;
+    unsigned version;
+    /* 9-byte lzop file magic. */
+    static const unsigned char magic[] = {
+        0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a
+    };
+
+    /*
+     * lzo_uint should match size_t. Check that this is the case to be
+     * sure we won't overflow various lzo_uint fields.
+     */
+    BUILD_BUG_ON(sizeof(lzo_uint) != sizeof(size_t));
+
+    ret = lzo_init();
+    if ( ret != LZO_E_OK )
+    {
+        DOMPRINTF("LZO1x: Failed to init library (%d)\n", ret);
+        return -1;
+    }
+
+    /* Need the magic plus the fixed 16-byte header prefix parsed below. */
+    if ( left < 16 || memcmp(cur, magic, 9) )
+    {
+        DOMPRINTF("LZO1x: Unrecognized magic\n");
+        return -1;
+    }
+
+    /* get version (2bytes), skip library version (2),
+     * 'need to be extracted' version (2) and method (1) */
+    version = lzo_read_16(cur + 9);
+    cur += 16;
+    left -= 16;
+
+    if ( version >= 0x0940 )
+    {
+        /* skip level */
+        ++cur;
+        if ( left )
+            --left;
+    }
+
+    /* ret doubles as the number of remaining header bytes to skip. */
+    if ( left >= 4 && (lzo_read_32(cur) & LZOP_HEADER_HAS_FILTER) )
+        ret = 8; /* flags + filter info */
+    else
+        ret = 4; /* flags */
+
+    /* skip mode and mtime_low */
+    ret += 8;
+    if ( version >= 0x0940 )
+        ret += 4; /* skip mtime_high */
+
+    /* don't care about the file name, and skip checksum */
+    if ( left > ret )
+        ret += 1 + cur[ret] + 4;
+
+    if ( left < ret )
+    {
+        DOMPRINTF("LZO1x: Incomplete header\n");
+        return -1;
+    }
+    cur += ret;
+    left -= ret;
+
+    /* Per-block loop; *size accumulates the decompressed length. */
+    for ( *size = 0; ; )
+    {
+        lzo_uint src_len, dst_len, out_len;
+        unsigned char *tmp_buf;
+
+        msg = "Short input";
+        if ( left < 4 )
+            break;
+
+        dst_len = lzo_read_32(cur);
+        /* A zero uncompressed length marks end of stream: hand over. */
+        if ( !dst_len )
+        {
+            msg = "Error registering stream output";
+            if ( xc_dom_register_external(dom, out_buf, *size) )
+                break;
+
+            return 0;
+        }
+
+        if ( dst_len > LZOP_MAX_BLOCK_SIZE )
+        {
+            msg = "Block size too large";
+            break;
+        }
+
+        if ( left < 12 )
+            break;
+
+        src_len = lzo_read_32(cur + 4);
+        cur += 12; /* also skip block checksum info */
+        left -= 12;
+
+        msg = "Bad source length";
+        if ( src_len <= 0 || src_len > dst_len || src_len > left )
+            break;
+
+        msg = "Output buffer overflow";
+        if ( *size > SIZE_MAX - dst_len )
+            break;
+
+        msg = "Decompressed image too large";
+        if ( xc_dom_kernel_check_size(dom, *size + dst_len) )
+            break;
+
+        msg = "Failed to (re)alloc memory";
+        tmp_buf = realloc(out_buf, *size + dst_len);
+        if ( tmp_buf == NULL )
+            break;
+
+        out_buf = tmp_buf;
+        out_len = dst_len;
+
+        ret = lzo1x_decompress_safe(cur, src_len,
+                                    out_buf + *size, &out_len, NULL);
+        switch ( ret )
+        {
+        case LZO_E_OK:
+            /* Success still requires the block to fill dst_len exactly. */
+            msg = "Input underrun";
+            if ( out_len != dst_len )
+                break;
+
+            *blob = out_buf;
+            *size += out_len;
+            cur += src_len;
+            left -= src_len;
+            continue;
+
+        case LZO_E_INPUT_NOT_CONSUMED:
+            msg = "Unconsumed input";
+            break;
+
+        case LZO_E_OUTPUT_OVERRUN:
+            msg = "Output overrun";
+            break;
+
+        case LZO_E_INPUT_OVERRUN:
+            msg = "Input overrun";
+            break;
+
+        case LZO_E_LOOKBEHIND_OVERRUN:
+            msg = "Look-behind overrun";
+            break;
+
+        case LZO_E_EOF_NOT_FOUND:
+            msg = "No EOF marker";
+            break;
+
+        case LZO_E_ERROR:
+            msg = "General error";
+            break;
+
+        default:
+            msg = "Internal program error (bug)";
+            break;
+        }
+
+        break;
+    }
+
+    /* Shared error exit: free whatever was produced and log why. */
+    free(out_buf);
+    DOMPRINTF("LZO1x decompression error: %s\n", msg);
+
+    return -1;
+}
+
+#else /* !defined(HAVE_LZO1X) */
+
+/* Build-time stub: liblzo2 was unavailable, so LZO1x kernels always fail. */
+static int xc_try_lzo1x_decode(struct xc_dom_image *dom,
+                               void **blob, size_t *size)
+{
+    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                 "%s: LZO1x decompress support unavailable\n", __FUNCTION__);
+    return -1;
+}
+
+#endif
+
+#else /* __MINIOS__ */
+
+int xc_try_bzip2_decode(struct xc_dom_image *dom, void **blob, size_t *size);
+int xc_try_lzma_decode(struct xc_dom_image *dom, void **blob, size_t *size);
+int xc_try_lzo1x_decode(struct xc_dom_image *dom, void **blob, size_t *size);
+int xc_try_xz_decode(struct xc_dom_image *dom, void **blob, size_t *size);
+
+#endif /* !__MINIOS__ */
+
+/*
+ * Linux/x86 real-mode boot header as embedded in a bzImage.  Fields up
+ * to payload_offset/payload_length correspond to boot protocol 2.08
+ * (see the file header above); everything before offset 0x1f1 is
+ * uninteresting here and covered by _pad0.  The struct is packed so the
+ * field offsets match the on-disk layout exactly.
+ */
+struct setup_header {
+    uint8_t  _pad0[0x1f1];  /* skip uninteresting stuff */
+    uint8_t  setup_sects;
+    uint16_t root_flags;
+    uint32_t syssize;
+    uint16_t ram_size;
+    uint16_t vid_mode;
+    uint16_t root_dev;
+    uint16_t boot_flag;
+    uint16_t jump;
+    /* Must read "HdrS" for a valid bzImage (checked in the probe). */
+    uint32_t header;
+#define HDR_MAGIC  "HdrS"
+#define HDR_MAGIC_SZ 4
+    /* Boot protocol version, major in the high byte, minor in the low. */
+    uint16_t version;
+#define VERSION(h,l) (((h)<<8) | (l))
+    uint32_t realmode_swtch;
+    uint16_t start_sys;
+    uint16_t kernel_version;
+    uint8_t  type_of_loader;
+    uint8_t  loadflags;
+    uint16_t setup_move_size;
+    uint32_t code32_start;
+    uint32_t ramdisk_image;
+    uint32_t ramdisk_size;
+    uint32_t bootsect_kludge;
+    uint16_t heap_end_ptr;
+    uint16_t _pad1;
+    uint32_t cmd_line_ptr;
+    uint32_t initrd_addr_max;
+    uint32_t kernel_alignment;
+    uint8_t  relocatable_kernel;
+    uint8_t  _pad2[3];
+    uint32_t cmdline_size;
+    uint32_t hardware_subarch;
+    uint64_t hardware_subarch_data;
+    /* Compressed payload location, relative to the protected-mode code. */
+    uint32_t payload_offset;
+    uint32_t payload_length;
+} __attribute__((packed));
+
+extern struct xc_dom_loader elf_loader;
+
+/*
+ * Return non-zero iff the kernel blob starts with the given magic byte
+ * sequence.  A magic longer than the blob never matches.
+ */
+static int check_magic(struct xc_dom_image *dom, const void *magic, size_t len)
+{
+    if (len > dom->kernel_size)
+        return 0;
+
+    return (memcmp(dom->kernel_blob, magic, len) == 0);
+}
+
+/*
+ * Probe whether the loaded kernel is a bzImage.  If so, locate the
+ * compressed payload via the boot header, decompress it in place of
+ * dom->kernel_blob (format detected by magic bytes), and hand the
+ * resulting ELF image to the regular ELF loader's probe.
+ * Returns 0 if this loader can handle the image, -EINVAL otherwise.
+ */
+static int xc_dom_probe_bzimage_kernel(struct xc_dom_image *dom)
+{
+    struct setup_header *hdr;
+    uint64_t payload_offset, payload_length;
+    int ret;
+
+    if ( dom->kernel_blob == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: no kernel image loaded", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( dom->kernel_size < sizeof(struct setup_header) )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    hdr = dom->kernel_blob;
+
+    /* The "HdrS" magic in the boot header identifies a bzImage. */
+    if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
+    {
+        xc_dom_printf(dom->xch, "%s: kernel is not a bzImage", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* payload_offset/payload_length only exist from protocol 2.08 on. */
+    if ( hdr->version < VERSION(2,8) )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: boot protocol"
+                     " too old (%04x)", __FUNCTION__, hdr->version);
+        return -EINVAL;
+    }
+
+
+    /* upcast to 64 bits to avoid overflow */
+    /* setup_sects is u8 and so cannot overflow */
+    payload_offset = (hdr->setup_sects + 1) * 512;
+    payload_offset += hdr->payload_offset;
+    payload_length = hdr->payload_length;
+
+    /* Bounds checks done in 64 bits, so the sums cannot wrap. */
+    if ( payload_offset >= dom->kernel_size )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: payload offset overflow",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+    if ( (payload_offset + payload_length) > dom->kernel_size )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: payload length overflow",
+                     __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* Narrow the blob to just the compressed payload. */
+    dom->kernel_blob = dom->kernel_blob + payload_offset;
+    dom->kernel_size = payload_length;
+
+    /* Dispatch on the payload's compression magic bytes. */
+    if ( check_magic(dom, "\037\213", 2) )        /* gzip */
+    {
+        ret = xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret == -1 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: unable to"
+                         " gzip decompress kernel", __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else if ( check_magic(dom, "\102\132\150", 3) )   /* "BZh" */
+    {
+        ret = xc_try_bzip2_decode(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret < 0 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s unable to BZIP2 decompress kernel",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else if ( check_magic(dom, "\3757zXZ", 6) )   /* xz, incl. trailing NUL */
+    {
+        ret = xc_try_xz_decode(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret < 0 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s unable to XZ decompress kernel",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else if ( check_magic(dom, "\135\000", 2) )   /* legacy LZMA */
+    {
+        ret = xc_try_lzma_decode(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret < 0 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s unable to LZMA decompress kernel",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else if ( check_magic(dom, "\x89LZO", 5) )    /* lzop, incl. trailing NUL */
+    {
+        ret = xc_try_lzo1x_decode(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret < 0 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s unable to LZO decompress kernel\n",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else if ( check_magic(dom, "\x02\x21", 2) )   /* LZ4 legacy frame start */
+    {
+        ret = xc_try_lz4_decode(dom, &dom->kernel_blob, &dom->kernel_size);
+        if ( ret < 0 )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s unable to LZ4 decompress kernel\n",
+                         __FUNCTION__);
+            return -EINVAL;
+        }
+    }
+    else
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "%s: unknown compression format", __FUNCTION__);
+        return -EINVAL;
+    }
+
+    /* The payload is now an uncompressed ELF image; let ELF confirm it. */
+    return elf_loader.probe(dom);
+}
+
+/*
+ * Delegate parsing to the ELF loader: the probe above already replaced
+ * the bzImage payload with the embedded (decompressed) ELF image.
+ */
+static int xc_dom_parse_bzimage_kernel(struct xc_dom_image *dom)
+{
+    return elf_loader.parser(dom);
+}
+
+/* Delegate loading to the ELF loader (see parse/probe above). */
+static int xc_dom_load_bzimage_kernel(struct xc_dom_image *dom)
+{
+    return elf_loader.loader(dom);
+}
+
+/* Loader descriptor: bzImage handling via decompress-then-ELF. */
+static struct xc_dom_loader bzimage_loader = {
+    .name = "Linux bzImage",
+    .probe = xc_dom_probe_bzimage_kernel,
+    .parser = xc_dom_parse_bzimage_kernel,
+    .loader = xc_dom_load_bzimage_kernel,
+};
+
+/*
+ * Register the bzImage loader with the domain builder.  __init is
+ * presumably a constructor attribute so this runs at library load time
+ * -- TODO confirm against its definition in the project headers.
+ */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&bzimage_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_compat_linux.c b/tools/libs/guest/xg_dom_compat_linux.c
new file mode 100644 (file)
index 0000000..b645f0b
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Xen domain builder -- compatibility code.
+ *
+ * Replacements for xc_linux_build & friends,
+ * as example code and to make the new builder
+ * usable as drop-in replacement.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <zlib.h>
+
+#include "xenctrl.h"
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Compatibility wrapper: build a PV Linux domain from a kernel image
+ * (and optional initrd) in a single call, driving the full domain
+ * builder pipeline.  On success the console and xenstore frame numbers
+ * are returned through *console_mfn / *store_mfn.
+ *
+ * Returns 0 on success; -1 or the failing step's error code otherwise.
+ */
+int xc_linux_build(xc_interface *xch, uint32_t domid,
+                   unsigned int mem_mb,
+                   const char *image_name,
+                   const char *initrd_name,
+                   const char *cmdline,
+                   const char *features,
+                   unsigned long flags,
+                   unsigned int store_evtchn,
+                   unsigned long *store_mfn,
+                   unsigned int console_evtchn,
+                   unsigned long *console_mfn)
+{
+    struct xc_dom_image *dom;
+    int ret;
+
+    xc_dom_loginit(xch);
+    dom = xc_dom_allocate(xch, cmdline, features);
+    if ( dom == NULL )
+        return -1;
+
+    ret = xc_dom_kernel_file(dom, image_name);
+    if ( ret != 0 )
+        goto out;
+
+    /* The initrd is optional; an empty name means "none". */
+    if ( initrd_name && strlen(initrd_name) )
+    {
+        ret = xc_dom_module_file(dom, initrd_name, NULL);
+        if ( ret != 0 )
+            goto out;
+    }
+
+    dom->flags |= flags;
+    dom->console_evtchn = console_evtchn;
+    dom->xenstore_evtchn = store_evtchn;
+
+    /* Run the builder pipeline, stopping at the first failing step. */
+    if ( (ret = xc_dom_boot_xen_init(dom, xch, domid)) == 0 &&
+         (ret = xc_dom_parse_image(dom)) == 0 &&
+         (ret = xc_dom_mem_init(dom, mem_mb)) == 0 &&
+         (ret = xc_dom_boot_mem_init(dom)) == 0 &&
+         (ret = xc_dom_build_image(dom)) == 0 &&
+         (ret = xc_dom_boot_image(dom)) == 0 &&
+         (ret = xc_dom_gnttab_init(dom)) == 0 )
+    {
+        *console_mfn = xc_dom_p2m(dom, dom->console_pfn);
+        *store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
+    }
+
+ out:
+    xc_dom_release(dom);
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c
new file mode 100644 (file)
index 0000000..1c91cce
--- /dev/null
@@ -0,0 +1,1272 @@
+/*
+ * Xen domain builder -- core bits.
+ *
+ * The core code goes here:
+ *   - allocate and release domain structs.
+ *   - memory management functions.
+ *   - misc helper functions.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <zlib.h>
+#include <assert.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+#include "_paths.h"
+
+/* ------------------------------------------------------------------------ */
+/* debugging                                                                */
+
+
+
+static const char *default_logfile = XEN_LOG_DIR "/domain-builder-ng.log";
+
+/*
+ * Initialise the domain-builder logger (idempotent: a no-op once a
+ * logger exists).  Opens the default logfile in append mode if the
+ * caller did not install a stream.  The created logger is recorded in
+ * dombuild_logger_tofree so teardown code can distinguish it from a
+ * caller-owned one.  Returns 0 on success, -1 on failure.
+ */
+int xc_dom_loginit(xc_interface *xch) {
+    if (xch->dombuild_logger) return 0;
+
+    if (!xch->dombuild_logger_file) {
+        xch->dombuild_logger_file = fopen(default_logfile, "a");
+        if (!xch->dombuild_logger_file) {
+            PERROR("Could not open logfile `%s'", default_logfile);
+            return -1;
+        }
+    }
+    
+    xch->dombuild_logger = xch->dombuild_logger_tofree =
+        (xentoollog_logger*)
+        xtl_createlogger_stdiostream(xch->dombuild_logger_file, XTL_DETAIL,
+             XTL_STDIOSTREAM_SHOW_DATE|XTL_STDIOSTREAM_SHOW_PID);
+    if (!xch->dombuild_logger)
+        return -1;
+
+    xc_dom_printf(xch, "### ----- xc domain builder logfile opened -----");
+
+    return 0;
+}
+
+/*
+ * printf-style detail-level logging to the domain-builder logger.
+ * Silently does nothing when no logger has been set up.
+ */
+void xc_dom_printf(xc_interface *xch, const char *fmt, ...)
+{
+    va_list args;
+    if (!xch->dombuild_logger) return;
+    va_start(args, fmt);
+    xtl_logv(xch->dombuild_logger, XTL_DETAIL, -1, "domainbuilder", fmt, args);
+    va_end(args);
+}
+
+/*
+ * Report a fatal domain-builder error (backend for the xc_dom_panic
+ * macro, which supplies file/line).  The message is formatted into a
+ * bounded buffer and routed to the domain-builder logger when one
+ * exists, otherwise to the generic error handler.
+ */
+void xc_dom_panic_func(xc_interface *xch,
+                       const char *file, int line, xc_error_code err,
+                       const char *fmt, ...)
+{
+    va_list args;
+    char msg[XC_MAX_ERROR_MSG_LEN];
+
+    va_start(args, fmt);
+    vsnprintf(msg, sizeof(msg), fmt, args);
+    va_end(args);
+    /* vsnprintf NUL-terminates, but be explicit for safety. */
+    msg[sizeof(msg)-1] = 0;
+    
+    xc_report(xch,
+              xch->dombuild_logger ? xch->dombuild_logger : xch->error_handler,
+              XTL_ERROR, err, "panic: %s:%d: %s",
+              file, line, msg);
+}
+
+/* Log a labelled byte count, scaled to MB/kB above 32MB/32kB. */
+static void print_mem(struct xc_dom_image *dom, const char *name, size_t mem)
+{
+    if ( mem > (32 * 1024 * 1024) )
+        DOMPRINTF("%-24s : %zd MB", name, mem / (1024 * 1024));
+    else if ( mem > (32 * 1024) )
+        DOMPRINTF("%-24s : %zd kB", name, mem / 1024);
+    else
+        DOMPRINTF("%-24s : %zd bytes", name, mem);
+}
+
+/* Dump the builder's memory-accounting counters to the log. */
+void xc_dom_log_memory_footprint(struct xc_dom_image *dom)
+{
+    DOMPRINTF("domain builder memory footprint");
+    DOMPRINTF("   allocated");
+    print_mem(dom, "      malloc", dom->alloc_malloc);
+    print_mem(dom, "      anon mmap", dom->alloc_mem_map);
+    DOMPRINTF("   mapped");
+    print_mem(dom, "      file mmap", dom->alloc_file_map);
+    print_mem(dom, "      domU mmap", dom->alloc_domU_map);
+}
+
+/* ------------------------------------------------------------------------ */
+/* simple memory pool                                                       */
+
+/*
+ * Pool allocator: zeroed memory owned by the dom image, freed all at
+ * once by xc_dom_free_all() on release.  Returns NULL on failure.
+ */
+void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *block;
+
+    /* Guard the header+payload size sum against overflow. */
+    if ( size > SIZE_MAX - sizeof(*block) )
+    {
+        DOMPRINTF("%s: unreasonable allocation size", __FUNCTION__);
+        return NULL;
+    }
+    block = malloc(sizeof(*block) + size);
+    if ( block == NULL )
+    {
+        DOMPRINTF("%s: allocation failed", __FUNCTION__);
+        return NULL;
+    }
+    memset(block, 0, sizeof(*block) + size);
+    block->type = XC_DOM_MEM_TYPE_MALLOC_INTERNAL;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block) + size;
+    /* Only log sizeable allocations to keep the log readable. */
+    if ( size > (100 * 1024) )
+        print_mem(dom, __FUNCTION__, size);
+    return block->memory;
+}
+
+/*
+ * Pool allocator for page-aligned memory: the payload comes from an
+ * anonymous mmap (page-aligned and zero-filled by the kernel), while
+ * the bookkeeping header is malloc'd.  Freed by xc_dom_free_all().
+ */
+void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
+{
+    struct xc_dom_mem *block;
+
+    block = malloc(sizeof(*block));
+    if ( block == NULL )
+    {
+        DOMPRINTF("%s: allocation failed", __FUNCTION__);
+        return NULL;
+    }
+    memset(block, 0, sizeof(*block));
+    block->len = size;
+    block->ptr = mmap(NULL, block->len,
+                      PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+                      -1, 0);
+    if ( block->ptr == MAP_FAILED )
+    {
+        DOMPRINTF("%s: mmap failed", __FUNCTION__);
+        free(block);
+        return NULL;
+    }
+    block->type = XC_DOM_MEM_TYPE_MMAP;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_mem_map += block->len;
+    if ( size > (100 * 1024) )
+        print_mem(dom, __FUNCTION__, size);
+    return block->ptr;
+}
+
+/*
+ * Transfer ownership of an externally malloc'd buffer to the dom
+ * image's memory pool; it will be free()d by xc_dom_free_all().
+ * Returns 0 on success, -1 on failure (ownership stays with caller).
+ * NOTE(review): the buffer is counted in alloc_mem_map even though it
+ * is heap memory, not an mmap — presumably intentional accounting;
+ * confirm against the footprint reporting expectations.
+ */
+int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size)
+{
+    struct xc_dom_mem *block;
+
+    block = malloc(sizeof(*block));
+    if ( block == NULL )
+    {
+        DOMPRINTF("%s: allocation failed", __FUNCTION__);
+        return -1;
+    }
+    memset(block, 0, sizeof(*block));
+    block->ptr = ptr;
+    block->len = size;
+    block->type = XC_DOM_MEM_TYPE_MALLOC_EXTERNAL;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_mem_map += block->len;
+    return 0;
+}
+
+/*
+ * mmap a whole file read-only into the dom image's memory pool.
+ * *size receives the file length; max_size (when non-zero) bounds the
+ * accepted file size.  Zero-length files are rejected.  The mapping
+ * lives until xc_dom_free_all().  Returns the mapping or NULL.
+ */
+void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
+                            const char *filename, size_t * size,
+                            const size_t max_size)
+{
+    struct xc_dom_mem *block = NULL;
+    int fd = -1;
+    off_t offset;
+
+    fd = open(filename, O_RDONLY);
+    if ( fd == -1 ) {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "failed to open file '%s': %s",
+                     filename, strerror(errno));
+        goto err;
+    }
+
+    /* Determine the file length by seeking to its end. */
+    if ( (lseek(fd, 0, SEEK_SET) == -1) ||
+         ((offset = lseek(fd, 0, SEEK_END)) == -1) ) {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "failed to seek on file '%s': %s",
+                     filename, strerror(errno));
+        goto err;
+    }
+
+    *size = offset;
+
+    if ( max_size && *size > max_size )
+    {
+        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+                     "tried to map file which is too large");
+        goto err;
+    }
+
+    /* mmap of length 0 would fail; reject empty files explicitly. */
+    if ( !*size )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "'%s': zero length file", filename);
+        goto err;
+    }
+
+    block = malloc(sizeof(*block));
+    if ( block == NULL ) {
+        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+                     "failed to allocate block (%zu bytes)",
+                     sizeof(*block));
+        goto err;
+    }
+
+    memset(block, 0, sizeof(*block));
+    block->len = *size;
+    block->ptr = mmap(NULL, block->len, PROT_READ,
+                           MAP_SHARED, fd, 0);
+    if ( block->ptr == MAP_FAILED ) {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "failed to mmap file '%s': %s",
+                     filename, strerror(errno));
+        goto err;
+    }
+
+    block->type = XC_DOM_MEM_TYPE_MMAP;
+    block->next = dom->memblocks;
+    dom->memblocks = block;
+    dom->alloc_malloc += sizeof(*block);
+    dom->alloc_file_map += block->len;
+    /* The mapping survives closing the descriptor. */
+    close(fd);
+    if ( *size > (100 * 1024) )
+        print_mem(dom, __FUNCTION__, *size);
+    return block->ptr;
+
+ err:
+    if ( fd != -1 )
+        close(fd);
+    free(block);
+    DOMPRINTF("%s: failed (on file `%s')", __FUNCTION__, filename);
+    return NULL;
+}
+
+/*
+ * Release every block in the memory pool, dispatching on how each was
+ * obtained: internal malloc headers carry their payload inline (freed
+ * with the header), external pointers get free(), mmaps get munmap().
+ */
+static void xc_dom_free_all(struct xc_dom_image *dom)
+{
+    struct xc_dom_mem *block;
+
+    while ( (block = dom->memblocks) != NULL )
+    {
+        dom->memblocks = block->next;
+        switch ( block->type )
+        {
+        case XC_DOM_MEM_TYPE_MALLOC_INTERNAL:
+            break;
+        case XC_DOM_MEM_TYPE_MALLOC_EXTERNAL:
+            free(block->ptr);
+            break;
+        case XC_DOM_MEM_TYPE_MMAP:
+            munmap(block->ptr, block->len);
+            break;
+        }
+        free(block);
+    }
+}
+
+/* strdup() into the dom memory pool; NULL on allocation failure. */
+char *xc_dom_strdup(struct xc_dom_image *dom, const char *str)
+{
+    size_t len = strlen(str) + 1;
+    char *nstr = xc_dom_malloc(dom, len);
+
+    if ( nstr == NULL )
+        return NULL;
+    memcpy(nstr, str, len);
+    return nstr;
+}
+
+/* ------------------------------------------------------------------------ */
+/* decompression buffer sizing                                              */
+/*
+ * Enforce the configured kernel-size ceiling (0 == unlimited).
+ * Returns 0 when sz is acceptable, 1 (after panicking) when too big.
+ */
+int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz)
+{
+    /* No limit */
+    if ( !dom->max_kernel_size )
+        return 0;
+
+    if ( sz > dom->max_kernel_size )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "kernel image too large");
+        return 1;
+    }
+
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* read files, copy memory blocks, with transparent gunzip                  */
+
+/*
+ * Probe a blob for a gzip header and return a decompression buffer
+ * size (uncompressed length from the gzip trailer, plus slack), or 0
+ * when the blob is not gzipped / too small / implausibly large.
+ */
+size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen)
+{
+    unsigned char *gzlen;
+    size_t unziplen;
+
+    if ( ziplen < 6 )
+        /* Too small.  We need (i.e. the subsequent code relies on)
+         * 2 bytes for the magic number plus 4 bytes length. */
+        return 0;
+
+    if ( strncmp(blob, "\037\213", 2) )
+        /* not gzipped */
+        return 0;
+
+    /* The last 4 bytes of a gzip stream hold the uncompressed size
+     * (ISIZE), little-endian. */
+    gzlen = blob + ziplen - 4;
+    unziplen = (size_t)gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0];
+    if ( unziplen > XC_DOM_DECOMPRESS_MAX )
+    {
+        xc_dom_printf
+            (xch,
+             "%s: size (zip %zd, unzip %zd) looks insane, skip gunzip",
+             __FUNCTION__, ziplen, unziplen);
+        return 0;
+    }
+
+    return unziplen + 16;
+}
+
+/*
+ * One-shot gunzip of src (srclen bytes) into dst (dstlen bytes) using
+ * zlib.  The whole stream must fit and finish in a single inflate
+ * call.  Returns 0 on success, -1 (after panicking) on failure.
+ */
+int xc_dom_do_gunzip(xc_interface *xch,
+                     void *src, size_t srclen, void *dst, size_t dstlen)
+{
+    z_stream zStream;
+    int rc;
+
+    memset(&zStream, 0, sizeof(zStream));
+    zStream.next_in = src;
+    zStream.avail_in = srclen;
+    zStream.next_out = dst;
+    zStream.avail_out = dstlen;
+    rc = inflateInit2(&zStream, (MAX_WBITS + 32)); /* +32 means "handle gzip" */
+    if ( rc != Z_OK )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: inflateInit2 failed (rc=%d)", __FUNCTION__, rc);
+        return -1;
+    }
+    rc = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
+    if ( rc != Z_STREAM_END )
+    {
+        xc_dom_panic(xch, XC_INTERNAL_ERROR,
+                     "%s: inflate failed (rc=%d)", __FUNCTION__, rc);
+        return -1;
+    }
+
+    xc_dom_printf(xch, "%s: unzip ok, 0x%zx -> 0x%zx",
+                  __FUNCTION__, srclen, dstlen);
+    return 0;
+}
+
+/*
+ * If *blob looks gzipped, decompress it into pool memory and replace
+ * *blob/*size with the uncompressed data.  Non-gzipped input (and
+ * input exceeding the kernel size limit) is left untouched and is not
+ * an error.  Returns 0 on success or pass-through, -1 on failure.
+ */
+int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size)
+{
+    void *unzip;
+    size_t unziplen;
+
+    unziplen = xc_dom_check_gzip(dom->xch, *blob, *size);
+    if ( unziplen == 0 )
+        return 0;
+
+    if ( xc_dom_kernel_check_size(dom, unziplen) )
+        return 0;
+
+    unzip = xc_dom_malloc(dom, unziplen);
+    if ( unzip == NULL )
+        return -1;
+
+    if ( xc_dom_do_gunzip(dom->xch, *blob, *size, unzip, unziplen) == -1 )
+        return -1;
+
+    *blob = unzip;
+    *size = unziplen;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+/* domain memory                                                            */
+
+/* Map a guest pfn range to a pointer, discarding the count-out value. */
+void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
+                        xen_pfn_t count)
+{
+    xen_pfn_t count_out_dummy;
+    return xc_dom_pfn_to_ptr_retcount(dom, pfn, count, &count_out_dummy);
+}
+
+/*
+ * Return a pointer to guest pfn range [pfn, pfn+count), allocating and
+ * mapping backing memory on first use.  With count == 0 the range must
+ * already be mapped and *count_out reports how many pages are
+ * contiguously available from pfn; with count != 0 the request must
+ * either lie wholly inside one existing block or not overlap any.
+ * Backing comes from a domU foreign mapping when a guest domid is set,
+ * otherwise from anonymous mmap.  Returns NULL on any failure, with
+ * *count_out zeroed.
+ */
+void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t pfn,
+                                 xen_pfn_t count, xen_pfn_t *count_out)
+{
+    struct xc_dom_phys *phys;
+    xen_pfn_t offset;
+    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    char *mode = "unset";
+
+    *count_out = 0;
+
+    offset = pfn - dom->rambase_pfn;
+    if ( offset > dom->total_pages || /* multiple checks to avoid overflows */
+         count > dom->total_pages ||
+         offset > dom->total_pages - count )
+    {
+        DOMPRINTF("%s: pfn %"PRI_xen_pfn" out of range (0x%" PRIpfn " > 0x%" PRIpfn ")",
+                  __FUNCTION__, pfn, offset, dom->total_pages);
+        return NULL;
+    }
+
+    /* already allocated? */
+    for ( phys = dom->phys_pages; phys != NULL; phys = phys->next )
+    {
+        if ( pfn >= (phys->first + phys->count) )
+            continue;
+        if ( count )
+        {
+            /* size given: must be completely within the already allocated block */
+            if ( (pfn + count) <= phys->first )
+                continue;
+            if ( (pfn < phys->first) ||
+                 ((pfn + count) > (phys->first + phys->count)) )
+            {
+                DOMPRINTF("%s: request overlaps allocated block"
+                          " (req 0x%" PRIpfn "+0x%" PRIpfn ","
+                          " blk 0x%" PRIpfn "+0x%" PRIpfn ")",
+                          __FUNCTION__, pfn, count, phys->first,
+                          phys->count);
+                return NULL;
+            }
+            *count_out = count;
+        }
+        else
+        {
+            /* no size given: block must be allocated already,
+               just hand out a pointer to it */
+            if ( pfn < phys->first )
+                continue;
+            if ( pfn >= phys->first + phys->count )
+                continue;
+            *count_out = phys->count - (pfn - phys->first);
+        }
+        return phys->ptr + ((pfn - phys->first) << page_shift);
+    }
+
+    /* allocating is allowed with size specified only */
+    if ( count == 0 )
+    {
+        DOMPRINTF("%s: no block found, no size given,"
+                  " can't malloc (pfn 0x%" PRIpfn ")",
+                  __FUNCTION__, pfn);
+        return NULL;
+    }
+
+    /* not found, no overlap => allocate */
+    phys = xc_dom_malloc(dom, sizeof(*phys));
+    if ( phys == NULL )
+        return NULL;
+    memset(phys, 0, sizeof(*phys));
+    phys->first = pfn;
+    phys->count = count;
+
+    if ( dom->guest_domid )
+    {
+        /* Building into a live domain: map the guest's own pages. */
+        mode = "domU mapping";
+        phys->ptr = xc_dom_boot_domU_map(dom, phys->first, phys->count);
+        if ( phys->ptr == NULL )
+            return NULL;
+        dom->alloc_domU_map += phys->count << page_shift;
+    }
+    else
+    {
+        int err;
+
+        /* No domain yet: stage the pages in anonymous memory. */
+        mode = "anonymous memory";
+        phys->ptr = mmap(NULL, phys->count << page_shift,
+                         PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+                         -1, 0);
+        if ( phys->ptr == MAP_FAILED )
+        {
+            err = errno;
+            xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+                         "%s: oom: can't allocate 0x%" PRIpfn " pages"
+                         " [mmap, errno=%i (%s)]",
+                         __FUNCTION__, count, err, strerror(err));
+            return NULL;
+        }
+        dom->alloc_mem_map += phys->count << page_shift;
+    }
+
+#if 1
+    DOMPRINTF("%s: %s: pfn 0x%" PRIpfn "+0x%" PRIpfn " at %p",
+              __FUNCTION__, mode, phys->first, phys->count, phys->ptr);
+#endif
+    phys->next = dom->phys_pages;
+    dom->phys_pages = phys;
+    return phys->ptr;
+}
+
+/*
+ * Reserve `pages` pages of guest address space for segment `name`,
+ * advancing pfn_alloc_end / virt_alloc_end, after checking the request
+ * fits in total_pages.  Invokes the optional per-arch allocate hook.
+ * Returns 0 on success, -1 (after panicking) when out of space.
+ */
+static int xc_dom_chk_alloc_pages(struct xc_dom_image *dom, char *name,
+                                  xen_pfn_t pages)
+{
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+
+    if ( pages > dom->total_pages || /* multiple test avoids overflow probs */
+         dom->pfn_alloc_end - dom->rambase_pfn > dom->total_pages ||
+         pages > dom->total_pages - dom->pfn_alloc_end + dom->rambase_pfn )
+    {
+        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
+                     "%s: segment %s too large (0x%"PRIpfn" > "
+                     "0x%"PRIpfn" - 0x%"PRIpfn" pages)", __FUNCTION__, name,
+                     pages, dom->total_pages,
+                     dom->pfn_alloc_end - dom->rambase_pfn);
+        return -1;
+    }
+
+    dom->pfn_alloc_end += pages;
+    dom->virt_alloc_end += pages * page_size;
+
+    if ( dom->allocate )
+        dom->allocate(dom);
+
+    return 0;
+}
+
+/*
+ * Pad the allocation pointer forward to a page-aligned virtual
+ * boundary (which must not lie below the current allocation end).
+ * Returns 0 on success, -1 on error.
+ */
+static int xc_dom_alloc_pad(struct xc_dom_image *dom, xen_vaddr_t boundary)
+{
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    xen_pfn_t pages;
+
+    if ( boundary & (page_size - 1) )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: segment boundary isn't page aligned (0x%" PRIx64 ")",
+                     __FUNCTION__, boundary);
+        return -1;
+    }
+    if ( boundary < dom->virt_alloc_end )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: segment boundary too low (0x%" PRIx64 " < 0x%" PRIx64
+                     ")", __FUNCTION__, boundary, dom->virt_alloc_end);
+        return -1;
+    }
+    pages = (boundary - dom->virt_alloc_end) / page_size;
+
+    return xc_dom_chk_alloc_pages(dom, "padding", pages);
+}
+
+/*
+ * Allocate a named segment of `size` bytes at virtual address `start`
+ * (0 == place at the current allocation end), rounding up to whole
+ * pages.  The pages are mapped and zeroed, and seg is filled in with
+ * pfn/pages/vstart/vend.  Returns 0 on success, -1 on error.
+ */
+int xc_dom_alloc_segment(struct xc_dom_image *dom,
+                         struct xc_dom_seg *seg, char *name,
+                         xen_vaddr_t start, xen_vaddr_t size)
+{
+    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
+    xen_pfn_t pages;
+    void *ptr;
+
+    /* A fixed start address requires padding up to it first. */
+    if ( start && xc_dom_alloc_pad(dom, start) )
+        return -1;
+
+    pages = (size + page_size - 1) / page_size;
+    start = dom->virt_alloc_end;
+
+    seg->pfn = dom->pfn_alloc_end;
+    seg->pages = pages;
+
+    if ( xc_dom_chk_alloc_pages(dom, name, pages) )
+        return -1;
+
+    /* map and clear pages */
+    ptr = xc_dom_seg_to_ptr(dom, seg);
+    if ( ptr == NULL )
+        return -1;
+    memset(ptr, 0, pages * page_size);
+
+    seg->vstart = start;
+    seg->vend = dom->virt_alloc_end;
+
+    DOMPRINTF("%-20s:   %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
+              "  (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)",
+              __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn, pages);
+
+    return 0;
+}
+
+/*
+ * Allocate a single named page at the current allocation end and
+ * return its pfn (relative to rambase), or INVALID_PFN on failure.
+ */
+xen_pfn_t xc_dom_alloc_page(struct xc_dom_image *dom, char *name)
+{
+    xen_vaddr_t start;
+    xen_pfn_t pfn;
+
+    start = dom->virt_alloc_end;
+    pfn = dom->pfn_alloc_end - dom->rambase_pfn;
+
+    if ( xc_dom_chk_alloc_pages(dom, name, 1) )
+        return INVALID_PFN;
+
+    DOMPRINTF("%-20s:   %-12s : 0x%" PRIx64 " (pfn 0x%" PRIpfn ")",
+              __FUNCTION__, name, start, pfn);
+    return pfn;
+}
+
+/*
+ * Unmap the phys_pages block containing pfn, unlink it from the list,
+ * and flush the guest's cache for the covered range.  A pfn with no
+ * mapping is logged and ignored.
+ */
+void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
+{
+    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
+    struct xc_dom_phys *phys, *prev = NULL;
+
+    /* Find the block covering pfn, keeping prev for list unlinking. */
+    for ( phys = dom->phys_pages; phys != NULL; phys = phys->next )
+    {
+        if ( (pfn >= phys->first) && (pfn < (phys->first + phys->count)) )
+            break;
+        prev = phys;
+    }
+    if ( !phys )
+    {
+        DOMPRINTF("%s: Huh? no mapping with pfn 0x%" PRIpfn "",
+                  __FUNCTION__, pfn);
+        return;
+    }
+
+    munmap(phys->ptr, phys->count << page_shift);
+    if ( prev )
+        prev->next = phys->next;
+    else
+        dom->phys_pages = phys->next;
+
+    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);
+}
+
+/* Unmap every phys_pages block (each call removes the list head). */
+void xc_dom_unmap_all(struct xc_dom_image *dom)
+{
+    while ( dom->phys_pages )
+        xc_dom_unmap_one(dom, dom->phys_pages->first);
+}
+
+/* ------------------------------------------------------------------------ */
+/* pluggable kernel loaders                                                 */
+
+/* Singly-linked registries of kernel loaders and per-arch hooks,
+ * populated at init time via the register functions below. */
+static struct xc_dom_loader *first_loader = NULL;
+static struct xc_dom_arch *first_hook = NULL;
+
+/* Prepend a kernel loader to the registry (no locking: init-time only). */
+void xc_dom_register_loader(struct xc_dom_loader *loader)
+{
+    loader->next = first_loader;
+    first_loader = loader;
+}
+
+/*
+ * Probe each registered loader against the kernel image and return
+ * the first that accepts it; panic and return NULL when none does.
+ */
+static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom)
+{
+    struct xc_dom_loader *loader = first_loader;
+
+    while ( loader != NULL )
+    {
+        DOMPRINTF("%s: trying %s loader ... ", __FUNCTION__, loader->name);
+        if ( loader->probe(dom) == 0 )
+        {
+            DOMPRINTF("loader probe OK");
+            return loader;
+        }
+        DOMPRINTF("loader probe failed");
+        loader = loader->next;
+    }
+    xc_dom_panic(dom->xch,
+                 XC_INVALID_KERNEL, "%s: no loader found", __FUNCTION__);
+    return NULL;
+}
+
+/* Prepend an arch hook set to the registry (no locking: init-time only). */
+void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks)
+{
+    hooks->next = first_hook;
+    first_hook = hooks;
+}
+
+/*
+ * Select the arch hooks matching dom->guest_type and, when the hooks
+ * declare per-arch private state, allocate and zero it (freed by
+ * xc_dom_release).  Returns 0 on success, -1 when no hooks match or
+ * allocation fails.
+ */
+int xc_dom_set_arch_hooks(struct xc_dom_image *dom)
+{
+    struct xc_dom_arch *hooks = first_hook;
+
+    while (  hooks != NULL )
+    {
+        if ( !strcmp(hooks->guest_type, dom->guest_type) )
+        {
+            if ( hooks->arch_private_size )
+            {
+                dom->arch_private = malloc(hooks->arch_private_size);
+                if ( dom->arch_private == NULL )
+                    return -1;
+                memset(dom->arch_private, 0, hooks->arch_private_size);
+                dom->alloc_malloc += hooks->arch_private_size;
+            }
+            dom->arch_hooks = hooks;
+            return 0;
+        }
+        hooks = hooks->next;
+    }
+    xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                 "%s: not found (type %s)", __FUNCTION__, dom->guest_type);
+    return -1;
+}
+
+/* ------------------------------------------------------------------------ */
+/* public interface                                                         */
+
+/*
+ * Tear down a dom image: unmap any guest mappings, free the whole
+ * memory pool, the arch-private state, and the struct itself.
+ */
+void xc_dom_release(struct xc_dom_image *dom)
+{
+    DOMPRINTF_CALLED(dom->xch);
+    if ( dom->phys_pages )
+        xc_dom_unmap_all(dom);
+    xc_dom_free_all(dom);
+    free(dom->arch_private);
+    free(dom);
+}
+
+/*
+ * Allocate and initialise a fresh dom image with default size limits,
+ * unset virtual-address parameters, and the given command line and
+ * feature string (both optional).  Release with xc_dom_release().
+ * Returns NULL on allocation failure.
+ */
+struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
+                                     const char *cmdline, const char *features)
+{
+    struct xc_dom_image *dom;
+
+    xc_dom_printf(xch, "%s: cmdline=\"%s\", features=\"%s\"",
+                  __FUNCTION__, cmdline ? cmdline : "",
+                  features ? features : "");
+    dom = malloc(sizeof(*dom));
+    if ( !dom )
+        goto err;
+
+    memset(dom, 0, sizeof(*dom));
+    dom->xch = xch;
+
+    dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX;
+    dom->max_module_size = XC_DOM_DECOMPRESS_MAX;
+    dom->max_devicetree_size = XC_DOM_DECOMPRESS_MAX;
+
+    if ( cmdline )
+        dom->cmdline = xc_dom_strdup(dom, cmdline);
+    if ( features )
+        elf_xen_parse_features(features, dom->f_requested, NULL);
+
+    /* UNSET_ADDR marks values still to be filled in by the loader. */
+    dom->parms.virt_base = UNSET_ADDR;
+    dom->parms.virt_entry = UNSET_ADDR;
+    dom->parms.virt_hypercall = UNSET_ADDR;
+    dom->parms.virt_hv_start_low = UNSET_ADDR;
+    dom->parms.elf_paddr_offset = UNSET_ADDR;
+    dom->parms.p2m_base = UNSET_ADDR;
+
+    dom->flags = SIF_VIRT_P2M_4TOOLS;
+
+    dom->alloc_malloc += sizeof(*dom);
+    return dom;
+
+ err:
+    if ( dom )
+        xc_dom_release(dom);
+    return NULL;
+}
+
+/* Override the kernel image size limit (0 == unlimited). */
+int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz)
+{
+    DOMPRINTF("%s: kernel_max_size=%zx", __FUNCTION__, sz);
+    dom->max_kernel_size = sz;
+    return 0;
+}
+
+/* Override the module (e.g. initrd) size limit (0 == unlimited). */
+int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz)
+{
+    DOMPRINTF("%s: module_max_size=%zx", __FUNCTION__, sz);
+    dom->max_module_size = sz;
+    return 0;
+}
+
+/* Override the device tree size limit (0 == unlimited). */
+int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz)
+{
+    DOMPRINTF("%s: devicetree_max_size=%zx", __FUNCTION__, sz);
+    dom->max_devicetree_size = sz;
+    return 0;
+}
+
+/*
+ * Load the kernel from a file (mmap'd, size-limited) and transparently
+ * gunzip it if needed.  Returns 0 on success, -1 on error.
+ */
+int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename)
+{
+    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
+    dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size,
+                                             dom->max_kernel_size);
+    if ( dom->kernel_blob == NULL )
+        return -1;
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Load a module (e.g. initrd) from a file into the next free module
+ * slot, with an optional per-module command line (pool-copied).
+ * Returns 0 on success, -1 on error.
+ */
+int xc_dom_module_file(struct xc_dom_image *dom, const char *filename, const char *cmdline)
+{
+    unsigned int mod = dom->num_modules++;
+
+    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
+    dom->modules[mod].blob =
+        xc_dom_malloc_filemap(dom, filename, &dom->modules[mod].size,
+                              dom->max_module_size);
+
+    if ( dom->modules[mod].blob == NULL )
+        return -1;
+
+    if ( cmdline )
+    {
+        dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
+
+        if ( dom->modules[mod].cmdline == NULL )
+            return -1;
+    }
+    else
+    {
+        dom->modules[mod].cmdline = NULL;
+    }
+
+    return 0;
+}
+
+/*
+ * Load a device tree blob from a file (mmap'd, size-limited).
+ * Only supported on Arm; elsewhere fails with errno set to EINVAL.
+ * Returns 0 on success, -1 on error.
+ */
+int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename)
+{
+#if defined (__arm__) || defined(__aarch64__)
+    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
+    dom->devicetree_blob =
+        xc_dom_malloc_filemap(dom, filename, &dom->devicetree_size,
+                              dom->max_devicetree_size);
+
+    if ( dom->devicetree_blob == NULL )
+        return -1;
+    return 0;
+#else
+    /* errno values are positive; "errno = -EINVAL" was a bug. */
+    errno = EINVAL;
+    return -1;
+#endif
+}
+
+/*
+ * Use a caller-provided in-memory kernel blob (not copied; caller
+ * keeps ownership) and transparently gunzip it if needed.
+ */
+int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, size_t memsize)
+{
+    DOMPRINTF_CALLED(dom->xch);
+    dom->kernel_blob = (void *)mem;
+    dom->kernel_size = memsize;
+    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
+}
+
+/*
+ * Use a caller-provided in-memory module blob (not copied; caller
+ * keeps ownership) with an optional command line (pool-copied).
+ * Returns 0 on success, -1 on error.
+ */
+int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
+                      size_t memsize, const char *cmdline)
+{
+    unsigned int mod = dom->num_modules++;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    dom->modules[mod].blob = (void *)mem;
+    dom->modules[mod].size = memsize;
+
+    if ( cmdline )
+    {
+        dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
+
+        if ( dom->modules[mod].cmdline == NULL )
+            return -1;
+    }
+    else
+    {
+        dom->modules[mod].cmdline = NULL;
+    }
+
+    return 0;
+}
+
+/*
+ * Use a caller-provided in-memory device tree blob (not copied;
+ * caller keeps ownership).
+ */
+int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem,
+                          size_t memsize)
+{
+    DOMPRINTF_CALLED(dom->xch);
+    dom->devicetree_blob = (void *)mem;
+    dom->devicetree_size = memsize;
+    return 0;
+}
+
+/*
+ * Find a matching kernel loader, run its parser (which must set
+ * dom->guest_type), then verify that every feature requested on the
+ * command line or required by the kernel is actually supported.
+ * Returns 0 on success, -1 on error.
+ */
+int xc_dom_parse_image(struct xc_dom_image *dom)
+{
+    int i;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    /* parse kernel image */
+    dom->kernel_loader = xc_dom_find_loader(dom);
+    if ( dom->kernel_loader == NULL )
+        goto err;
+    if ( dom->kernel_loader->parser(dom) != 0 )
+        goto err;
+    if ( dom->guest_type == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                     "%s: guest_type not set", __FUNCTION__);
+        goto err;
+    }
+
+    /* check features */
+    for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
+    {
+        dom->f_active[i] |= dom->f_requested[i]; /* cmd line */
+        dom->f_active[i] |= dom->parms.f_required[i]; /* kernel   */
+        /* Every active feature bit must also be a supported bit. */
+        if ( (dom->f_active[i] & dom->parms.f_supported[i]) !=
+             dom->f_active[i] )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_PARAM,
+                         "%s: unsupported feature requested", __FUNCTION__);
+            goto err;
+        }
+    }
+    return 0;
+
+ err:
+    return -1;
+}
+
+/*
+ * Set the guest RAM base (as a pfn) and start allocating from there.
+ * rambase is a byte address; must be called before page allocation.
+ */
+int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase)
+{
+    dom->rambase_pfn = rambase >> XC_PAGE_SHIFT;
+    dom->pfn_alloc_end = dom->rambase_pfn;
+    DOMPRINTF("%s: RAM starts at %"PRI_xen_pfn,
+              __FUNCTION__, dom->rambase_pfn);
+    return 0;
+}
+
+/*
+ * Bind the arch hooks for the parsed guest type and set the guest's
+ * total page budget from a size in MB.  Returns 0 on success, -1 on
+ * error.
+ */
+int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb)
+{
+    unsigned int page_shift;
+    xen_pfn_t nr_pages;
+
+    if ( xc_dom_set_arch_hooks(dom) )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, "%s: arch hooks not set",
+                     __FUNCTION__);
+        return -1;
+    }
+
+    /* Convert MB to pages at the arch's page size. */
+    page_shift = XC_DOM_PAGE_SHIFT(dom);
+    nr_pages = mem_mb << (20 - page_shift);
+
+    DOMPRINTF("%s: mem %d MB, pages 0x%" PRIpfn " pages, %dk each",
+               __FUNCTION__, mem_mb, nr_pages, 1 << (page_shift-10));
+    dom->total_pages = nr_pages;
+
+    DOMPRINTF("%s: 0x%" PRIpfn " pages",
+              __FUNCTION__, dom->total_pages);
+
+    return 0;
+}
+
+/*
+ * Place module `mod` into guest memory: allocate a segment sized for
+ * the larger of compressed and (when gzipped) uncompressed forms,
+ * attempt in-place gunzip, and fall back to copying the raw blob.
+ * Returns 0 on success, -1 on error.
+ */
+static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod)
+{
+    size_t unziplen, modulelen;
+    void *modulemap;
+    char name[10];
+
+    /* Only consider gunzip when the module has no fixed placement. */
+    if ( !dom->modules[mod].seg.vstart )
+        unziplen = xc_dom_check_gzip(dom->xch,
+                                     dom->modules[mod].blob, dom->modules[mod].size);
+    else
+        unziplen = 0;
+
+    modulelen = max(unziplen, dom->modules[mod].size);
+    if ( dom->max_module_size )
+    {
+        /* If decompressing would exceed the limit, try the raw blob. */
+        if ( unziplen && modulelen > dom->max_module_size )
+        {
+            modulelen = min(unziplen, dom->modules[mod].size);
+            if ( unziplen > modulelen )
+                unziplen = 0;
+        }
+        if ( modulelen > dom->max_module_size )
+        {
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "module %u image too large", mod);
+            goto err;
+        }
+    }
+
+    snprintf(name, sizeof(name), "module%u", mod);
+    if ( xc_dom_alloc_segment(dom, &dom->modules[mod].seg, name,
+                              dom->modules[mod].seg.vstart, modulelen) != 0 )
+        goto err;
+    modulemap = xc_dom_seg_to_ptr(dom, &dom->modules[mod].seg);
+    if ( modulemap == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->modules[%u].seg) => NULL",
+                  __FUNCTION__, mod);
+        goto err;
+    }
+    if ( unziplen )
+    {
+        if ( xc_dom_do_gunzip(dom->xch, dom->modules[mod].blob, dom->modules[mod].size,
+                              modulemap, unziplen) != -1 )
+            return 0;
+        /* Gunzip failed; the raw blob must fit in the segment. */
+        if ( dom->modules[mod].size > modulelen )
+            goto err;
+    }
+
+    /* Fall back to handing over the raw blob. */
+    memcpy(modulemap, dom->modules[mod].blob, dom->modules[mod].size);
+    /* If an unzip attempt was made, the buffer may no longer be all zero. */
+    if ( unziplen > dom->modules[mod].size )
+        memset(modulemap + dom->modules[mod].size, 0,
+               unziplen - dom->modules[mod].size);
+
+    return 0;
+
+ err:
+    return -1;
+}
+
+/*
+ * Ensure each gfn in `extents` is backed: try to populate a fresh
+ * page; when that fails (gfn already in use or no memory), steal a
+ * frame from the top of high memory (falling back to low memory once
+ * highmem is exhausted) and remap it to the target gfn.
+ * Returns 0 on success, non-zero hypercall error otherwise.
+ */
+static int populate_acpi_pages(struct xc_dom_image *dom,
+                               xen_pfn_t *extents,
+                               unsigned int num_pages)
+{
+    int rc;
+    xc_interface *xch = dom->xch;
+    uint32_t domid = dom->guest_domid;
+    unsigned long idx;
+    unsigned long first_high_idx = 4UL << (30 - PAGE_SHIFT); /* 4GB */
+
+    for ( ; num_pages; num_pages--, extents++ )
+    {
+
+        if ( xc_domain_populate_physmap(xch, domid, 1, 0, 0, extents) == 1 )
+            continue;
+
+        if ( dom->highmem_end )
+        {
+            idx = --dom->highmem_end;
+            /* Crossing back below 4GB: highmem is now empty. */
+            if ( idx == first_high_idx )
+                dom->highmem_end = 0;
+        }
+        else
+        {
+            idx = --dom->lowmem_end;
+        }
+
+        rc = xc_domain_add_to_physmap(xch, domid,
+                                      XENMAPSPACE_gmfn,
+                                      idx, *extents);
+        if ( rc )
+            return rc;
+    }
+
+    return 0;
+}
+
+/*
+ * Copy every ACPI module blob into the guest's physical address space.
+ *
+ * For each module the covering page range is populated (via
+ * populate_acpi_pages()), mapped into the tools domain, and the module
+ * data copied to its guest_addr_out offset.
+ * Returns 0 on success, -1 on failure.
+ */
+static int xc_dom_load_acpi(struct xc_dom_image *dom)
+{
+    int i = 0;
+    unsigned int j, num_pages;
+    xen_pfn_t *extents = NULL, base;
+    void *ptr;
+
+    while ( (i < MAX_ACPI_MODULES) && dom->acpi_modules[i].length )
+    {
+        DOMPRINTF("%s: %d bytes at address %" PRIx64, __FUNCTION__,
+                  dom->acpi_modules[i].length,
+                  dom->acpi_modules[i].guest_addr_out);
+
+        /* Pages covering [guest_addr_out, guest_addr_out + length). */
+        num_pages = (dom->acpi_modules[i].length +
+                     (dom->acpi_modules[i].guest_addr_out & ~XC_PAGE_MASK) +
+                     (XC_PAGE_SIZE - 1)) >> XC_PAGE_SHIFT;
+        extents = malloc(num_pages * sizeof(*extents));
+        if ( !extents )
+        {
+            DOMPRINTF("%s: Out of memory", __FUNCTION__);
+            goto err;
+        }
+
+        base = dom->acpi_modules[i].guest_addr_out >> XC_PAGE_SHIFT;
+        for ( j = 0; j < num_pages; j++ )
+            extents[j] = base + j;
+        if ( populate_acpi_pages(dom, extents, num_pages) )
+        {
+            DOMPRINTF("%s: Can't populate ACPI pages", __FUNCTION__);
+            goto err;
+        }
+
+        ptr = xc_map_foreign_range(dom->xch, dom->guest_domid,
+                                   XC_PAGE_SIZE * num_pages,
+                                   PROT_READ | PROT_WRITE, base);
+        if ( !ptr )
+        {
+            DOMPRINTF("%s: Can't map %d pages at 0x%"PRI_xen_pfn,
+                      __FUNCTION__, num_pages, base);
+            goto err;
+        }
+
+        /* Honour the sub-page offset within the mapped range. */
+        memcpy((uint8_t *)ptr +
+               (dom->acpi_modules[i].guest_addr_out & ~XC_PAGE_MASK),
+               dom->acpi_modules[i].data, dom->acpi_modules[i].length);
+        munmap(ptr, XC_PAGE_SIZE * num_pages);
+
+        free(extents);
+        extents = NULL;
+        i++;
+    }
+
+    return 0;
+
+err:
+    free(extents);
+    return -1;
+}
+
+/*
+ * Build the guest image: load kernel, modules, device tree and ACPI
+ * tables into the domain, then allocate the remaining special areas
+ * (p2m list, magic pages, page tables, boot stack).
+ * Returns 0 on success, -1 on failure (the failing step reports the
+ * error via xc_dom_panic() or DOMPRINTF()).
+ */
+int xc_dom_build_image(struct xc_dom_image *dom)
+{
+    unsigned int page_size;
+    bool unmapped_initrd;
+    unsigned int mod;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    /* check for arch hooks */
+    if ( dom->arch_hooks == NULL )
+    {
+        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, "%s: arch hooks not set",
+                     __FUNCTION__);
+        goto err;
+    }
+    page_size = XC_DOM_PAGE_SIZE(dom);
+    if ( dom->parms.virt_base != UNSET_ADDR )
+        dom->virt_alloc_end = dom->parms.virt_base;
+
+    /* load kernel */
+    if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
+                              dom->kernel_seg.vstart,
+                              dom->kernel_seg.vend -
+                              dom->kernel_seg.vstart) != 0 )
+        goto err;
+    if ( dom->kernel_loader->loader(dom) != 0 )
+        goto err;
+
+    /* Don't load ramdisk / other modules now if no initial mapping required. */
+    for ( mod = 0; mod < dom->num_modules; mod++ )
+    {
+        unmapped_initrd = (dom->parms.unmapped_initrd &&
+                           !dom->modules[mod].seg.vstart);
+
+        if ( dom->modules[mod].blob && !unmapped_initrd )
+        {
+            if ( xc_dom_build_module(dom, mod) != 0 )
+                goto err;
+
+            /* Module 0 is the initrd; record its virtual extent. */
+            if ( mod == 0 )
+            {
+                dom->initrd_start = dom->modules[mod].seg.vstart;
+                dom->initrd_len =
+                    dom->modules[mod].seg.vend - dom->modules[mod].seg.vstart;
+            }
+        }
+    }
+
+    /* load devicetree */
+    if ( dom->devicetree_blob )
+    {
+        void *devicetreemap;
+
+        if ( xc_dom_alloc_segment(dom, &dom->devicetree_seg, "devicetree",
+                                  dom->devicetree_seg.vstart,
+                                  dom->devicetree_size) != 0 )
+            goto err;
+        devicetreemap = xc_dom_seg_to_ptr(dom, &dom->devicetree_seg);
+        if ( devicetreemap == NULL )
+        {
+            DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->devicetree_seg) => NULL",
+                      __FUNCTION__);
+            goto err;
+        }
+        memcpy(devicetreemap, dom->devicetree_blob, dom->devicetree_size);
+    }
+
+    /* load ACPI tables */
+    if ( xc_dom_load_acpi(dom) != 0 )
+        goto err;
+
+    /* allocate other pages */
+    /* Reject an unusable p2m base (unsupported, overlapping the kernel
+     * mapping, or misaligned) by forcing it back to UNSET_ADDR. */
+    if ( !dom->arch_hooks->p2m_base_supported ||
+         dom->parms.p2m_base >= dom->parms.virt_base ||
+         (dom->parms.p2m_base & (XC_DOM_PAGE_SIZE(dom) - 1)) )
+        dom->parms.p2m_base = UNSET_ADDR;
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base == UNSET_ADDR &&
+         dom->arch_hooks->alloc_p2m_list(dom) != 0 )
+        goto err;
+    if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
+        goto err;
+    if ( dom->arch_hooks->alloc_pgtables &&
+         dom->arch_hooks->alloc_pgtables(dom) != 0 )
+        goto err;
+    if ( dom->alloc_bootstack )
+    {
+        dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
+        if ( dom->bootstack_pfn == INVALID_PFN )
+            goto err;
+    }
+
+    DOMPRINTF("%-20s: virt_alloc_end : 0x%" PRIx64 "",
+              __FUNCTION__, dom->virt_alloc_end);
+    DOMPRINTF("%-20s: virt_pgtab_end : 0x%" PRIx64 "",
+              __FUNCTION__, dom->virt_pgtab_end);
+
+    /* Make sure all memory mapped by initial page tables is available */
+    /* NOTE(review): returns -1 directly rather than goto err (same effect). */
+    if ( dom->virt_pgtab_end && xc_dom_alloc_pad(dom, dom->virt_pgtab_end) )
+        return -1;
+
+    for ( mod = 0; mod < dom->num_modules; mod++ )
+    {
+        unmapped_initrd = (dom->parms.unmapped_initrd &&
+                           !dom->modules[mod].seg.vstart);
+
+        /* Load ramdisk / other modules if no initial mapping required. */
+        if ( dom->modules[mod].blob && unmapped_initrd )
+        {
+            if ( xc_dom_build_module(dom, mod) != 0 )
+                goto err;
+
+            /* Unmapped initrd location is expressed in PFNs, not vaddrs. */
+            if ( mod == 0 )
+            {
+                dom->flags |= SIF_MOD_START_PFN;
+                dom->initrd_start = dom->modules[mod].seg.pfn;
+                dom->initrd_len = page_size * dom->modules[mod].seg.pages;
+            }
+        }
+    }
+
+    /* Allocate p2m list if outside of initial kernel mapping. */
+    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base != UNSET_ADDR )
+    {
+        if ( dom->arch_hooks->alloc_p2m_list(dom) != 0 )
+            goto err;
+        dom->p2m_seg.vstart = dom->parms.p2m_base;
+    }
+
+    return 0;
+
+ err:
+    return -1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_decompress.h b/tools/libs/guest/xg_dom_decompress.h
new file mode 100644 (file)
index 0000000..c5ab2e5
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef __MINIOS__
+# include "xenctrl_dom.h"
+#else
+# include "xg_dom_decompress_unsafe.h"
+#endif
+
+int xc_try_lz4_decode(struct xc_dom_image *dom, void **blob, size_t *size);
+
diff --git a/tools/libs/guest/xg_dom_decompress_lz4.c b/tools/libs/guest/xg_dom_decompress_lz4.c
new file mode 100644 (file)
index 0000000..97ba620
--- /dev/null
@@ -0,0 +1,141 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress.h"
+
+#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+#define likely(a) a
+#define unlikely(a) a
+
+/* Read a little-endian 16-bit value from a possibly unaligned buffer. */
+static inline uint_fast16_t le16_to_cpup(const unsigned char *buf)
+{
+    return buf[0] | (buf[1] << 8);
+}
+
+/* Read a little-endian 32-bit value from a possibly unaligned buffer. */
+static inline uint_fast32_t le32_to_cpup(const unsigned char *buf)
+{
+    return le16_to_cpup(buf) | ((uint32_t)le16_to_cpup(buf + 2) << 16);
+}
+
+#include "../../xen/include/xen/lz4.h"
+#include "../../xen/common/decompress.h"
+
+#ifndef __MINIOS__
+
+#include "../../xen/common/lz4/decompress.c"
+
+#define ARCHIVE_MAGICNUMBER 0x184C2102
+
+/*
+ * Decode an LZ4 "legacy frame" image: 32-bit LE chunk lengths, possibly
+ * interspersed with repeated ARCHIVE_MAGICNUMBER markers.  The expected
+ * decompressed size is read from the input's last 4 bytes (presumably
+ * appended by the image build — TODO confirm).  On success the output is
+ * registered with the domain image, *blob/*psize are updated, 0 returned;
+ * otherwise -1 (or a negative decoder error) is returned.
+ */
+int xc_try_lz4_decode(
+	struct xc_dom_image *dom, void **blob, size_t *psize)
+{
+	int ret = -1;
+	unsigned char *inp = *blob, *output, *outp;
+	ssize_t size = *psize - 4;	/* exclude the trailing size word */
+	size_t out_len, dest_len, chunksize;
+	const char *msg;
+
+	if (size < 4) {
+		msg = "input too small";
+		goto exit_0;
+	}
+
+	out_len = get_unaligned_le32(inp + size);
+	if (xc_dom_kernel_check_size(dom, out_len)) {
+		msg = "Decompressed image too large";
+		goto exit_0;
+	}
+
+	output = malloc(out_len);
+	if (!output) {
+		msg = "Could not allocate output buffer";
+		goto exit_0;
+	}
+	outp = output;
+
+	/* The stream must open with the legacy-frame magic number. */
+	chunksize = get_unaligned_le32(inp);
+	if (chunksize == ARCHIVE_MAGICNUMBER) {
+		inp += 4;
+		size -= 4;
+	} else {
+		msg = "invalid header";
+		goto exit_2;
+	}
+
+	for (;;) {
+		if (size < 4) {
+			msg = "missing data";
+			goto exit_2;
+		}
+		chunksize = get_unaligned_le32(inp);
+		/* Concatenated frames: skip repeated magic markers. */
+		if (chunksize == ARCHIVE_MAGICNUMBER) {
+			inp += 4;
+			size -= 4;
+			continue;
+		}
+		inp += 4;
+		size -= 4;
+		if (chunksize > size) {
+			msg = "insufficient input data";
+			goto exit_2;
+		}
+
+		dest_len = out_len - (outp - output);
+		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
+				&dest_len);
+		if (ret < 0) {
+			msg = "decoding failed";
+			goto exit_2;
+		}
+
+		ret = -1;
+		outp += dest_len;
+		size -= chunksize;
+
+		/* All input consumed: hand the buffer over to the caller. */
+		if (size == 0)
+		{
+			if ( xc_dom_register_external(dom, output, out_len) )
+			{
+				msg = "Error registering stream output";
+				goto exit_2;
+			}
+			*blob = output;
+			*psize = out_len;
+			return 0;
+		}
+
+		if (size < 0) {
+			msg = "data corrupted";
+			goto exit_2;
+		}
+
+		inp += chunksize;
+	}
+
+exit_2:
+	free(output);
+exit_0:
+	DOMPRINTF("LZ4 decompression error: %s\n", msg);
+	return ret;
+}
+
+#else /* __MINIOS__ */
+
+#include "../../xen/common/unlz4.c"
+
+/* Mini-OS build: delegate to the unsafe in-tree kernel decompressor. */
+int xc_try_lz4_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    return xc_dom_decompress_unsafe(unlz4, dom, blob, size);
+}
+
+#endif
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe.c b/tools/libs/guest/xg_dom_decompress_unsafe.c
new file mode 100644 (file)
index 0000000..21d9647
--- /dev/null
@@ -0,0 +1,48 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress_unsafe.h"
+
+static struct xc_dom_image *unsafe_dom;
+static unsigned char *output_blob;
+static unsigned int output_size;
+
+/* Error callback for the decompressors; panics via the file-static dom. */
+static void unsafe_error(const char *msg)
+{
+    xc_dom_panic(unsafe_dom->xch, XC_INVALID_KERNEL, "%s", msg);
+}
+
+/*
+ * Flush callback: grow the file-static output buffer and append the
+ * newly decompressed bytes.  Returns the byte count, or -1 on OOM.
+ */
+static int unsafe_flush(void *src, unsigned int size)
+{
+    void *n = realloc(output_blob, output_size + size);
+    if (!n)
+        return -1;
+    output_blob = n;
+
+    memcpy(&output_blob[output_size], src, size);
+    output_size += size;
+    return size;
+}
+
+/*
+ * Run a borrowed kernel decompressor over the kernel blob, collecting
+ * output through unsafe_flush().  On success *blob/*size are replaced by
+ * the decompressed buffer (caller owns it); on failure it is freed.
+ * Not reentrant: uses file-scope static state.
+ */
+int xc_dom_decompress_unsafe(
+    decompress_fn fn, struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    int ret;
+
+    unsafe_dom = dom;
+    output_blob = NULL;
+    output_size = 0;
+
+    ret = fn(dom->kernel_blob, dom->kernel_size, NULL, unsafe_flush, NULL, NULL, unsafe_error);
+
+    if (ret)
+        free(output_blob);
+    else {
+        *blob = output_blob;
+        *size = output_size;
+    }
+
+    return ret;
+}
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe.h b/tools/libs/guest/xg_dom_decompress_unsafe.h
new file mode 100644 (file)
index 0000000..fb84b6a
--- /dev/null
@@ -0,0 +1,20 @@
+#include "xenctrl_dom.h"
+
+typedef int decompress_fn(unsigned char *inbuf, unsigned int len,
+                          int (*fill)(void*, unsigned int),
+                          int (*flush)(void*, unsigned int),
+                          unsigned char *outbuf, unsigned int *posp,
+                          void (*error)(const char *x));
+
+int xc_dom_decompress_unsafe(
+    decompress_fn fn, struct xc_dom_image *dom, void **blob, size_t *size)
+    __attribute__((visibility("internal")));
+
+int xc_try_bzip2_decode(struct xc_dom_image *dom, void **blob, size_t *size)
+    __attribute__((visibility("internal")));
+int xc_try_lzma_decode(struct xc_dom_image *dom, void **blob, size_t *size)
+    __attribute__((visibility("internal")));
+int xc_try_lzo1x_decode(struct xc_dom_image *dom, void **blob, size_t *size)
+    __attribute__((visibility("internal")));
+int xc_try_xz_decode(struct xc_dom_image *dom, void **blob, size_t *size)
+    __attribute__((visibility("internal")));
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe_bzip2.c b/tools/libs/guest/xg_dom_decompress_unsafe_bzip2.c
new file mode 100644 (file)
index 0000000..9d3709e
--- /dev/null
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress_unsafe.h"
+
+#include "../../xen/common/bunzip2.c"
+
+/* Decompress a bzip2 kernel image via the in-tree bunzip2 decompressor. */
+int xc_try_bzip2_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    return xc_dom_decompress_unsafe(bunzip2, dom, blob, size);
+}
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe_lzma.c b/tools/libs/guest/xg_dom_decompress_unsafe_lzma.c
new file mode 100644 (file)
index 0000000..5d178f0
--- /dev/null
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress_unsafe.h"
+
+#include "../../xen/common/unlzma.c"
+
+/* Decompress an LZMA kernel image via the in-tree unlzma decompressor. */
+int xc_try_lzma_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    return xc_dom_decompress_unsafe(unlzma, dom, blob, size);
+}
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe_lzo1x.c b/tools/libs/guest/xg_dom_decompress_unsafe_lzo1x.c
new file mode 100644 (file)
index 0000000..a4f8ebd
--- /dev/null
@@ -0,0 +1,50 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <endian.h>
+#include <stdint.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress_unsafe.h"
+
+typedef uint8_t u8;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint64_t u64;
+
+#define likely(a) a
+#define noinline
+#define unlikely(a) a
+
+/* Convert a big-endian 16-bit value to host order (byte-swap on LE). */
+static inline u16 be16_to_cpup(const u16 *p)
+{
+	u16 v = *p;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	return (((v & 0x00ffU) << 8) |
+                ((v & 0xff00U) >> 8));
+#else
+	return v;
+#endif
+}
+
+/* Convert a big-endian 32-bit value to host order (byte-swap on LE). */
+static inline u32 be32_to_cpup(const u32 *p)
+{
+	u32 v = *p;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	return (((v & 0x000000ffUL) << 24) |
+                ((v & 0x0000ff00UL) <<  8) |
+                ((v & 0x00ff0000UL) >>  8) |
+                ((v & 0xff000000UL) >> 24));
+#else
+	return v;
+#endif
+}
+
+#include "../../xen/common/lzo.c"
+#include "../../xen/common/unlzo.c"
+
+/* Decompress an LZO1X kernel image via the in-tree unlzo decompressor. */
+int xc_try_lzo1x_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    return xc_dom_decompress_unsafe(unlzo, dom, blob, size);
+}
diff --git a/tools/libs/guest/xg_dom_decompress_unsafe_xz.c b/tools/libs/guest/xg_dom_decompress_unsafe_xz.c
new file mode 100644 (file)
index 0000000..ff6824b
--- /dev/null
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <endian.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xg_dom_decompress_unsafe.h"
+
+// TODO
+#define XZ_DEC_X86
+
+typedef char bool_t;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint32_t __le32;
+
+/* Convert host order to little-endian 32-bit (byte-swap on BE hosts). */
+static inline u32 cpu_to_le32(const u32 v)
+{
+#if BYTE_ORDER == BIG_ENDIAN
+	return (((v & 0x000000ffUL) << 24) |
+		((v & 0x0000ff00UL) <<  8) |
+		((v & 0x00ff0000UL) >>  8) |
+		((v & 0xff000000UL) >> 24));
+#else
+	return v;
+#endif
+}
+
+/* Read a little-endian 32-bit value via the symmetric byte swap. */
+static inline u32 le32_to_cpup(const u32 *p)
+{
+	return cpu_to_le32(*p);
+}
+
+#define __force
+#define always_inline
+
+#include "../../xen/common/unxz.c"
+
+/* Decompress an XZ kernel image via the in-tree unxz decompressor. */
+int xc_try_xz_decode(
+    struct xc_dom_image *dom, void **blob, size_t *size)
+{
+    return xc_dom_decompress_unsafe(unxz, dom, blob, size);
+}
diff --git a/tools/libs/guest/xg_dom_elfloader.c b/tools/libs/guest/xg_dom_elfloader.c
new file mode 100644 (file)
index 0000000..7043c3b
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ * Xen domain builder -- ELF bits.
+ *
+ * Parse and load ELF kernel images.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+#include "xc_bitops.h"
+
+#define XEN_VER "xen-3.0"
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * libelf logging hook: forward messages to the xc logger, preferring the
+ * dombuild logger when one is installed.  Errors are reported as
+ * XC_INVALID_KERNEL, everything else as detail.
+ */
+static void log_callback(struct elf_binary *elf, void *caller_data,
+                         bool iserr, const char *fmt, va_list al) {
+    xc_interface *xch = caller_data;
+
+    xc_reportv(xch,
+          xch->dombuild_logger ? xch->dombuild_logger : xch->error_handler,
+                       iserr ? XTL_ERROR : XTL_DETAIL,
+                       iserr ? XC_INVALID_KERNEL : XC_ERROR_NONE,
+                       fmt, al);
+}
+
+/* Attach the xc logging callback to a libelf binary handle. */
+void xc_elf_set_logfile(xc_interface *xch, struct elf_binary *elf,
+                        int verbose) {
+    elf_set_log(elf, log_callback, xch, verbose /* convert to bool */);
+}
+
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Derive the guest capability string for the parsed ELF image from its
+ * container type, machine type and PAE mode.  Returns a static string,
+ * or NULL (after xc_dom_panic) for unsupported images.
+ */
+static char *xc_dom_guest_type(struct xc_dom_image *dom,
+                               struct elf_binary *elf)
+{
+    uint64_t machine = elf_uval(elf, elf->ehdr, e_machine);
+
+    /* HVM container needs a PVH-capable entry point (phys_entry). */
+    if ( dom->container_type == XC_DOM_HVM_CONTAINER &&
+         dom->parms.phys_entry != UNSET_ADDR32 )
+        return "hvm-3.0-x86_32";
+    if ( dom->container_type == XC_DOM_HVM_CONTAINER )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "%s: image not capable of booting inside a HVM container",
+                     __FUNCTION__);
+        return NULL;
+    }
+
+    switch ( machine )
+    {
+    case EM_386:
+        switch ( dom->parms.pae )
+        {
+        /* Bimodal kernels: prefer PAE if the hypervisor offers it. */
+        case XEN_PAE_BIMODAL:
+            if ( strstr(dom->xen_caps, "xen-3.0-x86_32p") )
+                return "xen-3.0-x86_32p";
+            return "xen-3.0-x86_32";
+        case XEN_PAE_EXTCR3:
+        case XEN_PAE_YES:
+            return "xen-3.0-x86_32p";
+        case XEN_PAE_NO:
+        default:
+            return "xen-3.0-x86_32";
+        }
+    case EM_X86_64:
+        return "xen-3.0-x86_64";
+    default:
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                     "%s: unknown image type %"PRIu64,
+                     __FUNCTION__, machine);
+        return NULL;
+    }
+}
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary                                                         */
+
+/*
+ * Sanity-check that a kernel blob is present and is an ELF binary.
+ * Panics only when 'verbose' is set (the probe path stays quiet).
+ * Returns 0 or -EINVAL.
+ */
+static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose)
+{
+    if ( dom->kernel_blob == NULL )
+    {
+        if ( verbose )
+            xc_dom_panic(dom->xch,
+                         XC_INTERNAL_ERROR, "%s: no kernel image loaded",
+                         __FUNCTION__);
+        return -EINVAL;
+    }
+
+    if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) )
+    {
+        if ( verbose )
+            xc_dom_panic(dom->xch,
+                         XC_INVALID_KERNEL, "%s: kernel is not an ELF image",
+                         __FUNCTION__);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/*
+ * Loader probe: accept the image only if it is an ELF binary carrying
+ * Xen ELF notes (i.e. a PV-capable kernel).  Returns 0 to claim the
+ * image, a negative errno otherwise.
+ */
+static elf_negerrnoval xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary elf;
+    int rc;
+
+    rc = check_elf_kernel(dom, 0);
+    if ( rc != 0 )
+        return rc;
+
+    rc = elf_init(&elf, dom->kernel_blob, dom->kernel_size);
+    if ( rc != 0 )
+        return rc;
+
+    /*
+     * We need to check that it contains Xen ELFNOTES,
+     * or else we might be trying to load a plain ELF.
+     */
+    elf_parse_binary(&elf);
+    rc = elf_xen_parse(&elf, &dom->parms);
+    if ( rc != 0 )
+        return rc;
+
+    return 0;
+}
+
+/*
+ * Loader parse step: initialize a persistent elf_binary (stored in
+ * dom->private_loader), extract the Xen meta information, reject Dom0-only
+ * kernels, and record the kernel segment and guest type.
+ * Returns 0 or a negative errno.
+ */
+static elf_negerrnoval xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf;
+    elf_negerrnoval rc;
+
+    rc = check_elf_kernel(dom, 1);
+    if ( rc != 0 )
+        return rc;
+
+    elf = xc_dom_malloc(dom, sizeof(*elf));
+    if ( elf == NULL )
+        return -ENOMEM;
+    dom->private_loader = elf;
+    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size) != 0 ? -EINVAL : 0;
+    xc_elf_set_logfile(dom->xch, elf, 1);
+    if ( rc != 0 )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: corrupted ELF image",
+                     __FUNCTION__);
+        return rc;
+    }
+
+    /* parse binary and get xen meta info */
+    elf_parse_binary(elf);
+    if ( elf_xen_parse(elf, &dom->parms) != 0 )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
+                     " support unprivileged (DomU) operation", __FUNCTION__);
+        rc = -EINVAL;
+        goto out;
+    }
+
+    /* find kernel segment */
+    dom->kernel_seg.vstart = dom->parms.virt_kstart;
+    dom->kernel_seg.vend   = dom->parms.virt_kend;
+
+    dom->guest_type = xc_dom_guest_type(dom, elf);
+    if ( dom->guest_type == NULL )
+    {
+        /* Go through 'out' so a broken-ELF diagnostic is still logged. */
+        rc = -EINVAL;
+        goto out;
+    }
+    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
+              __FUNCTION__, dom->guest_type,
+              dom->kernel_seg.vstart, dom->kernel_seg.vend);
+    rc = 0;
+out:
+    if ( elf_check_broken(elf) )
+        DOMPRINTF("%s: ELF broken: %s", __FUNCTION__,
+                  elf_check_broken(elf));
+
+    return rc;
+}
+
+/*
+ * Loader load step: map the kernel segment pages and let libelf copy the
+ * program into place.  Returns 0 on success, negative on failure.
+ */
+static elf_errorstatus xc_dom_load_elf_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf = dom->private_loader;
+    elf_errorstatus rc;
+    xen_pfn_t pages;
+
+    elf->dest_base = xc_dom_seg_to_ptr_pages(dom, &dom->kernel_seg, &pages);
+    if ( elf->dest_base == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom,dom->kernel_seg)"
+                  " => NULL", __FUNCTION__);
+        return -1;
+    }
+    elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom);
+
+    rc = elf_load_binary(elf);
+    if ( rc < 0 )
+    {
+        DOMPRINTF("%s: failed to load elf binary", __FUNCTION__);
+        return rc;
+    }
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Generic PV ELF kernel loader: probe / parse / load hooks. */
+struct xc_dom_loader elf_loader = {
+    .name = "ELF-generic",
+    .probe = xc_dom_probe_elf_kernel,
+    .parser = xc_dom_parse_elf_kernel,
+    .loader = xc_dom_load_elf_kernel,
+};
+
+/* Register the ELF loader (presumably __init marks a constructor that
+ * runs at library load time — TODO confirm the __init definition). */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&elf_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_hvmloader.c b/tools/libs/guest/xg_dom_hvmloader.c
new file mode 100644 (file)
index 0000000..995a0f3
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * Xen domain builder -- HVM specific bits.
+ *
+ * Parse and load ELF firmware images for HVM domains.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+#include "xc_bitops.h"
+
+/* ------------------------------------------------------------------------ */
+/* parse elf binary                                                         */
+
+/*
+ * Sanity-check that a firmware blob is present and is an ELF binary.
+ * Panics only when 'verbose' is set.  Returns 0 or -EINVAL.
+ */
+static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose)
+{
+    if ( dom->kernel_blob == NULL )
+    {
+        if ( verbose )
+            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                         "%s: no kernel image loaded", __func__);
+        return -EINVAL;
+    }
+
+    if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) )
+    {
+        if ( verbose )
+            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
+                         "%s: kernel is not an ELF image", __func__);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+/*
+ * Loader probe for HVM firmware: accept only ELF images WITHOUT Xen
+ * ELF notes (notes would indicate a PV kernel, not firmware).
+ * Returns 0 to claim the image, a negative errno otherwise.
+ */
+static elf_negerrnoval xc_dom_probe_hvm_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary elf;
+    int rc;
+
+    /* This loader is designed for HVM guest firmware. */
+    if ( dom->container_type != XC_DOM_HVM_CONTAINER )
+        return -EINVAL;
+
+    rc = check_elf_kernel(dom, 0);
+    if ( rc != 0 )
+        return rc;
+
+    rc = elf_init(&elf, dom->kernel_blob, dom->kernel_size);
+    if ( rc != 0 )
+        return rc;
+
+    /*
+     * We need to check that there are no Xen ELFNOTES, or
+     * else we might be trying to load a PV kernel.
+     */
+    elf_parse_binary(&elf);
+    rc = elf_xen_parse(&elf, &dom->parms);
+    if ( rc == 0 )
+        return -EINVAL;
+
+    return 0;
+}
+
+/*
+ * Loader parse step for HVM firmware: initialize a persistent elf_binary
+ * in dom->private_loader, require a 32-bit image, and record the kernel
+ * segment (physical addresses) and guest type.
+ */
+static elf_errorstatus xc_dom_parse_hvm_kernel(struct xc_dom_image *dom)
+    /*
+     * This function sometimes returns -1 for error and sometimes
+     * an errno value.  ?!?!
+     */
+{
+    struct elf_binary *elf;
+    elf_errorstatus rc;
+
+    rc = check_elf_kernel(dom, 1);
+    if ( rc != 0 )
+        return rc;
+
+    elf = xc_dom_malloc(dom, sizeof(*elf));
+    if ( elf == NULL )
+        return -1;
+    dom->private_loader = elf;
+    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
+    xc_elf_set_logfile(dom->xch, elf, 1);
+    if ( rc != 0 )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: corrupted ELF image",
+                     __func__);
+        return rc;
+    }
+
+    if ( !elf_32bit(elf) )
+    {
+        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: ELF image is not 32bit",
+                     __func__);
+        return -EINVAL;
+    }
+
+    /* parse binary and get xen meta info */
+    elf_parse_binary(elf);
+
+    /* find kernel segment (firmware is loaded at its physical addresses) */
+    dom->kernel_seg.vstart = elf->pstart;
+    dom->kernel_seg.vend   = elf->pend;
+
+    dom->guest_type = "hvm-3.0-x86_32";
+
+    if ( elf_check_broken(elf) )
+        DOMPRINTF("%s: ELF broken: %s", __func__, elf_check_broken(elf));
+
+    return rc;
+}
+
+/*
+ * Place one HVM firmware module (if present and not already placed) into
+ * a freshly allocated segment, copy its data in, and record the guest
+ * address in module->guest_addr_out.  Returns 0 on success, -1 on failure.
+ */
+static int module_init_one(struct xc_dom_image *dom,
+                           struct xc_hvm_firmware_module *module,
+                           char *name)
+{
+    struct xc_dom_seg seg;
+    void *dest;
+
+    if ( module->length && !module->guest_addr_out )
+    {
+        if ( xc_dom_alloc_segment(dom, &seg, name, 0, module->length) )
+            goto err;
+        dest = xc_dom_seg_to_ptr(dom, &seg);
+        if ( dest == NULL )
+        {
+            DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &seg) => NULL",
+                      __FUNCTION__);
+            goto err;
+        }
+        memcpy(dest, module->data, module->length);
+        module->guest_addr_out = seg.vstart;
+
+        /* Modules must fit below the MMIO hole (itself below 4GB). */
+        assert(dom->mmio_start > 0 && dom->mmio_start < UINT32_MAX);
+        if ( module->guest_addr_out > dom->mmio_start ||
+             module->guest_addr_out + module->length > dom->mmio_start )
+        {
+            DOMPRINTF("%s: Module %s would be loaded above 4GB",
+                      __FUNCTION__, name);
+            goto err;
+        }
+    }
+
+    return 0;
+err:
+    return -1;
+}
+
+/*
+ * Place the optional firmware modules (system firmware, ACPI, SMBIOS).
+ * Returns 0 on success, -1 as soon as any module fails.
+ */
+static int modules_init(struct xc_dom_image *dom)
+{
+    int rc;
+
+    rc = module_init_one(dom, &dom->system_firmware_module,
+                         "System Firmware module");
+    if ( rc ) goto err;
+    /* Only one module can be added */
+    rc = module_init_one(dom, &dom->acpi_modules[0], "ACPI module");
+    if ( rc ) goto err;
+    rc = module_init_one(dom, &dom->smbios_module, "SMBIOS module");
+    if ( rc ) goto err;
+
+    return 0;
+err:
+    return -1;
+}
+
+/*
+ * Loader load step for HVM firmware: map the guest frames covering the
+ * ELF's physical load range, let libelf copy the binary in, then place
+ * the firmware modules and record the physical entry point.
+ * Returns 0 on success, a negative errno on failure.
+ */
+static elf_errorstatus xc_dom_load_hvm_kernel(struct xc_dom_image *dom)
+{
+    struct elf_binary *elf = dom->private_loader;
+    privcmd_mmap_entry_t *entries = NULL;
+    size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
+    elf_errorstatus rc;
+    int i;
+
+    /* Map address space for initial elf image. */
+    entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
+    if ( entries == NULL )
+        return -ENOMEM;
+
+    /* One entry per guest frame, starting at the ELF's load address. */
+    for ( i = 0; i < pages; i++ )
+        entries[i].mfn = (elf->pstart >> PAGE_SHIFT) + i;
+
+    elf->dest_base = xc_map_foreign_ranges(
+        dom->xch, dom->guest_domid, pages << PAGE_SHIFT,
+        PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
+        entries, pages);
+    if ( elf->dest_base == NULL )
+    {
+        DOMPRINTF("%s: unable to map guest memory space", __func__);
+        rc = -EFAULT;
+        goto error;
+    }
+
+    elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom);
+
+    rc = elf_load_binary(elf);
+    if ( rc < 0 )
+    {
+        DOMPRINTF("%s: failed to load elf binary", __func__);
+        goto error;
+    }
+
+    munmap(elf->dest_base, elf->dest_size);
+
+    rc = modules_init(dom);
+    if ( rc != 0 )
+    {
+        DOMPRINTF("%s: unable to load modules.", __func__);
+        goto error;
+    }
+
+    dom->parms.phys_entry = elf_uval(elf, elf->ehdr, e_entry);
+
+    free(entries);
+    return 0;
+
+ error:
+    assert(rc != 0);
+    free(entries);
+    return rc;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Generic HVM firmware loader: probe / parse / load hooks. */
+struct xc_dom_loader hvm_loader = {
+    .name = "HVM-generic",
+    .probe = xc_dom_probe_hvm_kernel,
+    .parser = xc_dom_parse_hvm_kernel,
+    .loader = xc_dom_load_hvm_kernel,
+};
+
+/* Register the HVM loader (presumably __init marks a constructor that
+ * runs at library load time — TODO confirm the __init definition). */
+static void __init register_loader(void)
+{
+    xc_dom_register_loader(&hvm_loader);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_dom_x86.c b/tools/libs/guest/xg_dom_x86.c
new file mode 100644 (file)
index 0000000..842dbcc
--- /dev/null
@@ -0,0 +1,1945 @@
+/*
+ * Xen domain builder -- i386 and x86_64 bits.
+ *
+ * Most architecture-specific code for x86 goes here.
+ *   - prepare page tables.
+ *   - fill architecture-specific structs.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include <xen/xen.h>
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/hvm_info_table.h>
+#include <xen/arch-x86/hvm/start_info.h>
+#include <xen/io/protocols.h>
+
+#include <xen-tools/libs.h>
+
+#include "xg_private.h"
+#include "xenctrl_dom.h"
+#include "xenctrl.h"
+
+/* ------------------------------------------------------------------------ */
+
/* Number of superpage extents handed to the hypervisor per batch. */
#define SUPERPAGE_BATCH_SIZE 512

/* 2MB / 1GB superpages expressed as shifts / pfn counts over 4k pages. */
#define SUPERPAGE_2MB_SHIFT   9
#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
#define SUPERPAGE_1GB_SHIFT   18
#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)

/* CR0 bits used when crafting the initial HVM vcpu state. */
#define X86_CR0_PE 0x01
#define X86_CR0_ET 0x10

/* Architectural reset values of DR6/DR7. */
#define X86_DR6_DEFAULT 0xffff0ff0u
#define X86_DR7_DEFAULT 0x00000400u

#define MTRR_TYPE_WRBACK     6
#define MTRR_DEF_TYPE_ENABLE (1u << 11)

/*
 * Indices of the HVM special pages; special_pfn() maps an index to its
 * guest pfn at the top of the special region.
 */
#define SPECIALPAGE_PAGING   0
#define SPECIALPAGE_ACCESS   1
#define SPECIALPAGE_SHARING  2
#define SPECIALPAGE_BUFIOREQ 3
#define SPECIALPAGE_XENSTORE 4
#define SPECIALPAGE_IOREQ    5
#define SPECIALPAGE_IDENT_PT 6
#define SPECIALPAGE_CONSOLE  7
#define special_pfn(x) \
    (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES + (x))

/* Extra ioreq server pages live directly below the special pages. */
#define NR_IOREQ_SERVER_PAGES 8
#define ioreq_server_pfn(x) (special_pfn(0) - NR_IOREQ_SERVER_PAGES + (x))

/*
 * NB: round_up() yields the LAST address inside the aligned block
 * (addr | mask); callers add 1 to get the next aligned boundary.
 */
#define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
#define round_down(addr, mask)   ((addr) & ~(mask))
#define round_up(addr, mask)     ((addr) | (mask))
#define round_pg_up(addr)  (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))

#define HVMLOADER_MODULE_MAX_COUNT 2
#define HVMLOADER_MODULE_CMDLINE_SIZE MAX_GUEST_CMDLINE
+
/* Per-pagetable-format parameters (PAE 32bit vs 4-level 64bit). */
struct xc_dom_params {
    unsigned levels;             /* number of pagetable levels */
    xen_vaddr_t vaddr_mask;      /* mask of valid guest virtual address bits */
    x86_pgentry_t lvl_prot[4];   /* protection bits per level, [0] = leaf */
};

/* One contiguous virtual range and the pagetables that map it. */
struct xc_dom_x86_mapping_lvl {
    xen_vaddr_t from;            /* first virtual address covered */
    xen_vaddr_t to;              /* last virtual address covered */
    xen_pfn_t pfn;               /* first pfn backing this range/tables */
    unsigned int pgtables;       /* number of pagetable pages at this level */
};

struct xc_dom_x86_mapping {
    struct xc_dom_x86_mapping_lvl area;     /* the mapped data area itself */
    struct xc_dom_x86_mapping_lvl lvls[4];  /* tables per level, [0] lowest */
};

/* x86-specific private state hung off struct xc_dom_image. */
struct xc_dom_image_x86 {
    unsigned n_mappings;
#define MAPPING_MAX 2
    struct xc_dom_x86_mapping maps[MAPPING_MAX];
    const struct xc_dom_params *params;

    /* PV: Pointer to the in-guest P2M. */
    void *p2m_guest;
};
+
+/* get guest IO ABI protocol */
+const char *xc_domain_get_native_protocol(xc_interface *xch,
+                                          uint32_t domid)
+{
+    int ret;
+    uint32_t guest_width;
+    const char *protocol;
+
+    ret = xc_domain_get_guest_width(xch, domid, &guest_width);
+
+    if ( ret )
+        return NULL;
+
+    switch (guest_width) {
+    case 4: /* 32 bit guest */
+        protocol = XEN_IO_PROTO_ABI_X86_32;
+        break;
+    case 8: /* 64 bit guest */
+        protocol = XEN_IO_PROTO_ABI_X86_64;
+        break;
+    default:
+        protocol = NULL;
+    }
+
+    return protocol;
+}
+
/*
 * Work out how many pagetable pages are needed to map [from, to] with
 * leaf entries starting at @pfn, and stage the result in the next free
 * domx86->maps[] slot.  n_mappings itself is only incremented by the
 * caller after it has filled in area.pfn.
 *
 * Returns 0 on success, -ENOMEM/-EINVAL on failure (after xc_dom_panic).
 */
static int count_pgtables(struct xc_dom_image *dom, xen_vaddr_t from,
                          xen_vaddr_t to, xen_pfn_t pfn)
{
    struct xc_dom_image_x86 *domx86 = dom->arch_private;
    struct xc_dom_x86_mapping *map, *map_cmp;
    xen_pfn_t pfn_end;
    xen_vaddr_t mask;
    unsigned bits;
    int l, m;

    if ( domx86->n_mappings == MAPPING_MAX )
    {
        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
                     "%s: too many mappings\n", __FUNCTION__);
        return -ENOMEM;
    }
    map = domx86->maps + domx86->n_mappings;

    /* The mapped pfn range must fit inside the p2m. */
    pfn_end = pfn + ((to - from) >> PAGE_SHIFT_X86);
    if ( pfn_end >= dom->p2m_size )
    {
        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
                     "%s: not enough memory for initial mapping (%#"PRIpfn" > %#"PRIpfn")",
                     __FUNCTION__, pfn_end, dom->p2m_size);
        return -ENOMEM;
    }
    /* Virtual ranges of distinct mappings must not overlap. */
    for ( m = 0; m < domx86->n_mappings; m++ )
    {
        map_cmp = domx86->maps + m;
        if ( from < map_cmp->area.to && to > map_cmp->area.from )
        {
            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                         "%s: overlapping mappings\n", __FUNCTION__);
            return -EINVAL;
        }
    }

    memset(map, 0, sizeof(*map));
    map->area.from = from & domx86->params->vaddr_mask;
    map->area.to = to & domx86->params->vaddr_mask;

    /* Walk levels top-down, accumulating table counts per level. */
    for ( l = domx86->params->levels - 1; l >= 0; l-- )
    {
        map->lvls[l].pfn = dom->pfn_alloc_end + map->area.pgtables;
        if ( l == domx86->params->levels - 1 )
        {
            /* Top level page table in first mapping only. */
            if ( domx86->n_mappings == 0 )
            {
                map->lvls[l].from = 0;
                map->lvls[l].to = domx86->params->vaddr_mask;
                map->lvls[l].pgtables = 1;
                map->area.pgtables++;
            }
            continue;
        }

        /* Expand the covered range to this level's table granularity. */
        bits = PAGE_SHIFT_X86 + (l + 1) * PGTBL_LEVEL_SHIFT_X86;
        mask = bits_to_mask(bits);
        map->lvls[l].from = map->area.from & ~mask;
        map->lvls[l].to = map->area.to | mask;

        /*
         * NOTE(review): presumably this extends l2 coverage up to the
         * PAE l3 slot #3 region at/above 0xc0000000 -- confirm.
         */
        if ( domx86->params->levels == PGTBL_LEVELS_I386 &&
             domx86->n_mappings == 0 && to < 0xc0000000 && l == 1 )
        {
            DOMPRINTF("%s: PAE: extra l2 page table for l3#3", __FUNCTION__);
            map->lvls[l].to = domx86->params->vaddr_mask;
        }

        /* Drop ranges already covered by an earlier mapping's tables. */
        for ( m = 0; m < domx86->n_mappings; m++ )
        {
            map_cmp = domx86->maps + m;
            if ( map_cmp->lvls[l].from == map_cmp->lvls[l].to )
                continue;
            if ( map->lvls[l].from >= map_cmp->lvls[l].from &&
                 map->lvls[l].to <= map_cmp->lvls[l].to )
            {
                /* Fully contained: nothing new to allocate. */
                map->lvls[l].from = 0;
                map->lvls[l].to = 0;
                break;
            }
            assert(map->lvls[l].from >= map_cmp->lvls[l].from ||
                   map->lvls[l].to <= map_cmp->lvls[l].to);
            /* Partial overlap: trim to the uncovered remainder. */
            if ( map->lvls[l].from >= map_cmp->lvls[l].from &&
                 map->lvls[l].from <= map_cmp->lvls[l].to )
                map->lvls[l].from = map_cmp->lvls[l].to + 1;
            if ( map->lvls[l].to >= map_cmp->lvls[l].from &&
                 map->lvls[l].to <= map_cmp->lvls[l].to )
                map->lvls[l].to = map_cmp->lvls[l].from - 1;
        }
        if ( map->lvls[l].from < map->lvls[l].to )
            map->lvls[l].pgtables =
                ((map->lvls[l].to - map->lvls[l].from) >> bits) + 1;
        DOMPRINTF("%s: 0x%016" PRIx64 "/%d: 0x%016" PRIx64 " -> 0x%016" PRIx64
                  ", %d table(s)", __FUNCTION__, mask, bits,
                  map->lvls[l].from, map->lvls[l].to, map->lvls[l].pgtables);
        map->area.pgtables += map->lvls[l].pgtables;
    }

    return 0;
}
+
/*
 * Allocate the initial pagetables of a PV guest.
 *
 * The number of tables depends on how much virtual space they map, which
 * in turn grows with the tables themselves, so iterate until the estimate
 * is self-consistent.  Returns 0 on success, -1 on failure.
 */
static int alloc_pgtables_pv(struct xc_dom_image *dom)
{
    int pages, extra_pages;
    xen_vaddr_t try_virt_end;
    struct xc_dom_image_x86 *domx86 = dom->arch_private;
    struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings;

    extra_pages = dom->alloc_bootstack ? 1 : 0;
    extra_pages += (512 * 1024) / PAGE_SIZE_X86; /* 512kB padding */
    pages = extra_pages;
    for ( ; ; )
    {
        try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
                                bits_to_mask(22)); /* 4MB alignment */

        if ( count_pgtables(dom, dom->parms.virt_base, try_virt_end, 0) )
            return -1;

        /* Converged once the recounted tables still fit in the tried area. */
        pages = map->area.pgtables + extra_pages;
        if ( dom->virt_alloc_end + pages * PAGE_SIZE_X86 <= try_virt_end + 1 )
            break;
    }
    map->area.pfn = 0;
    domx86->n_mappings++;
    dom->virt_pgtab_end = try_virt_end + 1;

    return xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
                                map->area.pgtables * PAGE_SIZE_X86);
}
+
+/* ------------------------------------------------------------------------ */
+/* i386 pagetables                                                          */
+
+static int alloc_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    static const struct xc_dom_params x86_32_params = {
+        .levels = PGTBL_LEVELS_I386,
+        .vaddr_mask = bits_to_mask(VIRT_BITS_I386),
+        .lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
+        /*
+         * 64bit Xen runs 32bit PV guests with the PAE entries in an L3
+         * pagetable.  They don't behave exactly like native PAE paging.
+         */
+        .lvl_prot[1 ... 2] =
+            _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
+    };
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+
+    domx86->params = &x86_32_params;
+
+    return alloc_pgtables_pv(dom);
+}
+
/* Convert between a pfn and the physical address / pagetable entry form. */
#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
#define pgentry_to_pfn(entry) ((xen_pfn_t)((entry) >> PAGE_SHIFT_X86))
+
/*
 * Move the l3 page table page below 4G for guests which do not
 * support the extended-cr3 format.  The l3 is currently empty so we
 * do not need to preserve the current contents.
 *
 * Returns the new l3 mfn on success, or the original l3mfn on failure
 * (the caller checks whether it ended up below 4G).
 */
static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom,
                                  xen_pfn_t l3pfn,
                                  xen_pfn_t l3mfn)
{
    struct xc_dom_image_x86 *domx86 = dom->arch_private;
    uint32_t *p2m_guest = domx86->p2m_guest;
    xen_pfn_t new_l3mfn;
    struct xc_mmu *mmu;
    void *l3tab;

    mmu = xc_alloc_mmu_updates(dom->xch, dom->guest_domid);
    if ( mmu == NULL )
    {
        DOMPRINTF("%s: failed at %d", __FUNCTION__, __LINE__);
        return l3mfn;
    }

    /* Drop our local mapping before the frame is exchanged. */
    xc_dom_unmap_one(dom, l3pfn);

    new_l3mfn = xc_make_page_below_4G(dom->xch, dom->guest_domid, l3mfn);
    if ( !new_l3mfn )
        goto out;

    /* Record the new mfn in both our and the guest's p2m. */
    p2m_guest[l3pfn] = dom->pv_p2m[l3pfn] = new_l3mfn;

    /* Update the hypervisor's m2p to match. */
    if ( xc_add_mmu_update(dom->xch, mmu,
                           (((unsigned long long)new_l3mfn)
                            << XC_DOM_PAGE_SHIFT(dom)) |
                           MMU_MACHPHYS_UPDATE, l3pfn) )
        goto out;

    if ( xc_flush_mmu_updates(dom->xch, mmu) )
        goto out;

    /*
     * This ensures that the entire pgtables_seg is mapped by a single
     * mmap region. arch_setup_bootlate() relies on this to be able to
     * unmap and pin the pagetables.
     */
    if ( xc_dom_seg_to_ptr(dom, &dom->pgtables_seg) == NULL )
        goto out;

    l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
    if ( l3tab == NULL )
    {
        DOMPRINTF("%s: xc_dom_pfn_to_ptr(dom, l3pfn, 1) => NULL",
                  __FUNCTION__);
        goto out; /* our one call site will call xc_dom_panic and fail */
    }
    /* The replacement frame has unknown contents; scrub it. */
    memset(l3tab, 0, XC_DOM_PAGE_SIZE(dom));

    DOMPRINTF("%s: successfully relocated L3 below 4G. "
              "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn"=>%#"PRIpfn")",
              __FUNCTION__, l3pfn, l3mfn, new_l3mfn);

    l3mfn = new_l3mfn;

 out:
    free(mmu);

    return l3mfn;
}
+
+static x86_pgentry_t *get_pg_table(struct xc_dom_image *dom, int m, int l)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    struct xc_dom_x86_mapping *map;
+    x86_pgentry_t *pg;
+
+    map = domx86->maps + m;
+    pg = xc_dom_pfn_to_ptr(dom, map->lvls[l].pfn, 0);
+    if ( pg )
+        return pg;
+
+    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                 "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__);
+    return NULL;
+}
+
+static x86_pgentry_t get_pg_prot(struct xc_dom_image *dom, int l, xen_pfn_t pfn)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    struct xc_dom_x86_mapping *map;
+    xen_pfn_t pfn_s, pfn_e;
+    x86_pgentry_t prot;
+    unsigned m;
+
+    prot = domx86->params->lvl_prot[l];
+    if ( l > 0 )
+        return prot;
+
+    for ( m = 0; m < domx86->n_mappings; m++ )
+    {
+        map = domx86->maps + m;
+        pfn_s = map->lvls[domx86->params->levels - 1].pfn;
+        pfn_e = map->area.pgtables + pfn_s;
+        if ( pfn >= pfn_s && pfn < pfn_e )
+            return prot & ~_PAGE_RW;
+    }
+
+    return prot;
+}
+
/*
 * Fill in all pagetable entries counted earlier by count_pgtables().
 *
 * For every mapping's table at level l, populate the entries that point
 * at each target range intersecting it: the level l-1 tables of every
 * mapping when l > 0, or each mapping's data area itself when l == 0.
 *
 * Returns 0 on success, -1 if a table page cannot be mapped.
 */
static int setup_pgtables_pv(struct xc_dom_image *dom)
{
    struct xc_dom_image_x86 *domx86 = dom->arch_private;
    struct xc_dom_x86_mapping *map1, *map2;
    struct xc_dom_x86_mapping_lvl *lvl;
    xen_vaddr_t from, to;
    xen_pfn_t pfn, p, p_s, p_e;
    x86_pgentry_t *pg;
    unsigned m1, m2;
    int l;

    for ( l = domx86->params->levels - 1; l >= 0; l-- )
        for ( m1 = 0; m1 < domx86->n_mappings; m1++ )
        {
            map1 = domx86->maps + m1;
            from = map1->lvls[l].from;
            to = map1->lvls[l].to;
            pg = get_pg_table(dom, m1, l);
            if ( !pg )
                return -1;
            for ( m2 = 0; m2 < domx86->n_mappings; m2++ )
            {
                map2 = domx86->maps + m2;
                /* Target: next-lower tables, or the data area at l == 0. */
                lvl = (l > 0) ? map2->lvls + l - 1 : &map2->area;
                if ( l > 0 && lvl->pgtables == 0 )
                    continue;
                /* Skip targets outside this table's virtual reach. */
                if ( lvl->from >= to || lvl->to <= from )
                    continue;
                /* Entry index range within pg[] for the intersection. */
                p_s = (max(from, lvl->from) - from) >>
                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86);
                p_e = (min(to, lvl->to) - from) >>
                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86);
                /* First target pfn corresponding to entry p_s. */
                pfn = ((max(from, lvl->from) - lvl->from) >>
                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86)) + lvl->pfn;
                for ( p = p_s; p <= p_e; p++ )
                {
                    pg[p] = pfn_to_paddr(xc_dom_p2m(dom, pfn)) |
                            get_pg_prot(dom, l, pfn);
                    pfn++;
                }
            }
        }

    return 0;
}
+
+static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    uint32_t *p2m_guest = domx86->p2m_guest;
+    xen_pfn_t l3mfn, l3pfn, i;
+
+    /* Copy dom->pv_p2m[] into the guest. */
+    for ( i = 0; i < dom->p2m_size; ++i )
+    {
+        if ( dom->pv_p2m[i] != INVALID_PFN )
+            p2m_guest[i] = dom->pv_p2m[i];
+        else
+            p2m_guest[i] = -1;
+    }
+
+    l3pfn = domx86->maps[0].lvls[2].pfn;
+    l3mfn = xc_dom_p2m(dom, l3pfn);
+    if ( dom->parms.pae == XEN_PAE_YES )
+    {
+        if ( l3mfn >= 0x100000 )
+            l3mfn = move_l3_below_4G(dom, l3pfn, l3mfn);
+
+        if ( l3mfn >= 0x100000 )
+        {
+            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,"%s: cannot move L3"
+                         " below 4G. extended-cr3 not supported by guest. "
+                         "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn")",
+                         __FUNCTION__, l3pfn, l3mfn);
+            return -EINVAL;
+        }
+    }
+
+    return setup_pgtables_pv(dom);
+}
+
+/* ------------------------------------------------------------------------ */
+/* x86_64 pagetables                                                        */
+
+static int alloc_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    const static struct xc_dom_params x86_64_params = {
+        .levels = PGTBL_LEVELS_X86_64,
+        .vaddr_mask = bits_to_mask(VIRT_BITS_X86_64),
+        .lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
+        .lvl_prot[1 ... 3] =
+            _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
+    };
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+
+    domx86->params = &x86_64_params;
+
+    return alloc_pgtables_pv(dom);
+}
+
+static int setup_pgtables_x86_64(struct xc_dom_image *dom)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    uint64_t *p2m_guest = domx86->p2m_guest;
+    xen_pfn_t i;
+
+    /* Copy dom->pv_p2m[] into the guest. */
+    for ( i = 0; i < dom->p2m_size; ++i )
+    {
+        if ( dom->pv_p2m[i] != INVALID_PFN )
+            p2m_guest[i] = dom->pv_p2m[i];
+        else
+            p2m_guest[i] = -1;
+    }
+
+    return setup_pgtables_pv(dom);
+}
+
+/* ------------------------------------------------------------------------ */
+
+static int alloc_p2m_list(struct xc_dom_image *dom, size_t p2m_alloc_size)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+
+    if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach",
+                              0, p2m_alloc_size) )
+        return -1;
+
+    domx86->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
+    if ( domx86->p2m_guest == NULL )
+        return -1;
+
+    return 0;
+}
+
+static int alloc_p2m_list_x86_32(struct xc_dom_image *dom)
+{
+    size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
+
+    p2m_alloc_size = round_pg_up(p2m_alloc_size);
+    return alloc_p2m_list(dom, p2m_alloc_size);
+}
+
/*
 * 64bit P2M list allocation.  If the kernel requested a virtually mapped
 * P2M (p2m_base set), additionally count and reserve the pagetables that
 * map it; those table pages are appended to the p2m segment itself.
 */
static int alloc_p2m_list_x86_64(struct xc_dom_image *dom)
{
    struct xc_dom_image_x86 *domx86 = dom->arch_private;
    struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings;
    size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
    xen_vaddr_t from, to;
    unsigned lvl;

    p2m_alloc_size = round_pg_up(p2m_alloc_size);
    if ( dom->parms.p2m_base != UNSET_ADDR )
    {
        from = dom->parms.p2m_base;
        to = from + p2m_alloc_size - 1;
        if ( count_pgtables(dom, from, to, dom->pfn_alloc_end) )
            return -1;

        map->area.pfn = dom->pfn_alloc_end;
        /* Shift table pfns past the p2m data pages they will follow. */
        for ( lvl = 0; lvl < 4; lvl++ )
            map->lvls[lvl].pfn += p2m_alloc_size >> PAGE_SHIFT_X86;
        domx86->n_mappings++;
        /* Reserve room for the mapping's pagetables in the same segment. */
        p2m_alloc_size += map->area.pgtables << PAGE_SHIFT_X86;
    }

    return alloc_p2m_list(dom, p2m_alloc_size);
}
+
+/* ------------------------------------------------------------------------ */
+
+static int alloc_magic_pages_pv(struct xc_dom_image *dom)
+{
+    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
+    if ( dom->start_info_pfn == INVALID_PFN )
+        return -1;
+
+    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
+    if ( dom->xenstore_pfn == INVALID_PFN )
+        return -1;
+    xc_clear_domain_page(dom->xch, dom->guest_domid,
+                         xc_dom_p2m(dom, dom->xenstore_pfn));
+
+    dom->console_pfn = xc_dom_alloc_page(dom, "console");
+    if ( dom->console_pfn == INVALID_PFN )
+        return -1;
+    xc_clear_domain_page(dom->xch, dom->guest_domid,
+                         xc_dom_p2m(dom, dom->console_pfn));
+
+    dom->alloc_bootstack = 1;
+
+    return 0;
+}
+
+static void build_hvm_info(void *hvm_info_page, struct xc_dom_image *dom)
+{
+    struct hvm_info_table *hvm_info = (struct hvm_info_table *)
+        (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
+    uint8_t sum;
+    int i;
+
+    memset(hvm_info_page, 0, PAGE_SIZE);
+
+    /* Fill in the header. */
+    memcpy(hvm_info->signature, "HVM INFO", sizeof(hvm_info->signature));
+    hvm_info->length = sizeof(struct hvm_info_table);
+
+    /* Sensible defaults: these can be overridden by the caller. */
+    hvm_info->apic_mode = 1;
+    hvm_info->nr_vcpus = 1;
+    memset(hvm_info->vcpu_online, 0xff, sizeof(hvm_info->vcpu_online));
+
+    /* Memory parameters. */
+    hvm_info->low_mem_pgend = dom->lowmem_end >> PAGE_SHIFT;
+    hvm_info->high_mem_pgend = dom->highmem_end >> PAGE_SHIFT;
+    hvm_info->reserved_mem_pgstart = ioreq_server_pfn(0);
+
+    /* Finish with the checksum. */
+    for ( i = 0, sum = 0; i < hvm_info->length; i++ )
+        sum += ((uint8_t *)hvm_info)[i];
+    hvm_info->checksum = -sum;
+}
+
/*
 * Allocate and advertise an HVM guest's "magic" pages: the block of
 * special pages (xenstore/console/ioreq/monitor rings, ident PT), extra
 * ioreq server pages when a device model is present, the HVM start info
 * segment, and the identity-map page table used with CR0.PG=0 on EPT.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
{
    unsigned long i;
    uint32_t *ident_pt, domid = dom->guest_domid;
    int rc;
    xen_pfn_t special_array[X86_HVM_NR_SPECIAL_PAGES];
    xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
    xc_interface *xch = dom->xch;
    size_t start_info_size = sizeof(struct hvm_start_info);

    /* Allocate and clear special pages. */
    for ( i = 0; i < X86_HVM_NR_SPECIAL_PAGES; i++ )
        special_array[i] = special_pfn(i);

    rc = xc_domain_populate_physmap_exact(xch, domid, X86_HVM_NR_SPECIAL_PAGES,
                                          0, 0, special_array);
    if ( rc != 0 )
    {
        DOMPRINTF("Could not allocate special pages.");
        goto error_out;
    }

    if ( xc_clear_domain_pages(xch, domid, special_pfn(0),
                               X86_HVM_NR_SPECIAL_PAGES) )
            goto error_out;

    /* Publish the well-known special pfns via HVM params. */
    xc_hvm_param_set(xch, domid, HVM_PARAM_STORE_PFN,
                     special_pfn(SPECIALPAGE_XENSTORE));
    xc_hvm_param_set(xch, domid, HVM_PARAM_BUFIOREQ_PFN,
                     special_pfn(SPECIALPAGE_BUFIOREQ));
    xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_PFN,
                     special_pfn(SPECIALPAGE_IOREQ));
    xc_hvm_param_set(xch, domid, HVM_PARAM_CONSOLE_PFN,
                     special_pfn(SPECIALPAGE_CONSOLE));
    xc_hvm_param_set(xch, domid, HVM_PARAM_PAGING_RING_PFN,
                     special_pfn(SPECIALPAGE_PAGING));
    xc_hvm_param_set(xch, domid, HVM_PARAM_MONITOR_RING_PFN,
                     special_pfn(SPECIALPAGE_ACCESS));
    xc_hvm_param_set(xch, domid, HVM_PARAM_SHARING_RING_PFN,
                     special_pfn(SPECIALPAGE_SHARING));

    /* Room for the module list, per-module command lines, and the e820. */
    start_info_size +=
        sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;

    start_info_size +=
        HVMLOADER_MODULE_CMDLINE_SIZE * HVMLOADER_MODULE_MAX_COUNT;

    start_info_size +=
        dom->e820_entries * sizeof(struct hvm_memmap_table_entry);

    if ( !dom->device_model )
    {
        /* PVH-style boot: the command line travels in the start info. */
        if ( dom->cmdline )
        {
            dom->cmdline_size = ROUNDUP(strlen(dom->cmdline) + 1, 8);
            start_info_size += dom->cmdline_size;
        }
    }
    else
    {
        /*
         * Allocate and clear additional ioreq server pages. The default
         * server will use the IOREQ and BUFIOREQ special pages above.
         */
        for ( i = 0; i < NR_IOREQ_SERVER_PAGES; i++ )
            ioreq_server_array[i] = ioreq_server_pfn(i);

        rc = xc_domain_populate_physmap_exact(xch, domid, NR_IOREQ_SERVER_PAGES, 0,
                                              0, ioreq_server_array);
        if ( rc != 0 )
        {
            DOMPRINTF("Could not allocate ioreq server pages.");
            goto error_out;
        }

        if ( xc_clear_domain_pages(xch, domid, ioreq_server_pfn(0),
                                   NR_IOREQ_SERVER_PAGES) )
                goto error_out;

        /* Tell the domain where the pages are and how many there are */
        xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_SERVER_PFN,
                         ioreq_server_pfn(0));
        xc_hvm_param_set(xch, domid, HVM_PARAM_NR_IOREQ_SERVER_PAGES,
                         NR_IOREQ_SERVER_PAGES);
    }

    rc = xc_dom_alloc_segment(dom, &dom->start_info_seg,
                              "HVM start info", 0, start_info_size);
    if ( rc != 0 )
    {
        DOMPRINTF("Unable to reserve memory for the start info");
        /* rc is already non-zero here, so "out" returns the error. */
        goto out;
    }

    /*
     * Identity-map page table is required for running with CR0.PG=0 when
     * using Intel EPT. Create a 32-bit non-PAE page directory of superpages.
     */
    if ( (ident_pt = xc_map_foreign_range(
              xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
              special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
        goto error_out;
    for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
        ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
                       _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
    munmap(ident_pt, PAGE_SIZE);
    xc_hvm_param_set(xch, domid, HVM_PARAM_IDENT_PT,
                     special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);

    dom->console_pfn = special_pfn(SPECIALPAGE_CONSOLE);
    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);

    dom->xenstore_pfn = special_pfn(SPECIALPAGE_XENSTORE);
    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);

    dom->parms.virt_hypercall = -1;

    rc = 0;
    goto out;
 error_out:
    rc = -1;
 out:

    return rc;
}
+
+/* ------------------------------------------------------------------------ */
+
+static int start_info_x86_32(struct xc_dom_image *dom)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    start_info_x86_32_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+        xc_dom_translated(dom) ? dom->shared_info_pfn : dom->shared_info_mfn;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    if ( start_info == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
+        return -1; /* our caller throws away our return value :-/ */
+    }
+
+    memset(start_info, 0, sizeof(*start_info));
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = domx86->maps[0].area.pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if ( dom->modules[0].blob )
+    {
+        start_info->mod_start = dom->initrd_start;
+        start_info->mod_len = dom->initrd_len;
+    }
+
+    if ( dom->cmdline )
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+
+    return 0;
+}
+
+static int start_info_x86_64(struct xc_dom_image *dom)
+{
+    struct xc_dom_image_x86 *domx86 = dom->arch_private;
+    start_info_x86_64_t *start_info =
+        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
+    xen_pfn_t shinfo =
+        xc_dom_translated(dom) ? dom->shared_info_pfn : dom->shared_info_mfn;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    if ( start_info == NULL )
+    {
+        DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
+        return -1; /* our caller throws away our return value :-/ */
+    }
+
+    memset(start_info, 0, sizeof(*start_info));
+    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
+    start_info->nr_pages = dom->total_pages;
+    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
+    start_info->pt_base = dom->pgtables_seg.vstart;
+    start_info->nr_pt_frames = domx86->maps[0].area.pgtables;
+    start_info->mfn_list = dom->p2m_seg.vstart;
+    if ( dom->parms.p2m_base != UNSET_ADDR )
+    {
+        start_info->first_p2m_pfn = dom->p2m_seg.pfn;
+        start_info->nr_p2m_frames = dom->p2m_seg.pages;
+    }
+
+    start_info->flags = dom->flags;
+    start_info->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
+    start_info->store_evtchn = dom->xenstore_evtchn;
+    start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
+    start_info->console.domU.evtchn = dom->console_evtchn;
+
+    if ( dom->modules[0].blob )
+    {
+        start_info->mod_start = dom->initrd_start;
+        start_info->mod_len = dom->initrd_len;
+    }
+
+    if ( dom->cmdline )
+    {
+        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
+    }
+
+    return 0;
+}
+
+static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_32_t *shared_info = ptr;
+    int i;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
+        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+    return 0;
+}
+
+static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
+{
+    shared_info_x86_64_t *shared_info = ptr;
+    int i;
+
+    DOMPRINTF_CALLED(dom->xch);
+
+    memset(shared_info, 0, sizeof(*shared_info));
+    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
+        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
+    return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
/*
 * Build and install the boot-time context of vcpu0 for a 32bit PV guest:
 * entry point, boot stack, start_info pointer in %esi, flat segments, and
 * the pagetable root (using extended-cr3 format when the kernel supports
 * it).  Returns 0 on success, the xc_vcpu_setcontext error otherwise.
 */
static int vcpu_x86_32(struct xc_dom_image *dom)
{
    vcpu_guest_context_any_t any_ctx;
    vcpu_guest_context_x86_32_t *ctxt = &any_ctx.x32;
    xen_pfn_t cr3_pfn;
    int rc;

    DOMPRINTF_CALLED(dom->xch);

    /* clear everything */
    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->user_regs.eip = dom->parms.virt_entry;
    /* Stack grows down from the top of the bootstack page. */
    ctxt->user_regs.esp =
        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
    /* PV ABI: %esi carries the virtual address of the start_info page. */
    ctxt->user_regs.esi =
        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    ctxt->debugreg[6] = X86_DR6_DEFAULT;
    ctxt->debugreg[7] = X86_DR7_DEFAULT;

    ctxt->flags = VGCF_in_kernel_X86_32 | VGCF_online_X86_32;
    /* Advertise extended-cr3 handling if the kernel can use it. */
    if ( dom->parms.pae == XEN_PAE_EXTCR3 ||
         dom->parms.pae == XEN_PAE_BIMODAL )
        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);

    cr3_pfn = xc_dom_p2m(dom, dom->pgtables_seg.pfn);
    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
    DOMPRINTF("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "",
              __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);

    /* Flat kernel segments for all selectors. */
    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;

    ctxt->kernel_ss = ctxt->user_regs.ss;
    ctxt->kernel_sp = ctxt->user_regs.esp;

    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
    if ( rc != 0 )
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);

    return rc;
}
+
/*
 * Build and install the boot-time context of vcpu0 for a 64bit PV guest:
 * entry point, boot stack, start_info pointer in %rsi, flat segments, and
 * the pagetable root.  Returns 0 on success, the xc_vcpu_setcontext error
 * otherwise.
 */
static int vcpu_x86_64(struct xc_dom_image *dom)
{
    vcpu_guest_context_any_t any_ctx;
    vcpu_guest_context_x86_64_t *ctxt = &any_ctx.x64;
    xen_pfn_t cr3_pfn;
    int rc;

    DOMPRINTF_CALLED(dom->xch);

    /* clear everything */
    memset(ctxt, 0, sizeof(*ctxt));

    ctxt->user_regs.rip = dom->parms.virt_entry;
    /* Stack grows down from the top of the bootstack page. */
    ctxt->user_regs.rsp =
        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
    /* PV ABI: %rsi carries the virtual address of the start_info page. */
    ctxt->user_regs.rsi =
        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
    ctxt->user_regs.rflags = 1 << 9; /* Interrupt Enable */

    ctxt->debugreg[6] = X86_DR6_DEFAULT;
    ctxt->debugreg[7] = X86_DR7_DEFAULT;

    ctxt->flags = VGCF_in_kernel_X86_64 | VGCF_online_X86_64;
    cr3_pfn = xc_dom_p2m(dom, dom->pgtables_seg.pfn);
    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
    DOMPRINTF("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "",
              __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);

    /* Flat kernel segments for all selectors. */
    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;

    ctxt->kernel_ss = ctxt->user_regs.ss;
    /*
     * NOTE(review): 'esp' here presumably aliases rsp via the register
     * declaration union in the x86_64 public headers -- confirm against
     * xen/foreign/x86_64.h.
     */
    ctxt->kernel_sp = ctxt->user_regs.esp;

    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
    if ( rc != 0 )
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);

    return rc;
}
+
+const static void *hvm_get_save_record(const void *ctx, unsigned int type,
+                                       unsigned int instance)
+{
+    const struct hvm_save_descriptor *header;
+
+    for ( header = ctx;
+          header->typecode != HVM_SAVE_CODE(END);
+          ctx += sizeof(*header) + header->length, header = ctx )
+        if ( header->typecode == type && header->instance == instance )
+            return ctx + sizeof(*header);
+
+    return NULL;
+}
+
/*
 * Build and load the initial vCPU (BSP) state for an HVM guest.
 *
 * The full HVM context is fetched from Xen first, because a valid save
 * header cannot be constructed in userspace.  A minimal CPU save record
 * (flat 32-bit protected mode, paging disabled, IP at the physical
 * entry point) is then spliced between that header and an END record
 * and loaded with SETHVMCONTEXT.  For guests without a device model,
 * per-vCPU MTRR records are set first.
 *
 * Returns 0 on success; non-zero otherwise.
 */
static int vcpu_hvm(struct xc_dom_image *dom)
{
    /* Partial context: header + one CPU record + END, in save format. */
    struct {
        struct hvm_save_descriptor header_d;
        HVM_SAVE_TYPE(HEADER) header;
        struct hvm_save_descriptor cpu_d;
        HVM_SAVE_TYPE(CPU) cpu;
        struct hvm_save_descriptor end_d;
        HVM_SAVE_TYPE(END) end;
    } bsp_ctx;
    uint8_t *full_ctx = NULL;
    int rc;

    DOMPRINTF_CALLED(dom->xch);

    assert(dom->max_vcpus);

    /*
     * Get the full HVM context in order to have the header, it is not
     * possible to get the header with getcontext_partial, and crafting one
     * from userspace is also not an option since cpuid is trapped and
     * modified by Xen.
     */

    /* First call with a NULL buffer returns the required size. */
    rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, NULL, 0);
    if ( rc <= 0 )
    {
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: unable to fetch HVM context size (rc=%d)",
                     __func__, rc);
        goto out;
    }

    full_ctx = calloc(1, rc);
    if ( full_ctx == NULL )
    {
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: unable to allocate memory for HVM context (rc=%d)",
                     __func__, rc);
        rc = -ENOMEM;
        goto out;
    }

    rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, full_ctx, rc);
    if ( rc <= 0 )
    {
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: unable to fetch HVM context (rc=%d)",
                     __func__, rc);
        goto out;
    }

    /* Copy the header to our partial context. */
    memset(&bsp_ctx, 0, sizeof(bsp_ctx));
    memcpy(&bsp_ctx, full_ctx,
           sizeof(struct hvm_save_descriptor) + HVM_SAVE_LENGTH(HEADER));

    /* Set the CPU descriptor. */
    bsp_ctx.cpu_d.typecode = HVM_SAVE_CODE(CPU);
    bsp_ctx.cpu_d.instance = 0;
    bsp_ctx.cpu_d.length = HVM_SAVE_LENGTH(CPU);

    /* Set the cached part of the relevant segment registers. */
    /* Attribute bytes describe flat ring-0 segments: 0xc9b is a present
     * 32-bit code segment, 0xc93 a present data segment, 0x8b a 32-bit
     * TSS (busy). */
    bsp_ctx.cpu.cs_base = 0;
    bsp_ctx.cpu.ds_base = 0;
    bsp_ctx.cpu.es_base = 0;
    bsp_ctx.cpu.ss_base = 0;
    bsp_ctx.cpu.tr_base = 0;
    bsp_ctx.cpu.cs_limit = ~0u;
    bsp_ctx.cpu.ds_limit = ~0u;
    bsp_ctx.cpu.es_limit = ~0u;
    bsp_ctx.cpu.ss_limit = ~0u;
    bsp_ctx.cpu.tr_limit = 0x67;
    bsp_ctx.cpu.cs_arbytes = 0xc9b;
    bsp_ctx.cpu.ds_arbytes = 0xc93;
    bsp_ctx.cpu.es_arbytes = 0xc93;
    bsp_ctx.cpu.ss_arbytes = 0xc93;
    bsp_ctx.cpu.tr_arbytes = 0x8b;

    /* Set the control registers. */
    /* Protected mode enabled, paging disabled. */
    bsp_ctx.cpu.cr0 = X86_CR0_PE | X86_CR0_ET;

    /* Set the IP. */
    bsp_ctx.cpu.rip = dom->parms.phys_entry;

    bsp_ctx.cpu.dr6 = X86_DR6_DEFAULT;
    bsp_ctx.cpu.dr7 = X86_DR7_DEFAULT;

    /* PVH-style direct boot: the start_info physical address is handed
     * to the guest in %ebx. */
    if ( dom->start_info_seg.pfn )
        bsp_ctx.cpu.rbx = dom->start_info_seg.pfn << PAGE_SHIFT;

    /* Set the end descriptor. */
    bsp_ctx.end_d.typecode = HVM_SAVE_CODE(END);
    bsp_ctx.end_d.instance = 0;
    bsp_ctx.end_d.length = HVM_SAVE_LENGTH(END);

    /* TODO: maybe this should be a firmware option instead? */
    if ( !dom->device_model )
    {
        /* Reuse the header and END record around a fresh MTRR record. */
        struct {
            struct hvm_save_descriptor header_d;
            HVM_SAVE_TYPE(HEADER) header;
            struct hvm_save_descriptor mtrr_d;
            HVM_SAVE_TYPE(MTRR) mtrr;
            struct hvm_save_descriptor end_d;
            HVM_SAVE_TYPE(END) end;
        } mtrr = {
            .header_d = bsp_ctx.header_d,
            .header = bsp_ctx.header,
            .mtrr_d.typecode = HVM_SAVE_CODE(MTRR),
            .mtrr_d.length = HVM_SAVE_LENGTH(MTRR),
            .end_d = bsp_ctx.end_d,
            .end = bsp_ctx.end,
        };
        const HVM_SAVE_TYPE(MTRR) *mtrr_record =
            hvm_get_save_record(full_ctx, HVM_SAVE_CODE(MTRR), 0);
        unsigned int i;

        if ( !mtrr_record )
        {
            /* NOTE(review): rc still holds the positive context size from
             * getcontext here, so this error path returns > 0 — confirm
             * callers treat any non-zero return as failure. */
            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                         "%s: unable to get MTRR save record", __func__);
            goto out;
        }

        memcpy(&mtrr.mtrr, mtrr_record, sizeof(mtrr.mtrr));

        /*
         * Enable MTRR, set default type to WB.
         * TODO: add MMIO areas as UC when passthrough is supported.
         */
        mtrr.mtrr.msr_mtrr_def_type = MTRR_TYPE_WRBACK | MTRR_DEF_TYPE_ENABLE;

        /* One MTRR record per vCPU. */
        for ( i = 0; i < dom->max_vcpus; i++ )
        {
            mtrr.mtrr_d.instance = i;
            rc = xc_domain_hvm_setcontext(dom->xch, dom->guest_domid,
                                          (uint8_t *)&mtrr, sizeof(mtrr));
            if ( rc != 0 )
                xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                             "%s: SETHVMCONTEXT failed (rc=%d)", __func__, rc);
        }
    }

    /*
     * Loading the BSP context should be done in the last call to setcontext,
     * since each setcontext call will put all vCPUs down.
     */
    rc = xc_domain_hvm_setcontext(dom->xch, dom->guest_domid,
                                  (uint8_t *)&bsp_ctx, sizeof(bsp_ctx));
    if ( rc != 0 )
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: SETHVMCONTEXT failed (rc=%d)", __func__, rc);

 out:
    free(full_ctx);
    return rc;
}
+
+/* ------------------------------------------------------------------------ */
+
+static int x86_compat(xc_interface *xch, uint32_t domid, char *guest_type)
+{
+    static const struct {
+        char           *guest;
+        uint32_t        size;
+    } types[] = {
+        { "xen-3.0-x86_32p", 32 },
+        { "xen-3.0-x86_64",  64 },
+    };
+    DECLARE_DOMCTL;
+    int i,rc;
+
+    memset(&domctl, 0, sizeof(domctl));
+    domctl.domain = domid;
+    domctl.cmd    = XEN_DOMCTL_set_address_size;
+    for ( i = 0; i < ARRAY_SIZE(types); i++ )
+        if ( !strcmp(types[i].guest, guest_type) )
+            domctl.u.address_size.size = types[i].size;
+    if ( domctl.u.address_size.size == 0 )
+        /* nothing to do */
+        return 0;
+
+    xc_dom_printf(xch, "%s: guest %s, address size %" PRId32 "", __FUNCTION__,
+                  guest_type, domctl.u.address_size.size);
+    rc = do_domctl(xch, &domctl);
+    if ( rc != 0 )
+        xc_dom_printf(xch, "%s: warning: failed (rc=%d)",
+                      __FUNCTION__, rc);
+    return rc;
+}
+
/*
 * Allocate the physical memory for a PV guest and populate its p2m.
 *
 * Allocation is driven per vmemrange (real vNUMA data from the caller,
 * or a single dummy range covering all memory), preferring 2MB
 * superpages and falling back to batches of 4kB pages for the tail.
 *
 * Returns 0 on success, negative on failure.
 */
static int meminit_pv(struct xc_dom_image *dom)
{
    int rc;
    xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
    int i, j, k;
    xen_vmemrange_t dummy_vmemrange[1];
    unsigned int dummy_vnode_to_pnode[1];
    xen_vmemrange_t *vmemranges;
    unsigned int *vnode_to_pnode;
    unsigned int nr_vmemranges, nr_vnodes;

    rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
    if ( rc )
        return rc;

    /* try to claim pages for early warning of insufficient memory avail */
    if ( dom->claim_enabled )
    {
        rc = xc_domain_claim_pages(dom->xch, dom->guest_domid,
                                   dom->total_pages);
        if ( rc )
            return rc;
    }

    /* Setup dummy vNUMA information if it's not provided. Note
     * that this is a valid state if libxl doesn't provide any
     * vNUMA information.
     *
     * The dummy values make libxc allocate all pages from
     * arbitrary physical nodes. This is the expected behaviour if
     * no vNUMA configuration is provided to libxc.
     *
     * Note that the following hunk is just for the convenience of
     * allocation code. No defaulting happens in libxc.
     */
    if ( dom->nr_vmemranges == 0 )
    {
        nr_vmemranges = 1;
        vmemranges = dummy_vmemrange;
        vmemranges[0].start = 0;
        vmemranges[0].end   = (uint64_t)dom->total_pages << PAGE_SHIFT;
        vmemranges[0].flags = 0;
        vmemranges[0].nid   = 0;

        nr_vnodes = 1;
        vnode_to_pnode = dummy_vnode_to_pnode;
        vnode_to_pnode[0] = XC_NUMA_NO_NODE;
    }
    else
    {
        nr_vmemranges = dom->nr_vmemranges;
        nr_vnodes = dom->nr_vnodes;
        vmemranges = dom->vmemranges;
        vnode_to_pnode = dom->vnode_to_pnode;
    }

    /* p2m_size is the highest pfn covered by any range; total counts pages. */
    total = dom->p2m_size = 0;
    for ( i = 0; i < nr_vmemranges; i++ )
    {
        total += ((vmemranges[i].end - vmemranges[i].start) >> PAGE_SHIFT);
        dom->p2m_size = max(dom->p2m_size,
                            (xen_pfn_t)(vmemranges[i].end >> PAGE_SHIFT));
    }
    if ( total != dom->total_pages )
    {
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")",
                     __func__, total, dom->total_pages);
        return -EINVAL;
    }

    /* Holes between vmemranges stay INVALID_PFN. */
    dom->pv_p2m = xc_dom_malloc(dom, sizeof(*dom->pv_p2m) * dom->p2m_size);
    if ( dom->pv_p2m == NULL )
        return -EINVAL;
    for ( pfn = 0; pfn < dom->p2m_size; pfn++ )
        dom->pv_p2m[pfn] = INVALID_PFN;

    /* allocate guest memory */
    for ( i = 0; i < nr_vmemranges; i++ )
    {
        unsigned int memflags;
        uint64_t pages, super_pages;
        unsigned int pnode = vnode_to_pnode[vmemranges[i].nid];
        xen_pfn_t extents[SUPERPAGE_BATCH_SIZE];
        xen_pfn_t pfn_base_idx;

        memflags = 0;
        if ( pnode != XC_NUMA_NO_NODE )
            memflags |= XENMEMF_exact_node(pnode);

        pages = (vmemranges[i].end - vmemranges[i].start) >> PAGE_SHIFT;
        super_pages = pages >> SUPERPAGE_2MB_SHIFT;
        pfn_base = vmemranges[i].start >> PAGE_SHIFT;

        /* Seed the p2m with identity values; real mfns are filled below. */
        for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
            dom->pv_p2m[pfn] = pfn;

        /* First pass: try to allocate 2MB superpages in batches. */
        pfn_base_idx = pfn_base;
        while ( super_pages ) {
            uint64_t count = min_t(uint64_t, super_pages, SUPERPAGE_BATCH_SIZE);
            super_pages -= count;

            for ( pfn = pfn_base_idx, j = 0;
                  pfn < pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT);
                  pfn += SUPERPAGE_2MB_NR_PFNS, j++ )
                extents[j] = dom->pv_p2m[pfn];
            rc = xc_domain_populate_physmap(dom->xch, dom->guest_domid, count,
                                            SUPERPAGE_2MB_SHIFT, memflags,
                                            extents);
            if ( rc < 0 )
                return rc;

            /* Expand the returned mfns into the p2m array. */
            pfn = pfn_base_idx;
            for ( j = 0; j < rc; j++ )
            {
                mfn = extents[j];
                for ( k = 0; k < SUPERPAGE_2MB_NR_PFNS; k++, pfn++ )
                    dom->pv_p2m[pfn] = mfn + k;
            }
            pfn_base_idx = pfn;
        }

        /* Second pass: allocate the remaining tail as 4kB pages, in
         * chunks of at most 1M pages per hypercall. */
        for ( j = pfn_base_idx - pfn_base; j < pages; j += allocsz )
        {
            allocsz = min_t(uint64_t, 1024 * 1024, pages - j);
            rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
                     allocsz, 0, memflags, &dom->pv_p2m[pfn_base + j]);

            if ( rc )
            {
                if ( pnode != XC_NUMA_NO_NODE )
                    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                                 "%s: failed to allocate 0x%"PRIx64" pages (v=%d, p=%d)",
                                 __func__, pages, i, pnode);
                else
                    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                                 "%s: failed to allocate 0x%"PRIx64" pages",
                                 __func__, pages);
                return rc;
            }
        }
        rc = 0;
    }

    /* Ensure no unclaimed pages are left unused.
     * OK to call if hadn't done the earlier claim call. */
    xc_domain_claim_pages(dom->xch, dom->guest_domid, 0 /* cancel claim */);

    return rc;
}
+
/*
 * Check whether there exists mmio hole in the specified memory range.
 * Returns 1 if exists, else returns 0.
 */
static int check_mmio_hole(uint64_t start, uint64_t memsize,
                           uint64_t mmio_start, uint64_t mmio_size)
{
    /* The two ranges overlap unless one ends at or before the other
     * begins. */
    return !(start + memsize <= mmio_start ||
             start >= mmio_start + mmio_size);
}
+
/*
 * Allocate the physical memory for an HVM guest.
 *
 * Allocation is driven per vmemrange (real vNUMA data, or dummy ranges
 * covering low and high memory), attempting 1GB superpages first, then
 * 2MB superpages, then 4kB pages.  The VGA hole at 0xA0000-0xC0000 is
 * skipped when a device model is present, and populate-on-demand mode
 * is used when target_pages < total_pages.
 *
 * Returns 0 on success, -1 on failure.
 */
static int meminit_hvm(struct xc_dom_image *dom)
{
    unsigned long i, vmemid, nr_pages = dom->total_pages;
    unsigned long p2m_size;
    unsigned long target_pages = dom->target_pages;
    unsigned long cur_pages, cur_pfn;
    int rc;
    unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
        stat_1gb_pages = 0;
    unsigned int memflags = 0;
    int claim_enabled = dom->claim_enabled;
    uint64_t total_pages;
    xen_vmemrange_t dummy_vmemrange[2];
    unsigned int dummy_vnode_to_pnode[1];
    xen_vmemrange_t *vmemranges;
    unsigned int *vnode_to_pnode;
    unsigned int nr_vmemranges, nr_vnodes;
    xc_interface *xch = dom->xch;
    uint32_t domid = dom->guest_domid;

    /* Fewer target than total pages means populate-on-demand (ballooned). */
    if ( nr_pages > target_pages )
        memflags |= XENMEMF_populate_on_demand;

    if ( dom->nr_vmemranges == 0 )
    {
        /* Build dummy vnode information
         *
         * Guest physical address space layout:
         * [0, hole_start) [hole_start, 4G) [4G, highmem_end)
         *
         * Of course if there is no high memory, the second vmemrange
         * has no effect on the actual result.
         */

        dummy_vmemrange[0].start = 0;
        dummy_vmemrange[0].end   = dom->lowmem_end;
        dummy_vmemrange[0].flags = 0;
        dummy_vmemrange[0].nid   = 0;
        nr_vmemranges = 1;

        if ( dom->highmem_end > (1ULL << 32) )
        {
            dummy_vmemrange[1].start = 1ULL << 32;
            dummy_vmemrange[1].end   = dom->highmem_end;
            dummy_vmemrange[1].flags = 0;
            dummy_vmemrange[1].nid   = 0;

            nr_vmemranges++;
        }

        dummy_vnode_to_pnode[0] = XC_NUMA_NO_NODE;
        nr_vnodes = 1;
        vmemranges = dummy_vmemrange;
        vnode_to_pnode = dummy_vnode_to_pnode;
    }
    else
    {
        if ( nr_pages > target_pages )
        {
            DOMPRINTF("Cannot enable vNUMA and PoD at the same time");
            goto error_out;
        }

        nr_vmemranges = dom->nr_vmemranges;
        nr_vnodes = dom->nr_vnodes;
        vmemranges = dom->vmemranges;
        vnode_to_pnode = dom->vnode_to_pnode;
    }

    /* p2m_size is the highest pfn covered by any range. */
    total_pages = 0;
    p2m_size = 0;
    for ( i = 0; i < nr_vmemranges; i++ )
    {
        DOMPRINTF("range: start=0x%"PRIx64" end=0x%"PRIx64, vmemranges[i].start, vmemranges[i].end);

        total_pages += ((vmemranges[i].end - vmemranges[i].start)
                        >> PAGE_SHIFT);
        p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ?
            p2m_size : (vmemranges[i].end >> PAGE_SHIFT);
    }

    if ( total_pages != nr_pages )
    {
        DOMPRINTF("vNUMA memory pages mismatch (0x%"PRIx64" != 0x%lx)",
               total_pages, nr_pages);
        goto error_out;
    }

    dom->p2m_size = p2m_size;

    /*
     * Try to claim pages for early warning of insufficient memory available.
     * This should go before xc_domain_set_pod_target, because that function
     * actually allocates memory for the guest. Claiming after memory has been
     * allocated is pointless.
     */
    if ( claim_enabled ) {
        rc = xc_domain_claim_pages(xch, domid,
                                   target_pages - dom->vga_hole_size);
        if ( rc != 0 )
        {
            DOMPRINTF("Could not allocate memory for HVM guest as we cannot claim memory!");
            goto error_out;
        }
    }

    if ( memflags & XENMEMF_populate_on_demand )
    {
        /*
         * Subtract VGA_HOLE_SIZE from target_pages for the VGA
         * "hole".  Xen will adjust the PoD cache size so that domain
         * tot_pages will be target_pages - VGA_HOLE_SIZE after
         * this call.
         */
        rc = xc_domain_set_pod_target(xch, domid,
                                      target_pages - dom->vga_hole_size,
                                      NULL, NULL, NULL);
        if ( rc != 0 )
        {
            DOMPRINTF("Could not set PoD target for HVM guest.\n");
            goto error_out;
        }
    }

    /*
     * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
     *
     * We attempt to allocate 1GB pages if possible. It falls back on 2MB
     * pages if 1GB allocation fails. 4KB pages will be used eventually if
     * both fail.
     */
    if ( dom->device_model )
    {
        /* Populate the pages below the VGA hole (pfns 0x00-0x9f). */
        xen_pfn_t extents[0xa0];

        for ( i = 0; i < ARRAY_SIZE(extents); ++i )
            extents[i] = i;

        rc = xc_domain_populate_physmap_exact(
            xch, domid, 0xa0, 0, memflags, extents);
        if ( rc != 0 )
        {
            DOMPRINTF("Could not populate low memory (< 0xA0).\n");
            goto error_out;
        }
    }

    stat_normal_pages = 0;
    for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
    {
        unsigned int new_memflags = memflags;
        uint64_t end_pages;
        unsigned int vnode = vmemranges[vmemid].nid;
        unsigned int pnode = vnode_to_pnode[vnode];

        if ( pnode != XC_NUMA_NO_NODE )
            new_memflags |= XENMEMF_exact_node(pnode);

        end_pages = vmemranges[vmemid].end >> PAGE_SHIFT;
        /*
         * Consider vga hole belongs to the vmemrange that covers
         * 0xA0000-0xC0000. Note that 0x00000-0xA0000 is populated just
         * before this loop.
         */
        if ( vmemranges[vmemid].start == 0 && dom->device_model )
        {
            cur_pages = 0xc0;
            stat_normal_pages += 0xc0;
        }
        else
            cur_pages = vmemranges[vmemid].start >> PAGE_SHIFT;

        rc = 0;
        while ( (rc == 0) && (end_pages > cur_pages) )
        {
            /* Clip count to maximum 1GB extent. */
            unsigned long count = end_pages - cur_pages;
            unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;

            if ( count > max_pages )
                count = max_pages;

            cur_pfn = cur_pages;

            /* Take care the corner cases of super page tails */
            if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
                 (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
                count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
            else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
                      (count > SUPERPAGE_1GB_NR_PFNS) )
                count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);

            /* Attempt to allocate 1GB super page. Because in each pass
             * we only allocate at most 1GB, we don't have to clip
             * super page boundaries.
             */
            if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
                 /* Check if there exists MMIO hole in the 1GB memory
                  * range */
                 !check_mmio_hole(cur_pfn << PAGE_SHIFT,
                                  SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
                                  dom->mmio_start, dom->mmio_size) )
            {
                long done;
                unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
                xen_pfn_t sp_extents[nr_extents];

                for ( i = 0; i < nr_extents; i++ )
                    sp_extents[i] = cur_pages + (i << SUPERPAGE_1GB_SHIFT);

                done = xc_domain_populate_physmap(xch, domid, nr_extents,
                                                  SUPERPAGE_1GB_SHIFT,
                                                  new_memflags, sp_extents);

                /* Partial success is fine: the rest falls through to 2MB. */
                if ( done > 0 )
                {
                    stat_1gb_pages += done;
                    done <<= SUPERPAGE_1GB_SHIFT;
                    cur_pages += done;
                    count -= done;
                }
            }

            if ( count != 0 )
            {
                /* Clip count to maximum 8MB extent. */
                max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
                if ( count > max_pages )
                    count = max_pages;

                /* Clip partial superpage extents to superpage
                 * boundaries. */
                if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
                     (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
                    count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
                else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
                          (count > SUPERPAGE_2MB_NR_PFNS) )
                    count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */

                /* Attempt to allocate superpage extents. */
                if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
                {
                    long done;
                    unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
                    xen_pfn_t sp_extents[nr_extents];

                    for ( i = 0; i < nr_extents; i++ )
                        sp_extents[i] = cur_pages + (i << SUPERPAGE_2MB_SHIFT);

                    done = xc_domain_populate_physmap(xch, domid, nr_extents,
                                                      SUPERPAGE_2MB_SHIFT,
                                                      new_memflags, sp_extents);

                    if ( done > 0 )
                    {
                        stat_2mb_pages += done;
                        done <<= SUPERPAGE_2MB_SHIFT;
                        cur_pages += done;
                        count -= done;
                    }
                }
            }

            /* Fall back to 4kB extents. */
            if ( count != 0 )
            {
                xen_pfn_t extents[count];

                for ( i = 0; i < count; ++i )
                    extents[i] = cur_pages + i;

                rc = xc_domain_populate_physmap_exact(
                    xch, domid, count, 0, new_memflags, extents);
                cur_pages += count;
                stat_normal_pages += count;
            }
        }

        if ( rc != 0 )
        {
            DOMPRINTF("Could not allocate memory for HVM guest.");
            goto error_out;
        }
    }

    DPRINTF("PHYSICAL MEMORY ALLOCATION:\n");
    DPRINTF("  4KB PAGES: 0x%016lx\n", stat_normal_pages);
    DPRINTF("  2MB PAGES: 0x%016lx\n", stat_2mb_pages);
    DPRINTF("  1GB PAGES: 0x%016lx\n", stat_1gb_pages);

    rc = 0;
    goto out;
 error_out:
    rc = -1;
 out:

    /* ensure no unclaimed pages are left unused */
    xc_domain_claim_pages(xch, domid, 0 /* cancels the claim */);

    return rc;
}
+
+/* ------------------------------------------------------------------------ */
+
+static int bootearly(struct xc_dom_image *dom)
+{
+    if ( dom->container_type == XC_DOM_PV_CONTAINER &&
+         elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active) )
+    {
+        DOMPRINTF("PV Autotranslate guests no longer supported");
+        errno = EOPNOTSUPP;
+        return -1;
+    }
+
+    return 0;
+}
+
/*
 * Final PV boot fixups: drop the build-time mappings of the page
 * tables and p2m list, pin the top-level page table (L2/L3/L4
 * depending on guest mode), and fill in the shared_info page.
 *
 * Returns 0 on success, non-zero on failure.
 */
static int bootlate_pv(struct xc_dom_image *dom)
{
    /* Map guest type string to the matching top-level pin operation. */
    static const struct {
        char *guest;
        unsigned long pgd_type;
    } types[] = {
        { "xen-3.0-x86_32",  MMUEXT_PIN_L2_TABLE},
        { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
        { "xen-3.0-x86_64",  MMUEXT_PIN_L4_TABLE},
    };
    unsigned long pgd_type = 0;
    shared_info_t *shared_info;
    xen_pfn_t shinfo;
    int i, rc;

    for ( i = 0; i < ARRAY_SIZE(types); i++ )
        if ( !strcmp(types[i].guest, dom->guest_type) )
            pgd_type = types[i].pgd_type;

    /* Drop references to all initial page tables before pinning. */
    xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
    xc_dom_unmap_one(dom, dom->p2m_seg.pfn);
    rc = pin_table(dom->xch, pgd_type,
                   xc_dom_p2m(dom, dom->pgtables_seg.pfn),
                   dom->guest_domid);
    if ( rc != 0 )
    {
        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
                     "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)",
                     __FUNCTION__, dom->pgtables_seg.pfn, rc);
        return rc;
    }
    /* PV guests address shared_info by machine frame. */
    shinfo = dom->shared_info_mfn;

    /* setup shared_info page */
    DOMPRINTF("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "",
              __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
    shared_info = xc_map_foreign_range(dom->xch, dom->guest_domid,
                                       PAGE_SIZE_X86,
                                       PROT_READ | PROT_WRITE,
                                       shinfo);
    if ( shared_info == NULL )
        return -1;
    dom->arch_hooks->shared_info(dom, shared_info);
    munmap(shared_info, PAGE_SIZE_X86);

    return 0;
}
+
+/*
+ * The memory layout of the start_info page and the modules, and where the
+ * addresses are stored:
+ *
+ * /----------------------------------\
+ * | struct hvm_start_info            |
+ * +----------------------------------+ <- start_info->modlist_paddr
+ * | struct hvm_modlist_entry[0]      |
+ * +----------------------------------+
+ * | struct hvm_modlist_entry[1]      |
+ * +----------------------------------+ <- modlist[0].cmdline_paddr
+ * | cmdline of module 0              |
+ * | char[HVMLOADER_MODULE_NAME_SIZE] |
+ * +----------------------------------+ <- modlist[1].cmdline_paddr
+ * | cmdline of module 1              |
+ * +----------------------------------+
+ */
/*
 * Append one firmware/kernel module to the hvm_start_info module list.
 *
 * 'modlist' points at the module entry array inside the start_info
 * page (see the layout diagram above); command lines are stored in
 * fixed-size slots following the HVMLOADER_MODULE_MAX_COUNT entries.
 * All guest-physical addresses are derived from the start_info pfn
 * plus the offset within the mapped page.  Modules with length 0 are
 * silently skipped.
 */
static void add_module_to_list(struct xc_dom_image *dom,
                               struct xc_hvm_firmware_module *module,
                               const char *cmdline,
                               struct hvm_modlist_entry *modlist,
                               struct hvm_start_info *start_info)
{
    uint32_t index = start_info->nr_modules;
    /* Cmdline slots start right after the (fixed-size) module array. */
    void *modules_cmdline_start = modlist + HVMLOADER_MODULE_MAX_COUNT;
    uint64_t modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
        ((uintptr_t)modlist - (uintptr_t)start_info);
    uint64_t modules_cmdline_paddr = modlist_paddr +
        sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;

    if ( module->length == 0 )
        return;

    assert(start_info->nr_modules < HVMLOADER_MODULE_MAX_COUNT);

    modlist[index].paddr = module->guest_addr_out;
    modlist[index].size = module->length;

    if ( cmdline )
    {
        /* The cmdline must fit its slot including the NUL terminator. */
        assert(strnlen(cmdline, HVMLOADER_MODULE_CMDLINE_SIZE)
               < HVMLOADER_MODULE_CMDLINE_SIZE);
        strncpy(modules_cmdline_start + HVMLOADER_MODULE_CMDLINE_SIZE * index,
                cmdline, HVMLOADER_MODULE_CMDLINE_SIZE);
        modlist[index].cmdline_paddr = modules_cmdline_paddr +
                                       HVMLOADER_MODULE_CMDLINE_SIZE * index;
    }

    start_info->nr_modules++;
}
+
/*
 * Final HVM boot fixups: fill in the hvm_start_info page (command
 * line, module list, e820-derived memory map) and, when a device
 * model is in use, the HVM info table at HVM_INFO_PFN.
 *
 * Returns 0 on success, -1 on failure.
 */
static int bootlate_hvm(struct xc_dom_image *dom)
{
    uint32_t domid = dom->guest_domid;
    xc_interface *xch = dom->xch;
    struct hvm_start_info *start_info;
    size_t modsize;
    struct hvm_modlist_entry *modlist;
    struct hvm_memmap_table_entry *memmap;
    unsigned int i;

    start_info = xc_map_foreign_range(xch, domid, dom->start_info_seg.pages <<
                                                  XC_DOM_PAGE_SHIFT(dom),
                                      PROT_READ | PROT_WRITE,
                                      dom->start_info_seg.pfn);
    if ( start_info == NULL )
    {
        DOMPRINTF("Unable to map HVM start info page");
        return -1;
    }

    /* Module list lives after the start_info struct and the cmdline. */
    modlist = (void*)(start_info + 1) + dom->cmdline_size;

    if ( !dom->device_model )
    {
        if ( dom->cmdline )
        {
            char *cmdline = (void*)(start_info + 1);

            strncpy(cmdline, dom->cmdline, dom->cmdline_size);
            start_info->cmdline_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
                                ((uintptr_t)cmdline - (uintptr_t)start_info);
        }

        /* ACPI module 0 is the RSDP */
        /* NOTE(review): the GNU "?:" here is effectively a no-op for a
         * scalar — this is just guest_addr_out. */
        start_info->rsdp_paddr = dom->acpi_modules[0].guest_addr_out ? : 0;
    }
    else
    {
        add_module_to_list(dom, &dom->system_firmware_module, "firmware",
                           modlist, start_info);
    }

    for ( i = 0; i < dom->num_modules; i++ )
    {
        struct xc_hvm_firmware_module mod;
        uint64_t base = dom->parms.virt_base != UNSET_ADDR ?
            dom->parms.virt_base : 0;

        /* Translate the module's virtual addresses to guest-physical. */
        mod.guest_addr_out =
            dom->modules[i].seg.vstart - base;
        mod.length =
            dom->modules[i].seg.vend - dom->modules[i].seg.vstart;

        DOMPRINTF("Adding module %u guest_addr %"PRIx64" len %u",
                  i, mod.guest_addr_out, mod.length);

        add_module_to_list(dom, &mod, dom->modules[i].cmdline,
                           modlist, start_info);
    }

    if ( start_info->nr_modules )
    {
        start_info->modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
                            ((uintptr_t)modlist - (uintptr_t)start_info);
    }

    /*
     * Check a couple of XEN_HVM_MEMMAP_TYPEs to verify consistency with
     * their corresponding e820 numerical values.
     */
    BUILD_BUG_ON(XEN_HVM_MEMMAP_TYPE_RAM != E820_RAM);
    BUILD_BUG_ON(XEN_HVM_MEMMAP_TYPE_ACPI != E820_ACPI);

    /* The memory map follows the module array and its cmdline slots. */
    modsize = HVMLOADER_MODULE_MAX_COUNT *
        (sizeof(*modlist) + HVMLOADER_MODULE_CMDLINE_SIZE);
    memmap = (void*)modlist + modsize;

    start_info->memmap_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
        ((uintptr_t)modlist - (uintptr_t)start_info) + modsize;
    start_info->memmap_entries = dom->e820_entries;
    for ( i = 0; i < dom->e820_entries; i++ )
    {
        memmap[i].addr = dom->e820[i].addr;
        memmap[i].size = dom->e820[i].size;
        memmap[i].type = dom->e820[i].type;
    }

    start_info->magic = XEN_HVM_START_MAGIC_VALUE;
    start_info->version = 1;

    munmap(start_info, dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom));

    if ( dom->device_model )
    {
        void *hvm_info_page;

        if ( (hvm_info_page = xc_map_foreign_range(
                  xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
                  HVM_INFO_PFN)) == NULL )
            return -1;
        build_hvm_info(hvm_info_page, dom);
        munmap(hvm_info_page, PAGE_SIZE);
    }

    return 0;
}
+
+bool xc_dom_translated(const struct xc_dom_image *dom)
+{
+    /* HVM guests are translated.  PV guests are not. */
+    return dom->container_type == XC_DOM_HVM_CONTAINER;
+}
+
+/* ------------------------------------------------------------------------ */
+
/* Arch hook table for 32-bit PAE PV guests. */
static struct xc_dom_arch xc_dom_32_pae = {
    .guest_type = "xen-3.0-x86_32p",
    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 4,
    .p2m_base_supported = 0,
    .arch_private_size = sizeof(struct xc_dom_image_x86),
    .alloc_magic_pages = alloc_magic_pages_pv,
    .alloc_pgtables = alloc_pgtables_x86_32_pae,
    .alloc_p2m_list = alloc_p2m_list_x86_32,
    .setup_pgtables = setup_pgtables_x86_32_pae,
    .start_info = start_info_x86_32,
    .shared_info = shared_info_x86_32,
    .vcpu = vcpu_x86_32,
    .meminit = meminit_pv,
    .bootearly = bootearly,
    .bootlate = bootlate_pv,
};
+
/* Arch hook table for 64-bit PV guests. */
static struct xc_dom_arch xc_dom_64 = {
    .guest_type = "xen-3.0-x86_64",
    .native_protocol = XEN_IO_PROTO_ABI_X86_64,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 8,
    .p2m_base_supported = 1,
    .arch_private_size = sizeof(struct xc_dom_image_x86),
    .alloc_magic_pages = alloc_magic_pages_pv,
    .alloc_pgtables = alloc_pgtables_x86_64,
    .alloc_p2m_list = alloc_p2m_list_x86_64,
    .setup_pgtables = setup_pgtables_x86_64,
    .start_info = start_info_x86_64,
    .shared_info = shared_info_x86_64,
    .vcpu = vcpu_x86_64,
    .meminit = meminit_pv,
    .bootearly = bootearly,
    .bootlate = bootlate_pv,
};
+
/* Arch hook table for HVM guests; PV-specific hooks (pagetables,
 * p2m list, start/shared info) are not needed and left unset. */
static struct xc_dom_arch xc_hvm_32 = {
    .guest_type = "hvm-3.0-x86_32",
    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
    .page_shift = PAGE_SHIFT_X86,
    .sizeof_pfn = 4,
    .alloc_magic_pages = alloc_magic_pages_hvm,
    .vcpu = vcpu_hvm,
    .meminit = meminit_hvm,
    .bootearly = bootearly,
    .bootlate = bootlate_hvm,
};
+
+static void __init register_arch_hooks(void)
+{
+    xc_dom_register_arch_hooks(&xc_dom_32_pae);
+    xc_dom_register_arch_hooks(&xc_dom_64);
+    xc_dom_register_arch_hooks(&xc_hvm_32);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_domain.c b/tools/libs/guest/xg_domain.c
new file mode 100644 (file)
index 0000000..58713cd
--- /dev/null
@@ -0,0 +1,149 @@
+/******************************************************************************
+ * xg_domain.c
+ *
+ * API for manipulating and obtaining information on domains.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ */
+
+#include "xg_private.h"
+#include "xc_core.h"
+
+int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *minfo)
+{
+    struct domain_info_context _di = { .guest_width = minfo->guest_width,
+                                       .p2m_size = minfo->p2m_size};
+    struct domain_info_context *dinfo = &_di;
+
+    free(minfo->pfn_type);
+    if ( minfo->p2m_table )
+        munmap(minfo->p2m_table, P2M_FL_ENTRIES * PAGE_SIZE);
+    minfo->p2m_table = NULL;
+
+    return 0;
+}
+
+int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
+                          struct xc_domain_meminfo *minfo)
+{
+    struct domain_info_context _di;
+    struct domain_info_context *dinfo = &_di;
+
+    xc_dominfo_t info;
+    shared_info_any_t *live_shinfo;
+    xen_capabilities_info_t xen_caps = "";
+    int i;
+
+    /* Must only be initialized once */
+    if ( minfo->pfn_type || minfo->p2m_table )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 )
+    {
+        PERROR("Could not get domain info");
+        return -1;
+    }
+
+    if ( xc_domain_get_guest_width(xch, domid, &minfo->guest_width) )
+    {
+        PERROR("Could not get domain address size");
+        return -1;
+    }
+    _di.guest_width = minfo->guest_width;
+
+    /* Get page table levels (see get_platform_info() in xg_save_restore.h) */
+    if ( xc_version(xch, XENVER_capabilities, &xen_caps) )
+    {
+        PERROR("Could not get Xen capabilities (for page table levels)");
+        return -1;
+    }
+    if ( strstr(xen_caps, "xen-3.0-x86_64") )
+        /* Depends on whether it's a compat 32-on-64 guest */
+        minfo->pt_levels = ( (minfo->guest_width == 8) ? 4 : 3 );
+    else if ( strstr(xen_caps, "xen-3.0-x86_32p") )
+        minfo->pt_levels = 3;
+    else if ( strstr(xen_caps, "xen-3.0-x86_32") )
+        minfo->pt_levels = 2;
+    else
+    {
+        errno = EFAULT;
+        return -1;
+    }
+
+    /* We need the shared info page for mapping the P2M */
+    live_shinfo = xc_map_foreign_range(xch, domid, PAGE_SIZE, PROT_READ,
+                                       info.shared_info_frame);
+    if ( !live_shinfo )
+    {
+        PERROR("Could not map the shared info frame (MFN 0x%lx)",
+               info.shared_info_frame);
+        return -1;
+    }
+
+    if ( xc_core_arch_map_p2m_writable(xch, minfo->guest_width, &info,
+                                       live_shinfo, &minfo->p2m_table,
+                                       &minfo->p2m_size) )
+    {
+        PERROR("Could not map the P2M table");
+        munmap(live_shinfo, PAGE_SIZE);
+        return -1;
+    }
+    munmap(live_shinfo, PAGE_SIZE);
+    _di.p2m_size = minfo->p2m_size;
+
+    /* Make space and prepare for getting the PFN types */
+    minfo->pfn_type = calloc(sizeof(*minfo->pfn_type), minfo->p2m_size);
+    if ( !minfo->pfn_type )
+    {
+        PERROR("Could not allocate memory for the PFN types");
+        goto failed;
+    }
+    for ( i = 0; i < minfo->p2m_size; i++ )
+        minfo->pfn_type[i] = xc_pfn_to_mfn(i, minfo->p2m_table,
+                                           minfo->guest_width);
+
+    /* Retrieve PFN types in batches */
+    for ( i = 0; i < minfo->p2m_size ; i+=1024 )
+    {
+        int count = ((minfo->p2m_size - i ) > 1024 ) ?
+                        1024: (minfo->p2m_size - i);
+
+        if ( xc_get_pfn_type_batch(xch, domid, count, minfo->pfn_type + i) )
+        {
+            PERROR("Could not get %d-eth batch of PFN types", (i+1)/1024);
+            goto failed;
+        }
+    }
+
+    return 0;
+
+failed:
+    if ( minfo->pfn_type )
+    {
+        free(minfo->pfn_type);
+        minfo->pfn_type = NULL;
+    }
+    if ( minfo->p2m_table )
+    {
+        munmap(minfo->p2m_table, P2M_FL_ENTRIES * PAGE_SIZE);
+        minfo->p2m_table = NULL;
+    }
+
+    return -1;
+}
diff --git a/tools/libs/guest/xg_nomigrate.c b/tools/libs/guest/xg_nomigrate.c
new file mode 100644 (file)
index 0000000..6795c62
--- /dev/null
@@ -0,0 +1,50 @@
+/******************************************************************************
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2011, Citrix Systems
+ */
+
+#include <inttypes.h>
+#include <errno.h>
+#include <xenctrl.h>
+#include <xenguest.h>
+
+int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t flags,
+                   struct save_callbacks *callbacks,
+                   xc_stream_type_t stream_type, int recv_fd)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      uint32_t store_domid, unsigned int console_evtchn,
+                      unsigned long *console_mfn, uint32_t console_domid,
+                      xc_stream_type_t stream_type,
+                      struct restore_callbacks *callbacks, int send_back_fd)
+{
+    errno = ENOSYS;
+    return -1;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_offline_page.c b/tools/libs/guest/xg_offline_page.c
new file mode 100644 (file)
index 0000000..77e8889
--- /dev/null
@@ -0,0 +1,708 @@
+/******************************************************************************
+ * xg_offline_page.c
+ *
+ * Helper functions to offline/online one page
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <xc_core.h>
+
+#include "xc_private.h"
+#include "xenctrl_dom.h"
+#include "xg_private.h"
+#include "xg_save_restore.h"
+
+struct pte_backup_entry
+{
+    xen_pfn_t table_mfn;
+    int offset;
+};
+
+#define DEFAULT_BACKUP_COUNT 1024
+struct pte_backup
+{
+    struct pte_backup_entry *entries;
+    int max;
+    int cur;
+};
+
+static struct domain_info_context _dinfo;
+static struct domain_info_context *dinfo = &_dinfo;
+
+int xc_mark_page_online(xc_interface *xch, unsigned long start,
+                        unsigned long end, uint32_t *status)
+{
+    DECLARE_SYSCTL;
+    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    int ret = -1;
+
+    if ( !status || (end < start) )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    if ( xc_hypercall_bounce_pre(xch, status) )
+    {
+        ERROR("Could not bounce memory for xc_mark_page_online\n");
+        return -1;
+    }
+
+    sysctl.cmd = XEN_SYSCTL_page_offline_op;
+    sysctl.u.page_offline.start = start;
+    sysctl.u.page_offline.cmd = sysctl_page_online;
+    sysctl.u.page_offline.end = end;
+    set_xen_guest_handle(sysctl.u.page_offline.status, status);
+    ret = xc_sysctl(xch, &sysctl);
+
+    xc_hypercall_bounce_post(xch, status);
+
+    return ret;
+}
+
+int xc_mark_page_offline(xc_interface *xch, unsigned long start,
+                          unsigned long end, uint32_t *status)
+{
+    DECLARE_SYSCTL;
+    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    int ret = -1;
+
+    if ( !status || (end < start) )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    if ( xc_hypercall_bounce_pre(xch, status) )
+    {
+        ERROR("Could not bounce memory for xc_mark_page_offline");
+        return -1;
+    }
+
+    sysctl.cmd = XEN_SYSCTL_page_offline_op;
+    sysctl.u.page_offline.start = start;
+    sysctl.u.page_offline.cmd = sysctl_page_offline;
+    sysctl.u.page_offline.end = end;
+    set_xen_guest_handle(sysctl.u.page_offline.status, status);
+    ret = xc_sysctl(xch, &sysctl);
+
+    xc_hypercall_bounce_post(xch, status);
+
+    return ret;
+}
+
+int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
+                                 unsigned long end, uint32_t *status)
+{
+    DECLARE_SYSCTL;
+    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
+    int ret = -1;
+
+    if ( !status || (end < start) )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    if ( xc_hypercall_bounce_pre(xch, status) )
+    {
+        ERROR("Could not bounce memory for xc_query_page_offline_status\n");
+        return -1;
+    }
+
+    sysctl.cmd = XEN_SYSCTL_page_offline_op;
+    sysctl.u.page_offline.start = start;
+    sysctl.u.page_offline.cmd = sysctl_query_page_offline;
+    sysctl.u.page_offline.end = end;
+    set_xen_guest_handle(sysctl.u.page_offline.status, status);
+    ret = xc_sysctl(xch, &sysctl);
+
+    xc_hypercall_bounce_post(xch, status);
+
+    return ret;
+}
+
+ /*
+  * There should be no updates to the grant table while the domain is paused
+  */
+static int xc_is_page_granted_v1(xc_interface *xch, xen_pfn_t gpfn,
+                                 grant_entry_v1_t *gnttab, int gnt_num)
+{
+    int i = 0;
+
+    if (!gnttab)
+        return 0;
+
+    for (i = 0; i < gnt_num; i++)
+        if ( ((gnttab[i].flags & GTF_type_mask) !=  GTF_invalid) &&
+             (gnttab[i].frame == gpfn) )
+             break;
+
+   return (i != gnt_num);
+}
+
+static int xc_is_page_granted_v2(xc_interface *xch, xen_pfn_t gpfn,
+                                 grant_entry_v2_t *gnttab, int gnt_num)
+{
+    int i = 0;
+
+    if (!gnttab)
+        return 0;
+
+    for (i = 0; i < gnt_num; i++)
+        if ( ((gnttab[i].hdr.flags & GTF_type_mask) !=  GTF_invalid) &&
+             (gnttab[i].full_page.frame == gpfn) )
+             break;
+
+   return (i != gnt_num);
+}
+
+static int backup_ptes(xen_pfn_t table_mfn, int offset,
+                       struct pte_backup *backup)
+{
+    if (!backup)
+        return -EINVAL;
+
+    if (backup->max == backup->cur)
+    {
+        backup->entries = realloc(backup->entries,
+                            backup->max * 2 * sizeof(struct pte_backup_entry));
+        if (backup->entries == NULL)
+            return -1;
+        else
+            backup->max *= 2;
+    }
+
+    backup->entries[backup->cur].table_mfn = table_mfn;
+    backup->entries[backup->cur++].offset = offset;
+
+    return 0;
+}
+
+/*
+ * return:
+ * 1 when MMU update is required
+ * 0 when no changes
+ * <0 when error happen
+ */
+typedef int (*pte_func)(xc_interface *xch,
+                       uint64_t pte, uint64_t *new_pte,
+                       unsigned long table_mfn, int table_offset,
+                       struct pte_backup *backup,
+                       unsigned long no_use);
+
+static int __clear_pte(xc_interface *xch,
+                       uint64_t pte, uint64_t *new_pte,
+                       unsigned long table_mfn, int table_offset,
+                       struct pte_backup *backup,
+                       unsigned long mfn)
+{
+    /* If no new_pte pointer, same as no changes needed */
+    if (!new_pte || !backup)
+        return -EINVAL;
+
+    if ( !(pte & _PAGE_PRESENT))
+        return 0;
+
+    /* XXX Check for PSE bit here */
+    /* Hit one entry */
+    if ( ((pte >> PAGE_SHIFT_X86) & MFN_MASK_X86) == mfn)
+    {
+        *new_pte = pte & ~_PAGE_PRESENT;
+        if (!backup_ptes(table_mfn, table_offset, backup))
+            return 1;
+    }
+
+    return 0;
+}
+
+static int __update_pte(xc_interface *xch,
+                      uint64_t pte, uint64_t *new_pte,
+                      unsigned long table_mfn, int table_offset,
+                      struct pte_backup *backup,
+                      unsigned long new_mfn)
+{
+    int index;
+
+    if (!new_pte)
+        return 0;
+
+    for (index = 0; index < backup->cur; index ++)
+        if ( (backup->entries[index].table_mfn == table_mfn) &&
+             (backup->entries[index].offset == table_offset) )
+            break;
+
+    if (index != backup->cur)
+    {
+        if (pte & _PAGE_PRESENT)
+            ERROR("Page present while in backup ptes\n");
+        pte &= ~MFN_MASK_X86;
+        pte |= (new_mfn << PAGE_SHIFT_X86) | _PAGE_PRESENT;
+        *new_pte = pte;
+        return 1;
+    }
+
+    return 0;
+}
+
+static int change_pte(xc_interface *xch, uint32_t domid,
+                     struct xc_domain_meminfo *minfo,
+                     struct pte_backup *backup,
+                     struct xc_mmu *mmu,
+                     pte_func func,
+                     unsigned long data)
+{
+    int pte_num, rc;
+    uint64_t i;
+    void *content = NULL;
+
+    pte_num = PAGE_SIZE / ((minfo->pt_levels == 2) ? 4 : 8);
+
+    for (i = 0; i < minfo->p2m_size; i++)
+    {
+        xen_pfn_t table_mfn = xc_pfn_to_mfn(i, minfo->p2m_table,
+                                            minfo->guest_width);
+        uint64_t pte, new_pte;
+        int j;
+
+        if ( (table_mfn == INVALID_PFN) ||
+             ((minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
+              XEN_DOMCTL_PFINFO_XTAB) )
+            continue;
+
+        if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+            content = xc_map_foreign_range(xch, domid, PAGE_SIZE,
+                                            PROT_READ, table_mfn);
+            if (!content)
+                goto failed;
+
+            for (j = 0; j < pte_num; j++)
+            {
+                if ( minfo->pt_levels == 2 )
+                    pte = ((const uint32_t*)content)[j];
+                else
+                    pte = ((const uint64_t*)content)[j];
+
+                rc = func(xch, pte, &new_pte, table_mfn, j, backup, data);
+
+                switch (rc)
+                {
+                    case 1:
+                    if ( xc_add_mmu_update(xch, mmu,
+                          table_mfn << PAGE_SHIFT |
+                          j * ( (minfo->pt_levels == 2) ?
+                              sizeof(uint32_t): sizeof(uint64_t)) |
+                          MMU_PT_UPDATE_PRESERVE_AD,
+                          new_pte) )
+                        goto failed;
+                    break;
+
+                    case 0:
+                    break;
+
+                    default:
+                    goto failed;
+                }
+            }
+
+            munmap(content, PAGE_SIZE);
+            content = NULL;
+        }
+    }
+
+    if ( xc_flush_mmu_updates(xch, mmu) )
+        goto failed;
+
+    return 0;
+failed:
+    /* XXX Shall we take action if we fail to swap? */
+    if (content)
+        munmap(content, PAGE_SIZE);
+
+    return -1;
+}
+
+static int update_pte(xc_interface *xch, uint32_t domid,
+                     struct xc_domain_meminfo *minfo,
+                     struct pte_backup *backup,
+                     struct xc_mmu *mmu,
+                     unsigned long new_mfn)
+{
+    return change_pte(xch, domid,  minfo, backup, mmu,
+                      __update_pte, new_mfn);
+}
+
+static int clear_pte(xc_interface *xch, uint32_t domid,
+                     struct xc_domain_meminfo *minfo,
+                     struct pte_backup *backup,
+                     struct xc_mmu *mmu,
+                     xen_pfn_t mfn)
+{
+    return change_pte(xch, domid, minfo, backup, mmu,
+                      __clear_pte, mfn);
+}
+
+/*
+ * Check if a page can be exchanged successfully
+ */
+
+static int is_page_exchangable(xc_interface *xch, uint32_t domid, xen_pfn_t mfn,
+                               xc_dominfo_t *info)
+{
+    uint32_t status;
+    int rc;
+
+    /* domain checking */
+    if ( !domid || (domid > DOMID_FIRST_RESERVED) )
+    {
+        DPRINTF("Dom0's page can't be LM");
+        return 0;
+    }
+    if (info->hvm)
+    {
+        DPRINTF("Currently we can only live change PV guest's page\n");
+        return 0;
+    }
+
+    /* Check if pages are offline pending or not */
+    rc = xc_query_page_offline_status(xch, mfn, mfn, &status);
+
+    if ( rc || !(status & PG_OFFLINE_STATUS_OFFLINE_PENDING) )
+    {
+        ERROR("Page %lx is not offline pending %x\n",
+          mfn, status);
+        return 0;
+    }
+
+    return 1;
+}
+
+xen_pfn_t *xc_map_m2p(xc_interface *xch,
+                      unsigned long max_mfn,
+                      int prot,
+                      unsigned long *mfn0)
+{
+    privcmd_mmap_entry_t *entries;
+    unsigned long m2p_chunks, m2p_size;
+    xen_pfn_t *m2p;
+    xen_pfn_t *extent_start;
+    int i;
+
+    m2p = NULL;
+    m2p_size   = M2P_SIZE(max_mfn);
+    m2p_chunks = M2P_CHUNKS(max_mfn);
+
+    extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
+    if ( !extent_start )
+    {
+        ERROR("failed to allocate space for m2p mfns");
+        goto err0;
+    }
+
+    if ( xc_machphys_mfn_list(xch, m2p_chunks, extent_start) )
+    {
+        PERROR("xc_get_m2p_mfns");
+        goto err1;
+    }
+
+    entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
+    if (entries == NULL)
+    {
+        ERROR("failed to allocate space for mmap entries");
+        goto err1;
+    }
+
+    for ( i = 0; i < m2p_chunks; i++ )
+        entries[i].mfn = extent_start[i];
+
+    m2p = xc_map_foreign_ranges(xch, DOMID_XEN,
+                       m2p_size, prot, M2P_CHUNK_SIZE,
+                       entries, m2p_chunks);
+    if (m2p == NULL)
+    {
+        PERROR("xc_mmap_foreign_ranges failed");
+        goto err2;
+    }
+
+    if (mfn0)
+        *mfn0 = entries[0].mfn;
+
+err2:
+    free(entries);
+err1:
+    free(extent_start);
+
+err0:
+    return m2p;
+}
+
+/* The domain should be suspended when called here */
+int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn)
+{
+    xc_dominfo_t info;
+    struct xc_domain_meminfo minfo;
+    struct xc_mmu *mmu = NULL;
+    struct pte_backup old_ptes = {NULL, 0, 0};
+    grant_entry_v1_t *gnttab_v1 = NULL;
+    grant_entry_v2_t *gnttab_v2 = NULL;
+    struct mmuext_op mops;
+    int gnt_num, unpined = 0;
+    void *old_p, *backup = NULL;
+    int rc, result = -1;
+    uint32_t status;
+    xen_pfn_t new_mfn, gpfn;
+    xen_pfn_t *m2p_table;
+    unsigned long max_mfn;
+
+    if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 )
+    {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    if (!info.shutdown || info.shutdown_reason != SHUTDOWN_suspend)
+    {
+        errno = EINVAL;
+        ERROR("Can't exchange page unless domain is suspended\n");
+        return -1;
+    }
+    if (!is_page_exchangable(xch, domid, mfn, &info))
+    {
+        ERROR("Could not exchange page\n");
+        return -1;
+    }
+
+    /* Map M2P and obtain gpfn */
+    rc = xc_maximum_ram_page(xch, &max_mfn);
+    if ( rc || !(m2p_table = xc_map_m2p(xch, max_mfn, PROT_READ, NULL)) )
+    {
+        PERROR("Failed to map live M2P table");
+        return -1;
+    }
+    gpfn = m2p_table[mfn];
+
+    /* Map domain's memory information */
+    memset(&minfo, 0, sizeof(minfo));
+    if ( xc_map_domain_meminfo(xch, domid, &minfo) )
+    {
+        PERROR("Could not map domain's memory information\n");
+        goto failed;
+    }
+
+    /* For translation macros */
+    dinfo->guest_width = minfo.guest_width;
+    dinfo->p2m_size = minfo.p2m_size;
+
+    /* Don't exchange CR3 for PAE guest in PAE host environment */
+    if (minfo.guest_width > sizeof(long))
+    {
+        if ( (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
+                    XEN_DOMCTL_PFINFO_L3TAB )
+            goto failed;
+    }
+
+    gnttab_v2 = xc_gnttab_map_table_v2(xch, domid, &gnt_num);
+    if (!gnttab_v2)
+    {
+        gnttab_v1 = xc_gnttab_map_table_v1(xch, domid, &gnt_num);
+        if (!gnttab_v1)
+        {
+            ERROR("Failed to map grant table\n");
+            goto failed;
+        }
+    }
+
+    if (gnttab_v1
+        ? xc_is_page_granted_v1(xch, mfn, gnttab_v1, gnt_num)
+        : xc_is_page_granted_v2(xch, mfn, gnttab_v2, gnt_num))
+    {
+        ERROR("Page %lx is granted now\n", mfn);
+        goto failed;
+    }
+
+    /* allocate required data structure */
+    backup = malloc(PAGE_SIZE);
+    if (!backup)
+    {
+        ERROR("Failed to allocate backup pages pointer\n");
+        goto failed;
+    }
+
+    old_ptes.max = DEFAULT_BACKUP_COUNT;
+    old_ptes.entries = malloc(sizeof(struct pte_backup_entry) *
+                              DEFAULT_BACKUP_COUNT);
+
+    if (!old_ptes.entries)
+    {
+        ERROR("Faield to allocate backup\n");
+        goto failed;
+    }
+    old_ptes.cur = 0;
+
+    /* Unpin the page if it is pinned */
+    if (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LPINTAB)
+    {
+        mops.cmd = MMUEXT_UNPIN_TABLE;
+        mops.arg1.mfn = mfn;
+
+        if ( xc_mmuext_op(xch, &mops, 1, domid) < 0 )
+        {
+            ERROR("Failed to unpin page %lx", mfn);
+            goto failed;
+        }
+        mops.arg1.mfn = mfn;
+        unpined = 1;
+    }
+
+    /* backup the content */
+    old_p = xc_map_foreign_range(xch, domid, PAGE_SIZE,
+      PROT_READ, mfn);
+    if (!old_p)
+    {
+        ERROR("Failed to map foreign page %lx\n", mfn);
+        goto failed;
+    }
+
+    memcpy(backup, old_p, PAGE_SIZE);
+    munmap(old_p, PAGE_SIZE);
+
+    mmu = xc_alloc_mmu_updates(xch, domid);
+    if ( mmu == NULL )
+    {
+        ERROR("%s: failed at %d\n", __FUNCTION__, __LINE__);
+        goto failed;
+    }
+
+    /* Firstly update all pte to be invalid to remove the reference */
+    rc = clear_pte(xch, domid,  &minfo, &old_ptes, mmu, mfn);
+
+    if (rc)
+    {
+        ERROR("clear pte failed\n");
+        goto failed;
+    }
+
+    rc = xc_domain_memory_exchange_pages(xch, domid,
+                                        1, 0, &mfn,
+                                        1, 0, &new_mfn);
+
+    if (rc)
+    {
+        ERROR("Exchange the page failed\n");
+        /* Exchange failure means there are still references to the page */
+        rc = update_pte(xch, domid, &minfo, &old_ptes, mmu, mfn);
+        if (rc)
+            result = -2;
+        goto failed;
+    }
+
+    rc = update_pte(xch, domid, &minfo, &old_ptes, mmu, new_mfn);
+
+    if (rc)
+    {
+        ERROR("update pte failed guest may be broken now\n");
+        /* No recover action now for swap fail */
+        result = -2;
+        goto failed;
+    }
+
+    /* Check if pages are offlined already */
+    rc = xc_query_page_offline_status(xch, mfn, mfn,
+                            &status);
+
+    if (rc)
+    {
+        ERROR("Fail to query offline status\n");
+    }else if ( !(status & PG_OFFLINE_STATUS_OFFLINED) )
+    {
+        ERROR("page is still online or pending\n");
+        goto failed;
+    }
+    else
+    {
+        void *new_p;
+        IPRINTF("Now page is offlined %lx\n", mfn);
+        /* Update the p2m table */
+        minfo.p2m_table[gpfn] = new_mfn;
+
+        new_p = xc_map_foreign_range(xch, domid, PAGE_SIZE,
+                                     PROT_READ|PROT_WRITE, new_mfn);
+        if ( new_p == NULL )
+        {
+            ERROR("failed to map new_p for copy, guest may be broken?");
+            goto failed;
+        }
+        memcpy(new_p, backup, PAGE_SIZE);
+        munmap(new_p, PAGE_SIZE);
+        mops.arg1.mfn = new_mfn;
+        result = 0;
+    }
+
+failed:
+
+    if (unpined && (minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LPINTAB))
+    {
+        switch ( minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
+        {
+            case XEN_DOMCTL_PFINFO_L1TAB:
+                mops.cmd = MMUEXT_PIN_L1_TABLE;
+                break;
+
+            case XEN_DOMCTL_PFINFO_L2TAB:
+                mops.cmd = MMUEXT_PIN_L2_TABLE;
+                break;
+
+            case XEN_DOMCTL_PFINFO_L3TAB:
+                mops.cmd = MMUEXT_PIN_L3_TABLE;
+                break;
+
+            case XEN_DOMCTL_PFINFO_L4TAB:
+                mops.cmd = MMUEXT_PIN_L4_TABLE;
+                break;
+
+            default:
+                ERROR("Unpined for non pate table page\n");
+                break;
+        }
+
+        if ( xc_mmuext_op(xch, &mops, 1, domid) < 0 )
+        {
+            ERROR("failed to pin the mfn again\n");
+            result = -2;
+        }
+    }
+
+    free(mmu);
+
+    free(old_ptes.entries);
+
+    free(backup);
+
+    if (gnttab_v1)
+        munmap(gnttab_v1, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v1_t)));
+    if (gnttab_v2)
+        munmap(gnttab_v2, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v2_t)));
+
+    xc_unmap_domain_meminfo(xch, &minfo);
+    munmap(m2p_table, M2P_SIZE(max_mfn));
+
+    return result;
+}
diff --git a/tools/libs/guest/xg_private.c b/tools/libs/guest/xg_private.c
new file mode 100644 (file)
index 0000000..2073dba
--- /dev/null
@@ -0,0 +1,198 @@
+/******************************************************************************
+ * xg_private.c
+ *
+ * Helper functions for the rest of the library.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <zlib.h>
+
+#include "xg_private.h"
+
+char *xc_read_image(xc_interface *xch,
+                    const char *filename, unsigned long *size)
+{
+    int kernel_fd = -1;
+    gzFile kernel_gfd = NULL;
+    char *image = NULL, *tmp;
+    unsigned int bytes;
+
+    if ( (filename == NULL) || (size == NULL) )
+        return NULL;
+
+    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
+    {
+        PERROR("Could not open kernel image '%s'", filename);
+        goto out;
+    }
+
+    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
+    {
+        PERROR("Could not allocate decompression state for state file");
+        goto out;
+    }
+
+    *size = 0;
+
+#define CHUNK 1*1024*1024
+    while(1)
+    {
+        if ( (tmp = realloc(image, *size + CHUNK)) == NULL )
+        {
+            PERROR("Could not allocate memory for kernel image");
+            free(image);
+            image = NULL;
+            goto out;
+        }
+        image = tmp;
+
+        bytes = gzread(kernel_gfd, image + *size, CHUNK);
+        switch (bytes)
+        {
+        case -1:
+            PERROR("Error reading kernel image");
+            free(image);
+            image = NULL;
+            goto out;
+        case 0: /* EOF */
+            if ( *size == 0 )
+            {
+                PERROR("Could not read kernel image");
+                free(image);
+                image = NULL;
+            }
+            goto out;
+        default:
+            *size += bytes;
+            break;
+        }
+    }
+#undef CHUNK
+
+ out:
+    if ( image )
+    {
+        /* Shrink allocation to fit image. */
+        tmp = realloc(image, *size);
+        if ( tmp )
+            image = tmp;
+    }
+
+    if ( kernel_gfd != NULL )
+        gzclose(kernel_gfd);
+    else if ( kernel_fd >= 0 )
+        close(kernel_fd);
+    return image;
+}
+
+char *xc_inflate_buffer(xc_interface *xch,
+                        const char *in_buf, unsigned long in_size,
+                        unsigned long *out_size)
+{
+    int           sts;
+    z_stream      zStream;
+    unsigned long out_len;
+    char         *out_buf;
+
+    /* Not compressed? Then return the original buffer. */
+    if ( ((unsigned char)in_buf[0] != 0x1F) ||
+         ((unsigned char)in_buf[1] != 0x8B) )
+    {
+        if ( out_size != NULL )
+            *out_size = in_size;
+        return (char *)in_buf;
+    }
+
+    out_len = (unsigned char)in_buf[in_size-4] +
+        (256 * ((unsigned char)in_buf[in_size-3] +
+                (256 * ((unsigned char)in_buf[in_size-2] +
+                        (256 * (unsigned char)in_buf[in_size-1])))));
+
+    memset(&zStream, 0, sizeof(zStream));
+    out_buf = malloc(out_len + 16);        /* Leave a little extra space */
+    if ( out_buf == NULL )
+    {
+        ERROR("Error mallocing buffer\n");
+        return NULL;
+    }
+
+    zStream.next_in = (unsigned char *)in_buf;
+    zStream.avail_in = in_size;
+    zStream.next_out = (unsigned char *)out_buf;
+    zStream.avail_out = out_len+16;
+    sts = inflateInit2(&zStream, (MAX_WBITS+32)); /* +32 means "handle gzip" */
+    if ( sts != Z_OK )
+    {
+        ERROR("inflateInit failed, sts %d\n", sts);
+        free(out_buf);
+        return NULL;
+    }
+
+    /* Inflate in one pass/call */
+    sts = inflate(&zStream, Z_FINISH);
+    inflateEnd(&zStream);
+    if ( sts != Z_STREAM_END )
+    {
+        ERROR("inflate failed, sts %d\n", sts);
+        free(out_buf);
+        return NULL;
+    }
+
+    if ( out_size != NULL )
+        *out_size = out_len;
+
+    return out_buf;
+}
+
+/*******************/
+
+int pin_table(
+    xc_interface *xch, unsigned int type, unsigned long mfn, uint32_t dom)
+{
+    struct mmuext_op op;
+
+    op.cmd = type;
+    op.arg1.mfn = mfn;
+
+    if ( xc_mmuext_op(xch, &op, 1, dom) < 0 )
+        return 1;
+
+    return 0;
+}
+
+/* This is shared between save and restore, and may generally be useful. */
+unsigned long csum_page(void *page)
+{
+    int i;
+    unsigned long *p = page;
+    unsigned long long sum=0;
+
+    for ( i = 0; i < (PAGE_SIZE/sizeof(unsigned long)); i++ )
+        sum += p[i];
+
+    return sum ^ (sum>>32);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_private.h b/tools/libs/guest/xg_private.h
new file mode 100644 (file)
index 0000000..0000b2b
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef XG_PRIVATE_H
+#define XG_PRIVATE_H
+
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "xc_private.h"
+#include "xenguest.h"
+
+#include <xen/memory.h>
+#include <xen/elfnote.h>
+
+#ifndef ELFSIZE
+#include <limits.h>
+#if UINT_MAX == ULONG_MAX
+#define ELFSIZE 32
+#else
+#define ELFSIZE 64
+#endif
+#endif
+
+char *xc_read_image(xc_interface *xch,
+                    const char *filename, unsigned long *size);
+char *xc_inflate_buffer(xc_interface *xch,
+                        const char *in_buf,
+                        unsigned long in_size,
+                        unsigned long *out_size);
+
+unsigned long csum_page (void * page);
+
+#define _PAGE_PRESENT   0x001
+#define _PAGE_RW        0x002
+#define _PAGE_USER      0x004
+#define _PAGE_PWT       0x008
+#define _PAGE_PCD       0x010
+#define _PAGE_ACCESSED  0x020
+#define _PAGE_DIRTY     0x040
+#define _PAGE_PAT       0x080
+#define _PAGE_PSE       0x080
+#define _PAGE_GLOBAL    0x100
+
+#define VIRT_BITS_I386     32
+#define VIRT_BITS_X86_64   48
+
+#define PGTBL_LEVELS_I386       3
+#define PGTBL_LEVELS_X86_64     4
+
+#define PGTBL_LEVEL_SHIFT_X86   9
+
+#define L1_PAGETABLE_SHIFT_PAE        12
+#define L2_PAGETABLE_SHIFT_PAE        21
+#define L3_PAGETABLE_SHIFT_PAE        30
+#define L1_PAGETABLE_ENTRIES_PAE     512
+#define L2_PAGETABLE_ENTRIES_PAE     512
+#define L3_PAGETABLE_ENTRIES_PAE       4
+
+#define L1_PAGETABLE_SHIFT_X86_64     12
+#define L2_PAGETABLE_SHIFT_X86_64     21
+#define L3_PAGETABLE_SHIFT_X86_64     30
+#define L4_PAGETABLE_SHIFT_X86_64     39
+#define L1_PAGETABLE_ENTRIES_X86_64  512
+#define L2_PAGETABLE_ENTRIES_X86_64  512
+#define L3_PAGETABLE_ENTRIES_X86_64  512
+#define L4_PAGETABLE_ENTRIES_X86_64  512
+
+typedef uint64_t x86_pgentry_t;
+
+#define PAGE_SHIFT_ARM          12
+#define PAGE_SIZE_ARM           (1UL << PAGE_SHIFT_ARM)
+#define PAGE_MASK_ARM           (~(PAGE_SIZE_ARM-1))
+
+#define PAGE_SHIFT_X86          12
+#define PAGE_SIZE_X86           (1UL << PAGE_SHIFT_X86)
+#define PAGE_MASK_X86           (~(PAGE_SIZE_X86-1))
+
+#define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)
+
+/*
+ * Look up the MFN for @pfn in a p2m array laid out with the guest's word
+ * size.  @gwidth is the guest word width in bytes (4 or 8).
+ */
+static inline xen_pfn_t xc_pfn_to_mfn(xen_pfn_t pfn, xen_pfn_t *p2m,
+                                      unsigned gwidth)
+{
+    if ( gwidth == sizeof(uint64_t) )
+        /* 64 bit guest.  Need to truncate their pfns for 32 bit toolstacks. */
+        return ((uint64_t *)p2m)[pfn];
+    else
+    {
+        /* 32 bit guest.  Need to expand INVALID_MFN for 64 bit toolstacks. */
+        uint32_t mfn = ((uint32_t *)p2m)[pfn];
+
+        return mfn == ~0U ? INVALID_MFN : mfn;
+    }
+}
+
+
+/* Masks for PTE<->PFN conversions */
+#define MADDR_BITS_X86  ((dinfo->guest_width == 8) ? 52 : 44)
+#define MFN_MASK_X86    ((1ULL << (MADDR_BITS_X86 - PAGE_SHIFT_X86)) - 1)
+#define MADDR_MASK_X86  (MFN_MASK_X86 << PAGE_SHIFT_X86)
+
+int pin_table(xc_interface *xch, unsigned int type, unsigned long mfn,
+              uint32_t dom);
+
+#endif /* XG_PRIVATE_H */
diff --git a/tools/libs/guest/xg_save_restore.h b/tools/libs/guest/xg_save_restore.h
new file mode 100644 (file)
index 0000000..88120eb
--- /dev/null
@@ -0,0 +1,134 @@
+/*
+ * Definitions and utilities for save / restore.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "xc_private.h"
+
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+
+/*
+** We process save/restore/migrate in batches of pages; the below
+** determines how many pages we (at maximum) deal with in each batch.
+*/
+#define MAX_BATCH_SIZE 1024   /* up to 1024 pages (4MB) at a time */
+
+/* When pinning page tables at the end of restore, we also use batching. */
+#define MAX_PIN_BATCH  1024
+
+/*
+** Determine various platform information required for save/restore, in
+** particular:
+**
+**    - the maximum MFN on this machine, used to compute the size of
+**      the M2P table;
+**
+**    - the starting virtual address of the hypervisor; we use this
+**      to determine which parts of guest address space(s) do and don't
+**      require canonicalization during save/restore; and
+**
+**    - the number of page-table levels for save/restore. This should
+**      be a property of the domain, but for the moment we just read it
+**      from the hypervisor.
+**
+**    - The width of a guest word (unsigned long), in bytes.
+**
+** Returns 1 on success, 0 on failure.
+*/
+static inline int get_platform_info(xc_interface *xch, uint32_t dom,
+                                    /* OUT */ unsigned long *max_mfn,
+                                    /* OUT */ unsigned long *hvirt_start,
+                                    /* OUT */ unsigned int *pt_levels,
+                                    /* OUT */ unsigned int *guest_width)
+{
+    xen_capabilities_info_t xen_caps = "";
+    xen_platform_parameters_t xen_params;
+
+    if ( xc_version(xch, XENVER_platform_parameters, &xen_params) != 0 )
+        return 0;
+
+    if ( xc_version(xch, XENVER_capabilities, &xen_caps) != 0 )
+        return 0;
+
+    if ( xc_maximum_ram_page(xch, max_mfn) )
+        return 0;
+
+    *hvirt_start = xen_params.virt_start;
+
+    if ( xc_domain_get_guest_width(xch, dom, guest_width) != 0 )
+        return 0;
+
+    /* 64-bit tools will see the 64-bit hvirt_start, but 32-bit guests
+     * will be using the compat one. */
+    if ( *guest_width < sizeof (unsigned long) )
+        /* XXX need to fix up a way of extracting this value from Xen if
+         * XXX it becomes variable for domU */
+        *hvirt_start = 0xf5800000;
+
+    /* Derive the number of pagetable levels from the capabilities string. */
+    if ( strstr(xen_caps, "xen-3.0-x86_64") )
+        /* Depends on whether it's a compat 32-on-64 guest */
+        *pt_levels = ( (*guest_width == 8) ? 4 : 3 );
+    else if ( strstr(xen_caps, "xen-3.0-x86_32p") )
+        *pt_levels = 3;
+    else
+        return 0;
+
+    return 1;
+}
+
+
+/*
+** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables.
+** The M2P simply holds the corresponding PFN, while the top bit of a P2M
+** entry tells us whether or not the PFN is currently mapped.
+*/
+
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
+
+
+/*
+** The M2P is made up of some number of 'chunks' of at least 2MB in size.
+** The below definitions and utility function(s) deal with mapping the M2P
+** regardless of the underlying machine memory size or architecture.
+*/
+#define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE
+#define M2P_CHUNK_SIZE  (1 << M2P_SHIFT)
+#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
+#define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
+
+#define UNFOLD_CR3(_c)                                                  \
+  ((uint64_t)((dinfo->guest_width == 8)                                 \
+              ? ((_c) >> 12)                                            \
+              : (((uint32_t)(_c) >> 12) | ((uint32_t)(_c) << 20))))
+
+#define FOLD_CR3(_c)                                                    \
+  ((uint64_t)((dinfo->guest_width == 8)                                 \
+              ? ((uint64_t)(_c)) << 12                                  \
+              : (((uint32_t)(_c) << 12) | ((uint32_t)(_c) >> 20))))
+
+#define MEMCPY_FIELD(_d, _s, _f, _w) do {                          \
+    if ((_w) == 8)                                                 \
+        memcpy(&(_d)->x64._f, &(_s)->x64._f,sizeof((_d)->x64._f)); \
+    else                                                           \
+        memcpy(&(_d)->x32._f, &(_s)->x32._f,sizeof((_d)->x32._f)); \
+} while (0)
+
+#define MEMSET_ARRAY_FIELD(_p, _f, _v, _w) do {                    \
+    if ((_w) == 8)                                                 \
+        memset(&(_p)->x64._f[0], (_v), sizeof((_p)->x64._f));      \
+    else                                                           \
+        memset(&(_p)->x32._f[0], (_v), sizeof((_p)->x32._f));      \
+} while (0)
diff --git a/tools/libs/guest/xg_sr_common.c b/tools/libs/guest/xg_sr_common.c
new file mode 100644 (file)
index 0000000..17567ab
--- /dev/null
@@ -0,0 +1,167 @@
+#include <assert.h>
+
+#include "xg_sr_common.h"
+
+#include <xen-tools/libs.h>
+
+static const char *const dhdr_types[] =
+{
+    [DHDR_TYPE_X86_PV]  = "x86 PV",
+    [DHDR_TYPE_X86_HVM] = "x86 HVM",
+};
+
+/* Map a Domain Header type to its human-readable name. */
+const char *dhdr_type_to_str(uint32_t type)
+{
+    const char *str = NULL;
+
+    if ( type < ARRAY_SIZE(dhdr_types) )
+        str = dhdr_types[type];
+
+    /* Unknown types, and holes in the table, read as "Reserved". */
+    return str ? str : "Reserved";
+}
+
+static const char *const mandatory_rec_types[] =
+{
+    [REC_TYPE_END]                          = "End",
+    [REC_TYPE_PAGE_DATA]                    = "Page data",
+    [REC_TYPE_X86_PV_INFO]                  = "x86 PV info",
+    [REC_TYPE_X86_PV_P2M_FRAMES]            = "x86 PV P2M frames",
+    [REC_TYPE_X86_PV_VCPU_BASIC]            = "x86 PV vcpu basic",
+    [REC_TYPE_X86_PV_VCPU_EXTENDED]         = "x86 PV vcpu extended",
+    [REC_TYPE_X86_PV_VCPU_XSAVE]            = "x86 PV vcpu xsave",
+    [REC_TYPE_SHARED_INFO]                  = "Shared info",
+    [REC_TYPE_X86_TSC_INFO]                 = "x86 TSC info",
+    [REC_TYPE_HVM_CONTEXT]                  = "HVM context",
+    [REC_TYPE_HVM_PARAMS]                   = "HVM params",
+    [REC_TYPE_TOOLSTACK]                    = "Toolstack",
+    [REC_TYPE_X86_PV_VCPU_MSRS]             = "x86 PV vcpu msrs",
+    [REC_TYPE_VERIFY]                       = "Verify",
+    [REC_TYPE_CHECKPOINT]                   = "Checkpoint",
+    [REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST]    = "Checkpoint dirty pfn list",
+    [REC_TYPE_STATIC_DATA_END]              = "Static data end",
+    [REC_TYPE_X86_CPUID_POLICY]             = "x86 CPUID policy",
+    [REC_TYPE_X86_MSR_POLICY]               = "x86 MSR policy",
+};
+
+/* Map a Record type to its human-readable name. */
+const char *rec_type_to_str(uint32_t type)
+{
+    const char *str = NULL;
+
+    /* Only mandatory (non-optional) types have table entries. */
+    if ( !(type & REC_TYPE_OPTIONAL) && (type < ARRAY_SIZE(mandatory_rec_types)) )
+        str = mandatory_rec_types[type];
+
+    /* Unknown types, and holes in the table, read as "Reserved". */
+    return str ? str : "Reserved";
+}
+
+int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
+                       void *buf, size_t sz)
+{
+    /* Zero bytes used to pad the record up to the stream alignment. */
+    static const char zeroes[(1u << REC_ALIGN_ORDER) - 1] = { 0 };
+
+    xc_interface *xch = ctx->xch;
+    typeof(rec->length) combined_length = rec->length + sz;
+    size_t record_length = ROUNDUP(combined_length, REC_ALIGN_ORDER);
+    /* Header fields, both payload fragments, then alignment padding. */
+    struct iovec parts[] = {
+        { &rec->type,       sizeof(rec->type) },
+        { &combined_length, sizeof(combined_length) },
+        { rec->data,        rec->length },
+        { buf,              sz },
+        { (void *)zeroes,   record_length - combined_length },
+    };
+
+    if ( record_length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08x, %s) length %#zx exceeds max (%#x)", rec->type,
+              rec_type_to_str(rec->type), record_length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    /* A fragment with non-zero length must come with a valid buffer. */
+    if ( rec->length )
+        assert(rec->data);
+    if ( sz )
+        assert(buf);
+
+    if ( writev_exact(ctx->fd, parts, ARRAY_SIZE(parts)) )
+        goto err;
+
+    return 0;
+
+ err:
+    PERROR("Unable to write record to stream");
+    return -1;
+}
+
+/*
+ * Read one record (header plus padded payload) from @fd into @rec.  On
+ * success, rec->data is either NULL (zero-length record) or a malloc()ed
+ * buffer the caller must free().  Returns 0 on success, -1 on failure
+ * with an error logged.
+ */
+int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rhdr rhdr;
+    size_t datasz;
+
+    if ( read_exact(fd, &rhdr, sizeof(rhdr)) )
+    {
+        PERROR("Failed to read Record Header from stream");
+        return -1;
+    }
+
+    /* Bound the allocation below - don't trust the stream's length field. */
+    if ( rhdr.length > REC_LENGTH_MAX )
+    {
+        ERROR("Record (0x%08x, %s) length %#x exceeds max (%#x)", rhdr.type,
+              rec_type_to_str(rhdr.type), rhdr.length, REC_LENGTH_MAX);
+        return -1;
+    }
+
+    /* Payloads are padded in the stream to the record alignment. */
+    datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER);
+
+    if ( datasz )
+    {
+        rec->data = malloc(datasz);
+
+        if ( !rec->data )
+        {
+            ERROR("Unable to allocate %zu bytes for record data (0x%08x, %s)",
+                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+
+        if ( read_exact(fd, rec->data, datasz) )
+        {
+            free(rec->data);
+            rec->data = NULL;
+            PERROR("Failed to read %zu bytes of data for record (0x%08x, %s)",
+                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
+            return -1;
+        }
+    }
+    else
+        rec->data = NULL;
+
+    rec->type   = rhdr.type;
+    rec->length = rhdr.length;
+
+    return 0;
+}
+
+/* Compile-time checks that the wire-format structures have the exact
+ * sizes the stream format requires; never called at runtime. */
+static void __attribute__((unused)) build_assertions(void)
+{
+    BUILD_BUG_ON(sizeof(struct xc_sr_ihdr) != 24);
+    BUILD_BUG_ON(sizeof(struct xc_sr_dhdr) != 16);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rhdr) != 8);
+
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_page_data_header)  != 8);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_info)       != 8);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_p2m_frames) != 8);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_vcpu_hdr)   != 8);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_tsc_info)      != 24);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_hvm_params_entry)  != 16);
+    BUILD_BUG_ON(sizeof(struct xc_sr_rec_hvm_params)        != 8);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_common.h b/tools/libs/guest/xg_sr_common.h
new file mode 100644 (file)
index 0000000..13fcc47
--- /dev/null
@@ -0,0 +1,468 @@
+#ifndef __COMMON__H
+#define __COMMON__H
+
+#include <stdbool.h>
+
+#include "xg_private.h"
+#include "xg_save_restore.h"
+#include "xenctrl_dom.h"
+#include "xc_bitops.h"
+
+#include "xg_sr_stream_format.h"
+
+/* String representation of Domain Header types. */
+const char *dhdr_type_to_str(uint32_t type);
+
+/* String representation of Record types. */
+const char *rec_type_to_str(uint32_t type);
+
+struct xc_sr_context;
+struct xc_sr_record;
+
+/**
+ * Save operations.  To be implemented for each type of guest, for use by the
+ * common save algorithm.
+ *
+ * Every function must be implemented, even if only with a no-op stub.
+ */
+struct xc_sr_save_ops
+{
+    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
+    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /**
+     * Optionally transform the contents of a page from being specific to the
+     * sending environment, to being generic for the stream.
+     *
+     * The page of data at the end of 'page' may be a read-only mapping of a
+     * running guest; it must not be modified.  If no transformation is
+     * required, the callee should leave '*page' untouched.
+     *
+     * If a transformation is required, the callee should allocate themselves
+     * a local page using malloc() and return it via '*page'.
+     *
+     * The caller shall free() '*page' in all cases.  In the case that the
+     * callee encounters an error, it should *NOT* free() the memory it
+     * allocated for '*page'.
+     *
+     * It is valid to fail with EAGAIN if the transformation is not able to be
+     * completed at this point.  The page shall be retried later.
+     *
+     * @returns 0 for success, -1 for failure, with errno appropriately set.
+     */
+    int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
+                          void **page);
+
+    /**
+     * Set up local environment to save a domain. (Typically querying
+     * running domain state, setting up mappings etc.)
+     *
+     * This is called once before any common setup has occurred, allowing for
+     * guest-specific adjustments to be made to common state.
+     */
+    int (*setup)(struct xc_sr_context *ctx);
+
+    /**
+     * Send static records at the head of the stream.  This is called once,
+     * after the Image and Domain headers are written.
+     */
+    int (*static_data)(struct xc_sr_context *ctx);
+
+    /**
+     * Send dynamic records which need to be at the start of the stream.  This
+     * is called after the STATIC_DATA_END record is written.
+     */
+    int (*start_of_stream)(struct xc_sr_context *ctx);
+
+    /**
+     * Send records which need to be at the start of a checkpoint.  This is
+     * called once, or once per checkpoint in a checkpointed stream, and is
+     * ahead of memory data.
+     */
+    int (*start_of_checkpoint)(struct xc_sr_context *ctx);
+
+    /**
+     * Send records which need to be at the end of the checkpoint.  This is
+     * called once, or once per checkpoint in a checkpointed stream, and is
+     * after the memory data.
+     */
+    int (*end_of_checkpoint)(struct xc_sr_context *ctx);
+
+    /**
+     * Check state of guest to decide whether it makes sense to continue
+     * migration.  This is called in each iteration or checkpoint to check
+     * whether all criteria for the migration are still met.  If that's not
+     * the case either migration is cancelled via a bad rc or the situation
+     * is handled, e.g. by sending appropriate records.
+     */
+    int (*check_vm_state)(struct xc_sr_context *ctx);
+
+    /**
+     * Clean up the local environment.  Will be called exactly once, either
+     * after a successful save, or upon encountering an error.
+     */
+    int (*cleanup)(struct xc_sr_context *ctx);
+};
+
+
+/**
+ * Restore operations.  To be implemented for each type of guest, for use by
+ * the common restore algorithm.
+ *
+ * Every function must be implemented, even if only with a no-op stub.
+ */
+struct xc_sr_restore_ops
+{
+    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
+    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /* Check to see whether a PFN is valid. */
+    bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
+
+    /* Set the GFN of a PFN. */
+    void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
+
+    /* Set the type of a PFN. */
+    void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
+                          xen_pfn_t type);
+
+    /**
+     * Optionally transform the contents of a page from being generic in the
+     * stream, to being specific to the restoring environment.
+     *
+     * 'page' is expected to be modified in-place if a transformation is
+     * required.
+     *
+     * @returns 0 for success, -1 for failure, with errno appropriately set.
+     */
+    int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
+
+    /**
+     * Set up local environment to restore a domain.
+     *
+     * This is called once before any common setup has occurred, allowing for
+     * guest-specific adjustments to be made to common state.
+     */
+    int (*setup)(struct xc_sr_context *ctx);
+
+    /**
+     * Process an individual record from the stream.  The caller shall take
+     * care of processing common records (e.g. END, PAGE_DATA).
+     *
+     * @return 0 for success, -1 for failure, or the following sentinels:
+     *  - RECORD_NOT_PROCESSED
+     *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
+     *    a failover is needed.
+     */
+#define RECORD_NOT_PROCESSED 1
+#define BROKEN_CHANNEL 2
+    int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+
+    /**
+     * Perform any actions required after the static data has arrived.  Called
+     * when the STATIC_DATA_COMPLETE record has been received/inferred.
+     * 'missing' should be filled in for any data item the higher level
+     * toolstack needs to provide compatibility for.
+     */
+    int (*static_data_complete)(struct xc_sr_context *ctx,
+                                unsigned int *missing);
+
+    /**
+     * Perform any actions required after the stream has been finished. Called
+     * after the END record has been received.
+     */
+    int (*stream_complete)(struct xc_sr_context *ctx);
+
+    /**
+     * Clean up the local environment.  Will be called exactly once, either
+     * after a successful restore, or upon encountering an error.
+     */
+    int (*cleanup)(struct xc_sr_context *ctx);
+};
+
+/* Wrapper for blobs of data heading Xen-wards. */
+struct xc_sr_blob
+{
+    void *ptr;   /* Heap allocation owned by the blob; replaced/freed by
+                  * update_blob(). */
+    size_t size; /* Size of the allocation, in bytes. */
+};
+
+/*
+ * Update a blob.  Duplicate src/size, freeing the old blob if necessary.  May
+ * fail due to memory allocation, in which case the blob is left unchanged.
+ */
+static inline int update_blob(struct xc_sr_blob *blob,
+                              const void *src, size_t size)
+{
+    void *copy;
+
+    /* An empty source is a caller error. */
+    if ( !src || !size )
+    {
+        errno = EINVAL;
+        return -1;
+    }
+
+    copy = malloc(size);
+    if ( !copy )
+        return -1;
+
+    memcpy(copy, src, size);
+
+    /* Release any previous contents, then take ownership of the copy. */
+    free(blob->ptr);
+    blob->ptr = copy;
+    blob->size = size;
+
+    return 0;
+}
+
+struct xc_sr_context
+{
+    xc_interface *xch;
+    uint32_t domid;
+    int fd;
+
+    /* Plain VM, or checkpoints over time. */
+    xc_stream_type_t stream_type;
+
+    xc_dominfo_t dominfo;
+
+    union /* Common save or restore data. */
+    {
+        struct /* Save data. */
+        {
+            int recv_fd;
+
+            struct xc_sr_save_ops ops;
+            struct save_callbacks *callbacks;
+
+            /* Live migrate vs non live suspend. */
+            bool live;
+
+            /* Further debugging information in the stream. */
+            bool debug;
+
+            unsigned long p2m_size;
+
+            struct precopy_stats stats;
+
+            xen_pfn_t *batch_pfns;
+            unsigned int nr_batch_pfns;
+            unsigned long *deferred_pages;
+            unsigned long nr_deferred_pages;
+            xc_hypercall_buffer_t dirty_bitmap_hbuf;
+        } save;
+
+        struct /* Restore data. */
+        {
+            struct xc_sr_restore_ops ops;
+            struct restore_callbacks *callbacks;
+
+            int send_back_fd;
+            unsigned long p2m_size;
+            xc_hypercall_buffer_t dirty_bitmap_hbuf;
+
+            /* From Image Header. */
+            uint32_t format_version;
+
+            /* From Domain Header. */
+            uint32_t guest_type;
+            uint32_t guest_page_size;
+
+            /* Currently buffering records between a checkpoint */
+            bool buffer_all_records;
+
+            /* Whether a STATIC_DATA_END record has been seen/inferred. */
+            bool seen_static_data_end;
+
+/*
+ * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
+ * in case the primary will fail, we can recover from the last
+ * checkpoint state.
+ * This should be enough for most of the cases because primary only send
+ * dirty pages at checkpoint.
+ */
+#define DEFAULT_BUF_RECORDS 1024
+            struct xc_sr_record *buffered_records;
+            unsigned int allocated_rec_num;
+            unsigned int buffered_rec_num;
+
+            /*
+             * Xenstore and Console parameters.
+             * INPUT:  evtchn & domid
+             * OUTPUT: gfn
+             */
+            xen_pfn_t    xenstore_gfn,    console_gfn;
+            unsigned int xenstore_evtchn, console_evtchn;
+            uint32_t     xenstore_domid,  console_domid;
+
+            /* Bitmap of currently populated PFNs during restore. */
+            unsigned long *populated_pfns;
+            xen_pfn_t max_populated_pfn;
+
+            /* Sender has invoked verify mode on the stream. */
+            bool verify;
+        } restore;
+    };
+
+    union /* Guest-arch specific data. */
+    {
+        struct /* x86 */
+        {
+            /* Common save/restore data. */
+            union
+            {
+                struct
+                {
+                    /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */
+                    struct xc_sr_blob cpuid, msr;
+                } restore;
+            };
+
+            struct /* x86 PV guest. */
+            {
+                /* 4 or 8; 32 or 64 bit domain */
+                unsigned int width;
+                /* 3 or 4 pagetable levels */
+                unsigned int levels;
+
+                /* Maximum Xen frame */
+                xen_pfn_t max_mfn;
+                /* Read-only machine to phys map */
+                xen_pfn_t *m2p;
+                /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
+                xen_pfn_t compat_m2p_mfn0;
+                /* Number of m2p frames mapped */
+                unsigned long nr_m2p_frames;
+
+                /* Maximum guest frame */
+                xen_pfn_t max_pfn;
+
+                /* Number of frames making up the p2m */
+                unsigned int p2m_frames;
+                /* Guest's phys to machine map.  Mapped read-only (save) or
+                 * allocated locally (restore).  Uses guest unsigned longs. */
+                void *p2m;
+                /* The guest pfns containing the p2m leaves */
+                xen_pfn_t *p2m_pfns;
+
+                /* Read-only mapping of guests shared info page */
+                shared_info_any_t *shinfo;
+
+                /* p2m generation count for verifying validity of local p2m. */
+                uint64_t p2m_generation;
+
+                union
+                {
+                    struct
+                    {
+                        /* State machine for the order of received records. */
+                        bool seen_pv_info;
+
+                        /* Types for each page (bounded by max_pfn). */
+                        uint32_t *pfn_types;
+
+                        /* x86 PV per-vcpu storage structure for blobs. */
+                        struct xc_sr_x86_pv_restore_vcpu
+                        {
+                            struct xc_sr_blob basic, extd, xsave, msr;
+                        } *vcpus;
+                        unsigned int nr_vcpus;
+                    } restore;
+                };
+            } pv;
+
+            struct /* x86 HVM guest. */
+            {
+                union
+                {
+                    struct
+                    {
+                        /* Whether qemu enabled logdirty mode, and we should
+                         * disable on cleanup. */
+                        bool qemu_enabled_logdirty;
+                    } save;
+
+                    struct
+                    {
+                        /* HVM context blob. */
+                        struct xc_sr_blob context;
+                    } restore;
+                };
+            } hvm;
+
+        } x86;
+    };
+};
+
+extern struct xc_sr_save_ops save_ops_x86_pv;
+extern struct xc_sr_save_ops save_ops_x86_hvm;
+
+extern struct xc_sr_restore_ops restore_ops_x86_pv;
+extern struct xc_sr_restore_ops restore_ops_x86_hvm;
+
+/* In-memory representation of one stream record. */
+struct xc_sr_record
+{
+    uint32_t type;   /* REC_TYPE_* identifier. */
+    uint32_t length; /* Length of 'data' in bytes, excluding padding. */
+    void *data;      /* Payload; allocated by read_record() on the read
+                      * side, caller-provided on the write side. */
+};
+
+/*
+ * Writes a split record to the stream, applying correct padding where
+ * appropriate.  It is common when sending records containing blobs from Xen
+ * that the header and blob data are separate.  This function accepts a second
+ * buffer and length, and will merge it with the main record when sending.
+ *
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non0 on failure.
+ */
+int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
+                       void *buf, size_t sz);
+
+/*
+ * Writes a record to the stream, applying correct padding where appropriate.
+ * Records with a non-zero length must provide a valid data field; records
+ * with a 0 length shall have their data field ignored.
+ *
+ * Returns 0 on success and non0 on failure.
+ */
+static inline int write_record(struct xc_sr_context *ctx,
+                               struct xc_sr_record *rec)
+{
+    /* A plain record is a split record with no secondary buffer. */
+    return write_split_record(ctx, rec, NULL, 0);
+}
+
+/*
+ * Reads a record from the stream, and fills in the record structure.
+ *
+ * Returns 0 on success and non-0 on failure.
+ *
+ * On success, the records type and size shall be valid.
+ * - If size is 0, data shall be NULL.
+ * - If size is non-0, data shall be a buffer allocated by malloc() which must
+ *   be passed to free() by the caller.
+ *
+ * On failure, the contents of the record structure are undefined.
+ */
+int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
+
+/*
+ * This would ideally be private in restore.c, but is needed by
+ * x86_pv_localise_page() if we receive pagetables frames ahead of the
+ * contents of the frames they point at.
+ */
+int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types);
+
+/* Handle a STATIC_DATA_END record. */
+int handle_static_data_end(struct xc_sr_context *ctx);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_common_x86.c b/tools/libs/guest/xg_sr_common_x86.c
new file mode 100644 (file)
index 0000000..6f12483
--- /dev/null
@@ -0,0 +1,173 @@
+#include "xg_sr_common_x86.h"
+
+int write_x86_tsc_info(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_tsc_info tsc = {};
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_TSC_INFO,
+        .length = sizeof(tsc),
+        .data = &tsc,
+    };
+
+    if ( xc_domain_get_tsc_info(xch, ctx->domid, &tsc.mode,
+                                &tsc.nsec, &tsc.khz, &tsc.incarnation) < 0 )
+    {
+        PERROR("Unable to obtain TSC information");
+        return -1;
+    }
+
+    return write_record(ctx, &rec);
+}
+
+int handle_x86_tsc_info(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_tsc_info *tsc = rec->data;
+
+    if ( rec->length != sizeof(*tsc) )
+    {
+        ERROR("X86_TSC_INFO record wrong size: length %u, expected %zu",
+              rec->length, sizeof(*tsc));
+        return -1;
+    }
+
+    if ( xc_domain_set_tsc_info(xch, ctx->domid, tsc->mode,
+                                tsc->nsec, tsc->khz, tsc->incarnation) )
+    {
+        PERROR("Unable to set TSC information");
+        return -1;
+    }
+
+    return 0;
+}
+
+int write_x86_cpu_policy_records(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_record cpuid = { .type = REC_TYPE_X86_CPUID_POLICY, };
+    struct xc_sr_record msrs  = { .type = REC_TYPE_X86_MSR_POLICY, };
+    uint32_t nr_leaves = 0, nr_msrs = 0;
+    int rc;
+
+    if ( xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs) < 0 )
+    {
+        PERROR("Unable to get CPU Policy size");
+        return -1;
+    }
+
+    cpuid.data = malloc(nr_leaves * sizeof(xen_cpuid_leaf_t));
+    msrs.data  = malloc(nr_msrs   * sizeof(xen_msr_entry_t));
+    if ( !cpuid.data || !msrs.data )
+    {
+        ERROR("Cannot allocate memory for CPU Policy");
+        rc = -1;
+        goto out;
+    }
+
+    if ( xc_get_domain_cpu_policy(xch, ctx->domid, &nr_leaves, cpuid.data,
+                                  &nr_msrs, msrs.data) )
+    {
+        PERROR("Unable to get d%d CPU Policy", ctx->domid);
+        rc = -1;
+        goto out;
+    }
+
+    cpuid.length = nr_leaves * sizeof(xen_cpuid_leaf_t);
+    if ( cpuid.length )
+    {
+        rc = write_record(ctx, &cpuid);
+        if ( rc )
+            goto out;
+    }
+
+    msrs.length = nr_msrs * sizeof(xen_msr_entry_t);
+    if ( msrs.length )
+        rc = write_record(ctx, &msrs);
+
+ out:
+    free(cpuid.data);
+    free(msrs.data);
+
+    return rc;
+}
+
+int handle_x86_cpuid_policy(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    if ( rec->length == 0 ||
+         rec->length % sizeof(xen_cpuid_leaf_t) != 0 )
+    {
+        ERROR("X86_CPUID_POLICY size %u should be multiple of %zu",
+              rec->length, sizeof(xen_cpuid_leaf_t));
+        return -1;
+    }
+
+    rc = update_blob(&ctx->x86.restore.cpuid, rec->data, rec->length);
+    if ( rc )
+        ERROR("Unable to allocate %u bytes for X86_CPUID_POLICY", rec->length);
+
+    return rc;
+}
+
+int handle_x86_msr_policy(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    if ( rec->length == 0 ||
+         rec->length % sizeof(xen_msr_entry_t) != 0 )
+    {
+        ERROR("X86_MSR_POLICY size %u should be multiple of %zu",
+              rec->length, sizeof(xen_cpuid_leaf_t));
+        return -1;
+    }
+
+    rc = update_blob(&ctx->x86.restore.msr, rec->data, rec->length);
+    if ( rc )
+        ERROR("Unable to allocate %u bytes for X86_MSR_POLICY", rec->length);
+
+    return rc;
+}
+
+int x86_static_data_complete(struct xc_sr_context *ctx, unsigned int *missing)
+{
+    xc_interface *xch = ctx->xch;
+    uint32_t nr_leaves = 0, nr_msrs = 0;
+    uint32_t err_l = ~0, err_s = ~0, err_m = ~0;
+
+    if ( ctx->x86.restore.cpuid.ptr )
+        nr_leaves = ctx->x86.restore.cpuid.size / sizeof(xen_cpuid_leaf_t);
+    else
+        *missing |= XGR_SDD_MISSING_CPUID;
+
+    if ( ctx->x86.restore.msr.ptr )
+        nr_msrs = ctx->x86.restore.msr.size / sizeof(xen_msr_entry_t);
+    else
+        *missing |= XGR_SDD_MISSING_MSR;
+
+    if ( (nr_leaves || nr_msrs) &&
+         xc_set_domain_cpu_policy(xch, ctx->domid,
+                                  nr_leaves, ctx->x86.restore.cpuid.ptr,
+                                  nr_msrs,   ctx->x86.restore.msr.ptr,
+                                  &err_l, &err_s, &err_m) )
+    {
+        PERROR("Failed to set CPUID policy: leaf %08x, subleaf %08x, msr %08x",
+               err_l, err_s, err_m);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_common_x86.h b/tools/libs/guest/xg_sr_common_x86.h
new file mode 100644 (file)
index 0000000..b55758c
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef __COMMON_X86__H
+#define __COMMON_X86__H
+
+#include "xg_sr_common.h"
+
+/*
+ * Obtains a domain's TSC information from Xen and writes an X86_TSC_INFO record
+ * into the stream.
+ */
+int write_x86_tsc_info(struct xc_sr_context *ctx);
+
+/*
+ * Parses a X86_TSC_INFO record and applies the result to the domain.
+ */
+int handle_x86_tsc_info(struct xc_sr_context *ctx, struct xc_sr_record *rec);
+
+/*
+ * Obtains a domain's CPU Policy from Xen, and writes X86_{CPUID,MSR}_POLICY
+ * records into the stream.
+ */
+int write_x86_cpu_policy_records(struct xc_sr_context *ctx);
+
+/*
+ * Parses an X86_CPUID_POLICY record and stashes the content for application
+ * when a STATIC_DATA_END record is encountered.
+ */
+int handle_x86_cpuid_policy(struct xc_sr_context *ctx,
+                            struct xc_sr_record *rec);
+
+/*
+ * Parses an X86_MSR_POLICY record and stashes the content for application
+ * when a STATIC_DATA_END record is encountered.
+ */
+int handle_x86_msr_policy(struct xc_sr_context *ctx,
+                          struct xc_sr_record *rec);
+
+/*
+ * Perform common x86 actions required after the static data has arrived.
+ */
+int x86_static_data_complete(struct xc_sr_context *ctx, unsigned int *missing);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_common_x86_pv.c b/tools/libs/guest/xg_sr_common_x86_pv.c
new file mode 100644 (file)
index 0000000..cd33406
--- /dev/null
@@ -0,0 +1,193 @@
+#include <assert.h>
+
+#include "xg_sr_common_x86_pv.h"
+
+xen_pfn_t mfn_to_pfn(struct xc_sr_context *ctx, xen_pfn_t mfn)
+{
+    assert(mfn <= ctx->x86.pv.max_mfn);
+    return ctx->x86.pv.m2p[mfn];
+}
+
+bool mfn_in_pseudophysmap(struct xc_sr_context *ctx, xen_pfn_t mfn)
+{
+    return ((mfn <= ctx->x86.pv.max_mfn) &&
+            (mfn_to_pfn(ctx, mfn) <= ctx->x86.pv.max_pfn) &&
+            (xc_pfn_to_mfn(mfn_to_pfn(ctx, mfn), ctx->x86.pv.p2m,
+                           ctx->x86.pv.width) == mfn));
+}
+
+void dump_bad_pseudophysmap_entry(struct xc_sr_context *ctx, xen_pfn_t mfn)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn = ~0UL;
+
+    ERROR("mfn %#lx, max %#lx", mfn, ctx->x86.pv.max_mfn);
+
+    if ( (mfn != ~0UL) && (mfn <= ctx->x86.pv.max_mfn) )
+    {
+        pfn = ctx->x86.pv.m2p[mfn];
+        ERROR("  m2p[%#lx] = %#lx, max_pfn %#lx",
+              mfn, pfn, ctx->x86.pv.max_pfn);
+    }
+
+    if ( (pfn != ~0UL) && (pfn <= ctx->x86.pv.max_pfn) )
+        ERROR("  p2m[%#lx] = %#lx",
+              pfn, xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width));
+}
+
+xen_pfn_t cr3_to_mfn(struct xc_sr_context *ctx, uint64_t cr3)
+{
+    if ( ctx->x86.pv.width == 8 )
+        return cr3 >> 12;
+    else
+    {
+        /* 32bit guests can't represent mfns wider than 32 bits */
+        if ( cr3 & 0xffffffff00000000UL )
+            return ~0UL;
+        else
+            return (uint32_t)((cr3 >> 12) | (cr3 << 20));
+    }
+}
+
+uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t _mfn)
+{
+    uint64_t mfn = _mfn;
+
+    if ( ctx->x86.pv.width == 8 )
+        return mfn << 12;
+    else
+    {
+        /* 32bit guests can't represent mfns wider than 32 bits */
+        if ( mfn & 0xffffffff00000000UL )
+            return ~0UL;
+        else
+            return (uint32_t)((mfn << 12) | (mfn >> 20));
+    }
+}
+
+int x86_pv_domain_info(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int guest_width, guest_levels;
+
+    /* Get the domain width */
+    if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
+    {
+        PERROR("Unable to determine dom%d's width", ctx->domid);
+        return -1;
+    }
+
+    if ( guest_width == 4 )
+        guest_levels = 3;
+    else if ( guest_width == 8 )
+        guest_levels = 4;
+    else
+    {
+        ERROR("Invalid guest width %d.  Expected 32 or 64", guest_width * 8);
+        return -1;
+    }
+    ctx->x86.pv.width = guest_width;
+    ctx->x86.pv.levels = guest_levels;
+
+    DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels);
+
+    return 0;
+}
+
int x86_pv_map_m2p(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t m2p_chunks, m2p_size, max_page;
    privcmd_mmap_entry_t *entries = NULL;
    xen_pfn_t *extents_start = NULL;
    int rc = -1, i;

    /* The m2p must cover every mfn up to the host's maximum ram page. */
    if ( xc_maximum_ram_page(xch, &max_page) < 0 )
    {
        PERROR("Failed to get maximum ram page");
        goto err;
    }

    ctx->x86.pv.max_mfn = max_page;
    m2p_size   = M2P_SIZE(ctx->x86.pv.max_mfn);
    m2p_chunks = M2P_CHUNKS(ctx->x86.pv.max_mfn);

    /* One mfn per M2P_CHUNK_SIZE-sized chunk of the table. */
    extents_start = malloc(m2p_chunks * sizeof(xen_pfn_t));
    if ( !extents_start )
    {
        ERROR("Unable to allocate %lu bytes for m2p mfns",
              m2p_chunks * sizeof(xen_pfn_t));
        goto err;
    }

    /* Ask Xen which mfns hold the m2p chunks. */
    if ( xc_machphys_mfn_list(xch, m2p_chunks, extents_start) )
    {
        PERROR("Failed to get m2p mfn list");
        goto err;
    }

    entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t));
    if ( !entries )
    {
        ERROR("Unable to allocate %lu bytes for m2p mapping mfns",
              m2p_chunks * sizeof(privcmd_mmap_entry_t));
        goto err;
    }

    for ( i = 0; i < m2p_chunks; ++i )
        entries[i].mfn = extents_start[i];

    /* Map all chunks read-only as one contiguous virtual range. */
    ctx->x86.pv.m2p = xc_map_foreign_ranges(
        xch, DOMID_XEN, m2p_size, PROT_READ,
        M2P_CHUNK_SIZE, entries, m2p_chunks);

    if ( !ctx->x86.pv.m2p )
    {
        PERROR("Failed to mmap() m2p ranges");
        goto err;
    }

    ctx->x86.pv.nr_m2p_frames = (M2P_CHUNK_SIZE >> PAGE_SHIFT) * m2p_chunks;

#ifdef __i386__
    /* 32 bit toolstacks automatically get the compat m2p */
    ctx->x86.pv.compat_m2p_mfn0 = entries[0].mfn;
#else
    /* 64 bit toolstacks need to ask Xen specially for it */
    {
        struct xen_machphys_mfn_list xmml = {
            .max_extents = 1,
            .extent_start = { &ctx->x86.pv.compat_m2p_mfn0 },
        };

        rc = do_memory_op(xch, XENMEM_machphys_compat_mfn_list,
                          &xmml, sizeof(xmml));
        if ( rc || xmml.nr_extents != 1 )
        {
            PERROR("Failed to get compat mfn list from Xen");
            rc = -1;
            goto err;
        }
    }
#endif

    /* All Done */
    rc = 0;
    DPRINTF("max_mfn %#lx", ctx->x86.pv.max_mfn);

    /* Success path falls through: entries/extents are only needed for setup. */
 err:
    free(entries);
    free(extents_start);

    return rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_common_x86_pv.h b/tools/libs/guest/xg_sr_common_x86_pv.h
new file mode 100644 (file)
index 0000000..953b5bf
--- /dev/null
@@ -0,0 +1,109 @@
+#ifndef __COMMON_X86_PV_H
+#define __COMMON_X86_PV_H
+
+#include "xg_sr_common_x86.h"
+
+/* Virtual address ranges reserved for hypervisor. */
+#define HYPERVISOR_VIRT_START_X86_64 0xFFFF800000000000ULL
+#define HYPERVISOR_VIRT_END_X86_64   0xFFFF87FFFFFFFFFFULL
+
+#define HYPERVISOR_VIRT_START_X86_32 0x00000000F5800000ULL
+#define HYPERVISOR_VIRT_END_X86_32   0x00000000FFFFFFFFULL
+
+/*
+ * Convert an mfn to a pfn, given Xen's m2p table.
+ *
+ * Caller must ensure that the requested mfn is in range.
+ */
+xen_pfn_t mfn_to_pfn(struct xc_sr_context *ctx, xen_pfn_t mfn);
+
+/*
+ * Query whether a particular mfn is valid in the physmap of a guest.
+ */
+bool mfn_in_pseudophysmap(struct xc_sr_context *ctx, xen_pfn_t mfn);
+
+/*
+ * Debug a particular mfn by walking the p2m and m2p.
+ */
+void dump_bad_pseudophysmap_entry(struct xc_sr_context *ctx, xen_pfn_t mfn);
+
+/*
+ * Convert a PV cr3 field to an mfn.
+ *
+ * Adjusts for Xen's extended-cr3 format to pack a 44bit physical address into
+ * a 32bit architectural cr3.
+ */
+xen_pfn_t cr3_to_mfn(struct xc_sr_context *ctx, uint64_t cr3);
+
+/*
+ * Convert an mfn to a PV cr3 field.
+ *
+ * Adjusts for Xen's extended-cr3 format to pack a 44bit physical address into
+ * a 32bit architectural cr3.
+ */
+uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t mfn);
+
+/* Bits 12 through 51 of a PTE point at the frame */
+#define PTE_FRAME_MASK 0x000ffffffffff000ULL
+
+/*
+ * Extract an mfn from a Pagetable Entry.  May return INVALID_MFN if the pte
+ * would overflow a 32bit xen_pfn_t.
+ */
+static inline xen_pfn_t pte_to_frame(uint64_t pte)
+{
+    uint64_t frame = (pte & PTE_FRAME_MASK) >> PAGE_SHIFT;
+
+#ifdef __i386__
+    if ( frame >= INVALID_MFN )
+        return INVALID_MFN;
+#endif
+
+    return frame;
+}
+
+/*
+ * Change the frame in a Pagetable Entry while leaving the flags alone.
+ */
+static inline uint64_t merge_pte(uint64_t pte, xen_pfn_t mfn)
+{
+    return (pte & ~PTE_FRAME_MASK) | ((uint64_t)mfn << PAGE_SHIFT);
+}
+
+/*
+ * Get current domain information.
+ *
+ * Fills ctx->x86.pv
+ * - .width
+ * - .levels
+ * - .fpp
+ * - .p2m_frames
+ *
+ * Used by the save side to create the X86_PV_INFO record, and by the restore
+ * side to verify the incoming stream.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_domain_info(struct xc_sr_context *ctx);
+
+/*
+ * Maps the Xen M2P.
+ *
+ * Fills ctx->x86.pv.
+ * - .max_mfn
+ * - .m2p
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int x86_pv_map_m2p(struct xc_sr_context *ctx);
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_restore.c b/tools/libs/guest/xg_sr_restore.c
new file mode 100644 (file)
index 0000000..b57a787
--- /dev/null
@@ -0,0 +1,986 @@
+#include <arpa/inet.h>
+
+#include <assert.h>
+
+#include "xg_sr_common.h"
+
+/*
+ * Read and validate the Image and Domain headers.
+ */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

    /* Image header multi-byte fields are big-endian on the wire. */
    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }

    if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }

    /* v2 and v3 are understood; v2 lacks STATIC_DATA_END (see handlers). */
    if ( ihdr.version < 2 || ihdr.version > 3 )
    {
        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %d",
              ihdr.version);
        return -1;
    }

    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    /* xen_major == 0 identifies a stream converted from the legacy format. */
    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
    return 0;
}
+
+/*
+ * Is a pfn populated?
+ */
+static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    if ( pfn > ctx->restore.max_populated_pfn )
+        return false;
+    return test_bit(pfn, ctx->restore.populated_pfns);
+}
+
+/*
+ * Set a pfn as populated, expanding the tracking structures if needed. To
+ * avoid realloc()ing too excessively, the size increased to the nearest power
+ * of two large enough to contain the required pfn.
+ */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        /* xen_pfn_t is 64 bits wide here; smear the high half too. */
        new_max |= new_max >> 32;
#endif

        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        /* Zero only the newly grown tail; realloc leaves it indeterminate. */
        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    /* Callers (via populate_pfns) never mark a pfn populated twice. */
    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}
+
+/*
+ * Given a set of pfns, obtain memory from Xen to fill the physmap for the
+ * unpopulated subset.  If types is NULL, no page type checking is performed
+ * and all unpopulated pfns are populated.
+ */
+int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
+                  const xen_pfn_t *original_pfns, const uint32_t *types)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
+        *pfns = malloc(count * sizeof(*pfns));
+    unsigned int i, nr_pfns = 0;
+    int rc = -1;
+
+    if ( !mfns || !pfns )
+    {
+        ERROR("Failed to allocate %zu bytes for populating the physmap",
+              2 * count * sizeof(*mfns));
+        goto err;
+    }
+
+    for ( i = 0; i < count; ++i )
+    {
+        if ( (!types || (types &&
+                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
+                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
+             !pfn_is_populated(ctx, original_pfns[i]) )
+        {
+            rc = pfn_set_populated(ctx, original_pfns[i]);
+            if ( rc )
+                goto err;
+            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
+            ++nr_pfns;
+        }
+    }
+
+    if ( nr_pfns )
+    {
+        rc = xc_domain_populate_physmap_exact(
+            xch, ctx->domid, nr_pfns, 0, 0, mfns);
+        if ( rc )
+        {
+            PERROR("Failed to populate physmap");
+            goto err;
+        }
+
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( mfns[i] == INVALID_MFN )
+            {
+                ERROR("Populate physmap failed for pfn %u", i);
+                rc = -1;
+                goto err;
+            }
+
+            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
+        }
+    }
+
+    rc = 0;
+
+ err:
+    free(pfns);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Given a list of pfns, their types, and a block of page data from the
+ * stream, populate and record their types, map the relevant subset and copy
+ * the data into the guest.
+ */
static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned int i, /* i indexes the pfns from the record. */
        j,          /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    /* Ensure every pfn in the batch is backed by memory before mapping. */
    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        /* Only these types carry a page of data in the record; collect
         * their gfns for mapping.  Everything else is skipped. */
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_NOTAB:

        case XEN_DOMCTL_PFINFO_L1TAB:
        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L2TAB:
        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L3TAB:
        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:

        case XEN_DOMCTL_PFINFO_L4TAB:
        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:

            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
            break;
        }
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(
        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

    /* Walk the batch again; j/guest_page advance only for mapped pages. */
    for ( i = 0, j = 0; i < count; ++i )
    {
        switch ( types[i] )
        {
        case XEN_DOMCTL_PFINFO_XTAB:
        case XEN_DOMCTL_PFINFO_BROKEN:
        case XEN_DOMCTL_PFINFO_XALLOC:
            /* No page data to deal with. */
            continue;
        }

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            /* NOTE(review): a mismatch is logged but does not set rc, so a
             * failed verification does not fail the call — confirm intended. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}
+
+/*
+ * Validate a PAGE_DATA record from the stream, and pass the results to
+ * process_page_data() to actually perform the legwork.
+ */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned int i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    /*
     * v2 compatibility only exists for x86 streams.  This is a bit of a
     * bodge, but it is less bad than duplicating handle_page_data() between
     * different architectures.
     */
#if defined(__i386__) || defined(__x86_64__)
    /* v2 compat.  Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            goto err;
        }
        /* Restore rc to its "failure until proven otherwise" value. */
        rc = -1;
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        goto err;
    }
#endif

    /* Structural validation: header, then pfn array, then page data. */
    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }

    if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }

    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

    /* Split each packed 64bit entry into its pfn and type fields. */
    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        /* Types 5-8 (in LTAB terms) are undefined in the stream. */
        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
        {
            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }

        if ( type < XEN_DOMCTL_PFINFO_BROKEN )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    /* The record length must account exactly for every data-carrying page. */
    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    /* Page data starts immediately after the pfn array. */
    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}
+
+/*
+ * Send checkpoint dirty pfn list to primary.
+ */
+static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = -1;
+    unsigned int count, written;
+    uint64_t i, *pfns = NULL;
+    struct iovec *iov = NULL;
+    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
+    };
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
+
+    if ( xc_shadow_control(
+             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
+             NULL, 0, &stats) != ctx->restore.p2m_size )
+    {
+        PERROR("Failed to retrieve logdirty bitmap");
+        goto err;
+    }
+
+    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
+    {
+        if ( test_bit(i, dirty_bitmap) )
+            count++;
+    }
+
+
+    pfns = malloc(count * sizeof(*pfns));
+    if ( !pfns )
+    {
+        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
+              count * sizeof(*pfns));
+        goto err;
+    }
+
+    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
+    {
+        if ( !test_bit(i, dirty_bitmap) )
+            continue;
+
+        if ( written > count )
+        {
+            ERROR("Dirty pfn list exceed");
+            goto err;
+        }
+
+        pfns[written++] = i;
+    }
+
+    /* iovec[] for writev(). */
+    iov = malloc(3 * sizeof(*iov));
+    if ( !iov )
+    {
+        ERROR("Unable to allocate memory for sending dirty bitmap");
+        goto err;
+    }
+
+    rec.length = count * sizeof(*pfns);
+
+    iov[0].iov_base = &rec.type;
+    iov[0].iov_len = sizeof(rec.type);
+
+    iov[1].iov_base = &rec.length;
+    iov[1].iov_len = sizeof(rec.length);
+
+    iov[2].iov_base = pfns;
+    iov[2].iov_len = count * sizeof(*pfns);
+
+    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
+    {
+        PERROR("Failed to write dirty bitmap to stream");
+        goto err;
+    }
+
+    rc = 0;
+ err:
+    free(pfns);
+    free(iov);
+    return rc;
+}
+
static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
/*
 * Handle a CHECKPOINT record: replay any buffered records, then (for COLO)
 * resume the secondary and wait for the next checkpoint cycle.
 */
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned int i;

    /* CHECKPOINT records are only valid in checkpointed/COLO streams. */
    if ( ctx->stream_type == XC_STREAM_PLAIN )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

    /*
     * A complete checkpoint has arrived: apply all buffered records now.
     * The first CHECKPOINT record instead switches buffering on, so that
     * subsequent checkpoints are only applied once complete.
     */
    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
/* Map a callback's tri-state result (1 ok, 2 broken channel) onto rc. */
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume guest */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume secondary vm */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint */
        ret = ctx->restore.callbacks->wait_checkpoint(
            ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* suspend secondary vm */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        /* Tell the primary which pages the secondary has since dirtied. */
        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}
+
+/*
+ * Stash a record (shallow copy, taking over ownership of rec->data) so it
+ * can be replayed once a complete checkpoint has arrived.  The buffer grows
+ * in DEFAULT_BUF_RECORDS increments.  Returns 0 on success, -1 on
+ * allocation failure.
+ */
+static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int new_alloc_num;
+    struct xc_sr_record *p;
+
+    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
+    {
+        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
+        /* realloc() into a temporary so the old buffer survives failure. */
+        p = realloc(ctx->restore.buffered_records,
+                    new_alloc_num * sizeof(struct xc_sr_record));
+        if ( !p )
+        {
+            ERROR("Failed to realloc memory for buffered records");
+            return -1;
+        }
+
+        ctx->restore.buffered_records = p;
+        ctx->restore.allocated_rec_num = new_alloc_num;
+    }
+
+    /* Copies the struct, including the data pointer (freed in cleanup()). */
+    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
+           rec, sizeof(*rec));
+
+    return 0;
+}
+
+/*
+ * Handle a STATIC_DATA_END record.  Exactly one such record is permitted
+ * per stream; a second occurrence is a hard error.  Lets the arch-specific
+ * ops note which static data records were absent, then gives the (optional)
+ * static_data_done callback a chance to fill in local defaults.
+ * Returns 0 on success, nonzero on failure.
+ */
+int handle_static_data_end(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int missing = 0;
+    int rc = 0;
+
+    if ( ctx->restore.seen_static_data_end )
+    {
+        ERROR("Multiple STATIC_DATA_END records found");
+        return -1;
+    }
+
+    ctx->restore.seen_static_data_end = true;
+
+    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
+    if ( rc )
+        return rc;
+
+    /*
+     * Parenthesise the assignment: `!=` binds tighter than `=`, so the
+     * previous form stored the boolean comparison result (0/1) in rc
+     * rather than the callback's actual error code.
+     */
+    if ( ctx->restore.callbacks->static_data_done &&
+         ((rc = ctx->restore.callbacks->static_data_done(
+               missing, ctx->restore.callbacks->data)) != 0) )
+        ERROR("static_data_done() callback failed: %d", rc);
+
+    return rc;
+}
+
+/*
+ * Dispatch a single record to its handler.  Record types not known at this
+ * layer are handed to the arch-specific process_record op, which may return
+ * RECORD_NOT_PROCESSED for the caller to decide (optional vs mandatory).
+ * The record payload is always freed before returning.
+ */
+static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = 0;
+
+    switch ( rec->type )
+    {
+    case REC_TYPE_END:
+        break;
+
+    case REC_TYPE_PAGE_DATA:
+        rc = handle_page_data(ctx, rec);
+        break;
+
+    case REC_TYPE_VERIFY:
+        DPRINTF("Verify mode enabled");
+        ctx->restore.verify = true;
+        break;
+
+    case REC_TYPE_CHECKPOINT:
+        rc = handle_checkpoint(ctx);
+        break;
+
+    case REC_TYPE_STATIC_DATA_END:
+        rc = handle_static_data_end(ctx);
+        break;
+
+    default:
+        rc = ctx->restore.ops.process_record(ctx, rec);
+        break;
+    }
+
+    /* The payload is consumed (or irrelevant) in every case above. */
+    free(rec->data);
+    rec->data = NULL;
+
+    return rc;
+}
+
+/*
+ * One-time restore setup: allocate the dirty bitmap hypercall buffer (COLO
+ * streams only), invoke the arch-specific setup op, and allocate the
+ * populated-pfn bitmap plus the initial checkpoint record buffer.
+ *
+ * Note the `err` label is also reached on the success path (rc == 0);
+ * partially-allocated state is released later by cleanup().
+ */
+static int setup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
+
+    if ( ctx->stream_type == XC_STREAM_COLO )
+    {
+        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
+            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
+
+        if ( !dirty_bitmap )
+        {
+            ERROR("Unable to allocate memory for dirty bitmap");
+            rc = -1;
+            goto err;
+        }
+    }
+
+    rc = ctx->restore.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
+    /* Start with an 8k-pfn bitmap; presumably grown on demand by the
+     * pfn-population code outside this view -- confirm. */
+    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
+    ctx->restore.populated_pfns = bitmap_alloc(
+        ctx->restore.max_populated_pfn + 1);
+    if ( !ctx->restore.populated_pfns )
+    {
+        ERROR("Unable to allocate memory for populated_pfns bitmap");
+        rc = -1;
+        goto err;
+    }
+
+    ctx->restore.buffered_records = malloc(
+        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
+    if ( !ctx->restore.buffered_records )
+    {
+        ERROR("Unable to allocate memory for buffered records");
+        rc = -1;
+        goto err;
+    }
+    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;
+
+ err:
+    return rc;
+}
+
+/*
+ * Tear down everything allocated by setup() and during the restore:
+ * buffered record payloads, the COLO dirty bitmap, the record buffer, the
+ * populated-pfn bitmap, and finally the arch-specific state.  Safe to call
+ * after a partial setup() failure (free(NULL) is a no-op).
+ */
+static void cleanup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int i;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->restore.dirty_bitmap_hbuf);
+
+    /* Payloads of records buffered but never processed. */
+    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
+        free(ctx->restore.buffered_records[i].data);
+
+    if ( ctx->stream_type == XC_STREAM_COLO )
+        xc_hypercall_buffer_free_pages(
+            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
+
+    free(ctx->restore.buffered_records);
+    free(ctx->restore.populated_pfns);
+
+    if ( ctx->restore.ops.cleanup(ctx) )
+        PERROR("Failed to clean up");
+}
+
+/*
+ * Restore a domain.
+ *
+ * Reads records from ctx->fd until REC_TYPE_END.  Once buffer_all_records
+ * mode has been entered (after the first checkpoint, see the checkpoint
+ * handling above), records other than END/CHECKPOINT are stashed rather
+ * than applied, so an incomplete checkpoint can be discarded on failure.
+ * In that mode a read error or BROKEN_CHANNEL fails over to the last
+ * consistent checkpoint instead of aborting the restore.
+ */
+static int restore(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_record rec;
+    int rc, saved_rc = 0, saved_errno = 0;
+
+    IPRINTF("Restoring domain");
+
+    rc = setup(ctx);
+    if ( rc )
+        goto err;
+
+    do
+    {
+        rc = read_record(ctx, ctx->fd, &rec);
+        if ( rc )
+        {
+            /* Stream broke: recoverable iff we hold a full checkpoint. */
+            if ( ctx->restore.buffer_all_records )
+                goto remus_failover;
+            else
+                goto err;
+        }
+
+        if ( ctx->restore.buffer_all_records &&
+             rec.type != REC_TYPE_END &&
+             rec.type != REC_TYPE_CHECKPOINT )
+        {
+            rc = buffer_record(ctx, &rec);
+            if ( rc )
+                goto err;
+        }
+        else
+        {
+            rc = process_record(ctx, &rec);
+            if ( rc == RECORD_NOT_PROCESSED )
+            {
+                /* Unknown records are only tolerable if marked optional. */
+                if ( rec.type & REC_TYPE_OPTIONAL )
+                    DPRINTF("Ignoring optional record %#x (%s)",
+                            rec.type, rec_type_to_str(rec.type));
+                else
+                {
+                    ERROR("Mandatory record %#x (%s) not handled",
+                          rec.type, rec_type_to_str(rec.type));
+                    rc = -1;
+                    goto err;
+                }
+            }
+            else if ( rc == BROKEN_CHANNEL )
+                goto remus_failover;
+            else if ( rc )
+                goto err;
+        }
+
+    } while ( rec.type != REC_TYPE_END );
+
+ remus_failover:
+    if ( ctx->stream_type == XC_STREAM_COLO )
+    {
+        /* With COLO, we have already called stream_complete */
+        rc = 0;
+        IPRINTF("COLO Failover");
+        goto done;
+    }
+
+    /*
+     * With Remus, if we reach here, there must be some error on primary,
+     * failover from the last checkpoint state.
+     */
+    rc = ctx->restore.ops.stream_complete(ctx);
+    if ( rc )
+        goto err;
+
+    IPRINTF("Restore successful");
+    goto done;
+
+ err:
+    saved_errno = errno;
+    saved_rc = rc;
+    PERROR("Restore failed");
+
+ done:
+    /* Preserve the failure rc/errno across cleanup(), which may clobber both. */
+    cleanup(ctx);
+
+    if ( saved_rc )
+    {
+        rc = saved_rc;
+        errno = saved_errno;
+    }
+
+    return rc;
+}
+
+/*
+ * Public entry point: restore a domain from the migration stream on io_fd.
+ * Validates that the stream type carries the callbacks it requires, reads
+ * the stream headers, selects PV or HVM restore ops from the domain info,
+ * and on success reports the xenstore/console frames via *store_mfn and
+ * *console_gfn.  Returns 0 on success, -1 on failure.
+ */
+int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
+                      unsigned int store_evtchn, unsigned long *store_mfn,
+                      uint32_t store_domid, unsigned int console_evtchn,
+                      unsigned long *console_gfn, uint32_t console_domid,
+                      xc_stream_type_t stream_type,
+                      struct restore_callbacks *callbacks, int send_back_fd)
+{
+    xen_pfn_t nr_pfns;
+    struct xc_sr_context ctx = {
+        .xch = xch,
+        .fd = io_fd,
+        .stream_type = stream_type,
+    };
+
+    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
+    ctx.restore.console_evtchn = console_evtchn;
+    ctx.restore.console_domid = console_domid;
+    ctx.restore.xenstore_evtchn = store_evtchn;
+    ctx.restore.xenstore_domid = store_domid;
+    ctx.restore.callbacks = callbacks;
+    ctx.restore.send_back_fd = send_back_fd;
+
+    /* Sanity check stream_type-related parameters */
+    switch ( stream_type )
+    {
+    case XC_STREAM_COLO:
+        assert(callbacks->suspend &&
+               callbacks->postcopy &&
+               callbacks->wait_checkpoint &&
+               callbacks->restore_results);
+        /* Fallthrough */
+    case XC_STREAM_REMUS:
+        assert(callbacks->checkpoint);
+        /* Fallthrough */
+    case XC_STREAM_PLAIN:
+        break;
+
+    default:
+        assert(!"Bad stream_type");
+        break;
+    }
+
+    /* getinfo() returns the number of domains reported; expect exactly 1. */
+    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+    {
+        PERROR("Failed to get domain info");
+        return -1;
+    }
+
+    /* getinfo() may have returned an adjacent domain; check the id. */
+    if ( ctx.dominfo.domid != dom )
+    {
+        ERROR("Domain %u does not exist", dom);
+        return -1;
+    }
+
+    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
+            io_fd, dom, ctx.dominfo.hvm, stream_type);
+
+    ctx.domid = dom;
+
+    if ( read_headers(&ctx) )
+        return -1;
+
+    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
+    {
+        PERROR("Unable to obtain the guest p2m size");
+        return -1;
+    }
+
+    ctx.restore.p2m_size = nr_pfns;
+    ctx.restore.ops = ctx.dominfo.hvm
+        ? restore_ops_x86_hvm : restore_ops_x86_pv;
+
+    if ( restore(&ctx) )
+        return -1;
+
+    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
+            ctx.restore.xenstore_gfn,
+            ctx.restore.xenstore_domid,
+            ctx.restore.xenstore_evtchn);
+
+    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
+            ctx.restore.console_gfn,
+            ctx.restore.console_domid,
+            ctx.restore.console_evtchn);
+
+    *console_gfn = ctx.restore.console_gfn;
+    *store_mfn = ctx.restore.xenstore_gfn;
+
+    return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_restore_x86_hvm.c b/tools/libs/guest/xg_sr_restore_x86_hvm.c
new file mode 100644 (file)
index 0000000..d6ea6f3
--- /dev/null
@@ -0,0 +1,274 @@
+#include <assert.h>
+#include <arpa/inet.h>
+
+#include "xg_sr_common_x86.h"
+
+/*
+ * Process an HVM_CONTEXT record from the stream.
+ *
+ * Stashes a copy of the record payload in the context blob (via
+ * update_blob(), defined elsewhere) for use by stream_complete().
+ * Returns 0 on success, nonzero on allocation failure.
+ */
+static int handle_hvm_context(struct xc_sr_context *ctx,
+                              struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = update_blob(&ctx->x86.hvm.restore.context, rec->data, rec->length);
+
+    if ( rc )
+        ERROR("Unable to allocate %u bytes for hvm context", rec->length);
+
+    return rc;
+}
+
+/*
+ * Process an HVM_PARAMS record from the stream.
+ *
+ * Validates the record length against its declared entry count, then
+ * applies each (index, value) pair with xc_hvm_param_set().  The console,
+ * xenstore and ioreq frames are additionally zeroed, and the console/
+ * xenstore gfns are recorded for later use.  Returns 0 on success,
+ * negative on failure.
+ */
+static int handle_hvm_params(struct xc_sr_context *ctx,
+                             struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_hvm_params *hdr = rec->data;
+    struct xc_sr_rec_hvm_params_entry *entry = hdr->param;
+    unsigned int i;
+    int rc;
+
+    /* Length check in two steps, so hdr->count is only read when valid. */
+    if ( rec->length < sizeof(*hdr) )
+    {
+        ERROR("HVM_PARAMS record truncated: length %u, header size %zu",
+              rec->length, sizeof(*hdr));
+        return -1;
+    }
+
+    if ( rec->length != (sizeof(*hdr) + hdr->count * sizeof(*entry)) )
+    {
+        ERROR("HVM_PARAMS record truncated: header %zu, count %u, "
+              "expected len %zu, got %u",
+              sizeof(*hdr), hdr->count, hdr->count * sizeof(*entry),
+              rec->length);
+        return -1;
+    }
+
+    /*
+     * Tolerate empty records.  Older sending sides used to accidentally
+     * generate them.
+     */
+    if ( hdr->count == 0 )
+    {
+        DBGPRINTF("Skipping empty HVM_PARAMS record\n");
+        return 0;
+    }
+
+    for ( i = 0; i < hdr->count; i++, entry++ )
+    {
+        switch ( entry->index )
+        {
+        case HVM_PARAM_CONSOLE_PFN:
+            ctx->restore.console_gfn = entry->value;
+            xc_clear_domain_page(xch, ctx->domid, entry->value);
+            break;
+        case HVM_PARAM_STORE_PFN:
+            ctx->restore.xenstore_gfn = entry->value;
+            xc_clear_domain_page(xch, ctx->domid, entry->value);
+            break;
+        case HVM_PARAM_IOREQ_PFN:
+        case HVM_PARAM_BUFIOREQ_PFN:
+            xc_clear_domain_page(xch, ctx->domid, entry->value);
+            break;
+
+        case HVM_PARAM_PAE_ENABLED:
+            /*
+             * This HVM_PARAM only ever existed to pass data into
+             * xc_cpuid_apply_policy().  The function has now been updated to
+             * use a normal calling convention, making the param obsolete.
+             *
+             * Discard if we find it in an old migration stream.
+             */
+            continue;
+        }
+
+        rc = xc_hvm_param_set(xch, ctx->domid, entry->index, entry->value);
+        if ( rc < 0 )
+        {
+            PERROR("set HVM param %"PRId64" = 0x%016"PRIx64,
+                   entry->index, entry->value);
+            return rc;
+        }
+    }
+    return 0;
+}
+
+/* restore_ops function.  Every pfn in an HVM stream is acceptable. */
+static bool x86_hvm_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    return true;
+}
+
+/* restore_ops function.  HVM streams use gfns directly, so the mapping
+ * is the identity. */
+static xen_pfn_t x86_hvm_pfn_to_gfn(const struct xc_sr_context *ctx,
+                                    xen_pfn_t pfn)
+{
+    return pfn;
+}
+
+/* restore_ops function.  No local p2m is maintained for HVM guests, so
+ * there is nothing to record. */
+static void x86_hvm_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
+                            xen_pfn_t gfn)
+{
+    /* no op */
+}
+
+/* restore_ops function.  HVM pages carry no pagetable type information. */
+static void x86_hvm_set_page_type(struct xc_sr_context *ctx,
+                                  xen_pfn_t pfn, xen_pfn_t type)
+{
+    /* no-op */
+}
+
+/* restore_ops function.  HVM page contents need no pfn->mfn rewriting. */
+static int x86_hvm_localise_page(struct xc_sr_context *ctx,
+                                 uint32_t type, void *page)
+{
+    /* no-op */
+    return 0;
+}
+
+/*
+ * restore_ops function.  Confirms the stream matches the domain: the
+ * stream must declare an x86 HVM guest with the native page size, and on
+ * 32-bit builds the guest must be small enough to map.
+ */
+static int x86_hvm_setup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+
+    if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
+    {
+        ERROR("Unable to restore %s domain into an x86 HVM domain",
+              dhdr_type_to_str(ctx->restore.guest_type));
+        return -1;
+    }
+
+    if ( ctx->restore.guest_page_size != PAGE_SIZE )
+    {
+        ERROR("Invalid page size %u for x86 HVM domains",
+              ctx->restore.guest_page_size);
+        return -1;
+    }
+
+#ifdef __i386__
+    /* Very large domains (> 1TB) will exhaust virtual address space. */
+    if ( ctx->restore.p2m_size > 0x0fffffff )
+    {
+        errno = E2BIG;
+        PERROR("Cannot restore this big a guest");
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * restore_ops function.  Dispatch HVM-specific record types; anything
+ * unrecognised is reported as RECORD_NOT_PROCESSED for the generic layer
+ * to judge (optional records are ignored, mandatory ones are fatal).
+ */
+static int x86_hvm_process_record(struct xc_sr_context *ctx,
+                                  struct xc_sr_record *rec)
+{
+    switch ( rec->type )
+    {
+    case REC_TYPE_X86_TSC_INFO:
+        return handle_x86_tsc_info(ctx, rec);
+
+    case REC_TYPE_HVM_CONTEXT:
+        return handle_hvm_context(ctx, rec);
+
+    case REC_TYPE_HVM_PARAMS:
+        return handle_hvm_params(ctx, rec);
+
+    case REC_TYPE_X86_CPUID_POLICY:
+        return handle_x86_cpuid_policy(ctx, rec);
+
+    case REC_TYPE_X86_MSR_POLICY:
+        return handle_x86_msr_policy(ctx, rec);
+
+    default:
+        return RECORD_NOT_PROCESSED;
+    }
+}
+
+/*
+ * restore_ops function.  Sets extra hvm parameters and seeds the grant table.
+ *
+ * Runs once the whole stream has been received: installs the restored
+ * event channels, pushes the stashed HVM context blob into Xen, and seeds
+ * the grant table with the console/xenstore frames.  Returns 0 on
+ * success, nonzero on failure.
+ */
+static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_STORE_EVTCHN,
+                          ctx->restore.xenstore_evtchn);
+    if ( rc )
+    {
+        PERROR("Failed to set HVM_PARAM_STORE_EVTCHN");
+        return rc;
+    }
+
+    rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_CONSOLE_EVTCHN,
+                          ctx->restore.console_evtchn);
+    if ( rc )
+    {
+        PERROR("Failed to set HVM_PARAM_CONSOLE_EVTCHN");
+        return rc;
+    }
+
+    /* Context blob accumulated by handle_hvm_context(). */
+    rc = xc_domain_hvm_setcontext(xch, ctx->domid,
+                                  ctx->x86.hvm.restore.context.ptr,
+                                  ctx->x86.hvm.restore.context.size);
+    if ( rc < 0 )
+    {
+        PERROR("Unable to restore HVM context");
+        return rc;
+    }
+
+    rc = xc_dom_gnttab_seed(xch, ctx->domid, true,
+                            ctx->restore.console_gfn,
+                            ctx->restore.xenstore_gfn,
+                            ctx->restore.console_domid,
+                            ctx->restore.xenstore_domid);
+    if ( rc )
+    {
+        PERROR("Failed to seed grant table");
+        return rc;
+    }
+
+    return rc;
+}
+
+/* restore_ops function.  Free the blobs stashed while processing records. */
+static int x86_hvm_cleanup(struct xc_sr_context *ctx)
+{
+    free(ctx->x86.hvm.restore.context.ptr);
+
+    free(ctx->x86.restore.cpuid.ptr);
+    free(ctx->x86.restore.msr.ptr);
+
+    return 0;
+}
+
+/* Ops table used by the generic restore code for x86 HVM guests. */
+struct xc_sr_restore_ops restore_ops_x86_hvm =
+{
+    .pfn_is_valid    = x86_hvm_pfn_is_valid,
+    .pfn_to_gfn      = x86_hvm_pfn_to_gfn,
+    .set_gfn         = x86_hvm_set_gfn,
+    .set_page_type   = x86_hvm_set_page_type,
+    .localise_page   = x86_hvm_localise_page,
+    .setup           = x86_hvm_setup,
+    .process_record  = x86_hvm_process_record,
+    .static_data_complete = x86_static_data_complete,
+    .stream_complete = x86_hvm_stream_complete,
+    .cleanup         = x86_hvm_cleanup,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_restore_x86_pv.c b/tools/libs/guest/xg_sr_restore_x86_pv.c
new file mode 100644 (file)
index 0000000..dc50b0f
--- /dev/null
@@ -0,0 +1,1210 @@
+#include <assert.h>
+
+#include "xg_sr_common_x86_pv.h"
+
+/* Look up the mfn for @pfn in the locally-maintained p2m.  @pfn must be
+ * within the currently-known range (see expand_p2m()). */
+static xen_pfn_t pfn_to_mfn(const struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    assert(pfn <= ctx->x86.pv.max_pfn);
+
+    return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
+}
+
+/*
+ * Expand our local tracking information for the p2m table and domains maximum
+ * size.  Normally this will be called once to expand from 0 to max_pfn, but
+ * is liable to expand multiple times if the domain grows on the sending side
+ * after migration has started.
+ *
+ * Grows three parallel allocations (p2m, per-pfn types, p2m frame list)
+ * and initialises only the newly exposed entries.  Returns 0 on success,
+ * -1 on allocation failure (existing allocations are left intact).
+ */
+static int expand_p2m(struct xc_sr_context *ctx, unsigned long max_pfn)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned long old_max = ctx->x86.pv.max_pfn, i;
+    unsigned int fpp = PAGE_SIZE / ctx->x86.pv.width;
+    unsigned long end_frame = (max_pfn / fpp) + 1;
+    unsigned long old_end_frame = (old_max / fpp) + 1;
+    xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
+    uint32_t *pfn_types = NULL;
+    size_t p2msz, p2m_pfnsz, pfn_typesz;
+
+    assert(max_pfn > old_max);
+
+    p2msz = (max_pfn + 1) * ctx->x86.pv.width;
+    p2m = realloc(ctx->x86.pv.p2m, p2msz);
+    if ( !p2m )
+    {
+        ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
+        return -1;
+    }
+    ctx->x86.pv.p2m = p2m;
+
+    pfn_typesz = (max_pfn + 1) * sizeof(*pfn_types);
+    pfn_types = realloc(ctx->x86.pv.restore.pfn_types, pfn_typesz);
+    if ( !pfn_types )
+    {
+        ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
+        return -1;
+    }
+    ctx->x86.pv.restore.pfn_types = pfn_types;
+
+    p2m_pfnsz = (end_frame + 1) * sizeof(*p2m_pfns);
+    p2m_pfns = realloc(ctx->x86.pv.p2m_pfns, p2m_pfnsz);
+    if ( !p2m_pfns )
+    {
+        ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
+        return -1;
+    }
+    ctx->x86.pv.p2m_frames = end_frame;
+    ctx->x86.pv.p2m_pfns = p2m_pfns;
+
+    ctx->x86.pv.max_pfn = max_pfn;
+    /* Mark every newly exposed pfn unpopulated and untyped. */
+    for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
+    {
+        ctx->restore.ops.set_gfn(ctx, i, INVALID_MFN);
+        ctx->restore.ops.set_page_type(ctx, i, 0);
+    }
+
+    /*
+     * NOTE(review): old_end_frame evaluates to at least 1 even on the
+     * first call (old_max == 0), so entries 0..1 are not initialised to
+     * INVALID_MFN here; presumably they are always written by subsequent
+     * P2M_FRAMES record handling -- confirm.
+     */
+    for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
+        ctx->x86.pv.p2m_pfns[i] = INVALID_MFN;
+
+    DPRINTF("Changed max_pfn from %#lx to %#lx", old_max, max_pfn);
+    return 0;
+}
+
+/*
+ * Pin all of the pagetables.
+ *
+ * Walks every pfn marked LPINTAB, translating its pagetable level into the
+ * matching MMUEXT_PIN_Lx_TABLE op, and issues the pins to Xen in batches
+ * of MAX_PIN_BATCH.  Returns 0 on success, -1 on hypercall failure.
+ */
+static int pin_pagetables(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned long i, nr_pins;
+    struct mmuext_op pin[MAX_PIN_BATCH];
+
+    for ( i = nr_pins = 0; i <= ctx->x86.pv.max_pfn; ++i )
+    {
+        if ( (ctx->x86.pv.restore.pfn_types[i] &
+              XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
+            continue;
+
+        switch ( (ctx->x86.pv.restore.pfn_types[i] &
+                  XEN_DOMCTL_PFINFO_LTABTYPE_MASK) )
+        {
+        case XEN_DOMCTL_PFINFO_L1TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
+            break;
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
+            break;
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
+            break;
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
+            break;
+        default:
+            continue;
+        }
+
+        pin[nr_pins].arg1.mfn = pfn_to_mfn(ctx, i);
+        nr_pins++;
+
+        /* Flush a full batch to Xen. */
+        if ( nr_pins == MAX_PIN_BATCH )
+        {
+            if ( xc_mmuext_op(xch, pin, nr_pins, ctx->domid) != 0 )
+            {
+                PERROR("Failed to pin batch of pagetables");
+                return -1;
+            }
+            nr_pins = 0;
+        }
+    }
+
+    /* Flush any partial final batch. */
+    if ( (nr_pins > 0) && (xc_mmuext_op(xch, pin, nr_pins, ctx->domid) < 0) )
+    {
+        PERROR("Failed to pin batch of pagetables");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Update details in a guest's start_info structure.
+ *
+ * vcpu0's EDX carries the start_info pfn.  Translate it to an mfn, map the
+ * page, and rewrite the xenstore/console frames inside it from pfn to mfn,
+ * installing the restored event channels and new domain-wide values.
+ * Returns 0 on success, -1 on failure.
+ */
+static int process_start_info(struct xc_sr_context *ctx,
+                              vcpu_guest_context_any_t *vcpu)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t pfn, mfn;
+    start_info_any_t *guest_start_info = NULL;
+    int rc = -1;
+
+    pfn = GET_FIELD(vcpu, user_regs.edx, ctx->x86.pv.width);
+
+    if ( pfn > ctx->x86.pv.max_pfn )
+    {
+        ERROR("Start Info pfn %#lx out of range", pfn);
+        goto err;
+    }
+
+    /* Start info must live in a plain data page. */
+    if ( ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
+    {
+        ERROR("Start Info pfn %#lx has bad type %u", pfn,
+              (ctx->x86.pv.restore.pfn_types[pfn] >>
+               XEN_DOMCTL_PFINFO_LTAB_SHIFT));
+        goto err;
+    }
+
+    mfn = pfn_to_mfn(ctx, pfn);
+    if ( !mfn_in_pseudophysmap(ctx, mfn) )
+    {
+        ERROR("Start Info has bad mfn");
+        dump_bad_pseudophysmap_entry(ctx, mfn);
+        goto err;
+    }
+
+    SET_FIELD(vcpu, user_regs.edx, mfn, ctx->x86.pv.width);
+    guest_start_info = xc_map_foreign_range(
+        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
+    if ( !guest_start_info )
+    {
+        PERROR("Failed to map Start Info at mfn %#lx", mfn);
+        goto err;
+    }
+
+    /* Deal with xenstore stuff */
+    pfn = GET_FIELD(guest_start_info, store_mfn, ctx->x86.pv.width);
+    if ( pfn > ctx->x86.pv.max_pfn )
+    {
+        ERROR("XenStore pfn %#lx out of range", pfn);
+        goto err;
+    }
+
+    mfn = pfn_to_mfn(ctx, pfn);
+    if ( !mfn_in_pseudophysmap(ctx, mfn) )
+    {
+        ERROR("XenStore pfn has bad mfn");
+        dump_bad_pseudophysmap_entry(ctx, mfn);
+        goto err;
+    }
+
+    ctx->restore.xenstore_gfn = mfn;
+    SET_FIELD(guest_start_info, store_mfn, mfn, ctx->x86.pv.width);
+    SET_FIELD(guest_start_info, store_evtchn,
+              ctx->restore.xenstore_evtchn, ctx->x86.pv.width);
+
+    /* Deal with console stuff */
+    pfn = GET_FIELD(guest_start_info, console.domU.mfn, ctx->x86.pv.width);
+    if ( pfn > ctx->x86.pv.max_pfn )
+    {
+        ERROR("Console pfn %#lx out of range", pfn);
+        goto err;
+    }
+
+    mfn = pfn_to_mfn(ctx, pfn);
+    if ( !mfn_in_pseudophysmap(ctx, mfn) )
+    {
+        ERROR("Console pfn has bad mfn");
+        dump_bad_pseudophysmap_entry(ctx, mfn);
+        goto err;
+    }
+
+    ctx->restore.console_gfn = mfn;
+    SET_FIELD(guest_start_info, console.domU.mfn, mfn, ctx->x86.pv.width);
+    SET_FIELD(guest_start_info, console.domU.evtchn,
+              ctx->restore.console_evtchn, ctx->x86.pv.width);
+
+    /* Set other information */
+    SET_FIELD(guest_start_info, nr_pages,
+              ctx->x86.pv.max_pfn + 1, ctx->x86.pv.width);
+    SET_FIELD(guest_start_info, shared_info,
+              ctx->dominfo.shared_info_frame << PAGE_SHIFT, ctx->x86.pv.width);
+    SET_FIELD(guest_start_info, flags, 0, ctx->x86.pv.width);
+
+    rc = 0;
+
+ err:
+    if ( guest_start_info )
+        munmap(guest_start_info, PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * Process one stashed vcpu worth of basic state and send to Xen.
+ *
+ * Translates the pfn-based fields stashed from the stream (GDT frames,
+ * CR3, and for 64-bit guests CR1) into mfns, validating each against the
+ * pseudophysmap, then installs the context with xc_vcpu_setcontext().
+ * Returns 0 on success, -1 on failure.
+ */
+static int process_vcpu_basic(struct xc_sr_context *ctx,
+                              unsigned int vcpuid)
+{
+    xc_interface *xch = ctx->xch;
+    vcpu_guest_context_any_t *vcpu = ctx->x86.pv.restore.vcpus[vcpuid].basic.ptr;
+    xen_pfn_t pfn, mfn;
+    unsigned int i, gdt_count;
+    int rc = -1;
+
+    /* Vcpu 0 is special: Convert the suspend record to an mfn. */
+    if ( vcpuid == 0 )
+    {
+        rc = process_start_info(ctx, vcpu);
+        if ( rc )
+            return rc;
+        rc = -1;
+    }
+
+    SET_FIELD(vcpu, flags,
+              GET_FIELD(vcpu, flags, ctx->x86.pv.width) | VGCF_online,
+              ctx->x86.pv.width);
+
+    gdt_count = GET_FIELD(vcpu, gdt_ents, ctx->x86.pv.width);
+    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
+    {
+        ERROR("GDT entry count (%u) out of range (max %u)",
+              gdt_count, FIRST_RESERVED_GDT_ENTRY);
+        errno = ERANGE;
+        goto err;
+    }
+    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */
+
+    /* Convert GDT frames to mfns. */
+    for ( i = 0; i < gdt_count; ++i )
+    {
+        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86.pv.width);
+        if ( pfn > ctx->x86.pv.max_pfn )
+        {
+            ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
+            goto err;
+        }
+
+        /* GDT frames must be plain data pages. */
+        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
+        {
+            ERROR("GDT frame %u (pfn %#lx) has bad type %u", i, pfn,
+                  (ctx->x86.pv.restore.pfn_types[pfn] >>
+                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
+            goto err;
+        }
+
+        mfn = pfn_to_mfn(ctx, pfn);
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("GDT frame %u has bad mfn", i);
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            goto err;
+        }
+
+        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86.pv.width);
+    }
+
+    /* Convert CR3 to an mfn. */
+    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86.pv.width));
+    if ( pfn > ctx->x86.pv.max_pfn )
+    {
+        ERROR("cr3 (pfn %#lx) out of range", pfn);
+        goto err;
+    }
+
+    /* CR3 must point at a top-level pagetable for this guest's paging depth. */
+    if ( (ctx->x86.pv.restore.pfn_types[pfn] &
+          XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+         (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+    {
+        ERROR("cr3 (pfn %#lx) has bad type %u, expected %u", pfn,
+              (ctx->x86.pv.restore.pfn_types[pfn] >>
+               XEN_DOMCTL_PFINFO_LTAB_SHIFT),
+              ctx->x86.pv.levels);
+        goto err;
+    }
+
+    mfn = pfn_to_mfn(ctx, pfn);
+    if ( !mfn_in_pseudophysmap(ctx, mfn) )
+    {
+        ERROR("cr3 has bad mfn");
+        dump_bad_pseudophysmap_entry(ctx, mfn);
+        goto err;
+    }
+
+    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86.pv.width);
+
+    /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
+    if ( ctx->x86.pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
+    {
+        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
+
+        if ( pfn > ctx->x86.pv.max_pfn )
+        {
+            ERROR("cr1 (pfn %#lx) out of range", pfn);
+            goto err;
+        }
+
+        if ( (ctx->x86.pv.restore.pfn_types[pfn] &
+              XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
+             (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
+        {
+            ERROR("cr1 (pfn %#lx) has bad type %u, expected %u", pfn,
+                  (ctx->x86.pv.restore.pfn_types[pfn] >>
+                   XEN_DOMCTL_PFINFO_LTAB_SHIFT),
+                  ctx->x86.pv.levels);
+            goto err;
+        }
+
+        mfn = pfn_to_mfn(ctx, pfn);
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("cr1 has bad mfn");
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            goto err;
+        }
+
+        vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
+    }
+
+    if ( xc_vcpu_setcontext(xch, ctx->domid, vcpuid, vcpu) )
+    {
+        PERROR("Failed to set vcpu%u's basic info", vcpuid);
+        goto err;
+    }
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
+/*
+ * Process one stashed vcpu worth of extended state and send to Xen.
+ *
+ * The blob is copied verbatim into a XEN_DOMCTL_set_ext_vcpucontext
+ * domctl.  NOTE(review): extd.size is assumed to fit the domctl union --
+ * presumably validated when the record was received; confirm.
+ */
+static int process_vcpu_extended(struct xc_sr_context *ctx,
+                                 unsigned int vcpuid)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_x86_pv_restore_vcpu *vcpu =
+        &ctx->x86.pv.restore.vcpus[vcpuid];
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
+    domctl.domain = ctx->domid;
+    memcpy(&domctl.u.ext_vcpucontext, vcpu->extd.ptr, vcpu->extd.size);
+
+    if ( xc_domctl(xch, &domctl) != 0 )
+    {
+        PERROR("Failed to set vcpu%u's extended info", vcpuid);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Process one stashed vcpu worth of xsave state and send to Xen.
+ *
+ * The blob is staged through a hypercall buffer and delivered via
+ * XEN_DOMCTL_setvcpuextstate.  Returns 0 on success, nonzero on failure.
+ */
+static int process_vcpu_xsave(struct xc_sr_context *ctx,
+                              unsigned int vcpuid)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_x86_pv_restore_vcpu *vcpu =
+        &ctx->x86.pv.restore.vcpus[vcpuid];
+    int rc;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BUFFER(void, buffer);
+
+    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->xsave.size);
+    if ( !buffer )
+    {
+        ERROR("Unable to allocate %zu bytes for xsave hypercall buffer",
+              vcpu->xsave.size);
+        return -1;
+    }
+
+    domctl.cmd = XEN_DOMCTL_setvcpuextstate;
+    domctl.domain = ctx->domid;
+    domctl.u.vcpuextstate.vcpu = vcpuid;
+    domctl.u.vcpuextstate.size = vcpu->xsave.size;
+    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
+
+    memcpy(buffer, vcpu->xsave.ptr, vcpu->xsave.size);
+
+    rc = xc_domctl(xch, &domctl);
+    if ( rc )
+        PERROR("Failed to set vcpu%u's xsave info", vcpuid);
+
+    /* Buffer is freed on both success and failure paths. */
+    xc_hypercall_buffer_free(xch, buffer);
+
+    return rc;
+}
+
+/*
+ * Process one stashed vcpu worth of msr state and send to Xen.
+ *
+ * The blob is an array of xen_domctl_vcpu_msr_t entries; it is staged
+ * through a hypercall buffer and delivered via XEN_DOMCTL_set_vcpu_msrs.
+ * Returns 0 on success, nonzero on failure.
+ */
+static int process_vcpu_msrs(struct xc_sr_context *ctx,
+                             unsigned int vcpuid)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_x86_pv_restore_vcpu *vcpu =
+        &ctx->x86.pv.restore.vcpus[vcpuid];
+    int rc;
+    DECLARE_DOMCTL;
+    DECLARE_HYPERCALL_BUFFER(void, buffer);
+
+    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->msr.size);
+    if ( !buffer )
+    {
+        ERROR("Unable to allocate %zu bytes for msr hypercall buffer",
+              vcpu->msr.size);
+        return -1;
+    }
+
+    domctl.cmd = XEN_DOMCTL_set_vcpu_msrs;
+    domctl.domain = ctx->domid;
+    domctl.u.vcpu_msrs.vcpu = vcpuid;
+    domctl.u.vcpu_msrs.msr_count = vcpu->msr.size / sizeof(xen_domctl_vcpu_msr_t);
+    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);
+
+    memcpy(buffer, vcpu->msr.ptr, vcpu->msr.size);
+
+    rc = xc_domctl(xch, &domctl);
+    if ( rc )
+        PERROR("Failed to set vcpu%u's msrs", vcpuid);
+
+    /* Buffer is freed on both success and failure paths. */
+    xc_hypercall_buffer_free(xch, buffer);
+
+    return rc;
+}
+
+/*
+ * Process all stashed vcpu context and send to Xen.
+ *
+ * Each vcpu may have up to four blobs (basic/extended/xsave/msrs); only
+ * the blobs actually present in the stream are replayed.  vcpu0's basic
+ * context is mandatory; other vcpus may legitimately be absent (offline).
+ */
+static int update_vcpu_context(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int idx;
+    int rc = 0;
+
+    for ( idx = 0; idx < ctx->x86.pv.restore.nr_vcpus; ++idx )
+    {
+        const struct xc_sr_x86_pv_restore_vcpu *vcpu =
+            &ctx->x86.pv.restore.vcpus[idx];
+
+        if ( !vcpu->basic.ptr )
+        {
+            /* Only vcpu0 is required to have been sent. */
+            if ( idx == 0 )
+            {
+                ERROR("Sender didn't send vcpu0's basic state");
+                return -1;
+            }
+        }
+        else if ( (rc = process_vcpu_basic(ctx, idx)) )
+            return rc;
+
+        if ( vcpu->extd.ptr && (rc = process_vcpu_extended(ctx, idx)) )
+            return rc;
+
+        if ( vcpu->xsave.ptr && (rc = process_vcpu_xsave(ctx, idx)) )
+            return rc;
+
+        if ( vcpu->msr.ptr && (rc = process_vcpu_msrs(ctx, idx)) )
+            return rc;
+    }
+
+    return rc;
+}
+
+/*
+ * Copy the p2m which has been constructed locally as memory has been
+ * allocated, over the p2m in guest, so the guest can find its memory again on
+ * resume.
+ */
+static int update_guest_p2m(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t mfn, pfn, *guest_p2m = NULL;
+    unsigned int i;
+    int rc = -1;
+
+    /* Convert the stashed p2m frame list from pfns to mfns, in place,
+     * validating each entry on the way.  p2m_pfns[] holds mfns afterwards,
+     * ready for the foreign mapping below. */
+    for ( i = 0; i < ctx->x86.pv.p2m_frames; ++i )
+    {
+        pfn = ctx->x86.pv.p2m_pfns[i];
+
+        if ( pfn > ctx->x86.pv.max_pfn )
+        {
+            ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
+                  pfn, i);
+            goto err;
+        }
+
+        /* p2m frames must be plain data pages, not pagetables etc. */
+        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
+        {
+            ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %u", pfn, i,
+                  (ctx->x86.pv.restore.pfn_types[pfn] >>
+                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
+            goto err;
+        }
+
+        mfn = pfn_to_mfn(ctx, pfn);
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("p2m_frame_list[%u] has bad mfn", i);
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            goto err;
+        }
+
+        ctx->x86.pv.p2m_pfns[i] = mfn;
+    }
+
+    guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
+                                     ctx->x86.pv.p2m_pfns,
+                                     ctx->x86.pv.p2m_frames);
+    if ( !guest_p2m )
+    {
+        PERROR("Failed to map p2m frames");
+        goto err;
+    }
+
+    /* Overwrite the guest's view of its p2m with the locally rebuilt one.
+     * Entry width depends on the guest (4 or 8 bytes). */
+    memcpy(guest_p2m, ctx->x86.pv.p2m,
+           (ctx->x86.pv.max_pfn + 1) * ctx->x86.pv.width);
+    rc = 0;
+
+ err:
+    if ( guest_p2m )
+        munmap(guest_p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * The valid width/pt_levels values in X86_PV_INFO are inextricably linked.
+ * Cross-check the legitimate combinations.
+ */
+static bool valid_x86_pv_info_combination(
+    const struct xc_sr_rec_x86_pv_info *info)
+{
+    /* 32bit guests use 3 pagetable levels, 64bit guests use 4. */
+    if ( info->guest_width == 4 )
+        return info->pt_levels == 3;
+
+    if ( info->guest_width == 8 )
+        return info->pt_levels == 4;
+
+    return false;
+}
+
+/*
+ * Process an X86_PV_INFO record.
+ *
+ * Validates the width/pt_levels combination, switches the domain's address
+ * size in Xen if it differs from the incoming stream, and re-reads the
+ * domain information afterwards.  May be received at most once per stream.
+ */
+static int handle_x86_pv_info(struct xc_sr_context *ctx,
+                              struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_pv_info *info = rec->data;
+
+    if ( ctx->x86.pv.restore.seen_pv_info )
+    {
+        ERROR("Already received X86_PV_INFO record");
+        return -1;
+    }
+
+    if ( rec->length < sizeof(*info) )
+    {
+        ERROR("X86_PV_INFO record truncated: length %u, expected %zu",
+              rec->length, sizeof(*info));
+        return -1;
+    }
+
+    if ( !valid_x86_pv_info_combination(info) )
+    {
+        ERROR("Invalid X86_PV_INFO combination: width %u, pt_levels %u",
+              info->guest_width, info->pt_levels);
+        return -1;
+    }
+
+    /*
+     * PV domains default to native width.  For an incoming compat domain, we
+     * will typically be the first entity to inform Xen.
+     */
+    if ( info->guest_width != ctx->x86.pv.width )
+    {
+        struct xen_domctl domctl = {
+            .domain = ctx->domid,
+            .cmd    = XEN_DOMCTL_set_address_size,
+            .u.address_size.size = info->guest_width * 8,
+        };
+        int rc = do_domctl(xch, &domctl);
+
+        if ( rc != 0 )
+        {
+            ERROR("Failed to update d%d address size to %u",
+                  ctx->domid, info->guest_width * 8);
+            return -1;
+        }
+
+        /* Domain's information changed, better to refresh. */
+        rc = x86_pv_domain_info(ctx);
+        if ( rc != 0 )
+        {
+            ERROR("Unable to refresh guest information");
+            return -1;
+        }
+    }
+
+    /* Sanity check (possibly new) domain settings. */
+    if ( (info->guest_width != ctx->x86.pv.width) ||
+         (info->pt_levels   != ctx->x86.pv.levels) )
+    {
+        ERROR("X86_PV_INFO width/pt_levels settings %u/%u mismatch with d%d %u/%u",
+              info->guest_width, info->pt_levels, ctx->domid,
+              ctx->x86.pv.width, ctx->x86.pv.levels);
+        return -1;
+    }
+
+    ctx->x86.pv.restore.seen_pv_info = true;
+    return 0;
+}
+
+/*
+ * Process an X86_PV_P2M_FRAMES record.  Takes care of expanding the local p2m
+ * state if needed.
+ *
+ * The record lists the pfns hosting the guest's p2m for the pfn range
+ * [start_pfn, end_pfn].  Returns 0 on success, non-zero on failure.
+ */
+static int handle_x86_pv_p2m_frames(struct xc_sr_context *ctx,
+                                    struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_pv_p2m_frames *data = rec->data;
+    unsigned int start, end, x, fpp = PAGE_SIZE / ctx->x86.pv.width;
+    int rc;
+
+    /* v2 compat.  Infer the position of STATIC_DATA_END. */
+    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
+    {
+        rc = handle_static_data_end(ctx);
+        if ( rc )
+        {
+            ERROR("Inferred STATIC_DATA_END record failed");
+            return rc;
+        }
+    }
+
+    if ( !ctx->restore.seen_static_data_end )
+    {
+        ERROR("No STATIC_DATA_END seen");
+        return -1;
+    }
+
+    /* X86_PV_INFO must come first - it provides ctx->x86.pv.width. */
+    if ( !ctx->x86.pv.restore.seen_pv_info )
+    {
+        ERROR("Not yet received X86_PV_INFO record");
+        return -1;
+    }
+
+    if ( rec->length < sizeof(*data) )
+    {
+        ERROR("X86_PV_P2M_FRAMES record truncated: length %u, min %zu",
+              rec->length, sizeof(*data) + sizeof(uint64_t));
+        return -1;
+    }
+
+    if ( data->start_pfn > data->end_pfn )
+    {
+        /* Message previously claimed the opposite relation; it is the
+         * start pfn which exceeds the end pfn here. */
+        ERROR("Start pfn in stream (%#x) exceeds End (%#x)",
+              data->start_pfn, data->end_pfn);
+        return -1;
+    }
+
+    start = data->start_pfn / fpp;
+    end = data->end_pfn / fpp + 1;
+
+    if ( rec->length != sizeof(*data) + ((end - start) * sizeof(uint64_t)) )
+    {
+        ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#x"
+              ", end_pfn %#x, length %u, expected %zu + (%u - %u) * %zu",
+              data->start_pfn, data->end_pfn, rec->length,
+              sizeof(*data), end, start, sizeof(uint64_t));
+        return -1;
+    }
+
+    /* Grow the local p2m tracking structures if the stream covers more
+     * memory than currently allocated for. */
+    if ( data->end_pfn > ctx->x86.pv.max_pfn )
+    {
+        rc = expand_p2m(ctx, data->end_pfn);
+        if ( rc )
+            return rc;
+    }
+
+    for ( x = 0; x < (end - start); ++x )
+        ctx->x86.pv.p2m_pfns[start + x] = data->p2m_pfns[x];
+
+    return 0;
+}
+
+/*
+ * Processes X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} records from the stream.
+ * The blobs are all stashed to one side as they need to be deferred until the
+ * very end of the stream, rather than being sent to Xen at the point they
+ * arrive in the stream.  It performs all pre-hypercall size validation.
+ */
+static int handle_x86_pv_vcpu_blob(struct xc_sr_context *ctx,
+                                   struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
+    struct xc_sr_x86_pv_restore_vcpu *vcpu;
+    const char *rec_name;
+    size_t blobsz;
+    struct xc_sr_blob *blob = NULL;
+    int rc = -1;
+
+    /* First pass: pick a human readable name for error messages. */
+    switch ( rec->type )
+    {
+    case REC_TYPE_X86_PV_VCPU_BASIC:
+        rec_name = "X86_PV_VCPU_BASIC";
+        break;
+
+    case REC_TYPE_X86_PV_VCPU_EXTENDED:
+        rec_name = "X86_PV_VCPU_EXTENDED";
+        break;
+
+    case REC_TYPE_X86_PV_VCPU_XSAVE:
+        rec_name = "X86_PV_VCPU_XSAVE";
+        break;
+
+    case REC_TYPE_X86_PV_VCPU_MSRS:
+        rec_name = "X86_PV_VCPU_MSRS";
+        break;
+
+    default:
+        ERROR("Unrecognised vcpu blob record %s (%u)",
+              rec_type_to_str(rec->type), rec->type);
+        goto out;
+    }
+
+    /* Confirm that there is a complete header. */
+    if ( rec->length < sizeof(*vhdr) )
+    {
+        ERROR("%s record truncated: length %u, header size %zu",
+              rec_name, rec->length, sizeof(*vhdr));
+        goto out;
+    }
+
+    /* Payload size, excluding the vcpu header. */
+    blobsz = rec->length - sizeof(*vhdr);
+
+    /*
+     * Tolerate empty records.  Older sending sides used to accidentally
+     * generate them.
+     */
+    if ( blobsz == 0 )
+    {
+        DBGPRINTF("Skipping empty %s record for vcpu %u\n",
+                  rec_type_to_str(rec->type), vhdr->vcpu_id);
+        rc = 0;
+        goto out;
+    }
+
+    /* Check that the vcpu id is within range. */
+    if ( vhdr->vcpu_id >= ctx->x86.pv.restore.nr_vcpus )
+    {
+        ERROR("%s record vcpu_id (%u) exceeds domain max (%u)",
+              rec_name, vhdr->vcpu_id, ctx->x86.pv.restore.nr_vcpus - 1);
+        goto out;
+    }
+
+    vcpu = &ctx->x86.pv.restore.vcpus[vhdr->vcpu_id];
+
+    /* Further per-record checks, where possible.  Each case also selects
+     * the stash slot the blob will be copied into. */
+    switch ( rec->type )
+    {
+    case REC_TYPE_X86_PV_VCPU_BASIC:
+    {
+        /* Basic context is fixed size, dependent on guest width. */
+        size_t vcpusz = ctx->x86.pv.width == 8 ?
+            sizeof(vcpu_guest_context_x86_64_t) :
+            sizeof(vcpu_guest_context_x86_32_t);
+
+        if ( blobsz != vcpusz )
+        {
+            ERROR("%s record wrong size: expected %zu, got %u",
+                  rec_name, sizeof(*vhdr) + vcpusz, rec->length);
+            goto out;
+        }
+        blob = &vcpu->basic;
+        break;
+    }
+
+    case REC_TYPE_X86_PV_VCPU_EXTENDED:
+        /* Sanity bound only; the exact size is checked by Xen later. */
+        if ( blobsz > 128 )
+        {
+            ERROR("%s record too long: max %zu, got %u",
+                  rec_name, sizeof(*vhdr) + 128, rec->length);
+            goto out;
+        }
+        blob = &vcpu->extd;
+        break;
+
+    case REC_TYPE_X86_PV_VCPU_XSAVE:
+        if ( blobsz < 16 )
+        {
+            ERROR("%s record too short: min %zu, got %u",
+                  rec_name, sizeof(*vhdr) + 16, rec->length);
+            goto out;
+        }
+        blob = &vcpu->xsave;
+        break;
+
+    case REC_TYPE_X86_PV_VCPU_MSRS:
+        /* Must hold a whole number of MSR entries. */
+        if ( blobsz % sizeof(xen_domctl_vcpu_msr_t) != 0 )
+        {
+            ERROR("%s record payload size %zu expected to be a multiple of %zu",
+                  rec_name, blobsz, sizeof(xen_domctl_vcpu_msr_t));
+            goto out;
+        }
+        blob = &vcpu->msr;
+        break;
+    }
+
+    /* Stash a copy; replayed by update_vcpu_context() at stream end. */
+    rc = update_blob(blob, vhdr->context, blobsz);
+    if ( rc )
+        ERROR("Unable to allocate %zu bytes for vcpu%u %s blob",
+              blobsz, vhdr->vcpu_id, rec_name);
+
+ out:
+    return rc;
+}
+
+/*
+ * Process a SHARED_INFO record from the stream.
+ *
+ * Copies the vcpu_info and arch fields from the stream's shared info page
+ * into the guest's live one, then clears all event channel state so the
+ * guest resumes with no stale pending events.
+ */
+static int handle_shared_info(struct xc_sr_context *ctx,
+                              struct xc_sr_record *rec)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int i;
+    int rc = -1;
+    shared_info_any_t *guest_shinfo = NULL;
+    const shared_info_any_t *old_shinfo = rec->data;
+
+    /* The width-dependent field accessors below need X86_PV_INFO first. */
+    if ( !ctx->x86.pv.restore.seen_pv_info )
+    {
+        ERROR("Not yet received X86_PV_INFO record");
+        return -1;
+    }
+
+    if ( rec->length != PAGE_SIZE )
+    {
+        ERROR("X86_PV_SHARED_INFO record wrong size: length %u"
+              ", expected 4096", rec->length);
+        goto err;
+    }
+
+    guest_shinfo = xc_map_foreign_range(
+        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
+        ctx->dominfo.shared_info_frame);
+    if ( !guest_shinfo )
+    {
+        PERROR("Failed to map Shared Info at mfn %#lx",
+               ctx->dominfo.shared_info_frame);
+        goto err;
+    }
+
+    MEMCPY_FIELD(guest_shinfo, old_shinfo, vcpu_info, ctx->x86.pv.width);
+    MEMCPY_FIELD(guest_shinfo, old_shinfo, arch, ctx->x86.pv.width);
+
+    /* The frame list pointer refers to old mfns; the guest must rebuild it. */
+    SET_FIELD(guest_shinfo, arch.pfn_to_mfn_frame_list_list,
+              0, ctx->x86.pv.width);
+
+    /* No events pending, all selectors clear... */
+    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_pending, 0, ctx->x86.pv.width);
+    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
+        SET_FIELD(guest_shinfo, vcpu_info[i].evtchn_pending_sel,
+                  0, ctx->x86.pv.width);
+
+    /* ... and every event channel masked. */
+    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_mask, 0xff, ctx->x86.pv.width);
+
+    rc = 0;
+
+ err:
+    if ( guest_shinfo )
+        munmap(guest_shinfo, PAGE_SIZE);
+
+    return rc;
+}
+
+/* restore_ops function.  A pfn is valid iff it falls within the tracked
+ * p2m range. */
+static bool x86_pv_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    return !(pfn > ctx->x86.pv.max_pfn);
+}
+
+/* restore_ops function.  Record the Xen-reported type of a pfn, consumed
+ * later by page localisation and p2m validation. */
+static void x86_pv_set_page_type(struct xc_sr_context *ctx, xen_pfn_t pfn,
+                                 unsigned long type)
+{
+    /* Callers must never hand us a pfn beyond the tracked range. */
+    assert(pfn <= ctx->x86.pv.max_pfn);
+
+    ctx->x86.pv.restore.pfn_types[pfn] = type;
+}
+
+/* restore_ops function.  Write a pfn => mfn entry into the local p2m,
+ * using the entry width appropriate for the guest. */
+static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
+                           xen_pfn_t mfn)
+{
+    assert(pfn <= ctx->x86.pv.max_pfn);
+
+    if ( ctx->x86.pv.width == sizeof(uint64_t) )
+        /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks. */
+        ((uint64_t *)ctx->x86.pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
+    else
+        /* 32 bit guest.  Can truncate INVALID_MFN for 64 bit toolstacks. */
+        ((uint32_t *)ctx->x86.pv.p2m)[pfn] = mfn;
+}
+
+/*
+ * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
+ * needs to populate new frames if a PTE is found referring to a frame which
+ * hasn't yet been seen from PAGE_DATA records.
+ *
+ * Works in two passes: first collect and populate any pfns with no mfn
+ * yet, then rewrite every present PTE with its mfn.
+ */
+static int x86_pv_localise_page(struct xc_sr_context *ctx,
+                                uint32_t type, void *page)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t *table = page;
+    uint64_t pte;
+    unsigned int i, to_populate;
+    xen_pfn_t pfns[(PAGE_SIZE / sizeof(uint64_t))];
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    /* Only page tables need localisation. */
+    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    /* Check to see whether we need to populate any new frames. */
+    for ( i = 0, to_populate = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        pte = table[i];
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            xen_pfn_t pfn = pte_to_frame(pte);
+
+#ifdef __i386__
+            /* A 32bit xen_pfn_t cannot represent a frame above 2^44. */
+            if ( pfn == INVALID_MFN )
+            {
+                ERROR("PTE truncation detected.  L%u[%u] = %016"PRIx64,
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                errno = E2BIG;
+                return -1;
+            }
+#endif
+
+            /* No mfn yet => page not seen in PAGE_DATA; needs populating. */
+            if ( pfn_to_mfn(ctx, pfn) == INVALID_MFN )
+                pfns[to_populate++] = pfn;
+        }
+    }
+
+    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
+        return -1;
+
+    /* Second pass: rewrite each present PTE with its mfn. */
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        pte = table[i];
+
+        if ( pte & _PAGE_PRESENT )
+        {
+            xen_pfn_t mfn, pfn;
+
+            pfn = pte_to_frame(pte);
+            mfn = pfn_to_mfn(ctx, pfn);
+
+            if ( !mfn_in_pseudophysmap(ctx, mfn) )
+            {
+                ERROR("Bad mfn for L%u[%u] - pte %"PRIx64,
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                dump_bad_pseudophysmap_entry(ctx, mfn);
+                errno = ERANGE;
+                return -1;
+            }
+
+            table[i] = merge_pte(pte, mfn);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * restore_ops function.  Confirm that the incoming stream matches the type of
+ * domain we are attempting to restore into, and set up the per-vcpu stash
+ * and the m2p mapping.  Returns 0 on success, non-zero otherwise.
+ */
+static int x86_pv_setup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
+    {
+        ERROR("Unable to restore %s domain into an x86_pv domain",
+              dhdr_type_to_str(ctx->restore.guest_type));
+        return -1;
+    }
+
+    if ( ctx->restore.guest_page_size != PAGE_SIZE )
+    {
+        ERROR("Invalid page size %d for x86_pv domains",
+              ctx->restore.guest_page_size);
+        return -1;
+    }
+
+    /* Cache the domain's current width/levels etc. */
+    rc = x86_pv_domain_info(ctx);
+    if ( rc )
+        return rc;
+
+    ctx->x86.pv.restore.nr_vcpus = ctx->dominfo.max_vcpu_id + 1;
+    /* calloc(nmemb, size) - conventional argument order. */
+    ctx->x86.pv.restore.vcpus = calloc(ctx->x86.pv.restore.nr_vcpus,
+                                       sizeof(struct xc_sr_x86_pv_restore_vcpu));
+    if ( !ctx->x86.pv.restore.vcpus )
+    {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    /* Map the live m2p for mfn <-> pfn conversions. */
+    return x86_pv_map_m2p(ctx);
+}
+
+/*
+ * restore_ops function.  Dispatch one stream record to its x86 PV handler.
+ * Records not understood here are passed back to the common restore code
+ * via RECORD_NOT_PROCESSED.
+ */
+static int x86_pv_process_record(struct xc_sr_context *ctx,
+                                 struct xc_sr_record *rec)
+{
+    switch ( rec->type )
+    {
+    case REC_TYPE_X86_PV_INFO:
+        return handle_x86_pv_info(ctx, rec);
+
+    case REC_TYPE_X86_PV_P2M_FRAMES:
+        return handle_x86_pv_p2m_frames(ctx, rec);
+
+    /* All four vcpu blob types share one stash-and-validate handler. */
+    case REC_TYPE_X86_PV_VCPU_BASIC:
+    case REC_TYPE_X86_PV_VCPU_EXTENDED:
+    case REC_TYPE_X86_PV_VCPU_XSAVE:
+    case REC_TYPE_X86_PV_VCPU_MSRS:
+        return handle_x86_pv_vcpu_blob(ctx, rec);
+
+    case REC_TYPE_SHARED_INFO:
+        return handle_shared_info(ctx, rec);
+
+    case REC_TYPE_X86_TSC_INFO:
+        return handle_x86_tsc_info(ctx, rec);
+
+    case REC_TYPE_X86_CPUID_POLICY:
+        return handle_x86_cpuid_policy(ctx, rec);
+
+    case REC_TYPE_X86_MSR_POLICY:
+        return handle_x86_msr_policy(ctx, rec);
+
+    default:
+        return RECORD_NOT_PROCESSED;
+    }
+}
+
+/*
+ * restore_ops function.  Update the vcpu context in Xen, pin the pagetables,
+ * rewrite the p2m and seed the grant table.  Each step depends on the
+ * previous one having completed, hence the strictly sequential structure.
+ */
+static int x86_pv_stream_complete(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    rc = update_vcpu_context(ctx);
+    if ( rc )
+        return rc;
+
+    rc = pin_pagetables(ctx);
+    if ( rc )
+        return rc;
+
+    rc = update_guest_p2m(ctx);
+    if ( rc )
+        return rc;
+
+    rc = xc_dom_gnttab_seed(xch, ctx->domid, false,
+                            ctx->restore.console_gfn,
+                            ctx->restore.xenstore_gfn,
+                            ctx->restore.console_domid,
+                            ctx->restore.xenstore_domid);
+    if ( rc )
+    {
+        PERROR("Failed to seed grant table");
+        return rc;
+    }
+
+    return rc;
+}
+
+/*
+ * restore_ops function.  Free all restore-side allocations and unmap the
+ * m2p.  Safe to call with partially initialised state: free(NULL) is a
+ * no-op and pointers are checked before unmapping.
+ */
+static int x86_pv_cleanup(struct xc_sr_context *ctx)
+{
+    free(ctx->x86.pv.p2m);
+    free(ctx->x86.pv.p2m_pfns);
+
+    if ( ctx->x86.pv.restore.vcpus )
+    {
+        unsigned int i;
+
+        /* Free every stashed per-vcpu blob before the array itself. */
+        for ( i = 0; i < ctx->x86.pv.restore.nr_vcpus; ++i )
+        {
+            struct xc_sr_x86_pv_restore_vcpu *vcpu =
+                &ctx->x86.pv.restore.vcpus[i];
+
+            free(vcpu->basic.ptr);
+            free(vcpu->extd.ptr);
+            free(vcpu->xsave.ptr);
+            free(vcpu->msr.ptr);
+        }
+
+        free(ctx->x86.pv.restore.vcpus);
+    }
+
+    free(ctx->x86.pv.restore.pfn_types);
+
+    if ( ctx->x86.pv.m2p )
+        munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);
+
+    free(ctx->x86.restore.cpuid.ptr);
+    free(ctx->x86.restore.msr.ptr);
+
+    return 0;
+}
+
+/* The restore_ops function table for x86 PV domains. */
+struct xc_sr_restore_ops restore_ops_x86_pv =
+{
+    .pfn_is_valid    = x86_pv_pfn_is_valid,
+    .pfn_to_gfn      = pfn_to_mfn,
+    .set_page_type   = x86_pv_set_page_type,
+    .set_gfn         = x86_pv_set_gfn,
+    .localise_page   = x86_pv_localise_page,
+    .setup           = x86_pv_setup,
+    .process_record  = x86_pv_process_record,
+    .static_data_complete = x86_static_data_complete,
+    .stream_complete = x86_pv_stream_complete,
+    .cleanup         = x86_pv_cleanup,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_save.c b/tools/libs/guest/xg_sr_save.c
new file mode 100644 (file)
index 0000000..d74c72c
--- /dev/null
@@ -0,0 +1,1059 @@
+#include <assert.h>
+#include <arpa/inet.h>
+
+#include "xg_sr_common.h"
+
+/*
+ * Writes an Image header and Domain header into the stream.
+ *
+ * The image header is in a fixed big-endian layout (hence htonl/htons);
+ * the domain header carries the running Xen's major/minor version.
+ */
+static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
+{
+    xc_interface *xch = ctx->xch;
+    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
+    struct xc_sr_ihdr ihdr = {
+        .marker  = IHDR_MARKER,
+        .id      = htonl(IHDR_ID),
+        .version = htonl(3),
+        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
+    };
+    struct xc_sr_dhdr dhdr;
+
+    if ( xen_version < 0 )
+    {
+        PERROR("Unable to obtain Xen Version");
+        return -1;
+    }
+
+    /*
+     * Only build the domain header once xen_version is known to be
+     * non-negative: right-shifting a negative value is
+     * implementation-defined.
+     */
+    dhdr = (struct xc_sr_dhdr) {
+        .type       = guest_type,
+        .page_shift = XC_PAGE_SHIFT,
+        .xen_major  = (xen_version >> 16) & 0xffff,
+        .xen_minor  = (xen_version)       & 0xffff,
+    };
+
+    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
+    {
+        PERROR("Unable to write Image Header to stream");
+        return -1;
+    }
+
+    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
+    {
+        PERROR("Unable to write Domain Header to stream");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Writes an END record into the stream.
+ */
+static int write_end_record(struct xc_sr_context *ctx)
+{
+    /* END records carry no payload; a bare typed record suffices. */
+    return write_record(ctx, &(struct xc_sr_record){ .type = REC_TYPE_END });
+}
+
+/*
+ * Writes a STATIC_DATA_END record into the stream.
+ */
+static int write_static_data_end_record(struct xc_sr_context *ctx)
+{
+    /* Payload-less marker record. */
+    return write_record(ctx,
+                        &(struct xc_sr_record){
+                            .type = REC_TYPE_STATIC_DATA_END });
+}
+
+/*
+ * Writes a CHECKPOINT record into the stream.
+ */
+static int write_checkpoint_record(struct xc_sr_context *ctx)
+{
+    /* Payload-less marker record. */
+    return write_record(ctx,
+                        &(struct xc_sr_record){ .type = REC_TYPE_CHECKPOINT });
+}
+
+/*
+ * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
+ * is constructed in ctx->save.batch_pfns.
+ *
+ * This function:
+ * - gets the types for each pfn in the batch.
+ * - for each pfn with real data:
+ *   - maps and attempts to localise the pages.
+ * - construct and writes a PAGE_DATA record into the stream.
+ */
+static int write_batch(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t *mfns = NULL, *types = NULL;
+    void *guest_mapping = NULL;
+    void **guest_data = NULL;
+    void **local_pages = NULL;
+    int *errors = NULL, rc = -1;
+    unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
+    unsigned int nr_pfns = ctx->save.nr_batch_pfns;
+    void *page, *orig_page;
+    uint64_t *rec_pfns = NULL;
+    struct iovec *iov = NULL; int iovcnt = 0;
+    struct xc_sr_rec_page_data_header hdr = { 0 };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_PAGE_DATA,
+    };
+
+    assert(nr_pfns != 0);
+
+    /* Mfns of the batch pfns. */
+    mfns = malloc(nr_pfns * sizeof(*mfns));
+    /* Types of the batch pfns. */
+    types = malloc(nr_pfns * sizeof(*types));
+    /* Errors from attempting to map the gfns. */
+    errors = malloc(nr_pfns * sizeof(*errors));
+    /* Pointers to page data to send.  Mapped gfns or local allocations. */
+    guest_data = calloc(nr_pfns, sizeof(*guest_data));
+    /* Pointers to locally allocated pages.  Need freeing. */
+    local_pages = calloc(nr_pfns, sizeof(*local_pages));
+    /* iovec[] for writev().  +4 for type, length, header and pfn list. */
+    iov = malloc((nr_pfns + 4) * sizeof(*iov));
+
+    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
+    {
+        ERROR("Unable to allocate arrays for a batch of %u pages",
+              nr_pfns);
+        goto err;
+    }
+
+    for ( i = 0; i < nr_pfns; ++i )
+    {
+        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
+                                                      ctx->save.batch_pfns[i]);
+
+        /* Likely a ballooned page. */
+        if ( mfns[i] == INVALID_MFN )
+        {
+            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
+            ++ctx->save.nr_deferred_pages;
+        }
+    }
+
+    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
+    if ( rc )
+    {
+        PERROR("Failed to get types for pfn batch");
+        goto err;
+    }
+    /* Reset to failure for the error paths below. */
+    rc = -1;
+
+    /* Squash the mfn list down to just the pages with real backing data. */
+    for ( i = 0; i < nr_pfns; ++i )
+    {
+        switch ( types[i] )
+        {
+        case XEN_DOMCTL_PFINFO_BROKEN:
+        case XEN_DOMCTL_PFINFO_XALLOC:
+        case XEN_DOMCTL_PFINFO_XTAB:
+            continue;
+        }
+
+        mfns[nr_pages++] = mfns[i];
+    }
+
+    if ( nr_pages > 0 )
+    {
+        guest_mapping = xenforeignmemory_map(
+            xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
+        if ( !guest_mapping )
+        {
+            PERROR("Failed to map guest pages");
+            goto err;
+        }
+        nr_pages_mapped = nr_pages;
+
+        /* i indexes the full batch; p indexes the mapped (data) pages. */
+        for ( i = 0, p = 0; i < nr_pfns; ++i )
+        {
+            switch ( types[i] )
+            {
+            case XEN_DOMCTL_PFINFO_BROKEN:
+            case XEN_DOMCTL_PFINFO_XALLOC:
+            case XEN_DOMCTL_PFINFO_XTAB:
+                continue;
+            }
+
+            if ( errors[p] )
+            {
+                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
+                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
+                goto err;
+            }
+
+            orig_page = page = guest_mapping + (p * PAGE_SIZE);
+            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);
+
+            /* normalise_page() may substitute a locally allocated copy. */
+            if ( orig_page != page )
+                local_pages[i] = page;
+
+            if ( rc )
+            {
+                if ( rc == -1 && errno == EAGAIN )
+                {
+                    /* Content unstable right now; retry in a later pass. */
+                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
+                    ++ctx->save.nr_deferred_pages;
+                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
+                    --nr_pages;
+                }
+                else
+                    goto err;
+            }
+            else
+                guest_data[i] = page;
+
+            rc = -1;
+            ++p;
+        }
+    }
+
+    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
+    if ( !rec_pfns )
+    {
+        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
+              nr_pfns * sizeof(*rec_pfns));
+        goto err;
+    }
+
+    hdr.count = nr_pfns;
+
+    rec.length = sizeof(hdr);
+    rec.length += nr_pfns * sizeof(*rec_pfns);
+    rec.length += nr_pages * PAGE_SIZE;
+
+    /* Each entry encodes the type in the upper 32 bits, pfn in the lower. */
+    for ( i = 0; i < nr_pfns; ++i )
+        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];
+
+    iov[0].iov_base = &rec.type;
+    iov[0].iov_len = sizeof(rec.type);
+
+    iov[1].iov_base = &rec.length;
+    iov[1].iov_len = sizeof(rec.length);
+
+    iov[2].iov_base = &hdr;
+    iov[2].iov_len = sizeof(hdr);
+
+    iov[3].iov_base = rec_pfns;
+    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);
+
+    iovcnt = 4;
+
+    /* nr_pages is consumed as a cross-check; it must reach exactly zero. */
+    if ( nr_pages )
+    {
+        for ( i = 0; i < nr_pfns; ++i )
+        {
+            if ( guest_data[i] )
+            {
+                iov[iovcnt].iov_base = guest_data[i];
+                iov[iovcnt].iov_len = PAGE_SIZE;
+                iovcnt++;
+                --nr_pages;
+            }
+        }
+    }
+
+    if ( writev_exact(ctx->fd, iov, iovcnt) )
+    {
+        PERROR("Failed to write page data to stream");
+        goto err;
+    }
+
+    /* Sanity check we have sent all the pages we expected to. */
+    assert(nr_pages == 0);
+    rc = ctx->save.nr_batch_pfns = 0;
+
+ err:
+    free(rec_pfns);
+    if ( guest_mapping )
+        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
+    for ( i = 0; local_pages && i < nr_pfns; ++i )
+        free(local_pages[i]);
+    free(iov);
+    free(local_pages);
+    free(guest_data);
+    free(errors);
+    free(types);
+    free(mfns);
+
+    return rc;
+}
+
+/*
+ * Flush a batch of pfns into the stream.  A no-op on an empty batch.
+ */
+static int flush_batch(struct xc_sr_context *ctx)
+{
+    int rc;
+
+    if ( ctx->save.nr_batch_pfns == 0 )
+        return 0;
+
+    rc = write_batch(ctx);
+    if ( rc )
+        return rc;
+
+    /* Help Valgrind spot any stale use of the now-consumed batch. */
+    VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
+                                MAX_BATCH_SIZE *
+                                sizeof(*ctx->save.batch_pfns));
+
+    return 0;
+}
+
+/*
+ * Add a single pfn to the batch, flushing the batch if full.
+ */
+static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
+{
+    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
+    {
+        int rc = flush_batch(ctx);
+
+        if ( rc )
+            return rc;
+    }
+
+    ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;
+
+    return 0;
+}
+
+/*
+ * Pause/suspend the domain, and refresh ctx->dominfo if required.
+ *
+ * Delegates the actual suspend to the caller-supplied callback, then
+ * verifies via dominfo that the domain really did shut down with
+ * SHUTDOWN_suspend.
+ */
+static int suspend_domain(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+
+    /* TODO: Properly specify the return value from this callback.  All
+     * implementations currently appear to return 1 for success, whereas
+     * the legacy code checks for != 0. */
+    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);
+
+    if ( cb_rc == 0 )
+    {
+        ERROR("save callback suspend() failed: %d", cb_rc);
+        return -1;
+    }
+
+    /* Refresh domain information. */
+    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
+         (ctx->dominfo.domid != ctx->domid) )
+    {
+        PERROR("Unable to refresh domain information");
+        return -1;
+    }
+
+    /* Confirm the domain has actually been paused. */
+    if ( !ctx->dominfo.shutdown ||
+         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+    {
+        ERROR("Domain has not been suspended: shutdown %d, reason %d",
+              ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
+        return -1;
+    }
+
+    xc_report_progress_single(xch, "Domain now suspended");
+
+    return 0;
+}
+
+/*
+ * Send a subset of pages in the guests p2m, according to the dirty bitmap.
+ * Used for each subsequent iteration of the live migration loop.
+ *
+ * Bitmap is bounded by p2m_size.  'entries' is the expected number of set
+ * bits, used only for progress reporting.
+ */
+static int send_dirty_pages(struct xc_sr_context *ctx,
+                            unsigned long entries)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t p;
+    unsigned long written;
+    int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
+    {
+        if ( !test_bit(p, dirty_bitmap) )
+            continue;
+
+        rc = add_to_batch(ctx, p);
+        if ( rc )
+            return rc;
+
+        /* Update progress every 4MB worth of memory sent. */
+        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
+            xc_report_progress_step(xch, written, entries);
+
+        ++written;
+    }
+
+    /* Send any partially filled final batch. */
+    rc = flush_batch(ctx);
+    if ( rc )
+        return rc;
+
+    if ( written > entries )
+        DPRINTF("Bitmap contained more entries than expected...");
+
+    xc_report_progress_step(xch, entries, entries);
+
+    return ctx->save.ops.check_vm_state(ctx);
+}
+
+/*
+ * Send all pages in the guests p2m.  Used as the first iteration of the live
+ * migration loop, and for a non-live save.
+ *
+ * Implemented by setting every bit in the dirty bitmap and reusing
+ * send_dirty_pages().
+ */
+static int send_all_pages(struct xc_sr_context *ctx)
+{
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    bitmap_set(dirty_bitmap, ctx->save.p2m_size);
+
+    return send_dirty_pages(ctx, ctx->save.p2m_size);
+}
+
+/*
+ * Enable logdirty mode on the domain.  Returns 0 on success, or the
+ * (negative) xc_shadow_control() result on failure.
+ */
+static int enable_logdirty(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int on1 = 0, off = 0, on2 = 0;
+    int rc;
+
+    /* This juggling is required if logdirty is enabled for VRAM tracking:
+     * if it is already on, turn it off and back on again, capturing errno
+     * at each step (on1/off/on2) for the eventual diagnostic. */
+    rc = xc_shadow_control(xch, ctx->domid,
+                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                           NULL, 0, NULL, 0, NULL);
+    if ( rc < 0 )
+    {
+        on1 = errno;
+        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
+                               NULL, 0, NULL, 0, NULL);
+        if ( rc < 0 )
+            off = errno;
+        else {
+            rc = xc_shadow_control(xch, ctx->domid,
+                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
+                                   NULL, 0, NULL, 0, NULL);
+            if ( rc < 0 )
+                on2 = errno;
+        }
+        if ( rc < 0 )
+        {
+            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
+            return rc;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Replace *str with a freshly allocated "Frames iteration N" progress
+ * prefix and install it via xc_set_progress_prefix().
+ *
+ * Ownership: frees the previous *str; the caller remains responsible for
+ * freeing the final string (and resetting the prefix) when done.
+ */
+static int update_progress_string(struct xc_sr_context *ctx, char **str)
+{
+    xc_interface *xch = ctx->xch;
+    char *new_str = NULL;
+    unsigned int iter = ctx->save.stats.iteration;
+
+    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
+    {
+        PERROR("Unable to allocate new progress string");
+        return -1;
+    }
+
+    free(*str);
+    *str = new_str;
+
+    xc_set_progress_prefix(xch, *str);
+    return 0;
+}
+
+/*
+ * This is the live migration precopy policy - it's called periodically during
+ * the precopy phase of live migrations, and is responsible for deciding when
+ * the precopy phase should terminate and what should be done next.
+ *
+ * The policy implemented here behaves identically to the policy previously
+ * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
+ * the live migration when there are either fewer than 50 dirty pages, or more
+ * than 5 precopy rounds have completed.
+ */
+#define SPP_MAX_ITERATIONS      5
+#define SPP_TARGET_DIRTY_COUNT 50
+
+static int simple_precopy_policy(struct precopy_stats stats, void *user)
+{
+    /* dirty_count is -1 mid-round (unknown); only a non-negative count
+     * below the target triggers early stop-and-copy. */
+    return ((stats.dirty_count >= 0 &&
+             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
+            stats.iteration >= SPP_MAX_ITERATIONS)
+        ? XGS_POLICY_STOP_AND_COPY
+        : XGS_POLICY_CONTINUE_PRECOPY;
+}
+
+/*
+ * Send memory while guest is running.
+ *
+ * Repeatedly: consult the precopy policy, send the currently-dirty pages,
+ * then (if continuing) clean-read the logdirty bitmap for the next round.
+ * The policy is consulted twice per round: once with the known dirty count,
+ * and once mid-round with dirty_count == -1 (unknown, pending the next
+ * SHADOW_OP_CLEAN).
+ */
+static int send_memory_live(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
+    char *progress_str = NULL;
+    unsigned int x = 0;
+    int rc;
+    int policy_decision;
+
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
+    void *data = ctx->save.callbacks->data;
+
+    struct precopy_stats *policy_stats;
+
+    rc = update_progress_string(ctx, &progress_str);
+    if ( rc )
+        goto out;
+
+    ctx->save.stats = (struct precopy_stats){
+        .dirty_count = ctx->save.p2m_size,
+    };
+    policy_stats = &ctx->save.stats;
+
+    /* Fall back to the built-in policy if the caller supplied none. */
+    if ( precopy_policy == NULL )
+        precopy_policy = simple_precopy_policy;
+
+    /* First round sends everything: mark the whole p2m dirty. */
+    bitmap_set(dirty_bitmap, ctx->save.p2m_size);
+
+    for ( ; ; )
+    {
+        policy_decision = precopy_policy(*policy_stats, data);
+        x++;
+
+        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
+        {
+            rc = update_progress_string(ctx, &progress_str);
+            if ( rc )
+                goto out;
+
+            rc = send_dirty_pages(ctx, stats.dirty_count);
+            if ( rc )
+                goto out;
+        }
+
+        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
+            break;
+
+        policy_stats->iteration     = x;
+        policy_stats->total_written += policy_stats->dirty_count;
+        policy_stats->dirty_count   = -1;
+
+        /* Mid-round policy check, before the (expensive) bitmap read. */
+        policy_decision = precopy_policy(*policy_stats, data);
+
+        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
+            break;
+
+        /* Atomically read-and-clear the dirty bitmap for the next round. */
+        if ( xc_shadow_control(
+                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
+                 NULL, 0, &stats) != ctx->save.p2m_size )
+        {
+            PERROR("Failed to retrieve logdirty bitmap");
+            rc = -1;
+            goto out;
+        }
+
+        policy_stats->dirty_count = stats.dirty_count;
+
+    }
+
+    if ( policy_decision == XGS_POLICY_ABORT )
+    {
+        PERROR("Abort precopy loop");
+        rc = -1;
+        goto out;
+    }
+
+ out:
+    xc_set_progress_prefix(xch, NULL);
+    free(progress_str);
+    return rc;
+}
+
+/*
+ * Read a CHECKPOINT_DIRTY_PFN_LIST record from the secondary (COLO) and
+ * merge the listed pfns into the primary's dirty bitmap, so the next
+ * checkpoint also retransmits pages the secondary has dirtied.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    /* Initialise rec so the error path may unconditionally free rec.data,
+     * even when read_record() fails before populating the record. */
+    struct xc_sr_record rec = { .data = NULL };
+    uint64_t *pfns = NULL;
+    uint64_t pfn;
+    unsigned int count, i;
+    int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    rc = read_record(ctx, ctx->save.recv_fd, &rec);
+    if ( rc )
+        goto err;
+
+    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
+    {
+        PERROR("Expect dirty bitmap record, but received %u", rec.type);
+        rc = -1;
+        goto err;
+    }
+
+    if ( rec.length % sizeof(*pfns) )
+    {
+        PERROR("Invalid dirty pfn list record length %u", rec.length);
+        rc = -1;
+        goto err;
+    }
+
+    count = rec.length / sizeof(*pfns);
+    pfns = rec.data;
+
+    for ( i = 0; i < count; i++ )
+    {
+        pfn = pfns[i];
+        /* Valid pfns are 0 .. p2m_size - 1; the dirty bitmap is bounded by
+         * p2m_size, so pfn == p2m_size must be rejected too (was '>'). */
+        if ( pfn >= ctx->save.p2m_size )
+        {
+            PERROR("Invalid pfn 0x%" PRIx64, pfn);
+            rc = -1;
+            goto err;
+        }
+
+        set_bit(pfn, dirty_bitmap);
+    }
+
+    rc = 0;
+
+ err:
+    free(rec.data);
+    return rc;
+}
+
+/*
+ * Suspend the domain and send dirty memory.
+ * This is the last iteration of the live migration and the
+ * heart of the checkpointed stream.
+ *
+ * Deferred pages (pages that could not be sent earlier) are OR-ed into the
+ * final dirty bitmap, and for COLO the secondary's dirty pfns are merged in
+ * as well, before the combined set is transmitted.
+ */
+static int suspend_and_send_dirty(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
+    char *progress_str = NULL;
+    int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    rc = suspend_domain(ctx);
+    if ( rc )
+        goto out;
+
+    /* Final clean-read of the logdirty bitmap; LOGDIRTY_FINAL tells Xen
+     * this is the last pass so it can include any residual dirtiness. */
+    if ( xc_shadow_control(
+             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
+             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
+             NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
+         ctx->save.p2m_size )
+    {
+        PERROR("Failed to retrieve logdirty bitmap");
+        rc = -1;
+        goto out;
+    }
+
+    if ( ctx->save.live )
+    {
+        rc = update_progress_string(ctx, &progress_str);
+        if ( rc )
+            goto out;
+    }
+    else
+        xc_set_progress_prefix(xch, "Checkpointed save");
+
+    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);
+
+    if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
+    {
+        rc = colo_merge_secondary_dirty_bitmap(ctx);
+        if ( rc )
+        {
+            PERROR("Failed to get secondary vm's dirty pages");
+            goto out;
+        }
+    }
+
+    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
+    if ( rc )
+        goto out;
+
+    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
+    ctx->save.nr_deferred_pages = 0;
+
+ out:
+    xc_set_progress_prefix(xch, NULL);
+    free(progress_str);
+    return rc;
+}
+
+/*
+ * Debug aid: send a VERIFY record followed by every page again, so the
+ * receiving side can compare content instead of writing it.  Peek (without
+ * clearing) the logdirty stats afterwards for a diagnostic report.
+ */
+static int verify_frames(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
+    int rc;
+    struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };
+
+    DPRINTF("Enabling verify mode");
+
+    rc = write_record(ctx, &rec);
+    if ( rc )
+        goto out;
+
+    xc_set_progress_prefix(xch, "Frames verify");
+    rc = send_all_pages(ctx);
+    if ( rc )
+        goto out;
+
+    if ( xc_shadow_control(
+             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
+             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
+             NULL, 0, &stats) != ctx->save.p2m_size )
+    {
+        PERROR("Failed to retrieve logdirty bitmap");
+        rc = -1;
+        goto out;
+    }
+
+    DPRINTF("  Further stats: faults %u, dirty %u",
+            stats.fault_count, stats.dirty_count);
+
+ out:
+    return rc;
+}
+
+/*
+ * Send all domain memory.  This is the heart of the live migration loop.
+ *
+ * Sequence: enable logdirty, iterative precopy while the guest runs, then
+ * suspend and send the final dirty set.  In debug mode (non-plain streams
+ * only) a verification pass follows.
+ */
+static int send_domain_memory_live(struct xc_sr_context *ctx)
+{
+    int rc;
+
+    rc = enable_logdirty(ctx);
+    if ( rc )
+        goto out;
+
+    rc = send_memory_live(ctx);
+    if ( rc )
+        goto out;
+
+    rc = suspend_and_send_dirty(ctx);
+    if ( rc )
+        goto out;
+
+    if ( ctx->save.debug && ctx->stream_type != XC_STREAM_PLAIN )
+    {
+        rc = verify_frames(ctx);
+        if ( rc )
+            goto out;
+    }
+
+ out:
+    return rc;
+}
+
+/*
+ * Checkpointed save: each checkpoint after the first live pass only needs
+ * the suspend-and-send-dirty step (logdirty is already enabled).
+ */
+static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
+{
+    return suspend_and_send_dirty(ctx);
+}
+
+/*
+ * Send all domain memory, pausing the domain first.  Generally used for
+ * suspend-to-file.
+ *
+ * Note: the 'err' label is also reached on success; rc carries the result
+ * either way.
+ */
+static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+
+    rc = suspend_domain(ctx);
+    if ( rc )
+        goto err;
+
+    xc_set_progress_prefix(xch, "Frames");
+
+    rc = send_all_pages(ctx);
+    if ( rc )
+        goto err;
+
+ err:
+    return rc;
+}
+
+/*
+ * Common setup for a save: arch-specific setup, then allocate the dirty
+ * bitmap (hypercall-safe memory), the pfn batch buffer, and the deferred
+ * pages bitmap.  All three are released by cleanup().
+ */
+static int setup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+    /* ops.setup() establishes p2m_size, which sizes the allocations below. */
+    rc = ctx->save.ops.setup(ctx);
+    if ( rc )
+        goto err;
+
+    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
+        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
+    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
+                                  sizeof(*ctx->save.batch_pfns));
+    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
+
+    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
+    {
+        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
+              " deferred pages");
+        rc = -1;
+        errno = ENOMEM;
+        goto err;
+    }
+
+    rc = 0;
+
+ err:
+    return rc;
+}
+
+/*
+ * Undo setup(): disable logdirty (best effort - errors are ignored since
+ * it may never have been enabled), run arch cleanup, and free the buffers
+ * allocated in setup().
+ */
+static void cleanup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
+                                    &ctx->save.dirty_bitmap_hbuf);
+
+
+    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
+                      NULL, 0, NULL, 0, NULL);
+
+    if ( ctx->save.ops.cleanup(ctx) )
+        PERROR("Failed to clean up");
+
+    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
+                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
+    free(ctx->save.deferred_pages);
+    free(ctx->save.batch_pfns);
+}
+
+/*
+ * Save a domain.
+ *
+ * Top-level flow: setup, stream headers, static data, then the memory body
+ * (one pass for plain streams, a checkpoint loop for Remus/COLO), finishing
+ * with the END record.  On failure the first error's rc and errno are
+ * preserved across cleanup().
+ */
+static int save(struct xc_sr_context *ctx, uint16_t guest_type)
+{
+    xc_interface *xch = ctx->xch;
+    int rc, saved_rc = 0, saved_errno = 0;
+
+    IPRINTF("Saving domain %d, type %s",
+            ctx->domid, dhdr_type_to_str(guest_type));
+
+    rc = setup(ctx);
+    if ( rc )
+        goto err;
+
+    xc_report_progress_single(xch, "Start of stream");
+
+    rc = write_headers(ctx, guest_type);
+    if ( rc )
+        goto err;
+
+    rc = ctx->save.ops.static_data(ctx);
+    if ( rc )
+        goto err;
+
+    rc = write_static_data_end_record(ctx);
+    if ( rc )
+        goto err;
+
+    rc = ctx->save.ops.start_of_stream(ctx);
+    if ( rc )
+        goto err;
+
+    /* Checkpoint loop: executed once for plain streams, repeatedly for
+     * Remus/COLO until an error occurs. */
+    do {
+        rc = ctx->save.ops.start_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
+
+        rc = ctx->save.ops.check_vm_state(ctx);
+        if ( rc )
+            goto err;
+
+        if ( ctx->save.live )
+            rc = send_domain_memory_live(ctx);
+        else if ( ctx->stream_type != XC_STREAM_PLAIN )
+            rc = send_domain_memory_checkpointed(ctx);
+        else
+            rc = send_domain_memory_nonlive(ctx);
+
+        if ( rc )
+            goto err;
+
+        /* ctx->dominfo was refreshed by suspend_domain(); re-check that the
+         * domain stayed suspended while memory was sent. */
+        if ( !ctx->dominfo.shutdown ||
+             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
+        {
+            ERROR("Domain has not been suspended");
+            rc = -1;
+            goto err;
+        }
+
+        rc = ctx->save.ops.end_of_checkpoint(ctx);
+        if ( rc )
+            goto err;
+
+        if ( ctx->stream_type != XC_STREAM_PLAIN )
+        {
+            /*
+             * We have now completed the initial live portion of the checkpoint
+             * process. Therefore switch into periodically sending synchronous
+             * batches of pages.
+             */
+            ctx->save.live = false;
+
+            rc = write_checkpoint_record(ctx);
+            if ( rc )
+                goto err;
+
+            if ( ctx->stream_type == XC_STREAM_COLO )
+            {
+                /* Checkpoint callbacks return 1 for success, 0 for failure. */
+                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+                if ( !rc )
+                {
+                    rc = -1;
+                    goto err;
+                }
+            }
+
+            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
+            if ( rc <= 0 )
+                goto err;
+
+            if ( ctx->stream_type == XC_STREAM_COLO )
+            {
+                rc = ctx->save.callbacks->wait_checkpoint(
+                    ctx->save.callbacks->data);
+                if ( rc <= 0 )
+                    goto err;
+            }
+            else if ( ctx->stream_type == XC_STREAM_REMUS )
+            {
+                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
+                if ( rc <= 0 )
+                    goto err;
+            }
+            else
+            {
+                ERROR("Unknown checkpointed stream");
+                rc = -1;
+                goto err;
+            }
+        }
+    } while ( ctx->stream_type != XC_STREAM_PLAIN );
+
+    xc_report_progress_single(xch, "End of stream");
+
+    rc = write_end_record(ctx);
+    if ( rc )
+        goto err;
+
+    xc_report_progress_single(xch, "Complete");
+    goto done;
+
+ err:
+    saved_errno = errno;
+    saved_rc = rc;
+    PERROR("Save failed");
+
+ done:
+    cleanup(ctx);
+
+    if ( saved_rc )
+    {
+        rc = saved_rc;
+        errno = saved_errno;
+    }
+
+    return rc;
+}
+
+/*
+ * Public entry point: validate parameters and callbacks for the requested
+ * stream type, then dispatch to save() with the PV or HVM ops table.
+ */
+int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
+                   uint32_t flags, struct save_callbacks *callbacks,
+                   xc_stream_type_t stream_type, int recv_fd)
+{
+    struct xc_sr_context ctx = {
+        .xch = xch,
+        .fd = io_fd,
+        .stream_type = stream_type,
+    };
+
+    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
+    ctx.save.callbacks = callbacks;
+    ctx.save.live  = !!(flags & XCFLAGS_LIVE);
+    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
+    ctx.save.recv_fd = recv_fd;
+
+    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
+    {
+        PERROR("Failed to get domain info");
+        return -1;
+    }
+
+    if ( ctx.dominfo.domid != dom )
+    {
+        ERROR("Domain %u does not exist", dom);
+        return -1;
+    }
+
+    /* Sanity check stream_type-related parameters.  The cases deliberately
+     * fall through: COLO requires everything REMUS does, and so on. */
+    switch ( stream_type )
+    {
+    case XC_STREAM_COLO:
+        assert(callbacks->wait_checkpoint);
+        /* Fallthrough */
+    case XC_STREAM_REMUS:
+        assert(callbacks->checkpoint && callbacks->postcopy);
+        /* Fallthrough */
+    case XC_STREAM_PLAIN:
+        if ( ctx.dominfo.hvm )
+            assert(callbacks->switch_qemu_logdirty);
+        break;
+
+    default:
+        assert(!"Bad stream_type");
+        break;
+    }
+
+    DPRINTF("fd %d, dom %u, flags %u, hvm %d",
+            io_fd, dom, flags, ctx.dominfo.hvm);
+
+    ctx.domid = dom;
+
+    if ( ctx.dominfo.hvm )
+    {
+        ctx.save.ops = save_ops_x86_hvm;
+        return save(&ctx, DHDR_TYPE_X86_HVM);
+    }
+    else
+    {
+        ctx.save.ops = save_ops_x86_pv;
+        return save(&ctx, DHDR_TYPE_X86_PV);
+    }
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_save_x86_hvm.c b/tools/libs/guest/xg_sr_save_x86_hvm.c
new file mode 100644 (file)
index 0000000..1634a7b
--- /dev/null
@@ -0,0 +1,251 @@
+#include <assert.h>
+
+#include "xg_sr_common_x86.h"
+
+#include <xen/hvm/params.h>
+
+/*
+ * Query for the HVM context and write an HVM_CONTEXT record into the stream.
+ *
+ * Sizes the buffer with a first xc_domain_hvm_getcontext() call (NULL/0),
+ * then fetches and writes the context.  Returns 0 on success, -1 on error.
+ */
+static int write_hvm_context(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc, hvm_buf_size;
+    struct xc_sr_record hvm_rec = {
+        .type = REC_TYPE_HVM_CONTEXT,
+    };
+
+    hvm_buf_size = xc_domain_hvm_getcontext(xch, ctx->domid, 0, 0);
+    if ( hvm_buf_size < 0 )
+    {
+        PERROR("Couldn't get HVM context size from Xen");
+        rc = -1;
+        goto out;
+    }
+
+    hvm_rec.data = malloc(hvm_buf_size);
+    if ( !hvm_rec.data )
+    {
+        PERROR("Couldn't allocate memory");
+        rc = -1;
+        goto out;
+    }
+
+    hvm_buf_size = xc_domain_hvm_getcontext(xch, ctx->domid,
+                                            hvm_rec.data, hvm_buf_size);
+    if ( hvm_buf_size < 0 )
+    {
+        PERROR("Couldn't get HVM context from Xen");
+        rc = -1;
+        goto out;
+    }
+
+    hvm_rec.length = hvm_buf_size;
+    rc = write_record(ctx, &hvm_rec);
+    if ( rc < 0 )
+    {
+        PERROR("error write HVM_CONTEXT record");
+        goto out;
+    }
+
+ out:
+    free(hvm_rec.data);
+    return rc;
+}
+
+/*
+ * Query for a range of HVM parameters and write an HVM_PARAMS record into the
+ * stream.
+ *
+ * Only parameters with a non-zero value are included; if all are zero the
+ * record is omitted entirely.
+ */
+static int write_hvm_params(struct xc_sr_context *ctx)
+{
+    static const unsigned int params[] = {
+        HVM_PARAM_STORE_PFN,
+        HVM_PARAM_IOREQ_PFN,
+        HVM_PARAM_BUFIOREQ_PFN,
+        HVM_PARAM_PAGING_RING_PFN,
+        HVM_PARAM_MONITOR_RING_PFN,
+        HVM_PARAM_SHARING_RING_PFN,
+        HVM_PARAM_VM86_TSS_SIZED,
+        HVM_PARAM_CONSOLE_PFN,
+        HVM_PARAM_ACPI_IOPORTS_LOCATION,
+        HVM_PARAM_VIRIDIAN,
+        HVM_PARAM_IDENT_PT,
+        HVM_PARAM_VM_GENERATION_ID_ADDR,
+        HVM_PARAM_IOREQ_SERVER_PFN,
+        HVM_PARAM_NR_IOREQ_SERVER_PAGES,
+        HVM_PARAM_X87_FIP_WIDTH,
+        HVM_PARAM_MCA_CAP,
+    };
+
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_hvm_params_entry entries[ARRAY_SIZE(params)];
+    struct xc_sr_rec_hvm_params hdr = {
+        .count = 0,
+    };
+    struct xc_sr_record rec = {
+        .type   = REC_TYPE_HVM_PARAMS,
+        .length = sizeof(hdr),
+        .data   = &hdr,
+    };
+    unsigned int i;
+    int rc;
+
+    for ( i = 0; i < ARRAY_SIZE(params); i++ )
+    {
+        uint32_t index = params[i];
+        uint64_t value;
+
+        rc = xc_hvm_param_get(xch, ctx->domid, index, &value);
+        if ( rc )
+        {
+            PERROR("Failed to get HVMPARAM at index %u", index);
+            return rc;
+        }
+
+        if ( value != 0 )
+        {
+            entries[hdr.count].index = index;
+            entries[hdr.count].value = value;
+            hdr.count++;
+        }
+    }
+
+    /* No params? Skip this record. */
+    if ( hdr.count == 0 )
+        return 0;
+
+    /* hdr goes out as the record body, entries as the trailing data. */
+    rc = write_split_record(ctx, &rec, entries, hdr.count * sizeof(*entries));
+    if ( rc )
+        PERROR("Failed to write HVM_PARAMS record");
+
+    return rc;
+}
+
+/* HVM guests use an identity pfn->gfn mapping. */
+static xen_pfn_t x86_hvm_pfn_to_gfn(const struct xc_sr_context *ctx,
+                                    xen_pfn_t pfn)
+{
+    /* identity map */
+    return pfn;
+}
+
+/* No page normalisation is required for HVM guests (no PV pagetables). */
+static int x86_hvm_normalise_page(struct xc_sr_context *ctx,
+                                  xen_pfn_t type, void **page)
+{
+    return 0;
+}
+
+/*
+ * HVM setup: determine p2m_size and enable qemu's log-dirty mode so device
+ * model writes are tracked alongside guest writes.
+ */
+static int x86_hvm_setup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t nr_pfns;
+
+    if ( xc_domain_nr_gpfns(xch, ctx->domid, &nr_pfns) < 0 )
+    {
+        PERROR("Unable to obtain the guest p2m size");
+        return -1;
+    }
+#ifdef __i386__
+    /* Very large domains (> 1TB) will exhaust virtual address space. */
+    if ( nr_pfns > 0x0fffffff )
+    {
+        errno = E2BIG;
+        PERROR("Cannot save this big a guest");
+        return -1;
+    }
+#endif
+
+    ctx->save.p2m_size = nr_pfns;
+
+    if ( ctx->save.callbacks->switch_qemu_logdirty(
+             ctx->domid, 1, ctx->save.callbacks->data) )
+    {
+        PERROR("Couldn't enable qemu log-dirty mode");
+        return -1;
+    }
+
+    /* Remembered so x86_hvm_cleanup() only disables what was enabled. */
+    ctx->x86.hvm.save.qemu_enabled_logdirty = true;
+
+    return 0;
+}
+
+/* Static data for HVM guests is just the CPU policy records. */
+static int x86_hvm_static_data(struct xc_sr_context *ctx)
+{
+    return write_x86_cpu_policy_records(ctx);
+}
+
+/* Nothing HVM-specific is needed at the start of the stream. */
+static int x86_hvm_start_of_stream(struct xc_sr_context *ctx)
+{
+    return 0;
+}
+
+/* Nothing HVM-specific is needed at the start of a checkpoint. */
+static int x86_hvm_start_of_checkpoint(struct xc_sr_context *ctx)
+{
+    return 0;
+}
+
+/* No mid-stream VM state checks are required for HVM guests. */
+static int x86_hvm_check_vm_state(struct xc_sr_context *ctx)
+{
+    return 0;
+}
+
+/*
+ * End-of-checkpoint records for an HVM guest: TSC info, the full HVM
+ * context, and the non-zero HVM params, in that order.
+ */
+static int x86_hvm_end_of_checkpoint(struct xc_sr_context *ctx)
+{
+    int rc;
+
+    /* Write the TSC record. */
+    rc = write_x86_tsc_info(ctx);
+    if ( rc )
+        return rc;
+
+    /* Write the HVM_CONTEXT record. */
+    rc = write_hvm_context(ctx);
+    if ( rc )
+        return rc;
+
+    /* Write HVM_PARAMS record contains applicable HVM params. */
+    rc = write_hvm_params(ctx);
+    if ( rc )
+        return rc;
+
+    return 0;
+}
+
+/*
+ * HVM cleanup: disable qemu log-dirty mode, but only if x86_hvm_setup()
+ * successfully enabled it.
+ */
+static int x86_hvm_cleanup(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+
+    /* If qemu successfully enabled logdirty mode, attempt to disable. */
+    if ( ctx->x86.hvm.save.qemu_enabled_logdirty &&
+         ctx->save.callbacks->switch_qemu_logdirty(
+             ctx->domid, 0, ctx->save.callbacks->data) )
+    {
+        PERROR("Couldn't disable qemu log-dirty mode");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Save operations table for x86 HVM guests, selected by xc_domain_save(). */
+struct xc_sr_save_ops save_ops_x86_hvm =
+{
+    .pfn_to_gfn          = x86_hvm_pfn_to_gfn,
+    .normalise_page      = x86_hvm_normalise_page,
+    .setup               = x86_hvm_setup,
+    .static_data         = x86_hvm_static_data,
+    .start_of_stream     = x86_hvm_start_of_stream,
+    .start_of_checkpoint = x86_hvm_start_of_checkpoint,
+    .end_of_checkpoint   = x86_hvm_end_of_checkpoint,
+    .check_vm_state      = x86_hvm_check_vm_state,
+    .cleanup             = x86_hvm_cleanup,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_save_x86_pv.c b/tools/libs/guest/xg_sr_save_x86_pv.c
new file mode 100644 (file)
index 0000000..4964f1f
--- /dev/null
@@ -0,0 +1,1156 @@
+#include <assert.h>
+#include <limits.h>
+
+#include "xg_sr_common_x86_pv.h"
+
+/* Check a 64 bit virtual address for being canonical: bits 63:47 must all
+ * equal bit 47 (i.e. the address sign-extends from bit 47). */
+static inline bool is_canonical_address(xen_vaddr_t vaddr)
+{
+    return ((int64_t)vaddr >> 47) == ((int64_t)vaddr >> 63);
+}
+
+/*
+ * Maps the guests shared info page (read-only) into ctx->x86.pv.shinfo.
+ * The mapping persists for the duration of the save.
+ */
+static int map_shinfo(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+
+    ctx->x86.pv.shinfo = xc_map_foreign_range(
+        xch, ctx->domid, PAGE_SIZE, PROT_READ, ctx->dominfo.shared_info_frame);
+    if ( !ctx->x86.pv.shinfo )
+    {
+        PERROR("Failed to map shared info frame at mfn %#lx",
+               ctx->dominfo.shared_info_frame);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Copy a list of mfns from a guest, accounting for differences between guest
+ * and toolstack width.  Can fail if truncation would occur.
+ *
+ * Same-width: plain memcpy.  Mixed-width: element-by-element conversion,
+ * with INVALID_MFN handled specially in both directions.
+ */
+static int copy_mfns_from_guest(const struct xc_sr_context *ctx,
+                                xen_pfn_t *dst, const void *src, size_t count)
+{
+    size_t x;
+
+    if ( ctx->x86.pv.width == sizeof(unsigned long) )
+        memcpy(dst, src, count * sizeof(*dst));
+    else
+    {
+        for ( x = 0; x < count; ++x )
+        {
+#ifdef __x86_64__
+            /* 64bit toolstack, 32bit guest.  Expand any INVALID_MFN. */
+            uint32_t s = ((uint32_t *)src)[x];
+
+            dst[x] = s == ~0U ? INVALID_MFN : s;
+#else
+            /*
+             * 32bit toolstack, 64bit guest.  Truncate INVALID_MFN, but bail
+             * if any other truncation would occur.
+             *
+             * This will only occur on hosts where a PV guest has ram above
+             * the 16TB boundary.  A 32bit dom0 is unlikely to have
+             * successfully booted on a system this large.
+             */
+            uint64_t s = ((uint64_t *)src)[x];
+
+            if ( (s != ~0ULL) && ((s >> 32) != 0) )
+            {
+                errno = E2BIG;
+                return -1;
+            }
+
+            dst[x] = s;
+#endif
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Map the p2m leave pages and build an array of their pfns.
+ *
+ * Side effects: sets ctx->save.p2m_size, ctx->x86.pv.p2m, p2m_frames and
+ * allocates/fills p2m_pfns (freed elsewhere during teardown).
+ */
+static int map_p2m_leaves(struct xc_sr_context *ctx, xen_pfn_t *mfns,
+                          size_t n_mfns)
+{
+    xc_interface *xch = ctx->xch;
+    unsigned int x;
+
+    ctx->x86.pv.p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_READ,
+                                           mfns, n_mfns);
+    if ( !ctx->x86.pv.p2m )
+    {
+        PERROR("Failed to map p2m frames");
+        return -1;
+    }
+
+    ctx->save.p2m_size = ctx->x86.pv.max_pfn + 1;
+    ctx->x86.pv.p2m_frames = n_mfns;
+    ctx->x86.pv.p2m_pfns = malloc(n_mfns * sizeof(*mfns));
+    if ( !ctx->x86.pv.p2m_pfns )
+    {
+        ERROR("Cannot allocate %zu bytes for p2m pfns list",
+              n_mfns * sizeof(*mfns));
+        return -1;
+    }
+
+    /* Convert leaf frames from mfns to pfns. */
+    for ( x = 0; x < n_mfns; ++x )
+    {
+        if ( !mfn_in_pseudophysmap(ctx, mfns[x]) )
+        {
+            ERROR("Bad mfn in p2m_frame_list[%u]", x);
+            dump_bad_pseudophysmap_entry(ctx, mfns[x]);
+            errno = ERANGE;
+            return -1;
+        }
+
+        ctx->x86.pv.p2m_pfns[x] = mfn_to_pfn(ctx, mfns[x]);
+    }
+
+    return 0;
+}
+
+/*
+ * Walk the guests frame list list and frame list to identify and map the
+ * frames making up the guests p2m table.  Construct a list of pfns making up
+ * the table.
+ */
+static int map_p2m_tree(struct xc_sr_context *ctx)
+{
+    /* Terminology:
+     *
+     * fll   - frame list list, top level p2m, list of fl mfns
+     * fl    - frame list, mid level p2m, list of leaf mfns
+     * local - own allocated buffers, adjusted for bitness
+     * guest - mappings into the domain
+     */
+    xc_interface *xch = ctx->xch;
+    int rc = -1;
+    unsigned int x, saved_x, fpp, fll_entries, fl_entries;
+    xen_pfn_t fll_mfn, saved_mfn, max_pfn;
+
+    xen_pfn_t *local_fll = NULL;
+    void *guest_fll = NULL;
+    size_t local_fll_size;
+
+    xen_pfn_t *local_fl = NULL;
+    void *guest_fl = NULL;
+    size_t local_fl_size;
+
+    /* fpp == p2m entries per page, dependent on guest width. */
+    fpp = PAGE_SIZE / ctx->x86.pv.width;
+    fll_entries = (ctx->x86.pv.max_pfn / (fpp * fpp)) + 1;
+    if ( fll_entries > fpp )
+    {
+        ERROR("max_pfn %#lx too large for p2m tree", ctx->x86.pv.max_pfn);
+        goto err;
+    }
+
+    fll_mfn = GET_FIELD(ctx->x86.pv.shinfo, arch.pfn_to_mfn_frame_list_list,
+                        ctx->x86.pv.width);
+    if ( fll_mfn == 0 || fll_mfn > ctx->x86.pv.max_mfn )
+    {
+        ERROR("Bad mfn %#lx for p2m frame list list", fll_mfn);
+        goto err;
+    }
+
+    /* Map the guest top p2m. */
+    guest_fll = xc_map_foreign_range(xch, ctx->domid, PAGE_SIZE,
+                                     PROT_READ, fll_mfn);
+    if ( !guest_fll )
+    {
+        PERROR("Failed to map p2m frame list list at %#lx", fll_mfn);
+        goto err;
+    }
+
+    local_fll_size = fll_entries * sizeof(*local_fll);
+    local_fll = malloc(local_fll_size);
+    if ( !local_fll )
+    {
+        ERROR("Cannot allocate %zu bytes for local p2m frame list list",
+              local_fll_size);
+        goto err;
+    }
+
+    if ( copy_mfns_from_guest(ctx, local_fll, guest_fll, fll_entries) )
+    {
+        ERROR("Truncation detected copying p2m frame list list");
+        goto err;
+    }
+
+    /* Check for bad mfns in frame list list.  saved_x/saved_mfn track the
+     * last index at which the entry changed, for the max_pfn trim below. */
+    saved_mfn = 0;
+    saved_x = 0;
+    for ( x = 0; x < fll_entries; ++x )
+    {
+        if ( local_fll[x] == 0 || local_fll[x] > ctx->x86.pv.max_mfn )
+        {
+            ERROR("Bad mfn %#lx at index %u (of %u) in p2m frame list list",
+                  local_fll[x], x, fll_entries);
+            goto err;
+        }
+        if ( local_fll[x] != saved_mfn )
+        {
+            saved_mfn = local_fll[x];
+            saved_x = x;
+        }
+    }
+
+    /*
+     * Check for actual lower max_pfn:
+     * If the trailing entries of the frame list list were all the same we can
+     * assume they all reference mid pages all referencing p2m pages with all
+     * invalid entries. Otherwise there would be multiple pfns referencing all
+     * the same mfn which can't work across migration, as this sharing would be
+     * broken by the migration process.
+     * Adjust max_pfn if possible to avoid allocating much larger areas as
+     * needed for p2m and logdirty map.
+     */
+    max_pfn = (saved_x + 1) * fpp * fpp - 1;
+    if ( max_pfn < ctx->x86.pv.max_pfn )
+    {
+        ctx->x86.pv.max_pfn = max_pfn;
+        fll_entries = (ctx->x86.pv.max_pfn / (fpp * fpp)) + 1;
+    }
+    ctx->x86.pv.p2m_frames = (ctx->x86.pv.max_pfn + fpp) / fpp;
+    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86.pv.max_pfn,
+            ctx->x86.pv.p2m_frames);
+    fl_entries  = (ctx->x86.pv.max_pfn / fpp) + 1;
+
+    /* Map the guest mid p2m frames. */
+    guest_fl = xc_map_foreign_pages(xch, ctx->domid, PROT_READ,
+                                    local_fll, fll_entries);
+    if ( !guest_fl )
+    {
+        PERROR("Failed to map p2m frame list");
+        goto err;
+    }
+
+    local_fl_size = fl_entries * sizeof(*local_fl);
+    local_fl = malloc(local_fl_size);
+    if ( !local_fl )
+    {
+        ERROR("Cannot allocate %zu bytes for local p2m frame list",
+              local_fl_size);
+        goto err;
+    }
+
+    if ( copy_mfns_from_guest(ctx, local_fl, guest_fl, fl_entries) )
+    {
+        ERROR("Truncation detected copying p2m frame list");
+        goto err;
+    }
+
+    for ( x = 0; x < fl_entries; ++x )
+    {
+        if ( local_fl[x] == 0 || local_fl[x] > ctx->x86.pv.max_mfn )
+        {
+            ERROR("Bad mfn %#lx at index %u (of %u) in p2m frame list",
+                  local_fl[x], x, fl_entries);
+            goto err;
+        }
+    }
+
+    /* Map the p2m leaves themselves. */
+    rc = map_p2m_leaves(ctx, local_fl, fl_entries);
+
+ err:
+    free(local_fl);
+    if ( guest_fl )
+        munmap(guest_fl, fll_entries * PAGE_SIZE);
+
+    free(local_fll);
+    if ( guest_fll )
+        munmap(guest_fll, PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * Read the guest's current p2m_generation counter from the shared info page
+ * and compare it against the value seen on the previous call.
+ *
+ * Returns 0 if the generation is unchanged, -1 if it differs (i.e. the
+ * guest has modified its p2m since we last looked).  The cached value in
+ * ctx is updated either way.
+ */
+static int get_p2m_generation(struct xc_sr_context *ctx)
+{
+    uint64_t cur = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_generation,
+                             ctx->x86.pv.width);
+    int unchanged = (cur == ctx->x86.pv.p2m_generation);
+
+    ctx->x86.pv.p2m_generation = cur;
+
+    return unchanged ? 0 : -1;
+}
+
+/* Abort a live migration if the guest's p2m generation count has changed. */
+static int x86_pv_check_vm_state_p2m_list(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+
+    /* A non-live save runs against a paused domain: nothing can change. */
+    if ( !ctx->save.live )
+        return 0;
+
+    if ( get_p2m_generation(ctx) )
+    {
+        ERROR("p2m generation count changed. Migration aborted.");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Map the guest p2m frames specified via a cr3 value, a virtual address, and
+ * the maximum pfn. PTE entries are 64 bits for both 32 and 64 bit guests, as
+ * in the 32 bit case we support PAE guests only.
+ */
+static int map_p2m_list(struct xc_sr_context *ctx, uint64_t p2m_cr3)
+{
+    xc_interface *xch = ctx->xch;
+    xen_vaddr_t p2m_vaddr, p2m_end, mask, off;
+    xen_pfn_t p2m_mfn, mfn, saved_mfn, max_pfn;
+    uint64_t *ptes = NULL;
+    xen_pfn_t *mfns = NULL;
+    unsigned int fpp, n_pages, level, shift, idx_start, idx_end, idx, saved_idx;
+    int rc = -1;
+
+    p2m_mfn = cr3_to_mfn(ctx, p2m_cr3);
+    assert(p2m_mfn != 0);
+    if ( p2m_mfn > ctx->x86.pv.max_mfn )
+    {
+        ERROR("Bad p2m_cr3 value %#" PRIx64, p2m_cr3);
+        errno = ERANGE;
+        goto err;
+    }
+
+    /* Record the initial p2m generation; the comparison result is irrelevant
+     * on this first call, so the return value is deliberately ignored. */
+    get_p2m_generation(ctx);
+
+    p2m_vaddr = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_vaddr,
+                          ctx->x86.pv.width);
+    fpp = PAGE_SIZE / ctx->x86.pv.width;
+    ctx->x86.pv.p2m_frames = ctx->x86.pv.max_pfn / fpp + 1;
+    p2m_end = p2m_vaddr + ctx->x86.pv.p2m_frames * PAGE_SIZE - 1;
+
+    /* Reject a p2m virtual range which wraps, is non-canonical, or overlaps
+     * the hypervisor hole for the relevant guest width. */
+    if ( ctx->x86.pv.width == 8 )
+    {
+        mask = 0x0000ffffffffffffULL;
+        if ( !is_canonical_address(p2m_vaddr) ||
+             !is_canonical_address(p2m_end) ||
+             p2m_end < p2m_vaddr ||
+             (p2m_vaddr <= HYPERVISOR_VIRT_END_X86_64 &&
+              p2m_end > HYPERVISOR_VIRT_START_X86_64) )
+        {
+            ERROR("Bad virtual p2m address range %#" PRIx64 "-%#" PRIx64,
+                  p2m_vaddr, p2m_end);
+            errno = ERANGE;
+            goto err;
+        }
+    }
+    else
+    {
+        mask = 0x00000000ffffffffULL;
+        if ( p2m_vaddr > mask || p2m_end > mask || p2m_end < p2m_vaddr ||
+             (p2m_vaddr <= HYPERVISOR_VIRT_END_X86_32 &&
+              p2m_end > HYPERVISOR_VIRT_START_X86_32) )
+        {
+            ERROR("Bad virtual p2m address range %#" PRIx64 "-%#" PRIx64,
+                  p2m_vaddr, p2m_end);
+            errno = ERANGE;
+            goto err;
+        }
+    }
+
+    DPRINTF("p2m list from %#" PRIx64 " to %#" PRIx64 ", root at %#lx",
+            p2m_vaddr, p2m_end, p2m_mfn);
+    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86.pv.max_pfn,
+            ctx->x86.pv.p2m_frames);
+
+    /* Seed the page table walk with the single root frame. */
+    mfns = malloc(sizeof(*mfns));
+    if ( !mfns )
+    {
+        ERROR("Cannot allocate memory for array of %u mfns", 1);
+        goto err;
+    }
+    mfns[0] = p2m_mfn;
+    off = 0;
+    saved_mfn = 0;
+    idx_start = idx_end = saved_idx = 0;
+
+    /* Walk down one pagetable level per iteration, mapping the frames of the
+     * current level and collecting the frames of the next. */
+    for ( level = ctx->x86.pv.levels; level > 0; level-- )
+    {
+        n_pages = idx_end - idx_start + 1;
+        ptes = xc_map_foreign_pages(xch, ctx->domid, PROT_READ, mfns, n_pages);
+        if ( !ptes )
+        {
+            PERROR("Failed to map %u page table pages for p2m list", n_pages);
+            goto err;
+        }
+        free(mfns);
+
+        /* 512 8-byte entries per table: 9 vaddr bits per level, plus 3 for
+         * the entry size. */
+        shift = level * 9 + 3;
+        idx_start = ((p2m_vaddr - off) & mask) >> shift;
+        idx_end = ((p2m_end - off) & mask) >> shift;
+        idx = idx_end - idx_start + 1;
+        mfns = malloc(sizeof(*mfns) * idx);
+        if ( !mfns )
+        {
+            ERROR("Cannot allocate memory for array of %u mfns", idx);
+            goto err;
+        }
+
+        for ( idx = idx_start; idx <= idx_end; idx++ )
+        {
+            mfn = pte_to_frame(ptes[idx]);
+            if ( mfn == 0 || mfn > ctx->x86.pv.max_mfn )
+            {
+                ERROR("Bad mfn %#lx during page table walk for vaddr %#" PRIx64 " at level %d of p2m list",
+                      mfn, off + ((xen_vaddr_t)idx << shift), level);
+                errno = ERANGE;
+                goto err;
+            }
+            mfns[idx - idx_start] = mfn;
+
+            /* Maximum pfn check at level 2. Same reasoning as for p2m tree. */
+            if ( level == 2 )
+            {
+                if ( mfn != saved_mfn )
+                {
+                    saved_mfn = mfn;
+                    saved_idx = idx - idx_start;
+                }
+            }
+        }
+
+        /* Shrink the range if all trailing level-2 entries alias one frame:
+         * those can only back invalid p2m entries (see map_p2m_tree). */
+        if ( level == 2 )
+        {
+            if ( saved_idx == idx_end )
+                saved_idx++;
+            max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1;
+            if ( max_pfn < ctx->x86.pv.max_pfn )
+            {
+                ctx->x86.pv.max_pfn = max_pfn;
+                ctx->x86.pv.p2m_frames = (ctx->x86.pv.max_pfn + fpp) / fpp;
+                p2m_end = p2m_vaddr + ctx->x86.pv.p2m_frames * PAGE_SIZE - 1;
+                idx_end = idx_start + saved_idx;
+            }
+        }
+
+        munmap(ptes, n_pages * PAGE_SIZE);
+        ptes = NULL;
+        /* Virtual address bits already translated by the levels walked. */
+        off = p2m_vaddr & ((mask >> shift) << shift);
+    }
+
+    /* Map the p2m leaves themselves. */
+    rc = map_p2m_leaves(ctx, mfns, idx_end - idx_start + 1);
+
+ err:
+    free(mfns);
+    if ( ptes )
+        munmap(ptes, n_pages * PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * Map the guest p2m frames.
+ * Guests supporting the (preferred) virtual mapped linear p2m list advertise
+ * a non-zero p2m_cr3 in shared info; otherwise fall back to the 3 level
+ * p2m tree linked via mfns.
+ */
+static int map_p2m(struct xc_sr_context *ctx)
+{
+    uint64_t p2m_cr3;
+
+    ctx->x86.pv.p2m_generation = ~0ULL;
+    ctx->x86.pv.max_pfn = GET_FIELD(ctx->x86.pv.shinfo, arch.max_pfn,
+                                    ctx->x86.pv.width) - 1;
+    p2m_cr3 = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_cr3, ctx->x86.pv.width);
+
+    if ( p2m_cr3 != 0 )
+        return map_p2m_list(ctx, p2m_cr3);
+
+    return map_p2m_tree(ctx);
+}
+
+/*
+ * Obtain a specific vcpu's basic state and write an X86_PV_VCPU_BASIC record
+ * into the stream.  Performs mfn->pfn conversion on architectural state.
+ */
+static int write_one_vcpu_basic(struct xc_sr_context *ctx, uint32_t id)
+{
+    xc_interface *xch = ctx->xch;
+    xen_pfn_t mfn, pfn;
+    unsigned int i, gdt_count;
+    int rc = -1;
+    vcpu_guest_context_any_t vcpu;
+    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
+        .vcpu_id = id,
+    };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_PV_VCPU_BASIC,
+        .length = sizeof(vhdr),
+        .data = &vhdr,
+    };
+
+    if ( xc_vcpu_getcontext(xch, ctx->domid, id, &vcpu) )
+    {
+        PERROR("Failed to get vcpu%u context", id);
+        goto err;
+    }
+
+    /* Vcpu0 is special: Convert the suspend record to a pfn. */
+    if ( id == 0 )
+    {
+        mfn = GET_FIELD(&vcpu, user_regs.edx, ctx->x86.pv.width);
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("Bad mfn for suspend record");
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            errno = ERANGE;
+            goto err;
+        }
+        SET_FIELD(&vcpu, user_regs.edx, mfn_to_pfn(ctx, mfn),
+                  ctx->x86.pv.width);
+    }
+
+    gdt_count = GET_FIELD(&vcpu, gdt_ents, ctx->x86.pv.width);
+    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
+    {
+        ERROR("GDT entry count (%u) out of range (max %u)",
+              gdt_count, FIRST_RESERVED_GDT_ENTRY);
+        errno = ERANGE;
+        goto err;
+    }
+    /* 512 8-byte descriptors fit in one 4k frame. */
+    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */
+
+    /* Convert GDT frames to pfns. */
+    for ( i = 0; i < gdt_count; ++i )
+    {
+        mfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86.pv.width);
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("Bad mfn for frame %u of vcpu%u's GDT", i, id);
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            errno = ERANGE;
+            goto err;
+        }
+        SET_FIELD(&vcpu, gdt_frames[i], mfn_to_pfn(ctx, mfn),
+                  ctx->x86.pv.width);
+    }
+
+    /* Convert CR3 to a pfn. */
+    mfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86.pv.width));
+    if ( !mfn_in_pseudophysmap(ctx, mfn) )
+    {
+        ERROR("Bad mfn for vcpu%u's cr3", id);
+        dump_bad_pseudophysmap_entry(ctx, mfn);
+        errno = ERANGE;
+        goto err;
+    }
+    pfn = mfn_to_pfn(ctx, mfn);
+    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, pfn), ctx->x86.pv.width);
+
+    /* 64bit guests: Convert CR1 (guest pagetables) to pfn. */
+    if ( ctx->x86.pv.levels == 4 && vcpu.x64.ctrlreg[1] )
+    {
+        mfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
+        if ( !mfn_in_pseudophysmap(ctx, mfn) )
+        {
+            ERROR("Bad mfn for vcpu%u's cr1", id);
+            dump_bad_pseudophysmap_entry(ctx, mfn);
+            errno = ERANGE;
+            goto err;
+        }
+        pfn = mfn_to_pfn(ctx, mfn);
+        /* Keep bit 0 set, as found in the original value above. */
+        vcpu.x64.ctrlreg[1] = 1 | ((uint64_t)pfn << PAGE_SHIFT);
+    }
+
+    /* Write only the part of the context union matching the guest width. */
+    if ( ctx->x86.pv.width == 8 )
+        rc = write_split_record(ctx, &rec, &vcpu, sizeof(vcpu.x64));
+    else
+        rc = write_split_record(ctx, &rec, &vcpu, sizeof(vcpu.x32));
+
+ err:
+    return rc;
+}
+
+/*
+ * Obtain a specific vcpu's extended state and write an X86_PV_VCPU_EXTENDED
+ * record into the stream.
+ */
+static int write_one_vcpu_extended(struct xc_sr_context *ctx, uint32_t id)
+{
+    xc_interface *xch = ctx->xch;
+    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
+        .vcpu_id = id,
+    };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_PV_VCPU_EXTENDED,
+        .length = sizeof(vhdr),
+        .data = &vhdr,
+    };
+    struct xen_domctl domctl = {
+        .cmd = XEN_DOMCTL_get_ext_vcpucontext,
+        .domain = ctx->domid,
+        .u.ext_vcpucontext.vcpu = id,
+    };
+
+    /* The hypercall writes the context, and its size, back into domctl. */
+    if ( xc_domctl(xch, &domctl) < 0 )
+    {
+        PERROR("Unable to get vcpu%u extended context", id);
+        return -1;
+    }
+
+    /* No content? Skip the record. */
+    if ( domctl.u.ext_vcpucontext.size == 0 )
+        return 0;
+
+    return write_split_record(ctx, &rec, &domctl.u.ext_vcpucontext,
+                              domctl.u.ext_vcpucontext.size);
+}
+
+/*
+ * Query to see whether a specific vcpu has xsave state and if so, write an
+ * X86_PV_VCPU_XSAVE record into the stream.
+ */
+static int write_one_vcpu_xsave(struct xc_sr_context *ctx, uint32_t id)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = -1;
+    DECLARE_HYPERCALL_BUFFER(void, buffer);
+    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
+        .vcpu_id = id,
+    };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_PV_VCPU_XSAVE,
+        .length = sizeof(vhdr),
+        .data = &vhdr,
+    };
+    struct xen_domctl domctl = {
+        .cmd = XEN_DOMCTL_getvcpuextstate,
+        .domain = ctx->domid,
+        .u.vcpuextstate.vcpu = id,
+    };
+
+    /* First call, with no buffer: queries the feature mask and buffer size. */
+    if ( xc_domctl(xch, &domctl) < 0 )
+    {
+        PERROR("Unable to get vcpu%u's xsave context", id);
+        goto err;
+    }
+
+    /* No xsave state? skip this record. */
+    if ( !domctl.u.vcpuextstate.xfeature_mask )
+        goto out;
+
+    buffer = xc_hypercall_buffer_alloc(xch, buffer, domctl.u.vcpuextstate.size);
+    if ( !buffer )
+    {
+        ERROR("Unable to allocate %"PRIx64" bytes for vcpu%u's xsave context",
+              domctl.u.vcpuextstate.size, id);
+        goto err;
+    }
+
+    /* Second call fills the buffer with the actual xsave state. */
+    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
+    if ( xc_domctl(xch, &domctl) < 0 )
+    {
+        PERROR("Unable to get vcpu%u's xsave context", id);
+        goto err;
+    }
+
+    /* No xsave state? Skip this record. */
+    if ( domctl.u.vcpuextstate.size == 0 )
+        goto out;
+
+    rc = write_split_record(ctx, &rec, buffer, domctl.u.vcpuextstate.size);
+    if ( rc )
+        goto err;
+
+ out:
+    rc = 0;
+
+ err:
+    xc_hypercall_buffer_free(xch, buffer);
+
+    return rc;
+}
+
+/*
+ * Query to see whether a specific vcpu has msr state and if so, write an
+ * X86_PV_VCPU_MSRS record into the stream.
+ */
+static int write_one_vcpu_msrs(struct xc_sr_context *ctx, uint32_t id)
+{
+    xc_interface *xch = ctx->xch;
+    int rc = -1;
+    size_t buffersz;
+    DECLARE_HYPERCALL_BUFFER(void, buffer);
+    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
+        .vcpu_id = id,
+    };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_PV_VCPU_MSRS,
+        .length = sizeof(vhdr),
+        .data = &vhdr,
+    };
+    struct xen_domctl domctl = {
+        .cmd = XEN_DOMCTL_get_vcpu_msrs,
+        .domain = ctx->domid,
+        .u.vcpu_msrs.vcpu = id,
+    };
+
+    /* First call, with no buffer: queries the number of MSRs. */
+    if ( xc_domctl(xch, &domctl) < 0 )
+    {
+        PERROR("Unable to get vcpu%u's msrs", id);
+        goto err;
+    }
+
+    /* No MSRs? skip this record. */
+    if ( !domctl.u.vcpu_msrs.msr_count )
+        goto out;
+
+    buffersz = domctl.u.vcpu_msrs.msr_count * sizeof(xen_domctl_vcpu_msr_t);
+    buffer = xc_hypercall_buffer_alloc(xch, buffer, buffersz);
+    if ( !buffer )
+    {
+        ERROR("Unable to allocate %zu bytes for vcpu%u's msrs",
+              buffersz, id);
+        goto err;
+    }
+
+    /* Second call fills the buffer and updates msr_count. */
+    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);
+    if ( xc_domctl(xch, &domctl) < 0 )
+    {
+        PERROR("Unable to get vcpu%u's msrs", id);
+        goto err;
+    }
+
+    /* No MSRs? Skip this record. */
+    if ( domctl.u.vcpu_msrs.msr_count == 0 )
+        goto out;
+
+    rc = write_split_record(ctx, &rec, buffer,
+                            domctl.u.vcpu_msrs.msr_count *
+                            sizeof(xen_domctl_vcpu_msr_t));
+    if ( rc )
+        goto err;
+
+ out:
+    rc = 0;
+
+ err:
+    xc_hypercall_buffer_free(xch, buffer);
+
+    return rc;
+}
+
+/*
+ * For each vcpu, if it is online, write its state into the stream.
+ */
+static int write_all_vcpu_information(struct xc_sr_context *ctx)
+{
+    /* Per-vcpu record writers, in stream order. */
+    static int (*const vcpu_writers[])(struct xc_sr_context *, uint32_t) = {
+        write_one_vcpu_basic,
+        write_one_vcpu_extended,
+        write_one_vcpu_xsave,
+        write_one_vcpu_msrs,
+    };
+    xc_interface *xch = ctx->xch;
+    xc_vcpuinfo_t vinfo;
+    unsigned int i, j;
+    int rc;
+
+    for ( i = 0; i <= ctx->dominfo.max_vcpu_id; ++i )
+    {
+        rc = xc_vcpu_getinfo(xch, ctx->domid, i, &vinfo);
+        if ( rc )
+        {
+            PERROR("Failed to get vcpu%u information", i);
+            return rc;
+        }
+
+        /* Offline vcpus have no state to record. */
+        if ( !vinfo.online )
+            continue;
+
+        for ( j = 0; j < sizeof(vcpu_writers) / sizeof(vcpu_writers[0]); ++j )
+        {
+            rc = vcpu_writers[j](ctx, i);
+            if ( rc )
+                return rc;
+        }
+    }
+
+    return 0;
+}
+
+/* Write an X86_PV_INFO record (guest width and pagetable levels). */
+static int write_x86_pv_info(struct xc_sr_context *ctx)
+{
+    struct xc_sr_rec_x86_pv_info payload = {
+        .pt_levels = ctx->x86.pv.levels,
+        .guest_width = ctx->x86.pv.width,
+    };
+    struct xc_sr_record record = {
+        .data = &payload,
+        .length = sizeof(payload),
+        .type = REC_TYPE_X86_PV_INFO,
+    };
+
+    return write_record(ctx, &record);
+}
+
+/*
+ * Writes an X86_PV_P2M_FRAMES record into the stream.  This contains the list
+ * of pfns making up the p2m table.
+ */
+static int write_x86_pv_p2m_frames(struct xc_sr_context *ctx)
+{
+    xc_interface *xch = ctx->xch;
+    int rc; unsigned int i;
+    size_t datasz = ctx->x86.pv.p2m_frames * sizeof(uint64_t);
+    uint64_t *data = NULL;
+    struct xc_sr_rec_x86_pv_p2m_frames hdr = {
+        .end_pfn = ctx->x86.pv.max_pfn,
+    };
+    struct xc_sr_record rec = {
+        .type = REC_TYPE_X86_PV_P2M_FRAMES,
+        .length = sizeof(hdr),
+        .data = &hdr,
+    };
+
+    /* No need to translate if sizeof(uint64_t) == sizeof(xen_pfn_t). */
+    if ( sizeof(uint64_t) != sizeof(*ctx->x86.pv.p2m_pfns) )
+    {
+        /* Widen each pfn into a temporary uint64_t array. */
+        if ( !(data = malloc(datasz)) )
+        {
+            ERROR("Cannot allocate %zu bytes for X86_PV_P2M_FRAMES data",
+                  datasz);
+            return -1;
+        }
+
+        for ( i = 0; i < ctx->x86.pv.p2m_frames; ++i )
+            data[i] = ctx->x86.pv.p2m_pfns[i];
+    }
+    else
+        /* Same width: stream the existing array directly. */
+        data = (uint64_t *)ctx->x86.pv.p2m_pfns;
+
+    rc = write_split_record(ctx, &rec, data, datasz);
+
+    /* Only free if we allocated a conversion buffer above. */
+    if ( data != (uint64_t *)ctx->x86.pv.p2m_pfns )
+        free(data);
+
+    return rc;
+}
+
+/* Write a SHARED_INFO record containing the guest's shared info page. */
+static int write_shared_info(struct xc_sr_context *ctx)
+{
+    struct xc_sr_record record = {
+        .data = ctx->x86.pv.shinfo,
+        .length = PAGE_SIZE,
+        .type = REC_TYPE_SHARED_INFO,
+    };
+
+    return write_record(ctx, &record);
+}
+
+/*
+ * Normalise a pagetable for the migration stream.  Performs mfn->pfn
+ * conversions on the ptes.
+ */
+static int normalise_pagetable(struct xc_sr_context *ctx, const uint64_t *src,
+                               uint64_t *dst, unsigned long type)
+{
+    xc_interface *xch = ctx->xch;
+    uint64_t pte;
+    unsigned int i, xen_first = -1, xen_last = -1; /* Indices of Xen mappings. */
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    /* Work out which index range, if any, holds Xen's own mappings. */
+    if ( ctx->x86.pv.levels == 4 )
+    {
+        /* 64bit guests only have Xen mappings in their L4 tables. */
+        if ( type == XEN_DOMCTL_PFINFO_L4TAB )
+        {
+            xen_first = (HYPERVISOR_VIRT_START_X86_64 >>
+                         L4_PAGETABLE_SHIFT_X86_64) & 511;
+            xen_last = (HYPERVISOR_VIRT_END_X86_64 >>
+                        L4_PAGETABLE_SHIFT_X86_64) & 511;
+        }
+    }
+    else
+    {
+        switch ( type )
+        {
+        case XEN_DOMCTL_PFINFO_L4TAB:
+            ERROR("??? Found L4 table for 32bit guest");
+            errno = EINVAL;
+            return -1;
+
+        case XEN_DOMCTL_PFINFO_L3TAB:
+            /* 32bit guests can only use the first 4 entries of their L3 tables.
+             * All other are potentially used by Xen. */
+            xen_first = 4;
+            xen_last = 511;
+            break;
+
+        case XEN_DOMCTL_PFINFO_L2TAB:
+            /* It is hard to spot Xen mappings in a 32bit guest's L2.  Most
+             * are normal but only a few will have Xen mappings.
+             *
+             * An L2 entry mapping the compat m2p is taken as the start of
+             * the Xen range.
+             */
+            i = (HYPERVISOR_VIRT_START_X86_32 >> L2_PAGETABLE_SHIFT_PAE) & 511;
+            if ( pte_to_frame(src[i]) == ctx->x86.pv.compat_m2p_mfn0 )
+            {
+                xen_first = i;
+                xen_last = (HYPERVISOR_VIRT_END_X86_32 >>
+                            L2_PAGETABLE_SHIFT_PAE) & 511;
+            }
+            break;
+        }
+    }
+
+    /* Translate every pte from src into dst. */
+    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
+    {
+        xen_pfn_t mfn;
+
+        pte = src[i];
+
+        /* Remove Xen mappings: Xen will reconstruct on the other side. */
+        if ( i >= xen_first && i <= xen_last )
+            pte = 0;
+
+        /*
+         * Errors during the live part of migration are expected as a result
+         * of split pagetable updates, page type changes, active grant
+         * mappings etc.  The pagetable will need to be resent after pausing.
+         * In such cases we fail with EAGAIN.
+         *
+         * For domains which are already paused, errors are fatal.
+         */
+        if ( pte & _PAGE_PRESENT )
+        {
+            mfn = pte_to_frame(pte);
+
+#ifdef __i386__
+            /* NOTE(review): presumably pte_to_frame() yields INVALID_MFN for
+             * frame numbers not representable in a 32bit xen_pfn_t. */
+            if ( mfn == INVALID_MFN )
+            {
+                if ( !ctx->dominfo.paused )
+                    errno = EAGAIN;
+                else
+                {
+                    ERROR("PTE truncation detected.  L%lu[%u] = %016"PRIx64,
+                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                    errno = E2BIG;
+                }
+                return -1;
+            }
+#endif
+
+            if ( (type > XEN_DOMCTL_PFINFO_L1TAB) && (pte & _PAGE_PSE) )
+            {
+                ERROR("Cannot migrate superpage (L%lu[%u]: 0x%016"PRIx64")",
+                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
+                errno = E2BIG;
+                return -1;
+            }
+
+            if ( !mfn_in_pseudophysmap(ctx, mfn) )
+            {
+                if ( !ctx->dominfo.paused )
+                    errno = EAGAIN;
+                else
+                {
+                    ERROR("Bad mfn for L%lu[%u]",
+                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
+                    dump_bad_pseudophysmap_entry(ctx, mfn);
+                    errno = ERANGE;
+                }
+                return -1;
+            }
+
+            /* Substitute the pfn into the pte (other bits kept by merge_pte). */
+            pte = merge_pte(pte, mfn_to_pfn(ctx, mfn));
+        }
+
+        dst[i] = pte;
+    }
+
+    return 0;
+}
+
+/*
+ * save_ops function.  Translate a pfn to the mfn currently backing it by
+ * indexing the mapped guest p2m.
+ */
+static xen_pfn_t x86_pv_pfn_to_gfn(const struct xc_sr_context *ctx,
+                                   xen_pfn_t pfn)
+{
+    assert(pfn <= ctx->x86.pv.max_pfn);
+
+    return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
+}
+
+
+/*
+ * save_ops function.  Performs pagetable normalisation on appropriate pages,
+ * replacing *page with a freshly allocated, normalised copy.
+ */
+static int x86_pv_normalise_page(struct xc_sr_context *ctx, xen_pfn_t type,
+                                 void **page)
+{
+    xc_interface *xch = ctx->xch;
+    void *scratch;
+
+    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
+
+    /* Only L1..L4 pagetable pages need mfn->pfn rewriting. */
+    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
+        return 0;
+
+    scratch = malloc(PAGE_SIZE);
+    if ( !scratch )
+    {
+        ERROR("Unable to allocate scratch page");
+        return -1;
+    }
+
+    *page = scratch;
+
+    return normalise_pagetable(ctx, *page == scratch ? *page : *page, scratch, type);
+}
+
+/*
+ * save_ops function.  Queries domain information and maps the Xen m2p and the
+ * guests shinfo and p2m table.
+ */
+static int x86_pv_setup(struct xc_sr_context *ctx)
+{
+    int rc;
+
+    /* Each step depends on the previous one; stop at the first failure. */
+    if ( (rc = x86_pv_domain_info(ctx)) ||
+         (rc = x86_pv_map_m2p(ctx)) ||
+         (rc = map_shinfo(ctx)) ||
+         (rc = map_p2m(ctx)) )
+        return rc;
+
+    return 0;
+}
+
+/* save_ops function.  Write the X86_PV_INFO and CPU policy records. */
+static int x86_pv_static_data(struct xc_sr_context *ctx)
+{
+    int rc = write_x86_pv_info(ctx);
+
+    if ( !rc )
+        rc = write_x86_cpu_policy_records(ctx);
+
+    return rc;
+}
+
+/* save_ops function.  Emit the P2M frame list at the start of the stream. */
+static int x86_pv_start_of_stream(struct xc_sr_context *ctx)
+{
+    /*
+     * Ideally should be able to change during migration.  Currently
+     * corruption will occur if the contents or location of the P2M changes
+     * during the live migration loop.  If one is very lucky, the breakage
+     * will not be subtle.
+     */
+    return write_x86_pv_p2m_frames(ctx);
+}
+
+/* save_ops function.  Nothing to do at the start of a checkpoint. */
+static int x86_pv_start_of_checkpoint(struct xc_sr_context *ctx)
+{
+    return 0;
+}
+
+/*
+ * save_ops function.  Emit TSC info, shared info and all vcpu state at the
+ * end of a checkpoint, stopping at the first failure.
+ */
+static int x86_pv_end_of_checkpoint(struct xc_sr_context *ctx)
+{
+    int rc;
+
+    if ( (rc = write_x86_tsc_info(ctx)) ||
+         (rc = write_shared_info(ctx)) ||
+         (rc = write_all_vcpu_information(ctx)) )
+        return rc;
+
+    return 0;
+}
+
+/* save_ops function.  Verify the p2m has not moved under our feet. */
+static int x86_pv_check_vm_state(struct xc_sr_context *ctx)
+{
+    /* A generation of ~0 means no linear p2m list is in use: nothing to do. */
+    return ctx->x86.pv.p2m_generation == ~0ULL
+        ? 0 : x86_pv_check_vm_state_p2m_list(ctx);
+}
+
+/* save_ops function.  Release all mappings and memory taken during setup. */
+static int x86_pv_cleanup(struct xc_sr_context *ctx)
+{
+    /* Locally-allocated pfn array. */
+    free(ctx->x86.pv.p2m_pfns);
+
+    /* Foreign mappings established by map_p2m(), map_shinfo() and
+     * x86_pv_map_m2p() respectively; each may legitimately be absent. */
+    if ( ctx->x86.pv.p2m != NULL )
+        munmap(ctx->x86.pv.p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);
+    if ( ctx->x86.pv.shinfo != NULL )
+        munmap(ctx->x86.pv.shinfo, PAGE_SIZE);
+    if ( ctx->x86.pv.m2p != NULL )
+        munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);
+
+    return 0;
+}
+
+/* save_ops implementation for x86 PV guests. */
+struct xc_sr_save_ops save_ops_x86_pv =
+{
+    .pfn_to_gfn          = x86_pv_pfn_to_gfn,
+    .normalise_page      = x86_pv_normalise_page,
+    .setup               = x86_pv_setup,
+    .static_data         = x86_pv_static_data,
+    .start_of_stream     = x86_pv_start_of_stream,
+    .start_of_checkpoint = x86_pv_start_of_checkpoint,
+    .end_of_checkpoint   = x86_pv_end_of_checkpoint,
+    .check_vm_state      = x86_pv_check_vm_state,
+    .cleanup             = x86_pv_cleanup,
+};
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_sr_stream_format.h b/tools/libs/guest/xg_sr_stream_format.h
new file mode 100644 (file)
index 0000000..8a0da26
--- /dev/null
@@ -0,0 +1,150 @@
+#ifndef __STREAM_FORMAT__H
+#define __STREAM_FORMAT__H
+
+/*
+ * C structures for the Migration v2 stream format.
+ * See docs/specs/libxc-migration-stream.pandoc
+ */
+
+#include <inttypes.h>
+
+/*
+ * Image Header
+ */
+struct xc_sr_ihdr
+{
+    uint64_t marker;
+    uint32_t id;
+    uint32_t version;
+    uint16_t options;
+    uint16_t _res1;
+    uint32_t _res2;
+};
+
+#define IHDR_MARKER  0xffffffffffffffffULL
+/* 0x58454E46 is ASCII "XENF". */
+#define IHDR_ID      0x58454E46U
+
+/* Bit 0 of 'options' selects the endianness of the stream. */
+#define _IHDR_OPT_ENDIAN 0
+#define IHDR_OPT_LITTLE_ENDIAN (0 << _IHDR_OPT_ENDIAN)
+#define IHDR_OPT_BIG_ENDIAN    (1 << _IHDR_OPT_ENDIAN)
+
+/*
+ * Domain Header
+ */
+struct xc_sr_dhdr
+{
+    uint32_t type;
+    uint16_t page_shift;
+    uint16_t _res1;
+    uint32_t xen_major;
+    uint32_t xen_minor;
+};
+
+#define DHDR_TYPE_X86_PV  0x00000001U
+#define DHDR_TYPE_X86_HVM 0x00000002U
+
+/*
+ * Record Header
+ */
+struct xc_sr_rhdr
+{
+    uint32_t type;
+    uint32_t length;
+};
+
+/* All records must be aligned up to an 8 octet boundary */
+#define REC_ALIGN_ORDER               (3U)
+/* Somewhat arbitrary - 128MB */
+#define REC_LENGTH_MAX                (128U << 20)
+
+#define REC_TYPE_END                        0x00000000U
+#define REC_TYPE_PAGE_DATA                  0x00000001U
+#define REC_TYPE_X86_PV_INFO                0x00000002U
+#define REC_TYPE_X86_PV_P2M_FRAMES          0x00000003U
+#define REC_TYPE_X86_PV_VCPU_BASIC          0x00000004U
+#define REC_TYPE_X86_PV_VCPU_EXTENDED       0x00000005U
+#define REC_TYPE_X86_PV_VCPU_XSAVE          0x00000006U
+#define REC_TYPE_SHARED_INFO                0x00000007U
+#define REC_TYPE_X86_TSC_INFO               0x00000008U
+#define REC_TYPE_HVM_CONTEXT                0x00000009U
+#define REC_TYPE_HVM_PARAMS                 0x0000000aU
+#define REC_TYPE_TOOLSTACK                  0x0000000bU
+#define REC_TYPE_X86_PV_VCPU_MSRS           0x0000000cU
+#define REC_TYPE_VERIFY                     0x0000000dU
+#define REC_TYPE_CHECKPOINT                 0x0000000eU
+#define REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST  0x0000000fU
+#define REC_TYPE_STATIC_DATA_END            0x00000010U
+#define REC_TYPE_X86_CPUID_POLICY           0x00000011U
+#define REC_TYPE_X86_MSR_POLICY             0x00000012U
+
+#define REC_TYPE_OPTIONAL             0x80000000U
+
+/* PAGE_DATA */
+/* Followed by 'count' pfn entries; the top nibble of each entry carries the
+ * page type (see the masks below). */
+struct xc_sr_rec_page_data_header
+{
+    uint32_t count;
+    uint32_t _res1;
+    uint64_t pfn[0];
+};
+
+#define PAGE_DATA_PFN_MASK  0x000fffffffffffffULL
+#define PAGE_DATA_TYPE_MASK 0xf000000000000000ULL
+
+/* X86_PV_INFO */
+struct xc_sr_rec_x86_pv_info
+{
+    uint8_t guest_width;
+    uint8_t pt_levels;
+    uint8_t _res[6];
+};
+
+/* X86_PV_P2M_FRAMES */
+struct xc_sr_rec_x86_pv_p2m_frames
+{
+    uint32_t start_pfn;
+    uint32_t end_pfn;
+    uint64_t p2m_pfns[0];
+};
+
+/* X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} */
+struct xc_sr_rec_x86_pv_vcpu_hdr
+{
+    uint32_t vcpu_id;
+    uint32_t _res1;
+    uint8_t context[0];
+};
+
+/* X86_TSC_INFO */
+struct xc_sr_rec_x86_tsc_info
+{
+    uint32_t mode;
+    uint32_t khz;
+    uint64_t nsec;
+    uint32_t incarnation;
+    uint32_t _res1;
+};
+
+/* HVM_PARAMS */
+struct xc_sr_rec_hvm_params_entry
+{
+    uint64_t index;
+    uint64_t value;
+};
+
+struct xc_sr_rec_hvm_params
+{
+    uint32_t count;
+    uint32_t _res1;
+    struct xc_sr_rec_hvm_params_entry param[0];
+};
+
+#endif
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libs/guest/xg_suspend.c b/tools/libs/guest/xg_suspend.c
new file mode 100644 (file)
index 0000000..0ce6364
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <xenevtchn.h>
+
+#include "xc_private.h"
+#include "xenguest.h"
+
+#define SUSPEND_LOCK_FILE    XEN_RUN_DIR "/suspend-evtchn-%d.lock"
+
+/*
+ * locking
+ */
+
+/*
+ * Log a failure to manipulate the suspend lock file and jump to the
+ * enclosing function's error path.  Requires a local 'suspend_file'
+ * buffer and an 'err' label in scope.
+ */
+#define ERR(x) do{                                                      \
+    ERROR("Can't " #x " lock file for suspend event channel %s: %s\n",  \
+          suspend_file, strerror(errno));                               \
+    goto err;                                                           \
+}while(0)
+
+/* Pattern length plus up to 10 digits of a decimal 32bit domid. */
+#define SUSPEND_FILE_BUFLEN (sizeof(SUSPEND_LOCK_FILE) + 10)
+
+/* Format the per-domain suspend-evtchn lock file path into buf. */
+static void get_suspend_file(char buf[], uint32_t domid)
+{
+    snprintf(buf, SUSPEND_FILE_BUFLEN, SUSPEND_LOCK_FILE, domid);
+}
+
+/*
+ * Create and take an exclusive fcntl() write lock on the per-domain suspend
+ * lock file.  On success stores the locked fd in *lockfd and returns 0;
+ * returns -1 on failure (with *lockfd left at -1).
+ */
+static int lock_suspend_event(xc_interface *xch, uint32_t domid, int *lockfd)
+{
+    int fd = -1, r;
+    char suspend_file[SUSPEND_FILE_BUFLEN];
+    struct stat ours, theirs;
+    struct flock fl;
+
+    get_suspend_file(suspend_file, domid);
+
+    *lockfd = -1;
+
+    /* Open-lock-stat loop: retry until we hold the lock on the file which is
+     * actually present on disk. */
+    for (;;) {
+        if (fd >= 0)
+            close (fd);
+
+        fd = open(suspend_file, O_CREAT | O_RDWR, 0600);
+        if (fd < 0)
+            ERR("create");
+
+        r = fcntl(fd, F_SETFD, FD_CLOEXEC);
+        if (r)
+            ERR("fcntl F_SETFD FD_CLOEXEC");
+
+        memset(&fl, 0, sizeof(fl));
+        fl.l_type = F_WRLCK;
+        fl.l_whence = SEEK_SET;
+        fl.l_len = 1;
+        r = fcntl(fd, F_SETLK, &fl);
+        if (r)
+            ERR("fcntl F_SETLK");
+
+        r = fstat(fd, &ours);
+        if (r)
+            ERR("fstat");
+
+        /* Compare inodes: if the path no longer names the file we locked,
+         * another process unlinked (and possibly recreated) it. */
+        r = stat(suspend_file, &theirs);
+        if (r) {
+            if (errno == ENOENT)
+                /* try again */
+                continue;
+            ERR("stat");
+        }
+
+        if (ours.st_ino != theirs.st_ino)
+            /* someone else must have removed it while we were locking it */
+            continue;
+
+        break;
+    }
+
+    *lockfd = fd;
+    return 0;
+
+ err:
+    if (fd >= 0)
+        close(fd);
+
+    return -1;
+}
+
+/*
+ * Remove the suspend lock file and close the locked fd.
+ *
+ * *lockfd is the fd returned by lock_suspend_event() (or -1, in which case
+ * there is nothing to do).  On return *lockfd is always -1, so a subsequent
+ * release cannot act on a stale descriptor.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int unlock_suspend_event(xc_interface *xch, uint32_t domid, int *lockfd)
+{
+    int r;
+    char suspend_file[SUSPEND_FILE_BUFLEN];
+
+    if (*lockfd < 0)
+        return 0;
+
+    get_suspend_file(suspend_file, domid);
+
+    r = unlink(suspend_file);
+    if (r)
+        ERR("unlink");
+
+    r = close(*lockfd);
+    *lockfd = -1;
+    if (r)
+        ERR("close");
+
+    /*
+     * Success.  Without this return the code fell through into the error
+     * path below and reported -1 even when unlink and close both succeeded.
+     */
+    return 0;
+
+ err:
+    if (*lockfd >= 0)
+        close(*lockfd);
+    *lockfd = -1;    /* prevent a later double close of the same fd */
+
+    return -1;
+}
+
+/*
+ * Block until the suspend event channel fires.
+ * Polls xenevtchn_pending() until it reports suspend_evtchn; events on other
+ * ports received in the meantime are ignored.  Returns 0 on success, -1 if
+ * polling fails.
+ */
+int xc_await_suspend(xc_interface *xch, xenevtchn_handle *xce, int suspend_evtchn)
+{
+    int rc;
+
+    do {
+        rc = xenevtchn_pending(xce);
+        if (rc < 0) {
+            ERROR("error polling suspend notification channel: %d", rc);
+            return -1;
+        }
+    } while (rc != suspend_evtchn);
+
+    /* harmless for one-off suspend */
+    if (xenevtchn_unmask(xce, suspend_evtchn) < 0)
+        ERROR("failed to unmask suspend notification channel: %d", rc);
+
+    return 0;
+}
+
+/*
+ * Unbind the suspend event channel (if bound) and release the lock file.
+ * Internal callers are allowed to call this with suspend_evtchn<0
+ * but *lockfd>0, to clean up after a partial initialisation.
+ */
+int xc_suspend_evtchn_release(xc_interface *xch, xenevtchn_handle *xce,
+                              uint32_t domid, int suspend_evtchn, int *lockfd)
+{
+    if (suspend_evtchn >= 0)
+        xenevtchn_unbind(xce, suspend_evtchn);
+
+    return unlock_suspend_event(xch, domid, lockfd);
+}
+
+/*
+ * Take the per-domain suspend lock, bind the remote suspend port, and
+ * subscribe the domain for suspend notification.
+ * Returns the local event channel port on success, or -1 on error (in which
+ * case the lock and any partial binding are released).
+ */
+int xc_suspend_evtchn_init_sane(xc_interface *xch, xenevtchn_handle *xce,
+                                uint32_t domid, int port, int *lockfd)
+{
+    int rc, suspend_evtchn = -1;
+
+    if (lock_suspend_event(xch, domid, lockfd)) {
+        errno = EINVAL;
+        goto cleanup;
+    }
+
+    suspend_evtchn = xenevtchn_bind_interdomain(xce, domid, port);
+    if (suspend_evtchn < 0) {
+        ERROR("failed to bind suspend event channel: %d", suspend_evtchn);
+        goto cleanup;
+    }
+
+    rc = xc_domain_subscribe_for_suspend(xch, domid, port);
+    if (rc < 0) {
+        ERROR("failed to subscribe to domain: %d", rc);
+        goto cleanup;
+    }
+
+    return suspend_evtchn;
+
+cleanup:
+    /* Release handles both the partial binding and the lock file. */
+    xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn, lockfd);
+
+    return -1;
+}
+
+/*
+ * Initialise the suspend event channel and consume the spurious event which
+ * is pending immediately after binding.
+ */
+int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xenevtchn_handle *xce,
+                                     uint32_t domid, int port, int *lockfd)
+{
+    int evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port, lockfd);
+
+    if (evtchn >= 0)
+        /* event channel is pending immediately after binding */
+        xc_await_suspend(xch, xce, evtchn);
+
+    return evtchn;
+}
index 4679268fc2067ec73238751adf4e0dcf73fee08e..9d0ed0884606d8a1c09c0a8ae1218f4eb6171965 100644 (file)
@@ -34,7 +34,7 @@ PKG_CONFIG_DESC ?= The $(PKG_CONFIG_NAME) library for Xen hypervisor
 PKG_CONFIG_VERSION := $(MAJOR).$(MINOR)
 PKG_CONFIG_USELIBS := $(SHLIB_libxen$(LIBNAME))
 PKG_CONFIG_LIB := xen$(LIBNAME)
-PKG_CONFIG_REQPRIV := $(subst $(space),$(comma),$(strip $(foreach lib,$(USELIBS_$(LIBNAME)),xen$(lib))))
+PKG_CONFIG_REQPRIV := $(subst $(space),$(comma),$(strip $(foreach lib,$(patsubst ctrl,control,$(USELIBS_$(LIBNAME))),xen$(lib))))
 
 ifneq ($(CONFIG_LIBXC_MINIOS),y)
 PKG_CONFIG_INST := $(PKG_CONFIG)
index 8e45e8d9177ada247fbb46cdf50be4b15aa22667..9619c576ba0073eda4156242d772044f34d18ee0 100644 (file)
@@ -18,3 +18,5 @@ LIBS_LIBS += hypfs
 USELIBS_hypfs := toollog toolcore call
 LIBS_LIBS += ctrl
 USELIBS_ctrl := toollog call evtchn gnttab foreignmemory devicemodel
+LIBS_LIBS += guest
+USELIBS_guest := evtchn ctrl
diff --git a/tools/libxc/COPYING b/tools/libxc/COPYING
deleted file mode 100644 (file)
index 7ca8702..0000000
+++ /dev/null
@@ -1,467 +0,0 @@
-Note that the only valid version of the LGPL as far as the files in
-this directory (and its subdirectories) are concerned is _this_
-particular version of the license (i.e., *only* v2.1, not v2.2 or v3.x
-or whatever), unless explicitly otherwise stated.
-
-Where clause 3 is invoked in order to relicense under the GPL then
-this shall be considered to be GPL v2 only for files which have
-specified LGPL v2.1 only.
-
-                  GNU LESSER GENERAL PUBLIC LICENSE
-                       Version 2.1, February 1999
-
- Copyright (C) 1991, 1999 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-[This is the first released version of the Lesser GPL.  It also counts
- as the successor of the GNU Library Public License, version 2, hence
- the version number 2.1.]
-
-                            Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
-  This license, the Lesser General Public License, applies to some
-specially designated software packages--typically libraries--of the
-Free Software Foundation and other authors who decide to use it.  You
-can use it too, but we suggest you first think carefully about whether
-this license or the ordinary General Public License is the better
-strategy to use in any particular case, based on the explanations below.
-
-  When we speak of free software, we are referring to freedom of use,
-not price.  Our General Public Licenses are designed to make sure that
-you have the freedom to distribute copies of free software (and charge
-for this service if you wish); that you receive source code or can get
-it if you want it; that you can change the software and use pieces of
-it in new free programs; and that you are informed that you can do
-these things.
-
-  To protect your rights, we need to make restrictions that forbid
-distributors to deny you these rights or to ask you to surrender these
-rights.  These restrictions translate to certain responsibilities for
-you if you distribute copies of the library or if you modify it.
-
-  For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you.  You must make sure that they, too, receive or can get the source
-code.  If you link other code with the library, you must provide
-complete object files to the recipients, so that they can relink them
-with the library after making changes to the library and recompiling
-it.  And you must show them these terms so they know their rights.
-
-  We protect your rights with a two-step method: (1) we copyright the
-library, and (2) we offer you this license, which gives you legal
-permission to copy, distribute and/or modify the library.
-
-  To protect each distributor, we want to make it very clear that
-there is no warranty for the free library.  Also, if the library is
-modified by someone else and passed on, the recipients should know
-that what they have is not the original version, so that the original
-author's reputation will not be affected by problems that might be
-introduced by others.
-\f
-  Finally, software patents pose a constant threat to the existence of
-any free program.  We wish to make sure that a company cannot
-effectively restrict the users of a free program by obtaining a
-restrictive license from a patent holder.  Therefore, we insist that
-any patent license obtained for a version of the library must be
-consistent with the full freedom of use specified in this license.
-
-  Most GNU software, including some libraries, is covered by the
-ordinary GNU General Public License.  This license, the GNU Lesser
-General Public License, applies to certain designated libraries, and
-is quite different from the ordinary General Public License.  We use
-this license for certain libraries in order to permit linking those
-libraries into non-free programs.
-
-  When a program is linked with a library, whether statically or using
-a shared library, the combination of the two is legally speaking a
-combined work, a derivative of the original library.  The ordinary
-General Public License therefore permits such linking only if the
-entire combination fits its criteria of freedom.  The Lesser General
-Public License permits more lax criteria for linking other code with
-the library.
-
-  We call this license the "Lesser" General Public License because it
-does Less to protect the user's freedom than the ordinary General
-Public License.  It also provides other free software developers Less
-of an advantage over competing non-free programs.  These disadvantages
-are the reason we use the ordinary General Public License for many
-libraries.  However, the Lesser license provides advantages in certain
-special circumstances.
-
-  For example, on rare occasions, there may be a special need to
-encourage the widest possible use of a certain library, so that it becomes
-a de-facto standard.  To achieve this, non-free programs must be
-allowed to use the library.  A more frequent case is that a free
-library does the same job as widely used non-free libraries.  In this
-case, there is little to gain by limiting the free library to free
-software only, so we use the Lesser General Public License.
-
-  In other cases, permission to use a particular library in non-free
-programs enables a greater number of people to use a large body of
-free software.  For example, permission to use the GNU C Library in
-non-free programs enables many more people to use the whole GNU
-operating system, as well as its variant, the GNU/Linux operating
-system.
-
-  Although the Lesser General Public License is Less protective of the
-users' freedom, it does ensure that the user of a program that is
-linked with the Library has the freedom and the wherewithal to run
-that program using a modified version of the Library.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.  Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library".  The
-former contains code derived from the library, whereas the latter must
-be combined with the library in order to run.
-\f
-                  GNU LESSER GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License Agreement applies to any software library or other
-program which contains a notice placed by the copyright holder or
-other authorized party saying it may be distributed under the terms of
-this Lesser General Public License (also called "this License").
-Each licensee is addressed as "you".
-
-  A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
-  The "Library", below, refers to any such software library or work
-which has been distributed under these terms.  A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language.  (Hereinafter, translation is
-included without limitation in the term "modification".)
-
-  "Source code" for a work means the preferred form of the work for
-making modifications to it.  For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
-  Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it).  Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-
-  1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
-  You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-\f
-  2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) The modified work must itself be a software library.
-
-    b) You must cause the files modified to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    c) You must cause the whole of the work to be licensed at no
-    charge to all third parties under the terms of this License.
-
-    d) If a facility in the modified Library refers to a function or a
-    table of data to be supplied by an application program that uses
-    the facility, other than as an argument passed when the facility
-    is invoked, then you must make a good faith effort to ensure that,
-    in the event an application does not supply such function or
-    table, the facility still operates, and performs whatever part of
-    its purpose remains meaningful.
-
-    (For example, a function in a library to compute square roots has
-    a purpose that is entirely well-defined independent of the
-    application.  Therefore, Subsection 2d requires that any
-    application-supplied function or table used by this function must
-    be optional: if the application does not supply it, the square
-    root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library.  To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License.  (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.)  Do not make any other change in
-these notices.
-\f
-  Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
-  This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
-  4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
-  If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library".  Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
-  However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library".  The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
-  When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library.  The
-threshold for this to be true is not precisely defined by law.
-
-  If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work.  (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
-  Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-\f
-  6. As an exception to the Sections above, you may also combine or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
-  You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License.  You must supply a copy of this License.  If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License.  Also, you must do one
-of these things:
-
-    a) Accompany the work with the complete corresponding
-    machine-readable source code for the Library including whatever
-    changes were used in the work (which must be distributed under
-    Sections 1 and 2 above); and, if the work is an executable linked
-    with the Library, with the complete machine-readable "work that
-    uses the Library", as object code and/or source code, so that the
-    user can modify the Library and then relink to produce a modified
-    executable containing the modified Library.  (It is understood
-    that the user who changes the contents of definitions files in the
-    Library will not necessarily be able to recompile the application
-    to use the modified definitions.)
-
-    b) Use a suitable shared library mechanism for linking with the
-    Library.  A suitable mechanism is one that (1) uses at run time a
-    copy of the library already present on the user's computer system,
-    rather than copying library functions into the executable, and (2)
-    will operate properly with a modified version of the library, if
-    the user installs one, as long as the modified version is
-    interface-compatible with the version that the work was made with.
-
-    c) Accompany the work with a written offer, valid for at
-    least three years, to give the same user the materials
-    specified in Subsection 6a, above, for a charge no more
-    than the cost of performing this distribution.
-
-    d) If distribution of the work is made by offering access to copy
-    from a designated place, offer equivalent access to copy the above
-    specified materials from the same place.
-
-    e) Verify that the user has already received a copy of these
-    materials or that you have already sent this user a copy.
-
-  For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it.  However, as a special exception,
-the materials to be distributed need not include anything that is
-normally distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
-  It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system.  Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-\f
-  7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
-    a) Accompany the combined library with a copy of the same work
-    based on the Library, uncombined with any other library
-    facilities.  This must be distributed under the terms of the
-    Sections above.
-
-    b) Give prominent notice with the combined library of the fact
-    that part of it is a work based on the Library, and explaining
-    where to find the accompanying uncombined form of the same work.
-
-  8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License.  Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License.  However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
-  9. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Library or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
-  10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties with
-this License.
-\f
-  11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded.  In such case, this License incorporates the limitation as if
-written in the body of this License.
-
-  13. The Free Software Foundation may publish revised and/or new
-versions of the Lesser General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation.  If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-\f
-  14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission.  For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this.  Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
-                            NO WARRANTY
-
-  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
-                     END OF TERMS AND CONDITIONS
diff --git a/tools/libxc/Makefile b/tools/libxc/Makefile
deleted file mode 100644 (file)
index 44fa048..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-XEN_ROOT = $(CURDIR)/../..
-include $(XEN_ROOT)/tools/Rules.mk
-
-MAJOR    = 4.15
-MINOR    = 0
-
-ifeq ($(CONFIG_LIBXC_MINIOS),y)
-# Save/restore of a domain is currently incompatible with a stubdom environment
-override CONFIG_MIGRATE := n
-endif
-
-LINK_FILES := xc_private.h xc_core.h xc_core_x86.h xc_core_arm.h xc_bitops.h
-
-$(LINK_FILES):
-       ln -sf $(XEN_ROOT)/tools/libs/ctrl/$(notdir $@) $@
-
-GUEST_SRCS-y :=
-GUEST_SRCS-y += xg_private.c
-GUEST_SRCS-y += xg_domain.c
-GUEST_SRCS-y += xg_suspend.c
-ifeq ($(CONFIG_MIGRATE),y)
-GUEST_SRCS-y += xg_sr_common.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_common_x86.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_common_x86_pv.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_restore_x86_pv.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_restore_x86_hvm.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_save_x86_pv.c
-GUEST_SRCS-$(CONFIG_X86) += xg_sr_save_x86_hvm.c
-GUEST_SRCS-y += xg_sr_restore.c
-GUEST_SRCS-y += xg_sr_save.c
-GUEST_SRCS-y += xg_offline_page.c
-else
-GUEST_SRCS-y += xg_nomigrate.c
-endif
-
-vpath %.c ../../xen/common/libelf
-CFLAGS += -I../../xen/common/libelf
-
-ELF_SRCS-y += libelf-tools.c libelf-loader.c
-ELF_SRCS-y += libelf-dominfo.c
-
-GUEST_SRCS-y += $(ELF_SRCS-y)
-
-$(patsubst %.c,%.o,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
-$(patsubst %.c,%.opic,$(ELF_SRCS-y)): CFLAGS += -Wno-pointer-sign
-
-ifeq ($(CONFIG_X86),y) # Add libx86 to the build
-vpath %.c ../../xen/lib/x86
-
-GUEST_SRCS-y                 += cpuid.c msr.c
-endif
-
-# new domain builder
-GUEST_SRCS-y                 += xg_dom_core.c
-GUEST_SRCS-y                 += xg_dom_boot.c
-GUEST_SRCS-y                 += xg_dom_elfloader.c
-GUEST_SRCS-$(CONFIG_X86)     += xg_dom_bzimageloader.c
-GUEST_SRCS-$(CONFIG_X86)     += xg_dom_decompress_lz4.c
-GUEST_SRCS-$(CONFIG_X86)     += xg_dom_hvmloader.c
-GUEST_SRCS-$(CONFIG_ARM)     += xg_dom_armzimageloader.c
-GUEST_SRCS-y                 += xg_dom_binloader.c
-GUEST_SRCS-y                 += xg_dom_compat_linux.c
-
-GUEST_SRCS-$(CONFIG_X86)     += xg_dom_x86.c
-GUEST_SRCS-$(CONFIG_X86)     += xg_cpuid_x86.c
-GUEST_SRCS-$(CONFIG_ARM)     += xg_dom_arm.c
-
-ifeq ($(CONFIG_LIBXC_MINIOS),y)
-GUEST_SRCS-y                 += xg_dom_decompress_unsafe.c
-GUEST_SRCS-y                 += xg_dom_decompress_unsafe_bzip2.c
-GUEST_SRCS-y                 += xg_dom_decompress_unsafe_lzma.c
-GUEST_SRCS-y                 += xg_dom_decompress_unsafe_lzo1x.c
-GUEST_SRCS-y                 += xg_dom_decompress_unsafe_xz.c
-endif
-
--include $(XEN_TARGET_ARCH)/Makefile
-
-CFLAGS   += -Werror -Wmissing-prototypes
-CFLAGS   += -I. -I./include $(CFLAGS_xeninclude)
-CFLAGS   += -D__XEN_TOOLS__
-
-# Needed for posix_fadvise64() in xc_linux.c
-CFLAGS-$(CONFIG_Linux) += -D_GNU_SOURCE
-
-CFLAGS += $(PTHREAD_CFLAGS)
-CFLAGS += $(CFLAGS_libxentoollog)
-CFLAGS += $(CFLAGS_libxenevtchn)
-CFLAGS += $(CFLAGS_libxendevicemodel)
-
-GUEST_LIB_OBJS := $(patsubst %.c,%.o,$(GUEST_SRCS-y))
-GUEST_PIC_OBJS := $(patsubst %.c,%.opic,$(GUEST_SRCS-y))
-
-$(GUEST_LIB_OBJS) $(GUEST_PIC_OBJS): CFLAGS += -include $(XEN_ROOT)/tools/config.h
-
-# libxenguest includes xc_private.h, so needs this despite not using
-# this functionality directly.
-$(GUEST_LIB_OBJS) $(GUEST_PIC_OBJS): CFLAGS += $(CFLAGS_libxencall) $(CFLAGS_libxenforeignmemory)
-
-LIB += libxenguest.a
-ifneq ($(nosharedlibs),y)
-LIB += libxenguest.so libxenguest.so.$(MAJOR) libxenguest.so.$(MAJOR).$(MINOR)
-endif
-
-genpath-target = $(call buildmakevars2header,_paths.h)
-$(eval $(genpath-target))
-
-xc_private.h: _paths.h
-
-$(GUEST_LIB_OBJS) $(GUEST_PIC_OBJS): $(LINK_FILES)
-
-PKG_CONFIG := xenguest.pc
-PKG_CONFIG_VERSION := $(MAJOR).$(MINOR)
-
-xenguest.pc: PKG_CONFIG_NAME = Xenguest
-xenguest.pc: PKG_CONFIG_DESC = The Xenguest library for Xen hypervisor
-xenguest.pc: PKG_CONFIG_USELIBS = $(SHLIB_libxenguest)
-xenguest.pc: PKG_CONFIG_LIB = xenguest
-xenguest.pc: PKG_CONFIG_REQPRIV = xentoollog,xencall,xenforeignmemory,xenevtchn
-
-$(PKG_CONFIG_DIR)/xenguest.pc: PKG_CONFIG_NAME = Xenguest
-$(PKG_CONFIG_DIR)/xenguest.pc: PKG_CONFIG_DESC = The Xenguest library for Xen hypervisor
-$(PKG_CONFIG_DIR)/xenguest.pc: PKG_CONFIG_USELIBS = $(SHLIB_libxenguest)
-$(PKG_CONFIG_DIR)/xenguest.pc: PKG_CONFIG_LIB = xenguest
-$(PKG_CONFIG_DIR)/xenguest.pc: PKG_CONFIG_REQPRIV = xentoollog,xencall,xenforeignmemory,xenevtchn,xencontrol
-
-ifneq ($(CONFIG_LIBXC_MINIOS),y)
-PKG_CONFIG_INST := $(PKG_CONFIG)
-$(PKG_CONFIG_INST): PKG_CONFIG_PREFIX = $(prefix)
-$(PKG_CONFIG_INST): PKG_CONFIG_INCDIR = $(includedir)
-$(PKG_CONFIG_INST): PKG_CONFIG_LIBDIR = $(libdir)
-endif
-
-PKG_CONFIG_LOCAL := $(foreach pc,$(PKG_CONFIG),$(PKG_CONFIG_DIR)/$(pc))
-
-$(PKG_CONFIG_LOCAL): PKG_CONFIG_PREFIX = $(XEN_ROOT)
-$(PKG_CONFIG_LOCAL): PKG_CONFIG_INCDIR = $(XEN_libxenctrl)/include
-$(PKG_CONFIG_LOCAL): PKG_CONFIG_LIBDIR = $(CURDIR)
-$(PKG_CONFIG_LOCAL): PKG_CONFIG_CFLAGS_LOCAL = $(CFLAGS_xeninclude)
-
-.PHONY: all
-all: build
-
-.PHONY: build
-build:
-       $(MAKE) libs
-
-.PHONY: libs
-libs: $(LIB) $(PKG_CONFIG_INST) $(PKG_CONFIG_LOCAL)
-
-.PHONY: install
-install: build
-       $(INSTALL_DIR) $(DESTDIR)$(libdir)
-       $(INSTALL_DIR) $(DESTDIR)$(includedir)
-       $(INSTALL_SHLIB) libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)
-       $(INSTALL_DATA) libxenguest.a $(DESTDIR)$(libdir)
-       $(SYMLINK_SHLIB) libxenguest.so.$(MAJOR).$(MINOR) $(DESTDIR)$(libdir)/libxenguest.so.$(MAJOR)
-       $(SYMLINK_SHLIB) libxenguest.so.$(MAJOR) $(DESTDIR)$(libdir)/libxenguest.so
-       $(INSTALL_DATA) include/xenguest.h $(DESTDIR)$(includedir)
-       $(INSTALL_DATA) xenguest.pc $(DESTDIR)$(PKG_INSTALLDIR)
-
-.PHONY: uninstall
-uninstall:
-       rm -f $(DESTDIR)$(PKG_INSTALLDIR)/xenguest.pc
-       rm -f $(DESTDIR)$(includedir)/xenguest.h
-       rm -f $(DESTDIR)$(libdir)/libxenguest.so
-       rm -f $(DESTDIR)$(libdir)/libxenguest.so.$(MAJOR)
-       rm -f $(DESTDIR)$(libdir)/libxenguest.so.$(MAJOR).$(MINOR)
-       rm -f $(DESTDIR)$(libdir)/libxenguest.a
-
-.PHONY: TAGS
-TAGS:
-       etags -t *.c *.h
-
-.PHONY: clean
-clean:
-       rm -rf *.rpm $(LIB) *~ $(DEPS_RM) \
-            _paths.h \
-           $(LINK_FILES) \
-           xenguest.pc \
-            $(GUEST_LIB_OBJS) $(GUEST_PIC_OBJS)
-
-.PHONY: distclean
-distclean: clean
-
-.PHONY: rpm
-rpm: build
-       rm -rf staging
-       mkdir staging
-       mkdir staging/i386
-       rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \
-               --define "_rpmdir$$PWD/staging" -bb rpm.spec
-       mv staging/i386/*.rpm .
-       rm -rf staging
-
-# libxenguest
-
-libxenguest.a: $(GUEST_LIB_OBJS)
-       $(AR) rc $@ $^
-
-libxenguest.so: libxenguest.so.$(MAJOR)
-       $(SYMLINK_SHLIB) $< $@
-libxenguest.so.$(MAJOR): libxenguest.so.$(MAJOR).$(MINOR)
-       $(SYMLINK_SHLIB) $< $@
-
-ifeq ($(CONFIG_MiniOS),y)
-zlib-options =
-else
-zlib-options = $(ZLIB)
-endif
-
-xc_dom_bzimageloader.o: CFLAGS += $(filter -D%,$(zlib-options))
-xc_dom_bzimageloader.opic: CFLAGS += $(filter -D%,$(zlib-options))
-
-libxenguest.so.$(MAJOR).$(MINOR): COMPRESSION_LIBS = $(filter -l%,$(zlib-options))
-libxenguest.so.$(MAJOR).$(MINOR): $(GUEST_PIC_OBJS)
-       $(CC) $(LDFLAGS) -Wl,$(SONAME_LDFLAG) -Wl,libxenguest.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(GUEST_PIC_OBJS) $(COMPRESSION_LIBS) -lz $(LDLIBS_libxenevtchn) $(LDLIBS_libxenctrl) $(PTHREAD_LIBS) $(APPEND_LDFLAGS)
-
--include $(DEPS_INCLUDE)
-
diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
deleted file mode 100644 (file)
index 4643384..0000000
+++ /dev/null
@@ -1,327 +0,0 @@
-/******************************************************************************
- * xenguest.h
- *
- * A library for guest domain management in Xen.
- *
- * Copyright (c) 2003-2004, K A Fraser.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XENGUEST_H
-#define XENGUEST_H
-
-#include <xenctrl_dom.h>
-
-#define XC_NUMA_NO_NODE   (~0U)
-
-#define XCFLAGS_LIVE      (1 << 0)
-#define XCFLAGS_DEBUG     (1 << 1)
-
-#define X86_64_B_SIZE   64 
-#define X86_32_B_SIZE   32
-
-/*
- * User not using xc_suspend_* / xc_await_suspent may not want to
- * include the full libxenevtchn API here.
- */
-struct xenevtchn_handle;
-
-/* For save's precopy_policy(). */
-struct precopy_stats
-{
-    unsigned int iteration;
-    unsigned int total_written;
-    long dirty_count; /* -1 if unknown */
-};
-
-/*
- * A precopy_policy callback may not be running in the same address
- * space as libxc an so precopy_stats is passed by value.
- */
-typedef int (*precopy_policy_t)(struct precopy_stats, void *);
-
-/* callbacks provided by xc_domain_save */
-struct save_callbacks {
-    /*
-     * Called after expiration of checkpoint interval,
-     * to suspend the guest.
-     */
-    int (*suspend)(void *data);
-
-    /*
-     * Called before and after every batch of page data sent during
-     * the precopy phase of a live migration to ask the caller what
-     * to do next based on the current state of the precopy migration.
-     *
-     * Should return one of the values listed below:
-     */
-#define XGS_POLICY_ABORT          (-1) /* Abandon the migration entirely
-                                        * and tidy up. */
-#define XGS_POLICY_CONTINUE_PRECOPY 0  /* Remain in the precopy phase. */
-#define XGS_POLICY_STOP_AND_COPY    1  /* Immediately suspend and transmit the
-                                        * remaining dirty pages. */
-    precopy_policy_t precopy_policy;
-
-    /*
-     * Called after the guest's dirty pages have been
-     *  copied into an output buffer.
-     * Callback function resumes the guest & the device model,
-     *  returns to xc_domain_save.
-     * xc_domain_save then flushes the output buffer, while the
-     *  guest continues to run.
-     */
-    int (*postcopy)(void *data);
-
-    /*
-     * Called after the memory checkpoint has been flushed
-     * out into the network. Typical actions performed in this
-     * callback include:
-     *   (a) send the saved device model state (for HVM guests),
-     *   (b) wait for checkpoint ack
-     *   (c) release the network output buffer pertaining to the acked checkpoint.
-     *   (c) sleep for the checkpoint interval.
-     *
-     * returns:
-     * 0: terminate checkpointing gracefully
-     * 1: take another checkpoint
-     */
-    int (*checkpoint)(void *data);
-
-    /*
-     * Called after the checkpoint callback.
-     *
-     * returns:
-     * 0: terminate checkpointing gracefully
-     * 1: take another checkpoint
-     */
-    int (*wait_checkpoint)(void *data);
-
-    /* Enable qemu-dm logging dirty pages to xen */
-    int (*switch_qemu_logdirty)(uint32_t domid, unsigned enable, void *data); /* HVM only */
-
-    /* to be provided as the last argument to each callback function */
-    void *data;
-};
-
-/* Type of stream.  Plain, or using a continuous replication protocol? */
-typedef enum {
-    XC_STREAM_PLAIN,
-    XC_STREAM_REMUS,
-    XC_STREAM_COLO,
-} xc_stream_type_t;
-
-/**
- * This function will save a running domain.
- *
- * @param xch a handle to an open hypervisor interface
- * @param io_fd the file descriptor to save a domain to
- * @param dom the id of the domain
- * @param flags XCFLAGS_xxx
- * @param stream_type XC_STREAM_PLAIN if the far end of the stream
- *        doesn't use checkpointing
- * @param recv_fd Only used for XC_STREAM_COLO.  Contains backchannel from
- *        the destination side.
- * @return 0 on success, -1 on failure
- */
-int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
-                   uint32_t flags, struct save_callbacks *callbacks,
-                   xc_stream_type_t stream_type, int recv_fd);
-
-/* callbacks provided by xc_domain_restore */
-struct restore_callbacks {
-    /*
-     * Called once the STATIC_DATA_END record has been received/inferred.
-     *
-     * For compatibility with older streams, provides a list of static data
-     * expected to be found in the stream, which was missing.  A higher level
-     * toolstack is responsible for providing any necessary compatibiltiy.
-     */
-#define XGR_SDD_MISSING_CPUID (1 << 0)
-#define XGR_SDD_MISSING_MSR   (1 << 1)
-    int (*static_data_done)(unsigned int missing, void *data);
-
-    /* Called after a new checkpoint to suspend the guest. */
-    int (*suspend)(void *data);
-
-    /*
-     * Called after the secondary vm is ready to resume.
-     * Callback function resumes the guest & the device model,
-     * returns to xc_domain_restore.
-     */
-    int (*postcopy)(void *data);
-
-    /*
-     * A checkpoint record has been found in the stream.
-     * returns:
-     */
-#define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
-#define XGR_CHECKPOINT_SUCCESS  1 /* Continue reading more data from the stream */
-#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
-    int (*checkpoint)(void *data);
-
-    /*
-     * Called after the checkpoint callback.
-     *
-     * returns:
-     * 0: terminate checkpointing gracefully
-     * 1: take another checkpoint
-     */
-    int (*wait_checkpoint)(void *data);
-
-    /*
-     * callback to send store gfn and console gfn to xl
-     * if we want to resume vm before xc_domain_save()
-     * exits.
-     */
-    void (*restore_results)(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
-                            void *data);
-
-    /* to be provided as the last argument to each callback function */
-    void *data;
-};
-
-/**
- * This function will restore a saved domain.
- *
- * Domain is restored in a suspended state ready to be unpaused.
- *
- * @param xch a handle to an open hypervisor interface
- * @param io_fd the file descriptor to restore a domain from
- * @param dom the id of the domain
- * @param store_evtchn the xenstore event channel for this domain to use
- * @param store_mfn filled with the gfn of the store page
- * @param store_domid the backend domain for xenstore
- * @param console_evtchn the console event channel for this domain to use
- * @param console_mfn filled with the gfn of the console page
- * @param console_domid the backend domain for xenconsole
- * @param stream_type XC_STREAM_PLAIN if the far end of the stream is using
- *        checkpointing
- * @param callbacks non-NULL to receive a callback to restore toolstack
- *        specific data
- * @param send_back_fd Only used for XC_STREAM_COLO.  Contains backchannel to
- *        the source side.
- * @return 0 on success, -1 on failure
- */
-int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      uint32_t store_domid, unsigned int console_evtchn,
-                      unsigned long *console_mfn, uint32_t console_domid,
-                      xc_stream_type_t stream_type,
-                      struct restore_callbacks *callbacks, int send_back_fd);
-
-/**
- * This function will create a domain for a paravirtualized Linux
- * using file names pointing to kernel and ramdisk
- *
- * @parm xch a handle to an open hypervisor interface
- * @parm domid the id of the domain
- * @parm mem_mb memory size in megabytes
- * @parm image_name name of the kernel image file
- * @parm ramdisk_name name of the ramdisk image file
- * @parm cmdline command line string
- * @parm flags domain creation flags
- * @parm store_evtchn the store event channel for this domain to use
- * @parm store_mfn returned with the mfn of the store page
- * @parm console_evtchn the console event channel for this domain to use
- * @parm conole_mfn returned with the mfn of the console page
- * @return 0 on success, -1 on failure
- */
-int xc_linux_build(xc_interface *xch,
-                   uint32_t domid,
-                   unsigned int mem_mb,
-                   const char *image_name,
-                   const char *ramdisk_name,
-                   const char *cmdline,
-                   const char *features,
-                   unsigned long flags,
-                   unsigned int store_evtchn,
-                   unsigned long *store_mfn,
-                   unsigned int console_evtchn,
-                   unsigned long *console_mfn);
-
-/*
- * Sets *lockfd to -1.
- * Has deallocated everything even on error.
- */
-int xc_suspend_evtchn_release(xc_interface *xch,
-                              struct xenevtchn_handle *xce,
-                              uint32_t domid, int suspend_evtchn, int *lockfd);
-
-/**
- * This function eats the initial notification.
- * xce must not be used for anything else
- * See xc_suspend_evtchn_init_sane re lockfd.
- */
-int xc_suspend_evtchn_init_exclusive(xc_interface *xch,
-                                     struct xenevtchn_handle *xce,
-                                     uint32_t domid, int port, int *lockfd);
-
-/* xce must not be used for anything else */
-int xc_await_suspend(xc_interface *xch, struct xenevtchn_handle *xce,
-                     int suspend_evtchn);
-
-/**
- * The port will be signaled immediately after this call
- * The caller should check the domain status and look for the next event
- * On success, *lockfd will be set to >=0 and *lockfd must be preserved
- * and fed to xc_suspend_evtchn_release.  (On error *lockfd is
- * undefined and xc_suspend_evtchn_release is not allowed.)
- */
-int xc_suspend_evtchn_init_sane(xc_interface *xch,
-                                struct xenevtchn_handle *xce,
-                                uint32_t domid, int port, int *lockfd);
-
-int xc_mark_page_online(xc_interface *xch, unsigned long start,
-                        unsigned long end, uint32_t *status);
-
-int xc_mark_page_offline(xc_interface *xch, unsigned long start,
-                          unsigned long end, uint32_t *status);
-
-int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
-                                 unsigned long end, uint32_t *status);
-
-int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn);
-
-
-/**
- * Memory related information, such as PFN types, the P2M table,
- * the guest word width and the guest page table levels.
- */
-struct xc_domain_meminfo {
-    unsigned int pt_levels;
-    unsigned int guest_width;
-    xen_pfn_t *pfn_type;
-    xen_pfn_t *p2m_table;
-    unsigned long p2m_size;
-};
-
-int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
-                          struct xc_domain_meminfo *minfo);
-
-int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *mem);
-
-/**
- * This function map m2p table
- * @parm xch a handle to an open hypervisor interface
- * @parm max_mfn the max pfn
- * @parm prot the flags to map, such as read/write etc
- * @parm mfn0 return the first mfn, can be NULL
- * @return mapped m2p table on success, NULL on failure
- */
-xen_pfn_t *xc_map_m2p(xc_interface *xch,
-                      unsigned long max_mfn,
-                      int prot,
-                      unsigned long *mfn0);
-#endif /* XENGUEST_H */
diff --git a/tools/libxc/xg_cpuid_x86.c b/tools/libxc/xg_cpuid_x86.c
deleted file mode 100644 (file)
index 0f24d6d..0000000
+++ /dev/null
@@ -1,665 +0,0 @@
-/******************************************************************************
- * xc_cpuid_x86.c
- *
- * Compute cpuid of a domain.
- *
- * Copyright (c) 2008, Citrix Systems, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stdlib.h>
-#include <stdbool.h>
-#include <limits.h>
-#include "xc_private.h"
-#include "xc_bitops.h"
-#include <xen/hvm/params.h>
-#include <xen-tools/libs.h>
-
-enum {
-#define XEN_CPUFEATURE(name, value) X86_FEATURE_##name = value,
-#include <xen/arch-x86/cpufeatureset.h>
-};
-
-#include <xen/asm/x86-vendors.h>
-
-#include <xen/lib/x86/cpu-policy.h>
-
-#define bitmaskof(idx)      (1u << ((idx) & 31))
-#define featureword_of(idx) ((idx) >> 5)
-
-int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps)
-{
-    DECLARE_SYSCTL;
-    int ret;
-
-    sysctl.cmd = XEN_SYSCTL_get_cpu_levelling_caps;
-    ret = do_sysctl(xch, &sysctl);
-
-    if ( !ret )
-        *caps = sysctl.u.cpu_levelling_caps.caps;
-
-    return ret;
-}
-
-int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
-                          uint32_t *nr_features, uint32_t *featureset)
-{
-    DECLARE_SYSCTL;
-    DECLARE_HYPERCALL_BOUNCE(featureset,
-                             *nr_features * sizeof(*featureset),
-                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
-    int ret;
-
-    if ( xc_hypercall_bounce_pre(xch, featureset) )
-        return -1;
-
-    sysctl.cmd = XEN_SYSCTL_get_cpu_featureset;
-    sysctl.u.cpu_featureset.index = index;
-    sysctl.u.cpu_featureset.nr_features = *nr_features;
-    set_xen_guest_handle(sysctl.u.cpu_featureset.features, featureset);
-
-    ret = do_sysctl(xch, &sysctl);
-
-    xc_hypercall_bounce_post(xch, featureset);
-
-    if ( !ret )
-        *nr_features = sysctl.u.cpu_featureset.nr_features;
-
-    return ret;
-}
-
-uint32_t xc_get_cpu_featureset_size(void)
-{
-    return FEATURESET_NR_ENTRIES;
-}
-
-const uint32_t *xc_get_static_cpu_featuremask(
-    enum xc_static_cpu_featuremask mask)
-{
-    static const uint32_t masks[][FEATURESET_NR_ENTRIES] = {
-#define MASK(x) [XC_FEATUREMASK_ ## x] = INIT_ ## x ## _FEATURES
-
-        MASK(KNOWN),
-        MASK(SPECIAL),
-        MASK(PV_MAX),
-        MASK(PV_DEF),
-        MASK(HVM_SHADOW_MAX),
-        MASK(HVM_SHADOW_DEF),
-        MASK(HVM_HAP_MAX),
-        MASK(HVM_HAP_DEF),
-
-#undef MASK
-    };
-
-    if ( (unsigned int)mask >= ARRAY_SIZE(masks) )
-        return NULL;
-
-    return masks[mask];
-}
-
-int xc_get_cpu_policy_size(xc_interface *xch, uint32_t *nr_leaves,
-                           uint32_t *nr_msrs)
-{
-    struct xen_sysctl sysctl = {};
-    int ret;
-
-    sysctl.cmd = XEN_SYSCTL_get_cpu_policy;
-
-    ret = do_sysctl(xch, &sysctl);
-
-    if ( !ret )
-    {
-        *nr_leaves = sysctl.u.cpu_policy.nr_leaves;
-        *nr_msrs = sysctl.u.cpu_policy.nr_msrs;
-    }
-
-    return ret;
-}
-
-int xc_get_system_cpu_policy(xc_interface *xch, uint32_t index,
-                             uint32_t *nr_leaves, xen_cpuid_leaf_t *leaves,
-                             uint32_t *nr_msrs, xen_msr_entry_t *msrs)
-{
-    struct xen_sysctl sysctl = {};
-    DECLARE_HYPERCALL_BOUNCE(leaves,
-                             *nr_leaves * sizeof(*leaves),
-                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
-    DECLARE_HYPERCALL_BOUNCE(msrs,
-                             *nr_msrs * sizeof(*msrs),
-                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
-    int ret;
-
-    if ( xc_hypercall_bounce_pre(xch, leaves) ||
-         xc_hypercall_bounce_pre(xch, msrs) )
-        return -1;
-
-    sysctl.cmd = XEN_SYSCTL_get_cpu_policy;
-    sysctl.u.cpu_policy.index = index;
-    sysctl.u.cpu_policy.nr_leaves = *nr_leaves;
-    set_xen_guest_handle(sysctl.u.cpu_policy.cpuid_policy, leaves);
-    sysctl.u.cpu_policy.nr_msrs = *nr_msrs;
-    set_xen_guest_handle(sysctl.u.cpu_policy.msr_policy, msrs);
-
-    ret = do_sysctl(xch, &sysctl);
-
-    xc_hypercall_bounce_post(xch, leaves);
-    xc_hypercall_bounce_post(xch, msrs);
-
-    if ( !ret )
-    {
-        *nr_leaves = sysctl.u.cpu_policy.nr_leaves;
-        *nr_msrs = sysctl.u.cpu_policy.nr_msrs;
-    }
-
-    return ret;
-}
-
-int xc_get_domain_cpu_policy(xc_interface *xch, uint32_t domid,
-                             uint32_t *nr_leaves, xen_cpuid_leaf_t *leaves,
-                             uint32_t *nr_msrs, xen_msr_entry_t *msrs)
-{
-    DECLARE_DOMCTL;
-    DECLARE_HYPERCALL_BOUNCE(leaves,
-                             *nr_leaves * sizeof(*leaves),
-                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
-    DECLARE_HYPERCALL_BOUNCE(msrs,
-                             *nr_msrs * sizeof(*msrs),
-                             XC_HYPERCALL_BUFFER_BOUNCE_OUT);
-    int ret;
-
-    if ( xc_hypercall_bounce_pre(xch, leaves) ||
-         xc_hypercall_bounce_pre(xch, msrs) )
-        return -1;
-
-    domctl.cmd = XEN_DOMCTL_get_cpu_policy;
-    domctl.domain = domid;
-    domctl.u.cpu_policy.nr_leaves = *nr_leaves;
-    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
-    domctl.u.cpu_policy.nr_msrs = *nr_msrs;
-    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
-
-    ret = do_domctl(xch, &domctl);
-
-    xc_hypercall_bounce_post(xch, leaves);
-    xc_hypercall_bounce_post(xch, msrs);
-
-    if ( !ret )
-    {
-        *nr_leaves = domctl.u.cpu_policy.nr_leaves;
-        *nr_msrs = domctl.u.cpu_policy.nr_msrs;
-    }
-
-    return ret;
-}
-
-int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
-                             uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
-                             uint32_t nr_msrs, xen_msr_entry_t *msrs,
-                             uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
-                             uint32_t *err_msr_p)
-{
-    DECLARE_DOMCTL;
-    DECLARE_HYPERCALL_BOUNCE(leaves,
-                             nr_leaves * sizeof(*leaves),
-                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
-    DECLARE_HYPERCALL_BOUNCE(msrs,
-                             nr_msrs * sizeof(*msrs),
-                             XC_HYPERCALL_BUFFER_BOUNCE_IN);
-    int ret;
-
-    if ( err_leaf_p )
-        *err_leaf_p = -1;
-    if ( err_subleaf_p )
-        *err_subleaf_p = -1;
-    if ( err_msr_p )
-        *err_msr_p = -1;
-
-    if ( xc_hypercall_bounce_pre(xch, leaves) )
-        return -1;
-
-    if ( xc_hypercall_bounce_pre(xch, msrs) )
-        return -1;
-
-    domctl.cmd = XEN_DOMCTL_set_cpu_policy;
-    domctl.domain = domid;
-    domctl.u.cpu_policy.nr_leaves = nr_leaves;
-    set_xen_guest_handle(domctl.u.cpu_policy.cpuid_policy, leaves);
-    domctl.u.cpu_policy.nr_msrs = nr_msrs;
-    set_xen_guest_handle(domctl.u.cpu_policy.msr_policy, msrs);
-    domctl.u.cpu_policy.err_leaf = -1;
-    domctl.u.cpu_policy.err_subleaf = -1;
-    domctl.u.cpu_policy.err_msr = -1;
-
-    ret = do_domctl(xch, &domctl);
-
-    xc_hypercall_bounce_post(xch, leaves);
-    xc_hypercall_bounce_post(xch, msrs);
-
-    if ( err_leaf_p )
-        *err_leaf_p = domctl.u.cpu_policy.err_leaf;
-    if ( err_subleaf_p )
-        *err_subleaf_p = domctl.u.cpu_policy.err_subleaf;
-    if ( err_msr_p )
-        *err_msr_p = domctl.u.cpu_policy.err_msr;
-
-    return ret;
-}
-
-static int compare_leaves(const void *l, const void *r)
-{
-    const xen_cpuid_leaf_t *lhs = l;
-    const xen_cpuid_leaf_t *rhs = r;
-
-    if ( lhs->leaf != rhs->leaf )
-        return lhs->leaf < rhs->leaf ? -1 : 1;
-
-    if ( lhs->subleaf != rhs->subleaf )
-        return lhs->subleaf < rhs->subleaf ? -1 : 1;
-
-    return 0;
-}
-
-static xen_cpuid_leaf_t *find_leaf(
-    xen_cpuid_leaf_t *leaves, unsigned int nr_leaves,
-    const struct xc_xend_cpuid *xend)
-{
-    const xen_cpuid_leaf_t key = { xend->leaf, xend->subleaf };
-
-    return bsearch(&key, leaves, nr_leaves, sizeof(*leaves), compare_leaves);
-}
-
-static int xc_cpuid_xend_policy(
-    xc_interface *xch, uint32_t domid, const struct xc_xend_cpuid *xend)
-{
-    int rc;
-    xc_dominfo_t di;
-    unsigned int nr_leaves, nr_msrs;
-    uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1;
-    /*
-     * Three full policies.  The host, domain max, and domain current for the
-     * domain type.
-     */
-    xen_cpuid_leaf_t *host = NULL, *max = NULL, *cur = NULL;
-    unsigned int nr_host, nr_max, nr_cur;
-
-    if ( xc_domain_getinfo(xch, domid, 1, &di) != 1 ||
-         di.domid != domid )
-    {
-        ERROR("Failed to obtain d%d info", domid);
-        rc = -ESRCH;
-        goto fail;
-    }
-
-    rc = xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs);
-    if ( rc )
-    {
-        PERROR("Failed to obtain policy info size");
-        rc = -errno;
-        goto fail;
-    }
-
-    rc = -ENOMEM;
-    if ( (host = calloc(nr_leaves, sizeof(*host))) == NULL ||
-         (max  = calloc(nr_leaves, sizeof(*max)))  == NULL ||
-         (cur  = calloc(nr_leaves, sizeof(*cur)))  == NULL )
-    {
-        ERROR("Unable to allocate memory for %u CPUID leaves", nr_leaves);
-        goto fail;
-    }
-
-    /* Get the domain's current policy. */
-    nr_msrs = 0;
-    nr_cur = nr_leaves;
-    rc = xc_get_domain_cpu_policy(xch, domid, &nr_cur, cur, &nr_msrs, NULL);
-    if ( rc )
-    {
-        PERROR("Failed to obtain d%d current policy", domid);
-        rc = -errno;
-        goto fail;
-    }
-
-    /* Get the domain's max policy. */
-    nr_msrs = 0;
-    nr_max = nr_leaves;
-    rc = xc_get_system_cpu_policy(xch, di.hvm ? XEN_SYSCTL_cpu_policy_hvm_max
-                                              : XEN_SYSCTL_cpu_policy_pv_max,
-                                  &nr_max, max, &nr_msrs, NULL);
-    if ( rc )
-    {
-        PERROR("Failed to obtain %s max policy", di.hvm ? "hvm" : "pv");
-        rc = -errno;
-        goto fail;
-    }
-
-    /* Get the host policy. */
-    nr_msrs = 0;
-    nr_host = nr_leaves;
-    rc = xc_get_system_cpu_policy(xch, XEN_SYSCTL_cpu_policy_host,
-                                  &nr_host, host, &nr_msrs, NULL);
-    if ( rc )
-    {
-        PERROR("Failed to obtain host policy");
-        rc = -errno;
-        goto fail;
-    }
-
-    rc = -EINVAL;
-    for ( ; xend->leaf != XEN_CPUID_INPUT_UNUSED; ++xend )
-    {
-        xen_cpuid_leaf_t *cur_leaf = find_leaf(cur, nr_cur, xend);
-        const xen_cpuid_leaf_t *max_leaf = find_leaf(max, nr_max, xend);
-        const xen_cpuid_leaf_t *host_leaf = find_leaf(host, nr_host, xend);
-
-        if ( cur_leaf == NULL || max_leaf == NULL || host_leaf == NULL )
-        {
-            ERROR("Missing leaf %#x, subleaf %#x", xend->leaf, xend->subleaf);
-            goto fail;
-        }
-
-        for ( unsigned int i = 0; i < ARRAY_SIZE(xend->policy); i++ )
-        {
-            uint32_t *cur_reg = &cur_leaf->a + i;
-            const uint32_t *max_reg = &max_leaf->a + i;
-            const uint32_t *host_reg = &host_leaf->a + i;
-
-            if ( xend->policy[i] == NULL )
-                continue;
-
-            for ( unsigned int j = 0; j < 32; j++ )
-            {
-                bool val;
-
-                if ( xend->policy[i][j] == '1' )
-                    val = true;
-                else if ( xend->policy[i][j] == '0' )
-                    val = false;
-                else if ( xend->policy[i][j] == 'x' )
-                    val = test_bit(31 - j, max_reg);
-                else if ( xend->policy[i][j] == 'k' ||
-                          xend->policy[i][j] == 's' )
-                    val = test_bit(31 - j, host_reg);
-                else
-                {
-                    ERROR("Bad character '%c' in policy[%d] string '%s'",
-                          xend->policy[i][j], i, xend->policy[i]);
-                    goto fail;
-                }
-
-                clear_bit(31 - j, cur_reg);
-                if ( val )
-                    set_bit(31 - j, cur_reg);
-            }
-        }
-    }
-
-    /* Feed the transformed currrent policy back up to Xen. */
-    rc = xc_set_domain_cpu_policy(xch, domid, nr_cur, cur, 0, NULL,
-                                  &err_leaf, &err_subleaf, &err_msr);
-    if ( rc )
-    {
-        PERROR("Failed to set d%d's policy (err leaf %#x, subleaf %#x, msr %#x)",
-               domid, err_leaf, err_subleaf, err_msr);
-        rc = -errno;
-        goto fail;
-    }
-
-    /* Success! */
-
- fail:
-    free(cur);
-    free(max);
-    free(host);
-
-    return rc;
-}
-
-int xc_cpuid_apply_policy(xc_interface *xch, uint32_t domid, bool restore,
-                          const uint32_t *featureset, unsigned int nr_features,
-                          bool pae,
-                          const struct xc_xend_cpuid *xend)
-{
-    int rc;
-    xc_dominfo_t di;
-    unsigned int i, nr_leaves, nr_msrs;
-    xen_cpuid_leaf_t *leaves = NULL;
-    struct cpuid_policy *p = NULL;
-    uint32_t err_leaf = -1, err_subleaf = -1, err_msr = -1;
-    uint32_t host_featureset[FEATURESET_NR_ENTRIES] = {};
-    uint32_t len = ARRAY_SIZE(host_featureset);
-
-    if ( xc_domain_getinfo(xch, domid, 1, &di) != 1 ||
-         di.domid != domid )
-    {
-        ERROR("Failed to obtain d%d info", domid);
-        rc = -ESRCH;
-        goto out;
-    }
-
-    rc = xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs);
-    if ( rc )
-    {
-        PERROR("Failed to obtain policy info size");
-        rc = -errno;
-        goto out;
-    }
-
-    rc = -ENOMEM;
-    if ( (leaves = calloc(nr_leaves, sizeof(*leaves))) == NULL ||
-         (p = calloc(1, sizeof(*p))) == NULL )
-        goto out;
-
-    /* Get the host policy. */
-    rc = xc_get_cpu_featureset(xch, XEN_SYSCTL_cpu_featureset_host,
-                               &len, host_featureset);
-    if ( rc )
-    {
-        /* Tolerate "buffer too small", as we've got the bits we need. */
-        if ( errno == ENOBUFS )
-            rc = 0;
-        else
-        {
-            PERROR("Failed to obtain host featureset");
-            rc = -errno;
-            goto out;
-        }
-    }
-
-    /* Get the domain's default policy. */
-    nr_msrs = 0;
-    rc = xc_get_system_cpu_policy(xch, di.hvm ? XEN_SYSCTL_cpu_policy_hvm_default
-                                              : XEN_SYSCTL_cpu_policy_pv_default,
-                                  &nr_leaves, leaves, &nr_msrs, NULL);
-    if ( rc )
-    {
-        PERROR("Failed to obtain %s default policy", di.hvm ? "hvm" : "pv");
-        rc = -errno;
-        goto out;
-    }
-
-    rc = x86_cpuid_copy_from_buffer(p, leaves, nr_leaves,
-                                    &err_leaf, &err_subleaf);
-    if ( rc )
-    {
-        ERROR("Failed to deserialise CPUID (err leaf %#x, subleaf %#x) (%d = %s)",
-              err_leaf, err_subleaf, -rc, strerror(-rc));
-        goto out;
-    }
-
-    /*
-     * Account for feature which have been disabled by default since Xen 4.13,
-     * so migrated-in VM's don't risk seeing features disappearing.
-     */
-    if ( restore )
-    {
-        p->basic.rdrand = test_bit(X86_FEATURE_RDRAND, host_featureset);
-
-        if ( di.hvm )
-        {
-            p->feat.mpx = test_bit(X86_FEATURE_MPX, host_featureset);
-        }
-    }
-
-    if ( featureset )
-    {
-        uint32_t disabled_features[FEATURESET_NR_ENTRIES],
-            feat[FEATURESET_NR_ENTRIES] = {};
-        static const uint32_t deep_features[] = INIT_DEEP_FEATURES;
-        unsigned int i, b;
-
-        /*
-         * The user supplied featureset may be shorter or longer than
-         * FEATURESET_NR_ENTRIES.  Shorter is fine, and we will zero-extend.
-         * Longer is fine, so long as it only padded with zeros.
-         */
-        unsigned int user_len = min(FEATURESET_NR_ENTRIES + 0u, nr_features);
-
-        /* Check for truncated set bits. */
-        rc = -EOPNOTSUPP;
-        for ( i = user_len; i < nr_features; ++i )
-            if ( featureset[i] != 0 )
-                goto out;
-
-        memcpy(feat, featureset, sizeof(*featureset) * user_len);
-
-        /* Disable deep dependencies of disabled features. */
-        for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
-            disabled_features[i] = ~feat[i] & deep_features[i];
-
-        for ( b = 0; b < sizeof(disabled_features) * CHAR_BIT; ++b )
-        {
-            const uint32_t *dfs;
-
-            if ( !test_bit(b, disabled_features) ||
-                 !(dfs = x86_cpuid_lookup_deep_deps(b)) )
-                continue;
-
-            for ( i = 0; i < ARRAY_SIZE(disabled_features); ++i )
-            {
-                feat[i] &= ~dfs[i];
-                disabled_features[i] &= ~dfs[i];
-            }
-        }
-
-        cpuid_featureset_to_policy(feat, p);
-    }
-    else
-    {
-        if ( di.hvm )
-            p->basic.pae = pae;
-    }
-
-    if ( !di.hvm )
-    {
-        /*
-         * On hardware without CPUID Faulting, PV guests see real topology.
-         * As a consequence, they also need to see the host htt/cmp fields.
-         */
-        p->basic.htt       = test_bit(X86_FEATURE_HTT, host_featureset);
-        p->extd.cmp_legacy = test_bit(X86_FEATURE_CMP_LEGACY, host_featureset);
-    }
-    else
-    {
-        /*
-         * Topology for HVM guests is entirely controlled by Xen.  For now, we
-         * hardcode APIC_ID = vcpu_id * 2 to give the illusion of no SMT.
-         */
-        p->basic.htt = true;
-        p->extd.cmp_legacy = false;
-
-        /*
-         * Leaf 1 EBX[23:16] is Maximum Logical Processors Per Package.
-         * Update to reflect vLAPIC_ID = vCPU_ID * 2, but make sure to avoid
-         * overflow.
-         */
-        if ( !(p->basic.lppp & 0x80) )
-            p->basic.lppp *= 2;
-
-        switch ( p->x86_vendor )
-        {
-        case X86_VENDOR_INTEL:
-            for ( i = 0; (p->cache.subleaf[i].type &&
-                          i < ARRAY_SIZE(p->cache.raw)); ++i )
-            {
-                p->cache.subleaf[i].cores_per_package =
-                    (p->cache.subleaf[i].cores_per_package << 1) | 1;
-                p->cache.subleaf[i].threads_per_cache = 0;
-            }
-            break;
-
-        case X86_VENDOR_AMD:
-        case X86_VENDOR_HYGON:
-            /*
-             * Leaf 0x80000008 ECX[15:12] is ApicIdCoreSize.
-             * Leaf 0x80000008 ECX[7:0] is NumberOfCores (minus one).
-             * Update to reflect vLAPIC_ID = vCPU_ID * 2.  But avoid
-             * - overflow,
-             * - going out of sync with leaf 1 EBX[23:16],
-             * - incrementing ApicIdCoreSize when it's zero (which changes the
-             *   meaning of bits 7:0).
-             *
-             * UPDATE: I addition to avoiding overflow, some
-             * proprietary operating systems have trouble with
-             * apic_id_size values greater than 7.  Limit the value to
-             * 7 for now.
-             */
-            if ( p->extd.nc < 0x7f )
-            {
-                if ( p->extd.apic_id_size != 0 && p->extd.apic_id_size < 0x7 )
-                    p->extd.apic_id_size++;
-
-                p->extd.nc = (p->extd.nc << 1) | 1;
-            }
-            break;
-        }
-
-        /*
-         * These settings are necessary to cause earlier HVM_PARAM_NESTEDHVM /
-         * XEN_DOMCTL_disable_migrate settings to be reflected correctly in
-         * CPUID.  Xen will discard these bits if configuration hasn't been
-         * set for the domain.
-         */
-        p->extd.itsc = true;
-        p->basic.vmx = true;
-        p->extd.svm = true;
-    }
-
-    rc = x86_cpuid_copy_to_buffer(p, leaves, &nr_leaves);
-    if ( rc )
-    {
-        ERROR("Failed to serialise CPUID (%d = %s)", -rc, strerror(-rc));
-        goto out;
-    }
-
-    rc = xc_set_domain_cpu_policy(xch, domid, nr_leaves, leaves, 0, NULL,
-                                  &err_leaf, &err_subleaf, &err_msr);
-    if ( rc )
-    {
-        PERROR("Failed to set d%d's policy (err leaf %#x, subleaf %#x, msr %#x)",
-               domid, err_leaf, err_subleaf, err_msr);
-        rc = -errno;
-        goto out;
-    }
-
-    if ( xend && (rc = xc_cpuid_xend_policy(xch, domid, xend)) )
-        goto out;
-
-    rc = 0;
-
-out:
-    free(p);
-    free(leaves);
-
-    return rc;
-}
diff --git a/tools/libxc/xg_dom_arm.c b/tools/libxc/xg_dom_arm.c
deleted file mode 100644 (file)
index 3f66f1d..0000000
+++ /dev/null
@@ -1,552 +0,0 @@
-/*
- * Xen domain builder -- ARM
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (c) 2011, Citrix Systems
- */
-#include <inttypes.h>
-#include <assert.h>
-
-#include <xen/xen.h>
-#include <xen/io/protocols.h>
-#include <xen-tools/libs.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-
-#define NR_MAGIC_PAGES 4
-#define CONSOLE_PFN_OFFSET 0
-#define XENSTORE_PFN_OFFSET 1
-#define MEMACCESS_PFN_OFFSET 2
-#define VUART_PFN_OFFSET 3
-
-#define LPAE_SHIFT 9
-
-#define PFN_4K_SHIFT  (0)
-#define PFN_2M_SHIFT  (PFN_4K_SHIFT+LPAE_SHIFT)
-#define PFN_1G_SHIFT  (PFN_2M_SHIFT+LPAE_SHIFT)
-#define PFN_512G_SHIFT (PFN_1G_SHIFT+LPAE_SHIFT)
-
-/* get guest IO ABI protocol */
-const char *xc_domain_get_native_protocol(xc_interface *xch,
-                                          uint32_t domid)
-{
-    return XEN_IO_PROTO_ABI_ARM;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int alloc_magic_pages(struct xc_dom_image *dom)
-{
-    int rc, i;
-    const xen_pfn_t base = GUEST_MAGIC_BASE >> XC_PAGE_SHIFT;
-    xen_pfn_t p2m[NR_MAGIC_PAGES];
-
-    BUILD_BUG_ON(NR_MAGIC_PAGES > GUEST_MAGIC_SIZE >> XC_PAGE_SHIFT);
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    for (i = 0; i < NR_MAGIC_PAGES; i++)
-        p2m[i] = base + i;
-
-    rc = xc_domain_populate_physmap_exact(
-            dom->xch, dom->guest_domid, NR_MAGIC_PAGES,
-            0, 0, p2m);
-    if ( rc < 0 )
-        return rc;
-
-    dom->console_pfn = base + CONSOLE_PFN_OFFSET;
-    dom->xenstore_pfn = base + XENSTORE_PFN_OFFSET;
-    dom->vuart_gfn = base + VUART_PFN_OFFSET;
-
-    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
-    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
-    xc_clear_domain_page(dom->xch, dom->guest_domid, base + MEMACCESS_PFN_OFFSET);
-    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->vuart_gfn);
-
-    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_PFN,
-            dom->console_pfn);
-    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_PFN,
-            dom->xenstore_pfn);
-    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_MONITOR_RING_PFN,
-            base + MEMACCESS_PFN_OFFSET);
-    /* allocated by toolstack */
-    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_CONSOLE_EVTCHN,
-            dom->console_evtchn);
-    xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_EVTCHN,
-            dom->xenstore_evtchn);
-
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int start_info_arm(struct xc_dom_image *dom)
-{
-    DOMPRINTF_CALLED(dom->xch);
-    return 0;
-}
-
-static int shared_info_arm(struct xc_dom_image *dom, void *ptr)
-{
-    DOMPRINTF_CALLED(dom->xch);
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int vcpu_arm32(struct xc_dom_image *dom)
-{
-    vcpu_guest_context_any_t any_ctx;
-    vcpu_guest_context_t *ctxt = &any_ctx.c;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* clear everything */
-    memset(ctxt, 0, sizeof(*ctxt));
-
-    ctxt->user_regs.pc32 = dom->parms.virt_entry;
-
-    /* Linux boot protocol. See linux.Documentation/arm/Booting. */
-    ctxt->user_regs.r0_usr = 0; /* SBZ */
-    /* Machine ID: We use DTB therefore no machine id */
-    ctxt->user_regs.r1_usr = 0xffffffff;
-    /* ATAGS/DTB: We currently require that the guest kernel to be
-     * using CONFIG_ARM_APPENDED_DTB. Ensure that r2 does not look
-     * like a valid pointer to a set of ATAGS or a DTB.
-     */
-    ctxt->user_regs.r2_usr = dom->devicetree_blob ?
-        dom->devicetree_seg.vstart : 0xffffffff;
-
-    ctxt->sctlr = SCTLR_GUEST_INIT;
-
-    ctxt->ttbr0 = 0;
-    ctxt->ttbr1 = 0;
-    ctxt->ttbcr = 0; /* Defined Reset Value */
-
-    ctxt->user_regs.cpsr = PSR_GUEST32_INIT;
-
-    ctxt->flags = VGCF_online;
-
-    DOMPRINTF("Initial state CPSR %#"PRIx32" PC %#"PRIx32,
-           ctxt->user_regs.cpsr, ctxt->user_regs.pc32);
-
-    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
-
-    return rc;
-}
-
-static int vcpu_arm64(struct xc_dom_image *dom)
-{
-    vcpu_guest_context_any_t any_ctx;
-    vcpu_guest_context_t *ctxt = &any_ctx.c;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-    /* clear everything */
-    memset(ctxt, 0, sizeof(*ctxt));
-
-    ctxt->user_regs.pc64 = dom->parms.virt_entry;
-
-    /* Linux boot protocol. See linux.Documentation/arm64/booting.txt. */
-    ctxt->user_regs.x0 = dom->devicetree_blob ?
-        dom->devicetree_seg.vstart : 0xffffffff;
-    ctxt->user_regs.x1 = 0;
-    ctxt->user_regs.x2 = 0;
-    ctxt->user_regs.x3 = 0;
-
-    DOMPRINTF("DTB %"PRIx64, ctxt->user_regs.x0);
-
-    ctxt->sctlr = SCTLR_GUEST_INIT;
-
-    ctxt->ttbr0 = 0;
-    ctxt->ttbr1 = 0;
-    ctxt->ttbcr = 0; /* Defined Reset Value */
-
-    ctxt->user_regs.cpsr = PSR_GUEST64_INIT;
-
-    ctxt->flags = VGCF_online;
-
-    DOMPRINTF("Initial state CPSR %#"PRIx32" PC %#"PRIx64,
-           ctxt->user_regs.cpsr, ctxt->user_regs.pc64);
-
-    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
-
-    return rc;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int set_mode(xc_interface *xch, uint32_t domid, char *guest_type)
-{
-    static const struct {
-        char           *guest;
-        uint32_t        size;
-    } types[] = {
-        { "xen-3.0-aarch64", 64 },
-        { "xen-3.0-armv7l",  32 },
-    };
-    DECLARE_DOMCTL;
-    int i,rc;
-
-    domctl.domain = domid;
-    domctl.cmd    = XEN_DOMCTL_set_address_size;
-    domctl.u.address_size.size = 0;
-
-    for ( i = 0; i < ARRAY_SIZE(types); i++ )
-        if ( !strcmp(types[i].guest, guest_type) )
-            domctl.u.address_size.size = types[i].size;
-    if ( domctl.u.address_size.size == 0 )
-    {
-        xc_dom_printf(xch, "%s: warning: unknown guest type %s",
-                      __FUNCTION__, guest_type);
-        return -EINVAL;
-    }
-
-    xc_dom_printf(xch, "%s: guest %s, address size %" PRId32 "", __FUNCTION__,
-                  guest_type, domctl.u.address_size.size);
-    rc = do_domctl(xch, &domctl);
-    if ( rc != 0 )
-        xc_dom_printf(xch, "%s: warning: failed (rc=%d)",
-                      __FUNCTION__, rc);
-    return rc;
-}
-
-/*  >0: success, *nr_pfns set to number actually populated
- *   0: didn't try with this pfn shift (e.g. misaligned base etc)
- *  <0: ERROR
- */
-static int populate_one_size(struct xc_dom_image *dom, int pfn_shift,
-                             xen_pfn_t base_pfn, xen_pfn_t *nr_pfns,
-                             xen_pfn_t *extents)
-{
-    /* The mask for this level */
-    const uint64_t mask = ((uint64_t)1<<(pfn_shift))-1;
-    /* The shift, mask and next boundary for the level above this one */
-    const int next_shift = pfn_shift + LPAE_SHIFT;
-    const uint64_t next_mask = ((uint64_t)1<<next_shift)-1;
-    const xen_pfn_t next_boundary
-        = (base_pfn + ((uint64_t)1<<next_shift)) & ~next_mask;
-
-    int nr, i, count;
-    xen_pfn_t end_pfn = base_pfn + *nr_pfns;
-
-    /* No level zero super pages with current hardware */
-    if ( pfn_shift == PFN_512G_SHIFT )
-        return 0;
-
-    /* base is misaligned for this level */
-    if ( mask & base_pfn )
-        return 0;
-
-    /*
-     * If base is not aligned at the next level up then try and make
-     * it so for next time around.
-     */
-    if ( (base_pfn & next_mask) && end_pfn > next_boundary )
-        end_pfn = next_boundary;
-
-    count = ( end_pfn - base_pfn ) >> pfn_shift;
-
-    /* Nothing to allocate */
-    if ( !count )
-        return 0;
-
-    for ( i = 0 ; i < count ; i ++ )
-        extents[i] = base_pfn + (i<<pfn_shift);
-
-    nr = xc_domain_populate_physmap(dom->xch, dom->guest_domid, count,
-                                    pfn_shift, 0, extents);
-    if ( nr <= 0 ) return nr;
-    DOMPRINTF("%s: populated %#x/%#x entries with shift %d",
-              __FUNCTION__, nr, count, pfn_shift);
-
-    *nr_pfns = nr << pfn_shift;
-
-    return 1;
-}
-
-static int populate_guest_memory(struct xc_dom_image *dom,
-                                 xen_pfn_t base_pfn, xen_pfn_t nr_pfns)
-{
-    int rc = 0;
-    xen_pfn_t allocsz, pfn, *extents;
-
-    extents = calloc(1024*1024,sizeof(xen_pfn_t));
-    if ( extents == NULL )
-    {
-        DOMPRINTF("%s: Unable to allocate extent array", __FUNCTION__);
-        return -1;
-    }
-
-    DOMPRINTF("%s: populating RAM @ %016"PRIx64"-%016"PRIx64" (%"PRId64"MB)",
-              __FUNCTION__,
-              (uint64_t)base_pfn << XC_PAGE_SHIFT,
-              (uint64_t)(base_pfn + nr_pfns) << XC_PAGE_SHIFT,
-              (uint64_t)nr_pfns >> (20-XC_PAGE_SHIFT));
-
-    for ( pfn = 0; pfn < nr_pfns; pfn += allocsz )
-    {
-        allocsz = min_t(int, 1024*1024, nr_pfns - pfn);
-#if 0 /* Enable this to exercise/debug the code which tries to realign
-       * to a superpage boundary, by misaligning at the start. */
-        if ( pfn == 0 )
-        {
-            allocsz = 1;
-            rc = populate_one_size(dom, PFN_4K_SHIFT,
-                                   base_pfn + pfn, &allocsz, extents);
-            if (rc < 0) break;
-            if (rc > 0) continue;
-            /* Failed to allocate a single page? */
-            break;
-        }
-#endif
-
-        rc = populate_one_size(dom, PFN_512G_SHIFT,
-                               base_pfn + pfn, &allocsz, extents);
-        if ( rc < 0 ) break;
-        if ( rc > 0 ) continue;
-
-        rc = populate_one_size(dom, PFN_1G_SHIFT,
-                               base_pfn + pfn, &allocsz, extents);
-        if ( rc < 0 ) break;
-        if ( rc > 0 ) continue;
-
-        rc = populate_one_size(dom, PFN_2M_SHIFT,
-                               base_pfn + pfn, &allocsz, extents);
-        if ( rc < 0 ) break;
-        if ( rc > 0 ) continue;
-
-        rc = populate_one_size(dom, PFN_4K_SHIFT,
-                               base_pfn + pfn, &allocsz, extents);
-        if ( rc < 0 ) break;
-        if ( rc == 0 )
-        {
-            DOMPRINTF("%s: Not enough RAM", __FUNCTION__);
-            errno = ENOMEM;
-            rc = -1;
-            goto out;
-        }
-    }
-
-out:
-    free(extents);
-    return rc < 0 ? rc : 0;
-}
-
-static int meminit(struct xc_dom_image *dom)
-{
-    int i, rc;
-    uint64_t modbase;
-
-    uint64_t ramsize = (uint64_t)dom->total_pages << XC_PAGE_SHIFT;
-
-    const uint64_t bankbase[] = GUEST_RAM_BANK_BASES;
-    const uint64_t bankmax[] = GUEST_RAM_BANK_SIZES;
-
-    /* Convenient */
-    const uint64_t kernbase = dom->kernel_seg.vstart;
-    const uint64_t kernend = ROUNDUP(dom->kernel_seg.vend, 21/*2MB*/);
-    const uint64_t kernsize = kernend - kernbase;
-    const uint64_t dtb_size = dom->devicetree_blob ?
-        ROUNDUP(dom->devicetree_size, XC_PAGE_SHIFT) : 0;
-    const uint64_t ramdisk_size = dom->modules[0].blob ?
-        ROUNDUP(dom->modules[0].size, XC_PAGE_SHIFT) : 0;
-    const uint64_t modsize = dtb_size + ramdisk_size;
-    const uint64_t ram128mb = bankbase[0] + (128<<20);
-
-    xen_pfn_t p2m_size;
-    uint64_t bank0end;
-
-    assert(dom->rambase_pfn << XC_PAGE_SHIFT == bankbase[0]);
-
-    if ( modsize + kernsize > bankmax[0] )
-    {
-        DOMPRINTF("%s: Not enough memory for the kernel+dtb+initrd",
-                  __FUNCTION__);
-        return -1;
-    }
-
-    if ( ramsize == 0 )
-    {
-        DOMPRINTF("%s: ram size is 0", __FUNCTION__);
-        return -1;
-    }
-
-    if ( ramsize > GUEST_RAM_MAX )
-    {
-        DOMPRINTF("%s: ram size is too large for guest address space: "
-                  "%"PRIx64" > %llx",
-                  __FUNCTION__, ramsize, GUEST_RAM_MAX);
-        return -1;
-    }
-
-    rc = set_mode(dom->xch, dom->guest_domid, dom->guest_type);
-    if ( rc )
-        return rc;
-
-    for ( i = 0; ramsize && i < GUEST_RAM_BANKS; i++ )
-    {
-        uint64_t banksize = ramsize > bankmax[i] ? bankmax[i] : ramsize;
-
-        ramsize -= banksize;
-
-        p2m_size = ( bankbase[i] + banksize - bankbase[0] ) >> XC_PAGE_SHIFT;
-
-        dom->rambank_size[i] = banksize >> XC_PAGE_SHIFT;
-    }
-
-    assert(dom->rambank_size[0] != 0);
-    assert(ramsize == 0); /* Too much RAM is rejected above */
-
-    dom->p2m_size = p2m_size;
-
-    /* setup initial p2m and allocate guest memory */
-    for ( i = 0; i < GUEST_RAM_BANKS && dom->rambank_size[i]; i++ )
-    {
-        if ((rc = populate_guest_memory(dom,
-                                        bankbase[i] >> XC_PAGE_SHIFT,
-                                        dom->rambank_size[i])))
-            return rc;
-    }
-
-    /*
-     * We try to place dtb+initrd at 128MB or if we have less RAM
-     * as high as possible. If there is no space then fallback to
-     * just before the kernel.
-     *
-     * If changing this then consider
-     * xen/arch/arm/kernel.c:place_modules as well.
-     */
-    bank0end = bankbase[0] + ((uint64_t)dom->rambank_size[0] << XC_PAGE_SHIFT);
-
-    if ( bank0end >= ram128mb + modsize && kernend < ram128mb )
-        modbase = ram128mb;
-    else if ( bank0end - modsize > kernend )
-        modbase = bank0end - modsize;
-    else if (kernbase - bankbase[0] > modsize )
-        modbase = kernbase - modsize;
-    else
-        return -1;
-
-    DOMPRINTF("%s: placing boot modules at 0x%" PRIx64, __FUNCTION__, modbase);
-
-    /*
-     * Must map DTB *after* initrd, to satisfy order of calls to
-     * xc_dom_alloc_segment in xc_dom_build_image, which must map
-     * things at monotonolically increasing addresses.
-     */
-    if ( ramdisk_size )
-    {
-        dom->modules[0].seg.vstart = modbase;
-        dom->modules[0].seg.vend = modbase + ramdisk_size;
-
-        DOMPRINTF("%s: ramdisk: 0x%" PRIx64 " -> 0x%" PRIx64 "",
-                  __FUNCTION__,
-                  dom->modules[0].seg.vstart, dom->modules[0].seg.vend);
-
-        modbase += ramdisk_size;
-    }
-
-    if ( dtb_size )
-    {
-        dom->devicetree_seg.vstart = modbase;
-        dom->devicetree_seg.vend = modbase + dtb_size;
-
-        DOMPRINTF("%s: devicetree: 0x%" PRIx64 " -> 0x%" PRIx64 "",
-                  __FUNCTION__,
-                  dom->devicetree_seg.vstart, dom->devicetree_seg.vend);
-
-        modbase += dtb_size;
-    }
-
-    return 0;
-}
-
-bool xc_dom_translated(const struct xc_dom_image *dom)
-{
-    return true;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int bootearly(struct xc_dom_image *dom)
-{
-    DOMPRINTF("%s: doing nothing", __FUNCTION__);
-    return 0;
-}
-
-static int bootlate(struct xc_dom_image *dom)
-{
-    /* XXX
-     *   map shared info
-     *   map grant tables
-     *   setup shared info
-     */
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static struct xc_dom_arch xc_dom_32 = {
-    .guest_type = "xen-3.0-armv7l",
-    .native_protocol = XEN_IO_PROTO_ABI_ARM,
-    .page_shift = PAGE_SHIFT_ARM,
-    .sizeof_pfn = 8,
-    .alloc_magic_pages = alloc_magic_pages,
-    .start_info = start_info_arm,
-    .shared_info = shared_info_arm,
-    .vcpu = vcpu_arm32,
-    .meminit = meminit,
-    .bootearly = bootearly,
-    .bootlate = bootlate,
-};
-
-static struct xc_dom_arch xc_dom_64 = {
-    .guest_type = "xen-3.0-aarch64",
-    .native_protocol = XEN_IO_PROTO_ABI_ARM,
-    .page_shift = PAGE_SHIFT_ARM,
-    .sizeof_pfn = 8,
-    .alloc_magic_pages = alloc_magic_pages,
-    .start_info = start_info_arm,
-    .shared_info = shared_info_arm,
-    .vcpu = vcpu_arm64,
-    .meminit = meminit,
-    .bootearly = bootearly,
-    .bootlate = bootlate,
-};
-
-static void __init register_arch_hooks(void)
-{
-    xc_dom_register_arch_hooks(&xc_dom_32);
-    xc_dom_register_arch_hooks(&xc_dom_64);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_armzimageloader.c b/tools/libxc/xg_dom_armzimageloader.c
deleted file mode 100644 (file)
index 4246c8e..0000000
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Xen domain builder -- ARM zImage bits
- *
- * Parse and load ARM zImage kernel images.
- *
- * Copyright (C) 2012, Citrix Systems.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-
-#include <arpa/inet.h> /* XXX ntohl is not the right function... */
-
-struct minimal_dtb_header {
-    uint32_t magic;
-    uint32_t total_size;
-    /* There are other fields but we don't use them yet. */
-};
-
-#define DTB_MAGIC 0xd00dfeed
-
-/* ------------------------------------------------------------ */
-/* 32-bit zImage Support                                        */
-/* ------------------------------------------------------------ */
-
-#define ZIMAGE32_MAGIC_OFFSET 0x24
-#define ZIMAGE32_START_OFFSET 0x28
-#define ZIMAGE32_END_OFFSET   0x2c
-
-#define ZIMAGE32_MAGIC 0x016f2818
-
-static int xc_dom_probe_zimage32_kernel(struct xc_dom_image *dom)
-{
-    uint32_t *zimage;
-
-    if ( dom->kernel_blob == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: no kernel image loaded", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( dom->kernel_size < 0x30 /*sizeof(struct setup_header)*/ )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    zimage = (uint32_t *)dom->kernel_blob;
-    if ( zimage[ZIMAGE32_MAGIC_OFFSET/4] != ZIMAGE32_MAGIC )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel is not an arm32 zImage", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static int xc_dom_parse_zimage32_kernel(struct xc_dom_image *dom)
-{
-    uint32_t *zimage;
-    uint32_t start, entry_addr;
-    uint64_t v_start, v_end;
-    uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    zimage = (uint32_t *)dom->kernel_blob;
-
-    /* Do not load kernel at the very first RAM address */
-    v_start = rambase + 0x8000;
-
-    if ( dom->kernel_size > UINT64_MAX - v_start )
-    {
-        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    v_end = v_start + dom->kernel_size;
-
-    /*
-     * If start is invalid then the guest will start at some invalid
-     * address and crash, but this happens in guest context so doesn't
-     * concern us here.
-     */
-    start = zimage[ZIMAGE32_START_OFFSET/4];
-
-    if (start == 0)
-        entry_addr = v_start;
-    else
-        entry_addr = start;
-
-    /* find kernel segment */
-    dom->kernel_seg.vstart = v_start;
-    dom->kernel_seg.vend   = v_end;
-
-    dom->parms.virt_entry = entry_addr;
-    dom->parms.virt_base = rambase;
-
-    dom->guest_type = "xen-3.0-armv7l";
-    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
-              __FUNCTION__, dom->guest_type,
-              dom->kernel_seg.vstart, dom->kernel_seg.vend);
-    return 0;
-}
-
-/* ------------------------------------------------------------ */
-/* 64-bit zImage Support                                        */
-/* ------------------------------------------------------------ */
-
-#define ZIMAGE64_MAGIC_V0 0x14000008
-#define ZIMAGE64_MAGIC_V1 0x644d5241 /* "ARM\x64" */
-
-/* linux/Documentation/arm64/booting.txt */
-struct zimage64_hdr {
-    uint32_t magic0;
-    uint32_t res0;
-    uint64_t text_offset;  /* Image load offset */
-    uint64_t res1;
-    uint64_t res2;
-    /* zImage V1 only from here */
-    uint64_t res3;
-    uint64_t res4;
-    uint64_t res5;
-    uint32_t magic1;
-    uint32_t res6;
-};
-static int xc_dom_probe_zimage64_kernel(struct xc_dom_image *dom)
-{
-    struct zimage64_hdr *zimage;
-
-    if ( dom->kernel_blob == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: no kernel image loaded", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( dom->kernel_size < sizeof(*zimage) )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    zimage =  dom->kernel_blob;
-    if ( zimage->magic0 != ZIMAGE64_MAGIC_V0 &&
-         zimage->magic1 != ZIMAGE64_MAGIC_V1 )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel is not an arm64 Image", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static int xc_dom_parse_zimage64_kernel(struct xc_dom_image *dom)
-{
-    struct zimage64_hdr *zimage;
-    uint64_t v_start, v_end;
-    uint64_t rambase = dom->rambase_pfn << XC_PAGE_SHIFT;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    zimage = dom->kernel_blob;
-
-    if ( zimage->text_offset > UINT64_MAX - rambase )
-    {
-        DOMPRINTF("%s: kernel text offset is too large\n", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    v_start = rambase + zimage->text_offset;
-
-    if ( dom->kernel_size > UINT64_MAX - v_start )
-    {
-        DOMPRINTF("%s: kernel is too large\n", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    v_end = v_start + dom->kernel_size;
-
-    dom->kernel_seg.vstart = v_start;
-    dom->kernel_seg.vend   = v_end;
-
-    /* Call the kernel at offset 0 */
-    dom->parms.virt_entry = v_start;
-    dom->parms.virt_base = rambase;
-
-    dom->guest_type = "xen-3.0-aarch64";
-    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
-              __FUNCTION__, dom->guest_type,
-              dom->kernel_seg.vstart, dom->kernel_seg.vend);
-
-    return 0;
-}
-
-/* ------------------------------------------------------------ */
-/* Common zImage Support                                        */
-/* ------------------------------------------------------------ */
-
-static int xc_dom_load_zimage_kernel(struct xc_dom_image *dom)
-{
-    void *dst;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    dst = xc_dom_seg_to_ptr(dom, &dom->kernel_seg);
-    if ( dst == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->kernel_seg) => NULL",
-                  __func__);
-        return -1;
-    }
-
-    DOMPRINTF("%s: kernel seg %#"PRIx64"-%#"PRIx64,
-              __func__, dom->kernel_seg.vstart, dom->kernel_seg.vend);
-    DOMPRINTF("%s: copy %zd bytes from blob %p to dst %p",
-              __func__, dom->kernel_size, dom->kernel_blob, dst);
-
-    memcpy(dst, dom->kernel_blob, dom->kernel_size);
-
-    return 0;
-}
-
-static struct xc_dom_loader zimage32_loader = {
-    .name = "Linux zImage (ARM32)",
-    .probe = xc_dom_probe_zimage32_kernel,
-    .parser = xc_dom_parse_zimage32_kernel,
-    .loader = xc_dom_load_zimage_kernel,
-};
-
-static struct xc_dom_loader zimage64_loader = {
-    .name = "Linux zImage (ARM64)",
-    .probe = xc_dom_probe_zimage64_kernel,
-    .parser = xc_dom_parse_zimage64_kernel,
-    .loader = xc_dom_load_zimage_kernel,
-};
-
-static void __init register_loader(void)
-{
-    xc_dom_register_loader(&zimage32_loader);
-    xc_dom_register_loader(&zimage64_loader);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_binloader.c b/tools/libxc/xg_dom_binloader.c
deleted file mode 100644 (file)
index 870a921..0000000
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * Some of the field descriptions were copied from "The Multiboot
- * Specification", Copyright 1995, 96 Bryan Ford <baford@cs.utah.edu>,
- * Erich Stefan Boleyn <erich@uruk.org> Copyright 1999, 2000, 2001, 2002
- * Free Software Foundation, Inc.
- */
-
-/******************************************************************************
- *
- * Loads simple binary images. It's like a .COM file in MS-DOS. No headers are
- * present. The only requirement is that it must have a xen_bin_image table
- * somewhere in the first 8192 bytes, starting on a 32-bit aligned address.
- * Those familiar with the multiboot specification should recognize this, it's
- * (almost) the same as the multiboot header.
- * The layout of the xen_bin_image table is:
- *
- * Offset Type Name          Note
- * 0      uint32_t  magic         required
- * 4      uint32_t  flags         required
- * 8      uint32_t  checksum      required
- * 12     uint32_t  header_addr   required
- * 16     uint32_t  load_addr     required
- * 20     uint32_t  load_end_addr required
- * 24     uint32_t  bss_end_addr  required
- * 28     uint32_t  entry_addr    required
- *
- * - magic
- *   Magic number identifying the table. For images to be loaded by Xen 3, the
- *   magic value is 0x336ec578 ("xEn3" with the 0x80 bit of the "E" set).
- * - flags
- *   bit 0: indicates whether the image needs to be loaded on a page boundary
- *   bit 1: reserved, must be 0 (the multiboot spec uses this bit to indicate
- *          that memory info should be passed to the image)
- *   bit 2: reserved, must be 0 (the multiboot spec uses this bit to indicate
- *          that the bootloader should pass video mode info to the image)
- *   bit 16: reserved, must be 1 (the multiboot spec uses this bit to indicate
- *           that the values in the fields header_addr - entry_addr are
- *           valid)
- *   All other bits should be set to 0.
- * - checksum
- *   When added to "magic" and "flags", the resulting value should be 0.
- * - header_addr
- *   Contains the virtual address corresponding to the beginning of the
- *   table - the memory location at which the magic value is supposed to be
- *   loaded. This field serves to synchronize the mapping between OS image
- *   offsets and virtual memory addresses.
- * - load_addr
- *   Contains the virtual address of the beginning of the text segment. The
- *   offset in the OS image file at which to start loading is defined by the
- *   offset at which the table was found, minus (header addr - load addr).
- *   load addr must be less than or equal to header addr.
- * - load_end_addr
- *   Contains the virtual address of the end of the data segment.
- *   (load_end_addr - load_addr) specifies how much data to load. This implies
- *   that the text and data segments must be consecutive in the OS image. If
- *   this field is zero, the domain builder assumes that the text and data
- *   segments occupy the whole OS image file.
- * - bss_end_addr
- *   Contains the virtual address of the end of the bss segment. The domain
- *   builder initializes this area to zero, and reserves the memory it occupies
- *   to avoid placing boot modules and other data relevant to the loaded image
- *   in that area. If this field is zero, the domain builder assumes that no bss
- *   segment is present.
- * - entry_addr
- *   The virtual address at which to start execution of the loaded image.
- *
- */
-
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-
-#define round_pgup(_p)    (((_p)+(PAGE_SIZE_X86-1))&PAGE_MASK_X86)
-#define round_pgdown(_p)  ((_p)&PAGE_MASK_X86)
-
-struct xen_bin_image_table
-{
-    uint32_t magic;
-    uint32_t flags;
-    uint32_t checksum;
-    uint32_t header_addr;
-    uint32_t load_addr;
-    uint32_t load_end_addr;
-    uint32_t bss_end_addr;
-    uint32_t entry_addr;
-};
-
-#define XEN_MULTIBOOT_MAGIC3 0x336ec578
-
-#define XEN_MULTIBOOT_FLAG_ALIGN4K     0x00000001
-#define XEN_MULTIBOOT_FLAG_NEEDMEMINFO 0x00000002
-#define XEN_MULTIBOOT_FLAG_NEEDVIDINFO 0x00000004
-#define XEN_MULTIBOOT_FLAG_ADDRSVALID  0x00010000
-#define XEN_MULTIBOOT_FLAG_PAE_SHIFT   14
-#define XEN_MULTIBOOT_FLAG_PAE_MASK    (3 << XEN_MULTIBOOT_FLAG_PAE_SHIFT)
-
-/* Flags we test for */
-#define FLAGS_MASK     ((~ 0) & (~ XEN_MULTIBOOT_FLAG_ALIGN4K) & \
-    (~ XEN_MULTIBOOT_FLAG_PAE_MASK))
-#define FLAGS_REQUIRED XEN_MULTIBOOT_FLAG_ADDRSVALID
-
-/* --------------------------------------------------------------------- */
-
-static struct xen_bin_image_table *find_table(struct xc_dom_image *dom)
-{
-    struct xen_bin_image_table *table;
-    uint32_t *probe_ptr;
-    uint32_t *probe_end;
-
-    if ( dom->kernel_size < sizeof(*table) )
-        return NULL;
-    probe_ptr = dom->kernel_blob;
-    if ( dom->kernel_size > (8192 + sizeof(*table)) )
-        probe_end = dom->kernel_blob + 8192;
-    else
-        probe_end = dom->kernel_blob + dom->kernel_size - sizeof(*table);
-
-    for ( table = NULL; probe_ptr < probe_end; probe_ptr++ )
-    {
-        if ( *probe_ptr == XEN_MULTIBOOT_MAGIC3 )
-        {
-            table = (struct xen_bin_image_table *) probe_ptr;
-            /* Checksum correct? */
-            if ( (table->magic + table->flags + table->checksum) == 0 )
-                return table;
-        }
-    }
-    return NULL;
-}
-
-static int xc_dom_probe_bin_kernel(struct xc_dom_image *dom)
-{
-    return find_table(dom) ? 0 : -EINVAL;
-}
-
-static int xc_dom_parse_bin_kernel(struct xc_dom_image *dom)
-{
-    struct xen_bin_image_table *image_info;
-    char *image = dom->kernel_blob;
-    size_t image_size = dom->kernel_size;
-    uint32_t start_addr;
-    uint32_t load_end_addr;
-    uint32_t bss_end_addr;
-    uint32_t pae_flags;
-
-    image_info = find_table(dom);
-    if ( !image_info )
-        return -EINVAL;
-
-    DOMPRINTF("%s: multiboot header fields", __FUNCTION__);
-    DOMPRINTF("  flags:         0x%" PRIx32 "", image_info->flags);
-    DOMPRINTF("  header_addr:   0x%" PRIx32 "", image_info->header_addr);
-    DOMPRINTF("  load_addr:     0x%" PRIx32 "", image_info->load_addr);
-    DOMPRINTF("  load_end_addr: 0x%" PRIx32 "", image_info->load_end_addr);
-    DOMPRINTF("  bss_end_addr:  0x%" PRIx32 "", image_info->bss_end_addr);
-    DOMPRINTF("  entry_addr:    0x%" PRIx32 "", image_info->entry_addr);
-
-    /* Check the flags */
-    if ( (image_info->flags & FLAGS_MASK) != FLAGS_REQUIRED )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "%s: xen_bin_image_table flags required "
-                     "0x%08" PRIx32 " found 0x%08" PRIx32 "",
-                     __FUNCTION__, FLAGS_REQUIRED, image_info->flags & FLAGS_MASK);
-        return -EINVAL;
-    }
-
-    /* Sanity check on the addresses */
-    if ( (image_info->header_addr < image_info->load_addr) ||
-         ((char *) image_info - image) <
-         (image_info->header_addr - image_info->load_addr) )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid header_addr.",
-                     __FUNCTION__);
-        return -EINVAL;
-    }
-
-    start_addr = image_info->header_addr - ((char *)image_info - image);
-    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
-    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
-
-    DOMPRINTF("%s: calculated addresses", __FUNCTION__);
-    DOMPRINTF("  start_addr:    0x%" PRIx32 "", start_addr);
-    DOMPRINTF("  load_end_addr: 0x%" PRIx32 "", load_end_addr);
-    DOMPRINTF("  bss_end_addr:  0x%" PRIx32 "", bss_end_addr);
-
-    if ( (start_addr + image_size) < load_end_addr )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid load_end_addr.",
-                     __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( bss_end_addr < load_end_addr)
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Invalid bss_end_addr.",
-                     __FUNCTION__);
-        return -EINVAL;
-    }
-
-    dom->kernel_seg.vstart = image_info->load_addr;
-    dom->kernel_seg.vend   = bss_end_addr;
-    dom->parms.virt_base   = start_addr;
-    dom->parms.virt_entry  = image_info->entry_addr;
-
-    pae_flags = image_info->flags & XEN_MULTIBOOT_FLAG_PAE_MASK;
-    switch (pae_flags >> XEN_MULTIBOOT_FLAG_PAE_SHIFT) {
-    case 0:
-        dom->guest_type = "xen-3.0-x86_32";
-        break;
-    case 1:
-        dom->guest_type = "xen-3.0-x86_32p";
-        break;
-    case 2:
-        dom->guest_type = "xen-3.0-x86_64";
-        break;
-    case 3:
-        /* Kernel detects PAE at runtime.  So try to figure whenever
-         * xen supports PAE and advertise a PAE-capable kernel in case
-         * it does. */
-        dom->guest_type = "xen-3.0-x86_32";
-        if ( strstr(dom->xen_caps, "xen-3.0-x86_32p") )
-        {
-            DOMPRINTF("%s: PAE fixup", __FUNCTION__);
-            dom->guest_type = "xen-3.0-x86_32p";
-            dom->parms.pae  = XEN_PAE_EXTCR3;
-        }
-        break;
-    }
-    return 0;
-}
-
-static int xc_dom_load_bin_kernel(struct xc_dom_image *dom)
-{
-    struct xen_bin_image_table *image_info;
-    char *image = dom->kernel_blob;
-    char *dest;
-    size_t image_size = dom->kernel_size;
-    size_t dest_size;
-    uint32_t start_addr;
-    uint32_t load_end_addr;
-    uint32_t bss_end_addr;
-    uint32_t skip, text_size, bss_size;
-
-    image_info = find_table(dom);
-    if ( !image_info )
-        return -EINVAL;
-
-    start_addr = image_info->header_addr - ((char *)image_info - image);
-    load_end_addr = image_info->load_end_addr ?: start_addr + image_size;
-    bss_end_addr = image_info->bss_end_addr ?: load_end_addr;
-
-    /* It's possible that we need to skip the first part of the image */
-    skip = image_info->load_addr - start_addr;
-    text_size = load_end_addr - image_info->load_addr;
-    bss_size = bss_end_addr - load_end_addr;
-
-    DOMPRINTF("%s: calculated sizes", __FUNCTION__);
-    DOMPRINTF("  skip:      0x%" PRIx32 "", skip);
-    DOMPRINTF("  text_size: 0x%" PRIx32 "", text_size);
-    DOMPRINTF("  bss_size:  0x%" PRIx32 "", bss_size);
-
-    dest = xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart, &dest_size);
-    if ( dest == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom, dom->kernel_seg.vstart)"
-                  " => NULL", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( dest_size < text_size ||
-         dest_size - text_size < bss_size )
-    {
-        DOMPRINTF("%s: mapped region is too small for image", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( image_size < skip ||
-         image_size - skip < text_size )
-    {
-        DOMPRINTF("%s: image is too small for declared text size",
-                  __FUNCTION__);
-        return -EINVAL;
-    }
-
-    memcpy(dest, image + skip, text_size);
-    memset(dest + text_size, 0, bss_size);
-
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static struct xc_dom_loader bin_loader = {
-    .name = "multiboot-binary",
-    .probe = xc_dom_probe_bin_kernel,
-    .parser = xc_dom_parse_bin_kernel,
-    .loader = xc_dom_load_bin_kernel,
-};
-
-static void __init register_loader(void)
-{
-    xc_dom_register_loader(&bin_loader);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_boot.c b/tools/libxc/xg_dom_boot.c
deleted file mode 100644 (file)
index 1e31e92..0000000
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- * Xen domain builder -- xen booter.
- *
- * This is the code which actually boots a fresh
- * prepared domain image as xen guest domain.
- *
- * ==>  this is the only domain builder code piece
- *          where xen hypercalls are allowed        <==
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <zlib.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-#include "xc_core.h"
-#include <xen/hvm/params.h>
-#include <xen/grant_table.h>
-
-/* ------------------------------------------------------------------------ */
-
-static int setup_hypercall_page(struct xc_dom_image *dom)
-{
-    DECLARE_DOMCTL;
-    xen_pfn_t pfn;
-    int rc;
-
-    if ( dom->parms.virt_hypercall == -1 )
-        return 0;
-    pfn = (dom->parms.virt_hypercall - dom->parms.virt_base)
-        >> XC_DOM_PAGE_SHIFT(dom);
-
-    DOMPRINTF("%s: vaddr=0x%" PRIx64 " pfn=0x%" PRIpfn "", __FUNCTION__,
-                  dom->parms.virt_hypercall, pfn);
-    domctl.cmd = XEN_DOMCTL_hypercall_init;
-    domctl.domain = dom->guest_domid;
-    domctl.u.hypercall_init.gmfn = xc_dom_p2m(dom, pfn);
-    rc = do_domctl(dom->xch, &domctl);
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: HYPERCALL_INIT failed: %d - %s)",
-                     __FUNCTION__, errno, strerror(errno));
-    return rc;
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-int xc_dom_compat_check(struct xc_dom_image *dom)
-{
-    xen_capabilities_info_t xen_caps;
-    char *item, *ptr;
-    int match, found = 0;
-
-    strncpy(xen_caps, dom->xen_caps, XEN_CAPABILITIES_INFO_LEN - 1);
-    xen_caps[XEN_CAPABILITIES_INFO_LEN - 1] = '\0';
-
-    for ( item = strtok_r(xen_caps, " ", &ptr);
-          item != NULL ; item = strtok_r(NULL, " ", &ptr) )
-    {
-        match = !strcmp(dom->guest_type, item);
-        DOMPRINTF("%s: supported guest type: %s%s", __FUNCTION__,
-                  item, match ? " <= matches" : "");
-        if ( match )
-            found++;
-    }
-    if ( !found )
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "%s: guest type %s not supported by xen kernel, sorry",
-                     __FUNCTION__, dom->guest_type);
-
-    return found;
-}
-
-int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch, uint32_t domid)
-{
-    dom->xch = xch;
-    dom->guest_domid = domid;
-
-    dom->xen_version = xc_version(xch, XENVER_version, NULL);
-    if ( xc_version(xch, XENVER_capabilities, &dom->xen_caps) < 0 )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR, "can't get xen capabilities");
-        return -1;
-    }
-    DOMPRINTF("%s: ver %d.%d, caps %s", __FUNCTION__,
-              dom->xen_version >> 16, dom->xen_version & 0xff,
-              dom->xen_caps);
-    return 0;
-}
-
-int xc_dom_boot_mem_init(struct xc_dom_image *dom)
-{
-    long rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    rc = dom->arch_hooks->meminit(dom);
-    if ( rc != 0 )
-    {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "%s: can't allocate low memory for domain",
-                     __FUNCTION__);
-        return rc;
-    }
-
-    return 0;
-}
-
-void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
-                           xen_pfn_t count)
-{
-    int page_shift = XC_DOM_PAGE_SHIFT(dom);
-    privcmd_mmap_entry_t *entries;
-    void *ptr;
-    int i;
-    int err;
-
-    entries = xc_dom_malloc(dom, count * sizeof(privcmd_mmap_entry_t));
-    if ( entries == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
-                     " [malloc]", __FUNCTION__, pfn, count);
-        return NULL;
-    }
-
-    for ( i = 0; i < count; i++ )
-        entries[i].mfn = xc_dom_p2m(dom, pfn + i);
-
-    ptr = xc_map_foreign_ranges(dom->xch, dom->guest_domid,
-                count << page_shift, PROT_READ | PROT_WRITE, 1 << page_shift,
-                entries, count);
-    if ( ptr == NULL )
-    {
-        err = errno;
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: failed to mmap domU pages 0x%" PRIpfn "+0x%" PRIpfn
-                     " [mmap, errno=%i (%s)]", __FUNCTION__, pfn, count,
-                     err, strerror(err));
-        return NULL;
-    }
-
-    return ptr;
-}
-
-int xc_dom_boot_image(struct xc_dom_image *dom)
-{
-    xc_dominfo_t info;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* misc stuff*/
-    if ( (rc = dom->arch_hooks->bootearly(dom)) != 0 )
-        return rc;
-
-    /* collect some info */
-    rc = xc_domain_getinfo(dom->xch, dom->guest_domid, 1, &info);
-    if ( rc < 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: getdomaininfo failed (rc=%d)", __FUNCTION__, rc);
-        return rc;
-    }
-    if ( rc == 0 || info.domid != dom->guest_domid )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: Huh? No domains found (nr_domains=%d) "
-                     "or domid mismatch (%d != %d)", __FUNCTION__,
-                     rc, info.domid, dom->guest_domid);
-        return -1;
-    }
-    dom->shared_info_mfn = info.shared_info_frame;
-
-    /* sanity checks */
-    if ( !xc_dom_compat_check(dom) )
-        return -1;
-
-    /* initial mm setup */
-    if ( dom->arch_hooks->setup_pgtables &&
-         (rc = dom->arch_hooks->setup_pgtables(dom)) != 0 )
-        return rc;
-
-    /* start info page */
-    if ( dom->arch_hooks->start_info )
-        dom->arch_hooks->start_info(dom);
-
-    /* hypercall page */
-    if ( (rc = setup_hypercall_page(dom)) != 0 )
-        return rc;
-    xc_dom_log_memory_footprint(dom);
-
-    /* misc x86 stuff */
-    if ( (rc = dom->arch_hooks->bootlate(dom)) != 0 )
-        return rc;
-
-    /* let the vm run */
-    if ( (rc = dom->arch_hooks->vcpu(dom)) != 0 )
-        return rc;
-    xc_dom_unmap_all(dom);
-
-    return rc;
-}
-
-static xen_pfn_t xc_dom_gnttab_setup(xc_interface *xch, uint32_t domid)
-{
-    gnttab_setup_table_t setup;
-    DECLARE_HYPERCALL_BUFFER(xen_pfn_t, gmfnp);
-    int rc;
-    xen_pfn_t gmfn;
-
-    gmfnp = xc_hypercall_buffer_alloc(xch, gmfnp, sizeof(*gmfnp));
-    if (gmfnp == NULL)
-        return -1;
-
-    setup.dom = domid;
-    setup.nr_frames = 1;
-    set_xen_guest_handle(setup.frame_list, gmfnp);
-    setup.status = 0;
-
-    rc = xc_gnttab_op(xch, GNTTABOP_setup_table, &setup, sizeof(setup), 1);
-    gmfn = *gmfnp;
-    xc_hypercall_buffer_free(xch, gmfnp);
-
-    if ( rc != 0 || setup.status != GNTST_okay )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to setup domU grant table "
-                     "[errno=%d, status=%" PRId16 "]\n",
-                     __FUNCTION__, rc != 0 ? errno : 0, setup.status);
-        return -1;
-    }
-
-    return gmfn;
-}
-
-static void xc_dom_set_gnttab_entry(xc_interface *xch,
-                                    grant_entry_v1_t *gnttab,
-                                    unsigned int idx,
-                                    uint32_t guest_domid,
-                                    uint32_t backend_domid,
-                                    xen_pfn_t guest_gfn)
-{
-    if ( guest_domid == backend_domid || guest_gfn == -1 )
-        return;
-
-    xc_dom_printf(xch, "%s: d%d gnt[%u] -> d%d 0x%"PRI_xen_pfn,
-                  __func__, guest_domid, idx, backend_domid, guest_gfn);
-
-    gnttab[idx].flags = GTF_permit_access;
-    gnttab[idx].domid = backend_domid;
-    gnttab[idx].frame = guest_gfn;
-}
-
-static int compat_gnttab_seed(xc_interface *xch, uint32_t domid,
-                              xen_pfn_t console_gfn,
-                              xen_pfn_t xenstore_gfn,
-                              uint32_t console_domid,
-                              uint32_t xenstore_domid)
-{
-
-    xen_pfn_t gnttab_gfn;
-    grant_entry_v1_t *gnttab;
-
-    gnttab_gfn = xc_dom_gnttab_setup(xch, domid);
-    if ( gnttab_gfn == -1 )
-        return -1;
-
-    gnttab = xc_map_foreign_range(xch,
-                                  domid,
-                                  PAGE_SIZE,
-                                  PROT_READ|PROT_WRITE,
-                                  gnttab_gfn);
-    if ( gnttab == NULL )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to map d%d grant table "
-                     "[errno=%d]\n",
-                     __func__, domid, errno);
-        return -1;
-    }
-
-    xc_dom_set_gnttab_entry(xch, gnttab, GNTTAB_RESERVED_CONSOLE,
-                            domid, console_domid, console_gfn);
-    xc_dom_set_gnttab_entry(xch, gnttab, GNTTAB_RESERVED_XENSTORE,
-                            domid, xenstore_domid, xenstore_gfn);
-
-    if ( munmap(gnttab, PAGE_SIZE) == -1 )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to unmap d%d grant table "
-                     "[errno=%d]\n",
-                     __func__, domid, errno);
-        return -1;
-    }
-
-    /* Guest shouldn't really touch its grant table until it has
-     * enabled its caches. But lets be nice. */
-    xc_domain_cacheflush(xch, domid, gnttab_gfn, 1);
-
-    return 0;
-}
-
-static int compat_gnttab_hvm_seed(xc_interface *xch, uint32_t domid,
-                                  xen_pfn_t console_gfn,
-                                  xen_pfn_t xenstore_gfn,
-                                  uint32_t console_domid,
-                                  uint32_t xenstore_domid)
-{
-    int rc;
-    xen_pfn_t scratch_gfn;
-    struct xen_add_to_physmap xatp = {
-        .domid = domid,
-        .space = XENMAPSPACE_grant_table,
-        .idx   = 0,
-    };
-    struct xen_remove_from_physmap xrfp = {
-        .domid = domid,
-    };
-
-    rc = xc_core_arch_get_scratch_gpfn(xch, domid, &scratch_gfn);
-    if ( rc < 0 )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to get a scratch gfn from d%d"
-                     "[errno=%d]\n",
-                     __func__, domid, errno);
-        return -1;
-    }
-    xatp.gpfn = scratch_gfn;
-    xrfp.gpfn = scratch_gfn;
-
-    xc_dom_printf(xch, "%s: d%d: pfn=0x%"PRI_xen_pfn, __func__,
-                  domid, scratch_gfn);
-
-    rc = do_memory_op(xch, XENMEM_add_to_physmap, &xatp, sizeof(xatp));
-    if ( rc != 0 )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to add gnttab to d%d physmap "
-                     "[errno=%d]\n",
-                     __func__, domid, errno);
-        return -1;
-    }
-
-    rc = compat_gnttab_seed(xch, domid,
-                            console_gfn, xenstore_gfn,
-                            console_domid, xenstore_domid);
-    if (rc != 0)
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to seed gnttab entries for d%d\n",
-                     __func__, domid);
-        (void) do_memory_op(xch, XENMEM_remove_from_physmap, &xrfp,
-                            sizeof(xrfp));
-        return -1;
-    }
-
-    rc = do_memory_op(xch, XENMEM_remove_from_physmap, &xrfp, sizeof(xrfp));
-    if (rc != 0)
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to remove gnttab from d%d physmap "
-                     "[errno=%d]\n",
-                     __func__, domid, errno);
-        return -1;
-    }
-
-    return 0;
-}
-
-int xc_dom_gnttab_seed(xc_interface *xch, uint32_t guest_domid,
-                       bool is_hvm, xen_pfn_t console_gfn,
-                       xen_pfn_t xenstore_gfn, uint32_t console_domid,
-                       uint32_t xenstore_domid)
-{
-    xenforeignmemory_handle* fmem = xch->fmem;
-    xenforeignmemory_resource_handle *fres;
-    void *addr = NULL;
-
-    fres = xenforeignmemory_map_resource(
-        fmem, guest_domid, XENMEM_resource_grant_table,
-        XENMEM_resource_grant_table_id_shared, 0, 1, &addr,
-        PROT_READ | PROT_WRITE, 0);
-    if ( !fres )
-    {
-        if ( errno == EOPNOTSUPP )
-            return is_hvm ?
-                compat_gnttab_hvm_seed(xch, guest_domid,
-                                       console_gfn, xenstore_gfn,
-                                       console_domid, xenstore_domid) :
-                compat_gnttab_seed(xch, guest_domid,
-                                   console_gfn, xenstore_gfn,
-                                   console_domid, xenstore_domid);
-
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: failed to acquire d%d grant table [errno=%d]\n",
-                     __func__, guest_domid, errno);
-        return -1;
-    }
-
-    xc_dom_set_gnttab_entry(xch, addr, GNTTAB_RESERVED_CONSOLE,
-                            guest_domid, console_domid, console_gfn);
-    xc_dom_set_gnttab_entry(xch, addr, GNTTAB_RESERVED_XENSTORE,
-                            guest_domid, xenstore_domid, xenstore_gfn);
-
-    xenforeignmemory_unmap_resource(fmem, fres);
-
-    return 0;
-}
-
-int xc_dom_gnttab_init(struct xc_dom_image *dom)
-{
-    bool is_hvm = xc_dom_translated(dom);
-    xen_pfn_t console_gfn = xc_dom_p2m(dom, dom->console_pfn);
-    xen_pfn_t xenstore_gfn = xc_dom_p2m(dom, dom->xenstore_pfn);
-
-    return xc_dom_gnttab_seed(dom->xch, dom->guest_domid, is_hvm,
-                              console_gfn, xenstore_gfn,
-                              dom->console_domid, dom->xenstore_domid);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_bzimageloader.c b/tools/libxc/xg_dom_bzimageloader.c
deleted file mode 100644 (file)
index f959a77..0000000
+++ /dev/null
@@ -1,812 +0,0 @@
-/*
- * Xen domain builder -- bzImage bits
- *
- * Parse and load bzImage kernel images.
- *
- * This relies on version 2.08 of the boot protocol, which contains an
- * ELF file embedded in the bzImage.  The loader extracts this ELF
- * image and passes it off to the standard ELF loader.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- * written 2007 by Jeremy Fitzhardinge <jeremy@xensource.com>
- * written 2008 by Ian Campbell <ijc@hellion.org.uk>
- * written 2009 by Chris Lalancette <clalance@redhat.com>
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress.h"
-
-#include <xen-tools/libs.h>
-
-#ifndef __MINIOS__
-
-#if defined(HAVE_BZLIB)
-
-#include <bzlib.h>
-
-static int xc_try_bzip2_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    bz_stream stream;
-    int ret;
-    char *out_buf;
-    char *tmp_buf;
-    int retval = -1;
-    unsigned int outsize;
-    uint64_t total;
-
-    stream.bzalloc = NULL;
-    stream.bzfree = NULL;
-    stream.opaque = NULL;
-
-    if ( dom->kernel_size == 0)
-    {
-        DOMPRINTF("BZIP2: Input is 0 size");
-        return -1;
-    }
-
-    ret = BZ2_bzDecompressInit(&stream, 0, 0);
-    if ( ret != BZ_OK )
-    {
-        DOMPRINTF("BZIP2: Error initting stream");
-        return -1;
-    }
-
-    /* sigh.  We don't know up-front how much memory we are going to need
-     * for the output buffer.  Allocate the output buffer to be equal
-     * the input buffer to start, and we'll realloc as needed.
-     */
-    outsize = dom->kernel_size;
-
-    /*
-     * stream.avail_in and outsize are unsigned int, while kernel_size
-     * is a size_t. Check we aren't overflowing.
-     */
-    if ( outsize != dom->kernel_size )
-    {
-        DOMPRINTF("BZIP2: Input too large");
-        goto bzip2_cleanup;
-    }
-
-    out_buf = malloc(outsize);
-    if ( out_buf == NULL )
-    {
-        DOMPRINTF("BZIP2: Failed to alloc memory");
-        goto bzip2_cleanup;
-    }
-
-    stream.next_in = dom->kernel_blob;
-    stream.avail_in = dom->kernel_size;
-
-    stream.next_out = out_buf;
-    stream.avail_out = dom->kernel_size;
-
-    for ( ; ; )
-    {
-        ret = BZ2_bzDecompress(&stream);
-        if ( ret == BZ_STREAM_END )
-        {
-            DOMPRINTF("BZIP2: Saw data stream end");
-            retval = 0;
-            break;
-        }
-        if ( ret != BZ_OK )
-        {
-            DOMPRINTF("BZIP2: error %d", ret);
-            free(out_buf);
-            goto bzip2_cleanup;
-        }
-
-        if ( stream.avail_out == 0 )
-        {
-            /* Protect against output buffer overflow */
-            if ( outsize > UINT_MAX / 2 )
-            {
-                DOMPRINTF("BZIP2: output buffer overflow");
-                free(out_buf);
-                goto bzip2_cleanup;
-            }
-
-            if ( xc_dom_kernel_check_size(dom, outsize * 2) )
-            {
-                DOMPRINTF("BZIP2: output too large");
-                free(out_buf);
-                goto bzip2_cleanup;
-            }
-
-            tmp_buf = realloc(out_buf, outsize * 2);
-            if ( tmp_buf == NULL )
-            {
-                DOMPRINTF("BZIP2: Failed to realloc memory");
-                free(out_buf);
-                goto bzip2_cleanup;
-            }
-            out_buf = tmp_buf;
-
-            stream.next_out = out_buf + outsize;
-            stream.avail_out = (outsize * 2) - outsize;
-            outsize *= 2;
-        }
-        else if ( stream.avail_in == 0 )
-        {
-            /*
-             * If there is output buffer available then this indicates
-             * that BZ2_bzDecompress would like more input data to be
-             * provided.  However our complete input buffer is in
-             * memory and provided upfront so if avail_in is zero this
-             * actually indicates a truncated input.
-             */
-            DOMPRINTF("BZIP2: not enough input");
-            free(out_buf);
-            goto bzip2_cleanup;
-        }
-    }
-
-    total = (((uint64_t)stream.total_out_hi32) << 32) | stream.total_out_lo32;
-
-    if ( xc_dom_register_external(dom, out_buf, total) )
-    {
-        DOMPRINTF("BZIP2: Error registering stream output");
-        free(out_buf);
-        goto bzip2_cleanup;
-    }
-
-    DOMPRINTF("%s: BZIP2 decompress OK, 0x%zx -> 0x%lx",
-              __FUNCTION__, *size, (long unsigned int) total);
-
-    *blob = out_buf;
-    *size = total;
-
- bzip2_cleanup:
-    BZ2_bzDecompressEnd(&stream);
-
-    return retval;
-}
-
-#else /* !defined(HAVE_BZLIB) */
-
-static int xc_try_bzip2_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                 "%s: BZIP2 decompress support unavailable",
-                 __FUNCTION__);
-    return -1;
-}
-
-#endif
-
-#if defined(HAVE_LZMA)
-
-#include <lzma.h>
-
-static int _xc_try_lzma_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size,
-    lzma_stream *stream, const char *what)
-{
-    lzma_ret ret;
-    lzma_action action = LZMA_RUN;
-    unsigned char *out_buf;
-    unsigned char *tmp_buf;
-    int retval = -1;
-    size_t outsize;
-    const char *msg;
-
-    if ( dom->kernel_size == 0)
-    {
-        DOMPRINTF("%s: Input is 0 size", what);
-        return -1;
-    }
-
-    /* sigh.  We don't know up-front how much memory we are going to need
-     * for the output buffer.  Allocate the output buffer to be equal
-     * the input buffer to start, and we'll realloc as needed.
-     */
-    outsize = dom->kernel_size;
-    out_buf = malloc(outsize);
-    if ( out_buf == NULL )
-    {
-        DOMPRINTF("%s: Failed to alloc memory", what);
-        goto lzma_cleanup;
-    }
-
-    stream->next_in = dom->kernel_blob;
-    stream->avail_in = dom->kernel_size;
-
-    stream->next_out = out_buf;
-    stream->avail_out = dom->kernel_size;
-
-    for ( ; ; )
-    {
-        ret = lzma_code(stream, action);
-        if ( ret == LZMA_STREAM_END )
-        {
-            DOMPRINTF("%s: Saw data stream end", what);
-            retval = 0;
-            break;
-        }
-        if ( ret != LZMA_OK )
-        {
-            switch ( ret )
-            {
-            case LZMA_MEM_ERROR:
-                msg = strerror(ENOMEM);
-                break;
-
-            case LZMA_MEMLIMIT_ERROR:
-                msg = "Memory usage limit reached";
-                break;
-
-            case LZMA_FORMAT_ERROR:
-                msg = "File format not recognized";
-                break;
-
-            case LZMA_OPTIONS_ERROR:
-                // FIXME: Better message?
-                msg = "Unsupported compression options";
-                break;
-
-            case LZMA_DATA_ERROR:
-                msg = "File is corrupt";
-                break;
-
-            case LZMA_BUF_ERROR:
-                msg = "Unexpected end of input";
-                break;
-
-            default:
-                msg = "Internal program error (bug)";
-                break;
-            }
-            DOMPRINTF("%s: %s decompression error: %s",
-                      __FUNCTION__, what, msg);
-            free(out_buf);
-            goto lzma_cleanup;
-        }
-
-        if ( stream->avail_out == 0 )
-        {
-            /* Protect against output buffer overflow */
-            if ( outsize > SIZE_MAX / 2 )
-            {
-                DOMPRINTF("%s: output buffer overflow", what);
-                free(out_buf);
-                goto lzma_cleanup;
-            }
-
-            if ( xc_dom_kernel_check_size(dom, outsize * 2) )
-            {
-                DOMPRINTF("%s: output too large", what);
-                free(out_buf);
-                goto lzma_cleanup;
-            }
-
-            tmp_buf = realloc(out_buf, outsize * 2);
-            if ( tmp_buf == NULL )
-            {
-                DOMPRINTF("%s: Failed to realloc memory", what);
-                free(out_buf);
-                goto lzma_cleanup;
-            }
-            out_buf = tmp_buf;
-
-            stream->next_out = out_buf + outsize;
-            stream->avail_out = (outsize * 2) - outsize;
-            outsize *= 2;
-        }
-    }
-
-    if ( xc_dom_register_external(dom, out_buf, stream->total_out) )
-    {
-        DOMPRINTF("%s: Error registering stream output", what);
-        free(out_buf);
-        goto lzma_cleanup;
-    }
-
-    DOMPRINTF("%s: %s decompress OK, 0x%zx -> 0x%zx",
-              __FUNCTION__, what, *size, (size_t)stream->total_out);
-
-    *blob = out_buf;
-    *size = stream->total_out;
-
- lzma_cleanup:
-    lzma_end(stream);
-
-    return retval;
-}
-
-/* 128 Mb is the minimum size (half-way) documented to work for all inputs. */
-#define LZMA_BLOCK_SIZE (128*1024*1024)
-
-static int xc_try_xz_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    lzma_stream stream = LZMA_STREAM_INIT;
-
-    if ( lzma_stream_decoder(&stream, LZMA_BLOCK_SIZE, 0) != LZMA_OK )
-    {
-        DOMPRINTF("XZ: Failed to init decoder");
-        return -1;
-    }
-
-    return _xc_try_lzma_decode(dom, blob, size, &stream, "XZ");
-}
-
-static int xc_try_lzma_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    lzma_stream stream = LZMA_STREAM_INIT;
-
-    if ( lzma_alone_decoder(&stream, LZMA_BLOCK_SIZE) != LZMA_OK )
-    {
-        DOMPRINTF("LZMA: Failed to init decoder");
-        return -1;
-    }
-
-    return _xc_try_lzma_decode(dom, blob, size, &stream, "LZMA");
-}
-
-#else /* !defined(HAVE_LZMA) */
-
-static int xc_try_xz_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                 "%s: XZ decompress support unavailable",
-                 __FUNCTION__);
-    return -1;
-}
-
-static int xc_try_lzma_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                 "%s: LZMA decompress support unavailable",
-                 __FUNCTION__);
-    return -1;
-}
-
-#endif
-
-#if defined(HAVE_LZO1X)
-
-#include <lzo/lzo1x.h>
-
-#define LZOP_HEADER_HAS_FILTER 0x00000800
-#define LZOP_MAX_BLOCK_SIZE (64*1024*1024)
-
-static inline uint_fast16_t lzo_read_16(const unsigned char *buf)
-{
-    return buf[1] | (buf[0] << 8);
-}
-
-static inline uint_fast32_t lzo_read_32(const unsigned char *buf)
-{
-    return lzo_read_16(buf + 2) | ((uint32_t)lzo_read_16(buf) << 16);
-}
-
-static int xc_try_lzo1x_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    int ret;
-    const unsigned char *cur = dom->kernel_blob;
-    unsigned char *out_buf = NULL;
-    size_t left = dom->kernel_size;
-    const char *msg;
-    unsigned version;
-    static const unsigned char magic[] = {
-        0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a
-    };
-
-    /*
-     * lzo_uint should match size_t. Check that this is the case to be
-     * sure we won't overflow various lzo_uint fields.
-     */
-    BUILD_BUG_ON(sizeof(lzo_uint) != sizeof(size_t));
-
-    ret = lzo_init();
-    if ( ret != LZO_E_OK )
-    {
-        DOMPRINTF("LZO1x: Failed to init library (%d)\n", ret);
-        return -1;
-    }
-
-    if ( left < 16 || memcmp(cur, magic, 9) )
-    {
-        DOMPRINTF("LZO1x: Unrecognized magic\n");
-        return -1;
-    }
-
-    /* get version (2bytes), skip library version (2),
-     * 'need to be extracted' version (2) and method (1) */
-    version = lzo_read_16(cur + 9);
-    cur += 16;
-    left -= 16;
-
-    if ( version >= 0x0940 )
-    {
-        /* skip level */
-        ++cur;
-        if ( left )
-            --left;
-    }
-
-    if ( left >= 4 && (lzo_read_32(cur) & LZOP_HEADER_HAS_FILTER) )
-        ret = 8; /* flags + filter info */
-    else
-        ret = 4; /* flags */
-
-    /* skip mode and mtime_low */
-    ret += 8;
-    if ( version >= 0x0940 )
-        ret += 4; /* skip mtime_high */
-
-    /* don't care about the file name, and skip checksum */
-    if ( left > ret )
-        ret += 1 + cur[ret] + 4;
-
-    if ( left < ret )
-    {
-        DOMPRINTF("LZO1x: Incomplete header\n");
-        return -1;
-    }
-    cur += ret;
-    left -= ret;
-
-    for ( *size = 0; ; )
-    {
-        lzo_uint src_len, dst_len, out_len;
-        unsigned char *tmp_buf;
-
-        msg = "Short input";
-        if ( left < 4 )
-            break;
-
-        dst_len = lzo_read_32(cur);
-        if ( !dst_len )
-        {
-            msg = "Error registering stream output";
-            if ( xc_dom_register_external(dom, out_buf, *size) )
-                break;
-
-            return 0;
-        }
-
-        if ( dst_len > LZOP_MAX_BLOCK_SIZE )
-        {
-            msg = "Block size too large";
-            break;
-        }
-
-        if ( left < 12 )
-            break;
-
-        src_len = lzo_read_32(cur + 4);
-        cur += 12; /* also skip block checksum info */
-        left -= 12;
-
-        msg = "Bad source length";
-        if ( src_len <= 0 || src_len > dst_len || src_len > left )
-            break;
-
-        msg = "Output buffer overflow";
-        if ( *size > SIZE_MAX - dst_len )
-            break;
-
-        msg = "Decompressed image too large";
-        if ( xc_dom_kernel_check_size(dom, *size + dst_len) )
-            break;
-
-        msg = "Failed to (re)alloc memory";
-        tmp_buf = realloc(out_buf, *size + dst_len);
-        if ( tmp_buf == NULL )
-            break;
-
-        out_buf = tmp_buf;
-        out_len = dst_len;
-
-        ret = lzo1x_decompress_safe(cur, src_len,
-                                    out_buf + *size, &out_len, NULL);
-        switch ( ret )
-        {
-        case LZO_E_OK:
-            msg = "Input underrun";
-            if ( out_len != dst_len )
-                break;
-
-            *blob = out_buf;
-            *size += out_len;
-            cur += src_len;
-            left -= src_len;
-            continue;
-
-        case LZO_E_INPUT_NOT_CONSUMED:
-            msg = "Unconsumed input";
-            break;
-
-        case LZO_E_OUTPUT_OVERRUN:
-            msg = "Output overrun";
-            break;
-
-        case LZO_E_INPUT_OVERRUN:
-            msg = "Input overrun";
-            break;
-
-        case LZO_E_LOOKBEHIND_OVERRUN:
-            msg = "Look-behind overrun";
-            break;
-
-        case LZO_E_EOF_NOT_FOUND:
-            msg = "No EOF marker";
-            break;
-
-        case LZO_E_ERROR:
-            msg = "General error";
-            break;
-
-        default:
-            msg = "Internal program error (bug)";
-            break;
-        }
-
-        break;
-    }
-
-    free(out_buf);
-    DOMPRINTF("LZO1x decompression error: %s\n", msg);
-
-    return -1;
-}
-
-#else /* !defined(HAVE_LZO1X) */
-
-static int xc_try_lzo1x_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                 "%s: LZO1x decompress support unavailable\n",
-                 __FUNCTION__);
-    return -1;
-}
-
-#endif
-
-#else /* __MINIOS__ */
-
-int xc_try_bzip2_decode(struct xc_dom_image *dom, void **blob, size_t *size);
-int xc_try_lzma_decode(struct xc_dom_image *dom, void **blob, size_t *size);
-int xc_try_lzo1x_decode(struct xc_dom_image *dom, void **blob, size_t *size);
-int xc_try_xz_decode(struct xc_dom_image *dom, void **blob, size_t *size);
-
-#endif /* !__MINIOS__ */
-
-struct setup_header {
-    uint8_t  _pad0[0x1f1];  /* skip uninteresting stuff */
-    uint8_t  setup_sects;
-    uint16_t root_flags;
-    uint32_t syssize;
-    uint16_t ram_size;
-    uint16_t vid_mode;
-    uint16_t root_dev;
-    uint16_t boot_flag;
-    uint16_t jump;
-    uint32_t header;
-#define HDR_MAGIC  "HdrS"
-#define HDR_MAGIC_SZ 4
-    uint16_t version;
-#define VERSION(h,l) (((h)<<8) | (l))
-    uint32_t realmode_swtch;
-    uint16_t start_sys;
-    uint16_t kernel_version;
-    uint8_t  type_of_loader;
-    uint8_t  loadflags;
-    uint16_t setup_move_size;
-    uint32_t code32_start;
-    uint32_t ramdisk_image;
-    uint32_t ramdisk_size;
-    uint32_t bootsect_kludge;
-    uint16_t heap_end_ptr;
-    uint16_t _pad1;
-    uint32_t cmd_line_ptr;
-    uint32_t initrd_addr_max;
-    uint32_t kernel_alignment;
-    uint8_t  relocatable_kernel;
-    uint8_t  _pad2[3];
-    uint32_t cmdline_size;
-    uint32_t hardware_subarch;
-    uint64_t hardware_subarch_data;
-    uint32_t payload_offset;
-    uint32_t payload_length;
-} __attribute__((packed));
-
-extern struct xc_dom_loader elf_loader;
-
-static int check_magic(struct xc_dom_image *dom, const void *magic, size_t len)
-{
-    if (len > dom->kernel_size)
-        return 0;
-
-    return (memcmp(dom->kernel_blob, magic, len) == 0);
-}
-
-static int xc_dom_probe_bzimage_kernel(struct xc_dom_image *dom)
-{
-    struct setup_header *hdr;
-    uint64_t payload_offset, payload_length;
-    int ret;
-
-    if ( dom->kernel_blob == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: no kernel image loaded", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( dom->kernel_size < sizeof(struct setup_header) )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel image too small", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    hdr = dom->kernel_blob;
-
-    if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
-    {
-        xc_dom_printf(dom->xch, "%s: kernel is not a bzImage", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( hdr->version < VERSION(2,8) )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: boot protocol"
-                     " too old (%04x)", __FUNCTION__, hdr->version);
-        return -EINVAL;
-    }
-
-
-    /* upcast to 64 bits to avoid overflow */
-    /* setup_sects is u8 and so cannot overflow */
-    payload_offset = (hdr->setup_sects + 1) * 512;
-    payload_offset += hdr->payload_offset;
-    payload_length = hdr->payload_length;
-
-    if ( payload_offset >= dom->kernel_size )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: payload offset overflow",
-                     __FUNCTION__);
-        return -EINVAL;
-    }
-    if ( (payload_offset + payload_length) > dom->kernel_size )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: payload length overflow",
-                     __FUNCTION__);
-        return -EINVAL;
-    }
-
-    dom->kernel_blob = dom->kernel_blob + payload_offset;
-    dom->kernel_size = payload_length;
-
-    if ( check_magic(dom, "\037\213", 2) )
-    {
-        ret = xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret == -1 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: unable to"
-                         " gzip decompress kernel", __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else if ( check_magic(dom, "\102\132\150", 3) )
-    {
-        ret = xc_try_bzip2_decode(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret < 0 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s unable to BZIP2 decompress kernel",
-                         __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else if ( check_magic(dom, "\3757zXZ", 6) )
-    {
-        ret = xc_try_xz_decode(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret < 0 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s unable to XZ decompress kernel",
-                         __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else if ( check_magic(dom, "\135\000", 2) )
-    {
-        ret = xc_try_lzma_decode(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret < 0 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s unable to LZMA decompress kernel",
-                         __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else if ( check_magic(dom, "\x89LZO", 5) )
-    {
-        ret = xc_try_lzo1x_decode(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret < 0 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s unable to LZO decompress kernel\n",
-                         __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else if ( check_magic(dom, "\x02\x21", 2) )
-    {
-        ret = xc_try_lz4_decode(dom, &dom->kernel_blob, &dom->kernel_size);
-        if ( ret < 0 )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s unable to LZ4 decompress kernel\n",
-                         __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-    else
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "%s: unknown compression format", __FUNCTION__);
-        return -EINVAL;
-    }
-
-    return elf_loader.probe(dom);
-}
-
-static int xc_dom_parse_bzimage_kernel(struct xc_dom_image *dom)
-{
-    return elf_loader.parser(dom);
-}
-
-static int xc_dom_load_bzimage_kernel(struct xc_dom_image *dom)
-{
-    return elf_loader.loader(dom);
-}
-
-static struct xc_dom_loader bzimage_loader = {
-    .name = "Linux bzImage",
-    .probe = xc_dom_probe_bzimage_kernel,
-    .parser = xc_dom_parse_bzimage_kernel,
-    .loader = xc_dom_load_bzimage_kernel,
-};
-
-static void __init register_loader(void)
-{
-    xc_dom_register_loader(&bzimage_loader);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_compat_linux.c b/tools/libxc/xg_dom_compat_linux.c
deleted file mode 100644 (file)
index b645f0b..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Xen domain builder -- compatibility code.
- *
- * Replacements for xc_linux_build & friends,
- * as example code and to make the new builder
- * usable as drop-in replacement.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <zlib.h>
-
-#include "xenctrl.h"
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-
-/* ------------------------------------------------------------------------ */
-
-int xc_linux_build(xc_interface *xch, uint32_t domid,
-                   unsigned int mem_mb,
-                   const char *image_name,
-                   const char *initrd_name,
-                   const char *cmdline,
-                   const char *features,
-                   unsigned long flags,
-                   unsigned int store_evtchn,
-                   unsigned long *store_mfn,
-                   unsigned int console_evtchn,
-                   unsigned long *console_mfn)
-{
-    struct xc_dom_image *dom;
-    int rc;
-
-    xc_dom_loginit(xch);
-    dom = xc_dom_allocate(xch, cmdline, features);
-    if (dom == NULL)
-        return -1;
-    if ( (rc = xc_dom_kernel_file(dom, image_name)) != 0 )
-        goto out;
-    if ( initrd_name && strlen(initrd_name) &&
-         ((rc = xc_dom_module_file(dom, initrd_name, NULL)) != 0) )
-        goto out;
-
-    dom->flags |= flags;
-    dom->console_evtchn = console_evtchn;
-    dom->xenstore_evtchn = store_evtchn;
-
-    if ( (rc = xc_dom_boot_xen_init(dom, xch, domid)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_parse_image(dom)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_mem_init(dom, mem_mb)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_boot_mem_init(dom)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_build_image(dom)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_boot_image(dom)) != 0 )
-        goto out;
-    if ( (rc = xc_dom_gnttab_init(dom)) != 0)
-        goto out;
-
-    *console_mfn = xc_dom_p2m(dom, dom->console_pfn);
-    *store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
-
- out:
-    xc_dom_release(dom);
-    return rc;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_core.c b/tools/libxc/xg_dom_core.c
deleted file mode 100644 (file)
index 1c91cce..0000000
+++ /dev/null
@@ -1,1272 +0,0 @@
-/*
- * Xen domain builder -- core bits.
- *
- * The core code goes here:
- *   - allocate and release domain structs.
- *   - memory management functions.
- *   - misc helper functions.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <inttypes.h>
-#include <zlib.h>
-#include <assert.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-#include "_paths.h"
-
-/* ------------------------------------------------------------------------ */
-/* debugging                                                                */
-
-
-
-static const char *default_logfile = XEN_LOG_DIR "/domain-builder-ng.log";
-
-int xc_dom_loginit(xc_interface *xch) {
-    if (xch->dombuild_logger) return 0;
-
-    if (!xch->dombuild_logger_file) {
-        xch->dombuild_logger_file = fopen(default_logfile, "a");
-        if (!xch->dombuild_logger_file) {
-            PERROR("Could not open logfile `%s'", default_logfile);
-            return -1;
-        }
-    }
-    
-    xch->dombuild_logger = xch->dombuild_logger_tofree =
-        (xentoollog_logger*)
-        xtl_createlogger_stdiostream(xch->dombuild_logger_file, XTL_DETAIL,
-             XTL_STDIOSTREAM_SHOW_DATE|XTL_STDIOSTREAM_SHOW_PID);
-    if (!xch->dombuild_logger)
-        return -1;
-
-    xc_dom_printf(xch, "### ----- xc domain builder logfile opened -----");
-
-    return 0;
-}
-
-void xc_dom_printf(xc_interface *xch, const char *fmt, ...)
-{
-    va_list args;
-    if (!xch->dombuild_logger) return;
-    va_start(args, fmt);
-    xtl_logv(xch->dombuild_logger, XTL_DETAIL, -1, "domainbuilder", fmt, args);
-    va_end(args);
-}
-
-void xc_dom_panic_func(xc_interface *xch,
-                       const char *file, int line, xc_error_code err,
-                       const char *fmt, ...)
-{
-    va_list args;
-    char msg[XC_MAX_ERROR_MSG_LEN];
-
-    va_start(args, fmt);
-    vsnprintf(msg, sizeof(msg), fmt, args);
-    va_end(args);
-    msg[sizeof(msg)-1] = 0;
-    
-    xc_report(xch,
-              xch->dombuild_logger ? xch->dombuild_logger : xch->error_handler,
-              XTL_ERROR, err, "panic: %s:%d: %s",
-              file, line, msg);
-}
-
-static void print_mem(struct xc_dom_image *dom, const char *name, size_t mem)
-{
-    if ( mem > (32 * 1024 * 1024) )
-        DOMPRINTF("%-24s : %zd MB", name, mem / (1024 * 1024));
-    else if ( mem > (32 * 1024) )
-        DOMPRINTF("%-24s : %zd kB", name, mem / 1024);
-    else
-        DOMPRINTF("%-24s : %zd bytes", name, mem);
-}
-
-void xc_dom_log_memory_footprint(struct xc_dom_image *dom)
-{
-    DOMPRINTF("domain builder memory footprint");
-    DOMPRINTF("   allocated");
-    print_mem(dom, "      malloc", dom->alloc_malloc);
-    print_mem(dom, "      anon mmap", dom->alloc_mem_map);
-    DOMPRINTF("   mapped");
-    print_mem(dom, "      file mmap", dom->alloc_file_map);
-    print_mem(dom, "      domU mmap", dom->alloc_domU_map);
-}
-
-/* ------------------------------------------------------------------------ */
-/* simple memory pool                                                       */
-
-void *xc_dom_malloc(struct xc_dom_image *dom, size_t size)
-{
-    struct xc_dom_mem *block;
-
-    if ( size > SIZE_MAX - sizeof(*block) )
-    {
-        DOMPRINTF("%s: unreasonable allocation size", __FUNCTION__);
-        return NULL;
-    }
-    block = malloc(sizeof(*block) + size);
-    if ( block == NULL )
-    {
-        DOMPRINTF("%s: allocation failed", __FUNCTION__);
-        return NULL;
-    }
-    memset(block, 0, sizeof(*block) + size);
-    block->type = XC_DOM_MEM_TYPE_MALLOC_INTERNAL;
-    block->next = dom->memblocks;
-    dom->memblocks = block;
-    dom->alloc_malloc += sizeof(*block) + size;
-    if ( size > (100 * 1024) )
-        print_mem(dom, __FUNCTION__, size);
-    return block->memory;
-}
-
-void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size)
-{
-    struct xc_dom_mem *block;
-
-    block = malloc(sizeof(*block));
-    if ( block == NULL )
-    {
-        DOMPRINTF("%s: allocation failed", __FUNCTION__);
-        return NULL;
-    }
-    memset(block, 0, sizeof(*block));
-    block->len = size;
-    block->ptr = mmap(NULL, block->len,
-                      PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-                      -1, 0);
-    if ( block->ptr == MAP_FAILED )
-    {
-        DOMPRINTF("%s: mmap failed", __FUNCTION__);
-        free(block);
-        return NULL;
-    }
-    block->type = XC_DOM_MEM_TYPE_MMAP;
-    block->next = dom->memblocks;
-    dom->memblocks = block;
-    dom->alloc_malloc += sizeof(*block);
-    dom->alloc_mem_map += block->len;
-    if ( size > (100 * 1024) )
-        print_mem(dom, __FUNCTION__, size);
-    return block->ptr;
-}
-
-int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size)
-{
-    struct xc_dom_mem *block;
-
-    block = malloc(sizeof(*block));
-    if ( block == NULL )
-    {
-        DOMPRINTF("%s: allocation failed", __FUNCTION__);
-        return -1;
-    }
-    memset(block, 0, sizeof(*block));
-    block->ptr = ptr;
-    block->len = size;
-    block->type = XC_DOM_MEM_TYPE_MALLOC_EXTERNAL;
-    block->next = dom->memblocks;
-    dom->memblocks = block;
-    dom->alloc_malloc += sizeof(*block);
-    dom->alloc_mem_map += block->len;
-    return 0;
-}
-
-void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
-                            const char *filename, size_t * size,
-                            const size_t max_size)
-{
-    struct xc_dom_mem *block = NULL;
-    int fd = -1;
-    off_t offset;
-
-    fd = open(filename, O_RDONLY);
-    if ( fd == -1 ) {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "failed to open file '%s': %s",
-                     filename, strerror(errno));
-        goto err;
-    }
-
-    if ( (lseek(fd, 0, SEEK_SET) == -1) ||
-         ((offset = lseek(fd, 0, SEEK_END)) == -1) ) {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "failed to seek on file '%s': %s",
-                     filename, strerror(errno));
-        goto err;
-    }
-
-    *size = offset;
-
-    if ( max_size && *size > max_size )
-    {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "tried to map file which is too large");
-        goto err;
-    }
-
-    if ( !*size )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "'%s': zero length file", filename);
-        goto err;
-    }
-
-    block = malloc(sizeof(*block));
-    if ( block == NULL ) {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "failed to allocate block (%zu bytes)",
-                     sizeof(*block));
-        goto err;
-    }
-
-    memset(block, 0, sizeof(*block));
-    block->len = *size;
-    block->ptr = mmap(NULL, block->len, PROT_READ,
-                           MAP_SHARED, fd, 0);
-    if ( block->ptr == MAP_FAILED ) {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "failed to mmap file '%s': %s",
-                     filename, strerror(errno));
-        goto err;
-    }
-
-    block->type = XC_DOM_MEM_TYPE_MMAP;
-    block->next = dom->memblocks;
-    dom->memblocks = block;
-    dom->alloc_malloc += sizeof(*block);
-    dom->alloc_file_map += block->len;
-    close(fd);
-    if ( *size > (100 * 1024) )
-        print_mem(dom, __FUNCTION__, *size);
-    return block->ptr;
-
- err:
-    if ( fd != -1 )
-        close(fd);
-    free(block);
-    DOMPRINTF("%s: failed (on file `%s')", __FUNCTION__, filename);
-    return NULL;
-}
-
-static void xc_dom_free_all(struct xc_dom_image *dom)
-{
-    struct xc_dom_mem *block;
-
-    while ( (block = dom->memblocks) != NULL )
-    {
-        dom->memblocks = block->next;
-        switch ( block->type )
-        {
-        case XC_DOM_MEM_TYPE_MALLOC_INTERNAL:
-            break;
-        case XC_DOM_MEM_TYPE_MALLOC_EXTERNAL:
-            free(block->ptr);
-            break;
-        case XC_DOM_MEM_TYPE_MMAP:
-            munmap(block->ptr, block->len);
-            break;
-        }
-        free(block);
-    }
-}
-
-char *xc_dom_strdup(struct xc_dom_image *dom, const char *str)
-{
-    size_t len = strlen(str) + 1;
-    char *nstr = xc_dom_malloc(dom, len);
-
-    if ( nstr == NULL )
-        return NULL;
-    memcpy(nstr, str, len);
-    return nstr;
-}
-
-/* ------------------------------------------------------------------------ */
-/* decompression buffer sizing                                              */
-int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz)
-{
-    /* No limit */
-    if ( !dom->max_kernel_size )
-        return 0;
-
-    if ( sz > dom->max_kernel_size )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "kernel image too large");
-        return 1;
-    }
-
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-/* read files, copy memory blocks, with transparent gunzip                  */
-
-size_t xc_dom_check_gzip(xc_interface *xch, void *blob, size_t ziplen)
-{
-    unsigned char *gzlen;
-    size_t unziplen;
-
-    if ( ziplen < 6 )
-        /* Too small.  We need (i.e. the subsequent code relies on)
-         * 2 bytes for the magic number plus 4 bytes length. */
-        return 0;
-
-    if ( strncmp(blob, "\037\213", 2) )
-        /* not gzipped */
-        return 0;
-
-    gzlen = blob + ziplen - 4;
-    unziplen = (size_t)gzlen[3] << 24 | gzlen[2] << 16 | gzlen[1] << 8 | gzlen[0];
-    if ( unziplen > XC_DOM_DECOMPRESS_MAX )
-    {
-        xc_dom_printf
-            (xch,
-             "%s: size (zip %zd, unzip %zd) looks insane, skip gunzip",
-             __FUNCTION__, ziplen, unziplen);
-        return 0;
-    }
-
-    return unziplen + 16;
-}
-
-int xc_dom_do_gunzip(xc_interface *xch,
-                     void *src, size_t srclen, void *dst, size_t dstlen)
-{
-    z_stream zStream;
-    int rc;
-
-    memset(&zStream, 0, sizeof(zStream));
-    zStream.next_in = src;
-    zStream.avail_in = srclen;
-    zStream.next_out = dst;
-    zStream.avail_out = dstlen;
-    rc = inflateInit2(&zStream, (MAX_WBITS + 32)); /* +32 means "handle gzip" */
-    if ( rc != Z_OK )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: inflateInit2 failed (rc=%d)", __FUNCTION__, rc);
-        return -1;
-    }
-    rc = inflate(&zStream, Z_FINISH);
-    inflateEnd(&zStream);
-    if ( rc != Z_STREAM_END )
-    {
-        xc_dom_panic(xch, XC_INTERNAL_ERROR,
-                     "%s: inflate failed (rc=%d)", __FUNCTION__, rc);
-        return -1;
-    }
-
-    xc_dom_printf(xch, "%s: unzip ok, 0x%zx -> 0x%zx",
-                  __FUNCTION__, srclen, dstlen);
-    return 0;
-}
-
-int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size)
-{
-    void *unzip;
-    size_t unziplen;
-
-    unziplen = xc_dom_check_gzip(dom->xch, *blob, *size);
-    if ( unziplen == 0 )
-        return 0;
-
-    if ( xc_dom_kernel_check_size(dom, unziplen) )
-        return 0;
-
-    unzip = xc_dom_malloc(dom, unziplen);
-    if ( unzip == NULL )
-        return -1;
-
-    if ( xc_dom_do_gunzip(dom->xch, *blob, *size, unzip, unziplen) == -1 )
-        return -1;
-
-    *blob = unzip;
-    *size = unziplen;
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-/* domain memory                                                            */
-
-void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t pfn,
-                        xen_pfn_t count)
-{
-    xen_pfn_t count_out_dummy;
-    return xc_dom_pfn_to_ptr_retcount(dom, pfn, count, &count_out_dummy);
-}
-
-void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t pfn,
-                                 xen_pfn_t count, xen_pfn_t *count_out)
-{
-    struct xc_dom_phys *phys;
-    xen_pfn_t offset;
-    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
-    char *mode = "unset";
-
-    *count_out = 0;
-
-    offset = pfn - dom->rambase_pfn;
-    if ( offset > dom->total_pages || /* multiple checks to avoid overflows */
-         count > dom->total_pages ||
-         offset > dom->total_pages - count )
-    {
-        DOMPRINTF("%s: pfn %"PRI_xen_pfn" out of range (0x%" PRIpfn " > 0x%" PRIpfn ")",
-                  __FUNCTION__, pfn, offset, dom->total_pages);
-        return NULL;
-    }
-
-    /* already allocated? */
-    for ( phys = dom->phys_pages; phys != NULL; phys = phys->next )
-    {
-        if ( pfn >= (phys->first + phys->count) )
-            continue;
-        if ( count )
-        {
-            /* size given: must be completely within the already allocated block */
-            if ( (pfn + count) <= phys->first )
-                continue;
-            if ( (pfn < phys->first) ||
-                 ((pfn + count) > (phys->first + phys->count)) )
-            {
-                DOMPRINTF("%s: request overlaps allocated block"
-                          " (req 0x%" PRIpfn "+0x%" PRIpfn ","
-                          " blk 0x%" PRIpfn "+0x%" PRIpfn ")",
-                          __FUNCTION__, pfn, count, phys->first,
-                          phys->count);
-                return NULL;
-            }
-            *count_out = count;
-        }
-        else
-        {
-            /* no size given: block must be allocated already,
-               just hand out a pointer to it */
-            if ( pfn < phys->first )
-                continue;
-            if ( pfn >= phys->first + phys->count )
-                continue;
-            *count_out = phys->count - (pfn - phys->first);
-        }
-        return phys->ptr + ((pfn - phys->first) << page_shift);
-    }
-
-    /* allocating is allowed with size specified only */
-    if ( count == 0 )
-    {
-        DOMPRINTF("%s: no block found, no size given,"
-                  " can't malloc (pfn 0x%" PRIpfn ")",
-                  __FUNCTION__, pfn);
-        return NULL;
-    }
-
-    /* not found, no overlap => allocate */
-    phys = xc_dom_malloc(dom, sizeof(*phys));
-    if ( phys == NULL )
-        return NULL;
-    memset(phys, 0, sizeof(*phys));
-    phys->first = pfn;
-    phys->count = count;
-
-    if ( dom->guest_domid )
-    {
-        mode = "domU mapping";
-        phys->ptr = xc_dom_boot_domU_map(dom, phys->first, phys->count);
-        if ( phys->ptr == NULL )
-            return NULL;
-        dom->alloc_domU_map += phys->count << page_shift;
-    }
-    else
-    {
-        int err;
-
-        mode = "anonymous memory";
-        phys->ptr = mmap(NULL, phys->count << page_shift,
-                         PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
-                         -1, 0);
-        if ( phys->ptr == MAP_FAILED )
-        {
-            err = errno;
-            xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                         "%s: oom: can't allocate 0x%" PRIpfn " pages"
-                         " [mmap, errno=%i (%s)]",
-                         __FUNCTION__, count, err, strerror(err));
-            return NULL;
-        }
-        dom->alloc_mem_map += phys->count << page_shift;
-    }
-
-#if 1
-    DOMPRINTF("%s: %s: pfn 0x%" PRIpfn "+0x%" PRIpfn " at %p",
-              __FUNCTION__, mode, phys->first, phys->count, phys->ptr);
-#endif
-    phys->next = dom->phys_pages;
-    dom->phys_pages = phys;
-    return phys->ptr;
-}
-
-static int xc_dom_chk_alloc_pages(struct xc_dom_image *dom, char *name,
-                                  xen_pfn_t pages)
-{
-    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
-
-    if ( pages > dom->total_pages || /* multiple test avoids overflow probs */
-         dom->pfn_alloc_end - dom->rambase_pfn > dom->total_pages ||
-         pages > dom->total_pages - dom->pfn_alloc_end + dom->rambase_pfn )
-    {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "%s: segment %s too large (0x%"PRIpfn" > "
-                     "0x%"PRIpfn" - 0x%"PRIpfn" pages)", __FUNCTION__, name,
-                     pages, dom->total_pages,
-                     dom->pfn_alloc_end - dom->rambase_pfn);
-        return -1;
-    }
-
-    dom->pfn_alloc_end += pages;
-    dom->virt_alloc_end += pages * page_size;
-
-    if ( dom->allocate )
-        dom->allocate(dom);
-
-    return 0;
-}
-
-static int xc_dom_alloc_pad(struct xc_dom_image *dom, xen_vaddr_t boundary)
-{
-    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
-    xen_pfn_t pages;
-
-    if ( boundary & (page_size - 1) )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: segment boundary isn't page aligned (0x%" PRIx64 ")",
-                     __FUNCTION__, boundary);
-        return -1;
-    }
-    if ( boundary < dom->virt_alloc_end )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: segment boundary too low (0x%" PRIx64 " < 0x%" PRIx64
-                     ")", __FUNCTION__, boundary, dom->virt_alloc_end);
-        return -1;
-    }
-    pages = (boundary - dom->virt_alloc_end) / page_size;
-
-    return xc_dom_chk_alloc_pages(dom, "padding", pages);
-}
-
-int xc_dom_alloc_segment(struct xc_dom_image *dom,
-                         struct xc_dom_seg *seg, char *name,
-                         xen_vaddr_t start, xen_vaddr_t size)
-{
-    unsigned int page_size = XC_DOM_PAGE_SIZE(dom);
-    xen_pfn_t pages;
-    void *ptr;
-
-    if ( start && xc_dom_alloc_pad(dom, start) )
-        return -1;
-
-    pages = (size + page_size - 1) / page_size;
-    start = dom->virt_alloc_end;
-
-    seg->pfn = dom->pfn_alloc_end;
-    seg->pages = pages;
-
-    if ( xc_dom_chk_alloc_pages(dom, name, pages) )
-        return -1;
-
-    /* map and clear pages */
-    ptr = xc_dom_seg_to_ptr(dom, seg);
-    if ( ptr == NULL )
-        return -1;
-    memset(ptr, 0, pages * page_size);
-
-    seg->vstart = start;
-    seg->vend = dom->virt_alloc_end;
-
-    DOMPRINTF("%-20s:   %-12s : 0x%" PRIx64 " -> 0x%" PRIx64
-              "  (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)",
-              __FUNCTION__, name, seg->vstart, seg->vend, seg->pfn, pages);
-
-    return 0;
-}
-
-xen_pfn_t xc_dom_alloc_page(struct xc_dom_image *dom, char *name)
-{
-    xen_vaddr_t start;
-    xen_pfn_t pfn;
-
-    start = dom->virt_alloc_end;
-    pfn = dom->pfn_alloc_end - dom->rambase_pfn;
-
-    if ( xc_dom_chk_alloc_pages(dom, name, 1) )
-        return INVALID_PFN;
-
-    DOMPRINTF("%-20s:   %-12s : 0x%" PRIx64 " (pfn 0x%" PRIpfn ")",
-              __FUNCTION__, name, start, pfn);
-    return pfn;
-}
-
-void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn)
-{
-    unsigned int page_shift = XC_DOM_PAGE_SHIFT(dom);
-    struct xc_dom_phys *phys, *prev = NULL;
-
-    for ( phys = dom->phys_pages; phys != NULL; phys = phys->next )
-    {
-        if ( (pfn >= phys->first) && (pfn < (phys->first + phys->count)) )
-            break;
-        prev = phys;
-    }
-    if ( !phys )
-    {
-        DOMPRINTF("%s: Huh? no mapping with pfn 0x%" PRIpfn "",
-                  __FUNCTION__, pfn);
-        return;
-    }
-
-    munmap(phys->ptr, phys->count << page_shift);
-    if ( prev )
-        prev->next = phys->next;
-    else
-        dom->phys_pages = phys->next;
-
-    xc_domain_cacheflush(dom->xch, dom->guest_domid, phys->first, phys->count);
-}
-
-void xc_dom_unmap_all(struct xc_dom_image *dom)
-{
-    while ( dom->phys_pages )
-        xc_dom_unmap_one(dom, dom->phys_pages->first);
-}
-
-/* ------------------------------------------------------------------------ */
-/* pluggable kernel loaders                                                 */
-
-static struct xc_dom_loader *first_loader = NULL;
-static struct xc_dom_arch *first_hook = NULL;
-
-void xc_dom_register_loader(struct xc_dom_loader *loader)
-{
-    loader->next = first_loader;
-    first_loader = loader;
-}
-
-static struct xc_dom_loader *xc_dom_find_loader(struct xc_dom_image *dom)
-{
-    struct xc_dom_loader *loader = first_loader;
-
-    while ( loader != NULL )
-    {
-        DOMPRINTF("%s: trying %s loader ... ", __FUNCTION__, loader->name);
-        if ( loader->probe(dom) == 0 )
-        {
-            DOMPRINTF("loader probe OK");
-            return loader;
-        }
-        DOMPRINTF("loader probe failed");
-        loader = loader->next;
-    }
-    xc_dom_panic(dom->xch,
-                 XC_INVALID_KERNEL, "%s: no loader found", __FUNCTION__);
-    return NULL;
-}
-
-void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks)
-{
-    hooks->next = first_hook;
-    first_hook = hooks;
-}
-
-int xc_dom_set_arch_hooks(struct xc_dom_image *dom)
-{
-    struct xc_dom_arch *hooks = first_hook;
-
-    while (  hooks != NULL )
-    {
-        if ( !strcmp(hooks->guest_type, dom->guest_type) )
-        {
-            if ( hooks->arch_private_size )
-            {
-                dom->arch_private = malloc(hooks->arch_private_size);
-                if ( dom->arch_private == NULL )
-                    return -1;
-                memset(dom->arch_private, 0, hooks->arch_private_size);
-                dom->alloc_malloc += hooks->arch_private_size;
-            }
-            dom->arch_hooks = hooks;
-            return 0;
-        }
-        hooks = hooks->next;
-    }
-    xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                 "%s: not found (type %s)", __FUNCTION__, dom->guest_type);
-    return -1;
-}
-
-/* ------------------------------------------------------------------------ */
-/* public interface                                                         */
-
-void xc_dom_release(struct xc_dom_image *dom)
-{
-    DOMPRINTF_CALLED(dom->xch);
-    if ( dom->phys_pages )
-        xc_dom_unmap_all(dom);
-    xc_dom_free_all(dom);
-    free(dom->arch_private);
-    free(dom);
-}
-
-struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
-                                     const char *cmdline, const char *features)
-{
-    struct xc_dom_image *dom;
-
-    xc_dom_printf(xch, "%s: cmdline=\"%s\", features=\"%s\"",
-                  __FUNCTION__, cmdline ? cmdline : "",
-                  features ? features : "");
-    dom = malloc(sizeof(*dom));
-    if ( !dom )
-        goto err;
-
-    memset(dom, 0, sizeof(*dom));
-    dom->xch = xch;
-
-    dom->max_kernel_size = XC_DOM_DECOMPRESS_MAX;
-    dom->max_module_size = XC_DOM_DECOMPRESS_MAX;
-    dom->max_devicetree_size = XC_DOM_DECOMPRESS_MAX;
-
-    if ( cmdline )
-        dom->cmdline = xc_dom_strdup(dom, cmdline);
-    if ( features )
-        elf_xen_parse_features(features, dom->f_requested, NULL);
-
-    dom->parms.virt_base = UNSET_ADDR;
-    dom->parms.virt_entry = UNSET_ADDR;
-    dom->parms.virt_hypercall = UNSET_ADDR;
-    dom->parms.virt_hv_start_low = UNSET_ADDR;
-    dom->parms.elf_paddr_offset = UNSET_ADDR;
-    dom->parms.p2m_base = UNSET_ADDR;
-
-    dom->flags = SIF_VIRT_P2M_4TOOLS;
-
-    dom->alloc_malloc += sizeof(*dom);
-    return dom;
-
- err:
-    if ( dom )
-        xc_dom_release(dom);
-    return NULL;
-}
-
-int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz)
-{
-    DOMPRINTF("%s: kernel_max_size=%zx", __FUNCTION__, sz);
-    dom->max_kernel_size = sz;
-    return 0;
-}
-
-int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz)
-{
-    DOMPRINTF("%s: module_max_size=%zx", __FUNCTION__, sz);
-    dom->max_module_size = sz;
-    return 0;
-}
-
-int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz)
-{
-    DOMPRINTF("%s: devicetree_max_size=%zx", __FUNCTION__, sz);
-    dom->max_devicetree_size = sz;
-    return 0;
-}
-
-int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename)
-{
-    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
-    dom->kernel_blob = xc_dom_malloc_filemap(dom, filename, &dom->kernel_size,
-                                             dom->max_kernel_size);
-    if ( dom->kernel_blob == NULL )
-        return -1;
-    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
-}
-
-int xc_dom_module_file(struct xc_dom_image *dom, const char *filename, const char *cmdline)
-{
-    unsigned int mod = dom->num_modules++;
-
-    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
-    dom->modules[mod].blob =
-        xc_dom_malloc_filemap(dom, filename, &dom->modules[mod].size,
-                              dom->max_module_size);
-
-    if ( dom->modules[mod].blob == NULL )
-        return -1;
-
-    if ( cmdline )
-    {
-        dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
-
-        if ( dom->modules[mod].cmdline == NULL )
-            return -1;
-    }
-    else
-    {
-        dom->modules[mod].cmdline = NULL;
-    }
-
-    return 0;
-}
-
-int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename)
-{
-#if defined (__arm__) || defined(__aarch64__)
-    DOMPRINTF("%s: filename=\"%s\"", __FUNCTION__, filename);
-    dom->devicetree_blob =
-        xc_dom_malloc_filemap(dom, filename, &dom->devicetree_size,
-                              dom->max_devicetree_size);
-
-    if ( dom->devicetree_blob == NULL )
-        return -1;
-    return 0;
-#else
-    errno = -EINVAL;
-    return -1;
-#endif
-}
-
-int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem, size_t memsize)
-{
-    DOMPRINTF_CALLED(dom->xch);
-    dom->kernel_blob = (void *)mem;
-    dom->kernel_size = memsize;
-    return xc_dom_try_gunzip(dom, &dom->kernel_blob, &dom->kernel_size);
-}
-
-int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
-                      size_t memsize, const char *cmdline)
-{
-    unsigned int mod = dom->num_modules++;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    dom->modules[mod].blob = (void *)mem;
-    dom->modules[mod].size = memsize;
-
-    if ( cmdline )
-    {
-        dom->modules[mod].cmdline = xc_dom_strdup(dom, cmdline);
-
-        if ( dom->modules[mod].cmdline == NULL )
-            return -1;
-    }
-    else
-    {
-        dom->modules[mod].cmdline = NULL;
-    }
-
-    return 0;
-}
-
-int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem,
-                          size_t memsize)
-{
-    DOMPRINTF_CALLED(dom->xch);
-    dom->devicetree_blob = (void *)mem;
-    dom->devicetree_size = memsize;
-    return 0;
-}
-
-int xc_dom_parse_image(struct xc_dom_image *dom)
-{
-    int i;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* parse kernel image */
-    dom->kernel_loader = xc_dom_find_loader(dom);
-    if ( dom->kernel_loader == NULL )
-        goto err;
-    if ( dom->kernel_loader->parser(dom) != 0 )
-        goto err;
-    if ( dom->guest_type == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: guest_type not set", __FUNCTION__);
-        goto err;
-    }
-
-    /* check features */
-    for ( i = 0; i < XENFEAT_NR_SUBMAPS; i++ )
-    {
-        dom->f_active[i] |= dom->f_requested[i]; /* cmd line */
-        dom->f_active[i] |= dom->parms.f_required[i]; /* kernel   */
-        if ( (dom->f_active[i] & dom->parms.f_supported[i]) !=
-             dom->f_active[i] )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_PARAM,
-                         "%s: unsupported feature requested", __FUNCTION__);
-            goto err;
-        }
-    }
-    return 0;
-
- err:
-    return -1;
-}
-
-int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase)
-{
-    dom->rambase_pfn = rambase >> XC_PAGE_SHIFT;
-    dom->pfn_alloc_end = dom->rambase_pfn;
-    DOMPRINTF("%s: RAM starts at %"PRI_xen_pfn,
-              __FUNCTION__, dom->rambase_pfn);
-    return 0;
-}
-
-int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb)
-{
-    unsigned int page_shift;
-    xen_pfn_t nr_pages;
-
-    if ( xc_dom_set_arch_hooks(dom) )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, "%s: arch hooks not set",
-                     __FUNCTION__);
-        return -1;
-    }
-
-    page_shift = XC_DOM_PAGE_SHIFT(dom);
-    nr_pages = mem_mb << (20 - page_shift);
-
-    DOMPRINTF("%s: mem %d MB, pages 0x%" PRIpfn " pages, %dk each",
-               __FUNCTION__, mem_mb, nr_pages, 1 << (page_shift-10));
-    dom->total_pages = nr_pages;
-
-    DOMPRINTF("%s: 0x%" PRIpfn " pages",
-              __FUNCTION__, dom->total_pages);
-
-    return 0;
-}
-
-static int xc_dom_build_module(struct xc_dom_image *dom, unsigned int mod)
-{
-    size_t unziplen, modulelen;
-    void *modulemap;
-    char name[10];
-
-    if ( !dom->modules[mod].seg.vstart )
-        unziplen = xc_dom_check_gzip(dom->xch,
-                                     dom->modules[mod].blob, dom->modules[mod].size);
-    else
-        unziplen = 0;
-
-    modulelen = max(unziplen, dom->modules[mod].size);
-    if ( dom->max_module_size )
-    {
-        if ( unziplen && modulelen > dom->max_module_size )
-        {
-            modulelen = min(unziplen, dom->modules[mod].size);
-            if ( unziplen > modulelen )
-                unziplen = 0;
-        }
-        if ( modulelen > dom->max_module_size )
-        {
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "module %u image too large", mod);
-            goto err;
-        }
-    }
-
-    snprintf(name, sizeof(name), "module%u", mod);
-    if ( xc_dom_alloc_segment(dom, &dom->modules[mod].seg, name,
-                              dom->modules[mod].seg.vstart, modulelen) != 0 )
-        goto err;
-    modulemap = xc_dom_seg_to_ptr(dom, &dom->modules[mod].seg);
-    if ( modulemap == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->modules[%u].seg) => NULL",
-                  __FUNCTION__, mod);
-        goto err;
-    }
-    if ( unziplen )
-    {
-        if ( xc_dom_do_gunzip(dom->xch, dom->modules[mod].blob, dom->modules[mod].size,
-                              modulemap, unziplen) != -1 )
-            return 0;
-        if ( dom->modules[mod].size > modulelen )
-            goto err;
-    }
-
-    /* Fall back to handing over the raw blob. */
-    memcpy(modulemap, dom->modules[mod].blob, dom->modules[mod].size);
-    /* If an unzip attempt was made, the buffer may no longer be all zero. */
-    if ( unziplen > dom->modules[mod].size )
-        memset(modulemap + dom->modules[mod].size, 0,
-               unziplen - dom->modules[mod].size);
-
-    return 0;
-
- err:
-    return -1;
-}
-
-static int populate_acpi_pages(struct xc_dom_image *dom,
-                               xen_pfn_t *extents,
-                               unsigned int num_pages)
-{
-    int rc;
-    xc_interface *xch = dom->xch;
-    uint32_t domid = dom->guest_domid;
-    unsigned long idx;
-    unsigned long first_high_idx = 4UL << (30 - PAGE_SHIFT); /* 4GB */
-
-    for ( ; num_pages; num_pages--, extents++ )
-    {
-
-        if ( xc_domain_populate_physmap(xch, domid, 1, 0, 0, extents) == 1 )
-            continue;
-
-        if ( dom->highmem_end )
-        {
-            idx = --dom->highmem_end;
-            if ( idx == first_high_idx )
-                dom->highmem_end = 0;
-        }
-        else
-        {
-            idx = --dom->lowmem_end;
-        }
-
-        rc = xc_domain_add_to_physmap(xch, domid,
-                                      XENMAPSPACE_gmfn,
-                                      idx, *extents);
-        if ( rc )
-            return rc;
-    }
-
-    return 0;
-}
-
-static int xc_dom_load_acpi(struct xc_dom_image *dom)
-{
-    int j, i = 0;
-    unsigned num_pages;
-    xen_pfn_t *extents, base;
-    void *ptr;
-
-    while ( (i < MAX_ACPI_MODULES) && dom->acpi_modules[i].length )
-    {
-        DOMPRINTF("%s: %d bytes at address %" PRIx64, __FUNCTION__,
-                  dom->acpi_modules[i].length,
-                  dom->acpi_modules[i].guest_addr_out);
-
-        num_pages = (dom->acpi_modules[i].length +
-                     (dom->acpi_modules[i].guest_addr_out & ~XC_PAGE_MASK) +
-                     (XC_PAGE_SIZE - 1)) >> XC_PAGE_SHIFT;
-        extents = malloc(num_pages * sizeof(*extents));
-        if ( !extents )
-        {
-            DOMPRINTF("%s: Out of memory", __FUNCTION__);
-            goto err;
-        }
-
-        base = dom->acpi_modules[i].guest_addr_out >> XC_PAGE_SHIFT;
-        for ( j = 0; j < num_pages; j++ )
-            extents[j] = base + j;
-        if ( populate_acpi_pages(dom, extents, num_pages) )
-        {
-            DOMPRINTF("%s: Can populate ACPI pages", __FUNCTION__);
-            goto err;
-        }
-
-        ptr = xc_map_foreign_range(dom->xch, dom->guest_domid,
-                                   XC_PAGE_SIZE * num_pages,
-                                   PROT_READ | PROT_WRITE, base);
-        if ( !ptr )
-        {
-            DOMPRINTF("%s: Can't map %d pages at 0x%"PRI_xen_pfn,
-                      __FUNCTION__, num_pages, base);
-            goto err;
-        }
-
-        memcpy((uint8_t *)ptr +
-               (dom->acpi_modules[i].guest_addr_out & ~XC_PAGE_MASK),
-               dom->acpi_modules[i].data, dom->acpi_modules[i].length);
-        munmap(ptr, XC_PAGE_SIZE * num_pages);
-
-        free(extents);
-        i++;
-    }
-
-    return 0;
-
-err:
-    free(extents);
-    return -1;
-}
-
-int xc_dom_build_image(struct xc_dom_image *dom)
-{
-    unsigned int page_size;
-    bool unmapped_initrd;
-    unsigned int mod;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* check for arch hooks */
-    if ( dom->arch_hooks == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR, "%s: arch hooks not set",
-                     __FUNCTION__);
-        goto err;
-    }
-    page_size = XC_DOM_PAGE_SIZE(dom);
-    if ( dom->parms.virt_base != UNSET_ADDR )
-        dom->virt_alloc_end = dom->parms.virt_base;
-
-    /* load kernel */
-    if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
-                              dom->kernel_seg.vstart,
-                              dom->kernel_seg.vend -
-                              dom->kernel_seg.vstart) != 0 )
-        goto err;
-    if ( dom->kernel_loader->loader(dom) != 0 )
-        goto err;
-
-    /* Don't load ramdisk / other modules now if no initial mapping required. */
-    for ( mod = 0; mod < dom->num_modules; mod++ )
-    {
-        unmapped_initrd = (dom->parms.unmapped_initrd &&
-                           !dom->modules[mod].seg.vstart);
-
-        if ( dom->modules[mod].blob && !unmapped_initrd )
-        {
-            if ( xc_dom_build_module(dom, mod) != 0 )
-                goto err;
-
-            if ( mod == 0 )
-            {
-                dom->initrd_start = dom->modules[mod].seg.vstart;
-                dom->initrd_len =
-                    dom->modules[mod].seg.vend - dom->modules[mod].seg.vstart;
-            }
-        }
-    }
-
-    /* load devicetree */
-    if ( dom->devicetree_blob )
-    {
-        void *devicetreemap;
-
-        if ( xc_dom_alloc_segment(dom, &dom->devicetree_seg, "devicetree",
-                                  dom->devicetree_seg.vstart,
-                                  dom->devicetree_size) != 0 )
-            goto err;
-        devicetreemap = xc_dom_seg_to_ptr(dom, &dom->devicetree_seg);
-        if ( devicetreemap == NULL )
-        {
-            DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &dom->devicetree_seg) => NULL",
-                      __FUNCTION__);
-            goto err;
-        }
-        memcpy(devicetreemap, dom->devicetree_blob, dom->devicetree_size);
-    }
-
-    /* load ACPI tables */
-    if ( xc_dom_load_acpi(dom) != 0 )
-        goto err;
-
-    /* allocate other pages */
-    if ( !dom->arch_hooks->p2m_base_supported ||
-         dom->parms.p2m_base >= dom->parms.virt_base ||
-         (dom->parms.p2m_base & (XC_DOM_PAGE_SIZE(dom) - 1)) )
-        dom->parms.p2m_base = UNSET_ADDR;
-    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base == UNSET_ADDR &&
-         dom->arch_hooks->alloc_p2m_list(dom) != 0 )
-        goto err;
-    if ( dom->arch_hooks->alloc_magic_pages(dom) != 0 )
-        goto err;
-    if ( dom->arch_hooks->alloc_pgtables &&
-         dom->arch_hooks->alloc_pgtables(dom) != 0 )
-        goto err;
-    if ( dom->alloc_bootstack )
-    {
-        dom->bootstack_pfn = xc_dom_alloc_page(dom, "boot stack");
-        if ( dom->bootstack_pfn == INVALID_PFN )
-            goto err;
-    }
-
-    DOMPRINTF("%-20s: virt_alloc_end : 0x%" PRIx64 "",
-              __FUNCTION__, dom->virt_alloc_end);
-    DOMPRINTF("%-20s: virt_pgtab_end : 0x%" PRIx64 "",
-              __FUNCTION__, dom->virt_pgtab_end);
-
-    /* Make sure all memory mapped by initial page tables is available */
-    if ( dom->virt_pgtab_end && xc_dom_alloc_pad(dom, dom->virt_pgtab_end) )
-        return -1;
-
-    for ( mod = 0; mod < dom->num_modules; mod++ )
-    {
-        unmapped_initrd = (dom->parms.unmapped_initrd &&
-                           !dom->modules[mod].seg.vstart);
-
-        /* Load ramdisk / other modules if no initial mapping required. */
-        if ( dom->modules[mod].blob && unmapped_initrd )
-        {
-            if ( xc_dom_build_module(dom, mod) != 0 )
-                goto err;
-
-            if ( mod == 0 )
-            {
-                dom->flags |= SIF_MOD_START_PFN;
-                dom->initrd_start = dom->modules[mod].seg.pfn;
-                dom->initrd_len = page_size * dom->modules[mod].seg.pages;
-            }
-        }
-    }
-
-    /* Allocate p2m list if outside of initial kernel mapping. */
-    if ( dom->arch_hooks->alloc_p2m_list && dom->parms.p2m_base != UNSET_ADDR )
-    {
-        if ( dom->arch_hooks->alloc_p2m_list(dom) != 0 )
-            goto err;
-        dom->p2m_seg.vstart = dom->parms.p2m_base;
-    }
-
-    return 0;
-
- err:
-    return -1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_decompress.h b/tools/libxc/xg_dom_decompress.h
deleted file mode 100644 (file)
index c5ab2e5..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __MINIOS__
-# include "xenctrl_dom.h"
-#else
-# include "xg_dom_decompress_unsafe.h"
-#endif
-
-int xc_try_lz4_decode(struct xc_dom_image *dom, void **blob, size_t *size);
-
diff --git a/tools/libxc/xg_dom_decompress_lz4.c b/tools/libxc/xg_dom_decompress_lz4.c
deleted file mode 100644 (file)
index 97ba620..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <stdint.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress.h"
-
-#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint64_t u64;
-
-#define likely(a) a
-#define unlikely(a) a
-
-static inline uint_fast16_t le16_to_cpup(const unsigned char *buf)
-{
-    return buf[0] | (buf[1] << 8);
-}
-
-static inline uint_fast32_t le32_to_cpup(const unsigned char *buf)
-{
-    return le16_to_cpup(buf) | ((uint32_t)le16_to_cpup(buf + 2) << 16);
-}
-
-#include "../../xen/include/xen/lz4.h"
-#include "../../xen/common/decompress.h"
-
-#ifndef __MINIOS__
-
-#include "../../xen/common/lz4/decompress.c"
-
-#define ARCHIVE_MAGICNUMBER 0x184C2102
-
-int xc_try_lz4_decode(
-       struct xc_dom_image *dom, void **blob, size_t *psize)
-{
-       int ret = -1;
-       unsigned char *inp = *blob, *output, *outp;
-       ssize_t size = *psize - 4;
-       size_t out_len, dest_len, chunksize;
-       const char *msg;
-
-       if (size < 4) {
-               msg = "input too small";
-               goto exit_0;
-       }
-
-       out_len = get_unaligned_le32(inp + size);
-       if (xc_dom_kernel_check_size(dom, out_len)) {
-               msg = "Decompressed image too large";
-               goto exit_0;
-       }
-
-       output = malloc(out_len);
-       if (!output) {
-               msg = "Could not allocate output buffer";
-               goto exit_0;
-       }
-       outp = output;
-
-       chunksize = get_unaligned_le32(inp);
-       if (chunksize == ARCHIVE_MAGICNUMBER) {
-               inp += 4;
-               size -= 4;
-       } else {
-               msg = "invalid header";
-               goto exit_2;
-       }
-
-       for (;;) {
-               if (size < 4) {
-                       msg = "missing data";
-                       goto exit_2;
-               }
-               chunksize = get_unaligned_le32(inp);
-               if (chunksize == ARCHIVE_MAGICNUMBER) {
-                       inp += 4;
-                       size -= 4;
-                       continue;
-               }
-               inp += 4;
-               size -= 4;
-               if (chunksize > size) {
-                       msg = "insufficient input data";
-                       goto exit_2;
-               }
-
-               dest_len = out_len - (outp - output);
-               ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-                               &dest_len);
-               if (ret < 0) {
-                       msg = "decoding failed";
-                       goto exit_2;
-               }
-
-               ret = -1;
-               outp += dest_len;
-               size -= chunksize;
-
-               if (size == 0)
-               {
-                       if ( xc_dom_register_external(dom, output, out_len) )
-                       {
-                               msg = "Error registering stream output";
-                               goto exit_2;
-                       }
-                       *blob = output;
-                       *psize = out_len;
-                       return 0;
-               }
-
-               if (size < 0) {
-                       msg = "data corrupted";
-                       goto exit_2;
-               }
-
-               inp += chunksize;
-       }
-
-exit_2:
-       free(output);
-exit_0:
-       DOMPRINTF("LZ4 decompression error: %s\n", msg);
-       return ret;
-}
-
-#else /* __MINIOS__ */
-
-#include "../../xen/common/unlz4.c"
-
-int xc_try_lz4_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    return xc_dom_decompress_unsafe(unlz4, dom, blob, size);
-}
-
-#endif
diff --git a/tools/libxc/xg_dom_decompress_unsafe.c b/tools/libxc/xg_dom_decompress_unsafe.c
deleted file mode 100644 (file)
index 21d9647..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress_unsafe.h"
-
-static struct xc_dom_image *unsafe_dom;
-static unsigned char *output_blob;
-static unsigned int output_size;
-
-static void unsafe_error(const char *msg)
-{
-    xc_dom_panic(unsafe_dom->xch, XC_INVALID_KERNEL, "%s", msg);
-}
-
-static int unsafe_flush(void *src, unsigned int size)
-{
-    void *n = realloc(output_blob, output_size + size);
-    if (!n)
-        return -1;
-    output_blob = n;
-
-    memcpy(&output_blob[output_size], src, size);
-    output_size += size;
-    return size;
-}
-
-int xc_dom_decompress_unsafe(
-    decompress_fn fn, struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    int ret;
-
-    unsafe_dom = dom;
-    output_blob = NULL;
-    output_size = 0;
-
-    ret = fn(dom->kernel_blob, dom->kernel_size, NULL, unsafe_flush, NULL, NULL, unsafe_error);
-
-    if (ret)
-        free(output_blob);
-    else {
-        *blob = output_blob;
-        *size = output_size;
-    }
-
-    return ret;
-}
diff --git a/tools/libxc/xg_dom_decompress_unsafe.h b/tools/libxc/xg_dom_decompress_unsafe.h
deleted file mode 100644 (file)
index fb84b6a..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "xenctrl_dom.h"
-
-typedef int decompress_fn(unsigned char *inbuf, unsigned int len,
-                          int (*fill)(void*, unsigned int),
-                          int (*flush)(void*, unsigned int),
-                          unsigned char *outbuf, unsigned int *posp,
-                          void (*error)(const char *x));
-
-int xc_dom_decompress_unsafe(
-    decompress_fn fn, struct xc_dom_image *dom, void **blob, size_t *size)
-    __attribute__((visibility("internal")));
-
-int xc_try_bzip2_decode(struct xc_dom_image *dom, void **blob, size_t *size)
-    __attribute__((visibility("internal")));
-int xc_try_lzma_decode(struct xc_dom_image *dom, void **blob, size_t *size)
-    __attribute__((visibility("internal")));
-int xc_try_lzo1x_decode(struct xc_dom_image *dom, void **blob, size_t *size)
-    __attribute__((visibility("internal")));
-int xc_try_xz_decode(struct xc_dom_image *dom, void **blob, size_t *size)
-    __attribute__((visibility("internal")));
diff --git a/tools/libxc/xg_dom_decompress_unsafe_bzip2.c b/tools/libxc/xg_dom_decompress_unsafe_bzip2.c
deleted file mode 100644 (file)
index 9d3709e..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress_unsafe.h"
-
-#include "../../xen/common/bunzip2.c"
-
-int xc_try_bzip2_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    return xc_dom_decompress_unsafe(bunzip2, dom, blob, size);
-}
diff --git a/tools/libxc/xg_dom_decompress_unsafe_lzma.c b/tools/libxc/xg_dom_decompress_unsafe_lzma.c
deleted file mode 100644 (file)
index 5d178f0..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress_unsafe.h"
-
-#include "../../xen/common/unlzma.c"
-
-int xc_try_lzma_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    return xc_dom_decompress_unsafe(unlzma, dom, blob, size);
-}
diff --git a/tools/libxc/xg_dom_decompress_unsafe_lzo1x.c b/tools/libxc/xg_dom_decompress_unsafe_lzo1x.c
deleted file mode 100644 (file)
index a4f8ebd..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <endian.h>
-#include <stdint.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress_unsafe.h"
-
-typedef uint8_t u8;
-typedef uint32_t u32;
-typedef uint16_t u16;
-typedef uint64_t u64;
-
-#define likely(a) a
-#define noinline
-#define unlikely(a) a
-
-static inline u16 be16_to_cpup(const u16 *p)
-{
-       u16 v = *p;
-#if BYTE_ORDER == LITTLE_ENDIAN
-       return (((v & 0x00ffU) << 8) |
-                ((v & 0xff00U) >> 8));
-#else
-       return v;
-#endif
-}
-
-static inline u32 be32_to_cpup(const u32 *p)
-{
-       u32 v = *p;
-#if BYTE_ORDER == LITTLE_ENDIAN
-       return (((v & 0x000000ffUL) << 24) |
-                ((v & 0x0000ff00UL) <<  8) |
-                ((v & 0x00ff0000UL) >>  8) |
-                ((v & 0xff000000UL) >> 24));
-#else
-       return v;
-#endif
-}
-
-#include "../../xen/common/lzo.c"
-#include "../../xen/common/unlzo.c"
-
-int xc_try_lzo1x_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    return xc_dom_decompress_unsafe(unlzo, dom, blob, size);
-}
diff --git a/tools/libxc/xg_dom_decompress_unsafe_xz.c b/tools/libxc/xg_dom_decompress_unsafe_xz.c
deleted file mode 100644 (file)
index ff6824b..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <stdio.h>
-#include <endian.h>
-#include <stdlib.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xg_dom_decompress_unsafe.h"
-
-// TODO
-#define XZ_DEC_X86
-
-typedef char bool_t;
-typedef uint8_t u8;
-typedef uint16_t u16;
-typedef uint32_t u32;
-typedef uint32_t __le32;
-
-static inline u32 cpu_to_le32(const u32 v)
-{
-#if BYTE_ORDER == BIG_ENDIAN
-       return (((v & 0x000000ffUL) << 24) |
-               ((v & 0x0000ff00UL) <<  8) |
-               ((v & 0x00ff0000UL) >>  8) |
-               ((v & 0xff000000UL) >> 24));
-#else
-       return v;
-#endif
-}
-
-static inline u32 le32_to_cpup(const u32 *p)
-{
-       return cpu_to_le32(*p);
-}
-
-#define __force
-#define always_inline
-
-#include "../../xen/common/unxz.c"
-
-int xc_try_xz_decode(
-    struct xc_dom_image *dom, void **blob, size_t *size)
-{
-    return xc_dom_decompress_unsafe(unxz, dom, blob, size);
-}
diff --git a/tools/libxc/xg_dom_elfloader.c b/tools/libxc/xg_dom_elfloader.c
deleted file mode 100644 (file)
index 7043c3b..0000000
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Xen domain builder -- ELF bits.
- *
- * Parse and load ELF kernel images.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <inttypes.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-#include "xc_bitops.h"
-
-#define XEN_VER "xen-3.0"
-
-/* ------------------------------------------------------------------------ */
-
-static void log_callback(struct elf_binary *elf, void *caller_data,
-                         bool iserr, const char *fmt, va_list al) {
-    xc_interface *xch = caller_data;
-
-    xc_reportv(xch,
-          xch->dombuild_logger ? xch->dombuild_logger : xch->error_handler,
-                       iserr ? XTL_ERROR : XTL_DETAIL,
-                       iserr ? XC_INVALID_KERNEL : XC_ERROR_NONE,
-                       fmt, al);
-}
-
-void xc_elf_set_logfile(xc_interface *xch, struct elf_binary *elf,
-                        int verbose) {
-    elf_set_log(elf, log_callback, xch, verbose /* convert to bool */);
-}
-
-/* ------------------------------------------------------------------------ */
-
-static char *xc_dom_guest_type(struct xc_dom_image *dom,
-                               struct elf_binary *elf)
-{
-    uint64_t machine = elf_uval(elf, elf->ehdr, e_machine);
-
-    if ( dom->container_type == XC_DOM_HVM_CONTAINER &&
-         dom->parms.phys_entry != UNSET_ADDR32 )
-        return "hvm-3.0-x86_32";
-    if ( dom->container_type == XC_DOM_HVM_CONTAINER )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "%s: image not capable of booting inside a HVM container",
-                     __FUNCTION__);
-        return NULL;
-    }
-
-    switch ( machine )
-    {
-    case EM_386:
-        switch ( dom->parms.pae )
-        {
-        case XEN_PAE_BIMODAL:
-            if ( strstr(dom->xen_caps, "xen-3.0-x86_32p") )
-                return "xen-3.0-x86_32p";
-            return "xen-3.0-x86_32";
-        case XEN_PAE_EXTCR3:
-        case XEN_PAE_YES:
-            return "xen-3.0-x86_32p";
-        case XEN_PAE_NO:
-        default:
-            return "xen-3.0-x86_32";
-        }
-    case EM_X86_64:
-        return "xen-3.0-x86_64";
-    default:
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                     "%s: unknown image type %"PRIu64,
-                     __FUNCTION__, machine);
-        return NULL;
-    }
-}
-
-/* ------------------------------------------------------------------------ */
-/* parse elf binary                                                         */
-
-static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose)
-{
-    if ( dom->kernel_blob == NULL )
-    {
-        if ( verbose )
-            xc_dom_panic(dom->xch,
-                         XC_INTERNAL_ERROR, "%s: no kernel image loaded",
-                         __FUNCTION__);
-        return -EINVAL;
-    }
-
-    if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) )
-    {
-        if ( verbose )
-            xc_dom_panic(dom->xch,
-                         XC_INVALID_KERNEL, "%s: kernel is not an ELF image",
-                         __FUNCTION__);
-        return -EINVAL;
-    }
-    return 0;
-}
-
-static elf_negerrnoval xc_dom_probe_elf_kernel(struct xc_dom_image *dom)
-{
-    struct elf_binary elf;
-    int rc;
-
-    rc = check_elf_kernel(dom, 0);
-    if ( rc != 0 )
-        return rc;
-
-    rc = elf_init(&elf, dom->kernel_blob, dom->kernel_size);
-    if ( rc != 0 )
-        return rc;
-
-    /*
-     * We need to check that it contains Xen ELFNOTES,
-     * or else we might be trying to load a plain ELF.
-     */
-    elf_parse_binary(&elf);
-    rc = elf_xen_parse(&elf, &dom->parms);
-    if ( rc != 0 )
-        return rc;
-
-    return 0;
-}
-
-static elf_negerrnoval xc_dom_parse_elf_kernel(struct xc_dom_image *dom)
-{
-    struct elf_binary *elf;
-    elf_negerrnoval rc;
-
-    rc = check_elf_kernel(dom, 1);
-    if ( rc != 0 )
-        return rc;
-
-    elf = xc_dom_malloc(dom, sizeof(*elf));
-    if ( elf == NULL )
-        return -ENOMEM;
-    dom->private_loader = elf;
-    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size) != 0 ? -EINVAL : 0;
-    xc_elf_set_logfile(dom->xch, elf, 1);
-    if ( rc != 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: corrupted ELF image",
-                     __FUNCTION__);
-        return rc;
-    }
-
-    /* parse binary and get xen meta info */
-    elf_parse_binary(elf);
-    if ( elf_xen_parse(elf, &dom->parms) != 0 )
-    {
-        rc = -EINVAL;
-        goto out;
-    }
-
-    if ( elf_xen_feature_get(XENFEAT_dom0, dom->parms.f_required) )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: Kernel does not"
-                     " support unprivileged (DomU) operation", __FUNCTION__);
-        rc = -EINVAL;
-        goto out;
-    }
-
-    /* find kernel segment */
-    dom->kernel_seg.vstart = dom->parms.virt_kstart;
-    dom->kernel_seg.vend   = dom->parms.virt_kend;
-
-    dom->guest_type = xc_dom_guest_type(dom, elf);
-    if ( dom->guest_type == NULL )
-        return -EINVAL;
-    DOMPRINTF("%s: %s: 0x%" PRIx64 " -> 0x%" PRIx64 "",
-              __FUNCTION__, dom->guest_type,
-              dom->kernel_seg.vstart, dom->kernel_seg.vend);
-    rc = 0;
-out:
-    if ( elf_check_broken(elf) )
-        DOMPRINTF("%s: ELF broken: %s", __FUNCTION__,
-                  elf_check_broken(elf));
-
-    return rc;
-}
-
-static elf_errorstatus xc_dom_load_elf_kernel(struct xc_dom_image *dom)
-{
-    struct elf_binary *elf = dom->private_loader;
-    elf_errorstatus rc;
-    xen_pfn_t pages;
-
-    elf->dest_base = xc_dom_seg_to_ptr_pages(dom, &dom->kernel_seg, &pages);
-    if ( elf->dest_base == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_vaddr_to_ptr(dom,dom->kernel_seg)"
-                  " => NULL", __FUNCTION__);
-        return -1;
-    }
-    elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom);
-
-    rc = elf_load_binary(elf);
-    if ( rc < 0 )
-    {
-        DOMPRINTF("%s: failed to load elf binary", __FUNCTION__);
-        return rc;
-    }
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-struct xc_dom_loader elf_loader = {
-    .name = "ELF-generic",
-    .probe = xc_dom_probe_elf_kernel,
-    .parser = xc_dom_parse_elf_kernel,
-    .loader = xc_dom_load_elf_kernel,
-};
-
-static void __init register_loader(void)
-{
-    xc_dom_register_loader(&elf_loader);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_hvmloader.c b/tools/libxc/xg_dom_hvmloader.c
deleted file mode 100644 (file)
index 995a0f3..0000000
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Xen domain builder -- HVM specific bits.
- *
- * Parse and load ELF firmware images for HVM domains.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-#include "xc_bitops.h"
-
-/* ------------------------------------------------------------------------ */
-/* parse elf binary                                                         */
-
-static elf_negerrnoval check_elf_kernel(struct xc_dom_image *dom, bool verbose)
-{
-    if ( dom->kernel_blob == NULL )
-    {
-        if ( verbose )
-            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                         "%s: no kernel image loaded", __func__);
-        return -EINVAL;
-    }
-
-    if ( !elf_is_elfbinary(dom->kernel_blob, dom->kernel_size) )
-    {
-        if ( verbose )
-            xc_dom_panic(dom->xch, XC_INVALID_KERNEL,
-                         "%s: kernel is not an ELF image", __func__);
-        return -EINVAL;
-    }
-    return 0;
-}
-
-static elf_negerrnoval xc_dom_probe_hvm_kernel(struct xc_dom_image *dom)
-{
-    struct elf_binary elf;
-    int rc;
-
-    /* This loader is designed for HVM guest firmware. */
-    if ( dom->container_type != XC_DOM_HVM_CONTAINER )
-        return -EINVAL;
-
-    rc = check_elf_kernel(dom, 0);
-    if ( rc != 0 )
-        return rc;
-
-    rc = elf_init(&elf, dom->kernel_blob, dom->kernel_size);
-    if ( rc != 0 )
-        return rc;
-
-    /*
-     * We need to check that there are no Xen ELFNOTES, or
-     * else we might be trying to load a PV kernel.
-     */
-    elf_parse_binary(&elf);
-    rc = elf_xen_parse(&elf, &dom->parms);
-    if ( rc == 0 )
-        return -EINVAL;
-
-    return 0;
-}
-
-static elf_errorstatus xc_dom_parse_hvm_kernel(struct xc_dom_image *dom)
-    /*
-     * This function sometimes returns -1 for error and sometimes
-     * an errno value.  ?!?!
-     */
-{
-    struct elf_binary *elf;
-    elf_errorstatus rc;
-
-    rc = check_elf_kernel(dom, 1);
-    if ( rc != 0 )
-        return rc;
-
-    elf = xc_dom_malloc(dom, sizeof(*elf));
-    if ( elf == NULL )
-        return -1;
-    dom->private_loader = elf;
-    rc = elf_init(elf, dom->kernel_blob, dom->kernel_size);
-    xc_elf_set_logfile(dom->xch, elf, 1);
-    if ( rc != 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: corrupted ELF image",
-                     __func__);
-        return rc;
-    }
-
-    if ( !elf_32bit(elf) )
-    {
-        xc_dom_panic(dom->xch, XC_INVALID_KERNEL, "%s: ELF image is not 32bit",
-                     __func__);
-        return -EINVAL;
-    }
-
-    /* parse binary and get xen meta info */
-    elf_parse_binary(elf);
-
-    /* find kernel segment */
-    dom->kernel_seg.vstart = elf->pstart;
-    dom->kernel_seg.vend   = elf->pend;
-
-    dom->guest_type = "hvm-3.0-x86_32";
-
-    if ( elf_check_broken(elf) )
-        DOMPRINTF("%s: ELF broken: %s", __func__, elf_check_broken(elf));
-
-    return rc;
-}
-
-static int module_init_one(struct xc_dom_image *dom,
-                           struct xc_hvm_firmware_module *module,
-                           char *name)
-{
-    struct xc_dom_seg seg;
-    void *dest;
-
-    if ( module->length && !module->guest_addr_out )
-    {
-        if ( xc_dom_alloc_segment(dom, &seg, name, 0, module->length) )
-            goto err;
-        dest = xc_dom_seg_to_ptr(dom, &seg);
-        if ( dest == NULL )
-        {
-            DOMPRINTF("%s: xc_dom_seg_to_ptr(dom, &seg) => NULL",
-                      __FUNCTION__);
-            goto err;
-        }
-        memcpy(dest, module->data, module->length);
-        module->guest_addr_out = seg.vstart;
-
-        assert(dom->mmio_start > 0 && dom->mmio_start < UINT32_MAX);
-        if ( module->guest_addr_out > dom->mmio_start ||
-             module->guest_addr_out + module->length > dom->mmio_start )
-        {
-            DOMPRINTF("%s: Module %s would be loaded abrove 4GB",
-                      __FUNCTION__, name);
-            goto err;
-        }
-    }
-
-    return 0;
-err:
-    return -1;
-}
-
-static int modules_init(struct xc_dom_image *dom)
-{
-    int rc;
-
-    rc = module_init_one(dom, &dom->system_firmware_module,
-                         "System Firmware module");
-    if ( rc ) goto err;
-    /* Only one module can be added */
-    rc = module_init_one(dom, &dom->acpi_modules[0], "ACPI module");
-    if ( rc ) goto err;
-    rc = module_init_one(dom, &dom->smbios_module, "SMBIOS module");
-    if ( rc ) goto err;
-
-    return 0;
-err:
-    return -1;
-}
-
-static elf_errorstatus xc_dom_load_hvm_kernel(struct xc_dom_image *dom)
-{
-    struct elf_binary *elf = dom->private_loader;
-    privcmd_mmap_entry_t *entries = NULL;
-    size_t pages = (elf->pend - elf->pstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
-    elf_errorstatus rc;
-    int i;
-
-    /* Map address space for initial elf image. */
-    entries = calloc(pages, sizeof(privcmd_mmap_entry_t));
-    if ( entries == NULL )
-        return -ENOMEM;
-
-    for ( i = 0; i < pages; i++ )
-        entries[i].mfn = (elf->pstart >> PAGE_SHIFT) + i;
-
-    elf->dest_base = xc_map_foreign_ranges(
-        dom->xch, dom->guest_domid, pages << PAGE_SHIFT,
-        PROT_READ | PROT_WRITE, 1 << PAGE_SHIFT,
-        entries, pages);
-    if ( elf->dest_base == NULL )
-    {
-        DOMPRINTF("%s: unable to map guest memory space", __func__);
-        rc = -EFAULT;
-        goto error;
-    }
-
-    elf->dest_size = pages * XC_DOM_PAGE_SIZE(dom);
-
-    rc = elf_load_binary(elf);
-    if ( rc < 0 )
-    {
-        DOMPRINTF("%s: failed to load elf binary", __func__);
-        goto error;
-    }
-
-    munmap(elf->dest_base, elf->dest_size);
-
-    rc = modules_init(dom);
-    if ( rc != 0 )
-    {
-        DOMPRINTF("%s: unable to load modules.", __func__);
-        goto error;
-    }
-
-    dom->parms.phys_entry = elf_uval(elf, elf->ehdr, e_entry);
-
-    free(entries);
-    return 0;
-
- error:
-    assert(rc != 0);
-    free(entries);
-    return rc;
-}
-
-/* ------------------------------------------------------------------------ */
-
-struct xc_dom_loader hvm_loader = {
-    .name = "HVM-generic",
-    .probe = xc_dom_probe_hvm_kernel,
-    .parser = xc_dom_parse_hvm_kernel,
-    .loader = xc_dom_load_hvm_kernel,
-};
-
-static void __init register_loader(void)
-{
-    xc_dom_register_loader(&hvm_loader);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_dom_x86.c b/tools/libxc/xg_dom_x86.c
deleted file mode 100644 (file)
index 842dbcc..0000000
+++ /dev/null
@@ -1,1945 +0,0 @@
-/*
- * Xen domain builder -- i386 and x86_64 bits.
- *
- * Most architecture-specific code for x86 goes here.
- *   - prepare page tables.
- *   - fill architecture-specific structs.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * written 2006 by Gerd Hoffmann <kraxel@suse.de>.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <inttypes.h>
-#include <assert.h>
-
-#include <xen/xen.h>
-#include <xen/foreign/x86_32.h>
-#include <xen/foreign/x86_64.h>
-#include <xen/hvm/hvm_info_table.h>
-#include <xen/arch-x86/hvm/start_info.h>
-#include <xen/io/protocols.h>
-
-#include <xen-tools/libs.h>
-
-#include "xg_private.h"
-#include "xenctrl_dom.h"
-#include "xenctrl.h"
-
-/* ------------------------------------------------------------------------ */
-
-#define SUPERPAGE_BATCH_SIZE 512
-
-#define SUPERPAGE_2MB_SHIFT   9
-#define SUPERPAGE_2MB_NR_PFNS (1UL << SUPERPAGE_2MB_SHIFT)
-#define SUPERPAGE_1GB_SHIFT   18
-#define SUPERPAGE_1GB_NR_PFNS (1UL << SUPERPAGE_1GB_SHIFT)
-
-#define X86_CR0_PE 0x01
-#define X86_CR0_ET 0x10
-
-#define X86_DR6_DEFAULT 0xffff0ff0u
-#define X86_DR7_DEFAULT 0x00000400u
-
-#define MTRR_TYPE_WRBACK     6
-#define MTRR_DEF_TYPE_ENABLE (1u << 11)
-
-#define SPECIALPAGE_PAGING   0
-#define SPECIALPAGE_ACCESS   1
-#define SPECIALPAGE_SHARING  2
-#define SPECIALPAGE_BUFIOREQ 3
-#define SPECIALPAGE_XENSTORE 4
-#define SPECIALPAGE_IOREQ    5
-#define SPECIALPAGE_IDENT_PT 6
-#define SPECIALPAGE_CONSOLE  7
-#define special_pfn(x) \
-    (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES + (x))
-
-#define NR_IOREQ_SERVER_PAGES 8
-#define ioreq_server_pfn(x) (special_pfn(0) - NR_IOREQ_SERVER_PAGES + (x))
-
-#define bits_to_mask(bits)       (((xen_vaddr_t)1 << (bits))-1)
-#define round_down(addr, mask)   ((addr) & ~(mask))
-#define round_up(addr, mask)     ((addr) | (mask))
-#define round_pg_up(addr)  (((addr) + PAGE_SIZE_X86 - 1) & ~(PAGE_SIZE_X86 - 1))
-
-#define HVMLOADER_MODULE_MAX_COUNT 2
-#define HVMLOADER_MODULE_CMDLINE_SIZE MAX_GUEST_CMDLINE
-
-struct xc_dom_params {
-    unsigned levels;
-    xen_vaddr_t vaddr_mask;
-    x86_pgentry_t lvl_prot[4];
-};
-
-struct xc_dom_x86_mapping_lvl {
-    xen_vaddr_t from;
-    xen_vaddr_t to;
-    xen_pfn_t pfn;
-    unsigned int pgtables;
-};
-
-struct xc_dom_x86_mapping {
-    struct xc_dom_x86_mapping_lvl area;
-    struct xc_dom_x86_mapping_lvl lvls[4];
-};
-
-struct xc_dom_image_x86 {
-    unsigned n_mappings;
-#define MAPPING_MAX 2
-    struct xc_dom_x86_mapping maps[MAPPING_MAX];
-    const struct xc_dom_params *params;
-
-    /* PV: Pointer to the in-guest P2M. */
-    void *p2m_guest;
-};
-
-/* get guest IO ABI protocol */
-const char *xc_domain_get_native_protocol(xc_interface *xch,
-                                          uint32_t domid)
-{
-    int ret;
-    uint32_t guest_width;
-    const char *protocol;
-
-    ret = xc_domain_get_guest_width(xch, domid, &guest_width);
-
-    if ( ret )
-        return NULL;
-
-    switch (guest_width) {
-    case 4: /* 32 bit guest */
-        protocol = XEN_IO_PROTO_ABI_X86_32;
-        break;
-    case 8: /* 64 bit guest */
-        protocol = XEN_IO_PROTO_ABI_X86_64;
-        break;
-    default:
-        protocol = NULL;
-    }
-
-    return protocol;
-}
-
-static int count_pgtables(struct xc_dom_image *dom, xen_vaddr_t from,
-                          xen_vaddr_t to, xen_pfn_t pfn)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map, *map_cmp;
-    xen_pfn_t pfn_end;
-    xen_vaddr_t mask;
-    unsigned bits;
-    int l, m;
-
-    if ( domx86->n_mappings == MAPPING_MAX )
-    {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "%s: too many mappings\n", __FUNCTION__);
-        return -ENOMEM;
-    }
-    map = domx86->maps + domx86->n_mappings;
-
-    pfn_end = pfn + ((to - from) >> PAGE_SHIFT_X86);
-    if ( pfn_end >= dom->p2m_size )
-    {
-        xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY,
-                     "%s: not enough memory for initial mapping (%#"PRIpfn" > %#"PRIpfn")",
-                     __FUNCTION__, pfn_end, dom->p2m_size);
-        return -ENOMEM;
-    }
-    for ( m = 0; m < domx86->n_mappings; m++ )
-    {
-        map_cmp = domx86->maps + m;
-        if ( from < map_cmp->area.to && to > map_cmp->area.from )
-        {
-            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                         "%s: overlapping mappings\n", __FUNCTION__);
-            return -EINVAL;
-        }
-    }
-
-    memset(map, 0, sizeof(*map));
-    map->area.from = from & domx86->params->vaddr_mask;
-    map->area.to = to & domx86->params->vaddr_mask;
-
-    for ( l = domx86->params->levels - 1; l >= 0; l-- )
-    {
-        map->lvls[l].pfn = dom->pfn_alloc_end + map->area.pgtables;
-        if ( l == domx86->params->levels - 1 )
-        {
-            /* Top level page table in first mapping only. */
-            if ( domx86->n_mappings == 0 )
-            {
-                map->lvls[l].from = 0;
-                map->lvls[l].to = domx86->params->vaddr_mask;
-                map->lvls[l].pgtables = 1;
-                map->area.pgtables++;
-            }
-            continue;
-        }
-
-        bits = PAGE_SHIFT_X86 + (l + 1) * PGTBL_LEVEL_SHIFT_X86;
-        mask = bits_to_mask(bits);
-        map->lvls[l].from = map->area.from & ~mask;
-        map->lvls[l].to = map->area.to | mask;
-
-        if ( domx86->params->levels == PGTBL_LEVELS_I386 &&
-             domx86->n_mappings == 0 && to < 0xc0000000 && l == 1 )
-        {
-            DOMPRINTF("%s: PAE: extra l2 page table for l3#3", __FUNCTION__);
-            map->lvls[l].to = domx86->params->vaddr_mask;
-        }
-
-        for ( m = 0; m < domx86->n_mappings; m++ )
-        {
-            map_cmp = domx86->maps + m;
-            if ( map_cmp->lvls[l].from == map_cmp->lvls[l].to )
-                continue;
-            if ( map->lvls[l].from >= map_cmp->lvls[l].from &&
-                 map->lvls[l].to <= map_cmp->lvls[l].to )
-            {
-                map->lvls[l].from = 0;
-                map->lvls[l].to = 0;
-                break;
-            }
-            assert(map->lvls[l].from >= map_cmp->lvls[l].from ||
-                   map->lvls[l].to <= map_cmp->lvls[l].to);
-            if ( map->lvls[l].from >= map_cmp->lvls[l].from &&
-                 map->lvls[l].from <= map_cmp->lvls[l].to )
-                map->lvls[l].from = map_cmp->lvls[l].to + 1;
-            if ( map->lvls[l].to >= map_cmp->lvls[l].from &&
-                 map->lvls[l].to <= map_cmp->lvls[l].to )
-                map->lvls[l].to = map_cmp->lvls[l].from - 1;
-        }
-        if ( map->lvls[l].from < map->lvls[l].to )
-            map->lvls[l].pgtables =
-                ((map->lvls[l].to - map->lvls[l].from) >> bits) + 1;
-        DOMPRINTF("%s: 0x%016" PRIx64 "/%d: 0x%016" PRIx64 " -> 0x%016" PRIx64
-                  ", %d table(s)", __FUNCTION__, mask, bits,
-                  map->lvls[l].from, map->lvls[l].to, map->lvls[l].pgtables);
-        map->area.pgtables += map->lvls[l].pgtables;
-    }
-
-    return 0;
-}
-
-static int alloc_pgtables_pv(struct xc_dom_image *dom)
-{
-    int pages, extra_pages;
-    xen_vaddr_t try_virt_end;
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings;
-
-    extra_pages = dom->alloc_bootstack ? 1 : 0;
-    extra_pages += (512 * 1024) / PAGE_SIZE_X86; /* 512kB padding */
-    pages = extra_pages;
-    for ( ; ; )
-    {
-        try_virt_end = round_up(dom->virt_alloc_end + pages * PAGE_SIZE_X86,
-                                bits_to_mask(22)); /* 4MB alignment */
-
-        if ( count_pgtables(dom, dom->parms.virt_base, try_virt_end, 0) )
-            return -1;
-
-        pages = map->area.pgtables + extra_pages;
-        if ( dom->virt_alloc_end + pages * PAGE_SIZE_X86 <= try_virt_end + 1 )
-            break;
-    }
-    map->area.pfn = 0;
-    domx86->n_mappings++;
-    dom->virt_pgtab_end = try_virt_end + 1;
-
-    return xc_dom_alloc_segment(dom, &dom->pgtables_seg, "page tables", 0,
-                                map->area.pgtables * PAGE_SIZE_X86);
-}
-
-/* ------------------------------------------------------------------------ */
-/* i386 pagetables                                                          */
-
-static int alloc_pgtables_x86_32_pae(struct xc_dom_image *dom)
-{
-    static const struct xc_dom_params x86_32_params = {
-        .levels = PGTBL_LEVELS_I386,
-        .vaddr_mask = bits_to_mask(VIRT_BITS_I386),
-        .lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
-        /*
-         * 64bit Xen runs 32bit PV guests with the PAE entries in an L3
-         * pagetable.  They don't behave exactly like native PAE paging.
-         */
-        .lvl_prot[1 ... 2] =
-            _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
-    };
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-
-    domx86->params = &x86_32_params;
-
-    return alloc_pgtables_pv(dom);
-}
-
-#define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86)
-#define pgentry_to_pfn(entry) ((xen_pfn_t)((entry) >> PAGE_SHIFT_X86))
-
-/*
- * Move the l3 page table page below 4G for guests which do not
- * support the extended-cr3 format.  The l3 is currently empty so we
- * do not need to preserve the current contents.
- */
-static xen_pfn_t move_l3_below_4G(struct xc_dom_image *dom,
-                                  xen_pfn_t l3pfn,
-                                  xen_pfn_t l3mfn)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    uint32_t *p2m_guest = domx86->p2m_guest;
-    xen_pfn_t new_l3mfn;
-    struct xc_mmu *mmu;
-    void *l3tab;
-
-    mmu = xc_alloc_mmu_updates(dom->xch, dom->guest_domid);
-    if ( mmu == NULL )
-    {
-        DOMPRINTF("%s: failed at %d", __FUNCTION__, __LINE__);
-        return l3mfn;
-    }
-
-    xc_dom_unmap_one(dom, l3pfn);
-
-    new_l3mfn = xc_make_page_below_4G(dom->xch, dom->guest_domid, l3mfn);
-    if ( !new_l3mfn )
-        goto out;
-
-    p2m_guest[l3pfn] = dom->pv_p2m[l3pfn] = new_l3mfn;
-
-    if ( xc_add_mmu_update(dom->xch, mmu,
-                           (((unsigned long long)new_l3mfn)
-                            << XC_DOM_PAGE_SHIFT(dom)) |
-                           MMU_MACHPHYS_UPDATE, l3pfn) )
-        goto out;
-
-    if ( xc_flush_mmu_updates(dom->xch, mmu) )
-        goto out;
-
-    /*
-     * This ensures that the entire pgtables_seg is mapped by a single
-     * mmap region. arch_setup_bootlate() relies on this to be able to
-     * unmap and pin the pagetables.
-     */
-    if ( xc_dom_seg_to_ptr(dom, &dom->pgtables_seg) == NULL )
-        goto out;
-
-    l3tab = xc_dom_pfn_to_ptr(dom, l3pfn, 1);
-    if ( l3tab == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_pfn_to_ptr(dom, l3pfn, 1) => NULL",
-                  __FUNCTION__);
-        goto out; /* our one call site will call xc_dom_panic and fail */
-    }
-    memset(l3tab, 0, XC_DOM_PAGE_SIZE(dom));
-
-    DOMPRINTF("%s: successfully relocated L3 below 4G. "
-              "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn"=>%#"PRIpfn")",
-              __FUNCTION__, l3pfn, l3mfn, new_l3mfn);
-
-    l3mfn = new_l3mfn;
-
- out:
-    free(mmu);
-
-    return l3mfn;
-}
-
-static x86_pgentry_t *get_pg_table(struct xc_dom_image *dom, int m, int l)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map;
-    x86_pgentry_t *pg;
-
-    map = domx86->maps + m;
-    pg = xc_dom_pfn_to_ptr(dom, map->lvls[l].pfn, 0);
-    if ( pg )
-        return pg;
-
-    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                 "%s: xc_dom_pfn_to_ptr failed", __FUNCTION__);
-    return NULL;
-}
-
-static x86_pgentry_t get_pg_prot(struct xc_dom_image *dom, int l, xen_pfn_t pfn)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map;
-    xen_pfn_t pfn_s, pfn_e;
-    x86_pgentry_t prot;
-    unsigned m;
-
-    prot = domx86->params->lvl_prot[l];
-    if ( l > 0 )
-        return prot;
-
-    for ( m = 0; m < domx86->n_mappings; m++ )
-    {
-        map = domx86->maps + m;
-        pfn_s = map->lvls[domx86->params->levels - 1].pfn;
-        pfn_e = map->area.pgtables + pfn_s;
-        if ( pfn >= pfn_s && pfn < pfn_e )
-            return prot & ~_PAGE_RW;
-    }
-
-    return prot;
-}
-
-static int setup_pgtables_pv(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map1, *map2;
-    struct xc_dom_x86_mapping_lvl *lvl;
-    xen_vaddr_t from, to;
-    xen_pfn_t pfn, p, p_s, p_e;
-    x86_pgentry_t *pg;
-    unsigned m1, m2;
-    int l;
-
-    for ( l = domx86->params->levels - 1; l >= 0; l-- )
-        for ( m1 = 0; m1 < domx86->n_mappings; m1++ )
-        {
-            map1 = domx86->maps + m1;
-            from = map1->lvls[l].from;
-            to = map1->lvls[l].to;
-            pg = get_pg_table(dom, m1, l);
-            if ( !pg )
-                return -1;
-            for ( m2 = 0; m2 < domx86->n_mappings; m2++ )
-            {
-                map2 = domx86->maps + m2;
-                lvl = (l > 0) ? map2->lvls + l - 1 : &map2->area;
-                if ( l > 0 && lvl->pgtables == 0 )
-                    continue;
-                if ( lvl->from >= to || lvl->to <= from )
-                    continue;
-                p_s = (max(from, lvl->from) - from) >>
-                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86);
-                p_e = (min(to, lvl->to) - from) >>
-                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86);
-                pfn = ((max(from, lvl->from) - lvl->from) >>
-                      (PAGE_SHIFT_X86 + l * PGTBL_LEVEL_SHIFT_X86)) + lvl->pfn;
-                for ( p = p_s; p <= p_e; p++ )
-                {
-                    pg[p] = pfn_to_paddr(xc_dom_p2m(dom, pfn)) |
-                            get_pg_prot(dom, l, pfn);
-                    pfn++;
-                }
-            }
-        }
-
-    return 0;
-}
-
-static int setup_pgtables_x86_32_pae(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    uint32_t *p2m_guest = domx86->p2m_guest;
-    xen_pfn_t l3mfn, l3pfn, i;
-
-    /* Copy dom->pv_p2m[] into the guest. */
-    for ( i = 0; i < dom->p2m_size; ++i )
-    {
-        if ( dom->pv_p2m[i] != INVALID_PFN )
-            p2m_guest[i] = dom->pv_p2m[i];
-        else
-            p2m_guest[i] = -1;
-    }
-
-    l3pfn = domx86->maps[0].lvls[2].pfn;
-    l3mfn = xc_dom_p2m(dom, l3pfn);
-    if ( dom->parms.pae == XEN_PAE_YES )
-    {
-        if ( l3mfn >= 0x100000 )
-            l3mfn = move_l3_below_4G(dom, l3pfn, l3mfn);
-
-        if ( l3mfn >= 0x100000 )
-        {
-            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,"%s: cannot move L3"
-                         " below 4G. extended-cr3 not supported by guest. "
-                         "(L3 PFN %#"PRIpfn" MFN %#"PRIpfn")",
-                         __FUNCTION__, l3pfn, l3mfn);
-            return -EINVAL;
-        }
-    }
-
-    return setup_pgtables_pv(dom);
-}
-
-/* ------------------------------------------------------------------------ */
-/* x86_64 pagetables                                                        */
-
-static int alloc_pgtables_x86_64(struct xc_dom_image *dom)
-{
-    const static struct xc_dom_params x86_64_params = {
-        .levels = PGTBL_LEVELS_X86_64,
-        .vaddr_mask = bits_to_mask(VIRT_BITS_X86_64),
-        .lvl_prot[0] = _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED,
-        .lvl_prot[1 ... 3] =
-            _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER,
-    };
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-
-    domx86->params = &x86_64_params;
-
-    return alloc_pgtables_pv(dom);
-}
-
-static int setup_pgtables_x86_64(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    uint64_t *p2m_guest = domx86->p2m_guest;
-    xen_pfn_t i;
-
-    /* Copy dom->pv_p2m[] into the guest. */
-    for ( i = 0; i < dom->p2m_size; ++i )
-    {
-        if ( dom->pv_p2m[i] != INVALID_PFN )
-            p2m_guest[i] = dom->pv_p2m[i];
-        else
-            p2m_guest[i] = -1;
-    }
-
-    return setup_pgtables_pv(dom);
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int alloc_p2m_list(struct xc_dom_image *dom, size_t p2m_alloc_size)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-
-    if ( xc_dom_alloc_segment(dom, &dom->p2m_seg, "phys2mach",
-                              0, p2m_alloc_size) )
-        return -1;
-
-    domx86->p2m_guest = xc_dom_seg_to_ptr(dom, &dom->p2m_seg);
-    if ( domx86->p2m_guest == NULL )
-        return -1;
-
-    return 0;
-}
-
-static int alloc_p2m_list_x86_32(struct xc_dom_image *dom)
-{
-    size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
-
-    p2m_alloc_size = round_pg_up(p2m_alloc_size);
-    return alloc_p2m_list(dom, p2m_alloc_size);
-}
-
-static int alloc_p2m_list_x86_64(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings;
-    size_t p2m_alloc_size = dom->p2m_size * dom->arch_hooks->sizeof_pfn;
-    xen_vaddr_t from, to;
-    unsigned lvl;
-
-    p2m_alloc_size = round_pg_up(p2m_alloc_size);
-    if ( dom->parms.p2m_base != UNSET_ADDR )
-    {
-        from = dom->parms.p2m_base;
-        to = from + p2m_alloc_size - 1;
-        if ( count_pgtables(dom, from, to, dom->pfn_alloc_end) )
-            return -1;
-
-        map->area.pfn = dom->pfn_alloc_end;
-        for ( lvl = 0; lvl < 4; lvl++ )
-            map->lvls[lvl].pfn += p2m_alloc_size >> PAGE_SHIFT_X86;
-        domx86->n_mappings++;
-        p2m_alloc_size += map->area.pgtables << PAGE_SHIFT_X86;
-    }
-
-    return alloc_p2m_list(dom, p2m_alloc_size);
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int alloc_magic_pages_pv(struct xc_dom_image *dom)
-{
-    dom->start_info_pfn = xc_dom_alloc_page(dom, "start info");
-    if ( dom->start_info_pfn == INVALID_PFN )
-        return -1;
-
-    dom->xenstore_pfn = xc_dom_alloc_page(dom, "xenstore");
-    if ( dom->xenstore_pfn == INVALID_PFN )
-        return -1;
-    xc_clear_domain_page(dom->xch, dom->guest_domid,
-                         xc_dom_p2m(dom, dom->xenstore_pfn));
-
-    dom->console_pfn = xc_dom_alloc_page(dom, "console");
-    if ( dom->console_pfn == INVALID_PFN )
-        return -1;
-    xc_clear_domain_page(dom->xch, dom->guest_domid,
-                         xc_dom_p2m(dom, dom->console_pfn));
-
-    dom->alloc_bootstack = 1;
-
-    return 0;
-}
-
-static void build_hvm_info(void *hvm_info_page, struct xc_dom_image *dom)
-{
-    struct hvm_info_table *hvm_info = (struct hvm_info_table *)
-        (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
-    uint8_t sum;
-    int i;
-
-    memset(hvm_info_page, 0, PAGE_SIZE);
-
-    /* Fill in the header. */
-    memcpy(hvm_info->signature, "HVM INFO", sizeof(hvm_info->signature));
-    hvm_info->length = sizeof(struct hvm_info_table);
-
-    /* Sensible defaults: these can be overridden by the caller. */
-    hvm_info->apic_mode = 1;
-    hvm_info->nr_vcpus = 1;
-    memset(hvm_info->vcpu_online, 0xff, sizeof(hvm_info->vcpu_online));
-
-    /* Memory parameters. */
-    hvm_info->low_mem_pgend = dom->lowmem_end >> PAGE_SHIFT;
-    hvm_info->high_mem_pgend = dom->highmem_end >> PAGE_SHIFT;
-    hvm_info->reserved_mem_pgstart = ioreq_server_pfn(0);
-
-    /* Finish with the checksum. */
-    for ( i = 0, sum = 0; i < hvm_info->length; i++ )
-        sum += ((uint8_t *)hvm_info)[i];
-    hvm_info->checksum = -sum;
-}
-
-static int alloc_magic_pages_hvm(struct xc_dom_image *dom)
-{
-    unsigned long i;
-    uint32_t *ident_pt, domid = dom->guest_domid;
-    int rc;
-    xen_pfn_t special_array[X86_HVM_NR_SPECIAL_PAGES];
-    xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
-    xc_interface *xch = dom->xch;
-    size_t start_info_size = sizeof(struct hvm_start_info);
-
-    /* Allocate and clear special pages. */
-    for ( i = 0; i < X86_HVM_NR_SPECIAL_PAGES; i++ )
-        special_array[i] = special_pfn(i);
-
-    rc = xc_domain_populate_physmap_exact(xch, domid, X86_HVM_NR_SPECIAL_PAGES,
-                                          0, 0, special_array);
-    if ( rc != 0 )
-    {
-        DOMPRINTF("Could not allocate special pages.");
-        goto error_out;
-    }
-
-    if ( xc_clear_domain_pages(xch, domid, special_pfn(0),
-                               X86_HVM_NR_SPECIAL_PAGES) )
-            goto error_out;
-
-    xc_hvm_param_set(xch, domid, HVM_PARAM_STORE_PFN,
-                     special_pfn(SPECIALPAGE_XENSTORE));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_BUFIOREQ_PFN,
-                     special_pfn(SPECIALPAGE_BUFIOREQ));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_PFN,
-                     special_pfn(SPECIALPAGE_IOREQ));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_CONSOLE_PFN,
-                     special_pfn(SPECIALPAGE_CONSOLE));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_PAGING_RING_PFN,
-                     special_pfn(SPECIALPAGE_PAGING));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_MONITOR_RING_PFN,
-                     special_pfn(SPECIALPAGE_ACCESS));
-    xc_hvm_param_set(xch, domid, HVM_PARAM_SHARING_RING_PFN,
-                     special_pfn(SPECIALPAGE_SHARING));
-
-    start_info_size +=
-        sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;
-
-    start_info_size +=
-        HVMLOADER_MODULE_CMDLINE_SIZE * HVMLOADER_MODULE_MAX_COUNT;
-
-    start_info_size +=
-        dom->e820_entries * sizeof(struct hvm_memmap_table_entry);
-
-    if ( !dom->device_model )
-    {
-        if ( dom->cmdline )
-        {
-            dom->cmdline_size = ROUNDUP(strlen(dom->cmdline) + 1, 8);
-            start_info_size += dom->cmdline_size;
-        }
-    }
-    else
-    {
-        /*
-         * Allocate and clear additional ioreq server pages. The default
-         * server will use the IOREQ and BUFIOREQ special pages above.
-         */
-        for ( i = 0; i < NR_IOREQ_SERVER_PAGES; i++ )
-            ioreq_server_array[i] = ioreq_server_pfn(i);
-
-        rc = xc_domain_populate_physmap_exact(xch, domid, NR_IOREQ_SERVER_PAGES, 0,
-                                              0, ioreq_server_array);
-        if ( rc != 0 )
-        {
-            DOMPRINTF("Could not allocate ioreq server pages.");
-            goto error_out;
-        }
-
-        if ( xc_clear_domain_pages(xch, domid, ioreq_server_pfn(0),
-                                   NR_IOREQ_SERVER_PAGES) )
-                goto error_out;
-
-        /* Tell the domain where the pages are and how many there are */
-        xc_hvm_param_set(xch, domid, HVM_PARAM_IOREQ_SERVER_PFN,
-                         ioreq_server_pfn(0));
-        xc_hvm_param_set(xch, domid, HVM_PARAM_NR_IOREQ_SERVER_PAGES,
-                         NR_IOREQ_SERVER_PAGES);
-    }
-
-    rc = xc_dom_alloc_segment(dom, &dom->start_info_seg,
-                              "HVM start info", 0, start_info_size);
-    if ( rc != 0 )
-    {
-        DOMPRINTF("Unable to reserve memory for the start info");
-        goto out;
-    }
-
-    /*
-     * Identity-map page table is required for running with CR0.PG=0 when
-     * using Intel EPT. Create a 32-bit non-PAE page directory of superpages.
-     */
-    if ( (ident_pt = xc_map_foreign_range(
-              xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
-              special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
-        goto error_out;
-    for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
-        ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
-                       _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
-    munmap(ident_pt, PAGE_SIZE);
-    xc_hvm_param_set(xch, domid, HVM_PARAM_IDENT_PT,
-                     special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
-
-    dom->console_pfn = special_pfn(SPECIALPAGE_CONSOLE);
-    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->console_pfn);
-
-    dom->xenstore_pfn = special_pfn(SPECIALPAGE_XENSTORE);
-    xc_clear_domain_page(dom->xch, dom->guest_domid, dom->xenstore_pfn);
-
-    dom->parms.virt_hypercall = -1;
-
-    rc = 0;
-    goto out;
- error_out:
-    rc = -1;
- out:
-
-    return rc;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int start_info_x86_32(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    start_info_x86_32_t *start_info =
-        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
-    xen_pfn_t shinfo =
-        xc_dom_translated(dom) ? dom->shared_info_pfn : dom->shared_info_mfn;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    if ( start_info == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
-        return -1; /* our caller throws away our return value :-/ */
-    }
-
-    memset(start_info, 0, sizeof(*start_info));
-    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
-    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
-    start_info->nr_pages = dom->total_pages;
-    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
-    start_info->pt_base = dom->pgtables_seg.vstart;
-    start_info->nr_pt_frames = domx86->maps[0].area.pgtables;
-    start_info->mfn_list = dom->p2m_seg.vstart;
-
-    start_info->flags = dom->flags;
-    start_info->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
-    start_info->store_evtchn = dom->xenstore_evtchn;
-    start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
-    start_info->console.domU.evtchn = dom->console_evtchn;
-
-    if ( dom->modules[0].blob )
-    {
-        start_info->mod_start = dom->initrd_start;
-        start_info->mod_len = dom->initrd_len;
-    }
-
-    if ( dom->cmdline )
-    {
-        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
-        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
-    }
-
-    return 0;
-}
-
-static int start_info_x86_64(struct xc_dom_image *dom)
-{
-    struct xc_dom_image_x86 *domx86 = dom->arch_private;
-    start_info_x86_64_t *start_info =
-        xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1);
-    xen_pfn_t shinfo =
-        xc_dom_translated(dom) ? dom->shared_info_pfn : dom->shared_info_mfn;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    if ( start_info == NULL )
-    {
-        DOMPRINTF("%s: xc_dom_pfn_to_ptr failed on start_info", __FUNCTION__);
-        return -1; /* our caller throws away our return value :-/ */
-    }
-
-    memset(start_info, 0, sizeof(*start_info));
-    strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
-    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
-    start_info->nr_pages = dom->total_pages;
-    start_info->shared_info = shinfo << PAGE_SHIFT_X86;
-    start_info->pt_base = dom->pgtables_seg.vstart;
-    start_info->nr_pt_frames = domx86->maps[0].area.pgtables;
-    start_info->mfn_list = dom->p2m_seg.vstart;
-    if ( dom->parms.p2m_base != UNSET_ADDR )
-    {
-        start_info->first_p2m_pfn = dom->p2m_seg.pfn;
-        start_info->nr_p2m_frames = dom->p2m_seg.pages;
-    }
-
-    start_info->flags = dom->flags;
-    start_info->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
-    start_info->store_evtchn = dom->xenstore_evtchn;
-    start_info->console.domU.mfn = xc_dom_p2m(dom, dom->console_pfn);
-    start_info->console.domU.evtchn = dom->console_evtchn;
-
-    if ( dom->modules[0].blob )
-    {
-        start_info->mod_start = dom->initrd_start;
-        start_info->mod_len = dom->initrd_len;
-    }
-
-    if ( dom->cmdline )
-    {
-        strncpy((char *)start_info->cmd_line, dom->cmdline, MAX_GUEST_CMDLINE);
-        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0';
-    }
-
-    return 0;
-}
-
-static int shared_info_x86_32(struct xc_dom_image *dom, void *ptr)
-{
-    shared_info_x86_32_t *shared_info = ptr;
-    int i;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    memset(shared_info, 0, sizeof(*shared_info));
-    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
-        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-    return 0;
-}
-
-static int shared_info_x86_64(struct xc_dom_image *dom, void *ptr)
-{
-    shared_info_x86_64_t *shared_info = ptr;
-    int i;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    memset(shared_info, 0, sizeof(*shared_info));
-    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
-        shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
-    return 0;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int vcpu_x86_32(struct xc_dom_image *dom)
-{
-    vcpu_guest_context_any_t any_ctx;
-    vcpu_guest_context_x86_32_t *ctxt = &any_ctx.x32;
-    xen_pfn_t cr3_pfn;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* clear everything */
-    memset(ctxt, 0, sizeof(*ctxt));
-
-    ctxt->user_regs.eip = dom->parms.virt_entry;
-    ctxt->user_regs.esp =
-        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
-    ctxt->user_regs.esi =
-        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
-    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */
-
-    ctxt->debugreg[6] = X86_DR6_DEFAULT;
-    ctxt->debugreg[7] = X86_DR7_DEFAULT;
-
-    ctxt->flags = VGCF_in_kernel_X86_32 | VGCF_online_X86_32;
-    if ( dom->parms.pae == XEN_PAE_EXTCR3 ||
-         dom->parms.pae == XEN_PAE_BIMODAL )
-        ctxt->vm_assist |= (1UL << VMASST_TYPE_pae_extended_cr3);
-
-    cr3_pfn = xc_dom_p2m(dom, dom->pgtables_seg.pfn);
-    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_32(cr3_pfn);
-    DOMPRINTF("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "",
-              __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
-
-    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_32;
-    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_32;
-    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_32;
-    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_32;
-    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_32;
-    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_32;
-
-    ctxt->kernel_ss = ctxt->user_regs.ss;
-    ctxt->kernel_sp = ctxt->user_regs.esp;
-
-    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
-
-    return rc;
-}
-
-static int vcpu_x86_64(struct xc_dom_image *dom)
-{
-    vcpu_guest_context_any_t any_ctx;
-    vcpu_guest_context_x86_64_t *ctxt = &any_ctx.x64;
-    xen_pfn_t cr3_pfn;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    /* clear everything */
-    memset(ctxt, 0, sizeof(*ctxt));
-
-    ctxt->user_regs.rip = dom->parms.virt_entry;
-    ctxt->user_regs.rsp =
-        dom->parms.virt_base + (dom->bootstack_pfn + 1) * PAGE_SIZE_X86;
-    ctxt->user_regs.rsi =
-        dom->parms.virt_base + (dom->start_info_pfn) * PAGE_SIZE_X86;
-    ctxt->user_regs.rflags = 1 << 9; /* Interrupt Enable */
-
-    ctxt->debugreg[6] = X86_DR6_DEFAULT;
-    ctxt->debugreg[7] = X86_DR7_DEFAULT;
-
-    ctxt->flags = VGCF_in_kernel_X86_64 | VGCF_online_X86_64;
-    cr3_pfn = xc_dom_p2m(dom, dom->pgtables_seg.pfn);
-    ctxt->ctrlreg[3] = xen_pfn_to_cr3_x86_64(cr3_pfn);
-    DOMPRINTF("%s: cr3: pfn 0x%" PRIpfn " mfn 0x%" PRIpfn "",
-              __FUNCTION__, dom->pgtables_seg.pfn, cr3_pfn);
-
-    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
-    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
-    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
-    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
-    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
-    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
-
-    ctxt->kernel_ss = ctxt->user_regs.ss;
-    ctxt->kernel_sp = ctxt->user_regs.esp;
-
-    rc = xc_vcpu_setcontext(dom->xch, dom->guest_domid, 0, &any_ctx);
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: SETVCPUCONTEXT failed (rc=%d)", __func__, rc);
-
-    return rc;
-}
-
-const static void *hvm_get_save_record(const void *ctx, unsigned int type,
-                                       unsigned int instance)
-{
-    const struct hvm_save_descriptor *header;
-
-    for ( header = ctx;
-          header->typecode != HVM_SAVE_CODE(END);
-          ctx += sizeof(*header) + header->length, header = ctx )
-        if ( header->typecode == type && header->instance == instance )
-            return ctx + sizeof(*header);
-
-    return NULL;
-}
-
-static int vcpu_hvm(struct xc_dom_image *dom)
-{
-    struct {
-        struct hvm_save_descriptor header_d;
-        HVM_SAVE_TYPE(HEADER) header;
-        struct hvm_save_descriptor cpu_d;
-        HVM_SAVE_TYPE(CPU) cpu;
-        struct hvm_save_descriptor end_d;
-        HVM_SAVE_TYPE(END) end;
-    } bsp_ctx;
-    uint8_t *full_ctx = NULL;
-    int rc;
-
-    DOMPRINTF_CALLED(dom->xch);
-
-    assert(dom->max_vcpus);
-
-    /*
-     * Get the full HVM context in order to have the header, it is not
-     * possible to get the header with getcontext_partial, and crafting one
-     * from userspace is also not an option since cpuid is trapped and
-     * modified by Xen.
-     */
-
-    rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, NULL, 0);
-    if ( rc <= 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: unable to fetch HVM context size (rc=%d)",
-                     __func__, rc);
-        goto out;
-    }
-
-    full_ctx = calloc(1, rc);
-    if ( full_ctx == NULL )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: unable to allocate memory for HVM context (rc=%d)",
-                     __func__, rc);
-        rc = -ENOMEM;
-        goto out;
-    }
-
-    rc = xc_domain_hvm_getcontext(dom->xch, dom->guest_domid, full_ctx, rc);
-    if ( rc <= 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: unable to fetch HVM context (rc=%d)",
-                     __func__, rc);
-        goto out;
-    }
-
-    /* Copy the header to our partial context. */
-    memset(&bsp_ctx, 0, sizeof(bsp_ctx));
-    memcpy(&bsp_ctx, full_ctx,
-           sizeof(struct hvm_save_descriptor) + HVM_SAVE_LENGTH(HEADER));
-
-    /* Set the CPU descriptor. */
-    bsp_ctx.cpu_d.typecode = HVM_SAVE_CODE(CPU);
-    bsp_ctx.cpu_d.instance = 0;
-    bsp_ctx.cpu_d.length = HVM_SAVE_LENGTH(CPU);
-
-    /* Set the cached part of the relevant segment registers. */
-    bsp_ctx.cpu.cs_base = 0;
-    bsp_ctx.cpu.ds_base = 0;
-    bsp_ctx.cpu.es_base = 0;
-    bsp_ctx.cpu.ss_base = 0;
-    bsp_ctx.cpu.tr_base = 0;
-    bsp_ctx.cpu.cs_limit = ~0u;
-    bsp_ctx.cpu.ds_limit = ~0u;
-    bsp_ctx.cpu.es_limit = ~0u;
-    bsp_ctx.cpu.ss_limit = ~0u;
-    bsp_ctx.cpu.tr_limit = 0x67;
-    bsp_ctx.cpu.cs_arbytes = 0xc9b;
-    bsp_ctx.cpu.ds_arbytes = 0xc93;
-    bsp_ctx.cpu.es_arbytes = 0xc93;
-    bsp_ctx.cpu.ss_arbytes = 0xc93;
-    bsp_ctx.cpu.tr_arbytes = 0x8b;
-
-    /* Set the control registers. */
-    bsp_ctx.cpu.cr0 = X86_CR0_PE | X86_CR0_ET;
-
-    /* Set the IP. */
-    bsp_ctx.cpu.rip = dom->parms.phys_entry;
-
-    bsp_ctx.cpu.dr6 = X86_DR6_DEFAULT;
-    bsp_ctx.cpu.dr7 = X86_DR7_DEFAULT;
-
-    if ( dom->start_info_seg.pfn )
-        bsp_ctx.cpu.rbx = dom->start_info_seg.pfn << PAGE_SHIFT;
-
-    /* Set the end descriptor. */
-    bsp_ctx.end_d.typecode = HVM_SAVE_CODE(END);
-    bsp_ctx.end_d.instance = 0;
-    bsp_ctx.end_d.length = HVM_SAVE_LENGTH(END);
-
-    /* TODO: maybe this should be a firmware option instead? */
-    if ( !dom->device_model )
-    {
-        struct {
-            struct hvm_save_descriptor header_d;
-            HVM_SAVE_TYPE(HEADER) header;
-            struct hvm_save_descriptor mtrr_d;
-            HVM_SAVE_TYPE(MTRR) mtrr;
-            struct hvm_save_descriptor end_d;
-            HVM_SAVE_TYPE(END) end;
-        } mtrr = {
-            .header_d = bsp_ctx.header_d,
-            .header = bsp_ctx.header,
-            .mtrr_d.typecode = HVM_SAVE_CODE(MTRR),
-            .mtrr_d.length = HVM_SAVE_LENGTH(MTRR),
-            .end_d = bsp_ctx.end_d,
-            .end = bsp_ctx.end,
-        };
-        const HVM_SAVE_TYPE(MTRR) *mtrr_record =
-            hvm_get_save_record(full_ctx, HVM_SAVE_CODE(MTRR), 0);
-        unsigned int i;
-
-        if ( !mtrr_record )
-        {
-            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                         "%s: unable to get MTRR save record", __func__);
-            goto out;
-        }
-
-        memcpy(&mtrr.mtrr, mtrr_record, sizeof(mtrr.mtrr));
-
-        /*
-         * Enable MTRR, set default type to WB.
-         * TODO: add MMIO areas as UC when passthrough is supported.
-         */
-        mtrr.mtrr.msr_mtrr_def_type = MTRR_TYPE_WRBACK | MTRR_DEF_TYPE_ENABLE;
-
-        for ( i = 0; i < dom->max_vcpus; i++ )
-        {
-            mtrr.mtrr_d.instance = i;
-            rc = xc_domain_hvm_setcontext(dom->xch, dom->guest_domid,
-                                          (uint8_t *)&mtrr, sizeof(mtrr));
-            if ( rc != 0 )
-                xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                             "%s: SETHVMCONTEXT failed (rc=%d)", __func__, rc);
-        }
-    }
-
-    /*
-     * Loading the BSP context should be done in the last call to setcontext,
-     * since each setcontext call will put all vCPUs down.
-     */
-    rc = xc_domain_hvm_setcontext(dom->xch, dom->guest_domid,
-                                  (uint8_t *)&bsp_ctx, sizeof(bsp_ctx));
-    if ( rc != 0 )
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: SETHVMCONTEXT failed (rc=%d)", __func__, rc);
-
- out:
-    free(full_ctx);
-    return rc;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int x86_compat(xc_interface *xch, uint32_t domid, char *guest_type)
-{
-    static const struct {
-        char           *guest;
-        uint32_t        size;
-    } types[] = {
-        { "xen-3.0-x86_32p", 32 },
-        { "xen-3.0-x86_64",  64 },
-    };
-    DECLARE_DOMCTL;
-    int i,rc;
-
-    memset(&domctl, 0, sizeof(domctl));
-    domctl.domain = domid;
-    domctl.cmd    = XEN_DOMCTL_set_address_size;
-    for ( i = 0; i < ARRAY_SIZE(types); i++ )
-        if ( !strcmp(types[i].guest, guest_type) )
-            domctl.u.address_size.size = types[i].size;
-    if ( domctl.u.address_size.size == 0 )
-        /* nothing to do */
-        return 0;
-
-    xc_dom_printf(xch, "%s: guest %s, address size %" PRId32 "", __FUNCTION__,
-                  guest_type, domctl.u.address_size.size);
-    rc = do_domctl(xch, &domctl);
-    if ( rc != 0 )
-        xc_dom_printf(xch, "%s: warning: failed (rc=%d)",
-                      __FUNCTION__, rc);
-    return rc;
-}
-
-static int meminit_pv(struct xc_dom_image *dom)
-{
-    int rc;
-    xen_pfn_t pfn, allocsz, mfn, total, pfn_base;
-    int i, j, k;
-    xen_vmemrange_t dummy_vmemrange[1];
-    unsigned int dummy_vnode_to_pnode[1];
-    xen_vmemrange_t *vmemranges;
-    unsigned int *vnode_to_pnode;
-    unsigned int nr_vmemranges, nr_vnodes;
-
-    rc = x86_compat(dom->xch, dom->guest_domid, dom->guest_type);
-    if ( rc )
-        return rc;
-
-    /* try to claim pages for early warning of insufficient memory avail */
-    if ( dom->claim_enabled )
-    {
-        rc = xc_domain_claim_pages(dom->xch, dom->guest_domid,
-                                   dom->total_pages);
-        if ( rc )
-            return rc;
-    }
-
-    /* Setup dummy vNUMA information if it's not provided. Note
-     * that this is a valid state if libxl doesn't provide any
-     * vNUMA information.
-     *
-     * The dummy values make libxc allocate all pages from
-     * arbitrary physical nodes. This is the expected behaviour if
-     * no vNUMA configuration is provided to libxc.
-     *
-     * Note that the following hunk is just for the convenience of
-     * allocation code. No defaulting happens in libxc.
-     */
-    if ( dom->nr_vmemranges == 0 )
-    {
-        nr_vmemranges = 1;
-        vmemranges = dummy_vmemrange;
-        vmemranges[0].start = 0;
-        vmemranges[0].end   = (uint64_t)dom->total_pages << PAGE_SHIFT;
-        vmemranges[0].flags = 0;
-        vmemranges[0].nid   = 0;
-
-        nr_vnodes = 1;
-        vnode_to_pnode = dummy_vnode_to_pnode;
-        vnode_to_pnode[0] = XC_NUMA_NO_NODE;
-    }
-    else
-    {
-        nr_vmemranges = dom->nr_vmemranges;
-        nr_vnodes = dom->nr_vnodes;
-        vmemranges = dom->vmemranges;
-        vnode_to_pnode = dom->vnode_to_pnode;
-    }
-
-    total = dom->p2m_size = 0;
-    for ( i = 0; i < nr_vmemranges; i++ )
-    {
-        total += ((vmemranges[i].end - vmemranges[i].start) >> PAGE_SHIFT);
-        dom->p2m_size = max(dom->p2m_size,
-                            (xen_pfn_t)(vmemranges[i].end >> PAGE_SHIFT));
-    }
-    if ( total != dom->total_pages )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: vNUMA page count mismatch (0x%"PRIpfn" != 0x%"PRIpfn")",
-                     __func__, total, dom->total_pages);
-        return -EINVAL;
-    }
-
-    dom->pv_p2m = xc_dom_malloc(dom, sizeof(*dom->pv_p2m) * dom->p2m_size);
-    if ( dom->pv_p2m == NULL )
-        return -EINVAL;
-    for ( pfn = 0; pfn < dom->p2m_size; pfn++ )
-        dom->pv_p2m[pfn] = INVALID_PFN;
-
-    /* allocate guest memory */
-    for ( i = 0; i < nr_vmemranges; i++ )
-    {
-        unsigned int memflags;
-        uint64_t pages, super_pages;
-        unsigned int pnode = vnode_to_pnode[vmemranges[i].nid];
-        xen_pfn_t extents[SUPERPAGE_BATCH_SIZE];
-        xen_pfn_t pfn_base_idx;
-
-        memflags = 0;
-        if ( pnode != XC_NUMA_NO_NODE )
-            memflags |= XENMEMF_exact_node(pnode);
-
-        pages = (vmemranges[i].end - vmemranges[i].start) >> PAGE_SHIFT;
-        super_pages = pages >> SUPERPAGE_2MB_SHIFT;
-        pfn_base = vmemranges[i].start >> PAGE_SHIFT;
-
-        for ( pfn = pfn_base; pfn < pfn_base+pages; pfn++ )
-            dom->pv_p2m[pfn] = pfn;
-
-        pfn_base_idx = pfn_base;
-        while ( super_pages ) {
-            uint64_t count = min_t(uint64_t, super_pages, SUPERPAGE_BATCH_SIZE);
-            super_pages -= count;
-
-            for ( pfn = pfn_base_idx, j = 0;
-                  pfn < pfn_base_idx + (count << SUPERPAGE_2MB_SHIFT);
-                  pfn += SUPERPAGE_2MB_NR_PFNS, j++ )
-                extents[j] = dom->pv_p2m[pfn];
-            rc = xc_domain_populate_physmap(dom->xch, dom->guest_domid, count,
-                                            SUPERPAGE_2MB_SHIFT, memflags,
-                                            extents);
-            if ( rc < 0 )
-                return rc;
-
-            /* Expand the returned mfns into the p2m array. */
-            pfn = pfn_base_idx;
-            for ( j = 0; j < rc; j++ )
-            {
-                mfn = extents[j];
-                for ( k = 0; k < SUPERPAGE_2MB_NR_PFNS; k++, pfn++ )
-                    dom->pv_p2m[pfn] = mfn + k;
-            }
-            pfn_base_idx = pfn;
-        }
-
-        for ( j = pfn_base_idx - pfn_base; j < pages; j += allocsz )
-        {
-            allocsz = min_t(uint64_t, 1024 * 1024, pages - j);
-            rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
-                     allocsz, 0, memflags, &dom->pv_p2m[pfn_base + j]);
-
-            if ( rc )
-            {
-                if ( pnode != XC_NUMA_NO_NODE )
-                    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                                 "%s: failed to allocate 0x%"PRIx64" pages (v=%d, p=%d)",
-                                 __func__, pages, i, pnode);
-                else
-                    xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                                 "%s: failed to allocate 0x%"PRIx64" pages",
-                                 __func__, pages);
-                return rc;
-            }
-        }
-        rc = 0;
-    }
-
-    /* Ensure no unclaimed pages are left unused.
-     * OK to call if hadn't done the earlier claim call. */
-    xc_domain_claim_pages(dom->xch, dom->guest_domid, 0 /* cancel claim */);
-
-    return rc;
-}
-
-/*
- * Check whether there exists mmio hole in the specified memory range.
- * Returns 1 if exists, else returns 0.
- */
-static int check_mmio_hole(uint64_t start, uint64_t memsize,
-                           uint64_t mmio_start, uint64_t mmio_size)
-{
-    if ( start + memsize <= mmio_start || start >= mmio_start + mmio_size )
-        return 0;
-    else
-        return 1;
-}
-
-static int meminit_hvm(struct xc_dom_image *dom)
-{
-    unsigned long i, vmemid, nr_pages = dom->total_pages;
-    unsigned long p2m_size;
-    unsigned long target_pages = dom->target_pages;
-    unsigned long cur_pages, cur_pfn;
-    int rc;
-    unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
-        stat_1gb_pages = 0;
-    unsigned int memflags = 0;
-    int claim_enabled = dom->claim_enabled;
-    uint64_t total_pages;
-    xen_vmemrange_t dummy_vmemrange[2];
-    unsigned int dummy_vnode_to_pnode[1];
-    xen_vmemrange_t *vmemranges;
-    unsigned int *vnode_to_pnode;
-    unsigned int nr_vmemranges, nr_vnodes;
-    xc_interface *xch = dom->xch;
-    uint32_t domid = dom->guest_domid;
-
-    if ( nr_pages > target_pages )
-        memflags |= XENMEMF_populate_on_demand;
-
-    if ( dom->nr_vmemranges == 0 )
-    {
-        /* Build dummy vnode information
-         *
-         * Guest physical address space layout:
-         * [0, hole_start) [hole_start, 4G) [4G, highmem_end)
-         *
-         * Of course if there is no high memory, the second vmemrange
-         * has no effect on the actual result.
-         */
-
-        dummy_vmemrange[0].start = 0;
-        dummy_vmemrange[0].end   = dom->lowmem_end;
-        dummy_vmemrange[0].flags = 0;
-        dummy_vmemrange[0].nid   = 0;
-        nr_vmemranges = 1;
-
-        if ( dom->highmem_end > (1ULL << 32) )
-        {
-            dummy_vmemrange[1].start = 1ULL << 32;
-            dummy_vmemrange[1].end   = dom->highmem_end;
-            dummy_vmemrange[1].flags = 0;
-            dummy_vmemrange[1].nid   = 0;
-
-            nr_vmemranges++;
-        }
-
-        dummy_vnode_to_pnode[0] = XC_NUMA_NO_NODE;
-        nr_vnodes = 1;
-        vmemranges = dummy_vmemrange;
-        vnode_to_pnode = dummy_vnode_to_pnode;
-    }
-    else
-    {
-        if ( nr_pages > target_pages )
-        {
-            DOMPRINTF("Cannot enable vNUMA and PoD at the same time");
-            goto error_out;
-        }
-
-        nr_vmemranges = dom->nr_vmemranges;
-        nr_vnodes = dom->nr_vnodes;
-        vmemranges = dom->vmemranges;
-        vnode_to_pnode = dom->vnode_to_pnode;
-    }
-
-    total_pages = 0;
-    p2m_size = 0;
-    for ( i = 0; i < nr_vmemranges; i++ )
-    {
-        DOMPRINTF("range: start=0x%"PRIx64" end=0x%"PRIx64, vmemranges[i].start, vmemranges[i].end);
-
-        total_pages += ((vmemranges[i].end - vmemranges[i].start)
-                        >> PAGE_SHIFT);
-        p2m_size = p2m_size > (vmemranges[i].end >> PAGE_SHIFT) ?
-            p2m_size : (vmemranges[i].end >> PAGE_SHIFT);
-    }
-
-    if ( total_pages != nr_pages )
-    {
-        DOMPRINTF("vNUMA memory pages mismatch (0x%"PRIx64" != 0x%lx)",
-               total_pages, nr_pages);
-        goto error_out;
-    }
-
-    dom->p2m_size = p2m_size;
-
-    /*
-     * Try to claim pages for early warning of insufficient memory available.
-     * This should go before xc_domain_set_pod_target, becuase that function
-     * actually allocates memory for the guest. Claiming after memory has been
-     * allocated is pointless.
-     */
-    if ( claim_enabled ) {
-        rc = xc_domain_claim_pages(xch, domid,
-                                   target_pages - dom->vga_hole_size);
-        if ( rc != 0 )
-        {
-            DOMPRINTF("Could not allocate memory for HVM guest as we cannot claim memory!");
-            goto error_out;
-        }
-    }
-
-    if ( memflags & XENMEMF_populate_on_demand )
-    {
-        /*
-         * Subtract VGA_HOLE_SIZE from target_pages for the VGA
-         * "hole".  Xen will adjust the PoD cache size so that domain
-         * tot_pages will be target_pages - VGA_HOLE_SIZE after
-         * this call.
-         */
-        rc = xc_domain_set_pod_target(xch, domid,
-                                      target_pages - dom->vga_hole_size,
-                                      NULL, NULL, NULL);
-        if ( rc != 0 )
-        {
-            DOMPRINTF("Could not set PoD target for HVM guest.\n");
-            goto error_out;
-        }
-    }
-
-    /*
-     * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
-     *
-     * We attempt to allocate 1GB pages if possible. It falls back on 2MB
-     * pages if 1GB allocation fails. 4KB pages will be used eventually if
-     * both fail.
-     */
-    if ( dom->device_model )
-    {
-        xen_pfn_t extents[0xa0];
-
-        for ( i = 0; i < ARRAY_SIZE(extents); ++i )
-            extents[i] = i;
-
-        rc = xc_domain_populate_physmap_exact(
-            xch, domid, 0xa0, 0, memflags, extents);
-        if ( rc != 0 )
-        {
-            DOMPRINTF("Could not populate low memory (< 0xA0).\n");
-            goto error_out;
-        }
-    }
-
-    stat_normal_pages = 0;
-    for ( vmemid = 0; vmemid < nr_vmemranges; vmemid++ )
-    {
-        unsigned int new_memflags = memflags;
-        uint64_t end_pages;
-        unsigned int vnode = vmemranges[vmemid].nid;
-        unsigned int pnode = vnode_to_pnode[vnode];
-
-        if ( pnode != XC_NUMA_NO_NODE )
-            new_memflags |= XENMEMF_exact_node(pnode);
-
-        end_pages = vmemranges[vmemid].end >> PAGE_SHIFT;
-        /*
-         * Consider vga hole belongs to the vmemrange that covers
-         * 0xA0000-0xC0000. Note that 0x00000-0xA0000 is populated just
-         * before this loop.
-         */
-        if ( vmemranges[vmemid].start == 0 && dom->device_model )
-        {
-            cur_pages = 0xc0;
-            stat_normal_pages += 0xc0;
-        }
-        else
-            cur_pages = vmemranges[vmemid].start >> PAGE_SHIFT;
-
-        rc = 0;
-        while ( (rc == 0) && (end_pages > cur_pages) )
-        {
-            /* Clip count to maximum 1GB extent. */
-            unsigned long count = end_pages - cur_pages;
-            unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
-
-            if ( count > max_pages )
-                count = max_pages;
-
-            cur_pfn = cur_pages;
-
-            /* Take care the corner cases of super page tails */
-            if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
-                 (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
-                count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
-            else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
-                      (count > SUPERPAGE_1GB_NR_PFNS) )
-                count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
-
-            /* Attemp to allocate 1GB super page. Because in each pass
-             * we only allocate at most 1GB, we don't have to clip
-             * super page boundaries.
-             */
-            if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
-                 /* Check if there exists MMIO hole in the 1GB memory
-                  * range */
-                 !check_mmio_hole(cur_pfn << PAGE_SHIFT,
-                                  SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
-                                  dom->mmio_start, dom->mmio_size) )
-            {
-                long done;
-                unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
-                xen_pfn_t sp_extents[nr_extents];
-
-                for ( i = 0; i < nr_extents; i++ )
-                    sp_extents[i] = cur_pages + (i << SUPERPAGE_1GB_SHIFT);
-
-                done = xc_domain_populate_physmap(xch, domid, nr_extents,
-                                                  SUPERPAGE_1GB_SHIFT,
-                                                  new_memflags, sp_extents);
-
-                if ( done > 0 )
-                {
-                    stat_1gb_pages += done;
-                    done <<= SUPERPAGE_1GB_SHIFT;
-                    cur_pages += done;
-                    count -= done;
-                }
-            }
-
-            if ( count != 0 )
-            {
-                /* Clip count to maximum 8MB extent. */
-                max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
-                if ( count > max_pages )
-                    count = max_pages;
-
-                /* Clip partial superpage extents to superpage
-                 * boundaries. */
-                if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
-                     (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
-                    count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
-                else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
-                          (count > SUPERPAGE_2MB_NR_PFNS) )
-                    count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail */
-
-                /* Attempt to allocate superpage extents. */
-                if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
-                {
-                    long done;
-                    unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
-                    xen_pfn_t sp_extents[nr_extents];
-
-                    for ( i = 0; i < nr_extents; i++ )
-                        sp_extents[i] = cur_pages + (i << SUPERPAGE_2MB_SHIFT);
-
-                    done = xc_domain_populate_physmap(xch, domid, nr_extents,
-                                                      SUPERPAGE_2MB_SHIFT,
-                                                      new_memflags, sp_extents);
-
-                    if ( done > 0 )
-                    {
-                        stat_2mb_pages += done;
-                        done <<= SUPERPAGE_2MB_SHIFT;
-                        cur_pages += done;
-                        count -= done;
-                    }
-                }
-            }
-
-            /* Fall back to 4kB extents. */
-            if ( count != 0 )
-            {
-                xen_pfn_t extents[count];
-
-                for ( i = 0; i < count; ++i )
-                    extents[i] = cur_pages + i;
-
-                rc = xc_domain_populate_physmap_exact(
-                    xch, domid, count, 0, new_memflags, extents);
-                cur_pages += count;
-                stat_normal_pages += count;
-            }
-        }
-
-        if ( rc != 0 )
-        {
-            DOMPRINTF("Could not allocate memory for HVM guest.");
-            goto error_out;
-        }
-    }
-
-    DPRINTF("PHYSICAL MEMORY ALLOCATION:\n");
-    DPRINTF("  4KB PAGES: 0x%016lx\n", stat_normal_pages);
-    DPRINTF("  2MB PAGES: 0x%016lx\n", stat_2mb_pages);
-    DPRINTF("  1GB PAGES: 0x%016lx\n", stat_1gb_pages);
-
-    rc = 0;
-    goto out;
- error_out:
-    rc = -1;
- out:
-
-    /* ensure no unclaimed pages are left unused */
-    xc_domain_claim_pages(xch, domid, 0 /* cancels the claim */);
-
-    return rc;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static int bootearly(struct xc_dom_image *dom)
-{
-    if ( dom->container_type == XC_DOM_PV_CONTAINER &&
-         elf_xen_feature_get(XENFEAT_auto_translated_physmap, dom->f_active) )
-    {
-        DOMPRINTF("PV Autotranslate guests no longer supported");
-        errno = EOPNOTSUPP;
-        return -1;
-    }
-
-    return 0;
-}
-
-static int bootlate_pv(struct xc_dom_image *dom)
-{
-    static const struct {
-        char *guest;
-        unsigned long pgd_type;
-    } types[] = {
-        { "xen-3.0-x86_32",  MMUEXT_PIN_L2_TABLE},
-        { "xen-3.0-x86_32p", MMUEXT_PIN_L3_TABLE},
-        { "xen-3.0-x86_64",  MMUEXT_PIN_L4_TABLE},
-    };
-    unsigned long pgd_type = 0;
-    shared_info_t *shared_info;
-    xen_pfn_t shinfo;
-    int i, rc;
-
-    for ( i = 0; i < ARRAY_SIZE(types); i++ )
-        if ( !strcmp(types[i].guest, dom->guest_type) )
-            pgd_type = types[i].pgd_type;
-
-    /* Drop references to all initial page tables before pinning. */
-    xc_dom_unmap_one(dom, dom->pgtables_seg.pfn);
-    xc_dom_unmap_one(dom, dom->p2m_seg.pfn);
-    rc = pin_table(dom->xch, pgd_type,
-                   xc_dom_p2m(dom, dom->pgtables_seg.pfn),
-                   dom->guest_domid);
-    if ( rc != 0 )
-    {
-        xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
-                     "%s: pin_table failed (pfn 0x%" PRIpfn ", rc=%d)",
-                     __FUNCTION__, dom->pgtables_seg.pfn, rc);
-        return rc;
-    }
-    shinfo = dom->shared_info_mfn;
-
-    /* setup shared_info page */
-    DOMPRINTF("%s: shared_info: pfn 0x%" PRIpfn ", mfn 0x%" PRIpfn "",
-              __FUNCTION__, dom->shared_info_pfn, dom->shared_info_mfn);
-    shared_info = xc_map_foreign_range(dom->xch, dom->guest_domid,
-                                       PAGE_SIZE_X86,
-                                       PROT_READ | PROT_WRITE,
-                                       shinfo);
-    if ( shared_info == NULL )
-        return -1;
-    dom->arch_hooks->shared_info(dom, shared_info);
-    munmap(shared_info, PAGE_SIZE_X86);
-
-    return 0;
-}
-
-/*
- * The memory layout of the start_info page and the modules, and where the
- * addresses are stored:
- *
- * /----------------------------------\
- * | struct hvm_start_info            |
- * +----------------------------------+ <- start_info->modlist_paddr
- * | struct hvm_modlist_entry[0]      |
- * +----------------------------------+
- * | struct hvm_modlist_entry[1]      |
- * +----------------------------------+ <- modlist[0].cmdline_paddr
- * | cmdline of module 0              |
- * | char[HVMLOADER_MODULE_NAME_SIZE] |
- * +----------------------------------+ <- modlist[1].cmdline_paddr
- * | cmdline of module 1              |
- * +----------------------------------+
- */
-static void add_module_to_list(struct xc_dom_image *dom,
-                               struct xc_hvm_firmware_module *module,
-                               const char *cmdline,
-                               struct hvm_modlist_entry *modlist,
-                               struct hvm_start_info *start_info)
-{
-    uint32_t index = start_info->nr_modules;
-    void *modules_cmdline_start = modlist + HVMLOADER_MODULE_MAX_COUNT;
-    uint64_t modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
-        ((uintptr_t)modlist - (uintptr_t)start_info);
-    uint64_t modules_cmdline_paddr = modlist_paddr +
-        sizeof(struct hvm_modlist_entry) * HVMLOADER_MODULE_MAX_COUNT;
-
-    if ( module->length == 0 )
-        return;
-
-    assert(start_info->nr_modules < HVMLOADER_MODULE_MAX_COUNT);
-
-    modlist[index].paddr = module->guest_addr_out;
-    modlist[index].size = module->length;
-
-    if ( cmdline )
-    {
-        assert(strnlen(cmdline, HVMLOADER_MODULE_CMDLINE_SIZE)
-               < HVMLOADER_MODULE_CMDLINE_SIZE);
-        strncpy(modules_cmdline_start + HVMLOADER_MODULE_CMDLINE_SIZE * index,
-                cmdline, HVMLOADER_MODULE_CMDLINE_SIZE);
-        modlist[index].cmdline_paddr = modules_cmdline_paddr +
-                                       HVMLOADER_MODULE_CMDLINE_SIZE * index;
-    }
-
-    start_info->nr_modules++;
-}
-
-static int bootlate_hvm(struct xc_dom_image *dom)
-{
-    uint32_t domid = dom->guest_domid;
-    xc_interface *xch = dom->xch;
-    struct hvm_start_info *start_info;
-    size_t modsize;
-    struct hvm_modlist_entry *modlist;
-    struct hvm_memmap_table_entry *memmap;
-    unsigned int i;
-
-    start_info = xc_map_foreign_range(xch, domid, dom->start_info_seg.pages <<
-                                                  XC_DOM_PAGE_SHIFT(dom),
-                                      PROT_READ | PROT_WRITE,
-                                      dom->start_info_seg.pfn);
-    if ( start_info == NULL )
-    {
-        DOMPRINTF("Unable to map HVM start info page");
-        return -1;
-    }
-
-    modlist = (void*)(start_info + 1) + dom->cmdline_size;
-
-    if ( !dom->device_model )
-    {
-        if ( dom->cmdline )
-        {
-            char *cmdline = (void*)(start_info + 1);
-
-            strncpy(cmdline, dom->cmdline, dom->cmdline_size);
-            start_info->cmdline_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
-                                ((uintptr_t)cmdline - (uintptr_t)start_info);
-        }
-
-        /* ACPI module 0 is the RSDP */
-        start_info->rsdp_paddr = dom->acpi_modules[0].guest_addr_out ? : 0;
-    }
-    else
-    {
-        add_module_to_list(dom, &dom->system_firmware_module, "firmware",
-                           modlist, start_info);
-    }
-
-    for ( i = 0; i < dom->num_modules; i++ )
-    {
-        struct xc_hvm_firmware_module mod;
-        uint64_t base = dom->parms.virt_base != UNSET_ADDR ?
-            dom->parms.virt_base : 0;
-
-        mod.guest_addr_out =
-            dom->modules[i].seg.vstart - base;
-        mod.length =
-            dom->modules[i].seg.vend - dom->modules[i].seg.vstart;
-
-        DOMPRINTF("Adding module %u guest_addr %"PRIx64" len %u",
-                  i, mod.guest_addr_out, mod.length);
-
-        add_module_to_list(dom, &mod, dom->modules[i].cmdline,
-                           modlist, start_info);
-    }
-
-    if ( start_info->nr_modules )
-    {
-        start_info->modlist_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
-                            ((uintptr_t)modlist - (uintptr_t)start_info);
-    }
-
-    /*
-     * Check a couple of XEN_HVM_MEMMAP_TYPEs to verify consistency with
-     * their corresponding e820 numerical values.
-     */
-    BUILD_BUG_ON(XEN_HVM_MEMMAP_TYPE_RAM != E820_RAM);
-    BUILD_BUG_ON(XEN_HVM_MEMMAP_TYPE_ACPI != E820_ACPI);
-
-    modsize = HVMLOADER_MODULE_MAX_COUNT *
-        (sizeof(*modlist) + HVMLOADER_MODULE_CMDLINE_SIZE);
-    memmap = (void*)modlist + modsize;
-
-    start_info->memmap_paddr = (dom->start_info_seg.pfn << PAGE_SHIFT) +
-        ((uintptr_t)modlist - (uintptr_t)start_info) + modsize;
-    start_info->memmap_entries = dom->e820_entries;
-    for ( i = 0; i < dom->e820_entries; i++ )
-    {
-        memmap[i].addr = dom->e820[i].addr;
-        memmap[i].size = dom->e820[i].size;
-        memmap[i].type = dom->e820[i].type;
-    }
-
-    start_info->magic = XEN_HVM_START_MAGIC_VALUE;
-    start_info->version = 1;
-
-    munmap(start_info, dom->start_info_seg.pages << XC_DOM_PAGE_SHIFT(dom));
-
-    if ( dom->device_model )
-    {
-        void *hvm_info_page;
-
-        if ( (hvm_info_page = xc_map_foreign_range(
-                  xch, domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
-                  HVM_INFO_PFN)) == NULL )
-            return -1;
-        build_hvm_info(hvm_info_page, dom);
-        munmap(hvm_info_page, PAGE_SIZE);
-    }
-
-    return 0;
-}
-
-bool xc_dom_translated(const struct xc_dom_image *dom)
-{
-    /* HVM guests are translated.  PV guests are not. */
-    return dom->container_type == XC_DOM_HVM_CONTAINER;
-}
-
-/* ------------------------------------------------------------------------ */
-
-static struct xc_dom_arch xc_dom_32_pae = {
-    .guest_type = "xen-3.0-x86_32p",
-    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
-    .page_shift = PAGE_SHIFT_X86,
-    .sizeof_pfn = 4,
-    .p2m_base_supported = 0,
-    .arch_private_size = sizeof(struct xc_dom_image_x86),
-    .alloc_magic_pages = alloc_magic_pages_pv,
-    .alloc_pgtables = alloc_pgtables_x86_32_pae,
-    .alloc_p2m_list = alloc_p2m_list_x86_32,
-    .setup_pgtables = setup_pgtables_x86_32_pae,
-    .start_info = start_info_x86_32,
-    .shared_info = shared_info_x86_32,
-    .vcpu = vcpu_x86_32,
-    .meminit = meminit_pv,
-    .bootearly = bootearly,
-    .bootlate = bootlate_pv,
-};
-
-static struct xc_dom_arch xc_dom_64 = {
-    .guest_type = "xen-3.0-x86_64",
-    .native_protocol = XEN_IO_PROTO_ABI_X86_64,
-    .page_shift = PAGE_SHIFT_X86,
-    .sizeof_pfn = 8,
-    .p2m_base_supported = 1,
-    .arch_private_size = sizeof(struct xc_dom_image_x86),
-    .alloc_magic_pages = alloc_magic_pages_pv,
-    .alloc_pgtables = alloc_pgtables_x86_64,
-    .alloc_p2m_list = alloc_p2m_list_x86_64,
-    .setup_pgtables = setup_pgtables_x86_64,
-    .start_info = start_info_x86_64,
-    .shared_info = shared_info_x86_64,
-    .vcpu = vcpu_x86_64,
-    .meminit = meminit_pv,
-    .bootearly = bootearly,
-    .bootlate = bootlate_pv,
-};
-
-static struct xc_dom_arch xc_hvm_32 = {
-    .guest_type = "hvm-3.0-x86_32",
-    .native_protocol = XEN_IO_PROTO_ABI_X86_32,
-    .page_shift = PAGE_SHIFT_X86,
-    .sizeof_pfn = 4,
-    .alloc_magic_pages = alloc_magic_pages_hvm,
-    .vcpu = vcpu_hvm,
-    .meminit = meminit_hvm,
-    .bootearly = bootearly,
-    .bootlate = bootlate_hvm,
-};
-
-static void __init register_arch_hooks(void)
-{
-    xc_dom_register_arch_hooks(&xc_dom_32_pae);
-    xc_dom_register_arch_hooks(&xc_dom_64);
-    xc_dom_register_arch_hooks(&xc_hvm_32);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_domain.c b/tools/libxc/xg_domain.c
deleted file mode 100644 (file)
index 58713cd..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-/******************************************************************************
- * xg_domain.c
- *
- * API for manipulating and obtaining information on domains.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (c) 2003, K A Fraser.
- */
-
-#include "xg_private.h"
-#include "xc_core.h"
-
-int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *minfo)
-{
-    struct domain_info_context _di = { .guest_width = minfo->guest_width,
-                                       .p2m_size = minfo->p2m_size};
-    struct domain_info_context *dinfo = &_di;
-
-    free(minfo->pfn_type);
-    if ( minfo->p2m_table )
-        munmap(minfo->p2m_table, P2M_FL_ENTRIES * PAGE_SIZE);
-    minfo->p2m_table = NULL;
-
-    return 0;
-}
-
-int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
-                          struct xc_domain_meminfo *minfo)
-{
-    struct domain_info_context _di;
-    struct domain_info_context *dinfo = &_di;
-
-    xc_dominfo_t info;
-    shared_info_any_t *live_shinfo;
-    xen_capabilities_info_t xen_caps = "";
-    int i;
-
-    /* Only be initialized once */
-    if ( minfo->pfn_type || minfo->p2m_table )
-    {
-        errno = EINVAL;
-        return -1;
-    }
-
-    if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 )
-    {
-        PERROR("Could not get domain info");
-        return -1;
-    }
-
-    if ( xc_domain_get_guest_width(xch, domid, &minfo->guest_width) )
-    {
-        PERROR("Could not get domain address size");
-        return -1;
-    }
-    _di.guest_width = minfo->guest_width;
-
-    /* Get page table levels (see get_platform_info() in xg_save_restore.h */
-    if ( xc_version(xch, XENVER_capabilities, &xen_caps) )
-    {
-        PERROR("Could not get Xen capabilities (for page table levels)");
-        return -1;
-    }
-    if ( strstr(xen_caps, "xen-3.0-x86_64") )
-        /* Depends on whether it's a compat 32-on-64 guest */
-        minfo->pt_levels = ( (minfo->guest_width == 8) ? 4 : 3 );
-    else if ( strstr(xen_caps, "xen-3.0-x86_32p") )
-        minfo->pt_levels = 3;
-    else if ( strstr(xen_caps, "xen-3.0-x86_32") )
-        minfo->pt_levels = 2;
-    else
-    {
-        errno = EFAULT;
-        return -1;
-    }
-
-    /* We need the shared info page for mapping the P2M */
-    live_shinfo = xc_map_foreign_range(xch, domid, PAGE_SIZE, PROT_READ,
-                                       info.shared_info_frame);
-    if ( !live_shinfo )
-    {
-        PERROR("Could not map the shared info frame (MFN 0x%lx)",
-               info.shared_info_frame);
-        return -1;
-    }
-
-    if ( xc_core_arch_map_p2m_writable(xch, minfo->guest_width, &info,
-                                       live_shinfo, &minfo->p2m_table,
-                                       &minfo->p2m_size) )
-    {
-        PERROR("Could not map the P2M table");
-        munmap(live_shinfo, PAGE_SIZE);
-        return -1;
-    }
-    munmap(live_shinfo, PAGE_SIZE);
-    _di.p2m_size = minfo->p2m_size;
-
-    /* Make space and prepare for getting the PFN types */
-    minfo->pfn_type = calloc(sizeof(*minfo->pfn_type), minfo->p2m_size);
-    if ( !minfo->pfn_type )
-    {
-        PERROR("Could not allocate memory for the PFN types");
-        goto failed;
-    }
-    for ( i = 0; i < minfo->p2m_size; i++ )
-        minfo->pfn_type[i] = xc_pfn_to_mfn(i, minfo->p2m_table,
-                                           minfo->guest_width);
-
-    /* Retrieve PFN types in batches */
-    for ( i = 0; i < minfo->p2m_size ; i+=1024 )
-    {
-        int count = ((minfo->p2m_size - i ) > 1024 ) ?
-                        1024: (minfo->p2m_size - i);
-
-        if ( xc_get_pfn_type_batch(xch, domid, count, minfo->pfn_type + i) )
-        {
-            PERROR("Could not get %d-eth batch of PFN types", (i+1)/1024);
-            goto failed;
-        }
-    }
-
-    return 0;
-
-failed:
-    if ( minfo->pfn_type )
-    {
-        free(minfo->pfn_type);
-        minfo->pfn_type = NULL;
-    }
-    if ( minfo->p2m_table )
-    {
-        munmap(minfo->p2m_table, P2M_FL_ENTRIES * PAGE_SIZE);
-        minfo->p2m_table = NULL;
-    }
-
-    return -1;
-}
diff --git a/tools/libxc/xg_nomigrate.c b/tools/libxc/xg_nomigrate.c
deleted file mode 100644 (file)
index 6795c62..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/******************************************************************************
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- *
- * Copyright (c) 2011, Citrix Systems
- */
-
-#include <inttypes.h>
-#include <errno.h>
-#include <xenctrl.h>
-#include <xenguest.h>
-
-int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom, uint32_t flags,
-                   struct save_callbacks *callbacks,
-                   xc_stream_type_t stream_type, int recv_fd)
-{
-    errno = ENOSYS;
-    return -1;
-}
-
-int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      uint32_t store_domid, unsigned int console_evtchn,
-                      unsigned long *console_mfn, uint32_t console_domid,
-                      xc_stream_type_t stream_type,
-                      struct restore_callbacks *callbacks, int send_back_fd)
-{
-    errno = ENOSYS;
-    return -1;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_offline_page.c b/tools/libxc/xg_offline_page.c
deleted file mode 100644 (file)
index 77e8889..0000000
+++ /dev/null
@@ -1,708 +0,0 @@
-/******************************************************************************
- * xc_offline_page.c
- *
- * Helper functions to offline/online one page
- *
- * Copyright (c) 2003, K A Fraser.
- * Copyright (c) 2009, Intel Corporation.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <inttypes.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <xc_core.h>
-
-#include "xc_private.h"
-#include "xenctrl_dom.h"
-#include "xg_private.h"
-#include "xg_save_restore.h"
-
-struct pte_backup_entry
-{
-    xen_pfn_t table_mfn;
-    int offset;
-};
-
-#define DEFAULT_BACKUP_COUNT 1024
-struct pte_backup
-{
-    struct pte_backup_entry *entries;
-    int max;
-    int cur;
-};
-
-static struct domain_info_context _dinfo;
-static struct domain_info_context *dinfo = &_dinfo;
-
-int xc_mark_page_online(xc_interface *xch, unsigned long start,
-                        unsigned long end, uint32_t *status)
-{
-    DECLARE_SYSCTL;
-    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
-    int ret = -1;
-
-    if ( !status || (end < start) )
-    {
-        errno = EINVAL;
-        return -1;
-    }
-    if ( xc_hypercall_bounce_pre(xch, status) )
-    {
-        ERROR("Could not bounce memory for xc_mark_page_online\n");
-        return -1;
-    }
-
-    sysctl.cmd = XEN_SYSCTL_page_offline_op;
-    sysctl.u.page_offline.start = start;
-    sysctl.u.page_offline.cmd = sysctl_page_online;
-    sysctl.u.page_offline.end = end;
-    set_xen_guest_handle(sysctl.u.page_offline.status, status);
-    ret = xc_sysctl(xch, &sysctl);
-
-    xc_hypercall_bounce_post(xch, status);
-
-    return ret;
-}
-
-int xc_mark_page_offline(xc_interface *xch, unsigned long start,
-                          unsigned long end, uint32_t *status)
-{
-    DECLARE_SYSCTL;
-    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
-    int ret = -1;
-
-    if ( !status || (end < start) )
-    {
-        errno = EINVAL;
-        return -1;
-    }
-    if ( xc_hypercall_bounce_pre(xch, status) )
-    {
-        ERROR("Could not bounce memory for xc_mark_page_offline");
-        return -1;
-    }
-
-    sysctl.cmd = XEN_SYSCTL_page_offline_op;
-    sysctl.u.page_offline.start = start;
-    sysctl.u.page_offline.cmd = sysctl_page_offline;
-    sysctl.u.page_offline.end = end;
-    set_xen_guest_handle(sysctl.u.page_offline.status, status);
-    ret = xc_sysctl(xch, &sysctl);
-
-    xc_hypercall_bounce_post(xch, status);
-
-    return ret;
-}
-
-int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
-                                 unsigned long end, uint32_t *status)
-{
-    DECLARE_SYSCTL;
-    DECLARE_HYPERCALL_BOUNCE(status, sizeof(uint32_t)*(end - start + 1), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
-    int ret = -1;
-
-    if ( !status || (end < start) )
-    {
-        errno = EINVAL;
-        return -1;
-    }
-    if ( xc_hypercall_bounce_pre(xch, status) )
-    {
-        ERROR("Could not bounce memory for xc_query_page_offline_status\n");
-        return -1;
-    }
-
-    sysctl.cmd = XEN_SYSCTL_page_offline_op;
-    sysctl.u.page_offline.start = start;
-    sysctl.u.page_offline.cmd = sysctl_query_page_offline;
-    sysctl.u.page_offline.end = end;
-    set_xen_guest_handle(sysctl.u.page_offline.status, status);
-    ret = xc_sysctl(xch, &sysctl);
-
-    xc_hypercall_bounce_post(xch, status);
-
-    return ret;
-}
-
- /*
-  * There should no update to the grant when domain paused
-  */
-static int xc_is_page_granted_v1(xc_interface *xch, xen_pfn_t gpfn,
-                                 grant_entry_v1_t *gnttab, int gnt_num)
-{
-    int i = 0;
-
-    if (!gnttab)
-        return 0;
-
-    for (i = 0; i < gnt_num; i++)
-        if ( ((gnttab[i].flags & GTF_type_mask) !=  GTF_invalid) &&
-             (gnttab[i].frame == gpfn) )
-             break;
-
-   return (i != gnt_num);
-}
-
-static int xc_is_page_granted_v2(xc_interface *xch, xen_pfn_t gpfn,
-                                 grant_entry_v2_t *gnttab, int gnt_num)
-{
-    int i = 0;
-
-    if (!gnttab)
-        return 0;
-
-    for (i = 0; i < gnt_num; i++)
-        if ( ((gnttab[i].hdr.flags & GTF_type_mask) !=  GTF_invalid) &&
-             (gnttab[i].full_page.frame == gpfn) )
-             break;
-
-   return (i != gnt_num);
-}
-
-static int backup_ptes(xen_pfn_t table_mfn, int offset,
-                       struct pte_backup *backup)
-{
-    if (!backup)
-        return -EINVAL;
-
-    if (backup->max == backup->cur)
-    {
-        backup->entries = realloc(backup->entries,
-                            backup->max * 2 * sizeof(struct pte_backup_entry));
-        if (backup->entries == NULL)
-            return -1;
-        else
-            backup->max *= 2;
-    }
-
-    backup->entries[backup->cur].table_mfn = table_mfn;
-    backup->entries[backup->cur++].offset = offset;
-
-    return 0;
-}
-
-/*
- * return:
- * 1 when MMU update is required
- * 0 when no changes
- * <0 when error happen
- */
-typedef int (*pte_func)(xc_interface *xch,
-                       uint64_t pte, uint64_t *new_pte,
-                       unsigned long table_mfn, int table_offset,
-                       struct pte_backup *backup,
-                       unsigned long no_use);
-
-static int __clear_pte(xc_interface *xch,
-                       uint64_t pte, uint64_t *new_pte,
-                       unsigned long table_mfn, int table_offset,
-                       struct pte_backup *backup,
-                       unsigned long mfn)
-{
-    /* If no new_pte pointer, same as no changes needed */
-    if (!new_pte || !backup)
-        return -EINVAL;
-
-    if ( !(pte & _PAGE_PRESENT))
-        return 0;
-
-    /* XXX Check for PSE bit here */
-    /* Hit one entry */
-    if ( ((pte >> PAGE_SHIFT_X86) & MFN_MASK_X86) == mfn)
-    {
-        *new_pte = pte & ~_PAGE_PRESENT;
-        if (!backup_ptes(table_mfn, table_offset, backup))
-            return 1;
-    }
-
-    return 0;
-}
-
-static int __update_pte(xc_interface *xch,
-                      uint64_t pte, uint64_t *new_pte,
-                      unsigned long table_mfn, int table_offset,
-                      struct pte_backup *backup,
-                      unsigned long new_mfn)
-{
-    int index;
-
-    if (!new_pte)
-        return 0;
-
-    for (index = 0; index < backup->cur; index ++)
-        if ( (backup->entries[index].table_mfn == table_mfn) &&
-             (backup->entries[index].offset == table_offset) )
-            break;
-
-    if (index != backup->cur)
-    {
-        if (pte & _PAGE_PRESENT)
-            ERROR("Page present while in backup ptes\n");
-        pte &= ~MFN_MASK_X86;
-        pte |= (new_mfn << PAGE_SHIFT_X86) | _PAGE_PRESENT;
-        *new_pte = pte;
-        return 1;
-    }
-
-    return 0;
-}
-
-static int change_pte(xc_interface *xch, uint32_t domid,
-                     struct xc_domain_meminfo *minfo,
-                     struct pte_backup *backup,
-                     struct xc_mmu *mmu,
-                     pte_func func,
-                     unsigned long data)
-{
-    int pte_num, rc;
-    uint64_t i;
-    void *content = NULL;
-
-    pte_num = PAGE_SIZE / ((minfo->pt_levels == 2) ? 4 : 8);
-
-    for (i = 0; i < minfo->p2m_size; i++)
-    {
-        xen_pfn_t table_mfn = xc_pfn_to_mfn(i, minfo->p2m_table,
-                                            minfo->guest_width);
-        uint64_t pte, new_pte;
-        int j;
-
-        if ( (table_mfn == INVALID_PFN) ||
-             ((minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
-              XEN_DOMCTL_PFINFO_XTAB) )
-            continue;
-
-        if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
-        {
-            content = xc_map_foreign_range(xch, domid, PAGE_SIZE,
-                                            PROT_READ, table_mfn);
-            if (!content)
-                goto failed;
-
-            for (j = 0; j < pte_num; j++)
-            {
-                if ( minfo->pt_levels == 2 )
-                    pte = ((const uint32_t*)content)[j];
-                else
-                    pte = ((const uint64_t*)content)[j];
-
-                rc = func(xch, pte, &new_pte, table_mfn, j, backup, data);
-
-                switch (rc)
-                {
-                    case 1:
-                    if ( xc_add_mmu_update(xch, mmu,
-                          table_mfn << PAGE_SHIFT |
-                          j * ( (minfo->pt_levels == 2) ?
-                              sizeof(uint32_t): sizeof(uint64_t)) |
-                          MMU_PT_UPDATE_PRESERVE_AD,
-                          new_pte) )
-                        goto failed;
-                    break;
-
-                    case 0:
-                    break;
-
-                    default:
-                    goto failed;
-                }
-            }
-
-            munmap(content, PAGE_SIZE);
-            content = NULL;
-        }
-    }
-
-    if ( xc_flush_mmu_updates(xch, mmu) )
-        goto failed;
-
-    return 0;
-failed:
-    /* XXX Shall we take action if we have fail to swap? */
-    if (content)
-        munmap(content, PAGE_SIZE);
-
-    return -1;
-}
-
-static int update_pte(xc_interface *xch, uint32_t domid,
-                     struct xc_domain_meminfo *minfo,
-                     struct pte_backup *backup,
-                     struct xc_mmu *mmu,
-                     unsigned long new_mfn)
-{
-    return change_pte(xch, domid,  minfo, backup, mmu,
-                      __update_pte, new_mfn);
-}
-
-static int clear_pte(xc_interface *xch, uint32_t domid,
-                     struct xc_domain_meminfo *minfo,
-                     struct pte_backup *backup,
-                     struct xc_mmu *mmu,
-                     xen_pfn_t mfn)
-{
-    return change_pte(xch, domid, minfo, backup, mmu,
-                      __clear_pte, mfn);
-}
-
-/*
- * Check if a page can be exchanged successfully
- */
-
-static int is_page_exchangable(xc_interface *xch, uint32_t domid, xen_pfn_t mfn,
-                               xc_dominfo_t *info)
-{
-    uint32_t status;
-    int rc;
-
-    /* domain checking */
-    if ( !domid || (domid > DOMID_FIRST_RESERVED) )
-    {
-        DPRINTF("Dom0's page can't be LM");
-        return 0;
-    }
-    if (info->hvm)
-    {
-        DPRINTF("Currently we can only live change PV guest's page\n");
-        return 0;
-    }
-
-    /* Check if pages are offline pending or not */
-    rc = xc_query_page_offline_status(xch, mfn, mfn, &status);
-
-    if ( rc || !(status & PG_OFFLINE_STATUS_OFFLINE_PENDING) )
-    {
-        ERROR("Page %lx is not offline pending %x\n",
-          mfn, status);
-        return 0;
-    }
-
-    return 1;
-}
-
-xen_pfn_t *xc_map_m2p(xc_interface *xch,
-                      unsigned long max_mfn,
-                      int prot,
-                      unsigned long *mfn0)
-{
-    privcmd_mmap_entry_t *entries;
-    unsigned long m2p_chunks, m2p_size;
-    xen_pfn_t *m2p;
-    xen_pfn_t *extent_start;
-    int i;
-
-    m2p = NULL;
-    m2p_size   = M2P_SIZE(max_mfn);
-    m2p_chunks = M2P_CHUNKS(max_mfn);
-
-    extent_start = calloc(m2p_chunks, sizeof(xen_pfn_t));
-    if ( !extent_start )
-    {
-        ERROR("failed to allocate space for m2p mfns");
-        goto err0;
-    }
-
-    if ( xc_machphys_mfn_list(xch, m2p_chunks, extent_start) )
-    {
-        PERROR("xc_get_m2p_mfns");
-        goto err1;
-    }
-
-    entries = calloc(m2p_chunks, sizeof(privcmd_mmap_entry_t));
-    if (entries == NULL)
-    {
-        ERROR("failed to allocate space for mmap entries");
-        goto err1;
-    }
-
-    for ( i = 0; i < m2p_chunks; i++ )
-        entries[i].mfn = extent_start[i];
-
-    m2p = xc_map_foreign_ranges(xch, DOMID_XEN,
-                       m2p_size, prot, M2P_CHUNK_SIZE,
-                       entries, m2p_chunks);
-    if (m2p == NULL)
-    {
-        PERROR("xc_mmap_foreign_ranges failed");
-        goto err2;
-    }
-
-    if (mfn0)
-        *mfn0 = entries[0].mfn;
-
-err2:
-    free(entries);
-err1:
-    free(extent_start);
-
-err0:
-    return m2p;
-}
-
-/* The domain should be suspended when called here */
-int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn)
-{
-    xc_dominfo_t info;
-    struct xc_domain_meminfo minfo;
-    struct xc_mmu *mmu = NULL;
-    struct pte_backup old_ptes = {NULL, 0, 0};
-    grant_entry_v1_t *gnttab_v1 = NULL;
-    grant_entry_v2_t *gnttab_v2 = NULL;
-    struct mmuext_op mops;
-    int gnt_num, unpined = 0;
-    void *old_p, *backup = NULL;
-    int rc, result = -1;
-    uint32_t status;
-    xen_pfn_t new_mfn, gpfn;
-    xen_pfn_t *m2p_table;
-    unsigned long max_mfn;
-
-    if ( xc_domain_getinfo(xch, domid, 1, &info) != 1 )
-    {
-        ERROR("Could not get domain info");
-        return -1;
-    }
-
-    if (!info.shutdown || info.shutdown_reason != SHUTDOWN_suspend)
-    {
-        errno = EINVAL;
-        ERROR("Can't exchange page unless domain is suspended\n");
-        return -1;
-    }
-    if (!is_page_exchangable(xch, domid, mfn, &info))
-    {
-        ERROR("Could not exchange page\n");
-        return -1;
-    }
-
-    /* Map M2P and obtain gpfn */
-    rc = xc_maximum_ram_page(xch, &max_mfn);
-    if ( rc || !(m2p_table = xc_map_m2p(xch, max_mfn, PROT_READ, NULL)) )
-    {
-        PERROR("Failed to map live M2P table");
-        return -1;
-    }
-    gpfn = m2p_table[mfn];
-
-    /* Map domain's memory information */
-    memset(&minfo, 0, sizeof(minfo));
-    if ( xc_map_domain_meminfo(xch, domid, &minfo) )
-    {
-        PERROR("Could not map domain's memory information\n");
-        goto failed;
-    }
-
-    /* For translation macros */
-    dinfo->guest_width = minfo.guest_width;
-    dinfo->p2m_size = minfo.p2m_size;
-
-    /* Don't exchange CR3 for PAE guest in PAE host environment */
-    if (minfo.guest_width > sizeof(long))
-    {
-        if ( (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
-                    XEN_DOMCTL_PFINFO_L3TAB )
-            goto failed;
-    }
-
-    gnttab_v2 = xc_gnttab_map_table_v2(xch, domid, &gnt_num);
-    if (!gnttab_v2)
-    {
-        gnttab_v1 = xc_gnttab_map_table_v1(xch, domid, &gnt_num);
-        if (!gnttab_v1)
-        {
-            ERROR("Failed to map grant table\n");
-            goto failed;
-        }
-    }
-
-    if (gnttab_v1
-        ? xc_is_page_granted_v1(xch, mfn, gnttab_v1, gnt_num)
-        : xc_is_page_granted_v2(xch, mfn, gnttab_v2, gnt_num))
-    {
-        ERROR("Page %lx is granted now\n", mfn);
-        goto failed;
-    }
-
-    /* allocate required data structure */
-    backup = malloc(PAGE_SIZE);
-    if (!backup)
-    {
-        ERROR("Failed to allocate backup pages pointer\n");
-        goto failed;
-    }
-
-    old_ptes.max = DEFAULT_BACKUP_COUNT;
-    old_ptes.entries = malloc(sizeof(struct pte_backup_entry) *
-                              DEFAULT_BACKUP_COUNT);
-
-    if (!old_ptes.entries)
-    {
-        ERROR("Faield to allocate backup\n");
-        goto failed;
-    }
-    old_ptes.cur = 0;
-
-    /* Unpin the page if it is pined */
-    if (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LPINTAB)
-    {
-        mops.cmd = MMUEXT_UNPIN_TABLE;
-        mops.arg1.mfn = mfn;
-
-        if ( xc_mmuext_op(xch, &mops, 1, domid) < 0 )
-        {
-            ERROR("Failed to unpin page %lx", mfn);
-            goto failed;
-        }
-        mops.arg1.mfn = mfn;
-        unpined = 1;
-    }
-
-    /* backup the content */
-    old_p = xc_map_foreign_range(xch, domid, PAGE_SIZE,
-      PROT_READ, mfn);
-    if (!old_p)
-    {
-        ERROR("Failed to map foreign page %lx\n", mfn);
-        goto failed;
-    }
-
-    memcpy(backup, old_p, PAGE_SIZE);
-    munmap(old_p, PAGE_SIZE);
-
-    mmu = xc_alloc_mmu_updates(xch, domid);
-    if ( mmu == NULL )
-    {
-        ERROR("%s: failed at %d\n", __FUNCTION__, __LINE__);
-        goto failed;
-    }
-
-    /* Firstly update all pte to be invalid to remove the reference */
-    rc = clear_pte(xch, domid,  &minfo, &old_ptes, mmu, mfn);
-
-    if (rc)
-    {
-        ERROR("clear pte failed\n");
-        goto failed;
-    }
-
-    rc = xc_domain_memory_exchange_pages(xch, domid,
-                                        1, 0, &mfn,
-                                        1, 0, &new_mfn);
-
-    if (rc)
-    {
-        ERROR("Exchange the page failed\n");
-        /* Exchange fail means there are refere to the page still */
-        rc = update_pte(xch, domid, &minfo, &old_ptes, mmu, mfn);
-        if (rc)
-            result = -2;
-        goto failed;
-    }
-
-    rc = update_pte(xch, domid, &minfo, &old_ptes, mmu, new_mfn);
-
-    if (rc)
-    {
-        ERROR("update pte failed guest may be broken now\n");
-        /* No recover action now for swap fail */
-        result = -2;
-        goto failed;
-    }
-
-    /* Check if pages are offlined already */
-    rc = xc_query_page_offline_status(xch, mfn, mfn,
-                            &status);
-
-    if (rc)
-    {
-        ERROR("Fail to query offline status\n");
-    }else if ( !(status & PG_OFFLINE_STATUS_OFFLINED) )
-    {
-        ERROR("page is still online or pending\n");
-        goto failed;
-    }
-    else
-    {
-        void *new_p;
-        IPRINTF("Now page is offlined %lx\n", mfn);
-        /* Update the p2m table */
-        minfo.p2m_table[gpfn] = new_mfn;
-
-        new_p = xc_map_foreign_range(xch, domid, PAGE_SIZE,
-                                     PROT_READ|PROT_WRITE, new_mfn);
-        if ( new_p == NULL )
-        {
-            ERROR("failed to map new_p for copy, guest may be broken?");
-            goto failed;
-        }
-        memcpy(new_p, backup, PAGE_SIZE);
-        munmap(new_p, PAGE_SIZE);
-        mops.arg1.mfn = new_mfn;
-        result = 0;
-    }
-
-failed:
-
-    if (unpined && (minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LPINTAB))
-    {
-        switch ( minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
-        {
-            case XEN_DOMCTL_PFINFO_L1TAB:
-                mops.cmd = MMUEXT_PIN_L1_TABLE;
-                break;
-
-            case XEN_DOMCTL_PFINFO_L2TAB:
-                mops.cmd = MMUEXT_PIN_L2_TABLE;
-                break;
-
-            case XEN_DOMCTL_PFINFO_L3TAB:
-                mops.cmd = MMUEXT_PIN_L3_TABLE;
-                break;
-
-            case XEN_DOMCTL_PFINFO_L4TAB:
-                mops.cmd = MMUEXT_PIN_L4_TABLE;
-                break;
-
-            default:
-                ERROR("Unpined for non pate table page\n");
-                break;
-        }
-
-        if ( xc_mmuext_op(xch, &mops, 1, domid) < 0 )
-        {
-            ERROR("failed to pin the mfn again\n");
-            result = -2;
-        }
-    }
-
-    free(mmu);
-
-    free(old_ptes.entries);
-
-    free(backup);
-
-    if (gnttab_v1)
-        munmap(gnttab_v1, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v1_t)));
-    if (gnttab_v2)
-        munmap(gnttab_v2, gnt_num / (PAGE_SIZE/sizeof(grant_entry_v2_t)));
-
-    xc_unmap_domain_meminfo(xch, &minfo);
-    munmap(m2p_table, M2P_SIZE(max_mfn));
-
-    return result;
-}
diff --git a/tools/libxc/xg_private.c b/tools/libxc/xg_private.c
deleted file mode 100644 (file)
index 2073dba..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-/******************************************************************************
- * xg_private.c
- *
- * Helper functions for the rest of the library.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <zlib.h>
-
-#include "xg_private.h"
-
-char *xc_read_image(xc_interface *xch,
-                    const char *filename, unsigned long *size)
-{
-    int kernel_fd = -1;
-    gzFile kernel_gfd = NULL;
-    char *image = NULL, *tmp;
-    unsigned int bytes;
-
-    if ( (filename == NULL) || (size == NULL) )
-        return NULL;
-
-    if ( (kernel_fd = open(filename, O_RDONLY)) < 0 )
-    {
-        PERROR("Could not open kernel image '%s'", filename);
-        goto out;
-    }
-
-    if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL )
-    {
-        PERROR("Could not allocate decompression state for state file");
-        goto out;
-    }
-
-    *size = 0;
-
-#define CHUNK 1*1024*1024
-    while(1)
-    {
-        if ( (tmp = realloc(image, *size + CHUNK)) == NULL )
-        {
-            PERROR("Could not allocate memory for kernel image");
-            free(image);
-            image = NULL;
-            goto out;
-        }
-        image = tmp;
-
-        bytes = gzread(kernel_gfd, image + *size, CHUNK);
-        switch (bytes)
-        {
-        case -1:
-            PERROR("Error reading kernel image");
-            free(image);
-            image = NULL;
-            goto out;
-        case 0: /* EOF */
-            if ( *size == 0 )
-            {
-                PERROR("Could not read kernel image");
-                free(image);
-                image = NULL;
-            }
-            goto out;
-        default:
-            *size += bytes;
-            break;
-        }
-    }
-#undef CHUNK
-
- out:
-    if ( image )
-    {
-        /* Shrink allocation to fit image. */
-        tmp = realloc(image, *size);
-        if ( tmp )
-            image = tmp;
-    }
-
-    if ( kernel_gfd != NULL )
-        gzclose(kernel_gfd);
-    else if ( kernel_fd >= 0 )
-        close(kernel_fd);
-    return image;
-}
-
-char *xc_inflate_buffer(xc_interface *xch,
-                        const char *in_buf, unsigned long in_size,
-                        unsigned long *out_size)
-{
-    int           sts;
-    z_stream      zStream;
-    unsigned long out_len;
-    char         *out_buf;
-
-    /* Not compressed? Then return the original buffer. */
-    if ( ((unsigned char)in_buf[0] != 0x1F) ||
-         ((unsigned char)in_buf[1] != 0x8B) )
-    {
-        if ( out_size != NULL )
-            *out_size = in_size;
-        return (char *)in_buf;
-    }
-
-    out_len = (unsigned char)in_buf[in_size-4] +
-        (256 * ((unsigned char)in_buf[in_size-3] +
-                (256 * ((unsigned char)in_buf[in_size-2] +
-                        (256 * (unsigned char)in_buf[in_size-1])))));
-
-    memset(&zStream, 0, sizeof(zStream));
-    out_buf = malloc(out_len + 16);        /* Leave a little extra space */
-    if ( out_buf == NULL )
-    {
-        ERROR("Error mallocing buffer\n");
-        return NULL;
-    }
-
-    zStream.next_in = (unsigned char *)in_buf;
-    zStream.avail_in = in_size;
-    zStream.next_out = (unsigned char *)out_buf;
-    zStream.avail_out = out_len+16;
-    sts = inflateInit2(&zStream, (MAX_WBITS+32)); /* +32 means "handle gzip" */
-    if ( sts != Z_OK )
-    {
-        ERROR("inflateInit failed, sts %d\n", sts);
-        free(out_buf);
-        return NULL;
-    }
-
-    /* Inflate in one pass/call */
-    sts = inflate(&zStream, Z_FINISH);
-    inflateEnd(&zStream);
-    if ( sts != Z_STREAM_END )
-    {
-        ERROR("inflate failed, sts %d\n", sts);
-        free(out_buf);
-        return NULL;
-    }
-
-    if ( out_size != NULL )
-        *out_size = out_len;
-
-    return out_buf;
-}
-
-/*******************/
-
-int pin_table(
-    xc_interface *xch, unsigned int type, unsigned long mfn, uint32_t dom)
-{
-    struct mmuext_op op;
-
-    op.cmd = type;
-    op.arg1.mfn = mfn;
-
-    if ( xc_mmuext_op(xch, &op, 1, dom) < 0 )
-        return 1;
-
-    return 0;
-}
-
-/* This is shared between save and restore, and may generally be useful. */
-unsigned long csum_page(void *page)
-{
-    int i;
-    unsigned long *p = page;
-    unsigned long long sum=0;
-
-    for ( i = 0; i < (PAGE_SIZE/sizeof(unsigned long)); i++ )
-        sum += p[i];
-
-    return sum ^ (sum>>32);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_private.h b/tools/libxc/xg_private.h
deleted file mode 100644 (file)
index 0000b2b..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef XG_PRIVATE_H
-#define XG_PRIVATE_H
-
-#include <unistd.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "xc_private.h"
-#include "xenguest.h"
-
-#include <xen/memory.h>
-#include <xen/elfnote.h>
-
-#ifndef ELFSIZE
-#include <limits.h>
-#if UINT_MAX == ULONG_MAX
-#define ELFSIZE 32
-#else
-#define ELFSIZE 64
-#endif
-#endif
-
-char *xc_read_image(xc_interface *xch,
-                    const char *filename, unsigned long *size);
-char *xc_inflate_buffer(xc_interface *xch,
-                        const char *in_buf,
-                        unsigned long in_size,
-                        unsigned long *out_size);
-
-unsigned long csum_page (void * page);
-
-#define _PAGE_PRESENT   0x001
-#define _PAGE_RW        0x002
-#define _PAGE_USER      0x004
-#define _PAGE_PWT       0x008
-#define _PAGE_PCD       0x010
-#define _PAGE_ACCESSED  0x020
-#define _PAGE_DIRTY     0x040
-#define _PAGE_PAT       0x080
-#define _PAGE_PSE       0x080
-#define _PAGE_GLOBAL    0x100
-
-#define VIRT_BITS_I386     32
-#define VIRT_BITS_X86_64   48
-
-#define PGTBL_LEVELS_I386       3
-#define PGTBL_LEVELS_X86_64     4
-
-#define PGTBL_LEVEL_SHIFT_X86   9
-
-#define L1_PAGETABLE_SHIFT_PAE        12
-#define L2_PAGETABLE_SHIFT_PAE        21
-#define L3_PAGETABLE_SHIFT_PAE        30
-#define L1_PAGETABLE_ENTRIES_PAE     512
-#define L2_PAGETABLE_ENTRIES_PAE     512
-#define L3_PAGETABLE_ENTRIES_PAE       4
-
-#define L1_PAGETABLE_SHIFT_X86_64     12
-#define L2_PAGETABLE_SHIFT_X86_64     21
-#define L3_PAGETABLE_SHIFT_X86_64     30
-#define L4_PAGETABLE_SHIFT_X86_64     39
-#define L1_PAGETABLE_ENTRIES_X86_64  512
-#define L2_PAGETABLE_ENTRIES_X86_64  512
-#define L3_PAGETABLE_ENTRIES_X86_64  512
-#define L4_PAGETABLE_ENTRIES_X86_64  512
-
-typedef uint64_t x86_pgentry_t;
-
-#define PAGE_SHIFT_ARM          12
-#define PAGE_SIZE_ARM           (1UL << PAGE_SHIFT_ARM)
-#define PAGE_MASK_ARM           (~(PAGE_SIZE_ARM-1))
-
-#define PAGE_SHIFT_X86          12
-#define PAGE_SIZE_X86           (1UL << PAGE_SHIFT_X86)
-#define PAGE_MASK_X86           (~(PAGE_SIZE_X86-1))
-
-#define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)
-
-static inline xen_pfn_t xc_pfn_to_mfn(xen_pfn_t pfn, xen_pfn_t *p2m,
-                                      unsigned gwidth)
-{
-    if ( gwidth == sizeof(uint64_t) )
-        /* 64 bit guest.  Need to truncate their pfns for 32 bit toolstacks. */
-        return ((uint64_t *)p2m)[pfn];
-    else
-    {
-        /* 32 bit guest.  Need to expand INVALID_MFN for 64 bit toolstacks. */
-        uint32_t mfn = ((uint32_t *)p2m)[pfn];
-
-        return mfn == ~0U ? INVALID_MFN : mfn;
-    }
-}
-
-
-/* Masks for PTE<->PFN conversions */
-#define MADDR_BITS_X86  ((dinfo->guest_width == 8) ? 52 : 44)
-#define MFN_MASK_X86    ((1ULL << (MADDR_BITS_X86 - PAGE_SHIFT_X86)) - 1)
-#define MADDR_MASK_X86  (MFN_MASK_X86 << PAGE_SHIFT_X86)
-
-int pin_table(xc_interface *xch, unsigned int type, unsigned long mfn,
-              uint32_t dom);
-
-#endif /* XG_PRIVATE_H */
diff --git a/tools/libxc/xg_save_restore.h b/tools/libxc/xg_save_restore.h
deleted file mode 100644 (file)
index 88120eb..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Definitions and utilities for save / restore.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "xc_private.h"
-
-#include <xen/foreign/x86_32.h>
-#include <xen/foreign/x86_64.h>
-
-/*
-** We process save/restore/migrate in batches of pages; the below
-** determines how many pages we (at maximum) deal with in each batch.
-*/
-#define MAX_BATCH_SIZE 1024   /* up to 1024 pages (4MB) at a time */
-
-/* When pinning page tables at the end of restore, we also use batching. */
-#define MAX_PIN_BATCH  1024
-
-/*
-** Determine various platform information required for save/restore, in
-** particular:
-**
-**    - the maximum MFN on this machine, used to compute the size of
-**      the M2P table;
-**
-**    - the starting virtual address of the the hypervisor; we use this
-**      to determine which parts of guest address space(s) do and don't
-**      require canonicalization during save/restore; and
-**
-**    - the number of page-table levels for save/ restore. This should
-**      be a property of the domain, but for the moment we just read it
-**      from the hypervisor.
-**
-**    - The width of a guest word (unsigned long), in bytes.
-**
-** Returns 1 on success, 0 on failure.
-*/
-static inline int get_platform_info(xc_interface *xch, uint32_t dom,
-                                    /* OUT */ unsigned long *max_mfn,
-                                    /* OUT */ unsigned long *hvirt_start,
-                                    /* OUT */ unsigned int *pt_levels,
-                                    /* OUT */ unsigned int *guest_width)
-{
-    xen_capabilities_info_t xen_caps = "";
-    xen_platform_parameters_t xen_params;
-
-    if (xc_version(xch, XENVER_platform_parameters, &xen_params) != 0)
-        return 0;
-
-    if (xc_version(xch, XENVER_capabilities, &xen_caps) != 0)
-        return 0;
-
-    if (xc_maximum_ram_page(xch, max_mfn))
-        return 0;
-
-    *hvirt_start = xen_params.virt_start;
-
-    if ( xc_domain_get_guest_width(xch, dom, guest_width) != 0)
-        return 0; 
-
-    /* 64-bit tools will see the 64-bit hvirt_start, but 32-bit guests 
-     * will be using the compat one. */
-    if ( *guest_width < sizeof (unsigned long) )
-        /* XXX need to fix up a way of extracting this value from Xen if
-         * XXX it becomes variable for domU */
-        *hvirt_start = 0xf5800000;
-
-    if (strstr(xen_caps, "xen-3.0-x86_64"))
-        /* Depends on whether it's a compat 32-on-64 guest */
-        *pt_levels = ( (*guest_width == 8) ? 4 : 3 );
-    else if (strstr(xen_caps, "xen-3.0-x86_32p"))
-        *pt_levels = 3;
-    else
-        return 0;
-
-    return 1;
-}
-
-
-/*
-** Save/restore deal with the mfn_to_pfn (M2P) and pfn_to_mfn (P2M) tables.
-** The M2P simply holds the corresponding PFN, while the top bit of a P2M
-** entry tell us whether or not the the PFN is currently mapped.
-*/
-
-#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
-
-
-/*
-** The M2P is made up of some number of 'chunks' of at least 2MB in size.
-** The below definitions and utility function(s) deal with mapping the M2P
-** regarldess of the underlying machine memory size or architecture.
-*/
-#define M2P_SHIFT       L2_PAGETABLE_SHIFT_PAE
-#define M2P_CHUNK_SIZE  (1 << M2P_SHIFT)
-#define M2P_SIZE(_m)    ROUNDUP(((_m) * sizeof(xen_pfn_t)), M2P_SHIFT)
-#define M2P_CHUNKS(_m)  (M2P_SIZE((_m)) >> M2P_SHIFT)
-
-#define UNFOLD_CR3(_c)                                                  \
-  ((uint64_t)((dinfo->guest_width == 8)                                 \
-              ? ((_c) >> 12)                                            \
-              : (((uint32_t)(_c) >> 12) | ((uint32_t)(_c) << 20))))
-
-#define FOLD_CR3(_c)                                                    \
-  ((uint64_t)((dinfo->guest_width == 8)                                 \
-              ? ((uint64_t)(_c)) << 12                                  \
-              : (((uint32_t)(_c) << 12) | ((uint32_t)(_c) >> 20))))
-
-#define MEMCPY_FIELD(_d, _s, _f, _w) do {                          \
-    if ((_w) == 8)                                                 \
-        memcpy(&(_d)->x64._f, &(_s)->x64._f,sizeof((_d)->x64._f)); \
-    else                                                           \
-        memcpy(&(_d)->x32._f, &(_s)->x32._f,sizeof((_d)->x32._f)); \
-} while (0)
-
-#define MEMSET_ARRAY_FIELD(_p, _f, _v, _w) do {                    \
-    if ((_w) == 8)                                                 \
-        memset(&(_p)->x64._f[0], (_v), sizeof((_p)->x64._f));      \
-    else                                                           \
-        memset(&(_p)->x32._f[0], (_v), sizeof((_p)->x32._f));      \
-} while (0)
diff --git a/tools/libxc/xg_sr_common.c b/tools/libxc/xg_sr_common.c
deleted file mode 100644 (file)
index 17567ab..0000000
+++ /dev/null
@@ -1,167 +0,0 @@
-#include <assert.h>
-
-#include "xg_sr_common.h"
-
-#include <xen-tools/libs.h>
-
-static const char *const dhdr_types[] =
-{
-    [DHDR_TYPE_X86_PV]  = "x86 PV",
-    [DHDR_TYPE_X86_HVM] = "x86 HVM",
-};
-
-const char *dhdr_type_to_str(uint32_t type)
-{
-    if ( type < ARRAY_SIZE(dhdr_types) && dhdr_types[type] )
-        return dhdr_types[type];
-
-    return "Reserved";
-}
-
-static const char *const mandatory_rec_types[] =
-{
-    [REC_TYPE_END]                          = "End",
-    [REC_TYPE_PAGE_DATA]                    = "Page data",
-    [REC_TYPE_X86_PV_INFO]                  = "x86 PV info",
-    [REC_TYPE_X86_PV_P2M_FRAMES]            = "x86 PV P2M frames",
-    [REC_TYPE_X86_PV_VCPU_BASIC]            = "x86 PV vcpu basic",
-    [REC_TYPE_X86_PV_VCPU_EXTENDED]         = "x86 PV vcpu extended",
-    [REC_TYPE_X86_PV_VCPU_XSAVE]            = "x86 PV vcpu xsave",
-    [REC_TYPE_SHARED_INFO]                  = "Shared info",
-    [REC_TYPE_X86_TSC_INFO]                 = "x86 TSC info",
-    [REC_TYPE_HVM_CONTEXT]                  = "HVM context",
-    [REC_TYPE_HVM_PARAMS]                   = "HVM params",
-    [REC_TYPE_TOOLSTACK]                    = "Toolstack",
-    [REC_TYPE_X86_PV_VCPU_MSRS]             = "x86 PV vcpu msrs",
-    [REC_TYPE_VERIFY]                       = "Verify",
-    [REC_TYPE_CHECKPOINT]                   = "Checkpoint",
-    [REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST]    = "Checkpoint dirty pfn list",
-    [REC_TYPE_STATIC_DATA_END]              = "Static data end",
-    [REC_TYPE_X86_CPUID_POLICY]             = "x86 CPUID policy",
-    [REC_TYPE_X86_MSR_POLICY]               = "x86 MSR policy",
-};
-
-const char *rec_type_to_str(uint32_t type)
-{
-    if ( !(type & REC_TYPE_OPTIONAL) )
-    {
-        if ( (type < ARRAY_SIZE(mandatory_rec_types)) &&
-             (mandatory_rec_types[type]) )
-            return mandatory_rec_types[type];
-    }
-
-    return "Reserved";
-}
-
-int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
-                       void *buf, size_t sz)
-{
-    static const char zeroes[(1u << REC_ALIGN_ORDER) - 1] = { 0 };
-
-    xc_interface *xch = ctx->xch;
-    typeof(rec->length) combined_length = rec->length + sz;
-    size_t record_length = ROUNDUP(combined_length, REC_ALIGN_ORDER);
-    struct iovec parts[] = {
-        { &rec->type,       sizeof(rec->type) },
-        { &combined_length, sizeof(combined_length) },
-        { rec->data,        rec->length },
-        { buf,              sz },
-        { (void *)zeroes,   record_length - combined_length },
-    };
-
-    if ( record_length > REC_LENGTH_MAX )
-    {
-        ERROR("Record (0x%08x, %s) length %#zx exceeds max (%#x)", rec->type,
-              rec_type_to_str(rec->type), record_length, REC_LENGTH_MAX);
-        return -1;
-    }
-
-    if ( rec->length )
-        assert(rec->data);
-    if ( sz )
-        assert(buf);
-
-    if ( writev_exact(ctx->fd, parts, ARRAY_SIZE(parts)) )
-        goto err;
-
-    return 0;
-
- err:
-    PERROR("Unable to write record to stream");
-    return -1;
-}
-
-int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rhdr rhdr;
-    size_t datasz;
-
-    if ( read_exact(fd, &rhdr, sizeof(rhdr)) )
-    {
-        PERROR("Failed to read Record Header from stream");
-        return -1;
-    }
-
-    if ( rhdr.length > REC_LENGTH_MAX )
-    {
-        ERROR("Record (0x%08x, %s) length %#x exceeds max (%#x)", rhdr.type,
-              rec_type_to_str(rhdr.type), rhdr.length, REC_LENGTH_MAX);
-        return -1;
-    }
-
-    datasz = ROUNDUP(rhdr.length, REC_ALIGN_ORDER);
-
-    if ( datasz )
-    {
-        rec->data = malloc(datasz);
-
-        if ( !rec->data )
-        {
-            ERROR("Unable to allocate %zu bytes for record data (0x%08x, %s)",
-                  datasz, rhdr.type, rec_type_to_str(rhdr.type));
-            return -1;
-        }
-
-        if ( read_exact(fd, rec->data, datasz) )
-        {
-            free(rec->data);
-            rec->data = NULL;
-            PERROR("Failed to read %zu bytes of data for record (0x%08x, %s)",
-                   datasz, rhdr.type, rec_type_to_str(rhdr.type));
-            return -1;
-        }
-    }
-    else
-        rec->data = NULL;
-
-    rec->type   = rhdr.type;
-    rec->length = rhdr.length;
-
-    return 0;
-};
-
-static void __attribute__((unused)) build_assertions(void)
-{
-    BUILD_BUG_ON(sizeof(struct xc_sr_ihdr) != 24);
-    BUILD_BUG_ON(sizeof(struct xc_sr_dhdr) != 16);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rhdr) != 8);
-
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_page_data_header)  != 8);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_info)       != 8);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_p2m_frames) != 8);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_pv_vcpu_hdr)   != 8);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_x86_tsc_info)      != 24);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_hvm_params_entry)  != 16);
-    BUILD_BUG_ON(sizeof(struct xc_sr_rec_hvm_params)        != 8);
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_common.h b/tools/libxc/xg_sr_common.h
deleted file mode 100644 (file)
index 13fcc47..0000000
+++ /dev/null
@@ -1,468 +0,0 @@
-#ifndef __COMMON__H
-#define __COMMON__H
-
-#include <stdbool.h>
-
-#include "xg_private.h"
-#include "xg_save_restore.h"
-#include "xenctrl_dom.h"
-#include "xc_bitops.h"
-
-#include "xg_sr_stream_format.h"
-
-/* String representation of Domain Header types. */
-const char *dhdr_type_to_str(uint32_t type);
-
-/* String representation of Record types. */
-const char *rec_type_to_str(uint32_t type);
-
-struct xc_sr_context;
-struct xc_sr_record;
-
-/**
- * Save operations.  To be implemented for each type of guest, for use by the
- * common save algorithm.
- *
- * Every function must be implemented, even if only with a no-op stub.
- */
-struct xc_sr_save_ops
-{
-    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
-    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
-
-    /**
-     * Optionally transform the contents of a page from being specific to the
-     * sending environment, to being generic for the stream.
-     *
-     * The page of data at the end of 'page' may be a read-only mapping of a
-     * running guest; it must not be modified.  If no transformation is
-     * required, the callee should leave '*pages' untouched.
-     *
-     * If a transformation is required, the callee should allocate themselves
-     * a local page using malloc() and return it via '*page'.
-     *
-     * The caller shall free() '*page' in all cases.  In the case that the
-     * callee encounters an error, it should *NOT* free() the memory it
-     * allocated for '*page'.
-     *
-     * It is valid to fail with EAGAIN if the transformation is not able to be
-     * completed at this point.  The page shall be retried later.
-     *
-     * @returns 0 for success, -1 for failure, with errno appropriately set.
-     */
-    int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
-                          void **page);
-
-    /**
-     * Set up local environment to save a domain. (Typically querying
-     * running domain state, setting up mappings etc.)
-     *
-     * This is called once before any common setup has occurred, allowing for
-     * guest-specific adjustments to be made to common state.
-     */
-    int (*setup)(struct xc_sr_context *ctx);
-
-    /**
-     * Send static records at the head of the stream.  This is called once,
-     * after the Image and Domain headers are written.
-     */
-    int (*static_data)(struct xc_sr_context *ctx);
-
-    /**
-     * Send dynamic records which need to be at the start of the stream.  This
-     * is called after the STATIC_DATA_END record is written.
-     */
-    int (*start_of_stream)(struct xc_sr_context *ctx);
-
-    /**
-     * Send records which need to be at the start of a checkpoint.  This is
-     * called once, or once per checkpoint in a checkpointed stream, and is
-     * ahead of memory data.
-     */
-    int (*start_of_checkpoint)(struct xc_sr_context *ctx);
-
-    /**
-     * Send records which need to be at the end of the checkpoint.  This is
-     * called once, or once per checkpoint in a checkpointed stream, and is
-     * after the memory data.
-     */
-    int (*end_of_checkpoint)(struct xc_sr_context *ctx);
-
-    /**
-     * Check state of guest to decide whether it makes sense to continue
-     * migration.  This is called in each iteration or checkpoint to check
-     * whether all criteria for the migration are still met.  If that's not
-     * the case either migration is cancelled via a bad rc or the situation
-     * is handled, e.g. by sending appropriate records.
-     */
-    int (*check_vm_state)(struct xc_sr_context *ctx);
-
-    /**
-     * Clean up the local environment.  Will be called exactly once, either
-     * after a successful save, or upon encountering an error.
-     */
-    int (*cleanup)(struct xc_sr_context *ctx);
-};
-
-
-/**
- * Restore operations.  To be implemented for each type of guest, for use by
- * the common restore algorithm.
- *
- * Every function must be implemented, even if only with a no-op stub.
- */
-struct xc_sr_restore_ops
-{
-    /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
-    xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
-
-    /* Check to see whether a PFN is valid. */
-    bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
-
-    /* Set the GFN of a PFN. */
-    void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
-
-    /* Set the type of a PFN. */
-    void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
-                          xen_pfn_t type);
-
-    /**
-     * Optionally transform the contents of a page from being generic in the
-     * stream, to being specific to the restoring environment.
-     *
-     * 'page' is expected to be modified in-place if a transformation is
-     * required.
-     *
-     * @returns 0 for success, -1 for failure, with errno appropriately set.
-     */
-    int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
-
-    /**
-     * Set up local environment to restore a domain.
-     *
-     * This is called once before any common setup has occurred, allowing for
-     * guest-specific adjustments to be made to common state.
-     */
-    int (*setup)(struct xc_sr_context *ctx);
-
-    /**
-     * Process an individual record from the stream.  The caller shall take
-     * care of processing common records (e.g. END, PAGE_DATA).
-     *
-     * @return 0 for success, -1 for failure, or the following sentinels:
-     *  - RECORD_NOT_PROCESSED
-     *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
-     *    a failover is needed.
-     */
-#define RECORD_NOT_PROCESSED 1
-#define BROKEN_CHANNEL 2
-    int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
-
-    /**
-     * Perform any actions required after the static data has arrived.  Called
-     * when the STATIC_DATA_COMPLETE record has been recieved/inferred.
-     * 'missing' should be filled in for any data item the higher level
-     * toolstack needs to provide compatiblity for.
-     */
-    int (*static_data_complete)(struct xc_sr_context *ctx,
-                                unsigned int *missing);
-
-    /**
-     * Perform any actions required after the stream has been finished. Called
-     * after the END record has been received.
-     */
-    int (*stream_complete)(struct xc_sr_context *ctx);
-
-    /**
-     * Clean up the local environment.  Will be called exactly once, either
-     * after a successful restore, or upon encountering an error.
-     */
-    int (*cleanup)(struct xc_sr_context *ctx);
-};
-
-/* Wrapper for blobs of data heading Xen-wards. */
-struct xc_sr_blob
-{
-    void *ptr;
-    size_t size;
-};
-
-/*
- * Update a blob.  Duplicate src/size, freeing the old blob if necessary.  May
- * fail due to memory allocation.
- */
-static inline int update_blob(struct xc_sr_blob *blob,
-                              const void *src, size_t size)
-{
-    void *ptr;
-
-    if ( !src || !size )
-    {
-        errno = EINVAL;
-        return -1;
-    }
-
-    if ( (ptr = malloc(size)) == NULL )
-        return -1;
-
-    free(blob->ptr);
-    blob->ptr = memcpy(ptr, src, size);
-    blob->size = size;
-
-    return 0;
-}
-
-struct xc_sr_context
-{
-    xc_interface *xch;
-    uint32_t domid;
-    int fd;
-
-    /* Plain VM, or checkpoints over time. */
-    xc_stream_type_t stream_type;
-
-    xc_dominfo_t dominfo;
-
-    union /* Common save or restore data. */
-    {
-        struct /* Save data. */
-        {
-            int recv_fd;
-
-            struct xc_sr_save_ops ops;
-            struct save_callbacks *callbacks;
-
-            /* Live migrate vs non live suspend. */
-            bool live;
-
-            /* Further debugging information in the stream. */
-            bool debug;
-
-            unsigned long p2m_size;
-
-            struct precopy_stats stats;
-
-            xen_pfn_t *batch_pfns;
-            unsigned int nr_batch_pfns;
-            unsigned long *deferred_pages;
-            unsigned long nr_deferred_pages;
-            xc_hypercall_buffer_t dirty_bitmap_hbuf;
-        } save;
-
-        struct /* Restore data. */
-        {
-            struct xc_sr_restore_ops ops;
-            struct restore_callbacks *callbacks;
-
-            int send_back_fd;
-            unsigned long p2m_size;
-            xc_hypercall_buffer_t dirty_bitmap_hbuf;
-
-            /* From Image Header. */
-            uint32_t format_version;
-
-            /* From Domain Header. */
-            uint32_t guest_type;
-            uint32_t guest_page_size;
-
-            /* Currently buffering records between a checkpoint */
-            bool buffer_all_records;
-
-            /* Whether a STATIC_DATA_END record has been seen/inferred. */
-            bool seen_static_data_end;
-
-/*
- * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
- * in case the primary will fail, we can recover from the last
- * checkpoint state.
- * This should be enough for most of the cases because primary only send
- * dirty pages at checkpoint.
- */
-#define DEFAULT_BUF_RECORDS 1024
-            struct xc_sr_record *buffered_records;
-            unsigned int allocated_rec_num;
-            unsigned int buffered_rec_num;
-
-            /*
-             * Xenstore and Console parameters.
-             * INPUT:  evtchn & domid
-             * OUTPUT: gfn
-             */
-            xen_pfn_t    xenstore_gfn,    console_gfn;
-            unsigned int xenstore_evtchn, console_evtchn;
-            uint32_t     xenstore_domid,  console_domid;
-
-            /* Bitmap of currently populated PFNs during restore. */
-            unsigned long *populated_pfns;
-            xen_pfn_t max_populated_pfn;
-
-            /* Sender has invoked verify mode on the stream. */
-            bool verify;
-        } restore;
-    };
-
-    union /* Guest-arch specific data. */
-    {
-        struct /* x86 */
-        {
-            /* Common save/restore data. */
-            union
-            {
-                struct
-                {
-                    /* X86_{CPUID,MSR}_DATA blobs for CPU Policy. */
-                    struct xc_sr_blob cpuid, msr;
-                } restore;
-            };
-
-            struct /* x86 PV guest. */
-            {
-                /* 4 or 8; 32 or 64 bit domain */
-                unsigned int width;
-                /* 3 or 4 pagetable levels */
-                unsigned int levels;
-
-                /* Maximum Xen frame */
-                xen_pfn_t max_mfn;
-                /* Read-only machine to phys map */
-                xen_pfn_t *m2p;
-                /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
-                xen_pfn_t compat_m2p_mfn0;
-                /* Number of m2p frames mapped */
-                unsigned long nr_m2p_frames;
-
-                /* Maximum guest frame */
-                xen_pfn_t max_pfn;
-
-                /* Number of frames making up the p2m */
-                unsigned int p2m_frames;
-                /* Guest's phys to machine map.  Mapped read-only (save) or
-                 * allocated locally (restore).  Uses guest unsigned longs. */
-                void *p2m;
-                /* The guest pfns containing the p2m leaves */
-                xen_pfn_t *p2m_pfns;
-
-                /* Read-only mapping of guests shared info page */
-                shared_info_any_t *shinfo;
-
-                /* p2m generation count for verifying validity of local p2m. */
-                uint64_t p2m_generation;
-
-                union
-                {
-                    struct
-                    {
-                        /* State machine for the order of received records. */
-                        bool seen_pv_info;
-
-                        /* Types for each page (bounded by max_pfn). */
-                        uint32_t *pfn_types;
-
-                        /* x86 PV per-vcpu storage structure for blobs. */
-                        struct xc_sr_x86_pv_restore_vcpu
-                        {
-                            struct xc_sr_blob basic, extd, xsave, msr;
-                        } *vcpus;
-                        unsigned int nr_vcpus;
-                    } restore;
-                };
-            } pv;
-
-            struct /* x86 HVM guest. */
-            {
-                union
-                {
-                    struct
-                    {
-                        /* Whether qemu enabled logdirty mode, and we should
-                         * disable on cleanup. */
-                        bool qemu_enabled_logdirty;
-                    } save;
-
-                    struct
-                    {
-                        /* HVM context blob. */
-                        struct xc_sr_blob context;
-                    } restore;
-                };
-            } hvm;
-
-        } x86;
-    };
-};
-
-extern struct xc_sr_save_ops save_ops_x86_pv;
-extern struct xc_sr_save_ops save_ops_x86_hvm;
-
-extern struct xc_sr_restore_ops restore_ops_x86_pv;
-extern struct xc_sr_restore_ops restore_ops_x86_hvm;
-
-struct xc_sr_record
-{
-    uint32_t type;
-    uint32_t length;
-    void *data;
-};
-
-/*
- * Writes a split record to the stream, applying correct padding where
- * appropriate.  It is common when sending records containing blobs from Xen
- * that the header and blob data are separate.  This function accepts a second
- * buffer and length, and will merge it with the main record when sending.
- *
- * Records with a non-zero length must provide a valid data field; records
- * with a 0 length shall have their data field ignored.
- *
- * Returns 0 on success and non0 on failure.
- */
-int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
-                       void *buf, size_t sz);
-
-/*
- * Writes a record to the stream, applying correct padding where appropriate.
- * Records with a non-zero length must provide a valid data field; records
- * with a 0 length shall have their data field ignored.
- *
- * Returns 0 on success and non0 on failure.
- */
-static inline int write_record(struct xc_sr_context *ctx,
-                               struct xc_sr_record *rec)
-{
-    return write_split_record(ctx, rec, NULL, 0);
-}
-
-/*
- * Reads a record from the stream, and fills in the record structure.
- *
- * Returns 0 on success and non-0 on failure.
- *
- * On success, the records type and size shall be valid.
- * - If size is 0, data shall be NULL.
- * - If size is non-0, data shall be a buffer allocated by malloc() which must
- *   be passed to free() by the caller.
- *
- * On failure, the contents of the record structure are undefined.
- */
-int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
-
-/*
- * This would ideally be private in restore.c, but is needed by
- * x86_pv_localise_page() if we receive pagetables frames ahead of the
- * contents of the frames they point at.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types);
-
-/* Handle a STATIC_DATA_END record. */
-int handle_static_data_end(struct xc_sr_context *ctx);
-
-#endif
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_common_x86.c b/tools/libxc/xg_sr_common_x86.c
deleted file mode 100644 (file)
index 6f12483..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-#include "xg_sr_common_x86.h"
-
-int write_x86_tsc_info(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_tsc_info tsc = {};
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_TSC_INFO,
-        .length = sizeof(tsc),
-        .data = &tsc,
-    };
-
-    if ( xc_domain_get_tsc_info(xch, ctx->domid, &tsc.mode,
-                                &tsc.nsec, &tsc.khz, &tsc.incarnation) < 0 )
-    {
-        PERROR("Unable to obtain TSC information");
-        return -1;
-    }
-
-    return write_record(ctx, &rec);
-}
-
-int handle_x86_tsc_info(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_tsc_info *tsc = rec->data;
-
-    if ( rec->length != sizeof(*tsc) )
-    {
-        ERROR("X86_TSC_INFO record wrong size: length %u, expected %zu",
-              rec->length, sizeof(*tsc));
-        return -1;
-    }
-
-    if ( xc_domain_set_tsc_info(xch, ctx->domid, tsc->mode,
-                                tsc->nsec, tsc->khz, tsc->incarnation) )
-    {
-        PERROR("Unable to set TSC information");
-        return -1;
-    }
-
-    return 0;
-}
-
-int write_x86_cpu_policy_records(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_record cpuid = { .type = REC_TYPE_X86_CPUID_POLICY, };
-    struct xc_sr_record msrs  = { .type = REC_TYPE_X86_MSR_POLICY, };
-    uint32_t nr_leaves = 0, nr_msrs = 0;
-    int rc;
-
-    if ( xc_get_cpu_policy_size(xch, &nr_leaves, &nr_msrs) < 0 )
-    {
-        PERROR("Unable to get CPU Policy size");
-        return -1;
-    }
-
-    cpuid.data = malloc(nr_leaves * sizeof(xen_cpuid_leaf_t));
-    msrs.data  = malloc(nr_msrs   * sizeof(xen_msr_entry_t));
-    if ( !cpuid.data || !msrs.data )
-    {
-        ERROR("Cannot allocate memory for CPU Policy");
-        rc = -1;
-        goto out;
-    }
-
-    if ( xc_get_domain_cpu_policy(xch, ctx->domid, &nr_leaves, cpuid.data,
-                                  &nr_msrs, msrs.data) )
-    {
-        PERROR("Unable to get d%d CPU Policy", ctx->domid);
-        rc = -1;
-        goto out;
-    }
-
-    cpuid.length = nr_leaves * sizeof(xen_cpuid_leaf_t);
-    if ( cpuid.length )
-    {
-        rc = write_record(ctx, &cpuid);
-        if ( rc )
-            goto out;
-    }
-
-    msrs.length = nr_msrs * sizeof(xen_msr_entry_t);
-    if ( msrs.length )
-        rc = write_record(ctx, &msrs);
-
- out:
-    free(cpuid.data);
-    free(msrs.data);
-
-    return rc;
-}
-
-int handle_x86_cpuid_policy(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    if ( rec->length == 0 ||
-         rec->length % sizeof(xen_cpuid_leaf_t) != 0 )
-    {
-        ERROR("X86_CPUID_POLICY size %u should be multiple of %zu",
-              rec->length, sizeof(xen_cpuid_leaf_t));
-        return -1;
-    }
-
-    rc = update_blob(&ctx->x86.restore.cpuid, rec->data, rec->length);
-    if ( rc )
-        ERROR("Unable to allocate %u bytes for X86_CPUID_POLICY", rec->length);
-
-    return rc;
-}
-
-int handle_x86_msr_policy(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    if ( rec->length == 0 ||
-         rec->length % sizeof(xen_msr_entry_t) != 0 )
-    {
-        ERROR("X86_MSR_POLICY size %u should be multiple of %zu",
-              rec->length, sizeof(xen_cpuid_leaf_t));
-        return -1;
-    }
-
-    rc = update_blob(&ctx->x86.restore.msr, rec->data, rec->length);
-    if ( rc )
-        ERROR("Unable to allocate %u bytes for X86_MSR_POLICY", rec->length);
-
-    return rc;
-}
-
-int x86_static_data_complete(struct xc_sr_context *ctx, unsigned int *missing)
-{
-    xc_interface *xch = ctx->xch;
-    uint32_t nr_leaves = 0, nr_msrs = 0;
-    uint32_t err_l = ~0, err_s = ~0, err_m = ~0;
-
-    if ( ctx->x86.restore.cpuid.ptr )
-        nr_leaves = ctx->x86.restore.cpuid.size / sizeof(xen_cpuid_leaf_t);
-    else
-        *missing |= XGR_SDD_MISSING_CPUID;
-
-    if ( ctx->x86.restore.msr.ptr )
-        nr_msrs = ctx->x86.restore.msr.size / sizeof(xen_msr_entry_t);
-    else
-        *missing |= XGR_SDD_MISSING_MSR;
-
-    if ( (nr_leaves || nr_msrs) &&
-         xc_set_domain_cpu_policy(xch, ctx->domid,
-                                  nr_leaves, ctx->x86.restore.cpuid.ptr,
-                                  nr_msrs,   ctx->x86.restore.msr.ptr,
-                                  &err_l, &err_s, &err_m) )
-    {
-        PERROR("Failed to set CPUID policy: leaf %08x, subleaf %08x, msr %08x",
-               err_l, err_s, err_m);
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_common_x86.h b/tools/libxc/xg_sr_common_x86.h
deleted file mode 100644 (file)
index b55758c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef __COMMON_X86__H
-#define __COMMON_X86__H
-
-#include "xg_sr_common.h"
-
-/*
- * Obtains a domains TSC information from Xen and writes a X86_TSC_INFO record
- * into the stream.
- */
-int write_x86_tsc_info(struct xc_sr_context *ctx);
-
-/*
- * Parses a X86_TSC_INFO record and applies the result to the domain.
- */
-int handle_x86_tsc_info(struct xc_sr_context *ctx, struct xc_sr_record *rec);
-
-/*
- * Obtains a domains CPU Policy from Xen, and writes X86_{CPUID,MSR}_POLICY
- * records into the stream.
- */
-int write_x86_cpu_policy_records(struct xc_sr_context *ctx);
-
-/*
- * Parses an X86_CPUID_POLICY record and stashes the content for application
- * when a STATIC_DATA_END record is encountered.
- */
-int handle_x86_cpuid_policy(struct xc_sr_context *ctx,
-                            struct xc_sr_record *rec);
-
-/*
- * Parses an X86_MSR_POLICY record and stashes the content for application
- * when a STATIC_DATA_END record is encountered.
- */
-int handle_x86_msr_policy(struct xc_sr_context *ctx,
-                          struct xc_sr_record *rec);
-
-/*
- * Perform common x86 actions required after the static data has arrived.
- */
-int x86_static_data_complete(struct xc_sr_context *ctx, unsigned int *missing);
-
-#endif
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_common_x86_pv.c b/tools/libxc/xg_sr_common_x86_pv.c
deleted file mode 100644 (file)
index cd33406..0000000
+++ /dev/null
@@ -1,193 +0,0 @@
-#include <assert.h>
-
-#include "xg_sr_common_x86_pv.h"
-
-xen_pfn_t mfn_to_pfn(struct xc_sr_context *ctx, xen_pfn_t mfn)
-{
-    assert(mfn <= ctx->x86.pv.max_mfn);
-    return ctx->x86.pv.m2p[mfn];
-}
-
-bool mfn_in_pseudophysmap(struct xc_sr_context *ctx, xen_pfn_t mfn)
-{
-    return ((mfn <= ctx->x86.pv.max_mfn) &&
-            (mfn_to_pfn(ctx, mfn) <= ctx->x86.pv.max_pfn) &&
-            (xc_pfn_to_mfn(mfn_to_pfn(ctx, mfn), ctx->x86.pv.p2m,
-                           ctx->x86.pv.width) == mfn));
-}
-
-void dump_bad_pseudophysmap_entry(struct xc_sr_context *ctx, xen_pfn_t mfn)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t pfn = ~0UL;
-
-    ERROR("mfn %#lx, max %#lx", mfn, ctx->x86.pv.max_mfn);
-
-    if ( (mfn != ~0UL) && (mfn <= ctx->x86.pv.max_mfn) )
-    {
-        pfn = ctx->x86.pv.m2p[mfn];
-        ERROR("  m2p[%#lx] = %#lx, max_pfn %#lx",
-              mfn, pfn, ctx->x86.pv.max_pfn);
-    }
-
-    if ( (pfn != ~0UL) && (pfn <= ctx->x86.pv.max_pfn) )
-        ERROR("  p2m[%#lx] = %#lx",
-              pfn, xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width));
-}
-
-xen_pfn_t cr3_to_mfn(struct xc_sr_context *ctx, uint64_t cr3)
-{
-    if ( ctx->x86.pv.width == 8 )
-        return cr3 >> 12;
-    else
-    {
-        /* 32bit guests can't represent mfns wider than 32 bits */
-        if ( cr3 & 0xffffffff00000000UL )
-            return ~0UL;
-        else
-            return (uint32_t)((cr3 >> 12) | (cr3 << 20));
-    }
-}
-
-uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t _mfn)
-{
-    uint64_t mfn = _mfn;
-
-    if ( ctx->x86.pv.width == 8 )
-        return mfn << 12;
-    else
-    {
-        /* 32bit guests can't represent mfns wider than 32 bits */
-        if ( mfn & 0xffffffff00000000UL )
-            return ~0UL;
-        else
-            return (uint32_t)((mfn << 12) | (mfn >> 20));
-    }
-}
-
-int x86_pv_domain_info(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int guest_width, guest_levels;
-
-    /* Get the domain width */
-    if ( xc_domain_get_guest_width(xch, ctx->domid, &guest_width) )
-    {
-        PERROR("Unable to determine dom%d's width", ctx->domid);
-        return -1;
-    }
-
-    if ( guest_width == 4 )
-        guest_levels = 3;
-    else if ( guest_width == 8 )
-        guest_levels = 4;
-    else
-    {
-        ERROR("Invalid guest width %d.  Expected 32 or 64", guest_width * 8);
-        return -1;
-    }
-    ctx->x86.pv.width = guest_width;
-    ctx->x86.pv.levels = guest_levels;
-
-    DPRINTF("%d bits, %d levels", guest_width * 8, guest_levels);
-
-    return 0;
-}
-
-int x86_pv_map_m2p(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t m2p_chunks, m2p_size, max_page;
-    privcmd_mmap_entry_t *entries = NULL;
-    xen_pfn_t *extents_start = NULL;
-    int rc = -1, i;
-
-    if ( xc_maximum_ram_page(xch, &max_page) < 0 )
-    {
-        PERROR("Failed to get maximum ram page");
-        goto err;
-    }
-
-    ctx->x86.pv.max_mfn = max_page;
-    m2p_size   = M2P_SIZE(ctx->x86.pv.max_mfn);
-    m2p_chunks = M2P_CHUNKS(ctx->x86.pv.max_mfn);
-
-    extents_start = malloc(m2p_chunks * sizeof(xen_pfn_t));
-    if ( !extents_start )
-    {
-        ERROR("Unable to allocate %lu bytes for m2p mfns",
-              m2p_chunks * sizeof(xen_pfn_t));
-        goto err;
-    }
-
-    if ( xc_machphys_mfn_list(xch, m2p_chunks, extents_start) )
-    {
-        PERROR("Failed to get m2p mfn list");
-        goto err;
-    }
-
-    entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t));
-    if ( !entries )
-    {
-        ERROR("Unable to allocate %lu bytes for m2p mapping mfns",
-              m2p_chunks * sizeof(privcmd_mmap_entry_t));
-        goto err;
-    }
-
-    for ( i = 0; i < m2p_chunks; ++i )
-        entries[i].mfn = extents_start[i];
-
-    ctx->x86.pv.m2p = xc_map_foreign_ranges(
-        xch, DOMID_XEN, m2p_size, PROT_READ,
-        M2P_CHUNK_SIZE, entries, m2p_chunks);
-
-    if ( !ctx->x86.pv.m2p )
-    {
-        PERROR("Failed to mmap() m2p ranges");
-        goto err;
-    }
-
-    ctx->x86.pv.nr_m2p_frames = (M2P_CHUNK_SIZE >> PAGE_SHIFT) * m2p_chunks;
-
-#ifdef __i386__
-    /* 32 bit toolstacks automatically get the compat m2p */
-    ctx->x86.pv.compat_m2p_mfn0 = entries[0].mfn;
-#else
-    /* 64 bit toolstacks need to ask Xen specially for it */
-    {
-        struct xen_machphys_mfn_list xmml = {
-            .max_extents = 1,
-            .extent_start = { &ctx->x86.pv.compat_m2p_mfn0 },
-        };
-
-        rc = do_memory_op(xch, XENMEM_machphys_compat_mfn_list,
-                          &xmml, sizeof(xmml));
-        if ( rc || xmml.nr_extents != 1 )
-        {
-            PERROR("Failed to get compat mfn list from Xen");
-            rc = -1;
-            goto err;
-        }
-    }
-#endif
-
-    /* All Done */
-    rc = 0;
-    DPRINTF("max_mfn %#lx", ctx->x86.pv.max_mfn);
-
- err:
-    free(entries);
-    free(extents_start);
-
-    return rc;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_common_x86_pv.h b/tools/libxc/xg_sr_common_x86_pv.h
deleted file mode 100644 (file)
index 953b5bf..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef __COMMON_X86_PV_H
-#define __COMMON_X86_PV_H
-
-#include "xg_sr_common_x86.h"
-
-/* Virtual address ranges reserved for hypervisor. */
-#define HYPERVISOR_VIRT_START_X86_64 0xFFFF800000000000ULL
-#define HYPERVISOR_VIRT_END_X86_64   0xFFFF87FFFFFFFFFFULL
-
-#define HYPERVISOR_VIRT_START_X86_32 0x00000000F5800000ULL
-#define HYPERVISOR_VIRT_END_X86_32   0x00000000FFFFFFFFULL
-
-/*
- * Convert an mfn to a pfn, given Xen's m2p table.
- *
- * Caller must ensure that the requested mfn is in range.
- */
-xen_pfn_t mfn_to_pfn(struct xc_sr_context *ctx, xen_pfn_t mfn);
-
-/*
- * Query whether a particular mfn is valid in the physmap of a guest.
- */
-bool mfn_in_pseudophysmap(struct xc_sr_context *ctx, xen_pfn_t mfn);
-
-/*
- * Debug a particular mfn by walking the p2m and m2p.
- */
-void dump_bad_pseudophysmap_entry(struct xc_sr_context *ctx, xen_pfn_t mfn);
-
-/*
- * Convert a PV cr3 field to an mfn.
- *
- * Adjusts for Xen's extended-cr3 format to pack a 44bit physical address into
- * a 32bit architectural cr3.
- */
-xen_pfn_t cr3_to_mfn(struct xc_sr_context *ctx, uint64_t cr3);
-
-/*
- * Convert an mfn to a PV cr3 field.
- *
- * Adjusts for Xen's extended-cr3 format to pack a 44bit physical address into
- * a 32bit architectural cr3.
- */
-uint64_t mfn_to_cr3(struct xc_sr_context *ctx, xen_pfn_t mfn);
-
-/* Bits 12 through 51 of a PTE point at the frame */
-#define PTE_FRAME_MASK 0x000ffffffffff000ULL
-
-/*
- * Extract an mfn from a Pagetable Entry.  May return INVALID_MFN if the pte
- * would overflow a 32bit xen_pfn_t.
- */
-static inline xen_pfn_t pte_to_frame(uint64_t pte)
-{
-    uint64_t frame = (pte & PTE_FRAME_MASK) >> PAGE_SHIFT;
-
-#ifdef __i386__
-    if ( frame >= INVALID_MFN )
-        return INVALID_MFN;
-#endif
-
-    return frame;
-}
-
-/*
- * Change the frame in a Pagetable Entry while leaving the flags alone.
- */
-static inline uint64_t merge_pte(uint64_t pte, xen_pfn_t mfn)
-{
-    return (pte & ~PTE_FRAME_MASK) | ((uint64_t)mfn << PAGE_SHIFT);
-}
-
-/*
- * Get current domain information.
- *
- * Fills ctx->x86.pv
- * - .width
- * - .levels
- * - .fpp
- * - .p2m_frames
- *
- * Used by the save side to create the X86_PV_INFO record, and by the restore
- * side to verify the incoming stream.
- *
- * Returns 0 on success and non-zero on error.
- */
-int x86_pv_domain_info(struct xc_sr_context *ctx);
-
-/*
- * Maps the Xen M2P.
- *
- * Fills ctx->x86.pv.
- * - .max_mfn
- * - .m2p
- *
- * Returns 0 on success and non-zero on error.
- */
-int x86_pv_map_m2p(struct xc_sr_context *ctx);
-
-#endif
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_restore.c b/tools/libxc/xg_sr_restore.c
deleted file mode 100644 (file)
index b57a787..0000000
+++ /dev/null
@@ -1,986 +0,0 @@
-#include <arpa/inet.h>
-
-#include <assert.h>
-
-#include "xg_sr_common.h"
-
-/*
- * Read and validate the Image and Domain headers.
- */
-static int read_headers(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_ihdr ihdr;
-    struct xc_sr_dhdr dhdr;
-
-    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
-    {
-        PERROR("Failed to read Image Header from stream");
-        return -1;
-    }
-
-    ihdr.id      = ntohl(ihdr.id);
-    ihdr.version = ntohl(ihdr.version);
-    ihdr.options = ntohs(ihdr.options);
-
-    if ( ihdr.marker != IHDR_MARKER )
-    {
-        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
-        return -1;
-    }
-
-    if ( ihdr.id != IHDR_ID )
-    {
-        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
-        return -1;
-    }
-
-    if ( ihdr.version < 2 || ihdr.version > 3 )
-    {
-        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %d",
-              ihdr.version);
-        return -1;
-    }
-
-    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
-    {
-        ERROR("Unable to handle big endian streams");
-        return -1;
-    }
-
-    ctx->restore.format_version = ihdr.version;
-
-    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
-    {
-        PERROR("Failed to read Domain Header from stream");
-        return -1;
-    }
-
-    ctx->restore.guest_type = dhdr.type;
-    ctx->restore.guest_page_size = (1U << dhdr.page_shift);
-
-    if ( dhdr.xen_major == 0 )
-    {
-        IPRINTF("Found %s domain, converted from legacy stream format",
-                dhdr_type_to_str(dhdr.type));
-        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
-    }
-    else
-        IPRINTF("Found %s domain from Xen %u.%u",
-                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);
-    return 0;
-}
-
-/*
- * Is a pfn populated?
- */
-static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    if ( pfn > ctx->restore.max_populated_pfn )
-        return false;
-    return test_bit(pfn, ctx->restore.populated_pfns);
-}
-
-/*
- * Set a pfn as populated, expanding the tracking structures if needed. To
- * avoid realloc()ing too excessively, the size increased to the nearest power
- * of two large enough to contain the required pfn.
- */
-static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    xc_interface *xch = ctx->xch;
-
-    if ( pfn > ctx->restore.max_populated_pfn )
-    {
-        xen_pfn_t new_max;
-        size_t old_sz, new_sz;
-        unsigned long *p;
-
-        /* Round up to the nearest power of two larger than pfn, less 1. */
-        new_max = pfn;
-        new_max |= new_max >> 1;
-        new_max |= new_max >> 2;
-        new_max |= new_max >> 4;
-        new_max |= new_max >> 8;
-        new_max |= new_max >> 16;
-#ifdef __x86_64__
-        new_max |= new_max >> 32;
-#endif
-
-        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
-        new_sz = bitmap_size(new_max + 1);
-        p = realloc(ctx->restore.populated_pfns, new_sz);
-        if ( !p )
-        {
-            ERROR("Failed to realloc populated bitmap");
-            errno = ENOMEM;
-            return -1;
-        }
-
-        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);
-
-        ctx->restore.populated_pfns    = p;
-        ctx->restore.max_populated_pfn = new_max;
-    }
-
-    assert(!test_bit(pfn, ctx->restore.populated_pfns));
-    set_bit(pfn, ctx->restore.populated_pfns);
-
-    return 0;
-}
-
-/*
- * Given a set of pfns, obtain memory from Xen to fill the physmap for the
- * unpopulated subset.  If types is NULL, no page type checking is performed
- * and all unpopulated pfns are populated.
- */
-int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
-                  const xen_pfn_t *original_pfns, const uint32_t *types)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
-        *pfns = malloc(count * sizeof(*pfns));
-    unsigned int i, nr_pfns = 0;
-    int rc = -1;
-
-    if ( !mfns || !pfns )
-    {
-        ERROR("Failed to allocate %zu bytes for populating the physmap",
-              2 * count * sizeof(*mfns));
-        goto err;
-    }
-
-    for ( i = 0; i < count; ++i )
-    {
-        if ( (!types || (types &&
-                         (types[i] != XEN_DOMCTL_PFINFO_XTAB &&
-                          types[i] != XEN_DOMCTL_PFINFO_BROKEN))) &&
-             !pfn_is_populated(ctx, original_pfns[i]) )
-        {
-            rc = pfn_set_populated(ctx, original_pfns[i]);
-            if ( rc )
-                goto err;
-            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
-            ++nr_pfns;
-        }
-    }
-
-    if ( nr_pfns )
-    {
-        rc = xc_domain_populate_physmap_exact(
-            xch, ctx->domid, nr_pfns, 0, 0, mfns);
-        if ( rc )
-        {
-            PERROR("Failed to populate physmap");
-            goto err;
-        }
-
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( mfns[i] == INVALID_MFN )
-            {
-                ERROR("Populate physmap failed for pfn %u", i);
-                rc = -1;
-                goto err;
-            }
-
-            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
-        }
-    }
-
-    rc = 0;
-
- err:
-    free(pfns);
-    free(mfns);
-
-    return rc;
-}
-
-/*
- * Given a list of pfns, their types, and a block of page data from the
- * stream, populate and record their types, map the relevant subset and copy
- * the data into the guest.
- */
-static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
-                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
-    int *map_errs = malloc(count * sizeof(*map_errs));
-    int rc;
-    void *mapping = NULL, *guest_page = NULL;
-    unsigned int i, /* i indexes the pfns from the record. */
-        j,          /* j indexes the subset of pfns we decide to map. */
-        nr_pages = 0;
-
-    if ( !mfns || !map_errs )
-    {
-        rc = -1;
-        ERROR("Failed to allocate %zu bytes to process page data",
-              count * (sizeof(*mfns) + sizeof(*map_errs)));
-        goto err;
-    }
-
-    rc = populate_pfns(ctx, count, pfns, types);
-    if ( rc )
-    {
-        ERROR("Failed to populate pfns for batch of %u pages", count);
-        goto err;
-    }
-
-    for ( i = 0; i < count; ++i )
-    {
-        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);
-
-        switch ( types[i] )
-        {
-        case XEN_DOMCTL_PFINFO_NOTAB:
-
-        case XEN_DOMCTL_PFINFO_L1TAB:
-        case XEN_DOMCTL_PFINFO_L1TAB | XEN_DOMCTL_PFINFO_LPINTAB:
-
-        case XEN_DOMCTL_PFINFO_L2TAB:
-        case XEN_DOMCTL_PFINFO_L2TAB | XEN_DOMCTL_PFINFO_LPINTAB:
-
-        case XEN_DOMCTL_PFINFO_L3TAB:
-        case XEN_DOMCTL_PFINFO_L3TAB | XEN_DOMCTL_PFINFO_LPINTAB:
-
-        case XEN_DOMCTL_PFINFO_L4TAB:
-        case XEN_DOMCTL_PFINFO_L4TAB | XEN_DOMCTL_PFINFO_LPINTAB:
-
-            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
-            break;
-        }
-    }
-
-    /* Nothing to do? */
-    if ( nr_pages == 0 )
-        goto done;
-
-    mapping = guest_page = xenforeignmemory_map(
-        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
-        nr_pages, mfns, map_errs);
-    if ( !mapping )
-    {
-        rc = -1;
-        PERROR("Unable to map %u mfns for %u pages of data",
-               nr_pages, count);
-        goto err;
-    }
-
-    for ( i = 0, j = 0; i < count; ++i )
-    {
-        switch ( types[i] )
-        {
-        case XEN_DOMCTL_PFINFO_XTAB:
-        case XEN_DOMCTL_PFINFO_BROKEN:
-        case XEN_DOMCTL_PFINFO_XALLOC:
-            /* No page data to deal with. */
-            continue;
-        }
-
-        if ( map_errs[j] )
-        {
-            rc = -1;
-            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
-                  pfns[i], mfns[j], types[i], map_errs[j]);
-            goto err;
-        }
-
-        /* Undo page normalisation done by the saver. */
-        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
-        if ( rc )
-        {
-            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
-                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-            goto err;
-        }
-
-        if ( ctx->restore.verify )
-        {
-            /* Verify mode - compare incoming data to what we already have. */
-            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
-                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
-                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
-        }
-        else
-        {
-            /* Regular mode - copy incoming data into place. */
-            memcpy(guest_page, page_data, PAGE_SIZE);
-        }
-
-        ++j;
-        guest_page += PAGE_SIZE;
-        page_data += PAGE_SIZE;
-    }
-
- done:
-    rc = 0;
-
- err:
-    if ( mapping )
-        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);
-
-    free(map_errs);
-    free(mfns);
-
-    return rc;
-}
-
-/*
- * Validate a PAGE_DATA record from the stream, and pass the results to
- * process_page_data() to actually perform the legwork.
- */
-static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_page_data_header *pages = rec->data;
-    unsigned int i, pages_of_data = 0;
-    int rc = -1;
-
-    xen_pfn_t *pfns = NULL, pfn;
-    uint32_t *types = NULL, type;
-
-    /*
-     * v2 compatibility only exists for x86 streams.  This is a bit of a
-     * bodge, but it is less bad than duplicating handle_page_data() between
-     * different architectures.
-     */
-#if defined(__i386__) || defined(__x86_64__)
-    /* v2 compat.  Infer the position of STATIC_DATA_END. */
-    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
-    {
-        rc = handle_static_data_end(ctx);
-        if ( rc )
-        {
-            ERROR("Inferred STATIC_DATA_END record failed");
-            goto err;
-        }
-        rc = -1;
-    }
-
-    if ( !ctx->restore.seen_static_data_end )
-    {
-        ERROR("No STATIC_DATA_END seen");
-        goto err;
-    }
-#endif
-
-    if ( rec->length < sizeof(*pages) )
-    {
-        ERROR("PAGE_DATA record truncated: length %u, min %zu",
-              rec->length, sizeof(*pages));
-        goto err;
-    }
-
-    if ( pages->count < 1 )
-    {
-        ERROR("Expected at least 1 pfn in PAGE_DATA record");
-        goto err;
-    }
-
-    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
-    {
-        ERROR("PAGE_DATA record (length %u) too short to contain %u"
-              " pfns worth of information", rec->length, pages->count);
-        goto err;
-    }
-
-    pfns = malloc(pages->count * sizeof(*pfns));
-    types = malloc(pages->count * sizeof(*types));
-    if ( !pfns || !types )
-    {
-        ERROR("Unable to allocate enough memory for %u pfns",
-              pages->count);
-        goto err;
-    }
-
-    for ( i = 0; i < pages->count; ++i )
-    {
-        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
-        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
-        {
-            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
-            goto err;
-        }
-
-        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
-        if ( ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) >= 5) &&
-             ((type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT) <= 8) )
-        {
-            ERROR("Invalid type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
-                  type, pfn, i);
-            goto err;
-        }
-
-        if ( type < XEN_DOMCTL_PFINFO_BROKEN )
-            /* NOTAB and all L1 through L4 tables (including pinned) should
-             * have a page worth of data in the record. */
-            pages_of_data++;
-
-        pfns[i] = pfn;
-        types[i] = type;
-    }
-
-    if ( rec->length != (sizeof(*pages) +
-                         (sizeof(uint64_t) * pages->count) +
-                         (PAGE_SIZE * pages_of_data)) )
-    {
-        ERROR("PAGE_DATA record wrong size: length %u, expected "
-              "%zu + %zu + %lu", rec->length, sizeof(*pages),
-              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
-        goto err;
-    }
-
-    rc = process_page_data(ctx, pages->count, pfns, types,
-                           &pages->pfn[pages->count]);
- err:
-    free(types);
-    free(pfns);
-
-    return rc;
-}
-
-/*
- * Send checkpoint dirty pfn list to primary.
- */
-static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = -1;
-    unsigned int count, written;
-    uint64_t i, *pfns = NULL;
-    struct iovec *iov = NULL;
-    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
-    };
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->restore.dirty_bitmap_hbuf);
-
-    if ( xc_shadow_control(
-             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
-             NULL, 0, &stats) != ctx->restore.p2m_size )
-    {
-        PERROR("Failed to retrieve logdirty bitmap");
-        goto err;
-    }
-
-    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
-    {
-        if ( test_bit(i, dirty_bitmap) )
-            count++;
-    }
-
-
-    pfns = malloc(count * sizeof(*pfns));
-    if ( !pfns )
-    {
-        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
-              count * sizeof(*pfns));
-        goto err;
-    }
-
-    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
-    {
-        if ( !test_bit(i, dirty_bitmap) )
-            continue;
-
-        if ( written > count )
-        {
-            ERROR("Dirty pfn list exceed");
-            goto err;
-        }
-
-        pfns[written++] = i;
-    }
-
-    /* iovec[] for writev(). */
-    iov = malloc(3 * sizeof(*iov));
-    if ( !iov )
-    {
-        ERROR("Unable to allocate memory for sending dirty bitmap");
-        goto err;
-    }
-
-    rec.length = count * sizeof(*pfns);
-
-    iov[0].iov_base = &rec.type;
-    iov[0].iov_len = sizeof(rec.type);
-
-    iov[1].iov_base = &rec.length;
-    iov[1].iov_len = sizeof(rec.length);
-
-    iov[2].iov_base = pfns;
-    iov[2].iov_len = count * sizeof(*pfns);
-
-    if ( writev_exact(ctx->restore.send_back_fd, iov, 3) )
-    {
-        PERROR("Failed to write dirty bitmap to stream");
-        goto err;
-    }
-
-    rc = 0;
- err:
-    free(pfns);
-    free(iov);
-    return rc;
-}
-
-static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
-static int handle_checkpoint(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = 0, ret;
-    unsigned int i;
-
-    if ( ctx->stream_type == XC_STREAM_PLAIN )
-    {
-        ERROR("Found checkpoint in non-checkpointed stream");
-        rc = -1;
-        goto err;
-    }
-
-    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
-    switch ( ret )
-    {
-    case XGR_CHECKPOINT_SUCCESS:
-        break;
-
-    case XGR_CHECKPOINT_FAILOVER:
-        if ( ctx->restore.buffer_all_records )
-            rc = BROKEN_CHANNEL;
-        else
-            /* We don't have a consistent state */
-            rc = -1;
-        goto err;
-
-    default: /* Other fatal error */
-        rc = -1;
-        goto err;
-    }
-
-    if ( ctx->restore.buffer_all_records )
-    {
-        IPRINTF("All records buffered");
-
-        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
-        {
-            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
-            if ( rc )
-                goto err;
-        }
-        ctx->restore.buffered_rec_num = 0;
-        IPRINTF("All records processed");
-    }
-    else
-        ctx->restore.buffer_all_records = true;
-
-    if ( ctx->stream_type == XC_STREAM_COLO )
-    {
-#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
-    do {                                                    \
-        if ( ret == 1 )                                     \
-            rc = 0; /* Success */                           \
-        else                                                \
-        {                                                   \
-            if ( ret == 2 )                                 \
-                rc = BROKEN_CHANNEL;                        \
-            else                                            \
-                rc = -1; /* Some unspecified error */       \
-            goto err;                                       \
-        }                                                   \
-    } while (0)
-
-        /* COLO */
-
-        /* We need to resume guest */
-        rc = ctx->restore.ops.stream_complete(ctx);
-        if ( rc )
-            goto err;
-
-        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
-                                                ctx->restore.console_gfn,
-                                                ctx->restore.callbacks->data);
-
-        /* Resume secondary vm */
-        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
-        HANDLE_CALLBACK_RETURN_VALUE(ret);
-
-        /* Wait for a new checkpoint */
-        ret = ctx->restore.callbacks->wait_checkpoint(
-            ctx->restore.callbacks->data);
-        HANDLE_CALLBACK_RETURN_VALUE(ret);
-
-        /* suspend secondary vm */
-        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
-        HANDLE_CALLBACK_RETURN_VALUE(ret);
-
-#undef HANDLE_CALLBACK_RETURN_VALUE
-
-        rc = send_checkpoint_dirty_pfn_list(ctx);
-        if ( rc )
-            goto err;
-    }
-
- err:
-    return rc;
-}
-
-static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int new_alloc_num;
-    struct xc_sr_record *p;
-
-    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
-    {
-        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
-        p = realloc(ctx->restore.buffered_records,
-                    new_alloc_num * sizeof(struct xc_sr_record));
-        if ( !p )
-        {
-            ERROR("Failed to realloc memory for buffered records");
-            return -1;
-        }
-
-        ctx->restore.buffered_records = p;
-        ctx->restore.allocated_rec_num = new_alloc_num;
-    }
-
-    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
-           rec, sizeof(*rec));
-
-    return 0;
-}
-
-int handle_static_data_end(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int missing = 0;
-    int rc = 0;
-
-    if ( ctx->restore.seen_static_data_end )
-    {
-        ERROR("Multiple STATIC_DATA_END records found");
-        return -1;
-    }
-
-    ctx->restore.seen_static_data_end = true;
-
-    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
-    if ( rc )
-        return rc;
-
-    if ( ctx->restore.callbacks->static_data_done &&
-         (rc = ctx->restore.callbacks->static_data_done(
-             missing, ctx->restore.callbacks->data) != 0) )
-        ERROR("static_data_done() callback failed: %d\n", rc);
-
-    return rc;
-}
-
-static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = 0;
-
-    switch ( rec->type )
-    {
-    case REC_TYPE_END:
-        break;
-
-    case REC_TYPE_PAGE_DATA:
-        rc = handle_page_data(ctx, rec);
-        break;
-
-    case REC_TYPE_VERIFY:
-        DPRINTF("Verify mode enabled");
-        ctx->restore.verify = true;
-        break;
-
-    case REC_TYPE_CHECKPOINT:
-        rc = handle_checkpoint(ctx);
-        break;
-
-    case REC_TYPE_STATIC_DATA_END:
-        rc = handle_static_data_end(ctx);
-        break;
-
-    default:
-        rc = ctx->restore.ops.process_record(ctx, rec);
-        break;
-    }
-
-    free(rec->data);
-    rec->data = NULL;
-
-    return rc;
-}
-
-static int setup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->restore.dirty_bitmap_hbuf);
-
-    if ( ctx->stream_type == XC_STREAM_COLO )
-    {
-        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
-            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
-
-        if ( !dirty_bitmap )
-        {
-            ERROR("Unable to allocate memory for dirty bitmap");
-            rc = -1;
-            goto err;
-        }
-    }
-
-    rc = ctx->restore.ops.setup(ctx);
-    if ( rc )
-        goto err;
-
-    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
-    ctx->restore.populated_pfns = bitmap_alloc(
-        ctx->restore.max_populated_pfn + 1);
-    if ( !ctx->restore.populated_pfns )
-    {
-        ERROR("Unable to allocate memory for populated_pfns bitmap");
-        rc = -1;
-        goto err;
-    }
-
-    ctx->restore.buffered_records = malloc(
-        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
-    if ( !ctx->restore.buffered_records )
-    {
-        ERROR("Unable to allocate memory for buffered records");
-        rc = -1;
-        goto err;
-    }
-    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;
-
- err:
-    return rc;
-}
-
-static void cleanup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int i;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->restore.dirty_bitmap_hbuf);
-
-    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
-        free(ctx->restore.buffered_records[i].data);
-
-    if ( ctx->stream_type == XC_STREAM_COLO )
-        xc_hypercall_buffer_free_pages(
-            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));
-
-    free(ctx->restore.buffered_records);
-    free(ctx->restore.populated_pfns);
-
-    if ( ctx->restore.ops.cleanup(ctx) )
-        PERROR("Failed to clean up");
-}
-
-/*
- * Restore a domain.
- */
-static int restore(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_record rec;
-    int rc, saved_rc = 0, saved_errno = 0;
-
-    IPRINTF("Restoring domain");
-
-    rc = setup(ctx);
-    if ( rc )
-        goto err;
-
-    do
-    {
-        rc = read_record(ctx, ctx->fd, &rec);
-        if ( rc )
-        {
-            if ( ctx->restore.buffer_all_records )
-                goto remus_failover;
-            else
-                goto err;
-        }
-
-        if ( ctx->restore.buffer_all_records &&
-             rec.type != REC_TYPE_END &&
-             rec.type != REC_TYPE_CHECKPOINT )
-        {
-            rc = buffer_record(ctx, &rec);
-            if ( rc )
-                goto err;
-        }
-        else
-        {
-            rc = process_record(ctx, &rec);
-            if ( rc == RECORD_NOT_PROCESSED )
-            {
-                if ( rec.type & REC_TYPE_OPTIONAL )
-                    DPRINTF("Ignoring optional record %#x (%s)",
-                            rec.type, rec_type_to_str(rec.type));
-                else
-                {
-                    ERROR("Mandatory record %#x (%s) not handled",
-                          rec.type, rec_type_to_str(rec.type));
-                    rc = -1;
-                    goto err;
-                }
-            }
-            else if ( rc == BROKEN_CHANNEL )
-                goto remus_failover;
-            else if ( rc )
-                goto err;
-        }
-
-    } while ( rec.type != REC_TYPE_END );
-
- remus_failover:
-    if ( ctx->stream_type == XC_STREAM_COLO )
-    {
-        /* With COLO, we have already called stream_complete */
-        rc = 0;
-        IPRINTF("COLO Failover");
-        goto done;
-    }
-
-    /*
-     * With Remus, if we reach here, there must be some error on primary,
-     * failover from the last checkpoint state.
-     */
-    rc = ctx->restore.ops.stream_complete(ctx);
-    if ( rc )
-        goto err;
-
-    IPRINTF("Restore successful");
-    goto done;
-
- err:
-    saved_errno = errno;
-    saved_rc = rc;
-    PERROR("Restore failed");
-
- done:
-    cleanup(ctx);
-
-    if ( saved_rc )
-    {
-        rc = saved_rc;
-        errno = saved_errno;
-    }
-
-    return rc;
-}
-
-int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
-                      unsigned int store_evtchn, unsigned long *store_mfn,
-                      uint32_t store_domid, unsigned int console_evtchn,
-                      unsigned long *console_gfn, uint32_t console_domid,
-                      xc_stream_type_t stream_type,
-                      struct restore_callbacks *callbacks, int send_back_fd)
-{
-    xen_pfn_t nr_pfns;
-    struct xc_sr_context ctx = {
-        .xch = xch,
-        .fd = io_fd,
-        .stream_type = stream_type,
-    };
-
-    /* GCC 4.4 (of CentOS 6.x vintage) can' t initialise anonymous unions. */
-    ctx.restore.console_evtchn = console_evtchn;
-    ctx.restore.console_domid = console_domid;
-    ctx.restore.xenstore_evtchn = store_evtchn;
-    ctx.restore.xenstore_domid = store_domid;
-    ctx.restore.callbacks = callbacks;
-    ctx.restore.send_back_fd = send_back_fd;
-
-    /* Sanity check stream_type-related parameters */
-    switch ( stream_type )
-    {
-    case XC_STREAM_COLO:
-        assert(callbacks->suspend &&
-               callbacks->postcopy &&
-               callbacks->wait_checkpoint &&
-               callbacks->restore_results);
-        /* Fallthrough */
-    case XC_STREAM_REMUS:
-        assert(callbacks->checkpoint);
-        /* Fallthrough */
-    case XC_STREAM_PLAIN:
-        break;
-
-    default:
-        assert(!"Bad stream_type");
-        break;
-    }
-
-    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
-    {
-        PERROR("Failed to get domain info");
-        return -1;
-    }
-
-    if ( ctx.dominfo.domid != dom )
-    {
-        ERROR("Domain %u does not exist", dom);
-        return -1;
-    }
-
-    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
-            io_fd, dom, ctx.dominfo.hvm, stream_type);
-
-    ctx.domid = dom;
-
-    if ( read_headers(&ctx) )
-        return -1;
-
-    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
-    {
-        PERROR("Unable to obtain the guest p2m size");
-        return -1;
-    }
-
-    ctx.restore.p2m_size = nr_pfns;
-    ctx.restore.ops = ctx.dominfo.hvm
-        ? restore_ops_x86_hvm : restore_ops_x86_pv;
-
-    if ( restore(&ctx) )
-        return -1;
-
-    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
-            ctx.restore.xenstore_gfn,
-            ctx.restore.xenstore_domid,
-            ctx.restore.xenstore_evtchn);
-
-    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
-            ctx.restore.console_gfn,
-            ctx.restore.console_domid,
-            ctx.restore.console_evtchn);
-
-    *console_gfn = ctx.restore.console_gfn;
-    *store_mfn = ctx.restore.xenstore_gfn;
-
-    return 0;
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_restore_x86_hvm.c b/tools/libxc/xg_sr_restore_x86_hvm.c
deleted file mode 100644 (file)
index d6ea6f3..0000000
+++ /dev/null
@@ -1,274 +0,0 @@
-#include <assert.h>
-#include <arpa/inet.h>
-
-#include "xg_sr_common_x86.h"
-
-/*
- * Process an HVM_CONTEXT record from the stream.
- */
-static int handle_hvm_context(struct xc_sr_context *ctx,
-                              struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = update_blob(&ctx->x86.hvm.restore.context, rec->data, rec->length);
-
-    if ( rc )
-        ERROR("Unable to allocate %u bytes for hvm context", rec->length);
-
-    return rc;
-}
-
-/*
- * Process an HVM_PARAMS record from the stream.
- */
-static int handle_hvm_params(struct xc_sr_context *ctx,
-                             struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_hvm_params *hdr = rec->data;
-    struct xc_sr_rec_hvm_params_entry *entry = hdr->param;
-    unsigned int i;
-    int rc;
-
-    if ( rec->length < sizeof(*hdr) )
-    {
-        ERROR("HVM_PARAMS record truncated: length %u, header size %zu",
-              rec->length, sizeof(*hdr));
-        return -1;
-    }
-
-    if ( rec->length != (sizeof(*hdr) + hdr->count * sizeof(*entry)) )
-    {
-        ERROR("HVM_PARAMS record truncated: header %zu, count %u, "
-              "expected len %zu, got %u",
-              sizeof(*hdr), hdr->count, hdr->count * sizeof(*entry),
-              rec->length);
-        return -1;
-    }
-
-    /*
-     * Tolerate empty records.  Older sending sides used to accidentally
-     * generate them.
-     */
-    if ( hdr->count == 0 )
-    {
-        DBGPRINTF("Skipping empty HVM_PARAMS record\n");
-        return 0;
-    }
-
-    for ( i = 0; i < hdr->count; i++, entry++ )
-    {
-        switch ( entry->index )
-        {
-        case HVM_PARAM_CONSOLE_PFN:
-            ctx->restore.console_gfn = entry->value;
-            xc_clear_domain_page(xch, ctx->domid, entry->value);
-            break;
-        case HVM_PARAM_STORE_PFN:
-            ctx->restore.xenstore_gfn = entry->value;
-            xc_clear_domain_page(xch, ctx->domid, entry->value);
-            break;
-        case HVM_PARAM_IOREQ_PFN:
-        case HVM_PARAM_BUFIOREQ_PFN:
-            xc_clear_domain_page(xch, ctx->domid, entry->value);
-            break;
-
-        case HVM_PARAM_PAE_ENABLED:
-            /*
-             * This HVM_PARAM only ever existed to pass data into
-             * xc_cpuid_apply_policy().  The function has now been updated to
-             * use a normal calling convention, making the param obsolete.
-             *
-             * Discard if we find it in an old migration stream.
-             */
-            continue;
-        }
-
-        rc = xc_hvm_param_set(xch, ctx->domid, entry->index, entry->value);
-        if ( rc < 0 )
-        {
-            PERROR("set HVM param %"PRId64" = 0x%016"PRIx64,
-                   entry->index, entry->value);
-            return rc;
-        }
-    }
-    return 0;
-}
-
-/* restore_ops function. */
-static bool x86_hvm_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    return true;
-}
-
-/* restore_ops function. */
-static xen_pfn_t x86_hvm_pfn_to_gfn(const struct xc_sr_context *ctx,
-                                    xen_pfn_t pfn)
-{
-    return pfn;
-}
-
-/* restore_ops function. */
-static void x86_hvm_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
-                            xen_pfn_t gfn)
-{
-    /* no op */
-}
-
-/* restore_ops function. */
-static void x86_hvm_set_page_type(struct xc_sr_context *ctx,
-                                  xen_pfn_t pfn, xen_pfn_t type)
-{
-    /* no-op */
-}
-
-/* restore_ops function. */
-static int x86_hvm_localise_page(struct xc_sr_context *ctx,
-                                 uint32_t type, void *page)
-{
-    /* no-op */
-    return 0;
-}
-
-/*
- * restore_ops function. Confirms the stream matches the domain.
- */
-static int x86_hvm_setup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-
-    if ( ctx->restore.guest_type != DHDR_TYPE_X86_HVM )
-    {
-        ERROR("Unable to restore %s domain into an x86 HVM domain",
-              dhdr_type_to_str(ctx->restore.guest_type));
-        return -1;
-    }
-
-    if ( ctx->restore.guest_page_size != PAGE_SIZE )
-    {
-        ERROR("Invalid page size %u for x86 HVM domains",
-              ctx->restore.guest_page_size);
-        return -1;
-    }
-
-#ifdef __i386__
-    /* Very large domains (> 1TB) will exhaust virtual address space. */
-    if ( ctx->restore.p2m_size > 0x0fffffff )
-    {
-        errno = E2BIG;
-        PERROR("Cannot restore this big a guest");
-        return -1;
-    }
-#endif
-
-    return 0;
-}
-
-/*
- * restore_ops function.
- */
-static int x86_hvm_process_record(struct xc_sr_context *ctx,
-                                  struct xc_sr_record *rec)
-{
-    switch ( rec->type )
-    {
-    case REC_TYPE_X86_TSC_INFO:
-        return handle_x86_tsc_info(ctx, rec);
-
-    case REC_TYPE_HVM_CONTEXT:
-        return handle_hvm_context(ctx, rec);
-
-    case REC_TYPE_HVM_PARAMS:
-        return handle_hvm_params(ctx, rec);
-
-    case REC_TYPE_X86_CPUID_POLICY:
-        return handle_x86_cpuid_policy(ctx, rec);
-
-    case REC_TYPE_X86_MSR_POLICY:
-        return handle_x86_msr_policy(ctx, rec);
-
-    default:
-        return RECORD_NOT_PROCESSED;
-    }
-}
-
-/*
- * restore_ops function.  Sets extra hvm parameters and seeds the grant table.
- */
-static int x86_hvm_stream_complete(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_STORE_EVTCHN,
-                          ctx->restore.xenstore_evtchn);
-    if ( rc )
-    {
-        PERROR("Failed to set HVM_PARAM_STORE_EVTCHN");
-        return rc;
-    }
-
-    rc = xc_hvm_param_set(xch, ctx->domid, HVM_PARAM_CONSOLE_EVTCHN,
-                          ctx->restore.console_evtchn);
-    if ( rc )
-    {
-        PERROR("Failed to set HVM_PARAM_CONSOLE_EVTCHN");
-        return rc;
-    }
-
-    rc = xc_domain_hvm_setcontext(xch, ctx->domid,
-                                  ctx->x86.hvm.restore.context.ptr,
-                                  ctx->x86.hvm.restore.context.size);
-    if ( rc < 0 )
-    {
-        PERROR("Unable to restore HVM context");
-        return rc;
-    }
-
-    rc = xc_dom_gnttab_seed(xch, ctx->domid, true,
-                            ctx->restore.console_gfn,
-                            ctx->restore.xenstore_gfn,
-                            ctx->restore.console_domid,
-                            ctx->restore.xenstore_domid);
-    if ( rc )
-    {
-        PERROR("Failed to seed grant table");
-        return rc;
-    }
-
-    return rc;
-}
-
-static int x86_hvm_cleanup(struct xc_sr_context *ctx)
-{
-    free(ctx->x86.hvm.restore.context.ptr);
-
-    free(ctx->x86.restore.cpuid.ptr);
-    free(ctx->x86.restore.msr.ptr);
-
-    return 0;
-}
-
-struct xc_sr_restore_ops restore_ops_x86_hvm =
-{
-    .pfn_is_valid    = x86_hvm_pfn_is_valid,
-    .pfn_to_gfn      = x86_hvm_pfn_to_gfn,
-    .set_gfn         = x86_hvm_set_gfn,
-    .set_page_type   = x86_hvm_set_page_type,
-    .localise_page   = x86_hvm_localise_page,
-    .setup           = x86_hvm_setup,
-    .process_record  = x86_hvm_process_record,
-    .static_data_complete = x86_static_data_complete,
-    .stream_complete = x86_hvm_stream_complete,
-    .cleanup         = x86_hvm_cleanup,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_restore_x86_pv.c b/tools/libxc/xg_sr_restore_x86_pv.c
deleted file mode 100644 (file)
index dc50b0f..0000000
+++ /dev/null
@@ -1,1210 +0,0 @@
-#include <assert.h>
-
-#include "xg_sr_common_x86_pv.h"
-
-static xen_pfn_t pfn_to_mfn(const struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    assert(pfn <= ctx->x86.pv.max_pfn);
-
-    return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
-}
-
-/*
- * Expand our local tracking information for the p2m table and domains maximum
- * size.  Normally this will be called once to expand from 0 to max_pfn, but
- * is liable to expand multiple times if the domain grows on the sending side
- * after migration has started.
- */
-static int expand_p2m(struct xc_sr_context *ctx, unsigned long max_pfn)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned long old_max = ctx->x86.pv.max_pfn, i;
-    unsigned int fpp = PAGE_SIZE / ctx->x86.pv.width;
-    unsigned long end_frame = (max_pfn / fpp) + 1;
-    unsigned long old_end_frame = (old_max / fpp) + 1;
-    xen_pfn_t *p2m = NULL, *p2m_pfns = NULL;
-    uint32_t *pfn_types = NULL;
-    size_t p2msz, p2m_pfnsz, pfn_typesz;
-
-    assert(max_pfn > old_max);
-
-    p2msz = (max_pfn + 1) * ctx->x86.pv.width;
-    p2m = realloc(ctx->x86.pv.p2m, p2msz);
-    if ( !p2m )
-    {
-        ERROR("Failed to (re)alloc %zu bytes for p2m", p2msz);
-        return -1;
-    }
-    ctx->x86.pv.p2m = p2m;
-
-    pfn_typesz = (max_pfn + 1) * sizeof(*pfn_types);
-    pfn_types = realloc(ctx->x86.pv.restore.pfn_types, pfn_typesz);
-    if ( !pfn_types )
-    {
-        ERROR("Failed to (re)alloc %zu bytes for pfn_types", pfn_typesz);
-        return -1;
-    }
-    ctx->x86.pv.restore.pfn_types = pfn_types;
-
-    p2m_pfnsz = (end_frame + 1) * sizeof(*p2m_pfns);
-    p2m_pfns = realloc(ctx->x86.pv.p2m_pfns, p2m_pfnsz);
-    if ( !p2m_pfns )
-    {
-        ERROR("Failed to (re)alloc %zu bytes for p2m frame list", p2m_pfnsz);
-        return -1;
-    }
-    ctx->x86.pv.p2m_frames = end_frame;
-    ctx->x86.pv.p2m_pfns = p2m_pfns;
-
-    ctx->x86.pv.max_pfn = max_pfn;
-    for ( i = (old_max ? old_max + 1 : 0); i <= max_pfn; ++i )
-    {
-        ctx->restore.ops.set_gfn(ctx, i, INVALID_MFN);
-        ctx->restore.ops.set_page_type(ctx, i, 0);
-    }
-
-    for ( i = (old_end_frame ? old_end_frame + 1 : 0); i <= end_frame; ++i )
-        ctx->x86.pv.p2m_pfns[i] = INVALID_MFN;
-
-    DPRINTF("Changed max_pfn from %#lx to %#lx", old_max, max_pfn);
-    return 0;
-}
-
-/*
- * Pin all of the pagetables.
- */
-static int pin_pagetables(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned long i, nr_pins;
-    struct mmuext_op pin[MAX_PIN_BATCH];
-
-    for ( i = nr_pins = 0; i <= ctx->x86.pv.max_pfn; ++i )
-    {
-        if ( (ctx->x86.pv.restore.pfn_types[i] &
-              XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
-            continue;
-
-        switch ( (ctx->x86.pv.restore.pfn_types[i] &
-                  XEN_DOMCTL_PFINFO_LTABTYPE_MASK) )
-        {
-        case XEN_DOMCTL_PFINFO_L1TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
-            break;
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
-            break;
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
-            break;
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
-            break;
-        default:
-            continue;
-        }
-
-        pin[nr_pins].arg1.mfn = pfn_to_mfn(ctx, i);
-        nr_pins++;
-
-        if ( nr_pins == MAX_PIN_BATCH )
-        {
-            if ( xc_mmuext_op(xch, pin, nr_pins, ctx->domid) != 0 )
-            {
-                PERROR("Failed to pin batch of pagetables");
-                return -1;
-            }
-            nr_pins = 0;
-        }
-    }
-
-    if ( (nr_pins > 0) && (xc_mmuext_op(xch, pin, nr_pins, ctx->domid) < 0) )
-    {
-        PERROR("Failed to pin batch of pagetables");
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Update details in a guests start_info structure.
- */
-static int process_start_info(struct xc_sr_context *ctx,
-                              vcpu_guest_context_any_t *vcpu)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t pfn, mfn;
-    start_info_any_t *guest_start_info = NULL;
-    int rc = -1;
-
-    pfn = GET_FIELD(vcpu, user_regs.edx, ctx->x86.pv.width);
-
-    if ( pfn > ctx->x86.pv.max_pfn )
-    {
-        ERROR("Start Info pfn %#lx out of range", pfn);
-        goto err;
-    }
-
-    if ( ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB )
-    {
-        ERROR("Start Info pfn %#lx has bad type %u", pfn,
-              (ctx->x86.pv.restore.pfn_types[pfn] >>
-               XEN_DOMCTL_PFINFO_LTAB_SHIFT));
-        goto err;
-    }
-
-    mfn = pfn_to_mfn(ctx, pfn);
-    if ( !mfn_in_pseudophysmap(ctx, mfn) )
-    {
-        ERROR("Start Info has bad mfn");
-        dump_bad_pseudophysmap_entry(ctx, mfn);
-        goto err;
-    }
-
-    SET_FIELD(vcpu, user_regs.edx, mfn, ctx->x86.pv.width);
-    guest_start_info = xc_map_foreign_range(
-        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
-    if ( !guest_start_info )
-    {
-        PERROR("Failed to map Start Info at mfn %#lx", mfn);
-        goto err;
-    }
-
-    /* Deal with xenstore stuff */
-    pfn = GET_FIELD(guest_start_info, store_mfn, ctx->x86.pv.width);
-    if ( pfn > ctx->x86.pv.max_pfn )
-    {
-        ERROR("XenStore pfn %#lx out of range", pfn);
-        goto err;
-    }
-
-    mfn = pfn_to_mfn(ctx, pfn);
-    if ( !mfn_in_pseudophysmap(ctx, mfn) )
-    {
-        ERROR("XenStore pfn has bad mfn");
-        dump_bad_pseudophysmap_entry(ctx, mfn);
-        goto err;
-    }
-
-    ctx->restore.xenstore_gfn = mfn;
-    SET_FIELD(guest_start_info, store_mfn, mfn, ctx->x86.pv.width);
-    SET_FIELD(guest_start_info, store_evtchn,
-              ctx->restore.xenstore_evtchn, ctx->x86.pv.width);
-
-    /* Deal with console stuff */
-    pfn = GET_FIELD(guest_start_info, console.domU.mfn, ctx->x86.pv.width);
-    if ( pfn > ctx->x86.pv.max_pfn )
-    {
-        ERROR("Console pfn %#lx out of range", pfn);
-        goto err;
-    }
-
-    mfn = pfn_to_mfn(ctx, pfn);
-    if ( !mfn_in_pseudophysmap(ctx, mfn) )
-    {
-        ERROR("Console pfn has bad mfn");
-        dump_bad_pseudophysmap_entry(ctx, mfn);
-        goto err;
-    }
-
-    ctx->restore.console_gfn = mfn;
-    SET_FIELD(guest_start_info, console.domU.mfn, mfn, ctx->x86.pv.width);
-    SET_FIELD(guest_start_info, console.domU.evtchn,
-              ctx->restore.console_evtchn, ctx->x86.pv.width);
-
-    /* Set other information */
-    SET_FIELD(guest_start_info, nr_pages,
-              ctx->x86.pv.max_pfn + 1, ctx->x86.pv.width);
-    SET_FIELD(guest_start_info, shared_info,
-              ctx->dominfo.shared_info_frame << PAGE_SHIFT, ctx->x86.pv.width);
-    SET_FIELD(guest_start_info, flags, 0, ctx->x86.pv.width);
-
-    rc = 0;
-
- err:
-    if ( guest_start_info )
-        munmap(guest_start_info, PAGE_SIZE);
-
-    return rc;
-}
-
-/*
- * Process one stashed vcpu worth of basic state and send to Xen.
- */
-static int process_vcpu_basic(struct xc_sr_context *ctx,
-                              unsigned int vcpuid)
-{
-    xc_interface *xch = ctx->xch;
-    vcpu_guest_context_any_t *vcpu = ctx->x86.pv.restore.vcpus[vcpuid].basic.ptr;
-    xen_pfn_t pfn, mfn;
-    unsigned int i, gdt_count;
-    int rc = -1;
-
-    /* Vcpu 0 is special: Convert the suspend record to an mfn. */
-    if ( vcpuid == 0 )
-    {
-        rc = process_start_info(ctx, vcpu);
-        if ( rc )
-            return rc;
-        rc = -1;
-    }
-
-    SET_FIELD(vcpu, flags,
-              GET_FIELD(vcpu, flags, ctx->x86.pv.width) | VGCF_online,
-              ctx->x86.pv.width);
-
-    gdt_count = GET_FIELD(vcpu, gdt_ents, ctx->x86.pv.width);
-    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
-    {
-        ERROR("GDT entry count (%u) out of range (max %u)",
-              gdt_count, FIRST_RESERVED_GDT_ENTRY);
-        errno = ERANGE;
-        goto err;
-    }
-    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */
-
-    /* Convert GDT frames to mfns. */
-    for ( i = 0; i < gdt_count; ++i )
-    {
-        pfn = GET_FIELD(vcpu, gdt_frames[i], ctx->x86.pv.width);
-        if ( pfn > ctx->x86.pv.max_pfn )
-        {
-            ERROR("GDT frame %u (pfn %#lx) out of range", i, pfn);
-            goto err;
-        }
-
-        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-        {
-            ERROR("GDT frame %u (pfn %#lx) has bad type %u", i, pfn,
-                  (ctx->x86.pv.restore.pfn_types[pfn] >>
-                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
-            goto err;
-        }
-
-        mfn = pfn_to_mfn(ctx, pfn);
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("GDT frame %u has bad mfn", i);
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            goto err;
-        }
-
-        SET_FIELD(vcpu, gdt_frames[i], mfn, ctx->x86.pv.width);
-    }
-
-    /* Convert CR3 to an mfn. */
-    pfn = cr3_to_mfn(ctx, GET_FIELD(vcpu, ctrlreg[3], ctx->x86.pv.width));
-    if ( pfn > ctx->x86.pv.max_pfn )
-    {
-        ERROR("cr3 (pfn %#lx) out of range", pfn);
-        goto err;
-    }
-
-    if ( (ctx->x86.pv.restore.pfn_types[pfn] &
-          XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-         (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
-    {
-        ERROR("cr3 (pfn %#lx) has bad type %u, expected %u", pfn,
-              (ctx->x86.pv.restore.pfn_types[pfn] >>
-               XEN_DOMCTL_PFINFO_LTAB_SHIFT),
-              ctx->x86.pv.levels);
-        goto err;
-    }
-
-    mfn = pfn_to_mfn(ctx, pfn);
-    if ( !mfn_in_pseudophysmap(ctx, mfn) )
-    {
-        ERROR("cr3 has bad mfn");
-        dump_bad_pseudophysmap_entry(ctx, mfn);
-        goto err;
-    }
-
-    SET_FIELD(vcpu, ctrlreg[3], mfn_to_cr3(ctx, mfn), ctx->x86.pv.width);
-
-    /* 64bit guests: Convert CR1 (guest pagetables) to mfn. */
-    if ( ctx->x86.pv.levels == 4 && (vcpu->x64.ctrlreg[1] & 1) )
-    {
-        pfn = vcpu->x64.ctrlreg[1] >> PAGE_SHIFT;
-
-        if ( pfn > ctx->x86.pv.max_pfn )
-        {
-            ERROR("cr1 (pfn %#lx) out of range", pfn);
-            goto err;
-        }
-
-        if ( (ctx->x86.pv.restore.pfn_types[pfn] &
-              XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
-             (((xen_pfn_t)ctx->x86.pv.levels) << XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
-        {
-            ERROR("cr1 (pfn %#lx) has bad type %u, expected %u", pfn,
-                  (ctx->x86.pv.restore.pfn_types[pfn] >>
-                   XEN_DOMCTL_PFINFO_LTAB_SHIFT),
-                  ctx->x86.pv.levels);
-            goto err;
-        }
-
-        mfn = pfn_to_mfn(ctx, pfn);
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("cr1 has bad mfn");
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            goto err;
-        }
-
-        vcpu->x64.ctrlreg[1] = (uint64_t)mfn << PAGE_SHIFT;
-    }
-
-    if ( xc_vcpu_setcontext(xch, ctx->domid, vcpuid, vcpu) )
-    {
-        PERROR("Failed to set vcpu%u's basic info", vcpuid);
-        goto err;
-    }
-
-    rc = 0;
-
- err:
-    return rc;
-}
-
-/*
- * Process one stashed vcpu worth of extended state and send to Xen.
- */
-static int process_vcpu_extended(struct xc_sr_context *ctx,
-                                 unsigned int vcpuid)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_x86_pv_restore_vcpu *vcpu =
-        &ctx->x86.pv.restore.vcpus[vcpuid];
-    DECLARE_DOMCTL;
-
-    domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
-    domctl.domain = ctx->domid;
-    memcpy(&domctl.u.ext_vcpucontext, vcpu->extd.ptr, vcpu->extd.size);
-
-    if ( xc_domctl(xch, &domctl) != 0 )
-    {
-        PERROR("Failed to set vcpu%u's extended info", vcpuid);
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Process one stashed vcpu worth of xsave state and send to Xen.
- */
-static int process_vcpu_xsave(struct xc_sr_context *ctx,
-                              unsigned int vcpuid)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_x86_pv_restore_vcpu *vcpu =
-        &ctx->x86.pv.restore.vcpus[vcpuid];
-    int rc;
-    DECLARE_DOMCTL;
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-
-    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->xsave.size);
-    if ( !buffer )
-    {
-        ERROR("Unable to allocate %zu bytes for xsave hypercall buffer",
-              vcpu->xsave.size);
-        return -1;
-    }
-
-    domctl.cmd = XEN_DOMCTL_setvcpuextstate;
-    domctl.domain = ctx->domid;
-    domctl.u.vcpuextstate.vcpu = vcpuid;
-    domctl.u.vcpuextstate.size = vcpu->xsave.size;
-    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
-
-    memcpy(buffer, vcpu->xsave.ptr, vcpu->xsave.size);
-
-    rc = xc_domctl(xch, &domctl);
-    if ( rc )
-        PERROR("Failed to set vcpu%u's xsave info", vcpuid);
-
-    xc_hypercall_buffer_free(xch, buffer);
-
-    return rc;
-}
-
-/*
- * Process one stashed vcpu worth of msr state and send to Xen.
- */
-static int process_vcpu_msrs(struct xc_sr_context *ctx,
-                             unsigned int vcpuid)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_x86_pv_restore_vcpu *vcpu =
-        &ctx->x86.pv.restore.vcpus[vcpuid];
-    int rc;
-    DECLARE_DOMCTL;
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-
-    buffer = xc_hypercall_buffer_alloc(xch, buffer, vcpu->msr.size);
-    if ( !buffer )
-    {
-        ERROR("Unable to allocate %zu bytes for msr hypercall buffer",
-              vcpu->msr.size);
-        return -1;
-    }
-
-    domctl.cmd = XEN_DOMCTL_set_vcpu_msrs;
-    domctl.domain = ctx->domid;
-    domctl.u.vcpu_msrs.vcpu = vcpuid;
-    domctl.u.vcpu_msrs.msr_count = vcpu->msr.size / sizeof(xen_domctl_vcpu_msr_t);
-    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);
-
-    memcpy(buffer, vcpu->msr.ptr, vcpu->msr.size);
-
-    rc = xc_domctl(xch, &domctl);
-    if ( rc )
-        PERROR("Failed to set vcpu%u's msrs", vcpuid);
-
-    xc_hypercall_buffer_free(xch, buffer);
-
-    return rc;
-}
-
-/*
- * Process all stashed vcpu context and send to Xen.
- */
-static int update_vcpu_context(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_x86_pv_restore_vcpu *vcpu;
-    unsigned int i;
-    int rc = 0;
-
-    for ( i = 0; i < ctx->x86.pv.restore.nr_vcpus; ++i )
-    {
-        vcpu = &ctx->x86.pv.restore.vcpus[i];
-
-        if ( vcpu->basic.ptr )
-        {
-            rc = process_vcpu_basic(ctx, i);
-            if ( rc )
-                return rc;
-        }
-        else if ( i == 0 )
-        {
-            ERROR("Sender didn't send vcpu0's basic state");
-            return -1;
-        }
-
-        if ( vcpu->extd.ptr )
-        {
-            rc = process_vcpu_extended(ctx, i);
-            if ( rc )
-                return rc;
-        }
-
-        if ( vcpu->xsave.ptr )
-        {
-            rc = process_vcpu_xsave(ctx, i);
-            if ( rc )
-                return rc;
-        }
-
-        if ( vcpu->msr.ptr )
-        {
-            rc = process_vcpu_msrs(ctx, i);
-            if ( rc )
-                return rc;
-        }
-    }
-
-    return rc;
-}
-
-/*
- * Copy the p2m which has been constructed locally as memory has been
- * allocated, over the p2m in guest, so the guest can find its memory again on
- * resume.
- */
-static int update_guest_p2m(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t mfn, pfn, *guest_p2m = NULL;
-    unsigned int i;
-    int rc = -1;
-
-    for ( i = 0; i < ctx->x86.pv.p2m_frames; ++i )
-    {
-        pfn = ctx->x86.pv.p2m_pfns[i];
-
-        if ( pfn > ctx->x86.pv.max_pfn )
-        {
-            ERROR("pfn (%#lx) for p2m_frame_list[%u] out of range",
-                  pfn, i);
-            goto err;
-        }
-
-        if ( (ctx->x86.pv.restore.pfn_types[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
-        {
-            ERROR("pfn (%#lx) for p2m_frame_list[%u] has bad type %u", pfn, i,
-                  (ctx->x86.pv.restore.pfn_types[pfn] >>
-                   XEN_DOMCTL_PFINFO_LTAB_SHIFT));
-            goto err;
-        }
-
-        mfn = pfn_to_mfn(ctx, pfn);
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("p2m_frame_list[%u] has bad mfn", i);
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            goto err;
-        }
-
-        ctx->x86.pv.p2m_pfns[i] = mfn;
-    }
-
-    guest_p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_WRITE,
-                                     ctx->x86.pv.p2m_pfns,
-                                     ctx->x86.pv.p2m_frames);
-    if ( !guest_p2m )
-    {
-        PERROR("Failed to map p2m frames");
-        goto err;
-    }
-
-    memcpy(guest_p2m, ctx->x86.pv.p2m,
-           (ctx->x86.pv.max_pfn + 1) * ctx->x86.pv.width);
-    rc = 0;
-
- err:
-    if ( guest_p2m )
-        munmap(guest_p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);
-
-    return rc;
-}
-
-/*
- * The valid width/pt_levels values in X86_PV_INFO are inextricably linked.
- * Cross-check the legitimate combinations.
- */
-static bool valid_x86_pv_info_combination(
-    const struct xc_sr_rec_x86_pv_info *info)
-{
-    switch ( info->guest_width )
-    {
-    case 4:  return info->pt_levels == 3;
-    case 8:  return info->pt_levels == 4;
-    default: return false;
-    }
-}
-
-/*
- * Process an X86_PV_INFO record.
- */
-static int handle_x86_pv_info(struct xc_sr_context *ctx,
-                              struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_pv_info *info = rec->data;
-
-    if ( ctx->x86.pv.restore.seen_pv_info )
-    {
-        ERROR("Already received X86_PV_INFO record");
-        return -1;
-    }
-
-    if ( rec->length < sizeof(*info) )
-    {
-        ERROR("X86_PV_INFO record truncated: length %u, expected %zu",
-              rec->length, sizeof(*info));
-        return -1;
-    }
-
-    if ( !valid_x86_pv_info_combination(info) )
-    {
-        ERROR("Invalid X86_PV_INFO combination: width %u, pt_levels %u",
-              info->guest_width, info->pt_levels);
-        return -1;
-    }
-
-    /*
-     * PV domains default to native width.  For an incomming compat domain, we
-     * will typically be the first entity to inform Xen.
-     */
-    if ( info->guest_width != ctx->x86.pv.width )
-    {
-        struct xen_domctl domctl = {
-            .domain = ctx->domid,
-            .cmd    = XEN_DOMCTL_set_address_size,
-            .u.address_size.size = info->guest_width * 8,
-        };
-        int rc = do_domctl(xch, &domctl);
-
-        if ( rc != 0 )
-        {
-            ERROR("Failed to update d%d address size to %u",
-                  ctx->domid, info->guest_width * 8);
-            return -1;
-        }
-
-        /* Domain's information changed, better to refresh. */
-        rc = x86_pv_domain_info(ctx);
-        if ( rc != 0 )
-        {
-            ERROR("Unable to refresh guest information");
-            return -1;
-        }
-    }
-
-    /* Sanity check (possibly new) domain settings. */
-    if ( (info->guest_width != ctx->x86.pv.width) ||
-         (info->pt_levels   != ctx->x86.pv.levels) )
-    {
-        ERROR("X86_PV_INFO width/pt_levels settings %u/%u mismatch with d%d %u/%u",
-              info->guest_width, info->pt_levels, ctx->domid,
-              ctx->x86.pv.width, ctx->x86.pv.levels);
-        return -1;
-    }
-
-    ctx->x86.pv.restore.seen_pv_info = true;
-    return 0;
-}
-
-/*
- * Process an X86_PV_P2M_FRAMES record.  Takes care of expanding the local p2m
- * state if needed.
- */
-static int handle_x86_pv_p2m_frames(struct xc_sr_context *ctx,
-                                    struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_pv_p2m_frames *data = rec->data;
-    unsigned int start, end, x, fpp = PAGE_SIZE / ctx->x86.pv.width;
-    int rc;
-
-    /* v2 compat.  Infer the position of STATIC_DATA_END. */
-    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
-    {
-        rc = handle_static_data_end(ctx);
-        if ( rc )
-        {
-            ERROR("Inferred STATIC_DATA_END record failed");
-            return rc;
-        }
-    }
-
-    if ( !ctx->restore.seen_static_data_end )
-    {
-        ERROR("No STATIC_DATA_END seen");
-        return -1;
-    }
-
-    if ( !ctx->x86.pv.restore.seen_pv_info )
-    {
-        ERROR("Not yet received X86_PV_INFO record");
-        return -1;
-    }
-
-    if ( rec->length < sizeof(*data) )
-    {
-        ERROR("X86_PV_P2M_FRAMES record truncated: length %u, min %zu",
-              rec->length, sizeof(*data) + sizeof(uint64_t));
-        return -1;
-    }
-
-    if ( data->start_pfn > data->end_pfn )
-    {
-        ERROR("End pfn in stream (%#x) exceeds Start (%#x)",
-              data->end_pfn, data->start_pfn);
-        return -1;
-    }
-
-    start =  data->start_pfn / fpp;
-    end = data->end_pfn / fpp + 1;
-
-    if ( rec->length != sizeof(*data) + ((end - start) * sizeof(uint64_t)) )
-    {
-        ERROR("X86_PV_P2M_FRAMES record wrong size: start_pfn %#x"
-              ", end_pfn %#x, length %u, expected %zu + (%u - %u) * %zu",
-              data->start_pfn, data->end_pfn, rec->length,
-              sizeof(*data), end, start, sizeof(uint64_t));
-        return -1;
-    }
-
-    if ( data->end_pfn > ctx->x86.pv.max_pfn )
-    {
-        rc = expand_p2m(ctx, data->end_pfn);
-        if ( rc )
-            return rc;
-    }
-
-    for ( x = 0; x < (end - start); ++x )
-        ctx->x86.pv.p2m_pfns[start + x] = data->p2m_pfns[x];
-
-    return 0;
-}
-
-/*
- * Processes X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} records from the stream.
- * The blobs are all stashed to one side as they need to be deferred until the
- * very end of the stream, rather than being send to Xen at the point they
- * arrive in the stream.  It performs all pre-hypercall size validation.
- */
-static int handle_x86_pv_vcpu_blob(struct xc_sr_context *ctx,
-                                   struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_pv_vcpu_hdr *vhdr = rec->data;
-    struct xc_sr_x86_pv_restore_vcpu *vcpu;
-    const char *rec_name;
-    size_t blobsz;
-    struct xc_sr_blob *blob = NULL;
-    int rc = -1;
-
-    switch ( rec->type )
-    {
-    case REC_TYPE_X86_PV_VCPU_BASIC:
-        rec_name = "X86_PV_VCPU_BASIC";
-        break;
-
-    case REC_TYPE_X86_PV_VCPU_EXTENDED:
-        rec_name = "X86_PV_VCPU_EXTENDED";
-        break;
-
-    case REC_TYPE_X86_PV_VCPU_XSAVE:
-        rec_name = "X86_PV_VCPU_XSAVE";
-        break;
-
-    case REC_TYPE_X86_PV_VCPU_MSRS:
-        rec_name = "X86_PV_VCPU_MSRS";
-        break;
-
-    default:
-        ERROR("Unrecognised vcpu blob record %s (%u)",
-              rec_type_to_str(rec->type), rec->type);
-        goto out;
-    }
-
-    /* Confirm that there is a complete header. */
-    if ( rec->length < sizeof(*vhdr) )
-    {
-        ERROR("%s record truncated: length %u, header size %zu",
-              rec_name, rec->length, sizeof(*vhdr));
-        goto out;
-    }
-
-    blobsz = rec->length - sizeof(*vhdr);
-
-    /*
-     * Tolerate empty records.  Older sending sides used to accidentally
-     * generate them.
-     */
-    if ( blobsz == 0 )
-    {
-        DBGPRINTF("Skipping empty %s record for vcpu %u\n",
-                  rec_type_to_str(rec->type), vhdr->vcpu_id);
-        rc = 0;
-        goto out;
-    }
-
-    /* Check that the vcpu id is within range. */
-    if ( vhdr->vcpu_id >= ctx->x86.pv.restore.nr_vcpus )
-    {
-        ERROR("%s record vcpu_id (%u) exceeds domain max (%u)",
-              rec_name, vhdr->vcpu_id, ctx->x86.pv.restore.nr_vcpus - 1);
-        goto out;
-    }
-
-    vcpu = &ctx->x86.pv.restore.vcpus[vhdr->vcpu_id];
-
-    /* Further per-record checks, where possible. */
-    switch ( rec->type )
-    {
-    case REC_TYPE_X86_PV_VCPU_BASIC:
-    {
-        size_t vcpusz = ctx->x86.pv.width == 8 ?
-            sizeof(vcpu_guest_context_x86_64_t) :
-            sizeof(vcpu_guest_context_x86_32_t);
-
-        if ( blobsz != vcpusz )
-        {
-            ERROR("%s record wrong size: expected %zu, got %u",
-                  rec_name, sizeof(*vhdr) + vcpusz, rec->length);
-            goto out;
-        }
-        blob = &vcpu->basic;
-        break;
-    }
-
-    case REC_TYPE_X86_PV_VCPU_EXTENDED:
-        if ( blobsz > 128 )
-        {
-            ERROR("%s record too long: max %zu, got %u",
-                  rec_name, sizeof(*vhdr) + 128, rec->length);
-            goto out;
-        }
-        blob = &vcpu->extd;
-        break;
-
-    case REC_TYPE_X86_PV_VCPU_XSAVE:
-        if ( blobsz < 16 )
-        {
-            ERROR("%s record too short: min %zu, got %u",
-                  rec_name, sizeof(*vhdr) + 16, rec->length);
-            goto out;
-        }
-        blob = &vcpu->xsave;
-        break;
-
-    case REC_TYPE_X86_PV_VCPU_MSRS:
-        if ( blobsz % sizeof(xen_domctl_vcpu_msr_t) != 0 )
-        {
-            ERROR("%s record payload size %zu expected to be a multiple of %zu",
-                  rec_name, blobsz, sizeof(xen_domctl_vcpu_msr_t));
-            goto out;
-        }
-        blob = &vcpu->msr;
-        break;
-    }
-
-    rc = update_blob(blob, vhdr->context, blobsz);
-    if ( rc )
-        ERROR("Unable to allocate %zu bytes for vcpu%u %s blob",
-              blobsz, vhdr->vcpu_id, rec_name);
-
- out:
-    return rc;
-}
-
-/*
- * Process a SHARED_INFO record from the stream.
- */
-static int handle_shared_info(struct xc_sr_context *ctx,
-                              struct xc_sr_record *rec)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int i;
-    int rc = -1;
-    shared_info_any_t *guest_shinfo = NULL;
-    const shared_info_any_t *old_shinfo = rec->data;
-
-    if ( !ctx->x86.pv.restore.seen_pv_info )
-    {
-        ERROR("Not yet received X86_PV_INFO record");
-        return -1;
-    }
-
-    if ( rec->length != PAGE_SIZE )
-    {
-        ERROR("X86_PV_SHARED_INFO record wrong size: length %u"
-              ", expected 4096", rec->length);
-        goto err;
-    }
-
-    guest_shinfo = xc_map_foreign_range(
-        xch, ctx->domid, PAGE_SIZE, PROT_READ | PROT_WRITE,
-        ctx->dominfo.shared_info_frame);
-    if ( !guest_shinfo )
-    {
-        PERROR("Failed to map Shared Info at mfn %#lx",
-               ctx->dominfo.shared_info_frame);
-        goto err;
-    }
-
-    MEMCPY_FIELD(guest_shinfo, old_shinfo, vcpu_info, ctx->x86.pv.width);
-    MEMCPY_FIELD(guest_shinfo, old_shinfo, arch, ctx->x86.pv.width);
-
-    SET_FIELD(guest_shinfo, arch.pfn_to_mfn_frame_list_list,
-              0, ctx->x86.pv.width);
-
-    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_pending, 0, ctx->x86.pv.width);
-    for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
-        SET_FIELD(guest_shinfo, vcpu_info[i].evtchn_pending_sel,
-                  0, ctx->x86.pv.width);
-
-    MEMSET_ARRAY_FIELD(guest_shinfo, evtchn_mask, 0xff, ctx->x86.pv.width);
-
-    rc = 0;
-
- err:
-    if ( guest_shinfo )
-        munmap(guest_shinfo, PAGE_SIZE);
-
-    return rc;
-}
-
-/* restore_ops function. */
-static bool x86_pv_pfn_is_valid(const struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    return pfn <= ctx->x86.pv.max_pfn;
-}
-
-/* restore_ops function. */
-static void x86_pv_set_page_type(struct xc_sr_context *ctx, xen_pfn_t pfn,
-                                 unsigned long type)
-{
-    assert(pfn <= ctx->x86.pv.max_pfn);
-
-    ctx->x86.pv.restore.pfn_types[pfn] = type;
-}
-
-/* restore_ops function. */
-static void x86_pv_set_gfn(struct xc_sr_context *ctx, xen_pfn_t pfn,
-                           xen_pfn_t mfn)
-{
-    assert(pfn <= ctx->x86.pv.max_pfn);
-
-    if ( ctx->x86.pv.width == sizeof(uint64_t) )
-        /* 64 bit guest.  Need to expand INVALID_MFN for 32 bit toolstacks. */
-        ((uint64_t *)ctx->x86.pv.p2m)[pfn] = mfn == INVALID_MFN ? ~0ULL : mfn;
-    else
-        /* 32 bit guest.  Can truncate INVALID_MFN for 64 bit toolstacks. */
-        ((uint32_t *)ctx->x86.pv.p2m)[pfn] = mfn;
-}
-
-/*
- * restore_ops function.  Convert pfns back to mfns in pagetables.  Possibly
- * needs to populate new frames if a PTE is found referring to a frame which
- * hasn't yet been seen from PAGE_DATA records.
- */
-static int x86_pv_localise_page(struct xc_sr_context *ctx,
-                                uint32_t type, void *page)
-{
-    xc_interface *xch = ctx->xch;
-    uint64_t *table = page;
-    uint64_t pte;
-    unsigned int i, to_populate;
-    xen_pfn_t pfns[(PAGE_SIZE / sizeof(uint64_t))];
-
-    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-    /* Only page tables need localisation. */
-    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
-        return 0;
-
-    /* Check to see whether we need to populate any new frames. */
-    for ( i = 0, to_populate = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
-    {
-        pte = table[i];
-
-        if ( pte & _PAGE_PRESENT )
-        {
-            xen_pfn_t pfn = pte_to_frame(pte);
-
-#ifdef __i386__
-            if ( pfn == INVALID_MFN )
-            {
-                ERROR("PTE truncation detected.  L%u[%u] = %016"PRIx64,
-                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
-                errno = E2BIG;
-                return -1;
-            }
-#endif
-
-            if ( pfn_to_mfn(ctx, pfn) == INVALID_MFN )
-                pfns[to_populate++] = pfn;
-        }
-    }
-
-    if ( to_populate && populate_pfns(ctx, to_populate, pfns, NULL) )
-        return -1;
-
-    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
-    {
-        pte = table[i];
-
-        if ( pte & _PAGE_PRESENT )
-        {
-            xen_pfn_t mfn, pfn;
-
-            pfn = pte_to_frame(pte);
-            mfn = pfn_to_mfn(ctx, pfn);
-
-            if ( !mfn_in_pseudophysmap(ctx, mfn) )
-            {
-                ERROR("Bad mfn for L%u[%u] - pte %"PRIx64,
-                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
-                dump_bad_pseudophysmap_entry(ctx, mfn);
-                errno = ERANGE;
-                return -1;
-            }
-
-            table[i] = merge_pte(pte, mfn);
-        }
-    }
-
-    return 0;
-}
-
-/*
- * restore_ops function.  Confirm that the incoming stream matches the type of
- * domain we are attempting to restore into.
- */
-static int x86_pv_setup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    if ( ctx->restore.guest_type != DHDR_TYPE_X86_PV )
-    {
-        ERROR("Unable to restore %s domain into an x86_pv domain",
-              dhdr_type_to_str(ctx->restore.guest_type));
-        return -1;
-    }
-
-    if ( ctx->restore.guest_page_size != PAGE_SIZE )
-    {
-        ERROR("Invalid page size %d for x86_pv domains",
-              ctx->restore.guest_page_size);
-        return -1;
-    }
-
-    rc = x86_pv_domain_info(ctx);
-    if ( rc )
-        return rc;
-
-    ctx->x86.pv.restore.nr_vcpus = ctx->dominfo.max_vcpu_id + 1;
-    ctx->x86.pv.restore.vcpus = calloc(sizeof(struct xc_sr_x86_pv_restore_vcpu),
-                                       ctx->x86.pv.restore.nr_vcpus);
-    if ( !ctx->x86.pv.restore.vcpus )
-    {
-        errno = ENOMEM;
-        return -1;
-    }
-
-    rc = x86_pv_map_m2p(ctx);
-    if ( rc )
-        return rc;
-
-    return rc;
-}
-
-/*
- * restore_ops function.
- */
-static int x86_pv_process_record(struct xc_sr_context *ctx,
-                                 struct xc_sr_record *rec)
-{
-    switch ( rec->type )
-    {
-    case REC_TYPE_X86_PV_INFO:
-        return handle_x86_pv_info(ctx, rec);
-
-    case REC_TYPE_X86_PV_P2M_FRAMES:
-        return handle_x86_pv_p2m_frames(ctx, rec);
-
-    case REC_TYPE_X86_PV_VCPU_BASIC:
-    case REC_TYPE_X86_PV_VCPU_EXTENDED:
-    case REC_TYPE_X86_PV_VCPU_XSAVE:
-    case REC_TYPE_X86_PV_VCPU_MSRS:
-        return handle_x86_pv_vcpu_blob(ctx, rec);
-
-    case REC_TYPE_SHARED_INFO:
-        return handle_shared_info(ctx, rec);
-
-    case REC_TYPE_X86_TSC_INFO:
-        return handle_x86_tsc_info(ctx, rec);
-
-    case REC_TYPE_X86_CPUID_POLICY:
-        return handle_x86_cpuid_policy(ctx, rec);
-
-    case REC_TYPE_X86_MSR_POLICY:
-        return handle_x86_msr_policy(ctx, rec);
-
-    default:
-        return RECORD_NOT_PROCESSED;
-    }
-}
-
-/*
- * restore_ops function.  Update the vcpu context in Xen, pin the pagetables,
- * rewrite the p2m and seed the grant table.
- */
-static int x86_pv_stream_complete(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    rc = update_vcpu_context(ctx);
-    if ( rc )
-        return rc;
-
-    rc = pin_pagetables(ctx);
-    if ( rc )
-        return rc;
-
-    rc = update_guest_p2m(ctx);
-    if ( rc )
-        return rc;
-
-    rc = xc_dom_gnttab_seed(xch, ctx->domid, false,
-                            ctx->restore.console_gfn,
-                            ctx->restore.xenstore_gfn,
-                            ctx->restore.console_domid,
-                            ctx->restore.xenstore_domid);
-    if ( rc )
-    {
-        PERROR("Failed to seed grant table");
-        return rc;
-    }
-
-    return rc;
-}
-
-/*
- * restore_ops function.
- */
-static int x86_pv_cleanup(struct xc_sr_context *ctx)
-{
-    free(ctx->x86.pv.p2m);
-    free(ctx->x86.pv.p2m_pfns);
-
-    if ( ctx->x86.pv.restore.vcpus )
-    {
-        unsigned int i;
-
-        for ( i = 0; i < ctx->x86.pv.restore.nr_vcpus; ++i )
-        {
-            struct xc_sr_x86_pv_restore_vcpu *vcpu =
-                &ctx->x86.pv.restore.vcpus[i];
-
-            free(vcpu->basic.ptr);
-            free(vcpu->extd.ptr);
-            free(vcpu->xsave.ptr);
-            free(vcpu->msr.ptr);
-        }
-
-        free(ctx->x86.pv.restore.vcpus);
-    }
-
-    free(ctx->x86.pv.restore.pfn_types);
-
-    if ( ctx->x86.pv.m2p )
-        munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);
-
-    free(ctx->x86.restore.cpuid.ptr);
-    free(ctx->x86.restore.msr.ptr);
-
-    return 0;
-}
-
-struct xc_sr_restore_ops restore_ops_x86_pv =
-{
-    .pfn_is_valid    = x86_pv_pfn_is_valid,
-    .pfn_to_gfn      = pfn_to_mfn,
-    .set_page_type   = x86_pv_set_page_type,
-    .set_gfn         = x86_pv_set_gfn,
-    .localise_page   = x86_pv_localise_page,
-    .setup           = x86_pv_setup,
-    .process_record  = x86_pv_process_record,
-    .static_data_complete = x86_static_data_complete,
-    .stream_complete = x86_pv_stream_complete,
-    .cleanup         = x86_pv_cleanup,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_save.c b/tools/libxc/xg_sr_save.c
deleted file mode 100644 (file)
index d74c72c..0000000
+++ /dev/null
@@ -1,1059 +0,0 @@
-#include <assert.h>
-#include <arpa/inet.h>
-
-#include "xg_sr_common.h"
-
-/*
- * Writes an Image header and Domain header into the stream.
- */
-static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
-{
-    xc_interface *xch = ctx->xch;
-    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
-    struct xc_sr_ihdr ihdr = {
-        .marker  = IHDR_MARKER,
-        .id      = htonl(IHDR_ID),
-        .version = htonl(3),
-        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
-    };
-    struct xc_sr_dhdr dhdr = {
-        .type       = guest_type,
-        .page_shift = XC_PAGE_SHIFT,
-        .xen_major  = (xen_version >> 16) & 0xffff,
-        .xen_minor  = (xen_version)       & 0xffff,
-    };
-
-    if ( xen_version < 0 )
-    {
-        PERROR("Unable to obtain Xen Version");
-        return -1;
-    }
-
-    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
-    {
-        PERROR("Unable to write Image Header to stream");
-        return -1;
-    }
-
-    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
-    {
-        PERROR("Unable to write Domain Header to stream");
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Writes an END record into the stream.
- */
-static int write_end_record(struct xc_sr_context *ctx)
-{
-    struct xc_sr_record end = { .type = REC_TYPE_END };
-
-    return write_record(ctx, &end);
-}
-
-/*
- * Writes a STATIC_DATA_END record into the stream.
- */
-static int write_static_data_end_record(struct xc_sr_context *ctx)
-{
-    struct xc_sr_record end = { .type = REC_TYPE_STATIC_DATA_END };
-
-    return write_record(ctx, &end);
-}
-
-/*
- * Writes a CHECKPOINT record into the stream.
- */
-static int write_checkpoint_record(struct xc_sr_context *ctx)
-{
-    struct xc_sr_record checkpoint = { .type = REC_TYPE_CHECKPOINT };
-
-    return write_record(ctx, &checkpoint);
-}
-
-/*
- * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
- * is constructed in ctx->save.batch_pfns.
- *
- * This function:
- * - gets the types for each pfn in the batch.
- * - for each pfn with real data:
- *   - maps and attempts to localise the pages.
- * - construct and writes a PAGE_DATA record into the stream.
- */
-static int write_batch(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t *mfns = NULL, *types = NULL;
-    void *guest_mapping = NULL;
-    void **guest_data = NULL;
-    void **local_pages = NULL;
-    int *errors = NULL, rc = -1;
-    unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
-    unsigned int nr_pfns = ctx->save.nr_batch_pfns;
-    void *page, *orig_page;
-    uint64_t *rec_pfns = NULL;
-    struct iovec *iov = NULL; int iovcnt = 0;
-    struct xc_sr_rec_page_data_header hdr = { 0 };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_PAGE_DATA,
-    };
-
-    assert(nr_pfns != 0);
-
-    /* Mfns of the batch pfns. */
-    mfns = malloc(nr_pfns * sizeof(*mfns));
-    /* Types of the batch pfns. */
-    types = malloc(nr_pfns * sizeof(*types));
-    /* Errors from attempting to map the gfns. */
-    errors = malloc(nr_pfns * sizeof(*errors));
-    /* Pointers to page data to send.  Mapped gfns or local allocations. */
-    guest_data = calloc(nr_pfns, sizeof(*guest_data));
-    /* Pointers to locally allocated pages.  Need freeing. */
-    local_pages = calloc(nr_pfns, sizeof(*local_pages));
-    /* iovec[] for writev(). */
-    iov = malloc((nr_pfns + 4) * sizeof(*iov));
-
-    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
-    {
-        ERROR("Unable to allocate arrays for a batch of %u pages",
-              nr_pfns);
-        goto err;
-    }
-
-    for ( i = 0; i < nr_pfns; ++i )
-    {
-        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
-                                                      ctx->save.batch_pfns[i]);
-
-        /* Likely a ballooned page. */
-        if ( mfns[i] == INVALID_MFN )
-        {
-            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
-            ++ctx->save.nr_deferred_pages;
-        }
-    }
-
-    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
-    if ( rc )
-    {
-        PERROR("Failed to get types for pfn batch");
-        goto err;
-    }
-    rc = -1;
-
-    for ( i = 0; i < nr_pfns; ++i )
-    {
-        switch ( types[i] )
-        {
-        case XEN_DOMCTL_PFINFO_BROKEN:
-        case XEN_DOMCTL_PFINFO_XALLOC:
-        case XEN_DOMCTL_PFINFO_XTAB:
-            continue;
-        }
-
-        mfns[nr_pages++] = mfns[i];
-    }
-
-    if ( nr_pages > 0 )
-    {
-        guest_mapping = xenforeignmemory_map(
-            xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
-        if ( !guest_mapping )
-        {
-            PERROR("Failed to map guest pages");
-            goto err;
-        }
-        nr_pages_mapped = nr_pages;
-
-        for ( i = 0, p = 0; i < nr_pfns; ++i )
-        {
-            switch ( types[i] )
-            {
-            case XEN_DOMCTL_PFINFO_BROKEN:
-            case XEN_DOMCTL_PFINFO_XALLOC:
-            case XEN_DOMCTL_PFINFO_XTAB:
-                continue;
-            }
-
-            if ( errors[p] )
-            {
-                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
-                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
-                goto err;
-            }
-
-            orig_page = page = guest_mapping + (p * PAGE_SIZE);
-            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);
-
-            if ( orig_page != page )
-                local_pages[i] = page;
-
-            if ( rc )
-            {
-                if ( rc == -1 && errno == EAGAIN )
-                {
-                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
-                    ++ctx->save.nr_deferred_pages;
-                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
-                    --nr_pages;
-                }
-                else
-                    goto err;
-            }
-            else
-                guest_data[i] = page;
-
-            rc = -1;
-            ++p;
-        }
-    }
-
-    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
-    if ( !rec_pfns )
-    {
-        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
-              nr_pfns * sizeof(*rec_pfns));
-        goto err;
-    }
-
-    hdr.count = nr_pfns;
-
-    rec.length = sizeof(hdr);
-    rec.length += nr_pfns * sizeof(*rec_pfns);
-    rec.length += nr_pages * PAGE_SIZE;
-
-    for ( i = 0; i < nr_pfns; ++i )
-        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];
-
-    iov[0].iov_base = &rec.type;
-    iov[0].iov_len = sizeof(rec.type);
-
-    iov[1].iov_base = &rec.length;
-    iov[1].iov_len = sizeof(rec.length);
-
-    iov[2].iov_base = &hdr;
-    iov[2].iov_len = sizeof(hdr);
-
-    iov[3].iov_base = rec_pfns;
-    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);
-
-    iovcnt = 4;
-
-    if ( nr_pages )
-    {
-        for ( i = 0; i < nr_pfns; ++i )
-        {
-            if ( guest_data[i] )
-            {
-                iov[iovcnt].iov_base = guest_data[i];
-                iov[iovcnt].iov_len = PAGE_SIZE;
-                iovcnt++;
-                --nr_pages;
-            }
-        }
-    }
-
-    if ( writev_exact(ctx->fd, iov, iovcnt) )
-    {
-        PERROR("Failed to write page data to stream");
-        goto err;
-    }
-
-    /* Sanity check we have sent all the pages we expected to. */
-    assert(nr_pages == 0);
-    rc = ctx->save.nr_batch_pfns = 0;
-
- err:
-    free(rec_pfns);
-    if ( guest_mapping )
-        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
-    for ( i = 0; local_pages && i < nr_pfns; ++i )
-        free(local_pages[i]);
-    free(iov);
-    free(local_pages);
-    free(guest_data);
-    free(errors);
-    free(types);
-    free(mfns);
-
-    return rc;
-}
-
-/*
- * Flush a batch of pfns into the stream.
- */
-static int flush_batch(struct xc_sr_context *ctx)
-{
-    int rc = 0;
-
-    if ( ctx->save.nr_batch_pfns == 0 )
-        return rc;
-
-    rc = write_batch(ctx);
-
-    if ( !rc )
-    {
-        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
-                                    MAX_BATCH_SIZE *
-                                    sizeof(*ctx->save.batch_pfns));
-    }
-
-    return rc;
-}
-
-/*
- * Add a single pfn to the batch, flushing the batch if full.
- */
-static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
-{
-    int rc = 0;
-
-    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
-        rc = flush_batch(ctx);
-
-    if ( rc == 0 )
-        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;
-
-    return rc;
-}
-
-/*
- * Pause/suspend the domain, and refresh ctx->dominfo if required.
- */
-static int suspend_domain(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-
-    /* TODO: Properly specify the return value from this callback.  All
-     * implementations currently appear to return 1 for success, whereas
-     * the legacy code checks for != 0. */
-    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);
-
-    if ( cb_rc == 0 )
-    {
-        ERROR("save callback suspend() failed: %d", cb_rc);
-        return -1;
-    }
-
-    /* Refresh domain information. */
-    if ( (xc_domain_getinfo(xch, ctx->domid, 1, &ctx->dominfo) != 1) ||
-         (ctx->dominfo.domid != ctx->domid) )
-    {
-        PERROR("Unable to refresh domain information");
-        return -1;
-    }
-
-    /* Confirm the domain has actually been paused. */
-    if ( !ctx->dominfo.shutdown ||
-         (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
-    {
-        ERROR("Domain has not been suspended: shutdown %d, reason %d",
-              ctx->dominfo.shutdown, ctx->dominfo.shutdown_reason);
-        return -1;
-    }
-
-    xc_report_progress_single(xch, "Domain now suspended");
-
-    return 0;
-}
-
-/*
- * Send a subset of pages in the guests p2m, according to the dirty bitmap.
- * Used for each subsequent iteration of the live migration loop.
- *
- * Bitmap is bounded by p2m_size.
- */
-static int send_dirty_pages(struct xc_sr_context *ctx,
-                            unsigned long entries)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t p;
-    unsigned long written;
-    int rc;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
-    {
-        if ( !test_bit(p, dirty_bitmap) )
-            continue;
-
-        rc = add_to_batch(ctx, p);
-        if ( rc )
-            return rc;
-
-        /* Update progress every 4MB worth of memory sent. */
-        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
-            xc_report_progress_step(xch, written, entries);
-
-        ++written;
-    }
-
-    rc = flush_batch(ctx);
-    if ( rc )
-        return rc;
-
-    if ( written > entries )
-        DPRINTF("Bitmap contained more entries than expected...");
-
-    xc_report_progress_step(xch, entries, entries);
-
-    return ctx->save.ops.check_vm_state(ctx);
-}
-
-/*
- * Send all pages in the guests p2m.  Used as the first iteration of the live
- * migration loop, and for a non-live save.
- */
-static int send_all_pages(struct xc_sr_context *ctx)
-{
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    bitmap_set(dirty_bitmap, ctx->save.p2m_size);
-
-    return send_dirty_pages(ctx, ctx->save.p2m_size);
-}
-
-static int enable_logdirty(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int on1 = 0, off = 0, on2 = 0;
-    int rc;
-
-    /* This juggling is required if logdirty is enabled for VRAM tracking. */
-    rc = xc_shadow_control(xch, ctx->domid,
-                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                           NULL, 0, NULL, 0, NULL);
-    if ( rc < 0 )
-    {
-        on1 = errno;
-        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
-                               NULL, 0, NULL, 0, NULL);
-        if ( rc < 0 )
-            off = errno;
-        else {
-            rc = xc_shadow_control(xch, ctx->domid,
-                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
-                                   NULL, 0, NULL, 0, NULL);
-            if ( rc < 0 )
-                on2 = errno;
-        }
-        if ( rc < 0 )
-        {
-            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
-            return rc;
-        }
-    }
-
-    return 0;
-}
-
-static int update_progress_string(struct xc_sr_context *ctx, char **str)
-{
-    xc_interface *xch = ctx->xch;
-    char *new_str = NULL;
-    unsigned int iter = ctx->save.stats.iteration;
-
-    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
-    {
-        PERROR("Unable to allocate new progress string");
-        return -1;
-    }
-
-    free(*str);
-    *str = new_str;
-
-    xc_set_progress_prefix(xch, *str);
-    return 0;
-}
-
-/*
- * This is the live migration precopy policy - it's called periodically during
- * the precopy phase of live migrations, and is responsible for deciding when
- * the precopy phase should terminate and what should be done next.
- *
- * The policy implemented here behaves identically to the policy previously
- * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
- * the live migration when there are either fewer than 50 dirty pages, or more
- * than 5 precopy rounds have completed.
- */
-#define SPP_MAX_ITERATIONS      5
-#define SPP_TARGET_DIRTY_COUNT 50
-
-static int simple_precopy_policy(struct precopy_stats stats, void *user)
-{
-    return ((stats.dirty_count >= 0 &&
-             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
-            stats.iteration >= SPP_MAX_ITERATIONS)
-        ? XGS_POLICY_STOP_AND_COPY
-        : XGS_POLICY_CONTINUE_PRECOPY;
-}
-
-/*
- * Send memory while guest is running.
- */
-static int send_memory_live(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
-    char *progress_str = NULL;
-    unsigned int x = 0;
-    int rc;
-    int policy_decision;
-
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
-    void *data = ctx->save.callbacks->data;
-
-    struct precopy_stats *policy_stats;
-
-    rc = update_progress_string(ctx, &progress_str);
-    if ( rc )
-        goto out;
-
-    ctx->save.stats = (struct precopy_stats){
-        .dirty_count = ctx->save.p2m_size,
-    };
-    policy_stats = &ctx->save.stats;
-
-    if ( precopy_policy == NULL )
-        precopy_policy = simple_precopy_policy;
-
-    bitmap_set(dirty_bitmap, ctx->save.p2m_size);
-
-    for ( ; ; )
-    {
-        policy_decision = precopy_policy(*policy_stats, data);
-        x++;
-
-        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
-        {
-            rc = update_progress_string(ctx, &progress_str);
-            if ( rc )
-                goto out;
-
-            rc = send_dirty_pages(ctx, stats.dirty_count);
-            if ( rc )
-                goto out;
-        }
-
-        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
-            break;
-
-        policy_stats->iteration     = x;
-        policy_stats->total_written += policy_stats->dirty_count;
-        policy_stats->dirty_count   = -1;
-
-        policy_decision = precopy_policy(*policy_stats, data);
-
-        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
-            break;
-
-        if ( xc_shadow_control(
-                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
-                 NULL, 0, &stats) != ctx->save.p2m_size )
-        {
-            PERROR("Failed to retrieve logdirty bitmap");
-            rc = -1;
-            goto out;
-        }
-
-        policy_stats->dirty_count = stats.dirty_count;
-
-    }
-
-    if ( policy_decision == XGS_POLICY_ABORT )
-    {
-        PERROR("Abort precopy loop");
-        rc = -1;
-        goto out;
-    }
-
- out:
-    xc_set_progress_prefix(xch, NULL);
-    free(progress_str);
-    return rc;
-}
-
-static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_record rec;
-    uint64_t *pfns = NULL;
-    uint64_t pfn;
-    unsigned int count, i;
-    int rc;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    rc = read_record(ctx, ctx->save.recv_fd, &rec);
-    if ( rc )
-        goto err;
-
-    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
-    {
-        PERROR("Expect dirty bitmap record, but received %u", rec.type);
-        rc = -1;
-        goto err;
-    }
-
-    if ( rec.length % sizeof(*pfns) )
-    {
-        PERROR("Invalid dirty pfn list record length %u", rec.length);
-        rc = -1;
-        goto err;
-    }
-
-    count = rec.length / sizeof(*pfns);
-    pfns = rec.data;
-
-    for ( i = 0; i < count; i++ )
-    {
-        pfn = pfns[i];
-        if ( pfn > ctx->save.p2m_size )
-        {
-            PERROR("Invalid pfn 0x%" PRIx64, pfn);
-            rc = -1;
-            goto err;
-        }
-
-        set_bit(pfn, dirty_bitmap);
-    }
-
-    rc = 0;
-
- err:
-    free(rec.data);
-    return rc;
-}
-
-/*
- * Suspend the domain and send dirty memory.
- * This is the last iteration of the live migration and the
- * heart of the checkpointed stream.
- */
-static int suspend_and_send_dirty(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
-    char *progress_str = NULL;
-    int rc;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    rc = suspend_domain(ctx);
-    if ( rc )
-        goto out;
-
-    if ( xc_shadow_control(
-             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
-             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
-             NULL, XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
-         ctx->save.p2m_size )
-    {
-        PERROR("Failed to retrieve logdirty bitmap");
-        rc = -1;
-        goto out;
-    }
-
-    if ( ctx->save.live )
-    {
-        rc = update_progress_string(ctx, &progress_str);
-        if ( rc )
-            goto out;
-    }
-    else
-        xc_set_progress_prefix(xch, "Checkpointed save");
-
-    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);
-
-    if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
-    {
-        rc = colo_merge_secondary_dirty_bitmap(ctx);
-        if ( rc )
-        {
-            PERROR("Failed to get secondary vm's dirty pages");
-            goto out;
-        }
-    }
-
-    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
-    if ( rc )
-        goto out;
-
-    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
-    ctx->save.nr_deferred_pages = 0;
-
- out:
-    xc_set_progress_prefix(xch, NULL);
-    free(progress_str);
-    return rc;
-}
-
-static int verify_frames(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
-    int rc;
-    struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };
-
-    DPRINTF("Enabling verify mode");
-
-    rc = write_record(ctx, &rec);
-    if ( rc )
-        goto out;
-
-    xc_set_progress_prefix(xch, "Frames verify");
-    rc = send_all_pages(ctx);
-    if ( rc )
-        goto out;
-
-    if ( xc_shadow_control(
-             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
-             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
-             NULL, 0, &stats) != ctx->save.p2m_size )
-    {
-        PERROR("Failed to retrieve logdirty bitmap");
-        rc = -1;
-        goto out;
-    }
-
-    DPRINTF("  Further stats: faults %u, dirty %u",
-            stats.fault_count, stats.dirty_count);
-
- out:
-    return rc;
-}
-
-/*
- * Send all domain memory.  This is the heart of the live migration loop.
- */
-static int send_domain_memory_live(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    rc = enable_logdirty(ctx);
-    if ( rc )
-        goto out;
-
-    rc = send_memory_live(ctx);
-    if ( rc )
-        goto out;
-
-    rc = suspend_and_send_dirty(ctx);
-    if ( rc )
-        goto out;
-
-    if ( ctx->save.debug && ctx->stream_type != XC_STREAM_PLAIN )
-    {
-        rc = verify_frames(ctx);
-        if ( rc )
-            goto out;
-    }
-
- out:
-    return rc;
-}
-
-/*
- * Checkpointed save.
- */
-static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
-{
-    return suspend_and_send_dirty(ctx);
-}
-
-/*
- * Send all domain memory, pausing the domain first.  Generally used for
- * suspend-to-file.
- */
-static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    rc = suspend_domain(ctx);
-    if ( rc )
-        goto err;
-
-    xc_set_progress_prefix(xch, "Frames");
-
-    rc = send_all_pages(ctx);
-    if ( rc )
-        goto err;
-
- err:
-    return rc;
-}
-
-static int setup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-    rc = ctx->save.ops.setup(ctx);
-    if ( rc )
-        goto err;
-
-    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
-        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
-    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
-                                  sizeof(*ctx->save.batch_pfns));
-    ctx->save.deferred_pages = calloc(1, bitmap_size(ctx->save.p2m_size));
-
-    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
-    {
-        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
-              " deferred pages");
-        rc = -1;
-        errno = ENOMEM;
-        goto err;
-    }
-
-    rc = 0;
-
- err:
-    return rc;
-}
-
-static void cleanup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
-                                    &ctx->save.dirty_bitmap_hbuf);
-
-
-    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
-                      NULL, 0, NULL, 0, NULL);
-
-    if ( ctx->save.ops.cleanup(ctx) )
-        PERROR("Failed to clean up");
-
-    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
-                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
-    free(ctx->save.deferred_pages);
-    free(ctx->save.batch_pfns);
-}
-
-/*
- * Save a domain.
- */
-static int save(struct xc_sr_context *ctx, uint16_t guest_type)
-{
-    xc_interface *xch = ctx->xch;
-    int rc, saved_rc = 0, saved_errno = 0;
-
-    IPRINTF("Saving domain %d, type %s",
-            ctx->domid, dhdr_type_to_str(guest_type));
-
-    rc = setup(ctx);
-    if ( rc )
-        goto err;
-
-    xc_report_progress_single(xch, "Start of stream");
-
-    rc = write_headers(ctx, guest_type);
-    if ( rc )
-        goto err;
-
-    rc = ctx->save.ops.static_data(ctx);
-    if ( rc )
-        goto err;
-
-    rc = write_static_data_end_record(ctx);
-    if ( rc )
-        goto err;
-
-    rc = ctx->save.ops.start_of_stream(ctx);
-    if ( rc )
-        goto err;
-
-    do {
-        rc = ctx->save.ops.start_of_checkpoint(ctx);
-        if ( rc )
-            goto err;
-
-        rc = ctx->save.ops.check_vm_state(ctx);
-        if ( rc )
-            goto err;
-
-        if ( ctx->save.live )
-            rc = send_domain_memory_live(ctx);
-        else if ( ctx->stream_type != XC_STREAM_PLAIN )
-            rc = send_domain_memory_checkpointed(ctx);
-        else
-            rc = send_domain_memory_nonlive(ctx);
-
-        if ( rc )
-            goto err;
-
-        if ( !ctx->dominfo.shutdown ||
-             (ctx->dominfo.shutdown_reason != SHUTDOWN_suspend) )
-        {
-            ERROR("Domain has not been suspended");
-            rc = -1;
-            goto err;
-        }
-
-        rc = ctx->save.ops.end_of_checkpoint(ctx);
-        if ( rc )
-            goto err;
-
-        if ( ctx->stream_type != XC_STREAM_PLAIN )
-        {
-            /*
-             * We have now completed the initial live portion of the checkpoint
-             * process. Therefore switch into periodically sending synchronous
-             * batches of pages.
-             */
-            ctx->save.live = false;
-
-            rc = write_checkpoint_record(ctx);
-            if ( rc )
-                goto err;
-
-            if ( ctx->stream_type == XC_STREAM_COLO )
-            {
-                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
-                if ( !rc )
-                {
-                    rc = -1;
-                    goto err;
-                }
-            }
-
-            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
-            if ( rc <= 0 )
-                goto err;
-
-            if ( ctx->stream_type == XC_STREAM_COLO )
-            {
-                rc = ctx->save.callbacks->wait_checkpoint(
-                    ctx->save.callbacks->data);
-                if ( rc <= 0 )
-                    goto err;
-            }
-            else if ( ctx->stream_type == XC_STREAM_REMUS )
-            {
-                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
-                if ( rc <= 0 )
-                    goto err;
-            }
-            else
-            {
-                ERROR("Unknown checkpointed stream");
-                rc = -1;
-                goto err;
-            }
-        }
-    } while ( ctx->stream_type != XC_STREAM_PLAIN );
-
-    xc_report_progress_single(xch, "End of stream");
-
-    rc = write_end_record(ctx);
-    if ( rc )
-        goto err;
-
-    xc_report_progress_single(xch, "Complete");
-    goto done;
-
- err:
-    saved_errno = errno;
-    saved_rc = rc;
-    PERROR("Save failed");
-
- done:
-    cleanup(ctx);
-
-    if ( saved_rc )
-    {
-        rc = saved_rc;
-        errno = saved_errno;
-    }
-
-    return rc;
-};
-
-int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
-                   uint32_t flags, struct save_callbacks *callbacks,
-                   xc_stream_type_t stream_type, int recv_fd)
-{
-    struct xc_sr_context ctx = {
-        .xch = xch,
-        .fd = io_fd,
-        .stream_type = stream_type,
-    };
-
-    /* GCC 4.4 (of CentOS 6.x vintage) can' t initialise anonymous unions. */
-    ctx.save.callbacks = callbacks;
-    ctx.save.live  = !!(flags & XCFLAGS_LIVE);
-    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
-    ctx.save.recv_fd = recv_fd;
-
-    if ( xc_domain_getinfo(xch, dom, 1, &ctx.dominfo) != 1 )
-    {
-        PERROR("Failed to get domain info");
-        return -1;
-    }
-
-    if ( ctx.dominfo.domid != dom )
-    {
-        ERROR("Domain %u does not exist", dom);
-        return -1;
-    }
-
-    /* Sanity check stream_type-related parameters */
-    switch ( stream_type )
-    {
-    case XC_STREAM_COLO:
-        assert(callbacks->wait_checkpoint);
-        /* Fallthrough */
-    case XC_STREAM_REMUS:
-        assert(callbacks->checkpoint && callbacks->postcopy);
-        /* Fallthrough */
-    case XC_STREAM_PLAIN:
-        if ( ctx.dominfo.hvm )
-            assert(callbacks->switch_qemu_logdirty);
-        break;
-
-    default:
-        assert(!"Bad stream_type");
-        break;
-    }
-
-    DPRINTF("fd %d, dom %u, flags %u, hvm %d",
-            io_fd, dom, flags, ctx.dominfo.hvm);
-
-    ctx.domid = dom;
-
-    if ( ctx.dominfo.hvm )
-    {
-        ctx.save.ops = save_ops_x86_hvm;
-        return save(&ctx, DHDR_TYPE_X86_HVM);
-    }
-    else
-    {
-        ctx.save.ops = save_ops_x86_pv;
-        return save(&ctx, DHDR_TYPE_X86_PV);
-    }
-}
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_save_x86_hvm.c b/tools/libxc/xg_sr_save_x86_hvm.c
deleted file mode 100644 (file)
index 1634a7b..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-#include <assert.h>
-
-#include "xg_sr_common_x86.h"
-
-#include <xen/hvm/params.h>
-
-/*
- * Query for the HVM context and write an HVM_CONTEXT record into the stream.
- */
-static int write_hvm_context(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc, hvm_buf_size;
-    struct xc_sr_record hvm_rec = {
-        .type = REC_TYPE_HVM_CONTEXT,
-    };
-
-    hvm_buf_size = xc_domain_hvm_getcontext(xch, ctx->domid, 0, 0);
-    if ( hvm_buf_size < 0 )
-    {
-        PERROR("Couldn't get HVM context size from Xen");
-        rc = -1;
-        goto out;
-    }
-
-    hvm_rec.data = malloc(hvm_buf_size);
-    if ( !hvm_rec.data )
-    {
-        PERROR("Couldn't allocate memory");
-        rc = -1;
-        goto out;
-    }
-
-    hvm_buf_size = xc_domain_hvm_getcontext(xch, ctx->domid,
-                                            hvm_rec.data, hvm_buf_size);
-    if ( hvm_buf_size < 0 )
-    {
-        PERROR("Couldn't get HVM context from Xen");
-        rc = -1;
-        goto out;
-    }
-
-    hvm_rec.length = hvm_buf_size;
-    rc = write_record(ctx, &hvm_rec);
-    if ( rc < 0 )
-    {
-        PERROR("error write HVM_CONTEXT record");
-        goto out;
-    }
-
- out:
-    free(hvm_rec.data);
-    return rc;
-}
-
-/*
- * Query for a range of HVM parameters and write an HVM_PARAMS record into the
- * stream.
- */
-static int write_hvm_params(struct xc_sr_context *ctx)
-{
-    static const unsigned int params[] = {
-        HVM_PARAM_STORE_PFN,
-        HVM_PARAM_IOREQ_PFN,
-        HVM_PARAM_BUFIOREQ_PFN,
-        HVM_PARAM_PAGING_RING_PFN,
-        HVM_PARAM_MONITOR_RING_PFN,
-        HVM_PARAM_SHARING_RING_PFN,
-        HVM_PARAM_VM86_TSS_SIZED,
-        HVM_PARAM_CONSOLE_PFN,
-        HVM_PARAM_ACPI_IOPORTS_LOCATION,
-        HVM_PARAM_VIRIDIAN,
-        HVM_PARAM_IDENT_PT,
-        HVM_PARAM_VM_GENERATION_ID_ADDR,
-        HVM_PARAM_IOREQ_SERVER_PFN,
-        HVM_PARAM_NR_IOREQ_SERVER_PAGES,
-        HVM_PARAM_X87_FIP_WIDTH,
-        HVM_PARAM_MCA_CAP,
-    };
-
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_hvm_params_entry entries[ARRAY_SIZE(params)];
-    struct xc_sr_rec_hvm_params hdr = {
-        .count = 0,
-    };
-    struct xc_sr_record rec = {
-        .type   = REC_TYPE_HVM_PARAMS,
-        .length = sizeof(hdr),
-        .data   = &hdr,
-    };
-    unsigned int i;
-    int rc;
-
-    for ( i = 0; i < ARRAY_SIZE(params); i++ )
-    {
-        uint32_t index = params[i];
-        uint64_t value;
-
-        rc = xc_hvm_param_get(xch, ctx->domid, index, &value);
-        if ( rc )
-        {
-            PERROR("Failed to get HVMPARAM at index %u", index);
-            return rc;
-        }
-
-        if ( value != 0 )
-        {
-            entries[hdr.count].index = index;
-            entries[hdr.count].value = value;
-            hdr.count++;
-        }
-    }
-
-    /* No params? Skip this record. */
-    if ( hdr.count == 0 )
-        return 0;
-
-    rc = write_split_record(ctx, &rec, entries, hdr.count * sizeof(*entries));
-    if ( rc )
-        PERROR("Failed to write HVM_PARAMS record");
-
-    return rc;
-}
-
-static xen_pfn_t x86_hvm_pfn_to_gfn(const struct xc_sr_context *ctx,
-                                    xen_pfn_t pfn)
-{
-    /* identity map */
-    return pfn;
-}
-
-static int x86_hvm_normalise_page(struct xc_sr_context *ctx,
-                                  xen_pfn_t type, void **page)
-{
-    return 0;
-}
-
-static int x86_hvm_setup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t nr_pfns;
-
-    if ( xc_domain_nr_gpfns(xch, ctx->domid, &nr_pfns) < 0 )
-    {
-        PERROR("Unable to obtain the guest p2m size");
-        return -1;
-    }
-#ifdef __i386__
-    /* Very large domains (> 1TB) will exhaust virtual address space. */
-    if ( nr_pfns > 0x0fffffff )
-    {
-        errno = E2BIG;
-        PERROR("Cannot save this big a guest");
-        return -1;
-    }
-#endif
-
-    ctx->save.p2m_size = nr_pfns;
-
-    if ( ctx->save.callbacks->switch_qemu_logdirty(
-             ctx->domid, 1, ctx->save.callbacks->data) )
-    {
-        PERROR("Couldn't enable qemu log-dirty mode");
-        return -1;
-    }
-
-    ctx->x86.hvm.save.qemu_enabled_logdirty = true;
-
-    return 0;
-}
-
-static int x86_hvm_static_data(struct xc_sr_context *ctx)
-{
-    return write_x86_cpu_policy_records(ctx);
-}
-
-static int x86_hvm_start_of_stream(struct xc_sr_context *ctx)
-{
-    return 0;
-}
-
-static int x86_hvm_start_of_checkpoint(struct xc_sr_context *ctx)
-{
-    return 0;
-}
-
-static int x86_hvm_check_vm_state(struct xc_sr_context *ctx)
-{
-    return 0;
-}
-
-static int x86_hvm_end_of_checkpoint(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    /* Write the TSC record. */
-    rc = write_x86_tsc_info(ctx);
-    if ( rc )
-        return rc;
-
-    /* Write the HVM_CONTEXT record. */
-    rc = write_hvm_context(ctx);
-    if ( rc )
-        return rc;
-
-    /* Write HVM_PARAMS record contains applicable HVM params. */
-    rc = write_hvm_params(ctx);
-    if ( rc )
-        return rc;
-
-    return 0;
-}
-
-static int x86_hvm_cleanup(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-
-    /* If qemu successfully enabled logdirty mode, attempt to disable. */
-    if ( ctx->x86.hvm.save.qemu_enabled_logdirty &&
-         ctx->save.callbacks->switch_qemu_logdirty(
-             ctx->domid, 0, ctx->save.callbacks->data) )
-    {
-        PERROR("Couldn't disable qemu log-dirty mode");
-        return -1;
-    }
-
-    return 0;
-}
-
-struct xc_sr_save_ops save_ops_x86_hvm =
-{
-    .pfn_to_gfn          = x86_hvm_pfn_to_gfn,
-    .normalise_page      = x86_hvm_normalise_page,
-    .setup               = x86_hvm_setup,
-    .static_data         = x86_hvm_static_data,
-    .start_of_stream     = x86_hvm_start_of_stream,
-    .start_of_checkpoint = x86_hvm_start_of_checkpoint,
-    .end_of_checkpoint   = x86_hvm_end_of_checkpoint,
-    .check_vm_state      = x86_hvm_check_vm_state,
-    .cleanup             = x86_hvm_cleanup,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_save_x86_pv.c b/tools/libxc/xg_sr_save_x86_pv.c
deleted file mode 100644 (file)
index 4964f1f..0000000
+++ /dev/null
@@ -1,1156 +0,0 @@
-#include <assert.h>
-#include <limits.h>
-
-#include "xg_sr_common_x86_pv.h"
-
-/* Check a 64 bit virtual address for being canonical. */
-static inline bool is_canonical_address(xen_vaddr_t vaddr)
-{
-    return ((int64_t)vaddr >> 47) == ((int64_t)vaddr >> 63);
-}
-
-/*
- * Maps the guests shared info page.
- */
-static int map_shinfo(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-
-    ctx->x86.pv.shinfo = xc_map_foreign_range(
-        xch, ctx->domid, PAGE_SIZE, PROT_READ, ctx->dominfo.shared_info_frame);
-    if ( !ctx->x86.pv.shinfo )
-    {
-        PERROR("Failed to map shared info frame at mfn %#lx",
-               ctx->dominfo.shared_info_frame);
-        return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Copy a list of mfns from a guest, accounting for differences between guest
- * and toolstack width.  Can fail if truncation would occur.
- */
-static int copy_mfns_from_guest(const struct xc_sr_context *ctx,
-                                xen_pfn_t *dst, const void *src, size_t count)
-{
-    size_t x;
-
-    if ( ctx->x86.pv.width == sizeof(unsigned long) )
-        memcpy(dst, src, count * sizeof(*dst));
-    else
-    {
-        for ( x = 0; x < count; ++x )
-        {
-#ifdef __x86_64__
-            /* 64bit toolstack, 32bit guest.  Expand any INVALID_MFN. */
-            uint32_t s = ((uint32_t *)src)[x];
-
-            dst[x] = s == ~0U ? INVALID_MFN : s;
-#else
-            /*
-             * 32bit toolstack, 64bit guest.  Truncate INVALID_MFN, but bail
-             * if any other truncation would occur.
-             *
-             * This will only occur on hosts where a PV guest has ram above
-             * the 16TB boundary.  A 32bit dom0 is unlikely to have
-             * successfully booted on a system this large.
-             */
-            uint64_t s = ((uint64_t *)src)[x];
-
-            if ( (s != ~0ULL) && ((s >> 32) != 0) )
-            {
-                errno = E2BIG;
-                return -1;
-            }
-
-            dst[x] = s;
-#endif
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Map the p2m leave pages and build an array of their pfns.
- */
-static int map_p2m_leaves(struct xc_sr_context *ctx, xen_pfn_t *mfns,
-                          size_t n_mfns)
-{
-    xc_interface *xch = ctx->xch;
-    unsigned int x;
-
-    ctx->x86.pv.p2m = xc_map_foreign_pages(xch, ctx->domid, PROT_READ,
-                                           mfns, n_mfns);
-    if ( !ctx->x86.pv.p2m )
-    {
-        PERROR("Failed to map p2m frames");
-        return -1;
-    }
-
-    ctx->save.p2m_size = ctx->x86.pv.max_pfn + 1;
-    ctx->x86.pv.p2m_frames = n_mfns;
-    ctx->x86.pv.p2m_pfns = malloc(n_mfns * sizeof(*mfns));
-    if ( !ctx->x86.pv.p2m_pfns )
-    {
-        ERROR("Cannot allocate %zu bytes for p2m pfns list",
-              n_mfns * sizeof(*mfns));
-        return -1;
-    }
-
-    /* Convert leaf frames from mfns to pfns. */
-    for ( x = 0; x < n_mfns; ++x )
-    {
-        if ( !mfn_in_pseudophysmap(ctx, mfns[x]) )
-        {
-            ERROR("Bad mfn in p2m_frame_list[%u]", x);
-            dump_bad_pseudophysmap_entry(ctx, mfns[x]);
-            errno = ERANGE;
-            return -1;
-        }
-
-        ctx->x86.pv.p2m_pfns[x] = mfn_to_pfn(ctx, mfns[x]);
-    }
-
-    return 0;
-}
-
-/*
- * Walk the guests frame list list and frame list to identify and map the
- * frames making up the guests p2m table.  Construct a list of pfns making up
- * the table.
- */
-static int map_p2m_tree(struct xc_sr_context *ctx)
-{
-    /* Terminology:
-     *
-     * fll   - frame list list, top level p2m, list of fl mfns
-     * fl    - frame list, mid level p2m, list of leaf mfns
-     * local - own allocated buffers, adjusted for bitness
-     * guest - mappings into the domain
-     */
-    xc_interface *xch = ctx->xch;
-    int rc = -1;
-    unsigned int x, saved_x, fpp, fll_entries, fl_entries;
-    xen_pfn_t fll_mfn, saved_mfn, max_pfn;
-
-    xen_pfn_t *local_fll = NULL;
-    void *guest_fll = NULL;
-    size_t local_fll_size;
-
-    xen_pfn_t *local_fl = NULL;
-    void *guest_fl = NULL;
-    size_t local_fl_size;
-
-    fpp = PAGE_SIZE / ctx->x86.pv.width;
-    fll_entries = (ctx->x86.pv.max_pfn / (fpp * fpp)) + 1;
-    if ( fll_entries > fpp )
-    {
-        ERROR("max_pfn %#lx too large for p2m tree", ctx->x86.pv.max_pfn);
-        goto err;
-    }
-
-    fll_mfn = GET_FIELD(ctx->x86.pv.shinfo, arch.pfn_to_mfn_frame_list_list,
-                        ctx->x86.pv.width);
-    if ( fll_mfn == 0 || fll_mfn > ctx->x86.pv.max_mfn )
-    {
-        ERROR("Bad mfn %#lx for p2m frame list list", fll_mfn);
-        goto err;
-    }
-
-    /* Map the guest top p2m. */
-    guest_fll = xc_map_foreign_range(xch, ctx->domid, PAGE_SIZE,
-                                     PROT_READ, fll_mfn);
-    if ( !guest_fll )
-    {
-        PERROR("Failed to map p2m frame list list at %#lx", fll_mfn);
-        goto err;
-    }
-
-    local_fll_size = fll_entries * sizeof(*local_fll);
-    local_fll = malloc(local_fll_size);
-    if ( !local_fll )
-    {
-        ERROR("Cannot allocate %zu bytes for local p2m frame list list",
-              local_fll_size);
-        goto err;
-    }
-
-    if ( copy_mfns_from_guest(ctx, local_fll, guest_fll, fll_entries) )
-    {
-        ERROR("Truncation detected copying p2m frame list list");
-        goto err;
-    }
-
-    /* Check for bad mfns in frame list list. */
-    saved_mfn = 0;
-    saved_x = 0;
-    for ( x = 0; x < fll_entries; ++x )
-    {
-        if ( local_fll[x] == 0 || local_fll[x] > ctx->x86.pv.max_mfn )
-        {
-            ERROR("Bad mfn %#lx at index %u (of %u) in p2m frame list list",
-                  local_fll[x], x, fll_entries);
-            goto err;
-        }
-        if ( local_fll[x] != saved_mfn )
-        {
-            saved_mfn = local_fll[x];
-            saved_x = x;
-        }
-    }
-
-    /*
-     * Check for actual lower max_pfn:
-     * If the trailing entries of the frame list list were all the same we can
-     * assume they all reference mid pages all referencing p2m pages with all
-     * invalid entries. Otherwise there would be multiple pfns referencing all
-     * the same mfn which can't work across migration, as this sharing would be
-     * broken by the migration process.
-     * Adjust max_pfn if possible to avoid allocating much larger areas as
-     * needed for p2m and logdirty map.
-     */
-    max_pfn = (saved_x + 1) * fpp * fpp - 1;
-    if ( max_pfn < ctx->x86.pv.max_pfn )
-    {
-        ctx->x86.pv.max_pfn = max_pfn;
-        fll_entries = (ctx->x86.pv.max_pfn / (fpp * fpp)) + 1;
-    }
-    ctx->x86.pv.p2m_frames = (ctx->x86.pv.max_pfn + fpp) / fpp;
-    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86.pv.max_pfn,
-            ctx->x86.pv.p2m_frames);
-    fl_entries  = (ctx->x86.pv.max_pfn / fpp) + 1;
-
-    /* Map the guest mid p2m frames. */
-    guest_fl = xc_map_foreign_pages(xch, ctx->domid, PROT_READ,
-                                    local_fll, fll_entries);
-    if ( !guest_fl )
-    {
-        PERROR("Failed to map p2m frame list");
-        goto err;
-    }
-
-    local_fl_size = fl_entries * sizeof(*local_fl);
-    local_fl = malloc(local_fl_size);
-    if ( !local_fl )
-    {
-        ERROR("Cannot allocate %zu bytes for local p2m frame list",
-              local_fl_size);
-        goto err;
-    }
-
-    if ( copy_mfns_from_guest(ctx, local_fl, guest_fl, fl_entries) )
-    {
-        ERROR("Truncation detected copying p2m frame list");
-        goto err;
-    }
-
-    for ( x = 0; x < fl_entries; ++x )
-    {
-        if ( local_fl[x] == 0 || local_fl[x] > ctx->x86.pv.max_mfn )
-        {
-            ERROR("Bad mfn %#lx at index %u (of %u) in p2m frame list",
-                  local_fl[x], x, fl_entries);
-            goto err;
-        }
-    }
-
-    /* Map the p2m leaves themselves. */
-    rc = map_p2m_leaves(ctx, local_fl, fl_entries);
-
- err:
-    free(local_fl);
-    if ( guest_fl )
-        munmap(guest_fl, fll_entries * PAGE_SIZE);
-
-    free(local_fll);
-    if ( guest_fll )
-        munmap(guest_fll, PAGE_SIZE);
-
-    return rc;
-}
-
-/*
- * Get p2m_generation count.
- * Returns an error if the generation count has changed since the last call.
- */
-static int get_p2m_generation(struct xc_sr_context *ctx)
-{
-    uint64_t p2m_generation;
-    int rc;
-
-    p2m_generation = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_generation,
-                               ctx->x86.pv.width);
-
-    rc = (p2m_generation == ctx->x86.pv.p2m_generation) ? 0 : -1;
-    ctx->x86.pv.p2m_generation = p2m_generation;
-
-    return rc;
-}
-
-static int x86_pv_check_vm_state_p2m_list(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc;
-
-    if ( !ctx->save.live )
-        return 0;
-
-    rc = get_p2m_generation(ctx);
-    if ( rc )
-        ERROR("p2m generation count changed. Migration aborted.");
-
-    return rc;
-}
-
-/*
- * Map the guest p2m frames specified via a cr3 value, a virtual address, and
- * the maximum pfn. PTE entries are 64 bits for both, 32 and 64 bit guests as
- * in 32 bit case we support PAE guests only.
- */
-static int map_p2m_list(struct xc_sr_context *ctx, uint64_t p2m_cr3)
-{
-    xc_interface *xch = ctx->xch;
-    xen_vaddr_t p2m_vaddr, p2m_end, mask, off;
-    xen_pfn_t p2m_mfn, mfn, saved_mfn, max_pfn;
-    uint64_t *ptes = NULL;
-    xen_pfn_t *mfns = NULL;
-    unsigned int fpp, n_pages, level, shift, idx_start, idx_end, idx, saved_idx;
-    int rc = -1;
-
-    p2m_mfn = cr3_to_mfn(ctx, p2m_cr3);
-    assert(p2m_mfn != 0);
-    if ( p2m_mfn > ctx->x86.pv.max_mfn )
-    {
-        ERROR("Bad p2m_cr3 value %#" PRIx64, p2m_cr3);
-        errno = ERANGE;
-        goto err;
-    }
-
-    get_p2m_generation(ctx);
-
-    p2m_vaddr = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_vaddr,
-                          ctx->x86.pv.width);
-    fpp = PAGE_SIZE / ctx->x86.pv.width;
-    ctx->x86.pv.p2m_frames = ctx->x86.pv.max_pfn / fpp + 1;
-    p2m_end = p2m_vaddr + ctx->x86.pv.p2m_frames * PAGE_SIZE - 1;
-
-    if ( ctx->x86.pv.width == 8 )
-    {
-        mask = 0x0000ffffffffffffULL;
-        if ( !is_canonical_address(p2m_vaddr) ||
-             !is_canonical_address(p2m_end) ||
-             p2m_end < p2m_vaddr ||
-             (p2m_vaddr <= HYPERVISOR_VIRT_END_X86_64 &&
-              p2m_end > HYPERVISOR_VIRT_START_X86_64) )
-        {
-            ERROR("Bad virtual p2m address range %#" PRIx64 "-%#" PRIx64,
-                  p2m_vaddr, p2m_end);
-            errno = ERANGE;
-            goto err;
-        }
-    }
-    else
-    {
-        mask = 0x00000000ffffffffULL;
-        if ( p2m_vaddr > mask || p2m_end > mask || p2m_end < p2m_vaddr ||
-             (p2m_vaddr <= HYPERVISOR_VIRT_END_X86_32 &&
-              p2m_end > HYPERVISOR_VIRT_START_X86_32) )
-        {
-            ERROR("Bad virtual p2m address range %#" PRIx64 "-%#" PRIx64,
-                  p2m_vaddr, p2m_end);
-            errno = ERANGE;
-            goto err;
-        }
-    }
-
-    DPRINTF("p2m list from %#" PRIx64 " to %#" PRIx64 ", root at %#lx",
-            p2m_vaddr, p2m_end, p2m_mfn);
-    DPRINTF("max_pfn %#lx, p2m_frames %d", ctx->x86.pv.max_pfn,
-            ctx->x86.pv.p2m_frames);
-
-    mfns = malloc(sizeof(*mfns));
-    if ( !mfns )
-    {
-        ERROR("Cannot allocate memory for array of %u mfns", 1);
-        goto err;
-    }
-    mfns[0] = p2m_mfn;
-    off = 0;
-    saved_mfn = 0;
-    idx_start = idx_end = saved_idx = 0;
-
-    for ( level = ctx->x86.pv.levels; level > 0; level-- )
-    {
-        n_pages = idx_end - idx_start + 1;
-        ptes = xc_map_foreign_pages(xch, ctx->domid, PROT_READ, mfns, n_pages);
-        if ( !ptes )
-        {
-            PERROR("Failed to map %u page table pages for p2m list", n_pages);
-            goto err;
-        }
-        free(mfns);
-
-        shift = level * 9 + 3;
-        idx_start = ((p2m_vaddr - off) & mask) >> shift;
-        idx_end = ((p2m_end - off) & mask) >> shift;
-        idx = idx_end - idx_start + 1;
-        mfns = malloc(sizeof(*mfns) * idx);
-        if ( !mfns )
-        {
-            ERROR("Cannot allocate memory for array of %u mfns", idx);
-            goto err;
-        }
-
-        for ( idx = idx_start; idx <= idx_end; idx++ )
-        {
-            mfn = pte_to_frame(ptes[idx]);
-            if ( mfn == 0 || mfn > ctx->x86.pv.max_mfn )
-            {
-                ERROR("Bad mfn %#lx during page table walk for vaddr %#" PRIx64 " at level %d of p2m list",
-                      mfn, off + ((xen_vaddr_t)idx << shift), level);
-                errno = ERANGE;
-                goto err;
-            }
-            mfns[idx - idx_start] = mfn;
-
-            /* Maximum pfn check at level 2. Same reasoning as for p2m tree. */
-            if ( level == 2 )
-            {
-                if ( mfn != saved_mfn )
-                {
-                    saved_mfn = mfn;
-                    saved_idx = idx - idx_start;
-                }
-            }
-        }
-
-        if ( level == 2 )
-        {
-            if ( saved_idx == idx_end )
-                saved_idx++;
-            max_pfn = ((xen_pfn_t)saved_idx << 9) * fpp - 1;
-            if ( max_pfn < ctx->x86.pv.max_pfn )
-            {
-                ctx->x86.pv.max_pfn = max_pfn;
-                ctx->x86.pv.p2m_frames = (ctx->x86.pv.max_pfn + fpp) / fpp;
-                p2m_end = p2m_vaddr + ctx->x86.pv.p2m_frames * PAGE_SIZE - 1;
-                idx_end = idx_start + saved_idx;
-            }
-        }
-
-        munmap(ptes, n_pages * PAGE_SIZE);
-        ptes = NULL;
-        off = p2m_vaddr & ((mask >> shift) << shift);
-    }
-
-    /* Map the p2m leaves themselves. */
-    rc = map_p2m_leaves(ctx, mfns, idx_end - idx_start + 1);
-
- err:
-    free(mfns);
-    if ( ptes )
-        munmap(ptes, n_pages * PAGE_SIZE);
-
-    return rc;
-}
-
-/*
- * Map the guest p2m frames.
- * Depending on guest support this might either be a virtual mapped linear
- * list (preferred format) or a 3 level tree linked via mfns.
- */
-static int map_p2m(struct xc_sr_context *ctx)
-{
-    uint64_t p2m_cr3;
-
-    ctx->x86.pv.p2m_generation = ~0ULL;
-    ctx->x86.pv.max_pfn = GET_FIELD(ctx->x86.pv.shinfo, arch.max_pfn,
-                                    ctx->x86.pv.width) - 1;
-    p2m_cr3 = GET_FIELD(ctx->x86.pv.shinfo, arch.p2m_cr3, ctx->x86.pv.width);
-
-    return p2m_cr3 ? map_p2m_list(ctx, p2m_cr3) : map_p2m_tree(ctx);
-}
-
-/*
- * Obtain a specific vcpus basic state and write an X86_PV_VCPU_BASIC record
- * into the stream.  Performs mfn->pfn conversion on architectural state.
- */
-static int write_one_vcpu_basic(struct xc_sr_context *ctx, uint32_t id)
-{
-    xc_interface *xch = ctx->xch;
-    xen_pfn_t mfn, pfn;
-    unsigned int i, gdt_count;
-    int rc = -1;
-    vcpu_guest_context_any_t vcpu;
-    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
-        .vcpu_id = id,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_VCPU_BASIC,
-        .length = sizeof(vhdr),
-        .data = &vhdr,
-    };
-
-    if ( xc_vcpu_getcontext(xch, ctx->domid, id, &vcpu) )
-    {
-        PERROR("Failed to get vcpu%u context", id);
-        goto err;
-    }
-
-    /* Vcpu0 is special: Convert the suspend record to a pfn. */
-    if ( id == 0 )
-    {
-        mfn = GET_FIELD(&vcpu, user_regs.edx, ctx->x86.pv.width);
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("Bad mfn for suspend record");
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            errno = ERANGE;
-            goto err;
-        }
-        SET_FIELD(&vcpu, user_regs.edx, mfn_to_pfn(ctx, mfn),
-                  ctx->x86.pv.width);
-    }
-
-    gdt_count = GET_FIELD(&vcpu, gdt_ents, ctx->x86.pv.width);
-    if ( gdt_count > FIRST_RESERVED_GDT_ENTRY )
-    {
-        ERROR("GDT entry count (%u) out of range (max %u)",
-              gdt_count, FIRST_RESERVED_GDT_ENTRY);
-        errno = ERANGE;
-        goto err;
-    }
-    gdt_count = (gdt_count + 511) / 512; /* gdt_count now in units of frames. */
-
-    /* Convert GDT frames to pfns. */
-    for ( i = 0; i < gdt_count; ++i )
-    {
-        mfn = GET_FIELD(&vcpu, gdt_frames[i], ctx->x86.pv.width);
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("Bad mfn for frame %u of vcpu%u's GDT", i, id);
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            errno = ERANGE;
-            goto err;
-        }
-        SET_FIELD(&vcpu, gdt_frames[i], mfn_to_pfn(ctx, mfn),
-                  ctx->x86.pv.width);
-    }
-
-    /* Convert CR3 to a pfn. */
-    mfn = cr3_to_mfn(ctx, GET_FIELD(&vcpu, ctrlreg[3], ctx->x86.pv.width));
-    if ( !mfn_in_pseudophysmap(ctx, mfn) )
-    {
-        ERROR("Bad mfn for vcpu%u's cr3", id);
-        dump_bad_pseudophysmap_entry(ctx, mfn);
-        errno = ERANGE;
-        goto err;
-    }
-    pfn = mfn_to_pfn(ctx, mfn);
-    SET_FIELD(&vcpu, ctrlreg[3], mfn_to_cr3(ctx, pfn), ctx->x86.pv.width);
-
-    /* 64bit guests: Convert CR1 (guest pagetables) to pfn. */
-    if ( ctx->x86.pv.levels == 4 && vcpu.x64.ctrlreg[1] )
-    {
-        mfn = vcpu.x64.ctrlreg[1] >> PAGE_SHIFT;
-        if ( !mfn_in_pseudophysmap(ctx, mfn) )
-        {
-            ERROR("Bad mfn for vcpu%u's cr1", id);
-            dump_bad_pseudophysmap_entry(ctx, mfn);
-            errno = ERANGE;
-            goto err;
-        }
-        pfn = mfn_to_pfn(ctx, mfn);
-        vcpu.x64.ctrlreg[1] = 1 | ((uint64_t)pfn << PAGE_SHIFT);
-    }
-
-    if ( ctx->x86.pv.width == 8 )
-        rc = write_split_record(ctx, &rec, &vcpu, sizeof(vcpu.x64));
-    else
-        rc = write_split_record(ctx, &rec, &vcpu, sizeof(vcpu.x32));
-
- err:
-    return rc;
-}
-
-/*
- * Obtain a specific vcpus extended state and write an X86_PV_VCPU_EXTENDED
- * record into the stream.
- */
-static int write_one_vcpu_extended(struct xc_sr_context *ctx, uint32_t id)
-{
-    xc_interface *xch = ctx->xch;
-    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
-        .vcpu_id = id,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_VCPU_EXTENDED,
-        .length = sizeof(vhdr),
-        .data = &vhdr,
-    };
-    struct xen_domctl domctl = {
-        .cmd = XEN_DOMCTL_get_ext_vcpucontext,
-        .domain = ctx->domid,
-        .u.ext_vcpucontext.vcpu = id,
-    };
-
-    if ( xc_domctl(xch, &domctl) < 0 )
-    {
-        PERROR("Unable to get vcpu%u extended context", id);
-        return -1;
-    }
-
-    /* No content? Skip the record. */
-    if ( domctl.u.ext_vcpucontext.size == 0 )
-        return 0;
-
-    return write_split_record(ctx, &rec, &domctl.u.ext_vcpucontext,
-                              domctl.u.ext_vcpucontext.size);
-}
-
-/*
- * Query to see whether a specific vcpu has xsave state and if so, write an
- * X86_PV_VCPU_XSAVE record into the stream.
- */
-static int write_one_vcpu_xsave(struct xc_sr_context *ctx, uint32_t id)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = -1;
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
-        .vcpu_id = id,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_VCPU_XSAVE,
-        .length = sizeof(vhdr),
-        .data = &vhdr,
-    };
-    struct xen_domctl domctl = {
-        .cmd = XEN_DOMCTL_getvcpuextstate,
-        .domain = ctx->domid,
-        .u.vcpuextstate.vcpu = id,
-    };
-
-    if ( xc_domctl(xch, &domctl) < 0 )
-    {
-        PERROR("Unable to get vcpu%u's xsave context", id);
-        goto err;
-    }
-
-    /* No xsave state? skip this record. */
-    if ( !domctl.u.vcpuextstate.xfeature_mask )
-        goto out;
-
-    buffer = xc_hypercall_buffer_alloc(xch, buffer, domctl.u.vcpuextstate.size);
-    if ( !buffer )
-    {
-        ERROR("Unable to allocate %"PRIx64" bytes for vcpu%u's xsave context",
-              domctl.u.vcpuextstate.size, id);
-        goto err;
-    }
-
-    set_xen_guest_handle(domctl.u.vcpuextstate.buffer, buffer);
-    if ( xc_domctl(xch, &domctl) < 0 )
-    {
-        PERROR("Unable to get vcpu%u's xsave context", id);
-        goto err;
-    }
-
-    /* No xsave state? Skip this record. */
-    if ( domctl.u.vcpuextstate.size == 0 )
-        goto out;
-
-    rc = write_split_record(ctx, &rec, buffer, domctl.u.vcpuextstate.size);
-    if ( rc )
-        goto err;
-
- out:
-    rc = 0;
-
- err:
-    xc_hypercall_buffer_free(xch, buffer);
-
-    return rc;
-}
-
-/*
- * Query to see whether a specific vcpu has msr state and if so, write an
- * X86_PV_VCPU_MSRS record into the stream.
- */
-static int write_one_vcpu_msrs(struct xc_sr_context *ctx, uint32_t id)
-{
-    xc_interface *xch = ctx->xch;
-    int rc = -1;
-    size_t buffersz;
-    DECLARE_HYPERCALL_BUFFER(void, buffer);
-    struct xc_sr_rec_x86_pv_vcpu_hdr vhdr = {
-        .vcpu_id = id,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_VCPU_MSRS,
-        .length = sizeof(vhdr),
-        .data = &vhdr,
-    };
-    struct xen_domctl domctl = {
-        .cmd = XEN_DOMCTL_get_vcpu_msrs,
-        .domain = ctx->domid,
-        .u.vcpu_msrs.vcpu = id,
-    };
-
-    if ( xc_domctl(xch, &domctl) < 0 )
-    {
-        PERROR("Unable to get vcpu%u's msrs", id);
-        goto err;
-    }
-
-    /* No MSRs? skip this record. */
-    if ( !domctl.u.vcpu_msrs.msr_count )
-        goto out;
-
-    buffersz = domctl.u.vcpu_msrs.msr_count * sizeof(xen_domctl_vcpu_msr_t);
-    buffer = xc_hypercall_buffer_alloc(xch, buffer, buffersz);
-    if ( !buffer )
-    {
-        ERROR("Unable to allocate %zu bytes for vcpu%u's msrs",
-              buffersz, id);
-        goto err;
-    }
-
-    set_xen_guest_handle(domctl.u.vcpu_msrs.msrs, buffer);
-    if ( xc_domctl(xch, &domctl) < 0 )
-    {
-        PERROR("Unable to get vcpu%u's msrs", id);
-        goto err;
-    }
-
-    /* No MSRs? Skip this record. */
-    if ( domctl.u.vcpu_msrs.msr_count == 0 )
-        goto out;
-
-    rc = write_split_record(ctx, &rec, buffer,
-                            domctl.u.vcpu_msrs.msr_count *
-                            sizeof(xen_domctl_vcpu_msr_t));
-    if ( rc )
-        goto err;
-
- out:
-    rc = 0;
-
- err:
-    xc_hypercall_buffer_free(xch, buffer);
-
-    return rc;
-}
-
-/*
- * For each vcpu, if it is online, write its state into the stream.
- */
-static int write_all_vcpu_information(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    xc_vcpuinfo_t vinfo;
-    unsigned int i;
-    int rc;
-
-    for ( i = 0; i <= ctx->dominfo.max_vcpu_id; ++i )
-    {
-        rc = xc_vcpu_getinfo(xch, ctx->domid, i, &vinfo);
-        if ( rc )
-        {
-            PERROR("Failed to get vcpu%u information", i);
-            return rc;
-        }
-
-        /* Vcpu offline? skip all these records. */
-        if ( !vinfo.online )
-            continue;
-
-        rc = write_one_vcpu_basic(ctx, i);
-        if ( rc )
-            return rc;
-
-        rc = write_one_vcpu_extended(ctx, i);
-        if ( rc )
-            return rc;
-
-        rc = write_one_vcpu_xsave(ctx, i);
-        if ( rc )
-            return rc;
-
-        rc = write_one_vcpu_msrs(ctx, i);
-        if ( rc )
-            return rc;
-    }
-
-    return 0;
-}
-
-/*
- * Writes an X86_PV_INFO record into the stream.
- */
-static int write_x86_pv_info(struct xc_sr_context *ctx)
-{
-    struct xc_sr_rec_x86_pv_info info = {
-        .guest_width = ctx->x86.pv.width,
-        .pt_levels = ctx->x86.pv.levels,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_INFO,
-        .length = sizeof(info),
-        .data = &info,
-    };
-
-    return write_record(ctx, &rec);
-}
-
-/*
- * Writes an X86_PV_P2M_FRAMES record into the stream.  This contains the list
- * of pfns making up the p2m table.
- */
-static int write_x86_pv_p2m_frames(struct xc_sr_context *ctx)
-{
-    xc_interface *xch = ctx->xch;
-    int rc; unsigned int i;
-    size_t datasz = ctx->x86.pv.p2m_frames * sizeof(uint64_t);
-    uint64_t *data = NULL;
-    struct xc_sr_rec_x86_pv_p2m_frames hdr = {
-        .end_pfn = ctx->x86.pv.max_pfn,
-    };
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_X86_PV_P2M_FRAMES,
-        .length = sizeof(hdr),
-        .data = &hdr,
-    };
-
-    /* No need to translate if sizeof(uint64_t) == sizeof(xen_pfn_t). */
-    if ( sizeof(uint64_t) != sizeof(*ctx->x86.pv.p2m_pfns) )
-    {
-        if ( !(data = malloc(datasz)) )
-        {
-            ERROR("Cannot allocate %zu bytes for X86_PV_P2M_FRAMES data",
-                  datasz);
-            return -1;
-        }
-
-        for ( i = 0; i < ctx->x86.pv.p2m_frames; ++i )
-            data[i] = ctx->x86.pv.p2m_pfns[i];
-    }
-    else
-        data = (uint64_t *)ctx->x86.pv.p2m_pfns;
-
-    rc = write_split_record(ctx, &rec, data, datasz);
-
-    if ( data != (uint64_t *)ctx->x86.pv.p2m_pfns )
-        free(data);
-
-    return rc;
-}
-
-/*
- * Writes an SHARED_INFO record into the stream.
- */
-static int write_shared_info(struct xc_sr_context *ctx)
-{
-    struct xc_sr_record rec = {
-        .type = REC_TYPE_SHARED_INFO,
-        .length = PAGE_SIZE,
-        .data = ctx->x86.pv.shinfo,
-    };
-
-    return write_record(ctx, &rec);
-}
-
-/*
- * Normalise a pagetable for the migration stream.  Performs mfn->pfn
- * conversions on the ptes.
- */
-static int normalise_pagetable(struct xc_sr_context *ctx, const uint64_t *src,
-                               uint64_t *dst, unsigned long type)
-{
-    xc_interface *xch = ctx->xch;
-    uint64_t pte;
-    unsigned int i, xen_first = -1, xen_last = -1; /* Indices of Xen mappings. */
-
-    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-    if ( ctx->x86.pv.levels == 4 )
-    {
-        /* 64bit guests only have Xen mappings in their L4 tables. */
-        if ( type == XEN_DOMCTL_PFINFO_L4TAB )
-        {
-            xen_first = (HYPERVISOR_VIRT_START_X86_64 >>
-                         L4_PAGETABLE_SHIFT_X86_64) & 511;
-            xen_last = (HYPERVISOR_VIRT_END_X86_64 >>
-                        L4_PAGETABLE_SHIFT_X86_64) & 511;
-        }
-    }
-    else
-    {
-        switch ( type )
-        {
-        case XEN_DOMCTL_PFINFO_L4TAB:
-            ERROR("??? Found L4 table for 32bit guest");
-            errno = EINVAL;
-            return -1;
-
-        case XEN_DOMCTL_PFINFO_L3TAB:
-            /* 32bit guests can only use the first 4 entries of their L3 tables.
-             * All other are potentially used by Xen. */
-            xen_first = 4;
-            xen_last = 511;
-            break;
-
-        case XEN_DOMCTL_PFINFO_L2TAB:
-            /* It is hard to spot Xen mappings in a 32bit guest's L2.  Most
-             * are normal but only a few will have Xen mappings.
-             */
-            i = (HYPERVISOR_VIRT_START_X86_32 >> L2_PAGETABLE_SHIFT_PAE) & 511;
-            if ( pte_to_frame(src[i]) == ctx->x86.pv.compat_m2p_mfn0 )
-            {
-                xen_first = i;
-                xen_last = (HYPERVISOR_VIRT_END_X86_32 >>
-                            L2_PAGETABLE_SHIFT_PAE) & 511;
-            }
-            break;
-        }
-    }
-
-    for ( i = 0; i < (PAGE_SIZE / sizeof(uint64_t)); ++i )
-    {
-        xen_pfn_t mfn;
-
-        pte = src[i];
-
-        /* Remove Xen mappings: Xen will reconstruct on the other side. */
-        if ( i >= xen_first && i <= xen_last )
-            pte = 0;
-
-        /*
-         * Errors during the live part of migration are expected as a result
-         * of split pagetable updates, page type changes, active grant
-         * mappings etc.  The pagetable will need to be resent after pausing.
-         * In such cases we fail with EAGAIN.
-         *
-         * For domains which are already paused, errors are fatal.
-         */
-        if ( pte & _PAGE_PRESENT )
-        {
-            mfn = pte_to_frame(pte);
-
-#ifdef __i386__
-            if ( mfn == INVALID_MFN )
-            {
-                if ( !ctx->dominfo.paused )
-                    errno = EAGAIN;
-                else
-                {
-                    ERROR("PTE truncation detected.  L%lu[%u] = %016"PRIx64,
-                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
-                    errno = E2BIG;
-                }
-                return -1;
-            }
-#endif
-
-            if ( (type > XEN_DOMCTL_PFINFO_L1TAB) && (pte & _PAGE_PSE) )
-            {
-                ERROR("Cannot migrate superpage (L%lu[%u]: 0x%016"PRIx64")",
-                      type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i, pte);
-                errno = E2BIG;
-                return -1;
-            }
-
-            if ( !mfn_in_pseudophysmap(ctx, mfn) )
-            {
-                if ( !ctx->dominfo.paused )
-                    errno = EAGAIN;
-                else
-                {
-                    ERROR("Bad mfn for L%lu[%u]",
-                          type >> XEN_DOMCTL_PFINFO_LTAB_SHIFT, i);
-                    dump_bad_pseudophysmap_entry(ctx, mfn);
-                    errno = ERANGE;
-                }
-                return -1;
-            }
-
-            pte = merge_pte(pte, mfn_to_pfn(ctx, mfn));
-        }
-
-        dst[i] = pte;
-    }
-
-    return 0;
-}
-
-static xen_pfn_t x86_pv_pfn_to_gfn(const struct xc_sr_context *ctx,
-                                   xen_pfn_t pfn)
-{
-    assert(pfn <= ctx->x86.pv.max_pfn);
-
-    return xc_pfn_to_mfn(pfn, ctx->x86.pv.p2m, ctx->x86.pv.width);
-}
-
-
-/*
- * save_ops function.  Performs pagetable normalisation on appropriate pages.
- */
-static int x86_pv_normalise_page(struct xc_sr_context *ctx, xen_pfn_t type,
-                                 void **page)
-{
-    xc_interface *xch = ctx->xch;
-    void *local_page;
-    int rc;
-
-    type &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
-
-    if ( type < XEN_DOMCTL_PFINFO_L1TAB || type > XEN_DOMCTL_PFINFO_L4TAB )
-        return 0;
-
-    local_page = malloc(PAGE_SIZE);
-    if ( !local_page )
-    {
-        ERROR("Unable to allocate scratch page");
-        rc = -1;
-        goto out;
-    }
-
-    rc = normalise_pagetable(ctx, *page, local_page, type);
-    *page = local_page;
-
- out:
-    return rc;
-}
-
-/*
- * save_ops function.  Queries domain information and maps the Xen m2p and the
- * guests shinfo and p2m table.
- */
-static int x86_pv_setup(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    rc = x86_pv_domain_info(ctx);
-    if ( rc )
-        return rc;
-
-    rc = x86_pv_map_m2p(ctx);
-    if ( rc )
-        return rc;
-
-    rc = map_shinfo(ctx);
-    if ( rc )
-        return rc;
-
-    rc = map_p2m(ctx);
-    if ( rc )
-        return rc;
-
-    return 0;
-}
-
-static int x86_pv_static_data(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    rc = write_x86_pv_info(ctx);
-    if ( rc )
-        return rc;
-
-    rc = write_x86_cpu_policy_records(ctx);
-    if ( rc )
-        return rc;
-
-    return 0;
-}
-
-static int x86_pv_start_of_stream(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    /*
-     * Ideally should be able to change during migration.  Currently
-     * corruption will occur if the contents or location of the P2M changes
-     * during the live migration loop.  If one is very lucky, the breakage
-     * will not be subtle.
-     */
-    rc = write_x86_pv_p2m_frames(ctx);
-    if ( rc )
-        return rc;
-
-    return 0;
-}
-
-static int x86_pv_start_of_checkpoint(struct xc_sr_context *ctx)
-{
-    return 0;
-}
-
-static int x86_pv_end_of_checkpoint(struct xc_sr_context *ctx)
-{
-    int rc;
-
-    rc = write_x86_tsc_info(ctx);
-    if ( rc )
-        return rc;
-
-    rc = write_shared_info(ctx);
-    if ( rc )
-        return rc;
-
-    rc = write_all_vcpu_information(ctx);
-    if ( rc )
-        return rc;
-
-    return 0;
-}
-
-static int x86_pv_check_vm_state(struct xc_sr_context *ctx)
-{
-    if ( ctx->x86.pv.p2m_generation == ~0ULL )
-        return 0;
-
-    return x86_pv_check_vm_state_p2m_list(ctx);
-}
-
-static int x86_pv_cleanup(struct xc_sr_context *ctx)
-{
-    free(ctx->x86.pv.p2m_pfns);
-
-    if ( ctx->x86.pv.p2m )
-        munmap(ctx->x86.pv.p2m, ctx->x86.pv.p2m_frames * PAGE_SIZE);
-
-    if ( ctx->x86.pv.shinfo )
-        munmap(ctx->x86.pv.shinfo, PAGE_SIZE);
-
-    if ( ctx->x86.pv.m2p )
-        munmap(ctx->x86.pv.m2p, ctx->x86.pv.nr_m2p_frames * PAGE_SIZE);
-
-    return 0;
-}
-
-struct xc_sr_save_ops save_ops_x86_pv =
-{
-    .pfn_to_gfn          = x86_pv_pfn_to_gfn,
-    .normalise_page      = x86_pv_normalise_page,
-    .setup               = x86_pv_setup,
-    .static_data         = x86_pv_static_data,
-    .start_of_stream     = x86_pv_start_of_stream,
-    .start_of_checkpoint = x86_pv_start_of_checkpoint,
-    .end_of_checkpoint   = x86_pv_end_of_checkpoint,
-    .check_vm_state      = x86_pv_check_vm_state,
-    .cleanup             = x86_pv_cleanup,
-};
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_sr_stream_format.h b/tools/libxc/xg_sr_stream_format.h
deleted file mode 100644 (file)
index 8a0da26..0000000
+++ /dev/null
@@ -1,150 +0,0 @@
-#ifndef __STREAM_FORMAT__H
-#define __STREAM_FORMAT__H
-
-/*
- * C structures for the Migration v2 stream format.
- * See docs/specs/libxc-migration-stream.pandoc
- */
-
-#include <inttypes.h>
-
-/*
- * Image Header
- */
-struct xc_sr_ihdr
-{
-    uint64_t marker;
-    uint32_t id;
-    uint32_t version;
-    uint16_t options;
-    uint16_t _res1;
-    uint32_t _res2;
-};
-
-#define IHDR_MARKER  0xffffffffffffffffULL
-#define IHDR_ID      0x58454E46U
-
-#define _IHDR_OPT_ENDIAN 0
-#define IHDR_OPT_LITTLE_ENDIAN (0 << _IHDR_OPT_ENDIAN)
-#define IHDR_OPT_BIG_ENDIAN    (1 << _IHDR_OPT_ENDIAN)
-
-/*
- * Domain Header
- */
-struct xc_sr_dhdr
-{
-    uint32_t type;
-    uint16_t page_shift;
-    uint16_t _res1;
-    uint32_t xen_major;
-    uint32_t xen_minor;
-};
-
-#define DHDR_TYPE_X86_PV  0x00000001U
-#define DHDR_TYPE_X86_HVM 0x00000002U
-
-/*
- * Record Header
- */
-struct xc_sr_rhdr
-{
-    uint32_t type;
-    uint32_t length;
-};
-
-/* All records must be aligned up to an 8 octet boundary */
-#define REC_ALIGN_ORDER               (3U)
-/* Somewhat arbitrary - 128MB */
-#define REC_LENGTH_MAX                (128U << 20)
-
-#define REC_TYPE_END                        0x00000000U
-#define REC_TYPE_PAGE_DATA                  0x00000001U
-#define REC_TYPE_X86_PV_INFO                0x00000002U
-#define REC_TYPE_X86_PV_P2M_FRAMES          0x00000003U
-#define REC_TYPE_X86_PV_VCPU_BASIC          0x00000004U
-#define REC_TYPE_X86_PV_VCPU_EXTENDED       0x00000005U
-#define REC_TYPE_X86_PV_VCPU_XSAVE          0x00000006U
-#define REC_TYPE_SHARED_INFO                0x00000007U
-#define REC_TYPE_X86_TSC_INFO               0x00000008U
-#define REC_TYPE_HVM_CONTEXT                0x00000009U
-#define REC_TYPE_HVM_PARAMS                 0x0000000aU
-#define REC_TYPE_TOOLSTACK                  0x0000000bU
-#define REC_TYPE_X86_PV_VCPU_MSRS           0x0000000cU
-#define REC_TYPE_VERIFY                     0x0000000dU
-#define REC_TYPE_CHECKPOINT                 0x0000000eU
-#define REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST  0x0000000fU
-#define REC_TYPE_STATIC_DATA_END            0x00000010U
-#define REC_TYPE_X86_CPUID_POLICY           0x00000011U
-#define REC_TYPE_X86_MSR_POLICY             0x00000012U
-
-#define REC_TYPE_OPTIONAL             0x80000000U
-
-/* PAGE_DATA */
-struct xc_sr_rec_page_data_header
-{
-    uint32_t count;
-    uint32_t _res1;
-    uint64_t pfn[0];
-};
-
-#define PAGE_DATA_PFN_MASK  0x000fffffffffffffULL
-#define PAGE_DATA_TYPE_MASK 0xf000000000000000ULL
-
-/* X86_PV_INFO */
-struct xc_sr_rec_x86_pv_info
-{
-    uint8_t guest_width;
-    uint8_t pt_levels;
-    uint8_t _res[6];
-};
-
-/* X86_PV_P2M_FRAMES */
-struct xc_sr_rec_x86_pv_p2m_frames
-{
-    uint32_t start_pfn;
-    uint32_t end_pfn;
-    uint64_t p2m_pfns[0];
-};
-
-/* X86_PV_VCPU_{BASIC,EXTENDED,XSAVE,MSRS} */
-struct xc_sr_rec_x86_pv_vcpu_hdr
-{
-    uint32_t vcpu_id;
-    uint32_t _res1;
-    uint8_t context[0];
-};
-
-/* X86_TSC_INFO */
-struct xc_sr_rec_x86_tsc_info
-{
-    uint32_t mode;
-    uint32_t khz;
-    uint64_t nsec;
-    uint32_t incarnation;
-    uint32_t _res1;
-};
-
-/* HVM_PARAMS */
-struct xc_sr_rec_hvm_params_entry
-{
-    uint64_t index;
-    uint64_t value;
-};
-
-struct xc_sr_rec_hvm_params
-{
-    uint32_t count;
-    uint32_t _res1;
-    struct xc_sr_rec_hvm_params_entry param[0];
-};
-
-#endif
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/tools/libxc/xg_suspend.c b/tools/libxc/xg_suspend.c
deleted file mode 100644 (file)
index 0ce6364..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <unistd.h>
-#include <fcntl.h>
-
-#include <xenevtchn.h>
-
-#include "xc_private.h"
-#include "xenguest.h"
-
-#define SUSPEND_LOCK_FILE    XEN_RUN_DIR "/suspend-evtchn-%d.lock"
-
-/*
- * locking
- */
-
-#define ERR(x) do{                                                      \
-    ERROR("Can't " #x " lock file for suspend event channel %s: %s\n",  \
-          suspend_file, strerror(errno));                               \
-    goto err;                                                           \
-}while(0)
-
-#define SUSPEND_FILE_BUFLEN (sizeof(SUSPEND_LOCK_FILE) + 10)
-
-static void get_suspend_file(char buf[], uint32_t domid)
-{
-    snprintf(buf, SUSPEND_FILE_BUFLEN, SUSPEND_LOCK_FILE, domid);
-}
-
-static int lock_suspend_event(xc_interface *xch, uint32_t domid, int *lockfd)
-{
-    int fd = -1, r;
-    char suspend_file[SUSPEND_FILE_BUFLEN];
-    struct stat ours, theirs;
-    struct flock fl;
-
-    get_suspend_file(suspend_file, domid);
-
-    *lockfd = -1;
-
-    for (;;) {
-        if (fd >= 0)
-            close (fd);
-
-        fd = open(suspend_file, O_CREAT | O_RDWR, 0600);
-        if (fd < 0)
-            ERR("create");
-
-        r = fcntl(fd, F_SETFD, FD_CLOEXEC);
-        if (r)
-            ERR("fcntl F_SETFD FD_CLOEXEC");
-
-        memset(&fl, 0, sizeof(fl));
-        fl.l_type = F_WRLCK;
-        fl.l_whence = SEEK_SET;
-        fl.l_len = 1;
-        r = fcntl(fd, F_SETLK, &fl);
-        if (r)
-            ERR("fcntl F_SETLK");
-
-        r = fstat(fd, &ours);
-        if (r)
-            ERR("fstat");
-
-        r = stat(suspend_file, &theirs);
-        if (r) {
-            if (errno == ENOENT)
-                /* try again */
-                continue;
-            ERR("stat");
-        }
-
-        if (ours.st_ino != theirs.st_ino)
-            /* someone else must have removed it while we were locking it */
-            continue;
-
-        break;
-    }
-
-    *lockfd = fd;
-    return 0;
-
- err:
-    if (fd >= 0)
-        close(fd);
-
-    return -1;
-}
-
-static int unlock_suspend_event(xc_interface *xch, uint32_t domid, int *lockfd)
-{
-    int r;
-    char suspend_file[SUSPEND_FILE_BUFLEN];
-
-    if (*lockfd < 0)
-        return 0;
-
-    get_suspend_file(suspend_file, domid);
-
-    r = unlink(suspend_file);
-    if (r)
-        ERR("unlink");
-
-    r = close(*lockfd);
-    *lockfd = -1;
-    if (r)
-        ERR("close");
-
- err:
-    if (*lockfd >= 0)
-        close(*lockfd);
-
-    return -1;
-}
-
-int xc_await_suspend(xc_interface *xch, xenevtchn_handle *xce, int suspend_evtchn)
-{
-    int rc;
-
-    do {
-        rc = xenevtchn_pending(xce);
-        if (rc < 0) {
-            ERROR("error polling suspend notification channel: %d", rc);
-            return -1;
-        }
-    } while (rc != suspend_evtchn);
-
-    /* harmless for one-off suspend */
-    if (xenevtchn_unmask(xce, suspend_evtchn) < 0)
-        ERROR("failed to unmask suspend notification channel: %d", rc);
-
-    return 0;
-}
-
-/* Internal callers are allowed to call this with suspend_evtchn<0
- * but *lockfd>0. */
-int xc_suspend_evtchn_release(xc_interface *xch, xenevtchn_handle *xce,
-                              uint32_t domid, int suspend_evtchn, int *lockfd)
-{
-    if (suspend_evtchn >= 0)
-        xenevtchn_unbind(xce, suspend_evtchn);
-
-    return unlock_suspend_event(xch, domid, lockfd);
-}
-
-int xc_suspend_evtchn_init_sane(xc_interface *xch, xenevtchn_handle *xce,
-                                uint32_t domid, int port, int *lockfd)
-{
-    int rc, suspend_evtchn = -1;
-
-    if (lock_suspend_event(xch, domid, lockfd)) {
-        errno = EINVAL;
-        goto cleanup;
-    }
-
-    suspend_evtchn = xenevtchn_bind_interdomain(xce, domid, port);
-    if (suspend_evtchn < 0) {
-        ERROR("failed to bind suspend event channel: %d", suspend_evtchn);
-        goto cleanup;
-    }
-
-    rc = xc_domain_subscribe_for_suspend(xch, domid, port);
-    if (rc < 0) {
-        ERROR("failed to subscribe to domain: %d", rc);
-        goto cleanup;
-    }
-
-    return suspend_evtchn;
-
-cleanup:
-    xc_suspend_evtchn_release(xch, xce, domid, suspend_evtchn, lockfd);
-
-    return -1;
-}
-
-int xc_suspend_evtchn_init_exclusive(xc_interface *xch, xenevtchn_handle *xce,
-                                     uint32_t domid, int port, int *lockfd)
-{
-    int suspend_evtchn;
-
-    suspend_evtchn = xc_suspend_evtchn_init_sane(xch, xce, domid, port, lockfd);
-    if (suspend_evtchn < 0)
-        return suspend_evtchn;
-
-    /* event channel is pending immediately after binding */
-    xc_await_suspend(xch, xce, suspend_evtchn);
-
-    return suspend_evtchn;
-}