From 15d6ef21fb500842690584405d5f3986117bf4b8 Mon Sep 17 00:00:00 2001 From: Jean Guyader Date: Wed, 13 May 2009 13:40:02 +0100 Subject: [PATCH] Import xen-3.4-testing, 19599:7d552e56d105. --- Config.mk | 16 +- Makefile | 8 +- buildconfigs/mk.linux-2.6-pvops | 2 +- config/StdGNU.mk | 3 +- extras/mini-os/blkfront.c | 16 +- extras/mini-os/include/blkfront.h | 2 + extras/mini-os/lib/stack_chk_fail.c | 8 + extras/mini-os/lib/sys.c | 16 +- extras/mini-os/main.c | 2 +- extras/mini-os/netfront.c | 11 +- stubdom/Makefile | 1 + stubdom/README | 73 +- stubdom/newlib-chk.patch | 155 ++++ stubdom/stubdom-dm | 82 +- tools/Rules.mk | 2 + tools/console/daemon/io.c | 16 +- tools/examples/Makefile | 1 - tools/examples/xmexample.hvm-stubdom | 16 +- tools/firmware/hvmloader/acpi/dsdt.asl | 227 ++--- tools/firmware/hvmloader/acpi/dsdt.c | 290 ++++--- tools/firmware/hvmloader/acpi/static_tables.c | 2 +- tools/firmware/hvmloader/hvmloader.c | 4 +- tools/firmware/rombios/32bitgateway.c | 4 +- tools/hotplug/Linux/network-bridge | 15 +- tools/hotplug/Linux/network-nat | 6 +- tools/hotplug/Linux/vif-common.sh | 15 +- tools/hotplug/Linux/xend.rules | 2 +- tools/libxc/xc_domain_restore.c | 2 +- tools/libxc/xc_pagetab.c | 2 +- tools/libxc/xc_pm.c | 63 ++ tools/libxc/xenctrl.h | 5 + tools/misc/Makefile | 2 +- tools/misc/sbdf2devicepath | 82 ++ tools/misc/sxp-pretty | 8 - tools/misc/xen-bugtool | 3 - tools/misc/xen-python-path | 31 +- tools/misc/xend | 8 - tools/misc/xenpm.c | 87 +- tools/misc/xm | 3 - tools/misc/xsview | 2 - tools/pygrub/Makefile | 12 +- tools/pygrub/src/pygrub | 10 +- tools/python/Makefile | 19 +- tools/python/README.XendConfig | 1 + tools/python/scripts/test_hvm_create.py | 1 - tools/python/scripts/test_vm_create.py | 1 - tools/python/scripts/xapi.py | 1 - tools/python/xen/util/acmpolicy.py | 2 +- tools/python/xen/util/auxbin.py | 6 +- tools/python/xen/util/blkif.py | 2 +- tools/python/xen/util/pci.py | 13 +- tools/python/xen/util/vscsi_util.py | 86 +- 
tools/python/xen/web/connection.py | 2 + tools/python/xen/xend/XendAPI.py | 17 +- tools/python/xen/xend/XendConfig.py | 28 +- tools/python/xen/xend/XendConstants.py | 6 +- tools/python/xen/xend/XendDomain.py | 1 + tools/python/xen/xend/XendDomainInfo.py | 278 +++--- tools/python/xen/xend/XendNode.py | 77 +- tools/python/xen/xend/image.py | 93 +- tools/python/xen/xend/server/XMLRPCServer.py | 3 +- tools/python/xen/xend/server/pciif.py | 43 +- tools/python/xen/xend/server/udevevent.py | 22 +- tools/python/xen/xend/server/vfbif.py | 2 +- tools/python/xen/xm/create.dtd | 2 +- tools/python/xen/xm/create.py | 9 +- tools/python/xen/xm/main.py | 84 +- tools/python/xen/xm/xenapi_create.py | 19 +- tools/security/Makefile | 12 +- .../security/python/xensec_tools/acm_getlabel | 4 - tools/security/xensec_gen.py | 4 - tools/sv/index.psp | 1 - tools/vnet/scripts/vn | 3 - tools/xcutils/xc_save.c | 2 +- tools/xenpmd/Makefile | 3 + tools/xenstore/xs.h | 5 + tools/xentrace/formats | 6 +- tools/xentrace/xenctx.c | 68 +- xen/Makefile | 2 +- xen/arch/ia64/linux-xen/head.S | 29 + xen/arch/ia64/linux-xen/mm_contig.c | 16 +- xen/arch/ia64/linux-xen/smpboot.c | 2 +- xen/arch/ia64/xen/ivt.S | 2 +- xen/arch/ia64/xen/xen.lds.S | 12 +- xen/arch/ia64/xen/xensetup.c | 5 +- xen/arch/x86/Rules.mk | 2 +- xen/arch/x86/acpi/cpu_idle.c | 53 +- xen/arch/x86/acpi/cpufreq/cpufreq.c | 6 +- xen/arch/x86/acpi/suspend.c | 14 + xen/arch/x86/apic.c | 5 +- xen/arch/x86/cpu/intel.c | 3 + xen/arch/x86/cpu/mcheck/mce.c | 191 +++-- xen/arch/x86/cpu/mcheck/mce.h | 9 +- xen/arch/x86/cpu/mcheck/mce_intel.c | 794 ++++++++---------- xen/arch/x86/cpu/mcheck/mctelem.c | 114 ++- xen/arch/x86/cpu/mcheck/mctelem.h | 4 + xen/arch/x86/cpu/mcheck/non-fatal.c | 8 +- xen/arch/x86/cpu/mcheck/x86_mca.h | 2 +- xen/arch/x86/domain.c | 15 +- xen/arch/x86/domctl.c | 19 +- xen/arch/x86/e820.c | 132 ++- xen/arch/x86/hpet.c | 93 +- xen/arch/x86/hvm/emulate.c | 11 +- xen/arch/x86/hvm/hvm.c | 41 +- xen/arch/x86/hvm/intercept.c | 10 +- 
xen/arch/x86/hvm/irq.c | 12 +- xen/arch/x86/hvm/pmtimer.c | 22 +- xen/arch/x86/hvm/svm/svm.c | 11 +- xen/arch/x86/hvm/vmx/vmcs.c | 12 +- xen/arch/x86/hvm/vmx/vmx.c | 32 +- xen/arch/x86/ioport_emulate.c | 8 + xen/arch/x86/irq.c | 34 +- xen/arch/x86/mm/hap/p2m-ept.c | 6 +- xen/arch/x86/mm/paging.c | 8 +- xen/arch/x86/mm/shadow/multi.c | 3 + xen/arch/x86/numa.c | 6 +- xen/arch/x86/setup.c | 39 +- xen/arch/x86/tboot.c | 7 +- xen/arch/x86/time.c | 126 ++- xen/arch/x86/traps.c | 60 +- xen/common/domctl.c | 35 +- xen/common/page_alloc.c | 3 +- xen/common/sched_credit.c | 62 +- xen/common/schedule.c | 19 +- xen/common/spinlock.c | 21 +- xen/common/sysctl.c | 8 + xen/drivers/acpi/pmstat.c | 25 + xen/drivers/char/console.c | 61 +- xen/drivers/cpufreq/cpufreq.c | 125 +-- xen/drivers/cpufreq/cpufreq_ondemand.c | 44 +- xen/drivers/passthrough/amd/iommu_intr.c | 34 + xen/drivers/passthrough/io.c | 21 +- xen/drivers/passthrough/vtd/iommu.c | 52 +- xen/drivers/passthrough/vtd/qinval.c | 5 +- xen/include/acpi/cpufreq/cpufreq.h | 4 + .../asm-ia64/linux-xen/asm/README.origin | 1 + xen/include/asm-ia64/linux-xen/asm/sections.h | 28 + xen/include/asm-ia64/linux-xen/asm/spinlock.h | 105 --- xen/include/asm-ia64/linux/asm/README.origin | 1 - xen/include/asm-ia64/mm.h | 1 + xen/include/asm-x86/atomic.h | 46 +- xen/include/asm-x86/bug.h | 24 + xen/include/asm-x86/config.h | 1 + xen/include/asm-x86/cpufeature.h | 1 + xen/include/asm-x86/domain.h | 1 + xen/include/asm-x86/hpet.h | 1 + xen/include/asm-x86/hvm/vcpu.h | 2 + xen/include/asm-x86/hvm/vmx/vmx.h | 1 + xen/include/asm-x86/msr-index.h | 10 +- xen/include/asm-x86/perfc_defn.h | 2 +- xen/include/asm-x86/spinlock.h | 13 - xen/include/asm-x86/traps.h | 5 + xen/include/asm-x86/x86_32/bug.h | 29 +- xen/include/asm-x86/x86_32/page.h | 19 +- xen/include/asm-x86/x86_64/bug.h | 31 +- xen/include/asm-x86/x86_64/page.h | 8 +- xen/include/public/arch-x86/xen-mca.h | 63 +- xen/include/public/arch-x86/xen.h | 4 - xen/include/public/domctl.h | 1 + 
xen/include/public/sysctl.h | 12 + xen/include/public/trace.h | 10 +- xen/include/xen/acpi.h | 4 +- xen/include/xen/console.h | 3 +- xen/include/xen/domain.h | 3 + xen/include/xen/hypercall.h | 1 - xen/include/xen/iommu.h | 2 +- xen/include/xen/lib.h | 4 +- xen/include/xen/sched-if.h | 3 + xen/include/xen/sched.h | 5 + xen/include/xlat.lst | 16 + xen/include/xsm/xsm.h | 12 + xen/tools/get-fields.sh | 2 +- xen/xsm/dummy.c | 12 + 173 files changed, 3325 insertions(+), 1902 deletions(-) create mode 100644 extras/mini-os/lib/stack_chk_fail.c create mode 100644 stubdom/newlib-chk.patch create mode 100644 tools/misc/sbdf2devicepath create mode 100644 xen/include/asm-ia64/linux-xen/asm/sections.h diff --git a/Config.mk b/Config.mk index abdb1ab..214b592 100644 --- a/Config.mk +++ b/Config.mk @@ -1,7 +1,7 @@ # -*- mode: Makefile; -*- -# A debug build of Xen and tools? TEMPORARILY ENABLED -debug ?= y +# A debug build of Xen and tools? +debug ?= n XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ -e s/i86pc/x86_32/ -e s/amd64/x86_64/) @@ -19,8 +19,6 @@ HOSTCFLAGS += -fno-strict-aliasing DISTDIR ?= $(XEN_ROOT)/dist DESTDIR ?= / -DOCDIR ?= /usr/share/doc/xen -MANDIR ?= /usr/share/man # Allow phony attribute to be listed as dependency rather than fake target .PHONY: .phony @@ -28,11 +26,21 @@ MANDIR ?= /usr/share/man include $(XEN_ROOT)/config/$(XEN_OS).mk include $(XEN_ROOT)/config/$(XEN_TARGET_ARCH).mk +SHAREDIR ?= $(PREFIX)/share +DOCDIR ?= $(SHAREDIR)/doc/xen +MANDIR ?= $(SHAREDIR)/man + ifneq ($(EXTRA_PREFIX),) EXTRA_INCLUDES += $(EXTRA_PREFIX)/include EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBLEAFDIR) endif +PYTHON ?= python +PYTHON_PREFIX_ARG ?= --prefix="$(PREFIX)" +# The above requires that PREFIX contains *no spaces*. This variable is here +# to permit the user to set PYTHON_PREFIX_ARG to '' to workaround this bug: +# https://bugs.launchpad.net/ubuntu/+bug/362570 + # cc-option: Check if compiler supports first option, else fall back to second. 
# Usage: cflags-y += $(call cc-option,$(CC),-march=winchip-c6,-march=i586) cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \ diff --git a/Makefile b/Makefile index 131ab3d..ab4602b 100644 --- a/Makefile +++ b/Makefile @@ -188,11 +188,7 @@ help: @echo ' clean-tboot - clean the tboot module if it exists' @echo @echo 'Environment:' - @echo ' XEN_PYTHON_NATIVE_INSTALL=y' - @echo ' - native python install or dist' - @echo ' install into prefix/lib/python' - @echo ' instead of /lib/python' - @echo ' true if set to non-empty value, false otherwise' + @echo ' [ this documentation is sadly not complete ]' # Use this target with extreme care! .PHONY: uninstall @@ -241,7 +237,7 @@ linux26: # tboot targets # -TBOOT_TARFILE = tboot-20080613.tar.gz +TBOOT_TARFILE = tboot-20090330.tar.gz #TBOOT_BASE_URL = http://downloads.sourceforge.net/tboot TBOOT_BASE_URL = $(XEN_EXTFILES_URL) diff --git a/buildconfigs/mk.linux-2.6-pvops b/buildconfigs/mk.linux-2.6-pvops index d3258f5..56e606f 100644 --- a/buildconfigs/mk.linux-2.6-pvops +++ b/buildconfigs/mk.linux-2.6-pvops @@ -7,7 +7,7 @@ XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git XEN_LINUX_GIT_REMOTENAME ?= xen -XEN_LINUX_GIT_REMOTEBRANCH ?= xen/dom0/hackery +XEN_LINUX_GIT_REMOTEBRANCH ?= xen-tip/master EXTRAVERSION ?= diff --git a/config/StdGNU.mk b/config/StdGNU.mk index d3d69bf..812d389 100644 --- a/config/StdGNU.mk +++ b/config/StdGNU.mk @@ -31,7 +31,8 @@ LIBDIR = $(PREFIX)/$(LIBLEAFDIR) LIBDIR_x86_32 = $(PREFIX)/$(LIBLEAFDIR_x86_32) LIBDIR_x86_64 = $(PREFIX)/$(LIBLEAFDIR_x86_64) LIBEXEC = $(LIBDIR_x86_32)/xen/bin -MANDIR = $(PREFIX)/share/man +SHAREDIR = $(PREFIX)/share +MANDIR = $(SHAREDIR)/man MAN1DIR = $(MANDIR)/man1 MAN8DIR = $(MANDIR)/man8 SBINDIR = $(PREFIX)/sbin diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c index d4b0ea5..d869715 100644 --- a/extras/mini-os/blkfront.c +++ 
b/extras/mini-os/blkfront.c @@ -244,10 +244,6 @@ void shutdown_blkfront(struct blkfront_dev *dev) xenbus_wait_for_value(path, "5", &dev->events); err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); - xenbus_wait_for_value(path, "6", &dev->events); - - err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 1); - xenbus_wait_for_value(path, "2", &dev->events); xenbus_unwatch_path(XBT_NIL, path); @@ -316,20 +312,22 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) req->id = (uintptr_t) aiocbp; req->sector_number = aiocbp->aio_offset / dev->info.sector_size; + for (j = 0; j < n; j++) { + req->seg[j].first_sect = 0; + req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1; + } + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size; + req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size; for (j = 0; j < n; j++) { uintptr_t data = start + j * PAGE_SIZE; if (!write) { /* Trigger CoW if needed */ - *(char*)data = 0; + *(char*)(data + (req->seg[j].first_sect << 9)) = 0; barrier(); } aiocbp->gref[j] = req->seg[j].gref = gnttab_grant_access(dev->dom, virtual_to_mfn(data), write); - req->seg[j].first_sect = 0; - req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1; } - req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size; - req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size; dev->ring.req_prod_pvt = i + 1; diff --git a/extras/mini-os/include/blkfront.h b/extras/mini-os/include/blkfront.h index 8cbc090..724137e 100644 --- a/extras/mini-os/include/blkfront.h +++ b/extras/mini-os/include/blkfront.h @@ -8,6 +8,8 @@ struct blkfront_aiocb uint8_t *aio_buf; size_t aio_nbytes; off_t aio_offset; + size_t total_bytes; + uint8_t is_write; void *data; grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; diff --git a/extras/mini-os/lib/stack_chk_fail.c 
b/extras/mini-os/lib/stack_chk_fail.c new file mode 100644 index 0000000..ade0045 --- /dev/null +++ b/extras/mini-os/lib/stack_chk_fail.c @@ -0,0 +1,8 @@ +#include +#include + +void __stack_chk_fail(void) +{ + printk("stack smashing detected\n"); + do_exit(); +} diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c index 7c5f05c..12395bf 100644 --- a/extras/mini-os/lib/sys.c +++ b/extras/mini-os/lib/sys.c @@ -677,7 +677,7 @@ static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exce { int i, n = 0; #ifdef HAVE_LWIP - int sock_n, sock_nfds = 0; + int sock_n = 0, sock_nfds = 0; fd_set sock_readfds, sock_writefds, sock_exceptfds; struct timeval timeout = { .tv_sec = 0, .tv_usec = 0}; #endif @@ -711,12 +711,14 @@ static int select_poll(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exce } } } - DEBUG("lwip_select("); - dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); - DEBUG("); -> "); - sock_n = lwip_select(sock_nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); - dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); - DEBUG("\n"); + if (sock_nfds > 0) { + DEBUG("lwip_select("); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("); -> "); + sock_n = lwip_select(sock_nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + dump_set(nfds, &sock_readfds, &sock_writefds, &sock_exceptfds, &timeout); + DEBUG("\n"); + } #endif /* Then see others as well. 
*/ diff --git a/extras/mini-os/main.c b/extras/mini-os/main.c index 3289c63..204cf85 100644 --- a/extras/mini-os/main.c +++ b/extras/mini-os/main.c @@ -62,7 +62,7 @@ static void call_main(void *p) #ifndef CONFIG_GRUB sparse((unsigned long) &__app_bss_start, &__app_bss_end - &__app_bss_start); -#ifdef HAVE_LWIP +#if defined(HAVE_LWIP) && !defined(CONFIG_QEMU) start_networking(); #endif init_fs_frontend(); diff --git a/extras/mini-os/netfront.c b/extras/mini-os/netfront.c index 824c42a..a235769 100644 --- a/extras/mini-os/netfront.c +++ b/extras/mini-os/netfront.c @@ -306,11 +306,16 @@ struct netfront_dev *init_netfront(char *_nodename, void (*thenetif_rx)(unsigned int retry=0; int i; char* msg; - char* nodename = _nodename ? _nodename : "device/vif/0"; - + char nodename[256]; + char path[256]; struct netfront_dev *dev; + static int netfrontends = 0; - char path[strlen(nodename) + 1 + 10 + 1]; + if (!_nodename) + snprintf(nodename, sizeof(nodename), "device/vif/%d", netfrontends); + else + snprintf(nodename, sizeof(nodename), "%s", _nodename); + netfrontends++; if (!thenetif_rx) thenetif_rx = netif_rx; diff --git a/stubdom/Makefile b/stubdom/Makefile index dcb614f..fabb690 100644 --- a/stubdom/Makefile +++ b/stubdom/Makefile @@ -91,6 +91,7 @@ newlib-$(NEWLIB_VERSION).tar.gz: newlib-$(NEWLIB_VERSION): newlib-$(NEWLIB_VERSION).tar.gz tar xzf $< patch -d $@ -p0 < newlib.patch + patch -d $@ -p0 < newlib-chk.patch touch $@ NEWLIB_STAMPFILE=$(CROSS_ROOT)/$(GNU_TARGET_ARCH)-xen-elf/lib/libc.a diff --git a/stubdom/README b/stubdom/README index 5bc2211..caab189 100644 --- a/stubdom/README +++ b/stubdom/README @@ -11,81 +11,12 @@ of video memory for the HVM domain, you need to avoid the need for ballooning, by using the hypervisor dom0_mem= option for instance.
-There is a sample configuration set in xmexample.hvm-stubdom and -xmexample.hvm-dm +There is a sample configuration set in xmexample.hvm-stubdom -In your HVM config "hvmconfig", - -- use /usr/lib/xen/bin/stubdom-dm as dm script: +In your HVM config "hvmconfig" use /usr/lib/xen/bin/stubdom-dm as dm script: device_model = '/usr/lib/xen/bin/stubdom-dm' -- comment the disk statement: - -#disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] - -- disable anything related to dom0, like pty serial assignments - - -Create /etc/xen/hvmconfig-dm (where "hvmconfig" is the name of your HVM -guest) with - -kernel = "/usr/lib/xen/boot/ioemu-stubdom.gz" -vif = [ '', 'ip=10.0.1.1,mac=aa:00:00:12:23:34'] -disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] - -where -- the first vif ('') is reserved for VNC (see below) -- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig -script, -- and disk = is the same block configuration as in the hvmconfig script. - -Display Configuration -===================== - -There are three posibilities - -* Using SDL - - - In hvmconfig, disable vnc and sdl: - -vnc = 0 -sdl = 0 - - - In hvmconfig-dm, set an sdl vfb: - -vfb = [ 'type=sdl' ] - - by default qemu will use sdl together with opengl for rendering, if - you do not want qemu to use opengl then also pass opengl=0: - -vfb = [ 'type=sdl, opengl=0' ] - -* Using a VNC server in the stub domain - - - In hvmconfig, set vnclisten to "172.30.206.1" for instance. Do not use a -host name as Mini-OS does not have a name resolver. Do not use 127.0.0.1 since -then you will not be able to connect to it. 
- -vnc = 1 -vnclisten = "172.30.206.1" - - - In hvmconfig-dm, fill the reserved vif with the same IP, for instance: - -vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34'] - -* Using a VNC server in dom0 - - - In hvmconfig, disable vnc and sdl: - -vnc = 0 -sdl = 0 - - - In hvmconfig-dm, set a vnc vfb: - -vfb = [ 'type=vnc' ] - -and any other parameter as wished. To run ====== diff --git a/stubdom/newlib-chk.patch b/stubdom/newlib-chk.patch new file mode 100644 index 0000000..848dbd5 --- /dev/null +++ b/stubdom/newlib-chk.patch @@ -0,0 +1,155 @@ +--- newlib/libc/stdio/fprintf_chk.c 1969-12-31 19:00:00.000000000 -0500 ++++ newlib/libc/stdio/fprintf_chk.c 2009-02-26 19:02:53.000000000 -0500 +@@ -0,0 +1,21 @@ ++#include ++#include ++ ++/* ++ * Stub implementation of __fprintf_chk adapted from glibc 2.7. This ++ * doesn't actually implement any buffer overflow protection. It just makes ++ * the linker happy :) ++*/ ++int ++__fprintf_chk (FILE *fp, int flag, const char *format, ...) 
++{ ++ va_list ap; ++ int done; ++ ++ va_start (ap, format); ++ done = vfprintf (fp, format, ap); ++ va_end (ap); ++ ++ return done; ++} ++ +--- newlib/libc/stdio/Makefile.am 2007-08-02 16:23:06.000000000 -0400 ++++ newlib/libc/stdio/Makefile.am 2009-02-26 18:14:53.000000000 -0500 +@@ -20,6 +20,7 @@ + flags.c \ + fopen.c \ + fprintf.c \ ++ fprintf_chk.c \ + fputc.c \ + fputs.c \ + fread.c \ +@@ -65,6 +66,7 @@ + sniprintf.c \ + snprintf.c \ + sprintf.c \ ++ sprintf_chk.c \ + sscanf.c \ + stdio.c \ + tmpfile.c \ +--- newlib/libc/stdio/Makefile.in 2007-12-19 17:36:38.000000000 -0500 ++++ newlib/libc/stdio/Makefile.in 2009-02-26 18:43:52.000000000 -0500 +@@ -63,7 +63,8 @@ + lib_a-fgets.$(OBJEXT) lib_a-fileno.$(OBJEXT) \ + lib_a-findfp.$(OBJEXT) lib_a-fiprintf.$(OBJEXT) \ + lib_a-flags.$(OBJEXT) lib_a-fopen.$(OBJEXT) \ +- lib_a-fprintf.$(OBJEXT) lib_a-fputc.$(OBJEXT) \ ++ lib_a-fprintf.$(OBJEXT) lib_a-fprintf_chk.$(OBJEXT) \ ++ lib_a-fputc.$(OBJEXT) \ + lib_a-fputs.$(OBJEXT) lib_a-fread.$(OBJEXT) \ + lib_a-freopen.$(OBJEXT) lib_a-fscanf.$(OBJEXT) \ + lib_a-fiscanf.$(OBJEXT) lib_a-fseek.$(OBJEXT) \ +@@ -86,6 +87,7 @@ + lib_a-setvbuf.$(OBJEXT) lib_a-siprintf.$(OBJEXT) \ + lib_a-siscanf.$(OBJEXT) lib_a-sniprintf.$(OBJEXT) \ + lib_a-snprintf.$(OBJEXT) lib_a-sprintf.$(OBJEXT) \ ++ lib_a-sprintf_chk.$(OBJEXT) \ + lib_a-sscanf.$(OBJEXT) lib_a-stdio.$(OBJEXT) \ + lib_a-tmpfile.$(OBJEXT) lib_a-tmpnam.$(OBJEXT) \ + lib_a-ungetc.$(OBJEXT) lib_a-vdiprintf.$(OBJEXT) \ +@@ -122,15 +124,15 @@ + LTLIBRARIES = $(noinst_LTLIBRARIES) + am__objects_4 = clearerr.lo fclose.lo fdopen.lo feof.lo ferror.lo \ + fflush.lo fgetc.lo fgetpos.lo fgets.lo fileno.lo findfp.lo \ +- fiprintf.lo flags.lo fopen.lo fprintf.lo fputc.lo fputs.lo \ +- fread.lo freopen.lo fscanf.lo fiscanf.lo fseek.lo fsetpos.lo \ ++ fiprintf.lo flags.lo fopen.lo fprintf.lo fprintf_chk.lo fputc.lo \ ++ fputs.lo fread.lo freopen.lo fscanf.lo fiscanf.lo fseek.lo fsetpos.lo \ + ftell.lo fvwrite.lo fwalk.lo fwrite.lo getc.lo 
getchar.lo \ + getc_u.lo getchar_u.lo getdelim.lo getline.lo gets.lo \ + iprintf.lo iscanf.lo makebuf.lo perror.lo printf.lo putc.lo \ + putchar.lo putc_u.lo putchar_u.lo puts.lo refill.lo remove.lo \ + rename.lo rewind.lo rget.lo scanf.lo sccl.lo setbuf.lo \ + setbuffer.lo setlinebuf.lo setvbuf.lo siprintf.lo siscanf.lo \ +- sniprintf.lo snprintf.lo sprintf.lo sscanf.lo stdio.lo \ ++ sniprintf.lo snprintf.lo sprintf.lo sprintf_chk.lo sscanf.lo stdio.lo \ + tmpfile.lo tmpnam.lo ungetc.lo vdiprintf.lo vdprintf.lo \ + viprintf.lo viscanf.lo vprintf.lo vscanf.lo vsiprintf.lo \ + vsiscanf.lo vsnprintf.lo vsniprintf.lo vsprintf.lo vsscanf.lo \ +@@ -344,6 +346,7 @@ + flags.c \ + fopen.c \ + fprintf.c \ ++ fprintf_chk.c \ + fputc.c \ + fputs.c \ + fread.c \ +@@ -389,6 +392,7 @@ + sniprintf.c \ + snprintf.c \ + sprintf.c \ ++ sprintf_chk.c \ + sscanf.c \ + stdio.c \ + tmpfile.c \ +@@ -508,6 +512,7 @@ + siprintf.def \ + siscanf.def \ + sprintf.def \ ++ sprintf_chk.def \ + sscanf.def \ + tmpfile.def \ + tmpnam.def \ +@@ -678,6 +683,12 @@ + lib_a-fprintf.obj: fprintf.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-fprintf.obj `if test -f 'fprintf.c'; then $(CYGPATH_W) 'fprintf.c'; else $(CYGPATH_W) '$(srcdir)/fprintf.c'; fi` + ++lib_a-fprintf_chk.o: fprintf_chk.c ++ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-fprintf_chk.o `test -f 'fprintf_chk.c' || echo '$(srcdir)/'`fprintf_chk.c ++ ++lib_a-fprintf_chk.obj: fprintf_chk.c ++ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-fprintf_chk.obj `if test -f 'fprintf_chk.c'; then $(CYGPATH_W) 'fprintf_chk.c'; else $(CYGPATH_W) '$(srcdir)/fprintf_chk.c'; fi` ++ + lib_a-fputc.o: fputc.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-fputc.o `test -f 'fputc.c' || echo 
'$(srcdir)/'`fputc.c + +@@ -948,6 +959,12 @@ + lib_a-sprintf.obj: sprintf.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sprintf.obj `if test -f 'sprintf.c'; then $(CYGPATH_W) 'sprintf.c'; else $(CYGPATH_W) '$(srcdir)/sprintf.c'; fi` + ++lib_a-sprintf_chk.o: sprintf_chk.c ++ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sprintf_chk.o `test -f 'sprintf_chk.c' || echo '$(srcdir)/'`sprintf_chk.c ++ ++lib_a-sprintf_chk.obj: sprintf_chk.c ++ $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sprintf_chk.obj `if test -f 'sprintf_chk.c'; then $(CYGPATH_W) 'sprintf_chk.c'; else $(CYGPATH_W) '$(srcdir)/sprintf_chk.c'; fi` ++ + lib_a-sscanf.o: sscanf.c + $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sscanf.o `test -f 'sscanf.c' || echo '$(srcdir)/'`sscanf.c + +--- newlib/libc/stdio/sprintf_chk.c 1969-12-31 19:00:00.000000000 -0500 ++++ newlib/libc/stdio/sprintf_chk.c 2009-02-26 19:02:26.000000000 -0500 +@@ -0,0 +1,21 @@ ++#include ++#include ++ ++/* ++ * Stub implementation of __sprintf_chk adapted from glibc 2.7. This ++ * doesn't actually implement any buffer overflow protection. It just makes ++ * the linker happy :) ++*/ ++int ++__sprintf_chk (char *s, int flags, size_t slen, const char *format, ...) 
++{ ++ va_list arg; ++ int done; ++ ++ va_start (arg, format); ++ done = vsprintf (s, format, arg); ++ va_end (arg); ++ ++ return done; ++} ++ diff --git a/stubdom/stubdom-dm b/stubdom/stubdom-dm index f471e82..0bcc1e6 100644 --- a/stubdom/stubdom-dm +++ b/stubdom/stubdom-dm @@ -15,7 +15,10 @@ domname= vncviewer=0 vncpid= extra= -videoram=4 +sdl=0 +opengl=1 +vnc=0 +vncunused=0 while [ "$#" -gt 0 ]; do if [ "$#" -ge 2 ]; @@ -31,22 +34,30 @@ do shift ;; -vnc) - ip=${2%:*}; - vnc_port=${2#*:}; + vnc=1 + op=${2%,*} + ip=${op%:*}; + vnc_port=${op#*:}; shift ;; + -vncunused) + vncunused=1 + shift + ;; -loadvm) extra="$extra -loadvm $2"; shift ;; - -videoram) - videoram="$2" - shift - ;; + -k) + keymap=$2 + shift + ;; esac fi case "$1" in -vncviewer) vncviewer=1 ;; + -sdl) sdl=1 ;; + -disable-opengl) opengl=0 ;; esac shift done @@ -61,7 +72,7 @@ term() { ( [ -n "$vncpid" ] && kill -9 $vncpid xm destroy $domname-dm - #xm destroy $domname + rm /etc/xen/stubdoms/$domname-dm ) & # We need to exit immediately so as to let xend do the commands above exit 0 @@ -77,7 +88,60 @@ do sleep 1 done -creation="xm create -c $domname-dm target=$domid memory=32 videoram=$videoram extra=\"$extra\"" +# Generate stubdom config file +mkdir -p /etc/xen/stubdoms &>/dev/null +echo "#This file is autogenerated, edit $domname instead!" 
> /etc/xen/stubdoms/$domname-dm +echo "kernel = '/usr/lib/xen/boot/ioemu-stubdom.gz'" >> /etc/xen/stubdoms/$domname-dm + +vfb="sdl=$sdl, opengl=$opengl" +test "$DISPLAY" && vfb="$vfb, display=$DISPLAY" +test "$XAUTHORITY" && vfb="$vfb, xauthority=$XAUTHORITY" +test $vnc != 0 && vfb="$vfb, vnc=$vnc, vncdisplay=$vnc_port, vnclisten=$ip, vncunused=$vncunused" +vncpasswd=`xenstore-read /local/domain/0/backend/vfb/$domid/0/vncpasswd 2>/dev/null` +test "$vncpasswd" && vfb="$vfb, vncpasswd=$vncpasswd" +test "$keymap" && vfb="$vfb, keymap=$keymap" +echo "vfb = ['$vfb']" >> /etc/xen/stubdoms/$domname-dm + +echo -n "disk = [ " >> /etc/xen/stubdoms/$domname-dm +j=0 +for i in `xenstore-ls /local/domain/$domid/device/vbd | grep 'backend =' | awk '{print $3}'` +do + i=${i%\"} + i=${i#\"} + vbd_mode=`xenstore-read $i/mode` + vbd_disk=`xenstore-read $i/params` + vbd_type=`xenstore-read $i/type` + vbd_dev=`xenstore-read $i/dev` + vbd_front=`xenstore-read $i/frontend` + vbd_devtype=`xenstore-read $vbd_front/device-type` + if [ $vbd_type = "file" ] + then + vbd_type="tap:aio" + fi + if [ $j -ne 0 ] + then + echo -n "," >> /etc/xen/stubdoms/$domname-dm + fi + echo -n "'$vbd_type:$vbd_disk,$vbd_dev:$vbd_devtype,$vbd_mode'" >> /etc/xen/stubdoms/$domname-dm + j=$(( $j + 1 )) +done +echo " ] " >> /etc/xen/stubdoms/$domname-dm +echo -n "vif = [ " >> /etc/xen/stubdoms/$domname-dm +j=0 +for i in `xenstore-ls /local/domain/$domid/device/vif | grep 'backend =' | awk '{print $3}'` +do + i=${i%\"} + i=${i#\"} + vif_mac=`xenstore-read $i/mac` + if [ $j -ne 0 ] + then + echo -n "," >> /etc/xen/stubdoms/$domname-dm + fi + echo -n "'mac=$vif_mac'" >> /etc/xen/stubdoms/$domname-dm + j=$(( $j + 1 )) +done +echo " ] " >> /etc/xen/stubdoms/$domname-dm +creation="xm create -c /etc/xen/stubdoms/$domname-dm target=$domid memory=32 extra=\"$extra\"" (while true ; do sleep 60 ; done) | /bin/sh -c "$creation" & #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" & 
diff --git a/tools/Rules.mk b/tools/Rules.mk index fbd69ea..53434d5 100644 --- a/tools/Rules.mk +++ b/tools/Rules.mk @@ -33,10 +33,12 @@ CFLAGS += -D__XEN_TOOLS__ CFLAGS += -MMD -MF .$(@F).d DEPS = .*.d +ifneq ($(XEN_OS),NetBSD) # Enable implicit LFS support *and* explicit LFS names. CFLAGS += $(shell getconf LFS_CFLAGS) CFLAGS += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE LDFLAGS += $(shell getconf LFS_LDFLAGS) +endif # 32-bit x86 does not perform well with -ve segment accesses on Xen. CFLAGS-$(CONFIG_X86_32) += $(call cc-option,$(CC),-mno-tls-direct-seg-refs) diff --git a/tools/console/daemon/io.c b/tools/console/daemon/io.c index de712af..2475963 100644 --- a/tools/console/daemon/io.c +++ b/tools/console/daemon/io.c @@ -288,6 +288,7 @@ static int create_domain_log(struct domain *dom) namepath = s; strcat(namepath, "/name"); data = xs_read(xs, XBT_NULL, namepath, &len); + free(namepath); if (!data) return -1; if (!len) { @@ -548,9 +549,6 @@ static int domain_create_ring(struct domain *dom) } free(type); - if ((ring_ref == dom->ring_ref) && (remote_port == dom->remote_port)) - goto out; - if (ring_ref != dom->ring_ref) { if (dom->interface != NULL) munmap(dom->interface, getpagesize()); @@ -565,6 +563,16 @@ static int domain_create_ring(struct domain *dom) dom->ring_ref = ring_ref; } + /* Go no further if port has not changed and we are still bound. 
*/ + if (remote_port == dom->remote_port) { + xc_evtchn_status_t status = { + .dom = DOMID_SELF, + .port = dom->local_port }; + if ((xc_evtchn_status(xc, &status) == 0) && + (status.status == EVTCHNSTAT_interdomain)) + goto out; + } + dom->local_port = -1; dom->remote_port = -1; if (dom->xce_handle != -1) @@ -601,7 +609,7 @@ static int domain_create_ring(struct domain *dom) } } - if (log_guest) + if (log_guest && (dom->log_fd == -1)) dom->log_fd = create_domain_log(dom); out: diff --git a/tools/examples/Makefile b/tools/examples/Makefile index 6c8349b..ecddb43 100644 --- a/tools/examples/Makefile +++ b/tools/examples/Makefile @@ -17,7 +17,6 @@ XEN_CONFIGS += xmexample2 XEN_CONFIGS += xmexample3 XEN_CONFIGS += xmexample.hvm XEN_CONFIGS += xmexample.hvm-stubdom -XEN_CONFIGS += xmexample.hvm-dm XEN_CONFIGS += xmexample.pv-grub XEN_CONFIGS += xmexample.nbd XEN_CONFIGS += xmexample.vti diff --git a/tools/examples/xmexample.hvm-stubdom b/tools/examples/xmexample.hvm-stubdom index bfceef3..acc250e 100644 --- a/tools/examples/xmexample.hvm-stubdom +++ b/tools/examples/xmexample.hvm-stubdom @@ -6,8 +6,7 @@ # you can set the parameters for the domain on the xm command line. #============================================================================ # -# This is a version using a stubdomain for device model, see -# xmexample.hvm-dm and README.stubdom for more details +# This is a version using a stubdomain for device model # The differences with xmexample.hvm are marked with "STUBDOM" #---------------------------------------------------------------------------- @@ -68,11 +67,8 @@ vif = [ 'type=ioemu, bridge=xenbr0' ] # Each disk entry is of the form phy:UNAME,DEV,MODE # where UNAME is the device, DEV is the device name the domain will see, # and MODE is r for read-only, w for read-write. 
-# -# STUBDOM: do not put it here but in stubdom-ExampleHVMDomain -#disk = [ 'phy:hda1,hda1,r' ] -#disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ] +disk = [ 'file:/var/images/min-el3-i386.img,hda,w', 'phy:/dev/cdrom,hdc:cdrom,r' ] #---------------------------------------------------------------------------- # Configure the behaviour when a domain exits. There are three 'reasons' @@ -127,7 +123,7 @@ device_model = '/usr/lib/xen/bin/stubdom-dm' #----------------------------------------------------------------------------- # boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d) # default: hard disk, cd-rom, floppy -#boot="cda" +boot="cda" #----------------------------------------------------------------------------- # write to temporary files instead of disk image files @@ -136,19 +132,17 @@ device_model = '/usr/lib/xen/bin/stubdom-dm' #---------------------------------------------------------------------------- # enable SDL library for graphics, default = 0 # -# STUBDOM: always disable since the stub domain doesn't have direct X access sdl=0 #---------------------------------------------------------------------------- # enable OpenGL for texture rendering inside the SDL window, default = 1 # valid only if sdl is enabled. # -# STUBDOM: always disable for the same reason -opengl=0 +opengl=1 #---------------------------------------------------------------------------- # enable VNC library for graphics, default = 1 -vnc=0 +vnc=1 #---------------------------------------------------------------------------- # address that should be listened on for the VNC server if vnc is set. 
diff --git a/tools/firmware/hvmloader/acpi/dsdt.asl b/tools/firmware/hvmloader/acpi/dsdt.asl index a21574e..44a5022 100644 --- a/tools/firmware/hvmloader/acpi/dsdt.asl +++ b/tools/firmware/hvmloader/acpi/dsdt.asl @@ -123,7 +123,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, "Xen", "HVM", 0) Name (_BBN, 0x00) /* - * Reserve the IO port ranges [0x10c0, 0x10c2] and [0xb044, 0xb047]. + * Reserve the IO port ranges [0x10c0, 0x10e1] and [0xb044, 0xb047]. * Or else, for a hotplugged-in device, the port IO BAR assigned * by guest OS may conflict with the ranges here. */ @@ -131,7 +131,7 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, "Xen", "HVM", 0) { Name(_HID, EISAID("PNP0C02")) Name(_CRS, ResourceTemplate() { - IO (Decode16, 0x10c0, 0x10c0, 0x00, 0x03) + IO (Decode16, 0x10c0, 0x10c0, 0x00, 0x22) IO (Decode16, 0xb044, 0xb044, 0x00, 0x04) }) } @@ -2067,104 +2067,133 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, "Xen", "HVM", 0) Store (SLT, DPT1) Store (EVT, DPT2) - Switch (SLT) + If ( LEqual(SLT, 0x00) ) { - Case (0x00) { - Notify (\_SB.PCI0.S00, EVT) - } - Case (0x01) { - Notify (\_SB.PCI0.S01, EVT) - } - Case (0x02) { - Notify (\_SB.PCI0.S02, EVT) - } - Case (0x03) { - Notify (\_SB.PCI0.S03, EVT) - } - Case (0x04) { - Notify (\_SB.PCI0.S04, EVT) - } - Case (0x05) { - Notify (\_SB.PCI0.S05, EVT) - } - Case (0x06) { - Notify (\_SB.PCI0.S06, EVT) - } - Case (0x07) { - Notify (\_SB.PCI0.S07, EVT) - } - Case (0x08) { - Notify (\_SB.PCI0.S08, EVT) - } - Case (0x09) { - Notify (\_SB.PCI0.S09, EVT) - } - Case (0x0a) { - Notify (\_SB.PCI0.S0A, EVT) - } - Case (0x0b) { - Notify (\_SB.PCI0.S0B, EVT) - } - Case (0x0c) { - Notify (\_SB.PCI0.S0C, EVT) - } - Case (0x0d) { - Notify (\_SB.PCI0.S0D, EVT) - } - Case (0x0e) { - Notify (\_SB.PCI0.S0E, EVT) - } - Case (0x0f) { - Notify (\_SB.PCI0.S0F, EVT) - } - Case (0x10) { - Notify (\_SB.PCI0.S10, EVT) - } - Case (0x11) { - Notify (\_SB.PCI0.S11, EVT) - } - Case (0x12) { - Notify (\_SB.PCI0.S12, EVT) - } - Case (0x13) { - Notify (\_SB.PCI0.S13, 
EVT) - } - Case (0x14) { - Notify (\_SB.PCI0.S14, EVT) - } - Case (0x15) { - Notify (\_SB.PCI0.S15, EVT) - } - Case (0x16) { - Notify (\_SB.PCI0.S16, EVT) - } - Case (0x17) { - Notify (\_SB.PCI0.S17, EVT) - } - Case (0x18) { - Notify (\_SB.PCI0.S18, EVT) - } - Case (0x19) { - Notify (\_SB.PCI0.S19, EVT) - } - Case (0x1a) { - Notify (\_SB.PCI0.S1A, EVT) - } - Case (0x1b) { - Notify (\_SB.PCI0.S1B, EVT) - } - Case (0x1c) { - Notify (\_SB.PCI0.S1C, EVT) - } - Case (0x1d) { - Notify (\_SB.PCI0.S1D, EVT) - } - Case (0x1e) { - Notify (\_SB.PCI0.S1E, EVT) - } - Case (0x1f) { - Notify (\_SB.PCI0.S1F, EVT) - } + Notify (\_SB.PCI0.S00, EVT) + } + ElseIf ( LEqual(SLT, 0x01) ) + { + Notify (\_SB.PCI0.S01, EVT) + } + ElseIf ( LEqual(SLT, 0x02) ) + { + Notify (\_SB.PCI0.S02, EVT) + } + ElseIf ( LEqual(SLT, 0x03) ) + { + Notify (\_SB.PCI0.S03, EVT) + } + ElseIf ( LEqual(SLT, 0x04) ) + { + Notify (\_SB.PCI0.S04, EVT) + } + ElseIf ( LEqual(SLT, 0x05) ) + { + Notify (\_SB.PCI0.S05, EVT) + } + ElseIf ( LEqual(SLT, 0x06) ) + { + Notify (\_SB.PCI0.S06, EVT) + } + ElseIf ( LEqual(SLT, 0x07) ) + { + Notify (\_SB.PCI0.S07, EVT) + } + ElseIf ( LEqual(SLT, 0x08) ) + { + Notify (\_SB.PCI0.S08, EVT) + } + ElseIf ( LEqual(SLT, 0x09) ) + { + Notify (\_SB.PCI0.S09, EVT) + } + ElseIf ( LEqual(SLT, 0x0a) ) + { + Notify (\_SB.PCI0.S0A, EVT) + } + ElseIf ( LEqual(SLT, 0x0b) ) + { + Notify (\_SB.PCI0.S0B, EVT) + } + ElseIf ( LEqual(SLT, 0x0c) ) + { + Notify (\_SB.PCI0.S0C, EVT) + } + ElseIf ( LEqual(SLT, 0x0d) ) + { + Notify (\_SB.PCI0.S0D, EVT) + } + ElseIf ( LEqual(SLT, 0x0e) ) + { + Notify (\_SB.PCI0.S0E, EVT) + } + ElseIf ( LEqual(SLT, 0x0f) ) + { + Notify (\_SB.PCI0.S0F, EVT) + } + ElseIf ( LEqual(SLT, 0x10) ) + { + Notify (\_SB.PCI0.S10, EVT) + } + ElseIf ( LEqual(SLT, 0x11) ) + { + Notify (\_SB.PCI0.S11, EVT) + } + ElseIf ( LEqual(SLT, 0x12) ) + { + Notify (\_SB.PCI0.S12, EVT) + } + ElseIf ( LEqual(SLT, 0x13) ) + { + Notify (\_SB.PCI0.S13, EVT) + } + ElseIf ( LEqual(SLT, 0x14) ) + { + Notify 
(\_SB.PCI0.S14, EVT) + } + ElseIf ( LEqual(SLT, 0x15) ) + { + Notify (\_SB.PCI0.S15, EVT) + } + ElseIf ( LEqual(SLT, 0x16) ) + { + Notify (\_SB.PCI0.S16, EVT) + } + ElseIf ( LEqual(SLT, 0x17) ) + { + Notify (\_SB.PCI0.S17, EVT) + } + ElseIf ( LEqual(SLT, 0x18) ) + { + Notify (\_SB.PCI0.S18, EVT) + } + ElseIf ( LEqual(SLT, 0x19) ) + { + Notify (\_SB.PCI0.S19, EVT) + } + ElseIf ( LEqual(SLT, 0x1a) ) + { + Notify (\_SB.PCI0.S1A, EVT) + } + ElseIf ( LEqual(SLT, 0x1b) ) + { + Notify (\_SB.PCI0.S1B, EVT) + } + ElseIf ( LEqual(SLT, 0x1c) ) + { + Notify (\_SB.PCI0.S1C, EVT) + } + ElseIf ( LEqual(SLT, 0x1d) ) + { + Notify (\_SB.PCI0.S1D, EVT) + } + ElseIf ( LEqual(SLT, 0x1e) ) + { + Notify (\_SB.PCI0.S1E, EVT) + } + ElseIf ( LEqual(SLT, 0x1f) ) + { + Notify (\_SB.PCI0.S1F, EVT) } } } diff --git a/tools/firmware/hvmloader/acpi/dsdt.c b/tools/firmware/hvmloader/acpi/dsdt.c index ca58b32..8216f8b 100644 --- a/tools/firmware/hvmloader/acpi/dsdt.c +++ b/tools/firmware/hvmloader/acpi/dsdt.c @@ -1,22 +1,22 @@ /* * * Intel ACPI Component Architecture - * ASL Optimizing Compiler version 20090220 [Mar 9 2009] - * Copyright (C) 2000 - 2009 Intel Corporation + * ASL Optimizing Compiler version 20081204 [Jan 23 2009] + * Copyright (C) 2000 - 2008 Intel Corporation * Supports ACPI Specification Revision 3.0a * - * Compilation of "dsdt.asl" - Tue Mar 17 10:44:21 2009 + * Compilation of "dsdt.asl" - Tue Mar 31 13:24:51 2009 * * C source code output * */ unsigned char AmlCode[] = { - 0x44,0x53,0x44,0x54,0x02,0x32,0x00,0x00, /* 00000000 "DSDT.2.." */ - 0x02,0xC6,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ + 0x44,0x53,0x44,0x54,0xF3,0x31,0x00,0x00, /* 00000000 "DSDT.1.." */ + 0x02,0x12,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */ 0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." */ 0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ - 0x20,0x02,0x09,0x20,0x08,0x50,0x4D,0x42, /* 00000020 " .. 
.PMB" */ + 0x04,0x12,0x08,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */ 0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */ 0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */ 0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */ @@ -81,7 +81,7 @@ unsigned char AmlCode[] = 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000210 "._HID.A." */ 0x0C,0x02,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000218 "..._CRS." */ 0x15,0x0A,0x12,0x47,0x01,0xC0,0x10,0xC0, /* 00000220 "...G...." */ - 0x10,0x00,0x03,0x47,0x01,0x44,0xB0,0x44, /* 00000228 "...G.D.D" */ + 0x10,0x00,0x22,0x47,0x01,0x44,0xB0,0x44, /* 00000228 ".."G.D.D" */ 0xB0,0x00,0x04,0x79,0x00,0x14,0x4E,0x0C, /* 00000230 "...y..N." */ 0x5F,0x43,0x52,0x53,0x00,0x08,0x50,0x52, /* 00000238 "_CRS..PR" */ 0x54,0x30,0x11,0x42,0x07,0x0A,0x6E,0x88, /* 00000240 "T0.B..n." */ @@ -1446,7 +1446,7 @@ unsigned char AmlCode[] = 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002CB8 "DPT1p..\" */ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002CC0 "._GPEDPT" */ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002CC8 "2.\._GPE" */ - 0x50,0x48,0x31,0x46,0x10,0x4D,0x52,0x5F, /* 00002CD0 "PH1F.MR_" */ + 0x50,0x48,0x31,0x46,0x10,0x4E,0x51,0x5F, /* 00002CD0 "PH1F.NQ_" */ 0x47,0x50,0x45,0x5B,0x80,0x50,0x48,0x50, /* 00002CD8 "GPE[.PHP" */ 0x5F,0x01,0x0B,0xC0,0x10,0x0A,0x22,0x5B, /* 00002CE0 "_....."[" */ 0x81,0x41,0x0B,0x50,0x48,0x50,0x5F,0x01, /* 00002CE8 ".A.PHP_." */ @@ -1475,143 +1475,141 @@ unsigned char AmlCode[] = 0x01,0x0B,0x44,0xB0,0x0A,0x04,0x5B,0x81, /* 00002DA0 "..D...[." */ 0x10,0x44,0x47,0x31,0x5F,0x01,0x44,0x50, /* 00002DA8 ".DG1_.DP" */ 0x54,0x31,0x08,0x44,0x50,0x54,0x32,0x08, /* 00002DB0 "T1.DPT2." */ - 0x14,0x49,0x44,0x5F,0x4C,0x30,0x33,0x08, /* 00002DB8 ".ID_L03." 
*/ - 0x08,0x5F,0x54,0x5F,0x30,0x00,0x08,0x53, /* 00002DC0 "._T_0..S" */ - 0x4C,0x54,0x5F,0x00,0x08,0x45,0x56,0x54, /* 00002DC8 "LT_..EVT" */ - 0x5F,0x00,0x70,0x50,0x53,0x54,0x41,0x61, /* 00002DD0 "_.pPSTAa" */ - 0x7B,0x61,0x0A,0x0F,0x45,0x56,0x54,0x5F, /* 00002DD8 "{a..EVT_" */ - 0x70,0x50,0x53,0x54,0x42,0x61,0x7B,0x61, /* 00002DE0 "pPSTBa{a" */ - 0x0A,0xFF,0x53,0x4C,0x54,0x5F,0x70,0x53, /* 00002DE8 "..SLT_pS" */ - 0x4C,0x54,0x5F,0x44,0x50,0x54,0x31,0x70, /* 00002DF0 "LT_DPT1p" */ - 0x45,0x56,0x54,0x5F,0x44,0x50,0x54,0x32, /* 00002DF8 "EVT_DPT2" */ - 0x70,0x53,0x4C,0x54,0x5F,0x5F,0x54,0x5F, /* 00002E00 "pSLT__T_" */ - 0x30,0xA0,0x1B,0x93,0x5F,0x54,0x5F,0x30, /* 00002E08 "0..._T_0" */ - 0x00,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E10 "..\/._SB" */ - 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x30, /* 00002E18 "_PCI0S00" */ - 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4C,0x3D, /* 00002E20 "_EVT_.L=" */ - 0xA0,0x1B,0x93,0x5F,0x54,0x5F,0x30,0x01, /* 00002E28 "..._T_0." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E30 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x31,0x5F, /* 00002E38 "PCI0S01_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x3B,0xA0, /* 00002E40 "EVT_.M;." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x02, /* 00002E48 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E50 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x32,0x5F, /* 00002E58 "PCI0S02_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x39,0xA0, /* 00002E60 "EVT_.M9." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x03, /* 00002E68 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E70 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x33,0x5F, /* 00002E78 "PCI0S03_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x37,0xA0, /* 00002E80 "EVT_.M7." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x04, /* 00002E88 ".._T_0.." 
*/ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E90 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x34,0x5F, /* 00002E98 "PCI0S04_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x35,0xA0, /* 00002EA0 "EVT_.M5." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x05, /* 00002EA8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002EB0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x35,0x5F, /* 00002EB8 "PCI0S05_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x33,0xA0, /* 00002EC0 "EVT_.M3." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x06, /* 00002EC8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002ED0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x36,0x5F, /* 00002ED8 "PCI0S06_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x31,0xA0, /* 00002EE0 "EVT_.M1." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x07, /* 00002EE8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002EF0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x37,0x5F, /* 00002EF8 "PCI0S07_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2F,0xA0, /* 00002F00 "EVT_.M/." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x08, /* 00002F08 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F10 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x38,0x5F, /* 00002F18 "PCI0S08_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2D,0xA0, /* 00002F20 "EVT_.M-." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x09, /* 00002F28 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F30 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x39,0x5F, /* 00002F38 "PCI0S09_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2B,0xA0, /* 00002F40 "EVT_.M+." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0A, /* 00002F48 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F50 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x41,0x5F, /* 00002F58 "PCI0S0A_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x29,0xA0, /* 00002F60 "EVT_.M)." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0B, /* 00002F68 ".._T_0.." 
*/ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F70 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x42,0x5F, /* 00002F78 "PCI0S0B_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x27,0xA0, /* 00002F80 "EVT_.M'." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0C, /* 00002F88 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F90 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x43,0x5F, /* 00002F98 "PCI0S0C_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x25,0xA0, /* 00002FA0 "EVT_.M%." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0D, /* 00002FA8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FB0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x44,0x5F, /* 00002FB8 "PCI0S0D_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x23,0xA0, /* 00002FC0 "EVT_.M#." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0E, /* 00002FC8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FD0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x45,0x5F, /* 00002FD8 "PCI0S0E_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x21,0xA0, /* 00002FE0 "EVT_.M!." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0F, /* 00002FE8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FF0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x30,0x46,0x5F, /* 00002FF8 "PCI0S0F_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1F,0xA0, /* 00003000 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x10, /* 00003008 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003010 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x30,0x5F, /* 00003018 "PCI0S10_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1D,0xA0, /* 00003020 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x11, /* 00003028 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003030 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x31,0x5F, /* 00003038 "PCI0S11_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1B,0xA0, /* 00003040 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x12, /* 00003048 ".._T_0.." 
*/ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003050 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x32,0x5F, /* 00003058 "PCI0S12_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x19,0xA0, /* 00003060 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x13, /* 00003068 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003070 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x33,0x5F, /* 00003078 "PCI0S13_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x17,0xA0, /* 00003080 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x14, /* 00003088 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003090 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x34,0x5F, /* 00003098 "PCI0S14_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x15,0xA0, /* 000030A0 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x15, /* 000030A8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030B0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x35,0x5F, /* 000030B8 "PCI0S15_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x13,0xA0, /* 000030C0 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x16, /* 000030C8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030D0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x36,0x5F, /* 000030D8 "PCI0S16_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x11,0xA0, /* 000030E0 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x17, /* 000030E8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030F0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x37,0x5F, /* 000030F8 "PCI0S17_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0F,0xA0, /* 00003100 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x18, /* 00003108 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003110 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x38,0x5F, /* 00003118 "PCI0S18_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0D,0xA0, /* 00003120 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x19, /* 00003128 ".._T_0.." 
*/ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003130 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x39,0x5F, /* 00003138 "PCI0S19_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0B,0xA0, /* 00003140 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1A, /* 00003148 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003150 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x41,0x5F, /* 00003158 "PCI0S1A_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x09,0xA0, /* 00003160 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1B, /* 00003168 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003170 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x42,0x5F, /* 00003178 "PCI0S1B_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x07,0xA0, /* 00003180 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1C, /* 00003188 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003190 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x43,0x5F, /* 00003198 "PCI0S1C_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x05,0xA0, /* 000031A0 "EVT_.M.." */ - 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1D, /* 000031A8 ".._T_0.." */ - 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000031B0 ".\/._SB_" */ - 0x50,0x43,0x49,0x30,0x53,0x31,0x44,0x5F, /* 000031B8 "PCI0S1D_" */ - 0x45,0x56,0x54,0x5F,0xA1,0x3D,0xA0,0x1C, /* 000031C0 "EVT_.=.." */ - 0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1E,0x86, /* 000031C8 "._T_0..." */ - 0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50, /* 000031D0 "\/._SB_P" */ - 0x43,0x49,0x30,0x53,0x31,0x45,0x5F,0x45, /* 000031D8 "CI0S1E_E" */ - 0x56,0x54,0x5F,0xA1,0x1E,0xA0,0x1C,0x93, /* 000031E0 "VT_....." */ - 0x5F,0x54,0x5F,0x30,0x0A,0x1F,0x86,0x5C, /* 000031E8 "_T_0...\" */ - 0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50,0x43, /* 000031F0 "/._SB_PC" */ - 0x49,0x30,0x53,0x31,0x46,0x5F,0x45,0x56, /* 000031F8 "I0S1F_EV" */ - 0x54,0x5F, + 0x14,0x4A,0x43,0x5F,0x4C,0x30,0x33,0x08, /* 00002DB8 ".JC_L03." 
*/ + 0x08,0x53,0x4C,0x54,0x5F,0x00,0x08,0x45, /* 00002DC0 ".SLT_..E" */ + 0x56,0x54,0x5F,0x00,0x70,0x50,0x53,0x54, /* 00002DC8 "VT_.pPST" */ + 0x41,0x61,0x7B,0x61,0x0A,0x0F,0x45,0x56, /* 00002DD0 "Aa{a..EV" */ + 0x54,0x5F,0x70,0x50,0x53,0x54,0x42,0x61, /* 00002DD8 "T_pPSTBa" */ + 0x7B,0x61,0x0A,0xFF,0x53,0x4C,0x54,0x5F, /* 00002DE0 "{a..SLT_" */ + 0x70,0x53,0x4C,0x54,0x5F,0x44,0x50,0x54, /* 00002DE8 "pSLT_DPT" */ + 0x31,0x70,0x45,0x56,0x54,0x5F,0x44,0x50, /* 00002DF0 "1pEVT_DP" */ + 0x54,0x32,0xA0,0x1B,0x93,0x53,0x4C,0x54, /* 00002DF8 "T2...SLT" */ + 0x5F,0x00,0x86,0x5C,0x2F,0x03,0x5F,0x53, /* 00002E00 "_..\/._S" */ + 0x42,0x5F,0x50,0x43,0x49,0x30,0x53,0x30, /* 00002E08 "B_PCI0S0" */ + 0x30,0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4C, /* 00002E10 "0_EVT_.L" */ + 0x3D,0xA0,0x1B,0x93,0x53,0x4C,0x54,0x5F, /* 00002E18 "=...SLT_" */ + 0x01,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E20 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x31, /* 00002E28 "_PCI0S01" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x3B, /* 00002E30 "_EVT_.M;" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002E38 "...SLT_." */ + 0x02,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E40 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x32, /* 00002E48 "_PCI0S02" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x39, /* 00002E50 "_EVT_.M9" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002E58 "...SLT_." */ + 0x03,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E60 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x33, /* 00002E68 "_PCI0S03" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x37, /* 00002E70 "_EVT_.M7" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002E78 "...SLT_." */ + 0x04,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E80 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x34, /* 00002E88 "_PCI0S04" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x35, /* 00002E90 "_EVT_.M5" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002E98 "...SLT_." 
*/ + 0x05,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002EA0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x35, /* 00002EA8 "_PCI0S05" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x33, /* 00002EB0 "_EVT_.M3" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002EB8 "...SLT_." */ + 0x06,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002EC0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x36, /* 00002EC8 "_PCI0S06" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x31, /* 00002ED0 "_EVT_.M1" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002ED8 "...SLT_." */ + 0x07,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002EE0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x37, /* 00002EE8 "_PCI0S07" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2F, /* 00002EF0 "_EVT_.M/" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002EF8 "...SLT_." */ + 0x08,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002F00 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x38, /* 00002F08 "_PCI0S08" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2D, /* 00002F10 "_EVT_.M-" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002F18 "...SLT_." */ + 0x09,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002F20 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x39, /* 00002F28 "_PCI0S09" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2B, /* 00002F30 "_EVT_.M+" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002F38 "...SLT_." */ + 0x0A,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002F40 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x41, /* 00002F48 "_PCI0S0A" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x29, /* 00002F50 "_EVT_.M)" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002F58 "...SLT_." */ + 0x0B,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002F60 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x42, /* 00002F68 "_PCI0S0B" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x27, /* 00002F70 "_EVT_.M'" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002F78 "...SLT_." 
*/ + 0x0C,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002F80 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x43, /* 00002F88 "_PCI0S0C" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x25, /* 00002F90 "_EVT_.M%" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002F98 "...SLT_." */ + 0x0D,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002FA0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x44, /* 00002FA8 "_PCI0S0D" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x23, /* 00002FB0 "_EVT_.M#" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002FB8 "...SLT_." */ + 0x0E,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002FC0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x45, /* 00002FC8 "_PCI0S0E" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x21, /* 00002FD0 "_EVT_.M!" */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002FD8 "...SLT_." */ + 0x0F,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002FE0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x46, /* 00002FE8 "_PCI0S0F" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1F, /* 00002FF0 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00002FF8 "...SLT_." */ + 0x10,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003000 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x30, /* 00003008 "_PCI0S10" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1D, /* 00003010 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003018 "...SLT_." */ + 0x11,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003020 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x31, /* 00003028 "_PCI0S11" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1B, /* 00003030 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003038 "...SLT_." */ + 0x12,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003040 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x32, /* 00003048 "_PCI0S12" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x19, /* 00003050 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003058 "...SLT_." 
*/ + 0x13,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003060 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x33, /* 00003068 "_PCI0S13" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x17, /* 00003070 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003078 "...SLT_." */ + 0x14,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003080 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x34, /* 00003088 "_PCI0S14" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x15, /* 00003090 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003098 "...SLT_." */ + 0x15,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 000030A0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x35, /* 000030A8 "_PCI0S15" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x13, /* 000030B0 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 000030B8 "...SLT_." */ + 0x16,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 000030C0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x36, /* 000030C8 "_PCI0S16" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x11, /* 000030D0 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 000030D8 "...SLT_." */ + 0x17,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 000030E0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x37, /* 000030E8 "_PCI0S17" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0F, /* 000030F0 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 000030F8 "...SLT_." */ + 0x18,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003100 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x38, /* 00003108 "_PCI0S18" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0D, /* 00003110 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003118 "...SLT_." */ + 0x19,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003120 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x39, /* 00003128 "_PCI0S19" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0B, /* 00003130 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003138 "...SLT_." 
*/ + 0x1A,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003140 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x41, /* 00003148 "_PCI0S1A" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x09, /* 00003150 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003158 "...SLT_." */ + 0x1B,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003160 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x42, /* 00003168 "_PCI0S1B" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x07, /* 00003170 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003178 "...SLT_." */ + 0x1C,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00003180 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x43, /* 00003188 "_PCI0S1C" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4D,0x05, /* 00003190 "_EVT_.M." */ + 0xA0,0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A, /* 00003198 "...SLT_." */ + 0x1D,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 000031A0 "..\/._SB" */ + 0x5F,0x50,0x43,0x49,0x30,0x53,0x31,0x44, /* 000031A8 "_PCI0S1D" */ + 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x3D,0xA0, /* 000031B0 "_EVT_.=." */ + 0x1C,0x93,0x53,0x4C,0x54,0x5F,0x0A,0x1E, /* 000031B8 "..SLT_.." */ + 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000031C0 ".\/._SB_" */ + 0x50,0x43,0x49,0x30,0x53,0x31,0x45,0x5F, /* 000031C8 "PCI0S1E_" */ + 0x45,0x56,0x54,0x5F,0xA1,0x1E,0xA0,0x1C, /* 000031D0 "EVT_...." */ + 0x93,0x53,0x4C,0x54,0x5F,0x0A,0x1F,0x86, /* 000031D8 ".SLT_..." 
*/ + 0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50, /* 000031E0 "\/._SB_P" */ + 0x43,0x49,0x30,0x53,0x31,0x46,0x5F,0x45, /* 000031E8 "CI0S1F_E" */ + 0x56,0x54,0x5F, }; int DsdtLen=sizeof(AmlCode); diff --git a/tools/firmware/hvmloader/acpi/static_tables.c b/tools/firmware/hvmloader/acpi/static_tables.c index ab544e1..e040c56 100644 --- a/tools/firmware/hvmloader/acpi/static_tables.c +++ b/tools/firmware/hvmloader/acpi/static_tables.c @@ -69,7 +69,7 @@ struct acpi_20_fadt Fadt = { .p_lvl3_lat = 0x0fff, /* >1000, means we do not support C3 state */ .iapc_boot_arch = ACPI_8042, .flags = (ACPI_PROC_C1 | ACPI_SLP_BUTTON | - ACPI_WBINVD | ACPI_PWR_BUTTON | + ACPI_WBINVD | ACPI_FIX_RTC | ACPI_TMR_VAL_EXT), .reset_reg = { diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c index 39aa949..7552122 100644 --- a/tools/firmware/hvmloader/hvmloader.c +++ b/tools/firmware/hvmloader/hvmloader.c @@ -88,8 +88,8 @@ asm ( " .align 8 \n" "gdt: \n" " .quad 0x0000000000000000 \n" - " .quad 0x00009a000000ffff \n" /* Ring 0 code, base 0 limit 0xffff */ - " .quad 0x000092000000ffff \n" /* Ring 0 data, base 0 limit 0xffff */ + " .quad 0x008f9a000000ffff \n" /* Ring 0 16b code, base 0 limit 4G */ + " .quad 0x008f92000000ffff \n" /* Ring 0 16b data, base 0 limit 4G */ "gdt_end: \n" " \n" " .bss \n" diff --git a/tools/firmware/rombios/32bitgateway.c b/tools/firmware/rombios/32bitgateway.c index 8b3dffa..d76f194 100644 --- a/tools/firmware/rombios/32bitgateway.c +++ b/tools/firmware/rombios/32bitgateway.c @@ -56,13 +56,13 @@ gdt_entry_pm_32bit_cs: .byte 0x00, 0x9b, 0xcf, 0x00 gdt_entry_pm_16bit_cs: .word 0xffff, 0x0000 - .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0 + .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x8f, 0x0 gdt_entry_pm_32bit_ds: .word 0xffff, 0x0000 .byte 0x0, 0x93, 0xcf, 0x0 gdt_entry_pm_16bit_ds: .word 0xffff, 0x0000 - .byte 0x0, 0x93, 0x0, 0x0 + .byte 0x0, 0x93, 0x8f, 0x0 gdt_entry_end: protmode_gdtdesc: diff --git 
a/tools/hotplug/Linux/network-bridge b/tools/hotplug/Linux/network-bridge index 9d7be4e..954c016 100644 --- a/tools/hotplug/Linux/network-bridge +++ b/tools/hotplug/Linux/network-bridge @@ -106,7 +106,7 @@ get_ip_info() { } do_ifup() { - if ! ifup $1 ; then + if [ $1 != "${netdev}" ] || ! ifup $1 ; then if [ -n "$addr_pfx" ] ; then # use the info from get_ip_info() ip addr flush $1 @@ -223,9 +223,13 @@ op_start () { preiftransfer ${netdev} transfer_addrs ${netdev} ${tdev} + # Remember slaves for bonding interface. + if [ -e /sys/class/net/${netdev}/bonding/slaves ]; then + slaves=`cat /sys/class/net/${netdev}/bonding/slaves` + fi + # Remember the IP details for do_ifup. + get_ip_info ${netdev} if ! ifdown ${netdev}; then - # If ifdown fails, remember the IP details. - get_ip_info ${netdev} ip link set ${netdev} down ip addr flush ${netdev} fi @@ -234,6 +238,11 @@ op_start () { setup_bridge_port ${pdev} + # Restore slaves + if [ -n "${slaves}" ]; then + ip link set ${pdev} up + ifenslave ${pdev} ${slaves} + fi add_to_bridge2 ${bridge} ${pdev} do_ifup ${bridge} diff --git a/tools/hotplug/Linux/network-nat b/tools/hotplug/Linux/network-nat index d9c62c6..aab793d 100644 --- a/tools/hotplug/Linux/network-nat +++ b/tools/hotplug/Linux/network-nat @@ -48,12 +48,16 @@ then fi fi +domain_name=`cat /etc/resolv.conf | grep -v "#" | grep -E 'search|domain' -i | tail -n 1 | awk '{ print $2 }'` +nameserver=`cat /etc/resolv.conf | grep -v "#" | grep "nameserver" -i -m 1 | awk '{ print $2 }'` function dhcp_start() { if ! 
grep -q "subnet 10.0.0.0" "$dhcpd_conf_file" then - echo >>"$dhcpd_conf_file" "subnet 10.0.0.0 netmask 255.255.0.0 {}" + echo >>"$dhcpd_conf_file" "subnet 10.0.0.0 netmask 255.255.0.0 {\ + option domain-name \"$domain_name\";\ + option domain-name-servers $nameserver; }" fi "$dhcpd_init_file" restart diff --git a/tools/hotplug/Linux/vif-common.sh b/tools/hotplug/Linux/vif-common.sh index ee67ee2..5c1e9c3 100644 --- a/tools/hotplug/Linux/vif-common.sh +++ b/tools/hotplug/Linux/vif-common.sh @@ -68,17 +68,20 @@ frob_iptable() { if [ "$command" == "online" ] then - local c="-A" + local c="-I" else local c="-D" fi iptables "$c" FORWARD -m physdev --physdev-in "$vif" "$@" -j ACCEPT \ - 2>/dev/null || - [ "$c" == "-D" ] || - log err \ - "iptables $c FORWARD -m physdev --physdev-in $vif $@ -j ACCEPT failed. -If you are using iptables, this may affect networking for guest domains." + 2>/dev/null && + iptables "$c" FORWARD -m state --state RELATED,ESTABLISHED -m physdev \ + --physdev-out "$vif" -j ACCEPT 2>/dev/null + # Check $? in this single test so it is still the iptables status; a bare [ $? ] is always true. + if [ "$command" == "online" -a $? -ne 0 ] + then + log err "iptables setup failed. This may affect guest networking."
+ fi } diff --git a/tools/hotplug/Linux/xend.rules b/tools/hotplug/Linux/xend.rules index d996555..8cb8295 100644 --- a/tools/hotplug/Linux/xend.rules +++ b/tools/hotplug/Linux/xend.rules @@ -1,3 +1,3 @@ SUBSYSTEM=="pci", RUN+="socket:/org/xen/xend/udev_event" -#SUBSYSTEM=="scsi", RUN+="socket:/org/xen/xend/udev_event" +SUBSYSTEM=="scsi", RUN+="socket:/org/xen/xend/udev_event" #SUBSYSTEM=="net", KERNEL!="vif[0-9]*.[0-9]*|tap[0-9]*.[0-9]*", RUN+="socket:/org/xen/xend/udev_event" diff --git a/tools/libxc/xc_domain_restore.c b/tools/libxc/xc_domain_restore.c index 4f92ebb..32782e1 100644 --- a/tools/libxc/xc_domain_restore.c +++ b/tools/libxc/xc_domain_restore.c @@ -1197,7 +1197,7 @@ int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, * we need to adjust the live_p2m assignment appropriately */ if ( guest_width > sizeof (xen_pfn_t) ) for ( i = p2m_size - 1; i >= 0; i-- ) - ((uint64_t *)live_p2m)[i] = p2m[i]; + ((int64_t *)live_p2m)[i] = (long)p2m[i]; else if ( guest_width < sizeof (xen_pfn_t) ) for ( i = 0; i < p2m_size; i++ ) ((uint32_t *)live_p2m)[i] = p2m[i]; diff --git a/tools/libxc/xc_pagetab.c b/tools/libxc/xc_pagetab.c index 00ee0f8..1a4a3d0 100644 --- a/tools/libxc/xc_pagetab.c +++ b/tools/libxc/xc_pagetab.c @@ -32,7 +32,7 @@ unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, &ctx, sizeof ctx) != 0) return 0; if (!(ctx.cr0 & CR0_PG)) - return virt; + return virt >> PAGE_SHIFT; pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2; paddr = ctx.cr3 & ((pt_levels == 3) ? 
~0x1full : ~0xfffull); } else { diff --git a/tools/libxc/xc_pm.c b/tools/libxc/xc_pm.c index 13342a3..5010410 100644 --- a/tools/libxc/xc_pm.c +++ b/tools/libxc/xc_pm.c @@ -362,3 +362,66 @@ int xc_set_sched_opt_smt(int xc_handle, uint32_t value) return rc; } +int xc_set_vcpu_migration_delay(int xc_handle, uint32_t value) +{ + int rc; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_pm_op; + sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_set_vcpu_migration_delay; + sysctl.u.pm_op.cpuid = 0; + sysctl.u.pm_op.set_vcpu_migration_delay = value; + rc = do_sysctl(xc_handle, &sysctl); + + return rc; +} + +int xc_get_vcpu_migration_delay(int xc_handle, uint32_t *value) +{ + int rc; + DECLARE_SYSCTL; + + sysctl.cmd = XEN_SYSCTL_pm_op; + sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_get_vcpu_migration_delay; + sysctl.u.pm_op.cpuid = 0; + rc = do_sysctl(xc_handle, &sysctl); + + if (!rc && value) + *value = sysctl.u.pm_op.get_vcpu_migration_delay; + + return rc; +} + +int xc_get_cpuidle_max_cstate(int xc_handle, uint32_t *value) +{ + int rc; + DECLARE_SYSCTL; + + if ( xc_handle < 0 || !value ) + return -EINVAL; + + sysctl.cmd = XEN_SYSCTL_pm_op; + sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_get_max_cstate; + sysctl.u.pm_op.cpuid = 0; + sysctl.u.pm_op.get_max_cstate = 0; + rc = do_sysctl(xc_handle, &sysctl); + *value = sysctl.u.pm_op.get_max_cstate; + + return rc; +} + +int xc_set_cpuidle_max_cstate(int xc_handle, uint32_t value) +{ + DECLARE_SYSCTL; + + if ( xc_handle < 0 ) + return -EINVAL; + + sysctl.cmd = XEN_SYSCTL_pm_op; + sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_set_max_cstate; + sysctl.u.pm_op.cpuid = 0; + sysctl.u.pm_op.set_max_cstate = value; + + return do_sysctl(xc_handle, &sysctl); +} + diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index 9ce2286..c9b1866 100644 --- a/tools/libxc/xenctrl.h +++ b/tools/libxc/xenctrl.h @@ -1261,5 +1261,10 @@ struct xc_get_cputopo { int xc_get_cputopo(int xc_handle, struct xc_get_cputopo *info); int xc_set_sched_opt_smt(int xc_handle, uint32_t value); 
+int xc_set_vcpu_migration_delay(int xc_handle, uint32_t value); +int xc_get_vcpu_migration_delay(int xc_handle, uint32_t *value); + +int xc_get_cpuidle_max_cstate(int xc_handle, uint32_t *value); +int xc_set_cpuidle_max_cstate(int xc_handle, uint32_t value); #endif /* XENCTRL_H */ diff --git a/tools/misc/Makefile b/tools/misc/Makefile index 12c599c..c309a3f 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -22,7 +22,7 @@ INSTALL_BIN-y := xencons INSTALL_BIN-$(CONFIG_X86) += xen-detect INSTALL_BIN := $(INSTALL_BIN-y) -INSTALL_SBIN-y := netfix xm xen-bugtool xen-python-path xend xenperf xsview xenpm +INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm INSTALL_SBIN := $(INSTALL_SBIN-y) DEFAULT_PYTHON_PATH := $(shell $(XEN_ROOT)/tools/python/get-path) diff --git a/tools/misc/sbdf2devicepath b/tools/misc/sbdf2devicepath new file mode 100644 index 0000000..690834a --- /dev/null +++ b/tools/misc/sbdf2devicepath @@ -0,0 +1,82 @@ +#!/usr/bin/env python +# -*- mode: python; -*- +#============================================================================ +# This library is free software; you can redistribute it and/or +# modify it under the terms of version 2.1 of the GNU Lesser General Public +# License as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +#============================================================================ +# Copyright (c) 2009, NEC Corporation. 
+#============================================================================ +# This script converts SBDF into device path. +# 'SBDF' format is "[SEG#:]BUS#:DEV#.FUNC#" +# ex) 0000:0a:1f.3 +# Device path format is "HID[:UID]-DEV#.FUNC#[-DEV#.FUNC#[...]]" +# ex) PNP0A08:0-2.0-0.0 +#============================================================================= + +import sys +import os + +# add fallback path for non-native python path installs if needed +sys.path.append('/usr/lib/python') +sys.path.append('/usr/lib64/python') +from xen.util.pci import * + +SYSFS_ACPI_DEVS_PATH = '/firmware/acpi/namespace/ACPI/_SB' + +def find_hid_uid(dom, b, d, f): + obj_list = os.listdir(sb_path) + for obj in obj_list: + obj_path = sb_path + '/' + obj.strip() + '/' + if os.path.exists(obj_path + 'seg') and \ + os.path.exists(obj_path + 'bbn'): + seg = open(obj_path + 'seg').read() + bbn = open(obj_path + 'bbn').read() + if int(seg) == dom and int(bbn) == b: + hid = open(obj_path + 'hid').read() + if os.path.exists(obj_path + 'uid') is False: + path_str = hid.strip() + else: + uid = open(obj_path + 'uid').read() + path_str = hid.strip() + ':' + uid.strip() + return path_str + return None + +def make_device_path(dom, b, d, f): + dev = PciDevice(dom, b, d, f) + parent = dev.find_parent() + if parent is None: + path_str = find_hid_uid(dom, b, d, f) + path_str = path_str + '-' + hex(d).replace('0x', '') + '.' + \ + hex(f).replace('0x', '') + return path_str + (pdom, pb, pd, pf) = parent + path_str = make_device_path(pdom, pb, pd, pf) + path_str = path_str + '-' + hex(d).replace('0x', '') + '.' 
+ \ + hex(f).replace('0x', '') + return path_str + +# main +if len(sys.argv) <> 2: + print 'Usage: sbdf2devicepath SBDF\n' +else: + sb_path = find_sysfs_mnt() + SYSFS_ACPI_DEVS_PATH + if os.path.exists(sb_path): + path = os.environ['PATH'] + os.environ['PATH'] = path + ':/sbin' + ':/user/sbin' + sbdf = sys.argv[1] + (dom, b, d, f) = parse_pci_name(sbdf) + path_str = make_device_path(dom, b, d, f) + print path_str + else: + print sb_path + ' not found.\n' + print 'This command is only for linux 2.6.18.8 xen kernel.\n' diff --git a/tools/misc/sxp-pretty b/tools/misc/sxp-pretty index 4b8eaed..dd642b0 100644 --- a/tools/misc/sxp-pretty +++ b/tools/misc/sxp-pretty @@ -23,14 +23,6 @@ import os.path import pprint import sys -result = commands.getstatusoutput(os.path.join(os.path.dirname(sys.argv[0]), - 'xen-python-path')) -if result[0] != 0: - print >>sys.stderr, result[1] - sys.exit(1) - -sys.path.append(result[1]) - import xen.xend.sxp as sxp def main(): diff --git a/tools/misc/xen-bugtool b/tools/misc/xen-bugtool index cf41c8c..a3742b4 100644 --- a/tools/misc/xen-bugtool +++ b/tools/misc/xen-bugtool @@ -6,9 +6,6 @@ import sys -sys.path.append('/usr/lib/python') -sys.path.append('/usr/lib64/python') - from xen.util import bugtool diff --git a/tools/misc/xen-python-path b/tools/misc/xen-python-path index 57774a3..073abae 100644 --- a/tools/misc/xen-python-path +++ b/tools/misc/xen-python-path @@ -17,31 +17,8 @@ # Copyright (C) 2007 XenSource Inc. #============================================================================ +# Nowadays we install xen in the standard python site-packages +# directories. This script is still provided for the benefit of old +# out-of-xen-tree callers. It is deprecated and will be removed. -# Use the auxbin module in Xend to determine the correct Python path. We -# take the first installed instance of auxbin that we find, and then run it -# to determine the correct path, appending that to sys.path. 
- -AUXBIN = 'xen/util/auxbin.py' - -import os -import os.path -import sys - -usr = os.path.dirname(os.path.dirname(sys.argv[0])) -list = [ os.path.join(usr,'lib64') ] -list += [ os.path.join(usr,'lib') ] -list += ['/usr/lib64', '/usr/lib'] - -for l in list: - for p in ['python%s' % sys.version[:3], 'python']: - for k in ['', 'site-packages/']: - d = os.path.join(l, p, k) - if os.path.exists(os.path.join(d, AUXBIN)): - sys.path.append(d) - import xen.util.auxbin - print os.path.join(xen.util.auxbin.libpath(), p) - sys.exit(0) - -print >>sys.stderr, "Cannot find Xen Python modules." -sys.exit(1) +print '/dev/enoent/xen/python-path' diff --git a/tools/misc/xend b/tools/misc/xend index 2cbdf61..4dd550b 100644 --- a/tools/misc/xend +++ b/tools/misc/xend @@ -33,14 +33,6 @@ import signal import time import commands -xpp = os.path.join(os.path.dirname(sys.argv[0]), 'xen-python-path') -if os.path.exists(xpp): - result = commands.getstatusoutput(xpp) - if result[0] != 0: - print >>sys.stderr, result[1] - sys.exit(1) - sys.path.append(result[1]) - from xen.xend.server import SrvDaemon class CheckError(ValueError): diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c index 39eef65..015d80a 100644 --- a/tools/misc/xenpm.c +++ b/tools/misc/xenpm.c @@ -57,6 +57,9 @@ void show_help(void) " it is used in ondemand governor.\n" " get-cpu-topology get thread/core/socket topology info\n" " set-sched-smt enable|disable enable/disable scheduler smt power saving\n" + " set-vcpu-migration-delay set scheduler vcpu migration delay in us\n" + " get-vcpu-migration-delay get scheduler vcpu migration delay\n" + " set-max-cstate set the C-State limitation ( >= 0)\n" " start [seconds] start collect Cx/Px statistics,\n" " output after CTRL-C or SIGINT or several seconds.\n" ); @@ -122,6 +125,18 @@ static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat) return 0; } +static int show_max_cstate(int xc_fd) +{ + int ret = 0; + uint32_t value; + + if ( (ret = 
xc_get_cpuidle_max_cstate(xc_fd, &value)) ) + return ret; + + printf("Max C-state: C%d\n\n", value); + return 0; +} + static int show_cxstat_by_cpuid(int xc_fd, int cpuid) { int ret = 0; @@ -148,6 +163,8 @@ void cxstat_func(int argc, char *argv[]) if ( cpuid >= max_cpu_nr ) cpuid = -1; + show_max_cstate(xc_fd); + if ( cpuid < 0 ) { /* show cxstates on all cpus */ @@ -864,7 +881,72 @@ void set_sched_smt_func(int argc, char *argv[]) rc = xc_set_sched_opt_smt(xc_fd, value); printf("%s sched_smt_power_savings %s\n", argv[0], - rc? "failed":"successeed" ); + rc? "failed":"succeeded" ); + + return; +} + +void set_vcpu_migration_delay_func(int argc, char *argv[]) +{ + int value; + int rc; + + if (argc != 1){ + show_help(); + exit(-1); + } + + value = atoi(argv[0]); + + if (value < 0) + { + printf("Please try non-negative vcpu migration delay\n"); + exit(-1); + } + + rc = xc_set_vcpu_migration_delay(xc_fd, value); + printf("%s to set vcpu migration delay to %d us\n", + rc? "Fail":"Succeed", value ); + + return; +} + +void get_vcpu_migration_delay_func(int argc, char *argv[]) +{ + uint32_t value; + int rc; + + if (argc != 0){ + show_help(); + exit(-1); + } + + rc = xc_get_vcpu_migration_delay(xc_fd, &value); + if (!rc) + { + printf("Schduler vcpu migration delay is %d us\n", value); + } + else + { + printf("Failed to get scheduler vcpu migration delay, errno=%d\n", errno); + } + + return; +} + +void set_max_cstate_func(int argc, char *argv[]) +{ + int value, rc; + + if ( argc != 1 || sscanf(argv[0], "%d", &value) != 1 || value < 0 ) + { + show_help(); + exit(-1); + } + + rc = xc_set_cpuidle_max_cstate(xc_fd, (uint32_t)value); + printf("set max_cstate to C%d %s\n", value, + rc? 
"failed":"succeeded" ); return; } @@ -886,6 +968,9 @@ struct { { "set-up-threshold", scaling_up_threshold_func }, { "get-cpu-topology", cpu_topology_func}, { "set-sched-smt", set_sched_smt_func}, + { "get-vcpu-migration-delay", get_vcpu_migration_delay_func}, + { "set-vcpu-migration-delay", set_vcpu_migration_delay_func}, + { "set-max-cstate", set_max_cstate_func}, }; int main(int argc, char *argv[]) diff --git a/tools/misc/xm b/tools/misc/xm index 80972cc..f4fd200 100755 --- a/tools/misc/xm +++ b/tools/misc/xm @@ -2,9 +2,6 @@ # -*- mode: python; -*- import sys -# add fallback path for non-native python path installs if needed -sys.path.append('/usr/lib/python') -sys.path.append('/usr/lib64/python') from xen.xm import main main.main(sys.argv) diff --git a/tools/misc/xsview b/tools/misc/xsview index c672655..f926fe4 100644 --- a/tools/misc/xsview +++ b/tools/misc/xsview @@ -2,8 +2,6 @@ import sys -sys.path.append('/usr/lib/python') -sys.path.append('/usr/lib64/python') from xen.xsview import main main.main(sys.argv) diff --git a/tools/pygrub/Makefile b/tools/pygrub/Makefile index 03c8ed9..b8b64a7 100644 --- a/tools/pygrub/Makefile +++ b/tools/pygrub/Makefile @@ -6,19 +6,13 @@ include $(XEN_ROOT)/tools/Rules.mk all: build .PHONY: build build: - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build + CC="$(CC)" CFLAGS="$(CFLAGS)" $(PYTHON) setup.py build .PHONY: install -ifndef XEN_PYTHON_NATIVE_INSTALL -install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()") install: all - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --install-lib="$(DESTDIR)$(LIBPATH)/python" + CC="$(CC)" CFLAGS="$(CFLAGS)" $(PYTHON) setup.py install \ + $(PYTHON_PREFIX_ARG) --root="$(DESTDIR)" --force $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot -else -install: all - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" - $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot -endif .PHONY: clean 
clean: diff --git a/tools/pygrub/src/pygrub b/tools/pygrub/src/pygrub index ba70832..095db5e 100644 --- a/tools/pygrub/src/pygrub +++ b/tools/pygrub/src/pygrub @@ -21,8 +21,6 @@ import platform import curses, _curses, curses.wrapper, curses.textpad, curses.ascii import getopt -sys.path = [ '/usr/lib/python', '/usr/lib64/python' ] + sys.path - import fsimage import grub.GrubConf import grub.LiloConf @@ -316,7 +314,11 @@ class Grub: curline = len(img.lines) - 1 if self.isdone: - origimg.reset(img.lines) + # Fix to allow pygrub command-line editing in Lilo bootloader (used by IA64) + if platform.machine() == 'ia64': + origimg.reset(img.lines, img.path) + else: + origimg.reset(img.lines) def edit_line(self, line): self.screen.erase() @@ -532,7 +534,7 @@ def run_grub(file, entry, fs, arg): try: img = g.cf.images[sel] - except: + except IndexError: log.debug("PyGrub: Default selection is not valid, using first boot configuration...") img = g.cf.images[0] diff --git a/tools/python/Makefile b/tools/python/Makefile index 4ae0a2d..16ab59f 100644 --- a/tools/python/Makefile +++ b/tools/python/Makefile @@ -12,11 +12,11 @@ PODIR := xen/xm/messages POTFILE := $(PODIR)/xen-xm.pot I18NSRCFILES = $(shell find xen/xm/ -name '*.py') CATALOGS = $(patsubst %,xen/xm/messages/%.mo,$(LINGUAS)) -NLSDIR = /usr/share/locale +NLSDIR = $(SHAREDIR)/locale .PHONY: build buildpy buildpy: - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build + CC="$(CC)" CFLAGS="$(CFLAGS)" $(PYTHON) setup.py build build: buildpy refresh-pot refresh-po $(CATALOGS) @@ -54,22 +54,17 @@ refresh-po: $(POTFILE) $(MSGFMT) -c -o $@ $< .PHONY: install -ifndef XEN_PYTHON_NATIVE_INSTALL -install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()") install: install-messages install-dtd - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force --install-lib="$(DESTDIR)$(LIBPATH)/python" -else -install: install-messages install-dtd - CC="$(CC)" 
CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force -endif + CC="$(CC)" CFLAGS="$(CFLAGS)" $(PYTHON) setup.py install \ + $(PYTHON_PREFIX_ARG) --root="$(DESTDIR)" --force install-dtd: all - $(INSTALL_DIR) $(DESTDIR)/usr/share/xen - $(INSTALL_DATA) xen/xm/create.dtd $(DESTDIR)/usr/share/xen + $(INSTALL_DIR) $(DESTDIR)$(DOCDIR) + $(INSTALL_DATA) xen/xm/create.dtd $(DESTDIR)$(DOCDIR) install-messages: all set -e; if which $(MSGFMT) >/dev/null ; then \ - mkdir -p $(DESTDIR)$(NLSDIR); \ + $(INSTALL_DIR) $(DESTDIR)$(NLSDIR); \ for l in $(LINGUAS); do \ $(INSTALL_DIR) $(DESTDIR)$(NLSDIR)/$$l; \ $(INSTALL_DIR) $(DESTDIR)$(NLSDIR)/$$l/LC_MESSAGES; \ diff --git a/tools/python/README.XendConfig b/tools/python/README.XendConfig index 9776f33..9cc0ead 100644 --- a/tools/python/README.XendConfig +++ b/tools/python/README.XendConfig @@ -25,6 +25,7 @@ name_label name name_description user_version is_a_template +auto_power_on resident_on memory_static_min memory memory_static_max maxmem diff --git a/tools/python/scripts/test_hvm_create.py b/tools/python/scripts/test_hvm_create.py index 35abfe0..5020363 100644 --- a/tools/python/scripts/test_hvm_create.py +++ b/tools/python/scripts/test_hvm_create.py @@ -74,7 +74,6 @@ console_cfg = { import sys import time -sys.path.append('/usr/lib/python') from xapi import connect, execute diff --git a/tools/python/scripts/test_vm_create.py b/tools/python/scripts/test_vm_create.py index 6575f15..9ac8a6e 100644 --- a/tools/python/scripts/test_vm_create.py +++ b/tools/python/scripts/test_vm_create.py @@ -93,7 +93,6 @@ console_cfg = { import sys import time -sys.path.append('/usr/lib/python') from xapi import connect, execute diff --git a/tools/python/scripts/xapi.py b/tools/python/scripts/xapi.py index 1a07795..9530f4a 100644 --- a/tools/python/scripts/xapi.py +++ b/tools/python/scripts/xapi.py @@ -20,7 +20,6 @@ import sys import time import re import os -sys.path.append('/usr/lib/python') from xen.util.xmlrpclib2 import 
ServerProxy from optparse import * diff --git a/tools/python/xen/util/acmpolicy.py b/tools/python/xen/util/acmpolicy.py index 64978a6..1a95c3a 100644 --- a/tools/python/xen/util/acmpolicy.py +++ b/tools/python/xen/util/acmpolicy.py @@ -216,7 +216,7 @@ ACM_SCHEMA=""" - + diff --git a/tools/python/xen/util/auxbin.py b/tools/python/xen/util/auxbin.py index 75e38a8..e183000 100644 --- a/tools/python/xen/util/auxbin.py +++ b/tools/python/xen/util/auxbin.py @@ -35,7 +35,11 @@ def execute(exe, args = None): a = [ exepath ] if args: a.extend(args) - os.execv(exepath, a) + try: + os.execv(exepath, a) + except OSError, exn: + print exepath, ": ", exn + sys.exit(1) def pathTo(exe): diff --git a/tools/python/xen/util/blkif.py b/tools/python/xen/util/blkif.py index 29b23fb..6091afe 100644 --- a/tools/python/xen/util/blkif.py +++ b/tools/python/xen/util/blkif.py @@ -75,7 +75,7 @@ def _parse_uname(uname): fn = taptype = None if uname.find(":") != -1: (typ, fn) = uname.split(":", 1) - if typ == "phy" and not fn.startswith("/"): + if typ in ("phy", "drbd") and not fn.startswith("/"): fn = "/dev/%s" %(fn,) if typ == "tap": (taptype, fn) = fn.split(":", 1) diff --git a/tools/python/xen/util/pci.py b/tools/python/xen/util/pci.py index bb7720b..a5eb407 100644 --- a/tools/python/xen/util/pci.py +++ b/tools/python/xen/util/pci.py @@ -417,7 +417,10 @@ class PciDevice: def find_the_uppermost_pci_bridge(self): # Find the uppermost PCI/PCI-X bridge - (dom, b, d, f) = self.find_parent() + dev = self.find_parent() + if dev is None: + return None + (dom, b, d, f) = dev dev = dev_parent = PciDevice(dom, b, d, f) while dev_parent.dev_type != DEV_TYPE_PCIe_BRIDGE: parent = dev_parent.find_parent() @@ -463,6 +466,11 @@ class PciDevice: element, the caller itself can remove it explicitly. ''' dev = self.find_the_uppermost_pci_bridge() + + # The 'self' device is on bus0. 
+ if dev is None: + return [self.name] + dev_list = dev.find_all_devices_behind_the_bridge(ignore_bridge) dev_list = re.findall(PCI_DEV_REG_EXPRESS_STR, '%s' % dev_list) return dev_list @@ -559,7 +567,8 @@ class PciDevice: return self.find_all_the_multi_functions() elif self.dev_type == DEV_TYPE_PCI and not self.pci_af_flr: coassigned_pci_list = self.find_coassigned_pci_devices(True) - del coassigned_pci_list[0] + if len(coassigned_pci_list) > 1: + del coassigned_pci_list[0] return coassigned_pci_list else: return [self.name] diff --git a/tools/python/xen/util/vscsi_util.py b/tools/python/xen/util/vscsi_util.py index 3b8d053..8b26117 100644 --- a/tools/python/xen/util/vscsi_util.py +++ b/tools/python/xen/util/vscsi_util.py @@ -36,6 +36,11 @@ SYSFS_SCSI_DEV_TYPEID_PATH = '/type' SYSFS_SCSI_DEV_REVISION_PATH = '/rev' SYSFS_SCSI_DEV_SCSILEVEL_PATH = '/scsi_level' +SCSI_ID_COMMANDS = [ + "/lib/udev/scsi_id -gu --sg-version 3 -d /dev/%s 2>/dev/null", + "/sbin/scsi_id -gu -s /class/scsi_generic/%s 2>/dev/null" +] + def _vscsi_get_devname_by(name, scsi_devices): """A device name is gotten by the HCTL. 
(e.g., '0:0:0:0' to '/dev/sda') @@ -79,9 +84,10 @@ def _vscsi_get_hctl_by(phyname, scsi_devices): def _vscsi_get_scsiid(sg): - scsi_id = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split() - if len(scsi_id): - return scsi_id[0] + for scsi_id_command in SCSI_ID_COMMANDS: + scsi_id = os.popen(scsi_id_command % sg).read().split() + if len(scsi_id): + return scsi_id[0] return None @@ -225,40 +231,50 @@ def get_scsi_scsilevel(pHCTL): except: return None -def get_all_scsi_devices(): - - scsi_devs = [] +def _make_scsi_record(scsi_info): + scsi_rec = { + 'physical_HCTL': scsi_info[0], + 'dev_name': None, + 'sg_name': scsi_info[2], + 'scsi_id': None + } + if scsi_info[1] is not None: + scsi_rec['dev_name'] = scsi_info[1] + if scsi_info[3] is not None: + scsi_rec['scsi_id'] = scsi_info[3] + + scsi_rec['vendor_name'] = \ + get_scsi_vendor(scsi_rec['physical_HCTL']) + scsi_rec['model'] = \ + get_scsi_model(scsi_rec['physical_HCTL']) + scsi_rec['type_id'] = \ + get_scsi_typeid(scsi_rec['physical_HCTL']) + scsi_rec['revision'] = \ + get_scsi_revision(scsi_rec['physical_HCTL']) + scsi_rec['scsi_level'] = \ + get_scsi_scsilevel(scsi_rec['physical_HCTL']) - for scsi_info in vscsi_get_scsidevices(): - scsi_dev = { - 'physical_HCTL': scsi_info[0], - 'dev_name': None, - 'sg_name': scsi_info[2], - 'scsi_id': None - } - if scsi_info[1] is not None: - scsi_dev['dev_name'] = scsi_info[1] - if scsi_info[3] is not None: - scsi_dev['scsi_id'] = scsi_info[3] - - scsi_dev['vendor_name'] = \ - get_scsi_vendor(scsi_dev['physical_HCTL']) - scsi_dev['model'] = \ - get_scsi_model(scsi_dev['physical_HCTL']) - scsi_dev['type_id'] = \ - get_scsi_typeid(scsi_dev['physical_HCTL']) - scsi_dev['revision'] = \ - get_scsi_revision(scsi_dev['physical_HCTL']) - scsi_dev['scsi_level'] = \ - get_scsi_scsilevel(scsi_dev['physical_HCTL']) + try: + lsscsi_info = os.popen('lsscsi %s 2>/dev/null' % scsi_rec['physical_HCTL']).read().split() + scsi_rec['type'] = lsscsi_info[1] + except: + 
scsi_rec['type'] = None - try: - lsscsi_info = os.popen('lsscsi %s 2>/dev/null' % scsi_dev['physical_HCTL']).read().split() - scsi_dev['type'] = lsscsi_info[1] - except: - scsi_dev['type'] = None + return scsi_rec - scsi_devs.append(scsi_dev) +def get_scsi_device(pHCTL): + scsis_info = _vscsi_get_scsidevices_by_lsscsi(pHCTL) + if not scsis_info: + scsis_info = _vscsi_get_scsidevices_by_sysfs() + for scsi_info in scsis_info: + if scsi_info[0] == pHCTL: + return _make_scsi_record(scsi_info) + return None - return scsi_devs +def get_all_scsi_devices(): + scsi_records = [] + for scsi_info in vscsi_get_scsidevices(): + scsi_record = _make_scsi_record(scsi_info) + scsi_records.append(scsi_record) + return scsi_records diff --git a/tools/python/xen/web/connection.py b/tools/python/xen/web/connection.py index e507323..3d335d2 100644 --- a/tools/python/xen/web/connection.py +++ b/tools/python/xen/web/connection.py @@ -317,6 +317,8 @@ class SocketDgramListener: def main(self): try: + fcntl.fcntl(self.sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC) + while True: try: data = self.sock.recv(BUFFER_SIZE) diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py index b4a33c4..07e6b21 100644 --- a/tools/python/xen/xend/XendAPI.py +++ b/tools/python/xen/xend/XendAPI.py @@ -125,7 +125,11 @@ def event_register(session, reg_classes): } if not reg_classes: reg_classes = classes - event_registrations[session]['classes'].union_update(reg_classes) + if hasattr(set, 'union_update'): + event_registrations[session]['classes'].union_update(reg_classes) + else: + event_registrations[session]['classes'].update(reg_classes) + def event_unregister(session, unreg_classes): @@ -1352,6 +1356,9 @@ class XendAPI(object): def VM_get_is_a_template(self, session, ref): return self.VM_get('is_a_template', session, ref) + def VM_get_auto_power_on(self, session, vm_ref): + return self.VM_get('auto_power_on', session, vm_ref) + def VM_get_memory_dynamic_max(self, session, vm_ref): dom 
= XendDomain.instance().get_vm_by_uuid(vm_ref) return xen_api_success(dom.get_memory_dynamic_max()) @@ -1441,6 +1448,9 @@ class XendAPI(object): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) return xen_api_todo() + def VM_set_auto_power_on(self, session, vm_ref, val): + return self.VM_set('auto_power_on', session, vm_ref, val) + def VM_set_memory_dynamic_max(self, session, vm_ref, mem): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) dom.set_memory_dynamic_max(int(mem)) @@ -1499,8 +1509,7 @@ class XendAPI(object): if key.startswith("cpumap"): vcpu = int(key[6:]) try: - cpus = map(int, value.split(",")) - xendom.domain_pincpu(xeninfo.getDomid(), vcpu, cpus) + xendom.domain_pincpu(xeninfo.getDomid(), vcpu, value) except Exception, ex: log.exception(ex) @@ -1682,7 +1691,7 @@ class XendAPI(object): 'name_description': xeninfo.getName(), 'user_version': 1, 'is_a_template': xeninfo.info['is_a_template'], - 'auto_power_on': False, + 'auto_power_on': xeninfo.info['auto_power_on'], 'resident_on': XendNode.instance().uuid, 'memory_static_min': xeninfo.get_memory_static_min(), 'memory_static_max': xeninfo.get_memory_static_max(), diff --git a/tools/python/xen/xend/XendConfig.py b/tools/python/xen/xend/XendConfig.py index 7bb6255..02b7d91 100644 --- a/tools/python/xen/xend/XendConfig.py +++ b/tools/python/xen/xend/XendConfig.py @@ -187,6 +187,7 @@ XENAPI_CFG_TYPES = { 'name_description': str, 'user_version': str, 'is_a_template': bool0, + 'auto_power_on': bool0, 'resident_on': str, 'memory_static_min': int, # note these are stored in bytes, not KB! 
'memory_static_max': int, @@ -361,6 +362,7 @@ class XendConfig(dict): 'actions_after_crash': 'restart', 'actions_after_suspend': '', 'is_a_template': False, + 'auto_power_on': False, 'is_control_domain': False, 'features': '', 'PV_bootloader': '', @@ -1283,7 +1285,7 @@ class XendConfig(dict): dpci_record = { 'VM': self['uuid'], 'PPCI': ppci_uuid, - 'hotplug_slot': pci_dev.get('vslot', 0) + 'hotplug_slot': pci_dev.get('requested_vslot', 0) } dpci_opts = pci_dev.get('opts') @@ -1410,6 +1412,21 @@ class XendConfig(dict): if dev_uuid not in target['console_refs']: target['console_refs'].append(dev_uuid) + # Cope with old-format save files which say under vfb + # (type vfb) rather than (vfb 1) + try: + vfb_type = dev_info['type'] + except KeyError: + vfb_type = None + log.debug("iwj dev_type=%s vfb type %s" % + (dev_type, `vfb_type`)) + + if vfb_type == 'vnc' or vfb_type == 'sdl': + dev_info[vfb_type] = 1 + del dev_info['type'] + log.debug("iwj dev_type=%s vfb setting dev_info['%s']" % + (dev_type, vfb_type)) + elif dev_type == 'console': if 'console_refs' not in target: target['console_refs'] = [] @@ -1600,10 +1617,10 @@ class XendConfig(dict): # extendend like this: # # [device, [pci, [dev, [domain, 0], [bus, 0], [slot, 1], [func, 2], - # [vslt, 0]], + # [vslot, 0]], # [state, 'Initialising']]] # - # 'vslt' shows the virtual hotplug slot number which the PCI device + # 'vslot' shows the virtual hotplug slot number which the PCI device # is inserted in. This is only effective for HVM domains. 
# # state 'Initialising' indicates that the device is being attached, @@ -1611,7 +1628,7 @@ class XendConfig(dict): # # The Dict looks like this: # - # { devs: [{domain: 0, bus: 0, slot: 1, func: 2, vslt: 0}], + # { devs: [{domain: 0, bus: 0, slot: 1, func: 2, vslot: 0}], # states: ['Initialising'] } dev_config = {} @@ -1830,7 +1847,7 @@ class XendConfig(dict): dpci_record = { 'VM': self['uuid'], 'PPCI': ppci_uuid, - 'hotplug_slot': pci_dev.get('vslot', 0) + 'hotplug_slot': pci_dev.get('requested_vslot', 0) } dpci_opts = pci_dev.get('opts') @@ -1895,6 +1912,7 @@ class XendConfig(dict): for key, val in cfg_xenapi.items(): dev_info[key] = val self['devices'][dev_uuid] = (dev_type, dev_info) + return True return False diff --git a/tools/python/xen/xend/XendConstants.py b/tools/python/xen/xend/XendConstants.py index b1c2957..6775795 100644 --- a/tools/python/xen/xend/XendConstants.py +++ b/tools/python/xen/xend/XendConstants.py @@ -105,13 +105,15 @@ LAST_SHUTDOWN_REASON = 'xend/last_shutdown_reason' TRIGGER_NMI = 0 TRIGGER_RESET = 1 TRIGGER_INIT = 2 -TRIGGER_S3RESUME = 3 +TRIGGER_POWER = 3 +TRIGGER_S3RESUME = 4 TRIGGER_TYPE = { "nmi" : TRIGGER_NMI, "reset" : TRIGGER_RESET, "init" : TRIGGER_INIT, - "s3resume": TRIGGER_S3RESUME + "s3resume": TRIGGER_S3RESUME, + "power": TRIGGER_POWER } # diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index b624f78..a8ee276 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -1442,6 +1442,7 @@ class XendDomain: # set the same cpumask for all vcpus rc = 0 cpus = dominfo.getCpus() + cpumap = map(int, cpumap.split(",")) for v in vcpus: try: if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index ea68657..c4492f6 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -144,6 +144,7 @@ def recreate(info, 
priv): xeninfo = XendConfig.XendConfig(dominfo = info) xeninfo['is_control_domain'] = priv xeninfo['is_a_template'] = False + xeninfo['auto_power_on'] = False domid = xeninfo['domid'] uuid1 = uuid.fromString(xeninfo['uuid']) needs_reinitialising = False @@ -349,6 +350,8 @@ class XendDomainInfo: @type shutdownWatch: xen.xend.xenstore.xswatch @ivar shutdownStartTime: UNIX Time when domain started shutting down. @type shutdownStartTime: float or None + @ivar restart_in_progress: Is a domain restart thread running? + @type restart_in_progress: bool # @ivar state: Domain state # @type state: enum(DOM_STATE_HALTED, DOM_STATE_RUNNING, ...) @ivar state_updated: lock for self.state @@ -417,6 +420,7 @@ class XendDomainInfo: self.shutdownWatch = None self.shutdownStartTime = None self._resume = resume + self.restart_in_progress = False self.state_updated = threading.Condition() self.refresh_shutdown_lock = threading.Condition() @@ -513,7 +517,6 @@ class XendDomainInfo: if reason not in DOMAIN_SHUTDOWN_REASONS.values(): raise XendError('Invalid reason: %s' % reason) - self._removeVm('xend/previous_restart_time') self.storeDom("control/shutdown", reason) # HVM domain shuts itself down only if it has PV drivers @@ -595,7 +598,7 @@ class XendDomainInfo: #update the vslot info count = 0; for x in pci_devs: - x['vslt'] = slot_list[count] + x['vslot'] = slot_list[count] count += 1 @@ -618,9 +621,9 @@ class XendDomainInfo: pci_conf = self.info['devices'][dev_uuid][1] pci_devs = pci_conf['devs'] for x in pci_devs: - if (int(x['vslt'], 16) == int(new_dev['vslt'], 16) and - int(x['vslt'], 16) != 0 ): - raise VmError("vslot %s already have a device." % (new_dev['vslt'])) + if (int(x['vslot'], 16) == int(new_dev['vslot'], 16) and + int(x['vslot'], 16) != AUTO_PHP_SLOT): + raise VmError("vslot %s already have a device." 
% (new_dev['vslot'])) if (int(x['domain'], 16) == int(new_dev['domain'], 16) and int(x['bus'], 16) == int(new_dev['bus'], 16) and @@ -633,7 +636,7 @@ class XendDomainInfo: new_dev['bus'], new_dev['slot'], new_dev['func']) - bdf = xc.test_assign_device(self.domid, pci_str) + bdf = xc.test_assign_device(0, pci_str) if bdf != 0: if bdf == -1: raise VmError("failed to assign device: maybe the platform" @@ -684,31 +687,39 @@ class XendDomainInfo: # co-assignment devices hasn't been assigned, or has been assigned to # domN. coassignment_list = pci_device.find_coassigned_devices() - assigned_pci_device_str_list = get_assigned_pci_devices(self.domid) + assigned_pci_device_str_list = self._get_assigned_pci_devices() for pci_str in coassignment_list: (domain, bus, dev, func) = parse_pci_name(pci_str) dev_str = '0x%x,0x%x,0x%x,0x%x' % (domain, bus, dev, func) - if xc.test_assign_device(self.domid, dev_str) == 0: + if xc.test_assign_device(0, dev_str) == 0: continue if not pci_str in assigned_pci_device_str_list: - raise VmError(('pci: failed to pci-attach %s to dom%d" + \ + raise VmError(("pci: failed to pci-attach %s to domain %s" + \ " because one of its co-assignment device %s has been" + \ - " assigned to other domain.' \ - )% (pci_device.name, self.domid, pci_str)) + " assigned to other domain." 
\ + )% (pci_device.name, self.info['name_label'], pci_str)) - opts = '' - if 'opts' in new_dev and len(new_dev['opts']) > 0: - config_opts = new_dev['opts'] - config_opts = map(lambda (x, y): x+'='+y, config_opts) - opts = ',' + reduce(lambda x, y: x+','+y, config_opts) + if self.domid is not None: + opts = '' + if 'opts' in new_dev and len(new_dev['opts']) > 0: + config_opts = new_dev['opts'] + config_opts = map(lambda (x, y): x+'='+y, config_opts) + opts = ',' + reduce(lambda x, y: x+','+y, config_opts) - bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'], + bdf_str = "%s:%s:%s.%s@%s%s" % (new_dev['domain'], new_dev['bus'], new_dev['slot'], new_dev['func'], - opts, - new_dev['vslt']) - self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str) + new_dev['vslot'], + opts) + self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str) + + vslot = xstransact.Read("/local/domain/0/device-model/%i/parameter" + % self.getDomid()) + else: + vslot = new_dev['vslot'] + + return vslot def device_create(self, dev_config): @@ -723,6 +734,13 @@ class XendDomainInfo: dev_config_dict = self.info['devices'][dev_uuid][1] log.debug("XendDomainInfo.device_create: %s" % scrub_password(dev_config_dict)) + if dev_type == 'vif': + for x in dev_config: + if x != 'vif' and x[0] == 'mac': + if not re.match('^([0-9a-f]{2}:){5}[0-9a-f]{2}$', x[1], re.I): + log.error("Virtual network interface creation error - invalid MAC Address entered: %s", x[1]) + raise VmError("Cannot create a new virtual network interface - MAC address is not valid!"); + if self.domid is not None: try: dev_config_dict['devid'] = devid = \ @@ -780,72 +798,101 @@ class XendDomainInfo: if self.info.is_hvm(): if pci_state == 'Initialising': # HVM PCI device attachment - self.hvm_pci_device_create(dev_config) - # Update vslt - vslt = xstransact.Read("/local/domain/0/device-model/%i/parameter" - % self.getDomid()) - dev['vslt'] = vslt + vslot = self.hvm_pci_device_create(dev_config) + # Update vslot + dev['vslot'] = 
vslot for n in sxp.children(pci_dev): - if(n[0] == 'vslt'): - n[1] = vslt + if(n[0] == 'vslot'): + n[1] = vslot else: # HVM PCI device detachment existing_dev_uuid = sxp.child_value(existing_dev_info, 'uuid') existing_pci_conf = self.info['devices'][existing_dev_uuid][1] existing_pci_devs = existing_pci_conf['devs'] - vslt = AUTO_PHP_SLOT_STR + vslot = AUTO_PHP_SLOT_STR for x in existing_pci_devs: if ( int(x['domain'], 16) == int(dev['domain'], 16) and int(x['bus'], 16) == int(dev['bus'], 16) and int(x['slot'], 16) == int(dev['slot'], 16) and int(x['func'], 16) == int(dev['func'], 16) ): - vslt = x['vslt'] + vslot = x['vslot'] break - if vslt == AUTO_PHP_SLOT_STR: + if vslot == AUTO_PHP_SLOT_STR: raise VmError("Device %04x:%02x:%02x.%01x is not connected" % (int(dev['domain'],16), int(dev['bus'],16), int(dev['slot'],16), int(dev['func'],16))) - self.hvm_destroyPCIDevice(int(vslt, 16)) - # Update vslt - dev['vslt'] = vslt + self.hvm_destroyPCIDevice(int(vslot, 16)) + # Update vslot + dev['vslot'] = vslot for n in sxp.children(pci_dev): - if(n[0] == 'vslt'): - n[1] = vslt + if(n[0] == 'vslot'): + n[1] = vslot # If pci platform does not exist, create and exit. if existing_dev_info is None: self.device_create(dev_sxp) return True - # use DevController.reconfigureDevice to change device config - dev_control = self.getDeviceController(dev_class) - dev_uuid = dev_control.reconfigureDevice(devid, dev_config) - if not self.info.is_hvm(): - # in PV case, wait until backend state becomes connected. - dev_control.waitForDevice_reconfigure(devid) - num_devs = dev_control.cleanupDevice(devid) + if self.domid is not None: + # use DevController.reconfigureDevice to change device config + dev_control = self.getDeviceController(dev_class) + dev_uuid = dev_control.reconfigureDevice(devid, dev_config) + if not self.info.is_hvm(): + # in PV case, wait until backend state becomes connected. 
+ dev_control.waitForDevice_reconfigure(devid) + num_devs = dev_control.cleanupDevice(devid) - # update XendConfig with new device info - if dev_uuid: - new_dev_sxp = dev_control.configuration(devid) + # update XendConfig with new device info + if dev_uuid: + new_dev_sxp = dev_control.configuration(devid) + self.info.device_update(dev_uuid, new_dev_sxp) + + # If there is no device left, destroy pci and remove config. + if num_devs == 0: + if self.info.is_hvm(): + self.destroyDevice('pci', devid, True) + del self.info['devices'][dev_uuid] + platform = self.info['platform'] + orig_dev_num = len(platform['pci']) + # TODO: can use this to keep some info to ask high level + # management tools to hot insert a new passthrough dev + # after migration + if orig_dev_num != 0: + #platform['pci'] = ["%dDEVs" % orig_dev_num] + platform['pci'] = [] + else: + self.destroyDevice('pci', devid) + del self.info['devices'][dev_uuid] + else: + new_dev_sxp = ['pci'] + for cur_dev in sxp.children(existing_dev_info, 'dev'): + if pci_state == 'Closing': + if int(dev['domain'], 16) == int(sxp.child_value(cur_dev, 'domain'), 16) and \ + int(dev['bus'], 16) == int(sxp.child_value(cur_dev, 'bus'), 16) and \ + int(dev['slot'], 16) == int(sxp.child_value(cur_dev, 'slot'), 16) and \ + int(dev['func'], 16) == int(sxp.child_value(cur_dev, 'func'), 16): + continue + new_dev_sxp.append(cur_dev) + + if pci_state == 'Initialising': + for new_dev in sxp.children(dev_sxp, 'dev'): + new_dev_sxp.append(new_dev) + + dev_uuid = sxp.child_value(existing_dev_info, 'uuid') self.info.device_update(dev_uuid, new_dev_sxp) - # If there is no device left, destroy pci and remove config. 
- if num_devs == 0: - if self.info.is_hvm(): - self.destroyDevice('pci', devid, True) - del self.info['devices'][dev_uuid] - platform = self.info['platform'] - orig_dev_num = len(platform['pci']) - # TODO: can use this to keep some info to ask high level - # management tools to hot insert a new passthrough dev - # after migration - if orig_dev_num != 0: - #platform['pci'] = ["%dDEVs" % orig_dev_num] - platform['pci'] = [] - else: - self.destroyDevice('pci', devid) + # If there is only 'vscsi' in new_dev_sxp, remove the config. + if len(sxp.children(new_dev_sxp, 'dev')) == 0: del self.info['devices'][dev_uuid] + if self.info.is_hvm(): + platform = self.info['platform'] + orig_dev_num = len(platform['pci']) + # TODO: can use this to keep some info to ask high level + # management tools to hot insert a new passthrough dev + # after migration + if orig_dev_num != 0: + #platform['pci'] = ["%dDEVs" % orig_dev_num] + platform['pci'] = [] xen.xend.XendDomain.instance().managed_config_save(self) @@ -1038,15 +1085,15 @@ class XendDomainInfo: #find the pass-through device with the virtual slot devnum = 0 for x in pci_conf['devs']: - if int(x['vslt'], 16) == vslot: + if int(x['vslot'], 16) == vslot: break devnum += 1 if devnum >= pci_len: raise VmError("Device @ vslot 0x%x doesn't exist." % (vslot)) - if vslot == 0: - raise VmError("Device @ vslot 0x%x do not support hotplug." % (vslot)) + if vslot == AUTO_PHP_SLOT: + raise VmError("Device @ vslot 0x%x doesn't support hotplug." % (vslot)) # Check the co-assignment. 
# To pci-detach a device D from domN, we should ensure: for each DD in the @@ -1064,19 +1111,20 @@ class XendDomainInfo: "parse it's resources - "+str(e)) coassignment_list = pci_device.find_coassigned_devices() coassignment_list.remove(pci_device.name) - assigned_pci_device_str_list = get_assigned_pci_devices(self.domid) + assigned_pci_device_str_list = self._get_assigned_pci_devices() for pci_str in coassignment_list: if pci_str in assigned_pci_device_str_list: - raise VmError(('pci: failed to pci-detach %s from dom%d" + \ + raise VmError(("pci: failed to pci-detach %s from domain %s" + \ " because one of its co-assignment device %s is still " + \ - " assigned to the domain.' \ - )% (pci_device.name, self.domid, pci_str)) + " assigned to the domain." \ + )% (pci_device.name, self.info['name_label'], pci_str)) bdf_str = "%s:%s:%s.%s" % (x['domain'], x['bus'], x['slot'], x['func']) log.info("hvm_destroyPCIDevice:%s:%s!", x, bdf_str) - self.image.signalDeviceModel('pci-rem', 'pci-removed', bdf_str) + if self.domid is not None: + self.image.signalDeviceModel('pci-rem', 'pci-removed', bdf_str) return 0 @@ -1226,6 +1274,26 @@ class XendDomainInfo: return dev_info return None + def _get_assigned_pci_devices(self, devid = 0): + if self.domid is not None: + return get_assigned_pci_devices(self.domid) + + dev_str_list = [] + dev_info = self._getDeviceInfo_pci(devid) + if dev_info is None: + return dev_str_list + dev_uuid = sxp.child_value(dev_info, 'uuid') + pci_conf = self.info['devices'][dev_uuid][1] + pci_devs = pci_conf['devs'] + for pci_dev in pci_devs: + domain = int(pci_dev['domain'], 16) + bus = int(pci_dev['bus'], 16) + slot = int(pci_dev['slot'], 16) + func = int(pci_dev['func'], 16) + dev_str = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) + dev_str_list = dev_str_list + [dev_str] + return dev_str_list + def setMemoryTarget(self, target): """Set the memory target of this domain. @param target: In MiB. 
@@ -1597,9 +1665,6 @@ class XendDomainInfo: # convert two lists into a python dictionary vm_details = dict(zip(cfg_vm, vm_details)) - if vm_details['rtc/timeoffset'] == None: - vm_details['rtc/timeoffset'] = "0" - for arg, val in vm_details.items(): if arg in XendConfig.LEGACY_CFG_TO_XENAPI_CFG: xapiarg = XendConfig.LEGACY_CFG_TO_XENAPI_CFG[arg] @@ -1621,15 +1686,21 @@ class XendDomainInfo: self.info.update_with_image_sxp(sxp.from_string(image_sxp)) changed = True - # Check if the rtc offset has changes - if vm_details.get("rtc/timeoffset", "0") != self.info["platform"].get("rtc_timeoffset", "0"): - self.info["platform"]["rtc_timeoffset"] = vm_details.get("rtc/timeoffset", 0) - changed = True + # Update the rtc_timeoffset to be preserved across reboot. + # NB. No need to update xenstore domain section. + val = int(vm_details.get("rtc/timeoffset", 0)) + self.info["platform"]["rtc_timeoffset"] = val if changed: # Update the domain section of the store, as this contains some # parameters derived from the VM configuration. - self._storeDomDetails() + self.refresh_shutdown_lock.acquire() + try: + state = self._stateGet() + if state not in (DOM_STATE_SHUTDOWN, DOM_STATE_HALTED,): + self._storeDomDetails() + finally: + self.refresh_shutdown_lock.release() return 1 @@ -1861,7 +1932,8 @@ class XendDomainInfo: finally: self.refresh_shutdown_lock.release() - if restart_reason: + if restart_reason and not self.restart_in_progress: + self.restart_in_progress = True threading.Thread(target = self._maybeRestart, args = (restart_reason,)).start() @@ -1932,20 +2004,13 @@ class XendDomainInfo: old_domid = self.domid self._writeVm(RESTART_IN_PROGRESS, 'True') - now = time.time() - rst = self._readVm('xend/previous_restart_time') - if rst: - rst = float(rst) - timeout = now - rst - if timeout < MINIMUM_RESTART_TIME: - log.error( - 'VM %s restarting too fast (%f seconds since the last ' - 'restart). 
Refusing to restart to avoid loops.', - self.info['name_label'], timeout) - self.destroy() - return - - self._writeVm('xend/previous_restart_time', str(now)) + elapse = time.time() - self.info['start_time'] + if elapse < MINIMUM_RESTART_TIME: + log.error('VM %s restarting too fast (Elapsed time: %f seconds). ' + 'Refusing to restart to avoid loops.', + self.info['name_label'], elapse) + self.destroy() + return prev_vm_xend = self._listRecursiveVm('xend') new_dom_info = self.info @@ -2230,6 +2295,7 @@ class XendDomainInfo: log.debug('XendDomainInfo.constructDomain') self.shutdownStartTime = None + self.restart_in_progress = False hap = 0 hvm = self.info.is_hvm() @@ -2245,8 +2311,9 @@ class XendDomainInfo: # There is an implicit memory overhead for any domain creation. This # overhead is greater for some types of domain than others. For # example, an x86 HVM domain will have a default shadow-pagetable - # allocation of 1MB. We free up 2MB here to be on the safe side. - balloon.free(2*1024, self) # 2MB should be plenty + # allocation of 1MB. We free up 4MB here to be on the safe side. 
+ # 2MB memory allocation was not enough in some cases, so it's 4MB now + balloon.free(4*1024, self) # 4MB should be plenty ssidref = 0 if security.on() == xsconstants.XS_POLICY_USE: @@ -2305,6 +2372,21 @@ class XendDomainInfo: # Set maximum number of vcpus in domain xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max'])) + # Check for cpu_{cap|weight} validity for credit scheduler + if XendNode.instance().xenschedinfo() == 'credit': + cap = self.getCap() + weight = self.getWeight() + + assert type(weight) == int + assert type(cap) == int + + if weight < 1 or weight > 65535: + raise VmError("Cpu weight out of range, valid values are within range from 1 to 65535") + + if cap < 0 or cap > self.getVCpuCount() * 100: + raise VmError("Cpu cap out of range, valid range is from 0 to %s for specified number of vcpus" % + (self.getVCpuCount() * 100)) + # Test whether the devices can be assigned with VT-d pci = self.info["platform"].get("pci") pci_str = '' @@ -2312,7 +2394,7 @@ class XendDomainInfo: pci = map(lambda x: x[0:4], pci) # strip options pci_str = str(pci) if hvm and pci_str: - bdf = xc.test_assign_device(self.domid, pci_str) + bdf = xc.test_assign_device(0, pci_str) if bdf != 0: if bdf == -1: raise VmError("failed to assign device: maybe the platform" @@ -2416,12 +2498,6 @@ class XendDomainInfo: self._configureBootloader() try: - if self.info['platform'].get('localtime', 0): - if time.localtime(time.time())[8]: - self.info['platform']['rtc_timeoffset'] = -time.altzone - else: - self.info['platform']['rtc_timeoffset'] = -time.timezone - self.image = image.create(self, self.info) # repin domain vcpus if a restricted cpus list is provided @@ -3645,7 +3721,7 @@ class XendDomainInfo: ['bus', '0x%02x' % ppci.get_bus()], ['slot', '0x%02x' % ppci.get_slot()], ['func', '0x%1x' % ppci.get_func()], - ['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')], + ['vslot', '0x%02x' % xenapi_pci.get('hotplug_slot')], ['opts', dpci_opts], ['uuid', dpci_uuid] ], diff --git 
a/tools/python/xen/xend/XendNode.py b/tools/python/xen/xend/XendNode.py index 61779fa..d1c4055 100644 --- a/tools/python/xen/xend/XendNode.py +++ b/tools/python/xen/xend/XendNode.py @@ -363,6 +363,8 @@ class XendNode: ppci_uuid = saved_ppci_table.get(pci_dev.name, uuid.createString()) XendPPCI(ppci_uuid, ppci_record) + self.save_PPCIs() + def remove_PPCI(self, pci_name): # Update lspci info @@ -373,15 +375,41 @@ class XendNode: ppci_ref = XendPPCI.get_by_sbdf(domain, bus, slot, func) XendAPIStore.get(ppci_ref, "PPCI").destroy() + self.save_PPCIs() - def add_PSCSI(self): - # TODO - log.debug("add_network(): Not implemented.") + def add_PSCSI(self, add_HCTL): + saved_pscsis = self.state_store.load_state('pscsi') + saved_pscsi_table = {} + if saved_pscsis: + for saved_uuid, saved_record in saved_pscsis.items(): + try: + saved_pscsi_table[saved_record['scsi_id']] = saved_uuid + except KeyError: + pass + + # Initialise the PSCSI + pscsi_record = vscsi_util.get_scsi_device(add_HCTL) + if pscsi_record and pscsi_record['scsi_id']: + pscsi_uuid = saved_pscsi_table.get(pscsi_record['scsi_id'], None) + if pscsi_uuid is None: + pscsi_uuid = uuid.createString() + XendPSCSI(pscsi_uuid, pscsi_record) + self.save_PSCSIs() - def remove_PSCSI(self): - # TODO - log.debug("add_network(): Not implemented.") + + def remove_PSCSI(self, rem_HCTL): + saved_pscsis = self.state_store.load_state('pscsi') + if not saved_pscsis: + return + + # Remove the PSCSI + for pscsi_record in saved_pscsis.values(): + if rem_HCTL == pscsi_record['physical_HCTL']: + pscsi_ref = XendPSCSI.get_by_HCTL(rem_HCTL) + XendAPIStore.get(pscsi_ref, "PSCSI").destroy() + self.save_PSCSIs() + return ## def network_destroy(self, net_uuid): @@ -802,6 +830,43 @@ class XendNode: return [[k, info[k]] for k in ITEM_ORDER] + + def pciinfo(self): + # Each element of dev_list is a PciDevice + dev_list = PciUtil.find_all_devices_owned_by_pciback() + + # Each element of devs_list is a list of PciDevice + devs_list = 
PciUtil.check_FLR_capability(dev_list) + + devs_list = PciUtil.check_mmio_bar(devs_list) + + # Check if the devices have been assigned to guests. + final_devs_list = [] + for dev_list in devs_list: + available = True + for d in dev_list: + pci_str = '0x%x,0x%x,0x%x,0x%x' %(d.domain, d.bus, d.slot, d.func) + # Xen doesn't care what the domid is, so we pass 0 here... + domid = 0 + bdf = self.xc.test_assign_device(domid, pci_str) + if bdf != 0: + available = False + break + if available: + final_devs_list = final_devs_list + [dev_list] + + pci_sxp_list = [] + for dev_list in final_devs_list: + for d in dev_list: + pci_sxp = ['dev', ['domain', '0x%04x' % d.domain], + ['bus', '0x%02x' % d.bus], + ['slot', '0x%02x' % d.slot], + ['func', '0x%x' % d.func]] + pci_sxp_list.append(pci_sxp) + + return pci_sxp_list + + def xenschedinfo(self): sched_id = self.xc.sched_id_get() if sched_id == xen.lowlevel.xc.XEN_SCHEDULER_SEDF: diff --git a/tools/python/xen/xend/image.py b/tools/python/xen/xend/image.py index 04689c3..411936d 100644 --- a/tools/python/xen/xend/image.py +++ b/tools/python/xen/xend/image.py @@ -119,9 +119,14 @@ class ImageHandler: self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0)) self.dmargs = self.parseDeviceModelArgs(vmConfig) self.pid = None - rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset') - if rtc_timeoffset is not None: - xc.domain_set_time_offset(self.vm.getDomid(), int(rtc_timeoffset)) + rtc_timeoffset = int(vmConfig['platform'].get('rtc_timeoffset', 0)) + if vmConfig['platform'].get('localtime', 0): + if time.localtime(time.time())[8]: + rtc_timeoffset -= time.altzone + else: + rtc_timeoffset -= time.timezone + if rtc_timeoffset != 0: + xc.domain_set_time_offset(self.vm.getDomid(), rtc_timeoffset) self.cpuid = None self.cpuid_check = None @@ -286,8 +291,8 @@ class ImageHandler: if int(dev_info.get('sdl', 0)) != 0 : has_sdl = True if has_sdl: - self.display = dev_info.get('display', {}) - self.xauthority = 
dev_info.get('xauthority', {}) + self.display = dev_info.get('display', self.display) + self.xauthority = dev_info.get('xauthority', self.xauthority) opengl = int(dev_info.get('opengl', opengl)) if has_vnc: vnc_config = dev_info.get('other_config', {}) @@ -488,7 +493,10 @@ class ImageHandler: def _dmfailed(self, message): log.warning("domain %s: %s", self.vm.getName(), message) - xc.domain_shutdown(self.vm.getDomid(), DOMAIN_CRASH) + try: + xc.domain_shutdown(self.vm.getDomid(), DOMAIN_CRASH) + except: + pass def recreate(self): if self.device_model is None: @@ -526,8 +534,8 @@ class ImageHandler: try: self.sentinel_fifo.read(1) except OSError, e: pass self.sentinel_lock.acquire() - try: - if self.pid: + if self.pid: + try: (p,st) = os.waitpid(self.pid, os.WNOHANG) if p == self.pid: message = oshelp.waitstatus_description(st) @@ -539,23 +547,23 @@ class ImageHandler: except: message = "malfunctioning or died ?" message = "pid %d: %s" % (self.pid, message) - else: - message = "no longer running" - except Exception, e: - message = "waitpid failed: %s" % utils.exception_string(e) - message = "device model failure: %s" % message - try: message += "; see %s " % self.logfile - except: pass - self._dmfailed(message) - self.pid = None + except Exception, e: + message = "waitpid failed: %s" % utils.exception_string(e) + message = "device model failure: %s" % message + try: message += "; see %s " % self.logfile + except: pass + self._dmfailed(message) + self.pid = None + else: + log.info("%s device model terminated", self.vm.getName()) self.sentinel_lock.release() def destroyDeviceModel(self): if self.device_model is None: return - if self.pid: - self.sentinel_lock.acquire() - try: + self.sentinel_lock.acquire() + try: + if self.pid: try: os.kill(self.pid, signal.SIGHUP) except OSError, exn: @@ -584,22 +592,22 @@ class ImageHandler: except OSError: # This happens if the process doesn't exist. 
pass - state = xstransact.Remove("/local/domain/0/device-model/%i" - % self.vm.getDomid()) - finally: - self.pid = None - self.sentinel_lock.release() + finally: + self.pid = None + self.sentinel_lock.release() - try: - os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid()) - os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid()) - except: - pass - try: - del sentinel_fifos_inuse[self.sentinel_path_fifo] - os.unlink(self.sentinel_path_fifo) - except: - pass + state = xstransact.Remove("/local/domain/0/device-model/%i" + % self.vm.getDomid()) + try: + os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid()) + os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid()) + except: + pass + try: + del sentinel_fifos_inuse[self.sentinel_path_fifo] + os.unlink(self.sentinel_path_fifo) + except: + pass def setCpuid(self): xc.domain_set_policy_cpuid(self.vm.getDomid()) @@ -721,7 +729,12 @@ class HVMImageHandler(ImageHandler): if not self.display : self.display = '' - self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)), + # Do not store sdl, opengl and serial related qemu cli options + self.vm.storeVm(("image/dmargs", " ".join([ x for x in self.dmargs + if x != "-sdl" + and x != "-disable-opengl" + and x != "-serial" + and x != "pty" ])), ("image/device-model", self.device_model), ("image/display", self.display)) self.vm.permissionsVm("image/dmargs", { 'dom': self.vm.getDomid(), 'read': True } ) @@ -778,6 +791,14 @@ class HVMImageHandler(ImageHandler): if v: ret.append("-%s" % a) except (ValueError, TypeError): pass # if we can't convert it to a sane type, ignore it + elif a == 'serial': + if v: + if type(v) == str: + v = [v] + for s in v: + if s: + ret.append("-serial") + ret.append("%s" % s) else: if v: ret.append("-%s" % a) diff --git a/tools/python/xen/xend/server/XMLRPCServer.py b/tools/python/xen/xend/server/XMLRPCServer.py index 96c0ac2..fb9bdfe 100644 --- a/tools/python/xen/xend/server/XMLRPCServer.py +++ 
b/tools/python/xen/xend/server/XMLRPCServer.py @@ -198,7 +198,8 @@ class XMLRPCServer: self.server.register_function(fn, "xend.domain.%s" % name[7:]) # Functions in XendNode and XendDmesg - for type, lst, n in [(XendNode, ['info', 'send_debug_keys'], 'node'), + for type, lst, n in [(XendNode, ['info', 'pciinfo', 'send_debug_keys'], + 'node'), (XendDmesg, ['info', 'clear'], 'node.dmesg')]: inst = type.instance() for name in lst: diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py index e6ba4bc..7820fd0 100644 --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -71,11 +71,15 @@ class PciController(DevController): pcidevid = 0 vslots = "" for pci_config in config.get('devs', []): + vslot = pci_config.get('vslot') + if vslot is not None: + vslots = vslots + vslot + ";" + domain = parse_hex(pci_config.get('domain', 0)) bus = parse_hex(pci_config.get('bus', 0)) slot = parse_hex(pci_config.get('slot', 0)) func = parse_hex(pci_config.get('func', 0)) - vslot = parse_hex(pci_config.get('vslot', 0)) + requested_vslot = parse_hex(pci_config.get('requested_vslot', 0)) opts = pci_config.get('opts', '') if len(opts) > 0: @@ -83,13 +87,10 @@ class PciController(DevController): opts = reduce(lambda x, y: x+','+y, opts) back['opts-%i' % pcidevid] = opts - vslt = pci_config.get('vslt') - if vslt is not None: - vslots = vslots + vslt + ";" - back['dev-%i' % pcidevid] = "%04x:%02x:%02x.%01x" % \ (domain, bus, slot, func) back['uuid-%i' % pcidevid] = pci_config.get('uuid', '') + back['vslot-%i' % pcidevid] = "%02x" % requested_vslot pcidevid += 1 if vslots != "": @@ -169,9 +170,9 @@ class PciController(DevController): # Update vslots if back.get('vslots') is not None: vslots = old_vslots - for vslt in back['vslots'].split(';'): - if vslt != '': - vslots = vslots.replace(vslt + ';', '', 1) + for vslot in back['vslots'].split(';'): + if vslot != '': + vslots = vslots.replace(vslot + ';', '', 1) if vslots == '': 
self.removeBackend(devid, 'vslots') else: @@ -218,9 +219,9 @@ class PciController(DevController): #append vslot info if vslots is not None: try: - dev_dict['vslt'] = slot_list[i] + dev_dict['vslot'] = slot_list[i] except IndexError: - dev_dict['vslt'] = AUTO_PHP_SLOT_STR + dev_dict['vslot'] = AUTO_PHP_SLOT_STR pci_devs.append(dev_dict) @@ -453,7 +454,7 @@ class PciController(DevController): for (domain, bus, slot, func) in pci_dev_list: self.setupOneDevice(domain, bus, slot, func) wPath = '/local/domain/0/backend/pci/%u/0/aerState' % (self.getDomid()) - self.aerStatePath = xswatch(wPath, self._handleAerStateWatch) + self.aerStateWatch = xswatch(wPath, self._handleAerStateWatch) log.debug('pci: register aer watch %s', wPath) return @@ -488,13 +489,16 @@ class PciController(DevController): "bind your slot/device to the PCI backend using sysfs" \ )%(dev.name)) - if not self.vm.info.is_hvm(): - pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func) - bdf = xc.deassign_device(fe_domid, pci_str) - if bdf > 0: - raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)" - % (bus, slot, func)) - log.debug("pci: deassign device %x:%x.%x" % (bus, slot, func)) + # Need to do FLR here before deassign device in order to terminate + # DMA transaction, etc + dev.do_FLR() + + pci_str = "0x%x, 0x%x, 0x%x, 0x%x" % (domain, bus, slot, func) + bdf = xc.deassign_device(fe_domid, pci_str) + if bdf > 0: + raise VmError("Failed to deassign device from IOMMU (%x:%x.%x)" + % (bus, slot, func)) + log.debug("pci: Deassign device %x:%x.%x" % (bus, slot, func)) for (start, size) in dev.ioports: log.debug('pci: disabling ioport 0x%x/0x%x'%(start,size)) @@ -527,7 +531,6 @@ class PciController(DevController): if rc<0: raise VmError(('pci: failed to configure irq on device '+ '%s - errno=%d')%(dev.name,rc)) - dev.do_FLR() def cleanupDevice(self, devid): """ Detach I/O resources for device and cleanup xenstore nodes @@ -589,7 +592,7 @@ class PciController(DevController): def 
destroyDevice(self, devid, force): DevController.destroyDevice(self, devid, True) log.debug('pci: unregister aer watch') - self.unwatchAerState + self.unwatchAerState() def unwatchAerState(self): """Remove the watch on the domain's aerState node, if any.""" diff --git a/tools/python/xen/xend/server/udevevent.py b/tools/python/xen/xend/server/udevevent.py index b7ce26f..b2b9f09 100644 --- a/tools/python/xen/xend/server/udevevent.py +++ b/tools/python/xen/xend/server/udevevent.py @@ -40,13 +40,25 @@ class UdevEventProtocol(protocol.Protocol): log.info("Removing pci device %s", pci_name) XendNode.instance().remove_PPCI(pci_name) - elif (udev_event.get('SUBSYSTEMS', None) == 'scsi'): + elif (udev_event.get('SUBSYSTEM', None) == 'scsi'): + hctl = None + devpath = udev_event.get('DEVPATH', None) + if devpath: + hctl = devpath.split('/')[-1] + if len(hctl.split(':')) != 4: + hctl = None + if hctl is None: + # By any possibility, if an HCTL isn't gotten from + # the udev event, the udev event is ignored. 
+ log.warn("Invalid udev event about scsi received") + return + if (udev_event['ACTION'] == 'add'): - log.info("Adding scsi device") - XendNode.instance().add_PSCSI() + log.info("Adding scsi device %s", hctl) + XendNode.instance().add_PSCSI(hctl) elif (udev_event['ACTION'] == 'remove'): - log.info("Removing scci device") - XendNode.instance().remove_PSCSI() + log.info("Removing scsi device %s", hctl) + XendNode.instance().remove_PSCSI(hctl) elif (udev_event.get('SUBSYSTEM', None) == 'net'): interface = udev_event.get('INTERFACE', None) diff --git a/tools/python/xen/xend/server/vfbif.py b/tools/python/xen/xend/server/vfbif.py index 6f049d3..d1e99ef 100644 --- a/tools/python/xen/xend/server/vfbif.py +++ b/tools/python/xen/xend/server/vfbif.py @@ -6,7 +6,7 @@ import xen.xend import os CONFIG_ENTRIES = ['type', 'vncdisplay', 'vnclisten', 'vncpasswd', 'vncunused', - 'videoram', 'display', 'xauthority', 'keymap', + 'videoram', 'display', 'xauthority', 'keymap', 'vnc', 'sdl', 'uuid', 'location', 'protocol', 'opengl'] class VfbifController(DevController): diff --git a/tools/python/xen/xm/create.dtd b/tools/python/xen/xm/create.dtd index d3010b6..693d27e 100644 --- a/tools/python/xen/xm/create.dtd +++ b/tools/python/xen/xm/create.dtd @@ -89,7 +89,7 @@ slot CDATA #REQUIRED func CDATA #REQUIRED opts_str CDATA #IMPLIED - vslt CDATA #IMPLIED> + vslot CDATA #IMPLIED> 0: config_pci_bdf.append(['opts', config_pci_opts]) @@ -1057,7 +1058,7 @@ def preprocess_pci(vals): r"(?P[0-9a-fA-F]{1,2})[:,]" + \ r"(?P[0-9a-fA-F]{1,2})[.,]" + \ r"(?P[0-7])" + \ - r"(@(?P[0-9a-fA-F]))?" + \ + r"(@(?P[01]?[0-9a-fA-F]))?" 
+ \ r"(,(?P.*))?$", \ pci_dev_str) if pci_match!=None: @@ -1081,7 +1082,7 @@ def preprocess_vscsi(vals): if not vals.vscsi: return scsi = [] for scsi_str in vals.vscsi: - d = scsi_str.split(',') + d = [tmp.strip() for tmp in scsi_str.split(',')] n = len(d) if n == 2: tmp = d[1].split(':') diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py index 46cf8e4..a460bc8 100644 --- a/tools/python/xen/xm/main.py +++ b/tools/python/xen/xm/main.py @@ -58,13 +58,6 @@ from xen.util.acmpolicy import ACM_LABEL_UNLABELED_DISPLAY import XenAPI -import xen.lowlevel.xc -try: - xc = xen.lowlevel.xc.xc() -except Exception, ex: - print >>sys.stderr, ("Is xen kernel running?") - sys.exit(1) - import inspect from xen.xend import XendOptions xoptions = XendOptions.instance() @@ -158,7 +151,7 @@ SUBCOMMAND_HELP = { 'Get/set credit scheduler parameters.'), 'sysrq' : (' ', 'Send a sysrq to a domain.'), 'debug-keys' : ('', 'Send debug keys to Xen.'), - 'trigger' : (' []', + 'trigger' : (' []', 'Send a trigger to a domain.'), 'vcpu-list' : ('[Domain, ...]', 'List the VCPUs for all/some domains.'), @@ -1386,7 +1379,7 @@ def xm_vcpu_pin(args): else: cpus.append(int(c)) cpus.sort() - return cpus + return ",".join(map(str, cpus)) dom = args[0] vcpu = args[1] @@ -1396,9 +1389,8 @@ def xm_vcpu_pin(args): cpumap = cpu_make_map(args[2]) if serverType == SERVER_XEN_API: - cpumap = map(str, cpumap) server.xenapi.VM.add_to_VCPUs_params_live( - get_single_vm(dom), "cpumap%i" % int(vcpu), ",".join(cpumap)) + get_single_vm(dom), "cpumap%i" % int(vcpu), cpumap) else: server.xend.domain.pincpu(dom, vcpu, cpumap) @@ -2162,7 +2154,7 @@ def xm_pci_list(args): "bus": "0x%02x" % int(ppci_record["bus"]), "slot": "0x%02x" % int(ppci_record["slot"]), "func": "0x%01x" % int(ppci_record["func"]), - "vslt": "0x%02x" % \ + "vslot": "0x%02x" % \ int(server.xenapi.DPCI.get_hotplug_slot(dpci_ref)) } devs.append(dev) @@ -2173,10 +2165,10 @@ def xm_pci_list(args): if len(devs) == 0: return - has_vslt = 
devs[0].has_key('vslt') - if has_vslt: + has_vslot = devs[0].has_key('vslot') + if has_vslot: hdr_str = 'VSlt domain bus slot func' - fmt_str = "%(vslt)-3s %(domain)-3s %(bus)-3s %(slot)-3s %(func)-3s " + fmt_str = "%(vslot)-3s %(domain)-3s %(bus)-3s %(slot)-3s %(func)-3s " else: hdr_str = 'domain bus slot func' fmt_str = "%(domain)-3s %(bus)-3s %(slot)-3s %(func)-3s " @@ -2188,34 +2180,28 @@ def xm_pci_list(args): hdr = 1 print ( fmt_str % x ) + +def parse_pci_info(info): + def get_info(n, t, d): + return t(sxp.child_value(info, n, d)) + return { + 'domain' : get_info('domain', parse_hex, 0), + 'bus' : get_info('bus', parse_hex, -1), + 'slot' : get_info('slot', parse_hex, -1), + 'func' : get_info('func', parse_hex, -1) + } + def xm_pci_list_assignable_devices(args): - # Each element of dev_list is a PciDevice - dev_list = find_all_devices_owned_by_pciback() - - # Each element of devs_list is a list of PciDevice - devs_list = check_FLR_capability(dev_list) - - devs_list = check_mmio_bar(devs_list) - - # Check if the devices have been assigned to guests. - final_devs_list = [] - for dev_list in devs_list: - available = True - for d in dev_list: - pci_str = '0x%x,0x%x,0x%x,0x%x' %(d.domain, d.bus, d.slot, d.func) - # Xen doesn't care what the domid is, so we pass 0 here... 
- domid = 0 - bdf = xc.test_assign_device(domid, pci_str) - if bdf != 0: - available = False - break - if available: - final_devs_list = final_devs_list + [dev_list] + xenapi_unsupported() + arg_check(args, "pci-list-assignable-devices", 0) + + devs = server.xend.node.pciinfo() + + fmt_str = "%(domain)04x:%(bus)02x:%(slot)02x.%(func)01x" + for x in devs: + pci = parse_pci_info(x) + print fmt_str % pci - for dev_list in final_devs_list: - for d in dev_list: - print d.name, - print def vscsi_sort(devs): def sort_hctl(ds, l): @@ -2454,16 +2440,16 @@ def parse_pci_configuration(args, state, opts = ''): dom = args[0] pci_dev_str = args[1] if len(args) == 3: - vslt = args[2] + vslot = args[2] else: - vslt = AUTO_PHP_SLOT_STR + vslot = AUTO_PHP_SLOT_STR pci=['pci'] pci_match = re.match(r"((?P[0-9a-fA-F]{1,4})[:,])?" + \ r"(?P[0-9a-fA-F]{1,2})[:,]" + \ r"(?P[0-9a-fA-F]{1,2})[.,]" + \ r"(?P[0-7])$", pci_dev_str) if pci_match == None: - raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) + raise OptionError("Invalid argument: %s %s" % (pci_dev_str, vslot)) pci_dev_info = pci_match.groupdict('0') try: @@ -2471,13 +2457,13 @@ def parse_pci_configuration(args, state, opts = ''): ['bus', '0x'+ pci_dev_info['bus']], ['slot', '0x'+ pci_dev_info['slot']], ['func', '0x'+ pci_dev_info['func']], - ['vslt', '0x%x' % int(vslt, 16)]] + ['vslot', '0x%x' % int(vslot, 16)]] if len(opts) > 0: pci_bdf.append(['opts', opts]) pci.append(pci_bdf) except: - raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt)) + raise OptionError("Invalid argument: %s %s" % (pci_dev_str, vslot)) pci.append(['state', state]) return (dom, pci) @@ -2507,7 +2493,7 @@ def xm_pci_attach(args): bus = int(sxp.child_value(pci_dev, 'bus'), 16) slot = int(sxp.child_value(pci_dev, 'slot'), 16) func = int(sxp.child_value(pci_dev, 'func'), 16) - vslt = int(sxp.child_value(pci_dev, 'vslt'), 16) + vslot = int(sxp.child_value(pci_dev, 'vslot'), 16) name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) 
target_ref = None @@ -2521,7 +2507,7 @@ def xm_pci_attach(args): dpci_record = { "VM": get_single_vm(dom), "PPCI": target_ref, - "hotplug_slot": vslt, + "hotplug_slot": vslot, "options": dict(config_pci_opts) } server.xenapi.DPCI.create(dpci_record) @@ -2680,7 +2666,7 @@ def xm_pci_detach(args): bus = int(sxp.child_value(pci_dev, 'bus'), 16) slot = int(sxp.child_value(pci_dev, 'slot'), 16) func = int(sxp.child_value(pci_dev, 'func'), 16) - vslt = int(sxp.child_value(pci_dev, 'vslt'), 16) + vslot = int(sxp.child_value(pci_dev, 'vslot'), 16) name = "%04x:%02x:%02x.%01x" % (domain, bus, slot, func) target_ref = None diff --git a/tools/python/xen/xm/xenapi_create.py b/tools/python/xen/xm/xenapi_create.py index d3cf5e2..0deced5 100644 --- a/tools/python/xen/xm/xenapi_create.py +++ b/tools/python/xen/xm/xenapi_create.py @@ -218,8 +218,8 @@ class xenapi_create: "SR": self.DEFAULT_STORAGE_REPOSITORY, "virtual_size": vdi.attributes["size"].value, "type": vdi.attributes["type"].value, - "sharable": bool(vdi.attributes["sharable"].value), - "read_only": bool(vdi.attributes["read_only"].value), + "sharable": vdi.attributes["sharable"].value == "True", + "read_only": vdi.attributes["read_only"].value == "True", "other_config": {"location": vdi.attributes["src"].value} } @@ -804,6 +804,7 @@ class sxp2xml: def extract_vbd(self, vbd_sxp, document): src = get_child_by_name(vbd_sxp, "uname") + mode = get_child_by_name(vbd_sxp, "mode") name = str(src.__hash__()) vbd = document.createElement("vbd") @@ -811,8 +812,7 @@ class sxp2xml: vbd.attributes["name"] = "vdb" + name vbd.attributes["vdi"] = "vdi" + name vbd.attributes["mode"] \ - = get_child_by_name(vbd_sxp, "mode") != "w" \ - and "RO" or "RW" + = re.search("^w!{0,1}$", mode) and "RW" or "RO" vbd.attributes["device"] \ = re.sub(":cdrom$", "", get_child_by_name(vbd_sxp, "dev")) vbd.attributes["bootable"] = "1" @@ -825,17 +825,18 @@ class sxp2xml: def extract_vdi(self, vbd_sxp, document): src = get_child_by_name(vbd_sxp, "uname") + 
mode = get_child_by_name(vbd_sxp, "mode") name = "vdi" + str(src.__hash__()) vdi = document.createElement("vdi") vdi.attributes["src"] = src vdi.attributes["read_only"] \ - = (get_child_by_name(vbd_sxp, "mode") != "w") \ - and "True" or "False" + = re.search("^w!{0,1}$", mode) and "False" or "True" vdi.attributes["size"] = '-1' vdi.attributes["type"] = "system" - vdi.attributes["sharable"] = "False" + vdi.attributes["sharable"] \ + = re.search("^w!$", mode) and "True" or "False" vdi.attributes["name"] = name vdi.appendChild(self.make_name_tag(name, document)) @@ -936,8 +937,8 @@ class sxp2xml: = get_child_by_name(dev_sxp, "slot", "0") pci.attributes["func"] \ = get_child_by_name(dev_sxp, "func", "0") - pci.attributes["vslt"] \ - = get_child_by_name(dev_sxp, "vslt", "0") + pci.attributes["vslot"] \ + = get_child_by_name(dev_sxp, "vslot", "0") for opt in get_child_by_name(dev_sxp, "opts", ""): if len(opt) > 0: pci_opt = document.createElement("pci_opt") diff --git a/tools/security/Makefile b/tools/security/Makefile index 6106271..f8eb69c 100644 --- a/tools/security/Makefile +++ b/tools/security/Makefile @@ -40,9 +40,6 @@ ifeq ($(ACM_SECURITY),y) all: build .PHONY: install -ifndef XEN_PYTHON_NATIVE_INSTALL -install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()") -endif install: all $(ACM_CONFIG_FILE) $(INSTALL_DIR) $(DESTDIR)$(SBINDIR) $(INSTALL_PROG) $(ACM_INST_TOOLS) $(DESTDIR)$(SBINDIR) @@ -63,11 +60,8 @@ install: all $(ACM_CONFIG_FILE) $(INSTALL_DATA) $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR) $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_CGIDIR) $(INSTALL_PROG) $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR) -ifndef XEN_PYTHON_NATIVE_INSTALL - python python/setup.py install --install-lib="$(DESTDIR)$(LIBPATH)/python" -else - python python/setup.py install --root="$(DESTDIR)" -endif + $(PYTHON) python/setup.py install $(PYTHON_PREFIX_ARG) \ + --root="$(DESTDIR)" --force else .PHONY: all all: @@ -78,7 +72,7 @@ endif 
.PHONY: build build: $(ACM_INST_TOOLS) $(ACM_NOINST_TOOLS) - python python/setup.py build + $(PYTHON) python/setup.py build chmod 700 $(ACM_SCRIPTS) xensec_tool: $(OBJS_TOOL) diff --git a/tools/security/python/xensec_tools/acm_getlabel b/tools/security/python/xensec_tools/acm_getlabel index 63137a9..8d5fe22 100644 --- a/tools/security/python/xensec_tools/acm_getlabel +++ b/tools/security/python/xensec_tools/acm_getlabel @@ -4,10 +4,6 @@ import sys import traceback import getopt -# add fallback path for non-native python path installs if needed -sys.path.insert(-1, '/usr/lib/python') -sys.path.insert(-1, '/usr/lib64/python') - from xen.util.security import ACMError, err, get_ssid # getopt.gnu_getopt is better, but only exists in Python 2.3+. Use diff --git a/tools/security/xensec_gen.py b/tools/security/xensec_gen.py index 8f65b4c..d531777 100644 --- a/tools/security/xensec_gen.py +++ b/tools/security/xensec_gen.py @@ -17,10 +17,6 @@ import sys -# Add fallback path for non-native python path installs if needed -sys.path.append( '/usr/lib/python' ) -sys.path.append( '/usr/lib64/python' ) - from xen.xensec_gen import main main.main( ) diff --git a/tools/sv/index.psp b/tools/sv/index.psp index 192aff8..829d468 100755 --- a/tools/sv/index.psp +++ b/tools/sv/index.psp @@ -1,6 +1,5 @@ <% import sys -sys.path.append( "/usr/lib/python" ) debug = True and False diff --git a/tools/vnet/scripts/vn b/tools/vnet/scripts/vn index 8e8e224..4a4281f 100644 --- a/tools/vnet/scripts/vn +++ b/tools/vnet/scripts/vn @@ -27,9 +27,6 @@ import socket import sys from getopt import getopt, GetoptError -sys.path.append('/usr/lib/python') -sys.path.append('/usr/lib64/python') - from xen.xend import sxp from xen.xend.PrettyPrint import prettyprint diff --git a/tools/xcutils/xc_save.c b/tools/xcutils/xc_save.c index d38bea4..f15674a 100644 --- a/tools/xcutils/xc_save.c +++ b/tools/xcutils/xc_save.c @@ -242,7 +242,7 @@ main(int argc, char **argv) port = xs_suspend_evtchn_port(si.domid); if (port < 
0) - warnx("faield to get the suspend evtchn port\n"); + warnx("failed to get the suspend evtchn port\n"); else { si.suspend_evtchn = diff --git a/tools/xenpmd/Makefile b/tools/xenpmd/Makefile index 7e9353b..10cb2fb 100644 --- a/tools/xenpmd/Makefile +++ b/tools/xenpmd/Makefile @@ -19,4 +19,7 @@ install: all clean: $(RM) -f $(BIN) $(DEPS) +%: %.c Makefile + $(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ + -include $(DEPS) diff --git a/tools/xenstore/xs.h b/tools/xenstore/xs.h index 6293952..bd36a0b 100644 --- a/tools/xenstore/xs.h +++ b/tools/xenstore/xs.h @@ -27,6 +27,11 @@ struct xs_handle; typedef uint32_t xs_transaction_t; +/* IMPORTANT: For details on xenstore protocol limits, see + * docs/misc/xenstore.txt in the Xen public source repository, and use the + * XENSTORE_*_MAX limit macros defined in xen/io/xs_wire.h. + */ + /* On failure, these routines set errno. */ /* Connect to the xs daemon. diff --git a/tools/xentrace/formats b/tools/xentrace/formats index 42744a7..617e474 100644 --- a/tools/xentrace/formats +++ b/tools/xentrace/formats @@ -62,8 +62,10 @@ 0x00082014 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ is invlpga? = %(1)d, virt = 0x%(2)08x ] 0x00082114 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INVLPG [ is invlpga? 
= %(1)d, virt = 0x%(2)016x ] 0x00082015 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MCE -0x00082016 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IO_ASSIST [ data = 0x%(1)04x ] -0x00082017 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MMIO_ASSIST [ data = 0x%(1)04x ] +0x00082016 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IOPORT_READ [ data = 0x%(1)04x ] +0x00082216 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) IOPORT_WRITE [ data = 0x%(1)04x ] +0x00082017 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MMIO_READ [ data = 0x%(1)04x ] +0x00082217 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) MMIO_WRITE [ data = 0x%(1)04x ] 0x00082018 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) CLTS 0x00082019 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ value = 0x%(1)08x ] 0x00082119 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) LMSW [ value = 0x%(1)016x ] diff --git a/tools/xentrace/xenctx.c b/tools/xentrace/xenctx.c index d5ec038..c67171c 100644 --- a/tools/xentrace/xenctx.c +++ b/tools/xentrace/xenctx.c @@ -42,6 +42,7 @@ typedef unsigned long long guest_word_t; int guest_word_size = sizeof (unsigned long); /* Word-length of the context record we get from xen */ int ctxt_word_size = sizeof (unsigned long); +int guest_protected_mode = 1; #elif defined (__ia64__) /* On ia64, we can't translate virtual address to physical address. 
*/ #define NO_TRANSLATION @@ -206,6 +207,7 @@ static void read_symbol_table(const char *symtab) } #if defined(__i386__) || defined(__x86_64__) +#define CR0_PE 0x1 char *flag_values[22][2] = {/* clear, set, bit# */ { NULL, "c" }, // 0 Carry @@ -371,20 +373,38 @@ static void print_ctx(vcpu_guest_context_any_t *ctx) print_ctx_64(&ctx->x64); } +#define NONPROT_MODE_SEGMENT_SHIFT 4 + static guest_word_t instr_pointer(vcpu_guest_context_any_t *ctx) { - if (ctxt_word_size == 4) - return ctx->x32.user_regs.eip; + guest_word_t r; + if (ctxt_word_size == 4) + { + r = ctx->x32.user_regs.eip; + + if ( !guest_protected_mode ) + r += ctx->x32.user_regs.cs << NONPROT_MODE_SEGMENT_SHIFT; + } else - return ctx->x64.user_regs.rip; + r = ctx->x64.user_regs.rip; + + return r; } static guest_word_t stack_pointer(vcpu_guest_context_any_t *ctx) { - if (ctxt_word_size == 4) - return ctx->x32.user_regs.esp; + guest_word_t r; + if (ctxt_word_size == 4) + { + r = ctx->x32.user_regs.esp; + + if ( !guest_protected_mode ) + r += ctx->x32.user_regs.ss << NONPROT_MODE_SEGMENT_SHIFT; + } else - return ctx->x64.user_regs.rsp; + r = ctx->x64.user_regs.rsp; + + return r; } static guest_word_t frame_pointer(vcpu_guest_context_any_t *ctx) @@ -683,12 +703,31 @@ static void print_stack_word(guest_word_t word, int width) printf(FMT_64B_WORD, word); } +static void print_code(vcpu_guest_context_any_t *ctx, int vcpu) +{ + guest_word_t instr; + int i; + + instr = instr_pointer(ctx); + printf("Code (instr addr %08llx)\n", instr); + instr -= 21; + for(i=0; i<32; i++) { + unsigned char *c = map_page(ctx, vcpu, instr+i); + if (instr+i == instr_pointer(ctx)) + printf("<%02x> ", *c); + else + printf("%02x ", *c); + } + printf("\n"); + + printf("\n"); +} + static void print_stack(vcpu_guest_context_any_t *ctx, int vcpu, int width) { guest_word_t stack = stack_pointer(ctx); guest_word_t stack_limit; guest_word_t frame; - guest_word_t instr; guest_word_t word; guest_word_t *p; int i; @@ -709,19 +748,6 @@ static void 
print_stack(vcpu_guest_context_any_t *ctx, int vcpu, int width) } printf("\n"); - printf("Code:\n"); - instr = instr_pointer(ctx) - 21; - for(i=0; i<32; i++) { - unsigned char *c = map_page(ctx, vcpu, instr+i); - if (instr+i == instr_pointer(ctx)) - printf("<%02x> ", *c); - else - printf("%02x ", *c); - } - printf("\n"); - - printf("\n"); - if(stack_trace) printf("Stack Trace:\n"); else @@ -830,6 +856,7 @@ static void dump_ctx(int vcpu) exit(-1); } guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4; + guest_protected_mode = (cpuctx.cr0 & CR0_PE); /* HVM guest context records are always host-sized */ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) { perror("xc_version"); @@ -849,6 +876,7 @@ static void dump_ctx(int vcpu) print_ctx(&ctx); #ifndef NO_TRANSLATION + print_code(&ctx, vcpu); if (is_kernel_text(instr_pointer(&ctx))) print_stack(&ctx, vcpu, guest_word_size); #endif diff --git a/xen/Makefile b/xen/Makefile index d16477a..93374dc 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. 
export XEN_VERSION = 3 export XEN_SUBVERSION = 4 -export XEN_EXTRAVERSION ?= -unstable$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff --git a/xen/arch/ia64/linux-xen/head.S b/xen/arch/ia64/linux-xen/head.S index 2ef757b..c1fa5ed 100644 --- a/xen/arch/ia64/linux-xen/head.S +++ b/xen/arch/ia64/linux-xen/head.S @@ -382,6 +382,35 @@ start_ap: mov ar.rsc=0 // place RSE in enforced lazy mode ;; loadrs // clear the dirty partition +#ifdef XEN +(isAP) br.few 2f + movl r19=__phys_per_cpu_start + mov r18=PERCPU_PAGE_SIZE +#ifndef CONFIG_SMP + add r19=r19,r18 + ;; +#else + movl r20=__cpu0_per_cpu + ;; + shr.u r18=r18,3 +1: + ld8 r21=[r19],8 ;; + st8[r20]=r21,8 + adds r18=-1,r18 + ;; + cmp4.lt p7,p6=0,r18 +(p7) br.cond.dptk.few 1b + ;; +#endif + movl r18=__per_cpu_offset + movl r19=__cpu0_per_cpu + movl r20=__per_cpu_start + ;; + sub r20=r19,r20 + ;; + st8 [r18]=r20 +2: +#endif ;; mov ar.bspstore=r2 // establish the new RSE stack ;; diff --git a/xen/arch/ia64/linux-xen/mm_contig.c b/xen/arch/ia64/linux-xen/mm_contig.c index 12462dc..f2326eb 100644 --- a/xen/arch/ia64/linux-xen/mm_contig.c +++ b/xen/arch/ia64/linux-xen/mm_contig.c @@ -183,7 +183,7 @@ void *percpu_area __initdata = NULL; void* __init per_cpu_allocate(void *xen_heap_start, unsigned long end_in_pa) { - int order = get_order(NR_CPUS * PERCPU_PAGE_SIZE); + int order = get_order((NR_CPUS - 1) * PERCPU_PAGE_SIZE); unsigned long size = 1UL << (order + PAGE_SHIFT); unsigned long start = ALIGN_UP((unsigned long)xen_heap_start, PERCPU_PAGE_SIZE); @@ -226,19 +226,31 @@ per_cpu_init (void) */ if (smp_processor_id() == 0) { #ifdef XEN + void *cpu0_data = __cpu0_per_cpu; + + __per_cpu_offset[0] = (char *)cpu0_data - __per_cpu_start; + per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0]; + cpu_data = get_per_cpu_area(); if (cpu_data == NULL) panic("can't allocate per cpu area.\n"); + + for 
(cpu = 1; cpu < NR_CPUS; cpu++) { + memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; + cpu_data += PERCPU_PAGE_SIZE; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } #else cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); -#endif for (cpu = 0; cpu < NR_CPUS; cpu++) { memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; cpu_data += PERCPU_PAGE_SIZE; per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; } +#endif } return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; } diff --git a/xen/arch/ia64/linux-xen/smpboot.c b/xen/arch/ia64/linux-xen/smpboot.c index ef7f9ea..a450dec 100644 --- a/xen/arch/ia64/linux-xen/smpboot.c +++ b/xen/arch/ia64/linux-xen/smpboot.c @@ -449,8 +449,8 @@ start_secondary (void *unused) { /* Early console may use I/O ports */ ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); - Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); #ifndef XEN + Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); efi_map_pal_code(); #endif cpu_init(); diff --git a/xen/arch/ia64/xen/ivt.S b/xen/arch/ia64/xen/ivt.S index 7c5ac5f..682c846 100644 --- a/xen/arch/ia64/xen/ivt.S +++ b/xen/arch/ia64/xen/ivt.S @@ -689,7 +689,7 @@ fast_hypercall: bsw.1 // B (6 cyc) regs are saved, switch to bank 1 ;; - PT_REGS_UNWIND_INFO(0) + PT_REGS_UNWIND_INFO(-48) ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection // movl r3=ia64_ret_from_syscall // X ;; diff --git a/xen/arch/ia64/xen/xen.lds.S b/xen/arch/ia64/xen/xen.lds.S index 96cd1ce..4daf6ad 100644 --- a/xen/arch/ia64/xen/xen.lds.S +++ b/xen/arch/ia64/xen/xen.lds.S @@ -195,7 +195,17 @@ SECTIONS data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) - { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } + { +#ifdef 
CONFIG_SMP + . = ALIGN(PERCPU_PAGE_SIZE); + __cpu0_per_cpu = .; + . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ +#endif + *(.data) + *(.data1) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ .got : AT(ADDR(.got) - LOAD_OFFSET) diff --git a/xen/arch/ia64/xen/xensetup.c b/xen/arch/ia64/xen/xensetup.c index c3affb4..61b1d11 100644 --- a/xen/arch/ia64/xen/xensetup.c +++ b/xen/arch/ia64/xen/xensetup.c @@ -374,7 +374,6 @@ void __init start_kernel(void) ns16550_init(0, &ns16550_com1); ns16550_init(1, &ns16550_com2); } - serial_init_preirq(); #ifdef CONFIG_VGA /* Plug in a default VGA mode */ @@ -390,7 +389,7 @@ void __init start_kernel(void) ia64_boot_param->console_info.num_cols; #endif - init_console(); + console_init_preirq(); if (running_on_sim || ia64_boot_param->domain_start == 0 || ia64_boot_param->domain_size == 0) { @@ -648,7 +647,7 @@ printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus); ns16550_init(0, &ns16550_com1); } } - serial_init_postirq(); + console_init_postirq(); } expose_p2m_init(); diff --git a/xen/arch/x86/Rules.mk b/xen/arch/x86/Rules.mk index e9e1d5b..d97cea4 100644 --- a/xen/arch/x86/Rules.mk +++ b/xen/arch/x86/Rules.mk @@ -42,7 +42,7 @@ x86_64 := n endif ifeq ($(TARGET_SUBARCH),x86_64) -CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks +CFLAGS += -mno-red-zone -fpic CFLAGS += -fno-asynchronous-unwind-tables # -fvisibility=hidden reduces -fpic cost, if it's available ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n) diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index f8302d1..0685194 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -47,9 +47,11 @@ #include #include #include +#include -#define DEBUG_PM_CX +/*#define DEBUG_PM_CX*/ +static void lapic_timer_nop(void) { } static void (*lapic_timer_off)(void); static void (*lapic_timer_on)(void); @@ -195,6 +197,17 @@ static void acpi_processor_idle(void) int 
sleep_ticks = 0; u32 t1, t2 = 0; + cpufreq_dbs_timer_suspend(); + + sched_tick_suspend(); + /* + * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer, + * which will break the later assumption of no sofirq pending, + * so add do_softirq + */ + if ( softirq_pending(smp_processor_id()) ) + do_softirq(); + /* * Interrupts must be disabled during bus mastering calculations and * for C2/C3 transitions. @@ -204,11 +217,13 @@ static void acpi_processor_idle(void) if ( softirq_pending(smp_processor_id()) ) { local_irq_enable(); + sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } - next_state = power ? cpuidle_current_governor->select(power) : -1; - if ( next_state > 0 ) + if ( max_cstate > 0 && power && + (next_state = cpuidle_current_governor->select(power)) > 0 ) { cx = &power->states[next_state]; if ( power->flags.bm_check && acpi_idle_bm_check() @@ -223,6 +238,8 @@ static void acpi_processor_idle(void) pm_idle_save(); else acpi_safe_halt(); + sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } @@ -329,6 +346,8 @@ static void acpi_processor_idle(void) default: local_irq_enable(); + sched_tick_resume(); + cpufreq_dbs_timer_resume(); return; } @@ -339,6 +358,9 @@ static void acpi_processor_idle(void) cx->time += sleep_ticks; } + sched_tick_resume(); + cpufreq_dbs_timer_resume(); + if ( cpuidle_current_governor->reflect ) cpuidle_current_governor->reflect(power); } @@ -517,8 +539,12 @@ static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx) if ( local_apic_timer_c2_ok ) break; case ACPI_STATE_C3: - /* We must be able to use HPET in place of LAPIC timers. 
*/ - if ( hpet_broadcast_is_available() ) + if ( boot_cpu_has(X86_FEATURE_ARAT) ) + { + lapic_timer_off = lapic_timer_nop; + lapic_timer_on = lapic_timer_nop; + } + else if ( hpet_broadcast_is_available() ) { lapic_timer_off = hpet_broadcast_enter; lapic_timer_on = hpet_broadcast_exit; @@ -766,7 +792,7 @@ long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power) /* FIXME: C-state dependency is not supported by far */ - print_acpi_power(cpu_id, acpi_power); + /*print_acpi_power(cpu_id, acpi_power);*/ if ( cpu_id == 0 && pm_idle_save == NULL ) { @@ -832,3 +858,18 @@ int pmstat_reset_cx_stat(uint32_t cpuid) return 0; } +void cpuidle_disable_deep_cstate(void) +{ + if ( max_cstate > 1 ) + { + if ( local_apic_timer_c2_ok ) + max_cstate = 2; + else + max_cstate = 1; + } + + mb(); + + hpet_disable_legacy_broadcast(); +} + diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c index cda7fb4..1631a30 100644 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c @@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { - on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); + if ((cpus_weight(cmd->mask) == 1) && + cpu_isset(smp_processor_id(), cmd->mask)) + do_drv_write((void *)cmd); + else + on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0); } static u32 get_cur_val(cpumask_t mask) diff --git a/xen/arch/x86/acpi/suspend.c b/xen/arch/x86/acpi/suspend.c index 0aac31d..bcb3e83 100644 --- a/xen/arch/x86/acpi/suspend.c +++ b/xen/arch/x86/acpi/suspend.c @@ -16,6 +16,7 @@ #if defined(CONFIG_X86_64) static unsigned long saved_lstar, saved_cstar; +static unsigned long saved_sysenter_esp, saved_sysenter_eip; #endif void save_rest_processor_state(void) @@ -26,6 +27,11 @@ void save_rest_processor_state(void) #if defined(CONFIG_X86_64) rdmsrl(MSR_CSTAR, saved_cstar); rdmsrl(MSR_LSTAR, saved_lstar); + if ( boot_cpu_data.x86_vendor == 
X86_VENDOR_INTEL ) + { + rdmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp); + rdmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip); + } #endif } @@ -41,6 +47,14 @@ void restore_rest_processor_state(void) wrmsrl(MSR_CSTAR, saved_cstar); wrmsr(MSR_STAR, 0, (FLAT_RING3_CS32<<16) | __HYPERVISOR_CS); wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U); + + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + { + /* Recover sysenter MSRs */ + wrmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp); + wrmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip); + wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0); + } #else /* !defined(CONFIG_X86_64) */ if ( supervisor_mode_kernel && cpu_has_sep ) wrmsr(MSR_IA32_SYSENTER_ESP, &init_tss[smp_processor_id()].esp1, 0); diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c index 2bbb003..aab6804 100644 --- a/xen/arch/x86/apic.c +++ b/xen/arch/x86/apic.c @@ -1303,8 +1303,10 @@ int __init APIC_init_uniprocessor (void) if (enable_local_apic < 0) clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - if (!smp_found_config && !cpu_has_apic) + if (!smp_found_config && !cpu_has_apic) { + skip_ioapic_setup = 1; return -1; + } /* * Complain if the BIOS pretends there is one. 
@@ -1313,6 +1315,7 @@ int __init APIC_init_uniprocessor (void) printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + skip_ioapic_setup = 1; return -1; } diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c index 1574004..4715f34 100644 --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -222,6 +222,9 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability); } + if ((c->cpuid_level >= 0x00000006) && + (cpuid_eax(0x00000006) & (1u<<2))) + set_bit(X86_FEATURE_ARAT, c->x86_capability); start_vmx(); } diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c index 3685056..49f3f84 100644 --- a/xen/arch/x86/cpu/mcheck/mce.c +++ b/xen/arch/x86/cpu/mcheck/mce.c @@ -23,6 +23,7 @@ #include "mce.h" int mce_disabled = 0; +int is_mc_panic = 0; unsigned int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ @@ -33,18 +34,15 @@ static void mcinfo_clear(struct mc_info *); #define SEG_PL(segsel) ((segsel) & 0x3) #define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16) -#if 1 /* XXFM switch to 0 for putback */ - -#define x86_mcerr(str, err) _x86_mcerr(str, err) - -static int _x86_mcerr(const char *msg, int err) +#if 0 +static int x86_mcerr(const char *msg, int err) { - printk("x86_mcerr: %s, returning %d\n", - msg != NULL ? msg : "", err); - return err; + gdprintk(XENLOG_WARNING, "x86_mcerr: %s, returning %d\n", + msg != NULL ? 
msg : "", err); + return err; } #else -#define x86_mcerr(str,err) +#define x86_mcerr(msg, err) (err) #endif cpu_banks_t mca_allbanks; @@ -127,6 +125,7 @@ mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask, switch (who) { case MCA_MCE_HANDLER: + case MCA_MCE_SCAN: mcg.mc_flags = MC_FLAG_MCE; which = MC_URGENT; break; @@ -222,8 +221,9 @@ mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask, cbret = mc_callback_bank_extended(mci, i, status); } - /* Clear status */ - mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); + if (who != MCA_MCE_SCAN) + /* Clear status */ + mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); wmb(); } @@ -472,6 +472,21 @@ cmn_handler_done: } } +void mcheck_mca_clearbanks(cpu_banks_t bankmask) +{ + int i; + uint64_t status; + + for (i = 0; i < 32 && i < nr_mce_banks; i++) { + if (!test_bit(i, bankmask)) + continue; + mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status); + if (!(status & MCi_STATUS_VAL)) + continue; + mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); + } +} + static int amd_mcheck_init(struct cpuinfo_x86 *ci) { int rc = 0; @@ -577,6 +592,7 @@ void mcheck_init(struct cpuinfo_x86 *c) break; } + set_poll_bankmask(c); if (!inited) printk(XENLOG_INFO "CPU%i: No machine check initialization\n", smp_processor_id()); @@ -984,14 +1000,76 @@ static void x86_mc_mceinject(void *data) #error BITS_PER_LONG definition absent #endif +#ifdef CONFIG_COMPAT +# include + +# define xen_mcinfo_msr mcinfo_msr +CHECK_mcinfo_msr; +# undef xen_mcinfo_msr +# undef CHECK_mcinfo_msr +# define CHECK_mcinfo_msr struct mcinfo_msr + +# define xen_mcinfo_common mcinfo_common +CHECK_mcinfo_common; +# undef xen_mcinfo_common +# undef CHECK_mcinfo_common +# define CHECK_mcinfo_common struct mcinfo_common + +CHECK_FIELD_(struct, mc_fetch, flags); +CHECK_FIELD_(struct, mc_fetch, fetch_id); +# define CHECK_compat_mc_fetch struct mc_fetch + +CHECK_FIELD_(struct, mc_physcpuinfo, ncpus); +# define CHECK_compat_mc_physcpuinfo struct 
mc_physcpuinfo + +CHECK_mc; +# undef CHECK_compat_mc_fetch +# undef CHECK_compat_mc_physcpuinfo + +# define xen_mc_info mc_info +CHECK_mc_info; +# undef xen_mc_info + +# define xen_mcinfo_global mcinfo_global +CHECK_mcinfo_global; +# undef xen_mcinfo_global + +# define xen_mcinfo_bank mcinfo_bank +CHECK_mcinfo_bank; +# undef xen_mcinfo_bank + +# define xen_mcinfo_extended mcinfo_extended +CHECK_mcinfo_extended; +# undef xen_mcinfo_extended + +# define xen_mcinfo_recovery mcinfo_recovery +# define xen_cpu_offline_action cpu_offline_action +# define xen_page_offline_action page_offline_action +CHECK_mcinfo_recovery; +# undef xen_cpu_offline_action +# undef xen_page_offline_action +# undef xen_mcinfo_recovery +#else +# define compat_mc_fetch xen_mc_fetch +# define compat_mc_physcpuinfo xen_mc_physcpuinfo +# define compat_handle_is_null guest_handle_is_null +# define copy_to_compat copy_to_guest +#endif + /* Machine Check Architecture Hypercall */ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) { long ret = 0; struct xen_mc curop, *op = &curop; struct vcpu *v = current; - struct xen_mc_fetch *mc_fetch; - struct xen_mc_physcpuinfo *mc_physcpuinfo; + union { + struct xen_mc_fetch *nat; + struct compat_mc_fetch *cmp; + } mc_fetch; + union { + struct xen_mc_physcpuinfo *nat; + struct compat_mc_physcpuinfo *cmp; + } mc_physcpuinfo; uint32_t flags, cmdflags; int nlcpu; xen_mc_logical_cpu_t *log_cpus = NULL; @@ -1001,6 +1079,9 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) struct xen_mc_msrinject *mc_msrinject; struct xen_mc_mceinject *mc_mceinject; + if (!IS_PRIV(v->domain) ) + return x86_mcerr(NULL, -EPERM); + if ( copy_from_guest(op, u_xen_mc, 1) ) return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT); @@ -1009,12 +1090,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) switch (op->cmd) { case XEN_MC_fetch: - mc_fetch = &op->u.mc_fetch; - cmdflags = mc_fetch->flags; - - /* This hypercall is for Dom0 only */ - if (!IS_PRIV(v->domain) ) - return 
x86_mcerr(NULL, -EPERM); + mc_fetch.nat = &op->u.mc_fetch; + cmdflags = mc_fetch.nat->flags; switch (cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT)) { case XEN_MC_NONURGENT: @@ -1032,30 +1109,35 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) flags = XEN_MC_OK; if (cmdflags & XEN_MC_ACK) { - mctelem_cookie_t cookie = ID2COOKIE(mc_fetch->fetch_id); + mctelem_cookie_t cookie = ID2COOKIE(mc_fetch.nat->fetch_id); mctelem_ack(which, cookie); } else { - if (guest_handle_is_null(mc_fetch->data)) + if (!is_pv_32on64_vcpu(v) + ? guest_handle_is_null(mc_fetch.nat->data) + : compat_handle_is_null(mc_fetch.cmp->data)) return x86_mcerr("do_mca fetch: guest buffer " "invalid", -EINVAL); if ((mctc = mctelem_consume_oldest_begin(which))) { struct mc_info *mcip = mctelem_dataptr(mctc); - if (copy_to_guest(mc_fetch->data, mcip, 1)) { + if (!is_pv_32on64_vcpu(v) + ? copy_to_guest(mc_fetch.nat->data, mcip, 1) + : copy_to_compat(mc_fetch.cmp->data, + mcip, 1)) { ret = -EFAULT; flags |= XEN_MC_FETCHFAILED; - mc_fetch->fetch_id = 0; + mc_fetch.nat->fetch_id = 0; } else { - mc_fetch->fetch_id = COOKIE2ID(mctc); + mc_fetch.nat->fetch_id = COOKIE2ID(mctc); } mctelem_consume_oldest_end(mctc); } else { /* There is no data */ flags |= XEN_MC_NODATA; - mc_fetch->fetch_id = 0; + mc_fetch.nat->fetch_id = 0; } - mc_fetch->flags = flags; + mc_fetch.nat->flags = flags; if (copy_to_guest(u_xen_mc, op, 1) != 0) ret = -EFAULT; } @@ -1066,17 +1148,16 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) return x86_mcerr("do_mca notify unsupported", -EINVAL); case XEN_MC_physcpuinfo: - if ( !IS_PRIV(v->domain) ) - return x86_mcerr("do_mca cpuinfo", -EPERM); - - mc_physcpuinfo = &op->u.mc_physcpuinfo; + mc_physcpuinfo.nat = &op->u.mc_physcpuinfo; nlcpu = num_online_cpus(); - if (!guest_handle_is_null(mc_physcpuinfo->info)) { - if (mc_physcpuinfo->ncpus <= 0) + if (!is_pv_32on64_vcpu(v) + ? 
!guest_handle_is_null(mc_physcpuinfo.nat->info) + : !compat_handle_is_null(mc_physcpuinfo.cmp->info)) { + if (mc_physcpuinfo.nat->ncpus <= 0) return x86_mcerr("do_mca cpuinfo: ncpus <= 0", -EINVAL); - nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus); + nlcpu = min(nlcpu, (int)mc_physcpuinfo.nat->ncpus); log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu); if (log_cpus == NULL) return x86_mcerr("do_mca cpuinfo", -ENOMEM); @@ -1086,28 +1167,23 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) xfree(log_cpus); return x86_mcerr("do_mca cpuinfo", -EIO); } + if (!is_pv_32on64_vcpu(v) + ? copy_to_guest(mc_physcpuinfo.nat->info, + log_cpus, nlcpu) + : copy_to_compat(mc_physcpuinfo.cmp->info, + log_cpus, nlcpu)) + ret = -EFAULT; + xfree(log_cpus); } - mc_physcpuinfo->ncpus = nlcpu; + mc_physcpuinfo.nat->ncpus = nlcpu; - if (copy_to_guest(u_xen_mc, op, 1)) { - if (log_cpus != NULL) - xfree(log_cpus); + if (copy_to_guest(u_xen_mc, op, 1)) return x86_mcerr("do_mca cpuinfo", -EFAULT); - } - if (!guest_handle_is_null(mc_physcpuinfo->info)) { - if (copy_to_guest(mc_physcpuinfo->info, - log_cpus, nlcpu)) - ret = -EFAULT; - xfree(log_cpus); - } break; case XEN_MC_msrinject: - if ( !IS_PRIV(v->domain) ) - return x86_mcerr("do_mca inject", -EPERM); - if (nr_mce_banks == 0) return x86_mcerr("do_mca inject", -ENODEV); @@ -1135,9 +1211,6 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) break; case XEN_MC_mceinject: - if ( !IS_PRIV(v->domain) ) - return x86_mcerr("do_mca #MC", -EPERM); - if (nr_mce_banks == 0) return x86_mcerr("do_mca #MC", -ENODEV); @@ -1152,9 +1225,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) add_taint(TAINT_ERROR_INJECT); - on_selected_cpus(cpumask_of_cpu(target), - x86_mc_mceinject, mc_mceinject, 1, 1); - + on_selected_cpus(cpumask_of_cpu(target), x86_mc_mceinject, + mc_mceinject, 1, 1); break; default: @@ -1163,9 +1235,22 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) return ret; } +void set_poll_bankmask(struct cpuinfo_x86 *c) +{ + if 
(cmci_support && !mce_disabled) { + memcpy(&(__get_cpu_var(poll_bankmask)), + &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t)); + } + else { + memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t)); + if (mce_firstbank(c)) + clear_bit(0, get_cpu_var(poll_bankmask)); + } +} void mc_panic(char *s) { + is_mc_panic = 1; console_start_sync(); printk("Fatal machine check: %s\n", s); printk("\n" diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h index 2bd6f02..044d11a 100644 --- a/xen/arch/x86/cpu/mcheck/mce.h +++ b/xen/arch/x86/cpu/mcheck/mce.h @@ -70,7 +70,8 @@ enum mca_source { MCA_MCE_HANDLER, MCA_POLLER, MCA_CMCI_HANDLER, - MCA_RESET + MCA_RESET, + MCA_MCE_SCAN }; enum mca_extinfo { @@ -88,6 +89,12 @@ struct mca_summary { }; extern cpu_banks_t mca_allbanks; +void set_poll_bankmask(struct cpuinfo_x86 *c); +DECLARE_PER_CPU(cpu_banks_t, poll_bankmask); +DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks); +extern int cmci_support; +extern int is_mc_panic; +extern void mcheck_mca_clearbanks(cpu_banks_t); extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t, struct mca_summary *); diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c index fb04a5b..6bf8c87 100644 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c @@ -12,11 +12,35 @@ #include "x86_mca.h" DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned); +DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); +int cmci_support = 0; static int nr_intel_ext_msrs = 0; -static int cmci_support = 0; static int firstbank; +/* Below are for MCE handling */ +struct mce_softirq_barrier { + atomic_t val; + atomic_t ingen; + atomic_t outgen; +}; + +static struct mce_softirq_barrier mce_inside_bar, mce_severity_bar; +static struct mce_softirq_barrier mce_trap_bar; + +/* + * mce_logout_lock should only be used in the trap handler, + * while MCIP has not been cleared yet in the global status + * register. 
Other use is not safe, since an MCE trap can + * happen at any moment, which would cause lock recursion. + */ +static DEFINE_SPINLOCK(mce_logout_lock); + +static atomic_t severity_cpu = ATOMIC_INIT(-1); + +static void mce_barrier_enter(struct mce_softirq_barrier *); +static void mce_barrier_exit(struct mce_softirq_barrier *); + #ifdef CONFIG_X86_MCE_THERMAL static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) { @@ -117,6 +141,16 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) } #endif /* CONFIG_X86_MCE_THERMAL */ +static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr) +{ + if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr) + && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs ) { + ext->mc_msr[ext->mc_msrs].reg = msr; + mca_rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value); + ++ext->mc_msrs; + } +} + static enum mca_extinfo intel_get_extended_msrs(struct mc_info *mci, uint16_t bank, uint64_t status) { @@ -129,75 +163,35 @@ intel_get_extended_msrs(struct mc_info *mci, uint16_t bank, uint64_t status) memset(&mc_ext, 0, sizeof(struct mcinfo_extended)); mc_ext.common.type = MC_TYPE_EXTENDED; mc_ext.common.size = sizeof(mc_ext); - mc_ext.mc_msrs = 10; - - mc_ext.mc_msr[0].reg = MSR_IA32_MCG_EAX; - rdmsrl(MSR_IA32_MCG_EAX, mc_ext.mc_msr[0].value); - mc_ext.mc_msr[1].reg = MSR_IA32_MCG_EBX; - rdmsrl(MSR_IA32_MCG_EBX, mc_ext.mc_msr[1].value); - mc_ext.mc_msr[2].reg = MSR_IA32_MCG_ECX; - rdmsrl(MSR_IA32_MCG_ECX, mc_ext.mc_msr[2].value); - - mc_ext.mc_msr[3].reg = MSR_IA32_MCG_EDX; - rdmsrl(MSR_IA32_MCG_EDX, mc_ext.mc_msr[3].value); - mc_ext.mc_msr[4].reg = MSR_IA32_MCG_ESI; - rdmsrl(MSR_IA32_MCG_ESI, mc_ext.mc_msr[4].value); - mc_ext.mc_msr[5].reg = MSR_IA32_MCG_EDI; - rdmsrl(MSR_IA32_MCG_EDI, mc_ext.mc_msr[5].value); - - mc_ext.mc_msr[6].reg = MSR_IA32_MCG_EBP; - rdmsrl(MSR_IA32_MCG_EBP, mc_ext.mc_msr[6].value); - mc_ext.mc_msr[7].reg = MSR_IA32_MCG_ESP; - rdmsrl(MSR_IA32_MCG_ESP, mc_ext.mc_msr[7].value); - mc_ext.mc_msr[8].reg = 
MSR_IA32_MCG_EFLAGS; - rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext.mc_msr[8].value); - mc_ext.mc_msr[9].reg = MSR_IA32_MCG_EIP; - rdmsrl(MSR_IA32_MCG_EIP, mc_ext.mc_msr[9].value); + + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EAX); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBX); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ECX); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDX); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESI); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDI); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBP); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESP); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EFLAGS); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EIP); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_MISC); + +#ifdef __x86_64__ + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R8); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R9); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R10); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R11); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R12); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R13); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R14); + intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R15); +#endif x86_mcinfo_add(mci, &mc_ext); return MCA_EXTINFO_GLOBAL; } -/* Below are for MCE handling */ - -/* Log worst error severity and offending CPU., - * Pick this CPU for further processing in softirq */ -static int severity_cpu = -1; -static int worst = 0; - -/* Lock of entry@second round scanning in MCE# handler */ -static cpumask_t scanned_cpus; -/* Lock for entry@Critical Section in MCE# handler */ -static bool_t mce_enter_lock = 0; -/* Record how many CPUs impacted in this MCE# */ -static cpumask_t impact_map; - -/* Lock of softirq rendezvous entering point */ -static cpumask_t mced_cpus; -/*Lock of softirq rendezvous leaving point */ -static cpumask_t finished_cpus; -/* Lock for picking one processing CPU */ -static bool_t mce_process_lock = 0; - -/* Spinlock for vMCE# MSR 
virtualization data */ -static DEFINE_SPINLOCK(mce_locks); - -/* Local buffer for holding MCE# data temporarily, sharing between mce - * handler and softirq handler. Those data will be finally committed - * for DOM0 Log and coped to per_dom related data for guest vMCE# - * MSR virtualization. - * Note: When local buffer is still in processing in softirq, another - * MCA comes, simply panic. - */ - -struct mc_local_t -{ - bool_t in_use; - mctelem_cookie_t mctc[NR_CPUS]; -}; -static struct mc_local_t mc_local; - /* This node list records errors impacting a domain. when one * MCE# happens, one error bank impacts a domain. This error node * will be inserted to the tail of the per_dom data for vMCE# MSR @@ -242,18 +236,22 @@ static int fill_vmsr_data(int cpu, struct mcinfo_bank *mc_bank, } entry = alloc_bank_entry(); + if (entry == NULL) + return -1; entry->mci_status = mc_bank->mc_status; entry->mci_addr = mc_bank->mc_addr; entry->mci_misc = mc_bank->mc_misc; entry->cpu = cpu; entry->bank = mc_bank->mc_bank; + spin_lock(&d->arch.vmca_msrs.lock); /* New error Node, insert to the tail of the per_dom data */ list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header); /* Fill MSR global status */ d->arch.vmca_msrs.mcg_status = gstatus; /* New node impact the domain, need another vMCE# injection*/ d->arch.vmca_msrs.nr_injection++; + spin_unlock(&d->arch.vmca_msrs.lock); printk(KERN_DEBUG "MCE: Found error @[CPU%d BANK%d " "status %"PRIx64" addr %"PRIx64" domid %d]\n ", @@ -263,100 +261,83 @@ static int fill_vmsr_data(int cpu, struct mcinfo_bank *mc_bank, return 0; } -static int mce_actions(void) { - int32_t cpu, ret; +/* + * Called from mctelem_process_deferred. Return 1 if the telemetry + * should be committed for dom0 consumption, 0 if it should be + * dismissed. 
+ */ +static int mce_action(unsigned int cpu, mctelem_cookie_t mctc) +{ struct mc_info *local_mi; struct mcinfo_common *mic = NULL; struct mcinfo_global *mc_global; struct mcinfo_bank *mc_bank; - /* Spinlock is used for exclusive read/write of vMSR virtualization - * (per_dom vMCE# data) - */ - spin_lock(&mce_locks); - - /* - * If softirq is filling this buffer while another MCE# comes, - * simply panic - */ - test_and_set_bool(mc_local.in_use); - - for_each_cpu_mask(cpu, impact_map) { - if (mc_local.mctc[cpu] == NULL) { - printk(KERN_ERR "MCE: get reserved entry failed\n "); - ret = -1; - goto end; - } - local_mi = (struct mc_info*)mctelem_dataptr(mc_local.mctc[cpu]); - x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL); - if (mic == NULL) { - printk(KERN_ERR "MCE: get local buffer entry failed\n "); - ret = -1; - goto end; - } - - mc_global = (struct mcinfo_global *)mic; + local_mi = (struct mc_info*)mctelem_dataptr(mctc); + x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL); + if (mic == NULL) { + printk(KERN_ERR "MCE: get local buffer entry failed\n "); + return 0; + } - /* Processing bank information */ - x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); + mc_global = (struct mcinfo_global *)mic; - for ( ; mic && mic->size; mic = x86_mcinfo_next(mic) ) { - if (mic->type != MC_TYPE_BANK) { - continue; - } - mc_bank = (struct mcinfo_bank*)mic; - /* Fill vMCE# injection and vMCE# MSR virtualization related data */ - if (fill_vmsr_data(cpu, mc_bank, mc_global->mc_gstatus) == -1) { - ret = -1; - goto end; - } + /* Processing bank information */ + x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK); - /* TODO: Add recovery actions here, such as page-offline, etc */ + for ( ; mic && mic->size; mic = x86_mcinfo_next(mic) ) { + if (mic->type != MC_TYPE_BANK) { + continue; } - } /* end of impact_map loop */ - - ret = 0; - -end: + mc_bank = (struct mcinfo_bank*)mic; + /* Fill vMCE# injection and vMCE# MSR virtualization related data */ + if (fill_vmsr_data(cpu, mc_bank, 
mc_global->mc_gstatus) == -1) + break; - for_each_cpu_mask(cpu, impact_map) { - /* This reserved entry is processed, commit it */ - if (mc_local.mctc[cpu] != NULL) { - mctelem_commit(mc_local.mctc[cpu]); - printk(KERN_DEBUG "MCE: Commit one URGENT ENTRY\n"); - } + /* TODO: Add recovery actions here, such as page-offline, etc */ } - test_and_clear_bool(mc_local.in_use); - spin_unlock(&mce_locks); - return ret; + return 1; } /* Softirq Handler for this MCE# processing */ static void mce_softirq(void) { int cpu = smp_processor_id(); + unsigned int workcpu; cpumask_t affinity; - /* Wait until all cpus entered softirq */ - while ( cpus_weight(mced_cpus) != num_online_cpus() ) { - cpu_relax(); - } - /* Not Found worst error on severity_cpu, it's weird */ - if (severity_cpu == -1) { - printk(KERN_WARNING "MCE: not found severity_cpu!\n"); - mc_panic("MCE: not found severity_cpu!"); - return; - } + printk(KERN_DEBUG "CPU%d enter softirq\n", cpu); + + mce_barrier_enter(&mce_inside_bar); + + /* + * Everybody is here. Now let's see who gets to do the + * recovery work. Right now we just see if there's a CPU + * that did not have any problems, and pick that one. + * + * First, just set a default value: the last CPU who reaches this + * will overwrite the value and become the default. 
+ */ + + atomic_set(&severity_cpu, cpu); + + mce_barrier_enter(&mce_severity_bar); + if (!mctelem_has_deferred(cpu)) + atomic_set(&severity_cpu, cpu); + mce_barrier_exit(&mce_severity_bar); + /* We choose severity_cpu for further processing */ - if (severity_cpu == cpu) { + if (atomic_read(&severity_cpu) == cpu) { + + printk(KERN_DEBUG "CPU%d handling errors\n", cpu); /* Step1: Fill DOM0 LOG buffer, vMCE injection buffer and * vMCE MSRs virtualization buffer */ - if (mce_actions()) - mc_panic("MCE recovery actions or Filling vMCE MSRS " - "virtualization data failed!\n"); + for_each_online_cpu(workcpu) { + mctelem_process_deferred(workcpu, mce_action); + } /* Step2: Send Log to DOM0 through vIRQ */ if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) { @@ -377,26 +358,9 @@ static void mce_softirq(void) vcpu_set_affinity(dom0->vcpu[0], &affinity); vcpu_kick(dom0->vcpu[0]); } - - /* Clean Data */ - test_and_clear_bool(mce_process_lock); - cpus_clear(impact_map); - cpus_clear(scanned_cpus); - worst = 0; - cpus_clear(mced_cpus); - memset(&mc_local, 0x0, sizeof(mc_local)); } - cpu_set(cpu, finished_cpus); - wmb(); - /* Leave until all cpus finished recovery actions in softirq */ - while ( cpus_weight(finished_cpus) != num_online_cpus() ) { - cpu_relax(); - } - - cpus_clear(finished_cpus); - severity_cpu = -1; - printk(KERN_DEBUG "CPU%d exit softirq \n", cpu); + mce_barrier_exit(&mce_inside_bar); } /* Machine Check owner judge algorithm: @@ -414,127 +378,161 @@ static void mce_softirq(void) * Round2: Do all MCE processing logic as normal. */ -/* Simple Scan. Panic when found non-recovery errors. Doing this for - * avoiding LOG missing +static void mce_panic_check(void) +{ + if (is_mc_panic) { + local_irq_enable(); + for ( ; ; ) + halt(); + } +} + +/* + * Initialize a barrier. Just set it to 0. 
*/ -static void severity_scan(void) +static void mce_barrier_init(struct mce_softirq_barrier *bar) { - uint64_t status; - int32_t i; + atomic_set(&bar->val, 0); + atomic_set(&bar->ingen, 0); + atomic_set(&bar->outgen, 0); +} - /* TODO: For PCC = 0, we need to have further judge. If it is can't be - * recovered, we need to RESET for avoiding DOM0 LOG missing - */ - for ( i = 0; i < nr_mce_banks; i++) { - rdmsrl(MSR_IA32_MC0_STATUS + 4 * i , status); - if ( !(status & MCi_STATUS_VAL) ) - continue; - /* MCE handler only handles UC error */ - if ( !(status & MCi_STATUS_UC) ) - continue; - if ( !(status & MCi_STATUS_EN) ) - continue; - if (status & MCi_STATUS_PCC) - mc_panic("pcc = 1, cpu unable to continue\n"); - } +#if 0 +/* + * This function will need to be used when offlining a CPU in the + * recovery actions. + * + * Decrement a barrier only. Needed for cases where the CPU + * in question can't do it itself (e.g. it is being offlined). + */ +static void mce_barrier_dec(struct mce_softirq_barrier *bar) +{ + atomic_inc(&bar->outgen); + wmb(); + atomic_dec(&bar->val); +} +#endif + +static void mce_spin_lock(spinlock_t *lk) +{ + while (!spin_trylock(lk)) { + cpu_relax(); + mce_panic_check(); + } +} - /* TODO: Further judgement for later CPUs here, maybe need MCACOD assistence */ - /* EIPV and RIPV is not a reliable way to judge the error severity */ +static void mce_spin_unlock(spinlock_t *lk) +{ + spin_unlock(lk); +} + +/* + * Increment the generation number and the value. The generation number + * is incremented when entering a barrier. This way, it can be checked + * on exit if a CPU is trying to re-enter the barrier. This can happen + * if the first CPU to make it out immediately exits or re-enters, while + * another CPU that is still in the loop becomes otherwise occupied + * (e.g. it needs to service an interrupt, etc), missing the value + * it's waiting for. 
+ * + * These barrier functions should always be paired, so that the + * counter value will reach 0 again after all CPUs have exited. + */ +static void mce_barrier_enter(struct mce_softirq_barrier *bar) +{ + int gen; + + atomic_inc(&bar->ingen); + gen = atomic_read(&bar->outgen); + mb(); + atomic_inc(&bar->val); + while ( atomic_read(&bar->val) != num_online_cpus() && + atomic_read(&bar->outgen) == gen) { + mb(); + mce_panic_check(); + } +} +static void mce_barrier_exit(struct mce_softirq_barrier *bar) +{ + int gen; + + atomic_inc(&bar->outgen); + gen = atomic_read(&bar->ingen); + mb(); + atomic_dec(&bar->val); + while ( atomic_read(&bar->val) != 0 && + atomic_read(&bar->ingen) == gen ) { + mb(); + mce_panic_check(); + } } +static void mce_barrier(struct mce_softirq_barrier *bar) +{ + mce_barrier_enter(bar); + mce_barrier_exit(bar); +} static void intel_machine_check(struct cpu_user_regs * regs, long error_code) { - unsigned int cpu = smp_processor_id(); - int32_t severity = 0; uint64_t gstatus; mctelem_cookie_t mctc = NULL; struct mca_summary bs; - /* First round scanning */ - severity_scan(); - cpu_set(cpu, scanned_cpus); - while (cpus_weight(scanned_cpus) < num_online_cpus()) - cpu_relax(); - - wmb(); - /* All CPUs Finished first round scanning */ - if (mc_local.in_use != 0) { - mc_panic("MCE: Local buffer is being processed, can't handle new MCE!\n"); - return; - } + mce_spin_lock(&mce_logout_lock); - /* Enter Critical Section */ - while (test_and_set_bool(mce_enter_lock)) { - udelay (1); - } + mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs); - mctc = mcheck_mca_logout(MCA_MCE_HANDLER, mca_allbanks, &bs); - /* local data point to the reserved entry, let softirq to - * process the local data */ - if (!bs.errcnt) { + if (bs.errcnt) { + /* + * Uncorrected errors must be dealt with in softirq context.
+ */ + if (bs.uc || bs.pcc) { + add_taint(TAINT_MACHINE_CHECK); + if (mctc != NULL) + mctelem_defer(mctc); + /* + * For PCC=1, context is lost, so reboot now without clearing + * the banks, and deal with the telemetry after reboot + * (the MSRs are sticky) + */ + if (bs.pcc) + mc_panic("State lost due to machine check exception.\n"); + } else { + if (mctc != NULL) + mctelem_commit(mctc); + } + mcheck_mca_clearbanks(mca_allbanks); + } else { if (mctc != NULL) mctelem_dismiss(mctc); - mc_local.mctc[cpu] = NULL; - cpu_set(cpu, mced_cpus); - test_and_clear_bool(mce_enter_lock); - raise_softirq(MACHINE_CHECK_SOFTIRQ); - return; - } - else if ( mctc != NULL) { - mc_local.mctc[cpu] = mctc; } - if (bs.uc || bs.pcc) - add_taint(TAINT_MACHINE_CHECK); + mce_spin_unlock(&mce_logout_lock); - if (bs.pcc) { - printk(KERN_WARNING "PCC=1 should have caused reset\n"); - severity = 3; - } - else if (bs.uc) { - severity = 2; - } - else { - printk(KERN_WARNING "We should skip Correctable Error\n"); - severity = 1; - } - /* This is the offending cpu! */ - cpu_set(cpu, impact_map); + /* + * Wait until everybody has processed the trap. + */ + mce_barrier(&mce_trap_bar); - if ( severity > worst) { - worst = severity; - severity_cpu = cpu; - } - cpu_set(cpu, mced_cpus); - test_and_clear_bool(mce_enter_lock); - wmb(); - - /* Wait for all cpus Leave Critical */ - while (cpus_weight(mced_cpus) < num_online_cpus()) - cpu_relax(); - /* Print MCE error */ - x86_mcinfo_dump(mctelem_dataptr(mctc)); - - /* Pick one CPU to clear MCIP */ - if (!test_and_set_bool(mce_process_lock)) { - rdmsrl(MSR_IA32_MCG_STATUS, gstatus); - wrmsrl(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP); - - if (worst >= 3) { - printk(KERN_WARNING "worst=3 should have caused RESET\n"); - mc_panic("worst=3 should have caused RESET"); - } - else { - printk(KERN_DEBUG "MCE: trying to recover\n"); - } + /* + * Clear MCIP if it wasn't already. 
There is a small + * chance that more than 1 CPU will end up doing this, + * but that's OK. + */ + if (bs.errcnt) { + mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus); + if ((gstatus & MCG_STATUS_MCIP) != 0) + mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP); + /* Print MCE error */ + x86_mcinfo_dump(mctelem_dataptr(mctc)); } + raise_softirq(MACHINE_CHECK_SOFTIRQ); } static DEFINE_SPINLOCK(cmci_discover_lock); -static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks); /* * Discover bank sharing using the algorithm recommended in the SDM. @@ -728,7 +726,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) intel_init_cmci(c); } -uint64_t g_mcg_cap; +static uint64_t g_mcg_cap; static void mce_cap_init(struct cpuinfo_x86 *c) { u32 l, h; @@ -740,9 +738,12 @@ static void mce_cap_init(struct cpuinfo_x86 *c) if ((l & MCG_CMCI_P) && cpu_has_apic) cmci_support = 1; - nr_mce_banks = l & 0xff; + nr_mce_banks = l & MCG_CAP_COUNT; if (nr_mce_banks > MAX_NR_BANKS) + { printk(KERN_WARNING "MCE: exceed max mce banks\n"); + g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS; + } if (l & MCG_EXT_P) { nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff; @@ -761,6 +762,11 @@ static void mce_init(void) clear_in_cr4(X86_CR4_MCE); + mce_barrier_init(&mce_inside_bar); + mce_barrier_init(&mce_severity_bar); + mce_barrier_init(&mce_trap_bar); + spin_lock_init(&mce_logout_lock); + /* log the machine checks left over from the previous reset. 
* This also clears all registers*/ @@ -814,104 +820,101 @@ int intel_mcheck_init(struct cpuinfo_x86 *c) } /* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */ -int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi) +void intel_mce_init_msr(struct domain *d) +{ + d->arch.vmca_msrs.mcg_status = 0x0; + d->arch.vmca_msrs.mcg_cap = g_mcg_cap; + d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0; + d->arch.vmca_msrs.nr_injection = 0; + memset(d->arch.vmca_msrs.mci_ctl, ~0, + sizeof(d->arch.vmca_msrs.mci_ctl)); + INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header); + spin_lock_init(&d->arch.vmca_msrs.lock); +} + +int intel_mce_wrmsr(u32 msr, u64 value) { struct domain *d = current->domain; struct bank_entry *entry = NULL; - uint64_t value = (u64)hi << 32 | lo; + unsigned int bank; int ret = 1; - spin_lock(&mce_locks); + spin_lock(&d->arch.vmca_msrs.lock); switch(msr) { case MSR_IA32_MCG_CTL: if (value != (u64)~0x0 && value != 0x0) { - gdprintk(XENLOG_WARNING, "MCE: value writen to MCG_CTL" + gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL" "should be all 0s or 1s\n"); ret = -1; break; } - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n"); - break; - } d->arch.vmca_msrs.mcg_ctl = value; break; case MSR_IA32_MCG_STATUS: - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n"); - break; - } d->arch.vmca_msrs.mcg_status = value; gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_CTL %"PRIx64"\n", value); break; - case MSR_IA32_MC0_CTL2: - case MSR_IA32_MC1_CTL2: - case MSR_IA32_MC2_CTL2: - case MSR_IA32_MC3_CTL2: - case MSR_IA32_MC4_CTL2: - case MSR_IA32_MC5_CTL2: - case MSR_IA32_MC6_CTL2: - case MSR_IA32_MC7_CTL2: - case MSR_IA32_MC8_CTL2: + case MSR_IA32_MCG_CAP: + gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n"); + ret = -1; + break; + case MSR_IA32_MC0_CTL2 ... 
MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, " "Guest should not write this MSR!\n"); break; - case MSR_IA32_MC0_CTL: - case MSR_IA32_MC1_CTL: - case MSR_IA32_MC2_CTL: - case MSR_IA32_MC3_CTL: - case MSR_IA32_MC4_CTL: - case MSR_IA32_MC5_CTL: - case MSR_IA32_MC6_CTL: - case MSR_IA32_MC7_CTL: - case MSR_IA32_MC8_CTL: - if (value != (u64)~0x0 && value != 0x0) { - gdprintk(XENLOG_WARNING, "MCE: value writen to MCi_CTL" - "should be all 0s or 1s\n"); + case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: + bank = (msr - MSR_IA32_MC0_CTL) / 4; + if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) { + gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank); ret = -1; break; } - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n"); + switch (msr & (MSR_IA32_MC0_CTL | 3)) + { + case MSR_IA32_MC0_CTL: + if (value != (u64)~0x0 && value != 0x0) { + gdprintk(XENLOG_WARNING, "MCE: value written to MC%u_CTL" + "should be all 0s or 1s (is %"PRIx64")\n", + bank, value); + ret = -1; + break; + } + d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value; break; - } - d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value; - break; - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - case MSR_IA32_MC6_STATUS: - case MSR_IA32_MC7_STATUS: - case MSR_IA32_MC8_STATUS: - if (!d || is_idle_domain(d)) { - /* Just skip */ - gdprintk(XENLOG_WARNING, "mce wrmsr: not in domain context!\n"); + case MSR_IA32_MC0_STATUS: + /* Give the first entry of the list, it corresponds to current + * vMCE# injection. When vMCE# is finished processing by the + * guest, this node will be deleted. + * Only error bank is written. Non-error banks simply return.
+ */ + if (!list_empty(&d->arch.vmca_msrs.impact_header)) { + entry = list_entry(d->arch.vmca_msrs.impact_header.next, + struct bank_entry, list); + if ( entry->bank == bank ) + entry->mci_status = value; + gdprintk(XENLOG_DEBUG, + "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n", + bank, value); + } else + gdprintk(XENLOG_DEBUG, + "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, value); + break; + case MSR_IA32_MC0_ADDR: + gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n", bank); + ret = -1; + break; + case MSR_IA32_MC0_MISC: + gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n", bank); + ret = -1; break; } - /* Give the first entry of the list, it corresponds to current - * vMCE# injection. When vMCE# is finished processing by the - * the guest, this node will be deleted. - * Only error bank is written. Non-error bank simply return. - */ - if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) { - entry->mci_status = value; - } - gdprintk(XENLOG_DEBUG, "MCE: wmrsr mci_status in vMCE# context\n"); - } - gdprintk(XENLOG_DEBUG, "MCE: wrmsr mci_status val:%"PRIx64"\n", value); break; default: ret = 0; break; } - spin_unlock(&mce_locks); + spin_unlock(&d->arch.vmca_msrs.lock); return ret; } @@ -919,156 +922,97 @@ int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi) { struct domain *d = current->domain; int ret = 1; + unsigned int bank; struct bank_entry *entry = NULL; *lo = *hi = 0x0; - spin_lock(&mce_locks); + spin_lock(&d->arch.vmca_msrs.lock); switch(msr) { case MSR_IA32_MCG_STATUS: - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n"); - *lo = *hi = 0x0; - break; - } *lo = (u32)d->arch.vmca_msrs.mcg_status; *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32); gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi); break; case MSR_IA32_MCG_CAP: - if (!d || is_idle_domain(d)) { - 
gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n"); - *lo = *hi = 0x0; - break; - } *lo = (u32)d->arch.vmca_msrs.mcg_cap; *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32); gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi); break; case MSR_IA32_MCG_CTL: - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n"); - *lo = *hi = 0x0; - break; - } *lo = (u32)d->arch.vmca_msrs.mcg_ctl; *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32); gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi); break; - case MSR_IA32_MC0_CTL2: - case MSR_IA32_MC1_CTL2: - case MSR_IA32_MC2_CTL2: - case MSR_IA32_MC3_CTL2: - case MSR_IA32_MC4_CTL2: - case MSR_IA32_MC5_CTL2: - case MSR_IA32_MC6_CTL2: - case MSR_IA32_MC7_CTL2: - case MSR_IA32_MC8_CTL2: + case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1: gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, " "Guest should not read this MSR!\n"); break; - case MSR_IA32_MC0_CTL: - case MSR_IA32_MC1_CTL: - case MSR_IA32_MC2_CTL: - case MSR_IA32_MC3_CTL: - case MSR_IA32_MC4_CTL: - case MSR_IA32_MC5_CTL: - case MSR_IA32_MC6_CTL: - case MSR_IA32_MC7_CTL: - case MSR_IA32_MC8_CTL: - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n"); - *lo = *hi = 0x0; + case MSR_IA32_MC0_CTL ... 
MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1: + bank = (msr - MSR_IA32_MC0_CTL) / 4; + if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) { + gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank); + ret = -1; break; } - *lo = (u32)d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4]; - *hi = - (u32)(d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] - >> 32); - gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_CTL lo %x hi %x\n", *lo, *hi); - break; - case MSR_IA32_MC0_STATUS: - case MSR_IA32_MC1_STATUS: - case MSR_IA32_MC2_STATUS: - case MSR_IA32_MC3_STATUS: - case MSR_IA32_MC4_STATUS: - case MSR_IA32_MC5_STATUS: - case MSR_IA32_MC6_STATUS: - case MSR_IA32_MC7_STATUS: - case MSR_IA32_MC8_STATUS: - /* Only error bank is read. Non-error bank simply return */ - *lo = *hi = 0x0; - gdprintk(XENLOG_DEBUG, "MCE: rdmsr mci_status\n"); - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain context!\n"); + switch (msr & (MSR_IA32_MC0_CTL | 3)) + { + case MSR_IA32_MC0_CTL: + *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank]; + *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32); + gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n", + bank, *lo, *hi); break; - } - if (!list_empty(&d->arch.vmca_msrs.impact_header)) { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) { - *lo = entry->mci_status; - *hi = entry->mci_status >> 32; - gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_STATUS in vmCE# context " - "lo %x hi %x\n", *lo, *hi); + case MSR_IA32_MC0_STATUS: + /* Only error bank is read. Non-error banks simply return. 
*/ + if (!list_empty(&d->arch.vmca_msrs.impact_header)) { + entry = list_entry(d->arch.vmca_msrs.impact_header.next, + struct bank_entry, list); + if (entry->bank == bank) { + *lo = entry->mci_status; + *hi = entry->mci_status >> 32; + gdprintk(XENLOG_DEBUG, + "MCE: rd MC%u_STATUS in vmCE# context " + "lo %x hi %x\n", bank, *lo, *hi); + } else + entry = NULL; } - } - break; - case MSR_IA32_MC0_ADDR: - case MSR_IA32_MC1_ADDR: - case MSR_IA32_MC2_ADDR: - case MSR_IA32_MC3_ADDR: - case MSR_IA32_MC4_ADDR: - case MSR_IA32_MC5_ADDR: - case MSR_IA32_MC6_ADDR: - case MSR_IA32_MC7_ADDR: - case MSR_IA32_MC8_ADDR: - *lo = *hi = 0x0; - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain context!\n"); + if (!entry) + gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n", bank); break; - } - if (!list_empty(&d->arch.vmca_msrs.impact_header)) { - entry = list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == (msr - MSR_IA32_MC0_ADDR)/4 ) { - *lo = entry->mci_addr; - *hi = entry->mci_addr >> 32; - gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_ADDR in vMCE# context " - "lo %x hi %x\n", *lo, *hi); + case MSR_IA32_MC0_ADDR: + if (!list_empty(&d->arch.vmca_msrs.impact_header)) { + entry = list_entry(d->arch.vmca_msrs.impact_header.next, + struct bank_entry, list); + if (entry->bank == bank) { + *lo = entry->mci_addr; + *hi = entry->mci_addr >> 32; + gdprintk(XENLOG_DEBUG, + "MCE: rd MC%u_ADDR in vMCE# context lo %x hi %x\n", + bank, *lo, *hi); + } } - } - break; - case MSR_IA32_MC0_MISC: - case MSR_IA32_MC1_MISC: - case MSR_IA32_MC2_MISC: - case MSR_IA32_MC3_MISC: - case MSR_IA32_MC4_MISC: - case MSR_IA32_MC5_MISC: - case MSR_IA32_MC6_MISC: - case MSR_IA32_MC7_MISC: - case MSR_IA32_MC8_MISC: - *lo = *hi = 0x0; - if (!d || is_idle_domain(d)) { - gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n"); break; - } - if (!list_empty(&d->arch.vmca_msrs.impact_header)) { - entry = 
list_entry(d->arch.vmca_msrs.impact_header.next, - struct bank_entry, list); - if ( entry->bank == (msr - MSR_IA32_MC0_MISC)/4 ) { - *lo = entry->mci_misc; - *hi = entry->mci_misc >> 32; - gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_MISC in vMCE# context " - " lo %x hi %x\n", *lo, *hi); + case MSR_IA32_MC0_MISC: + if (!list_empty(&d->arch.vmca_msrs.impact_header)) { + entry = list_entry(d->arch.vmca_msrs.impact_header.next, + struct bank_entry, list); + if (entry->bank == bank) { + *lo = entry->mci_misc; + *hi = entry->mci_misc >> 32; + gdprintk(XENLOG_DEBUG, + "MCE: rd MC%u_MISC in vMCE# context lo %x hi %x\n", + bank, *lo, *hi); + } } + break; } break; default: ret = 0; break; } - spin_unlock(&mce_locks); + spin_unlock(&d->arch.vmca_msrs.lock); return ret; } diff --git a/xen/arch/x86/cpu/mcheck/mctelem.c b/xen/arch/x86/cpu/mcheck/mctelem.c index 4111ddc..d8dd482 100644 --- a/xen/arch/x86/cpu/mcheck/mctelem.c +++ b/xen/arch/x86/cpu/mcheck/mctelem.c @@ -109,6 +109,14 @@ static struct mc_telem_ctl { * Telemetry array */ struct mctelem_ent *mctc_elems; + /* + * Per-CPU processing lists, used for deferred (softirq) + * processing of telemetry. mctc_cpu is indexed by the + * CPU that the telemetry belongs to. mctc_cpu_processing + * is indexed by the CPU that is processing the telemetry. 
+ */ + struct mctelem_ent *mctc_cpu[NR_CPUS]; + struct mctelem_ent *mctc_cpu_processing[NR_CPUS]; } mctctl; /* Lock protecting all processing lists */ @@ -123,6 +131,82 @@ static void *cmpxchgptr(void *ptr, void *old, void *new) return (void *)cmpxchg(ulp, a, b); } +static void mctelem_xchg_head(struct mctelem_ent **headp, + struct mctelem_ent **old, + struct mctelem_ent *new) +{ + for (;;) { + *old = *headp; + wmb(); + if (cmpxchgptr(headp, *old, new) == *old) + break; + } +} + + +void mctelem_defer(mctelem_cookie_t cookie) +{ + struct mctelem_ent *tep = COOKIE2MCTE(cookie); + + mctelem_xchg_head(&mctctl.mctc_cpu[smp_processor_id()], + &tep->mcte_next, tep); +} + +void mctelem_process_deferred(unsigned int cpu, + int (*fn)(unsigned int, mctelem_cookie_t)) +{ + struct mctelem_ent *tep; + struct mctelem_ent *head, *prev; + int ret; + + /* + * First, unhook the list of telemetry structures, and + * hook it up to the processing list head for this CPU. + */ + mctelem_xchg_head(&mctctl.mctc_cpu[cpu], + &mctctl.mctc_cpu_processing[smp_processor_id()], NULL); + + head = mctctl.mctc_cpu_processing[smp_processor_id()]; + + /* + * Then, fix up the list to include prev pointers, to make + * things a little easier, as the list must be traversed in + * chronological order, which is backward from the order they + * are in. + */ + for (tep = head, prev = NULL; tep != NULL; tep = tep->mcte_next) { + tep->mcte_prev = prev; + prev = tep; + } + + /* + * Now walk the list of telemetry structures, handling each + * one of them. Unhooking the structure here does not need to + * be atomic, as this list is only accessed from a softirq + * context; the MCE handler does not touch it. 
+ */ + for (tep = prev; tep != NULL; tep = prev) { + prev = tep->mcte_prev; + tep->mcte_next = tep->mcte_prev = NULL; + + ret = fn(cpu, MCTE2COOKIE(tep)); + if (prev != NULL) + prev->mcte_next = NULL; + tep->mcte_prev = tep->mcte_next = NULL; + if (ret != 0) + mctelem_commit(MCTE2COOKIE(tep)); + else + mctelem_dismiss(MCTE2COOKIE(tep)); + } +} + +int mctelem_has_deferred(unsigned int cpu) +{ + if (mctctl.mctc_cpu[cpu] != NULL) + return 1; + return 0; +} + /* Free an entry to its native free list; the entry must not be linked on * any list. */ @@ -130,21 +214,12 @@ static void mctelem_free(struct mctelem_ent *tep) { mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ? MC_URGENT : MC_NONURGENT; - struct mctelem_ent **freelp; - struct mctelem_ent *oldhead; BUG_ON(tep->mcte_refcnt != 0); BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE); tep->mcte_prev = NULL; - freelp = &mctctl.mctc_free[target]; - for (;;) { - oldhead = *freelp; - tep->mcte_next = oldhead; - wmb(); - if (cmpxchgptr(freelp, oldhead, tep) == oldhead) - break; - } + mctelem_xchg_head(&mctctl.mctc_free[target], &tep->mcte_next, tep); } /* Increment the reference count of an entry that is not linked on to @@ -308,22 +383,13 @@ void mctelem_dismiss(mctelem_cookie_t cookie) void mctelem_commit(mctelem_cookie_t cookie) { struct mctelem_ent *tep = COOKIE2MCTE(cookie); - struct mctelem_ent **commlp; - struct mctelem_ent *oldhead; mctelem_class_t target = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ? 
MC_URGENT : MC_NONURGENT; BUG_ON(tep->mcte_next != NULL || tep->mcte_prev != NULL); MCTE_TRANSITION_STATE(tep, UNCOMMITTED, COMMITTED); - commlp = &mctctl.mctc_committed[target]; - for (;;) { - oldhead = *commlp; - tep->mcte_prev = oldhead; - wmb(); - if (cmpxchgptr(commlp, oldhead, tep) == oldhead) - break; - } + mctelem_xchg_head(&mctctl.mctc_committed[target], &tep->mcte_prev, tep); } /* Move telemetry from committed list to processing list, reversing the @@ -358,13 +424,7 @@ static void mctelem_append_processing(mctelem_class_t which) * the list we unlink in a well-known location so it can be * picked up in panic code should we panic between this unlink * and the append to the processing list. */ - for (;;) { - dangling[target] = *commlp; - wmb(); - if (cmpxchgptr(commlp, dangling[target], NULL) == - dangling[target]) - break; - } + mctelem_xchg_head(commlp, &dangling[target], NULL); if (dangling[target] == NULL) return; diff --git a/xen/arch/x86/cpu/mcheck/mctelem.h b/xen/arch/x86/cpu/mcheck/mctelem.h index e3270f6..fb381a1 100644 --- a/xen/arch/x86/cpu/mcheck/mctelem.h +++ b/xen/arch/x86/cpu/mcheck/mctelem.h @@ -67,5 +67,9 @@ extern void mctelem_dismiss(mctelem_cookie_t); extern mctelem_cookie_t mctelem_consume_oldest_begin(mctelem_class_t); extern void mctelem_consume_oldest_end(mctelem_cookie_t); extern void mctelem_ack(mctelem_class_t, mctelem_cookie_t); +extern void mctelem_defer(mctelem_cookie_t); +extern void mctelem_process_deferred(unsigned int, + int (*)(unsigned int, mctelem_cookie_t)); +int mctelem_has_deferred(unsigned int); #endif diff --git a/xen/arch/x86/cpu/mcheck/non-fatal.c b/xen/arch/x86/cpu/mcheck/non-fatal.c index 167b1ce..9d66a12 100644 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c @@ -22,7 +22,7 @@ #include "mce.h" -static cpu_banks_t bankmask; +DEFINE_PER_CPU(cpu_banks_t, poll_bankmask); static struct timer mce_timer; #define MCE_PERIOD MILLISECS(8000) @@ -39,7 +39,7 @@ static void mce_checkregs 
(void *info) struct mca_summary bs; static uint64_t dumpcount = 0; - mctc = mcheck_mca_logout(MCA_POLLER, bankmask, &bs); + mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs); if (bs.errcnt && mctc != NULL) { adjust++; @@ -94,10 +94,6 @@ static int __init init_nonfatal_mce_checker(void) if (!mce_available(c)) return -ENODEV; - memcpy(&bankmask, &mca_allbanks, sizeof (cpu_banks_t)); - if (mce_firstbank(c) == 1) - clear_bit(0, bankmask); - /* * Check for non-fatal errors every MCE_RATE s */ diff --git a/xen/arch/x86/cpu/mcheck/x86_mca.h b/xen/arch/x86/cpu/mcheck/x86_mca.h index ac98744..6333ccd 100644 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h @@ -18,9 +18,9 @@ */ #ifndef X86_MCA_H - #define X86_MCA_H +#include /* The MCA/MCE MSRs should not be used anywhere else. * They are cpu family/model specific and are only for use diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 8d7a166..598d970 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -373,7 +374,6 @@ void vcpu_destroy(struct vcpu *v) hvm_vcpu_destroy(v); } -extern uint64_t g_mcg_cap; int arch_domain_create(struct domain *d, unsigned int domcr_flags) { #ifdef __x86_64__ @@ -458,14 +458,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) goto fail; /* For Guest vMCE MSRs virtualization */ - d->arch.vmca_msrs.mcg_status = 0x0; - d->arch.vmca_msrs.mcg_cap = g_mcg_cap; - d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0; - d->arch.vmca_msrs.nr_injection = 0; - memset(d->arch.vmca_msrs.mci_ctl, 0x1, - sizeof(d->arch.vmca_msrs.mci_ctl)); - INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header); - + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) + intel_mce_init_msr(d); } if ( is_hvm_domain(d) ) @@ -1314,8 +1308,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) if ( unlikely(!cpu_isset(cpu, dirty_mask) && !cpus_empty(dirty_mask)) 
) { /* Other cpus call __sync_lazy_execstate from flush ipi handler. */ - if ( !cpus_empty(next->vcpu_dirty_cpumask) ) - flush_tlb_mask(next->vcpu_dirty_cpumask); + flush_tlb_mask(dirty_mask); } if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) ) diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 7f4d7db..955c5e6 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -587,6 +587,19 @@ long arch_do_domctl( } break; + case XEN_DOMCTL_SENDTRIGGER_POWER: + { + extern void hvm_acpi_power_button(struct domain *d); + + ret = -EINVAL; + if ( is_hvm_domain(d) ) + { + ret = 0; + hvm_acpi_power_button(d); + } + } + break; + default: ret = -ENOSYS; } @@ -1148,9 +1161,9 @@ void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c) c.nat->ctrlreg[3] = xen_pfn_to_cr3( pagetable_get_pfn(v->arch.guest_table)); #ifdef __x86_64__ - if ( !pagetable_is_null(v->arch.guest_table_user) ) - c.nat->ctrlreg[1] = xen_pfn_to_cr3( - pagetable_get_pfn(v->arch.guest_table_user)); + c.nat->ctrlreg[1] = + pagetable_is_null(v->arch.guest_table_user) ? 0 + : xen_pfn_to_cr3(pagetable_get_pfn(v->arch.guest_table_user)); #endif /* Merge shadow DR7 bits into real DR7. */ diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c index 125c8ff..c496e37 100644 --- a/xen/arch/x86/e820.c +++ b/xen/arch/x86/e820.c @@ -6,12 +6,23 @@ #include #include #include +#include +#include +#include /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */ unsigned long long opt_mem; static void parse_mem(char *s) { opt_mem = parse_size_and_unit(s, NULL); } custom_param("mem", parse_mem); +/* opt_nomtrr_check: Don't clip ram to highest cacheable MTRR. 
*/ +static int __initdata e820_mtrr_clip = -1; +boolean_param("e820-mtrr-clip", e820_mtrr_clip); + +/* opt_e820_verbose: Be verbose about clipping, the original e820, &c */ +static int __initdata e820_verbose; +boolean_param("e820-verbose", e820_verbose); + struct e820map e820; static void __init add_memory_region(unsigned long long start, @@ -321,28 +332,111 @@ static void __init clip_to_limit(uint64_t limit, char *warnmsg) { int i; char _warnmsg[160]; + uint64_t old_limit = 0; for ( i = 0; i < e820.nr_map; i++ ) { - if ( (e820.map[i].addr + e820.map[i].size) <= limit ) + if ( (e820.map[i].type != E820_RAM) || + ((e820.map[i].addr + e820.map[i].size) <= limit) ) continue; - if ( warnmsg ) + old_limit = e820.map[i].addr + e820.map[i].size; + if ( e820.map[i].addr < limit ) { - snprintf(_warnmsg, sizeof(_warnmsg), warnmsg, (long)(limit>>30)); - printk("WARNING: %s\n", _warnmsg); + e820.map[i].size = limit - e820.map[i].addr; } - printk("Truncating memory map to %lukB\n", - (unsigned long)(limit >> 10)); - if ( e820.map[i].addr >= limit ) + else { - e820.nr_map = i; + memmove(&e820.map[i], &e820.map[i+1], + (e820.nr_map - i - 1) * sizeof(struct e820entry)); + e820.nr_map--; } - else + } + + if ( old_limit ) + { + if ( warnmsg ) { - e820.map[i].size = limit - e820.map[i].addr; - e820.nr_map = i + 1; - } + snprintf(_warnmsg, sizeof(_warnmsg), warnmsg, (long)(limit>>30)); + printk("WARNING: %s\n", _warnmsg); + } + printk("Truncating RAM from %lukB to %lukB\n", + (unsigned long)(old_limit >> 10), (unsigned long)(limit >> 10)); + } +} + +/* Conservative estimate of top-of-RAM by looking for MTRR WB regions. */ +#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) +#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) +static uint64_t mtrr_top_of_ram(void) +{ + uint32_t eax, ebx, ecx, edx; + uint64_t mtrr_cap, mtrr_def, addr_mask, base, mask, top; + unsigned int i, phys_bits = 36; + + /* By default we check only Intel systems. 
*/ + if ( e820_mtrr_clip == -1 ) + { + char vendor[13]; + cpuid(0x00000000, &eax, + (uint32_t *)&vendor[0], + (uint32_t *)&vendor[8], + (uint32_t *)&vendor[4]); + vendor[12] = '\0'; + e820_mtrr_clip = !strcmp(vendor, "GenuineIntel"); + } + + if ( !e820_mtrr_clip ) + return 0; + + if ( e820_verbose ) + printk("Checking MTRR ranges...\n"); + + /* Does the CPU support architectural MTRRs? */ + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + if ( !test_bit(X86_FEATURE_MTRR & 31, &edx) ) + return 0; + + /* Find the physical address size for this CPU. */ + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + if ( eax >= 0x80000008 ) + { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + phys_bits = (uint8_t)eax; + } + addr_mask = ((1ull << phys_bits) - 1) & ~((1ull << 12) - 1); + + rdmsrl(MSR_MTRRcap, mtrr_cap); + rdmsrl(MSR_MTRRdefType, mtrr_def); + + if ( e820_verbose ) + printk(" MTRR cap: %"PRIx64" type: %"PRIx64"\n", mtrr_cap, mtrr_def); + + /* MTRRs enabled, and default memory type is not writeback? */ + if ( !test_bit(11, &mtrr_def) || ((uint8_t)mtrr_def == MTRR_TYPE_WRBACK) ) + return 0; + + /* + * Find end of highest WB-type range. This is a conservative estimate + * of the highest WB address since overlapping UC/WT ranges dominate. 
+ */ + top = 0; + for ( i = 0; i < (uint8_t)mtrr_cap; i++ ) + { + rdmsrl(MSR_MTRRphysBase(i), base); + rdmsrl(MSR_MTRRphysMask(i), mask); + + if ( e820_verbose ) + printk(" MTRR[%d]: base %"PRIx64" mask %"PRIx64"\n", + i, base, mask); + + if ( !test_bit(11, &mask) || ((uint8_t)base != MTRR_TYPE_WRBACK) ) + continue; + base &= addr_mask; + mask &= addr_mask; + top = max_t(uint64_t, top, ((base | ~mask) & addr_mask) + PAGE_SIZE); } + + return top; } static void __init reserve_dmi_region(void) @@ -357,6 +451,8 @@ static void __init reserve_dmi_region(void) static void __init machine_specific_memory_setup( struct e820entry *raw, int *raw_nr) { + uint64_t top_of_ram; + char nr = (char)*raw_nr; sanitize_e820_map(raw, &nr); *raw_nr = nr; @@ -389,6 +485,10 @@ static void __init machine_specific_memory_setup( #endif reserve_dmi_region(); + + top_of_ram = mtrr_top_of_ram(); + if ( top_of_ram ) + clip_to_limit(top_of_ram, "MTRRs do not cover all of memory."); } int __init e820_change_range_type( @@ -485,8 +585,16 @@ int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e) unsigned long __init init_e820( const char *str, struct e820entry *raw, int *raw_nr) { + if ( e820_verbose ) + { + printk("Initial %s RAM map:\n", str); + print_e820_memory_map(raw, *raw_nr); + } + machine_specific_memory_setup(raw, raw_nr); + printk("%s RAM map:\n", str); print_e820_memory_map(e820.map, e820.nr_map); + return find_max_pfn(); } diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c index 97d0979..e66eede 100644 --- a/xen/arch/x86/hpet.c +++ b/xen/arch/x86/hpet.c @@ -22,8 +22,10 @@ #define MAX_HPET_NUM 32 -#define HPET_EVT_USED_BIT 2 +#define HPET_EVT_USED_BIT 0 #define HPET_EVT_USED (1 << HPET_EVT_USED_BIT) +#define HPET_EVT_DISABLE_BIT 1 +#define HPET_EVT_DISABLE (1 << HPET_EVT_DISABLE_BIT) struct hpet_event_channel { @@ -45,7 +47,7 @@ static unsigned int num_hpets_used; /* msi hpet channels used for broadcast */ DEFINE_PER_CPU(struct hpet_event_channel *, cpu_bc_channel); 
-static int vector_channel[NR_IRQS] = {[0 ... NR_IRQS-1] = -1}; +static int vector_channel[NR_VECTORS] = {[0 ... NR_VECTORS-1] = -1}; #define vector_to_channel(vector) vector_channel[vector] @@ -53,8 +55,11 @@ unsigned long hpet_address; void msi_compose_msg(struct pci_dev *pdev, int vector, struct msi_msg *msg); -/* force_hpet_broadcast: if true, force using hpet_broadcast to fix lapic stop - issue for deep C state with pit disabled */ +/* + * force_hpet_broadcast: by default legacy hpet broadcast will be stopped + * if RTC interrupts are enabled. Enable this option if want to always enable + * legacy hpet broadcast for deep C state + */ int force_hpet_broadcast; boolean_param("hpetbroadcast", force_hpet_broadcast); @@ -114,9 +119,12 @@ static int reprogram_hpet_evt_channel( int64_t delta; int ret; + if ( (ch->flags & HPET_EVT_DISABLE) || (expire == 0) ) + return 0; + if ( unlikely(expire < 0) ) { - printk(KERN_DEBUG "reprogram: expire < 0\n"); + printk(KERN_DEBUG "reprogram: expire <= 0\n"); return -ETIME; } @@ -343,20 +351,19 @@ static int hpet_setup_msi_irq(unsigned int vector) static int hpet_assign_irq(struct hpet_event_channel *ch) { - unsigned int vector; + int vector; - vector = assign_irq_vector(AUTO_ASSIGN_IRQ); - if ( !vector ) - return -EINVAL; + if ( ch->vector ) + return 0; + + if ( (vector = assign_irq_vector(AUTO_ASSIGN_IRQ)) < 0 ) + return vector; - irq_vector[vector] = vector; - vector_irq[vector] = vector; vector_channel[vector] = ch - &hpet_events[0]; if ( hpet_setup_msi_irq(vector) ) { - irq_vector[vector] = 0; - vector_irq[vector] = FREE_TO_ASSIGN_IRQ; + free_irq_vector(vector); vector_channel[vector] = -1; return -EINVAL; } @@ -484,6 +491,32 @@ static void hpet_detach_channel_share(int cpu) static void (*hpet_attach_channel)(int cpu, struct hpet_event_channel *ch); static void (*hpet_detach_channel)(int cpu); +#include +void cpuidle_disable_deep_cstate(void); + +void (*pv_rtc_handler)(unsigned int port, uint8_t value); + +static void 
handle_rtc_once(unsigned int port, uint8_t value) +{ + static int index; + + if ( port == 0x70 ) + { + index = value; + return; + } + + if ( index != RTC_REG_B ) + return; + + /* RTC Reg B, contain PIE/AIE/UIE */ + if ( value & (RTC_PIE | RTC_AIE | RTC_UIE ) ) + { + cpuidle_disable_deep_cstate(); + pv_rtc_handler = NULL; + } +} + void hpet_broadcast_init(void) { u64 hpet_rate; @@ -527,8 +560,11 @@ void hpet_broadcast_init(void) return; } + if ( legacy_hpet_event.flags & HPET_EVT_DISABLE ) + return; + hpet_id = hpet_read32(HPET_ID); - if ( !(hpet_id & HPET_ID_LEGSUP) || !force_hpet_broadcast ) + if ( !(hpet_id & HPET_ID_LEGSUP) ) return; /* Start HPET legacy interrupts */ @@ -556,6 +592,32 @@ void hpet_broadcast_init(void) for_each_cpu(i) per_cpu(cpu_bc_channel, i) = &legacy_hpet_event; + + if ( !force_hpet_broadcast ) + pv_rtc_handler = handle_rtc_once; +} + +void hpet_disable_legacy_broadcast(void) +{ + u32 cfg; + + spin_lock_irq(&legacy_hpet_event.lock); + + legacy_hpet_event.flags |= HPET_EVT_DISABLE; + + /* disable HPET T0 */ + cfg = hpet_read32(HPET_T0_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_write32(cfg, HPET_T0_CFG); + + /* Stop HPET legacy interrupts */ + cfg = hpet_read32(HPET_CFG); + cfg &= ~HPET_CFG_LEGACY; + hpet_write32(cfg, HPET_CFG); + + spin_unlock_irq(&legacy_hpet_event.lock); + + smp_send_event_check_mask(cpu_online_map); } void hpet_broadcast_enter(void) @@ -563,6 +625,9 @@ void hpet_broadcast_enter(void) int cpu = smp_processor_id(); struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu); + if ( this_cpu(timer_deadline) == 0 ) + return; + if ( !ch ) ch = hpet_get_channel(cpu); BUG_ON( !ch ); diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c index 6fbce84..ad5ec35 100644 --- a/xen/arch/x86/hvm/emulate.c +++ b/xen/arch/x86/hvm/emulate.c @@ -18,9 +18,9 @@ #include #include #include +#include #include -#define HVMTRACE_IO_ASSIST_WRITE 0x200 static void hvmtrace_io_assist(int is_mmio, ioreq_t *p) { unsigned int size, event; @@ 
-29,9 +29,10 @@ static void hvmtrace_io_assist(int is_mmio, ioreq_t *p) if ( likely(!tb_init_done) ) return; - event = is_mmio ? TRC_HVM_MMIO_ASSIST : TRC_HVM_IO_ASSIST; - if ( !p->dir ) - event |= HVMTRACE_IO_ASSIST_WRITE; + if ( is_mmio ) + event = p->dir ? TRC_HVM_IOMEM_READ : TRC_HVM_IOMEM_WRITE; + else + event = p->dir ? TRC_HVM_IOPORT_READ : TRC_HVM_IOPORT_WRITE; *(uint64_t *)buffer = p->addr; size = (p->addr != (u32)p->addr) ? 8 : 4; @@ -749,6 +750,7 @@ static int hvmemul_read_cr( case 3: case 4: *val = current->arch.hvm_vcpu.guest_cr[reg]; + HVMTRACE_LONG_2D(CR_READ, reg, TRC_PAR_LONG(*val)); return X86EMUL_OKAY; default: break; @@ -762,6 +764,7 @@ static int hvmemul_write_cr( unsigned long val, struct x86_emulate_ctxt *ctxt) { + HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val)); switch ( reg ) { case 0: diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 9a6d6cd..000a70d 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -697,6 +697,10 @@ int hvm_vcpu_initialise(struct vcpu *v) if ( rc != 0 ) goto fail3; + tasklet_init(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet, + (void(*)(unsigned long))hvm_assert_evtchn_irq, + (unsigned long)v); + v->arch.guest_context.user_regs.eflags = 2; if ( v->vcpu_id == 0 ) @@ -726,6 +730,7 @@ int hvm_vcpu_initialise(struct vcpu *v) void hvm_vcpu_destroy(struct vcpu *v) { + tasklet_kill(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet); hvm_vcpu_cacheattr_destroy(v); vlapic_destroy(v); hvm_funcs.vcpu_destroy(v); @@ -1440,13 +1445,13 @@ void hvm_task_switch( } exn_raised = 0; - if ( hvm_load_segment_selector(v, x86_seg_es, tss.es) || + if ( hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) || + hvm_load_segment_selector(v, x86_seg_es, tss.es) || hvm_load_segment_selector(v, x86_seg_cs, tss.cs) || hvm_load_segment_selector(v, x86_seg_ss, tss.ss) || hvm_load_segment_selector(v, x86_seg_ds, tss.ds) || hvm_load_segment_selector(v, x86_seg_fs, tss.fs) || - hvm_load_segment_selector(v, x86_seg_gs, tss.gs) || 
- hvm_load_segment_selector(v, x86_seg_ldtr, tss.ldt) ) + hvm_load_segment_selector(v, x86_seg_gs, tss.gs) ) exn_raised = 1; rc = hvm_copy_to_guest_virt( @@ -2377,6 +2382,9 @@ static int hvmop_flush_tlb_all(void) struct domain *d = current->domain; struct vcpu *v; + if ( !is_hvm_domain(d) ) + return -EINVAL; + /* Avoid deadlock if more than one vcpu tries this at the same time. */ if ( !spin_trylock(&d->hypercall_deadlock_mutex) ) return -EAGAIN; @@ -2413,6 +2421,7 @@ static int hvmop_flush_tlb_all(void) long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) { + struct domain *curr_d = current->domain; long rc = 0; switch ( op ) @@ -2477,8 +2486,9 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) rc = -EINVAL; break; case HVM_PARAM_IDENT_PT: + /* Not reflexive, as we must domain_pause(). */ rc = -EPERM; - if ( !IS_PRIV(current->domain) ) + if ( curr_d == d ) break; rc = -EINVAL; @@ -2489,29 +2499,32 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) if ( !paging_mode_hap(d) ) break; - domain_pause(d); - /* * Update GUEST_CR3 in each VMCS to point at identity map. * All foreign updates to guest state must synchronise on * the domctl_lock. */ - spin_lock(&domctl_lock); + rc = -EAGAIN; + if ( !domctl_lock_acquire() ) + break; + + rc = 0; + domain_pause(d); d->arch.hvm_domain.params[a.index] = a.value; for_each_vcpu ( d, v ) paging_update_cr3(v); - spin_unlock(&domctl_lock); - domain_unpause(d); + + domctl_lock_release(); break; case HVM_PARAM_DM_DOMAIN: - /* Privileged domains only, as we must domain_pause(d). */ + /* Not reflexive, as we must domain_pause(). 
*/ rc = -EPERM; - if ( !IS_PRIV_FOR(current->domain, d) ) + if ( curr_d == d ) break; if ( a.value == DOMID_SELF ) - a.value = current->domain->domain_id; + a.value = curr_d->domain_id; rc = 0; domain_pause(d); /* safe to change per-vcpu xen_port */ @@ -2536,9 +2549,9 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) domain_unpause(d); break; case HVM_PARAM_ACPI_S_STATE: - /* Privileged domains only, as we must domain_pause(d). */ + /* Not reflexive, as we must domain_pause(). */ rc = -EPERM; - if ( !IS_PRIV_FOR(current->domain, d) ) + if ( curr_d == d ) break; rc = 0; diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c index 107b87d..1a1f24c 100644 --- a/xen/arch/x86/hvm/intercept.c +++ b/xen/arch/x86/hvm/intercept.c @@ -100,8 +100,11 @@ static int hvm_mmio_access(struct vcpu *v, } } - if ( (p->count = i) != 0 ) + if ( i != 0 ) + { + p->count = i; rc = X86EMUL_OKAY; + } return rc; } @@ -165,8 +168,11 @@ static int process_portio_intercept(portio_action_t action, ioreq_t *p) } } - if ( (p->count = i) != 0 ) + if ( i != 0 ) + { + p->count = i; rc = X86EMUL_OKAY; + } return rc; } diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c index 97eb25f..bd2f145 100644 --- a/xen/arch/x86/hvm/irq.c +++ b/xen/arch/x86/hvm/irq.c @@ -185,8 +185,16 @@ void hvm_maybe_deassert_evtchn_irq(void) void hvm_assert_evtchn_irq(struct vcpu *v) { - if ( v->vcpu_id == 0 ) - hvm_set_callback_irq_level(v); + if ( v->vcpu_id != 0 ) + return; + + if ( unlikely(in_irq() || !local_irq_is_enabled()) ) + { + tasklet_schedule(&v->arch.hvm_vcpu.assert_evtchn_irq_tasklet); + return; + } + + hvm_set_callback_irq_level(v); } void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq) diff --git a/xen/arch/x86/hvm/pmtimer.c b/xen/arch/x86/hvm/pmtimer.c index 15bb3c3..276538f 100644 --- a/xen/arch/x86/hvm/pmtimer.c +++ b/xen/arch/x86/hvm/pmtimer.c @@ -29,18 +29,15 @@ /* The interesting bits of the PM1a_STS register */ #define TMR_STS (1 << 0) -#define 
PWRBTN_STS (1 << 5) -#define GBL_STS (1 << 8) +#define GBL_STS (1 << 5) +#define PWRBTN_STS (1 << 8) /* The same in PM1a_EN */ #define TMR_EN (1 << 0) -#define PWRBTN_EN (1 << 5) -#define GBL_EN (1 << 8) +#define GBL_EN (1 << 5) +#define PWRBTN_EN (1 << 8) -/* Mask of bits in PM1a_STS that can generate an SCI. Although the ACPI - * spec lists other bits, the PIIX4, which we are emulating, only - * supports these three. For now, we only use TMR_STS; in future we - * will let qemu set the other bits */ +/* Mask of bits in PM1a_STS that can generate an SCI. */ #define SCI_MASK (TMR_STS|PWRBTN_STS|GBL_STS) /* SCI IRQ number (must match SCI_INT number in ACPI FADT in hvmloader) */ @@ -61,6 +58,15 @@ static void pmt_update_sci(PMTState *s) hvm_isa_irq_deassert(s->vcpu->domain, SCI_IRQ); } +void hvm_acpi_power_button(struct domain *d) +{ + PMTState *s = &d->arch.hvm_domain.pl_time.vpmt; + spin_lock(&s->lock); + s->pm.pm1a_sts |= PWRBTN_STS; + pmt_update_sci(s); + spin_unlock(&s->lock); +} + /* Set the correct value in the timer, accounting for time elapsed * since the last time we did that. 
*/ static void pmt_update_time(PMTState *s) diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index bd320fb..a70f0ff 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -1217,9 +1217,14 @@ asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs) exit_reason = vmcb->exitcode; - HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason, - (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32), - 0, 0, 0); + if ( hvm_long_mode_enabled(v) ) + HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason, + (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32), + 0, 0, 0); + else + HVMTRACE_ND(VMEXIT, 1/*cycles*/, 2, exit_reason, + (uint32_t)regs->eip, + 0, 0, 0, 0); if ( unlikely(exit_reason == VMEXIT_INVALID) ) { diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index c86b55e..aea69c7 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -912,18 +912,8 @@ void vmx_do_resume(struct vcpu *v) debug_state = v->domain->debugger_attached; if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) ) { - unsigned long intercepts = __vmread(EXCEPTION_BITMAP); - unsigned long mask = 1u << TRAP_int3; - - if ( !cpu_has_monitor_trap_flag ) - mask |= 1u << TRAP_debug; - v->arch.hvm_vcpu.debug_state_latch = debug_state; - if ( debug_state ) - intercepts |= mask; - else - intercepts &= ~mask; - __vmwrite(EXCEPTION_BITMAP, intercepts); + vmx_update_debug_state(v); } hvm_do_resume(v); diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 3374ebb..64d9f39 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1035,6 +1035,24 @@ static void vmx_update_host_cr3(struct vcpu *v) vmx_vmcs_exit(v); } +void vmx_update_debug_state(struct vcpu *v) +{ + unsigned long intercepts, mask; + + ASSERT(v == current); + + mask = 1u << TRAP_int3; + if ( !cpu_has_monitor_trap_flag ) + mask |= 1u << TRAP_debug; + + intercepts = __vmread(EXCEPTION_BITMAP); + if ( 
v->arch.hvm_vcpu.debug_state_latch ) + intercepts |= mask; + else + intercepts &= ~mask; + __vmwrite(EXCEPTION_BITMAP, intercepts); +} + static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) { vmx_vmcs_enter(v); @@ -1107,6 +1125,7 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr) | (paging_mode_hap(v->domain) ? 0 : (1U << TRAP_page_fault)) | (1U << TRAP_no_device)); + vmx_update_debug_state(v); } } @@ -2241,9 +2260,14 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) exit_reason = __vmread(VM_EXIT_REASON); - HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason, - (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32), - 0, 0, 0); + if ( hvm_long_mode_enabled(v) ) + HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason, + (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32), + 0, 0, 0); + else + HVMTRACE_ND(VMEXIT, 1/*cycles*/, 2, exit_reason, + (uint32_t)regs->eip, + 0, 0, 0, 0); perfc_incra(vmexits, exit_reason); @@ -2342,7 +2366,7 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) * (NB. If we emulate this IRET for any reason, we should re-clear!) 
*/ if ( unlikely(intr_info & INTR_INFO_NMI_UNBLOCKED_BY_IRET) && - !(__vmread(IDT_VECTORING_INFO) & INTR_INFO_VALID_MASK) && + !(idtv_info & INTR_INFO_VALID_MASK) && (vector != TRAP_double_fault) ) __vmwrite(GUEST_INTERRUPTIBILITY_INFO, __vmread(GUEST_INTERRUPTIBILITY_INFO) diff --git a/xen/arch/x86/ioport_emulate.c b/xen/arch/x86/ioport_emulate.c index 4a5ab90..6437753 100644 --- a/xen/arch/x86/ioport_emulate.c +++ b/xen/arch/x86/ioport_emulate.c @@ -94,6 +94,14 @@ static struct dmi_system_id __initdata ioport_quirks_tbl[] = { DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant ML5"), }, }, + { + .callback = proliant_quirk, + .ident = "HP ProLiant BL2xx", + .matches = { + DMI_MATCH(DMI_BIOS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant BL2"), + }, + }, { .callback = proliant_quirk, .ident = "HP ProLiant BL4xx", diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 18baba0..8c790cf 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -1069,7 +1069,7 @@ extern void dump_ioapic_irq_info(void); static void dump_irqs(unsigned char key) { - int i, irq, vector; + int i, glob_irq, irq, vector; irq_desc_t *desc; irq_guest_action_t *action; struct domain *d; @@ -1077,41 +1077,47 @@ static void dump_irqs(unsigned char key) printk("Guest interrupt information:\n"); - for ( irq = 0; irq < NR_IRQS; irq++ ) + for ( vector = 0; vector < NR_VECTORS; vector++ ) { - vector = irq_to_vector(irq); - if ( vector == 0 ) - continue; + + glob_irq = vector_to_irq(vector); desc = &irq_desc[vector]; + if ( desc == NULL || desc->handler == &no_irq_type ) + continue; spin_lock_irqsave(&desc->lock, flags); - if ( desc->status & IRQ_GUEST ) + if ( !(desc->status & IRQ_GUEST) ) + printk(" Vec%3d IRQ%3d: type=%-15s status=%08x " + "mapped, unbound\n", + vector, glob_irq, desc->handler->typename, desc->status); + else { action = (irq_guest_action_t *)desc->action; - printk(" IRQ%3d Vec%3d: type=%-15s status=%08x " + printk(" Vec%3d IRQ%3d: type=%-15s status=%08x " "in-flight=%d domain-list=", 
- irq, vector, desc->handler->typename, + vector, glob_irq, desc->handler->typename, desc->status, action->in_flight); for ( i = 0; i < action->nr_guests; i++ ) { d = action->guest[i]; - printk("%u(%c%c%c%c)", - d->domain_id, - (test_bit(d->pirq_to_evtchn[irq], + irq = domain_vector_to_irq(d, vector); + printk("%u:%3d(%c%c%c%c)", + d->domain_id, irq, + (test_bit(d->pirq_to_evtchn[glob_irq], &shared_info(d, evtchn_pending)) ? 'P' : '-'), - (test_bit(d->pirq_to_evtchn[irq] / + (test_bit(d->pirq_to_evtchn[glob_irq] / BITS_PER_EVTCHN_WORD(d), &vcpu_info(d->vcpu[0], evtchn_pending_sel)) ? 'S' : '-'), - (test_bit(d->pirq_to_evtchn[irq], + (test_bit(d->pirq_to_evtchn[glob_irq], &shared_info(d, evtchn_mask)) ? 'M' : '-'), - (test_bit(irq, d->pirq_mask) ? + (test_bit(glob_irq, d->pirq_mask) ? 'M' : '-')); if ( i != action->nr_guests ) printk(","); diff --git a/xen/arch/x86/mm/hap/p2m-ept.c b/xen/arch/x86/mm/hap/p2m-ept.c index bb1e8ee..2531132 100644 --- a/xen/arch/x86/mm/hap/p2m-ept.c +++ b/xen/arch/x86/mm/hap/p2m-ept.c @@ -210,17 +210,18 @@ ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, ept_entry_t *split_ept_entry = NULL; unsigned long split_mfn = ept_entry->mfn; p2m_type_t split_p2mt = ept_entry->avail1; + ept_entry_t new_ept_entry; /* alloc new page for new ept middle level entry which is * before a leaf super entry */ - if ( !ept_set_middle_entry(d, ept_entry) ) + if ( !ept_set_middle_entry(d, &new_ept_entry) ) goto out; /* split the super page before to 4k pages */ - split_table = map_domain_page(ept_entry->mfn); + split_table = map_domain_page(new_ept_entry.mfn); offset = gfn & ((1 << EPT_TABLE_ORDER) - 1); for ( i = 0; i < 512; i++ ) @@ -257,6 +258,7 @@ ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, ept_p2m_type_to_flags(split_ept_entry, p2mt); unmap_domain_page(split_table); + *ept_entry = new_ept_entry; } /* Track the highest gfn for which we have ever had a valid mapping */ diff --git a/xen/arch/x86/mm/paging.c 
b/xen/arch/x86/mm/paging.c index 2b898dd..e9313e1 100644 --- a/xen/arch/x86/mm/paging.c +++ b/xen/arch/x86/mm/paging.c @@ -385,13 +385,14 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc) } pages = 0; - l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top)); + l4 = (mfn_valid(d->arch.paging.log_dirty.top) ? + map_domain_page(mfn_x(d->arch.paging.log_dirty.top)) : NULL); for ( i4 = 0; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++ ) { - l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL; + l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(mfn_x(l4[i4])) : NULL; for ( i3 = 0; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ ) @@ -429,7 +430,8 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc) if ( l3 ) unmap_domain_page(l3); } - unmap_domain_page(l4); + if ( l4 ) + unmap_domain_page(l4); if ( pages < sc->pages ) sc->pages = pages; diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index 4e1fe8a..072dda0 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -4986,6 +4986,9 @@ int sh_audit_fl1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x) f = shadow_l1e_get_flags(*sl1e); f &= ~(_PAGE_AVAIL0|_PAGE_AVAIL1|_PAGE_AVAIL2); if ( !(f == 0 + || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| + _PAGE_ACCESSED) + || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED) || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW| _PAGE_ACCESSED|_PAGE_DIRTY) || f == (_PAGE_PRESENT|_PAGE_USER|_PAGE_ACCESSED|_PAGE_DIRTY) diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c index 8ed3a11..8ef96b4 100644 --- a/xen/arch/x86/numa.c +++ b/xen/arch/x86/numa.c @@ -74,7 +74,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift) if (memnodemap[addr >> shift] != 0xff) return -1; memnodemap[addr >> shift] = i; - addr += (1UL << shift); + addr += (1ULL << shift); } while (addr < end); res = 1; } @@ -290,7 +290,7 @@ static void dump_numa(unsigned char key) 
(u32)(now>>32), (u32)now); for_each_online_node(i) { - paddr_t pa = (NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT; + paddr_t pa = (paddr_t)(NODE_DATA(i)->node_start_pfn + 1)<< PAGE_SHIFT; printk("idx%d -> NODE%d start->%lu size->%lu\n", i, NODE_DATA(i)->node_id, NODE_DATA(i)->node_start_pfn, @@ -314,7 +314,7 @@ static void dump_numa(unsigned char key) page_list_for_each(page, &d->page_list) { - i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT); + i = phys_to_nid((paddr_t)page_to_mfn(page) << PAGE_SHIFT); page_num_node[i]++; } diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index 8cf4190..d87d082 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -89,7 +89,7 @@ boolean_param("noapic", skip_ioapic_setup); /* **** Linux config option: propagated to domain0. */ /* xen_cpuidle: xen control cstate. */ -/*static*/ int xen_cpuidle = 1; +/*static*/ int xen_cpuidle = -1; boolean_param("cpuidle", xen_cpuidle); int early_boot = 1; @@ -419,7 +419,7 @@ void __init __start_xen(unsigned long mbi_p) multiboot_info_t *mbi = __va(mbi_p); module_t *mod = (module_t *)__va(mbi->mods_addr); unsigned long nr_pages, modules_length, modules_headroom; - int i, e820_warn = 0, bytes = 0; + int i, j, e820_warn = 0, bytes = 0; struct ns16550_defaults ns16550 = { .data_bits = 8, .parity = 'n', @@ -463,9 +463,7 @@ void __init __start_xen(unsigned long mbi_p) ns16550.io_base = 0x2f8; ns16550.irq = 3; ns16550_init(1, &ns16550); - serial_init_preirq(); - - init_console(); + console_init_preirq(); printk("Command line: %s\n", cmdline); @@ -616,7 +614,9 @@ void __init __start_xen(unsigned long mbi_p) * we can relocate the dom0 kernel and other multiboot modules. Also, on * x86/64, we relocate Xen to higher memory. 
*/ - modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; + modules_length = 0; + for ( i = 0; i < mbi->mods_count; i++ ) + modules_length += mod[i].mod_end - mod[i].mod_start; /* ensure mod[0] is mapped before parsing */ bootstrap_map(mod[0].mod_start, mod[0].mod_end); @@ -737,8 +737,14 @@ void __init __start_xen(unsigned long mbi_p) initial_images_start = e; e -= modules_headroom; initial_images_base = e; - move_memory(initial_images_start, - mod[0].mod_start, mod[mbi->mods_count-1].mod_end); + e += modules_length + modules_headroom; + for ( j = mbi->mods_count-1; j >= 0; j-- ) + { + e -= mod[j].mod_end - mod[j].mod_start; + move_memory(e, mod[j].mod_start, mod[j].mod_end); + mod[j].mod_end += e - mod[j].mod_start; + mod[j].mod_start = e; + } } if ( !kexec_crash_area.start && (s < e) && @@ -958,7 +964,7 @@ void __init __start_xen(unsigned long mbi_p) initialize_keytable(); - serial_init_postirq(); + console_init_postirq(); for_each_present_cpu ( i ) { @@ -1032,8 +1038,7 @@ void __init __start_xen(unsigned long mbi_p) if ( (initrdidx > 0) && (initrdidx < mbi->mods_count) ) { - _initrd_start = initial_images_start + - (mod[initrdidx].mod_start - mod[0].mod_start); + _initrd_start = mod[initrdidx].mod_start; _initrd_len = mod[initrdidx].mod_end - mod[initrdidx].mod_start; } @@ -1114,13 +1119,12 @@ int xen_in_range(paddr_t start, paddr_t end) int i; static struct { paddr_t s, e; - } xen_regions[5]; + } xen_regions[4]; /* initialize first time */ if ( !xen_regions[0].s ) { - extern char __init_begin[], __per_cpu_start[], __per_cpu_end[], - __bss_start[]; + extern char __init_begin[], __bss_start[]; extern unsigned long allocator_bitmap_end; /* S3 resume code (and other real mode trampoline code) */ @@ -1131,14 +1135,11 @@ int xen_in_range(paddr_t start, paddr_t end) xen_regions[1].e = __pa(&__init_begin); /* per-cpu data */ xen_regions[2].s = __pa(&__per_cpu_start); - xen_regions[2].e = __pa(&__per_cpu_end); + xen_regions[2].e = xen_regions[2].s + + 
(((paddr_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT); /* bss + boot allocator bitmap */ xen_regions[3].s = __pa(&__bss_start); xen_regions[3].e = allocator_bitmap_end; - /* frametable */ - xen_regions[4].s = (unsigned long)frame_table; - xen_regions[4].e = (unsigned long)frame_table + - PFN_UP(max_page * sizeof(*frame_table)); } for ( i = 0; i < ARRAY_SIZE(xen_regions); i++ ) diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c index e259cd5..54075c2 100644 --- a/xen/arch/x86/tboot.c +++ b/xen/arch/x86/tboot.c @@ -46,7 +46,7 @@ static uint64_t sinit_base, sinit_size; #define TXTCR_HEAP_BASE 0x0300 #define TXTCR_HEAP_SIZE 0x0308 -extern char __init_begin[], __per_cpu_start[], __per_cpu_end[], __bss_start[]; +extern char __init_begin[], __per_cpu_start[], __bss_start[]; extern unsigned long allocator_bitmap_end; #define SHA1_SIZE 20 @@ -310,8 +310,9 @@ void tboot_shutdown(uint32_t shutdown_type) __pa(&_stext); /* per-cpu data */ g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start); - g_tboot_shared->mac_regions[2].size = __pa(&__per_cpu_end) - - __pa(&__per_cpu_start); + g_tboot_shared->mac_regions[2].size = + g_tboot_shared->mac_regions[2].start + + (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT); /* bss */ g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start); g_tboot_shared->mac_regions[3].size = __pa(&_end) - __pa(&__bss_start); diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c index 223b44b..9e9ff9b 100644 --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -35,6 +35,13 @@ static char opt_clocksource[10]; string_param("clocksource", opt_clocksource); +/* + * opt_consistent_tscs: All TSCs tick at the exact same rate, allowing + * simplified system time handling. + */ +static int opt_consistent_tscs; +boolean_param("consistent_tscs", opt_consistent_tscs); + unsigned long cpu_khz; /* CPU clock frequency in kHz. 
*/ DEFINE_SPINLOCK(rtc_lock); unsigned long pit0_ticks; @@ -959,7 +966,7 @@ static void local_time_calibration(void) /* The overall calibration scale multiplier. */ u32 calibration_mul_frac; - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + if ( opt_consistent_tscs ) { /* Atomically read cpu_calibration struct and write cpu_time struct. */ local_irq_disable(); @@ -1087,64 +1094,53 @@ static void local_time_calibration(void) */ struct calibration_rendezvous { cpumask_t cpu_calibration_map; - atomic_t count_start; - atomic_t count_end; + atomic_t semaphore; s_time_t master_stime; u64 master_tsc_stamp; }; -#define NR_LOOPS 5 - -static void time_calibration_rendezvous(void *_r) +static void time_calibration_tsc_rendezvous(void *_r) { int i; struct cpu_calibration *c = &this_cpu(cpu_calibration); struct calibration_rendezvous *r = _r; unsigned int total_cpus = cpus_weight(r->cpu_calibration_map); - /* - * Loop is used here to get rid of the cache's side effect to enlarge - * the TSC difference among CPUs. - */ - for ( i = 0; i < NR_LOOPS; i++ ) + /* Loop to get rid of cache effects on TSC skew. 
*/ + for ( i = 4; i >= 0; i-- ) { if ( smp_processor_id() == 0 ) { - while ( atomic_read(&r->count_start) != (total_cpus - 1) ) + while ( atomic_read(&r->semaphore) != (total_cpus - 1) ) mb(); - + if ( r->master_stime == 0 ) { r->master_stime = read_platform_stime(); - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) - rdtscll(r->master_tsc_stamp); + rdtscll(r->master_tsc_stamp); } - atomic_set(&r->count_end, 0); - wmb(); - atomic_inc(&r->count_start); - - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && - i == NR_LOOPS - 1 ) - write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp >> 32)); - - while (atomic_read(&r->count_end) != total_cpus - 1) + atomic_inc(&r->semaphore); + + if ( i == 0 ) + write_tsc((u32)r->master_tsc_stamp, + (u32)(r->master_tsc_stamp >> 32)); + + while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) ) mb(); - atomic_set(&r->count_start, 0); - wmb(); - atomic_inc(&r->count_end); + atomic_set(&r->semaphore, 0); } else { - atomic_inc(&r->count_start); - while ( atomic_read(&r->count_start) != total_cpus ) + atomic_inc(&r->semaphore); + while ( atomic_read(&r->semaphore) < total_cpus ) mb(); - - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && - i == NR_LOOPS - 1 ) - write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp >> 32)); - - atomic_inc(&r->count_end); - while (atomic_read(&r->count_end) != total_cpus) + + if ( i == 0 ) + write_tsc((u32)r->master_tsc_stamp, + (u32)(r->master_tsc_stamp >> 32)); + + atomic_inc(&r->semaphore); + while ( atomic_read(&r->semaphore) > total_cpus ) mb(); } } @@ -1156,18 +1152,48 @@ static void time_calibration_rendezvous(void *_r) raise_softirq(TIME_CALIBRATE_SOFTIRQ); } +static void time_calibration_std_rendezvous(void *_r) +{ + struct cpu_calibration *c = &this_cpu(cpu_calibration); + struct calibration_rendezvous *r = _r; + unsigned int total_cpus = cpus_weight(r->cpu_calibration_map); + + if ( smp_processor_id() == 0 ) + { + while ( atomic_read(&r->semaphore) != (total_cpus - 1) ) + cpu_relax(); + 
r->master_stime = read_platform_stime(); + mb(); /* write r->master_stime /then/ signal */ + atomic_inc(&r->semaphore); + } + else + { + atomic_inc(&r->semaphore); + while ( atomic_read(&r->semaphore) != total_cpus ) + cpu_relax(); + mb(); /* receive signal /then/ read r->master_stime */ + } + + rdtscll(c->local_tsc_stamp); + c->stime_local_stamp = get_s_time(); + c->stime_master_stamp = r->master_stime; + + raise_softirq(TIME_CALIBRATE_SOFTIRQ); +} + static void time_calibration(void *unused) { struct calibration_rendezvous r = { .cpu_calibration_map = cpu_online_map, - .count_start = ATOMIC_INIT(0), - .count_end = ATOMIC_INIT(0), - .master_stime = 0 + .semaphore = ATOMIC_INIT(0) }; /* @wait=1 because we must wait for all cpus before freeing @r. */ on_selected_cpus(r.cpu_calibration_map, - time_calibration_rendezvous, &r, 0, 1); + opt_consistent_tscs + ? time_calibration_tsc_rendezvous + : time_calibration_std_rendezvous, + &r, 0, 1); } void init_percpu_time(void) @@ -1194,8 +1220,11 @@ void init_percpu_time(void) /* Late init function (after all CPUs are booted). */ int __init init_xen_time(void) { + if ( !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + opt_consistent_tscs = 0; + /* If we have constant TSCs then scale factor can be shared. */ - if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) + if ( opt_consistent_tscs ) { int cpu; for_each_cpu ( cpu ) @@ -1245,13 +1274,22 @@ static int disable_pit_irq(void) * XXX dom0 may rely on RTC interrupt delivery, so only enable * hpet_broadcast if FSB mode available or if force_hpet_broadcast. */ - if ( xen_cpuidle ) + if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) ) { hpet_broadcast_init(); if ( !hpet_broadcast_is_available() ) { - printk("HPET broadcast init failed, turn to PIT broadcast.\n"); - return 0; + if ( xen_cpuidle == -1 ) + { + xen_cpuidle = 0; + printk("CPUIDLE: disabled due to no HPET. 
" + "Force enable with 'cpuidle'.\n"); + } + else + { + printk("HPET broadcast init failed, turn to PIT broadcast.\n"); + return 0; + } } } diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index dfdc7bf..0b134a3 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -841,7 +841,7 @@ asmlinkage void do_invalid_op(struct cpu_user_regs *regs) { struct bug_frame bug; struct bug_frame_str bug_str; - char *filename, *predicate, *eip = (char *)regs->eip; + const char *filename, *predicate, *eip = (char *)regs->eip; unsigned long fixup; int id, lineno; @@ -873,11 +873,13 @@ asmlinkage void do_invalid_op(struct cpu_user_regs *regs) /* WARN, BUG or ASSERT: decode the filename pointer and line number. */ if ( !is_kernel(eip) || __copy_from_user(&bug_str, eip, sizeof(bug_str)) || - memcmp(bug_str.mov, BUG_MOV_STR, sizeof(bug_str.mov)) ) + (bug_str.mov != 0xbc) ) goto die; + filename = bug_str(bug_str, eip); eip += sizeof(bug_str); - filename = is_kernel(bug_str.str) ? (char *)bug_str.str : ""; + if ( !is_kernel(filename) ) + filename = ""; lineno = bug.id >> 2; if ( id == BUGFRAME_warn ) @@ -900,11 +902,13 @@ asmlinkage void do_invalid_op(struct cpu_user_regs *regs) ASSERT(id == BUGFRAME_assert); if ( !is_kernel(eip) || __copy_from_user(&bug_str, eip, sizeof(bug_str)) || - memcmp(bug_str.mov, BUG_MOV_STR, sizeof(bug_str.mov)) ) + (bug_str.mov != 0xbc) ) goto die; + predicate = bug_str(bug_str, eip); eip += sizeof(bug_str); - predicate = is_kernel(bug_str.str) ? 
(char *)bug_str.str : ""; + if ( !is_kernel(predicate) ) + predicate = ""; printk("Assertion '%s' failed at %.50s:%d\n", predicate, filename, lineno); DEBUGGER_trap_fatal(TRAP_invalid_op, regs); @@ -1360,11 +1364,11 @@ static int read_descriptor(unsigned int sel, asm volatile ( "larl %2,%0 ; setz %1" - : "=r" (a), "=rm" (valid) : "rm" (sel)); + : "=r" (a), "=qm" (valid) : "rm" (sel)); BUG_ON(valid && ((a & 0x00f0ff00) != *ar)); asm volatile ( "lsll %2,%0 ; setz %1" - : "=r" (l), "=rm" (valid) : "rm" (sel)); + : "=r" (l), "=qm" (valid) : "rm" (sel)); BUG_ON(valid && (l != *limit)); } #endif @@ -1547,6 +1551,8 @@ static uint32_t guest_io_read( return data; } +extern void (*pv_rtc_handler)(unsigned int port, uint8_t value); + static void guest_io_write( unsigned int port, unsigned int bytes, uint32_t data, struct vcpu *v, struct cpu_user_regs *regs) @@ -1558,6 +1564,8 @@ static void guest_io_write( { switch ( bytes ) { case 1: + if ( ((port == 0x70) || (port == 0x71)) && pv_rtc_handler ) + pv_rtc_handler(port, (uint8_t)data); outb((uint8_t)data, port); if ( pv_post_outb_hook ) pv_post_outb_hook(port, (uint8_t)data); @@ -1637,10 +1645,6 @@ static int is_cpufreq_controller(struct domain *d) (d->domain_id == 0)); } -/*Intel vMCE MSRs virtualization*/ -extern int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi); -extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi); - static int emulate_privileged_op(struct cpu_user_regs *regs) { struct vcpu *v = current; @@ -1933,6 +1937,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) goto fail; if ( admin_io_okay(port, op_bytes, v, regs) ) { + if ( (op_bytes == 1) && + ((port == 0x71) || (port == 0x70)) && + pv_rtc_handler ) + pv_rtc_handler(port, regs->eax); io_emul(regs); if ( (op_bytes == 1) && pv_post_outb_hook ) pv_post_outb_hook(port, regs->eax); @@ -2210,10 +2218,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) { - int rc = 
intel_mce_wrmsr(regs->ecx, eax, edx); - if ( rc == -1 ) + int rc = intel_mce_wrmsr(regs->ecx, res); + if ( rc < 0 ) goto fail; - if ( rc == 0 ) + if ( rc ) break; } @@ -2291,25 +2299,27 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) default: if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) ) { + rdmsr_writeback: regs->eax = l; regs->edx = h; break; } - /* Everyone can read the MSR space. */ - /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n", - _p(regs->ecx));*/ - if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) - goto fail; if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) { - int rc = intel_mce_rdmsr(regs->ecx, &eax, &edx); - if ( rc == -1 ) + int rc = intel_mce_rdmsr(regs->ecx, &l, &h); + + if ( rc < 0 ) goto fail; - if ( rc == 0 ) - break; + if ( rc ) + goto rdmsr_writeback; } + /* Everyone can read the MSR space. */ + /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n", + _p(regs->ecx));*/ + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) + goto fail; break; } break; @@ -3048,8 +3058,8 @@ void load_TR(void) /* Switch to non-compat GDT (which has B bit clear) to execute LTR. */ asm volatile ( - "sgdt %1; lgdt %2; ltr %%ax; lgdt %1" - : : "a" (TSS_ENTRY << 3), "m" (old_gdt), "m" (tss_gdt) : "memory" ); + "sgdt %0; lgdt %2; ltr %w1; lgdt %0" + : "=m" (old_gdt) : "rm" (TSS_ENTRY << 3), "m" (tss_gdt) : "memory" ); } void __devinit percpu_traps_init(void) diff --git a/xen/common/domctl.c b/xen/common/domctl.c index 23c2f4e..4a8df90 100644 --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -25,7 +25,7 @@ #include #include -DEFINE_SPINLOCK(domctl_lock); +static DEFINE_SPINLOCK(domctl_lock); extern long arch_do_domctl( struct xen_domctl *op, XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); @@ -188,6 +188,33 @@ static unsigned int default_vcpu0_location(void) return cpu; } +bool_t domctl_lock_acquire(void) +{ + /* + * Caller may try to pause its own VCPUs. 
We must prevent deadlock + against other non-domctl routines which try to do the same. + */ + if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) ) + return 0; + + /* + * Trylock here is paranoia if we have multiple privileged domains. Then + * we could have one domain trying to pause another which is spinning + * on domctl_lock -- results in deadlock. + */ + if ( spin_trylock(&domctl_lock) ) + return 1; + + spin_unlock(&current->domain->hypercall_deadlock_mutex); + return 0; +} + +void domctl_lock_release(void) +{ + spin_unlock(&domctl_lock); + spin_unlock(&current->domain->hypercall_deadlock_mutex); +} + long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) { long ret = 0; @@ -202,7 +229,9 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION ) return -EACCES; - spin_lock(&domctl_lock); + if ( !domctl_lock_acquire() ) + return hypercall_create_continuation( + __HYPERVISOR_domctl, "h", u_domctl); switch ( op->cmd ) { @@ -866,7 +895,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domctl_t) u_domctl) break; } - spin_unlock(&domctl_lock); + domctl_lock_release(); return ret; } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 830e44c..ab3445b 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -302,7 +302,8 @@ static unsigned long init_node_heap(int node, unsigned long mfn, (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) ) { _heap[node] = mfn_to_virt(mfn); - avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES; + avail[node] = mfn_to_virt(mfn + needed - 1) + + PAGE_SIZE - sizeof(**avail) * NR_ZONES; } #endif else if ( get_order_from_bytes(sizeof(**_heap)) == diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c index d724293..d0d31d9 100644 --- a/xen/common/sched_credit.c +++ b/xen/common/sched_credit.c @@ -154,6 +154,7 @@ struct csched_private { spinlock_t lock; struct list_head active_sdom; uint32_t ncpus; + struct timer master_ticker;
unsigned int master; cpumask_t idlers; uint32_t weight; @@ -325,6 +326,16 @@ __csched_vcpu_check(struct vcpu *vc) static unsigned int vcpu_migration_delay; integer_param("vcpu_migration_delay", vcpu_migration_delay); +void set_vcpu_migration_delay(unsigned int delay) +{ + vcpu_migration_delay = delay; +} + +unsigned int get_vcpu_migration_delay(void) +{ + return vcpu_migration_delay; +} + static inline int __csched_vcpu_is_cache_hot(struct vcpu *v) { @@ -757,7 +768,7 @@ csched_runq_sort(unsigned int cpu) } static void -csched_acct(void) +csched_acct(void* dummy) { unsigned long flags; struct list_head *iter_vcpu, *next_vcpu; @@ -792,7 +803,7 @@ csched_acct(void) csched_priv.credit_balance = 0; spin_unlock_irqrestore(&csched_priv.lock, flags); CSCHED_STAT_CRANK(acct_no_work); - return; + goto out; } CSCHED_STAT_CRANK(acct_run); @@ -950,6 +961,10 @@ csched_acct(void) /* Inform each CPU that its runq needs to be sorted */ csched_priv.runq_sort++; + +out: + set_timer( &csched_priv.master_ticker, NOW() + + MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT ); } static void @@ -966,18 +981,6 @@ csched_tick(void *_cpu) if ( !is_idle_vcpu(current) ) csched_vcpu_acct(cpu); - /* - * Host-wide accounting duty - * - * Note: Currently, this is always done by the master boot CPU. Eventually, - * we could distribute or at the very least cycle the duty. - */ - if ( (csched_priv.master == cpu) && - (spc->tick % CSCHED_TICKS_PER_ACCT) == 0 ) - { - csched_acct(); - } - /* * Check if runq needs to be sorted * @@ -1153,7 +1156,8 @@ csched_schedule(s_time_t now) /* * Return task to run next... */ - ret.time = MILLISECS(CSCHED_MSECS_PER_TSLICE); + ret.time = (is_idle_vcpu(snext->vcpu) ? 
+ -1 : MILLISECS(CSCHED_MSECS_PER_TSLICE)); ret.task = snext->vcpu; CSCHED_VCPU_CHECK(ret.task); @@ -1310,10 +1314,35 @@ static __init int csched_start_tickers(void) set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK)); } + init_timer( &csched_priv.master_ticker, csched_acct, NULL, + csched_priv.master); + + set_timer( &csched_priv.master_ticker, NOW() + + MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT ); + return 0; } __initcall(csched_start_tickers); +static void csched_tick_suspend(void) +{ + struct csched_pcpu *spc; + + spc = CSCHED_PCPU(smp_processor_id()); + + stop_timer(&spc->ticker); +} + +static void csched_tick_resume(void) +{ + struct csched_pcpu *spc; + uint64_t now = NOW(); + + spc = CSCHED_PCPU(smp_processor_id()); + + set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK) + - now % MILLISECS(CSCHED_MSECS_PER_TICK) ); +} struct scheduler sched_credit_def = { .name = "SMP Credit Scheduler", @@ -1337,4 +1366,7 @@ struct scheduler sched_credit_def = { .dump_cpu_state = csched_dump_pcpu, .dump_settings = csched_dump, .init = csched_init, + + .tick_suspend = csched_tick_suspend, + .tick_resume = csched_tick_resume, }; diff --git a/xen/common/schedule.c b/xen/common/schedule.c index 5e91f6c..fb2464f 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -798,7 +798,6 @@ static void schedule(void) s_time_t now = NOW(); struct schedule_data *sd; struct task_slice next_slice; - s32 r_time; /* time for new dom to run */ ASSERT(!in_irq()); ASSERT(this_cpu(mc_state).flags == 0); @@ -814,12 +813,12 @@ static void schedule(void) /* get policy-specific decision on scheduling... 
*/ next_slice = ops.do_schedule(now); - r_time = next_slice.time; next = next_slice.task; sd->curr = next; - - set_timer(&sd->s_timer, now + r_time); + + if ( next_slice.time >= 0 ) /* -ve means no limit */ + set_timer(&sd->s_timer, now + next_slice.time); if ( unlikely(prev == next) ) { @@ -835,7 +834,7 @@ static void schedule(void) next->domain->domain_id, (next->runstate.state == RUNSTATE_runnable) ? (now - next->runstate.state_entry_time) : 0, - r_time); + next_slice.time); ASSERT(prev->runstate.state == RUNSTATE_running); vcpu_runstate_change( @@ -964,6 +963,16 @@ void dump_runq(unsigned char key) local_irq_restore(flags); } +void sched_tick_suspend(void) +{ + SCHED_OP(tick_suspend); +} + +void sched_tick_resume(void) +{ + SCHED_OP(tick_resume); +} + #ifdef CONFIG_COMPAT #include "compat/schedule.c" #endif diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c index 002f82e..ac2aaab 100644 --- a/xen/common/spinlock.c +++ b/xen/common/spinlock.c @@ -2,6 +2,7 @@ #include #include #include +#include #ifndef NDEBUG @@ -43,7 +44,9 @@ void spin_debug_disable(void) void _spin_lock(spinlock_t *lock) { check_lock(&lock->debug); - _raw_spin_lock(&lock->raw); + while ( unlikely(!_raw_spin_trylock(&lock->raw)) ) + while ( likely(_raw_spin_is_locked(&lock->raw)) ) + cpu_relax(); } void _spin_lock_irq(spinlock_t *lock) @@ -51,7 +54,13 @@ void _spin_lock_irq(spinlock_t *lock) ASSERT(local_irq_is_enabled()); local_irq_disable(); check_lock(&lock->debug); - _raw_spin_lock(&lock->raw); + while ( unlikely(!_raw_spin_trylock(&lock->raw)) ) + { + local_irq_enable(); + while ( likely(_raw_spin_is_locked(&lock->raw)) ) + cpu_relax(); + local_irq_disable(); + } } unsigned long _spin_lock_irqsave(spinlock_t *lock) @@ -59,7 +68,13 @@ unsigned long _spin_lock_irqsave(spinlock_t *lock) unsigned long flags; local_irq_save(flags); check_lock(&lock->debug); - _raw_spin_lock(&lock->raw); + while ( unlikely(!_raw_spin_trylock(&lock->raw)) ) + { + local_irq_restore(flags); + while ( 
likely(_raw_spin_is_locked(&lock->raw)) ) + cpu_relax(); + local_irq_save(flags); + } return flags; } diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c index 70da626..fe254bb 100644 --- a/xen/common/sysctl.c +++ b/xen/common/sysctl.c @@ -206,6 +206,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) case XEN_SYSCTL_get_pmstat: { + ret = xsm_get_pmstat(); + if ( ret ) + break; + ret = do_get_pm_info(&op->u.get_pmstat); if ( ret ) break; @@ -220,6 +224,10 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl) case XEN_SYSCTL_pm_op: { + ret = xsm_pm_op(); + if ( ret ) + break; + ret = do_pm_op(&op->u.pm_op); if ( ret && (ret != -EAGAIN) ) break; diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c index 1ac35c8..ad3623d 100644 --- a/xen/drivers/acpi/pmstat.c +++ b/xen/drivers/acpi/pmstat.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -527,6 +528,30 @@ int do_pm_op(struct xen_sysctl_pm_op *op) break; } + case XEN_SYSCTL_pm_op_set_vcpu_migration_delay: + { + set_vcpu_migration_delay(op->set_vcpu_migration_delay); + break; + } + + case XEN_SYSCTL_pm_op_get_vcpu_migration_delay: + { + op->get_vcpu_migration_delay = get_vcpu_migration_delay(); + break; + } + + case XEN_SYSCTL_pm_op_get_max_cstate: + { + op->get_max_cstate = acpi_get_cstate_limit(); + break; + } + + case XEN_SYSCTL_pm_op_set_max_cstate: + { + acpi_set_cstate_limit(op->set_max_cstate); + break; + } + default: printk("not defined sub-hypercall @ do_pm_op\n"); ret = -ENOSYS; diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index f6ce51a..38c540a 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -58,10 +58,16 @@ boolean_param("console_to_ring", opt_console_to_ring); static int opt_console_timestamps; boolean_param("console_timestamps", opt_console_timestamps); -#define CONRING_SIZE 16384 -#define CONRING_IDX_MASK(i) ((i)&(CONRING_SIZE-1)) -static char conring[CONRING_SIZE]; -static uint32_t conringc, 
conringp; +/* conring_size: allows a larger console ring than default (16kB). */ +static uint32_t opt_conring_size; +static void parse_conring_size(char *s) +{ opt_conring_size = parse_size_and_unit(s, NULL); } +custom_param("conring_size", parse_conring_size); + +#define _CONRING_SIZE 16384 +#define CONRING_IDX_MASK(i) ((i)&(conring_size-1)) +static char _conring[_CONRING_SIZE], *conring = _conring; +static uint32_t conring_size = _CONRING_SIZE, conringc, conringp; static int sercon_handle = -1; @@ -178,8 +184,8 @@ static void putchar_console_ring(int c) { ASSERT(spin_is_locked(&console_lock)); conring[CONRING_IDX_MASK(conringp++)] = c; - if ( (uint32_t)(conringp - conringc) > CONRING_SIZE ) - conringc = conringp - CONRING_SIZE; + if ( (uint32_t)(conringp - conringc) > conring_size ) + conringc = conringp - conring_size; } long read_console_ring(struct xen_sysctl_readconsole *op) @@ -199,8 +205,8 @@ long read_console_ring(struct xen_sysctl_readconsole *op) { idx = CONRING_IDX_MASK(c); len = conringp - c; - if ( (idx + len) > CONRING_SIZE ) - len = CONRING_SIZE - idx; + if ( (idx + len) > conring_size ) + len = conring_size - idx; if ( (sofar + len) > max ) len = max - sofar; if ( copy_to_guest_offset(str, sofar, &conring[idx], len) ) @@ -212,8 +218,8 @@ long read_console_ring(struct xen_sysctl_readconsole *op) if ( op->clear ) { spin_lock_irq(&console_lock); - if ( (uint32_t)(conringp - c) > CONRING_SIZE ) - conringc = conringp - CONRING_SIZE; + if ( (uint32_t)(conringp - c) > conring_size ) + conringc = conringp - conring_size; else conringc = c; spin_unlock_irq(&console_lock); @@ -544,10 +550,12 @@ void printk(const char *fmt, ...) local_irq_restore(flags); } -void __init init_console(void) +void __init console_init_preirq(void) { char *p; + serial_init_preirq(); + /* Where should console output go?
*/ for ( p = opt_console; p != NULL; p = strchr(p, ',') ) { @@ -587,6 +595,37 @@ void __init init_console(void) } } +void __init console_init_postirq(void) +{ + char *ring; + unsigned int i; + + serial_init_postirq(); + + /* Round size down to a power of two. */ + while ( opt_conring_size & (opt_conring_size - 1) ) + opt_conring_size &= opt_conring_size - 1; + if ( opt_conring_size < conring_size ) + return; + + ring = xmalloc_bytes(opt_conring_size); + if ( ring == NULL ) + { + printk("Unable to allocate console ring of %u bytes.\n", + opt_conring_size); + return; + } + + spin_lock_irq(&console_lock); + for ( i = conringc ; i != conringp; i++ ) + ring[i & (opt_conring_size - 1)] = conring[i & (conring_size - 1)]; + conring_size = opt_conring_size; + conring = ring; + spin_unlock_irq(&console_lock); + + printk("Allocated console ring of %u bytes.\n", opt_conring_size); +} + void __init console_endboot(void) { int i, j; diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c index efb805b..39cc7eb 100644 --- a/xen/drivers/cpufreq/cpufreq.c +++ b/xen/drivers/cpufreq/cpufreq.c @@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu) int ret = 0; unsigned int firstcpu; unsigned int dom, domexist = 0; - unsigned int j; + unsigned int hw_all = 0; struct list_head *pos; struct cpufreq_dom *cpufreq_dom = NULL; struct cpufreq_policy new_policy; @@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu) if (cpufreq_cpu_policy[cpu]) return 0; - ret = cpufreq_statistic_init(cpu); - if (ret) - return ret; + if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW) + hw_all = 1; dom = perf->domain_info.domain; @@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu) } } - if (domexist) { - /* share policy with the first cpu since on same boat */ - firstcpu = first_cpu(cpufreq_dom->map); - policy = cpufreq_cpu_policy[firstcpu]; - - cpufreq_cpu_policy[cpu] = policy; - cpu_set(cpu, cpufreq_dom->map); - cpu_set(cpu, policy->cpus); - - /* domain coordination sanity check 
*/ - if ((perf->domain_info.coord_type != - processor_pminfo[firstcpu]->perf.domain_info.coord_type) || - (perf->domain_info.num_processors != - processor_pminfo[firstcpu]->perf.domain_info.num_processors)) { - ret = -EINVAL; - goto err2; - } - - printk(KERN_EMERG"adding CPU %u\n", cpu); - } else { + if (!domexist) { cpufreq_dom = xmalloc(struct cpufreq_dom); - if (!cpufreq_dom) { - cpufreq_statistic_exit(cpu); + if (!cpufreq_dom) return -ENOMEM; - } + memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom)); cpufreq_dom->dom = dom; - cpu_set(cpu, cpufreq_dom->map); list_add(&cpufreq_dom->node, &cpufreq_dom_list_head); + } else { + /* domain sanity check under whatever coordination type */ + firstcpu = first_cpu(cpufreq_dom->map); + if ((perf->domain_info.coord_type != + processor_pminfo[firstcpu]->perf.domain_info.coord_type) || + (perf->domain_info.num_processors != + processor_pminfo[firstcpu]->perf.domain_info.num_processors)) { + return -EINVAL; + } + } - /* for the first cpu, setup policy and do init work */ + if (!domexist || hw_all) { policy = xmalloc(struct cpufreq_policy); - if (!policy) { - list_del(&cpufreq_dom->node); - xfree(cpufreq_dom); - cpufreq_statistic_exit(cpu); - return -ENOMEM; - } + if (!policy) + ret = -ENOMEM; + memset(policy, 0, sizeof(struct cpufreq_policy)); policy->cpu = cpu; - cpu_set(cpu, policy->cpus); cpufreq_cpu_policy[cpu] = policy; ret = cpufreq_driver->init(policy); - if (ret) - goto err1; + if (ret) { + xfree(policy); + return ret; + } printk(KERN_EMERG"CPU %u initialization completed\n", cpu); + } else { + firstcpu = first_cpu(cpufreq_dom->map); + policy = cpufreq_cpu_policy[firstcpu]; + + cpufreq_cpu_policy[cpu] = policy; + printk(KERN_EMERG"adding CPU %u\n", cpu); } - /* - * After get full cpumap of the coordination domain, - * we can safely start gov here. 
- */ - if (cpus_weight(cpufreq_dom->map) == - perf->domain_info.num_processors) { + cpu_set(cpu, policy->cpus); + cpu_set(cpu, cpufreq_dom->map); + + ret = cpufreq_statistic_init(cpu); + if (ret) + goto err1; + + if (hw_all || + (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) { memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); policy->governor = NULL; @@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu) return 0; err2: - cpufreq_driver->exit(policy); + cpufreq_statistic_exit(cpu); err1: - for_each_cpu_mask(j, cpufreq_dom->map) { - cpufreq_cpu_policy[j] = NULL; - cpufreq_statistic_exit(j); + cpufreq_cpu_policy[cpu] = NULL; + cpu_clear(cpu, policy->cpus); + cpu_clear(cpu, cpufreq_dom->map); + + if (cpus_empty(policy->cpus)) { + cpufreq_driver->exit(policy); + xfree(policy); + } + + if (cpus_empty(cpufreq_dom->map)) { + list_del(&cpufreq_dom->node); + xfree(cpufreq_dom); } - list_del(&cpufreq_dom->node); - xfree(cpufreq_dom); - xfree(policy); return ret; } int cpufreq_del_cpu(unsigned int cpu) { unsigned int dom, domexist = 0; + unsigned int hw_all = 0; struct list_head *pos; struct cpufreq_dom *cpufreq_dom = NULL; struct cpufreq_policy *policy; @@ -270,6 +272,9 @@ int cpufreq_del_cpu(unsigned int cpu) if (!cpufreq_cpu_policy[cpu]) return 0; + if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW) + hw_all = 1; + dom = perf->domain_info.domain; policy = cpufreq_cpu_policy[cpu]; @@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu) if (!domexist) return -EINVAL; - /* for the first cpu of the domain, stop gov */ - if (cpus_weight(cpufreq_dom->map) == - perf->domain_info.num_processors) + /* for HW_ALL, stop gov for each core of the _PSD domain */ + /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */ + if (hw_all || + (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + cpufreq_statistic_exit(cpu); cpufreq_cpu_policy[cpu] = NULL; cpu_clear(cpu, 
policy->cpus); cpu_clear(cpu, cpufreq_dom->map); - cpufreq_statistic_exit(cpu); + + if (cpus_empty(policy->cpus)) { + cpufreq_driver->exit(policy); + xfree(policy); + } /* for the last cpu of the domain, clean room */ /* It's safe here to free freq_table, drv_data and policy */ - if (!cpus_weight(cpufreq_dom->map)) { - cpufreq_driver->exit(policy); + if (cpus_empty(cpufreq_dom->map)) { list_del(&cpufreq_dom->node); xfree(cpufreq_dom); - xfree(policy); } printk(KERN_EMERG"deleting CPU %u\n", cpu); diff --git a/xen/drivers/cpufreq/cpufreq_ondemand.c b/xen/drivers/cpufreq/cpufreq_ondemand.c index b01312d..a4ff4f9 100644 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c @@ -178,7 +178,8 @@ static void do_dbs_timer(void *dbs) dbs_check_cpu(dbs_info); - set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); + set_timer(&dbs_timer[dbs_info->cpu], + align_timer(NOW() , dbs_tuners_ins.sampling_rate)); } static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) @@ -189,6 +190,12 @@ static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) (void *)dbs_info, dbs_info->cpu); set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); + + if ( processor_pminfo[dbs_info->cpu]->perf.shared_type + == CPUFREQ_SHARED_TYPE_HW ) + { + dbs_info->stoppable = 1; + } } static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) @@ -336,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(void) cpufreq_unregister_governor(&cpufreq_gov_dbs); } __exitcall(cpufreq_gov_dbs_exit); + +void cpufreq_dbs_timer_suspend(void) +{ + int cpu; + + cpu = smp_processor_id(); + + if ( per_cpu(cpu_dbs_info,cpu).stoppable ) + { + stop_timer( &dbs_timer[cpu] ); + } +} + +void cpufreq_dbs_timer_resume(void) +{ + int cpu; + struct timer* t; + s_time_t now; + + cpu = smp_processor_id(); + + if ( per_cpu(cpu_dbs_info,cpu).stoppable ) + { + now = NOW(); + t = &dbs_timer[cpu]; + if (t->expires <= now) + { + t->function(t->data); + } + else + 
{ + set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate)); + } + } +} diff --git a/xen/drivers/passthrough/amd/iommu_intr.c b/xen/drivers/passthrough/amd/iommu_intr.c index c3a9dc8..4774431 100644 --- a/xen/drivers/passthrough/amd/iommu_intr.c +++ b/xen/drivers/passthrough/amd/iommu_intr.c @@ -108,8 +108,17 @@ static void update_intremap_entry_from_ioapic( return; } +extern int nr_ioapic_registers[MAX_IO_APICS]; +extern int nr_ioapics; + int __init amd_iommu_setup_intremap_table(void) { + struct IO_APIC_route_entry rte = {0}; + unsigned long flags; + u32* entry; + int apic, pin; + u8 delivery_mode, dest, vector, dest_mode; + if ( int_remap_table == NULL ) { int_remap_table = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); @@ -118,6 +127,31 @@ int __init amd_iommu_setup_intremap_table(void) memset(int_remap_table, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); } + /* Read ioapic entries and update interrupt remapping table accordingly */ + for ( apic = 0; apic < nr_ioapics; apic++ ) + { + for ( pin = 0; pin < nr_ioapic_registers[apic]; pin++ ) + { + *(((int *)&rte) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int *)&rte) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + + if ( rte.mask == 1 ) + continue; + + delivery_mode = rte.delivery_mode; + vector = rte.vector; + dest_mode = rte.dest_mode; + if ( dest_mode == 0 ) + dest = rte.dest.physical.physical_dest & 0xf; + else + dest = rte.dest.logical.logical_dest & 0xff; + + spin_lock_irqsave(&int_remap_table_lock, flags); + entry = (u32*)get_intremap_entry(vector, delivery_mode); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(&int_remap_table_lock, flags); + } + } return 0; } diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index bdb0d46..18b4a6c 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -148,12 +148,23 @@ int pt_irq_create_bind_vtd( return rc; } } - else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != 
pt_irq_bind->u.msi.gvec - ||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq) - + else { - spin_unlock(&d->event_lock); - return -EBUSY; + uint32_t mask = HVM_IRQ_DPCI_MACH_MSI | HVM_IRQ_DPCI_GUEST_MSI; + uint32_t old_gvec; + + if ( (hvm_irq_dpci->mirq[pirq].flags & mask) != mask) + { + spin_unlock(&d->event_lock); + return -EBUSY; + } + + /* if pirq is already mapped as vmsi, update the guest data/addr */ + old_gvec = hvm_irq_dpci->mirq[pirq].gmsi.gvec; + hvm_irq_dpci->msi_gvec_pirq[old_gvec] = 0; + hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec; + hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags; + hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq; } } else diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c index 6e4d4a1..ff56a19 100644 --- a/xen/drivers/passthrough/vtd/iommu.c +++ b/xen/drivers/passthrough/vtd/iommu.c @@ -911,6 +911,8 @@ static int iommu_alloc(struct acpi_drhd_unit *drhd) return -ENOMEM; memset(iommu, 0, sizeof(struct iommu)); + iommu->vector = -1; /* No vector assigned yet. */ + iommu->intel = alloc_intel_iommu(); if ( iommu->intel == NULL ) { @@ -1194,7 +1196,20 @@ static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn) u8 secbus, secdevfn; struct pci_dev *pdev = pci_get_pdev(bus, devfn); - BUG_ON(!pdev); + if ( pdev == NULL ) + { + /* We can reach here by setup_dom0_rmrr() -> iommu_prepare_rmrr_dev() + * -> domain_context_mapping(). + * In the case a user enables VT-d and disables USB (that usually needs + * RMRR) in BIOS, we can't discover the BDF of the USB controller in + * setup_dom0_devices(), but the ACPI RMRR structures may still contain + * the BDF and at last pci_get_pdev() returns NULL here. 
+ */ + gdprintk(XENLOG_WARNING VTDPREFIX, + "domain_context_mapping: can't find bdf = %x:%x.%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return 0; + } drhd = acpi_find_matched_drhd_unit(pdev); if ( !drhd ) @@ -1666,15 +1681,18 @@ static int init_vtd_hw(void) return -EIO; } - vector = iommu_set_interrupt(iommu); - if ( vector < 0 ) + if ( iommu->vector < 0 ) { - gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); - return vector; + vector = iommu_set_interrupt(iommu); + if ( vector < 0 ) + { + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); + return vector; + } + iommu->vector = vector; } - dma_msi_data_init(iommu, vector); + dma_msi_data_init(iommu, iommu->vector); dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); - iommu->vector = vector; clear_fault_bits(iommu); dmar_writel(iommu->reg, DMAR_FECTL_REG, 0); @@ -1948,16 +1966,34 @@ void iommu_resume(void) { struct acpi_drhd_unit *drhd; struct iommu *iommu; + struct iommu_flush *flush; u32 i; if ( !vtd_enabled ) return; + /* Re-initialize the register-based flush functions. + * In iommu_flush_all(), we invoke iommu_flush_{context,iotlb}_global(), + * but at this point, on hosts that support QI(Queued Invalidation), QI + * hasn't been re-enabed yet, so for now let's use the register-based + * invalidation method before invoking init_vtd_hw(). + */ + if ( iommu_qinval ) + { + for_each_drhd_unit ( drhd ) + { + iommu = drhd->iommu; + flush = iommu_get_flush(iommu); + flush->context = flush_context_reg; + flush->iotlb = flush_iotlb_reg; + } + } + /* Not sure whether the flush operation is required to meet iommu * specification. Note that BIOS also executes in S3 resume and iommu may * be touched again, so let us do the flush operation for safety. 
*/ - flush_all_cache(); + iommu_flush_all(); if ( init_vtd_hw() != 0 && force_iommu ) panic("IOMMU setup failed, crash Xen for security purpose!\n"); diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c index e823370..60c7e0a 100644 --- a/xen/drivers/passthrough/vtd/qinval.c +++ b/xen/drivers/passthrough/vtd/qinval.c @@ -432,10 +432,11 @@ int enable_qinval(struct iommu *iommu) "Cannot allocate memory for qi_ctrl->qinval_maddr\n"); return -ENOMEM; } - flush->context = flush_context_qi; - flush->iotlb = flush_iotlb_qi; } + flush->context = flush_context_qi; + flush->iotlb = flush_iotlb_qi; + /* Setup Invalidation Queue Address(IQA) register with the * address of the page we just allocated. QS field at * bits[2:0] to indicate size of queue is one 4KB page. diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h index 8423664..2f24c4f 100644 --- a/xen/include/acpi/cpufreq/cpufreq.h +++ b/xen/include/acpi/cpufreq/cpufreq.h @@ -221,6 +221,7 @@ struct cpu_dbs_info_s { struct cpufreq_frequency_table *freq_table; int cpu; unsigned int enable:1; + unsigned int stoppable:1; }; int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); @@ -232,4 +233,7 @@ int write_ondemand_sampling_rate(unsigned int sampling_rate); int write_ondemand_up_threshold(unsigned int up_threshold); int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq); + +void cpufreq_dbs_timer_suspend(void); +void cpufreq_dbs_timer_resume(void); #endif /* __XEN_CPUFREQ_PM_H__ */ diff --git a/xen/include/asm-ia64/linux-xen/asm/README.origin b/xen/include/asm-ia64/linux-xen/asm/README.origin index 4e0986f..e3cc246 100644 --- a/xen/include/asm-ia64/linux-xen/asm/README.origin +++ b/xen/include/asm-ia64/linux-xen/asm/README.origin @@ -22,6 +22,7 @@ pgtable.h -> linux/include/asm-ia64/pgtable.h processor.h -> linux/include/asm-ia64/processor.h ptrace.h -> linux/include/asm-ia64/ptrace.h sal.h -> 
linux/include/asm-ia64/sal.h +sections.h -> linux/include/asm-ia64/sections.h smp.h -> linux/include/asm-ia64/smp.h spinlock.h -> linux/include/asm-ia64/spinlock.h system.h -> linux/include/asm-ia64/system.h diff --git a/xen/include/asm-ia64/linux-xen/asm/sections.h b/xen/include/asm-ia64/linux-xen/asm/sections.h new file mode 100644 index 0000000..a6334c6 --- /dev/null +++ b/xen/include/asm-ia64/linux-xen/asm/sections.h @@ -0,0 +1,28 @@ +#ifndef _ASM_IA64_SECTIONS_H +#define _ASM_IA64_SECTIONS_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include + +extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; +#ifdef XEN +#ifdef CONFIG_SMP +extern char __cpu0_per_cpu[]; +#endif +#endif +extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; +extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; +extern char __start_gate_section[]; +extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; +extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; +extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[]; +extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[]; +extern char __start_unwind[], __end_unwind[]; +extern char __start_ivt_text[], __end_ivt_text[]; + +#endif /* _ASM_IA64_SECTIONS_H */ + diff --git a/xen/include/asm-ia64/linux-xen/asm/spinlock.h b/xen/include/asm-ia64/linux-xen/asm/spinlock.h index 14bf3a3..f49d008 100644 --- a/xen/include/asm-ia64/linux-xen/asm/spinlock.h +++ b/xen/include/asm-ia64/linux-xen/asm/spinlock.h @@ -21,111 +21,9 @@ typedef struct { volatile unsigned int lock; -#ifdef CONFIG_PREEMPT - unsigned int break_lock; -#endif -#ifdef DEBUG_SPINLOCK - void *locker; -#endif } raw_spinlock_t; -#ifdef XEN -#ifdef DEBUG_SPINLOCK -#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0, NULL } -#else #define _RAW_SPIN_LOCK_UNLOCKED 
/*(raw_spinlock_t)*/ { 0 } -#endif -#else -#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0 } -#endif - -#ifdef ASM_SUPPORTED -/* - * Try to get the lock. If we fail to get the lock, make a non-standard call to - * ia64_spinlock_contention(). We do not use a normal call because that would force all - * callers of spin_lock() to be non-leaf routines. Instead, ia64_spinlock_contention() is - * carefully coded to touch only those registers that spin_lock() marks "clobbered". - */ - -#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory" - -static inline void -_raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags) -{ - register volatile unsigned int *ptr asm ("r31") = &lock->lock; - -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) -# ifdef CONFIG_ITANIUM - /* don't use brl on Itanium... */ - asm volatile ("{\n\t" - " mov ar.ccv = r0\n\t" - " mov r28 = ip\n\t" - " mov r30 = 1;;\n\t" - "}\n\t" - "cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t" - "movl r29 = ia64_spinlock_contention_pre3_4;;\n\t" - "cmp4.ne p14, p0 = r30, r0\n\t" - "mov b6 = r29;;\n\t" - "mov r27=%2\n\t" - "(p14) br.cond.spnt.many b6" - : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS); -# else - asm volatile ("{\n\t" - " mov ar.ccv = r0\n\t" - " mov r28 = ip\n\t" - " mov r30 = 1;;\n\t" - "}\n\t" - "cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t" - "cmp4.ne p14, p0 = r30, r0\n\t" - "mov r27=%2\n\t" - "(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;" - : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS); -# endif /* CONFIG_MCKINLEY */ -#else -# ifdef CONFIG_ITANIUM - /* don't use brl on Itanium... 
*/ - /* mis-declare, so we get the entry-point, not it's function descriptor: */ - asm volatile ("mov r30 = 1\n\t" - "mov r27=%2\n\t" - "mov ar.ccv = r0;;\n\t" - "cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t" - "movl r29 = ia64_spinlock_contention;;\n\t" - "cmp4.ne p14, p0 = r30, r0\n\t" - "mov b6 = r29;;\n\t" - "(p14) br.call.spnt.many b6 = b6" - : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS); -# else - asm volatile ("mov r30 = 1\n\t" - "mov r27=%2\n\t" - "mov ar.ccv = r0;;\n\t" - "cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t" - "cmp4.ne p14, p0 = r30, r0\n\t" - "(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;" - : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS); -# endif /* CONFIG_MCKINLEY */ -#endif - -#ifdef DEBUG_SPINLOCK - asm volatile ("mov %0=ip" : "=r" (lock->locker)); -#endif -} -#define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0) -#else /* !ASM_SUPPORTED */ -#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) -# define _raw_spin_lock(x) \ -do { \ - __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ - __u64 ia64_spinlock_val; \ - ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \ - if (unlikely(ia64_spinlock_val)) { \ - do { \ - while (*ia64_spinlock_ptr) \ - ia64_barrier(); \ - ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \ - } while (ia64_spinlock_val); \ - } \ -} while (0) -#endif /* !ASM_SUPPORTED */ #define _raw_spin_is_locked(x) ((x)->lock != 0) #define _raw_spin_unlock(x) do { barrier(); (x)->lock = 0; } while (0) @@ -134,9 +32,6 @@ do { \ typedef struct { volatile unsigned int read_counter : 31; volatile unsigned int write_lock : 1; -#ifdef CONFIG_PREEMPT - unsigned int break_lock; -#endif } raw_rwlock_t; #define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 } diff --git a/xen/include/asm-ia64/linux/asm/README.origin b/xen/include/asm-ia64/linux/asm/README.origin index 778d9a8..25e1204 100644 --- a/xen/include/asm-ia64/linux/asm/README.origin +++ 
b/xen/include/asm-ia64/linux/asm/README.origin @@ -29,7 +29,6 @@ param.h -> linux/include/asm-ia64/param.h patch.h -> linux/include/asm-ia64/patch.h pci.h -> linux/include/asm-ia64/pci.h rse.h -> linux/include/asm-ia64/rse.h -sections.h -> linux/include/asm-ia64/sections.h setup.h -> linux/include/asm-ia64/setup.h string.h -> linux/include/asm-ia64/string.h thread_info.h -> linux/include/asm-ia64/thread_info.h diff --git a/xen/include/asm-ia64/mm.h b/xen/include/asm-ia64/mm.h index b7b2d39..bb3dc8a 100644 --- a/xen/include/asm-ia64/mm.h +++ b/xen/include/asm-ia64/mm.h @@ -212,6 +212,7 @@ static inline void put_page(struct page_info *page) unsigned long nx, x, y = page->count_info; do { + ASSERT((y & PGC_count_mask) != 0); x = y; nx = x - 1; } diff --git a/xen/include/asm-x86/atomic.h b/xen/include/asm-x86/atomic.h index f2ecf95..17becc5 100644 --- a/xen/include/asm-x86/atomic.h +++ b/xen/include/asm-x86/atomic.h @@ -23,8 +23,7 @@ typedef struct { int counter; } atomic_t; * atomic_read - read atomic variable * @v: pointer of type atomic_t * - * Atomically reads the value of @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * Atomically reads the value of @v. */ #define _atomic_read(v) ((v).counter) #define atomic_read(v) (*(volatile int *)&((v)->counter)) @@ -34,8 +33,7 @@ typedef struct { int counter; } atomic_t; * @v: pointer of type atomic_t * @i: required value * - * Atomically sets the value of @v to @i. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * Atomically sets the value of @v to @i. */ #define _atomic_set(v,i) (((v).counter) = (i)) #define atomic_set(v,i) (*(volatile int *)&((v)->counter) = (i)) @@ -45,12 +43,11 @@ typedef struct { int counter; } atomic_t; * @i: integer value to add * @v: pointer of type atomic_t * - * Atomically adds @i to @v. Note that the guaranteed useful range - * of an atomic_t is only 24 bits. + * Atomically adds @i to @v. 
*/ static __inline__ void atomic_add(int i, atomic_t *v) { - __asm__ __volatile__( + asm volatile( LOCK "addl %1,%0" :"=m" (*(volatile int *)&v->counter) :"ir" (i), "m" (*(volatile int *)&v->counter)); @@ -61,12 +58,11 @@ static __inline__ void atomic_add(int i, atomic_t *v) * @i: integer value to subtract * @v: pointer of type atomic_t * - * Atomically subtracts @i from @v. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * Atomically subtracts @i from @v. */ static __inline__ void atomic_sub(int i, atomic_t *v) { - __asm__ __volatile__( + asm volatile( LOCK "subl %1,%0" :"=m" (*(volatile int *)&v->counter) :"ir" (i), "m" (*(volatile int *)&v->counter)); @@ -79,14 +75,13 @@ static __inline__ void atomic_sub(int i, atomic_t *v) * * Atomically subtracts @i from @v and returns * true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * other cases. */ static __inline__ int atomic_sub_and_test(int i, atomic_t *v) { unsigned char c; - __asm__ __volatile__( + asm volatile( LOCK "subl %2,%0; sete %1" :"=m" (*(volatile int *)&v->counter), "=qm" (c) :"ir" (i), "m" (*(volatile int *)&v->counter) : "memory"); @@ -97,12 +92,11 @@ static __inline__ int atomic_sub_and_test(int i, atomic_t *v) * atomic_inc - increment atomic variable * @v: pointer of type atomic_t * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * Atomically increments @v by 1. */ static __inline__ void atomic_inc(atomic_t *v) { - __asm__ __volatile__( + asm volatile( LOCK "incl %0" :"=m" (*(volatile int *)&v->counter) :"m" (*(volatile int *)&v->counter)); @@ -112,12 +106,11 @@ static __inline__ void atomic_inc(atomic_t *v) * atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * - * Atomically decrements @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * Atomically decrements @v by 1. 
*/ static __inline__ void atomic_dec(atomic_t *v) { - __asm__ __volatile__( + asm volatile( LOCK "decl %0" :"=m" (*(volatile int *)&v->counter) :"m" (*(volatile int *)&v->counter)); @@ -129,14 +122,13 @@ static __inline__ void atomic_dec(atomic_t *v) * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other - * cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * cases. */ static __inline__ int atomic_dec_and_test(atomic_t *v) { unsigned char c; - __asm__ __volatile__( + asm volatile( LOCK "decl %0; sete %1" :"=m" (*(volatile int *)&v->counter), "=qm" (c) :"m" (*(volatile int *)&v->counter) : "memory"); @@ -149,14 +141,13 @@ static __inline__ int atomic_dec_and_test(atomic_t *v) * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all - * other cases. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * other cases. */ static __inline__ int atomic_inc_and_test(atomic_t *v) { unsigned char c; - __asm__ __volatile__( + asm volatile( LOCK "incl %0; sete %1" :"=m" (*(volatile int *)&v->counter), "=qm" (c) :"m" (*(volatile int *)&v->counter) : "memory"); @@ -170,14 +161,13 @@ static __inline__ int atomic_inc_and_test(atomic_t *v) * * Atomically adds @i to @v and returns true * if the result is negative, or false when - * result is greater than or equal to zero. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. + * result is greater than or equal to zero. 
*/ static __inline__ int atomic_add_negative(int i, atomic_t *v) { unsigned char c; - __asm__ __volatile__( + asm volatile( LOCK "addl %2,%0; sets %1" :"=m" (*(volatile int *)&v->counter), "=qm" (c) :"ir" (i), "m" (*(volatile int *)&v->counter) : "memory"); diff --git a/xen/include/asm-x86/bug.h b/xen/include/asm-x86/bug.h index 9755761..df64549 100644 --- a/xen/include/asm-x86/bug.h +++ b/xen/include/asm-x86/bug.h @@ -18,4 +18,28 @@ struct bug_frame { #define BUGFRAME_bug 2 #define BUGFRAME_assert 3 +#define dump_execution_state() \ + asm volatile ( \ + "ud2 ; ret $0" \ + : : "i" (BUGFRAME_dump) ) + +#define WARN() \ + asm volatile ( \ + "ud2 ; ret %0" BUG_STR(1) \ + : : "i" (BUGFRAME_warn | (__LINE__<<2)), \ + "i" (__FILE__) ) + +#define BUG() \ + asm volatile ( \ + "ud2 ; ret %0" BUG_STR(1) \ + : : "i" (BUGFRAME_bug | (__LINE__<<2)), \ + "i" (__FILE__) ) + +#define assert_failed(p) \ + asm volatile ( \ + "ud2 ; ret %0" BUG_STR(1) BUG_STR(2) \ + : : "i" (BUGFRAME_assert | (__LINE__<<2)), \ + "i" (__FILE__), "i" (#p) ) + + #endif /* __X86_BUG_H__ */ diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 9d140ef..3e05dbf 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -35,6 +35,7 @@ #define CONFIG_ACPI_SLEEP 1 #define CONFIG_ACPI_NUMA 1 #define CONFIG_ACPI_SRAT 1 +#define CONFIG_ACPI_CSTATE 1 #define CONFIG_VGA 1 diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h index 4c6eeab..f36d707 100644 --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -75,6 +75,7 @@ #define X86_FEATURE_P4 (3*32+ 7) /* P4 */ #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ #define X86_FEATURE_NOSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */ +#define X86_FEATURE_ARAT (3*32+ 10) /* Always running APIC timer */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD 
Extensions-3 */ diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 2bf5b1c..8508a1d 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -226,6 +226,7 @@ struct domain_mca_msrs uint64_t mci_ctl[MAX_NR_BANKS]; uint16_t nr_injection; struct list_head impact_header; + spinlock_t lock; }; struct arch_domain diff --git a/xen/include/asm-x86/hpet.h b/xen/include/asm-x86/hpet.h index f962ab7..b0038e0 100644 --- a/xen/include/asm-x86/hpet.h +++ b/xen/include/asm-x86/hpet.h @@ -78,5 +78,6 @@ void hpet_broadcast_init(void); void hpet_broadcast_enter(void); void hpet_broadcast_exit(void); int hpet_broadcast_is_available(void); +void hpet_disable_legacy_broadcast(void); #endif /* __X86_HPET_H__ */ diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h index faea392..cd24177 100644 --- a/xen/include/asm-x86/hvm/vcpu.h +++ b/xen/include/asm-x86/hvm/vcpu.h @@ -66,6 +66,8 @@ struct hvm_vcpu { struct arch_svm_struct svm; } u; + struct tasklet assert_evtchn_irq_tasklet; + struct mtrr_state mtrr; u64 pat_cr; diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h index 6fd9894..97cf763 100644 --- a/xen/include/asm-x86/hvm/vmx/vmx.h +++ b/xen/include/asm-x86/hvm/vmx/vmx.h @@ -60,6 +60,7 @@ void vmx_intr_assist(void); void vmx_do_resume(struct vcpu *); void vmx_vlapic_msr_changed(struct vcpu *v); void vmx_realmode(struct cpu_user_regs *regs); +void vmx_update_debug_state(struct vcpu *v); /* * Exit Reasons diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h index f6b3fd1..ab3b2a6 100644 --- a/xen/include/asm-x86/msr-index.h +++ b/xen/include/asm-x86/msr-index.h @@ -326,7 +326,15 @@ #define MSR_IA32_MCG_ESP 0x00000187 #define MSR_IA32_MCG_EFLAGS 0x00000188 #define MSR_IA32_MCG_EIP 0x00000189 -#define MSR_IA32_MCG_RESERVED 0x0000018a +#define MSR_IA32_MCG_MISC 0x0000018a +#define MSR_IA32_MCG_R8 0x00000190 +#define MSR_IA32_MCG_R9 0x00000191 +#define 
MSR_IA32_MCG_R10 0x00000192 +#define MSR_IA32_MCG_R11 0x00000193 +#define MSR_IA32_MCG_R12 0x00000194 +#define MSR_IA32_MCG_R13 0x00000195 +#define MSR_IA32_MCG_R14 0x00000196 +#define MSR_IA32_MCG_R15 0x00000197 /* Pentium IV performance counter MSRs */ #define MSR_P4_BPU_PERFCTR0 0x00000300 diff --git a/xen/include/asm-x86/perfc_defn.h b/xen/include/asm-x86/perfc_defn.h index 99a95cd..9a2697c 100644 --- a/xen/include/asm-x86/perfc_defn.h +++ b/xen/include/asm-x86/perfc_defn.h @@ -4,7 +4,7 @@ PERFCOUNTER_ARRAY(exceptions, "exceptions", 32) -#define VMX_PERF_EXIT_REASON_SIZE 44 +#define VMX_PERF_EXIT_REASON_SIZE 56 #define VMX_PERF_VECTOR_SIZE 0x20 PERFCOUNTER_ARRAY(vmexits, "vmexits", VMX_PERF_EXIT_REASON_SIZE) PERFCOUNTER_ARRAY(cause_vector, "cause vector", VMX_PERF_VECTOR_SIZE) diff --git a/xen/include/asm-x86/spinlock.h b/xen/include/asm-x86/spinlock.h index 66c4d51..f1a5feb 100644 --- a/xen/include/asm-x86/spinlock.h +++ b/xen/include/asm-x86/spinlock.h @@ -13,19 +13,6 @@ typedef struct { #define _raw_spin_is_locked(x) ((x)->lock <= 0) -static always_inline void _raw_spin_lock(raw_spinlock_t *lock) -{ - asm volatile ( - "1: lock; decw %0 \n" - " jns 3f \n" - "2: rep; nop \n" - " cmpw $0,%0 \n" - " jle 2b \n" - " jmp 1b \n" - "3:" - : "=m" (lock->lock) : : "memory" ); -} - static always_inline void _raw_spin_unlock(raw_spinlock_t *lock) { ASSERT(_raw_spin_is_locked(lock)); diff --git a/xen/include/asm-x86/traps.h b/xen/include/asm-x86/traps.h index 85a4223..c2a0982 100644 --- a/xen/include/asm-x86/traps.h +++ b/xen/include/asm-x86/traps.h @@ -47,4 +47,9 @@ extern int guest_has_trap_callback(struct domain *d, uint16_t vcpuid, extern int send_guest_trap(struct domain *d, uint16_t vcpuid, unsigned int trap_nr); +/* Intel vMCE MSRs virtualization */ +extern void intel_mce_init_msr(struct domain *d); +extern int intel_mce_wrmsr(u32 msr, u64 value); +extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi); + #endif /* ASM_TRAP_H */ diff --git 
a/xen/include/asm-x86/x86_32/bug.h b/xen/include/asm-x86/x86_32/bug.h index 20a9137..dfb5955 100644 --- a/xen/include/asm-x86/x86_32/bug.h +++ b/xen/include/asm-x86/x86_32/bug.h @@ -2,33 +2,10 @@ #define __X86_32_BUG_H__ struct bug_frame_str { - unsigned char mov[1]; + unsigned char mov; unsigned long str; } __attribute__((packed)); -#define BUG_MOV_STR "\xbc" - -#define dump_execution_state() \ - asm volatile ( \ - "ud2 ; ret $%c0" \ - : : "i" (BUGFRAME_dump) ) - -#define WARN() \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0xbc ; .long %c1" \ - : : "i" (BUGFRAME_warn | (__LINE__<<2)), \ - "i" (__FILE__) ) - -#define BUG() \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0xbc ; .long %c1" \ - : : "i" (BUGFRAME_bug | (__LINE__<<2)), \ - "i" (__FILE__) ) - -#define assert_failed(p) \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0xbc ; .long %c1" \ - " ; .byte 0xbc ; .long %c2" \ - : : "i" (BUGFRAME_assert | (__LINE__<<2)), \ - "i" (__FILE__), "i" (#p) ) +#define bug_str(b, eip) ((const char *)(b).str) +#define BUG_STR(n) "; movl %" #n ", %%esp" #endif /* __X86_32_BUG_H__ */ diff --git a/xen/include/asm-x86/x86_32/page.h b/xen/include/asm-x86/x86_32/page.h index aef51f5..648d96d 100644 --- a/xen/include/asm-x86/x86_32/page.h +++ b/xen/include/asm-x86/x86_32/page.h @@ -27,9 +27,6 @@ #define __PAGE_OFFSET (0xFF000000) #define __XEN_VIRT_START __PAGE_OFFSET -#define virt_to_maddr(va) ((unsigned long)(va)-DIRECTMAP_VIRT_START) -#define maddr_to_virt(ma) ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START)) - #define VADDR_BITS 32 #define VADDR_MASK (~0UL) @@ -44,6 +41,22 @@ #include #include +static inline unsigned long __virt_to_maddr(unsigned long va) +{ + ASSERT(va >= DIRECTMAP_VIRT_START && va < DIRECTMAP_VIRT_END); + return va - DIRECTMAP_VIRT_START; +} +#define virt_to_maddr(va) \ + (__virt_to_maddr((unsigned long)(va))) + +static inline void *__maddr_to_virt(unsigned long ma) +{ + ASSERT(ma < DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START); + return (void *)(ma + 
DIRECTMAP_VIRT_START); +} +#define maddr_to_virt(ma) \ + (__maddr_to_virt((unsigned long)(ma))) + /* read access (should only be used for debug printk's) */ typedef u64 intpte_t; #define PRIpte "016llx" diff --git a/xen/include/asm-x86/x86_64/bug.h b/xen/include/asm-x86/x86_64/bug.h index 3fa5deb..ecae455 100644 --- a/xen/include/asm-x86/x86_64/bug.h +++ b/xen/include/asm-x86/x86_64/bug.h @@ -2,33 +2,10 @@ #define __X86_64_BUG_H__ struct bug_frame_str { - unsigned char mov[2]; - unsigned long str; + unsigned char mov; + signed int str_disp; } __attribute__((packed)); -#define BUG_MOV_STR "\x48\xbc" - -#define dump_execution_state() \ - asm volatile ( \ - "ud2 ; ret $%c0" \ - : : "i" (BUGFRAME_dump) ) - -#define WARN() \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0x48,0xbc ; .quad %c1" \ - : : "i" (BUGFRAME_warn | (__LINE__<<2)), \ - "i" (__FILE__) ) - -#define BUG() \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0x48,0xbc ; .quad %c1" \ - : : "i" (BUGFRAME_bug | (__LINE__<<2)), \ - "i" (__FILE__) ) - -#define assert_failed(p) \ - asm volatile ( \ - "ud2 ; ret $%c0 ; .byte 0x48,0xbc ; .quad %c1" \ - " ; .byte 0x48,0xbc ; .quad %c2" \ - : : "i" (BUGFRAME_assert | (__LINE__<<2)), \ - "i" (__FILE__), "i" (#p) ) +#define bug_str(b, rip) ((const char *)(rip) + (b).str_disp) +#define BUG_STR(n) "; movl %" #n " - ., %%esp" #endif /* __X86_64_BUG_H__ */ diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h index 8899fe7..fcfb8ee 100644 --- a/xen/include/asm-x86/x86_64/page.h +++ b/xen/include/asm-x86/x86_64/page.h @@ -46,8 +46,14 @@ static inline unsigned long __virt_to_maddr(unsigned long va) } #define virt_to_maddr(va) \ (__virt_to_maddr((unsigned long)(va))) + +static inline void *__maddr_to_virt(unsigned long ma) +{ + ASSERT(ma < DIRECTMAP_VIRT_END - DIRECTMAP_VIRT_START); + return (void *)(ma + DIRECTMAP_VIRT_START); +} #define maddr_to_virt(ma) \ - ((void *)((unsigned long)(ma)+DIRECTMAP_VIRT_START)) + (__maddr_to_virt((unsigned 
long)(ma))) /* read access (should only be used for debug printk's) */ typedef u64 intpte_t; diff --git a/xen/include/public/arch-x86/xen-mca.h b/xen/include/public/arch-x86/xen-mca.h index b02ebf0..13fc5b1 100644 --- a/xen/include/public/arch-x86/xen-mca.h +++ b/xen/include/public/arch-x86/xen-mca.h @@ -62,7 +62,7 @@ * choose a different version number range that is numerically less * than that used in xen-unstable. */ -#define XEN_MCA_INTERFACE_VERSION 0x01ecc002 +#define XEN_MCA_INTERFACE_VERSION 0x01ecc003 /* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ #define XEN_MC_NONURGENT 0x0001 @@ -125,13 +125,13 @@ struct mcinfo_global { /* running domain at the time in error (most likely the impacted one) */ uint16_t mc_domid; + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ - uint32_t mc_apicid; uint16_t mc_core_threadid; /* core thread of physical core */ - uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ - uint64_t mc_gstatus; /* global status */ + uint32_t mc_apicid; uint32_t mc_flags; + uint64_t mc_gstatus; /* global status */ }; /* contains bank local x86 mc information */ @@ -166,11 +166,11 @@ struct mcinfo_extended { uint32_t mc_msrs; /* Number of msr with valid values. */ /* - * Currently Intel extended MSR (32/64) including all gp registers - * and E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP, E(R)MISC, only 10 - * of them might be useful. So expend this array to 10. - */ - struct mcinfo_msr mc_msr[10]; + * Currently Intel extended MSR (32/64) include all gp registers + * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be + * useful at present. So expand this array to 16/32 to leave room. + */ + struct mcinfo_msr mc_msr[sizeof(void *) * 4]; }; /* Recovery Action flags. 
Giving recovery result information to DOM0 */ @@ -216,8 +216,9 @@ struct cpu_offline_action }; #define MAX_UNION_SIZE 16 -struct mc_recovery +struct mcinfo_recovery { + struct mcinfo_common common; uint16_t mc_bank; /* bank nr */ uint8_t action_flags; uint8_t action_types; @@ -228,12 +229,6 @@ struct mc_recovery } action_info; }; -struct mcinfo_recovery -{ - struct mcinfo_common common; - struct mc_recovery mc_action; -}; - #define MCINFO_HYPERCALLSIZE 1024 #define MCINFO_MAXSIZE 768 @@ -241,8 +236,8 @@ struct mcinfo_recovery struct mc_info { /* Number of mcinfo_* entries in mi_data */ uint32_t mi_nentries; - - uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; + uint32_t _pad0; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; }; typedef struct mc_info mc_info_t; DEFINE_XEN_GUEST_HANDLE(mc_info_t); @@ -258,7 +253,7 @@ DEFINE_XEN_GUEST_HANDLE(mc_info_t); #define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ #define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ -typedef struct mcinfo_logical_cpu { +struct mcinfo_logical_cpu { uint32_t mc_cpunr; uint32_t mc_chipid; uint16_t mc_coreid; @@ -280,7 +275,8 @@ typedef struct mcinfo_logical_cpu { uint32_t mc_cache_alignment; int32_t mc_nmsrvals; struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; -} xen_mc_logical_cpu_t; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); @@ -299,12 +295,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); */ #define x86_mcinfo_first(_mi) \ - (struct mcinfo_common *)((_mi)->mi_data) + ((struct mcinfo_common *)(_mi)->mi_data) /* Prototype: * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); */ #define x86_mcinfo_next(_mic) \ - (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) /* Prototype: * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); @@ -350,6 +346,7 @@ struct 
xen_mc_fetch { XEN_MC_ACK if ack'ing an earlier fetch */ /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ /* OUT variables. */ @@ -382,7 +379,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); struct xen_mc_physcpuinfo { /* IN/OUT */ uint32_t ncpus; - uint32_t pad0; + uint32_t _pad0; /* OUT */ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; }; @@ -391,10 +388,10 @@ struct xen_mc_physcpuinfo { #define MC_MSRINJ_MAXMSRS 8 struct xen_mc_msrinject { /* IN */ - unsigned int mcinj_cpunr; /* target processor id */ + uint32_t mcinj_cpunr; /* target processor id */ uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ - uint32_t mcinj_pad0; + uint32_t _pad0; struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; }; @@ -406,18 +403,16 @@ struct xen_mc_mceinject { unsigned int mceinj_cpunr; /* target processor id */ }; -typedef union { - struct xen_mc_fetch mc_fetch; - struct xen_mc_notifydomain mc_notifydomain; - struct xen_mc_physcpuinfo mc_physcpuinfo; - struct xen_mc_msrinject mc_msrinject; - struct xen_mc_mceinject mc_mceinject; -} xen_mc_arg_t; - struct xen_mc { uint32_t cmd; uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ - xen_mc_arg_t u; + union { + struct xen_mc_fetch mc_fetch; + struct xen_mc_notifydomain mc_notifydomain; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; + } u; }; typedef struct xen_mc xen_mc_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_t); diff --git a/xen/include/public/arch-x86/xen.h b/xen/include/public/arch-x86/xen.h index 084348f..5f7579a 100644 --- a/xen/include/public/arch-x86/xen.h +++ b/xen/include/public/arch-x86/xen.h @@ -76,10 +76,6 @@ typedef unsigned long xen_pfn_t; /* Maximum number of virtual CPUs in multi-processor guests. 
*/ #define MAX_VIRT_CPUS 32 - -/* Machine check support */ -#include "xen-mca.h" - #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index 8574302..f7b1fc5 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -433,6 +433,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); #define XEN_DOMCTL_SENDTRIGGER_NMI 0 #define XEN_DOMCTL_SENDTRIGGER_RESET 1 #define XEN_DOMCTL_SENDTRIGGER_INIT 2 +#define XEN_DOMCTL_SENDTRIGGER_POWER 3 struct xen_domctl_sendtrigger { uint32_t trigger; /* IN */ uint32_t vcpu; /* IN */ diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h index 48d327c..f17cd45 100644 --- a/xen/include/public/sysctl.h +++ b/xen/include/public/sysctl.h @@ -382,6 +382,14 @@ struct xen_sysctl_pm_op { /* set/reset scheduler power saving option */ #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + /* cpuidle max_cstate access command */ + #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + uint32_t cmd; uint32_t cpuid; union { @@ -391,6 +399,10 @@ struct xen_sysctl_pm_op { uint64_t get_avgfreq; struct xen_get_cputopo get_topo; uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; }; }; diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h index 83e09f3..76088e3 100644 --- a/xen/include/public/trace.h +++ b/xen/include/public/trace.h @@ -142,14 +142,14 @@ #define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) #define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) #define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) -#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16) -#define TRC_HVM_IO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x16) 
-#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17) -#define TRC_HVM_MMIO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x17) +#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17) #define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) #define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) #define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) -#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0X20) +#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20) +#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) +#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) /* trace subclasses for power management */ #define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */ diff --git a/xen/include/xen/acpi.h b/xen/include/xen/acpi.h index cbf795a..43bdade 100644 --- a/xen/include/xen/acpi.h +++ b/xen/include/xen/acpi.h @@ -402,9 +402,7 @@ static inline int acpi_blacklisted(void) #endif /*!CONFIG_ACPI_INTERPRETER*/ -#define ACPI_CSTATE_LIMIT_DEFINED /* for driver builds */ -#ifdef CONFIG_ACPI - +#ifdef CONFIG_ACPI_CSTATE /* * Set highest legal C-state * 0: C0 okay, but not C1 diff --git a/xen/include/xen/console.h b/xen/include/xen/console.h index 5817f74..50dd1e5 100644 --- a/xen/include/xen/console.h +++ b/xen/include/xen/console.h @@ -14,7 +14,8 @@ struct xen_sysctl_readconsole; long read_console_ring(struct xen_sysctl_readconsole *op); -void init_console(void); +void console_init_preirq(void); +void console_init_postirq(void); void console_endboot(void); int console_has(const char *device); diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h index 65df554..282e566 100644 --- a/xen/include/xen/domain.h +++ b/xen/include/xen/domain.h @@ -58,6 +58,9 @@ void arch_dump_domain_info(struct domain *d); void arch_vcpu_reset(struct vcpu *v); +bool_t domctl_lock_acquire(void); +void domctl_lock_release(void); + extern unsigned int xen_processor_pmbits; #endif /* __XEN_DOMAIN_H__ */ diff --git a/xen/include/xen/hypercall.h 
b/xen/include/xen/hypercall.h index 99d2e00..43758b9 100644 --- a/xen/include/xen/hypercall.h +++ b/xen/include/xen/hypercall.h @@ -30,7 +30,6 @@ do_sched_op( int cmd, XEN_GUEST_HANDLE(void) arg); -extern spinlock_t domctl_lock; extern long do_domctl( XEN_GUEST_HANDLE(xen_domctl_t) u_domctl); diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h index 37fa3c7..b470492 100644 --- a/xen/include/xen/iommu.h +++ b/xen/include/xen/iommu.h @@ -55,7 +55,7 @@ struct iommu { spinlock_t lock; /* protect context, domain ids */ spinlock_t register_lock; /* protect iommu register handling */ u64 root_maddr; /* root entry machine address */ - unsigned int vector; + int vector; struct intel_iommu *intel; }; diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h index 93fdabb..f5c21f6 100644 --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -12,8 +12,8 @@ void __bug(char *file, int line) __attribute__((noreturn)); void __warn(char *file, int line); -#define BUG_ON(p) do { if (p) BUG(); } while (0) -#define WARN_ON(p) do { if (p) WARN(); } while (0) +#define BUG_ON(p) do { if (unlikely(p)) BUG(); } while (0) +#define WARN_ON(p) do { if (unlikely(p)) WARN(); } while (0) /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(struct { int:-!!(condition); })) diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h index f6ba9fa..5caf824 100644 --- a/xen/include/xen/sched-if.h +++ b/xen/include/xen/sched-if.h @@ -77,6 +77,9 @@ struct scheduler { struct xen_domctl_scheduler_op *); void (*dump_settings) (void); void (*dump_cpu_state) (int); + + void (*tick_suspend) (void); + void (*tick_resume) (void); }; #endif /* __XEN_SCHED_IF_H__ */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index efaec7e..46731a5 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -428,6 +428,8 @@ int sched_init_domain(struct domain *d); void sched_destroy_domain(struct domain *d); long 
sched_adjust(struct domain *, struct xen_domctl_scheduler_op *); int sched_id(void); +void sched_tick_suspend(void); +void sched_tick_resume(void); void vcpu_wake(struct vcpu *d); void vcpu_sleep_nosync(struct vcpu *d); void vcpu_sleep_sync(struct vcpu *d); @@ -550,6 +552,9 @@ uint64_t get_cpu_idle_time(unsigned int cpu); #define is_hvm_vcpu(v) (is_hvm_domain(v->domain)) #define need_iommu(d) ((d)->need_iommu && !(d)->is_hvm) +void set_vcpu_migration_delay(unsigned int delay); +unsigned int get_vcpu_migration_delay(void); + extern int sched_smt_power_savings; extern enum cpufreq_controller { diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst index 17f47e9..f2e4597 100644 --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -10,6 +10,22 @@ ! cpu_user_regs arch-x86/xen-@arch@.h ! trap_info arch-x86/xen.h ! vcpu_guest_context arch-x86/xen.h +? cpu_offline_action arch-x86/xen-mca.h +? mc arch-x86/xen-mca.h +? mcinfo_bank arch-x86/xen-mca.h +? mcinfo_common arch-x86/xen-mca.h +? mcinfo_extended arch-x86/xen-mca.h +? mcinfo_global arch-x86/xen-mca.h +? mcinfo_logical_cpu arch-x86/xen-mca.h +? mcinfo_msr arch-x86/xen-mca.h +? mcinfo_recovery arch-x86/xen-mca.h +! mc_fetch arch-x86/xen-mca.h +? mc_info arch-x86/xen-mca.h +? mc_mceinject arch-x86/xen-mca.h +? mc_msrinject arch-x86/xen-mca.h +? mc_notifydomain arch-x86/xen-mca.h +! mc_physcpuinfo arch-x86/xen-mca.h +? page_offline_action arch-x86/xen-mca.h ? evtchn_alloc_unbound event_channel.h ? evtchn_bind_interdomain event_channel.h ? 
evtchn_bind_ipi event_channel.h diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h index e5e15d5..6d45efd 100644 --- a/xen/include/xsm/xsm.h +++ b/xen/include/xsm/xsm.h @@ -75,6 +75,8 @@ struct xsm_operations { int (*debug_keys) (void); int (*getcpuinfo) (void); int (*availheap) (void); + int (*get_pmstat) (void); + int (*pm_op) (void); int (*evtchn_unbound) (struct domain *d, struct evtchn *chn, domid_t id2); int (*evtchn_interdomain) (struct domain *d1, struct evtchn *chn1, @@ -282,6 +284,16 @@ static inline int xsm_getcpuinfo (void) return xsm_call(getcpuinfo()); } +static inline int xsm_get_pmstat(void) +{ + return xsm_call(get_pmstat()); +} + +static inline int xsm_pm_op(void) +{ + return xsm_call(pm_op()); +} + static inline int xsm_evtchn_unbound (struct domain *d1, struct evtchn *chn, domid_t id2) { diff --git a/xen/tools/get-fields.sh b/xen/tools/get-fields.sh index 49019a4..2537fc4 100644 --- a/xen/tools/get-fields.sh +++ b/xen/tools/get-fields.sh @@ -328,7 +328,7 @@ check_field () struct|union) ;; [a-zA-Z_]*) - echo -n " CHECK_$n" + echo -n " CHECK_${n#xen_}" break ;; *) diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c index 8809828..9716f94 100644 --- a/xen/xsm/dummy.c +++ b/xen/xsm/dummy.c @@ -134,6 +134,16 @@ static int dummy_getcpuinfo (void) return 0; } +static int dummy_get_pmstat (void) +{ + return 0; +} + +static int dummy_pm_op (void) +{ + return 0; +} + static int dummy_availheap (void) { return 0; @@ -492,6 +502,8 @@ void xsm_fixup_ops (struct xsm_operations *ops) set_to_dummy_if_null(ops, perfcontrol); set_to_dummy_if_null(ops, debug_keys); set_to_dummy_if_null(ops, getcpuinfo); + set_to_dummy_if_null(ops, pm_op); + set_to_dummy_if_null(ops, get_pmstat); set_to_dummy_if_null(ops, availheap); set_to_dummy_if_null(ops, evtchn_unbound); -- 2.39.5