From 7df7425c7a930d2d9fc5b3ddc27e81062d0ee99f Mon Sep 17 00:00:00 2001 From: Jean Guyader Date: Mon, 13 Jul 2009 17:35:28 +0100 Subject: [PATCH] Rebase with xen-3.4-testing, 19693:c12ec0949abd. --- COPYING | 12 +- Config.mk | 3 +- README | 18 +- docs/ChangeLog | 5 + extras/mini-os/COPYING | 23 +++ extras/mini-os/arch/ia64/time.c | 97 +++++---- extras/mini-os/arch/x86/x86_64.S | 131 ++----------- extras/mini-os/blkfront.c | 12 +- extras/mini-os/fbfront.c | 2 - extras/mini-os/include/posix/limits.h | 2 + install.sh | 3 + stubdom/stubdom-dm | 6 +- tools/blktap/drivers/blktapctrl.c | 16 +- tools/blktap/drivers/block-qcow.c | 17 +- tools/check/check_udev | 7 +- tools/check/check_x11_devel | 3 +- tools/check/funcs.sh | 5 + tools/console/daemon/io.c | 6 +- tools/firmware/hvmloader/hvmloader.c | 7 +- tools/firmware/rombios/rombios.c | 38 +++- tools/fs-back/fs-backend.c | 69 +++++-- tools/fs-back/fs-backend.h | 11 +- tools/fs-back/fs-ops.c | 184 +++++++++++------- tools/fs-back/fs-xenbus.c | 43 ++-- tools/hotplug/Linux/Makefile | 10 +- tools/hotplug/Linux/block | 6 +- tools/hotplug/Linux/block-common.sh | 5 +- tools/hotplug/Linux/init.d/xend | 1 + tools/hotplug/Linux/vif-common.sh | 4 + tools/include/xen-foreign/Makefile | 8 +- tools/python/Makefile | 2 +- tools/python/xen/util/acmpolicy.py | 13 +- tools/python/xen/util/pci.py | 35 +++- tools/python/xen/web/connection.py | 2 +- tools/python/xen/xend/XendAPI.py | 16 +- tools/python/xen/xend/XendDomainInfo.py | 1 + tools/python/xen/xend/server/pciif.py | 5 + tools/python/xen/xm/create.py | 8 +- tools/xenstat/libxenstat/src/xenstat_linux.c | 2 +- .../compat-include/xen/platform-compat.h | 4 + xen/Makefile | 2 +- xen/arch/ia64/linux-xen/iosapic.c | 18 ++ xen/arch/x86/acpi/cpu_idle.c | 10 +- xen/arch/x86/acpi/cpuidle_menu.c | 12 +- xen/arch/x86/acpi/wakeup_prot.S | 6 + xen/arch/x86/boot/wakeup.S | 21 +- xen/arch/x86/cpu/amd.c | 2 +- xen/arch/x86/hvm/Makefile | 1 + xen/arch/x86/hvm/hvm.c | 26 ++- xen/arch/x86/hvm/i8254.c | 10 +- 
xen/arch/x86/hvm/quirks.c | 98 ++++++++++ xen/arch/x86/hvm/rtc.c | 10 +- xen/arch/x86/hvm/svm/asid.c | 12 +- xen/arch/x86/hvm/svm/svm.c | 59 ++++-- xen/arch/x86/hvm/viridian.c | 29 ++- xen/arch/x86/hvm/vlapic.c | 2 +- xen/arch/x86/hvm/vmsi.c | 2 +- xen/arch/x86/hvm/vmx/vmx.c | 10 +- xen/arch/x86/irq.c | 4 +- xen/arch/x86/machine_kexec.c | 12 ++ xen/arch/x86/mm.c | 75 +++---- xen/arch/x86/mm/hap/hap.c | 151 +++++++++++++- xen/arch/x86/mm/paging.c | 157 +++++++++++++++ xen/arch/x86/mm/shadow/multi.c | 64 +++--- xen/arch/x86/mm/shadow/private.h | 11 -- xen/arch/x86/platform_hypercall.c | 3 +- xen/arch/x86/smpboot.c | 3 + xen/arch/x86/tboot.c | 1 - xen/arch/x86/x86_64/compat/mm.c | 10 +- xen/arch/x86/x86_64/mm.c | 27 ++- xen/common/compat/domain.c | 2 +- xen/common/compat/grant_table.c | 2 +- xen/common/compat/memory.c | 2 +- xen/common/grant_table.c | 6 +- xen/drivers/cpufreq/cpufreq_ondemand.c | 39 ++-- xen/drivers/passthrough/io.c | 2 +- xen/drivers/passthrough/pci.c | 2 +- xen/drivers/passthrough/vtd/intremap.c | 62 ++++-- xen/include/asm-ia64/linux-xen/asm/iosapic.h | 3 + xen/include/asm-x86/hap.h | 5 + xen/include/asm-x86/hvm/vlapic.h | 7 +- xen/include/asm-x86/hvm/vmx/vmx.h | 2 +- xen/include/asm-x86/mm.h | 5 +- xen/include/asm-x86/paging.h | 15 ++ xen/include/asm-x86/percpu.h | 2 +- xen/include/asm-x86/x86_64/uaccess.h | 5 +- xen/include/public/io/blkif.h | 7 + xen/include/public/xen.h | 4 +- xen/include/xen/grant_table.h | 1 - 89 files changed, 1291 insertions(+), 572 deletions(-) create mode 100644 extras/mini-os/COPYING create mode 100644 xen/arch/x86/hvm/quirks.c diff --git a/COPYING b/COPYING index aba8ddf..cf891e3 100644 --- a/COPYING +++ b/COPYING @@ -17,12 +17,12 @@ Xen guests, certain files in this repository are not subject to the GPL when distributed separately or included in software packages outside this repository. Instead we specify a much more relaxed BSD-style license. 
Affected files include the Xen interface headers -(xen/include/public/COPYING), and various drivers, support functions -and header files within the Linux source trees on -http://xenbits.xensource.com/linux-2.6.X-xen.hg. In all such cases, -license terms are stated at the top of the file or in a COPYING file -in the same directory. Note that _any_ file that is modified and then -distributed within a Linux kernel is still subject to the GNU GPL. +(xen/include/public/COPYING), MiniOS (extras/mini-os) and various +drivers, support functions and header files within Xen-aware Linux +source trees. In all such cases, license terms are stated at the top +of the file or in a COPYING file in the same directory. Note that +_any_ file that is modified and then distributed within a Linux kernel +is still subject to the GNU GPL. -- Keir Fraser (on behalf of the Xen team) diff --git a/Config.mk b/Config.mk index 214b592..5d9197d 100644 --- a/Config.mk +++ b/Config.mk @@ -118,7 +118,8 @@ XEN_EXTFILES_URL=http://xenbits.xensource.com/xen-extfiles # the internet. The original download URL is preserved as a comment # near the place in the Xen Makefiles where the file is used. -QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-unstable.git +QEMU_TAG=xen-3.4.1-rc7 +QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-3.4-testing.git # Specify which qemu-dm to use. This may be `ioemu' to use the old # Mercurial in-tree version, or a local directory, or a git URL. 
diff --git a/README b/README index 1a46b32..9ace1fe 100644 --- a/README +++ b/README @@ -1,9 +1,9 @@ ################################# - __ __ _____ _____ - \ \/ /___ _ __ |___ / |___ / - \ // _ \ '_ \ |_ \ |_ \ - / \ __/ | | | ___) | ___) | - /_/\_\___|_| |_| |____(_)____/ + __ __ _____ _ _ + \ \/ /___ _ __ |___ /| || | + \ // _ \ '_ \ |_ \| || |_ + / \ __/ | | | ___) |__ _| + /_/\_\___|_| |_| |____(_) |_| ################################# @@ -21,7 +21,7 @@ development community, spearheaded by XenSource Inc, a company created by the original Xen development team to build enterprise products around Xen. -The 3.3 release offers excellent performance, hardware support and +The 3.4 release offers excellent performance, hardware support and enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD and Solaris are available from the community. @@ -54,8 +54,8 @@ performed with root privileges.] /boot/grub/menu.lst: edit this file to include an entry like the following: - title Xen 3.3 / XenLinux 2.6 - kernel /boot/xen-3.3.gz console=vga + title Xen 3.4 / XenLinux 2.6 + kernel /boot/xen-3.4.gz console=vga module /boot/vmlinuz-2.6-xen root= ro console=tty0 module /boot/initrd-2.6-xen.img @@ -74,7 +74,7 @@ performed with root privileges.] 32MB memory for internal use, which is not available for allocation to virtual machines. -3. Reboot your system and select the "Xen 3.3 / XenLinux 2.6" menu +3. Reboot your system and select the "Xen 3.4 / XenLinux 2.6" menu option. After booting Xen, Linux will start and your initialisation scripts should execute in the usual way. 
diff --git a/docs/ChangeLog b/docs/ChangeLog index 08f975e..58531f3 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -13,6 +13,11 @@ Add a ChangeLog file indicating changes to the API/ABI, as discussed here: http://lists.xensource.com/archives/html/xen-devel/2008-01/msg00010.html +Xen 3.4 release (and later) +--------------------------- + +This file not updated. + Xen 3.3 release --------------- diff --git a/extras/mini-os/COPYING b/extras/mini-os/COPYING new file mode 100644 index 0000000..c26ccfc --- /dev/null +++ b/extras/mini-os/COPYING @@ -0,0 +1,23 @@ +Copyright (c) 2009 Citrix Systems, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
+ diff --git a/extras/mini-os/arch/ia64/time.c b/extras/mini-os/arch/ia64/time.c index 6da27fb..d358730 100644 --- a/extras/mini-os/arch/ia64/time.c +++ b/extras/mini-os/arch/ia64/time.c @@ -1,26 +1,9 @@ /* * Done by Dietmar Hahn * Description: simple ia64 specific time handling - * mktime() is taken from Linux (see copyright below) * Parts are taken from FreeBSD. * **************************************************************************** - * For the copy of the mktime() from linux. - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - **************************************************************************** * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -57,45 +40,73 @@ static uint64_t itc_frequency; static uint64_t processor_frequency; static uint64_t itm_val; +static int is_leap_year(int year) +{ + if( year % 4 == 0 ) + { + if( year % 100 == 0 ) + { + if( year % 400 == 0 ) return 1; + else return 0; + } + return 1; + } + return 0; +} + +static int count_leap_years(int epoch, int year) +{ + int i, result = 0; + for( i = epoch ; i < year ; i++ ) if( is_leap_year(i) ) result++; + return result; +} + +static int get_day(int year, int mon, int day) { + int result; + switch(mon) + { + case 0: result = 0; 
break; + case 1: result = 31; break; /* 1: 31 */ + case 2: result = 59; break; /* 2: 31+28 */ + case 3: result = 90; break; /* 3: 59+31 */ + case 4: result = 120;break; /* 4: 90+30 */ + case 5: result = 151;break; /* 5: 120+31 */ + case 6: result = 181;break; /* 6: 151+30 */ + case 7: result = 212;break; /* 7: 181+31 */ + case 8: result = 243;break; /* 8: 212+31 */ + case 9: result = 273;break; /* 9: 243+30 */ + case 10:result = 304;break; /* 10:273+31 */ + case 11:result = 334;break; /* 11:304+30 */ + default: break; + } + if( is_leap_year(year) && mon > 2 ) result++; + result += day - 1; + return result; +} /* - * mktime() is take from Linux. See copyright above. * Converts Gregorian date to seconds since 1970-01-01 00:00:00. * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] - * - * This algorithm was first published by Gauss (I think). - * * WARNING: this function will overflow on 2106-02-07 06:28:16 on * machines were long is 32-bit! 
(However, as time_t is signed, we * will already get problems at other places on 2038-01-19 03:14:08) */ -static unsigned long -_mktime(const unsigned int year0, const unsigned int mon0, - const unsigned int day, const unsigned int hour, - const unsigned int min, const unsigned int sec) +static unsigned long _mktime(const unsigned int year, const unsigned int mon, + const unsigned int day, const unsigned int hour, + const unsigned int min, const unsigned int sec) { - unsigned int mon = mon0, year = year0; + unsigned long result = 0; - /* 1..12 -> 11,12,1..10 */ - if (0 >= (int) (mon -= 2)) { - mon += 12; /* Puts Feb last since it has leap day */ - year -= 1; - } + result = sec; + result += min * 60; + result += hour * 3600; + result += get_day(year, mon - 1, day) * 86400; + result += (year - 1970) * 31536000; + result += count_leap_years(1970, year) * 86400; - return ( - ( - ((unsigned long) - (year/4 - year/100 + year/400 + 367*mon/12 + day) + - year*365 - 719499 - ) * 24 + hour /* now have hours */ - ) * 60 + min /* now have minutes */ - ) * 60 + sec; /* finally seconds */ + return result; } static inline uint64_t diff --git a/extras/mini-os/arch/x86/x86_64.S b/extras/mini-os/arch/x86/x86_64.S index c7a797d..a8b1933 100644 --- a/extras/mini-os/arch/x86/x86_64.S +++ b/extras/mini-os/arch/x86/x86_64.S @@ -46,88 +46,21 @@ NMI_MASK = 0x80000000 #define ORIG_RAX 120 /* + error_code */ #define EFLAGS 144 -#define REST_SKIP 6*8 -.macro SAVE_REST - subq $REST_SKIP,%rsp -# CFI_ADJUST_CFA_OFFSET REST_SKIP - movq %rbx,5*8(%rsp) -# CFI_REL_OFFSET rbx,5*8 - movq %rbp,4*8(%rsp) -# CFI_REL_OFFSET rbp,4*8 - movq %r12,3*8(%rsp) -# CFI_REL_OFFSET r12,3*8 - movq %r13,2*8(%rsp) -# CFI_REL_OFFSET r13,2*8 - movq %r14,1*8(%rsp) -# CFI_REL_OFFSET r14,1*8 - movq %r15,(%rsp) -# CFI_REL_OFFSET r15,0*8 -.endm - - -.macro RESTORE_REST - movq (%rsp),%r15 -# CFI_RESTORE r15 - movq 1*8(%rsp),%r14 -# CFI_RESTORE r14 - movq 2*8(%rsp),%r13 -# CFI_RESTORE r13 - movq 3*8(%rsp),%r12 -# CFI_RESTORE 
r12 - movq 4*8(%rsp),%rbp -# CFI_RESTORE rbp - movq 5*8(%rsp),%rbx -# CFI_RESTORE rbx - addq $REST_SKIP,%rsp -# CFI_ADJUST_CFA_OFFSET -(REST_SKIP) -.endm - - -#define ARG_SKIP 9*8 -.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0 - .if \skipr11 - .else +.macro RESTORE_ALL movq (%rsp),%r11 -# CFI_RESTORE r11 - .endif - .if \skipr8910 - .else movq 1*8(%rsp),%r10 -# CFI_RESTORE r10 movq 2*8(%rsp),%r9 -# CFI_RESTORE r9 movq 3*8(%rsp),%r8 -# CFI_RESTORE r8 - .endif - .if \skiprax - .else movq 4*8(%rsp),%rax -# CFI_RESTORE rax - .endif - .if \skiprcx - .else movq 5*8(%rsp),%rcx -# CFI_RESTORE rcx - .endif - .if \skiprdx - .else movq 6*8(%rsp),%rdx -# CFI_RESTORE rdx - .endif movq 7*8(%rsp),%rsi -# CFI_RESTORE rsi movq 8*8(%rsp),%rdi -# CFI_RESTORE rdi - .if ARG_SKIP+\addskip > 0 - addq $ARG_SKIP+\addskip,%rsp -# CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) - .endif + addq $9*8+8,%rsp .endm .macro HYPERVISOR_IRET flag -# testb $3,1*8(%rsp) /* Don't need to do that in Mini-os, as */ -# jnz 2f /* there is no userspace? */ testl $NMI_MASK,2*8(%rsp) jnz 2f @@ -150,44 +83,25 @@ NMI_MASK = 0x80000000 * and the exception handler in %rax. 
*/ ENTRY(error_entry) -# _frame RDI /* rdi slot contains rax, oldrax contains error code */ cld subq $14*8,%rsp -# CFI_ADJUST_CFA_OFFSET (14*8) movq %rsi,13*8(%rsp) -# CFI_REL_OFFSET rsi,RSI movq 14*8(%rsp),%rsi /* load rax from rdi slot */ movq %rdx,12*8(%rsp) -# CFI_REL_OFFSET rdx,RDX movq %rcx,11*8(%rsp) -# CFI_REL_OFFSET rcx,RCX movq %rsi,10*8(%rsp) /* store rax */ -# CFI_REL_OFFSET rax,RAX movq %r8, 9*8(%rsp) -# CFI_REL_OFFSET r8,R8 movq %r9, 8*8(%rsp) -# CFI_REL_OFFSET r9,R9 movq %r10,7*8(%rsp) -# CFI_REL_OFFSET r10,R10 movq %r11,6*8(%rsp) -# CFI_REL_OFFSET r11,R11 movq %rbx,5*8(%rsp) -# CFI_REL_OFFSET rbx,RBX movq %rbp,4*8(%rsp) -# CFI_REL_OFFSET rbp,RBP movq %r12,3*8(%rsp) -# CFI_REL_OFFSET r12,R12 movq %r13,2*8(%rsp) -# CFI_REL_OFFSET r13,R13 movq %r14,1*8(%rsp) -# CFI_REL_OFFSET r14,R14 movq %r15,(%rsp) -# CFI_REL_OFFSET r15,R15 -#if 0 - cmpl $__KERNEL_CS,CS(%rsp) - je error_kernelspace -#endif + error_call_handler: movq %rdi, RDI(%rsp) movq %rsp,%rdi @@ -197,29 +111,22 @@ error_call_handler: jmp error_exit .macro zeroentry sym -# INTR_FRAME movq (%rsp),%rcx movq 8(%rsp),%r11 addq $0x10,%rsp /* skip rcx and r11 */ pushq $0 /* push error code/oldrax */ -# CFI_ADJUST_CFA_OFFSET 8 pushq %rax /* push real oldrax to the rdi slot */ -# CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry -# CFI_ENDPROC .endm .macro errorentry sym -# XCPT_FRAME movq (%rsp),%rcx movq 8(%rsp),%r11 addq $0x10,%rsp /* rsp points to the error code */ pushq %rax -# CFI_ADJUST_CFA_OFFSET 8 leaq \sym(%rip),%rax jmp error_entry -# CFI_ENDPROC .endm #define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg @@ -253,7 +160,6 @@ ENTRY(hypervisor_callback2) decl %gs:0 jmp error_exit -# ALIGN restore_all_enable_events: XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... @@ -261,12 +167,18 @@ scrit: /**** START OF CRITICAL REGION ****/ XEN_TEST_PENDING(%rsi) jnz 14f # process more events if necessary... 
XEN_PUT_VCPU_INFO(%rsi) - RESTORE_ARGS 0,8,0 + RESTORE_ALL HYPERVISOR_IRET 0 14: XEN_LOCKED_BLOCK_EVENTS(%rsi) XEN_PUT_VCPU_INFO(%rsi) - SAVE_REST + subq $6*8,%rsp + movq %rbx,5*8(%rsp) + movq %rbp,4*8(%rsp) + movq %r12,3*8(%rsp) + movq %r13,2*8(%rsp) + movq %r14,1*8(%rsp) + movq %r15,(%rsp) movq %rsp,%rdi # set the argument again jmp 11b ecrit: /**** END OF CRITICAL REGION ****/ @@ -274,7 +186,7 @@ ecrit: /**** END OF CRITICAL REGION ****/ retint_kernel: retint_restore_args: - movl EFLAGS-REST_SKIP(%rsp), %eax + movl EFLAGS-6*8(%rsp), %eax shr $9, %eax # EAX[0] == IRET_EFLAGS.IF XEN_GET_VCPU_INFO(%rsi) andb evtchn_upcall_mask(%rsi),%al @@ -282,13 +194,18 @@ retint_restore_args: jnz restore_all_enable_events # != 0 => enable event delivery XEN_PUT_VCPU_INFO(%rsi) - RESTORE_ARGS 0,8,0 + RESTORE_ALL HYPERVISOR_IRET 0 error_exit: - RESTORE_REST -/* cli */ + movq (%rsp),%r15 + movq 1*8(%rsp),%r14 + movq 2*8(%rsp),%r13 + movq 3*8(%rsp),%r12 + movq 4*8(%rsp),%rbp + movq 5*8(%rsp),%rbx + addq $6*8,%rsp XEN_BLOCK_EVENTS(%rsi) jmp retint_kernel @@ -313,17 +230,11 @@ ENTRY(device_not_available) ENTRY(debug) -# INTR_FRAME -# CFI_ADJUST_CFA_OFFSET 8 */ zeroentry do_debug -# CFI_ENDPROC ENTRY(int3) -# INTR_FRAME -# CFI_ADJUST_CFA_OFFSET 8 */ zeroentry do_int3 -# CFI_ENDPROC ENTRY(overflow) zeroentry do_overflow @@ -351,9 +262,7 @@ ENTRY(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) -# XCPT_FRAME errorentry do_stack_segment -# CFI_ENDPROC ENTRY(general_protection) diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c index d869715..1534747 100644 --- a/extras/mini-os/blkfront.c +++ b/extras/mini-os/blkfront.c @@ -244,6 +244,10 @@ void shutdown_blkfront(struct blkfront_dev *dev) xenbus_wait_for_value(path, "5", &dev->events); err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6); + xenbus_wait_for_value(path, "6", &dev->events); + + err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 1); + xenbus_wait_for_value(path, "2", &dev->events); 
xenbus_unwatch_path(XBT_NIL, path); @@ -310,14 +314,14 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) req->nr_segments = n; req->handle = dev->handle; req->id = (uintptr_t) aiocbp; - req->sector_number = aiocbp->aio_offset / dev->info.sector_size; + req->sector_number = aiocbp->aio_offset / 512; for (j = 0; j < n; j++) { req->seg[j].first_sect = 0; - req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1; + req->seg[j].last_sect = PAGE_SIZE / 512 - 1; } - req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size; - req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size; + req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512; + req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512; for (j = 0; j < n; j++) { uintptr_t data = start + j * PAGE_SIZE; if (!write) { diff --git a/extras/mini-os/fbfront.c b/extras/mini-os/fbfront.c index fe227d6..4bf11a5 100644 --- a/extras/mini-os/fbfront.c +++ b/extras/mini-os/fbfront.c @@ -580,8 +580,6 @@ void shutdown_fbfront(struct fbfront_dev *dev) snprintf(path, sizeof(path), "%s/feature-update", nodename); xenbus_rm(XBT_NIL, path); - unbind_evtchn(dev->evtchn); - free_fbfront(dev); } diff --git a/extras/mini-os/include/posix/limits.h b/extras/mini-os/include/posix/limits.h index c45e039..b8e168a 100644 --- a/extras/mini-os/include/posix/limits.h +++ b/extras/mini-os/include/posix/limits.h @@ -21,7 +21,9 @@ #define INT_MIN (-INT_MAX-1) #define UINT_MAX 0xffffffff +#define SHRT_MIN (-0x8000) #define SHRT_MAX 0x7fff +#define USHRT_MAX 0xffff #if defined(__x86_64__) || defined(__ia64__) # define LONG_MAX 0x7fffffffffffffffL diff --git a/install.sh b/install.sh index fab9472..691214e 100755 --- a/install.sh +++ b/install.sh @@ -30,6 +30,9 @@ echo "Installing Xen from '$src' to '$dst'..." 
[ -x "$(which udevinfo)" ] && \ UDEV_VERSION=$(udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/') +[ -z "$UDEV_VERSION" -a -x /sbin/udevadm ] && \ + UDEV_VERSION=$(/sbin/udevadm info -V | awk '{print $NF}') + if [ -n "$UDEV_VERSION" ] && [ $UDEV_VERSION -ge 059 ]; then echo " - installing for udev-based system" rm -rf "$tmp/etc/hotplug" diff --git a/stubdom/stubdom-dm b/stubdom/stubdom-dm index 0bcc1e6..cb1e22c 100644 --- a/stubdom/stubdom-dm +++ b/stubdom/stubdom-dm @@ -71,8 +71,8 @@ term() { kill %1 ( [ -n "$vncpid" ] && kill -9 $vncpid + rm /etc/xen/stubdoms/$domname-dm xm destroy $domname-dm - rm /etc/xen/stubdoms/$domname-dm ) & # We need to exit immediately so as to let xend do the commands above exit 0 @@ -114,10 +114,6 @@ do vbd_dev=`xenstore-read $i/dev` vbd_front=`xenstore-read $i/frontend` vbd_devtype=`xenstore-read $vbd_front/device-type` - if [ $vbd_type = "file" ] - then - vbd_type="tap:aio" - fi if [ $j -ne 0 ] then echo -n "," >> /etc/xen/stubdoms/$domname-dm diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c index 8a56302..21cdfe5 100644 --- a/tools/blktap/drivers/blktapctrl.c +++ b/tools/blktap/drivers/blktapctrl.c @@ -659,6 +659,9 @@ static int blktapctrl_new_blkif(blkif_t *blkif) DPRINTF("Received a poll for a new vbd\n"); if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) { + if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0) + return -1; + if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) { DPRINTF("Error in blktap device string(%s).\n", blk->params); @@ -682,6 +685,10 @@ static int blktapctrl_new_blkif(blkif_t *blkif) blkif->fds[WRITE] = exist->fds[WRITE]; } + add_disktype(blkif, type); + blkif->major = major; + blkif->minor = minor; + image = (image_t *)malloc(sizeof(image_t)); blkif->prv = (void *)image; blkif->ops = &tapdisk_ops; @@ -705,18 +712,11 @@ static int blktapctrl_new_blkif(blkif_t *blkif) goto fail; } - if (blktap_interface_create(ctlfd, &major, 
&minor, blkif) < 0) - return -1; - - blkif->major = major; - blkif->minor = minor; - - add_disktype(blkif, type); - } else return -1; return 0; fail: + ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor); return -EINVAL; } diff --git a/tools/blktap/drivers/block-qcow.c b/tools/blktap/drivers/block-qcow.c index c6d32a4..8027fca 100644 --- a/tools/blktap/drivers/block-qcow.c +++ b/tools/blktap/drivers/block-qcow.c @@ -55,7 +55,7 @@ #define ROUNDUP(l, s) \ ({ \ (uint64_t)( \ - (l + (s - 1)) - ((l + (s - 1)) % s)); \ + ((l) + ((s) - 1)) - (((l) + ((s) - 1)) % (s))); \ }) #undef IOCB_IDX @@ -800,14 +800,14 @@ static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flag /* read the level 1 table */ shift = s->cluster_bits + s->l2_bits; - s->l1_size = (header->size + (1LL << shift) - 1) >> shift; + s->l1_size = ROUNDUP(header->size, 1LL << shift); s->l1_table_offset = header->l1_table_offset; /*allocate a 4Kbyte multiple of memory*/ l1_table_size = s->l1_size * sizeof(uint64_t); if (l1_table_size % 4096 > 0) { - l1_table_size = ((l1_table_size >> 12) + 1) << 12; + l1_table_size = ROUNDUP(l1_table_size, 4096); } ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size); if (ret != 0) goto fail; @@ -821,10 +821,10 @@ static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flag lseek(fd, 0, SEEK_SET); l1_table_block = l1_table_size + s->l1_table_offset; - l1_table_block = l1_table_block + 512 - (l1_table_block % 512); + l1_table_block = ROUNDUP(l1_table_block, 512); ret = posix_memalign((void **)&buf2, 4096, l1_table_block); if (ret != 0) goto fail; - if (read(fd, buf2, l1_table_block) != l1_table_block) + if (read(fd, buf2, l1_table_block) < l1_table_size + s->l1_table_offset) goto fail; memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size); @@ -878,7 +878,8 @@ static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flag memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size); lseek(fd, 0, SEEK_SET); - 
if (write(fd, buf2, l1_table_block) != l1_table_block) { + if (write(fd, buf2, l1_table_block) < + l1_table_size + s->l1_table_offset) { DPRINTF("qcow: Failed to write new L1 table\n"); goto fail; } @@ -1226,11 +1227,11 @@ int qcow_create(const char *filename, uint64_t total_size, header_size = (header_size + 7) & ~7; if (header_size % 4096 > 0) { - header_size = ((header_size >> 12) + 1) << 12; + header_size = ROUNDUP(header_size, 4096); } shift = header.cluster_bits + header.l2_bits; - l1_size = ((size * 512) + (1LL << shift) - 1) >> shift; + l1_size = ROUNDUP(size * 512, 1LL << shift); header.l1_table_offset = cpu_to_be64(header_size); DPRINTF("L1 Table offset: %d, size %d\n", diff --git a/tools/check/check_udev b/tools/check/check_udev index 3deaa36..27e557d 100755 --- a/tools/check/check_udev +++ b/tools/check/check_udev @@ -8,8 +8,11 @@ OpenBSD|NetBSD|FreeBSD) has_or_fail vnconfig ;; Linux) - has_or_fail udevinfo - [ "`udevinfo -V | awk '{print $NF}'`" -ge 59 ] 2>/dev/null || \ + has /sbin/udevadm && \ + udevver=`/sbin/udevadm info -V | awk '{print $NF}'` + [ -z "$udevver" ] && has_or_fail udevinfo && \ + udevver=`udevinfo -V | awk '{print $NF}'` + [ "$udevver" -ge 59 ] 2>/dev/null || \ has hotplug || \ fail "udev is too old, upgrade to version 59 or later" ;; diff --git a/tools/check/check_x11_devel b/tools/check/check_x11_devel index aab7e42..8b7df98 100755 --- a/tools/check/check_x11_devel +++ b/tools/check/check_x11_devel @@ -5,4 +5,5 @@ has_header X11/keysymdef.h || \ has_header /usr/X11R6/include/X11/keysymdef.h || \ -fail "can't find X11 headers" +has_header /usr/X11R7/include/X11/keysymdef.h || \ +warning "can't find X11 headers" diff --git a/tools/check/funcs.sh b/tools/check/funcs.sh index d3fb60a..095dcb1 100644 --- a/tools/check/funcs.sh +++ b/tools/check/funcs.sh @@ -78,6 +78,11 @@ check_sys_root() { fi } +warning() { + echo + echo " *** `basename "$0"` FAILED${*+: $*}" +} + fail() { echo echo " *** `basename "$0"` FAILED${*+: $*}" diff --git 
a/tools/console/daemon/io.c b/tools/console/daemon/io.c index 2475963..67fb22c 100644 --- a/tools/console/daemon/io.c +++ b/tools/console/daemon/io.c @@ -127,7 +127,7 @@ static int write_with_timestamp(int fd, const char *data, size_t sz, const char *last_byte = data + sz - 1; while (data <= last_byte) { - const char *nl = memchr(data, '\n', sz); + const char *nl = memchr(data, '\n', last_byte + 1 - data); int found_nl = (nl != NULL); if (!found_nl) nl = last_byte; @@ -688,7 +688,7 @@ static struct domain *create_domain(int domid) dom->buffer.capacity = 0; dom->buffer.max_capacity = 0; dom->event_count = 0; - dom->next_period = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD; + dom->next_period = ((long long)ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD; dom->next = NULL; dom->ring_ref = -1; @@ -1009,7 +1009,7 @@ void handle_io(void) if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0) return; - now = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000); + now = ((long long)ts.tv_sec * 1000) + (ts.tv_nsec / 1000000); /* Re-calculate any event counter allowances & unblock domains with new allowance */ diff --git a/tools/firmware/hvmloader/hvmloader.c b/tools/firmware/hvmloader/hvmloader.c index 7552122..fef3158 100644 --- a/tools/firmware/hvmloader/hvmloader.c +++ b/tools/firmware/hvmloader/hvmloader.c @@ -451,8 +451,9 @@ static int scan_etherboot_nic(uint32_t copy_rom_dest) { uint8_t devfn; uint16_t class, vendor_id, device_id; + int rom_size = 0; - for ( devfn = 0; devfn < 128; devfn++ ) + for ( devfn = 0; (devfn < 128) && !rom_size; devfn++ ) { class = pci_readw(devfn, PCI_CLASS_DEVICE); vendor_id = pci_readw(devfn, PCI_VENDOR_ID); @@ -462,11 +463,11 @@ static int scan_etherboot_nic(uint32_t copy_rom_dest) if ( (vendor_id != 0xffff) && (device_id != 0xffff) && (class == 0x0200) ) - return scan_option_rom( + rom_size = scan_option_rom( devfn, vendor_id, device_id, etherboot, copy_rom_dest); } - return 0; + return rom_size; } /* diff --git 
a/tools/firmware/rombios/rombios.c b/tools/firmware/rombios/rombios.c index 0aea421..2d97a40 100644 --- a/tools/firmware/rombios/rombios.c +++ b/tools/firmware/rombios/rombios.c @@ -8362,8 +8362,8 @@ ASM_END /* Jump to the boot vector */ ASM_START mov bp, sp -// push cs -// push #int18_handler + push cs + push #int18_handler ;; Build an iret stack frame that will take us to the boot vector. ;; iret pops ip, then cs, then flags, so push them in the opposite order. pushf @@ -10439,22 +10439,44 @@ no_serial: ret rom_checksum: - push ax - push bx - push cx + pusha + push ds + xor ax, ax xor bx, bx xor cx, cx + xor dx, dx + mov ch, [2] shl cx, #1 + + jnc checksum_loop + jz checksum_loop + xchg dx, cx + dec cx + checksum_loop: add al, [bx] inc bx loop checksum_loop + + test dx, dx + je checksum_out + + add al, [bx] + mov cx, dx + mov dx, ds + add dh, #0x10 + mov ds, dx + xor dx, dx + xor bx, bx + + jmp checksum_loop + +checksum_out: and al, #0xff - pop cx - pop bx - pop ax + pop ds + popa ret diff --git a/tools/fs-back/fs-backend.c b/tools/fs-back/fs-backend.c index 721b2dc..648b63b 100644 --- a/tools/fs-back/fs-backend.c +++ b/tools/fs-back/fs-backend.c @@ -144,7 +144,8 @@ moretodo: xc_evtchn_notify(mount->evth, mount->local_evtchn); } -static void terminate_mount_request(struct fs_mount *mount) { +void terminate_mount_request(struct fs_mount *mount) +{ int count = 0, i; FS_DEBUG("terminate_mount_request %s\n", mount->frontend); @@ -158,7 +159,13 @@ static void terminate_mount_request(struct fs_mount *mount) { } mount->nr_entries = count; - while (!xenbus_frontend_state_changed(mount, STATE_CLOSING)); + /* wait for the frontend to shut down but don't wait more than 3 + * seconds */ + i = 0; + while (!xenbus_frontend_state_changed(mount, STATE_CLOSING) && i < 3) { + sleep(1); + i++; + } xenbus_write_backend_state(mount, STATE_CLOSED); xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size); @@ -183,7 +190,7 @@ static void handle_connection(int 
frontend_dom_id, int export_id, char *frontend { struct fs_mount *mount; struct fs_export *export; - struct fsif_sring *sring; + struct fsif_sring *sring = NULL; uint32_t dom_ids[MAX_RING_SIZE]; int i; @@ -204,24 +211,38 @@ static void handle_connection(int frontend_dom_id, int export_id, char *frontend } mount = (struct fs_mount*)malloc(sizeof(struct fs_mount)); + memset(mount, 0, sizeof(struct fs_mount)); mount->dom_id = frontend_dom_id; mount->export = export; mount->mount_id = mount_id++; - xenbus_read_mount_request(mount, frontend); + if (xenbus_read_mount_request(mount, frontend) < 0) + goto error; FS_DEBUG("Frontend found at: %s (gref=%d, evtchn=%d)\n", mount->frontend, mount->grefs[0], mount->remote_evtchn); - xenbus_write_backend_node(mount); + if (!xenbus_write_backend_node(mount)) { + FS_DEBUG("ERROR: failed to write backend node on xenbus\n"); + goto error; + } mount->evth = -1; mount->evth = xc_evtchn_open(); - assert(mount->evth != -1); + if (mount->evth < 0) { + FS_DEBUG("ERROR: Couldn't open evtchn!\n"); + goto error; + } mount->local_evtchn = -1; mount->local_evtchn = xc_evtchn_bind_interdomain(mount->evth, mount->dom_id, mount->remote_evtchn); - assert(mount->local_evtchn != -1); + if (mount->local_evtchn < 0) { + FS_DEBUG("ERROR: Couldn't bind evtchn!\n"); + goto error; + } mount->gnth = -1; mount->gnth = xc_gnttab_open(); - assert(mount->gnth != -1); + if (mount->gnth < 0) { + FS_DEBUG("ERROR: Couldn't open gnttab!\n"); + goto error; + } for(i=0; ishared_ring_size; i++) dom_ids[i] = mount->dom_id; sring = xc_gnttab_map_grant_refs(mount->gnth, @@ -230,16 +251,40 @@ static void handle_connection(int frontend_dom_id, int export_id, char *frontend mount->grefs, PROT_READ | PROT_WRITE); + if (!sring) { + FS_DEBUG("ERROR: Couldn't amp grant refs!\n"); + goto error; + } + BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * XC_PAGE_SIZE); mount->nr_entries = mount->ring.nr_ents; for (i = 0; i < MAX_FDS; i++) mount->fds[i] = -1; 
LIST_INSERT_HEAD(&mount_requests_head, mount, entries); - xenbus_watch_frontend_state(mount); - xenbus_write_backend_state(mount, STATE_READY); - + if (!xenbus_watch_frontend_state(mount)) { + FS_DEBUG("ERROR: failed to watch frontend state on xenbus\n"); + goto error; + } + if (!xenbus_write_backend_state(mount, STATE_READY)) { + FS_DEBUG("ERROR: failed to write backend state to xenbus\n"); + goto error; + } + allocate_request_array(mount); + + return; + +error: + xenbus_write_backend_state(mount, STATE_CLOSED); + if (sring) + xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size); + if (mount->gnth > 0) + xc_gnttab_close(mount->gnth); + if (mount->local_evtchn > 0) + xc_evtchn_unbind(mount->evth, mount->local_evtchn); + if (mount->evth > 0) + xc_evtchn_close(mount->evth); } static void await_connections(void) @@ -404,7 +449,7 @@ int main(void) xenbus_create_request_node(); /* Create & register the default export */ - export = create_export("default", "/exports"); + export = create_export("default", "/var/lib/xen"); xenbus_register_export(export); if (socketpair(PF_UNIX,SOCK_STREAM, 0, pipefds) == -1) diff --git a/tools/fs-back/fs-backend.h b/tools/fs-back/fs-backend.h index 5045690..9399841 100644 --- a/tools/fs-back/fs-backend.h +++ b/tools/fs-back/fs-backend.h @@ -56,6 +56,7 @@ struct fs_mount LIST_ENTRY(fs_mount) entries; }; +void terminate_mount_request(struct fs_mount *mount); /* Handle to XenStore driver */ extern struct xs_handle *xsh; @@ -63,12 +64,12 @@ extern struct xs_handle *xsh; bool xenbus_create_request_node(void); int xenbus_register_export(struct fs_export *export); int xenbus_get_watch_fd(void); -void xenbus_read_mount_request(struct fs_mount *mount, char *frontend); -void xenbus_write_backend_node(struct fs_mount *mount); -void xenbus_write_backend_state(struct fs_mount *mount, const char *state); +int xenbus_read_mount_request(struct fs_mount *mount, char *frontend); +bool xenbus_write_backend_node(struct fs_mount *mount); 
+bool xenbus_write_backend_state(struct fs_mount *mount, const char *state); int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate); -void xenbus_watch_frontend_state(struct fs_mount *mount); -void xenbus_unwatch_frontend_state(struct fs_mount *mount); +bool xenbus_watch_frontend_state(struct fs_mount *mount); +bool xenbus_unwatch_frontend_state(struct fs_mount *mount); char* xenbus_read_frontend_state(struct fs_mount *mount); /* File operations, implemented in fs-ops.c */ diff --git a/tools/fs-back/fs-ops.c b/tools/fs-back/fs-ops.c index 6abd354..0458267 100644 --- a/tools/fs-back/fs-ops.c +++ b/tools/fs-back/fs-ops.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "fs-backend.h" #include "fs-debug.h" @@ -23,6 +24,25 @@ #define BUFFER_SIZE 1024 +static int check_export_path(const char *export_path, const char *path) +{ + int i; + if (!export_path || !path) + return -1; + if (strlen(path) < strlen(export_path)) + return -1; + if (strstr(path, "..") != NULL) + return -1; + for (i = 0; i < strlen(path); i++) { + if (!isascii(path[i])) + return -1; + } + if (strncmp(export_path, path, strlen(export_path))) + return -1; + else + return 0; +} + static unsigned short get_request(struct fs_mount *mount, struct fsif_request *req) { unsigned short id = get_id_from_freelist(mount->freelist); @@ -47,7 +67,7 @@ static int get_fd(struct fs_mount *mount) static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req) { - char *file_name, full_path[BUFFER_SIZE]; + char *file_name; int fd; RING_IDX rsp_idx; fsif_response_t *rsp; @@ -62,15 +82,14 @@ static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req) req_id = req->id; FS_DEBUG("File open issued for %s\n", file_name); - assert(BUFFER_SIZE > - strlen(file_name) + strlen(mount->export->export_path) + 1); - snprintf(full_path, sizeof(full_path), "%s/%s", - mount->export->export_path, file_name); - assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 
0); - FS_DEBUG("Issuing open for %s\n", full_path); + if (check_export_path(mount->export->export_path, file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + fd = -1; + goto out; + } fd = get_fd(mount); if (fd >= 0) { - int real_fd = open(full_path, O_RDWR); + int real_fd = open(file_name, O_RDWR); if (real_fd < 0) fd = -1; else @@ -79,6 +98,11 @@ static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req) FS_DEBUG("Got FD: %d for real %d\n", fd, real_fd); } } +out: + if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; @@ -166,7 +190,11 @@ static void dispatch_file_read(struct fs_mount *mount, struct fsif_request *req) priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2; priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req; - assert(aio_read(&priv_req->aiocb) >= 0); + if (aio_read(&priv_req->aiocb) < 0) { + FS_DEBUG("ERROR: aio_read failed errno=%d\n", errno); + xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ @@ -180,9 +208,10 @@ static void end_file_read(struct fs_mount *mount, struct fs_request *priv_req) uint16_t req_id; /* Release the grant */ - assert(xc_gnttab_munmap(mount->gnth, - priv_req->page, - priv_req->count) == 0); + if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* Get a response from the ring */ rsp_idx = mount->ring.rsp_prod_pvt++; @@ -236,7 +265,11 @@ static void 
dispatch_file_write(struct fs_mount *mount, struct fsif_request *req priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2; priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req; - assert(aio_write(&priv_req->aiocb) >= 0); + if (aio_write(&priv_req->aiocb) < 0) { + FS_DEBUG("ERROR: aio_write failed errno=%d\n", errno); + xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request @@ -251,9 +284,10 @@ static void end_file_write(struct fs_mount *mount, struct fs_request *priv_req) uint16_t req_id; /* Release the grant */ - assert(xc_gnttab_munmap(mount->gnth, - priv_req->page, - priv_req->count) == 0); + if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* Get a response from the ring */ rsp_idx = mount->ring.rsp_prod_pvt++; @@ -349,7 +383,7 @@ static void dispatch_truncate(struct fs_mount *mount, struct fsif_request *req) static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req) { - char *file_name, full_path[BUFFER_SIZE]; + char *file_name; int ret; RING_IDX rsp_idx; fsif_response_t *rsp; @@ -364,14 +398,17 @@ static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req) req_id = req->id; FS_DEBUG("File remove issued for %s\n", file_name); - assert(BUFFER_SIZE > - strlen(file_name) + strlen(mount->export->export_path) + 1); - snprintf(full_path, sizeof(full_path), "%s/%s", - mount->export->export_path, file_name); - assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0); - FS_DEBUG("Issuing remove for %s\n", full_path); - ret = remove(full_path); + if (check_export_path(mount->export->export_path, file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + ret = -1; + } else { + ret = remove(file_name); + } FS_DEBUG("Got ret: 
%d\n", ret); + if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; @@ -389,7 +426,6 @@ static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req) static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req) { char *buf, *old_file_name, *new_file_name; - char old_full_path[BUFFER_SIZE], new_full_path[BUFFER_SIZE]; int ret; RING_IDX rsp_idx; fsif_response_t *rsp; @@ -407,18 +443,18 @@ static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req) new_file_name = buf + req->u.frename.new_name_offset; FS_DEBUG("File rename issued for %s -> %s (buf=%s)\n", old_file_name, new_file_name, buf); - assert(BUFFER_SIZE > - strlen(old_file_name) + strlen(mount->export->export_path) + 1); - assert(BUFFER_SIZE > - strlen(new_file_name) + strlen(mount->export->export_path) + 1); - snprintf(old_full_path, sizeof(old_full_path), "%s/%s", - mount->export->export_path, old_file_name); - snprintf(new_full_path, sizeof(new_full_path), "%s/%s", - mount->export->export_path, new_file_name); - assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0); - FS_DEBUG("Issuing rename for %s -> %s\n", old_full_path, new_full_path); - ret = rename(old_full_path, new_full_path); + if (check_export_path(mount->export->export_path, old_file_name) < 0 || + check_export_path(mount->export->export_path, new_file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + ret = -1; + } else { + ret = rename(old_file_name, new_file_name); + } FS_DEBUG("Got ret: %d\n", ret); + if (xc_gnttab_munmap(mount->gnth, buf, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request * should not 
be used (it may be overrinden by a response) */ mount->ring.req_cons++; @@ -435,7 +471,7 @@ static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req) static void dispatch_create(struct fs_mount *mount, struct fsif_request *req) { - char *file_name, full_path[BUFFER_SIZE]; + char *file_name; int ret; int8_t directory; int32_t mode; @@ -453,27 +489,26 @@ static void dispatch_create(struct fs_mount *mount, struct fsif_request *req) PROT_READ); req_id = req->id; - FS_DEBUG("File create issued for %s\n", file_name); - assert(BUFFER_SIZE > - strlen(file_name) + strlen(mount->export->export_path) + 1); - snprintf(full_path, sizeof(full_path), "%s/%s", - mount->export->export_path, file_name); - assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0); + if (check_export_path(mount->export->export_path, file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + ret = -1; + goto out; + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; if(directory) { - FS_DEBUG("Issuing create for directory: %s\n", full_path); - ret = mkdir(full_path, mode); + FS_DEBUG("Issuing create for directory: %s\n", file_name); + ret = mkdir(file_name, mode); } else { - FS_DEBUG("Issuing create for file: %s\n", full_path); + FS_DEBUG("Issuing create for file: %s\n", file_name); ret = get_fd(mount); if (ret >= 0) { - int real_fd = creat(full_path, mode); + int real_fd = creat(file_name, mode); if (real_fd < 0) ret = -1; else @@ -483,6 +518,11 @@ static void dispatch_create(struct fs_mount *mount, struct fsif_request *req) } } } +out: + if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } FS_DEBUG("Got ret %d (errno=%d)\n", ret, errno); /* Get a response from the ring */ @@ -495,8 +535,8 @@ static void dispatch_create(struct fs_mount *mount, struct fsif_request *req) 
static void dispatch_list(struct fs_mount *mount, struct fsif_request *req) { - char *file_name, *buf, full_path[BUFFER_SIZE]; - uint32_t offset, nr_files, error_code; + char *file_name, *buf; + uint32_t offset = 0, nr_files = 0, error_code = 0; uint64_t ret_val; RING_IDX rsp_idx; fsif_response_t *rsp; @@ -514,17 +554,18 @@ static void dispatch_list(struct fs_mount *mount, struct fsif_request *req) req_id = req->id; FS_DEBUG("Dir list issued for %s\n", file_name); - assert(BUFFER_SIZE > - strlen(file_name) + strlen(mount->export->export_path) + 1); - snprintf(full_path, sizeof(full_path), "%s/%s", - mount->export->export_path, file_name); + if (check_export_path(mount->export->export_path, file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + error_code = 1; + goto error_out; + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; ret_val = 0; nr_files = 0; - dir = opendir(full_path); + dir = opendir(file_name); if(dir == NULL) { error_code = errno; @@ -537,7 +578,8 @@ static void dispatch_list(struct fs_mount *mount, struct fsif_request *req) /* If there was any error with reading the directory, errno will be set */ error_code = errno; /* Copy file names of the remaining non-NULL dirents into buf */ - assert(NAME_MAX < XC_PAGE_SIZE >> 1); + if (NAME_MAX >= XC_PAGE_SIZE >> 1) + goto error_out; while(dirent != NULL && (XC_PAGE_SIZE - ((unsigned long)buf & XC_PAGE_MASK) > NAME_MAX)) { @@ -553,8 +595,11 @@ error_out: ret_val = ((nr_files << NR_FILES_SHIFT) & NR_FILES_MASK) | ((error_code << ERROR_SHIFT) & ERROR_MASK) | (dirent != NULL ? 
HAS_MORE_FLAG : 0); - assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0); - + if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } + /* Get a response from the ring */ rsp_idx = mount->ring.rsp_prod_pvt++; FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id); @@ -596,7 +641,7 @@ static void dispatch_chmod(struct fs_mount *mount, struct fsif_request *req) static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req) { - char *file_name, full_path[BUFFER_SIZE]; + char *file_name; RING_IDX rsp_idx; fsif_response_t *rsp; uint16_t req_id; @@ -612,16 +657,19 @@ static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req) req_id = req->id; FS_DEBUG("Fs space issued for %s\n", file_name); - assert(BUFFER_SIZE > - strlen(file_name) + strlen(mount->export->export_path) + 1); - snprintf(full_path, sizeof(full_path), "%s/%s", - mount->export->export_path, file_name); - assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0); - FS_DEBUG("Issuing fs space for %s\n", full_path); - ret = statvfs(full_path, &stat); + if (check_export_path(mount->export->export_path, file_name) < 0) { + FS_DEBUG("Filename check failed\n"); + ret = -1; + } else { + ret = statvfs(file_name, &stat); + } if(ret >= 0) ret = stat.f_bsize * stat.f_bfree; + if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) { + FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno); + terminate_mount_request(mount); + } /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; @@ -661,9 +709,11 @@ static void dispatch_file_sync(struct fs_mount *mount, struct fsif_request *req) priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL; priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2; priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req; - 
assert(aio_fsync(O_SYNC, &priv_req->aiocb) >= 0); + if (aio_fsync(O_SYNC, &priv_req->aiocb) < 0) { + FS_DEBUG("ERROR: aio_fsync failed errno=%d\n", errno); + terminate_mount_request(mount); + } - /* We can advance the request consumer index, from here on, the request * should not be used (it may be overrinden by a response) */ mount->ring.req_cons++; diff --git a/tools/fs-back/fs-xenbus.c b/tools/fs-back/fs-xenbus.c index cc47dbc..603bfb9 100644 --- a/tools/fs-back/fs-xenbus.c +++ b/tools/fs-back/fs-xenbus.c @@ -107,11 +107,15 @@ int xenbus_get_watch_fd(void) int res; assert(xsh != NULL); res = xs_watch(xsh, WATCH_NODE, "conn-watch"); - assert(res); + if (!res) { + FS_DEBUG("ERROR: xs_watch %s failed ret=%d errno=%d\n", + WATCH_NODE, res, errno); + return -1; + } return xs_fileno(xsh); } -void xenbus_read_mount_request(struct fs_mount *mount, char *frontend) +int xenbus_read_mount_request(struct fs_mount *mount, char *frontend) { char node[1024]; char *s; @@ -126,12 +130,18 @@ void xenbus_read_mount_request(struct fs_mount *mount, char *frontend) mount->frontend = frontend; snprintf(node, sizeof(node), "%s/state", frontend); s = xs_read(xsh, XBT_NULL, node, NULL); - assert(strcmp(s, STATE_READY) == 0); + if (strcmp(s, STATE_READY) != 0) { + FS_DEBUG("ERROR: frontend not read\n"); + goto error; + } free(s); snprintf(node, sizeof(node), "%s/ring-size", frontend); s = xs_read(xsh, XBT_NULL, node, NULL); mount->shared_ring_size = atoi(s); - assert(mount->shared_ring_size <= MAX_RING_SIZE); + if (mount->shared_ring_size > MAX_RING_SIZE) { + FS_DEBUG("ERROR: shared_ring_size (%d) > MAX_RING_SIZE\n", mount->shared_ring_size); + goto error; + } free(s); for(i=0; ishared_ring_size; i++) { @@ -144,6 +154,11 @@ void xenbus_read_mount_request(struct fs_mount *mount, char *frontend) s = xs_read(xsh, XBT_NULL, node, NULL); mount->remote_evtchn = atoi(s); free(s); + return 0; + +error: + free(s); + return -1; } /* Small utility function to figure out our domain id */ @@ -161,7 
+176,7 @@ static int get_self_id(void) } -void xenbus_write_backend_node(struct fs_mount *mount) +bool xenbus_write_backend_node(struct fs_mount *mount) { char node[1024], backend_node[1024]; int self_id; @@ -175,10 +190,10 @@ void xenbus_write_backend_node(struct fs_mount *mount) xs_write(xsh, XBT_NULL, node, backend_node, strlen(backend_node)); snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id); - xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED)); + return xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED)); } -void xenbus_write_backend_state(struct fs_mount *mount, const char *state) +bool xenbus_write_backend_state(struct fs_mount *mount, const char *state) { char node[1024]; int self_id; @@ -186,29 +201,25 @@ void xenbus_write_backend_state(struct fs_mount *mount, const char *state) assert(xsh != NULL); self_id = get_self_id(); snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id); - xs_write(xsh, XBT_NULL, node, state, strlen(state)); + return xs_write(xsh, XBT_NULL, node, state, strlen(state)); } -void xenbus_watch_frontend_state(struct fs_mount *mount) +bool xenbus_watch_frontend_state(struct fs_mount *mount) { - int res; char statepath[1024]; assert(xsh != NULL); snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend); - res = xs_watch(xsh, statepath, "frontend-state"); - assert(res); + return xs_watch(xsh, statepath, "frontend-state"); } -void xenbus_unwatch_frontend_state(struct fs_mount *mount) +bool xenbus_unwatch_frontend_state(struct fs_mount *mount) { - int res; char statepath[1024]; assert(xsh != NULL); snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend); - res = xs_unwatch(xsh, statepath, "frontend-state"); - assert(res); + return xs_unwatch(xsh, statepath, "frontend-state"); } int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate) diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile index 
c9edd0d..b9b7d64 100644 --- a/tools/hotplug/Linux/Makefile +++ b/tools/hotplug/Linux/Makefile @@ -28,6 +28,14 @@ XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl XEN_HOTPLUG_DIR = /etc/hotplug XEN_HOTPLUG_SCRIPTS = xen-backend.agent +UDEVVER = 0 +ifeq ($(shell [ -x /sbin/udevadm ] && echo 1),1) +UDEVVER = $(shell /sbin/udevadm info -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' ) +endif +ifeq ($(shell [ -x /usr/bin/udevinfo ] && echo 1),1) +UDEVVER = $(shell /usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' ) +endif + UDEV_RULES_DIR = /etc/udev UDEV_RULES = xen-backend.rules xend.rules @@ -36,7 +44,7 @@ DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),) ifeq ($(findstring $(DI),$(DE)),$(DI)) HOTPLUGS=install-hotplug install-udev else -ifeq ($(shell [ -x /usr/bin/udevinfo ] && [ `/usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/'` -ge 059 ] && echo 1),1) +ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1) HOTPLUGS=install-udev else HOTPLUGS=install-hotplug diff --git a/tools/hotplug/Linux/block b/tools/hotplug/Linux/block index 8c61744..e042a7e 100644 --- a/tools/hotplug/Linux/block +++ b/tools/hotplug/Linux/block @@ -202,13 +202,13 @@ case "$command" in p=$(xenstore_read "$XENBUS_PATH/params") mode=$(xenstore_read "$XENBUS_PATH/mode") fi + FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id") + FRONTEND_UUID=$(xenstore_read_default \ + "/local/domain/$FRONTEND_ID/vm" 'unknown') case $t in phy) dev=$(expand_dev $p) - FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id") - FRONTEND_UUID=$(xenstore_read_default \ - "/local/domain/$FRONTEND_ID/vm" 'unknown') if [ -L "$dev" ] then diff --git a/tools/hotplug/Linux/block-common.sh b/tools/hotplug/Linux/block-common.sh index a0ebc9b..efedbf5 100644 --- a/tools/hotplug/Linux/block-common.sh +++ b/tools/hotplug/Linux/block-common.sh @@ -110,7 +110,8 @@ same_vm() # allowed. 
local othervm=$(xenstore_read_default "/local/domain/$otherdom/vm" \ "$FRONTEND_UUID") - - [ "$FRONTEND_UUID" = "$othervm" ] + local target=$(xenstore_read_default "/local/domain/$FRONTEND_ID/target" \ + "-1") + [ "$FRONTEND_UUID" = "$othervm" -o "$target" = "$otherdom" ] } diff --git a/tools/hotplug/Linux/init.d/xend b/tools/hotplug/Linux/init.d/xend index 32dfc84..b98924a 100755 --- a/tools/hotplug/Linux/init.d/xend +++ b/tools/hotplug/Linux/init.d/xend @@ -39,6 +39,7 @@ function await_daemons_up case "$1" in start) + mkdir -p /var/lock/subsys touch /var/lock/subsys/xend xend start await_daemons_up diff --git a/tools/hotplug/Linux/vif-common.sh b/tools/hotplug/Linux/vif-common.sh index 5c1e9c3..fe483f9 100644 --- a/tools/hotplug/Linux/vif-common.sh +++ b/tools/hotplug/Linux/vif-common.sh @@ -103,6 +103,8 @@ handle_iptable() return fi + claim_lock "iptables" + if [ "$ip" != "" ] then local addr @@ -117,6 +119,8 @@ handle_iptable() # No IP addresses have been specified, so allow anything. 
frob_iptable fi + + release_lock "iptables" } diff --git a/tools/include/xen-foreign/Makefile b/tools/include/xen-foreign/Makefile index 0b8ed92..9d66166 100644 --- a/tools/include/xen-foreign/Makefile +++ b/tools/include/xen-foreign/Makefile @@ -22,14 +22,14 @@ check-headers: checker diff -u reference.size tmp.size rm tmp.size -x86_32.h: mkheader.py $(ROOT)/arch-x86/xen-x86_32.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h +x86_32.h: mkheader.py structs.py $(ROOT)/arch-x86/xen-x86_32.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h python $< $* $@ $(filter %.h,$^) -x86_64.h: mkheader.py $(ROOT)/arch-x86/xen-x86_64.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h +x86_64.h: mkheader.py structs.py $(ROOT)/arch-x86/xen-x86_64.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h python $< $* $@ $(filter %.h,$^) -ia64.h: mkheader.py $(ROOT)/arch-ia64.h $(ROOT)/xen.h +ia64.h: mkheader.py structs.py $(ROOT)/arch-ia64.h $(ROOT)/xen.h python $< $* $@ $(filter %.h,$^) -checker.c: mkchecker.py +checker.c: mkchecker.py structs.py python $< $@ $(architectures) diff --git a/tools/python/Makefile b/tools/python/Makefile index 16ab59f..3845ff7 100644 --- a/tools/python/Makefile +++ b/tools/python/Makefile @@ -25,7 +25,7 @@ build: buildpy refresh-pot refresh-po $(CATALOGS) # string, hence the sed black magic. 
This avoids the expensive # re-generation of .po files on every single build refresh-pot: $(I18NSRCFILES) - xgettext --default-domain=$(POPACAKGE) \ + xgettext --default-domain=$(POPACKAGE) \ --keyword=N_ \ --keyword=_ \ -o $(POTFILE)-tmp \ diff --git a/tools/python/xen/util/acmpolicy.py b/tools/python/xen/util/acmpolicy.py index 1a95c3a..6752169 100644 --- a/tools/python/xen/util/acmpolicy.py +++ b/tools/python/xen/util/acmpolicy.py @@ -17,12 +17,19 @@ #============================================================================ import os -import sha import stat import array import struct import shutil import commands + +# sha is deprecated as of python 2.6 +try: + from hashlib import sha1 +except ImportError: + # but hashlib was only added in python 2.5 + from sha import new as sha1 + from xml.dom import minidom, Node from xen.xend.XendLogging import log from xen.util import xsconstants, bootloader, mkdir @@ -1102,8 +1109,8 @@ class ACMPolicy(XSPolicy): return None def hash(self): - """ Calculate a SAH1 hash of the XML policy """ - return sha.sha(self.toxml()) + """ Calculate a SHA1 hash of the XML policy """ + return sha1(self.toxml()) def save(self): ### Save the XML policy into a file ### diff --git a/tools/python/xen/util/pci.py b/tools/python/xen/util/pci.py index a5eb407..6367640 100644 --- a/tools/python/xen/util/pci.py +++ b/tools/python/xen/util/pci.py @@ -140,6 +140,25 @@ def parse_pci_name(pci_name_string): return (domain, bus, slot, func) +def extract_the_exact_pci_names(pci_names): + result = [] + + if isinstance(pci_names, types.StringTypes): + pci_names = pci_names.split() + elif isinstance(pci_names, types.ListType): + pci_names = re.findall(PCI_DEV_REG_EXPRESS_STR, '%s' % pci_names) + else: + raise PciDeviceParseError('Invalid argument: %s' % pci_names) + + for pci in pci_names: + # The length of DDDD:bb:dd.f is 12. 
+ if len(pci) != 12: + continue + if re.match(PCI_DEV_REG_EXPRESS_STR, pci) is None: + continue + result = result + [pci] + return result + def find_sysfs_mnt(): try: return utils.find_sysfs_mount() @@ -240,7 +259,7 @@ def find_all_devices_owned_by_pciback(): sysfs_mnt = find_sysfs_mnt() pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH pci_names = os.popen('ls ' + pciback_path).read() - pci_list = re.findall(PCI_DEV_REG_EXPRESS_STR, pci_names) + pci_list = extract_the_exact_pci_names(pci_names) dev_list = [] for pci in pci_list: (dom, b, d, f) = parse_pci_name(pci) @@ -352,7 +371,7 @@ class PciDeviceAssignmentError(Exception): def __init__(self,msg): self.message = msg def __str__(self): - return 'pci: impproper device assignment spcified: ' + \ + return 'pci: improper device assignment specified: ' + \ self.message class PciDevice: @@ -435,7 +454,7 @@ class PciDevice: sysfs_mnt = find_sysfs_mnt() self_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + self.name pci_names = os.popen('ls ' + self_path).read() - dev_list = re.findall(PCI_DEV_REG_EXPRESS_STR, pci_names) + dev_list = extract_the_exact_pci_names(pci_names) list = [self.name] for pci_str in dev_list: @@ -472,7 +491,7 @@ class PciDevice: return [self.name] dev_list = dev.find_all_devices_behind_the_bridge(ignore_bridge) - dev_list = re.findall(PCI_DEV_REG_EXPRESS_STR, '%s' % dev_list) + dev_list = extract_the_exact_pci_names(dev_list) return dev_list def do_secondary_bus_reset(self, target_bus, devs): @@ -556,10 +575,10 @@ class PciDevice: def find_all_the_multi_functions(self): sysfs_mnt = find_sysfs_mnt() - pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH).read() - p = self.name - p = p[0 : p.rfind('.')] + '.[0-7]' - funcs = re.findall(p, pci_names) + parent = PCI_DEV_FORMAT_STR % self.find_parent() + pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + \ + parent + '/').read() + funcs = extract_the_exact_pci_names(pci_names) return funcs def find_coassigned_devices(self): diff 
--git a/tools/python/xen/web/connection.py b/tools/python/xen/web/connection.py index 3d335d2..17e1762 100644 --- a/tools/python/xen/web/connection.py +++ b/tools/python/xen/web/connection.py @@ -37,7 +37,7 @@ specifying what kind of socket they are. There are subclasses for TCP and unix-domain sockets (see tcp.py and unix.py). """ -BUFFER_SIZE = 1024 +BUFFER_SIZE = 16384 BACKLOG = 5 diff --git a/tools/python/xen/xend/XendAPI.py b/tools/python/xen/xend/XendAPI.py index 07e6b21..6dda3a9 100644 --- a/tools/python/xen/xend/XendAPI.py +++ b/tools/python/xen/xend/XendAPI.py @@ -18,7 +18,6 @@ import inspect import os import Queue -import sets import string import sys import traceback @@ -26,6 +25,12 @@ import threading import time import xmlrpclib +# sets is deprecated as of python 2.6, but set is unavailable in 2.3 +try: + set +except NameError: + from sets import Set as set + import XendDomain, XendDomainInfo, XendNode, XendDmesg import XendLogging, XendTaskManager, XendAPIStore @@ -119,16 +124,17 @@ event_registrations = {} def event_register(session, reg_classes): if session not in event_registrations: event_registrations[session] = { - 'classes' : sets.Set(), + 'classes' : set(), 'queue' : Queue.Queue(EVENT_QUEUE_LENGTH), 'next-id' : 1 } if not reg_classes: reg_classes = classes - if hasattr(set, 'union_update'): - event_registrations[session]['classes'].union_update(reg_classes) + sessionclasses = event_registrations[session]['classes'] + if hasattr(sessionclasses, 'union_update'): + sessionclasses.union_update(reg_classes) else: - event_registrations[session]['classes'].update(reg_classes) + sessionclasses.update(reg_classes) diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index c4492f6..4b74189 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -687,6 +687,7 @@ class XendDomainInfo: # co-assignment devices hasn't been assigned, or has been assigned to # domN. 
coassignment_list = pci_device.find_coassigned_devices() + pci_device.devs_check_driver(coassignment_list) assigned_pci_device_str_list = self._get_assigned_pci_devices() for pci_str in coassignment_list: (domain, bus, dev, func) = parse_pci_name(pci_str) diff --git a/tools/python/xen/xend/server/pciif.py b/tools/python/xen/xend/server/pciif.py index 7820fd0..d203c14 100644 --- a/tools/python/xen/xend/server/pciif.py +++ b/tools/python/xen/xend/server/pciif.py @@ -397,6 +397,9 @@ class PciController(DevController): pci_str_list = pci_str_list + [pci_str] pci_dev_list = pci_dev_list + [(domain, bus, slot, func)] + if len(pci_str_list) != len(set(pci_str_list)): + raise VmError('pci: duplicate devices specified in guest config?') + for (domain, bus, slot, func) in pci_dev_list: try: dev = PciDevice(domain, bus, slot, func) @@ -413,6 +416,7 @@ class PciController(DevController): log.warn(err_msg % dev.name) else: funcs = dev.find_all_the_multi_functions() + dev.devs_check_driver(funcs) for f in funcs: if not f in pci_str_list: (f_dom, f_bus, f_slot, f_func) = parse_pci_name(f) @@ -440,6 +444,7 @@ class PciController(DevController): # Remove the element 0 which is a bridge del devs_str[0] + dev.devs_check_driver(devs_str) for s in devs_str: if not s in pci_str_list: (s_dom, s_bus, s_slot, s_func) = parse_pci_name(s) diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py index b663faf..78601fe 100644 --- a/tools/python/xen/xm/create.py +++ b/tools/python/xen/xm/create.py @@ -1395,7 +1395,13 @@ def do_console(domain_name): for i in range(10): # Catch failure of the create process time.sleep(1) - (p, rv) = os.waitpid(cpid, os.WNOHANG) + try: + (p, rv) = os.waitpid(cpid, os.WNOHANG) + except OSError: + # Domain has started cleanly and then exiting, + # the child process used to do this has detached + print("Domain has already finished"); + break if os.WIFEXITED(rv): if os.WEXITSTATUS(rv) != 0: sys.exit(os.WEXITSTATUS(rv)) diff --git 
a/tools/xenstat/libxenstat/src/xenstat_linux.c b/tools/xenstat/libxenstat/src/xenstat_linux.c index 2ac3ccf..5d0b976 100644 --- a/tools/xenstat/libxenstat/src/xenstat_linux.c +++ b/tools/xenstat/libxenstat/src/xenstat_linux.c @@ -31,7 +31,7 @@ #include "xenstat_priv.h" -#define SYSFS_VBD_PATH "/sys/devices/xen-backend/" +#define SYSFS_VBD_PATH "/sys/bus/xen-backend/devices" struct priv_data { FILE *procnetdev; diff --git a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h index eceac2a..fd69670 100644 --- a/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h +++ b/unmodified_drivers/linux-2.6/compat-include/xen/platform-compat.h @@ -157,4 +157,8 @@ typedef irqreturn_t (*irq_handler_t)(int, void *, struct pt_regs *); #define setup_xen_features xen_setup_features #endif +#ifndef atomic_cmpxchg +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#endif + #endif diff --git a/xen/Makefile b/xen/Makefile index 93374dc..b4dd12f 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -2,7 +2,7 @@ # All other places this is stored (eg. compile.h) should be autogenerated. export XEN_VERSION = 3 export XEN_SUBVERSION = 4 -export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION) +export XEN_EXTRAVERSION ?= .1-rc7$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version diff --git a/xen/arch/ia64/linux-xen/iosapic.c b/xen/arch/ia64/linux-xen/iosapic.c index 6d187f8..7807024 100644 --- a/xen/arch/ia64/linux-xen/iosapic.c +++ b/xen/arch/ia64/linux-xen/iosapic.c @@ -1275,4 +1275,22 @@ int iosapic_guest_write(unsigned long physbase, unsigned int reg, u32 val) spin_unlock_irqrestore(&irq_descp(vec)->lock, flags); return 0; } + +/* for vtd interrupt remapping. 
xen/drivers/vtd/intremap.c */ +int iosapic_get_nr_iosapics(void) +{ + int index; + + for (index = NR_IOSAPICS - 1; index >= 0; index--) { + if (iosapic_lists[index].addr) + break; + } + + return index + 1; +} + +int iosapic_get_nr_pins(int index) +{ + return iosapic_lists[index].num_rte; +} #endif /* XEN */ diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c index 0685194..4bab139 100644 --- a/xen/arch/x86/acpi/cpu_idle.c +++ b/xen/arch/x86/acpi/cpu_idle.c @@ -200,13 +200,8 @@ static void acpi_processor_idle(void) cpufreq_dbs_timer_suspend(); sched_tick_suspend(); - /* - * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer, - * which will break the later assumption of no sofirq pending, - * so add do_softirq - */ - if ( softirq_pending(smp_processor_id()) ) - do_softirq(); + /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */ + process_pending_timers(); /* * Interrupts must be disabled during bus mastering calculations and @@ -624,6 +619,7 @@ static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx) } static unsigned int latency_factor = 2; +integer_param("idle_latency_factor", latency_factor); static void set_cx( struct acpi_processor_power *acpi_power, diff --git a/xen/arch/x86/acpi/cpuidle_menu.c b/xen/arch/x86/acpi/cpuidle_menu.c index 683dc3b..27ed7f5 100644 --- a/xen/arch/x86/acpi/cpuidle_menu.c +++ b/xen/arch/x86/acpi/cpuidle_menu.c @@ -45,9 +45,15 @@ struct menu_device static DEFINE_PER_CPU(struct menu_device, menu_devices); -static s_time_t get_sleep_length_ns(void) +static unsigned int get_sleep_length_us(void) { - return per_cpu(timer_deadline, smp_processor_id()) - NOW(); + s_time_t us = (per_cpu(timer_deadline, smp_processor_id()) - NOW()) / 1000; + /* + * while us < 0 or us > (u32)-1, return a large u32, + * choose (unsigned int)-2000 to avoid wrapping while added with exit + * latency because the latency should not larger than 2ms + */ + return (us >> 32) ? 
(unsigned int)-2000 : (unsigned int)us; } static int menu_select(struct acpi_processor_power *power) @@ -56,7 +62,7 @@ static int menu_select(struct acpi_processor_power *power) int i; /* determine the expected residency time */ - data->expected_us = (u32) get_sleep_length_ns() / 1000; + data->expected_us = get_sleep_length_us(); /* find the deepest idle state that satisfies our constraints */ for ( i = 2; i < power->count; i++ ) diff --git a/xen/arch/x86/acpi/wakeup_prot.S b/xen/arch/x86/acpi/wakeup_prot.S index 8a91488..cf876ed 100644 --- a/xen/arch/x86/acpi/wakeup_prot.S +++ b/xen/arch/x86/acpi/wakeup_prot.S @@ -142,6 +142,12 @@ __ret_point: LOAD_GREG(sp) #if defined(__x86_64__) + /* Reload code selector */ + pushq $(__HYPERVISOR_CS64) + leaq 1f(%rip),%rax + pushq %rax + lretq +1: mov REF(saved_cr8), %rax mov %rax, %cr8 diff --git a/xen/arch/x86/boot/wakeup.S b/xen/arch/x86/boot/wakeup.S index cf40d8b..db36799 100644 --- a/xen/arch/x86/boot/wakeup.S +++ b/xen/arch/x86/boot/wakeup.S @@ -110,6 +110,7 @@ video_flags: .long 0 # Add offset for any reference to xen specific symbols wakeup_32: + /* Set up segment registers and initial stack for protected mode */ mov $BOOT_DS, %eax mov %eax, %ds mov %eax, %ss @@ -152,6 +153,7 @@ wakeup_32: wbinvd + /* Enable paging and flush prefetch queue */ mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */ mov %eax,%cr0 jmp 1f @@ -163,31 +165,22 @@ wakeup_32: ljmp $BOOT_CS64, $bootsym_phys(wakeup_64) .code64 - .align 8 - .word 0,0,0 -lgdt_descr: - .word LAST_RESERVED_GDT_BYTE - .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE - wakeup_64: - lgdt lgdt_descr(%rip) - mov $(__HYPERVISOR_DS64), %eax - mov %eax, %ds - - # long jump to return point, with cs reload - rex64 ljmp *ret_point(%rip) + /* Jump to high mappings and the higher-level wakeup code. 
*/ + movq ret_point(%rip), %rbx + jmp *%rbx - .align 8 ret_point: .quad __ret_point - .word __HYPERVISOR_CS64 #else /* !defined(__x86_64__) */ + lgdt gdt_descr mov $(__HYPERVISOR_DS), %eax mov %eax, %ds ljmp $(__HYPERVISOR_CS), $__ret_point + #endif bogus_saved_magic: diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c index f2ff228..b80ff8c 100644 --- a/xen/arch/x86/cpu/amd.c +++ b/xen/arch/x86/cpu/amd.c @@ -12,7 +12,7 @@ #include "cpu.h" #include "amd.h" -int start_svm(struct cpuinfo_x86 *c); +void start_svm(struct cpuinfo_x86 *c); /* * Pre-canned values for overriding the CPUID features diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index f950f1d..7d8fad6 100644 --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -9,6 +9,7 @@ obj-y += io.o obj-y += irq.o obj-y += mtrr.o obj-y += pmtimer.o +obj-y += quirks.o obj-y += rtc.o obj-y += hpet.o obj-y += vpt.o diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 000a70d..42652ca 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,8 @@ unsigned long __attribute__ ((__section__ (".bss.page_aligned"))) void hvm_enable(struct hvm_function_table *fns) { + extern int hvm_port80_allowed; + BUG_ON(hvm_enabled); printk("HVM: %s enabled\n", fns->name); @@ -78,7 +81,8 @@ void hvm_enable(struct hvm_function_table *fns) * often used for I/O delays, but the vmexits simply slow things down). 
*/ memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap)); - __clear_bit(0x80, hvm_io_bitmap); + if ( hvm_port80_allowed ) + __clear_bit(0x80, hvm_io_bitmap); __clear_bit(0xed, hvm_io_bitmap); hvm_funcs = *fns; @@ -511,11 +515,13 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) vc = &v->arch.guest_context; /* Need to init this vcpu before loading its contents */ + rc = 0; domain_lock(d); if ( !v->is_initialised ) - if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 ) - return rc; + rc = boot_vcpu(d, vcpuid, vc); domain_unlock(d); + if ( rc != 0 ) + return rc; if ( hvm_load_entry(CPU, h, &ctxt) != 0 ) return -EINVAL; @@ -659,8 +665,9 @@ static int hvm_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h) v->fpu_initialised = 1; /* Auxiliary processors should be woken immediately. */ - if ( test_and_clear_bit(_VPF_down, &v->pause_flags) ) - vcpu_wake(v); + v->is_initialised = 1; + clear_bit(_VPF_down, &v->pause_flags); + vcpu_wake(v); return 0; } @@ -2277,7 +2284,7 @@ static void hvm_s3_suspend(struct domain *d) vpic_reset(d); vioapic_reset(d); pit_reset(d); - rtc_reset(d); + rtc_reset(d); pmtimer_reset(d); hpet_reset(d); @@ -2635,12 +2642,13 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) goto param_fail2; rc = -EINVAL; - if ( !shadow_mode_enabled(d)) - goto param_fail2; if ( d->vcpu[0] == NULL ) goto param_fail2; - rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap); + if ( shadow_mode_enabled(d) ) + rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap); + else + rc = hap_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap); param_fail2: rcu_unlock_domain(d); diff --git a/xen/arch/x86/hvm/i8254.c b/xen/arch/x86/hvm/i8254.c index 2babc2e..df923bf 100644 --- a/xen/arch/x86/hvm/i8254.c +++ b/xen/arch/x86/hvm/i8254.c @@ -38,11 +38,11 @@ #include #include -#define domain_vpit(d) (&(d)->arch.hvm_domain.pl_time.vpit) -#define vcpu_vpit(vcpu) (domain_vpit((vcpu)->domain)) -#define vpit_domain(pit) 
(container_of((pit), struct domain, \ - arch.hvm_domain.pl_time.vpit)) -#define vpit_vcpu(pit) (vpit_domain(pit)->vcpu[0]) +#define domain_vpit(x) (&(x)->arch.hvm_domain.pl_time.vpit) +#define vcpu_vpit(x) (domain_vpit((x)->domain)) +#define vpit_domain(x) (container_of((x), struct domain, \ + arch.hvm_domain.pl_time.vpit)) +#define vpit_vcpu(x) (vpit_domain(x)->vcpu[0]) #define RW_STATE_LSB 1 #define RW_STATE_MSB 2 diff --git a/xen/arch/x86/hvm/quirks.c b/xen/arch/x86/hvm/quirks.c new file mode 100644 index 0000000..1e08a2b --- /dev/null +++ b/xen/arch/x86/hvm/quirks.c @@ -0,0 +1,98 @@ +/****************************************************************************** + * x86/hvm/quirks.c + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +int hvm_port80_allowed = -1; +boolean_param("hvm_port80", hvm_port80_allowed); + +static int __init dmi_hvm_deny_port80(/*const*/ struct dmi_system_id *id) +{ + printk(XENLOG_WARNING "%s: port 0x80 access %s allowed for HVM guests\n", + id->ident, hvm_port80_allowed > 0 ? "forcibly" : "not"); + + if ( hvm_port80_allowed < 0 ) + hvm_port80_allowed = 0; + + return 0; +} + +static int __init check_port80(void) +{ + /* + * Quirk table for systems that misbehave (lock up, etc.) 
if port + * 0x80 is used: + */ + static struct dmi_system_id __initdata hvm_no_port80_dmi_table[] = + { + { + .callback = dmi_hvm_deny_port80, + .ident = "Compaq Presario V6000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B7") + } + }, + { + .callback = dmi_hvm_deny_port80, + .ident = "HP Pavilion dv9000z", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B9") + } + }, + { + .callback = dmi_hvm_deny_port80, + .ident = "HP Pavilion dv6000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30B8") + } + }, + { + .callback = dmi_hvm_deny_port80, + .ident = "HP Pavilion tx1000", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30BF") + } + }, + { + .callback = dmi_hvm_deny_port80, + .ident = "Presario F700", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), + DMI_MATCH(DMI_BOARD_NAME, "30D3") + } + }, + { } + }; + + dmi_check_system(hvm_no_port80_dmi_table); + + if ( !hvm_port80_allowed ) + __set_bit(0x80, hvm_io_bitmap); + + return 0; +} +__initcall(check_port80); diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c index 9ab6903..48dd459 100644 --- a/xen/arch/x86/hvm/rtc.c +++ b/xen/arch/x86/hvm/rtc.c @@ -28,11 +28,11 @@ #include #include -#define domain_vrtc(d) (&(d)->arch.hvm_domain.pl_time.vrtc) -#define vcpu_vrtc(vcpu) (domain_vrtc((vcpu)->domain)) -#define vrtc_domain(rtc) (container_of((rtc), struct domain, \ - arch.hvm_domain.pl_time.vrtc)) -#define vrtc_vcpu(rtc) (vrtc_domain(rtc)->vcpu[0]) +#define domain_vrtc(x) (&(x)->arch.hvm_domain.pl_time.vrtc) +#define vcpu_vrtc(x) (domain_vrtc((x)->domain)) +#define vrtc_domain(x) (container_of((x), struct domain, \ + arch.hvm_domain.pl_time.vrtc)) +#define vrtc_vcpu(x) (vrtc_domain(x)->vcpu[0]) static void rtc_periodic_cb(struct vcpu *v, void *opaque) { diff --git a/xen/arch/x86/hvm/svm/asid.c b/xen/arch/x86/hvm/svm/asid.c index 57477aa..bca0d2a 100644 
--- a/xen/arch/x86/hvm/svm/asid.c +++ b/xen/arch/x86/hvm/svm/asid.c @@ -61,6 +61,7 @@ struct svm_asid_data { u32 next_asid; u32 max_asid; u32 erratum170:1; + u32 initialised:1; }; static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data); @@ -70,7 +71,7 @@ static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data); */ static struct svm_asid_data *svm_asid_core_data(void) { - return &get_cpu_var(svm_asid_data); + return &this_cpu(svm_asid_data); } /* @@ -81,6 +82,15 @@ void svm_asid_init(struct cpuinfo_x86 *c) int nasids; struct svm_asid_data *data = svm_asid_core_data(); + /* + * If already initialised, we just bump the generation to force a TLB + * flush. Resetting the generation could be dangerous, if VCPUs still + * exist that reference earlier generations on this CPU. + */ + if ( data->initialised ) + return svm_asid_inc_generation(); + data->initialised = 1; + /* Find #ASID. */ nasids = cpuid_ebx(0x8000000A); data->max_asid = nasids - 1; diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index a70f0ff..2b8428c 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -838,45 +838,66 @@ static struct hvm_function_table svm_function_table = { .invlpg_intercept = svm_invlpg_intercept }; -int start_svm(struct cpuinfo_x86 *c) +static int svm_cpu_up(struct cpuinfo_x86 *c) { - u32 eax, ecx, edx; - u32 phys_hsa_lo, phys_hsa_hi; + u32 eax, edx, phys_hsa_lo, phys_hsa_hi; u64 phys_hsa; int cpu = smp_processor_id(); - /* Xen does not fill x86_capability words except 0. 
*/ - ecx = cpuid_ecx(0x80000001); - boot_cpu_data.x86_capability[5] = ecx; - - if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) ) - return 0; - /* Check whether SVM feature is disabled in BIOS */ rdmsr(MSR_K8_VM_CR, eax, edx); if ( eax & K8_VMCR_SVME_DISABLE ) { - printk("AMD SVM Extension is disabled in BIOS.\n"); + printk("CPU%d: AMD SVM Extension is disabled in BIOS.\n", cpu); return 0; } - if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) || - ((root_vmcb[cpu] = alloc_vmcb()) == NULL) ) + if ( ((hsa[cpu] == NULL) && + ((hsa[cpu] = alloc_host_save_area()) == NULL)) || + ((root_vmcb[cpu] == NULL) && + ((root_vmcb[cpu] = alloc_vmcb()) == NULL)) ) return 0; write_efer(read_efer() | EFER_SVME); /* Initialize the HSA for this core. */ - phys_hsa = (u64) virt_to_maddr(hsa[cpu]); - phys_hsa_lo = (u32) phys_hsa; - phys_hsa_hi = (u32) (phys_hsa >> 32); + phys_hsa = (u64)virt_to_maddr(hsa[cpu]); + phys_hsa_lo = (u32)phys_hsa; + phys_hsa_hi = (u32)(phys_hsa >> 32); wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi); /* Initialize core's ASID handling. */ svm_asid_init(c); - if ( cpu != 0 ) - return 1; + return 1; +} + +void start_svm(struct cpuinfo_x86 *c) +{ + static bool_t bootstrapped; + + if ( test_and_set_bool(bootstrapped) ) + { + if ( hvm_enabled && !svm_cpu_up(c) ) + { + printk("SVM: FATAL: failed to initialise CPU%d!\n", + smp_processor_id()); + BUG(); + } + return; + } + + /* Xen does not fill x86_capability words except 0. 
*/ + boot_cpu_data.x86_capability[5] = cpuid_ecx(0x80000001); + + if ( !test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability) ) + return; + + if ( !svm_cpu_up(c) ) + { + printk("SVM: failed to initialise.\n"); + return; + } setup_vmcb_dump(); @@ -886,8 +907,6 @@ int start_svm(struct cpuinfo_x86 *c) svm_function_table.hap_supported = cpu_has_svm_npt; hvm_enable(&svm_function_table); - - return 1; } static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs) diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c index a18ea9e..ca4f224 100644 --- a/xen/arch/x86/hvm/viridian.c +++ b/xen/arch/x86/hvm/viridian.c @@ -22,6 +22,7 @@ #define VIRIDIAN_MSR_EOI 0x40000070 #define VIRIDIAN_MSR_ICR 0x40000071 #define VIRIDIAN_MSR_TPR 0x40000072 +#define VIRIDIAN_MSR_APIC_ASSIST 0x40000073 /* Viridian Hypercall Status Codes. */ #define HV_STATUS_SUCCESS 0x0000 @@ -49,14 +50,14 @@ int cpuid_viridian_leaves(unsigned int leaf, unsigned int *eax, return 0; leaf -= 0x40000000; - if ( leaf > 5 ) + if ( leaf > 6 ) return 0; *eax = *ebx = *ecx = *edx = 0; switch ( leaf ) { case 0: - *eax = 0x40000005; /* Maximum leaf */ + *eax = 0x40000006; /* Maximum leaf */ *ebx = 0x7263694d; /* Magic numbers */ *ecx = 0x666F736F; *edx = 0x76482074; @@ -192,6 +193,30 @@ int wrmsr_viridian_regs(uint32_t idx, uint32_t eax, uint32_t edx) vlapic_set_reg(vcpu_vlapic(current), APIC_TASKPRI, eax & 0xff); break; + case VIRIDIAN_MSR_APIC_ASSIST: + /* + * We don't support the APIC assist page, and that fact is reflected in + * our CPUID flags. However, Windows 7 build 7000 has a bug which means + * that it doesn't recognise that, and tries to use the page anyway. We + * therefore have to fake up just enough to keep win7 happy. + * Fortunately, that's really easy: just setting the first four bytes + * in the page to zero effectively disables the page again, so that's + * what we do. 
Semantically, the first four bytes are supposed to be a + * flag saying whether the guest really needs to issue an EOI. Setting + * that flag to zero means that it must always issue one, which is what + * we want. Once a page has been repurposed as an APIC assist page the + * guest isn't allowed to set anything in it, so the flag remains zero + * and all is fine. The guest is allowed to clear flags in the page, + * but that doesn't cause us any problems. + */ + if ( val & 1 ) /* APIC assist page enabled? */ + { + uint32_t word = 0; + paddr_t page_start = val & ~1ul; + hvm_copy_to_guest_phys(page_start, &word, sizeof(word)); + } + break; + default: return 0; } diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index 68e9b27..98684d4 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -460,7 +460,7 @@ int vlapic_ipi( if ( delivery_mode == APIC_DM_LOWEST ) { - target = apic_lowest_prio(vlapic_domain(v), lpr_map); + target = apic_lowest_prio(vlapic_domain(vlapic), lpr_map); if ( target != NULL ) rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode, vector, level, trig_mode); diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c index 37c1e5c..e6bc057 100644 --- a/xen/arch/x86/hvm/vmsi.c +++ b/xen/arch/x86/hvm/vmsi.c @@ -290,7 +290,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long address, goto out; entry = msixtbl_find_entry(v, address); - nr_entry = (address - entry->gtable) % PCI_MSIX_ENTRY_SIZE; + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; offset = address & (PCI_MSIX_ENTRY_SIZE - 1); if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 64d9f39..23649a3 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1398,11 +1398,11 @@ static unsigned long *vpid_bitmap; void start_vmx(void) { - static int bootstrapped; + static bool_t bootstrapped; vmx_save_host_msrs(); - if ( bootstrapped ) + if ( 
test_and_set_bool(bootstrapped) ) { if ( hvm_enabled && !vmx_cpu_up() ) { @@ -1413,8 +1413,6 @@ void start_vmx(void) return; } - bootstrapped = 1; - /* Xen does not fill x86_capability words except 0. */ boot_cpu_data.x86_capability[4] = cpuid_ecx(1); @@ -2220,7 +2218,7 @@ static void vmx_failed_vmentry(unsigned int exit_reason, case EXIT_REASON_MSR_LOADING: printk("caused by MSR entry %ld loading.\n", exit_qualification); break; - case EXIT_REASON_MACHINE_CHECK: + case EXIT_REASON_MCE_DURING_VMENTRY: printk("caused by machine check.\n"); HVMTRACE_0D(MCE); do_machine_check(regs); @@ -2308,7 +2306,7 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs) case EXIT_REASON_SIPI: case EXIT_REASON_PENDING_VIRT_INTR: case EXIT_REASON_PENDING_VIRT_NMI: - case EXIT_REASON_MACHINE_CHECK: + case EXIT_REASON_MCE_DURING_VMENTRY: break; default: v->arch.hvm_vmx.vmx_emulate = 1; diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c index 8c790cf..e2202ba 100644 --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -82,9 +82,9 @@ int assign_irq_vector(int irq) spin_lock(&vector_lock); - if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) { + if ((irq != AUTO_ASSIGN_IRQ) && (irq_to_vector(irq) > 0)) { spin_unlock(&vector_lock); - return IO_APIC_VECTOR(irq); + return irq_to_vector(irq); } vector = current_vector; diff --git a/xen/arch/x86/machine_kexec.c b/xen/arch/x86/machine_kexec.c index 4d15e4b..8452b8a 100644 --- a/xen/arch/x86/machine_kexec.c +++ b/xen/arch/x86/machine_kexec.c @@ -115,6 +115,18 @@ void machine_reboot_kexec(xen_kexec_image_t *image) void machine_kexec(xen_kexec_image_t *image) { + struct desc_ptr gdt_desc = { + .base = (unsigned long)(boot_cpu_gdt_table - FIRST_RESERVED_GDT_ENTRY), + .limit = LAST_RESERVED_GDT_BYTE + }; + + /* + * compat_machine_kexec() returns to idle pagetables, which requires us + * to be running on a static GDT mapping (idle pagetables have no GDT + * mappings in their per-domain mapping area). 
+ */ + asm volatile ( "lgdt %0" : : "m" (gdt_desc) ); + #ifdef CONFIG_COMPAT if ( is_pv_32on64_domain(dom0) ) { diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index d11c2a1..32fe02e 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -702,66 +702,69 @@ int is_iomem_page(unsigned long mfn) return (page_get_owner(page) == dom_io); } - int get_page_from_l1e( - l1_pgentry_t l1e, struct domain *d) + l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner) { unsigned long mfn = l1e_get_pfn(l1e); struct page_info *page = mfn_to_page(mfn); uint32_t l1f = l1e_get_flags(l1e); struct vcpu *curr = current; - struct domain *owner; + struct domain *real_pg_owner; if ( !(l1f & _PAGE_PRESENT) ) return 1; - if ( unlikely(l1f & l1_disallow_mask(d)) ) + if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) ) { - MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(d)); + MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner)); return 0; } if ( !mfn_valid(mfn) || - (owner = page_get_owner_and_reference(page)) == dom_io ) + (real_pg_owner = page_get_owner_and_reference(page)) == dom_io ) { /* Only needed the reference to confirm dom_io ownership. */ if ( mfn_valid(mfn) ) put_page(page); /* DOMID_IO reverts to caller for privilege checks. */ - if ( d == dom_io ) - d = curr->domain; + if ( pg_owner == dom_io ) + pg_owner = curr->domain; - if ( !iomem_access_permitted(d, mfn, mfn) ) + if ( !iomem_access_permitted(pg_owner, mfn, mfn) ) { if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */ MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx", - d->domain_id, mfn); + pg_owner->domain_id, mfn); return 0; } return 1; } - if ( owner == NULL ) + if ( real_pg_owner == NULL ) goto could_not_pin; - /* - * Let privileged domains transfer the right to map their target - * domain's pages. This is used to allow stub-domain pvfb export to dom0, - * until pvfb supports granted mappings. At that time this minor hack - * can go away. 
- */ - if ( unlikely(d != owner) && (d != curr->domain) && IS_PRIV_FOR(d, owner) ) - d = owner; + if ( unlikely(real_pg_owner != pg_owner) ) + { + /* + * Let privileged domains transfer the right to map their target + * domain's pages. This is used to allow stub-domain pvfb export to + * dom0, until pvfb supports granted mappings. At that time this + * minor hack can go away. + */ + if ( (pg_owner == l1e_owner) || !IS_PRIV_FOR(pg_owner, real_pg_owner) ) + goto could_not_pin; + pg_owner = real_pg_owner; + } /* Foreign mappings into guests in shadow external mode don't * contribute to writeable mapping refcounts. (This allows the * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ if ( (l1f & _PAGE_RW) && - !(paging_mode_external(d) && (d != curr->domain)) && + ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) && !get_page_type(page, PGT_writable_page) ) goto could_not_pin; @@ -774,8 +777,7 @@ get_page_from_l1e( if ( is_xen_heap_page(page) ) { if ( (l1f & _PAGE_RW) && - !(unlikely(paging_mode_external(d) && - (d != curr->domain))) ) + ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) ) put_page_type(page); put_page(page); MEM_LOG("Attempt to change cache attributes of Xen heap page"); @@ -799,10 +801,10 @@ get_page_from_l1e( could_not_pin: MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte - " for dom%d", + " for l1e_owner=%d, pg_owner=%d", mfn, get_gpfn_from_mfn(mfn), - l1e_get_intpte(l1e), d->domain_id); - if ( owner != NULL ) + l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id); + if ( real_pg_owner != NULL ) put_page(page); return 0; } @@ -981,19 +983,18 @@ get_page_from_l4e( #define unadjust_guest_l3e(_p, _d) ((void)(_d)) #endif -void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) +void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner) { unsigned long pfn = l1e_get_pfn(l1e); struct page_info *page; - struct domain *e; + 
struct domain *pg_owner; struct vcpu *v; if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || is_iomem_page(pfn) ) return; page = mfn_to_page(pfn); - - e = page_get_owner(page); + pg_owner = page_get_owner(page); /* * Check if this is a mapping that was established via a grant reference. @@ -1009,17 +1010,17 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) * Xen. All active grants can safely be cleaned up when the domain dies.) */ if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) && - !d->is_shutting_down && !d->is_dying ) + !l1e_owner->is_shutting_down && !l1e_owner->is_dying ) { MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte, l1e_get_intpte(l1e)); - domain_crash(d); + domain_crash(l1e_owner); } /* Remember we didn't take a type-count of foreign writable mappings * to paging-external domains */ if ( (l1e_get_flags(l1e) & _PAGE_RW) && - !(unlikely((e != d) && paging_mode_external(e))) ) + ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) ) { put_page_and_type(page); } @@ -1029,9 +1030,9 @@ void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == PGT_seg_desc_page)) && unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) && - (d == e) ) + (l1e_owner == pg_owner) ) { - for_each_vcpu ( d, v ) + for_each_vcpu ( pg_owner, v ) invalidate_shadow_ldt(v, 1); } put_page(page); @@ -1122,7 +1123,7 @@ static int alloc_l1_table(struct page_info *page) for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { if ( is_guest_l1_slot(i) && - unlikely(!get_page_from_l1e(pl1e[i], d)) ) + unlikely(!get_page_from_l1e(pl1e[i], d, d)) ) goto fail; adjust_guest_l1e(pl1e[i], d); @@ -1701,7 +1702,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, return rc; } - if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) + if ( unlikely(!get_page_from_l1e(nl1e, d, FOREIGNDOM)) ) return 0; adjust_guest_l1e(nl1e, d); @@ -4175,7 +4176,7 @@ static int ptwr_emulated_update( /* Check the new PTE. 
*/ nl1e = l1e_from_intpte(val); - if ( unlikely(!get_page_from_l1e(nl1e, d)) ) + if ( unlikely(!get_page_from_l1e(nl1e, d, d)) ) { if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c index b7615f0..14ede71 100644 --- a/xen/arch/x86/mm/hap/hap.c +++ b/xen/arch/x86/mm/hap/hap.c @@ -51,9 +51,141 @@ #undef page_to_mfn #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) +/************************************************/ +/* HAP VRAM TRACKING SUPPORT */ +/************************************************/ + +int hap_enable_vram_tracking(struct domain *d) +{ + int i; + + if ( !d->dirty_vram ) + return -EINVAL; + + /* turn on PG_log_dirty bit in paging mode */ + hap_lock(d); + d->arch.paging.mode |= PG_log_dirty; + hap_unlock(d); + + /* set l1e entries of P2M table to be read-only. */ + for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++) + p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty); + + flush_tlb_mask(d->domain_dirty_cpumask); + return 0; +} + +int hap_disable_vram_tracking(struct domain *d) +{ + int i; + + if ( !d->dirty_vram ) + return -EINVAL; + + hap_lock(d); + d->arch.paging.mode &= ~PG_log_dirty; + hap_unlock(d); + + /* set l1e entries of P2M table with normal mode */ + for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++) + p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw); + + flush_tlb_mask(d->domain_dirty_cpumask); + return 0; +} + +void hap_clean_vram_tracking(struct domain *d) +{ + int i; + + if ( !d->dirty_vram ) + return; + + /* set l1e entries of P2M table to be read-only. 
*/ + for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++) + p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty); + + flush_tlb_mask(d->domain_dirty_cpumask); +} + +void hap_vram_tracking_init(struct domain *d) +{ + paging_log_dirty_init(d, hap_enable_vram_tracking, + hap_disable_vram_tracking, + hap_clean_vram_tracking); +} + +int hap_track_dirty_vram(struct domain *d, + unsigned long begin_pfn, + unsigned long nr, + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) +{ + long rc = 0; + + if ( nr ) + { + if ( paging_mode_log_dirty(d) && d->dirty_vram ) + { + if ( begin_pfn != d->dirty_vram->begin_pfn || + begin_pfn + nr != d->dirty_vram->end_pfn ) + { + paging_log_dirty_disable(d); + d->dirty_vram->begin_pfn = begin_pfn; + d->dirty_vram->end_pfn = begin_pfn + nr; + rc = paging_log_dirty_enable(d); + if (rc != 0) + goto param_fail; + } + } + else if ( !paging_mode_log_dirty(d) && !d->dirty_vram ) + { + rc = -ENOMEM; + if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL ) + goto param_fail; + + d->dirty_vram->begin_pfn = begin_pfn; + d->dirty_vram->end_pfn = begin_pfn + nr; + hap_vram_tracking_init(d); + rc = paging_log_dirty_enable(d); + if (rc != 0) + goto param_fail; + } + else + { + if ( !paging_mode_log_dirty(d) && d->dirty_vram ) + rc = -EINVAL; + else + rc = -ENODATA; + goto param_fail; + } + /* get the bitmap */ + rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap); + } + else + { + if ( paging_mode_log_dirty(d) && d->dirty_vram ) { + rc = paging_log_dirty_disable(d); + xfree(d->dirty_vram); + d->dirty_vram = NULL; + } else + rc = 0; + } + + return rc; + +param_fail: + if ( d->dirty_vram ) + { + xfree(d->dirty_vram); + d->dirty_vram = NULL; + } + return rc; +} + /************************************************/ /* HAP LOG DIRTY SUPPORT */ /************************************************/ + /* hap code to call when log_dirty is enable. 
*/ int hap_enable_log_dirty(struct domain *d) { @@ -86,6 +218,21 @@ void hap_clean_dirty_bitmap(struct domain *d) flush_tlb_mask(d->domain_dirty_cpumask); } +void hap_logdirty_init(struct domain *d) +{ + if ( paging_mode_log_dirty(d) && d->dirty_vram ) + { + paging_log_dirty_disable(d); + xfree(d->dirty_vram); + d->dirty_vram = NULL; + } + + /* Reinitialize logdirty mechanism */ + paging_log_dirty_init(d, hap_enable_log_dirty, + hap_disable_log_dirty, + hap_clean_dirty_bitmap); +} + /************************************************/ /* HAP SUPPORT FUNCTIONS */ /************************************************/ @@ -390,10 +537,6 @@ void hap_domain_init(struct domain *d) { hap_lock_init(d); INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); - - /* This domain will use HAP for log-dirty mode */ - paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty, - hap_clean_dirty_bitmap); } /* return 0 for success, -errno for failure */ diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c index e9313e1..38f1202 100644 --- a/xen/arch/x86/mm/paging.c +++ b/xen/arch/x86/mm/paging.c @@ -453,6 +453,157 @@ int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc) return rv; } +int paging_log_dirty_range(struct domain *d, + unsigned long begin_pfn, + unsigned long nr, + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap) +{ + int rv = 0; + unsigned long pages = 0; + mfn_t *l4, *l3, *l2; + unsigned long *l1; + int b1, b2, b3, b4; + int i2, i3, i4; + + d->arch.paging.log_dirty.clean_dirty_bitmap(d); + log_dirty_lock(d); + + PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n", + d->domain_id, + d->arch.paging.log_dirty.fault_count, + d->arch.paging.log_dirty.dirty_count); + + if ( !mfn_valid(d->arch.paging.log_dirty.top) ) + { + rv = -EINVAL; /* perhaps should be ENOMEM? 
*/ + goto out; + } + + if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) { + printk("%s: %d failed page allocs while logging dirty pages\n", + __FUNCTION__, d->arch.paging.log_dirty.failed_allocs); + rv = -ENOMEM; + goto out; + } + + if ( !d->arch.paging.log_dirty.fault_count && + !d->arch.paging.log_dirty.dirty_count ) { + int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG; + unsigned long zeroes[size]; + memset(zeroes, 0x00, size * BYTES_PER_LONG); + rv = 0; + if ( copy_to_guest_offset(dirty_bitmap, 0, (uint8_t *) zeroes, + size * BYTES_PER_LONG) != 0 ) + rv = -EFAULT; + goto out; + } + d->arch.paging.log_dirty.fault_count = 0; + d->arch.paging.log_dirty.dirty_count = 0; + + b1 = L1_LOGDIRTY_IDX(begin_pfn); + b2 = L2_LOGDIRTY_IDX(begin_pfn); + b3 = L3_LOGDIRTY_IDX(begin_pfn); + b4 = L4_LOGDIRTY_IDX(begin_pfn); + l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top)); + + for ( i4 = b4; + (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES); + i4++ ) + { + l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL; + for ( i3 = b3; + (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES); + i3++ ) + { + l2 = ((l3 && mfn_valid(l3[i3])) ? + map_domain_page(mfn_x(l3[i3])) : NULL); + for ( i2 = b2; + (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES); + i2++ ) + { + static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG]; + unsigned int bytes = PAGE_SIZE; + uint8_t *s; + l1 = ((l2 && mfn_valid(l2[i2])) ? 
+ map_domain_page(mfn_x(l2[i2])) : zeroes); + + s = ((uint8_t*)l1) + (b1 >> 3); + bytes -= b1 >> 3; + + if ( likely(((nr - pages + 7) >> 3) < bytes) ) + bytes = (unsigned int)((nr - pages + 7) >> 3); + + /* begin_pfn is not 32K aligned, hence we have to bit + * shift the bitmap */ + if ( b1 & 0x7 ) + { + int i, j; + uint32_t *l = (uint32_t*) s; + int bits = b1 & 0x7; + int bitmask = (1 << bits) - 1; + int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG; + unsigned long bitmap[size]; + static unsigned long printed = 0; + + if ( printed != begin_pfn ) + { + dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n", + __FUNCTION__, begin_pfn); + printed = begin_pfn; + } + + for ( i = 0; i < size - 1; i++, l++ ) { + bitmap[i] = ((*l) >> bits) | + (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits)); + } + s = (uint8_t*) l; + size = BYTES_PER_LONG - ((b1 >> 3) & 0x3); + bitmap[i] = 0; + for ( j = 0; j < size; j++, s++ ) + bitmap[i] |= (*s) << (j * 8); + bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits)); + if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3), + (uint8_t*) bitmap, bytes) != 0 ) + { + rv = -EFAULT; + goto out; + } + } + else + { + if ( copy_to_guest_offset(dirty_bitmap, pages >> 3, + s, bytes) != 0 ) + { + rv = -EFAULT; + goto out; + } + } + + if ( l1 != zeroes ) + clear_page(l1); + pages += bytes << 3; + if ( l1 != zeroes ) + unmap_domain_page(l1); + b1 = b1 & 0x7; + } + b2 = 0; + if ( l2 ) + unmap_domain_page(l2); + } + b3 = 0; + if ( l3 ) + unmap_domain_page(l3); + } + unmap_domain_page(l4); + + log_dirty_unlock(d); + + return rv; + + out: + log_dirty_unlock(d); + return rv; +} /* Note that this function takes three function pointers. Callers must supply * these functions for log dirty code to call. 
This function usually is @@ -554,11 +705,17 @@ int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, switch ( sc->op ) { case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: + if ( hap_enabled(d) ) + hap_logdirty_init(d); return paging_log_dirty_enable(d); case XEN_DOMCTL_SHADOW_OP_ENABLE: if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY ) + { + if ( hap_enabled(d) ) + hap_logdirty_init(d); return paging_log_dirty_enable(d); + } case XEN_DOMCTL_SHADOW_OP_OFF: if ( paging_mode_log_dirty(d) ) diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c index 072dda0..b8cd752 100644 --- a/xen/arch/x86/mm/shadow/multi.c +++ b/xen/arch/x86/mm/shadow/multi.c @@ -493,11 +493,13 @@ _sh_propagate(struct vcpu *v, if ( unlikely(!(gflags & _PAGE_PRESENT)) ) { +#if !(SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) /* If a guest l1 entry is not present, shadow with the magic * guest-not-present entry. */ if ( level == 1 ) *sp = sh_l1e_gnp(); else +#endif /* !OOS */ *sp = shadow_l1e_empty(); goto done; } @@ -816,7 +818,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d) if ( !shadow_mode_refcounts(d) ) return 1; - res = get_page_from_l1e(sl1e, d); + res = get_page_from_l1e(sl1e, d, d); // If a privileged domain is attempting to install a map of a page it does // not own, we let it succeed anyway. 
@@ -828,7 +830,7 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d) (d != owner) && IS_PRIV_FOR(d, owner)) { - res = get_page_from_l1e(sl1e, owner); + res = get_page_from_l1e(sl1e, d, owner); SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx " "which is owned by domain %d: %s\n", d->domain_id, mfn_x(mfn), owner->domain_id, @@ -1765,7 +1767,7 @@ static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v, if ( r & SHADOW_SET_ERROR ) return NULL; -#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC ) +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) *resync |= 1; #endif @@ -1820,7 +1822,7 @@ static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v, if ( r & SHADOW_SET_ERROR ) return NULL; -#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC ) +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) *resync |= 1; #endif @@ -1917,7 +1919,7 @@ static shadow_l1e_t * shadow_get_and_create_l1e(struct vcpu *v, (void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va)); } -#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC ) +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) /* All pages walked are now pagetables. Safe to resync pages in case level 4 or 3 shadows were set. 
*/ if ( resync ) @@ -2199,7 +2201,7 @@ static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se) else if ( p2mt != p2m_populate_on_demand ) result |= SHADOW_SET_ERROR; -#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC ) +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) if ( mfn_valid(sl3mfn) ) shadow_resync_all(v, 0); #endif @@ -2256,7 +2258,7 @@ static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se) else if ( p2mt != p2m_populate_on_demand ) result |= SHADOW_SET_ERROR; -#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC ) +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC ) if ( mfn_valid(sl2mfn) ) shadow_resync_all(v, 0); #endif @@ -2972,6 +2974,30 @@ static int sh_page_fault(struct vcpu *v, #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) if ( (regs->error_code & PFEC_reserved_bit) ) { +#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) + /* First, need to check that this isn't an out-of-sync + * shadow l1e. If it is, we fall back to the slow path, which + * will sync it up again. */ + { + shadow_l2e_t sl2e; + mfn_t gl1mfn; + if ( (__copy_from_user(&sl2e, + (sh_linear_l2_table(v) + + shadow_l2_linear_offset(va)), + sizeof(sl2e)) != 0) + || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) + || !mfn_valid(gl1mfn = _mfn(mfn_to_page( + shadow_l2e_get_mfn(sl2e))->v.sh.back)) + || unlikely(mfn_is_out_of_sync(gl1mfn)) ) + { + /* Hit the slow path as if there had been no + * shadow entry at all, and let it tidy up */ + ASSERT(regs->error_code & PFEC_page_present); + regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present); + goto page_fault_slow_path; + } + } +#endif /* SHOPT_OUT_OF_SYNC */ /* The only reasons for reserved bits to be set in shadow entries * are the two "magic" shadow_l1e entries. 
*/ if ( likely((__copy_from_user(&sl1e, @@ -2980,30 +3006,6 @@ static int sh_page_fault(struct vcpu *v, sizeof(sl1e)) == 0) && sh_l1e_is_magic(sl1e)) ) { -#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) - /* First, need to check that this isn't an out-of-sync - * shadow l1e. If it is, we fall back to the slow path, which - * will sync it up again. */ - { - shadow_l2e_t sl2e; - mfn_t gl1mfn; - if ( (__copy_from_user(&sl2e, - (sh_linear_l2_table(v) - + shadow_l2_linear_offset(va)), - sizeof(sl2e)) != 0) - || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) - || !mfn_valid(gl1mfn = _mfn(mfn_to_page( - shadow_l2e_get_mfn(sl2e))->v.sh.back)) - || unlikely(mfn_is_out_of_sync(gl1mfn)) ) - { - /* Hit the slow path as if there had been no - * shadow entry at all, and let it tidy up */ - ASSERT(regs->error_code & PFEC_page_present); - regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present); - goto page_fault_slow_path; - } - } -#endif /* SHOPT_OUT_OF_SYNC */ if ( sh_l1e_is_gnp(sl1e) ) { diff --git a/xen/arch/x86/mm/shadow/private.h b/xen/arch/x86/mm/shadow/private.h index a831afd..3c77093 100644 --- a/xen/arch/x86/mm/shadow/private.h +++ b/xen/arch/x86/mm/shadow/private.h @@ -589,17 +589,6 @@ sh_mfn_is_dirty(struct domain *d, mfn_t gmfn) return rv; } -/**************************************************************************/ -/* VRAM dirty tracking support */ - -struct sh_dirty_vram { - unsigned long begin_pfn; - unsigned long end_pfn; - paddr_t *sl1ma; - uint8_t *dirty_bitmap; - s_time_t last_dirty; -}; - /**************************************************************************/ /* Shadow-page refcounting. 
*/ diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c index 4bf6777..bb2f313 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -313,7 +313,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { uint32_t cpu; uint64_t idletime, now = NOW(); - struct vcpu *v; struct xenctl_cpumap ctlmap; cpumask_t cpumap; XEN_GUEST_HANDLE(uint8) cpumap_bitmap; @@ -336,7 +335,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) for_each_cpu_mask ( cpu, cpumap ) { - if ( (v = idle_vcpu[cpu]) != NULL ) + if ( idle_vcpu[cpu] == NULL ) cpu_clear(cpu, cpumap); idletime = get_cpu_idle_time(cpu); diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 64b55eb..976fba8 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -878,6 +878,9 @@ static int __devinit do_boot_cpu(int apicid, int cpu) per_cpu(doublefault_tss, cpu) = alloc_xenheap_page(); memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE); } +#else + if (!per_cpu(compat_arg_xlat, cpu)) + setup_compat_arg_xlat(cpu, apicid_to_node[apicid]); #endif if (!idt_tables[cpu]) { diff --git a/xen/arch/x86/tboot.c b/xen/arch/x86/tboot.c index 54075c2..9709553 100644 --- a/xen/arch/x86/tboot.c +++ b/xen/arch/x86/tboot.c @@ -311,7 +311,6 @@ void tboot_shutdown(uint32_t shutdown_type) /* per-cpu data */ g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start); g_tboot_shared->mac_regions[2].size = - g_tboot_shared->mac_regions[2].start + (((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT); /* bss */ g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start); diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c index 0ac685f..266f2f0 100644 --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -58,7 +58,7 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) case XENMEM_add_to_physmap: { struct compat_add_to_physmap cmp; 
- struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + struct xen_add_to_physmap *nat = COMPAT_ARG_XLAT_VIRT_BASE; if ( copy_from_guest(&cmp, arg, 1) ) return -EFAULT; @@ -72,7 +72,7 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) case XENMEM_set_memory_map: { struct compat_foreign_memory_map cmp; - struct xen_foreign_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + struct xen_foreign_memory_map *nat = COMPAT_ARG_XLAT_VIRT_BASE; if ( copy_from_guest(&cmp, arg, 1) ) return -EFAULT; @@ -91,7 +91,7 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) case XENMEM_machine_memory_map: { struct compat_memory_map cmp; - struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + struct xen_memory_map *nat = COMPAT_ARG_XLAT_VIRT_BASE; if ( copy_from_guest(&cmp, arg, 1) ) return -EFAULT; @@ -118,7 +118,7 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) case XENMEM_get_pod_target: { struct compat_pod_target cmp; - struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + struct xen_pod_target *nat = COMPAT_ARG_XLAT_VIRT_BASE; if ( copy_from_guest(&cmp, arg, 1) ) return -EFAULT; @@ -212,7 +212,7 @@ int compat_mmuext_op(XEN_GUEST_HANDLE(mmuext_op_compat_t) cmp_uops, if ( unlikely(!guest_handle_okay(cmp_uops, count)) ) return -EFAULT; - set_xen_guest_handle(nat_ops, (void *)COMPAT_ARG_XLAT_VIRT_BASE); + set_xen_guest_handle(nat_ops, COMPAT_ARG_XLAT_VIRT_BASE); for ( ; count; count -= i ) { diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 81440d1..9812e00 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -30,14 +31,13 @@ #include #include #include -#include #include #ifdef CONFIG_COMPAT unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; #endif -DEFINE_PER_CPU(char, compat_arg_xlat[COMPAT_ARG_XLAT_SIZE]); +DEFINE_PER_CPU(void *, compat_arg_xlat); /* Top-level 
master (and idle-domain) page directory. */ l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned"))) @@ -252,6 +252,25 @@ void __init zap_low_mappings(void) 0x10, __PAGE_HYPERVISOR); } +int __cpuinit setup_compat_arg_xlat(unsigned int cpu, int node) +{ + unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE); + unsigned long sz = PAGE_SIZE << order; + unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0; + struct page_info *pg; + + pg = alloc_domheap_pages(NULL, order, memflags); + if ( !pg ) + return -ENOMEM; + + for ( ; (sz -= PAGE_SIZE) >= COMPAT_ARG_XLAT_SIZE; ++pg ) + free_domheap_page(pg); + + per_cpu(compat_arg_xlat, cpu) = page_to_virt(pg); + + return 0; +} + void __init subarch_init_memory(void) { unsigned long i, n, v, m2p_start_mfn; @@ -309,6 +328,10 @@ void __init subarch_init_memory(void) share_xen_page_with_privileged_guests(page, XENSHARE_readonly); } } + + if ( setup_compat_arg_xlat(smp_processor_id(), + apicid_to_node[boot_cpu_physical_apicid]) ) + panic("Could not setup argument translation area"); } long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) diff --git a/xen/common/compat/domain.c b/xen/common/compat/domain.c index cf2a13b..629a764 100644 --- a/xen/common/compat/domain.c +++ b/xen/common/compat/domain.c @@ -87,7 +87,7 @@ int compat_vcpu_op(int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg) if ( copy_from_guest(&cmp, arg, 1) ) return -EFAULT; - nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE; + nat = COMPAT_ARG_XLAT_VIRT_BASE; XLAT_vcpu_set_singleshot_timer(nat, &cmp); rc = do_vcpu_op(cmd, vcpuid, guest_handle_from_ptr(nat, void)); break; diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c index f3a6d93..5f0dc2d 100644 --- a/xen/common/compat/grant_table.c +++ b/xen/common/compat/grant_table.c @@ -97,7 +97,7 @@ int compat_grant_table_op(unsigned int cmd, struct compat_gnttab_copy copy; } cmp; - set_xen_guest_handle(nat.uop, (void *)COMPAT_ARG_XLAT_VIRT_BASE); + 
set_xen_guest_handle(nat.uop, COMPAT_ARG_XLAT_VIRT_BASE); switch ( cmd ) { case GNTTABOP_setup_table: diff --git a/xen/common/compat/memory.c b/xen/common/compat/memory.c index 779cad9..99467db 100644 --- a/xen/common/compat/memory.c +++ b/xen/common/compat/memory.c @@ -25,7 +25,7 @@ int compat_memory_op(unsigned int cmd, XEN_GUEST_HANDLE(void) compat) struct compat_memory_exchange xchg; } cmp; - set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_BASE); + set_xen_guest_handle(nat.hnd, COMPAT_ARG_XLAT_VIRT_BASE); split = 0; switch ( op ) { diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 1c32772..5306354 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -119,7 +119,6 @@ __get_maptrack_handle( if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) ) return -1; t->maptrack_head = maptrack_entry(t, h).ref; - t->map_count++; return h; } @@ -129,7 +128,6 @@ put_maptrack_handle( { maptrack_entry(t, handle).ref = t->maptrack_head; t->maptrack_head = handle; - t->map_count--; } static inline int @@ -201,6 +199,7 @@ __gnttab_map_grant_ref( unsigned long frame = 0, nr_gets = 0; int rc = GNTST_okay; u32 old_pin; + u32 act_pin; unsigned int cache_flags; struct active_grant_entry *act; struct grant_mapping *mt; @@ -331,6 +330,7 @@ __gnttab_map_grant_ref( GNTPIN_hstr_inc : GNTPIN_hstw_inc; frame = act->frame; + act_pin = act->pin; cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); @@ -393,7 +393,7 @@ __gnttab_map_grant_ref( if ( need_iommu(ld) && !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && - (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) + (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) { if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) ) { diff --git a/xen/drivers/cpufreq/cpufreq_ondemand.c b/xen/drivers/cpufreq/cpufreq_ondemand.c index a4ff4f9..2e5a1c5 100644 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c @@ -97,9 +97,8 @@ int 
get_cpufreq_ondemand_para(uint32_t *sampling_rate_max, static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) { - unsigned int load = 0; - uint64_t cur_ns, idle_ns, total_ns; - + uint64_t cur_ns, total_ns; + uint64_t max_load_freq = 0; struct cpufreq_policy *policy; unsigned int j; @@ -121,30 +120,34 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) return; /* Get Idle Time */ - idle_ns = UINT_MAX; for_each_cpu_mask(j, policy->cpus) { - uint64_t total_idle_ns; - unsigned int tmp_idle_ns; + uint64_t idle_ns, total_idle_ns; + uint64_t load, load_freq, freq_avg; struct cpu_dbs_info_s *j_dbs_info; j_dbs_info = &per_cpu(cpu_dbs_info, j); total_idle_ns = get_cpu_idle_time(j); - tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle; + idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle; j_dbs_info->prev_cpu_idle = total_idle_ns; - if (tmp_idle_ns < idle_ns) - idle_ns = tmp_idle_ns; - } + if (unlikely(total_ns < idle_ns)) + continue; - if (likely(total_ns > idle_ns)) - load = (100 * (total_ns - idle_ns)) / total_ns; + load = 100 * (total_ns - idle_ns) / total_ns; + + freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG); + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } /* Check for frequency increase */ - if (load > dbs_tuners_ins.up_threshold) { + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { /* if we are already at full speed then break out early */ if (policy->cur == policy->max) return; - __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); return; } @@ -158,12 +161,10 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) * can support the current CPU usage without triggering the up * policy. To be safe, we focus 10 points under the threshold. 
*/ - if (load < (dbs_tuners_ins.up_threshold - 10)) { - unsigned int freq_next, freq_cur; - - freq_cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG); + if (max_load_freq < (dbs_tuners_ins.up_threshold - 10) * policy->cur) { + uint64_t freq_next; - freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10); + freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10); __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); } diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c index 18b4a6c..b73ad09 100644 --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -418,7 +418,7 @@ void hvm_dirq_assist(struct vcpu *v) irq < NR_IRQS; irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) ) { - if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) ) + if ( !test_and_clear_bit(irq, hvm_irq_dpci->dirq_mask) ) continue; spin_lock(&d->event_lock); diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index d85a467..723cff0 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -206,7 +206,7 @@ static void pci_clean_dpci_irqs(struct domain *d) i = find_next_bit(hvm_irq_dpci->mapping, NR_IRQS, i + 1) ) { pirq_guest_unbind(d, i); - kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]); + kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, i)]); list_for_each_safe ( digl_list, tmp, &hvm_irq_dpci->mirq[i].digl_list ) diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c index eb3e69e..e918708 100644 --- a/xen/drivers/passthrough/vtd/intremap.c +++ b/xen/drivers/passthrough/vtd/intremap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include "iommu.h" @@ -30,18 +31,47 @@ #include "vtd.h" #include "extern.h" -#ifndef dest_SMI +#ifdef __ia64__ #define dest_SMI -1 +#define nr_ioapics iosapic_get_nr_iosapics() +#define nr_ioapic_registers(i) iosapic_get_nr_pins(i) +#else +#define 
nr_ioapic_registers(i) nr_ioapic_registers[i] #endif -/* The max number of IOAPIC (or IOSAPIC) pin. The typical values can be 24 or - * 48 on x86 and Itanium platforms. Here we use a biger number 256. This - * should be big enough. Actually now IREMAP_ENTRY_NR is also 256. - */ -#define MAX_IOAPIC_PIN_NUM 256 +/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */ +static unsigned int **apic_pin_2_ir_idx; + +static int init_apic_pin_2_ir_idx(void) +{ + unsigned int *_apic_pin_2_ir_idx; + unsigned int nr_pins, i; + + nr_pins = 0; + for ( i = 0; i < nr_ioapics; i++ ) + nr_pins += nr_ioapic_registers(i); -static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] = - { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 }; + _apic_pin_2_ir_idx = xmalloc_array(unsigned int, nr_pins); + apic_pin_2_ir_idx = xmalloc_array(unsigned int *, nr_ioapics); + if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) ) + { + xfree(_apic_pin_2_ir_idx); + xfree(apic_pin_2_ir_idx); + return -ENOMEM; + } + + for ( i = 0; i < nr_pins; i++ ) + _apic_pin_2_ir_idx[i] = -1; + + nr_pins = 0; + for ( i = 0; i < nr_ioapics; i++ ) + { + apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins]; + nr_pins += nr_ioapic_registers(i); + } + + return 0; +} u16 apicid_to_bdf(int apic_id) { @@ -103,7 +133,7 @@ static int remap_entry_to_ioapic_rte( } static int ioapic_rte_to_remap_entry(struct iommu *iommu, - int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte, + int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte, unsigned int rte_upper, unsigned int value) { struct iremap_entry *iremap_entry = NULL, *iremap_entries; @@ -117,14 +147,13 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu, remap_rte = (struct IO_APIC_route_remap_entry *) old_rte; spin_lock_irqsave(&ir_ctrl->iremap_lock, flags); - if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 ) + index = apic_pin_2_ir_idx[apic][ioapic_pin]; + if ( index < 0 ) { ir_ctrl->iremap_index++; index = 
ir_ctrl->iremap_index; - ioapic_pin_to_intremap_index[ioapic_pin] = index; + apic_pin_2_ir_idx[apic][ioapic_pin] = index; } - else - index = ioapic_pin_to_intremap_index[ioapic_pin]; if ( index > IREMAP_ENTRY_NR - 1 ) { @@ -162,7 +191,7 @@ static int ioapic_rte_to_remap_entry(struct iommu *iommu, new_ire.lo.res_1 = 0; new_ire.lo.vector = new_rte.vector; new_ire.lo.res_2 = 0; - new_ire.hi.sid = apicid_to_bdf(apic_id); + new_ire.hi.sid = apicid_to_bdf(IO_APIC_ID(apic)); new_ire.hi.sq = 0; /* comparing all 16-bit of SID */ new_ire.hi.svt = 1; /* requestor ID verification SID/SQ */ @@ -300,8 +329,7 @@ void io_apic_write_remap_rte( *(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0); remap_rte->mask = saved_mask; - ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM); - if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin, + if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin, &old_rte, rte_upper, value) ) { *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg; @@ -572,7 +600,7 @@ int enable_intremap(struct iommu *iommu) /* After set SIRTP, we should do globally invalidate the IEC */ iommu_flush_iec_global(iommu); - return 0; + return init_apic_pin_2_ir_idx(); } void disable_intremap(struct iommu *iommu) diff --git a/xen/include/asm-ia64/linux-xen/asm/iosapic.h b/xen/include/asm-ia64/linux-xen/asm/iosapic.h index 46dd30e..899c947 100644 --- a/xen/include/asm-ia64/linux-xen/asm/iosapic.h +++ b/xen/include/asm-ia64/linux-xen/asm/iosapic.h @@ -186,6 +186,9 @@ struct rte_entry { #define IOSAPIC_RTEINDEX(reg) (((reg) - 0x10) >> 1) extern unsigned long ia64_vector_mask[]; extern unsigned long ia64_xen_vector[]; + +int iosapic_get_nr_iosapics(void); +int iosapic_get_nr_pins(int index); #endif /* XEN */ #define IO_APIC_BASE(idx) ((unsigned int *)iosapic_lists[idx].addr) diff --git a/xen/include/asm-x86/hap.h b/xen/include/asm-x86/hap.h index 60a2ad4..3f3cf3f 100644 --- a/xen/include/asm-x86/hap.h +++ b/xen/include/asm-x86/hap.h @@ -91,6 +91,11 @@ int hap_enable(struct domain *d, 
u32 mode); void hap_final_teardown(struct domain *d); void hap_teardown(struct domain *d); void hap_vcpu_init(struct vcpu *v); +void hap_logdirty_init(struct domain *d); +int hap_track_dirty_vram(struct domain *d, + unsigned long begin_pfn, + unsigned long nr, + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); extern struct paging_mode hap_paging_real_mode; extern struct paging_mode hap_paging_protected_mode; diff --git a/xen/include/asm-x86/hvm/vlapic.h b/xen/include/asm-x86/hvm/vlapic.h index 8c36ed5..9edecbf 100644 --- a/xen/include/asm-x86/hvm/vlapic.h +++ b/xen/include/asm-x86/hvm/vlapic.h @@ -28,10 +28,9 @@ #define MAX_VECTOR 256 -#define vcpu_vlapic(vcpu) (&(vcpu)->arch.hvm_vcpu.vlapic) -#define vlapic_vcpu(vpic) (container_of((vpic), struct vcpu, \ - arch.hvm_vcpu.vlapic)) -#define vlapic_domain(vpic) (vlapic_vcpu(vlapic)->domain) +#define vcpu_vlapic(x) (&(x)->arch.hvm_vcpu.vlapic) +#define vlapic_vcpu(x) (container_of((x), struct vcpu, arch.hvm_vcpu.vlapic)) +#define vlapic_domain(x) (vlapic_vcpu(x)->domain) #define VLAPIC_ID(vlapic) \ (GET_xAPIC_ID(vlapic_get_reg((vlapic), APIC_ID))) diff --git a/xen/include/asm-x86/hvm/vmx/vmx.h b/xen/include/asm-x86/hvm/vmx/vmx.h index 97cf763..631dd27 100644 --- a/xen/include/asm-x86/hvm/vmx/vmx.h +++ b/xen/include/asm-x86/hvm/vmx/vmx.h @@ -105,7 +105,7 @@ void vmx_update_debug_state(struct vcpu *v); #define EXIT_REASON_MONITOR_TRAP_FLAG 37 #define EXIT_REASON_MONITOR_INSTRUCTION 39 #define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MACHINE_CHECK 41 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 #define EXIT_REASON_EPT_VIOLATION 48 diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 6772b40..82d29f6 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -285,8 +285,9 @@ void put_page_type(struct page_info *page); int get_page_type(struct page_info *page, unsigned long type); int 
put_page_type_preemptible(struct page_info *page); int get_page_type_preemptible(struct page_info *page, unsigned long type); -int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d); -void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d); +int get_page_from_l1e( + l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner); +void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner); static inline void put_page_and_type(struct page_info *page) { diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h index 11e96ee..f26c5e8 100644 --- a/xen/include/asm-x86/paging.h +++ b/xen/include/asm-x86/paging.h @@ -139,6 +139,12 @@ int paging_alloc_log_dirty_bitmap(struct domain *d); /* free log dirty bitmap resource */ void paging_free_log_dirty_bitmap(struct domain *d); +/* get the dirty bitmap for a specific range of pfns */ +int paging_log_dirty_range(struct domain *d, + unsigned long begin_pfn, + unsigned long nr, + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap); + /* enable log dirty */ int paging_log_dirty_enable(struct domain *d); @@ -176,6 +182,15 @@ void paging_mark_dirty(struct domain *d, unsigned long guest_mfn); #define L4_LOGDIRTY_IDX(pfn) 0 #endif +/* VRAM dirty tracking support */ +struct sh_dirty_vram { + unsigned long begin_pfn; + unsigned long end_pfn; + paddr_t *sl1ma; + uint8_t *dirty_bitmap; + s_time_t last_dirty; +}; + /***************************************************************************** * Entry points into the paging-assistance code */ diff --git a/xen/include/asm-x86/percpu.h b/xen/include/asm-x86/percpu.h index aca6d2b..d762065 100644 --- a/xen/include/asm-x86/percpu.h +++ b/xen/include/asm-x86/percpu.h @@ -1,7 +1,7 @@ #ifndef __X86_PERCPU_H__ #define __X86_PERCPU_H__ -#define PERCPU_SHIFT 13 +#define PERCPU_SHIFT 12 #define PERCPU_SIZE (1UL << PERCPU_SHIFT) /* Separate out the type, so (int[3], foo) works. 
*/ diff --git a/xen/include/asm-x86/x86_64/uaccess.h b/xen/include/asm-x86/x86_64/uaccess.h index 2080be8..2cdc373 100644 --- a/xen/include/asm-x86/x86_64/uaccess.h +++ b/xen/include/asm-x86/x86_64/uaccess.h @@ -2,8 +2,9 @@ #define __X86_64_UACCESS_H #define COMPAT_ARG_XLAT_VIRT_BASE this_cpu(compat_arg_xlat) -#define COMPAT_ARG_XLAT_SIZE PAGE_SIZE -DECLARE_PER_CPU(char, compat_arg_xlat[COMPAT_ARG_XLAT_SIZE]); +#define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE) +DECLARE_PER_CPU(void *, compat_arg_xlat); +int setup_compat_arg_xlat(unsigned int cpu, int node); #define is_compat_arg_xlat_range(addr, size) ({ \ unsigned long __off; \ __off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \ diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h index 2380066..3e0c5ab 100644 --- a/xen/include/public/io/blkif.h +++ b/xen/include/public/io/blkif.h @@ -84,6 +84,13 @@ */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +/* + * NB. first_sect and last_sect in blkif_request_segment, as well as + * sector_number in blkif_request, are always expressed in 512-byte units. + * However they must be properly aligned to the real sector size of the + * physical disk, which is reported in the "sector-size" node in the backend + * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. + */ struct blkif_request_segment { grant_ref_t gref; /* reference to I/O buffer frame */ /* @first_sect: first sector in frame to transfer (inclusive). 
*/ diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index fb03d0c..524118b 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -272,9 +272,9 @@ struct mmuext_op { unsigned int nr_ents; /* TLB_FLUSH_MULTI, INVLPG_MULTI */ #if __XEN_INTERFACE_VERSION__ >= 0x00030205 - XEN_GUEST_HANDLE(void) vcpumask; + XEN_GUEST_HANDLE(const_void) vcpumask; #else - void *vcpumask; + const void *vcpumask; #endif /* COPY_PAGE */ xen_pfn_t src_mfn; diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index d0e8040..096af9b 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -91,7 +91,6 @@ struct grant_table { struct grant_mapping **maptrack; unsigned int maptrack_head; unsigned int maptrack_limit; - unsigned int map_count; /* Lock protecting updates to active and shared grant tables. */ spinlock_t lock; }; -- 2.39.5