ia64/xen-unstable
changeset 19201:c7cba853583d
merge with xen-unstable.hg
| author   | Isaku Yamahata <yamahata@valinux.co.jp>     |
|----------|---------------------------------------------|
| date     | Fri Feb 13 11:22:28 2009 +0900 (2009-02-13) |
| parents  | af992824b5cf 32b154137492                   |
| children | af0da711bbdb                                |
| files    | xen/arch/ia64/linux-xen/mca.c               |
line diff
1.1 --- a/.hgignore Fri Feb 13 10:56:01 2009 +0900 1.2 +++ b/.hgignore Fri Feb 13 11:22:28 2009 +0900 1.3 @@ -256,6 +256,7 @@ 1.4 ^xen/arch/x86/asm-offsets\.s$ 1.5 ^xen/arch/x86/boot/mkelf32$ 1.6 ^xen/arch/x86/xen\.lds$ 1.7 +^xen/arch/x86/boot/reloc.S$ 1.8 ^xen/ddb/.*$ 1.9 ^xen/include/asm$ 1.10 ^xen/include/asm-.*/asm-offsets\.h$ 1.11 @@ -279,15 +280,6 @@ 1.12 ^xen/arch/ia64/asm-xsi-offsets\.s$ 1.13 ^xen/arch/ia64/map\.out$ 1.14 ^xen/arch/ia64/xen\.lds\.s$ 1.15 -^xen/arch/powerpc/dom0\.bin$ 1.16 -^xen/arch/powerpc/asm-offsets\.s$ 1.17 -^xen/arch/powerpc/firmware$ 1.18 -^xen/arch/powerpc/firmware.dbg$ 1.19 -^xen/arch/powerpc/firmware_image.bin$ 1.20 -^xen/arch/powerpc/xen\.lds$ 1.21 -^xen/arch/powerpc/\.xen-syms$ 1.22 -^xen/arch/powerpc/xen-syms\.S$ 1.23 -^xen/arch/powerpc/cmdline.dep$ 1.24 ^unmodified_drivers/linux-2.6/\.tmp_versions 1.25 ^unmodified_drivers/linux-2.6/.*\.cmd$ 1.26 ^unmodified_drivers/linux-2.6/.*\.ko$
2.1 --- a/Config.mk Fri Feb 13 10:56:01 2009 +0900 2.2 +++ b/Config.mk Fri Feb 13 11:22:28 2009 +0900 2.3 @@ -1,7 +1,7 @@ 2.4 # -*- mode: Makefile; -*- 2.5 2.6 -# A debug build of Xen and tools? 2.7 -debug ?= y ## TEMPORARILY ENABLED 2.8 +# A debug build of Xen and tools? TEMPORARILY ENABLED 2.9 +debug ?= y 2.10 2.11 XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ 2.12 -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
3.1 --- a/docs/man/xm.pod.1 Fri Feb 13 10:56:01 2009 +0900 3.2 +++ b/docs/man/xm.pod.1 Fri Feb 13 11:22:28 2009 +0900 3.3 @@ -67,6 +67,8 @@ The attached console will perform much l 3.4 so running curses based interfaces over the console B<is not 3.5 advised>. Vi tends to get very odd when using it over this interface. 3.6 3.7 +Use the key combination Ctrl+] to detach the domain console. 3.8 + 3.9 =item B<create> I<configfile> [I<OPTIONS>] [I<vars>].. 3.10 3.11 The create subcommand requires a config file and can optionally take a
4.1 --- a/extras/mini-os/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900 4.2 +++ b/extras/mini-os/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900 4.3 @@ -550,9 +550,15 @@ static void clear_bootstrap(void) 4.4 4.5 void arch_init_p2m(unsigned long max_pfn) 4.6 { 4.7 +#ifdef __x86_64__ 4.8 #define L1_P2M_SHIFT 9 4.9 #define L2_P2M_SHIFT 18 4.10 #define L3_P2M_SHIFT 27 4.11 +#else 4.12 +#define L1_P2M_SHIFT 10 4.13 +#define L2_P2M_SHIFT 20 4.14 +#define L3_P2M_SHIFT 30 4.15 +#endif 4.16 #define L1_P2M_ENTRIES (1 << L1_P2M_SHIFT) 4.17 #define L2_P2M_ENTRIES (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT)) 4.18 #define L3_P2M_ENTRIES (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))
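The new 32-bit constants follow directly from the p2m page geometry; a minimal arithmetic sketch, assuming mini-os packs one machine-word-sized mfn per p2m entry into each 4 KiB page (the macro names below are illustrative, not taken from the tree):

```c
/* Sketch of the arithmetic behind the shifts above (names illustrative).
 * A 4 KiB p2m page holds PAGE_SIZE / sizeof(entry) machine-frame numbers:
 *   x86_64: 4096 / 8 = 512  = 2^9   -> L1/L2/L3 shifts 9, 18, 27
 *   x86_32: 4096 / 4 = 1024 = 2^10  -> L1/L2/L3 shifts 10, 20, 30
 * Each additional level therefore widens the reachable pfn space by one
 * more page-table's worth of entries.
 */
#define P2M_PAGE_SIZE  4096u
#define P2M_ENTRIES    (P2M_PAGE_SIZE / sizeof(unsigned long)) /* 1024 or 512 */
#define L1_SHIFT       (P2M_ENTRIES == 1024u ? 10 : 9)
```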
5.1 --- a/tools/blktap/drivers/Makefile Fri Feb 13 10:56:01 2009 +0900 5.2 +++ b/tools/blktap/drivers/Makefile Fri Feb 13 11:22:28 2009 +0900 5.3 @@ -13,7 +13,7 @@ CFLAGS += $(CFLAGS_libxenstore) 5.4 CFLAGS += -I $(LIBAIO_DIR) 5.5 CFLAGS += -D_GNU_SOURCE 5.6 5.7 -ifeq ($(shell . ./check_gcrypt),"yes") 5.8 +ifeq ($(shell . ./check_gcrypt $(CC)),yes) 5.9 CFLAGS += -DUSE_GCRYPT 5.10 CRYPT_LIB := -lgcrypt 5.11 else
6.1 --- a/tools/firmware/rombios/rombios.c Fri Feb 13 10:56:01 2009 +0900 6.2 +++ b/tools/firmware/rombios/rombios.c Fri Feb 13 11:22:28 2009 +0900 6.3 @@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS) 6.4 { 6.5 Bit32u extended_memory_size=0; // 64bits long 6.6 Bit16u CX,DX; 6.7 +#ifdef HVMASSIST 6.8 + Bit16u off, e820_table_size; 6.9 + Bit32u base, type, size; 6.10 +#endif 6.11 6.12 BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax); 6.13 6.14 @@ -4625,8 +4629,10 @@ ASM_START 6.15 6.16 ;; Get the count in eax 6.17 mov bx, sp 6.18 +SEG SS 6.19 mov ax, _int15_function32.CX [bx] 6.20 shl eax, #16 6.21 +SEG SS 6.22 mov ax, _int15_function32.DX [bx] 6.23 6.24 ;; convert to numbers of 15usec ticks 6.25 @@ -4660,8 +4666,7 @@ ASM_END 6.26 { 6.27 #ifdef HVMASSIST 6.28 case 0x20: { 6.29 - Bit16u e820_table_size = 6.30 - read_word(E820_SEG, E820_NR_OFFSET) * 0x14; 6.31 + e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14; 6.32 6.33 if (regs.u.r32.edx != 0x534D4150) /* SMAP */ 6.34 goto int15_unimplemented; 6.35 @@ -4674,8 +4679,6 @@ ASM_END 6.36 if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size) 6.37 regs.u.r32.ebx = 0; 6.38 } else if (regs.u.r16.bx == 1) { 6.39 - Bit32u base, type; 6.40 - Bit16u off; 6.41 for (off = 0; off < e820_table_size; off += 0x14) { 6.42 base = read_dword(E820_SEG, E820_OFFSET + off); 6.43 type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off); 6.44 @@ -4699,9 +4702,7 @@ ASM_END 6.45 } 6.46 6.47 case 0x01: { 6.48 - Bit16u off, e820_table_size = 6.49 - read_word(E820_SEG, E820_NR_OFFSET) * 0x14; 6.50 - Bit32u base, type, size; 6.51 + e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14; 6.52 6.53 // do we have any reason to fail here ? 6.54 CLEAR_CF();
7.1 --- a/tools/libxc/xc_domain.c Fri Feb 13 10:56:01 2009 +0900 7.2 +++ b/tools/libxc/xc_domain.c Fri Feb 13 11:22:28 2009 +0900 7.3 @@ -271,6 +271,38 @@ int xc_domain_hvm_getcontext(int xc_hand 7.4 return (ret < 0 ? -1 : domctl.u.hvmcontext.size); 7.5 } 7.6 7.7 +/* Get just one element of the HVM guest context. 7.8 + * size must be >= HVM_SAVE_LENGTH(type) */ 7.9 +int xc_domain_hvm_getcontext_partial(int xc_handle, 7.10 + uint32_t domid, 7.11 + uint16_t typecode, 7.12 + uint16_t instance, 7.13 + void *ctxt_buf, 7.14 + uint32_t size) 7.15 +{ 7.16 + int ret; 7.17 + DECLARE_DOMCTL; 7.18 + 7.19 + if ( !ctxt_buf ) 7.20 + return -EINVAL; 7.21 + 7.22 + domctl.cmd = XEN_DOMCTL_gethvmcontext_partial; 7.23 + domctl.domain = (domid_t) domid; 7.24 + domctl.u.hvmcontext_partial.type = typecode; 7.25 + domctl.u.hvmcontext_partial.instance = instance; 7.26 + set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf); 7.27 + 7.28 + if ( (ret = lock_pages(ctxt_buf, size)) != 0 ) 7.29 + return ret; 7.30 + 7.31 + ret = do_domctl(xc_handle, &domctl); 7.32 + 7.33 + if ( ctxt_buf ) 7.34 + unlock_pages(ctxt_buf, size); 7.35 + 7.36 + return ret ? -1 : 0; 7.37 +} 7.38 + 7.39 /* set info to hvm guest for restore */ 7.40 int xc_domain_hvm_setcontext(int xc_handle, 7.41 uint32_t domid, 7.42 @@ -909,6 +941,32 @@ int xc_domain_update_msi_irq( 7.43 return rc; 7.44 } 7.45 7.46 +int xc_domain_unbind_msi_irq( 7.47 + int xc_handle, 7.48 + uint32_t domid, 7.49 + uint32_t gvec, 7.50 + uint32_t pirq, 7.51 + uint32_t gflags) 7.52 +{ 7.53 + int rc; 7.54 + xen_domctl_bind_pt_irq_t *bind; 7.55 + 7.56 + DECLARE_DOMCTL; 7.57 + 7.58 + domctl.cmd = XEN_DOMCTL_unbind_pt_irq; 7.59 + domctl.domain = (domid_t)domid; 7.60 + 7.61 + bind = &(domctl.u.bind_pt_irq); 7.62 + bind->hvm_domid = domid; 7.63 + bind->irq_type = PT_IRQ_TYPE_MSI; 7.64 + bind->machine_irq = pirq; 7.65 + bind->u.msi.gvec = gvec; 7.66 + bind->u.msi.gflags = gflags; 7.67 + 7.68 + rc = do_domctl(xc_handle, &domctl); 7.69 + return rc; 7.70 +} 7.71 + 7.72 /* Pass-through: binds machine irq to guests irq */ 7.73 int xc_domain_bind_pt_irq( 7.74 int xc_handle,
8.1 --- a/tools/libxc/xc_pagetab.c Fri Feb 13 10:56:01 2009 +0900 8.2 +++ b/tools/libxc/xc_pagetab.c Fri Feb 13 11:22:28 2009 +0900 8.3 @@ -4,50 +4,42 @@ 8.4 * Function to translate virtual to physical addresses. 8.5 */ 8.6 #include "xc_private.h" 8.7 +#include <xen/hvm/save.h> 8.8 8.9 #define CR0_PG 0x80000000 8.10 #define CR4_PAE 0x20 8.11 #define PTE_PSE 0x80 8.12 +#define EFER_LMA 0x400 8.13 + 8.14 8.15 unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom, 8.16 int vcpu, unsigned long long virt) 8.17 { 8.18 xc_dominfo_t dominfo; 8.19 - vcpu_guest_context_any_t ctx; 8.20 uint64_t paddr, mask, pte = 0; 8.21 int size, level, pt_levels = 2; 8.22 void *map; 8.23 8.24 if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 8.25 - || dominfo.domid != dom 8.26 - || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) 8.27 + || dominfo.domid != dom) 8.28 return 0; 8.29 8.30 /* What kind of paging are we dealing with? */ 8.31 if (dominfo.hvm) { 8.32 - unsigned long cr0, cr3, cr4; 8.33 - xen_capabilities_info_t xen_caps = ""; 8.34 - if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) 8.35 + struct hvm_hw_cpu ctx; 8.36 + if (xc_domain_hvm_getcontext_partial(xc_handle, dom, 8.37 + HVM_SAVE_CODE(CPU), vcpu, 8.38 + &ctx, sizeof ctx) != 0) 8.39 return 0; 8.40 - /* HVM context records are always host-sized */ 8.41 - if (strstr(xen_caps, "xen-3.0-x86_64")) { 8.42 - cr0 = ctx.x64.ctrlreg[0]; 8.43 - cr3 = ctx.x64.ctrlreg[3]; 8.44 - cr4 = ctx.x64.ctrlreg[4]; 8.45 - } else { 8.46 - cr0 = ctx.x32.ctrlreg[0]; 8.47 - cr3 = ctx.x32.ctrlreg[3]; 8.48 - cr4 = ctx.x32.ctrlreg[4]; 8.49 - } 8.50 - if (!(cr0 & CR0_PG)) 8.51 + if (!(ctx.cr0 & CR0_PG)) 8.52 return virt; 8.53 - if (0 /* XXX how to get EFER.LMA? */) 8.54 - pt_levels = 4; 8.55 - else 8.56 - pt_levels = (cr4 & CR4_PAE) ? 3 : 2; 8.57 - paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull); 8.58 + pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2; 8.59 + paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull); 8.60 } else { 8.61 DECLARE_DOMCTL; 8.62 + vcpu_guest_context_any_t ctx; 8.63 + if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) 8.64 + return 0; 8.65 domctl.domain = dom; 8.66 domctl.cmd = XEN_DOMCTL_get_address_size; 8.67 if ( do_domctl(xc_handle, &domctl) != 0 )
9.1 --- a/tools/libxc/xenctrl.h Fri Feb 13 10:56:01 2009 +0900 9.2 +++ b/tools/libxc/xenctrl.h Fri Feb 13 11:22:28 2009 +0900 9.3 @@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand 9.4 uint8_t *ctxt_buf, 9.5 uint32_t size); 9.6 9.7 + 9.8 +/** 9.9 + * This function returns one element of the context of a hvm domain 9.10 + * @parm xc_handle a handle to an open hypervisor interface 9.11 + * @parm domid the domain to get information from 9.12 + * @parm typecode which type of elemnt required 9.13 + * @parm instance which instance of the type 9.14 + * @parm ctxt_buf a pointer to a structure to store the execution context of 9.15 + * the hvm domain 9.16 + * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode)) 9.17 + * @return 0 on success, -1 on failure 9.18 + */ 9.19 +int xc_domain_hvm_getcontext_partial(int xc_handle, 9.20 + uint32_t domid, 9.21 + uint16_t typecode, 9.22 + uint16_t instance, 9.23 + void *ctxt_buf, 9.24 + uint32_t size); 9.25 + 9.26 /** 9.27 * This function will set the context for hvm domain 9.28 * 9.29 @@ -1075,6 +1094,12 @@ int xc_domain_update_msi_irq( 9.30 uint32_t pirq, 9.31 uint32_t gflags); 9.32 9.33 +int xc_domain_unbind_msi_irq(int xc_handle, 9.34 + uint32_t domid, 9.35 + uint32_t gvec, 9.36 + uint32_t pirq, 9.37 + uint32_t gflags); 9.38 + 9.39 int xc_domain_bind_pt_irq(int xc_handle, 9.40 uint32_t domid, 9.41 uint8_t machine_irq,
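For reference, a minimal caller of the new interface, modelled on the xc_pagetab.c and xenctx.c call sites elsewhere in this changeset; the helper name and the printed fields are illustrative, and opening/closing the xc handle is assumed to happen in the surrounding tool code:

```c
/* Fetch the CPU save record of one VCPU of an HVM domain and print a
 * couple of fields -- the same pattern xc_pagetab.c uses above.
 * (dump_hvm_vcpu() itself is an illustrative helper, not part of libxc.) */
#include <stdio.h>
#include "xenctrl.h"
#include <xen/hvm/save.h>

static int dump_hvm_vcpu(int xc_handle, uint32_t domid, uint16_t vcpu)
{
    struct hvm_hw_cpu ctx;  /* size >= HVM_SAVE_LENGTH(CPU), as required */

    if ( xc_domain_hvm_getcontext_partial(xc_handle, domid,
                                          HVM_SAVE_CODE(CPU), vcpu,
                                          &ctx, sizeof ctx) != 0 )
        return -1;

    printf("vcpu%u: cr3=%#llx efer=%#llx\n", (unsigned)vcpu,
           (unsigned long long)ctx.cr3,
           (unsigned long long)ctx.msr_efer);
    return 0;
}
```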
10.1 --- a/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 10:56:01 2009 +0900 10.2 +++ b/tools/python/xen/xend/XendAPIStore.py Fri Feb 13 11:22:28 2009 +0900 10.3 @@ -33,7 +33,8 @@ def register(uuid, type, inst): 10.4 10.5 def deregister(uuid, type): 10.6 old = get(uuid, type) 10.7 - del __classes[(uuid, type)] 10.8 + if old is not None: 10.9 + del __classes[(uuid, type)] 10.10 return old 10.11 10.12 def get(uuid, type):
11.1 --- a/tools/python/xen/xend/image.py Fri Feb 13 10:56:01 2009 +0900 11.2 +++ b/tools/python/xen/xend/image.py Fri Feb 13 11:22:28 2009 +0900 11.3 @@ -372,8 +372,6 @@ class ImageHandler: 11.4 env['DISPLAY'] = self.display 11.5 if self.xauthority: 11.6 env['XAUTHORITY'] = self.xauthority 11.7 - if self.vncconsole: 11.8 - args = args + ([ "-vncviewer" ]) 11.9 unique_id = "%i-%i" % (self.vm.getDomid(), time.time()) 11.10 sentinel_path = sentinel_path_prefix + unique_id 11.11 sentinel_path_fifo = sentinel_path + '.fifo' 11.12 @@ -558,24 +556,30 @@ class ImageHandler: 11.13 os.kill(self.pid, signal.SIGHUP) 11.14 except OSError, exn: 11.15 log.exception(exn) 11.16 - try: 11.17 - # Try to reap the child every 100ms for 10s. Then SIGKILL it. 11.18 - for i in xrange(100): 11.19 + # Try to reap the child every 100ms for 10s. Then SIGKILL it. 11.20 + for i in xrange(100): 11.21 + try: 11.22 (p, rv) = os.waitpid(self.pid, os.WNOHANG) 11.23 if p == self.pid: 11.24 break 11.25 - time.sleep(0.1) 11.26 - else: 11.27 - log.warning("DeviceModel %d took more than 10s " 11.28 - "to terminate: sending SIGKILL" % self.pid) 11.29 + except OSError: 11.30 + # This is expected if Xend has been restarted within 11.31 + # the life of this domain. In this case, we can kill 11.32 + # the process, but we can't wait for it because it's 11.33 + # not our child. We continue this loop, and after it is 11.34 + # terminated make really sure the process is going away 11.35 + # (SIGKILL). 11.36 + pass 11.37 + time.sleep(0.1) 11.38 + else: 11.39 + log.warning("DeviceModel %d took more than 10s " 11.40 + "to terminate: sending SIGKILL" % self.pid) 11.41 + try: 11.42 os.kill(self.pid, signal.SIGKILL) 11.43 os.waitpid(self.pid, 0) 11.44 - except OSError, exn: 11.45 - # This is expected if Xend has been restarted within the 11.46 - # life of this domain. In this case, we can kill the process, 11.47 - # but we can't wait for it because it's not our child. 11.48 - # We just make really sure it's going away (SIGKILL) first. 11.49 - os.kill(self.pid, signal.SIGKILL) 11.50 + except OSError: 11.51 + # This happens if the process doesn't exist. 11.52 + pass 11.53 state = xstransact.Remove("/local/domain/0/device-model/%i" 11.54 % self.vm.getDomid()) 11.55 finally:
12.1 --- a/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 10:56:01 2009 +0900 12.2 +++ b/tools/python/xen/xend/server/pciquirk.py Fri Feb 13 11:22:28 2009 +0900 12.3 @@ -123,7 +123,8 @@ class PCIQuirk: 12.4 log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE) 12.5 self.pci_perm_dev_config = ['xend-pci-perm-devs'] 12.6 12.7 - devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0) 12.8 + devices = child_at(child(self.pci_perm_dev_config, 12.9 + 'unconstrained_dev_ids'),0) 12.10 if self.__matchPCIdev( devices ): 12.11 log.debug("Permissive mode enabled for PCI device [%s]" % 12.12 self.devid)
13.1 --- a/tools/python/xen/xm/create.py Fri Feb 13 10:56:01 2009 +0900 13.2 +++ b/tools/python/xen/xm/create.py Fri Feb 13 11:22:28 2009 +0900 13.3 @@ -1337,7 +1337,7 @@ def main(argv): 13.4 elif not opts.is_xml: 13.5 dom = make_domain(opts, config) 13.6 13.7 - if opts.vals.vncviewer: 13.8 + if opts.vals.vncconsole: 13.9 domid = domain_name_to_domid(sxp.child_value(config, 'name', -1)) 13.10 vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False) 13.11 console.runVncViewer(domid, vncviewer_autopass, True)
14.1 --- a/tools/python/xen/xm/main.py Fri Feb 13 10:56:01 2009 +0900 14.2 +++ b/tools/python/xen/xm/main.py Fri Feb 13 11:22:28 2009 +0900 14.3 @@ -59,7 +59,11 @@ from xen.util.acmpolicy import ACM_LABEL 14.4 import XenAPI 14.5 14.6 import xen.lowlevel.xc 14.7 -xc = xen.lowlevel.xc.xc() 14.8 +try: 14.9 + xc = xen.lowlevel.xc.xc() 14.10 +except Exception, ex: 14.11 + print >>sys.stderr, ("Is xen kernel running?") 14.12 + sys.exit(1) 14.13 14.14 import inspect 14.15 from xen.xend import XendOptions 14.16 @@ -735,7 +739,7 @@ def xm_save(args): 14.17 (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint']) 14.18 except getopt.GetoptError, opterr: 14.19 err(opterr) 14.20 - sys.exit(1) 14.21 + usage('save') 14.22 14.23 checkpoint = False 14.24 for (k, v) in options:
15.1 --- a/tools/xentrace/xenctx.c Fri Feb 13 10:56:01 2009 +0900 15.2 +++ b/tools/xentrace/xenctx.c Fri Feb 13 11:22:28 2009 +0900 15.3 @@ -26,6 +26,7 @@ 15.4 #include "xenctrl.h" 15.5 #include <xen/foreign/x86_32.h> 15.6 #include <xen/foreign/x86_64.h> 15.7 +#include <xen/hvm/save.h> 15.8 15.9 int xc_handle = 0; 15.10 int domid = 0; 15.11 @@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont 15.12 } 15.13 } 15.14 15.15 +static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx) 15.16 +{ 15.17 + struct cpu_user_regs_x86_64 *regs = &ctx->user_regs; 15.18 + 15.19 + printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip); 15.20 + print_symbol((uint32_t)regs->eip); 15.21 + print_flags((uint32_t)regs->eflags); 15.22 + printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp); 15.23 + 15.24 + printf("eax: %08x\t", (uint32_t)regs->eax); 15.25 + printf("ebx: %08x\t", (uint32_t)regs->ebx); 15.26 + printf("ecx: %08x\t", (uint32_t)regs->ecx); 15.27 + printf("edx: %08x\n", (uint32_t)regs->edx); 15.28 + 15.29 + printf("esi: %08x\t", (uint32_t)regs->esi); 15.30 + printf("edi: %08x\t", (uint32_t)regs->edi); 15.31 + printf("ebp: %08x\n", (uint32_t)regs->ebp); 15.32 + 15.33 + printf(" ds: %04x\t", regs->ds); 15.34 + printf(" es: %04x\t", regs->es); 15.35 + printf(" fs: %04x\t", regs->fs); 15.36 + printf(" gs: %04x\n", regs->gs); 15.37 + 15.38 + if (disp_all) { 15.39 + print_special(ctx->ctrlreg, "cr", 0x1d, 4); 15.40 + print_special(ctx->debugreg, "dr", 0xcf, 4); 15.41 + } 15.42 +} 15.43 + 15.44 static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx) 15.45 { 15.46 struct cpu_user_regs_x86_64 *regs = &ctx->user_regs; 15.47 @@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context 15.48 { 15.49 if (ctxt_word_size == 4) 15.50 print_ctx_32(&ctx->x32); 15.51 + else if (guest_word_size == 4) 15.52 + print_ctx_32on64(&ctx->x64); 15.53 else 15.54 print_ctx_64(&ctx->x64); 15.55 } 15.56 @@ -788,23 +820,29 @@ static void dump_ctx(int vcpu) 15.57 15.58 #if defined(__i386__) || defined(__x86_64__) 15.59 { 15.60 - struct xen_domctl domctl; 15.61 - memset(&domctl, 0, sizeof domctl); 15.62 - domctl.domain = domid; 15.63 - domctl.cmd = XEN_DOMCTL_get_address_size; 15.64 - if (xc_domctl(xc_handle, &domctl) == 0) 15.65 - ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8; 15.66 if (dominfo.hvm) { 15.67 + struct hvm_hw_cpu cpuctx; 15.68 xen_capabilities_info_t xen_caps = ""; 15.69 + if (xc_domain_hvm_getcontext_partial( 15.70 + xc_handle, domid, HVM_SAVE_CODE(CPU), 15.71 + vcpu, &cpuctx, sizeof cpuctx) != 0) { 15.72 + perror("xc_domain_hvm_getcontext_partial"); 15.73 + exit(-1); 15.74 + } 15.75 + guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4; 15.76 + /* HVM guest context records are always host-sized */ 15.77 if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) { 15.78 perror("xc_version"); 15.79 exit(-1); 15.80 } 15.81 - /* HVM guest context records are always host-sized */ 15.82 ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4; 15.83 - /* XXX For now we can't tell whether a HVM guest is in long 15.84 - * XXX mode; eventually fix this here and in xc_pagetab.c */ 15.85 - guest_word_size = 4; 15.86 + } else { 15.87 + struct xen_domctl domctl; 15.88 + memset(&domctl, 0, sizeof domctl); 15.89 + domctl.domain = domid; 15.90 + domctl.cmd = XEN_DOMCTL_get_address_size; 15.91 + if (xc_domctl(xc_handle, &domctl) == 0) 15.92 + ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8; 15.93 } 15.94 } 15.95 #endif
16.1 --- a/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 10:56:01 2009 +0900 16.2 +++ b/xen/arch/ia64/linux-xen/iosapic.c Fri Feb 13 11:22:28 2009 +0900 16.3 @@ -93,6 +93,16 @@ 16.4 #include <asm/ptrace.h> 16.5 #include <asm/system.h> 16.6 16.7 +#ifdef XEN 16.8 +static inline int iosapic_irq_to_vector (int irq) 16.9 +{ 16.10 + return irq; 16.11 +} 16.12 + 16.13 +#undef irq_to_vector 16.14 +#define irq_to_vector(irq) iosapic_irq_to_vector(irq) 16.15 +#define AUTO_ASSIGN AUTO_ASSIGN_IRQ 16.16 +#endif 16.17 16.18 #undef DEBUG_INTERRUPT_ROUTING 16.19
17.1 --- a/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 10:56:01 2009 +0900 17.2 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c Fri Feb 13 11:22:28 2009 +0900 17.3 @@ -250,6 +250,7 @@ void 17.4 register_percpu_irq (ia64_vector vec, struct irqaction *action) 17.5 { 17.6 irq_desc_t *desc; 17.7 +#ifndef XEN 17.8 unsigned int irq; 17.9 17.10 for (irq = 0; irq < NR_IRQS; ++irq) 17.11 @@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st 17.12 desc->status |= IRQ_PER_CPU; 17.13 desc->handler = &irq_type_ia64_lsapic; 17.14 if (action) 17.15 -#ifdef XEN 17.16 - setup_vector(irq, action); 17.17 + setup_irq(irq, action); 17.18 + } 17.19 #else 17.20 - setup_irq(irq, action); 17.21 + desc = irq_descp(vec); 17.22 + desc->status |= IRQ_PER_CPU; 17.23 + desc->handler = &irq_type_ia64_lsapic; 17.24 + if (action) 17.25 + setup_vector(vec, action); 17.26 #endif 17.27 - } 17.28 } 17.29 17.30 #ifdef XEN 17.31 -int request_irq(unsigned int irq, 17.32 +int request_irq_vector(unsigned int vector, 17.33 void (*handler)(int, void *, struct cpu_user_regs *), 17.34 unsigned long irqflags, const char * devname, void *dev_id) 17.35 { 17.36 @@ -279,7 +283,7 @@ int request_irq(unsigned int irq, 17.37 * otherwise we'll have trouble later trying to figure out 17.38 * which interrupt is which (messes up the interrupt freeing logic etc). 17.39 * */ 17.40 - if (irq >= NR_IRQS) 17.41 + if (vector >= NR_VECTORS) 17.42 return -EINVAL; 17.43 if (!handler) 17.44 return -EINVAL; 17.45 @@ -291,7 +295,7 @@ int request_irq(unsigned int irq, 17.46 action->handler = handler; 17.47 action->name = devname; 17.48 action->dev_id = dev_id; 17.49 - setup_vector(irq, action); 17.50 + setup_vector(vector, action); 17.51 if (retval) 17.52 xfree(action); 17.53
18.1 --- a/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 10:56:01 2009 +0900 18.2 +++ b/xen/arch/ia64/linux-xen/mca.c Fri Feb 13 11:22:28 2009 +0900 18.3 @@ -114,7 +114,6 @@ extern void ia64_monarch_init_handler 18.4 extern void ia64_slave_init_handler (void); 18.5 #ifdef XEN 18.6 extern void setup_vector (unsigned int vec, struct irqaction *action); 18.7 -#define setup_irq(irq, action) setup_vector(irq, action) 18.8 #endif 18.9 18.10 static ia64_mc_info_t ia64_mc_info; 18.11 @@ -1931,12 +1930,18 @@ ia64_mca_late_init(void) 18.12 if (cpe_vector >= 0) { 18.13 /* If platform supports CPEI, enable the irq. */ 18.14 cpe_poll_enabled = 0; 18.15 +#ifndef XEN 18.16 for (irq = 0; irq < NR_IRQS; ++irq) 18.17 if (irq_to_vector(irq) == cpe_vector) { 18.18 desc = irq_descp(irq); 18.19 desc->status |= IRQ_PER_CPU; 18.20 - setup_irq(irq, &mca_cpe_irqaction); 18.21 + setup_vector(irq, &mca_cpe_irqaction); 18.22 } 18.23 +#else 18.24 + desc = irq_descp(cpe_vector); 18.25 + desc->status |= IRQ_PER_CPU; 18.26 + setup_vector(cpe_vector, &mca_cpe_irqaction); 18.27 +#endif 18.28 ia64_mca_register_cpev(cpe_vector); 18.29 IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); 18.30 } else {
19.1 --- a/xen/arch/ia64/xen/hypercall.c Fri Feb 13 10:56:01 2009 +0900 19.2 +++ b/xen/arch/ia64/xen/hypercall.c Fri Feb 13 11:22:28 2009 +0900 19.3 @@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA 19.4 break; 19.5 irq_status_query.flags = 0; 19.6 /* Edge-triggered interrupts don't need an explicit unmask downcall. */ 19.7 - if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") ) 19.8 + if ( !strstr(irq_descp(irq)->handler->typename, "edge") ) 19.9 irq_status_query.flags |= XENIRQSTAT_needs_eoi; 19.10 ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0; 19.11 break;
20.1 --- a/xen/arch/ia64/xen/irq.c Fri Feb 13 10:56:01 2009 +0900 20.2 +++ b/xen/arch/ia64/xen/irq.c Fri Feb 13 11:22:28 2009 +0900 20.3 @@ -228,11 +228,11 @@ out: 20.4 * disabled. 20.5 */ 20.6 20.7 -int setup_vector(unsigned int irq, struct irqaction * new) 20.8 +int setup_vector(unsigned int vector, struct irqaction * new) 20.9 { 20.10 unsigned long flags; 20.11 struct irqaction *old, **p; 20.12 - irq_desc_t *desc = irq_descp(irq); 20.13 + irq_desc_t *desc = irq_descp(vector); 20.14 20.15 /* 20.16 * The following block of code has to be executed atomically 20.17 @@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc 20.18 20.19 desc->depth = 0; 20.20 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST); 20.21 - desc->handler->startup(irq); 20.22 - desc->handler->enable(irq); 20.23 + desc->handler->startup(vector); 20.24 + desc->handler->enable(vector); 20.25 spin_unlock_irqrestore(&desc->lock,flags); 20.26 20.27 return 0; 20.28 @@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc 20.29 /* Vectors reserved by xen (and thus not sharable with domains). */ 20.30 unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)]; 20.31 20.32 -int setup_irq(unsigned int irq, struct irqaction * new) 20.33 +int setup_irq_vector(unsigned int vec, struct irqaction * new) 20.34 { 20.35 - unsigned int vec; 20.36 int res; 20.37 20.38 - /* Get vector for IRQ. */ 20.39 - if (acpi_gsi_to_irq (irq, &vec) < 0) 20.40 + if ( vec == IA64_INVALID_VECTOR ) 20.41 return -ENOSYS; 20.42 /* Reserve the vector (and thus the irq). */ 20.43 if (test_and_set_bit(vec, ia64_xen_vector)) 20.44 @@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i 20.45 return res; 20.46 } 20.47 20.48 -void free_irq(unsigned int irq) 20.49 +void release_irq_vector(unsigned int vec) 20.50 { 20.51 - unsigned int vec; 20.52 unsigned long flags; 20.53 irq_desc_t *desc; 20.54 20.55 - /* Get vector for IRQ. */ 20.56 - if (acpi_gsi_to_irq(irq, &vec) < 0) 20.57 + if ( vec == IA64_INVALID_VECTOR ) 20.58 return; 20.59 20.60 desc = irq_descp(vec);
21.1 --- a/xen/arch/x86/Makefile Fri Feb 13 10:56:01 2009 +0900 21.2 +++ b/xen/arch/x86/Makefile Fri Feb 13 11:22:28 2009 +0900 21.3 @@ -92,3 +92,4 @@ boot/mkelf32: boot/mkelf32.c 21.4 clean:: 21.5 rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32 21.6 rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d 21.7 + rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
22.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900 22.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900 22.3 @@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat 22.4 22.5 static struct cpufreq_driver acpi_cpufreq_driver; 22.6 22.7 +static unsigned int __read_mostly acpi_pstate_strict; 22.8 +integer_param("acpi_pstate_strict", acpi_pstate_strict); 22.9 + 22.10 static int check_est_cpu(unsigned int cpuid) 22.11 { 22.12 struct cpuinfo_x86 *cpu = &cpu_data[cpuid]; 22.13 @@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd 22.14 ASSERT(cpus_weight(cmd->mask) == 1); 22.15 22.16 /* to reduce IPI for the sake of performance */ 22.17 - if (cpu_isset(smp_processor_id(), cmd->mask)) 22.18 + if (likely(cpu_isset(smp_processor_id(), cmd->mask))) 22.19 do_drv_read((void *)cmd); 22.20 else 22.21 on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1); 22.22 @@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask) 22.23 struct cpufreq_policy *policy; 22.24 struct processor_performance *perf; 22.25 struct drv_cmd cmd; 22.26 - unsigned int cpu; 22.27 + unsigned int cpu = smp_processor_id(); 22.28 22.29 if (unlikely(cpus_empty(mask))) 22.30 return 0; 22.31 22.32 - cpu = first_cpu(mask); 22.33 + if (!cpu_isset(cpu, mask)) 22.34 + cpu = first_cpu(mask); 22.35 policy = cpufreq_cpu_policy[cpu]; 22.36 22.37 - if (!policy) 22.38 + if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu]) 22.39 return 0; 22.40 22.41 switch (drv_data[policy->cpu]->cpu_feature) { 22.42 @@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask) 22.43 break; 22.44 case SYSTEM_IO_CAPABLE: 22.45 cmd.type = SYSTEM_IO_CAPABLE; 22.46 - perf = drv_data[first_cpu(mask)]->acpi_data; 22.47 + perf = drv_data[policy->cpu]->acpi_data; 22.48 cmd.addr.io.port = perf->control_register.address; 22.49 cmd.addr.io.bit_width = perf->control_register.bit_width; 22.50 break; 22.51 @@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp 22.52 22.53 drv_write(&cmd); 22.54 22.55 - if (!check_freqs(cmd.mask, freqs.new, data)) { 22.56 + if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) { 22.57 printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new); 22.58 return -EAGAIN; 22.59 }
23.1 --- a/xen/arch/x86/acpi/suspend.c Fri Feb 13 10:56:01 2009 +0900 23.2 +++ b/xen/arch/x86/acpi/suspend.c Fri Feb 13 11:22:28 2009 +0900 23.3 @@ -65,6 +65,9 @@ void restore_rest_processor_state(void) 23.4 /* Reload FPU state on next FPU use. */ 23.5 stts(); 23.6 23.7 + if (cpu_has_pat) 23.8 + wrmsrl(MSR_IA32_CR_PAT, host_pat); 23.9 + 23.10 mtrr_ap_init(); 23.11 mcheck_init(&boot_cpu_data); 23.12 }
24.1 --- a/xen/arch/x86/boot/Makefile Fri Feb 13 10:56:01 2009 +0900 24.2 +++ b/xen/arch/x86/boot/Makefile Fri Feb 13 11:22:28 2009 +0900 24.3 @@ -1,1 +1,7 @@ 24.4 obj-y += head.o 24.5 + 24.6 +head.o: reloc.S 24.7 + 24.8 +# NB. BOOT_TRAMPOLINE == 0x8c000 24.9 +%.S: %.c 24.10 + RELOC=0x8c000 $(MAKE) -f build32.mk $@
25.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 25.2 +++ b/xen/arch/x86/boot/build32.mk Fri Feb 13 11:22:28 2009 +0900 25.3 @@ -0,0 +1,24 @@ 25.4 +XEN_ROOT=../../../.. 25.5 +override XEN_TARGET_ARCH=x86_32 25.6 +CFLAGS = 25.7 +include $(XEN_ROOT)/Config.mk 25.8 + 25.9 +# Disable PIE/SSP if GCC supports them. They can break us. 25.10 +$(call cc-option-add,CFLAGS,CC,-nopie) 25.11 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector) 25.12 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all) 25.13 + 25.14 +CFLAGS += -Werror -fno-builtin -msoft-float 25.15 + 25.16 +%.S: %.bin 25.17 + (od -v -t x $< | head -n -1 | \ 25.18 + sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@ 25.19 + 25.20 +%.bin: %.lnk 25.21 + $(OBJCOPY) -O binary $< $@ 25.22 + 25.23 +%.lnk: %.o 25.24 + $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $< 25.25 + 25.26 +%.o: %.c 25.27 + $(CC) $(CFLAGS) -c $< -o $@
26.1 --- a/xen/arch/x86/boot/head.S Fri Feb 13 10:56:01 2009 +0900 26.2 +++ b/xen/arch/x86/boot/head.S Fri Feb 13 11:22:28 2009 +0900 26.3 @@ -79,8 +79,11 @@ gdt_boot_descr: 26.4 cmp $0x2BADB002,%eax 26.5 jne not_multiboot 26.6 26.7 - /* Save the Multiboot info structure for later use. */ 26.8 - mov %ebx,sym_phys(multiboot_ptr) 26.9 + /* Save the Multiboot info struct (after relocation) for later use. */ 26.10 + mov $sym_phys(cpu0_stack)+1024,%esp 26.11 + push %ebx 26.12 + call reloc 26.13 + mov %eax,sym_phys(multiboot_ptr) 26.14 26.15 /* Initialize BSS (no nasty surprises!) */ 26.16 mov $sym_phys(__bss_start),%edi 26.17 @@ -192,6 +195,9 @@ 2: cmp $L1_PAGETABLE_ENTRIES,%e 26.18 26.19 #include "cmdline.S" 26.20 26.21 +reloc: 26.22 +#include "reloc.S" 26.23 + 26.24 .align 16 26.25 .globl trampoline_start, trampoline_end 26.26 trampoline_start:
27.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 27.2 +++ b/xen/arch/x86/boot/reloc.c Fri Feb 13 11:22:28 2009 +0900 27.3 @@ -0,0 +1,89 @@ 27.4 +/****************************************************************************** 27.5 + * reloc.c 27.6 + * 27.7 + * 32-bit flat memory-map routines for relocating Multiboot structures 27.8 + * and modules. This is most easily done early with paging disabled. 27.9 + * 27.10 + * Copyright (c) 2009, Citrix Systems, Inc. 27.11 + * 27.12 + * Authors: 27.13 + * Keir Fraser <keir.fraser@citrix.com> 27.14 + */ 27.15 + 27.16 +asm ( 27.17 + " .text \n" 27.18 + " .globl _start \n" 27.19 + "_start: \n" 27.20 + " mov $_start,%edi \n" 27.21 + " call 1f \n" 27.22 + "1: pop %esi \n" 27.23 + " sub $1b-_start,%esi \n" 27.24 + " mov $__bss_start-_start,%ecx \n" 27.25 + " rep movsb \n" 27.26 + " xor %eax,%eax \n" 27.27 + " mov $_end,%ecx \n" 27.28 + " sub %edi,%ecx \n" 27.29 + " rep stosb \n" 27.30 + " mov $reloc,%eax \n" 27.31 + " jmp *%eax \n" 27.32 + ); 27.33 + 27.34 +typedef unsigned int u32; 27.35 +#include "../../../include/xen/multiboot.h" 27.36 + 27.37 +extern char _start[]; 27.38 + 27.39 +static void *memcpy(void *dest, const void *src, unsigned int n) 27.40 +{ 27.41 + char *s = (char *)src, *d = dest; 27.42 + while ( n-- ) 27.43 + *d++ = *s++; 27.44 + return dest; 27.45 +} 27.46 + 27.47 +static void *reloc_mbi_struct(void *old, unsigned int bytes) 27.48 +{ 27.49 + static void *alloc = &_start; 27.50 + alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul); 27.51 + return memcpy(alloc, old, bytes); 27.52 +} 27.53 + 27.54 +static char *reloc_mbi_string(char *old) 27.55 +{ 27.56 + char *p; 27.57 + for ( p = old; *p != '\0'; p++ ) 27.58 + continue; 27.59 + return reloc_mbi_struct(old, p - old + 1); 27.60 +} 27.61 + 27.62 +multiboot_info_t *reloc(multiboot_info_t *mbi_old) 27.63 +{ 27.64 + multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi)); 27.65 + int i; 27.66 + 27.67 + if ( mbi->flags & MBI_CMDLINE ) 27.68 + mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline); 27.69 + 27.70 + if ( mbi->flags & MBI_MODULES ) 27.71 + { 27.72 + module_t *mods = reloc_mbi_struct( 27.73 + (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t)); 27.74 + mbi->mods_addr = (u32)mods; 27.75 + for ( i = 0; i < mbi->mods_count; i++ ) 27.76 + if ( mods[i].string ) 27.77 + mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string); 27.78 + } 27.79 + 27.80 + if ( mbi->flags & MBI_MEMMAP ) 27.81 + mbi->mmap_addr = (u32)reloc_mbi_struct( 27.82 + (memory_map_t *)mbi->mmap_addr, mbi->mmap_length); 27.83 + 27.84 + /* Mask features we don't understand or don't relocate. */ 27.85 + mbi->flags &= (MBI_MEMLIMITS | 27.86 + MBI_DRIVES | 27.87 + MBI_CMDLINE | 27.88 + MBI_MODULES | 27.89 + MBI_MEMMAP); 27.90 + 27.91 + return mbi; 27.92 +}
28.1 --- a/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 10:56:01 2009 +0900 28.2 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c Fri Feb 13 11:22:28 2009 +0900 28.3 @@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re 28.4 28.5 mc_data = x86_mcinfo_getptr(); 28.6 cpu_nr = smp_processor_id(); 28.7 + BUG_ON(cpu_nr != vcpu->processor); 28.8 + 28.9 curdom = vcpu->domain; 28.10 28.11 memset(&mc_global, 0, sizeof(mc_global)); 28.12 @@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re 28.13 mc_global.common.size = sizeof(mc_global); 28.14 28.15 mc_global.mc_domid = curdom->domain_id; /* impacted domain */ 28.16 - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ 28.17 - BUG_ON(cpu_nr != vcpu->processor); 28.18 - mc_global.mc_core_threadid = 0; 28.19 + 28.20 + x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid, 28.21 + &mc_global.mc_coreid, &mc_global.mc_core_threadid, 28.22 + &mc_global.mc_apicid, NULL, NULL, NULL); 28.23 + 28.24 mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ 28.25 -#if 0 /* TODO: on which socket is this physical core? 28.26 - It's not clear to me how to figure this out. */ 28.27 - mc_global.mc_socketid = ???; 28.28 -#endif 28.29 mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE; 28.30 rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); 28.31
29.1 --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 10:56:01 2009 +0900 29.2 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c Fri Feb 13 11:22:28 2009 +0900 29.3 @@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info) 29.4 mc_data = NULL; 29.5 29.6 cpu_nr = smp_processor_id(); 29.7 + BUG_ON(cpu_nr != vcpu->processor); 29.8 event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA); 29.9 error_found = 0; 29.10 29.11 @@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info) 29.12 mc_global.common.size = sizeof(mc_global); 29.13 29.14 mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */ 29.15 - mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */ 29.16 - BUG_ON(cpu_nr != vcpu->processor); 29.17 - mc_global.mc_core_threadid = 0; 29.18 mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */ 29.19 -#if 0 /* TODO: on which socket is this physical core? 29.20 - It's not clear to me how to figure this out. */ 29.21 - mc_global.mc_socketid = ???; 29.22 -#endif 29.23 + 29.24 + x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid, 29.25 + &mc_global.mc_coreid, &mc_global.mc_core_threadid, 29.26 + &mc_global.mc_apicid, NULL, NULL, NULL); 29.27 + 29.28 mc_global.mc_flags |= MC_FLAG_CORRECTABLE; 29.29 rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus); 29.30
30.1 --- a/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 10:56:01 2009 +0900 30.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c Fri Feb 13 11:22:28 2009 +0900 30.3 @@ -443,6 +443,96 @@ next: 30.4 30.5 30.6 30.7 +static void do_mc_get_cpu_info(void *v) 30.8 +{ 30.9 + int cpu = smp_processor_id(); 30.10 + int cindex, cpn; 30.11 + struct cpuinfo_x86 *c; 30.12 + xen_mc_logical_cpu_t *log_cpus, *xcp; 30.13 + uint32_t junk, ebx; 30.14 + 30.15 + log_cpus = v; 30.16 + c = &cpu_data[cpu]; 30.17 + cindex = 0; 30.18 + cpn = cpu - 1; 30.19 + 30.20 + /* 30.21 + * Deal with sparse masks, condensed into a contig array. 30.22 + */ 30.23 + while (cpn >= 0) { 30.24 + if (cpu_isset(cpn, cpu_online_map)) 30.25 + cindex++; 30.26 + cpn--; 30.27 + } 30.28 + 30.29 + xcp = &log_cpus[cindex]; 30.30 + c = &cpu_data[cpu]; 30.31 + xcp->mc_cpunr = cpu; 30.32 + x86_mc_get_cpu_info(cpu, &xcp->mc_chipid, 30.33 + &xcp->mc_coreid, &xcp->mc_threadid, 30.34 + &xcp->mc_apicid, &xcp->mc_ncores, 30.35 + &xcp->mc_ncores_active, &xcp->mc_nthreads); 30.36 + xcp->mc_cpuid_level = c->cpuid_level; 30.37 + xcp->mc_family = c->x86; 30.38 + xcp->mc_vendor = c->x86_vendor; 30.39 + xcp->mc_model = c->x86_model; 30.40 + xcp->mc_step = c->x86_mask; 30.41 + xcp->mc_cache_size = c->x86_cache_size; 30.42 + xcp->mc_cache_alignment = c->x86_cache_alignment; 30.43 + memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid); 30.44 + memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid); 30.45 + memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps); 30.46 + 30.47 + /* 30.48 + * This part needs to run on the CPU itself. 30.49 + */ 30.50 + xcp->mc_nmsrvals = __MC_NMSRS; 30.51 + xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP; 30.52 + rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value); 30.53 + 30.54 + if (c->cpuid_level >= 1) { 30.55 + cpuid(1, &junk, &ebx, &junk, &junk); 30.56 + xcp->mc_clusterid = (ebx >> 24) & 0xff; 30.57 + } else 30.58 + xcp->mc_clusterid = hard_smp_processor_id(); 30.59 +} 30.60 + 30.61 + 30.62 +void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid, 30.63 + uint16_t *threadid, uint32_t *apicid, 30.64 + unsigned *ncores, unsigned *ncores_active, 30.65 + unsigned *nthreads) 30.66 +{ 30.67 + struct cpuinfo_x86 *c; 30.68 + 30.69 + *apicid = cpu_physical_id(cpu); 30.70 + c = &cpu_data[cpu]; 30.71 + if (c->apicid == BAD_APICID) { 30.72 + *chipid = cpu; 30.73 + *coreid = 0; 30.74 + *threadid = 0; 30.75 + if (ncores != NULL) 30.76 + *ncores = 1; 30.77 + if (ncores_active != NULL) 30.78 + *ncores_active = 1; 30.79 + if (nthreads != NULL) 30.80 + *nthreads = 1; 30.81 + } else { 30.82 + *chipid = phys_proc_id[cpu]; 30.83 + if (c->x86_max_cores > 1) 30.84 + *coreid = cpu_core_id[cpu]; 30.85 + else 30.86 + *coreid = 0; 30.87 + *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1); 30.88 + if (ncores != NULL) 30.89 + *ncores = c->x86_max_cores; 30.90 + if (ncores_active != NULL) 30.91 + *ncores_active = c->booted_cores; 30.92 + if (nthreads != NULL) 30.93 + *nthreads = c->x86_num_siblings; 30.94 + } 30.95 +} 30.96 + 30.97 /* Machine Check Architecture Hypercall */ 30.98 long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc) 30.99 { 30.100 @@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u 30.101 struct domain *domU; 30.102 struct xen_mc_fetch *mc_fetch; 30.103 struct xen_mc_notifydomain *mc_notifydomain; 30.104 + struct xen_mc_physcpuinfo *mc_physcpuinfo; 30.105 struct mc_info *mi; 30.106 uint32_t flags; 30.107 uint32_t fetch_idx; 30.108 @@ -460,6 +551,8 @@ long 
do_mca(XEN_GUEST_HANDLE(xen_mc_t) u 30.109 * a DomU to fetch mc data while Dom0 notifies another DomU. */ 30.110 static DEFINE_SPINLOCK(mc_lock); 30.111 static DEFINE_SPINLOCK(mc_notify_lock); 30.112 + int nlcpu; 30.113 + xen_mc_logical_cpu_t *log_cpus = NULL; 30.114 30.115 if ( copy_from_guest(op, u_xen_mc, 1) ) 30.116 return -EFAULT; 30.117 @@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u 30.118 30.119 spin_unlock(&mc_notify_lock); 30.120 break; 30.121 + 30.122 + case XEN_MC_physcpuinfo: 30.123 + if ( !IS_PRIV(v->domain) ) 30.124 + return -EPERM; 30.125 + 30.126 + mc_physcpuinfo = &op->u.mc_physcpuinfo; 30.127 + nlcpu = num_online_cpus(); 30.128 + 30.129 + if (!guest_handle_is_null(mc_physcpuinfo->info)) { 30.130 + if (mc_physcpuinfo->ncpus <= 0) 30.131 + return -EINVAL; 30.132 + nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus); 30.133 + log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu); 30.134 + if (log_cpus == NULL) 30.135 + return -ENOMEM; 30.136 + 30.137 + if (on_each_cpu(do_mc_get_cpu_info, log_cpus, 30.138 + 1, 1) != 0) { 30.139 + xfree(log_cpus); 30.140 + return -EIO; 30.141 + } 30.142 + } 30.143 + 30.144 + mc_physcpuinfo->ncpus = nlcpu; 30.145 + 30.146 + if (copy_to_guest(u_xen_mc, op, 1)) { 30.147 + if (log_cpus != NULL) 30.148 + xfree(log_cpus); 30.149 + return -EFAULT; 30.150 + } 30.151 + 30.152 + if (!guest_handle_is_null(mc_physcpuinfo->info)) { 30.153 + if (copy_to_guest(mc_physcpuinfo->info, 30.154 + log_cpus, nlcpu)) 30.155 + ret = -EFAULT; 30.156 + xfree(log_cpus); 30.157 + } 30.158 } 30.159 30.160 return ret;
31.1 --- a/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 10:56:01 2009 +0900 31.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h Fri Feb 13 11:22:28 2009 +0900 31.3 @@ -34,4 +34,5 @@ void x86_mcinfo_clear(struct mc_info *mi 31.4 int x86_mcinfo_add(struct mc_info *mi, void *mcinfo); 31.5 void x86_mcinfo_dump(struct mc_info *mi); 31.6 void mc_panic(char *s); 31.7 - 31.8 +void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *, 31.9 + uint32_t *, uint32_t *, uint32_t *, uint32_t *);
32.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 10:56:01 2009 +0900 32.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Fri Feb 13 11:22:28 2009 +0900 32.3 @@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol 32.4 mcg.mc_flags = MC_FLAG_POLLED; 32.5 else if (calltype == MC_FLAG_CMCI) 32.6 mcg.mc_flags = MC_FLAG_CMCI; 32.7 - mcg.mc_socketid = phys_proc_id[cpu]; 32.8 - mcg.mc_coreid = cpu_core_id[cpu]; 32.9 - mcg.mc_apicid = cpu_physical_id(cpu); 32.10 - mcg.mc_core_threadid = 32.11 - mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 32.12 + x86_mc_get_cpu_info( 32.13 + cpu, &mcg.mc_socketid, &mcg.mc_coreid, 32.14 + &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL); 32.15 rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus); 32.16 32.17 for ( i = 0; i < nr_mce_banks; i++ ) {
33.1 --- a/xen/arch/x86/domain.c Fri Feb 13 10:56:01 2009 +0900 33.2 +++ b/xen/arch/x86/domain.c Fri Feb 13 11:22:28 2009 +0900 33.3 @@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain * 33.4 } 33.5 else 33.6 { 33.7 - list_for_each_entry ( page, &d->page_list, list ) 33.8 + page_list_for_each ( page, &d->page_list ) 33.9 { 33.10 printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n", 33.11 _p(page_to_mfn(page)), 33.12 @@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain * 33.13 p2m_pod_dump_data(d); 33.14 } 33.15 33.16 - list_for_each_entry ( page, &d->xenpage_list, list ) 33.17 + page_list_for_each ( page, &d->xenpage_list ) 33.18 { 33.19 printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n", 33.20 _p(page_to_mfn(page)), 33.21 @@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v) 33.22 v->arch.perdomain_ptes = 33.23 d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT); 33.24 33.25 + spin_lock_init(&v->arch.shadow_ldt_lock); 33.26 + 33.27 return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0); 33.28 } 33.29 33.30 @@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d, 33.31 INIT_LIST_HEAD(&d->arch.pdev_list); 33.32 33.33 d->arch.relmem = RELMEM_not_started; 33.34 - INIT_LIST_HEAD(&d->arch.relmem_list); 33.35 + INIT_PAGE_LIST_HEAD(&d->arch.relmem_list); 33.36 33.37 pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t)); 33.38 d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0); 33.39 @@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned 33.40 #endif 33.41 33.42 static int relinquish_memory( 33.43 - struct domain *d, struct list_head *list, unsigned long type) 33.44 + struct domain *d, struct page_list_head *list, unsigned long type) 33.45 { 33.46 - struct list_head *ent; 33.47 struct page_info *page; 33.48 unsigned long x, y; 33.49 int ret = 0; 33.50 @@ -1665,17 +1666,13 @@ static int relinquish_memory( 33.51 /* Use a recursive lock, as we may enter 'free_domheap_page'. */ 33.52 spin_lock_recursive(&d->page_alloc_lock); 33.53 33.54 - ent = list->next; 33.55 - while ( ent != list ) 33.56 + while ( (page = page_list_remove_head(list)) ) 33.57 { 33.58 - page = list_entry(ent, struct page_info, list); 33.59 - 33.60 /* Grab a reference to the page so it won't disappear from under us. */ 33.61 if ( unlikely(!get_page(page, d)) ) 33.62 { 33.63 /* Couldn't get a reference -- someone is freeing this page. */ 33.64 - ent = ent->next; 33.65 - list_move_tail(&page->list, &d->arch.relmem_list); 33.66 + page_list_add_tail(page, &d->arch.relmem_list); 33.67 continue; 33.68 } 33.69 33.70 @@ -1687,6 +1684,7 @@ static int relinquish_memory( 33.71 break; 33.72 case -EAGAIN: 33.73 case -EINTR: 33.74 + page_list_add(page, list); 33.75 set_bit(_PGT_pinned, &page->u.inuse.type_info); 33.76 put_page(page); 33.77 goto out; 33.78 @@ -1723,6 +1721,7 @@ static int relinquish_memory( 33.79 case 0: 33.80 break; 33.81 case -EINTR: 33.82 + page_list_add(page, list); 33.83 page->u.inuse.type_info |= PGT_validated; 33.84 if ( x & PGT_partial ) 33.85 put_page(page); 33.86 @@ -1730,6 +1729,7 @@ static int relinquish_memory( 33.87 ret = -EAGAIN; 33.88 goto out; 33.89 case -EAGAIN: 33.90 + page_list_add(page, list); 33.91 page->u.inuse.type_info |= PGT_partial; 33.92 if ( x & PGT_partial ) 33.93 put_page(page); 33.94 @@ -1746,9 +1746,8 @@ static int relinquish_memory( 33.95 } 33.96 } 33.97 33.98 - /* Follow the list chain and /then/ potentially free the page. 
*/ 33.99 - ent = ent->next; 33.100 - list_move_tail(&page->list, &d->arch.relmem_list); 33.101 + /* Put the page on the list and /then/ potentially free it. */ 33.102 + page_list_add_tail(page, &d->arch.relmem_list); 33.103 put_page(page); 33.104 33.105 if ( hypercall_preempt_check() ) 33.106 @@ -1758,7 +1757,12 @@ static int relinquish_memory( 33.107 } 33.108 } 33.109 33.110 - list_splice_init(&d->arch.relmem_list, list); 33.111 + /* list is empty at this point. */ 33.112 + if ( !page_list_empty(&d->arch.relmem_list) ) 33.113 + { 33.114 + *list = d->arch.relmem_list; 33.115 + INIT_PAGE_LIST_HEAD(&d->arch.relmem_list); 33.116 + } 33.117 33.118 out: 33.119 spin_unlock_recursive(&d->page_alloc_lock);
34.1 --- a/xen/arch/x86/domain_build.c Fri Feb 13 10:56:01 2009 +0900 34.2 +++ b/xen/arch/x86/domain_build.c Fri Feb 13 11:22:28 2009 +0900 34.3 @@ -880,7 +880,7 @@ int __init construct_dom0( 34.4 } 34.5 si->first_p2m_pfn = pfn; 34.6 si->nr_p2m_frames = d->tot_pages - count; 34.7 - list_for_each_entry ( page, &d->page_list, list ) 34.8 + page_list_for_each ( page, &d->page_list ) 34.9 { 34.10 mfn = page_to_mfn(page); 34.11 if ( get_gpfn_from_mfn(mfn) >= count )
35.1 --- a/xen/arch/x86/domctl.c Fri Feb 13 10:56:01 2009 +0900 35.2 +++ b/xen/arch/x86/domctl.c Fri Feb 13 11:22:28 2009 +0900 35.3 @@ -240,7 +240,7 @@ long arch_do_domctl( 35.4 struct domain *d = rcu_lock_domain_by_id(domctl->domain); 35.5 unsigned long max_pfns = domctl->u.getmemlist.max_pfns; 35.6 uint64_t mfn; 35.7 - struct list_head *list_ent; 35.8 + struct page_info *page; 35.9 35.10 ret = -EINVAL; 35.11 if ( d != NULL ) 35.12 @@ -259,19 +259,19 @@ long arch_do_domctl( 35.13 goto getmemlist_out; 35.14 } 35.15 35.16 - ret = 0; 35.17 - list_ent = d->page_list.next; 35.18 - for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ ) 35.19 + ret = i = 0; 35.20 + page_list_for_each(page, &d->page_list) 35.21 { 35.22 - mfn = page_to_mfn(list_entry( 35.23 - list_ent, struct page_info, list)); 35.24 + if ( i >= max_pfns ) 35.25 + break; 35.26 + mfn = page_to_mfn(page); 35.27 if ( copy_to_guest_offset(domctl->u.getmemlist.buffer, 35.28 i, &mfn, 1) ) 35.29 { 35.30 ret = -EFAULT; 35.31 break; 35.32 } 35.33 - list_ent = mfn_to_page(mfn)->list.next; 35.34 + ++i; 35.35 } 35.36 35.37 spin_unlock(&d->page_alloc_lock); 35.38 @@ -417,6 +417,34 @@ long arch_do_domctl( 35.39 } 35.40 break; 35.41 35.42 + case XEN_DOMCTL_gethvmcontext_partial: 35.43 + { 35.44 + struct domain *d; 35.45 + 35.46 + ret = -ESRCH; 35.47 + if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL ) 35.48 + break; 35.49 + 35.50 + ret = xsm_hvmcontext(d, domctl->cmd); 35.51 + if ( ret ) 35.52 + goto gethvmcontext_partial_out; 35.53 + 35.54 + ret = -EINVAL; 35.55 + if ( !is_hvm_domain(d) ) 35.56 + goto gethvmcontext_partial_out; 35.57 + 35.58 + domain_pause(d); 35.59 + ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type, 35.60 + domctl->u.hvmcontext_partial.instance, 35.61 + domctl->u.hvmcontext_partial.buffer); 35.62 + domain_unpause(d); 35.63 + 35.64 + gethvmcontext_partial_out: 35.65 + rcu_unlock_domain(d); 35.66 + } 35.67 + break; 35.68 + 35.69 + 35.70 case XEN_DOMCTL_set_address_size: 35.71 { 35.72 struct domain *d;
36.1 --- a/xen/arch/x86/e820.c Fri Feb 13 10:56:01 2009 +0900 36.2 +++ b/xen/arch/x86/e820.c Fri Feb 13 11:22:28 2009 +0900 36.3 @@ -1,10 +1,10 @@ 36.4 #include <xen/config.h> 36.5 #include <xen/init.h> 36.6 #include <xen/lib.h> 36.7 +#include <xen/mm.h> 36.8 #include <xen/compat.h> 36.9 #include <xen/dmi.h> 36.10 #include <asm/e820.h> 36.11 -#include <asm/mm.h> 36.12 #include <asm/page.h> 36.13 36.14 /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
37.1 --- a/xen/arch/x86/i8259.c Fri Feb 13 10:56:01 2009 +0900 37.2 +++ b/xen/arch/x86/i8259.c Fri Feb 13 11:22:28 2009 +0900 37.3 @@ -410,8 +410,8 @@ void __init init_IRQ(void) 37.4 } 37.5 37.6 /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */ 37.7 - vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN; 37.8 - vector_irq[0x80] = NEVER_ASSIGN; 37.9 + vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ; 37.10 + vector_irq[0x80] = NEVER_ASSIGN_IRQ; 37.11 37.12 apic_intr_init(); 37.13
38.1 --- a/xen/arch/x86/io_apic.c Fri Feb 13 10:56:01 2009 +0900 38.2 +++ b/xen/arch/x86/io_apic.c Fri Feb 13 11:22:28 2009 +0900 38.3 @@ -49,7 +49,6 @@ atomic_t irq_mis_count; 38.4 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 38.5 38.6 static DEFINE_SPINLOCK(ioapic_lock); 38.7 -static DEFINE_SPINLOCK(vector_lock); 38.8 38.9 int skip_ioapic_setup; 38.10 38.11 @@ -89,9 +88,6 @@ static struct irq_pin_list { 38.12 }; 38.13 static int irq_2_pin_free_entry = NR_IRQS; 38.14 38.15 -int vector_irq[NR_VECTORS] __read_mostly = { 38.16 - [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN}; 38.17 - 38.18 /* 38.19 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 38.20 * shared ISA-space IRQs, so we have to support them. We are super 38.21 @@ -669,56 +665,6 @@ static inline int IO_APIC_irq_trigger(in 38.22 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 38.23 u8 irq_vector[NR_IRQS] __read_mostly; 38.24 38.25 -int free_irq_vector(int vector) 38.26 -{ 38.27 - int irq; 38.28 - 38.29 - BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR)); 38.30 - 38.31 - spin_lock(&vector_lock); 38.32 - if ((irq = vector_irq[vector]) == AUTO_ASSIGN) 38.33 - vector_irq[vector] = FREE_TO_ASSIGN; 38.34 - spin_unlock(&vector_lock); 38.35 - 38.36 - return (irq == AUTO_ASSIGN) ? 0 : -EINVAL; 38.37 -} 38.38 - 38.39 -int assign_irq_vector(int irq) 38.40 -{ 38.41 - static unsigned current_vector = FIRST_DYNAMIC_VECTOR; 38.42 - unsigned vector; 38.43 - 38.44 - BUG_ON(irq >= NR_IRQS); 38.45 - 38.46 - spin_lock(&vector_lock); 38.47 - 38.48 - if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) { 38.49 - spin_unlock(&vector_lock); 38.50 - return IO_APIC_VECTOR(irq); 38.51 - } 38.52 - 38.53 - vector = current_vector; 38.54 - while (vector_irq[vector] != FREE_TO_ASSIGN) { 38.55 - vector += 8; 38.56 - if (vector > LAST_DYNAMIC_VECTOR) 38.57 - vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7); 38.58 - 38.59 - if (vector == current_vector) { 38.60 - spin_unlock(&vector_lock); 38.61 - return -ENOSPC; 38.62 - } 38.63 - } 38.64 - 38.65 - current_vector = vector; 38.66 - vector_irq[vector] = irq; 38.67 - if (irq != AUTO_ASSIGN) 38.68 - IO_APIC_VECTOR(irq) = vector; 38.69 - 38.70 - spin_unlock(&vector_lock); 38.71 - 38.72 - return vector; 38.73 -} 38.74 - 38.75 static struct hw_interrupt_type ioapic_level_type; 38.76 static struct hw_interrupt_type ioapic_edge_type; 38.77
39.1 --- a/xen/arch/x86/irq.c Fri Feb 13 10:56:01 2009 +0900 39.2 +++ b/xen/arch/x86/irq.c Fri Feb 13 11:22:28 2009 +0900 39.3 @@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb 39.4 39.5 irq_desc_t irq_desc[NR_VECTORS]; 39.6 39.7 +static DEFINE_SPINLOCK(vector_lock); 39.8 +int vector_irq[NR_VECTORS] __read_mostly = { 39.9 + [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ 39.10 +}; 39.11 + 39.12 static void __do_IRQ_guest(int vector); 39.13 39.14 void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { } 39.15 @@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = { 39.16 39.17 atomic_t irq_err_count; 39.18 39.19 +int free_irq_vector(int vector) 39.20 +{ 39.21 + int irq; 39.22 + 39.23 + BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR)); 39.24 + 39.25 + spin_lock(&vector_lock); 39.26 + if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ) 39.27 + vector_irq[vector] = FREE_TO_ASSIGN_IRQ; 39.28 + spin_unlock(&vector_lock); 39.29 + 39.30 + return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL; 39.31 +} 39.32 + 39.33 +int assign_irq_vector(int irq) 39.34 +{ 39.35 + static unsigned current_vector = FIRST_DYNAMIC_VECTOR; 39.36 + unsigned vector; 39.37 + 39.38 + BUG_ON(irq >= NR_IRQS); 39.39 + 39.40 + spin_lock(&vector_lock); 39.41 + 39.42 + if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) { 39.43 + spin_unlock(&vector_lock); 39.44 + return IO_APIC_VECTOR(irq); 39.45 + } 39.46 + 39.47 + vector = current_vector; 39.48 + while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) { 39.49 + vector += 8; 39.50 + if (vector > LAST_DYNAMIC_VECTOR) 39.51 + vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7); 39.52 + 39.53 + if (vector == current_vector) { 39.54 + spin_unlock(&vector_lock); 39.55 + return -ENOSPC; 39.56 + } 39.57 + } 39.58 + 39.59 + current_vector = vector; 39.60 + vector_irq[vector] = irq; 39.61 + if (irq != AUTO_ASSIGN_IRQ) 39.62 + IO_APIC_VECTOR(irq) = vector; 39.63 + 39.64 + spin_unlock(&vector_lock); 39.65 + 39.66 + return vector; 39.67 +} 39.68 + 39.69 asmlinkage void do_IRQ(struct cpu_user_regs *regs) 39.70 { 39.71 unsigned int vector = regs->entry_vector; 39.72 @@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r 39.73 spin_unlock(&desc->lock); 39.74 } 39.75 39.76 -int request_irq(unsigned int irq, 39.77 +int request_irq_vector(unsigned int vector, 39.78 void (*handler)(int, void *, struct cpu_user_regs *), 39.79 unsigned long irqflags, const char * devname, void *dev_id) 39.80 { 39.81 @@ -117,7 +172,7 @@ int request_irq(unsigned int irq, 39.82 * which interrupt is which (messes up the interrupt freeing 39.83 * logic etc). 
39.84 */ 39.85 - if (irq >= NR_IRQS) 39.86 + if (vector >= NR_VECTORS) 39.87 return -EINVAL; 39.88 if (!handler) 39.89 return -EINVAL; 39.90 @@ -130,34 +185,32 @@ int request_irq(unsigned int irq, 39.91 action->name = devname; 39.92 action->dev_id = dev_id; 39.93 39.94 - retval = setup_irq(irq, action); 39.95 + retval = setup_irq_vector(vector, action); 39.96 if (retval) 39.97 xfree(action); 39.98 39.99 return retval; 39.100 } 39.101 39.102 -void free_irq(unsigned int irq) 39.103 +void release_irq_vector(unsigned int vector) 39.104 { 39.105 - unsigned int vector = irq_to_vector(irq); 39.106 - irq_desc_t *desc = &irq_desc[vector]; 39.107 + irq_desc_t *desc = &irq_desc[vector]; 39.108 unsigned long flags; 39.109 39.110 spin_lock_irqsave(&desc->lock,flags); 39.111 desc->action = NULL; 39.112 desc->depth = 1; 39.113 desc->status |= IRQ_DISABLED; 39.114 - desc->handler->shutdown(irq); 39.115 + desc->handler->shutdown(vector); 39.116 spin_unlock_irqrestore(&desc->lock,flags); 39.117 39.118 /* Wait to make sure it's not being used on another CPU */ 39.119 do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS ); 39.120 } 39.121 39.122 -int setup_irq(unsigned int irq, struct irqaction *new) 39.123 +int setup_irq_vector(unsigned int vector, struct irqaction *new) 39.124 { 39.125 - unsigned int vector = irq_to_vector(irq); 39.126 - irq_desc_t *desc = &irq_desc[vector]; 39.127 + irq_desc_t *desc = &irq_desc[vector]; 39.128 unsigned long flags; 39.129 39.130 spin_lock_irqsave(&desc->lock,flags);
40.1 --- a/xen/arch/x86/mm.c Fri Feb 13 10:56:01 2009 +0900 40.2 +++ b/xen/arch/x86/mm.c Fri Feb 13 11:22:28 2009 +0900 40.3 @@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 = 40.4 #define l3_disallow_mask(d) L3_DISALLOW_MASK 40.5 #endif 40.6 40.7 -static void queue_deferred_ops(struct domain *d, unsigned int ops) 40.8 -{ 40.9 - ASSERT(d == current->domain); 40.10 - this_cpu(percpu_mm_info).deferred_ops |= ops; 40.11 -} 40.12 - 40.13 void __init init_frametable(void) 40.14 { 40.15 unsigned long nr_pages, page_step, i, mfn; 40.16 @@ -333,7 +327,7 @@ void share_xen_page_with_guest( 40.17 page->count_info |= PGC_allocated | 1; 40.18 if ( unlikely(d->xenheap_pages++ == 0) ) 40.19 get_knownalive_domain(d); 40.20 - list_add_tail(&page->list, &d->xenpage_list); 40.21 + page_list_add_tail(page, &d->xenpage_list); 40.22 } 40.23 40.24 spin_unlock(&d->page_alloc_lock); 40.25 @@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v) 40.26 } 40.27 40.28 40.29 -static void invalidate_shadow_ldt(struct vcpu *v) 40.30 +static void invalidate_shadow_ldt(struct vcpu *v, int flush) 40.31 { 40.32 int i; 40.33 unsigned long pfn; 40.34 struct page_info *page; 40.35 - 40.36 + 40.37 + BUG_ON(unlikely(in_irq())); 40.38 + 40.39 + spin_lock(&v->arch.shadow_ldt_lock); 40.40 + 40.41 if ( v->arch.shadow_ldt_mapcnt == 0 ) 40.42 - return; 40.43 + goto out; 40.44 40.45 v->arch.shadow_ldt_mapcnt = 0; 40.46 40.47 @@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct 40.48 put_page_and_type(page); 40.49 } 40.50 40.51 - /* Dispose of the (now possibly invalid) mappings from the TLB. */ 40.52 - if ( v == current ) 40.53 - queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); 40.54 - else 40.55 - flush_tlb_mask(v->domain->domain_dirty_cpumask); 40.56 + /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */ 40.57 + if ( flush ) 40.58 + flush_tlb_mask(v->vcpu_dirty_cpumask); 40.59 + 40.60 + out: 40.61 + spin_unlock(&v->arch.shadow_ldt_lock); 40.62 } 40.63 40.64 40.65 @@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off 40.66 40.67 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); 40.68 40.69 + spin_lock(&v->arch.shadow_ldt_lock); 40.70 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e); 40.71 v->arch.shadow_ldt_mapcnt++; 40.72 + spin_unlock(&v->arch.shadow_ldt_lock); 40.73 40.74 return 1; 40.75 } 40.76 @@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e, 40.77 (d == e) ) 40.78 { 40.79 for_each_vcpu ( d, v ) 40.80 - invalidate_shadow_ldt(v); 40.81 + invalidate_shadow_ldt(v, 1); 40.82 } 40.83 put_page(page); 40.84 } 40.85 @@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag 40.86 unsigned long gmfn; 40.87 int rc; 40.88 40.89 - if ( likely(owner != NULL) ) 40.90 + if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) ) 40.91 { 40.92 - /* 40.93 - * We have to flush before the next use of the linear mapping 40.94 - * (e.g., update_va_mapping()) or we could end up modifying a page 40.95 - * that is no longer a page table (and hence screw up ref counts). 40.96 - */ 40.97 - if ( current->domain == owner ) 40.98 - queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); 40.99 - else 40.100 - flush_tlb_mask(owner->domain_dirty_cpumask); 40.101 - 40.102 - if ( unlikely(paging_mode_enabled(owner)) ) 40.103 - { 40.104 - /* A page table is dirtied when its type count becomes zero. 
*/ 40.105 - paging_mark_dirty(owner, page_to_mfn(page)); 40.106 - 40.107 - if ( shadow_mode_refcounts(owner) ) 40.108 - return 0; 40.109 - 40.110 - gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); 40.111 - ASSERT(VALID_M2P(gmfn)); 40.112 - shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); 40.113 - } 40.114 + /* A page table is dirtied when its type count becomes zero. */ 40.115 + paging_mark_dirty(owner, page_to_mfn(page)); 40.116 + 40.117 + if ( shadow_mode_refcounts(owner) ) 40.118 + return 0; 40.119 + 40.120 + gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); 40.121 + ASSERT(VALID_M2P(gmfn)); 40.122 + shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); 40.123 } 40.124 40.125 if ( !(type & PGT_partial) ) 40.126 @@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_ 40.127 40.128 int new_guest_cr3(unsigned long mfn) 40.129 { 40.130 - struct vcpu *v = current; 40.131 - struct domain *d = v->domain; 40.132 + struct vcpu *curr = current; 40.133 + struct domain *d = curr->domain; 40.134 int okay; 40.135 unsigned long old_base_mfn; 40.136 40.137 @@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn) 40.138 okay = paging_mode_refcounts(d) 40.139 ? 0 /* Old code was broken, but what should it be? */ 40.140 : mod_l4_entry( 40.141 - __va(pagetable_get_paddr(v->arch.guest_table)), 40.142 + __va(pagetable_get_paddr(curr->arch.guest_table)), 40.143 l4e_from_pfn( 40.144 mfn, 40.145 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), 40.146 - pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0; 40.147 + pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0; 40.148 if ( unlikely(!okay) ) 40.149 { 40.150 MEM_LOG("Error while installing new compat baseptr %lx", mfn); 40.151 return 0; 40.152 } 40.153 40.154 - invalidate_shadow_ldt(v); 40.155 - write_ptbase(v); 40.156 + invalidate_shadow_ldt(curr, 0); 40.157 + write_ptbase(curr); 40.158 40.159 return 1; 40.160 } 40.161 @@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn) 40.162 return 0; 40.163 } 40.164 40.165 - invalidate_shadow_ldt(v); 40.166 - 40.167 - old_base_mfn = pagetable_get_pfn(v->arch.guest_table); 40.168 - 40.169 - v->arch.guest_table = pagetable_from_pfn(mfn); 40.170 - update_cr3(v); 40.171 - 40.172 - write_ptbase(v); 40.173 + invalidate_shadow_ldt(curr, 0); 40.174 + 40.175 + old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); 40.176 + 40.177 + curr->arch.guest_table = pagetable_from_pfn(mfn); 40.178 + update_cr3(curr); 40.179 + 40.180 + write_ptbase(curr); 40.181 40.182 if ( likely(old_base_mfn != 0) ) 40.183 { 40.184 @@ -2440,6 +2428,10 @@ static void process_deferred_ops(void) 40.185 flush_tlb_local(); 40.186 } 40.187 40.188 + /* 40.189 + * Do this after flushing TLBs, to ensure we see fresh LDT mappings 40.190 + * via the linear pagetable mapping. 
40.191 + */ 40.192 if ( deferred_ops & DOP_RELOAD_LDT ) 40.193 (void)map_ldt_shadow_page(0); 40.194 40.195 @@ -2565,8 +2557,8 @@ int do_mmuext_op( 40.196 unsigned long mfn = 0, gmfn = 0, type; 40.197 unsigned int done = 0; 40.198 struct page_info *page; 40.199 - struct vcpu *v = current; 40.200 - struct domain *d = v->domain; 40.201 + struct vcpu *curr = current; 40.202 + struct domain *d = curr->domain; 40.203 40.204 if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) 40.205 { 40.206 @@ -2729,8 +2721,8 @@ int do_mmuext_op( 40.207 } 40.208 } 40.209 40.210 - old_mfn = pagetable_get_pfn(v->arch.guest_table_user); 40.211 - v->arch.guest_table_user = pagetable_from_pfn(mfn); 40.212 + old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); 40.213 + curr->arch.guest_table_user = pagetable_from_pfn(mfn); 40.214 40.215 if ( old_mfn != 0 ) 40.216 { 40.217 @@ -2750,7 +2742,7 @@ int do_mmuext_op( 40.218 40.219 case MMUEXT_INVLPG_LOCAL: 40.220 if ( !paging_mode_enabled(d) 40.221 - || paging_invlpg(v, op.arg1.linear_addr) != 0 ) 40.222 + || paging_invlpg(curr, op.arg1.linear_addr) != 0 ) 40.223 flush_tlb_one_local(op.arg1.linear_addr); 40.224 break; 40.225 40.226 @@ -2773,7 +2765,7 @@ int do_mmuext_op( 40.227 } 40.228 40.229 case MMUEXT_TLB_FLUSH_ALL: 40.230 - flush_tlb_mask(d->domain_dirty_cpumask); 40.231 + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; 40.232 break; 40.233 40.234 case MMUEXT_INVLPG_ALL: 40.235 @@ -2809,13 +2801,14 @@ int do_mmuext_op( 40.236 okay = 0; 40.237 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); 40.238 } 40.239 - else if ( (v->arch.guest_context.ldt_ents != ents) || 40.240 - (v->arch.guest_context.ldt_base != ptr) ) 40.241 + else if ( (curr->arch.guest_context.ldt_ents != ents) || 40.242 + (curr->arch.guest_context.ldt_base != ptr) ) 40.243 { 40.244 - invalidate_shadow_ldt(v); 40.245 - v->arch.guest_context.ldt_base = ptr; 40.246 - v->arch.guest_context.ldt_ents = ents; 40.247 - load_LDT(v); 40.248 + invalidate_shadow_ldt(curr, 0); 40.249 + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; 40.250 + curr->arch.guest_context.ldt_base = ptr; 40.251 + curr->arch.guest_context.ldt_ents = ents; 40.252 + load_LDT(curr); 40.253 this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT; 40.254 if ( ents != 0 ) 40.255 this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT; 40.256 @@ -2931,8 +2924,7 @@ int do_mmu_update( 40.257 struct page_info *page; 40.258 int rc = 0, okay = 1, i = 0; 40.259 unsigned int cmd, done = 0; 40.260 - struct vcpu *v = current; 40.261 - struct domain *d = v->domain; 40.262 + struct domain *d = current->domain; 40.263 struct domain_mmap_cache mapcache; 40.264 40.265 if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) 40.266 @@ -3042,7 +3034,8 @@ int do_mmu_update( 40.267 #endif 40.268 case PGT_writable_page: 40.269 perfc_incr(writable_mmu_updates); 40.270 - okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); 40.271 + okay = paging_write_guest_entry( 40.272 + current, va, req.val, _mfn(mfn)); 40.273 break; 40.274 } 40.275 page_unlock(page); 40.276 @@ -3052,7 +3045,8 @@ int do_mmu_update( 40.277 else if ( get_page_type(page, PGT_writable_page) ) 40.278 { 40.279 perfc_incr(writable_mmu_updates); 40.280 - okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); 40.281 + okay = paging_write_guest_entry( 40.282 + current, va, req.val, _mfn(mfn)); 40.283 put_page_type(page); 40.284 } 40.285 40.286 @@ -3508,7 +3502,7 @@ int steal_page( 40.287 /* Unlink from original owner. 
*/ 40.288 if ( !(memflags & MEMF_no_refcount) ) 40.289 d->tot_pages--; 40.290 - list_del(&page->list); 40.291 + page_list_del(page, &d->page_list); 40.292 40.293 spin_unlock(&d->page_alloc_lock); 40.294 return 0; 40.295 @@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v 40.296 if ( pl1e ) 40.297 guest_unmap_l1e(v, pl1e); 40.298 40.299 - process_deferred_ops(); 40.300 - 40.301 switch ( flags & UVMF_FLUSHTYPE_MASK ) 40.302 { 40.303 case UVMF_TLB_FLUSH: 40.304 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) 40.305 { 40.306 case UVMF_LOCAL: 40.307 - flush_tlb_local(); 40.308 + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; 40.309 break; 40.310 case UVMF_ALL: 40.311 - flush_tlb_mask(d->domain_dirty_cpumask); 40.312 + this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; 40.313 break; 40.314 default: 40.315 + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS ) 40.316 + break; 40.317 if ( unlikely(!is_pv_32on64_domain(d) ? 40.318 get_user(vmask, (unsigned long *)bmap_ptr) : 40.319 get_user(vmask, (unsigned int *)bmap_ptr)) ) 40.320 - rc = -EFAULT; 40.321 + rc = -EFAULT, vmask = 0; 40.322 pmask = vcpumask_to_pcpumask(d, vmask); 40.323 + if ( cpu_isset(smp_processor_id(), pmask) ) 40.324 + this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; 40.325 flush_tlb_mask(pmask); 40.326 break; 40.327 } 40.328 break; 40.329 40.330 case UVMF_INVLPG: 40.331 + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS ) 40.332 + break; 40.333 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) 40.334 { 40.335 case UVMF_LOCAL: 40.336 + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) 40.337 + break; 40.338 if ( !paging_mode_enabled(d) || 40.339 (paging_invlpg(v, va) != 0) ) 40.340 flush_tlb_one_local(va); 40.341 @@ -3606,14 +3606,18 @@ int do_update_va_mapping(unsigned long v 40.342 if ( unlikely(!is_pv_32on64_domain(d) ? 40.343 get_user(vmask, (unsigned long *)bmap_ptr) : 40.344 get_user(vmask, (unsigned int *)bmap_ptr)) ) 40.345 - rc = -EFAULT; 40.346 + rc = -EFAULT, vmask = 0; 40.347 pmask = vcpumask_to_pcpumask(d, vmask); 40.348 + if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB ) 40.349 + cpu_clear(smp_processor_id(), pmask); 40.350 flush_tlb_one_mask(pmask, va); 40.351 break; 40.352 } 40.353 break; 40.354 } 40.355 40.356 + process_deferred_ops(); 40.357 + 40.358 return rc; 40.359 } 40.360
41.1 --- a/xen/arch/x86/mm/hap/hap.c Fri Feb 13 10:56:01 2009 +0900 41.2 +++ b/xen/arch/x86/mm/hap/hap.c Fri Feb 13 11:22:28 2009 +0900 41.3 @@ -45,11 +45,11 @@ 41.4 41.5 /* Override macros from asm/page.h to make them work with mfn_t */ 41.6 #undef mfn_to_page 41.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 41.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) 41.9 #undef mfn_valid 41.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 41.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) 41.12 #undef page_to_mfn 41.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 41.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) 41.15 41.16 /************************************************/ 41.17 /* HAP LOG DIRTY SUPPORT */ 41.18 @@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc 41.19 41.20 ASSERT(hap_locked_by_me(d)); 41.21 41.22 - if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) ) 41.23 + pg = page_list_remove_head(&d->arch.paging.hap.freelist); 41.24 + if ( unlikely(!pg) ) 41.25 return NULL; 41.26 41.27 - pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list); 41.28 - list_del(&pg->list); 41.29 d->arch.paging.hap.free_pages--; 41.30 41.31 p = hap_map_domain_page(page_to_mfn(pg)); 41.32 @@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m 41.33 ASSERT(hap_locked_by_me(d)); 41.34 41.35 d->arch.paging.hap.free_pages++; 41.36 - list_add_tail(&pg->list, &d->arch.paging.hap.freelist); 41.37 + page_list_add_tail(pg, &d->arch.paging.hap.freelist); 41.38 } 41.39 41.40 static struct page_info *hap_alloc_p2m_page(struct domain *d) 41.41 @@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns 41.42 } 41.43 d->arch.paging.hap.free_pages++; 41.44 d->arch.paging.hap.total_pages++; 41.45 - list_add_tail(&pg->list, &d->arch.paging.hap.freelist); 41.46 + page_list_add_tail(pg, &d->arch.paging.hap.freelist); 41.47 } 41.48 else if ( d->arch.paging.hap.total_pages > pages ) 41.49 { 41.50 /* Need to return memory to domheap */ 41.51 - ASSERT(!list_empty(&d->arch.paging.hap.freelist)); 41.52 - pg = list_entry(d->arch.paging.hap.freelist.next, 41.53 - struct page_info, list); 41.54 - list_del(&pg->list); 41.55 + pg = page_list_remove_head(&d->arch.paging.hap.freelist); 41.56 + ASSERT(pg); 41.57 d->arch.paging.hap.free_pages--; 41.58 d->arch.paging.hap.total_pages--; 41.59 pg->count_info = 0; 41.60 @@ -393,7 +390,7 @@ static void hap_destroy_monitor_table(st 41.61 void hap_domain_init(struct domain *d) 41.62 { 41.63 hap_lock_init(d); 41.64 - INIT_LIST_HEAD(&d->arch.paging.hap.freelist); 41.65 + INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist); 41.66 41.67 /* This domain will use HAP for log-dirty mode */ 41.68 paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
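The hap.c hunks are a mechanical conversion from open-coded struct list_head handling to the page_list_* helpers, plus switching the local mfn_to_page()/page_to_mfn() overrides to the generic __mfn_to_page()/__page_to_mfn() forms. The helper idiom, condensed from the hunks above (nothing here beyond what the diff already shows):

    /* Old idiom being removed:
     *     if ( list_empty(&d->arch.paging.hap.freelist) ) return NULL;
     *     pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
     *     list_del(&pg->list);
     * New idiom: one call, NULL when the list is empty. */
    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
    if ( unlikely(!pg) )
        return NULL;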
42.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 10:56:01 2009 +0900 42.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Fri Feb 13 11:22:28 2009 +0900 42.3 @@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d 42.4 42.5 pg->count_info = 1; 42.6 pg->u.inuse.type_info = 1 | PGT_validated; 42.7 - list_add_tail(&pg->list, &d->arch.p2m->pages); 42.8 + page_list_add_tail(pg, &d->arch.p2m->pages); 42.9 42.10 ept_entry->emt = 0; 42.11 ept_entry->igmt = 0; 42.12 @@ -116,12 +116,12 @@ static int ept_next_level(struct domain 42.13 } 42.14 42.15 /* 42.16 - * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself, 42.17 + * ept_set_entry() computes 'need_modify_vtd_table' for itself, 42.18 * by observing whether any gfn->mfn translations are modified. 42.19 */ 42.20 static int 42.21 -_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 42.22 - unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table) 42.23 +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 42.24 + unsigned int order, p2m_type_t p2mt) 42.25 { 42.26 ept_entry_t *table = NULL; 42.27 unsigned long gfn_remainder = gfn, offset = 0; 42.28 @@ -131,6 +131,7 @@ static int 42.29 int walk_level = order / EPT_TABLE_ORDER; 42.30 int direct_mmio = (p2mt == p2m_mmio_direct); 42.31 uint8_t igmt = 0; 42.32 + int need_modify_vtd_table = 1; 42.33 42.34 /* we only support 4k and 2m pages now */ 42.35 42.36 @@ -171,14 +172,23 @@ static int 42.37 42.38 if ( ret == GUEST_TABLE_SUPER_PAGE ) 42.39 { 42.40 - ept_entry->mfn = mfn_x(mfn) - offset; 42.41 + if ( ept_entry->mfn == (mfn_x(mfn) - offset) ) 42.42 + need_modify_vtd_table = 0; 42.43 + else 42.44 + ept_entry->mfn = mfn_x(mfn) - offset; 42.45 + 42.46 if ( ept_entry->avail1 == p2m_ram_logdirty && 42.47 p2mt == p2m_ram_rw ) 42.48 for ( i = 0; i < 512; i++ ) 42.49 paging_mark_dirty(d, mfn_x(mfn)-offset+i); 42.50 } 42.51 else 42.52 - ept_entry->mfn = mfn_x(mfn); 42.53 + { 42.54 + if ( ept_entry->mfn == mfn_x(mfn) ) 42.55 + need_modify_vtd_table = 0; 42.56 + else 42.57 + ept_entry->mfn = mfn_x(mfn); 42.58 + } 42.59 42.60 ept_entry->avail1 = p2mt; 42.61 ept_entry->rsvd = 0; 42.62 @@ -239,7 +249,10 @@ static int 42.63 &igmt, direct_mmio); 42.64 split_ept_entry->igmt = igmt; 42.65 42.66 - split_ept_entry->mfn = mfn_x(mfn); 42.67 + if ( split_ept_entry->mfn == mfn_x(mfn) ) 42.68 + need_modify_vtd_table = 0; 42.69 + else 42.70 + split_ept_entry->mfn = mfn_x(mfn); 42.71 split_ept_entry->avail1 = p2mt; 42.72 ept_p2m_type_to_flags(split_ept_entry, p2mt); 42.73 42.74 @@ -289,17 +302,6 @@ out: 42.75 return rv; 42.76 } 42.77 42.78 -static int 42.79 -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 42.80 - unsigned int order, p2m_type_t p2mt) 42.81 -{ 42.82 - /* ept_set_entry() are called from set_entry(), 42.83 - * We should always create VT-d page table acording 42.84 - * to the gfn to mfn translations changes. 42.85 - */ 42.86 - return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 42.87 -} 42.88 - 42.89 /* Read ept p2m entries */ 42.90 static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t, 42.91 p2m_query_t q) 42.92 @@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig 42.93 return ept_get_entry(current->domain, gfn, t, q); 42.94 } 42.95 42.96 +/* To test if the new emt type is the same with old, 42.97 + * return 1 to not to reset ept entry. 
42.98 + */ 42.99 +static int need_modify_ept_entry(struct domain *d, unsigned long gfn, 42.100 + unsigned long mfn, uint8_t o_igmt, 42.101 + uint8_t o_emt, p2m_type_t p2mt) 42.102 +{ 42.103 + uint8_t igmt, emt; 42.104 + emt = epte_get_entry_emt(d, gfn, mfn, &igmt, 42.105 + (p2mt == p2m_mmio_direct)); 42.106 + if ( (emt == o_emt) && (igmt == o_igmt) ) 42.107 + return 0; 42.108 + return 1; 42.109 +} 42.110 + 42.111 void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn, 42.112 unsigned long end_gfn) 42.113 { 42.114 @@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str 42.115 uint64_t epte; 42.116 int order = 0; 42.117 unsigned long mfn; 42.118 + uint8_t o_igmt, o_emt; 42.119 42.120 for ( gfn = start_gfn; gfn <= end_gfn; gfn++ ) 42.121 { 42.122 @@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str 42.123 mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT; 42.124 if ( !mfn_valid(mfn) ) 42.125 continue; 42.126 - p2mt = (epte & EPTE_AVAIL1_MASK) >> 8; 42.127 + p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT; 42.128 + o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT; 42.129 + o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT; 42.130 order = 0; 42.131 42.132 if ( epte & EPTE_SUPER_PAGE_MASK ) 42.133 @@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str 42.134 * Set emt for super page. 42.135 */ 42.136 order = EPT_TABLE_ORDER; 42.137 - /* vmx_set_uc_mode() dont' touch the gfn to mfn 42.138 - * translations, only modify the emt field of the EPT entries. 42.139 - * so we need not modify the current VT-d page tables. 42.140 - */ 42.141 - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); 42.142 + if ( need_modify_ept_entry(d, gfn, mfn, 42.143 + o_igmt, o_emt, p2mt) ) 42.144 + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); 42.145 gfn += 0x1FF; 42.146 } 42.147 else 42.148 { 42.149 - /* 1)change emt for partial entries of the 2m area. 42.150 - * 2)vmx_set_uc_mode() dont' touch the gfn to mfn 42.151 - * translations, only modify the emt field of the EPT entries. 42.152 - * so we need not modify the current VT-d page tables. 42.153 - */ 42.154 - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0); 42.155 + /* change emt for partial entries of the 2m area. */ 42.156 + if ( need_modify_ept_entry(d, gfn, mfn, 42.157 + o_igmt, o_emt, p2mt) ) 42.158 + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); 42.159 gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF; 42.160 } 42.161 } 42.162 - else /* 1)gfn assigned with 4k 42.163 - * 2)vmx_set_uc_mode() dont' touch the gfn to mfn 42.164 - * translations, only modify the emt field of the EPT entries. 42.165 - * so we need not modify the current VT-d page tables. 42.166 - */ 42.167 - _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); 42.168 + else /* gfn assigned with 4k */ 42.169 + { 42.170 + if ( need_modify_ept_entry(d, gfn, mfn, 42.171 + o_igmt, o_emt, p2mt) ) 42.172 + ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); 42.173 + } 42.174 } 42.175 } 42.176
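The EPT changes fold _ept_set_entry()'s need_modify_vtd_table parameter back into ept_set_entry(): the flag starts at 1 and is cleared whenever the gfn-to-mfn translation turns out to be unchanged, so the VT-d tables are only rewritten when something really moved. On top of that, ept_change_entry_emt_with_range() now calls need_modify_ept_entry() and skips entries whose effective memory type (emt/igmt) is already correct. Condensed from the hunks above, not new code:

    int need_modify_vtd_table = 1;       /* assume the VT-d tables need refreshing ...      */
    if ( ept_entry->mfn == mfn_x(mfn) )
        need_modify_vtd_table = 0;       /* ... unless the gfn->mfn mapping is unchanged    */
    else
        ept_entry->mfn = mfn_x(mfn);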
43.1 --- a/xen/arch/x86/mm/p2m.c Fri Feb 13 10:56:01 2009 +0900 43.2 +++ b/xen/arch/x86/mm/p2m.c Fri Feb 13 11:22:28 2009 +0900 43.3 @@ -89,11 +89,11 @@ 43.4 43.5 /* Override macros from asm/page.h to make them work with mfn_t */ 43.6 #undef mfn_to_page 43.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 43.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) 43.9 #undef mfn_valid 43.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 43.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) 43.12 #undef page_to_mfn 43.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 43.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) 43.15 43.16 43.17 /* PTE flags for the various types of p2m entry */ 43.18 @@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t * 43.19 struct page_info *pg = d->arch.p2m->alloc_page(d); 43.20 if ( pg == NULL ) 43.21 return 0; 43.22 - list_add_tail(&pg->list, &d->arch.p2m->pages); 43.23 + page_list_add_tail(pg, &d->arch.p2m->pages); 43.24 pg->u.inuse.type_info = type | 1 | PGT_validated; 43.25 pg->count_info = 1; 43.26 43.27 @@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t * 43.28 struct page_info *pg = d->arch.p2m->alloc_page(d); 43.29 if ( pg == NULL ) 43.30 return 0; 43.31 - list_add_tail(&pg->list, &d->arch.p2m->pages); 43.32 + page_list_add_tail(pg, &d->arch.p2m->pages); 43.33 pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated; 43.34 pg->count_info = 1; 43.35 43.36 @@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d, 43.37 for(i=0; i < 1 << order ; i++) 43.38 { 43.39 p = page + i; 43.40 - list_del(&p->list); 43.41 + page_list_del(p, &d->page_list); 43.42 } 43.43 43.44 /* Then add the first one to the appropriate populate-on-demand list */ 43.45 switch(order) 43.46 { 43.47 case 9: 43.48 - list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */ 43.49 + page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */ 43.50 p2md->pod.count += 1 << order; 43.51 break; 43.52 case 0: 43.53 - list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */ 43.54 + page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */ 43.55 p2md->pod.count += 1 ; 43.56 break; 43.57 default: 43.58 @@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_ 43.59 struct page_info *p = NULL; 43.60 int i; 43.61 43.62 - if ( order == 9 && list_empty(&p2md->pod.super) ) 43.63 + if ( order == 9 && page_list_empty(&p2md->pod.super) ) 43.64 { 43.65 return NULL; 43.66 } 43.67 - else if ( order == 0 && list_empty(&p2md->pod.single) ) 43.68 + else if ( order == 0 && page_list_empty(&p2md->pod.single) ) 43.69 { 43.70 unsigned long mfn; 43.71 struct page_info *q; 43.72 43.73 - BUG_ON( list_empty(&p2md->pod.super) ); 43.74 + BUG_ON( page_list_empty(&p2md->pod.super) ); 43.75 43.76 /* Break up a superpage to make single pages. NB count doesn't 43.77 * need to be adjusted. 
*/ 43.78 printk("%s: Breaking up superpage.\n", __func__); 43.79 - p = list_entry(p2md->pod.super.next, struct page_info, list); 43.80 - list_del(&p->list); 43.81 + p = page_list_remove_head(&p2md->pod.super); 43.82 mfn = mfn_x(page_to_mfn(p)); 43.83 43.84 for ( i=0; i<(1<<9); i++ ) 43.85 { 43.86 q = mfn_to_page(_mfn(mfn+i)); 43.87 - list_add_tail(&q->list, &p2md->pod.single); 43.88 + page_list_add_tail(q, &p2md->pod.single); 43.89 } 43.90 } 43.91 43.92 switch ( order ) 43.93 { 43.94 case 9: 43.95 - BUG_ON( list_empty(&p2md->pod.super) ); 43.96 - p = list_entry(p2md->pod.super.next, struct page_info, list); 43.97 + BUG_ON( page_list_empty(&p2md->pod.super) ); 43.98 + p = page_list_remove_head(&p2md->pod.super); 43.99 p2md->pod.count -= 1 << order; /* Lock: page_alloc */ 43.100 break; 43.101 case 0: 43.102 - BUG_ON( list_empty(&p2md->pod.single) ); 43.103 - p = list_entry(p2md->pod.single.next, struct page_info, list); 43.104 + BUG_ON( page_list_empty(&p2md->pod.single) ); 43.105 + p = page_list_remove_head(&p2md->pod.single); 43.106 p2md->pod.count -= 1; 43.107 break; 43.108 default: 43.109 BUG(); 43.110 } 43.111 43.112 - list_del(&p->list); 43.113 - 43.114 /* Put the pages back on the domain page_list */ 43.115 for ( i = 0 ; i < (1 << order) ; i++ ) 43.116 { 43.117 BUG_ON(page_get_owner(p + i) != d); 43.118 - list_add_tail(&p[i].list, &d->page_list); 43.119 + page_list_add_tail(p + i, &d->page_list); 43.120 } 43.121 43.122 return p; 43.123 @@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain * 43.124 spin_lock(&d->page_alloc_lock); 43.125 43.126 if ( (p2md->pod.count - pod_target) > (1>>9) 43.127 - && !list_empty(&p2md->pod.super) ) 43.128 + && !page_list_empty(&p2md->pod.super) ) 43.129 order = 9; 43.130 else 43.131 order = 0; 43.132 @@ -535,38 +532,27 @@ void 43.133 p2m_pod_empty_cache(struct domain *d) 43.134 { 43.135 struct p2m_domain *p2md = d->arch.p2m; 43.136 - struct list_head *q, *p; 43.137 + struct page_info *page; 43.138 43.139 spin_lock(&d->page_alloc_lock); 43.140 43.141 - list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */ 43.142 + while ( (page = page_list_remove_head(&p2md->pod.super)) ) 43.143 { 43.144 int i; 43.145 - struct page_info *page; 43.146 43.147 - list_del(p); 43.148 - 43.149 - page = list_entry(p, struct page_info, list); 43.150 - 43.151 for ( i = 0 ; i < (1 << 9) ; i++ ) 43.152 { 43.153 BUG_ON(page_get_owner(page + i) != d); 43.154 - list_add_tail(&page[i].list, &d->page_list); 43.155 + page_list_add_tail(page + i, &d->page_list); 43.156 } 43.157 43.158 p2md->pod.count -= 1<<9; 43.159 } 43.160 43.161 - list_for_each_safe(p, q, &p2md->pod.single) 43.162 + while ( (page = page_list_remove_head(&p2md->pod.single)) ) 43.163 { 43.164 - struct page_info *page; 43.165 - 43.166 - list_del(p); 43.167 - 43.168 - page = list_entry(p, struct page_info, list); 43.169 - 43.170 BUG_ON(page_get_owner(page) != d); 43.171 - list_add_tail(&page->list, &d->page_list); 43.172 + page_list_add_tail(page, &d->page_list); 43.173 43.174 p2md->pod.count -= 1; 43.175 } 43.176 @@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom 43.177 * NB that this is a zero-sum game; we're increasing our cache size 43.178 * by increasing our 'debt'. Since we hold the p2m lock, 43.179 * (entry_count - count) must remain the same. 
*/ 43.180 - if ( !list_empty(&p2md->pod.super) && i < limit ) 43.181 + if ( !page_list_empty(&p2md->pod.super) && i < limit ) 43.182 break; 43.183 } 43.184 43.185 @@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d 43.186 } 43.187 43.188 /* If we're low, start a sweep */ 43.189 - if ( order == 9 && list_empty(&p2md->pod.super) ) 43.190 + if ( order == 9 && page_list_empty(&p2md->pod.super) ) 43.191 p2m_pod_emergency_sweep_super(d); 43.192 43.193 - if ( list_empty(&p2md->pod.single) && 43.194 + if ( page_list_empty(&p2md->pod.single) && 43.195 ( ( order == 0 ) 43.196 - || (order == 9 && list_empty(&p2md->pod.super) ) ) ) 43.197 + || (order == 9 && page_list_empty(&p2md->pod.super) ) ) ) 43.198 p2m_pod_emergency_sweep(d); 43.199 43.200 /* Keep track of the highest gfn demand-populated by a guest fault */ 43.201 @@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d) 43.202 43.203 memset(p2m, 0, sizeof(*p2m)); 43.204 p2m_lock_init(p2m); 43.205 - INIT_LIST_HEAD(&p2m->pages); 43.206 - INIT_LIST_HEAD(&p2m->pod.super); 43.207 - INIT_LIST_HEAD(&p2m->pod.single); 43.208 + INIT_PAGE_LIST_HEAD(&p2m->pages); 43.209 + INIT_PAGE_LIST_HEAD(&p2m->pod.super); 43.210 + INIT_PAGE_LIST_HEAD(&p2m->pod.single); 43.211 43.212 p2m->set_entry = p2m_set_entry; 43.213 p2m->get_entry = p2m_gfn_to_mfn; 43.214 @@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d, 43.215 43.216 { 43.217 mfn_t mfn = _mfn(INVALID_MFN); 43.218 - struct list_head *entry; 43.219 struct page_info *page, *p2m_top; 43.220 unsigned int page_count = 0; 43.221 unsigned long gfn = -1UL; 43.222 @@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d, 43.223 p2m_unlock(p2m); 43.224 return -ENOMEM; 43.225 } 43.226 - list_add_tail(&p2m_top->list, &p2m->pages); 43.227 + page_list_add_tail(p2m_top, &p2m->pages); 43.228 43.229 p2m_top->count_info = 1; 43.230 p2m_top->u.inuse.type_info = 43.231 @@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d, 43.232 goto error; 43.233 43.234 /* Copy all existing mappings from the page list and m2p */ 43.235 - for ( entry = d->page_list.next; 43.236 - entry != &d->page_list; 43.237 - entry = entry->next ) 43.238 + page_list_for_each(page, &d->page_list) 43.239 { 43.240 - page = list_entry(entry, struct page_info, list); 43.241 mfn = page_to_mfn(page); 43.242 gfn = get_gpfn_from_mfn(mfn_x(mfn)); 43.243 page_count++; 43.244 @@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d) 43.245 /* Return all the p2m pages to Xen. 43.246 * We know we don't have any extra mappings to these pages */ 43.247 { 43.248 - struct list_head *entry, *n; 43.249 struct page_info *pg; 43.250 struct p2m_domain *p2m = d->arch.p2m; 43.251 43.252 p2m_lock(p2m); 43.253 d->arch.phys_table = pagetable_null(); 43.254 43.255 - list_for_each_safe(entry, n, &p2m->pages) 43.256 - { 43.257 - pg = list_entry(entry, struct page_info, list); 43.258 - list_del(entry); 43.259 + while ( (pg = page_list_remove_head(&p2m->pages)) ) 43.260 p2m->free_page(d, pg); 43.261 - } 43.262 p2m_unlock(p2m); 43.263 } 43.264
44.1 --- a/xen/arch/x86/mm/paging.c Fri Feb 13 10:56:01 2009 +0900 44.2 +++ b/xen/arch/x86/mm/paging.c Fri Feb 13 11:22:28 2009 +0900 44.3 @@ -47,11 +47,11 @@ 44.4 /************************************************/ 44.5 /* Override macros from asm/page.h to make them work with mfn_t */ 44.6 #undef mfn_to_page 44.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 44.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) 44.9 #undef mfn_valid 44.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 44.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) 44.12 #undef page_to_mfn 44.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 44.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) 44.15 44.16 /* The log-dirty lock. This protects the log-dirty bitmap from 44.17 * concurrent accesses (and teardowns, etc).
45.1 --- a/xen/arch/x86/mm/shadow/common.c Fri Feb 13 10:56:01 2009 +0900 45.2 +++ b/xen/arch/x86/mm/shadow/common.c Fri Feb 13 11:22:28 2009 +0900 45.3 @@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d 45.4 int i; 45.5 shadow_lock_init(d); 45.6 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) 45.7 - INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); 45.8 - INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); 45.9 - INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); 45.10 + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); 45.11 + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); 45.12 + INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); 45.13 45.14 /* Use shadow pagetables for log-dirty support */ 45.15 paging_log_dirty_init(d, shadow_enable_log_dirty, 45.16 @@ -1291,9 +1291,9 @@ static inline int space_is_available( 45.17 for ( ; order <= shadow_max_order(d); ++order ) 45.18 { 45.19 unsigned int n = count; 45.20 - const struct list_head *p; 45.21 - 45.22 - list_for_each ( p, &d->arch.paging.shadow.freelists[order] ) 45.23 + const struct page_info *sp; 45.24 + 45.25 + page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] ) 45.26 if ( --n == 0 ) 45.27 return 1; 45.28 count = (count + 1) >> 1; 45.29 @@ -1306,8 +1306,8 @@ static inline int space_is_available( 45.30 * non-Xen mappings in this top-level shadow mfn */ 45.31 static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn) 45.32 { 45.33 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 45.34 - switch ( sp->type ) 45.35 + struct page_info *sp = mfn_to_page(smfn); 45.36 + switch ( sp->u.sh.type ) 45.37 { 45.38 case SH_type_l2_32_shadow: 45.39 SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn); 45.40 @@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc 45.41 break; 45.42 #endif 45.43 default: 45.44 - SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type); 45.45 + SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type); 45.46 BUG(); 45.47 } 45.48 } 45.49 @@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc 45.50 /* Convert smfn to gfn */ 45.51 unsigned long gfn; 45.52 ASSERT(mfn_valid(smfn)); 45.53 - gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer)); 45.54 + gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back)); 45.55 __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/, 45.56 sizeof(gfn), (unsigned char*)&gfn); 45.57 } 45.58 @@ -1350,8 +1350,7 @@ static void _shadow_prealloc( 45.59 /* Need a vpcu for calling unpins; for now, since we don't have 45.60 * per-vcpu shadows, any will do */ 45.61 struct vcpu *v, *v2; 45.62 - struct list_head *l, *t; 45.63 - struct shadow_page_info *sp; 45.64 + struct page_info *sp, *t; 45.65 mfn_t smfn; 45.66 int i; 45.67 45.68 @@ -1365,10 +1364,9 @@ static void _shadow_prealloc( 45.69 45.70 /* Stage one: walk the list of pinned pages, unpinning them */ 45.71 perfc_incr(shadow_prealloc_1); 45.72 - list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) 45.73 + page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows) 45.74 { 45.75 - sp = list_entry(l, struct shadow_page_info, list); 45.76 - smfn = shadow_page_to_mfn(sp); 45.77 + smfn = page_to_mfn(sp); 45.78 45.79 /* Unpin this top-level shadow */ 45.80 trace_shadow_prealloc_unpin(d, smfn); 45.81 @@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u 45.82 * this domain's shadows */ 45.83 static void shadow_blow_tables(struct domain *d) 45.84 { 45.85 - struct list_head *l, *t; 45.86 - struct 
shadow_page_info *sp; 45.87 + struct page_info *sp, *t; 45.88 struct vcpu *v = d->vcpu[0]; 45.89 mfn_t smfn; 45.90 int i; 45.91 @@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do 45.92 ASSERT(v != NULL); 45.93 45.94 /* Pass one: unpin all pinned pages */ 45.95 - list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) 45.96 + page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows) 45.97 { 45.98 - sp = list_entry(l, struct shadow_page_info, list); 45.99 - smfn = shadow_page_to_mfn(sp); 45.100 + smfn = page_to_mfn(sp); 45.101 sh_unpin(v, smfn); 45.102 } 45.103 45.104 @@ -1493,6 +1489,18 @@ static __init int shadow_blow_tables_key 45.105 __initcall(shadow_blow_tables_keyhandler_init); 45.106 #endif /* !NDEBUG */ 45.107 45.108 +static inline struct page_info * 45.109 +next_shadow(const struct page_info *sp) 45.110 +{ 45.111 + return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL; 45.112 +} 45.113 + 45.114 +static inline void 45.115 +set_next_shadow(struct page_info *sp, struct page_info *next) 45.116 +{ 45.117 + sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0; 45.118 +} 45.119 + 45.120 /* Allocate another shadow's worth of (contiguous, aligned) pages, 45.121 * and fill in the type and backpointer fields of their page_infos. 45.122 * Never fails to allocate. */ 45.123 @@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d, 45.124 u32 shadow_type, 45.125 unsigned long backpointer) 45.126 { 45.127 - struct shadow_page_info *sp = NULL; 45.128 + struct page_info *sp = NULL; 45.129 unsigned int order = shadow_order(shadow_type); 45.130 cpumask_t mask; 45.131 void *p; 45.132 @@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d, 45.133 45.134 /* Find smallest order which can satisfy the request. */ 45.135 for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) 45.136 - if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) 45.137 + if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) ) 45.138 goto found; 45.139 45.140 /* If we get here, we failed to allocate. This should never happen. 45.141 @@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d, 45.142 BUG(); 45.143 45.144 found: 45.145 - sp = list_entry(d->arch.paging.shadow.freelists[i].next, 45.146 - struct shadow_page_info, list); 45.147 - list_del(&sp->list); 45.148 - 45.149 /* We may have to halve the chunk a number of times. 
*/ 45.150 while ( i != order ) 45.151 { 45.152 i--; 45.153 - sp->order = i; 45.154 - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); 45.155 + sp->v.free.order = i; 45.156 + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]); 45.157 sp += 1 << i; 45.158 } 45.159 d->arch.paging.shadow.free_pages -= 1 << order; 45.160 @@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d, 45.161 flush_tlb_mask(mask); 45.162 } 45.163 /* Now safe to clear the page for reuse */ 45.164 - p = sh_map_domain_page(shadow_page_to_mfn(sp+i)); 45.165 + p = sh_map_domain_page(page_to_mfn(sp+i)); 45.166 ASSERT(p != NULL); 45.167 clear_page(p); 45.168 sh_unmap_domain_page(p); 45.169 - INIT_LIST_HEAD(&sp[i].list); 45.170 - sp[i].type = shadow_type; 45.171 - sp[i].pinned = 0; 45.172 - sp[i].count = 0; 45.173 - sp[i].backpointer = backpointer; 45.174 - sp[i].next_shadow = NULL; 45.175 + INIT_PAGE_LIST_ENTRY(&sp[i].list); 45.176 + sp[i].u.sh.type = shadow_type; 45.177 + sp[i].u.sh.pinned = 0; 45.178 + sp[i].u.sh.count = 0; 45.179 + sp[i].v.sh.back = backpointer; 45.180 + set_next_shadow(&sp[i], NULL); 45.181 perfc_incr(shadow_alloc_count); 45.182 } 45.183 - return shadow_page_to_mfn(sp); 45.184 + return page_to_mfn(sp); 45.185 } 45.186 45.187 45.188 /* Return some shadow pages to the pool. */ 45.189 void shadow_free(struct domain *d, mfn_t smfn) 45.190 { 45.191 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 45.192 + struct page_info *sp = mfn_to_page(smfn); 45.193 u32 shadow_type; 45.194 unsigned long order; 45.195 unsigned long mask; 45.196 @@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t 45.197 ASSERT(shadow_locked_by_me(d)); 45.198 perfc_incr(shadow_free); 45.199 45.200 - shadow_type = sp->type; 45.201 + shadow_type = sp->u.sh.type; 45.202 ASSERT(shadow_type != SH_type_none); 45.203 ASSERT(shadow_type != SH_type_p2m_table); 45.204 order = shadow_order(shadow_type); 45.205 @@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t 45.206 } 45.207 #endif 45.208 /* Strip out the type: this is now a free shadow page */ 45.209 - sp[i].type = 0; 45.210 + sp[i].u.sh.type = 0; 45.211 /* Remember the TLB timestamp so we will know whether to flush 45.212 * TLBs when we reuse the page. Because the destructors leave the 45.213 * contents of the pages in place, we can delay TLB flushes until 45.214 @@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t 45.215 for ( ; order < shadow_max_order(d); ++order ) 45.216 { 45.217 mask = 1 << order; 45.218 - if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) { 45.219 + if ( (mfn_x(page_to_mfn(sp)) & mask) ) { 45.220 /* Merge with predecessor block? */ 45.221 - if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) ) 45.222 + if ( ((sp-mask)->u.sh.type != PGT_none) || 45.223 + ((sp-mask)->v.free.order != order) ) 45.224 break; 45.225 - list_del(&(sp-mask)->list); 45.226 sp -= mask; 45.227 + page_list_del(sp, &d->arch.paging.shadow.freelists[order]); 45.228 } else { 45.229 /* Merge with successor block? 
*/ 45.230 - if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) ) 45.231 + if ( ((sp+mask)->u.sh.type != PGT_none) || 45.232 + ((sp+mask)->v.free.order != order) ) 45.233 break; 45.234 - list_del(&(sp+mask)->list); 45.235 + page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]); 45.236 } 45.237 } 45.238 45.239 - sp->order = order; 45.240 - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); 45.241 + sp->v.free.order = order; 45.242 + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]); 45.243 } 45.244 45.245 /* Divert some memory from the pool to be used by the p2m mapping. 45.246 @@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d) 45.247 */ 45.248 page_set_owner(&pg[i], d); 45.249 pg[i].count_info = 1; 45.250 - list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); 45.251 + page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist); 45.252 } 45.253 return 1; 45.254 } 45.255 @@ -1681,25 +1687,22 @@ sh_alloc_p2m_pages(struct domain *d) 45.256 static struct page_info * 45.257 shadow_alloc_p2m_page(struct domain *d) 45.258 { 45.259 - struct list_head *entry; 45.260 struct page_info *pg; 45.261 mfn_t mfn; 45.262 void *p; 45.263 45.264 shadow_lock(d); 45.265 45.266 - if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && 45.267 + if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) && 45.268 !sh_alloc_p2m_pages(d) ) 45.269 { 45.270 shadow_unlock(d); 45.271 return NULL; 45.272 } 45.273 - entry = d->arch.paging.shadow.p2m_freelist.next; 45.274 - list_del(entry); 45.275 + pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist); 45.276 45.277 shadow_unlock(d); 45.278 45.279 - pg = list_entry(entry, struct page_info, list); 45.280 mfn = page_to_mfn(pg); 45.281 p = sh_map_domain_page(mfn); 45.282 clear_page(p); 45.283 @@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st 45.284 unsigned int pages, 45.285 int *preempted) 45.286 { 45.287 - struct shadow_page_info *sp; 45.288 + struct page_info *sp; 45.289 unsigned int lower_bound; 45.290 unsigned int j, order = shadow_max_order(d); 45.291 45.292 @@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st 45.293 if ( d->arch.paging.shadow.total_pages < pages ) 45.294 { 45.295 /* Need to allocate more memory from domheap */ 45.296 - sp = (struct shadow_page_info *) 45.297 + sp = (struct page_info *) 45.298 alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d))); 45.299 if ( sp == NULL ) 45.300 { 45.301 @@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st 45.302 d->arch.paging.shadow.total_pages += 1 << order; 45.303 for ( j = 0; j < 1U << order; j++ ) 45.304 { 45.305 - sp[j].type = 0; 45.306 - sp[j].pinned = 0; 45.307 - sp[j].count = 0; 45.308 - sp[j].mbz = 0; 45.309 + sp[j].u.sh.type = 0; 45.310 + sp[j].u.sh.pinned = 0; 45.311 + sp[j].u.sh.count = 0; 45.312 sp[j].tlbflush_timestamp = 0; /* Not in any TLB */ 45.313 } 45.314 - sp->order = order; 45.315 - list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); 45.316 + sp->v.free.order = order; 45.317 + page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]); 45.318 } 45.319 else if ( d->arch.paging.shadow.total_pages > pages ) 45.320 { 45.321 /* Need to return memory to domheap */ 45.322 _shadow_prealloc(d, order, 1); 45.323 - ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order])); 45.324 - sp = list_entry(d->arch.paging.shadow.freelists[order].next, 45.325 - struct shadow_page_info, list); 45.326 - list_del(&sp->list); 45.327 + sp = 
page_list_remove_head(&d->arch.paging.shadow.freelists[order]); 45.328 + ASSERT(sp); 45.329 + /* 45.330 + * The pages were allocated anonymously, but the owner field 45.331 + * gets overwritten normally, so need to clear it here. 45.332 + */ 45.333 + for ( j = 0; j < 1U << order; j++ ) 45.334 + page_set_owner(&((struct page_info *)sp)[j], NULL); 45.335 d->arch.paging.shadow.free_pages -= 1 << order; 45.336 d->arch.paging.shadow.total_pages -= 1 << order; 45.337 free_domheap_pages((struct page_info *)sp, order); 45.338 @@ -1880,7 +1886,7 @@ static inline key_t sh_hash(unsigned lon 45.339 static void sh_hash_audit_bucket(struct domain *d, int bucket) 45.340 /* Audit one bucket of the hash table */ 45.341 { 45.342 - struct shadow_page_info *sp, *x; 45.343 + struct page_info *sp, *x; 45.344 45.345 if ( !(SHADOW_AUDIT_ENABLE) ) 45.346 return; 45.347 @@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct 45.348 while ( sp ) 45.349 { 45.350 /* Not a shadow? */ 45.351 - BUG_ON( sp->mbz != 0 ); 45.352 + BUG_ON( sp->count_info != 0 ); 45.353 /* Bogus type? */ 45.354 - BUG_ON( sp->type == 0 ); 45.355 - BUG_ON( sp->type > SH_type_max_shadow ); 45.356 + BUG_ON( sp->u.sh.type == 0 ); 45.357 + BUG_ON( sp->u.sh.type > SH_type_max_shadow ); 45.358 /* Wrong bucket? */ 45.359 - BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket ); 45.360 + BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket ); 45.361 /* Duplicate entry? */ 45.362 - for ( x = sp->next_shadow; x; x = x->next_shadow ) 45.363 - BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type ); 45.364 + for ( x = next_shadow(sp); x; x = next_shadow(x) ) 45.365 + BUG_ON( x->v.sh.back == sp->v.sh.back && 45.366 + x->u.sh.type == sp->u.sh.type ); 45.367 /* Follow the backpointer to the guest pagetable */ 45.368 - if ( sp->type != SH_type_fl1_32_shadow 45.369 - && sp->type != SH_type_fl1_pae_shadow 45.370 - && sp->type != SH_type_fl1_64_shadow ) 45.371 + if ( sp->u.sh.type != SH_type_fl1_32_shadow 45.372 + && sp->u.sh.type != SH_type_fl1_pae_shadow 45.373 + && sp->u.sh.type != SH_type_fl1_64_shadow ) 45.374 { 45.375 - struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer)); 45.376 + struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back)); 45.377 /* Bad shadow flags on guest page? */ 45.378 - BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) ); 45.379 + BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) ); 45.380 /* Bad type count on guest page? 
*/ 45.381 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 45.382 - if ( sp->type == SH_type_l1_32_shadow 45.383 - || sp->type == SH_type_l1_pae_shadow 45.384 - || sp->type == SH_type_l1_64_shadow ) 45.385 + if ( sp->u.sh.type == SH_type_l1_32_shadow 45.386 + || sp->u.sh.type == SH_type_l1_pae_shadow 45.387 + || sp->u.sh.type == SH_type_l1_64_shadow ) 45.388 { 45.389 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 45.390 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) 45.391 { 45.392 if ( !page_is_out_of_sync(gpg) ) 45.393 { 45.394 - SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" 45.395 + SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")" 45.396 " and not OOS but has typecount %#lx\n", 45.397 - sp->backpointer, 45.398 - mfn_x(shadow_page_to_mfn(sp)), 45.399 + sp->v.sh.back, 45.400 + mfn_x(page_to_mfn(sp)), 45.401 gpg->u.inuse.type_info); 45.402 BUG(); 45.403 } 45.404 @@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct 45.405 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 45.406 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) 45.407 { 45.408 - SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" 45.409 + SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")" 45.410 " but has typecount %#lx\n", 45.411 - sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 45.412 + sp->v.sh.back, mfn_x(page_to_mfn(sp)), 45.413 gpg->u.inuse.type_info); 45.414 BUG(); 45.415 } 45.416 } 45.417 /* That entry was OK; on we go */ 45.418 - sp = sp->next_shadow; 45.419 + sp = next_shadow(sp); 45.420 } 45.421 } 45.422 45.423 @@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain 45.424 * Returns 0 for success, 1 for error. */ 45.425 static int shadow_hash_alloc(struct domain *d) 45.426 { 45.427 - struct shadow_page_info **table; 45.428 + struct page_info **table; 45.429 45.430 ASSERT(shadow_locked_by_me(d)); 45.431 ASSERT(!d->arch.paging.shadow.hash_table); 45.432 45.433 - table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); 45.434 + table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS); 45.435 if ( !table ) return 1; 45.436 memset(table, 0, 45.437 - SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); 45.438 + SHADOW_HASH_BUCKETS * sizeof (struct page_info *)); 45.439 d->arch.paging.shadow.hash_table = table; 45.440 return 0; 45.441 } 45.442 @@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 45.443 * or INVALID_MFN if it doesn't exist */ 45.444 { 45.445 struct domain *d = v->domain; 45.446 - struct shadow_page_info *sp, *prev; 45.447 + struct page_info *sp, *prev; 45.448 key_t key; 45.449 45.450 ASSERT(shadow_locked_by_me(d)); 45.451 @@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 45.452 prev = NULL; 45.453 while(sp) 45.454 { 45.455 - if ( sp->backpointer == n && sp->type == t ) 45.456 + if ( sp->v.sh.back == n && sp->u.sh.type == t ) 45.457 { 45.458 /* Pull-to-front if 'sp' isn't already the head item */ 45.459 if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) 45.460 { 45.461 if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) 45.462 /* Can't reorder: someone is walking the hash chains */ 45.463 - return shadow_page_to_mfn(sp); 45.464 + return page_to_mfn(sp); 45.465 else 45.466 { 45.467 ASSERT(prev); 45.468 /* Delete sp from the list */ 45.469 prev->next_shadow = sp->next_shadow; 45.470 /* Re-insert it at the head of the list */ 45.471 - sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 45.472 + set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]); 45.473 
d->arch.paging.shadow.hash_table[key] = sp; 45.474 } 45.475 } 45.476 @@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 45.477 { 45.478 perfc_incr(shadow_hash_lookup_head); 45.479 } 45.480 - return shadow_page_to_mfn(sp); 45.481 + return page_to_mfn(sp); 45.482 } 45.483 prev = sp; 45.484 - sp = sp->next_shadow; 45.485 + sp = next_shadow(sp); 45.486 } 45.487 45.488 perfc_incr(shadow_hash_lookup_miss); 45.489 @@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v, 45.490 /* Put a mapping (n,t)->smfn into the hash table */ 45.491 { 45.492 struct domain *d = v->domain; 45.493 - struct shadow_page_info *sp; 45.494 + struct page_info *sp; 45.495 key_t key; 45.496 45.497 ASSERT(shadow_locked_by_me(d)); 45.498 @@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v, 45.499 sh_hash_audit_bucket(d, key); 45.500 45.501 /* Insert this shadow at the top of the bucket */ 45.502 - sp = mfn_to_shadow_page(smfn); 45.503 - sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 45.504 + sp = mfn_to_page(smfn); 45.505 + set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]); 45.506 d->arch.paging.shadow.hash_table[key] = sp; 45.507 45.508 sh_hash_audit_bucket(d, key); 45.509 @@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v, 45.510 /* Excise the mapping (n,t)->smfn from the hash table */ 45.511 { 45.512 struct domain *d = v->domain; 45.513 - struct shadow_page_info *sp, *x; 45.514 + struct page_info *sp, *x; 45.515 key_t key; 45.516 45.517 ASSERT(shadow_locked_by_me(d)); 45.518 @@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v, 45.519 key = sh_hash(n, t); 45.520 sh_hash_audit_bucket(d, key); 45.521 45.522 - sp = mfn_to_shadow_page(smfn); 45.523 + sp = mfn_to_page(smfn); 45.524 if ( d->arch.paging.shadow.hash_table[key] == sp ) 45.525 /* Easy case: we're deleting the head item. */ 45.526 - d->arch.paging.shadow.hash_table[key] = sp->next_shadow; 45.527 + d->arch.paging.shadow.hash_table[key] = next_shadow(sp); 45.528 else 45.529 { 45.530 /* Need to search for the one we want */ 45.531 @@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v, 45.532 { 45.533 ASSERT(x); /* We can't have hit the end, since our target is 45.534 * still in the chain somehwere... */ 45.535 - if ( x->next_shadow == sp ) 45.536 + if ( next_shadow(x) == sp ) 45.537 { 45.538 x->next_shadow = sp->next_shadow; 45.539 break; 45.540 } 45.541 - x = x->next_shadow; 45.542 + x = next_shadow(x); 45.543 } 45.544 } 45.545 - sp->next_shadow = NULL; 45.546 + set_next_shadow(sp, NULL); 45.547 45.548 sh_hash_audit_bucket(d, key); 45.549 } 45.550 @@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v, 45.551 { 45.552 int i, done = 0; 45.553 struct domain *d = v->domain; 45.554 - struct shadow_page_info *x; 45.555 + struct page_info *x; 45.556 45.557 /* Say we're here, to stop hash-lookups reordering the chains */ 45.558 ASSERT(shadow_locked_by_me(d)); 45.559 @@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v, 45.560 /* WARNING: This is not safe against changes to the hash table. 45.561 * The callback *must* return non-zero if it has inserted or 45.562 * deleted anything from the hash (lookups are OK, though). 
*/ 45.563 - for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) 45.564 + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) ) 45.565 { 45.566 - if ( callback_mask & (1 << x->type) ) 45.567 + if ( callback_mask & (1 << x->u.sh.type) ) 45.568 { 45.569 - ASSERT(x->type <= 15); 45.570 - ASSERT(callbacks[x->type] != NULL); 45.571 - done = callbacks[x->type](v, shadow_page_to_mfn(x), 45.572 - callback_mfn); 45.573 + ASSERT(x->u.sh.type <= 15); 45.574 + ASSERT(callbacks[x->u.sh.type] != NULL); 45.575 + done = callbacks[x->u.sh.type](v, page_to_mfn(x), 45.576 + callback_mfn); 45.577 if ( done ) break; 45.578 } 45.579 } 45.580 @@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v, 45.581 45.582 void sh_destroy_shadow(struct vcpu *v, mfn_t smfn) 45.583 { 45.584 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 45.585 - unsigned int t = sp->type; 45.586 + struct page_info *sp = mfn_to_page(smfn); 45.587 + unsigned int t = sp->u.sh.type; 45.588 45.589 45.590 SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn)); 45.591 @@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m 45.592 t == SH_type_fl1_64_shadow || 45.593 t == SH_type_monitor_table || 45.594 (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) || 45.595 - (page_get_owner(mfn_to_page(_mfn(sp->backpointer))) 45.596 + (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back))) 45.597 == v->domain)); 45.598 45.599 /* The down-shifts here are so that the switch statement is on nice 45.600 @@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu * 45.601 { 45.602 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); 45.603 mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); 45.604 - int shtype = mfn_to_shadow_page(last_smfn)->type; 45.605 + int shtype = mfn_to_page(last_smfn)->u.sh.type; 45.606 45.607 if ( callbacks[shtype] ) 45.608 callbacks[shtype](v, last_smfn, gmfn); 45.609 @@ -2481,25 +2488,25 @@ int sh_remove_write_access(struct vcpu * 45.610 int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn, 45.611 mfn_t smfn, unsigned long off) 45.612 { 45.613 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 45.614 + struct page_info *sp = mfn_to_page(smfn); 45.615 45.616 ASSERT(mfn_valid(smfn)); 45.617 ASSERT(mfn_valid(gmfn)); 45.618 45.619 - if ( sp->type == SH_type_l1_32_shadow 45.620 - || sp->type == SH_type_fl1_32_shadow ) 45.621 + if ( sp->u.sh.type == SH_type_l1_32_shadow 45.622 + || sp->u.sh.type == SH_type_fl1_32_shadow ) 45.623 { 45.624 return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2) 45.625 (v, gmfn, smfn, off); 45.626 } 45.627 #if CONFIG_PAGING_LEVELS >= 3 45.628 - else if ( sp->type == SH_type_l1_pae_shadow 45.629 - || sp->type == SH_type_fl1_pae_shadow ) 45.630 + else if ( sp->u.sh.type == SH_type_l1_pae_shadow 45.631 + || sp->u.sh.type == SH_type_fl1_pae_shadow ) 45.632 return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3) 45.633 (v, gmfn, smfn, off); 45.634 #if CONFIG_PAGING_LEVELS >= 4 45.635 - else if ( sp->type == SH_type_l1_64_shadow 45.636 - || sp->type == SH_type_fl1_64_shadow ) 45.637 + else if ( sp->u.sh.type == SH_type_l1_64_shadow 45.638 + || sp->u.sh.type == SH_type_fl1_64_shadow ) 45.639 return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4) 45.640 (v, gmfn, smfn, off); 45.641 #endif 45.642 @@ -2601,17 +2608,17 @@ static int sh_remove_shadow_via_pointer( 45.643 /* Follow this shadow's up-pointer, if it has one, and remove the reference 45.644 * found there. 
Returns 1 if that was the only reference to this shadow */ 45.645 { 45.646 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 45.647 + struct page_info *sp = mfn_to_page(smfn); 45.648 mfn_t pmfn; 45.649 void *vaddr; 45.650 int rc; 45.651 45.652 - ASSERT(sp->type > 0); 45.653 - ASSERT(sp->type < SH_type_max_shadow); 45.654 - ASSERT(sp->type != SH_type_l2_32_shadow); 45.655 - ASSERT(sp->type != SH_type_l2_pae_shadow); 45.656 - ASSERT(sp->type != SH_type_l2h_pae_shadow); 45.657 - ASSERT(sp->type != SH_type_l4_64_shadow); 45.658 + ASSERT(sp->u.sh.type > 0); 45.659 + ASSERT(sp->u.sh.type < SH_type_max_shadow); 45.660 + ASSERT(sp->u.sh.type != SH_type_l2_32_shadow); 45.661 + ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow); 45.662 + ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow); 45.663 + ASSERT(sp->u.sh.type != SH_type_l4_64_shadow); 45.664 45.665 if (sp->up == 0) return 0; 45.666 pmfn = _mfn(sp->up >> PAGE_SHIFT); 45.667 @@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer( 45.668 ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn)); 45.669 45.670 /* Is this the only reference to this shadow? */ 45.671 - rc = (sp->count == 1) ? 1 : 0; 45.672 + rc = (sp->u.sh.count == 1) ? 1 : 0; 45.673 45.674 /* Blank the offending entry */ 45.675 - switch (sp->type) 45.676 + switch (sp->u.sh.type) 45.677 { 45.678 case SH_type_l1_32_shadow: 45.679 case SH_type_l2_32_shadow: 45.680 @@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d) 45.681 { 45.682 struct vcpu *v; 45.683 mfn_t mfn; 45.684 - struct list_head *entry, *n; 45.685 struct page_info *pg; 45.686 45.687 ASSERT(d->is_dying); 45.688 @@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d) 45.689 } 45.690 #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */ 45.691 45.692 - list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) 45.693 - { 45.694 - list_del(entry); 45.695 - pg = list_entry(entry, struct page_info, list); 45.696 + while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) ) 45.697 shadow_free_p2m_page(d, pg); 45.698 - } 45.699 45.700 if ( d->arch.paging.shadow.total_pages != 0 ) 45.701 { 45.702 @@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai 45.703 for ( i = 0; i < nr; i++ ) { 45.704 mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t); 45.705 struct page_info *page; 45.706 - u32 count_info; 45.707 int dirty = 0; 45.708 paddr_t sl1ma = d->dirty_vram->sl1ma[i]; 45.709 45.710 @@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai 45.711 else 45.712 { 45.713 page = mfn_to_page(mfn); 45.714 - count_info = page->u.inuse.type_info & PGT_count_mask; 45.715 - switch (count_info) 45.716 + switch (page->u.inuse.type_info & PGT_count_mask) 45.717 { 45.718 case 0: 45.719 /* No guest reference, nothing to track. */
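The shadow/common.c hunks carry the bulk of the struct shadow_page_info removal: shadow metadata now lives in the generic struct page_info, with mfn_to_shadow_page()/shadow_page_to_mfn() collapsing into mfn_to_page()/page_to_mfn(), the free-list order moving to v.free.order, and the hash chains walked through the new next_shadow()/set_next_shadow() accessors (next_shadow stores an MFN, 0 marking the end of a chain). A field-mapping sketch of the translation applied throughout these files (illustrative; the field names are exactly those introduced above):

    struct page_info *sp = mfn_to_page(smfn);   /* was: mfn_to_shadow_page(smfn) */

    unsigned int  type   = sp->u.sh.type;       /* was: sp->type        */
    unsigned long back   = sp->v.sh.back;       /* was: sp->backpointer */
    unsigned int  count  = sp->u.sh.count;      /* was: sp->count       */
    struct page_info *nx = next_shadow(sp);     /* was: sp->next_shadow */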
46.1 --- a/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 10:56:01 2009 +0900 46.2 +++ b/xen/arch/x86/mm/shadow/multi.c Fri Feb 13 11:22:28 2009 +0900 46.3 @@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v 46.4 } 46.5 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.6 { 46.7 - struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn); 46.8 - mfn_t gl1mfn = _mfn(sp->backpointer); 46.9 + struct page_info *sp = mfn_to_page(sl1mfn); 46.10 + mfn_t gl1mfn = _mfn(sp->v.sh.back); 46.11 46.12 /* If the shadow is a fl1 then the backpointer contains 46.13 the GFN instead of the GMFN, and it's definitely not 46.14 OOS. */ 46.15 - if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn) 46.16 + if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn) 46.17 && mfn_is_out_of_sync(gl1mfn) ) 46.18 sh_resync(v, gl1mfn); 46.19 } 46.20 @@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s 46.21 if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { 46.22 unsigned long i = gfn - d->dirty_vram->begin_pfn; 46.23 struct page_info *page = mfn_to_page(mfn); 46.24 - u32 count_info = page->u.inuse.type_info & PGT_count_mask; 46.25 46.26 - if ( count_info == 1 ) 46.27 + if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) 46.28 /* Initial guest reference, record it */ 46.29 d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn)) 46.30 | ((unsigned long)sl1e & ~PAGE_MASK); 46.31 @@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s 46.32 if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) { 46.33 unsigned long i = gfn - d->dirty_vram->begin_pfn; 46.34 struct page_info *page = mfn_to_page(mfn); 46.35 - u32 count_info = page->u.inuse.type_info & PGT_count_mask; 46.36 int dirty = 0; 46.37 paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn)) 46.38 | ((unsigned long)sl1e & ~PAGE_MASK); 46.39 46.40 - if ( count_info == 1 ) { 46.41 + if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) { 46.42 /* Last reference */ 46.43 if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) { 46.44 /* We didn't know it was that one, let's say it is dirty */ 46.45 @@ -1194,8 +1192,8 @@ static inline void increment_ptr_to_gues 46.46 do { \ 46.47 int _i; \ 46.48 shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn)); \ 46.49 - ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \ 46.50 - || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \ 46.51 + ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow \ 46.52 + || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\ 46.53 for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \ 46.54 { \ 46.55 (_sl1e) = _sp + _i; \ 46.56 @@ -1232,7 +1230,7 @@ do { 46.57 do { \ 46.58 int _i, _j, __done = 0; \ 46.59 int _xen = !shadow_mode_external(_dom); \ 46.60 - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \ 46.61 + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\ 46.62 for ( _j = 0; _j < 4 && !__done; _j++ ) \ 46.63 { \ 46.64 shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn); \ 46.65 @@ -1260,11 +1258,11 @@ do { 46.66 int _i; \ 46.67 int _xen = !shadow_mode_external(_dom); \ 46.68 shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \ 46.69 - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \ 46.70 - || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\ 46.71 + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \ 46.72 + || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\ 46.73 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) 
\ 46.74 if ( (!(_xen)) \ 46.75 - || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\ 46.76 + || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\ 46.77 || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \ 46.78 < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \ 46.79 { \ 46.80 @@ -1285,13 +1283,13 @@ do { 46.81 int _i; \ 46.82 int _xen = !shadow_mode_external(_dom); \ 46.83 shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \ 46.84 - ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \ 46.85 - mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \ 46.86 + ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\ 46.87 + mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\ 46.88 for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \ 46.89 { \ 46.90 if ( (!(_xen)) \ 46.91 || !is_pv_32on64_domain(_dom) \ 46.92 - || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \ 46.93 + || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\ 46.94 || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \ 46.95 { \ 46.96 (_sl2e) = _sp + _i; \ 46.97 @@ -1313,7 +1311,7 @@ do { 46.98 do { \ 46.99 int _i; \ 46.100 shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn)); \ 46.101 - ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \ 46.102 + ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\ 46.103 for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \ 46.104 { \ 46.105 (_sl3e) = _sp + _i; \ 46.106 @@ -1331,7 +1329,7 @@ do { 46.107 shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn)); \ 46.108 int _xen = !shadow_mode_external(_dom); \ 46.109 int _i; \ 46.110 - ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \ 46.111 + ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\ 46.112 for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \ 46.113 { \ 46.114 if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \ 46.115 @@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 46.116 && shadow_type != SH_type_l2h_pae_shadow 46.117 && shadow_type != SH_type_l4_64_shadow ) 46.118 /* Lower-level shadow, not yet linked form a higher level */ 46.119 - mfn_to_shadow_page(smfn)->up = 0; 46.120 + mfn_to_page(smfn)->up = 0; 46.121 46.122 #if GUEST_PAGING_LEVELS == 4 46.123 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 46.124 @@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 46.125 * of them, decide that this isn't an old linux guest, and stop 46.126 * pinning l3es. This is not very quick but it doesn't happen 46.127 * very often. */ 46.128 - struct list_head *l, *t; 46.129 - struct shadow_page_info *sp; 46.130 + struct page_info *sp, *t; 46.131 struct vcpu *v2; 46.132 int l4count = 0, vcpus = 0; 46.133 - list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) 46.134 + page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows) 46.135 { 46.136 - sp = list_entry(l, struct shadow_page_info, list); 46.137 - if ( sp->type == SH_type_l4_64_shadow ) 46.138 + if ( sp->u.sh.type == SH_type_l4_64_shadow ) 46.139 l4count++; 46.140 } 46.141 for_each_vcpu ( v->domain, v2 ) 46.142 @@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 46.143 if ( l4count > 2 * vcpus ) 46.144 { 46.145 /* Unpin all the pinned l3 tables, and don't pin any more. 
*/ 46.146 - list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) 46.147 + page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows) 46.148 { 46.149 - sp = list_entry(l, struct shadow_page_info, list); 46.150 - if ( sp->type == SH_type_l3_64_shadow ) 46.151 - sh_unpin(v, shadow_page_to_mfn(sp)); 46.152 + if ( sp->u.sh.type == SH_type_l3_64_shadow ) 46.153 + sh_unpin(v, page_to_mfn(sp)); 46.154 } 46.155 v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; 46.156 } 46.157 @@ -1921,7 +1916,7 @@ static shadow_l1e_t * shadow_get_and_cre 46.158 void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn) 46.159 { 46.160 shadow_l4e_t *sl4e; 46.161 - u32 t = mfn_to_shadow_page(smfn)->type; 46.162 + u32 t = mfn_to_page(smfn)->u.sh.type; 46.163 mfn_t gmfn, sl4mfn; 46.164 46.165 SHADOW_DEBUG(DESTROY_SHADOW, 46.166 @@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v 46.167 ASSERT(t == SH_type_l4_shadow); 46.168 46.169 /* Record that the guest page isn't shadowed any more (in this type) */ 46.170 - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); 46.171 + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); 46.172 delete_shadow_status(v, gmfn, t, smfn); 46.173 shadow_demote(v, gmfn, t); 46.174 /* Decrement refcounts of all the old entries */ 46.175 @@ -1950,7 +1945,7 @@ void sh_destroy_l4_shadow(struct vcpu *v 46.176 void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn) 46.177 { 46.178 shadow_l3e_t *sl3e; 46.179 - u32 t = mfn_to_shadow_page(smfn)->type; 46.180 + u32 t = mfn_to_page(smfn)->u.sh.type; 46.181 mfn_t gmfn, sl3mfn; 46.182 46.183 SHADOW_DEBUG(DESTROY_SHADOW, 46.184 @@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v 46.185 ASSERT(t == SH_type_l3_shadow); 46.186 46.187 /* Record that the guest page isn't shadowed any more (in this type) */ 46.188 - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); 46.189 + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); 46.190 delete_shadow_status(v, gmfn, t, smfn); 46.191 shadow_demote(v, gmfn, t); 46.192 46.193 @@ -1980,7 +1975,7 @@ void sh_destroy_l3_shadow(struct vcpu *v 46.194 void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn) 46.195 { 46.196 shadow_l2e_t *sl2e; 46.197 - u32 t = mfn_to_shadow_page(smfn)->type; 46.198 + u32 t = mfn_to_page(smfn)->u.sh.type; 46.199 mfn_t gmfn, sl2mfn; 46.200 46.201 SHADOW_DEBUG(DESTROY_SHADOW, 46.202 @@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v 46.203 #endif 46.204 46.205 /* Record that the guest page isn't shadowed any more (in this type) */ 46.206 - gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); 46.207 + gmfn = _mfn(mfn_to_page(smfn)->v.sh.back); 46.208 delete_shadow_status(v, gmfn, t, smfn); 46.209 shadow_demote(v, gmfn, t); 46.210 46.211 @@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v 46.212 { 46.213 struct domain *d = v->domain; 46.214 shadow_l1e_t *sl1e; 46.215 - u32 t = mfn_to_shadow_page(smfn)->type; 46.216 + u32 t = mfn_to_page(smfn)->u.sh.type; 46.217 46.218 SHADOW_DEBUG(DESTROY_SHADOW, 46.219 "%s(%05lx)\n", __func__, mfn_x(smfn)); 46.220 @@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v 46.221 /* Record that the guest page isn't shadowed any more (in this type) */ 46.222 if ( t == SH_type_fl1_shadow ) 46.223 { 46.224 - gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer); 46.225 + gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back); 46.226 delete_fl1_shadow_status(v, gfn, smfn); 46.227 } 46.228 else 46.229 { 46.230 - mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer); 46.231 + mfn_t gmfn = 
_mfn(mfn_to_page(smfn)->v.sh.back); 46.232 delete_shadow_status(v, gmfn, t, smfn); 46.233 shadow_demote(v, gmfn, t); 46.234 } 46.235 @@ -2054,7 +2049,7 @@ void sh_destroy_l1_shadow(struct vcpu *v 46.236 void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn) 46.237 { 46.238 struct domain *d = v->domain; 46.239 - ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table); 46.240 + ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table); 46.241 46.242 #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4) 46.243 { 46.244 @@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v, 46.245 46.246 #if SHADOW_PAGING_LEVELS == 3 46.247 reserved_xen_slot = 46.248 - ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) && 46.249 + ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) && 46.250 (shadow_index 46.251 >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)))); 46.252 #else /* SHADOW_PAGING_LEVELS == 2 */ 46.253 @@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v, 46.254 result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn); 46.255 46.256 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.257 - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); 46.258 + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); 46.259 if ( mfn_valid(gl1mfn) 46.260 && mfn_is_out_of_sync(gl1mfn) ) 46.261 { 46.262 @@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t 46.263 * called in the *mode* of the vcpu that unsynced it. Clear? Good. */ 46.264 int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn) 46.265 { 46.266 - struct shadow_page_info *sp; 46.267 + struct page_info *sp; 46.268 mfn_t smfn; 46.269 46.270 smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); 46.271 ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */ 46.272 46.273 /* Up to l2 */ 46.274 - sp = mfn_to_shadow_page(smfn); 46.275 - if ( sp->count != 1 || !sp->up ) 46.276 + sp = mfn_to_page(smfn); 46.277 + if ( sp->u.sh.count != 1 || !sp->up ) 46.278 return 0; 46.279 smfn = _mfn(sp->up >> PAGE_SHIFT); 46.280 ASSERT(mfn_valid(smfn)); 46.281 46.282 #if (SHADOW_PAGING_LEVELS == 4) 46.283 /* up to l3 */ 46.284 - sp = mfn_to_shadow_page(smfn); 46.285 - if ( sp->count != 1 || !sp->up ) 46.286 + sp = mfn_to_page(smfn); 46.287 + if ( sp->u.sh.count != 1 || !sp->up ) 46.288 return 0; 46.289 smfn = _mfn(sp->up >> PAGE_SHIFT); 46.290 ASSERT(mfn_valid(smfn)); 46.291 46.292 /* up to l4 */ 46.293 - sp = mfn_to_shadow_page(smfn); 46.294 - if ( sp->count != 1 46.295 + sp = mfn_to_page(smfn); 46.296 + if ( sp->u.sh.count != 1 46.297 || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up ) 46.298 return 0; 46.299 smfn = _mfn(sp->up >> PAGE_SHIFT); 46.300 @@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v, 46.301 + shadow_l2_linear_offset(va)), 46.302 sizeof(sl2e)) != 0) 46.303 || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT) 46.304 - || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page( 46.305 - shadow_l2e_get_mfn(sl2e))->backpointer)) 46.306 + || !mfn_valid(gl1mfn = _mfn(mfn_to_page( 46.307 + shadow_l2e_get_mfn(sl2e))->v.sh.back)) 46.308 || unlikely(mfn_is_out_of_sync(gl1mfn)) ) 46.309 { 46.310 /* Hit the slow path as if there had been no 46.311 @@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long 46.312 // easier than invalidating all of the individual 4K pages). 
46.313 // 46.314 sl1mfn = shadow_l2e_get_mfn(sl2e); 46.315 - if ( mfn_to_shadow_page(sl1mfn)->type 46.316 + if ( mfn_to_page(sl1mfn)->u.sh.type 46.317 == SH_type_fl1_shadow ) 46.318 { 46.319 flush_tlb_local(); 46.320 @@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long 46.321 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.322 /* Check to see if the SL1 is out of sync. */ 46.323 { 46.324 - mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); 46.325 + mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); 46.326 struct page_info *pg = mfn_to_page(gl1mfn); 46.327 if ( mfn_valid(gl1mfn) 46.328 && page_is_out_of_sync(pg) ) 46.329 @@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long 46.330 } 46.331 46.332 sl1mfn = shadow_l2e_get_mfn(sl2e); 46.333 - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); 46.334 + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); 46.335 pg = mfn_to_page(gl1mfn); 46.336 46.337 if ( likely(sh_mfn_is_a_page_table(gl1mfn) 46.338 @@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v, 46.339 /* Need to repin the old toplevel shadow if it's been unpinned 46.340 * by shadow_prealloc(): in PV mode we're still running on this 46.341 * shadow and it's not safe to free it yet. */ 46.342 - if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) ) 46.343 + if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) ) 46.344 { 46.345 SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn)); 46.346 domain_crash(v->domain); 46.347 @@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct 46.348 { 46.349 int r; 46.350 shadow_l1e_t *sl1p, sl1e; 46.351 - struct shadow_page_info *sp; 46.352 + struct page_info *sp; 46.353 46.354 ASSERT(mfn_valid(gmfn)); 46.355 ASSERT(mfn_valid(smfn)); 46.356 46.357 - sp = mfn_to_shadow_page(smfn); 46.358 - 46.359 - if ( sp->mbz != 0 46.360 - || (sp->type != SH_type_l1_shadow 46.361 - && sp->type != SH_type_fl1_shadow) ) 46.362 + sp = mfn_to_page(smfn); 46.363 + 46.364 + if ( sp->count_info != 0 46.365 + || (sp->u.sh.type != SH_type_l1_shadow 46.366 + && sp->u.sh.type != SH_type_fl1_shadow) ) 46.367 goto fail; 46.368 46.369 sl1p = sh_map_domain_page(smfn); 46.370 @@ -4410,7 +4405,7 @@ int sh_rm_mappings_from_l1(struct vcpu * 46.371 void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn) 46.372 /* Blank out a single shadow entry */ 46.373 { 46.374 - switch ( mfn_to_shadow_page(smfn)->type ) 46.375 + switch ( mfn_to_page(smfn)->u.sh.type ) 46.376 { 46.377 case SH_type_l1_shadow: 46.378 (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break; 46.379 @@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 46.380 && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) ) 46.381 { 46.382 (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn); 46.383 - if ( mfn_to_shadow_page(sl1mfn)->type == 0 ) 46.384 + if ( mfn_to_page(sl1mfn)->u.sh.type == 0 ) 46.385 /* This breaks us cleanly out of the FOREACH macro */ 46.386 done = 1; 46.387 } 46.388 @@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v, 46.389 && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) ) 46.390 { 46.391 (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn); 46.392 - if ( mfn_to_shadow_page(sl2mfn)->type == 0 ) 46.393 + if ( mfn_to_page(sl2mfn)->u.sh.type == 0 ) 46.394 /* This breaks us cleanly out of the FOREACH macro */ 46.395 done = 1; 46.396 } 46.397 @@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v, 46.398 && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) ) 46.399 { 46.400 (void) 
shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn); 46.401 - if ( mfn_to_shadow_page(sl3mfn)->type == 0 ) 46.402 + if ( mfn_to_page(sl3mfn)->u.sh.type == 0 ) 46.403 /* This breaks us cleanly out of the FOREACH macro */ 46.404 done = 1; 46.405 } 46.406 @@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf 46.407 int done = 0; 46.408 46.409 /* Follow the backpointer */ 46.410 - gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer); 46.411 + gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back); 46.412 46.413 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.414 /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */ 46.415 @@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf 46.416 int done = 0; 46.417 46.418 /* Follow the backpointer */ 46.419 - gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer); 46.420 + gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back); 46.421 46.422 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.423 /* Only L1's may be out of sync. */ 46.424 @@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf 46.425 int done = 0; 46.426 46.427 /* Follow the backpointer */ 46.428 - gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer); 46.429 + gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back); 46.430 46.431 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.432 /* Only L1's may be out of sync. */ 46.433 @@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf 46.434 int done = 0; 46.435 46.436 /* Follow the backpointer */ 46.437 - gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer); 46.438 + gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back); 46.439 46.440 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 46.441 /* Only L1's may be out of sync. */
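Nearly all of the multi.c churn above is the same mechanical substitution: mfn_to_shadow_page(x)->field becomes mfn_to_page(x)->u.sh.field, with the backpointer moving to ->v.sh.back. The following is a minimal, self-contained sketch of the resulting access pattern; the types and frame table are toys, not the real Xen headers, though the bitfield widths follow the shadow_page_info layout removed in the private.h hunk below.

/*
 * Toy model of the new shadow-page metadata access pattern: the fields
 * that used to live in a separate struct shadow_page_info overlay are
 * now reached through the generic page descriptor's u.sh / v.sh unions.
 * Layout and field widths here are illustrative only.
 */
#include <stdio.h>

typedef unsigned long mfn_t;

struct toy_page_info {
    unsigned long count_info;            /* must stay zero for shadows   */
    union {
        struct {
            unsigned long type:5;        /* what kind of shadow is this? */
            unsigned long pinned:1;      /* is the shadow pinned?        */
            unsigned long count:26;      /* shadow reference count       */
        } sh;
    } u;
    union {
        struct {
            unsigned long back;          /* guest MFN we are a shadow of */
        } sh;
    } v;
};

static struct toy_page_info frame_table[16];

static struct toy_page_info *toy_mfn_to_page(mfn_t m)
{
    return &frame_table[m];
}

int main(void)
{
    struct toy_page_info *sp = toy_mfn_to_page(3);

    sp->u.sh.type = 1;       /* e.g. an L1 shadow type code       */
    sp->u.sh.count = 1;
    sp->v.sh.back = 0x1234;  /* backpointer to the guest page MFN */

    /* The pattern used throughout the rewritten multi.c hunks: */
    printf("type=%lu back=%#lx count=%lu\n",
           (unsigned long)sp->u.sh.type, sp->v.sh.back,
           (unsigned long)sp->u.sh.count);
    return 0;
}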
47.1 --- a/xen/arch/x86/mm/shadow/private.h Fri Feb 13 10:56:01 2009 +0900 47.2 +++ b/xen/arch/x86/mm/shadow/private.h Fri Feb 13 11:22:28 2009 +0900 47.3 @@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v 47.4 #undef GUEST_LEVELS 47.5 #endif /* CONFIG_PAGING_LEVELS == 4 */ 47.6 47.7 -/****************************************************************************** 47.8 - * Page metadata for shadow pages. 47.9 - */ 47.10 - 47.11 -struct shadow_page_info 47.12 -{ 47.13 - union { 47.14 - /* Ensures that shadow_page_info is same size as page_info. */ 47.15 - struct page_info page_info; 47.16 - 47.17 - struct { 47.18 - union { 47.19 - /* When in use, guest page we're a shadow of */ 47.20 - unsigned long backpointer; 47.21 - /* When free, order of the freelist we're on */ 47.22 - unsigned int order; 47.23 - }; 47.24 - union { 47.25 - /* When in use, next shadow in this hash chain */ 47.26 - struct shadow_page_info *next_shadow; 47.27 - /* When free, TLB flush time when freed */ 47.28 - u32 tlbflush_timestamp; 47.29 - }; 47.30 - struct { 47.31 - unsigned long mbz; /* Must be zero: count_info is here. */ 47.32 - unsigned long type:5; /* What kind of shadow is this? */ 47.33 - unsigned long pinned:1; /* Is the shadow pinned? */ 47.34 - unsigned long count:26; /* Reference count */ 47.35 - } __attribute__((packed)); 47.36 - union { 47.37 - /* For unused shadow pages, a list of pages of this order; for 47.38 - * pinnable shadows, if pinned, a list of other pinned shadows 47.39 - * (see sh_type_is_pinnable() below for the definition of 47.40 - * "pinnable" shadow types). */ 47.41 - struct list_head list; 47.42 - /* For non-pinnable shadows, a higher entry that points 47.43 - * at us. */ 47.44 - paddr_t up; 47.45 - }; 47.46 - }; 47.47 - }; 47.48 -}; 47.49 - 47.50 -/* The structure above *must* be no larger than a struct page_info 47.51 - * from mm.h, since we'll be using the same space in the frametable. 47.52 - * Also, the mbz field must line up with the count_info field of normal 47.53 - * pages, so they cannot be successfully get_page()d. */ 47.54 -static inline void shadow_check_page_struct_offsets(void) { 47.55 - BUILD_BUG_ON(sizeof (struct shadow_page_info) != 47.56 - sizeof (struct page_info)); 47.57 - BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) != 47.58 - offsetof(struct page_info, count_info)); 47.59 -}; 47.60 - 47.61 /* Shadow type codes */ 47.62 #define SH_type_none (0U) /* on the shadow free list */ 47.63 #define SH_type_min_shadow (1U) 47.64 @@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v 47.65 * MFN/page-info handling 47.66 */ 47.67 47.68 -// Override mfn_to_page from asm/page.h, which was #include'd above, 47.69 -// in order to make it work with our mfn type. 47.70 +/* Override macros from asm/page.h to make them work with mfn_t */ 47.71 #undef mfn_to_page 47.72 -#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 47.73 -#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m)) 47.74 - 47.75 -// Override page_to_mfn from asm/page.h, which was #include'd above, 47.76 -// in order to make it work with our mfn type. 
47.77 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m)) 47.78 +#undef mfn_valid 47.79 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn)) 47.80 #undef page_to_mfn 47.81 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 47.82 -#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg)) 47.83 - 47.84 -// Override mfn_valid from asm/page.h, which was #include'd above, 47.85 -// in order to make it work with our mfn type. 47.86 -#undef mfn_valid 47.87 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 47.88 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg)) 47.89 47.90 /* Override pagetable_t <-> struct page_info conversions to work with mfn_t */ 47.91 #undef pagetable_get_page 47.92 @@ -675,26 +612,26 @@ void sh_destroy_shadow(struct vcpu *v, m 47.93 static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa) 47.94 { 47.95 u32 x, nx; 47.96 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 47.97 + struct page_info *sp = mfn_to_page(smfn); 47.98 47.99 ASSERT(mfn_valid(smfn)); 47.100 47.101 - x = sp->count; 47.102 + x = sp->u.sh.count; 47.103 nx = x + 1; 47.104 47.105 if ( unlikely(nx >= 1U<<26) ) 47.106 { 47.107 - SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n", 47.108 - sp->backpointer, mfn_x(smfn)); 47.109 + SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n", 47.110 + sp->v.sh.back, mfn_x(smfn)); 47.111 return 0; 47.112 } 47.113 47.114 /* Guarded by the shadow lock, so no need for atomic update */ 47.115 - sp->count = nx; 47.116 + sp->u.sh.count = nx; 47.117 47.118 /* We remember the first shadow entry that points to each shadow. */ 47.119 if ( entry_pa != 0 47.120 - && !sh_type_is_pinnable(v, sp->type) 47.121 + && !sh_type_is_pinnable(v, sp->u.sh.type) 47.122 && sp->up == 0 ) 47.123 sp->up = entry_pa; 47.124 47.125 @@ -707,29 +644,29 @@ static inline int sh_get_ref(struct vcpu 47.126 static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa) 47.127 { 47.128 u32 x, nx; 47.129 - struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 47.130 + struct page_info *sp = mfn_to_page(smfn); 47.131 47.132 ASSERT(mfn_valid(smfn)); 47.133 - ASSERT(sp->mbz == 0); 47.134 + ASSERT(sp->count_info == 0); 47.135 47.136 /* If this is the entry in the up-pointer, remove it */ 47.137 if ( entry_pa != 0 47.138 - && !sh_type_is_pinnable(v, sp->type) 47.139 + && !sh_type_is_pinnable(v, sp->u.sh.type) 47.140 && sp->up == entry_pa ) 47.141 sp->up = 0; 47.142 47.143 - x = sp->count; 47.144 + x = sp->u.sh.count; 47.145 nx = x - 1; 47.146 47.147 if ( unlikely(x == 0) ) 47.148 { 47.149 SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n", 47.150 - mfn_x(smfn), sp->count, sp->type); 47.151 + mfn_x(smfn), sp->u.sh.count, sp->u.sh.type); 47.152 BUG(); 47.153 } 47.154 47.155 /* Guarded by the shadow lock, so no need for atomic update */ 47.156 - sp->count = nx; 47.157 + sp->u.sh.count = nx; 47.158 47.159 if ( unlikely(nx == 0) ) 47.160 sh_destroy_shadow(v, smfn); 47.161 @@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp 47.162 * Returns 0 for failure, 1 for success. 
*/ 47.163 static inline int sh_pin(struct vcpu *v, mfn_t smfn) 47.164 { 47.165 - struct shadow_page_info *sp; 47.166 + struct page_info *sp; 47.167 47.168 ASSERT(mfn_valid(smfn)); 47.169 - sp = mfn_to_shadow_page(smfn); 47.170 - ASSERT(sh_type_is_pinnable(v, sp->type)); 47.171 - if ( sp->pinned ) 47.172 + sp = mfn_to_page(smfn); 47.173 + ASSERT(sh_type_is_pinnable(v, sp->u.sh.type)); 47.174 + if ( sp->u.sh.pinned ) 47.175 { 47.176 /* Already pinned: take it out of the pinned-list so it can go 47.177 * at the front */ 47.178 - list_del(&sp->list); 47.179 + page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows); 47.180 } 47.181 else 47.182 { 47.183 /* Not pinned: pin it! */ 47.184 if ( !sh_get_ref(v, smfn, 0) ) 47.185 return 0; 47.186 - sp->pinned = 1; 47.187 + sp->u.sh.pinned = 1; 47.188 } 47.189 /* Put it at the head of the list of pinned shadows */ 47.190 - list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); 47.191 + page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows); 47.192 return 1; 47.193 } 47.194 47.195 @@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v, 47.196 * of pinned shadows, and release the extra ref. */ 47.197 static inline void sh_unpin(struct vcpu *v, mfn_t smfn) 47.198 { 47.199 - struct shadow_page_info *sp; 47.200 + struct page_info *sp; 47.201 47.202 ASSERT(mfn_valid(smfn)); 47.203 - sp = mfn_to_shadow_page(smfn); 47.204 - ASSERT(sh_type_is_pinnable(v, sp->type)); 47.205 - if ( sp->pinned ) 47.206 + sp = mfn_to_page(smfn); 47.207 + ASSERT(sh_type_is_pinnable(v, sp->u.sh.type)); 47.208 + if ( sp->u.sh.pinned ) 47.209 { 47.210 - sp->pinned = 0; 47.211 - list_del(&sp->list); 47.212 + sp->u.sh.pinned = 0; 47.213 + page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows); 47.214 sp->up = 0; /* in case this stops being a pinnable type in future */ 47.215 sh_put_ref(v, smfn, 0); 47.216 }
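The sh_get_ref()/sh_put_ref() helpers keep the same reference-counting scheme as before, only on page_info fields. Below is a compact, self-contained model of that logic, with simplified types, no shadow lock and no sh_destroy_shadow() call; it only shows the overflow check and the up-pointer bookkeeping.

/* Toy model of sh_get_ref()/sh_put_ref() after the rewrite. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct toy_shadow {
    uint32_t count;      /* stands in for pg->u.sh.count (26-bit in Xen) */
    int      pinnable;   /* stands in for sh_type_is_pinnable()          */
    uint64_t up;         /* first shadow entry that points at us         */
};

/* Returns 0 on refcount overflow, 1 on success. */
static int toy_get_ref(struct toy_shadow *sp, uint64_t entry_pa)
{
    uint32_t nx = sp->count + 1;

    if (nx >= (1u << 26))      /* the 26-bit count field would wrap */
        return 0;
    sp->count = nx;

    /* Remember the first shadow entry that points at this shadow. */
    if (entry_pa != 0 && !sp->pinnable && sp->up == 0)
        sp->up = entry_pa;
    return 1;
}

static void toy_put_ref(struct toy_shadow *sp, uint64_t entry_pa)
{
    assert(sp->count != 0);    /* underflow is a bug (BUG() in Xen) */

    /* If this is the entry recorded in the up-pointer, clear it. */
    if (entry_pa != 0 && !sp->pinnable && sp->up == entry_pa)
        sp->up = 0;
    sp->count--;               /* count reaching 0 would destroy the shadow */
}

int main(void)
{
    struct toy_shadow sp = { 0 };

    toy_get_ref(&sp, 0x1000);
    toy_get_ref(&sp, 0x2000);
    toy_put_ref(&sp, 0x1000);
    printf("count=%u up=%#llx\n", sp.count, (unsigned long long)sp.up);
    return 0;
}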
48.1 --- a/xen/arch/x86/numa.c Fri Feb 13 10:56:01 2009 +0900 48.2 +++ b/xen/arch/x86/numa.c Fri Feb 13 11:22:28 2009 +0900 48.3 @@ -312,7 +312,7 @@ static void dump_numa(unsigned char key) 48.4 for_each_online_node(i) 48.5 page_num_node[i] = 0; 48.6 48.7 - list_for_each_entry(page, &d->page_list, list) 48.8 + page_list_for_each(page, &d->page_list) 48.9 { 48.10 i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT); 48.11 page_num_node[i]++;
49.1 --- a/xen/arch/x86/physdev.c Fri Feb 13 10:56:01 2009 +0900 49.2 +++ b/xen/arch/x86/physdev.c Fri Feb 13 11:22:28 2009 +0900 49.3 @@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd 49.4 ret = -EINVAL; 49.5 goto free_domain; 49.6 } 49.7 - vector = IO_APIC_VECTOR(map->index); 49.8 + vector = domain_irq_to_vector(current->domain, map->index); 49.9 if ( !vector ) 49.10 { 49.11 dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n", 49.12 @@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd 49.13 case MAP_PIRQ_TYPE_MSI: 49.14 vector = map->index; 49.15 if ( vector == -1 ) 49.16 - vector = assign_irq_vector(AUTO_ASSIGN); 49.17 + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); 49.18 49.19 if ( vector < 0 || vector >= NR_VECTORS ) 49.20 {
50.1 --- a/xen/arch/x86/setup.c Fri Feb 13 10:56:01 2009 +0900 50.2 +++ b/xen/arch/x86/setup.c Fri Feb 13 11:22:28 2009 +0900 50.3 @@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb 50.4 unsigned int initrdidx = 1; 50.5 multiboot_info_t *mbi = __va(mbi_p); 50.6 module_t *mod = (module_t *)__va(mbi->mods_addr); 50.7 - unsigned long nr_pages, modules_length, modules_headroom = -1; 50.8 + unsigned long nr_pages, modules_length, modules_headroom; 50.9 unsigned long allocator_bitmap_end; 50.10 int i, e820_warn = 0, bytes = 0; 50.11 struct ns16550_defaults ns16550 = { 50.12 @@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb 50.13 */ 50.14 modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; 50.15 50.16 + /* ensure mod[0] is mapped before parsing */ 50.17 + bootstrap_map(mod[0].mod_start, mod[0].mod_end); 50.18 + modules_headroom = bzimage_headroom( 50.19 + (char *)(unsigned long)mod[0].mod_start, 50.20 + (unsigned long)(mod[0].mod_end - mod[0].mod_start)); 50.21 + 50.22 for ( i = boot_e820.nr_map-1; i >= 0; i-- ) 50.23 { 50.24 uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1; 50.25 @@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb 50.26 s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR); 50.27 50.28 #if defined(CONFIG_X86_64) 50.29 -#define reloc_size ((__pa(&_end) + mask) & ~mask) 50.30 +/* Relocate Xen image, allocation bitmap, and one page of padding. */ 50.31 +#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask) 50.32 /* Is the region suitable for relocating Xen? */ 50.33 if ( !xen_phys_start && ((e-s) >= reloc_size) ) 50.34 { 50.35 @@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb 50.36 } 50.37 #endif 50.38 50.39 - if ( modules_headroom == -1 ) 50.40 - modules_headroom = bzimage_headroom( 50.41 - (char *)(unsigned long)mod[0].mod_start, 50.42 - (unsigned long)(mod[0].mod_end - mod[0].mod_start)); 50.43 - 50.44 /* Is the region suitable for relocating the multiboot modules? */ 50.45 if ( !initial_images_start && (s < e) && 50.46 ((e-s) >= (modules_length+modules_headroom)) )
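The new reloc_size definition must leave room for the boot allocator bitmap (one bit per page frame) and one page of padding in addition to the Xen image itself. A back-of-the-envelope sketch of that sizing follows; the input values are invented, the image size stands in for __pa(&_end), and a hard-coded 2MiB alignment stands in for the L2 page-table mask.

/* Illustrative sizing of the relocation region on x86-64. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define L2_PAGETABLE_SHIFT 21          /* 2MiB alignment */

static unsigned long reloc_size(unsigned long xen_image_bytes,
                                unsigned long max_page)
{
    unsigned long mask = (1UL << L2_PAGETABLE_SHIFT) - 1;

    /* image + max_page/8 bitmap bytes + one page, rounded up to 2MiB */
    return (xen_image_bytes + max_page / 8 + PAGE_SIZE + mask) & ~mask;
}

int main(void)
{
    /* e.g. a 12MiB image on a machine with 4M page frames (16GiB) */
    unsigned long sz = reloc_size(12UL << 20, 4UL << 20);

    printf("need %lu MiB for relocation\n", sz >> 20);
    return 0;
}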
51.1 --- a/xen/arch/x86/smpboot.c Fri Feb 13 10:56:01 2009 +0900 51.2 +++ b/xen/arch/x86/smpboot.c Fri Feb 13 11:22:28 2009 +0900 51.3 @@ -1256,8 +1256,6 @@ int __cpu_disable(void) 51.4 mdelay(1); 51.5 local_irq_disable(); 51.6 51.7 - cpufreq_del_cpu(cpu); 51.8 - 51.9 time_suspend(); 51.10 51.11 cpu_mcheck_disable(); 51.12 @@ -1321,6 +1319,8 @@ int cpu_down(unsigned int cpu) 51.13 51.14 printk("Prepare to bring CPU%d down...\n", cpu); 51.15 51.16 + cpufreq_del_cpu(cpu); 51.17 + 51.18 err = stop_machine_run(take_cpu_down, NULL, cpu); 51.19 if (err < 0) 51.20 goto out;
52.1 --- a/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 10:56:01 2009 +0900 52.2 +++ b/xen/arch/x86/x86_32/xen.lds.S Fri Feb 13 11:22:28 2009 +0900 52.3 @@ -91,6 +91,7 @@ SECTIONS 52.4 *(.exit.text) 52.5 *(.exit.data) 52.6 *(.exitcall.exit) 52.7 + *(.eh_frame) 52.8 } 52.9 52.10 /* Stabs debugging sections. */
53.1 --- a/xen/arch/x86/x86_64/entry.S Fri Feb 13 10:56:01 2009 +0900 53.2 +++ b/xen/arch/x86/x86_64/entry.S Fri Feb 13 11:22:28 2009 +0900 53.3 @@ -739,7 +739,6 @@ ENTRY(hypercall_args_table) 53.4 .byte 1 /* do_sysctl */ /* 35 */ 53.5 .byte 1 /* do_domctl */ 53.6 .byte 2 /* do_kexec */ 53.7 - .byte 1 /* do_xsm_op */ 53.8 .rept __HYPERVISOR_arch_0-(.-hypercall_args_table) 53.9 .byte 0 /* do_ni_hypercall */ 53.10 .endr
54.1 --- a/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 10:56:01 2009 +0900 54.2 +++ b/xen/arch/x86/x86_64/xen.lds.S Fri Feb 13 11:22:28 2009 +0900 54.3 @@ -89,6 +89,7 @@ SECTIONS 54.4 *(.exit.text) 54.5 *(.exit.data) 54.6 *(.exitcall.exit) 54.7 + *(.eh_frame) 54.8 } 54.9 54.10 /* Stabs debugging sections. */
55.1 --- a/xen/common/domain.c Fri Feb 13 10:56:01 2009 +0900 55.2 +++ b/xen/common/domain.c Fri Feb 13 11:22:28 2009 +0900 55.3 @@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0 55.4 55.5 /* set xen as default cpufreq */ 55.6 enum cpufreq_controller cpufreq_controller = FREQCTL_xen; 55.7 -struct cpufreq_governor *cpufreq_opt_governor; 55.8 55.9 static void __init setup_cpufreq_option(char *str) 55.10 { 55.11 @@ -71,19 +70,6 @@ static void __init setup_cpufreq_option( 55.12 } 55.13 custom_param("cpufreq", setup_cpufreq_option); 55.14 55.15 -static void __init setup_cpufreq_gov_option(char *str) 55.16 -{ 55.17 - if ( !strcmp(str, "userspace") ) 55.18 - cpufreq_opt_governor = &cpufreq_gov_userspace; 55.19 - else if ( !strcmp(str, "performance") ) 55.20 - cpufreq_opt_governor = &cpufreq_gov_performance; 55.21 - else if ( !strcmp(str, "powersave") ) 55.22 - cpufreq_opt_governor = &cpufreq_gov_powersave; 55.23 - else if ( !strcmp(str, "ondemand") ) 55.24 - cpufreq_opt_governor = &cpufreq_gov_dbs; 55.25 -} 55.26 -custom_param("cpufreq_governor", setup_cpufreq_gov_option); 55.27 - 55.28 /* Protect updates/reads (resp.) of domain_list and domain_hash. */ 55.29 DEFINE_SPINLOCK(domlist_update_lock); 55.30 DEFINE_RCU_READ_LOCK(domlist_read_lock); 55.31 @@ -233,8 +219,8 @@ struct domain *domain_create( 55.32 spin_lock_init(&d->page_alloc_lock); 55.33 spin_lock_init(&d->shutdown_lock); 55.34 spin_lock_init(&d->hypercall_deadlock_mutex); 55.35 - INIT_LIST_HEAD(&d->page_list); 55.36 - INIT_LIST_HEAD(&d->xenpage_list); 55.37 + INIT_PAGE_LIST_HEAD(&d->page_list); 55.38 + INIT_PAGE_LIST_HEAD(&d->xenpage_list); 55.39 55.40 if ( domcr_flags & DOMCRF_hvm ) 55.41 d->is_hvm = 1;
56.1 --- a/xen/common/grant_table.c Fri Feb 13 10:56:01 2009 +0900 56.2 +++ b/xen/common/grant_table.c Fri Feb 13 11:22:28 2009 +0900 56.3 @@ -1192,7 +1192,7 @@ gnttab_transfer( 56.4 /* Okay, add the page to 'e'. */ 56.5 if ( unlikely(e->tot_pages++ == 0) ) 56.6 get_knownalive_domain(e); 56.7 - list_add_tail(&page->list, &e->page_list); 56.8 + page_list_add_tail(page, &e->page_list); 56.9 page_set_owner(page, e); 56.10 56.11 spin_unlock(&e->page_alloc_lock);
57.1 --- a/xen/common/hvm/save.c Fri Feb 13 10:56:01 2009 +0900 57.2 +++ b/xen/common/hvm/save.c Fri Feb 13 11:22:28 2009 +0900 57.3 @@ -26,6 +26,7 @@ 57.4 #include <xen/version.h> 57.5 #include <public/version.h> 57.6 #include <xen/sched.h> 57.7 +#include <xen/guest_access.h> 57.8 57.9 #include <asm/hvm/support.h> 57.10 57.11 @@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d) 57.12 return sz; 57.13 } 57.14 57.15 +/* Extract a single instance of a save record, by marshalling all 57.16 + * records of that type and copying out the one we need. */ 57.17 +int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, 57.18 + XEN_GUEST_HANDLE_64(uint8) handle) 57.19 +{ 57.20 + int rv = 0; 57.21 + size_t sz = 0; 57.22 + struct vcpu *v; 57.23 + hvm_domain_context_t ctxt = { 0, }; 57.24 + 57.25 + if ( d->is_dying 57.26 + || typecode > HVM_SAVE_CODE_MAX 57.27 + || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor) 57.28 + || hvm_sr_handlers[typecode].save == NULL ) 57.29 + return -EINVAL; 57.30 + 57.31 + if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU ) 57.32 + for_each_vcpu(d, v) 57.33 + sz += hvm_sr_handlers[typecode].size; 57.34 + else 57.35 + sz = hvm_sr_handlers[typecode].size; 57.36 + 57.37 + if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz ) 57.38 + return -EINVAL; 57.39 + 57.40 + ctxt.size = sz; 57.41 + ctxt.data = xmalloc_bytes(sz); 57.42 + if ( !ctxt.data ) 57.43 + return -ENOMEM; 57.44 + 57.45 + if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 ) 57.46 + { 57.47 + gdprintk(XENLOG_ERR, 57.48 + "HVM save: failed to save type %"PRIu16"\n", typecode); 57.49 + rv = -EFAULT; 57.50 + } 57.51 + else if ( copy_to_guest(handle, 57.52 + ctxt.data 57.53 + + (instance * hvm_sr_handlers[typecode].size) 57.54 + + sizeof (struct hvm_save_descriptor), 57.55 + hvm_sr_handlers[typecode].size 57.56 + - sizeof (struct hvm_save_descriptor)) ) 57.57 + rv = -EFAULT; 57.58 + 57.59 + xfree(ctxt.data); 57.60 + return rv; 57.61 +} 57.62 57.63 int hvm_save(struct domain *d, hvm_domain_context_t *h) 57.64 {
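hvm_save_one() works by asking the per-type save handler to marshal every instance into one buffer and then copying out a single slice. A small stand-alone sketch of the offset arithmetic it relies on is shown below; the descriptor and record sizes are invented placeholders.

/* Sketch of extracting instance i from a marshalled buffer of records. */
#include <stdio.h>
#include <string.h>

#define DESC_SIZE    8      /* stands in for sizeof(struct hvm_save_descriptor) */
#define RECORD_SIZE 40      /* stands in for hvm_sr_handlers[t].size            */

static int extract_one(const unsigned char *buf, size_t buf_sz,
                       unsigned int instance,
                       unsigned char *out, size_t out_sz)
{
    size_t off = (size_t)instance * RECORD_SIZE + DESC_SIZE;
    size_t payload = RECORD_SIZE - DESC_SIZE;

    /* Mirrors the "(instance + 1) * size > sz" bound check in the patch. */
    if ((size_t)(instance + 1) * RECORD_SIZE > buf_sz || payload > out_sz)
        return -1;
    memcpy(out, buf + off, payload);
    return 0;
}

int main(void)
{
    unsigned char ctxt[3 * RECORD_SIZE] = { 0 }, rec[RECORD_SIZE];

    ctxt[1 * RECORD_SIZE + DESC_SIZE] = 0xab;   /* first payload byte of instance 1 */
    if (extract_one(ctxt, sizeof(ctxt), 1, rec, sizeof(rec)) == 0)
        printf("instance 1 starts with %#x\n", (unsigned)rec[0]);
    return 0;
}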
58.1 --- a/xen/common/memory.c Fri Feb 13 10:56:01 2009 +0900 58.2 +++ b/xen/common/memory.c Fri Feb 13 11:22:28 2009 +0900 58.3 @@ -218,8 +218,8 @@ static void decrease_reservation(struct 58.4 static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg) 58.5 { 58.6 struct xen_memory_exchange exch; 58.7 - LIST_HEAD(in_chunk_list); 58.8 - LIST_HEAD(out_chunk_list); 58.9 + PAGE_LIST_HEAD(in_chunk_list); 58.10 + PAGE_LIST_HEAD(out_chunk_list); 58.11 unsigned long in_chunk_order, out_chunk_order; 58.12 xen_pfn_t gpfn, gmfn, mfn; 58.13 unsigned long i, j, k; 58.14 @@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA 58.15 goto fail; 58.16 } 58.17 58.18 - list_add(&page->list, &in_chunk_list); 58.19 + page_list_add(page, &in_chunk_list); 58.20 } 58.21 } 58.22 58.23 @@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA 58.24 goto fail; 58.25 } 58.26 58.27 - list_add(&page->list, &out_chunk_list); 58.28 + page_list_add(page, &out_chunk_list); 58.29 } 58.30 58.31 /* 58.32 @@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA 58.33 */ 58.34 58.35 /* Destroy final reference to each input page. */ 58.36 - while ( !list_empty(&in_chunk_list) ) 58.37 + while ( (page = page_list_remove_head(&in_chunk_list)) ) 58.38 { 58.39 - page = list_entry(in_chunk_list.next, struct page_info, list); 58.40 - list_del(&page->list); 58.41 if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) ) 58.42 BUG(); 58.43 mfn = page_to_mfn(page); 58.44 @@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA 58.45 58.46 /* Assign each output page to the domain. */ 58.47 j = 0; 58.48 - while ( !list_empty(&out_chunk_list) ) 58.49 + while ( (page = page_list_remove_head(&out_chunk_list)) ) 58.50 { 58.51 - page = list_entry(out_chunk_list.next, struct page_info, list); 58.52 - list_del(&page->list); 58.53 if ( assign_pages(d, page, exch.out.extent_order, 58.54 MEMF_no_refcount) ) 58.55 BUG(); 58.56 @@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA 58.57 */ 58.58 fail: 58.59 /* Reassign any input pages we managed to steal. */ 58.60 - while ( !list_empty(&in_chunk_list) ) 58.61 - { 58.62 - page = list_entry(in_chunk_list.next, struct page_info, list); 58.63 - list_del(&page->list); 58.64 + while ( (page = page_list_remove_head(&in_chunk_list)) ) 58.65 if ( assign_pages(d, page, 0, MEMF_no_refcount) ) 58.66 BUG(); 58.67 - } 58.68 58.69 /* Free any output pages we managed to allocate. */ 58.70 - while ( !list_empty(&out_chunk_list) ) 58.71 - { 58.72 - page = list_entry(out_chunk_list.next, struct page_info, list); 58.73 - list_del(&page->list); 58.74 + while ( (page = page_list_remove_head(&out_chunk_list)) ) 58.75 free_domheap_pages(page, exch.out.extent_order); 58.76 - } 58.77 58.78 exch.nr_exchanged = i << in_chunk_order; 58.79
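The list_head loops in memory_exchange() are rewritten around the new page_list_* helpers. The toy implementation below is not the real one (Xen packs the links into struct page_info and the frame table); it only shows the shape of the add-tail/remove-head idiom the rewritten code uses.

/* Toy stand-ins for the page_list_* helpers introduced by this changeset. */
#include <stdio.h>

struct toy_page {
    struct toy_page *next;
    unsigned long mfn;
};

struct toy_page_list { struct toy_page *head, *tail; };

#define TOY_PAGE_LIST_HEAD(name) struct toy_page_list name = { NULL, NULL }

static void toy_page_list_add_tail(struct toy_page *pg, struct toy_page_list *l)
{
    pg->next = NULL;
    if (l->tail)
        l->tail->next = pg;
    else
        l->head = pg;
    l->tail = pg;
}

static struct toy_page *toy_page_list_remove_head(struct toy_page_list *l)
{
    struct toy_page *pg = l->head;

    if (pg) {
        l->head = pg->next;
        if (!l->head)
            l->tail = NULL;
    }
    return pg;
}

int main(void)
{
    TOY_PAGE_LIST_HEAD(in_chunk_list);
    struct toy_page pages[3] = { { 0 } }, *pg;
    int i;

    for (i = 0; i < 3; i++) {
        pages[i].mfn = 100 + i;
        toy_page_list_add_tail(&pages[i], &in_chunk_list);
    }

    /* The loop shape used in the rewritten memory_exchange() paths: */
    while ((pg = toy_page_list_remove_head(&in_chunk_list)))
        printf("handing back mfn %lu\n", pg->mfn);
    return 0;
}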
59.1 --- a/xen/common/page_alloc.c Fri Feb 13 10:56:01 2009 +0900 59.2 +++ b/xen/common/page_alloc.c Fri Feb 13 11:22:28 2009 +0900 59.3 @@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize); 59.4 #endif 59.5 59.6 static DEFINE_SPINLOCK(page_scrub_lock); 59.7 -LIST_HEAD(page_scrub_list); 59.8 +PAGE_LIST_HEAD(page_scrub_list); 59.9 static unsigned long scrub_pages; 59.10 59.11 /********************* 59.12 @@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages( 59.13 #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \ 59.14 (fls(page_to_mfn(pg)) - 1)) 59.15 59.16 -typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1]; 59.17 +typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1]; 59.18 static heap_by_zone_and_order_t *_heap[MAX_NUMNODES]; 59.19 #define heap(node, zone, order) ((*_heap[node])[zone][order]) 59.20 59.21 @@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES 59.22 59.23 static DEFINE_SPINLOCK(heap_lock); 59.24 59.25 -static void init_node_heap(int node) 59.26 +static unsigned long init_node_heap(int node, unsigned long mfn, 59.27 + unsigned long nr) 59.28 { 59.29 /* First node to be discovered has its heap metadata statically alloced. */ 59.30 static heap_by_zone_and_order_t _heap_static; 59.31 static unsigned long avail_static[NR_ZONES]; 59.32 static int first_node_initialised; 59.33 - 59.34 + unsigned long needed = (sizeof(**_heap) + 59.35 + sizeof(**avail) * NR_ZONES + 59.36 + PAGE_SIZE - 1) >> PAGE_SHIFT; 59.37 int i, j; 59.38 59.39 if ( !first_node_initialised ) 59.40 @@ -286,19 +289,40 @@ static void init_node_heap(int node) 59.41 _heap[node] = &_heap_static; 59.42 avail[node] = avail_static; 59.43 first_node_initialised = 1; 59.44 + needed = 0; 59.45 + } 59.46 +#ifdef DIRECTMAP_VIRT_END 59.47 + else if ( nr >= needed && 59.48 + mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) ) 59.49 + { 59.50 + _heap[node] = mfn_to_virt(mfn); 59.51 + avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES; 59.52 + } 59.53 +#endif 59.54 + else if ( get_order_from_bytes(sizeof(**_heap)) == 59.55 + get_order_from_pages(needed) ) 59.56 + { 59.57 + _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0); 59.58 + BUG_ON(!_heap[node]); 59.59 + avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) - 59.60 + sizeof(**avail) * NR_ZONES; 59.61 + needed = 0; 59.62 } 59.63 else 59.64 { 59.65 _heap[node] = xmalloc(heap_by_zone_and_order_t); 59.66 avail[node] = xmalloc_array(unsigned long, NR_ZONES); 59.67 BUG_ON(!_heap[node] || !avail[node]); 59.68 + needed = 0; 59.69 } 59.70 59.71 memset(avail[node], 0, NR_ZONES * sizeof(long)); 59.72 59.73 for ( i = 0; i < NR_ZONES; i++ ) 59.74 for ( j = 0; j <= MAX_ORDER; j++ ) 59.75 - INIT_LIST_HEAD(&(*_heap[node])[i][j]); 59.76 + INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]); 59.77 + 59.78 + return needed; 59.79 } 59.80 59.81 /* Allocate 2^@order contiguous pages. */ 59.82 @@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page 59.83 59.84 /* Find smallest order which can satisfy the request. 
*/ 59.85 for ( j = order; j <= MAX_ORDER; j++ ) 59.86 - if ( !list_empty(&heap(node, zone, j)) ) 59.87 + if ( (pg = page_list_remove_head(&heap(node, zone, j))) ) 59.88 goto found; 59.89 } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ 59.90 59.91 @@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page 59.92 return NULL; 59.93 59.94 found: 59.95 - pg = list_entry(heap(node, zone, j).next, struct page_info, list); 59.96 - list_del(&pg->list); 59.97 - 59.98 /* We may have to halve the chunk a number of times. */ 59.99 while ( j != order ) 59.100 { 59.101 PFN_ORDER(pg) = --j; 59.102 - list_add_tail(&pg->list, &heap(node, zone, j)); 59.103 + page_list_add_tail(pg, &heap(node, zone, j)); 59.104 pg += 1 << j; 59.105 } 59.106 59.107 @@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page 59.108 /* Reference count must continuously be zero for free pages. */ 59.109 BUG_ON(pg[i].count_info != 0); 59.110 59.111 - /* Add in any extra CPUs that need flushing because of this page. */ 59.112 - cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask); 59.113 - tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); 59.114 - cpus_or(mask, mask, extra_cpus_mask); 59.115 + if ( pg[i].u.free.need_tlbflush ) 59.116 + { 59.117 + /* Add in extra CPUs that need flushing because of this page. */ 59.118 + cpus_andnot(extra_cpus_mask, cpu_online_map, mask); 59.119 + tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); 59.120 + cpus_or(mask, mask, extra_cpus_mask); 59.121 + } 59.122 59.123 /* Initialise fields which have other uses for free pages. */ 59.124 pg[i].u.inuse.type_info = 0; 59.125 @@ -404,7 +428,6 @@ static void free_heap_pages( 59.126 unsigned long mask; 59.127 unsigned int i, node = phys_to_nid(page_to_maddr(pg)); 59.128 unsigned int zone = page_to_zone(pg); 59.129 - struct domain *d; 59.130 59.131 ASSERT(order <= MAX_ORDER); 59.132 ASSERT(node >= 0); 59.133 @@ -425,15 +448,10 @@ static void free_heap_pages( 59.134 */ 59.135 pg[i].count_info = 0; 59.136 59.137 - if ( (d = page_get_owner(&pg[i])) != NULL ) 59.138 - { 59.139 + /* If a page has no owner it will need no safety TLB flush. 
*/ 59.140 + pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL); 59.141 + if ( pg[i].u.free.need_tlbflush ) 59.142 pg[i].tlbflush_timestamp = tlbflush_current_time(); 59.143 - pg[i].u.free.cpumask = d->domain_dirty_cpumask; 59.144 - } 59.145 - else 59.146 - { 59.147 - cpus_clear(pg[i].u.free.cpumask); 59.148 - } 59.149 } 59.150 59.151 spin_lock(&heap_lock); 59.152 @@ -452,8 +470,8 @@ static void free_heap_pages( 59.153 if ( allocated_in_map(page_to_mfn(pg)-mask) || 59.154 (PFN_ORDER(pg-mask) != order) ) 59.155 break; 59.156 - list_del(&(pg-mask)->list); 59.157 pg -= mask; 59.158 + page_list_del(pg, &heap(node, zone, order)); 59.159 } 59.160 else 59.161 { 59.162 @@ -461,7 +479,7 @@ static void free_heap_pages( 59.163 if ( allocated_in_map(page_to_mfn(pg)+mask) || 59.164 (PFN_ORDER(pg+mask) != order) ) 59.165 break; 59.166 - list_del(&(pg+mask)->list); 59.167 + page_list_del(pg + mask, &heap(node, zone, order)); 59.168 } 59.169 59.170 order++; 59.171 @@ -471,7 +489,7 @@ static void free_heap_pages( 59.172 } 59.173 59.174 PFN_ORDER(pg) = order; 59.175 - list_add_tail(&pg->list, &heap(node, zone, order)); 59.176 + page_list_add_tail(pg, &heap(node, zone, order)); 59.177 59.178 spin_unlock(&heap_lock); 59.179 } 59.180 @@ -482,7 +500,6 @@ static void free_heap_pages( 59.181 * latter is not on a MAX_ORDER boundary, then we reserve the page by 59.182 * not freeing it to the buddy allocator. 59.183 */ 59.184 -#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER)) 59.185 static void init_heap_pages( 59.186 struct page_info *pg, unsigned long nr_pages) 59.187 { 59.188 @@ -491,25 +508,33 @@ static void init_heap_pages( 59.189 59.190 nid_prev = phys_to_nid(page_to_maddr(pg-1)); 59.191 59.192 - for ( i = 0; i < nr_pages; i++ ) 59.193 + for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ ) 59.194 { 59.195 nid_curr = phys_to_nid(page_to_maddr(pg+i)); 59.196 59.197 if ( unlikely(!avail[nid_curr]) ) 59.198 - init_node_heap(nid_curr); 59.199 + { 59.200 + unsigned long n; 59.201 + 59.202 + n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i); 59.203 + if ( n ) 59.204 + { 59.205 + BUG_ON(i + n > nr_pages); 59.206 + i += n - 1; 59.207 + continue; 59.208 + } 59.209 + } 59.210 59.211 /* 59.212 - * free pages of the same node, or if they differ, but are on a 59.213 - * MAX_ORDER alignement boundary (which already get reserved) 59.214 + * Free pages of the same node, or if they differ, but are on a 59.215 + * MAX_ORDER alignment boundary (which already get reserved). 59.216 */ 59.217 - if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) & 59.218 - MAX_ORDER_ALIGNED) ) 59.219 - free_heap_pages(pg+i, 0); 59.220 - else 59.221 - printk("Reserving non-aligned node boundary @ mfn %lu\n", 59.222 - page_to_mfn(pg+i)); 59.223 - 59.224 - nid_prev = nid_curr; 59.225 + if ( (nid_curr == nid_prev) || 59.226 + !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) ) 59.227 + free_heap_pages(pg+i, 0); 59.228 + else 59.229 + printk("Reserving non-aligned node boundary @ mfn %#lx\n", 59.230 + page_to_mfn(pg+i)); 59.231 } 59.232 } 59.233 59.234 @@ -537,7 +562,7 @@ static unsigned long avail_heap_pages( 59.235 #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn)) 59.236 void __init end_boot_allocator(void) 59.237 { 59.238 - unsigned long i; 59.239 + unsigned long i, nr = 0; 59.240 int curr_free, next_free; 59.241 59.242 /* Pages that are free now go to the domain sub-allocator. 
*/ 59.243 @@ -550,8 +575,15 @@ void __init end_boot_allocator(void) 59.244 if ( next_free ) 59.245 map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */ 59.246 if ( curr_free ) 59.247 - init_heap_pages(mfn_to_page(i), 1); 59.248 + ++nr; 59.249 + else if ( nr ) 59.250 + { 59.251 + init_heap_pages(mfn_to_page(i - nr), nr); 59.252 + nr = 0; 59.253 + } 59.254 } 59.255 + if ( nr ) 59.256 + init_heap_pages(mfn_to_page(i - nr), nr); 59.257 59.258 if ( !dma_bitsize && (num_online_nodes() > 1) ) 59.259 { 59.260 @@ -786,7 +818,7 @@ int assign_pages( 59.261 page_set_owner(&pg[i], d); 59.262 wmb(); /* Domain pointer must be visible before updating refcnt. */ 59.263 pg[i].count_info = PGC_allocated | 1; 59.264 - list_add_tail(&pg[i].list, &d->page_list); 59.265 + page_list_add_tail(&pg[i], &d->page_list); 59.266 } 59.267 59.268 spin_unlock(&d->page_alloc_lock); 59.269 @@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info 59.270 spin_lock_recursive(&d->page_alloc_lock); 59.271 59.272 for ( i = 0; i < (1 << order); i++ ) 59.273 - list_del(&pg[i].list); 59.274 + page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list); 59.275 59.276 d->xenheap_pages -= 1 << order; 59.277 drop_dom_ref = (d->xenheap_pages == 0); 59.278 @@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info 59.279 for ( i = 0; i < (1 << order); i++ ) 59.280 { 59.281 BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0); 59.282 - list_del(&pg[i].list); 59.283 + page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list); 59.284 } 59.285 59.286 d->tot_pages -= 1 << order; 59.287 @@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info 59.288 { 59.289 page_set_owner(&pg[i], NULL); 59.290 spin_lock(&page_scrub_lock); 59.291 - list_add(&pg[i].list, &page_scrub_list); 59.292 + page_list_add(&pg[i], &page_scrub_list); 59.293 scrub_pages++; 59.294 spin_unlock(&page_scrub_lock); 59.295 } 59.296 @@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page 59.297 59.298 static void page_scrub_softirq(void) 59.299 { 59.300 - struct list_head *ent; 59.301 + PAGE_LIST_HEAD(list); 59.302 struct page_info *pg; 59.303 void *p; 59.304 int i; 59.305 @@ -983,32 +1015,26 @@ static void page_scrub_softirq(void) 59.306 do { 59.307 spin_lock(&page_scrub_lock); 59.308 59.309 - if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) ) 59.310 + /* Peel up to 16 pages from the list. */ 59.311 + for ( i = 0; i < 16; i++ ) 59.312 + { 59.313 + if ( !(pg = page_list_remove_head(&page_scrub_list)) ) 59.314 + break; 59.315 + page_list_add_tail(pg, &list); 59.316 + } 59.317 + 59.318 + if ( unlikely(i == 0) ) 59.319 { 59.320 spin_unlock(&page_scrub_lock); 59.321 goto out; 59.322 } 59.323 - 59.324 - /* Peel up to 16 pages from the list. */ 59.325 - for ( i = 0; i < 16; i++ ) 59.326 - { 59.327 - if ( ent->next == &page_scrub_list ) 59.328 - break; 59.329 - ent = ent->next; 59.330 - } 59.331 - 59.332 - /* Remove peeled pages from the list. */ 59.333 - ent->next->prev = &page_scrub_list; 59.334 - page_scrub_list.next = ent->next; 59.335 - scrub_pages -= (i+1); 59.336 + 59.337 + scrub_pages -= i; 59.338 59.339 spin_unlock(&page_scrub_lock); 59.340 59.341 - /* Working backwards, scrub each page in turn. */ 59.342 - while ( ent != &page_scrub_list ) 59.343 - { 59.344 - pg = list_entry(ent, struct page_info, list); 59.345 - ent = ent->prev; 59.346 + /* Scrub each page in turn. */ 59.347 + while ( (pg = page_list_remove_head(&list)) ) { 59.348 p = map_domain_page(page_to_mfn(pg)); 59.349 scrub_page(p); 59.350 unmap_domain_page(p);
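Among the page_alloc.c changes, end_boot_allocator() now batches runs of consecutive free pages into a single init_heap_pages() call instead of calling it once per page. Below is a stripped-down model of that run-length loop, with a plain array standing in for the boot allocation bitmap and a print statement in place of the real heap initialisation.

/* Run-length batching of free-page ranges, as in end_boot_allocator(). */
#include <stdio.h>

static void init_heap_pages(unsigned long first_mfn, unsigned long nr)
{
    printf("init_heap_pages(mfn %lu, %lu pages)\n", first_mfn, nr);
}

int main(void)
{
    /* 1 = free page, 0 = allocated at boot */
    const int free_map[] = { 1, 1, 1, 0, 1, 0, 0, 1, 1 };
    const unsigned long max = sizeof(free_map) / sizeof(free_map[0]);
    unsigned long i, nr = 0;

    for (i = 0; i < max; i++) {
        if (free_map[i])
            ++nr;                            /* extend the current run        */
        else if (nr) {
            init_heap_pages(i - nr, nr);     /* flush the run ending at i - 1 */
            nr = 0;
        }
    }
    if (nr)                                  /* final run, if any */
        init_heap_pages(i - nr, nr);
    return 0;
}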
60.1 --- a/xen/drivers/char/serial.c Fri Feb 13 10:56:01 2009 +0900 60.2 +++ b/xen/drivers/char/serial.c Fri Feb 13 11:22:28 2009 +0900 60.3 @@ -471,7 +471,7 @@ void serial_suspend(void) 60.4 int i, irq; 60.5 for ( i = 0; i < ARRAY_SIZE(com); i++ ) 60.6 if ( (irq = serial_irq(i)) >= 0 ) 60.7 - free_irq(irq); 60.8 + release_irq(irq); 60.9 } 60.10 60.11 void serial_resume(void)
61.1 --- a/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 10:56:01 2009 +0900 61.2 +++ b/xen/drivers/cpufreq/cpufreq.c Fri Feb 13 11:22:28 2009 +0900 61.3 @@ -46,6 +46,9 @@ 61.4 #include <acpi/acpi.h> 61.5 #include <acpi/cpufreq/cpufreq.h> 61.6 61.7 +static unsigned int usr_max_freq, usr_min_freq; 61.8 +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy); 61.9 + 61.10 struct cpufreq_dom { 61.11 unsigned int dom; 61.12 cpumask_t map; 61.13 @@ -53,6 +56,7 @@ struct cpufreq_dom { 61.14 }; 61.15 static LIST_HEAD(cpufreq_dom_list_head); 61.16 61.17 +struct cpufreq_governor *cpufreq_opt_governor; 61.18 LIST_HEAD(cpufreq_governor_list); 61.19 61.20 struct cpufreq_governor *__find_governor(const char *governor) 61.21 @@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu) 61.22 perf->domain_info.num_processors) { 61.23 memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); 61.24 policy->governor = NULL; 61.25 + 61.26 + cpufreq_cmdline_common_para(&new_policy); 61.27 + 61.28 ret = __cpufreq_set_policy(policy, &new_policy); 61.29 if (ret) { 61.30 if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR) 61.31 @@ -467,3 +474,69 @@ out: 61.32 return ret; 61.33 } 61.34 61.35 +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy) 61.36 +{ 61.37 + if (usr_max_freq) 61.38 + new_policy->max = usr_max_freq; 61.39 + if (usr_min_freq) 61.40 + new_policy->min = usr_min_freq; 61.41 +} 61.42 + 61.43 +static int __init cpufreq_handle_common_option(const char *name, const char *val) 61.44 +{ 61.45 + if (!strcmp(name, "maxfreq") && val) { 61.46 + usr_max_freq = simple_strtoul(val, NULL, 0); 61.47 + return 1; 61.48 + } 61.49 + 61.50 + if (!strcmp(name, "minfreq") && val) { 61.51 + usr_min_freq = simple_strtoul(val, NULL, 0); 61.52 + return 1; 61.53 + } 61.54 + 61.55 + return 0; 61.56 +} 61.57 + 61.58 +void __init cpufreq_cmdline_parse(char *str) 61.59 +{ 61.60 + static struct cpufreq_governor *__initdata cpufreq_governors[] = 61.61 + { 61.62 + &cpufreq_gov_userspace, 61.63 + &cpufreq_gov_dbs, 61.64 + &cpufreq_gov_performance, 61.65 + &cpufreq_gov_powersave 61.66 + }; 61.67 + unsigned int gov_index = 0; 61.68 + 61.69 + do { 61.70 + char *val, *end = strchr(str, ','); 61.71 + unsigned int i; 61.72 + 61.73 + if (end) 61.74 + *end++ = '\0'; 61.75 + val = strchr(str, '='); 61.76 + if (val) 61.77 + *val++ = '\0'; 61.78 + 61.79 + if (!cpufreq_opt_governor) { 61.80 + if (!val) { 61.81 + for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) { 61.82 + if (!strcmp(str, cpufreq_governors[i]->name)) { 61.83 + cpufreq_opt_governor = cpufreq_governors[i]; 61.84 + gov_index = i; 61.85 + str = NULL; 61.86 + break; 61.87 + } 61.88 + } 61.89 + } else { 61.90 + cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR; 61.91 + } 61.92 + } 61.93 + 61.94 + if (str && !cpufreq_handle_common_option(str, val) && 61.95 + cpufreq_governors[gov_index]->handle_option) 61.96 + cpufreq_governors[gov_index]->handle_option(str, val); 61.97 + 61.98 + str = end; 61.99 + } while (str); 61.100 +}
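The new cpufreq_cmdline_parse() splits the option string on commas, treats the first bare token as the governor name, and forwards name=value pairs either to the common handler (maxfreq/minfreq) or to the selected governor. The standalone sketch below shows that tokenization with an invented option string and print statements in place of the real handlers.

/* Tokenization sketch for a "governor,name=value,..." option string. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void handle_option(const char *gov, const char *name, const char *val)
{
    if ((!strcmp(name, "maxfreq") || !strcmp(name, "minfreq")) && val)
        printf("common option %s = %lu\n", name, strtoul(val, NULL, 0));
    else
        printf("governor '%s' option %s = %s\n",
               gov, name, val ? val : "(none)");
}

int main(void)
{
    char buf[] = "ondemand,rate=50000,up_threshold=80,maxfreq=2000000";
    char *str = buf, *gov = NULL;

    do {
        char *val, *end = strchr(str, ',');

        if (end)
            *end++ = '\0';
        val = strchr(str, '=');
        if (val)
            *val++ = '\0';

        if (!gov && !val)            /* first bare token picks the governor */
            gov = str;
        else
            handle_option(gov ? gov : "default", str, val);

        str = end;
    } while (str);

    printf("selected governor: %s\n", gov ? gov : "default");
    return 0;
}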
62.1 --- a/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 10:56:01 2009 +0900 62.2 +++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c Fri Feb 13 11:22:28 2009 +0900 62.3 @@ -18,6 +18,7 @@ 62.4 #include <xen/sched.h> 62.5 #include <acpi/cpufreq/cpufreq.h> 62.6 62.7 +static unsigned int usr_speed; 62.8 62.9 /* 62.10 * cpufreq userspace governor 62.11 @@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st 62.12 unsigned int event) 62.13 { 62.14 int ret = 0; 62.15 + unsigned int freq; 62.16 62.17 if (!policy) 62.18 return -EINVAL; 62.19 @@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st 62.20 case CPUFREQ_GOV_STOP: 62.21 break; 62.22 case CPUFREQ_GOV_LIMITS: 62.23 - if (policy->max < policy->cur) 62.24 + freq = usr_speed ? : policy->cur; 62.25 + if (policy->max < freq) 62.26 ret = __cpufreq_driver_target(policy, policy->max, 62.27 CPUFREQ_RELATION_H); 62.28 - else if (policy->min > policy->cur) 62.29 + else if (policy->min > freq) 62.30 ret = __cpufreq_driver_target(policy, policy->min, 62.31 CPUFREQ_RELATION_L); 62.32 + else if (usr_speed) 62.33 + ret = __cpufreq_driver_target(policy, freq, 62.34 + CPUFREQ_RELATION_L); 62.35 + 62.36 break; 62.37 default: 62.38 ret = -EINVAL; 62.39 @@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st 62.40 return ret; 62.41 } 62.42 62.43 +static void __init 62.44 +cpufreq_userspace_handle_option(const char *name, const char *val) 62.45 +{ 62.46 + if (!strcmp(name, "speed") && val) 62.47 + usr_speed = simple_strtoul(val, NULL, 0); 62.48 +} 62.49 + 62.50 struct cpufreq_governor cpufreq_gov_userspace = { 62.51 .name = "userspace", 62.52 .governor = cpufreq_governor_userspace, 62.53 + .handle_option = cpufreq_userspace_handle_option 62.54 }; 62.55 62.56 static int __init cpufreq_gov_userspace_init(void) 62.57 @@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_ 62.58 } 62.59 __initcall(cpufreq_gov_userspace_init); 62.60 62.61 -static void cpufreq_gov_userspace_exit(void) 62.62 +static void __exit cpufreq_gov_userspace_exit(void) 62.63 { 62.64 cpufreq_unregister_governor(&cpufreq_gov_userspace); 62.65 } 62.66 @@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc 62.67 } 62.68 __initcall(cpufreq_gov_performance_init); 62.69 62.70 -static void cpufreq_gov_performance_exit(void) 62.71 +static void __exit cpufreq_gov_performance_exit(void) 62.72 { 62.73 cpufreq_unregister_governor(&cpufreq_gov_performance); 62.74 } 62.75 @@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_ 62.76 } 62.77 __initcall(cpufreq_gov_powersave_init); 62.78 62.79 -static void cpufreq_gov_powersave_exit(void) 62.80 +static void __exit cpufreq_gov_powersave_exit(void) 62.81 { 62.82 cpufreq_unregister_governor(&cpufreq_gov_powersave); 62.83 }
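With the new "speed=" option, the userspace governor targets the user-requested frequency rather than whatever the CPU currently runs at, still clamped to the policy limits. A small model of that decision follows; the frequencies are invented and the function simply returns the frequency that would be requested.

/* Decision logic of the rewritten userspace governor on GOV_LIMITS. */
#include <stdio.h>

static unsigned int pick_target(unsigned int cur, unsigned int usr_speed,
                                unsigned int min, unsigned int max)
{
    unsigned int freq = usr_speed ? usr_speed : cur;

    if (freq > max)
        return max;
    if (freq < min)
        return min;
    return usr_speed ? freq : cur;   /* only retarget when the user asked */
}

int main(void)
{
    printf("%u\n", pick_target(1600000, 0,       800000, 2000000)); /* stays at cur   */
    printf("%u\n", pick_target(1600000, 2400000, 800000, 2000000)); /* clamped to max */
    printf("%u\n", pick_target(1600000, 1000000, 800000, 2000000)); /* user speed     */
    return 0;
}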
63.1 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 10:56:01 2009 +0900 63.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Feb 13 11:22:28 2009 +0900 63.3 @@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_ 63.4 return 0; 63.5 } 63.6 63.7 +static void __init cpufreq_dbs_handle_option(const char *name, const char *val) 63.8 +{ 63.9 + if ( !strcmp(name, "rate") && val ) 63.10 + { 63.11 + usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); 63.12 + } 63.13 + else if ( !strcmp(name, "up_threshold") && val ) 63.14 + { 63.15 + unsigned long tmp = simple_strtoul(val, NULL, 0); 63.16 + 63.17 + if ( tmp < MIN_FREQUENCY_UP_THRESHOLD ) 63.18 + { 63.19 + printk(XENLOG_WARNING "cpufreq/ondemand: " 63.20 + "specified threshold too low, using %d\n", 63.21 + MIN_FREQUENCY_UP_THRESHOLD); 63.22 + tmp = MIN_FREQUENCY_UP_THRESHOLD; 63.23 + } 63.24 + else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD ) 63.25 + { 63.26 + printk(XENLOG_WARNING "cpufreq/ondemand: " 63.27 + "specified threshold too high, using %d\n", 63.28 + MAX_FREQUENCY_UP_THRESHOLD); 63.29 + tmp = MAX_FREQUENCY_UP_THRESHOLD; 63.30 + } 63.31 + dbs_tuners_ins.up_threshold = tmp; 63.32 + } 63.33 + else if ( !strcmp(name, "bias") && val ) 63.34 + { 63.35 + unsigned long tmp = simple_strtoul(val, NULL, 0); 63.36 + 63.37 + if ( tmp > 1000 ) 63.38 + { 63.39 + printk(XENLOG_WARNING "cpufreq/ondemand: " 63.40 + "specified bias too high, using 1000\n"); 63.41 + tmp = 1000; 63.42 + } 63.43 + dbs_tuners_ins.powersave_bias = tmp; 63.44 + } 63.45 +} 63.46 + 63.47 struct cpufreq_governor cpufreq_gov_dbs = { 63.48 .name = "ondemand", 63.49 .governor = cpufreq_governor_dbs, 63.50 + .handle_option = cpufreq_dbs_handle_option 63.51 }; 63.52 63.53 static int __init cpufreq_gov_dbs_init(void) 63.54 @@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v 63.55 } 63.56 __initcall(cpufreq_gov_dbs_init); 63.57 63.58 -static void cpufreq_gov_dbs_exit(void) 63.59 +static void __exit cpufreq_gov_dbs_exit(void) 63.60 { 63.61 cpufreq_unregister_governor(&cpufreq_gov_dbs); 63.62 } 63.63 __exitcall(cpufreq_gov_dbs_exit); 63.64 - 63.65 -void __init cpufreq_cmdline_parse(char *str) 63.66 -{ 63.67 - do { 63.68 - char *val, *end = strchr(str, ','); 63.69 - 63.70 - if ( end ) 63.71 - *end++ = '\0'; 63.72 - val = strchr(str, '='); 63.73 - if ( val ) 63.74 - *val++ = '\0'; 63.75 - 63.76 - if ( !strcmp(str, "rate") && val ) 63.77 - { 63.78 - usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); 63.79 - } 63.80 - else if ( !strcmp(str, "threshold") && val ) 63.81 - { 63.82 - unsigned long tmp = simple_strtoul(val, NULL, 0); 63.83 - 63.84 - if ( tmp < MIN_FREQUENCY_UP_THRESHOLD ) 63.85 - { 63.86 - printk(XENLOG_WARNING "cpufreq/ondemand: " 63.87 - "specified threshold too low, using %d\n", 63.88 - MIN_FREQUENCY_UP_THRESHOLD); 63.89 - tmp = MIN_FREQUENCY_UP_THRESHOLD; 63.90 - } 63.91 - else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD ) 63.92 - { 63.93 - printk(XENLOG_WARNING "cpufreq/ondemand: " 63.94 - "specified threshold too high, using %d\n", 63.95 - MAX_FREQUENCY_UP_THRESHOLD); 63.96 - tmp = MAX_FREQUENCY_UP_THRESHOLD; 63.97 - } 63.98 - dbs_tuners_ins.up_threshold = tmp; 63.99 - } 63.100 - else if ( !strcmp(str, "bias") && val ) 63.101 - { 63.102 - unsigned long tmp = simple_strtoul(val, NULL, 0); 63.103 - 63.104 - if ( tmp > 1000 ) 63.105 - { 63.106 - printk(XENLOG_WARNING "cpufreq/ondemand: " 63.107 - "specified bias too high, using 1000\n"); 63.108 - tmp = 1000; 63.109 - } 63.110 - dbs_tuners_ins.powersave_bias = tmp; 63.111 - } 63.112 - 
63.113 - str = end; 63.114 - } while ( str ); 63.115 -}
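Out-of-range up_threshold values handed to the ondemand governor are clamped to the nearest legal bound rather than rejected, with a warning. A standalone illustration of that policy; the numeric limits below are assumptions made for the demo, the real MIN/MAX_FREQUENCY_UP_THRESHOLD constants live in cpufreq_ondemand.c.

#include <stdio.h>

/* Illustrative bounds, assumed for the demo only. */
#define MIN_UP_THRESHOLD 11
#define MAX_UP_THRESHOLD 100

static unsigned long clamp_up_threshold(unsigned long tmp)
{
    if (tmp < MIN_UP_THRESHOLD) {
        printf("ondemand: threshold too low, using %d\n", MIN_UP_THRESHOLD);
        tmp = MIN_UP_THRESHOLD;
    } else if (tmp > MAX_UP_THRESHOLD) {
        printf("ondemand: threshold too high, using %d\n", MAX_UP_THRESHOLD);
        tmp = MAX_UP_THRESHOLD;
    }
    return tmp;
}

int main(void)
{
    printf("5 -> %lu, 80 -> %lu, 250 -> %lu\n",
           clamp_up_threshold(5), clamp_up_threshold(80),
           clamp_up_threshold(250));
    return 0;
}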
64.1 --- a/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 10:56:01 2009 +0900 64.2 +++ b/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 13 11:22:28 2009 +0900 64.3 @@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s 64.4 { 64.5 int vector, ret; 64.6 64.7 - vector = assign_irq_vector(AUTO_ASSIGN); 64.8 - vector_to_iommu[vector] = iommu; 64.9 - 64.10 - /* make irq == vector */ 64.11 - irq_vector[vector] = vector; 64.12 - vector_irq[vector] = vector; 64.13 - 64.14 - if ( !vector ) 64.15 + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); 64.16 + if ( vector <= 0 ) 64.17 { 64.18 - amd_iov_error("no vectors\n"); 64.19 + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); 64.20 return 0; 64.21 } 64.22 64.23 irq_desc[vector].handler = &iommu_msi_type; 64.24 - ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu); 64.25 + ret = request_irq_vector(vector, amd_iommu_page_fault, 0, 64.26 + "amd_iommu", iommu); 64.27 if ( ret ) 64.28 { 64.29 + irq_desc[vector].handler = &no_irq_type; 64.30 + free_irq_vector(vector); 64.31 amd_iov_error("can't request irq\n"); 64.32 return 0; 64.33 } 64.34 + 64.35 + /* Make sure that vector is never re-used. */ 64.36 + vector_irq[vector] = NEVER_ASSIGN_IRQ; 64.37 + vector_to_iommu[vector] = iommu; 64.38 iommu->vector = vector; 64.39 return vector; 64.40 }
65.1 --- a/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 10:56:01 2009 +0900 65.2 +++ b/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 13 11:22:28 2009 +0900 65.3 @@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d, 65.4 iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn); 65.5 if ( iommu_l2e == 0 ) 65.6 { 65.7 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.8 amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); 65.9 - spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.10 return -EFAULT; 65.11 } 65.12 set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); 65.13 @@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain * 65.14 65.15 if ( iommu_l2e == 0 ) 65.16 { 65.17 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.18 amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); 65.19 - spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.20 return -EFAULT; 65.21 } 65.22 65.23 @@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map( 65.24 65.25 if ( iommu_l2e == 0 ) 65.26 { 65.27 - amd_iov_error( 65.28 - "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); 65.29 spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.30 + amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n", 65.31 + phys_addr); 65.32 return -EFAULT; 65.33 } 65.34 65.35 @@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d) 65.36 { 65.37 unsigned long mfn, gfn, flags; 65.38 u64 iommu_l2e; 65.39 - struct list_head *entry; 65.40 struct page_info *page; 65.41 struct hvm_iommu *hd; 65.42 int iw = IOMMU_IO_WRITE_ENABLED; 65.43 @@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d) 65.44 if ( hd->p2m_synchronized ) 65.45 goto out; 65.46 65.47 - for ( entry = d->page_list.next; entry != &d->page_list; 65.48 - entry = entry->next ) 65.49 + spin_lock(&d->page_alloc_lock); 65.50 + 65.51 + page_list_for_each ( page, &d->page_list ) 65.52 { 65.53 - page = list_entry(entry, struct page_info, list); 65.54 mfn = page_to_mfn(page); 65.55 gfn = get_gpfn_from_mfn(mfn); 65.56 65.57 @@ -582,14 +581,17 @@ int amd_iommu_sync_p2m(struct domain *d) 65.58 65.59 if ( iommu_l2e == 0 ) 65.60 { 65.61 + spin_unlock(&d->page_alloc_lock); 65.62 + spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.63 amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn); 65.64 - spin_unlock_irqrestore(&hd->mapping_lock, flags); 65.65 return -EFAULT; 65.66 } 65.67 65.68 set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir); 65.69 } 65.70 65.71 + spin_unlock(&d->page_alloc_lock); 65.72 + 65.73 hd->p2m_synchronized = 1; 65.74 65.75 out:
66.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 10:56:01 2009 +0900 66.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 13 11:22:28 2009 +0900 66.3 @@ -23,7 +23,6 @@ 66.4 #include <xen/pci_regs.h> 66.5 #include <asm/amd-iommu.h> 66.6 #include <asm/hvm/svm/amd-iommu-proto.h> 66.7 -#include <asm/mm.h> 66.8 66.9 extern unsigned short ivrs_bdf_entries; 66.10 extern struct ivrs_mappings *ivrs_mappings;
67.1 --- a/xen/drivers/passthrough/io.c Fri Feb 13 10:56:01 2009 +0900 67.2 +++ b/xen/drivers/passthrough/io.c Fri Feb 13 11:22:28 2009 +0900 67.3 @@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd( 67.4 67.5 if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 ) 67.6 { 67.7 + spin_unlock(&d->event_lock); 67.8 xfree(hvm_irq_dpci); 67.9 - spin_unlock(&d->event_lock); 67.10 return -EINVAL; 67.11 } 67.12 }
68.1 --- a/xen/drivers/passthrough/iommu.c Fri Feb 13 10:56:01 2009 +0900 68.2 +++ b/xen/drivers/passthrough/iommu.c Fri Feb 13 11:22:28 2009 +0900 68.3 @@ -33,6 +33,8 @@ int amd_iov_detect(void); 68.4 * no-pv Disable IOMMU for PV domains (default) 68.5 * force|required Don't boot unless IOMMU is enabled 68.6 * passthrough Bypass VT-d translation for Dom0 68.7 + * snoop Utilize the snoop control for IOMMU (default) 68.8 + * no-snoop Don't utilize the snoop control for IOMMU 68.9 */ 68.10 custom_param("iommu", parse_iommu_param); 68.11 int iommu_enabled = 0; 68.12 @@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha 68.13 { 68.14 char *ss; 68.15 iommu_enabled = 1; 68.16 + iommu_snoop = 1; 68.17 68.18 do { 68.19 ss = strchr(s, ','); 68.20 @@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha 68.21 force_iommu = 1; 68.22 else if ( !strcmp(s, "passthrough") ) 68.23 iommu_passthrough = 1; 68.24 + else if ( !strcmp(s, "snoop") ) 68.25 + iommu_snoop = 1; 68.26 + else if ( !strcmp(s, "no-snoop") ) 68.27 + iommu_snoop = 0; 68.28 68.29 s = ss + 1; 68.30 } while ( ss ); 68.31 @@ -141,7 +148,7 @@ static int iommu_populate_page_table(str 68.32 68.33 spin_lock(&d->page_alloc_lock); 68.34 68.35 - list_for_each_entry ( page, &d->page_list, list ) 68.36 + page_list_for_each ( page, &d->page_list ) 68.37 { 68.38 if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page ) 68.39 {
69.1 --- a/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 10:56:01 2009 +0900 69.2 +++ b/xen/drivers/passthrough/vtd/dmar.c Fri Feb 13 11:22:28 2009 +0900 69.3 @@ -21,6 +21,7 @@ 69.4 69.5 #include <xen/init.h> 69.6 #include <xen/bitmap.h> 69.7 +#include <xen/errno.h> 69.8 #include <xen/kernel.h> 69.9 #include <xen/acpi.h> 69.10 #include <xen/mm.h> 69.11 @@ -518,8 +519,6 @@ static int __init acpi_parse_dmar(struct 69.12 int acpi_dmar_init(void) 69.13 { 69.14 int rc; 69.15 - struct acpi_drhd_unit *drhd; 69.16 - struct iommu *iommu; 69.17 69.18 rc = -ENODEV; 69.19 if ( force_iommu ) 69.20 @@ -536,20 +535,7 @@ int acpi_dmar_init(void) 69.21 if ( list_empty(&acpi_drhd_units) ) 69.22 goto fail; 69.23 69.24 - /* Giving that all devices within guest use same io page table, 69.25 - * enable snoop control only if all VT-d engines support it. 69.26 - */ 69.27 - iommu_snoop = 1; 69.28 - for_each_drhd_unit ( drhd ) 69.29 - { 69.30 - iommu = drhd->iommu; 69.31 - if ( !ecap_snp_ctl(iommu->ecap) ) { 69.32 - iommu_snoop = 0; 69.33 - break; 69.34 - } 69.35 - } 69.36 - 69.37 - printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop); 69.38 + printk("Intel VT-d has been enabled\n"); 69.39 69.40 return 0; 69.41
70.1 --- a/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 10:56:01 2009 +0900 70.2 +++ b/xen/drivers/passthrough/vtd/ia64/vtd.c Fri Feb 13 11:22:28 2009 +0900 70.3 @@ -29,7 +29,9 @@ 70.4 #include "../vtd.h" 70.5 70.6 70.7 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; 70.8 +int vector_irq[NR_VECTORS] __read_mostly = { 70.9 + [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ 70.10 +}; 70.11 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 70.12 u8 irq_vector[NR_IRQS] __read_mostly; 70.13 70.14 @@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va) 70.15 } 70.16 70.17 /* Allocate page table, return its machine address */ 70.18 -u64 alloc_pgtable_maddr(struct domain *d) 70.19 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages) 70.20 { 70.21 struct page_info *pg; 70.22 u64 *vaddr; 70.23 70.24 - pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0); 70.25 + pg = alloc_domheap_pages(NULL, get_order_from_pages(npages), 70.26 + d ? MEMF_node(domain_to_node(d)) : 0); 70.27 vaddr = map_domain_page(page_to_mfn(pg)); 70.28 if ( !vaddr ) 70.29 return 0; 70.30 - memset(vaddr, 0, PAGE_SIZE); 70.31 + memset(vaddr, 0, PAGE_SIZE * npages); 70.32 70.33 - iommu_flush_cache_page(vaddr); 70.34 + iommu_flush_cache_page(vaddr, npages); 70.35 unmap_domain_page(vaddr); 70.36 70.37 return page_to_maddr(pg);
71.1 --- a/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 10:56:01 2009 +0900 71.2 +++ b/xen/drivers/passthrough/vtd/intremap.c Fri Feb 13 11:22:28 2009 +0900 71.3 @@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu) 71.4 ir_ctrl = iommu_ir_ctrl(iommu); 71.5 if ( ir_ctrl->iremap_maddr == 0 ) 71.6 { 71.7 - ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL); 71.8 + ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1); 71.9 if ( ir_ctrl->iremap_maddr == 0 ) 71.10 { 71.11 dprintk(XENLOG_WARNING VTDPREFIX,
72.1 --- a/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 10:56:01 2009 +0900 72.2 +++ b/xen/drivers/passthrough/vtd/iommu.c Fri Feb 13 11:22:28 2009 +0900 72.3 @@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr) 72.4 __iommu_flush_cache(addr, 8); 72.5 } 72.6 72.7 -void iommu_flush_cache_page(void *addr) 72.8 +void iommu_flush_cache_page(void *addr, unsigned long npages) 72.9 { 72.10 - __iommu_flush_cache(addr, PAGE_SIZE_4K); 72.11 + __iommu_flush_cache(addr, PAGE_SIZE_4K * npages); 72.12 } 72.13 72.14 int nr_iommus; 72.15 @@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i 72.16 root = &root_entries[bus]; 72.17 if ( !root_present(*root) ) 72.18 { 72.19 - maddr = alloc_pgtable_maddr(NULL); 72.20 + maddr = alloc_pgtable_maddr(NULL, 1); 72.21 if ( maddr == 0 ) 72.22 { 72.23 unmap_vtd_domain_page(root_entries); 72.24 @@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct 72.25 addr &= (((u64)1) << addr_width) - 1; 72.26 ASSERT(spin_is_locked(&hd->mapping_lock)); 72.27 if ( hd->pgd_maddr == 0 ) 72.28 - if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) ) 72.29 + if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) ) 72.30 goto out; 72.31 72.32 parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr); 72.33 @@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct 72.34 { 72.35 if ( !alloc ) 72.36 break; 72.37 - maddr = alloc_pgtable_maddr(domain); 72.38 + maddr = alloc_pgtable_maddr(domain, 1); 72.39 if ( !maddr ) 72.40 break; 72.41 dma_set_pte_addr(*pte, maddr); 72.42 @@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i 72.43 spin_lock(&iommu->lock); 72.44 72.45 if ( iommu->root_maddr == 0 ) 72.46 - iommu->root_maddr = alloc_pgtable_maddr(NULL); 72.47 + iommu->root_maddr = alloc_pgtable_maddr(NULL, 1); 72.48 if ( iommu->root_maddr == 0 ) 72.49 { 72.50 spin_unlock(&iommu->lock); 72.51 @@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io 72.52 { 72.53 int vector, ret; 72.54 72.55 - vector = assign_irq_vector(AUTO_ASSIGN); 72.56 - vector_to_iommu[vector] = iommu; 72.57 - 72.58 - /* VT-d fault is a MSI, make irq == vector */ 72.59 - irq_vector[vector] = vector; 72.60 - vector_irq[vector] = vector; 72.61 - 72.62 - if ( !vector ) 72.63 + vector = assign_irq_vector(AUTO_ASSIGN_IRQ); 72.64 + if ( vector <= 0 ) 72.65 { 72.66 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n"); 72.67 return -EINVAL; 72.68 } 72.69 72.70 irq_desc[vector].handler = &dma_msi_type; 72.71 - ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu); 72.72 + ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu); 72.73 if ( ret ) 72.74 + { 72.75 + irq_desc[vector].handler = &no_irq_type; 72.76 + free_irq_vector(vector); 72.77 gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n"); 72.78 + return ret; 72.79 + } 72.80 + 72.81 + /* Make sure that vector is never re-used. 
*/ 72.82 + vector_irq[vector] = NEVER_ASSIGN_IRQ; 72.83 + vector_to_iommu[vector] = iommu; 72.84 + 72.85 return vector; 72.86 } 72.87 72.88 @@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_ 72.89 iounmap(iommu->reg); 72.90 72.91 free_intel_iommu(iommu->intel); 72.92 - free_irq(iommu->vector); 72.93 + release_irq_vector(iommu->vector); 72.94 xfree(iommu); 72.95 72.96 drhd->iommu = NULL; 72.97 @@ -1677,6 +1681,11 @@ static int init_vtd_hw(void) 72.98 } 72.99 72.100 vector = iommu_set_interrupt(iommu); 72.101 + if ( vector < 0 ) 72.102 + { 72.103 + gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n"); 72.104 + return vector; 72.105 + } 72.106 dma_msi_data_init(iommu, vector); 72.107 dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map))); 72.108 iommu->vector = vector; 72.109 @@ -1756,6 +1765,23 @@ int intel_vtd_setup(void) 72.110 if ( init_vtd_hw() ) 72.111 goto error; 72.112 72.113 + /* Giving that all devices within guest use same io page table, 72.114 + * enable snoop control only if all VT-d engines support it. 72.115 + */ 72.116 + 72.117 + if ( iommu_snoop ) 72.118 + { 72.119 + for_each_drhd_unit ( drhd ) 72.120 + { 72.121 + iommu = drhd->iommu; 72.122 + if ( !ecap_snp_ctl(iommu->ecap) ) { 72.123 + iommu_snoop = 0; 72.124 + break; 72.125 + } 72.126 + } 72.127 + } 72.128 + 72.129 + printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis"); 72.130 register_keyhandler('V', dump_iommu_info, "dump iommu info"); 72.131 72.132 return 0; 72.133 @@ -1764,6 +1790,7 @@ int intel_vtd_setup(void) 72.134 for_each_drhd_unit ( drhd ) 72.135 iommu_free(drhd); 72.136 vtd_enabled = 0; 72.137 + iommu_snoop = 0; 72.138 return -ENOMEM; 72.139 } 72.140
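Snoop control is now committed only after every DRHD unit has been checked, so a single engine without the feature disables it globally. A tiny standalone model of that all-or-nothing capability scan; the ECAP bit position used here is an assumption for the demo, Xen reads the real one through ecap_snp_ctl().

#include <stdio.h>

/* Assumed bit position, for the demo only. */
#define ECAP_SNP_CTL 0x80u

int main(void)
{
    unsigned int ecaps[] = { 0xff, 0xff, 0x7f };   /* last unit lacks snoop control */
    unsigned int i, iommu_snoop = 1;

    for (i = 0; i < sizeof(ecaps) / sizeof(ecaps[0]); i++)
        if (!(ecaps[i] & ECAP_SNP_CTL)) {
            iommu_snoop = 0;
            break;
        }

    printf("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
    return 0;
}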
73.1 --- a/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 10:56:01 2009 +0900 73.2 +++ b/xen/drivers/passthrough/vtd/iommu.h Fri Feb 13 11:22:28 2009 +0900 73.3 @@ -397,7 +397,9 @@ struct poll_info { 73.4 u32 udata; 73.5 }; 73.6 73.7 -#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry)) 73.8 +#define MAX_QINVAL_PAGES 8 73.9 +#define NUM_QINVAL_PAGES 1 73.10 +#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry)) 73.11 #define qinval_present(v) ((v).lo & 1) 73.12 #define qinval_fault_disable(v) (((v).lo >> 1) & 1) 73.13
74.1 --- a/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 10:56:01 2009 +0900 74.2 +++ b/xen/drivers/passthrough/vtd/qinval.c Fri Feb 13 11:22:28 2009 +0900 74.3 @@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu) 74.4 74.5 if ( qi_ctrl->qinval_maddr == 0 ) 74.6 { 74.7 - qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL); 74.8 + qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES); 74.9 if ( qi_ctrl->qinval_maddr == 0 ) 74.10 { 74.11 dprintk(XENLOG_WARNING VTDPREFIX, 74.12 @@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu) 74.13 * registers are automatically reset to 0 with write 74.14 * to IQA register. 74.15 */ 74.16 + if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES ) 74.17 + qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1; 74.18 dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr); 74.19 74.20 /* enable queued invalidation hardware */
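Because the invalidation queue base is page-aligned, its low-order bits are otherwise zero and can carry the queue-size field that the hunk ORs into the value written to DMAR_IQA_REG. A standalone sketch of the arithmetic; the 16-byte entry size stands in for sizeof(struct qinval_entry) and the size-field encoding is shown only as the hunk composes it, not as a restatement of the VT-d spec.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_4K      4096
#define QINVAL_ENTRY_SIZE 16    /* assumed sizeof(struct qinval_entry) */

int main(void)
{
    unsigned long num_pages = 1;            /* NUM_QINVAL_PAGES */
    unsigned long entries = PAGE_SIZE_4K * num_pages / QINVAL_ENTRY_SIZE;
    uint64_t qinval_maddr = 0x12345000ULL;  /* page-aligned queue base */

    /* Low bits of the page-aligned address hold the queue-size field,
     * exactly as the hunk does before writing DMAR_IQA_REG. */
    uint64_t iqa_val = qinval_maddr | (num_pages - 1);

    printf("QINVAL_ENTRY_NR = %lu\n", entries);
    printf("IQA value       = 0x%llx\n", (unsigned long long)iqa_val);
    return 0;
}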
75.1 --- a/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 10:56:01 2009 +0900 75.2 +++ b/xen/drivers/passthrough/vtd/vtd.h Fri Feb 13 11:22:28 2009 +0900 75.3 @@ -101,12 +101,12 @@ unsigned int get_cache_line_size(void); 75.4 void cacheline_flush(char *); 75.5 void flush_all_cache(void); 75.6 void *map_to_nocache_virt(int nr_iommus, u64 maddr); 75.7 -u64 alloc_pgtable_maddr(struct domain *d); 75.8 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages); 75.9 void free_pgtable_maddr(u64 maddr); 75.10 void *map_vtd_domain_page(u64 maddr); 75.11 void unmap_vtd_domain_page(void *va); 75.12 75.13 void iommu_flush_cache_entry(void *addr); 75.14 -void iommu_flush_cache_page(void *addr); 75.15 +void iommu_flush_cache_page(void *addr, unsigned long npages); 75.16 75.17 #endif // _VTD_H_
76.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 10:56:01 2009 +0900 76.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c Fri Feb 13 11:22:28 2009 +0900 76.3 @@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va) 76.4 } 76.5 76.6 /* Allocate page table, return its machine address */ 76.7 -u64 alloc_pgtable_maddr(struct domain *d) 76.8 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages) 76.9 { 76.10 struct page_info *pg; 76.11 u64 *vaddr; 76.12 unsigned long mfn; 76.13 76.14 - pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0); 76.15 + pg = alloc_domheap_pages(NULL, get_order_from_pages(npages), 76.16 + d ? MEMF_node(domain_to_node(d)) : 0); 76.17 if ( !pg ) 76.18 return 0; 76.19 mfn = page_to_mfn(pg); 76.20 vaddr = map_domain_page(mfn); 76.21 - memset(vaddr, 0, PAGE_SIZE); 76.22 + memset(vaddr, 0, PAGE_SIZE * npages); 76.23 76.24 - iommu_flush_cache_page(vaddr); 76.25 + iommu_flush_cache_page(vaddr, npages); 76.26 unmap_domain_page(vaddr); 76.27 76.28 return (u64)mfn << PAGE_SHIFT_4K;
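Both alloc_pgtable_maddr() variants now hand a page count to alloc_domheap_pages(), which expects an allocation order. A standalone version of the rounding the call relies on; it mirrors what Xen's get_order_from_pages() is assumed to compute, the smallest order whose power of two covers npages.

#include <stdio.h>

/* Smallest order such that (1UL << order) >= nr_pages. */
static unsigned int get_order_from_pages(unsigned long nr_pages)
{
    unsigned int order = 0;

    while ((1UL << order) < nr_pages)
        order++;
    return order;
}

int main(void)
{
    unsigned long n;

    for (n = 1; n <= 8; n++)
        printf("npages=%lu -> order=%u (%lu pages allocated)\n",
               n, get_order_from_pages(n), 1UL << get_order_from_pages(n));
    return 0;
}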
77.1 --- a/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 10:56:01 2009 +0900 77.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h Fri Feb 13 11:22:28 2009 +0900 77.3 @@ -87,6 +87,7 @@ struct cpufreq_governor { 77.4 char name[CPUFREQ_NAME_LEN]; 77.5 int (*governor)(struct cpufreq_policy *policy, 77.6 unsigned int event); 77.7 + void (*handle_option)(const char *name, const char *value); 77.8 struct list_head governor_list; 77.9 }; 77.10
78.1 --- a/xen/include/asm-ia64/hardirq.h Fri Feb 13 10:56:01 2009 +0900 78.2 +++ b/xen/include/asm-ia64/hardirq.h Fri Feb 13 11:22:28 2009 +0900 78.3 @@ -4,6 +4,7 @@ 78.4 #define __ARCH_IRQ_STAT 1 78.5 #define HARDIRQ_BITS 14 78.6 #include <linux/hardirq.h> 78.7 +#include <xen/sched.h> 78.8 78.9 #define local_softirq_pending() (local_cpu_data->softirq_pending) 78.10
79.1 --- a/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 10:56:01 2009 +0900 79.2 +++ b/xen/include/asm-ia64/hvm/iommu.h Fri Feb 13 11:22:28 2009 +0900 79.3 @@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc 79.4 /* TODO */ 79.5 } 79.6 79.7 -#define AUTO_ASSIGN -1 79.8 79.9 extern int assign_irq_vector (int irq); 79.10
80.1 --- a/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 10:56:01 2009 +0900 80.2 +++ b/xen/include/asm-ia64/hvm/irq.h Fri Feb 13 11:22:28 2009 +0900 80.3 @@ -90,14 +90,18 @@ struct hvm_irq { 80.4 #define hvm_pci_intx_link(dev, intx) \ 80.5 (((dev) + (intx)) & 3) 80.6 80.7 -/* Extract the IA-64 vector that corresponds to IRQ. */ 80.8 -static inline int 80.9 -irq_to_vector (int irq) 80.10 +#define IA64_INVALID_VECTOR ((unsigned int)((int)-1)) 80.11 +static inline unsigned int irq_to_vector(int irq) 80.12 { 80.13 - return irq; 80.14 + int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); 80.15 + unsigned int vector; 80.16 + 80.17 + if ( acpi_gsi_to_irq(irq, &vector) < 0) 80.18 + return 0; 80.19 + 80.20 + return vector; 80.21 } 80.22 80.23 - 80.24 extern u8 irq_vector[NR_IRQS]; 80.25 extern int vector_irq[NR_VECTORS]; 80.26
81.1 --- a/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 10:56:01 2009 +0900 81.2 +++ b/xen/include/asm-ia64/linux-xen/asm/smp.h Fri Feb 13 11:22:28 2009 +0900 81.3 @@ -47,7 +47,6 @@ ia64_get_lid (void) 81.4 #define SMP_IPI_REDIRECTION (1 << 1) 81.5 81.6 #ifdef XEN 81.7 -#include <xen/sched.h> 81.8 #define raw_smp_processor_id() (current->processor) 81.9 #else 81.10 #define raw_smp_processor_id() (current_thread_info()->cpu)
82.1 --- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 10:56:01 2009 +0900 82.2 +++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h Fri Feb 13 11:22:28 2009 +0900 82.3 @@ -52,10 +52,10 @@ struct irqaction { 82.4 }; 82.5 82.6 extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs); 82.7 -extern int request_irq(unsigned int, 82.8 +extern int request_irq_vector(unsigned int, 82.9 irqreturn_t (*handler)(int, void *, struct pt_regs *), 82.10 unsigned long, const char *, void *); 82.11 -extern void free_irq(unsigned int, void *); 82.12 +extern void release_irq_vector(unsigned int, void *); 82.13 #endif 82.14 82.15
83.1 --- a/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 10:56:01 2009 +0900 83.2 +++ b/xen/include/asm-ia64/linux/asm/hw_irq.h Fri Feb 13 11:22:28 2009 +0900 83.3 @@ -34,7 +34,7 @@ typedef u8 ia64_vector; 83.4 #define IA64_MAX_VECTORED_IRQ 255 83.5 #define IA64_NUM_VECTORS 256 83.6 83.7 -#define AUTO_ASSIGN -1 83.8 +#define AUTO_ASSIGN_IRQ (-1) 83.9 83.10 #define IA64_SPURIOUS_INT_VECTOR 0x0f 83.11
84.1 --- a/xen/include/asm-ia64/mm.h Fri Feb 13 10:56:01 2009 +0900 84.2 +++ b/xen/include/asm-ia64/mm.h Fri Feb 13 11:22:28 2009 +0900 84.3 @@ -13,7 +13,6 @@ 84.4 #include <xen/list.h> 84.5 #include <xen/spinlock.h> 84.6 #include <xen/perfc.h> 84.7 -#include <xen/sched.h> 84.8 84.9 #include <asm/processor.h> 84.10 #include <asm/atomic.h> 84.11 @@ -63,21 +62,14 @@ struct page_info 84.12 struct { 84.13 /* Order-size of the free chunk this page is the head of. */ 84.14 u32 order; 84.15 - /* Mask of possibly-tainted TLBs. */ 84.16 - cpumask_t cpumask; 84.17 + /* Do TLBs need flushing for safety before next page use? */ 84.18 + bool_t need_tlbflush; 84.19 } free; 84.20 84.21 } u; 84.22 84.23 /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */ 84.24 u32 tlbflush_timestamp; 84.25 - 84.26 -#if 0 84.27 -// following added for Linux compiling 84.28 - page_flags_t flags; 84.29 - atomic_t _count; 84.30 - struct list_head lru; // is this the same as above "list"? 84.31 -#endif 84.32 }; 84.33 84.34 #define set_page_count(p,v) atomic_set(&(p)->_count, v - 1)
85.1 --- a/xen/include/asm-ia64/tlbflush.h Fri Feb 13 10:56:01 2009 +0900 85.2 +++ b/xen/include/asm-ia64/tlbflush.h Fri Feb 13 11:22:28 2009 +0900 85.3 @@ -1,7 +1,8 @@ 85.4 #ifndef __FLUSHTLB_H__ 85.5 #define __FLUSHTLB_H__ 85.6 85.7 -#include <xen/sched.h> 85.8 +struct vcpu; 85.9 +struct domain; 85.10 85.11 /* TLB flushes can be either local (current vcpu only) or domain wide (on 85.12 all vcpus).
86.1 --- a/xen/include/asm-x86/domain.h Fri Feb 13 10:56:01 2009 +0900 86.2 +++ b/xen/include/asm-x86/domain.h Fri Feb 13 11:22:28 2009 +0900 86.3 @@ -79,11 +79,11 @@ struct shadow_domain { 86.4 int locker; /* processor which holds the lock */ 86.5 const char *locker_function; /* Func that took it */ 86.6 unsigned int opt_flags; /* runtime tunable optimizations on/off */ 86.7 - struct list_head pinned_shadows; 86.8 + struct page_list_head pinned_shadows; 86.9 86.10 /* Memory allocation */ 86.11 - struct list_head freelists[SHADOW_MAX_ORDER + 1]; 86.12 - struct list_head p2m_freelist; 86.13 + struct page_list_head freelists[SHADOW_MAX_ORDER + 1]; 86.14 + struct page_list_head p2m_freelist; 86.15 unsigned int total_pages; /* number of pages allocated */ 86.16 unsigned int free_pages; /* number of pages on freelists */ 86.17 unsigned int p2m_pages; /* number of pages allocates to p2m */ 86.18 @@ -92,7 +92,7 @@ struct shadow_domain { 86.19 pagetable_t unpaged_pagetable; 86.20 86.21 /* Shadow hashtable */ 86.22 - struct shadow_page_info **hash_table; 86.23 + struct page_info **hash_table; 86.24 int hash_walking; /* Some function is walking the hash table */ 86.25 86.26 /* Fast MMIO path heuristic */ 86.27 @@ -143,7 +143,7 @@ struct hap_domain { 86.28 int locker; 86.29 const char *locker_function; 86.30 86.31 - struct list_head freelist; 86.32 + struct page_list_head freelist; 86.33 unsigned int total_pages; /* number of pages allocated */ 86.34 unsigned int free_pages; /* number of pages on freelists */ 86.35 unsigned int p2m_pages; /* number of pages allocates to p2m */ 86.36 @@ -265,7 +265,7 @@ struct arch_domain 86.37 RELMEM_l2, 86.38 RELMEM_done, 86.39 } relmem; 86.40 - struct list_head relmem_list; 86.41 + struct page_list_head relmem_list; 86.42 86.43 cpuid_input_t cpuids[MAX_CPUID_INPUT]; 86.44 } __cacheline_aligned; 86.45 @@ -352,6 +352,7 @@ struct arch_vcpu 86.46 86.47 /* Current LDT details. */ 86.48 unsigned long shadow_ldt_mapcnt; 86.49 + spinlock_t shadow_ldt_lock; 86.50 86.51 struct paging_vcpu paging; 86.52
87.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 10:56:01 2009 +0900 87.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Fri Feb 13 11:22:28 2009 +0900 87.3 @@ -48,7 +48,11 @@ typedef union { 87.4 #define EPTE_SUPER_PAGE_MASK 0x80 87.5 #define EPTE_MFN_MASK 0x1fffffffffff000 87.6 #define EPTE_AVAIL1_MASK 0xF00 87.7 -#define EPTE_EMT_MASK 0x78 87.8 +#define EPTE_EMT_MASK 0x38 87.9 +#define EPTE_IGMT_MASK 0x40 87.10 +#define EPTE_AVAIL1_SHIFT 8 87.11 +#define EPTE_EMT_SHIFT 3 87.12 +#define EPTE_IGMT_SHIFT 6 87.13 87.14 void vmx_asm_vmexit_handler(struct cpu_user_regs); 87.15 void vmx_asm_do_vmentry(void);
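The corrected constants split the old 0x78 field into a 3-bit EMT memory-type field at bits 5:3 and a separate IGMT (ignore-PAT) bit at bit 6. A standalone sketch of decoding a raw EPT entry with the new masks and shifts; the example entry value is made up for the demo.

#include <stdio.h>
#include <stdint.h>

#define EPTE_EMT_MASK     0x38ULL
#define EPTE_EMT_SHIFT    3
#define EPTE_IGMT_MASK    0x40ULL
#define EPTE_IGMT_SHIFT   6
#define EPTE_AVAIL1_MASK  0xF00ULL
#define EPTE_AVAIL1_SHIFT 8

int main(void)
{
    /* Example raw entry: EMT=6 (write-back), IGMT=1, AVAIL1=2, plus R/W/X bits. */
    uint64_t epte = (6ULL << EPTE_EMT_SHIFT) | (1ULL << EPTE_IGMT_SHIFT) |
                    (2ULL << EPTE_AVAIL1_SHIFT) | 0x7;

    printf("emt=%llu igmt=%llu avail1=%llu\n",
           (unsigned long long)((epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT),
           (unsigned long long)((epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT),
           (unsigned long long)((epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT));
    return 0;
}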
88.1 --- a/xen/include/asm-x86/iocap.h Fri Feb 13 10:56:01 2009 +0900 88.2 +++ b/xen/include/asm-x86/iocap.h Fri Feb 13 11:22:28 2009 +0900 88.3 @@ -14,7 +14,8 @@ 88.4 #define ioports_access_permitted(d, s, e) \ 88.5 rangeset_contains_range((d)->arch.ioport_caps, s, e) 88.6 88.7 -#define cache_flush_permitted(d) \ 88.8 - (!rangeset_is_empty((d)->iomem_caps)) 88.9 +#define cache_flush_permitted(d) \ 88.10 + (!rangeset_is_empty((d)->iomem_caps) || \ 88.11 + !rangeset_is_empty((d)->arch.ioport_caps)) 88.12 88.13 #endif /* __X86_IOCAP_H__ */
89.1 --- a/xen/include/asm-x86/irq.h Fri Feb 13 10:56:01 2009 +0900 89.2 +++ b/xen/include/asm-x86/irq.h Fri Feb 13 11:22:28 2009 +0900 89.3 @@ -19,9 +19,6 @@ 89.4 89.5 extern int vector_irq[NR_VECTORS]; 89.6 extern u8 irq_vector[NR_IRQS]; 89.7 -#define AUTO_ASSIGN -1 89.8 -#define NEVER_ASSIGN -2 89.9 -#define FREE_TO_ASSIGN -3 89.10 89.11 #define platform_legacy_irq(irq) ((irq) < 16) 89.12
90.1 --- a/xen/include/asm-x86/mm.h Fri Feb 13 10:56:01 2009 +0900 90.2 +++ b/xen/include/asm-x86/mm.h Fri Feb 13 11:22:28 2009 +0900 90.3 @@ -12,15 +12,40 @@ 90.4 * Per-page-frame information. 90.5 * 90.6 * Every architecture must ensure the following: 90.7 - * 1. 'struct page_info' contains a 'struct list_head list'. 90.8 + * 1. 'struct page_info' contains a 'struct page_list_entry list'. 90.9 * 2. Provide a PFN_ORDER() macro for accessing the order of a free page. 90.10 */ 90.11 -#define PFN_ORDER(_pfn) ((_pfn)->u.free.order) 90.12 +#define PFN_ORDER(_pfn) ((_pfn)->v.free.order) 90.13 + 90.14 +/* 90.15 + * This definition is solely for the use in struct page_info (and 90.16 + * struct page_list_head), intended to allow easy adjustment once x86-64 90.17 + * wants to support more than 16TB. 90.18 + * 'unsigned long' should be used for MFNs everywhere else. 90.19 + */ 90.20 +#define __mfn_t unsigned int 90.21 +#define PRpgmfn "08x" 90.22 + 90.23 +#undef page_list_entry 90.24 +struct page_list_entry 90.25 +{ 90.26 + __mfn_t next, prev; 90.27 +}; 90.28 90.29 struct page_info 90.30 { 90.31 - /* Each frame can be threaded onto a doubly-linked list. */ 90.32 - struct list_head list; 90.33 + union { 90.34 + /* Each frame can be threaded onto a doubly-linked list. 90.35 + * 90.36 + * For unused shadow pages, a list of pages of this order; for 90.37 + * pinnable shadows, if pinned, a list of other pinned shadows 90.38 + * (see sh_type_is_pinnable() below for the definition of 90.39 + * "pinnable" shadow types). 90.40 + */ 90.41 + struct page_list_entry list; 90.42 + /* For non-pinnable shadows, a higher entry that points at us. */ 90.43 + paddr_t up; 90.44 + }; 90.45 90.46 /* Reference count and various PGC_xxx flags and fields. */ 90.47 unsigned long count_info; 90.48 @@ -30,23 +55,48 @@ struct page_info 90.49 90.50 /* Page is in use: ((count_info & PGC_count_mask) != 0). */ 90.51 struct { 90.52 - /* Owner of this page (NULL if page is anonymous). */ 90.53 - u32 _domain; /* pickled format */ 90.54 /* Type reference count and various PGT_xxx flags and fields. */ 90.55 unsigned long type_info; 90.56 } inuse; 90.57 90.58 + /* Page is in use as a shadow: count_info == 0. */ 90.59 + struct { 90.60 + unsigned long type:5; /* What kind of shadow is this? */ 90.61 + unsigned long pinned:1; /* Is the shadow pinned? */ 90.62 + unsigned long count:26; /* Reference count */ 90.63 + } sh; 90.64 + 90.65 /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */ 90.66 struct { 90.67 - /* Order-size of the free chunk this page is the head of. */ 90.68 - u32 order; 90.69 - /* Mask of possibly-tainted TLBs. */ 90.70 - cpumask_t cpumask; 90.71 + /* Do TLBs need flushing for safety before next page use? */ 90.72 + bool_t need_tlbflush; 90.73 } free; 90.74 90.75 } u; 90.76 90.77 union { 90.78 + 90.79 + /* Page is in use, but not as a shadow. */ 90.80 + struct { 90.81 + /* Owner of this page (NULL if page is anonymous). */ 90.82 + u32 _domain; /* pickled format */ 90.83 + } inuse; 90.84 + 90.85 + /* Page is in use as a shadow. */ 90.86 + struct { 90.87 + /* GMFN of guest page we're a shadow of. */ 90.88 + __mfn_t back; 90.89 + } sh; 90.90 + 90.91 + /* Page is on a free list (including shadow code free lists). */ 90.92 + struct { 90.93 + /* Order-size of the free chunk this page is the head of. */ 90.94 + unsigned int order; 90.95 + } free; 90.96 + 90.97 + } v; 90.98 + 90.99 + union { 90.100 /* 90.101 * Timestamp from 'TLB clock', used to avoid extra safety flushes. 
90.102 * Only valid for: a) free pages, and b) pages with zero type count 90.103 @@ -95,9 +145,14 @@ struct page_info 90.104 * tracked for TLB-flush avoidance when a guest runs in shadow mode. 90.105 */ 90.106 u32 shadow_flags; 90.107 + 90.108 + /* When in use as a shadow, next shadow in this hash chain. */ 90.109 + __mfn_t next_shadow; 90.110 }; 90.111 }; 90.112 90.113 +#undef __mfn_t 90.114 + 90.115 #define PG_shift(idx) (BITS_PER_LONG - (idx)) 90.116 #define PG_mask(x, idx) (x ## UL << PG_shift(idx)) 90.117 90.118 @@ -155,7 +210,8 @@ struct page_info 90.119 }) 90.120 #else 90.121 #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap) 90.122 -#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn]) 90.123 +#define is_xen_heap_mfn(mfn) \ 90.124 + (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn))) 90.125 #endif 90.126 90.127 #if defined(__i386__) 90.128 @@ -174,10 +230,10 @@ struct page_info 90.129 #define SHADOW_OOS_FIXUPS 2 90.130 90.131 #define page_get_owner(_p) \ 90.132 - ((struct domain *)((_p)->u.inuse._domain ? \ 90.133 - mfn_to_virt((_p)->u.inuse._domain) : NULL)) 90.134 + ((struct domain *)((_p)->v.inuse._domain ? \ 90.135 + mfn_to_virt((_p)->v.inuse._domain) : NULL)) 90.136 #define page_set_owner(_p,_d) \ 90.137 - ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0) 90.138 + ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0) 90.139 90.140 #define maddr_get_owner(ma) (page_get_owner(maddr_to_page((ma)))) 90.141 #define vaddr_get_owner(va) (page_get_owner(virt_to_page((va))))
91.1 --- a/xen/include/asm-x86/p2m.h Fri Feb 13 10:56:01 2009 +0900 91.2 +++ b/xen/include/asm-x86/p2m.h Fri Feb 13 11:22:28 2009 +0900 91.3 @@ -110,7 +110,7 @@ struct p2m_domain { 91.4 const char *locker_function; /* Func that took it */ 91.5 91.6 /* Pages used to construct the p2m */ 91.7 - struct list_head pages; 91.8 + struct page_list_head pages; 91.9 91.10 /* Functions to call to get or free pages for the p2m */ 91.11 struct page_info * (*alloc_page )(struct domain *d); 91.12 @@ -148,7 +148,7 @@ struct p2m_domain { 91.13 * protect moving stuff from the PoD cache to the domain page list. 91.14 */ 91.15 struct { 91.16 - struct list_head super, /* List of superpages */ 91.17 + struct page_list_head super, /* List of superpages */ 91.18 single; /* Non-super lists */ 91.19 int count, /* # of pages in cache lists */ 91.20 entry_count; /* # of pages in p2m marked pod */
92.1 --- a/xen/include/asm-x86/page.h Fri Feb 13 10:56:01 2009 +0900 92.2 +++ b/xen/include/asm-x86/page.h Fri Feb 13 11:22:28 2009 +0900 92.3 @@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void * 92.4 copy_page_sse2(_t, _f) : \ 92.5 (void)memcpy(_t, _f, PAGE_SIZE)) 92.6 92.7 -#define mfn_valid(mfn) ((mfn) < max_page) 92.8 +#define __mfn_valid(mfn) ((mfn) < max_page) 92.9 92.10 /* Convert between Xen-heap virtual addresses and machine addresses. */ 92.11 #define __pa(x) (virt_to_maddr(x)) 92.12 #define __va(x) (maddr_to_virt(x)) 92.13 92.14 /* Convert between Xen-heap virtual addresses and machine frame numbers. */ 92.15 -#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT) 92.16 -#define mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT)) 92.17 +#define __virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT) 92.18 +#define __mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT)) 92.19 92.20 /* Convert between machine frame numbers and page-info structures. */ 92.21 -#define mfn_to_page(mfn) (frame_table + (mfn)) 92.22 -#define page_to_mfn(pg) ((unsigned long)((pg) - frame_table)) 92.23 +#define __mfn_to_page(mfn) (frame_table + (mfn)) 92.24 +#define __page_to_mfn(pg) ((unsigned long)((pg) - frame_table)) 92.25 92.26 /* Convert between machine addresses and page-info structures. */ 92.27 -#define maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT)) 92.28 -#define page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT) 92.29 +#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT)) 92.30 +#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT) 92.31 92.32 /* Convert between Xen-heap virtual addresses and page-info structures. */ 92.33 -#define virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT)) 92.34 -#define page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg))) 92.35 +#define __virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT)) 92.36 +#define __page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg))) 92.37 92.38 /* Convert between frame number and address formats. */ 92.39 -#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) 92.40 -#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) 92.41 +#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) 92.42 +#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) 92.43 + 92.44 +/* 92.45 + * We define non-underscored wrappers for above conversion functions. These are 92.46 + * overridden in various source files while underscored versions remain intact. 92.47 + */ 92.48 +#define mfn_valid(mfn) __mfn_valid(mfn) 92.49 +#define virt_to_mfn(va) __virt_to_mfn(va) 92.50 +#define mfn_to_virt(mfn) __mfn_to_virt(mfn) 92.51 +#define mfn_to_page(mfn) __mfn_to_page(mfn) 92.52 +#define page_to_mfn(pg) __page_to_mfn(pg) 92.53 +#define maddr_to_page(ma) __maddr_to_page(ma) 92.54 +#define page_to_maddr(pg) __page_to_maddr(pg) 92.55 +#define virt_to_page(va) __virt_to_page(va) 92.56 +#define page_to_virt(pg) __page_to_virt(pg) 92.57 +#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn) 92.58 +#define paddr_to_pfn(pa) __paddr_to_pfn(pa) 92.59 92.60 #endif /* !defined(__ASSEMBLY__) */ 92.61
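The double-underscored helpers stay fixed while the plain names become thin wrappers that an individual source file may redefine. A hypothetical standalone illustration of that override pattern; the toy frame table and the validity-checking override are invented for the example, and the statement-expression form relies on the GCC extension Xen already builds with.

#include <assert.h>
#include <stdio.h>

/* Toy stand-ins for Xen's frame table and the fixed __-prefixed helpers. */
struct page_info { unsigned long count_info; };
static struct page_info frame_table[16];
static unsigned long max_page = 16;

#define __mfn_valid(mfn)    ((mfn) < max_page)
#define __mfn_to_page(mfn)  (frame_table + (mfn))

/* Default wrapper, as in the header above ... */
#define mfn_to_page(mfn)    __mfn_to_page(mfn)

/* ... which a source file may override, e.g. to add a validity check
 * (this particular override is invented for the example). */
#undef mfn_to_page
#define mfn_to_page(mfn) ({                     \
    unsigned long mfn_ = (mfn);                 \
    assert(__mfn_valid(mfn_));                  \
    __mfn_to_page(mfn_);                        \
})

int main(void)
{
    struct page_info *pg = mfn_to_page(3UL);

    printf("page %ld ok\n", (long)(pg - frame_table));
    return 0;
}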
93.1 --- a/xen/include/asm-x86/perfc.h Fri Feb 13 10:56:01 2009 +0900 93.2 +++ b/xen/include/asm-x86/perfc.h Fri Feb 13 11:22:28 2009 +0900 93.3 @@ -1,6 +1,5 @@ 93.4 #ifndef __ASM_PERFC_H__ 93.5 #define __ASM_PERFC_H__ 93.6 -#include <asm/mm.h> 93.7 93.8 static inline void arch_perfc_printall(void) 93.9 {
94.1 --- a/xen/include/asm-x86/processor.h Fri Feb 13 10:56:01 2009 +0900 94.2 +++ b/xen/include/asm-x86/processor.h Fri Feb 13 11:22:28 2009 +0900 94.3 @@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[]; 94.4 #define current_cpu_data boot_cpu_data 94.5 #endif 94.6 94.7 +extern u64 host_pat; 94.8 extern int phys_proc_id[NR_CPUS]; 94.9 extern int cpu_core_id[NR_CPUS]; 94.10
95.1 --- a/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 95.2 +++ b/xen/include/public/arch-ia64/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 95.3 @@ -23,8 +23,8 @@ 95.4 #ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__ 95.5 #define __XEN_PUBLIC_HVM_SAVE_IA64_H__ 95.6 95.7 -#include <public/hvm/save.h> 95.8 -#include <public/arch-ia64.h> 95.9 +#include "../../hvm/save.h" 95.10 +#include "../../arch-ia64.h" 95.11 95.12 /* 95.13 * Save/restore header: general info about the save file.
96.1 --- a/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 96.2 +++ b/xen/include/public/arch-x86/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 96.3 @@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs { 96.4 * Indexed by: device*4 + INTx#. 96.5 */ 96.6 union { 96.7 - DECLARE_BITMAP(i, 32*4); 96.8 + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ 96.9 uint64_t pad[2]; 96.10 }; 96.11 }; 96.12 @@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs { 96.13 * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). 96.14 */ 96.15 union { 96.16 - DECLARE_BITMAP(i, 16); 96.17 + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ 96.18 uint64_t pad[1]; 96.19 }; 96.20 };
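Spelling the bitmap out as a sized array keeps this public save-format header free of Xen's internal DECLARE_BITMAP macro while preserving the same 128-bit (16-byte) storage on 32-bit and 64-bit builds alike; a quick standalone size check:

#include <stdio.h>

int main(void)
{
    /* 32*4 = 128 interrupt lines -> 16 bytes, however wide 'unsigned long' is. */
    unsigned long i[16 / sizeof(unsigned long)];

    printf("words=%zu bytes=%zu bits=%zu\n",
           sizeof(i) / sizeof(i[0]), sizeof(i), sizeof(i) * 8);
    return 0;
}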
97.1 --- a/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 10:56:01 2009 +0900 97.2 +++ b/xen/include/public/arch-x86/xen-mca.h Fri Feb 13 11:22:28 2009 +0900 97.3 @@ -56,7 +56,7 @@ 97.4 /* Hypercall */ 97.5 #define __HYPERVISOR_mca __HYPERVISOR_arch_0 97.6 97.7 -#define XEN_MCA_INTERFACE_VERSION 0x03000001 97.8 +#define XEN_MCA_INTERFACE_VERSION 0x03000002 97.9 97.10 /* IN: Dom0 calls hypercall from MC event handler. */ 97.11 #define XEN_MC_CORRECTABLE 0x0 97.12 @@ -118,7 +118,7 @@ struct mcinfo_global { 97.13 uint16_t mc_domid; 97.14 uint32_t mc_socketid; /* physical socket of the physical core */ 97.15 uint16_t mc_coreid; /* physical impacted core */ 97.16 - uint8_t mc_apicid; 97.17 + uint32_t mc_apicid; 97.18 uint16_t mc_core_threadid; /* core thread of physical core */ 97.19 uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ 97.20 uint64_t mc_gstatus; /* global status */ 97.21 @@ -175,6 +175,41 @@ struct mc_info { 97.22 }; 97.23 typedef struct mc_info mc_info_t; 97.24 97.25 +#define __MC_MSR_ARRAYSIZE 8 97.26 +#define __MC_NMSRS 1 97.27 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ 97.28 +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ 97.29 +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ 97.30 +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ 97.31 +#define MC_CAPS_LINUX 3 /* Linux-defined */ 97.32 +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ 97.33 +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ 97.34 +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ 97.35 + 97.36 +typedef struct mcinfo_logical_cpu { 97.37 + uint32_t mc_cpunr; 97.38 + uint32_t mc_chipid; 97.39 + uint16_t mc_coreid; 97.40 + uint16_t mc_threadid; 97.41 + uint32_t mc_apicid; 97.42 + uint32_t mc_clusterid; 97.43 + uint32_t mc_ncores; 97.44 + uint32_t mc_ncores_active; 97.45 + uint32_t mc_nthreads; 97.46 + int32_t mc_cpuid_level; 97.47 + uint32_t mc_family; 97.48 + uint32_t mc_vendor; 97.49 + uint32_t mc_model; 97.50 + uint32_t mc_step; 97.51 + char mc_vendorid[16]; 97.52 + char mc_brandid[64]; 97.53 + uint32_t mc_cpu_caps[MC_NCAPS]; 97.54 + uint32_t mc_cache_size; 97.55 + uint32_t mc_cache_alignment; 97.56 + int32_t mc_nmsrvals; 97.57 + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; 97.58 +} xen_mc_logical_cpu_t; 97.59 +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); 97.60 97.61 97.62 /* 97.63 @@ -272,6 +307,14 @@ struct xen_mc_notifydomain { 97.64 typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; 97.65 DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); 97.66 97.67 +#define XEN_MC_physcpuinfo 3 97.68 +struct xen_mc_physcpuinfo { 97.69 + /* IN/OUT */ 97.70 + uint32_t ncpus; 97.71 + uint32_t pad0; 97.72 + /* OUT */ 97.73 + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; 97.74 +}; 97.75 97.76 struct xen_mc { 97.77 uint32_t cmd; 97.78 @@ -279,6 +322,7 @@ struct xen_mc { 97.79 union { 97.80 struct xen_mc_fetch mc_fetch; 97.81 struct xen_mc_notifydomain mc_notifydomain; 97.82 + struct xen_mc_physcpuinfo mc_physcpuinfo; 97.83 uint8_t pad[MCINFO_HYPERCALLSIZE]; 97.84 } u; 97.85 };
98.1 --- a/xen/include/public/domctl.h Fri Feb 13 10:56:01 2009 +0900 98.2 +++ b/xen/include/public/domctl.h Fri Feb 13 11:22:28 2009 +0900 98.3 @@ -630,6 +630,17 @@ struct xen_domctl_debug_op { 98.4 typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; 98.5 DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); 98.6 98.7 +/* 98.8 + * Request a particular record from the HVM context 98.9 + */ 98.10 +#define XEN_DOMCTL_gethvmcontext_partial 55 98.11 +typedef struct xen_domctl_hvmcontext_partial { 98.12 + uint32_t type; /* IN: Type of record required */ 98.13 + uint32_t instance; /* IN: Instance of that type */ 98.14 + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ 98.15 +} xen_domctl_hvmcontext_partial_t; 98.16 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); 98.17 + 98.18 98.19 struct xen_domctl { 98.20 uint32_t cmd; 98.21 @@ -658,6 +669,7 @@ struct xen_domctl { 98.22 struct xen_domctl_settimeoffset settimeoffset; 98.23 struct xen_domctl_real_mode_area real_mode_area; 98.24 struct xen_domctl_hvmcontext hvmcontext; 98.25 + struct xen_domctl_hvmcontext_partial hvmcontext_partial; 98.26 struct xen_domctl_address_size address_size; 98.27 struct xen_domctl_sendtrigger sendtrigger; 98.28 struct xen_domctl_get_device_group get_device_group;
99.1 --- a/xen/include/public/io/pciif.h Fri Feb 13 10:56:01 2009 +0900 99.2 +++ b/xen/include/public/io/pciif.h Fri Feb 13 11:22:28 2009 +0900 99.3 @@ -29,7 +29,7 @@ 99.4 99.5 /* xen_pci_sharedinfo flags */ 99.6 #define _XEN_PCIF_active (0) 99.7 -#define XEN_PCIF_active (1<<_XEN_PCI_active) 99.8 +#define XEN_PCIF_active (1<<_XEN_PCIF_active) 99.9 #define _XEN_PCIB_AERHANDLER (1) 99.10 #define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) 99.11 #define _XEN_PCIB_active (2)
100.1 --- a/xen/include/xen/hvm/save.h Fri Feb 13 10:56:01 2009 +0900 100.2 +++ b/xen/include/xen/hvm/save.h Fri Feb 13 11:22:28 2009 +0900 100.3 @@ -152,6 +152,8 @@ static int __hvm_register_##_x##_save_an 100.4 /* Entry points for saving and restoring HVM domain state */ 100.5 size_t hvm_save_size(struct domain *d); 100.6 int hvm_save(struct domain *d, hvm_domain_context_t *h); 100.7 +int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, 100.8 + XEN_GUEST_HANDLE_64(uint8) handle); 100.9 int hvm_load(struct domain *d, hvm_domain_context_t *h); 100.10 100.11 /* Arch-specific definitions. */
101.1 --- a/xen/include/xen/iocap.h Fri Feb 13 10:56:01 2009 +0900 101.2 +++ b/xen/include/xen/iocap.h Fri Feb 13 11:22:28 2009 +0900 101.3 @@ -29,6 +29,7 @@ 101.4 rangeset_contains_singleton((d)->irq_caps, i) 101.5 101.6 #define multipage_allocation_permitted(d) \ 101.7 - (!rangeset_is_empty((d)->iomem_caps)) 101.8 + (!rangeset_is_empty((d)->iomem_caps) || \ 101.9 + !rangeset_is_empty((d)->arch.ioport_caps)) 101.10 101.11 #endif /* __XEN_IOCAP_H__ */
102.1 --- a/xen/include/xen/irq.h Fri Feb 13 10:56:01 2009 +0900 102.2 +++ b/xen/include/xen/irq.h Fri Feb 13 11:22:28 2009 +0900 102.3 @@ -25,6 +25,11 @@ struct irqaction 102.4 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */ 102.5 #define IRQ_PER_CPU 256 /* IRQ is per CPU */ 102.6 102.7 +/* Special IRQ numbers. */ 102.8 +#define AUTO_ASSIGN_IRQ (-1) 102.9 +#define NEVER_ASSIGN_IRQ (-2) 102.10 +#define FREE_TO_ASSIGN_IRQ (-3) 102.11 + 102.12 /* 102.13 * Interrupt controller descriptor. This is all we need 102.14 * to describe about the low-level hardware. 102.15 @@ -64,12 +69,21 @@ typedef struct { 102.16 102.17 extern irq_desc_t irq_desc[NR_VECTORS]; 102.18 102.19 -extern int setup_irq(unsigned int, struct irqaction *); 102.20 -extern void free_irq(unsigned int); 102.21 -extern int request_irq(unsigned int irq, 102.22 +extern int setup_irq_vector(unsigned int, struct irqaction *); 102.23 +extern void release_irq_vector(unsigned int); 102.24 +extern int request_irq_vector(unsigned int vector, 102.25 void (*handler)(int, void *, struct cpu_user_regs *), 102.26 unsigned long irqflags, const char * devname, void *dev_id); 102.27 102.28 +#define setup_irq(irq, action) \ 102.29 + setup_irq_vector(irq_to_vector(irq), action) 102.30 + 102.31 +#define release_irq(irq) \ 102.32 + release_irq_vector(irq_to_vector(irq)) 102.33 + 102.34 +#define request_irq(irq, handler, irqflags, devname, devid) \ 102.35 + request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid) 102.36 + 102.37 extern hw_irq_controller no_irq_type; 102.38 extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs); 102.39
103.1 --- a/xen/include/xen/mm.h Fri Feb 13 10:56:01 2009 +0900 103.2 +++ b/xen/include/xen/mm.h Fri Feb 13 11:22:28 2009 +0900 103.3 @@ -85,22 +85,192 @@ int assign_pages( 103.4 #define MAX_ORDER 20 /* 2^20 contiguous pages */ 103.5 #endif 103.6 103.7 +#define page_list_entry list_head 103.8 + 103.9 +#include <asm/mm.h> 103.10 + 103.11 +#ifndef page_list_entry 103.12 +struct page_list_head 103.13 +{ 103.14 + struct page_info *next, *tail; 103.15 +}; 103.16 +/* These must only have instances in struct page_info. */ 103.17 +# define page_list_entry 103.18 + 103.19 +# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL } 103.20 +# define PAGE_LIST_HEAD(name) \ 103.21 + struct page_list_head name = PAGE_LIST_HEAD_INIT(name) 103.22 +# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL) 103.23 +# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0) 103.24 + 103.25 +static inline int 103.26 +page_list_empty(const struct page_list_head *head) 103.27 +{ 103.28 + return !head->next; 103.29 +} 103.30 +static inline struct page_info * 103.31 +page_list_first(const struct page_list_head *head) 103.32 +{ 103.33 + return head->next; 103.34 +} 103.35 +static inline struct page_info * 103.36 +page_list_next(const struct page_info *page, 103.37 + const struct page_list_head *head) 103.38 +{ 103.39 + return page != head->tail ? mfn_to_page(page->list.next) : NULL; 103.40 +} 103.41 +static inline struct page_info * 103.42 +page_list_prev(const struct page_info *page, 103.43 + const struct page_list_head *head) 103.44 +{ 103.45 + return page != head->next ? mfn_to_page(page->list.prev) : NULL; 103.46 +} 103.47 +static inline void 103.48 +page_list_add(struct page_info *page, struct page_list_head *head) 103.49 +{ 103.50 + if ( head->next ) 103.51 + { 103.52 + page->list.next = page_to_mfn(head->next); 103.53 + head->next->list.prev = page_to_mfn(page); 103.54 + } 103.55 + else 103.56 + { 103.57 + head->tail = page; 103.58 + page->list.next = ~0; 103.59 + } 103.60 + page->list.prev = ~0; 103.61 + head->next = page; 103.62 +} 103.63 +static inline void 103.64 +page_list_add_tail(struct page_info *page, struct page_list_head *head) 103.65 +{ 103.66 + page->list.next = ~0; 103.67 + if ( head->next ) 103.68 + { 103.69 + page->list.prev = page_to_mfn(head->tail); 103.70 + head->tail->list.next = page_to_mfn(page); 103.71 + } 103.72 + else 103.73 + { 103.74 + page->list.prev = ~0; 103.75 + head->next = page; 103.76 + } 103.77 + head->tail = page; 103.78 +} 103.79 +static inline bool_t 103.80 +__page_list_del_head(struct page_info *page, struct page_list_head *head, 103.81 + struct page_info *next, struct page_info *prev) 103.82 +{ 103.83 + if ( head->next == page ) 103.84 + { 103.85 + if ( head->tail != page ) 103.86 + { 103.87 + next->list.prev = ~0; 103.88 + head->next = next; 103.89 + } 103.90 + else 103.91 + head->tail = head->next = NULL; 103.92 + return 1; 103.93 + } 103.94 + 103.95 + if ( head->tail == page ) 103.96 + { 103.97 + prev->list.next = ~0; 103.98 + head->tail = prev; 103.99 + return 1; 103.100 + } 103.101 + 103.102 + return 0; 103.103 +} 103.104 +static inline void 103.105 +page_list_del(struct page_info *page, struct page_list_head *head) 103.106 +{ 103.107 + struct page_info *next = mfn_to_page(page->list.next); 103.108 + struct page_info *prev = mfn_to_page(page->list.prev); 103.109 + 103.110 + if ( !__page_list_del_head(page, head, next, prev) ) 103.111 + { 103.112 + next->list.prev = page->list.prev; 103.113 + prev->list.next = page->list.next; 103.114 + } 103.115 +} 
103.116 +static inline void 103.117 +page_list_del2(struct page_info *page, struct page_list_head *head1, 103.118 + struct page_list_head *head2) 103.119 +{ 103.120 + struct page_info *next = mfn_to_page(page->list.next); 103.121 + struct page_info *prev = mfn_to_page(page->list.prev); 103.122 + 103.123 + if ( !__page_list_del_head(page, head1, next, prev) && 103.124 + !__page_list_del_head(page, head2, next, prev) ) 103.125 + { 103.126 + next->list.prev = page->list.prev; 103.127 + prev->list.next = page->list.next; 103.128 + } 103.129 +} 103.130 +static inline struct page_info * 103.131 +page_list_remove_head(struct page_list_head *head) 103.132 +{ 103.133 + struct page_info *page = head->next; 103.134 + 103.135 + if ( page ) 103.136 + page_list_del(page, head); 103.137 + 103.138 + return page; 103.139 +} 103.140 + 103.141 +#define page_list_for_each(pos, head) \ 103.142 + for ( pos = (head)->next; pos; pos = page_list_next(pos, head) ) 103.143 +#define page_list_for_each_safe(pos, tmp, head) \ 103.144 + for ( pos = (head)->next; \ 103.145 + pos ? (tmp = page_list_next(pos, head), 1) : 0; \ 103.146 + pos = tmp ) 103.147 +#define page_list_for_each_safe_reverse(pos, tmp, head) \ 103.148 + for ( pos = (head)->tail; \ 103.149 + pos ? (tmp = page_list_prev(pos, head), 1) : 0; \ 103.150 + pos = tmp ) 103.151 +#else 103.152 +# define page_list_head list_head 103.153 +# define PAGE_LIST_HEAD_INIT LIST_HEAD_INIT 103.154 +# define PAGE_LIST_HEAD LIST_HEAD 103.155 +# define INIT_PAGE_LIST_HEAD INIT_LIST_HEAD 103.156 +# define INIT_PAGE_LIST_ENTRY INIT_LIST_HEAD 103.157 +# define page_list_empty list_empty 103.158 +# define page_list_first(hd) list_entry((hd)->next, \ 103.159 + struct page_info, list) 103.160 +# define page_list_next(pg, hd) list_entry((pg)->list.next, \ 103.161 + struct page_info, list) 103.162 +# define page_list_add(pg, hd) list_add(&(pg)->list, hd) 103.163 +# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd) 103.164 +# define page_list_del(pg, hd) list_del(&(pg)->list) 103.165 +# define page_list_del2(pg, hd1, hd2) list_del(&(pg)->list) 103.166 +# define page_list_remove_head(hd) (!page_list_empty(hd) ? \ 103.167 + ({ \ 103.168 + struct page_info *__pg = page_list_first(hd); \ 103.169 + list_del(&__pg->list); \ 103.170 + __pg; \ 103.171 + }) : NULL) 103.172 +# define page_list_for_each(pos, head) list_for_each_entry(pos, head, list) 103.173 +# define page_list_for_each_safe(pos, tmp, head) \ 103.174 + list_for_each_entry_safe(pos, tmp, head, list) 103.175 +# define page_list_for_each_safe_reverse(pos, tmp, head) \ 103.176 + list_for_each_entry_safe_reverse(pos, tmp, head, list) 103.177 +#endif 103.178 + 103.179 /* Automatic page scrubbing for dead domains. 
*/ 103.180 -extern struct list_head page_scrub_list; 103.181 -#define page_scrub_schedule_work() \ 103.182 - do { \ 103.183 - if ( !list_empty(&page_scrub_list) ) \ 103.184 - raise_softirq(PAGE_SCRUB_SOFTIRQ); \ 103.185 +extern struct page_list_head page_scrub_list; 103.186 +#define page_scrub_schedule_work() \ 103.187 + do { \ 103.188 + if ( !page_list_empty(&page_scrub_list) ) \ 103.189 + raise_softirq(PAGE_SCRUB_SOFTIRQ); \ 103.190 } while ( 0 ) 103.191 #define page_scrub_kick() \ 103.192 do { \ 103.193 - if ( !list_empty(&page_scrub_list) ) \ 103.194 + if ( !page_list_empty(&page_scrub_list) ) \ 103.195 cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ); \ 103.196 } while ( 0 ) 103.197 unsigned long avail_scrub_pages(void); 103.198 103.199 -#include <asm/mm.h> 103.200 - 103.201 int guest_remove_page(struct domain *d, unsigned long gmfn); 103.202 103.203 /* Returns TRUE if the whole page at @mfn is ordinary RAM. */
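The new page_list machinery above threads pages together by 32-bit MFN index instead of by pointer, shrinking the list links in struct page_info on x86-64; ~0 marks the end of a chain and the head keeps real pointers to the first and last page. A compressed standalone model of the idea, covering only add-to-tail and forward iteration over a toy frame table:

#include <stdio.h>
#include <stdint.h>

#define NR_PAGES 8
#define PG_NIL   (~(uint32_t)0)          /* "no neighbour", like ~0 above */

struct page_info {
    struct { uint32_t next, prev; } list; /* MFN links, not pointers */
};

struct page_list_head {
    struct page_info *next, *tail;
};

static struct page_info frame_table[NR_PAGES];

static uint32_t page_to_mfn(struct page_info *pg) { return pg - frame_table; }
static struct page_info *mfn_to_page(uint32_t mfn) { return &frame_table[mfn]; }

static void page_list_add_tail(struct page_info *pg, struct page_list_head *h)
{
    pg->list.next = PG_NIL;
    if (h->next) {
        pg->list.prev = page_to_mfn(h->tail);
        h->tail->list.next = page_to_mfn(pg);
    } else {
        pg->list.prev = PG_NIL;
        h->next = pg;
    }
    h->tail = pg;
}

static struct page_info *page_list_next(struct page_info *pg,
                                        struct page_list_head *h)
{
    return pg != h->tail ? mfn_to_page(pg->list.next) : NULL;
}

int main(void)
{
    struct page_list_head head = { NULL, NULL };
    struct page_info *pg;

    page_list_add_tail(&frame_table[5], &head);
    page_list_add_tail(&frame_table[2], &head);
    page_list_add_tail(&frame_table[7], &head);

    for (pg = head.next; pg; pg = page_list_next(pg, &head))
        printf("mfn %u\n", page_to_mfn(pg));
    return 0;
}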
104.1 --- a/xen/include/xen/sched.h Fri Feb 13 10:56:01 2009 +0900 104.2 +++ b/xen/include/xen/sched.h Fri Feb 13 11:22:28 2009 +0900 104.3 @@ -19,6 +19,7 @@ 104.4 #include <xen/xenoprof.h> 104.5 #include <xen/rcupdate.h> 104.6 #include <xen/irq.h> 104.7 +#include <xen/mm.h> 104.8 104.9 #ifdef CONFIG_COMPAT 104.10 #include <compat/vcpu.h> 104.11 @@ -171,8 +172,8 @@ struct domain 104.12 spinlock_t domain_lock; 104.13 104.14 spinlock_t page_alloc_lock; /* protects all the following fields */ 104.15 - struct list_head page_list; /* linked list, of size tot_pages */ 104.16 - struct list_head xenpage_list; /* linked list, of size xenheap_pages */ 104.17 + struct page_list_head page_list; /* linked list, of size tot_pages */ 104.18 + struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */ 104.19 unsigned int tot_pages; /* number of pages currently possesed */ 104.20 unsigned int max_pages; /* maximum value for tot_pages */ 104.21 unsigned int xenheap_pages; /* # pages allocated from Xen heap */
105.1 --- a/xen/xsm/flask/hooks.c Fri Feb 13 10:56:01 2009 +0900 105.2 +++ b/xen/xsm/flask/hooks.c Fri Feb 13 11:22:28 2009 +0900 105.3 @@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai 105.4 perm = HVM__SETHVMC; 105.5 break; 105.6 case XEN_DOMCTL_gethvmcontext: 105.7 + case XEN_DOMCTL_gethvmcontext_partial: 105.8 perm = HVM__GETHVMC; 105.9 break; 105.10 default: