direct-io.hg
changeset 6516:23979fb12c49
Merge.
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Wed Aug 17 12:33:56 2005 -0800
@@ -44,7 +44,7 @@ c-obj-$(CONFIG_HPET_TIMER)	+= time_hpet
 c-obj-$(CONFIG_EFI)		+= efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
-c-obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
+obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
 
 EXTRA_AFLAGS   := -traditional
 
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Wed Aug 17 12:33:56 2005 -0800
@@ -115,9 +115,6 @@ EXPORT_SYMBOL(__copy_from_user_ll);
 EXPORT_SYMBOL(__copy_to_user_ll);
 EXPORT_SYMBOL(strnlen_user);
 
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Wed Aug 17 12:33:56 2005 -0800
@@ -24,13 +24,14 @@ struct dma_coherent_mem {
 	unsigned long *bitmap;
 };
 
-static void iommu_bug(void)
-{
-	printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n");
-	BUG();
-}
-
-#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0)
+#define IOMMU_BUG_ON(test)                                      \
+do {                                                            \
+	if (unlikely(test)) {                                   \
+		printk(KERN_ALERT "Fatal DMA error! "           \
+		       "Please use 'swiotlb=force'\n");         \
+		BUG();                                          \
+	}                                                       \
+} while (0)
 
 int
 dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c	Wed Aug 17 12:33:56 2005 -0800
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <linux/module.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
@@ -352,7 +353,6 @@ void xen_destroy_contiguous_region(unsig
     balloon_unlock(flags);
 }
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 
 unsigned long allocate_empty_lowmem_region(unsigned long pages)
 {
@@ -401,4 +401,4 @@ unsigned long allocate_empty_lowmem_regi
     return vstart;
 }
 
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+EXPORT_SYMBOL(allocate_empty_lowmem_region);
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Wed Aug 17 12:33:56 2005 -0800
@@ -256,19 +256,23 @@ static void shutdown_handler(struct xenb
     char *str;
 
     str = (char *)xenbus_read("control", "shutdown", NULL);
-    /* Ignore read errors and recursive shutdown events. */
-    if (IS_ERR(str) || !strcmp(str, __stringify(SHUTDOWN_INVALID)))
+    /* Ignore read errors. */
+    if (IS_ERR(str))
         return;
-
-    xenbus_printf("control", "shutdown", "%i", SHUTDOWN_INVALID);
+    if (strlen(str) == 0) {
+        kfree(str);
+        return;
+    }
 
-    if (strcmp(str, "poweroff") == 0) {
+    xenbus_write("control", "shutdown", "", O_CREAT);
+
+    if (strcmp(str, "poweroff") == 0)
         shutting_down = SHUTDOWN_POWEROFF;
-    } else if (strcmp(str, "reboot") == 0) {
+    else if (strcmp(str, "reboot") == 0)
         shutting_down = SHUTDOWN_REBOOT;
-    } else if (strcmp(str, "suspend") == 0) {
+    else if (strcmp(str, "suspend") == 0)
         shutting_down = SHUTDOWN_SUSPEND;
-    } else {
+    else {
         printk("Ignoring shutdown request: %s\n", str);
         shutting_down = SHUTDOWN_INVALID;
     }
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c	Wed Aug 17 12:33:56 2005 -0800
@@ -5,8 +5,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
 
 /* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
-    ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
 
 struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
 {
-    struct sk_buff *skb;
-    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
-    if ( likely(skb != NULL) )
-        skb_reserve(skb, 16);
-    return skb;
+	struct sk_buff *skb;
+	int order;
+
+	length = SKB_DATA_ALIGN(length + 16);
+	order = get_order(length + sizeof(struct skb_shared_info));
+	if (order > MAX_SKBUFF_ORDER) {
+		printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+		       "Increase MAX_SKBUFF_ORDER.\n", order);
+		return NULL;
+	}
+
+	skb = alloc_skb_from_cache(
+		skbuff_order_cachep[order], length, gfp_mask);
+	if (skb != NULL)
+		skb_reserve(skb, 16);
+
+	return skb;
 }
 
 static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
 {
-    scrub_pages(buf, 1);
+	int order = 0;
+
+	while (skbuff_order_cachep[order] != cachep)
+		order++;
+
+	if (order != 0)
+		xen_create_contiguous_region((unsigned long)buf, order);
+
+	scrub_pages(buf, 1 << order);
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+	int order = 0;
+
+	while (skbuff_order_cachep[order] != cachep)
+		order++;
+
+	if (order != 0)
+		xen_destroy_contiguous_region((unsigned long)buf, order);
 }
 
 static int __init skbuff_init(void)
 {
-    skbuff_cachep = kmem_cache_create(
-        "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
-    return 0;
+	static char name[MAX_SKBUFF_ORDER + 1][20];
+	unsigned long size;
+	int order;
+
+	for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
+		size = PAGE_SIZE << order;
+		sprintf(name[order], "xen-skb-%lu", size);
+		skbuff_order_cachep[order] = kmem_cache_create(
+			name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+	}
+
+	skbuff_cachep = skbuff_order_cachep[0];
+
+	return 0;
 }
 __initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Wed Aug 17 12:33:56 2005 -0800
@@ -250,7 +250,11 @@ static int vmalloc_fault(unsigned long a
 	   happen within a race in page table update. In the later
 	   case just flush. */
 
-	pgd = pgd_offset(current->mm ?: &init_mm, address);
+	/* On Xen the line below does not always work. Needs investigating! */
+	/*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+	pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+	pgd += pgd_index(address);
+
 	pgd_ref = pgd_offset_k(address);
 	if (pgd_none(*pgd_ref))
 		return -1;
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h	Wed Aug 17 12:33:56 2005 -0800
@@ -1,6 +1,33 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
+
 int xs_init(void);
 int xb_init_comms(void);
 void xb_suspend_comms(void);
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c	Wed Aug 17 12:33:56 2005 -0800
@@ -309,6 +309,7 @@ void xenbus_suspend(void)
 void xenbus_resume(void)
 {
 	xb_init_comms();
+	reregister_xenbus_watches();
 	up(&xenbus_lock);
 }
 
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c	Wed Aug 17 12:33:56 2005 -0800
@@ -496,6 +496,18 @@ void unregister_xenbus_watch(struct xenb
 		       watch->node, err);
 }
 
+/* Re-register callbacks to all watches. */
+void reregister_xenbus_watches(void)
+{
+	struct xenbus_watch *watch;
+	char token[sizeof(watch) * 2 + 1];
+
+	list_for_each_entry(watch, &watches, list) {
+		sprintf(token, "%lX", (long)watch);
+		xs_watch(watch->node, token);
+	}
+}
+
 static int watch_thread(void *unused)
 {
 	for (;;) {
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h	Wed Aug 17 12:33:56 2005 -0800
@@ -137,10 +137,8 @@ void xen_invlpg_mask(cpumask_t *mask, un
 void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
 void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 /* Allocate a contiguous empty region of low memory. Return virtual start. */
 unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
 
 #include <asm/hypercall.h>
 
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h	Wed Aug 17 12:33:56 2005 -0800
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
 /******************************************************************************
  * xenbus.h
  *
@@ -28,6 +26,10 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <asm/semaphore.h>
@@ -119,6 +121,7 @@ void unregister_xenstore_notifier(struct
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
--- a/tools/examples/network-bridge	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/examples/network-bridge	Wed Aug 17 12:33:56 2005 -0800
@@ -189,7 +189,7 @@ op_start () {
     fi
     ip link set ${netdev} name p${netdev}
     ip link set veth0 name ${netdev}
-    ifconfig p${netdev} -arp down
+    ifconfig p${netdev} 0.0.0.0 -arp down
     ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
     ifconfig ${netdev} hw ether ${mac}
     add_to_bridge ${bridge} vif0.0
--- a/tools/misc/xend	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/misc/xend	Wed Aug 17 12:33:56 2005 -0800
@@ -117,11 +117,15 @@ def stop_xcs():
     return
 
 def start_xenstored():
-    s,o = commands.getstatusoutput("/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid");
+    XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+    cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+    if XENSTORED_TRACE:
+        cmd += " -T /var/log/xenstored-trace.log"
+    s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
     if os.fork() == 0:
-        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']);
+        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
 
 def main():
     try:
--- a/tools/python/xen/xend/XendDomain.py	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/python/xen/xend/XendDomain.py	Wed Aug 17 12:33:56 2005 -0800
@@ -320,8 +320,7 @@ class XendDomain:
         @param vmconfig: vm configuration
         """
         config = sxp.child_value(vmconfig, 'config')
-        uuid = sxp.child_value(vmconfig, 'uuid')
-        dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+        dominfo = XendDomainInfo.restore(self.dbmap, config)
         return dominfo
 
     def domain_restore(self, src, progress=False):
--- a/tools/python/xen/xend/XendDomainInfo.py	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/python/xen/xend/XendDomainInfo.py	Wed Aug 17 12:33:56 2005 -0800
@@ -195,19 +195,22 @@ class XendDomainInfo:
 
     recreate = classmethod(recreate)
 
-    def restore(cls, parentdb, config, uuid):
+    def restore(cls, parentdb, config, uuid=None):
         """Create a domain and a VM object to do a restore.
 
         @param parentdb: parent db
         @param config: domain configuration
         @param uuid: uuid to use
         """
+        if not uuid:
+            uuid = getUuid()
         db = parentdb.addChild(uuid)
         vm = cls(db)
         ssidref = int(sxp.child_value(config, 'ssidref'))
         log.debug('restoring with ssidref='+str(ssidref))
         id = xc.domain_create(ssidref = ssidref)
         vm.setdom(id)
+        vm.clear_shutdown()
         try:
             vm.restore = True
             vm.construct(config)
@@ -979,6 +982,11 @@ class XendDomainInfo:
         if not reason in ['suspend']:
             self.shutdown_pending = {'start':time.time(), 'reason':reason}
 
+    def clear_shutdown(self):
+        db = self.db.addChild("/control")
+        db['shutdown'] = ""
+        db.saveDB(save=True)
+
     def send_sysrq(self, key=0):
         db = self.db.addChild("/control");
         db['sysrq'] = '%c' % key;
--- a/tools/python/xen/xm/create.py	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/python/xen/xm/create.py	Wed Aug 17 12:33:56 2005 -0800
@@ -380,7 +380,6 @@ def randomMAC():
 
     @return: MAC address string
     """
-    random.seed()
     mac = [ 0xaa, 0x00, 0x00,
             random.randint(0x00, 0x7f),
             random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@ def balloon_out(dom0_min_mem, opts):
     del xc
 
 def main(argv):
+    random.seed()
     opts = gopts
     args = opts.parse(argv)
     if opts.vals.help:
--- a/tools/xenstore/xenstored.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/tools/xenstore/xenstored.h	Wed Aug 17 12:33:56 2005 -0800
@@ -1,21 +1,29 @@
-/*
-    Simple prototyle Xen Store Daemon providing simple tree-like database.
-    Copyright (C) 2005 Rusty Russell IBM Corporation
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
+/*
+ * Simple prototyle Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
 #ifndef _XENSTORED_H
 #define _XENSTORED_H
 
--- a/xen/arch/ia64/xenmisc.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/arch/ia64/xenmisc.c	Wed Aug 17 12:33:56 2005 -0800
@@ -280,7 +280,6 @@ void cs01foo(void) {}
 
 unsigned long context_switch_count = 0;
 
-// context_switch
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
 //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@ void context_switch(struct vcpu *prev, s
 //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
 //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
 #ifdef CONFIG_VTI
-	unsigned long psr;
-	/* Interrupt is enabled after next task is chosen.
-	 * So we have to disable it for stack switch.
-	 */
-	local_irq_save(psr);
 	vtm_domain_out(prev);
-	/* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
 	context_switch_count++;
 	switch_to(prev,next,prev);
 #ifdef CONFIG_VTI
-	/* Post-setup for new domain */
 	vtm_domain_in(current);
-	local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
 // leave this debug for now: it acts as a heartbeat when more than
 // one domain is active
 {
@@ -315,25 +306,27 @@ int id = ((struct vcpu *)current)->domai
 if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
 if (!i--) { printk("+",id); i = 1000000; }
 }
-	clear_bit(_VCPUF_running, &prev->vcpu_flags);
-	//if (!is_idle_task(next->domain) )
-	    //send_guest_virq(next, VIRQ_TIMER);
+
 #ifdef CONFIG_VTI
 	if (VMX_DOMAIN(current))
 		vmx_load_all_rr(current);
-	return;
-#else // CONFIG_VTI
+#else
 	if (!is_idle_task(current->domain)) {
 		load_region_regs(current);
 		if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
 	}
 	if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+	/* nothing to do */
 }
 
 void continue_running(struct vcpu *same)
 {
-    /* nothing to do */
+	/* nothing to do */
 }
 
 void panic_domain(struct pt_regs *regs, const char *fmt, ...)
--- a/xen/arch/x86/domain.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/arch/x86/domain.c	Wed Aug 17 12:33:56 2005 -0800
@@ -48,6 +48,8 @@ boolean_param("noreboot", opt_noreboot);
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
+    unsigned int context_not_finalised;
+    unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
@@ -541,51 +543,59 @@ void toggle_guest_mode(struct vcpu *v)
     __r; })
 
 #if CONFIG_VMX
-#define load_msrs(_p, _n)  if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n)       if (vmx_switch_on) vmx_load_msrs(n)
 #else
-#define load_msrs(_p, _n)  ((void)0)
+#define load_msrs(n)       ((void)0)
 #endif
 
-static void load_segments(struct vcpu *p, struct vcpu *n)
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS           0x01
+#define DIRTY_ES           0x02
+#define DIRTY_FS           0x04
+#define DIRTY_GS           0x08
+#define DIRTY_FS_BASE      0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
 {
-    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
     struct vcpu_guest_context *nctxt = &n->arch.guest_context;
     int all_segs_okay = 1;
+    unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+    /* Load and clear the dirty segment mask. */
+    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+    percpu_ctxt[cpu].dirty_segment_mask = 0;
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
         all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
         all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
 
     /*
      * Either selector != 0 ==> reload.
     * Also reload to reset FS_BASE if it was non-zero.
     */
-    if ( unlikely(pctxt->user_regs.fs |
-                  pctxt->fs_base |
+    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                   nctxt->user_regs.fs) )
-    {
         all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
-        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
-            pctxt->fs_base = 0;
-    }
 
     /*
      * Either selector != 0 ==> reload.
     * Also reload to reset GS_BASE if it was non-zero.
     */
-    if ( unlikely(pctxt->user_regs.gs |
-                  pctxt->gs_base_user |
+    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                   nctxt->user_regs.gs) )
     {
         /* Reset GS_BASE with user %gs? */
-        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
             all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
-        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
-            pctxt->gs_base_user = 0;
     }
 
     /* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@ static void load_segments(struct vcpu *p
 
 static void save_segments(struct vcpu *v)
 {
-    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+    struct cpu_user_regs      *regs = &ctxt->user_regs;
+    unsigned int dirty_segment_mask = 0;
 
     if ( VMX_DOMAIN(v) )
         rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@ static void save_segments(struct vcpu *v
     __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
     __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
     __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
+
+    if ( regs->ds )
+        dirty_segment_mask |= DIRTY_DS;
+
+    if ( regs->es )
+        dirty_segment_mask |= DIRTY_ES;
 
-static void clear_segments(void)
-{
-    __asm__ __volatile__ (
-        " movl %0,%%ds; "
-        " movl %0,%%es; "
-        " movl %0,%%fs; "
-        " movl %0,%%gs; "
-        ""safe_swapgs"  "
-        " movl %0,%%gs"
-        : : "r" (0) );
+    if ( regs->fs )
+    {
+        dirty_segment_mask |= DIRTY_FS;
+        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+    }
+    else if ( ctxt->fs_base )
+    {
+        dirty_segment_mask |= DIRTY_FS_BASE;
+    }
+
+    if ( regs->gs )
+    {
+        dirty_segment_mask |= DIRTY_GS;
+        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+    }
+    else if ( ctxt->gs_base_user )
+    {
+        dirty_segment_mask |= DIRTY_GS_BASE_USER;
+    }
+
+    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }
 
 long do_switch_to_user(void)
@@ -706,10 +734,9 @@ long do_switch_to_user(void)
 
 #elif defined(__i386__)
 
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n)     ((void)0)
-#define save_segments(_p)     ((void)0)
-#define clear_segments()      ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n)     ((void)0)
+#define save_segments(p) ((void)0)
 
 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
 {
@@ -726,9 +753,9 @@ static inline void switch_kernel_stack(s
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
-    unsigned int         cpu = smp_processor_id();
-    struct vcpu         *p = percpu_ctxt[cpu].curr_vcpu;
-    struct vcpu         *n = current;
+    unsigned int          cpu = smp_processor_id();
+    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
+    struct vcpu          *n = current;
 
     if ( !is_idle_task(p->domain) )
     {
@@ -786,23 +813,31 @@ static void __context_switch(void)
 
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
-    struct vcpu *realprev;
+    unsigned int cpu = smp_processor_id();
 
-    local_irq_disable();
+    ASSERT(!local_irq_is_enabled());
 
     set_current(next);
 
-    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
-         is_idle_task(next->domain) )
-    {
-        local_irq_enable();
-    }
-    else
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
     {
         __context_switch();
+        percpu_ctxt[cpu].context_not_finalised = 1;
+    }
+}
 
-        local_irq_enable();
-
+void context_switch_finalise(struct vcpu *next)
+{
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( percpu_ctxt[cpu].context_not_finalised )
+    {
+        percpu_ctxt[cpu].context_not_finalised = 0;
+
+        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
         if ( VMX_DOMAIN(next) )
         {
             vmx_restore_msrs(next);
@@ -810,19 +845,11 @@ void context_switch(struct vcpu *prev, s
         else
        {
             load_LDT(next);
-            load_segments(realprev, next);
-            load_msrs(realprev, next);
+            load_segments(next);
+            load_msrs(next);
         }
     }
 
-    /*
-     * We do this late on because it doesn't need to be protected by the
-     * schedule_lock, and because we want this to be the very last use of
-     * 'prev' (after this point, a dying domain's info structure may be freed
-     * without warning).
-     */
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
-
     schedule_tail(next);
     BUG();
 }
@@ -835,12 +862,19 @@ void continue_running(struct vcpu *same)
 
 int __sync_lazy_execstate(void)
 {
-    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
-        return 0;
-    __context_switch();
-    load_LDT(current);
-    clear_segments();
-    return 1;
+    unsigned long flags;
+    int switch_required;
+
+    local_irq_save(flags);
+
+    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+    if ( switch_required )
+        __context_switch();
+
+    local_irq_restore(flags);
+
+    return switch_required;
 }
 
 void sync_lazy_execstate_cpu(unsigned int cpu)
--- a/xen/arch/x86/vmx.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/arch/x86/vmx.c	Wed Aug 17 12:33:56 2005 -0800
@@ -65,7 +65,7 @@ static u32 msr_data_index[VMX_MSR_COUNT]
  * are not modified once set for generic domains, we don't save them,
  * but simply reset them to the values set at percpu_traps_init().
  */
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
 {
     struct msr_state *host_state;
     host_state = &percpu_msr[smp_processor_id()];
--- a/xen/common/schedule.c	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/common/schedule.c	Wed Aug 17 12:33:56 2005 -0800
@@ -474,13 +474,14 @@ static void __enter_scheduler(void)
 
     set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
 
-    /* Must be protected by the schedule_lock! */
-    set_bit(_VCPUF_running, &next->vcpu_flags);
+    if ( unlikely(prev == next) )
+    {
+        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+        return continue_running(prev);
+    }
 
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    if ( unlikely(prev == next) )
-        return continue_running(prev);
+    clear_bit(_VCPUF_running, &prev->vcpu_flags);
+    set_bit(_VCPUF_running, &next->vcpu_flags);
 
     perfc_incrc(sched_ctx);
 
@@ -517,6 +518,10 @@ static void __enter_scheduler(void)
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    context_switch_finalise(next);
 }
 
 /* No locking needed -- pointer comparison is safe :-) */
--- a/xen/include/asm-x86/e820.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/include/asm-x86/e820.h	Wed Aug 17 12:33:56 2005 -0800
@@ -3,7 +3,7 @@
 
 #include <asm/page.h>
 
-#define E820MAX	32
+#define E820MAX	128
 
 #define E820_RAM          1
 #define E820_RESERVED     2
--- a/xen/include/asm-x86/vmx_vmcs.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/include/asm-x86/vmx_vmcs.h	Wed Aug 17 12:33:56 2005 -0800
@@ -28,10 +28,10 @@ extern int start_vmx(void);
 extern void stop_vmx(void);
 
 #if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
 void vmx_restore_msrs(struct vcpu *d);
 #else
-#define vmx_load_msrs(_p, _n) ((void)0)
+#define vmx_load_msrs(_n)     ((void)0)
 #define vmx_restore_msrs(_v)  ((void)0)
 #endif
 
--- a/xen/include/xen/sched.h	Tue Aug 16 14:27:16 2005 -0800
+++ b/xen/include/xen/sched.h	Wed Aug 17 12:33:56 2005 -0800
@@ -258,12 +258,32 @@ extern void sync_lazy_execstate_mask(cpu
 extern void sync_lazy_execstate_all(void);
 extern int __sync_lazy_execstate(void);
 
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ *
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
 extern void context_switch(
     struct vcpu *prev,
     struct vcpu *next);
 
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+    struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
 
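Taken together, the schedule.c, domain.c, ia64 and sched.h hunks split the switch into two phases: context_switch() runs with the scheduler lock held and interrupts off, and context_switch_finalise() runs after the lock is dropped and interrupts are re-enabled (a no-op on ia64, segment/MSR loading on x86). The program below is a self-contained sketch of that calling sequence, distilled from the __enter_scheduler() hunk above; the lock and IRQ state are modelled with plain flags and the names mirror the diff, but this is illustrative pseudocode rather than the Xen implementation.

#include <stdio.h>

/* Two-phase context switch, condensed from the hunks above.  Assumptions:
 * vcpus are plain ints, and lock/IRQ state is tracked in simple globals. */
static int lock_held, irqs_enabled = 1;
static int context_not_finalised;       /* per-CPU in the real code */

static void context_switch(int prev, int next)
{
    /* Must be entered with IRQs off and the scheduler lock held. */
    if (irqs_enabled || !lock_held)
        printf("BUG: wrong calling context\n");
    printf("switch %d -> %d (state committed or left for lazy sync)\n",
           prev, next);
    context_not_finalised = 1;
}

static void context_switch_finalise(int next)
{
    /* Runs only after the lock is dropped and IRQs are back on. */
    if (!irqs_enabled || lock_held)
        printf("BUG: wrong calling context\n");
    if (context_not_finalised) {
        context_not_finalised = 0;
        printf("finalise %d: load segments/MSRs (x86) or nothing (ia64)\n",
               next);
    }
}

int main(void)
{
    int prev = 1, next = 2;

    /* __enter_scheduler(), condensed from the schedule.c hunk: */
    irqs_enabled = 0; lock_held = 1;    /* spin_lock_irq()              */
    context_switch(prev, next);         /* lock held, IRQs off          */
    lock_held = 0; irqs_enabled = 1;    /* spin_unlock_irq()            */
    context_switch_finalise(next);      /* IRQs on, lock already dropped */
    return 0;
}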