direct-io.hg
changeset 6506:c589ca6d292b
Merge.
--- a/.hgignore	Thu Aug 11 10:44:59 2005 -0800
+++ b/.hgignore	Thu Aug 11 12:38:44 2005 -0800
@@ -116,7 +116,6 @@
 ^tools/ioemu/target-.*/Makefile$
 ^tools/ioemu/target-.*/config\..*$
 ^tools/ioemu/target-.*/qemu-dm$
-^tools/ioemu/target-.*/qemu-vgaram-bin$
 ^tools/libxc/xen/.*$
 ^tools/misc/cpuperf/cpuperf-perfcntr$
 ^tools/misc/cpuperf/cpuperf-xen$
--- a/Makefile	Thu Aug 11 10:44:59 2005 -0800
+++ b/Makefile	Thu Aug 11 12:38:44 2005 -0800
@@ -101,11 +101,6 @@ mrproper: clean
 	for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done
 	for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done
 
-install-twisted:
-	wget http://www.twistedmatrix.com/products/get-current.epy
-	tar -zxf Twisted-*.tar.gz
-	cd Twisted-* && python setup.py install
-
 install-logging: LOGGING=logging-0.4.9.2
 install-logging:
 	[ -f $(LOGGING).tar.gz ] || wget http://www.red-dove.com/$(LOGGING).tar.gz
@@ -149,7 +144,6 @@ help:
 	@echo '  kclean           - clean guest kernel build trees'
 	@echo ''
 	@echo 'Dependency installation targets:'
-	@echo '  install-twisted  - install the Twisted Matrix Framework'
 	@echo '  install-logging  - install the Python Logging package'
 	@echo '  install-iptables - install iptables tools'
 	@echo ''
--- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Thu Aug 11 12:38:44 2005 -0800
@@ -669,7 +669,7 @@ CONFIG_EXT3_FS_SECURITY=y
 CONFIG_JBD=m
 # CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
-CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_FS=y
 # CONFIG_REISERFS_CHECK is not set
 CONFIG_REISERFS_PROC_INFO=y
 CONFIG_REISERFS_FS_XATTR=y
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Thu Aug 11 12:38:44 2005 -0800
@@ -227,12 +227,38 @@ static unsigned long get_usec_offset(str
 	return scale_delta(delta, shadow->tsc_to_usec_mul, shadow->tsc_shift);
 }
 
+static void __update_wallclock(time_t sec, long nsec)
+{
+	long wtm_nsec, xtime_nsec;
+	time_t wtm_sec, xtime_sec;
+	u64 tmp, wc_nsec;
+
+	/* Adjust wall-clock time base based on wall_jiffies ticks. */
+	wc_nsec = processed_system_time;
+	wc_nsec += (u64)sec * 1000000000ULL;
+	wc_nsec += (u64)nsec;
+	wc_nsec -= (jiffies - wall_jiffies) * (u64)(NSEC_PER_SEC / HZ);
+
+	/* Split wallclock base into seconds and nanoseconds. */
+	tmp = wc_nsec;
+	xtime_nsec = do_div(tmp, 1000000000);
+	xtime_sec = (time_t)tmp;
+
+	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
+	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
+
+	set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
+	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+
+	time_adjust = 0;		/* stop active adjtime() */
+	time_status |= STA_UNSYNC;
+	time_maxerror = NTP_PHASE_LIMIT;
+	time_esterror = NTP_PHASE_LIMIT;
+}
+
 static void update_wallclock(void)
 {
 	shared_info_t *s = HYPERVISOR_shared_info;
-	long wtm_nsec, xtime_nsec;
-	time_t wtm_sec, xtime_sec;
-	u64 tmp, nsec;
 
 	do {
 		shadow_tv_version = s->wc_version;
@@ -243,25 +269,8 @@ static void update_wallclock(void)
 	}
 	while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
 
-	if (independent_wallclock)
-		return;
-
-	/* Adjust wall-clock time base based on wall_jiffies ticks. */
-	nsec = processed_system_time;
-	nsec += (u64)shadow_tv.tv_sec * 1000000000ULL;
-	nsec += (u64)shadow_tv.tv_nsec;
-	nsec -= (jiffies - wall_jiffies) * (u64)(NSEC_PER_SEC / HZ);
-
-	/* Split wallclock base into seconds and nanoseconds. */
-	tmp = nsec;
-	xtime_nsec = do_div(tmp, 1000000000);
-	xtime_sec = (time_t)tmp;
-
-	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec);
-
-	set_normalized_timespec(&xtime, xtime_sec, xtime_nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
+	if (!independent_wallclock)
+		__update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec);
 }
 
 /*
@@ -408,19 +417,15 @@ EXPORT_SYMBOL(do_gettimeofday);
 
 int do_settimeofday(struct timespec *tv)
 {
-	time_t wtm_sec, sec = tv->tv_sec;
-	long wtm_nsec;
+	time_t sec;
 	s64 nsec;
-	struct timespec xentime;
 	unsigned int cpu;
 	struct shadow_time_info *shadow;
+	dom0_op_t op;
 
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
-	if (!independent_wallclock && !(xen_start_info.flags & SIF_INITDOMAIN))
-		return 0; /* Silent failure? */
-
 	cpu = get_cpu();
 	shadow = &per_cpu(shadow_time, cpu);
 
@@ -431,51 +436,30 @@ int do_settimeofday(struct timespec *tv)
 	 * overflows. If that were to happen then our shadow time values would
	 * be stale, so we can retry with fresh ones.
 	 */
- again:
-	nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
-	if (unlikely(!time_values_up_to_date(cpu))) {
+	for ( ; ; ) {
+		nsec = (s64)tv->tv_nsec - (s64)get_nsec_offset(shadow);
+		if (time_values_up_to_date(cpu))
+			break;
 		get_time_values_from_xen();
-		goto again;
+	}
+	sec = tv->tv_sec;
+	__normalize_time(&sec, &nsec);
+
+	if ((xen_start_info.flags & SIF_INITDOMAIN) &&
+	    !independent_wallclock) {
+		op.cmd = DOM0_SETTIME;
+		op.u.settime.secs        = sec;
+		op.u.settime.nsecs       = nsec;
+		op.u.settime.system_time = shadow->system_timestamp;
+		HYPERVISOR_dom0_op(&op);
+		update_wallclock();
+	} else if (independent_wallclock) {
+		nsec -= shadow->system_timestamp;
+		__normalize_time(&sec, &nsec);
+		__update_wallclock(sec, nsec);
 	}
 
-	__normalize_time(&sec, &nsec);
-	set_normalized_timespec(&xentime, sec, nsec);
-
-	/*
-	 * This is revolting. We need to set "xtime" correctly. However, the
-	 * value in this location is the value at the most recent update of
-	 * wall time. Discover what correction gettimeofday() would have
-	 * made, and then undo it!
-	 */
-	nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
-	nsec -= (shadow->system_timestamp - processed_system_time);
-
-	__normalize_time(&sec, &nsec);
-	wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
-	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
-	set_normalized_timespec(&xtime, sec, nsec);
-	set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
-	time_adjust = 0;		/* stop active adjtime() */
-	time_status |= STA_UNSYNC;
-	time_maxerror = NTP_PHASE_LIMIT;
-	time_esterror = NTP_PHASE_LIMIT;
-
-#ifdef CONFIG_XEN_PRIVILEGED_GUEST
-	if ((xen_start_info.flags & SIF_INITDOMAIN) &&
-	    !independent_wallclock) {
-		dom0_op_t op;
-		op.cmd = DOM0_SETTIME;
-		op.u.settime.secs        = xentime.tv_sec;
-		op.u.settime.nsecs       = xentime.tv_nsec;
-		op.u.settime.system_time = shadow->system_timestamp;
-		write_sequnlock_irq(&xtime_lock);
-		HYPERVISOR_dom0_op(&op);
-	} else
-#endif
-		write_sequnlock_irq(&xtime_lock);
+	write_sequnlock_irq(&xtime_lock);
 
 	put_cpu();
 
@@ -492,6 +476,9 @@ static int set_rtc_mmss(unsigned long no
 
 	WARN_ON(irqs_disabled());
 
+	if (!(xen_start_info.flags & SIF_INITDOMAIN))
+		return 0;
+
 	/* gets recalled with irq locally disabled */
 	spin_lock_irq(&rtc_lock);
 	if (efi_enabled)
@@ -603,8 +590,10 @@ static inline void do_timer_interrupt(in
 		profile_tick(CPU_PROFILING, regs);
 	}
 
-	if (unlikely(shadow_tv_version != HYPERVISOR_shared_info->wc_version))
+	if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
 		update_wallclock();
+		clock_was_set();
+	}
 }
 
 /*
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Thu Aug 11 12:38:44 2005 -0800
@@ -25,6 +25,7 @@
 #include <asm/mmu_context.h>
 
 #include <asm-xen/foreign_page.h>
+#include <asm-xen/hypervisor.h>
 
 void show_mem(void)
 {
@@ -274,6 +275,11 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
 {
 	unsigned long flags;
 
+#ifdef CONFIG_X86_PAE
+	/* this gives us a page below 4GB */
+	xen_contig_memory((unsigned long)pgd, 0);
+#endif
+
 	if (!HAVE_SHARED_KERNEL_PMD)
 		spin_lock_irqsave(&pgd_lock, flags);
 
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Thu Aug 11 12:38:44 2005 -0800
@@ -126,6 +126,10 @@ config X86_IO_APIC
 	bool
 	default XEN_PRIVILEGED_GUEST
 
+config X86_XEN_GENAPIC
+	bool
+	default XEN_PRIVILEGED_GUEST || SMP
+
 config X86_LOCAL_APIC
 	bool
 	default XEN_PRIVILEGED_GUEST
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Thu Aug 11 12:38:44 2005 -0800
@@ -25,11 +25,12 @@ obj-$(CONFIG_ACPI_BOOT) += acpi/
 c-obj-$(CONFIG_X86_MSR) += msr.o
 obj-$(CONFIG_MICROCODE) += microcode.o
 obj-$(CONFIG_X86_CPUID) += cpuid.o
-#obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o
 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
 obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
-c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o
+obj-$(CONFIG_X86_XEN_GENAPIC) += genapic.o genapic_xen.o
+c-obj-$(CONFIG_X86_IO_APIC) += genapic_cluster.o genapic_flat.o
 #obj-$(CONFIG_PM) += suspend.o
 #obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 #obj-$(CONFIG_CPU_FREQ) += cpufreq/
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/apic.c	Thu Aug 11 12:38:44 2005 -0800
@@ -48,7 +48,7 @@ void smp_local_timer_interrupt(struct pt
 	int cpu = smp_processor_id();
 
 	profile_tick(CPU_PROFILING, regs);
-#if 0
+#ifndef CONFIG_XEN
 	if (--per_cpu(prof_counter, cpu) <= 0) {
 		/*
 		 * The multiplier may have changed since the last time we got
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c	Thu Aug 11 12:38:44 2005 -0800
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+
+#ifndef CONFIG_XEN
+struct genapic *genapic = &apic_flat;
+#else
+extern struct genapic apic_xen;
+struct genapic *genapic = &apic_xen;
+#endif
+
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+#ifndef CONFIG_XEN
+	long i;
+	u8 clusters, max_cluster;
+	u8 id;
+	u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		/* AMD always uses flat mode right now */
+		genapic = &apic_flat;
+		goto print;
+	}
+
+#if defined(CONFIG_ACPI_BUS)
+	/*
+	 * Some x86_64 machines use physical APIC mode regardless of how many
+	 * procs/clusters are present (x86_64 ES7000 is an example).
+	 */
+	if (acpi_fadt.revision > FADT2_REVISION_ID)
+		if (acpi_fadt.force_apic_physical_destination_mode) {
+			genapic = &apic_cluster;
+			goto print;
+		}
+#endif
+
+	memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+	for (i = 0; i < NR_CPUS; i++) {
+		id = bios_cpu_apicid[i];
+		if (id != BAD_APICID)
+			cluster_cnt[APIC_CLUSTERID(id)]++;
+	}
+
+	clusters = 0;
+	max_cluster = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (cluster_cnt[i] > 0) {
+			++clusters;
+			if (cluster_cnt[i] > max_cluster)
+				max_cluster = cluster_cnt[i];
+		}
+	}
+
+	/*
+	 * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+	 * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+	 * else physical mode.
+	 * (We don't use lowest priority delivery + HW APIC IRQ steering, so
	 * can ignore the clustered logical case and go straight to physical.)
+	 */
+	if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+		genapic = &apic_flat;
+	else
+		genapic = &apic_cluster;
+
+print:
+#else
+	/* hardcode to xen apic functions */
+	genapic = &apic_xen;
+#endif
+	printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+#ifdef CONFIG_XEN
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
+#endif
+
+void send_IPI_self(int vector)
+{
+#ifndef CONFIG_XEN
+	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#else
+	xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+#endif
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c	Thu Aug 11 12:38:44 2005 -0800
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code. Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#else
+#include <asm/apic.h>
+#include <asm/apicdef.h>
+#include <asm/genapic.h>
+#endif
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+	unsigned int evtchn;
+	Dprintk("%s\n", __FUNCTION__);
+
+	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+	if (evtchn)
+		notify_via_evtchn(evtchn);
+	else
+		printk("send_IPI to unbound port %d/%d", cpu, vector);
+}
+
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
+{
+	int cpu;
+
+	switch (shortcut) {
+	case APIC_DEST_SELF:
+		__send_IPI_one(smp_processor_id(), vector);
+		break;
+	case APIC_DEST_ALLBUT:
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (cpu == smp_processor_id())
+				continue;
+			if (cpu_isset(cpu, cpu_online_map)) {
+				__send_IPI_one(cpu, vector);
+			}
+		}
+		break;
+	case APIC_DEST_ALLINC:
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			if (cpu_isset(cpu, cpu_online_map)) {
+				__send_IPI_one(cpu, vector);
+			}
+		}
+		break;
+	default:
+		printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+		       vector);
+		break;
+	}
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+	return cpu_online_map;
+}
+
+/*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+ */
+static void xen_init_apic_ldr(void)
+{
+	Dprintk("%s\n", __FUNCTION__);
+	return;
+}
+
+static void xen_send_IPI_allbutself(int vector)
+{
+	/*
+	 * if there are no other CPUs in the system then
+	 * we get an APIC send error if we try to broadcast.
	 * thus we have to avoid sending IPIs in this case.
+	 */
+	Dprintk("%s\n", __FUNCTION__);
+	if (num_online_cpus() > 1)
+		xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_all(int vector)
+{
+	Dprintk("%s\n", __FUNCTION__);
+	xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+	unsigned long mask = cpus_addr(cpumask)[0];
+	unsigned int cpu;
+	unsigned long flags;
+
+	Dprintk("%s\n", __FUNCTION__);
+	local_irq_save(flags);
+	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+
+	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+		if (cpu_isset(cpu, cpumask)) {
+			__send_IPI_one(cpu, vector);
+		}
+	}
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+static int xen_apic_id_registered(void)
+{
+	/* better be set */
+	Dprintk("%s\n", __FUNCTION__);
+	return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+#endif
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	Dprintk("%s\n", __FUNCTION__);
+	return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	u32 ebx;
+
+	Dprintk("%s\n", __FUNCTION__);
+	ebx = cpuid_ebx(1);
+	return ((ebx >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen =  {
+	.name = "xen",
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+	.int_delivery_mode = dest_LowestPrio,
+#endif
+	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
+	.int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+	.target_cpus = xen_target_cpus,
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+	.apic_id_registered = xen_apic_id_registered,
+#endif
+	.init_apic_ldr = xen_init_apic_ldr,
+	.send_IPI_all = xen_send_IPI_all,
+	.send_IPI_allbutself = xen_send_IPI_allbutself,
+	.send_IPI_mask = xen_send_IPI_mask,
+	.cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+};
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S	Thu Aug 11 12:38:44 2005 -0800
@@ -41,7 +41,6 @@
 startup_64:
 ENTRY(_start)
 	cld
-	movq init_rsp(%rip),%rsp
 	/* Copy the necessary stuff from xen_start_info structure. */
 	movq  $xen_start_info_union,%rdi
 	movq  $64,%rcx		/* sizeof (union xen_start_info_union) / sizeof (long) */
@@ -52,6 +51,7 @@ ENTRY(_start)
 	cld
 #endif /* CONFIG_SMP */
 
+	movq init_rsp(%rip),%rsp
 	/* zero EFLAGS after setting rsp */
 	pushq $0
 	popfq
@@ -204,6 +204,7 @@ ENTRY(cpu_gdt_table)
 	.quad	0,0			/* TSS */
 	.quad	0,0			/* LDT */
 	.quad	0,0,0			/* three TLS descriptors */
+	.quad	0			/* unused now? __KERNEL16_CS - 16bit PM for S3 wakeup. */
 
 gdt_end:
 	/* asm/segment.h:GDT_ENTRIES must match this */
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c	Thu Aug 11 12:38:44 2005 -0800
@@ -21,6 +21,11 @@
 
 atomic_t irq_err_count;
 
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
 
 /*
  * Generic, controller-independent functions:
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Thu Aug 11 12:38:44 2005 -0800
@@ -200,12 +200,14 @@ static struct resource adapter_rom_resou
 #define ADAPTER_ROM_RESOURCES \
 	(sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
 
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
 static struct resource video_rom_resource = {
 	.name = "Video ROM",
 	.start = 0xc0000,
 	.end = 0xc7fff,
 	.flags = IORESOURCE_ROM,
 };
+#endif
 
 static struct resource video_ram_resource = {
 	.name = "Video RAM area",
@@ -599,6 +601,19 @@ static void __init print_memory_map(char
 	}
 }
 
+#ifdef CONFIG_XEN
+void __init smp_alloc_memory(void)
+{
+	int cpu;
+
+	for (cpu = 1; cpu < NR_CPUS; cpu++) {
+		cpu_gdt_descr[cpu].address = (unsigned long)
+			alloc_bootmem_low_pages(PAGE_SIZE);
+		/* XXX free unused pages later */
+	}
+}
+#endif
+
 void __init setup_arch(char **cmdline_p)
 {
 	int i, j;
@@ -740,6 +755,11 @@ void __init setup_arch(char **cmdline_p)
 		}
 	}
 #endif
+#ifdef CONFIG_SMP
+#ifdef CONFIG_XEN
+	smp_alloc_memory();
+#endif
+#endif
 	paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
 	/*
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Thu Aug 11 12:38:44 2005 -0800
@@ -276,9 +276,11 @@ void __init cpu_init (void)
 	 * Initialize the per-CPU GDT with the boot GDT,
 	 * and set up the GDT descriptor:
 	 */
+#ifndef CONFIG_XEN
 	if (cpu) {
 		memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
 	}
+#endif
 
 	cpu_gdt_descr[cpu].size = GDT_SIZE;
 	cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Thu Aug 11 12:38:44 2005 -0800
@@ -28,7 +28,12 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apicdef.h>
+#ifdef CONFIG_XEN
+#include <asm-xen/evtchn.h>
 
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
+
+#else
 /*
  * Smarter SMP flushing macros.
  *		c/o Linus Torvalds.
@@ -44,6 +49,7 @@ static struct mm_struct * flush_mm;
 static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
 #define FLUSH_ALL	-1ULL
+#endif
 
 /*
  * We cannot call mmdrop() because we are in interrupt context,
@@ -57,6 +63,7 @@ static inline void leave_mm (unsigned lo
 	load_cr3(swapper_pg_dir);
 }
 
+#ifndef CONFIG_XEN
 /*
 *
 * The flush IPI assumes that a thread switch happens in this order:
@@ -250,6 +257,18 @@ void flush_tlb_all(void)
 {
 	on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
 
 void smp_kdb_stop(void)
 {
@@ -310,13 +329,21 @@ static void __smp_call_function (void (*
 
 	/* Wait for response */
 	while (atomic_read(&data.started) != cpus)
+#ifndef CONFIG_XEN
 		cpu_relax();
+#else
+		barrier();
+#endif
 
 	if (!wait)
 		return;
 
 	while (atomic_read(&data.finished) != cpus)
+#ifndef CONFIG_XEN
 		cpu_relax();
+#else
+		barrier();
+#endif
 }
 
 /*
@@ -350,7 +377,11 @@ void smp_stop_cpu(void)
 	 */
 	cpu_clear(smp_processor_id(), cpu_online_map);
 	local_irq_disable();
+#ifndef CONFIG_XEN
 	disable_local_APIC();
+#else
+	xxprint("stop_this_cpu disable_local_APIC\n");
+#endif
 	local_irq_enable();
 }
 
@@ -364,8 +395,10 @@ static void smp_really_stop_cpu(void *du
 void smp_send_stop(void)
 {
 	int nolock = 0;
+#ifndef CONFIG_XEN
 	if (reboot_force)
 		return;
+#endif
 	/* Don't deadlock on the call lock in panic */
 	if (!spin_trylock(&call_lock)) {
 		/* ignore locking because we have paniced anyways */
@@ -376,7 +409,11 @@ void smp_send_stop(void)
 	spin_unlock(&call_lock);
 
 	local_irq_disable();
+#ifdef CONFIG_XEN
+	xxprint("stop_this_cpu disable_local_APIC\n");
+#else
 	disable_local_APIC();
+#endif
 	local_irq_enable();
 }
 
@@ -385,18 +422,32 @@ void smp_send_stop(void)
 * all the work is done automatically when
 * we return from the interrupt.
 */
+#ifndef CONFIG_XEN
 asmlinkage void smp_reschedule_interrupt(void)
+#else
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
+#endif
 {
+#ifndef CONFIG_XEN
 	ack_APIC_irq();
+#else
+	return IRQ_HANDLED;
+#endif
 }
 
+#ifndef CONFIG_XEN
 asmlinkage void smp_call_function_interrupt(void)
+#else
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
+#endif
 {
 	void (*func) (void *info) = call_data->func;
 	void *info = call_data->info;
 	int wait = call_data->wait;
 
+#ifndef CONFIG_XEN
 	ack_APIC_irq();
+#endif
 	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
@@ -413,10 +464,16 @@ asmlinkage void smp_call_function_interr
 		mb();
 		atomic_inc(&call_data->finished);
 	}
+#ifdef CONFIG_XEN
+	return IRQ_HANDLED;
+#endif
 }
 
 int safe_smp_processor_id(void)
 {
+#ifdef CONFIG_XEN
+	return smp_processor_id();
+#else
 	int apicid, i;
 
 	if (disable_apic)
@@ -437,4 +494,5 @@ int safe_smp_processor_id(void)
 		return 0;
 
 	return 0; /* Should not happen */
+#endif
 }
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Thu Aug 11 12:38:44 2005 -0800
@@ -47,6 +47,9 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
+#ifdef CONFIG_XEN
+#include <linux/interrupt.h>
+#endif
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -57,12 +60,21 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/nmi.h>
+#ifdef CONFIG_XEN
+#include <asm/arch_hooks.h>
+
+#include <asm-xen/evtchn.h>
+#endif
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
 #define __cpuinit __init
 #define __cpuinitdata __initdata
 
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+	unsigned int maxcpus = NR_CPUS;
+#endif
+
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 /* Package ID of each logical CPU */
@@ -96,6 +108,7 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 
+#ifndef CONFIG_XEN
 /*
 * Trampoline 80x86 program as an array.
 */
@@ -115,6 +128,7 @@ static unsigned long __cpuinit setup_tra
 	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
 	return virt_to_phys(tramp);
 }
+#endif
 
 /*
 * The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +144,7 @@ static void __cpuinit smp_store_cpu_info
 	print_cpu_info(c);
 }
 
+#ifndef CONFIG_XEN
 /*
 * New Funky TSC sync algorithm borrowed from IA64.
 * Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +346,7 @@ static __init int notscsync_setup(char *
 	return 0;
 }
 __setup("notscsync", notscsync_setup);
+#endif
 
 static atomic_t init_deasserted __cpuinitdata;
 
@@ -343,6 +359,7 @@ void __cpuinit smp_callin(void)
 	int cpuid, phys_id;
 	unsigned long timeout;
 
+#ifndef CONFIG_XEN
 	/*
	 * If waken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
@@ -352,10 +369,15 @@ void __cpuinit smp_callin(void)
 	while (!atomic_read(&init_deasserted))
 		cpu_relax();
 
+#endif
 	/*
	 * (This works even if the APIC is not enabled.)
	 */
+#ifndef CONFIG_XEN
 	phys_id = GET_APIC_ID(apic_read(APIC_ID));
+#else
+	phys_id = smp_processor_id();
+#endif
 	cpuid = smp_processor_id();
 	if (cpu_isset(cpuid, cpu_callin_map)) {
 		panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +411,7 @@ void __cpuinit smp_callin(void)
 			cpuid);
 	}
 
+#ifndef CONFIG_XEN
 	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
@@ -398,6 +421,7 @@ void __cpuinit smp_callin(void)
 
 	Dprintk("CALLIN, before setup_local_APIC().\n");
 	setup_local_APIC();
+#endif
 
 	/*
	 * Get our bogomips.
@@ -405,7 +429,9 @@ void __cpuinit smp_callin(void)
 	calibrate_delay();
 	Dprintk("Stack at about %p\n",&cpuid);
 
+#ifndef CONFIG_XEN
 	disable_APIC_timer();
+#endif
 
 	/*
	 * Save our processor parameters
@@ -418,6 +444,29 @@ void __cpuinit smp_callin(void)
 	cpu_set(cpuid, cpu_callin_map);
 }
 
+#ifdef CONFIG_XEN
+static irqreturn_t ldebug_interrupt(
+	int irq, void *dev_id, struct pt_regs *regs)
+{
+	return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+	sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+	BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+	                   SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+#endif
+
 /*
 * Setup code on secondary processor (after comming out of the trampoline)
 */
@@ -434,6 +483,7 @@ void __cpuinit start_secondary(void)
 	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
 	barrier();
 
+#ifndef CONFIG_XEN
 	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
 	setup_secondary_APIC_clock();
 
@@ -446,6 +496,12 @@ void __cpuinit start_secondary(void)
 	}
 
 	enable_APIC_timer();
+#else
+	local_setup_timer();
+	ldebug_setup();
+	smp_intr_init();
+	local_irq_enable();
+#endif
 
 	/*
	 * Allow the master to continue.
@@ -453,10 +509,12 @@ void __cpuinit start_secondary(void)
 	cpu_set(smp_processor_id(), cpu_online_map);
 	mb();
 
+#ifndef CONFIG_XEN
 	/* Wait for TSC sync to not schedule things before.
	   We still process interrupts, which could see an inconsistent
	   time in that window unfortunately. */
 	tsc_sync_wait();
+#endif
 
 	cpu_idle();
 }
@@ -464,6 +522,7 @@ void __cpuinit start_secondary(void)
 extern volatile unsigned long init_rsp;
 extern void (*initial_code)(void);
 
+#ifndef CONFIG_XEN
 #if APIC_DEBUG
 static void inquire_remote_apic(int apicid)
 {
@@ -627,6 +686,7 @@ static int __cpuinit wakeup_secondary_vi
 
 	return (send_status | accept_status);
 }
+#endif
 
 /*
 * Boot one CPU.
@@ -637,6 +697,14 @@ static int __cpuinit do_boot_cpu(int cpu
 	unsigned long boot_error;
 	int timeout;
 	unsigned long start_rip;
+#ifdef CONFIG_XEN
+	vcpu_guest_context_t ctxt;
+	extern void startup_64_smp(void);
+	extern void hypervisor_callback(void);
+	extern void failsafe_callback(void);
+	extern void smp_trap_init(trap_info_t *);
+	int i;
+#endif
 	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
@@ -649,7 +717,11 @@ static int __cpuinit do_boot_cpu(int cpu
 
 	cpu_pda[cpu].pcurrent = idle;
 
+#ifndef CONFIG_XEN
 	start_rip = setup_trampoline();
+#else
+	start_rip = (unsigned long)startup_64_smp;
+#endif
 
 	init_rsp = idle->thread.rsp;
 	per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +738,93 @@ static int __cpuinit do_boot_cpu(int cpu
 
 	atomic_set(&init_deasserted, 0);
 
+#ifdef CONFIG_XEN
+	if (cpu_gdt_descr[0].size > PAGE_SIZE)
+		BUG();
+	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+	memcpy((void *)cpu_gdt_descr[cpu].address,
+	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	ctxt.flags = VGCF_IN_KERNEL;
+	ctxt.user_regs.ds = __USER_DS;
+	ctxt.user_regs.es = __USER_DS;
+	ctxt.user_regs.fs = 0;
+	ctxt.user_regs.gs = 0;
+	ctxt.user_regs.ss = __KERNEL_DS|0x3;
+	ctxt.user_regs.cs = __KERNEL_CS|0x3;
+	ctxt.user_regs.rip = start_rip;
+	ctxt.user_regs.rsp = idle->thread.rsp;
+#define X86_EFLAGS_IOPL_RING3 0x3000
+	ctxt.user_regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_IOPL_RING3;
+
+	/* FPU is set up to default initial state. */
+	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+	/* Virtual IDT is empty at start-of-day. */
+	for ( i = 0; i < 256; i++ )
+	{
+		ctxt.trap_ctxt[i].vector = i;
+		ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+	}
+	smp_trap_init(ctxt.trap_ctxt);
+
+	/* No LDT. */
+	ctxt.ldt_ents = 0;
+
+	{
+		unsigned long va;
+		int f;
+
+		for (va = cpu_gdt_descr[cpu].address, f = 0;
+		     va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+		     va += PAGE_SIZE, f++) {
+			ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+			make_page_readonly((void *)va);
+		}
+		ctxt.gdt_ents = GDT_ENTRIES;
+	}
+
+	/* Ring 1 stack is the initial stack. */
+	ctxt.kernel_ss = __KERNEL_DS;
+	ctxt.kernel_sp = idle->thread.rsp;
+
+	/* Callback handlers. */
+	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+	ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+	ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+
+	boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+	if (!boot_error) {
+		/*
		 * allow APs to start initializing.
		 */
+		Dprintk("Before Callout %d.\n", cpu);
+		cpu_set(cpu, cpu_callout_map);
+		Dprintk("After Callout %d.\n", cpu);
+
+		/*
		 * Wait 5s total for a response
		 */
+		for (timeout = 0; timeout < 50000; timeout++) {
+			if (cpu_isset(cpu, cpu_callin_map))
+				break;	/* It has booted */
+			udelay(100);
+		}
+
+		if (cpu_isset(cpu, cpu_callin_map)) {
+			/* number CPUs logically, starting from 1 (BSP is 0) */
+			Dprintk("CPU has booted.\n");
+		} else {
+			boot_error= 1;
+		}
+	}
+	x86_cpu_to_apicid[cpu] = apicid;
+#else
 	Dprintk("Setting warm reset code and vector.\n");
 
 	CMOS_WRITE(0xa, 0xf);
@@ -729,6 +888,7 @@ static int __cpuinit do_boot_cpu(int cpu
 #endif
 		}
 	}
+#endif
 	if (boot_error) {
 		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
 		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -790,6 +950,7 @@ static __cpuinit void detect_siblings(vo
 		}
 	}
 
+#ifndef CONFIG_XEN
 /*
 * Cleanup possible dangling ends...
 */
@@ -817,6 +978,7 @@ static __cpuinit void smp_cleanup_boot(v
 	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
 #endif
 }
+#endif
 
 /*
 * Fall back to non SMP mode after errors.
@@ -827,10 +989,12 @@ static __cpuinit void disable_smp(void)
 {
 	cpu_present_map = cpumask_of_cpu(0);
 	cpu_possible_map = cpumask_of_cpu(0);
+#ifndef CONFIG_XEN
 	if (smp_found_config)
 		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
 	else
 		phys_cpu_present_map = physid_mask_of_physid(0);
+#endif
 	cpu_set(0, cpu_sibling_map[0]);
 	cpu_set(0, cpu_core_map[0]);
 }
@@ -857,6 +1021,7 @@ static __cpuinit void enforce_max_cpus(u
 */
 static int __cpuinit smp_sanity_check(unsigned max_cpus)
 {
+#ifndef CONFIG_XEN
 	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
 		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
 		       hard_smp_processor_id());
@@ -896,13 +1061,19 @@ static int __cpuinit smp_sanity_check(un
 		nr_ioapics = 0;
 		return -1;
 	}
+#endif
 
 	/*
	 * If SMP should be disabled, then really disable it!
	 */
 	if (!max_cpus) {
+#ifdef CONFIG_XEN
+		HYPERVISOR_shared_info->n_vcpu = 1;
+#endif
 		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+#ifndef CONFIG_XEN
 		nr_ioapics = 0;
+#endif
 		return -1;
 	}
 
@@ -917,7 +1088,10 @@ void __cpuinit smp_prepare_cpus(unsigned
 {
 	int i;
 
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
 	nmi_watchdog_default();
+#endif
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
 
@@ -927,8 +1101,12 @@ void __cpuinit smp_prepare_cpus(unsigned
	 * Fill in cpu_present_mask
	 */
 	for (i = 0; i < NR_CPUS; i++) {
+#ifndef CONFIG_XEN
 		int apicid = cpu_present_to_apicid(i);
 		if (physid_isset(apicid, phys_cpu_present_map)) {
+#else
+		if (i < HYPERVISOR_shared_info->n_vcpu) {
+#endif
 			cpu_set(i, cpu_present_map);
 			/* possible map would be different if we supported real
			   CPU hotplug. */
@@ -942,6 +1120,9 @@ void __cpuinit smp_prepare_cpus(unsigned
 		return;
 	}
 
+#ifdef CONFIG_XEN
+	smp_intr_init();
+#else
 
 	/*
	 * Switch from PIC to APIC mode.
@@ -954,20 +1135,26 @@ void __cpuinit smp_prepare_cpus(unsigned
 		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
 		/* Or can we switch back to PIC here? */
 	}
+#endif
 
 	/*
	 * Now start the IO-APICs
	 */
+#if defined(CONFIG_XEN) && !defined(CONFIG_XEN_PRIVILEGED_GUEST)
+#else
 	if (!skip_ioapic_setup && nr_ioapics)
 		setup_IO_APIC();
 	else
 		nr_ioapics = 0;
+#endif
 
 	/*
	 * Set up local APIC timer on boot CPU.
	 */
 
+#ifndef CONFIG_XEN
 	setup_boot_APIC_clock();
+#endif
 }
 
 /*
@@ -989,17 +1176,23 @@ void __init smp_prepare_boot_cpu(void)
 int __cpuinit __cpu_up(unsigned int cpu)
 {
 	int err;
+#ifndef CONFIG_XEN
 	int apicid = cpu_present_to_apicid(cpu);
+#else
+	int apicid = cpu;
+#endif
 
 	WARN_ON(irqs_disabled());
 
 	Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
 
+#ifndef CONFIG_XEN
 	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
	    !physid_isset(apicid, phys_cpu_present_map)) {
 		printk("__cpu_up: bad cpu %d\n", cpu);
 		return -EINVAL;
 	}
+#endif
 
 	/* Boot it! */
 	err = do_boot_cpu(cpu, apicid);
@@ -1021,15 +1214,76 @@ int __cpuinit __cpu_up(unsigned int cpu)
 */
 void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+#ifndef CONFIG_XEN
 	zap_low_mappings();
 	smp_cleanup_boot();
 
#ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
#endif
+#endif
 
 	detect_siblings();
+#ifndef CONFIG_XEN
 	time_init_gtod();
 
 	check_nmi_watchdog();
+#endif
 }
+
+#ifdef CONFIG_XEN
+extern int bind_ipi_to_irq(int ipi);
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(resched_irq, cpu) =
+		bind_ipi_to_irq(RESCHEDULE_VECTOR);
+	sprintf(resched_name[cpu], "resched%d", cpu);
+	BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+	                   SA_INTERRUPT, resched_name[cpu], NULL));
+
+	per_cpu(callfunc_irq, cpu) =
+		bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+	sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+	BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+	                   smp_call_function_interrupt,
+	                   SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+	int cpu = smp_processor_id();
+
+	free_irq(per_cpu(resched_irq, cpu), NULL);
+	unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+	free_irq(per_cpu(callfunc_irq, cpu), NULL);
+	unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+extern void local_setup_timer_irq(void);
+extern void local_teardown_timer_irq(void);
+
+void smp_suspend(void)
+{
+	/* XXX todo: take down time and ipi's on all cpus */
+	local_teardown_timer_irq();
+	smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+	/* XXX todo: restore time and ipi's on all cpus */
+	smp_intr_init();
+	local_setup_timer_irq();
+}
+#endif
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Thu Aug 11 12:38:44 2005 -0800
@@ -953,6 +953,17 @@ void __init trap_init(void)
 	cpu_init();
 }
 
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+	trap_info_t *t = trap_table;
+
+	for (t = trap_table; t->address; t++) {
+		trap_ctxt[t->vector].flags = t->flags;
+		trap_ctxt[t->vector].cs = t->cs;
+		trap_ctxt[t->vector].address = t->address;
+	}
+}
+
 
 /* Actual parsing is done early in setup.c. */
 static int __init oops_dummy(char *s)
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S	Thu Aug 11 10:44:59 2005 -0800
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S	Thu Aug 11 12:38:44 2005 -0800
@@ -8,11 +8,14 @@
 #define sizeof_vcpu_shift	3
 
 #ifdef CONFIG_SMP
-#define preempt_disable(reg)	incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)	decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg)	incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg)	decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
 #define XEN_GET_VCPU_INFO(reg)	preempt_disable(%rbp)			; \
 				movq %gs:pda_cpunumber,reg		; \
-				shl  $sizeof_vcpu_shift,reg		; \
+				shl  $32, reg				; \
+				shr  $32-sizeof_vcpu_shift,reg		; \
 				addq HYPERVISOR_shared_info,reg
 #define XEN_PUT_VCPU_INFO(reg)	preempt_enable(%rbp)			; \
 #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
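The new XEN_GET_VCPU_INFO sequence swaps a single shl for a shl $32 / shr $32-sizeof_vcpu_shift pair. Presumably the 64-bit movq of the 32-bit pda_cpunumber field can drag neighbouring PDA bytes into the upper half of the register; the shift pair clears those bits while still scaling by 1 << sizeof_vcpu_shift. A C sketch of the arithmetic (the motivation is an assumption; the shift amounts come from the diff):

#include <stdint.h>

/* (x << 32) >> (32 - 3) == (uint64_t)(uint32_t)x << 3 for logical shifts:
 * zero-extend the low 32 bits, then multiply by 1 << sizeof_vcpu_shift. */
static uint64_t vcpu_info_offset(uint64_t raw_pda_load)
{
    return (raw_pda_load << 32) >> (32 - 3);
}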
--- a/tools/console/client/main.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/tools/console/client/main.c	Thu Aug 11 12:38:44 2005 -0800
@@ -162,14 +162,11 @@ int main(int argc, char **argv)
 	struct termios attr;
 	int domid;
 	int xc_handle;
-	char *sopt = "hf:pc";
+	char *sopt = "h";
 	int ch;
 	int opt_ind=0;
 	struct option lopt[] = {
 		{ "help",    0, 0, 'h' },
-		{ "file",    1, 0, 'f' },
-		{ "pty",     0, 0, 'p' },
-		{ "ctty",    0, 0, 'c' },
 		{ 0 },
 
 	};
@@ -178,6 +175,7 @@ int main(int argc, char **argv)
 	int spty;
 	unsigned int len = 0;
 	struct xs_handle *xs;
+	char *end;
 
 	while((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
 		switch(ch) {
@@ -195,7 +193,13 @@ int main(int argc, char **argv)
 		exit(EINVAL);
 	}
 
-	domid = atoi(argv[optind]);
+	domid = strtol(argv[optind], &end, 10);
+	if (end && *end) {
+		fprintf(stderr, "Invalid DOMID `%s'\n", argv[optind]);
+		fprintf(stderr, "Try `%s --help' for more information.\n",
+			argv[0]);
+		exit(EINVAL);
+	}
 
 	xs = xs_daemon_open();
 	if (xs == NULL) {
@@ -211,7 +215,11 @@ int main(int argc, char **argv)
 
 	snprintf(path, sizeof(path), "/console/%d/tty", domid);
 	str_pty = xs_read(xs, path, &len);
-	if (str_pty == NULL) {
+	/* FIXME consoled currently does not assume domain-0 doesn't have a
	   console which is good when we break domain-0 up. To keep us
	   user friendly, we'll bail out here since no data will ever show
	   up on domain-0. */
+	if (domid == 0 || str_pty == NULL) {
 		err(errno, "Could not read tty from store");
 	}
 	spty = open(str_pty, O_RDWR | O_NOCTTY);
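The strtol()-plus-end-pointer idiom adopted above is the standard way to reject trailing garbage that atoi() silently ignores. A self-contained sketch of the same pattern, using a hypothetical parse_domid() helper:

#include <stdlib.h>

/* Returns 0 and stores the value on success; -1 if the string is empty
 * or contains anything besides a decimal number. atoi("12junk") would
 * happily return 12; this rejects it. */
static int parse_domid(const char *arg, int *out)
{
    char *end;
    long v = strtol(arg, &end, 10);
    if (end == arg || *end != '\0')
        return -1;
    *out = (int)v;
    return 0;
}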
--- a/tools/ioemu/target-i386-dm/Makefile	Thu Aug 11 10:44:59 2005 -0800
+++ b/tools/ioemu/target-i386-dm/Makefile	Thu Aug 11 12:38:44 2005 -0800
@@ -376,10 +376,10 @@ mixeng.o: mixeng.c mixeng.h mixeng_templ
 	$(CC) $(DEFINES) -c -o $@ $<
 
 clean:
-	rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+	rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
 
 distclean:
-	rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp qemu-vgaram-bin
+	rm -rf *.o *.a *~ $(PROGS) gen-op.h opc.h op.h nwfpe slirp
 
 install: all
 	if [ ! -d $(INSTALL_DIR) ];then mkdir -p $(INSTALL_DIR);fi
@@ -387,8 +387,6 @@ install: all
 	install -m 755 -s $(PROGS) "$(INSTALL_DIR)"
 	install -m 755 qemu-dm.debug "$(INSTALL_DIR)"
 	install -m 755 qemu-ifup "$(DESTDIR)$(configdir)"
-	gunzip -c qemu-vgaram-bin.gz >qemu-vgaram-bin
-	install -m 755 qemu-vgaram-bin "$(DESTDIR)$(configdir)"
 ifneq ($(wildcard .depend),)
 include .depend
 endif
Binary file tools/ioemu/target-i386-dm/qemu-vgaram-bin.gz has changed
--- a/tools/python/xen/xm/main.py	Thu Aug 11 10:44:59 2005 -0800
+++ b/tools/python/xen/xm/main.py	Thu Aug 11 12:38:44 2005 -0800
@@ -265,7 +265,7 @@ def xm_brief_list(domsinfo):
     print 'Name              Id  Mem(MB)  CPU VCPU(s)  State  Time(s)'
     for dominfo in domsinfo:
         if dominfo.has_key("ssidref1"):
-            print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f     %s:%(ssidref2)02x/p:%(ssidref1)02x" % dominfo)
+            print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f     s:%(ssidref2)02x/p:%(ssidref1)02x" % dominfo)
         else:
             print ("%(name)-16s %(dom)3d  %(mem)7d  %(cpu)3s  %(vcpus)5d   %(state)5s  %(cpu_time)7.1f" % dominfo)
 
@@ -605,6 +605,10 @@ def xm_lookup_cmd(cmd):
         deprecated(cmd,aliases[cmd])
         return commands[aliases[cmd]]
     else:
+        if len( cmd ) > 1:
+            matched_commands = filter( lambda (command, func): command[ 0:len(cmd) ] == cmd, commands.iteritems() )
+            if len( matched_commands ) == 1:
+                return matched_commands[0][1]
         err('Sub Command %s not found!' % cmd)
         usage()
 
--- a/xen/arch/ia64/vcpu.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/xen/arch/ia64/vcpu.c	Thu Aug 11 12:38:44 2005 -0800
@@ -587,6 +587,14 @@ void vcpu_pend_interrupt(VCPU *vcpu, UIN
 	set_bit(vector,PSCBX(vcpu,irr));
 	PSCB(vcpu,pending_interruption) = 1;
     }
+
+    /* Keir: I think you should unblock when an interrupt is pending. */
+    {
+        int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags);
+        vcpu_unblock(vcpu);
+        if ( running )
+            smp_send_event_check_cpu(vcpu->processor);
+    }
 }
 
 void early_tick(VCPU *vcpu)
--- a/xen/arch/x86/domain.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/xen/arch/x86/domain.c	Thu Aug 11 12:38:44 2005 -0800
@@ -279,8 +279,6 @@ void arch_do_createdomain(struct vcpu *v
 
     shadow_lock_init(d);
     INIT_LIST_HEAD(&d->arch.free_shadow_frames);
-
-    init_domain_time(d);
 }
 
 void arch_do_boot_vcpu(struct vcpu *v)
@@ -503,7 +501,10 @@ int arch_set_info_guest(
     }
 
     update_pagetables(v);
-
+
+    if ( v->vcpu_id == 0 )
+        init_domain_time(d);
+
     /* Don't redo final setup */
     set_bit(_VCPUF_initialised, &v->vcpu_flags);
 
--- a/xen/arch/x86/domain_build.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/xen/arch/x86/domain_build.c	Thu Aug 11 12:38:44 2005 -0800
@@ -74,7 +74,7 @@ int construct_dom0(struct domain *d,
                    unsigned long _initrd_start, unsigned long initrd_len,
                    char *cmdline)
 {
-    int i, rc, dom0_pae, xen_pae;
+    int i, rc, dom0_pae, xen_pae, order;
     unsigned long pfn, mfn;
     unsigned long nr_pages;
     unsigned long nr_pt_pages;
@@ -143,10 +143,6 @@ int construct_dom0(struct domain *d,
     nr_pages = avail_domheap_pages() +
         ((initrd_len + PAGE_SIZE - 1) >> PAGE_SHIFT) +
         ((image_len  + PAGE_SIZE - 1) >> PAGE_SHIFT);
-    if ( (page = alloc_largest(d, nr_pages)) == NULL )
-        panic("Not enough RAM for DOM0 reservation.\n");
-    alloc_spfn = page_to_pfn(page);
-    alloc_epfn = alloc_spfn + d->tot_pages;
 
     if ( (rc = parseelfimage(&dsi)) != 0 )
         return rc;
@@ -215,8 +211,15 @@ int construct_dom0(struct domain *d,
 #endif
     }
 
-    if ( ((v_end - dsi.v_start) >> PAGE_SHIFT) > (alloc_epfn - alloc_spfn) )
-        panic("Insufficient contiguous RAM to build kernel image.\n");
+    order = get_order(v_end - dsi.v_start);
+    if ( (1UL << order) > nr_pages )
+        panic("Domain 0 allocation is too small for kernel image.\n");
+
+    /* Allocate from DMA pool: PAE L3 table must be below 4GB boundary. */
+    if ( (page = alloc_domheap_pages(d, order, ALLOC_DOM_DMA)) == NULL )
+        panic("Not enough RAM for domain 0 allocation.\n");
+    alloc_spfn = page_to_pfn(page);
+    alloc_epfn = alloc_spfn + d->tot_pages;
 
     printk("PHYSICAL MEMORY ARRANGEMENT:\n"
            " Dom0 alloc.:   %"PRIphysaddr"->%"PRIphysaddr,
@@ -615,6 +618,8 @@ int construct_dom0(struct domain *d,
     /* DOM0 gets access to everything. */
     physdev_init_dom0(d);
 
+    init_domain_time(d);
+
     set_bit(_DOMF_constructed, &d->domain_flags);
 
     new_thread(v, dsi.v_kernentry, vstack_end, vstartinfo_start);
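The new sizing test works because get_order() picks the smallest n such that 2^n pages cover the given byte count, so (1UL << order) > nr_pages really does mean "the image cannot fit in the domain's page reservation". A sketch of that contract (illustrative, not Xen's implementation; 4KB pages assumed):

/* Smallest order such that (1UL << order) pages of 4KB cover `size` bytes. */
static int sketch_get_order(unsigned long size)
{
    int order = 0;
    unsigned long pages = (size + 4095UL) >> 12;
    while ((1UL << order) < pages)
        order++;
    return order;
}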
--- a/xen/arch/x86/mm.c	Thu Aug 11 10:44:59 2005 -0800
+++ b/xen/arch/x86/mm.c	Thu Aug 11 12:38:44 2005 -0800
@@ -95,6 +95,7 @@
 #include <xen/irq.h>
 #include <xen/softirq.h>
 #include <xen/domain_page.h>
+#include <xen/event.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
@@ -856,6 +857,14 @@ static int alloc_l3_table(struct pfn_inf
 
     ASSERT(!shadow_mode_refcounts(d));
 
+#ifdef CONFIG_X86_PAE
+    if ( pfn >= 0x100000 )
+    {
+        MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
+        return 0;
+    }
+#endif
+
     pl3e = map_domain_page(pfn);
     for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
     {
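The 0x100000 bound is easiest to read as arithmetic: with 4KB pages, frame 0x100000 starts at byte 0x100000 << 12 = 2^32, i.e. exactly the 4GB boundary, and a PAE top-level table must sit below it because the PDPT base programmed into CR3 is a 32-bit physical address. A one-assert check of that arithmetic:

#include <assert.h>

int main(void)
{
    unsigned long long pfn = 0x100000ULL;   /* the limit in the hunk above */
    assert((pfn << 12) == 0x100000000ULL);  /* exactly 4GB */
    return 0;
}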
59.1 --- a/xen/arch/x86/setup.c Thu Aug 11 10:44:59 2005 -0800 59.2 +++ b/xen/arch/x86/setup.c Thu Aug 11 12:38:44 2005 -0800 59.3 @@ -248,10 +248,11 @@ void __init __start_xen(multiboot_info_t 59.4 { 59.5 char *cmdline; 59.6 module_t *mod = (module_t *)__va(mbi->mods_addr); 59.7 - unsigned long firsthole_start, nr_pages; 59.8 + unsigned long nr_pages, modules_length; 59.9 unsigned long initial_images_start, initial_images_end; 59.10 unsigned long _initrd_start = 0, _initrd_len = 0; 59.11 unsigned int initrdidx = 1; 59.12 + physaddr_t s, e; 59.13 struct e820entry e820_raw[E820MAX]; 59.14 int i, e820_raw_nr = 0, bytes = 0; 59.15 struct ns16550_defaults ns16550 = { 59.16 @@ -330,22 +331,31 @@ void __init __start_xen(multiboot_info_t 59.17 59.18 max_page = init_e820(e820_raw, &e820_raw_nr); 59.19 59.20 - /* Find the first high-memory RAM hole. */ 59.21 - for ( i = 0; i < e820.nr_map; i++ ) 59.22 - if ( (e820.map[i].type == E820_RAM) && 59.23 - (e820.map[i].addr >= 0x100000) ) 59.24 - break; 59.25 - firsthole_start = e820.map[i].addr + e820.map[i].size; 59.26 + modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start; 59.27 59.28 - /* Relocate the Multiboot modules. */ 59.29 - initial_images_start = xenheap_phys_end; 59.30 - initial_images_end = initial_images_start + 59.31 - (mod[mbi->mods_count-1].mod_end - mod[0].mod_start); 59.32 - if ( initial_images_end > firsthole_start ) 59.33 + /* Find a large enough RAM extent to stash the DOM0 modules. */ 59.34 + for ( i = 0; ; i++ ) 59.35 { 59.36 - printk("Not enough memory to stash the DOM0 kernel image.\n"); 59.37 - for ( ; ; ) ; 59.38 + if ( (e820.map[i].type == E820_RAM) && 59.39 + (e820.map[i].size >= modules_length) && 59.40 + ((e820.map[i].addr + e820.map[i].size) >= 59.41 + (xenheap_phys_end + modules_length)) ) 59.42 + { 59.43 + /* Stash as near as possible to the beginning of the RAM extent. */ 59.44 + initial_images_start = e820.map[i].addr; 59.45 + if ( initial_images_start < xenheap_phys_end ) 59.46 + initial_images_start = xenheap_phys_end; 59.47 + initial_images_end = initial_images_start + modules_length; 59.48 + break; 59.49 + } 59.50 + 59.51 + if ( i == e820.nr_map ) 59.52 + { 59.53 + printk("Not enough memory to stash the DOM0 kernel image.\n"); 59.54 + for ( ; ; ) ; 59.55 + } 59.56 } 59.57 + 59.58 #if defined(CONFIG_X86_32) 59.59 memmove((void *)initial_images_start, /* use low mapping */ 59.60 (void *)mod[0].mod_start, /* use low mapping */ 59.61 @@ -358,16 +368,23 @@ void __init __start_xen(multiboot_info_t 59.62 59.63 /* Initialise boot-time allocator with all RAM situated after modules. */ 59.64 xenheap_phys_start = init_boot_allocator(__pa(&_end)); 59.65 - nr_pages = 0; 59.66 + nr_pages = 0; 59.67 for ( i = 0; i < e820.nr_map; i++ ) 59.68 { 59.69 if ( e820.map[i].type != E820_RAM ) 59.70 continue; 59.71 + 59.72 nr_pages += e820.map[i].size >> PAGE_SHIFT; 59.73 - if ( (e820.map[i].addr + e820.map[i].size) >= initial_images_end ) 59.74 - init_boot_pages((e820.map[i].addr < initial_images_end) ? 59.75 - initial_images_end : e820.map[i].addr, 59.76 - e820.map[i].addr + e820.map[i].size); 59.77 + 59.78 + /* Initialise boot heap, skipping Xen heap and dom0 modules. */ 59.79 + s = e820.map[i].addr; 59.80 + e = s + e820.map[i].size; 59.81 + if ( s < xenheap_phys_end ) 59.82 + s = xenheap_phys_end; 59.83 + if ( (s < initial_images_end) && (e > initial_images_start) ) 59.84 + s = initial_images_end; 59.85 + init_boot_pages(s, e); 59.86 + 59.87 #if defined (CONFIG_X86_64) 59.88 /* 59.89 * x86/64 maps all registered RAM. 
59.90 @@ -404,10 +421,30 @@ void __init __start_xen(multiboot_info_t
59.91
59.92     end_boot_allocator();
59.93
59.94 -    init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
59.95 -    printk("Xen heap: %luMB (%lukB)\n",
59.96 -           (xenheap_phys_end-xenheap_phys_start) >> 20,
59.97 -           (xenheap_phys_end-xenheap_phys_start) >> 10);
59.98 +    /* Initialise the Xen heap, skipping RAM holes. */
59.99 +    nr_pages = 0;
59.100 +    for ( i = 0; i < e820.nr_map; i++ )
59.101 +    {
59.102 +        if ( e820.map[i].type != E820_RAM )
59.103 +            continue;
59.104 +
59.105 +        s = e820.map[i].addr;
59.106 +        e = s + e820.map[i].size;
59.107 +        if ( s < xenheap_phys_start )
59.108 +            s = xenheap_phys_start;
59.109 +        if ( e > xenheap_phys_end )
59.110 +            e = xenheap_phys_end;
59.111 +
59.112 +        if ( s < e )
59.113 +        {
59.114 +            nr_pages += (e - s) >> PAGE_SHIFT;
59.115 +            init_xenheap_pages(s, e);
59.116 +        }
59.117 +    }
59.118 +
59.119 +    printk("Xen heap: %luMB (%lukB)\n",
59.120 +           nr_pages >> (20 - PAGE_SHIFT),
59.121 +           nr_pages << (PAGE_SHIFT - 10));
59.122
59.123     early_boot = 0;
59.124
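The Xen heap is now built per-extent as well, clamping each RAM range to the [xenheap_phys_start, xenheap_phys_end) window and counting only pages that survive; with 4kB pages the new printk converts the page count via nr_pages >> 8 to MB and nr_pages << 2 to kB. The clamp is a plain range intersection, sketched here with illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    typedef uint64_t physaddr_t;

    /* Sketch: intersect a RAM extent [s, e) with a window [lo, hi);
     * returns the surviving whole pages, 0 if the extent misses it. */
    static unsigned long clamp_to_window(physaddr_t s, physaddr_t e,
                                         physaddr_t lo, physaddr_t hi)
    {
        if ( s < lo ) s = lo;
        if ( e > hi ) e = hi;
        return (s < e) ? (unsigned long)((e - s) >> PAGE_SHIFT) : 0;
    }

    int main(void)
    {
        /* A 1MB extent clipped to a window covering only its top half. */
        unsigned long pages = clamp_to_window(0x100000, 0x200000,
                                              0x180000, 0x10000000);
        printf("%lu pages (%lukB)\n", pages, pages << (PAGE_SHIFT - 10));
        return 0;
    }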
60.1 --- a/xen/arch/x86/time.c Thu Aug 11 10:44:59 2005 -0800
60.2 +++ b/xen/arch/x86/time.c Thu Aug 11 12:38:44 2005 -0800
60.3 @@ -44,6 +44,7 @@ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED
60.4  int timer_ack = 0;
60.5  unsigned long volatile jiffies;
60.6  static u32 wc_sec, wc_nsec; /* UTC time at last 'time update'. */
60.7 +static spinlock_t wc_lock = SPIN_LOCK_UNLOCKED;
60.8
60.9  struct time_scale {
60.10     int shift;
60.11 @@ -699,13 +700,14 @@ void do_settime(unsigned long secs, unsi
60.12     struct domain *d;
60.13     shared_info_t *s;
60.14
60.15 -    x = (secs * 1000000000ULL) + (u64)nsecs + system_time_base;
60.16 +    x = (secs * 1000000000ULL) + (u64)nsecs - system_time_base;
60.17     y = do_div(x, 1000000000);
60.18
60.19     wc_sec = _wc_sec = (u32)x;
60.20     wc_nsec = _wc_nsec = (u32)y;
60.21
60.22     read_lock(&domlist_lock);
60.23 +    spin_lock(&wc_lock);
60.24
60.25     for_each_domain ( d )
60.26     {
60.27 @@ -716,15 +718,18 @@ void do_settime(unsigned long secs, unsi
60.28         version_update_end(&s->wc_version);
60.29     }
60.30
60.31 +    spin_unlock(&wc_lock);
60.32     read_unlock(&domlist_lock);
60.33 }
60.34
60.35 void init_domain_time(struct domain *d)
60.36 {
60.37 +    spin_lock(&wc_lock);
60.38     version_update_begin(&d->shared_info->wc_version);
60.39     d->shared_info->wc_sec = wc_sec;
60.40     d->shared_info->wc_nsec = wc_nsec;
60.41     version_update_end(&d->shared_info->wc_version);
60.42 +    spin_unlock(&wc_lock);
60.43 }
60.44
60.45 static void local_time_calibration(void *unused)
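Two fixes here: do_settime() must subtract system_time_base rather than add it, since the shared wallclock fields record UTC at system time zero (a guest reconstructs current UTC as wc + system time), and the new wc_lock serializes those writes against init_domain_time() for domains created mid-update. The do_div() step is ordinary 64-bit division (quotient in place, remainder returned); a runnable model with illustrative values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t secs = 1123785524, nsecs = 250000000;
        uint64_t system_time_base = 5000000000ULL;  /* illustrative: 5s uptime */

        /* Rebase UTC to system time zero, then split via do_div(). */
        uint64_t x = secs * 1000000000ULL + nsecs - system_time_base;
        uint32_t wc_nsec = (uint32_t)(x % 1000000000); /* do_div remainder */
        uint32_t wc_sec  = (uint32_t)(x / 1000000000); /* do_div quotient  */

        printf("wc_sec=%u wc_nsec=%u\n", wc_sec, wc_nsec);
        return 0;
    }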
66.1 --- a/xen/common/page_alloc.c Thu Aug 11 10:44:59 2005 -0800
66.2 +++ b/xen/common/page_alloc.c Thu Aug 11 12:38:44 2005 -0800
66.3 @@ -418,6 +418,8 @@ void init_xenheap_pages(physaddr_t ps, p
66.4
66.5      ps = round_pgup(ps);
66.6      pe = round_pgdown(pe);
66.7 +    if ( pe <= ps )
66.8 +        return;
66.9
66.10     memguard_guard_range(phys_to_virt(ps), pe - ps);
66.11
66.12 @@ -487,19 +489,25 @@ void init_domheap_pages(physaddr_t ps, p
66.13
66.14     ps = round_pgup(ps) >> PAGE_SHIFT;
66.15     pe = round_pgdown(pe) >> PAGE_SHIFT;
66.16 +    if ( pe <= ps )
66.17 +        return;
66.18
66.19 -    if (ps < MAX_DMADOM_PFN && pe > MAX_DMADOM_PFN) {
66.20 -        init_heap_pages(MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
66.21 -        init_heap_pages(MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN),
66.22 -                        pe - MAX_DMADOM_PFN);
66.23 +    if ( (ps < MAX_DMADOM_PFN) && (pe > MAX_DMADOM_PFN) )
66.24 +    {
66.25 +        init_heap_pages(
66.26 +            MEMZONE_DMADOM, pfn_to_page(ps), MAX_DMADOM_PFN - ps);
66.27 +        init_heap_pages(
66.28 +            MEMZONE_DOM, pfn_to_page(MAX_DMADOM_PFN), pe - MAX_DMADOM_PFN);
66.29     }
66.30     else
66.31 +    {
66.32         init_heap_pages(pfn_dom_zone_type(ps), pfn_to_page(ps), pe - ps);
66.33 +    }
66.34 }
66.35
66.36
66.37 -struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order,
66.38 -                                     unsigned int flags)
66.39 +struct pfn_info *alloc_domheap_pages(
66.40 +    struct domain *d, unsigned int order, unsigned int flags)
66.41 {
66.42     struct pfn_info *pg;
66.43     cpumask_t mask;
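Both init functions now bail out on ranges that become empty after page rounding (which the relocated-modules path above can now produce), and init_domheap_pages() splits any range straddling MAX_DMADOM_PFN between the DMA-capable and ordinary zones. A traceable sketch of the split, with stub names standing in for the real heap calls:

    #include <stdio.h>

    enum zone { ZONE_DMA, ZONE_NORMAL };

    /* Stub standing in for init_heap_pages(): just trace the call. */
    static void add_to_heap(enum zone z, unsigned long pfn, unsigned long n)
    {
        printf("zone %s: %lu pages from pfn %#lx\n",
               z == ZONE_DMA ? "DMA" : "NORMAL", n, pfn);
    }

    /* Sketch: split a pfn range [ps, pe) at 'boundary' (standing in
     * for MAX_DMADOM_PFN) so each side lands in its own heap zone. */
    static void split_at_boundary(unsigned long ps, unsigned long pe,
                                  unsigned long boundary)
    {
        if ( pe <= ps )
            return;                        /* empty after page rounding */
        if ( (ps < boundary) && (pe > boundary) )
        {
            add_to_heap(ZONE_DMA, ps, boundary - ps);
            add_to_heap(ZONE_NORMAL, boundary, pe - boundary);
        }
        else
        {
            add_to_heap(ps < boundary ? ZONE_DMA : ZONE_NORMAL, ps, pe - ps);
        }
    }

    int main(void)
    {
        split_at_boundary(0x100, 0x300000, 0x100000); /* straddles the line */
        return 0;
    }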
67.1 --- a/xen/include/asm-x86/event.h Thu Aug 11 10:44:59 2005 -0800
67.2 +++ b/xen/include/asm-x86/event.h Thu Aug 11 12:38:44 2005 -0800
67.3 @@ -11,6 +11,19 @@
67.4
67.5  static inline void evtchn_notify(struct vcpu *v)
67.6  {
67.7 +    /*
67.8 +     * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
67.9 +     * pending flag. These values may fluctuate (after all, we hold no
67.10 +     * locks) but the key insight is that each change will cause
67.11 +     * evtchn_upcall_pending to be polled.
67.12 +     *
67.13 +     * NB2. We save VCPUF_running across the unblock to avoid a needless
67.14 +     * IPI for domains that we IPI'd to unblock.
67.15 +     */
67.16 +    int running = test_bit(_VCPUF_running, &v->vcpu_flags);
67.17 +    vcpu_unblock(v);
67.18 +    if ( running )
67.19 +        smp_send_event_check_cpu(v->processor);
67.20 }
67.21
67.22 #endif
68.1 --- a/xen/include/asm-x86/page.h Thu Aug 11 10:44:59 2005 -0800
68.2 +++ b/xen/include/asm-x86/page.h Thu Aug 11 12:38:44 2005 -0800
68.3 @@ -283,13 +283,9 @@ extern void paging_init(void);
68.4  static __inline__ int get_order(unsigned long size)
68.5  {
68.6      int order;
68.7 -
68.8 -    size = (size-1) >> (PAGE_SHIFT-1);
68.9 -    order = -1;
68.10 -    do {
68.11 +    size = (size-1) >> PAGE_SHIFT;
68.12 +    for ( order = 0; size; order++ )
68.13         size >>= 1;
68.14 -        order++;
68.15 -    } while (size);
68.16     return order;
68.17 }
68.18
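get_order() returns the smallest order such that 2^order pages cover the requested size; the rewrite computes the same result as the removed do/while but reads as a plain for loop. A runnable copy with spot checks (PAGE_SHIFT fixed at 12 for illustration):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    /* The rewritten helper, same logic: smallest 'order' such that
     * (1 << order) pages cover 'size' bytes. */
    static int get_order(unsigned long size)
    {
        int order;
        size = (size - 1) >> PAGE_SHIFT;
        for ( order = 0; size; order++ )
            size >>= 1;
        return order;
    }

    int main(void)
    {
        printf("%d %d %d %d\n",
               get_order(1),         /* 0: one page          */
               get_order(4096),      /* 0: exactly one page  */
               get_order(4097),      /* 1: needs two pages   */
               get_order(5 << 12));  /* 3: needs eight pages */
        return 0;
    }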
75.1 --- a/xen/include/xen/event.h Thu Aug 11 10:44:59 2005 -0800
75.2 +++ b/xen/include/xen/event.h Thu Aug 11 12:38:44 2005 -0800
75.3 @@ -26,30 +26,14 @@ static inline void evtchn_set_pending(st
75.4  {
75.5      struct domain *d = v->domain;
75.6      shared_info_t *s = d->shared_info;
75.7 -    int running;
75.8
75.9 -    /* These three operations must happen in strict order. */
75.10 +    /* These four operations must happen in strict order. */
75.11     if ( !test_and_set_bit(port, &s->evtchn_pending[0]) &&
75.12          !test_bit        (port, &s->evtchn_mask[0])    &&
75.13 -         !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) )
75.14 +         !test_and_set_bit(port>>5, &v->vcpu_info->evtchn_pending_sel) &&
75.15 +         !test_and_set_bit(0, &v->vcpu_info->evtchn_upcall_pending) )
75.16     {
75.17 -        /* The VCPU pending flag must be set /after/ update to evtchn-pend. */
75.18 -        set_bit(0, &v->vcpu_info->evtchn_upcall_pending);
75.19         evtchn_notify(v);
75.20 -
75.21 -        /*
75.22 -         * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of
75.23 -         * pending flag. These values may fluctuate (after all, we hold no
75.24 -         * locks) but the key insight is that each change will cause
75.25 -         * evtchn_upcall_pending to be polled.
75.26 -         *
75.27 -         * NB2. We save VCPUF_running across the unblock to avoid a needless
75.28 -         * IPI for domains that we IPI'd to unblock.
75.29 -         */
75.30 -        running = test_bit(_VCPUF_running, &v->vcpu_flags);
75.31 -        vcpu_unblock(v);
75.32 -        if ( running )
75.33 -            smp_send_event_check_cpu(v->processor);
75.34     }
75.35 }
75.36
75.37 @@ -73,8 +57,9 @@ static inline void send_guest_virq(struc
75.38  */
75.39 extern void send_guest_pirq(struct domain *d, int pirq);
75.40
75.41 -#define event_pending(_d)                       \
75.42 -    ((_d)->vcpu_info->evtchn_upcall_pending &&  \
75.43 -     !(_d)->vcpu_info->evtchn_upcall_mask)
75.44 +/* Note: Bitwise operations result in fast code with no branches. */
75.45 +#define event_pending(v)                        \
75.46 +    ((v)->vcpu_info->evtchn_upcall_pending &    \
75.47 +     ~(v)->vcpu_info->evtchn_upcall_mask)
75.48
75.49 #endif /* __XEN_EVENT_H__ */
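Folding the upcall-pending flag into the test_and_set_bit() chain lets the notify hook assume the flag transitioned 0 to 1, and the reworked event_pending() relies on both fields only ever holding 0 or 1: bitwise AND with the complement then matches the old logical form without branches. A truth-table check of that equivalence:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: with byte flags restricted to 0 or 1, 'pending & ~mask'
     * computes the same truth value as 'pending && !mask' but compiles
     * to straight-line code. */
    static int event_pending_sketch(uint8_t pending, uint8_t mask)
    {
        return (pending & ~mask) != 0;
    }

    int main(void)
    {
        for ( int p = 0; p <= 1; p++ )
            for ( int m = 0; m <= 1; m++ )
                printf("pending=%d mask=%d -> %d (logical: %d)\n",
                       p, m, event_pending_sketch(p, m), p && !m);
        return 0;
    }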
76.1 --- a/xen/include/xen/sched.h Thu Aug 11 10:44:59 2005 -0800
76.2 +++ b/xen/include/xen/sched.h Thu Aug 11 12:38:44 2005 -0800
76.3 @@ -297,10 +297,9 @@ unsigned long __hypercall_create_continu
76.4      (unsigned long)(_a1), (unsigned long)(_a2), (unsigned long)(_a3), \
76.5      (unsigned long)(_a4), (unsigned long)(_a5), (unsigned long)(_a6))
76.6
76.7 -#define hypercall_preempt_check() (unlikely(           \
76.8 -        softirq_pending(smp_processor_id()) |          \
76.9 -        (!!current->vcpu_info->evtchn_upcall_pending & \
76.10 -          !current->vcpu_info->evtchn_upcall_mask)     \
76.11 +#define hypercall_preempt_check() (unlikely(           \
76.12 +        softirq_pending(smp_processor_id()) |          \
76.13 +        event_pending(current)                         \
76.14     ))
76.15
76.16 /* This domain_hash and domain_list are protected by the domlist_lock. */
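The same branch-avoidance applies here: '|' evaluates both operands unconditionally, which costs nothing since each is a simple load with no side effects, and the open-coded flag test is replaced by the shared event_pending() macro. A small demonstration that '|' and '||' agree on 0/1 operands:

    #include <stdio.h>

    int main(void)
    {
        /* Both forms agree whenever the operands are 0 or 1; the
         * bitwise form just never branches. */
        int softirqs = 0, pending = 1;
        printf("%d %d\n", softirqs | pending, softirqs || pending);
        return 0;
    }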
78.1 --- a/xen/tools/symbols.c Thu Aug 11 10:44:59 2005 -0800
78.2 +++ b/xen/tools/symbols.c Thu Aug 11 12:38:44 2005 -0800
78.3 @@ -311,7 +311,7 @@ write_src(void)
78.4  	off = 0;
78.5  	for (i = 0; i < cnt; i++) {
78.6
78.7 -		if (!table[i].flags & SYM_FLAG_VALID)
78.8 +		if (!(table[i].flags & SYM_FLAG_VALID))
78.9  			continue;
78.10
78.11 		if ((valid & 0xFF) == 0)
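The old test parsed as (!table[i].flags) & SYM_FLAG_VALID, because unary '!' binds tighter than binary '&', so a symbol was skipped only when its entire flags word was zero rather than whenever its valid bit was clear. A minimal demonstration (SYM_FLAG_VALID taken as bit 0 purely for illustration):

    #include <stdio.h>

    #define SYM_FLAG_VALID 1

    int main(void)
    {
        unsigned flags = 2;   /* some other flag set, VALID bit clear */
        printf("%d\n", !flags & SYM_FLAG_VALID);    /* 0: buggy, no skip */
        printf("%d\n", !(flags & SYM_FLAG_VALID));  /* 1: fixed, skips   */
        return 0;
    }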