direct-io.hg
changeset 6524:3a8f27c6d56c
Merge.
line diff
3.1 --- a/Makefile Fri Aug 19 16:47:24 2005 -0800 3.2 +++ b/Makefile Mon Aug 22 11:00:37 2005 -0700 3.3 @@ -172,6 +172,12 @@ uninstall: 3.4 rm -rf $(D)/usr/bin/xen* $(D)/usr/bin/miniterm 3.5 rm -rf $(D)/boot/*xen* 3.6 rm -rf $(D)/lib/modules/*xen* 3.7 + rm -rf $(D)/usr/bin/cpuperf-perfcntr $(D)/usr/bin/cpuperf-xen 3.8 + rm -rf $(D)/usr/bin/xc_shadow 3.9 + rm -rf $(D)/usr/share/xen $(D)/usr/libexec/xen 3.10 + rm -rf $(D)/usr/share/man/man1/xen* 3.11 + rm -rf $(D)/usr/share/man/man8/xen* 3.12 + rm -rf $(D)/usr/lib/xen 3.13 3.14 # Legacy targets for compatibility 3.15 linux24:
4.1 --- a/buildconfigs/Rules.mk Fri Aug 19 16:47:24 2005 -0800 4.2 +++ b/buildconfigs/Rules.mk Mon Aug 22 11:00:37 2005 -0700 4.3 @@ -66,6 +66,7 @@ pristine-%/.valid-pristine: %.tar.bz2 4.4 4.5 PATCHDIRS := $(wildcard patches/*-*) 4.6 4.7 +ifneq ($(PATCHDIRS),) 4.8 -include $(patsubst %,%/.makedep,$(PATCHDIRS)) 4.9 4.10 $(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep: 4.11 @@ -80,6 +81,7 @@ ref-%/.valid-ref: pristine-%/.valid-pris 4.12 ([ -d patches/$* ] && \ 4.13 for i in patches/$*/*.patch ; do ( cd $(@D) ; patch -p1 <../$$i || exit 1 ) ; done) || true 4.14 touch $@ # update timestamp to avoid rebuild 4.15 +endif 4.16 4.17 %-build: 4.18 $(MAKE) -f buildconfigs/mk.$* build
9.1 --- a/extras/mini-os/include/time.h Fri Aug 19 16:47:24 2005 -0800 9.2 +++ b/extras/mini-os/include/time.h Mon Aug 22 11:00:37 2005 -0700 9.3 @@ -28,7 +28,7 @@ 9.4 * of real time into system time 9.5 */ 9.6 typedef s64 s_time_t; 9.7 -#define NOW() ((s_time_t)get_s_time()) 9.8 +#define NOW() ((s_time_t)monotonic_clock()) 9.9 #define SECONDS(_s) (((s_time_t)(_s)) * 1000000000UL ) 9.10 #define TENTHS(_ts) (((s_time_t)(_ts)) * 100000000UL ) 9.11 #define HUNDREDTHS(_hs) (((s_time_t)(_hs)) * 10000000UL ) 9.12 @@ -36,7 +36,8 @@ typedef s64 s_time_t; 9.13 #define MICROSECS(_us) (((s_time_t)(_us)) * 1000UL ) 9.14 #define Time_Max ((s_time_t) 0x7fffffffffffffffLL) 9.15 #define FOREVER Time_Max 9.16 - 9.17 +#define NSEC_TO_USEC(_nsec) (_nsec / 1000UL) 9.18 +#define NSEC_TO_SEC(_nsec) (_nsec / 1000000000ULL) 9.19 9.20 /* wall clock time */ 9.21 typedef long time_t; 9.22 @@ -46,6 +47,11 @@ struct timeval { 9.23 suseconds_t tv_usec; /* microseconds */ 9.24 }; 9.25 9.26 +struct timespec { 9.27 + time_t ts_sec; 9.28 + long ts_nsec; 9.29 +}; 9.30 + 9.31 9.32 /* prototypes */ 9.33 void init_time(void);
10.1 --- a/extras/mini-os/kernel.c Fri Aug 19 16:47:24 2005 -0800 10.2 +++ b/extras/mini-os/kernel.c Mon Aug 22 11:00:37 2005 -0700 10.3 @@ -132,20 +132,6 @@ void start_kernel(start_info_t *si) 10.4 i = 0; 10.5 for ( ; ; ) 10.6 { 10.7 - if(i >= 1000) 10.8 - { 10.9 - { 10.10 - unsigned long saved; 10.11 - __asm__ ("movl %%esp, %0" 10.12 - :"=r"(saved) /* y is output operand */ 10.13 - /* x is input operand */); 10.14 -// :"a"); /* %eax is clobbered register */ 10.15 - printk("ESP=0x%lx\n", saved); 10.16 - } 10.17 - 10.18 - printk("1000 bloks\n"); 10.19 - i=0; 10.20 - } 10.21 // HYPERVISOR_yield(); 10.22 block(1); 10.23 i++;
11.1 --- a/extras/mini-os/time.c Fri Aug 19 16:47:24 2005 -0800 11.2 +++ b/extras/mini-os/time.c Mon Aug 22 11:00:37 2005 -0700 11.3 @@ -43,19 +43,20 @@ 11.4 * Time functions 11.5 *************************************************************************/ 11.6 11.7 -/* Cached *multiplier* to convert TSC counts to microseconds. 11.8 - * (see the equation below). 11.9 - * Equal to 2^32 * (1 / (clocks per usec) ). 11.10 - * Initialized in time_init. 11.11 - */ 11.12 -static unsigned long fast_gettimeoffset_quotient; 11.13 +/* These are peridically updated in shared_info, and then copied here. */ 11.14 +struct shadow_time_info { 11.15 + u64 tsc_timestamp; /* TSC at last update of time vals. */ 11.16 + u64 system_timestamp; /* Time, in nanosecs, since boot. */ 11.17 + u32 tsc_to_nsec_mul; 11.18 + u32 tsc_to_usec_mul; 11.19 + int tsc_shift; 11.20 + u32 version; 11.21 +}; 11.22 +static struct timespec shadow_ts; 11.23 +static u32 shadow_ts_version; 11.24 11.25 +static struct shadow_time_info shadow; 11.26 11.27 -/* These are peridically updated in shared_info, and then copied here. 
*/ 11.28 -static u32 shadow_tsc_stamp; 11.29 -static s64 shadow_system_time; 11.30 -static u32 shadow_time_version; 11.31 -static struct timeval shadow_tv; 11.32 11.33 #ifndef rmb 11.34 #define rmb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") 11.35 @@ -63,116 +64,150 @@ static struct timeval shadow_tv; 11.36 11.37 #define HANDLE_USEC_OVERFLOW(_tv) \ 11.38 do { \ 11.39 - while ( (_tv).tv_usec >= 1000000 ) \ 11.40 + while ( (_tv)->tv_usec >= 1000000 ) \ 11.41 { \ 11.42 - (_tv).tv_usec -= 1000000; \ 11.43 - (_tv).tv_sec++; \ 11.44 + (_tv)->tv_usec -= 1000000; \ 11.45 + (_tv)->tv_sec++; \ 11.46 } \ 11.47 } while ( 0 ) 11.48 11.49 -static void get_time_values_from_xen(void) 11.50 +static inline int time_values_up_to_date(void) 11.51 +{ 11.52 + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_time[0]; 11.53 + 11.54 + return (shadow.version == src->version); 11.55 +} 11.56 + 11.57 + 11.58 +/* 11.59 + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, 11.60 + * yielding a 64-bit result. 
11.61 + */ 11.62 +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) 11.63 { 11.64 - do { 11.65 - shadow_time_version = HYPERVISOR_shared_info->time_version2; 11.66 - rmb(); 11.67 - shadow_tv.tv_sec = HYPERVISOR_shared_info->wc_sec; 11.68 - shadow_tv.tv_usec = HYPERVISOR_shared_info->wc_usec; 11.69 - shadow_tsc_stamp = (u32)HYPERVISOR_shared_info->tsc_timestamp; 11.70 - shadow_system_time = HYPERVISOR_shared_info->system_time; 11.71 - rmb(); 11.72 - } 11.73 - while ( shadow_time_version != HYPERVISOR_shared_info->time_version1 ); 11.74 + u64 product; 11.75 +#ifdef __i386__ 11.76 + u32 tmp1, tmp2; 11.77 +#endif 11.78 + 11.79 + if ( shift < 0 ) 11.80 + delta >>= -shift; 11.81 + else 11.82 + delta <<= shift; 11.83 + 11.84 +#ifdef __i386__ 11.85 + __asm__ ( 11.86 + "mul %5 ; " 11.87 + "mov %4,%%eax ; " 11.88 + "mov %%edx,%4 ; " 11.89 + "mul %5 ; " 11.90 + "add %4,%%eax ; " 11.91 + "xor %5,%5 ; " 11.92 + "adc %5,%%edx ; " 11.93 + : "=A" (product), "=r" (tmp1), "=r" (tmp2) 11.94 + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); 11.95 +#else 11.96 + __asm__ ( 11.97 + "mul %%rdx ; shrd $32,%%rdx,%%rax" 11.98 + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); 11.99 +#endif 11.100 + 11.101 + return product; 11.102 +} 11.103 + 11.104 + 11.105 +static unsigned long get_nsec_offset(void) 11.106 +{ 11.107 + u64 now, delta; 11.108 + rdtscll(now); 11.109 + delta = now - shadow.tsc_timestamp; 11.110 + return scale_delta(delta, shadow.tsc_to_nsec_mul, shadow.tsc_shift); 11.111 } 11.112 11.113 11.114 -#define TIME_VALUES_UP_TO_DATE \ 11.115 - (shadow_time_version == HYPERVISOR_shared_info->time_version2) 11.116 - 11.117 -static u32 get_time_delta_usecs(void) 11.118 +static void get_time_values_from_xen(void) 11.119 { 11.120 - register unsigned long eax, edx; 11.121 - 11.122 - /* Read the Time Stamp Counter */ 11.123 - 11.124 - rdtsc(eax,edx); 11.125 - 11.126 - /* .. 
relative to previous jiffy (32 bits is enough) */ 11.127 - eax -= shadow_tsc_stamp; 11.128 + struct vcpu_time_info *src = &HYPERVISOR_shared_info->vcpu_time[0]; 11.129 11.130 - /* 11.131 - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient 11.132 - * = (tsc_low delta) * (usecs_per_clock) 11.133 - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) 11.134 - * 11.135 - * Using a mull instead of a divl saves up to 31 clock cycles 11.136 - * in the critical path. 11.137 - */ 11.138 + do { 11.139 + shadow.version = src->version; 11.140 + rmb(); 11.141 + shadow.tsc_timestamp = src->tsc_timestamp; 11.142 + shadow.system_timestamp = src->system_time; 11.143 + shadow.tsc_to_nsec_mul = src->tsc_to_system_mul; 11.144 + shadow.tsc_shift = src->tsc_shift; 11.145 + rmb(); 11.146 + } 11.147 + while ((src->version & 1) | (shadow.version ^ src->version)); 11.148 11.149 - __asm__("mull %2" 11.150 - :"=a" (eax), "=d" (edx) 11.151 - :"rm" (fast_gettimeoffset_quotient), 11.152 - "0" (eax)); 11.153 - 11.154 - /* our adjusted time offset in microseconds */ 11.155 - return edx; 11.156 + shadow.tsc_to_usec_mul = shadow.tsc_to_nsec_mul / 1000; 11.157 } 11.158 11.159 -s64 get_s_time (void) 11.160 -{ 11.161 - u64 u_delta; 11.162 - s64 ret; 11.163 + 11.164 + 11.165 11.166 - again: 11.167 - 11.168 - u_delta = get_time_delta_usecs(); 11.169 - ret = shadow_system_time + (1000 * u_delta); 11.170 +/* monotonic_clock(): returns # of nanoseconds passed since time_init() 11.171 + * Note: This function is required to return accurate 11.172 + * time even in the absence of multiple timer ticks. 11.173 + */ 11.174 +u64 monotonic_clock(void) 11.175 +{ 11.176 + u64 time; 11.177 + u32 local_time_version; 11.178 11.179 - if ( unlikely(!TIME_VALUES_UP_TO_DATE) ) 11.180 - { 11.181 - /* 11.182 - * We may have blocked for a long time, rendering our calculations 11.183 - * invalid (e.g. the time delta may have overflowed). Detect that 11.184 - * and recalculate with fresh values. 
11.185 - */ 11.186 - get_time_values_from_xen(); 11.187 - goto again; 11.188 - } 11.189 + do { 11.190 + local_time_version = shadow.version; 11.191 + rmb(); 11.192 + time = shadow.system_timestamp + get_nsec_offset(); 11.193 + if (!time_values_up_to_date()) 11.194 + get_time_values_from_xen(); 11.195 + rmb(); 11.196 + } while (local_time_version != shadow.version); 11.197 + 11.198 + return time; 11.199 +} 11.200 11.201 - return ret; 11.202 +static void update_wallclock(void) 11.203 +{ 11.204 + shared_info_t *s = HYPERVISOR_shared_info; 11.205 + 11.206 + do { 11.207 + shadow_ts_version = s->wc_version; 11.208 + rmb(); 11.209 + shadow_ts.ts_sec = s->wc_sec; 11.210 + shadow_ts.ts_nsec = s->wc_nsec; 11.211 + rmb(); 11.212 + } 11.213 + while ((s->wc_version & 1) | (shadow_ts_version ^ s->wc_version)); 11.214 } 11.215 11.216 + 11.217 void gettimeofday(struct timeval *tv) 11.218 { 11.219 - struct timeval _tv; 11.220 + u64 nsec = monotonic_clock(); 11.221 + nsec += shadow_ts.ts_nsec; 11.222 + 11.223 + 11.224 + tv->tv_sec = shadow_ts.ts_sec; 11.225 + tv->tv_sec += NSEC_TO_SEC(nsec); 11.226 + tv->tv_usec = NSEC_TO_USEC(nsec % 1000000000UL); 11.227 +} 11.228 11.229 - do { 11.230 - get_time_values_from_xen(); 11.231 - _tv.tv_usec = get_time_delta_usecs(); 11.232 - _tv.tv_sec = shadow_tv.tv_sec; 11.233 - _tv.tv_usec += shadow_tv.tv_usec; 11.234 - } 11.235 - while ( unlikely(!TIME_VALUES_UP_TO_DATE) ); 11.236 - 11.237 - HANDLE_USEC_OVERFLOW(_tv); 11.238 - *tv = _tv; 11.239 -} 11.240 11.241 static void print_current_time(void) 11.242 { 11.243 - struct timeval tv; 11.244 - 11.245 - get_time_values_from_xen(); 11.246 + struct timeval tv; 11.247 11.248 gettimeofday(&tv); 11.249 printk("T(s=%ld us=%ld)\n", tv.tv_sec, tv.tv_usec); 11.250 } 11.251 11.252 + 11.253 void block(u32 millisecs) 11.254 { 11.255 struct timeval tv; 11.256 gettimeofday(&tv); 11.257 - //printk("tv.tv_sec=%ld, tv.tv_usec=%ld, shadow_system_time=%lld\n", tv.tv_sec, tv.tv_usec, shadow_system_time ); 11.258 - 
HYPERVISOR_set_timer_op(get_s_time() + 1000000LL * (s64) millisecs); 11.259 + HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs); 11.260 HYPERVISOR_block(); 11.261 } 11.262 11.263 @@ -185,7 +220,7 @@ static void timer_handler(int ev, struct 11.264 static int i; 11.265 11.266 get_time_values_from_xen(); 11.267 - 11.268 + update_wallclock(); 11.269 i++; 11.270 if (i >= 1000) { 11.271 print_current_time(); 11.272 @@ -197,24 +232,5 @@ static void timer_handler(int ev, struct 11.273 11.274 void init_time(void) 11.275 { 11.276 - u64 __cpu_khz; 11.277 - unsigned long cpu_khz; 11.278 - 11.279 - __cpu_khz = HYPERVISOR_shared_info->cpu_freq; 11.280 - 11.281 - cpu_khz = (u32) (__cpu_khz/1000); 11.282 - 11.283 - printk("Xen reported: %lu.%03lu MHz processor.\n", 11.284 - cpu_khz / 1000, cpu_khz % 1000); 11.285 - /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = 11.286 - (2^32 * 1 / (clocks/us)) */ 11.287 - { 11.288 - unsigned long eax=0, edx=1000; 11.289 - __asm__("divl %2" 11.290 - :"=a" (fast_gettimeoffset_quotient), "=d" (edx) 11.291 - :"r" (cpu_khz), 11.292 - "0" (eax), "1" (edx)); 11.293 - } 11.294 - 11.295 bind_virq(VIRQ_TIMER, &timer_handler); 11.296 }
39.1 --- a/linux-2.6-xen-sparse/arch/xen/Kconfig Fri Aug 19 16:47:24 2005 -0800 39.2 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig Mon Aug 22 11:00:37 2005 -0700 39.3 @@ -61,15 +61,6 @@ config XEN_BLKDEV_TAP_BE 39.4 with the blktap. This option will be removed as the block drivers are 39.5 modified to use grant tables. 39.6 39.7 -config XEN_BLKDEV_GRANT 39.8 - bool "Grant table substrate for block drivers" 39.9 - depends on !XEN_BLKDEV_TAP_BE 39.10 - default y 39.11 - help 39.12 - This introduces the use of grant tables as a data exhange mechanism 39.13 - between the frontend and backend block drivers. This currently 39.14 - conflicts with the block tap. 39.15 - 39.16 config XEN_NETDEV_BACKEND 39.17 bool "Network-device backend driver" 39.18 depends on XEN_PHYSDEV_ACCESS
40.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Fri Aug 19 16:47:24 2005 -0800 40.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 Mon Aug 22 11:00:37 2005 -0700 40.3 @@ -14,7 +14,6 @@ CONFIG_XEN_PRIVILEGED_GUEST=y 40.4 CONFIG_XEN_PHYSDEV_ACCESS=y 40.5 CONFIG_XEN_BLKDEV_BACKEND=y 40.6 # CONFIG_XEN_BLKDEV_TAP_BE is not set 40.7 -CONFIG_XEN_BLKDEV_GRANT=y 40.8 CONFIG_XEN_NETDEV_BACKEND=y 40.9 CONFIG_XEN_BLKDEV_FRONTEND=y 40.10 CONFIG_XEN_NETDEV_FRONTEND=y
41.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Fri Aug 19 16:47:24 2005 -0800 41.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 Mon Aug 22 11:00:37 2005 -0700 41.3 @@ -14,7 +14,6 @@ CONFIG_XEN_PRIVILEGED_GUEST=y 41.4 CONFIG_XEN_PHYSDEV_ACCESS=y 41.5 CONFIG_XEN_BLKDEV_BACKEND=y 41.6 # CONFIG_XEN_BLKDEV_TAP_BE is not set 41.7 -CONFIG_XEN_BLKDEV_GRANT=y 41.8 CONFIG_XEN_NETDEV_BACKEND=y 41.9 CONFIG_XEN_BLKDEV_FRONTEND=y 41.10 CONFIG_XEN_NETDEV_FRONTEND=y
42.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Fri Aug 19 16:47:24 2005 -0800 42.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 Mon Aug 22 11:00:37 2005 -0700 42.3 @@ -12,7 +12,6 @@ CONFIG_NO_IDLE_HZ=y 42.4 # 42.5 # CONFIG_XEN_PRIVILEGED_GUEST is not set 42.6 # CONFIG_XEN_PHYSDEV_ACCESS is not set 42.7 -CONFIG_XEN_BLKDEV_GRANT=y 42.8 CONFIG_XEN_BLKDEV_FRONTEND=y 42.9 CONFIG_XEN_NETDEV_FRONTEND=y 42.10 CONFIG_XEN_NETDEV_GRANT_TX=y
43.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Fri Aug 19 16:47:24 2005 -0800 43.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 Mon Aug 22 11:00:37 2005 -0700 43.3 @@ -12,7 +12,6 @@ CONFIG_NO_IDLE_HZ=y 43.4 # 43.5 # CONFIG_XEN_PRIVILEGED_GUEST is not set 43.6 # CONFIG_XEN_PHYSDEV_ACCESS is not set 43.7 -CONFIG_XEN_BLKDEV_GRANT=y 43.8 CONFIG_XEN_BLKDEV_FRONTEND=y 43.9 CONFIG_XEN_NETDEV_FRONTEND=y 43.10 CONFIG_XEN_NETDEV_GRANT_TX=y
44.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Fri Aug 19 16:47:24 2005 -0800 44.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_32 Mon Aug 22 11:00:37 2005 -0700 44.3 @@ -14,7 +14,6 @@ CONFIG_XEN_PRIVILEGED_GUEST=y 44.4 CONFIG_XEN_PHYSDEV_ACCESS=y 44.5 CONFIG_XEN_BLKDEV_BACKEND=y 44.6 # CONFIG_XEN_BLKDEV_TAP_BE is not set 44.7 -CONFIG_XEN_BLKDEV_GRANT=y 44.8 CONFIG_XEN_NETDEV_BACKEND=y 44.9 CONFIG_XEN_BLKDEV_FRONTEND=y 44.10 CONFIG_XEN_NETDEV_FRONTEND=y
45.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Fri Aug 19 16:47:24 2005 -0800 45.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen_defconfig_x86_64 Mon Aug 22 11:00:37 2005 -0700 45.3 @@ -14,7 +14,6 @@ CONFIG_XEN_PRIVILEGED_GUEST=y 45.4 CONFIG_XEN_PHYSDEV_ACCESS=y 45.5 CONFIG_XEN_BLKDEV_BACKEND=y 45.6 # CONFIG_XEN_BLKDEV_TAP_BE is not set 45.7 -CONFIG_XEN_BLKDEV_GRANT=y 45.8 CONFIG_XEN_NETDEV_BACKEND=y 45.9 CONFIG_XEN_BLKDEV_FRONTEND=y 45.10 CONFIG_XEN_NETDEV_FRONTEND=y
61.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Fri Aug 19 16:47:24 2005 -0800 61.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c Mon Aug 22 11:00:37 2005 -0700 61.3 @@ -923,7 +923,8 @@ static int __init do_boot_cpu(int apicid 61.4 ctxt.ctrlreg[3] = virt_to_mfn(swapper_pg_dir) << PAGE_SHIFT; 61.5 61.6 boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt); 61.7 - printk("boot error: %ld\n", boot_error); 61.8 + if (boot_error) 61.9 + printk("boot error: %ld\n", boot_error); 61.10 61.11 if (!boot_error) { 61.12 /*
62.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Fri Aug 19 16:47:24 2005 -0800 62.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c Mon Aug 22 11:00:37 2005 -0700 62.3 @@ -43,20 +43,18 @@ 62.4 #define IO_TLB_SHIFT 11 62.5 62.6 int swiotlb_force; 62.7 +static char *iotlb_virt_start; 62.8 +static unsigned long iotlb_nslabs; 62.9 62.10 /* 62.11 * Used to do a quick range check in swiotlb_unmap_single and 62.12 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this 62.13 * API. 62.14 */ 62.15 -static char *iotlb_virt_start, *iotlb_virt_end; 62.16 -static dma_addr_t iotlb_bus_start, iotlb_bus_end; 62.17 +static dma_addr_t iotlb_bus_start, iotlb_bus_mask; 62.18 62.19 -/* 62.20 - * The number of IO TLB blocks (in groups of 64) betweeen iotlb_virt_start and 62.21 - * iotlb_virt_end. This is command line adjustable via setup_io_tlb_npages. 62.22 - */ 62.23 -static unsigned long iotlb_nslabs; 62.24 +/* Does the given dma address reside within the swiotlb aperture? */ 62.25 +#define in_swiotlb_aperture(a) (!(((a) ^ iotlb_bus_start) & iotlb_bus_mask)) 62.26 62.27 /* 62.28 * When the IOMMU overflows we return a fallback buffer. This sets the size. 62.29 @@ -94,6 +92,9 @@ setup_io_tlb_npages(char *str) 62.30 iotlb_nslabs = simple_strtoul(str, &str, 0) << 62.31 (20 - IO_TLB_SHIFT); 62.32 iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); 62.33 + /* Round up to power of two (xen_create_contiguous_region). */ 62.34 + while (iotlb_nslabs & (iotlb_nslabs-1)) 62.35 + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); 62.36 } 62.37 if (*str == ',') 62.38 ++str; 62.39 @@ -120,6 +121,9 @@ swiotlb_init_with_default_size (size_t d 62.40 if (!iotlb_nslabs) { 62.41 iotlb_nslabs = (default_size >> IO_TLB_SHIFT); 62.42 iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE); 62.43 + /* Round up to power of two (xen_create_contiguous_region). 
*/ 62.44 + while (iotlb_nslabs & (iotlb_nslabs-1)) 62.45 + iotlb_nslabs += iotlb_nslabs & ~(iotlb_nslabs-1); 62.46 } 62.47 62.48 bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT); 62.49 @@ -133,17 +137,12 @@ swiotlb_init_with_default_size (size_t d 62.50 "Use dom0_mem Xen boot parameter to reserve\n" 62.51 "some DMA memory (e.g., dom0_mem=-128M).\n"); 62.52 62.53 - for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) 62.54 - xen_create_contiguous_region( 62.55 - (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT), 62.56 - get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT)); 62.57 - 62.58 - iotlb_virt_end = iotlb_virt_start + bytes; 62.59 + xen_create_contiguous_region( 62.60 + (unsigned long)iotlb_virt_start, get_order(bytes)); 62.61 62.62 /* 62.63 * Allocate and initialize the free list array. This array is used 62.64 - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE 62.65 - * between iotlb_virt_start and iotlb_virt_end. 62.66 + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE. 
62.67 */ 62.68 io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int)); 62.69 for (i = 0; i < iotlb_nslabs; i++) 62.70 @@ -156,15 +155,19 @@ swiotlb_init_with_default_size (size_t d 62.71 * Get the overflow emergency buffer 62.72 */ 62.73 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); 62.74 + 62.75 iotlb_bus_start = virt_to_bus(iotlb_virt_start); 62.76 - iotlb_bus_end = iotlb_bus_start + bytes; 62.77 + iotlb_bus_mask = ~(dma_addr_t)(bytes - 1); 62.78 + 62.79 printk(KERN_INFO "Software IO TLB enabled: \n" 62.80 " Aperture: %lu megabytes\n" 62.81 " Bus range: 0x%016lx - 0x%016lx\n" 62.82 " Kernel range: 0x%016lx - 0x%016lx\n", 62.83 bytes >> 20, 62.84 - (unsigned long)iotlb_bus_start, (unsigned long)iotlb_bus_end, 62.85 - (unsigned long)iotlb_virt_start, (unsigned long)iotlb_virt_end); 62.86 + (unsigned long)iotlb_bus_start, 62.87 + (unsigned long)iotlb_bus_start + bytes, 62.88 + (unsigned long)iotlb_virt_start, 62.89 + (unsigned long)iotlb_virt_start + bytes); 62.90 } 62.91 62.92 void 62.93 @@ -444,7 +447,7 @@ swiotlb_unmap_single(struct device *hwde 62.94 int dir) 62.95 { 62.96 BUG_ON(dir == DMA_NONE); 62.97 - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) 62.98 + if (in_swiotlb_aperture(dev_addr)) 62.99 unmap_single(hwdev, bus_to_virt(dev_addr), size, dir); 62.100 } 62.101 62.102 @@ -463,7 +466,7 @@ swiotlb_sync_single_for_cpu(struct devic 62.103 size_t size, int dir) 62.104 { 62.105 BUG_ON(dir == DMA_NONE); 62.106 - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) 62.107 + if (in_swiotlb_aperture(dev_addr)) 62.108 sync_single(hwdev, bus_to_virt(dev_addr), size, dir); 62.109 } 62.110 62.111 @@ -472,7 +475,7 @@ swiotlb_sync_single_for_device(struct de 62.112 size_t size, int dir) 62.113 { 62.114 BUG_ON(dir == DMA_NONE); 62.115 - if ((dev_addr >= iotlb_bus_start) && (dev_addr < iotlb_bus_end)) 62.116 + if (in_swiotlb_aperture(dev_addr)) 62.117 sync_single(hwdev, bus_to_virt(dev_addr), size, dir); 62.118 } 62.119 62.120 
@@ -610,7 +613,7 @@ swiotlb_unmap_page(struct device *hwdev, 62.121 size_t size, enum dma_data_direction direction) 62.122 { 62.123 BUG_ON(direction == DMA_NONE); 62.124 - if ((dma_address >= iotlb_bus_start) && (dma_address < iotlb_bus_end)) 62.125 + if (in_swiotlb_aperture(dma_address)) 62.126 unmap_single(hwdev, bus_to_virt(dma_address), size, direction); 62.127 } 62.128
67.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Fri Aug 19 16:47:24 2005 -0800 67.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Mon Aug 22 11:00:37 2005 -0700 67.3 @@ -405,54 +405,6 @@ void xen_destroy_contiguous_region(unsig 67.4 balloon_unlock(flags); 67.5 } 67.6 67.7 - 67.8 -unsigned long allocate_empty_lowmem_region(unsigned long pages) 67.9 -{ 67.10 - pgd_t *pgd; 67.11 - pud_t *pud; 67.12 - pmd_t *pmd; 67.13 - pte_t *pte; 67.14 - unsigned long *pfn_array; 67.15 - unsigned long vstart; 67.16 - unsigned long i; 67.17 - unsigned int order = get_order(pages*PAGE_SIZE); 67.18 - 67.19 - vstart = __get_free_pages(GFP_KERNEL, order); 67.20 - if (vstart == 0) 67.21 - return 0UL; 67.22 - 67.23 - scrub_pages(vstart, 1 << order); 67.24 - 67.25 - pfn_array = vmalloc((1<<order) * sizeof(*pfn_array)); 67.26 - BUG_ON(pfn_array == NULL); 67.27 - 67.28 - for (i = 0; i < (1<<order); i++) { 67.29 - pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE))); 67.30 - pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); 67.31 - pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); 67.32 - pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 67.33 - pfn_array[i] = pte_mfn(*pte); 67.34 -#ifdef CONFIG_X86_64 67.35 - xen_l1_entry_update(pte, __pte(0)); 67.36 -#else 67.37 - BUG_ON(HYPERVISOR_update_va_mapping(vstart + (i*PAGE_SIZE), 67.38 - __pte_ma(0), 0)); 67.39 -#endif 67.40 - phys_to_machine_mapping[(__pa(vstart)>>PAGE_SHIFT)+i] = 67.41 - INVALID_P2M_ENTRY; 67.42 - } 67.43 - 67.44 - flush_tlb_all(); 67.45 - 67.46 - balloon_put_pages(pfn_array, 1 << order); 67.47 - 67.48 - vfree(pfn_array); 67.49 - 67.50 - return vstart; 67.51 -} 67.52 - 67.53 -EXPORT_SYMBOL(allocate_empty_lowmem_region); 67.54 - 67.55 /* 67.56 * Local variables: 67.57 * c-file-style: "linux"
68.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Fri Aug 19 16:47:24 2005 -0800 68.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c Mon Aug 22 11:00:37 2005 -0700 68.3 @@ -352,13 +352,6 @@ static void __init pagetable_init (void) 68.4 swapper_pg_dir = pgd_base; 68.5 init_mm.pgd = pgd_base; 68.6 68.7 -#ifdef CONFIG_X86_PAE 68.8 - int i; 68.9 - /* Init entries of the first-level page table to the zero page */ 68.10 - for (i = 0; i < PTRS_PER_PGD; i++) 68.11 - set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); 68.12 -#endif 68.13 - 68.14 /* Enable PSE if available */ 68.15 if (cpu_has_pse) { 68.16 set_in_cr4(X86_CR4_PSE); 68.17 @@ -383,17 +376,6 @@ static void __init pagetable_init (void) 68.18 page_table_range_init(vaddr, 0, pgd_base); 68.19 68.20 permanent_kmaps_init(pgd_base); 68.21 - 68.22 -#if 0 /* def CONFIG_X86_PAE */ 68.23 - /* 68.24 - * Add low memory identity-mappings - SMP needs it when 68.25 - * starting up on an AP from real-mode. In the non-PAE 68.26 - * case we already have these mappings through head.S. 68.27 - * All user-space mappings are explicitly cleared after 68.28 - * SMP startup. 68.29 - */ 68.30 - set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); 68.31 -#endif 68.32 } 68.33 68.34 #if defined(CONFIG_PM_DISK) || defined(CONFIG_SOFTWARE_SUSPEND)
71.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Fri Aug 19 16:47:24 2005 -0800 71.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/Makefile Mon Aug 22 11:00:37 2005 -0700 71.3 @@ -17,7 +17,7 @@ l-pci-y += irq.o 71.4 c-pci-$(CONFIG_X86_VISWS) := visws.o fixup.o 71.5 pci-$(CONFIG_X86_VISWS) := 71.6 c-pci-$(CONFIG_X86_NUMAQ) := numa.o 71.7 -pci-$(CONFIG_X86_NUMAQ) := irq.o 71.8 +l-pci-$(CONFIG_X86_NUMAQ) := irq.o 71.9 71.10 obj-y += $(pci-y) 71.11 c-obj-y += $(c-pci-y) common.o 71.12 @@ -27,6 +27,7 @@ c-link := 71.13 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)): 71.14 @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@ 71.15 71.16 -obj-y += $(c-obj-y) $(l-pci-y) 71.17 +# Make sure irq.o gets linked in before common.o 71.18 +obj-y += $(patsubst common.o,$(l-pci-y) common.o,$(c-obj-y)) 71.19 71.20 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
75.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Fri Aug 19 16:47:24 2005 -0800 75.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Mon Aug 22 11:00:37 2005 -0700 75.3 @@ -143,13 +143,8 @@ static int __do_suspend(void *ignore) 75.4 #define usbif_resume() do{}while(0) 75.5 #endif 75.6 75.7 -#ifdef CONFIG_XEN_BLKDEV_GRANT 75.8 extern int gnttab_suspend(void); 75.9 extern int gnttab_resume(void); 75.10 -#else 75.11 -#define gnttab_suspend() do{}while(0) 75.12 -#define gnttab_resume() do{}while(0) 75.13 -#endif 75.14 75.15 #ifdef CONFIG_SMP 75.16 extern void smp_suspend(void);
79.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Fri Aug 19 16:47:24 2005 -0800 79.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile Mon Aug 22 11:00:37 2005 -0700 79.3 @@ -36,8 +36,8 @@ quiet_cmd_syscall = SYSCALL $@ 79.4 $(call if_changed,syscall) 79.5 79.6 AFLAGS_vsyscall-int80.o = -m32 -I$(obj) 79.7 -AFLAGS_vsyscall-sysenter.o = -m32 79.8 -AFLAGS_vsyscall-syscall.o = -m32 79.9 +AFLAGS_vsyscall-sysenter.o = -m32 -I$(obj) 79.10 +AFLAGS_vsyscall-syscall.o = -m32 -I$(obj) 79.11 CFLAGS_ia32_ioctl.o += -Ifs/ 79.12 79.13 s-link := vsyscall-syscall.o vsyscall-sysenter.o vsyscall-sigreturn.o 79.14 @@ -48,13 +48,11 @@ s-link := vsyscall-syscall.o vsyscall-sy 79.15 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)): 79.16 @ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@ 79.17 79.18 -$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S ../../i386/kernel/vsyscall-note.S 79.19 -$(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S 79.20 -$(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S 79.21 +$(obj)/vsyscall-int80.o $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-syscall.o: \ 79.22 + $(obj)/vsyscall-sigreturn.S $(obj)/../../i386/kernel/vsyscall-note.S 79.23 79.24 -../../i386/kernel/vsyscall-note.S: 79.25 - @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $(srctree)/arch/xen/i386/kernel/$(notdir $@) 79.26 - make -C arch/xen/i386/kernel vsyscall-note.S 79.27 +$(obj)/../../i386/kernel/vsyscall-note.S: 79.28 + @ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@ 79.29 79.30 obj-y += $(c-obj-y) $(s-obj-y) 79.31
96.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Fri Aug 19 16:47:24 2005 -0800 96.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c Mon Aug 22 11:00:37 2005 -0700 96.3 @@ -210,15 +210,16 @@ static void __init map_vsyscall(void) 96.4 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 96.5 } 96.6 96.7 -extern void __set_fixmap_user (enum fixed_addresses, unsigned long, pgprot_t); 96.8 - 96.9 +#ifdef CONFIG_XEN 96.10 static void __init map_vsyscall_user(void) 96.11 { 96.12 + extern void __set_fixmap_user(enum fixed_addresses, unsigned long, pgprot_t); 96.13 extern char __vsyscall_0; 96.14 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 96.15 96.16 __set_fixmap_user(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 96.17 } 96.18 +#endif 96.19 96.20 static int __init vsyscall_init(void) 96.21 { 96.22 @@ -227,7 +228,10 @@ static int __init vsyscall_init(void) 96.23 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); 96.24 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); 96.25 map_vsyscall(); 96.26 - map_vsyscall_user(); /* establish tranlation for user address space */ 96.27 +#ifdef CONFIG_XEN 96.28 + map_vsyscall_user(); 96.29 + sysctl_vsyscall = 0; /* disable vgettimeofday() */ 96.30 +#endif 96.31 #ifdef CONFIG_SYSCTL 96.32 register_sysctl_table(kernel_root_table2, 0); 96.33 #endif
101.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Fri Aug 19 16:47:24 2005 -0800 101.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/pci/Makefile Mon Aug 22 11:00:37 2005 -0700 101.3 @@ -30,8 +30,9 @@ c-link := 101.4 $(patsubst %.o,$(obj)/%.c,$(c-i386-obj-y)): 101.5 @ln -fsn $(srctree)/arch/i386/pci/$(notdir $@) $@ 101.6 101.7 -obj-y += $(c-i386-obj-y) $(c-obj-y) 101.8 -obj-y += $(c-xen-obj-y) 101.9 +# Make sure irq.o gets linked in before common.o 101.10 +obj-y += $(patsubst common.o,$(c-xen-obj-y) common.o,$(c-i386-obj-y)) 101.11 +obj-y += $(c-obj-y) 101.12 101.13 clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link)) 101.14 clean-files += $(patsubst %.o,%.c,$(c-i386-obj-y) $(c-i386-obj-))
103.1 --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Fri Aug 19 16:47:24 2005 -0800 103.2 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Mon Aug 22 11:00:37 2005 -0700 103.3 @@ -83,12 +83,15 @@ static struct timer_list balloon_timer; 103.4 103.5 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 103.6 /* Use the private and mapping fields of struct page as a list. */ 103.7 -#define PAGE_TO_LIST(p) ( (struct list_head *)&p->private ) 103.8 -#define LIST_TO_PAGE(l) ( list_entry( ((unsigned long *)l), \ 103.9 - struct page, private ) ) 103.10 -#define UNLIST_PAGE(p) do { list_del(PAGE_TO_LIST(p)); \ 103.11 - p->mapping = NULL; \ 103.12 - p->private = 0; } while(0) 103.13 +#define PAGE_TO_LIST(p) ((struct list_head *)&p->private) 103.14 +#define LIST_TO_PAGE(l) \ 103.15 + (list_entry(((unsigned long *)l), struct page, private)) 103.16 +#define UNLIST_PAGE(p) \ 103.17 + do { \ 103.18 + list_del(PAGE_TO_LIST(p)); \ 103.19 + p->mapping = NULL; \ 103.20 + p->private = 0; \ 103.21 + } while(0) 103.22 #else 103.23 /* There's a dedicated list field in struct page we can use. */ 103.24 #define PAGE_TO_LIST(p) ( &p->list ) 103.25 @@ -104,56 +107,53 @@ static struct timer_list balloon_timer; 103.26 #endif 103.27 103.28 #define IPRINTK(fmt, args...) \ 103.29 - printk(KERN_INFO "xen_mem: " fmt, ##args) 103.30 + printk(KERN_INFO "xen_mem: " fmt, ##args) 103.31 #define WPRINTK(fmt, args...) \ 103.32 - printk(KERN_WARNING "xen_mem: " fmt, ##args) 103.33 + printk(KERN_WARNING "xen_mem: " fmt, ##args) 103.34 103.35 /* balloon_append: add the given page to the balloon. */ 103.36 static void balloon_append(struct page *page) 103.37 { 103.38 - /* Low memory is re-populated first, so highmem pages go at list tail. 
*/ 103.39 - if ( PageHighMem(page) ) 103.40 - { 103.41 - list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); 103.42 - balloon_high++; 103.43 - } 103.44 - else 103.45 - { 103.46 - list_add(PAGE_TO_LIST(page), &ballooned_pages); 103.47 - balloon_low++; 103.48 - } 103.49 + /* Lowmem is re-populated first, so highmem pages go at list tail. */ 103.50 + if (PageHighMem(page)) { 103.51 + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); 103.52 + balloon_high++; 103.53 + } else { 103.54 + list_add(PAGE_TO_LIST(page), &ballooned_pages); 103.55 + balloon_low++; 103.56 + } 103.57 } 103.58 103.59 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 103.60 static struct page *balloon_retrieve(void) 103.61 { 103.62 - struct page *page; 103.63 + struct page *page; 103.64 103.65 - if ( list_empty(&ballooned_pages) ) 103.66 - return NULL; 103.67 + if (list_empty(&ballooned_pages)) 103.68 + return NULL; 103.69 103.70 - page = LIST_TO_PAGE(ballooned_pages.next); 103.71 - UNLIST_PAGE(page); 103.72 + page = LIST_TO_PAGE(ballooned_pages.next); 103.73 + UNLIST_PAGE(page); 103.74 103.75 - if ( PageHighMem(page) ) 103.76 - balloon_high--; 103.77 - else 103.78 - balloon_low--; 103.79 + if (PageHighMem(page)) 103.80 + balloon_high--; 103.81 + else 103.82 + balloon_low--; 103.83 103.84 - return page; 103.85 + return page; 103.86 } 103.87 103.88 static void balloon_alarm(unsigned long unused) 103.89 { 103.90 - schedule_work(&balloon_worker); 103.91 + schedule_work(&balloon_worker); 103.92 } 103.93 103.94 static unsigned long current_target(void) 103.95 { 103.96 - unsigned long target = min(target_pages, hard_limit); 103.97 - if ( target > (current_pages + balloon_low + balloon_high) ) 103.98 - target = current_pages + balloon_low + balloon_high; 103.99 - return target; 103.100 + unsigned long target = min(target_pages, hard_limit); 103.101 + if (target > (current_pages + balloon_low + balloon_high)) 103.102 + target = current_pages + balloon_low + balloon_high; 
103.103 + return target; 103.104 } 103.105 103.106 /* 103.107 @@ -164,161 +164,147 @@ static unsigned long current_target(void 103.108 */ 103.109 static void balloon_process(void *unused) 103.110 { 103.111 - unsigned long *mfn_list, pfn, i, flags; 103.112 - struct page *page; 103.113 - long credit, debt, rc; 103.114 - void *v; 103.115 + unsigned long *mfn_list, pfn, i, flags; 103.116 + struct page *page; 103.117 + long credit, debt, rc; 103.118 + void *v; 103.119 103.120 - down(&balloon_mutex); 103.121 + down(&balloon_mutex); 103.122 103.123 retry: 103.124 - mfn_list = NULL; 103.125 + mfn_list = NULL; 103.126 103.127 - if ( (credit = current_target() - current_pages) > 0 ) 103.128 - { 103.129 - mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list)); 103.130 - if ( mfn_list == NULL ) 103.131 - goto out; 103.132 + if ((credit = current_target() - current_pages) > 0) { 103.133 + mfn_list = vmalloc(credit * sizeof(*mfn_list)); 103.134 + if (mfn_list == NULL) 103.135 + goto out; 103.136 103.137 - balloon_lock(flags); 103.138 - rc = HYPERVISOR_dom_mem_op( 103.139 - MEMOP_increase_reservation, mfn_list, credit, 0); 103.140 - balloon_unlock(flags); 103.141 - if ( rc < credit ) 103.142 - { 103.143 - /* We hit the Xen hard limit: reprobe. */ 103.144 - if ( HYPERVISOR_dom_mem_op( 103.145 - MEMOP_decrease_reservation, mfn_list, rc, 0) != rc ) 103.146 - BUG(); 103.147 - hard_limit = current_pages + rc - driver_pages; 103.148 - vfree(mfn_list); 103.149 - goto retry; 103.150 - } 103.151 + balloon_lock(flags); 103.152 + rc = HYPERVISOR_dom_mem_op( 103.153 + MEMOP_increase_reservation, mfn_list, credit, 0); 103.154 + balloon_unlock(flags); 103.155 + if (rc < credit) { 103.156 + /* We hit the Xen hard limit: reprobe. 
*/ 103.157 + BUG_ON(HYPERVISOR_dom_mem_op( 103.158 + MEMOP_decrease_reservation, 103.159 + mfn_list, rc, 0) != rc); 103.160 + hard_limit = current_pages + rc - driver_pages; 103.161 + vfree(mfn_list); 103.162 + goto retry; 103.163 + } 103.164 103.165 - for ( i = 0; i < credit; i++ ) 103.166 - { 103.167 - if ( (page = balloon_retrieve()) == NULL ) 103.168 - BUG(); 103.169 + for (i = 0; i < credit; i++) { 103.170 + page = balloon_retrieve(); 103.171 + BUG_ON(page == NULL); 103.172 103.173 - pfn = page - mem_map; 103.174 - if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY ) 103.175 - BUG(); 103.176 + pfn = page - mem_map; 103.177 + if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) 103.178 + BUG(); 103.179 103.180 - /* Update P->M and M->P tables. */ 103.181 - phys_to_machine_mapping[pfn] = mfn_list[i]; 103.182 - xen_machphys_update(mfn_list[i], pfn); 103.183 + /* Update P->M and M->P tables. */ 103.184 + phys_to_machine_mapping[pfn] = mfn_list[i]; 103.185 + xen_machphys_update(mfn_list[i], pfn); 103.186 103.187 - /* Link back into the page tables if it's not a highmem page. */ 103.188 - if ( pfn < max_low_pfn ) 103.189 - { 103.190 - BUG_ON(HYPERVISOR_update_va_mapping( 103.191 - (unsigned long)__va(pfn << PAGE_SHIFT), 103.192 - pfn_pte_ma(mfn_list[i], PAGE_KERNEL), 0)); 103.193 - } 103.194 + /* Link back into the page tables if not highmem. */ 103.195 + if (pfn < max_low_pfn) 103.196 + BUG_ON(HYPERVISOR_update_va_mapping( 103.197 + (unsigned long)__va(pfn << PAGE_SHIFT), 103.198 + pfn_pte_ma(mfn_list[i], PAGE_KERNEL), 103.199 + 0)); 103.200 103.201 - /* Finally, relinquish the memory back to the system allocator. */ 103.202 - ClearPageReserved(page); 103.203 - set_page_count(page, 1); 103.204 - __free_page(page); 103.205 - } 103.206 + /* Relinquish the page back to the allocator. 
*/ 103.207 + ClearPageReserved(page); 103.208 + set_page_count(page, 1); 103.209 + __free_page(page); 103.210 + } 103.211 103.212 - current_pages += credit; 103.213 - } 103.214 - else if ( credit < 0 ) 103.215 - { 103.216 - debt = -credit; 103.217 + current_pages += credit; 103.218 + } else if (credit < 0) { 103.219 + debt = -credit; 103.220 103.221 - mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list)); 103.222 - if ( mfn_list == NULL ) 103.223 - goto out; 103.224 + mfn_list = vmalloc(debt * sizeof(*mfn_list)); 103.225 + if (mfn_list == NULL) 103.226 + goto out; 103.227 103.228 - for ( i = 0; i < debt; i++ ) 103.229 - { 103.230 - if ( (page = alloc_page(GFP_HIGHUSER)) == NULL ) 103.231 - { 103.232 - debt = i; 103.233 - break; 103.234 - } 103.235 + for (i = 0; i < debt; i++) { 103.236 + if ((page = alloc_page(GFP_HIGHUSER)) == NULL) { 103.237 + debt = i; 103.238 + break; 103.239 + } 103.240 103.241 - pfn = page - mem_map; 103.242 - mfn_list[i] = phys_to_machine_mapping[pfn]; 103.243 + pfn = page - mem_map; 103.244 + mfn_list[i] = phys_to_machine_mapping[pfn]; 103.245 103.246 - if ( !PageHighMem(page) ) 103.247 - { 103.248 - v = phys_to_virt(pfn << PAGE_SHIFT); 103.249 - scrub_pages(v, 1); 103.250 - BUG_ON(HYPERVISOR_update_va_mapping( 103.251 - (unsigned long)v, __pte_ma(0), 0)); 103.252 - } 103.253 + if (!PageHighMem(page)) { 103.254 + v = phys_to_virt(pfn << PAGE_SHIFT); 103.255 + scrub_pages(v, 1); 103.256 + BUG_ON(HYPERVISOR_update_va_mapping( 103.257 + (unsigned long)v, __pte_ma(0), 0)); 103.258 + } 103.259 #ifdef CONFIG_XEN_SCRUB_PAGES 103.260 - else 103.261 - { 103.262 - v = kmap(page); 103.263 - scrub_pages(v, 1); 103.264 - kunmap(page); 103.265 - } 103.266 + else { 103.267 + v = kmap(page); 103.268 + scrub_pages(v, 1); 103.269 + kunmap(page); 103.270 + } 103.271 #endif 103.272 - } 103.273 + } 103.274 103.275 - /* Ensure that ballooned highmem pages don't have cached mappings. 
*/ 103.276 - kmap_flush_unused(); 103.277 - flush_tlb_all(); 103.278 + /* Ensure that ballooned highmem pages don't have kmaps. */ 103.279 + kmap_flush_unused(); 103.280 + flush_tlb_all(); 103.281 103.282 - /* No more mappings: invalidate pages in P2M and add to balloon. */ 103.283 - for ( i = 0; i < debt; i++ ) 103.284 - { 103.285 - pfn = mfn_to_pfn(mfn_list[i]); 103.286 - phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; 103.287 - balloon_append(pfn_to_page(pfn)); 103.288 - } 103.289 + /* No more mappings: invalidate P2M and add to balloon. */ 103.290 + for (i = 0; i < debt; i++) { 103.291 + pfn = mfn_to_pfn(mfn_list[i]); 103.292 + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; 103.293 + balloon_append(pfn_to_page(pfn)); 103.294 + } 103.295 103.296 - if ( HYPERVISOR_dom_mem_op( 103.297 - MEMOP_decrease_reservation, mfn_list, debt, 0) != debt ) 103.298 - BUG(); 103.299 + BUG_ON(HYPERVISOR_dom_mem_op( 103.300 + MEMOP_decrease_reservation,mfn_list, debt, 0) != debt); 103.301 103.302 - current_pages -= debt; 103.303 - } 103.304 + current_pages -= debt; 103.305 + } 103.306 103.307 out: 103.308 - if ( mfn_list != NULL ) 103.309 - vfree(mfn_list); 103.310 + if (mfn_list != NULL) 103.311 + vfree(mfn_list); 103.312 103.313 - /* Schedule more work if there is some still to be done. */ 103.314 - if ( current_target() != current_pages ) 103.315 - mod_timer(&balloon_timer, jiffies + HZ); 103.316 + /* Schedule more work if there is some still to be done. */ 103.317 + if (current_target() != current_pages) 103.318 + mod_timer(&balloon_timer, jiffies + HZ); 103.319 103.320 - up(&balloon_mutex); 103.321 + up(&balloon_mutex); 103.322 } 103.323 103.324 /* Resets the Xen limit, sets new target, and kicks off processing. */ 103.325 static void set_new_target(unsigned long target) 103.326 { 103.327 - /* No need for lock. Not read-modify-write updates. 
*/ 103.328 - hard_limit = ~0UL; 103.329 - target_pages = target; 103.330 - schedule_work(&balloon_worker); 103.331 + /* No need for lock. Not read-modify-write updates. */ 103.332 + hard_limit = ~0UL; 103.333 + target_pages = target; 103.334 + schedule_work(&balloon_worker); 103.335 } 103.336 103.337 static struct xenbus_watch target_watch = 103.338 { 103.339 - .node = "memory/target" 103.340 + .node = "memory/target" 103.341 }; 103.342 103.343 /* React to a change in the target key */ 103.344 static void watch_target(struct xenbus_watch *watch, const char *node) 103.345 { 103.346 - unsigned long new_target; 103.347 - int err; 103.348 + unsigned long new_target; 103.349 + int err; 103.350 103.351 - err = xenbus_scanf("memory", "target", "%lu", &new_target); 103.352 + err = xenbus_scanf("memory", "target", "%lu", &new_target); 103.353 + if (err != 1) { 103.354 + printk(KERN_ERR "Unable to read memory/target\n"); 103.355 + return; 103.356 + } 103.357 103.358 - if(err != 1) 103.359 - { 103.360 - printk(KERN_ERR "Unable to read memory/target\n"); 103.361 - return; 103.362 - } 103.363 - 103.364 - set_new_target(new_target >> PAGE_SHIFT); 103.365 + set_new_target(new_target >> PAGE_SHIFT); 103.366 103.367 } 103.368 103.369 @@ -329,141 +315,185 @@ int balloon_init_watcher(struct notifier 103.370 unsigned long event, 103.371 void *data) 103.372 { 103.373 - int err; 103.374 + int err; 103.375 103.376 - BUG_ON(down_trylock(&xenbus_lock) == 0); 103.377 - 103.378 - err = register_xenbus_watch(&target_watch); 103.379 + BUG_ON(down_trylock(&xenbus_lock) == 0); 103.380 103.381 - if (err) { 103.382 - printk(KERN_ERR "Failed to set balloon watcher\n"); 103.383 - } 103.384 + err = register_xenbus_watch(&target_watch); 103.385 + if (err) 103.386 + printk(KERN_ERR "Failed to set balloon watcher\n"); 103.387 103.388 - return NOTIFY_DONE; 103.389 + return NOTIFY_DONE; 103.390 103.391 } 103.392 103.393 static int balloon_write(struct file *file, const char __user *buffer, 103.394 
unsigned long count, void *data) 103.395 { 103.396 - char memstring[64], *endchar; 103.397 - unsigned long long target_bytes; 103.398 + char memstring[64], *endchar; 103.399 + unsigned long long target_bytes; 103.400 103.401 - if ( !capable(CAP_SYS_ADMIN) ) 103.402 - return -EPERM; 103.403 + if (!capable(CAP_SYS_ADMIN)) 103.404 + return -EPERM; 103.405 103.406 - if ( count <= 1 ) 103.407 - return -EBADMSG; /* runt */ 103.408 - if ( count > sizeof(memstring) ) 103.409 - return -EFBIG; /* too long */ 103.410 + if (count <= 1) 103.411 + return -EBADMSG; /* runt */ 103.412 + if (count > sizeof(memstring)) 103.413 + return -EFBIG; /* too long */ 103.414 103.415 - if ( copy_from_user(memstring, buffer, count) ) 103.416 - return -EFAULT; 103.417 - memstring[sizeof(memstring)-1] = '\0'; 103.418 + if (copy_from_user(memstring, buffer, count)) 103.419 + return -EFAULT; 103.420 + memstring[sizeof(memstring)-1] = '\0'; 103.421 103.422 - target_bytes = memparse(memstring, &endchar); 103.423 - set_new_target(target_bytes >> PAGE_SHIFT); 103.424 + target_bytes = memparse(memstring, &endchar); 103.425 + set_new_target(target_bytes >> PAGE_SHIFT); 103.426 103.427 - return count; 103.428 + return count; 103.429 } 103.430 103.431 static int balloon_read(char *page, char **start, off_t off, 103.432 int count, int *eof, void *data) 103.433 { 103.434 - int len; 103.435 + int len; 103.436 103.437 - len = sprintf( 103.438 - page, 103.439 - "Current allocation: %8lu kB\n" 103.440 - "Requested target: %8lu kB\n" 103.441 - "Low-mem balloon: %8lu kB\n" 103.442 - "High-mem balloon: %8lu kB\n" 103.443 - "Xen hard limit: ", 103.444 - PAGES2KB(current_pages), PAGES2KB(target_pages), 103.445 - PAGES2KB(balloon_low), PAGES2KB(balloon_high)); 103.446 + len = sprintf( 103.447 + page, 103.448 + "Current allocation: %8lu kB\n" 103.449 + "Requested target: %8lu kB\n" 103.450 + "Low-mem balloon: %8lu kB\n" 103.451 + "High-mem balloon: %8lu kB\n" 103.452 + "Xen hard limit: ", 103.453 + 
PAGES2KB(current_pages), PAGES2KB(target_pages), 103.454 + PAGES2KB(balloon_low), PAGES2KB(balloon_high)); 103.455 103.456 - if ( hard_limit != ~0UL ) 103.457 - len += sprintf( 103.458 - page + len, 103.459 - "%8lu kB (inc. %8lu kB driver headroom)\n", 103.460 - PAGES2KB(hard_limit), PAGES2KB(driver_pages)); 103.461 - else 103.462 - len += sprintf( 103.463 - page + len, 103.464 - " ??? kB\n"); 103.465 + if (hard_limit != ~0UL) { 103.466 + len += sprintf( 103.467 + page + len, 103.468 + "%8lu kB (inc. %8lu kB driver headroom)\n", 103.469 + PAGES2KB(hard_limit), PAGES2KB(driver_pages)); 103.470 + } else { 103.471 + len += sprintf( 103.472 + page + len, 103.473 + " ??? kB\n"); 103.474 + } 103.475 103.476 - *eof = 1; 103.477 - return len; 103.478 + *eof = 1; 103.479 + return len; 103.480 } 103.481 103.482 static struct notifier_block xenstore_notifier; 103.483 103.484 static int __init balloon_init(void) 103.485 { 103.486 - unsigned long pfn; 103.487 - struct page *page; 103.488 + unsigned long pfn; 103.489 + struct page *page; 103.490 103.491 - IPRINTK("Initialising balloon driver.\n"); 103.492 + IPRINTK("Initialising balloon driver.\n"); 103.493 103.494 - current_pages = min(xen_start_info.nr_pages, max_pfn); 103.495 - target_pages = current_pages; 103.496 - balloon_low = 0; 103.497 - balloon_high = 0; 103.498 - driver_pages = 0UL; 103.499 - hard_limit = ~0UL; 103.500 + current_pages = min(xen_start_info.nr_pages, max_pfn); 103.501 + target_pages = current_pages; 103.502 + balloon_low = 0; 103.503 + balloon_high = 0; 103.504 + driver_pages = 0UL; 103.505 + hard_limit = ~0UL; 103.506 103.507 - init_timer(&balloon_timer); 103.508 - balloon_timer.data = 0; 103.509 - balloon_timer.function = balloon_alarm; 103.510 + init_timer(&balloon_timer); 103.511 + balloon_timer.data = 0; 103.512 + balloon_timer.function = balloon_alarm; 103.513 103.514 - if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL ) 103.515 - { 103.516 - WPRINTK("Unable to create 
/proc/xen/balloon.\n"); 103.517 - return -1; 103.518 - } 103.519 + if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) { 103.520 + WPRINTK("Unable to create /proc/xen/balloon.\n"); 103.521 + return -1; 103.522 + } 103.523 103.524 - balloon_pde->read_proc = balloon_read; 103.525 - balloon_pde->write_proc = balloon_write; 103.526 + balloon_pde->read_proc = balloon_read; 103.527 + balloon_pde->write_proc = balloon_write; 103.528 103.529 - /* Initialise the balloon with excess memory space. */ 103.530 - for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ ) 103.531 - { 103.532 - page = &mem_map[pfn]; 103.533 - if ( !PageReserved(page) ) 103.534 - balloon_append(page); 103.535 - } 103.536 + /* Initialise the balloon with excess memory space. */ 103.537 + for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) { 103.538 + page = &mem_map[pfn]; 103.539 + if (!PageReserved(page)) 103.540 + balloon_append(page); 103.541 + } 103.542 103.543 - target_watch.callback = watch_target; 103.544 - xenstore_notifier.notifier_call = balloon_init_watcher; 103.545 + target_watch.callback = watch_target; 103.546 + xenstore_notifier.notifier_call = balloon_init_watcher; 103.547 103.548 - register_xenstore_notifier(&xenstore_notifier); 103.549 + register_xenstore_notifier(&xenstore_notifier); 103.550 103.551 - return 0; 103.552 + return 0; 103.553 } 103.554 103.555 subsys_initcall(balloon_init); 103.556 103.557 void balloon_update_driver_allowance(long delta) 103.558 { 103.559 - unsigned long flags; 103.560 - balloon_lock(flags); 103.561 - driver_pages += delta; /* non-atomic update */ 103.562 - balloon_unlock(flags); 103.563 + unsigned long flags; 103.564 + balloon_lock(flags); 103.565 + driver_pages += delta; /* non-atomic update */ 103.566 + balloon_unlock(flags); 103.567 } 103.568 103.569 -void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns) 103.570 +struct page *balloon_alloc_empty_page_range(unsigned long nr_pages) 103.571 { 103.572 - unsigned 
long flags; 103.573 + int f(pte_t *pte, struct page *pte_page, 103.574 + unsigned long addr, void *data) 103.575 + { 103.576 + unsigned long mfn = pte_mfn(*pte); 103.577 + set_pte(pte, __pte_ma(0)); 103.578 + phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = 103.579 + INVALID_P2M_ENTRY; 103.580 + BUG_ON(HYPERVISOR_dom_mem_op( 103.581 + MEMOP_decrease_reservation, &mfn, 1, 0) != 1); 103.582 + return 0; 103.583 + } 103.584 + 103.585 + unsigned long vstart, flags; 103.586 + unsigned int order = get_order(nr_pages * PAGE_SIZE); 103.587 + 103.588 + vstart = __get_free_pages(GFP_KERNEL, order); 103.589 + if (vstart == 0) 103.590 + return NULL; 103.591 + 103.592 + scrub_pages(vstart, 1 << order); 103.593 103.594 - balloon_lock(flags); 103.595 - if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 103.596 - mfn_list, nr_mfns, 0) != nr_mfns ) 103.597 - BUG(); 103.598 - current_pages -= nr_mfns; /* non-atomic update */ 103.599 - balloon_unlock(flags); 103.600 + balloon_lock(flags); 103.601 + BUG_ON(generic_page_range( 103.602 + &init_mm, vstart, PAGE_SIZE << order, f, NULL) != 0); 103.603 + current_pages -= 1UL << order; 103.604 + balloon_unlock(flags); 103.605 + 103.606 + schedule_work(&balloon_worker); 103.607 + 103.608 + flush_tlb_all(); 103.609 + 103.610 + return virt_to_page(vstart); 103.611 +} 103.612 103.613 - schedule_work(&balloon_worker); 103.614 +void balloon_dealloc_empty_page_range( 103.615 + struct page *page, unsigned long nr_pages) 103.616 +{ 103.617 + unsigned long i, flags; 103.618 + unsigned int order = get_order(nr_pages * PAGE_SIZE); 103.619 + 103.620 + balloon_lock(flags); 103.621 + for (i = 0; i < (1UL << order); i++) 103.622 + balloon_append(page + i); 103.623 + balloon_unlock(flags); 103.624 + 103.625 + schedule_work(&balloon_worker); 103.626 } 103.627 103.628 EXPORT_SYMBOL(balloon_update_driver_allowance); 103.629 -EXPORT_SYMBOL(balloon_put_pages); 103.630 +EXPORT_SYMBOL(balloon_alloc_empty_page_range); 103.631 
+EXPORT_SYMBOL(balloon_dealloc_empty_page_range); 103.632 + 103.633 +/* 103.634 + * Local variables: 103.635 + * c-file-style: "linux" 103.636 + * indent-tabs-mode: t 103.637 + * c-indent-level: 8 103.638 + * c-basic-offset: 8 103.639 + * tab-width: 8 103.640 + * End: 103.641 + */
105.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Fri Aug 19 16:47:24 2005 -0800 105.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Mon Aug 22 11:00:37 2005 -0700 105.3 @@ -80,7 +80,6 @@ static inline void flush_plugged_queue(v 105.4 } 105.5 #endif 105.6 105.7 -#ifdef CONFIG_XEN_BLKDEV_GRANT 105.8 /* When using grant tables to map a frame for device access then the 105.9 * handle returned must be used to unmap the frame. This is needed to 105.10 * drop the ref count on the frame. 105.11 @@ -89,7 +88,6 @@ static u16 pending_grant_handles[MMAP_PA 105.12 #define pending_handle(_idx, _i) \ 105.13 (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) 105.14 #define BLKBACK_INVALID_HANDLE (0xFFFF) 105.15 -#endif 105.16 105.17 #ifdef CONFIG_XEN_BLKDEV_TAP_BE 105.18 /* 105.19 @@ -110,7 +108,6 @@ static void make_response(blkif_t *blkif 105.20 105.21 static void fast_flush_area(int idx, int nr_pages) 105.22 { 105.23 -#ifdef CONFIG_XEN_BLKDEV_GRANT 105.24 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 105.25 unsigned int i, invcount = 0; 105.26 u16 handle; 105.27 @@ -129,21 +126,6 @@ static void fast_flush_area(int idx, int 105.28 if ( unlikely(HYPERVISOR_grant_table_op( 105.29 GNTTABOP_unmap_grant_ref, unmap, invcount))) 105.30 BUG(); 105.31 -#else 105.32 - 105.33 - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 105.34 - int i; 105.35 - 105.36 - for ( i = 0; i < nr_pages; i++ ) 105.37 - { 105.38 - MULTI_update_va_mapping(mcl+i, MMAP_VADDR(idx, i), 105.39 - __pte(0), 0); 105.40 - } 105.41 - 105.42 - mcl[nr_pages-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; 105.43 - if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) 105.44 - BUG(); 105.45 -#endif 105.46 } 105.47 105.48 105.49 @@ -367,12 +349,7 @@ static void dispatch_rw_block_io(blkif_t 105.50 unsigned long fas = 0; 105.51 int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 105.52 pending_req_t *pending_req; 105.53 
-#ifdef CONFIG_XEN_BLKDEV_GRANT 105.54 struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 105.55 -#else 105.56 - unsigned long remap_prot; 105.57 - multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 105.58 -#endif 105.59 struct phys_req preq; 105.60 struct { 105.61 unsigned long buf; unsigned int nsec; 105.62 @@ -399,7 +376,6 @@ static void dispatch_rw_block_io(blkif_t 105.63 preq.sector_number = req->sector_number; 105.64 preq.nr_sects = 0; 105.65 105.66 -#ifdef CONFIG_XEN_BLKDEV_GRANT 105.67 for ( i = 0; i < nseg; i++ ) 105.68 { 105.69 fas = req->frame_and_sects[i]; 105.70 @@ -431,25 +407,15 @@ static void dispatch_rw_block_io(blkif_t 105.71 } 105.72 105.73 phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = 105.74 - FOREIGN_FRAME(map[i].dev_bus_addr); 105.75 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); 105.76 105.77 pending_handle(pending_idx, i) = map[i].handle; 105.78 } 105.79 -#endif 105.80 105.81 for ( i = 0; i < nseg; i++ ) 105.82 { 105.83 fas = req->frame_and_sects[i]; 105.84 -#ifdef CONFIG_XEN_BLKDEV_GRANT 105.85 - seg[i].buf = (map[i].dev_bus_addr << PAGE_SHIFT) | 105.86 - (blkif_first_sect(fas) << 9); 105.87 -#else 105.88 - seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9); 105.89 - seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1; 105.90 - if ( seg[i].nsec <= 0 ) 105.91 - goto bad_descriptor; 105.92 - preq.nr_sects += seg[i].nsec; 105.93 -#endif 105.94 + seg[i].buf = map[i].dev_bus_addr | (blkif_first_sect(fas) << 9); 105.95 } 105.96 105.97 if ( vbd_translate(&preq, blkif, operation) != 0 ) 105.98 @@ -460,40 +426,6 @@ static void dispatch_rw_block_io(blkif_t 105.99 goto bad_descriptor; 105.100 } 105.101 105.102 -#ifndef CONFIG_XEN_BLKDEV_GRANT 105.103 - if ( operation == READ ) 105.104 - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; 105.105 - else 105.106 - remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED; 105.107 - 105.108 - 105.109 - for ( i = 0; i < nseg; 
i++ ) 105.110 - { 105.111 - MULTI_update_va_mapping_otherdomain( 105.112 - mcl+i, MMAP_VADDR(pending_idx, i), 105.113 - pfn_pte_ma(seg[i].buf >> PAGE_SHIFT, __pgprot(remap_prot)), 105.114 - 0, blkif->domid); 105.115 -#ifdef CONFIG_XEN_BLKDEV_TAP_BE 105.116 - if ( blkif->is_blktap ) 105.117 - mcl[i].args[MULTI_UVMDOMID_INDEX] = ID_TO_DOM(req->id); 105.118 -#endif 105.119 - phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = 105.120 - FOREIGN_FRAME(seg[i].buf >> PAGE_SHIFT); 105.121 - } 105.122 - 105.123 - BUG_ON(HYPERVISOR_multicall(mcl, nseg) != 0); 105.124 - 105.125 - for ( i = 0; i < nseg; i++ ) 105.126 - { 105.127 - if ( unlikely(mcl[i].result != 0) ) 105.128 - { 105.129 - DPRINTK("invalid buffer -- could not remap it\n"); 105.130 - fast_flush_area(pending_idx, nseg); 105.131 - goto bad_descriptor; 105.132 - } 105.133 - } 105.134 -#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */ 105.135 - 105.136 pending_req = &pending_reqs[pending_idx]; 105.137 pending_req->blkif = blkif; 105.138 pending_req->id = req->id; 105.139 @@ -637,6 +569,7 @@ void blkif_deschedule(blkif_t *blkif) 105.140 static int __init blkif_init(void) 105.141 { 105.142 int i; 105.143 + struct page *page; 105.144 105.145 if ( !(xen_start_info.flags & SIF_INITDOMAIN) && 105.146 !(xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 105.147 @@ -644,8 +577,9 @@ static int __init blkif_init(void) 105.148 105.149 blkif_interface_init(); 105.150 105.151 - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) 105.152 - BUG(); 105.153 + page = balloon_alloc_empty_page_range(MMAP_PAGES); 105.154 + BUG_ON(page == NULL); 105.155 + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 105.156 105.157 pending_cons = 0; 105.158 pending_prod = MAX_PENDING_REQS; 105.159 @@ -667,10 +601,7 @@ static int __init blkif_init(void) 105.160 105.161 blkif_xenbus_init(); 105.162 105.163 -#ifdef CONFIG_XEN_BLKDEV_GRANT 105.164 memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); 
105.165 - printk(KERN_ALERT "Blkif backend is using grant tables.\n"); 105.166 -#endif 105.167 105.168 #ifdef CONFIG_XEN_BLKDEV_TAP_BE 105.169 printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
106.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Fri Aug 19 16:47:24 2005 -0800 106.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h Mon Aug 22 11:00:37 2005 -0700 106.3 @@ -17,9 +17,7 @@ 106.4 #include <asm-xen/hypervisor.h> 106.5 #include <asm-xen/xen-public/io/blkif.h> 106.6 #include <asm-xen/xen-public/io/ring.h> 106.7 -#ifdef CONFIG_XEN_BLKDEV_GRANT 106.8 #include <asm-xen/gnttab.h> 106.9 -#endif 106.10 106.11 #if 0 106.12 #define ASSERT(_p) \ 106.13 @@ -53,7 +51,7 @@ typedef struct blkif_st { 106.14 rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs.*/ 106.15 spinlock_t vbd_lock; /* Protects VBD mapping. */ 106.16 /* Private fields. */ 106.17 - enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; 106.18 + enum { DISCONNECTED, CONNECTED } status; 106.19 /* 106.20 * DISCONNECT response is deferred until pending requests are ack'ed. 106.21 * We therefore need to store the id from the original request. 106.22 @@ -69,11 +67,9 @@ typedef struct blkif_st { 106.23 atomic_t refcnt; 106.24 106.25 struct work_struct work; 106.26 -#ifdef CONFIG_XEN_BLKDEV_GRANT 106.27 u16 shmem_handle; 106.28 unsigned long shmem_vaddr; 106.29 grant_ref_t shmem_ref; 106.30 -#endif 106.31 } blkif_t; 106.32 106.33 void blkif_create(blkif_be_create_t *create);
107.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Fri Aug 19 16:47:24 2005 -0800 107.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Mon Aug 22 11:00:37 2005 -0700 107.3 @@ -7,7 +7,6 @@ 107.4 */ 107.5 107.6 #include "common.h" 107.7 -#include <asm-xen/ctrl_if.h> 107.8 #include <asm-xen/evtchn.h> 107.9 107.10 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 107.11 @@ -48,19 +47,6 @@ blkif_t *blkif_find(domid_t domid) 107.12 return blkif; 107.13 } 107.14 107.15 -#ifndef CONFIG_XEN_BLKDEV_GRANT 107.16 -static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, 107.17 - unsigned long shared_page) 107.18 -{ 107.19 - return direct_remap_area_pages(&init_mm, localaddr, 107.20 - shared_page<<PAGE_SHIFT, PAGE_SIZE, 107.21 - __pgprot(_KERNPG_TABLE), blkif->domid); 107.22 -} 107.23 - 107.24 -static void unmap_frontend_page(blkif_t *blkif) 107.25 -{ 107.26 -} 107.27 -#else 107.28 static int map_frontend_page(blkif_t *blkif, unsigned long localaddr, 107.29 unsigned long shared_page) 107.30 { 107.31 @@ -92,7 +78,6 @@ static void unmap_frontend_page(blkif_t 107.32 op.dev_bus_addr = 0; 107.33 BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); 107.34 } 107.35 -#endif /* CONFIG_XEN_BLKDEV_GRANT */ 107.36 107.37 int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) 107.38 { 107.39 @@ -138,52 +123,6 @@ int blkif_map(blkif_t *blkif, unsigned l 107.40 return 0; 107.41 } 107.42 107.43 -static void __blkif_disconnect_complete(void *arg) 107.44 -{ 107.45 - blkif_t *blkif = (blkif_t *)arg; 107.46 - ctrl_msg_t cmsg; 107.47 - blkif_be_disconnect_t disc; 107.48 - 107.49 - /* 107.50 - * These can't be done in blkif_disconnect() because at that point there 107.51 - * may be outstanding requests at the disc whose asynchronous responses 107.52 - * must still be notified to the remote driver. 
107.53 - */ 107.54 - unmap_frontend_page(blkif); 107.55 - vfree(blkif->blk_ring.sring); 107.56 - 107.57 - /* Construct the deferred response message. */ 107.58 - cmsg.type = CMSG_BLKIF_BE; 107.59 - cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; 107.60 - cmsg.id = blkif->disconnect_rspid; 107.61 - cmsg.length = sizeof(blkif_be_disconnect_t); 107.62 - disc.domid = blkif->domid; 107.63 - disc.blkif_handle = blkif->handle; 107.64 - disc.status = BLKIF_BE_STATUS_OKAY; 107.65 - memcpy(cmsg.msg, &disc, sizeof(disc)); 107.66 - 107.67 - /* 107.68 - * Make sure message is constructed /before/ status change, because 107.69 - * after the status change the 'blkif' structure could be deallocated at 107.70 - * any time. Also make sure we send the response /after/ status change, 107.71 - * as otherwise a subsequent CONNECT request could spuriously fail if 107.72 - * another CPU doesn't see the status change yet. 107.73 - */ 107.74 - mb(); 107.75 - BUG_ON(blkif->status != DISCONNECTING); 107.76 - blkif->status = DISCONNECTED; 107.77 - mb(); 107.78 - 107.79 - /* Send the successful response. */ 107.80 - ctrl_if_send_response(&cmsg); 107.81 -} 107.82 - 107.83 -void blkif_disconnect_complete(blkif_t *blkif) 107.84 -{ 107.85 - INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); 107.86 - schedule_work(&blkif->work); 107.87 -} 107.88 - 107.89 void free_blkif(blkif_t *blkif) 107.90 { 107.91 blkif_t **pblkif;
109.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Fri Aug 19 16:47:24 2005 -0800 109.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Mon Aug 22 11:00:37 2005 -0700 109.3 @@ -75,16 +75,6 @@ static void frontend_changed(struct xenb 109.4 if (vbd_is_active(be->vbd)) 109.5 return; 109.6 109.7 -#ifndef CONFIG_XEN_BLKDEV_GRANT 109.8 - err = xenbus_gather(be->frontpath, "shared-frame", "%lu", &sharedmfn, 109.9 - "event-channel", "%u", &evtchn, NULL); 109.10 - if (err) { 109.11 - xenbus_dev_error(be->dev, err, 109.12 - "reading %s/shared-frame and event-channel", 109.13 - be->frontpath); 109.14 - return; 109.15 - } 109.16 -#else 109.17 err = xenbus_gather(be->frontpath, "grant-id", "%lu", &sharedmfn, 109.18 "event-channel", "%u", &evtchn, NULL); 109.19 if (err) { 109.20 @@ -93,7 +83,6 @@ static void frontend_changed(struct xenb 109.21 be->frontpath); 109.22 return; 109.23 } 109.24 -#endif 109.25 109.26 /* Domains must use same shared frame for all vbds. */ 109.27 if (be->blkif->status == CONNECTED && 109.28 @@ -182,16 +171,20 @@ static void backend_changed(struct xenbu 109.29 "frontend-id", "%li", &be->frontend_id, 109.30 "frontend", NULL, &frontend, 109.31 NULL); 109.32 - if (err == -ENOENT || err == -ERANGE || 109.33 + if (XENBUS_EXIST_ERR(err) || 109.34 strlen(frontend) == 0 || !xenbus_exists(frontend, "")) { 109.35 - if (frontend) 109.36 - kfree(frontend); 109.37 /* If we can't get a frontend path and a frontend-id, 109.38 * then our bus-id is no longer valid and we need to 109.39 * destroy the backend device. 
109.40 */ 109.41 goto device_fail; 109.42 } 109.43 + if (err < 0) { 109.44 + xenbus_dev_error(dev, err, 109.45 + "reading %s/frontend or frontend-id", 109.46 + dev->nodename); 109.47 + goto device_fail; 109.48 + } 109.49 109.50 if (!be->frontpath || strcmp(frontend, be->frontpath)) { 109.51 if (be->watch.node) 109.52 @@ -199,6 +192,7 @@ static void backend_changed(struct xenbu 109.53 if (be->frontpath) 109.54 kfree(be->frontpath); 109.55 be->frontpath = frontend; 109.56 + frontend = NULL; 109.57 be->watch.node = be->frontpath; 109.58 be->watch.callback = frontend_changed; 109.59 err = register_xenbus_watch(&be->watch); 109.60 @@ -206,14 +200,13 @@ static void backend_changed(struct xenbu 109.61 be->watch.node = NULL; 109.62 goto device_fail; 109.63 } 109.64 - } else 109.65 - kfree(frontend); 109.66 + } 109.67 109.68 err = xenbus_scanf(dev->nodename, "physical-device", "%li", &pdev); 109.69 - if (err == -ENOENT || err == -ERANGE) 109.70 + if (XENBUS_EXIST_ERR(err)) 109.71 goto out; 109.72 if (err < 0) { 109.73 - xenbus_dev_error(dev, err, "Reading physical-device"); 109.74 + xenbus_dev_error(dev, err, "reading physical-device"); 109.75 goto device_fail; 109.76 } 109.77 if (be->pdev && be->pdev != pdev) { 109.78 @@ -253,12 +246,14 @@ static void backend_changed(struct xenbu 109.79 frontend_changed(&be->watch, be->frontpath); 109.80 } 109.81 109.82 + out: 109.83 + if (frontend) 109.84 + kfree(frontend); 109.85 return; 109.86 109.87 device_fail: 109.88 device_unregister(&be->dev->dev); 109.89 - out: 109.90 - return; 109.91 + goto out; 109.92 } 109.93 109.94 static int blkback_probe(struct xenbus_device *dev,
110.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Fri Aug 19 16:47:24 2005 -0800 110.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Mon Aug 22 11:00:37 2005 -0700 110.3 @@ -55,10 +55,8 @@ 110.4 #include <scsi/scsi.h> 110.5 #include <asm-xen/evtchn.h> 110.6 #include <asm-xen/xenbus.h> 110.7 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.8 #include <asm-xen/xen-public/grant_table.h> 110.9 #include <asm-xen/gnttab.h> 110.10 -#endif 110.11 110.12 typedef unsigned char byte; /* from linux/ide.h */ 110.13 110.14 @@ -78,14 +76,12 @@ static blkif_front_ring_t blk_ring; 110.15 110.16 #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) 110.17 110.18 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.19 static domid_t rdomid = 0; 110.20 static grant_ref_t gref_head, gref_terminal; 110.21 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 110.22 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) 110.23 #define GRANTREF_INVALID (1<<15) 110.24 static int shmem_ref; 110.25 -#endif 110.26 110.27 static struct blk_shadow { 110.28 blkif_request_t req; 110.29 @@ -131,30 +127,14 @@ static int sg_operation = -1; 110.30 110.31 static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r) 110.32 { 110.33 -#ifndef CONFIG_XEN_BLKDEV_GRANT 110.34 - int i; 110.35 -#endif 110.36 110.37 s->req = *r; 110.38 - 110.39 -#ifndef CONFIG_XEN_BLKDEV_GRANT 110.40 - for ( i = 0; i < r->nr_segments; i++ ) 110.41 - s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]); 110.42 -#endif 110.43 } 110.44 110.45 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s) 110.46 { 110.47 -#ifndef CONFIG_XEN_BLKDEV_GRANT 110.48 - int i; 110.49 -#endif 110.50 110.51 *r = s->req; 110.52 - 110.53 -#ifndef CONFIG_XEN_BLKDEV_GRANT 110.54 - for ( i = 0; i < s->req.nr_segments; i++ ) 110.55 - r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]); 110.56 -#endif 110.57 } 110.58 110.59 110.60 @@ -256,9 +236,7 @@ static int blkif_queue_request(struct re 110.61 
int idx; 110.62 unsigned long id; 110.63 unsigned int fsect, lsect; 110.64 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.65 int ref; 110.66 -#endif 110.67 110.68 if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) 110.69 return 1; 110.70 @@ -284,7 +262,6 @@ static int blkif_queue_request(struct re 110.71 buffer_ma = page_to_phys(bvec->bv_page); 110.72 fsect = bvec->bv_offset >> 9; 110.73 lsect = fsect + (bvec->bv_len >> 9) - 1; 110.74 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.75 /* install a grant reference. */ 110.76 ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); 110.77 ASSERT( ref != -ENOSPC ); 110.78 @@ -300,11 +277,6 @@ static int blkif_queue_request(struct re 110.79 110.80 ring_req->frame_and_sects[ring_req->nr_segments++] = 110.81 blkif_fas_from_gref(ref, fsect, lsect); 110.82 - 110.83 -#else 110.84 - ring_req->frame_and_sects[ring_req->nr_segments++] = 110.85 - blkif_fas(buffer_ma, fsect, lsect); 110.86 -#endif 110.87 } 110.88 } 110.89 110.90 @@ -711,9 +683,7 @@ static int blkif_queue_request(unsigned 110.91 blkif_request_t *req; 110.92 struct buffer_head *bh; 110.93 unsigned int fsect, lsect; 110.94 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.95 int ref; 110.96 -#endif 110.97 110.98 fsect = (buffer_ma & ~PAGE_MASK) >> 9; 110.99 lsect = fsect + nr_sectors - 1; 110.100 @@ -762,7 +732,6 @@ static int blkif_queue_request(unsigned 110.101 bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request; 110.102 blk_shadow[req->id].request = (unsigned long)id; 110.103 110.104 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.105 /* install a grant reference. 
*/ 110.106 ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); 110.107 ASSERT( ref != -ENOSPC ); 110.108 @@ -778,10 +747,6 @@ static int blkif_queue_request(unsigned 110.109 110.110 req->frame_and_sects[req->nr_segments] = 110.111 blkif_fas_from_gref(ref, fsect, lsect); 110.112 -#else 110.113 - req->frame_and_sects[req->nr_segments] = 110.114 - blkif_fas(buffer_ma, fsect, lsect); 110.115 -#endif 110.116 if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) 110.117 sg_next_sect += nr_sectors; 110.118 else 110.119 @@ -819,7 +784,6 @@ static int blkif_queue_request(unsigned 110.120 req->sector_number = (blkif_sector_t)sector_number; 110.121 req->handle = handle; 110.122 req->nr_segments = 1; 110.123 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.124 /* install a grant reference. */ 110.125 ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); 110.126 ASSERT( ref != -ENOSPC ); 110.127 @@ -833,9 +797,6 @@ static int blkif_queue_request(unsigned 110.128 blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT; 110.129 110.130 req->frame_and_sects[0] = blkif_fas_from_gref(ref, fsect, lsect); 110.131 -#else 110.132 - req->frame_and_sects[0] = blkif_fas(buffer_ma, fsect, lsect); 110.133 -#endif 110.134 110.135 /* Keep a private copy so we can reissue requests when recovering. */ 110.136 pickle_request(&blk_shadow[xid], req); 110.137 @@ -1015,9 +976,7 @@ static void blkif_recover(void) 110.138 int i; 110.139 blkif_request_t *req; 110.140 struct blk_shadow *copy; 110.141 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.142 int j; 110.143 -#endif 110.144 110.145 /* Stage 1: Make a safe copy of the shadow state. */ 110.146 copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL); 110.147 @@ -1047,7 +1006,6 @@ static void blkif_recover(void) 110.148 req->id = GET_ID_FROM_FREELIST(); 110.149 memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i])); 110.150 110.151 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.152 /* Rewrite any grant references invalidated by suspend/resume. 
*/ 110.153 for ( j = 0; j < req->nr_segments; j++ ) 110.154 { 110.155 @@ -1061,7 +1019,6 @@ static void blkif_recover(void) 110.156 req->frame_and_sects[j] &= ~GRANTREF_INVALID; 110.157 } 110.158 blk_shadow[req->id].req = *req; 110.159 -#endif 110.160 110.161 blk_ring.req_prod_pvt++; 110.162 } 110.163 @@ -1085,9 +1042,7 @@ static void blkif_connect(u16 evtchn, do 110.164 int err = 0; 110.165 110.166 blkif_evtchn = evtchn; 110.167 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.168 rdomid = domid; 110.169 -#endif 110.170 110.171 err = bind_evtchn_to_irqhandler( 110.172 blkif_evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); 110.173 @@ -1134,25 +1089,24 @@ static void watch_for_status(struct xenb 110.174 "info", "%u", &binfo, 110.175 "sector-size", "%lu", &sector_size, 110.176 NULL); 110.177 - 110.178 - if (err) 110.179 + if (err) { 110.180 xenbus_dev_error(info->dev, err, "reading backend fields"); 110.181 - else { 110.182 - xlvbd_add(sectors, info->vdevice, info->handle, binfo, 110.183 - sector_size); 110.184 - info->connected = 1; 110.185 + return; 110.186 + } 110.187 + 110.188 + xlvbd_add(sectors, info->vdevice, info->handle, binfo, sector_size); 110.189 + info->connected = 1; 110.190 110.191 - /* First to connect? blkif is now connected. */ 110.192 - if (blkif_vbds_connected++ == 0) 110.193 - blkif_state = BLKIF_STATE_CONNECTED; 110.194 - 110.195 - xenbus_dev_ok(info->dev); 110.196 + /* First to connect? blkif is now connected. */ 110.197 + if (blkif_vbds_connected++ == 0) 110.198 + blkif_state = BLKIF_STATE_CONNECTED; 110.199 110.200 - /* Kick pending requests. */ 110.201 - spin_lock_irq(&blkif_io_lock); 110.202 - kick_pending_request_queues(); 110.203 - spin_unlock_irq(&blkif_io_lock); 110.204 - } 110.205 + xenbus_dev_ok(info->dev); 110.206 + 110.207 + /* Kick pending requests. 
*/ 110.208 + spin_lock_irq(&blkif_io_lock); 110.209 + kick_pending_request_queues(); 110.210 + spin_unlock_irq(&blkif_io_lock); 110.211 } 110.212 110.213 static int setup_blkring(struct xenbus_device *dev, unsigned int backend_id) 110.214 @@ -1169,7 +1123,6 @@ static int setup_blkring(struct xenbus_d 110.215 SHARED_RING_INIT(sring); 110.216 FRONT_RING_INIT(&blk_ring, sring, PAGE_SIZE); 110.217 110.218 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.219 shmem_ref = gnttab_claim_grant_reference(&gref_head, 110.220 gref_terminal); 110.221 ASSERT(shmem_ref != -ENOSPC); 110.222 @@ -1177,7 +1130,6 @@ static int setup_blkring(struct xenbus_d 110.223 backend_id, 110.224 virt_to_mfn(blk_ring.sring), 110.225 0); 110.226 -#endif 110.227 110.228 op.u.alloc_unbound.dom = backend_id; 110.229 err = HYPERVISOR_event_channel_op(&op); 110.230 @@ -1199,36 +1151,28 @@ static int talk_to_backend(struct xenbus 110.231 const char *message; 110.232 int err, backend_id; 110.233 110.234 - backend = xenbus_read(dev->nodename, "backend", NULL); 110.235 - if (IS_ERR(backend)) { 110.236 - err = PTR_ERR(backend); 110.237 - if (err == -ENOENT) 110.238 - goto out; 110.239 - xenbus_dev_error(dev, err, "reading %s/backend", 110.240 + backend = NULL; 110.241 + err = xenbus_gather(dev->nodename, 110.242 + "backend-id", "%i", &backend_id, 110.243 + "backend", NULL, &backend, 110.244 + NULL); 110.245 + if (XENBUS_EXIST_ERR(err)) 110.246 + goto out; 110.247 + if (backend && strlen(backend) == 0) { 110.248 + err = -ENOENT; 110.249 + goto out; 110.250 + } 110.251 + if (err < 0) { 110.252 + xenbus_dev_error(dev, err, "reading %s/backend or backend-id", 110.253 dev->nodename); 110.254 goto out; 110.255 } 110.256 - if (strlen(backend) == 0) { 110.257 - err = -ENOENT; 110.258 - goto free_backend; 110.259 - } 110.260 - 110.261 - /* FIXME: This driver can't handle backends on different 110.262 - * domains. Check and fail gracefully. 
*/ 110.263 - err = xenbus_scanf(dev->nodename, "backend-id", "%i", &backend_id); 110.264 - if (err == -ENOENT) 110.265 - goto free_backend; 110.266 - if (err < 0) { 110.267 - xenbus_dev_error(dev, err, "reading %s/backend-id", 110.268 - dev->nodename); 110.269 - goto free_backend; 110.270 - } 110.271 110.272 /* First device? We create shared ring, alloc event channel. */ 110.273 if (blkif_vbds == 0) { 110.274 err = setup_blkring(dev, backend_id); 110.275 if (err) 110.276 - goto free_backend; 110.277 + goto out; 110.278 } 110.279 110.280 err = xenbus_transaction_start(dev->nodename); 110.281 @@ -1237,20 +1181,11 @@ static int talk_to_backend(struct xenbus 110.282 goto destroy_blkring; 110.283 } 110.284 110.285 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.286 err = xenbus_printf(dev->nodename, "grant-id","%u", shmem_ref); 110.287 if (err) { 110.288 message = "writing grant-id"; 110.289 goto abort_transaction; 110.290 } 110.291 -#else 110.292 - err = xenbus_printf(dev->nodename, "shared-frame", "%lu", 110.293 - virt_to_mfn(blk_ring.sring)); 110.294 - if (err) { 110.295 - message = "writing shared-frame"; 110.296 - goto abort_transaction; 110.297 - } 110.298 -#endif 110.299 err = xenbus_printf(dev->nodename, 110.300 "event-channel", "%u", blkif_evtchn); 110.301 if (err) { 110.302 @@ -1258,9 +1193,11 @@ static int talk_to_backend(struct xenbus 110.303 goto abort_transaction; 110.304 } 110.305 110.306 - info->watch.node = info->backend = backend; 110.307 + info->backend = backend; 110.308 + backend = NULL; 110.309 + 110.310 + info->watch.node = info->backend; 110.311 info->watch.callback = watch_for_status; 110.312 - 110.313 err = register_xenbus_watch(&info->watch); 110.314 if (err) { 110.315 message = "registering watch on backend"; 110.316 @@ -1272,20 +1209,20 @@ static int talk_to_backend(struct xenbus 110.317 xenbus_dev_error(dev, err, "completing transaction"); 110.318 goto destroy_blkring; 110.319 } 110.320 - return 0; 110.321 110.322 -abort_transaction: 110.323 + out: 
110.324 + if (backend) 110.325 + kfree(backend); 110.326 + return err; 110.327 + 110.328 + abort_transaction: 110.329 xenbus_transaction_end(1); 110.330 /* Have to do this *outside* transaction. */ 110.331 xenbus_dev_error(dev, err, "%s", message); 110.332 -destroy_blkring: 110.333 + destroy_blkring: 110.334 if (blkif_vbds == 0) 110.335 blkif_free(); 110.336 -free_backend: 110.337 - kfree(backend); 110.338 -out: 110.339 - printk("%s:%u = %i\n", __FILE__, __LINE__, err); 110.340 - return err; 110.341 + goto out; 110.342 } 110.343 110.344 /* Setup supplies the backend dir, virtual device. 110.345 @@ -1301,7 +1238,7 @@ static int blkfront_probe(struct xenbus_ 110.346 110.347 /* FIXME: Use dynamic device id if this is not set. */ 110.348 err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice); 110.349 - if (err == -ENOENT) 110.350 + if (XENBUS_EXIST_ERR(err)) 110.351 return err; 110.352 if (err < 0) { 110.353 xenbus_dev_error(dev, err, "reading virtual-device"); 110.354 @@ -1316,6 +1253,7 @@ static int blkfront_probe(struct xenbus_ 110.355 info->dev = dev; 110.356 info->vdevice = vdevice; 110.357 info->connected = 0; 110.358 + 110.359 /* Front end dir is a number, which is used as the id. */ 110.360 info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0); 110.361 dev->data = info; 110.362 @@ -1425,13 +1363,10 @@ static int __init xlblk_init(void) 110.363 { 110.364 int i; 110.365 110.366 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.367 /* A grant for every ring slot, plus one for the ring itself. 
*/ 110.368 - if ( 0 > gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, 110.369 - &gref_head, &gref_terminal) ) 110.370 + if (gnttab_alloc_grant_references(MAXIMUM_OUTSTANDING_BLOCK_REQS + 1, 110.371 + &gref_head, &gref_terminal) < 0) 110.372 return 1; 110.373 - printk(KERN_ALERT "Blkif frontend is using grant tables.\n"); 110.374 -#endif 110.375 110.376 if ( (xen_start_info.flags & SIF_INITDOMAIN) || 110.377 (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) 110.378 @@ -1455,20 +1390,7 @@ static int __init xlblk_init(void) 110.379 static void blkif_completion(struct blk_shadow *s) 110.380 { 110.381 int i; 110.382 -#ifdef CONFIG_XEN_BLKDEV_GRANT 110.383 for ( i = 0; i < s->req.nr_segments; i++ ) 110.384 gnttab_release_grant_reference( 110.385 &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i])); 110.386 -#else 110.387 - /* This is a hack to get the dirty logging bits set */ 110.388 - if ( s->req.operation == BLKIF_OP_READ ) 110.389 - { 110.390 - for ( i = 0; i < s->req.nr_segments; i++ ) 110.391 - { 110.392 - unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT; 110.393 - unsigned long mfn = phys_to_machine_mapping[pfn]; 110.394 - xen_machphys_update(mfn, pfn); 110.395 - } 110.396 - } 110.397 -#endif 110.398 }
117.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Fri Aug 19 16:47:24 2005 -0800 117.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap_userdev.c Mon Aug 22 11:00:37 2005 -0700 117.3 @@ -5,7 +5,6 @@ 117.4 * Control interface between the driver and a character device. 117.5 * 117.6 * Copyright (c) 2004, Andrew Warfield 117.7 - * 117.8 */ 117.9 117.10 #include <linux/config.h> 117.11 @@ -535,8 +534,8 @@ int blktap_write_fe_ring(blkif_request_t 117.12 /* Set the necessary mappings in p2m and in the VM_FOREIGN 117.13 * vm_area_struct to allow user vaddr -> struct page lookups 117.14 * to work. This is needed for direct IO to foreign pages. */ 117.15 - phys_to_machine_mapping[__pa(kvaddr)>>PAGE_SHIFT] = 117.16 - FOREIGN_FRAME(map[i].dev_bus_addr); 117.17 + phys_to_machine_mapping[__pa(kvaddr) >> PAGE_SHIFT] = 117.18 + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT); 117.19 117.20 offset = (uvaddr - blktap_vma->vm_start) >> PAGE_SHIFT; 117.21 ((struct page **)blktap_vma->vm_private_data)[offset] = 117.22 @@ -776,9 +775,11 @@ static struct miscdevice blktap_miscdev 117.23 int blktap_init(void) 117.24 { 117.25 int err, i, j; 117.26 + struct page *page; 117.27 117.28 - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) 117.29 - BUG(); 117.30 + page = balloon_alloc_empty_page_range(MMAP_PAGES); 117.31 + BUG_ON(page == NULL); 117.32 + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 117.33 117.34 #ifdef CONFIG_XEN_BLKDEV_GRANT 117.35 for (i=0; i<MAX_PENDING_REQS ; i++)
121.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Fri Aug 19 16:47:24 2005 -0800 121.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Aug 22 11:00:37 2005 -0700 121.3 @@ -768,7 +768,7 @@ static void net_tx_action(unsigned long 121.4 continue; 121.5 } 121.6 phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] = 121.7 - FOREIGN_FRAME(mop->dev_bus_addr); 121.8 + FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT); 121.9 grant_tx_ref[pending_idx] = mop->handle; 121.10 #else 121.11 if ( unlikely(mcl[0].result != 0) ) 121.12 @@ -968,8 +968,9 @@ static int __init netback_init(void) 121.13 121.14 netif_interface_init(); 121.15 121.16 - mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS); 121.17 - BUG_ON(mmap_vstart == 0); 121.18 + page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); 121.19 + BUG_ON(page == NULL); 121.20 + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 121.21 121.22 for ( i = 0; i < MAX_PENDING_REQS; i++ ) 121.23 {
126.1 --- a/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Fri Aug 19 16:47:24 2005 -0800 126.2 +++ b/linux-2.6-xen-sparse/drivers/xen/usbback/usbback.c Mon Aug 22 11:00:37 2005 -0700 126.3 @@ -1027,13 +1027,15 @@ void usbif_release_ports(usbif_priv_t *u 126.4 static int __init usbif_init(void) 126.5 { 126.6 int i; 126.7 + struct page *page; 126.8 126.9 if ( !(xen_start_info.flags & SIF_INITDOMAIN) && 126.10 !(xen_start_info.flags & SIF_USB_BE_DOMAIN) ) 126.11 return 0; 126.12 - 126.13 - if ( (mmap_vstart = allocate_empty_lowmem_region(MMAP_PAGES)) == 0 ) 126.14 - BUG(); 126.15 + 126.16 + page = balloon_alloc_empty_page_range(MMAP_PAGES); 126.17 + BUG_ON(page == NULL); 126.18 + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); 126.19 126.20 pending_cons = 0; 126.21 pending_prod = MAX_PENDING_REQS;
152.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Fri Aug 19 16:47:24 2005 -0800 152.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h Mon Aug 22 11:00:37 2005 -0700 152.3 @@ -507,14 +507,14 @@ HYPERVISOR_vcpu_pickle( 152.4 int vcpu, vcpu_guest_context_t *ctxt) 152.5 { 152.6 int ret; 152.7 - unsigned long ign1, ign2; 152.8 + 152.9 __asm__ __volatile__ ( 152.10 TRAP_INSTR 152.11 - : "=a" (ret), "=b" (ign1), "=c" (ign2) 152.12 - : "0" (__HYPERVISOR_sched_op), 152.13 - "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), 152.14 - "2" (ctxt) 152.15 - : __syscall_clobber ); 152.16 + : "=a" (ret) 152.17 + : "0" ((unsigned long)__HYPERVISOR_sched_op), 152.18 + "D" ((unsigned long)SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), 152.19 + "S" ((unsigned long)ctxt) 152.20 + : __syscall_clobber ); 152.21 152.22 return ret; 152.23 }
159.1 --- a/linux-2.6-xen-sparse/include/asm-xen/balloon.h Fri Aug 19 16:47:24 2005 -0800 159.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/balloon.h Mon Aug 22 11:00:37 2005 -0700 159.3 @@ -35,10 +35,19 @@ 159.4 * Inform the balloon driver that it should allow some slop for device-driver 159.5 * memory activities. 159.6 */ 159.7 -extern void balloon_update_driver_allowance(long delta); 159.8 +extern void 159.9 +balloon_update_driver_allowance( 159.10 + long delta); 159.11 159.12 -/* Give up unmapped pages to the balloon driver. */ 159.13 -extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns); 159.14 +/* Allocate an empty low-memory page range. */ 159.15 +extern struct page * 159.16 +balloon_alloc_empty_page_range( 159.17 + unsigned long nr_pages); 159.18 + 159.19 +/* Deallocate an empty page range, adding to the balloon. */ 159.20 +extern void 159.21 +balloon_dealloc_empty_page_range( 159.22 + struct page *page, unsigned long nr_pages); 159.23 159.24 /* 159.25 * Prevent the balloon driver from changing the memory reservation during
163.1 --- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Fri Aug 19 16:47:24 2005 -0800 163.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Mon Aug 22 11:00:37 2005 -0700 163.3 @@ -137,9 +137,6 @@ void xen_invlpg_mask(cpumask_t *mask, un 163.4 void xen_create_contiguous_region(unsigned long vstart, unsigned int order); 163.5 void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); 163.6 163.7 -/* Allocate a contiguous empty region of low memory. Return virtual start. */ 163.8 -unsigned long allocate_empty_lowmem_region(unsigned long pages); 163.9 - 163.10 #include <asm/hypercall.h> 163.11 163.12 #if defined(CONFIG_X86_64)
164.1 --- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Fri Aug 19 16:47:24 2005 -0800 164.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Mon Aug 22 11:00:37 2005 -0700 164.3 @@ -128,4 +128,14 @@ void reregister_xenbus_watches(void); 164.4 void xenbus_suspend(void); 164.5 void xenbus_resume(void); 164.6 164.7 +#define XENBUS_IS_ERR_READ(str) ({ \ 164.8 + if (!IS_ERR(str) && strlen(str) == 0) { \ 164.9 + kfree(str); \ 164.10 + str = ERR_PTR(-ERANGE); \ 164.11 + } \ 164.12 + IS_ERR(str); \ 164.13 +}) 164.14 + 164.15 +#define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) 164.16 + 164.17 #endif /* _ASM_XEN_XENBUS_H */
214.1 --- a/tools/firmware/vmxassist/vm86.c Fri Aug 19 16:47:24 2005 -0800 214.2 +++ b/tools/firmware/vmxassist/vm86.c Mon Aug 22 11:00:37 2005 -0700 214.3 @@ -38,7 +38,7 @@ unsigned prev_eip = 0; 214.4 enum vm86_mode mode; 214.5 214.6 #ifdef DEBUG 214.7 -int traceset = 0; 214.8 +int traceset = 0xff; 214.9 214.10 char *states[] = { 214.11 "<VM86_REAL>", 214.12 @@ -446,6 +446,8 @@ movcr(struct regs *regs, unsigned prefix 214.13 #endif 214.14 if (getreg(regs, modrm) & CR0_PE) 214.15 set_mode(regs, VM86_REAL_TO_PROTECTED); 214.16 + else 214.17 + set_mode(regs, VM86_REAL); 214.18 214.19 break; 214.20 case 3: 214.21 @@ -603,7 +605,9 @@ set_mode(struct regs *regs, enum vm86_mo 214.22 { 214.23 switch (newmode) { 214.24 case VM86_REAL: 214.25 - if (mode == VM86_PROTECTED_TO_REAL) { 214.26 + if ((mode == VM86_PROTECTED_TO_REAL) || 214.27 + (mode == VM86_REAL_TO_PROTECTED)) { 214.28 + regs->eflags &= ~EFLAGS_TF; 214.29 real_mode(regs); 214.30 break; 214.31 } else if (mode == VM86_REAL) {
308.1 --- a/tools/python/xen/xm/create.py Fri Aug 19 16:47:24 2005 -0800 308.2 +++ b/tools/python/xen/xm/create.py Mon Aug 22 11:00:37 2005 -0700 308.3 @@ -688,7 +688,9 @@ def balloon_out(dom0_min_mem, opts): 308.4 dom0_cur_alloc = get_dom0_alloc() 308.5 dom0_new_alloc = dom0_cur_alloc - (domU_need_mem - free_mem) 308.6 308.7 - if free_mem < domU_need_mem and dom0_new_alloc >= dom0_min_mem: 308.8 + if free_mem < domU_need_mem and dom0_new_alloc < dom0_min_mem: 308.9 + ret = 1 308.10 + if free_mem < domU_need_mem and ret == 0: 308.11 308.12 server.xend_domain_mem_target_set(0, dom0_new_alloc) 308.13 308.14 @@ -734,7 +736,8 @@ def main(argv): 308.15 dom0_min_mem = xroot.get_dom0_min_mem() 308.16 if dom0_min_mem != 0: 308.17 if balloon_out(dom0_min_mem, opts): 308.18 - return 308.19 + print >>sys.stderr, "error: cannot allocate enough memory for domain" 308.20 + sys.exit(1) 308.21 308.22 dom = make_domain(opts, config) 308.23 if opts.vals.console_autoconnect:
311.1 --- a/tools/python/xen/xm/main.py Fri Aug 19 16:47:24 2005 -0800 311.2 +++ b/tools/python/xen/xm/main.py Mon Aug 22 11:00:37 2005 -0700 311.3 @@ -434,7 +434,7 @@ def xm_sedf(args): 311.4 arg_check(args, 6, "sedf") 311.5 311.6 dom = args[0] 311.7 - v = map(int, args[1:5]) 311.8 + v = map(int, args[1:6]) 311.9 from xen.xend.XendClient import server 311.10 server.xend_domain_cpu_sedf_set(dom, *v) 311.11
385.1 --- a/xen/arch/ia64/grant_table.c Fri Aug 19 16:47:24 2005 -0800 385.2 +++ b/xen/arch/ia64/grant_table.c Mon Aug 22 11:00:37 2005 -0700 385.3 @@ -546,15 +546,6 @@ static int 385.4 { 385.5 frame = act->frame; 385.6 } 385.7 - else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) 385.8 - { 385.9 - if ( !( flags & GNTMAP_device_map ) ) 385.10 - PIN_FAIL(unmap_out, GNTST_bad_dev_addr, 385.11 - "Bad frame number: frame not mapped for dev access.\n"); 385.12 - frame = act->frame; 385.13 - 385.14 - /* Frame will be unmapped for device access below if virt addr okay. */ 385.15 - } 385.16 else 385.17 { 385.18 if ( unlikely(frame != act->frame) ) 385.19 @@ -616,15 +607,6 @@ static int 385.20 act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc 385.21 : GNTPIN_hstw_inc; 385.22 385.23 - if ( frame == GNTUNMAP_DEV_FROM_VIRT ) 385.24 - { 385.25 - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc 385.26 - : GNTPIN_devw_inc; 385.27 - 385.28 - map->ref_and_flags &= ~GNTMAP_device_map; 385.29 - (void)__put_user(0, &uop->dev_bus_addr); 385.30 - } 385.31 - 385.32 rc = 0; 385.33 *va = virt; 385.34 }
417.1 --- a/xen/arch/x86/domain_build.c Fri Aug 19 16:47:24 2005 -0800 417.2 +++ b/xen/arch/x86/domain_build.c Mon Aug 22 11:00:37 2005 -0700 417.3 @@ -69,11 +69,21 @@ boolean_param("dom0_translate", opt_dom0 417.4 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) 417.5 #define round_pgdown(_p) ((_p)&PAGE_MASK) 417.6 417.7 -static struct pfn_info *alloc_largest(struct domain *d, unsigned long max) 417.8 +static struct pfn_info *alloc_chunk(struct domain *d, unsigned long max_pages) 417.9 { 417.10 struct pfn_info *page; 417.11 - unsigned int order = get_order(max * PAGE_SIZE); 417.12 - if ( (max & (max-1)) != 0 ) 417.13 + unsigned int order; 417.14 + /* 417.15 + * Allocate up to 2MB at a time: 417.16 + * 1. This prevents overflow of get_order() when allocating more than 417.17 + * 4GB to domain 0 on a PAE machine. 417.18 + * 2. It prevents allocating very large chunks from DMA pools before 417.19 + * the >4GB pool is fully depleted. 417.20 + */ 417.21 + if ( max_pages > (2UL << (20 - PAGE_SHIFT)) ) 417.22 + max_pages = 2UL << (20 - PAGE_SHIFT); 417.23 + order = get_order(max_pages << PAGE_SHIFT); 417.24 + if ( (max_pages & (max_pages-1)) != 0 ) 417.25 order--; 417.26 while ( (page = alloc_domheap_pages(d, order, 0)) == NULL ) 417.27 if ( order-- == 0 ) 417.28 @@ -608,7 +618,7 @@ int construct_dom0(struct domain *d, 417.29 } 417.30 while ( pfn < nr_pages ) 417.31 { 417.32 - if ( (page = alloc_largest(d, nr_pages - d->tot_pages)) == NULL ) 417.33 + if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL ) 417.34 panic("Not enough RAM for DOM0 reservation.\n"); 417.35 while ( pfn < d->tot_pages ) 417.36 {
421.1 --- a/xen/arch/x86/mm.c Fri Aug 19 16:47:24 2005 -0800 421.2 +++ b/xen/arch/x86/mm.c Mon Aug 22 11:00:37 2005 -0700 421.3 @@ -2176,7 +2176,7 @@ int do_mmu_update( 421.4 } 421.5 } 421.6 421.7 - *(unsigned long *)va = req.val; 421.8 + *(intpte_t *)va = req.val; 421.9 okay = 1; 421.10 421.11 if ( shadow_mode_enabled(d) ) 421.12 @@ -2386,7 +2386,7 @@ int clear_grant_pte_mapping( 421.13 } 421.14 421.15 /* Delete pagetable entry. */ 421.16 - if ( unlikely(__put_user(0, (unsigned long *)va))) 421.17 + if ( unlikely(__put_user(0, (intpte_t *)va))) 421.18 { 421.19 DPRINTK("Cannot delete PTE entry at %p.\n", va); 421.20 put_page_type(page); 421.21 @@ -2446,12 +2446,11 @@ int update_grant_va_mapping( 421.22 421.23 int clear_grant_va_mapping(unsigned long addr, unsigned long frame) 421.24 { 421.25 - l1_pgentry_t *pl1e; 421.26 - unsigned long _ol1e; 421.27 + l1_pgentry_t *pl1e, ol1e; 421.28 421.29 pl1e = &linear_pg_table[l1_linear_offset(addr)]; 421.30 421.31 - if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) 421.32 + if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) ) 421.33 { 421.34 DPRINTK("Could not find PTE entry for address %lx\n", addr); 421.35 return GNTST_general_error; 421.36 @@ -2461,15 +2460,15 @@ int clear_grant_va_mapping(unsigned long 421.37 * Check that the virtual address supplied is actually mapped to 421.38 * frame. 421.39 */ 421.40 - if ( unlikely((_ol1e >> PAGE_SHIFT) != frame )) 421.41 + if ( unlikely(l1e_get_pfn(ol1e) != frame) ) 421.42 { 421.43 DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", 421.44 - _ol1e, addr, frame); 421.45 + l1e_get_pfn(ol1e), addr, frame); 421.46 return GNTST_general_error; 421.47 } 421.48 421.49 /* Delete pagetable entry. */ 421.50 - if ( unlikely(__put_user(0, (unsigned long *)pl1e))) 421.51 + if ( unlikely(__put_user(0, &pl1e->l1)) ) 421.52 { 421.53 DPRINTK("Cannot delete PTE entry at %p.\n", (unsigned long *)pl1e); 421.54 return GNTST_general_error;
447.1 --- a/xen/common/grant_table.c Fri Aug 19 16:47:24 2005 -0800 447.2 +++ b/xen/common/grant_table.c Mon Aug 22 11:00:37 2005 -0700 447.3 @@ -70,13 +70,13 @@ put_maptrack_handle( 447.4 447.5 static int 447.6 __gnttab_activate_grant_ref( 447.7 - struct domain *mapping_d, /* IN */ 447.8 + struct domain *mapping_d, /* IN */ 447.9 struct vcpu *mapping_ed, 447.10 - struct domain *granting_d, 447.11 - grant_ref_t ref, 447.12 - u16 dev_hst_ro_flags, 447.13 - unsigned long addr, 447.14 - unsigned long *pframe ) /* OUT */ 447.15 + struct domain *granting_d, 447.16 + grant_ref_t ref, 447.17 + u16 dev_hst_ro_flags, 447.18 + u64 addr, 447.19 + unsigned long *pframe ) /* OUT */ 447.20 { 447.21 domid_t sdom; 447.22 u16 sflags; 447.23 @@ -336,14 +336,15 @@ static int 447.24 gnttab_map_grant_ref_t *uop, 447.25 unsigned long *va) 447.26 { 447.27 - domid_t dom; 447.28 - grant_ref_t ref; 447.29 - struct domain *ld, *rd; 447.30 - struct vcpu *led; 447.31 - u16 dev_hst_ro_flags; 447.32 - int handle; 447.33 - unsigned long frame = 0, addr; 447.34 - int rc; 447.35 + domid_t dom; 447.36 + grant_ref_t ref; 447.37 + struct domain *ld, *rd; 447.38 + struct vcpu *led; 447.39 + u16 dev_hst_ro_flags; 447.40 + int handle; 447.41 + u64 addr; 447.42 + unsigned long frame = 0; 447.43 + int rc; 447.44 447.45 led = current; 447.46 ld = led->domain; 447.47 @@ -363,7 +364,7 @@ static int 447.48 (!(dev_hst_ro_flags & GNTMAP_contains_pte) && 447.49 unlikely(!__addr_ok(addr))) ) ) 447.50 { 447.51 - DPRINTK("Bad virtual address (%lx) or flags (%x).\n", 447.52 + DPRINTK("Bad virtual address (%"PRIx64") or flags (%"PRIx16").\n", 447.53 addr, dev_hst_ro_flags); 447.54 (void)__put_user(GNTST_bad_virt_addr, &uop->handle); 447.55 return GNTST_bad_gntref; 447.56 @@ -450,7 +451,7 @@ static int 447.57 = (ref << MAPTRACK_REF_SHIFT) | 447.58 (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); 447.59 447.60 - (void)__put_user(frame, &uop->dev_bus_addr); 447.61 + (void)__put_user((u64)frame << PAGE_SHIFT, 
&uop->dev_bus_addr); 447.62 447.63 if ( ( dev_hst_ro_flags & GNTMAP_host_map ) && 447.64 !( dev_hst_ro_flags & GNTMAP_contains_pte) ) 447.65 @@ -492,29 +493,31 @@ static int 447.66 gnttab_unmap_grant_ref_t *uop, 447.67 unsigned long *va) 447.68 { 447.69 - domid_t dom; 447.70 - grant_ref_t ref; 447.71 - u16 handle; 447.72 - struct domain *ld, *rd; 447.73 - 447.74 + domid_t dom; 447.75 + grant_ref_t ref; 447.76 + u16 handle; 447.77 + struct domain *ld, *rd; 447.78 active_grant_entry_t *act; 447.79 - grant_entry_t *sha; 447.80 + grant_entry_t *sha; 447.81 grant_mapping_t *map; 447.82 - u16 flags; 447.83 - s16 rc = 1; 447.84 - unsigned long frame, addr; 447.85 + u16 flags; 447.86 + s16 rc = 1; 447.87 + u64 addr, dev_bus_addr; 447.88 + unsigned long frame; 447.89 447.90 ld = current->domain; 447.91 447.92 /* Bitwise-OR avoids short-circuiting which screws control flow. */ 447.93 if ( unlikely(__get_user(addr, &uop->host_addr) | 447.94 - __get_user(frame, &uop->dev_bus_addr) | 447.95 + __get_user(dev_bus_addr, &uop->dev_bus_addr) | 447.96 __get_user(handle, &uop->handle)) ) 447.97 { 447.98 DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); 447.99 return -EFAULT; /* don't set status */ 447.100 } 447.101 447.102 + frame = (unsigned long)(dev_bus_addr >> PAGE_SHIFT); 447.103 + 447.104 map = &ld->grant_table->maptrack[handle]; 447.105 447.106 if ( unlikely(handle >= ld->grant_table->maptrack_limit) || 447.107 @@ -553,15 +556,6 @@ static int 447.108 { 447.109 frame = act->frame; 447.110 } 447.111 - else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) 447.112 - { 447.113 - if ( !( flags & GNTMAP_device_map ) ) 447.114 - PIN_FAIL(unmap_out, GNTST_bad_dev_addr, 447.115 - "Bad frame number: frame not mapped for dev access.\n"); 447.116 - frame = act->frame; 447.117 - 447.118 - /* Frame will be unmapped for device access below if virt addr okay. 
*/ 447.119 - } 447.120 else 447.121 { 447.122 if ( unlikely(frame != act->frame) ) 447.123 @@ -597,15 +591,6 @@ static int 447.124 act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc 447.125 : GNTPIN_hstw_inc; 447.126 447.127 - if ( frame == GNTUNMAP_DEV_FROM_VIRT ) 447.128 - { 447.129 - act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc 447.130 - : GNTPIN_devw_inc; 447.131 - 447.132 - map->ref_and_flags &= ~GNTMAP_device_map; 447.133 - (void)__put_user(0, &uop->dev_bus_addr); 447.134 - } 447.135 - 447.136 rc = 0; 447.137 if ( !( flags & GNTMAP_contains_pte) ) 447.138 *va = addr;
498.1 --- a/xen/include/asm-x86/x86_32/page-3level.h Fri Aug 19 16:47:24 2005 -0800 498.2 +++ b/xen/include/asm-x86/x86_32/page-3level.h Mon Aug 22 11:00:37 2005 -0700 498.3 @@ -63,7 +63,7 @@ typedef l3_pgentry_t root_pgentry_t; 498.4 498.5 /* Extract flags into 32-bit integer, or turn 32-bit flags into a pte mask. */ 498.6 #define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF)) 498.7 -#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 40) | ((x) & 0xFFF)) 498.8 +#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF)) 498.9 498.10 #define L1_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PAT/GLOBAL */ 498.11 #define L2_DISALLOW_MASK (0xFFFFF180U & ~_PAGE_NX) /* PSE/GLOBAL */
499.1 --- a/xen/include/asm-x86/x86_32/uaccess.h Fri Aug 19 16:47:24 2005 -0800 499.2 +++ b/xen/include/asm-x86/x86_32/uaccess.h Mon Aug 22 11:00:37 2005 -0700 499.3 @@ -22,7 +22,11 @@ 499.4 #define array_access_ok(addr,count,size) \ 499.5 (likely(count < (~0UL/size)) && access_ok(addr,count*size)) 499.6 499.7 +/* Undefined function to catch size mismatches on 64-bit get_user/put_user. */ 499.8 +extern void __uaccess_var_not_u64(void); 499.9 + 499.10 #define __put_user_u64(x, addr, retval, errret) \ 499.11 + if (sizeof(x) != 8) __uaccess_var_not_u64(); \ 499.12 __asm__ __volatile__( \ 499.13 "1: movl %%eax,0(%2)\n" \ 499.14 "2: movl %%edx,4(%2)\n" \ 499.15 @@ -52,6 +56,7 @@ do { \ 499.16 } while (0) 499.17 499.18 #define __get_user_u64(x, addr, retval, errret) \ 499.19 + if (sizeof(x) != 8) __uaccess_var_not_u64(); \ 499.20 __asm__ __volatile__( \ 499.21 "1: movl 0(%2),%%eax\n" \ 499.22 "2: movl 4(%2),%%edx\n" \
504.1 --- a/xen/include/public/dom0_ops.h Fri Aug 19 16:47:24 2005 -0800 504.2 +++ b/xen/include/public/dom0_ops.h Mon Aug 22 11:00:37 2005 -0700 504.3 @@ -19,7 +19,7 @@ 504.4 * This makes sure that old versions of dom0 tools will stop working in a 504.5 * well-defined way (rather than crashing the machine, for instance). 504.6 */ 504.7 -#define DOM0_INTERFACE_VERSION 0xAAAA100F 504.8 +#define DOM0_INTERFACE_VERSION 0xAAAA1010 504.9 504.10 /************************************************************************/ 504.11
505.1 --- a/xen/include/public/grant_table.h Fri Aug 19 16:47:24 2005 -0800 505.2 +++ b/xen/include/public/grant_table.h Mon Aug 22 11:00:37 2005 -0700 505.3 @@ -183,8 +183,6 @@ typedef struct gnttab_unmap_grant_ref { 505.4 s16 status; /* GNTST_* */ 505.5 } gnttab_unmap_grant_ref_t; 505.6 505.7 -#define GNTUNMAP_DEV_FROM_VIRT (~0U) 505.8 - 505.9 /* 505.10 * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least 505.11 * <nr_frames> pages. The frame addresses are written to the <frame_list>.
506.1 --- a/xen/include/public/io/blkif.h Fri Aug 19 16:47:24 2005 -0800 506.2 +++ b/xen/include/public/io/blkif.h Mon Aug 22 11:00:37 2005 -0700 506.3 @@ -36,11 +36,7 @@ typedef struct blkif_request { 506.4 unsigned long id; /* private guest value, echoed in resp */ 506.5 blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 506.6 /* @f_a_s[4:0]=last_sect ; @f_a_s[9:5]=first_sect */ 506.7 -#ifdef CONFIG_XEN_BLKDEV_GRANT 506.8 /* @f_a_s[:16]= grant reference (16 bits) */ 506.9 -#else 506.10 - /* @f_a_s[:12]=@frame: machine page frame number. */ 506.11 -#endif 506.12 /* @first_sect: first sector in frame to transfer (inclusive). */ 506.13 /* @last_sect: last sector in frame to transfer (inclusive). */ 506.14 unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 506.15 @@ -50,10 +46,8 @@ typedef struct blkif_request { 506.16 #define blkif_first_sect(_fas) (((_fas)>>5)&31) 506.17 #define blkif_last_sect(_fas) ((_fas)&31) 506.18 506.19 -#ifdef CONFIG_XEN_BLKDEV_GRANT 506.20 #define blkif_fas_from_gref(_gref, _fs, _ls) (((_gref)<<16)|((_fs)<<5)|(_ls)) 506.21 #define blkif_gref_from_fas(_fas) ((_fas)>>16) 506.22 -#endif 506.23 506.24 typedef struct blkif_response { 506.25 unsigned long id; /* copied from request */
509.1 --- a/xen/include/public/physdev.h Fri Aug 19 16:47:24 2005 -0800 509.2 +++ b/xen/include/public/physdev.h Mon Aug 22 11:00:37 2005 -0700 509.3 @@ -27,8 +27,8 @@ typedef struct physdevop_set_iopl { 509.4 509.5 typedef struct physdevop_set_iobitmap { 509.6 /* IN */ 509.7 - char *bitmap; 509.8 - u32 nr_ports; 509.9 + u8 *bitmap; 509.10 + u32 nr_ports; 509.11 } physdevop_set_iobitmap_t; 509.12 509.13 typedef struct physdevop_apic {