ia64/linux-2.6.18-xen.hg

view drivers/md/raid6sse2.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2002 H. Peter Anvin - All Rights Reserved
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
8 * Boston MA 02111-1307, USA; either version 2 of the License, or
9 * (at your option) any later version; incorporated herein by reference.
10 *
11 * ----------------------------------------------------------------------- */
13 /*
14 * raid6sse2.c
15 *
16 * SSE-2 implementation of RAID-6 syndrome functions
17 *
18 */
20 #if defined(__i386__) || defined(__x86_64__)
22 #include "raid6.h"
23 #include "raid6x86.h"
25 static const struct raid6_sse_constants {
26 u64 x1d[2];
27 } raid6_sse_constants __attribute__((aligned(16))) = {
28 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
29 };
31 static int raid6_have_sse2(void)
32 {
33 #ifdef __KERNEL__
34 /* Not really boot_cpu but "all_cpus" */
35 return boot_cpu_has(X86_FEATURE_MMX) &&
36 boot_cpu_has(X86_FEATURE_FXSR) &&
37 boot_cpu_has(X86_FEATURE_XMM) &&
38 boot_cpu_has(X86_FEATURE_XMM2);
39 #else
40 /* User space test code */
41 u32 features = cpuid_features();
42 return ( (features & (15<<23)) == (15<<23) );
43 #endif
44 }
46 /*
47 * Plain SSE2 implementation
48 */
/*
 * raid6_sse21_gen_syndrome - generate P and Q, one 16-byte block per pass
 * @disks: number of entries in @ptrs; entries 0..disks-3 are read as data,
 *         entry disks-2 receives P and entry disks-1 receives Q
 * @bytes: number of bytes to process in every buffer, consumed 16 at a time
 * @ptrs:  array of buffer pointers (data buffers first, then P, then Q)
 *
 * Register use: xmm0 holds the 0x1d byte constant, xmm2 accumulates P,
 * xmm4 accumulates Q, xmm5 is a scratch mask that is zero between steps,
 * and xmm6 holds a data block that was loaded one step before it is used.
 *
 * The asm statements hand state to each other through the xmm registers
 * without declaring it to the compiler, so their order must not change.
 *
 * NOTE(review): movdqa and movntdq fault on addresses that are not
 * 16-byte aligned, so callers presumably pass aligned buffers and a
 * @bytes that is a multiple of 16 - confirm against the callers.
 * NOTE(review): dptr[z0-1] is read unconditionally, so this presumably
 * relies on at least two data disks (disks >= 4) - confirm.
 * NOTE(review): d is an int but bytes is a size_t, so the loop condition
 * compares them as unsigned values.
 */
49 static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
50 {
51 u8 **dptr = (u8 **)ptrs;
52 u8 *p, *q;
53 int d, z, z0;
54 raid6_sse_save_t sa;
56 z0 = disks - 3; /* Highest data disk */
57 p = dptr[z0+1]; /* XOR parity */
58 q = dptr[z0+2]; /* RS syndrome */
/*
 * NOTE(review): raid6_before_sse2()/raid6_after_sse2() are defined outside
 * this file; presumably they save and restore SSE state via sa - confirm.
 */
60 raid6_before_sse2(&sa);
/* xmm0 = 0x1d in each of its 16 bytes */
62 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
63 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
65 for ( d = 0 ; d < bytes ; d += 16 ) {
/*
 * Start both accumulators from the highest data disk and preload the
 * next lower disk into xmm6.
 */
66 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
67 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
68 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
69 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
70 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
71 for ( z = z0-2 ; z >= 0 ; z-- ) {
72 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
/*
 * Double every byte of Q: xmm5 (zero) > xmm4 as signed bytes yields
 * 0xff where the top bit of Q is set; after the add, 0x1d is XORed
 * into exactly those bytes.
 */
73 asm volatile("pcmpgtb %xmm4,%xmm5");
74 asm volatile("paddb %xmm4,%xmm4");
75 asm volatile("pand %xmm0,%xmm5");
76 asm volatile("pxor %xmm5,%xmm4");
/* Re-zero the scratch mask for the next comparison */
77 asm volatile("pxor %xmm5,%xmm5");
/* Fold the previously loaded data block into P and Q */
78 asm volatile("pxor %xmm6,%xmm2");
79 asm volatile("pxor %xmm6,%xmm4");
/* Load disk z; it is folded in by the next step */
80 asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
81 }
/* Final step: same doubling, then fold in the block still held in xmm6 */
82 asm volatile("pcmpgtb %xmm4,%xmm5");
83 asm volatile("paddb %xmm4,%xmm4");
84 asm volatile("pand %xmm0,%xmm5");
85 asm volatile("pxor %xmm5,%xmm4");
86 asm volatile("pxor %xmm5,%xmm5");
87 asm volatile("pxor %xmm6,%xmm2");
88 asm volatile("pxor %xmm6,%xmm4");
/*
 * Write P and Q with non-temporal stores.  The accumulators are cleared
 * afterwards; both are loaded again at the top of the next pass.
 */
90 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
91 asm volatile("pxor %xmm2,%xmm2");
92 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
93 asm volatile("pxor %xmm4,%xmm4");
94 }
96 raid6_after_sse2(&sa);
/*
 * Store fence for the non-temporal stores above; the "memory" clobber also
 * stops the compiler from moving memory accesses across this statement.
 */
97 asm volatile("sfence" : : : "memory");
98 }
/*
 * Exported descriptor for the plain SSE2 variant.  It lists, in order:
 * raid6_sse21_gen_syndrome, the raid6_have_sse2 feature check, the name
 * string "sse2x1", and the value 1.
 * NOTE(review): the initializer is positional; the field order comes from
 * struct raid6_calls, which is declared outside this file - confirm there.
 */
100 const struct raid6_calls raid6_sse2x1 = {
101 raid6_sse21_gen_syndrome,
102 raid6_have_sse2,
103 "sse2x1",
104 1 /* Has cache hints */
105 };
107 /*
108 * Unrolled-by-2 SSE2 implementation
109 */
/*
 * raid6_sse22_gen_syndrome - generate P and Q, two 16-byte blocks per pass
 * @disks: number of entries in @ptrs; entries 0..disks-3 are read as data,
 *         entry disks-2 receives P and entry disks-1 receives Q
 * @bytes: number of bytes to process in every buffer, consumed 32 at a time
 * @ptrs:  array of buffer pointers (data buffers first, then P, then Q)
 *
 * Register use: xmm0 holds the 0x1d byte constant; xmm2/xmm3 accumulate
 * P for the two blocks and xmm4/xmm6 accumulate Q; xmm5/xmm7 serve first
 * as the comparison mask and then as the data load target, and are zero
 * between steps.
 *
 * The asm statements hand state to each other through the xmm registers
 * without declaring it to the compiler, so their order must not change.
 *
 * NOTE(review): movdqa and movntdq fault on addresses that are not
 * 16-byte aligned, so callers presumably pass aligned buffers and a
 * @bytes that is a multiple of 32 - confirm against the callers.
 * NOTE(review): d is an int but bytes is a size_t, so the loop condition
 * compares them as unsigned values.
 */
110 static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
111 {
112 u8 **dptr = (u8 **)ptrs;
113 u8 *p, *q;
114 int d, z, z0;
115 raid6_sse_save_t sa;
117 z0 = disks - 3; /* Highest data disk */
118 p = dptr[z0+1]; /* XOR parity */
119 q = dptr[z0+2]; /* RS syndrome */
/*
 * NOTE(review): raid6_before_sse2()/raid6_after_sse2() are defined outside
 * this file; presumably they save and restore SSE state via sa - confirm.
 */
121 raid6_before_sse2(&sa);
/* xmm0 = 0x1d in each of its 16 bytes */
123 asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
124 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
125 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
127 /* We uniformly assume a single prefetch covers at least 32 bytes */
128 for ( d = 0 ; d < bytes ; d += 32 ) {
/* Start P and Q for both blocks from the highest data disk */
129 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
130 asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
131 asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
132 asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
133 asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
134 for ( z = z0-1 ; z >= 0 ; z-- ) {
135 asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
/*
 * Double every byte of both Q accumulators: the zero temps compared
 * against Q as signed bytes yield 0xff where the top bit of Q is set;
 * after the add, 0x1d is XORed into exactly those bytes.
 */
136 asm volatile("pcmpgtb %xmm4,%xmm5");
137 asm volatile("pcmpgtb %xmm6,%xmm7");
138 asm volatile("paddb %xmm4,%xmm4");
139 asm volatile("paddb %xmm6,%xmm6");
140 asm volatile("pand %xmm0,%xmm5");
141 asm volatile("pand %xmm0,%xmm7");
142 asm volatile("pxor %xmm5,%xmm4");
143 asm volatile("pxor %xmm7,%xmm6");
/* Load the two data blocks of disk z into the temps */
144 asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
145 asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
/* Fold the data into P and Q */
146 asm volatile("pxor %xmm5,%xmm2");
147 asm volatile("pxor %xmm7,%xmm3");
148 asm volatile("pxor %xmm5,%xmm4");
149 asm volatile("pxor %xmm7,%xmm6");
/* Re-zero the temps for the next comparison */
150 asm volatile("pxor %xmm5,%xmm5");
151 asm volatile("pxor %xmm7,%xmm7");
152 }
/* Write both P blocks and both Q blocks with non-temporal stores */
153 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
154 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
155 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
156 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
157 }
159 raid6_after_sse2(&sa);
/*
 * Store fence for the non-temporal stores above; the "memory" clobber also
 * stops the compiler from moving memory accesses across this statement.
 */
160 asm volatile("sfence" : : : "memory");
161 }
/*
 * Exported descriptor for the unrolled-by-2 SSE2 variant.  It lists, in
 * order: raid6_sse22_gen_syndrome, the raid6_have_sse2 feature check, the
 * name string "sse2x2", and the value 1.
 * NOTE(review): the initializer is positional; the field order comes from
 * struct raid6_calls, which is declared outside this file - confirm there.
 */
163 const struct raid6_calls raid6_sse2x2 = {
164 raid6_sse22_gen_syndrome,
165 raid6_have_sse2,
166 "sse2x2",
167 1 /* Has cache hints */
168 };
170 #endif
172 #ifdef __x86_64__
174 /*
175 * Unrolled-by-4 SSE2 implementation
176 */
/*
 * raid6_sse24_gen_syndrome - generate P and Q, four 16-byte blocks per pass
 * @disks: number of entries in @ptrs; entries 0..disks-3 are read as data,
 *         entry disks-2 receives P and entry disks-1 receives Q
 * @bytes: number of bytes to process in every buffer, consumed 64 at a time
 * @ptrs:  array of buffer pointers (data buffers first, then P, then Q)
 *
 * Register use: xmm0 holds the 0x1d byte constant; xmm2, xmm3, xmm10 and
 * xmm11 accumulate P for the four blocks; xmm4, xmm6, xmm12 and xmm14
 * accumulate Q; xmm5, xmm7, xmm13 and xmm15 serve first as comparison
 * masks and then as data load targets, and are zero between steps.
 *
 * Unlike the loops that seed the accumulators from the highest data disk,
 * this one starts every pass with all accumulators zero and runs the
 * inner loop over every data disk, z0 down to 0.
 *
 * The asm statements hand state to each other through the xmm registers
 * without declaring it to the compiler, so their order must not change.
 *
 * NOTE(review): movdqa and movntdq fault on addresses that are not
 * 16-byte aligned, so callers presumably pass aligned buffers and a
 * @bytes that is a multiple of 64 - confirm against the callers.
 * NOTE(review): d is an int but bytes is a size_t, so the loop condition
 * compares them as unsigned values.
 */
177 static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
178 {
179 u8 **dptr = (u8 **)ptrs;
180 u8 *p, *q;
181 int d, z, z0;
182 raid6_sse16_save_t sa;
184 z0 = disks - 3; /* Highest data disk */
185 p = dptr[z0+1]; /* XOR parity */
186 q = dptr[z0+2]; /* RS syndrome */
/*
 * NOTE(review): raid6_before_sse16()/raid6_after_sse16() are defined
 * outside this file; presumably they save and restore the sixteen xmm
 * registers via sa - confirm.
 */
188 raid6_before_sse16(&sa);
/* xmm0 = 0x1d in each of its 16 bytes */
190 asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
191 asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
192 asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
193 asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
194 asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
195 asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
196 asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
197 asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
198 asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
199 asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
200 asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
201 asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
202 asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
204 for ( d = 0 ; d < bytes ; d += 64 ) {
205 for ( z = z0 ; z >= 0 ; z-- ) {
206 /* The second prefetch seems to improve performance... */
207 asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
208 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
/*
 * Double every byte of the four Q accumulators: the zero temps compared
 * against Q as signed bytes yield 0xff where the top bit of Q is set;
 * after the add, 0x1d is XORed into exactly those bytes.  On the first
 * iteration Q is still zero, so this leaves it zero.
 */
209 asm volatile("pcmpgtb %xmm4,%xmm5");
210 asm volatile("pcmpgtb %xmm6,%xmm7");
211 asm volatile("pcmpgtb %xmm12,%xmm13");
212 asm volatile("pcmpgtb %xmm14,%xmm15");
213 asm volatile("paddb %xmm4,%xmm4");
214 asm volatile("paddb %xmm6,%xmm6");
215 asm volatile("paddb %xmm12,%xmm12");
216 asm volatile("paddb %xmm14,%xmm14");
217 asm volatile("pand %xmm0,%xmm5");
218 asm volatile("pand %xmm0,%xmm7");
219 asm volatile("pand %xmm0,%xmm13");
220 asm volatile("pand %xmm0,%xmm15");
221 asm volatile("pxor %xmm5,%xmm4");
222 asm volatile("pxor %xmm7,%xmm6");
223 asm volatile("pxor %xmm13,%xmm12");
224 asm volatile("pxor %xmm15,%xmm14");
/* Load the four data blocks of disk z into the temps */
225 asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
226 asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
227 asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
228 asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
/* Fold the data into P ... */
229 asm volatile("pxor %xmm5,%xmm2");
230 asm volatile("pxor %xmm7,%xmm3");
231 asm volatile("pxor %xmm13,%xmm10");
232 asm volatile("pxor %xmm15,%xmm11");
/* ... and into Q */
233 asm volatile("pxor %xmm5,%xmm4");
234 asm volatile("pxor %xmm7,%xmm6");
235 asm volatile("pxor %xmm13,%xmm12");
236 asm volatile("pxor %xmm15,%xmm14");
/* Re-zero the temps for the next comparison */
237 asm volatile("pxor %xmm5,%xmm5");
238 asm volatile("pxor %xmm7,%xmm7");
239 asm volatile("pxor %xmm13,%xmm13");
240 asm volatile("pxor %xmm15,%xmm15");
241 }
/*
 * Write P and Q with non-temporal stores, clearing each accumulator right
 * after it is stored so the next 64-byte pass starts from zero.
 */
242 asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
243 asm volatile("pxor %xmm2,%xmm2");
244 asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
245 asm volatile("pxor %xmm3,%xmm3");
246 asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
247 asm volatile("pxor %xmm10,%xmm10");
248 asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
249 asm volatile("pxor %xmm11,%xmm11");
250 asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
251 asm volatile("pxor %xmm4,%xmm4");
252 asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
253 asm volatile("pxor %xmm6,%xmm6");
254 asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
255 asm volatile("pxor %xmm12,%xmm12");
256 asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
257 asm volatile("pxor %xmm14,%xmm14");
258 }
/*
 * Store fence for the non-temporal stores above; the "memory" clobber also
 * stops the compiler from moving memory accesses across this statement.
 * NOTE(review): here the fence comes before raid6_after_sse16(), whereas
 * the sse21/sse22 routines issue it after raid6_after_sse2().
 */
259 asm volatile("sfence" : : : "memory");
260 raid6_after_sse16(&sa);
261 }
/*
 * Exported descriptor for the unrolled-by-4 SSE2 variant.  It lists, in
 * order: raid6_sse24_gen_syndrome, the raid6_have_sse2 feature check, the
 * name string "sse2x4", and the value 1.
 * NOTE(review): the initializer is positional; the field order comes from
 * struct raid6_calls, which is declared outside this file - confirm there.
 */
263 const struct raid6_calls raid6_sse2x4 = {
264 raid6_sse24_gen_syndrome,
265 raid6_have_sse2,
266 "sse2x4",
267 1 /* Has cache hints */
268 };
270 #endif