ia64/linux-2.6.18-xen.hg

view arch/sparc/lib/memcpy.S @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, so it is desirable to keep attempting to
reach the target in case memory becomes available. The most likely
scenario is that some guests are ballooning down while others are
ballooning up, causing temporary memory pressure while things
stabilise. You would not expect a well-behaved toolstack to ask a
domain to balloon to more than its allocation, nor would you expect it
to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.
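
For illustration, a minimal sketch of the retry behaviour described
above; current_pages, try_populate() and schedule_retry() below are
hypothetical stand-ins for the driver's own state and helpers, not its
actual interface:

    /* Sketch only: keep whatever Xen grants and, if still short of the
     * target, re-arm a timer instead of recording a "hard limit". */
    static unsigned long current_pages;              /* pages currently held */

    extern long try_populate(unsigned long nr);      /* ask Xen for nr more pages */
    extern void schedule_retry(void);                /* re-arm the balloon timer */

    static void balloon_up(unsigned long target)
    {
            while (current_pages < target) {
                    unsigned long want = target - current_pages;
                    long got = try_populate(want);

                    if (got > 0)
                            current_pages += got;    /* keep partial successes */

                    if (got < (long)want) {
                            /* Possibly host memory pressure: retry later. */
                            schedule_retry();
                            return;
                    }
            }
    }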

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author: Keir Fraser <keir.fraser@citrix.com>
date: Fri Jun 05 14:01:20 2009 +0100
parents: 831230e53067

/* memcpy.S: Sparc optimized memcpy and memmove code
 * Hand optimized from GNU libc's memcpy and memmove
 * Copyright (C) 1991,1996 Free Software Foundation
 * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */

#ifdef __KERNEL__

#define FUNC(x) \
        .globl x; \
        .type x,@function; \
        .align 4; \
x:

#undef FASTER_REVERSE
#undef FASTER_NONALIGNED
#define FASTER_ALIGNED

/* In kernel these functions don't return a value.
 * One should use macros in asm/string.h for that purpose.
 * We return 0, so that bugs are more apparent.
 */
#define SETUP_RETL
#define RETL_INSN clr %o0

#else

/* libc */

#include "DEFS.h"

#define FASTER_REVERSE
#define FASTER_NONALIGNED
#define FASTER_ALIGNED

#define SETUP_RETL mov %o0, %g6
#define RETL_INSN mov %g6, %o0

#endif

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd [%src + (offset) + 0x00], %t0; \
        ldd [%src + (offset) + 0x08], %t2; \
        ldd [%src + (offset) + 0x10], %t4; \
        ldd [%src + (offset) + 0x18], %t6; \
        st %t0, [%dst + (offset) + 0x00]; \
        st %t1, [%dst + (offset) + 0x04]; \
        st %t2, [%dst + (offset) + 0x08]; \
        st %t3, [%dst + (offset) + 0x0c]; \
        st %t4, [%dst + (offset) + 0x10]; \
        st %t5, [%dst + (offset) + 0x14]; \
        st %t6, [%dst + (offset) + 0x18]; \
        st %t7, [%dst + (offset) + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd [%src + (offset) + 0x00], %t0; \
        ldd [%src + (offset) + 0x08], %t2; \
        ldd [%src + (offset) + 0x10], %t4; \
        ldd [%src + (offset) + 0x18], %t6; \
        std %t0, [%dst + (offset) + 0x00]; \
        std %t2, [%dst + (offset) + 0x08]; \
        std %t4, [%dst + (offset) + 0x10]; \
        std %t6, [%dst + (offset) + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd [%src - (offset) - 0x10], %t0; \
        ldd [%src - (offset) - 0x08], %t2; \
        st %t0, [%dst - (offset) - 0x10]; \
        st %t1, [%dst - (offset) - 0x0c]; \
        st %t2, [%dst - (offset) - 0x08]; \
        st %t3, [%dst - (offset) - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd [%src - (offset) - 0x10], %t0; \
        ldd [%src - (offset) - 0x08], %t2; \
        std %t0, [%dst - (offset) - 0x10]; \
        std %t2, [%dst - (offset) - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
        ldub [%src - (offset) - 0x02], %t0; \
        ldub [%src - (offset) - 0x01], %t1; \
        stb %t0, [%dst - (offset) - 0x02]; \
        stb %t1, [%dst - (offset) - 0x01];

/* Both these macros have to start with exactly the same insn */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd [%src - (offset) - 0x20], %t0; \
        ldd [%src - (offset) - 0x18], %t2; \
        ldd [%src - (offset) - 0x10], %t4; \
        ldd [%src - (offset) - 0x08], %t6; \
        st %t0, [%dst - (offset) - 0x20]; \
        st %t1, [%dst - (offset) - 0x1c]; \
        st %t2, [%dst - (offset) - 0x18]; \
        st %t3, [%dst - (offset) - 0x14]; \
        st %t4, [%dst - (offset) - 0x10]; \
        st %t5, [%dst - (offset) - 0x0c]; \
        st %t6, [%dst - (offset) - 0x08]; \
        st %t7, [%dst - (offset) - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
        ldd [%src - (offset) - 0x20], %t0; \
        ldd [%src - (offset) - 0x18], %t2; \
        ldd [%src - (offset) - 0x10], %t4; \
        ldd [%src - (offset) - 0x08], %t6; \
        std %t0, [%dst - (offset) - 0x20]; \
        std %t2, [%dst - (offset) - 0x18]; \
        std %t4, [%dst - (offset) - 0x10]; \
        std %t6, [%dst - (offset) - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
        ldd [%src + (offset) + 0x00], %t0; \
        ldd [%src + (offset) + 0x08], %t2; \
        st %t0, [%dst + (offset) + 0x00]; \
        st %t1, [%dst + (offset) + 0x04]; \
        st %t2, [%dst + (offset) + 0x08]; \
        st %t3, [%dst + (offset) + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
        ldub [%src + (offset) + 0x00], %t0; \
        ldub [%src + (offset) + 0x01], %t1; \
        stb %t0, [%dst + (offset) + 0x00]; \
        stb %t1, [%dst + (offset) + 0x01];
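
/* Copy 16 bytes between buffers whose relative alignment is not a
 * multiple of 4: each loaded word is split with srl/sll, merged with
 * the bits carried over in %prev from the previous chunk, and the
 * realigned words are stored with std. */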
#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
        ldd [%src + (offset) + 0x00], %t0; \
        ldd [%src + (offset) + 0x08], %t2; \
        srl %t0, shir, %t5; \
        srl %t1, shir, %t6; \
        sll %t0, shil, %t0; \
        or %t5, %prev, %t5; \
        sll %t1, shil, %prev; \
        or %t6, %t0, %t0; \
        srl %t2, shir, %t1; \
        srl %t3, shir, %t6; \
        sll %t2, shil, %t2; \
        or %t1, %prev, %t1; \
        std %t4, [%dst + (offset) + (offset2) - 0x04]; \
        std %t0, [%dst + (offset) + (offset2) + 0x04]; \
        sll %t3, shil, %prev; \
        or %t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
        ldd [%src + (offset) + 0x00], %t0; \
        ldd [%src + (offset) + 0x08], %t2; \
        srl %t0, shir, %t4; \
        srl %t1, shir, %t5; \
        sll %t0, shil, %t6; \
        or %t4, %prev, %t0; \
        sll %t1, shil, %prev; \
        or %t5, %t6, %t1; \
        srl %t2, shir, %t4; \
        srl %t3, shir, %t5; \
        sll %t2, shil, %t6; \
        or %t4, %prev, %t2; \
        sll %t3, shil, %prev; \
        or %t5, %t6, %t3; \
        std %t0, [%dst + (offset) + (offset2) + 0x00]; \
        std %t2, [%dst + (offset) + (offset2) + 0x08];

        .text
        .align 4

#ifdef FASTER_REVERSE

70: /* rdword_align */

        andcc %o1, 1, %g0
        be 4f
        andcc %o1, 2, %g0

        ldub [%o1 - 1], %g2
        sub %o1, 1, %o1
        stb %g2, [%o0 - 1]
        sub %o2, 1, %o2
        be 3f
        sub %o0, 1, %o0
4:
        lduh [%o1 - 2], %g2
        sub %o1, 2, %o1
        sth %g2, [%o0 - 2]
        sub %o2, 2, %o2
        b 3f
        sub %o0, 2, %o0

#endif /* FASTER_REVERSE */

0:
        retl
        nop ! Only bcopy returns here and it returns void...

#ifdef __KERNEL__
FUNC(amemmove)
FUNC(__memmove)
#endif
FUNC(memmove)
        cmp %o0, %o1
        SETUP_RETL
        bleu 9f
        sub %o0, %o1, %o4

        add %o1, %o2, %o3
        cmp %o3, %o0
        bleu 0f
        andcc %o4, 3, %o5

#ifndef FASTER_REVERSE

        add %o1, %o2, %o1
        add %o0, %o2, %o0
        sub %o1, 1, %o1
        sub %o0, 1, %o0

1: /* reverse_bytes */

        ldub [%o1], %o4
        subcc %o2, 1, %o2
        stb %o4, [%o0]
        sub %o1, 1, %o1
        bne 1b
        sub %o0, 1, %o0

        retl
        RETL_INSN

#else /* FASTER_REVERSE */

        add %o1, %o2, %o1
        add %o0, %o2, %o0
        bne 77f
        cmp %o2, 15
        bleu 91f
        andcc %o1, 3, %g0
        bne 70b
3:
        andcc %o1, 4, %g0

        be 2f
        mov %o2, %g1

        ld [%o1 - 4], %o4
        sub %g1, 4, %g1
        st %o4, [%o0 - 4]
        sub %o1, 4, %o1
        sub %o0, 4, %o0
2:
        andcc %g1, 0xffffff80, %g7
        be 3f
        andcc %o0, 4, %g0
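
        /* If the destination is doubleword aligned too, use the ldd/std
         * loop at 74f, entering it 4 bytes past the label: the delay slot
         * below is the first ldd of RMOVE_BIGCHUNK, which is identical to
         * the first instruction of RMOVE_BIGALIGNCHUNK and stands in for it. */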
        be 74f + 4
5:
        RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        subcc %g7, 128, %g7
        sub %o1, 128, %o1
        bne 5b
        sub %o0, 128, %o0
3:
        andcc %g1, 0x70, %g7
        be 72f
        andcc %g1, 8, %g0
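
        /* Jump backwards into the RMOVE_LASTCHUNK table: each 16-byte
         * chunk expands to six instructions (24 bytes of code), so the
         * remaining byte count in %g7 (a multiple of 16) is scaled by
         * 3/2 before being subtracted from the address of 72f. */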
        sethi %hi(72f), %o5
        srl %g7, 1, %o4
        add %g7, %o4, %o4
        sub %o1, %g7, %o1
        sub %o5, %o4, %o5
        jmpl %o5 + %lo(72f), %g0
        sub %o0, %g7, %o0

71: /* rmemcpy_table */
        RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

72: /* rmemcpy_table_end */

        be 73f
        andcc %g1, 4, %g0

        ldd [%o1 - 0x08], %g2
        sub %o0, 8, %o0
        sub %o1, 8, %o1
        st %g2, [%o0]
        st %g3, [%o0 + 0x04]

73: /* rmemcpy_last7 */

        be 1f
        andcc %g1, 2, %g0

        ld [%o1 - 4], %g2
        sub %o1, 4, %o1
        st %g2, [%o0 - 4]
        sub %o0, 4, %o0
1:
        be 1f
        andcc %g1, 1, %g0

        lduh [%o1 - 2], %g2
        sub %o1, 2, %o1
        sth %g2, [%o0 - 2]
        sub %o0, 2, %o0
1:
        be 1f
        nop

        ldub [%o1 - 1], %g2
        stb %g2, [%o0 - 1]
1:
        retl
        RETL_INSN

74: /* rldd_std */
        RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        subcc %g7, 128, %g7
        sub %o1, 128, %o1
        bne 74b
        sub %o0, 128, %o0

        andcc %g1, 0x70, %g7
        be 72b
        andcc %g1, 8, %g0

        sethi %hi(72b), %o5
        srl %g7, 1, %o4
        add %g7, %o4, %o4
        sub %o1, %g7, %o1
        sub %o5, %o4, %o5
        jmpl %o5 + %lo(72b), %g0
        sub %o0, %g7, %o0

75: /* rshort_end */

        and %o2, 0xe, %o3
2:
        sethi %hi(76f), %o5
        sll %o3, 3, %o4
        sub %o0, %o3, %o0
        sub %o5, %o4, %o5
        sub %o1, %o3, %o1
        jmpl %o5 + %lo(76f), %g0
        andcc %o2, 1, %g0

        RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
        RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

76: /* rshort_table_end */

        be 1f
        nop
        ldub [%o1 - 1], %g2
        stb %g2, [%o0 - 1]
1:
        retl
        RETL_INSN

91: /* rshort_aligned_end */

        bne 75b
        andcc %o2, 8, %g0

        be 1f
        andcc %o2, 4, %g0

        ld [%o1 - 0x08], %g2
        ld [%o1 - 0x04], %g3
        sub %o1, 8, %o1
        st %g2, [%o0 - 0x08]
        st %g3, [%o0 - 0x04]
        sub %o0, 8, %o0
1:
        b 73b
        mov %o2, %g1

77: /* rnon_aligned */
        cmp %o2, 15
        bleu 75b
        andcc %o0, 3, %g0
        be 64f
        andcc %o0, 1, %g0
        be 63f
        andcc %o0, 2, %g0
        ldub [%o1 - 1], %g5
        sub %o1, 1, %o1
        stb %g5, [%o0 - 1]
        sub %o0, 1, %o0
        be 64f
        sub %o2, 1, %o2
63:
        ldub [%o1 - 1], %g5
        sub %o1, 2, %o1
        stb %g5, [%o0 - 1]
        sub %o0, 2, %o0
        ldub [%o1], %g5
        sub %o2, 2, %o2
        stb %g5, [%o0]
64:
        and %o1, 3, %g2
        and %o1, -4, %o1
        and %o2, 0xc, %g3
        add %o1, 4, %o1
        cmp %g3, 4
        sll %g2, 3, %g4
        mov 32, %g2
        be 4f
        sub %g2, %g4, %g7

        blu 3f
        cmp %g3, 8

        be 2f
        srl %o2, 2, %g3

        ld [%o1 - 4], %o3
        add %o0, -8, %o0
        ld [%o1 - 8], %o4
        add %o1, -16, %o1
        b 7f
        add %g3, 1, %g3
2:
        ld [%o1 - 4], %o4
        add %o0, -4, %o0
        ld [%o1 - 8], %g1
        add %o1, -12, %o1
        b 8f
        add %g3, 2, %g3
3:
        ld [%o1 - 4], %o5
        add %o0, -12, %o0
        ld [%o1 - 8], %o3
        add %o1, -20, %o1
        b 6f
        srl %o2, 2, %g3
4:
        ld [%o1 - 4], %g1
        srl %o2, 2, %g3
        ld [%o1 - 8], %o5
        add %o1, -24, %o1
        add %o0, -16, %o0
        add %g3, -1, %g3

        ld [%o1 + 12], %o3
5:
        sll %o5, %g4, %g2
        srl %g1, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0 + 12]
6:
        ld [%o1 + 8], %o4
        sll %o3, %g4, %g2
        srl %o5, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0 + 8]
7:
        ld [%o1 + 4], %g1
        sll %o4, %g4, %g2
        srl %o3, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0 + 4]
8:
        ld [%o1], %o5
        sll %g1, %g4, %g2
        srl %o4, %g7, %g5
        addcc %g3, -4, %g3
        or %g2, %g5, %g2
        add %o1, -16, %o1
        st %g2, [%o0]
        add %o0, -16, %o0
        bne,a 5b
        ld [%o1 + 12], %o3
        sll %o5, %g4, %g2
        srl %g1, %g7, %g5
        srl %g4, 3, %g3
        or %g2, %g5, %g2
        add %o1, %g3, %o1
        andcc %o2, 2, %g0
        st %g2, [%o0 + 12]
        be 1f
        andcc %o2, 1, %g0

        ldub [%o1 + 15], %g5
        add %o1, -2, %o1
        stb %g5, [%o0 + 11]
        add %o0, -2, %o0
        ldub [%o1 + 16], %g5
        stb %g5, [%o0 + 12]
1:
        be 1f
        nop
        ldub [%o1 + 15], %g5
        stb %g5, [%o0 + 11]
1:
        retl
        RETL_INSN

#endif /* FASTER_REVERSE */

/* NOTE: This code is executed just for the cases,
         where %src (=%o1) & 3 is != 0.
         We need to align it to 4. So, for (%src & 3)
         1 we need to do ldub,lduh
         2 lduh
         3 just ldub
         so even if it looks weird, the branches
         are correct here. -jj
 */
78: /* dword_align */

        andcc %o1, 1, %g0
        be 4f
        andcc %o1, 2, %g0

        ldub [%o1], %g2
        add %o1, 1, %o1
        stb %g2, [%o0]
        sub %o2, 1, %o2
        bne 3f
        add %o0, 1, %o0
4:
        lduh [%o1], %g2
        add %o1, 2, %o1
        sth %g2, [%o0]
        sub %o2, 2, %o2
        b 3f
        add %o0, 2, %o0

#ifdef __KERNEL__
FUNC(__memcpy)
#endif
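/* Forward copy entry point.  memmove branches to the 9: label below
 * when dst <= src (a forward copy is then overlap-safe), with
 * %o4 = dst - src already computed in its delay slot. */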
FUNC(memcpy) /* %o0=dst %o1=src %o2=len */

        sub %o0, %o1, %o4
        SETUP_RETL
9:
        andcc %o4, 3, %o5
0:
        bne 86f
        cmp %o2, 15

        bleu 90f
        andcc %o1, 3, %g0

        bne 78b
3:
        andcc %o1, 4, %g0

        be 2f
        mov %o2, %g1

        ld [%o1], %o4
        sub %g1, 4, %g1
        st %o4, [%o0]
        add %o1, 4, %o1
        add %o0, 4, %o0
2:
        andcc %g1, 0xffffff80, %g7
        be 3f
        andcc %o0, 4, %g0

        be 82f + 4
5:
        MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        subcc %g7, 128, %g7
        add %o1, 128, %o1
        bne 5b
        add %o0, 128, %o0
3:
        andcc %g1, 0x70, %g7
        be 80f
        andcc %g1, 8, %g0

        sethi %hi(80f), %o5
        srl %g7, 1, %o4
        add %g7, %o4, %o4
        add %o1, %g7, %o1
        sub %o5, %o4, %o5
        jmpl %o5 + %lo(80f), %g0
        add %o0, %g7, %o0

79: /* memcpy_table */

        MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

80: /* memcpy_table_end */
        be 81f
        andcc %g1, 4, %g0

        ldd [%o1], %g2
        add %o0, 8, %o0
        st %g2, [%o0 - 0x08]
        add %o1, 8, %o1
        st %g3, [%o0 - 0x04]

81: /* memcpy_last7 */

        be 1f
        andcc %g1, 2, %g0

        ld [%o1], %g2
        add %o1, 4, %o1
        st %g2, [%o0]
        add %o0, 4, %o0
1:
        be 1f
        andcc %g1, 1, %g0

        lduh [%o1], %g2
        add %o1, 2, %o1
        sth %g2, [%o0]
        add %o0, 2, %o0
1:
        be 1f
        nop

        ldub [%o1], %g2
        stb %g2, [%o0]
1:
        retl
        RETL_INSN

82: /* ldd_std */
        MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
        subcc %g7, 128, %g7
        add %o1, 128, %o1
        bne 82b
        add %o0, 128, %o0

#ifndef FASTER_ALIGNED

        andcc %g1, 0x70, %g7
        be 80b
        andcc %g1, 8, %g0

        sethi %hi(80b), %o5
        srl %g7, 1, %o4
        add %g7, %o4, %o4
        add %o1, %g7, %o1
        sub %o5, %o4, %o5
        jmpl %o5 + %lo(80b), %g0
        add %o0, %g7, %o0

#else /* FASTER_ALIGNED */

        andcc %g1, 0x70, %g7
        be 84f
        andcc %g1, 8, %g0

        sethi %hi(84f), %o5
        add %o1, %g7, %o1
        sub %o5, %g7, %o5
        jmpl %o5 + %lo(84f), %g0
        add %o0, %g7, %o0

83: /* amemcpy_table */

        MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)

84: /* amemcpy_table_end */
        be 85f
        andcc %g1, 4, %g0

        ldd [%o1], %g2
        add %o0, 8, %o0
        std %g2, [%o0 - 0x08]
        add %o1, 8, %o1
85: /* amemcpy_last7 */
        be 1f
        andcc %g1, 2, %g0

        ld [%o1], %g2
        add %o1, 4, %o1
        st %g2, [%o0]
        add %o0, 4, %o0
1:
        be 1f
        andcc %g1, 1, %g0

        lduh [%o1], %g2
        add %o1, 2, %o1
        sth %g2, [%o0]
        add %o0, 2, %o0
1:
        be 1f
        nop

        ldub [%o1], %g2
        stb %g2, [%o0]
1:
        retl
        RETL_INSN

#endif /* FASTER_ALIGNED */

86: /* non_aligned */
        cmp %o2, 6
        bleu 88f

#ifdef FASTER_NONALIGNED

        cmp %o2, 256
        bcc 87f

#endif /* FASTER_NONALIGNED */

        andcc %o0, 3, %g0
        be 61f
        andcc %o0, 1, %g0
        be 60f
        andcc %o0, 2, %g0

        ldub [%o1], %g5
        add %o1, 1, %o1
        stb %g5, [%o0]
        sub %o2, 1, %o2
        bne 61f
        add %o0, 1, %o0
60:
        ldub [%o1], %g3
        add %o1, 2, %o1
        stb %g3, [%o0]
        sub %o2, 2, %o2
        ldub [%o1 - 1], %g3
        add %o0, 2, %o0
        stb %g3, [%o0 - 1]
61:
        and %o1, 3, %g2
        and %o2, 0xc, %g3
        and %o1, -4, %o1
        cmp %g3, 4
        sll %g2, 3, %g4
        mov 32, %g2
        be 4f
        sub %g2, %g4, %g7

        blu 3f
        cmp %g3, 0x8

        be 2f
        srl %o2, 2, %g3

        ld [%o1], %o3
        add %o0, -8, %o0
        ld [%o1 + 4], %o4
        b 8f
        add %g3, 1, %g3
2:
        ld [%o1], %o4
        add %o0, -12, %o0
        ld [%o1 + 4], %o5
        add %g3, 2, %g3
        b 9f
        add %o1, -4, %o1
3:
        ld [%o1], %g1
        add %o0, -4, %o0
        ld [%o1 + 4], %o3
        srl %o2, 2, %g3
        b 7f
        add %o1, 4, %o1
4:
        ld [%o1], %o5
        cmp %o2, 7
        ld [%o1 + 4], %g1
        srl %o2, 2, %g3
        bleu 10f
        add %o1, 8, %o1

        ld [%o1], %o3
        add %g3, -1, %g3
5:
        sll %o5, %g4, %g2
        srl %g1, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0]
7:
        ld [%o1 + 4], %o4
        sll %g1, %g4, %g2
        srl %o3, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0 + 4]
8:
        ld [%o1 + 8], %o5
        sll %o3, %g4, %g2
        srl %o4, %g7, %g5
        or %g2, %g5, %g2
        st %g2, [%o0 + 8]
9:
        ld [%o1 + 12], %g1
        sll %o4, %g4, %g2
        srl %o5, %g7, %g5
        addcc %g3, -4, %g3
        or %g2, %g5, %g2
        add %o1, 16, %o1
        st %g2, [%o0 + 12]
        add %o0, 16, %o0
        bne,a 5b
        ld [%o1], %o3
10:
        sll %o5, %g4, %g2
        srl %g1, %g7, %g5
        srl %g7, 3, %g3
        or %g2, %g5, %g2
        sub %o1, %g3, %o1
        andcc %o2, 2, %g0
        st %g2, [%o0]
        be 1f
        andcc %o2, 1, %g0

        ldub [%o1], %g2
        add %o1, 2, %o1
        stb %g2, [%o0 + 4]
        add %o0, 2, %o0
        ldub [%o1 - 1], %g2
        stb %g2, [%o0 + 3]
1:
        be 1f
        nop
        ldub [%o1], %g2
        stb %g2, [%o0 + 4]
1:
        retl
        RETL_INSN

#ifdef FASTER_NONALIGNED

87: /* faster_nonaligned */

        andcc %o1, 3, %g0
        be 3f
        andcc %o1, 1, %g0

        be 4f
        andcc %o1, 2, %g0

        ldub [%o1], %g2
        add %o1, 1, %o1
        stb %g2, [%o0]
        sub %o2, 1, %o2
        bne 3f
        add %o0, 1, %o0
4:
        lduh [%o1], %g2
        add %o1, 2, %o1
        srl %g2, 8, %g3
        sub %o2, 2, %o2
        stb %g3, [%o0]
        add %o0, 2, %o0
        stb %g2, [%o0 - 1]
3:
        andcc %o1, 4, %g0

        bne 2f
        cmp %o5, 1

        ld [%o1], %o4
        srl %o4, 24, %g2
        stb %g2, [%o0]
        srl %o4, 16, %g3
        stb %g3, [%o0 + 1]
        srl %o4, 8, %g2
        stb %g2, [%o0 + 2]
        sub %o2, 4, %o2
        stb %o4, [%o0 + 3]
        add %o1, 4, %o1
        add %o0, 4, %o0
2:
        be 33f
        cmp %o5, 2
        be 32f
        sub %o2, 4, %o2
31:
        ld [%o1], %g2
        add %o1, 4, %o1
        srl %g2, 24, %g3
        and %o0, 7, %g5
        stb %g3, [%o0]
        cmp %g5, 7
        sll %g2, 8, %g1
        add %o0, 4, %o0
        be 41f
        and %o2, 0xffffffc0, %o3
        ld [%o0 - 7], %o4
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 4b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 16, %g2
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 16, %g2
1:
        st %o4, [%o0 - 7]
        sth %g2, [%o0 - 3]
        srl %g1, 8, %g4
        b 88f
        stb %g4, [%o0 - 1]
32:
        ld [%o1], %g2
        add %o1, 4, %o1
        srl %g2, 16, %g3
        and %o0, 7, %g5
        sth %g3, [%o0]
        cmp %g5, 6
        sll %g2, 16, %g1
        add %o0, 4, %o0
        be 42f
        and %o2, 0xffffffc0, %o3
        ld [%o0 - 6], %o4
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 4b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 16, %g2
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 16, %g2
1:
        st %o4, [%o0 - 6]
        b 88f
        sth %g2, [%o0 - 2]
33:
        ld [%o1], %g2
        sub %o2, 4, %o2
        srl %g2, 24, %g3
        and %o0, 7, %g5
        stb %g3, [%o0]
        cmp %g5, 5
        srl %g2, 8, %g4
        sll %g2, 24, %g1
        sth %g4, [%o0 + 1]
        add %o1, 4, %o1
        be 43f
        and %o2, 0xffffffc0, %o3

        ld [%o0 - 1], %o4
        add %o0, 4, %o0
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
        SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
        SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
        SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 4b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 24, %g2
4:
        SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, -1)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 24, %g2
1:
        st %o4, [%o0 - 5]
        b 88f
        stb %g2, [%o0 - 1]
41:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 41b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 16, %g2
4:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 8, 24, -3)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 16, %g2
1:
        sth %g2, [%o0 - 3]
        srl %g1, 8, %g4
        b 88f
        stb %g4, [%o0 - 1]
43:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
        SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 43b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 24, %g2
4:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 24, 8, 3)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 24, %g2
1:
        stb %g2, [%o0 + 3]
        b 88f
        add %o0, 4, %o0
42:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        subcc %o3, 64, %o3
        add %o1, 64, %o1
        bne 42b
        add %o0, 64, %o0

        andcc %o2, 0x30, %o3
        be,a 1f
        srl %g1, 16, %g2
4:
        SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g7, g1, 16, 16, -2)
        subcc %o3, 16, %o3
        add %o1, 16, %o1
        bne 4b
        add %o0, 16, %o0

        srl %g1, 16, %g2
1:
        sth %g2, [%o0 - 2]

        /* Fall through */

#endif /* FASTER_NONALIGNED */

88: /* short_end */

        and %o2, 0xe, %o3
20:
        sethi %hi(89f), %o5
        sll %o3, 3, %o4
        add %o0, %o3, %o0
        sub %o5, %o4, %o5
        add %o1, %o3, %o1
        jmpl %o5 + %lo(89f), %g0
        andcc %o2, 1, %g0

        MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)

89: /* short_table_end */

        be 1f
        nop

        ldub [%o1], %g2
        stb %g2, [%o0]
1:
        retl
        RETL_INSN

90: /* short_aligned_end */
        bne 88b
        andcc %o2, 8, %g0

        be 1f
        andcc %o2, 4, %g0

        ld [%o1 + 0x00], %g2
        ld [%o1 + 0x04], %g3
        add %o1, 8, %o1
        st %g2, [%o0 + 0x00]
        st %g3, [%o0 + 0x04]
        add %o0, 8, %o0
1:
        b 81b
        mov %o2, %g1