ia64/linux-2.6.18-xen.hg

view arch/arm26/lib/csumpartialcopygeneric.S @ 647:a5bb490065f6

Fix the build after public header sync.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Aug 13 14:01:49 2008 +0100 (2008-08-13)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/arm26/lib/csumpartialcopygeneric.S
3 *
4 * Copyright (C) 1995-2001 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * JMA 01/06/03 Commented out some shl0s; probobly irrelevant to arm26
11 *
12 */
14 /*
15 * unsigned int
16 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
17 * r0 = src, r1 = dst, r2 = len, r3 = sum
18 * Returns : r0 = checksum
19 *
20 * Note that 'tst' and 'teq' preserve the carry flag.
21 */
23 /* Quick hack */
24 .macro save_regs
25 stmfd sp!, {r1, r4 - r8, fp, ip, lr, pc}
26 .endm
28 /* end Quick Hack */
30 src .req r0
31 dst .req r1
32 len .req r2
33 sum .req r3
35 .zero: mov r0, sum
36 load_regs ea
38 /*
39 * Align an unaligned destination pointer. We know that
40 * we have >= 8 bytes here, so we don't need to check
41 * the length. Note that the source pointer hasn't been
42 * aligned yet.
43 */
44 .dst_unaligned: tst dst, #1
45 beq .dst_16bit
47 load1b ip
48 sub len, len, #1
49 adcs sum, sum, ip, lsl #byte(1) @ update checksum
50 strb ip, [dst], #1
51 tst dst, #2
52 moveq pc, lr @ dst is now 32bit aligned
54 .dst_16bit: load2b r8, ip
55 sub len, len, #2
56 adcs sum, sum, r8, lsl #byte(0)
57 strb r8, [dst], #1
58 adcs sum, sum, ip, lsl #byte(1)
59 strb ip, [dst], #1
60 mov pc, lr @ dst is now 32bit aligned
62 /*
63 * Handle 0 to 7 bytes, with any alignment of source and
64 * destination pointers. Note that when we get here, C = 0
65 */
66 .less8: teq len, #0 @ check for zero count
67 beq .zero
69 /* we must have at least one byte. */
70 tst dst, #1 @ dst 16-bit aligned
71 beq .less8_aligned
73 /* Align dst */
74 load1b ip
75 sub len, len, #1
76 adcs sum, sum, ip, lsl #byte(1) @ update checksum
77 strb ip, [dst], #1
78 tst len, #6
79 beq .less8_byteonly
81 1: load2b r8, ip
82 sub len, len, #2
83 adcs sum, sum, r8, lsl #byte(0)
84 strb r8, [dst], #1
85 adcs sum, sum, ip, lsl #byte(1)
86 strb ip, [dst], #1
87 .less8_aligned: tst len, #6
88 bne 1b
89 .less8_byteonly:
90 tst len, #1
91 beq .done
92 load1b r8
93 adcs sum, sum, r8, lsl #byte(0) @ update checksum
94 strb r8, [dst], #1
95 b .done
97 FN_ENTRY
98 mov ip, sp
99 save_regs
100 sub fp, ip, #4
102 cmp len, #8 @ Ensure that we have at least
103 blo .less8 @ 8 bytes to copy.
105 adds sum, sum, #0 @ C = 0
106 tst dst, #3 @ Test destination alignment
107 blne .dst_unaligned @ align destination, return here
109 /*
110 * Ok, the dst pointer is now 32bit aligned, and we know
111 * that we must have more than 4 bytes to copy. Note
112 * that C contains the carry from the dst alignment above.
113 */
115 tst src, #3 @ Test source alignment
116 bne .src_not_aligned
118 /* Routine for src & dst aligned */
120 bics ip, len, #15
121 beq 2f
123 1: load4l r4, r5, r6, r7
124 stmia dst!, {r4, r5, r6, r7}
125 adcs sum, sum, r4
126 adcs sum, sum, r5
127 adcs sum, sum, r6
128 adcs sum, sum, r7
129 sub ip, ip, #16
130 teq ip, #0
131 bne 1b
133 2: ands ip, len, #12
134 beq 4f
135 tst ip, #8
136 beq 3f
137 load2l r4, r5
138 stmia dst!, {r4, r5}
139 adcs sum, sum, r4
140 adcs sum, sum, r5
141 tst ip, #4
142 beq 4f
144 3: load1l r4
145 str r4, [dst], #4
146 adcs sum, sum, r4
148 4: ands len, len, #3
149 beq .done
150 load1l r4
151 tst len, #2
152 /* mov r5, r4, lsr #byte(0)
153 FIXME? 0 Shift anyhow!
154 */
155 beq .exit
156 adcs sum, sum, r4, push #16
157 strb r5, [dst], #1
158 mov r5, r4, lsr #byte(1)
159 strb r5, [dst], #1
160 mov r5, r4, lsr #byte(2)
161 .exit: tst len, #1
162 strneb r5, [dst], #1
163 andne r5, r5, #255
164 adcnes sum, sum, r5, lsl #byte(0)
166 /*
167 * If the dst pointer was not 16-bit aligned, we
168 * need to rotate the checksum here to get around
169 * the inefficient byte manipulations in the
170 * architecture independent code.
171 */
172 .done: adc r0, sum, #0
173 ldr sum, [sp, #0] @ dst
174 tst sum, #1
175 movne sum, r0, lsl #8
176 orrne r0, sum, r0, lsr #24
177 load_regs ea
179 .src_not_aligned:
180 adc sum, sum, #0 @ include C from dst alignment
181 and ip, src, #3
182 bic src, src, #3
183 load1l r5
184 cmp ip, #2
185 beq .src2_aligned
186 bhi .src3_aligned
187 mov r4, r5, pull #8 @ C = 0
188 bics ip, len, #15
189 beq 2f
190 1: load4l r5, r6, r7, r8
191 orr r4, r4, r5, push #24
192 mov r5, r5, pull #8
193 orr r5, r5, r6, push #24
194 mov r6, r6, pull #8
195 orr r6, r6, r7, push #24
196 mov r7, r7, pull #8
197 orr r7, r7, r8, push #24
198 stmia dst!, {r4, r5, r6, r7}
199 adcs sum, sum, r4
200 adcs sum, sum, r5
201 adcs sum, sum, r6
202 adcs sum, sum, r7
203 mov r4, r8, pull #8
204 sub ip, ip, #16
205 teq ip, #0
206 bne 1b
207 2: ands ip, len, #12
208 beq 4f
209 tst ip, #8
210 beq 3f
211 load2l r5, r6
212 orr r4, r4, r5, push #24
213 mov r5, r5, pull #8
214 orr r5, r5, r6, push #24
215 stmia dst!, {r4, r5}
216 adcs sum, sum, r4
217 adcs sum, sum, r5
218 mov r4, r6, pull #8
219 tst ip, #4
220 beq 4f
221 3: load1l r5
222 orr r4, r4, r5, push #24
223 str r4, [dst], #4
224 adcs sum, sum, r4
225 mov r4, r5, pull #8
226 4: ands len, len, #3
227 beq .done
228 /* mov r5, r4, lsr #byte(0)
229 FIXME? 0 Shift anyhow
230 */
231 tst len, #2
232 beq .exit
233 adcs sum, sum, r4, push #16
234 strb r5, [dst], #1
235 mov r5, r4, lsr #byte(1)
236 strb r5, [dst], #1
237 mov r5, r4, lsr #byte(2)
238 b .exit
240 .src2_aligned: mov r4, r5, pull #16
241 adds sum, sum, #0
242 bics ip, len, #15
243 beq 2f
244 1: load4l r5, r6, r7, r8
245 orr r4, r4, r5, push #16
246 mov r5, r5, pull #16
247 orr r5, r5, r6, push #16
248 mov r6, r6, pull #16
249 orr r6, r6, r7, push #16
250 mov r7, r7, pull #16
251 orr r7, r7, r8, push #16
252 stmia dst!, {r4, r5, r6, r7}
253 adcs sum, sum, r4
254 adcs sum, sum, r5
255 adcs sum, sum, r6
256 adcs sum, sum, r7
257 mov r4, r8, pull #16
258 sub ip, ip, #16
259 teq ip, #0
260 bne 1b
261 2: ands ip, len, #12
262 beq 4f
263 tst ip, #8
264 beq 3f
265 load2l r5, r6
266 orr r4, r4, r5, push #16
267 mov r5, r5, pull #16
268 orr r5, r5, r6, push #16
269 stmia dst!, {r4, r5}
270 adcs sum, sum, r4
271 adcs sum, sum, r5
272 mov r4, r6, pull #16
273 tst ip, #4
274 beq 4f
275 3: load1l r5
276 orr r4, r4, r5, push #16
277 str r4, [dst], #4
278 adcs sum, sum, r4
279 mov r4, r5, pull #16
280 4: ands len, len, #3
281 beq .done
282 /* mov r5, r4, lsr #byte(0)
283 FIXME? 0 Shift anyhow
284 */
285 tst len, #2
286 beq .exit
287 adcs sum, sum, r4
288 strb r5, [dst], #1
289 mov r5, r4, lsr #byte(1)
290 strb r5, [dst], #1
291 tst len, #1
292 beq .done
293 load1b r5
294 b .exit
296 .src3_aligned: mov r4, r5, pull #24
297 adds sum, sum, #0
298 bics ip, len, #15
299 beq 2f
300 1: load4l r5, r6, r7, r8
301 orr r4, r4, r5, push #8
302 mov r5, r5, pull #24
303 orr r5, r5, r6, push #8
304 mov r6, r6, pull #24
305 orr r6, r6, r7, push #8
306 mov r7, r7, pull #24
307 orr r7, r7, r8, push #8
308 stmia dst!, {r4, r5, r6, r7}
309 adcs sum, sum, r4
310 adcs sum, sum, r5
311 adcs sum, sum, r6
312 adcs sum, sum, r7
313 mov r4, r8, pull #24
314 sub ip, ip, #16
315 teq ip, #0
316 bne 1b
317 2: ands ip, len, #12
318 beq 4f
319 tst ip, #8
320 beq 3f
321 load2l r5, r6
322 orr r4, r4, r5, push #8
323 mov r5, r5, pull #24
324 orr r5, r5, r6, push #8
325 stmia dst!, {r4, r5}
326 adcs sum, sum, r4
327 adcs sum, sum, r5
328 mov r4, r6, pull #24
329 tst ip, #4
330 beq 4f
331 3: load1l r5
332 orr r4, r4, r5, push #8
333 str r4, [dst], #4
334 adcs sum, sum, r4
335 mov r4, r5, pull #24
336 4: ands len, len, #3
337 beq .done
338 /* mov r5, r4, lsr #byte(0)
339 FIXME? 0 Shift anyhow
340 */
341 tst len, #2
342 beq .exit
343 strb r5, [dst], #1
344 adcs sum, sum, r4
345 load1l r4
346 /* mov r5, r4, lsr #byte(0)
347 FIXME? 0 Shift anyhow
348 */
349 strb r5, [dst], #1
350 adcs sum, sum, r4, push #24
351 mov r5, r4, lsr #byte(1)
352 b .exit