ia64/linux-2.6.18-xen.hg

view arch/ia64/kernel/fsys.S @ 855:b8bef3d2d3c3

[IA64] fix fsys.S paravirtualization

fix fsys.S paravirtualization.
event_mask must be cleared before checking event_pending.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Isaku Yamahata <yamahata@valinux.co.jp>
date Tue Apr 07 11:31:17 2009 +0900 (2009-04-07)
parents a533be77c572
children
line source
1 /*
2 * This file contains the light-weight system call handlers (fsyscall-handlers).
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
8 * 18-Feb-03 louisk Implement fsys_gettimeofday().
9 * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
10 * probably broke it along the way... ;-)
11 * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12 * it capable of using memory based clocks without falling back to C code.
13 */
15 #include <asm/asmmacro.h>
16 #include <asm/errno.h>
17 #include <asm/asm-offsets.h>
18 #include <asm/percpu.h>
19 #include <asm/thread_info.h>
20 #include <asm/sal.h>
21 #include <asm/signal.h>
22 #include <asm/system.h>
23 #include <asm/unistd.h>
25 #include "entry.h"
27 /*
28 * See Documentation/ia64/fsys.txt for details on fsyscalls.
29 *
30 * On entry to an fsyscall handler:
31 * r10 = 0 (i.e., defaults to "successful syscall return")
32 * r11 = saved ar.pfs (a user-level value)
33 * r15 = system call number
34 * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
35 * r32-r39 = system call arguments
36 * b6 = return address (a user-level value)
37 * ar.pfs = previous frame-state (a user-level value)
38 * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
39 * all other registers may contain values passed in from user-mode
40 *
41 * On return from an fsyscall handler:
42 * r11 = saved ar.pfs (as passed into the fsyscall handler)
43 * r15 = system call number (as passed into the fsyscall handler)
44 * r32-r39 = system call arguments (as passed into the fsyscall handler)
45 * b6 = return address (as passed into the fsyscall handler)
46 * ar.pfs = previous frame-state (as passed into the fsyscall handler)
47 */
/*
 * fsys_ni_syscall: light-weight handler that always fails with ENOSYS.
 * It is the entry stored at fsyscall_table[0].
 */
49 ENTRY(fsys_ni_syscall)
50 .prologue
51 .altrp b6
52 .body
53 mov r8=ENOSYS // r8 = error code
54 mov r10=-1 // override the r10 = 0 "successful syscall return" default
55 FSYS_RETURN
56 END(fsys_ni_syscall)
/*
 * fsys_getpid: return current->tgid in r8.
 *
 * Falls back to the heavy-weight syscall path (fsys_fallback_syscall) if any
 * of the TIF_ALLWORK_MASK bits is set in the thread flags.
 */
58 ENTRY(fsys_getpid)
59 .prologue
60 .altrp b6
61 .body
62 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word (r16 = current task)
63 ;;
64 ld4 r9=[r9] // r9 = thread flags
65 add r8=IA64_TASK_TGID_OFFSET,r16 // r8 = &current->tgid
66 ;;
67 and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
68 ld4 r8=[r8] // r8 = current->tgid
69 ;;
70 cmp.ne p8,p0=0,r9 // p8 <- some work is pending
71 (p8) br.spnt.many fsys_fallback_syscall // yes -> take the heavy-weight path
72 FSYS_RETURN
73 END(fsys_getpid)
/*
 * fsys_getppid: return current->group_leader->real_parent->tgid in r8.
 *
 * Falls back to the heavy-weight syscall path if any TIF_ALLWORK_MASK bit is
 * set in the thread flags. On SMP, real_parent is re-read after the tgid load
 * and the sequence is retried if it changed in between.
 *
 * r17, r18 and r19 are cleared before returning so that no kernel pointers
 * are handed back to user level.
 */
75 ENTRY(fsys_getppid)
76 .prologue
77 .altrp b6
78 .body
79 add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
80 ;;
81 ld8 r17=[r17] // r17 = current->group_leader
82 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
83 ;;
85 ld4 r9=[r9] // r9 = thread flags
86 add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
87 ;;
88 and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
90 1: ld8 r18=[r17] // r18 = current->group_leader->real_parent
91 ;;
92 cmp.ne p8,p0=0,r9 // p8 <- some work is pending
93 add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
94 ;;
96 /*
97 * The .acq is needed to ensure that the read of tgid has returned its data before
98 * we re-check "real_parent".
99 */
100 ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
101 #ifdef CONFIG_SMP
102 /*
103 * Re-read current->group_leader->real_parent.
104 */
105 ld8 r19=[r17] // r19 = current->group_leader->real_parent
106 (p8) br.spnt.many fsys_fallback_syscall
107 ;;
108 cmp.ne p6,p0=r18,r19 // did real_parent change?
109 mov r19=0 // i must not leak kernel bits...
110 (p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
111 ;;
112 mov r17=0 // i must not leak kernel bits...
113 mov r18=0 // i must not leak kernel bits...
114 #else
/*
 * The work-pending check must be honoured on UP as well: p8 is computed
 * above regardless of CONFIG_SMP, so act on it here too instead of
 * silently returning with work pending.
 */
(p8) br.spnt.many fsys_fallback_syscall // work pending -> heavy-weight syscall
;;
115 mov r17=0 // i must not leak kernel bits...
116 mov r18=0 // i must not leak kernel bits...
117 mov r19=0 // i must not leak kernel bits...
118 #endif
119 FSYS_RETURN
120 END(fsys_getppid)
/*
 * fsys_set_tid_address: store the argument (r32) in the task field at
 * IA64_TASK_CLEAR_CHILD_TID_OFFSET and return the value loaded from
 * IA64_TASK_PID_OFFSET in r8.
 *
 * If r32 is a NaT, -1 is stored instead of the argument. Falls back to the
 * heavy-weight syscall path if any TIF_ALLWORK_MASK bit is set.
 *
 * NOTE(review): the stores are placed ahead of the fallback branch, so they
 * appear to be performed even when the handler falls back -- confirm that
 * this is intended.
 */
122 ENTRY(fsys_set_tid_address)
123 .prologue
124 .altrp b6
125 .body
126 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
127 ;;
128 ld4 r9=[r9] // r9 = thread flags
129 tnat.z p6,p7=r32 // check argument register for being NaT
130 ;;
131 and r9=TIF_ALLWORK_MASK,r9 // keep only the work-pending bits
132 add r8=IA64_TASK_PID_OFFSET,r16 // r8 = address of the pid field of current
133 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 = address of the clear_child_tid field of current
134 ;;
135 ld4 r8=[r8] // r8 = return value (pid field)
136 cmp.ne p8,p0=0,r9 // p8 <- some work is pending
137 mov r17=-1 // value stored when the argument is a NaT
138 ;;
139 (p6) st8 [r18]=r32 // argument is not a NaT: store it
140 (p7) st8 [r18]=r17 // argument is a NaT: store -1
141 (p8) br.spnt.many fsys_fallback_syscall
142 ;;
143 mov r17=0 // i must not leak kernel bits...
144 mov r18=0 // i must not leak kernel bits...
145 FSYS_RETURN
146 END(fsys_set_tid_address)
148 /*
149 * Ensure that the time interpolator structure is compatible with the asm code
150 */
/*
 * fsys_gettimeofday reads source, shift, jitter and nsec_per_cyc with a single
 * 8-byte load of the start of the structure and picks the fields apart with
 * extr, so the byte offsets checked here are hard-wired into that code.
 */
151 #if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
152 || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
153 #error fsys_gettimeofday incompatible with changes to struct time_interpolator
154 #endif
/*
 * Clock ids accepted by fsys_clock_gettime, followed by the processing flags
 * passed to .gettime in r30. The flags are bits 14 and 15 because .gettime
 * copies them into predicates p14 and p15 with "mov pr = r30,0xc000";
 * fsys_clock_gettime builds them as (clock id << 15), so CLOCK_MONOTONIC
 * yields CLOCK_ADD_MONOTONIC.
 */
155 #define CLOCK_REALTIME 0
156 #define CLOCK_MONOTONIC 1
157 #define CLOCK_DIVIDE_BY_1000 0x4000
158 #define CLOCK_ADD_MONOTONIC 0x8000
/*
 * fsys_gettimeofday: light-weight gettimeofday.
 *
 * r32 = pointer to the result buffer, r33 = second argument (only checked for
 * being a NaT in this block). Enters the shared .gettime body with
 * r30 = CLOCK_DIVIDE_BY_1000; fsys_clock_gettime branches to .gettime as well.
 *
 * .gettime reads xtime and the time interpolator state between two reads of
 * the xtime_lock sequence number, redoing the read (.time_redo) if the
 * sequence changed or was odd, or if the cmpxchg on last_cycle failed. The
 * nanoseconds are then normalized to less than one second (.time_normalize)
 * and, when p14 is set, divided by 1000. Seconds are stored at [r31], the
 * second word at [r31 + IA64_TIMESPEC_TV_NSEC_OFFSET].
 *
 * Exits:
 *   success      - r8 = 0, r10 = 0
 *   .fail_einval - r8 = EINVAL, r10 = -1 (NaT argument)
 *   .fail_efault - r8 = EFAULT, r10 = -1 (fault on the result buffer)
 *   fsys_fallback_syscall - work pending, or a time source this code does
 *                           not handle (source > 2)
 *
 * .fail_einval and .fail_efault are also branch targets for
 * fsys_rt_sigprocmask.
 */
160 ENTRY(fsys_gettimeofday)
161 .prologue
162 .altrp b6
163 .body
164 mov r31 = r32
165 tnat.nz p6,p0 = r33 // guard against NaT argument
166 (p6) br.cond.spnt.few .fail_einval
167 mov r30 = CLOCK_DIVIDE_BY_1000
168 ;;
169 .gettime:
170 // Register map
171 // Incoming r31 = pointer to address where to place result
172 // r30 = flags determining how time is processed
173 // r2,r3 = temp r4-r7 preserved
174 // r8 = result nanoseconds
175 // r9 = result seconds
176 // r10 = temporary storage for clock difference
177 // r11 = preserved: saved ar.pfs
178 // r12 = preserved: memory stack
179 // r13 = preserved: thread pointer
180 // r14 = address of mask / mask
181 // r15 = preserved: system call number
182 // r16 = preserved: current task pointer
183 // r17 = wall to monotonic use
184 // r18 = time_interpolator->offset
185 // r19 = address of wall_to_monotonic
186 // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
187 // r21 = shift factor
188 // r22 = address of time interpolator->last_counter
189 // r23 = address of time_interpolator->last_cycle
190 // r24 = address of time_interpolator->offset
191 // r25 = last_cycle value
192 // r26 = last_counter value
193 // r27 = pointer to xtime
194 // r28 = sequence number at the beginning of critical section
195 // r29 = address of seqlock
196 // r30 = time processing flags / memory address
197 // r31 = pointer to result
198 // Predicates
199 // p6,p7 short term use
200 // p8 = timesource ar.itc
201 // p9 = timesource mmio64
202 // p10 = timesource mmio32
203 // p11 = timesource not to be handled by asm code
204 // p12 = memory time source ( = p9 | p10)
205 // p13 = do cmpxchg with time_interpolator_last_cycle
206 // p14 = Divide by 1000
207 // p15 = Add monotonic
208 //
209 // Note that instructions are optimized for McKinley. McKinley can process two
210 // bundles simultaneously and therefore we continuously try to feed the CPU
211 // two bundles and then a stop.
212 tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
213 mov pr = r30,0xc000 // Set predicates according to function
214 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
215 movl r20 = time_interpolator
216 ;;
217 ld8 r20 = [r20] // get pointer to time_interpolator structure
218 movl r29 = xtime_lock
219 ld4 r2 = [r2] // process work pending flags
220 movl r27 = xtime
221 ;; // only one bundle here
222 ld8 r21 = [r20] // first quad with control information
223 and r2 = TIF_ALLWORK_MASK,r2
224 (p6) br.cond.spnt.few .fail_einval // deferred branch
225 ;;
226 add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
227 extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
228 extr r8 = r21,0,16 // time_interpolator->source
229 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
230 (p6) br.cond.spnt.many fsys_fallback_syscall
231 ;;
232 cmp.eq p8,p12 = 0,r8 // Check for cpu timer
233 cmp.eq p9,p0 = 1,r8 // MMIO64 ?
234 extr r2 = r21,24,8 // time_interpolator->jitter
235 cmp.eq p10,p0 = 2,r8 // MMIO32 ?
236 cmp.ltu p11,p0 = 2,r8 // function or other clock
237 (p11) br.cond.spnt.many fsys_fallback_syscall
238 ;;
239 setf.sig f7 = r3 // Setup for scaling of counter
240 (p15) movl r19 = wall_to_monotonic
241 (p12) ld8 r30 = [r10] // r30 = time_interpolator->address (memory time sources only)
242 cmp.ne p13,p0 = r2,r0 // need jitter compensation?
243 extr r21 = r21,16,8 // shift factor
244 ;;
245 .time_redo:
246 .pred.rel.mutex p8,p9,p10
247 ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
248 (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
249 add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
250 (p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
251 (p10) ld4 r2 = [r30] // 32-bit read of ti->address (MMIO32 source)
252 (p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
253 ;; // could be removed by moving the last add upward
254 ld8 r26 = [r22] // time_interpolator->last_counter
255 (p13) ld8 r25 = [r23] // time interpolator->last_cycle
256 add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
257 (p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET // first word of wall_to_monotonic; r19 advances to the nsec word
258 ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET // first word of xtime (seconds); r27 advances to the nsec word
259 add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
260 ;;
261 ld8 r18 = [r24] // time_interpolator->offset
262 ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
263 (p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
264 ;;
265 ld8 r14 = [r14] // time_interpolator->mask
266 (p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
267 sub r10 = r2,r26 // current_counter - last_counter
268 ;;
269 (p6) sub r10 = r25,r26 // time we got was less than last_cycle
270 (p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
271 ;;
272 and r10 = r10,r14 // Apply mask
273 ;;
274 setf.sig f8 = r10
275 nop.i 123
276 ;;
277 (p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
278 EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
279 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
280 (p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
281 ;;
282 (p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET // nsec word of wall_to_monotonic; r19 moves back to the start
283 (p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
284 // simulate tbit.nz.or p7,p0 = r28,0
285 and r28 = ~1,r28 // Make sequence even to force retry if odd
286 getf.sig r2 = f8
287 mf
288 add r8 = r8,r18 // Add time interpolator offset
289 ;;
290 ld4 r10 = [r29] // xtime_lock.sequence
291 (p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
292 shr.u r2 = r2,r21
293 ;; // overloaded 3 bundles!
294 // End critical section.
295 add r8 = r8,r2 // Add xtime.nsecs
296 cmp4.ne.or p7,p0 = r28,r10
297 (p7) br.cond.dpnt.few .time_redo // sequence number changed ?
298 // Now r8=tv->tv_nsec and r9=tv->tv_sec
299 mov r10 = r0 // r10 was used as a temporary; restore the "successful return" value
300 movl r2 = 1000000000
301 add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 // r23 = address of the second result word
302 (p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
303 ;;
304 .time_normalize:
305 mov r21 = r8
306 cmp.ge p6,p0 = r8,r2
307 (p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
308 ;;
309 (p14) setf.sig f8 = r20
310 (p6) sub r8 = r8,r2
311 (p6) add r9 = 1,r9 // two nops before the branch.
312 (p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
313 (p6) br.cond.dpnt.few .time_normalize
314 ;;
315 // Divided by 8 though shift. Now divide by 125
316 // The compiler was able to do that with a multiply
317 // and a shift and we do the same
318 EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
319 (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
320 ;;
321 mov r8 = r0 // return value 0
322 (p14) getf.sig r2 = f8
323 ;;
324 (p14) shr.u r21 = r2, 4
325 ;;
326 EX(.fail_efault, st8 [r31] = r9) // store seconds
327 EX(.fail_efault, st8 [r23] = r21) // store nanoseconds, or the value divided by 1000 when p14 is set
328 FSYS_RETURN
329 .fail_einval:
330 mov r8 = EINVAL
331 mov r10 = -1
332 FSYS_RETURN
333 .fail_efault:
334 mov r8 = EFAULT
335 mov r10 = -1
336 FSYS_RETURN
337 END(fsys_gettimeofday)
/*
 * fsys_clock_gettime: light-weight clock_gettime for CLOCK_REALTIME and
 * CLOCK_MONOTONIC.
 *
 * r32 = clock id, r33 = pointer to the result. Any clock id above
 * CLOCK_MONOTONIC (unsigned 32-bit compare) is handed to the heavy-weight
 * syscall. Otherwise the shared .gettime body of fsys_gettimeofday is entered
 * with r31 = result pointer and r30 = clock id << 15, i.e. 0 for
 * CLOCK_REALTIME and CLOCK_ADD_MONOTONIC (0x8000) for CLOCK_MONOTONIC.
 */
339 ENTRY(fsys_clock_gettime)
340 .prologue
341 .altrp b6
342 .body
343 cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
344 // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
345 (p6) br.spnt.few fsys_fallback_syscall
346 mov r31 = r33 // r31 = pointer to result, as .gettime expects
347 shl r30 = r32,15 // r30 = processing flags derived from the clock id
348 br.many .gettime
349 END(fsys_clock_gettime)
351 /*
352 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
353 */
/*
 * Arguments: r32 = how, r33 = set, r34 = oset, r35 = sigsetsize.
 *
 * Outline:
 *  - fail with EINVAL if how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8, or
 *    how/sigsetsize is a NaT;
 *  - fail with EFAULT if set or oset is a NaT;
 *  - fall back to the heavy-weight syscall if work is pending;
 *  - if set == NULL, skip straight to .store_mask and only report the mask;
 *  - otherwise compute the new mask with interrupts masked (and, on SMP,
 *    with the siglock held), update current->blocked and clear
 *    _TIF_SIGPENDING in the thread flags. If a signal would be pending with
 *    the new mask, or the lock is contended, fall back to the heavy-weight
 *    syscall instead.
 *
 * The .fail_einval and .fail_efault labels used here are defined in
 * fsys_gettimeofday.
 */
354 #if _NSIG_WORDS != 1
355 # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
356 #endif
357 ENTRY(fsys_rt_sigprocmask)
358 .prologue
359 .altrp b6
360 .body
362 add r2=IA64_TASK_BLOCKED_OFFSET,r16 // r2 = &current->blocked
363 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread flags word
364 cmp4.ltu p6,p0=SIG_SETMASK,r32 // p6 <- how > SIG_SETMASK (unsigned)
366 cmp.ne p15,p0=r0,r34 // oset != NULL?
367 tnat.nz p8,p0=r34 // p8 <- oset is a NaT
368 add r31=IA64_TASK_SIGHAND_OFFSET,r16
369 ;;
370 ld8 r3=[r2] // read/prefetch current->blocked
371 ld4 r9=[r9] // r9 = thread flags
372 tnat.nz.or p6,p0=r35
374 cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
375 tnat.nz.or p6,p0=r32
376 (p6) br.spnt.few .fail_einval // fail with EINVAL
377 ;;
378 #ifdef CONFIG_SMP
379 ld8 r31=[r31] // r31 <- current->sighand
380 #endif
381 and r9=TIF_ALLWORK_MASK,r9
382 tnat.nz.or p8,p0=r33 // p8 <- oset or set is a NaT
383 ;;
384 cmp.ne p7,p0=0,r9 // p7 <- some work is pending
385 cmp.eq p6,p0=r0,r33 // set == NULL?
386 add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
387 (p8) br.spnt.few .fail_efault // fail with EFAULT
388 (p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
389 (p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
391 /* Argh, we actually have to do some work and _update_ the signal mask: */
393 EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
394 EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
395 mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
396 ;;
398 rsm psr.i // mask interrupt delivery
399 mov ar.ccv=0 // compare value for the lock cmpxchg: lock word must be 0
400 andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
402 #ifdef CONFIG_SMP
403 mov r17=1 // value written into the lock word when acquiring it
404 ;;
405 cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
406 mov r8=EINVAL // default to EINVAL
407 ;;
408 ld8 r3=[r2] // re-read current->blocked now that we hold the lock
409 cmp4.ne p6,p0=r18,r0 // p6 <- lock word was not 0, i.e. we did not get the lock
410 (p6) br.cond.spnt.many .lock_contention
411 ;;
412 #else
413 ld8 r3=[r2] // re-read current->blocked now that we hold the lock
414 mov r8=EINVAL // default to EINVAL
415 #endif
416 add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
417 add r19=IA64_TASK_SIGNAL_OFFSET,r16
418 cmp4.eq p6,p0=SIG_BLOCK,r32
419 ;;
420 ld8 r19=[r19] // r19 <- current->signal
421 cmp4.eq p7,p0=SIG_UNBLOCK,r32
422 cmp4.eq p8,p0=SIG_SETMASK,r32
423 ;;
424 ld8 r18=[r18] // r18 <- current->pending.signal
425 .pred.rel.mutex p6,p7,p8
426 (p6) or r14=r3,r14 // SIG_BLOCK
427 (p7) andcm r14=r3,r14 // SIG_UNBLOCK
429 (p8) mov r14=r14 // SIG_SETMASK
430 (p6) mov r8=0 // clear error code
431 // recalc_sigpending()
432 add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
434 add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
435 ;;
436 ld4 r17=[r17] // r17 <- current->signal->group_stop_count
437 (p7) mov r8=0 // clear error code
439 ld8 r19=[r19] // r19 <- current->signal->shared_pending
440 ;;
441 cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
442 (p8) mov r8=0 // clear error code
444 or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
445 ;;
446 // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
447 andcm r18=r18,r14
448 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
449 ;;
451 (p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
452 mov r19=0 // i must not leak kernel bits...
453 (p6) br.cond.dpnt.many .sig_pending
454 ;;
456 1: ld4 r17=[r9] // r17 <- current->thread_info->flags
457 ;;
458 mov ar.ccv=r17
459 and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
460 ;;
462 st8 [r2]=r14 // update current->blocked with new mask
463 cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18
464 ;;
465 cmp.ne p6,p0=r17,r8 // update failed?
466 (p6) br.cond.spnt.few 1b // yes -> retry
468 #ifdef CONFIG_SMP
469 st4.rel [r31]=r0 // release the lock
470 #endif
471 ssm psr.i
472 ;;
474 srlz.d // ensure psr.i is set again
475 mov r18=0 // i must not leak kernel bits...
477 .store_mask:
478 EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
479 EX(.fail_efault, (p15) st8 [r34]=r3) // *oset <- value of current->blocked read before the update
480 mov r2=0 // i must not leak kernel bits...
481 mov r3=0 // i must not leak kernel bits...
482 mov r8=0 // return 0
483 mov r9=0 // i must not leak kernel bits...
484 mov r14=0 // i must not leak kernel bits...
485 mov r17=0 // i must not leak kernel bits...
486 mov r31=0 // i must not leak kernel bits...
487 FSYS_RETURN
489 .sig_pending:
490 #ifdef CONFIG_SMP
491 st4.rel [r31]=r0 // release the lock
492 #endif
493 ssm psr.i
494 ;;
495 srlz.d
496 br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
498 #ifdef CONFIG_SMP
499 .lock_contention:
500 /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
501 ssm psr.i
502 ;;
503 srlz.d
504 br.sptk.many fsys_fallback_syscall
505 #endif
506 END(fsys_rt_sigprocmask)
/*
 * fsys_fallback_syscall: entered from the light-weight handlers when they
 * cannot complete the call themselves.
 *
 * Looks up the heavy-weight entry point in sys_call_table (indexed by
 * r15 - 1024), masks interrupt delivery, and loads the registers that
 * fsys_bubble_down expects (r18, r21, r26, r27, r29) before falling through
 * into it.
 *
 * With CONFIG_XEN, p14/p15 select between the paravirtualized sequence
 * (running_on_xen != 0) and the native instructions.
 */
508 ENTRY(fsys_fallback_syscall)
509 .prologue
510 .altrp b6
511 .body
512 /*
513 * We only get here from light-weight syscall handlers. Thus, we already
514 * know that r15 contains a valid syscall number. No need to re-check.
515 */
516 adds r17=-1024,r15 // r17 = index into sys_call_table
517 movl r14=sys_call_table
518 ;;
519 #ifdef CONFIG_XEN
520 movl r18=running_on_xen;;
521 ld4 r18=[r18];;
522 // p14 = running_on_xen
523 // p15 = !running_on_xen
524 cmp.ne p14,p15=r0,r18
525 ;;
526 (p14) movl r18=XSI_PSR_I_ADDR;;
527 (p14) ld8 r18=[r18] // r18 = pointer read from XSI_PSR_I_ADDR
528 (p14) mov r29=1;;
529 (p14) st1 [r18]=r29 // Xen: store 1 in the byte that pointer refers to, in place of rsm psr.i
530 (p15) rsm psr.i
531 #else
532 rsm psr.i
533 #endif
534 shladd r18=r17,3,r14 // r18 = &sys_call_table[r17] (8-byte entries)
535 ;;
536 ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
537 #ifdef CONFIG_XEN
538 (p14) mov r27=r8 // r8 is overwritten below; keep a copy in r27
539 (p14) XEN_HYPER_GET_PSR
540 ;;
541 (p14) mov r29=r8 // r29 = value left in r8 after XEN_HYPER_GET_PSR
542 (p14) mov r8=r27 // restore r8
543 (p15) mov r29=psr // read psr (12 cyc load latency)
544 #else
545 mov r29=psr // read psr (12 cyc load latency)
546 #endif
547 mov r27=ar.rsc
548 mov r21=ar.fpsr
549 mov r26=ar.pfs
550 END(fsys_fallback_syscall)
551 /* FALL THROUGH */
552 GLOBAL_ENTRY(fsys_bubble_down)
553 .prologue
554 .altrp b6
555 .body
556 /*
557 * We get here for syscalls that don't have a lightweight
558 * handler. For those, we need to bubble down into the kernel
559 * and that requires setting up a minimal pt_regs structure,
560 * and initializing the CPU state more or less as if an
561 * interruption had occurred. To make syscall-restarts work,
562 * we setup pt_regs such that cr_iip points to the second
563 * instruction in syscall_via_break. Decrementing the IP
564 * hence will restart the syscall via break and not
565 * decrementing IP will return us to the caller, as usual.
566 * Note that we preserve the value of psr.pp rather than
567 * initializing it from dcr.pp. This makes it possible to
568 * distinguish fsyscall execution from other privileged
569 * execution.
570 *
571 * On entry:
572 * - normal fsyscall handler register usage, except
573 * that we also have:
574 * - r18: address of syscall entry point
575 * - r21: ar.fpsr
576 * - r26: ar.pfs
577 * - r27: ar.rsc
578 * - r29: psr
579 *
580 * We used to clear some PSR bits here but that requires slow
581 * serialization. Fortunately, that isn't really necessary.
582 * The rationale is as follows: we used to clear bits
583 * ~PSR_PRESERVED_BITS in PSR.L. Since
584 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
585 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
586 * However,
587 *
588 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
589 * PSR.AC : don't care (kernel normally turns PSR.AC on)
590 * PSR.I : already turned off by the time fsys_bubble_down gets
591 * invoked
592 * PSR.DFL: always 0 (kernel never turns it on)
593 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
594 * initiative
595 * PSR.DI : always 0 (kernel never turns it on)
596 * PSR.SI : always 0 (kernel never turns it on)
597 * PSR.DB : don't care --- kernel never enables kernel-level
598 * breakpoints
599 * PSR.TB : must be 0 already; if it wasn't zero on entry to
600 * __kernel_syscall_via_epc, the branch to fsys_bubble_down
601 * will trigger a taken branch; the taken-trap-handler then
602 * converts the syscall into a break-based system-call.
603 */
604 /*
605 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
606 * The rest we have to synthesize.
607 */
608 # define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
609 | (0x1 << IA64_PSR_RI_BIT) \
610 | IA64_PSR_BN | IA64_PSR_I)
612 invala // M0|1
613 movl r14=ia64_ret_from_syscall // X
615 nop.m 0
616 movl r28=__kernel_syscall_via_break // X create cr.iip
617 ;;
619 mov r2=r16 // A get task addr to addl-addressable register
620 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
621 mov r31=pr // I0 save pr (2 cyc)
622 ;;
623 st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
624 addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
625 add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
626 ;;
627 ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
628 lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
629 nop.i 0
630 ;;
631 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
632 nop.m 0
633 nop.i 0
634 ;;
635 mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
636 mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
637 nop.i 0
638 ;;
639 mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
640 movl r8=PSR_ONE_BITS // X
641 ;;
642 mov r25=ar.unat // M2 (5 cyc) save ar.unat
643 mov r19=b6 // I0 save b6 (2 cyc)
644 mov r20=r1 // A save caller's gp in r20
645 ;;
646 or r29=r8,r29 // A construct cr.ipsr value to save
647 mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
648 addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
650 mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
651 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
652 br.call.sptk.many b7=ia64_syscall_setup // B
653 ;;
654 mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
655 mov rp=r14 // I0 set the real return addr
656 and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
657 ;;
658 #ifdef CONFIG_XEN
/*
 * Paravirtualized replacement for "ssm psr.i". The order matters:
 * event_mask must be cleared before event_pending is checked.
 * XEN_HYPER_SSM_I is only issued when event_pending is non-zero.
 */
659 movl r14=running_on_xen;;
660 ld4 r14=[r14];;
661 // p14 = running_on_xen
662 // p15 = !running_on_xen
663 cmp.ne p14,p15=r0,r14
664 ;;
665 (p14) movl r14=XSI_PSR_I_ADDR
666 ;;
667 (p14) ld8 r14=[r14] // r14 = pointer read from XSI_PSR_I_ADDR (address of event_mask)
668 ;;
669 (p14) st1 [r14]=r0,-1 // clear event_mask; post-decrement leaves r14 one byte lower
670 ;;
671 (p14) ld1 r14=[r14] // event_pending
672 ;;
673 (p14) cmp.ne.unc p13,p14=r14,r0 // p13 <- event_pending != 0
674 (p13) XEN_HYPER_SSM_I
675 (p15) ssm psr.i
676 #else
677 ssm psr.i // M2 we're on kernel stacks now, reenable irqs
678 #endif
679 cmp.eq p8,p0=r3,r0 // A p8 <- none of the _TIF_SYSCALL_TRACEAUDIT flags is set
680 (p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
682 nop.m 0
683 (p8) br.call.sptk.many b6=b6 // B (ignore return address)
684 br.cond.spnt ia64_trace_syscall // B
685 END(fsys_bubble_down)
/*
 * fsyscall_table: one 8-byte entry per system call.
 *
 * An entry is either the address of a light-weight handler defined in this
 * file or 0. The trailing comments name the syscall for each slot and give
 * the syscall number every fifth entry; the first slot, fsys_ni_syscall,
 * precedes the slot marked 1025.
 *
 * The address of fsys_bubble_down is emitted immediately before the
 * fsyscall_table label, i.e. it occupies the 8 bytes at fsyscall_table - 8.
 * NOTE(review): the consumer of this table is not part of this file --
 * confirm how the 0 entries and the slot at -8 are used there.
 */
687 .rodata
688 .align 8
689 .globl fsyscall_table
691 data8 fsys_bubble_down
692 fsyscall_table:
693 data8 fsys_ni_syscall
694 data8 0 // exit // 1025
695 data8 0 // read
696 data8 0 // write
697 data8 0 // open
698 data8 0 // close
699 data8 0 // creat // 1030
700 data8 0 // link
701 data8 0 // unlink
702 data8 0 // execve
703 data8 0 // chdir
704 data8 0 // fchdir // 1035
705 data8 0 // utimes
706 data8 0 // mknod
707 data8 0 // chmod
708 data8 0 // chown
709 data8 0 // lseek // 1040
710 data8 fsys_getpid // getpid
711 data8 fsys_getppid // getppid
712 data8 0 // mount
713 data8 0 // umount
714 data8 0 // setuid // 1045
715 data8 0 // getuid
716 data8 0 // geteuid
717 data8 0 // ptrace
718 data8 0 // access
719 data8 0 // sync // 1050
720 data8 0 // fsync
721 data8 0 // fdatasync
722 data8 0 // kill
723 data8 0 // rename
724 data8 0 // mkdir // 1055
725 data8 0 // rmdir
726 data8 0 // dup
727 data8 0 // pipe
728 data8 0 // times
729 data8 0 // brk // 1060
730 data8 0 // setgid
731 data8 0 // getgid
732 data8 0 // getegid
733 data8 0 // acct
734 data8 0 // ioctl // 1065
735 data8 0 // fcntl
736 data8 0 // umask
737 data8 0 // chroot
738 data8 0 // ustat
739 data8 0 // dup2 // 1070
740 data8 0 // setreuid
741 data8 0 // setregid
742 data8 0 // getresuid
743 data8 0 // setresuid
744 data8 0 // getresgid // 1075
745 data8 0 // setresgid
746 data8 0 // getgroups
747 data8 0 // setgroups
748 data8 0 // getpgid
749 data8 0 // setpgid // 1080
750 data8 0 // setsid
751 data8 0 // getsid
752 data8 0 // sethostname
753 data8 0 // setrlimit
754 data8 0 // getrlimit // 1085
755 data8 0 // getrusage
756 data8 fsys_gettimeofday // gettimeofday
757 data8 0 // settimeofday
758 data8 0 // select
759 data8 0 // poll // 1090
760 data8 0 // symlink
761 data8 0 // readlink
762 data8 0 // uselib
763 data8 0 // swapon
764 data8 0 // swapoff // 1095
765 data8 0 // reboot
766 data8 0 // truncate
767 data8 0 // ftruncate
768 data8 0 // fchmod
769 data8 0 // fchown // 1100
770 data8 0 // getpriority
771 data8 0 // setpriority
772 data8 0 // statfs
773 data8 0 // fstatfs
774 data8 0 // gettid // 1105
775 data8 0 // semget
776 data8 0 // semop
777 data8 0 // semctl
778 data8 0 // msgget
779 data8 0 // msgsnd // 1110
780 data8 0 // msgrcv
781 data8 0 // msgctl
782 data8 0 // shmget
783 data8 0 // shmat
784 data8 0 // shmdt // 1115
785 data8 0 // shmctl
786 data8 0 // syslog
787 data8 0 // setitimer
788 data8 0 // getitimer
789 data8 0 // 1120
790 data8 0
791 data8 0
792 data8 0 // vhangup
793 data8 0 // lchown
794 data8 0 // remap_file_pages // 1125
795 data8 0 // wait4
796 data8 0 // sysinfo
797 data8 0 // clone
798 data8 0 // setdomainname
799 data8 0 // newuname // 1130
800 data8 0 // adjtimex
801 data8 0
802 data8 0 // init_module
803 data8 0 // delete_module
804 data8 0 // 1135
805 data8 0
806 data8 0 // quotactl
807 data8 0 // bdflush
808 data8 0 // sysfs
809 data8 0 // personality // 1140
810 data8 0 // afs_syscall
811 data8 0 // setfsuid
812 data8 0 // setfsgid
813 data8 0 // getdents
814 data8 0 // flock // 1145
815 data8 0 // readv
816 data8 0 // writev
817 data8 0 // pread64
818 data8 0 // pwrite64
819 data8 0 // sysctl // 1150
820 data8 0 // mmap
821 data8 0 // munmap
822 data8 0 // mlock
823 data8 0 // mlockall
824 data8 0 // mprotect // 1155
825 data8 0 // mremap
826 data8 0 // msync
827 data8 0 // munlock
828 data8 0 // munlockall
829 data8 0 // sched_getparam // 1160
830 data8 0 // sched_setparam
831 data8 0 // sched_getscheduler
832 data8 0 // sched_setscheduler
833 data8 0 // sched_yield
834 data8 0 // sched_get_priority_max // 1165
835 data8 0 // sched_get_priority_min
836 data8 0 // sched_rr_get_interval
837 data8 0 // nanosleep
838 data8 0 // nfsservctl
839 data8 0 // prctl // 1170
840 data8 0 // getpagesize
841 data8 0 // mmap2
842 data8 0 // pciconfig_read
843 data8 0 // pciconfig_write
844 data8 0 // perfmonctl // 1175
845 data8 0 // sigaltstack
846 data8 0 // rt_sigaction
847 data8 0 // rt_sigpending
848 data8 fsys_rt_sigprocmask // rt_sigprocmask
849 data8 0 // rt_sigqueueinfo // 1180
850 data8 0 // rt_sigreturn
851 data8 0 // rt_sigsuspend
852 data8 0 // rt_sigtimedwait
853 data8 0 // getcwd
854 data8 0 // capget // 1185
855 data8 0 // capset
856 data8 0 // sendfile
857 data8 0
858 data8 0
859 data8 0 // socket // 1190
860 data8 0 // bind
861 data8 0 // connect
862 data8 0 // listen
863 data8 0 // accept
864 data8 0 // getsockname // 1195
865 data8 0 // getpeername
866 data8 0 // socketpair
867 data8 0 // send
868 data8 0 // sendto
869 data8 0 // recv // 1200
870 data8 0 // recvfrom
871 data8 0 // shutdown
872 data8 0 // setsockopt
873 data8 0 // getsockopt
874 data8 0 // sendmsg // 1205
875 data8 0 // recvmsg
876 data8 0 // pivot_root
877 data8 0 // mincore
878 data8 0 // madvise
879 data8 0 // newstat // 1210
880 data8 0 // newlstat
881 data8 0 // newfstat
882 data8 0 // clone2
883 data8 0 // getdents64
884 data8 0 // getunwind // 1215
885 data8 0 // readahead
886 data8 0 // setxattr
887 data8 0 // lsetxattr
888 data8 0 // fsetxattr
889 data8 0 // getxattr // 1220
890 data8 0 // lgetxattr
891 data8 0 // fgetxattr
892 data8 0 // listxattr
893 data8 0 // llistxattr
894 data8 0 // flistxattr // 1225
895 data8 0 // removexattr
896 data8 0 // lremovexattr
897 data8 0 // fremovexattr
898 data8 0 // tkill
899 data8 0 // futex // 1230
900 data8 0 // sched_setaffinity
901 data8 0 // sched_getaffinity
902 data8 fsys_set_tid_address // set_tid_address
903 data8 0 // fadvise64_64
904 data8 0 // tgkill // 1235
905 data8 0 // exit_group
906 data8 0 // lookup_dcookie
907 data8 0 // io_setup
908 data8 0 // io_destroy
909 data8 0 // io_getevents // 1240
910 data8 0 // io_submit
911 data8 0 // io_cancel
912 data8 0 // epoll_create
913 data8 0 // epoll_ctl
914 data8 0 // epoll_wait // 1245
915 data8 0 // restart_syscall
916 data8 0 // semtimedop
917 data8 0 // timer_create
918 data8 0 // timer_settime
919 data8 0 // timer_gettime // 1250
920 data8 0 // timer_getoverrun
921 data8 0 // timer_delete
922 data8 0 // clock_settime
923 data8 fsys_clock_gettime // clock_gettime
925 // fill in zeros for the remaining entries
// The .space below pads with zero bytes from .zero up to
// fsyscall_table + 8*NR_syscalls, so the table ends up with NR_syscalls entries.
926 .zero:
927 .space fsyscall_table + 8*NR_syscalls - .zero, 0