]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/ia64/kernel/fsys.S
[IA64] fix siglock
[net-next-2.6.git] / arch / ia64 / kernel / fsys.S
CommitLineData
1da177e4
LT
1/*
2 * This file contains the light-weight system call handlers (fsyscall-handlers).
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
8 * 18-Feb-03 louisk Implement fsys_gettimeofday().
9 * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
10 * probably broke it along the way... ;-)
11 * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12 * it capable of using memory based clocks without falling back to C code.
3bc207d2
FY
13 * 08-Feb-07 Fenghua Yu Implement fsys_getcpu.
14 *
1da177e4
LT
15 */
16
17#include <asm/asmmacro.h>
18#include <asm/errno.h>
39e01cb8 19#include <asm/asm-offsets.h>
1da177e4
LT
20#include <asm/percpu.h>
21#include <asm/thread_info.h>
22#include <asm/sal.h>
23#include <asm/signal.h>
24#include <asm/system.h>
25#include <asm/unistd.h>
26
27#include "entry.h"
dd97d5cb 28#include "paravirt_inst.h"
1da177e4
LT
29
30/*
31 * See Documentation/ia64/fsys.txt for details on fsyscalls.
32 *
33 * On entry to an fsyscall handler:
34 * r10 = 0 (i.e., defaults to "successful syscall return")
35 * r11 = saved ar.pfs (a user-level value)
36 * r15 = system call number
37 * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
38 * r32-r39 = system call arguments
39 * b6 = return address (a user-level value)
40 * ar.pfs = previous frame-state (a user-level value)
41 * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
42 * all other registers may contain values passed in from user-mode
43 *
44 * On return from an fsyscall handler:
45 * r11 = saved ar.pfs (as passed into the fsyscall handler)
46 * r15 = system call number (as passed into the fsyscall handler)
47 * r32-r39 = system call arguments (as passed into the fsyscall handler)
48 * b6 = return address (as passed into the fsyscall handler)
49 * ar.pfs = previous frame-state (as passed into the fsyscall handler)
50 */
51
/*
 * fsys_ni_syscall: light-weight handler that unconditionally fails.
 * It loads ENOSYS into r8 and -1 into r10 (r10 is 0 on entry, which stands
 * for "successful syscall return") and leaves through FSYS_RETURN.
 */
52ENTRY(fsys_ni_syscall)
53 .prologue
54 .altrp b6
55 .body
56 mov r8=ENOSYS // r8 = error code reported to the caller
57 mov r10=-1 // override the r10 = 0 "success" default set up on entry
58 FSYS_RETURN
59END(fsys_ni_syscall)
60
/*
 * fsys_getpid: light-weight getpid().
 *
 * Walks current->group_leader->pids[PIDTYPE_PID].pid and returns
 * pid->numbers[pid->level].nr in r8.  If any TIF_ALLWORK_MASK bit is set in
 * the thread-info flags, the result is discarded and the call is redone via
 * fsys_fallback_syscall.  r17 (a kernel pointer) is zeroed before returning.
 */
61ENTRY(fsys_getpid)
62 .prologue
63 .altrp b6
64 .body
96ded9da
PE
65 add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
66 ;;
67 ld8 r17=[r17] // r17 = current->group_leader
1da177e4
LT
68 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread-info flags word
69 ;;
70 ld4 r9=[r9] // r9 = thread-info flags
96ded9da 71 add r17=IA64_TASK_TGIDLINK_OFFSET,r17
1da177e4
LT
72 ;;
73 and r9=TIF_ALLWORK_MASK,r9 // keep only the "work pending" bits
96ded9da
PE
74 ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
75 ;;
76 add r8=IA64_PID_LEVEL_OFFSET,r17
77 ;;
78 ld4 r8=[r8] // r8 = pid->level
79 add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
80 ;;
81 shl r8=r8,IA64_UPID_SHIFT // scale level into a byte offset within numbers[]
82 ;;
83 add r17=r17,r8 // r17 = &pid->numbers[pid->level]
84 ;;
85 ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
86 ;;
87 mov r17=0 // do not hand a kernel pointer back to user level
1da177e4
LT
88 ;;
89 cmp.ne p8,p0=0,r9 // p8 <- some work is pending
90(p8) br.spnt.many fsys_fallback_syscall // pending work -> heavy-weight syscall
91 FSYS_RETURN
92END(fsys_getpid)
93
/*
 * fsys_getppid: light-weight getppid().
 *
 * Returns current->group_leader->real_parent->tgid in r8.
 *
 * If any TIF_ALLWORK_MASK bit is set in the thread-info flags, the handler
 * branches to fsys_fallback_syscall instead of returning.  This check has to
 * be performed on both SMP and UP builds.
 *
 * On SMP, real_parent is re-read after the tgid load and the sequence is
 * retried if it changed in between.  r17-r19 are zeroed before returning so
 * that no kernel pointers are visible at user level.
 */
ENTRY(fsys_getppid)
	.prologue
	.altrp b6
	.body
	add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
	;;
	ld8 r17=[r17]				// r17 = current->group_leader
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

	ld4 r9=[r9]				// r9 = thread-info flags
	add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
	;;
	and r9=TIF_ALLWORK_MASK,r9		// keep only the "work pending" bits

1:	ld8 r18=[r17]				// r18 = current->group_leader->real_parent
	;;
	cmp.ne p8,p0=0,r9			// p8 <- some work is pending
	add r8=IA64_TASK_TGID_OFFSET,r18	// r8 = &current->group_leader->real_parent->tgid
	;;

	/*
	 * The .acq is needed to ensure that the read of tgid has returned its data before
	 * we re-check "real_parent".
	 */
	ld4.acq r8=[r8]				// r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
	/*
	 * Re-read current->group_leader->real_parent.
	 */
	ld8 r19=[r17]				// r19 = current->group_leader->real_parent
(p8)	br.spnt.many fsys_fallback_syscall	// pending work -> heavy-weight syscall
	;;
	cmp.ne p6,p0=r18,r19			// did real_parent change?
	mov r19=0				// i must not leak kernel bits...
(p6)	br.cond.spnt.few 1b			// yes -> redo the read of tgid and the check
	;;
	mov r17=0				// i must not leak kernel bits...
	mov r18=0				// i must not leak kernel bits...
#else
	/*
	 * No re-check of real_parent is needed here, but pending work must
	 * still divert the call to the heavy-weight path.
	 */
(p8)	br.spnt.many fsys_fallback_syscall	// pending work -> heavy-weight syscall
	;;
	mov r17=0				// i must not leak kernel bits...
	mov r18=0				// i must not leak kernel bits...
	mov r19=0				// i must not leak kernel bits...
#endif
	FSYS_RETURN
END(fsys_getppid)
140
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 *
 * Stores the argument (r32) into the task field at
 * IA64_TASK_CLEAR_CHILD_TID_OFFSET and returns
 * current->pids[PIDTYPE_PID].pid->numbers[level].nr in r8.  If r32 is a NaT,
 * -1 is stored instead.  The store is issued before the pending-work check;
 * when any TIF_ALLWORK_MASK bit is set the call is then redone through
 * fsys_fallback_syscall.  r17 and r18 are zeroed before returning.
 */
141ENTRY(fsys_set_tid_address)
142 .prologue
143 .altrp b6
144 .body
145 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // r9 = address of the thread-info flags word
96ded9da 146 add r17=IA64_TASK_TGIDLINK_OFFSET,r16
1da177e4
LT
147 ;;
148 ld4 r9=[r9] // r9 = thread-info flags
149 tnat.z p6,p7=r32 // check argument register for being NaT (p6: valid, p7: NaT)
96ded9da 150 ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
1da177e4
LT
151 ;;
152 and r9=TIF_ALLWORK_MASK,r9 // keep only the "work pending" bits
96ded9da 153 add r8=IA64_PID_LEVEL_OFFSET,r17
1da177e4
LT
154 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 // r18 = address of the clear_child_tid field of current
155 ;;
96ded9da
PE
156 ld4 r8=[r8] // r8 = pid->level
157 add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
158 ;;
159 shl r8=r8,IA64_UPID_SHIFT // scale level into a byte offset within numbers[]
160 ;;
161 add r17=r17,r8 // r17 = &pid->numbers[pid->level]
162 ;;
163 ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
164 ;;
1da177e4
LT
165 cmp.ne p8,p0=0,r9 // p8 <- some work is pending
166 mov r17=-1 // value stored when the argument is a NaT
167 ;;
168(p6) st8 [r18]=r32 // valid argument: record it
169(p7) st8 [r18]=r17 // NaT argument: record -1 instead
170(p8) br.spnt.many fsys_fallback_syscall // pending work -> heavy-weight syscall
171 ;;
172 mov r17=0 // i must not leak kernel bits...
173 mov r18=0 // i must not leak kernel bits...
174 FSYS_RETURN
175END(fsys_set_tid_address)
176
0aa366f3
TL
177#if IA64_GTOD_LOCK_OFFSET !=0
178#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t
179#endif
180#if IA64_ITC_JITTER_OFFSET !=0
181#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t
1da177e4
LT
182#endif
183#define CLOCK_REALTIME 0
184#define CLOCK_MONOTONIC 1
185#define CLOCK_DIVIDE_BY_1000 0x4000
186#define CLOCK_ADD_MONOTONIC 0x8000
187
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *
 * r32 is the result pointer and r33 the second argument (only checked for
 * being a NaT).  The body starting at .gettime is shared with
 * fsys_clock_gettime, which enters with r31 = result pointer and
 * r30 = processing flags (CLOCK_DIVIDE_BY_1000 and/or CLOCK_ADD_MONOTONIC).
 *
 * The time is sampled inside a sequence-number protected section: the
 * sequence word of fsyscall_gtod_data is read before and after the
 * computation and the whole section is redone (.time_redo) if it changed or
 * was odd.  The cycle delta since clksrc_cycle_last is masked, multiplied by
 * the clocksource mult value, shifted right by the shift value and added to
 * the nanosecond part; the result is then normalized so that the nanosecond
 * part is below 1000000000 and, when p14 is set, divided by 1000.
 *
 * Exits: FSYS_RETURN with r8 = 0 on success; .fail_einval (r8 = EINVAL,
 * r10 = -1) for NaT arguments; .fail_efault (r8 = EFAULT, r10 = -1) when a
 * user-memory probe or store faults; fsys_fallback_syscall when work is
 * pending.  The .fail_einval/.fail_efault labels are also used by other
 * handlers in this file.
 */
188ENTRY(fsys_gettimeofday)
189 .prologue
190 .altrp b6
191 .body
192 mov r31 = r32 // r31 = pointer to the result, as expected by .gettime
193 tnat.nz p6,p0 = r33 // guard against NaT argument
194(p6) br.cond.spnt.few .fail_einval
195 mov r30 = CLOCK_DIVIDE_BY_1000 // gettimeofday reports microseconds
196 ;;
197.gettime:
198 // Register map
199 // Incoming r31 = pointer to address where to place result
200 // r30 = flags determining how time is processed
201 // r2,r3 = temp r4-r7 preserved
202 // r8 = result nanoseconds
203 // r9 = result seconds
204 // r10 = temporary storage for clock difference
205 // r11 = preserved: saved ar.pfs
206 // r12 = preserved: memory stack
207 // r13 = preserved: thread pointer
0aa366f3 208 // r14 = address of mask / mask value
1da177e4
LT
209 // r15 = preserved: system call number
210 // r16 = preserved: current task pointer
0aa366f3
TL
211 // r17 = (not used)
212 // r18 = (not used)
213 // r19 = address of itc_lastcycle
214 // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence)
215 // r21 = address of mmio_ptr
216 // r22 = address of wall_time or monotonic_time
217 // r23 = address of shift / value
218 // r24 = address mult factor / cycle_last value
219 // r25 = itc_lastcycle value
220 // r26 = address clocksource cycle_last
221 // r27 = (not used)
1da177e4 222 // r28 = sequence number at the beginning of critical section
0aa366f3 223 // r29 = address of itc_jitter
1da177e4
LT
224 // r30 = time processing flags / memory address
225 // r31 = pointer to result
226 // Predicates
227 // p6,p7 short term use
228 // p8 = timesource ar.itc
229 // p9 = timesource mmio64
0aa366f3 230 // p10 = timesource mmio32 - not used
1da177e4 231 // p11 = timesource not to be handled by asm code
0aa366f3
TL
232 // p12 = memory time source ( = p9 | p10) - not used
233 // p13 = do cmpxchg with itc_lastcycle
1da177e4
LT
234 // p14 = Divide by 1000
235 // p15 = Add monotonic
236 //
0aa366f3
TL
237 // Note that instructions are optimized for McKinley. McKinley can
238 // process two bundles simultaneously and therefore we continuously
239 // try to feed the CPU two bundles and then a stop.
4fe01c68 240
1da177e4 241 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
4fe01c68
HS
242 tnat.nz p6,p0 = r31 // guard against NaT argument
243(p6) br.cond.spnt.few .fail_einval
0aa366f3 244 movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
1da177e4 245 ;;
4fe01c68 246 ld4 r2 = [r2] // process work pending flags
0aa366f3
TL
247 movl r29 = itc_jitter_data // itc_jitter
248 add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
0aa366f3 249 add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
4fe01c68
HS
250 mov pr = r30,0xc000 // Set predicates according to function (mask 0xc000: only p14/p15 are loaded from r30)
251 ;;
1da177e4 252 and r2 = TIF_ALLWORK_MASK,r2
4fe01c68
HS
253 add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
254(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
1da177e4 255 ;;
4fe01c68 256 add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
1da177e4 257 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
4fe01c68 258(p6) br.cond.spnt.many fsys_fallback_syscall
1da177e4 259 ;;
0aa366f3
TL
260 // Begin critical section
261.time_redo:
262 ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first
263 ;;
264 and r28 = ~1,r28 // And make sequence even to force retry if odd
1da177e4 265 ;;
0aa366f3
TL
266 ld8 r30 = [r21] // clocksource->mmio_ptr
267 add r24 = IA64_CLKSRC_MULT_OFFSET,r20
268 ld4 r2 = [r29] // itc_jitter value
269 add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20
270 add r14 = IA64_CLKSRC_MASK_OFFSET,r20
1da177e4 271 ;;
0aa366f3
TL
272 ld4 r3 = [r24] // clocksource mult value
273 ld8 r14 = [r14] // clocksource mask value
274 cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr
829a9996 275 ;;
0aa366f3
TL
276 setf.sig f7 = r3 // Setup for mult scaling of counter
277(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13
278 ld4 r23 = [r23] // clocksource shift value
279 ld8 r24 = [r26] // get clksrc_cycle_last value
280(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
829a9996 281 ;;
0aa366f3 282 .pred.rel.mutex p8,p9
94752a79 283 MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!!
0aa366f3
TL
284(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
285(p13) ld8 r25 = [r19] // get itc_lastcycle value
0aa366f3
TL
286 ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
287 ;;
288 ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
289(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
290 ;;
291(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
292 sub r10 = r2,r24 // current_cycle - last_cycle
293 ;;
294(p6) sub r10 = r25,r24 // time we got was less than last_cycle
1da177e4
LT
295(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
296 ;;
0aa366f3
TL
297(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv
298 ;;
299(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful
300 ;;
301(p7) sub r10 = r3,r24 // then use new last_cycle instead
302 ;;
1da177e4
LT
303 and r10 = r10,r14 // Apply mask
304 ;;
305 setf.sig f8 = r10
306 nop.i 123
307 ;;
0aa366f3
TL
308 // fault check takes 5 cycles and we have spare time
309EX(.fail_efault, probe.w.fault r31, 3)
1da177e4 310 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
1da177e4 311 ;;
1da177e4
LT
312 getf.sig r2 = f8
313 mf
1da177e4 314 ;;
0aa366f3
TL
315 ld4 r10 = [r20] // gtod_lock.sequence
316 shr.u r2 = r2,r23 // shift by factor
4fe01c68 317 ;;
1da177e4 318 add r8 = r8,r2 // Add xtime.nsecs
0aa366f3
TL
319 cmp4.ne p7,p0 = r28,r10
320(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
321 // End critical section.
1da177e4
LT
322 // Now r8=tv->tv_nsec and r9=tv->tv_sec
323 mov r10 = r0
324 movl r2 = 1000000000 // nanoseconds per second, used by .time_normalize
325 add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 // r23 = address of the second result word
326(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack (constant is 2^68 / 125, rounded up)
327 ;;
328.time_normalize:
329 mov r21 = r8
330 cmp.ge p6,p0 = r8,r2
0aa366f3 331(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time
1da177e4
LT
332 ;;
333(p14) setf.sig f8 = r20
334(p6) sub r8 = r8,r2
0aa366f3
TL
335(p6) add r9 = 1,r9 // two nops before the branch.
336(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
1da177e4
LT
337(p6) br.cond.dpnt.few .time_normalize
338 ;;
339 // Divided by 8 through shift. Now divide by 125
340 // The compiler was able to do that with a multiply
341 // and a shift and we do the same
0aa366f3
TL
342EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
343(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
1da177e4 344 ;;
1da177e4
LT
345(p14) getf.sig r2 = f8
346 ;;
4fe01c68 347 mov r8 = r0
1da177e4
LT
348(p14) shr.u r21 = r2, 4 // high product word >> 4 completes the divide by 125
349 ;;
350EX(.fail_efault, st8 [r31] = r9)
351EX(.fail_efault, st8 [r23] = r21)
352 FSYS_RETURN
353.fail_einval:
354 mov r8 = EINVAL
355 mov r10 = -1
356 FSYS_RETURN
357.fail_efault:
358 mov r8 = EFAULT
359 mov r10 = -1
360 FSYS_RETURN
361END(fsys_gettimeofday)
362
/*
 * fsys_clock_gettime: light-weight clock_gettime().
 *
 * r32 is the clock id and r33 the result pointer.  Only CLOCK_REALTIME (0)
 * and CLOCK_MONOTONIC (1) are handled; any other id goes to
 * fsys_fallback_syscall.  The actual work is done by the .gettime body that
 * lives inside fsys_gettimeofday.
 */
363ENTRY(fsys_clock_gettime)
364 .prologue
365 .altrp b6
366 .body
367 cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 // p6 <- clock id > CLOCK_MONOTONIC (unsigned compare)
368 // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
369(p6) br.spnt.few fsys_fallback_syscall
370 mov r31 = r33 // r31 = pointer to the result, as expected by .gettime
371 shl r30 = r32,15 // flags: CLOCK_MONOTONIC (1) << 15 = CLOCK_ADD_MONOTONIC (0x8000); CLOCK_REALTIME gives 0
372 br.many .gettime
373END(fsys_clock_gettime)
374
/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * Light-weight rt_sigprocmask() for the single-word sigset case.
 *
 * Argument checks:
 *   - how > SIG_SETMASK, sigsetsize != _NSIG_WORDS*8, or a NaT in how or
 *     sigsetsize                                  -> .fail_einval
 *   - a NaT in set or oset                        -> .fail_efault
 *   - any TIF_ALLWORK_MASK bit set                -> fsys_fallback_syscall
 *   - set == NULL                                 -> only report the current
 *                                                    mask (.store_mask)
 *
 * Otherwise interrupts are masked, the siglock is taken with a ticket
 * trylock on SMP (contention -> fsys_fallback_syscall), SIGKILL and SIGSTOP
 * are filtered out of *set, the new mask is computed according to "how",
 * and pending signals are re-evaluated against it.  If a signal would be
 * pending (or a group stop is in progress) the lock is dropped and the
 * heavy-weight syscall is used.  If not, current->blocked is updated,
 * _TIF_SIGPENDING is cleared with a cmpxchg loop, the lock is released,
 * interrupts are re-enabled and the old mask is stored to *oset when
 * oset != NULL.
 *
 * Register use that matters across the whole handler:
 *   r2  = &current->blocked (until the unlock sequence reuses it)
 *   r3  = old value of current->blocked; must survive until .store_mask
 *   r14 = new signal mask
 *   r31 = &current->sighand->siglock
 *   p15 = oset != NULL
 * All scratch registers holding kernel data are zeroed before FSYS_RETURN.
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
ENTRY(fsys_rt_sigprocmask)
	.prologue
	.altrp b6
	.body

	add r2=IA64_TASK_BLOCKED_OFFSET,r16
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	cmp4.ltu p6,p0=SIG_SETMASK,r32		// how > SIG_SETMASK?

	cmp.ne p15,p0=r0,r34			// oset != NULL?
	tnat.nz p8,p0=r34
	add r31=IA64_TASK_SIGHAND_OFFSET,r16
	;;
	ld8 r3=[r2]				// read/prefetch current->blocked
	ld4 r9=[r9]
	tnat.nz.or p6,p0=r35

	cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
	tnat.nz.or p6,p0=r32
(p6)	br.spnt.few .fail_einval		// fail with EINVAL
	;;
#ifdef CONFIG_SMP
	ld8 r31=[r31]				// r31 <- current->sighand
#endif
	and r9=TIF_ALLWORK_MASK,r9
	tnat.nz.or p8,p0=r33
	;;
	cmp.ne p7,p0=0,r9
	cmp.eq p6,p0=r0,r33			// set == NULL?
	add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31	// r31 <- current->sighand->siglock
(p8)	br.spnt.few .fail_efault		// fail with EFAULT
(p7)	br.spnt.many fsys_fallback_syscall	// got pending kernel work...
(p6)	br.dpnt.many .store_mask		// -> short-circuit to just reading the signal mask

	/* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)		// verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])			// r14 <- *set
	mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
	;;

	RSM_PSR_I(p0, r18, r19)			// mask interrupt delivery
	mov ar.ccv=0
	andcm r14=r14,r17			// filter out SIGKILL & SIGSTOP

#ifdef CONFIG_SMP
	// __ticket_spin_trylock(r31)
	ld4 r17=[r31]
	mov r8=EINVAL			// default to EINVAL
	;;
	extr r9=r17,17,15
	;;
	xor r18=r17,r9
	adds r19=1,r17
	;;
	extr.u r18=r18,0,15
	;;
	cmp.eq p0,p7=0,r18
(p7)	br.cond.spnt.many .lock_contention
	mov.m ar.ccv=r17
	;;
	cmpxchg4.acq r9=[r31],r19,ar.ccv
	;;
	cmp4.eq p0,p7=r9,r17
(p7)	br.cond.spnt.many .lock_contention
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	;;
#else
	ld8 r3=[r2]			// re-read current->blocked now that we hold the lock
	mov r8=EINVAL			// default to EINVAL
#endif
	add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
	add r19=IA64_TASK_SIGNAL_OFFSET,r16
	cmp4.eq p6,p0=SIG_BLOCK,r32
	;;
	ld8 r19=[r19]			// r19 <- current->signal
	cmp4.eq p7,p0=SIG_UNBLOCK,r32
	cmp4.eq p8,p0=SIG_SETMASK,r32
	;;
	ld8 r18=[r18]			// r18 <- current->pending.signal
	.pred.rel.mutex p6,p7,p8
(p6)	or r14=r3,r14			// SIG_BLOCK
(p7)	andcm r14=r3,r14		// SIG_UNBLOCK

(p8)	mov r14=r14			// SIG_SETMASK
(p6)	mov r8=0			// clear error code
	// recalc_sigpending()
	add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

	add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
	;;
	ld4 r17=[r17]		// r17 <- current->signal->group_stop_count
(p7)	mov r8=0		// clear error code

	ld8 r19=[r19]		// r19 <- current->signal->shared_pending
	;;
	cmp4.gt p6,p7=r17,r0	// p6/p7 <- (current->signal->group_stop_count > 0)?
(p8)	mov r8=0		// clear error code

	or r18=r18,r19		// r18 <- current->pending | current->signal->shared_pending
	;;
	// r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
	andcm r18=r18,r14
	add r9=TI_FLAGS+IA64_TASK_SIZE,r16
	;;

(p7)	cmp.ne.or.andcm p6,p7=r18,r0		// p6/p7 <- signal pending
	mov r19=0					// i must not leak kernel bits...
(p6)	br.cond.dpnt.many .sig_pending
	;;

1:	ld4 r17=[r9]				// r17 <- current->thread_info->flags
	;;
	mov ar.ccv=r17
	and r18=~_TIF_SIGPENDING,r17		// r18 <- r17 & ~(1 << TIF_SIGPENDING)
	;;

	st8 [r2]=r14				// update current->blocked with new mask
	cmpxchg4.acq r8=[r9],r18,ar.ccv		// current->thread_info->flags <- r18
	;;
	cmp.ne p6,p0=r17,r8			// update failed?
(p6)	br.cond.spnt.few 1b			// yes -> retry

#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	//
	// r3 still holds the old signal mask that .store_mask writes to *oset,
	// so it must not be used as scratch here.  r17 is dead after the
	// cmpxchg loop above and gets zeroed before we return.
	adds r31=2,r31
	;;
	ld2.bias r2=[r31]
	mov r17=65534
	;;
	adds r2=2,r2
	;;
	and r17=r17,r2
	;;
	st2.rel [r31]=r17
#endif
	SSM_PSR_I(p0, p9, r31)
	;;

	srlz.d					// ensure psr.i is set again
	mov r18=0					// i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)	// verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)		// *oset <- old current->blocked
	mov r2=0					// i must not leak kernel bits...
	mov r3=0					// i must not leak kernel bits...
	mov r8=0					// return 0
	mov r9=0					// i must not leak kernel bits...
	mov r14=0					// i must not leak kernel bits...
	mov r17=0					// i must not leak kernel bits...
	mov r31=0					// i must not leak kernel bits...
	FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
	// __ticket_spin_unlock(r31)
	// (r2/r3 are free to use as scratch here: we leave via the fallback path)
	adds r31=2,r31
	;;
	ld2.bias r2=[r31]
	mov r3=65534
	;;
	adds r2=2,r2
	;;
	and r3=r3,r2
	;;
	st2.rel [r31]=r3
#endif
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall	// with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
	/* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
	SSM_PSR_I(p0, p9, r17)
	;;
	srlz.d
	br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)
563
3bc207d2
FY
564/*
565 * fsys_getcpu doesn't use the third parameter in this implementation. It reads
566 * current_thread_info()->cpu and corresponding node in cpu_to_node_map.
567 */
/*
 * fsys_getcpu: light-weight getcpu().
 *
 * r32 and r33 are the two result pointers.  The cpu number is read from the
 * thread-info area (TI_CPU) and stored with st4 to [r32]; the node is
 * cpu_to_node_map[cpu] under CONFIG_NUMA and 0 otherwise, stored with st2 to
 * [r33].  r8 is set to 0 on success.
 *
 * A NaT in either pointer leads to .fail_einval, a faulting probe or store
 * to .fail_efault (both labels are defined in fsys_gettimeofday), and any
 * TIF_ALLWORK_MASK bit to fsys_fallback_syscall.
 *
 * NOTE(review): both pointers are probed unconditionally, so a NULL pointer
 * presumably ends in .fail_efault -- confirm this matches the heavy-weight
 * syscall's treatment of NULL arguments.
 * NOTE(review): the node is written with a 16-bit store -- confirm against
 * the width of the caller's node variable.
 * NOTE(review): r17/r18/r20 (CONFIG_NUMA) and r2/r3 are not zeroed in this
 * block before FSYS_RETURN -- confirm that nothing sensitive is exposed.
 */
568ENTRY(fsys_getcpu)
569 .prologue
570 .altrp b6
571 .body
572 ;;
573 add r2=TI_FLAGS+IA64_TASK_SIZE,r16
574 tnat.nz p6,p0 = r32 // guard against NaT argument
575 add r3=TI_CPU+IA64_TASK_SIZE,r16
576 ;;
577 ld4 r3=[r3] // M r3 = thread_info->cpu
578 ld4 r2=[r2] // M r2 = thread_info->flags
579(p6) br.cond.spnt.few .fail_einval // B
580 ;;
581 tnat.nz p7,p0 = r33 // I guard against NaT argument
582(p7) br.cond.spnt.few .fail_einval // B
583#ifdef CONFIG_NUMA
584 movl r17=cpu_to_node_map
585 ;;
586EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
587EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
588 shladd r18=r3,1,r17 // r18 = &cpu_to_node_map[cpu] (2-byte entries)
589 ;;
590 ld2 r20=[r18] // r20 = cpu_to_node_map[cpu]
591 and r2 = TIF_ALLWORK_MASK,r2
592 ;;
593 cmp.ne p8,p0=0,r2 // p8 <- some work is pending
594(p8) br.spnt.many fsys_fallback_syscall
595 ;;
596 ;;
597EX(.fail_efault, st4 [r32] = r3)
598EX(.fail_efault, st2 [r33] = r20)
599 mov r8=0
600 ;;
601#else
602EX(.fail_efault, probe.w.fault r32, 3) // M This takes 5 cycles
603EX(.fail_efault, probe.w.fault r33, 3) // M This takes 5 cycles
604 and r2 = TIF_ALLWORK_MASK,r2
605 ;;
606 cmp.ne p8,p0=0,r2 // p8 <- some work is pending
607(p8) br.spnt.many fsys_fallback_syscall
608 ;;
609EX(.fail_efault, st4 [r32] = r3)
610EX(.fail_efault, st2 [r33] = r0) // without CONFIG_NUMA the node is always 0
611 mov r8=0
612 ;;
613#endif
614 FSYS_RETURN
615END(fsys_getcpu)
616
1da177e4
LT
/*
 * fsys_fallback_syscall: entered by the light-weight handlers in this file
 * when they cannot complete a call themselves.
 *
 * Looks up the heavy-weight entry point in sys_call_table (index r15 - 1024,
 * 8 bytes per entry), masks interrupts, and loads the registers that
 * paravirt_fsys_bubble_down expects on entry (r18 = entry point, r21 =
 * ar.fpsr, r26 = ar.pfs, r27 = ar.rsc, r29 = psr).  There is no return or
 * branch at the end: execution continues into the code that follows END().
 */
617ENTRY(fsys_fallback_syscall)
618 .prologue
619 .altrp b6
620 .body
621 /*
622 * We only get here from light-weight syscall handlers. Thus, we already
623 * know that r15 contains a valid syscall number. No need to re-check.
624 */
625 adds r17=-1024,r15 // r17 = zero-based index (the syscall tables start at number 1024)
626 movl r14=sys_call_table
627 ;;
84b8857a 628 RSM_PSR_I(p0, r26, r27)
1da177e4
LT
629 shladd r18=r17,3,r14 // r18 = &sys_call_table[r17]
630 ;;
631 ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
84b8857a 632 MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency)
1da177e4
LT
633 mov r27=ar.rsc
634 mov r21=ar.fpsr
635 mov r26=ar.pfs // loaded after MOV_FROM_PSR, which was given r26 as an extra register argument
636END(fsys_fallback_syscall)
637 /* FALL THROUGH */
/*
 * paravirt_fsys_bubble_down: turn a light-weight syscall into a normal,
 * heavy-weight one.  Reached by fall-through from fsys_fallback_syscall and
 * referenced from the slot in front of paravirt_fsyscall_table.
 *
 * Outline (details in the comments below): switch the register backing
 * store to the kernel RBS, save the user-level state in scratch registers,
 * let ia64_syscall_setup do the remaining setup, re-enable interrupts, and
 * then call the entry point in b6 with rp = ia64_ret_from_syscall, or go to
 * ia64_trace_syscall when a _TIF_SYSCALL_TRACEAUDIT flag is set.
 */
dd97d5cb 638GLOBAL_ENTRY(paravirt_fsys_bubble_down)
1da177e4
LT
639 .prologue
640 .altrp b6
641 .body
642 /*
fbf7192b
DMT
643 * We get here for syscalls that don't have a lightweight
644 * handler. For those, we need to bubble down into the kernel
645 * and that requires setting up a minimal pt_regs structure,
646 * and initializing the CPU state more or less as if an
647 * interruption had occurred. To make syscall-restarts work,
648 * we setup pt_regs such that cr_iip points to the second
649 * instruction in syscall_via_break. Decrementing the IP
650 * hence will restart the syscall via break and not
651 * decrementing IP will return us to the caller, as usual.
652 * Note that we preserve the value of psr.pp rather than
653 * initializing it from dcr.pp. This makes it possible to
654 * distinguish fsyscall execution from other privileged
655 * execution.
1da177e4
LT
656 *
657 * On entry:
fbf7192b
DMT
658 * - normal fsyscall handler register usage, except
659 * that we also have:
1da177e4
LT
660 * - r18: address of syscall entry point
661 * - r21: ar.fpsr
662 * - r26: ar.pfs
663 * - r27: ar.rsc
664 * - r29: psr
fbf7192b
DMT
665 *
666 * We used to clear some PSR bits here but that requires slow
667 * serialization. Fortunately, that isn't really necessary.
668 * The rationale is as follows: we used to clear bits
669 * ~PSR_PRESERVED_BITS in PSR.L. Since
670 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
671 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
672 * However,
673 *
674 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
675 * PSR.AC : don't care (kernel normally turns PSR.AC on)
dd97d5cb 676 * PSR.I : already turned off by the time paravirt_fsys_bubble_down gets
fbf7192b
DMT
677 * invoked
678 * PSR.DFL: always 0 (kernel never turns it on)
679 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
680 * initiative
681 * PSR.DI : always 0 (kernel never turns it on)
682 * PSR.SI : always 0 (kernel never turns it on)
683 * PSR.DB : don't care --- kernel never enables kernel-level
684 * breakpoints
685 * PSR.TB : must be 0 already; if it wasn't zero on entry to
dd97d5cb 686 * __kernel_syscall_via_epc, the branch to paravirt_fsys_bubble_down
fbf7192b
DMT
687 * will trigger a taken branch; the taken-trap-handler then
688 * converts the syscall into a break-based system-call.
1da177e4 689 */
1da177e4 690 /*
fbf7192b
DMT
691 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
692 * The rest we have to synthesize.
1da177e4 693 */
fbf7192b
DMT
694# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \
695 | (0x1 << IA64_PSR_RI_BIT) \
1da177e4
LT
696 | IA64_PSR_BN | IA64_PSR_I)
697
fbf7192b
DMT
698 invala // M0|1
699 movl r14=ia64_ret_from_syscall // X
1da177e4 700
1ba7be7d 701 nop.m 0
fbf7192b 702 movl r28=__kernel_syscall_via_break // X create cr.iip
1da177e4 703 ;;
1ba7be7d 704
fbf7192b
DMT
705 mov r2=r16 // A get task addr to addl-addressable register
706 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
707 mov r31=pr // I0 save pr (2 cyc)
1da177e4 708 ;;
fbf7192b
DMT
709 st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
710 addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS
711 add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A
1da177e4 712 ;;
fbf7192b
DMT
713 ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags
714 lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store
1ba7be7d 715 nop.i 0
1da177e4 716 ;;
fbf7192b 717 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
b64f34cd 718#ifdef CONFIG_VIRT_CPU_ACCOUNTING
94752a79 719 MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting
b64f34cd 720#else
1ba7be7d 721 nop.m 0
b64f34cd 722#endif
1ba7be7d
DMT
723 nop.i 0
724 ;;
fbf7192b
DMT
725 mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
726 mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!)
1ba7be7d 727 nop.i 0
1da177e4 728 ;;
fbf7192b 729 mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS
1ba7be7d 730 movl r8=PSR_ONE_BITS // X
1da177e4 731 ;;
fbf7192b
DMT
732 mov r25=ar.unat // M2 (5 cyc) save ar.unat
733 mov r19=b6 // I0 save b6 (2 cyc)
734 mov r20=r1 // A save caller's gp in r20
1ba7be7d 735 ;;
fbf7192b
DMT
736 or r29=r8,r29 // A construct cr.ipsr value to save
737 mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc)
738 addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
1ba7be7d 739
fbf7192b
DMT
740 mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc)
741 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
742 br.call.sptk.many b7=ia64_syscall_setup // B
1da177e4 743 ;;
b64f34cd
HS
744#ifdef CONFIG_VIRT_CPU_ACCOUNTING
745 // mov.m r30=ar.itc is called in advance
746 add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
747 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
748 ;;
749 ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
750 ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
751 ;;
752 ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
753 ld8 r21=[r17] // cumulated utime
754 sub r22=r19,r18 // stime before leave kernel
755 ;;
756 st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
757 sub r18=r30,r19 // elapsed time in user mode
758 ;;
759 add r20=r20,r22 // sum stime
760 add r21=r21,r18 // sum utime
761 ;;
762 st8 [r16]=r20 // update stime
763 st8 [r17]=r21 // update utime
764 ;;
765#endif
fbf7192b
DMT
766 mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
767 mov rp=r14 // I0 set the real return addr
ebcc80c1 768 and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
1da177e4 769 ;;
84b8857a 770 SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs
ebcc80c1 771 cmp.eq p8,p0=r3,r0 // A
fbf7192b 772(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT
1ba7be7d
DMT
773
774 nop.m 0
fbf7192b
DMT
775(p8) br.call.sptk.many b6=b6 // B (ignore return address)
776 br.cond.spnt ia64_trace_syscall // B
dd97d5cb 777END(paravirt_fsys_bubble_down)
1da177e4
LT
778
779 .rodata
780 .align 8
dd97d5cb 781 .globl paravirt_fsyscall_table
1da177e4 782
dd97d5cb
IY
783 data8 paravirt_fsys_bubble_down
784paravirt_fsyscall_table:
1da177e4
LT
785 data8 fsys_ni_syscall
786 data8 0 // exit // 1025
787 data8 0 // read
788 data8 0 // write
789 data8 0 // open
790 data8 0 // close
791 data8 0 // creat // 1030
792 data8 0 // link
793 data8 0 // unlink
794 data8 0 // execve
795 data8 0 // chdir
796 data8 0 // fchdir // 1035
797 data8 0 // utimes
798 data8 0 // mknod
799 data8 0 // chmod
800 data8 0 // chown
801 data8 0 // lseek // 1040
802 data8 fsys_getpid // getpid
803 data8 fsys_getppid // getppid
804 data8 0 // mount
805 data8 0 // umount
806 data8 0 // setuid // 1045
807 data8 0 // getuid
808 data8 0 // geteuid
809 data8 0 // ptrace
810 data8 0 // access
811 data8 0 // sync // 1050
812 data8 0 // fsync
813 data8 0 // fdatasync
814 data8 0 // kill
815 data8 0 // rename
816 data8 0 // mkdir // 1055
817 data8 0 // rmdir
818 data8 0 // dup
819 data8 0 // pipe
820 data8 0 // times
821 data8 0 // brk // 1060
822 data8 0 // setgid
823 data8 0 // getgid
824 data8 0 // getegid
825 data8 0 // acct
826 data8 0 // ioctl // 1065
827 data8 0 // fcntl
828 data8 0 // umask
829 data8 0 // chroot
830 data8 0 // ustat
831 data8 0 // dup2 // 1070
832 data8 0 // setreuid
833 data8 0 // setregid
834 data8 0 // getresuid
835 data8 0 // setresuid
836 data8 0 // getresgid // 1075
837 data8 0 // setresgid
838 data8 0 // getgroups
839 data8 0 // setgroups
840 data8 0 // getpgid
841 data8 0 // setpgid // 1080
842 data8 0 // setsid
843 data8 0 // getsid
844 data8 0 // sethostname
845 data8 0 // setrlimit
846 data8 0 // getrlimit // 1085
847 data8 0 // getrusage
848 data8 fsys_gettimeofday // gettimeofday
849 data8 0 // settimeofday
850 data8 0 // select
851 data8 0 // poll // 1090
852 data8 0 // symlink
853 data8 0 // readlink
854 data8 0 // uselib
855 data8 0 // swapon
856 data8 0 // swapoff // 1095
857 data8 0 // reboot
858 data8 0 // truncate
859 data8 0 // ftruncate
860 data8 0 // fchmod
861 data8 0 // fchown // 1100
862 data8 0 // getpriority
863 data8 0 // setpriority
864 data8 0 // statfs
865 data8 0 // fstatfs
866 data8 0 // gettid // 1105
867 data8 0 // semget
868 data8 0 // semop
869 data8 0 // semctl
870 data8 0 // msgget
871 data8 0 // msgsnd // 1110
872 data8 0 // msgrcv
873 data8 0 // msgctl
874 data8 0 // shmget
875 data8 0 // shmat
876 data8 0 // shmdt // 1115
877 data8 0 // shmctl
878 data8 0 // syslog
879 data8 0 // setitimer
880 data8 0 // getitimer
881 data8 0 // 1120
882 data8 0
883 data8 0
884 data8 0 // vhangup
885 data8 0 // lchown
886 data8 0 // remap_file_pages // 1125
887 data8 0 // wait4
888 data8 0 // sysinfo
889 data8 0 // clone
890 data8 0 // setdomainname
891 data8 0 // newuname // 1130
892 data8 0 // adjtimex
893 data8 0
894 data8 0 // init_module
895 data8 0 // delete_module
896 data8 0 // 1135
897 data8 0
898 data8 0 // quotactl
899 data8 0 // bdflush
900 data8 0 // sysfs
901 data8 0 // personality // 1140
902 data8 0 // afs_syscall
903 data8 0 // setfsuid
904 data8 0 // setfsgid
905 data8 0 // getdents
906 data8 0 // flock // 1145
907 data8 0 // readv
908 data8 0 // writev
909 data8 0 // pread64
910 data8 0 // pwrite64
911 data8 0 // sysctl // 1150
912 data8 0 // mmap
913 data8 0 // munmap
914 data8 0 // mlock
915 data8 0 // mlockall
916 data8 0 // mprotect // 1155
917 data8 0 // mremap
918 data8 0 // msync
919 data8 0 // munlock
920 data8 0 // munlockall
921 data8 0 // sched_getparam // 1160
922 data8 0 // sched_setparam
923 data8 0 // sched_getscheduler
924 data8 0 // sched_setscheduler
925 data8 0 // sched_yield
926 data8 0 // sched_get_priority_max // 1165
927 data8 0 // sched_get_priority_min
928 data8 0 // sched_rr_get_interval
929 data8 0 // nanosleep
930 data8 0 // nfsservctl
931 data8 0 // prctl // 1170
932 data8 0 // getpagesize
933 data8 0 // mmap2
934 data8 0 // pciconfig_read
935 data8 0 // pciconfig_write
936 data8 0 // perfmonctl // 1175
937 data8 0 // sigaltstack
938 data8 0 // rt_sigaction
939 data8 0 // rt_sigpending
940 data8 fsys_rt_sigprocmask // rt_sigprocmask
941 data8 0 // rt_sigqueueinfo // 1180
942 data8 0 // rt_sigreturn
943 data8 0 // rt_sigsuspend
944 data8 0 // rt_sigtimedwait
945 data8 0 // getcwd
946 data8 0 // capget // 1185
947 data8 0 // capset
948 data8 0 // sendfile
949 data8 0
950 data8 0
951 data8 0 // socket // 1190
952 data8 0 // bind
953 data8 0 // connect
954 data8 0 // listen
955 data8 0 // accept
956 data8 0 // getsockname // 1195
957 data8 0 // getpeername
958 data8 0 // socketpair
959 data8 0 // send
960 data8 0 // sendto
961 data8 0 // recv // 1200
962 data8 0 // recvfrom
963 data8 0 // shutdown
964 data8 0 // setsockopt
965 data8 0 // getsockopt
966 data8 0 // sendmsg // 1205
967 data8 0 // recvmsg
968 data8 0 // pivot_root
969 data8 0 // mincore
970 data8 0 // madvise
971 data8 0 // newstat // 1210
972 data8 0 // newlstat
973 data8 0 // newfstat
974 data8 0 // clone2
975 data8 0 // getdents64
976 data8 0 // getunwind // 1215
977 data8 0 // readahead
978 data8 0 // setxattr
979 data8 0 // lsetxattr
980 data8 0 // fsetxattr
981 data8 0 // getxattr // 1220
982 data8 0 // lgetxattr
983 data8 0 // fgetxattr
984 data8 0 // listxattr
985 data8 0 // llistxattr
986 data8 0 // flistxattr // 1225
987 data8 0 // removexattr
988 data8 0 // lremovexattr
989 data8 0 // fremovexattr
990 data8 0 // tkill
991 data8 0 // futex // 1230
992 data8 0 // sched_setaffinity
993 data8 0 // sched_getaffinity
994 data8 fsys_set_tid_address // set_tid_address
995 data8 0 // fadvise64_64
996 data8 0 // tgkill // 1235
997 data8 0 // exit_group
998 data8 0 // lookup_dcookie
999 data8 0 // io_setup
1000 data8 0 // io_destroy
1001 data8 0 // io_getevents // 1240
1002 data8 0 // io_submit
1003 data8 0 // io_cancel
1004 data8 0 // epoll_create
1005 data8 0 // epoll_ctl
1006 data8 0 // epoll_wait // 1245
1007 data8 0 // restart_syscall
1008 data8 0 // semtimedop
1009 data8 0 // timer_create
1010 data8 0 // timer_settime
1011 data8 0 // timer_gettime // 1250
1012 data8 0 // timer_getoverrun
1013 data8 0 // timer_delete
1014 data8 0 // clock_settime
1015 data8 fsys_clock_gettime // clock_gettime
3bc207d2
FY
1016 data8 0 // clock_getres // 1255
1017 data8 0 // clock_nanosleep
1018 data8 0 // fstatfs64
1019 data8 0 // statfs64
1020 data8 0 // mbind
1021 data8 0 // get_mempolicy // 1260
1022 data8 0 // set_mempolicy
1023 data8 0 // mq_open
1024 data8 0 // mq_unlink
1025 data8 0 // mq_timedsend
1026 data8 0 // mq_timedreceive // 1265
1027 data8 0 // mq_notify
1028 data8 0 // mq_getsetattr
1029 data8 0 // kexec_load
1030 data8 0 // vserver
1031 data8 0 // waitid // 1270
1032 data8 0 // add_key
1033 data8 0 // request_key
1034 data8 0 // keyctl
1035 data8 0 // ioprio_set
1036 data8 0 // ioprio_get // 1275
1037 data8 0 // move_pages
1038 data8 0 // inotify_init
1039 data8 0 // inotify_add_watch
1040 data8 0 // inotify_rm_watch
1041 data8 0 // migrate_pages // 1280
1042 data8 0 // openat
1043 data8 0 // mkdirat
1044 data8 0 // mknodat
1045 data8 0 // fchownat
1046 data8 0 // futimesat // 1285
1047 data8 0 // newfstatat
1048 data8 0 // unlinkat
1049 data8 0 // renameat
1050 data8 0 // linkat
1051 data8 0 // symlinkat // 1290
1052 data8 0 // readlinkat
1053 data8 0 // fchmodat
1054 data8 0 // faccessat
1055 data8 0
1056 data8 0 // 1295
1057 data8 0 // unshare
1058 data8 0 // splice
1059 data8 0 // set_robust_list
1060 data8 0 // get_robust_list
1061 data8 0 // sync_file_range // 1300
1062 data8 0 // tee
1063 data8 0 // vmsplice
1064 data8 0
1065 data8 fsys_getcpu // getcpu // 1304
9ed2ad86 1066
c8c1635f
KC
1067 // fill in zeros for the remaining entries
1068 .zero:
dd97d5cb 1069 .space paravirt_fsyscall_table + 8*NR_syscalls - .zero, 0