/*
 * linux/arch/x86_64/entry.S
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
 * Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
 */

/*
 * entry.S contains the system-call and fault low-level handling routines.
 *
 * NOTE: This code handles signal recognition, which happens every time
 * after an interrupt and after each system call.
 *
 * Normal syscalls and interrupts don't save a full stack frame; this is
 * only done for syscall tracing, signals or fork/exec et al.
 *
 * A note on terminology:
 * - top of stack: Architecture-defined interrupt frame from SS to RIP
 *   at the top of the kernel process stack.
 * - partial stack frame: partially saved registers up to R11.
 * - full stack frame: Like a partial stack frame, but all registers saved.
 *
 * Some macro usage:
 * - CFI macros are used to generate dwarf2 unwind information for better
 *   backtraces. They don't change any code.
 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
 *   There are unfortunately lots of special cases where some registers
 *   are not touched. The macro is a big mess that should be cleaned up.
 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
 *   Gives a full stack frame.
 * - ENTRY/END - Define functions in the symbol table.
 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
 *   frame that is otherwise undefined after a SYSCALL.
 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
 */

#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/dwarf2.h>
#include <asm/calling.h>
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
#include <asm/ftrace.h>

	.code64

#ifdef CONFIG_FTRACE
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)

	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	subq $MCOUNT_INSN_SIZE, %rdi

.globl mcount_call
mcount_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	retq
END(mcount)

ENTRY(ftrace_caller)

	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

.globl ftrace_call
ftrace_call:
	call ftrace_stub

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

.globl ftrace_stub
ftrace_stub:
	retq
END(ftrace_caller)

#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
	cmpq $ftrace_stub, ftrace_trace_function
	jnz trace
.globl ftrace_stub
ftrace_stub:
	retq

trace:
	/* taken from glibc */
	subq $0x38, %rsp
	movq %rax, (%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)

	movq 0x38(%rsp), %rdi
	movq 8(%rbp), %rsi
	subq $MCOUNT_INSN_SIZE, %rdi

	call *ftrace_trace_function

	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq (%rsp), %rax
	addq $0x38, %rsp

	jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FTRACE */

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
	swapgs
	sysretq
#endif /* CONFIG_PARAVIRT */


.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
	jnc  1f
	TRACE_IRQS_ON
1:
#endif
.endm
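
/*
 * Bit 9 of EFLAGS is IF, the interrupt-enable flag, which is why the
 * bt above (and the identical test in retint_kernel further down)
 * checks it to decide whether interrupts were on in the saved frame.
 */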

/*
 * C code is not supposed to know about the undefined top of stack. Every time
 * a C function with a pt_regs argument is called from the SYSCALL based
 * fast path FIXUP_TOP_OF_STACK is needed.
 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
 * manipulation.
 */

	/* %rsp:at FRAMEEND */
	.macro FIXUP_TOP_OF_STACK tmp
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	movq	R11(%rsp),\tmp	/* get eflags */
	movq	\tmp,EFLAGS(%rsp)
	.endm

	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp
	movq	\tmp,R11-\offset(%rsp)
	.endm
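
/*
 * In rough C-like pseudocode (a sketch only; the real offsets come from
 * asm-offsets), FIXUP_TOP_OF_STACK rebuilds the iret-style frame that
 * SYSCALL never saved:
 *
 *	regs->rsp    = pda.oldrsp;	// user RSP stashed at syscall entry
 *	regs->ss     = __USER_DS;
 *	regs->cs     = __USER_CS;
 *	regs->rcx    = -1;		// user RIP slot, poisoned
 *	regs->eflags = regs->r11;	// SYSCALL put user RFLAGS in r11
 *
 * and RESTORE_TOP_OF_STACK copies rsp/eflags back the other way.
 */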

	.macro FAKE_STACK_FRAME child_rip
	/* push in order ss, rsp, eflags, cs, rip */
	xorl %eax, %eax
	pushq $__KERNEL_DS /* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq %rax /* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq $(1<<9) /* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq $__KERNEL_CS /* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq \child_rip /* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq %rax /* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm

	.macro UNFAKE_STACK_FRAME
	addq $8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm

	.macro	CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * A newly forked process directly context switches into this.
 */
/* rdi:	prev */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET 8	/* pushq moves the CFA by 8, not 4 */
	popf			# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET -8
	call schedule_tail
	GET_THREAD_INFO(%rcx)
	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
	jnz rff_trace
rff_action:
	RESTORE_REST
	testl $3,CS-ARGOFFSET(%rsp)	# from kernel_thread?
	je   int_ret_from_sys_call
	testl $_TIF_IA32,TI_flags(%rcx)
	jnz  int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp ret_from_sys_call
rff_trace:
	movq %rsp,%rdi
	call syscall_trace_leave
	GET_THREAD_INFO(%rcx)
	jmp rff_action
	CFI_ENDPROC
END(ret_from_fork)

/*
 * System call entry. Up to 6 arguments in registers are supported.
 *
 * SYSCALL does not save anything on the stack and does not change the
 * stack pointer.
 */

/*
 * Register setup:
 * rax  system call number
 * rdi  arg0
 * rcx  return address for syscall/sysret, C arg3
 * rsi  arg1
 * rdx  arg2
 * r10  arg3	(--> moved to rcx for C)
 * r8   arg4
 * r9   arg5
 * r11  eflags for syscall/sysret, temporary for C
 * r12-r15,rbp,rbx saved by C code, not touched.
 *
 * Interrupts are off on entry.
 * Only called from user space.
 *
 * XXX	if we had a free scratch register we could save the RSP into the stack
 *	frame and report it properly in ps. Unfortunately we don't have one.
 *
 * When the user can change the frame, always force IRET. That is because
 * it deals with uncanonical addresses better. SYSRET has trouble
 * with them due to bugs in both AMD and Intel CPUs.
 */

ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	SWAPGS_UNSAFE_STACK
	/*
	 * A hypervisor implementation might want to use a label
	 * after the swapgs, so that it can do the swapgs
	 * for the guest and jump here on syscall.
	 */
ENTRY(system_call_after_swapgs)

	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_ARGS 8,1
	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq  %rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
	jnz tracesys
	cmpq $__NR_syscall_max,%rax
	ja badsys
	movq %r10,%rcx
	call *sys_call_table(,%rax,8)	# XXX: rip relative
	movq %rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	flagmask */
sysret_check:
	LOCKDEP_SYS_EXIT
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz  sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp, %rsp
	USERGS_SYSRET64
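	/*
	 * USERGS_SYSRET64 is swapgs + sysretq in the native case (see
	 * native_usergs_sysret64 above); sysretq reloads RIP from %rcx
	 * and RFLAGS from %r11. The fast path therefore loads %rcx from
	 * the saved RIP slot by hand, and the RESTORE_ARGS above skips
	 * only the %rcx reload (assuming calling.h's skiprax, addskip,
	 * skiprcx argument order), letting %r11 come back from its frame
	 * slot, which still holds the user RFLAGS that SYSCALL saved.
	 */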

	CFI_RESTORE_STATE
	/* Handle reschedules */
	/* edx:	work, edi: workmask */
sysret_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc sysret_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq  %rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp sysret_check

	/* Handle a signal */
sysret_signal:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	/* edx:	work flags (arg3) */
	leaq do_notify_resume(%rip),%rax
	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
	xorl %esi,%esi			# oldset -> arg2
	call ptregscall_common
	movl $_TIF_WORK_MASK,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

badsys:
	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
	FIXUP_TOP_OF_STACK %rdi
	movq %rsp,%rdi
	call syscall_trace_enter
	/*
	 * Reload arg registers from stack in case ptrace changed them.
	 * We don't reload %rax because syscall_trace_enter() returned
	 * the value it wants us to use in the table lookup.
	 */
	LOAD_ARGS ARGOFFSET, 1
	RESTORE_REST
	cmpq $__NR_syscall_max,%rax
	ja   int_ret_from_sys_call	/* RAX(%rsp) set to -ENOSYS above */
	movq %r10,%rcx	/* fixup for C */
	call *sys_call_table(,%rax,8)
	movq %rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_restore_args
	movl $_TIF_ALLWORK_MASK,%edi
	/* edi:	mask to check */
int_with_check:
	LOCKDEP_SYS_EXIT_IRQ
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	jnz   int_careful
	andl  $~TS_COMPAT,TI_status(%rcx)
	jmp   retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx:	work, edi: workmask */
int_careful:
	bt $TIF_NEED_RESCHED,%edx
	jnc  int_very_careful
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	call schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	/* Check for syscall exit trace */
	testl $_TIF_WORK_SYSCALL_EXIT,%edx
	jz int_signal
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq 8(%rsp),%rdi	# &ptregs -> arg1
	call syscall_trace_leave
	popq %rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
	jmp int_restore_rest

int_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz 1f
	movq %rsp,%rdi		# &ptregs -> arg1
	xorl %esi,%esi		# oldset -> arg2
	call do_notify_resume
1:	movl $_TIF_WORK_MASK,%edi
int_restore_rest:
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp int_with_check
	CFI_ENDPROC
END(system_call)

/*
 * Certain special system calls need to save a complete full stack frame.
 */

	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg	/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm

	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

ENTRY(ptregscall_common)
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq %r11, %r15
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call *%rax
	RESTORE_TOP_OF_STACK %r11
	movq %r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)

ENTRY(stub_execve)
	CFI_STARTPROC
	popq %r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	movq %rsp, %rcx
	call sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq %rax,RAX(%rsp)
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)

/*
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq $8, %rsp
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq %rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call sys_rt_sigreturn
	movq %rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)

/*
 * initial frame state for interrupts and exceptions
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* initial frame state for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* initial frame state for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX

/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee-clobbered registers in the fast path.
 *
 * Entry runs with interrupts off.
 */

/* 0(%rsp): interrupt number */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
	pushq %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl $3,CS(%rdi)
	je 1f
	SWAPGS
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	   much work) */
1:	incl	%gs:pda_irqcount
	cmoveq %gs:pda_irqstackptr,%rsp
	push    %rbp			# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call \func
	.endm
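
/*
 * Note on the stack switch above: assuming the usual PDA setup where
 * pda_irqcount sits at -1 when no interrupt stack is in use, the incl
 * sets ZF only on the first nesting level, so the cmoveq switches %rsp
 * to pda_irqstackptr exactly once; nested interrupts simply keep
 * running on the interrupt stack.
 */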

ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	decl %gs:pda_irqcount
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl $3,CS-ARGOFFSET(%rsp)
	je retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl $_TIF_WORK_MASK,%edi
retint_check:
	LOCKDEP_SYS_EXIT_IRQ
	movl TI_flags(%rcx),%edx
	andl %edi,%edx
	CFI_REMEMBER_STATE
	jnz  retint_careful

retint_swapgs:		/* return to user-space */
	/*
	 * The iretq could re-enable interrupts:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_IRETQ
	SWAPGS
	jmp restore_args

retint_restore_args:	/* return to kernel space */
	DISABLE_INTERRUPTS(CLBR_ANY)
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0

irq_return:
	INTERRUPT_RETURN

	.section __ex_table, "a"
	.quad irq_return, bad_iret
	.previous

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iretq

	.section __ex_table,"a"
	.quad native_iret, bad_iret
	.previous
#endif

	.section .fixup,"ax"
bad_iret:
	/*
	 * The iret traps when the %cs or %ss being restored is bogus.
	 * We've lost the original trap vector and error code.
	 * #GPF is the most likely one to get for an invalid selector.
	 * So pretend we completed the iret and took the #GPF in user mode.
	 *
	 * We are now running with the kernel GS after exception recovery.
	 * But error_entry expects us to have user GS to match the user %cs,
	 * so swap back.
	 */
	pushq $0

	SWAPGS
	jmp general_protection

	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt    $TIF_NEED_RESCHED,%edx
	jnc   retint_signal
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	pushq %rdi
	CFI_ADJUST_CFA_OFFSET	8
	call  schedule
	popq %rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	jmp retint_check

retint_signal:
	testl $_TIF_DO_NOTIFY_MASK,%edx
	jz    retint_swapgs
	TRACE_IRQS_ON
	ENABLE_INTERRUPTS(CLBR_NONE)
	SAVE_REST
	movq $-1,ORIG_RAX(%rsp)
	xorl %esi,%esi		# oldset
	movq %rsp,%rdi		# &pt_regs
	call do_notify_resume
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	jmp retint_with_reschedule

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx:	threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl $0,TI_preempt_count(%rcx)
	jnz  retint_restore_args
	bt  $TIF_NEED_RESCHED,TI_flags(%rcx)
	jnc  retint_restore_args
	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc  retint_restore_args
	call preempt_schedule_irq
	jmp exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)

/*
 * APIC interrupts.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq $~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp ret_from_intr
	CFI_ENDPROC
	.endm
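
/*
 * The vector is pushed bit-complemented ($~num), so the ORIG_RAX slot
 * is always negative for hardware interrupts and do_IRQ is expected to
 * undo the complement. This keeps interrupt frames distinguishable
 * from syscall frames, whose orig_rax holds a non-negative number.
 */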

ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(call_function_single_interrupt)
	apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(uv_bau_message_intr1)
	apicinterrupt 220,uv_bau_message_interrupt
END(uv_bau_message_intr1)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)

/*
 * Exception entry points.
 */
	.macro zeroentry sym
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0	/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rax	/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	.macro errorentry sym
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq %rax
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rax,0
	leaq  \sym(%rip),%rax
	jmp error_entry
	CFI_ENDPROC
	.endm

	/* error code is on the stack already */
	/* handle NMI like exceptions that can happen everywhere */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl $1,%ebx
	movl  $MSR_GS_BASE,%ecx
	rdmsr
	testl %edx,%edx
	js    1f
	SWAPGS
	xorl  %ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi
	movq $-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call \sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm

	/*
	 * "Paranoid" exit path from exception stack.
	 * Paranoid because this is used by NMIs and cannot take
	 * any kernel state for granted.
	 * We don't do kernel preemption checks here, because only
	 * NMI should be common and it does not enable IRQs and
	 * cannot get reschedule ticks.
	 *
	 * "trace" is 0 for the NMI handler only, because irq-tracing
	 * is fundamentally NMI-unsafe. (we cannot change the soft and
	 * hard flags at once, atomically)
	 */
	.macro paranoidexit trace=1
	/* ebx:	no swapgs flag */
paranoid_exit\trace:
	testl %ebx,%ebx			/* swapgs needed? */
	jnz paranoid_restore\trace
	testl $3,CS(%rsp)
	jnz   paranoid_userspace\trace
paranoid_swapgs\trace:
	.if \trace
	TRACE_IRQS_IRETQ 0
	.endif
	SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
	RESTORE_ALL 8
	jmp irq_return
paranoid_userspace\trace:
	GET_THREAD_INFO(%rcx)
	movl TI_flags(%rcx),%ebx
	andl $_TIF_WORK_MASK,%ebx
	jz paranoid_swapgs\trace
	movq %rsp,%rdi			/* &pt_regs */
	call sync_regs
	movq %rax,%rsp			/* switch stack for scheduling */
	testl $_TIF_NEED_RESCHED,%ebx
	jnz paranoid_schedule\trace
	movl %ebx,%edx			/* arg3: thread flags */
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_NONE)
	xorl %esi,%esi			/* arg2: oldset */
	movq %rsp,%rdi			/* arg1: &pt_regs */
	call do_notify_resume
	DISABLE_INTERRUPTS(CLBR_NONE)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
paranoid_schedule\trace:
	.if \trace
	TRACE_IRQS_ON
	.endif
	ENABLE_INTERRUPTS(CLBR_ANY)
	call schedule
	DISABLE_INTERRUPTS(CLBR_ANY)
	.if \trace
	TRACE_IRQS_OFF
	.endif
	jmp paranoid_userspace\trace
	CFI_ENDPROC
	.endm

/*
 * Exception entry point. This expects an error code/orig_rax on the stack
 * and the exception handler in %rax.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	CFI_REL_OFFSET rax,0
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq  $14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq %rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
	CFI_REGISTER	rax,rsi
	movq %rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq %rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq %rsi,10*8(%rsp)	/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq %r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq %r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq %r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq %r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq %rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq %rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq %r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq %r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq %r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq %r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl %ebx,%ebx
	testl $3,CS(%rsp)
	je  error_kernelspace
error_swapgs:
	SWAPGS
error_sti:
	movq %rdi,RDI(%rsp)
	CFI_REL_OFFSET	rdi,RDI
	movq %rsp,%rdi
	movq ORIG_RAX(%rsp),%rsi	/* get error code */
	movq $-1,ORIG_RAX(%rsp)
	call *%rax
	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl %ebx,%eax
	RESTORE_REST
	DISABLE_INTERRUPTS(CLBR_NONE)
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl %eax,%eax
	jne  retint_kernel
	LOCKDEP_SYS_EXIT_IRQ
	movl  TI_flags(%rcx),%edx
	movl  $_TIF_WORK_MASK,%edi
	andl  %edi,%edx
	jnz  retint_careful
	jmp retint_swapgs
	CFI_ENDPROC

error_kernelspace:
	incl %ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq irq_return(%rip),%rcx
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	movl %ecx,%ecx	/* zero extend */
	cmpq %rcx,RIP(%rsp)
	je   error_swapgs
	cmpq $gs_change,RIP(%rsp)
	je   error_swapgs
	jmp error_sti
KPROBE_END(error_entry)

	/* Reload gs selector with exception handling */
	/* edi:	new selector */
ENTRY(native_load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
	SWAPGS
gs_change:
	movl %edi,%gs
2:	mfence		/* workaround */
	SWAPGS
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(native_load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	SWAPGS			/* switch back to user gs */
	xorl %eax,%eax
	movl %eax,%gs
	jmp  2b
	.previous

/*
 * Create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# rdi: flags, rsi: usp, rdx: will be &pt_regs
	movq %rdx,%rdi
	orq  kernel_thread_flags(%rip),%rdi
	movq $-1, %rsi
	movq %rsp, %rdx

	xorl %r8d,%r8d
	xorl %r9d,%r9d

	# clone now
	call do_fork
	movq %rax,RAX(%rsp)
	xorl %edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning; this avoids the need
	 * of hacks, for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)

child_rip:
	pushq $0		# fake return address
	CFI_STARTPROC
	/*
	 * Here we are in the child and the registers are set as they were
	 * at kernel_thread() invocation in the parent.
	 */
	movq %rdi, %rax
	movq %rsi, %rdi
	call *%rax
	# exit
	mov %eax, %edi
	call do_exit
	CFI_ENDPROC
ENDPROC(child_rip)

/*
 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fall back into:
 *	extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
 *
 * do_sys_execve asm fallback arguments:
 *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	movq %rsp,%rcx
	call sys_execve
	movq %rax, RAX(%rsp)
	RESTORE_REST
	testq %rax,%rax
	je int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)

KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)

	/* runs on exception stack */
KPROBE_ENTRY(debug)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_debug, DEBUG_STACK
	paranoidexit
KPROBE_END(debug)

	/* runs on exception stack */
KPROBE_ENTRY(nmi)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $-1
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_nmi, 0, 0
#ifdef CONFIG_TRACE_IRQFLAGS
	paranoidexit 0
#else
	jmp paranoid_exit1
	CFI_ENDPROC
#endif
KPROBE_END(nmi)

KPROBE_ENTRY(int3)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)

ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_double_fault
	jmp paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	paranoidentry do_stack_segment
	jmp paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)

#ifdef CONFIG_X86_MCE
	/* runs on exception stack */
ENTRY(machine_check)
	INTR_FRAME
	PARAVIRT_ADJUST_EXCEPTION_FRAME
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif

/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
	CFI_STARTPROC
	push %rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov  %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl %gs:pda_irqcount
	cmove %gs:pda_irqstackptr,%rsp
	push  %rbp			# backlink for old unwinder
	call __do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl %gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)

KPROBE_ENTRY(ignore_sysret)
	CFI_STARTPROC
	mov $-ENOSYS,%eax
	sysret
	CFI_ENDPROC
ENDPROC(ignore_sysret)
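
/*
 * As far as this file is concerned, ignore_sysret is just a stub that
 * fails with -ENOSYS; it is installed by the C-side syscall setup
 * (presumably via MSR_CSTAR) on CPUs where SYSCALL from compat mode is
 * not usable, so a stray SYSCALL returns an error instead of jumping
 * to a bogus target.
 */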

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	zeroentry xen_do_hypervisor_callback
END(xen_hypervisor_callback)

/*
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
# until we've done all processing. HOWEVER, we must enable events before
# popping the stack frame (can't be done atomically) and so it would still
# be possible to get enough handler activations to overflow the stack.
# Although unlikely, bugs of that kind are hard to track down, so we'd
# like to avoid the possibility.
# So, on entry to the handler we detect whether we interrupted an
# existing activation in its critical region -- if so, we pop the current
# activation and restart the handler using the previous one.
*/
ENTRY(xen_do_hypervisor_callback)   # do_hypervisor_callback(struct *pt_regs)
	CFI_STARTPROC
/* Since we don't modify %rdi, xen_evtchn_do_upcall(struct *pt_regs) will
   see the correct pointer to the pt_regs */
	movq %rdi, %rsp		# we don't return, adjust the stack frame
	CFI_ENDPROC
	CFI_DEFAULT_STACK
11:	incl %gs:pda_irqcount
	movq %rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	cmovzq %gs:pda_irqstackptr,%rsp
	pushq %rbp			# backlink for old unwinder
	call xen_evtchn_do_upcall
	popq %rsp
	CFI_DEF_CFA_REGISTER rsp
	decl %gs:pda_irqcount
	jmp  error_exit
	CFI_ENDPROC
END(do_hypervisor_callback)

/*
# Hypervisor uses this for application faults while it executes.
# We get here for two reasons:
#  1. Fault while reloading DS, ES, FS or GS
#  2. Fault while executing IRET
# Category 1 we do not need to fix up as Xen has already reloaded all segment
# registers that could be reloaded and zeroed the others.
# Category 2 we fix up by killing the current process. We cannot use the
# normal Linux return path in this case because if we use the IRET hypercall
# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
# We distinguish between categories by comparing each saved segment register
# with its current contents: any discrepancy means we are in category 1.
*/
ENTRY(xen_failsafe_callback)
	framesz = (RIP-0x30)	/* workaround buggy gas */
	_frame framesz
	CFI_REL_OFFSET rcx, 0
	CFI_REL_OFFSET r11, 8
	movw %ds,%cx
	cmpw %cx,0x10(%rsp)
	CFI_REMEMBER_STATE
	jne 1f
	movw %es,%cx
	cmpw %cx,0x18(%rsp)
	jne 1f
	movw %fs,%cx
	cmpw %cx,0x20(%rsp)
	jne 1f
	movw %gs,%cx
	cmpw %cx,0x28(%rsp)
	jne 1f
	/* All segments match their saved values => Category 2 (Bad IRET). */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	pushq %r11
	CFI_ADJUST_CFA_OFFSET 8
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	jmp general_protection
	CFI_RESTORE_STATE
1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
	movq (%rsp),%rcx
	CFI_RESTORE rcx
	movq 8(%rsp),%r11
	CFI_RESTORE r11
	addq $0x30,%rsp
	CFI_ADJUST_CFA_OFFSET -0x30
	pushq $0
	CFI_ADJUST_CFA_OFFSET 8
	SAVE_ALL
	jmp error_exit
	CFI_ENDPROC
END(xen_failsafe_callback)

#endif /* CONFIG_XEN */