[PATCH] i386: probe_roms() cleanup
[deliverable/linux.git] / arch / x86_64 / kernel / entry.S
CommitLineData
1da177e4
LT
1/*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
1da177e4
LT
7 */
8
9/*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
2e91a17b
AK
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
1da177e4
LT
38 */
39
1da177e4
LT
40#include <linux/linkage.h>
41#include <asm/segment.h>
1da177e4
LT
42#include <asm/cache.h>
43#include <asm/errno.h>
44#include <asm/dwarf2.h>
45#include <asm/calling.h>
e2d5df93 46#include <asm/asm-offsets.h>
1da177e4
LT
47#include <asm/msr.h>
48#include <asm/unistd.h>
49#include <asm/thread_info.h>
50#include <asm/hw_irq.h>
5f8efbb9 51#include <asm/page.h>
2601e64d 52#include <asm/irqflags.h>
1da177e4
LT
53
54 .code64
55
dc37db4d 56#ifndef CONFIG_PREEMPT
1da177e4
LT
57#define retint_kernel retint_restore_args
58#endif
2601e64d
IM
59
60
/*
 * TRACE_IRQS_IRETQ - irq-flags tracing hook for paths that end in iretq.
 *
 * Expands to nothing unless CONFIG_TRACE_IRQFLAGS is set.  Otherwise it
 * tests bit 9 (the interrupt flag) of the EFLAGS image stored at
 * EFLAGS-\offset(%rsp) and invokes TRACE_IRQS_ON only when that bit is set.
 *
 * offset: bias applied to the EFLAGS slot offset (defaults to ARGOFFSET).
 */
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
#ifdef CONFIG_TRACE_IRQFLAGS
	bt	$9,EFLAGS-\offset(%rsp)		/* saved IF clear -> nothing to trace */
	jnc	1f
	TRACE_IRQS_ON
1:
#endif
.endm
69
1da177e4
LT
70/*
71 * C code is not supposed to know about undefined top of stack. Every time
72 * a C function with a pt_regs argument is called from the SYSCALL-based
73 * fast path, FIXUP_TOP_OF_STACK is needed.
74 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
75 * manipulation.
76 */
77
/*
 * FIXUP_TOP_OF_STACK - fill in the top-of-stack slots of the frame.
 *
 * Expects %rsp at FRAMEEND.  Writes the SS, CS, RCX, RSP and EFLAGS slots:
 * SS/CS get the user selectors, RCX gets -1, RSP is taken from
 * %gs:pda_oldrsp and EFLAGS is copied from the saved R11 slot.
 *
 * tmp: scratch register; holds the copied EFLAGS value on exit.
 */
	.macro FIXUP_TOP_OF_STACK tmp
	/* constant slots first - they do not need the scratch register */
	movq	$__USER_DS,SS(%rsp)
	movq	$__USER_CS,CS(%rsp)
	movq	$-1,RCX(%rsp)
	/* user stack pointer stashed in the PDA */
	movq	%gs:pda_oldrsp,\tmp
	movq	\tmp,RSP(%rsp)
	/* eflags live in the R11 slot */
	movq	R11(%rsp),\tmp
	movq	\tmp,EFLAGS(%rsp)
	.endm
88
/*
 * RESTORE_TOP_OF_STACK - copy the frame's RSP slot back into
 * %gs:pda_oldrsp and its EFLAGS slot back into the R11 slot.
 *
 * tmp:    scratch register; holds the EFLAGS value on exit.
 * offset: bias subtracted from every slot offset (default 0).
 */
	.macro RESTORE_TOP_OF_STACK tmp,offset=0
	movq	RSP-\offset(%rsp),\tmp		/* frame RSP -> pda_oldrsp */
	movq	\tmp,%gs:pda_oldrsp
	movq	EFLAGS-\offset(%rsp),\tmp	/* frame EFLAGS -> R11 slot */
	movq	\tmp,R11-\offset(%rsp)
	.endm
95
/*
 * FAKE_STACK_FRAME - push a synthetic six-quadword frame.
 *
 * Push order: ss, rsp, eflags, cs, rip, orig rax.  ss, rsp and orig rax
 * are zero, eflags has only bit 9 (interrupts on) set, cs is __KERNEL_CS
 * and rip is \child_rip.  Zeroes %eax.  UNFAKE_STACK_FRAME undoes it.
 */
	.macro FAKE_STACK_FRAME child_rip
	xorl	%eax, %eax
	pushq	%rax			/* ss */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	ss,0*/
	pushq	%rax			/* rsp */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rsp,0
	pushq	$(1<<9)			/* eflags - interrupts on */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	rflags,0*/
	pushq	$__KERNEL_CS		/* cs */
	CFI_ADJUST_CFA_OFFSET	8
	/*CFI_REL_OFFSET	cs,0*/
	pushq	\child_rip		/* rip */
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET	rip,0
	pushq	%rax			/* orig rax */
	CFI_ADJUST_CFA_OFFSET	8
	.endm
117
/*
 * UNFAKE_STACK_FRAME - drop the six quadwords pushed by FAKE_STACK_FRAME
 * and move the CFA back by the same amount.
 */
	.macro UNFAKE_STACK_FRAME
	addq	$8*6, %rsp
	CFI_ADJUST_CFA_OFFSET	-(6*8)
	.endm
122
7effaa88
JB
/*
 * CFI_DEFAULT_STACK - unwind annotations for a full register frame at %rsp.
 *
 * start=1 (default): open a new "simple" signal-frame proc and define the
 *                    CFA as rsp+SS+8.
 * start=0:           only reset the CFA offset to SS+8 inside an already
 *                    open proc.
 * In both cases every saved general register, plus rip and rsp, is then
 * described at its frame slot.  The cs/rflags/ss entries are left
 * commented out, as in the rest of this file.
 */
	.macro CFI_DEFAULT_STACK start=1
	.if \start
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,SS+8
	.else
	CFI_DEF_CFA_OFFSET SS+8
	.endif
	CFI_REL_OFFSET	r15,R15
	CFI_REL_OFFSET	r14,R14
	CFI_REL_OFFSET	r13,R13
	CFI_REL_OFFSET	r12,R12
	CFI_REL_OFFSET	rbp,RBP
	CFI_REL_OFFSET	rbx,RBX
	CFI_REL_OFFSET	r11,R11
	CFI_REL_OFFSET	r10,R10
	CFI_REL_OFFSET	r9,R9
	CFI_REL_OFFSET	r8,R8
	CFI_REL_OFFSET	rax,RAX
	CFI_REL_OFFSET	rcx,RCX
	CFI_REL_OFFSET	rdx,RDX
	CFI_REL_OFFSET	rsi,RSI
	CFI_REL_OFFSET	rdi,RDI
	CFI_REL_OFFSET	rip,RIP
	/*CFI_REL_OFFSET	cs,CS*/
	/*CFI_REL_OFFSET	rflags,EFLAGS*/
	CFI_REL_OFFSET	rsp,RSP
	/*CFI_REL_OFFSET	ss,SS*/
	.endm
/*
 * ret_from_fork - a newly forked process directly context switches into this.
 *
 * In:  rdi = prev (handed on to schedule_tail).
 *
 * Reloads the kernel eflags image, calls schedule_tail, runs the syscall
 * exit trace hook when _TIF_SYSCALL_TRACE or _TIF_SYSCALL_AUDIT is set, and
 * then leaves through one of the syscall return paths:
 *   - saved CS has RPL 0 (kernel_thread)  -> int_ret_from_sys_call
 *   - _TIF_IA32 set                       -> int_ret_from_sys_call
 *   - otherwise                           -> ret_from_sys_call
 *
 * The push/popf pair below moves %rsp by 8 bytes in 64-bit mode, so the
 * CFA adjustments are 8/-8 (the same convention load_gs_index uses for its
 * pushf/popf pair).
 */
ENTRY(ret_from_fork)
	CFI_DEFAULT_STACK
	push	kernel_eflags(%rip)
	CFI_ADJUST_CFA_OFFSET	8
	popf					# reset kernel eflags
	CFI_ADJUST_CFA_OFFSET	-8
	call	schedule_tail
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
	jnz	rff_trace
rff_action:
	RESTORE_REST
	testl	$3,CS-ARGOFFSET(%rsp)		# from kernel_thread?
	je	int_ret_from_sys_call
	testl	$_TIF_IA32,threadinfo_flags(%rcx)
	jnz	int_ret_from_sys_call
	RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
	jmp	ret_from_sys_call
rff_trace:
	movq	%rsp,%rdi			# &pt_regs -> arg1
	call	syscall_trace_leave
	GET_THREAD_INFO(%rcx)			# reload thread info in %rcx after the call
	jmp	rff_action
	CFI_ENDPROC
END(ret_from_fork)
1da177e4
LT
181
182/*
183 * System call entry. Up to 6 arguments in registers are supported.
184 *
185 * SYSCALL does not save anything on the stack and does not change the
186 * stack pointer.
187 */
188
189/*
190 * Register setup:
191 * rax system call number
192 * rdi arg0
193 * rcx return address for syscall/sysret, C arg3
194 * rsi arg1
195 * rdx arg2
196 * r10 arg3 (--> moved to rcx for C)
197 * r8 arg4
198 * r9 arg5
199 * r11 eflags for syscall/sysret, temporary for C
200 * r12-r15,rbp,rbx saved by C code, not touched.
201 *
202 * Interrupts are off on entry.
203 * Only called from user space.
204 *
205 * XXX if we had a free scratch register we could save the RSP into the stack frame
206 * and report it properly in ps. Unfortunately we haven't.
7bf36bbc
AK
207 *
208 * When user can change the frames always force IRET. That is because
209 * it deals with uncanonical addresses better. SYSRET has trouble
210 * with them due to bugs in both AMD and Intel CPUs.
1da177e4
LT
211 */
212
/*
 * system_call - SYSCALL instruction entry point.
 *
 * Switches to the kernel gs and kernel stack (user %rsp is parked in
 * %gs:pda_oldrsp), saves the argument registers, and dispatches through
 * sys_call_table.  Two exit families live in this proc:
 *   ret_from_sys_call / sysret_*   - fast path ending in sysretq
 *   int_ret_from_sys_call / int_*  - path that leaves via the iretq code
 *                                    (retint_swapgs / retint_restore_args)
 */
ENTRY(system_call)
	CFI_STARTPROC	simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
	CFI_REGISTER	rip,rcx
	/*CFI_REGISTER	rflags,r11*/
	swapgs
	movq	%rsp,%gs:pda_oldrsp
	movq	%gs:pda_kernelstack,%rsp
	/*
	 * No need to follow this irqs off/on section - it's straight
	 * and short:
	 */
	sti
	SAVE_ARGS 8,1
	movq	%rax,ORIG_RAX-ARGOFFSET(%rsp)
	movq	%rcx,RIP-ARGOFFSET(%rsp)
	CFI_REL_OFFSET	rip,RIP-ARGOFFSET
	GET_THREAD_INFO(%rcx)
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
	jnz	tracesys
	cmpq	$__NR_syscall_max,%rax
	ja	badsys
	movq	%r10,%rcx			/* arg3 arrives in r10, C wants it in rcx */
	call	*sys_call_table(,%rax,8)	# XXX: rip relative
	movq	%rax,RAX-ARGOFFSET(%rsp)
/*
 * Syscall return path ending with SYSRET (fast path)
 * Has incomplete stack frame and undefined top of stack.
 */
ret_from_sys_call:
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi: flagmask */
sysret_check:
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	sysret_careful
	CFI_REMEMBER_STATE
	/*
	 * sysretq will re-enable interrupts:
	 */
	TRACE_IRQS_ON
	movq	RIP-ARGOFFSET(%rsp),%rcx
	CFI_REGISTER	rip,rcx
	RESTORE_ARGS 0,-ARG_SKIP,1
	/*CFI_REGISTER	rflags,r11*/
	movq	%gs:pda_oldrsp,%rsp
	swapgs
	sysretq

	CFI_RESTORE_STATE
	/* Work pending: first see whether it is a reschedule. */
	/* edx: work, edi: workmask */
sysret_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	sysret_signal
	TRACE_IRQS_ON
	sti
	pushq	%rdi				/* keep the workmask across schedule */
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	jmp	sysret_check

	/* Not a reschedule: possibly a signal */
sysret_signal:
	TRACE_IRQS_ON
	sti
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	1f

	/* Really a signal */
	/* edx: work flags (arg3) */
	leaq	do_notify_resume(%rip),%rax
	leaq	-ARGOFFSET(%rsp),%rdi		# &pt_regs -> arg1
	xorl	%esi,%esi			# oldset -> arg2
	call	ptregscall_common
1:	movl	$_TIF_NEED_RESCHED,%edi
	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
	/* NOTE(review): when the jz above is taken ptregscall_common has not
	   run on this path, so it is not evident from this file that the top
	   of stack is valid for the IRET exit in that case - confirm. */
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check

	/* Syscall number out of range: fail with -ENOSYS */
badsys:
	movq	$-ENOSYS,RAX-ARGOFFSET(%rsp)
	jmp	ret_from_sys_call

	/* Do syscall tracing */
tracesys:
	SAVE_REST
	movq	$-ENOSYS,RAX(%rsp)
	FIXUP_TOP_OF_STACK %rdi
	movq	%rsp,%rdi
	call	syscall_trace_enter
	LOAD_ARGS ARGOFFSET	/* reload args from stack in case ptrace changed it */
	RESTORE_REST
	cmpq	$__NR_syscall_max,%rax
	movq	$-ENOSYS,%rcx			/* mov/cmov leave the cmpq flags intact */
	cmova	%rcx,%rax
	ja	1f
	movq	%r10,%rcx			/* fixup for C */
	call	*sys_call_table(,%rax,8)
1:	movq	%rax,RAX-ARGOFFSET(%rsp)
	/* Use IRET because user could have changed frame */

/*
 * Syscall return path ending with IRET.
 * Has correct top of stack, but partial stack frame.
 */
	.globl int_ret_from_sys_call
int_ret_from_sys_call:
	cli
	TRACE_IRQS_OFF
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_restore_args
	movl	$_TIF_ALLWORK_MASK,%edi
	/* edi: mask to check */
int_with_check:
	GET_THREAD_INFO(%rcx)
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	jnz	int_careful
	andl	$~TS_COMPAT,threadinfo_status(%rcx)
	jmp	retint_swapgs

	/* Either reschedule or signal or syscall exit tracking needed. */
	/* First do a reschedule test. */
	/* edx: work, edi: workmask */
int_careful:
	bt	$TIF_NEED_RESCHED,%edx
	jnc	int_very_careful
	TRACE_IRQS_ON
	sti
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check

	/* handle signals and tracing -- both require a full stack frame */
int_very_careful:
	TRACE_IRQS_ON
	sti
	SAVE_REST
	/* Check for syscall exit trace */
	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
	jz	int_signal
	pushq	%rdi
	CFI_ADJUST_CFA_OFFSET 8
	leaq	8(%rsp),%rdi			# &ptregs -> arg1 (skip the pushed rdi)
	call	syscall_trace_leave
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET -8
	andl	$~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
	jmp	int_restore_rest

int_signal:
	testl	$(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
	jz	1f
	movq	%rsp,%rdi			# &ptregs -> arg1
	xorl	%esi,%esi			# oldset -> arg2
	call	do_notify_resume
1:	movl	$_TIF_NEED_RESCHED,%edi
int_restore_rest:
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	jmp	int_with_check
	CFI_ENDPROC
END(system_call)
1da177e4
LT
391
/*
 * Certain special system calls need a complete (full) stack frame.
 *
 * PTREGSCALL label,func,arg - emit global stub \label which loads the
 * address of \func into %rax, points \arg at the frame
 * (-ARGOFFSET+8(%rsp), the +8 accounting for the return address) and
 * jumps to ptregscall_common.
 */
	.macro PTREGSCALL label,func,arg
	.globl \label
\label:
	leaq	\func(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),\arg		/* 8 for return address */
	jmp	ptregscall_common
END(\label)
	.endm
404
7effaa88
JB
/*
 * Full-frame syscall stubs and their shared tail.  One CFI proc spans the
 * stubs and ptregscall_common.
 */
	CFI_STARTPROC

	PTREGSCALL stub_clone, sys_clone, %r8
	PTREGSCALL stub_fork, sys_fork, %rdi
	PTREGSCALL stub_vfork, sys_vfork, %rdi
	PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
	PTREGSCALL stub_iopl, sys_iopl, %rsi

/*
 * ptregscall_common
 * In:  rax = function to call; return address on top of the stack.
 *
 * Pops the return address, builds a full frame (SAVE_REST +
 * FIXUP_TOP_OF_STACK), calls *%rax, syncs the top of stack back, restores
 * the frame and returns to the popped address.  The return address is
 * parked in %r15 across the call; %r11 is used as scratch.
 */
ENTRY(ptregscall_common)
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	movq	%r11, %r15			/* keep return address across the call */
	CFI_REGISTER rip, r15
	FIXUP_TOP_OF_STACK %r11
	call	*%rax
	RESTORE_TOP_OF_STACK %r11
	movq	%r15, %r11
	CFI_REGISTER rip, r11
	RESTORE_REST
	pushq	%r11				/* put the return address back for ret */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rip, 0
	ret
	CFI_ENDPROC
END(ptregscall_common)
1da177e4
LT
433
/*
 * stub_execve - full-frame wrapper around sys_execve.
 *
 * Discards the return address (popped into %r11, never pushed back),
 * builds a full frame, calls sys_execve, stores its result in the RAX
 * slot and leaves through int_ret_from_sys_call.
 */
ENTRY(stub_execve)
	CFI_STARTPROC
	popq	%r11
	CFI_ADJUST_CFA_OFFSET -8
	CFI_REGISTER rip, r11
	SAVE_REST
	FIXUP_TOP_OF_STACK %r11
	call	sys_execve
	RESTORE_TOP_OF_STACK %r11
	movq	%rax,RAX(%rsp)
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_execve)
1da177e4
LT
448
/*
 * stub_rt_sigreturn
 *
 * sigreturn is special because it needs to restore all registers on return.
 * This cannot be done with SYSRET, so use the IRET return path instead.
 *
 * Drops the return address, builds a full frame, passes &pt_regs in %rdi
 * to sys_rt_sigreturn and stores the result in the RAX slot.
 */
ENTRY(stub_rt_sigreturn)
	CFI_STARTPROC
	addq	$8, %rsp			/* drop return address */
	CFI_ADJUST_CFA_OFFSET	-8
	SAVE_REST
	movq	%rsp,%rdi
	FIXUP_TOP_OF_STACK %r11
	call	sys_rt_sigreturn
	movq	%rax,RAX(%rsp)	# fixme, this could be done at the higher layer
	RESTORE_REST
	jmp	int_ret_from_sys_call
	CFI_ENDPROC
END(stub_rt_sigreturn)
1da177e4 466
7effaa88
JB
/*
 * _frame ref - initial unwind state for interrupts and exceptions.
 *
 * Opens a "simple" signal-frame proc whose CFA is rsp+SS+8-\ref, where
 * \ref is the frame slot %rsp points at on entry, and describes where rsp
 * and rip are saved.  The ss/rflags/cs entries are left commented out.
 */
	.macro _frame ref
	CFI_STARTPROC simple
	CFI_SIGNAL_FRAME
	CFI_DEF_CFA rsp,SS+8-\ref
	/*CFI_REL_OFFSET ss,SS-\ref*/
	CFI_REL_OFFSET rsp,RSP-\ref
	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
	/*CFI_REL_OFFSET cs,CS-\ref*/
	CFI_REL_OFFSET rip,RIP-\ref
	.endm

/* frame state on entry for interrupts (and exceptions without error code) */
#define INTR_FRAME _frame RIP
/* frame state on entry for exceptions with error code (and interrupts with
   vector already pushed) */
#define XCPT_FRAME _frame ORIG_RAX
486
1da177e4
LT
/*
 * Interrupt entry/exit.
 *
 * Interrupt entry points save only callee clobbered registers in fast path.
 *
 * Entry runs with interrupts off.
 */

/*
 * interrupt func - common interrupt prologue, then call \func.
 *
 * In:  0(%rsp) = interrupt number.
 * Passes a frame pointer (%rsp - ARGOFFSET) in %rdi, sets up %rbp as a
 * frame pointer, does swapgs when the saved CS has a non-zero RPL, bumps
 * %gs:pda_irqcount and switches to the stack in %gs:pda_irqstackptr when
 * that increment produced zero.
 */
	.macro interrupt func
	cld
	SAVE_ARGS
	leaq	-ARGOFFSET(%rsp),%rdi		# arg1 for handler
	pushq	%rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET		rbp, 0
	movq	%rsp,%rbp
	CFI_DEF_CFA_REGISTER	rbp
	testl	$3,CS(%rdi)
	je	1f
	swapgs
	/* irqcount is used to check if a CPU is already on an interrupt
	   stack or not. While this is essentially redundant with preempt_count
	   it is a little cheaper to use a separate counter in the PDA
	   (short of moving irq_enter into assembly, which would be too
	    much work) */
1:	incl	%gs:pda_irqcount
	cmoveq	%gs:pda_irqstackptr,%rsp	/* ZF comes from the incl above */
	push	%rbp				# backlink for old unwinder
	/*
	 * We entered an interrupt context - irqs are off:
	 */
	TRACE_IRQS_OFF
	call	\func
	.endm
522
/*
 * common_interrupt - device interrupt entry and the shared interrupt
 * return paths (ret_from_intr, retint_*, iret_label).
 *
 * iret_label is covered by an __ex_table entry: a fault on the iretq is
 * redirected to bad_iret.
 */
ENTRY(common_interrupt)
	XCPT_FRAME
	interrupt do_IRQ
	/* 0(%rsp): oldrsp-ARGOFFSET */
ret_from_intr:
	cli
	TRACE_IRQS_OFF
	decl	%gs:pda_irqcount
	leaveq					/* undo the %rbp frame set up by "interrupt" */
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
exit_intr:
	GET_THREAD_INFO(%rcx)
	testl	$3,CS-ARGOFFSET(%rsp)
	je	retint_kernel

	/* Interrupt came from user space */
	/*
	 * Has a correct top of stack, but a partial stack frame
	 * %rcx: thread info. Interrupts off.
	 */
retint_with_reschedule:
	movl	$_TIF_WORK_MASK,%edi
retint_check:
	movl	threadinfo_flags(%rcx),%edx
	andl	%edi,%edx
	CFI_REMEMBER_STATE
	jnz	retint_careful
retint_swapgs:
	/*
	 * The iretq could re-enable interrupts:
	 */
	cli
	TRACE_IRQS_IRETQ
	swapgs
	jmp	restore_args

retint_restore_args:
	cli
	/*
	 * The iretq could re-enable interrupts:
	 */
	TRACE_IRQS_IRETQ
restore_args:
	RESTORE_ARGS 0,8,0
iret_label:
	iretq

	.section __ex_table,"a"
	.quad iret_label,bad_iret
	.previous
	.section .fixup,"ax"
	/* force a signal here? this matches i386 behaviour */
	/* running with kernel gs */
bad_iret:
	movq	$11,%rdi			/* SIGSEGV */
	TRACE_IRQS_ON
	sti
	jmp	do_exit
	.previous

	/* edi: workmask, edx: work */
retint_careful:
	CFI_RESTORE_STATE
	bt	$TIF_NEED_RESCHED,%edx
	jnc	retint_signal
	TRACE_IRQS_ON
	sti
	pushq	%rdi				/* keep the workmask across schedule */
	CFI_ADJUST_CFA_OFFSET	8
	call	schedule
	popq	%rdi
	CFI_ADJUST_CFA_OFFSET	-8
	GET_THREAD_INFO(%rcx)
	cli
	TRACE_IRQS_OFF
	jmp	retint_check

retint_signal:
	testl	$(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
	jz	retint_swapgs
	TRACE_IRQS_ON
	sti
	SAVE_REST
	movq	$-1,ORIG_RAX(%rsp)
	xorl	%esi,%esi			# oldset
	movq	%rsp,%rdi			# &pt_regs
	call	do_notify_resume
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	movl	$_TIF_NEED_RESCHED,%edi
	GET_THREAD_INFO(%rcx)
	jmp	retint_check

#ifdef CONFIG_PREEMPT
	/* Returning to kernel space. Check if we need preemption */
	/* rcx: threadinfo. interrupts off. */
ENTRY(retint_kernel)
	cmpl	$0,threadinfo_preempt_count(%rcx)
	jnz	retint_restore_args
	bt	$TIF_NEED_RESCHED,threadinfo_flags(%rcx)
	jnc	retint_restore_args
	bt	$9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
	jnc	retint_restore_args
	call	preempt_schedule_irq
	jmp	exit_intr
#endif

	CFI_ENDPROC
END(common_interrupt)
1da177e4
LT
634
/*
 * APIC interrupts.
 *
 * apicinterrupt num,func - body of an APIC vector entry point: pushes
 * ~\num, runs the common "interrupt" prologue calling \func, and leaves
 * via ret_from_intr.
 */
	.macro apicinterrupt num,func
	INTR_FRAME
	pushq	$~(\num)
	CFI_ADJUST_CFA_OFFSET 8
	interrupt \func
	jmp	ret_from_intr
	CFI_ENDPROC
	.endm
646
/* APIC vector entry points; each one is a single apicinterrupt expansion. */
ENTRY(thermal_interrupt)
	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
END(thermal_interrupt)

ENTRY(threshold_interrupt)
	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
END(threshold_interrupt)

#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
END(reschedule_interrupt)

	/* invalidate_interrupt<num>: vector INVALIDATE_TLB_VECTOR_START+<num> */
	.macro INVALIDATE_ENTRY num
ENTRY(invalidate_interrupt\num)
	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
END(invalidate_interrupt\num)
	.endm

	INVALIDATE_ENTRY 0
	INVALIDATE_ENTRY 1
	INVALIDATE_ENTRY 2
	INVALIDATE_ENTRY 3
	INVALIDATE_ENTRY 4
	INVALIDATE_ENTRY 5
	INVALIDATE_ENTRY 6
	INVALIDATE_ENTRY 7

ENTRY(call_function_interrupt)
	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
ENTRY(irq_move_cleanup_interrupt)
	apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
#endif

ENTRY(apic_timer_interrupt)
	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
END(apic_timer_interrupt)

ENTRY(error_interrupt)
	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
END(error_interrupt)

ENTRY(spurious_interrupt)
	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
END(spurious_interrupt)
1da177e4
LT
694
/*
 * Exception entry points.
 *
 * zeroentry sym - entry for exceptions without an error code: pushes a
 * zero error code/oldrax, saves %rax in the rdi slot, loads the address of
 * \sym into %rax and jumps to error_entry.
 */
	.macro zeroentry sym
	INTR_FRAME
	pushq	$0			/* push error code/oldrax */
	CFI_ADJUST_CFA_OFFSET 8
	pushq	%rax			/* push real oldrax to the rdi slot */
	CFI_ADJUST_CFA_OFFSET 8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm
708
/*
 * errorentry sym - entry for exceptions whose error code is already on
 * the stack: saves %rax in the rdi slot, loads the address of \sym into
 * %rax and jumps to error_entry.
 */
	.macro errorentry sym
	XCPT_FRAME
	pushq	%rax
	CFI_ADJUST_CFA_OFFSET 8
	leaq	\sym(%rip),%rax
	jmp	error_entry
	CFI_ENDPROC
	.endm
717
/*
 * paranoidentry sym, ist=0, irqtrace=1
 *
 * Entry sequence for NMI-like exceptions that can happen everywhere.
 * The error code is on the stack already.
 *
 * Saves all registers, then reads MSR_GS_BASE: when the high half (%edx)
 * has its sign bit clear, swapgs is executed and %ebx is zeroed; otherwise
 * %ebx stays 1 ("no swapgs" flag consumed by paranoidexit).  Calls \sym
 * with %rdi = &pt_regs and %rsi = error code, after replacing ORIG_RAX
 * with -1.
 *
 * ist:      when non-zero, the TSS ist slot (\ist - 1) is lowered by
 *           EXCEPTION_STKSZ around the call and raised again afterwards.
 * irqtrace: when non-zero, TRACE_IRQS_OFF is emitted after the final cli.
 */
	.macro paranoidentry sym, ist=0, irqtrace=1
	SAVE_ALL
	cld
	movl	$1,%ebx
	movl	$MSR_GS_BASE,%ecx
	rdmsr
	testl	%edx,%edx
	js	1f
	swapgs
	xorl	%ebx,%ebx
1:
	.if \ist
	movq	%gs:pda_data_offset, %rbp
	.endif
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi
	movq	$-1,ORIG_RAX(%rsp)
	.if \ist
	subq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	call	\sym
	.if \ist
	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
	.endif
	cli
	.if \irqtrace
	TRACE_IRQS_OFF
	.endif
	.endm
2601e64d
IM
749
750 /*
751 * "Paranoid" exit path from exception stack.
752 * Paranoid because this is used by NMIs and cannot take
753 * any kernel state for granted.
754 * We don't do kernel preemption checks here, because only
755 * NMI should be common and it does not enable IRQs and
756 * cannot get reschedule ticks.
757 *
758 * "trace" is 0 for the NMI handler only, because irq-tracing
759 * is fundamentally NMI-unsafe. (we cannot change the soft and
760 * hard flags at once, atomically)
761 */
762 .macro paranoidexit trace=1
763 /* ebx: no swapgs flag */
764paranoid_exit\trace:
765 testl %ebx,%ebx /* swapgs needed? */
766 jnz paranoid_restore\trace
767 testl $3,CS(%rsp)
768 jnz paranoid_userspace\trace
769paranoid_swapgs\trace:
7a0a2dff 770 .if \trace
2601e64d 771 TRACE_IRQS_IRETQ 0
7a0a2dff 772 .endif
2601e64d
IM
773 swapgs
774paranoid_restore\trace:
775 RESTORE_ALL 8
776 iretq
777paranoid_userspace\trace:
778 GET_THREAD_INFO(%rcx)
779 movl threadinfo_flags(%rcx),%ebx
780 andl $_TIF_WORK_MASK,%ebx
781 jz paranoid_swapgs\trace
782 movq %rsp,%rdi /* &pt_regs */
783 call sync_regs
784 movq %rax,%rsp /* switch stack for scheduling */
785 testl $_TIF_NEED_RESCHED,%ebx
786 jnz paranoid_schedule\trace
787 movl %ebx,%edx /* arg3: thread flags */
788 .if \trace
789 TRACE_IRQS_ON
790 .endif
791 sti
792 xorl %esi,%esi /* arg2: oldset */
793 movq %rsp,%rdi /* arg1: &pt_regs */
794 call do_notify_resume
795 cli
796 .if \trace
797 TRACE_IRQS_OFF
798 .endif
799 jmp paranoid_userspace\trace
800paranoid_schedule\trace:
801 .if \trace
802 TRACE_IRQS_ON
803 .endif
804 sti
805 call schedule
806 cli
807 .if \trace
808 TRACE_IRQS_OFF
809 .endif
810 jmp paranoid_userspace\trace
811 CFI_ENDPROC
812 .endm
813
1da177e4
LT
/*
 * error_entry - common exception entry.
 *
 * In:  %rax        = exception handler to call
 *      rdi slot    = the interrupted context's %rax
 *      oldrax slot = error code (or orig_rax)
 *
 * Saves the remaining registers into the frame, does swapgs when the
 * fault came from user mode (or from one of the kernel locations checked
 * in error_kernelspace), and calls the handler with %rdi = &pt_regs and
 * %rsi = error code.  %ebx is the "no swapgs" flag consumed by error_exit.
 */
KPROBE_ENTRY(error_entry)
	_frame RDI
	/* rdi slot contains rax, oldrax contains error code */
	cld
	subq	$14*8,%rsp
	CFI_ADJUST_CFA_OFFSET	(14*8)
	movq	%rsi,13*8(%rsp)
	CFI_REL_OFFSET	rsi,RSI
	movq	14*8(%rsp),%rsi		/* load rax from rdi slot */
	movq	%rdx,12*8(%rsp)
	CFI_REL_OFFSET	rdx,RDX
	movq	%rcx,11*8(%rsp)
	CFI_REL_OFFSET	rcx,RCX
	movq	%rsi,10*8(%rsp)		/* store rax */
	CFI_REL_OFFSET	rax,RAX
	movq	%r8, 9*8(%rsp)
	CFI_REL_OFFSET	r8,R8
	movq	%r9, 8*8(%rsp)
	CFI_REL_OFFSET	r9,R9
	movq	%r10,7*8(%rsp)
	CFI_REL_OFFSET	r10,R10
	movq	%r11,6*8(%rsp)
	CFI_REL_OFFSET	r11,R11
	movq	%rbx,5*8(%rsp)
	CFI_REL_OFFSET	rbx,RBX
	movq	%rbp,4*8(%rsp)
	CFI_REL_OFFSET	rbp,RBP
	movq	%r12,3*8(%rsp)
	CFI_REL_OFFSET	r12,R12
	movq	%r13,2*8(%rsp)
	CFI_REL_OFFSET	r13,R13
	movq	%r14,1*8(%rsp)
	CFI_REL_OFFSET	r14,R14
	movq	%r15,(%rsp)
	CFI_REL_OFFSET	r15,R15
	xorl	%ebx,%ebx
	testl	$3,CS(%rsp)
	je	error_kernelspace
error_swapgs:
	swapgs
error_sti:
	movq	%rdi,RDI(%rsp)
	movq	%rsp,%rdi
	movq	ORIG_RAX(%rsp),%rsi	/* get error code */
	movq	$-1,ORIG_RAX(%rsp)
	call	*%rax
	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
	movl	%ebx,%eax
	RESTORE_REST
	cli
	TRACE_IRQS_OFF
	GET_THREAD_INFO(%rcx)
	testl	%eax,%eax
	jne	retint_kernel
	movl	threadinfo_flags(%rcx),%edx
	movl	$_TIF_WORK_MASK,%edi
	andl	%edi,%edx
	jnz	retint_careful
	/*
	 * The iret might restore flags:
	 */
	TRACE_IRQS_IRETQ
	swapgs
	RESTORE_ARGS 0,8,0
	jmp	iret_label
	CFI_ENDPROC

error_kernelspace:
	incl	%ebx
	/* There are two places in the kernel that can potentially fault with
	   usergs. Handle them here. The exception handlers after
	   iret run with kernel gs again, so don't set the user space flag.
	   B stepping K8s sometimes report a truncated RIP for IRET
	   exceptions returning to compat mode. Check for these here too. */
	leaq	iret_label(%rip),%rbp
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	movl	%ebp,%ebp		/* zero extend */
	cmpq	%rbp,RIP(%rsp)
	je	error_swapgs
	cmpq	$gs_change,RIP(%rsp)
	je	error_swapgs
	jmp	error_sti
KPROBE_END(error_entry)
1da177e4
LT
903
/*
 * load_gs_index - reload the gs selector with exception handling.
 *
 * In:  edi = new selector.
 *
 * The selector load at gs_change is bracketed by two swapgs instructions,
 * with interrupts disabled and the caller's flags saved and restored
 * around it.  gs_change has an __ex_table entry: if the load faults,
 * bad_gs loads a zero selector instead and resumes at label 2.
 */
ENTRY(load_gs_index)
	CFI_STARTPROC
	pushf
	CFI_ADJUST_CFA_OFFSET 8
	cli
	swapgs
gs_change:
	movl	%edi,%gs
2:	mfence				/* workaround */
	swapgs
	popf
	CFI_ADJUST_CFA_OFFSET -8
	ret
	CFI_ENDPROC
ENDPROC(load_gs_index)

	.section __ex_table,"a"
	.align 8
	.quad gs_change,bad_gs
	.previous
	.section .fixup,"ax"
	/* running with kernelgs */
bad_gs:
	swapgs				/* switch back to user gs */
	xorl	%eax,%eax
	movl	%eax,%gs
	jmp	2b
	.previous
934
/*
 * kernel_thread - create a kernel thread.
 *
 * C extern interface:
 *	extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 *
 * asm input arguments:
 *	rdi: fn, rsi: arg, rdx: flags
 *
 * Builds a fake frame whose rip is child_rip, saves all registers into
 * it and calls do_fork with that frame as pt_regs.  do_fork's result is
 * stored in the RAX slot before the frame is restored.
 */
ENTRY(kernel_thread)
	CFI_STARTPROC
	FAKE_STACK_FRAME $child_rip
	SAVE_ALL

	# do_fork arguments -- rdi: flags, rsi: usp, rdx: &pt_regs
	movq	%rdx,%rdi
	orq	kernel_thread_flags(%rip),%rdi
	movq	$-1, %rsi
	movq	%rsp, %rdx

	xorl	%r8d,%r8d
	xorl	%r9d,%r9d

	# clone now
	call	do_fork
	movq	%rax,RAX(%rsp)
	xorl	%edi,%edi

	/*
	 * It isn't worth checking for a reschedule here,
	 * so internally to the x86_64 port you can rely on kernel_thread()
	 * not to reschedule the child before returning, this avoids the need
	 * of hacks for example to fork off the per-CPU idle tasks.
	 * [Hopefully no generic code relies on the reschedule -AK]
	 */
	RESTORE_ALL
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_thread)
1da177e4
LT
975
/*
 * child_rip - rip placed in the fake frame built by kernel_thread.
 *
 * Here we are in the child and the registers are set as they were
 * at kernel_thread() invocation in the parent: rdi = fn, rsi = arg.
 * Calls fn(arg), then do_exit(0).
 */
child_rip:
	pushq	$0			# fake return address
	CFI_STARTPROC
	movq	%rdi, %rax		# fn
	movq	%rsi, %rdi		# arg -> arg1
	call	*%rax
	# exit
	xorl	%edi, %edi
	call	do_exit
	CFI_ENDPROC
ENDPROC(child_rip)
1da177e4
LT
991
/*
 * kernel_execve - execve() from kernel context.  This function needs to
 * use IRET, not SYSRET, to set up all state properly.
 *
 * C extern interface:
 *	 extern long execve(char *name, char **argv, char **envp)
 *
 * asm input arguments:
 *	rdi: name, rsi: argv, rdx: envp
 *
 * We want to fallback into:
 *	extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
 *
 * sys_execve is called with rdi/rsi/rdx untouched and a fake frame on the
 * stack.  A zero result leaves through int_ret_from_sys_call; any other
 * result is returned to the caller after unwinding the fake frame.
 */
ENTRY(kernel_execve)
	CFI_STARTPROC
	FAKE_STACK_FRAME $0
	SAVE_ALL
	call	sys_execve
	movq	%rax, RAX(%rsp)
	RESTORE_REST
	testq	%rax,%rax
	je	int_ret_from_sys_call
	RESTORE_ARGS
	UNFAKE_STACK_FRAME
	ret
	CFI_ENDPROC
ENDPROC(kernel_execve)
1da177e4 1021
/* page_fault arrives with an error code on the stack (errorentry). */
KPROBE_ENTRY(page_fault)
	errorentry do_page_fault
KPROBE_END(page_fault)

/* The next three use zeroentry, which pushes a zero error code itself. */
ENTRY(coprocessor_error)
	zeroentry do_coprocessor_error
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	zeroentry do_simd_coprocessor_error
END(simd_coprocessor_error)

ENTRY(device_not_available)
	zeroentry math_state_restore
END(device_not_available)
1da177e4
LT
1037
1038 /* runs on exception stack */
0f2fbdcb 1039KPROBE_ENTRY(debug)
7effaa88 1040 INTR_FRAME
1da177e4
LT
1041 pushq $0
1042 CFI_ADJUST_CFA_OFFSET 8
5f8efbb9 1043 paranoidentry do_debug, DEBUG_STACK
2601e64d 1044 paranoidexit
d28c4393 1045KPROBE_END(debug)
1da177e4
LT
1046
1047 /* runs on exception stack */
eddb6fb9 1048KPROBE_ENTRY(nmi)
7effaa88 1049 INTR_FRAME
1da177e4 1050 pushq $-1
7effaa88 1051 CFI_ADJUST_CFA_OFFSET 8
2601e64d
IM
1052 paranoidentry do_nmi, 0, 0
1053#ifdef CONFIG_TRACE_IRQFLAGS
1054 paranoidexit 0
1055#else
1056 jmp paranoid_exit1
1057 CFI_ENDPROC
1058#endif
d28c4393 1059KPROBE_END(nmi)
6fefb0d1 1060
/* int3 - breakpoint trap; uses DEBUG_STACK and exits via paranoid_exit1. */
KPROBE_ENTRY(int3)
	INTR_FRAME
	pushq	$0			/* zero error code */
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_int3, DEBUG_STACK
	jmp	paranoid_exit1
	CFI_ENDPROC
KPROBE_END(int3)
1da177e4
LT
1069
/*
 * Remaining exception entry points.  zeroentry is used where no error code
 * is on the stack, errorentry where one is; double_fault and stack_segment
 * go through the paranoid entry/exit path instead.
 */
ENTRY(overflow)
	zeroentry do_overflow
END(overflow)

ENTRY(bounds)
	zeroentry do_bounds
END(bounds)

ENTRY(invalid_op)
	zeroentry do_invalid_op
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	zeroentry do_coprocessor_segment_overrun
END(coprocessor_segment_overrun)

ENTRY(reserved)
	zeroentry do_reserved
END(reserved)

	/* runs on exception stack */
ENTRY(double_fault)
	XCPT_FRAME
	paranoidentry do_double_fault
	jmp	paranoid_exit1
	CFI_ENDPROC
END(double_fault)

ENTRY(invalid_TSS)
	errorentry do_invalid_TSS
END(invalid_TSS)

ENTRY(segment_not_present)
	errorentry do_segment_not_present
END(segment_not_present)

	/* runs on exception stack */
ENTRY(stack_segment)
	XCPT_FRAME
	paranoidentry do_stack_segment
	jmp	paranoid_exit1
	CFI_ENDPROC
END(stack_segment)

KPROBE_ENTRY(general_protection)
	errorentry do_general_protection
KPROBE_END(general_protection)

ENTRY(alignment_check)
	errorentry do_alignment_check
END(alignment_check)

ENTRY(divide_error)
	zeroentry do_divide_error
END(divide_error)

ENTRY(spurious_interrupt_bug)
	zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
1da177e4
LT
1129
#ifdef CONFIG_X86_MCE
	/*
	 * machine_check - runs on exception stack.
	 * Pushes a zero error code, then uses the paranoid entry path and
	 * exits via paranoid_exit1.
	 */
ENTRY(machine_check)
	INTR_FRAME
	pushq	$0
	CFI_ADJUST_CFA_OFFSET 8
	paranoidentry do_machine_check
	jmp	paranoid_exit1
	CFI_ENDPROC
END(machine_check)
#endif
1141
/*
 * call_softirq - call __do_softirq on the interrupt stack.
 * Interrupts are off.
 *
 * Sets up an %rbp frame, bumps %gs:pda_irqcount and switches %rsp to
 * %gs:pda_irqstackptr when that increment produced zero.  leaveq restores
 * the original stack pointer from %rbp before the counter is dropped again.
 */
ENTRY(call_softirq)
	CFI_STARTPROC
	push	%rbp
	CFI_ADJUST_CFA_OFFSET	8
	CFI_REL_OFFSET rbp,0
	mov	%rsp,%rbp
	CFI_DEF_CFA_REGISTER rbp
	incl	%gs:pda_irqcount
	cmove	%gs:pda_irqstackptr,%rsp	/* ZF comes from the incl above */
	push	%rbp				# backlink for old unwinder
	call	__do_softirq
	leaveq
	CFI_DEF_CFA_REGISTER	rsp
	CFI_ADJUST_CFA_OFFSET	-8
	decl	%gs:pda_irqcount
	ret
	CFI_ENDPROC
ENDPROC(call_softirq)
This page took 0.258032 seconds and 5 git commands to generate.