arch/x86/kernel/entry_64.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal recognition, which happens after every
13 * interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame; that is
16 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like a partial stack frame, but with all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
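/*
 * For orientation, a rough sketch (assumption: based on the SAVE_ARGS/SAVE_REST
 * definitions in <asm/calling.h>; the real macros also maintain CFI annotations
 * and extra skip arguments) of what the partial vs. full frame means. This is
 * illustrative only and is not assembled:
 */
#if 0	/* illustrative sketch, not built */
	/* SAVE_ARGS: partial frame - the registers a called C function may clobber */
	subq	$9*8, %rsp
	movq	%rdi, 8*8(%rsp)
	movq	%rsi, 7*8(%rsp)
	movq	%rdx, 6*8(%rsp)
	movq	%rcx, 5*8(%rsp)
	movq	%rax, 4*8(%rsp)
	movq	%r8,  3*8(%rsp)
	movq	%r9,  2*8(%rsp)
	movq	%r10, 1*8(%rsp)
	movq	%r11, 0*8(%rsp)
	/* SAVE_REST: the remaining callee-saved registers - together a full frame */
	subq	$6*8, %rsp
	movq	%rbx, 5*8(%rsp)
	movq	%rbp, 4*8(%rsp)
	movq	%r12, 3*8(%rsp)
	movq	%r13, 2*8(%rsp)
	movq	%r14, 1*8(%rsp)
	movq	%r15, 0*8(%rsp)
	/* RESTORE_ARGS/RESTORE_REST undo the above in reverse */
#endif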
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53 #include <asm/paravirt.h>
54 #include <asm/ftrace.h>
55
56 .code64
57
58 #ifdef CONFIG_FTRACE
59 #ifdef CONFIG_DYNAMIC_FTRACE
60 ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74 .globl mcount_call
75 mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88 END(mcount)
89
90 ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106 .globl ftrace_call
107 ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119 .globl ftrace_stub
120 ftrace_stub:
121 retq
122 END(ftrace_caller)
123
124 #else /* ! CONFIG_DYNAMIC_FTRACE */
125 ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128 .globl ftrace_stub
129 ftrace_stub:
130 retq
131
132 trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159 END(mcount)
160 #endif /* CONFIG_DYNAMIC_FTRACE */
161 #endif /* CONFIG_FTRACE */
162
163 #ifndef CONFIG_PREEMPT
164 #define retint_kernel retint_restore_args
165 #endif
166
167 #ifdef CONFIG_PARAVIRT
168 ENTRY(native_usergs_sysret64)
169 swapgs
170 sysretq
171 #endif /* CONFIG_PARAVIRT */
172
173
174 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
175 #ifdef CONFIG_TRACE_IRQFLAGS
176 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
177 jnc 1f
178 TRACE_IRQS_ON
179 1:
180 #endif
181 .endm
182
183 /*
184 * C code is not supposed to know about the undefined top of stack. Every time
185 * a C function with a pt_regs argument is called from the SYSCALL based
186 * fast path, FIXUP_TOP_OF_STACK is needed.
187 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
188 * manipulation.
189 */
190
191 /* %rsp:at FRAMEEND */
192 .macro FIXUP_TOP_OF_STACK tmp
193 movq %gs:pda_oldrsp,\tmp
194 movq \tmp,RSP(%rsp)
195 movq $__USER_DS,SS(%rsp)
196 movq $__USER_CS,CS(%rsp)
197 movq $-1,RCX(%rsp)
198 movq R11(%rsp),\tmp /* get eflags */
199 movq \tmp,EFLAGS(%rsp)
200 .endm
201
202 .macro RESTORE_TOP_OF_STACK tmp,offset=0
203 movq RSP-\offset(%rsp),\tmp
204 movq \tmp,%gs:pda_oldrsp
205 movq EFLAGS-\offset(%rsp),\tmp
206 movq \tmp,R11-\offset(%rsp)
207 .endm
208
209 .macro FAKE_STACK_FRAME child_rip
210 /* push in order ss, rsp, eflags, cs, rip */
211 xorl %eax, %eax
212 pushq $__KERNEL_DS /* ss */
213 CFI_ADJUST_CFA_OFFSET 8
214 /*CFI_REL_OFFSET ss,0*/
215 pushq %rax /* rsp */
216 CFI_ADJUST_CFA_OFFSET 8
217 CFI_REL_OFFSET rsp,0
218 pushq $(1<<9) /* eflags - interrupts on */
219 CFI_ADJUST_CFA_OFFSET 8
220 /*CFI_REL_OFFSET rflags,0*/
221 pushq $__KERNEL_CS /* cs */
222 CFI_ADJUST_CFA_OFFSET 8
223 /*CFI_REL_OFFSET cs,0*/
224 pushq \child_rip /* rip */
225 CFI_ADJUST_CFA_OFFSET 8
226 CFI_REL_OFFSET rip,0
227 pushq %rax /* orig rax */
228 CFI_ADJUST_CFA_OFFSET 8
229 .endm
230
231 .macro UNFAKE_STACK_FRAME
232 addq $8*6, %rsp
233 CFI_ADJUST_CFA_OFFSET -(6*8)
234 .endm
235
236 .macro CFI_DEFAULT_STACK start=1
237 .if \start
238 CFI_STARTPROC simple
239 CFI_SIGNAL_FRAME
240 CFI_DEF_CFA rsp,SS+8
241 .else
242 CFI_DEF_CFA_OFFSET SS+8
243 .endif
244 CFI_REL_OFFSET r15,R15
245 CFI_REL_OFFSET r14,R14
246 CFI_REL_OFFSET r13,R13
247 CFI_REL_OFFSET r12,R12
248 CFI_REL_OFFSET rbp,RBP
249 CFI_REL_OFFSET rbx,RBX
250 CFI_REL_OFFSET r11,R11
251 CFI_REL_OFFSET r10,R10
252 CFI_REL_OFFSET r9,R9
253 CFI_REL_OFFSET r8,R8
254 CFI_REL_OFFSET rax,RAX
255 CFI_REL_OFFSET rcx,RCX
256 CFI_REL_OFFSET rdx,RDX
257 CFI_REL_OFFSET rsi,RSI
258 CFI_REL_OFFSET rdi,RDI
259 CFI_REL_OFFSET rip,RIP
260 /*CFI_REL_OFFSET cs,CS*/
261 /*CFI_REL_OFFSET rflags,EFLAGS*/
262 CFI_REL_OFFSET rsp,RSP
263 /*CFI_REL_OFFSET ss,SS*/
264 .endm
265 /*
266 * A newly forked process directly context switches into this.
267 */
268 /* rdi: prev */
269 ENTRY(ret_from_fork)
270 CFI_DEFAULT_STACK
271 push kernel_eflags(%rip)
272 CFI_ADJUST_CFA_OFFSET 8
273 popf # reset kernel eflags
274 CFI_ADJUST_CFA_OFFSET -8
275 call schedule_tail
276 GET_THREAD_INFO(%rcx)
277 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
278 jnz rff_trace
279 rff_action:
280 RESTORE_REST
281 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
282 je int_ret_from_sys_call
283 testl $_TIF_IA32,TI_flags(%rcx)
284 jnz int_ret_from_sys_call
285 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
286 jmp ret_from_sys_call
287 rff_trace:
288 movq %rsp,%rdi
289 call syscall_trace_leave
290 GET_THREAD_INFO(%rcx)
291 jmp rff_action
292 CFI_ENDPROC
293 END(ret_from_fork)
294
295 /*
296 * System call entry. Up to 6 arguments in registers are supported.
297 *
298 * SYSCALL does not save anything on the stack and does not change the
299 * stack pointer.
300 */
301
302 /*
303 * Register setup:
304 * rax system call number
305 * rdi arg0
306 * rcx return address for syscall/sysret, C arg3
307 * rsi arg1
308 * rdx arg2
309 * r10 arg3 (--> moved to rcx for C)
310 * r8 arg4
311 * r9 arg5
312 * r11 eflags for syscall/sysret, temporary for C
313 * r12-r15,rbp,rbx saved by C code, not touched.
314 *
315 * Interrupts are off on entry.
316 * Only called from user space.
317 *
318 * XXX if we had a free scratch register we could save RSP into the stack frame
319 * and report it properly in ps. Unfortunately we don't have one.
320 *
321 * When the user can change the frame, always force IRET. That is because
322 * IRET deals with non-canonical addresses better. SYSRET has trouble
323 * with them due to bugs in both AMD and Intel CPUs.
324 */
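/*
 * To illustrate the convention above from the caller's side, a minimal,
 * hypothetical user-space sketch of issuing write(1, msg, len); "msg" and
 * "len" are made-up names and this block is not assembled:
 */
#if 0	/* illustrative sketch, not built */
	movl	$1, %eax		/* rax: system call number (__NR_write) */
	movl	$1, %edi		/* rdi: arg0 - file descriptor */
	leaq	msg(%rip), %rsi		/* rsi: arg1 - buffer (hypothetical label) */
	movl	$len, %edx		/* rdx: arg2 - count (hypothetical constant) */
	syscall				/* rcx <- return RIP, r11 <- rflags */
	/* on return %rax holds the result; %rcx and %r11 are clobbered */
#endif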
325
326 ENTRY(system_call)
327 CFI_STARTPROC simple
328 CFI_SIGNAL_FRAME
329 CFI_DEF_CFA rsp,PDA_STACKOFFSET
330 CFI_REGISTER rip,rcx
331 /*CFI_REGISTER rflags,r11*/
332 SWAPGS_UNSAFE_STACK
333 /*
334 * A hypervisor implementation might want to use a label
335 * after the swapgs, so that it can do the swapgs
336 * for the guest and jump here on syscall.
337 */
338 ENTRY(system_call_after_swapgs)
339
340 movq %rsp,%gs:pda_oldrsp
341 movq %gs:pda_kernelstack,%rsp
342 /*
343 * No need to follow this irqs off/on section - it's straight
344 * and short:
345 */
346 ENABLE_INTERRUPTS(CLBR_NONE)
347 SAVE_ARGS 8,1
348 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
349 movq %rcx,RIP-ARGOFFSET(%rsp)
350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
351 GET_THREAD_INFO(%rcx)
352 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
353 TI_flags(%rcx)
354 jnz tracesys
355 cmpq $__NR_syscall_max,%rax
356 ja badsys
357 movq %r10,%rcx
358 call *sys_call_table(,%rax,8) # XXX: rip relative
359 movq %rax,RAX-ARGOFFSET(%rsp)
360 /*
361 * Syscall return path ending with SYSRET (fast path)
362 * Has incomplete stack frame and undefined top of stack.
363 */
364 ret_from_sys_call:
365 movl $_TIF_ALLWORK_MASK,%edi
366 /* edi: flagmask */
367 sysret_check:
368 LOCKDEP_SYS_EXIT
369 GET_THREAD_INFO(%rcx)
370 DISABLE_INTERRUPTS(CLBR_NONE)
371 TRACE_IRQS_OFF
372 movl TI_flags(%rcx),%edx
373 andl %edi,%edx
374 jnz sysret_careful
375 CFI_REMEMBER_STATE
376 /*
377 * sysretq will re-enable interrupts:
378 */
379 TRACE_IRQS_ON
380 movq RIP-ARGOFFSET(%rsp),%rcx
381 CFI_REGISTER rip,rcx
382 RESTORE_ARGS 0,-ARG_SKIP,1
383 /*CFI_REGISTER rflags,r11*/
384 movq %gs:pda_oldrsp, %rsp
385 USERGS_SYSRET64
386
387 CFI_RESTORE_STATE
388 /* Handle reschedules */
389 /* edx: work, edi: workmask */
390 sysret_careful:
391 bt $TIF_NEED_RESCHED,%edx
392 jnc sysret_signal
393 TRACE_IRQS_ON
394 ENABLE_INTERRUPTS(CLBR_NONE)
395 pushq %rdi
396 CFI_ADJUST_CFA_OFFSET 8
397 call schedule
398 popq %rdi
399 CFI_ADJUST_CFA_OFFSET -8
400 jmp sysret_check
401
402 /* Handle a signal */
403 sysret_signal:
404 TRACE_IRQS_ON
405 ENABLE_INTERRUPTS(CLBR_NONE)
406 testl $_TIF_DO_NOTIFY_MASK,%edx
407 jz 1f
408
409 /* Really a signal */
410 /* edx: work flags (arg3) */
411 leaq do_notify_resume(%rip),%rax
412 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
413 xorl %esi,%esi # oldset -> arg2
414 call ptregscall_common
415 1: movl $_TIF_WORK_MASK,%edi
416 /* Use IRET because user could have changed frame. This
417 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
418 DISABLE_INTERRUPTS(CLBR_NONE)
419 TRACE_IRQS_OFF
420 jmp int_with_check
421
422 badsys:
423 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
424 jmp ret_from_sys_call
425
426 /* Do syscall tracing */
427 tracesys:
428 SAVE_REST
429 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
430 FIXUP_TOP_OF_STACK %rdi
431 movq %rsp,%rdi
432 call syscall_trace_enter
433 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
434 RESTORE_REST
435 cmpq $__NR_syscall_max,%rax
436 ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
437 movq %r10,%rcx /* fixup for C */
438 call *sys_call_table(,%rax,8)
439 movq %rax,RAX-ARGOFFSET(%rsp)
440 /* Use IRET because user could have changed frame */
441
442 /*
443 * Syscall return path ending with IRET.
444 * Has correct top of stack, but partial stack frame.
445 */
446 .globl int_ret_from_sys_call
447 int_ret_from_sys_call:
448 DISABLE_INTERRUPTS(CLBR_NONE)
449 TRACE_IRQS_OFF
450 testl $3,CS-ARGOFFSET(%rsp)
451 je retint_restore_args
452 movl $_TIF_ALLWORK_MASK,%edi
453 /* edi: mask to check */
454 int_with_check:
455 LOCKDEP_SYS_EXIT_IRQ
456 GET_THREAD_INFO(%rcx)
457 movl TI_flags(%rcx),%edx
458 andl %edi,%edx
459 jnz int_careful
460 andl $~TS_COMPAT,TI_status(%rcx)
461 jmp retint_swapgs
462
463 /* Either reschedule or signal or syscall exit tracking needed. */
464 /* First do a reschedule test. */
465 /* edx: work, edi: workmask */
466 int_careful:
467 bt $TIF_NEED_RESCHED,%edx
468 jnc int_very_careful
469 TRACE_IRQS_ON
470 ENABLE_INTERRUPTS(CLBR_NONE)
471 pushq %rdi
472 CFI_ADJUST_CFA_OFFSET 8
473 call schedule
474 popq %rdi
475 CFI_ADJUST_CFA_OFFSET -8
476 DISABLE_INTERRUPTS(CLBR_NONE)
477 TRACE_IRQS_OFF
478 jmp int_with_check
479
480 /* handle signals and tracing -- both require a full stack frame */
481 int_very_careful:
482 TRACE_IRQS_ON
483 ENABLE_INTERRUPTS(CLBR_NONE)
484 SAVE_REST
485 /* Check for syscall exit trace */
486 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
487 jz int_signal
488 pushq %rdi
489 CFI_ADJUST_CFA_OFFSET 8
490 leaq 8(%rsp),%rdi # &ptregs -> arg1
491 call syscall_trace_leave
492 popq %rdi
493 CFI_ADJUST_CFA_OFFSET -8
494 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
495 jmp int_restore_rest
496
497 int_signal:
498 testl $_TIF_DO_NOTIFY_MASK,%edx
499 jz 1f
500 movq %rsp,%rdi # &ptregs -> arg1
501 xorl %esi,%esi # oldset -> arg2
502 call do_notify_resume
503 1: movl $_TIF_WORK_MASK,%edi
504 int_restore_rest:
505 RESTORE_REST
506 DISABLE_INTERRUPTS(CLBR_NONE)
507 TRACE_IRQS_OFF
508 jmp int_with_check
509 CFI_ENDPROC
510 END(system_call)
511
512 /*
513 * Certain special system calls need to save a full stack frame.
514 */
515
516 .macro PTREGSCALL label,func,arg
517 .globl \label
518 \label:
519 leaq \func(%rip),%rax
520 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
521 jmp ptregscall_common
522 END(\label)
523 .endm
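/*
 * For illustration, the invocation "PTREGSCALL stub_clone, sys_clone, %r8"
 * below expands to roughly the following (sketch only, not assembled here):
 */
#if 0	/* illustrative sketch, not built */
	.globl stub_clone
stub_clone:
	leaq	sys_clone(%rip),%rax
	leaq	-ARGOFFSET+8(%rsp),%r8	/* 8 for return address */
	jmp	ptregscall_common
END(stub_clone)
#endif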
524
525 CFI_STARTPROC
526
527 PTREGSCALL stub_clone, sys_clone, %r8
528 PTREGSCALL stub_fork, sys_fork, %rdi
529 PTREGSCALL stub_vfork, sys_vfork, %rdi
530 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
531 PTREGSCALL stub_iopl, sys_iopl, %rsi
532
533 ENTRY(ptregscall_common)
534 popq %r11
535 CFI_ADJUST_CFA_OFFSET -8
536 CFI_REGISTER rip, r11
537 SAVE_REST
538 movq %r11, %r15
539 CFI_REGISTER rip, r15
540 FIXUP_TOP_OF_STACK %r11
541 call *%rax
542 RESTORE_TOP_OF_STACK %r11
543 movq %r15, %r11
544 CFI_REGISTER rip, r11
545 RESTORE_REST
546 pushq %r11
547 CFI_ADJUST_CFA_OFFSET 8
548 CFI_REL_OFFSET rip, 0
549 ret
550 CFI_ENDPROC
551 END(ptregscall_common)
552
553 ENTRY(stub_execve)
554 CFI_STARTPROC
555 popq %r11
556 CFI_ADJUST_CFA_OFFSET -8
557 CFI_REGISTER rip, r11
558 SAVE_REST
559 FIXUP_TOP_OF_STACK %r11
560 movq %rsp, %rcx
561 call sys_execve
562 RESTORE_TOP_OF_STACK %r11
563 movq %rax,RAX(%rsp)
564 RESTORE_REST
565 jmp int_ret_from_sys_call
566 CFI_ENDPROC
567 END(stub_execve)
568
569 /*
570 * sigreturn is special because it needs to restore all registers on return.
571 * This cannot be done with SYSRET, so use the IRET return path instead.
572 */
573 ENTRY(stub_rt_sigreturn)
574 CFI_STARTPROC
575 addq $8, %rsp
576 CFI_ADJUST_CFA_OFFSET -8
577 SAVE_REST
578 movq %rsp,%rdi
579 FIXUP_TOP_OF_STACK %r11
580 call sys_rt_sigreturn
581 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
582 RESTORE_REST
583 jmp int_ret_from_sys_call
584 CFI_ENDPROC
585 END(stub_rt_sigreturn)
586
587 /*
588 * initial frame state for interrupts and exceptions
589 */
590 .macro _frame ref
591 CFI_STARTPROC simple
592 CFI_SIGNAL_FRAME
593 CFI_DEF_CFA rsp,SS+8-\ref
594 /*CFI_REL_OFFSET ss,SS-\ref*/
595 CFI_REL_OFFSET rsp,RSP-\ref
596 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
597 /*CFI_REL_OFFSET cs,CS-\ref*/
598 CFI_REL_OFFSET rip,RIP-\ref
599 .endm
600
601 /* initial frame state for interrupts (and exceptions without error code) */
602 #define INTR_FRAME _frame RIP
603 /* initial frame state for exceptions with error code (and interrupts with
604 vector already pushed) */
605 #define XCPT_FRAME _frame ORIG_RAX
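/*
 * For illustration (architectural sketch): what the CPU has already pushed
 * when these frame macros run, relative to %rsp and before any further pushes:
 *
 *	INTR_FRAME (no error code)	XCPT_FRAME (error code/vector on top)
 *	 0(%rsp)  RIP			 0(%rsp)  error code (or ~vector)
 *	 8(%rsp)  CS			 8(%rsp)  RIP
 *	16(%rsp)  RFLAGS		16(%rsp)  CS
 *	24(%rsp)  RSP			24(%rsp)  RFLAGS
 *	32(%rsp)  SS			32(%rsp)  RSP
 *					40(%rsp)  SS
 */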
606
607 /*
608 * Interrupt entry/exit.
609 *
610 * Interrupt entry points save only the callee-clobbered registers in the fast path.
611 *
612 * Entry runs with interrupts off.
613 */
614
615 /* 0(%rsp): interrupt number */
616 .macro interrupt func
617 cld
618 SAVE_ARGS
619 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
620 pushq %rbp
621 CFI_ADJUST_CFA_OFFSET 8
622 CFI_REL_OFFSET rbp, 0
623 movq %rsp,%rbp
624 CFI_DEF_CFA_REGISTER rbp
625 testl $3,CS(%rdi)
626 je 1f
627 SWAPGS
628 /* irqcount is used to check if a CPU is already on an interrupt
629 stack or not. While this is essentially redundant with preempt_count
630 it is a little cheaper to use a separate counter in the PDA
631 (short of moving irq_enter into assembly, which would be too
632 much work) */
633 1: incl %gs:pda_irqcount
634 cmoveq %gs:pda_irqstackptr,%rsp
635 push %rbp # backlink for old unwinder
636 /*
637 * We entered an interrupt context - irqs are off:
638 */
639 TRACE_IRQS_OFF
640 call \func
641 .endm
642
643 ENTRY(common_interrupt)
644 XCPT_FRAME
645 interrupt do_IRQ
646 /* 0(%rsp): oldrsp-ARGOFFSET */
647 ret_from_intr:
648 DISABLE_INTERRUPTS(CLBR_NONE)
649 TRACE_IRQS_OFF
650 decl %gs:pda_irqcount
651 leaveq
652 CFI_DEF_CFA_REGISTER rsp
653 CFI_ADJUST_CFA_OFFSET -8
654 exit_intr:
655 GET_THREAD_INFO(%rcx)
656 testl $3,CS-ARGOFFSET(%rsp)
657 je retint_kernel
658
659 /* Interrupt came from user space */
660 /*
661 * Has a correct top of stack, but a partial stack frame
662 * %rcx: thread info. Interrupts off.
663 */
664 retint_with_reschedule:
665 movl $_TIF_WORK_MASK,%edi
666 retint_check:
667 LOCKDEP_SYS_EXIT_IRQ
668 movl TI_flags(%rcx),%edx
669 andl %edi,%edx
670 CFI_REMEMBER_STATE
671 jnz retint_careful
672
673 retint_swapgs: /* return to user-space */
674 /*
675 * The iretq could re-enable interrupts:
676 */
677 DISABLE_INTERRUPTS(CLBR_ANY)
678 TRACE_IRQS_IRETQ
679 SWAPGS
680 jmp restore_args
681
682 retint_restore_args: /* return to kernel space */
683 DISABLE_INTERRUPTS(CLBR_ANY)
684 /*
685 * The iretq could re-enable interrupts:
686 */
687 TRACE_IRQS_IRETQ
688 restore_args:
689 RESTORE_ARGS 0,8,0
690
691 irq_return:
692 INTERRUPT_RETURN
693
694 .section __ex_table, "a"
695 .quad irq_return, bad_iret
696 .previous
697
698 #ifdef CONFIG_PARAVIRT
699 ENTRY(native_iret)
700 iretq
701
702 .section __ex_table,"a"
703 .quad native_iret, bad_iret
704 .previous
705 #endif
706
707 .section .fixup,"ax"
708 bad_iret:
709 /*
710 * The iret traps when the %cs or %ss being restored is bogus.
711 * We've lost the original trap vector and error code.
712 * #GPF is the most likely one to get for an invalid selector.
713 * So pretend we completed the iret and took the #GPF in user mode.
714 *
715 * We are now running with the kernel GS after exception recovery.
716 * But error_entry expects us to have user GS to match the user %cs,
717 * so swap back.
718 */
719 pushq $0
720
721 SWAPGS
722 jmp general_protection
723
724 .previous
725
726 /* edi: workmask, edx: work */
727 retint_careful:
728 CFI_RESTORE_STATE
729 bt $TIF_NEED_RESCHED,%edx
730 jnc retint_signal
731 TRACE_IRQS_ON
732 ENABLE_INTERRUPTS(CLBR_NONE)
733 pushq %rdi
734 CFI_ADJUST_CFA_OFFSET 8
735 call schedule
736 popq %rdi
737 CFI_ADJUST_CFA_OFFSET -8
738 GET_THREAD_INFO(%rcx)
739 DISABLE_INTERRUPTS(CLBR_NONE)
740 TRACE_IRQS_OFF
741 jmp retint_check
742
743 retint_signal:
744 testl $_TIF_DO_NOTIFY_MASK,%edx
745 jz retint_swapgs
746 TRACE_IRQS_ON
747 ENABLE_INTERRUPTS(CLBR_NONE)
748 SAVE_REST
749 movq $-1,ORIG_RAX(%rsp)
750 xorl %esi,%esi # oldset
751 movq %rsp,%rdi # &pt_regs
752 call do_notify_resume
753 RESTORE_REST
754 DISABLE_INTERRUPTS(CLBR_NONE)
755 TRACE_IRQS_OFF
756 GET_THREAD_INFO(%rcx)
757 jmp retint_with_reschedule
758
759 #ifdef CONFIG_PREEMPT
760 /* Returning to kernel space. Check if we need preemption */
761 /* rcx: threadinfo. interrupts off. */
762 ENTRY(retint_kernel)
763 cmpl $0,TI_preempt_count(%rcx)
764 jnz retint_restore_args
765 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
766 jnc retint_restore_args
767 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
768 jnc retint_restore_args
769 call preempt_schedule_irq
770 jmp exit_intr
771 #endif
772
773 CFI_ENDPROC
774 END(common_interrupt)
775
776 /*
777 * APIC interrupts.
778 */
779 .macro apicinterrupt num,func
780 INTR_FRAME
781 pushq $~(\num)
782 CFI_ADJUST_CFA_OFFSET 8
783 interrupt \func
784 jmp ret_from_intr
785 CFI_ENDPROC
786 .endm
787
788 ENTRY(thermal_interrupt)
789 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
790 END(thermal_interrupt)
791
792 ENTRY(threshold_interrupt)
793 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
794 END(threshold_interrupt)
795
796 #ifdef CONFIG_SMP
797 ENTRY(reschedule_interrupt)
798 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
799 END(reschedule_interrupt)
800
801 .macro INVALIDATE_ENTRY num
802 ENTRY(invalidate_interrupt\num)
803 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
804 END(invalidate_interrupt\num)
805 .endm
806
807 INVALIDATE_ENTRY 0
808 INVALIDATE_ENTRY 1
809 INVALIDATE_ENTRY 2
810 INVALIDATE_ENTRY 3
811 INVALIDATE_ENTRY 4
812 INVALIDATE_ENTRY 5
813 INVALIDATE_ENTRY 6
814 INVALIDATE_ENTRY 7
815
816 ENTRY(call_function_interrupt)
817 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
818 END(call_function_interrupt)
819 ENTRY(irq_move_cleanup_interrupt)
820 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
821 END(irq_move_cleanup_interrupt)
822 #endif
823
824 ENTRY(apic_timer_interrupt)
825 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
826 END(apic_timer_interrupt)
827
828 ENTRY(uv_bau_message_intr1)
829 apicinterrupt 220,uv_bau_message_interrupt
830 END(uv_bau_message_intr1)
831
832 ENTRY(error_interrupt)
833 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
834 END(error_interrupt)
835
836 ENTRY(spurious_interrupt)
837 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
838 END(spurious_interrupt)
839
840 /*
841 * Exception entry points.
842 */
843 .macro zeroentry sym
844 INTR_FRAME
845 PARAVIRT_ADJUST_EXCEPTION_FRAME
846 pushq $0 /* push error code/oldrax */
847 CFI_ADJUST_CFA_OFFSET 8
848 pushq %rax /* push real oldrax to the rdi slot */
849 CFI_ADJUST_CFA_OFFSET 8
850 CFI_REL_OFFSET rax,0
851 leaq \sym(%rip),%rax
852 jmp error_entry
853 CFI_ENDPROC
854 .endm
855
856 .macro errorentry sym
857 XCPT_FRAME
858 PARAVIRT_ADJUST_EXCEPTION_FRAME
859 pushq %rax
860 CFI_ADJUST_CFA_OFFSET 8
861 CFI_REL_OFFSET rax,0
862 leaq \sym(%rip),%rax
863 jmp error_entry
864 CFI_ENDPROC
865 .endm
866
867 /* error code is on the stack already */
868 /* handle NMI-like exceptions that can happen everywhere */
869 .macro paranoidentry sym, ist=0, irqtrace=1
870 SAVE_ALL
871 cld
872 movl $1,%ebx
873 movl $MSR_GS_BASE,%ecx
874 rdmsr
875 testl %edx,%edx
876 js 1f
877 SWAPGS
878 xorl %ebx,%ebx
879 1:
880 .if \ist
881 movq %gs:pda_data_offset, %rbp
882 .endif
883 movq %rsp,%rdi
884 movq ORIG_RAX(%rsp),%rsi
885 movq $-1,ORIG_RAX(%rsp)
886 .if \ist
887 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
888 .endif
889 call \sym
890 .if \ist
891 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
892 .endif
893 DISABLE_INTERRUPTS(CLBR_NONE)
894 .if \irqtrace
895 TRACE_IRQS_OFF
896 .endif
897 .endm
898
899 /*
900 * "Paranoid" exit path from exception stack.
901 * Paranoid because this is used by NMIs and cannot take
902 * any kernel state for granted.
903 * We don't do kernel preemption checks here, because only
904 * NMI should be common and it does not enable IRQs and
905 * cannot get reschedule ticks.
906 *
907 * "trace" is 0 for the NMI handler only, because irq-tracing
908 * is fundamentally NMI-unsafe. (we cannot change the soft and
909 * hard flags at once, atomically)
910 */
911 .macro paranoidexit trace=1
912 /* ebx: no swapgs flag */
913 paranoid_exit\trace:
914 testl %ebx,%ebx /* swapgs needed? */
915 jnz paranoid_restore\trace
916 testl $3,CS(%rsp)
917 jnz paranoid_userspace\trace
918 paranoid_swapgs\trace:
919 .if \trace
920 TRACE_IRQS_IRETQ 0
921 .endif
922 SWAPGS_UNSAFE_STACK
923 paranoid_restore\trace:
924 RESTORE_ALL 8
925 jmp irq_return
926 paranoid_userspace\trace:
927 GET_THREAD_INFO(%rcx)
928 movl TI_flags(%rcx),%ebx
929 andl $_TIF_WORK_MASK,%ebx
930 jz paranoid_swapgs\trace
931 movq %rsp,%rdi /* &pt_regs */
932 call sync_regs
933 movq %rax,%rsp /* switch stack for scheduling */
934 testl $_TIF_NEED_RESCHED,%ebx
935 jnz paranoid_schedule\trace
936 movl %ebx,%edx /* arg3: thread flags */
937 .if \trace
938 TRACE_IRQS_ON
939 .endif
940 ENABLE_INTERRUPTS(CLBR_NONE)
941 xorl %esi,%esi /* arg2: oldset */
942 movq %rsp,%rdi /* arg1: &pt_regs */
943 call do_notify_resume
944 DISABLE_INTERRUPTS(CLBR_NONE)
945 .if \trace
946 TRACE_IRQS_OFF
947 .endif
948 jmp paranoid_userspace\trace
949 paranoid_schedule\trace:
950 .if \trace
951 TRACE_IRQS_ON
952 .endif
953 ENABLE_INTERRUPTS(CLBR_ANY)
954 call schedule
955 DISABLE_INTERRUPTS(CLBR_ANY)
956 .if \trace
957 TRACE_IRQS_OFF
958 .endif
959 jmp paranoid_userspace\trace
960 CFI_ENDPROC
961 .endm
962
963 /*
964 * Exception entry point. This expects an error code/orig_rax on the stack
965 * and the exception handler in %rax.
966 */
967 KPROBE_ENTRY(error_entry)
968 _frame RDI
969 CFI_REL_OFFSET rax,0
970 /* rdi slot contains rax, oldrax contains error code */
971 cld
972 subq $14*8,%rsp
973 CFI_ADJUST_CFA_OFFSET (14*8)
974 movq %rsi,13*8(%rsp)
975 CFI_REL_OFFSET rsi,RSI
976 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
977 CFI_REGISTER rax,rsi
978 movq %rdx,12*8(%rsp)
979 CFI_REL_OFFSET rdx,RDX
980 movq %rcx,11*8(%rsp)
981 CFI_REL_OFFSET rcx,RCX
982 movq %rsi,10*8(%rsp) /* store rax */
983 CFI_REL_OFFSET rax,RAX
984 movq %r8, 9*8(%rsp)
985 CFI_REL_OFFSET r8,R8
986 movq %r9, 8*8(%rsp)
987 CFI_REL_OFFSET r9,R9
988 movq %r10,7*8(%rsp)
989 CFI_REL_OFFSET r10,R10
990 movq %r11,6*8(%rsp)
991 CFI_REL_OFFSET r11,R11
992 movq %rbx,5*8(%rsp)
993 CFI_REL_OFFSET rbx,RBX
994 movq %rbp,4*8(%rsp)
995 CFI_REL_OFFSET rbp,RBP
996 movq %r12,3*8(%rsp)
997 CFI_REL_OFFSET r12,R12
998 movq %r13,2*8(%rsp)
999 CFI_REL_OFFSET r13,R13
1000 movq %r14,1*8(%rsp)
1001 CFI_REL_OFFSET r14,R14
1002 movq %r15,(%rsp)
1003 CFI_REL_OFFSET r15,R15
1004 xorl %ebx,%ebx
1005 testl $3,CS(%rsp)
1006 je error_kernelspace
1007 error_swapgs:
1008 SWAPGS
1009 error_sti:
1010 movq %rdi,RDI(%rsp)
1011 CFI_REL_OFFSET rdi,RDI
1012 movq %rsp,%rdi
1013 movq ORIG_RAX(%rsp),%rsi /* get error code */
1014 movq $-1,ORIG_RAX(%rsp)
1015 call *%rax
1016 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
1017 error_exit:
1018 movl %ebx,%eax
1019 RESTORE_REST
1020 DISABLE_INTERRUPTS(CLBR_NONE)
1021 TRACE_IRQS_OFF
1022 GET_THREAD_INFO(%rcx)
1023 testl %eax,%eax
1024 jne retint_kernel
1025 LOCKDEP_SYS_EXIT_IRQ
1026 movl TI_flags(%rcx),%edx
1027 movl $_TIF_WORK_MASK,%edi
1028 andl %edi,%edx
1029 jnz retint_careful
1030 jmp retint_swapgs
1031 CFI_ENDPROC
1032
1033 error_kernelspace:
1034 incl %ebx
1035 /* There are two places in the kernel that can potentially fault with
1036 usergs. Handle them here. The exception handlers after
1037 iret run with kernel gs again, so don't set the user space flag.
1038 B stepping K8s sometimes report a truncated RIP for IRET
1039 exceptions returning to compat mode. Check for these here too. */
1040 leaq irq_return(%rip),%rcx
1041 cmpq %rcx,RIP(%rsp)
1042 je error_swapgs
1043 movl %ecx,%ecx /* zero extend */
1044 cmpq %rcx,RIP(%rsp)
1045 je error_swapgs
1046 cmpq $gs_change,RIP(%rsp)
1047 je error_swapgs
1048 jmp error_sti
1049 KPROBE_END(error_entry)
1050
1051 /* Reload gs selector with exception handling */
1052 /* edi: new selector */
1053 ENTRY(native_load_gs_index)
1054 CFI_STARTPROC
1055 pushf
1056 CFI_ADJUST_CFA_OFFSET 8
1057 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
1058 SWAPGS
1059 gs_change:
1060 movl %edi,%gs
1061 2: mfence /* workaround */
1062 SWAPGS
1063 popf
1064 CFI_ADJUST_CFA_OFFSET -8
1065 ret
1066 CFI_ENDPROC
1067 ENDPROC(native_load_gs_index)
1068
1069 .section __ex_table,"a"
1070 .align 8
1071 .quad gs_change,bad_gs
1072 .previous
1073 .section .fixup,"ax"
1074 /* running with kernelgs */
1075 bad_gs:
1076 SWAPGS /* switch back to user gs */
1077 xorl %eax,%eax
1078 movl %eax,%gs
1079 jmp 2b
1080 .previous
1081
1082 /*
1083 * Create a kernel thread.
1084 *
1085 * C extern interface:
1086 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
1087 *
1088 * asm input arguments:
1089 * rdi: fn, rsi: arg, rdx: flags
1090 */
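/*
 * A minimal, hypothetical asm-level caller following the convention above
 * (rdi = fn, rsi = arg, rdx = flags). In practice kernel_thread() is called
 * from C; "my_thread_fn" is a made-up name and this block is not assembled.
 * The CLONE_* names would need <linux/sched.h>:
 */
#if 0	/* illustrative sketch, not built */
	leaq	my_thread_fn(%rip), %rdi	/* hypothetical int (*fn)(void *) */
	xorl	%esi, %esi			/* arg = NULL */
	movl	$(CLONE_FS | CLONE_FILES), %edx	/* clone flags */
	call	kernel_thread
	/* on return %rax holds the new pid or a negative errno */
#endif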
1091 ENTRY(kernel_thread)
1092 CFI_STARTPROC
1093 FAKE_STACK_FRAME $child_rip
1094 SAVE_ALL
1095
1096 # rdi: flags, rsi: usp, rdx: will be &pt_regs
1097 movq %rdx,%rdi
1098 orq kernel_thread_flags(%rip),%rdi
1099 movq $-1, %rsi
1100 movq %rsp, %rdx
1101
1102 xorl %r8d,%r8d
1103 xorl %r9d,%r9d
1104
1105 # clone now
1106 call do_fork
1107 movq %rax,RAX(%rsp)
1108 xorl %edi,%edi
1109
1110 /*
1111 * It isn't worth checking for a reschedule here,
1112 * so internally to the x86_64 port you can rely on kernel_thread()
1113 * not to reschedule the child before returning; this avoids the need
1114 * for hacks, for example to fork off the per-CPU idle tasks.
1115 * [Hopefully no generic code relies on the reschedule -AK]
1116 */
1117 RESTORE_ALL
1118 UNFAKE_STACK_FRAME
1119 ret
1120 CFI_ENDPROC
1121 ENDPROC(kernel_thread)
1122
1123 child_rip:
1124 pushq $0 # fake return address
1125 CFI_STARTPROC
1126 /*
1127 * Here we are in the child and the registers are set as they were
1128 * at kernel_thread() invocation in the parent.
1129 */
1130 movq %rdi, %rax
1131 movq %rsi, %rdi
1132 call *%rax
1133 # exit
1134 mov %eax, %edi
1135 call do_exit
1136 CFI_ENDPROC
1137 ENDPROC(child_rip)
1138
1139 /*
1140 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1141 *
1142 * C extern interface:
1143 * extern long execve(char *name, char **argv, char **envp)
1144 *
1145 * asm input arguments:
1146 * rdi: name, rsi: argv, rdx: envp
1147 *
1148 * We want to fall back into:
1149 * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs *regs)
1150 *
1151 * do_sys_execve asm fallback arguments:
1152 * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
1153 */
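/*
 * A minimal, hypothetical caller sketch for the interface above
 * (rdi = name, rsi = argv, rdx = envp). In practice this is called from C;
 * the init_* labels are made-up names and this block is not assembled:
 */
#if 0	/* illustrative sketch, not built */
	leaq	init_path(%rip), %rdi	/* filename */
	leaq	init_argv(%rip), %rsi	/* argv[] */
	leaq	init_envp(%rip), %rdx	/* envp[] */
	call	kernel_execve
	/* if it returns here, %rax holds a negative errno; on success it
	   does not return to the caller */
#endif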
1154 ENTRY(kernel_execve)
1155 CFI_STARTPROC
1156 FAKE_STACK_FRAME $0
1157 SAVE_ALL
1158 movq %rsp,%rcx
1159 call sys_execve
1160 movq %rax, RAX(%rsp)
1161 RESTORE_REST
1162 testq %rax,%rax
1163 je int_ret_from_sys_call
1164 RESTORE_ARGS
1165 UNFAKE_STACK_FRAME
1166 ret
1167 CFI_ENDPROC
1168 ENDPROC(kernel_execve)
1169
1170 KPROBE_ENTRY(page_fault)
1171 errorentry do_page_fault
1172 KPROBE_END(page_fault)
1173
1174 ENTRY(coprocessor_error)
1175 zeroentry do_coprocessor_error
1176 END(coprocessor_error)
1177
1178 ENTRY(simd_coprocessor_error)
1179 zeroentry do_simd_coprocessor_error
1180 END(simd_coprocessor_error)
1181
1182 ENTRY(device_not_available)
1183 zeroentry math_state_restore
1184 END(device_not_available)
1185
1186 /* runs on exception stack */
1187 KPROBE_ENTRY(debug)
1188 INTR_FRAME
1189 pushq $0
1190 CFI_ADJUST_CFA_OFFSET 8
1191 paranoidentry do_debug, DEBUG_STACK
1192 paranoidexit
1193 KPROBE_END(debug)
1194
1195 /* runs on exception stack */
1196 KPROBE_ENTRY(nmi)
1197 INTR_FRAME
1198 pushq $-1
1199 CFI_ADJUST_CFA_OFFSET 8
1200 paranoidentry do_nmi, 0, 0
1201 #ifdef CONFIG_TRACE_IRQFLAGS
1202 paranoidexit 0
1203 #else
1204 jmp paranoid_exit1
1205 CFI_ENDPROC
1206 #endif
1207 KPROBE_END(nmi)
1208
1209 KPROBE_ENTRY(int3)
1210 INTR_FRAME
1211 pushq $0
1212 CFI_ADJUST_CFA_OFFSET 8
1213 paranoidentry do_int3, DEBUG_STACK
1214 jmp paranoid_exit1
1215 CFI_ENDPROC
1216 KPROBE_END(int3)
1217
1218 ENTRY(overflow)
1219 zeroentry do_overflow
1220 END(overflow)
1221
1222 ENTRY(bounds)
1223 zeroentry do_bounds
1224 END(bounds)
1225
1226 ENTRY(invalid_op)
1227 zeroentry do_invalid_op
1228 END(invalid_op)
1229
1230 ENTRY(coprocessor_segment_overrun)
1231 zeroentry do_coprocessor_segment_overrun
1232 END(coprocessor_segment_overrun)
1233
1234 /* runs on exception stack */
1235 ENTRY(double_fault)
1236 XCPT_FRAME
1237 paranoidentry do_double_fault
1238 jmp paranoid_exit1
1239 CFI_ENDPROC
1240 END(double_fault)
1241
1242 ENTRY(invalid_TSS)
1243 errorentry do_invalid_TSS
1244 END(invalid_TSS)
1245
1246 ENTRY(segment_not_present)
1247 errorentry do_segment_not_present
1248 END(segment_not_present)
1249
1250 /* runs on exception stack */
1251 ENTRY(stack_segment)
1252 XCPT_FRAME
1253 paranoidentry do_stack_segment
1254 jmp paranoid_exit1
1255 CFI_ENDPROC
1256 END(stack_segment)
1257
1258 KPROBE_ENTRY(general_protection)
1259 errorentry do_general_protection
1260 KPROBE_END(general_protection)
1261
1262 ENTRY(alignment_check)
1263 errorentry do_alignment_check
1264 END(alignment_check)
1265
1266 ENTRY(divide_error)
1267 zeroentry do_divide_error
1268 END(divide_error)
1269
1270 ENTRY(spurious_interrupt_bug)
1271 zeroentry do_spurious_interrupt_bug
1272 END(spurious_interrupt_bug)
1273
1274 #ifdef CONFIG_X86_MCE
1275 /* runs on exception stack */
1276 ENTRY(machine_check)
1277 INTR_FRAME
1278 pushq $0
1279 CFI_ADJUST_CFA_OFFSET 8
1280 paranoidentry do_machine_check
1281 jmp paranoid_exit1
1282 CFI_ENDPROC
1283 END(machine_check)
1284 #endif
1285
1286 /* Call softirq on interrupt stack. Interrupts are off. */
1287 ENTRY(call_softirq)
1288 CFI_STARTPROC
1289 push %rbp
1290 CFI_ADJUST_CFA_OFFSET 8
1291 CFI_REL_OFFSET rbp,0
1292 mov %rsp,%rbp
1293 CFI_DEF_CFA_REGISTER rbp
1294 incl %gs:pda_irqcount
1295 cmove %gs:pda_irqstackptr,%rsp
1296 push %rbp # backlink for old unwinder
1297 call __do_softirq
1298 leaveq
1299 CFI_DEF_CFA_REGISTER rsp
1300 CFI_ADJUST_CFA_OFFSET -8
1301 decl %gs:pda_irqcount
1302 ret
1303 CFI_ENDPROC
1304 ENDPROC(call_softirq)
1305
1306 KPROBE_ENTRY(ignore_sysret)
1307 CFI_STARTPROC
1308 mov $-ENOSYS,%eax
1309 sysret
1310 CFI_ENDPROC
1311 ENDPROC(ignore_sysret)