Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfashe...
[deliverable/linux.git] / arch / x86 / kernel / entry_64.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame, this is
16 * only done for syscall tracing, signals or fork/exec et.al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: Like partial stack frame, but all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers
30 * are not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53
54 .code64
55
/*
 * The retint_kernel entry point is only defined further down when
 * CONFIG_PREEMPT is set. Without it, every jump to retint_kernel is
 * redirected to retint_restore_args.
 */
56 #ifndef CONFIG_PREEMPT
57 #define retint_kernel retint_restore_args
58 #endif
59
60
/*
 * TRACE_IRQS_IRETQ offset=ARGOFFSET
 *
 * Invokes TRACE_IRQS_ON if bit 9 (the interrupt-enable bit) is set in the
 * saved EFLAGS image at EFLAGS-\offset(%rsp), i.e. if restoring that frame
 * will turn interrupts back on. Expands to nothing unless
 * CONFIG_TRACE_IRQFLAGS is defined. Modifies the flags (bt).
 */
61 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
62 #ifdef CONFIG_TRACE_IRQFLAGS
63 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
64 jnc 1f /* bit clear: nothing to trace */
65 TRACE_IRQS_ON
66 1:
67 #endif
68 .endm
69
70 /*
71 * C code is not supposed to know about undefined top of stack. Every time
72 * a C function with a pt_regs argument is called from the SYSCALL based
73 * fast path FIXUP_TOP_OF_STACK is needed.
74 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
75 * manipulation.
76 */
77
78 /* %rsp:at FRAMEEND */
/*
 * FIXUP_TOP_OF_STACK tmp
 *
 * Fills in the RSP, SS, CS, RCX and EFLAGS slots of the frame at %rsp so
 * that C code receiving a pt_regs pointer sees defined values after a
 * SYSCALL entry. \tmp is a scratch register and is clobbered.
 */
79 .macro FIXUP_TOP_OF_STACK tmp
80 movq %gs:pda_oldrsp,\tmp /* stack pointer stashed in the PDA at syscall entry */
81 movq \tmp,RSP(%rsp)
82 movq $__USER_DS,SS(%rsp)
83 movq $__USER_CS,CS(%rsp)
84 movq $-1,RCX(%rsp) /* RCX slot is overwritten with -1 */
85 movq R11(%rsp),\tmp /* get eflags */
86 movq \tmp,EFLAGS(%rsp)
87 .endm
88
/*
 * RESTORE_TOP_OF_STACK tmp,offset=0
 *
 * Counterpart of FIXUP_TOP_OF_STACK: copies the RSP slot of the frame back
 * to pda_oldrsp and the EFLAGS slot back to the R11 slot, so changes made to
 * the frame are picked up. \offset is subtracted from every
 * frame offset; \tmp is a scratch register and is clobbered.
 */
89 .macro RESTORE_TOP_OF_STACK tmp,offset=0
90 movq RSP-\offset(%rsp),\tmp
91 movq \tmp,%gs:pda_oldrsp
92 movq EFLAGS-\offset(%rsp),\tmp
93 movq \tmp,R11-\offset(%rsp)
94 .endm
95
/*
 * FAKE_STACK_FRAME child_rip
 *
 * Pushes six quadwords that mimic an interrupt frame plus an orig_rax slot:
 * ss=0, rsp=0, eflags with bit 9 set, cs=__KERNEL_CS, rip=\child_rip,
 * orig_rax=0. Zeroes %rax as a side effect. Undone by UNFAKE_STACK_FRAME.
 */
96 .macro FAKE_STACK_FRAME child_rip
97 /* push in order ss, rsp, eflags, cs, rip */
98 xorl %eax, %eax
99 pushq %rax /* ss */
100 CFI_ADJUST_CFA_OFFSET 8
101 /*CFI_REL_OFFSET ss,0*/
102 pushq %rax /* rsp */
103 CFI_ADJUST_CFA_OFFSET 8
104 CFI_REL_OFFSET rsp,0
105 pushq $(1<<9) /* eflags - interrupts on */
106 CFI_ADJUST_CFA_OFFSET 8
107 /*CFI_REL_OFFSET rflags,0*/
108 pushq $__KERNEL_CS /* cs */
109 CFI_ADJUST_CFA_OFFSET 8
110 /*CFI_REL_OFFSET cs,0*/
111 pushq \child_rip /* rip */
112 CFI_ADJUST_CFA_OFFSET 8
113 CFI_REL_OFFSET rip,0
114 pushq %rax /* orig rax */
115 CFI_ADJUST_CFA_OFFSET 8
116 .endm
117
/*
 * UNFAKE_STACK_FRAME: drop the six quadwords pushed by FAKE_STACK_FRAME
 * and adjust the CFA offset to match.
 */
118 .macro UNFAKE_STACK_FRAME
119 addq $8*6, %rsp
120 CFI_ADJUST_CFA_OFFSET -(6*8)
121 .endm
122
/*
 * CFI_DEFAULT_STACK start=1
 *
 * Emits unwind annotations describing a full stack frame at %rsp: the CFA is
 * at SS+8 and each general register, rip and rsp is recorded at its frame
 * offset. With \start non-zero a new "simple" signal-frame CFI procedure is
 * opened first; with \start zero only the CFA offset of the procedure that
 * is already open is reset. Generates no instructions.
 */
123 .macro CFI_DEFAULT_STACK start=1
124 .if \start
125 CFI_STARTPROC simple
126 CFI_SIGNAL_FRAME
127 CFI_DEF_CFA rsp,SS+8
128 .else
129 CFI_DEF_CFA_OFFSET SS+8
130 .endif
131 CFI_REL_OFFSET r15,R15
132 CFI_REL_OFFSET r14,R14
133 CFI_REL_OFFSET r13,R13
134 CFI_REL_OFFSET r12,R12
135 CFI_REL_OFFSET rbp,RBP
136 CFI_REL_OFFSET rbx,RBX
137 CFI_REL_OFFSET r11,R11
138 CFI_REL_OFFSET r10,R10
139 CFI_REL_OFFSET r9,R9
140 CFI_REL_OFFSET r8,R8
141 CFI_REL_OFFSET rax,RAX
142 CFI_REL_OFFSET rcx,RCX
143 CFI_REL_OFFSET rdx,RDX
144 CFI_REL_OFFSET rsi,RSI
145 CFI_REL_OFFSET rdi,RDI
146 CFI_REL_OFFSET rip,RIP
147 /*CFI_REL_OFFSET cs,CS*/
148 /*CFI_REL_OFFSET rflags,EFLAGS*/
149 CFI_REL_OFFSET rsp,RSP
150 /*CFI_REL_OFFSET ss,SS*/
151 .endm
152 /*
153 * A newly forked process directly context switches into this.
 *
 * Reloads EFLAGS from kernel_eflags, calls schedule_tail, and runs
 * syscall_trace_leave if syscall tracing or auditing is flagged for the
 * thread. It then leaves through one of two paths:
 *  - int_ret_from_sys_call (IRET path) when the saved CS has no RPL bits
 *    set (kernel thread) or the thread has _TIF_IA32 set;
 *  - ret_from_sys_call (SYSRET fast path) otherwise, after syncing the
 *    top of stack back with RESTORE_TOP_OF_STACK.
 *
 * The frame at %rsp on entry is a full stack frame (CFI_DEFAULT_STACK).
154 */
155 /* rdi: prev */
156 ENTRY(ret_from_fork)
157 CFI_DEFAULT_STACK
158 push kernel_eflags(%rip)
159 CFI_ADJUST_CFA_OFFSET 8 /* a push in 64-bit code moves %rsp by 8, not 4 */
160 popf # reset kernel eflags
161 CFI_ADJUST_CFA_OFFSET -8
162 call schedule_tail
163 GET_THREAD_INFO(%rcx)
164 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
165 jnz rff_trace
166 rff_action:
167 RESTORE_REST
168 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
169 je int_ret_from_sys_call
170 testl $_TIF_IA32,threadinfo_flags(%rcx)
171 jnz int_ret_from_sys_call
172 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
173 jmp ret_from_sys_call
174 rff_trace:
175 movq %rsp,%rdi /* arg1: pointer to the full frame */
176 call syscall_trace_leave
177 GET_THREAD_INFO(%rcx) /* reload %rcx after the call before rejoining rff_action */
178 jmp rff_action
179 CFI_ENDPROC
180 END(ret_from_fork)
181
182 /*
183 * System call entry. Up to 6 arguments in registers are supported.
184 *
185 * SYSCALL does not save anything on the stack and does not change the
186 * stack pointer.
187 */
188
189 /*
190 * Register setup:
191 * rax system call number
192 * rdi arg0
193 * rcx return address for syscall/sysret, C arg3
194 * rsi arg1
195 * rdx arg2
196 * r10 arg3 (--> moved to rcx for C)
197 * r8 arg4
198 * r9 arg5
199 * r11 eflags for syscall/sysret, temporary for C
200 * r12-r15,rbp,rbx saved by C code, not touched.
201 *
202 * Interrupts are off on entry.
203 * Only called from user space.
204 *
205 * XXX if we had a free scratch register we could save the RSP into the stack frame
206 * and report it properly in ps. Unfortunately we haven't.
207 *
208 * When user can change the frames always force IRET. That is because
209 * it deals with uncanonical addresses better. SYSRET has trouble
210 * with them due to bugs in both AMD and Intel CPUs.
211 */
212
/*
 * system_call: entry point for the SYSCALL instruction.
 *
 * Switches to the kernel GS and kernel stack, saves the argument registers,
 * dispatches through sys_call_table and stores the result in the RAX slot.
 * The exit side is split into labelled stages:
 *   ret_from_sys_call / sysret_check - fast exit ending in SYSRET
 *   sysret_careful / sysret_signal   - reschedule or notify work on that path
 *   badsys                           - syscall number out of range: -ENOSYS
 *   tracesys                         - traced/audited/seccomp entry
 *   int_ret_from_sys_call and the int_* labels - exit ending in IRET
 * In the exit loops %edi holds the mask of thread-info flags still to be
 * checked and %edx the flags currently pending under that mask.
 */
213 ENTRY(system_call)
214 CFI_STARTPROC simple
215 CFI_SIGNAL_FRAME
216 CFI_DEF_CFA rsp,PDA_STACKOFFSET
217 CFI_REGISTER rip,rcx
218 /*CFI_REGISTER rflags,r11*/
219 swapgs
220 movq %rsp,%gs:pda_oldrsp /* stash the caller's stack pointer in the PDA */
221 movq %gs:pda_kernelstack,%rsp
222 /*
223 * No need to follow this irqs off/on section - it's straight
224 * and short:
225 */
226 sti
227 SAVE_ARGS 8,1
228 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) /* record the syscall number */
229 movq %rcx,RIP-ARGOFFSET(%rsp) /* rcx holds the return address */
230 CFI_REL_OFFSET rip,RIP-ARGOFFSET
231 GET_THREAD_INFO(%rcx)
232 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
233 jnz tracesys
234 cmpq $__NR_syscall_max,%rax
235 ja badsys /* unsigned compare: out-of-range number */
236 movq %r10,%rcx /* arg3 arrives in r10, C expects it in rcx */
237 call *sys_call_table(,%rax,8) # XXX: rip relative
238 movq %rax,RAX-ARGOFFSET(%rsp)
239 /*
240 * Syscall return path ending with SYSRET (fast path)
241 * Has incomplete stack frame and undefined top of stack.
242 */
243 ret_from_sys_call:
244 movl $_TIF_ALLWORK_MASK,%edi
245 /* edi: flagmask */
246 sysret_check:
247 LOCKDEP_SYS_EXIT
248 GET_THREAD_INFO(%rcx)
249 cli
250 TRACE_IRQS_OFF
251 movl threadinfo_flags(%rcx),%edx
252 andl %edi,%edx /* edx = pending work selected by the mask */
253 jnz sysret_careful
254 CFI_REMEMBER_STATE
255 /*
256 * sysretq will re-enable interrupts:
257 */
258 TRACE_IRQS_ON
259 movq RIP-ARGOFFSET(%rsp),%rcx /* return address back into rcx for sysretq */
260 CFI_REGISTER rip,rcx
261 RESTORE_ARGS 0,-ARG_SKIP,1
262 /*CFI_REGISTER rflags,r11*/
263 movq %gs:pda_oldrsp,%rsp /* back onto the stack pointer saved at entry */
264 swapgs
265 sysretq
266
267 CFI_RESTORE_STATE
268 /* Handle reschedules */
269 /* edx: work, edi: workmask */
270 sysret_careful:
271 bt $TIF_NEED_RESCHED,%edx
272 jnc sysret_signal
273 TRACE_IRQS_ON
274 sti
275 pushq %rdi /* keep the work mask across the call */
276 CFI_ADJUST_CFA_OFFSET 8
277 call schedule
278 popq %rdi
279 CFI_ADJUST_CFA_OFFSET -8
280 jmp sysret_check
281
282 /* Handle a signal */
283 sysret_signal:
284 TRACE_IRQS_ON
285 sti
286 testl $_TIF_DO_NOTIFY_MASK,%edx
287 jz 1f
288
289 /* Really a signal */
290 /* edx: work flags (arg3) */
291 leaq do_notify_resume(%rip),%rax /* function for ptregscall_common to call */
292 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
293 xorl %esi,%esi # oldset -> arg2
294 call ptregscall_common
295 1: movl $_TIF_NEED_RESCHED,%edi
296 /* Use IRET because user could have changed frame. This
297 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
298 cli
299 TRACE_IRQS_OFF
300 jmp int_with_check
301
302 badsys:
303 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
304 jmp ret_from_sys_call
305
306 /* Do syscall tracing */
307 tracesys:
308 SAVE_REST
309 movq $-ENOSYS,RAX(%rsp) /* preset the RAX slot before the tracer runs */
310 FIXUP_TOP_OF_STACK %rdi
311 movq %rsp,%rdi /* arg1: pointer to the full frame */
312 call syscall_trace_enter
313 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
314 RESTORE_REST
315 cmpq $__NR_syscall_max,%rax
316 movq $-ENOSYS,%rcx /* mov leaves the flags from cmpq intact */
317 cmova %rcx,%rax /* out of range: result becomes -ENOSYS */
318 ja 1f /* and the call is skipped */
319 movq %r10,%rcx /* fixup for C */
320 call *sys_call_table(,%rax,8)
321 1: movq %rax,RAX-ARGOFFSET(%rsp)
322 /* Use IRET because user could have changed frame */
323
324 /*
325 * Syscall return path ending with IRET.
326 * Has correct top of stack, but partial stack frame.
327 */
328 .globl int_ret_from_sys_call
329 int_ret_from_sys_call:
330 cli
331 TRACE_IRQS_OFF
332 testl $3,CS-ARGOFFSET(%rsp)
333 je retint_restore_args /* saved CS has RPL 0: returning to kernel */
334 movl $_TIF_ALLWORK_MASK,%edi
335 /* edi: mask to check */
336 int_with_check:
337 LOCKDEP_SYS_EXIT_IRQ
338 GET_THREAD_INFO(%rcx)
339 movl threadinfo_flags(%rcx),%edx
340 andl %edi,%edx
341 jnz int_careful
342 andl $~TS_COMPAT,threadinfo_status(%rcx)
343 jmp retint_swapgs
344
345 /* Either reschedule or signal or syscall exit tracking needed. */
346 /* First do a reschedule test. */
347 /* edx: work, edi: workmask */
348 int_careful:
349 bt $TIF_NEED_RESCHED,%edx
350 jnc int_very_careful
351 TRACE_IRQS_ON
352 sti
353 pushq %rdi /* keep the work mask across the call */
354 CFI_ADJUST_CFA_OFFSET 8
355 call schedule
356 popq %rdi
357 CFI_ADJUST_CFA_OFFSET -8
358 cli
359 TRACE_IRQS_OFF
360 jmp int_with_check
361
362 /* handle signals and tracing -- both require a full stack frame */
363 int_very_careful:
364 TRACE_IRQS_ON
365 sti
366 SAVE_REST
367 /* Check for syscall exit trace */
368 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
369 jz int_signal
370 pushq %rdi
371 CFI_ADJUST_CFA_OFFSET 8
372 leaq 8(%rsp),%rdi # &ptregs -> arg1
373 call syscall_trace_leave
374 popq %rdi
375 CFI_ADJUST_CFA_OFFSET -8
376 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi /* do not re-check the tracing bits on the next pass */
377 jmp int_restore_rest
378
379 int_signal:
380 testl $_TIF_DO_NOTIFY_MASK,%edx
381 jz 1f
382 movq %rsp,%rdi # &ptregs -> arg1
383 xorl %esi,%esi # oldset -> arg2
384 call do_notify_resume
385 1: movl $_TIF_NEED_RESCHED,%edi
386 int_restore_rest:
387 RESTORE_REST
388 cli
389 TRACE_IRQS_OFF
390 jmp int_with_check
391 CFI_ENDPROC
392 END(system_call)
393
394 /*
395 * Certain special system calls that need to save a complete full stack frame.
 *
 * PTREGSCALL label,func,arg defines a global stub \label that loads the
 * address of \func into %rax, loads into register \arg the address
 * -ARGOFFSET+8(%rsp) (the extra 8 accounts for the stub's own return
 * address), and jumps to ptregscall_common, which calls \func.
396 */
397
398 .macro PTREGSCALL label,func,arg
399 .globl \label
400 \label:
401 leaq \func(%rip),%rax
402 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
403 jmp ptregscall_common
404 END(\label)
405 .endm
406
/*
 * Stubs built with PTREGSCALL, followed by the common tail they jump to.
 * The CFI procedure opened here spans the stubs and ptregscall_common.
 *
 * ptregscall_common:
 *   in:  %rax = function to call, return address on top of the stack,
 *        argument registers already set up by the stub
 *   Pops the return address, builds a full frame with SAVE_REST, fixes up
 *   the top of stack, calls *%rax, then undoes all of that and returns to
 *   the popped address. %r11 is used as scratch. The return address is
 *   parked in %r15 across the call; RESTORE_REST is invoked afterwards.
 */
407 CFI_STARTPROC
408
409 PTREGSCALL stub_clone, sys_clone, %r8
410 PTREGSCALL stub_fork, sys_fork, %rdi
411 PTREGSCALL stub_vfork, sys_vfork, %rdi
412 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
413 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
414 PTREGSCALL stub_iopl, sys_iopl, %rsi
415
416 ENTRY(ptregscall_common)
417 popq %r11 /* return address */
418 CFI_ADJUST_CFA_OFFSET -8
419 CFI_REGISTER rip, r11
420 SAVE_REST
421 movq %r11, %r15 /* park the return address in r15 across the call */
422 CFI_REGISTER rip, r15
423 FIXUP_TOP_OF_STACK %r11
424 call *%rax
425 RESTORE_TOP_OF_STACK %r11
426 movq %r15, %r11
427 CFI_REGISTER rip, r11
428 RESTORE_REST
429 pushq %r11 /* put the return address back for ret */
430 CFI_ADJUST_CFA_OFFSET 8
431 CFI_REL_OFFSET rip, 0
432 ret
433 CFI_ENDPROC
434 END(ptregscall_common)
435
/*
 * stub_execve: wrapper around sys_execve that needs a full stack frame.
 * Pops its return address into %r11 and never returns to it: after the call
 * it stores the result in the RAX slot and leaves through
 * int_ret_from_sys_call (the IRET path).
 */
436 ENTRY(stub_execve)
437 CFI_STARTPROC
438 popq %r11 /* popped return address; not used again in this stub */
439 CFI_ADJUST_CFA_OFFSET -8
440 CFI_REGISTER rip, r11
441 SAVE_REST
442 FIXUP_TOP_OF_STACK %r11
443 call sys_execve
444 RESTORE_TOP_OF_STACK %r11
445 movq %rax,RAX(%rsp)
446 RESTORE_REST
447 jmp int_ret_from_sys_call
448 CFI_ENDPROC
449 END(stub_execve)
450
451 /*
452 * sigreturn is special because it needs to restore all registers on return.
453 * This cannot be done with SYSRET, so use the IRET return path instead.
 *
 * The stub's own return address is dropped from the stack, a full frame is
 * built, and sys_rt_sigreturn is called with %rdi pointing at that frame.
 * The result is written to the RAX slot before leaving through
 * int_ret_from_sys_call.
454 */
455 ENTRY(stub_rt_sigreturn)
456 CFI_STARTPROC
457 addq $8, %rsp /* discard the return address */
458 CFI_ADJUST_CFA_OFFSET -8
459 SAVE_REST
460 movq %rsp,%rdi /* arg1: pointer to the full frame */
461 FIXUP_TOP_OF_STACK %r11
462 call sys_rt_sigreturn
463 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
464 RESTORE_REST
465 jmp int_ret_from_sys_call
466 CFI_ENDPROC
467 END(stub_rt_sigreturn)
468
469 /*
470 * initial frame state for interrupts and exceptions
 *
 * _frame ref opens a "simple" signal-frame CFI procedure whose frame
 * offsets are taken relative to \ref, the frame slot %rsp points at when
 * the macro is used. Only rsp and rip are recorded. Generates no
 * instructions.
471 */
472 .macro _frame ref
473 CFI_STARTPROC simple
474 CFI_SIGNAL_FRAME
475 CFI_DEF_CFA rsp,SS+8-\ref
476 /*CFI_REL_OFFSET ss,SS-\ref*/
477 CFI_REL_OFFSET rsp,RSP-\ref
478 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
479 /*CFI_REL_OFFSET cs,CS-\ref*/
480 CFI_REL_OFFSET rip,RIP-\ref
481 .endm
482
483 /* initial frame state for interrupts (and exceptions without error code) */
484 #define INTR_FRAME _frame RIP
485 /* initial frame state for exceptions with error code (and interrupts with
486 vector already pushed) */
487 #define XCPT_FRAME _frame ORIG_RAX
488
489 /*
490 * Interrupt entry/exit.
491 *
492 * Interrupt entry points save only callee clobbered registers in fast path.
493 *
494 * Entry runs with interrupts off.
495 */
496
497 /* 0(%rsp): interrupt number */
/*
 * interrupt func
 *
 * Common interrupt prologue: saves the argument registers, points %rdi at
 * the frame as arg1, sets up %rbp as a frame pointer, switches to kernel GS
 * when the saved CS has RPL bits set, bumps pda_irqcount, moves to the
 * stack at pda_irqstackptr when the incremented count is zero, and calls
 * \func. The old stack pointer is left in %rbp and pushed on the new stack,
 * where the code after the macro finds it (see ret_from_intr's leaveq).
 */
498 .macro interrupt func
499 cld
500 SAVE_ARGS
501 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
502 pushq %rbp
503 CFI_ADJUST_CFA_OFFSET 8
504 CFI_REL_OFFSET rbp, 0
505 movq %rsp,%rbp
506 CFI_DEF_CFA_REGISTER rbp
507 testl $3,CS(%rdi)
508 je 1f /* RPL 0: interrupted kernel code, no swapgs */
509 swapgs
510 /* irqcount is used to check if a CPU is already on an interrupt
511 stack or not. While this is essentially redundant with preempt_count
512 it is a little cheaper to use a separate counter in the PDA
513 (short of moving irq_enter into assembly, which would be too
514 much work) */
515 1: incl %gs:pda_irqcount
516 cmoveq %gs:pda_irqstackptr,%rsp /* uses ZF from incl: switch only when the count became zero */
517 push %rbp # backlink for old unwinder
518 /*
519 * We entered an interrupt context - irqs are off:
520 */
521 TRACE_IRQS_OFF
522 call \func
523 .endm
524
/*
 * common_interrupt: entry for device interrupts (the vector is already on
 * the stack, hence XCPT_FRAME), plus the shared interrupt return code.
 *
 *   ret_from_intr / exit_intr - undo the prologue, decide user vs kernel
 *   retint_with_reschedule / retint_check - user return: test pending work
 *   retint_swapgs - user return: swapgs, then restore_args
 *   retint_restore_args / restore_args / iret_label - restore and iretq
 *   bad_iret - fixup target if the iretq at iret_label faults
 *   retint_careful / retint_signal - reschedule or notify work
 *   retint_kernel (CONFIG_PREEMPT only) - kernel preemption check
 *
 * Other entry code in this file jumps into several of these labels.
 */
525 ENTRY(common_interrupt)
526 XCPT_FRAME
527 interrupt do_IRQ
528 /* 0(%rsp): oldrsp-ARGOFFSET */
529 ret_from_intr:
530 cli
531 TRACE_IRQS_OFF
532 decl %gs:pda_irqcount
533 leaveq /* back to the stack pointer saved in %rbp, then pop %rbp */
534 CFI_DEF_CFA_REGISTER rsp
535 CFI_ADJUST_CFA_OFFSET -8
536 exit_intr:
537 GET_THREAD_INFO(%rcx)
538 testl $3,CS-ARGOFFSET(%rsp)
539 je retint_kernel
540
541 /* Interrupt came from user space */
542 /*
543 * Has a correct top of stack, but a partial stack frame
544 * %rcx: thread info. Interrupts off.
545 */
546 retint_with_reschedule:
547 movl $_TIF_WORK_MASK,%edi
548 retint_check:
549 LOCKDEP_SYS_EXIT_IRQ
550 movl threadinfo_flags(%rcx),%edx
551 andl %edi,%edx /* edx = pending work selected by the mask */
552 CFI_REMEMBER_STATE
553 jnz retint_careful
554
555 retint_swapgs: /* return to user-space */
556 /*
557 * The iretq could re-enable interrupts:
558 */
559 cli
560 TRACE_IRQS_IRETQ
561 swapgs
562 jmp restore_args
563
564 retint_restore_args: /* return to kernel space */
565 cli
566 /*
567 * The iretq could re-enable interrupts:
568 */
569 TRACE_IRQS_IRETQ
570 restore_args:
571 RESTORE_ARGS 0,8,0
572 iret_label:
573 iretq
574
575 .section __ex_table,"a"
576 .quad iret_label,bad_iret /* a fault at iret_label resumes at bad_iret */
577 .previous
578 .section .fixup,"ax"
579 /* force a signal here? this matches i386 behaviour */
580 /* running with kernel gs */
581 bad_iret:
582 movq $11,%rdi /* SIGSEGV */
583 TRACE_IRQS_ON
584 sti
585 jmp do_exit
586 .previous
587
588 /* edi: workmask, edx: work */
589 retint_careful:
590 CFI_RESTORE_STATE
591 bt $TIF_NEED_RESCHED,%edx
592 jnc retint_signal
593 TRACE_IRQS_ON
594 sti
595 pushq %rdi /* keep the work mask across the call */
596 CFI_ADJUST_CFA_OFFSET 8
597 call schedule
598 popq %rdi
599 CFI_ADJUST_CFA_OFFSET -8
600 GET_THREAD_INFO(%rcx)
601 cli
602 TRACE_IRQS_OFF
603 jmp retint_check
604
605 retint_signal:
606 testl $_TIF_DO_NOTIFY_MASK,%edx
607 jz retint_swapgs
608 TRACE_IRQS_ON
609 sti
610 SAVE_REST
611 movq $-1,ORIG_RAX(%rsp)
612 xorl %esi,%esi # oldset
613 movq %rsp,%rdi # &pt_regs
614 call do_notify_resume
615 RESTORE_REST
616 cli
617 TRACE_IRQS_OFF
618 movl $_TIF_NEED_RESCHED,%edi /* from now on only look for reschedule work */
619 GET_THREAD_INFO(%rcx)
620 jmp retint_check
621
622 #ifdef CONFIG_PREEMPT
623 /* Returning to kernel space. Check if we need preemption */
624 /* rcx: threadinfo. interrupts off. */
625 ENTRY(retint_kernel)
626 cmpl $0,threadinfo_preempt_count(%rcx)
627 jnz retint_restore_args /* non-zero preempt count: no preemption */
628 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
629 jnc retint_restore_args
630 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
631 jnc retint_restore_args
632 call preempt_schedule_irq
633 jmp exit_intr
634 #endif
635
636 CFI_ENDPROC
637 END(common_interrupt)
638
639 /*
640 * APIC interrupts.
 *
 * apicinterrupt num,func emits a complete entry body: it pushes ~\num into
 * the slot where common_interrupt expects the vector, runs the shared
 * interrupt prologue calling \func, and jumps to ret_from_intr.
641 */
642 .macro apicinterrupt num,func
643 INTR_FRAME
644 pushq $~(\num)
645 CFI_ADJUST_CFA_OFFSET 8
646 interrupt \func
647 jmp ret_from_intr
648 CFI_ENDPROC
649 .endm
650
/*
 * APIC interrupt entry points. Each one is just apicinterrupt instantiated
 * with a vector constant and the C handler to call.
 */
651 ENTRY(thermal_interrupt)
652 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
653 END(thermal_interrupt)
654
655 ENTRY(threshold_interrupt)
656 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
657 END(threshold_interrupt)
658
659 #ifdef CONFIG_SMP
660 ENTRY(reschedule_interrupt)
661 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
662 END(reschedule_interrupt)
663
/*
 * INVALIDATE_ENTRY num defines invalidate_interrupt<num> for vector
 * INVALIDATE_TLB_VECTOR_START+num; all of them call smp_invalidate_interrupt.
 */
664 .macro INVALIDATE_ENTRY num
665 ENTRY(invalidate_interrupt\num)
666 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
667 END(invalidate_interrupt\num)
668 .endm
669
670 INVALIDATE_ENTRY 0
671 INVALIDATE_ENTRY 1
672 INVALIDATE_ENTRY 2
673 INVALIDATE_ENTRY 3
674 INVALIDATE_ENTRY 4
675 INVALIDATE_ENTRY 5
676 INVALIDATE_ENTRY 6
677 INVALIDATE_ENTRY 7
678
679 ENTRY(call_function_interrupt)
680 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
681 END(call_function_interrupt)
682 ENTRY(irq_move_cleanup_interrupt)
683 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
684 END(irq_move_cleanup_interrupt)
685 #endif
686
687 ENTRY(apic_timer_interrupt)
688 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
689 END(apic_timer_interrupt)
690
691 ENTRY(error_interrupt)
692 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
693 END(error_interrupt)
694
695 ENTRY(spurious_interrupt)
696 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
697 END(spurious_interrupt)
698
699 /*
700 * Exception entry points.
 *
 * zeroentry sym: body for an exception that comes without an error code.
 * Pushes 0 as the error code, pushes %rax, loads the address of \sym into
 * %rax and jumps to error_entry, which calls the handler.
701 */
702 .macro zeroentry sym
703 INTR_FRAME
704 pushq $0 /* push error code/oldrax */
705 CFI_ADJUST_CFA_OFFSET 8
706 pushq %rax /* push real oldrax to the rdi slot */
707 CFI_ADJUST_CFA_OFFSET 8
708 CFI_REL_OFFSET rax,0
709 leaq \sym(%rip),%rax
710 jmp error_entry
711 CFI_ENDPROC
712 .endm
713
/*
 * errorentry sym: body for an exception whose error code is already on the
 * stack. Same as zeroentry minus the dummy error-code push: saves %rax in
 * the rdi slot, loads the address of \sym into %rax and jumps to
 * error_entry.
 */
714 .macro errorentry sym
715 XCPT_FRAME
716 pushq %rax
717 CFI_ADJUST_CFA_OFFSET 8
718 CFI_REL_OFFSET rax,0
719 leaq \sym(%rip),%rax
720 jmp error_entry
721 CFI_ENDPROC
722 .endm
723
724 /* error code is on the stack already */
725 /* handle NMI like exceptions that can happen everywhere */
/*
 * paranoidentry sym, ist=0, irqtrace=1
 *
 * Saves all registers, decides from the GS base MSR whether swapgs is
 * needed, and calls \sym with %rdi = frame pointer and %rsi = the value
 * taken from the ORIG_RAX slot (which is then overwritten with -1).
 * On exit %ebx is the "no swapgs" flag consumed by paranoidexit:
 * 1 = swapgs was not done here, 0 = it was.
 * With a non-zero \ist, the TSS IST entry (\ist - 1) is lowered by
 * EXCEPTION_STKSZ around the call. With \irqtrace zero, TRACE_IRQS_OFF is
 * omitted.
 */
726 .macro paranoidentry sym, ist=0, irqtrace=1
727 SAVE_ALL
728 cld
729 movl $1,%ebx /* assume no swapgs needed */
730 movl $MSR_GS_BASE,%ecx
731 rdmsr
732 testl %edx,%edx /* high half of the GS base */
733 js 1f /* sign bit set: skip swapgs, ebx stays 1 */
734 swapgs
735 xorl %ebx,%ebx
736 1:
737 .if \ist
738 movq %gs:pda_data_offset, %rbp
739 .endif
740 movq %rsp,%rdi
741 movq ORIG_RAX(%rsp),%rsi
742 movq $-1,ORIG_RAX(%rsp)
743 .if \ist
744 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
745 .endif
746 call \sym
747 .if \ist
748 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
749 .endif
750 cli
751 .if \irqtrace
752 TRACE_IRQS_OFF
753 .endif
754 .endm
755
756 /*
757 * "Paranoid" exit path from exception stack.
758 * Paranoid because this is used by NMIs and cannot take
759 * any kernel state for granted.
760 * We don't do kernel preemption checks here, because only
761 * NMI should be common and it does not enable IRQs and
762 * cannot get reschedule ticks.
763 *
764 * "trace" is 0 for the NMI handler only, because irq-tracing
765 * is fundamentally NMI-unsafe. (we cannot change the soft and
766 * hard flags at once, atomically)
 *
 * The macro emits labels suffixed with the value of \trace (for example
 * paranoid_exit1 for the default), so each value may be instantiated only
 * once. It also closes the CFI procedure opened by the entry point.
767 */
768 .macro paranoidexit trace=1
769 /* ebx: no swapgs flag */
770 paranoid_exit\trace:
771 testl %ebx,%ebx /* swapgs needed? */
772 jnz paranoid_restore\trace
773 testl $3,CS(%rsp)
774 jnz paranoid_userspace\trace
775 paranoid_swapgs\trace:
776 .if \trace
777 TRACE_IRQS_IRETQ 0
778 .endif
779 swapgs
780 paranoid_restore\trace:
781 RESTORE_ALL 8
782 iretq
783 paranoid_userspace\trace:
784 GET_THREAD_INFO(%rcx)
785 movl threadinfo_flags(%rcx),%ebx
786 andl $_TIF_WORK_MASK,%ebx /* ebx now holds pending work flags, no longer the swapgs flag */
787 jz paranoid_swapgs\trace
788 movq %rsp,%rdi /* &pt_regs */
789 call sync_regs
790 movq %rax,%rsp /* switch stack for scheduling */
791 testl $_TIF_NEED_RESCHED,%ebx
792 jnz paranoid_schedule\trace
793 movl %ebx,%edx /* arg3: thread flags */
794 .if \trace
795 TRACE_IRQS_ON
796 .endif
797 sti
798 xorl %esi,%esi /* arg2: oldset */
799 movq %rsp,%rdi /* arg1: &pt_regs */
800 call do_notify_resume
801 cli
802 .if \trace
803 TRACE_IRQS_OFF
804 .endif
805 jmp paranoid_userspace\trace
806 paranoid_schedule\trace:
807 .if \trace
808 TRACE_IRQS_ON
809 .endif
810 sti
811 call schedule
812 cli
813 .if \trace
814 TRACE_IRQS_OFF
815 .endif
816 jmp paranoid_userspace\trace
817 CFI_ENDPROC
818 .endm
819
820 /*
821 * Exception entry point. This expects an error code/orig_rax on the stack
822 * and the exception handler in %rax.
 *
 * On entry the rdi slot holds the interrupted %rax (pushed by zeroentry or
 * errorentry). The code builds a full register frame by hand, does swapgs
 * when required, and calls the handler with %rdi = frame pointer and
 * %rsi = error code. The ORIG_RAX slot is set to -1 before the call.
 * %ebx carries the "no swapgs" flag to error_exit.
823 */
824 KPROBE_ENTRY(error_entry)
825 _frame RDI
826 CFI_REL_OFFSET rax,0
827 /* rdi slot contains rax, oldrax contains error code */
828 cld
829 subq $14*8,%rsp /* room for the 14 register slots below the rdi slot */
830 CFI_ADJUST_CFA_OFFSET (14*8)
831 movq %rsi,13*8(%rsp)
832 CFI_REL_OFFSET rsi,RSI
833 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
834 CFI_REGISTER rax,rsi
835 movq %rdx,12*8(%rsp)
836 CFI_REL_OFFSET rdx,RDX
837 movq %rcx,11*8(%rsp)
838 CFI_REL_OFFSET rcx,RCX
839 movq %rsi,10*8(%rsp) /* store rax */
840 CFI_REL_OFFSET rax,RAX
841 movq %r8, 9*8(%rsp)
842 CFI_REL_OFFSET r8,R8
843 movq %r9, 8*8(%rsp)
844 CFI_REL_OFFSET r9,R9
845 movq %r10,7*8(%rsp)
846 CFI_REL_OFFSET r10,R10
847 movq %r11,6*8(%rsp)
848 CFI_REL_OFFSET r11,R11
849 movq %rbx,5*8(%rsp)
850 CFI_REL_OFFSET rbx,RBX
851 movq %rbp,4*8(%rsp)
852 CFI_REL_OFFSET rbp,RBP
853 movq %r12,3*8(%rsp)
854 CFI_REL_OFFSET r12,R12
855 movq %r13,2*8(%rsp)
856 CFI_REL_OFFSET r13,R13
857 movq %r14,1*8(%rsp)
858 CFI_REL_OFFSET r14,R14
859 movq %r15,(%rsp)
860 CFI_REL_OFFSET r15,R15
861 xorl %ebx,%ebx /* ebx = 0: swapgs needed on exit */
862 testl $3,CS(%rsp)
863 je error_kernelspace
864 error_swapgs:
865 swapgs
866 error_sti:
867 movq %rdi,RDI(%rsp) /* rdi slot held rax until now; store the real rdi */
868 CFI_REL_OFFSET rdi,RDI
869 movq %rsp,%rdi
870 movq ORIG_RAX(%rsp),%rsi /* get error code */
871 movq $-1,ORIG_RAX(%rsp)
872 call *%rax
873 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
874 error_exit:
875 movl %ebx,%eax /* copy the flag: RESTORE_REST follows and %rbx has a slot in the frame */
876 RESTORE_REST
877 cli
878 TRACE_IRQS_OFF
879 GET_THREAD_INFO(%rcx)
880 testl %eax,%eax
881 jne retint_kernel
882 LOCKDEP_SYS_EXIT_IRQ
883 movl threadinfo_flags(%rcx),%edx
884 movl $_TIF_WORK_MASK,%edi
885 andl %edi,%edx
886 jnz retint_careful
887 jmp retint_swapgs
888 CFI_ENDPROC
889
890 error_kernelspace:
891 incl %ebx /* ebx = 1: no swapgs on exit */
892 /* There are two places in the kernel that can potentially fault with
893 usergs. Handle them here. The exception handlers after
894 iret run with kernel gs again, so don't set the user space flag.
895 B stepping K8s sometimes report a truncated RIP for IRET
896 exceptions returning to compat mode. Check for these here too. */
897 leaq iret_label(%rip),%rbp
898 cmpq %rbp,RIP(%rsp)
899 je error_swapgs
900 movl %ebp,%ebp /* zero extend */
901 cmpq %rbp,RIP(%rsp)
902 je error_swapgs
903 cmpq $gs_change,RIP(%rsp)
904 je error_swapgs
905 jmp error_sti
906 KPROBE_END(error_entry)
907
908 /* Reload gs selector with exception handling */
909 /* edi: new selector */
/*
 * Saves the flags, disables interrupts, and loads %edi into %gs between two
 * swapgs instructions. If the load at gs_change faults, the exception table
 * entry below redirects to bad_gs, which loads a zero selector instead and
 * resumes at local label 2. The caller's flags are restored before return.
 */
910 ENTRY(load_gs_index)
911 CFI_STARTPROC
912 pushf
913 CFI_ADJUST_CFA_OFFSET 8
914 cli
915 swapgs
916 gs_change:
917 movl %edi,%gs
918 2: mfence /* workaround */
919 swapgs
920 popf
921 CFI_ADJUST_CFA_OFFSET -8
922 ret
923 CFI_ENDPROC
924 ENDPROC(load_gs_index)
925
926 .section __ex_table,"a"
927 .align 8
928 .quad gs_change,bad_gs /* a fault at gs_change resumes at bad_gs */
929 .previous
930 .section .fixup,"ax"
931 /* running with kernelgs */
932 bad_gs:
933 swapgs /* switch back to user gs */
934 xorl %eax,%eax
935 movl %eax,%gs
936 jmp 2b
937 .previous
938
939 /*
940 * Create a kernel thread.
941 *
942 * C extern interface:
943 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
944 *
945 * asm input arguments:
946 * rdi: fn, rsi: arg, rdx: flags
 *
 * Builds a fake frame whose rip is child_rip, saves all registers into it
 * (so fn and arg are part of the frame handed to do_fork), and calls
 * do_fork with flags OR-ed with kernel_thread_flags, -1 as the second
 * argument, and the frame address as the third. The do_fork result is
 * written to the RAX slot, from which RESTORE_ALL is expected to reload
 * %rax as the return value.
947 */
948 ENTRY(kernel_thread)
949 CFI_STARTPROC
950 FAKE_STACK_FRAME $child_rip
951 SAVE_ALL
952
953 # rdi: flags, rsi: usp, rdx: will be &pt_regs
954 movq %rdx,%rdi
955 orq kernel_thread_flags(%rip),%rdi
956 movq $-1, %rsi
957 movq %rsp, %rdx
958
959 xorl %r8d,%r8d /* arg5 = 0 */
960 xorl %r9d,%r9d /* arg6 = 0 */
961
962 # clone now
963 call do_fork
964 movq %rax,RAX(%rsp)
965 xorl %edi,%edi
966
967 /*
968 * It isn't worth to check for reschedule here,
969 * so internally to the x86_64 port you can rely on kernel_thread()
970 * not to reschedule the child before returning, this avoids the need
971 * of hacks for example to fork off the per-CPU idle tasks.
972 * [Hopefully no generic code relies on the reschedule -AK]
973 */
974 RESTORE_ALL
975 UNFAKE_STACK_FRAME
976 ret
977 CFI_ENDPROC
978 ENDPROC(kernel_thread)
979
/*
 * child_rip: address pushed as rip by kernel_thread's fake frame, i.e. where
 * the new thread starts. Calls fn(arg) with fn taken from %rdi and arg from
 * %rsi, then passes fn's 32-bit return value to do_exit.
 */
980 child_rip:
981 pushq $0 # fake return address
982 CFI_STARTPROC
983 /*
984 * Here we are in the child and the registers are set as they were
985 * at kernel_thread() invocation in the parent.
986 */
987 movq %rdi, %rax /* rax = fn */
988 movq %rsi, %rdi /* arg1 = arg */
989 call *%rax
990 # exit
991 mov %eax, %edi
992 call do_exit
993 CFI_ENDPROC
994 ENDPROC(child_rip)
995
996 /*
997 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
998 *
999 * C extern interface:
1000 * extern long execve(char *name, char **argv, char **envp)
1001 *
1002 * asm input arguments:
1003 * rdi: name, rsi: argv, rdx: envp
1004 *
1005 * We want to fallback into:
1006 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
1007 *
1008 * do_sys_execve asm fallback arguments:
1009 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
 *
 * When sys_execve returns 0 the code leaves through int_ret_from_sys_call
 * using the frame built here; on a non-zero result it tears the frame down
 * and returns to the caller.
1010 */
1011 ENTRY(kernel_execve)
1012 CFI_STARTPROC
1013 FAKE_STACK_FRAME $0
1014 SAVE_ALL
1015 call sys_execve
1016 movq %rax, RAX(%rsp)
1017 RESTORE_REST
1018 testq %rax,%rax
1019 je int_ret_from_sys_call /* zero result: leave via the IRET path */
1020 RESTORE_ARGS
1021 UNFAKE_STACK_FRAME
1022 ret
1023 CFI_ENDPROC
1024 ENDPROC(kernel_execve)
1025
/*
 * Exception entry points. Three shapes are used:
 *  - zeroentry handler:   no error code on the stack, goes through error_entry
 *  - errorentry handler:  error code already on the stack, goes through
 *                         error_entry
 *  - paranoidentry handler, followed by the paranoid exit path. Only debug
 *    instantiates paranoidexit with tracing (creating paranoid_exit1);
 *    the other paranoid entries jump to that shared label. nmi gets its own
 *    untraced copy when CONFIG_TRACE_IRQFLAGS is set.
 */
1026 KPROBE_ENTRY(page_fault)
1027 errorentry do_page_fault
1028 KPROBE_END(page_fault)
1029
1030 ENTRY(coprocessor_error)
1031 zeroentry do_coprocessor_error
1032 END(coprocessor_error)
1033
1034 ENTRY(simd_coprocessor_error)
1035 zeroentry do_simd_coprocessor_error
1036 END(simd_coprocessor_error)
1037
1038 ENTRY(device_not_available)
1039 zeroentry math_state_restore
1040 END(device_not_available)
1041
1042 /* runs on exception stack */
1043 KPROBE_ENTRY(debug)
1044 INTR_FRAME
1045 pushq $0 /* dummy error code */
1046 CFI_ADJUST_CFA_OFFSET 8
1047 paranoidentry do_debug, DEBUG_STACK
1048 paranoidexit
1049 KPROBE_END(debug)
1050
1051 /* runs on exception stack */
1052 KPROBE_ENTRY(nmi)
1053 INTR_FRAME
1054 pushq $-1
1055 CFI_ADJUST_CFA_OFFSET 8
1056 paranoidentry do_nmi, 0, 0
1057 #ifdef CONFIG_TRACE_IRQFLAGS
1058 paranoidexit 0
1059 #else
1060 jmp paranoid_exit1
1061 CFI_ENDPROC
1062 #endif
1063 KPROBE_END(nmi)
1064
1065 KPROBE_ENTRY(int3)
1066 INTR_FRAME
1067 pushq $0 /* dummy error code */
1068 CFI_ADJUST_CFA_OFFSET 8
1069 paranoidentry do_int3, DEBUG_STACK
1070 jmp paranoid_exit1
1071 CFI_ENDPROC
1072 KPROBE_END(int3)
1073
1074 ENTRY(overflow)
1075 zeroentry do_overflow
1076 END(overflow)
1077
1078 ENTRY(bounds)
1079 zeroentry do_bounds
1080 END(bounds)
1081
1082 ENTRY(invalid_op)
1083 zeroentry do_invalid_op
1084 END(invalid_op)
1085
1086 ENTRY(coprocessor_segment_overrun)
1087 zeroentry do_coprocessor_segment_overrun
1088 END(coprocessor_segment_overrun)
1089
1090 ENTRY(reserved)
1091 zeroentry do_reserved
1092 END(reserved)
1093
1094 /* runs on exception stack */
1095 ENTRY(double_fault)
1096 XCPT_FRAME
1097 paranoidentry do_double_fault
1098 jmp paranoid_exit1
1099 CFI_ENDPROC
1100 END(double_fault)
1101
1102 ENTRY(invalid_TSS)
1103 errorentry do_invalid_TSS
1104 END(invalid_TSS)
1105
1106 ENTRY(segment_not_present)
1107 errorentry do_segment_not_present
1108 END(segment_not_present)
1109
1110 /* runs on exception stack */
1111 ENTRY(stack_segment)
1112 XCPT_FRAME
1113 paranoidentry do_stack_segment
1114 jmp paranoid_exit1
1115 CFI_ENDPROC
1116 END(stack_segment)
1117
1118 KPROBE_ENTRY(general_protection)
1119 errorentry do_general_protection
1120 KPROBE_END(general_protection)
1121
1122 ENTRY(alignment_check)
1123 errorentry do_alignment_check
1124 END(alignment_check)
1125
1126 ENTRY(divide_error)
1127 zeroentry do_divide_error
1128 END(divide_error)
1129
1130 ENTRY(spurious_interrupt_bug)
1131 zeroentry do_spurious_interrupt_bug
1132 END(spurious_interrupt_bug)
1133
1134 #ifdef CONFIG_X86_MCE
1135 /* runs on exception stack */
1136 ENTRY(machine_check)
1137 INTR_FRAME
1138 pushq $0 /* dummy error code */
1139 CFI_ADJUST_CFA_OFFSET 8
1140 paranoidentry do_machine_check
1141 jmp paranoid_exit1
1142 CFI_ENDPROC
1143 END(machine_check)
1144 #endif
1145
1146 /* Call softirq on interrupt stack. Interrupts are off. */
/*
 * Sets up %rbp as a frame pointer, bumps pda_irqcount, moves to the stack
 * at pda_irqstackptr when the incremented count is zero, calls
 * __do_softirq, then returns to the original stack via leaveq and drops the
 * count again.
 */
1147 ENTRY(call_softirq)
1148 CFI_STARTPROC
1149 push %rbp
1150 CFI_ADJUST_CFA_OFFSET 8
1151 CFI_REL_OFFSET rbp,0
1152 mov %rsp,%rbp
1153 CFI_DEF_CFA_REGISTER rbp
1154 incl %gs:pda_irqcount
1155 cmove %gs:pda_irqstackptr,%rsp /* uses ZF from incl: switch only when the count became zero */
1156 push %rbp # backlink for old unwinder
1157 call __do_softirq
1158 leaveq /* rsp = rbp, then pop rbp: back on the original stack */
1159 CFI_DEF_CFA_REGISTER rsp
1160 CFI_ADJUST_CFA_OFFSET -8
1161 decl %gs:pda_irqcount
1162 ret
1163 CFI_ENDPROC
1164 ENDPROC(call_softirq)
1165
/*
 * ignore_sysret: minimal entry stub that sets %eax to -ENOSYS and returns
 * straight away with sysret. It touches no stack and saves no registers.
 * NOTE(review): presumably installed as the target for a SYSCALL flavour the
 * kernel does not service; confirm against the code that programs the
 * syscall MSRs.
 */
1166 KPROBE_ENTRY(ignore_sysret)
1167 CFI_STARTPROC
1168 mov $-ENOSYS,%eax
1169 sysret
1170 CFI_ENDPROC
1171 ENDPROC(ignore_sysret)
This page took 0.054467 seconds and 6 git commands to generate.