[PATCH] Fix an irqcount comment in entry.S
arch/x86_64/kernel/entry.S
1 /*
2 * linux/arch/x86_64/entry.S
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
6 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
7 */
8
9 /*
10 * entry.S contains the system-call and fault low-level handling routines.
11 *
12 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call.
14 *
15 * Normal syscalls and interrupts don't save a full stack frame; this is
16 * only done for syscall tracing, signals or fork/exec et al.
17 *
18 * A note on terminology:
19 * - top of stack: Architecture defined interrupt frame from SS to RIP
20 * at the top of the kernel process stack.
21 * - partial stack frame: partially saved registers up to R11.
22 * - full stack frame: like the partial stack frame, but with all registers saved.
23 *
24 * Some macro usage:
25 * - CFI macros are used to generate dwarf2 unwind information for better
26 * backtraces. They don't change any code.
27 * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
28 * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
29 * There are unfortunately lots of special cases where some registers are
30 * not touched. The macro is a big mess that should be cleaned up.
31 * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
32 * Gives a full stack frame.
33 * - ENTRY/END - Define functions in the symbol table.
34 * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
35 * frame that is otherwise undefined after a SYSCALL
36 * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
37 * - errorentry/paranoidentry/zeroentry - Define exception entry points.
38 */
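
To make the terminology above concrete, here is a C sketch of the frame the entry code builds on the kernel stack. The field order follows the x86_64 pt_regs layout of this kernel generation, but the struct name and comments are illustrative, not copied from a header.

/* Illustrative only: roughly the layout behind the R15..SS offsets used below. */
struct entry_frame {
        /* saved by SAVE_REST: present only in a "full stack frame" */
        unsigned long r15, r14, r13, r12, rbp, rbx;
        /* saved by SAVE_ARGS: the "partial stack frame" up to R11 */
        unsigned long r11, r10, r9, r8, rax, rcx, rdx, rsi, rdi;
        unsigned long orig_rax;              /* syscall number / error code slot */
        /* hardware frame, the "top of stack": pushed by the CPU (or faked) */
        unsigned long rip, cs, eflags, rsp, ss;
};
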
39
40 #include <linux/linkage.h>
41 #include <asm/segment.h>
42 #include <asm/cache.h>
43 #include <asm/errno.h>
44 #include <asm/dwarf2.h>
45 #include <asm/calling.h>
46 #include <asm/asm-offsets.h>
47 #include <asm/msr.h>
48 #include <asm/unistd.h>
49 #include <asm/thread_info.h>
50 #include <asm/hw_irq.h>
51 #include <asm/page.h>
52 #include <asm/irqflags.h>
53
54 .code64
55
56 #ifndef CONFIG_PREEMPT
57 #define retint_kernel retint_restore_args
58 #endif
59
60
61 .macro TRACE_IRQS_IRETQ offset=ARGOFFSET
62 #ifdef CONFIG_TRACE_IRQFLAGS
63 bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
64 jnc 1f
65 TRACE_IRQS_ON
66 1:
67 #endif
68 .endm
69
70 /*
71 * C code is not supposed to know about the undefined top of stack. Every time
72 * a C function with a pt_regs argument is called from the SYSCALL-based
73 * fast path, FIXUP_TOP_OF_STACK is needed.
74 * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
75 * manipulation.
76 */
77
78 /* %rsp:at FRAMEEND */
79 .macro FIXUP_TOP_OF_STACK tmp
80 movq %gs:pda_oldrsp,\tmp
81 movq \tmp,RSP(%rsp)
82 movq $__USER_DS,SS(%rsp)
83 movq $__USER_CS,CS(%rsp)
84 movq $-1,RCX(%rsp)
85 movq R11(%rsp),\tmp /* get eflags */
86 movq \tmp,EFLAGS(%rsp)
87 .endm
88
89 .macro RESTORE_TOP_OF_STACK tmp,offset=0
90 movq RSP-\offset(%rsp),\tmp
91 movq \tmp,%gs:pda_oldrsp
92 movq EFLAGS-\offset(%rsp),\tmp
93 movq \tmp,R11-\offset(%rsp)
94 .endm
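
In C terms, FIXUP_TOP_OF_STACK repairs the hardware part of the frame after a SYSCALL, which saved the user RIP in %rcx and rflags in %r11 and left the SS/CS/RSP/EFLAGS slots undefined. A loose sketch, reusing the frame struct shown earlier; the selector values here merely stand in for __USER_CS/__USER_DS:

/* Sketch only: slot names match the illustrative struct above. */
#define FAKE_USER_CS 0x33UL   /* stand-in for __USER_CS */
#define FAKE_USER_DS 0x2bUL   /* stand-in for __USER_DS */

static void fixup_top_of_stack(struct entry_frame *f, unsigned long pda_oldrsp)
{
        f->rsp    = pda_oldrsp;     /* user %rsp stashed in the PDA on syscall entry */
        f->ss     = FAKE_USER_DS;
        f->cs     = FAKE_USER_CS;
        f->rcx    = -1UL;           /* %rcx was clobbered by SYSCALL; report it as unknown */
        f->eflags = f->r11;         /* SYSCALL saved the user rflags in %r11 */
}
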
95
96 .macro FAKE_STACK_FRAME child_rip
97 /* push in order ss, rsp, eflags, cs, rip */
98 xorl %eax, %eax
99 pushq %rax /* ss */
100 CFI_ADJUST_CFA_OFFSET 8
101 /*CFI_REL_OFFSET ss,0*/
102 pushq %rax /* rsp */
103 CFI_ADJUST_CFA_OFFSET 8
104 CFI_REL_OFFSET rsp,0
105 pushq $(1<<9) /* eflags - interrupts on */
106 CFI_ADJUST_CFA_OFFSET 8
107 /*CFI_REL_OFFSET rflags,0*/
108 pushq $__KERNEL_CS /* cs */
109 CFI_ADJUST_CFA_OFFSET 8
110 /*CFI_REL_OFFSET cs,0*/
111 pushq \child_rip /* rip */
112 CFI_ADJUST_CFA_OFFSET 8
113 CFI_REL_OFFSET rip,0
114 pushq %rax /* orig rax */
115 CFI_ADJUST_CFA_OFFSET 8
116 .endm
117
118 .macro UNFAKE_STACK_FRAME
119 addq $8*6, %rsp
120 CFI_ADJUST_CFA_OFFSET -(6*8)
121 .endm
122
123 .macro CFI_DEFAULT_STACK start=1
124 .if \start
125 CFI_STARTPROC simple
126 CFI_DEF_CFA rsp,SS+8
127 .else
128 CFI_DEF_CFA_OFFSET SS+8
129 .endif
130 CFI_REL_OFFSET r15,R15
131 CFI_REL_OFFSET r14,R14
132 CFI_REL_OFFSET r13,R13
133 CFI_REL_OFFSET r12,R12
134 CFI_REL_OFFSET rbp,RBP
135 CFI_REL_OFFSET rbx,RBX
136 CFI_REL_OFFSET r11,R11
137 CFI_REL_OFFSET r10,R10
138 CFI_REL_OFFSET r9,R9
139 CFI_REL_OFFSET r8,R8
140 CFI_REL_OFFSET rax,RAX
141 CFI_REL_OFFSET rcx,RCX
142 CFI_REL_OFFSET rdx,RDX
143 CFI_REL_OFFSET rsi,RSI
144 CFI_REL_OFFSET rdi,RDI
145 CFI_REL_OFFSET rip,RIP
146 /*CFI_REL_OFFSET cs,CS*/
147 /*CFI_REL_OFFSET rflags,EFLAGS*/
148 CFI_REL_OFFSET rsp,RSP
149 /*CFI_REL_OFFSET ss,SS*/
150 .endm
151 /*
152 * A newly forked process directly context switches into this.
153 */
154 /* rdi: prev */
155 ENTRY(ret_from_fork)
156 CFI_DEFAULT_STACK
157 call schedule_tail
158 GET_THREAD_INFO(%rcx)
159 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
160 jnz rff_trace
161 rff_action:
162 RESTORE_REST
163 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
164 je int_ret_from_sys_call
165 testl $_TIF_IA32,threadinfo_flags(%rcx)
166 jnz int_ret_from_sys_call
167 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
168 jmp ret_from_sys_call
169 rff_trace:
170 movq %rsp,%rdi
171 call syscall_trace_leave
172 GET_THREAD_INFO(%rcx)
173 jmp rff_action
174 CFI_ENDPROC
175 END(ret_from_fork)
176
177 /*
178 * System call entry. Up to 6 arguments in registers are supported.
179 *
180 * SYSCALL does not save anything on the stack and does not change the
181 * stack pointer.
182 */
183
184 /*
185 * Register setup:
186 * rax system call number
187 * rdi arg0
188 * rcx return address for syscall/sysret, C arg3
189 * rsi arg1
190 * rdx arg2
191 * r10 arg3 (--> moved to rcx for C)
192 * r8 arg4
193 * r9 arg5
194 * r11 eflags for syscall/sysret, temporary for C
195 * r12-r15,rbp,rbx saved by C code, not touched.
196 *
197 * Interrupts are off on entry.
198 * Only called from user space.
199 *
200 * XXX if we had a free scratch register we could save the RSP into the stack frame
201 * and report it properly in ps. Unfortunately we don't have one.
202 *
203 * When the user can change the frame, always force IRET. That is because
204 * it deals with non-canonical addresses better. SYSRET has trouble
205 * with them due to bugs in both AMD and Intel CPUs.
206 */
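
The same register convention is visible from user space. As a small illustration (plain C with inline assembly, not kernel code), a raw write(2) puts the syscall number in %rax, the arguments in %rdi/%rsi/%rdx, and lets SYSCALL/SYSRET clobber %rcx and %r11:

#include <stddef.h>

/* Minimal user-space sketch of the x86-64 syscall register convention. */
static long raw_syscall3(long nr, long a0, long a1, long a2)
{
        long ret;
        asm volatile("syscall"
                     : "=a"(ret)
                     : "a"(nr), "D"(a0), "S"(a1), "d"(a2)
                     : "rcx", "r11", "memory");
        return ret;
}

int main(void)
{
        static const char msg[] = "hello via raw syscall\n";
        raw_syscall3(1 /* __NR_write on x86-64 */, 1 /* stdout */,
                     (long)msg, sizeof(msg) - 1);
        return 0;
}
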
207
208 ENTRY(system_call)
209 CFI_STARTPROC simple
210 CFI_DEF_CFA rsp,PDA_STACKOFFSET
211 CFI_REGISTER rip,rcx
212 /*CFI_REGISTER rflags,r11*/
213 swapgs
214 movq %rsp,%gs:pda_oldrsp
215 movq %gs:pda_kernelstack,%rsp
216 /*
217 * No need to follow this irqs off/on section - it's straight
218 * and short:
219 */
220 sti
221 SAVE_ARGS 8,1
222 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
223 movq %rcx,RIP-ARGOFFSET(%rsp)
224 CFI_REL_OFFSET rip,RIP-ARGOFFSET
225 GET_THREAD_INFO(%rcx)
226 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
227 CFI_REMEMBER_STATE
228 jnz tracesys
229 cmpq $__NR_syscall_max,%rax
230 ja badsys
231 movq %r10,%rcx
232 call *sys_call_table(,%rax,8) # XXX: rip relative
233 movq %rax,RAX-ARGOFFSET(%rsp)
234 /*
235 * Syscall return path ending with SYSRET (fast path)
236 * Has incomplete stack frame and undefined top of stack.
237 */
238 .globl ret_from_sys_call
239 ret_from_sys_call:
240 movl $_TIF_ALLWORK_MASK,%edi
241 /* edi: flagmask */
242 sysret_check:
243 GET_THREAD_INFO(%rcx)
244 cli
245 TRACE_IRQS_OFF
246 movl threadinfo_flags(%rcx),%edx
247 andl %edi,%edx
248 CFI_REMEMBER_STATE
249 jnz sysret_careful
250 /*
251 * sysretq will re-enable interrupts:
252 */
253 TRACE_IRQS_ON
254 movq RIP-ARGOFFSET(%rsp),%rcx
255 CFI_REGISTER rip,rcx
256 RESTORE_ARGS 0,-ARG_SKIP,1
257 /*CFI_REGISTER rflags,r11*/
258 movq %gs:pda_oldrsp,%rsp
259 swapgs
260 sysretq
261
262 /* Handle reschedules */
263 /* edx: work, edi: workmask */
264 sysret_careful:
265 CFI_RESTORE_STATE
266 bt $TIF_NEED_RESCHED,%edx
267 jnc sysret_signal
268 TRACE_IRQS_ON
269 sti
270 pushq %rdi
271 CFI_ADJUST_CFA_OFFSET 8
272 call schedule
273 popq %rdi
274 CFI_ADJUST_CFA_OFFSET -8
275 jmp sysret_check
276
277 /* Handle a signal */
278 sysret_signal:
279 TRACE_IRQS_ON
280 sti
281 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
282 jz 1f
283
284 /* Really a signal */
285 /* edx: work flags (arg3) */
286 leaq do_notify_resume(%rip),%rax
287 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
288 xorl %esi,%esi # oldset -> arg2
289 call ptregscall_common
290 1: movl $_TIF_NEED_RESCHED,%edi
291 /* Use IRET because the user could have changed the frame. This
292 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
293 cli
294 TRACE_IRQS_OFF
295 jmp int_with_check
296
297 badsys:
298 movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
299 jmp ret_from_sys_call
300
301 /* Do syscall tracing */
302 tracesys:
303 CFI_RESTORE_STATE
304 SAVE_REST
305 movq $-ENOSYS,RAX(%rsp)
306 FIXUP_TOP_OF_STACK %rdi
307 movq %rsp,%rdi
308 call syscall_trace_enter
309 LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
310 RESTORE_REST
311 cmpq $__NR_syscall_max,%rax
312 ja 1f
313 movq %r10,%rcx /* fixup for C */
314 call *sys_call_table(,%rax,8)
315 1: movq %rax,RAX-ARGOFFSET(%rsp)
316 /* Use IRET because the user could have changed the frame */
317 jmp int_ret_from_sys_call
318 CFI_ENDPROC
319 END(system_call)
320
321 /*
322 * Syscall return path ending with IRET.
323 * Has correct top of stack, but partial stack frame.
324 */
325 ENTRY(int_ret_from_sys_call)
326 CFI_STARTPROC simple
327 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
328 /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
329 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
330 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
331 /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
332 CFI_REL_OFFSET rip,RIP-ARGOFFSET
333 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
334 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
335 CFI_REL_OFFSET rax,RAX-ARGOFFSET
336 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
337 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
338 CFI_REL_OFFSET r8,R8-ARGOFFSET
339 CFI_REL_OFFSET r9,R9-ARGOFFSET
340 CFI_REL_OFFSET r10,R10-ARGOFFSET
341 CFI_REL_OFFSET r11,R11-ARGOFFSET
342 cli
343 TRACE_IRQS_OFF
344 testl $3,CS-ARGOFFSET(%rsp)
345 je retint_restore_args
346 movl $_TIF_ALLWORK_MASK,%edi
347 /* edi: mask to check */
348 int_with_check:
349 GET_THREAD_INFO(%rcx)
350 movl threadinfo_flags(%rcx),%edx
351 andl %edi,%edx
352 jnz int_careful
353 andl $~TS_COMPAT,threadinfo_status(%rcx)
354 jmp retint_swapgs
355
356 /* Either reschedule or signal or syscall exit tracking needed. */
357 /* First do a reschedule test. */
358 /* edx: work, edi: workmask */
359 int_careful:
360 bt $TIF_NEED_RESCHED,%edx
361 jnc int_very_careful
362 TRACE_IRQS_ON
363 sti
364 pushq %rdi
365 CFI_ADJUST_CFA_OFFSET 8
366 call schedule
367 popq %rdi
368 CFI_ADJUST_CFA_OFFSET -8
369 cli
370 TRACE_IRQS_OFF
371 jmp int_with_check
372
373 /* handle signals and tracing -- both require a full stack frame */
374 int_very_careful:
375 TRACE_IRQS_ON
376 sti
377 SAVE_REST
378 /* Check for syscall exit trace */
379 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
380 jz int_signal
381 pushq %rdi
382 CFI_ADJUST_CFA_OFFSET 8
383 leaq 8(%rsp),%rdi # &ptregs -> arg1
384 call syscall_trace_leave
385 popq %rdi
386 CFI_ADJUST_CFA_OFFSET -8
387 andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
388 cli
389 TRACE_IRQS_OFF
390 jmp int_restore_rest
391
392 int_signal:
393 testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
394 jz 1f
395 movq %rsp,%rdi # &ptregs -> arg1
396 xorl %esi,%esi # oldset -> arg2
397 call do_notify_resume
398 1: movl $_TIF_NEED_RESCHED,%edi
399 int_restore_rest:
400 RESTORE_REST
401 cli
402 TRACE_IRQS_OFF
403 jmp int_with_check
404 CFI_ENDPROC
405 END(int_ret_from_sys_call)
406
407 /*
408 * Certain special system calls need to save a full stack frame.
409 */
410
411 .macro PTREGSCALL label,func,arg
412 .globl \label
413 \label:
414 leaq \func(%rip),%rax
415 leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
416 jmp ptregscall_common
417 END(\label)
418 .endm
419
420 CFI_STARTPROC
421
422 PTREGSCALL stub_clone, sys_clone, %r8
423 PTREGSCALL stub_fork, sys_fork, %rdi
424 PTREGSCALL stub_vfork, sys_vfork, %rdi
425 PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
426 PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
427 PTREGSCALL stub_iopl, sys_iopl, %rsi
428
429 ENTRY(ptregscall_common)
430 popq %r11
431 CFI_ADJUST_CFA_OFFSET -8
432 CFI_REGISTER rip, r11
433 SAVE_REST
434 movq %r11, %r15
435 CFI_REGISTER rip, r15
436 FIXUP_TOP_OF_STACK %r11
437 call *%rax
438 RESTORE_TOP_OF_STACK %r11
439 movq %r15, %r11
440 CFI_REGISTER rip, r11
441 RESTORE_REST
442 pushq %r11
443 CFI_ADJUST_CFA_OFFSET 8
444 CFI_REL_OFFSET rip, 0
445 ret
446 CFI_ENDPROC
447 END(ptregscall_common)
448
449 ENTRY(stub_execve)
450 CFI_STARTPROC
451 popq %r11
452 CFI_ADJUST_CFA_OFFSET -8
453 CFI_REGISTER rip, r11
454 SAVE_REST
455 FIXUP_TOP_OF_STACK %r11
456 call sys_execve
457 RESTORE_TOP_OF_STACK %r11
458 movq %rax,RAX(%rsp)
459 RESTORE_REST
460 jmp int_ret_from_sys_call
461 CFI_ENDPROC
462 END(stub_execve)
463
464 /*
465 * sigreturn is special because it needs to restore all registers on return.
466 * This cannot be done with SYSRET, so use the IRET return path instead.
467 */
468 ENTRY(stub_rt_sigreturn)
469 CFI_STARTPROC
470 addq $8, %rsp
471 CFI_ADJUST_CFA_OFFSET -8
472 SAVE_REST
473 movq %rsp,%rdi
474 FIXUP_TOP_OF_STACK %r11
475 call sys_rt_sigreturn
476 movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
477 RESTORE_REST
478 jmp int_ret_from_sys_call
479 CFI_ENDPROC
480 END(stub_rt_sigreturn)
481
482 /*
483 * initial frame state for interrupts and exceptions
484 */
485 .macro _frame ref
486 CFI_STARTPROC simple
487 CFI_DEF_CFA rsp,SS+8-\ref
488 /*CFI_REL_OFFSET ss,SS-\ref*/
489 CFI_REL_OFFSET rsp,RSP-\ref
490 /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
491 /*CFI_REL_OFFSET cs,CS-\ref*/
492 CFI_REL_OFFSET rip,RIP-\ref
493 .endm
494
495 /* initial frame state for interrupts (and exceptions without error code) */
496 #define INTR_FRAME _frame RIP
497 /* initial frame state for exceptions with error code (and interrupts with
498 vector already pushed) */
499 #define XCPT_FRAME _frame ORIG_RAX
500
501 /*
502 * Interrupt entry/exit.
503 *
504 * Interrupt entry points save only callee-clobbered registers in the fast path.
505 *
506 * Entry runs with interrupts off.
507 */
508
509 /* 0(%rsp): interrupt number */
510 .macro interrupt func
511 cld
512 SAVE_ARGS
513 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
514 pushq %rbp
515 CFI_ADJUST_CFA_OFFSET 8
516 CFI_REL_OFFSET rbp, 0
517 movq %rsp,%rbp
518 CFI_DEF_CFA_REGISTER rbp
519 testl $3,CS(%rdi)
520 je 1f
521 swapgs
522 /* irqcount is used to check if a CPU is already on an interrupt
523 stack or not. While this is essentially redundant with preempt_count,
524 it is a little cheaper to use a separate counter in the PDA
525 (short of moving irq_enter into assembly, which would be too
526 much work) */
527 1: incl %gs:pda_irqcount
528 cmoveq %gs:pda_irqstackptr,%rsp
529 push %rbp # backlink for old unwinder
530 /*
531 * We entered an interrupt context - irqs are off:
532 */
533 TRACE_IRQS_OFF
534 call \func
535 .endm
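
The pda_irqcount logic in the macro above amounts to a per-CPU nesting counter that starts at -1, so only the outermost interrupt switches to the per-CPU interrupt stack. A C sketch with illustrative names, not kernel code:

struct pda_like {
        int irqcount;                   /* -1 while running on the task stack */
        unsigned long *irqstackptr;     /* top of this CPU's interrupt stack */
};

static unsigned long *irq_stack_enter(struct pda_like *pda, unsigned long *sp)
{
        if (++pda->irqcount == 0)       /* outermost interrupt on this CPU */
                sp = pda->irqstackptr;  /* switch to the interrupt stack */
        return sp;                      /* nested interrupt: keep the current stack */
}

static void irq_stack_leave(struct pda_like *pda)
{
        pda->irqcount--;                /* back to -1 after the outermost handler */
}
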
536
537 ENTRY(common_interrupt)
538 XCPT_FRAME
539 interrupt do_IRQ
540 /* 0(%rsp): oldrsp-ARGOFFSET */
541 ret_from_intr:
542 cli
543 TRACE_IRQS_OFF
544 decl %gs:pda_irqcount
545 leaveq
546 CFI_DEF_CFA_REGISTER rsp
547 CFI_ADJUST_CFA_OFFSET -8
548 exit_intr:
549 GET_THREAD_INFO(%rcx)
550 testl $3,CS-ARGOFFSET(%rsp)
551 je retint_kernel
552
553 /* Interrupt came from user space */
554 /*
555 * Has a correct top of stack, but a partial stack frame
556 * %rcx: thread info. Interrupts off.
557 */
558 retint_with_reschedule:
559 movl $_TIF_WORK_MASK,%edi
560 retint_check:
561 movl threadinfo_flags(%rcx),%edx
562 andl %edi,%edx
563 CFI_REMEMBER_STATE
564 jnz retint_careful
565 retint_swapgs:
566 /*
567 * The iretq could re-enable interrupts:
568 */
569 cli
570 TRACE_IRQS_IRETQ
571 swapgs
572 jmp restore_args
573
574 retint_restore_args:
575 cli
576 /*
577 * The iretq could re-enable interrupts:
578 */
579 TRACE_IRQS_IRETQ
580 restore_args:
581 RESTORE_ARGS 0,8,0
582 iret_label:
583 iretq
584
585 .section __ex_table,"a"
586 .quad iret_label,bad_iret
587 .previous
588 .section .fixup,"ax"
589 /* force a signal here? this matches i386 behaviour */
590 /* running with kernel gs */
591 bad_iret:
592 movq $11,%rdi /* SIGSEGV */
593 TRACE_IRQS_ON
594 sti
595 jmp do_exit
596 .previous
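
The __ex_table entry above is the standard kernel fixup pattern: a table of (faulting instruction, fixup) address pairs that the fault handler consults, so a faulting iretq continues at bad_iret instead of taking the kernel down. A conceptual C sketch; the real lookup searches a sorted table, and the names here are illustrative:

struct ex_entry {
        unsigned long insn;     /* address that may fault, e.g. iret_label */
        unsigned long fixup;    /* where to continue instead, e.g. bad_iret */
};

/* Returns 1 and redirects the saved RIP if a fixup exists for the fault. */
static int apply_fixup(unsigned long *fault_rip,
                       const struct ex_entry *table, unsigned int n)
{
        for (unsigned int i = 0; i < n; i++) {
                if (table[i].insn == *fault_rip) {
                        *fault_rip = table[i].fixup;
                        return 1;
                }
        }
        return 0;               /* no fixup entry: the fault is treated as fatal */
}
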
597
598 /* edi: workmask, edx: work */
599 retint_careful:
600 CFI_RESTORE_STATE
601 bt $TIF_NEED_RESCHED,%edx
602 jnc retint_signal
603 TRACE_IRQS_ON
604 sti
605 pushq %rdi
606 CFI_ADJUST_CFA_OFFSET 8
607 call schedule
608 popq %rdi
609 CFI_ADJUST_CFA_OFFSET -8
610 GET_THREAD_INFO(%rcx)
611 cli
612 TRACE_IRQS_OFF
613 jmp retint_check
614
615 retint_signal:
616 testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
617 jz retint_swapgs
618 TRACE_IRQS_ON
619 sti
620 SAVE_REST
621 movq $-1,ORIG_RAX(%rsp)
622 xorl %esi,%esi # oldset
623 movq %rsp,%rdi # &pt_regs
624 call do_notify_resume
625 RESTORE_REST
626 cli
627 TRACE_IRQS_OFF
628 movl $_TIF_NEED_RESCHED,%edi
629 GET_THREAD_INFO(%rcx)
630 jmp retint_check
631
632 #ifdef CONFIG_PREEMPT
633 /* Returning to kernel space. Check if we need preemption */
634 /* rcx: threadinfo. interrupts off. */
635 ENTRY(retint_kernel)
636 cmpl $0,threadinfo_preempt_count(%rcx)
637 jnz retint_restore_args
638 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
639 jnc retint_restore_args
640 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
641 jnc retint_restore_args
642 call preempt_schedule_irq
643 jmp exit_intr
644 #endif
645
646 CFI_ENDPROC
647 END(common_interrupt)
648
649 /*
650 * APIC interrupts.
651 */
652 .macro apicinterrupt num,func
653 INTR_FRAME
654 pushq $~(\num)
655 CFI_ADJUST_CFA_OFFSET 8
656 interrupt \func
657 jmp ret_from_intr
658 CFI_ENDPROC
659 .endm
660
661 ENTRY(thermal_interrupt)
662 apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
663 END(thermal_interrupt)
664
665 ENTRY(threshold_interrupt)
666 apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
667 END(threshold_interrupt)
668
669 #ifdef CONFIG_SMP
670 ENTRY(reschedule_interrupt)
671 apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
672 END(reschedule_interrupt)
673
674 .macro INVALIDATE_ENTRY num
675 ENTRY(invalidate_interrupt\num)
676 apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
677 END(invalidate_interrupt\num)
678 .endm
679
680 INVALIDATE_ENTRY 0
681 INVALIDATE_ENTRY 1
682 INVALIDATE_ENTRY 2
683 INVALIDATE_ENTRY 3
684 INVALIDATE_ENTRY 4
685 INVALIDATE_ENTRY 5
686 INVALIDATE_ENTRY 6
687 INVALIDATE_ENTRY 7
688
689 ENTRY(call_function_interrupt)
690 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
691 END(call_function_interrupt)
692 #endif
693
694 ENTRY(apic_timer_interrupt)
695 apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
696 END(apic_timer_interrupt)
697
698 ENTRY(error_interrupt)
699 apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
700 END(error_interrupt)
701
702 ENTRY(spurious_interrupt)
703 apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
704 END(spurious_interrupt)
705
706 /*
707 * Exception entry points.
708 */
709 .macro zeroentry sym
710 INTR_FRAME
711 pushq $0 /* push error code/oldrax */
712 CFI_ADJUST_CFA_OFFSET 8
713 pushq %rax /* push real oldrax to the rdi slot */
714 CFI_ADJUST_CFA_OFFSET 8
715 leaq \sym(%rip),%rax
716 jmp error_entry
717 CFI_ENDPROC
718 .endm
719
720 .macro errorentry sym
721 XCPT_FRAME
722 pushq %rax
723 CFI_ADJUST_CFA_OFFSET 8
724 leaq \sym(%rip),%rax
725 jmp error_entry
726 CFI_ENDPROC
727 .endm
728
729 /* error code is on the stack already */
730 /* handle NMI like exceptions that can happen everywhere */
731 .macro paranoidentry sym, ist=0, irqtrace=1
732 SAVE_ALL
733 cld
734 movl $1,%ebx
735 movl $MSR_GS_BASE,%ecx
736 rdmsr
737 testl %edx,%edx
738 js 1f
739 swapgs
740 xorl %ebx,%ebx
741 1:
742 .if \ist
743 movq %gs:pda_data_offset, %rbp
744 .endif
745 movq %rsp,%rdi
746 movq ORIG_RAX(%rsp),%rsi
747 movq $-1,ORIG_RAX(%rsp)
748 .if \ist
749 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
750 .endif
751 call \sym
752 .if \ist
753 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
754 .endif
755 cli
756 .if \irqtrace
757 TRACE_IRQS_OFF
758 .endif
759 .endm
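
The rdmsr/js sequence in paranoidentry decides whether swapgs is still needed by checking the sign of the current GS base: a kernel GS base lives in the upper, negative half of the address space. A hedged C rendering of that test; rdmsrl_pair() is an illustrative helper, not a real kernel API, and this only makes sense in kernel context:

#define MSR_GS_BASE_NR 0xc0000101u

static unsigned long rdmsrl_pair(unsigned int msr)
{
        unsigned int lo, hi;
        asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
        return ((unsigned long)hi << 32) | lo;
}

static int still_need_swapgs(void)
{
        long gs_base = (long)rdmsrl_pair(MSR_GS_BASE_NR);
        return gs_base >= 0;    /* non-negative base: still the user's GS */
}
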
760
761 /*
762 * "Paranoid" exit path from exception stack.
763 * Paranoid because this is used by NMIs and cannot take
764 * any kernel state for granted.
765 * We don't do kernel preemption checks here, because only
766 * NMI should be common and it does not enable IRQs and
767 * cannot get reschedule ticks.
768 *
769 * "trace" is 0 for the NMI handler only, because irq-tracing
770 * is fundamentally NMI-unsafe. (we cannot change the soft and
771 * hard flags at once, atomically)
772 */
773 .macro paranoidexit trace=1
774 /* ebx: no swapgs flag */
775 paranoid_exit\trace:
776 testl %ebx,%ebx /* swapgs needed? */
777 jnz paranoid_restore\trace
778 testl $3,CS(%rsp)
779 jnz paranoid_userspace\trace
780 paranoid_swapgs\trace:
781 .if \trace
782 TRACE_IRQS_IRETQ 0
783 .endif
784 swapgs
785 paranoid_restore\trace:
786 RESTORE_ALL 8
787 iretq
788 paranoid_userspace\trace:
789 GET_THREAD_INFO(%rcx)
790 movl threadinfo_flags(%rcx),%ebx
791 andl $_TIF_WORK_MASK,%ebx
792 jz paranoid_swapgs\trace
793 movq %rsp,%rdi /* &pt_regs */
794 call sync_regs
795 movq %rax,%rsp /* switch stack for scheduling */
796 testl $_TIF_NEED_RESCHED,%ebx
797 jnz paranoid_schedule\trace
798 movl %ebx,%edx /* arg3: thread flags */
799 .if \trace
800 TRACE_IRQS_ON
801 .endif
802 sti
803 xorl %esi,%esi /* arg2: oldset */
804 movq %rsp,%rdi /* arg1: &pt_regs */
805 call do_notify_resume
806 cli
807 .if \trace
808 TRACE_IRQS_OFF
809 .endif
810 jmp paranoid_userspace\trace
811 paranoid_schedule\trace:
812 .if \trace
813 TRACE_IRQS_ON
814 .endif
815 sti
816 call schedule
817 cli
818 .if \trace
819 TRACE_IRQS_OFF
820 .endif
821 jmp paranoid_userspace\trace
822 CFI_ENDPROC
823 .endm
824
825 /*
826 * Exception entry point. This expects an error code/orig_rax on the stack
827 * and the exception handler in %rax.
828 */
829 KPROBE_ENTRY(error_entry)
830 _frame RDI
831 /* rdi slot contains rax, oldrax contains error code */
832 cld
833 subq $14*8,%rsp
834 CFI_ADJUST_CFA_OFFSET (14*8)
835 movq %rsi,13*8(%rsp)
836 CFI_REL_OFFSET rsi,RSI
837 movq 14*8(%rsp),%rsi /* load rax from rdi slot */
838 movq %rdx,12*8(%rsp)
839 CFI_REL_OFFSET rdx,RDX
840 movq %rcx,11*8(%rsp)
841 CFI_REL_OFFSET rcx,RCX
842 movq %rsi,10*8(%rsp) /* store rax */
843 CFI_REL_OFFSET rax,RAX
844 movq %r8, 9*8(%rsp)
845 CFI_REL_OFFSET r8,R8
846 movq %r9, 8*8(%rsp)
847 CFI_REL_OFFSET r9,R9
848 movq %r10,7*8(%rsp)
849 CFI_REL_OFFSET r10,R10
850 movq %r11,6*8(%rsp)
851 CFI_REL_OFFSET r11,R11
852 movq %rbx,5*8(%rsp)
853 CFI_REL_OFFSET rbx,RBX
854 movq %rbp,4*8(%rsp)
855 CFI_REL_OFFSET rbp,RBP
856 movq %r12,3*8(%rsp)
857 CFI_REL_OFFSET r12,R12
858 movq %r13,2*8(%rsp)
859 CFI_REL_OFFSET r13,R13
860 movq %r14,1*8(%rsp)
861 CFI_REL_OFFSET r14,R14
862 movq %r15,(%rsp)
863 CFI_REL_OFFSET r15,R15
864 xorl %ebx,%ebx
865 testl $3,CS(%rsp)
866 je error_kernelspace
867 error_swapgs:
868 swapgs
869 error_sti:
870 movq %rdi,RDI(%rsp)
871 movq %rsp,%rdi
872 movq ORIG_RAX(%rsp),%rsi /* get error code */
873 movq $-1,ORIG_RAX(%rsp)
874 call *%rax
875 /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
876 error_exit:
877 movl %ebx,%eax
878 RESTORE_REST
879 cli
880 TRACE_IRQS_OFF
881 GET_THREAD_INFO(%rcx)
882 testl %eax,%eax
883 jne retint_kernel
884 movl threadinfo_flags(%rcx),%edx
885 movl $_TIF_WORK_MASK,%edi
886 andl %edi,%edx
887 jnz retint_careful
888 /*
889 * The iret might restore flags:
890 */
891 TRACE_IRQS_IRETQ
892 swapgs
893 RESTORE_ARGS 0,8,0
894 jmp iret_label
895 CFI_ENDPROC
896
897 error_kernelspace:
898 incl %ebx
899 /* There are two places in the kernel that can potentially fault with
900 usergs. Handle them here. The exception handlers after
901 iret run with kernel gs again, so don't set the user space flag.
902 B-stepping K8s sometimes report a truncated RIP for IRET
903 exceptions returning to compat mode. Check for these here too. */
904 leaq iret_label(%rip),%rbp
905 cmpq %rbp,RIP(%rsp)
906 je error_swapgs
907 movl %ebp,%ebp /* zero extend */
908 cmpq %rbp,RIP(%rsp)
909 je error_swapgs
910 cmpq $gs_change,RIP(%rsp)
911 je error_swapgs
912 jmp error_sti
913 KPROBE_END(error_entry)
914
915 /* Reload gs selector with exception handling */
916 /* edi: new selector */
917 ENTRY(load_gs_index)
918 CFI_STARTPROC
919 pushf
920 CFI_ADJUST_CFA_OFFSET 8
921 cli
922 swapgs
923 gs_change:
924 movl %edi,%gs
925 2: mfence /* workaround */
926 swapgs
927 popf
928 CFI_ADJUST_CFA_OFFSET -8
929 ret
930 CFI_ENDPROC
931 ENDPROC(load_gs_index)
932
933 .section __ex_table,"a"
934 .align 8
935 .quad gs_change,bad_gs
936 .previous
937 .section .fixup,"ax"
938 /* running with kernelgs */
939 bad_gs:
940 swapgs /* switch back to user gs */
941 xorl %eax,%eax
942 movl %eax,%gs
943 jmp 2b
944 .previous
945
946 /*
947 * Create a kernel thread.
948 *
949 * C extern interface:
950 * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
951 *
952 * asm input arguments:
953 * rdi: fn, rsi: arg, rdx: flags
954 */
955 ENTRY(kernel_thread)
956 CFI_STARTPROC
957 FAKE_STACK_FRAME $child_rip
958 SAVE_ALL
959
960 # rdi: flags, rsi: usp, rdx: will be &pt_regs
961 movq %rdx,%rdi
962 orq kernel_thread_flags(%rip),%rdi
963 movq $-1, %rsi
964 movq %rsp, %rdx
965
966 xorl %r8d,%r8d
967 xorl %r9d,%r9d
968
969 # clone now
970 call do_fork
971 movq %rax,RAX(%rsp)
972 xorl %edi,%edi
973
974 /*
975 * It isn't worth checking for a reschedule here,
976 * so internally to the x86_64 port you can rely on kernel_thread()
977 * not rescheduling the child before returning; this avoids the need
978 * for hacks, for example to fork off the per-CPU idle tasks.
979 * [Hopefully no generic code relies on the reschedule -AK]
980 */
981 RESTORE_ALL
982 UNFAKE_STACK_FRAME
983 ret
984 CFI_ENDPROC
985 ENDPROC(kernel_thread)
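
A hedged sketch of how a caller of this era might use the kernel_thread() interface documented above; the worker function, its body, and the flag choice are illustrative and this only builds inside a kernel tree:

static int background_worker(void *arg)
{
        /* runs in the new thread; %rdi/%rsi rearranged by child_rip in entry.S */
        return 0;
}

static void spawn_background_worker(void *ctx)
{
        long pid = kernel_thread(background_worker, ctx,
                                 CLONE_FS | CLONE_FILES | SIGCHLD);
        if (pid < 0)
                printk("worker thread creation failed: %ld\n", pid);
}
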
986
987 child_rip:
988 pushq $0 # fake return address
989 CFI_STARTPROC
990 /*
991 * Here we are in the child and the registers are set as they were
992 * at kernel_thread() invocation in the parent.
993 */
994 movq %rdi, %rax
995 movq %rsi, %rdi
996 call *%rax
997 # exit
998 xorl %edi, %edi
999 call do_exit
1000 CFI_ENDPROC
1001 ENDPROC(child_rip)
1002
1003 /*
1004 * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
1005 *
1006 * C extern interface:
1007 * extern long execve(char *name, char **argv, char **envp)
1008 *
1009 * asm input arguments:
1010 * rdi: name, rsi: argv, rdx: envp
1011 *
1012 * We want to fall back into:
1013 * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
1014 *
1015 * do_sys_execve asm fallback arguments:
1016 * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
1017 */
1018 ENTRY(execve)
1019 CFI_STARTPROC
1020 FAKE_STACK_FRAME $0
1021 SAVE_ALL
1022 call sys_execve
1023 movq %rax, RAX(%rsp)
1024 RESTORE_REST
1025 testq %rax,%rax
1026 je int_ret_from_sys_call
1027 RESTORE_ARGS
1028 UNFAKE_STACK_FRAME
1029 ret
1030 CFI_ENDPROC
1031 ENDPROC(execve)
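
For contrast, the user-space face of this interface is the ordinary execve(2) call; a minimal, runnable example of the name/argv/envp triple described in the comment above:

#include <unistd.h>

int main(void)
{
        char *argv[] = { "/bin/echo", "hello", (char *)0 };
        char *envp[] = { "PATH=/bin:/usr/bin", (char *)0 };

        execve("/bin/echo", argv, envp);
        return 1;   /* reached only if execve() failed */
}
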
1032
1033 KPROBE_ENTRY(page_fault)
1034 errorentry do_page_fault
1035 KPROBE_END(page_fault)
1036
1037 ENTRY(coprocessor_error)
1038 zeroentry do_coprocessor_error
1039 END(coprocessor_error)
1040
1041 ENTRY(simd_coprocessor_error)
1042 zeroentry do_simd_coprocessor_error
1043 END(simd_coprocessor_error)
1044
1045 ENTRY(device_not_available)
1046 zeroentry math_state_restore
1047 END(device_not_available)
1048
1049 /* runs on exception stack */
1050 KPROBE_ENTRY(debug)
1051 INTR_FRAME
1052 pushq $0
1053 CFI_ADJUST_CFA_OFFSET 8
1054 paranoidentry do_debug, DEBUG_STACK
1055 paranoidexit
1056 KPROBE_END(debug)
1057
1058 /* runs on exception stack */
1059 KPROBE_ENTRY(nmi)
1060 INTR_FRAME
1061 pushq $-1
1062 CFI_ADJUST_CFA_OFFSET 8
1063 paranoidentry do_nmi, 0, 0
1064 #ifdef CONFIG_TRACE_IRQFLAGS
1065 paranoidexit 0
1066 #else
1067 jmp paranoid_exit1
1068 CFI_ENDPROC
1069 #endif
1070 KPROBE_END(nmi)
1071
1072 KPROBE_ENTRY(int3)
1073 INTR_FRAME
1074 pushq $0
1075 CFI_ADJUST_CFA_OFFSET 8
1076 paranoidentry do_int3, DEBUG_STACK
1077 jmp paranoid_exit1
1078 CFI_ENDPROC
1079 KPROBE_END(int3)
1080
1081 ENTRY(overflow)
1082 zeroentry do_overflow
1083 END(overflow)
1084
1085 ENTRY(bounds)
1086 zeroentry do_bounds
1087 END(bounds)
1088
1089 ENTRY(invalid_op)
1090 zeroentry do_invalid_op
1091 END(invalid_op)
1092
1093 ENTRY(coprocessor_segment_overrun)
1094 zeroentry do_coprocessor_segment_overrun
1095 END(coprocessor_segment_overrun)
1096
1097 ENTRY(reserved)
1098 zeroentry do_reserved
1099 END(reserved)
1100
1101 /* runs on exception stack */
1102 ENTRY(double_fault)
1103 XCPT_FRAME
1104 paranoidentry do_double_fault
1105 jmp paranoid_exit1
1106 CFI_ENDPROC
1107 END(double_fault)
1108
1109 ENTRY(invalid_TSS)
1110 errorentry do_invalid_TSS
1111 END(invalid_TSS)
1112
1113 ENTRY(segment_not_present)
1114 errorentry do_segment_not_present
1115 END(segment_not_present)
1116
1117 /* runs on exception stack */
1118 ENTRY(stack_segment)
1119 XCPT_FRAME
1120 paranoidentry do_stack_segment
1121 jmp paranoid_exit1
1122 CFI_ENDPROC
1123 END(stack_segment)
1124
1125 KPROBE_ENTRY(general_protection)
1126 errorentry do_general_protection
1127 KPROBE_END(general_protection)
1128
1129 ENTRY(alignment_check)
1130 errorentry do_alignment_check
1131 END(alignment_check)
1132
1133 ENTRY(divide_error)
1134 zeroentry do_divide_error
1135 END(divide_error)
1136
1137 ENTRY(spurious_interrupt_bug)
1138 zeroentry do_spurious_interrupt_bug
1139 END(spurious_interrupt_bug)
1140
1141 #ifdef CONFIG_X86_MCE
1142 /* runs on exception stack */
1143 ENTRY(machine_check)
1144 INTR_FRAME
1145 pushq $0
1146 CFI_ADJUST_CFA_OFFSET 8
1147 paranoidentry do_machine_check
1148 jmp paranoid_exit1
1149 CFI_ENDPROC
1150 END(machine_check)
1151 #endif
1152
1153 /* Call softirq on interrupt stack. Interrupts are off. */
1154 ENTRY(call_softirq)
1155 CFI_STARTPROC
1156 push %rbp
1157 CFI_ADJUST_CFA_OFFSET 8
1158 CFI_REL_OFFSET rbp,0
1159 mov %rsp,%rbp
1160 CFI_DEF_CFA_REGISTER rbp
1161 incl %gs:pda_irqcount
1162 cmove %gs:pda_irqstackptr,%rsp
1163 push %rbp # backlink for old unwinder
1164 call __do_softirq
1165 leaveq
1166 CFI_DEF_CFA_REGISTER rsp
1167 CFI_ADJUST_CFA_OFFSET -8
1168 decl %gs:pda_irqcount
1169 ret
1170 CFI_ENDPROC
1171 ENDPROC(call_softirq)
1172
1173 #ifdef CONFIG_STACK_UNWIND
1174 ENTRY(arch_unwind_init_running)
1175 CFI_STARTPROC
1176 movq %r15, R15(%rdi)
1177 movq %r14, R14(%rdi)
1178 xchgq %rsi, %rdx
1179 movq %r13, R13(%rdi)
1180 movq %r12, R12(%rdi)
1181 xorl %eax, %eax
1182 movq %rbp, RBP(%rdi)
1183 movq %rbx, RBX(%rdi)
1184 movq (%rsp), %rcx
1185 movq %rax, R11(%rdi)
1186 movq %rax, R10(%rdi)
1187 movq %rax, R9(%rdi)
1188 movq %rax, R8(%rdi)
1189 movq %rax, RAX(%rdi)
1190 movq %rax, RCX(%rdi)
1191 movq %rax, RDX(%rdi)
1192 movq %rax, RSI(%rdi)
1193 movq %rax, RDI(%rdi)
1194 movq %rax, ORIG_RAX(%rdi)
1195 movq %rcx, RIP(%rdi)
1196 leaq 8(%rsp), %rcx
1197 movq $__KERNEL_CS, CS(%rdi)
1198 movq %rax, EFLAGS(%rdi)
1199 movq %rcx, RSP(%rdi)
1200 movq $__KERNEL_DS, SS(%rdi)
1201 jmpq *%rdx
1202 CFI_ENDPROC
1203 ENDPROC(arch_unwind_init_running)
1204 #endif