2 * Compatibility mode system call entry point for x86-64.
4 * Copyright 2000-2002 Andi Kleen, SuSE Labs.
7 #include <asm/dwarf2.h>
8 #include <asm/calling.h>
9 #include <asm/asm-offsets.h>
10 #include <asm/current.h>
11 #include <asm/errno.h>
12 #include <asm/ia32_unistd.h>
13 #include <asm/thread_info.h>
14 #include <asm/segment.h>
15 #include <asm/irqflags.h>
18 #include <linux/linkage.h>
19 #include <linux/err.h>
21 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
22 #include <linux/elf-em.h>
23 #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
/*
 * AUDIT_ARCH_I386 above names __AUDIT_ARCH_LE before the line below
 * defines it. That is fine for the C preprocessor: a macro body is only
 * expanded where the macro is used, by which point both definitions exist.
 */
24 #define __AUDIT_ARCH_LE 0x40000000
26 #ifndef CONFIG_AUDITSYSCALL
/*
 * Without CONFIG_AUDITSYSCALL both audit exit names are plain aliases
 * for ia32_ret_from_sys_call. The matching #endif is not visible in
 * this excerpt.
 */
27 #define sysexit_audit ia32_ret_from_sys_call
28 #define sysretl_audit ia32_ret_from_sys_call
31 .section .entry.text, "ax"
33 .macro IA32_ARG_FIXUP noebp=0
/*
 * Takes one optional parameter, noebp (default 0). Only one body line
 * is visible in this excerpt; how noebp is used and where the macro
 * ends cannot be seen from here.
 *
 * The visible line moves %edx onto itself: a 32-bit register write
 * clears the upper 32 bits of the full 64-bit register.
 */
41 movl %edx,%edx /* zero extension */
45 .macro CLEAR_RREGS offset=0, _r9=rax
/*
 * Overwrite the saved R8..R11 slots of the register frame addressed as
 * \offset+<slot>(%rsp).
 *
 *   offset  displacement added to each slot offset (default 0)
 *   _r9     name of the register stored into the R9 slot (default rax)
 *
 * The R8, R10 and R11 slots always receive %rax. The line that sets
 * %rax is not visible in this excerpt -- presumably it zeroes it, given
 * the macro name; confirm against the full file.
 */
47 movq %rax,\offset+R11(%rsp)
48 movq %rax,\offset+R10(%rsp)
49 movq %\_r9,\offset+R9(%rsp)
50 movq %rax,\offset+R8(%rsp)
54 * Reload arg registers from stack in case ptrace changed them.
55 * We don't reload %eax because syscall_trace_enter() returned
56 * the %rax value we should see. Instead, we just truncate that
57 * value to 32 bits again as we did on entry from user mode.
58 * If it's a new value set by user_regset during entry tracing,
59 * this matches the normal truncation of the user-mode value.
60 * If it's -1 to make us punt the syscall, then (u32)-1 is still
61 * an appropriately invalid value.
63 .macro LOAD_ARGS32 offset, _r9=0
/*
 * Reload argument registers from the saved frame at \offset+<slot>(%rsp).
 *
 *   offset  displacement added to each slot offset
 *   _r9     optional, default 0. NOTE(review): no visible line uses it;
 *           it presumably guards the %r9d reload below via a conditional
 *           on lines not shown -- confirm.
 *
 * All loads are 32-bit, so the upper half of each 64-bit destination
 * register is cleared.
 */
65 movl \offset+R9(%rsp),%r9d
67 movl \offset+RCX(%rsp),%ecx
68 movl \offset+RDX(%rsp),%edx
69 movl \offset+RSI(%rsp),%esi
70 movl \offset+RDI(%rsp),%edi
/* %eax is not reloaded from the frame, only truncated to 32 bits in place. */
71 movl %eax,%eax /* zero extension */
74 .macro CFI_STARTPROC32 simple
/* Takes one parameter, simple. The macro body is not visible in this excerpt. */
86 #ifdef CONFIG_PARAVIRT
/*
 * Two entry points, native_usergs_sysret32 and native_irq_enable_sysexit,
 * follow this #ifdef. Neither their instruction bodies nor the matching
 * #endif are visible in this excerpt, so what they do cannot be stated
 * from here.
 */
87 ENTRY(native_usergs_sysret32)
90 ENDPROC(native_usergs_sysret32)
92 ENTRY(native_irq_enable_sysexit)
96 ENDPROC(native_irq_enable_sysexit)
100 * 32bit SYSENTER instruction entry.
103 * %eax System call number.
114 * This is purely a fast path. For anything complicated we use the int 0x80
115 * path below. Set up a complete hardware stack frame to share code
116 * with the int 0x80 path.
118 ENTRY(ia32_sysenter_target)
/*
 * SYSENTER entry path, as far as this excerpt shows it:
 *   1. point %rsp at the per-CPU kernel stack and enable interrupts;
 *   2. build a hardware-style frame (the user SS and CS selectors are
 *      pushed here; the remaining pushes are on lines not visible);
 *   3. save registers, set TS_COMPAT, and divert to sysenter_tracesys
 *      when syscall-entry work is pending;
 *   4. call the handler through ia32_sys_call_table and store its result;
 *   5. from sysexit_from_sys_call on, unwind the frame for the return.
 * Several branches and labels sit on lines missing from this excerpt.
 */
119 CFI_STARTPROC32 simple
124 movq PER_CPU_VAR(kernel_stack), %rsp
125 addq $(KERNEL_STACK_OFFSET),%rsp
/* %rsp = per-CPU kernel_stack value + KERNEL_STACK_OFFSET */
127 * No need to follow this irqs on/off section: the syscall
128 * disabled irqs, here we enable it straight after entry:
130 ENABLE_INTERRUPTS(CLBR_NONE)
131 movl %ebp,%ebp /* zero extension */
132 pushq_cfi $__USER32_DS
133 /*CFI_REL_OFFSET ss,0*/
137 /*CFI_REL_OFFSET rflags,0*/
138 movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
/*
 * %r10d = the TI_sysenter_return field. Its consumer is on a line not
 * shown -- presumably it is pushed as the user return address; confirm.
 */
140 pushq_cfi $__USER32_CS
141 /*CFI_REL_OFFSET cs,0*/
147 ALLOC_PT_GPREGS_ON_STACK
148 SAVE_C_REGS_EXCEPT_R891011
149 /* no need to do an access_ok check here because rbp has been
150 32bit zero extended */
153 _ASM_EXTABLE(1b,ia32_badarg)
/*
 * Exception-table entry: a fault at local label 1 (its instruction is
 * not visible in this excerpt) is redirected to ia32_badarg.
 */
157 * Sysenter doesn't filter flags, so we need to clear NT
158 * ourselves. To save a few cycles, we can check whether
159 * NT was set instead of doing an unconditional popfq.
161 testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
162 jnz sysenter_fix_flags
163 sysenter_flags_fixed:
165 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
166 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
168 jnz sysenter_tracesys
/*
 * Bounds check of the syscall number against the last valid index; the
 * branch that consumes these flags is on a line not shown.
 */
169 cmpq $(IA32_NR_syscalls-1),%rax
174 call *ia32_sys_call_table(,%rax,8)
/* The table is indexed by %rax with 8-byte entries; save the return value. */
175 movq %rax,RAX-ARGOFFSET(%rsp)
176 DISABLE_INTERRUPTS(CLBR_NONE)
178 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* The branch taken when any work flag is set is on a line not shown. */
180 sysexit_from_sys_call:
181 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
182 /* clear IF, so that popfq doesn't enable interrupts early */
183 andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
184 movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
187 REMOVE_PT_GPREGS_FROM_STACK 3*8
193 /*CFI_RESTORE rflags*/
194 popq_cfi %rcx /* User %esp */
197 ENABLE_INTERRUPTS_SYSEXIT32
201 #ifdef CONFIG_AUDITSYSCALL
202 .macro auditsys_entry_common
/*
 * Shuffle the syscall number and the first four syscall arguments into
 * the C argument registers (%edi, %esi, %edx, %ecx, %r8d), call
 * __audit_syscall_entry, then reload the syscall number and arguments
 * for the dispatch that follows.
 *
 * %r9d is used as scratch for the %ecx/%edx swap, so a caller that needs
 * %r9d afterwards must restore it itself. The macro's .endm and the
 * matching #endif are not visible in this excerpt.
 */
203 movl %esi,%r8d /* 5th arg: 4th syscall arg */
204 movl %ecx,%r9d /*swap with edx*/
205 movl %edx,%ecx /* 4th arg: 3rd syscall arg */
206 movl %r9d,%edx /* 3rd arg: 2nd syscall arg */
207 movl %ebx,%esi /* 2nd arg: 1st syscall arg */
208 movl %eax,%edi /* 1st arg: syscall number */
209 call __audit_syscall_entry
210 movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
/*
 * Bounds check of the reloaded syscall number; the branch that uses the
 * result is on a line not shown.
 */
211 cmpq $(IA32_NR_syscalls-1),%rax
/*
 * Rebuild the argument registers: the 1st argument is copied from %ebx,
 * the others are reloaded from the saved frame.
 */
213 movl %ebx,%edi /* reload 1st syscall arg */
214 movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
215 movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
216 movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
217 movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
220 .macro auditsys_exit exit
/*
 * Audit-exit slow path. Takes one parameter, exit, which no visible
 * line uses -- presumably it is the label jumped to when no further
 * work is pending (both call sites pass a *_from_sys_call label);
 * confirm against the full file. The .endm is not visible here.
 */
221 testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
222 jnz ia32_ret_from_sys_call
/* Work other than auditing is pending: take the generic return path. */
224 ENABLE_INTERRUPTS(CLBR_NONE)
225 movl %eax,%esi /* second arg, syscall return value */
226 cmpl $-MAX_ERRNO,%eax /* is it an error ? */
228 movslq %eax, %rsi /* if error sign extend to 64 bits */
/*
 * NOTE(review): setbe yields 1 when %eax <= -MAX_ERRNO as an unsigned
 * compare, i.e. when the value is NOT above -MAX_ERRNO. The original
 * wording here ("1 if error, 0 if not") looks inverted relative to that
 * condition. This assumes the flags from the cmpl above are still intact
 * (movslq does not modify flags; one intervening line is not visible).
 * Confirm against the first parameter of __audit_syscall_exit.
 */
229 1: setbe %al /* 1 if %eax <= -MAX_ERRNO (unsigned), 0 otherwise */
230 movzbl %al,%edi /* zero-extend that into %edi */
231 call __audit_syscall_exit
232 movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
233 movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
234 DISABLE_INTERRUPTS(CLBR_NONE)
/*
 * Re-test the non-audit work flags now that interrupts are off again;
 * the branch that uses the result is on a line not shown.
 */
236 testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
238 CLEAR_RREGS -ARGOFFSET
243 auditsys_entry_common
/*
 * Out-of-line pieces of ia32_sysenter_target. The labels that introduce
 * each piece, and the matching #endif lines, are missing from this
 * excerpt; the main path refers to sysenter_fix_flags and
 * sysenter_tracesys by name.
 *
 * Audit entry: auditsys_entry_common uses %r9d as scratch, so the 6th
 * argument is restored from %ebp before rejoining the dispatch.
 */
244 movl %ebp,%r9d /* reload 6th syscall arg */
245 jmp sysenter_dispatch
248 auditsys_exit sysexit_from_sys_call
/*
 * Flags fix-up: push a flags image with only IF and the fixed bit set,
 * then rejoin the main path. The instruction that loads this image into
 * the flags register is on a line not shown (presumably a popfq).
 */
252 pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
254 jmp sysenter_flags_fixed
257 #ifdef CONFIG_AUDITSYSCALL
258 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/*
 * Trace path: preset the saved RAX to -ENOSYS, pass the frame pointer
 * to syscall_trace_enter, then reload the (possibly modified) arguments.
 */
263 movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
264 movq %rsp,%rdi /* &pt_regs -> arg1 */
265 call syscall_trace_enter
266 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
268 cmpq $(IA32_NR_syscalls-1),%rax
269 ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
/*
 * An out-of-range syscall number returns with the RAX value stored
 * above; the in-range continuation is on a line not shown.
 */
272 ENDPROC(ia32_sysenter_target)
275 * 32bit SYSCALL instruction entry.
278 * %eax System call number.
284 * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
290 * This is purely a fast path. For anything complicated we use the int 0x80
291 * path below. Set up a complete hardware stack frame to share code
292 * with the int 0x80 path.
294 ENTRY(ia32_cstar_target)
/*
 * 32-bit SYSCALL entry path, as far as this excerpt shows it:
 *   1. point %rsp at the per-CPU kernel stack and enable interrupts;
 *   2. allocate the register frame and fill the hardware-frame slots by
 *      hand (RIP from %rcx, EFLAGS from %r11, RSP from %r8, fixed user
 *      CS/SS selectors);
 *   3. set TS_COMPAT, test for syscall-entry work, dispatch through
 *      ia32_sys_call_table and store the result;
 *   4. from sysretl_from_sys_call on, reload %ecx, %r11d and %esp from
 *      the frame for the return.
 * Several branches and labels sit on lines missing from this excerpt.
 */
295 CFI_STARTPROC32 simple
297 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
299 /*CFI_REGISTER rflags,r11*/
303 movq PER_CPU_VAR(kernel_stack),%rsp
305 * No need to follow this irqs on/off section: the syscall
306 * disabled irqs and here we enable it straight after entry:
308 ENABLE_INTERRUPTS(CLBR_NONE)
309 ALLOC_PT_GPREGS_ON_STACK 8
310 SAVE_C_REGS_EXCEPT_RCX_R891011
311 movl %eax,%eax /* zero extension */
312 movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
313 movq %rcx,RIP-ARGOFFSET(%rsp)
314 CFI_REL_OFFSET rip,RIP-ARGOFFSET
315 movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
317 movq $__USER32_CS,CS-ARGOFFSET(%rsp)
318 movq $__USER32_DS,SS-ARGOFFSET(%rsp)
319 movq %r11,EFLAGS-ARGOFFSET(%rsp)
320 /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
321 movq %r8,RSP-ARGOFFSET(%rsp)
322 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
323 /* no need to do an access_ok check here because r8 has been
324 32bit zero extended */
325 /* hardware stack frame is complete now */
328 _ASM_EXTABLE(1b,ia32_badarg)
/*
 * Exception-table entry: a fault at local label 1 (its instruction is
 * not visible in this excerpt) is redirected to ia32_badarg.
 */
330 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
331 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/*
 * NOTE(review): the bounds check below is written without the
 * parentheses used at every other bounds check in this file. It should
 * evaluate the same, but the spelling is inconsistent. The branches that
 * consume the testl above and this cmpq are on lines not shown.
 */
334 cmpq $IA32_NR_syscalls-1,%rax
339 call *ia32_sys_call_table(,%rax,8)
/* The table is indexed by %rax with 8-byte entries; save the return value. */
340 movq %rax,RAX-ARGOFFSET(%rsp)
341 DISABLE_INTERRUPTS(CLBR_NONE)
343 testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/* The branch taken when any work flag is set is on a line not shown. */
345 sysretl_from_sys_call:
346 andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
348 movl RIP-ARGOFFSET(%rsp),%ecx
350 movl EFLAGS-ARGOFFSET(%rsp),%r11d
351 /*CFI_REGISTER rflags,r11*/
356 movl RSP-ARGOFFSET(%rsp),%esp
/*
 * %ecx, %r11d and %esp now hold the saved user RIP, EFLAGS and RSP; the
 * instruction that actually returns is on a line not shown.
 */
360 #ifdef CONFIG_AUDITSYSCALL
363 movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
364 auditsys_entry_common
365 movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
369 auditsys_exit sysretl_from_sys_call
373 #ifdef CONFIG_AUDITSYSCALL
374 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/*
 * Trace path: preset the saved RAX to -ENOSYS, pass the frame pointer
 * to syscall_trace_enter, then reload the (possibly modified) arguments.
 * LOAD_ARGS32 is invoked with _r9=1 here, unlike the other two call sites.
 */
380 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
381 movq %rsp,%rdi /* &pt_regs -> arg1 */
382 call syscall_trace_enter
383 LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
386 cmpq $(IA32_NR_syscalls-1),%rax
387 ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
389 END(ia32_cstar_target)
398 * Emulated IA32 system calls via int 0x80.
401 * %eax System call number.
407 * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
410 * Uses the same stack frame as the x86-64 version.
411 * All registers except %eax must be saved (but ptrace may violate that)
412 * Arguments are zero extended. For system calls that want sign extension and
413 * take long arguments a wrapper is needed. Most calls can just be called
415 * Assumes it is only called from user space and entered with interrupts off.
419 CFI_STARTPROC32 simple
/*
 * Entry path described by the "Emulated IA32 system calls via int 0x80"
 * header; its entry label is on a line not visible in this excerpt.
 * Unlike the two fast paths, no visible line builds the hardware frame
 * by hand: the CFI annotations below describe RSP and RIP slots as
 * already present relative to %rsp.
 */
421 CFI_DEF_CFA rsp,SS+8-RIP
422 /*CFI_REL_OFFSET ss,SS-RIP*/
423 CFI_REL_OFFSET rsp,RSP-RIP
424 /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
425 /*CFI_REL_OFFSET cs,CS-RIP*/
426 CFI_REL_OFFSET rip,RIP-RIP
427 PARAVIRT_ADJUST_EXCEPTION_FRAME
430 * No need to follow this irqs on/off section: the syscall
431 * disabled irqs and here we enable it straight after entry:
433 ENABLE_INTERRUPTS(CLBR_NONE)
437 /* note the registers are not zero extended to the sf.
438 this could be a problem. */
439 ALLOC_PT_GPREGS_ON_STACK
440 SAVE_C_REGS_EXCEPT_R891011
441 orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
442 testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
/*
 * The branches that consume the testl above and the bounds check below
 * are on lines not shown.
 */
444 cmpq $(IA32_NR_syscalls-1),%rax
448 call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
450 movq %rax,RAX-ARGOFFSET(%rsp)
451 ia32_ret_from_sys_call:
452 CLEAR_RREGS -ARGOFFSET
453 jmp int_ret_from_sys_call
/*
 * Trace path (its label is not visible in this excerpt): preset the
 * saved RAX to -ENOSYS, pass the frame pointer to syscall_trace_enter,
 * then reload the (possibly modified) arguments.
 */
458 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
459 movq %rsp,%rdi /* &pt_regs -> arg1 */
460 call syscall_trace_enter
461 LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
463 cmpq $(IA32_NR_syscalls-1),%rax
464 ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
/*
 * The store below writes 0 to the saved ORIG_RAX slot. The label it
 * belongs to and the instructions around it are not visible here.
 */
469 movq $0,ORIG_RAX-ARGOFFSET(%rsp)
475 .macro PTREGSCALL label, func
/*
 * Stub generator: load the address of \func into %rax (RIP-relative)
 * and jump to the shared ia32_ptregs_common code.
 *
 *   label  not used by any visible line -- presumably the name of the
 *          stub entry point emitted on a line not shown; confirm.
 *   func   symbol whose address is handed over in %rax
 *
 * The .endm is not visible in this excerpt.
 */
478 leaq \func(%rip),%rax
479 jmp ia32_ptregs_common
484 PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
/*
 * This line and the five below each instantiate PTREGSCALL once: the
 * first operand is the macro's label argument, the second the function
 * whose address is passed to ia32_ptregs_common in %rax.
 */
485 PTREGSCALL stub32_sigreturn, sys32_sigreturn
486 PTREGSCALL stub32_execve, compat_sys_execve
487 PTREGSCALL stub32_execveat, compat_sys_execveat
488 PTREGSCALL stub32_fork, sys_fork
489 PTREGSCALL stub32_vfork, sys_vfork
493 leaq sys_clone(%rip),%rax
/*
 * Hand-written variant of the PTREGSCALL pattern for sys_clone: same
 * address load into %rax and same jump target. Its entry label and the
 * line between these two instructions are not visible in this excerpt,
 * so why it is not generated by the macro cannot be stated from here.
 */
495 jmp ia32_ptregs_common
500 CFI_STARTPROC32 simple
/*
 * Shared tail for the stubs above, which jump here with a function
 * address in %rax. Only the unwind (CFI) annotations are visible in this
 * excerpt: they set the CFA to %rsp + (SS+8-ARGOFFSET) and record where
 * rax, rcx, rdx, rsi, rdi, rip and rsp are saved relative to the frame.
 * The entry label and the instructions between the annotations and END
 * are on lines not shown.
 */
502 CFI_DEF_CFA rsp,SS+8-ARGOFFSET
503 CFI_REL_OFFSET rax,RAX-ARGOFFSET
504 CFI_REL_OFFSET rcx,RCX-ARGOFFSET
505 CFI_REL_OFFSET rdx,RDX-ARGOFFSET
506 CFI_REL_OFFSET rsi,RSI-ARGOFFSET
507 CFI_REL_OFFSET rdi,RDI-ARGOFFSET
508 CFI_REL_OFFSET rip,RIP-ARGOFFSET
509 /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
510 /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
511 CFI_REL_OFFSET rsp,RSP-ARGOFFSET
512 /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
518 END(ia32_ptregs_common)