#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"
static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;
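/*
 * Look up the syscall_metadata entry for a syscall handler address.
 * The symbol name is resolved with kallsyms and matched against the
 * names recorded in the __syscalls_metadata section.
 */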
static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscalls symbols aliases prefixed
		 * with "SyS" instead of "sys", leading to an unwanted
		 * mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}
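/*
 * Translate between syscall numbers and their metadata entries, using
 * the table built at boot by init_ftrace_syscalls().
 */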
static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}
int syscall_name_to_nr(const char *name)
{
	int i;

	if (!syscalls_metadata)
		return -1;

	for (i = 0; i < NR_syscalls; i++) {
		if (syscalls_metadata[i]) {
			if (!strcmp(syscalls_metadata[i]->name, name))
				return i;
		}
	}
	return -1;
}
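/*
 * Print one "sys_foo(arg: value, ...)" line for a syscall entry event
 * when the trace file is read.
 */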
enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
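/*
 * Print one "sys_foo -> 0x<retval>" line for a syscall exit event.
 */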
enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	struct syscall_metadata *entry;
	int syscall;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			       trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
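/*
 * SYSCALL_FIELD() expands to the name, offset, size and signedness of a
 * field in the trace record. __bad_type_size() is never defined, so a
 * size mismatch between the declared type and the record field shows up
 * as a link error rather than a silent format bug.
 */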
extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name) \
	sizeof(type) != sizeof(trace.name) ? \
		__bad_type_size() : \
		#type, #name, offsetof(typeof(trace), name), \
		sizeof(trace.name), is_signed_type(type)
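/*
 * Emit the "format" file contents (field descriptions and print fmt)
 * for a syscall entry event.
 */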
int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int ret;
	struct syscall_metadata *entry = call->data;
	struct syscall_trace_enter trace;
	int offset = offsetof(struct syscall_trace_enter, args);

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				       entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
				       "\tsigned:%u;\n", offset,
				       sizeof(unsigned long),
				       is_signed_type(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}
int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}
int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}
int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);

	return ret;
}
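/*
 * Tracepoint probe called on syscall entry: record the syscall number
 * and arguments into the ftrace ring buffer if tracing is enabled for
 * this syscall.
 */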
void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
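/*
 * Enable/disable tracing of individual syscalls. The sys_enter/sys_exit
 * tracepoints are registered once, on the first enabled syscall, and
 * per-syscall enablement is tracked in the bitmaps above under
 * syscall_trace_lock.
 */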
int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;
	char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}
int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;
	char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}
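/*
 * Build the syscall number -> metadata table at boot by walking the
 * arch syscall table and matching each handler address against the
 * compiled-in metadata section.
 */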
int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
					NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);
#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;
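/*
 * perf profiling probe for syscall entry: build the event record in the
 * per-cpu perf trace buffer and hand it to perf_tp_event(). Interrupts
 * are disabled to protect the per-cpu buffer and to keep the rcu read
 * side section while perf_trace_buf is dereferenced.
 */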
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *trace_buf;
	char *raw_data;
	int syscall_nr;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_event->id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			       (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}
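/*
 * Enable/disable perf profiling of individual syscalls, mirroring the
 * reg/unreg helpers above but driving the prof_syscall_* probes and the
 * enabled_prof_* bitmaps.
 */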
int reg_prof_syscall_enter(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
void unreg_prof_syscall_enter(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}
static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *trace_buf;
	char *raw_data;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible, but be paranoid with the future
	 * How to put this check outside runtime?
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);
	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;

	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_event->id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}
int reg_prof_syscall_exit(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
void unreg_prof_syscall_exit(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif	/* CONFIG_EVENT_PROFILE */