1 #include <trace/syscall.h>
2 #include <trace/events/syscalls.h>
3 #include <linux/slab.h>
4 #include <linux/kernel.h>
5 #include <linux/ftrace.h>
6 #include <linux/perf_event.h>
7 #include <asm/syscall.h>
9 #include "trace_output.h"
/* Serializes registration/unregistration of the syscall tracepoints below. */
static DEFINE_MUTEX(syscall_trace_lock);
/* Number of events currently registered on sys_enter / sys_exit. */
static int sys_refcount_enter;
static int sys_refcount_exit;
/* One bit per syscall nr: is ftrace tracing enabled for that syscall? */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
/* Forward declarations: the event-class .reg callbacks defined at file end. */
static int syscall_enter_register(struct ftrace_event_call *event,
				 enum trace_reg type);
static int syscall_exit_register(struct ftrace_event_call *event,
				 enum trace_reg type);
23 struct ftrace_event_class event_class_syscall_enter
= {
25 .reg
= syscall_enter_register
28 struct ftrace_event_class event_class_syscall_exit
= {
30 .reg
= syscall_exit_register
/* Section bounds of the per-syscall metadata emitted by SYSCALL_DEFINEx(). */
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

/* Table indexed by syscall nr, filled at boot by init_ftrace_syscalls(). */
static struct syscall_metadata **syscalls_metadata;
38 static struct syscall_metadata
*find_syscall_meta(unsigned long syscall
)
40 struct syscall_metadata
*start
;
41 struct syscall_metadata
*stop
;
42 char str
[KSYM_SYMBOL_LEN
];
45 start
= (struct syscall_metadata
*)__start_syscalls_metadata
;
46 stop
= (struct syscall_metadata
*)__stop_syscalls_metadata
;
47 kallsyms_lookup(syscall
, NULL
, NULL
, NULL
, str
);
49 for ( ; start
< stop
; start
++) {
51 * Only compare after the "sys" prefix. Archs that use
52 * syscall wrappers may have syscalls symbols aliases prefixed
53 * with "SyS" instead of "sys", leading to an unwanted
56 if (start
->name
&& !strcmp(start
->name
+ 3, str
+ 3))
62 static struct syscall_metadata
*syscall_nr_to_meta(int nr
)
64 if (!syscalls_metadata
|| nr
>= NR_syscalls
|| nr
< 0)
67 return syscalls_metadata
[nr
];
71 print_syscall_enter(struct trace_iterator
*iter
, int flags
)
73 struct trace_seq
*s
= &iter
->seq
;
74 struct trace_entry
*ent
= iter
->ent
;
75 struct syscall_trace_enter
*trace
;
76 struct syscall_metadata
*entry
;
79 trace
= (typeof(trace
))ent
;
81 entry
= syscall_nr_to_meta(syscall
);
86 if (entry
->enter_event
->id
!= ent
->type
) {
91 ret
= trace_seq_printf(s
, "%s(", entry
->name
);
93 return TRACE_TYPE_PARTIAL_LINE
;
95 for (i
= 0; i
< entry
->nb_args
; i
++) {
97 if (trace_flags
& TRACE_ITER_VERBOSE
) {
98 ret
= trace_seq_printf(s
, "%s ", entry
->types
[i
]);
100 return TRACE_TYPE_PARTIAL_LINE
;
102 /* parameter values */
103 ret
= trace_seq_printf(s
, "%s: %lx%s", entry
->args
[i
],
105 i
== entry
->nb_args
- 1 ? "" : ", ");
107 return TRACE_TYPE_PARTIAL_LINE
;
110 ret
= trace_seq_putc(s
, ')');
112 return TRACE_TYPE_PARTIAL_LINE
;
115 ret
= trace_seq_putc(s
, '\n');
117 return TRACE_TYPE_PARTIAL_LINE
;
119 return TRACE_TYPE_HANDLED
;
123 print_syscall_exit(struct trace_iterator
*iter
, int flags
)
125 struct trace_seq
*s
= &iter
->seq
;
126 struct trace_entry
*ent
= iter
->ent
;
127 struct syscall_trace_exit
*trace
;
129 struct syscall_metadata
*entry
;
132 trace
= (typeof(trace
))ent
;
134 entry
= syscall_nr_to_meta(syscall
);
137 trace_seq_printf(s
, "\n");
138 return TRACE_TYPE_HANDLED
;
141 if (entry
->exit_event
->id
!= ent
->type
) {
143 return TRACE_TYPE_UNHANDLED
;
146 ret
= trace_seq_printf(s
, "%s -> 0x%lx\n", entry
->name
,
149 return TRACE_TYPE_PARTIAL_LINE
;
151 return TRACE_TYPE_HANDLED
;
/* Link-time assertion helper: referenced (never defined) when a field's
 * declared type size does not match the struct member it describes. */
extern char *__bad_type_size(void);

/* Expands to the (type-name, field-name, offset, size, signedness)
 * argument pack expected by trace_define_field(); requires a local
 * variable named "trace" of the event's record struct type. */
#define SYSCALL_FIELD(type, name) \
	sizeof(type) != sizeof(trace.name) ? \
		__bad_type_size() : \
		#type, #name, offsetof(typeof(trace), name), \
		sizeof(trace.name), is_signed_type(type)
163 int __set_enter_print_fmt(struct syscall_metadata
*entry
, char *buf
, int len
)
168 /* When len=0, we just calculate the needed length */
169 #define LEN_OR_ZERO (len ? len - pos : 0)
171 pos
+= snprintf(buf
+ pos
, LEN_OR_ZERO
, "\"");
172 for (i
= 0; i
< entry
->nb_args
; i
++) {
173 pos
+= snprintf(buf
+ pos
, LEN_OR_ZERO
, "%s: 0x%%0%zulx%s",
174 entry
->args
[i
], sizeof(unsigned long),
175 i
== entry
->nb_args
- 1 ? "" : ", ");
177 pos
+= snprintf(buf
+ pos
, LEN_OR_ZERO
, "\"");
179 for (i
= 0; i
< entry
->nb_args
; i
++) {
180 pos
+= snprintf(buf
+ pos
, LEN_OR_ZERO
,
181 ", ((unsigned long)(REC->%s))", entry
->args
[i
]);
186 /* return the length of print_fmt */
190 static int set_syscall_print_fmt(struct ftrace_event_call
*call
)
194 struct syscall_metadata
*entry
= call
->data
;
196 if (entry
->enter_event
!= call
) {
197 call
->print_fmt
= "\"0x%lx\", REC->ret";
201 /* First: called with 0 length to calculate the needed length */
202 len
= __set_enter_print_fmt(entry
, NULL
, 0);
204 print_fmt
= kmalloc(len
+ 1, GFP_KERNEL
);
208 /* Second: actually write the @print_fmt */
209 __set_enter_print_fmt(entry
, print_fmt
, len
+ 1);
210 call
->print_fmt
= print_fmt
;
215 static void free_syscall_print_fmt(struct ftrace_event_call
*call
)
217 struct syscall_metadata
*entry
= call
->data
;
219 if (entry
->enter_event
== call
)
220 kfree(call
->print_fmt
);
223 int syscall_enter_define_fields(struct ftrace_event_call
*call
)
225 struct syscall_trace_enter trace
;
226 struct syscall_metadata
*meta
= call
->data
;
229 int offset
= offsetof(typeof(trace
), args
);
231 ret
= trace_define_field(call
, SYSCALL_FIELD(int, nr
), FILTER_OTHER
);
235 for (i
= 0; i
< meta
->nb_args
; i
++) {
236 ret
= trace_define_field(call
, meta
->types
[i
],
237 meta
->args
[i
], offset
,
238 sizeof(unsigned long), 0,
240 offset
+= sizeof(unsigned long);
246 int syscall_exit_define_fields(struct ftrace_event_call
*call
)
248 struct syscall_trace_exit trace
;
251 ret
= trace_define_field(call
, SYSCALL_FIELD(int, nr
), FILTER_OTHER
);
255 ret
= trace_define_field(call
, SYSCALL_FIELD(long, ret
),
261 void ftrace_syscall_enter(void *ignore
, struct pt_regs
*regs
, long id
)
263 struct syscall_trace_enter
*entry
;
264 struct syscall_metadata
*sys_data
;
265 struct ring_buffer_event
*event
;
266 struct ring_buffer
*buffer
;
270 syscall_nr
= syscall_get_nr(current
, regs
);
273 if (!test_bit(syscall_nr
, enabled_enter_syscalls
))
276 sys_data
= syscall_nr_to_meta(syscall_nr
);
280 size
= sizeof(*entry
) + sizeof(unsigned long) * sys_data
->nb_args
;
282 event
= trace_current_buffer_lock_reserve(&buffer
,
283 sys_data
->enter_event
->id
, size
, 0, 0);
287 entry
= ring_buffer_event_data(event
);
288 entry
->nr
= syscall_nr
;
289 syscall_get_arguments(current
, regs
, 0, sys_data
->nb_args
, entry
->args
);
291 if (!filter_current_check_discard(buffer
, sys_data
->enter_event
,
293 trace_current_buffer_unlock_commit(buffer
, event
, 0, 0);
296 void ftrace_syscall_exit(void *ignore
, struct pt_regs
*regs
, long ret
)
298 struct syscall_trace_exit
*entry
;
299 struct syscall_metadata
*sys_data
;
300 struct ring_buffer_event
*event
;
301 struct ring_buffer
*buffer
;
304 syscall_nr
= syscall_get_nr(current
, regs
);
307 if (!test_bit(syscall_nr
, enabled_exit_syscalls
))
310 sys_data
= syscall_nr_to_meta(syscall_nr
);
314 event
= trace_current_buffer_lock_reserve(&buffer
,
315 sys_data
->exit_event
->id
, sizeof(*entry
), 0, 0);
319 entry
= ring_buffer_event_data(event
);
320 entry
->nr
= syscall_nr
;
321 entry
->ret
= syscall_get_return_value(current
, regs
);
323 if (!filter_current_check_discard(buffer
, sys_data
->exit_event
,
325 trace_current_buffer_unlock_commit(buffer
, event
, 0, 0);
328 int reg_event_syscall_enter(struct ftrace_event_call
*call
)
333 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
334 if (num
< 0 || num
>= NR_syscalls
)
336 mutex_lock(&syscall_trace_lock
);
337 if (!sys_refcount_enter
)
338 ret
= register_trace_sys_enter(ftrace_syscall_enter
, NULL
);
340 set_bit(num
, enabled_enter_syscalls
);
341 sys_refcount_enter
++;
343 mutex_unlock(&syscall_trace_lock
);
347 void unreg_event_syscall_enter(struct ftrace_event_call
*call
)
351 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
352 if (num
< 0 || num
>= NR_syscalls
)
354 mutex_lock(&syscall_trace_lock
);
355 sys_refcount_enter
--;
356 clear_bit(num
, enabled_enter_syscalls
);
357 if (!sys_refcount_enter
)
358 unregister_trace_sys_enter(ftrace_syscall_enter
, NULL
);
359 mutex_unlock(&syscall_trace_lock
);
362 int reg_event_syscall_exit(struct ftrace_event_call
*call
)
367 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
368 if (num
< 0 || num
>= NR_syscalls
)
370 mutex_lock(&syscall_trace_lock
);
371 if (!sys_refcount_exit
)
372 ret
= register_trace_sys_exit(ftrace_syscall_exit
, NULL
);
374 set_bit(num
, enabled_exit_syscalls
);
377 mutex_unlock(&syscall_trace_lock
);
381 void unreg_event_syscall_exit(struct ftrace_event_call
*call
)
385 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
386 if (num
< 0 || num
>= NR_syscalls
)
388 mutex_lock(&syscall_trace_lock
);
390 clear_bit(num
, enabled_exit_syscalls
);
391 if (!sys_refcount_exit
)
392 unregister_trace_sys_exit(ftrace_syscall_exit
, NULL
);
393 mutex_unlock(&syscall_trace_lock
);
396 int init_syscall_trace(struct ftrace_event_call
*call
)
400 if (set_syscall_print_fmt(call
) < 0)
403 id
= trace_event_raw_init(call
);
406 free_syscall_print_fmt(call
);
413 unsigned long __init
arch_syscall_addr(int nr
)
415 return (unsigned long)sys_call_table
[nr
];
418 int __init
init_ftrace_syscalls(void)
420 struct syscall_metadata
*meta
;
424 syscalls_metadata
= kzalloc(sizeof(*syscalls_metadata
) *
425 NR_syscalls
, GFP_KERNEL
);
426 if (!syscalls_metadata
) {
431 for (i
= 0; i
< NR_syscalls
; i
++) {
432 addr
= arch_syscall_addr(i
);
433 meta
= find_syscall_meta(addr
);
437 meta
->syscall_nr
= i
;
438 syscalls_metadata
[i
] = meta
;
443 core_initcall(init_ftrace_syscalls
);
445 #ifdef CONFIG_PERF_EVENTS
447 static DECLARE_BITMAP(enabled_perf_enter_syscalls
, NR_syscalls
);
448 static DECLARE_BITMAP(enabled_perf_exit_syscalls
, NR_syscalls
);
449 static int sys_perf_refcount_enter
;
450 static int sys_perf_refcount_exit
;
452 static void perf_syscall_enter(void *ignore
, struct pt_regs
*regs
, long id
)
454 struct syscall_metadata
*sys_data
;
455 struct syscall_trace_enter
*rec
;
461 syscall_nr
= syscall_get_nr(current
, regs
);
462 if (!test_bit(syscall_nr
, enabled_perf_enter_syscalls
))
465 sys_data
= syscall_nr_to_meta(syscall_nr
);
469 /* get the size after alignment with the u32 buffer size field */
470 size
= sizeof(unsigned long) * sys_data
->nb_args
+ sizeof(*rec
);
471 size
= ALIGN(size
+ sizeof(u32
), sizeof(u64
));
474 if (WARN_ONCE(size
> PERF_MAX_TRACE_SIZE
,
475 "perf buffer not large enough"))
478 rec
= (struct syscall_trace_enter
*)perf_trace_buf_prepare(size
,
479 sys_data
->enter_event
->id
, &rctx
, &flags
);
483 rec
->nr
= syscall_nr
;
484 syscall_get_arguments(current
, regs
, 0, sys_data
->nb_args
,
485 (unsigned long *)&rec
->args
);
486 perf_trace_buf_submit(rec
, size
, rctx
, 0, 1, flags
, regs
);
489 int perf_sysenter_enable(struct ftrace_event_call
*call
)
494 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
496 mutex_lock(&syscall_trace_lock
);
497 if (!sys_perf_refcount_enter
)
498 ret
= register_trace_sys_enter(perf_syscall_enter
, NULL
);
500 pr_info("event trace: Could not activate"
501 "syscall entry trace point");
503 set_bit(num
, enabled_perf_enter_syscalls
);
504 sys_perf_refcount_enter
++;
506 mutex_unlock(&syscall_trace_lock
);
510 void perf_sysenter_disable(struct ftrace_event_call
*call
)
514 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
516 mutex_lock(&syscall_trace_lock
);
517 sys_perf_refcount_enter
--;
518 clear_bit(num
, enabled_perf_enter_syscalls
);
519 if (!sys_perf_refcount_enter
)
520 unregister_trace_sys_enter(perf_syscall_enter
, NULL
);
521 mutex_unlock(&syscall_trace_lock
);
524 static void perf_syscall_exit(void *ignore
, struct pt_regs
*regs
, long ret
)
526 struct syscall_metadata
*sys_data
;
527 struct syscall_trace_exit
*rec
;
533 syscall_nr
= syscall_get_nr(current
, regs
);
534 if (!test_bit(syscall_nr
, enabled_perf_exit_syscalls
))
537 sys_data
= syscall_nr_to_meta(syscall_nr
);
541 /* We can probably do that at build time */
542 size
= ALIGN(sizeof(*rec
) + sizeof(u32
), sizeof(u64
));
546 * Impossible, but be paranoid with the future
547 * How to put this check outside runtime?
549 if (WARN_ONCE(size
> PERF_MAX_TRACE_SIZE
,
550 "exit event has grown above perf buffer size"))
553 rec
= (struct syscall_trace_exit
*)perf_trace_buf_prepare(size
,
554 sys_data
->exit_event
->id
, &rctx
, &flags
);
558 rec
->nr
= syscall_nr
;
559 rec
->ret
= syscall_get_return_value(current
, regs
);
561 perf_trace_buf_submit(rec
, size
, rctx
, 0, 1, flags
, regs
);
564 int perf_sysexit_enable(struct ftrace_event_call
*call
)
569 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
571 mutex_lock(&syscall_trace_lock
);
572 if (!sys_perf_refcount_exit
)
573 ret
= register_trace_sys_exit(perf_syscall_exit
, NULL
);
575 pr_info("event trace: Could not activate"
576 "syscall exit trace point");
578 set_bit(num
, enabled_perf_exit_syscalls
);
579 sys_perf_refcount_exit
++;
581 mutex_unlock(&syscall_trace_lock
);
585 void perf_sysexit_disable(struct ftrace_event_call
*call
)
589 num
= ((struct syscall_metadata
*)call
->data
)->syscall_nr
;
591 mutex_lock(&syscall_trace_lock
);
592 sys_perf_refcount_exit
--;
593 clear_bit(num
, enabled_perf_exit_syscalls
);
594 if (!sys_perf_refcount_exit
)
595 unregister_trace_sys_exit(perf_syscall_exit
, NULL
);
596 mutex_unlock(&syscall_trace_lock
);
599 #endif /* CONFIG_PERF_EVENTS */
601 static int syscall_enter_register(struct ftrace_event_call
*event
,
605 case TRACE_REG_REGISTER
:
606 return reg_event_syscall_enter(event
);
607 case TRACE_REG_UNREGISTER
:
608 unreg_event_syscall_enter(event
);
611 #ifdef CONFIG_PERF_EVENTS
612 case TRACE_REG_PERF_REGISTER
:
613 return perf_sysenter_enable(event
);
614 case TRACE_REG_PERF_UNREGISTER
:
615 perf_sysenter_disable(event
);
622 static int syscall_exit_register(struct ftrace_event_call
*event
,
626 case TRACE_REG_REGISTER
:
627 return reg_event_syscall_exit(event
);
628 case TRACE_REG_UNREGISTER
:
629 unreg_event_syscall_exit(event
);
632 #ifdef CONFIG_PERF_EVENTS
633 case TRACE_REG_PERF_REGISTER
:
634 return perf_sysexit_enable(event
);
635 case TRACE_REG_PERF_UNREGISTER
:
636 perf_sysexit_disable(event
);