1 #include <trace/syscall.h>
2 #include <trace/events/syscalls.h>
3 #include <linux/kernel.h>
4 #include <linux/ftrace.h>
5 #include <linux/perf_event.h>
6 #include <asm/syscall.h>
8 #include "trace_output.h"
/* Serializes registration and the enabled-syscall bitmaps below. */
static DEFINE_MUTEX(syscall_trace_lock);

/* How many events are currently registered on each tracepoint. */
static int sys_refcount_enter;
static int sys_refcount_exit;

/* One bit per syscall nr: tracing enabled for that syscall? */
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
/* Boundaries of the linker section holding per-syscall metadata. */
extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

/* Table indexed by syscall nr, populated at boot by init_ftrace_syscalls(). */
static struct syscall_metadata **syscalls_metadata;
22 static struct syscall_metadata
*find_syscall_meta(unsigned long syscall
)
24 struct syscall_metadata
*start
;
25 struct syscall_metadata
*stop
;
26 char str
[KSYM_SYMBOL_LEN
];
29 start
= (struct syscall_metadata
*)__start_syscalls_metadata
;
30 stop
= (struct syscall_metadata
*)__stop_syscalls_metadata
;
31 kallsyms_lookup(syscall
, NULL
, NULL
, NULL
, str
);
33 for ( ; start
< stop
; start
++) {
35 * Only compare after the "sys" prefix. Archs that use
36 * syscall wrappers may have syscalls symbols aliases prefixed
37 * with "SyS" instead of "sys", leading to an unwanted
40 if (start
->name
&& !strcmp(start
->name
+ 3, str
+ 3))
46 static struct syscall_metadata
*syscall_nr_to_meta(int nr
)
48 if (!syscalls_metadata
|| nr
>= NR_syscalls
|| nr
< 0)
51 return syscalls_metadata
[nr
];
54 int syscall_name_to_nr(char *name
)
58 if (!syscalls_metadata
)
61 for (i
= 0; i
< NR_syscalls
; i
++) {
62 if (syscalls_metadata
[i
]) {
63 if (!strcmp(syscalls_metadata
[i
]->name
, name
))
70 void set_syscall_enter_id(int num
, int id
)
72 syscalls_metadata
[num
]->enter_id
= id
;
75 void set_syscall_exit_id(int num
, int id
)
77 syscalls_metadata
[num
]->exit_id
= id
;
81 print_syscall_enter(struct trace_iterator
*iter
, int flags
)
83 struct trace_seq
*s
= &iter
->seq
;
84 struct trace_entry
*ent
= iter
->ent
;
85 struct syscall_trace_enter
*trace
;
86 struct syscall_metadata
*entry
;
89 trace
= (typeof(trace
))ent
;
91 entry
= syscall_nr_to_meta(syscall
);
96 if (entry
->enter_id
!= ent
->type
) {
101 ret
= trace_seq_printf(s
, "%s(", entry
->name
);
103 return TRACE_TYPE_PARTIAL_LINE
;
105 for (i
= 0; i
< entry
->nb_args
; i
++) {
106 /* parameter types */
107 if (trace_flags
& TRACE_ITER_VERBOSE
) {
108 ret
= trace_seq_printf(s
, "%s ", entry
->types
[i
]);
110 return TRACE_TYPE_PARTIAL_LINE
;
112 /* parameter values */
113 ret
= trace_seq_printf(s
, "%s: %lx%s", entry
->args
[i
],
115 i
== entry
->nb_args
- 1 ? "" : ", ");
117 return TRACE_TYPE_PARTIAL_LINE
;
120 ret
= trace_seq_putc(s
, ')');
122 return TRACE_TYPE_PARTIAL_LINE
;
125 ret
= trace_seq_putc(s
, '\n');
127 return TRACE_TYPE_PARTIAL_LINE
;
129 return TRACE_TYPE_HANDLED
;
133 print_syscall_exit(struct trace_iterator
*iter
, int flags
)
135 struct trace_seq
*s
= &iter
->seq
;
136 struct trace_entry
*ent
= iter
->ent
;
137 struct syscall_trace_exit
*trace
;
139 struct syscall_metadata
*entry
;
142 trace
= (typeof(trace
))ent
;
144 entry
= syscall_nr_to_meta(syscall
);
147 trace_seq_printf(s
, "\n");
148 return TRACE_TYPE_HANDLED
;
151 if (entry
->exit_id
!= ent
->type
) {
153 return TRACE_TYPE_UNHANDLED
;
156 ret
= trace_seq_printf(s
, "%s -> 0x%lx\n", entry
->name
,
159 return TRACE_TYPE_PARTIAL_LINE
;
161 return TRACE_TYPE_HANDLED
;
/* Link-time assertion helper: referenced only when sizeof mismatches. */
extern char *__bad_type_size(void);

/*
 * Expand to the printf arguments describing one fixed field of a
 * syscall trace record, failing the build if the declared type size
 * does not match the struct member.
 */
#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
172 int syscall_enter_format(struct ftrace_event_call
*call
, struct trace_seq
*s
)
177 struct syscall_metadata
*entry
;
178 struct syscall_trace_enter trace
;
179 int offset
= offsetof(struct syscall_trace_enter
, args
);
181 nr
= syscall_name_to_nr(call
->data
);
182 entry
= syscall_nr_to_meta(nr
);
187 ret
= trace_seq_printf(s
, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
189 SYSCALL_FIELD(int, nr
));
193 for (i
= 0; i
< entry
->nb_args
; i
++) {
194 ret
= trace_seq_printf(s
, "\tfield:%s %s;", entry
->types
[i
],
198 ret
= trace_seq_printf(s
, "\toffset:%d;\tsize:%zu;"
199 "\tsigned:%u;\n", offset
,
200 sizeof(unsigned long),
201 is_signed_type(unsigned long));
204 offset
+= sizeof(unsigned long);
207 trace_seq_puts(s
, "\nprint fmt: \"");
208 for (i
= 0; i
< entry
->nb_args
; i
++) {
209 ret
= trace_seq_printf(s
, "%s: 0x%%0%zulx%s", entry
->args
[i
],
210 sizeof(unsigned long),
211 i
== entry
->nb_args
- 1 ? "" : ", ");
215 trace_seq_putc(s
, '"');
217 for (i
= 0; i
< entry
->nb_args
; i
++) {
218 ret
= trace_seq_printf(s
, ", ((unsigned long)(REC->%s))",
224 return trace_seq_putc(s
, '\n');
227 int syscall_exit_format(struct ftrace_event_call
*call
, struct trace_seq
*s
)
230 struct syscall_trace_exit trace
;
232 ret
= trace_seq_printf(s
,
233 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
235 "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
237 SYSCALL_FIELD(int, nr
),
238 SYSCALL_FIELD(long, ret
));
242 return trace_seq_printf(s
, "\nprint fmt: \"0x%%lx\", REC->ret\n");
245 int syscall_enter_define_fields(struct ftrace_event_call
*call
)
247 struct syscall_trace_enter trace
;
248 struct syscall_metadata
*meta
;
252 int offset
= offsetof(typeof(trace
), args
);
254 nr
= syscall_name_to_nr(call
->data
);
255 meta
= syscall_nr_to_meta(nr
);
260 ret
= trace_define_common_fields(call
);
264 for (i
= 0; i
< meta
->nb_args
; i
++) {
265 ret
= trace_define_field(call
, meta
->types
[i
],
266 meta
->args
[i
], offset
,
267 sizeof(unsigned long), 0,
269 offset
+= sizeof(unsigned long);
275 int syscall_exit_define_fields(struct ftrace_event_call
*call
)
277 struct syscall_trace_exit trace
;
280 ret
= trace_define_common_fields(call
);
284 ret
= trace_define_field(call
, SYSCALL_FIELD(long, ret
),
290 void ftrace_syscall_enter(struct pt_regs
*regs
, long id
)
292 struct syscall_trace_enter
*entry
;
293 struct syscall_metadata
*sys_data
;
294 struct ring_buffer_event
*event
;
295 struct ring_buffer
*buffer
;
299 syscall_nr
= syscall_get_nr(current
, regs
);
302 if (!test_bit(syscall_nr
, enabled_enter_syscalls
))
305 sys_data
= syscall_nr_to_meta(syscall_nr
);
309 size
= sizeof(*entry
) + sizeof(unsigned long) * sys_data
->nb_args
;
311 event
= trace_current_buffer_lock_reserve(&buffer
, sys_data
->enter_id
,
316 entry
= ring_buffer_event_data(event
);
317 entry
->nr
= syscall_nr
;
318 syscall_get_arguments(current
, regs
, 0, sys_data
->nb_args
, entry
->args
);
320 if (!filter_current_check_discard(buffer
, sys_data
->enter_event
,
322 trace_current_buffer_unlock_commit(buffer
, event
, 0, 0);
325 void ftrace_syscall_exit(struct pt_regs
*regs
, long ret
)
327 struct syscall_trace_exit
*entry
;
328 struct syscall_metadata
*sys_data
;
329 struct ring_buffer_event
*event
;
330 struct ring_buffer
*buffer
;
333 syscall_nr
= syscall_get_nr(current
, regs
);
336 if (!test_bit(syscall_nr
, enabled_exit_syscalls
))
339 sys_data
= syscall_nr_to_meta(syscall_nr
);
343 event
= trace_current_buffer_lock_reserve(&buffer
, sys_data
->exit_id
,
344 sizeof(*entry
), 0, 0);
348 entry
= ring_buffer_event_data(event
);
349 entry
->nr
= syscall_nr
;
350 entry
->ret
= syscall_get_return_value(current
, regs
);
352 if (!filter_current_check_discard(buffer
, sys_data
->exit_event
,
354 trace_current_buffer_unlock_commit(buffer
, event
, 0, 0);
357 int reg_event_syscall_enter(struct ftrace_event_call
*call
)
363 name
= (char *)call
->data
;
364 num
= syscall_name_to_nr(name
);
365 if (num
< 0 || num
>= NR_syscalls
)
367 mutex_lock(&syscall_trace_lock
);
368 if (!sys_refcount_enter
)
369 ret
= register_trace_sys_enter(ftrace_syscall_enter
);
371 pr_info("event trace: Could not activate"
372 "syscall entry trace point");
374 set_bit(num
, enabled_enter_syscalls
);
375 sys_refcount_enter
++;
377 mutex_unlock(&syscall_trace_lock
);
381 void unreg_event_syscall_enter(struct ftrace_event_call
*call
)
386 name
= (char *)call
->data
;
387 num
= syscall_name_to_nr(name
);
388 if (num
< 0 || num
>= NR_syscalls
)
390 mutex_lock(&syscall_trace_lock
);
391 sys_refcount_enter
--;
392 clear_bit(num
, enabled_enter_syscalls
);
393 if (!sys_refcount_enter
)
394 unregister_trace_sys_enter(ftrace_syscall_enter
);
395 mutex_unlock(&syscall_trace_lock
);
398 int reg_event_syscall_exit(struct ftrace_event_call
*call
)
405 num
= syscall_name_to_nr(name
);
406 if (num
< 0 || num
>= NR_syscalls
)
408 mutex_lock(&syscall_trace_lock
);
409 if (!sys_refcount_exit
)
410 ret
= register_trace_sys_exit(ftrace_syscall_exit
);
412 pr_info("event trace: Could not activate"
413 "syscall exit trace point");
415 set_bit(num
, enabled_exit_syscalls
);
418 mutex_unlock(&syscall_trace_lock
);
422 void unreg_event_syscall_exit(struct ftrace_event_call
*call
)
428 num
= syscall_name_to_nr(name
);
429 if (num
< 0 || num
>= NR_syscalls
)
431 mutex_lock(&syscall_trace_lock
);
433 clear_bit(num
, enabled_exit_syscalls
);
434 if (!sys_refcount_exit
)
435 unregister_trace_sys_exit(ftrace_syscall_exit
);
436 mutex_unlock(&syscall_trace_lock
);
439 struct trace_event event_syscall_enter
= {
440 .trace
= print_syscall_enter
,
443 struct trace_event event_syscall_exit
= {
444 .trace
= print_syscall_exit
,
447 int __init
init_ftrace_syscalls(void)
449 struct syscall_metadata
*meta
;
453 syscalls_metadata
= kzalloc(sizeof(*syscalls_metadata
) *
454 NR_syscalls
, GFP_KERNEL
);
455 if (!syscalls_metadata
) {
460 for (i
= 0; i
< NR_syscalls
; i
++) {
461 addr
= arch_syscall_addr(i
);
462 meta
= find_syscall_meta(addr
);
463 syscalls_metadata
[i
] = meta
;
468 core_initcall(init_ftrace_syscalls
);
470 #ifdef CONFIG_EVENT_PROFILE
472 static DECLARE_BITMAP(enabled_prof_enter_syscalls
, NR_syscalls
);
473 static DECLARE_BITMAP(enabled_prof_exit_syscalls
, NR_syscalls
);
474 static int sys_prof_refcount_enter
;
475 static int sys_prof_refcount_exit
;
477 static void prof_syscall_enter(struct pt_regs
*regs
, long id
)
479 struct syscall_metadata
*sys_data
;
480 struct syscall_trace_enter
*rec
;
487 syscall_nr
= syscall_get_nr(current
, regs
);
488 if (!test_bit(syscall_nr
, enabled_prof_enter_syscalls
))
491 sys_data
= syscall_nr_to_meta(syscall_nr
);
495 /* get the size after alignment with the u32 buffer size field */
496 size
= sizeof(unsigned long) * sys_data
->nb_args
+ sizeof(*rec
);
497 size
= ALIGN(size
+ sizeof(u32
), sizeof(u64
));
500 if (WARN_ONCE(size
> FTRACE_MAX_PROFILE_SIZE
,
501 "profile buffer not large enough"))
504 /* Protect the per cpu buffer, begin the rcu read side */
505 local_irq_save(flags
);
507 cpu
= smp_processor_id();
510 raw_data
= rcu_dereference(trace_profile_buf_nmi
);
512 raw_data
= rcu_dereference(trace_profile_buf
);
517 raw_data
= per_cpu_ptr(raw_data
, cpu
);
519 /* zero the dead bytes from align to not leak stack to user */
520 *(u64
*)(&raw_data
[size
- sizeof(u64
)]) = 0ULL;
522 rec
= (struct syscall_trace_enter
*) raw_data
;
523 tracing_generic_entry_update(&rec
->ent
, 0, 0);
524 rec
->ent
.type
= sys_data
->enter_id
;
525 rec
->nr
= syscall_nr
;
526 syscall_get_arguments(current
, regs
, 0, sys_data
->nb_args
,
527 (unsigned long *)&rec
->args
);
528 perf_tp_event(sys_data
->enter_id
, 0, 1, rec
, size
);
531 local_irq_restore(flags
);
534 int reg_prof_syscall_enter(char *name
)
539 num
= syscall_name_to_nr(name
);
540 if (num
< 0 || num
>= NR_syscalls
)
543 mutex_lock(&syscall_trace_lock
);
544 if (!sys_prof_refcount_enter
)
545 ret
= register_trace_sys_enter(prof_syscall_enter
);
547 pr_info("event trace: Could not activate"
548 "syscall entry trace point");
550 set_bit(num
, enabled_prof_enter_syscalls
);
551 sys_prof_refcount_enter
++;
553 mutex_unlock(&syscall_trace_lock
);
557 void unreg_prof_syscall_enter(char *name
)
561 num
= syscall_name_to_nr(name
);
562 if (num
< 0 || num
>= NR_syscalls
)
565 mutex_lock(&syscall_trace_lock
);
566 sys_prof_refcount_enter
--;
567 clear_bit(num
, enabled_prof_enter_syscalls
);
568 if (!sys_prof_refcount_enter
)
569 unregister_trace_sys_enter(prof_syscall_enter
);
570 mutex_unlock(&syscall_trace_lock
);
573 static void prof_syscall_exit(struct pt_regs
*regs
, long ret
)
575 struct syscall_metadata
*sys_data
;
576 struct syscall_trace_exit
*rec
;
583 syscall_nr
= syscall_get_nr(current
, regs
);
584 if (!test_bit(syscall_nr
, enabled_prof_exit_syscalls
))
587 sys_data
= syscall_nr_to_meta(syscall_nr
);
591 /* We can probably do that at build time */
592 size
= ALIGN(sizeof(*rec
) + sizeof(u32
), sizeof(u64
));
596 * Impossible, but be paranoid with the future
597 * How to put this check outside runtime?
599 if (WARN_ONCE(size
> FTRACE_MAX_PROFILE_SIZE
,
600 "exit event has grown above profile buffer size"))
603 /* Protect the per cpu buffer, begin the rcu read side */
604 local_irq_save(flags
);
605 cpu
= smp_processor_id();
608 raw_data
= rcu_dereference(trace_profile_buf_nmi
);
610 raw_data
= rcu_dereference(trace_profile_buf
);
615 raw_data
= per_cpu_ptr(raw_data
, cpu
);
617 /* zero the dead bytes from align to not leak stack to user */
618 *(u64
*)(&raw_data
[size
- sizeof(u64
)]) = 0ULL;
620 rec
= (struct syscall_trace_exit
*)raw_data
;
622 tracing_generic_entry_update(&rec
->ent
, 0, 0);
623 rec
->ent
.type
= sys_data
->exit_id
;
624 rec
->nr
= syscall_nr
;
625 rec
->ret
= syscall_get_return_value(current
, regs
);
627 perf_tp_event(sys_data
->exit_id
, 0, 1, rec
, size
);
630 local_irq_restore(flags
);
633 int reg_prof_syscall_exit(char *name
)
638 num
= syscall_name_to_nr(name
);
639 if (num
< 0 || num
>= NR_syscalls
)
642 mutex_lock(&syscall_trace_lock
);
643 if (!sys_prof_refcount_exit
)
644 ret
= register_trace_sys_exit(prof_syscall_exit
);
646 pr_info("event trace: Could not activate"
647 "syscall entry trace point");
649 set_bit(num
, enabled_prof_exit_syscalls
);
650 sys_prof_refcount_exit
++;
652 mutex_unlock(&syscall_trace_lock
);
656 void unreg_prof_syscall_exit(char *name
)
660 num
= syscall_name_to_nr(name
);
661 if (num
< 0 || num
>= NR_syscalls
)
664 mutex_lock(&syscall_trace_lock
);
665 sys_prof_refcount_exit
--;
666 clear_bit(num
, enabled_prof_exit_syscalls
);
667 if (!sys_prof_refcount_exit
)
668 unregister_trace_sys_exit(prof_syscall_exit
);
669 mutex_unlock(&syscall_trace_lock
);