/* kernel/trace/trace_syscalls.c */
#include <trace/syscall.h>
#include <trace/events/syscalls.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
#include <linux/perf_event.h>
#include <asm/syscall.h>

#include "trace_output.h"
#include "trace.h"

static DEFINE_MUTEX(syscall_trace_lock);
static int sys_refcount_enter;
static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
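
/*
 * A single pair of sys_enter/sys_exit tracepoint probes serves every
 * syscall.  The refcounts above count how many events are currently
 * registered; the bitmaps select, per syscall number, which ones are
 * actually recorded when the shared probe fires.
 */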

extern unsigned long __start_syscalls_metadata[];
extern unsigned long __stop_syscalls_metadata[];

static struct syscall_metadata **syscalls_metadata;

static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
{
	struct syscall_metadata *start;
	struct syscall_metadata *stop;
	char str[KSYM_SYMBOL_LEN];

	start = (struct syscall_metadata *)__start_syscalls_metadata;
	stop = (struct syscall_metadata *)__stop_syscalls_metadata;
	kallsyms_lookup(syscall, NULL, NULL, NULL, str);

	for ( ; start < stop; start++) {
		/*
		 * Only compare after the "sys" prefix. Archs that use
		 * syscall wrappers may have syscall symbols aliased and
		 * prefixed with "SyS" instead of "sys", leading to an
		 * unwanted mismatch.
		 */
		if (start->name && !strcmp(start->name + 3, str + 3))
			return start;
	}
	return NULL;
}
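
/*
 * Example: if kallsyms resolves the address to the wrapper alias
 * "SyS_read", comparing from offset 3 still matches the metadata
 * entry named "sys_read", since both reduce to "_read".
 */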

static struct syscall_metadata *syscall_nr_to_meta(int nr)
{
	if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
		return NULL;

	return syscalls_metadata[nr];
}

int syscall_name_to_nr(const char *name)
{
	int i;

	if (!syscalls_metadata)
		return -1;

	for (i = 0; i < NR_syscalls; i++) {
		if (syscalls_metadata[i]) {
			if (!strcmp(syscalls_metadata[i]->name, name))
				return i;
		}
	}
	return -1;
}
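
/*
 * The nr -> metadata direction is an O(1) table lookup; the
 * name -> nr direction is a linear scan over NR_syscalls entries,
 * which is acceptable for the slow (un)registration paths that use it.
 */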

enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, ret, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry)
		goto end;

	if (entry->enter_event->id != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	ret = trace_seq_printf(s, "%s(", entry->name);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	for (i = 0; i < entry->nb_args; i++) {
		/* parameter types */
		if (trace_flags & TRACE_ITER_VERBOSE) {
			ret = trace_seq_printf(s, "%s ", entry->types[i]);
			if (!ret)
				return TRACE_TYPE_PARTIAL_LINE;
		}
		/* parameter values */
		ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
				       trace->args[i],
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return TRACE_TYPE_PARTIAL_LINE;
	}

	ret = trace_seq_putc(s, ')');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

end:
	ret = trace_seq_putc(s, '\n');
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
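
/*
 * An enter line rendered by the function above looks roughly like
 * this (argument values here are made up for illustration):
 *
 *	sys_read(fd: 3, buf: 7fff8e4c2e70, count: 1000)
 */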

enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;
	int ret;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(syscall);

	if (!entry) {
		trace_seq_printf(s, "\n");
		return TRACE_TYPE_HANDLED;
	}

	if (entry->exit_event->id != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
			       trace->ret);
	if (!ret)
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}
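
/*
 * The matching exit line, again with an illustrative return value:
 *
 *	sys_read -> 0x400
 */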

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name)					\
	sizeof(type) != sizeof(trace.name) ?				\
		__bad_type_size() :					\
		#type, #name, offsetof(typeof(trace), name),		\
		sizeof(trace.name), is_signed_type(type)
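
/*
 * SYSCALL_FIELD expands to several comma-separated arguments at once.
 * For example, SYSCALL_FIELD(int, nr) supplies "int", "nr",
 * offsetof(typeof(trace), nr), sizeof(trace.nr) and is_signed_type(int)
 * to the caller, while the sizeof comparison forces a link error via
 * the undefined __bad_type_size() if the declared type does not match
 * the struct member.
 */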

int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int i;
	int ret;
	struct syscall_metadata *entry = call->data;
	struct syscall_trace_enter trace;
	int offset = offsetof(struct syscall_trace_enter, args);

	ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr));
	if (!ret)
		return 0;

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
				       entry->args[i]);
		if (!ret)
			return 0;
		ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
				       "\tsigned:%u;\n", offset,
				       sizeof(unsigned long),
				       is_signed_type(unsigned long));
		if (!ret)
			return 0;
		offset += sizeof(unsigned long);
	}

	trace_seq_puts(s, "\nprint fmt: \"");
	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
				       sizeof(unsigned long),
				       i == entry->nb_args - 1 ? "" : ", ");
		if (!ret)
			return 0;
	}
	trace_seq_putc(s, '"');

	for (i = 0; i < entry->nb_args; i++) {
		ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
				       entry->args[i]);
		if (!ret)
			return 0;
	}

	return trace_seq_putc(s, '\n');
}
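
/*
 * The text built above is what appears in each syscall event's
 * "format" file under the tracing debugfs directory.  A sketch of the
 * output for a two-argument syscall on a 64-bit arch (the field
 * offsets shown are illustrative):
 *
 *	field:int nr;	offset:12;	size:4;	signed:1;
 *	field:unsigned int fd;	offset:16;	size:8;	signed:0;
 *	field:char * buf;	offset:24;	size:8;	signed:0;
 *
 *	print fmt: "fd: 0x%08lx, buf: 0x%08lx", ((unsigned long)(REC->fd)), ((unsigned long)(REC->buf))
 */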

int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
{
	int ret;
	struct syscall_trace_exit trace;

	ret = trace_seq_printf(s,
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n"
			       "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
			       "\tsigned:%u;\n",
			       SYSCALL_FIELD(int, nr),
			       SYSCALL_FIELD(long, ret));
	if (!ret)
		return 0;

	return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
}

int syscall_enter_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_enter trace;
	struct syscall_metadata *meta = call->data;
	int ret;
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_define_field(call, meta->types[i],
					 meta->args[i], offset,
					 sizeof(unsigned long), 0,
					 FILTER_OTHER);
		offset += sizeof(unsigned long);
	}

	return ret;
}

int syscall_exit_define_fields(struct ftrace_event_call *call)
{
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_common_fields(call);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
				 FILTER_OTHER);

	return ret;
}

void ftrace_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int size;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->enter_event->id, size, 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);

	if (!filter_current_check_discard(buffer, sys_data->enter_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}
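
/*
 * The reserved record is variable-sized: a syscall_trace_enter header
 * followed by nb_args unsigned longs.  A 3-argument syscall therefore
 * reserves sizeof(*entry) + 3 * sizeof(unsigned long) bytes in the
 * ring buffer.
 */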

void ftrace_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	int syscall_nr;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	event = trace_current_buffer_lock_reserve(&buffer,
			sys_data->exit_event->id, sizeof(*entry), 0, 0);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	if (!filter_current_check_discard(buffer, sys_data->exit_event,
					  entry, event))
		trace_current_buffer_unlock_commit(buffer, event, 0, 0);
}

int reg_event_syscall_enter(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_enter)
		ret = register_trace_sys_enter(ftrace_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_enter_syscalls);
		sys_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}
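
/*
 * Note the registration pattern used here and below: the tracepoint
 * probe is registered only once, when the first event arrives
 * (refcount 0 -> 1); subsequent events merely set their bit in the
 * enable bitmap, and the probe is unregistered when the last user goes
 * away.
 */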

void unreg_event_syscall_enter(struct ftrace_event_call *call)
{
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_enter--;
	clear_bit(num, enabled_enter_syscalls);
	if (!sys_refcount_enter)
		unregister_trace_sys_enter(ftrace_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

int reg_event_syscall_exit(struct ftrace_event_call *call)
{
	int ret = 0;
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	if (!sys_refcount_exit)
		ret = register_trace_sys_exit(ftrace_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_exit_syscalls);
		sys_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_event_syscall_exit(struct ftrace_event_call *call)
{
	int num;
	const char *name;

	name = ((struct syscall_metadata *)call->data)->name;
	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;
	mutex_lock(&syscall_trace_lock);
	sys_refcount_exit--;
	clear_bit(num, enabled_exit_syscalls);
	if (!sys_refcount_exit)
		unregister_trace_sys_exit(ftrace_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

int __init init_ftrace_syscalls(void)
{
	struct syscall_metadata *meta;
	unsigned long addr;
	int i;

	syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
				    NR_syscalls, GFP_KERNEL);
	if (!syscalls_metadata) {
		WARN_ON(1);
		return -ENOMEM;
	}

	for (i = 0; i < NR_syscalls; i++) {
		addr = arch_syscall_addr(i);
		meta = find_syscall_meta(addr);
		syscalls_metadata[i] = meta;
	}

	return 0;
}
core_initcall(init_ftrace_syscalls);
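
/*
 * As a core_initcall, the table above is populated early in boot,
 * before syscall tracing events are normally enabled.  Each slot maps
 * a syscall number to the metadata found by matching the syscall table
 * entry's symbol name; slots stay NULL for syscalls without metadata.
 */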

#ifdef CONFIG_EVENT_PROFILE

static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
static int sys_prof_refcount_enter;
static int sys_prof_refcount_exit;

static void prof_syscall_enter(struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	unsigned long flags;
	char *trace_buf;
	char *raw_data;
	int syscall_nr;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);
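
	/*
	 * Worked example (sizes are illustrative, assuming a 64-bit
	 * arch where sizeof(*rec) is 16 and the syscall has 3 args):
	 * raw size = 16 + 3 * 8 = 40; with the u32 size field that the
	 * perf output path prepends, 40 + 4 = 44, rounded up to the
	 * next u64 boundary gives 48; subtracting the u32 back leaves
	 * size = 44, so header plus payload together stay u64-aligned.
	 */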

	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "profile buffer not large enough"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_enter *) raw_data;
	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->enter_event->id;
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);
	perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_enter(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_enter)
		ret = register_trace_sys_enter(prof_syscall_enter);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall entry trace point");
	} else {
		set_bit(num, enabled_prof_enter_syscalls);
		sys_prof_refcount_enter++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_enter(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_enter--;
	clear_bit(num, enabled_prof_enter_syscalls);
	if (!sys_prof_refcount_enter)
		unregister_trace_sys_enter(prof_syscall_enter);
	mutex_unlock(&syscall_trace_lock);
}

static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	unsigned long flags;
	int syscall_nr;
	char *trace_buf;
	char *raw_data;
	int rctx;
	int size;
	int cpu;

	syscall_nr = syscall_get_nr(current, regs);
	if (syscall_nr < 0)
		return;
	if (!test_bit(syscall_nr, enabled_prof_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(syscall_nr);
	if (!sys_data)
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	/*
	 * Impossible, but be paranoid with the future
	 * How to put this check outside runtime?
	 */
	if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE,
		      "exit event has grown above profile buffer size"))
		return;

	/* Protect the per cpu buffer, begin the rcu read side */
	local_irq_save(flags);

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;

	cpu = smp_processor_id();

	trace_buf = rcu_dereference(perf_trace_buf);

	if (!trace_buf)
		goto end;

	raw_data = per_cpu_ptr(trace_buf, cpu);

	/* zero the dead bytes from align to not leak stack to user */
	*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;

	rec = (struct syscall_trace_exit *)raw_data;

	tracing_generic_entry_update(&rec->ent, 0, 0);
	rec->ent.type = sys_data->exit_event->id;
	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size);

end:
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);
}

int reg_prof_syscall_exit(char *name)
{
	int ret = 0;
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return -ENOSYS;

	mutex_lock(&syscall_trace_lock);
	if (!sys_prof_refcount_exit)
		ret = register_trace_sys_exit(prof_syscall_exit);
	if (ret) {
		pr_info("event trace: Could not activate "
			"syscall exit trace point");
	} else {
		set_bit(num, enabled_prof_exit_syscalls);
		sys_prof_refcount_exit++;
	}
	mutex_unlock(&syscall_trace_lock);
	return ret;
}

void unreg_prof_syscall_exit(char *name)
{
	int num;

	num = syscall_name_to_nr(name);
	if (num < 0 || num >= NR_syscalls)
		return;

	mutex_lock(&syscall_trace_lock);
	sys_prof_refcount_exit--;
	clear_bit(num, enabled_prof_exit_syscalls);
	if (!sys_prof_refcount_exit)
		unregister_trace_sys_exit(prof_syscall_exit);
	mutex_unlock(&syscall_trace_lock);
}

#endif