perf trace: Exclude the kernel part of the callchain leading to a syscall
[deliverable/linux.git] / tools / perf / builtin-trace.c
1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
41 #include <stdlib.h>
42 #include <sys/mman.h>
43 #include <linux/futex.h>
44 #include <linux/err.h>
45 #include <linux/seccomp.h>
46 #include <linux/filter.h>
47 #include <linux/audit.h>
48 #include <sys/ptrace.h>
49 #include <linux/random.h>
50
/*
 * Fallbacks for older distros: every constant below is defined here only
 * when the system headers did not already provide it.
 */

/* mmap() / madvise() */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

/* eventfd() */
#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

/* open() */
#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

/* socket() / recvmsg() */
#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* perf_event_open() */
#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
116
117 struct trace {
118 struct perf_tool tool;
119 struct syscalltbl *sctbl;
120 struct {
121 int max;
122 struct syscall *table;
123 struct {
124 struct perf_evsel *sys_enter,
125 *sys_exit;
126 } events;
127 } syscalls;
128 struct record_opts opts;
129 struct perf_evlist *evlist;
130 struct machine *host;
131 struct thread *current;
132 u64 base_time;
133 FILE *output;
134 unsigned long nr_events;
135 struct strlist *ev_qualifier;
136 struct {
137 size_t nr;
138 int *entries;
139 } ev_qualifier_ids;
140 struct intlist *tid_list;
141 struct intlist *pid_list;
142 struct {
143 size_t nr;
144 pid_t *entries;
145 } filter_pids;
146 double duration_filter;
147 double runtime_ms;
148 struct {
149 u64 vfs_getname,
150 proc_getname;
151 } stats;
152 bool not_ev_qualifier;
153 bool live;
154 bool full_time;
155 bool sched;
156 bool multiple_threads;
157 bool summary;
158 bool summary_only;
159 bool show_comm;
160 bool show_tool_stats;
161 bool trace_syscalls;
162 bool kernel_syscallchains;
163 bool force;
164 bool vfs_getname;
165 int trace_pgfaults;
166 int open_id;
167 };
168
169 struct tp_field {
170 int offset;
171 union {
172 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
173 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
174 };
175 };
176
177 #define TP_UINT_FIELD(bits) \
178 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
179 { \
180 u##bits value; \
181 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
182 return value; \
183 }
184
185 TP_UINT_FIELD(8);
186 TP_UINT_FIELD(16);
187 TP_UINT_FIELD(32);
188 TP_UINT_FIELD(64);
189
190 #define TP_UINT_FIELD__SWAPPED(bits) \
191 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
192 { \
193 u##bits value; \
194 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
195 return bswap_##bits(value);\
196 }
197
198 TP_UINT_FIELD__SWAPPED(16);
199 TP_UINT_FIELD__SWAPPED(32);
200 TP_UINT_FIELD__SWAPPED(64);
201
202 static int tp_field__init_uint(struct tp_field *field,
203 struct format_field *format_field,
204 bool needs_swap)
205 {
206 field->offset = format_field->offset;
207
208 switch (format_field->size) {
209 case 1:
210 field->integer = tp_field__u8;
211 break;
212 case 2:
213 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
214 break;
215 case 4:
216 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
217 break;
218 case 8:
219 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
220 break;
221 default:
222 return -1;
223 }
224
225 return 0;
226 }
227
228 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
229 {
230 return sample->raw_data + field->offset;
231 }
232
233 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
234 {
235 field->offset = format_field->offset;
236 field->pointer = tp_field__ptr;
237 return 0;
238 }
239
240 struct syscall_tp {
241 struct tp_field id;
242 union {
243 struct tp_field args, ret;
244 };
245 };
246
247 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
248 struct tp_field *field,
249 const char *name)
250 {
251 struct format_field *format_field = perf_evsel__field(evsel, name);
252
253 if (format_field == NULL)
254 return -1;
255
256 return tp_field__init_uint(field, format_field, evsel->needs_swap);
257 }
258
/*
 * Initialise the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as an unsigned integer reader for the tracepoint field of the
 * same name. Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * The evsel argument is parenthesized so that non-trivial expressions can be
 * passed safely; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
262
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
274
/*
 * Initialise the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as a pointer accessor for the tracepoint field of the same
 * name. Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * The evsel argument is parenthesized so that non-trivial expressions can be
 * passed safely; note that it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
278
279 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
280 {
281 zfree(&evsel->priv);
282 perf_evsel__delete(evsel);
283 }
284
285 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
286 {
287 evsel->priv = malloc(sizeof(struct syscall_tp));
288 if (evsel->priv != NULL) {
289 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
290 goto out_delete;
291
292 evsel->handler = handler;
293 return 0;
294 }
295
296 return -ENOMEM;
297
298 out_delete:
299 zfree(&evsel->priv);
300 return -ENOENT;
301 }
302
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint and prepare
 * it with perf_evsel__init_syscall_tp(), using @handler as its handler.
 *
 * Returns the new evsel, or NULL if the tracepoint could not be created or
 * initialised (in the latter case the evsel is deleted again).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
323
/*
 * Read the integer tracepoint field 'name' of 'sample' through the accessor
 * stored in the struct syscall_tp hanging off evsel->priv.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed for evsel and sample.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
327
/*
 * Get a pointer to the tracepoint field 'name' inside 'sample' through the
 * accessor stored in the struct syscall_tp hanging off evsel->priv.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed for evsel and sample.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
331
332 struct syscall_arg {
333 unsigned long val;
334 struct thread *thread;
335 struct trace *trace;
336 void *parm;
337 u8 idx;
338 u8 mask;
339 };
340
/*
 * Table mapping consecutive integer values to names: the value 'offset'
 * corresponds to entries[0].
 */
struct strarray {
	int	    offset;	/* subtracted from a value to get its index */
	int	    nr_entries;	/* number of slots in entries[] */
	const char  **entries;
};
346
/*
 * Define 'struct strarray strarray__<array>' describing the string table
 * <array>, which must be a real array (ARRAY_SIZE() is applied to it).
 * The offset is left at zero, i.e. entries[0] names the value 0.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = {	\
	.nr_entries = ARRAY_SIZE(array),				\
	.entries    = array,						\
}

/* Same as DEFINE_STRARRAY(), but entries[0] names the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off,						\
	.nr_entries = ARRAY_SIZE(array),				\
	.entries    = array,						\
}
357
358 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
359 const char *intfmt,
360 struct syscall_arg *arg)
361 {
362 struct strarray *sa = arg->parm;
363 int idx = arg->val - sa->offset;
364
365 if (idx < 0 || idx >= sa->nr_entries)
366 return scnprintf(bf, size, intfmt, arg->val);
367
368 return scnprintf(bf, size, "%s", sa->entries[idx]);
369 }
370
/*
 * String table formatter: prints the name found in the struct strarray in
 * arg->parm, falling back to the value in decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
378
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 *
 * String table formatter like syscall_arg__scnprintf_strarray(), except that
 * the fallback for values without a name is hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
392
393 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
394 struct syscall_arg *arg);
395
396 #define SCA_FD syscall_arg__scnprintf_fd
397
398 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
399 struct syscall_arg *arg)
400 {
401 int fd = arg->val;
402
403 if (fd == AT_FDCWD)
404 return scnprintf(bf, size, "CWD");
405
406 return syscall_arg__scnprintf_fd(bf, size, arg);
407 }
408
409 #define SCA_FDAT syscall_arg__scnprintf_fd_at
410
411 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
412 struct syscall_arg *arg);
413
414 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
415
416 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
417 struct syscall_arg *arg)
418 {
419 return scnprintf(bf, size, "%#lx", arg->val);
420 }
421
422 #define SCA_HEX syscall_arg__scnprintf_hex
423
424 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
425 struct syscall_arg *arg)
426 {
427 return scnprintf(bf, size, "%d", arg->val);
428 }
429
430 #define SCA_INT syscall_arg__scnprintf_int
431
432 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
433 struct syscall_arg *arg)
434 {
435 int printed = 0, prot = arg->val;
436
437 if (prot == PROT_NONE)
438 return scnprintf(bf, size, "NONE");
439 #define P_MMAP_PROT(n) \
440 if (prot & PROT_##n) { \
441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
442 prot &= ~PROT_##n; \
443 }
444
445 P_MMAP_PROT(EXEC);
446 P_MMAP_PROT(READ);
447 P_MMAP_PROT(WRITE);
448 #ifdef PROT_SEM
449 P_MMAP_PROT(SEM);
450 #endif
451 P_MMAP_PROT(GROWSDOWN);
452 P_MMAP_PROT(GROWSUP);
453 #undef P_MMAP_PROT
454
455 if (prot)
456 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
457
458 return printed;
459 }
460
461 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
462
463 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
464 struct syscall_arg *arg)
465 {
466 int printed = 0, flags = arg->val;
467
468 #define P_MMAP_FLAG(n) \
469 if (flags & MAP_##n) { \
470 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
471 flags &= ~MAP_##n; \
472 }
473
474 P_MMAP_FLAG(SHARED);
475 P_MMAP_FLAG(PRIVATE);
476 #ifdef MAP_32BIT
477 P_MMAP_FLAG(32BIT);
478 #endif
479 P_MMAP_FLAG(ANONYMOUS);
480 P_MMAP_FLAG(DENYWRITE);
481 P_MMAP_FLAG(EXECUTABLE);
482 P_MMAP_FLAG(FILE);
483 P_MMAP_FLAG(FIXED);
484 P_MMAP_FLAG(GROWSDOWN);
485 #ifdef MAP_HUGETLB
486 P_MMAP_FLAG(HUGETLB);
487 #endif
488 P_MMAP_FLAG(LOCKED);
489 P_MMAP_FLAG(NONBLOCK);
490 P_MMAP_FLAG(NORESERVE);
491 P_MMAP_FLAG(POPULATE);
492 P_MMAP_FLAG(STACK);
493 #ifdef MAP_UNINITIALIZED
494 P_MMAP_FLAG(UNINITIALIZED);
495 #endif
496 #undef P_MMAP_FLAG
497
498 if (flags)
499 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
500
501 return printed;
502 }
503
504 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
505
506 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
507 struct syscall_arg *arg)
508 {
509 int printed = 0, flags = arg->val;
510
511 #define P_MREMAP_FLAG(n) \
512 if (flags & MREMAP_##n) { \
513 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
514 flags &= ~MREMAP_##n; \
515 }
516
517 P_MREMAP_FLAG(MAYMOVE);
518 #ifdef MREMAP_FIXED
519 P_MREMAP_FLAG(FIXED);
520 #endif
521 #undef P_MREMAP_FLAG
522
523 if (flags)
524 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
525
526 return printed;
527 }
528
529 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
530
531 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
532 struct syscall_arg *arg)
533 {
534 int behavior = arg->val;
535
536 switch (behavior) {
537 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
538 P_MADV_BHV(NORMAL);
539 P_MADV_BHV(RANDOM);
540 P_MADV_BHV(SEQUENTIAL);
541 P_MADV_BHV(WILLNEED);
542 P_MADV_BHV(DONTNEED);
543 P_MADV_BHV(REMOVE);
544 P_MADV_BHV(DONTFORK);
545 P_MADV_BHV(DOFORK);
546 P_MADV_BHV(HWPOISON);
547 #ifdef MADV_SOFT_OFFLINE
548 P_MADV_BHV(SOFT_OFFLINE);
549 #endif
550 P_MADV_BHV(MERGEABLE);
551 P_MADV_BHV(UNMERGEABLE);
552 #ifdef MADV_HUGEPAGE
553 P_MADV_BHV(HUGEPAGE);
554 #endif
555 #ifdef MADV_NOHUGEPAGE
556 P_MADV_BHV(NOHUGEPAGE);
557 #endif
558 #ifdef MADV_DONTDUMP
559 P_MADV_BHV(DONTDUMP);
560 #endif
561 #ifdef MADV_DODUMP
562 P_MADV_BHV(DODUMP);
563 #endif
564 #undef P_MADV_PHV
565 default: break;
566 }
567
568 return scnprintf(bf, size, "%#x", behavior);
569 }
570
571 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
572
573 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
574 struct syscall_arg *arg)
575 {
576 int printed = 0, op = arg->val;
577
578 if (op == 0)
579 return scnprintf(bf, size, "NONE");
580 #define P_CMD(cmd) \
581 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
582 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
583 op &= ~LOCK_##cmd; \
584 }
585
586 P_CMD(SH);
587 P_CMD(EX);
588 P_CMD(NB);
589 P_CMD(UN);
590 P_CMD(MAND);
591 P_CMD(RW);
592 P_CMD(READ);
593 P_CMD(WRITE);
594 #undef P_OP
595
596 if (op)
597 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
598
599 return printed;
600 }
601
602 #define SCA_FLOCK syscall_arg__scnprintf_flock
603
604 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
605 {
606 enum syscall_futex_args {
607 SCF_UADDR = (1 << 0),
608 SCF_OP = (1 << 1),
609 SCF_VAL = (1 << 2),
610 SCF_TIMEOUT = (1 << 3),
611 SCF_UADDR2 = (1 << 4),
612 SCF_VAL3 = (1 << 5),
613 };
614 int op = arg->val;
615 int cmd = op & FUTEX_CMD_MASK;
616 size_t printed = 0;
617
618 switch (cmd) {
619 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
620 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
621 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
622 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
623 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
624 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
625 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
626 P_FUTEX_OP(WAKE_OP); break;
627 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
628 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
629 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
630 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
631 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
632 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
633 default: printed = scnprintf(bf, size, "%#x", cmd); break;
634 }
635
636 if (op & FUTEX_PRIVATE_FLAG)
637 printed += scnprintf(bf + printed, size - printed, "|PRIV");
638
639 if (op & FUTEX_CLOCK_REALTIME)
640 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
641
642 return printed;
643 }
644
645 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
646
/* Names of the bpf() commands, indexed by command number. */
static const char *bpf_cmd[] = {
	[0] = "MAP_CREATE",
	[1] = "MAP_LOOKUP_ELEM",
	[2] = "MAP_UPDATE_ELEM",
	[3] = "MAP_DELETE_ELEM",
	[4] = "MAP_GET_NEXT_KEY",
	[5] = "PROG_LOAD",
};
651 static DEFINE_STRARRAY(bpf_cmd);
652
/* Names of the epoll_ctl() operations; the first entry names the value 1. */
static const char *epoll_ctl_ops[] = {
	[0] = "ADD",
	[1] = "DEL",
	[2] = "MOD",
};
654 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
655
/* Names of the interval timers, indexed by the 'which' value. */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
657 static DEFINE_STRARRAY(itimers);
658
/* Names of the keyctl() operations, indexed by operation number. */
static const char *keyctl_options[] = {
	[0]  = "GET_KEYRING_ID",
	[1]  = "JOIN_SESSION_KEYRING",
	[2]  = "UPDATE",
	[3]  = "REVOKE",
	[4]  = "CHOWN",
	[5]  = "SETPERM",
	[6]  = "DESCRIBE",
	[7]  = "CLEAR",
	[8]  = "LINK",
	[9]  = "UNLINK",
	[10] = "SEARCH",
	[11] = "READ",
	[12] = "INSTANTIATE",
	[13] = "NEGATE",
	[14] = "SET_REQKEY_KEYRING",
	[15] = "SET_TIMEOUT",
	[16] = "ASSUME_AUTHORITY",
	[17] = "GET_SECURITY",
	[18] = "SESSION_TO_PARENT",
	[19] = "REJECT",
	[20] = "INSTANTIATE_IOV",
	[21] = "INVALIDATE",
	[22] = "GET_PERSISTENT",
};
666 static DEFINE_STRARRAY(keyctl_options);
667
/*
 * Names of the lseek() 'whence' values, indexed by value. DATA and HOLE are
 * only present when the system headers know about them.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
676 static DEFINE_STRARRAY(whences);
677
/* Names of the fcntl() commands, indexed by command number. */
static const char *fcntl_cmds[] = {
	[0]  = "DUPFD",
	[1]  = "GETFD",
	[2]  = "SETFD",
	[3]  = "GETFL",
	[4]  = "SETFL",
	[5]  = "GETLK",
	[6]  = "SETLK",
	[7]  = "SETLKW",
	[8]  = "SETOWN",
	[9]  = "GETOWN",
	[10] = "SETSIG",
	[11] = "GETSIG",
	[12] = "F_GETLK64",
	[13] = "F_SETLK64",
	[14] = "F_SETLKW64",
	[15] = "F_SETOWN_EX",
	[16] = "F_GETOWN_EX",
	[17] = "F_GETOWNER_UIDS",
};
684 static DEFINE_STRARRAY(fcntl_cmds);
685
/* Names of the rlimit resources, indexed by resource number. */
static const char *rlimit_resources[] = {
	[0]  = "CPU",
	[1]  = "FSIZE",
	[2]  = "DATA",
	[3]  = "STACK",
	[4]  = "CORE",
	[5]  = "RSS",
	[6]  = "NPROC",
	[7]  = "NOFILE",
	[8]  = "MEMLOCK",
	[9]  = "AS",
	[10] = "LOCKS",
	[11] = "SIGPENDING",
	[12] = "MSGQUEUE",
	[13] = "NICE",
	[14] = "RTPRIO",
	[15] = "RTTIME",
};
691 static DEFINE_STRARRAY(rlimit_resources);
692
/* Names of the sigprocmask() 'how' values, indexed by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
694 static DEFINE_STRARRAY(sighow);
695
/* Names of the clock ids, indexed by clock id. */
static const char *clockid[] = {
	[0]  = "REALTIME",
	[1]  = "MONOTONIC",
	[2]  = "PROCESS_CPUTIME_ID",
	[3]  = "THREAD_CPUTIME_ID",
	[4]  = "MONOTONIC_RAW",
	[5]  = "REALTIME_COARSE",
	[6]  = "MONOTONIC_COARSE",
	[7]  = "BOOTTIME",
	[8]  = "REALTIME_ALARM",
	[9]  = "BOOTTIME_ALARM",
	[10] = "SGI_CYCLE",
	[11] = "TAI",
};
701 static DEFINE_STRARRAY(clockid);
702
/*
 * Names of the socket address families, indexed by AF_* value.
 *
 * The table is positional, so every family number up to the last one must
 * have an entry: "MPLS" (AF_MPLS, 28) has to sit between "IB" (27) and
 * "CAN" (29), otherwise CAN and everything after it is reported under the
 * name of the following family.
 *
 * The leading comment on each row is the index of its first entry.
 */
static const char *socket_families[] = {
	/*  0 */ "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	/*  7 */ "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	/* 14 */ "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	/* 21 */ "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS",
	/* 29 */ "CAN", "TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET",
	/* 36 */ "IEEE802154", "CAIF", "ALG", "NFC", "VSOCK",
};
711 static DEFINE_STRARRAY(socket_families);
712
713 #ifndef SOCK_TYPE_MASK
714 #define SOCK_TYPE_MASK 0xf
715 #endif
716
717 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
718 struct syscall_arg *arg)
719 {
720 size_t printed;
721 int type = arg->val,
722 flags = type & ~SOCK_TYPE_MASK;
723
724 type &= SOCK_TYPE_MASK;
725 /*
726 * Can't use a strarray, MIPS may override for ABI reasons.
727 */
728 switch (type) {
729 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
730 P_SK_TYPE(STREAM);
731 P_SK_TYPE(DGRAM);
732 P_SK_TYPE(RAW);
733 P_SK_TYPE(RDM);
734 P_SK_TYPE(SEQPACKET);
735 P_SK_TYPE(DCCP);
736 P_SK_TYPE(PACKET);
737 #undef P_SK_TYPE
738 default:
739 printed = scnprintf(bf, size, "%#x", type);
740 }
741
742 #define P_SK_FLAG(n) \
743 if (flags & SOCK_##n) { \
744 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
745 flags &= ~SOCK_##n; \
746 }
747
748 P_SK_FLAG(CLOEXEC);
749 P_SK_FLAG(NONBLOCK);
750 #undef P_SK_FLAG
751
752 if (flags)
753 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
754
755 return printed;
756 }
757
758 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
759
760 #ifndef MSG_PROBE
761 #define MSG_PROBE 0x10
762 #endif
763 #ifndef MSG_WAITFORONE
764 #define MSG_WAITFORONE 0x10000
765 #endif
766 #ifndef MSG_SENDPAGE_NOTLAST
767 #define MSG_SENDPAGE_NOTLAST 0x20000
768 #endif
769 #ifndef MSG_FASTOPEN
770 #define MSG_FASTOPEN 0x20000000
771 #endif
772
773 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
774 struct syscall_arg *arg)
775 {
776 int printed = 0, flags = arg->val;
777
778 if (flags == 0)
779 return scnprintf(bf, size, "NONE");
780 #define P_MSG_FLAG(n) \
781 if (flags & MSG_##n) { \
782 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
783 flags &= ~MSG_##n; \
784 }
785
786 P_MSG_FLAG(OOB);
787 P_MSG_FLAG(PEEK);
788 P_MSG_FLAG(DONTROUTE);
789 P_MSG_FLAG(TRYHARD);
790 P_MSG_FLAG(CTRUNC);
791 P_MSG_FLAG(PROBE);
792 P_MSG_FLAG(TRUNC);
793 P_MSG_FLAG(DONTWAIT);
794 P_MSG_FLAG(EOR);
795 P_MSG_FLAG(WAITALL);
796 P_MSG_FLAG(FIN);
797 P_MSG_FLAG(SYN);
798 P_MSG_FLAG(CONFIRM);
799 P_MSG_FLAG(RST);
800 P_MSG_FLAG(ERRQUEUE);
801 P_MSG_FLAG(NOSIGNAL);
802 P_MSG_FLAG(MORE);
803 P_MSG_FLAG(WAITFORONE);
804 P_MSG_FLAG(SENDPAGE_NOTLAST);
805 P_MSG_FLAG(FASTOPEN);
806 P_MSG_FLAG(CMSG_CLOEXEC);
807 #undef P_MSG_FLAG
808
809 if (flags)
810 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
811
812 return printed;
813 }
814
815 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
816
817 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
818 struct syscall_arg *arg)
819 {
820 size_t printed = 0;
821 int mode = arg->val;
822
823 if (mode == F_OK) /* 0 */
824 return scnprintf(bf, size, "F");
825 #define P_MODE(n) \
826 if (mode & n##_OK) { \
827 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
828 mode &= ~n##_OK; \
829 }
830
831 P_MODE(R);
832 P_MODE(W);
833 P_MODE(X);
834 #undef P_MODE
835
836 if (mode)
837 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
838
839 return printed;
840 }
841
842 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
843
844 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
845 struct syscall_arg *arg);
846
847 #define SCA_FILENAME syscall_arg__scnprintf_filename
848
849 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
850 struct syscall_arg *arg)
851 {
852 int printed = 0, flags = arg->val;
853
854 if (!(flags & O_CREAT))
855 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
856
857 if (flags == 0)
858 return scnprintf(bf, size, "RDONLY");
859 #define P_FLAG(n) \
860 if (flags & O_##n) { \
861 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
862 flags &= ~O_##n; \
863 }
864
865 P_FLAG(APPEND);
866 P_FLAG(ASYNC);
867 P_FLAG(CLOEXEC);
868 P_FLAG(CREAT);
869 P_FLAG(DIRECT);
870 P_FLAG(DIRECTORY);
871 P_FLAG(EXCL);
872 P_FLAG(LARGEFILE);
873 P_FLAG(NOATIME);
874 P_FLAG(NOCTTY);
875 #ifdef O_NONBLOCK
876 P_FLAG(NONBLOCK);
877 #elif O_NDELAY
878 P_FLAG(NDELAY);
879 #endif
880 #ifdef O_PATH
881 P_FLAG(PATH);
882 #endif
883 P_FLAG(RDWR);
884 #ifdef O_DSYNC
885 if ((flags & O_SYNC) == O_SYNC)
886 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
887 else {
888 P_FLAG(DSYNC);
889 }
890 #else
891 P_FLAG(SYNC);
892 #endif
893 P_FLAG(TRUNC);
894 P_FLAG(WRONLY);
895 #undef P_FLAG
896
897 if (flags)
898 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
899
900 return printed;
901 }
902
903 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
904
905 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
906 struct syscall_arg *arg)
907 {
908 int printed = 0, flags = arg->val;
909
910 if (flags == 0)
911 return 0;
912
913 #define P_FLAG(n) \
914 if (flags & PERF_FLAG_##n) { \
915 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
916 flags &= ~PERF_FLAG_##n; \
917 }
918
919 P_FLAG(FD_NO_GROUP);
920 P_FLAG(FD_OUTPUT);
921 P_FLAG(PID_CGROUP);
922 P_FLAG(FD_CLOEXEC);
923 #undef P_FLAG
924
925 if (flags)
926 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
927
928 return printed;
929 }
930
931 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
932
933 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
934 struct syscall_arg *arg)
935 {
936 int printed = 0, flags = arg->val;
937
938 if (flags == 0)
939 return scnprintf(bf, size, "NONE");
940 #define P_FLAG(n) \
941 if (flags & EFD_##n) { \
942 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
943 flags &= ~EFD_##n; \
944 }
945
946 P_FLAG(SEMAPHORE);
947 P_FLAG(CLOEXEC);
948 P_FLAG(NONBLOCK);
949 #undef P_FLAG
950
951 if (flags)
952 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
953
954 return printed;
955 }
956
957 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
958
959 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
960 struct syscall_arg *arg)
961 {
962 int printed = 0, flags = arg->val;
963
964 #define P_FLAG(n) \
965 if (flags & O_##n) { \
966 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
967 flags &= ~O_##n; \
968 }
969
970 P_FLAG(CLOEXEC);
971 P_FLAG(NONBLOCK);
972 #undef P_FLAG
973
974 if (flags)
975 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
976
977 return printed;
978 }
979
980 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
981
982 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
983 {
984 int sig = arg->val;
985
986 switch (sig) {
987 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
988 P_SIGNUM(HUP);
989 P_SIGNUM(INT);
990 P_SIGNUM(QUIT);
991 P_SIGNUM(ILL);
992 P_SIGNUM(TRAP);
993 P_SIGNUM(ABRT);
994 P_SIGNUM(BUS);
995 P_SIGNUM(FPE);
996 P_SIGNUM(KILL);
997 P_SIGNUM(USR1);
998 P_SIGNUM(SEGV);
999 P_SIGNUM(USR2);
1000 P_SIGNUM(PIPE);
1001 P_SIGNUM(ALRM);
1002 P_SIGNUM(TERM);
1003 P_SIGNUM(CHLD);
1004 P_SIGNUM(CONT);
1005 P_SIGNUM(STOP);
1006 P_SIGNUM(TSTP);
1007 P_SIGNUM(TTIN);
1008 P_SIGNUM(TTOU);
1009 P_SIGNUM(URG);
1010 P_SIGNUM(XCPU);
1011 P_SIGNUM(XFSZ);
1012 P_SIGNUM(VTALRM);
1013 P_SIGNUM(PROF);
1014 P_SIGNUM(WINCH);
1015 P_SIGNUM(IO);
1016 P_SIGNUM(PWR);
1017 P_SIGNUM(SYS);
1018 #ifdef SIGEMT
1019 P_SIGNUM(EMT);
1020 #endif
1021 #ifdef SIGSTKFLT
1022 P_SIGNUM(STKFLT);
1023 #endif
1024 #ifdef SIGSWI
1025 P_SIGNUM(SWI);
1026 #endif
1027 default: break;
1028 }
1029
1030 return scnprintf(bf, size, "%#x", sig);
1031 }
1032
1033 #define SCA_SIGNUM syscall_arg__scnprintf_signum
1034
1035 #if defined(__i386__) || defined(__x86_64__)
1036 /*
1037 * FIXME: Make this available to all arches.
1038 */
#define TCGETS		0x5401

/*
 * Names of the tty ioctls. The table is meant to be used with an offset of
 * TCGETS (0x5401), i.e. the slot for a request is 'request - TCGETS'.
 *
 * Entries are positional, so the points where the request numbers have gaps
 * are re-anchored with designated initializers. Those anchors must be
 * written relative to TCGETS as well: using the low byte of the request
 * (0x27, 0x50, 0x60) puts every entry after a gap one slot too far, e.g.
 * TIOCSBRK (0x5427) would be reported for request 0x5428.
 *
 * Slots in the gaps are left NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
1058
1059 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1060 #endif /* defined(__i386__) || defined(__x86_64__) */
1061
1062 static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
1063 {
1064 int op = arg->val;
1065 size_t printed = 0;
1066
1067 switch (op) {
1068 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1069 P_SECCOMP_SET_MODE_OP(STRICT);
1070 P_SECCOMP_SET_MODE_OP(FILTER);
1071 #undef P_SECCOMP_SET_MODE_OP
1072 default: printed = scnprintf(bf, size, "%#x", op); break;
1073 }
1074
1075 return printed;
1076 }
1077
1078 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
1079
1080 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
1081 struct syscall_arg *arg)
1082 {
1083 int printed = 0, flags = arg->val;
1084
1085 #define P_FLAG(n) \
1086 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1087 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1088 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1089 }
1090
1091 P_FLAG(TSYNC);
1092 #undef P_FLAG
1093
1094 if (flags)
1095 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1096
1097 return printed;
1098 }
1099
1100 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
1101
1102 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
1103 struct syscall_arg *arg)
1104 {
1105 int printed = 0, flags = arg->val;
1106
1107 #define P_FLAG(n) \
1108 if (flags & GRND_##n) { \
1109 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1110 flags &= ~GRND_##n; \
1111 }
1112
1113 P_FLAG(RANDOM);
1114 P_FLAG(NONBLOCK);
1115 #undef P_FLAG
1116
1117 if (flags)
1118 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1119
1120 return printed;
1121 }
1122
1123 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
1124
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number 'arg' with the string table strarray__<array>. The 'name'
 * parameter is not used by the expansion; at the call sites it carries the
 * name of the argument being formatted.
 */
#define STRARRAY(arg, name, array)			\
	.arg_scnprintf = { [arg] = SCA_STRARRAY, },	\
	.arg_parm      = { [arg] = &strarray__##array, }
1128
1129 #include "trace/beauty/pid.c"
1130 #include "trace/beauty/mode_t.c"
1131 #include "trace/beauty/sched_policy.c"
1132 #include "trace/beauty/waitid_options.c"
1133
1134 static struct syscall_fmt {
1135 const char *name;
1136 const char *alias;
1137 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1138 void *arg_parm[6];
1139 bool errmsg;
1140 bool errpid;
1141 bool timeout;
1142 bool hexret;
1143 } syscall_fmts[] = {
1144 { .name = "access", .errmsg = true,
1145 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1146 [1] = SCA_ACCMODE, /* mode */ }, },
1147 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1148 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1149 { .name = "brk", .hexret = true,
1150 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1151 { .name = "chdir", .errmsg = true,
1152 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1153 { .name = "chmod", .errmsg = true,
1154 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1155 { .name = "chroot", .errmsg = true,
1156 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1157 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1158 { .name = "clone", .errpid = true, },
1159 { .name = "close", .errmsg = true,
1160 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1161 { .name = "connect", .errmsg = true, },
1162 { .name = "creat", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1164 { .name = "dup", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1166 { .name = "dup2", .errmsg = true,
1167 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1168 { .name = "dup3", .errmsg = true,
1169 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1170 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1171 { .name = "eventfd2", .errmsg = true,
1172 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1173 { .name = "faccessat", .errmsg = true,
1174 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175 [1] = SCA_FILENAME, /* filename */ }, },
1176 { .name = "fadvise64", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1178 { .name = "fallocate", .errmsg = true,
1179 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1180 { .name = "fchdir", .errmsg = true,
1181 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1182 { .name = "fchmod", .errmsg = true,
1183 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1184 { .name = "fchmodat", .errmsg = true,
1185 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1186 [1] = SCA_FILENAME, /* filename */ }, },
1187 { .name = "fchown", .errmsg = true,
1188 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1189 { .name = "fchownat", .errmsg = true,
1190 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1191 [1] = SCA_FILENAME, /* filename */ }, },
1192 { .name = "fcntl", .errmsg = true,
1193 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1194 [1] = SCA_STRARRAY, /* cmd */ },
1195 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1196 { .name = "fdatasync", .errmsg = true,
1197 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1198 { .name = "flock", .errmsg = true,
1199 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1200 [1] = SCA_FLOCK, /* cmd */ }, },
1201 { .name = "fsetxattr", .errmsg = true,
1202 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1206 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1207 [1] = SCA_FILENAME, /* filename */ }, },
1208 { .name = "fstatfs", .errmsg = true,
1209 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1210 { .name = "fsync", .errmsg = true,
1211 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1212 { .name = "ftruncate", .errmsg = true,
1213 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1214 { .name = "futex", .errmsg = true,
1215 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1216 { .name = "futimesat", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1218 [1] = SCA_FILENAME, /* filename */ }, },
1219 { .name = "getdents", .errmsg = true,
1220 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1221 { .name = "getdents64", .errmsg = true,
1222 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1223 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1224 { .name = "getpid", .errpid = true, },
1225 { .name = "getpgid", .errpid = true, },
1226 { .name = "getppid", .errpid = true, },
1227 { .name = "getrandom", .errmsg = true,
1228 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
1229 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1230 { .name = "getxattr", .errmsg = true,
1231 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1232 { .name = "inotify_add_watch", .errmsg = true,
1233 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1234 { .name = "ioctl", .errmsg = true,
1235 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1236 #if defined(__i386__) || defined(__x86_64__)
1237 /*
1238 * FIXME: Make this available to all arches.
1239 */
1240 [1] = SCA_STRHEXARRAY, /* cmd */
1241 [2] = SCA_HEX, /* arg */ },
1242 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1243 #else
1244 [2] = SCA_HEX, /* arg */ }, },
1245 #endif
1246 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1247 { .name = "kill", .errmsg = true,
1248 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1249 { .name = "lchown", .errmsg = true,
1250 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1251 { .name = "lgetxattr", .errmsg = true,
1252 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1253 { .name = "linkat", .errmsg = true,
1254 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1255 { .name = "listxattr", .errmsg = true,
1256 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1257 { .name = "llistxattr", .errmsg = true,
1258 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1259 { .name = "lremovexattr", .errmsg = true,
1260 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1261 { .name = "lseek", .errmsg = true,
1262 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1263 [2] = SCA_STRARRAY, /* whence */ },
1264 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1265 { .name = "lsetxattr", .errmsg = true,
1266 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1267 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1268 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1269 { .name = "lsxattr", .errmsg = true,
1270 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1271 { .name = "madvise", .errmsg = true,
1272 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1273 [2] = SCA_MADV_BHV, /* behavior */ }, },
1274 { .name = "mkdir", .errmsg = true,
1275 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1276 { .name = "mkdirat", .errmsg = true,
1277 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1278 [1] = SCA_FILENAME, /* pathname */ }, },
1279 { .name = "mknod", .errmsg = true,
1280 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1281 { .name = "mknodat", .errmsg = true,
1282 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1283 [1] = SCA_FILENAME, /* filename */ }, },
1284 { .name = "mlock", .errmsg = true,
1285 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1286 { .name = "mlockall", .errmsg = true,
1287 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1288 { .name = "mmap", .hexret = true,
1289 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1290 [2] = SCA_MMAP_PROT, /* prot */
1291 [3] = SCA_MMAP_FLAGS, /* flags */
1292 [4] = SCA_FD, /* fd */ }, },
1293 { .name = "mprotect", .errmsg = true,
1294 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1295 [2] = SCA_MMAP_PROT, /* prot */ }, },
1296 { .name = "mq_unlink", .errmsg = true,
1297 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1298 { .name = "mremap", .hexret = true,
1299 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1300 [3] = SCA_MREMAP_FLAGS, /* flags */
1301 [4] = SCA_HEX, /* new_addr */ }, },
1302 { .name = "munlock", .errmsg = true,
1303 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1304 { .name = "munmap", .errmsg = true,
1305 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1306 { .name = "name_to_handle_at", .errmsg = true,
1307 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1308 { .name = "newfstatat", .errmsg = true,
1309 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1310 [1] = SCA_FILENAME, /* filename */ }, },
1311 { .name = "open", .errmsg = true,
1312 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1313 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1314 { .name = "open_by_handle_at", .errmsg = true,
1315 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1316 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1317 { .name = "openat", .errmsg = true,
1318 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1319 [1] = SCA_FILENAME, /* filename */
1320 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1321 { .name = "perf_event_open", .errmsg = true,
1322 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1323 [2] = SCA_INT, /* cpu */
1324 [3] = SCA_FD, /* group_fd */
1325 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1326 { .name = "pipe2", .errmsg = true,
1327 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1328 { .name = "poll", .errmsg = true, .timeout = true, },
1329 { .name = "ppoll", .errmsg = true, .timeout = true, },
1330 { .name = "pread", .errmsg = true, .alias = "pread64",
1331 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1332 { .name = "preadv", .errmsg = true, .alias = "pread",
1333 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1334 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1335 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1336 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1337 { .name = "pwritev", .errmsg = true,
1338 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1339 { .name = "read", .errmsg = true,
1340 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1341 { .name = "readlink", .errmsg = true,
1342 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1343 { .name = "readlinkat", .errmsg = true,
1344 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1345 [1] = SCA_FILENAME, /* pathname */ }, },
1346 { .name = "readv", .errmsg = true,
1347 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1348 { .name = "recvfrom", .errmsg = true,
1349 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1350 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1351 { .name = "recvmmsg", .errmsg = true,
1352 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1353 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1354 { .name = "recvmsg", .errmsg = true,
1355 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1356 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1357 { .name = "removexattr", .errmsg = true,
1358 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1359 { .name = "renameat", .errmsg = true,
1360 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1361 { .name = "rmdir", .errmsg = true,
1362 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1363 { .name = "rt_sigaction", .errmsg = true,
1364 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1365 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1366 { .name = "rt_sigqueueinfo", .errmsg = true,
1367 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1368 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1369 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1370 { .name = "sched_setscheduler", .errmsg = true,
1371 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
1372 { .name = "seccomp", .errmsg = true,
1373 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
1374 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
1375 { .name = "select", .errmsg = true, .timeout = true, },
1376 { .name = "sendmmsg", .errmsg = true,
1377 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1378 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1379 { .name = "sendmsg", .errmsg = true,
1380 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1381 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1382 { .name = "sendto", .errmsg = true,
1383 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1384 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1385 { .name = "set_tid_address", .errpid = true, },
1386 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1387 { .name = "setpgid", .errmsg = true, },
1388 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1389 { .name = "setxattr", .errmsg = true,
1390 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1391 { .name = "shutdown", .errmsg = true,
1392 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1393 { .name = "socket", .errmsg = true,
1394 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1395 [1] = SCA_SK_TYPE, /* type */ },
1396 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1397 { .name = "socketpair", .errmsg = true,
1398 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1399 [1] = SCA_SK_TYPE, /* type */ },
1400 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1401 { .name = "stat", .errmsg = true, .alias = "newstat",
1402 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1403 { .name = "statfs", .errmsg = true,
1404 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1405 { .name = "swapoff", .errmsg = true,
1406 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1407 { .name = "swapon", .errmsg = true,
1408 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1409 { .name = "symlinkat", .errmsg = true,
1410 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1411 { .name = "tgkill", .errmsg = true,
1412 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1413 { .name = "tkill", .errmsg = true,
1414 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1415 { .name = "truncate", .errmsg = true,
1416 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1417 { .name = "uname", .errmsg = true, .alias = "newuname", },
1418 { .name = "unlinkat", .errmsg = true,
1419 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1420 [1] = SCA_FILENAME, /* pathname */ }, },
1421 { .name = "utime", .errmsg = true,
1422 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1423 { .name = "utimensat", .errmsg = true,
1424 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1425 [1] = SCA_FILENAME, /* filename */ }, },
1426 { .name = "utimes", .errmsg = true,
1427 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1428 { .name = "vmsplice", .errmsg = true,
1429 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1430 { .name = "wait4", .errpid = true,
1431 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
1432 { .name = "waitid", .errpid = true,
1433 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
1434 { .name = "write", .errmsg = true,
1435 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1436 { .name = "writev", .errmsg = true,
1437 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1438 };
1439
1440 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1441 {
1442 const struct syscall_fmt *fmt = fmtp;
1443 return strcmp(name, fmt->name);
1444 }
1445
1446 static struct syscall_fmt *syscall_fmt__find(const char *name)
1447 {
1448 const int nmemb = ARRAY_SIZE(syscall_fmts);
1449 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1450 }
1451
/*
 * What is known about one syscall, as filled in by
 * trace__read_syscall_info() and syscall__set_arg_fmts().
 */
struct syscall {
	struct event_format *tp_format;	/* "syscalls:sys_enter_<name>" tracepoint format */
	int		    nr_args;	/* number of fields, not counting the syscall number one */
	struct format_field *args;	/* first argument field, NULL if there is none */
	const char	    *name;	/* as returned by syscalltbl__name() */
	bool		    is_exit;	/* name is "exit" or "exit_group" */
	struct syscall_fmt  *fmt;	/* syscall_fmts[] entry, NULL if there is none */
	/* Per argument formatters, an array with nr_args entries. */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* fmt->arg_parm when fmt is set */
};
1462
1463 static size_t fprintf_duration(unsigned long t, FILE *fp)
1464 {
1465 double duration = (double)t / NSEC_PER_MSEC;
1466 size_t printed = fprintf(fp, "(");
1467
1468 if (duration >= 1.0)
1469 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1470 else if (duration >= 0.01)
1471 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1472 else
1473 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1474 return printed + fprintf(fp, "): ");
1475 }
1476
1477 /**
1478 * filename.ptr: The filename char pointer that will be vfs_getname'd
1479 * filename.entry_str_pos: Where to insert the string translated from
1480 * filename.ptr by the vfs_getname tracepoint/kprobe.
1481 */
1482 struct thread_trace {
1483 u64 entry_time;
1484 u64 exit_time;
1485 bool entry_pending;
1486 unsigned long nr_events;
1487 unsigned long pfmaj, pfmin;
1488 char *entry_str;
1489 double runtime_ms;
1490 struct {
1491 unsigned long ptr;
1492 short int entry_str_pos;
1493 bool pending_open;
1494 unsigned int namelen;
1495 char *name;
1496 } filename;
1497 struct {
1498 int max;
1499 char **table;
1500 } paths;
1501
1502 struct intlist *syscall_stats;
1503 };
1504
1505 static struct thread_trace *thread_trace__new(void)
1506 {
1507 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1508
1509 if (ttrace)
1510 ttrace->paths.max = -1;
1511
1512 ttrace->syscall_stats = intlist__new(NULL);
1513
1514 return ttrace;
1515 }
1516
1517 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1518 {
1519 struct thread_trace *ttrace;
1520
1521 if (thread == NULL)
1522 goto fail;
1523
1524 if (thread__priv(thread) == NULL)
1525 thread__set_priv(thread, thread_trace__new());
1526
1527 if (thread__priv(thread) == NULL)
1528 goto fail;
1529
1530 ttrace = thread__priv(thread);
1531 ++ttrace->nr_events;
1532
1533 return ttrace;
1534 fail:
1535 color_fprintf(fp, PERF_COLOR_RED,
1536 "WARNING: not enough memory, dropping samples!\n");
1537 return NULL;
1538 }
1539
/*
 * Bit flags.
 * NOTE(review): presumably they select tracing of major/minor page faults;
 * their users are outside this part of the file.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Bytes allocated for each thread_trace->entry_str, see trace__sys_enter(). */
static const size_t trace__entry_str_size = 2048;
1544
1545 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1546 {
1547 struct thread_trace *ttrace = thread__priv(thread);
1548
1549 if (fd > ttrace->paths.max) {
1550 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1551
1552 if (npath == NULL)
1553 return -1;
1554
1555 if (ttrace->paths.max != -1) {
1556 memset(npath + ttrace->paths.max + 1, 0,
1557 (fd - ttrace->paths.max) * sizeof(char *));
1558 } else {
1559 memset(npath, 0, (fd + 1) * sizeof(char *));
1560 }
1561
1562 ttrace->paths.table = npath;
1563 ttrace->paths.max = fd;
1564 }
1565
1566 ttrace->paths.table[fd] = strdup(pathname);
1567
1568 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1569 }
1570
1571 static int thread__read_fd_path(struct thread *thread, int fd)
1572 {
1573 char linkname[PATH_MAX], pathname[PATH_MAX];
1574 struct stat st;
1575 int ret;
1576
1577 if (thread->pid_ == thread->tid) {
1578 scnprintf(linkname, sizeof(linkname),
1579 "/proc/%d/fd/%d", thread->pid_, fd);
1580 } else {
1581 scnprintf(linkname, sizeof(linkname),
1582 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1583 }
1584
1585 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1586 return -1;
1587
1588 ret = readlink(linkname, pathname, sizeof(pathname));
1589
1590 if (ret < 0 || ret > st.st_size)
1591 return -1;
1592
1593 pathname[ret] = '\0';
1594 return trace__set_fd_pathname(thread, fd, pathname);
1595 }
1596
1597 static const char *thread__fd_path(struct thread *thread, int fd,
1598 struct trace *trace)
1599 {
1600 struct thread_trace *ttrace = thread__priv(thread);
1601
1602 if (ttrace == NULL)
1603 return NULL;
1604
1605 if (fd < 0)
1606 return NULL;
1607
1608 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1609 if (!trace->live)
1610 return NULL;
1611 ++trace->stats.proc_getname;
1612 if (thread__read_fd_path(thread, fd))
1613 return NULL;
1614 }
1615
1616 return ttrace->paths.table[fd];
1617 }
1618
1619 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1620 struct syscall_arg *arg)
1621 {
1622 int fd = arg->val;
1623 size_t printed = scnprintf(bf, size, "%d", fd);
1624 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1625
1626 if (path)
1627 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1628
1629 return printed;
1630 }
1631
1632 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1633 struct syscall_arg *arg)
1634 {
1635 int fd = arg->val;
1636 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1637 struct thread_trace *ttrace = thread__priv(arg->thread);
1638
1639 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1640 zfree(&ttrace->paths.table[fd]);
1641
1642 return printed;
1643 }
1644
1645 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1646 unsigned long ptr)
1647 {
1648 struct thread_trace *ttrace = thread__priv(thread);
1649
1650 ttrace->filename.ptr = ptr;
1651 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1652 }
1653
1654 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1655 struct syscall_arg *arg)
1656 {
1657 unsigned long ptr = arg->val;
1658
1659 if (!arg->trace->vfs_getname)
1660 return scnprintf(bf, size, "%#x", ptr);
1661
1662 thread__set_filename_pos(arg->thread, bf, ptr);
1663 return 0;
1664 }
1665
1666 static bool trace__filter_duration(struct trace *trace, double t)
1667 {
1668 return t < (trace->duration_filter * NSEC_PER_MSEC);
1669 }
1670
1671 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1672 {
1673 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1674
1675 return fprintf(fp, "%10.3f ", ts);
1676 }
1677
/*
 * Written from sig_handler(), i.e. asynchronously to the code that reads
 * them, hence volatile sig_atomic_t rather than a plain bool.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for the tracing to stop and remember whether that was
 * due to SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1686
1687 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1688 u64 duration, u64 tstamp, FILE *fp)
1689 {
1690 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1691 printed += fprintf_duration(duration, fp);
1692
1693 if (trace->multiple_threads) {
1694 if (trace->show_comm)
1695 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1696 printed += fprintf(fp, "%d ", thread->tid);
1697 }
1698
1699 return printed;
1700 }
1701
1702 static int trace__process_event(struct trace *trace, struct machine *machine,
1703 union perf_event *event, struct perf_sample *sample)
1704 {
1705 int ret = 0;
1706
1707 switch (event->header.type) {
1708 case PERF_RECORD_LOST:
1709 color_fprintf(trace->output, PERF_COLOR_RED,
1710 "LOST %" PRIu64 " events!\n", event->lost.lost);
1711 ret = machine__process_lost_event(machine, event, sample);
1712 break;
1713 default:
1714 ret = machine__process_event(machine, event, sample);
1715 break;
1716 }
1717
1718 return ret;
1719 }
1720
1721 static int trace__tool_process(struct perf_tool *tool,
1722 union perf_event *event,
1723 struct perf_sample *sample,
1724 struct machine *machine)
1725 {
1726 struct trace *trace = container_of(tool, struct trace, tool);
1727 return trace__process_event(trace, machine, event, sample);
1728 }
1729
1730 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1731 {
1732 int err = symbol__init(NULL);
1733
1734 if (err)
1735 return err;
1736
1737 trace->host = machine__new_host();
1738 if (trace->host == NULL)
1739 return -ENOMEM;
1740
1741 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1742 return -errno;
1743
1744 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1745 evlist->threads, trace__tool_process, false,
1746 trace->opts.proc_map_timeout);
1747 if (err)
1748 symbol__exit();
1749
1750 return err;
1751 }
1752
1753 static int syscall__set_arg_fmts(struct syscall *sc)
1754 {
1755 struct format_field *field;
1756 int idx = 0;
1757
1758 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1759 if (sc->arg_scnprintf == NULL)
1760 return -1;
1761
1762 if (sc->fmt)
1763 sc->arg_parm = sc->fmt->arg_parm;
1764
1765 for (field = sc->args; field; field = field->next) {
1766 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1767 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1768 else if (field->flags & FIELD_IS_POINTER)
1769 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1770 else if (strcmp(field->type, "pid_t") == 0)
1771 sc->arg_scnprintf[idx] = SCA_PID;
1772 else if (strcmp(field->type, "umode_t") == 0)
1773 sc->arg_scnprintf[idx] = SCA_MODE_T;
1774 ++idx;
1775 }
1776
1777 return 0;
1778 }
1779
1780 static int trace__read_syscall_info(struct trace *trace, int id)
1781 {
1782 char tp_name[128];
1783 struct syscall *sc;
1784 const char *name = syscalltbl__name(trace->sctbl, id);
1785
1786 if (name == NULL)
1787 return -1;
1788
1789 if (id > trace->syscalls.max) {
1790 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1791
1792 if (nsyscalls == NULL)
1793 return -1;
1794
1795 if (trace->syscalls.max != -1) {
1796 memset(nsyscalls + trace->syscalls.max + 1, 0,
1797 (id - trace->syscalls.max) * sizeof(*sc));
1798 } else {
1799 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1800 }
1801
1802 trace->syscalls.table = nsyscalls;
1803 trace->syscalls.max = id;
1804 }
1805
1806 sc = trace->syscalls.table + id;
1807 sc->name = name;
1808
1809 sc->fmt = syscall_fmt__find(sc->name);
1810
1811 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1812 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1813
1814 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1815 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1816 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1817 }
1818
1819 if (IS_ERR(sc->tp_format))
1820 return -1;
1821
1822 sc->args = sc->tp_format->format.fields;
1823 sc->nr_args = sc->tp_format->format.nr_fields;
1824 /*
1825 * We need to check and discard the first variable '__syscall_nr'
1826 * or 'nr' that mean the syscall number. It is needless here.
1827 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1828 */
1829 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1830 sc->args = sc->args->next;
1831 --sc->nr_args;
1832 }
1833
1834 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1835
1836 return syscall__set_arg_fmts(sc);
1837 }
1838
1839 static int trace__validate_ev_qualifier(struct trace *trace)
1840 {
1841 int err = 0, i;
1842 struct str_node *pos;
1843
1844 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1845 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1846 sizeof(trace->ev_qualifier_ids.entries[0]));
1847
1848 if (trace->ev_qualifier_ids.entries == NULL) {
1849 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1850 trace->output);
1851 err = -EINVAL;
1852 goto out;
1853 }
1854
1855 i = 0;
1856
1857 strlist__for_each(pos, trace->ev_qualifier) {
1858 const char *sc = pos->s;
1859 int id = syscalltbl__id(trace->sctbl, sc);
1860
1861 if (id < 0) {
1862 if (err == 0) {
1863 fputs("Error:\tInvalid syscall ", trace->output);
1864 err = -EINVAL;
1865 } else {
1866 fputs(", ", trace->output);
1867 }
1868
1869 fputs(sc, trace->output);
1870 }
1871
1872 trace->ev_qualifier_ids.entries[i++] = id;
1873 }
1874
1875 if (err < 0) {
1876 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1877 "\nHint:\tand: 'man syscalls'\n", trace->output);
1878 zfree(&trace->ev_qualifier_ids.entries);
1879 trace->ev_qualifier_ids.nr = 0;
1880 }
1881 out:
1882 return err;
1883 }
1884
1885 /*
1886 * args is to be interpreted as a series of longs but we need to handle
1887 * 8-byte unaligned accesses. args points to raw_data within the event
1888 * and raw_data is guaranteed to be 8-byte unaligned because it is
1889 * preceded by raw_size which is a u32. So we need to copy args to a temp
1890 * variable to read it. Most notably this avoids extended load instructions
1891 * on unaligned addresses
1892 */
1893
1894 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1895 unsigned char *args, struct trace *trace,
1896 struct thread *thread)
1897 {
1898 size_t printed = 0;
1899 unsigned char *p;
1900 unsigned long val;
1901
1902 if (sc->args != NULL) {
1903 struct format_field *field;
1904 u8 bit = 1;
1905 struct syscall_arg arg = {
1906 .idx = 0,
1907 .mask = 0,
1908 .trace = trace,
1909 .thread = thread,
1910 };
1911
1912 for (field = sc->args; field;
1913 field = field->next, ++arg.idx, bit <<= 1) {
1914 if (arg.mask & bit)
1915 continue;
1916
1917 /* special care for unaligned accesses */
1918 p = args + sizeof(unsigned long) * arg.idx;
1919 memcpy(&val, p, sizeof(val));
1920
1921 /*
1922 * Suppress this argument if its value is zero and
1923 * and we don't have a string associated in an
1924 * strarray for it.
1925 */
1926 if (val == 0 &&
1927 !(sc->arg_scnprintf &&
1928 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1929 sc->arg_parm[arg.idx]))
1930 continue;
1931
1932 printed += scnprintf(bf + printed, size - printed,
1933 "%s%s: ", printed ? ", " : "", field->name);
1934 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1935 arg.val = val;
1936 if (sc->arg_parm)
1937 arg.parm = sc->arg_parm[arg.idx];
1938 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1939 size - printed, &arg);
1940 } else {
1941 printed += scnprintf(bf + printed, size - printed,
1942 "%ld", val);
1943 }
1944 }
1945 } else {
1946 int i = 0;
1947
1948 while (i < 6) {
1949 /* special care for unaligned accesses */
1950 p = args + sizeof(unsigned long) * i;
1951 memcpy(&val, p, sizeof(val));
1952 printed += scnprintf(bf + printed, size - printed,
1953 "%sarg%d: %ld",
1954 printed ? ", " : "", i, val);
1955 ++i;
1956 }
1957 }
1958
1959 return printed;
1960 }
1961
/*
 * Signature of the per tracepoint sample handlers, trace__sys_enter() and
 * trace__sys_exit() being two of them.
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1965
1966 static struct syscall *trace__syscall_info(struct trace *trace,
1967 struct perf_evsel *evsel, int id)
1968 {
1969
1970 if (id < 0) {
1971
1972 /*
1973 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1974 * before that, leaving at a higher verbosity level till that is
1975 * explained. Reproduced with plain ftrace with:
1976 *
1977 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1978 * grep "NR -1 " /t/trace_pipe
1979 *
1980 * After generating some load on the machine.
1981 */
1982 if (verbose > 1) {
1983 static u64 n;
1984 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1985 id, perf_evsel__name(evsel), ++n);
1986 }
1987 return NULL;
1988 }
1989
1990 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1991 trace__read_syscall_info(trace, id))
1992 goto out_cant_read;
1993
1994 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1995 goto out_cant_read;
1996
1997 return &trace->syscalls.table[id];
1998
1999 out_cant_read:
2000 if (verbose) {
2001 fprintf(trace->output, "Problems reading syscall %d", id);
2002 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
2003 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2004 fputs(" information\n", trace->output);
2005 }
2006 return NULL;
2007 }
2008
2009 static void thread__update_stats(struct thread_trace *ttrace,
2010 int id, struct perf_sample *sample)
2011 {
2012 struct int_node *inode;
2013 struct stats *stats;
2014 u64 duration = 0;
2015
2016 inode = intlist__findnew(ttrace->syscall_stats, id);
2017 if (inode == NULL)
2018 return;
2019
2020 stats = inode->priv;
2021 if (stats == NULL) {
2022 stats = malloc(sizeof(struct stats));
2023 if (stats == NULL)
2024 return;
2025 init_stats(stats);
2026 inode->priv = stats;
2027 }
2028
2029 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2030 duration = sample->time - ttrace->entry_time;
2031
2032 update_stats(stats, duration);
2033 }
2034
2035 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
2036 {
2037 struct thread_trace *ttrace;
2038 u64 duration;
2039 size_t printed;
2040
2041 if (trace->current == NULL)
2042 return 0;
2043
2044 ttrace = thread__priv(trace->current);
2045
2046 if (!ttrace->entry_pending)
2047 return 0;
2048
2049 duration = sample->time - ttrace->entry_time;
2050
2051 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
2052 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2053 ttrace->entry_pending = false;
2054
2055 return printed;
2056 }
2057
2058 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2059 union perf_event *event __maybe_unused,
2060 struct perf_sample *sample)
2061 {
2062 char *msg;
2063 void *args;
2064 size_t printed = 0;
2065 struct thread *thread;
2066 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2067 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2068 struct thread_trace *ttrace;
2069
2070 if (sc == NULL)
2071 return -1;
2072
2073 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2074 ttrace = thread__trace(thread, trace->output);
2075 if (ttrace == NULL)
2076 goto out_put;
2077
2078 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2079
2080 if (ttrace->entry_str == NULL) {
2081 ttrace->entry_str = malloc(trace__entry_str_size);
2082 if (!ttrace->entry_str)
2083 goto out_put;
2084 }
2085
2086 if (!trace->summary_only)
2087 trace__printf_interrupted_entry(trace, sample);
2088
2089 ttrace->entry_time = sample->time;
2090 msg = ttrace->entry_str;
2091 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2092
2093 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2094 args, trace, thread);
2095
2096 if (sc->is_exit) {
2097 if (!trace->duration_filter && !trace->summary_only) {
2098 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2099 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2100 }
2101 } else {
2102 ttrace->entry_pending = true;
2103 /* See trace__vfs_getname & trace__sys_exit */
2104 ttrace->filename.pending_open = false;
2105 }
2106
2107 if (trace->current != thread) {
2108 thread__put(trace->current);
2109 trace->current = thread__get(thread);
2110 }
2111 err = 0;
2112 out_put:
2113 thread__put(thread);
2114 return err;
2115 }
2116
2117 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2118 union perf_event *event __maybe_unused,
2119 struct perf_sample *sample)
2120 {
2121 long ret;
2122 u64 duration = 0;
2123 struct thread *thread;
2124 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2125 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2126 struct thread_trace *ttrace;
2127
2128 if (sc == NULL)
2129 return -1;
2130
2131 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2132 ttrace = thread__trace(thread, trace->output);
2133 if (ttrace == NULL)
2134 goto out_put;
2135
2136 if (trace->summary)
2137 thread__update_stats(ttrace, id, sample);
2138
2139 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2140
2141 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
2142 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2143 ttrace->filename.pending_open = false;
2144 ++trace->stats.vfs_getname;
2145 }
2146
2147 ttrace->exit_time = sample->time;
2148
2149 if (ttrace->entry_time) {
2150 duration = sample->time - ttrace->entry_time;
2151 if (trace__filter_duration(trace, duration))
2152 goto out;
2153 } else if (trace->duration_filter)
2154 goto out;
2155
2156 if (trace->summary_only)
2157 goto out;
2158
2159 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2160
2161 if (ttrace->entry_pending) {
2162 fprintf(trace->output, "%-70s", ttrace->entry_str);
2163 } else {
2164 fprintf(trace->output, " ... [");
2165 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2166 fprintf(trace->output, "]: %s()", sc->name);
2167 }
2168
2169 if (sc->fmt == NULL) {
2170 signed_print:
2171 fprintf(trace->output, ") = %ld", ret);
2172 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
2173 char bf[STRERR_BUFSIZE];
2174 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2175 *e = audit_errno_to_name(-ret);
2176
2177 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2178 } else if (ret == 0 && sc->fmt->timeout)
2179 fprintf(trace->output, ") = 0 Timeout");
2180 else if (sc->fmt->hexret)
2181 fprintf(trace->output, ") = %#lx", ret);
2182 else if (sc->fmt->errpid) {
2183 struct thread *child = machine__find_thread(trace->host, ret, ret);
2184
2185 if (child != NULL) {
2186 fprintf(trace->output, ") = %ld", ret);
2187 if (child->comm_set)
2188 fprintf(trace->output, " (%s)", thread__comm_str(child));
2189 thread__put(child);
2190 }
2191 } else
2192 goto signed_print;
2193
2194 fputc('\n', trace->output);
2195
2196 if (sample->callchain) {
2197 struct addr_location al;
2198 /* TODO: user-configurable print_opts */
2199 const unsigned int print_opts = PRINT_IP_OPT_SYM
2200 | PRINT_IP_OPT_DSO;
2201
2202 if (machine__resolve(trace->host, &al, sample) < 0) {
2203 pr_err("problem processing %d event, skipping it.\n",
2204 event->header.type);
2205 goto out_put;
2206 }
2207 perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
2208 scripting_max_stack, trace->output);
2209 }
2210 out:
2211 ttrace->entry_pending = false;
2212 err = 0;
2213 out_put:
2214 thread__put(thread);
2215 return err;
2216 }
2217
2218 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2219 union perf_event *event __maybe_unused,
2220 struct perf_sample *sample)
2221 {
2222 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2223 struct thread_trace *ttrace;
2224 size_t filename_len, entry_str_len, to_move;
2225 ssize_t remaining_space;
2226 char *pos;
2227 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2228
2229 if (!thread)
2230 goto out;
2231
2232 ttrace = thread__priv(thread);
2233 if (!ttrace)
2234 goto out;
2235
2236 filename_len = strlen(filename);
2237
2238 if (ttrace->filename.namelen < filename_len) {
2239 char *f = realloc(ttrace->filename.name, filename_len + 1);
2240
2241 if (f == NULL)
2242 goto out;
2243
2244 ttrace->filename.namelen = filename_len;
2245 ttrace->filename.name = f;
2246 }
2247
2248 strcpy(ttrace->filename.name, filename);
2249 ttrace->filename.pending_open = true;
2250
2251 if (!ttrace->filename.ptr)
2252 goto out;
2253
2254 entry_str_len = strlen(ttrace->entry_str);
2255 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2256 if (remaining_space <= 0)
2257 goto out;
2258
2259 if (filename_len > (size_t)remaining_space) {
2260 filename += filename_len - remaining_space;
2261 filename_len = remaining_space;
2262 }
2263
2264 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2265 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2266 memmove(pos + filename_len, pos, to_move);
2267 memcpy(pos, filename, filename_len);
2268
2269 ttrace->filename.ptr = 0;
2270 ttrace->filename.entry_str_pos = 0;
2271 out:
2272 return 0;
2273 }
2274
2275 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2276 union perf_event *event __maybe_unused,
2277 struct perf_sample *sample)
2278 {
2279 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2280 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2281 struct thread *thread = machine__findnew_thread(trace->host,
2282 sample->pid,
2283 sample->tid);
2284 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2285
2286 if (ttrace == NULL)
2287 goto out_dump;
2288
2289 ttrace->runtime_ms += runtime_ms;
2290 trace->runtime_ms += runtime_ms;
2291 thread__put(thread);
2292 return 0;
2293
2294 out_dump:
2295 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2296 evsel->name,
2297 perf_evsel__strval(evsel, sample, "comm"),
2298 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2299 runtime,
2300 perf_evsel__intval(evsel, sample, "vruntime"));
2301 thread__put(thread);
2302 return 0;
2303 }
2304
2305 static void bpf_output__printer(enum binary_printer_ops op,
2306 unsigned int val, void *extra)
2307 {
2308 FILE *output = extra;
2309 unsigned char ch = (unsigned char)val;
2310
2311 switch (op) {
2312 case BINARY_PRINT_CHAR_DATA:
2313 fprintf(output, "%c", isprint(ch) ? ch : '.');
2314 break;
2315 case BINARY_PRINT_DATA_BEGIN:
2316 case BINARY_PRINT_LINE_BEGIN:
2317 case BINARY_PRINT_ADDR:
2318 case BINARY_PRINT_NUM_DATA:
2319 case BINARY_PRINT_NUM_PAD:
2320 case BINARY_PRINT_SEP:
2321 case BINARY_PRINT_CHAR_PAD:
2322 case BINARY_PRINT_LINE_END:
2323 case BINARY_PRINT_DATA_END:
2324 default:
2325 break;
2326 }
2327 }
2328
2329 static void bpf_output__fprintf(struct trace *trace,
2330 struct perf_sample *sample)
2331 {
2332 print_binary(sample->raw_data, sample->raw_size, 8,
2333 bpf_output__printer, trace->output);
2334 }
2335
2336 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2337 union perf_event *event __maybe_unused,
2338 struct perf_sample *sample)
2339 {
2340 trace__printf_interrupted_entry(trace, sample);
2341 trace__fprintf_tstamp(trace, sample->time, trace->output);
2342
2343 if (trace->trace_syscalls)
2344 fprintf(trace->output, "( ): ");
2345
2346 fprintf(trace->output, "%s:", evsel->name);
2347
2348 if (perf_evsel__is_bpf_output(evsel)) {
2349 bpf_output__fprintf(trace, sample);
2350 } else if (evsel->tp_format) {
2351 event_format__fprintf(evsel->tp_format, sample->cpu,
2352 sample->raw_data, sample->raw_size,
2353 trace->output);
2354 }
2355
2356 fprintf(trace->output, ")\n");
2357 return 0;
2358 }
2359
2360 static void print_location(FILE *f, struct perf_sample *sample,
2361 struct addr_location *al,
2362 bool print_dso, bool print_sym)
2363 {
2364
2365 if ((verbose || print_dso) && al->map)
2366 fprintf(f, "%s@", al->map->dso->long_name);
2367
2368 if ((verbose || print_sym) && al->sym)
2369 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2370 al->addr - al->sym->start);
2371 else if (al->map)
2372 fprintf(f, "0x%" PRIx64, al->addr);
2373 else
2374 fprintf(f, "0x%" PRIx64, sample->addr);
2375 }
2376
2377 static int trace__pgfault(struct trace *trace,
2378 struct perf_evsel *evsel,
2379 union perf_event *event __maybe_unused,
2380 struct perf_sample *sample)
2381 {
2382 struct thread *thread;
2383 struct addr_location al;
2384 char map_type = 'd';
2385 struct thread_trace *ttrace;
2386 int err = -1;
2387
2388 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2389 ttrace = thread__trace(thread, trace->output);
2390 if (ttrace == NULL)
2391 goto out_put;
2392
2393 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2394 ttrace->pfmaj++;
2395 else
2396 ttrace->pfmin++;
2397
2398 if (trace->summary_only)
2399 goto out;
2400
2401 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2402 sample->ip, &al);
2403
2404 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2405
2406 fprintf(trace->output, "%sfault [",
2407 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2408 "maj" : "min");
2409
2410 print_location(trace->output, sample, &al, false, true);
2411
2412 fprintf(trace->output, "] => ");
2413
2414 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2415 sample->addr, &al);
2416
2417 if (!al.map) {
2418 thread__find_addr_location(thread, sample->cpumode,
2419 MAP__FUNCTION, sample->addr, &al);
2420
2421 if (al.map)
2422 map_type = 'x';
2423 else
2424 map_type = '?';
2425 }
2426
2427 print_location(trace->output, sample, &al, true, false);
2428
2429 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2430 out:
2431 err = 0;
2432 out_put:
2433 thread__put(thread);
2434 return err;
2435 }
2436
2437 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2438 {
2439 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2440 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2441 return false;
2442
2443 if (trace->pid_list || trace->tid_list)
2444 return true;
2445
2446 return false;
2447 }
2448
2449 static void trace__set_base_time(struct trace *trace,
2450 struct perf_evsel *evsel,
2451 struct perf_sample *sample)
2452 {
2453 /*
2454 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2455 * and don't use sample->time unconditionally, we may end up having
2456 * some other event in the future without PERF_SAMPLE_TIME for good
2457 * reason, i.e. we may not be interested in its timestamps, just in
2458 * it taking place, picking some piece of information when it
2459 * appears in our event stream (vfs_getname comes to mind).
2460 */
2461 if (trace->base_time == 0 && !trace->full_time &&
2462 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2463 trace->base_time = sample->time;
2464 }
2465
2466 static int trace__process_sample(struct perf_tool *tool,
2467 union perf_event *event,
2468 struct perf_sample *sample,
2469 struct perf_evsel *evsel,
2470 struct machine *machine __maybe_unused)
2471 {
2472 struct trace *trace = container_of(tool, struct trace, tool);
2473 int err = 0;
2474
2475 tracepoint_handler handler = evsel->handler;
2476
2477 if (skip_sample(trace, sample))
2478 return 0;
2479
2480 trace__set_base_time(trace, evsel, sample);
2481
2482 if (handler) {
2483 ++trace->nr_events;
2484 handler(trace, evsel, event, sample);
2485 }
2486
2487 return err;
2488 }
2489
2490 static int parse_target_str(struct trace *trace)
2491 {
2492 if (trace->opts.target.pid) {
2493 trace->pid_list = intlist__new(trace->opts.target.pid);
2494 if (trace->pid_list == NULL) {
2495 pr_err("Error parsing process id string\n");
2496 return -EINVAL;
2497 }
2498 }
2499
2500 if (trace->opts.target.tid) {
2501 trace->tid_list = intlist__new(trace->opts.target.tid);
2502 if (trace->tid_list == NULL) {
2503 pr_err("Error parsing thread id string\n");
2504 return -EINVAL;
2505 }
2506 }
2507
2508 return 0;
2509 }
2510
2511 static int trace__record(struct trace *trace, int argc, const char **argv)
2512 {
2513 unsigned int rec_argc, i, j;
2514 const char **rec_argv;
2515 const char * const record_args[] = {
2516 "record",
2517 "-R",
2518 "-m", "1024",
2519 "-c", "1",
2520 };
2521
2522 const char * const sc_args[] = { "-e", };
2523 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2524 const char * const majpf_args[] = { "-e", "major-faults" };
2525 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2526 const char * const minpf_args[] = { "-e", "minor-faults" };
2527 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2528
2529 /* +1 is for the event string below */
2530 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2531 majpf_args_nr + minpf_args_nr + argc;
2532 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2533
2534 if (rec_argv == NULL)
2535 return -ENOMEM;
2536
2537 j = 0;
2538 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2539 rec_argv[j++] = record_args[i];
2540
2541 if (trace->trace_syscalls) {
2542 for (i = 0; i < sc_args_nr; i++)
2543 rec_argv[j++] = sc_args[i];
2544
2545 /* event string may be different for older kernels - e.g., RHEL6 */
2546 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2547 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2548 else if (is_valid_tracepoint("syscalls:sys_enter"))
2549 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2550 else {
2551 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2552 return -1;
2553 }
2554 }
2555
2556 if (trace->trace_pgfaults & TRACE_PFMAJ)
2557 for (i = 0; i < majpf_args_nr; i++)
2558 rec_argv[j++] = majpf_args[i];
2559
2560 if (trace->trace_pgfaults & TRACE_PFMIN)
2561 for (i = 0; i < minpf_args_nr; i++)
2562 rec_argv[j++] = minpf_args[i];
2563
2564 for (i = 0; i < (unsigned int)argc; i++)
2565 rec_argv[j++] = argv[i];
2566
2567 return cmd_record(j, rec_argv, NULL);
2568 }
2569
2570 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2571
2572 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2573 {
2574 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2575
2576 if (IS_ERR(evsel))
2577 return false;
2578
2579 if (perf_evsel__field(evsel, "pathname") == NULL) {
2580 perf_evsel__delete(evsel);
2581 return false;
2582 }
2583
2584 evsel->handler = trace__vfs_getname;
2585 perf_evlist__add(evlist, evsel);
2586 return true;
2587 }
2588
2589 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2590 u64 config)
2591 {
2592 struct perf_evsel *evsel;
2593 struct perf_event_attr attr = {
2594 .type = PERF_TYPE_SOFTWARE,
2595 .mmap_data = 1,
2596 };
2597
2598 attr.config = config;
2599 attr.sample_period = 1;
2600
2601 event_attr_init(&attr);
2602
2603 evsel = perf_evsel__new(&attr);
2604 if (!evsel)
2605 return -ENOMEM;
2606
2607 evsel->handler = trace__pgfault;
2608 perf_evlist__add(evlist, evsel);
2609
2610 return 0;
2611 }
2612
2613 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2614 {
2615 const u32 type = event->header.type;
2616 struct perf_evsel *evsel;
2617
2618 if (type != PERF_RECORD_SAMPLE) {
2619 trace__process_event(trace, trace->host, event, sample);
2620 return;
2621 }
2622
2623 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2624 if (evsel == NULL) {
2625 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2626 return;
2627 }
2628
2629 trace__set_base_time(trace, evsel, sample);
2630
2631 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2632 sample->raw_data == NULL) {
2633 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2634 perf_evsel__name(evsel), sample->tid,
2635 sample->cpu, sample->raw_size);
2636 } else {
2637 tracepoint_handler handler = evsel->handler;
2638 handler(trace, evsel, event, sample);
2639 }
2640 }
2641
2642 static int trace__add_syscall_newtp(struct trace *trace)
2643 {
2644 int ret = -1;
2645 struct perf_evlist *evlist = trace->evlist;
2646 struct perf_evsel *sys_enter, *sys_exit;
2647
2648 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2649 if (sys_enter == NULL)
2650 goto out;
2651
2652 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2653 goto out_delete_sys_enter;
2654
2655 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2656 if (sys_exit == NULL)
2657 goto out_delete_sys_enter;
2658
2659 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2660 goto out_delete_sys_exit;
2661
2662 perf_evlist__add(evlist, sys_enter);
2663 perf_evlist__add(evlist, sys_exit);
2664
2665 if (trace->opts.callgraph_set && !trace->kernel_syscallchains) {
2666 /*
2667 * We're interested only in the user space callchain
2668 * leading to the syscall, allow overriding that for
2669 * debugging reasons using --kernel_syscall_callchains
2670 */
2671 sys_exit->attr.exclude_callchain_kernel = 1;
2672 }
2673
2674 trace->syscalls.events.sys_enter = sys_enter;
2675 trace->syscalls.events.sys_exit = sys_exit;
2676
2677 ret = 0;
2678 out:
2679 return ret;
2680
2681 out_delete_sys_exit:
2682 perf_evsel__delete_priv(sys_exit);
2683 out_delete_sys_enter:
2684 perf_evsel__delete_priv(sys_enter);
2685 goto out;
2686 }
2687
2688 static int trace__set_ev_qualifier_filter(struct trace *trace)
2689 {
2690 int err = -1;
2691 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2692 trace->ev_qualifier_ids.nr,
2693 trace->ev_qualifier_ids.entries);
2694
2695 if (filter == NULL)
2696 goto out_enomem;
2697
2698 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2699 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2700
2701 free(filter);
2702 out:
2703 return err;
2704 out_enomem:
2705 errno = ENOMEM;
2706 goto out;
2707 }
2708
/*
 * Live tracing: set up the requested events on trace->evlist, optionally
 * prepare the workload given in argv, then read samples from the mmap'ed
 * buffers and dispatch them via trace__handle_event() until there is
 * nothing left to read or the run is interrupted.
 *
 * Every failure path prints a message (to trace->output or via pr_err)
 * and then falls into the common teardown, which deletes the evlist and
 * resets trace->evlist and trace->live. Returns the value left in err:
 * 0 when the run completed, in which case the optional summary and tool
 * stats are printed first.
 */
2709 static int trace__run(struct trace *trace, int argc, const char **argv)
2710 {
2711 struct perf_evlist *evlist = trace->evlist;
2712 struct perf_evsel *evsel;
2713 int err = -1, i;
2714 unsigned long before;
/* A command to run was passed on the command line */
2715 const bool forks = argc > 0;
2716 bool draining = false;
2717
2718 trace->live = true;
2719
/* Add the events selected by the trace options to the evlist */
2720 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2721 goto out_error_raw_syscalls;
2722
2723 if (trace->trace_syscalls)
2724 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2725
2726 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2727 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2728 goto out_error_mem;
2729 }
2730
2731 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2732 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2733 goto out_error_mem;
2734
2735 if (trace->sched &&
2736 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2737 trace__sched_stat_runtime))
2738 goto out_error_sched_stat_runtime;
2739
2740 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2741 if (err < 0) {
2742 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2743 goto out_delete_evlist;
2744 }
2745
2746 err = trace__symbols_init(trace, evlist);
2747 if (err < 0) {
2748 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2749 goto out_delete_evlist;
2750 }
2751
2752 perf_evlist__config(evlist, &trace->opts);
2753
2754 signal(SIGCHLD, sig_handler);
2755 signal(SIGINT, sig_handler);
2756
/* The workload is only prepared here; it is started further below */
2757 if (forks) {
2758 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2759 argv, false, NULL);
2760 if (err < 0) {
2761 fprintf(trace->output, "Couldn't run the workload!\n");
2762 goto out_delete_evlist;
2763 }
2764 }
2765
2766 err = perf_evlist__open(evlist);
2767 if (err < 0)
2768 goto out_error_open;
2769
2770 err = bpf__apply_obj_config();
2771 if (err) {
2772 char errbuf[BUFSIZ];
2773
2774 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2775 pr_err("ERROR: Apply config to BPF failed: %s\n",
2776 errbuf);
2777 goto out_error_open;
2778 }
2779
2780 /*
2781 * Better not use !target__has_task() here because we need to cover the
2782 * case where no threads were specified in the command line, but a
2783 * workload was, and in that case we will fill in the thread_map when
2784 * we fork the workload in perf_evlist__prepare_workload.
2785 */
2786 if (trace->filter_pids.nr > 0)
2787 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2788 else if (thread_map__pid(evlist->threads, 0) == -1)
2789 err = perf_evlist__set_filter_pid(evlist, getpid());
2790
2791 if (err < 0)
2792 goto out_error_mem;
2793
2794 if (trace->ev_qualifier_ids.nr > 0) {
2795 err = trace__set_ev_qualifier_filter(trace);
2796 if (err < 0)
2797 goto out_errno;
2798
2799 pr_debug("event qualifier tracepoint filter: %s\n",
2800 trace->syscalls.events.sys_exit->filter);
2801 }
2802
/* On failure evsel is read by the out_error_apply_filters path below */
2803 err = perf_evlist__apply_filters(evlist, &evsel);
2804 if (err < 0)
2805 goto out_error_apply_filters;
2806
2807 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2808 if (err < 0)
2809 goto out_error_mmap;
2810
2811 if (!target__none(&trace->opts.target))
2812 perf_evlist__enable(evlist);
2813
2814 if (forks)
2815 perf_evlist__start_workload(evlist);
2816
2817 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2818 evlist->threads->nr > 1 ||
2819 perf_evlist__first(evlist)->attr.inherit;
/*
 * Main loop: one pass over all mmaps per iteration. 'before' is compared
 * with nr_events afterwards to tell whether this pass read anything.
 */
2820 again:
2821 before = trace->nr_events;
2822
2823 for (i = 0; i < evlist->nr_mmaps; i++) {
2824 union perf_event *event;
2825
2826 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2827 struct perf_sample sample;
2828
2829 ++trace->nr_events;
2830
2831 err = perf_evlist__parse_sample(evlist, event, &sample);
2832 if (err) {
2833 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2834 goto next_event;
2835 }
2836
2837 trace__handle_event(trace, event, &sample);
2838 next_event:
2839 perf_evlist__mmap_consume(evlist, i);
2840
2841 if (interrupted)
2842 goto out_disable;
2843
/* Asked to stop: disable the events once, then keep draining what is buffered */
2844 if (done && !draining) {
2845 perf_evlist__disable(evlist);
2846 draining = true;
2847 }
2848 }
2849 }
2850
/* Nothing read in this pass: wait for more events unless already draining */
2851 if (trace->nr_events == before) {
2852 int timeout = done ? 100 : -1;
2853
2854 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2855 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2856 draining = true;
2857
2858 goto again;
2859 }
2860 } else {
2861 goto again;
2862 }
2863
2864 out_disable:
2865 thread__zput(trace->current);
2866
2867 perf_evlist__disable(evlist);
2868
2869 if (!err) {
2870 if (trace->summary)
2871 trace__fprintf_thread_summary(trace, trace->output);
2872
2873 if (trace->show_tool_stats) {
2874 fprintf(trace->output, "Stats:\n "
2875 " vfs_getname : %" PRIu64 "\n"
2876 " proc_getname: %" PRIu64 "\n",
2877 trace->stats.vfs_getname,
2878 trace->stats.proc_getname);
2879 }
2880 }
2881
/* Common teardown, shared by the success and all the error paths */
2882 out_delete_evlist:
2883 perf_evlist__delete(evlist);
2884 trace->evlist = NULL;
2885 trace->live = false;
2886 return err;
/*
 * Error reporting paths, reachable only via goto. The block scope exists
 * so that errbuf is shared by the labels that format a message into it.
 */
2887 {
2888 char errbuf[BUFSIZ];
2889
2890 out_error_sched_stat_runtime:
2891 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2892 goto out_error;
2893
2894 out_error_raw_syscalls:
2895 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2896 goto out_error;
2897
2898 out_error_mmap:
2899 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2900 goto out_error;
2901
2902 out_error_open:
2903 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2904
2905 out_error:
2906 fprintf(trace->output, "%s\n", errbuf);
2907 goto out_delete_evlist;
2908
2909 out_error_apply_filters:
2910 fprintf(trace->output,
2911 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2912 evsel->filter, perf_evsel__name(evsel), errno,
2913 strerror_r(errno, errbuf, sizeof(errbuf)));
2914 goto out_delete_evlist;
2915 }
2916 out_error_mem:
2917 fprintf(trace->output, "Not enough memory to run!\n");
2918 goto out_delete_evlist;
2919
2920 out_errno:
2921 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2922 goto out_delete_evlist;
2923 }
2924
2925 static int trace__replay(struct trace *trace)
2926 {
2927 const struct perf_evsel_str_handler handlers[] = {
2928 { "probe:vfs_getname", trace__vfs_getname, },
2929 };
2930 struct perf_data_file file = {
2931 .path = input_name,
2932 .mode = PERF_DATA_MODE_READ,
2933 .force = trace->force,
2934 };
2935 struct perf_session *session;
2936 struct perf_evsel *evsel;
2937 int err = -1;
2938
2939 trace->tool.sample = trace__process_sample;
2940 trace->tool.mmap = perf_event__process_mmap;
2941 trace->tool.mmap2 = perf_event__process_mmap2;
2942 trace->tool.comm = perf_event__process_comm;
2943 trace->tool.exit = perf_event__process_exit;
2944 trace->tool.fork = perf_event__process_fork;
2945 trace->tool.attr = perf_event__process_attr;
2946 trace->tool.tracing_data = perf_event__process_tracing_data;
2947 trace->tool.build_id = perf_event__process_build_id;
2948
2949 trace->tool.ordered_events = true;
2950 trace->tool.ordering_requires_timestamps = true;
2951
2952 /* add tid to output */
2953 trace->multiple_threads = true;
2954
2955 session = perf_session__new(&file, false, &trace->tool);
2956 if (session == NULL)
2957 return -1;
2958
2959 if (symbol__init(&session->header.env) < 0)
2960 goto out;
2961
2962 trace->host = &session->machines.host;
2963
2964 err = perf_session__set_tracepoints_handlers(session, handlers);
2965 if (err)
2966 goto out;
2967
2968 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2969 "raw_syscalls:sys_enter");
2970 /* older kernels have syscalls tp versus raw_syscalls */
2971 if (evsel == NULL)
2972 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2973 "syscalls:sys_enter");
2974
2975 if (evsel &&
2976 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2977 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2978 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2979 goto out;
2980 }
2981
2982 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2983 "raw_syscalls:sys_exit");
2984 if (evsel == NULL)
2985 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2986 "syscalls:sys_exit");
2987 if (evsel &&
2988 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2989 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2990 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2991 goto out;
2992 }
2993
2994 evlist__for_each(session->evlist, evsel) {
2995 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2996 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2997 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2998 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2999 evsel->handler = trace__pgfault;
3000 }
3001
3002 err = parse_target_str(trace);
3003 if (err != 0)
3004 goto out;
3005
3006 setup_pager();
3007
3008 err = perf_session__process_events(session);
3009 if (err)
3010 pr_err("Failed to process events, error %d", err);
3011
3012 else if (trace->summary)
3013 trace__fprintf_thread_summary(trace, trace->output);
3014
3015 out:
3016 perf_session__delete(session);
3017
3018 return err;
3019 }
3020
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the number of characters printed.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t nr_printed = fprintf(fp, "\n Summary of events:\n\n");

	return nr_printed;
}
3029
3030 static size_t thread__dump_stats(struct thread_trace *ttrace,
3031 struct trace *trace, FILE *fp)
3032 {
3033 struct stats *stats;
3034 size_t printed = 0;
3035 struct syscall *sc;
3036 struct int_node *inode = intlist__first(ttrace->syscall_stats);
3037
3038 if (inode == NULL)
3039 return 0;
3040
3041 printed += fprintf(fp, "\n");
3042
3043 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
3044 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
3045 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
3046
3047 /* each int_node is a syscall */
3048 while (inode) {
3049 stats = inode->priv;
3050 if (stats) {
3051 double min = (double)(stats->min) / NSEC_PER_MSEC;
3052 double max = (double)(stats->max) / NSEC_PER_MSEC;
3053 double avg = avg_stats(stats);
3054 double pct;
3055 u64 n = (u64) stats->n;
3056
3057 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3058 avg /= NSEC_PER_MSEC;
3059
3060 sc = &trace->syscalls.table[inode->i];
3061 printed += fprintf(fp, " %-15s", sc->name);
3062 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3063 n, avg * n, min, avg);
3064 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3065 }
3066
3067 inode = intlist__next(inode);
3068 }
3069
3070 printed += fprintf(fp, "\n\n");
3071
3072 return printed;
3073 }
3074
3075 /* Context passed via machine__for_each_thread() to trace__fprintf_one_thread() */
3076 struct summary_data {
3077 FILE *fp; /* stream the summary is written to */
3078 struct trace *trace; /* tracer whose nr_events and syscall table are used */
3079 size_t printed; /* running count of characters printed so far */
3080 };
3081
3082 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
3083 {
3084 struct summary_data *data = priv;
3085 FILE *fp = data->fp;
3086 size_t printed = data->printed;
3087 struct trace *trace = data->trace;
3088 struct thread_trace *ttrace = thread__priv(thread);
3089 double ratio;
3090
3091 if (ttrace == NULL)
3092 return 0;
3093
3094 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3095
3096 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3097 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3098 printed += fprintf(fp, "%.1f%%", ratio);
3099 if (ttrace->pfmaj)
3100 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3101 if (ttrace->pfmin)
3102 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3103 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3104 printed += thread__dump_stats(ttrace, trace, fp);
3105
3106 data->printed += printed;
3107
3108 return 0;
3109 }
3110
3111 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3112 {
3113 struct summary_data data = {
3114 .fp = fp,
3115 .trace = trace
3116 };
3117 data.printed = trace__fprintf_threads_header(fp);
3118
3119 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
3120
3121 return data.printed;
3122 }
3123
3124 static int trace__set_duration(const struct option *opt, const char *str,
3125 int unset __maybe_unused)
3126 {
3127 struct trace *trace = opt->value;
3128
3129 trace->duration_filter = atof(str);
3130 return 0;
3131 }
3132
3133 static int trace__set_filter_pids(const struct option *opt, const char *str,
3134 int unset __maybe_unused)
3135 {
3136 int ret = -1;
3137 size_t i;
3138 struct trace *trace = opt->value;
3139 /*
3140 * FIXME: introduce a intarray class, plain parse csv and create a
3141 * { int nr, int entries[] } struct...
3142 */
3143 struct intlist *list = intlist__new(str);
3144
3145 if (list == NULL)
3146 return -1;
3147
3148 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3149 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3150
3151 if (trace->filter_pids.entries == NULL)
3152 goto out;
3153
3154 trace->filter_pids.entries[0] = getpid();
3155
3156 for (i = 1; i < trace->filter_pids.nr; ++i)
3157 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3158
3159 intlist__delete(list);
3160 ret = 0;
3161 out:
3162 return ret;
3163 }
3164
3165 static int trace__open_output(struct trace *trace, const char *filename)
3166 {
3167 struct stat st;
3168
3169 if (!stat(filename, &st) && st.st_size) {
3170 char oldname[PATH_MAX];
3171
3172 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3173 unlink(oldname);
3174 rename(filename, oldname);
3175 }
3176
3177 trace->output = fopen(filename, "w");
3178
3179 return trace->output == NULL ? -errno : 0;
3180 }
3181
3182 static int parse_pagefaults(const struct option *opt, const char *str,
3183 int unset __maybe_unused)
3184 {
3185 int *trace_pgfaults = opt->value;
3186
3187 if (strcmp(str, "all") == 0)
3188 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3189 else if (strcmp(str, "maj") == 0)
3190 *trace_pgfaults |= TRACE_PFMAJ;
3191 else if (strcmp(str, "min") == 0)
3192 *trace_pgfaults |= TRACE_PFMIN;
3193 else
3194 return -1;
3195
3196 return 0;
3197 }
3198
3199 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3200 {
3201 struct perf_evsel *evsel;
3202
3203 evlist__for_each(evlist, evsel)
3204 evsel->handler = handler;
3205 }
3206
3207 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3208 {
3209 const char *trace_usage[] = {
3210 "perf trace [<options>] [<command>]",
3211 "perf trace [<options>] -- <command> [<options>]",
3212 "perf trace record [<options>] [<command>]",
3213 "perf trace record [<options>] -- <command> [<options>]",
3214 NULL
3215 };
3216 struct trace trace = {
3217 .syscalls = {
3218 . max = -1,
3219 },
3220 .opts = {
3221 .target = {
3222 .uid = UINT_MAX,
3223 .uses_mmap = true,
3224 },
3225 .user_freq = UINT_MAX,
3226 .user_interval = ULLONG_MAX,
3227 .no_buffering = true,
3228 .mmap_pages = UINT_MAX,
3229 .proc_map_timeout = 500,
3230 },
3231 .output = stderr,
3232 .show_comm = true,
3233 .trace_syscalls = true,
3234 .kernel_syscallchains = false,
3235 };
3236 const char *output_name = NULL;
3237 const char *ev_qualifier_str = NULL;
3238 const struct option trace_options[] = {
3239 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3240 "event selector. use 'perf list' to list available events",
3241 parse_events_option),
3242 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3243 "show the thread COMM next to its id"),
3244 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3245 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3246 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3247 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3248 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3249 "trace events on existing process id"),
3250 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3251 "trace events on existing thread id"),
3252 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3253 "pids to filter (by the kernel)", trace__set_filter_pids),
3254 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3255 "system-wide collection from all CPUs"),
3256 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3257 "list of cpus to monitor"),
3258 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3259 "child tasks do not inherit counters"),
3260 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3261 "number of mmap data pages",
3262 perf_evlist__parse_mmap_pages),
3263 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3264 "user to profile"),
3265 OPT_CALLBACK(0, "duration", &trace, "float",
3266 "show only events with duration > N.M ms",
3267 trace__set_duration),
3268 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3269 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3270 OPT_BOOLEAN('T', "time", &trace.full_time,
3271 "Show full timestamp, not time relative to first start"),
3272 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3273 "Show only syscall summary with statistics"),
3274 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3275 "Show all syscalls and summary with statistics"),
3276 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3277 "Trace pagefaults", parse_pagefaults, "maj"),
3278 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3279 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3280 OPT_CALLBACK(0, "call-graph", &trace.opts,
3281 "record_mode[,record_size]", record_callchain_help,
3282 &record_parse_callchain_opt),
3283 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3284 "Show the kernel callchains on the syscall exit path"),
3285 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3286 "per thread proc mmap processing timeout in ms"),
3287 OPT_END()
3288 };
3289 const char * const trace_subcommands[] = { "record", NULL };
3290 int err;
3291 char bf[BUFSIZ];
3292
3293 signal(SIGSEGV, sighandler_dump_stack);
3294 signal(SIGFPE, sighandler_dump_stack);
3295
3296 trace.evlist = perf_evlist__new();
3297 trace.sctbl = syscalltbl__new();
3298
3299 if (trace.evlist == NULL || trace.sctbl == NULL) {
3300 pr_err("Not enough memory to run!\n");
3301 err = -ENOMEM;
3302 goto out;
3303 }
3304
3305 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3306 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3307
3308 err = bpf__setup_stdout(trace.evlist);
3309 if (err) {
3310 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3311 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3312 goto out;
3313 }
3314
3315 if (trace.trace_pgfaults) {
3316 trace.opts.sample_address = true;
3317 trace.opts.sample_time = true;
3318 }
3319
3320 if (trace.opts.callgraph_set)
3321 symbol_conf.use_callchain = true;
3322
3323 if (trace.evlist->nr_entries > 0)
3324 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3325
3326 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3327 return trace__record(&trace, argc-1, &argv[1]);
3328
3329 /* summary_only implies summary option, but don't overwrite summary if set */
3330 if (trace.summary_only)
3331 trace.summary = trace.summary_only;
3332
3333 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3334 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3335 pr_err("Please specify something to trace.\n");
3336 return -1;
3337 }
3338
3339 if (output_name != NULL) {
3340 err = trace__open_output(&trace, output_name);
3341 if (err < 0) {
3342 perror("failed to create output file");
3343 goto out;
3344 }
3345 }
3346
3347 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3348
3349 if (ev_qualifier_str != NULL) {
3350 const char *s = ev_qualifier_str;
3351 struct strlist_config slist_config = {
3352 .dirname = system_path(STRACE_GROUPS_DIR),
3353 };
3354
3355 trace.not_ev_qualifier = *s == '!';
3356 if (trace.not_ev_qualifier)
3357 ++s;
3358 trace.ev_qualifier = strlist__new(s, &slist_config);
3359 if (trace.ev_qualifier == NULL) {
3360 fputs("Not enough memory to parse event qualifier",
3361 trace.output);
3362 err = -ENOMEM;
3363 goto out_close;
3364 }
3365
3366 err = trace__validate_ev_qualifier(&trace);
3367 if (err)
3368 goto out_close;
3369 }
3370
3371 err = target__validate(&trace.opts.target);
3372 if (err) {
3373 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3374 fprintf(trace.output, "%s", bf);
3375 goto out_close;
3376 }
3377
3378 err = target__parse_uid(&trace.opts.target);
3379 if (err) {
3380 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3381 fprintf(trace.output, "%s", bf);
3382 goto out_close;
3383 }
3384
3385 if (!argc && target__none(&trace.opts.target))
3386 trace.opts.target.system_wide = true;
3387
3388 if (input_name)
3389 err = trace__replay(&trace);
3390 else
3391 err = trace__run(&trace, argc, argv);
3392
3393 out_close:
3394 if (output_name != NULL)
3395 fclose(trace.output);
3396 out:
3397 return err;
3398 }
This page took 0.15005 seconds and 5 git commands to generate.