63a3cc9b717c4a93614de9b15c86bba1cbc2c512
[deliverable/linux.git] / tools / perf / builtin-trace.c
1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
41 #include <stdlib.h>
42 #include <sys/mman.h>
43 #include <linux/futex.h>
44 #include <linux/err.h>
45 #include <linux/seccomp.h>
46 #include <linux/filter.h>
47 #include <linux/audit.h>
48 #include <sys/ptrace.h>
49 #include <linux/random.h>
50
/*
 * Fallback values for constants that the system headers of older distros
 * may not provide.  Each one is only defined when the headers did not.
 */
#if !defined(MAP_STACK)
#define MAP_STACK		0x20000
#endif

#if !defined(MADV_HWPOISON)
#define MADV_HWPOISON		100
#endif

#if !defined(MADV_MERGEABLE)
#define MADV_MERGEABLE		12
#endif

#if !defined(MADV_UNMERGEABLE)
#define MADV_UNMERGEABLE	13
#endif

#if !defined(EFD_SEMAPHORE)
#define EFD_SEMAPHORE		1
#endif

#if !defined(EFD_NONBLOCK)
#define EFD_NONBLOCK		00004000
#endif

#if !defined(EFD_CLOEXEC)
#define EFD_CLOEXEC		02000000
#endif

#if !defined(O_CLOEXEC)
#define O_CLOEXEC		02000000
#endif

#if !defined(SOCK_DCCP)
#define SOCK_DCCP		6
#endif

#if !defined(SOCK_CLOEXEC)
#define SOCK_CLOEXEC		02000000
#endif

#if !defined(SOCK_NONBLOCK)
#define SOCK_NONBLOCK		00004000
#endif

#if !defined(MSG_CMSG_CLOEXEC)
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

#if !defined(PERF_FLAG_FD_NO_GROUP)
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#if !defined(PERF_FLAG_FD_OUTPUT)
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#if !defined(PERF_FLAG_PID_CGROUP)
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#if !defined(PERF_FLAG_FD_CLOEXEC)
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
116
117 struct trace {
118 struct perf_tool tool;
119 struct syscalltbl *sctbl;
120 struct {
121 int max;
122 struct syscall *table;
123 struct {
124 struct perf_evsel *sys_enter,
125 *sys_exit;
126 } events;
127 } syscalls;
128 struct record_opts opts;
129 struct perf_evlist *evlist;
130 struct machine *host;
131 struct thread *current;
132 u64 base_time;
133 FILE *output;
134 unsigned long nr_events;
135 struct strlist *ev_qualifier;
136 struct {
137 size_t nr;
138 int *entries;
139 } ev_qualifier_ids;
140 struct intlist *tid_list;
141 struct intlist *pid_list;
142 struct {
143 size_t nr;
144 pid_t *entries;
145 } filter_pids;
146 double duration_filter;
147 double runtime_ms;
148 struct {
149 u64 vfs_getname,
150 proc_getname;
151 } stats;
152 bool not_ev_qualifier;
153 bool live;
154 bool full_time;
155 bool sched;
156 bool multiple_threads;
157 bool summary;
158 bool summary_only;
159 bool show_comm;
160 bool show_tool_stats;
161 bool trace_syscalls;
162 bool force;
163 bool vfs_getname;
164 int trace_pgfaults;
165 int open_id;
166 };
167
168 struct tp_field {
169 int offset;
170 union {
171 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
172 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
173 };
174 };
175
176 #define TP_UINT_FIELD(bits) \
177 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
178 { \
179 u##bits value; \
180 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
181 return value; \
182 }
183
184 TP_UINT_FIELD(8);
185 TP_UINT_FIELD(16);
186 TP_UINT_FIELD(32);
187 TP_UINT_FIELD(64);
188
189 #define TP_UINT_FIELD__SWAPPED(bits) \
190 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
191 { \
192 u##bits value; \
193 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
194 return bswap_##bits(value);\
195 }
196
197 TP_UINT_FIELD__SWAPPED(16);
198 TP_UINT_FIELD__SWAPPED(32);
199 TP_UINT_FIELD__SWAPPED(64);
200
201 static int tp_field__init_uint(struct tp_field *field,
202 struct format_field *format_field,
203 bool needs_swap)
204 {
205 field->offset = format_field->offset;
206
207 switch (format_field->size) {
208 case 1:
209 field->integer = tp_field__u8;
210 break;
211 case 2:
212 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
213 break;
214 case 4:
215 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
216 break;
217 case 8:
218 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
219 break;
220 default:
221 return -1;
222 }
223
224 return 0;
225 }
226
227 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
228 {
229 return sample->raw_data + field->offset;
230 }
231
232 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
233 {
234 field->offset = format_field->offset;
235 field->pointer = tp_field__ptr;
236 return 0;
237 }
238
239 struct syscall_tp {
240 struct tp_field id;
241 union {
242 struct tp_field args, ret;
243 };
244 };
245
246 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
247 struct tp_field *field,
248 const char *name)
249 {
250 struct format_field *format_field = perf_evsel__field(evsel, name);
251
252 if (format_field == NULL)
253 return -1;
254
255 return tp_field__init_uint(field, format_field, evsel->needs_swap);
256 }
257
258 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
259 ({ struct syscall_tp *sc = evsel->priv;\
260 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
261
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field is not found, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt == NULL ? -1 : tp_field__init_ptr(field, fmt);
}

/*
 * Convenience wrapper: initialize member 'name' of the struct syscall_tp
 * hanging off evsel->priv from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name)				\
	({									\
		struct syscall_tp *sc = evsel->priv;				\
		perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name);	\
	})
277
278 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
279 {
280 zfree(&evsel->priv);
281 perf_evsel__delete(evsel);
282 }
283
284 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
285 {
286 evsel->priv = malloc(sizeof(struct syscall_tp));
287 if (evsel->priv != NULL) {
288 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
289 goto out_delete;
290
291 evsel->handler = handler;
292 return 0;
293 }
294
295 return -ENOMEM;
296
297 out_delete:
298 zfree(&evsel->priv);
299 return -ENOENT;
300 }
301
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and set it up with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall_tp initialization failed (the evsel is deleted in that case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
322
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', through that member's integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample)			\
	({								\
		struct syscall_tp *fields = evsel->priv;		\
		fields->name.integer(&fields->name, sample);		\
	})

/* Same as above, but through the member's pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample)			\
	({								\
		struct syscall_tp *fields = evsel->priv;		\
		fields->name.pointer(&fields->name, sample);		\
	})
330
331 struct syscall_arg {
332 unsigned long val;
333 struct thread *thread;
334 struct trace *trace;
335 void *parm;
336 u8 idx;
337 u8 mask;
338 };
339
/*
 * A table of names for a contiguous range of integer values: entries[i]
 * names the value (offset + i), for 0 <= i < nr_entries.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};

/* Define strarray__<array> wrapping <array>, whose first entry names value 0. */
#define DEFINE_STRARRAY(array)				\
	struct strarray strarray__##array = {		\
		.offset	    = 0,			\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries    = array,			\
	}

/* Define strarray__<array> wrapping <array>, whose first entry names value <off>. */
#define DEFINE_STRARRAY_OFFSET(array, off)		\
	struct strarray strarray__##array = {		\
		.offset	    = off,			\
		.nr_entries = ARRAY_SIZE(array),	\
		.entries    = array,			\
	}
356
357 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
358 const char *intfmt,
359 struct syscall_arg *arg)
360 {
361 struct strarray *sa = arg->parm;
362 int idx = arg->val - sa->offset;
363
364 if (idx < 0 || idx >= sa->nr_entries)
365 return scnprintf(bf, size, intfmt, arg->val);
366
367 return scnprintf(bf, size, "%s", sa->entries[idx]);
368 }
369
/* strarray formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
377
#if defined(__i386__) || defined(__x86_64__)
/*
 * strarray formatter that prints values without a name in hexadecimal.
 *
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
391
392 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
393 struct syscall_arg *arg);
394
395 #define SCA_FD syscall_arg__scnprintf_fd
396
397 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
398 struct syscall_arg *arg)
399 {
400 int fd = arg->val;
401
402 if (fd == AT_FDCWD)
403 return scnprintf(bf, size, "CWD");
404
405 return syscall_arg__scnprintf_fd(bf, size, arg);
406 }
407
408 #define SCA_FDAT syscall_arg__scnprintf_fd_at
409
410 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
411 struct syscall_arg *arg);
412
413 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
414
415 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
416 struct syscall_arg *arg)
417 {
418 return scnprintf(bf, size, "%#lx", arg->val);
419 }
420
421 #define SCA_HEX syscall_arg__scnprintf_hex
422
423 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
424 struct syscall_arg *arg)
425 {
426 return scnprintf(bf, size, "%d", arg->val);
427 }
428
429 #define SCA_INT syscall_arg__scnprintf_int
430
431 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
432 struct syscall_arg *arg)
433 {
434 int printed = 0, prot = arg->val;
435
436 if (prot == PROT_NONE)
437 return scnprintf(bf, size, "NONE");
438 #define P_MMAP_PROT(n) \
439 if (prot & PROT_##n) { \
440 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
441 prot &= ~PROT_##n; \
442 }
443
444 P_MMAP_PROT(EXEC);
445 P_MMAP_PROT(READ);
446 P_MMAP_PROT(WRITE);
447 #ifdef PROT_SEM
448 P_MMAP_PROT(SEM);
449 #endif
450 P_MMAP_PROT(GROWSDOWN);
451 P_MMAP_PROT(GROWSUP);
452 #undef P_MMAP_PROT
453
454 if (prot)
455 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
456
457 return printed;
458 }
459
460 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
461
462 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
463 struct syscall_arg *arg)
464 {
465 int printed = 0, flags = arg->val;
466
467 #define P_MMAP_FLAG(n) \
468 if (flags & MAP_##n) { \
469 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
470 flags &= ~MAP_##n; \
471 }
472
473 P_MMAP_FLAG(SHARED);
474 P_MMAP_FLAG(PRIVATE);
475 #ifdef MAP_32BIT
476 P_MMAP_FLAG(32BIT);
477 #endif
478 P_MMAP_FLAG(ANONYMOUS);
479 P_MMAP_FLAG(DENYWRITE);
480 P_MMAP_FLAG(EXECUTABLE);
481 P_MMAP_FLAG(FILE);
482 P_MMAP_FLAG(FIXED);
483 P_MMAP_FLAG(GROWSDOWN);
484 #ifdef MAP_HUGETLB
485 P_MMAP_FLAG(HUGETLB);
486 #endif
487 P_MMAP_FLAG(LOCKED);
488 P_MMAP_FLAG(NONBLOCK);
489 P_MMAP_FLAG(NORESERVE);
490 P_MMAP_FLAG(POPULATE);
491 P_MMAP_FLAG(STACK);
492 #ifdef MAP_UNINITIALIZED
493 P_MMAP_FLAG(UNINITIALIZED);
494 #endif
495 #undef P_MMAP_FLAG
496
497 if (flags)
498 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
499
500 return printed;
501 }
502
503 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
504
505 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
506 struct syscall_arg *arg)
507 {
508 int printed = 0, flags = arg->val;
509
510 #define P_MREMAP_FLAG(n) \
511 if (flags & MREMAP_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 flags &= ~MREMAP_##n; \
514 }
515
516 P_MREMAP_FLAG(MAYMOVE);
517 #ifdef MREMAP_FIXED
518 P_MREMAP_FLAG(FIXED);
519 #endif
520 #undef P_MREMAP_FLAG
521
522 if (flags)
523 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
524
525 return printed;
526 }
527
528 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
529
530 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
531 struct syscall_arg *arg)
532 {
533 int behavior = arg->val;
534
535 switch (behavior) {
536 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
537 P_MADV_BHV(NORMAL);
538 P_MADV_BHV(RANDOM);
539 P_MADV_BHV(SEQUENTIAL);
540 P_MADV_BHV(WILLNEED);
541 P_MADV_BHV(DONTNEED);
542 P_MADV_BHV(REMOVE);
543 P_MADV_BHV(DONTFORK);
544 P_MADV_BHV(DOFORK);
545 P_MADV_BHV(HWPOISON);
546 #ifdef MADV_SOFT_OFFLINE
547 P_MADV_BHV(SOFT_OFFLINE);
548 #endif
549 P_MADV_BHV(MERGEABLE);
550 P_MADV_BHV(UNMERGEABLE);
551 #ifdef MADV_HUGEPAGE
552 P_MADV_BHV(HUGEPAGE);
553 #endif
554 #ifdef MADV_NOHUGEPAGE
555 P_MADV_BHV(NOHUGEPAGE);
556 #endif
557 #ifdef MADV_DONTDUMP
558 P_MADV_BHV(DONTDUMP);
559 #endif
560 #ifdef MADV_DODUMP
561 P_MADV_BHV(DODUMP);
562 #endif
563 #undef P_MADV_PHV
564 default: break;
565 }
566
567 return scnprintf(bf, size, "%#x", behavior);
568 }
569
570 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
571
572 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
573 struct syscall_arg *arg)
574 {
575 int printed = 0, op = arg->val;
576
577 if (op == 0)
578 return scnprintf(bf, size, "NONE");
579 #define P_CMD(cmd) \
580 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
581 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
582 op &= ~LOCK_##cmd; \
583 }
584
585 P_CMD(SH);
586 P_CMD(EX);
587 P_CMD(NB);
588 P_CMD(UN);
589 P_CMD(MAND);
590 P_CMD(RW);
591 P_CMD(READ);
592 P_CMD(WRITE);
593 #undef P_OP
594
595 if (op)
596 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
597
598 return printed;
599 }
600
601 #define SCA_FLOCK syscall_arg__scnprintf_flock
602
603 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
604 {
605 enum syscall_futex_args {
606 SCF_UADDR = (1 << 0),
607 SCF_OP = (1 << 1),
608 SCF_VAL = (1 << 2),
609 SCF_TIMEOUT = (1 << 3),
610 SCF_UADDR2 = (1 << 4),
611 SCF_VAL3 = (1 << 5),
612 };
613 int op = arg->val;
614 int cmd = op & FUTEX_CMD_MASK;
615 size_t printed = 0;
616
617 switch (cmd) {
618 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
619 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
620 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
621 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
622 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
623 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
624 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
625 P_FUTEX_OP(WAKE_OP); break;
626 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
627 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
628 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
629 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
630 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
631 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
632 default: printed = scnprintf(bf, size, "%#x", cmd); break;
633 }
634
635 if (op & FUTEX_PRIVATE_FLAG)
636 printed += scnprintf(bf + printed, size - printed, "|PRIV");
637
638 if (op & FUTEX_CLOCK_REALTIME)
639 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
640
641 return printed;
642 }
643
644 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
645
646 static const char *bpf_cmd[] = {
647 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
648 "MAP_GET_NEXT_KEY", "PROG_LOAD",
649 };
650 static DEFINE_STRARRAY(bpf_cmd);
651
652 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
653 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
654
655 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
656 static DEFINE_STRARRAY(itimers);
657
658 static const char *keyctl_options[] = {
659 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
660 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
661 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
662 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
663 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
664 };
665 static DEFINE_STRARRAY(keyctl_options);
666
667 static const char *whences[] = { "SET", "CUR", "END",
668 #ifdef SEEK_DATA
669 "DATA",
670 #endif
671 #ifdef SEEK_HOLE
672 "HOLE",
673 #endif
674 };
675 static DEFINE_STRARRAY(whences);
676
677 static const char *fcntl_cmds[] = {
678 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
679 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
680 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
681 "F_GETOWNER_UIDS",
682 };
683 static DEFINE_STRARRAY(fcntl_cmds);
684
685 static const char *rlimit_resources[] = {
686 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
687 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
688 "RTTIME",
689 };
690 static DEFINE_STRARRAY(rlimit_resources);
691
692 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
693 static DEFINE_STRARRAY(sighow);
694
695 static const char *clockid[] = {
696 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
697 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
698 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
699 };
700 static DEFINE_STRARRAY(clockid);
701
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table must stay dense and in AF_* order: AF_MPLS (28) sits between
 * AF_IB (27) and AF_CAN (29); leaving it out shifts every later name down
 * by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
710 static DEFINE_STRARRAY(socket_families);
711
712 #ifndef SOCK_TYPE_MASK
713 #define SOCK_TYPE_MASK 0xf
714 #endif
715
716 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
717 struct syscall_arg *arg)
718 {
719 size_t printed;
720 int type = arg->val,
721 flags = type & ~SOCK_TYPE_MASK;
722
723 type &= SOCK_TYPE_MASK;
724 /*
725 * Can't use a strarray, MIPS may override for ABI reasons.
726 */
727 switch (type) {
728 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
729 P_SK_TYPE(STREAM);
730 P_SK_TYPE(DGRAM);
731 P_SK_TYPE(RAW);
732 P_SK_TYPE(RDM);
733 P_SK_TYPE(SEQPACKET);
734 P_SK_TYPE(DCCP);
735 P_SK_TYPE(PACKET);
736 #undef P_SK_TYPE
737 default:
738 printed = scnprintf(bf, size, "%#x", type);
739 }
740
741 #define P_SK_FLAG(n) \
742 if (flags & SOCK_##n) { \
743 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
744 flags &= ~SOCK_##n; \
745 }
746
747 P_SK_FLAG(CLOEXEC);
748 P_SK_FLAG(NONBLOCK);
749 #undef P_SK_FLAG
750
751 if (flags)
752 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
753
754 return printed;
755 }
756
757 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
758
759 #ifndef MSG_PROBE
760 #define MSG_PROBE 0x10
761 #endif
762 #ifndef MSG_WAITFORONE
763 #define MSG_WAITFORONE 0x10000
764 #endif
765 #ifndef MSG_SENDPAGE_NOTLAST
766 #define MSG_SENDPAGE_NOTLAST 0x20000
767 #endif
768 #ifndef MSG_FASTOPEN
769 #define MSG_FASTOPEN 0x20000000
770 #endif
771
772 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
773 struct syscall_arg *arg)
774 {
775 int printed = 0, flags = arg->val;
776
777 if (flags == 0)
778 return scnprintf(bf, size, "NONE");
779 #define P_MSG_FLAG(n) \
780 if (flags & MSG_##n) { \
781 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782 flags &= ~MSG_##n; \
783 }
784
785 P_MSG_FLAG(OOB);
786 P_MSG_FLAG(PEEK);
787 P_MSG_FLAG(DONTROUTE);
788 P_MSG_FLAG(TRYHARD);
789 P_MSG_FLAG(CTRUNC);
790 P_MSG_FLAG(PROBE);
791 P_MSG_FLAG(TRUNC);
792 P_MSG_FLAG(DONTWAIT);
793 P_MSG_FLAG(EOR);
794 P_MSG_FLAG(WAITALL);
795 P_MSG_FLAG(FIN);
796 P_MSG_FLAG(SYN);
797 P_MSG_FLAG(CONFIRM);
798 P_MSG_FLAG(RST);
799 P_MSG_FLAG(ERRQUEUE);
800 P_MSG_FLAG(NOSIGNAL);
801 P_MSG_FLAG(MORE);
802 P_MSG_FLAG(WAITFORONE);
803 P_MSG_FLAG(SENDPAGE_NOTLAST);
804 P_MSG_FLAG(FASTOPEN);
805 P_MSG_FLAG(CMSG_CLOEXEC);
806 #undef P_MSG_FLAG
807
808 if (flags)
809 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
810
811 return printed;
812 }
813
814 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
815
816 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
817 struct syscall_arg *arg)
818 {
819 size_t printed = 0;
820 int mode = arg->val;
821
822 if (mode == F_OK) /* 0 */
823 return scnprintf(bf, size, "F");
824 #define P_MODE(n) \
825 if (mode & n##_OK) { \
826 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
827 mode &= ~n##_OK; \
828 }
829
830 P_MODE(R);
831 P_MODE(W);
832 P_MODE(X);
833 #undef P_MODE
834
835 if (mode)
836 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
837
838 return printed;
839 }
840
841 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
842
843 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
844 struct syscall_arg *arg);
845
846 #define SCA_FILENAME syscall_arg__scnprintf_filename
847
848 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
849 struct syscall_arg *arg)
850 {
851 int printed = 0, flags = arg->val;
852
853 if (!(flags & O_CREAT))
854 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
855
856 if (flags == 0)
857 return scnprintf(bf, size, "RDONLY");
858 #define P_FLAG(n) \
859 if (flags & O_##n) { \
860 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
861 flags &= ~O_##n; \
862 }
863
864 P_FLAG(APPEND);
865 P_FLAG(ASYNC);
866 P_FLAG(CLOEXEC);
867 P_FLAG(CREAT);
868 P_FLAG(DIRECT);
869 P_FLAG(DIRECTORY);
870 P_FLAG(EXCL);
871 P_FLAG(LARGEFILE);
872 P_FLAG(NOATIME);
873 P_FLAG(NOCTTY);
874 #ifdef O_NONBLOCK
875 P_FLAG(NONBLOCK);
876 #elif O_NDELAY
877 P_FLAG(NDELAY);
878 #endif
879 #ifdef O_PATH
880 P_FLAG(PATH);
881 #endif
882 P_FLAG(RDWR);
883 #ifdef O_DSYNC
884 if ((flags & O_SYNC) == O_SYNC)
885 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
886 else {
887 P_FLAG(DSYNC);
888 }
889 #else
890 P_FLAG(SYNC);
891 #endif
892 P_FLAG(TRUNC);
893 P_FLAG(WRONLY);
894 #undef P_FLAG
895
896 if (flags)
897 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
898
899 return printed;
900 }
901
902 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
903
904 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
905 struct syscall_arg *arg)
906 {
907 int printed = 0, flags = arg->val;
908
909 if (flags == 0)
910 return 0;
911
912 #define P_FLAG(n) \
913 if (flags & PERF_FLAG_##n) { \
914 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
915 flags &= ~PERF_FLAG_##n; \
916 }
917
918 P_FLAG(FD_NO_GROUP);
919 P_FLAG(FD_OUTPUT);
920 P_FLAG(PID_CGROUP);
921 P_FLAG(FD_CLOEXEC);
922 #undef P_FLAG
923
924 if (flags)
925 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
926
927 return printed;
928 }
929
930 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
931
932 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
933 struct syscall_arg *arg)
934 {
935 int printed = 0, flags = arg->val;
936
937 if (flags == 0)
938 return scnprintf(bf, size, "NONE");
939 #define P_FLAG(n) \
940 if (flags & EFD_##n) { \
941 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
942 flags &= ~EFD_##n; \
943 }
944
945 P_FLAG(SEMAPHORE);
946 P_FLAG(CLOEXEC);
947 P_FLAG(NONBLOCK);
948 #undef P_FLAG
949
950 if (flags)
951 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
952
953 return printed;
954 }
955
956 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
957
958 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
959 struct syscall_arg *arg)
960 {
961 int printed = 0, flags = arg->val;
962
963 #define P_FLAG(n) \
964 if (flags & O_##n) { \
965 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
966 flags &= ~O_##n; \
967 }
968
969 P_FLAG(CLOEXEC);
970 P_FLAG(NONBLOCK);
971 #undef P_FLAG
972
973 if (flags)
974 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
975
976 return printed;
977 }
978
979 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
980
981 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
982 {
983 int sig = arg->val;
984
985 switch (sig) {
986 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
987 P_SIGNUM(HUP);
988 P_SIGNUM(INT);
989 P_SIGNUM(QUIT);
990 P_SIGNUM(ILL);
991 P_SIGNUM(TRAP);
992 P_SIGNUM(ABRT);
993 P_SIGNUM(BUS);
994 P_SIGNUM(FPE);
995 P_SIGNUM(KILL);
996 P_SIGNUM(USR1);
997 P_SIGNUM(SEGV);
998 P_SIGNUM(USR2);
999 P_SIGNUM(PIPE);
1000 P_SIGNUM(ALRM);
1001 P_SIGNUM(TERM);
1002 P_SIGNUM(CHLD);
1003 P_SIGNUM(CONT);
1004 P_SIGNUM(STOP);
1005 P_SIGNUM(TSTP);
1006 P_SIGNUM(TTIN);
1007 P_SIGNUM(TTOU);
1008 P_SIGNUM(URG);
1009 P_SIGNUM(XCPU);
1010 P_SIGNUM(XFSZ);
1011 P_SIGNUM(VTALRM);
1012 P_SIGNUM(PROF);
1013 P_SIGNUM(WINCH);
1014 P_SIGNUM(IO);
1015 P_SIGNUM(PWR);
1016 P_SIGNUM(SYS);
1017 #ifdef SIGEMT
1018 P_SIGNUM(EMT);
1019 #endif
1020 #ifdef SIGSTKFLT
1021 P_SIGNUM(STKFLT);
1022 #endif
1023 #ifdef SIGSWI
1024 P_SIGNUM(SWI);
1025 #endif
1026 default: break;
1027 }
1028
1029 return scnprintf(bf, size, "%#x", sig);
1030 }
1031
1032 #define SCA_SIGNUM syscall_arg__scnprintf_signum
1033
1034 #if defined(__i386__) || defined(__x86_64__)
1035 /*
1036 * FIXME: Make this available to all arches.
1037 */
#define TCGETS		0x5401

/*
 * Names of the tty ioctls.  The table is used through a strarray whose
 * offset is 0x5401 (TCGETS), i.e. it is indexed by (cmd - TCGETS), so every
 * designator has to be expressed relative to TCGETS as well: a bare
 * "[0x27]" would land on cmd 0x5428 instead of 0x5427.
 *
 * Slots that are not listed stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	/* TIOCGEXCL is number 0x40 of the 'T' group, not 0x3a */
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
1057
1058 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1059 #endif /* defined(__i386__) || defined(__x86_64__) */
1060
1061 static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
1062 {
1063 int op = arg->val;
1064 size_t printed = 0;
1065
1066 switch (op) {
1067 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1068 P_SECCOMP_SET_MODE_OP(STRICT);
1069 P_SECCOMP_SET_MODE_OP(FILTER);
1070 #undef P_SECCOMP_SET_MODE_OP
1071 default: printed = scnprintf(bf, size, "%#x", op); break;
1072 }
1073
1074 return printed;
1075 }
1076
1077 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
1078
1079 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
1080 struct syscall_arg *arg)
1081 {
1082 int printed = 0, flags = arg->val;
1083
1084 #define P_FLAG(n) \
1085 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1086 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1087 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1088 }
1089
1090 P_FLAG(TSYNC);
1091 #undef P_FLAG
1092
1093 if (flags)
1094 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1095
1096 return printed;
1097 }
1098
1099 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
1100
1101 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
1102 struct syscall_arg *arg)
1103 {
1104 int printed = 0, flags = arg->val;
1105
1106 #define P_FLAG(n) \
1107 if (flags & GRND_##n) { \
1108 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1109 flags &= ~GRND_##n; \
1110 }
1111
1112 P_FLAG(RANDOM);
1113 P_FLAG(NONBLOCK);
1114 #undef P_FLAG
1115
1116 if (flags)
1117 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1118
1119 return printed;
1120 }
1121
1122 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
1123
/*
 * Initializer fragment for an entry that has .arg_scnprintf/.arg_parm
 * arrays: argument number 'arg' is formatted by SCA_STRARRAY using the
 * strarray__<array> table.  'name' is not used by the expansion.
 */
#define STRARRAY(arg, name, array)				\
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, },		\
	  .arg_parm	 = { [arg] = &strarray__##array, }
1127
1128 #include "trace/beauty/pid.c"
1129 #include "trace/beauty/mode_t.c"
1130 #include "trace/beauty/sched_policy.c"
1131 #include "trace/beauty/waitid_options.c"
1132
1133 static struct syscall_fmt {
1134 const char *name;
1135 const char *alias;
1136 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1137 void *arg_parm[6];
1138 bool errmsg;
1139 bool errpid;
1140 bool timeout;
1141 bool hexret;
1142 } syscall_fmts[] = {
1143 { .name = "access", .errmsg = true,
1144 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1145 [1] = SCA_ACCMODE, /* mode */ }, },
1146 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1147 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1148 { .name = "brk", .hexret = true,
1149 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1150 { .name = "chdir", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1152 { .name = "chmod", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1154 { .name = "chroot", .errmsg = true,
1155 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1156 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1157 { .name = "clone", .errpid = true, },
1158 { .name = "close", .errmsg = true,
1159 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1160 { .name = "connect", .errmsg = true, },
1161 { .name = "creat", .errmsg = true,
1162 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1163 { .name = "dup", .errmsg = true,
1164 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1165 { .name = "dup2", .errmsg = true,
1166 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1167 { .name = "dup3", .errmsg = true,
1168 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1169 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1170 { .name = "eventfd2", .errmsg = true,
1171 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1172 { .name = "faccessat", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1174 [1] = SCA_FILENAME, /* filename */ }, },
1175 { .name = "fadvise64", .errmsg = true,
1176 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1177 { .name = "fallocate", .errmsg = true,
1178 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1179 { .name = "fchdir", .errmsg = true,
1180 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1181 { .name = "fchmod", .errmsg = true,
1182 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1183 { .name = "fchmodat", .errmsg = true,
1184 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1185 [1] = SCA_FILENAME, /* filename */ }, },
1186 { .name = "fchown", .errmsg = true,
1187 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1188 { .name = "fchownat", .errmsg = true,
1189 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1190 [1] = SCA_FILENAME, /* filename */ }, },
1191 { .name = "fcntl", .errmsg = true,
1192 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1193 [1] = SCA_STRARRAY, /* cmd */ },
1194 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1195 { .name = "fdatasync", .errmsg = true,
1196 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1197 { .name = "flock", .errmsg = true,
1198 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1199 [1] = SCA_FLOCK, /* cmd */ }, },
1200 { .name = "fsetxattr", .errmsg = true,
1201 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1202 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1203 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1205 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1206 [1] = SCA_FILENAME, /* filename */ }, },
1207 { .name = "fstatfs", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1209 { .name = "fsync", .errmsg = true,
1210 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1211 { .name = "ftruncate", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1213 { .name = "futex", .errmsg = true,
1214 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1215 { .name = "futimesat", .errmsg = true,
1216 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1217 [1] = SCA_FILENAME, /* filename */ }, },
1218 { .name = "getdents", .errmsg = true,
1219 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1220 { .name = "getdents64", .errmsg = true,
1221 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1222 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1223 { .name = "getpid", .errpid = true, },
1224 { .name = "getpgid", .errpid = true, },
1225 { .name = "getppid", .errpid = true, },
1226 { .name = "getrandom", .errmsg = true,
1227 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
1228 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1229 { .name = "getxattr", .errmsg = true,
1230 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1231 { .name = "inotify_add_watch", .errmsg = true,
1232 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1233 { .name = "ioctl", .errmsg = true,
1234 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235 #if defined(__i386__) || defined(__x86_64__)
1236 /*
1237 * FIXME: Make this available to all arches.
1238 */
1239 [1] = SCA_STRHEXARRAY, /* cmd */
1240 [2] = SCA_HEX, /* arg */ },
1241 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1242 #else
1243 [2] = SCA_HEX, /* arg */ }, },
1244 #endif
1245 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1246 { .name = "kill", .errmsg = true,
1247 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1248 { .name = "lchown", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1250 { .name = "lgetxattr", .errmsg = true,
1251 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1252 { .name = "linkat", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1254 { .name = "listxattr", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1256 { .name = "llistxattr", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1258 { .name = "lremovexattr", .errmsg = true,
1259 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1260 { .name = "lseek", .errmsg = true,
1261 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1262 [2] = SCA_STRARRAY, /* whence */ },
1263 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1264 { .name = "lsetxattr", .errmsg = true,
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1266 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1267 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1268 { .name = "lsxattr", .errmsg = true,
1269 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1270 { .name = "madvise", .errmsg = true,
1271 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1272 [2] = SCA_MADV_BHV, /* behavior */ }, },
1273 { .name = "mkdir", .errmsg = true,
1274 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1275 { .name = "mkdirat", .errmsg = true,
1276 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1277 [1] = SCA_FILENAME, /* pathname */ }, },
1278 { .name = "mknod", .errmsg = true,
1279 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1280 { .name = "mknodat", .errmsg = true,
1281 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1282 [1] = SCA_FILENAME, /* filename */ }, },
1283 { .name = "mlock", .errmsg = true,
1284 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1285 { .name = "mlockall", .errmsg = true,
1286 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1287 { .name = "mmap", .hexret = true,
1288 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1289 [2] = SCA_MMAP_PROT, /* prot */
1290 [3] = SCA_MMAP_FLAGS, /* flags */
1291 [4] = SCA_FD, /* fd */ }, },
1292 { .name = "mprotect", .errmsg = true,
1293 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1294 [2] = SCA_MMAP_PROT, /* prot */ }, },
1295 { .name = "mq_unlink", .errmsg = true,
1296 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1297 { .name = "mremap", .hexret = true,
1298 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1299 [3] = SCA_MREMAP_FLAGS, /* flags */
1300 [4] = SCA_HEX, /* new_addr */ }, },
1301 { .name = "munlock", .errmsg = true,
1302 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1303 { .name = "munmap", .errmsg = true,
1304 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1305 { .name = "name_to_handle_at", .errmsg = true,
1306 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1307 { .name = "newfstatat", .errmsg = true,
1308 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1309 [1] = SCA_FILENAME, /* filename */ }, },
1310 { .name = "open", .errmsg = true,
1311 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1312 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1313 { .name = "open_by_handle_at", .errmsg = true,
1314 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1315 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1316 { .name = "openat", .errmsg = true,
1317 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1318 [1] = SCA_FILENAME, /* filename */
1319 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1320 { .name = "perf_event_open", .errmsg = true,
1321 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1322 [2] = SCA_INT, /* cpu */
1323 [3] = SCA_FD, /* group_fd */
1324 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1325 { .name = "pipe2", .errmsg = true,
1326 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1327 { .name = "poll", .errmsg = true, .timeout = true, },
1328 { .name = "ppoll", .errmsg = true, .timeout = true, },
1329 { .name = "pread", .errmsg = true, .alias = "pread64",
1330 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1331 { .name = "preadv", .errmsg = true, .alias = "pread",
1332 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1333 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1334 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1335 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1336 { .name = "pwritev", .errmsg = true,
1337 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1338 { .name = "read", .errmsg = true,
1339 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1340 { .name = "readlink", .errmsg = true,
1341 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1342 { .name = "readlinkat", .errmsg = true,
1343 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1344 [1] = SCA_FILENAME, /* pathname */ }, },
1345 { .name = "readv", .errmsg = true,
1346 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1347 { .name = "recvfrom", .errmsg = true,
1348 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1349 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1350 { .name = "recvmmsg", .errmsg = true,
1351 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1352 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1353 { .name = "recvmsg", .errmsg = true,
1354 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1355 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1356 { .name = "removexattr", .errmsg = true,
1357 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1358 { .name = "renameat", .errmsg = true,
1359 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1360 { .name = "rmdir", .errmsg = true,
1361 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1362 { .name = "rt_sigaction", .errmsg = true,
1363 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1364 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1365 { .name = "rt_sigqueueinfo", .errmsg = true,
1366 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1367 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1368 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1369 { .name = "sched_setscheduler", .errmsg = true,
1370 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
1371 { .name = "seccomp", .errmsg = true,
1372 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
1373 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
1374 { .name = "select", .errmsg = true, .timeout = true, },
1375 { .name = "sendmmsg", .errmsg = true,
1376 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1377 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1378 { .name = "sendmsg", .errmsg = true,
1379 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1380 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1381 { .name = "sendto", .errmsg = true,
1382 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1383 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1384 { .name = "set_tid_address", .errpid = true, },
1385 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1386 { .name = "setpgid", .errmsg = true, },
1387 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1388 { .name = "setxattr", .errmsg = true,
1389 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1390 { .name = "shutdown", .errmsg = true,
1391 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1392 { .name = "socket", .errmsg = true,
1393 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1394 [1] = SCA_SK_TYPE, /* type */ },
1395 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1396 { .name = "socketpair", .errmsg = true,
1397 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1398 [1] = SCA_SK_TYPE, /* type */ },
1399 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1400 { .name = "stat", .errmsg = true, .alias = "newstat",
1401 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1402 { .name = "statfs", .errmsg = true,
1403 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1404 { .name = "swapoff", .errmsg = true,
1405 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1406 { .name = "swapon", .errmsg = true,
1407 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1408 { .name = "symlinkat", .errmsg = true,
1409 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1410 { .name = "tgkill", .errmsg = true,
1411 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1412 { .name = "tkill", .errmsg = true,
1413 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1414 { .name = "truncate", .errmsg = true,
1415 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1416 { .name = "uname", .errmsg = true, .alias = "newuname", },
1417 { .name = "unlinkat", .errmsg = true,
1418 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1419 [1] = SCA_FILENAME, /* pathname */ }, },
1420 { .name = "utime", .errmsg = true,
1421 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1422 { .name = "utimensat", .errmsg = true,
1423 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1424 [1] = SCA_FILENAME, /* filename */ }, },
1425 { .name = "utimes", .errmsg = true,
1426 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1427 { .name = "vmsplice", .errmsg = true,
1428 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1429 { .name = "wait4", .errpid = true,
1430 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
1431 { .name = "waitid", .errpid = true,
1432 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
1433 { .name = "write", .errmsg = true,
1434 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1435 { .name = "writev", .errmsg = true,
1436 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1437 };
1438
1439 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1440 {
1441 const struct syscall_fmt *fmt = fmtp;
1442 return strcmp(name, fmt->name);
1443 }
1444
1445 static struct syscall_fmt *syscall_fmt__find(const char *name)
1446 {
1447 const int nmemb = ARRAY_SIZE(syscall_fmts);
1448 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1449 }
1450
/*
 * What is known about one syscall, filled in by trace__read_syscall_info().
 */
struct syscall {
	struct event_format *tp_format;	/* "syscalls:sys_enter_<name>" format, may be an ERR_PTR */
	int		    nr_args;	/* number of fields in ->args */
	struct format_field *args;	/* tracepoint fields, minus the syscall number one */
	const char	    *name;	/* as returned by syscalltbl__name() */
	bool		    is_exit;	/* name is "exit" or "exit_group" */
	struct syscall_fmt  *fmt;	/* syscall_fmts[] entry, NULL if there is none */
	/* per argument formatter, allocated by syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* points to fmt->arg_parm when ->fmt is set */
};
1461
1462 static size_t fprintf_duration(unsigned long t, FILE *fp)
1463 {
1464 double duration = (double)t / NSEC_PER_MSEC;
1465 size_t printed = fprintf(fp, "(");
1466
1467 if (duration >= 1.0)
1468 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1469 else if (duration >= 0.01)
1470 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1471 else
1472 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1473 return printed + fprintf(fp, "): ");
1474 }
1475
1476 /**
1477 * filename.ptr: The filename char pointer that will be vfs_getname'd
1478 * filename.entry_str_pos: Where to insert the string translated from
1479 * filename.ptr by the vfs_getname tracepoint/kprobe.
1480 */
1481 struct thread_trace {
1482 u64 entry_time;
1483 u64 exit_time;
1484 bool entry_pending;
1485 unsigned long nr_events;
1486 unsigned long pfmaj, pfmin;
1487 char *entry_str;
1488 double runtime_ms;
1489 struct {
1490 unsigned long ptr;
1491 short int entry_str_pos;
1492 bool pending_open;
1493 unsigned int namelen;
1494 char *name;
1495 } filename;
1496 struct {
1497 int max;
1498 char **table;
1499 } paths;
1500
1501 struct intlist *syscall_stats;
1502 };
1503
1504 static struct thread_trace *thread_trace__new(void)
1505 {
1506 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1507
1508 if (ttrace)
1509 ttrace->paths.max = -1;
1510
1511 ttrace->syscall_stats = intlist__new(NULL);
1512
1513 return ttrace;
1514 }
1515
1516 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1517 {
1518 struct thread_trace *ttrace;
1519
1520 if (thread == NULL)
1521 goto fail;
1522
1523 if (thread__priv(thread) == NULL)
1524 thread__set_priv(thread, thread_trace__new());
1525
1526 if (thread__priv(thread) == NULL)
1527 goto fail;
1528
1529 ttrace = thread__priv(thread);
1530 ++ttrace->nr_events;
1531
1532 return ttrace;
1533 fail:
1534 color_fprintf(fp, PERF_COLOR_RED,
1535 "WARNING: not enough memory, dropping samples!\n");
1536 return NULL;
1537 }
1538
/* Page fault selection bits */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per thread buffer holding a formatted syscall entry */
static const size_t trace__entry_str_size = 2048;
1543
1544 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1545 {
1546 struct thread_trace *ttrace = thread__priv(thread);
1547
1548 if (fd > ttrace->paths.max) {
1549 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1550
1551 if (npath == NULL)
1552 return -1;
1553
1554 if (ttrace->paths.max != -1) {
1555 memset(npath + ttrace->paths.max + 1, 0,
1556 (fd - ttrace->paths.max) * sizeof(char *));
1557 } else {
1558 memset(npath, 0, (fd + 1) * sizeof(char *));
1559 }
1560
1561 ttrace->paths.table = npath;
1562 ttrace->paths.max = fd;
1563 }
1564
1565 ttrace->paths.table[fd] = strdup(pathname);
1566
1567 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1568 }
1569
1570 static int thread__read_fd_path(struct thread *thread, int fd)
1571 {
1572 char linkname[PATH_MAX], pathname[PATH_MAX];
1573 struct stat st;
1574 int ret;
1575
1576 if (thread->pid_ == thread->tid) {
1577 scnprintf(linkname, sizeof(linkname),
1578 "/proc/%d/fd/%d", thread->pid_, fd);
1579 } else {
1580 scnprintf(linkname, sizeof(linkname),
1581 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1582 }
1583
1584 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1585 return -1;
1586
1587 ret = readlink(linkname, pathname, sizeof(pathname));
1588
1589 if (ret < 0 || ret > st.st_size)
1590 return -1;
1591
1592 pathname[ret] = '\0';
1593 return trace__set_fd_pathname(thread, fd, pathname);
1594 }
1595
1596 static const char *thread__fd_path(struct thread *thread, int fd,
1597 struct trace *trace)
1598 {
1599 struct thread_trace *ttrace = thread__priv(thread);
1600
1601 if (ttrace == NULL)
1602 return NULL;
1603
1604 if (fd < 0)
1605 return NULL;
1606
1607 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1608 if (!trace->live)
1609 return NULL;
1610 ++trace->stats.proc_getname;
1611 if (thread__read_fd_path(thread, fd))
1612 return NULL;
1613 }
1614
1615 return ttrace->paths.table[fd];
1616 }
1617
1618 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1619 struct syscall_arg *arg)
1620 {
1621 int fd = arg->val;
1622 size_t printed = scnprintf(bf, size, "%d", fd);
1623 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1624
1625 if (path)
1626 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1627
1628 return printed;
1629 }
1630
1631 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1632 struct syscall_arg *arg)
1633 {
1634 int fd = arg->val;
1635 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1636 struct thread_trace *ttrace = thread__priv(arg->thread);
1637
1638 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1639 zfree(&ttrace->paths.table[fd]);
1640
1641 return printed;
1642 }
1643
1644 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1645 unsigned long ptr)
1646 {
1647 struct thread_trace *ttrace = thread__priv(thread);
1648
1649 ttrace->filename.ptr = ptr;
1650 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1651 }
1652
1653 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1654 struct syscall_arg *arg)
1655 {
1656 unsigned long ptr = arg->val;
1657
1658 if (!arg->trace->vfs_getname)
1659 return scnprintf(bf, size, "%#x", ptr);
1660
1661 thread__set_filename_pos(arg->thread, bf, ptr);
1662 return 0;
1663 }
1664
1665 static bool trace__filter_duration(struct trace *trace, double t)
1666 {
1667 return t < (trace->duration_filter * NSEC_PER_MSEC);
1668 }
1669
1670 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1671 {
1672 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1673
1674 return fprintf(fp, "%10.3f ", ts);
1675 }
1676
/*
 * Flags written by sig_handler().  Objects modified from a signal handler
 * must be volatile sig_atomic_t for the accesses to be well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination and remember whether the signal was
 * SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1685
1686 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1687 u64 duration, u64 tstamp, FILE *fp)
1688 {
1689 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1690 printed += fprintf_duration(duration, fp);
1691
1692 if (trace->multiple_threads) {
1693 if (trace->show_comm)
1694 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1695 printed += fprintf(fp, "%d ", thread->tid);
1696 }
1697
1698 return printed;
1699 }
1700
1701 static int trace__process_event(struct trace *trace, struct machine *machine,
1702 union perf_event *event, struct perf_sample *sample)
1703 {
1704 int ret = 0;
1705
1706 switch (event->header.type) {
1707 case PERF_RECORD_LOST:
1708 color_fprintf(trace->output, PERF_COLOR_RED,
1709 "LOST %" PRIu64 " events!\n", event->lost.lost);
1710 ret = machine__process_lost_event(machine, event, sample);
1711 break;
1712 default:
1713 ret = machine__process_event(machine, event, sample);
1714 break;
1715 }
1716
1717 return ret;
1718 }
1719
1720 static int trace__tool_process(struct perf_tool *tool,
1721 union perf_event *event,
1722 struct perf_sample *sample,
1723 struct machine *machine)
1724 {
1725 struct trace *trace = container_of(tool, struct trace, tool);
1726 return trace__process_event(trace, machine, event, sample);
1727 }
1728
1729 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1730 {
1731 int err = symbol__init(NULL);
1732
1733 if (err)
1734 return err;
1735
1736 trace->host = machine__new_host();
1737 if (trace->host == NULL)
1738 return -ENOMEM;
1739
1740 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1741 return -errno;
1742
1743 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1744 evlist->threads, trace__tool_process, false,
1745 trace->opts.proc_map_timeout);
1746 if (err)
1747 symbol__exit();
1748
1749 return err;
1750 }
1751
1752 static int syscall__set_arg_fmts(struct syscall *sc)
1753 {
1754 struct format_field *field;
1755 int idx = 0;
1756
1757 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1758 if (sc->arg_scnprintf == NULL)
1759 return -1;
1760
1761 if (sc->fmt)
1762 sc->arg_parm = sc->fmt->arg_parm;
1763
1764 for (field = sc->args; field; field = field->next) {
1765 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1766 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1767 else if (field->flags & FIELD_IS_POINTER)
1768 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1769 else if (strcmp(field->type, "pid_t") == 0)
1770 sc->arg_scnprintf[idx] = SCA_PID;
1771 else if (strcmp(field->type, "umode_t") == 0)
1772 sc->arg_scnprintf[idx] = SCA_MODE_T;
1773 ++idx;
1774 }
1775
1776 return 0;
1777 }
1778
1779 static int trace__read_syscall_info(struct trace *trace, int id)
1780 {
1781 char tp_name[128];
1782 struct syscall *sc;
1783 const char *name = syscalltbl__name(trace->sctbl, id);
1784
1785 if (name == NULL)
1786 return -1;
1787
1788 if (id > trace->syscalls.max) {
1789 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1790
1791 if (nsyscalls == NULL)
1792 return -1;
1793
1794 if (trace->syscalls.max != -1) {
1795 memset(nsyscalls + trace->syscalls.max + 1, 0,
1796 (id - trace->syscalls.max) * sizeof(*sc));
1797 } else {
1798 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1799 }
1800
1801 trace->syscalls.table = nsyscalls;
1802 trace->syscalls.max = id;
1803 }
1804
1805 sc = trace->syscalls.table + id;
1806 sc->name = name;
1807
1808 sc->fmt = syscall_fmt__find(sc->name);
1809
1810 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1811 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1812
1813 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1814 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1815 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1816 }
1817
1818 if (IS_ERR(sc->tp_format))
1819 return -1;
1820
1821 sc->args = sc->tp_format->format.fields;
1822 sc->nr_args = sc->tp_format->format.nr_fields;
1823 /*
1824 * We need to check and discard the first variable '__syscall_nr'
1825 * or 'nr' that mean the syscall number. It is needless here.
1826 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1827 */
1828 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1829 sc->args = sc->args->next;
1830 --sc->nr_args;
1831 }
1832
1833 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1834
1835 return syscall__set_arg_fmts(sc);
1836 }
1837
1838 static int trace__validate_ev_qualifier(struct trace *trace)
1839 {
1840 int err = 0, i;
1841 struct str_node *pos;
1842
1843 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1844 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1845 sizeof(trace->ev_qualifier_ids.entries[0]));
1846
1847 if (trace->ev_qualifier_ids.entries == NULL) {
1848 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1849 trace->output);
1850 err = -EINVAL;
1851 goto out;
1852 }
1853
1854 i = 0;
1855
1856 strlist__for_each(pos, trace->ev_qualifier) {
1857 const char *sc = pos->s;
1858 int id = syscalltbl__id(trace->sctbl, sc);
1859
1860 if (id < 0) {
1861 if (err == 0) {
1862 fputs("Error:\tInvalid syscall ", trace->output);
1863 err = -EINVAL;
1864 } else {
1865 fputs(", ", trace->output);
1866 }
1867
1868 fputs(sc, trace->output);
1869 }
1870
1871 trace->ev_qualifier_ids.entries[i++] = id;
1872 }
1873
1874 if (err < 0) {
1875 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1876 "\nHint:\tand: 'man syscalls'\n", trace->output);
1877 zfree(&trace->ev_qualifier_ids.entries);
1878 trace->ev_qualifier_ids.nr = 0;
1879 }
1880 out:
1881 return err;
1882 }
1883
1884 /*
1885 * args is to be interpreted as a series of longs but we need to handle
1886 * 8-byte unaligned accesses. args points to raw_data within the event
1887 * and raw_data is guaranteed to be 8-byte unaligned because it is
1888 * preceded by raw_size which is a u32. So we need to copy args to a temp
1889 * variable to read it. Most notably this avoids extended load instructions
1890 * on unaligned addresses
1891 */
1892
1893 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1894 unsigned char *args, struct trace *trace,
1895 struct thread *thread)
1896 {
1897 size_t printed = 0;
1898 unsigned char *p;
1899 unsigned long val;
1900
1901 if (sc->args != NULL) {
1902 struct format_field *field;
1903 u8 bit = 1;
1904 struct syscall_arg arg = {
1905 .idx = 0,
1906 .mask = 0,
1907 .trace = trace,
1908 .thread = thread,
1909 };
1910
1911 for (field = sc->args; field;
1912 field = field->next, ++arg.idx, bit <<= 1) {
1913 if (arg.mask & bit)
1914 continue;
1915
1916 /* special care for unaligned accesses */
1917 p = args + sizeof(unsigned long) * arg.idx;
1918 memcpy(&val, p, sizeof(val));
1919
1920 /*
1921 * Suppress this argument if its value is zero and
1922 * and we don't have a string associated in an
1923 * strarray for it.
1924 */
1925 if (val == 0 &&
1926 !(sc->arg_scnprintf &&
1927 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1928 sc->arg_parm[arg.idx]))
1929 continue;
1930
1931 printed += scnprintf(bf + printed, size - printed,
1932 "%s%s: ", printed ? ", " : "", field->name);
1933 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1934 arg.val = val;
1935 if (sc->arg_parm)
1936 arg.parm = sc->arg_parm[arg.idx];
1937 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1938 size - printed, &arg);
1939 } else {
1940 printed += scnprintf(bf + printed, size - printed,
1941 "%ld", val);
1942 }
1943 }
1944 } else {
1945 int i = 0;
1946
1947 while (i < 6) {
1948 /* special care for unaligned accesses */
1949 p = args + sizeof(unsigned long) * i;
1950 memcpy(&val, p, sizeof(val));
1951 printed += scnprintf(bf + printed, size - printed,
1952 "%sarg%d: %ld",
1953 printed ? ", " : "", i, val);
1954 ++i;
1955 }
1956 }
1957
1958 return printed;
1959 }
1960
/*
 * Signature shared by the tracepoint sample handlers, such as
 * trace__sys_enter() and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1964
1965 static struct syscall *trace__syscall_info(struct trace *trace,
1966 struct perf_evsel *evsel, int id)
1967 {
1968
1969 if (id < 0) {
1970
1971 /*
1972 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1973 * before that, leaving at a higher verbosity level till that is
1974 * explained. Reproduced with plain ftrace with:
1975 *
1976 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1977 * grep "NR -1 " /t/trace_pipe
1978 *
1979 * After generating some load on the machine.
1980 */
1981 if (verbose > 1) {
1982 static u64 n;
1983 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1984 id, perf_evsel__name(evsel), ++n);
1985 }
1986 return NULL;
1987 }
1988
1989 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1990 trace__read_syscall_info(trace, id))
1991 goto out_cant_read;
1992
1993 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1994 goto out_cant_read;
1995
1996 return &trace->syscalls.table[id];
1997
1998 out_cant_read:
1999 if (verbose) {
2000 fprintf(trace->output, "Problems reading syscall %d", id);
2001 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
2002 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
2003 fputs(" information\n", trace->output);
2004 }
2005 return NULL;
2006 }
2007
2008 static void thread__update_stats(struct thread_trace *ttrace,
2009 int id, struct perf_sample *sample)
2010 {
2011 struct int_node *inode;
2012 struct stats *stats;
2013 u64 duration = 0;
2014
2015 inode = intlist__findnew(ttrace->syscall_stats, id);
2016 if (inode == NULL)
2017 return;
2018
2019 stats = inode->priv;
2020 if (stats == NULL) {
2021 stats = malloc(sizeof(struct stats));
2022 if (stats == NULL)
2023 return;
2024 init_stats(stats);
2025 inode->priv = stats;
2026 }
2027
2028 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2029 duration = sample->time - ttrace->entry_time;
2030
2031 update_stats(stats, duration);
2032 }
2033
2034 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
2035 {
2036 struct thread_trace *ttrace;
2037 u64 duration;
2038 size_t printed;
2039
2040 if (trace->current == NULL)
2041 return 0;
2042
2043 ttrace = thread__priv(trace->current);
2044
2045 if (!ttrace->entry_pending)
2046 return 0;
2047
2048 duration = sample->time - ttrace->entry_time;
2049
2050 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
2051 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2052 ttrace->entry_pending = false;
2053
2054 return printed;
2055 }
2056
2057 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2058 union perf_event *event __maybe_unused,
2059 struct perf_sample *sample)
2060 {
2061 char *msg;
2062 void *args;
2063 size_t printed = 0;
2064 struct thread *thread;
2065 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2066 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2067 struct thread_trace *ttrace;
2068
2069 if (sc == NULL)
2070 return -1;
2071
2072 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2073 ttrace = thread__trace(thread, trace->output);
2074 if (ttrace == NULL)
2075 goto out_put;
2076
2077 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2078
2079 if (ttrace->entry_str == NULL) {
2080 ttrace->entry_str = malloc(trace__entry_str_size);
2081 if (!ttrace->entry_str)
2082 goto out_put;
2083 }
2084
2085 if (!trace->summary_only)
2086 trace__printf_interrupted_entry(trace, sample);
2087
2088 ttrace->entry_time = sample->time;
2089 msg = ttrace->entry_str;
2090 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2091
2092 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2093 args, trace, thread);
2094
2095 if (sc->is_exit) {
2096 if (!trace->duration_filter && !trace->summary_only) {
2097 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2098 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2099 }
2100 } else {
2101 ttrace->entry_pending = true;
2102 /* See trace__vfs_getname & trace__sys_exit */
2103 ttrace->filename.pending_open = false;
2104 }
2105
2106 if (trace->current != thread) {
2107 thread__put(trace->current);
2108 trace->current = thread__get(thread);
2109 }
2110 err = 0;
2111 out_put:
2112 thread__put(thread);
2113 return err;
2114 }
2115
2116 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2117 union perf_event *event __maybe_unused,
2118 struct perf_sample *sample)
2119 {
2120 long ret;
2121 u64 duration = 0;
2122 struct thread *thread;
2123 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2124 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2125 struct thread_trace *ttrace;
2126
2127 if (sc == NULL)
2128 return -1;
2129
2130 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2131 ttrace = thread__trace(thread, trace->output);
2132 if (ttrace == NULL)
2133 goto out_put;
2134
2135 if (trace->summary)
2136 thread__update_stats(ttrace, id, sample);
2137
2138 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2139
2140 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
2141 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2142 ttrace->filename.pending_open = false;
2143 ++trace->stats.vfs_getname;
2144 }
2145
2146 ttrace->exit_time = sample->time;
2147
2148 if (ttrace->entry_time) {
2149 duration = sample->time - ttrace->entry_time;
2150 if (trace__filter_duration(trace, duration))
2151 goto out;
2152 } else if (trace->duration_filter)
2153 goto out;
2154
2155 if (trace->summary_only)
2156 goto out;
2157
2158 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2159
2160 if (ttrace->entry_pending) {
2161 fprintf(trace->output, "%-70s", ttrace->entry_str);
2162 } else {
2163 fprintf(trace->output, " ... [");
2164 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2165 fprintf(trace->output, "]: %s()", sc->name);
2166 }
2167
2168 if (sc->fmt == NULL) {
2169 signed_print:
2170 fprintf(trace->output, ") = %ld", ret);
2171 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
2172 char bf[STRERR_BUFSIZE];
2173 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2174 *e = audit_errno_to_name(-ret);
2175
2176 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2177 } else if (ret == 0 && sc->fmt->timeout)
2178 fprintf(trace->output, ") = 0 Timeout");
2179 else if (sc->fmt->hexret)
2180 fprintf(trace->output, ") = %#lx", ret);
2181 else if (sc->fmt->errpid) {
2182 struct thread *child = machine__find_thread(trace->host, ret, ret);
2183
2184 if (child != NULL) {
2185 fprintf(trace->output, ") = %ld", ret);
2186 if (child->comm_set)
2187 fprintf(trace->output, " (%s)", thread__comm_str(child));
2188 thread__put(child);
2189 }
2190 } else
2191 goto signed_print;
2192
2193 fputc('\n', trace->output);
2194
2195 if (sample->callchain) {
2196 struct addr_location al;
2197 /* TODO: user-configurable print_opts */
2198 const unsigned int print_opts = PRINT_IP_OPT_SYM
2199 | PRINT_IP_OPT_DSO;
2200
2201 if (machine__resolve(trace->host, &al, sample) < 0) {
2202 pr_err("problem processing %d event, skipping it.\n",
2203 event->header.type);
2204 goto out_put;
2205 }
2206 perf_evsel__fprintf_callchain(evsel, sample, &al, 38, print_opts,
2207 scripting_max_stack, trace->output);
2208 }
2209 out:
2210 ttrace->entry_pending = false;
2211 err = 0;
2212 out_put:
2213 thread__put(thread);
2214 return err;
2215 }
2216
2217 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2218 union perf_event *event __maybe_unused,
2219 struct perf_sample *sample)
2220 {
2221 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2222 struct thread_trace *ttrace;
2223 size_t filename_len, entry_str_len, to_move;
2224 ssize_t remaining_space;
2225 char *pos;
2226 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2227
2228 if (!thread)
2229 goto out;
2230
2231 ttrace = thread__priv(thread);
2232 if (!ttrace)
2233 goto out;
2234
2235 filename_len = strlen(filename);
2236
2237 if (ttrace->filename.namelen < filename_len) {
2238 char *f = realloc(ttrace->filename.name, filename_len + 1);
2239
2240 if (f == NULL)
2241 goto out;
2242
2243 ttrace->filename.namelen = filename_len;
2244 ttrace->filename.name = f;
2245 }
2246
2247 strcpy(ttrace->filename.name, filename);
2248 ttrace->filename.pending_open = true;
2249
2250 if (!ttrace->filename.ptr)
2251 goto out;
2252
2253 entry_str_len = strlen(ttrace->entry_str);
2254 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2255 if (remaining_space <= 0)
2256 goto out;
2257
2258 if (filename_len > (size_t)remaining_space) {
2259 filename += filename_len - remaining_space;
2260 filename_len = remaining_space;
2261 }
2262
2263 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2264 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2265 memmove(pos + filename_len, pos, to_move);
2266 memcpy(pos, filename, filename_len);
2267
2268 ttrace->filename.ptr = 0;
2269 ttrace->filename.entry_str_pos = 0;
2270 out:
2271 return 0;
2272 }
2273
2274 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2275 union perf_event *event __maybe_unused,
2276 struct perf_sample *sample)
2277 {
2278 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2279 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2280 struct thread *thread = machine__findnew_thread(trace->host,
2281 sample->pid,
2282 sample->tid);
2283 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2284
2285 if (ttrace == NULL)
2286 goto out_dump;
2287
2288 ttrace->runtime_ms += runtime_ms;
2289 trace->runtime_ms += runtime_ms;
2290 thread__put(thread);
2291 return 0;
2292
2293 out_dump:
2294 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2295 evsel->name,
2296 perf_evsel__strval(evsel, sample, "comm"),
2297 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2298 runtime,
2299 perf_evsel__intval(evsel, sample, "vruntime"));
2300 thread__put(thread);
2301 return 0;
2302 }
2303
2304 static void bpf_output__printer(enum binary_printer_ops op,
2305 unsigned int val, void *extra)
2306 {
2307 FILE *output = extra;
2308 unsigned char ch = (unsigned char)val;
2309
2310 switch (op) {
2311 case BINARY_PRINT_CHAR_DATA:
2312 fprintf(output, "%c", isprint(ch) ? ch : '.');
2313 break;
2314 case BINARY_PRINT_DATA_BEGIN:
2315 case BINARY_PRINT_LINE_BEGIN:
2316 case BINARY_PRINT_ADDR:
2317 case BINARY_PRINT_NUM_DATA:
2318 case BINARY_PRINT_NUM_PAD:
2319 case BINARY_PRINT_SEP:
2320 case BINARY_PRINT_CHAR_PAD:
2321 case BINARY_PRINT_LINE_END:
2322 case BINARY_PRINT_DATA_END:
2323 default:
2324 break;
2325 }
2326 }
2327
2328 static void bpf_output__fprintf(struct trace *trace,
2329 struct perf_sample *sample)
2330 {
2331 print_binary(sample->raw_data, sample->raw_size, 8,
2332 bpf_output__printer, trace->output);
2333 }
2334
2335 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2336 union perf_event *event __maybe_unused,
2337 struct perf_sample *sample)
2338 {
2339 trace__printf_interrupted_entry(trace, sample);
2340 trace__fprintf_tstamp(trace, sample->time, trace->output);
2341
2342 if (trace->trace_syscalls)
2343 fprintf(trace->output, "( ): ");
2344
2345 fprintf(trace->output, "%s:", evsel->name);
2346
2347 if (perf_evsel__is_bpf_output(evsel)) {
2348 bpf_output__fprintf(trace, sample);
2349 } else if (evsel->tp_format) {
2350 event_format__fprintf(evsel->tp_format, sample->cpu,
2351 sample->raw_data, sample->raw_size,
2352 trace->output);
2353 }
2354
2355 fprintf(trace->output, ")\n");
2356 return 0;
2357 }
2358
2359 static void print_location(FILE *f, struct perf_sample *sample,
2360 struct addr_location *al,
2361 bool print_dso, bool print_sym)
2362 {
2363
2364 if ((verbose || print_dso) && al->map)
2365 fprintf(f, "%s@", al->map->dso->long_name);
2366
2367 if ((verbose || print_sym) && al->sym)
2368 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2369 al->addr - al->sym->start);
2370 else if (al->map)
2371 fprintf(f, "0x%" PRIx64, al->addr);
2372 else
2373 fprintf(f, "0x%" PRIx64, sample->addr);
2374 }
2375
2376 static int trace__pgfault(struct trace *trace,
2377 struct perf_evsel *evsel,
2378 union perf_event *event __maybe_unused,
2379 struct perf_sample *sample)
2380 {
2381 struct thread *thread;
2382 struct addr_location al;
2383 char map_type = 'd';
2384 struct thread_trace *ttrace;
2385 int err = -1;
2386
2387 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2388 ttrace = thread__trace(thread, trace->output);
2389 if (ttrace == NULL)
2390 goto out_put;
2391
2392 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2393 ttrace->pfmaj++;
2394 else
2395 ttrace->pfmin++;
2396
2397 if (trace->summary_only)
2398 goto out;
2399
2400 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2401 sample->ip, &al);
2402
2403 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2404
2405 fprintf(trace->output, "%sfault [",
2406 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2407 "maj" : "min");
2408
2409 print_location(trace->output, sample, &al, false, true);
2410
2411 fprintf(trace->output, "] => ");
2412
2413 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2414 sample->addr, &al);
2415
2416 if (!al.map) {
2417 thread__find_addr_location(thread, sample->cpumode,
2418 MAP__FUNCTION, sample->addr, &al);
2419
2420 if (al.map)
2421 map_type = 'x';
2422 else
2423 map_type = '?';
2424 }
2425
2426 print_location(trace->output, sample, &al, true, false);
2427
2428 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2429 out:
2430 err = 0;
2431 out_put:
2432 thread__put(thread);
2433 return err;
2434 }
2435
2436 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2437 {
2438 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2439 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2440 return false;
2441
2442 if (trace->pid_list || trace->tid_list)
2443 return true;
2444
2445 return false;
2446 }
2447
2448 static void trace__set_base_time(struct trace *trace,
2449 struct perf_evsel *evsel,
2450 struct perf_sample *sample)
2451 {
2452 /*
2453 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2454 * and don't use sample->time unconditionally, we may end up having
2455 * some other event in the future without PERF_SAMPLE_TIME for good
2456 * reason, i.e. we may not be interested in its timestamps, just in
2457 * it taking place, picking some piece of information when it
2458 * appears in our event stream (vfs_getname comes to mind).
2459 */
2460 if (trace->base_time == 0 && !trace->full_time &&
2461 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2462 trace->base_time = sample->time;
2463 }
2464
2465 static int trace__process_sample(struct perf_tool *tool,
2466 union perf_event *event,
2467 struct perf_sample *sample,
2468 struct perf_evsel *evsel,
2469 struct machine *machine __maybe_unused)
2470 {
2471 struct trace *trace = container_of(tool, struct trace, tool);
2472 int err = 0;
2473
2474 tracepoint_handler handler = evsel->handler;
2475
2476 if (skip_sample(trace, sample))
2477 return 0;
2478
2479 trace__set_base_time(trace, evsel, sample);
2480
2481 if (handler) {
2482 ++trace->nr_events;
2483 handler(trace, evsel, event, sample);
2484 }
2485
2486 return err;
2487 }
2488
2489 static int parse_target_str(struct trace *trace)
2490 {
2491 if (trace->opts.target.pid) {
2492 trace->pid_list = intlist__new(trace->opts.target.pid);
2493 if (trace->pid_list == NULL) {
2494 pr_err("Error parsing process id string\n");
2495 return -EINVAL;
2496 }
2497 }
2498
2499 if (trace->opts.target.tid) {
2500 trace->tid_list = intlist__new(trace->opts.target.tid);
2501 if (trace->tid_list == NULL) {
2502 pr_err("Error parsing thread id string\n");
2503 return -EINVAL;
2504 }
2505 }
2506
2507 return 0;
2508 }
2509
2510 static int trace__record(struct trace *trace, int argc, const char **argv)
2511 {
2512 unsigned int rec_argc, i, j;
2513 const char **rec_argv;
2514 const char * const record_args[] = {
2515 "record",
2516 "-R",
2517 "-m", "1024",
2518 "-c", "1",
2519 };
2520
2521 const char * const sc_args[] = { "-e", };
2522 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2523 const char * const majpf_args[] = { "-e", "major-faults" };
2524 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2525 const char * const minpf_args[] = { "-e", "minor-faults" };
2526 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2527
2528 /* +1 is for the event string below */
2529 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2530 majpf_args_nr + minpf_args_nr + argc;
2531 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2532
2533 if (rec_argv == NULL)
2534 return -ENOMEM;
2535
2536 j = 0;
2537 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2538 rec_argv[j++] = record_args[i];
2539
2540 if (trace->trace_syscalls) {
2541 for (i = 0; i < sc_args_nr; i++)
2542 rec_argv[j++] = sc_args[i];
2543
2544 /* event string may be different for older kernels - e.g., RHEL6 */
2545 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2546 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2547 else if (is_valid_tracepoint("syscalls:sys_enter"))
2548 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2549 else {
2550 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2551 return -1;
2552 }
2553 }
2554
2555 if (trace->trace_pgfaults & TRACE_PFMAJ)
2556 for (i = 0; i < majpf_args_nr; i++)
2557 rec_argv[j++] = majpf_args[i];
2558
2559 if (trace->trace_pgfaults & TRACE_PFMIN)
2560 for (i = 0; i < minpf_args_nr; i++)
2561 rec_argv[j++] = minpf_args[i];
2562
2563 for (i = 0; i < (unsigned int)argc; i++)
2564 rec_argv[j++] = argv[i];
2565
2566 return cmd_record(j, rec_argv, NULL);
2567 }
2568
2569 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2570
2571 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2572 {
2573 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2574
2575 if (IS_ERR(evsel))
2576 return false;
2577
2578 if (perf_evsel__field(evsel, "pathname") == NULL) {
2579 perf_evsel__delete(evsel);
2580 return false;
2581 }
2582
2583 evsel->handler = trace__vfs_getname;
2584 perf_evlist__add(evlist, evsel);
2585 return true;
2586 }
2587
2588 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2589 u64 config)
2590 {
2591 struct perf_evsel *evsel;
2592 struct perf_event_attr attr = {
2593 .type = PERF_TYPE_SOFTWARE,
2594 .mmap_data = 1,
2595 };
2596
2597 attr.config = config;
2598 attr.sample_period = 1;
2599
2600 event_attr_init(&attr);
2601
2602 evsel = perf_evsel__new(&attr);
2603 if (!evsel)
2604 return -ENOMEM;
2605
2606 evsel->handler = trace__pgfault;
2607 perf_evlist__add(evlist, evsel);
2608
2609 return 0;
2610 }
2611
2612 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2613 {
2614 const u32 type = event->header.type;
2615 struct perf_evsel *evsel;
2616
2617 if (type != PERF_RECORD_SAMPLE) {
2618 trace__process_event(trace, trace->host, event, sample);
2619 return;
2620 }
2621
2622 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2623 if (evsel == NULL) {
2624 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2625 return;
2626 }
2627
2628 trace__set_base_time(trace, evsel, sample);
2629
2630 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2631 sample->raw_data == NULL) {
2632 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2633 perf_evsel__name(evsel), sample->tid,
2634 sample->cpu, sample->raw_size);
2635 } else {
2636 tracepoint_handler handler = evsel->handler;
2637 handler(trace, evsel, event, sample);
2638 }
2639 }
2640
2641 static int trace__add_syscall_newtp(struct trace *trace)
2642 {
2643 int ret = -1;
2644 struct perf_evlist *evlist = trace->evlist;
2645 struct perf_evsel *sys_enter, *sys_exit;
2646
2647 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2648 if (sys_enter == NULL)
2649 goto out;
2650
2651 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2652 goto out_delete_sys_enter;
2653
2654 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2655 if (sys_exit == NULL)
2656 goto out_delete_sys_enter;
2657
2658 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2659 goto out_delete_sys_exit;
2660
2661 perf_evlist__add(evlist, sys_enter);
2662 perf_evlist__add(evlist, sys_exit);
2663
2664 trace->syscalls.events.sys_enter = sys_enter;
2665 trace->syscalls.events.sys_exit = sys_exit;
2666
2667 ret = 0;
2668 out:
2669 return ret;
2670
2671 out_delete_sys_exit:
2672 perf_evsel__delete_priv(sys_exit);
2673 out_delete_sys_enter:
2674 perf_evsel__delete_priv(sys_enter);
2675 goto out;
2676 }
2677
2678 static int trace__set_ev_qualifier_filter(struct trace *trace)
2679 {
2680 int err = -1;
2681 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2682 trace->ev_qualifier_ids.nr,
2683 trace->ev_qualifier_ids.entries);
2684
2685 if (filter == NULL)
2686 goto out_enomem;
2687
2688 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2689 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2690
2691 free(filter);
2692 out:
2693 return err;
2694 out_enomem:
2695 errno = ENOMEM;
2696 goto out;
2697 }
2698
2699 static int trace__run(struct trace *trace, int argc, const char **argv)
2700 {
2701 struct perf_evlist *evlist = trace->evlist;
2702 struct perf_evsel *evsel;
2703 int err = -1, i;
2704 unsigned long before;
2705 const bool forks = argc > 0;
2706 bool draining = false;
2707
2708 trace->live = true;
2709
2710 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2711 goto out_error_raw_syscalls;
2712
2713 if (trace->trace_syscalls)
2714 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2715
2716 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2717 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2718 goto out_error_mem;
2719 }
2720
2721 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2722 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2723 goto out_error_mem;
2724
2725 if (trace->sched &&
2726 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2727 trace__sched_stat_runtime))
2728 goto out_error_sched_stat_runtime;
2729
2730 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2731 if (err < 0) {
2732 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2733 goto out_delete_evlist;
2734 }
2735
2736 err = trace__symbols_init(trace, evlist);
2737 if (err < 0) {
2738 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2739 goto out_delete_evlist;
2740 }
2741
2742 perf_evlist__config(evlist, &trace->opts);
2743
2744 signal(SIGCHLD, sig_handler);
2745 signal(SIGINT, sig_handler);
2746
2747 if (forks) {
2748 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2749 argv, false, NULL);
2750 if (err < 0) {
2751 fprintf(trace->output, "Couldn't run the workload!\n");
2752 goto out_delete_evlist;
2753 }
2754 }
2755
2756 err = perf_evlist__open(evlist);
2757 if (err < 0)
2758 goto out_error_open;
2759
2760 err = bpf__apply_obj_config();
2761 if (err) {
2762 char errbuf[BUFSIZ];
2763
2764 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2765 pr_err("ERROR: Apply config to BPF failed: %s\n",
2766 errbuf);
2767 goto out_error_open;
2768 }
2769
2770 /*
2771 * Better not use !target__has_task() here because we need to cover the
2772 * case where no threads were specified in the command line, but a
2773 * workload was, and in that case we will fill in the thread_map when
2774 * we fork the workload in perf_evlist__prepare_workload.
2775 */
2776 if (trace->filter_pids.nr > 0)
2777 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2778 else if (thread_map__pid(evlist->threads, 0) == -1)
2779 err = perf_evlist__set_filter_pid(evlist, getpid());
2780
2781 if (err < 0)
2782 goto out_error_mem;
2783
2784 if (trace->ev_qualifier_ids.nr > 0) {
2785 err = trace__set_ev_qualifier_filter(trace);
2786 if (err < 0)
2787 goto out_errno;
2788
2789 pr_debug("event qualifier tracepoint filter: %s\n",
2790 trace->syscalls.events.sys_exit->filter);
2791 }
2792
2793 err = perf_evlist__apply_filters(evlist, &evsel);
2794 if (err < 0)
2795 goto out_error_apply_filters;
2796
2797 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2798 if (err < 0)
2799 goto out_error_mmap;
2800
2801 if (!target__none(&trace->opts.target))
2802 perf_evlist__enable(evlist);
2803
2804 if (forks)
2805 perf_evlist__start_workload(evlist);
2806
2807 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2808 evlist->threads->nr > 1 ||
2809 perf_evlist__first(evlist)->attr.inherit;
2810 again:
2811 before = trace->nr_events;
2812
2813 for (i = 0; i < evlist->nr_mmaps; i++) {
2814 union perf_event *event;
2815
2816 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2817 struct perf_sample sample;
2818
2819 ++trace->nr_events;
2820
2821 err = perf_evlist__parse_sample(evlist, event, &sample);
2822 if (err) {
2823 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2824 goto next_event;
2825 }
2826
2827 trace__handle_event(trace, event, &sample);
2828 next_event:
2829 perf_evlist__mmap_consume(evlist, i);
2830
2831 if (interrupted)
2832 goto out_disable;
2833
2834 if (done && !draining) {
2835 perf_evlist__disable(evlist);
2836 draining = true;
2837 }
2838 }
2839 }
2840
2841 if (trace->nr_events == before) {
2842 int timeout = done ? 100 : -1;
2843
2844 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2845 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2846 draining = true;
2847
2848 goto again;
2849 }
2850 } else {
2851 goto again;
2852 }
2853
2854 out_disable:
2855 thread__zput(trace->current);
2856
2857 perf_evlist__disable(evlist);
2858
2859 if (!err) {
2860 if (trace->summary)
2861 trace__fprintf_thread_summary(trace, trace->output);
2862
2863 if (trace->show_tool_stats) {
2864 fprintf(trace->output, "Stats:\n "
2865 " vfs_getname : %" PRIu64 "\n"
2866 " proc_getname: %" PRIu64 "\n",
2867 trace->stats.vfs_getname,
2868 trace->stats.proc_getname);
2869 }
2870 }
2871
2872 out_delete_evlist:
2873 perf_evlist__delete(evlist);
2874 trace->evlist = NULL;
2875 trace->live = false;
2876 return err;
2877 {
2878 char errbuf[BUFSIZ];
2879
2880 out_error_sched_stat_runtime:
2881 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2882 goto out_error;
2883
2884 out_error_raw_syscalls:
2885 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2886 goto out_error;
2887
2888 out_error_mmap:
2889 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2890 goto out_error;
2891
2892 out_error_open:
2893 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2894
2895 out_error:
2896 fprintf(trace->output, "%s\n", errbuf);
2897 goto out_delete_evlist;
2898
2899 out_error_apply_filters:
2900 fprintf(trace->output,
2901 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2902 evsel->filter, perf_evsel__name(evsel), errno,
2903 strerror_r(errno, errbuf, sizeof(errbuf)));
2904 goto out_delete_evlist;
2905 }
2906 out_error_mem:
2907 fprintf(trace->output, "Not enough memory to run!\n");
2908 goto out_delete_evlist;
2909
2910 out_errno:
2911 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2912 goto out_delete_evlist;
2913 }
2914
2915 static int trace__replay(struct trace *trace)
2916 {
2917 const struct perf_evsel_str_handler handlers[] = {
2918 { "probe:vfs_getname", trace__vfs_getname, },
2919 };
2920 struct perf_data_file file = {
2921 .path = input_name,
2922 .mode = PERF_DATA_MODE_READ,
2923 .force = trace->force,
2924 };
2925 struct perf_session *session;
2926 struct perf_evsel *evsel;
2927 int err = -1;
2928
2929 trace->tool.sample = trace__process_sample;
2930 trace->tool.mmap = perf_event__process_mmap;
2931 trace->tool.mmap2 = perf_event__process_mmap2;
2932 trace->tool.comm = perf_event__process_comm;
2933 trace->tool.exit = perf_event__process_exit;
2934 trace->tool.fork = perf_event__process_fork;
2935 trace->tool.attr = perf_event__process_attr;
2936 trace->tool.tracing_data = perf_event__process_tracing_data;
2937 trace->tool.build_id = perf_event__process_build_id;
2938
2939 trace->tool.ordered_events = true;
2940 trace->tool.ordering_requires_timestamps = true;
2941
2942 /* add tid to output */
2943 trace->multiple_threads = true;
2944
2945 session = perf_session__new(&file, false, &trace->tool);
2946 if (session == NULL)
2947 return -1;
2948
2949 if (symbol__init(&session->header.env) < 0)
2950 goto out;
2951
2952 trace->host = &session->machines.host;
2953
2954 err = perf_session__set_tracepoints_handlers(session, handlers);
2955 if (err)
2956 goto out;
2957
2958 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2959 "raw_syscalls:sys_enter");
2960 /* older kernels have syscalls tp versus raw_syscalls */
2961 if (evsel == NULL)
2962 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2963 "syscalls:sys_enter");
2964
2965 if (evsel &&
2966 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2967 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2968 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2969 goto out;
2970 }
2971
2972 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2973 "raw_syscalls:sys_exit");
2974 if (evsel == NULL)
2975 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2976 "syscalls:sys_exit");
2977 if (evsel &&
2978 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2979 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2980 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2981 goto out;
2982 }
2983
2984 evlist__for_each(session->evlist, evsel) {
2985 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2986 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2987 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2988 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2989 evsel->handler = trace__pgfault;
2990 }
2991
2992 err = parse_target_str(trace);
2993 if (err != 0)
2994 goto out;
2995
2996 setup_pager();
2997
2998 err = perf_session__process_events(session);
2999 if (err)
3000 pr_err("Failed to process events, error %d", err);
3001
3002 else if (trace->summary)
3003 trace__fprintf_thread_summary(trace, trace->output);
3004
3005 out:
3006 perf_session__delete(session);
3007
3008 return err;
3009 }
3010
/*
 * Print the heading of the per thread summary, returning what fprintf()
 * returned for it.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
3019
3020 static size_t thread__dump_stats(struct thread_trace *ttrace,
3021 struct trace *trace, FILE *fp)
3022 {
3023 struct stats *stats;
3024 size_t printed = 0;
3025 struct syscall *sc;
3026 struct int_node *inode = intlist__first(ttrace->syscall_stats);
3027
3028 if (inode == NULL)
3029 return 0;
3030
3031 printed += fprintf(fp, "\n");
3032
3033 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
3034 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
3035 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
3036
3037 /* each int_node is a syscall */
3038 while (inode) {
3039 stats = inode->priv;
3040 if (stats) {
3041 double min = (double)(stats->min) / NSEC_PER_MSEC;
3042 double max = (double)(stats->max) / NSEC_PER_MSEC;
3043 double avg = avg_stats(stats);
3044 double pct;
3045 u64 n = (u64) stats->n;
3046
3047 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3048 avg /= NSEC_PER_MSEC;
3049
3050 sc = &trace->syscalls.table[inode->i];
3051 printed += fprintf(fp, " %-15s", sc->name);
3052 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3053 n, avg * n, min, avg);
3054 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3055 }
3056
3057 inode = intlist__next(inode);
3058 }
3059
3060 printed += fprintf(fp, "\n\n");
3061
3062 return printed;
3063 }
3064
/*
 * Context handed to the per thread summary callback via
 * machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* where the summary is printed */
	struct trace *trace;	/* the trace session being summarized */
	size_t printed;		/* running count of characters printed */
};
3071
3072 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
3073 {
3074 struct summary_data *data = priv;
3075 FILE *fp = data->fp;
3076 size_t printed = data->printed;
3077 struct trace *trace = data->trace;
3078 struct thread_trace *ttrace = thread__priv(thread);
3079 double ratio;
3080
3081 if (ttrace == NULL)
3082 return 0;
3083
3084 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3085
3086 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3087 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3088 printed += fprintf(fp, "%.1f%%", ratio);
3089 if (ttrace->pfmaj)
3090 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3091 if (ttrace->pfmin)
3092 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3093 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3094 printed += thread__dump_stats(ttrace, trace, fp);
3095
3096 data->printed += printed;
3097
3098 return 0;
3099 }
3100
3101 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3102 {
3103 struct summary_data data = {
3104 .fp = fp,
3105 .trace = trace
3106 };
3107 data.printed = trace__fprintf_threads_header(fp);
3108
3109 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
3110
3111 return data.printed;
3112 }
3113
3114 static int trace__set_duration(const struct option *opt, const char *str,
3115 int unset __maybe_unused)
3116 {
3117 struct trace *trace = opt->value;
3118
3119 trace->duration_filter = atof(str);
3120 return 0;
3121 }
3122
3123 static int trace__set_filter_pids(const struct option *opt, const char *str,
3124 int unset __maybe_unused)
3125 {
3126 int ret = -1;
3127 size_t i;
3128 struct trace *trace = opt->value;
3129 /*
3130 * FIXME: introduce a intarray class, plain parse csv and create a
3131 * { int nr, int entries[] } struct...
3132 */
3133 struct intlist *list = intlist__new(str);
3134
3135 if (list == NULL)
3136 return -1;
3137
3138 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3139 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3140
3141 if (trace->filter_pids.entries == NULL)
3142 goto out;
3143
3144 trace->filter_pids.entries[0] = getpid();
3145
3146 for (i = 1; i < trace->filter_pids.nr; ++i)
3147 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3148
3149 intlist__delete(list);
3150 ret = 0;
3151 out:
3152 return ret;
3153 }
3154
3155 static int trace__open_output(struct trace *trace, const char *filename)
3156 {
3157 struct stat st;
3158
3159 if (!stat(filename, &st) && st.st_size) {
3160 char oldname[PATH_MAX];
3161
3162 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3163 unlink(oldname);
3164 rename(filename, oldname);
3165 }
3166
3167 trace->output = fopen(filename, "w");
3168
3169 return trace->output == NULL ? -errno : 0;
3170 }
3171
3172 static int parse_pagefaults(const struct option *opt, const char *str,
3173 int unset __maybe_unused)
3174 {
3175 int *trace_pgfaults = opt->value;
3176
3177 if (strcmp(str, "all") == 0)
3178 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3179 else if (strcmp(str, "maj") == 0)
3180 *trace_pgfaults |= TRACE_PFMAJ;
3181 else if (strcmp(str, "min") == 0)
3182 *trace_pgfaults |= TRACE_PFMIN;
3183 else
3184 return -1;
3185
3186 return 0;
3187 }
3188
3189 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3190 {
3191 struct perf_evsel *evsel;
3192
3193 evlist__for_each(evlist, evsel)
3194 evsel->handler = handler;
3195 }
3196
3197 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3198 {
3199 const char *trace_usage[] = {
3200 "perf trace [<options>] [<command>]",
3201 "perf trace [<options>] -- <command> [<options>]",
3202 "perf trace record [<options>] [<command>]",
3203 "perf trace record [<options>] -- <command> [<options>]",
3204 NULL
3205 };
3206 struct trace trace = {
3207 .syscalls = {
3208 . max = -1,
3209 },
3210 .opts = {
3211 .target = {
3212 .uid = UINT_MAX,
3213 .uses_mmap = true,
3214 },
3215 .user_freq = UINT_MAX,
3216 .user_interval = ULLONG_MAX,
3217 .no_buffering = true,
3218 .mmap_pages = UINT_MAX,
3219 .proc_map_timeout = 500,
3220 },
3221 .output = stderr,
3222 .show_comm = true,
3223 .trace_syscalls = true,
3224 };
3225 const char *output_name = NULL;
3226 const char *ev_qualifier_str = NULL;
3227 const struct option trace_options[] = {
3228 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3229 "event selector. use 'perf list' to list available events",
3230 parse_events_option),
3231 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3232 "show the thread COMM next to its id"),
3233 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3234 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3235 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3236 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3237 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3238 "trace events on existing process id"),
3239 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3240 "trace events on existing thread id"),
3241 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3242 "pids to filter (by the kernel)", trace__set_filter_pids),
3243 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3244 "system-wide collection from all CPUs"),
3245 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3246 "list of cpus to monitor"),
3247 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3248 "child tasks do not inherit counters"),
3249 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3250 "number of mmap data pages",
3251 perf_evlist__parse_mmap_pages),
3252 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3253 "user to profile"),
3254 OPT_CALLBACK(0, "duration", &trace, "float",
3255 "show only events with duration > N.M ms",
3256 trace__set_duration),
3257 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3258 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3259 OPT_BOOLEAN('T', "time", &trace.full_time,
3260 "Show full timestamp, not time relative to first start"),
3261 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3262 "Show only syscall summary with statistics"),
3263 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3264 "Show all syscalls and summary with statistics"),
3265 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3266 "Trace pagefaults", parse_pagefaults, "maj"),
3267 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3268 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3269 OPT_CALLBACK(0, "call-graph", &trace.opts,
3270 "record_mode[,record_size]", record_callchain_help,
3271 &record_parse_callchain_opt),
3272 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3273 "per thread proc mmap processing timeout in ms"),
3274 OPT_END()
3275 };
3276 const char * const trace_subcommands[] = { "record", NULL };
3277 int err;
3278 char bf[BUFSIZ];
3279
3280 signal(SIGSEGV, sighandler_dump_stack);
3281 signal(SIGFPE, sighandler_dump_stack);
3282
3283 trace.evlist = perf_evlist__new();
3284 trace.sctbl = syscalltbl__new();
3285
3286 if (trace.evlist == NULL || trace.sctbl == NULL) {
3287 pr_err("Not enough memory to run!\n");
3288 err = -ENOMEM;
3289 goto out;
3290 }
3291
3292 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3293 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3294
3295 err = bpf__setup_stdout(trace.evlist);
3296 if (err) {
3297 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3298 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3299 goto out;
3300 }
3301
3302 if (trace.trace_pgfaults) {
3303 trace.opts.sample_address = true;
3304 trace.opts.sample_time = true;
3305 }
3306
3307 if (trace.opts.callgraph_set)
3308 symbol_conf.use_callchain = true;
3309
3310 if (trace.evlist->nr_entries > 0)
3311 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3312
3313 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3314 return trace__record(&trace, argc-1, &argv[1]);
3315
3316 /* summary_only implies summary option, but don't overwrite summary if set */
3317 if (trace.summary_only)
3318 trace.summary = trace.summary_only;
3319
3320 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3321 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3322 pr_err("Please specify something to trace.\n");
3323 return -1;
3324 }
3325
3326 if (output_name != NULL) {
3327 err = trace__open_output(&trace, output_name);
3328 if (err < 0) {
3329 perror("failed to create output file");
3330 goto out;
3331 }
3332 }
3333
3334 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3335
3336 if (ev_qualifier_str != NULL) {
3337 const char *s = ev_qualifier_str;
3338 struct strlist_config slist_config = {
3339 .dirname = system_path(STRACE_GROUPS_DIR),
3340 };
3341
3342 trace.not_ev_qualifier = *s == '!';
3343 if (trace.not_ev_qualifier)
3344 ++s;
3345 trace.ev_qualifier = strlist__new(s, &slist_config);
3346 if (trace.ev_qualifier == NULL) {
3347 fputs("Not enough memory to parse event qualifier",
3348 trace.output);
3349 err = -ENOMEM;
3350 goto out_close;
3351 }
3352
3353 err = trace__validate_ev_qualifier(&trace);
3354 if (err)
3355 goto out_close;
3356 }
3357
3358 err = target__validate(&trace.opts.target);
3359 if (err) {
3360 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3361 fprintf(trace.output, "%s", bf);
3362 goto out_close;
3363 }
3364
3365 err = target__parse_uid(&trace.opts.target);
3366 if (err) {
3367 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3368 fprintf(trace.output, "%s", bf);
3369 goto out_close;
3370 }
3371
3372 if (!argc && target__none(&trace.opts.target))
3373 trace.opts.target.system_wide = true;
3374
3375 if (input_name)
3376 err = trace__replay(&trace);
3377 else
3378 err = trace__run(&trace, argc, argv);
3379
3380 out_close:
3381 if (output_name != NULL)
3382 fclose(trace.output);
3383 out:
3384 return err;
3385 }
This page took 0.155389 seconds and 4 git commands to generate.