perf trace: Use the fd->name beautifier as default for "fd" args
[deliverable/linux.git] / tools / perf / builtin-trace.c
1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48 #include <linux/stringify.h>
49
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC 02000000
52 #endif
53
/*
 * Global state for one 'perf trace' session: selected events, qualifier
 * and pid filters, output settings and accumulated statistics.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;		/* syscall id <-> name table */
	struct {
		int		max;		/* bounds the 'table' below */
		struct syscall  *table;		/* indexed by syscall id */
		struct {
			struct perf_evsel *sys_enter,
					  *sys_exit;	/* raw_syscalls (or syscalls) tracepoint evsels */
		}		events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* syscall name qualifier list */
	struct {
		size_t		nr;
		int		*entries;	/* ev_qualifier resolved to syscall ids */
	} ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;
	} filter_pids;
	double			duration_filter;
	double			runtime_ms;
	struct {
		u64		vfs_getname,
				proc_getname;	/* tool stats: how fd/file names got resolved */
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* ev_qualifier is an exclusion list */
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	int			open_id;	/* NOTE(review): presumably the syscall id of open(2) -- confirm at use sites */
};
107
/*
 * Accessor for one tracepoint field: the byte offset into the raw sample
 * payload plus a fetch function, either integer- or pointer-flavoured
 * (only one of the two is ever set, hence the union).
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
115
/*
 * TP_UINT_FIELD(bits) defines tp_field__u<bits>(): fetch an unsigned
 * <bits>-bit integer from the sample's raw payload at field->offset.
 * memcpy() is used instead of a direct load so unaligned offsets are safe.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
128
/*
 * Like TP_UINT_FIELD(), but byte-swaps the value: used when the perf.data
 * file was recorded on a host of the opposite endianness (selected via
 * needs_swap in tp_field__init_uint()).  No 8-bit variant: single bytes
 * have no endianness.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
140
141 static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144 {
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165 }
166
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169 return sample->raw_data + field->offset;
170 }
171
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177 }
178
/*
 * The fields we read from the raw_syscalls:sys_{enter,exit} tracepoints:
 * the syscall 'id' plus, depending on direction, the 'args' payload
 * (sys_enter) or the 'ret' value (sys_exit) -- hence the union.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
185
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189 {
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197
/*
 * Initialize member 'name' of the evsel's struct syscall_tp (stashed in
 * evsel->priv) from the tracepoint format field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
202 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
203 struct tp_field *field,
204 const char *name)
205 {
206 struct format_field *format_field = perf_evsel__field(evsel, name);
207
208 if (format_field == NULL)
209 return -1;
210
211 return tp_field__init_ptr(field, format_field);
212 }
213
/* Pointer-field counterpart of perf_evsel__init_sc_tp_uint_field(). */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
/* Free the evsel's ->priv (its struct syscall_tp), then the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
223
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
225 {
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
230
231 evsel->handler = handler;
232 return 0;
233 }
234
235 return -ENOMEM;
236
237 out_delete:
238 zfree(&evsel->priv);
239 return -ENOENT;
240 }
241
242 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
243 {
244 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
245
246 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
247 if (IS_ERR(evsel))
248 evsel = perf_evsel__newtp("syscalls", direction);
249
250 if (IS_ERR(evsel))
251 return NULL;
252
253 if (perf_evsel__init_syscall_tp(evsel, handler))
254 goto out_delete;
255
256 return evsel;
257
258 out_delete:
259 perf_evsel__delete_priv(evsel);
260 return NULL;
261 }
262
/* Fetch member 'name' of the evsel's syscall_tp from 'sample' as a u64. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same, but for in-payload (pointer) fields. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270
/*
 * Context handed to the per-argument beautifiers (SCA_* scnprintf
 * callbacks).
 */
struct syscall_arg {
	unsigned long val;	/* raw value of the argument being printed */
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;	/* per-arg parameter, e.g. a struct strarray (from arg_parm[]) */
	u8	      idx;	/* index of this argument, 0..5 */
	u8	      mask;	/* NOTE(review): presumably a bitmask of args to suppress -- confirm at use sites */
};
279
/*
 * Map small integer values to names (enum-like syscall arguments):
 * value 'v' prints as entries[v - offset].
 */
struct strarray {
	int	    offset;	/* value corresponding to entries[0] */
	int	    nr_entries;
	const char **entries;
};

/* Define strarray__<array> over 'array', with values starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, but the first entry corresponds to value 'off' (e.g. tioctls). */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
300 {
301 struct strarray *sa = arg->parm;
302 int idx = arg->val - sa->offset;
303
304 if (idx < 0 || idx >= sa->nr_entries)
305 return scnprintf(bf, size, intfmt, arg->val);
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309
/* Default strarray beautifier: out-of-range values print in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 */
/* strarray beautifier with a hex fallback, used for ioctl cmds (tioctls). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331
332 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
333 struct syscall_arg *arg);
334
335 #define SCA_FD syscall_arg__scnprintf_fd
336
337 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
338 struct syscall_arg *arg)
339 {
340 int fd = arg->val;
341
342 if (fd == AT_FDCWD)
343 return scnprintf(bf, size, "CWD");
344
345 return syscall_arg__scnprintf_fd(bf, size, arg);
346 }
347
348 #define SCA_FDAT syscall_arg__scnprintf_fd_at
349
350 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
351 struct syscall_arg *arg);
352
353 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354
/* Print an argument in hex; used for addresses/opaque values (e.g. mmap addr). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
362
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364 struct syscall_arg *arg)
365 {
366 return scnprintf(bf, size, "%d", arg->val);
367 }
368
369 #define SCA_INT syscall_arg__scnprintf_int
370
/* bpf(2) 'cmd' argument (BPF_*), see the "bpf" entry in syscall_fmts[]. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* epoll_ctl(2) 'op': EPOLL_CTL_* values start at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* {get,set}itimer(2) 'which' (ITIMER_*). */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/* keyctl(2) 'option' (KEYCTL_*). */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* lseek(2) 'whence' (SEEK_*); DATA/HOLE only where the headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* fcntl(2) 'cmd' (F_*). */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* {get,set}rlimit(2)/prlimit64(2) 'resource' (RLIMIT_*). */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* rt_sigprocmask(2) 'how' (SIG_*). */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* clock_gettime(2) 'clk_id' (CLOCK_*). */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

/* socket(2)/socketpair(2) 'family' (AF_*). */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
436
437 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
438 struct syscall_arg *arg)
439 {
440 size_t printed = 0;
441 int mode = arg->val;
442
443 if (mode == F_OK) /* 0 */
444 return scnprintf(bf, size, "F");
445 #define P_MODE(n) \
446 if (mode & n##_OK) { \
447 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
448 mode &= ~n##_OK; \
449 }
450
451 P_MODE(R);
452 P_MODE(W);
453 P_MODE(X);
454 #undef P_MODE
455
456 if (mode)
457 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
458
459 return printed;
460 }
461
462 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463
464 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
465 struct syscall_arg *arg);
466
467 #define SCA_FILENAME syscall_arg__scnprintf_filename
468
/*
 * Beautify the pipe2(2) 'flags' argument: known O_* bits print by name,
 * '|'-separated; any leftover bits print in hex.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

	/* Print flag O_##n if set, then clear it so leftovers can be detected. */
#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	/* Unknown bits: dump in hex. */
	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * Terminal ioctl(2) cmd names, indexed from TCGETS (0x5401) via
 * DEFINE_STRARRAY_OFFSET below.  The designated initializers ([0x27],
 * [0x50], [0x60]) skip over gaps in the cmd number space; the skipped
 * slots stay NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */
518
519 #ifndef GRND_NONBLOCK
520 #define GRND_NONBLOCK 0x0001
521 #endif
522 #ifndef GRND_RANDOM
523 #define GRND_RANDOM 0x0002
524 #endif
525
/*
 * Beautify the getrandom(2) 'flags' argument: GRND_RANDOM/GRND_NONBLOCK
 * print by name, '|'-separated; leftover bits print in hex.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						     struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

	/* Print flag GRND_##n if set, then clear it so leftovers can be detected. */
#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	/* Unknown bits: dump in hex. */
	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548
/*
 * Shorthand for syscall_fmts[] entries: beautify argument number 'arg' via
 * SCA_STRARRAY parameterized with strarray__<array>.  'name' is not
 * expanded -- it only documents the argument at the call site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
552
553 #include "trace/beauty/eventfd.c"
554 #include "trace/beauty/flock.c"
555 #include "trace/beauty/futex_op.c"
556 #include "trace/beauty/mmap.c"
557 #include "trace/beauty/mode_t.c"
558 #include "trace/beauty/msg_flags.c"
559 #include "trace/beauty/open_flags.c"
560 #include "trace/beauty/perf_event_open.c"
561 #include "trace/beauty/pid.c"
562 #include "trace/beauty/sched_policy.c"
563 #include "trace/beauty/seccomp.c"
564 #include "trace/beauty/signum.c"
565 #include "trace/beauty/socket_type.c"
566 #include "trace/beauty/waitid_options.c"
567
/*
 * Per-syscall pretty-printing rules.
 *
 * NOTE: keep the table sorted by ->name: syscall_fmt__find() below does a
 * bsearch() over it.
 *
 * The bool flags (errmsg, errpid, timeout, hexret) steer return-value
 * formatting; they are consumed by code outside this chunk -- see the
 * sys_exit handling.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;	/* alternate name, e.g. "newstat" for "stat" */
	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
	void	   *arg_parm[6];	/* optional parm for arg_scnprintf[i] (arg->parm) */
	bool	   errmsg;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
			     [1] = SCA_ACCMODE,  /* mode */ }, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
	{ .name	    = "brk",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
	{ .name	    = "chdir",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "chmod",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "chroot",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
	{ .name	    = "clone",	    .errpid = true, },
	{ .name	    = "close",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
	{ .name	    = "connect",    .errmsg = true, },
	{ .name	    = "creat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "dup",	    .errmsg = true, },
	{ .name	    = "dup2",	    .errmsg = true, },
	{ .name	    = "dup3",	    .errmsg = true, },
	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
	{ .name	    = "eventfd2",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
	{ .name	    = "faccessat",  .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fadvise64",  .errmsg = true, },
	{ .name	    = "fallocate",  .errmsg = true, },
	{ .name	    = "fchdir",	    .errmsg = true, },
	{ .name	    = "fchmod",	    .errmsg = true, },
	{ .name	    = "fchmodat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fchown",	    .errmsg = true, },
	{ .name	    = "fchownat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fcntl",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
	{ .name	    = "fdatasync",  .errmsg = true, },
	{ .name	    = "flock",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
	{ .name	    = "fsetxattr",  .errmsg = true, },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
	  .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fstatfs",    .errmsg = true, },
	{ .name	    = "fsync",	    .errmsg = true, },
	{ .name	    = "ftruncate",  .errmsg = true, },
	{ .name	    = "futex",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
	{ .name	    = "futimesat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "getdents",   .errmsg = true, },
	{ .name	    = "getdents64", .errmsg = true, },
	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",  .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "getxattr",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "inotify_add_watch", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "ioctl",	    .errmsg = true,
	  .arg_scnprintf = {
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
			     [1] = SCA_STRHEXARRAY, /* cmd */
			     [2] = SCA_HEX, /* arg */ },
	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
			     [2] = SCA_HEX, /* arg */ }, },
#endif
	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
	{ .name	    = "kill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "lchown",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "lgetxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "linkat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "listxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "llistxattr", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lremovexattr", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lseek",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
	{ .name	    = "lsetxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "lsxattr",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "madvise",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
			     [2] = SCA_MADV_BHV, /* behavior */ }, },
	{ .name	    = "mkdir",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "mkdirat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "mknod",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "mknodat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "mlock",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mlockall",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mmap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */ }, },
	{ .name	    = "mprotect",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
			     [2] = SCA_MMAP_PROT, /* prot */ }, },
	{ .name	    = "mq_unlink",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
			     [3] = SCA_MREMAP_FLAGS, /* flags */
			     [4] = SCA_HEX, /* new_addr */ }, },
	{ .name	    = "munlock",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "munmap",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "name_to_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "newfstatat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "open",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "open_by_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "openat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "perf_event_open", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
			     [3] = SCA_FD,  /* group_fd */
			     [4] = SCA_PERF_FLAGS, /* flags */ }, },
	{ .name	    = "pipe2",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64", },
	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread", },
	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64", },
	{ .name	    = "pwritev",    .errmsg = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "readlink",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
	{ .name	    = "readlinkat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "readv",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmmsg",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "removexattr", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "renameat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "rmdir",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "rt_sigaction", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "sched_setscheduler", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
	{ .name	    = "seccomp",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "sendmmsg",   .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendmsg",    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendto",	    .errmsg = true,
	  .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "setpgid",    .errmsg = true, },
	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "setxattr",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "shutdown",   .errmsg = true, },
	{ .name	    = "socket",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "socketpair", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "statfs",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "swapoff",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "swapon",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "symlinkat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "tgkill",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "tkill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "truncate",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
	{ .name	    = "unlinkat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "utime",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "utimensat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "utimes",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "vmsplice",   .errmsg = true, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "write",	    .errmsg = true, },
	{ .name	    = "writev",	    .errmsg = true, },
};
834
835 static int syscall_fmt__cmp(const void *name, const void *fmtp)
836 {
837 const struct syscall_fmt *fmt = fmtp;
838 return strcmp(name, fmt->name);
839 }
840
841 static struct syscall_fmt *syscall_fmt__find(const char *name)
842 {
843 const int nmemb = ARRAY_SIZE(syscall_fmts);
844 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
845 }
846
/*
 * Everything known about one syscall: its tracepoint format, argument
 * field list and the per-argument beautifiers resolved from syscall_fmts[].
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;	/* tracepoint argument fields */
	const char	    *name;
	bool		    is_exit;	/* NOTE(review): presumably set for exit-like syscalls that never return -- confirm at use sites */
	struct syscall_fmt  *fmt;	/* matching syscall_fmts[] entry, may be NULL */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;	/* parm passed to arg_scnprintf[i] via arg->parm */
};
857
858 static size_t fprintf_duration(unsigned long t, FILE *fp)
859 {
860 double duration = (double)t / NSEC_PER_MSEC;
861 size_t printed = fprintf(fp, "(");
862
863 if (duration >= 1.0)
864 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
865 else if (duration >= 0.01)
866 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
867 else
868 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
869 return printed + fprintf(fp, "): ");
870 }
871
/**
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 * filename.ptr by the vfs_getname tracepoint/kprobe.
 *
 * Per-thread state, stashed in thread->priv (see thread__trace()).
 */
struct thread_trace {
	u64		  entry_time;
	u64		  exit_time;
	bool		  entry_pending;
	unsigned long	  nr_events;
	unsigned long	  pfmaj, pfmin;		/* major/minor page fault counts */
	char		  *entry_str;
	double		  runtime_ms;
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	  max;		/* highest fd cached, -1 while table is empty */
		char	  **table;	/* fd -> pathname, see trace__set_fd_pathname() */
	} paths;

	struct intlist *syscall_stats;
};
899
900 static struct thread_trace *thread_trace__new(void)
901 {
902 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
903
904 if (ttrace)
905 ttrace->paths.max = -1;
906
907 ttrace->syscall_stats = intlist__new(NULL);
908
909 return ttrace;
910 }
911
912 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
913 {
914 struct thread_trace *ttrace;
915
916 if (thread == NULL)
917 goto fail;
918
919 if (thread__priv(thread) == NULL)
920 thread__set_priv(thread, thread_trace__new());
921
922 if (thread__priv(thread) == NULL)
923 goto fail;
924
925 ttrace = thread__priv(thread);
926 ++ttrace->nr_events;
927
928 return ttrace;
929 fail:
930 color_fprintf(fp, PERF_COLOR_RED,
931 "WARNING: not enough memory, dropping samples!\n");
932 return NULL;
933 }
934
/* Bits for trace->trace_pgfaults: which page fault flavors to trace. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Capacity of the per-thread buffer used to assemble syscall entry lines. */
static const size_t trace__entry_str_size = 2048;
939
940 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
941 {
942 struct thread_trace *ttrace = thread__priv(thread);
943
944 if (fd > ttrace->paths.max) {
945 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
946
947 if (npath == NULL)
948 return -1;
949
950 if (ttrace->paths.max != -1) {
951 memset(npath + ttrace->paths.max + 1, 0,
952 (fd - ttrace->paths.max) * sizeof(char *));
953 } else {
954 memset(npath, 0, (fd + 1) * sizeof(char *));
955 }
956
957 ttrace->paths.table = npath;
958 ttrace->paths.max = fd;
959 }
960
961 ttrace->paths.table[fd] = strdup(pathname);
962
963 return ttrace->paths.table[fd] != NULL ? 0 : -1;
964 }
965
966 static int thread__read_fd_path(struct thread *thread, int fd)
967 {
968 char linkname[PATH_MAX], pathname[PATH_MAX];
969 struct stat st;
970 int ret;
971
972 if (thread->pid_ == thread->tid) {
973 scnprintf(linkname, sizeof(linkname),
974 "/proc/%d/fd/%d", thread->pid_, fd);
975 } else {
976 scnprintf(linkname, sizeof(linkname),
977 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
978 }
979
980 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
981 return -1;
982
983 ret = readlink(linkname, pathname, sizeof(pathname));
984
985 if (ret < 0 || ret > st.st_size)
986 return -1;
987
988 pathname[ret] = '\0';
989 return trace__set_fd_pathname(thread, fd, pathname);
990 }
991
992 static const char *thread__fd_path(struct thread *thread, int fd,
993 struct trace *trace)
994 {
995 struct thread_trace *ttrace = thread__priv(thread);
996
997 if (ttrace == NULL)
998 return NULL;
999
1000 if (fd < 0)
1001 return NULL;
1002
1003 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1004 if (!trace->live)
1005 return NULL;
1006 ++trace->stats.proc_getname;
1007 if (thread__read_fd_path(thread, fd))
1008 return NULL;
1009 }
1010
1011 return ttrace->paths.table[fd];
1012 }
1013
1014 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1015 struct syscall_arg *arg)
1016 {
1017 int fd = arg->val;
1018 size_t printed = scnprintf(bf, size, "%d", fd);
1019 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1020
1021 if (path)
1022 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1023
1024 return printed;
1025 }
1026
1027 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1028 struct syscall_arg *arg)
1029 {
1030 int fd = arg->val;
1031 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1032 struct thread_trace *ttrace = thread__priv(arg->thread);
1033
1034 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1035 zfree(&ttrace->paths.table[fd]);
1036
1037 return printed;
1038 }
1039
/*
 * Remember which filename pointer we're waiting on and where in the
 * formatted entry line ('bf' points into ttrace->entry_str) its resolved
 * name should be spliced by trace__vfs_getname().
 */
static void thread__set_filename_pos(struct thread *thread, const char *bf,
				     unsigned long ptr)
{
	struct thread_trace *ttrace = thread__priv(thread);

	ttrace->filename.ptr = ptr;
	ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
}
1048
1049 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1050 struct syscall_arg *arg)
1051 {
1052 unsigned long ptr = arg->val;
1053
1054 if (!arg->trace->vfs_getname)
1055 return scnprintf(bf, size, "%#x", ptr);
1056
1057 thread__set_filename_pos(arg->thread, bf, ptr);
1058 return 0;
1059 }
1060
1061 static bool trace__filter_duration(struct trace *trace, double t)
1062 {
1063 return t < (trace->duration_filter * NSEC_PER_MSEC);
1064 }
1065
1066 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1067 {
1068 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1069
1070 return fprintf(fp, "%10.3f ", ts);
1071 }
1072
/*
 * Termination flags: written from the signal handler, polled from normal
 * control flow elsewhere in this file.  They must be volatile so the
 * compiler cannot cache them in a register across the polling loop
 * (single-byte stores, so a plain bool is wide enough in practice;
 * strictly, C guarantees only volatile sig_atomic_t here).
 */
static volatile bool done = false;
static volatile bool interrupted = false;

static void sig_handler(int sig)
{
	done = true;
	/* Only ^C (SIGINT) marks the session as user-interrupted. */
	interrupted = sig == SIGINT;
}
1081
1082 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1083 u64 duration, u64 tstamp, FILE *fp)
1084 {
1085 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1086 printed += fprintf_duration(duration, fp);
1087
1088 if (trace->multiple_threads) {
1089 if (trace->show_comm)
1090 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1091 printed += fprintf(fp, "%d ", thread->tid);
1092 }
1093
1094 return printed;
1095 }
1096
1097 static int trace__process_event(struct trace *trace, struct machine *machine,
1098 union perf_event *event, struct perf_sample *sample)
1099 {
1100 int ret = 0;
1101
1102 switch (event->header.type) {
1103 case PERF_RECORD_LOST:
1104 color_fprintf(trace->output, PERF_COLOR_RED,
1105 "LOST %" PRIu64 " events!\n", event->lost.lost);
1106 ret = machine__process_lost_event(machine, event, sample);
1107 break;
1108 default:
1109 ret = machine__process_event(machine, event, sample);
1110 break;
1111 }
1112
1113 return ret;
1114 }
1115
/*
 * perf_tool callback: recover the enclosing struct trace from the
 * embedded tool member and forward to trace__process_event().
 */
static int trace__tool_process(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample,
			       struct machine *machine)
{
	struct trace *trace = container_of(tool, struct trace, tool);
	return trace__process_event(trace, machine, event, sample);
}
1124
1125 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1126 {
1127 struct machine *machine = vmachine;
1128
1129 if (machine->kptr_restrict_warned)
1130 return NULL;
1131
1132 if (symbol_conf.kptr_restrict) {
1133 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1134 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1135 "Kernel samples will not be resolved.\n");
1136 machine->kptr_restrict_warned = true;
1137 return NULL;
1138 }
1139
1140 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1141 }
1142
1143 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1144 {
1145 int err = symbol__init(NULL);
1146
1147 if (err)
1148 return err;
1149
1150 trace->host = machine__new_host();
1151 if (trace->host == NULL)
1152 return -ENOMEM;
1153
1154 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1155 return -errno;
1156
1157 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1158 evlist->threads, trace__tool_process, false,
1159 trace->opts.proc_map_timeout);
1160 if (err)
1161 symbol__exit();
1162
1163 return err;
1164 }
1165
/*
 * Pick a pretty-printer for each argument of 'sc'.
 *
 * Precedence per argument: an explicit entry in the per-syscall fmt
 * table, then hex for pointer fields, then type-based defaults (pid_t,
 * umode_t), then the fd beautifier for integer fields whose name ends in
 * "fd".  Returns 0 on success, -1 when the table can't be allocated.
 */
static int syscall__set_arg_fmts(struct syscall *sc)
{
	struct format_field *field;
	int idx = 0, len;

	sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
	if (sc->arg_scnprintf == NULL)
		return -1;

	if (sc->fmt)
		sc->arg_parm = sc->fmt->arg_parm;

	for (field = sc->args; field; field = field->next) {
		if (sc->fmt && sc->fmt->arg_scnprintf[idx])
			sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
		else if (field->flags & FIELD_IS_POINTER)
			sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
		else if (strcmp(field->type, "pid_t") == 0)
			sc->arg_scnprintf[idx] = SCA_PID;
		else if (strcmp(field->type, "umode_t") == 0)
			sc->arg_scnprintf[idx] = SCA_MODE_T;
		else if ((strcmp(field->type, "int") == 0 ||
			  strcmp(field->type, "unsigned int") == 0 ||
			  strcmp(field->type, "long") == 0) &&
			 (len = strlen(field->name)) >= 2 &&
			 strcmp(field->name + len - 2, "fd") == 0) {
			/*
			 * /sys/kernel/tracing/events/syscalls/sys_enter*
			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
			 * 65 int
			 * 23 unsigned int
			 * 7 unsigned long
			 */
			sc->arg_scnprintf[idx] = SCA_FD;
		}
		++idx;
	}

	return 0;
}
1206
/*
 * Lazily fill trace->syscalls.table[id]: look up the name, the optional
 * per-syscall format overrides and the sys_enter tracepoint format,
 * growing the table when 'id' is beyond its current size.
 * Returns 0 on success, -1 on lookup/allocation failure.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	if (id > trace->syscalls.max) {
		/* Grow the table; slots between the old max and 'id' are zeroed. */
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt  = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are known under another tracepoint name (alias). */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * We need to check and discard the first variable '__syscall_nr'
	 * or 'nr' that mean the syscall number. It is needless here.
	 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1265
/*
 * Translate the ev_qualifier syscall-name strlist into syscall ids,
 * reporting every name that doesn't resolve in one message.  On failure
 * the partially-built id array is discarded.
 * Returns 0 on success, -EINVAL on bad names or allocation failure.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* First bad name opens the error message, later ones are appended. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1311
1312 /*
1313 * args is to be interpreted as a series of longs but we need to handle
1314 * 8-byte unaligned accesses. args points to raw_data within the event
1315 * and raw_data is guaranteed to be 8-byte unaligned because it is
1316 * preceded by raw_size which is a u32. So we need to copy args to a temp
1317 * variable to read it. Most notably this avoids extended load instructions
1318 * on unaligned addresses
1319 */
1320
/*
 * Format the syscall arguments found at 'args' into 'bf', using the
 * per-argument beautifiers chosen by syscall__set_arg_fmts(), or as six
 * raw longs when the tracepoint format couldn't be read.
 * Returns the number of characters printed.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;	/* bit for this arg in arg.mask; a set bit suppresses it */
		struct syscall_arg arg = {
			.idx	= 0,
			.mask	= 0,
			.trace  = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in a strarray
			 * for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1393
/* Handler signature stashed in evsel->handler, invoked by trace__process_sample(). */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1397
/*
 * Return the struct syscall for 'id', lazily reading its tracepoint info
 * on first use via trace__read_syscall_info().  Returns NULL for the
 * bogus id -1 occasionally seen on sys_exit (see comment below) or when
 * the syscall info can't be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* Not cached yet?  Read it, then re-check that the read populated the slot. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1440
1441 static void thread__update_stats(struct thread_trace *ttrace,
1442 int id, struct perf_sample *sample)
1443 {
1444 struct int_node *inode;
1445 struct stats *stats;
1446 u64 duration = 0;
1447
1448 inode = intlist__findnew(ttrace->syscall_stats, id);
1449 if (inode == NULL)
1450 return;
1451
1452 stats = inode->priv;
1453 if (stats == NULL) {
1454 stats = malloc(sizeof(struct stats));
1455 if (stats == NULL)
1456 return;
1457 init_stats(stats);
1458 inode->priv = stats;
1459 }
1460
1461 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1462 duration = sample->time - ttrace->entry_time;
1463
1464 update_stats(stats, duration);
1465 }
1466
1467 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1468 {
1469 struct thread_trace *ttrace;
1470 u64 duration;
1471 size_t printed;
1472
1473 if (trace->current == NULL)
1474 return 0;
1475
1476 ttrace = thread__priv(trace->current);
1477
1478 if (!ttrace->entry_pending)
1479 return 0;
1480
1481 duration = sample->time - ttrace->entry_time;
1482
1483 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1484 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1485 ttrace->entry_pending = false;
1486
1487 return printed;
1488 }
1489
/*
 * raw_syscalls:sys_enter handler: format "name(args" into the per-thread
 * entry_str and defer printing until sys_exit completes the line with the
 * return value.  Syscalls that never return (exit/exit_group) are printed
 * immediately.  Returns 0, or -1 when per-thread state can't be set up.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Lazily allocate the buffer the entry line is assembled in. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* exit/exit_group: no sys_exit will come, print right away. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the thread whose entry may need flushing by interrupted-entry. */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1548
1549 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1550 struct perf_sample *sample,
1551 struct callchain_cursor *cursor)
1552 {
1553 struct addr_location al;
1554
1555 if (machine__resolve(trace->host, &al, sample) < 0 ||
1556 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1557 return -1;
1558
1559 return 0;
1560 }
1561
1562 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1563 {
1564 /* TODO: user-configurable print_opts */
1565 const unsigned int print_opts = EVSEL__PRINT_SYM |
1566 EVSEL__PRINT_DSO |
1567 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1568
1569 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1570 }
1571
/*
 * raw_syscalls:sys_exit handler: complete the line deferred by
 * trace__sys_enter() (or print a "continued" marker if the entry was
 * already flushed) with the return value, formatted per the syscall's
 * fmt flags (errmsg/errpid/timeout/hexret).  Also feeds --summary stats
 * and the fd->path cache for just-opened files.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* A successful open: map the returned fd to the vfs_getname'd path. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* Entry was flushed earlier by trace__printf_interrupted_entry(). */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		char bf[STRERR_BUFSIZE];
		/*
		 * NOTE(review): assigning strerror_r()'s result assumes the
		 * GNU (char *-returning) variant — verify _GNU_SOURCE is in
		 * effect for this build.
		 */
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* The return value is a pid; show the child's comm if known. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1671
/*
 * probe:vfs_getname handler: capture the filename being resolved and, if
 * a syscall entry line is pending, splice it into entry_str at the
 * position recorded by thread__set_filename_pos(), keeping only the tail
 * of the name when it doesn't fit.  The name is also stashed for the
 * fd->path cache (see the pending_open handling in trace__sys_exit()).
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the per-thread copy of the name if needed. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
				goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No entry line waiting for this name?  We're done. */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* Keep the tail of the name when it doesn't fit. */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Shift the rest of the entry line right and drop the name in. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	return 0;
}
1728
1729 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1730 union perf_event *event __maybe_unused,
1731 struct perf_sample *sample)
1732 {
1733 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1734 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1735 struct thread *thread = machine__findnew_thread(trace->host,
1736 sample->pid,
1737 sample->tid);
1738 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1739
1740 if (ttrace == NULL)
1741 goto out_dump;
1742
1743 ttrace->runtime_ms += runtime_ms;
1744 trace->runtime_ms += runtime_ms;
1745 thread__put(thread);
1746 return 0;
1747
1748 out_dump:
1749 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1750 evsel->name,
1751 perf_evsel__strval(evsel, sample, "comm"),
1752 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1753 runtime,
1754 perf_evsel__intval(evsel, sample, "vruntime"));
1755 thread__put(thread);
1756 return 0;
1757 }
1758
1759 static void bpf_output__printer(enum binary_printer_ops op,
1760 unsigned int val, void *extra)
1761 {
1762 FILE *output = extra;
1763 unsigned char ch = (unsigned char)val;
1764
1765 switch (op) {
1766 case BINARY_PRINT_CHAR_DATA:
1767 fprintf(output, "%c", isprint(ch) ? ch : '.');
1768 break;
1769 case BINARY_PRINT_DATA_BEGIN:
1770 case BINARY_PRINT_LINE_BEGIN:
1771 case BINARY_PRINT_ADDR:
1772 case BINARY_PRINT_NUM_DATA:
1773 case BINARY_PRINT_NUM_PAD:
1774 case BINARY_PRINT_SEP:
1775 case BINARY_PRINT_CHAR_PAD:
1776 case BINARY_PRINT_LINE_END:
1777 case BINARY_PRINT_DATA_END:
1778 default:
1779 break;
1780 }
1781 }
1782
/*
 * Dump a BPF_OUTPUT event's raw payload via bpf_output__printer()
 * (8: presumably bytes per line — confirm against print_binary()).
 */
static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	print_binary(sample->raw_data, sample->raw_size, 8,
		     bpf_output__printer, trace->output);
}
1789
/*
 * Generic handler for --event tracepoints and BPF output events: print
 * timestamp, event name and the formatted payload, flushing any pending
 * syscall entry line first.  Always returns 0.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Below --min-stack?  Skip the event entirely. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with syscall lines, which print a duration. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "( ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1830
1831 static void print_location(FILE *f, struct perf_sample *sample,
1832 struct addr_location *al,
1833 bool print_dso, bool print_sym)
1834 {
1835
1836 if ((verbose || print_dso) && al->map)
1837 fprintf(f, "%s@", al->map->dso->long_name);
1838
1839 if ((verbose || print_sym) && al->sym)
1840 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1841 al->addr - al->sym->start);
1842 else if (al->map)
1843 fprintf(f, "0x%" PRIx64, al->addr);
1844 else
1845 fprintf(f, "0x%" PRIx64, sample->addr);
1846 }
1847
/*
 * Page fault event handler: count per-thread maj/min faults and print
 * "majfault [ip-location] => addr-location (type)" lines, where the
 * address location type is 'd' data, 'x' executable or '?' unmapped.
 * Returns 0, or -1 when per-thread state can't be set up.
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Where the faulting instruction is. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* Where the faulting access landed: try data maps first, then code. */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1923
1924 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1925 {
1926 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1927 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1928 return false;
1929
1930 if (trace->pid_list || trace->tid_list)
1931 return true;
1932
1933 return false;
1934 }
1935
1936 static void trace__set_base_time(struct trace *trace,
1937 struct perf_evsel *evsel,
1938 struct perf_sample *sample)
1939 {
1940 /*
1941 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1942 * and don't use sample->time unconditionally, we may end up having
1943 * some other event in the future without PERF_SAMPLE_TIME for good
1944 * reason, i.e. we may not be interested in its timestamps, just in
1945 * it taking place, picking some piece of information when it
1946 * appears in our event stream (vfs_getname comes to mind).
1947 */
1948 if (trace->base_time == 0 && !trace->full_time &&
1949 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1950 trace->base_time = sample->time;
1951 }
1952
1953 static int trace__process_sample(struct perf_tool *tool,
1954 union perf_event *event,
1955 struct perf_sample *sample,
1956 struct perf_evsel *evsel,
1957 struct machine *machine __maybe_unused)
1958 {
1959 struct trace *trace = container_of(tool, struct trace, tool);
1960 int err = 0;
1961
1962 tracepoint_handler handler = evsel->handler;
1963
1964 if (skip_sample(trace, sample))
1965 return 0;
1966
1967 trace__set_base_time(trace, evsel, sample);
1968
1969 if (handler) {
1970 ++trace->nr_events;
1971 handler(trace, evsel, event, sample);
1972 }
1973
1974 return err;
1975 }
1976
1977 static int parse_target_str(struct trace *trace)
1978 {
1979 if (trace->opts.target.pid) {
1980 trace->pid_list = intlist__new(trace->opts.target.pid);
1981 if (trace->pid_list == NULL) {
1982 pr_err("Error parsing process id string\n");
1983 return -EINVAL;
1984 }
1985 }
1986
1987 if (trace->opts.target.tid) {
1988 trace->tid_list = intlist__new(trace->opts.target.tid);
1989 if (trace->tid_list == NULL) {
1990 pr_err("Error parsing thread id string\n");
1991 return -EINVAL;
1992 }
1993 }
1994
1995 return 0;
1996 }
1997
1998 static int trace__record(struct trace *trace, int argc, const char **argv)
1999 {
2000 unsigned int rec_argc, i, j;
2001 const char **rec_argv;
2002 const char * const record_args[] = {
2003 "record",
2004 "-R",
2005 "-m", "1024",
2006 "-c", "1",
2007 };
2008
2009 const char * const sc_args[] = { "-e", };
2010 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2011 const char * const majpf_args[] = { "-e", "major-faults" };
2012 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2013 const char * const minpf_args[] = { "-e", "minor-faults" };
2014 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2015
2016 /* +1 is for the event string below */
2017 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2018 majpf_args_nr + minpf_args_nr + argc;
2019 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2020
2021 if (rec_argv == NULL)
2022 return -ENOMEM;
2023
2024 j = 0;
2025 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2026 rec_argv[j++] = record_args[i];
2027
2028 if (trace->trace_syscalls) {
2029 for (i = 0; i < sc_args_nr; i++)
2030 rec_argv[j++] = sc_args[i];
2031
2032 /* event string may be different for older kernels - e.g., RHEL6 */
2033 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2034 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2035 else if (is_valid_tracepoint("syscalls:sys_enter"))
2036 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2037 else {
2038 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2039 return -1;
2040 }
2041 }
2042
2043 if (trace->trace_pgfaults & TRACE_PFMAJ)
2044 for (i = 0; i < majpf_args_nr; i++)
2045 rec_argv[j++] = majpf_args[i];
2046
2047 if (trace->trace_pgfaults & TRACE_PFMIN)
2048 for (i = 0; i < minpf_args_nr; i++)
2049 rec_argv[j++] = minpf_args[i];
2050
2051 for (i = 0; i < (unsigned int)argc; i++)
2052 rec_argv[j++] = argv[i];
2053
2054 return cmd_record(j, rec_argv, NULL);
2055 }
2056
2057 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2058
2059 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2060 {
2061 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2062
2063 if (IS_ERR(evsel))
2064 return false;
2065
2066 if (perf_evsel__field(evsel, "pathname") == NULL) {
2067 perf_evsel__delete(evsel);
2068 return false;
2069 }
2070
2071 evsel->handler = trace__vfs_getname;
2072 perf_evlist__add(evlist, evsel);
2073 return true;
2074 }
2075
2076 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2077 {
2078 struct perf_evsel *evsel;
2079 struct perf_event_attr attr = {
2080 .type = PERF_TYPE_SOFTWARE,
2081 .mmap_data = 1,
2082 };
2083
2084 attr.config = config;
2085 attr.sample_period = 1;
2086
2087 event_attr_init(&attr);
2088
2089 evsel = perf_evsel__new(&attr);
2090 if (evsel)
2091 evsel->handler = trace__pgfault;
2092
2093 return evsel;
2094 }
2095
2096 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2097 {
2098 const u32 type = event->header.type;
2099 struct perf_evsel *evsel;
2100
2101 if (type != PERF_RECORD_SAMPLE) {
2102 trace__process_event(trace, trace->host, event, sample);
2103 return;
2104 }
2105
2106 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2107 if (evsel == NULL) {
2108 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2109 return;
2110 }
2111
2112 trace__set_base_time(trace, evsel, sample);
2113
2114 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2115 sample->raw_data == NULL) {
2116 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2117 perf_evsel__name(evsel), sample->tid,
2118 sample->cpu, sample->raw_size);
2119 } else {
2120 tracepoint_handler handler = evsel->handler;
2121 handler(trace, evsel, event, sample);
2122 }
2123 }
2124
2125 static int trace__add_syscall_newtp(struct trace *trace)
2126 {
2127 int ret = -1;
2128 struct perf_evlist *evlist = trace->evlist;
2129 struct perf_evsel *sys_enter, *sys_exit;
2130
2131 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2132 if (sys_enter == NULL)
2133 goto out;
2134
2135 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2136 goto out_delete_sys_enter;
2137
2138 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2139 if (sys_exit == NULL)
2140 goto out_delete_sys_enter;
2141
2142 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2143 goto out_delete_sys_exit;
2144
2145 perf_evlist__add(evlist, sys_enter);
2146 perf_evlist__add(evlist, sys_exit);
2147
2148 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2149 /*
2150 * We're interested only in the user space callchain
2151 * leading to the syscall, allow overriding that for
2152 * debugging reasons using --kernel_syscall_callchains
2153 */
2154 sys_exit->attr.exclude_callchain_kernel = 1;
2155 }
2156
2157 trace->syscalls.events.sys_enter = sys_enter;
2158 trace->syscalls.events.sys_exit = sys_exit;
2159
2160 ret = 0;
2161 out:
2162 return ret;
2163
2164 out_delete_sys_exit:
2165 perf_evsel__delete_priv(sys_exit);
2166 out_delete_sys_enter:
2167 perf_evsel__delete_priv(sys_enter);
2168 goto out;
2169 }
2170
2171 static int trace__set_ev_qualifier_filter(struct trace *trace)
2172 {
2173 int err = -1;
2174 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2175 trace->ev_qualifier_ids.nr,
2176 trace->ev_qualifier_ids.entries);
2177
2178 if (filter == NULL)
2179 goto out_enomem;
2180
2181 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2182 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2183
2184 free(filter);
2185 out:
2186 return err;
2187 out_enomem:
2188 errno = ENOMEM;
2189 goto out;
2190 }
2191
/*
 * Live mode: set up all requested events (syscalls, vfs_getname, page
 * faults, sched_stat_runtime), create/attach to the target, mmap the
 * ring buffers and loop consuming events until interrupted or the
 * forked workload exits.  Returns 0 on success, negative on error.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* leftover argv == workload to fork */
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	/* callchains requested: configure them per evsel */
	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
		       /*
			* Now we have evsels with different sample_ids, use
			* PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			* from a fixed position in each ring buffer record.
			*
			* As of this the changeset introducing this comment, this
			* isn't strictly needed, as the fields that can come before
			* PERF_SAMPLE_ID are all used, but we'll probably disable
			* some of those for things like copying the payload of
			* pointer syscall arguments, and for vfs_getname we don't
			* need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			* here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			*/
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			 errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	/* -e syscall list: install the "id in (...)" tracepoint filter */
	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* decides whether per-event tid is printed */
	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	before = trace->nr_events;

	/* drain all ring buffers once */
	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* workload finished: stop producing, flush what's left */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/*
 * The error labels below live inside a brace block so they can share
 * the errbuf buffer; they are only reachable via goto, never by
 * falling through the return above.
 */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2450
/*
 * Replay mode (-i): open an existing perf.data file, wire up the
 * syscall/pagefault/vfs_getname handlers and process all recorded
 * events through the same printing machinery as live mode.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	    perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	    perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* route any recorded software page fault events to our handler */
	evlist__for_each(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2546
/* Print the banner preceding the per-thread summary sections. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2555
/*
 * Resorted view of a thread's per-syscall stats intlist, ordered by
 * total time spent (msecs), descending.  The body below is the entry
 * initializer run for each intlist node, whose priv holds the
 * accumulated struct stats for one syscall id.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats *stats;
	double msecs;
	int syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats = stats;
	/* total time = call count * average duration, scaled to msec */
	entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2569
/*
 * Print the per-syscall statistics table (calls, total/min/avg/max
 * duration, stddev) for one thread, sorted by total time via the
 * syscall_stats resort rb defined above.  Returns the number of
 * characters printed.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, " syscall calls total min avg max stddev\n");
	printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
	printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");

	resort_rb__for_each(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* stats are kept in nanoseconds, print in msec */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* relative standard deviation, in percent */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, " %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2612
2613 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2614 {
2615 size_t printed = 0;
2616 struct thread_trace *ttrace = thread__priv(thread);
2617 double ratio;
2618
2619 if (ttrace == NULL)
2620 return 0;
2621
2622 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2623
2624 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2625 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2626 printed += fprintf(fp, "%.1f%%", ratio);
2627 if (ttrace->pfmaj)
2628 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2629 if (ttrace->pfmin)
2630 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2631 if (trace->sched)
2632 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2633 else if (fputc('\n', fp) != EOF)
2634 ++printed;
2635
2636 printed += thread__dump_stats(ttrace, trace, fp);
2637
2638 return printed;
2639 }
2640
2641 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2642 {
2643 return ttrace ? ttrace->nr_events : 0;
2644 }
2645
/*
 * Resorted view of the machine's threads rb_tree, ordered by per-thread
 * event count so the summary lists the busiest threads last.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2652
/*
 * Print the end-of-run summary: the header banner followed by one
 * section per thread, ordered by number of events (see the threads
 * resort rb above).  Returns the number of characters printed, or 0
 * if the threads could not be sorted.
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	resort_rb__for_each(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2671
2672 static int trace__set_duration(const struct option *opt, const char *str,
2673 int unset __maybe_unused)
2674 {
2675 struct trace *trace = opt->value;
2676
2677 trace->duration_filter = atof(str);
2678 return 0;
2679 }
2680
2681 static int trace__set_filter_pids(const struct option *opt, const char *str,
2682 int unset __maybe_unused)
2683 {
2684 int ret = -1;
2685 size_t i;
2686 struct trace *trace = opt->value;
2687 /*
2688 * FIXME: introduce a intarray class, plain parse csv and create a
2689 * { int nr, int entries[] } struct...
2690 */
2691 struct intlist *list = intlist__new(str);
2692
2693 if (list == NULL)
2694 return -1;
2695
2696 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2697 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2698
2699 if (trace->filter_pids.entries == NULL)
2700 goto out;
2701
2702 trace->filter_pids.entries[0] = getpid();
2703
2704 for (i = 1; i < trace->filter_pids.nr; ++i)
2705 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2706
2707 intlist__delete(list);
2708 ret = 0;
2709 out:
2710 return ret;
2711 }
2712
2713 static int trace__open_output(struct trace *trace, const char *filename)
2714 {
2715 struct stat st;
2716
2717 if (!stat(filename, &st) && st.st_size) {
2718 char oldname[PATH_MAX];
2719
2720 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2721 unlink(oldname);
2722 rename(filename, oldname);
2723 }
2724
2725 trace->output = fopen(filename, "w");
2726
2727 return trace->output == NULL ? -errno : 0;
2728 }
2729
2730 static int parse_pagefaults(const struct option *opt, const char *str,
2731 int unset __maybe_unused)
2732 {
2733 int *trace_pgfaults = opt->value;
2734
2735 if (strcmp(str, "all") == 0)
2736 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2737 else if (strcmp(str, "maj") == 0)
2738 *trace_pgfaults |= TRACE_PFMAJ;
2739 else if (strcmp(str, "min") == 0)
2740 *trace_pgfaults |= TRACE_PFMIN;
2741 else
2742 return -1;
2743
2744 return 0;
2745 }
2746
2747 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2748 {
2749 struct perf_evsel *evsel;
2750
2751 evlist__for_each(evlist, evsel)
2752 evsel->handler = handler;
2753 }
2754
/*
 * Entry point for 'perf trace': parse options, validate the target,
 * then either delegate to 'perf trace record', replay a perf.data file
 * (-i) or run live tracing.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.syscalls = {
			. max = -1,
		},
		.opts = {
			.target = {
				.uid	   = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq     = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering  = true,
			.mmap_pages    = UINT_MAX,
			.proc_map_timeout  = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	err = -1;

	/* resolving faulting addresses needs the data address and time */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	/* --min/max-stack without --call-graph implies dwarf callchains */
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
#endif

	if (callchain_param.enabled) {
		/* callchains need bigger buffers, grow them when we can */
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (!trace.trace_syscalls && ev_qualifier_str) {
		pr_err("The -e option can't be used with --no-syscalls.\n");
		goto out;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	trace.open_id = syscalltbl__id(trace.sctbl, "open");

	/* -e list: a leading '!' inverts the qualifier */
	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* no workload and no target: trace the whole system */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
This page took 0.114021 seconds and 6 git commands to generate.