7401de71d0974f53e923a42d85514046bd702d75
[deliverable/linux.git] / tools / perf / builtin-trace.c
1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39 #include "rb_resort.h"
40
41 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <stdlib.h>
43 #include <linux/err.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48 #include <linux/stringify.h>
49
50 #ifndef O_CLOEXEC
51 # define O_CLOEXEC 02000000
52 #endif
53
/*
 * Global state for one 'perf trace' run: the perf_tool callbacks, the
 * per-arch syscall table, the events being monitored and all the
 * command-line selected filters/output knobs.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;		/* syscall id <-> name table for this arch */
	struct {
		int		max;		/* highest syscall id seen; sizes 'table' */
		struct syscall  *table;		/* indexed by syscall id */
		struct {
			struct perf_evsel *sys_enter,	/* raw_syscalls:sys_enter (or syscalls:* fallback) */
					  *sys_exit;	/* raw_syscalls:sys_exit */
		} events;
	} syscalls;
	struct record_opts	opts;
	struct perf_evlist	*evlist;
	struct machine		*host;
	struct thread		*current;	/* thread whose sys_enter we last saw */
	u64			base_time;	/* first sample timestamp, for relative times */
	FILE			*output;
	unsigned long		nr_events;
	struct strlist		*ev_qualifier;	/* -e syscall name list */
	struct {
		size_t		nr;
		int		*entries;	/* ev_qualifier resolved to syscall ids */
	}			ev_qualifier_ids;
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	struct {
		size_t		nr;
		pid_t		*entries;	/* pids to filter out (e.g. trace's own) */
	}			filter_pids;
	double			duration_filter;	/* --duration, in ms */
	double			runtime_ms;
	struct {
		u64		vfs_getname,	/* tool stats: filenames resolved via vfs_getname */
				proc_getname;	/* ... vs. fallback /proc/pid/fd readlinks */
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	bool			not_ev_qualifier;	/* -e was negated (!syscall) */
	bool			live;		/* live mode vs. processing a perf.data file */
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			kernel_syscallchains;
	bool			force;
	bool			vfs_getname;	/* vfs_getname probe is in place */
	int			trace_pgfaults;	/* TRACE_PFMAJ/TRACE_PFMIN bits */
	int			open_id;	/* syscall id of "open", cached for filename handling */
};
107
/*
 * Accessor for one tracepoint payload field: 'offset' into the raw sample
 * data plus a reader that is either an integer fetch (possibly
 * byte-swapping) or a plain pointer into the payload.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
115
/*
 * Generate tp_field__u{8,16,32,64}(): read a host-endian unsigned integer
 * of the given width from the raw sample payload at field->offset.
 * memcpy is used because the payload offset may be unaligned.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value; \
}

TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);

/*
 * Same as above but byte-swapping, for samples recorded on a machine of
 * the opposite endianness (evsel->needs_swap).
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
140
141 static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
144 {
145 field->offset = format_field->offset;
146
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
162 }
163
164 return 0;
165 }
166
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
168 {
169 return sample->raw_data + field->offset;
170 }
171
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
173 {
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
177 }
178
/*
 * Per-evsel private data for the raw_syscalls tracepoints: the syscall 'id'
 * field plus either the entry 'args' or the exit 'ret' field (an event has
 * one or the other, hence the union).
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
185
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
189 {
190 struct format_field *format_field = perf_evsel__field(evsel, name);
191
192 if (format_field == NULL)
193 return -1;
194
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
196 }
197
198 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199 ({ struct syscall_tp *sc = evsel->priv;\
200 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
201
202 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
203 struct tp_field *field,
204 const char *name)
205 {
206 struct format_field *format_field = perf_evsel__field(evsel, name);
207
208 if (format_field == NULL)
209 return -1;
210
211 return tp_field__init_ptr(field, format_field);
212 }
213
214 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
215 ({ struct syscall_tp *sc = evsel->priv;\
216 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
217
/* Free the evsel's private syscall_tp data, then the evsel itself. */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
223
/*
 * Allocate the evsel's syscall_tp private data and bind its 'id' field
 * reader, installing 'handler' as the sample callback.
 *
 * Returns 0 on success, -ENOMEM when the allocation fails, -ENOENT when
 * the tracepoint has no usable 'id' field (private data is freed again).
 */
static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
{
	evsel->priv = malloc(sizeof(struct syscall_tp));
	if (evsel->priv != NULL) {
		if (perf_evsel__init_sc_tp_uint_field(evsel, id))
			goto out_delete;

		evsel->handler = handler;
		return 0;
	}

	return -ENOMEM;

out_delete:
	zfree(&evsel->priv);
	return -ENOENT;
}
241
/*
 * Create an evsel for the raw_syscalls:sys_{enter,exit} tracepoint named by
 * 'direction', falling back to the older "syscalls" subsystem name, and
 * initialize its syscall_tp private data with 'handler' as callback.
 *
 * Returns NULL if neither tracepoint exists or initialization fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler))
		goto out_delete;

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
262
/* Fetch an integer/pointer field from a syscall tracepoint sample via the
 * evsel's syscall_tp private data; 'name' is id, args or ret. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })

/*
 * Everything an argument beautifier (scnprintf callback) gets to look at:
 * the raw value, the thread/trace context, an optional parameter table
 * (e.g. a strarray), the argument index and a mask of args already consumed.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace  *trace;
	void	      *parm;
	u8	      idx;
	u8	      mask;
};

/*
 * A table mapping small integers to names; 'offset' is subtracted from the
 * value before indexing (for tables that don't start at 0).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};

#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296
297 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
298 const char *intfmt,
299 struct syscall_arg *arg)
300 {
301 struct strarray *sa = arg->parm;
302 int idx = arg->val - sa->offset;
303
304 if (idx < 0 || idx >= sa->nr_entries)
305 return scnprintf(bf, size, intfmt, arg->val);
306
307 return scnprintf(bf, size, "%s", sa->entries[idx]);
308 }
309
310 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
311 struct syscall_arg *arg)
312 {
313 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
314 }
315
316 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
317
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 */
/* strarray lookup with a hex fallback, used for ioctl request codes. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331
/* Defined further down: prints an fd, resolving it to a pathname when known. */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd

/* Like SCA_FD, but the *at() syscalls' dirfd may be the AT_FDCWD sentinel. */
static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
					   struct syscall_arg *arg)
{
	int fd = arg->val;

	if (fd == AT_FDCWD)
		return scnprintf(bf, size, "CWD");

	return syscall_arg__scnprintf_fd(bf, size, arg);
}

#define SCA_FDAT syscall_arg__scnprintf_fd_at

/* Defined further down: prints close()'s fd and drops its cached pathname. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd

/* Print the raw argument as hex, for addresses and opaque values. */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
362
363 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
364 struct syscall_arg *arg)
365 {
366 return scnprintf(bf, size, "%d", arg->val);
367 }
368
369 #define SCA_INT syscall_arg__scnprintf_int
370
/*
 * Name tables for STRARRAY()-beautified arguments. Each entry's index (plus
 * the DEFINE_STRARRAY_OFFSET offset, when present) must equal the kernel's
 * numeric constant for that name.
 */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);

/* EPOLL_CTL_ADD starts at 1, hence the offset. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);

/* SEEK_DATA/SEEK_HOLE may be missing from older libc headers. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);

static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
436
/*
 * Beautify access(2)'s mode argument: "F" for F_OK (0), otherwise the
 * R/W/X bits concatenated, with any leftover unknown bits appended as hex.
 */
static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = 0;
	int mode = arg->val;

	if (mode == F_OK) /* 0 */
		return scnprintf(bf, size, "F");
#define	P_MODE(n) \
	if (mode & n##_OK) { \
		printed += scnprintf(bf + printed, size - printed, "%s", #n); \
		mode &= ~n##_OK; \
	}

	P_MODE(R);
	P_MODE(W);
	P_MODE(X);
#undef P_MODE

	/* bits not covered above: print the residue in hex */
	if (mode)
		printed += scnprintf(bf + printed, size - printed, "|%#x", mode);

	return printed;
}

#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
463
/* Defined further down: prints a pathname argument, vfs_getname-resolved. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename

/*
 * Beautify pipe2(2)'s flags: known O_* bits as "|"-joined names, any
 * leftover bits appended as hex.
 */
static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
						struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & O_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~O_##n; \
	}

	P_FLAG(CLOEXEC);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
#define TCGETS		0x5401

/*
 * Terminal ioctl request names, indexed from TCGETS (0x5401) via the
 * strarray offset below; designated indices ([0x27], [0x50], [0x60])
 * skip gaps in the request-code space.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
};

static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
#endif /* defined(__i386__) || defined(__x86_64__) */

/* Fallbacks for libc headers that predate getrandom(2). */
#ifndef GRND_NONBLOCK
#define GRND_NONBLOCK	0x0001
#endif
#ifndef GRND_RANDOM
#define GRND_RANDOM	0x0002
#endif
525
/*
 * Beautify getrandom(2)'s flags: known GRND_* bits as "|"-joined names,
 * any leftover bits appended as hex.
 */
static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
						   struct syscall_arg *arg)
{
	int printed = 0, flags = arg->val;

#define	P_FLAG(n) \
	if (flags & GRND_##n) { \
		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
		flags &= ~GRND_##n; \
	}

	P_FLAG(RANDOM);
	P_FLAG(NONBLOCK);
#undef P_FLAG

	if (flags)
		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);

	return printed;
}

#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
548
/* Shorthand for syscall_fmts entries: beautify argument 'arg' by looking it
 * up in strarray__<array>. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
552
553 #include "trace/beauty/eventfd.c"
554 #include "trace/beauty/flock.c"
555 #include "trace/beauty/futex_op.c"
556 #include "trace/beauty/mmap.c"
557 #include "trace/beauty/mode_t.c"
558 #include "trace/beauty/msg_flags.c"
559 #include "trace/beauty/open_flags.c"
560 #include "trace/beauty/perf_event_open.c"
561 #include "trace/beauty/pid.c"
562 #include "trace/beauty/sched_policy.c"
563 #include "trace/beauty/seccomp.c"
564 #include "trace/beauty/signum.c"
565 #include "trace/beauty/socket_type.c"
566 #include "trace/beauty/waitid_options.c"
567
/*
 * Per-syscall formatting rules:
 *   alias	  - tracefs name when it differs from the common one
 *   arg_scnprintf - per-argument beautifier callbacks, indexed by arg position
 *   arg_parm	  - optional parameter (e.g. a strarray) for each beautifier
 *   errmsg	  - negative return is -errno: print the errno name
 *   errpid	  - return value is a pid (or -errno)
 *   timeout	  - last arg is a timeout
 *   hexret	  - print the return value in hex (addresses)
 *
 * The table is searched with bsearch() via syscall_fmt__cmp(), so entries
 * MUST be kept sorted by 'name'.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;
	size_t	   (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
	void	   *arg_parm[6];
	bool	   errmsg;
	bool	   errpid;
	bool	   timeout;
	bool	   hexret;
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
			     [1] = SCA_ACCMODE,  /* mode */ }, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "bpf",	    .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
	{ .name	    = "brk",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
	{ .name	    = "chdir",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "chmod",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "chroot",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name     = "clock_gettime",  .errmsg = true, STRARRAY(0, clk_id, clockid), },
	{ .name	    = "clone",	    .errpid = true, },
	{ .name	    = "close",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
	{ .name	    = "connect",    .errmsg = true, },
	{ .name	    = "creat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "dup",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "dup2",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "dup3",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "epoll_ctl",  .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
	{ .name	    = "eventfd2",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
	{ .name	    = "faccessat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fadvise64",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fallocate",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fchdir",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fchmod",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fchmodat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fchown",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fchownat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fcntl",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [1] = SCA_STRARRAY, /* cmd */ },
	  .arg_parm	 = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
	{ .name	    = "fdatasync",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "flock",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [1] = SCA_FLOCK, /* cmd */ }, },
	{ .name	    = "fsetxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat",
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat",
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "fstatfs",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "fsync",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "ftruncate", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "futex",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
	{ .name	    = "futimesat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "getdents",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "getdents64", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "getitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "getpid",	    .errpid = true, },
	{ .name	    = "getpgid",    .errpid = true, },
	{ .name	    = "getppid",    .errpid = true, },
	{ .name	    = "getrandom",  .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
	{ .name	    = "getrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "getxattr",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "inotify_add_watch",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "ioctl",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches.
 */
			     [1] = SCA_STRHEXARRAY, /* cmd */
			     [2] = SCA_HEX, /* arg */ },
	  .arg_parm	 = { [1] = &strarray__tioctls, /* cmd */ }, },
#else
			     [2] = SCA_HEX, /* arg */ }, },
#endif
	{ .name	    = "keyctl",	    .errmsg = true, STRARRAY(0, option, keyctl_options), },
	{ .name	    = "kill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "lchown",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "lgetxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "linkat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
	{ .name	    = "listxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "llistxattr", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lremovexattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lseek",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [2] = SCA_STRARRAY, /* whence */ },
	  .arg_parm	 = { [2] = &strarray__whences, /* whence */ }, },
	{ .name	    = "lsetxattr",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "lstat",	    .errmsg = true, .alias = "newlstat",
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "lsxattr",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name     = "madvise",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	 /* start */
			     [2] = SCA_MADV_BHV, /* behavior */ }, },
	{ .name	    = "mkdir",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "mkdirat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "mknod",      .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "mknodat",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "mlock",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mlockall",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "mmap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX,	  /* addr */
			     [2] = SCA_MMAP_PROT, /* prot */
			     [3] = SCA_MMAP_FLAGS, /* flags */
			     [4] = SCA_FD, 	  /* fd */ }, },
	{ .name	    = "mprotect",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* start */
			     [2] = SCA_MMAP_PROT, /* prot */ }, },
	{ .name	    = "mq_unlink", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
	{ .name	    = "mremap",	    .hexret = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */
			     [3] = SCA_MREMAP_FLAGS, /* flags */
			     [4] = SCA_HEX, /* new_addr */ }, },
	{ .name	    = "munlock",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "munmap",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
	{ .name	    = "name_to_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "newfstatat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "open",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
			     [1] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "open_by_handle_at", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "openat",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* filename */
			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
	{ .name	    = "perf_event_open", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_INT, /* cpu */
			     [3] = SCA_FD,  /* group_fd */
			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
	{ .name	    = "pipe2",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "pread",	    .errmsg = true, .alias = "pread64",
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "preadv",	    .errmsg = true, .alias = "pread",
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "prlimit64",  .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
	{ .name	    = "pwrite",	    .errmsg = true, .alias = "pwrite64",
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "pwritev",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "read",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "readlink",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
	{ .name	    = "readlinkat", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "readv",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "recvfrom",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmmsg",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "recvmsg",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "removexattr", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "renameat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "rmdir",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "rt_sigaction", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_sigprocmask",  .errmsg = true, STRARRAY(0, how, sighow), },
	{ .name	    = "rt_sigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "sched_setscheduler",   .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
	{ .name	    = "seccomp", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
			     [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "sendmmsg",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendmsg",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [2] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "sendto",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */
			     [3] = SCA_MSG_FLAGS, /* flags */ }, },
	{ .name	    = "set_tid_address", .errpid = true, },
	{ .name	    = "setitimer",  .errmsg = true, STRARRAY(0, which, itimers), },
	{ .name	    = "setpgid",    .errmsg = true, },
	{ .name	    = "setrlimit",  .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
	{ .name	    = "setxattr",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "shutdown",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "socket",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "socketpair", .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
			     [1] = SCA_SK_TYPE, /* type */ },
	  .arg_parm	 = { [0] = &strarray__socket_families, /* family */ }, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat",
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "statfs",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "swapoff",    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "swapon",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
	{ .name	    = "symlinkat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
	{ .name	    = "tgkill",	    .errmsg = true,
	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "tkill",	    .errmsg = true,
	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
	{ .name	    = "truncate",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
	{ .name	    = "uname",	    .errmsg = true, .alias = "newuname", },
	{ .name	    = "unlinkat",   .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
			     [1] = SCA_FILENAME, /* pathname */ }, },
	{ .name	    = "utime",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "utimensat",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
			     [1] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "utimes",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
	{ .name	    = "vmsplice",  .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "wait4",	    .errpid = true,
	  .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "waitid",	    .errpid = true,
	  .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
	{ .name	    = "write",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
	{ .name	    = "writev",	    .errmsg = true,
	  .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
};
872
873 static int syscall_fmt__cmp(const void *name, const void *fmtp)
874 {
875 const struct syscall_fmt *fmt = fmtp;
876 return strcmp(name, fmt->name);
877 }
878
879 static struct syscall_fmt *syscall_fmt__find(const char *name)
880 {
881 const int nmemb = ARRAY_SIZE(syscall_fmts);
882 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
883 }
884
/*
 * Everything known about one syscall: its tracepoint format, argument
 * fields, and the (possibly syscall_fmts-provided) beautifiers resolved
 * into flat arrays for fast per-sample access.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;	/* never returns (exit, exit_group): no sys_exit pairing */
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
895
896 static size_t fprintf_duration(unsigned long t, FILE *fp)
897 {
898 double duration = (double)t / NSEC_PER_MSEC;
899 size_t printed = fprintf(fp, "(");
900
901 if (duration >= 1.0)
902 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
903 else if (duration >= 0.01)
904 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
905 else
906 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
907 return printed + fprintf(fp, "): ");
908 }
909
/**
 * Per-thread state hung off struct thread via thread__priv().
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 * filename.ptr by the vfs_getname tracepoint/kprobe.
 */
struct thread_trace {
	u64 entry_time;			/* timestamp of the last sys_enter */
	u64 exit_time;			/* timestamp of the last sys_exit */
	bool entry_pending;		/* entry line staged, waiting for sys_exit */
	unsigned long nr_events;	/* samples seen for this thread */
	unsigned long pfmaj, pfmin;	/* major/minor page fault counts */
	char *entry_str;		/* staging buffer for the "name(args" line */
	double runtime_ms;		/* accumulated sched_stat_runtime, for --summary */
	struct {
		unsigned long ptr;	/* user pointer pending vfs_getname resolution */
		short int entry_str_pos;	/* splice offset into entry_str */
		bool pending_open;	/* name resolved, waiting for the open's return fd */
		unsigned int namelen;	/* capacity of name (excluding the NUL) */
		char *name;		/* last vfs_getname'd filename */
	} filename;
	struct {
		int max;		/* highest fd cached; -1 when table is unallocated */
		char **table;		/* fd -> strdup'd pathname */
	} paths;

	struct intlist *syscall_stats;	/* per-syscall-id struct stats, for --summary */
};
937
938 static struct thread_trace *thread_trace__new(void)
939 {
940 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
941
942 if (ttrace)
943 ttrace->paths.max = -1;
944
945 ttrace->syscall_stats = intlist__new(NULL);
946
947 return ttrace;
948 }
949
950 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
951 {
952 struct thread_trace *ttrace;
953
954 if (thread == NULL)
955 goto fail;
956
957 if (thread__priv(thread) == NULL)
958 thread__set_priv(thread, thread_trace__new());
959
960 if (thread__priv(thread) == NULL)
961 goto fail;
962
963 ttrace = thread__priv(thread);
964 ++ttrace->nr_events;
965
966 return ttrace;
967 fail:
968 color_fprintf(fp, PERF_COLOR_RED,
969 "WARNING: not enough memory, dropping samples!\n");
970 return NULL;
971 }
972
/* Page fault selection bits — presumably or'ed into a trace_pgfaults mask by option parsing outside this chunk. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Capacity of thread_trace->entry_str, where the syscall entry line is staged. */
static const size_t trace__entry_str_size = 2048;
977
978 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
979 {
980 struct thread_trace *ttrace = thread__priv(thread);
981
982 if (fd > ttrace->paths.max) {
983 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
984
985 if (npath == NULL)
986 return -1;
987
988 if (ttrace->paths.max != -1) {
989 memset(npath + ttrace->paths.max + 1, 0,
990 (fd - ttrace->paths.max) * sizeof(char *));
991 } else {
992 memset(npath, 0, (fd + 1) * sizeof(char *));
993 }
994
995 ttrace->paths.table = npath;
996 ttrace->paths.max = fd;
997 }
998
999 ttrace->paths.table[fd] = strdup(pathname);
1000
1001 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1002 }
1003
1004 static int thread__read_fd_path(struct thread *thread, int fd)
1005 {
1006 char linkname[PATH_MAX], pathname[PATH_MAX];
1007 struct stat st;
1008 int ret;
1009
1010 if (thread->pid_ == thread->tid) {
1011 scnprintf(linkname, sizeof(linkname),
1012 "/proc/%d/fd/%d", thread->pid_, fd);
1013 } else {
1014 scnprintf(linkname, sizeof(linkname),
1015 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1016 }
1017
1018 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1019 return -1;
1020
1021 ret = readlink(linkname, pathname, sizeof(pathname));
1022
1023 if (ret < 0 || ret > st.st_size)
1024 return -1;
1025
1026 pathname[ret] = '\0';
1027 return trace__set_fd_pathname(thread, fd, pathname);
1028 }
1029
1030 static const char *thread__fd_path(struct thread *thread, int fd,
1031 struct trace *trace)
1032 {
1033 struct thread_trace *ttrace = thread__priv(thread);
1034
1035 if (ttrace == NULL)
1036 return NULL;
1037
1038 if (fd < 0)
1039 return NULL;
1040
1041 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1042 if (!trace->live)
1043 return NULL;
1044 ++trace->stats.proc_getname;
1045 if (thread__read_fd_path(thread, fd))
1046 return NULL;
1047 }
1048
1049 return ttrace->paths.table[fd];
1050 }
1051
1052 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1053 struct syscall_arg *arg)
1054 {
1055 int fd = arg->val;
1056 size_t printed = scnprintf(bf, size, "%d", fd);
1057 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1058
1059 if (path)
1060 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1061
1062 return printed;
1063 }
1064
1065 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1066 struct syscall_arg *arg)
1067 {
1068 int fd = arg->val;
1069 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1070 struct thread_trace *ttrace = thread__priv(arg->thread);
1071
1072 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1073 zfree(&ttrace->paths.table[fd]);
1074
1075 return printed;
1076 }
1077
1078 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1079 unsigned long ptr)
1080 {
1081 struct thread_trace *ttrace = thread__priv(thread);
1082
1083 ttrace->filename.ptr = ptr;
1084 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1085 }
1086
1087 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1088 struct syscall_arg *arg)
1089 {
1090 unsigned long ptr = arg->val;
1091
1092 if (!arg->trace->vfs_getname)
1093 return scnprintf(bf, size, "%#x", ptr);
1094
1095 thread__set_filename_pos(arg->thread, bf, ptr);
1096 return 0;
1097 }
1098
1099 static bool trace__filter_duration(struct trace *trace, double t)
1100 {
1101 return t < (trace->duration_filter * NSEC_PER_MSEC);
1102 }
1103
1104 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1105 {
1106 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1107
1108 return fprintf(fp, "%10.3f ", ts);
1109 }
1110
/*
 * Set from the signal handler and read by the main loop: use
 * volatile sig_atomic_t, the only object type the C standard guarantees
 * can be safely written from an async signal handler (plain bool gives
 * no such guarantee and may be cached by the compiler).
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;	/* SIGINT aborts; other signals wind down */
}
1119
1120 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1121 u64 duration, u64 tstamp, FILE *fp)
1122 {
1123 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1124 printed += fprintf_duration(duration, fp);
1125
1126 if (trace->multiple_threads) {
1127 if (trace->show_comm)
1128 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1129 printed += fprintf(fp, "%d ", thread->tid);
1130 }
1131
1132 return printed;
1133 }
1134
1135 static int trace__process_event(struct trace *trace, struct machine *machine,
1136 union perf_event *event, struct perf_sample *sample)
1137 {
1138 int ret = 0;
1139
1140 switch (event->header.type) {
1141 case PERF_RECORD_LOST:
1142 color_fprintf(trace->output, PERF_COLOR_RED,
1143 "LOST %" PRIu64 " events!\n", event->lost.lost);
1144 ret = machine__process_lost_event(machine, event, sample);
1145 break;
1146 default:
1147 ret = machine__process_event(machine, event, sample);
1148 break;
1149 }
1150
1151 return ret;
1152 }
1153
1154 static int trace__tool_process(struct perf_tool *tool,
1155 union perf_event *event,
1156 struct perf_sample *sample,
1157 struct machine *machine)
1158 {
1159 struct trace *trace = container_of(tool, struct trace, tool);
1160 return trace__process_event(trace, machine, event, sample);
1161 }
1162
1163 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1164 {
1165 struct machine *machine = vmachine;
1166
1167 if (machine->kptr_restrict_warned)
1168 return NULL;
1169
1170 if (symbol_conf.kptr_restrict) {
1171 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1172 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1173 "Kernel samples will not be resolved.\n");
1174 machine->kptr_restrict_warned = true;
1175 return NULL;
1176 }
1177
1178 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1179 }
1180
1181 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1182 {
1183 int err = symbol__init(NULL);
1184
1185 if (err)
1186 return err;
1187
1188 trace->host = machine__new_host();
1189 if (trace->host == NULL)
1190 return -ENOMEM;
1191
1192 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1193 return -errno;
1194
1195 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1196 evlist->threads, trace__tool_process, false,
1197 trace->opts.proc_map_timeout);
1198 if (err)
1199 symbol__exit();
1200
1201 return err;
1202 }
1203
1204 static int syscall__set_arg_fmts(struct syscall *sc)
1205 {
1206 struct format_field *field;
1207 int idx = 0;
1208
1209 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1210 if (sc->arg_scnprintf == NULL)
1211 return -1;
1212
1213 if (sc->fmt)
1214 sc->arg_parm = sc->fmt->arg_parm;
1215
1216 for (field = sc->args; field; field = field->next) {
1217 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1218 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1219 else if (field->flags & FIELD_IS_POINTER)
1220 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1221 else if (strcmp(field->type, "pid_t") == 0)
1222 sc->arg_scnprintf[idx] = SCA_PID;
1223 else if (strcmp(field->type, "umode_t") == 0)
1224 sc->arg_scnprintf[idx] = SCA_MODE_T;
1225 ++idx;
1226 }
1227
1228 return 0;
1229 }
1230
/*
 * Lazily populate trace->syscalls.table[id]: resolve the syscall name,
 * grow the table if needed, look up the pretty-print overrides and the
 * sys_enter_<name> tracepoint format, and set up per-arg formatters.
 * Returns 0 on success, -1 on any failure.
 */
static int trace__read_syscall_info(struct trace *trace, int id)
{
	char tp_name[128];
	struct syscall *sc;
	const char *name = syscalltbl__name(trace->sctbl, id);

	if (name == NULL)
		return -1;

	/* Grow the table up to @id, zero-filling the new entries. */
	if (id > trace->syscalls.max) {
		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));

		if (nsyscalls == NULL)
			return -1;

		if (trace->syscalls.max != -1) {
			memset(nsyscalls + trace->syscalls.max + 1, 0,
			       (id - trace->syscalls.max) * sizeof(*sc));
		} else {
			memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
		}

		trace->syscalls.table = nsyscalls;
		trace->syscalls.max = id;
	}

	sc = trace->syscalls.table + id;
	sc->name = name;

	sc->fmt = syscall_fmt__find(sc->name);

	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
	sc->tp_format = trace_event__tp_format("syscalls", tp_name);

	/* Some syscalls are traced under an alias, e.g. "newuname" for "uname". */
	if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
		snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
	}

	if (IS_ERR(sc->tp_format))
		return -1;

	sc->args = sc->tp_format->format.fields;
	sc->nr_args = sc->tp_format->format.nr_fields;
	/*
	 * The first field is the syscall number ('__syscall_nr', or just 'nr'
	 * on older kernels, and absent on even older ones).  It is redundant
	 * here, so skip it when present.
	 */
	if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
		sc->args = sc->args->next;
		--sc->nr_args;
	}

	sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");

	return syscall__set_arg_fmts(sc);
}
1289
/*
 * Translate the -e/--expr syscall-name strlist into an array of syscall
 * ids in trace->ev_qualifier_ids.  All invalid names are reported in one
 * comma-separated error message.  Returns 0 on success, -EINVAL otherwise.
 */
static int trace__validate_ev_qualifier(struct trace *trace)
{
	int err = 0, i;
	struct str_node *pos;

	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
	trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
						 sizeof(trace->ev_qualifier_ids.entries[0]));

	if (trace->ev_qualifier_ids.entries == NULL) {
		fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
		       trace->output);
		err = -EINVAL;
		goto out;
	}

	i = 0;

	strlist__for_each(pos, trace->ev_qualifier) {
		const char *sc = pos->s;
		int id = syscalltbl__id(trace->sctbl, sc);

		if (id < 0) {
			/* First bad name opens the message, later ones just append. */
			if (err == 0) {
				fputs("Error:\tInvalid syscall ", trace->output);
				err = -EINVAL;
			} else {
				fputs(", ", trace->output);
			}

			fputs(sc, trace->output);
		}

		/* Stored even when negative; discarded below on error. */
		trace->ev_qualifier_ids.entries[i++] = id;
	}

	if (err < 0) {
		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
		      "\nHint:\tand: 'man syscalls'\n", trace->output);
		zfree(&trace->ev_qualifier_ids.entries);
		trace->ev_qualifier_ids.nr = 0;
	}
out:
	return err;
}
1335
1336 /*
1337 * args is to be interpreted as a series of longs but we need to handle
1338 * 8-byte unaligned accesses. args points to raw_data within the event
1339 * and raw_data is guaranteed to be 8-byte unaligned because it is
1340 * preceded by raw_size which is a u32. So we need to copy args to a temp
1341 * variable to read it. Most notably this avoids extended load instructions
1342 * on unaligned addresses
1343 */
1344
/*
 * Format the syscall arguments pointed at by @args into @bf.
 *
 * Uses the per-argument formatters set up by syscall__set_arg_fmts() when
 * available, plain "%ld" otherwise; when the tracepoint format file could
 * not be read, falls back to dumping six raw "argN" longs.
 */
static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
				      unsigned char *args, struct trace *trace,
				      struct thread *thread)
{
	size_t printed = 0;
	unsigned char *p;
	unsigned long val;

	if (sc->args != NULL) {
		struct format_field *field;
		u8 bit = 1;	/* mirrors arg.mask: one bit per argument */
		struct syscall_arg arg = {
			.idx = 0,
			.mask = 0,
			.trace = trace,
			.thread = thread,
		};

		for (field = sc->args; field;
		     field = field->next, ++arg.idx, bit <<= 1) {
			/* A formatter may have already consumed this argument. */
			if (arg.mask & bit)
				continue;

			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * arg.idx;
			memcpy(&val, p, sizeof(val));

			/*
			 * Suppress this argument if its value is zero and
			 * we don't have a string associated in an
			 * strarray for it.
			 */
			if (val == 0 &&
			    !(sc->arg_scnprintf &&
			      sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
			      sc->arg_parm[arg.idx]))
				continue;

			printed += scnprintf(bf + printed, size - printed,
					     "%s%s: ", printed ? ", " : "", field->name);
			if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
				arg.val = val;
				if (sc->arg_parm)
					arg.parm = sc->arg_parm[arg.idx];
				printed += sc->arg_scnprintf[arg.idx](bf + printed,
								      size - printed, &arg);
			} else {
				printed += scnprintf(bf + printed, size - printed,
						     "%ld", val);
			}
		}
	} else if (IS_ERR(sc->tp_format)) {
		/*
		 * If we managed to read the tracepoint /format file, then we
		 * may end up not having any args, like with gettid(), so only
		 * print the raw args when we didn't manage to read it.
		 */
		int i = 0;

		while (i < 6) {
			/* special care for unaligned accesses */
			p = args + sizeof(unsigned long) * i;
			memcpy(&val, p, sizeof(val));
			printed += scnprintf(bf + printed, size - printed,
					     "%sarg%d: %ld",
					     printed ? ", " : "", i, val);
			++i;
		}
	}

	return printed;
}
1417
/* Per-evsel sample handler type, installed as evsel->handler and invoked
 * from trace__process_sample() / the live event loop. */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1421
/*
 * Return the descriptor for syscall @id, reading its info on first use.
 * Returns NULL for the spurious negative ids the kernel emits on sys_exit
 * (see comment below) and when the syscall info cannot be read.
 */
static struct syscall *trace__syscall_info(struct trace *trace,
					   struct perf_evsel *evsel, int id)
{

	if (id < 0) {

		/*
		 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
		 * before that, leaving at a higher verbosity level till that is
		 * explained. Reproduced with plain ftrace with:
		 *
		 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
		 * grep "NR -1 " /t/trace_pipe
		 *
		 * After generating some load on the machine.
		 */
		if (verbose > 1) {
			static u64 n;
			fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
				id, perf_evsel__name(evsel), ++n);
		}
		return NULL;
	}

	/* Lazily read the syscall info; the table may also grow here. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
	    trace__read_syscall_info(trace, id))
		goto out_cant_read;

	/* Re-check: trace__read_syscall_info() can succeed without filling this id. */
	if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
		goto out_cant_read;

	return &trace->syscalls.table[id];

out_cant_read:
	if (verbose) {
		fprintf(trace->output, "Problems reading syscall %d", id);
		if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
			fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
		fputs(" information\n", trace->output);
	}
	return NULL;
}
1464
1465 static void thread__update_stats(struct thread_trace *ttrace,
1466 int id, struct perf_sample *sample)
1467 {
1468 struct int_node *inode;
1469 struct stats *stats;
1470 u64 duration = 0;
1471
1472 inode = intlist__findnew(ttrace->syscall_stats, id);
1473 if (inode == NULL)
1474 return;
1475
1476 stats = inode->priv;
1477 if (stats == NULL) {
1478 stats = malloc(sizeof(struct stats));
1479 if (stats == NULL)
1480 return;
1481 init_stats(stats);
1482 inode->priv = stats;
1483 }
1484
1485 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1486 duration = sample->time - ttrace->entry_time;
1487
1488 update_stats(stats, duration);
1489 }
1490
1491 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1492 {
1493 struct thread_trace *ttrace;
1494 u64 duration;
1495 size_t printed;
1496
1497 if (trace->current == NULL)
1498 return 0;
1499
1500 ttrace = thread__priv(trace->current);
1501
1502 if (!ttrace->entry_pending)
1503 return 0;
1504
1505 duration = sample->time - ttrace->entry_time;
1506
1507 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1508 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1509 ttrace->entry_pending = false;
1510
1511 return printed;
1512 }
1513
/*
 * raw_syscalls:sys_enter handler: stage the "name(args" line in the
 * per-thread entry_str buffer.  It is completed on sys_exit, except for
 * the exit/exit_group syscalls, which are printed right away since no
 * sys_exit will follow.
 */
static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
			    union perf_event *event __maybe_unused,
			    struct perf_sample *sample)
{
	char *msg;
	void *args;
	size_t printed = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	args = perf_evsel__sc_tp_ptr(evsel, args, sample);

	/* Staging buffer is allocated lazily, on the thread's first syscall. */
	if (ttrace->entry_str == NULL) {
		ttrace->entry_str = malloc(trace__entry_str_size);
		if (!ttrace->entry_str)
			goto out_put;
	}

	/* Flush any pending entry line, unless output is being filtered anyway. */
	if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
		trace__printf_interrupted_entry(trace, sample);

	ttrace->entry_time = sample->time;
	msg = ttrace->entry_str;
	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);

	printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
					   args, trace, thread);

	if (sc->is_exit) {
		/* exit/exit_group never return: print the line immediately. */
		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
			trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
		}
	} else {
		ttrace->entry_pending = true;
		/* See trace__vfs_getname & trace__sys_exit */
		ttrace->filename.pending_open = false;
	}

	/* Track the last thread seen, for trace__printf_interrupted_entry(). */
	if (trace->current != thread) {
		thread__put(trace->current);
		trace->current = thread__get(thread);
	}
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1572
1573 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1574 struct perf_sample *sample,
1575 struct callchain_cursor *cursor)
1576 {
1577 struct addr_location al;
1578
1579 if (machine__resolve(trace->host, &al, sample) < 0 ||
1580 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1581 return -1;
1582
1583 return 0;
1584 }
1585
1586 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1587 {
1588 /* TODO: user-configurable print_opts */
1589 const unsigned int print_opts = EVSEL__PRINT_SYM |
1590 EVSEL__PRINT_DSO |
1591 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1592
1593 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1594 }
1595
/*
 * raw_syscalls:sys_exit handler: compute the syscall duration, complete
 * the entry line staged by trace__sys_enter() (or start a "continued"
 * line when that entry was already flushed) and pretty-print the return
 * value according to the syscall's fmt entry.
 */
static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
			   union perf_event *event __maybe_unused,
			   struct perf_sample *sample)
{
	long ret;
	u64 duration = 0;
	struct thread *thread;
	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
	struct syscall *sc = trace__syscall_info(trace, evsel, id);
	struct thread_trace *ttrace;

	if (sc == NULL)
		return -1;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (trace->summary)
		thread__update_stats(ttrace, id, sample);

	ret = perf_evsel__sc_tp_uint(evsel, ret, sample);

	/* An open-like syscall returned an fd: cache the vfs_getname'd path for it. */
	if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
		trace__set_fd_pathname(thread, ret, ttrace->filename.name);
		ttrace->filename.pending_open = false;
		++trace->stats.vfs_getname;
	}

	ttrace->exit_time = sample->time;

	if (ttrace->entry_time) {
		duration = sample->time - ttrace->entry_time;
		if (trace__filter_duration(trace, duration))
			goto out;
	} else if (trace->duration_filter)
		goto out;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Chain shallower than --min-stack: skip this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	if (trace->summary_only)
		goto out;

	trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);

	if (ttrace->entry_pending) {
		fprintf(trace->output, "%-70s", ttrace->entry_str);
	} else {
		/* The entry line was flushed earlier, interrupted by another event. */
		fprintf(trace->output, " ... [");
		color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
		fprintf(trace->output, "]: %s()", sc->name);
	}

	if (sc->fmt == NULL) {
signed_print:
		fprintf(trace->output, ") = %ld", ret);
	} else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
		char bf[STRERR_BUFSIZE];
		/* NOTE(review): relies on the GNU strerror_r() variant that returns char * — confirm build flags. */
		const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
			   *e = audit_errno_to_name(-ret);

		fprintf(trace->output, ") = -1 %s %s", e, emsg);
	} else if (ret == 0 && sc->fmt->timeout)
		fprintf(trace->output, ") = 0 Timeout");
	else if (sc->fmt->hexret)
		fprintf(trace->output, ") = %#lx", ret);
	else if (sc->fmt->errpid) {
		/* Syscalls like wait4 return a pid: try to show its comm too. */
		struct thread *child = machine__find_thread(trace->host, ret, ret);

		if (child != NULL) {
			fprintf(trace->output, ") = %ld", ret);
			if (child->comm_set)
				fprintf(trace->output, " (%s)", thread__comm_str(child));
			thread__put(child);
		}
	} else
		goto signed_print;

	fputc('\n', trace->output);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	ttrace->entry_pending = false;
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1695
/*
 * vfs_getname tracepoint/kprobe handler: stash the resolved filename for
 * the fd cache (see trace__sys_exit) and, when an entry line is being
 * staged, splice the name into it at the position recorded by
 * thread__set_filename_pos().
 */
static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
			      union perf_event *event __maybe_unused,
			      struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
	struct thread_trace *ttrace;
	size_t filename_len, entry_str_len, to_move;
	ssize_t remaining_space;
	char *pos;
	const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");

	if (!thread)
		goto out;

	ttrace = thread__priv(thread);
	if (!ttrace)
		goto out;

	filename_len = strlen(filename);

	/* Grow the per-thread name buffer when this filename is longer. */
	if (ttrace->filename.namelen < filename_len) {
		char *f = realloc(ttrace->filename.name, filename_len + 1);

		if (f == NULL)
			goto out;

		ttrace->filename.namelen = filename_len;
		ttrace->filename.name = f;
	}

	strcpy(ttrace->filename.name, filename);
	ttrace->filename.pending_open = true;

	/* No staged entry line is waiting for this name. */
	if (!ttrace->filename.ptr)
		goto out;

	entry_str_len = strlen(ttrace->entry_str);
	remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
	if (remaining_space <= 0)
		goto out;

	/* When the name doesn't fit, keep its tail (the most specific part). */
	if (filename_len > (size_t)remaining_space) {
		filename += filename_len - remaining_space;
		filename_len = remaining_space;
	}

	/* Open a gap at entry_str_pos and copy the filename into it. */
	to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
	pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
	memmove(pos + filename_len, pos, to_move);
	memcpy(pos, filename, filename_len);

	ttrace->filename.ptr = 0;
	ttrace->filename.entry_str_pos = 0;
out:
	return 0;
}
1752
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU time (in ms), both
 * per thread and for the whole trace, for the --summary output.
 */
static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
				     union perf_event *event __maybe_unused,
				     struct perf_sample *sample)
{
	u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
	double runtime_ms = (double)runtime / NSEC_PER_MSEC;
	struct thread *thread = machine__findnew_thread(trace->host,
							sample->pid,
							sample->tid);
	struct thread_trace *ttrace = thread__trace(thread, trace->output);

	if (ttrace == NULL)
		goto out_dump;

	ttrace->runtime_ms += runtime_ms;
	trace->runtime_ms += runtime_ms;
	thread__put(thread);
	return 0;

out_dump:
	/* No per-thread state could be set up: just dump the raw payload. */
	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
		evsel->name,
		perf_evsel__strval(evsel, sample, "comm"),
		(pid_t)perf_evsel__intval(evsel, sample, "pid"),
		runtime,
		perf_evsel__intval(evsel, sample, "vruntime"));
	thread__put(thread);
	return 0;
}
1782
1783 static void bpf_output__printer(enum binary_printer_ops op,
1784 unsigned int val, void *extra)
1785 {
1786 FILE *output = extra;
1787 unsigned char ch = (unsigned char)val;
1788
1789 switch (op) {
1790 case BINARY_PRINT_CHAR_DATA:
1791 fprintf(output, "%c", isprint(ch) ? ch : '.');
1792 break;
1793 case BINARY_PRINT_DATA_BEGIN:
1794 case BINARY_PRINT_LINE_BEGIN:
1795 case BINARY_PRINT_ADDR:
1796 case BINARY_PRINT_NUM_DATA:
1797 case BINARY_PRINT_NUM_PAD:
1798 case BINARY_PRINT_SEP:
1799 case BINARY_PRINT_CHAR_PAD:
1800 case BINARY_PRINT_LINE_END:
1801 case BINARY_PRINT_DATA_END:
1802 default:
1803 break;
1804 }
1805 }
1806
/* Dump a bpf-output event's raw payload, 8 bytes per line, via bpf_output__printer(). */
static void bpf_output__fprintf(struct trace *trace,
				struct perf_sample *sample)
{
	print_binary(sample->raw_data, sample->raw_size, 8,
		     bpf_output__printer, trace->output);
}
1813
/*
 * Handler for non-syscall events (--event, bpf-output): print a
 * timestamped line with the event name and its rendered payload.
 */
static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
				union perf_event *event __maybe_unused,
				struct perf_sample *sample)
{
	int callchain_ret = 0;

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Chain shallower than --min-stack: suppress this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out;
			callchain_ret = 1;
		}
	}

	trace__printf_interrupted_entry(trace, sample);
	trace__fprintf_tstamp(trace, sample->time, trace->output);

	/* Keep columns aligned with the syscall lines, which show a duration here. */
	if (trace->trace_syscalls)
		fprintf(trace->output, "( ): ");

	fprintf(trace->output, "%s:", evsel->name);

	if (perf_evsel__is_bpf_output(evsel)) {
		bpf_output__fprintf(trace, sample);
	} else if (evsel->tp_format) {
		event_format__fprintf(evsel->tp_format, sample->cpu,
				      sample->raw_data, sample->raw_size,
				      trace->output);
	}

	fprintf(trace->output, ")\n");

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	return 0;
}
1854
1855 static void print_location(FILE *f, struct perf_sample *sample,
1856 struct addr_location *al,
1857 bool print_dso, bool print_sym)
1858 {
1859
1860 if ((verbose || print_dso) && al->map)
1861 fprintf(f, "%s@", al->map->dso->long_name);
1862
1863 if ((verbose || print_sym) && al->sym)
1864 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1865 al->addr - al->sym->start);
1866 else if (al->map)
1867 fprintf(f, "0x%" PRIx64, al->addr);
1868 else
1869 fprintf(f, "0x%" PRIx64, sample->addr);
1870 }
1871
/*
 * Page-fault software event handler: count major/minor faults per thread
 * and print "maj/minfault [code location] => data location (type level)".
 */
static int trace__pgfault(struct trace *trace,
			  struct perf_evsel *evsel,
			  union perf_event *event __maybe_unused,
			  struct perf_sample *sample)
{
	struct thread *thread;
	struct addr_location al;
	char map_type = 'd';	/* overridden to 'x' or '?' below when no data map matches */
	struct thread_trace *ttrace;
	int err = -1;
	int callchain_ret = 0;

	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);

	if (sample->callchain) {
		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
		if (callchain_ret == 0) {
			/* Chain shallower than --min-stack: skip this event. */
			if (callchain_cursor.nr < trace->min_stack)
				goto out_put;
			callchain_ret = 1;
		}
	}

	ttrace = thread__trace(thread, trace->output);
	if (ttrace == NULL)
		goto out_put;

	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
		ttrace->pfmaj++;
	else
		ttrace->pfmin++;

	if (trace->summary_only)
		goto out;

	/* Resolve the faulting instruction address. */
	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
			      sample->ip, &al);

	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);

	fprintf(trace->output, "%sfault [",
		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
		"maj" : "min");

	print_location(trace->output, sample, &al, false, true);

	fprintf(trace->output, "] => ");

	/* Resolve the faulted-on address, trying data maps first. */
	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
				   sample->addr, &al);

	if (!al.map) {
		/* Not in a data map: maybe it was a fault on executable memory. */
		thread__find_addr_location(thread, sample->cpumode,
					   MAP__FUNCTION, sample->addr, &al);

		if (al.map)
			map_type = 'x';
		else
			map_type = '?';
	}

	print_location(trace->output, sample, &al, true, false);

	fprintf(trace->output, " (%c%c)\n", map_type, al.level);

	if (callchain_ret > 0)
		trace__fprintf_callchain(trace, sample);
	else if (callchain_ret < 0)
		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
out:
	err = 0;
out_put:
	thread__put(thread);
	return err;
}
1947
1948 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1949 {
1950 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1951 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1952 return false;
1953
1954 if (trace->pid_list || trace->tid_list)
1955 return true;
1956
1957 return false;
1958 }
1959
1960 static void trace__set_base_time(struct trace *trace,
1961 struct perf_evsel *evsel,
1962 struct perf_sample *sample)
1963 {
1964 /*
1965 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1966 * and don't use sample->time unconditionally, we may end up having
1967 * some other event in the future without PERF_SAMPLE_TIME for good
1968 * reason, i.e. we may not be interested in its timestamps, just in
1969 * it taking place, picking some piece of information when it
1970 * appears in our event stream (vfs_getname comes to mind).
1971 */
1972 if (trace->base_time == 0 && !trace->full_time &&
1973 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1974 trace->base_time = sample->time;
1975 }
1976
1977 static int trace__process_sample(struct perf_tool *tool,
1978 union perf_event *event,
1979 struct perf_sample *sample,
1980 struct perf_evsel *evsel,
1981 struct machine *machine __maybe_unused)
1982 {
1983 struct trace *trace = container_of(tool, struct trace, tool);
1984 int err = 0;
1985
1986 tracepoint_handler handler = evsel->handler;
1987
1988 if (skip_sample(trace, sample))
1989 return 0;
1990
1991 trace__set_base_time(trace, evsel, sample);
1992
1993 if (handler) {
1994 ++trace->nr_events;
1995 handler(trace, evsel, event, sample);
1996 }
1997
1998 return err;
1999 }
2000
2001 static int parse_target_str(struct trace *trace)
2002 {
2003 if (trace->opts.target.pid) {
2004 trace->pid_list = intlist__new(trace->opts.target.pid);
2005 if (trace->pid_list == NULL) {
2006 pr_err("Error parsing process id string\n");
2007 return -EINVAL;
2008 }
2009 }
2010
2011 if (trace->opts.target.tid) {
2012 trace->tid_list = intlist__new(trace->opts.target.tid);
2013 if (trace->tid_list == NULL) {
2014 pr_err("Error parsing thread id string\n");
2015 return -EINVAL;
2016 }
2017 }
2018
2019 return 0;
2020 }
2021
2022 static int trace__record(struct trace *trace, int argc, const char **argv)
2023 {
2024 unsigned int rec_argc, i, j;
2025 const char **rec_argv;
2026 const char * const record_args[] = {
2027 "record",
2028 "-R",
2029 "-m", "1024",
2030 "-c", "1",
2031 };
2032
2033 const char * const sc_args[] = { "-e", };
2034 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2035 const char * const majpf_args[] = { "-e", "major-faults" };
2036 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2037 const char * const minpf_args[] = { "-e", "minor-faults" };
2038 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2039
2040 /* +1 is for the event string below */
2041 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2042 majpf_args_nr + minpf_args_nr + argc;
2043 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2044
2045 if (rec_argv == NULL)
2046 return -ENOMEM;
2047
2048 j = 0;
2049 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2050 rec_argv[j++] = record_args[i];
2051
2052 if (trace->trace_syscalls) {
2053 for (i = 0; i < sc_args_nr; i++)
2054 rec_argv[j++] = sc_args[i];
2055
2056 /* event string may be different for older kernels - e.g., RHEL6 */
2057 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2058 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2059 else if (is_valid_tracepoint("syscalls:sys_enter"))
2060 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2061 else {
2062 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2063 return -1;
2064 }
2065 }
2066
2067 if (trace->trace_pgfaults & TRACE_PFMAJ)
2068 for (i = 0; i < majpf_args_nr; i++)
2069 rec_argv[j++] = majpf_args[i];
2070
2071 if (trace->trace_pgfaults & TRACE_PFMIN)
2072 for (i = 0; i < minpf_args_nr; i++)
2073 rec_argv[j++] = minpf_args[i];
2074
2075 for (i = 0; i < (unsigned int)argc; i++)
2076 rec_argv[j++] = argv[i];
2077
2078 return cmd_record(j, rec_argv, NULL);
2079 }
2080
2081 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2082
2083 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2084 {
2085 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2086
2087 if (IS_ERR(evsel))
2088 return false;
2089
2090 if (perf_evsel__field(evsel, "pathname") == NULL) {
2091 perf_evsel__delete(evsel);
2092 return false;
2093 }
2094
2095 evsel->handler = trace__vfs_getname;
2096 perf_evlist__add(evlist, evsel);
2097 return true;
2098 }
2099
2100 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2101 {
2102 struct perf_evsel *evsel;
2103 struct perf_event_attr attr = {
2104 .type = PERF_TYPE_SOFTWARE,
2105 .mmap_data = 1,
2106 };
2107
2108 attr.config = config;
2109 attr.sample_period = 1;
2110
2111 event_attr_init(&attr);
2112
2113 evsel = perf_evsel__new(&attr);
2114 if (evsel)
2115 evsel->handler = trace__pgfault;
2116
2117 return evsel;
2118 }
2119
2120 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2121 {
2122 const u32 type = event->header.type;
2123 struct perf_evsel *evsel;
2124
2125 if (type != PERF_RECORD_SAMPLE) {
2126 trace__process_event(trace, trace->host, event, sample);
2127 return;
2128 }
2129
2130 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2131 if (evsel == NULL) {
2132 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2133 return;
2134 }
2135
2136 trace__set_base_time(trace, evsel, sample);
2137
2138 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2139 sample->raw_data == NULL) {
2140 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2141 perf_evsel__name(evsel), sample->tid,
2142 sample->cpu, sample->raw_size);
2143 } else {
2144 tracepoint_handler handler = evsel->handler;
2145 handler(trace, evsel, event, sample);
2146 }
2147 }
2148
/*
 * Create the sys_enter/sys_exit syscall tracepoint evsels, set up their
 * payload field accessors ("args" pointer on enter, "ret" uint on exit),
 * and add both to the evlist.  On success the evsels are also cached in
 * trace->syscalls.events for later filter application.
 *
 * Returns 0 on success, -1 on failure; partially-created evsels are
 * released through the goto unwind at the bottom.
 */
static int trace__add_syscall_newtp(struct trace *trace)
{
	int ret = -1;
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *sys_enter, *sys_exit;

	sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
	if (sys_enter == NULL)
		goto out;

	if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
		goto out_delete_sys_enter;

	sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
	if (sys_exit == NULL)
		goto out_delete_sys_enter;

	if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
		goto out_delete_sys_exit;

	perf_evlist__add(evlist, sys_enter);
	perf_evlist__add(evlist, sys_exit);

	if (callchain_param.enabled && !trace->kernel_syscallchains) {
		/*
		 * We're interested only in the user space callchain
		 * leading to the syscall, allow overriding that for
		 * debugging reasons using --kernel_syscall_callchains
		 */
		sys_exit->attr.exclude_callchain_kernel = 1;
	}

	trace->syscalls.events.sys_enter = sys_enter;
	trace->syscalls.events.sys_exit = sys_exit;

	ret = 0;
out:
	return ret;

out_delete_sys_exit:
	perf_evsel__delete_priv(sys_exit);
out_delete_sys_enter:
	perf_evsel__delete_priv(sys_enter);
	goto out;
}
2194
2195 static int trace__set_ev_qualifier_filter(struct trace *trace)
2196 {
2197 int err = -1;
2198 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2199 trace->ev_qualifier_ids.nr,
2200 trace->ev_qualifier_ids.entries);
2201
2202 if (filter == NULL)
2203 goto out_enomem;
2204
2205 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2206 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2207
2208 free(filter);
2209 out:
2210 return err;
2211 out_enomem:
2212 errno = ENOMEM;
2213 goto out;
2214 }
2215
/*
 * Live tracing main loop: set up the requested events (syscall
 * tracepoints, vfs_getname, page faults, sched_stat_runtime), create
 * thread/cpu maps, configure callchains, open + mmap the evlist,
 * optionally fork the workload, then consume ring buffer events until
 * interrupted or the workload drains.
 *
 * Returns 0 on success or a negative error; all exits funnel through
 * out_delete_evlist.  Note the unusual '{ char errbuf[BUFSIZ]; ... }'
 * scope after the main return: it is reachable only via the out_error_*
 * labels and exists to give those handlers a shared stack buffer.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = trace->evlist;
	struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
	int err = -1, i;
	unsigned long before;
	const bool forks = argc > 0;	/* remaining argv is a workload to run */
	bool draining = false;

	trace->live = true;

	if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
		goto out_error_raw_syscalls;

	if (trace->trace_syscalls)
		trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);

	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
		if (pgfault_maj == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_maj);
	}

	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
		if (pgfault_min == NULL)
			goto out_error_mem;
		perf_evlist__add(evlist, pgfault_min);
	}

	if (trace->sched &&
	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				   trace__sched_stat_runtime))
		goto out_error_sched_stat_runtime;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_evlist;
	}

	perf_evlist__config(evlist, &trace->opts, NULL);

	if (callchain_param.enabled) {
		bool use_identifier = false;

		if (trace->syscalls.events.sys_exit) {
			perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
						     &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_maj) {
			perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (pgfault_min) {
			perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
			use_identifier = true;
		}

		if (use_identifier) {
			/*
			 * Now we have evsels with different sample_ids, use
			 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
			 * from a fixed position in each ring buffer record.
			 *
			 * As of this the changeset introducing this comment, this
			 * isn't strictly needed, as the fields that can come before
			 * PERF_SAMPLE_ID are all used, but we'll probably disable
			 * some of those for things like copying the payload of
			 * pointer syscall arguments, and for vfs_getname we don't
			 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
			 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
			 */
			perf_evlist__set_sample_bit(evlist, IDENTIFIER);
			perf_evlist__reset_sample_bit(evlist, ID);
		}
	}

	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, NULL);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_evlist;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = bpf__apply_obj_config();
	if (err) {
		char errbuf[BUFSIZ];

		bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
		pr_err("ERROR: Apply config to BPF failed: %s\n",
			errbuf);
		goto out_error_open;
	}

	/*
	 * Better not use !target__has_task() here because we need to cover the
	 * case where no threads were specified in the command line, but a
	 * workload was, and in that case we will fill in the thread_map when
	 * we fork the workload in perf_evlist__prepare_workload.
	 */
	if (trace->filter_pids.nr > 0)
		err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
	else if (thread_map__pid(evlist->threads, 0) == -1)
		err = perf_evlist__set_filter_pid(evlist, getpid());

	if (err < 0)
		goto out_error_mem;

	if (trace->ev_qualifier_ids.nr > 0) {
		err = trace__set_ev_qualifier_filter(trace);
		if (err < 0)
			goto out_errno;

		pr_debug("event qualifier tracepoint filter: %s\n",
			 trace->syscalls.events.sys_exit->filter);
	}

	err = perf_evlist__apply_filters(evlist, &evsel);
	if (err < 0)
		goto out_error_apply_filters;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0)
		goto out_error_mmap;

	if (!target__none(&trace->opts.target))
		perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* more than one thread, or counters inherited by children => show tids */
	trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
				  evlist->threads->nr > 1 ||
				  perf_evlist__first(evlist)->attr.inherit;
again:
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			trace__handle_event(trace, event, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;

			/* SIGCHLD arrived: stop producing, keep consuming what's left */
			if (done && !draining) {
				perf_evlist__disable(evlist);
				draining = true;
			}
		}
	}

	if (trace->nr_events == before) {
		int timeout = done ? 100 : -1;

		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
				draining = true;

			goto again;
		}
	} else {
		goto again;
	}

out_disable:
	thread__zput(trace->current);

	perf_evlist__disable(evlist);

	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
					       trace->stats.vfs_getname,
					       trace->stats.proc_getname);
		}
	}

out_delete_evlist:
	perf_evlist__delete(evlist);
	trace->evlist = NULL;
	trace->live = false;
	return err;
/* error handlers below share one errbuf; reachable only via goto */
{
	char errbuf[BUFSIZ];

out_error_sched_stat_runtime:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
	goto out_error;

out_error_raw_syscalls:
	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
	goto out_error;

out_error_mmap:
	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;

out_error_apply_filters:
	fprintf(trace->output,
		"Failed to set filter \"%s\" on event %s with %d (%s)\n",
		evsel->filter, perf_evsel__name(evsel), errno,
		strerror_r(errno, errbuf, sizeof(errbuf)));
	goto out_delete_evlist;
}
out_error_mem:
	fprintf(trace->output, "Not enough memory to run!\n");
	goto out_delete_evlist;

out_errno:
	fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
	goto out_delete_evlist;
}
2474
/*
 * 'perf trace -i <file>': replay a previously recorded perf.data file
 * through the same per-event handlers used for live tracing.  Falls
 * back from raw_syscalls:* to syscalls:* tracepoint names for data
 * recorded on older kernels.  Returns 0 on success, negative on error.
 */
static int trace__replay(struct trace *trace)
{
	const struct perf_evsel_str_handler handlers[] = {
		{ "probe:vfs_getname", trace__vfs_getname, },
	};
	struct perf_data_file file = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
		.force = trace->force,
	};
	struct perf_session *session;
	struct perf_evsel *evsel;
	int err = -1;

	trace->tool.sample	  = trace__process_sample;
	trace->tool.mmap	  = perf_event__process_mmap;
	trace->tool.mmap2	  = perf_event__process_mmap2;
	trace->tool.comm	  = perf_event__process_comm;
	trace->tool.exit	  = perf_event__process_exit;
	trace->tool.fork	  = perf_event__process_fork;
	trace->tool.attr	  = perf_event__process_attr;
	trace->tool.tracing_data = perf_event__process_tracing_data;
	trace->tool.build_id	  = perf_event__process_build_id;

	trace->tool.ordered_events = true;
	trace->tool.ordering_requires_timestamps = true;

	/* add tid to output */
	trace->multiple_threads = true;

	session = perf_session__new(&file, false, &trace->tool);
	if (session == NULL)
		return -1;

	if (symbol__init(&session->header.env) < 0)
		goto out;

	trace->host = &session->machines.host;

	err = perf_session__set_tracepoints_handlers(session, handlers);
	if (err)
		goto out;

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_enter");
	/* older kernels have syscalls tp versus raw_syscalls */
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_enter");

	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
	     perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
		pr_err("Error during initialize raw_syscalls:sys_enter event\n");
		goto out;
	}

	evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
						     "raw_syscalls:sys_exit");
	if (evsel == NULL)
		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
							     "syscalls:sys_exit");
	if (evsel &&
	    (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
	     perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
		pr_err("Error during initialize raw_syscalls:sys_exit event\n");
		goto out;
	}

	/* wire recorded page-fault software events to our handler */
	evlist__for_each(session->evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
		    (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
		     evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
			evsel->handler = trace__pgfault;
	}

	err = parse_target_str(trace);
	if (err != 0)
		goto out;

	setup_pager();

	err = perf_session__process_events(session);
	if (err)
		pr_err("Failed to process events, error %d", err);

	else if (trace->summary)
		trace__fprintf_thread_summary(trace, trace->output);

out:
	perf_session__delete(session);

	return err;
}
2570
/* Emit the summary section heading; returns the character count printed. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2579
/*
 * Re-sortable view of a thread's per-syscall stats intlist, ordered by
 * total time spent (msecs), larger first (see the a->msecs > b->msecs
 * comparison).  The body below is the DEFINE_RESORT_RB "fill one entry"
 * callback: 'nd' is a node of the source intlist, 'entry' the resort
 * entry being populated.
 */
DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
	struct stats *stats;
	double msecs;
	int syscall;
)
{
	struct int_node *source = rb_entry(nd, struct int_node, rb_node);
	struct stats *stats = source->priv;

	entry->syscall = source->i;
	entry->stats = stats;
	/* total = nr_samples * average, converted from ns to msec */
	entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
}
2593
/*
 * Print one thread's per-syscall statistics table (calls, total, min,
 * avg, max, stddev%), sorted by total time via the syscall_stats resort
 * defined above.  Returns the number of characters printed, 0 when the
 * resort allocation fails.
 */
static size_t thread__dump_stats(struct thread_trace *ttrace,
				 struct trace *trace, FILE *fp)
{
	size_t printed = 0;
	struct syscall *sc;
	struct rb_node *nd;
	DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);

	if (syscall_stats == NULL)
		return 0;

	printed += fprintf(fp, "\n");

	printed += fprintf(fp, " syscall calls total min avg max stddev\n");
	printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
	printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");

	resort_rb__for_each(nd, syscall_stats) {
		struct stats *stats = syscall_stats_entry->stats;
		if (stats) {
			/* min/max/avg are stored in ns; report in msec */
			double min = (double)(stats->min) / NSEC_PER_MSEC;
			double max = (double)(stats->max) / NSEC_PER_MSEC;
			double avg = avg_stats(stats);
			double pct;
			u64 n = (u64) stats->n;

			/* relative stddev as a percentage of the mean */
			pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
			avg /= NSEC_PER_MSEC;

			sc = &trace->syscalls.table[syscall_stats_entry->syscall];
			printed += fprintf(fp, " %-15s", sc->name);
			printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
					   n, syscall_stats_entry->msecs, min, avg);
			printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
		}
	}

	resort_rb__delete(syscall_stats);
	printed += fprintf(fp, "\n\n");

	return printed;
}
2636
2637 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2638 {
2639 size_t printed = 0;
2640 struct thread_trace *ttrace = thread__priv(thread);
2641 double ratio;
2642
2643 if (ttrace == NULL)
2644 return 0;
2645
2646 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2647
2648 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2649 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2650 printed += fprintf(fp, "%.1f%%", ratio);
2651 if (ttrace->pfmaj)
2652 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2653 if (ttrace->pfmin)
2654 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2655 if (trace->sched)
2656 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2657 else if (fputc('\n', fp) != EOF)
2658 ++printed;
2659
2660 printed += thread__dump_stats(ttrace, trace, fp);
2661
2662 return printed;
2663 }
2664
2665 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2666 {
2667 return ttrace ? ttrace->nr_events : 0;
2668 }
2669
/*
 * Re-sortable view of the host machine's threads, ordered by how many
 * trace events each produced (see the thread__nr_events comparison).
 * The body is the "fill one entry" callback: 'nd' walks the machine's
 * thread rb_tree, 'entry' is the resort entry being populated.
 */
DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
	struct thread *thread;
)
{
	entry->thread = rb_entry(nd, struct thread, rb_node);
}
2676
/*
 * Print the end-of-run " Summary of events:" section: one block per
 * host thread, ordered by per-thread event count via the threads
 * resort above.  Returns characters printed; 0 when sorting fails
 * (the header may already have been emitted by then).
 */
static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
{
	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
	size_t printed = trace__fprintf_threads_header(fp);
	struct rb_node *nd;

	if (threads == NULL) {
		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
		return 0;
	}

	/* threads_entry is provided by the resort_rb__for_each machinery */
	resort_rb__for_each(nd, threads)
		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);

	resort_rb__delete(threads);

	return printed;
}
2695
2696 static int trace__set_duration(const struct option *opt, const char *str,
2697 int unset __maybe_unused)
2698 {
2699 struct trace *trace = opt->value;
2700
2701 trace->duration_filter = atof(str);
2702 return 0;
2703 }
2704
2705 static int trace__set_filter_pids(const struct option *opt, const char *str,
2706 int unset __maybe_unused)
2707 {
2708 int ret = -1;
2709 size_t i;
2710 struct trace *trace = opt->value;
2711 /*
2712 * FIXME: introduce a intarray class, plain parse csv and create a
2713 * { int nr, int entries[] } struct...
2714 */
2715 struct intlist *list = intlist__new(str);
2716
2717 if (list == NULL)
2718 return -1;
2719
2720 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2721 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2722
2723 if (trace->filter_pids.entries == NULL)
2724 goto out;
2725
2726 trace->filter_pids.entries[0] = getpid();
2727
2728 for (i = 1; i < trace->filter_pids.nr; ++i)
2729 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2730
2731 intlist__delete(list);
2732 ret = 0;
2733 out:
2734 return ret;
2735 }
2736
2737 static int trace__open_output(struct trace *trace, const char *filename)
2738 {
2739 struct stat st;
2740
2741 if (!stat(filename, &st) && st.st_size) {
2742 char oldname[PATH_MAX];
2743
2744 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2745 unlink(oldname);
2746 rename(filename, oldname);
2747 }
2748
2749 trace->output = fopen(filename, "w");
2750
2751 return trace->output == NULL ? -errno : 0;
2752 }
2753
2754 static int parse_pagefaults(const struct option *opt, const char *str,
2755 int unset __maybe_unused)
2756 {
2757 int *trace_pgfaults = opt->value;
2758
2759 if (strcmp(str, "all") == 0)
2760 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2761 else if (strcmp(str, "maj") == 0)
2762 *trace_pgfaults |= TRACE_PFMAJ;
2763 else if (strcmp(str, "min") == 0)
2764 *trace_pgfaults |= TRACE_PFMIN;
2765 else
2766 return -1;
2767
2768 return 0;
2769 }
2770
2771 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2772 {
2773 struct perf_evsel *evsel;
2774
2775 evlist__for_each(evlist, evsel)
2776 evsel->handler = handler;
2777 }
2778
/*
 * Entry point for 'perf trace': parse options, decide between the
 * 'record' subcommand, live tracing (trace__run) and replay of a
 * perf.data file (trace__replay), validating option combinations and
 * setting derived defaults (callchain mmap size, max stack, etc.)
 * along the way.
 */
int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char *trace_usage[] = {
		"perf trace [<options>] [<command>]",
		"perf trace [<options>] -- <command> [<options>]",
		"perf trace record [<options>] [<command>]",
		"perf trace record [<options>] -- <command> [<options>]",
		NULL
	};
	struct trace trace = {
		.syscalls = {
			.max = -1,	/* syscall table not yet sized */
		},
		.opts = {
			.target = {
				.uid = UINT_MAX,
				.uses_mmap = true,
			},
			.user_freq = UINT_MAX,
			.user_interval = ULLONG_MAX,
			.no_buffering = true,
			.mmap_pages = UINT_MAX,
			.proc_map_timeout = 500,
		},
		.output = stderr,
		.show_comm = true,
		.trace_syscalls = true,
		.kernel_syscallchains = false,
		.max_stack = UINT_MAX,	/* sentinel: user didn't set --max-stack */
	};
	const char *output_name = NULL;
	const char *ev_qualifier_str = NULL;
	const struct option trace_options[] = {
	OPT_CALLBACK(0, "event", &trace.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_BOOLEAN(0, "comm", &trace.show_comm,
		    "show the thread COMM next to its id"),
	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
	OPT_STRING('o', "output", &output_name, "file", "output file name"),
	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
		    "trace events on existing process id"),
	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
		    "trace events on existing thread id"),
	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
		     "pids to filter (by the kernel)", trace__set_filter_pids),
	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
		     "number of mmap data pages",
		     perf_evlist__parse_mmap_pages),
	OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
		   "user to profile"),
	OPT_CALLBACK(0, "duration", &trace, "float",
		     "show only events with duration > N.M ms",
		     trace__set_duration),
	OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
	OPT_BOOLEAN('T', "time", &trace.full_time,
		    "Show full timestamp, not time relative to first start"),
	OPT_BOOLEAN('s', "summary", &trace.summary_only,
		    "Show only syscall summary with statistics"),
	OPT_BOOLEAN('S', "with-summary", &trace.summary,
		    "Show all syscalls and summary with statistics"),
	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
		     "Trace pagefaults", parse_pagefaults, "maj"),
	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
	OPT_CALLBACK(0, "call-graph", &trace.opts,
		     "record_mode[,record_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
		    "Show the kernel callchains on the syscall exit path"),
	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
		     "Set the minimum stack depth when parsing the callchain, "
		     "anything below the specified depth will be ignored."),
	OPT_UINTEGER(0, "max-stack", &trace.max_stack,
		     "Set the maximum stack depth when parsing the callchain, "
		     "anything beyond the specified depth will be ignored. "
		     "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
			"per thread proc mmap processing timeout in ms"),
	OPT_END()
	};
	bool __maybe_unused max_stack_user_set = true;
	bool mmap_pages_user_set = true;
	const char * const trace_subcommands[] = { "record", NULL };
	int err;
	char bf[BUFSIZ];

	signal(SIGSEGV, sighandler_dump_stack);
	signal(SIGFPE, sighandler_dump_stack);

	trace.evlist = perf_evlist__new();
	trace.sctbl = syscalltbl__new();

	if (trace.evlist == NULL || trace.sctbl == NULL) {
		pr_err("Not enough memory to run!\n");
		err = -ENOMEM;
		goto out;
	}

	argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
				 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);

	err = bpf__setup_stdout(trace.evlist);
	if (err) {
		bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
		pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
		goto out;
	}

	err = -1;

	/* page fault samples need an address and timestamp to be useful */
	if (trace.trace_pgfaults) {
		trace.opts.sample_address = true;
		trace.opts.sample_time = true;
	}

	if (trace.opts.mmap_pages == UINT_MAX)
		mmap_pages_user_set = false;

	if (trace.max_stack == UINT_MAX) {
		trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
		max_stack_user_set = false;
	}

#ifdef HAVE_DWARF_UNWIND_SUPPORT
	/* a stack depth option implies the user wants callchains: default to dwarf */
	if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled)
		record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
#endif

	if (callchain_param.enabled) {
		/* dwarf unwinding copies stack snapshots: grow the ring buffer */
		if (!mmap_pages_user_set && geteuid() == 0)
			trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;

		symbol_conf.use_callchain = true;
	}

	if (trace.evlist->nr_entries > 0)
		evlist__set_evsel_handler(trace.evlist, trace__event_handler);

	if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
		return trace__record(&trace, argc-1, &argv[1]);

	/* summary_only implies summary option, but don't overwrite summary if set */
	if (trace.summary_only)
		trace.summary = trace.summary_only;

	if (!trace.trace_syscalls && !trace.trace_pgfaults &&
	    trace.evlist->nr_entries == 0 /* Was --events used? */) {
		pr_err("Please specify something to trace.\n");
		return -1;
	}

	if (!trace.trace_syscalls && ev_qualifier_str) {
		pr_err("The -e option can't be used with --no-syscalls.\n");
		goto out;
	}

	if (output_name != NULL) {
		err = trace__open_output(&trace, output_name);
		if (err < 0) {
			perror("failed to create output file");
			goto out;
		}
	}

	trace.open_id = syscalltbl__id(trace.sctbl, "open");

	if (ev_qualifier_str != NULL) {
		const char *s = ev_qualifier_str;
		struct strlist_config slist_config = {
			.dirname = system_path(STRACE_GROUPS_DIR),
		};

		/* a leading '!' negates the list: trace everything BUT these */
		trace.not_ev_qualifier = *s == '!';
		if (trace.not_ev_qualifier)
			++s;
		trace.ev_qualifier = strlist__new(s, &slist_config);
		if (trace.ev_qualifier == NULL) {
			fputs("Not enough memory to parse event qualifier",
			      trace.output);
			err = -ENOMEM;
			goto out_close;
		}

		err = trace__validate_ev_qualifier(&trace);
		if (err)
			goto out_close;
	}

	err = target__validate(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	err = target__parse_uid(&trace.opts.target);
	if (err) {
		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
		fprintf(trace.output, "%s", bf);
		goto out_close;
	}

	/* no workload and no target: trace the whole system */
	if (!argc && target__none(&trace.opts.target))
		trace.opts.target.system_wide = true;

	if (input_name)
		err = trace__replay(&trace);
	else
		err = trace__run(&trace, argc, argv);

out_close:
	if (output_name != NULL)
		fclose(trace.output);
out:
	return err;
}
This page took 0.120068 seconds and 4 git commands to generate.