tools lib traceevent: Add pevent_unregister_event_handler()
[deliverable/linux.git] / tools / perf / builtin-trace.c
#include <traceevent/event-parse.h>
#include "builtin.h"
#include "util/color.h"
#include "util/debug.h"
#include "util/evlist.h"
#include "util/machine.h"
#include "util/session.h"
#include "util/thread.h"
#include "util/parse-options.h"
#include "util/strlist.h"
#include "util/intlist.h"
#include "util/thread_map.h"
#include "util/stat.h"
#include "trace-event.h"
#include "util/parse-events.h"

#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
22
/*
 * Fallbacks for older distros: each constant below is only defined here
 * when the system headers did not already provide it.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif
39
40 struct tp_field {
41 int offset;
42 union {
43 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45 };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51 return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63 return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71 struct format_field *format_field,
72 bool needs_swap)
73 {
74 field->offset = format_field->offset;
75
76 switch (format_field->size) {
77 case 1:
78 field->integer = tp_field__u8;
79 break;
80 case 2:
81 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82 break;
83 case 4:
84 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85 break;
86 case 8:
87 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88 break;
89 default:
90 return -1;
91 }
92
93 return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98 return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103 field->offset = format_field->offset;
104 field->pointer = tp_field__ptr;
105 return 0;
106 }
107
108 struct syscall_tp {
109 struct tp_field id;
110 union {
111 struct tp_field args, ret;
112 };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116 struct tp_field *field,
117 const char *name)
118 {
119 struct format_field *format_field = perf_evsel__field(evsel, name);
120
121 if (format_field == NULL)
122 return -1;
123
124 return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
/*
 * Initialise the integer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.  Evaluates to the
 * return value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
130
131 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
132 struct tp_field *field,
133 const char *name)
134 {
135 struct format_field *format_field = perf_evsel__field(evsel, name);
136
137 if (format_field == NULL)
138 return -1;
139
140 return tp_field__init_ptr(field, format_field);
141 }
142
/*
 * Initialise the pointer member @name of the struct syscall_tp hanging off
 * evsel->priv from the tracepoint field of the same name.  Evaluates to the
 * return value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149 zfree(&evsel->priv);
150 perf_evsel__delete(evsel);
151 }
152
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 {
155 evsel->priv = malloc(sizeof(struct syscall_tp));
156 if (evsel->priv != NULL) {
157 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
158 goto out_delete;
159
160 evsel->handler = handler;
161 return 0;
162 }
163
164 return -ENOMEM;
165
166 out_delete:
167 zfree(&evsel->priv);
168 return -ENOENT;
169 }
170
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>" (older kernels, e.g. RHEL6), and set up
 * its syscall_tp private data with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the private data could not be initialised (the evsel is deleted then).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
190
/*
 * Read the integer syscall_tp member @name of @evsel out of @sample by
 * calling the reader installed in that member.
 *
 * @evsel is parenthesised so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })
194
/*
 * Get a pointer to the syscall_tp member @name of @evsel inside @sample by
 * calling the reader installed in that member.
 *
 * @evsel is parenthesised so that any pointer-valued expression can be
 * passed, not just a plain identifier.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
198
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200 void *sys_enter_handler,
201 void *sys_exit_handler)
202 {
203 int ret = -1;
204 struct perf_evsel *sys_enter, *sys_exit;
205
206 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207 if (sys_enter == NULL)
208 goto out;
209
210 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211 goto out_delete_sys_enter;
212
213 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214 if (sys_exit == NULL)
215 goto out_delete_sys_enter;
216
217 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218 goto out_delete_sys_exit;
219
220 perf_evlist__add(evlist, sys_enter);
221 perf_evlist__add(evlist, sys_exit);
222
223 ret = 0;
224 out:
225 return ret;
226
227 out_delete_sys_exit:
228 perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230 perf_evsel__delete_priv(sys_enter);
231 goto out;
232 }
233
234
235 struct syscall_arg {
236 unsigned long val;
237 struct thread *thread;
238 struct trace *trace;
239 void *parm;
240 u8 idx;
241 u8 mask;
242 };
243
/*
 * A table of names for a contiguous range of integer values:
 * entries[0] names the value 'offset', entries[1] names 'offset + 1', ...
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};

/* Defines strarray__<array> describing <array>, first entry naming value 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, but the first entry of <array> names the value <off>. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
260
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
262 const char *intfmt,
263 struct syscall_arg *arg)
264 {
265 struct strarray *sa = arg->parm;
266 int idx = arg->val - sa->offset;
267
268 if (idx < 0 || idx >= sa->nr_entries)
269 return scnprintf(bf, size, intfmt, arg->val);
270
271 return scnprintf(bf, size, "%s", sa->entries[idx]);
272 }
273
/* strarray formatter that prints values without a name in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
281
/* strarray formatter that prints values without a name in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
289
/*
 * File descriptor formatter.  Only declared at this point so that the
 * formatters and tables below can refer to it.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
294
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296 struct syscall_arg *arg)
297 {
298 int fd = arg->val;
299
300 if (fd == AT_FDCWD)
301 return scnprintf(bf, size, "CWD");
302
303 return syscall_arg__scnprintf_fd(bf, size, arg);
304 }
305
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
307
/*
 * Formatter used for the fd argument of close().  Only declared at this
 * point so that the syscall_fmts table below can refer to it.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
312
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314 struct syscall_arg *arg)
315 {
316 return scnprintf(bf, size, "%#lx", arg->val);
317 }
318
319 #define SCA_HEX syscall_arg__scnprintf_hex
320
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322 struct syscall_arg *arg)
323 {
324 int printed = 0, prot = arg->val;
325
326 if (prot == PROT_NONE)
327 return scnprintf(bf, size, "NONE");
328 #define P_MMAP_PROT(n) \
329 if (prot & PROT_##n) { \
330 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
331 prot &= ~PROT_##n; \
332 }
333
334 P_MMAP_PROT(EXEC);
335 P_MMAP_PROT(READ);
336 P_MMAP_PROT(WRITE);
337 #ifdef PROT_SEM
338 P_MMAP_PROT(SEM);
339 #endif
340 P_MMAP_PROT(GROWSDOWN);
341 P_MMAP_PROT(GROWSUP);
342 #undef P_MMAP_PROT
343
344 if (prot)
345 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
346
347 return printed;
348 }
349
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
351
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353 struct syscall_arg *arg)
354 {
355 int printed = 0, flags = arg->val;
356
357 #define P_MMAP_FLAG(n) \
358 if (flags & MAP_##n) { \
359 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
360 flags &= ~MAP_##n; \
361 }
362
363 P_MMAP_FLAG(SHARED);
364 P_MMAP_FLAG(PRIVATE);
365 #ifdef MAP_32BIT
366 P_MMAP_FLAG(32BIT);
367 #endif
368 P_MMAP_FLAG(ANONYMOUS);
369 P_MMAP_FLAG(DENYWRITE);
370 P_MMAP_FLAG(EXECUTABLE);
371 P_MMAP_FLAG(FILE);
372 P_MMAP_FLAG(FIXED);
373 P_MMAP_FLAG(GROWSDOWN);
374 #ifdef MAP_HUGETLB
375 P_MMAP_FLAG(HUGETLB);
376 #endif
377 P_MMAP_FLAG(LOCKED);
378 P_MMAP_FLAG(NONBLOCK);
379 P_MMAP_FLAG(NORESERVE);
380 P_MMAP_FLAG(POPULATE);
381 P_MMAP_FLAG(STACK);
382 #ifdef MAP_UNINITIALIZED
383 P_MMAP_FLAG(UNINITIALIZED);
384 #endif
385 #undef P_MMAP_FLAG
386
387 if (flags)
388 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
389
390 return printed;
391 }
392
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
394
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396 struct syscall_arg *arg)
397 {
398 int behavior = arg->val;
399
400 switch (behavior) {
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
402 P_MADV_BHV(NORMAL);
403 P_MADV_BHV(RANDOM);
404 P_MADV_BHV(SEQUENTIAL);
405 P_MADV_BHV(WILLNEED);
406 P_MADV_BHV(DONTNEED);
407 P_MADV_BHV(REMOVE);
408 P_MADV_BHV(DONTFORK);
409 P_MADV_BHV(DOFORK);
410 P_MADV_BHV(HWPOISON);
411 #ifdef MADV_SOFT_OFFLINE
412 P_MADV_BHV(SOFT_OFFLINE);
413 #endif
414 P_MADV_BHV(MERGEABLE);
415 P_MADV_BHV(UNMERGEABLE);
416 #ifdef MADV_HUGEPAGE
417 P_MADV_BHV(HUGEPAGE);
418 #endif
419 #ifdef MADV_NOHUGEPAGE
420 P_MADV_BHV(NOHUGEPAGE);
421 #endif
422 #ifdef MADV_DONTDUMP
423 P_MADV_BHV(DONTDUMP);
424 #endif
425 #ifdef MADV_DODUMP
426 P_MADV_BHV(DODUMP);
427 #endif
428 #undef P_MADV_PHV
429 default: break;
430 }
431
432 return scnprintf(bf, size, "%#x", behavior);
433 }
434
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
436
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438 struct syscall_arg *arg)
439 {
440 int printed = 0, op = arg->val;
441
442 if (op == 0)
443 return scnprintf(bf, size, "NONE");
444 #define P_CMD(cmd) \
445 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
447 op &= ~LOCK_##cmd; \
448 }
449
450 P_CMD(SH);
451 P_CMD(EX);
452 P_CMD(NB);
453 P_CMD(UN);
454 P_CMD(MAND);
455 P_CMD(RW);
456 P_CMD(READ);
457 P_CMD(WRITE);
458 #undef P_OP
459
460 if (op)
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
462
463 return printed;
464 }
465
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
467
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
469 {
470 enum syscall_futex_args {
471 SCF_UADDR = (1 << 0),
472 SCF_OP = (1 << 1),
473 SCF_VAL = (1 << 2),
474 SCF_TIMEOUT = (1 << 3),
475 SCF_UADDR2 = (1 << 4),
476 SCF_VAL3 = (1 << 5),
477 };
478 int op = arg->val;
479 int cmd = op & FUTEX_CMD_MASK;
480 size_t printed = 0;
481
482 switch (cmd) {
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
485 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
488 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
489 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
490 P_FUTEX_OP(WAKE_OP); break;
491 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
494 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
496 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
497 default: printed = scnprintf(bf, size, "%#x", cmd); break;
498 }
499
500 if (op & FUTEX_PRIVATE_FLAG)
501 printed += scnprintf(bf + printed, size - printed, "|PRIV");
502
503 if (op & FUTEX_CLOCK_REALTIME)
504 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
505
506 return printed;
507 }
508
509 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
510
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
513
/* Names for the 'which' argument of getitimer/setitimer, starting at 0. */
static const char *itimers[] = {
	"REAL",
	"VIRTUAL",
	"PROF",
};
static DEFINE_STRARRAY(itimers);
516
/*
 * lseek 'whence' names, starting at 0.  "DATA" and "HOLE" are only
 * included when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = {
	"SET",
	"CUR",
	"END",
#ifdef SEEK_DATA
	"DATA",
#endif
#ifdef SEEK_HOLE
	"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);
526
/* fcntl 'cmd' names, starting at 0. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL",
	"SETFL", "GETLK", "SETLK", "SETLKW",
	"SETOWN", "GETOWN", "SETSIG", "GETSIG",
	"F_GETLK64", "F_SETLK64", "F_SETLKW64",
	"F_SETOWN_EX", "F_GETOWN_EX", "F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);
534
/* Names for the 'resource' argument of the *rlimit syscalls, starting at 0. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK",
	"CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING",
	"MSGQUEUE", "NICE", "RTPRIO", "RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);
541
/* Names for the 'how' argument of rt_sigprocmask, starting at 0. */
static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);
544
/*
 * Clock id names, starting at 0 (CLOCK_REALTIME).  Extended past
 * MONOTONIC_COARSE so that ids 7-11 (BOOTTIME .. TAI) are shown by name
 * instead of as plain numbers.
 */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
	"BOOTTIME", "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI",
};
static DEFINE_STRARRAY(clockid);
550
/*
 * Address family names indexed by AF_* value, starting at 0 (AF_UNSPEC).
 *
 * The slot for family 28 (AF_MPLS) has to be present: without it every
 * name from "CAN" (AF_CAN == 29) onwards sits one position too early and
 * e.g. AF_BLUETOOTH (31) would be shown as "IUCV".
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
560
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
563 #endif
564
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566 struct syscall_arg *arg)
567 {
568 size_t printed;
569 int type = arg->val,
570 flags = type & ~SOCK_TYPE_MASK;
571
572 type &= SOCK_TYPE_MASK;
573 /*
574 * Can't use a strarray, MIPS may override for ABI reasons.
575 */
576 switch (type) {
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
578 P_SK_TYPE(STREAM);
579 P_SK_TYPE(DGRAM);
580 P_SK_TYPE(RAW);
581 P_SK_TYPE(RDM);
582 P_SK_TYPE(SEQPACKET);
583 P_SK_TYPE(DCCP);
584 P_SK_TYPE(PACKET);
585 #undef P_SK_TYPE
586 default:
587 printed = scnprintf(bf, size, "%#x", type);
588 }
589
590 #define P_SK_FLAG(n) \
591 if (flags & SOCK_##n) { \
592 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593 flags &= ~SOCK_##n; \
594 }
595
596 P_SK_FLAG(CLOEXEC);
597 P_SK_FLAG(NONBLOCK);
598 #undef P_SK_FLAG
599
600 if (flags)
601 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
602
603 return printed;
604 }
605
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
607
/* MSG_* flags that the system headers may not provide. */
#ifndef MSG_PROBE
#define MSG_PROBE		0x10
#endif

#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE		0x10000
#endif

#ifndef MSG_SENDPAGE_NOTLAST
#define MSG_SENDPAGE_NOTLAST	0x20000
#endif

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN		0x20000000
#endif
620
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622 struct syscall_arg *arg)
623 {
624 int printed = 0, flags = arg->val;
625
626 if (flags == 0)
627 return scnprintf(bf, size, "NONE");
628 #define P_MSG_FLAG(n) \
629 if (flags & MSG_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
631 flags &= ~MSG_##n; \
632 }
633
634 P_MSG_FLAG(OOB);
635 P_MSG_FLAG(PEEK);
636 P_MSG_FLAG(DONTROUTE);
637 P_MSG_FLAG(TRYHARD);
638 P_MSG_FLAG(CTRUNC);
639 P_MSG_FLAG(PROBE);
640 P_MSG_FLAG(TRUNC);
641 P_MSG_FLAG(DONTWAIT);
642 P_MSG_FLAG(EOR);
643 P_MSG_FLAG(WAITALL);
644 P_MSG_FLAG(FIN);
645 P_MSG_FLAG(SYN);
646 P_MSG_FLAG(CONFIRM);
647 P_MSG_FLAG(RST);
648 P_MSG_FLAG(ERRQUEUE);
649 P_MSG_FLAG(NOSIGNAL);
650 P_MSG_FLAG(MORE);
651 P_MSG_FLAG(WAITFORONE);
652 P_MSG_FLAG(SENDPAGE_NOTLAST);
653 P_MSG_FLAG(FASTOPEN);
654 P_MSG_FLAG(CMSG_CLOEXEC);
655 #undef P_MSG_FLAG
656
657 if (flags)
658 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
659
660 return printed;
661 }
662
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
664
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666 struct syscall_arg *arg)
667 {
668 size_t printed = 0;
669 int mode = arg->val;
670
671 if (mode == F_OK) /* 0 */
672 return scnprintf(bf, size, "F");
673 #define P_MODE(n) \
674 if (mode & n##_OK) { \
675 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
676 mode &= ~n##_OK; \
677 }
678
679 P_MODE(R);
680 P_MODE(W);
681 P_MODE(X);
682 #undef P_MODE
683
684 if (mode)
685 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
686
687 return printed;
688 }
689
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
691
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693 struct syscall_arg *arg)
694 {
695 int printed = 0, flags = arg->val;
696
697 if (!(flags & O_CREAT))
698 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
699
700 if (flags == 0)
701 return scnprintf(bf, size, "RDONLY");
702 #define P_FLAG(n) \
703 if (flags & O_##n) { \
704 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
705 flags &= ~O_##n; \
706 }
707
708 P_FLAG(APPEND);
709 P_FLAG(ASYNC);
710 P_FLAG(CLOEXEC);
711 P_FLAG(CREAT);
712 P_FLAG(DIRECT);
713 P_FLAG(DIRECTORY);
714 P_FLAG(EXCL);
715 P_FLAG(LARGEFILE);
716 P_FLAG(NOATIME);
717 P_FLAG(NOCTTY);
718 #ifdef O_NONBLOCK
719 P_FLAG(NONBLOCK);
720 #elif O_NDELAY
721 P_FLAG(NDELAY);
722 #endif
723 #ifdef O_PATH
724 P_FLAG(PATH);
725 #endif
726 P_FLAG(RDWR);
727 #ifdef O_DSYNC
728 if ((flags & O_SYNC) == O_SYNC)
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
730 else {
731 P_FLAG(DSYNC);
732 }
733 #else
734 P_FLAG(SYNC);
735 #endif
736 P_FLAG(TRUNC);
737 P_FLAG(WRONLY);
738 #undef P_FLAG
739
740 if (flags)
741 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
742
743 return printed;
744 }
745
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
747
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749 struct syscall_arg *arg)
750 {
751 int printed = 0, flags = arg->val;
752
753 if (flags == 0)
754 return scnprintf(bf, size, "NONE");
755 #define P_FLAG(n) \
756 if (flags & EFD_##n) { \
757 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
758 flags &= ~EFD_##n; \
759 }
760
761 P_FLAG(SEMAPHORE);
762 P_FLAG(CLOEXEC);
763 P_FLAG(NONBLOCK);
764 #undef P_FLAG
765
766 if (flags)
767 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
768
769 return printed;
770 }
771
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
773
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775 struct syscall_arg *arg)
776 {
777 int printed = 0, flags = arg->val;
778
779 #define P_FLAG(n) \
780 if (flags & O_##n) { \
781 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782 flags &= ~O_##n; \
783 }
784
785 P_FLAG(CLOEXEC);
786 P_FLAG(NONBLOCK);
787 #undef P_FLAG
788
789 if (flags)
790 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
791
792 return printed;
793 }
794
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
796
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
798 {
799 int sig = arg->val;
800
801 switch (sig) {
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
803 P_SIGNUM(HUP);
804 P_SIGNUM(INT);
805 P_SIGNUM(QUIT);
806 P_SIGNUM(ILL);
807 P_SIGNUM(TRAP);
808 P_SIGNUM(ABRT);
809 P_SIGNUM(BUS);
810 P_SIGNUM(FPE);
811 P_SIGNUM(KILL);
812 P_SIGNUM(USR1);
813 P_SIGNUM(SEGV);
814 P_SIGNUM(USR2);
815 P_SIGNUM(PIPE);
816 P_SIGNUM(ALRM);
817 P_SIGNUM(TERM);
818 P_SIGNUM(STKFLT);
819 P_SIGNUM(CHLD);
820 P_SIGNUM(CONT);
821 P_SIGNUM(STOP);
822 P_SIGNUM(TSTP);
823 P_SIGNUM(TTIN);
824 P_SIGNUM(TTOU);
825 P_SIGNUM(URG);
826 P_SIGNUM(XCPU);
827 P_SIGNUM(XFSZ);
828 P_SIGNUM(VTALRM);
829 P_SIGNUM(PROF);
830 P_SIGNUM(WINCH);
831 P_SIGNUM(IO);
832 P_SIGNUM(PWR);
833 P_SIGNUM(SYS);
834 default: break;
835 }
836
837 return scnprintf(bf, size, "%#x", sig);
838 }
839
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
841
842 #define TCGETS 0x5401
843
844 static const char *tioctls[] = {
845 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
846 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
847 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
848 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
849 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
850 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
851 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
852 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
853 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
854 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
855 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
856 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
857 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
858 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
859 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
860 };
861
862 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
863
/*
 * Shorthand for syscall_fmts entries: format argument number <arg> with
 * SCA_STRARRAY using strarray__<array>.  <name> is not used in the
 * expansion, it only documents the name of the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
867
868 static struct syscall_fmt {
869 const char *name;
870 const char *alias;
871 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
872 void *arg_parm[6];
873 bool errmsg;
874 bool timeout;
875 bool hexret;
876 } syscall_fmts[] = {
877 { .name = "access", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
880 { .name = "brk", .hexret = true,
881 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
883 { .name = "close", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
885 { .name = "connect", .errmsg = true, },
886 { .name = "dup", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "dup2", .errmsg = true,
889 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
890 { .name = "dup3", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893 { .name = "eventfd2", .errmsg = true,
894 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895 { .name = "faccessat", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897 { .name = "fadvise64", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fallocate", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchdir", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fchmod", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchmodat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fchown", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
909 { .name = "fchownat", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
911 { .name = "fcntl", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */
913 [1] = SCA_STRARRAY, /* cmd */ },
914 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915 { .name = "fdatasync", .errmsg = true,
916 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
917 { .name = "flock", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */
919 [1] = SCA_FLOCK, /* cmd */ }, },
920 { .name = "fsetxattr", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fstat", .errmsg = true, .alias = "newfstat",
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
925 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
926 { .name = "fstatfs", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fsync", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
930 { .name = "ftruncate", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "futex", .errmsg = true,
933 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934 { .name = "futimesat", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
936 { .name = "getdents", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "getdents64", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
941 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942 { .name = "ioctl", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_FD, /* fd */
944 [1] = SCA_STRHEXARRAY, /* cmd */
945 [2] = SCA_HEX, /* arg */ },
946 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
947 { .name = "kill", .errmsg = true,
948 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949 { .name = "linkat", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
951 { .name = "lseek", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */
953 [2] = SCA_STRARRAY, /* whence */ },
954 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
955 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
956 { .name = "madvise", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_HEX, /* start */
958 [2] = SCA_MADV_BHV, /* behavior */ }, },
959 { .name = "mkdirat", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
961 { .name = "mknodat", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
963 { .name = "mlock", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 { .name = "mlockall", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 { .name = "mmap", .hexret = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
969 [2] = SCA_MMAP_PROT, /* prot */
970 [3] = SCA_MMAP_FLAGS, /* flags */
971 [4] = SCA_FD, /* fd */ }, },
972 { .name = "mprotect", .errmsg = true,
973 .arg_scnprintf = { [0] = SCA_HEX, /* start */
974 [2] = SCA_MMAP_PROT, /* prot */ }, },
975 { .name = "mremap", .hexret = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977 [4] = SCA_HEX, /* new_addr */ }, },
978 { .name = "munlock", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980 { .name = "munmap", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982 { .name = "name_to_handle_at", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
984 { .name = "newfstatat", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
986 { .name = "open", .errmsg = true,
987 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988 { .name = "open_by_handle_at", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991 { .name = "openat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994 { .name = "pipe2", .errmsg = true,
995 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996 { .name = "poll", .errmsg = true, .timeout = true, },
997 { .name = "ppoll", .errmsg = true, .timeout = true, },
998 { .name = "pread", .errmsg = true, .alias = "pread64",
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "preadv", .errmsg = true, .alias = "pread",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "pwritev", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1007 { .name = "read", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "readlinkat", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1011 { .name = "readv", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "recvfrom", .errmsg = true,
1014 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "recvmmsg", .errmsg = true,
1016 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017 { .name = "recvmsg", .errmsg = true,
1018 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "renameat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1021 { .name = "rt_sigaction", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1024 { .name = "rt_sigqueueinfo", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1027 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028 { .name = "select", .errmsg = true, .timeout = true, },
1029 { .name = "sendmmsg", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "sendmsg", .errmsg = true,
1032 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033 { .name = "sendto", .errmsg = true,
1034 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1036 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037 { .name = "shutdown", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "socket", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "socketpair", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045 [1] = SCA_SK_TYPE, /* type */ },
1046 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1047 { .name = "stat", .errmsg = true, .alias = "newstat", },
1048 { .name = "symlinkat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1050 { .name = "tgkill", .errmsg = true,
1051 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "tkill", .errmsg = true,
1053 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054 { .name = "uname", .errmsg = true, .alias = "newuname", },
1055 { .name = "unlinkat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057 { .name = "utimensat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059 { .name = "write", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "writev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 };
1064
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1066 {
1067 const struct syscall_fmt *fmt = fmtp;
1068 return strcmp(name, fmt->name);
1069 }
1070
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1072 {
1073 const int nmemb = ARRAY_SIZE(syscall_fmts);
1074 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1075 }
1076
/*
 * Per-syscall state, stored in trace->syscalls.table and indexed by syscall
 * id.  Entries are filled in lazily by trace__read_syscall_info().
 */
1077 struct syscall {
1078 struct event_format *tp_format; /* syscalls:sys_enter_<name> tracepoint format; NULL if not read */
1079 const char *name; /* name returned by audit_syscall_to_name() */
1080 bool filtered; /* set when the event qualifier excludes this syscall */
1081 struct syscall_fmt *fmt; /* matching syscall_fmts[] entry, NULL if none */
1082 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); /* per-argument formatters, NULL slot means default */
1083 void **arg_parm; /* per-argument formatter parameters, taken from fmt->arg_parm */
1084 };
1085
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1087 {
1088 double duration = (double)t / NSEC_PER_MSEC;
1089 size_t printed = fprintf(fp, "(");
1090
1091 if (duration >= 1.0)
1092 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093 else if (duration >= 0.01)
1094 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1095 else
1096 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097 return printed + fprintf(fp, "): ");
1098 }
1099
/*
 * Per-thread tracing state, hung off thread->priv by thread__trace().
 */
1100 struct thread_trace {
1101 u64 entry_time; /* sample time of the last sys_enter seen for this thread */
1102 u64 exit_time; /* sample time of the last sys_exit seen for this thread */
1103 bool entry_pending; /* entry_str was formatted and is waiting for its sys_exit */
1104 unsigned long nr_events; /* bumped on every thread__trace() call */
1105 char *entry_str; /* 1024-byte buffer holding the formatted "name(args" text */
1106 double runtime_ms; /* accumulated by trace__sched_stat_runtime() */
1107 struct {
1108 int max; /* highest index valid in table, -1 while the table is empty */
1109 char **table; /* fd -> strdup()'ed pathname, NULL when unknown */
1110 } paths;
1111
1112 struct intlist *syscall_stats; /* keyed by syscall id; node priv points to a struct stats */
1113 };
1114
1115 static struct thread_trace *thread_trace__new(void)
1116 {
1117 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1118
1119 if (ttrace)
1120 ttrace->paths.max = -1;
1121
1122 ttrace->syscall_stats = intlist__new(NULL);
1123
1124 return ttrace;
1125 }
1126
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1128 {
1129 struct thread_trace *ttrace;
1130
1131 if (thread == NULL)
1132 goto fail;
1133
1134 if (thread->priv == NULL)
1135 thread->priv = thread_trace__new();
1136
1137 if (thread->priv == NULL)
1138 goto fail;
1139
1140 ttrace = thread->priv;
1141 ++ttrace->nr_events;
1142
1143 return ttrace;
1144 fail:
1145 color_fprintf(fp, PERF_COLOR_RED,
1146 "WARNING: not enough memory, dropping samples!\n");
1147 return NULL;
1148 }
1149
/*
 * Global state of one 'perf trace' session (live tracing or replay).
 */
1150 struct trace {
1151 struct perf_tool tool; /* embedded tool; handlers get back to struct trace via container_of() */
1152 struct {
1153 int machine; /* passed to audit_syscall_to_name() */
1154 int open_id; /* syscall id whose successful return records last_vfs_getname as the fd path */
1155 } audit;
1156 struct {
1157 int max; /* highest index valid in table, -1 while the table is empty */
1158 struct syscall *table; /* indexed by syscall id, grown on demand */
1159 } syscalls;
1160 struct record_opts opts; /* target and mmap_pages are read from here */
1161 struct machine *host; /* machine used to look up threads */
1162 u64 base_time; /* time of the first sample; subtracted from printed timestamps */
1163 FILE *output; /* where all trace output is written */
1164 unsigned long nr_events; /* number of events seen */
1165 struct strlist *ev_qualifier; /* syscall names checked in trace__read_syscall_info() */
1166 const char *last_vfs_getname; /* pathname from the most recent vfs_getname sample */
1167 struct intlist *tid_list; /* tids accepted by skip_sample() */
1168 struct intlist *pid_list; /* pids accepted by skip_sample() */
1169 double duration_filter; /* threshold, multiplied by NSEC_PER_MSEC before comparing */
1170 double runtime_ms; /* total accumulated by trace__sched_stat_runtime() */
1171 struct {
1172 u64 vfs_getname, /* fd paths recorded from vfs_getname samples */
1173 proc_getname; /* fd path lookups done through /proc */
1174 } stats;
1175 bool not_ev_qualifier; /* inverts the meaning of ev_qualifier */
1176 bool live; /* true while trace__run() is executing */
1177 bool full_time; /* when set, base_time is not taken from the first sample */
1178 bool sched; /* also trace sched:sched_stat_runtime */
1179 bool multiple_threads; /* prefix each line with the thread id */
1180 bool summary; /* collect per-syscall stats and print the thread summary */
1181 bool summary_only; /* suppress the per-syscall output lines */
1182 bool show_comm; /* print the comm in front of the thread id */
1183 bool show_tool_stats; /* print the "Stats:" block at the end of a live run */
1184 };
1185
1186 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1187 {
1188 struct thread_trace *ttrace = thread->priv;
1189
1190 if (fd > ttrace->paths.max) {
1191 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1192
1193 if (npath == NULL)
1194 return -1;
1195
1196 if (ttrace->paths.max != -1) {
1197 memset(npath + ttrace->paths.max + 1, 0,
1198 (fd - ttrace->paths.max) * sizeof(char *));
1199 } else {
1200 memset(npath, 0, (fd + 1) * sizeof(char *));
1201 }
1202
1203 ttrace->paths.table = npath;
1204 ttrace->paths.max = fd;
1205 }
1206
1207 ttrace->paths.table[fd] = strdup(pathname);
1208
1209 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1210 }
1211
1212 static int thread__read_fd_path(struct thread *thread, int fd)
1213 {
1214 char linkname[PATH_MAX], pathname[PATH_MAX];
1215 struct stat st;
1216 int ret;
1217
1218 if (thread->pid_ == thread->tid) {
1219 scnprintf(linkname, sizeof(linkname),
1220 "/proc/%d/fd/%d", thread->pid_, fd);
1221 } else {
1222 scnprintf(linkname, sizeof(linkname),
1223 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1224 }
1225
1226 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1227 return -1;
1228
1229 ret = readlink(linkname, pathname, sizeof(pathname));
1230
1231 if (ret < 0 || ret > st.st_size)
1232 return -1;
1233
1234 pathname[ret] = '\0';
1235 return trace__set_fd_pathname(thread, fd, pathname);
1236 }
1237
1238 static const char *thread__fd_path(struct thread *thread, int fd,
1239 struct trace *trace)
1240 {
1241 struct thread_trace *ttrace = thread->priv;
1242
1243 if (ttrace == NULL)
1244 return NULL;
1245
1246 if (fd < 0)
1247 return NULL;
1248
1249 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1250 if (!trace->live)
1251 return NULL;
1252 ++trace->stats.proc_getname;
1253 if (thread__read_fd_path(thread, fd)) {
1254 return NULL;
1255 }
1256
1257 return ttrace->paths.table[fd];
1258 }
1259
1260 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1261 struct syscall_arg *arg)
1262 {
1263 int fd = arg->val;
1264 size_t printed = scnprintf(bf, size, "%d", fd);
1265 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1266
1267 if (path)
1268 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1269
1270 return printed;
1271 }
1272
1273 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1274 struct syscall_arg *arg)
1275 {
1276 int fd = arg->val;
1277 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1278 struct thread_trace *ttrace = arg->thread->priv;
1279
1280 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1281 zfree(&ttrace->paths.table[fd]);
1282
1283 return printed;
1284 }
1285
1286 static bool trace__filter_duration(struct trace *trace, double t)
1287 {
1288 return t < (trace->duration_filter * NSEC_PER_MSEC);
1289 }
1290
1291 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1292 {
1293 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1294
1295 return fprintf(fp, "%10.3f ", ts);
1296 }
1297
/*
 * Flags set asynchronously by sig_handler() and polled by the main loop.
 * They are volatile sig_atomic_t so that a store made from the handler is
 * well defined and is observed by the interrupted code.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: any handled signal asks the main loop to finish, and
 * 'interrupted' records whether that signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1306
1307 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1308 u64 duration, u64 tstamp, FILE *fp)
1309 {
1310 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1311 printed += fprintf_duration(duration, fp);
1312
1313 if (trace->multiple_threads) {
1314 if (trace->show_comm)
1315 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1316 printed += fprintf(fp, "%d ", thread->tid);
1317 }
1318
1319 return printed;
1320 }
1321
1322 static int trace__process_event(struct trace *trace, struct machine *machine,
1323 union perf_event *event, struct perf_sample *sample)
1324 {
1325 int ret = 0;
1326
1327 switch (event->header.type) {
1328 case PERF_RECORD_LOST:
1329 color_fprintf(trace->output, PERF_COLOR_RED,
1330 "LOST %" PRIu64 " events!\n", event->lost.lost);
1331 ret = machine__process_lost_event(machine, event, sample);
1332 default:
1333 ret = machine__process_event(machine, event, sample);
1334 break;
1335 }
1336
1337 return ret;
1338 }
1339
1340 static int trace__tool_process(struct perf_tool *tool,
1341 union perf_event *event,
1342 struct perf_sample *sample,
1343 struct machine *machine)
1344 {
1345 struct trace *trace = container_of(tool, struct trace, tool);
1346 return trace__process_event(trace, machine, event, sample);
1347 }
1348
1349 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1350 {
1351 int err = symbol__init();
1352
1353 if (err)
1354 return err;
1355
1356 trace->host = machine__new_host();
1357 if (trace->host == NULL)
1358 return -ENOMEM;
1359
1360 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1361 evlist->threads, trace__tool_process, false);
1362 if (err)
1363 symbol__exit();
1364
1365 return err;
1366 }
1367
1368 static int syscall__set_arg_fmts(struct syscall *sc)
1369 {
1370 struct format_field *field;
1371 int idx = 0;
1372
1373 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1374 if (sc->arg_scnprintf == NULL)
1375 return -1;
1376
1377 if (sc->fmt)
1378 sc->arg_parm = sc->fmt->arg_parm;
1379
1380 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1381 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1382 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1383 else if (field->flags & FIELD_IS_POINTER)
1384 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1385 ++idx;
1386 }
1387
1388 return 0;
1389 }
1390
1391 static int trace__read_syscall_info(struct trace *trace, int id)
1392 {
1393 char tp_name[128];
1394 struct syscall *sc;
1395 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1396
1397 if (name == NULL)
1398 return -1;
1399
1400 if (id > trace->syscalls.max) {
1401 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1402
1403 if (nsyscalls == NULL)
1404 return -1;
1405
1406 if (trace->syscalls.max != -1) {
1407 memset(nsyscalls + trace->syscalls.max + 1, 0,
1408 (id - trace->syscalls.max) * sizeof(*sc));
1409 } else {
1410 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1411 }
1412
1413 trace->syscalls.table = nsyscalls;
1414 trace->syscalls.max = id;
1415 }
1416
1417 sc = trace->syscalls.table + id;
1418 sc->name = name;
1419
1420 if (trace->ev_qualifier) {
1421 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1422
1423 if (!(in ^ trace->not_ev_qualifier)) {
1424 sc->filtered = true;
1425 /*
1426 * No need to do read tracepoint information since this will be
1427 * filtered out.
1428 */
1429 return 0;
1430 }
1431 }
1432
1433 sc->fmt = syscall_fmt__find(sc->name);
1434
1435 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1436 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1437
1438 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1439 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1440 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1441 }
1442
1443 if (sc->tp_format == NULL)
1444 return -1;
1445
1446 return syscall__set_arg_fmts(sc);
1447 }
1448
1449 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1450 unsigned long *args, struct trace *trace,
1451 struct thread *thread)
1452 {
1453 size_t printed = 0;
1454
1455 if (sc->tp_format != NULL) {
1456 struct format_field *field;
1457 u8 bit = 1;
1458 struct syscall_arg arg = {
1459 .idx = 0,
1460 .mask = 0,
1461 .trace = trace,
1462 .thread = thread,
1463 };
1464
1465 for (field = sc->tp_format->format.fields->next; field;
1466 field = field->next, ++arg.idx, bit <<= 1) {
1467 if (arg.mask & bit)
1468 continue;
1469 /*
1470 * Suppress this argument if its value is zero and
1471 * and we don't have a string associated in an
1472 * strarray for it.
1473 */
1474 if (args[arg.idx] == 0 &&
1475 !(sc->arg_scnprintf &&
1476 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1477 sc->arg_parm[arg.idx]))
1478 continue;
1479
1480 printed += scnprintf(bf + printed, size - printed,
1481 "%s%s: ", printed ? ", " : "", field->name);
1482 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1483 arg.val = args[arg.idx];
1484 if (sc->arg_parm)
1485 arg.parm = sc->arg_parm[arg.idx];
1486 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1487 size - printed, &arg);
1488 } else {
1489 printed += scnprintf(bf + printed, size - printed,
1490 "%ld", args[arg.idx]);
1491 }
1492 }
1493 } else {
1494 int i = 0;
1495
1496 while (i < 6) {
1497 printed += scnprintf(bf + printed, size - printed,
1498 "%sarg%d: %ld",
1499 printed ? ", " : "", i, args[i]);
1500 ++i;
1501 }
1502 }
1503
1504 return printed;
1505 }
1506
/*
 * Signature of the per-tracepoint sample handlers stored in evsel->handler,
 * e.g. trace__sys_enter() and trace__sys_exit().
 */
1507 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1508 struct perf_sample *sample);
1509
1510 static struct syscall *trace__syscall_info(struct trace *trace,
1511 struct perf_evsel *evsel, int id)
1512 {
1513
1514 if (id < 0) {
1515
1516 /*
1517 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1518 * before that, leaving at a higher verbosity level till that is
1519 * explained. Reproduced with plain ftrace with:
1520 *
1521 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1522 * grep "NR -1 " /t/trace_pipe
1523 *
1524 * After generating some load on the machine.
1525 */
1526 if (verbose > 1) {
1527 static u64 n;
1528 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1529 id, perf_evsel__name(evsel), ++n);
1530 }
1531 return NULL;
1532 }
1533
1534 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1535 trace__read_syscall_info(trace, id))
1536 goto out_cant_read;
1537
1538 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1539 goto out_cant_read;
1540
1541 return &trace->syscalls.table[id];
1542
1543 out_cant_read:
1544 if (verbose) {
1545 fprintf(trace->output, "Problems reading syscall %d", id);
1546 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1547 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1548 fputs(" information\n", trace->output);
1549 }
1550 return NULL;
1551 }
1552
1553 static void thread__update_stats(struct thread_trace *ttrace,
1554 int id, struct perf_sample *sample)
1555 {
1556 struct int_node *inode;
1557 struct stats *stats;
1558 u64 duration = 0;
1559
1560 inode = intlist__findnew(ttrace->syscall_stats, id);
1561 if (inode == NULL)
1562 return;
1563
1564 stats = inode->priv;
1565 if (stats == NULL) {
1566 stats = malloc(sizeof(struct stats));
1567 if (stats == NULL)
1568 return;
1569 init_stats(stats);
1570 inode->priv = stats;
1571 }
1572
1573 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1574 duration = sample->time - ttrace->entry_time;
1575
1576 update_stats(stats, duration);
1577 }
1578
1579 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1580 struct perf_sample *sample)
1581 {
1582 char *msg;
1583 void *args;
1584 size_t printed = 0;
1585 struct thread *thread;
1586 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1587 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1588 struct thread_trace *ttrace;
1589
1590 if (sc == NULL)
1591 return -1;
1592
1593 if (sc->filtered)
1594 return 0;
1595
1596 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1597 ttrace = thread__trace(thread, trace->output);
1598 if (ttrace == NULL)
1599 return -1;
1600
1601 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1602 ttrace = thread->priv;
1603
1604 if (ttrace->entry_str == NULL) {
1605 ttrace->entry_str = malloc(1024);
1606 if (!ttrace->entry_str)
1607 return -1;
1608 }
1609
1610 ttrace->entry_time = sample->time;
1611 msg = ttrace->entry_str;
1612 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1613
1614 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1615 args, trace, thread);
1616
1617 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1618 if (!trace->duration_filter && !trace->summary_only) {
1619 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1620 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1621 }
1622 } else
1623 ttrace->entry_pending = true;
1624
1625 return 0;
1626 }
1627
1628 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1629 struct perf_sample *sample)
1630 {
1631 int ret;
1632 u64 duration = 0;
1633 struct thread *thread;
1634 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1635 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1636 struct thread_trace *ttrace;
1637
1638 if (sc == NULL)
1639 return -1;
1640
1641 if (sc->filtered)
1642 return 0;
1643
1644 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1645 ttrace = thread__trace(thread, trace->output);
1646 if (ttrace == NULL)
1647 return -1;
1648
1649 if (trace->summary)
1650 thread__update_stats(ttrace, id, sample);
1651
1652 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1653
1654 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1655 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1656 trace->last_vfs_getname = NULL;
1657 ++trace->stats.vfs_getname;
1658 }
1659
1660 ttrace = thread->priv;
1661
1662 ttrace->exit_time = sample->time;
1663
1664 if (ttrace->entry_time) {
1665 duration = sample->time - ttrace->entry_time;
1666 if (trace__filter_duration(trace, duration))
1667 goto out;
1668 } else if (trace->duration_filter)
1669 goto out;
1670
1671 if (trace->summary_only)
1672 goto out;
1673
1674 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1675
1676 if (ttrace->entry_pending) {
1677 fprintf(trace->output, "%-70s", ttrace->entry_str);
1678 } else {
1679 fprintf(trace->output, " ... [");
1680 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1681 fprintf(trace->output, "]: %s()", sc->name);
1682 }
1683
1684 if (sc->fmt == NULL) {
1685 signed_print:
1686 fprintf(trace->output, ") = %d", ret);
1687 } else if (ret < 0 && sc->fmt->errmsg) {
1688 char bf[256];
1689 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1690 *e = audit_errno_to_name(-ret);
1691
1692 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1693 } else if (ret == 0 && sc->fmt->timeout)
1694 fprintf(trace->output, ") = 0 Timeout");
1695 else if (sc->fmt->hexret)
1696 fprintf(trace->output, ") = %#x", ret);
1697 else
1698 goto signed_print;
1699
1700 fputc('\n', trace->output);
1701 out:
1702 ttrace->entry_pending = false;
1703
1704 return 0;
1705 }
1706
1707 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1708 struct perf_sample *sample)
1709 {
1710 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1711 return 0;
1712 }
1713
1714 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1715 struct perf_sample *sample)
1716 {
1717 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1718 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1719 struct thread *thread = machine__findnew_thread(trace->host,
1720 sample->pid,
1721 sample->tid);
1722 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1723
1724 if (ttrace == NULL)
1725 goto out_dump;
1726
1727 ttrace->runtime_ms += runtime_ms;
1728 trace->runtime_ms += runtime_ms;
1729 return 0;
1730
1731 out_dump:
1732 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1733 evsel->name,
1734 perf_evsel__strval(evsel, sample, "comm"),
1735 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1736 runtime,
1737 perf_evsel__intval(evsel, sample, "vruntime"));
1738 return 0;
1739 }
1740
1741 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1742 {
1743 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1744 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1745 return false;
1746
1747 if (trace->pid_list || trace->tid_list)
1748 return true;
1749
1750 return false;
1751 }
1752
1753 static int trace__process_sample(struct perf_tool *tool,
1754 union perf_event *event __maybe_unused,
1755 struct perf_sample *sample,
1756 struct perf_evsel *evsel,
1757 struct machine *machine __maybe_unused)
1758 {
1759 struct trace *trace = container_of(tool, struct trace, tool);
1760 int err = 0;
1761
1762 tracepoint_handler handler = evsel->handler;
1763
1764 if (skip_sample(trace, sample))
1765 return 0;
1766
1767 if (!trace->full_time && trace->base_time == 0)
1768 trace->base_time = sample->time;
1769
1770 if (handler) {
1771 ++trace->nr_events;
1772 handler(trace, evsel, sample);
1773 }
1774
1775 return err;
1776 }
1777
1778 static int parse_target_str(struct trace *trace)
1779 {
1780 if (trace->opts.target.pid) {
1781 trace->pid_list = intlist__new(trace->opts.target.pid);
1782 if (trace->pid_list == NULL) {
1783 pr_err("Error parsing process id string\n");
1784 return -EINVAL;
1785 }
1786 }
1787
1788 if (trace->opts.target.tid) {
1789 trace->tid_list = intlist__new(trace->opts.target.tid);
1790 if (trace->tid_list == NULL) {
1791 pr_err("Error parsing thread id string\n");
1792 return -EINVAL;
1793 }
1794 }
1795
1796 return 0;
1797 }
1798
/*
 * Implement 'perf trace record': build an argument vector for 'perf record'
 * that selects the syscall enter/exit tracepoints, append the caller's
 * arguments (@argc/@argv) and run cmd_record() with it.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, -1 if neither
 * tracepoint flavour exists, otherwise the return value of cmd_record().
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* Nothing will use the vector anymore, do not leak it. */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1837
1838 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1839
1840 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1841 {
1842 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1843 if (evsel == NULL)
1844 return;
1845
1846 if (perf_evsel__field(evsel, "pathname") == NULL) {
1847 perf_evsel__delete(evsel);
1848 return;
1849 }
1850
1851 evsel->handler = trace__vfs_getname;
1852 perf_evlist__add(evlist, evsel);
1853 }
1854
1855 static int trace__run(struct trace *trace, int argc, const char **argv)
1856 {
1857 struct perf_evlist *evlist = perf_evlist__new();
1858 struct perf_evsel *evsel;
1859 int err = -1, i;
1860 unsigned long before;
1861 const bool forks = argc > 0;
1862
1863 trace->live = true;
1864
1865 if (evlist == NULL) {
1866 fprintf(trace->output, "Not enough memory to run!\n");
1867 goto out;
1868 }
1869
1870 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1871 goto out_error_tp;
1872
1873 perf_evlist__add_vfs_getname(evlist);
1874
1875 if (trace->sched &&
1876 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1877 trace__sched_stat_runtime))
1878 goto out_error_tp;
1879
1880 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1881 if (err < 0) {
1882 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1883 goto out_delete_evlist;
1884 }
1885
1886 err = trace__symbols_init(trace, evlist);
1887 if (err < 0) {
1888 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1889 goto out_delete_evlist;
1890 }
1891
1892 perf_evlist__config(evlist, &trace->opts);
1893
1894 signal(SIGCHLD, sig_handler);
1895 signal(SIGINT, sig_handler);
1896
1897 if (forks) {
1898 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1899 argv, false, NULL);
1900 if (err < 0) {
1901 fprintf(trace->output, "Couldn't run the workload!\n");
1902 goto out_delete_evlist;
1903 }
1904 }
1905
1906 err = perf_evlist__open(evlist);
1907 if (err < 0)
1908 goto out_error_open;
1909
1910 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
1911 if (err < 0) {
1912 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1913 goto out_delete_evlist;
1914 }
1915
1916 perf_evlist__enable(evlist);
1917
1918 if (forks)
1919 perf_evlist__start_workload(evlist);
1920
1921 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1922 again:
1923 before = trace->nr_events;
1924
1925 for (i = 0; i < evlist->nr_mmaps; i++) {
1926 union perf_event *event;
1927
1928 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1929 const u32 type = event->header.type;
1930 tracepoint_handler handler;
1931 struct perf_sample sample;
1932
1933 ++trace->nr_events;
1934
1935 err = perf_evlist__parse_sample(evlist, event, &sample);
1936 if (err) {
1937 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1938 goto next_event;
1939 }
1940
1941 if (!trace->full_time && trace->base_time == 0)
1942 trace->base_time = sample.time;
1943
1944 if (type != PERF_RECORD_SAMPLE) {
1945 trace__process_event(trace, trace->host, event, &sample);
1946 continue;
1947 }
1948
1949 evsel = perf_evlist__id2evsel(evlist, sample.id);
1950 if (evsel == NULL) {
1951 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1952 goto next_event;
1953 }
1954
1955 if (sample.raw_data == NULL) {
1956 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1957 perf_evsel__name(evsel), sample.tid,
1958 sample.cpu, sample.raw_size);
1959 goto next_event;
1960 }
1961
1962 handler = evsel->handler;
1963 handler(trace, evsel, &sample);
1964 next_event:
1965 perf_evlist__mmap_consume(evlist, i);
1966
1967 if (interrupted)
1968 goto out_disable;
1969 }
1970 }
1971
1972 if (trace->nr_events == before) {
1973 int timeout = done ? 100 : -1;
1974
1975 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1976 goto again;
1977 } else {
1978 goto again;
1979 }
1980
1981 out_disable:
1982 perf_evlist__disable(evlist);
1983
1984 if (!err) {
1985 if (trace->summary)
1986 trace__fprintf_thread_summary(trace, trace->output);
1987
1988 if (trace->show_tool_stats) {
1989 fprintf(trace->output, "Stats:\n "
1990 " vfs_getname : %" PRIu64 "\n"
1991 " proc_getname: %" PRIu64 "\n",
1992 trace->stats.vfs_getname,
1993 trace->stats.proc_getname);
1994 }
1995 }
1996
1997 out_delete_evlist:
1998 perf_evlist__delete(evlist);
1999 out:
2000 trace->live = false;
2001 return err;
2002 {
2003 char errbuf[BUFSIZ];
2004
2005 out_error_tp:
2006 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
2007 goto out_error;
2008
2009 out_error_open:
2010 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2011
2012 out_error:
2013 fprintf(trace->output, "%s\n", errbuf);
2014 goto out_delete_evlist;
2015 }
2016 }
2017
2018 static int trace__replay(struct trace *trace)
2019 {
2020 const struct perf_evsel_str_handler handlers[] = {
2021 { "probe:vfs_getname", trace__vfs_getname, },
2022 };
2023 struct perf_data_file file = {
2024 .path = input_name,
2025 .mode = PERF_DATA_MODE_READ,
2026 };
2027 struct perf_session *session;
2028 struct perf_evsel *evsel;
2029 int err = -1;
2030
2031 trace->tool.sample = trace__process_sample;
2032 trace->tool.mmap = perf_event__process_mmap;
2033 trace->tool.mmap2 = perf_event__process_mmap2;
2034 trace->tool.comm = perf_event__process_comm;
2035 trace->tool.exit = perf_event__process_exit;
2036 trace->tool.fork = perf_event__process_fork;
2037 trace->tool.attr = perf_event__process_attr;
2038 trace->tool.tracing_data = perf_event__process_tracing_data;
2039 trace->tool.build_id = perf_event__process_build_id;
2040
2041 trace->tool.ordered_samples = true;
2042 trace->tool.ordering_requires_timestamps = true;
2043
2044 /* add tid to output */
2045 trace->multiple_threads = true;
2046
2047 if (symbol__init() < 0)
2048 return -1;
2049
2050 session = perf_session__new(&file, false, &trace->tool);
2051 if (session == NULL)
2052 return -ENOMEM;
2053
2054 trace->host = &session->machines.host;
2055
2056 err = perf_session__set_tracepoints_handlers(session, handlers);
2057 if (err)
2058 goto out;
2059
2060 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2061 "raw_syscalls:sys_enter");
2062 /* older kernels have syscalls tp versus raw_syscalls */
2063 if (evsel == NULL)
2064 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065 "syscalls:sys_enter");
2066 if (evsel == NULL) {
2067 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2068 goto out;
2069 }
2070
2071 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2072 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2073 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2074 goto out;
2075 }
2076
2077 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2078 "raw_syscalls:sys_exit");
2079 if (evsel == NULL)
2080 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2081 "syscalls:sys_exit");
2082 if (evsel == NULL) {
2083 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2084 goto out;
2085 }
2086
2087 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2088 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2089 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2090 goto out;
2091 }
2092
2093 err = parse_target_str(trace);
2094 if (err != 0)
2095 goto out;
2096
2097 setup_pager();
2098
2099 err = perf_session__process_events(session, &trace->tool);
2100 if (err)
2101 pr_err("Failed to process events, error %d", err);
2102
2103 else if (trace->summary)
2104 trace__fprintf_thread_summary(trace, trace->output);
2105
2106 out:
2107 perf_session__delete(session);
2108
2109 return err;
2110 }
2111
/*
 * Print the heading of the per-thread summary to @fp and return what
 * fprintf() returned.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2120
2121 static size_t thread__dump_stats(struct thread_trace *ttrace,
2122 struct trace *trace, FILE *fp)
2123 {
2124 struct stats *stats;
2125 size_t printed = 0;
2126 struct syscall *sc;
2127 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2128
2129 if (inode == NULL)
2130 return 0;
2131
2132 printed += fprintf(fp, "\n");
2133
2134 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2135 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2136 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2137
2138 /* each int_node is a syscall */
2139 while (inode) {
2140 stats = inode->priv;
2141 if (stats) {
2142 double min = (double)(stats->min) / NSEC_PER_MSEC;
2143 double max = (double)(stats->max) / NSEC_PER_MSEC;
2144 double avg = avg_stats(stats);
2145 double pct;
2146 u64 n = (u64) stats->n;
2147
2148 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2149 avg /= NSEC_PER_MSEC;
2150
2151 sc = &trace->syscalls.table[inode->i];
2152 printed += fprintf(fp, " %-15s", sc->name);
2153 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2154 n, min, avg);
2155 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2156 }
2157
2158 inode = intlist__next(inode);
2159 }
2160
2161 printed += fprintf(fp, "\n\n");
2162
2163 return printed;
2164 }
2165
/*
 * Context handed to the per-thread summary callback through its
 * opaque 'priv' argument.
 */
struct summary_data {
	FILE		*fp;		/* stream the summary is written to */
	struct trace	*trace;		/* tracer state the summary is derived from */
	size_t		printed;	/* running total of fprintf() results */
};
2172
2173 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2174 {
2175 struct summary_data *data = priv;
2176 FILE *fp = data->fp;
2177 size_t printed = data->printed;
2178 struct trace *trace = data->trace;
2179 struct thread_trace *ttrace = thread->priv;
2180 double ratio;
2181
2182 if (ttrace == NULL)
2183 return 0;
2184
2185 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2186
2187 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2188 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2189 printed += fprintf(fp, "%.1f%%", ratio);
2190 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2191 printed += thread__dump_stats(ttrace, trace, fp);
2192
2193 data->printed += printed;
2194
2195 return 0;
2196 }
2197
2198 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2199 {
2200 struct summary_data data = {
2201 .fp = fp,
2202 .trace = trace
2203 };
2204 data.printed = trace__fprintf_threads_header(fp);
2205
2206 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2207
2208 return data.printed;
2209 }
2210
2211 static int trace__set_duration(const struct option *opt, const char *str,
2212 int unset __maybe_unused)
2213 {
2214 struct trace *trace = opt->value;
2215
2216 trace->duration_filter = atof(str);
2217 return 0;
2218 }
2219
2220 static int trace__open_output(struct trace *trace, const char *filename)
2221 {
2222 struct stat st;
2223
2224 if (!stat(filename, &st) && st.st_size) {
2225 char oldname[PATH_MAX];
2226
2227 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2228 unlink(oldname);
2229 rename(filename, oldname);
2230 }
2231
2232 trace->output = fopen(filename, "w");
2233
2234 return trace->output == NULL ? -errno : 0;
2235 }
2236
2237 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2238 {
2239 const char * const trace_usage[] = {
2240 "perf trace [<options>] [<command>]",
2241 "perf trace [<options>] -- <command> [<options>]",
2242 "perf trace record [<options>] [<command>]",
2243 "perf trace record [<options>] -- <command> [<options>]",
2244 NULL
2245 };
2246 struct trace trace = {
2247 .audit = {
2248 .machine = audit_detect_machine(),
2249 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2250 },
2251 .syscalls = {
2252 . max = -1,
2253 },
2254 .opts = {
2255 .target = {
2256 .uid = UINT_MAX,
2257 .uses_mmap = true,
2258 },
2259 .user_freq = UINT_MAX,
2260 .user_interval = ULLONG_MAX,
2261 .no_buffering = true,
2262 .mmap_pages = 1024,
2263 },
2264 .output = stdout,
2265 .show_comm = true,
2266 };
2267 const char *output_name = NULL;
2268 const char *ev_qualifier_str = NULL;
2269 const struct option trace_options[] = {
2270 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2271 "show the thread COMM next to its id"),
2272 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2273 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2274 "list of events to trace"),
2275 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2276 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2277 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2278 "trace events on existing process id"),
2279 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2280 "trace events on existing thread id"),
2281 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2282 "system-wide collection from all CPUs"),
2283 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2284 "list of cpus to monitor"),
2285 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2286 "child tasks do not inherit counters"),
2287 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2288 "number of mmap data pages",
2289 perf_evlist__parse_mmap_pages),
2290 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2291 "user to profile"),
2292 OPT_CALLBACK(0, "duration", &trace, "float",
2293 "show only events with duration > N.M ms",
2294 trace__set_duration),
2295 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2296 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2297 OPT_BOOLEAN('T', "time", &trace.full_time,
2298 "Show full timestamp, not time relative to first start"),
2299 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2300 "Show only syscall summary with statistics"),
2301 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2302 "Show all syscalls and summary with statistics"),
2303 OPT_END()
2304 };
2305 int err;
2306 char bf[BUFSIZ];
2307
2308 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2309 return trace__record(argc-2, &argv[2]);
2310
2311 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2312
2313 /* summary_only implies summary option, but don't overwrite summary if set */
2314 if (trace.summary_only)
2315 trace.summary = trace.summary_only;
2316
2317 if (output_name != NULL) {
2318 err = trace__open_output(&trace, output_name);
2319 if (err < 0) {
2320 perror("failed to create output file");
2321 goto out;
2322 }
2323 }
2324
2325 if (ev_qualifier_str != NULL) {
2326 const char *s = ev_qualifier_str;
2327
2328 trace.not_ev_qualifier = *s == '!';
2329 if (trace.not_ev_qualifier)
2330 ++s;
2331 trace.ev_qualifier = strlist__new(true, s);
2332 if (trace.ev_qualifier == NULL) {
2333 fputs("Not enough memory to parse event qualifier",
2334 trace.output);
2335 err = -ENOMEM;
2336 goto out_close;
2337 }
2338 }
2339
2340 err = target__validate(&trace.opts.target);
2341 if (err) {
2342 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2343 fprintf(trace.output, "%s", bf);
2344 goto out_close;
2345 }
2346
2347 err = target__parse_uid(&trace.opts.target);
2348 if (err) {
2349 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2350 fprintf(trace.output, "%s", bf);
2351 goto out_close;
2352 }
2353
2354 if (!argc && target__none(&trace.opts.target))
2355 trace.opts.target.system_wide = true;
2356
2357 if (input_name)
2358 err = trace__replay(&trace);
2359 else
2360 err = trace__run(&trace, argc, argv);
2361
2362 out_close:
2363 if (output_name != NULL)
2364 fclose(trace.output);
2365 out:
2366 return err;
2367 }
This page took 0.118863 seconds and 5 git commands to generate.