perf trace: Don't set the base timestamp using events without PERF_SAMPLE_TIME
[deliverable/linux.git] / tools / perf / builtin-trace.c
1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37
38 #include <libaudit.h>
39 #include <stdlib.h>
40 #include <sys/mman.h>
41 #include <linux/futex.h>
42 #include <linux/err.h>
43 #include <linux/seccomp.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
48
/*
 * Compatibility fallbacks: older distro headers may not provide these
 * constants, so each one is defined here only when it is still missing.
 */
#ifndef MAP_STACK
#define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
#define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
#define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
#define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
#define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
#define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
#define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
#define O_CLOEXEC		02000000
#endif

#ifndef SOCK_DCCP
#define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
#define MSG_CMSG_CLOEXEC	0x40000000
#endif

#ifndef PERF_FLAG_FD_NO_GROUP
#define PERF_FLAG_FD_NO_GROUP	(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
#define PERF_FLAG_FD_OUTPUT	(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
#define PERF_FLAG_PID_CGROUP	(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC	(1UL << 3) /* O_CLOEXEC */
#endif
114
115
116 struct tp_field {
117 int offset;
118 union {
119 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
120 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
121 };
122 };
123
124 #define TP_UINT_FIELD(bits) \
125 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
126 { \
127 u##bits value; \
128 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
129 return value; \
130 }
131
132 TP_UINT_FIELD(8);
133 TP_UINT_FIELD(16);
134 TP_UINT_FIELD(32);
135 TP_UINT_FIELD(64);
136
137 #define TP_UINT_FIELD__SWAPPED(bits) \
138 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
139 { \
140 u##bits value; \
141 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
142 return bswap_##bits(value);\
143 }
144
145 TP_UINT_FIELD__SWAPPED(16);
146 TP_UINT_FIELD__SWAPPED(32);
147 TP_UINT_FIELD__SWAPPED(64);
148
149 static int tp_field__init_uint(struct tp_field *field,
150 struct format_field *format_field,
151 bool needs_swap)
152 {
153 field->offset = format_field->offset;
154
155 switch (format_field->size) {
156 case 1:
157 field->integer = tp_field__u8;
158 break;
159 case 2:
160 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
161 break;
162 case 4:
163 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
164 break;
165 case 8:
166 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
167 break;
168 default:
169 return -1;
170 }
171
172 return 0;
173 }
174
175 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
176 {
177 return sample->raw_data + field->offset;
178 }
179
180 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
181 {
182 field->offset = format_field->offset;
183 field->pointer = tp_field__ptr;
184 return 0;
185 }
186
187 struct syscall_tp {
188 struct tp_field id;
189 union {
190 struct tp_field args, ret;
191 };
192 };
193
194 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
195 struct tp_field *field,
196 const char *name)
197 {
198 struct format_field *format_field = perf_evsel__field(evsel, name);
199
200 if (format_field == NULL)
201 return -1;
202
203 return tp_field__init_uint(field, format_field, evsel->needs_swap);
204 }
205
206 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
207 ({ struct syscall_tp *sc = evsel->priv;\
208 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
209
/*
 * Look up the tracepoint field called @name in @evsel and initialise @field
 * as a pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	return ff == NULL ? -1 : tp_field__init_ptr(field, ff);
}

/*
 * Same, for the member of the struct syscall_tp in evsel->priv that has the
 * same name as the tracepoint field.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
225
226 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
227 {
228 zfree(&evsel->priv);
229 perf_evsel__delete(evsel);
230 }
231
232 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
233 {
234 evsel->priv = malloc(sizeof(struct syscall_tp));
235 if (evsel->priv != NULL) {
236 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
237 goto out_delete;
238
239 evsel->handler = handler;
240 return 0;
241 }
242
243 return -ENOMEM;
244
245 out_delete:
246 zfree(&evsel->priv);
247 return -ENOENT;
248 }
249
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint and prepare
 * it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall_tp setup failed (the evsel is deleted in that case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(evsel)) {
		/* Older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
270
/*
 * Read member @name of the struct syscall_tp kept in evsel->priv from
 * @sample, through the integer reader installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* As above, but through the pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
278
279 struct syscall_arg {
280 unsigned long val;
281 struct thread *thread;
282 struct trace *trace;
283 void *parm;
284 u8 idx;
285 u8 mask;
286 };
287
/*
 * Table mapping integer values to names: the value minus @offset is used as
 * the index into @entries, which holds @nr_entries slots.
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
293
/* Define strarray__<tbl> describing the string array <tbl>, with offset 0. */
#define DEFINE_STRARRAY(tbl) struct strarray strarray__##tbl = { \
	.nr_entries = ARRAY_SIZE(tbl), \
	.entries = tbl, \
}

/* Same, for tables whose first slot corresponds to the value @off. */
#define DEFINE_STRARRAY_OFFSET(tbl, off) struct strarray strarray__##tbl = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(tbl), \
	.entries = tbl, \
}
304
305 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
306 const char *intfmt,
307 struct syscall_arg *arg)
308 {
309 struct strarray *sa = arg->parm;
310 int idx = arg->val - sa->offset;
311
312 if (idx < 0 || idx >= sa->nr_entries)
313 return scnprintf(bf, size, intfmt, arg->val);
314
315 return scnprintf(bf, size, "%s", sa->entries[idx]);
316 }
317
/* strarray formatter whose numeric fallback is decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
325
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 *
 * strarray formatter whose numeric fallback is hexadecimal ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
339
340 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
341 struct syscall_arg *arg);
342
343 #define SCA_FD syscall_arg__scnprintf_fd
344
345 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
346 struct syscall_arg *arg)
347 {
348 int fd = arg->val;
349
350 if (fd == AT_FDCWD)
351 return scnprintf(bf, size, "CWD");
352
353 return syscall_arg__scnprintf_fd(bf, size, arg);
354 }
355
356 #define SCA_FDAT syscall_arg__scnprintf_fd_at
357
358 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
359 struct syscall_arg *arg);
360
361 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
362
363 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
364 struct syscall_arg *arg)
365 {
366 return scnprintf(bf, size, "%#lx", arg->val);
367 }
368
369 #define SCA_HEX syscall_arg__scnprintf_hex
370
371 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
372 struct syscall_arg *arg)
373 {
374 return scnprintf(bf, size, "%d", arg->val);
375 }
376
377 #define SCA_INT syscall_arg__scnprintf_int
378
379 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
380 struct syscall_arg *arg)
381 {
382 int printed = 0, prot = arg->val;
383
384 if (prot == PROT_NONE)
385 return scnprintf(bf, size, "NONE");
386 #define P_MMAP_PROT(n) \
387 if (prot & PROT_##n) { \
388 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
389 prot &= ~PROT_##n; \
390 }
391
392 P_MMAP_PROT(EXEC);
393 P_MMAP_PROT(READ);
394 P_MMAP_PROT(WRITE);
395 #ifdef PROT_SEM
396 P_MMAP_PROT(SEM);
397 #endif
398 P_MMAP_PROT(GROWSDOWN);
399 P_MMAP_PROT(GROWSUP);
400 #undef P_MMAP_PROT
401
402 if (prot)
403 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
404
405 return printed;
406 }
407
408 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
409
410 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
411 struct syscall_arg *arg)
412 {
413 int printed = 0, flags = arg->val;
414
415 #define P_MMAP_FLAG(n) \
416 if (flags & MAP_##n) { \
417 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
418 flags &= ~MAP_##n; \
419 }
420
421 P_MMAP_FLAG(SHARED);
422 P_MMAP_FLAG(PRIVATE);
423 #ifdef MAP_32BIT
424 P_MMAP_FLAG(32BIT);
425 #endif
426 P_MMAP_FLAG(ANONYMOUS);
427 P_MMAP_FLAG(DENYWRITE);
428 P_MMAP_FLAG(EXECUTABLE);
429 P_MMAP_FLAG(FILE);
430 P_MMAP_FLAG(FIXED);
431 P_MMAP_FLAG(GROWSDOWN);
432 #ifdef MAP_HUGETLB
433 P_MMAP_FLAG(HUGETLB);
434 #endif
435 P_MMAP_FLAG(LOCKED);
436 P_MMAP_FLAG(NONBLOCK);
437 P_MMAP_FLAG(NORESERVE);
438 P_MMAP_FLAG(POPULATE);
439 P_MMAP_FLAG(STACK);
440 #ifdef MAP_UNINITIALIZED
441 P_MMAP_FLAG(UNINITIALIZED);
442 #endif
443 #undef P_MMAP_FLAG
444
445 if (flags)
446 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
447
448 return printed;
449 }
450
451 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
452
453 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
454 struct syscall_arg *arg)
455 {
456 int printed = 0, flags = arg->val;
457
458 #define P_MREMAP_FLAG(n) \
459 if (flags & MREMAP_##n) { \
460 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
461 flags &= ~MREMAP_##n; \
462 }
463
464 P_MREMAP_FLAG(MAYMOVE);
465 #ifdef MREMAP_FIXED
466 P_MREMAP_FLAG(FIXED);
467 #endif
468 #undef P_MREMAP_FLAG
469
470 if (flags)
471 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
472
473 return printed;
474 }
475
476 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
477
478 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
479 struct syscall_arg *arg)
480 {
481 int behavior = arg->val;
482
483 switch (behavior) {
484 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
485 P_MADV_BHV(NORMAL);
486 P_MADV_BHV(RANDOM);
487 P_MADV_BHV(SEQUENTIAL);
488 P_MADV_BHV(WILLNEED);
489 P_MADV_BHV(DONTNEED);
490 P_MADV_BHV(REMOVE);
491 P_MADV_BHV(DONTFORK);
492 P_MADV_BHV(DOFORK);
493 P_MADV_BHV(HWPOISON);
494 #ifdef MADV_SOFT_OFFLINE
495 P_MADV_BHV(SOFT_OFFLINE);
496 #endif
497 P_MADV_BHV(MERGEABLE);
498 P_MADV_BHV(UNMERGEABLE);
499 #ifdef MADV_HUGEPAGE
500 P_MADV_BHV(HUGEPAGE);
501 #endif
502 #ifdef MADV_NOHUGEPAGE
503 P_MADV_BHV(NOHUGEPAGE);
504 #endif
505 #ifdef MADV_DONTDUMP
506 P_MADV_BHV(DONTDUMP);
507 #endif
508 #ifdef MADV_DODUMP
509 P_MADV_BHV(DODUMP);
510 #endif
511 #undef P_MADV_PHV
512 default: break;
513 }
514
515 return scnprintf(bf, size, "%#x", behavior);
516 }
517
518 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
519
520 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
521 struct syscall_arg *arg)
522 {
523 int printed = 0, op = arg->val;
524
525 if (op == 0)
526 return scnprintf(bf, size, "NONE");
527 #define P_CMD(cmd) \
528 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
529 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
530 op &= ~LOCK_##cmd; \
531 }
532
533 P_CMD(SH);
534 P_CMD(EX);
535 P_CMD(NB);
536 P_CMD(UN);
537 P_CMD(MAND);
538 P_CMD(RW);
539 P_CMD(READ);
540 P_CMD(WRITE);
541 #undef P_OP
542
543 if (op)
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
545
546 return printed;
547 }
548
549 #define SCA_FLOCK syscall_arg__scnprintf_flock
550
551 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
552 {
553 enum syscall_futex_args {
554 SCF_UADDR = (1 << 0),
555 SCF_OP = (1 << 1),
556 SCF_VAL = (1 << 2),
557 SCF_TIMEOUT = (1 << 3),
558 SCF_UADDR2 = (1 << 4),
559 SCF_VAL3 = (1 << 5),
560 };
561 int op = arg->val;
562 int cmd = op & FUTEX_CMD_MASK;
563 size_t printed = 0;
564
565 switch (cmd) {
566 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
567 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
568 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
569 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
570 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
571 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
572 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
573 P_FUTEX_OP(WAKE_OP); break;
574 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
575 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
576 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
577 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
578 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
579 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
580 default: printed = scnprintf(bf, size, "%#x", cmd); break;
581 }
582
583 if (op & FUTEX_PRIVATE_FLAG)
584 printed += scnprintf(bf + printed, size - printed, "|PRIV");
585
586 if (op & FUTEX_CLOCK_REALTIME)
587 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
588
589 return printed;
590 }
591
592 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
593
594 static const char *bpf_cmd[] = {
595 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
596 "MAP_GET_NEXT_KEY", "PROG_LOAD",
597 };
598 static DEFINE_STRARRAY(bpf_cmd);
599
600 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
601 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
602
603 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
604 static DEFINE_STRARRAY(itimers);
605
606 static const char *keyctl_options[] = {
607 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
608 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
609 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
610 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
611 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
612 };
613 static DEFINE_STRARRAY(keyctl_options);
614
615 static const char *whences[] = { "SET", "CUR", "END",
616 #ifdef SEEK_DATA
617 "DATA",
618 #endif
619 #ifdef SEEK_HOLE
620 "HOLE",
621 #endif
622 };
623 static DEFINE_STRARRAY(whences);
624
/*
 * fcntl() cmd names, indexed by command value. Every entry is spelled
 * without the "F_" prefix so the whole table prints in one style.
 */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
631 static DEFINE_STRARRAY(fcntl_cmds);
632
/* Resource names for getrlimit() and friends, indexed by resource number. */
static const char *rlimit_resources[] = {
	"CPU",
	"FSIZE",
	"DATA",
	"STACK",
	"CORE",
	"RSS",
	"NPROC",
	"NOFILE",
	"MEMLOCK",
	"AS",
	"LOCKS",
	"SIGPENDING",
	"MSGQUEUE",
	"NICE",
	"RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

static const char *sighow[] = {
	"BLOCK",
	"UNBLOCK",
	"SETMASK",
};
static DEFINE_STRARRAY(sighow);

/* Clock names for clock_gettime() and friends, indexed by clock id. */
static const char *clockid[] = {
	"REALTIME",
	"MONOTONIC",
	"PROCESS_CPUTIME_ID",
	"THREAD_CPUTIME_ID",
	"MONOTONIC_RAW",
	"REALTIME_COARSE",
	"MONOTONIC_COARSE",
	"BOOTTIME",
	"REALTIME_ALARM",
	"BOOTTIME_ALARM",
	"SGI_CYCLE",
	"TAI",
};
static DEFINE_STRARRAY(clockid);
649
/*
 * Socket address family names, indexed by the AF_* value.
 *
 * The table is positional, so every family number needs its own slot:
 * AF_MPLS (28) sits between AF_IB (27) and AF_CAN (29). Leaving it out
 * shifts CAN and every later family one slot down.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
658 static DEFINE_STRARRAY(socket_families);
659
660 #ifndef SOCK_TYPE_MASK
661 #define SOCK_TYPE_MASK 0xf
662 #endif
663
664 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
665 struct syscall_arg *arg)
666 {
667 size_t printed;
668 int type = arg->val,
669 flags = type & ~SOCK_TYPE_MASK;
670
671 type &= SOCK_TYPE_MASK;
672 /*
673 * Can't use a strarray, MIPS may override for ABI reasons.
674 */
675 switch (type) {
676 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
677 P_SK_TYPE(STREAM);
678 P_SK_TYPE(DGRAM);
679 P_SK_TYPE(RAW);
680 P_SK_TYPE(RDM);
681 P_SK_TYPE(SEQPACKET);
682 P_SK_TYPE(DCCP);
683 P_SK_TYPE(PACKET);
684 #undef P_SK_TYPE
685 default:
686 printed = scnprintf(bf, size, "%#x", type);
687 }
688
689 #define P_SK_FLAG(n) \
690 if (flags & SOCK_##n) { \
691 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
692 flags &= ~SOCK_##n; \
693 }
694
695 P_SK_FLAG(CLOEXEC);
696 P_SK_FLAG(NONBLOCK);
697 #undef P_SK_FLAG
698
699 if (flags)
700 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
701
702 return printed;
703 }
704
705 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
706
707 #ifndef MSG_PROBE
708 #define MSG_PROBE 0x10
709 #endif
710 #ifndef MSG_WAITFORONE
711 #define MSG_WAITFORONE 0x10000
712 #endif
713 #ifndef MSG_SENDPAGE_NOTLAST
714 #define MSG_SENDPAGE_NOTLAST 0x20000
715 #endif
716 #ifndef MSG_FASTOPEN
717 #define MSG_FASTOPEN 0x20000000
718 #endif
719
720 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
721 struct syscall_arg *arg)
722 {
723 int printed = 0, flags = arg->val;
724
725 if (flags == 0)
726 return scnprintf(bf, size, "NONE");
727 #define P_MSG_FLAG(n) \
728 if (flags & MSG_##n) { \
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
730 flags &= ~MSG_##n; \
731 }
732
733 P_MSG_FLAG(OOB);
734 P_MSG_FLAG(PEEK);
735 P_MSG_FLAG(DONTROUTE);
736 P_MSG_FLAG(TRYHARD);
737 P_MSG_FLAG(CTRUNC);
738 P_MSG_FLAG(PROBE);
739 P_MSG_FLAG(TRUNC);
740 P_MSG_FLAG(DONTWAIT);
741 P_MSG_FLAG(EOR);
742 P_MSG_FLAG(WAITALL);
743 P_MSG_FLAG(FIN);
744 P_MSG_FLAG(SYN);
745 P_MSG_FLAG(CONFIRM);
746 P_MSG_FLAG(RST);
747 P_MSG_FLAG(ERRQUEUE);
748 P_MSG_FLAG(NOSIGNAL);
749 P_MSG_FLAG(MORE);
750 P_MSG_FLAG(WAITFORONE);
751 P_MSG_FLAG(SENDPAGE_NOTLAST);
752 P_MSG_FLAG(FASTOPEN);
753 P_MSG_FLAG(CMSG_CLOEXEC);
754 #undef P_MSG_FLAG
755
756 if (flags)
757 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
758
759 return printed;
760 }
761
762 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
763
764 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
765 struct syscall_arg *arg)
766 {
767 size_t printed = 0;
768 int mode = arg->val;
769
770 if (mode == F_OK) /* 0 */
771 return scnprintf(bf, size, "F");
772 #define P_MODE(n) \
773 if (mode & n##_OK) { \
774 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
775 mode &= ~n##_OK; \
776 }
777
778 P_MODE(R);
779 P_MODE(W);
780 P_MODE(X);
781 #undef P_MODE
782
783 if (mode)
784 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
785
786 return printed;
787 }
788
789 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
790
791 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
792 struct syscall_arg *arg);
793
794 #define SCA_FILENAME syscall_arg__scnprintf_filename
795
796 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
797 struct syscall_arg *arg)
798 {
799 int printed = 0, flags = arg->val;
800
801 if (!(flags & O_CREAT))
802 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
803
804 if (flags == 0)
805 return scnprintf(bf, size, "RDONLY");
806 #define P_FLAG(n) \
807 if (flags & O_##n) { \
808 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
809 flags &= ~O_##n; \
810 }
811
812 P_FLAG(APPEND);
813 P_FLAG(ASYNC);
814 P_FLAG(CLOEXEC);
815 P_FLAG(CREAT);
816 P_FLAG(DIRECT);
817 P_FLAG(DIRECTORY);
818 P_FLAG(EXCL);
819 P_FLAG(LARGEFILE);
820 P_FLAG(NOATIME);
821 P_FLAG(NOCTTY);
822 #ifdef O_NONBLOCK
823 P_FLAG(NONBLOCK);
824 #elif O_NDELAY
825 P_FLAG(NDELAY);
826 #endif
827 #ifdef O_PATH
828 P_FLAG(PATH);
829 #endif
830 P_FLAG(RDWR);
831 #ifdef O_DSYNC
832 if ((flags & O_SYNC) == O_SYNC)
833 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
834 else {
835 P_FLAG(DSYNC);
836 }
837 #else
838 P_FLAG(SYNC);
839 #endif
840 P_FLAG(TRUNC);
841 P_FLAG(WRONLY);
842 #undef P_FLAG
843
844 if (flags)
845 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
846
847 return printed;
848 }
849
850 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
851
852 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
853 struct syscall_arg *arg)
854 {
855 int printed = 0, flags = arg->val;
856
857 if (flags == 0)
858 return 0;
859
860 #define P_FLAG(n) \
861 if (flags & PERF_FLAG_##n) { \
862 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
863 flags &= ~PERF_FLAG_##n; \
864 }
865
866 P_FLAG(FD_NO_GROUP);
867 P_FLAG(FD_OUTPUT);
868 P_FLAG(PID_CGROUP);
869 P_FLAG(FD_CLOEXEC);
870 #undef P_FLAG
871
872 if (flags)
873 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
874
875 return printed;
876 }
877
878 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
879
880 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
881 struct syscall_arg *arg)
882 {
883 int printed = 0, flags = arg->val;
884
885 if (flags == 0)
886 return scnprintf(bf, size, "NONE");
887 #define P_FLAG(n) \
888 if (flags & EFD_##n) { \
889 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
890 flags &= ~EFD_##n; \
891 }
892
893 P_FLAG(SEMAPHORE);
894 P_FLAG(CLOEXEC);
895 P_FLAG(NONBLOCK);
896 #undef P_FLAG
897
898 if (flags)
899 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
900
901 return printed;
902 }
903
904 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
905
906 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
907 struct syscall_arg *arg)
908 {
909 int printed = 0, flags = arg->val;
910
911 #define P_FLAG(n) \
912 if (flags & O_##n) { \
913 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
914 flags &= ~O_##n; \
915 }
916
917 P_FLAG(CLOEXEC);
918 P_FLAG(NONBLOCK);
919 #undef P_FLAG
920
921 if (flags)
922 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
923
924 return printed;
925 }
926
927 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
928
929 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
930 {
931 int sig = arg->val;
932
933 switch (sig) {
934 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
935 P_SIGNUM(HUP);
936 P_SIGNUM(INT);
937 P_SIGNUM(QUIT);
938 P_SIGNUM(ILL);
939 P_SIGNUM(TRAP);
940 P_SIGNUM(ABRT);
941 P_SIGNUM(BUS);
942 P_SIGNUM(FPE);
943 P_SIGNUM(KILL);
944 P_SIGNUM(USR1);
945 P_SIGNUM(SEGV);
946 P_SIGNUM(USR2);
947 P_SIGNUM(PIPE);
948 P_SIGNUM(ALRM);
949 P_SIGNUM(TERM);
950 P_SIGNUM(CHLD);
951 P_SIGNUM(CONT);
952 P_SIGNUM(STOP);
953 P_SIGNUM(TSTP);
954 P_SIGNUM(TTIN);
955 P_SIGNUM(TTOU);
956 P_SIGNUM(URG);
957 P_SIGNUM(XCPU);
958 P_SIGNUM(XFSZ);
959 P_SIGNUM(VTALRM);
960 P_SIGNUM(PROF);
961 P_SIGNUM(WINCH);
962 P_SIGNUM(IO);
963 P_SIGNUM(PWR);
964 P_SIGNUM(SYS);
965 #ifdef SIGEMT
966 P_SIGNUM(EMT);
967 #endif
968 #ifdef SIGSTKFLT
969 P_SIGNUM(STKFLT);
970 #endif
971 #ifdef SIGSWI
972 P_SIGNUM(SWI);
973 #endif
974 default: break;
975 }
976
977 return scnprintf(bf, size, "%#x", sig);
978 }
979
980 #define SCA_SIGNUM syscall_arg__scnprintf_signum
981
982 #if defined(__i386__) || defined(__x86_64__)
983 /*
984 * FIXME: Make this available to all arches.
985 */
986 #define TCGETS 0x5401
987
988 static const char *tioctls[] = {
989 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
990 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
991 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
992 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
993 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
994 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
995 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
996 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
997 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
998 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
999 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
1000 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
1001 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
1002 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
1003 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
1004 };
1005
1006 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
1007 #endif /* defined(__i386__) || defined(__x86_64__) */
1008
1009 static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
1010 {
1011 int op = arg->val;
1012 size_t printed = 0;
1013
1014 switch (op) {
1015 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1016 P_SECCOMP_SET_MODE_OP(STRICT);
1017 P_SECCOMP_SET_MODE_OP(FILTER);
1018 #undef P_SECCOMP_SET_MODE_OP
1019 default: printed = scnprintf(bf, size, "%#x", op); break;
1020 }
1021
1022 return printed;
1023 }
1024
1025 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
1026
1027 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
1028 struct syscall_arg *arg)
1029 {
1030 int printed = 0, flags = arg->val;
1031
1032 #define P_FLAG(n) \
1033 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1034 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1035 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1036 }
1037
1038 P_FLAG(TSYNC);
1039 #undef P_FLAG
1040
1041 if (flags)
1042 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1043
1044 return printed;
1045 }
1046
1047 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
1048
1049 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
1050 struct syscall_arg *arg)
1051 {
1052 int printed = 0, flags = arg->val;
1053
1054 #define P_FLAG(n) \
1055 if (flags & GRND_##n) { \
1056 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1057 flags &= ~GRND_##n; \
1058 }
1059
1060 P_FLAG(RANDOM);
1061 P_FLAG(NONBLOCK);
1062 #undef P_FLAG
1063
1064 if (flags)
1065 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
1066
1067 return printed;
1068 }
1069
1070 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
1071
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @idx through strarray__<table>. @argname is not used by the
 * expansion; it only labels the argument at the point of use.
 */
#define STRARRAY(idx, argname, table) \
	  .arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [idx] = &strarray__##table, }
1075
1076 static struct syscall_fmt {
1077 const char *name;
1078 const char *alias;
1079 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1080 void *arg_parm[6];
1081 bool errmsg;
1082 bool timeout;
1083 bool hexret;
1084 } syscall_fmts[] = {
1085 { .name = "access", .errmsg = true,
1086 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1087 [1] = SCA_ACCMODE, /* mode */ }, },
1088 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1089 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
1090 { .name = "brk", .hexret = true,
1091 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1092 { .name = "chdir", .errmsg = true,
1093 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1094 { .name = "chmod", .errmsg = true,
1095 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1096 { .name = "chroot", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1098 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1099 { .name = "close", .errmsg = true,
1100 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1101 { .name = "connect", .errmsg = true, },
1102 { .name = "creat", .errmsg = true,
1103 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1104 { .name = "dup", .errmsg = true,
1105 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1106 { .name = "dup2", .errmsg = true,
1107 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1108 { .name = "dup3", .errmsg = true,
1109 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1110 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1111 { .name = "eventfd2", .errmsg = true,
1112 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1113 { .name = "faccessat", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1115 [1] = SCA_FILENAME, /* filename */ }, },
1116 { .name = "fadvise64", .errmsg = true,
1117 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1118 { .name = "fallocate", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120 { .name = "fchdir", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 { .name = "fchmod", .errmsg = true,
1123 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1124 { .name = "fchmodat", .errmsg = true,
1125 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1126 [1] = SCA_FILENAME, /* filename */ }, },
1127 { .name = "fchown", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1129 { .name = "fchownat", .errmsg = true,
1130 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1131 [1] = SCA_FILENAME, /* filename */ }, },
1132 { .name = "fcntl", .errmsg = true,
1133 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1134 [1] = SCA_STRARRAY, /* cmd */ },
1135 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1136 { .name = "fdatasync", .errmsg = true,
1137 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1138 { .name = "flock", .errmsg = true,
1139 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1140 [1] = SCA_FLOCK, /* cmd */ }, },
1141 { .name = "fsetxattr", .errmsg = true,
1142 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1143 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1144 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1145 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1146 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1147 [1] = SCA_FILENAME, /* filename */ }, },
1148 { .name = "fstatfs", .errmsg = true,
1149 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1150 { .name = "fsync", .errmsg = true,
1151 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1152 { .name = "ftruncate", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1154 { .name = "futex", .errmsg = true,
1155 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1156 { .name = "futimesat", .errmsg = true,
1157 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1158 [1] = SCA_FILENAME, /* filename */ }, },
1159 { .name = "getdents", .errmsg = true,
1160 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1161 { .name = "getdents64", .errmsg = true,
1162 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1163 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1164 { .name = "getrandom", .errmsg = true,
1165 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
1166 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1167 { .name = "getxattr", .errmsg = true,
1168 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1169 { .name = "inotify_add_watch", .errmsg = true,
1170 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1171 { .name = "ioctl", .errmsg = true,
1172 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1173 #if defined(__i386__) || defined(__x86_64__)
1174 /*
1175 * FIXME: Make this available to all arches.
1176 */
1177 [1] = SCA_STRHEXARRAY, /* cmd */
1178 [2] = SCA_HEX, /* arg */ },
1179 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1180 #else
1181 [2] = SCA_HEX, /* arg */ }, },
1182 #endif
1183 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1184 { .name = "kill", .errmsg = true,
1185 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1186 { .name = "lchown", .errmsg = true,
1187 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1188 { .name = "lgetxattr", .errmsg = true,
1189 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1190 { .name = "linkat", .errmsg = true,
1191 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1192 { .name = "listxattr", .errmsg = true,
1193 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1194 { .name = "llistxattr", .errmsg = true,
1195 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1196 { .name = "lremovexattr", .errmsg = true,
1197 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1198 { .name = "lseek", .errmsg = true,
1199 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1200 [2] = SCA_STRARRAY, /* whence */ },
1201 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1202 { .name = "lsetxattr", .errmsg = true,
1203 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1204 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1205 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1206 { .name = "lsxattr", .errmsg = true,
1207 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1208 { .name = "madvise", .errmsg = true,
1209 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1210 [2] = SCA_MADV_BHV, /* behavior */ }, },
1211 { .name = "mkdir", .errmsg = true,
1212 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1213 { .name = "mkdirat", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1215 [1] = SCA_FILENAME, /* pathname */ }, },
1216 { .name = "mknod", .errmsg = true,
1217 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1218 { .name = "mknodat", .errmsg = true,
1219 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1220 [1] = SCA_FILENAME, /* filename */ }, },
1221 { .name = "mlock", .errmsg = true,
1222 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1223 { .name = "mlockall", .errmsg = true,
1224 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1225 { .name = "mmap", .hexret = true,
1226 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1227 [2] = SCA_MMAP_PROT, /* prot */
1228 [3] = SCA_MMAP_FLAGS, /* flags */
1229 [4] = SCA_FD, /* fd */ }, },
1230 { .name = "mprotect", .errmsg = true,
1231 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1232 [2] = SCA_MMAP_PROT, /* prot */ }, },
1233 { .name = "mq_unlink", .errmsg = true,
1234 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1235 { .name = "mremap", .hexret = true,
1236 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1237 [3] = SCA_MREMAP_FLAGS, /* flags */
1238 [4] = SCA_HEX, /* new_addr */ }, },
1239 { .name = "munlock", .errmsg = true,
1240 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1241 { .name = "munmap", .errmsg = true,
1242 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1243 { .name = "name_to_handle_at", .errmsg = true,
1244 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1245 { .name = "newfstatat", .errmsg = true,
1246 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1247 [1] = SCA_FILENAME, /* filename */ }, },
1248 { .name = "open", .errmsg = true,
1249 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1250 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1251 { .name = "open_by_handle_at", .errmsg = true,
1252 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1253 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1254 { .name = "openat", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1256 [1] = SCA_FILENAME, /* filename */
1257 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1258 { .name = "perf_event_open", .errmsg = true,
1259 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1260 [2] = SCA_INT, /* cpu */
1261 [3] = SCA_FD, /* group_fd */
1262 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1263 { .name = "pipe2", .errmsg = true,
1264 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1265 { .name = "poll", .errmsg = true, .timeout = true, },
1266 { .name = "ppoll", .errmsg = true, .timeout = true, },
1267 { .name = "pread", .errmsg = true, .alias = "pread64",
1268 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1269 { .name = "preadv", .errmsg = true, .alias = "pread",
1270 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1271 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1272 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1273 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1274 { .name = "pwritev", .errmsg = true,
1275 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1276 { .name = "read", .errmsg = true,
1277 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1278 { .name = "readlink", .errmsg = true,
1279 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1280 { .name = "readlinkat", .errmsg = true,
1281 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1282 [1] = SCA_FILENAME, /* pathname */ }, },
1283 { .name = "readv", .errmsg = true,
1284 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1285 { .name = "recvfrom", .errmsg = true,
1286 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1287 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1288 { .name = "recvmmsg", .errmsg = true,
1289 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1290 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1291 { .name = "recvmsg", .errmsg = true,
1292 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1293 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1294 { .name = "removexattr", .errmsg = true,
1295 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1296 { .name = "renameat", .errmsg = true,
1297 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1298 { .name = "rmdir", .errmsg = true,
1299 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1300 { .name = "rt_sigaction", .errmsg = true,
1301 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1302 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1303 { .name = "rt_sigqueueinfo", .errmsg = true,
1304 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1305 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1306 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1307 { .name = "seccomp", .errmsg = true,
1308 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
1309 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
1310 { .name = "select", .errmsg = true, .timeout = true, },
1311 { .name = "sendmmsg", .errmsg = true,
1312 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1313 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1314 { .name = "sendmsg", .errmsg = true,
1315 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1316 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1317 { .name = "sendto", .errmsg = true,
1318 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1319 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1320 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1321 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1322 { .name = "setxattr", .errmsg = true,
1323 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1324 { .name = "shutdown", .errmsg = true,
1325 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1326 { .name = "socket", .errmsg = true,
1327 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1328 [1] = SCA_SK_TYPE, /* type */ },
1329 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1330 { .name = "socketpair", .errmsg = true,
1331 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1332 [1] = SCA_SK_TYPE, /* type */ },
1333 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1334 { .name = "stat", .errmsg = true, .alias = "newstat",
1335 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1336 { .name = "statfs", .errmsg = true,
1337 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1338 { .name = "swapoff", .errmsg = true,
1339 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1340 { .name = "swapon", .errmsg = true,
1341 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1342 { .name = "symlinkat", .errmsg = true,
1343 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1344 { .name = "tgkill", .errmsg = true,
1345 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1346 { .name = "tkill", .errmsg = true,
1347 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1348 { .name = "truncate", .errmsg = true,
1349 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1350 { .name = "uname", .errmsg = true, .alias = "newuname", },
1351 { .name = "unlinkat", .errmsg = true,
1352 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1353 [1] = SCA_FILENAME, /* pathname */ }, },
1354 { .name = "utime", .errmsg = true,
1355 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1356 { .name = "utimensat", .errmsg = true,
1357 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1358 [1] = SCA_FILENAME, /* filename */ }, },
1359 { .name = "utimes", .errmsg = true,
1360 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1361 { .name = "vmsplice", .errmsg = true,
1362 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1363 { .name = "write", .errmsg = true,
1364 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1365 { .name = "writev", .errmsg = true,
1366 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1367 };
1368
1369 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1370 {
1371 const struct syscall_fmt *fmt = fmtp;
1372 return strcmp(name, fmt->name);
1373 }
1374
1375 static struct syscall_fmt *syscall_fmt__find(const char *name)
1376 {
1377 const int nmemb = ARRAY_SIZE(syscall_fmts);
1378 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1379 }
1380
/*
 * Per syscall id description, filled in by trace__read_syscall_info() and
 * stored in trace->syscalls.table[], indexed by syscall id.
 */
struct syscall {
	/* Format of the "syscalls:sys_enter_<name>" tracepoint (or of its alias) */
	struct event_format *tp_format;
	/* Number of entries in ->args, i.e. without the syscall number field */
	int		    nr_args;
	/* Argument fields of ->tp_format; NULL when no format could be read */
	struct format_field *args;
	/* Name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* Set for "exit" and "exit_group" */
	bool		    is_exit;
	/* Matching syscall_fmts[] entry, NULL if the syscall has none */
	struct syscall_fmt  *fmt;
	/* Per argument formatter, a NULL slot means the argument is printed as "%ld" */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* Per argument extra parameter for the formatter, taken from ->fmt->arg_parm */
	void		    **arg_parm;
};
1391
1392 static size_t fprintf_duration(unsigned long t, FILE *fp)
1393 {
1394 double duration = (double)t / NSEC_PER_MSEC;
1395 size_t printed = fprintf(fp, "(");
1396
1397 if (duration >= 1.0)
1398 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1399 else if (duration >= 0.01)
1400 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1401 else
1402 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1403 return printed + fprintf(fp, "): ");
1404 }
1405
1406 /**
1407 * filename.ptr: The filename char pointer that will be vfs_getname'd
1408 * filename.entry_str_pos: Where to insert the string translated from
1409 * filename.ptr by the vfs_getname tracepoint/kprobe.
1410 */
1411 struct thread_trace {
1412 u64 entry_time;
1413 u64 exit_time;
1414 bool entry_pending;
1415 unsigned long nr_events;
1416 unsigned long pfmaj, pfmin;
1417 char *entry_str;
1418 double runtime_ms;
1419 struct {
1420 unsigned long ptr;
1421 short int entry_str_pos;
1422 bool pending_open;
1423 unsigned int namelen;
1424 char *name;
1425 } filename;
1426 struct {
1427 int max;
1428 char **table;
1429 } paths;
1430
1431 struct intlist *syscall_stats;
1432 };
1433
1434 static struct thread_trace *thread_trace__new(void)
1435 {
1436 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1437
1438 if (ttrace)
1439 ttrace->paths.max = -1;
1440
1441 ttrace->syscall_stats = intlist__new(NULL);
1442
1443 return ttrace;
1444 }
1445
1446 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1447 {
1448 struct thread_trace *ttrace;
1449
1450 if (thread == NULL)
1451 goto fail;
1452
1453 if (thread__priv(thread) == NULL)
1454 thread__set_priv(thread, thread_trace__new());
1455
1456 if (thread__priv(thread) == NULL)
1457 goto fail;
1458
1459 ttrace = thread__priv(thread);
1460 ++ttrace->nr_events;
1461
1462 return ttrace;
1463 fail:
1464 color_fprintf(fp, PERF_COLOR_RED,
1465 "WARNING: not enough memory, dropping samples!\n");
1466 return NULL;
1467 }
1468
/* Bit flags, each one occupies its own bit so that they can be OR'ed together */
#define TRACE_PFMAJ	0x1
#define TRACE_PFMIN	0x2

/* Size, in bytes, of the buffer a sys_enter line is formatted into */
static const size_t trace__entry_str_size = 2048;
1473
1474 struct trace {
1475 struct perf_tool tool;
1476 struct {
1477 int machine;
1478 int open_id;
1479 } audit;
1480 struct {
1481 int max;
1482 struct syscall *table;
1483 struct {
1484 struct perf_evsel *sys_enter,
1485 *sys_exit;
1486 } events;
1487 } syscalls;
1488 struct record_opts opts;
1489 struct perf_evlist *evlist;
1490 struct machine *host;
1491 struct thread *current;
1492 u64 base_time;
1493 FILE *output;
1494 unsigned long nr_events;
1495 struct strlist *ev_qualifier;
1496 struct {
1497 size_t nr;
1498 int *entries;
1499 } ev_qualifier_ids;
1500 struct intlist *tid_list;
1501 struct intlist *pid_list;
1502 struct {
1503 size_t nr;
1504 pid_t *entries;
1505 } filter_pids;
1506 double duration_filter;
1507 double runtime_ms;
1508 struct {
1509 u64 vfs_getname,
1510 proc_getname;
1511 } stats;
1512 bool not_ev_qualifier;
1513 bool live;
1514 bool full_time;
1515 bool sched;
1516 bool multiple_threads;
1517 bool summary;
1518 bool summary_only;
1519 bool show_comm;
1520 bool show_tool_stats;
1521 bool trace_syscalls;
1522 bool force;
1523 bool vfs_getname;
1524 int trace_pgfaults;
1525 };
1526
1527 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1528 {
1529 struct thread_trace *ttrace = thread__priv(thread);
1530
1531 if (fd > ttrace->paths.max) {
1532 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1533
1534 if (npath == NULL)
1535 return -1;
1536
1537 if (ttrace->paths.max != -1) {
1538 memset(npath + ttrace->paths.max + 1, 0,
1539 (fd - ttrace->paths.max) * sizeof(char *));
1540 } else {
1541 memset(npath, 0, (fd + 1) * sizeof(char *));
1542 }
1543
1544 ttrace->paths.table = npath;
1545 ttrace->paths.max = fd;
1546 }
1547
1548 ttrace->paths.table[fd] = strdup(pathname);
1549
1550 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1551 }
1552
1553 static int thread__read_fd_path(struct thread *thread, int fd)
1554 {
1555 char linkname[PATH_MAX], pathname[PATH_MAX];
1556 struct stat st;
1557 int ret;
1558
1559 if (thread->pid_ == thread->tid) {
1560 scnprintf(linkname, sizeof(linkname),
1561 "/proc/%d/fd/%d", thread->pid_, fd);
1562 } else {
1563 scnprintf(linkname, sizeof(linkname),
1564 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1565 }
1566
1567 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1568 return -1;
1569
1570 ret = readlink(linkname, pathname, sizeof(pathname));
1571
1572 if (ret < 0 || ret > st.st_size)
1573 return -1;
1574
1575 pathname[ret] = '\0';
1576 return trace__set_fd_pathname(thread, fd, pathname);
1577 }
1578
1579 static const char *thread__fd_path(struct thread *thread, int fd,
1580 struct trace *trace)
1581 {
1582 struct thread_trace *ttrace = thread__priv(thread);
1583
1584 if (ttrace == NULL)
1585 return NULL;
1586
1587 if (fd < 0)
1588 return NULL;
1589
1590 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1591 if (!trace->live)
1592 return NULL;
1593 ++trace->stats.proc_getname;
1594 if (thread__read_fd_path(thread, fd))
1595 return NULL;
1596 }
1597
1598 return ttrace->paths.table[fd];
1599 }
1600
1601 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1602 struct syscall_arg *arg)
1603 {
1604 int fd = arg->val;
1605 size_t printed = scnprintf(bf, size, "%d", fd);
1606 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1607
1608 if (path)
1609 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1610
1611 return printed;
1612 }
1613
1614 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1615 struct syscall_arg *arg)
1616 {
1617 int fd = arg->val;
1618 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1619 struct thread_trace *ttrace = thread__priv(arg->thread);
1620
1621 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1622 zfree(&ttrace->paths.table[fd]);
1623
1624 return printed;
1625 }
1626
1627 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1628 unsigned long ptr)
1629 {
1630 struct thread_trace *ttrace = thread__priv(thread);
1631
1632 ttrace->filename.ptr = ptr;
1633 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1634 }
1635
1636 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1637 struct syscall_arg *arg)
1638 {
1639 unsigned long ptr = arg->val;
1640
1641 if (!arg->trace->vfs_getname)
1642 return scnprintf(bf, size, "%#x", ptr);
1643
1644 thread__set_filename_pos(arg->thread, bf, ptr);
1645 return 0;
1646 }
1647
1648 static bool trace__filter_duration(struct trace *trace, double t)
1649 {
1650 return t < (trace->duration_filter * NSEC_PER_MSEC);
1651 }
1652
1653 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1654 {
1655 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1656
1657 return fprintf(fp, "%10.3f ", ts);
1658 }
1659
/*
 * Flags set asynchronously by sig_handler(). volatile sig_atomic_t is the
 * object type that a signal handler can portably write to: a plain bool may
 * be cached in a register by code polling it, and writing it from a handler
 * is undefined behaviour as far as the C standard is concerned.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: ask for termination, remembering in 'interrupted' whether
 * it was a SIGINT that asked for it.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1668
1669 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1670 u64 duration, u64 tstamp, FILE *fp)
1671 {
1672 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1673 printed += fprintf_duration(duration, fp);
1674
1675 if (trace->multiple_threads) {
1676 if (trace->show_comm)
1677 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1678 printed += fprintf(fp, "%d ", thread->tid);
1679 }
1680
1681 return printed;
1682 }
1683
1684 static int trace__process_event(struct trace *trace, struct machine *machine,
1685 union perf_event *event, struct perf_sample *sample)
1686 {
1687 int ret = 0;
1688
1689 switch (event->header.type) {
1690 case PERF_RECORD_LOST:
1691 color_fprintf(trace->output, PERF_COLOR_RED,
1692 "LOST %" PRIu64 " events!\n", event->lost.lost);
1693 ret = machine__process_lost_event(machine, event, sample);
1694 break;
1695 default:
1696 ret = machine__process_event(machine, event, sample);
1697 break;
1698 }
1699
1700 return ret;
1701 }
1702
1703 static int trace__tool_process(struct perf_tool *tool,
1704 union perf_event *event,
1705 struct perf_sample *sample,
1706 struct machine *machine)
1707 {
1708 struct trace *trace = container_of(tool, struct trace, tool);
1709 return trace__process_event(trace, machine, event, sample);
1710 }
1711
1712 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1713 {
1714 int err = symbol__init(NULL);
1715
1716 if (err)
1717 return err;
1718
1719 trace->host = machine__new_host();
1720 if (trace->host == NULL)
1721 return -ENOMEM;
1722
1723 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1724 return -errno;
1725
1726 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1727 evlist->threads, trace__tool_process, false,
1728 trace->opts.proc_map_timeout);
1729 if (err)
1730 symbol__exit();
1731
1732 return err;
1733 }
1734
1735 static int syscall__set_arg_fmts(struct syscall *sc)
1736 {
1737 struct format_field *field;
1738 int idx = 0;
1739
1740 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1741 if (sc->arg_scnprintf == NULL)
1742 return -1;
1743
1744 if (sc->fmt)
1745 sc->arg_parm = sc->fmt->arg_parm;
1746
1747 for (field = sc->args; field; field = field->next) {
1748 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1749 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1750 else if (field->flags & FIELD_IS_POINTER)
1751 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1752 ++idx;
1753 }
1754
1755 return 0;
1756 }
1757
1758 static int trace__read_syscall_info(struct trace *trace, int id)
1759 {
1760 char tp_name[128];
1761 struct syscall *sc;
1762 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1763
1764 if (name == NULL)
1765 return -1;
1766
1767 if (id > trace->syscalls.max) {
1768 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1769
1770 if (nsyscalls == NULL)
1771 return -1;
1772
1773 if (trace->syscalls.max != -1) {
1774 memset(nsyscalls + trace->syscalls.max + 1, 0,
1775 (id - trace->syscalls.max) * sizeof(*sc));
1776 } else {
1777 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1778 }
1779
1780 trace->syscalls.table = nsyscalls;
1781 trace->syscalls.max = id;
1782 }
1783
1784 sc = trace->syscalls.table + id;
1785 sc->name = name;
1786
1787 sc->fmt = syscall_fmt__find(sc->name);
1788
1789 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1790 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1791
1792 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1793 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1794 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1795 }
1796
1797 if (IS_ERR(sc->tp_format))
1798 return -1;
1799
1800 sc->args = sc->tp_format->format.fields;
1801 sc->nr_args = sc->tp_format->format.nr_fields;
1802 /*
1803 * We need to check and discard the first variable '__syscall_nr'
1804 * or 'nr' that mean the syscall number. It is needless here.
1805 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1806 */
1807 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1808 sc->args = sc->args->next;
1809 --sc->nr_args;
1810 }
1811
1812 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1813
1814 return syscall__set_arg_fmts(sc);
1815 }
1816
1817 static int trace__validate_ev_qualifier(struct trace *trace)
1818 {
1819 int err = 0, i;
1820 struct str_node *pos;
1821
1822 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1823 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1824 sizeof(trace->ev_qualifier_ids.entries[0]));
1825
1826 if (trace->ev_qualifier_ids.entries == NULL) {
1827 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1828 trace->output);
1829 err = -EINVAL;
1830 goto out;
1831 }
1832
1833 i = 0;
1834
1835 strlist__for_each(pos, trace->ev_qualifier) {
1836 const char *sc = pos->s;
1837 int id = audit_name_to_syscall(sc, trace->audit.machine);
1838
1839 if (id < 0) {
1840 if (err == 0) {
1841 fputs("Error:\tInvalid syscall ", trace->output);
1842 err = -EINVAL;
1843 } else {
1844 fputs(", ", trace->output);
1845 }
1846
1847 fputs(sc, trace->output);
1848 }
1849
1850 trace->ev_qualifier_ids.entries[i++] = id;
1851 }
1852
1853 if (err < 0) {
1854 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1855 "\nHint:\tand: 'man syscalls'\n", trace->output);
1856 zfree(&trace->ev_qualifier_ids.entries);
1857 trace->ev_qualifier_ids.nr = 0;
1858 }
1859 out:
1860 return err;
1861 }
1862
1863 /*
1864 * args is to be interpreted as a series of longs but we need to handle
1865 * 8-byte unaligned accesses. args points to raw_data within the event
1866 * and raw_data is guaranteed to be 8-byte unaligned because it is
1867 * preceded by raw_size which is a u32. So we need to copy args to a temp
1868 * variable to read it. Most notably this avoids extended load instructions
1869 * on unaligned addresses
1870 */
1871
1872 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1873 unsigned char *args, struct trace *trace,
1874 struct thread *thread)
1875 {
1876 size_t printed = 0;
1877 unsigned char *p;
1878 unsigned long val;
1879
1880 if (sc->args != NULL) {
1881 struct format_field *field;
1882 u8 bit = 1;
1883 struct syscall_arg arg = {
1884 .idx = 0,
1885 .mask = 0,
1886 .trace = trace,
1887 .thread = thread,
1888 };
1889
1890 for (field = sc->args; field;
1891 field = field->next, ++arg.idx, bit <<= 1) {
1892 if (arg.mask & bit)
1893 continue;
1894
1895 /* special care for unaligned accesses */
1896 p = args + sizeof(unsigned long) * arg.idx;
1897 memcpy(&val, p, sizeof(val));
1898
1899 /*
1900 * Suppress this argument if its value is zero and
1901 * and we don't have a string associated in an
1902 * strarray for it.
1903 */
1904 if (val == 0 &&
1905 !(sc->arg_scnprintf &&
1906 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1907 sc->arg_parm[arg.idx]))
1908 continue;
1909
1910 printed += scnprintf(bf + printed, size - printed,
1911 "%s%s: ", printed ? ", " : "", field->name);
1912 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1913 arg.val = val;
1914 if (sc->arg_parm)
1915 arg.parm = sc->arg_parm[arg.idx];
1916 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1917 size - printed, &arg);
1918 } else {
1919 printed += scnprintf(bf + printed, size - printed,
1920 "%ld", val);
1921 }
1922 }
1923 } else {
1924 int i = 0;
1925
1926 while (i < 6) {
1927 /* special care for unaligned accesses */
1928 p = args + sizeof(unsigned long) * i;
1929 memcpy(&val, p, sizeof(val));
1930 printed += scnprintf(bf + printed, size - printed,
1931 "%sarg%d: %ld",
1932 printed ? ", " : "", i, val);
1933 ++i;
1934 }
1935 }
1936
1937 return printed;
1938 }
1939
/*
 * Signature of the per tracepoint sample handlers, e.g. trace__sys_enter()
 * and trace__sys_exit().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1943
1944 static struct syscall *trace__syscall_info(struct trace *trace,
1945 struct perf_evsel *evsel, int id)
1946 {
1947
1948 if (id < 0) {
1949
1950 /*
1951 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1952 * before that, leaving at a higher verbosity level till that is
1953 * explained. Reproduced with plain ftrace with:
1954 *
1955 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1956 * grep "NR -1 " /t/trace_pipe
1957 *
1958 * After generating some load on the machine.
1959 */
1960 if (verbose > 1) {
1961 static u64 n;
1962 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1963 id, perf_evsel__name(evsel), ++n);
1964 }
1965 return NULL;
1966 }
1967
1968 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1969 trace__read_syscall_info(trace, id))
1970 goto out_cant_read;
1971
1972 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1973 goto out_cant_read;
1974
1975 return &trace->syscalls.table[id];
1976
1977 out_cant_read:
1978 if (verbose) {
1979 fprintf(trace->output, "Problems reading syscall %d", id);
1980 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1981 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1982 fputs(" information\n", trace->output);
1983 }
1984 return NULL;
1985 }
1986
1987 static void thread__update_stats(struct thread_trace *ttrace,
1988 int id, struct perf_sample *sample)
1989 {
1990 struct int_node *inode;
1991 struct stats *stats;
1992 u64 duration = 0;
1993
1994 inode = intlist__findnew(ttrace->syscall_stats, id);
1995 if (inode == NULL)
1996 return;
1997
1998 stats = inode->priv;
1999 if (stats == NULL) {
2000 stats = malloc(sizeof(struct stats));
2001 if (stats == NULL)
2002 return;
2003 init_stats(stats);
2004 inode->priv = stats;
2005 }
2006
2007 if (ttrace->entry_time && sample->time > ttrace->entry_time)
2008 duration = sample->time - ttrace->entry_time;
2009
2010 update_stats(stats, duration);
2011 }
2012
2013 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
2014 {
2015 struct thread_trace *ttrace;
2016 u64 duration;
2017 size_t printed;
2018
2019 if (trace->current == NULL)
2020 return 0;
2021
2022 ttrace = thread__priv(trace->current);
2023
2024 if (!ttrace->entry_pending)
2025 return 0;
2026
2027 duration = sample->time - ttrace->entry_time;
2028
2029 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
2030 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
2031 ttrace->entry_pending = false;
2032
2033 return printed;
2034 }
2035
2036 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
2037 union perf_event *event __maybe_unused,
2038 struct perf_sample *sample)
2039 {
2040 char *msg;
2041 void *args;
2042 size_t printed = 0;
2043 struct thread *thread;
2044 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2045 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2046 struct thread_trace *ttrace;
2047
2048 if (sc == NULL)
2049 return -1;
2050
2051 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2052 ttrace = thread__trace(thread, trace->output);
2053 if (ttrace == NULL)
2054 goto out_put;
2055
2056 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
2057
2058 if (ttrace->entry_str == NULL) {
2059 ttrace->entry_str = malloc(trace__entry_str_size);
2060 if (!ttrace->entry_str)
2061 goto out_put;
2062 }
2063
2064 if (!trace->summary_only)
2065 trace__printf_interrupted_entry(trace, sample);
2066
2067 ttrace->entry_time = sample->time;
2068 msg = ttrace->entry_str;
2069 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
2070
2071 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
2072 args, trace, thread);
2073
2074 if (sc->is_exit) {
2075 if (!trace->duration_filter && !trace->summary_only) {
2076 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
2077 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
2078 }
2079 } else {
2080 ttrace->entry_pending = true;
2081 /* See trace__vfs_getname & trace__sys_exit */
2082 ttrace->filename.pending_open = false;
2083 }
2084
2085 if (trace->current != thread) {
2086 thread__put(trace->current);
2087 trace->current = thread__get(thread);
2088 }
2089 err = 0;
2090 out_put:
2091 thread__put(thread);
2092 return err;
2093 }
2094
2095 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2096 union perf_event *event __maybe_unused,
2097 struct perf_sample *sample)
2098 {
2099 long ret;
2100 u64 duration = 0;
2101 struct thread *thread;
2102 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2103 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2104 struct thread_trace *ttrace;
2105
2106 if (sc == NULL)
2107 return -1;
2108
2109 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2110 ttrace = thread__trace(thread, trace->output);
2111 if (ttrace == NULL)
2112 goto out_put;
2113
2114 if (trace->summary)
2115 thread__update_stats(ttrace, id, sample);
2116
2117 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2118
2119 if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2120 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2121 ttrace->filename.pending_open = false;
2122 ++trace->stats.vfs_getname;
2123 }
2124
2125 ttrace->exit_time = sample->time;
2126
2127 if (ttrace->entry_time) {
2128 duration = sample->time - ttrace->entry_time;
2129 if (trace__filter_duration(trace, duration))
2130 goto out;
2131 } else if (trace->duration_filter)
2132 goto out;
2133
2134 if (trace->summary_only)
2135 goto out;
2136
2137 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2138
2139 if (ttrace->entry_pending) {
2140 fprintf(trace->output, "%-70s", ttrace->entry_str);
2141 } else {
2142 fprintf(trace->output, " ... [");
2143 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2144 fprintf(trace->output, "]: %s()", sc->name);
2145 }
2146
2147 if (sc->fmt == NULL) {
2148 signed_print:
2149 fprintf(trace->output, ") = %ld", ret);
2150 } else if (ret < 0 && sc->fmt->errmsg) {
2151 char bf[STRERR_BUFSIZE];
2152 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2153 *e = audit_errno_to_name(-ret);
2154
2155 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2156 } else if (ret == 0 && sc->fmt->timeout)
2157 fprintf(trace->output, ") = 0 Timeout");
2158 else if (sc->fmt->hexret)
2159 fprintf(trace->output, ") = %#lx", ret);
2160 else
2161 goto signed_print;
2162
2163 fputc('\n', trace->output);
2164 out:
2165 ttrace->entry_pending = false;
2166 err = 0;
2167 out_put:
2168 thread__put(thread);
2169 return err;
2170 }
2171
2172 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2173 union perf_event *event __maybe_unused,
2174 struct perf_sample *sample)
2175 {
2176 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2177 struct thread_trace *ttrace;
2178 size_t filename_len, entry_str_len, to_move;
2179 ssize_t remaining_space;
2180 char *pos;
2181 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2182
2183 if (!thread)
2184 goto out;
2185
2186 ttrace = thread__priv(thread);
2187 if (!ttrace)
2188 goto out;
2189
2190 filename_len = strlen(filename);
2191
2192 if (ttrace->filename.namelen < filename_len) {
2193 char *f = realloc(ttrace->filename.name, filename_len + 1);
2194
2195 if (f == NULL)
2196 goto out;
2197
2198 ttrace->filename.namelen = filename_len;
2199 ttrace->filename.name = f;
2200 }
2201
2202 strcpy(ttrace->filename.name, filename);
2203 ttrace->filename.pending_open = true;
2204
2205 if (!ttrace->filename.ptr)
2206 goto out;
2207
2208 entry_str_len = strlen(ttrace->entry_str);
2209 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2210 if (remaining_space <= 0)
2211 goto out;
2212
2213 if (filename_len > (size_t)remaining_space) {
2214 filename += filename_len - remaining_space;
2215 filename_len = remaining_space;
2216 }
2217
2218 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2219 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2220 memmove(pos + filename_len, pos, to_move);
2221 memcpy(pos, filename, filename_len);
2222
2223 ttrace->filename.ptr = 0;
2224 ttrace->filename.entry_str_pos = 0;
2225 out:
2226 return 0;
2227 }
2228
2229 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2230 union perf_event *event __maybe_unused,
2231 struct perf_sample *sample)
2232 {
2233 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2234 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2235 struct thread *thread = machine__findnew_thread(trace->host,
2236 sample->pid,
2237 sample->tid);
2238 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2239
2240 if (ttrace == NULL)
2241 goto out_dump;
2242
2243 ttrace->runtime_ms += runtime_ms;
2244 trace->runtime_ms += runtime_ms;
2245 thread__put(thread);
2246 return 0;
2247
2248 out_dump:
2249 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2250 evsel->name,
2251 perf_evsel__strval(evsel, sample, "comm"),
2252 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2253 runtime,
2254 perf_evsel__intval(evsel, sample, "vruntime"));
2255 thread__put(thread);
2256 return 0;
2257 }
2258
2259 static void bpf_output__printer(enum binary_printer_ops op,
2260 unsigned int val, void *extra)
2261 {
2262 FILE *output = extra;
2263 unsigned char ch = (unsigned char)val;
2264
2265 switch (op) {
2266 case BINARY_PRINT_CHAR_DATA:
2267 fprintf(output, "%c", isprint(ch) ? ch : '.');
2268 break;
2269 case BINARY_PRINT_DATA_BEGIN:
2270 case BINARY_PRINT_LINE_BEGIN:
2271 case BINARY_PRINT_ADDR:
2272 case BINARY_PRINT_NUM_DATA:
2273 case BINARY_PRINT_NUM_PAD:
2274 case BINARY_PRINT_SEP:
2275 case BINARY_PRINT_CHAR_PAD:
2276 case BINARY_PRINT_LINE_END:
2277 case BINARY_PRINT_DATA_END:
2278 default:
2279 break;
2280 }
2281 }
2282
2283 static void bpf_output__fprintf(struct trace *trace,
2284 struct perf_sample *sample)
2285 {
2286 print_binary(sample->raw_data, sample->raw_size, 8,
2287 bpf_output__printer, trace->output);
2288 }
2289
2290 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2291 union perf_event *event __maybe_unused,
2292 struct perf_sample *sample)
2293 {
2294 trace__printf_interrupted_entry(trace, sample);
2295 trace__fprintf_tstamp(trace, sample->time, trace->output);
2296
2297 if (trace->trace_syscalls)
2298 fprintf(trace->output, "( ): ");
2299
2300 fprintf(trace->output, "%s:", evsel->name);
2301
2302 if (perf_evsel__is_bpf_output(evsel)) {
2303 bpf_output__fprintf(trace, sample);
2304 } else if (evsel->tp_format) {
2305 event_format__fprintf(evsel->tp_format, sample->cpu,
2306 sample->raw_data, sample->raw_size,
2307 trace->output);
2308 }
2309
2310 fprintf(trace->output, ")\n");
2311 return 0;
2312 }
2313
2314 static void print_location(FILE *f, struct perf_sample *sample,
2315 struct addr_location *al,
2316 bool print_dso, bool print_sym)
2317 {
2318
2319 if ((verbose || print_dso) && al->map)
2320 fprintf(f, "%s@", al->map->dso->long_name);
2321
2322 if ((verbose || print_sym) && al->sym)
2323 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2324 al->addr - al->sym->start);
2325 else if (al->map)
2326 fprintf(f, "0x%" PRIx64, al->addr);
2327 else
2328 fprintf(f, "0x%" PRIx64, sample->addr);
2329 }
2330
2331 static int trace__pgfault(struct trace *trace,
2332 struct perf_evsel *evsel,
2333 union perf_event *event __maybe_unused,
2334 struct perf_sample *sample)
2335 {
2336 struct thread *thread;
2337 struct addr_location al;
2338 char map_type = 'd';
2339 struct thread_trace *ttrace;
2340 int err = -1;
2341
2342 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2343 ttrace = thread__trace(thread, trace->output);
2344 if (ttrace == NULL)
2345 goto out_put;
2346
2347 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2348 ttrace->pfmaj++;
2349 else
2350 ttrace->pfmin++;
2351
2352 if (trace->summary_only)
2353 goto out;
2354
2355 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2356 sample->ip, &al);
2357
2358 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2359
2360 fprintf(trace->output, "%sfault [",
2361 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2362 "maj" : "min");
2363
2364 print_location(trace->output, sample, &al, false, true);
2365
2366 fprintf(trace->output, "] => ");
2367
2368 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2369 sample->addr, &al);
2370
2371 if (!al.map) {
2372 thread__find_addr_location(thread, sample->cpumode,
2373 MAP__FUNCTION, sample->addr, &al);
2374
2375 if (al.map)
2376 map_type = 'x';
2377 else
2378 map_type = '?';
2379 }
2380
2381 print_location(trace->output, sample, &al, true, false);
2382
2383 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2384 out:
2385 err = 0;
2386 out_put:
2387 thread__put(thread);
2388 return err;
2389 }
2390
2391 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2392 {
2393 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2394 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2395 return false;
2396
2397 if (trace->pid_list || trace->tid_list)
2398 return true;
2399
2400 return false;
2401 }
2402
2403 static void trace__set_base_time(struct trace *trace,
2404 struct perf_evsel *evsel,
2405 struct perf_sample *sample)
2406 {
2407 /*
2408 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2409 * and don't use sample->time unconditionally, we may end up having
2410 * some other event in the future without PERF_SAMPLE_TIME for good
2411 * reason, i.e. we may not be interested in its timestamps, just in
2412 * it taking place, picking some piece of information when it
2413 * appears in our event stream (vfs_getname comes to mind).
2414 */
2415 if (trace->base_time == 0 && !trace->full_time &&
2416 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2417 trace->base_time = sample->time;
2418 }
2419
2420 static int trace__process_sample(struct perf_tool *tool,
2421 union perf_event *event,
2422 struct perf_sample *sample,
2423 struct perf_evsel *evsel,
2424 struct machine *machine __maybe_unused)
2425 {
2426 struct trace *trace = container_of(tool, struct trace, tool);
2427 int err = 0;
2428
2429 tracepoint_handler handler = evsel->handler;
2430
2431 if (skip_sample(trace, sample))
2432 return 0;
2433
2434 trace__set_base_time(trace, evsel, sample);
2435
2436 if (handler) {
2437 ++trace->nr_events;
2438 handler(trace, evsel, event, sample);
2439 }
2440
2441 return err;
2442 }
2443
2444 static int parse_target_str(struct trace *trace)
2445 {
2446 if (trace->opts.target.pid) {
2447 trace->pid_list = intlist__new(trace->opts.target.pid);
2448 if (trace->pid_list == NULL) {
2449 pr_err("Error parsing process id string\n");
2450 return -EINVAL;
2451 }
2452 }
2453
2454 if (trace->opts.target.tid) {
2455 trace->tid_list = intlist__new(trace->opts.target.tid);
2456 if (trace->tid_list == NULL) {
2457 pr_err("Error parsing thread id string\n");
2458 return -EINVAL;
2459 }
2460 }
2461
2462 return 0;
2463 }
2464
2465 static int trace__record(struct trace *trace, int argc, const char **argv)
2466 {
2467 unsigned int rec_argc, i, j;
2468 const char **rec_argv;
2469 const char * const record_args[] = {
2470 "record",
2471 "-R",
2472 "-m", "1024",
2473 "-c", "1",
2474 };
2475
2476 const char * const sc_args[] = { "-e", };
2477 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2478 const char * const majpf_args[] = { "-e", "major-faults" };
2479 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2480 const char * const minpf_args[] = { "-e", "minor-faults" };
2481 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2482
2483 /* +1 is for the event string below */
2484 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2485 majpf_args_nr + minpf_args_nr + argc;
2486 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2487
2488 if (rec_argv == NULL)
2489 return -ENOMEM;
2490
2491 j = 0;
2492 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2493 rec_argv[j++] = record_args[i];
2494
2495 if (trace->trace_syscalls) {
2496 for (i = 0; i < sc_args_nr; i++)
2497 rec_argv[j++] = sc_args[i];
2498
2499 /* event string may be different for older kernels - e.g., RHEL6 */
2500 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2501 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2502 else if (is_valid_tracepoint("syscalls:sys_enter"))
2503 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2504 else {
2505 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2506 return -1;
2507 }
2508 }
2509
2510 if (trace->trace_pgfaults & TRACE_PFMAJ)
2511 for (i = 0; i < majpf_args_nr; i++)
2512 rec_argv[j++] = majpf_args[i];
2513
2514 if (trace->trace_pgfaults & TRACE_PFMIN)
2515 for (i = 0; i < minpf_args_nr; i++)
2516 rec_argv[j++] = minpf_args[i];
2517
2518 for (i = 0; i < (unsigned int)argc; i++)
2519 rec_argv[j++] = argv[i];
2520
2521 return cmd_record(j, rec_argv, NULL);
2522 }
2523
2524 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2525
2526 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2527 {
2528 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2529
2530 if (IS_ERR(evsel))
2531 return false;
2532
2533 if (perf_evsel__field(evsel, "pathname") == NULL) {
2534 perf_evsel__delete(evsel);
2535 return false;
2536 }
2537
2538 evsel->handler = trace__vfs_getname;
2539 perf_evlist__add(evlist, evsel);
2540 return true;
2541 }
2542
2543 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2544 u64 config)
2545 {
2546 struct perf_evsel *evsel;
2547 struct perf_event_attr attr = {
2548 .type = PERF_TYPE_SOFTWARE,
2549 .mmap_data = 1,
2550 };
2551
2552 attr.config = config;
2553 attr.sample_period = 1;
2554
2555 event_attr_init(&attr);
2556
2557 evsel = perf_evsel__new(&attr);
2558 if (!evsel)
2559 return -ENOMEM;
2560
2561 evsel->handler = trace__pgfault;
2562 perf_evlist__add(evlist, evsel);
2563
2564 return 0;
2565 }
2566
2567 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2568 {
2569 const u32 type = event->header.type;
2570 struct perf_evsel *evsel;
2571
2572 if (type != PERF_RECORD_SAMPLE) {
2573 trace__process_event(trace, trace->host, event, sample);
2574 return;
2575 }
2576
2577 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2578 if (evsel == NULL) {
2579 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2580 return;
2581 }
2582
2583 trace__set_base_time(trace, evsel, sample);
2584
2585 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2586 sample->raw_data == NULL) {
2587 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2588 perf_evsel__name(evsel), sample->tid,
2589 sample->cpu, sample->raw_size);
2590 } else {
2591 tracepoint_handler handler = evsel->handler;
2592 handler(trace, evsel, event, sample);
2593 }
2594 }
2595
2596 static int trace__add_syscall_newtp(struct trace *trace)
2597 {
2598 int ret = -1;
2599 struct perf_evlist *evlist = trace->evlist;
2600 struct perf_evsel *sys_enter, *sys_exit;
2601
2602 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2603 if (sys_enter == NULL)
2604 goto out;
2605
2606 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2607 goto out_delete_sys_enter;
2608
2609 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2610 if (sys_exit == NULL)
2611 goto out_delete_sys_enter;
2612
2613 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2614 goto out_delete_sys_exit;
2615
2616 perf_evlist__add(evlist, sys_enter);
2617 perf_evlist__add(evlist, sys_exit);
2618
2619 trace->syscalls.events.sys_enter = sys_enter;
2620 trace->syscalls.events.sys_exit = sys_exit;
2621
2622 ret = 0;
2623 out:
2624 return ret;
2625
2626 out_delete_sys_exit:
2627 perf_evsel__delete_priv(sys_exit);
2628 out_delete_sys_enter:
2629 perf_evsel__delete_priv(sys_enter);
2630 goto out;
2631 }
2632
2633 static int trace__set_ev_qualifier_filter(struct trace *trace)
2634 {
2635 int err = -1;
2636 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2637 trace->ev_qualifier_ids.nr,
2638 trace->ev_qualifier_ids.entries);
2639
2640 if (filter == NULL)
2641 goto out_enomem;
2642
2643 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2644 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2645
2646 free(filter);
2647 out:
2648 return err;
2649 out_enomem:
2650 errno = ENOMEM;
2651 goto out;
2652 }
2653
/*
 * Live tracing: add the requested events to trace->evlist, configure,
 * open, filter and mmap them, optionally start the workload given in
 * argv (argc > 0), then keep reading events from the mmaps and handing
 * them to trace__handle_event() until interrupted or drained.
 *
 * The evlist is always deleted before returning. Returns the value left
 * in err: negative when a setup step failed, otherwise the result of the
 * last step that assigned it.
 */
2654 static int trace__run(struct trace *trace, int argc, const char **argv)
2655 {
2656 struct perf_evlist *evlist = trace->evlist;
2657 struct perf_evsel *evsel;
2658 int err = -1, i;
2659 unsigned long before;
/* A workload is forked only when a command was given */
2660 const bool forks = argc > 0;
/* Set once the events were disabled and only leftovers are being read */
2661 bool draining = false;
2662
2663 trace->live = true;
2664
/* Add the events that were asked for: syscalls, vfs_getname, page faults, sched */
2665 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2666 goto out_error_raw_syscalls;
2667
2668 if (trace->trace_syscalls)
2669 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2670
2671 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2672 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2673 goto out_error_mem;
2674 }
2675
2676 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2677 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2678 goto out_error_mem;
2679
2680 if (trace->sched &&
2681 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2682 trace__sched_stat_runtime))
2683 goto out_error_sched_stat_runtime;
2684
2685 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2686 if (err < 0) {
2687 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2688 goto out_delete_evlist;
2689 }
2690
2691 err = trace__symbols_init(trace, evlist);
2692 if (err < 0) {
2693 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2694 goto out_delete_evlist;
2695 }
2696
2697 perf_evlist__config(evlist, &trace->opts);
2698
2699 signal(SIGCHLD, sig_handler);
2700 signal(SIGINT, sig_handler);
2701
2702 if (forks) {
2703 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2704 argv, false, NULL);
2705 if (err < 0) {
2706 fprintf(trace->output, "Couldn't run the workload!\n");
2707 goto out_delete_evlist;
2708 }
2709 }
2710
2711 err = perf_evlist__open(evlist);
2712 if (err < 0)
2713 goto out_error_open;
2714
2715 err = bpf__apply_obj_config();
2716 if (err) {
2717 char errbuf[BUFSIZ];
2718
2719 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2720 pr_err("ERROR: Apply config to BPF failed: %s\n",
2721 errbuf);
2722 goto out_error_open;
2723 }
2724
2725 /*
2726 * Better not use !target__has_task() here because we need to cover the
2727 * case where no threads were specified in the command line, but a
2728 * workload was, and in that case we will fill in the thread_map when
2729 * we fork the workload in perf_evlist__prepare_workload.
2730 */
/* Filter the explicitly listed pids, else our own pid when the first thread map entry is -1 */
2731 if (trace->filter_pids.nr > 0)
2732 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2733 else if (thread_map__pid(evlist->threads, 0) == -1)
2734 err = perf_evlist__set_filter_pid(evlist, getpid());
2735
2736 if (err < 0)
2737 goto out_error_mem;
2738
2739 if (trace->ev_qualifier_ids.nr > 0) {
2740 err = trace__set_ev_qualifier_filter(trace);
2741 if (err < 0)
2742 goto out_errno;
2743
2744 pr_debug("event qualifier tracepoint filter: %s\n",
2745 trace->syscalls.events.sys_exit->filter);
2746 }
2747
/* On failure evsel is used by the out_error_apply_filters message below */
2748 err = perf_evlist__apply_filters(evlist, &evsel);
2749 if (err < 0)
2750 goto out_error_apply_filters;
2751
2752 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2753 if (err < 0)
2754 goto out_error_mmap;
2755
2756 if (!target__none(&trace->opts.target))
2757 perf_evlist__enable(evlist);
2758
2759 if (forks)
2760 perf_evlist__start_workload(evlist);
2761
2762 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2763 evlist->threads->nr > 1 ||
2764 perf_evlist__first(evlist)->attr.inherit;
/* Main loop: one pass over all mmaps, repeated for as long as events keep showing up */
2765 again:
2766 before = trace->nr_events;
2767
2768 for (i = 0; i < evlist->nr_mmaps; i++) {
2769 union perf_event *event;
2770
2771 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2772 struct perf_sample sample;
2773
2774 ++trace->nr_events;
2775
2776 err = perf_evlist__parse_sample(evlist, event, &sample);
2777 if (err) {
2778 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2779 goto next_event;
2780 }
2781
2782 trace__handle_event(trace, event, &sample);
2783 next_event:
2784 perf_evlist__mmap_consume(evlist, i);
2785
2786 if (interrupted)
2787 goto out_disable;
2788
/* First time 'done' is seen: stop the events, keep consuming what is buffered */
2789 if (done && !draining) {
2790 perf_evlist__disable(evlist);
2791 draining = true;
2792 }
2793 }
2794 }
2795
/* Nothing was read in this pass: poll for more unless already draining, else fall through to the exit path */
2796 if (trace->nr_events == before) {
2797 int timeout = done ? 100 : -1;
2798
2799 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2800 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2801 draining = true;
2802
2803 goto again;
2804 }
2805 } else {
2806 goto again;
2807 }
2808
2809 out_disable:
2810 thread__zput(trace->current);
2811
2812 perf_evlist__disable(evlist);
2813
2814 if (!err) {
2815 if (trace->summary)
2816 trace__fprintf_thread_summary(trace, trace->output);
2817
2818 if (trace->show_tool_stats) {
2819 fprintf(trace->output, "Stats:\n "
2820 " vfs_getname : %" PRIu64 "\n"
2821 " proc_getname: %" PRIu64 "\n",
2822 trace->stats.vfs_getname,
2823 trace->stats.proc_getname);
2824 }
2825 }
2826
/* Common exit: every path, successful or not, ends up here */
2827 out_delete_evlist:
2828 perf_evlist__delete(evlist);
2829 trace->evlist = NULL;
2830 trace->live = false;
2831 return err;
/* Error labels sharing errbuf; this block is only entered via goto */
2832 {
2833 char errbuf[BUFSIZ];
2834
2835 out_error_sched_stat_runtime:
2836 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2837 goto out_error;
2838
2839 out_error_raw_syscalls:
2840 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2841 goto out_error;
2842
2843 out_error_mmap:
2844 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2845 goto out_error;
2846
2847 out_error_open:
2848 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2849
2850 out_error:
2851 fprintf(trace->output, "%s\n", errbuf);
2852 goto out_delete_evlist;
2853
2854 out_error_apply_filters:
2855 fprintf(trace->output,
2856 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2857 evsel->filter, perf_evsel__name(evsel), errno,
2858 strerror_r(errno, errbuf, sizeof(errbuf)));
2859 goto out_delete_evlist;
2860 }
2861 out_error_mem:
2862 fprintf(trace->output, "Not enough memory to run!\n");
2863 goto out_delete_evlist;
2864
2865 out_errno:
2866 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2867 goto out_delete_evlist;
2868 }
2869
2870 static int trace__replay(struct trace *trace)
2871 {
2872 const struct perf_evsel_str_handler handlers[] = {
2873 { "probe:vfs_getname", trace__vfs_getname, },
2874 };
2875 struct perf_data_file file = {
2876 .path = input_name,
2877 .mode = PERF_DATA_MODE_READ,
2878 .force = trace->force,
2879 };
2880 struct perf_session *session;
2881 struct perf_evsel *evsel;
2882 int err = -1;
2883
2884 trace->tool.sample = trace__process_sample;
2885 trace->tool.mmap = perf_event__process_mmap;
2886 trace->tool.mmap2 = perf_event__process_mmap2;
2887 trace->tool.comm = perf_event__process_comm;
2888 trace->tool.exit = perf_event__process_exit;
2889 trace->tool.fork = perf_event__process_fork;
2890 trace->tool.attr = perf_event__process_attr;
2891 trace->tool.tracing_data = perf_event__process_tracing_data;
2892 trace->tool.build_id = perf_event__process_build_id;
2893
2894 trace->tool.ordered_events = true;
2895 trace->tool.ordering_requires_timestamps = true;
2896
2897 /* add tid to output */
2898 trace->multiple_threads = true;
2899
2900 session = perf_session__new(&file, false, &trace->tool);
2901 if (session == NULL)
2902 return -1;
2903
2904 if (symbol__init(&session->header.env) < 0)
2905 goto out;
2906
2907 trace->host = &session->machines.host;
2908
2909 err = perf_session__set_tracepoints_handlers(session, handlers);
2910 if (err)
2911 goto out;
2912
2913 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2914 "raw_syscalls:sys_enter");
2915 /* older kernels have syscalls tp versus raw_syscalls */
2916 if (evsel == NULL)
2917 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2918 "syscalls:sys_enter");
2919
2920 if (evsel &&
2921 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2922 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2923 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2924 goto out;
2925 }
2926
2927 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2928 "raw_syscalls:sys_exit");
2929 if (evsel == NULL)
2930 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2931 "syscalls:sys_exit");
2932 if (evsel &&
2933 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2934 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2935 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2936 goto out;
2937 }
2938
2939 evlist__for_each(session->evlist, evsel) {
2940 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2941 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2942 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2943 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2944 evsel->handler = trace__pgfault;
2945 }
2946
2947 err = parse_target_str(trace);
2948 if (err != 0)
2949 goto out;
2950
2951 setup_pager();
2952
2953 err = perf_session__process_events(session);
2954 if (err)
2955 pr_err("Failed to process events, error %d", err);
2956
2957 else if (trace->summary)
2958 trace__fprintf_thread_summary(trace, trace->output);
2959
2960 out:
2961 perf_session__delete(session);
2962
2963 return err;
2964 }
2965
/* Print the heading of the per-thread summary, returns what fprintf() reported */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2974
2975 static size_t thread__dump_stats(struct thread_trace *ttrace,
2976 struct trace *trace, FILE *fp)
2977 {
2978 struct stats *stats;
2979 size_t printed = 0;
2980 struct syscall *sc;
2981 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2982
2983 if (inode == NULL)
2984 return 0;
2985
2986 printed += fprintf(fp, "\n");
2987
2988 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2989 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2990 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2991
2992 /* each int_node is a syscall */
2993 while (inode) {
2994 stats = inode->priv;
2995 if (stats) {
2996 double min = (double)(stats->min) / NSEC_PER_MSEC;
2997 double max = (double)(stats->max) / NSEC_PER_MSEC;
2998 double avg = avg_stats(stats);
2999 double pct;
3000 u64 n = (u64) stats->n;
3001
3002 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
3003 avg /= NSEC_PER_MSEC;
3004
3005 sc = &trace->syscalls.table[inode->i];
3006 printed += fprintf(fp, " %-15s", sc->name);
3007 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
3008 n, avg * n, min, avg);
3009 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
3010 }
3011
3012 inode = intlist__next(inode);
3013 }
3014
3015 printed += fprintf(fp, "\n\n");
3016
3017 return printed;
3018 }
3019
3020 /* Context handed to trace__fprintf_one_thread() through its void *priv argument */
3021 struct summary_data {
/* stream the summary is written to */
3022 FILE *fp;
/* trace session, gives access to the global event count and the syscall table */
3023 struct trace *trace;
/* running count of characters printed so far */
3024 size_t printed;
3025 };
3026
3027 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
3028 {
3029 struct summary_data *data = priv;
3030 FILE *fp = data->fp;
3031 size_t printed = data->printed;
3032 struct trace *trace = data->trace;
3033 struct thread_trace *ttrace = thread__priv(thread);
3034 double ratio;
3035
3036 if (ttrace == NULL)
3037 return 0;
3038
3039 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
3040
3041 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
3042 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
3043 printed += fprintf(fp, "%.1f%%", ratio);
3044 if (ttrace->pfmaj)
3045 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
3046 if (ttrace->pfmin)
3047 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
3048 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
3049 printed += thread__dump_stats(ttrace, trace, fp);
3050
3051 data->printed += printed;
3052
3053 return 0;
3054 }
3055
3056 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
3057 {
3058 struct summary_data data = {
3059 .fp = fp,
3060 .trace = trace
3061 };
3062 data.printed = trace__fprintf_threads_header(fp);
3063
3064 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
3065
3066 return data.printed;
3067 }
3068
3069 static int trace__set_duration(const struct option *opt, const char *str,
3070 int unset __maybe_unused)
3071 {
3072 struct trace *trace = opt->value;
3073
3074 trace->duration_filter = atof(str);
3075 return 0;
3076 }
3077
3078 static int trace__set_filter_pids(const struct option *opt, const char *str,
3079 int unset __maybe_unused)
3080 {
3081 int ret = -1;
3082 size_t i;
3083 struct trace *trace = opt->value;
3084 /*
3085 * FIXME: introduce a intarray class, plain parse csv and create a
3086 * { int nr, int entries[] } struct...
3087 */
3088 struct intlist *list = intlist__new(str);
3089
3090 if (list == NULL)
3091 return -1;
3092
3093 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
3094 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
3095
3096 if (trace->filter_pids.entries == NULL)
3097 goto out;
3098
3099 trace->filter_pids.entries[0] = getpid();
3100
3101 for (i = 1; i < trace->filter_pids.nr; ++i)
3102 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
3103
3104 intlist__delete(list);
3105 ret = 0;
3106 out:
3107 return ret;
3108 }
3109
3110 static int trace__open_output(struct trace *trace, const char *filename)
3111 {
3112 struct stat st;
3113
3114 if (!stat(filename, &st) && st.st_size) {
3115 char oldname[PATH_MAX];
3116
3117 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
3118 unlink(oldname);
3119 rename(filename, oldname);
3120 }
3121
3122 trace->output = fopen(filename, "w");
3123
3124 return trace->output == NULL ? -errno : 0;
3125 }
3126
3127 static int parse_pagefaults(const struct option *opt, const char *str,
3128 int unset __maybe_unused)
3129 {
3130 int *trace_pgfaults = opt->value;
3131
3132 if (strcmp(str, "all") == 0)
3133 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3134 else if (strcmp(str, "maj") == 0)
3135 *trace_pgfaults |= TRACE_PFMAJ;
3136 else if (strcmp(str, "min") == 0)
3137 *trace_pgfaults |= TRACE_PFMIN;
3138 else
3139 return -1;
3140
3141 return 0;
3142 }
3143
3144 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3145 {
3146 struct perf_evsel *evsel;
3147
3148 evlist__for_each(evlist, evsel)
3149 evsel->handler = handler;
3150 }
3151
3152 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3153 {
3154 const char *trace_usage[] = {
3155 "perf trace [<options>] [<command>]",
3156 "perf trace [<options>] -- <command> [<options>]",
3157 "perf trace record [<options>] [<command>]",
3158 "perf trace record [<options>] -- <command> [<options>]",
3159 NULL
3160 };
3161 struct trace trace = {
3162 .audit = {
3163 .machine = audit_detect_machine(),
3164 .open_id = audit_name_to_syscall("open", trace.audit.machine),
3165 },
3166 .syscalls = {
3167 . max = -1,
3168 },
3169 .opts = {
3170 .target = {
3171 .uid = UINT_MAX,
3172 .uses_mmap = true,
3173 },
3174 .user_freq = UINT_MAX,
3175 .user_interval = ULLONG_MAX,
3176 .no_buffering = true,
3177 .mmap_pages = UINT_MAX,
3178 .proc_map_timeout = 500,
3179 },
3180 .output = stderr,
3181 .show_comm = true,
3182 .trace_syscalls = true,
3183 };
3184 const char *output_name = NULL;
3185 const char *ev_qualifier_str = NULL;
3186 const struct option trace_options[] = {
3187 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3188 "event selector. use 'perf list' to list available events",
3189 parse_events_option),
3190 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3191 "show the thread COMM next to its id"),
3192 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3193 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3194 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3195 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3196 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3197 "trace events on existing process id"),
3198 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3199 "trace events on existing thread id"),
3200 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3201 "pids to filter (by the kernel)", trace__set_filter_pids),
3202 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3203 "system-wide collection from all CPUs"),
3204 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3205 "list of cpus to monitor"),
3206 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3207 "child tasks do not inherit counters"),
3208 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3209 "number of mmap data pages",
3210 perf_evlist__parse_mmap_pages),
3211 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3212 "user to profile"),
3213 OPT_CALLBACK(0, "duration", &trace, "float",
3214 "show only events with duration > N.M ms",
3215 trace__set_duration),
3216 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3217 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3218 OPT_BOOLEAN('T', "time", &trace.full_time,
3219 "Show full timestamp, not time relative to first start"),
3220 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3221 "Show only syscall summary with statistics"),
3222 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3223 "Show all syscalls and summary with statistics"),
3224 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3225 "Trace pagefaults", parse_pagefaults, "maj"),
3226 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3227 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3228 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3229 "per thread proc mmap processing timeout in ms"),
3230 OPT_END()
3231 };
3232 const char * const trace_subcommands[] = { "record", NULL };
3233 int err;
3234 char bf[BUFSIZ];
3235
3236 signal(SIGSEGV, sighandler_dump_stack);
3237 signal(SIGFPE, sighandler_dump_stack);
3238
3239 trace.evlist = perf_evlist__new();
3240
3241 if (trace.evlist == NULL) {
3242 pr_err("Not enough memory to run!\n");
3243 err = -ENOMEM;
3244 goto out;
3245 }
3246
3247 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3248 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3249
3250 if (trace.trace_pgfaults) {
3251 trace.opts.sample_address = true;
3252 trace.opts.sample_time = true;
3253 }
3254
3255 if (trace.evlist->nr_entries > 0)
3256 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3257
3258 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3259 return trace__record(&trace, argc-1, &argv[1]);
3260
3261 /* summary_only implies summary option, but don't overwrite summary if set */
3262 if (trace.summary_only)
3263 trace.summary = trace.summary_only;
3264
3265 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3266 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3267 pr_err("Please specify something to trace.\n");
3268 return -1;
3269 }
3270
3271 if (output_name != NULL) {
3272 err = trace__open_output(&trace, output_name);
3273 if (err < 0) {
3274 perror("failed to create output file");
3275 goto out;
3276 }
3277 }
3278
3279 if (ev_qualifier_str != NULL) {
3280 const char *s = ev_qualifier_str;
3281 struct strlist_config slist_config = {
3282 .dirname = system_path(STRACE_GROUPS_DIR),
3283 };
3284
3285 trace.not_ev_qualifier = *s == '!';
3286 if (trace.not_ev_qualifier)
3287 ++s;
3288 trace.ev_qualifier = strlist__new(s, &slist_config);
3289 if (trace.ev_qualifier == NULL) {
3290 fputs("Not enough memory to parse event qualifier",
3291 trace.output);
3292 err = -ENOMEM;
3293 goto out_close;
3294 }
3295
3296 err = trace__validate_ev_qualifier(&trace);
3297 if (err)
3298 goto out_close;
3299 }
3300
3301 err = target__validate(&trace.opts.target);
3302 if (err) {
3303 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3304 fprintf(trace.output, "%s", bf);
3305 goto out_close;
3306 }
3307
3308 err = target__parse_uid(&trace.opts.target);
3309 if (err) {
3310 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3311 fprintf(trace.output, "%s", bf);
3312 goto out_close;
3313 }
3314
3315 if (!argc && target__none(&trace.opts.target))
3316 trace.opts.target.system_wide = true;
3317
3318 if (input_name)
3319 err = trace__replay(&trace);
3320 else
3321 err = trace__run(&trace, argc, argv);
3322
3323 out_close:
3324 if (output_name != NULL)
3325 fclose(trace.output);
3326 out:
3327 return err;
3328 }
This page took 0.391162 seconds and 6 git commands to generate.