/*
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc. Default format is loosely strace-like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
43 #include <linux/futex.h>
44 #include <linux/err.h>
45 #include <linux/seccomp.h>
46 #include <linux/filter.h>
47 #include <linux/audit.h>
48 #include <sys/ptrace.h>
49 #include <linux/random.h>
51 /* For older distros: */
53 # define MAP_STACK 0x20000
57 # define MADV_HWPOISON 100
/* Fallbacks for libc headers that do not provide these madvise() advice values. */
#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif
70 # define EFD_SEMAPHORE 1
74 # define EFD_NONBLOCK 00004000
78 # define EFD_CLOEXEC 02000000
82 # define O_CLOEXEC 02000000
90 # define SOCK_CLOEXEC 02000000
94 # define SOCK_NONBLOCK 00004000
/* Fallback for headers that do not define the recvmsg() close-on-exec flag. */
#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

/* Fallbacks for the perf_event_open() 'flags' argument bits. */
#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
118 struct perf_tool tool
;
119 struct syscalltbl
*sctbl
;
122 struct syscall
*table
;
124 struct perf_evsel
*sys_enter
,
128 struct record_opts opts
;
129 struct perf_evlist
*evlist
;
130 struct machine
*host
;
131 struct thread
*current
;
134 unsigned long nr_events
;
135 struct strlist
*ev_qualifier
;
140 struct intlist
*tid_list
;
141 struct intlist
*pid_list
;
146 double duration_filter
;
152 bool not_ev_qualifier
;
156 bool multiple_threads
;
160 bool show_tool_stats
;
162 bool kernel_syscallchains
;
172 u64 (*integer
)(struct tp_field
*field
, struct perf_sample
*sample
);
173 void *(*pointer
)(struct tp_field
*field
, struct perf_sample
*sample
);
177 #define TP_UINT_FIELD(bits) \
178 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
181 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
190 #define TP_UINT_FIELD__SWAPPED(bits) \
191 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
194 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
195 return bswap_##bits(value);\
198 TP_UINT_FIELD__SWAPPED(16);
199 TP_UINT_FIELD__SWAPPED(32);
200 TP_UINT_FIELD__SWAPPED(64);
202 static int tp_field__init_uint(struct tp_field
*field
,
203 struct format_field
*format_field
,
206 field
->offset
= format_field
->offset
;
208 switch (format_field
->size
) {
210 field
->integer
= tp_field__u8
;
213 field
->integer
= needs_swap
? tp_field__swapped_u16
: tp_field__u16
;
216 field
->integer
= needs_swap
? tp_field__swapped_u32
: tp_field__u32
;
219 field
->integer
= needs_swap
? tp_field__swapped_u64
: tp_field__u64
;
228 static void *tp_field__ptr(struct tp_field
*field
, struct perf_sample
*sample
)
230 return sample
->raw_data
+ field
->offset
;
233 static int tp_field__init_ptr(struct tp_field
*field
, struct format_field
*format_field
)
235 field
->offset
= format_field
->offset
;
236 field
->pointer
= tp_field__ptr
;
243 struct tp_field args
, ret
;
247 static int perf_evsel__init_tp_uint_field(struct perf_evsel
*evsel
,
248 struct tp_field
*field
,
251 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
253 if (format_field
== NULL
)
256 return tp_field__init_uint(field
, format_field
, evsel
->needs_swap
);
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by that same
 * name (stringified). Evaluates to perf_evsel__init_tp_uint_field()'s result.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
263 static int perf_evsel__init_tp_ptr_field(struct perf_evsel
*evsel
,
264 struct tp_field
*field
,
267 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
269 if (format_field
== NULL
)
272 return tp_field__init_ptr(field
, format_field
);
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the tracepoint field up by that same
 * name (stringified). Evaluates to perf_evsel__init_tp_ptr_field()'s result.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
279 static void perf_evsel__delete_priv(struct perf_evsel
*evsel
)
282 perf_evsel__delete(evsel
);
285 static int perf_evsel__init_syscall_tp(struct perf_evsel
*evsel
, void *handler
)
287 evsel
->priv
= malloc(sizeof(struct syscall_tp
));
288 if (evsel
->priv
!= NULL
) {
289 if (perf_evsel__init_sc_tp_uint_field(evsel
, id
))
292 evsel
->handler
= handler
;
303 static struct perf_evsel
*perf_evsel__syscall_newtp(const char *direction
, void *handler
)
305 struct perf_evsel
*evsel
= perf_evsel__newtp("raw_syscalls", direction
);
307 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
309 evsel
= perf_evsel__newtp("syscalls", direction
);
314 if (perf_evsel__init_syscall_tp(evsel
, handler
))
320 perf_evsel__delete_priv(evsel
);
/*
 * Read member 'name' of the evsel's struct syscall_tp from 'sample' through
 * the integer accessor installed for that field.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })
/*
 * Fetch a pointer to member 'name' of the evsel's struct syscall_tp inside
 * 'sample' through the pointer accessor installed for that field.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
334 struct thread
*thread
;
344 const char **entries
;
347 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
348 .nr_entries = ARRAY_SIZE(array), \
352 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
354 .nr_entries = ARRAY_SIZE(array), \
358 static size_t __syscall_arg__scnprintf_strarray(char *bf
, size_t size
,
360 struct syscall_arg
*arg
)
362 struct strarray
*sa
= arg
->parm
;
363 int idx
= arg
->val
- sa
->offset
;
365 if (idx
< 0 || idx
>= sa
->nr_entries
)
366 return scnprintf(bf
, size
, intfmt
, arg
->val
);
368 return scnprintf(bf
, size
, "%s", sa
->entries
[idx
]);
/*
 * String-table beautifier: prints the table entry for the argument, or the
 * value in decimal ("%d") when the table has no name for it.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * Same as the plain string-table beautifier, but values without a name are
 * printed in hexadecimal ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Forward declaration of the file-descriptor beautifier, so that code
 * appearing before its definition (such as the *at() dirfd beautifier)
 * can call it.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
398 static size_t syscall_arg__scnprintf_fd_at(char *bf
, size_t size
,
399 struct syscall_arg
*arg
)
404 return scnprintf(bf
, size
, "CWD");
406 return syscall_arg__scnprintf_fd(bf
, size
, arg
);
409 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/*
 * Forward declaration of the beautifier used for the fd argument of
 * close(); it is referenced by the syscall format table.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
416 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
417 struct syscall_arg
*arg
)
419 return scnprintf(bf
, size
, "%#lx", arg
->val
);
422 #define SCA_HEX syscall_arg__scnprintf_hex
424 static size_t syscall_arg__scnprintf_int(char *bf
, size_t size
,
425 struct syscall_arg
*arg
)
427 return scnprintf(bf
, size
, "%d", arg
->val
);
430 #define SCA_INT syscall_arg__scnprintf_int
432 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
433 struct syscall_arg
*arg
)
435 int printed
= 0, prot
= arg
->val
;
437 if (prot
== PROT_NONE
)
438 return scnprintf(bf
, size
, "NONE");
439 #define P_MMAP_PROT(n) \
440 if (prot & PROT_##n) { \
441 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
451 P_MMAP_PROT(GROWSDOWN
);
452 P_MMAP_PROT(GROWSUP
);
456 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
461 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
463 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
464 struct syscall_arg
*arg
)
466 int printed
= 0, flags
= arg
->val
;
468 #define P_MMAP_FLAG(n) \
469 if (flags & MAP_##n) { \
470 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
475 P_MMAP_FLAG(PRIVATE
);
479 P_MMAP_FLAG(ANONYMOUS
);
480 P_MMAP_FLAG(DENYWRITE
);
481 P_MMAP_FLAG(EXECUTABLE
);
484 P_MMAP_FLAG(GROWSDOWN
);
486 P_MMAP_FLAG(HUGETLB
);
489 P_MMAP_FLAG(NONBLOCK
);
490 P_MMAP_FLAG(NORESERVE
);
491 P_MMAP_FLAG(POPULATE
);
493 #ifdef MAP_UNINITIALIZED
494 P_MMAP_FLAG(UNINITIALIZED
);
499 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
504 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
506 static size_t syscall_arg__scnprintf_mremap_flags(char *bf
, size_t size
,
507 struct syscall_arg
*arg
)
509 int printed
= 0, flags
= arg
->val
;
511 #define P_MREMAP_FLAG(n) \
512 if (flags & MREMAP_##n) { \
513 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
514 flags &= ~MREMAP_##n; \
517 P_MREMAP_FLAG(MAYMOVE
);
519 P_MREMAP_FLAG(FIXED
);
524 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
529 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
531 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
532 struct syscall_arg
*arg
)
534 int behavior
= arg
->val
;
537 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
540 P_MADV_BHV(SEQUENTIAL
);
541 P_MADV_BHV(WILLNEED
);
542 P_MADV_BHV(DONTNEED
);
544 P_MADV_BHV(DONTFORK
);
546 P_MADV_BHV(HWPOISON
);
547 #ifdef MADV_SOFT_OFFLINE
548 P_MADV_BHV(SOFT_OFFLINE
);
550 P_MADV_BHV(MERGEABLE
);
551 P_MADV_BHV(UNMERGEABLE
);
553 P_MADV_BHV(HUGEPAGE
);
555 #ifdef MADV_NOHUGEPAGE
556 P_MADV_BHV(NOHUGEPAGE
);
559 P_MADV_BHV(DONTDUMP
);
568 return scnprintf(bf
, size
, "%#x", behavior
);
571 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
573 static size_t syscall_arg__scnprintf_flock(char *bf
, size_t size
,
574 struct syscall_arg
*arg
)
576 int printed
= 0, op
= arg
->val
;
579 return scnprintf(bf
, size
, "NONE");
581 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
582 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
597 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", op
);
602 #define SCA_FLOCK syscall_arg__scnprintf_flock
604 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
606 enum syscall_futex_args
{
607 SCF_UADDR
= (1 << 0),
610 SCF_TIMEOUT
= (1 << 3),
611 SCF_UADDR2
= (1 << 4),
615 int cmd
= op
& FUTEX_CMD_MASK
;
619 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
620 P_FUTEX_OP(WAIT
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
621 P_FUTEX_OP(WAKE
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
622 P_FUTEX_OP(FD
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
623 P_FUTEX_OP(REQUEUE
); arg
->mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
624 P_FUTEX_OP(CMP_REQUEUE
); arg
->mask
|= SCF_TIMEOUT
; break;
625 P_FUTEX_OP(CMP_REQUEUE_PI
); arg
->mask
|= SCF_TIMEOUT
; break;
626 P_FUTEX_OP(WAKE_OP
); break;
627 P_FUTEX_OP(LOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
628 P_FUTEX_OP(UNLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
629 P_FUTEX_OP(TRYLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
630 P_FUTEX_OP(WAIT_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
631 P_FUTEX_OP(WAKE_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
632 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
633 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
636 if (op
& FUTEX_PRIVATE_FLAG
)
637 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
639 if (op
& FUTEX_CLOCK_REALTIME
)
640 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
645 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/* Names for the 'cmd' argument of bpf(), looked up via strarray__bpf_cmd. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD",
};
static DEFINE_STRARRAY(bpf_cmd);
653 static const char *epoll_ctl_ops
[] = { "ADD", "DEL", "MOD", };
654 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops
, 1);
/* Names for the 'which' argument of getitimer(), looked up via strarray__itimers. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);
/* Names for the 'option' argument of keyctl(), looked up via strarray__keyctl_options. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options);
668 static const char *whences
[] = { "SET", "CUR", "END",
676 static DEFINE_STRARRAY(whences
);
678 static const char *fcntl_cmds
[] = {
679 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
680 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
681 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
684 static DEFINE_STRARRAY(fcntl_cmds
);
686 static const char *rlimit_resources
[] = {
687 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
688 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
691 static DEFINE_STRARRAY(rlimit_resources
);
/* Names for a signal-mask 'how' argument (block / unblock / set mask). */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);
/* Clock names for a clk_id argument (e.g. clock_gettime()), looked up via strarray__clockid. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid);
/* Socket address/protocol family names, looked up via strarray__socket_families. */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
	"BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
	"ALG", "NFC", "VSOCK",
};
static DEFINE_STRARRAY(socket_families);
713 #ifndef SOCK_TYPE_MASK
714 #define SOCK_TYPE_MASK 0xf
717 static size_t syscall_arg__scnprintf_socket_type(char *bf
, size_t size
,
718 struct syscall_arg
*arg
)
722 flags
= type
& ~SOCK_TYPE_MASK
;
724 type
&= SOCK_TYPE_MASK
;
726 * Can't use a strarray, MIPS may override for ABI reasons.
729 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
734 P_SK_TYPE(SEQPACKET
);
739 printed
= scnprintf(bf
, size
, "%#x", type
);
742 #define P_SK_FLAG(n) \
743 if (flags & SOCK_##n) { \
744 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
745 flags &= ~SOCK_##n; \
753 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", flags
);
758 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
761 #define MSG_PROBE 0x10
763 #ifndef MSG_WAITFORONE
764 #define MSG_WAITFORONE 0x10000
766 #ifndef MSG_SENDPAGE_NOTLAST
767 #define MSG_SENDPAGE_NOTLAST 0x20000
770 #define MSG_FASTOPEN 0x20000000
773 static size_t syscall_arg__scnprintf_msg_flags(char *bf
, size_t size
,
774 struct syscall_arg
*arg
)
776 int printed
= 0, flags
= arg
->val
;
779 return scnprintf(bf
, size
, "NONE");
780 #define P_MSG_FLAG(n) \
781 if (flags & MSG_##n) { \
782 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
788 P_MSG_FLAG(DONTROUTE
);
793 P_MSG_FLAG(DONTWAIT
);
800 P_MSG_FLAG(ERRQUEUE
);
801 P_MSG_FLAG(NOSIGNAL
);
803 P_MSG_FLAG(WAITFORONE
);
804 P_MSG_FLAG(SENDPAGE_NOTLAST
);
805 P_MSG_FLAG(FASTOPEN
);
806 P_MSG_FLAG(CMSG_CLOEXEC
);
810 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
815 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
817 static size_t syscall_arg__scnprintf_access_mode(char *bf
, size_t size
,
818 struct syscall_arg
*arg
)
823 if (mode
== F_OK
) /* 0 */
824 return scnprintf(bf
, size
, "F");
826 if (mode & n##_OK) { \
827 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
837 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", mode
);
842 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/*
 * Forward declaration of the pathname beautifier; it is referenced by the
 * syscall format table for filename/pathname arguments.
 */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
849 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
850 struct syscall_arg
*arg
)
852 int printed
= 0, flags
= arg
->val
;
854 if (!(flags
& O_CREAT
))
855 arg
->mask
|= 1 << (arg
->idx
+ 1); /* Mask the mode parm */
858 return scnprintf(bf
, size
, "RDONLY");
860 if (flags & O_##n) { \
861 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
885 if ((flags
& O_SYNC
) == O_SYNC
)
886 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
898 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
903 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
905 static size_t syscall_arg__scnprintf_perf_flags(char *bf
, size_t size
,
906 struct syscall_arg
*arg
)
908 int printed
= 0, flags
= arg
->val
;
914 if (flags & PERF_FLAG_##n) { \
915 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
916 flags &= ~PERF_FLAG_##n; \
926 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
931 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
933 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf
, size_t size
,
934 struct syscall_arg
*arg
)
936 int printed
= 0, flags
= arg
->val
;
939 return scnprintf(bf
, size
, "NONE");
941 if (flags & EFD_##n) { \
942 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
952 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
957 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
959 static size_t syscall_arg__scnprintf_pipe_flags(char *bf
, size_t size
,
960 struct syscall_arg
*arg
)
962 int printed
= 0, flags
= arg
->val
;
965 if (flags & O_##n) { \
966 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
975 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
980 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
982 static size_t syscall_arg__scnprintf_signum(char *bf
, size_t size
, struct syscall_arg
*arg
)
987 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
1030 return scnprintf(bf
, size
, "%#x", sig
);
1033 #define SCA_SIGNUM syscall_arg__scnprintf_signum
1035 #if defined(__i386__) || defined(__x86_64__)
1037 * FIXME: Make this available to all arches.
1039 #define TCGETS 0x5401
1041 static const char *tioctls
[] = {
1042 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
1043 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
1044 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
1045 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
1046 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
1047 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
1048 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
1049 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
1050 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
1051 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
1052 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
1053 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
1054 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
1055 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
1056 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
1059 static DEFINE_STRARRAY_OFFSET(tioctls
, 0x5401);
1060 #endif /* defined(__i386__) || defined(__x86_64__) */
1062 static size_t syscall_arg__scnprintf_seccomp_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
1068 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1069 P_SECCOMP_SET_MODE_OP(STRICT
);
1070 P_SECCOMP_SET_MODE_OP(FILTER
);
1071 #undef P_SECCOMP_SET_MODE_OP
1072 default: printed
= scnprintf(bf
, size
, "%#x", op
); break;
1078 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
1080 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf
, size_t size
,
1081 struct syscall_arg
*arg
)
1083 int printed
= 0, flags
= arg
->val
;
1086 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1087 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1088 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1095 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
1100 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
1102 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf
, size_t size
,
1103 struct syscall_arg
*arg
)
1105 int printed
= 0, flags
= arg
->val
;
1108 if (flags & GRND_##n) { \
1109 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1110 flags &= ~GRND_##n; \
1118 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
1123 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/*
 * Initializer fragment for a syscall_fmt entry: format argument number 'arg'
 * with the string-table beautifier, using strarray__<array> as the table.
 * 'name' is not expanded; it only documents the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
1129 #include "trace/beauty/pid.c"
1130 #include "trace/beauty/mode_t.c"
1131 #include "trace/beauty/sched_policy.c"
1132 #include "trace/beauty/waitid_options.c"
1134 static struct syscall_fmt
{
1137 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, struct syscall_arg
*arg
);
1143 } syscall_fmts
[] = {
1144 { .name
= "access", .errmsg
= true,
1145 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
1146 [1] = SCA_ACCMODE
, /* mode */ }, },
1147 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
1148 { .name
= "bpf", .errmsg
= true, STRARRAY(0, cmd
, bpf_cmd
), },
1149 { .name
= "brk", .hexret
= true,
1150 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
1151 { .name
= "chdir", .errmsg
= true,
1152 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1153 { .name
= "chmod", .errmsg
= true,
1154 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1155 { .name
= "chroot", .errmsg
= true,
1156 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1157 { .name
= "clock_gettime", .errmsg
= true, STRARRAY(0, clk_id
, clockid
), },
1158 { .name
= "clone", .errpid
= true, },
1159 { .name
= "close", .errmsg
= true,
1160 .arg_scnprintf
= { [0] = SCA_CLOSE_FD
, /* fd */ }, },
1161 { .name
= "connect", .errmsg
= true, },
1162 { .name
= "creat", .errmsg
= true,
1163 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1164 { .name
= "dup", .errmsg
= true,
1165 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1166 { .name
= "dup2", .errmsg
= true,
1167 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1168 { .name
= "dup3", .errmsg
= true,
1169 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1170 { .name
= "epoll_ctl", .errmsg
= true, STRARRAY(1, op
, epoll_ctl_ops
), },
1171 { .name
= "eventfd2", .errmsg
= true,
1172 .arg_scnprintf
= { [1] = SCA_EFD_FLAGS
, /* flags */ }, },
1173 { .name
= "faccessat", .errmsg
= true,
1174 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1175 [1] = SCA_FILENAME
, /* filename */ }, },
1176 { .name
= "fadvise64", .errmsg
= true,
1177 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1178 { .name
= "fallocate", .errmsg
= true,
1179 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1180 { .name
= "fchdir", .errmsg
= true,
1181 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1182 { .name
= "fchmod", .errmsg
= true,
1183 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1184 { .name
= "fchmodat", .errmsg
= true,
1185 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1186 [1] = SCA_FILENAME
, /* filename */ }, },
1187 { .name
= "fchown", .errmsg
= true,
1188 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1189 { .name
= "fchownat", .errmsg
= true,
1190 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1191 [1] = SCA_FILENAME
, /* filename */ }, },
1192 { .name
= "fcntl", .errmsg
= true,
1193 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1194 [1] = SCA_STRARRAY
, /* cmd */ },
1195 .arg_parm
= { [1] = &strarray__fcntl_cmds
, /* cmd */ }, },
1196 { .name
= "fdatasync", .errmsg
= true,
1197 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1198 { .name
= "flock", .errmsg
= true,
1199 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1200 [1] = SCA_FLOCK
, /* cmd */ }, },
1201 { .name
= "fsetxattr", .errmsg
= true,
1202 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1203 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat",
1204 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1205 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat",
1206 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1207 [1] = SCA_FILENAME
, /* filename */ }, },
1208 { .name
= "fstatfs", .errmsg
= true,
1209 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1210 { .name
= "fsync", .errmsg
= true,
1211 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1212 { .name
= "ftruncate", .errmsg
= true,
1213 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1214 { .name
= "futex", .errmsg
= true,
1215 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
1216 { .name
= "futimesat", .errmsg
= true,
1217 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1218 [1] = SCA_FILENAME
, /* filename */ }, },
1219 { .name
= "getdents", .errmsg
= true,
1220 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1221 { .name
= "getdents64", .errmsg
= true,
1222 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1223 { .name
= "getitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1224 { .name
= "getpid", .errpid
= true, },
1225 { .name
= "getpgid", .errpid
= true, },
1226 { .name
= "getppid", .errpid
= true, },
1227 { .name
= "getrandom", .errmsg
= true,
1228 .arg_scnprintf
= { [2] = SCA_GETRANDOM_FLAGS
, /* flags */ }, },
1229 { .name
= "getrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1230 { .name
= "getxattr", .errmsg
= true,
1231 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1232 { .name
= "inotify_add_watch", .errmsg
= true,
1233 .arg_scnprintf
= { [1] = SCA_FILENAME
, /* pathname */ }, },
1234 { .name
= "ioctl", .errmsg
= true,
1235 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1236 #if defined(__i386__) || defined(__x86_64__)
1238 * FIXME: Make this available to all arches.
1240 [1] = SCA_STRHEXARRAY
, /* cmd */
1241 [2] = SCA_HEX
, /* arg */ },
1242 .arg_parm
= { [1] = &strarray__tioctls
, /* cmd */ }, },
1244 [2] = SCA_HEX
, /* arg */ }, },
1246 { .name
= "keyctl", .errmsg
= true, STRARRAY(0, option
, keyctl_options
), },
1247 { .name
= "kill", .errmsg
= true,
1248 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1249 { .name
= "lchown", .errmsg
= true,
1250 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1251 { .name
= "lgetxattr", .errmsg
= true,
1252 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1253 { .name
= "linkat", .errmsg
= true,
1254 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1255 { .name
= "listxattr", .errmsg
= true,
1256 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1257 { .name
= "llistxattr", .errmsg
= true,
1258 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1259 { .name
= "lremovexattr", .errmsg
= true,
1260 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1261 { .name
= "lseek", .errmsg
= true,
1262 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1263 [2] = SCA_STRARRAY
, /* whence */ },
1264 .arg_parm
= { [2] = &strarray__whences
, /* whence */ }, },
1265 { .name
= "lsetxattr", .errmsg
= true,
1266 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1267 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat",
1268 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1269 { .name
= "lsxattr", .errmsg
= true,
1270 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1271 { .name
= "madvise", .errmsg
= true,
1272 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1273 [2] = SCA_MADV_BHV
, /* behavior */ }, },
1274 { .name
= "mkdir", .errmsg
= true,
1275 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1276 { .name
= "mkdirat", .errmsg
= true,
1277 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1278 [1] = SCA_FILENAME
, /* pathname */ }, },
1279 { .name
= "mknod", .errmsg
= true,
1280 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1281 { .name
= "mknodat", .errmsg
= true,
1282 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1283 [1] = SCA_FILENAME
, /* filename */ }, },
1284 { .name
= "mlock", .errmsg
= true,
1285 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1286 { .name
= "mlockall", .errmsg
= true,
1287 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1288 { .name
= "mmap", .hexret
= true,
1289 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1290 [2] = SCA_MMAP_PROT
, /* prot */
1291 [3] = SCA_MMAP_FLAGS
, /* flags */
1292 [4] = SCA_FD
, /* fd */ }, },
1293 { .name
= "mprotect", .errmsg
= true,
1294 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1295 [2] = SCA_MMAP_PROT
, /* prot */ }, },
1296 { .name
= "mq_unlink", .errmsg
= true,
1297 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* u_name */ }, },
1298 { .name
= "mremap", .hexret
= true,
1299 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1300 [3] = SCA_MREMAP_FLAGS
, /* flags */
1301 [4] = SCA_HEX
, /* new_addr */ }, },
1302 { .name
= "munlock", .errmsg
= true,
1303 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1304 { .name
= "munmap", .errmsg
= true,
1305 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1306 { .name
= "name_to_handle_at", .errmsg
= true,
1307 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1308 { .name
= "newfstatat", .errmsg
= true,
1309 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1310 [1] = SCA_FILENAME
, /* filename */ }, },
1311 { .name
= "open", .errmsg
= true,
1312 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
1313 [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
1314 { .name
= "open_by_handle_at", .errmsg
= true,
1315 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1316 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1317 { .name
= "openat", .errmsg
= true,
1318 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1319 [1] = SCA_FILENAME
, /* filename */
1320 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1321 { .name
= "perf_event_open", .errmsg
= true,
1322 .arg_scnprintf
= { [1] = SCA_INT
, /* pid */
1323 [2] = SCA_INT
, /* cpu */
1324 [3] = SCA_FD
, /* group_fd */
1325 [4] = SCA_PERF_FLAGS
, /* flags */ }, },
1326 { .name
= "pipe2", .errmsg
= true,
1327 .arg_scnprintf
= { [1] = SCA_PIPE_FLAGS
, /* flags */ }, },
1328 { .name
= "poll", .errmsg
= true, .timeout
= true, },
1329 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
1330 { .name
= "pread", .errmsg
= true, .alias
= "pread64",
1331 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1332 { .name
= "preadv", .errmsg
= true, .alias
= "pread",
1333 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1334 { .name
= "prlimit64", .errmsg
= true, STRARRAY(1, resource
, rlimit_resources
), },
1335 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64",
1336 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1337 { .name
= "pwritev", .errmsg
= true,
1338 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1339 { .name
= "read", .errmsg
= true,
1340 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1341 { .name
= "readlink", .errmsg
= true,
1342 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1343 { .name
= "readlinkat", .errmsg
= true,
1344 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1345 [1] = SCA_FILENAME
, /* pathname */ }, },
1346 { .name
= "readv", .errmsg
= true,
1347 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1348 { .name
= "recvfrom", .errmsg
= true,
1349 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1350 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1351 { .name
= "recvmmsg", .errmsg
= true,
1352 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1353 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1354 { .name
= "recvmsg", .errmsg
= true,
1355 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1356 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1357 { .name
= "removexattr", .errmsg
= true,
1358 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1359 { .name
= "renameat", .errmsg
= true,
1360 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1361 { .name
= "rmdir", .errmsg
= true,
1362 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1363 { .name
= "rt_sigaction", .errmsg
= true,
1364 .arg_scnprintf
= { [0] = SCA_SIGNUM
, /* sig */ }, },
1365 { .name
= "rt_sigprocmask", .errmsg
= true, STRARRAY(0, how
, sighow
), },
1366 { .name
= "rt_sigqueueinfo", .errmsg
= true,
1367 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1368 { .name
= "rt_tgsigqueueinfo", .errmsg
= true,
1369 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1370 { .name
= "sched_setscheduler", .errmsg
= true,
1371 .arg_scnprintf
= { [1] = SCA_SCHED_POLICY
, /* policy */ }, },
1372 { .name
= "seccomp", .errmsg
= true,
1373 .arg_scnprintf
= { [0] = SCA_SECCOMP_OP
, /* op */
1374 [1] = SCA_SECCOMP_FLAGS
, /* flags */ }, },
1375 { .name
= "select", .errmsg
= true, .timeout
= true, },
1376 { .name
= "sendmmsg", .errmsg
= true,
1377 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1378 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1379 { .name
= "sendmsg", .errmsg
= true,
1380 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1381 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1382 { .name
= "sendto", .errmsg
= true,
1383 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1384 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1385 { .name
= "set_tid_address", .errpid
= true, },
1386 { .name
= "setitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1387 { .name
= "setpgid", .errmsg
= true, },
1388 { .name
= "setrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1389 { .name
= "setxattr", .errmsg
= true,
1390 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1391 { .name
= "shutdown", .errmsg
= true,
1392 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1393 { .name
= "socket", .errmsg
= true,
1394 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1395 [1] = SCA_SK_TYPE
, /* type */ },
1396 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1397 { .name
= "socketpair", .errmsg
= true,
1398 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1399 [1] = SCA_SK_TYPE
, /* type */ },
1400 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1401 { .name
= "stat", .errmsg
= true, .alias
= "newstat",
1402 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1403 { .name
= "statfs", .errmsg
= true,
1404 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1405 { .name
= "swapoff", .errmsg
= true,
1406 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1407 { .name
= "swapon", .errmsg
= true,
1408 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1409 { .name
= "symlinkat", .errmsg
= true,
1410 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1411 { .name
= "tgkill", .errmsg
= true,
1412 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1413 { .name
= "tkill", .errmsg
= true,
1414 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1415 { .name
= "truncate", .errmsg
= true,
1416 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1417 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
1418 { .name
= "unlinkat", .errmsg
= true,
1419 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1420 [1] = SCA_FILENAME
, /* pathname */ }, },
1421 { .name
= "utime", .errmsg
= true,
1422 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1423 { .name
= "utimensat", .errmsg
= true,
1424 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dirfd */
1425 [1] = SCA_FILENAME
, /* filename */ }, },
1426 { .name
= "utimes", .errmsg
= true,
1427 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1428 { .name
= "vmsplice", .errmsg
= true,
1429 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1430 { .name
= "wait4", .errpid
= true,
1431 .arg_scnprintf
= { [2] = SCA_WAITID_OPTIONS
, /* options */ }, },
1432 { .name
= "waitid", .errpid
= true,
1433 .arg_scnprintf
= { [3] = SCA_WAITID_OPTIONS
, /* options */ }, },
1434 { .name
= "write", .errmsg
= true,
1435 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1436 { .name
= "writev", .errmsg
= true,
1437 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1440 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
1442 const struct syscall_fmt
*fmt
= fmtp
;
1443 return strcmp(name
, fmt
->name
);
1446 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
1448 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
1449 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
1453 struct event_format
*tp_format
;
1455 struct format_field
*args
;
1458 struct syscall_fmt
*fmt
;
1459 size_t (**arg_scnprintf
)(char *bf
, size_t size
, struct syscall_arg
*arg
);
1463 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
1465 double duration
= (double)t
/ NSEC_PER_MSEC
;
1466 size_t printed
= fprintf(fp
, "(");
1468 if (duration
>= 1.0)
1469 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
1470 else if (duration
>= 0.01)
1471 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
1473 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
1474 return printed
+ fprintf(fp
, "): ");
1478 * filename.ptr: The filename char pointer that will be vfs_getname'd
1479 * filename.entry_str_pos: Where to insert the string translated from
1480 * filename.ptr by the vfs_getname tracepoint/kprobe.
1482 struct thread_trace
{
1486 unsigned long nr_events
;
1487 unsigned long pfmaj
, pfmin
;
1492 short int entry_str_pos
;
1494 unsigned int namelen
;
1502 struct intlist
*syscall_stats
;
1505 static struct thread_trace
*thread_trace__new(void)
1507 struct thread_trace
*ttrace
= zalloc(sizeof(struct thread_trace
));
1510 ttrace
->paths
.max
= -1;
1512 ttrace
->syscall_stats
= intlist__new(NULL
);
1517 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
1519 struct thread_trace
*ttrace
;
1524 if (thread__priv(thread
) == NULL
)
1525 thread__set_priv(thread
, thread_trace__new());
1527 if (thread__priv(thread
) == NULL
)
1530 ttrace
= thread__priv(thread
);
1531 ++ttrace
->nr_events
;
1535 color_fprintf(fp
, PERF_COLOR_RED
,
1536 "WARNING: not enough memory, dropping samples!\n");
1540 #define TRACE_PFMAJ (1 << 0)
1541 #define TRACE_PFMIN (1 << 1)
1543 static const size_t trace__entry_str_size
= 2048;
1545 static int trace__set_fd_pathname(struct thread
*thread
, int fd
, const char *pathname
)
1547 struct thread_trace
*ttrace
= thread__priv(thread
);
1549 if (fd
> ttrace
->paths
.max
) {
1550 char **npath
= realloc(ttrace
->paths
.table
, (fd
+ 1) * sizeof(char *));
1555 if (ttrace
->paths
.max
!= -1) {
1556 memset(npath
+ ttrace
->paths
.max
+ 1, 0,
1557 (fd
- ttrace
->paths
.max
) * sizeof(char *));
1559 memset(npath
, 0, (fd
+ 1) * sizeof(char *));
1562 ttrace
->paths
.table
= npath
;
1563 ttrace
->paths
.max
= fd
;
1566 ttrace
->paths
.table
[fd
] = strdup(pathname
);
1568 return ttrace
->paths
.table
[fd
] != NULL
? 0 : -1;
1571 static int thread__read_fd_path(struct thread
*thread
, int fd
)
1573 char linkname
[PATH_MAX
], pathname
[PATH_MAX
];
1577 if (thread
->pid_
== thread
->tid
) {
1578 scnprintf(linkname
, sizeof(linkname
),
1579 "/proc/%d/fd/%d", thread
->pid_
, fd
);
1581 scnprintf(linkname
, sizeof(linkname
),
1582 "/proc/%d/task/%d/fd/%d", thread
->pid_
, thread
->tid
, fd
);
1585 if (lstat(linkname
, &st
) < 0 || st
.st_size
+ 1 > (off_t
)sizeof(pathname
))
1588 ret
= readlink(linkname
, pathname
, sizeof(pathname
));
1590 if (ret
< 0 || ret
> st
.st_size
)
1593 pathname
[ret
] = '\0';
1594 return trace__set_fd_pathname(thread
, fd
, pathname
);
1597 static const char *thread__fd_path(struct thread
*thread
, int fd
,
1598 struct trace
*trace
)
1600 struct thread_trace
*ttrace
= thread__priv(thread
);
1608 if ((fd
> ttrace
->paths
.max
|| ttrace
->paths
.table
[fd
] == NULL
)) {
1611 ++trace
->stats
.proc_getname
;
1612 if (thread__read_fd_path(thread
, fd
))
1616 return ttrace
->paths
.table
[fd
];
1619 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
1620 struct syscall_arg
*arg
)
1623 size_t printed
= scnprintf(bf
, size
, "%d", fd
);
1624 const char *path
= thread__fd_path(arg
->thread
, fd
, arg
->trace
);
1627 printed
+= scnprintf(bf
+ printed
, size
- printed
, "<%s>", path
);
1632 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
1633 struct syscall_arg
*arg
)
1636 size_t printed
= syscall_arg__scnprintf_fd(bf
, size
, arg
);
1637 struct thread_trace
*ttrace
= thread__priv(arg
->thread
);
1639 if (ttrace
&& fd
>= 0 && fd
<= ttrace
->paths
.max
)
1640 zfree(&ttrace
->paths
.table
[fd
]);
1645 static void thread__set_filename_pos(struct thread
*thread
, const char *bf
,
1648 struct thread_trace
*ttrace
= thread__priv(thread
);
1650 ttrace
->filename
.ptr
= ptr
;
1651 ttrace
->filename
.entry_str_pos
= bf
- ttrace
->entry_str
;
1654 static size_t syscall_arg__scnprintf_filename(char *bf
, size_t size
,
1655 struct syscall_arg
*arg
)
1657 unsigned long ptr
= arg
->val
;
1659 if (!arg
->trace
->vfs_getname
)
1660 return scnprintf(bf
, size
, "%#x", ptr
);
1662 thread__set_filename_pos(arg
->thread
, bf
, ptr
);
1666 static bool trace__filter_duration(struct trace
*trace
, double t
)
1668 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
1671 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
1673 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
1675 return fprintf(fp
, "%10.3f ", ts
);
1678 static bool done
= false;
1679 static bool interrupted
= false;
1681 static void sig_handler(int sig
)
1684 interrupted
= sig
== SIGINT
;
1687 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
1688 u64 duration
, u64 tstamp
, FILE *fp
)
1690 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
1691 printed
+= fprintf_duration(duration
, fp
);
1693 if (trace
->multiple_threads
) {
1694 if (trace
->show_comm
)
1695 printed
+= fprintf(fp
, "%.14s/", thread__comm_str(thread
));
1696 printed
+= fprintf(fp
, "%d ", thread
->tid
);
1702 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
1703 union perf_event
*event
, struct perf_sample
*sample
)
1707 switch (event
->header
.type
) {
1708 case PERF_RECORD_LOST
:
1709 color_fprintf(trace
->output
, PERF_COLOR_RED
,
1710 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
1711 ret
= machine__process_lost_event(machine
, event
, sample
);
1714 ret
= machine__process_event(machine
, event
, sample
);
1721 static int trace__tool_process(struct perf_tool
*tool
,
1722 union perf_event
*event
,
1723 struct perf_sample
*sample
,
1724 struct machine
*machine
)
1726 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
1727 return trace__process_event(trace
, machine
, event
, sample
);
1730 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
1732 int err
= symbol__init(NULL
);
1737 trace
->host
= machine__new_host();
1738 if (trace
->host
== NULL
)
1741 if (trace_event__register_resolver(trace
->host
, machine__resolve_kernel_addr
) < 0)
1744 err
= __machine__synthesize_threads(trace
->host
, &trace
->tool
, &trace
->opts
.target
,
1745 evlist
->threads
, trace__tool_process
, false,
1746 trace
->opts
.proc_map_timeout
);
1753 static int syscall__set_arg_fmts(struct syscall
*sc
)
1755 struct format_field
*field
;
1758 sc
->arg_scnprintf
= calloc(sc
->nr_args
, sizeof(void *));
1759 if (sc
->arg_scnprintf
== NULL
)
1763 sc
->arg_parm
= sc
->fmt
->arg_parm
;
1765 for (field
= sc
->args
; field
; field
= field
->next
) {
1766 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
1767 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
1768 else if (field
->flags
& FIELD_IS_POINTER
)
1769 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
1770 else if (strcmp(field
->type
, "pid_t") == 0)
1771 sc
->arg_scnprintf
[idx
] = SCA_PID
;
1772 else if (strcmp(field
->type
, "umode_t") == 0)
1773 sc
->arg_scnprintf
[idx
] = SCA_MODE_T
;
1780 static int trace__read_syscall_info(struct trace
*trace
, int id
)
1784 const char *name
= syscalltbl__name(trace
->sctbl
, id
);
1789 if (id
> trace
->syscalls
.max
) {
1790 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
1792 if (nsyscalls
== NULL
)
1795 if (trace
->syscalls
.max
!= -1) {
1796 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
1797 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
1799 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
1802 trace
->syscalls
.table
= nsyscalls
;
1803 trace
->syscalls
.max
= id
;
1806 sc
= trace
->syscalls
.table
+ id
;
1809 sc
->fmt
= syscall_fmt__find(sc
->name
);
1811 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
1812 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1814 if (IS_ERR(sc
->tp_format
) && sc
->fmt
&& sc
->fmt
->alias
) {
1815 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
1816 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1819 if (IS_ERR(sc
->tp_format
))
1822 sc
->args
= sc
->tp_format
->format
.fields
;
1823 sc
->nr_args
= sc
->tp_format
->format
.nr_fields
;
1825 * The first field, '__syscall_nr' (or 'nr'), only carries the syscall
1826 * number, which is not needed here, so check for it and discard it.
1827 * Note that this field does not exist on older kernels.
1829 if (sc
->args
&& (!strcmp(sc
->args
->name
, "__syscall_nr") || !strcmp(sc
->args
->name
, "nr"))) {
1830 sc
->args
= sc
->args
->next
;
1834 sc
->is_exit
= !strcmp(name
, "exit_group") || !strcmp(name
, "exit");
1836 return syscall__set_arg_fmts(sc
);
1839 static int trace__validate_ev_qualifier(struct trace
*trace
)
1842 struct str_node
*pos
;
1844 trace
->ev_qualifier_ids
.nr
= strlist__nr_entries(trace
->ev_qualifier
);
1845 trace
->ev_qualifier_ids
.entries
= malloc(trace
->ev_qualifier_ids
.nr
*
1846 sizeof(trace
->ev_qualifier_ids
.entries
[0]));
1848 if (trace
->ev_qualifier_ids
.entries
== NULL
) {
1849 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1857 strlist__for_each(pos
, trace
->ev_qualifier
) {
1858 const char *sc
= pos
->s
;
1859 int id
= syscalltbl__id(trace
->sctbl
, sc
);
1863 fputs("Error:\tInvalid syscall ", trace
->output
);
1866 fputs(", ", trace
->output
);
1869 fputs(sc
, trace
->output
);
1872 trace
->ev_qualifier_ids
.entries
[i
++] = id
;
1876 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1877 "\nHint:\tand: 'man syscalls'\n", trace
->output
);
1878 zfree(&trace
->ev_qualifier_ids
.entries
);
1879 trace
->ev_qualifier_ids
.nr
= 0;
1886 * args is to be interpreted as a series of longs but we need to handle
1887 * 8-byte unaligned accesses. args points to raw_data within the event
1888 * and raw_data is guaranteed to be 8-byte unaligned because it is
1889 * preceded by raw_size which is a u32. So we need to copy args to a temp
1890 * variable to read it. Most notably this avoids extended load instructions
1891 * on unaligned addresses
1894 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
1895 unsigned char *args
, struct trace
*trace
,
1896 struct thread
*thread
)
1902 if (sc
->args
!= NULL
) {
1903 struct format_field
*field
;
1905 struct syscall_arg arg
= {
1912 for (field
= sc
->args
; field
;
1913 field
= field
->next
, ++arg
.idx
, bit
<<= 1) {
1917 /* special care for unaligned accesses */
1918 p
= args
+ sizeof(unsigned long) * arg
.idx
;
1919 memcpy(&val
, p
, sizeof(val
));
1922 * Suppress this argument if its value is zero and
1923 * we don't have a string associated in an
1927 !(sc
->arg_scnprintf
&&
1928 sc
->arg_scnprintf
[arg
.idx
] == SCA_STRARRAY
&&
1929 sc
->arg_parm
[arg
.idx
]))
1932 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1933 "%s%s: ", printed
? ", " : "", field
->name
);
1934 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[arg
.idx
]) {
1937 arg
.parm
= sc
->arg_parm
[arg
.idx
];
1938 printed
+= sc
->arg_scnprintf
[arg
.idx
](bf
+ printed
,
1939 size
- printed
, &arg
);
1941 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1949 /* special care for unaligned accesses */
1950 p
= args
+ sizeof(unsigned long) * i
;
1951 memcpy(&val
, p
, sizeof(val
));
1952 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1954 printed
? ", " : "", i
, val
);
1962 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
1963 union perf_event
*event
,
1964 struct perf_sample
*sample
);
1966 static struct syscall
*trace__syscall_info(struct trace
*trace
,
1967 struct perf_evsel
*evsel
, int id
)
1973 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1974 * before that, leaving at a higher verbosity level till that is
1975 * explained. Reproduced with plain ftrace with:
1977 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1978 * grep "NR -1 " /t/trace_pipe
1980 * After generating some load on the machine.
1984 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
1985 id
, perf_evsel__name(evsel
), ++n
);
1990 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
1991 trace__read_syscall_info(trace
, id
))
1994 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
1997 return &trace
->syscalls
.table
[id
];
2001 fprintf(trace
->output
, "Problems reading syscall %d", id
);
2002 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
2003 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
2004 fputs(" information\n", trace
->output
);
2009 static void thread__update_stats(struct thread_trace
*ttrace
,
2010 int id
, struct perf_sample
*sample
)
2012 struct int_node
*inode
;
2013 struct stats
*stats
;
2016 inode
= intlist__findnew(ttrace
->syscall_stats
, id
);
2020 stats
= inode
->priv
;
2021 if (stats
== NULL
) {
2022 stats
= malloc(sizeof(struct stats
));
2026 inode
->priv
= stats
;
2029 if (ttrace
->entry_time
&& sample
->time
> ttrace
->entry_time
)
2030 duration
= sample
->time
- ttrace
->entry_time
;
2032 update_stats(stats
, duration
);
2035 static int trace__printf_interrupted_entry(struct trace
*trace
, struct perf_sample
*sample
)
2037 struct thread_trace
*ttrace
;
2041 if (trace
->current
== NULL
)
2044 ttrace
= thread__priv(trace
->current
);
2046 if (!ttrace
->entry_pending
)
2049 duration
= sample
->time
- ttrace
->entry_time
;
2051 printed
= trace__fprintf_entry_head(trace
, trace
->current
, duration
, sample
->time
, trace
->output
);
2052 printed
+= fprintf(trace
->output
, "%-70s) ...\n", ttrace
->entry_str
);
2053 ttrace
->entry_pending
= false;
2058 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
2059 union perf_event
*event __maybe_unused
,
2060 struct perf_sample
*sample
)
2065 struct thread
*thread
;
2066 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1;
2067 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
2068 struct thread_trace
*ttrace
;
2073 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2074 ttrace
= thread__trace(thread
, trace
->output
);
2078 args
= perf_evsel__sc_tp_ptr(evsel
, args
, sample
);
2080 if (ttrace
->entry_str
== NULL
) {
2081 ttrace
->entry_str
= malloc(trace__entry_str_size
);
2082 if (!ttrace
->entry_str
)
2086 if (!trace
->summary_only
)
2087 trace__printf_interrupted_entry(trace
, sample
);
2089 ttrace
->entry_time
= sample
->time
;
2090 msg
= ttrace
->entry_str
;
2091 printed
+= scnprintf(msg
+ printed
, trace__entry_str_size
- printed
, "%s(", sc
->name
);
2093 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, trace__entry_str_size
- printed
,
2094 args
, trace
, thread
);
2097 if (!trace
->duration_filter
&& !trace
->summary_only
) {
2098 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
2099 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
2102 ttrace
->entry_pending
= true;
2103 /* See trace__vfs_getname & trace__sys_exit */
2104 ttrace
->filename
.pending_open
= false;
2107 if (trace
->current
!= thread
) {
2108 thread__put(trace
->current
);
2109 trace
->current
= thread__get(thread
);
2113 thread__put(thread
);
2117 static int trace__fprintf_callchain(struct trace
*trace
, struct perf_evsel
*evsel
,
2118 struct perf_sample
*sample
)
2120 struct addr_location al
;
2121 /* TODO: user-configurable print_opts */
2122 const unsigned int print_opts
= EVSEL__PRINT_SYM
|
2124 EVSEL__PRINT_UNKNOWN_AS_ADDR
;
2126 if (sample
->callchain
== NULL
)
2129 if (machine__resolve(trace
->host
, &al
, sample
) < 0) {
2130 pr_err("Problem processing %s callchain, skipping...\n",
2131 perf_evsel__name(evsel
));
2135 return perf_evsel__fprintf_callchain(evsel
, sample
, &al
, 38, print_opts
,
2136 scripting_max_stack
, trace
->output
);
2139 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
2140 union perf_event
*event __maybe_unused
,
2141 struct perf_sample
*sample
)
2145 struct thread
*thread
;
2146 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1;
2147 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
2148 struct thread_trace
*ttrace
;
2153 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2154 ttrace
= thread__trace(thread
, trace
->output
);
2159 thread__update_stats(ttrace
, id
, sample
);
2161 ret
= perf_evsel__sc_tp_uint(evsel
, ret
, sample
);
2163 if (id
== trace
->open_id
&& ret
>= 0 && ttrace
->filename
.pending_open
) {
2164 trace__set_fd_pathname(thread
, ret
, ttrace
->filename
.name
);
2165 ttrace
->filename
.pending_open
= false;
2166 ++trace
->stats
.vfs_getname
;
2169 ttrace
->exit_time
= sample
->time
;
2171 if (ttrace
->entry_time
) {
2172 duration
= sample
->time
- ttrace
->entry_time
;
2173 if (trace__filter_duration(trace
, duration
))
2175 } else if (trace
->duration_filter
)
2178 if (trace
->summary_only
)
2181 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
2183 if (ttrace
->entry_pending
) {
2184 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
2186 fprintf(trace
->output
, " ... [");
2187 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
2188 fprintf(trace
->output
, "]: %s()", sc
->name
);
2191 if (sc
->fmt
== NULL
) {
2193 fprintf(trace
->output
, ") = %ld", ret
);
2194 } else if (ret
< 0 && (sc
->fmt
->errmsg
|| sc
->fmt
->errpid
)) {
2195 char bf
[STRERR_BUFSIZE
];
2196 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
2197 *e
= audit_errno_to_name(-ret
);
2199 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
2200 } else if (ret
== 0 && sc
->fmt
->timeout
)
2201 fprintf(trace
->output
, ") = 0 Timeout");
2202 else if (sc
->fmt
->hexret
)
2203 fprintf(trace
->output
, ") = %#lx", ret
);
2204 else if (sc
->fmt
->errpid
) {
2205 struct thread
*child
= machine__find_thread(trace
->host
, ret
, ret
);
2207 if (child
!= NULL
) {
2208 fprintf(trace
->output
, ") = %ld", ret
);
2209 if (child
->comm_set
)
2210 fprintf(trace
->output
, " (%s)", thread__comm_str(child
));
2216 fputc('\n', trace
->output
);
2218 trace__fprintf_callchain(trace
, evsel
, sample
);
2220 ttrace
->entry_pending
= false;
2223 thread__put(thread
);
2227 static int trace__vfs_getname(struct trace
*trace
, struct perf_evsel
*evsel
,
2228 union perf_event
*event __maybe_unused
,
2229 struct perf_sample
*sample
)
2231 struct thread
*thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2232 struct thread_trace
*ttrace
;
2233 size_t filename_len
, entry_str_len
, to_move
;
2234 ssize_t remaining_space
;
2236 const char *filename
= perf_evsel__rawptr(evsel
, sample
, "pathname");
2241 ttrace
= thread__priv(thread
);
2245 filename_len
= strlen(filename
);
2247 if (ttrace
->filename
.namelen
< filename_len
) {
2248 char *f
= realloc(ttrace
->filename
.name
, filename_len
+ 1);
2253 ttrace
->filename
.namelen
= filename_len
;
2254 ttrace
->filename
.name
= f
;
2257 strcpy(ttrace
->filename
.name
, filename
);
2258 ttrace
->filename
.pending_open
= true;
2260 if (!ttrace
->filename
.ptr
)
2263 entry_str_len
= strlen(ttrace
->entry_str
);
2264 remaining_space
= trace__entry_str_size
- entry_str_len
- 1; /* \0 */
2265 if (remaining_space
<= 0)
2268 if (filename_len
> (size_t)remaining_space
) {
2269 filename
+= filename_len
- remaining_space
;
2270 filename_len
= remaining_space
;
2273 to_move
= entry_str_len
- ttrace
->filename
.entry_str_pos
+ 1; /* \0 */
2274 pos
= ttrace
->entry_str
+ ttrace
->filename
.entry_str_pos
;
2275 memmove(pos
+ filename_len
, pos
, to_move
);
2276 memcpy(pos
, filename
, filename_len
);
2278 ttrace
->filename
.ptr
= 0;
2279 ttrace
->filename
.entry_str_pos
= 0;
2284 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
2285 union perf_event
*event __maybe_unused
,
2286 struct perf_sample
*sample
)
2288 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
2289 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
2290 struct thread
*thread
= machine__findnew_thread(trace
->host
,
2293 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
2298 ttrace
->runtime_ms
+= runtime_ms
;
2299 trace
->runtime_ms
+= runtime_ms
;
2300 thread__put(thread
);
2304 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
2306 perf_evsel__strval(evsel
, sample
, "comm"),
2307 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
2309 perf_evsel__intval(evsel
, sample
, "vruntime"));
2310 thread__put(thread
);
2314 static void bpf_output__printer(enum binary_printer_ops op
,
2315 unsigned int val
, void *extra
)
2317 FILE *output
= extra
;
2318 unsigned char ch
= (unsigned char)val
;
2321 case BINARY_PRINT_CHAR_DATA
:
2322 fprintf(output
, "%c", isprint(ch
) ? ch
: '.');
2324 case BINARY_PRINT_DATA_BEGIN
:
2325 case BINARY_PRINT_LINE_BEGIN
:
2326 case BINARY_PRINT_ADDR
:
2327 case BINARY_PRINT_NUM_DATA
:
2328 case BINARY_PRINT_NUM_PAD
:
2329 case BINARY_PRINT_SEP
:
2330 case BINARY_PRINT_CHAR_PAD
:
2331 case BINARY_PRINT_LINE_END
:
2332 case BINARY_PRINT_DATA_END
:
2338 static void bpf_output__fprintf(struct trace
*trace
,
2339 struct perf_sample
*sample
)
2341 print_binary(sample
->raw_data
, sample
->raw_size
, 8,
2342 bpf_output__printer
, trace
->output
);
2345 static int trace__event_handler(struct trace
*trace
, struct perf_evsel
*evsel
,
2346 union perf_event
*event __maybe_unused
,
2347 struct perf_sample
*sample
)
2349 trace__printf_interrupted_entry(trace
, sample
);
2350 trace__fprintf_tstamp(trace
, sample
->time
, trace
->output
);
2352 if (trace
->trace_syscalls
)
2353 fprintf(trace
->output
, "( ): ");
2355 fprintf(trace
->output
, "%s:", evsel
->name
);
2357 if (perf_evsel__is_bpf_output(evsel
)) {
2358 bpf_output__fprintf(trace
, sample
);
2359 } else if (evsel
->tp_format
) {
2360 event_format__fprintf(evsel
->tp_format
, sample
->cpu
,
2361 sample
->raw_data
, sample
->raw_size
,
2365 fprintf(trace
->output
, ")\n");
2367 trace__fprintf_callchain(trace
, evsel
, sample
);
2372 static void print_location(FILE *f
, struct perf_sample
*sample
,
2373 struct addr_location
*al
,
2374 bool print_dso
, bool print_sym
)
2377 if ((verbose
|| print_dso
) && al
->map
)
2378 fprintf(f
, "%s@", al
->map
->dso
->long_name
);
2380 if ((verbose
|| print_sym
) && al
->sym
)
2381 fprintf(f
, "%s+0x%" PRIx64
, al
->sym
->name
,
2382 al
->addr
- al
->sym
->start
);
2384 fprintf(f
, "0x%" PRIx64
, al
->addr
);
2386 fprintf(f
, "0x%" PRIx64
, sample
->addr
);
2389 static int trace__pgfault(struct trace
*trace
,
2390 struct perf_evsel
*evsel
,
2391 union perf_event
*event __maybe_unused
,
2392 struct perf_sample
*sample
)
2394 struct thread
*thread
;
2395 struct addr_location al
;
2396 char map_type
= 'd';
2397 struct thread_trace
*ttrace
;
2400 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2401 ttrace
= thread__trace(thread
, trace
->output
);
2405 if (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
)
2410 if (trace
->summary_only
)
2413 thread__find_addr_location(thread
, sample
->cpumode
, MAP__FUNCTION
,
2416 trace__fprintf_entry_head(trace
, thread
, 0, sample
->time
, trace
->output
);
2418 fprintf(trace
->output
, "%sfault [",
2419 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
?
2422 print_location(trace
->output
, sample
, &al
, false, true);
2424 fprintf(trace
->output
, "] => ");
2426 thread__find_addr_location(thread
, sample
->cpumode
, MAP__VARIABLE
,
2430 thread__find_addr_location(thread
, sample
->cpumode
,
2431 MAP__FUNCTION
, sample
->addr
, &al
);
2439 print_location(trace
->output
, sample
, &al
, true, false);
2441 fprintf(trace
->output
, " (%c%c)\n", map_type
, al
.level
);
2445 thread__put(thread
);
2449 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
2451 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
2452 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
2455 if (trace
->pid_list
|| trace
->tid_list
)
2461 static void trace__set_base_time(struct trace
*trace
,
2462 struct perf_evsel
*evsel
,
2463 struct perf_sample
*sample
)
2466 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2467 * and don't use sample->time unconditionally, we may end up having
2468 * some other event in the future without PERF_SAMPLE_TIME for good
2469 * reason, i.e. we may not be interested in its timestamps, just in
2470 * it taking place, picking some piece of information when it
2471 * appears in our event stream (vfs_getname comes to mind).
2473 if (trace
->base_time
== 0 && !trace
->full_time
&&
2474 (evsel
->attr
.sample_type
& PERF_SAMPLE_TIME
))
2475 trace
->base_time
= sample
->time
;
2478 static int trace__process_sample(struct perf_tool
*tool
,
2479 union perf_event
*event
,
2480 struct perf_sample
*sample
,
2481 struct perf_evsel
*evsel
,
2482 struct machine
*machine __maybe_unused
)
2484 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
2487 tracepoint_handler handler
= evsel
->handler
;
2489 if (skip_sample(trace
, sample
))
2492 trace__set_base_time(trace
, evsel
, sample
);
2496 handler(trace
, evsel
, event
, sample
);
2502 static int parse_target_str(struct trace
*trace
)
2504 if (trace
->opts
.target
.pid
) {
2505 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
2506 if (trace
->pid_list
== NULL
) {
2507 pr_err("Error parsing process id string\n");
2512 if (trace
->opts
.target
.tid
) {
2513 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
2514 if (trace
->tid_list
== NULL
) {
2515 pr_err("Error parsing thread id string\n");
2523 static int trace__record(struct trace
*trace
, int argc
, const char **argv
)
2525 unsigned int rec_argc
, i
, j
;
2526 const char **rec_argv
;
2527 const char * const record_args
[] = {
2534 const char * const sc_args
[] = { "-e", };
2535 unsigned int sc_args_nr
= ARRAY_SIZE(sc_args
);
2536 const char * const majpf_args
[] = { "-e", "major-faults" };
2537 unsigned int majpf_args_nr
= ARRAY_SIZE(majpf_args
);
2538 const char * const minpf_args
[] = { "-e", "minor-faults" };
2539 unsigned int minpf_args_nr
= ARRAY_SIZE(minpf_args
);
2541 /* +1 is for the event string below */
2542 rec_argc
= ARRAY_SIZE(record_args
) + sc_args_nr
+ 1 +
2543 majpf_args_nr
+ minpf_args_nr
+ argc
;
2544 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
2546 if (rec_argv
== NULL
)
2550 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
2551 rec_argv
[j
++] = record_args
[i
];
2553 if (trace
->trace_syscalls
) {
2554 for (i
= 0; i
< sc_args_nr
; i
++)
2555 rec_argv
[j
++] = sc_args
[i
];
2557 /* event string may be different for older kernels - e.g., RHEL6 */
2558 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2559 rec_argv
[j
++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2560 else if (is_valid_tracepoint("syscalls:sys_enter"))
2561 rec_argv
[j
++] = "syscalls:sys_enter,syscalls:sys_exit";
2563 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2568 if (trace
->trace_pgfaults
& TRACE_PFMAJ
)
2569 for (i
= 0; i
< majpf_args_nr
; i
++)
2570 rec_argv
[j
++] = majpf_args
[i
];
2572 if (trace
->trace_pgfaults
& TRACE_PFMIN
)
2573 for (i
= 0; i
< minpf_args_nr
; i
++)
2574 rec_argv
[j
++] = minpf_args
[i
];
2576 for (i
= 0; i
< (unsigned int)argc
; i
++)
2577 rec_argv
[j
++] = argv
[i
];
2579 return cmd_record(j
, rec_argv
, NULL
);
2582 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
);
2584 static bool perf_evlist__add_vfs_getname(struct perf_evlist
*evlist
)
2586 struct perf_evsel
*evsel
= perf_evsel__newtp("probe", "vfs_getname");
2591 if (perf_evsel__field(evsel
, "pathname") == NULL
) {
2592 perf_evsel__delete(evsel
);
2596 evsel
->handler
= trace__vfs_getname
;
2597 perf_evlist__add(evlist
, evsel
);
2601 static int perf_evlist__add_pgfault(struct perf_evlist
*evlist
,
2604 struct perf_evsel
*evsel
;
2605 struct perf_event_attr attr
= {
2606 .type
= PERF_TYPE_SOFTWARE
,
2610 attr
.config
= config
;
2611 attr
.sample_period
= 1;
2613 event_attr_init(&attr
);
2615 evsel
= perf_evsel__new(&attr
);
2619 evsel
->handler
= trace__pgfault
;
2620 perf_evlist__add(evlist
, evsel
);
2625 static void trace__handle_event(struct trace
*trace
, union perf_event
*event
, struct perf_sample
*sample
)
2627 const u32 type
= event
->header
.type
;
2628 struct perf_evsel
*evsel
;
2630 if (type
!= PERF_RECORD_SAMPLE
) {
2631 trace__process_event(trace
, trace
->host
, event
, sample
);
2635 evsel
= perf_evlist__id2evsel(trace
->evlist
, sample
->id
);
2636 if (evsel
== NULL
) {
2637 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
->id
);
2641 trace__set_base_time(trace
, evsel
, sample
);
2643 if (evsel
->attr
.type
== PERF_TYPE_TRACEPOINT
&&
2644 sample
->raw_data
== NULL
) {
2645 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2646 perf_evsel__name(evsel
), sample
->tid
,
2647 sample
->cpu
, sample
->raw_size
);
2649 tracepoint_handler handler
= evsel
->handler
;
2650 handler(trace
, evsel
, event
, sample
);
2654 static int trace__add_syscall_newtp(struct trace
*trace
)
2657 struct perf_evlist
*evlist
= trace
->evlist
;
2658 struct perf_evsel
*sys_enter
, *sys_exit
;
2660 sys_enter
= perf_evsel__syscall_newtp("sys_enter", trace__sys_enter
);
2661 if (sys_enter
== NULL
)
2664 if (perf_evsel__init_sc_tp_ptr_field(sys_enter
, args
))
2665 goto out_delete_sys_enter
;
2667 sys_exit
= perf_evsel__syscall_newtp("sys_exit", trace__sys_exit
);
2668 if (sys_exit
== NULL
)
2669 goto out_delete_sys_enter
;
2671 if (perf_evsel__init_sc_tp_uint_field(sys_exit
, ret
))
2672 goto out_delete_sys_exit
;
2674 perf_evlist__add(evlist
, sys_enter
);
2675 perf_evlist__add(evlist
, sys_exit
);
2677 if (trace
->opts
.callgraph_set
&& !trace
->kernel_syscallchains
) {
2679 * We're interested only in the user space callchain
2680 * leading to the syscall, allow overriding that for
2681 * debugging reasons using --kernel_syscall_callchains
2683 sys_exit
->attr
.exclude_callchain_kernel
= 1;
2686 trace
->syscalls
.events
.sys_enter
= sys_enter
;
2687 trace
->syscalls
.events
.sys_exit
= sys_exit
;
2693 out_delete_sys_exit
:
2694 perf_evsel__delete_priv(sys_exit
);
2695 out_delete_sys_enter
:
2696 perf_evsel__delete_priv(sys_enter
);
2700 static int trace__set_ev_qualifier_filter(struct trace
*trace
)
2703 char *filter
= asprintf_expr_inout_ints("id", !trace
->not_ev_qualifier
,
2704 trace
->ev_qualifier_ids
.nr
,
2705 trace
->ev_qualifier_ids
.entries
);
2710 if (!perf_evsel__append_filter(trace
->syscalls
.events
.sys_enter
, "&&", filter
))
2711 err
= perf_evsel__append_filter(trace
->syscalls
.events
.sys_exit
, "&&", filter
);
2721 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
2723 struct perf_evlist
*evlist
= trace
->evlist
;
2724 struct perf_evsel
*evsel
;
2726 unsigned long before
;
2727 const bool forks
= argc
> 0;
2728 bool draining
= false;
2732 if (trace
->trace_syscalls
&& trace__add_syscall_newtp(trace
))
2733 goto out_error_raw_syscalls
;
2735 if (trace
->trace_syscalls
)
2736 trace
->vfs_getname
= perf_evlist__add_vfs_getname(evlist
);
2738 if ((trace
->trace_pgfaults
& TRACE_PFMAJ
) &&
2739 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MAJ
)) {
2743 if ((trace
->trace_pgfaults
& TRACE_PFMIN
) &&
2744 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MIN
))
2748 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
2749 trace__sched_stat_runtime
))
2750 goto out_error_sched_stat_runtime
;
2752 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
2754 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
2755 goto out_delete_evlist
;
2758 err
= trace__symbols_init(trace
, evlist
);
2760 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
2761 goto out_delete_evlist
;
2764 perf_evlist__config(evlist
, &trace
->opts
, NULL
);
2766 if (trace
->opts
.callgraph_set
&& trace
->syscalls
.events
.sys_exit
) {
2767 perf_evsel__config_callchain(trace
->syscalls
.events
.sys_exit
,
2768 &trace
->opts
, &callchain_param
);
2770 * Now we have evsels with different sample_ids, use
2771 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2772 * from a fixed position in each ring buffer record.
2774 * As of this the changeset introducing this comment, this
2775 * isn't strictly needed, as the fields that can come before
2776 * PERF_SAMPLE_ID are all used, but we'll probably disable
2777 * some of those for things like copying the payload of
2778 * pointer syscall arguments, and for vfs_getname we don't
2779 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2780 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2782 perf_evlist__set_sample_bit(evlist
, IDENTIFIER
);
2783 perf_evlist__reset_sample_bit(evlist
, ID
);
2786 signal(SIGCHLD
, sig_handler
);
2787 signal(SIGINT
, sig_handler
);
2790 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
2793 fprintf(trace
->output
, "Couldn't run the workload!\n");
2794 goto out_delete_evlist
;
2798 err
= perf_evlist__open(evlist
);
2800 goto out_error_open
;
2802 err
= bpf__apply_obj_config();
2804 char errbuf
[BUFSIZ
];
2806 bpf__strerror_apply_obj_config(err
, errbuf
, sizeof(errbuf
));
2807 pr_err("ERROR: Apply config to BPF failed: %s\n",
2809 goto out_error_open
;
2813 * Better not use !target__has_task() here because we need to cover the
2814 * case where no threads were specified in the command line, but a
2815 * workload was, and in that case we will fill in the thread_map when
2816 * we fork the workload in perf_evlist__prepare_workload.
2818 if (trace
->filter_pids
.nr
> 0)
2819 err
= perf_evlist__set_filter_pids(evlist
, trace
->filter_pids
.nr
, trace
->filter_pids
.entries
);
2820 else if (thread_map__pid(evlist
->threads
, 0) == -1)
2821 err
= perf_evlist__set_filter_pid(evlist
, getpid());
2826 if (trace
->ev_qualifier_ids
.nr
> 0) {
2827 err
= trace__set_ev_qualifier_filter(trace
);
2831 pr_debug("event qualifier tracepoint filter: %s\n",
2832 trace
->syscalls
.events
.sys_exit
->filter
);
2835 err
= perf_evlist__apply_filters(evlist
, &evsel
);
2837 goto out_error_apply_filters
;
2839 err
= perf_evlist__mmap(evlist
, trace
->opts
.mmap_pages
, false);
2841 goto out_error_mmap
;
2843 if (!target__none(&trace
->opts
.target
))
2844 perf_evlist__enable(evlist
);
2847 perf_evlist__start_workload(evlist
);
2849 trace
->multiple_threads
= thread_map__pid(evlist
->threads
, 0) == -1 ||
2850 evlist
->threads
->nr
> 1 ||
2851 perf_evlist__first(evlist
)->attr
.inherit
;
2853 before
= trace
->nr_events
;
2855 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
2856 union perf_event
*event
;
2858 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
2859 struct perf_sample sample
;
2863 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
2865 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
2869 trace__handle_event(trace
, event
, &sample
);
2871 perf_evlist__mmap_consume(evlist
, i
);
2876 if (done
&& !draining
) {
2877 perf_evlist__disable(evlist
);
2883 if (trace
->nr_events
== before
) {
2884 int timeout
= done
? 100 : -1;
2886 if (!draining
&& perf_evlist__poll(evlist
, timeout
) > 0) {
2887 if (perf_evlist__filter_pollfd(evlist
, POLLERR
| POLLHUP
) == 0)
2897 thread__zput(trace
->current
);
2899 perf_evlist__disable(evlist
);
2903 trace__fprintf_thread_summary(trace
, trace
->output
);
2905 if (trace
->show_tool_stats
) {
2906 fprintf(trace
->output
, "Stats:\n "
2907 " vfs_getname : %" PRIu64
"\n"
2908 " proc_getname: %" PRIu64
"\n",
2909 trace
->stats
.vfs_getname
,
2910 trace
->stats
.proc_getname
);
2915 perf_evlist__delete(evlist
);
2916 trace
->evlist
= NULL
;
2917 trace
->live
= false;
2920 char errbuf
[BUFSIZ
];
2922 out_error_sched_stat_runtime
:
2923 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "sched", "sched_stat_runtime");
2926 out_error_raw_syscalls
:
2927 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "raw_syscalls", "sys_(enter|exit)");
2931 perf_evlist__strerror_mmap(evlist
, errno
, errbuf
, sizeof(errbuf
));
2935 perf_evlist__strerror_open(evlist
, errno
, errbuf
, sizeof(errbuf
));
2938 fprintf(trace
->output
, "%s\n", errbuf
);
2939 goto out_delete_evlist
;
2941 out_error_apply_filters
:
2942 fprintf(trace
->output
,
2943 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2944 evsel
->filter
, perf_evsel__name(evsel
), errno
,
2945 strerror_r(errno
, errbuf
, sizeof(errbuf
)));
2946 goto out_delete_evlist
;
2949 fprintf(trace
->output
, "Not enough memory to run!\n");
2950 goto out_delete_evlist
;
2953 fprintf(trace
->output
, "errno=%d,%s\n", errno
, strerror(errno
));
2954 goto out_delete_evlist
;
2957 static int trace__replay(struct trace
*trace
)
2959 const struct perf_evsel_str_handler handlers
[] = {
2960 { "probe:vfs_getname", trace__vfs_getname
, },
2962 struct perf_data_file file
= {
2964 .mode
= PERF_DATA_MODE_READ
,
2965 .force
= trace
->force
,
2967 struct perf_session
*session
;
2968 struct perf_evsel
*evsel
;
2971 trace
->tool
.sample
= trace__process_sample
;
2972 trace
->tool
.mmap
= perf_event__process_mmap
;
2973 trace
->tool
.mmap2
= perf_event__process_mmap2
;
2974 trace
->tool
.comm
= perf_event__process_comm
;
2975 trace
->tool
.exit
= perf_event__process_exit
;
2976 trace
->tool
.fork
= perf_event__process_fork
;
2977 trace
->tool
.attr
= perf_event__process_attr
;
2978 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
2979 trace
->tool
.build_id
= perf_event__process_build_id
;
2981 trace
->tool
.ordered_events
= true;
2982 trace
->tool
.ordering_requires_timestamps
= true;
2984 /* add tid to output */
2985 trace
->multiple_threads
= true;
2987 session
= perf_session__new(&file
, false, &trace
->tool
);
2988 if (session
== NULL
)
2991 if (symbol__init(&session
->header
.env
) < 0)
2994 trace
->host
= &session
->machines
.host
;
2996 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
3000 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
3001 "raw_syscalls:sys_enter");
3002 /* older kernels have syscalls tp versus raw_syscalls */
3004 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
3005 "syscalls:sys_enter");
3008 (perf_evsel__init_syscall_tp(evsel
, trace__sys_enter
) < 0 ||
3009 perf_evsel__init_sc_tp_ptr_field(evsel
, args
))) {
3010 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
3014 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
3015 "raw_syscalls:sys_exit");
3017 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
3018 "syscalls:sys_exit");
3020 (perf_evsel__init_syscall_tp(evsel
, trace__sys_exit
) < 0 ||
3021 perf_evsel__init_sc_tp_uint_field(evsel
, ret
))) {
3022 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
3026 evlist__for_each(session
->evlist
, evsel
) {
3027 if (evsel
->attr
.type
== PERF_TYPE_SOFTWARE
&&
3028 (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
||
3029 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MIN
||
3030 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS
))
3031 evsel
->handler
= trace__pgfault
;
3034 err
= parse_target_str(trace
);
3040 err
= perf_session__process_events(session
);
3042 pr_err("Failed to process events, error %d", err
);
3044 else if (trace
->summary
)
3045 trace__fprintf_thread_summary(trace
, trace
->output
);
3048 perf_session__delete(session
);
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the fprintf() result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
3062 static size_t thread__dump_stats(struct thread_trace
*ttrace
,
3063 struct trace
*trace
, FILE *fp
)
3065 struct stats
*stats
;
3068 struct int_node
*inode
= intlist__first(ttrace
->syscall_stats
);
3073 printed
+= fprintf(fp
, "\n");
3075 printed
+= fprintf(fp
, " syscall calls total min avg max stddev\n");
3076 printed
+= fprintf(fp
, " (msec) (msec) (msec) (msec) (%%)\n");
3077 printed
+= fprintf(fp
, " --------------- -------- --------- --------- --------- --------- ------\n");
3079 /* each int_node is a syscall */
3081 stats
= inode
->priv
;
3083 double min
= (double)(stats
->min
) / NSEC_PER_MSEC
;
3084 double max
= (double)(stats
->max
) / NSEC_PER_MSEC
;
3085 double avg
= avg_stats(stats
);
3087 u64 n
= (u64
) stats
->n
;
3089 pct
= avg
? 100.0 * stddev_stats(stats
)/avg
: 0.0;
3090 avg
/= NSEC_PER_MSEC
;
3092 sc
= &trace
->syscalls
.table
[inode
->i
];
3093 printed
+= fprintf(fp
, " %-15s", sc
->name
);
3094 printed
+= fprintf(fp
, " %8" PRIu64
" %9.3f %9.3f %9.3f",
3095 n
, avg
* n
, min
, avg
);
3096 printed
+= fprintf(fp
, " %9.3f %9.2f%%\n", max
, pct
);
3099 inode
= intlist__next(inode
);
3102 printed
+= fprintf(fp
, "\n\n");
/*
 * Context handed to the per-thread summary callback through its opaque
 * 'priv' pointer.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* tracing session being summarised */
	size_t printed;		/* running count of characters printed */
};
3114 static int trace__fprintf_one_thread(struct thread
*thread
, void *priv
)
3116 struct summary_data
*data
= priv
;
3117 FILE *fp
= data
->fp
;
3118 size_t printed
= data
->printed
;
3119 struct trace
*trace
= data
->trace
;
3120 struct thread_trace
*ttrace
= thread__priv(thread
);
3126 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
3128 printed
+= fprintf(fp
, " %s (%d), ", thread__comm_str(thread
), thread
->tid
);
3129 printed
+= fprintf(fp
, "%lu events, ", ttrace
->nr_events
);
3130 printed
+= fprintf(fp
, "%.1f%%", ratio
);
3132 printed
+= fprintf(fp
, ", %lu majfaults", ttrace
->pfmaj
);
3134 printed
+= fprintf(fp
, ", %lu minfaults", ttrace
->pfmin
);
3135 printed
+= fprintf(fp
, ", %.3f msec\n", ttrace
->runtime_ms
);
3136 printed
+= thread__dump_stats(ttrace
, trace
, fp
);
3138 data
->printed
+= printed
;
3143 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
3145 struct summary_data data
= {
3149 data
.printed
= trace__fprintf_threads_header(fp
);
3151 machine__for_each_thread(trace
->host
, trace__fprintf_one_thread
, &data
);
3153 return data
.printed
;
3156 static int trace__set_duration(const struct option
*opt
, const char *str
,
3157 int unset __maybe_unused
)
3159 struct trace
*trace
= opt
->value
;
3161 trace
->duration_filter
= atof(str
);
3165 static int trace__set_filter_pids(const struct option
*opt
, const char *str
,
3166 int unset __maybe_unused
)
3170 struct trace
*trace
= opt
->value
;
3172 * FIXME: introduce a intarray class, plain parse csv and create a
3173 * { int nr, int entries[] } struct...
3175 struct intlist
*list
= intlist__new(str
);
3180 i
= trace
->filter_pids
.nr
= intlist__nr_entries(list
) + 1;
3181 trace
->filter_pids
.entries
= calloc(i
, sizeof(pid_t
));
3183 if (trace
->filter_pids
.entries
== NULL
)
3186 trace
->filter_pids
.entries
[0] = getpid();
3188 for (i
= 1; i
< trace
->filter_pids
.nr
; ++i
)
3189 trace
->filter_pids
.entries
[i
] = intlist__entry(list
, i
- 1)->i
;
3191 intlist__delete(list
);
3197 static int trace__open_output(struct trace
*trace
, const char *filename
)
3201 if (!stat(filename
, &st
) && st
.st_size
) {
3202 char oldname
[PATH_MAX
];
3204 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
3206 rename(filename
, oldname
);
3209 trace
->output
= fopen(filename
, "w");
3211 return trace
->output
== NULL
? -errno
: 0;
3214 static int parse_pagefaults(const struct option
*opt
, const char *str
,
3215 int unset __maybe_unused
)
3217 int *trace_pgfaults
= opt
->value
;
3219 if (strcmp(str
, "all") == 0)
3220 *trace_pgfaults
|= TRACE_PFMAJ
| TRACE_PFMIN
;
3221 else if (strcmp(str
, "maj") == 0)
3222 *trace_pgfaults
|= TRACE_PFMAJ
;
3223 else if (strcmp(str
, "min") == 0)
3224 *trace_pgfaults
|= TRACE_PFMIN
;
3231 static void evlist__set_evsel_handler(struct perf_evlist
*evlist
, void *handler
)
3233 struct perf_evsel
*evsel
;
3235 evlist__for_each(evlist
, evsel
)
3236 evsel
->handler
= handler
;
3239 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
3241 const char *trace_usage
[] = {
3242 "perf trace [<options>] [<command>]",
3243 "perf trace [<options>] -- <command> [<options>]",
3244 "perf trace record [<options>] [<command>]",
3245 "perf trace record [<options>] -- <command> [<options>]",
3248 struct trace trace
= {
3257 .user_freq
= UINT_MAX
,
3258 .user_interval
= ULLONG_MAX
,
3259 .no_buffering
= true,
3260 .mmap_pages
= UINT_MAX
,
3261 .proc_map_timeout
= 500,
3265 .trace_syscalls
= true,
3266 .kernel_syscallchains
= false,
3268 const char *output_name
= NULL
;
3269 const char *ev_qualifier_str
= NULL
;
3270 const struct option trace_options
[] = {
3271 OPT_CALLBACK(0, "event", &trace
.evlist
, "event",
3272 "event selector. use 'perf list' to list available events",
3273 parse_events_option
),
3274 OPT_BOOLEAN(0, "comm", &trace
.show_comm
,
3275 "show the thread COMM next to its id"),
3276 OPT_BOOLEAN(0, "tool_stats", &trace
.show_tool_stats
, "show tool stats"),
3277 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr", "list of syscalls to trace"),
3278 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
3279 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
3280 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
3281 "trace events on existing process id"),
3282 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
3283 "trace events on existing thread id"),
3284 OPT_CALLBACK(0, "filter-pids", &trace
, "CSV list of pids",
3285 "pids to filter (by the kernel)", trace__set_filter_pids
),
3286 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
3287 "system-wide collection from all CPUs"),
3288 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
3289 "list of cpus to monitor"),
3290 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
3291 "child tasks do not inherit counters"),
3292 OPT_CALLBACK('m', "mmap-pages", &trace
.opts
.mmap_pages
, "pages",
3293 "number of mmap data pages",
3294 perf_evlist__parse_mmap_pages
),
3295 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
3297 OPT_CALLBACK(0, "duration", &trace
, "float",
3298 "show only events with duration > N.M ms",
3299 trace__set_duration
),
3300 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
3301 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
3302 OPT_BOOLEAN('T', "time", &trace
.full_time
,
3303 "Show full timestamp, not time relative to first start"),
3304 OPT_BOOLEAN('s', "summary", &trace
.summary_only
,
3305 "Show only syscall summary with statistics"),
3306 OPT_BOOLEAN('S', "with-summary", &trace
.summary
,
3307 "Show all syscalls and summary with statistics"),
3308 OPT_CALLBACK_DEFAULT('F', "pf", &trace
.trace_pgfaults
, "all|maj|min",
3309 "Trace pagefaults", parse_pagefaults
, "maj"),
3310 OPT_BOOLEAN(0, "syscalls", &trace
.trace_syscalls
, "Trace syscalls"),
3311 OPT_BOOLEAN('f', "force", &trace
.force
, "don't complain, do it"),
3312 OPT_CALLBACK(0, "call-graph", &trace
.opts
,
3313 "record_mode[,record_size]", record_callchain_help
,
3314 &record_parse_callchain_opt
),
3315 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace
.kernel_syscallchains
,
3316 "Show the kernel callchains on the syscall exit path"),
3317 OPT_UINTEGER(0, "proc-map-timeout", &trace
.opts
.proc_map_timeout
,
3318 "per thread proc mmap processing timeout in ms"),
3321 const char * const trace_subcommands
[] = { "record", NULL
};
3325 signal(SIGSEGV
, sighandler_dump_stack
);
3326 signal(SIGFPE
, sighandler_dump_stack
);
3328 trace
.evlist
= perf_evlist__new();
3329 trace
.sctbl
= syscalltbl__new();
3331 if (trace
.evlist
== NULL
|| trace
.sctbl
== NULL
) {
3332 pr_err("Not enough memory to run!\n");
3337 argc
= parse_options_subcommand(argc
, argv
, trace_options
, trace_subcommands
,
3338 trace_usage
, PARSE_OPT_STOP_AT_NON_OPTION
);
3340 err
= bpf__setup_stdout(trace
.evlist
);
3342 bpf__strerror_setup_stdout(trace
.evlist
, err
, bf
, sizeof(bf
));
3343 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf
);
3349 if (trace
.trace_pgfaults
) {
3350 trace
.opts
.sample_address
= true;
3351 trace
.opts
.sample_time
= true;
3354 if (trace
.opts
.callgraph_set
)
3355 symbol_conf
.use_callchain
= true;
3357 if (trace
.evlist
->nr_entries
> 0)
3358 evlist__set_evsel_handler(trace
.evlist
, trace__event_handler
);
3360 if ((argc
>= 1) && (strcmp(argv
[0], "record") == 0))
3361 return trace__record(&trace
, argc
-1, &argv
[1]);
3363 /* summary_only implies summary option, but don't overwrite summary if set */
3364 if (trace
.summary_only
)
3365 trace
.summary
= trace
.summary_only
;
3367 if (!trace
.trace_syscalls
&& !trace
.trace_pgfaults
&&
3368 trace
.evlist
->nr_entries
== 0 /* Was --events used? */) {
3369 pr_err("Please specify something to trace.\n");
3373 if (!trace
.trace_syscalls
&& ev_qualifier_str
) {
3374 pr_err("The -e option can't be used with --no-syscalls.\n");
3378 if (output_name
!= NULL
) {
3379 err
= trace__open_output(&trace
, output_name
);
3381 perror("failed to create output file");
3386 trace
.open_id
= syscalltbl__id(trace
.sctbl
, "open");
3388 if (ev_qualifier_str
!= NULL
) {
3389 const char *s
= ev_qualifier_str
;
3390 struct strlist_config slist_config
= {
3391 .dirname
= system_path(STRACE_GROUPS_DIR
),
3394 trace
.not_ev_qualifier
= *s
== '!';
3395 if (trace
.not_ev_qualifier
)
3397 trace
.ev_qualifier
= strlist__new(s
, &slist_config
);
3398 if (trace
.ev_qualifier
== NULL
) {
3399 fputs("Not enough memory to parse event qualifier",
3405 err
= trace__validate_ev_qualifier(&trace
);
3410 err
= target__validate(&trace
.opts
.target
);
3412 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3413 fprintf(trace
.output
, "%s", bf
);
3417 err
= target__parse_uid(&trace
.opts
.target
);
3419 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3420 fprintf(trace
.output
, "%s", bf
);
3424 if (!argc
&& target__none(&trace
.opts
.target
))
3425 trace
.opts
.target
.system_wide
= true;
3428 err
= trace__replay(&trace
);
3430 err
= trace__run(&trace
, argc
, argv
);
3433 if (output_name
!= NULL
)
3434 fclose(trace
.output
);