4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
41 #include <linux/futex.h>
42 #include <linux/err.h>
43 #include <linux/seccomp.h>
44 #include <linux/filter.h>
45 #include <linux/audit.h>
46 #include <sys/ptrace.h>
47 #include <linux/random.h>
49 /* For older distros: */
51 # define MAP_STACK 0x20000
55 # define MADV_HWPOISON 100
59 #ifndef MADV_MERGEABLE
60 # define MADV_MERGEABLE 12
63 #ifndef MADV_UNMERGEABLE
64 # define MADV_UNMERGEABLE 13
68 # define EFD_SEMAPHORE 1
72 # define EFD_NONBLOCK 00004000
76 # define EFD_CLOEXEC 02000000
80 # define O_CLOEXEC 02000000
88 # define SOCK_CLOEXEC 02000000
92 # define SOCK_NONBLOCK 00004000
95 #ifndef MSG_CMSG_CLOEXEC
96 # define MSG_CMSG_CLOEXEC 0x40000000
99 #ifndef PERF_FLAG_FD_NO_GROUP
100 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
103 #ifndef PERF_FLAG_FD_OUTPUT
104 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
107 #ifndef PERF_FLAG_PID_CGROUP
108 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
111 #ifndef PERF_FLAG_FD_CLOEXEC
112 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
119 u64 (*integer
)(struct tp_field
*field
, struct perf_sample
*sample
);
120 void *(*pointer
)(struct tp_field
*field
, struct perf_sample
*sample
);
124 #define TP_UINT_FIELD(bits) \
125 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
128 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
137 #define TP_UINT_FIELD__SWAPPED(bits) \
138 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
141 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
142 return bswap_##bits(value);\
145 TP_UINT_FIELD__SWAPPED(16);
146 TP_UINT_FIELD__SWAPPED(32);
147 TP_UINT_FIELD__SWAPPED(64);
149 static int tp_field__init_uint(struct tp_field
*field
,
150 struct format_field
*format_field
,
153 field
->offset
= format_field
->offset
;
155 switch (format_field
->size
) {
157 field
->integer
= tp_field__u8
;
160 field
->integer
= needs_swap
? tp_field__swapped_u16
: tp_field__u16
;
163 field
->integer
= needs_swap
? tp_field__swapped_u32
: tp_field__u32
;
166 field
->integer
= needs_swap
? tp_field__swapped_u64
: tp_field__u64
;
175 static void *tp_field__ptr(struct tp_field
*field
, struct perf_sample
*sample
)
177 return sample
->raw_data
+ field
->offset
;
180 static int tp_field__init_ptr(struct tp_field
*field
, struct format_field
*format_field
)
182 field
->offset
= format_field
->offset
;
183 field
->pointer
= tp_field__ptr
;
190 struct tp_field args
, ret
;
194 static int perf_evsel__init_tp_uint_field(struct perf_evsel
*evsel
,
195 struct tp_field
*field
,
198 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
200 if (format_field
== NULL
)
203 return tp_field__init_uint(field
, format_field
, evsel
->needs_swap
);
/*
 * Set up the unsigned-integer accessor for the struct syscall_tp member
 * called 'name' on 'evsel'.
 *
 * 'name' is used twice: as the member of the struct syscall_tp hanging off
 * evsel->priv, and (stringified) as the field name handed to
 * perf_evsel__init_tp_uint_field(). The value of the whole expression is
 * whatever that function returns.
 *
 * 'evsel' is parenthesised so that any pointer expression, not only a plain
 * identifier, can be passed safely.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
210 static int perf_evsel__init_tp_ptr_field(struct perf_evsel
*evsel
,
211 struct tp_field
*field
,
214 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
216 if (format_field
== NULL
)
219 return tp_field__init_ptr(field
, format_field
);
/*
 * Set up the pointer accessor for the struct syscall_tp member called
 * 'name' on 'evsel'.
 *
 * 'name' selects the member of the struct syscall_tp hanging off
 * evsel->priv and, stringified, is the field name handed to
 * perf_evsel__init_tp_ptr_field(). The value of the whole expression is
 * whatever that function returns.
 *
 * 'evsel' is parenthesised so that any pointer expression, not only a plain
 * identifier, can be passed safely.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
226 static void perf_evsel__delete_priv(struct perf_evsel
*evsel
)
229 perf_evsel__delete(evsel
);
232 static int perf_evsel__init_syscall_tp(struct perf_evsel
*evsel
, void *handler
)
234 evsel
->priv
= malloc(sizeof(struct syscall_tp
));
235 if (evsel
->priv
!= NULL
) {
236 if (perf_evsel__init_sc_tp_uint_field(evsel
, id
))
239 evsel
->handler
= handler
;
250 static struct perf_evsel
*perf_evsel__syscall_newtp(const char *direction
, void *handler
)
252 struct perf_evsel
*evsel
= perf_evsel__newtp("raw_syscalls", direction
);
254 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
256 evsel
= perf_evsel__newtp("syscalls", direction
);
261 if (perf_evsel__init_syscall_tp(evsel
, handler
))
267 perf_evsel__delete_priv(evsel
);
/*
 * Read the struct syscall_tp member 'name' of 'evsel' from 'sample' as an
 * unsigned integer, by calling that member's ->integer() accessor.
 *
 * 'evsel' and 'sample' are parenthesised so that arbitrary expressions can
 * be passed without precedence surprises.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
/*
 * Obtain a pointer to the struct syscall_tp member 'name' of 'evsel' within
 * 'sample', by calling that member's ->pointer() accessor.
 *
 * 'evsel' and 'sample' are parenthesised so that arbitrary expressions can
 * be passed without precedence surprises.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
281 struct thread
*thread
;
291 const char **entries
;
294 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
295 .nr_entries = ARRAY_SIZE(array), \
299 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
301 .nr_entries = ARRAY_SIZE(array), \
305 static size_t __syscall_arg__scnprintf_strarray(char *bf
, size_t size
,
307 struct syscall_arg
*arg
)
309 struct strarray
*sa
= arg
->parm
;
310 int idx
= arg
->val
- sa
->offset
;
312 if (idx
< 0 || idx
>= sa
->nr_entries
)
313 return scnprintf(bf
, size
, intfmt
, arg
->val
);
315 return scnprintf(bf
, size
, "%s", sa
->entries
[idx
]);
/*
 * String-table beautifier: values the shared helper cannot translate to a
 * name are printed with the "%d" format.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
324 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
326 #if defined(__i386__) || defined(__x86_64__)
328 * FIXME: Make this available to all arches as soon as the ioctl beautifier
329 * gets rewritten to support all arches.
/*
 * String-table beautifier: values the shared helper cannot translate to a
 * name are printed with the "%#x" format.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
337 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
338 #endif /* defined(__i386__) || defined(__x86_64__) */
340 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
341 struct syscall_arg
*arg
);
343 #define SCA_FD syscall_arg__scnprintf_fd
345 static size_t syscall_arg__scnprintf_fd_at(char *bf
, size_t size
,
346 struct syscall_arg
*arg
)
351 return scnprintf(bf
, size
, "CWD");
353 return syscall_arg__scnprintf_fd(bf
, size
, arg
);
356 #define SCA_FDAT syscall_arg__scnprintf_fd_at
358 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
359 struct syscall_arg
*arg
);
361 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
363 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
364 struct syscall_arg
*arg
)
366 return scnprintf(bf
, size
, "%#lx", arg
->val
);
369 #define SCA_HEX syscall_arg__scnprintf_hex
371 static size_t syscall_arg__scnprintf_int(char *bf
, size_t size
,
372 struct syscall_arg
*arg
)
374 return scnprintf(bf
, size
, "%d", arg
->val
);
377 #define SCA_INT syscall_arg__scnprintf_int
379 static size_t syscall_arg__scnprintf_mmap_prot(char *bf
, size_t size
,
380 struct syscall_arg
*arg
)
382 int printed
= 0, prot
= arg
->val
;
384 if (prot
== PROT_NONE
)
385 return scnprintf(bf
, size
, "NONE");
386 #define P_MMAP_PROT(n) \
387 if (prot & PROT_##n) { \
388 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
398 P_MMAP_PROT(GROWSDOWN
);
399 P_MMAP_PROT(GROWSUP
);
403 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", prot
);
408 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
410 static size_t syscall_arg__scnprintf_mmap_flags(char *bf
, size_t size
,
411 struct syscall_arg
*arg
)
413 int printed
= 0, flags
= arg
->val
;
415 #define P_MMAP_FLAG(n) \
416 if (flags & MAP_##n) { \
417 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
422 P_MMAP_FLAG(PRIVATE
);
426 P_MMAP_FLAG(ANONYMOUS
);
427 P_MMAP_FLAG(DENYWRITE
);
428 P_MMAP_FLAG(EXECUTABLE
);
431 P_MMAP_FLAG(GROWSDOWN
);
433 P_MMAP_FLAG(HUGETLB
);
436 P_MMAP_FLAG(NONBLOCK
);
437 P_MMAP_FLAG(NORESERVE
);
438 P_MMAP_FLAG(POPULATE
);
440 #ifdef MAP_UNINITIALIZED
441 P_MMAP_FLAG(UNINITIALIZED
);
446 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
451 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
453 static size_t syscall_arg__scnprintf_mremap_flags(char *bf
, size_t size
,
454 struct syscall_arg
*arg
)
456 int printed
= 0, flags
= arg
->val
;
458 #define P_MREMAP_FLAG(n) \
459 if (flags & MREMAP_##n) { \
460 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
461 flags &= ~MREMAP_##n; \
464 P_MREMAP_FLAG(MAYMOVE
);
466 P_MREMAP_FLAG(FIXED
);
471 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
476 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
478 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf
, size_t size
,
479 struct syscall_arg
*arg
)
481 int behavior
= arg
->val
;
484 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
487 P_MADV_BHV(SEQUENTIAL
);
488 P_MADV_BHV(WILLNEED
);
489 P_MADV_BHV(DONTNEED
);
491 P_MADV_BHV(DONTFORK
);
493 P_MADV_BHV(HWPOISON
);
494 #ifdef MADV_SOFT_OFFLINE
495 P_MADV_BHV(SOFT_OFFLINE
);
497 P_MADV_BHV(MERGEABLE
);
498 P_MADV_BHV(UNMERGEABLE
);
500 P_MADV_BHV(HUGEPAGE
);
502 #ifdef MADV_NOHUGEPAGE
503 P_MADV_BHV(NOHUGEPAGE
);
506 P_MADV_BHV(DONTDUMP
);
515 return scnprintf(bf
, size
, "%#x", behavior
);
518 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
520 static size_t syscall_arg__scnprintf_flock(char *bf
, size_t size
,
521 struct syscall_arg
*arg
)
523 int printed
= 0, op
= arg
->val
;
526 return scnprintf(bf
, size
, "NONE");
528 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
529 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
544 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", op
);
549 #define SCA_FLOCK syscall_arg__scnprintf_flock
551 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
553 enum syscall_futex_args
{
554 SCF_UADDR
= (1 << 0),
557 SCF_TIMEOUT
= (1 << 3),
558 SCF_UADDR2
= (1 << 4),
562 int cmd
= op
& FUTEX_CMD_MASK
;
566 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
567 P_FUTEX_OP(WAIT
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
568 P_FUTEX_OP(WAKE
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
569 P_FUTEX_OP(FD
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
570 P_FUTEX_OP(REQUEUE
); arg
->mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
571 P_FUTEX_OP(CMP_REQUEUE
); arg
->mask
|= SCF_TIMEOUT
; break;
572 P_FUTEX_OP(CMP_REQUEUE_PI
); arg
->mask
|= SCF_TIMEOUT
; break;
573 P_FUTEX_OP(WAKE_OP
); break;
574 P_FUTEX_OP(LOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
575 P_FUTEX_OP(UNLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
576 P_FUTEX_OP(TRYLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
577 P_FUTEX_OP(WAIT_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
578 P_FUTEX_OP(WAKE_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
579 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
580 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
583 if (op
& FUTEX_PRIVATE_FLAG
)
584 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
586 if (op
& FUTEX_CLOCK_REALTIME
)
587 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
592 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
/* Names for the first (cmd) argument of bpf(), positioned by value. */
static const char *bpf_cmd[] = {
	[0] = "MAP_CREATE",
	[1] = "MAP_LOOKUP_ELEM",
	[2] = "MAP_UPDATE_ELEM",
	[3] = "MAP_DELETE_ELEM",
	[4] = "MAP_GET_NEXT_KEY",
	[5] = "PROG_LOAD",
};
598 static DEFINE_STRARRAY(bpf_cmd
);
/*
 * Names for the op argument of epoll_ctl(). The accompanying strarray is
 * declared with an offset of 1, so slot 0 here corresponds to op value 1.
 */
static const char *epoll_ctl_ops[] = {
	[0] = "ADD",
	[1] = "DEL",
	[2] = "MOD",
};
601 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops
, 1);
/* Names for the 'which' argument of getitimer()/setitimer(), by value. */
static const char *itimers[] = {
	[0] = "REAL",
	[1] = "VIRTUAL",
	[2] = "PROF",
};
604 static DEFINE_STRARRAY(itimers
);
/* Names for the first (option) argument of keyctl(), positioned by value. */
static const char *keyctl_options[] = {
	[0]  = "GET_KEYRING_ID",
	[1]  = "JOIN_SESSION_KEYRING",
	[2]  = "UPDATE",
	[3]  = "REVOKE",
	[4]  = "CHOWN",
	[5]  = "SETPERM",
	[6]  = "DESCRIBE",
	[7]  = "CLEAR",
	[8]  = "LINK",
	[9]  = "UNLINK",
	[10] = "SEARCH",
	[11] = "READ",
	[12] = "INSTANTIATE",
	[13] = "NEGATE",
	[14] = "SET_REQKEY_KEYRING",
	[15] = "SET_TIMEOUT",
	[16] = "ASSUME_AUTHORITY",
	[17] = "GET_SECURITY",
	[18] = "SESSION_TO_PARENT",
	[19] = "REJECT",
	[20] = "INSTANTIATE_IOV",
	[21] = "INVALIDATE",
	[22] = "GET_PERSISTENT",
};
613 static DEFINE_STRARRAY(keyctl_options
);
615 static const char *whences
[] = { "SET", "CUR", "END",
623 static DEFINE_STRARRAY(whences
);
625 static const char *fcntl_cmds
[] = {
626 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
627 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
628 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
631 static DEFINE_STRARRAY(fcntl_cmds
);
633 static const char *rlimit_resources
[] = {
634 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
635 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
638 static DEFINE_STRARRAY(rlimit_resources
);
/* Names for the 'how' argument of rt_sigprocmask(), positioned by value. */
static const char *sighow[] = {
	[0] = "BLOCK",
	[1] = "UNBLOCK",
	[2] = "SETMASK",
};
641 static DEFINE_STRARRAY(sighow
);
/* Names for the clk_id argument of clock_gettime(), positioned by value. */
static const char *clockid[] = {
	[0]  = "REALTIME",
	[1]  = "MONOTONIC",
	[2]  = "PROCESS_CPUTIME_ID",
	[3]  = "THREAD_CPUTIME_ID",
	[4]  = "MONOTONIC_RAW",
	[5]  = "REALTIME_COARSE",
	[6]  = "MONOTONIC_COARSE",
	[7]  = "BOOTTIME",
	[8]  = "REALTIME_ALARM",
	[9]  = "BOOTTIME_ALARM",
	[10] = "SGI_CYCLE",
	[11] = "TAI"
};
648 static DEFINE_STRARRAY(clockid
);
/*
 * Socket address/protocol family names, indexed by the numeric family
 * value (the AF_* / PF_* number) with the "AF_" prefix stripped.
 *
 * Every position must match the kernel's numbering, because the table is
 * looked up directly by the value seen in the syscall argument. Family 28
 * (MPLS) sits between IB (27) and CAN (29); leaving it out would shift CAN
 * and every later name down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
658 static DEFINE_STRARRAY(socket_families
);
660 #ifndef SOCK_TYPE_MASK
661 #define SOCK_TYPE_MASK 0xf
664 static size_t syscall_arg__scnprintf_socket_type(char *bf
, size_t size
,
665 struct syscall_arg
*arg
)
669 flags
= type
& ~SOCK_TYPE_MASK
;
671 type
&= SOCK_TYPE_MASK
;
673 * Can't use a strarray, MIPS may override for ABI reasons.
676 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
681 P_SK_TYPE(SEQPACKET
);
686 printed
= scnprintf(bf
, size
, "%#x", type
);
689 #define P_SK_FLAG(n) \
690 if (flags & SOCK_##n) { \
691 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
692 flags &= ~SOCK_##n; \
700 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", flags
);
705 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
708 #define MSG_PROBE 0x10
710 #ifndef MSG_WAITFORONE
711 #define MSG_WAITFORONE 0x10000
713 #ifndef MSG_SENDPAGE_NOTLAST
714 #define MSG_SENDPAGE_NOTLAST 0x20000
717 #define MSG_FASTOPEN 0x20000000
720 static size_t syscall_arg__scnprintf_msg_flags(char *bf
, size_t size
,
721 struct syscall_arg
*arg
)
723 int printed
= 0, flags
= arg
->val
;
726 return scnprintf(bf
, size
, "NONE");
727 #define P_MSG_FLAG(n) \
728 if (flags & MSG_##n) { \
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
735 P_MSG_FLAG(DONTROUTE
);
740 P_MSG_FLAG(DONTWAIT
);
747 P_MSG_FLAG(ERRQUEUE
);
748 P_MSG_FLAG(NOSIGNAL
);
750 P_MSG_FLAG(WAITFORONE
);
751 P_MSG_FLAG(SENDPAGE_NOTLAST
);
752 P_MSG_FLAG(FASTOPEN
);
753 P_MSG_FLAG(CMSG_CLOEXEC
);
757 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
762 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
764 static size_t syscall_arg__scnprintf_access_mode(char *bf
, size_t size
,
765 struct syscall_arg
*arg
)
770 if (mode
== F_OK
) /* 0 */
771 return scnprintf(bf
, size
, "F");
773 if (mode & n##_OK) { \
774 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
784 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", mode
);
789 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
791 static size_t syscall_arg__scnprintf_filename(char *bf
, size_t size
,
792 struct syscall_arg
*arg
);
794 #define SCA_FILENAME syscall_arg__scnprintf_filename
796 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
797 struct syscall_arg
*arg
)
799 int printed
= 0, flags
= arg
->val
;
801 if (!(flags
& O_CREAT
))
802 arg
->mask
|= 1 << (arg
->idx
+ 1); /* Mask the mode parm */
805 return scnprintf(bf
, size
, "RDONLY");
807 if (flags & O_##n) { \
808 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
832 if ((flags
& O_SYNC
) == O_SYNC
)
833 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
845 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
850 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
852 static size_t syscall_arg__scnprintf_perf_flags(char *bf
, size_t size
,
853 struct syscall_arg
*arg
)
855 int printed
= 0, flags
= arg
->val
;
861 if (flags & PERF_FLAG_##n) { \
862 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
863 flags &= ~PERF_FLAG_##n; \
873 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
878 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
880 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf
, size_t size
,
881 struct syscall_arg
*arg
)
883 int printed
= 0, flags
= arg
->val
;
886 return scnprintf(bf
, size
, "NONE");
888 if (flags & EFD_##n) { \
889 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
899 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
904 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
906 static size_t syscall_arg__scnprintf_pipe_flags(char *bf
, size_t size
,
907 struct syscall_arg
*arg
)
909 int printed
= 0, flags
= arg
->val
;
912 if (flags & O_##n) { \
913 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
922 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
927 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
929 static size_t syscall_arg__scnprintf_signum(char *bf
, size_t size
, struct syscall_arg
*arg
)
934 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
977 return scnprintf(bf
, size
, "%#x", sig
);
980 #define SCA_SIGNUM syscall_arg__scnprintf_signum
982 #if defined(__i386__) || defined(__x86_64__)
984 * FIXME: Make this available to all arches.
986 #define TCGETS 0x5401
988 static const char *tioctls
[] = {
989 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
990 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
991 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
992 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
993 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
994 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
995 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
996 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
997 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
998 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
999 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
1000 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
1001 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
1002 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
1003 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
1006 static DEFINE_STRARRAY_OFFSET(tioctls
, 0x5401);
1007 #endif /* defined(__i386__) || defined(__x86_64__) */
1009 static size_t syscall_arg__scnprintf_seccomp_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
1015 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
1016 P_SECCOMP_SET_MODE_OP(STRICT
);
1017 P_SECCOMP_SET_MODE_OP(FILTER
);
1018 #undef P_SECCOMP_SET_MODE_OP
1019 default: printed
= scnprintf(bf
, size
, "%#x", op
); break;
1025 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
1027 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf
, size_t size
,
1028 struct syscall_arg
*arg
)
1030 int printed
= 0, flags
= arg
->val
;
1033 if (flags & SECCOMP_FILTER_FLAG_##n) { \
1034 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1035 flags &= ~SECCOMP_FILTER_FLAG_##n; \
1042 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
1047 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
1049 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf
, size_t size
,
1050 struct syscall_arg
*arg
)
1052 int printed
= 0, flags
= arg
->val
;
1055 if (flags & GRND_##n) { \
1056 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
1057 flags &= ~GRND_##n; \
1065 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
1070 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/*
 * Initialiser fragment for a syscall_fmt entry: argument number 'idx' is
 * printed via SCA_STRARRAY using the table strarray__<table>. 'argname' is
 * not expanded; it only labels the argument at the point of use.
 */
#define STRARRAY(idx, argname, table) \
	.arg_scnprintf = { [idx] = SCA_STRARRAY, }, \
	.arg_parm = { [idx] = &strarray__##table, }
1076 static struct syscall_fmt
{
1079 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, struct syscall_arg
*arg
);
1084 } syscall_fmts
[] = {
1085 { .name
= "access", .errmsg
= true,
1086 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
1087 [1] = SCA_ACCMODE
, /* mode */ }, },
1088 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
1089 { .name
= "bpf", .errmsg
= true, STRARRAY(0, cmd
, bpf_cmd
), },
1090 { .name
= "brk", .hexret
= true,
1091 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
1092 { .name
= "chdir", .errmsg
= true,
1093 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1094 { .name
= "chmod", .errmsg
= true,
1095 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1096 { .name
= "chroot", .errmsg
= true,
1097 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1098 { .name
= "clock_gettime", .errmsg
= true, STRARRAY(0, clk_id
, clockid
), },
1099 { .name
= "close", .errmsg
= true,
1100 .arg_scnprintf
= { [0] = SCA_CLOSE_FD
, /* fd */ }, },
1101 { .name
= "connect", .errmsg
= true, },
1102 { .name
= "creat", .errmsg
= true,
1103 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1104 { .name
= "dup", .errmsg
= true,
1105 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1106 { .name
= "dup2", .errmsg
= true,
1107 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1108 { .name
= "dup3", .errmsg
= true,
1109 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1110 { .name
= "epoll_ctl", .errmsg
= true, STRARRAY(1, op
, epoll_ctl_ops
), },
1111 { .name
= "eventfd2", .errmsg
= true,
1112 .arg_scnprintf
= { [1] = SCA_EFD_FLAGS
, /* flags */ }, },
1113 { .name
= "faccessat", .errmsg
= true,
1114 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1115 [1] = SCA_FILENAME
, /* filename */ }, },
1116 { .name
= "fadvise64", .errmsg
= true,
1117 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1118 { .name
= "fallocate", .errmsg
= true,
1119 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1120 { .name
= "fchdir", .errmsg
= true,
1121 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1122 { .name
= "fchmod", .errmsg
= true,
1123 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1124 { .name
= "fchmodat", .errmsg
= true,
1125 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1126 [1] = SCA_FILENAME
, /* filename */ }, },
1127 { .name
= "fchown", .errmsg
= true,
1128 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1129 { .name
= "fchownat", .errmsg
= true,
1130 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1131 [1] = SCA_FILENAME
, /* filename */ }, },
1132 { .name
= "fcntl", .errmsg
= true,
1133 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1134 [1] = SCA_STRARRAY
, /* cmd */ },
1135 .arg_parm
= { [1] = &strarray__fcntl_cmds
, /* cmd */ }, },
1136 { .name
= "fdatasync", .errmsg
= true,
1137 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1138 { .name
= "flock", .errmsg
= true,
1139 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1140 [1] = SCA_FLOCK
, /* cmd */ }, },
1141 { .name
= "fsetxattr", .errmsg
= true,
1142 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1143 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat",
1144 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1145 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat",
1146 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1147 [1] = SCA_FILENAME
, /* filename */ }, },
1148 { .name
= "fstatfs", .errmsg
= true,
1149 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1150 { .name
= "fsync", .errmsg
= true,
1151 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1152 { .name
= "ftruncate", .errmsg
= true,
1153 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1154 { .name
= "futex", .errmsg
= true,
1155 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
1156 { .name
= "futimesat", .errmsg
= true,
1157 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1158 [1] = SCA_FILENAME
, /* filename */ }, },
1159 { .name
= "getdents", .errmsg
= true,
1160 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1161 { .name
= "getdents64", .errmsg
= true,
1162 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1163 { .name
= "getitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1164 { .name
= "getrandom", .errmsg
= true,
1165 .arg_scnprintf
= { [2] = SCA_GETRANDOM_FLAGS
, /* flags */ }, },
1166 { .name
= "getrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1167 { .name
= "getxattr", .errmsg
= true,
1168 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1169 { .name
= "inotify_add_watch", .errmsg
= true,
1170 .arg_scnprintf
= { [1] = SCA_FILENAME
, /* pathname */ }, },
1171 { .name
= "ioctl", .errmsg
= true,
1172 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1173 #if defined(__i386__) || defined(__x86_64__)
1175 * FIXME: Make this available to all arches.
1177 [1] = SCA_STRHEXARRAY
, /* cmd */
1178 [2] = SCA_HEX
, /* arg */ },
1179 .arg_parm
= { [1] = &strarray__tioctls
, /* cmd */ }, },
1181 [2] = SCA_HEX
, /* arg */ }, },
1183 { .name
= "keyctl", .errmsg
= true, STRARRAY(0, option
, keyctl_options
), },
1184 { .name
= "kill", .errmsg
= true,
1185 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1186 { .name
= "lchown", .errmsg
= true,
1187 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1188 { .name
= "lgetxattr", .errmsg
= true,
1189 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1190 { .name
= "linkat", .errmsg
= true,
1191 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1192 { .name
= "listxattr", .errmsg
= true,
1193 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1194 { .name
= "llistxattr", .errmsg
= true,
1195 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1196 { .name
= "lremovexattr", .errmsg
= true,
1197 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1198 { .name
= "lseek", .errmsg
= true,
1199 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1200 [2] = SCA_STRARRAY
, /* whence */ },
1201 .arg_parm
= { [2] = &strarray__whences
, /* whence */ }, },
1202 { .name
= "lsetxattr", .errmsg
= true,
1203 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1204 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat",
1205 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1206 { .name
= "lsxattr", .errmsg
= true,
1207 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1208 { .name
= "madvise", .errmsg
= true,
1209 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1210 [2] = SCA_MADV_BHV
, /* behavior */ }, },
1211 { .name
= "mkdir", .errmsg
= true,
1212 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1213 { .name
= "mkdirat", .errmsg
= true,
1214 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1215 [1] = SCA_FILENAME
, /* pathname */ }, },
1216 { .name
= "mknod", .errmsg
= true,
1217 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1218 { .name
= "mknodat", .errmsg
= true,
1219 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1220 [1] = SCA_FILENAME
, /* filename */ }, },
1221 { .name
= "mlock", .errmsg
= true,
1222 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1223 { .name
= "mlockall", .errmsg
= true,
1224 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1225 { .name
= "mmap", .hexret
= true,
1226 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1227 [2] = SCA_MMAP_PROT
, /* prot */
1228 [3] = SCA_MMAP_FLAGS
, /* flags */
1229 [4] = SCA_FD
, /* fd */ }, },
1230 { .name
= "mprotect", .errmsg
= true,
1231 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1232 [2] = SCA_MMAP_PROT
, /* prot */ }, },
1233 { .name
= "mq_unlink", .errmsg
= true,
1234 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* u_name */ }, },
1235 { .name
= "mremap", .hexret
= true,
1236 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1237 [3] = SCA_MREMAP_FLAGS
, /* flags */
1238 [4] = SCA_HEX
, /* new_addr */ }, },
1239 { .name
= "munlock", .errmsg
= true,
1240 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1241 { .name
= "munmap", .errmsg
= true,
1242 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1243 { .name
= "name_to_handle_at", .errmsg
= true,
1244 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1245 { .name
= "newfstatat", .errmsg
= true,
1246 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1247 [1] = SCA_FILENAME
, /* filename */ }, },
1248 { .name
= "open", .errmsg
= true,
1249 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
1250 [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
1251 { .name
= "open_by_handle_at", .errmsg
= true,
1252 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1253 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1254 { .name
= "openat", .errmsg
= true,
1255 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1256 [1] = SCA_FILENAME
, /* filename */
1257 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1258 { .name
= "perf_event_open", .errmsg
= true,
1259 .arg_scnprintf
= { [1] = SCA_INT
, /* pid */
1260 [2] = SCA_INT
, /* cpu */
1261 [3] = SCA_FD
, /* group_fd */
1262 [4] = SCA_PERF_FLAGS
, /* flags */ }, },
1263 { .name
= "pipe2", .errmsg
= true,
1264 .arg_scnprintf
= { [1] = SCA_PIPE_FLAGS
, /* flags */ }, },
1265 { .name
= "poll", .errmsg
= true, .timeout
= true, },
1266 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
1267 { .name
= "pread", .errmsg
= true, .alias
= "pread64",
1268 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1269 { .name
= "preadv", .errmsg
= true, .alias
= "pread",
1270 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1271 { .name
= "prlimit64", .errmsg
= true, STRARRAY(1, resource
, rlimit_resources
), },
1272 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64",
1273 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1274 { .name
= "pwritev", .errmsg
= true,
1275 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1276 { .name
= "read", .errmsg
= true,
1277 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1278 { .name
= "readlink", .errmsg
= true,
1279 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1280 { .name
= "readlinkat", .errmsg
= true,
1281 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1282 [1] = SCA_FILENAME
, /* pathname */ }, },
1283 { .name
= "readv", .errmsg
= true,
1284 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1285 { .name
= "recvfrom", .errmsg
= true,
1286 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1287 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1288 { .name
= "recvmmsg", .errmsg
= true,
1289 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1290 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1291 { .name
= "recvmsg", .errmsg
= true,
1292 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1293 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1294 { .name
= "removexattr", .errmsg
= true,
1295 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1296 { .name
= "renameat", .errmsg
= true,
1297 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1298 { .name
= "rmdir", .errmsg
= true,
1299 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1300 { .name
= "rt_sigaction", .errmsg
= true,
1301 .arg_scnprintf
= { [0] = SCA_SIGNUM
, /* sig */ }, },
1302 { .name
= "rt_sigprocmask", .errmsg
= true, STRARRAY(0, how
, sighow
), },
1303 { .name
= "rt_sigqueueinfo", .errmsg
= true,
1304 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1305 { .name
= "rt_tgsigqueueinfo", .errmsg
= true,
1306 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1307 { .name
= "seccomp", .errmsg
= true,
1308 .arg_scnprintf
= { [0] = SCA_SECCOMP_OP
, /* op */
1309 [1] = SCA_SECCOMP_FLAGS
, /* flags */ }, },
1310 { .name
= "select", .errmsg
= true, .timeout
= true, },
1311 { .name
= "sendmmsg", .errmsg
= true,
1312 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1313 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1314 { .name
= "sendmsg", .errmsg
= true,
1315 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1316 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1317 { .name
= "sendto", .errmsg
= true,
1318 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1319 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1320 { .name
= "setitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1321 { .name
= "setrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1322 { .name
= "setxattr", .errmsg
= true,
1323 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1324 { .name
= "shutdown", .errmsg
= true,
1325 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1326 { .name
= "socket", .errmsg
= true,
1327 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1328 [1] = SCA_SK_TYPE
, /* type */ },
1329 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1330 { .name
= "socketpair", .errmsg
= true,
1331 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1332 [1] = SCA_SK_TYPE
, /* type */ },
1333 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1334 { .name
= "stat", .errmsg
= true, .alias
= "newstat",
1335 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1336 { .name
= "statfs", .errmsg
= true,
1337 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1338 { .name
= "swapoff", .errmsg
= true,
1339 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1340 { .name
= "swapon", .errmsg
= true,
1341 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1342 { .name
= "symlinkat", .errmsg
= true,
1343 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1344 { .name
= "tgkill", .errmsg
= true,
1345 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1346 { .name
= "tkill", .errmsg
= true,
1347 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1348 { .name
= "truncate", .errmsg
= true,
1349 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1350 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
1351 { .name
= "unlinkat", .errmsg
= true,
1352 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1353 [1] = SCA_FILENAME
, /* pathname */ }, },
1354 { .name
= "utime", .errmsg
= true,
1355 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1356 { .name
= "utimensat", .errmsg
= true,
1357 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dirfd */
1358 [1] = SCA_FILENAME
, /* filename */ }, },
1359 { .name
= "utimes", .errmsg
= true,
1360 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1361 { .name
= "vmsplice", .errmsg
= true,
1362 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1363 { .name
= "write", .errmsg
= true,
1364 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1365 { .name
= "writev", .errmsg
= true,
1366 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1369 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
1371 const struct syscall_fmt
*fmt
= fmtp
;
1372 return strcmp(name
, fmt
->name
);
1375 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
1377 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
1378 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
1382 struct event_format
*tp_format
;
1384 struct format_field
*args
;
1387 struct syscall_fmt
*fmt
;
1388 size_t (**arg_scnprintf
)(char *bf
, size_t size
, struct syscall_arg
*arg
);
1392 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
1394 double duration
= (double)t
/ NSEC_PER_MSEC
;
1395 size_t printed
= fprintf(fp
, "(");
1397 if (duration
>= 1.0)
1398 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
1399 else if (duration
>= 0.01)
1400 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
1402 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
1403 return printed
+ fprintf(fp
, "): ");
1407 * filename.ptr: The filename char pointer that will be vfs_getname'd
1408 * filename.entry_str_pos: Where to insert the string translated from
1409 * filename.ptr by the vfs_getname tracepoint/kprobe.
1411 struct thread_trace
{
1415 unsigned long nr_events
;
1416 unsigned long pfmaj
, pfmin
;
1421 short int entry_str_pos
;
1423 unsigned int namelen
;
1431 struct intlist
*syscall_stats
;
1434 static struct thread_trace
*thread_trace__new(void)
1436 struct thread_trace
*ttrace
= zalloc(sizeof(struct thread_trace
));
1439 ttrace
->paths
.max
= -1;
1441 ttrace
->syscall_stats
= intlist__new(NULL
);
1446 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
1448 struct thread_trace
*ttrace
;
1453 if (thread__priv(thread
) == NULL
)
1454 thread__set_priv(thread
, thread_trace__new());
1456 if (thread__priv(thread
) == NULL
)
1459 ttrace
= thread__priv(thread
);
1460 ++ttrace
->nr_events
;
1464 color_fprintf(fp
, PERF_COLOR_RED
,
1465 "WARNING: not enough memory, dropping samples!\n");
1469 #define TRACE_PFMAJ (1 << 0)
1470 #define TRACE_PFMIN (1 << 1)
1472 static const size_t trace__entry_str_size
= 2048;
1475 struct perf_tool tool
;
1482 struct syscall
*table
;
1484 struct perf_evsel
*sys_enter
,
1488 struct record_opts opts
;
1489 struct perf_evlist
*evlist
;
1490 struct machine
*host
;
1491 struct thread
*current
;
1494 unsigned long nr_events
;
1495 struct strlist
*ev_qualifier
;
1500 struct intlist
*tid_list
;
1501 struct intlist
*pid_list
;
1506 double duration_filter
;
1512 bool not_ev_qualifier
;
1516 bool multiple_threads
;
1520 bool show_tool_stats
;
1521 bool trace_syscalls
;
1527 static int trace__set_fd_pathname(struct thread
*thread
, int fd
, const char *pathname
)
1529 struct thread_trace
*ttrace
= thread__priv(thread
);
1531 if (fd
> ttrace
->paths
.max
) {
1532 char **npath
= realloc(ttrace
->paths
.table
, (fd
+ 1) * sizeof(char *));
1537 if (ttrace
->paths
.max
!= -1) {
1538 memset(npath
+ ttrace
->paths
.max
+ 1, 0,
1539 (fd
- ttrace
->paths
.max
) * sizeof(char *));
1541 memset(npath
, 0, (fd
+ 1) * sizeof(char *));
1544 ttrace
->paths
.table
= npath
;
1545 ttrace
->paths
.max
= fd
;
1548 ttrace
->paths
.table
[fd
] = strdup(pathname
);
1550 return ttrace
->paths
.table
[fd
] != NULL
? 0 : -1;
1553 static int thread__read_fd_path(struct thread
*thread
, int fd
)
1555 char linkname
[PATH_MAX
], pathname
[PATH_MAX
];
1559 if (thread
->pid_
== thread
->tid
) {
1560 scnprintf(linkname
, sizeof(linkname
),
1561 "/proc/%d/fd/%d", thread
->pid_
, fd
);
1563 scnprintf(linkname
, sizeof(linkname
),
1564 "/proc/%d/task/%d/fd/%d", thread
->pid_
, thread
->tid
, fd
);
1567 if (lstat(linkname
, &st
) < 0 || st
.st_size
+ 1 > (off_t
)sizeof(pathname
))
1570 ret
= readlink(linkname
, pathname
, sizeof(pathname
));
1572 if (ret
< 0 || ret
> st
.st_size
)
1575 pathname
[ret
] = '\0';
1576 return trace__set_fd_pathname(thread
, fd
, pathname
);
1579 static const char *thread__fd_path(struct thread
*thread
, int fd
,
1580 struct trace
*trace
)
1582 struct thread_trace
*ttrace
= thread__priv(thread
);
1590 if ((fd
> ttrace
->paths
.max
|| ttrace
->paths
.table
[fd
] == NULL
)) {
1593 ++trace
->stats
.proc_getname
;
1594 if (thread__read_fd_path(thread
, fd
))
1598 return ttrace
->paths
.table
[fd
];
1601 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
1602 struct syscall_arg
*arg
)
1605 size_t printed
= scnprintf(bf
, size
, "%d", fd
);
1606 const char *path
= thread__fd_path(arg
->thread
, fd
, arg
->trace
);
1609 printed
+= scnprintf(bf
+ printed
, size
- printed
, "<%s>", path
);
1614 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
1615 struct syscall_arg
*arg
)
1618 size_t printed
= syscall_arg__scnprintf_fd(bf
, size
, arg
);
1619 struct thread_trace
*ttrace
= thread__priv(arg
->thread
);
1621 if (ttrace
&& fd
>= 0 && fd
<= ttrace
->paths
.max
)
1622 zfree(&ttrace
->paths
.table
[fd
]);
1627 static void thread__set_filename_pos(struct thread
*thread
, const char *bf
,
1630 struct thread_trace
*ttrace
= thread__priv(thread
);
1632 ttrace
->filename
.ptr
= ptr
;
1633 ttrace
->filename
.entry_str_pos
= bf
- ttrace
->entry_str
;
1636 static size_t syscall_arg__scnprintf_filename(char *bf
, size_t size
,
1637 struct syscall_arg
*arg
)
1639 unsigned long ptr
= arg
->val
;
1641 if (!arg
->trace
->vfs_getname
)
1642 return scnprintf(bf
, size
, "%#x", ptr
);
1644 thread__set_filename_pos(arg
->thread
, bf
, ptr
);
1648 static bool trace__filter_duration(struct trace
*trace
, double t
)
1650 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
1653 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
1655 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
1657 return fprintf(fp
, "%10.3f ", ts
);
/*
 * Flags shared between sig_handler() and the rest of the program.
 *
 * They are written from a signal handler, so they are volatile sig_atomic_t:
 * that is the only object type the C standard lets a handler store to
 * portably. Both hold 0 or 1 and can still be tested and assigned like
 * booleans.
 */
static volatile sig_atomic_t done;
static volatile sig_atomic_t interrupted;

/*
 * Signal handler: request termination, and note whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = (sig == SIGINT);
}
1669 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
1670 u64 duration
, u64 tstamp
, FILE *fp
)
1672 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
1673 printed
+= fprintf_duration(duration
, fp
);
1675 if (trace
->multiple_threads
) {
1676 if (trace
->show_comm
)
1677 printed
+= fprintf(fp
, "%.14s/", thread__comm_str(thread
));
1678 printed
+= fprintf(fp
, "%d ", thread
->tid
);
1684 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
1685 union perf_event
*event
, struct perf_sample
*sample
)
1689 switch (event
->header
.type
) {
1690 case PERF_RECORD_LOST
:
1691 color_fprintf(trace
->output
, PERF_COLOR_RED
,
1692 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
1693 ret
= machine__process_lost_event(machine
, event
, sample
);
1696 ret
= machine__process_event(machine
, event
, sample
);
1703 static int trace__tool_process(struct perf_tool
*tool
,
1704 union perf_event
*event
,
1705 struct perf_sample
*sample
,
1706 struct machine
*machine
)
1708 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
1709 return trace__process_event(trace
, machine
, event
, sample
);
1712 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
1714 int err
= symbol__init(NULL
);
1719 trace
->host
= machine__new_host();
1720 if (trace
->host
== NULL
)
1723 if (trace_event__register_resolver(trace
->host
, machine__resolve_kernel_addr
) < 0)
1726 err
= __machine__synthesize_threads(trace
->host
, &trace
->tool
, &trace
->opts
.target
,
1727 evlist
->threads
, trace__tool_process
, false,
1728 trace
->opts
.proc_map_timeout
);
1735 static int syscall__set_arg_fmts(struct syscall
*sc
)
1737 struct format_field
*field
;
1740 sc
->arg_scnprintf
= calloc(sc
->nr_args
, sizeof(void *));
1741 if (sc
->arg_scnprintf
== NULL
)
1745 sc
->arg_parm
= sc
->fmt
->arg_parm
;
1747 for (field
= sc
->args
; field
; field
= field
->next
) {
1748 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
1749 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
1750 else if (field
->flags
& FIELD_IS_POINTER
)
1751 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
1758 static int trace__read_syscall_info(struct trace
*trace
, int id
)
1762 const char *name
= audit_syscall_to_name(id
, trace
->audit
.machine
);
1767 if (id
> trace
->syscalls
.max
) {
1768 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
1770 if (nsyscalls
== NULL
)
1773 if (trace
->syscalls
.max
!= -1) {
1774 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
1775 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
1777 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
1780 trace
->syscalls
.table
= nsyscalls
;
1781 trace
->syscalls
.max
= id
;
1784 sc
= trace
->syscalls
.table
+ id
;
1787 sc
->fmt
= syscall_fmt__find(sc
->name
);
1789 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
1790 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1792 if (IS_ERR(sc
->tp_format
) && sc
->fmt
&& sc
->fmt
->alias
) {
1793 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
1794 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1797 if (IS_ERR(sc
->tp_format
))
1800 sc
->args
= sc
->tp_format
->format
.fields
;
1801 sc
->nr_args
= sc
->tp_format
->format
.nr_fields
;
1803 * We need to check and discard the first variable '__syscall_nr'
1804 * or 'nr' that mean the syscall number. It is needless here.
1805 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1807 if (sc
->args
&& (!strcmp(sc
->args
->name
, "__syscall_nr") || !strcmp(sc
->args
->name
, "nr"))) {
1808 sc
->args
= sc
->args
->next
;
1812 sc
->is_exit
= !strcmp(name
, "exit_group") || !strcmp(name
, "exit");
1814 return syscall__set_arg_fmts(sc
);
1817 static int trace__validate_ev_qualifier(struct trace
*trace
)
1820 struct str_node
*pos
;
1822 trace
->ev_qualifier_ids
.nr
= strlist__nr_entries(trace
->ev_qualifier
);
1823 trace
->ev_qualifier_ids
.entries
= malloc(trace
->ev_qualifier_ids
.nr
*
1824 sizeof(trace
->ev_qualifier_ids
.entries
[0]));
1826 if (trace
->ev_qualifier_ids
.entries
== NULL
) {
1827 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1835 strlist__for_each(pos
, trace
->ev_qualifier
) {
1836 const char *sc
= pos
->s
;
1837 int id
= audit_name_to_syscall(sc
, trace
->audit
.machine
);
1841 fputs("Error:\tInvalid syscall ", trace
->output
);
1844 fputs(", ", trace
->output
);
1847 fputs(sc
, trace
->output
);
1850 trace
->ev_qualifier_ids
.entries
[i
++] = id
;
1854 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1855 "\nHint:\tand: 'man syscalls'\n", trace
->output
);
1856 zfree(&trace
->ev_qualifier_ids
.entries
);
1857 trace
->ev_qualifier_ids
.nr
= 0;
1864 * args is to be interpreted as a series of longs but we need to handle
1865 * 8-byte unaligned accesses. args points to raw_data within the event
1866 * and raw_data is guaranteed to be 8-byte unaligned because it is
1867 * preceded by raw_size which is a u32. So we need to copy args to a temp
1868 * variable to read it. Most notably this avoids extended load instructions
1869 * on unaligned addresses
1872 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
1873 unsigned char *args
, struct trace
*trace
,
1874 struct thread
*thread
)
1880 if (sc
->args
!= NULL
) {
1881 struct format_field
*field
;
1883 struct syscall_arg arg
= {
1890 for (field
= sc
->args
; field
;
1891 field
= field
->next
, ++arg
.idx
, bit
<<= 1) {
1895 /* special care for unaligned accesses */
1896 p
= args
+ sizeof(unsigned long) * arg
.idx
;
1897 memcpy(&val
, p
, sizeof(val
));
1900 * Suppress this argument if its value is zero and
1901 * and we don't have a string associated in an
1905 !(sc
->arg_scnprintf
&&
1906 sc
->arg_scnprintf
[arg
.idx
] == SCA_STRARRAY
&&
1907 sc
->arg_parm
[arg
.idx
]))
1910 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1911 "%s%s: ", printed
? ", " : "", field
->name
);
1912 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[arg
.idx
]) {
1915 arg
.parm
= sc
->arg_parm
[arg
.idx
];
1916 printed
+= sc
->arg_scnprintf
[arg
.idx
](bf
+ printed
,
1917 size
- printed
, &arg
);
1919 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1927 /* special care for unaligned accesses */
1928 p
= args
+ sizeof(unsigned long) * i
;
1929 memcpy(&val
, p
, sizeof(val
));
1930 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1932 printed
? ", " : "", i
, val
);
1940 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
1941 union perf_event
*event
,
1942 struct perf_sample
*sample
);
1944 static struct syscall
*trace__syscall_info(struct trace
*trace
,
1945 struct perf_evsel
*evsel
, int id
)
1951 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1952 * before that, leaving at a higher verbosity level till that is
1953 * explained. Reproduced with plain ftrace with:
1955 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1956 * grep "NR -1 " /t/trace_pipe
1958 * After generating some load on the machine.
1962 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
1963 id
, perf_evsel__name(evsel
), ++n
);
1968 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
1969 trace__read_syscall_info(trace
, id
))
1972 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
1975 return &trace
->syscalls
.table
[id
];
1979 fprintf(trace
->output
, "Problems reading syscall %d", id
);
1980 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
1981 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
1982 fputs(" information\n", trace
->output
);
1987 static void thread__update_stats(struct thread_trace
*ttrace
,
1988 int id
, struct perf_sample
*sample
)
1990 struct int_node
*inode
;
1991 struct stats
*stats
;
1994 inode
= intlist__findnew(ttrace
->syscall_stats
, id
);
1998 stats
= inode
->priv
;
1999 if (stats
== NULL
) {
2000 stats
= malloc(sizeof(struct stats
));
2004 inode
->priv
= stats
;
2007 if (ttrace
->entry_time
&& sample
->time
> ttrace
->entry_time
)
2008 duration
= sample
->time
- ttrace
->entry_time
;
2010 update_stats(stats
, duration
);
2013 static int trace__printf_interrupted_entry(struct trace
*trace
, struct perf_sample
*sample
)
2015 struct thread_trace
*ttrace
;
2019 if (trace
->current
== NULL
)
2022 ttrace
= thread__priv(trace
->current
);
2024 if (!ttrace
->entry_pending
)
2027 duration
= sample
->time
- ttrace
->entry_time
;
2029 printed
= trace__fprintf_entry_head(trace
, trace
->current
, duration
, sample
->time
, trace
->output
);
2030 printed
+= fprintf(trace
->output
, "%-70s) ...\n", ttrace
->entry_str
);
2031 ttrace
->entry_pending
= false;
2036 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
2037 union perf_event
*event __maybe_unused
,
2038 struct perf_sample
*sample
)
2043 struct thread
*thread
;
2044 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1;
2045 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
2046 struct thread_trace
*ttrace
;
2051 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2052 ttrace
= thread__trace(thread
, trace
->output
);
2056 args
= perf_evsel__sc_tp_ptr(evsel
, args
, sample
);
2058 if (ttrace
->entry_str
== NULL
) {
2059 ttrace
->entry_str
= malloc(trace__entry_str_size
);
2060 if (!ttrace
->entry_str
)
2064 if (!trace
->summary_only
)
2065 trace__printf_interrupted_entry(trace
, sample
);
2067 ttrace
->entry_time
= sample
->time
;
2068 msg
= ttrace
->entry_str
;
2069 printed
+= scnprintf(msg
+ printed
, trace__entry_str_size
- printed
, "%s(", sc
->name
);
2071 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, trace__entry_str_size
- printed
,
2072 args
, trace
, thread
);
2075 if (!trace
->duration_filter
&& !trace
->summary_only
) {
2076 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
2077 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
2080 ttrace
->entry_pending
= true;
2081 /* See trace__vfs_getname & trace__sys_exit */
2082 ttrace
->filename
.pending_open
= false;
2085 if (trace
->current
!= thread
) {
2086 thread__put(trace
->current
);
2087 trace
->current
= thread__get(thread
);
2091 thread__put(thread
);
2095 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
2096 union perf_event
*event __maybe_unused
,
2097 struct perf_sample
*sample
)
2101 struct thread
*thread
;
2102 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1;
2103 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
2104 struct thread_trace
*ttrace
;
2109 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2110 ttrace
= thread__trace(thread
, trace
->output
);
2115 thread__update_stats(ttrace
, id
, sample
);
2117 ret
= perf_evsel__sc_tp_uint(evsel
, ret
, sample
);
2119 if (id
== trace
->audit
.open_id
&& ret
>= 0 && ttrace
->filename
.pending_open
) {
2120 trace__set_fd_pathname(thread
, ret
, ttrace
->filename
.name
);
2121 ttrace
->filename
.pending_open
= false;
2122 ++trace
->stats
.vfs_getname
;
2125 ttrace
->exit_time
= sample
->time
;
2127 if (ttrace
->entry_time
) {
2128 duration
= sample
->time
- ttrace
->entry_time
;
2129 if (trace__filter_duration(trace
, duration
))
2131 } else if (trace
->duration_filter
)
2134 if (trace
->summary_only
)
2137 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
2139 if (ttrace
->entry_pending
) {
2140 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
2142 fprintf(trace
->output
, " ... [");
2143 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
2144 fprintf(trace
->output
, "]: %s()", sc
->name
);
2147 if (sc
->fmt
== NULL
) {
2149 fprintf(trace
->output
, ") = %ld", ret
);
2150 } else if (ret
< 0 && sc
->fmt
->errmsg
) {
2151 char bf
[STRERR_BUFSIZE
];
2152 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
2153 *e
= audit_errno_to_name(-ret
);
2155 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
2156 } else if (ret
== 0 && sc
->fmt
->timeout
)
2157 fprintf(trace
->output
, ") = 0 Timeout");
2158 else if (sc
->fmt
->hexret
)
2159 fprintf(trace
->output
, ") = %#lx", ret
);
2163 fputc('\n', trace
->output
);
2165 ttrace
->entry_pending
= false;
2168 thread__put(thread
);
2172 static int trace__vfs_getname(struct trace
*trace
, struct perf_evsel
*evsel
,
2173 union perf_event
*event __maybe_unused
,
2174 struct perf_sample
*sample
)
2176 struct thread
*thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2177 struct thread_trace
*ttrace
;
2178 size_t filename_len
, entry_str_len
, to_move
;
2179 ssize_t remaining_space
;
2181 const char *filename
= perf_evsel__rawptr(evsel
, sample
, "pathname");
2186 ttrace
= thread__priv(thread
);
2190 filename_len
= strlen(filename
);
2192 if (ttrace
->filename
.namelen
< filename_len
) {
2193 char *f
= realloc(ttrace
->filename
.name
, filename_len
+ 1);
2198 ttrace
->filename
.namelen
= filename_len
;
2199 ttrace
->filename
.name
= f
;
2202 strcpy(ttrace
->filename
.name
, filename
);
2203 ttrace
->filename
.pending_open
= true;
2205 if (!ttrace
->filename
.ptr
)
2208 entry_str_len
= strlen(ttrace
->entry_str
);
2209 remaining_space
= trace__entry_str_size
- entry_str_len
- 1; /* \0 */
2210 if (remaining_space
<= 0)
2213 if (filename_len
> (size_t)remaining_space
) {
2214 filename
+= filename_len
- remaining_space
;
2215 filename_len
= remaining_space
;
2218 to_move
= entry_str_len
- ttrace
->filename
.entry_str_pos
+ 1; /* \0 */
2219 pos
= ttrace
->entry_str
+ ttrace
->filename
.entry_str_pos
;
2220 memmove(pos
+ filename_len
, pos
, to_move
);
2221 memcpy(pos
, filename
, filename_len
);
2223 ttrace
->filename
.ptr
= 0;
2224 ttrace
->filename
.entry_str_pos
= 0;
2229 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
2230 union perf_event
*event __maybe_unused
,
2231 struct perf_sample
*sample
)
2233 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
2234 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
2235 struct thread
*thread
= machine__findnew_thread(trace
->host
,
2238 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
2243 ttrace
->runtime_ms
+= runtime_ms
;
2244 trace
->runtime_ms
+= runtime_ms
;
2245 thread__put(thread
);
2249 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
2251 perf_evsel__strval(evsel
, sample
, "comm"),
2252 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
2254 perf_evsel__intval(evsel
, sample
, "vruntime"));
2255 thread__put(thread
);
2259 static void bpf_output__printer(enum binary_printer_ops op
,
2260 unsigned int val
, void *extra
)
2262 FILE *output
= extra
;
2263 unsigned char ch
= (unsigned char)val
;
2266 case BINARY_PRINT_CHAR_DATA
:
2267 fprintf(output
, "%c", isprint(ch
) ? ch
: '.');
2269 case BINARY_PRINT_DATA_BEGIN
:
2270 case BINARY_PRINT_LINE_BEGIN
:
2271 case BINARY_PRINT_ADDR
:
2272 case BINARY_PRINT_NUM_DATA
:
2273 case BINARY_PRINT_NUM_PAD
:
2274 case BINARY_PRINT_SEP
:
2275 case BINARY_PRINT_CHAR_PAD
:
2276 case BINARY_PRINT_LINE_END
:
2277 case BINARY_PRINT_DATA_END
:
2283 static void bpf_output__fprintf(struct trace
*trace
,
2284 struct perf_sample
*sample
)
2286 print_binary(sample
->raw_data
, sample
->raw_size
, 8,
2287 bpf_output__printer
, trace
->output
);
2290 static int trace__event_handler(struct trace
*trace
, struct perf_evsel
*evsel
,
2291 union perf_event
*event __maybe_unused
,
2292 struct perf_sample
*sample
)
2294 trace__printf_interrupted_entry(trace
, sample
);
2295 trace__fprintf_tstamp(trace
, sample
->time
, trace
->output
);
2297 if (trace
->trace_syscalls
)
2298 fprintf(trace
->output
, "( ): ");
2300 fprintf(trace
->output
, "%s:", evsel
->name
);
2302 if (perf_evsel__is_bpf_output(evsel
)) {
2303 bpf_output__fprintf(trace
, sample
);
2304 } else if (evsel
->tp_format
) {
2305 event_format__fprintf(evsel
->tp_format
, sample
->cpu
,
2306 sample
->raw_data
, sample
->raw_size
,
2310 fprintf(trace
->output
, ")\n");
2314 static void print_location(FILE *f
, struct perf_sample
*sample
,
2315 struct addr_location
*al
,
2316 bool print_dso
, bool print_sym
)
2319 if ((verbose
|| print_dso
) && al
->map
)
2320 fprintf(f
, "%s@", al
->map
->dso
->long_name
);
2322 if ((verbose
|| print_sym
) && al
->sym
)
2323 fprintf(f
, "%s+0x%" PRIx64
, al
->sym
->name
,
2324 al
->addr
- al
->sym
->start
);
2326 fprintf(f
, "0x%" PRIx64
, al
->addr
);
2328 fprintf(f
, "0x%" PRIx64
, sample
->addr
);
2331 static int trace__pgfault(struct trace
*trace
,
2332 struct perf_evsel
*evsel
,
2333 union perf_event
*event __maybe_unused
,
2334 struct perf_sample
*sample
)
2336 struct thread
*thread
;
2337 struct addr_location al
;
2338 char map_type
= 'd';
2339 struct thread_trace
*ttrace
;
2342 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2343 ttrace
= thread__trace(thread
, trace
->output
);
2347 if (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
)
2352 if (trace
->summary_only
)
2355 thread__find_addr_location(thread
, sample
->cpumode
, MAP__FUNCTION
,
2358 trace__fprintf_entry_head(trace
, thread
, 0, sample
->time
, trace
->output
);
2360 fprintf(trace
->output
, "%sfault [",
2361 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
?
2364 print_location(trace
->output
, sample
, &al
, false, true);
2366 fprintf(trace
->output
, "] => ");
2368 thread__find_addr_location(thread
, sample
->cpumode
, MAP__VARIABLE
,
2372 thread__find_addr_location(thread
, sample
->cpumode
,
2373 MAP__FUNCTION
, sample
->addr
, &al
);
2381 print_location(trace
->output
, sample
, &al
, true, false);
2383 fprintf(trace
->output
, " (%c%c)\n", map_type
, al
.level
);
2387 thread__put(thread
);
2391 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
2393 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
2394 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
2397 if (trace
->pid_list
|| trace
->tid_list
)
2403 static void trace__set_base_time(struct trace
*trace
,
2404 struct perf_evsel
*evsel
,
2405 struct perf_sample
*sample
)
2408 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2409 * and don't use sample->time unconditionally, we may end up having
2410 * some other event in the future without PERF_SAMPLE_TIME for good
2411 * reason, i.e. we may not be interested in its timestamps, just in
2412 * it taking place, picking some piece of information when it
2413 * appears in our event stream (vfs_getname comes to mind).
2415 if (trace
->base_time
== 0 && !trace
->full_time
&&
2416 (evsel
->attr
.sample_type
& PERF_SAMPLE_TIME
))
2417 trace
->base_time
= sample
->time
;
2420 static int trace__process_sample(struct perf_tool
*tool
,
2421 union perf_event
*event
,
2422 struct perf_sample
*sample
,
2423 struct perf_evsel
*evsel
,
2424 struct machine
*machine __maybe_unused
)
2426 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
2429 tracepoint_handler handler
= evsel
->handler
;
2431 if (skip_sample(trace
, sample
))
2434 trace__set_base_time(trace
, evsel
, sample
);
2438 handler(trace
, evsel
, event
, sample
);
2444 static int parse_target_str(struct trace
*trace
)
2446 if (trace
->opts
.target
.pid
) {
2447 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
2448 if (trace
->pid_list
== NULL
) {
2449 pr_err("Error parsing process id string\n");
2454 if (trace
->opts
.target
.tid
) {
2455 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
2456 if (trace
->tid_list
== NULL
) {
2457 pr_err("Error parsing thread id string\n");
2465 static int trace__record(struct trace
*trace
, int argc
, const char **argv
)
2467 unsigned int rec_argc
, i
, j
;
2468 const char **rec_argv
;
2469 const char * const record_args
[] = {
2476 const char * const sc_args
[] = { "-e", };
2477 unsigned int sc_args_nr
= ARRAY_SIZE(sc_args
);
2478 const char * const majpf_args
[] = { "-e", "major-faults" };
2479 unsigned int majpf_args_nr
= ARRAY_SIZE(majpf_args
);
2480 const char * const minpf_args
[] = { "-e", "minor-faults" };
2481 unsigned int minpf_args_nr
= ARRAY_SIZE(minpf_args
);
2483 /* +1 is for the event string below */
2484 rec_argc
= ARRAY_SIZE(record_args
) + sc_args_nr
+ 1 +
2485 majpf_args_nr
+ minpf_args_nr
+ argc
;
2486 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
2488 if (rec_argv
== NULL
)
2492 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
2493 rec_argv
[j
++] = record_args
[i
];
2495 if (trace
->trace_syscalls
) {
2496 for (i
= 0; i
< sc_args_nr
; i
++)
2497 rec_argv
[j
++] = sc_args
[i
];
2499 /* event string may be different for older kernels - e.g., RHEL6 */
2500 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2501 rec_argv
[j
++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2502 else if (is_valid_tracepoint("syscalls:sys_enter"))
2503 rec_argv
[j
++] = "syscalls:sys_enter,syscalls:sys_exit";
2505 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2510 if (trace
->trace_pgfaults
& TRACE_PFMAJ
)
2511 for (i
= 0; i
< majpf_args_nr
; i
++)
2512 rec_argv
[j
++] = majpf_args
[i
];
2514 if (trace
->trace_pgfaults
& TRACE_PFMIN
)
2515 for (i
= 0; i
< minpf_args_nr
; i
++)
2516 rec_argv
[j
++] = minpf_args
[i
];
2518 for (i
= 0; i
< (unsigned int)argc
; i
++)
2519 rec_argv
[j
++] = argv
[i
];
2521 return cmd_record(j
, rec_argv
, NULL
);
2524 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
);
2526 static bool perf_evlist__add_vfs_getname(struct perf_evlist
*evlist
)
2528 struct perf_evsel
*evsel
= perf_evsel__newtp("probe", "vfs_getname");
2533 if (perf_evsel__field(evsel
, "pathname") == NULL
) {
2534 perf_evsel__delete(evsel
);
2538 evsel
->handler
= trace__vfs_getname
;
2539 perf_evlist__add(evlist
, evsel
);
2543 static int perf_evlist__add_pgfault(struct perf_evlist
*evlist
,
2546 struct perf_evsel
*evsel
;
2547 struct perf_event_attr attr
= {
2548 .type
= PERF_TYPE_SOFTWARE
,
2552 attr
.config
= config
;
2553 attr
.sample_period
= 1;
2555 event_attr_init(&attr
);
2557 evsel
= perf_evsel__new(&attr
);
2561 evsel
->handler
= trace__pgfault
;
2562 perf_evlist__add(evlist
, evsel
);
2567 static void trace__handle_event(struct trace
*trace
, union perf_event
*event
, struct perf_sample
*sample
)
2569 const u32 type
= event
->header
.type
;
2570 struct perf_evsel
*evsel
;
2572 if (type
!= PERF_RECORD_SAMPLE
) {
2573 trace__process_event(trace
, trace
->host
, event
, sample
);
2577 evsel
= perf_evlist__id2evsel(trace
->evlist
, sample
->id
);
2578 if (evsel
== NULL
) {
2579 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
->id
);
2583 trace__set_base_time(trace
, evsel
, sample
);
2585 if (evsel
->attr
.type
== PERF_TYPE_TRACEPOINT
&&
2586 sample
->raw_data
== NULL
) {
2587 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2588 perf_evsel__name(evsel
), sample
->tid
,
2589 sample
->cpu
, sample
->raw_size
);
2591 tracepoint_handler handler
= evsel
->handler
;
2592 handler(trace
, evsel
, event
, sample
);
2596 static int trace__add_syscall_newtp(struct trace
*trace
)
2599 struct perf_evlist
*evlist
= trace
->evlist
;
2600 struct perf_evsel
*sys_enter
, *sys_exit
;
2602 sys_enter
= perf_evsel__syscall_newtp("sys_enter", trace__sys_enter
);
2603 if (sys_enter
== NULL
)
2606 if (perf_evsel__init_sc_tp_ptr_field(sys_enter
, args
))
2607 goto out_delete_sys_enter
;
2609 sys_exit
= perf_evsel__syscall_newtp("sys_exit", trace__sys_exit
);
2610 if (sys_exit
== NULL
)
2611 goto out_delete_sys_enter
;
2613 if (perf_evsel__init_sc_tp_uint_field(sys_exit
, ret
))
2614 goto out_delete_sys_exit
;
2616 perf_evlist__add(evlist
, sys_enter
);
2617 perf_evlist__add(evlist
, sys_exit
);
2619 trace
->syscalls
.events
.sys_enter
= sys_enter
;
2620 trace
->syscalls
.events
.sys_exit
= sys_exit
;
2626 out_delete_sys_exit
:
2627 perf_evsel__delete_priv(sys_exit
);
2628 out_delete_sys_enter
:
2629 perf_evsel__delete_priv(sys_enter
);
2633 static int trace__set_ev_qualifier_filter(struct trace
*trace
)
2636 char *filter
= asprintf_expr_inout_ints("id", !trace
->not_ev_qualifier
,
2637 trace
->ev_qualifier_ids
.nr
,
2638 trace
->ev_qualifier_ids
.entries
);
2643 if (!perf_evsel__append_filter(trace
->syscalls
.events
.sys_enter
, "&&", filter
))
2644 err
= perf_evsel__append_filter(trace
->syscalls
.events
.sys_exit
, "&&", filter
);
2654 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
2656 struct perf_evlist
*evlist
= trace
->evlist
;
2657 struct perf_evsel
*evsel
;
2659 unsigned long before
;
2660 const bool forks
= argc
> 0;
2661 bool draining
= false;
2665 if (trace
->trace_syscalls
&& trace__add_syscall_newtp(trace
))
2666 goto out_error_raw_syscalls
;
2668 if (trace
->trace_syscalls
)
2669 trace
->vfs_getname
= perf_evlist__add_vfs_getname(evlist
);
2671 if ((trace
->trace_pgfaults
& TRACE_PFMAJ
) &&
2672 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MAJ
)) {
2676 if ((trace
->trace_pgfaults
& TRACE_PFMIN
) &&
2677 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MIN
))
2681 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
2682 trace__sched_stat_runtime
))
2683 goto out_error_sched_stat_runtime
;
2685 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
2687 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
2688 goto out_delete_evlist
;
2691 err
= trace__symbols_init(trace
, evlist
);
2693 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
2694 goto out_delete_evlist
;
2697 perf_evlist__config(evlist
, &trace
->opts
);
2699 signal(SIGCHLD
, sig_handler
);
2700 signal(SIGINT
, sig_handler
);
2703 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
2706 fprintf(trace
->output
, "Couldn't run the workload!\n");
2707 goto out_delete_evlist
;
2711 err
= perf_evlist__open(evlist
);
2713 goto out_error_open
;
2715 err
= bpf__apply_obj_config();
2717 char errbuf
[BUFSIZ
];
2719 bpf__strerror_apply_obj_config(err
, errbuf
, sizeof(errbuf
));
2720 pr_err("ERROR: Apply config to BPF failed: %s\n",
2722 goto out_error_open
;
2726 * Better not use !target__has_task() here because we need to cover the
2727 * case where no threads were specified in the command line, but a
2728 * workload was, and in that case we will fill in the thread_map when
2729 * we fork the workload in perf_evlist__prepare_workload.
2731 if (trace
->filter_pids
.nr
> 0)
2732 err
= perf_evlist__set_filter_pids(evlist
, trace
->filter_pids
.nr
, trace
->filter_pids
.entries
);
2733 else if (thread_map__pid(evlist
->threads
, 0) == -1)
2734 err
= perf_evlist__set_filter_pid(evlist
, getpid());
2739 if (trace
->ev_qualifier_ids
.nr
> 0) {
2740 err
= trace__set_ev_qualifier_filter(trace
);
2744 pr_debug("event qualifier tracepoint filter: %s\n",
2745 trace
->syscalls
.events
.sys_exit
->filter
);
2748 err
= perf_evlist__apply_filters(evlist
, &evsel
);
2750 goto out_error_apply_filters
;
2752 err
= perf_evlist__mmap(evlist
, trace
->opts
.mmap_pages
, false);
2754 goto out_error_mmap
;
2756 if (!target__none(&trace
->opts
.target
))
2757 perf_evlist__enable(evlist
);
2760 perf_evlist__start_workload(evlist
);
2762 trace
->multiple_threads
= thread_map__pid(evlist
->threads
, 0) == -1 ||
2763 evlist
->threads
->nr
> 1 ||
2764 perf_evlist__first(evlist
)->attr
.inherit
;
2766 before
= trace
->nr_events
;
2768 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
2769 union perf_event
*event
;
2771 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
2772 struct perf_sample sample
;
2776 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
2778 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
2782 trace__handle_event(trace
, event
, &sample
);
2784 perf_evlist__mmap_consume(evlist
, i
);
2789 if (done
&& !draining
) {
2790 perf_evlist__disable(evlist
);
2796 if (trace
->nr_events
== before
) {
2797 int timeout
= done
? 100 : -1;
2799 if (!draining
&& perf_evlist__poll(evlist
, timeout
) > 0) {
2800 if (perf_evlist__filter_pollfd(evlist
, POLLERR
| POLLHUP
) == 0)
2810 thread__zput(trace
->current
);
2812 perf_evlist__disable(evlist
);
2816 trace__fprintf_thread_summary(trace
, trace
->output
);
2818 if (trace
->show_tool_stats
) {
2819 fprintf(trace
->output
, "Stats:\n "
2820 " vfs_getname : %" PRIu64
"\n"
2821 " proc_getname: %" PRIu64
"\n",
2822 trace
->stats
.vfs_getname
,
2823 trace
->stats
.proc_getname
);
2828 perf_evlist__delete(evlist
);
2829 trace
->evlist
= NULL
;
2830 trace
->live
= false;
2833 char errbuf
[BUFSIZ
];
2835 out_error_sched_stat_runtime
:
2836 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "sched", "sched_stat_runtime");
2839 out_error_raw_syscalls
:
2840 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "raw_syscalls", "sys_(enter|exit)");
2844 perf_evlist__strerror_mmap(evlist
, errno
, errbuf
, sizeof(errbuf
));
2848 perf_evlist__strerror_open(evlist
, errno
, errbuf
, sizeof(errbuf
));
2851 fprintf(trace
->output
, "%s\n", errbuf
);
2852 goto out_delete_evlist
;
2854 out_error_apply_filters
:
2855 fprintf(trace
->output
,
2856 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2857 evsel
->filter
, perf_evsel__name(evsel
), errno
,
2858 strerror_r(errno
, errbuf
, sizeof(errbuf
)));
2859 goto out_delete_evlist
;
2862 fprintf(trace
->output
, "Not enough memory to run!\n");
2863 goto out_delete_evlist
;
2866 fprintf(trace
->output
, "errno=%d,%s\n", errno
, strerror(errno
));
2867 goto out_delete_evlist
;
2870 static int trace__replay(struct trace
*trace
)
2872 const struct perf_evsel_str_handler handlers
[] = {
2873 { "probe:vfs_getname", trace__vfs_getname
, },
2875 struct perf_data_file file
= {
2877 .mode
= PERF_DATA_MODE_READ
,
2878 .force
= trace
->force
,
2880 struct perf_session
*session
;
2881 struct perf_evsel
*evsel
;
2884 trace
->tool
.sample
= trace__process_sample
;
2885 trace
->tool
.mmap
= perf_event__process_mmap
;
2886 trace
->tool
.mmap2
= perf_event__process_mmap2
;
2887 trace
->tool
.comm
= perf_event__process_comm
;
2888 trace
->tool
.exit
= perf_event__process_exit
;
2889 trace
->tool
.fork
= perf_event__process_fork
;
2890 trace
->tool
.attr
= perf_event__process_attr
;
2891 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
2892 trace
->tool
.build_id
= perf_event__process_build_id
;
2894 trace
->tool
.ordered_events
= true;
2895 trace
->tool
.ordering_requires_timestamps
= true;
2897 /* add tid to output */
2898 trace
->multiple_threads
= true;
2900 session
= perf_session__new(&file
, false, &trace
->tool
);
2901 if (session
== NULL
)
2904 if (symbol__init(&session
->header
.env
) < 0)
2907 trace
->host
= &session
->machines
.host
;
2909 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
2913 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2914 "raw_syscalls:sys_enter");
2915 /* older kernels have syscalls tp versus raw_syscalls */
2917 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2918 "syscalls:sys_enter");
2921 (perf_evsel__init_syscall_tp(evsel
, trace__sys_enter
) < 0 ||
2922 perf_evsel__init_sc_tp_ptr_field(evsel
, args
))) {
2923 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2927 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2928 "raw_syscalls:sys_exit");
2930 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2931 "syscalls:sys_exit");
2933 (perf_evsel__init_syscall_tp(evsel
, trace__sys_exit
) < 0 ||
2934 perf_evsel__init_sc_tp_uint_field(evsel
, ret
))) {
2935 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2939 evlist__for_each(session
->evlist
, evsel
) {
2940 if (evsel
->attr
.type
== PERF_TYPE_SOFTWARE
&&
2941 (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
||
2942 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MIN
||
2943 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS
))
2944 evsel
->handler
= trace__pgfault
;
2947 err
= parse_target_str(trace
);
2953 err
= perf_session__process_events(session
);
2955 pr_err("Failed to process events, error %d", err
);
2957 else if (trace
->summary
)
2958 trace__fprintf_thread_summary(trace
, trace
->output
);
2961 perf_session__delete(session
);
2966 static size_t trace__fprintf_threads_header(FILE *fp
)
2970 printed
= fprintf(fp
, "\n Summary of events:\n\n");
2975 static size_t thread__dump_stats(struct thread_trace
*ttrace
,
2976 struct trace
*trace
, FILE *fp
)
2978 struct stats
*stats
;
2981 struct int_node
*inode
= intlist__first(ttrace
->syscall_stats
);
2986 printed
+= fprintf(fp
, "\n");
2988 printed
+= fprintf(fp
, " syscall calls total min avg max stddev\n");
2989 printed
+= fprintf(fp
, " (msec) (msec) (msec) (msec) (%%)\n");
2990 printed
+= fprintf(fp
, " --------------- -------- --------- --------- --------- --------- ------\n");
2992 /* each int_node is a syscall */
2994 stats
= inode
->priv
;
2996 double min
= (double)(stats
->min
) / NSEC_PER_MSEC
;
2997 double max
= (double)(stats
->max
) / NSEC_PER_MSEC
;
2998 double avg
= avg_stats(stats
);
3000 u64 n
= (u64
) stats
->n
;
3002 pct
= avg
? 100.0 * stddev_stats(stats
)/avg
: 0.0;
3003 avg
/= NSEC_PER_MSEC
;
3005 sc
= &trace
->syscalls
.table
[inode
->i
];
3006 printed
+= fprintf(fp
, " %-15s", sc
->name
);
3007 printed
+= fprintf(fp
, " %8" PRIu64
" %9.3f %9.3f %9.3f",
3008 n
, avg
* n
, min
, avg
);
3009 printed
+= fprintf(fp
, " %9.3f %9.2f%%\n", max
, pct
);
3012 inode
= intlist__next(inode
);
3015 printed
+= fprintf(fp
, "\n\n");
3020 /* struct used to pass data to per-thread function */
3021 struct summary_data
{
3023 struct trace
*trace
;
3027 static int trace__fprintf_one_thread(struct thread
*thread
, void *priv
)
3029 struct summary_data
*data
= priv
;
3030 FILE *fp
= data
->fp
;
3031 size_t printed
= data
->printed
;
3032 struct trace
*trace
= data
->trace
;
3033 struct thread_trace
*ttrace
= thread__priv(thread
);
3039 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
3041 printed
+= fprintf(fp
, " %s (%d), ", thread__comm_str(thread
), thread
->tid
);
3042 printed
+= fprintf(fp
, "%lu events, ", ttrace
->nr_events
);
3043 printed
+= fprintf(fp
, "%.1f%%", ratio
);
3045 printed
+= fprintf(fp
, ", %lu majfaults", ttrace
->pfmaj
);
3047 printed
+= fprintf(fp
, ", %lu minfaults", ttrace
->pfmin
);
3048 printed
+= fprintf(fp
, ", %.3f msec\n", ttrace
->runtime_ms
);
3049 printed
+= thread__dump_stats(ttrace
, trace
, fp
);
3051 data
->printed
+= printed
;
3056 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
3058 struct summary_data data
= {
3062 data
.printed
= trace__fprintf_threads_header(fp
);
3064 machine__for_each_thread(trace
->host
, trace__fprintf_one_thread
, &data
);
3066 return data
.printed
;
3069 static int trace__set_duration(const struct option
*opt
, const char *str
,
3070 int unset __maybe_unused
)
3072 struct trace
*trace
= opt
->value
;
3074 trace
->duration_filter
= atof(str
);
3078 static int trace__set_filter_pids(const struct option
*opt
, const char *str
,
3079 int unset __maybe_unused
)
3083 struct trace
*trace
= opt
->value
;
3085 * FIXME: introduce a intarray class, plain parse csv and create a
3086 * { int nr, int entries[] } struct...
3088 struct intlist
*list
= intlist__new(str
);
3093 i
= trace
->filter_pids
.nr
= intlist__nr_entries(list
) + 1;
3094 trace
->filter_pids
.entries
= calloc(i
, sizeof(pid_t
));
3096 if (trace
->filter_pids
.entries
== NULL
)
3099 trace
->filter_pids
.entries
[0] = getpid();
3101 for (i
= 1; i
< trace
->filter_pids
.nr
; ++i
)
3102 trace
->filter_pids
.entries
[i
] = intlist__entry(list
, i
- 1)->i
;
3104 intlist__delete(list
);
3110 static int trace__open_output(struct trace
*trace
, const char *filename
)
3114 if (!stat(filename
, &st
) && st
.st_size
) {
3115 char oldname
[PATH_MAX
];
3117 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
3119 rename(filename
, oldname
);
3122 trace
->output
= fopen(filename
, "w");
3124 return trace
->output
== NULL
? -errno
: 0;
3127 static int parse_pagefaults(const struct option
*opt
, const char *str
,
3128 int unset __maybe_unused
)
3130 int *trace_pgfaults
= opt
->value
;
3132 if (strcmp(str
, "all") == 0)
3133 *trace_pgfaults
|= TRACE_PFMAJ
| TRACE_PFMIN
;
3134 else if (strcmp(str
, "maj") == 0)
3135 *trace_pgfaults
|= TRACE_PFMAJ
;
3136 else if (strcmp(str
, "min") == 0)
3137 *trace_pgfaults
|= TRACE_PFMIN
;
3144 static void evlist__set_evsel_handler(struct perf_evlist
*evlist
, void *handler
)
3146 struct perf_evsel
*evsel
;
3148 evlist__for_each(evlist
, evsel
)
3149 evsel
->handler
= handler
;
3152 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
3154 const char *trace_usage
[] = {
3155 "perf trace [<options>] [<command>]",
3156 "perf trace [<options>] -- <command> [<options>]",
3157 "perf trace record [<options>] [<command>]",
3158 "perf trace record [<options>] -- <command> [<options>]",
3161 struct trace trace
= {
3163 .machine
= audit_detect_machine(),
3164 .open_id
= audit_name_to_syscall("open", trace
.audit
.machine
),
3174 .user_freq
= UINT_MAX
,
3175 .user_interval
= ULLONG_MAX
,
3176 .no_buffering
= true,
3177 .mmap_pages
= UINT_MAX
,
3178 .proc_map_timeout
= 500,
3182 .trace_syscalls
= true,
3184 const char *output_name
= NULL
;
3185 const char *ev_qualifier_str
= NULL
;
3186 const struct option trace_options
[] = {
3187 OPT_CALLBACK(0, "event", &trace
.evlist
, "event",
3188 "event selector. use 'perf list' to list available events",
3189 parse_events_option
),
3190 OPT_BOOLEAN(0, "comm", &trace
.show_comm
,
3191 "show the thread COMM next to its id"),
3192 OPT_BOOLEAN(0, "tool_stats", &trace
.show_tool_stats
, "show tool stats"),
3193 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr", "list of syscalls to trace"),
3194 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
3195 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
3196 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
3197 "trace events on existing process id"),
3198 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
3199 "trace events on existing thread id"),
3200 OPT_CALLBACK(0, "filter-pids", &trace
, "CSV list of pids",
3201 "pids to filter (by the kernel)", trace__set_filter_pids
),
3202 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
3203 "system-wide collection from all CPUs"),
3204 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
3205 "list of cpus to monitor"),
3206 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
3207 "child tasks do not inherit counters"),
3208 OPT_CALLBACK('m', "mmap-pages", &trace
.opts
.mmap_pages
, "pages",
3209 "number of mmap data pages",
3210 perf_evlist__parse_mmap_pages
),
3211 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
3213 OPT_CALLBACK(0, "duration", &trace
, "float",
3214 "show only events with duration > N.M ms",
3215 trace__set_duration
),
3216 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
3217 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
3218 OPT_BOOLEAN('T', "time", &trace
.full_time
,
3219 "Show full timestamp, not time relative to first start"),
3220 OPT_BOOLEAN('s', "summary", &trace
.summary_only
,
3221 "Show only syscall summary with statistics"),
3222 OPT_BOOLEAN('S', "with-summary", &trace
.summary
,
3223 "Show all syscalls and summary with statistics"),
3224 OPT_CALLBACK_DEFAULT('F', "pf", &trace
.trace_pgfaults
, "all|maj|min",
3225 "Trace pagefaults", parse_pagefaults
, "maj"),
3226 OPT_BOOLEAN(0, "syscalls", &trace
.trace_syscalls
, "Trace syscalls"),
3227 OPT_BOOLEAN('f', "force", &trace
.force
, "don't complain, do it"),
3228 OPT_UINTEGER(0, "proc-map-timeout", &trace
.opts
.proc_map_timeout
,
3229 "per thread proc mmap processing timeout in ms"),
3232 const char * const trace_subcommands
[] = { "record", NULL
};
3236 signal(SIGSEGV
, sighandler_dump_stack
);
3237 signal(SIGFPE
, sighandler_dump_stack
);
3239 trace
.evlist
= perf_evlist__new();
3241 if (trace
.evlist
== NULL
) {
3242 pr_err("Not enough memory to run!\n");
3247 argc
= parse_options_subcommand(argc
, argv
, trace_options
, trace_subcommands
,
3248 trace_usage
, PARSE_OPT_STOP_AT_NON_OPTION
);
3250 if (trace
.trace_pgfaults
) {
3251 trace
.opts
.sample_address
= true;
3252 trace
.opts
.sample_time
= true;
3255 if (trace
.evlist
->nr_entries
> 0)
3256 evlist__set_evsel_handler(trace
.evlist
, trace__event_handler
);
3258 if ((argc
>= 1) && (strcmp(argv
[0], "record") == 0))
3259 return trace__record(&trace
, argc
-1, &argv
[1]);
3261 /* summary_only implies summary option, but don't overwrite summary if set */
3262 if (trace
.summary_only
)
3263 trace
.summary
= trace
.summary_only
;
3265 if (!trace
.trace_syscalls
&& !trace
.trace_pgfaults
&&
3266 trace
.evlist
->nr_entries
== 0 /* Was --events used? */) {
3267 pr_err("Please specify something to trace.\n");
3271 if (output_name
!= NULL
) {
3272 err
= trace__open_output(&trace
, output_name
);
3274 perror("failed to create output file");
3279 if (ev_qualifier_str
!= NULL
) {
3280 const char *s
= ev_qualifier_str
;
3281 struct strlist_config slist_config
= {
3282 .dirname
= system_path(STRACE_GROUPS_DIR
),
3285 trace
.not_ev_qualifier
= *s
== '!';
3286 if (trace
.not_ev_qualifier
)
3288 trace
.ev_qualifier
= strlist__new(s
, &slist_config
);
3289 if (trace
.ev_qualifier
== NULL
) {
3290 fputs("Not enough memory to parse event qualifier",
3296 err
= trace__validate_ev_qualifier(&trace
);
3301 err
= target__validate(&trace
.opts
.target
);
3303 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3304 fprintf(trace
.output
, "%s", bf
);
3308 err
= target__parse_uid(&trace
.opts
.target
);
3310 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3311 fprintf(trace
.output
, "%s", bf
);
3315 if (!argc
&& target__none(&trace
.opts
.target
))
3316 trace
.opts
.target
.system_wide
= true;
3319 err
= trace__replay(&trace
);
3321 err
= trace__run(&trace
, argc
, argv
);
3324 if (output_name
!= NULL
)
3325 fclose(trace
.output
);