/*
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
23 #include "util/symbol.h"
24 #include "util/color.h"
25 #include "util/thread.h"
26 #include "util/util.h"
27 #include <linux/rbtree.h>
28 #include "util/parse-options.h"
29 #include "util/parse-events.h"
31 #include "util/debug.h"
45 #include <sys/syscall.h>
46 #include <sys/ioctl.h>
48 #include <sys/prctl.h>
53 #include <linux/unistd.h>
54 #include <linux/types.h>
56 static int fd
[MAX_NR_CPUS
][MAX_COUNTERS
];
58 static int system_wide
= 0;
60 static int default_interval
= 0;
62 static int count_filter
= 5;
63 static int print_entries
;
65 static int target_pid
= -1;
66 static int inherit
= 0;
67 static int profile_cpu
= -1;
68 static int nr_cpus
= 0;
69 static unsigned int realtime_prio
= 0;
71 static unsigned int page_size
;
72 static unsigned int mmap_pages
= 16;
73 static int freq
= 1000; /* 1 KHz */
75 static int delay_secs
= 2;
77 static int dump_symtab
= 0;
79 static bool hide_kernel_symbols
= false;
80 static bool hide_user_symbols
= false;
81 static struct winsize winsize
;
82 const char *vmlinux_name
;
83 static const char *graph_line
=
84 "_____________________________________________________________________"
85 "_____________________________________________________________________";
86 static const char *graph_dotted_line
=
87 "---------------------------------------------------------------------"
88 "---------------------------------------------------------------------"
89 "---------------------------------------------------------------------";
97 unsigned long count
[MAX_COUNTERS
];
99 struct source_line
*next
;
102 static char *sym_filter
= NULL
;
103 struct sym_entry
*sym_filter_entry
= NULL
;
104 static int sym_pcnt_filter
= 5;
105 static int sym_counter
= 0;
106 static int display_weighted
= -1;
112 struct sym_entry_source
{
113 struct source_line
*source
;
114 struct source_line
*lines
;
115 struct source_line
**lines_tail
;
116 pthread_mutex_t lock
;
120 struct rb_node rb_node
;
121 struct list_head node
;
122 unsigned long snap_count
;
128 struct sym_entry_source
*src
;
129 unsigned long count
[0];
136 static inline struct symbol
*sym_entry__symbol(struct sym_entry
*self
)
138 return ((void *)self
) + symbol__priv_size
;
141 static void get_term_dimensions(struct winsize
*ws
)
143 char *s
= getenv("LINES");
146 ws
->ws_row
= atoi(s
);
147 s
= getenv("COLUMNS");
149 ws
->ws_col
= atoi(s
);
150 if (ws
->ws_row
&& ws
->ws_col
)
155 if (ioctl(1, TIOCGWINSZ
, ws
) == 0 &&
156 ws
->ws_row
&& ws
->ws_col
)
163 static void update_print_entries(struct winsize
*ws
)
165 print_entries
= ws
->ws_row
;
167 if (print_entries
> 9)
171 static void sig_winch_handler(int sig __used
)
173 get_term_dimensions(&winsize
);
174 update_print_entries(&winsize
);
177 static void parse_source(struct sym_entry
*syme
)
180 struct sym_entry_source
*source
;
183 char command
[PATH_MAX
*2];
190 if (syme
->src
== NULL
) {
191 syme
->src
= calloc(1, sizeof(*source
));
192 if (syme
->src
== NULL
)
194 pthread_mutex_init(&syme
->src
->lock
, NULL
);
200 pthread_mutex_lock(&source
->lock
);
204 sym
= sym_entry__symbol(syme
);
206 path
= map
->dso
->long_name
;
208 len
= sym
->end
- sym
->start
;
211 "objdump --start-address=0x%016Lx "
212 "--stop-address=0x%016Lx -dS %s",
213 map
->unmap_ip(map
, sym
->start
),
214 map
->unmap_ip(map
, sym
->end
), path
);
216 file
= popen(command
, "r");
220 pthread_mutex_lock(&source
->lock
);
221 source
->lines_tail
= &source
->lines
;
222 while (!feof(file
)) {
223 struct source_line
*src
;
227 src
= malloc(sizeof(struct source_line
));
229 memset(src
, 0, sizeof(struct source_line
));
231 if (getline(&src
->line
, &dummy
, file
) < 0)
236 c
= strchr(src
->line
, '\n');
241 *source
->lines_tail
= src
;
242 source
->lines_tail
= &src
->next
;
244 if (strlen(src
->line
)>8 && src
->line
[8] == ':') {
245 src
->eip
= strtoull(src
->line
, NULL
, 16);
246 src
->eip
= map
->unmap_ip(map
, src
->eip
);
248 if (strlen(src
->line
)>8 && src
->line
[16] == ':') {
249 src
->eip
= strtoull(src
->line
, NULL
, 16);
250 src
->eip
= map
->unmap_ip(map
, src
->eip
);
255 sym_filter_entry
= syme
;
256 pthread_mutex_unlock(&source
->lock
);
259 static void __zero_source_counters(struct sym_entry
*syme
)
262 struct source_line
*line
;
264 line
= syme
->src
->lines
;
266 for (i
= 0; i
< nr_counters
; i
++)
272 static void record_precise_ip(struct sym_entry
*syme
, int counter
, u64 ip
)
274 struct source_line
*line
;
276 if (syme
!= sym_filter_entry
)
279 if (pthread_mutex_trylock(&syme
->src
->lock
))
282 if (syme
->src
== NULL
|| syme
->src
->source
== NULL
)
285 for (line
= syme
->src
->lines
; line
; line
= line
->next
) {
286 if (line
->eip
== ip
) {
287 line
->count
[counter
]++;
294 pthread_mutex_unlock(&syme
->src
->lock
);
297 static void lookup_sym_source(struct sym_entry
*syme
)
299 struct symbol
*symbol
= sym_entry__symbol(syme
);
300 struct source_line
*line
;
301 char pattern
[PATH_MAX
];
303 sprintf(pattern
, "<%s>:", symbol
->name
);
305 pthread_mutex_lock(&syme
->src
->lock
);
306 for (line
= syme
->src
->lines
; line
; line
= line
->next
) {
307 if (strstr(line
->line
, pattern
)) {
308 syme
->src
->source
= line
;
312 pthread_mutex_unlock(&syme
->src
->lock
);
315 static void show_lines(struct source_line
*queue
, int count
, int total
)
318 struct source_line
*line
;
321 for (i
= 0; i
< count
; i
++) {
322 float pcnt
= 100.0*(float)line
->count
[sym_counter
]/(float)total
;
324 printf("%8li %4.1f%%\t%s\n", line
->count
[sym_counter
], pcnt
, line
->line
);
329 #define TRACE_COUNT 3
331 static void show_details(struct sym_entry
*syme
)
333 struct symbol
*symbol
;
334 struct source_line
*line
;
335 struct source_line
*line_queue
= NULL
;
337 int line_queue_count
= 0, total
= 0, more
= 0;
342 if (!syme
->src
->source
)
343 lookup_sym_source(syme
);
345 if (!syme
->src
->source
)
348 symbol
= sym_entry__symbol(syme
);
349 printf("Showing %s for %s\n", event_name(sym_counter
), symbol
->name
);
350 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter
);
352 pthread_mutex_lock(&syme
->src
->lock
);
353 line
= syme
->src
->source
;
355 total
+= line
->count
[sym_counter
];
359 line
= syme
->src
->source
;
363 if (!line_queue_count
)
367 if (line
->count
[sym_counter
])
368 pcnt
= 100.0 * line
->count
[sym_counter
] / (float)total
;
369 if (pcnt
>= (float)sym_pcnt_filter
) {
370 if (displayed
<= print_entries
)
371 show_lines(line_queue
, line_queue_count
, total
);
373 displayed
+= line_queue_count
;
374 line_queue_count
= 0;
376 } else if (line_queue_count
> TRACE_COUNT
) {
377 line_queue
= line_queue
->next
;
381 line
->count
[sym_counter
] = zero
? 0 : line
->count
[sym_counter
] * 7 / 8;
384 pthread_mutex_unlock(&syme
->src
->lock
);
386 printf("%d lines not displayed, maybe increase display entries [e]\n", more
);
390 * Symbols will be added here in event__process_sample and will get out
393 static LIST_HEAD(active_symbols
);
394 static pthread_mutex_t active_symbols_lock
= PTHREAD_MUTEX_INITIALIZER
;
397 * Ordering weight: count-1 * count-2 * ... / count-n
399 static double sym_weight(const struct sym_entry
*sym
)
401 double weight
= sym
->snap_count
;
404 if (!display_weighted
)
407 for (counter
= 1; counter
< nr_counters
-1; counter
++)
408 weight
*= sym
->count
[counter
];
410 weight
/= (sym
->count
[counter
] + 1);
416 static long userspace_samples
;
417 static const char CONSOLE_CLEAR
[] = "\e[H\e[2J";
419 static void __list_insert_active_sym(struct sym_entry
*syme
)
421 list_add(&syme
->node
, &active_symbols
);
424 static void list_remove_active_sym(struct sym_entry
*syme
)
426 pthread_mutex_lock(&active_symbols_lock
);
427 list_del_init(&syme
->node
);
428 pthread_mutex_unlock(&active_symbols_lock
);
431 static void rb_insert_active_sym(struct rb_root
*tree
, struct sym_entry
*se
)
433 struct rb_node
**p
= &tree
->rb_node
;
434 struct rb_node
*parent
= NULL
;
435 struct sym_entry
*iter
;
439 iter
= rb_entry(parent
, struct sym_entry
, rb_node
);
441 if (se
->weight
> iter
->weight
)
447 rb_link_node(&se
->rb_node
, parent
, p
);
448 rb_insert_color(&se
->rb_node
, tree
);
451 static void print_sym_table(void)
454 int counter
, snap
= !display_weighted
? sym_counter
: 0;
455 float samples_per_sec
= samples
/delay_secs
;
456 float ksamples_per_sec
= (samples
-userspace_samples
)/delay_secs
;
457 float sum_ksamples
= 0.0;
458 struct sym_entry
*syme
, *n
;
459 struct rb_root tmp
= RB_ROOT
;
461 int sym_width
= 0, dso_width
= 0;
462 const int win_width
= winsize
.ws_col
- 1;
463 struct dso
*unique_dso
= NULL
, *first_dso
= NULL
;
465 samples
= userspace_samples
= 0;
467 /* Sort the active symbols */
468 pthread_mutex_lock(&active_symbols_lock
);
469 syme
= list_entry(active_symbols
.next
, struct sym_entry
, node
);
470 pthread_mutex_unlock(&active_symbols_lock
);
472 list_for_each_entry_safe_from(syme
, n
, &active_symbols
, node
) {
473 syme
->snap_count
= syme
->count
[snap
];
474 if (syme
->snap_count
!= 0) {
476 if ((hide_user_symbols
&&
477 syme
->origin
== PERF_RECORD_MISC_USER
) ||
478 (hide_kernel_symbols
&&
479 syme
->origin
== PERF_RECORD_MISC_KERNEL
)) {
480 list_remove_active_sym(syme
);
483 syme
->weight
= sym_weight(syme
);
484 rb_insert_active_sym(&tmp
, syme
);
485 sum_ksamples
+= syme
->snap_count
;
487 for (j
= 0; j
< nr_counters
; j
++)
488 syme
->count
[j
] = zero
? 0 : syme
->count
[j
] * 7 / 8;
490 list_remove_active_sym(syme
);
495 printf("%-*.*s\n", win_width
, win_width
, graph_dotted_line
);
496 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [",
498 100.0 - (100.0*((samples_per_sec
-ksamples_per_sec
)/samples_per_sec
)));
500 if (nr_counters
== 1 || !display_weighted
) {
501 printf("%Ld", (u64
)attrs
[0].sample_period
);
508 if (!display_weighted
)
509 printf("%s", event_name(sym_counter
));
510 else for (counter
= 0; counter
< nr_counters
; counter
++) {
514 printf("%s", event_name(counter
));
519 if (target_pid
!= -1)
520 printf(" (target_pid: %d", target_pid
);
524 if (profile_cpu
!= -1)
525 printf(", cpu: %d)\n", profile_cpu
);
527 if (target_pid
!= -1)
530 printf(", %d CPUs)\n", nr_cpus
);
533 printf("%-*.*s\n", win_width
, win_width
, graph_dotted_line
);
535 if (sym_filter_entry
) {
536 show_details(sym_filter_entry
);
541 * Find the longest symbol name that will be displayed
543 for (nd
= rb_first(&tmp
); nd
; nd
= rb_next(nd
)) {
544 syme
= rb_entry(nd
, struct sym_entry
, rb_node
);
545 if (++printed
> print_entries
||
546 (int)syme
->snap_count
< count_filter
)
549 if (first_dso
== NULL
)
550 unique_dso
= first_dso
= syme
->map
->dso
;
551 else if (syme
->map
->dso
!= first_dso
)
554 if (syme
->map
->dso
->long_name_len
> dso_width
)
555 dso_width
= syme
->map
->dso
->long_name_len
;
557 if (syme
->name_len
> sym_width
)
558 sym_width
= syme
->name_len
;
564 printf("DSO: %s\n", unique_dso
->long_name
);
566 int max_dso_width
= winsize
.ws_col
- sym_width
- 29;
567 if (dso_width
> max_dso_width
)
568 dso_width
= max_dso_width
;
571 if (nr_counters
== 1)
572 printf(" samples pcnt");
574 printf(" weight samples pcnt");
578 printf(" %-*.*s", sym_width
, sym_width
, "function");
582 printf(" %s _______ _____",
583 nr_counters
== 1 ? " " : "______");
585 printf(" ________________");
586 printf(" %-*.*s", sym_width
, sym_width
, graph_line
);
588 printf(" %-*.*s", dso_width
, dso_width
, graph_line
);
591 for (nd
= rb_first(&tmp
); nd
; nd
= rb_next(nd
)) {
595 syme
= rb_entry(nd
, struct sym_entry
, rb_node
);
596 sym
= sym_entry__symbol(syme
);
598 if (++printed
> print_entries
|| (int)syme
->snap_count
< count_filter
)
601 pcnt
= 100.0 - (100.0 * ((sum_ksamples
- syme
->snap_count
) /
604 if (nr_counters
== 1 || !display_weighted
)
605 printf("%20.2f ", syme
->weight
);
607 printf("%9.1f %10ld ", syme
->weight
, syme
->snap_count
);
609 percent_color_fprintf(stdout
, "%4.1f%%", pcnt
);
611 printf(" %016llx", sym
->start
);
612 printf(" %-*.*s", sym_width
, sym_width
, sym
->name
);
614 printf(" %-*.*s", dso_width
, dso_width
,
615 dso_width
>= syme
->map
->dso
->long_name_len
?
616 syme
->map
->dso
->long_name
:
617 syme
->map
->dso
->short_name
);
622 static void prompt_integer(int *target
, const char *msg
)
624 char *buf
= malloc(0), *p
;
628 fprintf(stdout
, "\n%s: ", msg
);
629 if (getline(&buf
, &dummy
, stdin
) < 0)
632 p
= strchr(buf
, '\n');
642 tmp
= strtoul(buf
, NULL
, 10);
648 static void prompt_percent(int *target
, const char *msg
)
652 prompt_integer(&tmp
, msg
);
653 if (tmp
>= 0 && tmp
<= 100)
657 static void prompt_symbol(struct sym_entry
**target
, const char *msg
)
659 char *buf
= malloc(0), *p
;
660 struct sym_entry
*syme
= *target
, *n
, *found
= NULL
;
663 /* zero counters of active symbol */
665 pthread_mutex_lock(&syme
->src
->lock
);
666 __zero_source_counters(syme
);
668 pthread_mutex_unlock(&syme
->src
->lock
);
671 fprintf(stdout
, "\n%s: ", msg
);
672 if (getline(&buf
, &dummy
, stdin
) < 0)
675 p
= strchr(buf
, '\n');
679 pthread_mutex_lock(&active_symbols_lock
);
680 syme
= list_entry(active_symbols
.next
, struct sym_entry
, node
);
681 pthread_mutex_unlock(&active_symbols_lock
);
683 list_for_each_entry_safe_from(syme
, n
, &active_symbols
, node
) {
684 struct symbol
*sym
= sym_entry__symbol(syme
);
686 if (!strcmp(buf
, sym
->name
)) {
693 fprintf(stderr
, "Sorry, %s is not active.\n", sym_filter
);
703 static void print_mapped_keys(void)
707 if (sym_filter_entry
) {
708 struct symbol
*sym
= sym_entry__symbol(sym_filter_entry
);
712 fprintf(stdout
, "\nMapped keys:\n");
713 fprintf(stdout
, "\t[d] display refresh delay. \t(%d)\n", delay_secs
);
714 fprintf(stdout
, "\t[e] display entries (lines). \t(%d)\n", print_entries
);
717 fprintf(stdout
, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter
));
719 fprintf(stdout
, "\t[f] profile display filter (count). \t(%d)\n", count_filter
);
722 fprintf(stdout
, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter
);
723 fprintf(stdout
, "\t[s] annotate symbol. \t(%s)\n", name
?: "NULL");
724 fprintf(stdout
, "\t[S] stop annotation.\n");
728 fprintf(stdout
, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted
? 1 : 0);
731 "\t[K] hide kernel_symbols symbols. \t(%s)\n",
732 hide_kernel_symbols
? "yes" : "no");
734 "\t[U] hide user symbols. \t(%s)\n",
735 hide_user_symbols
? "yes" : "no");
736 fprintf(stdout
, "\t[z] toggle sample zeroing. \t(%d)\n", zero
? 1 : 0);
737 fprintf(stdout
, "\t[qQ] quit.\n");
740 static int key_mapped(int c
)
754 return nr_counters
> 1 ? 1 : 0;
758 return vmlinux_name
? 1 : 0;
766 static void handle_keypress(int c
)
768 if (!key_mapped(c
)) {
769 struct pollfd stdin_poll
= { .fd
= 0, .events
= POLLIN
};
770 struct termios tc
, save
;
773 fprintf(stdout
, "\nEnter selection, or unmapped key to continue: ");
778 tc
.c_lflag
&= ~(ICANON
| ECHO
);
781 tcsetattr(0, TCSANOW
, &tc
);
783 poll(&stdin_poll
, 1, -1);
786 tcsetattr(0, TCSAFLUSH
, &save
);
793 prompt_integer(&delay_secs
, "Enter display delay");
798 prompt_integer(&print_entries
, "Enter display entries (lines)");
799 if (print_entries
== 0) {
800 sig_winch_handler(SIGWINCH
);
801 signal(SIGWINCH
, sig_winch_handler
);
803 signal(SIGWINCH
, SIG_DFL
);
806 if (nr_counters
> 1) {
809 fprintf(stderr
, "\nAvailable events:");
810 for (i
= 0; i
< nr_counters
; i
++)
811 fprintf(stderr
, "\n\t%d %s", i
, event_name(i
));
813 prompt_integer(&sym_counter
, "Enter details event counter");
815 if (sym_counter
>= nr_counters
) {
816 fprintf(stderr
, "Sorry, no such event, using %s.\n", event_name(0));
820 } else sym_counter
= 0;
823 prompt_integer(&count_filter
, "Enter display event count filter");
826 prompt_percent(&sym_pcnt_filter
, "Enter details display event filter (percent)");
829 hide_kernel_symbols
= !hide_kernel_symbols
;
833 printf("exiting.\n");
835 dsos__fprintf(stderr
);
838 prompt_symbol(&sym_filter_entry
, "Enter details symbol");
841 if (!sym_filter_entry
)
844 struct sym_entry
*syme
= sym_filter_entry
;
846 pthread_mutex_lock(&syme
->src
->lock
);
847 sym_filter_entry
= NULL
;
848 __zero_source_counters(syme
);
849 pthread_mutex_unlock(&syme
->src
->lock
);
853 hide_user_symbols
= !hide_user_symbols
;
856 display_weighted
= ~display_weighted
;
866 static void *display_thread(void *arg __used
)
868 struct pollfd stdin_poll
= { .fd
= 0, .events
= POLLIN
};
869 struct termios tc
, save
;
874 tc
.c_lflag
&= ~(ICANON
| ECHO
);
879 delay_msecs
= delay_secs
* 1000;
880 tcsetattr(0, TCSANOW
, &tc
);
886 } while (!poll(&stdin_poll
, 1, delay_msecs
) == 1);
889 tcsetattr(0, TCSAFLUSH
, &save
);
897 /* Tag samples to be skipped. */
898 static const char *skip_symbols
[] = {
904 "mwait_idle_with_hints",
906 "ppc64_runlatch_off",
907 "pseries_dedicated_idle_sleep",
911 static int symbol_filter(struct map
*map
, struct symbol
*sym
)
913 struct sym_entry
*syme
;
914 const char *name
= sym
->name
;
918 * ppc64 uses function descriptors and appends a '.' to the
919 * start of every instruction address. Remove it.
924 if (!strcmp(name
, "_text") ||
925 !strcmp(name
, "_etext") ||
926 !strcmp(name
, "_sinittext") ||
927 !strncmp("init_module", name
, 11) ||
928 !strncmp("cleanup_module", name
, 14) ||
929 strstr(name
, "_text_start") ||
930 strstr(name
, "_text_end"))
933 syme
= symbol__priv(sym
);
936 if (!sym_filter_entry
&& sym_filter
&& !strcmp(name
, sym_filter
))
937 sym_filter_entry
= syme
;
939 for (i
= 0; skip_symbols
[i
]; i
++) {
940 if (!strcmp(skip_symbols
[i
], name
)) {
947 syme
->name_len
= strlen(sym
->name
);
952 static void event__process_sample(const event_t
*self
, int counter
)
954 u64 ip
= self
->ip
.ip
;
956 struct sym_entry
*syme
;
958 u8 origin
= self
->header
.misc
& PERF_RECORD_MISC_CPUMODE_MASK
;
961 case PERF_RECORD_MISC_USER
: {
962 struct thread
*thread
;
964 if (hide_user_symbols
)
967 thread
= threads__findnew(self
->ip
.pid
);
971 map
= thread__find_map(thread
, ip
);
973 ip
= map
->map_ip(map
, ip
);
974 sym
= map__find_symbol(map
, ip
, symbol_filter
);
982 * If this is outside of all known maps,
983 * and is a negative address, try to look it
984 * up in the kernel dso, as it might be a
985 * vsyscall or vdso (which executes in user-mode).
987 if ((long long)ip
>= 0)
990 case PERF_RECORD_MISC_KERNEL
:
991 if (hide_kernel_symbols
)
994 sym
= kernel_maps__find_symbol(ip
, &map
, symbol_filter
);
1002 syme
= symbol__priv(sym
);
1005 syme
->count
[counter
]++;
1006 syme
->origin
= origin
;
1007 record_precise_ip(syme
, counter
, ip
);
1008 pthread_mutex_lock(&active_symbols_lock
);
1009 if (list_empty(&syme
->node
) || !syme
->node
.next
)
1010 __list_insert_active_sym(syme
);
1011 pthread_mutex_unlock(&active_symbols_lock
);
1017 static void event__process_mmap(event_t
*self
)
1019 struct thread
*thread
= threads__findnew(self
->mmap
.pid
);
1021 if (thread
!= NULL
) {
1022 struct map
*map
= map__new(&self
->mmap
, NULL
, 0);
1024 thread__insert_map(thread
, map
);
1028 static void event__process_comm(event_t
*self
)
1030 struct thread
*thread
= threads__findnew(self
->comm
.pid
);
1033 thread__set_comm(thread
, self
->comm
.comm
);
1036 static int event__process(event_t
*event
)
1038 switch (event
->header
.type
) {
1039 case PERF_RECORD_COMM
:
1040 event__process_comm(event
);
1042 case PERF_RECORD_MMAP
:
1043 event__process_mmap(event
);
1059 static unsigned int mmap_read_head(struct mmap_data
*md
)
1061 struct perf_event_mmap_page
*pc
= md
->base
;
1064 head
= pc
->data_head
;
1070 static void mmap_read_counter(struct mmap_data
*md
)
1072 unsigned int head
= mmap_read_head(md
);
1073 unsigned int old
= md
->prev
;
1074 unsigned char *data
= md
->base
+ page_size
;
1078 * If we're further behind than half the buffer, there's a chance
1079 * the writer will bite our tail and mess up the samples under us.
1081 * If we somehow ended up ahead of the head, we got messed up.
1083 * In either case, truncate and restart at head.
1086 if (diff
> md
->mask
/ 2 || diff
< 0) {
1087 fprintf(stderr
, "WARNING: failed to keep up with mmap data.\n");
1090 * head points to a known good entry, start there.
1095 for (; old
!= head
;) {
1096 event_t
*event
= (event_t
*)&data
[old
& md
->mask
];
1100 size_t size
= event
->header
.size
;
1103 * Event straddles the mmap boundary -- header should always
1104 * be inside due to u64 alignment of output.
1106 if ((old
& md
->mask
) + size
!= ((old
+ size
) & md
->mask
)) {
1107 unsigned int offset
= old
;
1108 unsigned int len
= min(sizeof(*event
), size
), cpy
;
1109 void *dst
= &event_copy
;
1112 cpy
= min(md
->mask
+ 1 - (offset
& md
->mask
), len
);
1113 memcpy(dst
, &data
[offset
& md
->mask
], cpy
);
1119 event
= &event_copy
;
1122 if (event
->header
.type
== PERF_RECORD_SAMPLE
)
1123 event__process_sample(event
, md
->counter
);
1125 event__process(event
);
1132 static struct pollfd event_array
[MAX_NR_CPUS
* MAX_COUNTERS
];
1133 static struct mmap_data mmap_array
[MAX_NR_CPUS
][MAX_COUNTERS
];
1135 static void mmap_read(void)
1139 for (i
= 0; i
< nr_cpus
; i
++) {
1140 for (counter
= 0; counter
< nr_counters
; counter
++)
1141 mmap_read_counter(&mmap_array
[i
][counter
]);
1148 static void start_counter(int i
, int counter
)
1150 struct perf_event_attr
*attr
;
1154 if (target_pid
== -1 && profile_cpu
== -1)
1157 attr
= attrs
+ counter
;
1159 attr
->sample_type
= PERF_SAMPLE_IP
| PERF_SAMPLE_TID
;
1162 attr
->sample_type
|= PERF_SAMPLE_PERIOD
;
1164 attr
->sample_freq
= freq
;
1167 attr
->inherit
= (cpu
< 0) && inherit
;
1171 fd
[i
][counter
] = sys_perf_event_open(attr
, target_pid
, cpu
, group_fd
, 0);
1173 if (fd
[i
][counter
] < 0) {
1176 if (err
== EPERM
|| err
== EACCES
)
1177 die("No permission - are you root?\n");
1179 * If it's cycles then fall back to hrtimer
1180 * based cpu-clock-tick sw counter, which
1181 * is always available even if no PMU support:
1183 if (attr
->type
== PERF_TYPE_HARDWARE
1184 && attr
->config
== PERF_COUNT_HW_CPU_CYCLES
) {
1187 warning(" ... trying to fall back to cpu-clock-ticks\n");
1189 attr
->type
= PERF_TYPE_SOFTWARE
;
1190 attr
->config
= PERF_COUNT_SW_CPU_CLOCK
;
1194 error("perfcounter syscall returned with %d (%s)\n",
1195 fd
[i
][counter
], strerror(err
));
1196 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1199 assert(fd
[i
][counter
] >= 0);
1200 fcntl(fd
[i
][counter
], F_SETFL
, O_NONBLOCK
);
1203 * First counter acts as the group leader:
1205 if (group
&& group_fd
== -1)
1206 group_fd
= fd
[i
][counter
];
1208 event_array
[nr_poll
].fd
= fd
[i
][counter
];
1209 event_array
[nr_poll
].events
= POLLIN
;
1212 mmap_array
[i
][counter
].counter
= counter
;
1213 mmap_array
[i
][counter
].prev
= 0;
1214 mmap_array
[i
][counter
].mask
= mmap_pages
*page_size
- 1;
1215 mmap_array
[i
][counter
].base
= mmap(NULL
, (mmap_pages
+1)*page_size
,
1216 PROT_READ
, MAP_SHARED
, fd
[i
][counter
], 0);
1217 if (mmap_array
[i
][counter
].base
== MAP_FAILED
)
1218 die("failed to mmap with %d (%s)\n", errno
, strerror(errno
));
1221 static int __cmd_top(void)
1227 if (target_pid
!= -1)
1228 event__synthesize_thread(target_pid
, event__process
);
1230 event__synthesize_threads(event__process
);
1232 for (i
= 0; i
< nr_cpus
; i
++) {
1234 for (counter
= 0; counter
< nr_counters
; counter
++)
1235 start_counter(i
, counter
);
1238 /* Wait for a minimal set of events before starting the snapshot */
1239 poll(event_array
, nr_poll
, 100);
1243 if (pthread_create(&thread
, NULL
, display_thread
, NULL
)) {
1244 printf("Could not create display thread.\n");
1248 if (realtime_prio
) {
1249 struct sched_param param
;
1251 param
.sched_priority
= realtime_prio
;
1252 if (sched_setscheduler(0, SCHED_FIFO
, ¶m
)) {
1253 printf("Could not set realtime priority.\n");
1263 if (hits
== samples
)
1264 ret
= poll(event_array
, nr_poll
, 100);
1270 static const char * const top_usage
[] = {
1271 "perf top [<options>]",
1275 static const struct option options
[] = {
1276 OPT_CALLBACK('e', "event", NULL
, "event",
1277 "event selector. use 'perf list' to list available events",
1279 OPT_INTEGER('c', "count", &default_interval
,
1280 "event period to sample"),
1281 OPT_INTEGER('p', "pid", &target_pid
,
1282 "profile events on existing pid"),
1283 OPT_BOOLEAN('a', "all-cpus", &system_wide
,
1284 "system-wide collection from all CPUs"),
1285 OPT_INTEGER('C', "CPU", &profile_cpu
,
1286 "CPU to profile on"),
1287 OPT_STRING('k', "vmlinux", &vmlinux_name
, "file", "vmlinux pathname"),
1288 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols
,
1289 "hide kernel symbols"),
1290 OPT_INTEGER('m', "mmap-pages", &mmap_pages
,
1291 "number of mmap data pages"),
1292 OPT_INTEGER('r', "realtime", &realtime_prio
,
1293 "collect data with this RT SCHED_FIFO priority"),
1294 OPT_INTEGER('d', "delay", &delay_secs
,
1295 "number of seconds to delay between refreshes"),
1296 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab
,
1297 "dump the symbol table used for profiling"),
1298 OPT_INTEGER('f', "count-filter", &count_filter
,
1299 "only display functions with more events than this"),
1300 OPT_BOOLEAN('g', "group", &group
,
1301 "put the counters into a counter group"),
1302 OPT_BOOLEAN('i', "inherit", &inherit
,
1303 "child tasks inherit counters"),
1304 OPT_STRING('s', "sym-annotate", &sym_filter
, "symbol name",
1305 "symbol to annotate - requires -k option"),
1306 OPT_BOOLEAN('z', "zero", &zero
,
1307 "zero history across updates"),
1308 OPT_INTEGER('F', "freq", &freq
,
1309 "profile at this frequency"),
1310 OPT_INTEGER('E', "entries", &print_entries
,
1311 "display this many functions"),
1312 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols
,
1313 "hide user symbols"),
1314 OPT_BOOLEAN('v', "verbose", &verbose
,
1315 "be more verbose (show counter open errors, etc)"),
1319 int cmd_top(int argc
, const char **argv
, const char *prefix __used
)
1323 page_size
= sysconf(_SC_PAGE_SIZE
);
1325 argc
= parse_options(argc
, argv
, options
, top_usage
, 0);
1327 usage_with_options(top_usage
, options
);
1329 /* CPU and PID are mutually exclusive */
1330 if (target_pid
!= -1 && profile_cpu
!= -1) {
1331 printf("WARNING: PID switch overriding CPU\n");
1339 symbol__init(sizeof(struct sym_entry
) +
1340 (nr_counters
+ 1) * sizeof(unsigned long));
1345 err
= kernel_maps__init(vmlinux_name
, !vmlinux_name
, true);
1348 parse_source(sym_filter_entry
);
1351 * User specified count overrides default frequency.
1353 if (default_interval
)
1356 default_interval
= freq
;
1358 fprintf(stderr
, "frequency and count are zero, aborting\n");
1363 * Fill in the ones not specifically initialized via -c:
1365 for (counter
= 0; counter
< nr_counters
; counter
++) {
1366 if (attrs
[counter
].sample_period
)
1369 attrs
[counter
].sample_period
= default_interval
;
1372 nr_cpus
= sysconf(_SC_NPROCESSORS_ONLN
);
1373 assert(nr_cpus
<= MAX_NR_CPUS
);
1374 assert(nr_cpus
>= 0);
1376 if (target_pid
!= -1 || profile_cpu
!= -1)
1379 get_term_dimensions(&winsize
);
1380 if (print_entries
== 0) {
1381 update_print_entries(&winsize
);
1382 signal(SIGWINCH
, sig_winch_handler
);