perf_counter: kerneltop: keep up with ABI changes
[deliverable/linux.git] / Documentation / perf_counter / kerneltop.c
CommitLineData
e0143bad
IM
1/*
2 * kerneltop.c: show top kernel functions - performance counters showcase
3
4 Build with:
5
cbe46555 6 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
e0143bad
IM
7
8 Sample output:
9
10------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12------------------------------------------------------------------------------
13
14 weight RIP kernel function
15 ______ ________________ _______________
16
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
f7524bda 29 */
e0143bad 30
f7524bda
WF
31/*
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
e0143bad 33
f7524bda
WF
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
e0143bad 38
f7524bda 39 Sample output:
e0143bad 40
f7524bda 41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
e0143bad 42
f7524bda
WF
43 Performance counter stats for 'ls':
44
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
e0143bad 48 */
f7524bda
WF
49
50 /*
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52 *
53 * Improvements and fixes by:
54 *
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
cbe46555 59 * Paul Mackerras <paulus@samba.org>
f7524bda
WF
60 *
61 * Released under the GPL v2. (and only v2, not any later version)
62 */
63
e0143bad
IM
64#define _GNU_SOURCE
65#include <sys/types.h>
66#include <sys/stat.h>
67#include <sys/time.h>
68#include <unistd.h>
69#include <stdint.h>
70#include <stdlib.h>
71#include <string.h>
cbe46555 72#include <limits.h>
e0143bad
IM
73#include <getopt.h>
74#include <assert.h>
75#include <fcntl.h>
76#include <stdio.h>
77#include <errno.h>
78#include <ctype.h>
79#include <time.h>
9dd49988
MG
80#include <sched.h>
81#include <pthread.h>
e0143bad 82
e0143bad
IM
83#include <sys/syscall.h>
84#include <sys/ioctl.h>
85#include <sys/poll.h>
86#include <sys/prctl.h>
87#include <sys/wait.h>
88#include <sys/uio.h>
bcbcb37c 89#include <sys/mman.h>
e0143bad
IM
90
91#include <linux/unistd.h>
cbe46555 92#include <linux/types.h>
e0143bad 93
383c5f8c 94#include "../../include/linux/perf_counter.h"
e0143bad 95
e0143bad 96
803d4f39
PZ
97/*
98 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
99 * counters in the current task.
100 */
101#define PR_TASK_PERF_COUNTERS_DISABLE 31
102#define PR_TASK_PERF_COUNTERS_ENABLE 32
103
104#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
105
106#define rdclock() \
107({ \
108 struct timespec ts; \
109 \
110 clock_gettime(CLOCK_MONOTONIC, &ts); \
111 ts.tv_sec * 1000000000ULL + ts.tv_nsec; \
112})
113
114/*
115 * Pick up some kernel type conventions:
116 */
117#define __user
118#define asmlinkage
119
/*
 * Per-architecture glue: the perf_counter_open syscall number, the
 * barrier issued after reading shared counter data (rmb) and a
 * busy-wait hint (cpu_relax).
 *
 * None of the macros carries a trailing semicolon: each expands to a
 * single statement body and the caller supplies the ';'.  With the
 * semicolon baked in, "if (x) cpu_relax(); else ..." would not compile.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()			asm volatile("lfence" ::: "memory")
#define cpu_relax()		asm volatile("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()			asm volatile ("sync" ::: "memory")
#define cpu_relax()		asm volatile ("" ::: "memory")
#endif
137
bcbcb37c 138#define unlikely(x) __builtin_expect(!!(x), 0)
00f0ad73
PZ
139#define min(x, y) ({ \
140 typeof(x) _min1 = (x); \
141 typeof(y) _min2 = (y); \
142 (void) (&_min1 == &_min2); \
143 _min1 < _min2 ? _min1 : _min2; })
bcbcb37c 144
803d4f39
PZ
145asmlinkage int sys_perf_counter_open(
146 struct perf_counter_hw_event *hw_event_uptr __user,
147 pid_t pid,
148 int cpu,
149 int group_fd,
150 unsigned long flags)
151{
cbe46555 152 return syscall(
803d4f39 153 __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags);
803d4f39
PZ
154}
155
f7524bda
WF
156#define MAX_COUNTERS 64
157#define MAX_NR_CPUS 256
158
803d4f39 159#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
f7524bda
WF
160
161static int run_perfstat = 0;
162static int system_wide = 0;
e0143bad 163
f7524bda 164static int nr_counters = 0;
803d4f39
PZ
165static __u64 event_id[MAX_COUNTERS] = {
166 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
cbe46555 167 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
803d4f39
PZ
168 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
169 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
170
171 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
172 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
173 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
174 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
175};
176static int default_interval = 100000;
e0143bad 177static int event_count[MAX_COUNTERS];
f7524bda
WF
178static int fd[MAX_NR_CPUS][MAX_COUNTERS];
179
180static __u64 count_filter = 100;
e0143bad
IM
181
182static int tid = -1;
183static int profile_cpu = -1;
184static int nr_cpus = 0;
185static int nmi = 1;
9dd49988 186static unsigned int realtime_prio = 0;
e0143bad 187static int group = 0;
bcbcb37c 188static unsigned int page_size;
00f0ad73 189static unsigned int mmap_pages = 16;
3c1ba6fa
PZ
190static int use_mmap = 0;
191static int use_munmap = 0;
e0143bad
IM
192
193static char *vmlinux;
194
195static char *sym_filter;
196static unsigned long filter_start;
197static unsigned long filter_end;
198
199static int delay_secs = 2;
200static int zero;
201static int dump_symtab;
202
31f004df
PM
203static int scale;
204
e0143bad
IM
205struct source_line {
206 uint64_t EIP;
207 unsigned long count;
208 char *line;
cbe46555 209 struct source_line *next;
e0143bad
IM
210};
211
cbe46555
PM
212static struct source_line *lines;
213static struct source_line **lines_tail;
f7524bda
WF
214
215const unsigned int default_count[] = {
81cdbe05 216 1000000,
f7524bda
WF
217 1000000,
218 10000,
219 10000,
220 1000000,
221 10000,
222};
223
224static char *hw_event_names[] = {
225 "CPU cycles",
226 "instructions",
227 "cache references",
228 "cache misses",
229 "branches",
230 "branch misses",
231 "bus cycles",
232};
233
234static char *sw_event_names[] = {
235 "cpu clock ticks",
236 "task clock ticks",
237 "pagefaults",
238 "context switches",
239 "CPU migrations",
803d4f39
PZ
240 "minor faults",
241 "major faults",
f7524bda
WF
242};
243
244struct event_symbol {
803d4f39 245 __u64 event;
f7524bda
WF
246 char *symbol;
247};
248
249static struct event_symbol event_symbols[] = {
803d4f39
PZ
250 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
251 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
252 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
253 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
254 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
255 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
256 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
257 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
258 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
259
260 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
261 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
262 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
263 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
264 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
265 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
266 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
267 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
268 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
269 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
f7524bda
WF
270};
271
803d4f39
PZ
272#define __PERF_COUNTER_FIELD(config, name) \
273 ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
274
275#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW)
276#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG)
277#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE)
278#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT)
279
f7524bda
WF
280static void display_events_help(void)
281{
282 unsigned int i;
803d4f39 283 __u64 e;
f7524bda
WF
284
285 printf(
286 " -e EVENT --event=EVENT # symbolic-name abbreviations");
287
803d4f39
PZ
288 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
289 int type, id;
290
291 e = event_symbols[i].event;
292 type = PERF_COUNTER_TYPE(e);
293 id = PERF_COUNTER_ID(e);
294
295 printf("\n %d:%d: %-20s",
296 type, id, event_symbols[i].symbol);
f7524bda
WF
297 }
298
299 printf("\n"
300 " rNNN: raw PMU events (eventsel+umask)\n\n");
301}
302
303static void display_perfstat_help(void)
304{
305 printf(
306 "Usage: perfstat [<events...>] <cmd...>\n\n"
307 "PerfStat Options (up to %d event types can be specified):\n\n",
308 MAX_COUNTERS);
309
310 display_events_help();
311
312 printf(
31f004df 313 " -l # scale counter values\n"
f7524bda
WF
314 " -a # system-wide collection\n");
315 exit(0);
316}
e0143bad
IM
317
318static void display_help(void)
319{
f7524bda
WF
320 if (run_perfstat)
321 return display_perfstat_help();
322
e0143bad 323 printf(
f7524bda
WF
324 "Usage: kerneltop [<options>]\n"
325 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
e0143bad
IM
326 "KernelTop Options (up to %d event types can be specified at once):\n\n",
327 MAX_COUNTERS);
f7524bda
WF
328
329 display_events_help();
330
e0143bad 331 printf(
f7524bda
WF
332 " -S --stat # perfstat COMMAND\n"
333 " -a # system-wide collection (for perfstat)\n\n"
e0143bad
IM
334 " -c CNT --count=CNT # event period to sample\n\n"
335 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
336 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
31f004df 337 " -l # show scale factor for RR events\n"
e0143bad 338 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
f7524bda 339 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
9dd49988 340 " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n"
e0143bad 341 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
f7524bda 342 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
e0143bad
IM
343 " -z --zero # zero counts after display\n"
344 " -D --dump_symtab # dump symbol table to stderr on startup\n"
4c4ba21d 345 " -m pages --mmap_pages=<pages> # number of mmap data pages\n"
3c1ba6fa
PZ
346 " -M --mmap_info # print mmap info stream\n"
347 " -U --munmap_info # print munmap info stream\n"
f7524bda 348 );
e0143bad
IM
349
350 exit(0);
351}
352
f7524bda
WF
353static char *event_name(int ctr)
354{
803d4f39
PZ
355 __u64 config = event_id[ctr];
356 int type = PERF_COUNTER_TYPE(config);
357 int id = PERF_COUNTER_ID(config);
f7524bda 358 static char buf[32];
e0143bad 359
803d4f39
PZ
360 if (PERF_COUNTER_RAW(config)) {
361 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
f7524bda
WF
362 return buf;
363 }
e0143bad 364
803d4f39
PZ
365 switch (type) {
366 case PERF_TYPE_HARDWARE:
367 if (id < PERF_HW_EVENTS_MAX)
368 return hw_event_names[id];
369 return "unknown-hardware";
370
371 case PERF_TYPE_SOFTWARE:
372 if (id < PERF_SW_EVENTS_MAX)
373 return sw_event_names[id];
374 return "unknown-software";
f7524bda 375
803d4f39
PZ
376 default:
377 break;
378 }
379
380 return "unknown";
f7524bda
WF
381}
382
383/*
384 * Each event can have multiple symbolic names.
385 * Symbolic names are (almost) exactly matched.
386 */
803d4f39 387static __u64 match_event_symbols(char *str)
f7524bda 388{
803d4f39
PZ
389 __u64 config, id;
390 int type;
f7524bda
WF
391 unsigned int i;
392
803d4f39
PZ
393 if (sscanf(str, "r%llx", &config) == 1)
394 return config | PERF_COUNTER_RAW_MASK;
395
396 if (sscanf(str, "%d:%llu", &type, &id) == 2)
397 return EID(type, id);
f7524bda
WF
398
399 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
400 if (!strncmp(str, event_symbols[i].symbol,
401 strlen(event_symbols[i].symbol)))
402 return event_symbols[i].event;
403 }
404
803d4f39 405 return ~0ULL;
f7524bda
WF
406}
407
408static int parse_events(char *str)
409{
803d4f39 410 __u64 config;
f7524bda
WF
411
412again:
413 if (nr_counters == MAX_COUNTERS)
414 return -1;
415
803d4f39
PZ
416 config = match_event_symbols(str);
417 if (config == ~0ULL)
418 return -1;
f7524bda 419
803d4f39 420 event_id[nr_counters] = config;
f7524bda
WF
421 nr_counters++;
422
423 str = strstr(str, ",");
424 if (str) {
425 str++;
426 goto again;
427 }
428
429 return 0;
430}
431
432
433/*
434 * perfstat
435 */
436
437char fault_here[1000000];
438
439static void create_perfstat_counter(int counter)
440{
441 struct perf_counter_hw_event hw_event;
442
443 memset(&hw_event, 0, sizeof(hw_event));
803d4f39 444 hw_event.config = event_id[counter];
3df70fd6 445 hw_event.record_type = 0;
f7524bda 446 hw_event.nmi = 0;
31f004df
PM
447 if (scale)
448 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
449 PERF_FORMAT_TOTAL_TIME_RUNNING;
f7524bda
WF
450
451 if (system_wide) {
452 int cpu;
453 for (cpu = 0; cpu < nr_cpus; cpu ++) {
454 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
455 if (fd[cpu][counter] < 0) {
456 printf("perfstat error: syscall returned with %d (%s)\n",
457 fd[cpu][counter], strerror(errno));
458 exit(-1);
e0143bad 459 }
f7524bda
WF
460 }
461 } else {
462 hw_event.inherit = 1;
463 hw_event.disabled = 1;
464
465 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
466 if (fd[0][counter] < 0) {
467 printf("perfstat error: syscall returned with %d (%s)\n",
468 fd[0][counter], strerror(errno));
469 exit(-1);
e0143bad
IM
470 }
471 }
f7524bda 472}
e0143bad 473
f7524bda
WF
474int do_perfstat(int argc, char *argv[])
475{
476 unsigned long long t0, t1;
477 int counter;
478 ssize_t res;
479 int status;
480 int pid;
481
482 if (!system_wide)
483 nr_cpus = 1;
484
485 for (counter = 0; counter < nr_counters; counter++)
486 create_perfstat_counter(counter);
487
488 argc -= optind;
489 argv += optind;
490
af9522cf
WF
491 if (!argc)
492 display_help();
493
f7524bda
WF
494 /*
495 * Enable counters and exec the command:
496 */
497 t0 = rdclock();
498 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
499
500 if ((pid = fork()) < 0)
501 perror("failed to fork");
502 if (!pid) {
503 if (execvp(argv[0], argv)) {
504 perror(argv[0]);
505 exit(-1);
506 }
95bb3be1 507 }
f7524bda
WF
508 while (wait(&status) >= 0)
509 ;
510 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
511 t1 = rdclock();
512
513 fflush(stdout);
514
515 fprintf(stderr, "\n");
516 fprintf(stderr, " Performance counter stats for \'%s\':\n",
517 argv[0]);
518 fprintf(stderr, "\n");
e0143bad
IM
519
520 for (counter = 0; counter < nr_counters; counter++) {
31f004df
PM
521 int cpu, nv;
522 __u64 count[3], single_count[3];
523 int scaled;
f7524bda 524
31f004df
PM
525 count[0] = count[1] = count[2] = 0;
526 nv = scale ? 3 : 1;
f7524bda
WF
527 for (cpu = 0; cpu < nr_cpus; cpu ++) {
528 res = read(fd[cpu][counter],
31f004df
PM
529 single_count, nv * sizeof(__u64));
530 assert(res == nv * sizeof(__u64));
531
532 count[0] += single_count[0];
533 if (scale) {
534 count[1] += single_count[1];
535 count[2] += single_count[2];
536 }
537 }
538
539 scaled = 0;
540 if (scale) {
541 if (count[2] == 0) {
542 fprintf(stderr, " %14s %-20s\n",
543 "<not counted>", event_name(counter));
544 continue;
545 }
546 if (count[2] < count[1]) {
547 scaled = 1;
548 count[0] = (unsigned long long)
549 ((double)count[0] * count[1] / count[2] + 0.5);
550 }
f7524bda 551 }
e0143bad 552
cbe46555
PM
553 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
554 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
f7524bda 555
31f004df 556 double msecs = (double)count[0] / 1000000;
f7524bda 557
31f004df 558 fprintf(stderr, " %14.6f %-20s (msecs)",
f7524bda
WF
559 msecs, event_name(counter));
560 } else {
31f004df
PM
561 fprintf(stderr, " %14Ld %-20s (events)",
562 count[0], event_name(counter));
f7524bda 563 }
31f004df
PM
564 if (scaled)
565 fprintf(stderr, " (scaled from %.2f%%)",
566 (double) count[2] / count[1] * 100);
567 fprintf(stderr, "\n");
e0143bad 568 }
f7524bda
WF
569 fprintf(stderr, "\n");
570 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
571 (double)(t1-t0)/1e6);
572 fprintf(stderr, "\n");
573
574 return 0;
e0143bad
IM
575}
576
f7524bda
WF
577/*
578 * Symbols
579 */
580
e0143bad
IM
581static uint64_t min_ip;
582static uint64_t max_ip = -1ll;
583
584struct sym_entry {
585 unsigned long long addr;
586 char *sym;
587 unsigned long count[MAX_COUNTERS];
588 int skip;
cbe46555 589 struct source_line *source;
e0143bad
IM
590};
591
592#define MAX_SYMS 100000
593
594static int sym_table_count;
595
596struct sym_entry *sym_filter_entry;
597
598static struct sym_entry sym_table[MAX_SYMS];
599
600static void show_details(struct sym_entry *sym);
601
602/*
ef45fa9e 603 * Ordering weight: count-1 * count-2 * ... / count-n
e0143bad
IM
604 */
605static double sym_weight(const struct sym_entry *sym)
606{
607 double weight;
608 int counter;
609
610 weight = sym->count[0];
611
612 for (counter = 1; counter < nr_counters-1; counter++)
613 weight *= sym->count[counter];
614
615 weight /= (sym->count[counter] + 1);
616
617 return weight;
618}
619
/*
 * qsort() comparator ordering symbols by descending sym_weight().
 *
 * Returns a negative, zero or positive value as required by qsort();
 * returning only 0/1 would make compare(a, b) and compare(b, a)
 * disagree and leave the resulting order unspecified.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	/* heavier symbols sort first */
	return (w1 < w2) - (w1 > w2);
}
626
e0143bad
IM
627static long events;
628static long userspace_events;
629static const char CONSOLE_CLEAR[] = "\e[H\e[2J";
630
631static struct sym_entry tmp[MAX_SYMS];
632
633static void print_sym_table(void)
634{
635 int i, printed;
636 int counter;
637 float events_per_sec = events/delay_secs;
638 float kevents_per_sec = (events-userspace_events)/delay_secs;
6278af66 639 float sum_kevents = 0.0;
e0143bad 640
9dd49988 641 events = userspace_events = 0;
e0143bad
IM
642 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
643 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
644
6278af66
MG
645 for (i = 0; i < sym_table_count && tmp[i].count[0]; i++)
646 sum_kevents += tmp[i].count[0];
647
e0143bad
IM
648 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
649
650 printf(
651"------------------------------------------------------------------------------\n");
6278af66 652 printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ",
e0143bad
IM
653 events_per_sec,
654 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
655 nmi ? "NMI" : "IRQ");
656
657 if (nr_counters == 1)
658 printf("%d ", event_count[0]);
659
660 for (counter = 0; counter < nr_counters; counter++) {
661 if (counter)
662 printf("/");
663
e3908612 664 printf("%s", event_name(counter));
e0143bad
IM
665 }
666
667 printf( "], ");
668
669 if (tid != -1)
670 printf(" (tid: %d", tid);
671 else
672 printf(" (all");
673
674 if (profile_cpu != -1)
675 printf(", cpu: %d)\n", profile_cpu);
676 else {
677 if (tid != -1)
678 printf(")\n");
679 else
680 printf(", %d CPUs)\n", nr_cpus);
681 }
682
683 printf("------------------------------------------------------------------------------\n\n");
684
685 if (nr_counters == 1)
6278af66 686 printf(" events pcnt");
e0143bad 687 else
6278af66 688 printf(" weight events pcnt");
e0143bad
IM
689
690 printf(" RIP kernel function\n"
6278af66 691 " ______ ______ _____ ________________ _______________\n\n"
e0143bad
IM
692 );
693
6278af66
MG
694 for (i = 0, printed = 0; i < sym_table_count; i++) {
695 float pcnt;
e0143bad
IM
696 int count;
697
6278af66
MG
698 if (printed <= 18 && tmp[i].count[0] >= count_filter) {
699 pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents));
700
701 if (nr_counters == 1)
702 printf("%19.2f - %4.1f%% - %016llx : %s\n",
703 sym_weight(tmp + i),
704 pcnt, tmp[i].addr, tmp[i].sym);
705 else
706 printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n",
707 sym_weight(tmp + i),
708 tmp[i].count[0],
709 pcnt, tmp[i].addr, tmp[i].sym);
710 printed++;
e0143bad
IM
711 }
712 /*
713 * Add decay to the counts:
714 */
715 for (count = 0; count < nr_counters; count++)
716 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
717 }
718
719 if (sym_filter_entry)
720 show_details(sym_filter_entry);
721
e0143bad
IM
722 {
723 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
724
725 if (poll(&stdin_poll, 1, 0) == 1) {
726 printf("key pressed - exiting.\n");
727 exit(0);
728 }
729 }
730}
731
9dd49988
MG
732static void *display_thread(void *arg)
733{
734 printf("KernelTop refresh period: %d seconds\n", delay_secs);
735
736 while (!sleep(delay_secs))
737 print_sym_table();
738
739 return NULL;
740}
741
e0143bad
IM
742static int read_symbol(FILE *in, struct sym_entry *s)
743{
744 static int filter_match = 0;
745 char *sym, stype;
746 char str[500];
747 int rc, pos;
748
749 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
750 if (rc == EOF)
751 return -1;
752
753 assert(rc == 3);
754
755 /* skip until end of line: */
756 pos = strlen(str);
757 do {
758 rc = fgetc(in);
759 if (rc == '\n' || rc == EOF || pos >= 499)
760 break;
761 str[pos] = rc;
762 pos++;
763 } while (1);
764 str[pos] = 0;
765
766 sym = str;
767
768 /* Filter out known duplicates and non-text symbols. */
769 if (!strcmp(sym, "_text"))
770 return 1;
771 if (!min_ip && !strcmp(sym, "_stext"))
772 return 1;
773 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
774 return 1;
775 if (stype != 'T' && stype != 't')
776 return 1;
777 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
778 return 1;
779 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
780 return 1;
781
782 s->sym = malloc(strlen(str));
783 assert(s->sym);
784
785 strcpy((char *)s->sym, str);
786 s->skip = 0;
787
788 /* Tag events to be skipped. */
789 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
790 s->skip = 1;
4c4ba21d
PZ
791 else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
792 s->skip = 1;
793 else if (!strcmp("mwait_idle", s->sym))
e0143bad
IM
794 s->skip = 1;
795
796 if (filter_match == 1) {
797 filter_end = s->addr;
798 filter_match = -1;
799 if (filter_end - filter_start > 10000) {
800 printf("hm, too large filter symbol <%s> - skipping.\n",
801 sym_filter);
802 printf("symbol filter start: %016lx\n", filter_start);
803 printf(" end: %016lx\n", filter_end);
804 filter_end = filter_start = 0;
805 sym_filter = NULL;
806 sleep(1);
807 }
808 }
809 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
810 filter_match = 1;
811 filter_start = s->addr;
812 }
813
814 return 0;
815}
816
817int compare_addr(const void *__sym1, const void *__sym2)
818{
819 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
820
821 return sym1->addr > sym2->addr;
822}
823
824static void sort_symbol_table(void)
825{
826 int i, dups;
827
828 do {
829 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
830 for (i = 0, dups = 0; i < sym_table_count; i++) {
831 if (sym_table[i].addr == sym_table[i+1].addr) {
832 sym_table[i+1].addr = -1ll;
833 dups++;
834 }
835 }
836 sym_table_count -= dups;
837 } while(dups);
838}
839
840static void parse_symbols(void)
841{
842 struct sym_entry *last;
843
844 FILE *kallsyms = fopen("/proc/kallsyms", "r");
845
846 if (!kallsyms) {
847 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
848 exit(-1);
849 }
850
851 while (!feof(kallsyms)) {
852 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
853 sym_table_count++;
854 assert(sym_table_count <= MAX_SYMS);
855 }
856 }
857
858 sort_symbol_table();
859 min_ip = sym_table[0].addr;
860 max_ip = sym_table[sym_table_count-1].addr;
861 last = sym_table + sym_table_count++;
862
863 last->addr = -1ll;
864 last->sym = "<end>";
865
866 if (filter_end) {
867 int count;
868 for (count=0; count < sym_table_count; count ++) {
869 if (!strcmp(sym_table[count].sym, sym_filter)) {
870 sym_filter_entry = &sym_table[count];
871 break;
872 }
873 }
874 }
875 if (dump_symtab) {
876 int i;
877
878 for (i = 0; i < sym_table_count; i++)
879 fprintf(stderr, "%llx %s\n",
880 sym_table[i].addr, sym_table[i].sym);
881 }
882}
883
f7524bda
WF
884/*
885 * Source lines
886 */
e0143bad
IM
887
888static void parse_vmlinux(char *filename)
889{
890 FILE *file;
891 char command[PATH_MAX*2];
892 if (!filename)
893 return;
894
895 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
896
897 file = popen(command, "r");
898 if (!file)
899 return;
900
cbe46555 901 lines_tail = &lines;
e0143bad
IM
902 while (!feof(file)) {
903 struct source_line *src;
904 size_t dummy = 0;
905 char *c;
906
907 src = malloc(sizeof(struct source_line));
f7524bda 908 assert(src != NULL);
e0143bad
IM
909 memset(src, 0, sizeof(struct source_line));
910
911 if (getline(&src->line, &dummy, file) < 0)
912 break;
913 if (!src->line)
914 break;
915
916 c = strchr(src->line, '\n');
917 if (c)
918 *c = 0;
919
cbe46555
PM
920 src->next = NULL;
921 *lines_tail = src;
922 lines_tail = &src->next;
e0143bad
IM
923
924 if (strlen(src->line)>8 && src->line[8] == ':')
925 src->EIP = strtoull(src->line, NULL, 16);
926 if (strlen(src->line)>8 && src->line[16] == ':')
927 src->EIP = strtoull(src->line, NULL, 16);
928 }
929 pclose(file);
e0143bad
IM
930}
931
932static void record_precise_ip(uint64_t ip)
933{
934 struct source_line *line;
e0143bad 935
cbe46555 936 for (line = lines; line; line = line->next) {
e0143bad
IM
937 if (line->EIP == ip)
938 line->count++;
939 if (line->EIP > ip)
940 break;
e0143bad
IM
941 }
942}
943
944static void lookup_sym_in_vmlinux(struct sym_entry *sym)
945{
946 struct source_line *line;
e0143bad
IM
947 char pattern[PATH_MAX];
948 sprintf(pattern, "<%s>:", sym->sym);
949
cbe46555 950 for (line = lines; line; line = line->next) {
e0143bad 951 if (strstr(line->line, pattern)) {
cbe46555 952 sym->source = line;
e0143bad
IM
953 break;
954 }
e0143bad
IM
955 }
956}
957
cbe46555 958static void show_lines(struct source_line *line_queue, int line_queue_count)
e0143bad
IM
959{
960 int i;
961 struct source_line *line;
962
cbe46555
PM
963 line = line_queue;
964 for (i = 0; i < line_queue_count; i++) {
e0143bad 965 printf("%8li\t%s\n", line->count, line->line);
cbe46555 966 line = line->next;
e0143bad
IM
967 }
968}
969
970#define TRACE_COUNT 3
971
972static void show_details(struct sym_entry *sym)
973{
974 struct source_line *line;
cbe46555 975 struct source_line *line_queue = NULL;
e0143bad 976 int displayed = 0;
cbe46555 977 int line_queue_count = 0;
e0143bad
IM
978
979 if (!sym->source)
980 lookup_sym_in_vmlinux(sym);
981 if (!sym->source)
982 return;
983
984 printf("Showing details for %s\n", sym->sym);
985
cbe46555
PM
986 line = sym->source;
987 while (line) {
e0143bad
IM
988 if (displayed && strstr(line->line, ">:"))
989 break;
990
cbe46555
PM
991 if (!line_queue_count)
992 line_queue = line;
993 line_queue_count ++;
e0143bad
IM
994
995 if (line->count >= count_filter) {
cbe46555
PM
996 show_lines(line_queue, line_queue_count);
997 line_queue_count = 0;
998 line_queue = NULL;
999 } else if (line_queue_count > TRACE_COUNT) {
1000 line_queue = line_queue->next;
1001 line_queue_count --;
e0143bad
IM
1002 }
1003
1004 line->count = 0;
1005 displayed++;
1006 if (displayed > 300)
1007 break;
cbe46555 1008 line = line->next;
e0143bad
IM
1009 }
1010}
1011
1012/*
1013 * Binary search in the histogram table and record the hit:
1014 */
1015static void record_ip(uint64_t ip, int counter)
1016{
1017 int left_idx, middle_idx, right_idx, idx;
1018 unsigned long left, middle, right;
1019
1020 record_precise_ip(ip);
1021
1022 left_idx = 0;
1023 right_idx = sym_table_count-1;
1024 assert(ip <= max_ip && ip >= min_ip);
1025
1026 while (left_idx + 1 < right_idx) {
1027 middle_idx = (left_idx + right_idx) / 2;
1028
1029 left = sym_table[ left_idx].addr;
1030 middle = sym_table[middle_idx].addr;
1031 right = sym_table[ right_idx].addr;
1032
1033 if (!(left <= middle && middle <= right)) {
1034 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
1035 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
1036 }
1037 assert(left <= middle && middle <= right);
1038 if (!(left <= ip && ip <= right)) {
1039 printf(" left: %016lx\n", left);
193e8df1 1040 printf(" ip: %016lx\n", (unsigned long)ip);
e0143bad
IM
1041 printf("right: %016lx\n", right);
1042 }
1043 assert(left <= ip && ip <= right);
1044 /*
1045 * [ left .... target .... middle .... right ]
1046 * => right := middle
1047 */
1048 if (ip < middle) {
1049 right_idx = middle_idx;
1050 continue;
1051 }
1052 /*
1053 * [ left .... middle ... target ... right ]
1054 * => left := middle
1055 */
1056 left_idx = middle_idx;
1057 }
1058
1059 idx = left_idx;
1060
1061 if (!sym_table[idx].skip)
1062 sym_table[idx].count[counter]++;
1063 else events--;
1064}
1065
1066static void process_event(uint64_t ip, int counter)
1067{
1068 events++;
1069
1070 if (ip < min_ip || ip > max_ip) {
1071 userspace_events++;
1072 return;
1073 }
1074
1075 record_ip(ip, counter);
1076}
1077
f7524bda
WF
1078static void process_options(int argc, char *argv[])
1079{
1080 int error = 0, counter;
1081
1082 if (strstr(argv[0], "perfstat"))
1083 run_perfstat = 1;
1084
1085 for (;;) {
1086 int option_index = 0;
1087 /** Options for getopt */
1088 static struct option long_options[] = {
1089 {"count", required_argument, NULL, 'c'},
1090 {"cpu", required_argument, NULL, 'C'},
1091 {"delay", required_argument, NULL, 'd'},
1092 {"dump_symtab", no_argument, NULL, 'D'},
1093 {"event", required_argument, NULL, 'e'},
1094 {"filter", required_argument, NULL, 'f'},
1095 {"group", required_argument, NULL, 'g'},
1096 {"help", no_argument, NULL, 'h'},
1097 {"nmi", required_argument, NULL, 'n'},
9dd49988
MG
1098 {"mmap_info", no_argument, NULL, 'M'},
1099 {"mmap_pages", required_argument, NULL, 'm'},
1100 {"munmap_info", no_argument, NULL, 'U'},
f7524bda 1101 {"pid", required_argument, NULL, 'p'},
9dd49988
MG
1102 {"realtime", required_argument, NULL, 'r'},
1103 {"scale", no_argument, NULL, 'l'},
f7524bda
WF
1104 {"symbol", required_argument, NULL, 's'},
1105 {"stat", no_argument, NULL, 'S'},
9dd49988 1106 {"vmlinux", required_argument, NULL, 'x'},
f7524bda
WF
1107 {"zero", no_argument, NULL, 'z'},
1108 {NULL, 0, NULL, 0 }
1109 };
9dd49988 1110 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
f7524bda
WF
1111 long_options, &option_index);
1112 if (c == -1)
1113 break;
1114
1115 switch (c) {
1116 case 'a': system_wide = 1; break;
803d4f39 1117 case 'c': default_interval = atoi(optarg); break;
f7524bda
WF
1118 case 'C':
1119 /* CPU and PID are mutually exclusive */
1120 if (tid != -1) {
1121 printf("WARNING: CPU switch overriding PID\n");
1122 sleep(1);
1123 tid = -1;
1124 }
1125 profile_cpu = atoi(optarg); break;
1126 case 'd': delay_secs = atoi(optarg); break;
1127 case 'D': dump_symtab = 1; break;
1128
1129 case 'e': error = parse_events(optarg); break;
1130
1131 case 'f': count_filter = atoi(optarg); break;
1132 case 'g': group = atoi(optarg); break;
1133 case 'h': display_help(); break;
31f004df 1134 case 'l': scale = 1; break;
f7524bda
WF
1135 case 'n': nmi = atoi(optarg); break;
1136 case 'p':
1137 /* CPU and PID are mutually exclusive */
1138 if (profile_cpu != -1) {
1139 printf("WARNING: PID switch overriding CPU\n");
1140 sleep(1);
1141 profile_cpu = -1;
1142 }
1143 tid = atoi(optarg); break;
9dd49988 1144 case 'r': realtime_prio = atoi(optarg); break;
f7524bda
WF
1145 case 's': sym_filter = strdup(optarg); break;
1146 case 'S': run_perfstat = 1; break;
1147 case 'x': vmlinux = strdup(optarg); break;
1148 case 'z': zero = 1; break;
4c4ba21d 1149 case 'm': mmap_pages = atoi(optarg); break;
3c1ba6fa
PZ
1150 case 'M': use_mmap = 1; break;
1151 case 'U': use_munmap = 1; break;
f7524bda
WF
1152 default: error = 1; break;
1153 }
1154 }
1155 if (error)
1156 display_help();
1157
1158 if (!nr_counters) {
1159 if (run_perfstat)
1160 nr_counters = 8;
1161 else {
1162 nr_counters = 1;
1163 event_id[0] = 0;
1164 }
1165 }
1166
1167 for (counter = 0; counter < nr_counters; counter++) {
1168 if (event_count[counter])
1169 continue;
1170
803d4f39 1171 event_count[counter] = default_interval;
f7524bda
WF
1172 }
1173}
1174
bcbcb37c
PZ
/*
 * Reader-side state for one counter's mmap()ed event buffer.
 */
struct mmap_data {
	int		counter;	/* counter index handed to process_event() */
	void		*base;		/* address returned by mmap(); mmap_read() reads event data at base + page_size */
	unsigned int	mask;		/* data area size in bytes minus one, used to wrap offsets */
	unsigned int	prev;		/* offset up to which mmap_read() has consumed */
};
1181
1182static unsigned int mmap_read_head(struct mmap_data *md)
1183{
1184 struct perf_counter_mmap_page *pc = md->base;
19556439 1185 int head;
bcbcb37c
PZ
1186
1187 head = pc->data_head;
bcbcb37c 1188 rmb();
bcbcb37c
PZ
1189
1190 return head;
1191}
1192
4c4ba21d
PZ
1193struct timeval last_read, this_read;
1194
bcbcb37c
PZ
1195static void mmap_read(struct mmap_data *md)
1196{
1197 unsigned int head = mmap_read_head(md);
1198 unsigned int old = md->prev;
1199 unsigned char *data = md->base + page_size;
00f0ad73 1200 int diff;
bcbcb37c 1201
4c4ba21d
PZ
1202 gettimeofday(&this_read, NULL);
1203
00f0ad73
PZ
1204 /*
1205 * If we're further behind than half the buffer, there's a chance
1206 * the writer will bite our tail and screw up the events under us.
1207 *
1208 * If we somehow ended up ahead of the head, we got messed up.
1209 *
1210 * In either case, truncate and restart at head.
1211 */
1212 diff = head - old;
1213 if (diff > md->mask / 2 || diff < 0) {
4c4ba21d
PZ
1214 struct timeval iv;
1215 unsigned long msecs;
1216
1217 timersub(&this_read, &last_read, &iv);
1218 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
1219
00f0ad73
PZ
1220 fprintf(stderr, "WARNING: failed to keep up with mmap data."
1221 " Last read %lu msecs ago.\n", msecs);
4c4ba21d 1222
00f0ad73
PZ
1223 /*
1224 * head points to a known good entry, start there.
1225 */
4c4ba21d 1226 old = head;
bcbcb37c
PZ
1227 }
1228
4c4ba21d
PZ
1229 last_read = this_read;
1230
bcbcb37c 1231 for (; old != head;) {
3c1ba6fa 1232 struct ip_event {
00f0ad73
PZ
1233 struct perf_event_header header;
1234 __u64 ip;
1235 __u32 pid, tid;
3c1ba6fa
PZ
1236 };
1237 struct mmap_event {
1238 struct perf_event_header header;
1239 __u32 pid, tid;
1240 __u64 start;
1241 __u64 len;
1242 __u64 pgoff;
1243 char filename[PATH_MAX];
1244 };
1245
1246 typedef union event_union {
1247 struct perf_event_header header;
1248 struct ip_event ip;
1249 struct mmap_event mmap;
1250 } event_t;
1251
1252 event_t *event = (event_t *)&data[old & md->mask];
1253
1254 event_t event_copy;
00f0ad73
PZ
1255
1256 unsigned int size = event->header.size;
1257
1258 /*
1259 * Event straddles the mmap boundary -- header should always
1260 * be inside due to u64 alignment of output.
1261 */
1262 if ((old & md->mask) + size != ((old + size) & md->mask)) {
1263 unsigned int offset = old;
3c1ba6fa 1264 unsigned int len = min(sizeof(*event), size), cpy;
00f0ad73
PZ
1265 void *dst = &event_copy;
1266
1267 do {
1268 cpy = min(md->mask + 1 - (offset & md->mask), len);
1269 memcpy(dst, &data[offset & md->mask], cpy);
1270 offset += cpy;
1271 dst += cpy;
1272 len -= cpy;
1273 } while (len);
1274
1275 event = &event_copy;
1276 }
bcbcb37c 1277
00f0ad73
PZ
1278 old += size;
1279
808382b3
PZ
1280 if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
1281 if (event->header.type & PERF_RECORD_IP)
1282 process_event(event->ip.ip, md->counter);
1283 } else {
1284 switch (event->header.type) {
1285 case PERF_EVENT_MMAP:
1286 case PERF_EVENT_MUNMAP:
1287 printf("%s: %Lu %Lu %Lu %s\n",
1288 event->header.type == PERF_EVENT_MMAP
1289 ? "mmap" : "munmap",
1290 event->mmap.start,
1291 event->mmap.len,
1292 event->mmap.pgoff,
1293 event->mmap.filename);
1294 break;
1295 }
00f0ad73 1296 }
bcbcb37c
PZ
1297 }
1298
1299 md->prev = old;
1300}
1301
e0143bad
IM
1302int main(int argc, char *argv[])
1303{
0fd112e4 1304 struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
bcbcb37c 1305 struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
e0143bad 1306 struct perf_counter_hw_event hw_event;
9dd49988 1307 pthread_t thread;
0fd112e4 1308 int i, counter, group_fd, nr_poll = 0;
e0143bad 1309 unsigned int cpu;
e0143bad
IM
1310 int ret;
1311
bcbcb37c
PZ
1312 page_size = sysconf(_SC_PAGE_SIZE);
1313
e0143bad
IM
1314 process_options(argc, argv);
1315
1316 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
f7524bda
WF
1317 assert(nr_cpus <= MAX_NR_CPUS);
1318 assert(nr_cpus >= 0);
1319
1320 if (run_perfstat)
1321 return do_perfstat(argc, argv);
1322
e0143bad
IM
1323 if (tid != -1 || profile_cpu != -1)
1324 nr_cpus = 1;
1325
cbe46555
PM
1326 parse_symbols();
1327 if (vmlinux && sym_filter_entry)
1328 parse_vmlinux(vmlinux);
1329
e0143bad
IM
1330 for (i = 0; i < nr_cpus; i++) {
1331 group_fd = -1;
1332 for (counter = 0; counter < nr_counters; counter++) {
1333
1334 cpu = profile_cpu;
1335 if (tid == -1 && profile_cpu == -1)
1336 cpu = i;
1337
1338 memset(&hw_event, 0, sizeof(hw_event));
803d4f39 1339 hw_event.config = event_id[counter];
e0143bad 1340 hw_event.irq_period = event_count[counter];
3df70fd6 1341 hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID;
e0143bad 1342 hw_event.nmi = nmi;
3c1ba6fa
PZ
1343 hw_event.mmap = use_mmap;
1344 hw_event.munmap = use_munmap;
e0143bad
IM
1345
1346 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
e0143bad 1347 if (fd[i][counter] < 0) {
cbe46555 1348 int err = errno;
e0143bad 1349 printf("kerneltop error: syscall returned with %d (%s)\n",
cbe46555
PM
1350 fd[i][counter], strerror(err));
1351 if (err == EPERM)
e0143bad
IM
1352 printf("Are you root?\n");
1353 exit(-1);
1354 }
1355 assert(fd[i][counter] >= 0);
cbe46555 1356 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
e0143bad
IM
1357
1358 /*
1359 * First counter acts as the group leader:
1360 */
1361 if (group && group_fd == -1)
1362 group_fd = fd[i][counter];
1363
0fd112e4
PZ
1364 event_array[nr_poll].fd = fd[i][counter];
1365 event_array[nr_poll].events = POLLIN;
1366 nr_poll++;
bcbcb37c
PZ
1367
1368 mmap_array[i][counter].counter = counter;
1369 mmap_array[i][counter].prev = 0;
4c4ba21d
PZ
1370 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1371 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
bcbcb37c
PZ
1372 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1373 if (mmap_array[i][counter].base == MAP_FAILED) {
1374 printf("kerneltop error: failed to mmap with %d (%s)\n",
1375 errno, strerror(errno));
1376 exit(-1);
1377 }
e0143bad
IM
1378 }
1379 }
1380
9dd49988
MG
1381 if (pthread_create(&thread, NULL, display_thread, NULL)) {
1382 printf("Could not create display thread.\n");
1383 exit(-1);
1384 }
1385
1386 if (realtime_prio) {
1387 struct sched_param param;
1388
1389 param.sched_priority = realtime_prio;
1390 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1391 printf("Could not set realtime priority.\n");
1392 exit(-1);
1393 }
1394 }
e0143bad
IM
1395
1396 while (1) {
1397 int hits = events;
1398
1399 for (i = 0; i < nr_cpus; i++) {
bcbcb37c
PZ
1400 for (counter = 0; counter < nr_counters; counter++)
1401 mmap_read(&mmap_array[i][counter]);
e0143bad
IM
1402 }
1403
e0143bad 1404 if (hits == events)
9dd49988 1405 ret = poll(event_array, nr_poll, 100);
e0143bad
IM
1406 }
1407
1408 return 0;
1409}
This page took 0.119315 seconds and 5 git commands to generate.