Commit | Line | Data |
---|---|---|
e0143bad IM |
1 | /* |
2 | * kerneltop.c: show top kernel functions - performance counters showcase | |
3 | ||
4 | Build with: | |
5 | ||
cbe46555 | 6 | cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt |
e0143bad IM |
7 | |
8 | Sample output: | |
9 | ||
10 | ------------------------------------------------------------------------------ | |
11 | KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) | |
12 | ------------------------------------------------------------------------------ | |
13 | ||
14 | weight RIP kernel function | |
15 | ______ ________________ _______________ | |
16 | ||
17 | 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev | |
18 | 33.00 - ffffffff804cb740 : sock_alloc_send_skb | |
19 | 31.26 - ffffffff804ce808 : skb_push | |
20 | 22.43 - ffffffff80510004 : tcp_established_options | |
21 | 19.00 - ffffffff8027d250 : find_get_page | |
22 | 15.76 - ffffffff804e4fc9 : eth_type_trans | |
23 | 15.20 - ffffffff804d8baa : dst_release | |
24 | 14.86 - ffffffff804cf5d8 : skb_release_head_state | |
25 | 14.00 - ffffffff802217d5 : read_hpet | |
26 | 12.00 - ffffffff804ffb7f : __ip_local_out | |
27 | 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish | |
28 | 8.54 - ffffffff805001a3 : ip_queue_xmit | |
f7524bda | 29 | */ |
e0143bad | 30 | |
f7524bda WF |
31 | /* |
32 | * perfstat: /usr/bin/time -alike performance counter statistics utility | |
e0143bad | 33 | |
f7524bda WF |
34 | It summarizes the counter events of all tasks (and child tasks), |
35 | covering all CPUs that the command (or workload) executes on. | |
36 | It only counts the per-task events of the workload started, | |
37 | independent of how many other tasks run on those CPUs. | |
e0143bad | 38 | |
f7524bda | 39 | Sample output: |
e0143bad | 40 | |
f7524bda | 41 | $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null |
e0143bad | 42 | |
f7524bda WF |
43 | Performance counter stats for 'ls': |
44 | ||
45 | 163516953 instructions | |
46 | 2295 cache-misses | |
47 | 2855182 branch-misses | |
e0143bad | 48 | */ |
f7524bda WF |
49 | |
50 | /* | |
51 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | |
52 | * | |
53 | * Improvements and fixes by: | |
54 | * | |
55 | * Arjan van de Ven <arjan@linux.intel.com> | |
56 | * Yanmin Zhang <yanmin.zhang@intel.com> | |
57 | * Wu Fengguang <fengguang.wu@intel.com> | |
58 | * Mike Galbraith <efault@gmx.de> | |
cbe46555 | 59 | * Paul Mackerras <paulus@samba.org> |
f7524bda WF |
60 | * |
61 | * Released under the GPL v2. (and only v2, not any later version) | |
62 | */ | |
63 | ||
e0143bad IM |
64 | #define _GNU_SOURCE |
65 | #include <sys/types.h> | |
66 | #include <sys/stat.h> | |
67 | #include <sys/time.h> | |
68 | #include <unistd.h> | |
69 | #include <stdint.h> | |
70 | #include <stdlib.h> | |
71 | #include <string.h> | |
cbe46555 | 72 | #include <limits.h> |
e0143bad IM |
73 | #include <getopt.h> |
74 | #include <assert.h> | |
75 | #include <fcntl.h> | |
76 | #include <stdio.h> | |
77 | #include <errno.h> | |
78 | #include <ctype.h> | |
79 | #include <time.h> | |
9dd49988 MG |
80 | #include <sched.h> |
81 | #include <pthread.h> | |
e0143bad | 82 | |
e0143bad IM |
83 | #include <sys/syscall.h> |
84 | #include <sys/ioctl.h> | |
85 | #include <sys/poll.h> | |
86 | #include <sys/prctl.h> | |
87 | #include <sys/wait.h> | |
88 | #include <sys/uio.h> | |
bcbcb37c | 89 | #include <sys/mman.h> |
e0143bad IM |
90 | |
91 | #include <linux/unistd.h> | |
cbe46555 | 92 | #include <linux/types.h> |
e0143bad | 93 | |
383c5f8c | 94 | #include "../../include/linux/perf_counter.h" |
e0143bad | 95 | |
e0143bad | 96 | |
803d4f39 PZ |
97 | /* |
98 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | |
99 | * counters in the current task. | |
100 | */ | |
101 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | |
102 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | |
103 | ||
104 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | |
105 | ||
106 | #define rdclock() \ | |
107 | ({ \ | |
108 | struct timespec ts; \ | |
109 | \ | |
110 | clock_gettime(CLOCK_MONOTONIC, &ts); \ | |
111 | ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ | |
112 | }) | |
113 | ||
114 | /* | |
115 | * Pick up some kernel type conventions: | |
116 | */ | |
117 | #define __user | |
118 | #define asmlinkage | |
119 | ||
803d4f39 | 120 | #ifdef __x86_64__ |
bcbcb37c PZ |
121 | #define __NR_perf_counter_open 295 |
122 | #define rmb() asm volatile("lfence" ::: "memory") | |
123 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); | |
803d4f39 PZ |
124 | #endif |
125 | ||
126 | #ifdef __i386__ | |
bcbcb37c PZ |
127 | #define __NR_perf_counter_open 333 |
128 | #define rmb() asm volatile("lfence" ::: "memory") | |
129 | #define cpu_relax() asm volatile("rep; nop" ::: "memory"); | |
803d4f39 PZ |
130 | #endif |
131 | ||
132 | #ifdef __powerpc__ | |
133 | #define __NR_perf_counter_open 319 | |
bcbcb37c PZ |
134 | #define rmb() asm volatile ("sync" ::: "memory") |
135 | #define cpu_relax() asm volatile ("" ::: "memory"); | |
803d4f39 PZ |
136 | #endif |
137 | ||
bcbcb37c | 138 | #define unlikely(x) __builtin_expect(!!(x), 0) |
00f0ad73 PZ |
139 | #define min(x, y) ({ \ |
140 | typeof(x) _min1 = (x); \ | |
141 | typeof(y) _min2 = (y); \ | |
142 | (void) (&_min1 == &_min2); \ | |
143 | _min1 < _min2 ? _min1 : _min2; }) | |
bcbcb37c | 144 | |
803d4f39 PZ |
145 | asmlinkage int sys_perf_counter_open( |
146 | struct perf_counter_hw_event *hw_event_uptr __user, | |
147 | pid_t pid, | |
148 | int cpu, | |
149 | int group_fd, | |
150 | unsigned long flags) | |
151 | { | |
cbe46555 | 152 | return syscall( |
803d4f39 | 153 | __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); |
803d4f39 PZ |
154 | } |
155 | ||
f7524bda WF |
156 | #define MAX_COUNTERS 64 |
157 | #define MAX_NR_CPUS 256 | |
158 | ||
803d4f39 | 159 | #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) |
f7524bda WF |
160 | |
161 | static int run_perfstat = 0; | |
162 | static int system_wide = 0; | |
e0143bad | 163 | |
f7524bda | 164 | static int nr_counters = 0; |
803d4f39 PZ |
165 | static __u64 event_id[MAX_COUNTERS] = { |
166 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), | |
cbe46555 | 167 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), |
803d4f39 PZ |
168 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), |
169 | EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), | |
170 | ||
171 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), | |
172 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), | |
173 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), | |
174 | EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), | |
175 | }; | |
176 | static int default_interval = 100000; | |
e0143bad | 177 | static int event_count[MAX_COUNTERS]; |
f7524bda WF |
178 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; |
179 | ||
180 | static __u64 count_filter = 100; | |
e0143bad IM |
181 | |
182 | static int tid = -1; | |
183 | static int profile_cpu = -1; | |
184 | static int nr_cpus = 0; | |
185 | static int nmi = 1; | |
9dd49988 | 186 | static unsigned int realtime_prio = 0; |
e0143bad | 187 | static int group = 0; |
bcbcb37c | 188 | static unsigned int page_size; |
00f0ad73 | 189 | static unsigned int mmap_pages = 16; |
3c1ba6fa PZ |
190 | static int use_mmap = 0; |
191 | static int use_munmap = 0; | |
e0143bad IM |
192 | |
193 | static char *vmlinux; | |
194 | ||
195 | static char *sym_filter; | |
196 | static unsigned long filter_start; | |
197 | static unsigned long filter_end; | |
198 | ||
199 | static int delay_secs = 2; | |
200 | static int zero; | |
201 | static int dump_symtab; | |
202 | ||
31f004df PM |
203 | static int scale; |
204 | ||
e0143bad IM |
/*
 * One line of text read from the objdump pipe in parse_vmlinux().
 * Nodes are chained into a singly linked list through 'next'.
 */
205 | struct source_line { |
/* address parsed from the start of the line; stays 0 when none was parsed */
206 | uint64_t EIP; | |
/* incremented by record_precise_ip() whenever a recorded ip equals EIP */
207 | unsigned long count; | |
/* the text of the line with its trailing newline cut off */
208 | char *line; | |
/* following node in the list, NULL on the last one */
cbe46555 | 209 | struct source_line *next; |
e0143bad IM |
210 | }; |
211 | ||
cbe46555 PM |
212 | static struct source_line *lines; |
213 | static struct source_line **lines_tail; | |
f7524bda WF |
214 | |
215 | const unsigned int default_count[] = { | |
81cdbe05 | 216 | 1000000, |
f7524bda WF |
217 | 1000000, |
218 | 10000, | |
219 | 10000, | |
220 | 1000000, | |
221 | 10000, | |
222 | }; | |
223 | ||
224 | static char *hw_event_names[] = { | |
225 | "CPU cycles", | |
226 | "instructions", | |
227 | "cache references", | |
228 | "cache misses", | |
229 | "branches", | |
230 | "branch misses", | |
231 | "bus cycles", | |
232 | }; | |
233 | ||
234 | static char *sw_event_names[] = { | |
235 | "cpu clock ticks", | |
236 | "task clock ticks", | |
237 | "pagefaults", | |
238 | "context switches", | |
239 | "CPU migrations", | |
803d4f39 PZ |
240 | "minor faults", |
241 | "major faults", | |
f7524bda WF |
242 | }; |
243 | ||
/*
 * Maps one symbolic event name, as accepted by match_event_symbols(),
 * to the counter config value built with EID(type, id).
 */
244 | struct event_symbol { |
/* packed event type and id */
803d4f39 | 245 | __u64 event; |
f7524bda WF |
/* name compared against the user-supplied event string */
246 | char *symbol; |
247 | }; |
248 | ||
249 | static struct event_symbol event_symbols[] = { | |
803d4f39 PZ |
250 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, |
251 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, | |
252 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, | |
253 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, | |
254 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, | |
255 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, | |
256 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, | |
257 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, | |
258 | {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, | |
259 | ||
260 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, | |
261 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, | |
262 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, | |
263 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, | |
264 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, | |
265 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, | |
266 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, | |
267 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, | |
268 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, | |
269 | {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, | |
f7524bda WF |
270 | }; |
271 | ||
803d4f39 PZ |
272 | #define __PERF_COUNTER_FIELD(config, name) \ |
273 | ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) | |
274 | ||
275 | #define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) | |
276 | #define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) | |
277 | #define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) | |
278 | #define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) | |
279 | ||
f7524bda WF |
280 | static void display_events_help(void) |
281 | { | |
282 | unsigned int i; | |
803d4f39 | 283 | __u64 e; |
f7524bda WF |
284 | |
285 | printf( | |
286 | " -e EVENT --event=EVENT # symbolic-name abbreviations"); | |
287 | ||
803d4f39 PZ |
288 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { |
289 | int type, id; | |
290 | ||
291 | e = event_symbols[i].event; | |
292 | type = PERF_COUNTER_TYPE(e); | |
293 | id = PERF_COUNTER_ID(e); | |
294 | ||
295 | printf("\n %d:%d: %-20s", | |
296 | type, id, event_symbols[i].symbol); | |
f7524bda WF |
297 | } |
298 | ||
299 | printf("\n" | |
300 | " rNNN: raw PMU events (eventsel+umask)\n\n"); | |
301 | } | |
302 | ||
303 | static void display_perfstat_help(void) | |
304 | { | |
305 | printf( | |
306 | "Usage: perfstat [<events...>] <cmd...>\n\n" | |
307 | "PerfStat Options (up to %d event types can be specified):\n\n", | |
308 | MAX_COUNTERS); | |
309 | ||
310 | display_events_help(); | |
311 | ||
312 | printf( | |
31f004df | 313 | " -l # scale counter values\n" |
f7524bda WF |
314 | " -a # system-wide collection\n"); |
315 | exit(0); | |
316 | } | |
e0143bad IM |
317 | |
318 | static void display_help(void) | |
319 | { | |
f7524bda WF |
320 | if (run_perfstat) |
321 | return display_perfstat_help(); | |
322 | ||
e0143bad | 323 | printf( |
f7524bda WF |
324 | "Usage: kerneltop [<options>]\n" |
325 | " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n" | |
e0143bad IM |
326 | "KernelTop Options (up to %d event types can be specified at once):\n\n", |
327 | MAX_COUNTERS); | |
f7524bda WF |
328 | |
329 | display_events_help(); | |
330 | ||
e0143bad | 331 | printf( |
f7524bda WF |
332 | " -S --stat # perfstat COMMAND\n" |
333 | " -a # system-wide collection (for perfstat)\n\n" | |
e0143bad IM |
334 | " -c CNT --count=CNT # event period to sample\n\n" |
335 | " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" | |
336 | " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" | |
31f004df | 337 | " -l # show scale factor for RR events\n" |
e0143bad | 338 | " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n" |
f7524bda | 339 | " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" |
9dd49988 | 340 | " -r prio --realtime=<prio> # event acquisition runs with SCHED_FIFO policy\n" |
e0143bad | 341 | " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" |
f7524bda | 342 | " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n" |
e0143bad IM |
343 | " -z --zero # zero counts after display\n" |
344 | " -D --dump_symtab # dump symbol table to stderr on startup\n" | |
4c4ba21d | 345 | " -m pages --mmap_pages=<pages> # number of mmap data pages\n" |
3c1ba6fa PZ |
346 | " -M --mmap_info # print mmap info stream\n" |
347 | " -U --munmap_info # print munmap info stream\n" | |
f7524bda | 348 | ); |
e0143bad IM |
349 | |
350 | exit(0); | |
351 | } | |
352 | ||
f7524bda WF |
353 | static char *event_name(int ctr) |
354 | { | |
803d4f39 PZ |
355 | __u64 config = event_id[ctr]; |
356 | int type = PERF_COUNTER_TYPE(config); | |
357 | int id = PERF_COUNTER_ID(config); | |
f7524bda | 358 | static char buf[32]; |
e0143bad | 359 | |
803d4f39 PZ |
360 | if (PERF_COUNTER_RAW(config)) { |
361 | sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); | |
f7524bda WF |
362 | return buf; |
363 | } | |
e0143bad | 364 | |
803d4f39 PZ |
365 | switch (type) { |
366 | case PERF_TYPE_HARDWARE: | |
367 | if (id < PERF_HW_EVENTS_MAX) | |
368 | return hw_event_names[id]; | |
369 | return "unknown-hardware"; | |
370 | ||
371 | case PERF_TYPE_SOFTWARE: | |
372 | if (id < PERF_SW_EVENTS_MAX) | |
373 | return sw_event_names[id]; | |
374 | return "unknown-software"; | |
f7524bda | 375 | |
803d4f39 PZ |
376 | default: |
377 | break; | |
378 | } | |
379 | ||
380 | return "unknown"; | |
f7524bda WF |
381 | } |
382 | ||
383 | /* | |
384 | * Each event can have multiple symbolic names. | |
385 | * Symbolic names are (almost) exactly matched. | |
386 | */ | |
803d4f39 | 387 | static __u64 match_event_symbols(char *str) |
f7524bda | 388 | { |
803d4f39 PZ |
389 | __u64 config, id; |
390 | int type; | |
f7524bda WF |
391 | unsigned int i; |
392 | ||
803d4f39 PZ |
393 | if (sscanf(str, "r%llx", &config) == 1) |
394 | return config | PERF_COUNTER_RAW_MASK; | |
395 | ||
396 | if (sscanf(str, "%d:%llu", &type, &id) == 2) | |
397 | return EID(type, id); | |
f7524bda WF |
398 | |
399 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | |
400 | if (!strncmp(str, event_symbols[i].symbol, | |
401 | strlen(event_symbols[i].symbol))) | |
402 | return event_symbols[i].event; | |
403 | } | |
404 | ||
803d4f39 | 405 | return ~0ULL; |
f7524bda WF |
406 | } |
407 | ||
408 | static int parse_events(char *str) | |
409 | { | |
803d4f39 | 410 | __u64 config; |
f7524bda WF |
411 | |
412 | again: | |
413 | if (nr_counters == MAX_COUNTERS) | |
414 | return -1; | |
415 | ||
803d4f39 PZ |
416 | config = match_event_symbols(str); |
417 | if (config == ~0ULL) | |
418 | return -1; | |
f7524bda | 419 | |
803d4f39 | 420 | event_id[nr_counters] = config; |
f7524bda WF |
421 | nr_counters++; |
422 | ||
423 | str = strstr(str, ","); | |
424 | if (str) { | |
425 | str++; | |
426 | goto again; | |
427 | } | |
428 | ||
429 | return 0; | |
430 | } | |
431 | ||
432 | ||
433 | /* | |
434 | * perfstat | |
435 | */ | |
436 | ||
437 | char fault_here[1000000]; | |
438 | ||
439 | static void create_perfstat_counter(int counter) | |
440 | { | |
441 | struct perf_counter_hw_event hw_event; | |
442 | ||
443 | memset(&hw_event, 0, sizeof(hw_event)); | |
803d4f39 | 444 | hw_event.config = event_id[counter]; |
3df70fd6 | 445 | hw_event.record_type = 0; |
f7524bda | 446 | hw_event.nmi = 0; |
31f004df PM |
447 | if (scale) |
448 | hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | |
449 | PERF_FORMAT_TOTAL_TIME_RUNNING; | |
f7524bda WF |
450 | |
451 | if (system_wide) { | |
452 | int cpu; | |
453 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | |
454 | fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); | |
455 | if (fd[cpu][counter] < 0) { | |
456 | printf("perfstat error: syscall returned with %d (%s)\n", | |
457 | fd[cpu][counter], strerror(errno)); | |
458 | exit(-1); | |
e0143bad | 459 | } |
f7524bda WF |
460 | } |
461 | } else { | |
462 | hw_event.inherit = 1; | |
463 | hw_event.disabled = 1; | |
464 | ||
465 | fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); | |
466 | if (fd[0][counter] < 0) { | |
467 | printf("perfstat error: syscall returned with %d (%s)\n", | |
468 | fd[0][counter], strerror(errno)); | |
469 | exit(-1); | |
e0143bad IM |
470 | } |
471 | } | |
f7524bda | 472 | } |
e0143bad | 473 | |
f7524bda WF |
474 | int do_perfstat(int argc, char *argv[]) |
475 | { | |
476 | unsigned long long t0, t1; | |
477 | int counter; | |
478 | ssize_t res; | |
479 | int status; | |
480 | int pid; | |
481 | ||
482 | if (!system_wide) | |
483 | nr_cpus = 1; | |
484 | ||
485 | for (counter = 0; counter < nr_counters; counter++) | |
486 | create_perfstat_counter(counter); | |
487 | ||
488 | argc -= optind; | |
489 | argv += optind; | |
490 | ||
af9522cf WF |
491 | if (!argc) |
492 | display_help(); | |
493 | ||
f7524bda WF |
494 | /* |
495 | * Enable counters and exec the command: | |
496 | */ | |
497 | t0 = rdclock(); | |
498 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | |
499 | ||
500 | if ((pid = fork()) < 0) | |
501 | perror("failed to fork"); | |
502 | if (!pid) { | |
503 | if (execvp(argv[0], argv)) { | |
504 | perror(argv[0]); | |
505 | exit(-1); | |
506 | } | |
95bb3be1 | 507 | } |
f7524bda WF |
508 | while (wait(&status) >= 0) |
509 | ; | |
510 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | |
511 | t1 = rdclock(); | |
512 | ||
513 | fflush(stdout); | |
514 | ||
515 | fprintf(stderr, "\n"); | |
516 | fprintf(stderr, " Performance counter stats for \'%s\':\n", | |
517 | argv[0]); | |
518 | fprintf(stderr, "\n"); | |
e0143bad IM |
519 | |
520 | for (counter = 0; counter < nr_counters; counter++) { | |
31f004df PM |
521 | int cpu, nv; |
522 | __u64 count[3], single_count[3]; | |
523 | int scaled; | |
f7524bda | 524 | |
31f004df PM |
525 | count[0] = count[1] = count[2] = 0; |
526 | nv = scale ? 3 : 1; | |
f7524bda WF |
527 | for (cpu = 0; cpu < nr_cpus; cpu ++) { |
528 | res = read(fd[cpu][counter], | |
31f004df PM |
529 | single_count, nv * sizeof(__u64)); |
530 | assert(res == nv * sizeof(__u64)); | |
531 | ||
532 | count[0] += single_count[0]; | |
533 | if (scale) { | |
534 | count[1] += single_count[1]; | |
535 | count[2] += single_count[2]; | |
536 | } | |
537 | } | |
538 | ||
539 | scaled = 0; | |
540 | if (scale) { | |
541 | if (count[2] == 0) { | |
542 | fprintf(stderr, " %14s %-20s\n", | |
543 | "<not counted>", event_name(counter)); | |
544 | continue; | |
545 | } | |
546 | if (count[2] < count[1]) { | |
547 | scaled = 1; | |
548 | count[0] = (unsigned long long) | |
549 | ((double)count[0] * count[1] / count[2] + 0.5); | |
550 | } | |
f7524bda | 551 | } |
e0143bad | 552 | |
cbe46555 PM |
553 | if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || |
554 | event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { | |
f7524bda | 555 | |
31f004df | 556 | double msecs = (double)count[0] / 1000000; |
f7524bda | 557 | |
31f004df | 558 | fprintf(stderr, " %14.6f %-20s (msecs)", |
f7524bda WF |
559 | msecs, event_name(counter)); |
560 | } else { | |
31f004df PM |
561 | fprintf(stderr, " %14Ld %-20s (events)", |
562 | count[0], event_name(counter)); | |
f7524bda | 563 | } |
31f004df PM |
564 | if (scaled) |
565 | fprintf(stderr, " (scaled from %.2f%%)", | |
566 | (double) count[2] / count[1] * 100); | |
567 | fprintf(stderr, "\n"); | |
e0143bad | 568 | } |
f7524bda WF |
569 | fprintf(stderr, "\n"); |
570 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | |
571 | (double)(t1-t0)/1e6); | |
572 | fprintf(stderr, "\n"); | |
573 | ||
574 | return 0; | |
e0143bad IM |
575 | } |
576 | ||
f7524bda WF |
577 | /* |
578 | * Symbols | |
579 | */ | |
580 | ||
e0143bad IM |
581 | static uint64_t min_ip; |
582 | static uint64_t max_ip = -1ll; | |
583 | ||
/*
 * One kernel symbol read from /proc/kallsyms, together with the
 * per-counter statistics collected for it.
 */
584 | struct sym_entry { |
/* symbol address as parsed by read_symbol() */
585 | unsigned long long addr; | |
/* symbol name */
586 | char *sym; | |
/* one slot per counter; decayed or zeroed by print_sym_table() */
587 | unsigned long count[MAX_COUNTERS]; | |
/* set to 1 by read_symbol() for the idle-loop functions */
588 | int skip; | |
/* first matching objdump line, filled in by lookup_sym_in_vmlinux() */
cbe46555 | 589 | struct source_line *source; |
e0143bad IM |
590 | }; |
591 | ||
592 | #define MAX_SYMS 100000 | |
593 | ||
594 | static int sym_table_count; | |
595 | ||
596 | struct sym_entry *sym_filter_entry; | |
597 | ||
598 | static struct sym_entry sym_table[MAX_SYMS]; | |
599 | ||
600 | static void show_details(struct sym_entry *sym); | |
601 | ||
602 | /* | |
ef45fa9e | 603 | * Ordering weight: count-1 * count-2 * ... / count-n |
e0143bad IM |
604 | */ |
605 | static double sym_weight(const struct sym_entry *sym) | |
606 | { | |
607 | double weight; | |
608 | int counter; | |
609 | ||
610 | weight = sym->count[0]; | |
611 | ||
612 | for (counter = 1; counter < nr_counters-1; counter++) | |
613 | weight *= sym->count[counter]; | |
614 | ||
615 | weight /= (sym->count[counter] + 1); | |
616 | ||
617 | return weight; | |
618 | } | |
619 | ||
/*
 * qsort() comparator ordering sym_entry elements by descending
 * sym_weight().
 *
 * Returns a positive value when the first element is lighter, a
 * negative value when it is heavier and 0 on equal weights.  qsort()
 * requires this three-way result; reporting only "less than" as 0/1
 * makes the comparison inconsistent when the arguments are swapped.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	return (w1 < w2) - (w1 > w2);
}
626 | ||
e0143bad IM |
627 | static long events; |
628 | static long userspace_events; | |
629 | static const char CONSOLE_CLEAR[] = "\e[H\e[2J"; | |
630 | ||
631 | static struct sym_entry tmp[MAX_SYMS]; | |
632 | ||
633 | static void print_sym_table(void) | |
634 | { | |
635 | int i, printed; | |
636 | int counter; | |
637 | float events_per_sec = events/delay_secs; | |
638 | float kevents_per_sec = (events-userspace_events)/delay_secs; | |
6278af66 | 639 | float sum_kevents = 0.0; |
e0143bad | 640 | |
9dd49988 | 641 | events = userspace_events = 0; |
e0143bad IM |
642 | memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); |
643 | qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); | |
644 | ||
6278af66 MG |
645 | for (i = 0; i < sym_table_count && tmp[i].count[0]; i++) |
646 | sum_kevents += tmp[i].count[0]; | |
647 | ||
e0143bad IM |
648 | write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); |
649 | ||
650 | printf( | |
651 | "------------------------------------------------------------------------------\n"); | |
6278af66 | 652 | printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ", |
e0143bad IM |
653 | events_per_sec, |
654 | 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), | |
655 | nmi ? "NMI" : "IRQ"); | |
656 | ||
657 | if (nr_counters == 1) | |
658 | printf("%d ", event_count[0]); | |
659 | ||
660 | for (counter = 0; counter < nr_counters; counter++) { | |
661 | if (counter) | |
662 | printf("/"); | |
663 | ||
e3908612 | 664 | printf("%s", event_name(counter)); |
e0143bad IM |
665 | } |
666 | ||
667 | printf( "], "); | |
668 | ||
669 | if (tid != -1) | |
670 | printf(" (tid: %d", tid); | |
671 | else | |
672 | printf(" (all"); | |
673 | ||
674 | if (profile_cpu != -1) | |
675 | printf(", cpu: %d)\n", profile_cpu); | |
676 | else { | |
677 | if (tid != -1) | |
678 | printf(")\n"); | |
679 | else | |
680 | printf(", %d CPUs)\n", nr_cpus); | |
681 | } | |
682 | ||
683 | printf("------------------------------------------------------------------------------\n\n"); | |
684 | ||
685 | if (nr_counters == 1) | |
6278af66 | 686 | printf(" events pcnt"); |
e0143bad | 687 | else |
6278af66 | 688 | printf(" weight events pcnt"); |
e0143bad IM |
689 | |
690 | printf(" RIP kernel function\n" | |
6278af66 | 691 | " ______ ______ _____ ________________ _______________\n\n" |
e0143bad IM |
692 | ); |
693 | ||
6278af66 MG |
694 | for (i = 0, printed = 0; i < sym_table_count; i++) { |
695 | float pcnt; | |
e0143bad IM |
696 | int count; |
697 | ||
6278af66 MG |
698 | if (printed <= 18 && tmp[i].count[0] >= count_filter) { |
699 | pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents)); | |
700 | ||
701 | if (nr_counters == 1) | |
702 | printf("%19.2f - %4.1f%% - %016llx : %s\n", | |
703 | sym_weight(tmp + i), | |
704 | pcnt, tmp[i].addr, tmp[i].sym); | |
705 | else | |
706 | printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n", | |
707 | sym_weight(tmp + i), | |
708 | tmp[i].count[0], | |
709 | pcnt, tmp[i].addr, tmp[i].sym); | |
710 | printed++; | |
e0143bad IM |
711 | } |
712 | /* | |
713 | * Add decay to the counts: | |
714 | */ | |
715 | for (count = 0; count < nr_counters; count++) | |
716 | sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; | |
717 | } | |
718 | ||
719 | if (sym_filter_entry) | |
720 | show_details(sym_filter_entry); | |
721 | ||
e0143bad IM |
722 | { |
723 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | |
724 | ||
725 | if (poll(&stdin_poll, 1, 0) == 1) { | |
726 | printf("key pressed - exiting.\n"); | |
727 | exit(0); | |
728 | } | |
729 | } | |
730 | } | |
731 | ||
9dd49988 MG |
732 | static void *display_thread(void *arg) |
733 | { | |
734 | printf("KernelTop refresh period: %d seconds\n", delay_secs); | |
735 | ||
736 | while (!sleep(delay_secs)) | |
737 | print_sym_table(); | |
738 | ||
739 | return NULL; | |
740 | } | |
741 | ||
e0143bad IM |
742 | static int read_symbol(FILE *in, struct sym_entry *s) |
743 | { | |
744 | static int filter_match = 0; | |
745 | char *sym, stype; | |
746 | char str[500]; | |
747 | int rc, pos; | |
748 | ||
749 | rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); | |
750 | if (rc == EOF) | |
751 | return -1; | |
752 | ||
753 | assert(rc == 3); | |
754 | ||
755 | /* skip until end of line: */ | |
756 | pos = strlen(str); | |
757 | do { | |
758 | rc = fgetc(in); | |
759 | if (rc == '\n' || rc == EOF || pos >= 499) | |
760 | break; | |
761 | str[pos] = rc; | |
762 | pos++; | |
763 | } while (1); | |
764 | str[pos] = 0; | |
765 | ||
766 | sym = str; | |
767 | ||
768 | /* Filter out known duplicates and non-text symbols. */ | |
769 | if (!strcmp(sym, "_text")) | |
770 | return 1; | |
771 | if (!min_ip && !strcmp(sym, "_stext")) | |
772 | return 1; | |
773 | if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) | |
774 | return 1; | |
775 | if (stype != 'T' && stype != 't') | |
776 | return 1; | |
777 | if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) | |
778 | return 1; | |
779 | if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) | |
780 | return 1; | |
781 | ||
782 | s->sym = malloc(strlen(str)); | |
783 | assert(s->sym); | |
784 | ||
785 | strcpy((char *)s->sym, str); | |
786 | s->skip = 0; | |
787 | ||
788 | /* Tag events to be skipped. */ | |
789 | if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) | |
790 | s->skip = 1; | |
4c4ba21d PZ |
791 | else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) |
792 | s->skip = 1; | |
793 | else if (!strcmp("mwait_idle", s->sym)) | |
e0143bad IM |
794 | s->skip = 1; |
795 | ||
796 | if (filter_match == 1) { | |
797 | filter_end = s->addr; | |
798 | filter_match = -1; | |
799 | if (filter_end - filter_start > 10000) { | |
800 | printf("hm, too large filter symbol <%s> - skipping.\n", | |
801 | sym_filter); | |
802 | printf("symbol filter start: %016lx\n", filter_start); | |
803 | printf(" end: %016lx\n", filter_end); | |
804 | filter_end = filter_start = 0; | |
805 | sym_filter = NULL; | |
806 | sleep(1); | |
807 | } | |
808 | } | |
809 | if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { | |
810 | filter_match = 1; | |
811 | filter_start = s->addr; | |
812 | } | |
813 | ||
814 | return 0; | |
815 | } | |
816 | ||
817 | int compare_addr(const void *__sym1, const void *__sym2) | |
818 | { | |
819 | const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; | |
820 | ||
821 | return sym1->addr > sym2->addr; | |
822 | } | |
823 | ||
824 | static void sort_symbol_table(void) | |
825 | { | |
826 | int i, dups; | |
827 | ||
828 | do { | |
829 | qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); | |
830 | for (i = 0, dups = 0; i < sym_table_count; i++) { | |
831 | if (sym_table[i].addr == sym_table[i+1].addr) { | |
832 | sym_table[i+1].addr = -1ll; | |
833 | dups++; | |
834 | } | |
835 | } | |
836 | sym_table_count -= dups; | |
837 | } while(dups); | |
838 | } | |
839 | ||
840 | static void parse_symbols(void) | |
841 | { | |
842 | struct sym_entry *last; | |
843 | ||
844 | FILE *kallsyms = fopen("/proc/kallsyms", "r"); | |
845 | ||
846 | if (!kallsyms) { | |
847 | printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); | |
848 | exit(-1); | |
849 | } | |
850 | ||
851 | while (!feof(kallsyms)) { | |
852 | if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { | |
853 | sym_table_count++; | |
854 | assert(sym_table_count <= MAX_SYMS); | |
855 | } | |
856 | } | |
857 | ||
858 | sort_symbol_table(); | |
859 | min_ip = sym_table[0].addr; | |
860 | max_ip = sym_table[sym_table_count-1].addr; | |
861 | last = sym_table + sym_table_count++; | |
862 | ||
863 | last->addr = -1ll; | |
864 | last->sym = "<end>"; | |
865 | ||
866 | if (filter_end) { | |
867 | int count; | |
868 | for (count=0; count < sym_table_count; count ++) { | |
869 | if (!strcmp(sym_table[count].sym, sym_filter)) { | |
870 | sym_filter_entry = &sym_table[count]; | |
871 | break; | |
872 | } | |
873 | } | |
874 | } | |
875 | if (dump_symtab) { | |
876 | int i; | |
877 | ||
878 | for (i = 0; i < sym_table_count; i++) | |
879 | fprintf(stderr, "%llx %s\n", | |
880 | sym_table[i].addr, sym_table[i].sym); | |
881 | } | |
882 | } | |
883 | ||
f7524bda WF |
884 | /* |
885 | * Source lines | |
886 | */ | |
e0143bad IM |
887 | |
888 | static void parse_vmlinux(char *filename) | |
889 | { | |
890 | FILE *file; | |
891 | char command[PATH_MAX*2]; | |
892 | if (!filename) | |
893 | return; | |
894 | ||
895 | sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); | |
896 | ||
897 | file = popen(command, "r"); | |
898 | if (!file) | |
899 | return; | |
900 | ||
cbe46555 | 901 | lines_tail = &lines; |
e0143bad IM |
902 | while (!feof(file)) { |
903 | struct source_line *src; | |
904 | size_t dummy = 0; | |
905 | char *c; | |
906 | ||
907 | src = malloc(sizeof(struct source_line)); | |
f7524bda | 908 | assert(src != NULL); |
e0143bad IM |
909 | memset(src, 0, sizeof(struct source_line)); |
910 | ||
911 | if (getline(&src->line, &dummy, file) < 0) | |
912 | break; | |
913 | if (!src->line) | |
914 | break; | |
915 | ||
916 | c = strchr(src->line, '\n'); | |
917 | if (c) | |
918 | *c = 0; | |
919 | ||
cbe46555 PM |
920 | src->next = NULL; |
921 | *lines_tail = src; | |
922 | lines_tail = &src->next; | |
e0143bad IM |
923 | |
924 | if (strlen(src->line)>8 && src->line[8] == ':') | |
925 | src->EIP = strtoull(src->line, NULL, 16); | |
926 | if (strlen(src->line)>8 && src->line[16] == ':') | |
927 | src->EIP = strtoull(src->line, NULL, 16); | |
928 | } | |
929 | pclose(file); | |
e0143bad IM |
930 | } |
931 | ||
932 | static void record_precise_ip(uint64_t ip) | |
933 | { | |
934 | struct source_line *line; | |
e0143bad | 935 | |
cbe46555 | 936 | for (line = lines; line; line = line->next) { |
e0143bad IM |
937 | if (line->EIP == ip) |
938 | line->count++; | |
939 | if (line->EIP > ip) | |
940 | break; | |
e0143bad IM |
941 | } |
942 | } | |
943 | ||
944 | static void lookup_sym_in_vmlinux(struct sym_entry *sym) | |
945 | { | |
946 | struct source_line *line; | |
e0143bad IM |
947 | char pattern[PATH_MAX]; |
948 | sprintf(pattern, "<%s>:", sym->sym); | |
949 | ||
cbe46555 | 950 | for (line = lines; line; line = line->next) { |
e0143bad | 951 | if (strstr(line->line, pattern)) { |
cbe46555 | 952 | sym->source = line; |
e0143bad IM |
953 | break; |
954 | } | |
e0143bad IM |
955 | } |
956 | } | |
957 | ||
cbe46555 | 958 | static void show_lines(struct source_line *line_queue, int line_queue_count) |
e0143bad IM |
959 | { |
960 | int i; | |
961 | struct source_line *line; | |
962 | ||
cbe46555 PM |
963 | line = line_queue; |
964 | for (i = 0; i < line_queue_count; i++) { | |
e0143bad | 965 | printf("%8li\t%s\n", line->count, line->line); |
cbe46555 | 966 | line = line->next; |
e0143bad IM |
967 | } |
968 | } | |
969 | ||
970 | #define TRACE_COUNT 3 | |
971 | ||
972 | static void show_details(struct sym_entry *sym) | |
973 | { | |
974 | struct source_line *line; | |
cbe46555 | 975 | struct source_line *line_queue = NULL; |
e0143bad | 976 | int displayed = 0; |
cbe46555 | 977 | int line_queue_count = 0; |
e0143bad IM |
978 | |
979 | if (!sym->source) | |
980 | lookup_sym_in_vmlinux(sym); | |
981 | if (!sym->source) | |
982 | return; | |
983 | ||
984 | printf("Showing details for %s\n", sym->sym); | |
985 | ||
cbe46555 PM |
986 | line = sym->source; |
987 | while (line) { | |
e0143bad IM |
988 | if (displayed && strstr(line->line, ">:")) |
989 | break; | |
990 | ||
cbe46555 PM |
991 | if (!line_queue_count) |
992 | line_queue = line; | |
993 | line_queue_count ++; | |
e0143bad IM |
994 | |
995 | if (line->count >= count_filter) { | |
cbe46555 PM |
996 | show_lines(line_queue, line_queue_count); |
997 | line_queue_count = 0; | |
998 | line_queue = NULL; | |
999 | } else if (line_queue_count > TRACE_COUNT) { | |
1000 | line_queue = line_queue->next; | |
1001 | line_queue_count --; | |
e0143bad IM |
1002 | } |
1003 | ||
1004 | line->count = 0; | |
1005 | displayed++; | |
1006 | if (displayed > 300) | |
1007 | break; | |
cbe46555 | 1008 | line = line->next; |
e0143bad IM |
1009 | } |
1010 | } | |
1011 | ||
1012 | /* | |
1013 | * Binary search in the histogram table and record the hit: | |
1014 | */ | |
1015 | static void record_ip(uint64_t ip, int counter) | |
1016 | { | |
1017 | int left_idx, middle_idx, right_idx, idx; | |
1018 | unsigned long left, middle, right; | |
1019 | ||
1020 | record_precise_ip(ip); | |
1021 | ||
1022 | left_idx = 0; | |
1023 | right_idx = sym_table_count-1; | |
1024 | assert(ip <= max_ip && ip >= min_ip); | |
1025 | ||
1026 | while (left_idx + 1 < right_idx) { | |
1027 | middle_idx = (left_idx + right_idx) / 2; | |
1028 | ||
1029 | left = sym_table[ left_idx].addr; | |
1030 | middle = sym_table[middle_idx].addr; | |
1031 | right = sym_table[ right_idx].addr; | |
1032 | ||
1033 | if (!(left <= middle && middle <= right)) { | |
1034 | printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); | |
1035 | printf("%d %d %d\n", left_idx, middle_idx, right_idx); | |
1036 | } | |
1037 | assert(left <= middle && middle <= right); | |
1038 | if (!(left <= ip && ip <= right)) { | |
1039 | printf(" left: %016lx\n", left); | |
193e8df1 | 1040 | printf(" ip: %016lx\n", (unsigned long)ip); |
e0143bad IM |
1041 | printf("right: %016lx\n", right); |
1042 | } | |
1043 | assert(left <= ip && ip <= right); | |
1044 | /* | |
1045 | * [ left .... target .... middle .... right ] | |
1046 | * => right := middle | |
1047 | */ | |
1048 | if (ip < middle) { | |
1049 | right_idx = middle_idx; | |
1050 | continue; | |
1051 | } | |
1052 | /* | |
1053 | * [ left .... middle ... target ... right ] | |
1054 | * => left := middle | |
1055 | */ | |
1056 | left_idx = middle_idx; | |
1057 | } | |
1058 | ||
1059 | idx = left_idx; | |
1060 | ||
1061 | if (!sym_table[idx].skip) | |
1062 | sym_table[idx].count[counter]++; | |
1063 | else events--; | |
1064 | } | |
1065 | ||
1066 | static void process_event(uint64_t ip, int counter) | |
1067 | { | |
1068 | events++; | |
1069 | ||
1070 | if (ip < min_ip || ip > max_ip) { | |
1071 | userspace_events++; | |
1072 | return; | |
1073 | } | |
1074 | ||
1075 | record_ip(ip, counter); | |
1076 | } | |
1077 | ||
f7524bda WF |
1078 | static void process_options(int argc, char *argv[]) |
1079 | { | |
1080 | int error = 0, counter; | |
1081 | ||
1082 | if (strstr(argv[0], "perfstat")) | |
1083 | run_perfstat = 1; | |
1084 | ||
1085 | for (;;) { | |
1086 | int option_index = 0; | |
1087 | /** Options for getopt */ | |
1088 | static struct option long_options[] = { | |
1089 | {"count", required_argument, NULL, 'c'}, | |
1090 | {"cpu", required_argument, NULL, 'C'}, | |
1091 | {"delay", required_argument, NULL, 'd'}, | |
1092 | {"dump_symtab", no_argument, NULL, 'D'}, | |
1093 | {"event", required_argument, NULL, 'e'}, | |
1094 | {"filter", required_argument, NULL, 'f'}, | |
1095 | {"group", required_argument, NULL, 'g'}, | |
1096 | {"help", no_argument, NULL, 'h'}, | |
1097 | {"nmi", required_argument, NULL, 'n'}, | |
9dd49988 MG |
1098 | {"mmap_info", no_argument, NULL, 'M'}, |
1099 | {"mmap_pages", required_argument, NULL, 'm'}, | |
1100 | {"munmap_info", no_argument, NULL, 'U'}, | |
f7524bda | 1101 | {"pid", required_argument, NULL, 'p'}, |
9dd49988 MG |
1102 | {"realtime", required_argument, NULL, 'r'}, |
1103 | {"scale", no_argument, NULL, 'l'}, | |
f7524bda WF |
1104 | {"symbol", required_argument, NULL, 's'}, |
1105 | {"stat", no_argument, NULL, 'S'}, | |
9dd49988 | 1106 | {"vmlinux", required_argument, NULL, 'x'}, |
f7524bda WF |
1107 | {"zero", no_argument, NULL, 'z'}, |
1108 | {NULL, 0, NULL, 0 } | |
1109 | }; | |
9dd49988 | 1110 | int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", |
f7524bda WF |
1111 | long_options, &option_index); |
1112 | if (c == -1) | |
1113 | break; | |
1114 | ||
1115 | switch (c) { | |
1116 | case 'a': system_wide = 1; break; | |
803d4f39 | 1117 | case 'c': default_interval = atoi(optarg); break; |
f7524bda WF |
1118 | case 'C': |
1119 | /* CPU and PID are mutually exclusive */ | |
1120 | if (tid != -1) { | |
1121 | printf("WARNING: CPU switch overriding PID\n"); | |
1122 | sleep(1); | |
1123 | tid = -1; | |
1124 | } | |
1125 | profile_cpu = atoi(optarg); break; | |
1126 | case 'd': delay_secs = atoi(optarg); break; | |
1127 | case 'D': dump_symtab = 1; break; | |
1128 | ||
1129 | case 'e': error = parse_events(optarg); break; | |
1130 | ||
1131 | case 'f': count_filter = atoi(optarg); break; | |
1132 | case 'g': group = atoi(optarg); break; | |
1133 | case 'h': display_help(); break; | |
31f004df | 1134 | case 'l': scale = 1; break; |
f7524bda WF |
1135 | case 'n': nmi = atoi(optarg); break; |
1136 | case 'p': | |
1137 | /* CPU and PID are mutually exclusive */ | |
1138 | if (profile_cpu != -1) { | |
1139 | printf("WARNING: PID switch overriding CPU\n"); | |
1140 | sleep(1); | |
1141 | profile_cpu = -1; | |
1142 | } | |
1143 | tid = atoi(optarg); break; | |
9dd49988 | 1144 | case 'r': realtime_prio = atoi(optarg); break; |
f7524bda WF |
1145 | case 's': sym_filter = strdup(optarg); break; |
1146 | case 'S': run_perfstat = 1; break; | |
1147 | case 'x': vmlinux = strdup(optarg); break; | |
1148 | case 'z': zero = 1; break; | |
4c4ba21d | 1149 | case 'm': mmap_pages = atoi(optarg); break; |
3c1ba6fa PZ |
1150 | case 'M': use_mmap = 1; break; |
1151 | case 'U': use_munmap = 1; break; | |
f7524bda WF |
1152 | default: error = 1; break; |
1153 | } | |
1154 | } | |
1155 | if (error) | |
1156 | display_help(); | |
1157 | ||
1158 | if (!nr_counters) { | |
1159 | if (run_perfstat) | |
1160 | nr_counters = 8; | |
1161 | else { | |
1162 | nr_counters = 1; | |
1163 | event_id[0] = 0; | |
1164 | } | |
1165 | } | |
1166 | ||
1167 | for (counter = 0; counter < nr_counters; counter++) { | |
1168 | if (event_count[counter]) | |
1169 | continue; | |
1170 | ||
803d4f39 | 1171 | event_count[counter] = default_interval; |
f7524bda WF |
1172 | } |
1173 | } | |
1174 | ||
bcbcb37c PZ |
/*
 * Reader-side state of one memory-mapped counter buffer.
 */
struct mmap_data {
	int counter;		/* index of the counter this buffer belongs to */
	void *base;		/* start of the mapping as returned by mmap() */
	unsigned int mask;	/* data area size in bytes minus one; wraps offsets */
	unsigned int prev;	/* offset up to which the data was consumed */
};
1181 | ||
1182 | static unsigned int mmap_read_head(struct mmap_data *md) | |
1183 | { | |
1184 | struct perf_counter_mmap_page *pc = md->base; | |
19556439 | 1185 | int head; |
bcbcb37c PZ |
1186 | |
1187 | head = pc->data_head; | |
bcbcb37c | 1188 | rmb(); |
bcbcb37c PZ |
1189 | |
1190 | return head; | |
1191 | } | |
1192 | ||
4c4ba21d PZ |
1193 | struct timeval last_read, this_read; |
1194 | ||
bcbcb37c PZ |
1195 | static void mmap_read(struct mmap_data *md) |
1196 | { | |
1197 | unsigned int head = mmap_read_head(md); | |
1198 | unsigned int old = md->prev; | |
1199 | unsigned char *data = md->base + page_size; | |
00f0ad73 | 1200 | int diff; |
bcbcb37c | 1201 | |
4c4ba21d PZ |
1202 | gettimeofday(&this_read, NULL); |
1203 | ||
00f0ad73 PZ |
1204 | /* |
1205 | * If we're further behind than half the buffer, there's a chance | |
1206 | * the writer will bite our tail and screw up the events under us. | |
1207 | * | |
1208 | * If we somehow ended up ahead of the head, we got messed up. | |
1209 | * | |
1210 | * In either case, truncate and restart at head. | |
1211 | */ | |
1212 | diff = head - old; | |
1213 | if (diff > md->mask / 2 || diff < 0) { | |
4c4ba21d PZ |
1214 | struct timeval iv; |
1215 | unsigned long msecs; | |
1216 | ||
1217 | timersub(&this_read, &last_read, &iv); | |
1218 | msecs = iv.tv_sec*1000 + iv.tv_usec/1000; | |
1219 | ||
00f0ad73 PZ |
1220 | fprintf(stderr, "WARNING: failed to keep up with mmap data." |
1221 | " Last read %lu msecs ago.\n", msecs); | |
4c4ba21d | 1222 | |
00f0ad73 PZ |
1223 | /* |
1224 | * head points to a known good entry, start there. | |
1225 | */ | |
4c4ba21d | 1226 | old = head; |
bcbcb37c PZ |
1227 | } |
1228 | ||
4c4ba21d PZ |
1229 | last_read = this_read; |
1230 | ||
bcbcb37c | 1231 | for (; old != head;) { |
3c1ba6fa | 1232 | struct ip_event { |
00f0ad73 PZ |
1233 | struct perf_event_header header; |
1234 | __u64 ip; | |
1235 | __u32 pid, tid; | |
3c1ba6fa PZ |
1236 | }; |
1237 | struct mmap_event { | |
1238 | struct perf_event_header header; | |
1239 | __u32 pid, tid; | |
1240 | __u64 start; | |
1241 | __u64 len; | |
1242 | __u64 pgoff; | |
1243 | char filename[PATH_MAX]; | |
1244 | }; | |
1245 | ||
1246 | typedef union event_union { | |
1247 | struct perf_event_header header; | |
1248 | struct ip_event ip; | |
1249 | struct mmap_event mmap; | |
1250 | } event_t; | |
1251 | ||
1252 | event_t *event = (event_t *)&data[old & md->mask]; | |
1253 | ||
1254 | event_t event_copy; | |
00f0ad73 PZ |
1255 | |
1256 | unsigned int size = event->header.size; | |
1257 | ||
1258 | /* | |
1259 | * Event straddles the mmap boundary -- header should always | |
1260 | * be inside due to u64 alignment of output. | |
1261 | */ | |
1262 | if ((old & md->mask) + size != ((old + size) & md->mask)) { | |
1263 | unsigned int offset = old; | |
3c1ba6fa | 1264 | unsigned int len = min(sizeof(*event), size), cpy; |
00f0ad73 PZ |
1265 | void *dst = &event_copy; |
1266 | ||
1267 | do { | |
1268 | cpy = min(md->mask + 1 - (offset & md->mask), len); | |
1269 | memcpy(dst, &data[offset & md->mask], cpy); | |
1270 | offset += cpy; | |
1271 | dst += cpy; | |
1272 | len -= cpy; | |
1273 | } while (len); | |
1274 | ||
1275 | event = &event_copy; | |
1276 | } | |
bcbcb37c | 1277 | |
00f0ad73 PZ |
1278 | old += size; |
1279 | ||
808382b3 PZ |
1280 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { |
1281 | if (event->header.type & PERF_RECORD_IP) | |
1282 | process_event(event->ip.ip, md->counter); | |
1283 | } else { | |
1284 | switch (event->header.type) { | |
1285 | case PERF_EVENT_MMAP: | |
1286 | case PERF_EVENT_MUNMAP: | |
1287 | printf("%s: %Lu %Lu %Lu %s\n", | |
1288 | event->header.type == PERF_EVENT_MMAP | |
1289 | ? "mmap" : "munmap", | |
1290 | event->mmap.start, | |
1291 | event->mmap.len, | |
1292 | event->mmap.pgoff, | |
1293 | event->mmap.filename); | |
1294 | break; | |
1295 | } | |
00f0ad73 | 1296 | } |
bcbcb37c PZ |
1297 | } |
1298 | ||
1299 | md->prev = old; | |
1300 | } | |
1301 | ||
e0143bad IM |
1302 | int main(int argc, char *argv[]) |
1303 | { | |
0fd112e4 | 1304 | struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; |
bcbcb37c | 1305 | struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; |
e0143bad | 1306 | struct perf_counter_hw_event hw_event; |
9dd49988 | 1307 | pthread_t thread; |
0fd112e4 | 1308 | int i, counter, group_fd, nr_poll = 0; |
e0143bad | 1309 | unsigned int cpu; |
e0143bad IM |
1310 | int ret; |
1311 | ||
bcbcb37c PZ |
1312 | page_size = sysconf(_SC_PAGE_SIZE); |
1313 | ||
e0143bad IM |
1314 | process_options(argc, argv); |
1315 | ||
1316 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
f7524bda WF |
1317 | assert(nr_cpus <= MAX_NR_CPUS); |
1318 | assert(nr_cpus >= 0); | |
1319 | ||
1320 | if (run_perfstat) | |
1321 | return do_perfstat(argc, argv); | |
1322 | ||
e0143bad IM |
1323 | if (tid != -1 || profile_cpu != -1) |
1324 | nr_cpus = 1; | |
1325 | ||
cbe46555 PM |
1326 | parse_symbols(); |
1327 | if (vmlinux && sym_filter_entry) | |
1328 | parse_vmlinux(vmlinux); | |
1329 | ||
e0143bad IM |
1330 | for (i = 0; i < nr_cpus; i++) { |
1331 | group_fd = -1; | |
1332 | for (counter = 0; counter < nr_counters; counter++) { | |
1333 | ||
1334 | cpu = profile_cpu; | |
1335 | if (tid == -1 && profile_cpu == -1) | |
1336 | cpu = i; | |
1337 | ||
1338 | memset(&hw_event, 0, sizeof(hw_event)); | |
803d4f39 | 1339 | hw_event.config = event_id[counter]; |
e0143bad | 1340 | hw_event.irq_period = event_count[counter]; |
3df70fd6 | 1341 | hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; |
e0143bad | 1342 | hw_event.nmi = nmi; |
3c1ba6fa PZ |
1343 | hw_event.mmap = use_mmap; |
1344 | hw_event.munmap = use_munmap; | |
e0143bad IM |
1345 | |
1346 | fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); | |
e0143bad | 1347 | if (fd[i][counter] < 0) { |
cbe46555 | 1348 | int err = errno; |
e0143bad | 1349 | printf("kerneltop error: syscall returned with %d (%s)\n", |
cbe46555 PM |
1350 | fd[i][counter], strerror(err)); |
1351 | if (err == EPERM) | |
e0143bad IM |
1352 | printf("Are you root?\n"); |
1353 | exit(-1); | |
1354 | } | |
1355 | assert(fd[i][counter] >= 0); | |
cbe46555 | 1356 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); |
e0143bad IM |
1357 | |
1358 | /* | |
1359 | * First counter acts as the group leader: | |
1360 | */ | |
1361 | if (group && group_fd == -1) | |
1362 | group_fd = fd[i][counter]; | |
1363 | ||
0fd112e4 PZ |
1364 | event_array[nr_poll].fd = fd[i][counter]; |
1365 | event_array[nr_poll].events = POLLIN; | |
1366 | nr_poll++; | |
bcbcb37c PZ |
1367 | |
1368 | mmap_array[i][counter].counter = counter; | |
1369 | mmap_array[i][counter].prev = 0; | |
4c4ba21d PZ |
1370 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; |
1371 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | |
bcbcb37c PZ |
1372 | PROT_READ, MAP_SHARED, fd[i][counter], 0); |
1373 | if (mmap_array[i][counter].base == MAP_FAILED) { | |
1374 | printf("kerneltop error: failed to mmap with %d (%s)\n", | |
1375 | errno, strerror(errno)); | |
1376 | exit(-1); | |
1377 | } | |
e0143bad IM |
1378 | } |
1379 | } | |
1380 | ||
9dd49988 MG |
1381 | if (pthread_create(&thread, NULL, display_thread, NULL)) { |
1382 | printf("Could not create display thread.\n"); | |
1383 | exit(-1); | |
1384 | } | |
1385 | ||
1386 | if (realtime_prio) { | |
1387 | struct sched_param param; | |
1388 | ||
1389 | param.sched_priority = realtime_prio; | |
1390 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | |
1391 | printf("Could not set realtime priority.\n"); | |
1392 | exit(-1); | |
1393 | } | |
1394 | } | |
e0143bad IM |
1395 | |
1396 | while (1) { | |
1397 | int hits = events; | |
1398 | ||
1399 | for (i = 0; i < nr_cpus; i++) { | |
bcbcb37c PZ |
1400 | for (counter = 0; counter < nr_counters; counter++) |
1401 | mmap_read(&mmap_array[i][counter]); | |
e0143bad IM |
1402 | } |
1403 | ||
e0143bad | 1404 | if (hits == events) |
9dd49988 | 1405 | ret = poll(event_array, nr_poll, 100); |
e0143bad IM |
1406 | } |
1407 | ||
1408 | return 0; | |
1409 | } |