/*
 * perf stat:  /usr/bin/time -alike performance counter statistics utility
 *
 *        It summarizes the counter events of all tasks (and child tasks),
 *        covering all CPUs that the command (or workload) executes on.
 *        It only counts the per-task events of the workload started,
 *        independent of how many other tasks run on those CPUs.
 *
 *        Sample output:
 *
 *   $ perf stat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
 *
 *   Performance counter stats for 'ls':
 *
 *        163516953 instructions
 *             2295 cache-misses
 *          2855182 branch-misses
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include "perf.h"
#include "builtin.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include <sys/prctl.h>

static int system_wide = 0;
static int inherit = 1;

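/*
 * Default event set: event_id[] is pre-loaded with these eight entries and
 * cmd_stat() falls back to counting all of them when no -e option was given
 * (i.e. when nr_counters is still zero after option parsing).
 */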
static __u64 default_event_id[MAX_COUNTERS] = {
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
	EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),

	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
	EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
};

static int default_interval = 100000;
static int event_count[MAX_COUNTERS];
static int fd[MAX_NR_CPUS][MAX_COUNTERS];

static int target_pid = -1;
static int nr_cpus = 0;
static unsigned int page_size;

static int scale = 1;

static const unsigned int default_count[] = {
	1000000,
	1000000,
	  10000,
	  10000,
	1000000,
	  10000,
};

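/*
 * Configure one counter and open it via sys_perf_counter_open(): in
 * system-wide mode one fd is opened per online CPU, otherwise a single
 * inherited counter is attached to the current task (created disabled
 * and enabled later via prctl() in do_perfstat()).
 */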
static void create_perfstat_counter(int counter)
{
	struct perf_counter_hw_event hw_event;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.config = event_id[counter];
	hw_event.record_type = 0;
	hw_event.nmi = 1;
	hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
	hw_event.exclude_user = event_mask[counter] & EVENT_MASK_USER;

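	/*
	 * Also ask for the time the counter was enabled and the time it was
	 * actually running, so the totals can be scaled up if the counter
	 * had to share the PMU with others (see the scaling in do_perfstat()).
	 */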
	if (scale)
		hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
				       PERF_FORMAT_TOTAL_TIME_RUNNING;

	if (system_wide) {
		int cpu;
		for (cpu = 0; cpu < nr_cpus; cpu++) {
			fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
			if (fd[cpu][counter] < 0) {
				printf("perfstat error: syscall returned with %d (%s)\n",
					fd[cpu][counter], strerror(errno));
				exit(-1);
			}
		}
	} else {
		hw_event.inherit = inherit;
		hw_event.disabled = 1;

		fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
		if (fd[0][counter] < 0) {
			printf("perfstat error: syscall returned with %d (%s)\n",
				fd[0][counter], strerror(errno));
			exit(-1);
		}
	}
}

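/*
 * Open all requested counters, fork and exec the workload with the counters
 * enabled, wait for it to finish, then read back and print the totals.
 */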
static int do_perfstat(int argc, const char **argv)
{
	unsigned long long t0, t1;
	int counter;
	ssize_t res;
	int status;
	int pid;

	if (!system_wide)
		nr_cpus = 1;

	for (counter = 0; counter < nr_counters; counter++)
		create_perfstat_counter(counter);

	/*
	 * Enable counters and exec the command:
	 */
	t0 = rdclock();
	prctl(PR_TASK_PERF_COUNTERS_ENABLE);

	if ((pid = fork()) < 0) {
		perror("failed to fork");
		exit(-1);
	}
	if (!pid) {
		if (execvp(argv[0], (char **)argv)) {
			perror(argv[0]);
			exit(-1);
		}
	}
	while (wait(&status) >= 0)
		;
	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
	t1 = rdclock();

	fflush(stdout);

	fprintf(stderr, "\n");
	fprintf(stderr, " Performance counter stats for \'%s\':\n",
		argv[0]);
	fprintf(stderr, "\n");

	for (counter = 0; counter < nr_counters; counter++) {
		int cpu, nv;
		__u64 count[3], single_count[3];
		int scaled;

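		/*
		 * With scaling enabled, each read() returns three u64 values:
		 * the raw count, the time the counter was enabled and the
		 * time it was actually running; sum them over all CPUs.
		 */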
		count[0] = count[1] = count[2] = 0;
		nv = scale ? 3 : 1;
		for (cpu = 0; cpu < nr_cpus; cpu++) {
			res = read(fd[cpu][counter],
				   single_count, nv * sizeof(__u64));
			assert(res == nv * sizeof(__u64));

			count[0] += single_count[0];
			if (scale) {
				count[1] += single_count[1];
				count[2] += single_count[2];
			}
		}

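		/*
		 * If the counter ran for less time than it was enabled, it
		 * had to share the hardware with other counters; extrapolate
		 * the raw count to the full enabled period and note the
		 * scaling factor in the output.
		 */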
		scaled = 0;
		if (scale) {
			if (count[2] == 0) {
				fprintf(stderr, " %14s %-20s\n",
					"<not counted>", event_name(counter));
				continue;
			}
			if (count[2] < count[1]) {
				scaled = 1;
				count[0] = (unsigned long long)
					((double)count[0] * count[1] / count[2] + 0.5);
			}
		}

		if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
		    event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {

			double msecs = (double)count[0] / 1000000;

			fprintf(stderr, " %14.6f %-20s (msecs)",
				msecs, event_name(counter));
		} else {
			fprintf(stderr, " %14Ld %-20s (events)",
				count[0], event_name(counter));
		}
		if (scaled)
			fprintf(stderr, " (scaled from %.2f%%)",
				(double) count[2] / count[1] * 100);
		fprintf(stderr, "\n");
	}
	fprintf(stderr, "\n");
	fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
		(double)(t1-t0)/1e6);
	fprintf(stderr, "\n");

	return 0;
}

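/*
 * Deliberately empty: SIGINT, SIGALRM and SIGABRT are caught (not blocked)
 * so that perf stat itself survives a Ctrl-C, while the exec()-ed workload,
 * whose handlers are reset by exec(), still gets interrupted normally.
 */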
static void skip_signal(int signo)
{
}

static const char * const stat_usage[] = {
	"perf stat [<options>] <command>",
	NULL
};

static char events_help_msg[EVENTS_HELP_MAX];

static const struct option options[] = {
	OPT_CALLBACK('e', "event", NULL, "event",
		     events_help_msg, parse_events),
	OPT_INTEGER('c', "count", &default_interval,
		    "event period to sample"),
	OPT_BOOLEAN('i', "inherit", &inherit,
		    "child tasks inherit counters"),
	OPT_INTEGER('p', "pid", &target_pid,
		    "stat events on existing pid"),
	OPT_BOOLEAN('a', "all-cpus", &system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('l', "scale", &scale,
		    "scale/normalize counters"),
	OPT_END()
};

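/*
 * Entry point for the 'perf stat' sub-command: parse the options, fill in
 * defaults for anything not given on the command line, set up signal
 * handling and hand over to do_perfstat().
 */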
int cmd_stat(int argc, const char **argv, const char *prefix)
{
	int counter;

	page_size = sysconf(_SC_PAGE_SIZE);

	create_events_help(events_help_msg);
	memcpy(event_id, default_event_id, sizeof(default_event_id));

	argc = parse_options(argc, argv, options, stat_usage, 0);
	if (!argc)
		usage_with_options(stat_usage, options);

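	/*
	 * If no -e options were given, count all eight default events; any
	 * counter without an explicit -c count gets the default period.
	 */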
	if (!nr_counters)
		nr_counters = 8;

	for (counter = 0; counter < nr_counters; counter++) {
		if (event_count[counter])
			continue;

		event_count[counter] = default_interval;
	}
	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	assert(nr_cpus <= MAX_NR_CPUS);
	assert(nr_cpus >= 0);

	/*
	 * We don't want to block the signals - blocking would be inherited by
	 * the child tasks and then Ctrl-C would not work. What we want is for
	 * Ctrl-C to reach the exec()-ed task while being ignored by perf stat
	 * itself:
	 */
	signal(SIGINT, skip_signal);
	signal(SIGALRM, skip_signal);
	signal(SIGABRT, skip_signal);

	return do_perfstat(argc, argv);
}