/*
 * perf stat: Abstract stat metrics printing
 * tools/perf/util/stat-shadow.c
 */
#include <stdio.h>
#include <string.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"

/*
 * Shadow counts are kept separately for each combination of the
 * exclude_* bits of the event that produced them (see evsel_context()).
 * Each combination is one "context"; the bits below form that index.
 */
enum {
	CTX_BIT_USER	= 1 << 0,
	CTX_BIT_KERNEL	= 1 << 1,
	CTX_BIT_HV	= 1 << 2,
	CTX_BIT_HOST	= 1 << 3,
	CTX_BIT_IDLE	= 1 << 4,
	CTX_BIT_MAX	= 1 << 5,
};

/* Number of distinct exclusion contexts: one past the largest bitmask. */
#define NUM_CTX CTX_BIT_MAX
16
/*
 * Running stats of the counter values seen so far, used to derive the
 * "shadow" metrics (IPC, miss ratios, GHz, ...).  Indexed by exclusion
 * context and CPU, except runtime_nsecs_stats which varies per CPU only.
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];

/* Wall-clock time of the run; non-static so other stat code can update it. */
struct stats walltime_nsecs_stats;
33
34 static int evsel_context(struct perf_evsel *evsel)
35 {
36 int ctx = 0;
37
38 if (evsel->attr.exclude_kernel)
39 ctx |= CTX_BIT_KERNEL;
40 if (evsel->attr.exclude_user)
41 ctx |= CTX_BIT_USER;
42 if (evsel->attr.exclude_hv)
43 ctx |= CTX_BIT_HV;
44 if (evsel->attr.exclude_host)
45 ctx |= CTX_BIT_HOST;
46 if (evsel->attr.exclude_idle)
47 ctx |= CTX_BIT_IDLE;
48
49 return ctx;
50 }
51
52 void perf_stat__reset_shadow_stats(void)
53 {
54 memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55 memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56 memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57 memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58 memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59 memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60 memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61 memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62 memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63 memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64 memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65 memset(runtime_cycles_in_tx_stats, 0,
66 sizeof(runtime_cycles_in_tx_stats));
67 memset(runtime_transaction_stats, 0,
68 sizeof(runtime_transaction_stats));
69 memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
71 }
72
73 /*
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
77 */
78 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
79 int cpu)
80 {
81 int ctx = evsel_context(counter);
82
83 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84 update_stats(&runtime_nsecs_stats[cpu], count[0]);
85 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
89 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91 else if (perf_stat_evsel__is(counter, ELISION_START))
92 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
111 }
112
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,	/* frontend stall percentage thresholds */
	GRC_STALLED_CYCLES_BE,	/* backend stall percentage thresholds */
	GRC_CACHE_MISSES,	/* cache/branch miss percentage thresholds */
	GRC_MAX_NR		/* number of threshold rows in grc_table */
};
120
121 static const char *get_ratio_color(enum grc_type type, double ratio)
122 {
123 static const double grc_table[GRC_MAX_NR][3] = {
124 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
127 };
128 const char *color = PERF_COLOR_NORMAL;
129
130 if (ratio > grc_table[type][0])
131 color = PERF_COLOR_RED;
132 else if (ratio > grc_table[type][1])
133 color = PERF_COLOR_MAGENTA;
134 else if (ratio > grc_table[type][2])
135 color = PERF_COLOR_YELLOW;
136
137 return color;
138 }
139
140 static void print_stalled_cycles_frontend(int cpu,
141 struct perf_evsel *evsel
142 __maybe_unused, double avg,
143 struct perf_stat_output_ctx *out)
144 {
145 double total, ratio = 0.0;
146 const char *color;
147 int ctx = evsel_context(evsel);
148
149 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
150
151 if (total)
152 ratio = avg / total * 100.0;
153
154 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
155
156 if (ratio)
157 out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
158 ratio);
159 else
160 out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
161 }
162
163 static void print_stalled_cycles_backend(int cpu,
164 struct perf_evsel *evsel
165 __maybe_unused, double avg,
166 struct perf_stat_output_ctx *out)
167 {
168 double total, ratio = 0.0;
169 const char *color;
170 int ctx = evsel_context(evsel);
171
172 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
173
174 if (total)
175 ratio = avg / total * 100.0;
176
177 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
178
179 out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio);
180 }
181
182 static void print_branch_misses(int cpu,
183 struct perf_evsel *evsel __maybe_unused,
184 double avg,
185 struct perf_stat_output_ctx *out)
186 {
187 double total, ratio = 0.0;
188 const char *color;
189 int ctx = evsel_context(evsel);
190
191 total = avg_stats(&runtime_branches_stats[ctx][cpu]);
192
193 if (total)
194 ratio = avg / total * 100.0;
195
196 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
197
198 out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
199 }
200
201 static void print_l1_dcache_misses(int cpu,
202 struct perf_evsel *evsel __maybe_unused,
203 double avg,
204 struct perf_stat_output_ctx *out)
205 {
206 double total, ratio = 0.0;
207 const char *color;
208 int ctx = evsel_context(evsel);
209
210 total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
211
212 if (total)
213 ratio = avg / total * 100.0;
214
215 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
216
217 out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
218 }
219
220 static void print_l1_icache_misses(int cpu,
221 struct perf_evsel *evsel __maybe_unused,
222 double avg,
223 struct perf_stat_output_ctx *out)
224 {
225 double total, ratio = 0.0;
226 const char *color;
227 int ctx = evsel_context(evsel);
228
229 total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
230
231 if (total)
232 ratio = avg / total * 100.0;
233
234 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
235 out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
236 }
237
238 static void print_dtlb_cache_misses(int cpu,
239 struct perf_evsel *evsel __maybe_unused,
240 double avg,
241 struct perf_stat_output_ctx *out)
242 {
243 double total, ratio = 0.0;
244 const char *color;
245 int ctx = evsel_context(evsel);
246
247 total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
248
249 if (total)
250 ratio = avg / total * 100.0;
251
252 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
253 out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
254 }
255
256 static void print_itlb_cache_misses(int cpu,
257 struct perf_evsel *evsel __maybe_unused,
258 double avg,
259 struct perf_stat_output_ctx *out)
260 {
261 double total, ratio = 0.0;
262 const char *color;
263 int ctx = evsel_context(evsel);
264
265 total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
266
267 if (total)
268 ratio = avg / total * 100.0;
269
270 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
271 out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
272 }
273
274 static void print_ll_cache_misses(int cpu,
275 struct perf_evsel *evsel __maybe_unused,
276 double avg,
277 struct perf_stat_output_ctx *out)
278 {
279 double total, ratio = 0.0;
280 const char *color;
281 int ctx = evsel_context(evsel);
282
283 total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
284
285 if (total)
286 ratio = avg / total * 100.0;
287
288 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
289 out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
290 }
291
292 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
293 double avg, int cpu,
294 struct perf_stat_output_ctx *out)
295 {
296 void *ctxp = out->ctx;
297 print_metric_t print_metric = out->print_metric;
298 double total, ratio = 0.0, total2;
299 int ctx = evsel_context(evsel);
300
301 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
302 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
303 if (total) {
304 ratio = avg / total;
305 print_metric(ctxp, NULL, "%7.2f ",
306 "insn per cycle", ratio);
307 } else {
308 print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
309 }
310 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
311 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
312
313 out->new_line(ctxp);
314 if (total && avg) {
315 ratio = total / avg;
316 print_metric(ctxp, NULL, "%7.2f ",
317 "stalled cycles per insn",
318 ratio);
319 } else {
320 print_metric(ctxp, NULL, NULL,
321 "stalled cycles per insn", 0);
322 }
323 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
324 if (runtime_branches_stats[ctx][cpu].n != 0)
325 print_branch_misses(cpu, evsel, avg, out);
326 else
327 print_metric(ctxp, NULL, NULL, "of all branches", 0);
328 } else if (
329 evsel->attr.type == PERF_TYPE_HW_CACHE &&
330 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
331 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
333 if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
334 print_l1_dcache_misses(cpu, evsel, avg, out);
335 else
336 print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
337 } else if (
338 evsel->attr.type == PERF_TYPE_HW_CACHE &&
339 evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
340 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
341 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
342 if (runtime_l1_icache_stats[ctx][cpu].n != 0)
343 print_l1_icache_misses(cpu, evsel, avg, out);
344 else
345 print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
346 } else if (
347 evsel->attr.type == PERF_TYPE_HW_CACHE &&
348 evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
349 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
350 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
351 if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
352 print_dtlb_cache_misses(cpu, evsel, avg, out);
353 else
354 print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
355 } else if (
356 evsel->attr.type == PERF_TYPE_HW_CACHE &&
357 evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
358 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
359 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
360 if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
361 print_itlb_cache_misses(cpu, evsel, avg, out);
362 else
363 print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
364 } else if (
365 evsel->attr.type == PERF_TYPE_HW_CACHE &&
366 evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
367 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
368 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
369 if (runtime_ll_cache_stats[ctx][cpu].n != 0)
370 print_ll_cache_misses(cpu, evsel, avg, out);
371 else
372 print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
373 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
374 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
375
376 if (total)
377 ratio = avg * 100 / total;
378
379 if (runtime_cacherefs_stats[ctx][cpu].n != 0)
380 print_metric(ctxp, NULL, "%8.3f %%",
381 "of all cache refs", ratio);
382 else
383 print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
384 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
385 print_stalled_cycles_frontend(cpu, evsel, avg, out);
386 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
387 print_stalled_cycles_backend(cpu, evsel, avg, out);
388 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
389 total = avg_stats(&runtime_nsecs_stats[cpu]);
390
391 if (total) {
392 ratio = avg / total;
393 print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
394 } else {
395 print_metric(ctxp, NULL, NULL, "Ghz", 0);
396 }
397 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
398 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
399 if (total)
400 print_metric(ctxp, NULL,
401 "%7.2f%%", "transactional cycles",
402 100.0 * (avg / total));
403 else
404 print_metric(ctxp, NULL, NULL, "transactional cycles",
405 0);
406 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
407 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
408 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
409 if (total2 < avg)
410 total2 = avg;
411 if (total)
412 print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
413 100.0 * ((total2-avg) / total));
414 else
415 print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
416 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
417 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
418
419 if (avg)
420 ratio = total / avg;
421
422 if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
423 print_metric(ctxp, NULL, "%8.0f",
424 "cycles / transaction", ratio);
425 else
426 print_metric(ctxp, NULL, NULL, "cycles / transaction",
427 0);
428 } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
429 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
430
431 if (avg)
432 ratio = total / avg;
433
434 print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
435 } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
436 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
437 print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
438 avg / ratio);
439 else
440 print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
441 } else if (runtime_nsecs_stats[cpu].n != 0) {
442 char unit = 'M';
443 char unit_buf[10];
444
445 total = avg_stats(&runtime_nsecs_stats[cpu]);
446
447 if (total)
448 ratio = 1000.0 * avg / total;
449 if (ratio < 0.001) {
450 ratio *= 1000;
451 unit = 'K';
452 }
453 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
454 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
455 } else {
456 print_metric(ctxp, NULL, NULL, NULL, 0);
457 }
458 }