9 CTX_BIT_KERNEL
= 1 << 1,
11 CTX_BIT_HOST
= 1 << 3,
12 CTX_BIT_IDLE
= 1 << 4,
16 #define NUM_CTX CTX_BIT_MAX
/*
 * Which CPU slot of the per-CPU arrays below is used, per aggregation mode:
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */
/*
 * Running statistics used to derive "shadow" metrics (IPC, miss ratios,
 * GHz, ...).  All but runtime_nsecs_stats are additionally indexed by the
 * exclusion context (NUM_CTX) so counters opened with different
 * exclude_* settings are never mixed.
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
/* set at init time if the "cpu" PMU exposes a stalled-cycles-frontend event */
static bool have_frontend_stalled;

/* wall-clock stats; non-static: shared with the rest of the stat code */
struct stats walltime_nsecs_stats;
43 void perf_stat__init_shadow_stats(void)
45 have_frontend_stalled
= pmu_have_event("cpu", "stalled-cycles-frontend");
48 static int evsel_context(struct perf_evsel
*evsel
)
52 if (evsel
->attr
.exclude_kernel
)
53 ctx
|= CTX_BIT_KERNEL
;
54 if (evsel
->attr
.exclude_user
)
56 if (evsel
->attr
.exclude_hv
)
58 if (evsel
->attr
.exclude_host
)
60 if (evsel
->attr
.exclude_idle
)
66 void perf_stat__reset_shadow_stats(void)
68 memset(runtime_nsecs_stats
, 0, sizeof(runtime_nsecs_stats
));
69 memset(runtime_cycles_stats
, 0, sizeof(runtime_cycles_stats
));
70 memset(runtime_stalled_cycles_front_stats
, 0, sizeof(runtime_stalled_cycles_front_stats
));
71 memset(runtime_stalled_cycles_back_stats
, 0, sizeof(runtime_stalled_cycles_back_stats
));
72 memset(runtime_branches_stats
, 0, sizeof(runtime_branches_stats
));
73 memset(runtime_cacherefs_stats
, 0, sizeof(runtime_cacherefs_stats
));
74 memset(runtime_l1_dcache_stats
, 0, sizeof(runtime_l1_dcache_stats
));
75 memset(runtime_l1_icache_stats
, 0, sizeof(runtime_l1_icache_stats
));
76 memset(runtime_ll_cache_stats
, 0, sizeof(runtime_ll_cache_stats
));
77 memset(runtime_itlb_cache_stats
, 0, sizeof(runtime_itlb_cache_stats
));
78 memset(runtime_dtlb_cache_stats
, 0, sizeof(runtime_dtlb_cache_stats
));
79 memset(runtime_cycles_in_tx_stats
, 0,
80 sizeof(runtime_cycles_in_tx_stats
));
81 memset(runtime_transaction_stats
, 0,
82 sizeof(runtime_transaction_stats
));
83 memset(runtime_elision_stats
, 0, sizeof(runtime_elision_stats
));
84 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
88 * Update various tracking values we maintain to print
89 * more semantic information such as miss/hit ratios,
90 * instruction rates, etc:
92 void perf_stat__update_shadow_stats(struct perf_evsel
*counter
, u64
*count
,
95 int ctx
= evsel_context(counter
);
97 if (perf_evsel__match(counter
, SOFTWARE
, SW_TASK_CLOCK
) ||
98 perf_evsel__match(counter
, SOFTWARE
, SW_CPU_CLOCK
))
99 update_stats(&runtime_nsecs_stats
[cpu
], count
[0]);
100 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
101 update_stats(&runtime_cycles_stats
[ctx
][cpu
], count
[0]);
102 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
103 update_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
], count
[0]);
104 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
105 update_stats(&runtime_transaction_stats
[ctx
][cpu
], count
[0]);
106 else if (perf_stat_evsel__is(counter
, ELISION_START
))
107 update_stats(&runtime_elision_stats
[ctx
][cpu
], count
[0]);
108 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
109 update_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
], count
[0]);
110 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
111 update_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
], count
[0]);
112 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
113 update_stats(&runtime_branches_stats
[ctx
][cpu
], count
[0]);
114 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
115 update_stats(&runtime_cacherefs_stats
[ctx
][cpu
], count
[0]);
116 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
117 update_stats(&runtime_l1_dcache_stats
[ctx
][cpu
], count
[0]);
118 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
119 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
120 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
121 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
122 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
123 update_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
], count
[0]);
124 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
125 update_stats(&runtime_itlb_cache_stats
[ctx
][cpu
], count
[0]);
/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,	/* referenced by the cache-miss printers below */
	GRC_MAX_NR		/* sizes grc_table in get_ratio_color() */
};
136 static const char *get_ratio_color(enum grc_type type
, double ratio
)
138 static const double grc_table
[GRC_MAX_NR
][3] = {
139 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
140 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
141 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
143 const char *color
= PERF_COLOR_NORMAL
;
145 if (ratio
> grc_table
[type
][0])
146 color
= PERF_COLOR_RED
;
147 else if (ratio
> grc_table
[type
][1])
148 color
= PERF_COLOR_MAGENTA
;
149 else if (ratio
> grc_table
[type
][2])
150 color
= PERF_COLOR_YELLOW
;
155 static void print_stalled_cycles_frontend(int cpu
,
156 struct perf_evsel
*evsel
, double avg
,
157 struct perf_stat_output_ctx
*out
)
159 double total
, ratio
= 0.0;
161 int ctx
= evsel_context(evsel
);
163 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
166 ratio
= avg
/ total
* 100.0;
168 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
171 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
174 out
->print_metric(out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
177 static void print_stalled_cycles_backend(int cpu
,
178 struct perf_evsel
*evsel
, double avg
,
179 struct perf_stat_output_ctx
*out
)
181 double total
, ratio
= 0.0;
183 int ctx
= evsel_context(evsel
);
185 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
188 ratio
= avg
/ total
* 100.0;
190 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
192 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "backend cycles idle", ratio
);
195 static void print_branch_misses(int cpu
,
196 struct perf_evsel
*evsel
,
198 struct perf_stat_output_ctx
*out
)
200 double total
, ratio
= 0.0;
202 int ctx
= evsel_context(evsel
);
204 total
= avg_stats(&runtime_branches_stats
[ctx
][cpu
]);
207 ratio
= avg
/ total
* 100.0;
209 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
211 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
214 static void print_l1_dcache_misses(int cpu
,
215 struct perf_evsel
*evsel
,
217 struct perf_stat_output_ctx
*out
)
219 double total
, ratio
= 0.0;
221 int ctx
= evsel_context(evsel
);
223 total
= avg_stats(&runtime_l1_dcache_stats
[ctx
][cpu
]);
226 ratio
= avg
/ total
* 100.0;
228 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
230 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
233 static void print_l1_icache_misses(int cpu
,
234 struct perf_evsel
*evsel
,
236 struct perf_stat_output_ctx
*out
)
238 double total
, ratio
= 0.0;
240 int ctx
= evsel_context(evsel
);
242 total
= avg_stats(&runtime_l1_icache_stats
[ctx
][cpu
]);
245 ratio
= avg
/ total
* 100.0;
247 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
248 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
251 static void print_dtlb_cache_misses(int cpu
,
252 struct perf_evsel
*evsel
,
254 struct perf_stat_output_ctx
*out
)
256 double total
, ratio
= 0.0;
258 int ctx
= evsel_context(evsel
);
260 total
= avg_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
]);
263 ratio
= avg
/ total
* 100.0;
265 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
266 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
269 static void print_itlb_cache_misses(int cpu
,
270 struct perf_evsel
*evsel
,
272 struct perf_stat_output_ctx
*out
)
274 double total
, ratio
= 0.0;
276 int ctx
= evsel_context(evsel
);
278 total
= avg_stats(&runtime_itlb_cache_stats
[ctx
][cpu
]);
281 ratio
= avg
/ total
* 100.0;
283 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
284 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
287 static void print_ll_cache_misses(int cpu
,
288 struct perf_evsel
*evsel
,
290 struct perf_stat_output_ctx
*out
)
292 double total
, ratio
= 0.0;
294 int ctx
= evsel_context(evsel
);
296 total
= avg_stats(&runtime_ll_cache_stats
[ctx
][cpu
]);
299 ratio
= avg
/ total
* 100.0;
301 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
302 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
305 void perf_stat__print_shadow_stats(struct perf_evsel
*evsel
,
307 struct perf_stat_output_ctx
*out
)
309 void *ctxp
= out
->ctx
;
310 print_metric_t print_metric
= out
->print_metric
;
311 double total
, ratio
= 0.0, total2
;
312 int ctx
= evsel_context(evsel
);
314 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
315 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
318 print_metric(ctxp
, NULL
, "%7.2f ",
319 "insn per cycle", ratio
);
321 print_metric(ctxp
, NULL
, NULL
, "insn per cycle", 0);
323 total
= avg_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
]);
324 total
= max(total
, avg_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
]));
329 print_metric(ctxp
, NULL
, "%7.2f ",
330 "stalled cycles per insn",
332 } else if (have_frontend_stalled
) {
333 print_metric(ctxp
, NULL
, NULL
,
334 "stalled cycles per insn", 0);
336 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
337 if (runtime_branches_stats
[ctx
][cpu
].n
!= 0)
338 print_branch_misses(cpu
, evsel
, avg
, out
);
340 print_metric(ctxp
, NULL
, NULL
, "of all branches", 0);
342 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
343 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
344 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
345 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
346 if (runtime_l1_dcache_stats
[ctx
][cpu
].n
!= 0)
347 print_l1_dcache_misses(cpu
, evsel
, avg
, out
);
349 print_metric(ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
351 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
352 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
353 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
354 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
355 if (runtime_l1_icache_stats
[ctx
][cpu
].n
!= 0)
356 print_l1_icache_misses(cpu
, evsel
, avg
, out
);
358 print_metric(ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
360 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
361 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
362 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
363 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
364 if (runtime_dtlb_cache_stats
[ctx
][cpu
].n
!= 0)
365 print_dtlb_cache_misses(cpu
, evsel
, avg
, out
);
367 print_metric(ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
369 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
370 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
371 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
372 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
373 if (runtime_itlb_cache_stats
[ctx
][cpu
].n
!= 0)
374 print_itlb_cache_misses(cpu
, evsel
, avg
, out
);
376 print_metric(ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
378 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
379 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
380 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
381 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
382 if (runtime_ll_cache_stats
[ctx
][cpu
].n
!= 0)
383 print_ll_cache_misses(cpu
, evsel
, avg
, out
);
385 print_metric(ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
386 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
387 total
= avg_stats(&runtime_cacherefs_stats
[ctx
][cpu
]);
390 ratio
= avg
* 100 / total
;
392 if (runtime_cacherefs_stats
[ctx
][cpu
].n
!= 0)
393 print_metric(ctxp
, NULL
, "%8.3f %%",
394 "of all cache refs", ratio
);
396 print_metric(ctxp
, NULL
, NULL
, "of all cache refs", 0);
397 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
398 print_stalled_cycles_frontend(cpu
, evsel
, avg
, out
);
399 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
400 print_stalled_cycles_backend(cpu
, evsel
, avg
, out
);
401 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
402 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
406 print_metric(ctxp
, NULL
, "%8.3f", "GHz", ratio
);
408 print_metric(ctxp
, NULL
, NULL
, "Ghz", 0);
410 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
411 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
413 print_metric(ctxp
, NULL
,
414 "%7.2f%%", "transactional cycles",
415 100.0 * (avg
/ total
));
417 print_metric(ctxp
, NULL
, NULL
, "transactional cycles",
419 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
420 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
421 total2
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
425 print_metric(ctxp
, NULL
, "%7.2f%%", "aborted cycles",
426 100.0 * ((total2
-avg
) / total
));
428 print_metric(ctxp
, NULL
, NULL
, "aborted cycles", 0);
429 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
430 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
435 if (runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0)
436 print_metric(ctxp
, NULL
, "%8.0f",
437 "cycles / transaction", ratio
);
439 print_metric(ctxp
, NULL
, NULL
, "cycles / transaction",
441 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
442 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
447 print_metric(ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
448 } else if (perf_evsel__match(evsel
, SOFTWARE
, SW_TASK_CLOCK
) ||
449 perf_evsel__match(evsel
, SOFTWARE
, SW_CPU_CLOCK
)) {
450 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
451 print_metric(ctxp
, NULL
, "%8.3f", "CPUs utilized",
454 print_metric(ctxp
, NULL
, NULL
, "CPUs utilized", 0);
455 } else if (runtime_nsecs_stats
[cpu
].n
!= 0) {
459 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
462 ratio
= 1000.0 * avg
/ total
;
467 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
468 print_metric(ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
470 print_metric(ctxp
, NULL
, NULL
, NULL
, 0);