8 CTX_BIT_KERNEL
= 1 << 1,
10 CTX_BIT_HOST
= 1 << 3,
11 CTX_BIT_IDLE
= 1 << 4,
15 #define NUM_CTX CTX_BIT_MAX
17 static struct stats runtime_nsecs_stats
[MAX_NR_CPUS
];
18 static struct stats runtime_cycles_stats
[NUM_CTX
][MAX_NR_CPUS
];
19 static struct stats runtime_stalled_cycles_front_stats
[NUM_CTX
][MAX_NR_CPUS
];
20 static struct stats runtime_stalled_cycles_back_stats
[NUM_CTX
][MAX_NR_CPUS
];
21 static struct stats runtime_branches_stats
[NUM_CTX
][MAX_NR_CPUS
];
22 static struct stats runtime_cacherefs_stats
[NUM_CTX
][MAX_NR_CPUS
];
23 static struct stats runtime_l1_dcache_stats
[NUM_CTX
][MAX_NR_CPUS
];
24 static struct stats runtime_l1_icache_stats
[NUM_CTX
][MAX_NR_CPUS
];
25 static struct stats runtime_ll_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
26 static struct stats runtime_itlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
27 static struct stats runtime_dtlb_cache_stats
[NUM_CTX
][MAX_NR_CPUS
];
28 static struct stats runtime_cycles_in_tx_stats
[NUM_CTX
][MAX_NR_CPUS
];
29 static struct stats runtime_transaction_stats
[NUM_CTX
][MAX_NR_CPUS
];
30 static struct stats runtime_elision_stats
[NUM_CTX
][MAX_NR_CPUS
];
32 struct stats walltime_nsecs_stats
;
34 static int evsel_context(struct perf_evsel
*evsel
)
38 if (evsel
->attr
.exclude_kernel
)
39 ctx
|= CTX_BIT_KERNEL
;
40 if (evsel
->attr
.exclude_user
)
42 if (evsel
->attr
.exclude_hv
)
44 if (evsel
->attr
.exclude_host
)
46 if (evsel
->attr
.exclude_idle
)
52 void perf_stat__reset_shadow_stats(void)
54 memset(runtime_nsecs_stats
, 0, sizeof(runtime_nsecs_stats
));
55 memset(runtime_cycles_stats
, 0, sizeof(runtime_cycles_stats
));
56 memset(runtime_stalled_cycles_front_stats
, 0, sizeof(runtime_stalled_cycles_front_stats
));
57 memset(runtime_stalled_cycles_back_stats
, 0, sizeof(runtime_stalled_cycles_back_stats
));
58 memset(runtime_branches_stats
, 0, sizeof(runtime_branches_stats
));
59 memset(runtime_cacherefs_stats
, 0, sizeof(runtime_cacherefs_stats
));
60 memset(runtime_l1_dcache_stats
, 0, sizeof(runtime_l1_dcache_stats
));
61 memset(runtime_l1_icache_stats
, 0, sizeof(runtime_l1_icache_stats
));
62 memset(runtime_ll_cache_stats
, 0, sizeof(runtime_ll_cache_stats
));
63 memset(runtime_itlb_cache_stats
, 0, sizeof(runtime_itlb_cache_stats
));
64 memset(runtime_dtlb_cache_stats
, 0, sizeof(runtime_dtlb_cache_stats
));
65 memset(runtime_cycles_in_tx_stats
, 0,
66 sizeof(runtime_cycles_in_tx_stats
));
67 memset(runtime_transaction_stats
, 0,
68 sizeof(runtime_transaction_stats
));
69 memset(runtime_elision_stats
, 0, sizeof(runtime_elision_stats
));
70 memset(&walltime_nsecs_stats
, 0, sizeof(walltime_nsecs_stats
));
74 * Update various tracking values we maintain to print
75 * more semantic information such as miss/hit ratios,
76 * instruction rates, etc:
78 void perf_stat__update_shadow_stats(struct perf_evsel
*counter
, u64
*count
,
81 int ctx
= evsel_context(counter
);
83 if (perf_evsel__match(counter
, SOFTWARE
, SW_TASK_CLOCK
))
84 update_stats(&runtime_nsecs_stats
[cpu
], count
[0]);
85 else if (perf_evsel__match(counter
, HARDWARE
, HW_CPU_CYCLES
))
86 update_stats(&runtime_cycles_stats
[ctx
][cpu
], count
[0]);
87 else if (perf_stat_evsel__is(counter
, CYCLES_IN_TX
))
88 update_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
], count
[0]);
89 else if (perf_stat_evsel__is(counter
, TRANSACTION_START
))
90 update_stats(&runtime_transaction_stats
[ctx
][cpu
], count
[0]);
91 else if (perf_stat_evsel__is(counter
, ELISION_START
))
92 update_stats(&runtime_elision_stats
[ctx
][cpu
], count
[0]);
93 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
))
94 update_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
], count
[0]);
95 else if (perf_evsel__match(counter
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
))
96 update_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
], count
[0]);
97 else if (perf_evsel__match(counter
, HARDWARE
, HW_BRANCH_INSTRUCTIONS
))
98 update_stats(&runtime_branches_stats
[ctx
][cpu
], count
[0]);
99 else if (perf_evsel__match(counter
, HARDWARE
, HW_CACHE_REFERENCES
))
100 update_stats(&runtime_cacherefs_stats
[ctx
][cpu
], count
[0]);
101 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1D
))
102 update_stats(&runtime_l1_dcache_stats
[ctx
][cpu
], count
[0]);
103 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_L1I
))
104 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
105 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_LL
))
106 update_stats(&runtime_ll_cache_stats
[ctx
][cpu
], count
[0]);
107 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_DTLB
))
108 update_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
], count
[0]);
109 else if (perf_evsel__match(counter
, HW_CACHE
, HW_CACHE_ITLB
))
110 update_stats(&runtime_itlb_cache_stats
[ctx
][cpu
], count
[0]);
/*
 * used for get_ratio_color(): selects which row of its threshold table
 * applies. GRC_MAX_NR is the row count and must stay last.
 */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};
121 static const char *get_ratio_color(enum grc_type type
, double ratio
)
123 static const double grc_table
[GRC_MAX_NR
][3] = {
124 [GRC_STALLED_CYCLES_FE
] = { 50.0, 30.0, 10.0 },
125 [GRC_STALLED_CYCLES_BE
] = { 75.0, 50.0, 20.0 },
126 [GRC_CACHE_MISSES
] = { 20.0, 10.0, 5.0 },
128 const char *color
= PERF_COLOR_NORMAL
;
130 if (ratio
> grc_table
[type
][0])
131 color
= PERF_COLOR_RED
;
132 else if (ratio
> grc_table
[type
][1])
133 color
= PERF_COLOR_MAGENTA
;
134 else if (ratio
> grc_table
[type
][2])
135 color
= PERF_COLOR_YELLOW
;
140 static void print_stalled_cycles_frontend(int cpu
,
141 struct perf_evsel
*evsel
142 __maybe_unused
, double avg
,
143 struct perf_stat_output_ctx
*out
)
145 double total
, ratio
= 0.0;
147 int ctx
= evsel_context(evsel
);
149 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
152 ratio
= avg
/ total
* 100.0;
154 color
= get_ratio_color(GRC_STALLED_CYCLES_FE
, ratio
);
157 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "frontend cycles idle",
160 out
->print_metric(out
->ctx
, NULL
, NULL
, "frontend cycles idle", 0);
163 static void print_stalled_cycles_backend(int cpu
,
164 struct perf_evsel
*evsel
165 __maybe_unused
, double avg
,
166 struct perf_stat_output_ctx
*out
)
168 double total
, ratio
= 0.0;
170 int ctx
= evsel_context(evsel
);
172 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
175 ratio
= avg
/ total
* 100.0;
177 color
= get_ratio_color(GRC_STALLED_CYCLES_BE
, ratio
);
179 out
->print_metric(out
->ctx
, color
, "%6.2f%%", "backend cycles idle", ratio
);
182 static void print_branch_misses(int cpu
,
183 struct perf_evsel
*evsel __maybe_unused
,
185 struct perf_stat_output_ctx
*out
)
187 double total
, ratio
= 0.0;
189 int ctx
= evsel_context(evsel
);
191 total
= avg_stats(&runtime_branches_stats
[ctx
][cpu
]);
194 ratio
= avg
/ total
* 100.0;
196 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
198 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all branches", ratio
);
201 static void print_l1_dcache_misses(int cpu
,
202 struct perf_evsel
*evsel __maybe_unused
,
204 struct perf_stat_output_ctx
*out
)
206 double total
, ratio
= 0.0;
208 int ctx
= evsel_context(evsel
);
210 total
= avg_stats(&runtime_l1_dcache_stats
[ctx
][cpu
]);
213 ratio
= avg
/ total
* 100.0;
215 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
217 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-dcache hits", ratio
);
220 static void print_l1_icache_misses(int cpu
,
221 struct perf_evsel
*evsel __maybe_unused
,
223 struct perf_stat_output_ctx
*out
)
225 double total
, ratio
= 0.0;
227 int ctx
= evsel_context(evsel
);
229 total
= avg_stats(&runtime_l1_icache_stats
[ctx
][cpu
]);
232 ratio
= avg
/ total
* 100.0;
234 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
235 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all L1-icache hits", ratio
);
238 static void print_dtlb_cache_misses(int cpu
,
239 struct perf_evsel
*evsel __maybe_unused
,
241 struct perf_stat_output_ctx
*out
)
243 double total
, ratio
= 0.0;
245 int ctx
= evsel_context(evsel
);
247 total
= avg_stats(&runtime_dtlb_cache_stats
[ctx
][cpu
]);
250 ratio
= avg
/ total
* 100.0;
252 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
253 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all dTLB cache hits", ratio
);
256 static void print_itlb_cache_misses(int cpu
,
257 struct perf_evsel
*evsel __maybe_unused
,
259 struct perf_stat_output_ctx
*out
)
261 double total
, ratio
= 0.0;
263 int ctx
= evsel_context(evsel
);
265 total
= avg_stats(&runtime_itlb_cache_stats
[ctx
][cpu
]);
268 ratio
= avg
/ total
* 100.0;
270 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
271 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all iTLB cache hits", ratio
);
274 static void print_ll_cache_misses(int cpu
,
275 struct perf_evsel
*evsel __maybe_unused
,
277 struct perf_stat_output_ctx
*out
)
279 double total
, ratio
= 0.0;
281 int ctx
= evsel_context(evsel
);
283 total
= avg_stats(&runtime_ll_cache_stats
[ctx
][cpu
]);
286 ratio
= avg
/ total
* 100.0;
288 color
= get_ratio_color(GRC_CACHE_MISSES
, ratio
);
289 out
->print_metric(out
->ctx
, color
, "%7.2f%%", "of all LL-cache hits", ratio
);
292 void perf_stat__print_shadow_stats(struct perf_evsel
*evsel
,
294 struct perf_stat_output_ctx
*out
)
296 void *ctxp
= out
->ctx
;
297 print_metric_t print_metric
= out
->print_metric
;
298 double total
, ratio
= 0.0, total2
;
299 int ctx
= evsel_context(evsel
);
301 if (perf_evsel__match(evsel
, HARDWARE
, HW_INSTRUCTIONS
)) {
302 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
305 print_metric(ctxp
, NULL
, "%7.2f ",
306 "insn per cycle", ratio
);
308 print_metric(ctxp
, NULL
, NULL
, "insn per cycle", 0);
310 total
= avg_stats(&runtime_stalled_cycles_front_stats
[ctx
][cpu
]);
311 total
= max(total
, avg_stats(&runtime_stalled_cycles_back_stats
[ctx
][cpu
]));
316 print_metric(ctxp
, NULL
, "%7.2f ",
317 "stalled cycles per insn",
320 print_metric(ctxp
, NULL
, NULL
,
321 "stalled cycles per insn", 0);
323 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_BRANCH_MISSES
)) {
324 if (runtime_branches_stats
[ctx
][cpu
].n
!= 0)
325 print_branch_misses(cpu
, evsel
, avg
, out
);
327 print_metric(ctxp
, NULL
, NULL
, "of all branches", 0);
329 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
330 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1D
|
331 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
332 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
333 if (runtime_l1_dcache_stats
[ctx
][cpu
].n
!= 0)
334 print_l1_dcache_misses(cpu
, evsel
, avg
, out
);
336 print_metric(ctxp
, NULL
, NULL
, "of all L1-dcache hits", 0);
338 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
339 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_L1I
|
340 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
341 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
342 if (runtime_l1_icache_stats
[ctx
][cpu
].n
!= 0)
343 print_l1_icache_misses(cpu
, evsel
, avg
, out
);
345 print_metric(ctxp
, NULL
, NULL
, "of all L1-icache hits", 0);
347 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
348 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_DTLB
|
349 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
350 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
351 if (runtime_dtlb_cache_stats
[ctx
][cpu
].n
!= 0)
352 print_dtlb_cache_misses(cpu
, evsel
, avg
, out
);
354 print_metric(ctxp
, NULL
, NULL
, "of all dTLB cache hits", 0);
356 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
357 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_ITLB
|
358 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
359 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
360 if (runtime_itlb_cache_stats
[ctx
][cpu
].n
!= 0)
361 print_itlb_cache_misses(cpu
, evsel
, avg
, out
);
363 print_metric(ctxp
, NULL
, NULL
, "of all iTLB cache hits", 0);
365 evsel
->attr
.type
== PERF_TYPE_HW_CACHE
&&
366 evsel
->attr
.config
== ( PERF_COUNT_HW_CACHE_LL
|
367 ((PERF_COUNT_HW_CACHE_OP_READ
) << 8) |
368 ((PERF_COUNT_HW_CACHE_RESULT_MISS
) << 16))) {
369 if (runtime_ll_cache_stats
[ctx
][cpu
].n
!= 0)
370 print_ll_cache_misses(cpu
, evsel
, avg
, out
);
372 print_metric(ctxp
, NULL
, NULL
, "of all LL-cache hits", 0);
373 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CACHE_MISSES
)) {
374 total
= avg_stats(&runtime_cacherefs_stats
[ctx
][cpu
]);
377 ratio
= avg
* 100 / total
;
379 if (runtime_cacherefs_stats
[ctx
][cpu
].n
!= 0)
380 print_metric(ctxp
, NULL
, "%8.3f %%",
381 "of all cache refs", ratio
);
383 print_metric(ctxp
, NULL
, NULL
, "of all cache refs", 0);
384 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_FRONTEND
)) {
385 print_stalled_cycles_frontend(cpu
, evsel
, avg
, out
);
386 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_STALLED_CYCLES_BACKEND
)) {
387 print_stalled_cycles_backend(cpu
, evsel
, avg
, out
);
388 } else if (perf_evsel__match(evsel
, HARDWARE
, HW_CPU_CYCLES
)) {
389 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
393 print_metric(ctxp
, NULL
, "%8.3f", "GHz", ratio
);
395 print_metric(ctxp
, NULL
, NULL
, "Ghz", 0);
397 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX
)) {
398 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
400 print_metric(ctxp
, NULL
,
401 "%7.2f%%", "transactional cycles",
402 100.0 * (avg
/ total
));
404 print_metric(ctxp
, NULL
, NULL
, "transactional cycles",
406 } else if (perf_stat_evsel__is(evsel
, CYCLES_IN_TX_CP
)) {
407 total
= avg_stats(&runtime_cycles_stats
[ctx
][cpu
]);
408 total2
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
412 print_metric(ctxp
, NULL
, "%7.2f%%", "aborted cycles",
413 100.0 * ((total2
-avg
) / total
));
415 print_metric(ctxp
, NULL
, NULL
, "aborted cycles", 0);
416 } else if (perf_stat_evsel__is(evsel
, TRANSACTION_START
)) {
417 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
422 if (runtime_cycles_in_tx_stats
[ctx
][cpu
].n
!= 0)
423 print_metric(ctxp
, NULL
, "%8.0f",
424 "cycles / transaction", ratio
);
426 print_metric(ctxp
, NULL
, NULL
, "cycles / transaction",
428 } else if (perf_stat_evsel__is(evsel
, ELISION_START
)) {
429 total
= avg_stats(&runtime_cycles_in_tx_stats
[ctx
][cpu
]);
434 print_metric(ctxp
, NULL
, "%8.0f", "cycles / elision", ratio
);
435 } else if (perf_evsel__match(evsel
, SOFTWARE
, SW_TASK_CLOCK
)) {
436 if ((ratio
= avg_stats(&walltime_nsecs_stats
)) != 0)
437 print_metric(ctxp
, NULL
, "%8.3f", "CPUs utilized",
440 print_metric(ctxp
, NULL
, NULL
, "CPUs utilized", 0);
441 } else if (runtime_nsecs_stats
[cpu
].n
!= 0) {
445 total
= avg_stats(&runtime_nsecs_stats
[cpu
]);
448 ratio
= 1000.0 * avg
/ total
;
453 snprintf(unit_buf
, sizeof(unit_buf
), "%c/sec", unit
);
454 print_metric(ctxp
, NULL
, "%8.3f", unit_buf
, ratio
);
456 print_metric(ctxp
, NULL
, NULL
, NULL
, 0);