perf_counter, x86: Fix generic cache events on P6-mobile CPUs
arch/x86/kernel/cpu/perf_counter.c (deliverable/linux.git)
1 /*
2 * Performance counter x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * For licensing details see kernel-base/COPYING
11 */
12
13 #include <linux/perf_counter.h>
14 #include <linux/capability.h>
15 #include <linux/notifier.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/module.h>
19 #include <linux/kdebug.h>
20 #include <linux/sched.h>
21 #include <linux/uaccess.h>
22 #include <linux/highmem.h>
23
24 #include <asm/apic.h>
25 #include <asm/stacktrace.h>
26 #include <asm/nmi.h>
27
28 static u64 perf_counter_mask __read_mostly;
29
30 struct cpu_hw_counters {
31 struct perf_counter *counters[X86_PMC_IDX_MAX];
32 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
33 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
34 unsigned long interrupts;
35 int enabled;
36 };
37
38 /*
39 * struct x86_pmu - generic x86 pmu
40 */
41 struct x86_pmu {
42 const char *name;
43 int version;
44 int (*handle_irq)(struct pt_regs *);
45 void (*disable_all)(void);
46 void (*enable_all)(void);
47 void (*enable)(struct hw_perf_counter *, int);
48 void (*disable)(struct hw_perf_counter *, int);
49 unsigned eventsel;
50 unsigned perfctr;
51 u64 (*event_map)(int);
52 u64 (*raw_event)(u64);
53 int max_events;
54 int num_counters;
55 int num_counters_fixed;
56 int counter_bits;
57 u64 counter_mask;
58 u64 max_period;
59 u64 intel_ctrl;
60 };
61
62 static struct x86_pmu x86_pmu __read_mostly;
63
64 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
65 .enabled = 1,
66 };
67
68 /*
69 * Not sure about some of these
70 */
71 static const u64 p6_perfmon_event_map[] =
72 {
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
80 };
81
82 static u64 p6_pmu_event_map(int event)
83 {
84 return p6_perfmon_event_map[event];
85 }
86
87 /*
88 * Counter setting that is specified not to count anything.
89 * We use this to effectively disable a counter.
90 *
91 * L2_RQSTS with 0 MESI unit mask.
92 */
93 #define P6_NOP_COUNTER 0x0000002EULL
94
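/*
 * Restrict a user-supplied raw config to the EVNTSEL bits we allow:
 * event select, unit mask, edge detect, invert and counter mask.
 */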
95 static u64 p6_pmu_raw_event(u64 event)
96 {
97 #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
98 #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
99 #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
100 #define P6_EVNTSEL_INV_MASK 0x00800000ULL
101 #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
102
103 #define P6_EVNTSEL_MASK \
104 (P6_EVNTSEL_EVENT_MASK | \
105 P6_EVNTSEL_UNIT_MASK | \
106 P6_EVNTSEL_EDGE_MASK | \
107 P6_EVNTSEL_INV_MASK | \
108 P6_EVNTSEL_COUNTER_MASK)
109
110 return event & P6_EVNTSEL_MASK;
111 }
112
113
114 /*
115 * Intel PerfMon v3. Used on Core2 and later.
116 */
117 static const u64 intel_perfmon_event_map[] =
118 {
119 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
120 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
121 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
122 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
123 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
124 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
125 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
126 };
127
128 static u64 intel_pmu_event_map(int event)
129 {
130 return intel_perfmon_event_map[event];
131 }
132
133 /*
134 * Generalized hw caching related event table, filled
135 * in on a per model basis. A value of 0 means
136 * 'not supported', -1 means 'event makes no sense on
137 * this CPU', any other value means the raw event
138 * ID.
139 */
140
141 #define C(x) PERF_COUNT_HW_CACHE_##x
142
143 static u64 __read_mostly hw_cache_event_ids
144 [PERF_COUNT_HW_CACHE_MAX]
145 [PERF_COUNT_HW_CACHE_OP_MAX]
146 [PERF_COUNT_HW_CACHE_RESULT_MAX];
147
148 static const u64 nehalem_hw_cache_event_ids
149 [PERF_COUNT_HW_CACHE_MAX]
150 [PERF_COUNT_HW_CACHE_OP_MAX]
151 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
152 {
153 [ C(L1D) ] = {
154 [ C(OP_READ) ] = {
155 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
156 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
157 },
158 [ C(OP_WRITE) ] = {
159 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
160 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
161 },
162 [ C(OP_PREFETCH) ] = {
163 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
164 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
165 },
166 },
167 [ C(L1I ) ] = {
168 [ C(OP_READ) ] = {
169 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
170 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
171 },
172 [ C(OP_WRITE) ] = {
173 [ C(RESULT_ACCESS) ] = -1,
174 [ C(RESULT_MISS) ] = -1,
175 },
176 [ C(OP_PREFETCH) ] = {
177 [ C(RESULT_ACCESS) ] = 0x0,
178 [ C(RESULT_MISS) ] = 0x0,
179 },
180 },
181 [ C(LL ) ] = {
182 [ C(OP_READ) ] = {
183 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
184 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
185 },
186 [ C(OP_WRITE) ] = {
187 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
188 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
189 },
190 [ C(OP_PREFETCH) ] = {
191 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
192 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
193 },
194 },
195 [ C(DTLB) ] = {
196 [ C(OP_READ) ] = {
197 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
198 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
199 },
200 [ C(OP_WRITE) ] = {
201 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
202 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
203 },
204 [ C(OP_PREFETCH) ] = {
205 [ C(RESULT_ACCESS) ] = 0x0,
206 [ C(RESULT_MISS) ] = 0x0,
207 },
208 },
209 [ C(ITLB) ] = {
210 [ C(OP_READ) ] = {
211 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
212 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
213 },
214 [ C(OP_WRITE) ] = {
215 [ C(RESULT_ACCESS) ] = -1,
216 [ C(RESULT_MISS) ] = -1,
217 },
218 [ C(OP_PREFETCH) ] = {
219 [ C(RESULT_ACCESS) ] = -1,
220 [ C(RESULT_MISS) ] = -1,
221 },
222 },
223 [ C(BPU ) ] = {
224 [ C(OP_READ) ] = {
225 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
226 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
227 },
228 [ C(OP_WRITE) ] = {
229 [ C(RESULT_ACCESS) ] = -1,
230 [ C(RESULT_MISS) ] = -1,
231 },
232 [ C(OP_PREFETCH) ] = {
233 [ C(RESULT_ACCESS) ] = -1,
234 [ C(RESULT_MISS) ] = -1,
235 },
236 },
237 };
238
239 static const u64 core2_hw_cache_event_ids
240 [PERF_COUNT_HW_CACHE_MAX]
241 [PERF_COUNT_HW_CACHE_OP_MAX]
242 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
243 {
244 [ C(L1D) ] = {
245 [ C(OP_READ) ] = {
246 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
247 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
248 },
249 [ C(OP_WRITE) ] = {
250 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
251 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
252 },
253 [ C(OP_PREFETCH) ] = {
254 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
255 [ C(RESULT_MISS) ] = 0,
256 },
257 },
258 [ C(L1I ) ] = {
259 [ C(OP_READ) ] = {
260 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
261 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
262 },
263 [ C(OP_WRITE) ] = {
264 [ C(RESULT_ACCESS) ] = -1,
265 [ C(RESULT_MISS) ] = -1,
266 },
267 [ C(OP_PREFETCH) ] = {
268 [ C(RESULT_ACCESS) ] = 0,
269 [ C(RESULT_MISS) ] = 0,
270 },
271 },
272 [ C(LL ) ] = {
273 [ C(OP_READ) ] = {
274 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
275 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
276 },
277 [ C(OP_WRITE) ] = {
278 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
279 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
280 },
281 [ C(OP_PREFETCH) ] = {
282 [ C(RESULT_ACCESS) ] = 0,
283 [ C(RESULT_MISS) ] = 0,
284 },
285 },
286 [ C(DTLB) ] = {
287 [ C(OP_READ) ] = {
288 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
289 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
290 },
291 [ C(OP_WRITE) ] = {
292 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
293 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
294 },
295 [ C(OP_PREFETCH) ] = {
296 [ C(RESULT_ACCESS) ] = 0,
297 [ C(RESULT_MISS) ] = 0,
298 },
299 },
300 [ C(ITLB) ] = {
301 [ C(OP_READ) ] = {
302 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
303 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
304 },
305 [ C(OP_WRITE) ] = {
306 [ C(RESULT_ACCESS) ] = -1,
307 [ C(RESULT_MISS) ] = -1,
308 },
309 [ C(OP_PREFETCH) ] = {
310 [ C(RESULT_ACCESS) ] = -1,
311 [ C(RESULT_MISS) ] = -1,
312 },
313 },
314 [ C(BPU ) ] = {
315 [ C(OP_READ) ] = {
316 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
317 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
318 },
319 [ C(OP_WRITE) ] = {
320 [ C(RESULT_ACCESS) ] = -1,
321 [ C(RESULT_MISS) ] = -1,
322 },
323 [ C(OP_PREFETCH) ] = {
324 [ C(RESULT_ACCESS) ] = -1,
325 [ C(RESULT_MISS) ] = -1,
326 },
327 },
328 };
329
330 static const u64 atom_hw_cache_event_ids
331 [PERF_COUNT_HW_CACHE_MAX]
332 [PERF_COUNT_HW_CACHE_OP_MAX]
333 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
334 {
335 [ C(L1D) ] = {
336 [ C(OP_READ) ] = {
337 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
338 [ C(RESULT_MISS) ] = 0,
339 },
340 [ C(OP_WRITE) ] = {
341 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
342 [ C(RESULT_MISS) ] = 0,
343 },
344 [ C(OP_PREFETCH) ] = {
345 [ C(RESULT_ACCESS) ] = 0x0,
346 [ C(RESULT_MISS) ] = 0,
347 },
348 },
349 [ C(L1I ) ] = {
350 [ C(OP_READ) ] = {
351 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
352 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
353 },
354 [ C(OP_WRITE) ] = {
355 [ C(RESULT_ACCESS) ] = -1,
356 [ C(RESULT_MISS) ] = -1,
357 },
358 [ C(OP_PREFETCH) ] = {
359 [ C(RESULT_ACCESS) ] = 0,
360 [ C(RESULT_MISS) ] = 0,
361 },
362 },
363 [ C(LL ) ] = {
364 [ C(OP_READ) ] = {
365 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
366 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
367 },
368 [ C(OP_WRITE) ] = {
369 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
370 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
371 },
372 [ C(OP_PREFETCH) ] = {
373 [ C(RESULT_ACCESS) ] = 0,
374 [ C(RESULT_MISS) ] = 0,
375 },
376 },
377 [ C(DTLB) ] = {
378 [ C(OP_READ) ] = {
379 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
380 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
381 },
382 [ C(OP_WRITE) ] = {
383 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
384 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
385 },
386 [ C(OP_PREFETCH) ] = {
387 [ C(RESULT_ACCESS) ] = 0,
388 [ C(RESULT_MISS) ] = 0,
389 },
390 },
391 [ C(ITLB) ] = {
392 [ C(OP_READ) ] = {
393 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
394 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
395 },
396 [ C(OP_WRITE) ] = {
397 [ C(RESULT_ACCESS) ] = -1,
398 [ C(RESULT_MISS) ] = -1,
399 },
400 [ C(OP_PREFETCH) ] = {
401 [ C(RESULT_ACCESS) ] = -1,
402 [ C(RESULT_MISS) ] = -1,
403 },
404 },
405 [ C(BPU ) ] = {
406 [ C(OP_READ) ] = {
407 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
408 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
409 },
410 [ C(OP_WRITE) ] = {
411 [ C(RESULT_ACCESS) ] = -1,
412 [ C(RESULT_MISS) ] = -1,
413 },
414 [ C(OP_PREFETCH) ] = {
415 [ C(RESULT_ACCESS) ] = -1,
416 [ C(RESULT_MISS) ] = -1,
417 },
418 },
419 };
420
421 static u64 intel_pmu_raw_event(u64 event)
422 {
423 #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
424 #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
425 #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
426 #define CORE_EVNTSEL_INV_MASK 0x00800000ULL
427 #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
428
429 #define CORE_EVNTSEL_MASK \
430 (CORE_EVNTSEL_EVENT_MASK | \
431 CORE_EVNTSEL_UNIT_MASK | \
432 CORE_EVNTSEL_EDGE_MASK | \
433 CORE_EVNTSEL_INV_MASK | \
434 CORE_EVNTSEL_COUNTER_MASK)
435
436 return event & CORE_EVNTSEL_MASK;
437 }
438
439 static const u64 amd_hw_cache_event_ids
440 [PERF_COUNT_HW_CACHE_MAX]
441 [PERF_COUNT_HW_CACHE_OP_MAX]
442 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
443 {
444 [ C(L1D) ] = {
445 [ C(OP_READ) ] = {
446 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
447 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
448 },
449 [ C(OP_WRITE) ] = {
450 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
451 [ C(RESULT_MISS) ] = 0,
452 },
453 [ C(OP_PREFETCH) ] = {
454 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
455 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
456 },
457 },
458 [ C(L1I ) ] = {
459 [ C(OP_READ) ] = {
460 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
461 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
462 },
463 [ C(OP_WRITE) ] = {
464 [ C(RESULT_ACCESS) ] = -1,
465 [ C(RESULT_MISS) ] = -1,
466 },
467 [ C(OP_PREFETCH) ] = {
468 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
469 [ C(RESULT_MISS) ] = 0,
470 },
471 },
472 [ C(LL ) ] = {
473 [ C(OP_READ) ] = {
474 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
475 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
476 },
477 [ C(OP_WRITE) ] = {
478 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
479 [ C(RESULT_MISS) ] = 0,
480 },
481 [ C(OP_PREFETCH) ] = {
482 [ C(RESULT_ACCESS) ] = 0,
483 [ C(RESULT_MISS) ] = 0,
484 },
485 },
486 [ C(DTLB) ] = {
487 [ C(OP_READ) ] = {
488 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
489 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
490 },
491 [ C(OP_WRITE) ] = {
492 [ C(RESULT_ACCESS) ] = 0,
493 [ C(RESULT_MISS) ] = 0,
494 },
495 [ C(OP_PREFETCH) ] = {
496 [ C(RESULT_ACCESS) ] = 0,
497 [ C(RESULT_MISS) ] = 0,
498 },
499 },
500 [ C(ITLB) ] = {
501 [ C(OP_READ) ] = {
502 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
503 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
504 },
505 [ C(OP_WRITE) ] = {
506 [ C(RESULT_ACCESS) ] = -1,
507 [ C(RESULT_MISS) ] = -1,
508 },
509 [ C(OP_PREFETCH) ] = {
510 [ C(RESULT_ACCESS) ] = -1,
511 [ C(RESULT_MISS) ] = -1,
512 },
513 },
514 [ C(BPU ) ] = {
515 [ C(OP_READ) ] = {
516 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
517 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
518 },
519 [ C(OP_WRITE) ] = {
520 [ C(RESULT_ACCESS) ] = -1,
521 [ C(RESULT_MISS) ] = -1,
522 },
523 [ C(OP_PREFETCH) ] = {
524 [ C(RESULT_ACCESS) ] = -1,
525 [ C(RESULT_MISS) ] = -1,
526 },
527 },
528 };
529
530 /*
531 * AMD Performance Monitor K7 and later.
532 */
533 static const u64 amd_perfmon_event_map[] =
534 {
535 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
536 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
537 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
538 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
539 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
540 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
541 };
542
543 static u64 amd_pmu_event_map(int event)
544 {
545 return amd_perfmon_event_map[event];
546 }
547
548 static u64 amd_pmu_raw_event(u64 event)
549 {
550 #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
551 #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
552 #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
553 #define K7_EVNTSEL_INV_MASK 0x000800000ULL
554 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
555
556 #define K7_EVNTSEL_MASK \
557 (K7_EVNTSEL_EVENT_MASK | \
558 K7_EVNTSEL_UNIT_MASK | \
559 K7_EVNTSEL_EDGE_MASK | \
560 K7_EVNTSEL_INV_MASK | \
561 K7_EVNTSEL_COUNTER_MASK)
562
563 return event & K7_EVNTSEL_MASK;
564 }
565
566 /*
567 * Propagate counter elapsed time into the generic counter.
568 * Can only be executed on the CPU where the counter is active.
569 * Returns the new raw counter value.
570 */
571 static u64
572 x86_perf_counter_update(struct perf_counter *counter,
573 struct hw_perf_counter *hwc, int idx)
574 {
575 int shift = 64 - x86_pmu.counter_bits;
576 u64 prev_raw_count, new_raw_count;
577 s64 delta;
578
579 /*
580 * Careful: an NMI might modify the previous counter value.
581 *
582 * Our tactic to handle this is to first atomically read and
583 * exchange a new raw count - then add that new-prev delta
584 * count to the generic counter atomically:
585 */
586 again:
587 prev_raw_count = atomic64_read(&hwc->prev_count);
588 rdmsrl(hwc->counter_base + idx, new_raw_count);
589
590 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
591 new_raw_count) != prev_raw_count)
592 goto again;
593
594 /*
595 * Now we have the new raw value and have updated the prev
596 * timestamp already. We can now calculate the elapsed delta
597 * (counter-)time and add that to the generic counter.
598 *
599 * Careful, not all hw sign-extends above the physical width
600 * of the count.
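 *
 * For example, with 32-bit counters (shift == 32) a prev_raw_count of
 * 0xffffffff followed by a new_raw_count of 5 works out to a delta of 6
 * after the shifts, i.e. the number of events that occurred across the
 * 32-bit wrap.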
601 */
602 delta = (new_raw_count << shift) - (prev_raw_count << shift);
603 delta >>= shift;
604
605 atomic64_add(delta, &counter->count);
606 atomic64_sub(delta, &hwc->period_left);
607
608 return new_raw_count;
609 }
610
611 static atomic_t active_counters;
612 static DEFINE_MUTEX(pmc_reserve_mutex);
613
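/*
 * Claim the performance-counter MSRs via the perfctr-watchdog reservation
 * API. The LAPIC NMI watchdog is disabled while perf counters own the
 * hardware and re-enabled when they are released.
 */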
614 static bool reserve_pmc_hardware(void)
615 {
616 int i;
617
618 if (nmi_watchdog == NMI_LOCAL_APIC)
619 disable_lapic_nmi_watchdog();
620
621 for (i = 0; i < x86_pmu.num_counters; i++) {
622 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
623 goto perfctr_fail;
624 }
625
626 for (i = 0; i < x86_pmu.num_counters; i++) {
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail;
629 }
630
631 return true;
632
633 eventsel_fail:
634 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i);
636
637 i = x86_pmu.num_counters;
638
639 perfctr_fail:
640 for (i--; i >= 0; i--)
641 release_perfctr_nmi(x86_pmu.perfctr + i);
642
643 if (nmi_watchdog == NMI_LOCAL_APIC)
644 enable_lapic_nmi_watchdog();
645
646 return false;
647 }
648
649 static void release_pmc_hardware(void)
650 {
651 int i;
652
653 for (i = 0; i < x86_pmu.num_counters; i++) {
654 release_perfctr_nmi(x86_pmu.perfctr + i);
655 release_evntsel_nmi(x86_pmu.eventsel + i);
656 }
657
658 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog();
660 }
661
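/*
 * Drop our reference on the PMC hardware; the last counter to go away
 * releases the MSRs and re-enables the NMI watchdog.
 */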
662 static void hw_perf_counter_destroy(struct perf_counter *counter)
663 {
664 if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
665 release_pmc_hardware();
666 mutex_unlock(&pmc_reserve_mutex);
667 }
668 }
669
670 static inline int x86_pmu_initialized(void)
671 {
672 return x86_pmu.handle_irq != NULL;
673 }
674
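/*
 * Decode a PERF_TYPE_HW_CACHE config (cache type, op and result packed
 * into the low three bytes) and OR the matching model-specific raw event
 * from hw_cache_event_ids into hwc->config. A table value of 0 means
 * 'not supported', -1 means 'makes no sense on this CPU'.
 */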
675 static inline int
676 set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
677 {
678 unsigned int cache_type, cache_op, cache_result;
679 u64 config, val;
680
681 config = attr->config;
682
683 cache_type = (config >> 0) & 0xff;
684 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
685 return -EINVAL;
686
687 cache_op = (config >> 8) & 0xff;
688 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
689 return -EINVAL;
690
691 cache_result = (config >> 16) & 0xff;
692 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
693 return -EINVAL;
694
695 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
696
697 if (val == 0)
698 return -ENOENT;
699
700 if (val == -1)
701 return -EINVAL;
702
703 hwc->config |= val;
704
705 return 0;
706 }
707
708 /*
709 * Set up the hardware configuration for a given attr_type
710 */
711 static int __hw_perf_counter_init(struct perf_counter *counter)
712 {
713 struct perf_counter_attr *attr = &counter->attr;
714 struct hw_perf_counter *hwc = &counter->hw;
715 u64 config;
716 int err;
717
718 if (!x86_pmu_initialized())
719 return -ENODEV;
720
721 err = 0;
722 if (!atomic_inc_not_zero(&active_counters)) {
723 mutex_lock(&pmc_reserve_mutex);
724 if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware())
725 err = -EBUSY;
726 else
727 atomic_inc(&active_counters);
728 mutex_unlock(&pmc_reserve_mutex);
729 }
730 if (err)
731 return err;
732
733 /*
734 * Generate PMC IRQs:
735 * (keep 'enabled' bit clear for now)
736 */
737 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
738
739 /*
740 * Count user and OS events unless requested not to.
741 */
742 if (!attr->exclude_user)
743 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
744 if (!attr->exclude_kernel)
745 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
746
747 if (!hwc->sample_period) {
748 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period);
751 }
752
753 counter->destroy = hw_perf_counter_destroy;
754
755 /*
756 * Raw event types provide the config in the event structure
757 */
758 if (attr->type == PERF_TYPE_RAW) {
759 hwc->config |= x86_pmu.raw_event(attr->config);
760 return 0;
761 }
762
763 if (attr->type == PERF_TYPE_HW_CACHE)
764 return set_ext_hw_attr(hwc, attr);
765
766 if (attr->config >= x86_pmu.max_events)
767 return -EINVAL;
768
769 /*
770 * The generic map:
771 */
772 config = x86_pmu.event_map(attr->config);
773
774 if (config == 0)
775 return -ENOENT;
776
777 if (config == -1LL)
778 return -EINVAL;
779
780 hwc->config |= config;
781
782 return 0;
783 }
784
785 static void p6_pmu_disable_all(void)
786 {
787 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
788 u64 val;
789
790 if (!cpuc->enabled)
791 return;
792
793 cpuc->enabled = 0;
794 barrier();
795
796 /* p6 only has one enable register */
797 rdmsrl(MSR_P6_EVNTSEL0, val);
798 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
799 wrmsrl(MSR_P6_EVNTSEL0, val);
800 }
801
802 static void intel_pmu_disable_all(void)
803 {
804 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
805 }
806
807 static void amd_pmu_disable_all(void)
808 {
809 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
810 int idx;
811
812 if (!cpuc->enabled)
813 return;
814
815 cpuc->enabled = 0;
816 /*
817 * ensure we write the disable before we start disabling the
818 * counters proper, so that amd_pmu_enable_counter() does the
819 * right thing.
820 */
821 barrier();
822
823 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
824 u64 val;
825
826 if (!test_bit(idx, cpuc->active_mask))
827 continue;
828 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
829 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
830 continue;
831 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
832 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
833 }
834 }
835
836 void hw_perf_disable(void)
837 {
838 if (!x86_pmu_initialized())
839 return;
840 return x86_pmu.disable_all();
841 }
842
843 static void p6_pmu_enable_all(void)
844 {
845 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
846 unsigned long val;
847
848 if (cpuc->enabled)
849 return;
850
851 cpuc->enabled = 1;
852 barrier();
853
854 /* p6 only has one enable register */
855 rdmsrl(MSR_P6_EVNTSEL0, val);
856 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
857 wrmsrl(MSR_P6_EVNTSEL0, val);
858 }
859
860 static void intel_pmu_enable_all(void)
861 {
862 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
863 }
864
865 static void amd_pmu_enable_all(void)
866 {
867 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
868 int idx;
869
870 if (cpuc->enabled)
871 return;
872
873 cpuc->enabled = 1;
874 barrier();
875
876 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
877 struct perf_counter *counter = cpuc->counters[idx];
878 u64 val;
879
880 if (!test_bit(idx, cpuc->active_mask))
881 continue;
882
883 val = counter->hw.config;
884 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
885 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
886 }
887 }
888
889 void hw_perf_enable(void)
890 {
891 if (!x86_pmu_initialized())
892 return;
893 x86_pmu.enable_all();
894 }
895
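/*
 * Global overflow status/ack helpers for the architectural perfmon v2+
 * MSRs; only the Intel driver (version >= 2) uses them.
 */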
896 static inline u64 intel_pmu_get_status(void)
897 {
898 u64 status;
899
900 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
901
902 return status;
903 }
904
905 static inline void intel_pmu_ack_status(u64 ack)
906 {
907 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
908 }
909
910 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
911 {
912 (void)checking_wrmsrl(hwc->config_base + idx,
913 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
914 }
915
916 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
917 {
918 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
919 }
920
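/*
 * Fixed-purpose counters are controlled by 4-bit fields in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing our field stops the counter.
 */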
921 static inline void
922 intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
923 {
924 int idx = __idx - X86_PMC_IDX_FIXED;
925 u64 ctrl_val, mask;
926
927 mask = 0xfULL << (idx * 4);
928
929 rdmsrl(hwc->config_base, ctrl_val);
930 ctrl_val &= ~mask;
931 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
932 }
933
934 static inline void
935 p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
936 {
937 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
938 u64 val = P6_NOP_COUNTER;
939
940 if (cpuc->enabled)
941 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
942
943 (void)checking_wrmsrl(hwc->config_base + idx, val);
944 }
945
946 static inline void
947 intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
948 {
949 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
950 intel_pmu_disable_fixed(hwc, idx);
951 return;
952 }
953
954 x86_pmu_disable_counter(hwc, idx);
955 }
956
957 static inline void
958 amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
959 {
960 x86_pmu_disable_counter(hwc, idx);
961 }
962
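/*
 * Per-counter copy of the last programmed sampling period, kept for
 * perf_counter_print_debug() output.
 */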
963 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
964
965 /*
966 * Set the next IRQ period, based on the hwc->period_left value.
967 * To be called with the counter disabled in hw:
968 */
969 static int
970 x86_perf_counter_set_period(struct perf_counter *counter,
971 struct hw_perf_counter *hwc, int idx)
972 {
973 s64 left = atomic64_read(&hwc->period_left);
974 s64 period = hwc->sample_period;
975 int err, ret = 0;
976
977 /*
978 * If we are way outside a reasonable range then just skip forward:
979 */
980 if (unlikely(left <= -period)) {
981 left = period;
982 atomic64_set(&hwc->period_left, left);
983 hwc->last_period = period;
984 ret = 1;
985 }
986
987 if (unlikely(left <= 0)) {
988 left += period;
989 atomic64_set(&hwc->period_left, left);
990 hwc->last_period = period;
991 ret = 1;
992 }
993 /*
994 * Quirk: certain CPUs don't like it if just 1 event is left:
995 */
996 if (unlikely(left < 2))
997 left = 2;
998
999 if (left > x86_pmu.max_period)
1000 left = x86_pmu.max_period;
1001
1002 per_cpu(prev_left[idx], smp_processor_id()) = left;
1003
1004 /*
1005 * The hw counter starts counting from this counter offset,
1006 * mark it to be able to extract future deltas:
1007 */
1008 atomic64_set(&hwc->prev_count, (u64)-left);
1009
1010 err = checking_wrmsrl(hwc->counter_base + idx,
1011 (u64)(-left) & x86_pmu.counter_mask);
1012
1013 perf_counter_update_userpage(counter);
1014
1015 return ret;
1016 }
1017
1018 static inline void
1019 intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
1020 {
1021 int idx = __idx - X86_PMC_IDX_FIXED;
1022 u64 ctrl_val, bits, mask;
1023 int err;
1024
1025 /*
1026 * Enable IRQ generation (0x8),
1027 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1028 * if requested:
1029 */
1030 bits = 0x8ULL;
1031 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1032 bits |= 0x2;
1033 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1034 bits |= 0x1;
1035 bits <<= (idx * 4);
1036 mask = 0xfULL << (idx * 4);
1037
1038 rdmsrl(hwc->config_base, ctrl_val);
1039 ctrl_val &= ~mask;
1040 ctrl_val |= bits;
1041 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1042 }
1043
1044 static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1045 {
1046 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1047 u64 val;
1048
1049 val = hwc->config;
1050 if (cpuc->enabled)
1051 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1052
1053 (void)checking_wrmsrl(hwc->config_base + idx, val);
1054 }
1055
1056
1057 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1058 {
1059 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1060 intel_pmu_enable_fixed(hwc, idx);
1061 return;
1062 }
1063
1064 x86_pmu_enable_counter(hwc, idx);
1065 }
1066
1067 static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
1068 {
1069 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1070
1071 if (cpuc->enabled)
1072 x86_pmu_enable_counter(hwc, idx);
1073 }
1074
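/*
 * Map a counter config onto one of the Intel fixed-purpose counters,
 * or return -1 if it has to go onto a generic counter.
 */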
1075 static int
1076 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
1077 {
1078 unsigned int event;
1079
1080 if (!x86_pmu.num_counters_fixed)
1081 return -1;
1082
1083 event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1084
1085 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1086 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1087 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1088 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1089 if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1090 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1091
1092 return -1;
1093 }
1094
1095 /*
1096 * Find a PMC slot for the freshly enabled / scheduled in counter:
1097 */
1098 static int x86_pmu_enable(struct perf_counter *counter)
1099 {
1100 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1101 struct hw_perf_counter *hwc = &counter->hw;
1102 int idx;
1103
1104 idx = fixed_mode_idx(counter, hwc);
1105 if (idx >= 0) {
1106 /*
1107 * Try to get the fixed counter, if that is already taken
1108 * then try to get a generic counter:
1109 */
1110 if (test_and_set_bit(idx, cpuc->used_mask))
1111 goto try_generic;
1112
1113 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1114 /*
1115 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
1116 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1117 */
1118 hwc->counter_base =
1119 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1120 hwc->idx = idx;
1121 } else {
1122 idx = hwc->idx;
1123 /* Try to get the previous generic counter again */
1124 if (test_and_set_bit(idx, cpuc->used_mask)) {
1125 try_generic:
1126 idx = find_first_zero_bit(cpuc->used_mask,
1127 x86_pmu.num_counters);
1128 if (idx == x86_pmu.num_counters)
1129 return -EAGAIN;
1130
1131 set_bit(idx, cpuc->used_mask);
1132 hwc->idx = idx;
1133 }
1134 hwc->config_base = x86_pmu.eventsel;
1135 hwc->counter_base = x86_pmu.perfctr;
1136 }
1137
1138 perf_counters_lapic_init();
1139
1140 x86_pmu.disable(hwc, idx);
1141
1142 cpuc->counters[idx] = counter;
1143 set_bit(idx, cpuc->active_mask);
1144
1145 x86_perf_counter_set_period(counter, hwc, idx);
1146 x86_pmu.enable(hwc, idx);
1147
1148 perf_counter_update_userpage(counter);
1149
1150 return 0;
1151 }
1152
1153 static void x86_pmu_unthrottle(struct perf_counter *counter)
1154 {
1155 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1156 struct hw_perf_counter *hwc = &counter->hw;
1157
1158 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1159 cpuc->counters[hwc->idx] != counter))
1160 return;
1161
1162 x86_pmu.enable(hwc, hwc->idx);
1163 }
1164
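/*
 * Dump the raw PMU register state of the current CPU. Called from the
 * Intel overflow handler when the IRQ loop looks stuck, and useful for
 * debugging counter setup by hand.
 */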
1165 void perf_counter_print_debug(void)
1166 {
1167 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1168 struct cpu_hw_counters *cpuc;
1169 unsigned long flags;
1170 int cpu, idx;
1171
1172 if (!x86_pmu.num_counters)
1173 return;
1174
1175 local_irq_save(flags);
1176
1177 cpu = smp_processor_id();
1178 cpuc = &per_cpu(cpu_hw_counters, cpu);
1179
1180 if (x86_pmu.version >= 2) {
1181 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1182 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1183 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1184 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1185
1186 pr_info("\n");
1187 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1188 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1189 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1190 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1191 }
1192 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1193
1194 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1195 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1196 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1197
1198 prev_left = per_cpu(prev_left[idx], cpu);
1199
1200 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1201 cpu, idx, pmc_ctrl);
1202 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
1203 cpu, idx, pmc_count);
1204 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1205 cpu, idx, prev_left);
1206 }
1207 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1208 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1209
1210 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1211 cpu, idx, pmc_count);
1212 }
1213 local_irq_restore(flags);
1214 }
1215
1216 static void x86_pmu_disable(struct perf_counter *counter)
1217 {
1218 struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
1219 struct hw_perf_counter *hwc = &counter->hw;
1220 int idx = hwc->idx;
1221
1222 /*
1223 * Must be done before we disable, otherwise the nmi handler
1224 * could re-enable it:
1225 */
1226 clear_bit(idx, cpuc->active_mask);
1227 x86_pmu.disable(hwc, idx);
1228
1229 /*
1230 * Make sure the cleared pointer becomes visible before we
1231 * (potentially) free the counter:
1232 */
1233 barrier();
1234
1235 /*
1236 * Drain the remaining delta count out of a counter
1237 * that we are disabling:
1238 */
1239 x86_perf_counter_update(counter, hwc, idx);
1240 cpuc->counters[idx] = NULL;
1241 clear_bit(idx, cpuc->used_mask);
1242
1243 perf_counter_update_userpage(counter);
1244 }
1245
1246 /*
1247 * Save and restart an expired counter. Called by NMI contexts,
1248 * so it has to be careful about preempting normal counter ops:
1249 */
1250 static int intel_pmu_save_and_restart(struct perf_counter *counter)
1251 {
1252 struct hw_perf_counter *hwc = &counter->hw;
1253 int idx = hwc->idx;
1254 int ret;
1255
1256 x86_perf_counter_update(counter, hwc, idx);
1257 ret = x86_perf_counter_set_period(counter, hwc, idx);
1258
1259 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1260 intel_pmu_enable_counter(hwc, idx);
1261
1262 return ret;
1263 }
1264
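/*
 * Last-resort recovery: zero every generic and fixed counter and its
 * control register so a wedged PMU stops raising interrupts.
 */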
1265 static void intel_pmu_reset(void)
1266 {
1267 unsigned long flags;
1268 int idx;
1269
1270 if (!x86_pmu.num_counters)
1271 return;
1272
1273 local_irq_save(flags);
1274
1275 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1276
1277 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1278 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1279 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1280 }
1281 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1282 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1283 }
1284
1285 local_irq_restore(flags);
1286 }
1287
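/*
 * P6 has no global overflow status register, so poll every active
 * counter: one whose top bit is still set is still counting up towards
 * overflow, one whose top bit is clear has overflowed.
 */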
1288 static int p6_pmu_handle_irq(struct pt_regs *regs)
1289 {
1290 struct perf_sample_data data;
1291 struct cpu_hw_counters *cpuc;
1292 struct perf_counter *counter;
1293 struct hw_perf_counter *hwc;
1294 int idx, handled = 0;
1295 u64 val;
1296
1297 data.regs = regs;
1298 data.addr = 0;
1299
1300 cpuc = &__get_cpu_var(cpu_hw_counters);
1301
1302 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1303 if (!test_bit(idx, cpuc->active_mask))
1304 continue;
1305
1306 counter = cpuc->counters[idx];
1307 hwc = &counter->hw;
1308
1309 val = x86_perf_counter_update(counter, hwc, idx);
1310 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1311 continue;
1312
1313 /*
1314 * counter overflow
1315 */
1316 handled = 1;
1317 data.period = counter->hw.last_period;
1318
1319 if (!x86_perf_counter_set_period(counter, hwc, idx))
1320 continue;
1321
1322 if (perf_counter_overflow(counter, 1, &data))
1323 p6_pmu_disable_counter(hwc, idx);
1324 }
1325
1326 if (handled)
1327 inc_irq_stat(apic_perf_irqs);
1328
1329 return handled;
1330 }
1331
1332 /*
1333 * This handler is triggered by the local APIC, so the APIC IRQ handling
1334 * rules apply:
1335 */
1336 static int intel_pmu_handle_irq(struct pt_regs *regs)
1337 {
1338 struct perf_sample_data data;
1339 struct cpu_hw_counters *cpuc;
1340 int bit, loops;
1341 u64 ack, status;
1342
1343 data.regs = regs;
1344 data.addr = 0;
1345
1346 cpuc = &__get_cpu_var(cpu_hw_counters);
1347
1348 perf_disable();
1349 status = intel_pmu_get_status();
1350 if (!status) {
1351 perf_enable();
1352 return 0;
1353 }
1354
1355 loops = 0;
1356 again:
1357 if (++loops > 100) {
1358 WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
1359 perf_counter_print_debug();
1360 intel_pmu_reset();
1361 perf_enable();
1362 return 1;
1363 }
1364
1365 inc_irq_stat(apic_perf_irqs);
1366 ack = status;
1367 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1368 struct perf_counter *counter = cpuc->counters[bit];
1369
1370 clear_bit(bit, (unsigned long *) &status);
1371 if (!test_bit(bit, cpuc->active_mask))
1372 continue;
1373
1374 if (!intel_pmu_save_and_restart(counter))
1375 continue;
1376
1377 data.period = counter->hw.last_period;
1378
1379 if (perf_counter_overflow(counter, 1, &data))
1380 intel_pmu_disable_counter(&counter->hw, bit);
1381 }
1382
1383 intel_pmu_ack_status(ack);
1384
1385 /*
1386 * Repeat if there is more work to be done:
1387 */
1388 status = intel_pmu_get_status();
1389 if (status)
1390 goto again;
1391
1392 perf_enable();
1393
1394 return 1;
1395 }
1396
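/*
 * AMD also lacks a global overflow status register, so this uses the
 * same polling scheme as p6_pmu_handle_irq() above.
 */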
1397 static int amd_pmu_handle_irq(struct pt_regs *regs)
1398 {
1399 struct perf_sample_data data;
1400 struct cpu_hw_counters *cpuc;
1401 struct perf_counter *counter;
1402 struct hw_perf_counter *hwc;
1403 int idx, handled = 0;
1404 u64 val;
1405
1406 data.regs = regs;
1407 data.addr = 0;
1408
1409 cpuc = &__get_cpu_var(cpu_hw_counters);
1410
1411 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1412 if (!test_bit(idx, cpuc->active_mask))
1413 continue;
1414
1415 counter = cpuc->counters[idx];
1416 hwc = &counter->hw;
1417
1418 val = x86_perf_counter_update(counter, hwc, idx);
1419 if (val & (1ULL << (x86_pmu.counter_bits - 1)))
1420 continue;
1421
1422 /*
1423 * counter overflow
1424 */
1425 handled = 1;
1426 data.period = counter->hw.last_period;
1427
1428 if (!x86_perf_counter_set_period(counter, hwc, idx))
1429 continue;
1430
1431 if (perf_counter_overflow(counter, 1, &data))
1432 amd_pmu_disable_counter(hwc, idx);
1433 }
1434
1435 if (handled)
1436 inc_irq_stat(apic_perf_irqs);
1437
1438 return handled;
1439 }
1440
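/*
 * Self-IPI vector handler: runs the deferred half of perf counter work
 * (perf_counter_do_pending()) in normal interrupt context, outside NMI.
 */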
1441 void smp_perf_pending_interrupt(struct pt_regs *regs)
1442 {
1443 irq_enter();
1444 ack_APIC_irq();
1445 inc_irq_stat(apic_pending_irqs);
1446 perf_counter_do_pending();
1447 irq_exit();
1448 }
1449
1450 void set_perf_counter_pending(void)
1451 {
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1453 }
1454
1455 void perf_counters_lapic_init(void)
1456 {
1457 if (!x86_pmu_initialized())
1458 return;
1459
1460 /*
1461 * Always use NMI for PMU
1462 */
1463 apic_write(APIC_LVTPC, APIC_DM_NMI);
1464 }
1465
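/*
 * die-notifier callback: hand PMU NMIs to the model-specific handler.
 * The LVTPC entry is rewritten first because some CPUs mask it when a
 * counter overflows.
 */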
1466 static int __kprobes
1467 perf_counter_nmi_handler(struct notifier_block *self,
1468 unsigned long cmd, void *__args)
1469 {
1470 struct die_args *args = __args;
1471 struct pt_regs *regs;
1472
1473 if (!atomic_read(&active_counters))
1474 return NOTIFY_DONE;
1475
1476 switch (cmd) {
1477 case DIE_NMI:
1478 case DIE_NMI_IPI:
1479 break;
1480
1481 default:
1482 return NOTIFY_DONE;
1483 }
1484
1485 regs = args->regs;
1486
1487 apic_write(APIC_LVTPC, APIC_DM_NMI);
1488 /*
1489 * Can't rely on the handled return value to say it was our NMI: two
1490 * counters could trigger 'simultaneously', raising two back-to-back NMIs.
1491 *
1492 * If the first NMI handles both, the latter will be empty and daze
1493 * the CPU.
1494 */
1495 x86_pmu.handle_irq(regs);
1496
1497 return NOTIFY_STOP;
1498 }
1499
1500 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
1501 .notifier_call = perf_counter_nmi_handler,
1502 .next = NULL,
1503 .priority = 1
1504 };
1505
1506 static struct x86_pmu p6_pmu = {
1507 .name = "p6",
1508 .handle_irq = p6_pmu_handle_irq,
1509 .disable_all = p6_pmu_disable_all,
1510 .enable_all = p6_pmu_enable_all,
1511 .enable = p6_pmu_enable_counter,
1512 .disable = p6_pmu_disable_counter,
1513 .eventsel = MSR_P6_EVNTSEL0,
1514 .perfctr = MSR_P6_PERFCTR0,
1515 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1518 .max_period = (1ULL << 31) - 1,
1519 .version = 0,
1520 .num_counters = 2,
1521 /*
1522 * Counters have 40 bits implemented. However they are designed such
1523 * that bits [32-39] are sign extensions of bit 31. As such the
1524 * effective width of a counter for a P6-like PMU is only 32 bits.
1525 *
1526 * See IA-32 Intel Architecture Software developer manual Vol 3B
1527 */
1528 .counter_bits = 32,
1529 .counter_mask = (1ULL << 32) - 1,
1530 };
1531
1532 static struct x86_pmu intel_pmu = {
1533 .name = "Intel",
1534 .handle_irq = intel_pmu_handle_irq,
1535 .disable_all = intel_pmu_disable_all,
1536 .enable_all = intel_pmu_enable_all,
1537 .enable = intel_pmu_enable_counter,
1538 .disable = intel_pmu_disable_counter,
1539 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1540 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1541 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1544 /*
1545 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of
1547 * the generic counter period:
1548 */
1549 .max_period = (1ULL << 31) - 1,
1550 };
1551
1552 static struct x86_pmu amd_pmu = {
1553 .name = "AMD",
1554 .handle_irq = amd_pmu_handle_irq,
1555 .disable_all = amd_pmu_disable_all,
1556 .enable_all = amd_pmu_enable_all,
1557 .enable = amd_pmu_enable_counter,
1558 .disable = amd_pmu_disable_counter,
1559 .eventsel = MSR_K7_EVNTSEL0,
1560 .perfctr = MSR_K7_PERFCTR0,
1561 .event_map = amd_pmu_event_map,
1562 .raw_event = amd_pmu_raw_event,
1563 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1564 .num_counters = 4,
1565 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1,
1567 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1,
1569 };
1570
1571 static int p6_pmu_init(void)
1572 {
1573 switch (boot_cpu_data.x86_model) {
1574 case 1:
1575 case 3: /* Pentium Pro */
1576 case 5:
1577 case 6: /* Pentium II */
1578 case 7:
1579 case 8:
1580 case 11: /* Pentium III */
1581 break;
1582 case 9:
1583 case 13:
1584 /* Pentium M */
1585 break;
1586 default:
1587 pr_cont("unsupported p6 CPU model %d ",
1588 boot_cpu_data.x86_model);
1589 return -ENODEV;
1590 }
1591
1592 if (!cpu_has_apic) {
1593 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV;
1595 }
1596
1597 x86_pmu = p6_pmu;
1598
1599 return 0;
1600 }
1601
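/*
 * Probe architectural perfmon via CPUID leaf 0xA. Family 6 CPUs without
 * it (Pentium Pro through Pentium M) fall back to the P6 driver above.
 */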
1602 static int intel_pmu_init(void)
1603 {
1604 union cpuid10_edx edx;
1605 union cpuid10_eax eax;
1606 unsigned int unused;
1607 unsigned int ebx;
1608 int version;
1609
1610 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1611 /* check for P6 processor family */
1612 if (boot_cpu_data.x86 == 6) {
1613 return p6_pmu_init();
1614 } else {
1615 return -ENODEV;
1616 }
1617 }
1618
1619 /*
1620 * Check whether the Architectural PerfMon supports
1621 * Branch Misses Retired Event or not.
1622 */
1623 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1624 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1625 return -ENODEV;
1626
1627 version = eax.split.version_id;
1628 if (version < 2)
1629 return -ENODEV;
1630
1631 x86_pmu = intel_pmu;
1632 x86_pmu.version = version;
1633 x86_pmu.num_counters = eax.split.num_counters;
1634 x86_pmu.counter_bits = eax.split.bit_width;
1635 x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
1636
1637 /*
1638 * Quirk: v2 perfmon does not report fixed-purpose counters, so
1639 * assume at least 3 counters:
1640 */
1641 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
1642
1643 /*
1644 * Install the hw-cache-events table:
1645 */
1646 switch (boot_cpu_data.x86_model) {
1647 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
1648 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
1649 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
1650 case 29: /* six-core 45 nm xeon "Dunnington" */
1651 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
1652 sizeof(hw_cache_event_ids));
1653
1654 pr_cont("Core2 events, ");
1655 break;
1656 default:
1657 case 26:
1658 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
1659 sizeof(hw_cache_event_ids));
1660
1661 pr_cont("Nehalem/Corei7 events, ");
1662 break;
1663 case 28:
1664 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
1665 sizeof(hw_cache_event_ids));
1666
1667 pr_cont("Atom events, ");
1668 break;
1669 }
1670 return 0;
1671 }
1672
1673 static int amd_pmu_init(void)
1674 {
1675 /* Performance-monitoring supported from K7 and later: */
1676 if (boot_cpu_data.x86 < 6)
1677 return -ENODEV;
1678
1679 x86_pmu = amd_pmu;
1680
1681 /* Events are common for all AMDs */
1682 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
1683 sizeof(hw_cache_event_ids));
1684
1685 return 0;
1686 }
1687
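/*
 * Boot-time setup: pick the vendor-specific driver, clamp the counter
 * counts to the core limits, build perf_counter_mask and register the
 * NMI notifier.
 */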
1688 void __init init_hw_perf_counters(void)
1689 {
1690 int err;
1691
1692 pr_info("Performance Counters: ");
1693
1694 switch (boot_cpu_data.x86_vendor) {
1695 case X86_VENDOR_INTEL:
1696 err = intel_pmu_init();
1697 break;
1698 case X86_VENDOR_AMD:
1699 err = amd_pmu_init();
1700 break;
1701 default:
1702 return;
1703 }
1704 if (err != 0) {
1705 pr_cont("no PMU driver, software counters only.\n");
1706 return;
1707 }
1708
1709 pr_cont("%s PMU driver.\n", x86_pmu.name);
1710
1711 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1712 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
1713 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1714 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1715 }
1716 perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
1717 perf_max_counters = x86_pmu.num_counters;
1718
1719 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1720 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
1721 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1722 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1723 }
1724
1725 perf_counter_mask |=
1726 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1727 x86_pmu.intel_ctrl = perf_counter_mask;
1728
1729 perf_counters_lapic_init();
1730 register_die_notifier(&perf_counter_nmi_notifier);
1731
1732 pr_info("... version: %d\n", x86_pmu.version);
1733 pr_info("... bit width: %d\n", x86_pmu.counter_bits);
1734 pr_info("... generic counters: %d\n", x86_pmu.num_counters);
1735 pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
1736 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1737 pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed);
1738 pr_info("... counter mask: %016Lx\n", perf_counter_mask);
1739 }
1740
1741 static inline void x86_pmu_read(struct perf_counter *counter)
1742 {
1743 x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
1744 }
1745
1746 static const struct pmu pmu = {
1747 .enable = x86_pmu_enable,
1748 .disable = x86_pmu_disable,
1749 .read = x86_pmu_read,
1750 .unthrottle = x86_pmu_unthrottle,
1751 };
1752
1753 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
1754 {
1755 int err;
1756
1757 err = __hw_perf_counter_init(counter);
1758 if (err)
1759 return ERR_PTR(err);
1760
1761 return &pmu;
1762 }
1763
1764 /*
1765 * callchain support
1766 */
1767
1768 static inline
1769 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1770 {
1771 if (entry->nr < PERF_MAX_STACK_DEPTH)
1772 entry->ip[entry->nr++] = ip;
1773 }
1774
1775 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
1776 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
1777 static DEFINE_PER_CPU(int, in_nmi_frame);
1778
1779
1780 static void
1781 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1782 {
1783 /* Ignore warnings */
1784 }
1785
1786 static void backtrace_warning(void *data, char *msg)
1787 {
1788 /* Ignore warnings */
1789 }
1790
1791 static int backtrace_stack(void *data, char *name)
1792 {
1793 per_cpu(in_nmi_frame, smp_processor_id()) =
1794 x86_is_stack_id(NMI_STACK, name);
1795
1796 return 0;
1797 }
1798
1799 static void backtrace_address(void *data, unsigned long addr, int reliable)
1800 {
1801 struct perf_callchain_entry *entry = data;
1802
1803 if (per_cpu(in_nmi_frame, smp_processor_id()))
1804 return;
1805
1806 if (reliable)
1807 callchain_store(entry, addr);
1808 }
1809
1810 static const struct stacktrace_ops backtrace_ops = {
1811 .warning = backtrace_warning,
1812 .warning_symbol = backtrace_warning_symbol,
1813 .stack = backtrace_stack,
1814 .address = backtrace_address,
1815 };
1816
1817 #include "../dumpstack.h"
1818
1819 static void
1820 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1821 {
1822 callchain_store(entry, PERF_CONTEXT_KERNEL);
1823 callchain_store(entry, regs->ip);
1824
1825 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1826 }
1827
1828 /*
1829 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1830 */
1831 static unsigned long
1832 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1833 {
1834 unsigned long offset, addr = (unsigned long)from;
1835 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1836 unsigned long size, len = 0;
1837 struct page *page;
1838 void *map;
1839 int ret;
1840
1841 do {
1842 ret = __get_user_pages_fast(addr, 1, 0, &page);
1843 if (!ret)
1844 break;
1845
1846 offset = addr & (PAGE_SIZE - 1);
1847 size = min(PAGE_SIZE - offset, n - len);
1848
1849 map = kmap_atomic(page, type);
1850 memcpy(to, map+offset, size);
1851 kunmap_atomic(map, type);
1852 put_page(page);
1853
1854 len += size;
1855 to += size;
1856 addr += size;
1857
1858 } while (len < n);
1859
1860 return len;
1861 }
1862
1863 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1864 {
1865 unsigned long bytes;
1866
1867 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1868
1869 return bytes == sizeof(*frame);
1870 }
1871
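/*
 * Walk the user stack by following saved frame pointers;
 * copy_from_user_nmi() makes each dereference safe in NMI context.
 */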
1872 static void
1873 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1874 {
1875 struct stack_frame frame;
1876 const void __user *fp;
1877
1878 if (!user_mode(regs))
1879 regs = task_pt_regs(current);
1880
1881 fp = (void __user *)regs->bp;
1882
1883 callchain_store(entry, PERF_CONTEXT_USER);
1884 callchain_store(entry, regs->ip);
1885
1886 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1887 frame.next_frame = NULL;
1888 frame.return_address = 0;
1889
1890 if (!copy_stack_frame(fp, &frame))
1891 break;
1892
1893 if ((unsigned long)fp < regs->sp)
1894 break;
1895
1896 callchain_store(entry, frame.return_address);
1897 fp = frame.next_frame;
1898 }
1899 }
1900
1901 static void
1902 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1903 {
1904 int is_user;
1905
1906 if (!regs)
1907 return;
1908
1909 is_user = user_mode(regs);
1910
1911 if (!current || current->pid == 0)
1912 return;
1913
1914 if (is_user && current->state != TASK_RUNNING)
1915 return;
1916
1917 if (!is_user)
1918 perf_callchain_kernel(regs, entry);
1919
1920 if (current->mm)
1921 perf_callchain_user(regs, entry);
1922 }
1923
1924 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1925 {
1926 struct perf_callchain_entry *entry;
1927
1928 if (in_nmi())
1929 entry = &__get_cpu_var(nmi_entry);
1930 else
1931 entry = &__get_cpu_var(irq_entry);
1932
1933 entry->nr = 0;
1934
1935 perf_do_callchain(regs, entry);
1936
1937 return entry;
1938 }