perf/x86: Implement 64-bit counter support for IBS
[deliverable/linux.git] / arch / x86 / kernel / cpu / perf_event_amd_ibs.c
1 /*
2 * Performance events - AMD IBS
3 *
4 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
5 *
6 * For licencing details see kernel-base/COPYING
7 */
8
9 #include <linux/perf_event.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
12
13 #include <asm/apic.h>
14
15 static u32 ibs_caps;
16
17 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
18
19 #include <linux/kprobes.h>
20 #include <linux/hardirq.h>
21
22 #include <asm/nmi.h>
23
/* Bits of attr.config that perf_ibs_init() accepts for each IBS PMU. */
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
26
/* Bit numbers used in cpu_perf_ibs.state. */
enum ibs_states {
	/* set by perf_ibs_add(), cleared by perf_ibs_del() */
	IBS_ENABLED	= 0,
	/* set by perf_ibs_start(), cleared by perf_ibs_stop() */
	IBS_STARTED	= 1,
	/* set by perf_ibs_stop(), consumed by the interrupt handler */
	IBS_STOPPING	= 2,

	/* number of state bits; sizes the state bitmap */
	IBS_MAX_STATES,
};
34
35 struct cpu_perf_ibs {
36 struct perf_event *event;
37 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)];
38 };
39
40 struct perf_ibs {
41 struct pmu pmu;
42 unsigned int msr;
43 u64 config_mask;
44 u64 cnt_mask;
45 u64 enable_mask;
46 u64 valid_mask;
47 u64 max_period;
48 unsigned long offset_mask[1];
49 int offset_max;
50 struct cpu_perf_ibs __percpu *pcpu;
51 u64 (*get_count)(u64 config);
52 };
53
54 struct perf_ibs_data {
55 u32 size;
56 union {
57 u32 data[0]; /* data buffer starts here */
58 u32 caps;
59 };
60 u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX];
61 };
62
63 static int
64 perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *count)
65 {
66 s64 left = local64_read(&hwc->period_left);
67 s64 period = hwc->sample_period;
68 int overflow = 0;
69
70 /*
71 * If we are way outside a reasonable range then just skip forward:
72 */
73 if (unlikely(left <= -period)) {
74 left = period;
75 local64_set(&hwc->period_left, left);
76 hwc->last_period = period;
77 overflow = 1;
78 }
79
80 if (unlikely(left <= 0)) {
81 left += period;
82 local64_set(&hwc->period_left, left);
83 hwc->last_period = period;
84 overflow = 1;
85 }
86
87 if (unlikely(left < min))
88 left = min;
89
90 if (left > max)
91 left = max;
92
93 *count = (u64)left;
94
95 return overflow;
96 }
97
98 static int
99 perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
100 {
101 struct hw_perf_event *hwc = &event->hw;
102 int shift = 64 - width;
103 u64 prev_raw_count;
104 u64 delta;
105
106 /*
107 * Careful: an NMI might modify the previous event value.
108 *
109 * Our tactic to handle this is to first atomically read and
110 * exchange a new raw count - then add that new-prev delta
111 * count to the generic event atomically:
112 */
113 prev_raw_count = local64_read(&hwc->prev_count);
114 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
115 new_raw_count) != prev_raw_count)
116 return 0;
117
118 /*
119 * Now we have the new raw value and have updated the prev
120 * timestamp already. We can now calculate the elapsed delta
121 * (event-)time and add that to the generic event.
122 *
123 * Careful, not all hw sign-extends above the physical width
124 * of the count.
125 */
126 delta = (new_raw_count << shift) - (prev_raw_count << shift);
127 delta >>= shift;
128
129 local64_add(delta, &event->count);
130 local64_sub(delta, &hwc->period_left);
131
132 return 1;
133 }
134
135 static struct perf_ibs perf_ibs_fetch;
136 static struct perf_ibs perf_ibs_op;
137
138 static struct perf_ibs *get_ibs_pmu(int type)
139 {
140 if (perf_ibs_fetch.pmu.type == type)
141 return &perf_ibs_fetch;
142 if (perf_ibs_op.pmu.type == type)
143 return &perf_ibs_op;
144 return NULL;
145 }
146
147 static int perf_ibs_init(struct perf_event *event)
148 {
149 struct hw_perf_event *hwc = &event->hw;
150 struct perf_ibs *perf_ibs;
151 u64 max_cnt, config;
152
153 perf_ibs = get_ibs_pmu(event->attr.type);
154 if (!perf_ibs)
155 return -ENOENT;
156
157 config = event->attr.config;
158 if (config & ~perf_ibs->config_mask)
159 return -EINVAL;
160
161 if (hwc->sample_period) {
162 if (config & perf_ibs->cnt_mask)
163 /* raw max_cnt may not be set */
164 return -EINVAL;
165 if (hwc->sample_period & 0x0f)
166 /* lower 4 bits can not be set in ibs max cnt */
167 return -EINVAL;
168 } else {
169 max_cnt = config & perf_ibs->cnt_mask;
170 config &= ~perf_ibs->cnt_mask;
171 event->attr.sample_period = max_cnt << 4;
172 hwc->sample_period = event->attr.sample_period;
173 }
174
175 if (!hwc->sample_period)
176 return -EINVAL;
177
178 hwc->config_base = perf_ibs->msr;
179 hwc->config = config;
180
181 return 0;
182 }
183
184 static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
185 struct hw_perf_event *hwc, u64 *period)
186 {
187 int ret;
188
189 /* ignore lower 4 bits in min count: */
190 ret = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
191 local64_set(&hwc->prev_count, 0);
192
193 return ret;
194 }
195
196 static u64 get_ibs_fetch_count(u64 config)
197 {
198 return (config & IBS_FETCH_CNT) >> 12;
199 }
200
201 static u64 get_ibs_op_count(u64 config)
202 {
203 return (config & IBS_OP_CUR_CNT) >> 32;
204 }
205
206 static void
207 perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
208 u64 config)
209 {
210 u64 count = perf_ibs->get_count(config);
211
212 while (!perf_event_try_update(event, count, 20)) {
213 rdmsrl(event->hw.config_base, config);
214 count = perf_ibs->get_count(config);
215 }
216 }
217
218 /* Note: The enable mask must be encoded in the config argument. */
219 static inline void perf_ibs_enable_event(struct hw_perf_event *hwc, u64 config)
220 {
221 wrmsrl(hwc->config_base, hwc->config | config);
222 }
223
224 /*
225 * We cannot restore the ibs pmu state, so we always needs to update
226 * the event while stopping it and then reset the state when starting
227 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
228 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
229 */
230 static void perf_ibs_start(struct perf_event *event, int flags)
231 {
232 struct hw_perf_event *hwc = &event->hw;
233 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
234 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
235 u64 config;
236
237 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
238 return;
239
240 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
241 hwc->state = 0;
242
243 perf_ibs_set_period(perf_ibs, hwc, &config);
244 config = (config >> 4) | perf_ibs->enable_mask;
245 set_bit(IBS_STARTED, pcpu->state);
246 perf_ibs_enable_event(hwc, config);
247
248 perf_event_update_userpage(event);
249 }
250
251 static void perf_ibs_stop(struct perf_event *event, int flags)
252 {
253 struct hw_perf_event *hwc = &event->hw;
254 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
255 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
256 u64 val;
257 int stopping;
258
259 stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);
260
261 if (!stopping && (hwc->state & PERF_HES_UPTODATE))
262 return;
263
264 rdmsrl(hwc->config_base, val);
265
266 if (stopping) {
267 set_bit(IBS_STOPPING, pcpu->state);
268 val &= ~perf_ibs->enable_mask;
269 wrmsrl(hwc->config_base, val);
270 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
271 hwc->state |= PERF_HES_STOPPED;
272 }
273
274 if (hwc->state & PERF_HES_UPTODATE)
275 return;
276
277 perf_ibs_event_update(perf_ibs, event, val);
278 hwc->state |= PERF_HES_UPTODATE;
279 }
280
281 static int perf_ibs_add(struct perf_event *event, int flags)
282 {
283 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
284 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
285
286 if (test_and_set_bit(IBS_ENABLED, pcpu->state))
287 return -ENOSPC;
288
289 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
290
291 pcpu->event = event;
292
293 if (flags & PERF_EF_START)
294 perf_ibs_start(event, PERF_EF_RELOAD);
295
296 return 0;
297 }
298
299 static void perf_ibs_del(struct perf_event *event, int flags)
300 {
301 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
302 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
303
304 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
305 return;
306
307 perf_ibs_stop(event, PERF_EF_UPDATE);
308
309 pcpu->event = NULL;
310
311 perf_event_update_userpage(event);
312 }
313
/* pmu::read callback: intentionally empty. */
static void perf_ibs_read(struct perf_event *event)
{
}
315
316 static struct perf_ibs perf_ibs_fetch = {
317 .pmu = {
318 .task_ctx_nr = perf_invalid_context,
319
320 .event_init = perf_ibs_init,
321 .add = perf_ibs_add,
322 .del = perf_ibs_del,
323 .start = perf_ibs_start,
324 .stop = perf_ibs_stop,
325 .read = perf_ibs_read,
326 },
327 .msr = MSR_AMD64_IBSFETCHCTL,
328 .config_mask = IBS_FETCH_CONFIG_MASK,
329 .cnt_mask = IBS_FETCH_MAX_CNT,
330 .enable_mask = IBS_FETCH_ENABLE,
331 .valid_mask = IBS_FETCH_VAL,
332 .max_period = IBS_FETCH_MAX_CNT << 4,
333 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
334 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
335
336 .get_count = get_ibs_fetch_count,
337 };
338
339 static struct perf_ibs perf_ibs_op = {
340 .pmu = {
341 .task_ctx_nr = perf_invalid_context,
342
343 .event_init = perf_ibs_init,
344 .add = perf_ibs_add,
345 .del = perf_ibs_del,
346 .start = perf_ibs_start,
347 .stop = perf_ibs_stop,
348 .read = perf_ibs_read,
349 },
350 .msr = MSR_AMD64_IBSOPCTL,
351 .config_mask = IBS_OP_CONFIG_MASK,
352 .cnt_mask = IBS_OP_MAX_CNT,
353 .enable_mask = IBS_OP_ENABLE,
354 .valid_mask = IBS_OP_VAL,
355 .max_period = IBS_OP_MAX_CNT << 4,
356 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
357 .offset_max = MSR_AMD64_IBSOP_REG_COUNT,
358
359 .get_count = get_ibs_op_count,
360 };
361
362 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
363 {
364 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
365 struct perf_event *event = pcpu->event;
366 struct hw_perf_event *hwc = &event->hw;
367 struct perf_sample_data data;
368 struct perf_raw_record raw;
369 struct pt_regs regs;
370 struct perf_ibs_data ibs_data;
371 int offset, size, overflow, reenable;
372 unsigned int msr;
373 u64 *buf, config;
374
375 if (!test_bit(IBS_STARTED, pcpu->state)) {
376 /* Catch spurious interrupts after stopping IBS: */
377 if (!test_and_clear_bit(IBS_STOPPING, pcpu->state))
378 return 0;
379 rdmsrl(perf_ibs->msr, *ibs_data.regs);
380 return (*ibs_data.regs & perf_ibs->valid_mask) ? 1 : 0;
381 }
382
383 msr = hwc->config_base;
384 buf = ibs_data.regs;
385 rdmsrl(msr, *buf);
386 if (!(*buf++ & perf_ibs->valid_mask))
387 return 0;
388
389 perf_sample_data_init(&data, 0);
390 if (event->attr.sample_type & PERF_SAMPLE_RAW) {
391 ibs_data.caps = ibs_caps;
392 size = 1;
393 offset = 1;
394 do {
395 rdmsrl(msr + offset, *buf++);
396 size++;
397 offset = find_next_bit(perf_ibs->offset_mask,
398 perf_ibs->offset_max,
399 offset + 1);
400 } while (offset < perf_ibs->offset_max);
401 raw.size = sizeof(u32) + sizeof(u64) * size;
402 raw.data = ibs_data.data;
403 data.raw = &raw;
404 }
405
406 regs = *iregs; /* XXX: update ip from ibs sample */
407
408 /*
409 * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
410 * supported in all cpus. As this triggered an interrupt, we
411 * set the current count to the max count.
412 */
413 config = ibs_data.regs[0];
414 if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) {
415 config &= ~IBS_OP_CUR_CNT;
416 config |= (config & IBS_OP_MAX_CNT) << 36;
417 }
418
419 perf_ibs_event_update(perf_ibs, event, config);
420
421 overflow = perf_ibs_set_period(perf_ibs, hwc, &config);
422 reenable = !(overflow && perf_event_overflow(event, &data, &regs));
423 config = (config >> 4) | (reenable ? perf_ibs->enable_mask : 0);
424 perf_ibs_enable_event(hwc, config);
425
426 perf_event_update_userpage(event);
427
428 return 1;
429 }
430
431 static int __kprobes
432 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
433 {
434 int handled = 0;
435
436 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
437 handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
438
439 if (handled)
440 inc_irq_stat(apic_perf_irqs);
441
442 return handled;
443 }
444
445 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
446 {
447 struct cpu_perf_ibs __percpu *pcpu;
448 int ret;
449
450 pcpu = alloc_percpu(struct cpu_perf_ibs);
451 if (!pcpu)
452 return -ENOMEM;
453
454 perf_ibs->pcpu = pcpu;
455
456 ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
457 if (ret) {
458 perf_ibs->pcpu = NULL;
459 free_percpu(pcpu);
460 }
461
462 return ret;
463 }
464
465 static __init int perf_event_ibs_init(void)
466 {
467 if (!ibs_caps)
468 return -ENODEV; /* ibs not supported by the cpu */
469
470 perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
471 perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
472 register_nmi_handler(NMI_LOCAL, &perf_ibs_nmi_handler, 0, "perf_ibs");
473 printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
474
475 return 0;
476 }
477
478 #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
479
480 static __init int perf_event_ibs_init(void) { return 0; }
481
482 #endif
483
484 /* IBS - apic initialization, for perf and oprofile */
485
486 static __init u32 __get_ibs_caps(void)
487 {
488 u32 caps;
489 unsigned int max_level;
490
491 if (!boot_cpu_has(X86_FEATURE_IBS))
492 return 0;
493
494 /* check IBS cpuid feature flags */
495 max_level = cpuid_eax(0x80000000);
496 if (max_level < IBS_CPUID_FEATURES)
497 return IBS_CAPS_DEFAULT;
498
499 caps = cpuid_eax(IBS_CPUID_FEATURES);
500 if (!(caps & IBS_CAPS_AVAIL))
501 /* cpuid flags not valid */
502 return IBS_CAPS_DEFAULT;
503
504 return caps;
505 }
506
507 u32 get_ibs_caps(void)
508 {
509 return ibs_caps;
510 }
511
512 EXPORT_SYMBOL(get_ibs_caps);
513
514 static inline int get_eilvt(int offset)
515 {
516 return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
517 }
518
/*
 * Reset the extended LVT entry @offset (vector 0, message type 0, masked).
 * Returns 1 if setup_APIC_eilvt() succeeded, 0 otherwise.
 */
static inline int put_eilvt(int offset)
{
	int err = setup_APIC_eilvt(offset, 0, 0, 1);

	return err == 0;
}
523
524 /*
525 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
526 */
527 static inline int ibs_eilvt_valid(void)
528 {
529 int offset;
530 u64 val;
531 int valid = 0;
532
533 preempt_disable();
534
535 rdmsrl(MSR_AMD64_IBSCTL, val);
536 offset = val & IBSCTL_LVT_OFFSET_MASK;
537
538 if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
539 pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
540 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
541 goto out;
542 }
543
544 if (!get_eilvt(offset)) {
545 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
546 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
547 goto out;
548 }
549
550 valid = 1;
551 out:
552 preempt_enable();
553
554 return valid;
555 }
556
557 static int setup_ibs_ctl(int ibs_eilvt_off)
558 {
559 struct pci_dev *cpu_cfg;
560 int nodes;
561 u32 value = 0;
562
563 nodes = 0;
564 cpu_cfg = NULL;
565 do {
566 cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
567 PCI_DEVICE_ID_AMD_10H_NB_MISC,
568 cpu_cfg);
569 if (!cpu_cfg)
570 break;
571 ++nodes;
572 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
573 | IBSCTL_LVT_OFFSET_VALID);
574 pci_read_config_dword(cpu_cfg, IBSCTL, &value);
575 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
576 pci_dev_put(cpu_cfg);
577 printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
578 "IBSCTL = 0x%08x\n", value);
579 return -EINVAL;
580 }
581 } while (1);
582
583 if (!nodes) {
584 printk(KERN_DEBUG "No CPU node configured for IBS\n");
585 return -ENODEV;
586 }
587
588 return 0;
589 }
590
591 /*
592 * This runs only on the current cpu. We try to find an LVT offset and
593 * setup the local APIC. For this we must disable preemption. On
594 * success we initialize all nodes with this offset. This updates then
595 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
596 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
597 * is using the new offset.
598 */
599 static int force_ibs_eilvt_setup(void)
600 {
601 int offset;
602 int ret;
603
604 preempt_disable();
605 /* find the next free available EILVT entry, skip offset 0 */
606 for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
607 if (get_eilvt(offset))
608 break;
609 }
610 preempt_enable();
611
612 if (offset == APIC_EILVT_NR_MAX) {
613 printk(KERN_DEBUG "No EILVT entry available\n");
614 return -EBUSY;
615 }
616
617 ret = setup_ibs_ctl(offset);
618 if (ret)
619 goto out;
620
621 if (!ibs_eilvt_valid()) {
622 ret = -EFAULT;
623 goto out;
624 }
625
626 pr_info("IBS: LVT offset %d assigned\n", offset);
627
628 return 0;
629 out:
630 preempt_disable();
631 put_eilvt(offset);
632 preempt_enable();
633 return ret;
634 }
635
636 static inline int get_ibs_lvt_offset(void)
637 {
638 u64 val;
639
640 rdmsrl(MSR_AMD64_IBSCTL, val);
641 if (!(val & IBSCTL_LVT_OFFSET_VALID))
642 return -EINVAL;
643
644 return val & IBSCTL_LVT_OFFSET_MASK;
645 }
646
647 static void setup_APIC_ibs(void *dummy)
648 {
649 int offset;
650
651 offset = get_ibs_lvt_offset();
652 if (offset < 0)
653 goto failed;
654
655 if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
656 return;
657 failed:
658 pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
659 smp_processor_id());
660 }
661
662 static void clear_APIC_ibs(void *dummy)
663 {
664 int offset;
665
666 offset = get_ibs_lvt_offset();
667 if (offset >= 0)
668 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
669 }
670
671 static int __cpuinit
672 perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
673 {
674 switch (action & ~CPU_TASKS_FROZEN) {
675 case CPU_STARTING:
676 setup_APIC_ibs(NULL);
677 break;
678 case CPU_DYING:
679 clear_APIC_ibs(NULL);
680 break;
681 default:
682 break;
683 }
684
685 return NOTIFY_OK;
686 }
687
688 static __init int amd_ibs_init(void)
689 {
690 u32 caps;
691 int ret = -EINVAL;
692
693 caps = __get_ibs_caps();
694 if (!caps)
695 return -ENODEV; /* ibs not supported by the cpu */
696
697 /*
698 * Force LVT offset assignment for family 10h: The offsets are
699 * not assigned by the BIOS for this family, so the OS is
700 * responsible for doing it. If the OS assignment fails, fall
701 * back to BIOS settings and try to setup this.
702 */
703 if (boot_cpu_data.x86 == 0x10)
704 force_ibs_eilvt_setup();
705
706 if (!ibs_eilvt_valid())
707 goto out;
708
709 get_online_cpus();
710 ibs_caps = caps;
711 /* make ibs_caps visible to other cpus: */
712 smp_mb();
713 perf_cpu_notifier(perf_ibs_cpu_notifier);
714 smp_call_function(setup_APIC_ibs, NULL, 1);
715 put_online_cpus();
716
717 ret = perf_event_ibs_init();
718 out:
719 if (ret)
720 pr_err("Failed to setup IBS, %d\n", ret);
721 return ret;
722 }
723
/*
 * IBS setup needs the pci subsystem, so this cannot run earlier than
 * device initcall level.
 */
device_initcall(amd_ibs_init);
This page took 0.045276 seconds and 5 git commands to generate.