/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licensing details see kernel-base/COPYING
 */
#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>

#include <asm/apic.h>
static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
enum ibs_states {
	IBS_ENABLED	= 0,
	IBS_STARTED	= 1,
	IBS_STOPPING	= 2,

	IBS_MAX_STATES,
};

struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};
struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;
	u64				config_mask;
	u64				cnt_mask;
	u64				enable_mask;
	u64				valid_mask;
	u64				max_period;
	unsigned long			offset_mask[1];
	int				offset_max;
	struct cpu_perf_ibs __percpu	*pcpu;
	u64				(*get_count)(u64 config);
};
struct perf_ibs_data {
	u32		size;
	union {
		u32	data[0];	/* data buffer starts here */
		u32	caps;
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int overflow = 0;
	/*
	 * If we are way outside a reasonable range then just skip forward:
	 */
	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}
	if (unlikely(left < (s64)min)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		overflow = 1;
	}
	/*
	 * If the hw period that triggers the sw overflow is too short
	 * we might hit the irq handler. This biases the results.
	 * Thus we shorten the next-to-last period and set the last
	 * period to the max period.
	 */
	if (left > max) {
		left -= max;
		if (left > max)
			left = max;
		else if (left < min)
			left = min;
	}

	*hw_period = (u64)left;

	return overflow;
}
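
/*
 * Worked example for the clamping above (illustrative values, not
 * from the original source): with min = 0x10, max = 0xffff0 and
 * period_left = 0x1fff00, left exceeds max, so left becomes 0xfff10
 * and is programmed as the next-to-last hw period; the remaining
 * 0xffff0 (== max) is then used as the last period.
 */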
static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;
	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		return 0;
	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}
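
/*
 * Example of the shift trick above (illustrative, assuming a 20-bit
 * counter, i.e. shift == 44): shifting both counts left by 44 bits
 * makes the subtraction wrap at the counter width even if the
 * hardware did not sign-extend the raw values; shifting the result
 * back down recovers the true unsigned delta.
 */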
static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;
static struct perf_ibs *get_ibs_pmu(int type)
{
	if (perf_ibs_fetch.pmu.type == type)
		return &perf_ibs_fetch;
	if (perf_ibs_op.pmu.type == type)
		return &perf_ibs_op;
	return NULL;
}
/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
	switch (event->attr.precise_ip) {
	case 0:
		return -ENOENT;
	case 1:
	case 2:
		break;
	default:
		return -EOPNOTSUPP;
	}

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		switch (event->attr.config) {
		case PERF_COUNT_HW_CPU_CYCLES:
			*config = 0;
			return 0;
		}
		break;
	case PERF_TYPE_RAW:
		switch (event->attr.config) {
		case 0x0076:
			*config = 0;
			return 0;
		case 0x00C1:
			*config = IBS_OP_CNT_CTL;
			return 0;
		}
		break;
	default:
		return -ENOENT;
	}

	return -EOPNOTSUPP;
}
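
/*
 * Example invocation exercising the path above (a sketch; the raw
 * event codes are those documented in the comment before
 * perf_ibs_precise_event()):
 *
 *   perf record -a -e r0C1:p ... # micro-op counting via IBS op
 */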
static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (perf_ibs) {
		config = event->attr.config;
	} else {
		perf_ibs = &perf_ibs_op;
		ret = perf_ibs_precise_event(event, &config);
		if (ret)
			return ret;
	}
	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;
	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * lower 4 bits cannot be set in ibs max cnt,
			 * but allowing it in case we adjust the
			 * sample period to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}
	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}
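
/*
 * Example of the period handling above (illustrative numbers): a
 * requested sample_period of 100003 (0x186a3) has its low 4 bits
 * cleared, giving an effective period of 0x186a0; the value later
 * written to the hardware MaxCnt field is this period shifted right
 * by 4.
 */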
static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
			       struct hw_perf_event *hwc, u64 *period)
{
	int overflow;

	/* ignore lower 4 bits in min count: */
	overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
	local64_set(&hwc->prev_count, 0);

	return overflow;
}
static u64 get_ibs_fetch_count(u64 config)
{
	return (config & IBS_FETCH_CNT) >> 12;
}

static u64 get_ibs_op_count(u64 config)
{
	return (config & IBS_OP_CUR_CNT) >> 32;
}
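
/*
 * On the shifts above: the fetch hardware reports only the upper 16
 * bits of the 20-bit fetch counter (the low 4 bits are not
 * implemented), so the field in bits 31:16 is shifted down by 12
 * rather than 16 to scale the count back up by 16. The op current
 * count in the upper half of IbsOpCtl is full resolution and only
 * needs the position shift.
 */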
static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	while (!perf_event_try_update(event, count, 20)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}
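
/*
 * The width of 20 matches the effective counter width here
 * (IBS_*_MAX_CNT is a 16-bit field scaled by 16). The loop re-reads
 * the control MSR until the lock-free update in
 * perf_event_try_update() succeeds against concurrent NMIs.
 */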
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
					 struct hw_perf_event *hwc, u64 config)
{
	wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}
/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	config &= ~perf_ibs->cnt_mask;
	wrmsrl(hwc->config_base, config);
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}
/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	set_bit(IBS_STARTED, pcpu->state);
	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);
}
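
/*
 * Note: the enable path passes period >> 4 because the hardware
 * MaxCnt field does not store the low 4 bits of the period.
 */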
static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		set_bit(IBS_STOPPING, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}
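
/*
 * IBS_STOPPING is left set here so that the NMI handler below can
 * recognize and acknowledge spurious NMIs that arrive after the
 * engine was disabled.
 */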
static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}
static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}
static void perf_ibs_read(struct perf_event *event) { }
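
/*
 * pmu::read is intentionally empty: the event count is synchronized
 * in perf_ibs_stop() and in the NMI handler instead.
 */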
static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,

	.get_count		= get_ibs_fetch_count,
};
static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,

	.get_count		= get_ibs_op_count,
};
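
/*
 * Example usage once both PMUs are registered (a sketch; exact perf
 * tool event syntax depends on the tool version):
 *
 *   perf record -a -e ibs_fetch// ...   # fetch sampling
 *   perf record -a -e ibs_op// ...      # op sampling
 */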
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could still be incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all these NMIs as handled.
		 */
		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
	}
	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	if (!(*buf++ & perf_ibs->valid_mask))
		return 0;
	/*
	 * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), which is
	 * not supported on all cpus. As this triggered an interrupt,
	 * we set the current count to the max count.
	 */
	config = &ibs_data.regs[0];
	if (perf_ibs == &perf_ibs_op && !(ibs_caps & IBS_CAPS_RDWROPCNT)) {
		*config &= ~IBS_OP_CUR_CNT;
		*config |= (*config & IBS_OP_MAX_CNT) << 36;
	}
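
	/*
	 * On the shift by 36 above: (MaxCnt << 36) equals
	 * (MaxCnt * 16) << 32, i.e. the 16-bit MaxCnt field scaled to
	 * actual ops and placed at the CurCnt bit position in one
	 * step.
	 */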
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 2;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	ibs_data.size = sizeof(u64) * size;
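
	/*
	 * The loop above walks offset_mask, a bitmap of valid MSR
	 * offsets relative to the control register, capturing each
	 * implemented IBS register into ibs_data.regs; offset_max
	 * limits the capture to what the sample actually needs.
	 */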
	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		instruction_pointer_set(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}
	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = sizeof(u32) + ibs_data.size;
		raw.data = ibs_data.data;
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	if (throttle)
		perf_ibs_disable_event(perf_ibs, hwc, *config);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);

	return 1;
}
static int __kprobes
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
	int handled = 0;

	handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
	handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

	if (handled)
		inc_irq_stat(apic_perf_irqs);

	return handled;
}
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}
static __init int perf_event_ibs_init(void)
{
	if (!ibs_caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
	if (ibs_caps & IBS_CAPS_OPCNT)
		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);

	return 0;
}
#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void) { return 0; }

#endif
/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
	u32 caps;
	unsigned int max_level;

	if (!boot_cpu_has(X86_FEATURE_IBS))
		return 0;

	/* check IBS cpuid feature flags */
	max_level = cpuid_eax(0x80000000);
	if (max_level < IBS_CPUID_FEATURES)
		return IBS_CAPS_DEFAULT;

	caps = cpuid_eax(IBS_CPUID_FEATURES);
	if (!(caps & IBS_CAPS_AVAIL))
		/* cpuid flags not valid */
		return IBS_CAPS_DEFAULT;

	return caps;
}
u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);
static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
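
/*
 * setup_APIC_eilvt() returns 0 on success, so these helpers return
 * nonzero when the extended LVT entry could be reserved (get) or
 * released (put).
 */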
/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}
static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
			       "IBSCTL = 0x%08x\n", value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		printk(KERN_DEBUG "No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}
/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * set up the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
static int force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		printk(KERN_DEBUG "No EILVT entry available\n");
		return -EBUSY;
	}

	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	if (!ibs_eilvt_valid()) {
		ret = -EFAULT;
		goto out;
	}

	pr_info("IBS: LVT offset %d assigned\n", offset);

	return 0;
out:
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();

	return ret;
}
static inline int get_ibs_lvt_offset(void)
{
	u64 val;

	rdmsrl(MSR_AMD64_IBSCTL, val);
	if (!(val & IBSCTL_LVT_OFFSET_VALID))
		return -EINVAL;

	return val & IBSCTL_LVT_OFFSET_MASK;
}
static void setup_APIC_ibs(void *dummy)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset < 0)
		goto failed;

	if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
		return;
failed:
	pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
		smp_processor_id());
}
static void clear_APIC_ibs(void *dummy)
{
	int offset;

	offset = get_ibs_lvt_offset();
	if (offset >= 0)
		setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
static int __cpuinit
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_STARTING:
		setup_APIC_ibs(NULL);
		break;
	case CPU_DYING:
		clear_APIC_ibs(NULL);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}
static __init int amd_ibs_init(void)
{
	u32 caps;
	int ret = -EINVAL;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV;	/* ibs not supported by the cpu */

	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to the BIOS settings and try to set those up.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		goto out;

	get_online_cpus();
	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	perf_cpu_notifier(perf_ibs_cpu_notifier);
	smp_call_function(setup_APIC_ibs, NULL, 1);
	put_online_cpus();

	ret = perf_event_ibs_init();
out:
	if (ret)
		pr_err("Failed to setup IBS, %d\n", ret);
	return ret;
}
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);