/*
 * Performance events - AMD IBS
 *
 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 * For licencing details see kernel-base/COPYING
 */
9 #include <linux/perf_event.h>
10 #include <linux/module.h>
11 #include <linux/pci.h>
17 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
19 #include <linux/kprobes.h>
20 #include <linux/hardirq.h>
/*
 * Bits user space may pass in attr->config; the max-count bits are
 * also part of the mask because a raw period may be encoded there.
 */
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
36 struct perf_event
*event
;
37 unsigned long state
[BITS_TO_LONGS(IBS_MAX_STATES
)];
48 unsigned long offset_mask
[1];
50 struct cpu_perf_ibs __percpu
*pcpu
;
51 u64 (*get_count
)(u64 config
);
54 struct perf_ibs_data
{
57 u32 data
[0]; /* data buffer starts here */
60 u64 regs
[MSR_AMD64_IBS_REG_COUNT_MAX
];
64 perf_event_set_period(struct hw_perf_event
*hwc
, u64 min
, u64 max
, u64
*count
)
66 s64 left
= local64_read(&hwc
->period_left
);
67 s64 period
= hwc
->sample_period
;
71 * If we are way outside a reasonable range then just skip forward:
73 if (unlikely(left
<= -period
)) {
75 local64_set(&hwc
->period_left
, left
);
76 hwc
->last_period
= period
;
80 if (unlikely(left
<= 0)) {
82 local64_set(&hwc
->period_left
, left
);
83 hwc
->last_period
= period
;
87 if (unlikely(left
< min
))
99 perf_event_try_update(struct perf_event
*event
, u64 new_raw_count
, int width
)
101 struct hw_perf_event
*hwc
= &event
->hw
;
102 int shift
= 64 - width
;
107 * Careful: an NMI might modify the previous event value.
109 * Our tactic to handle this is to first atomically read and
110 * exchange a new raw count - then add that new-prev delta
111 * count to the generic event atomically:
113 prev_raw_count
= local64_read(&hwc
->prev_count
);
114 if (local64_cmpxchg(&hwc
->prev_count
, prev_raw_count
,
115 new_raw_count
) != prev_raw_count
)
119 * Now we have the new raw value and have updated the prev
120 * timestamp already. We can now calculate the elapsed delta
121 * (event-)time and add that to the generic event.
123 * Careful, not all hw sign-extends above the physical width
126 delta
= (new_raw_count
<< shift
) - (prev_raw_count
<< shift
);
129 local64_add(delta
, &event
->count
);
130 local64_sub(delta
, &hwc
->period_left
);
135 static struct perf_ibs perf_ibs_fetch
;
136 static struct perf_ibs perf_ibs_op
;
138 static struct perf_ibs
*get_ibs_pmu(int type
)
140 if (perf_ibs_fetch
.pmu
.type
== type
)
141 return &perf_ibs_fetch
;
142 if (perf_ibs_op
.pmu
.type
== type
)
147 static int perf_ibs_init(struct perf_event
*event
)
149 struct hw_perf_event
*hwc
= &event
->hw
;
150 struct perf_ibs
*perf_ibs
;
153 perf_ibs
= get_ibs_pmu(event
->attr
.type
);
157 config
= event
->attr
.config
;
158 if (config
& ~perf_ibs
->config_mask
)
161 if (hwc
->sample_period
) {
162 if (config
& perf_ibs
->cnt_mask
)
163 /* raw max_cnt may not be set */
165 if (hwc
->sample_period
& 0x0f)
166 /* lower 4 bits can not be set in ibs max cnt */
169 max_cnt
= config
& perf_ibs
->cnt_mask
;
170 config
&= ~perf_ibs
->cnt_mask
;
171 event
->attr
.sample_period
= max_cnt
<< 4;
172 hwc
->sample_period
= event
->attr
.sample_period
;
175 if (!hwc
->sample_period
)
178 hwc
->config_base
= perf_ibs
->msr
;
179 hwc
->config
= config
;
184 static int perf_ibs_set_period(struct perf_ibs
*perf_ibs
,
185 struct hw_perf_event
*hwc
, u64
*period
)
189 /* ignore lower 4 bits in min count: */
190 ret
= perf_event_set_period(hwc
, 1<<4, perf_ibs
->max_period
, period
);
191 local64_set(&hwc
->prev_count
, 0);
196 static u64
get_ibs_fetch_count(u64 config
)
198 return (config
& IBS_FETCH_CNT
) >> 12;
201 static u64
get_ibs_op_count(u64 config
)
203 return (config
& IBS_OP_CUR_CNT
) >> 32;
207 perf_ibs_event_update(struct perf_ibs
*perf_ibs
, struct perf_event
*event
,
210 u64 count
= perf_ibs
->get_count(config
);
212 while (!perf_event_try_update(event
, count
, 20)) {
213 rdmsrl(event
->hw
.config_base
, config
);
214 count
= perf_ibs
->get_count(config
);
218 /* Note: The enable mask must be encoded in the config argument. */
219 static inline void perf_ibs_enable_event(struct hw_perf_event
*hwc
, u64 config
)
221 wrmsrl(hwc
->config_base
, hwc
->config
| config
);
225 * We cannot restore the ibs pmu state, so we always needs to update
226 * the event while stopping it and then reset the state when starting
227 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
228 * perf_ibs_start()/perf_ibs_stop() and instead always do it.
230 static void perf_ibs_start(struct perf_event
*event
, int flags
)
232 struct hw_perf_event
*hwc
= &event
->hw
;
233 struct perf_ibs
*perf_ibs
= container_of(event
->pmu
, struct perf_ibs
, pmu
);
234 struct cpu_perf_ibs
*pcpu
= this_cpu_ptr(perf_ibs
->pcpu
);
237 if (WARN_ON_ONCE(!(hwc
->state
& PERF_HES_STOPPED
)))
240 WARN_ON_ONCE(!(hwc
->state
& PERF_HES_UPTODATE
));
243 perf_ibs_set_period(perf_ibs
, hwc
, &config
);
244 config
= (config
>> 4) | perf_ibs
->enable_mask
;
245 set_bit(IBS_STARTED
, pcpu
->state
);
246 perf_ibs_enable_event(hwc
, config
);
248 perf_event_update_userpage(event
);
251 static void perf_ibs_stop(struct perf_event
*event
, int flags
)
253 struct hw_perf_event
*hwc
= &event
->hw
;
254 struct perf_ibs
*perf_ibs
= container_of(event
->pmu
, struct perf_ibs
, pmu
);
255 struct cpu_perf_ibs
*pcpu
= this_cpu_ptr(perf_ibs
->pcpu
);
259 stopping
= test_and_clear_bit(IBS_STARTED
, pcpu
->state
);
261 if (!stopping
&& (hwc
->state
& PERF_HES_UPTODATE
))
264 rdmsrl(hwc
->config_base
, val
);
267 set_bit(IBS_STOPPING
, pcpu
->state
);
268 val
&= ~perf_ibs
->enable_mask
;
269 wrmsrl(hwc
->config_base
, val
);
270 WARN_ON_ONCE(hwc
->state
& PERF_HES_STOPPED
);
271 hwc
->state
|= PERF_HES_STOPPED
;
274 if (hwc
->state
& PERF_HES_UPTODATE
)
277 perf_ibs_event_update(perf_ibs
, event
, val
);
278 hwc
->state
|= PERF_HES_UPTODATE
;
281 static int perf_ibs_add(struct perf_event
*event
, int flags
)
283 struct perf_ibs
*perf_ibs
= container_of(event
->pmu
, struct perf_ibs
, pmu
);
284 struct cpu_perf_ibs
*pcpu
= this_cpu_ptr(perf_ibs
->pcpu
);
286 if (test_and_set_bit(IBS_ENABLED
, pcpu
->state
))
289 event
->hw
.state
= PERF_HES_UPTODATE
| PERF_HES_STOPPED
;
293 if (flags
& PERF_EF_START
)
294 perf_ibs_start(event
, PERF_EF_RELOAD
);
299 static void perf_ibs_del(struct perf_event
*event
, int flags
)
301 struct perf_ibs
*perf_ibs
= container_of(event
->pmu
, struct perf_ibs
, pmu
);
302 struct cpu_perf_ibs
*pcpu
= this_cpu_ptr(perf_ibs
->pcpu
);
304 if (!test_and_clear_bit(IBS_ENABLED
, pcpu
->state
))
307 perf_ibs_stop(event
, PERF_EF_UPDATE
);
311 perf_event_update_userpage(event
);
/* Intentionally empty: counts are folded in on stop/NMI, not on read. */
static void perf_ibs_read(struct perf_event *event) { }
316 static struct perf_ibs perf_ibs_fetch
= {
318 .task_ctx_nr
= perf_invalid_context
,
320 .event_init
= perf_ibs_init
,
323 .start
= perf_ibs_start
,
324 .stop
= perf_ibs_stop
,
325 .read
= perf_ibs_read
,
327 .msr
= MSR_AMD64_IBSFETCHCTL
,
328 .config_mask
= IBS_FETCH_CONFIG_MASK
,
329 .cnt_mask
= IBS_FETCH_MAX_CNT
,
330 .enable_mask
= IBS_FETCH_ENABLE
,
331 .valid_mask
= IBS_FETCH_VAL
,
332 .max_period
= IBS_FETCH_MAX_CNT
<< 4,
333 .offset_mask
= { MSR_AMD64_IBSFETCH_REG_MASK
},
334 .offset_max
= MSR_AMD64_IBSFETCH_REG_COUNT
,
336 .get_count
= get_ibs_fetch_count
,
339 static struct perf_ibs perf_ibs_op
= {
341 .task_ctx_nr
= perf_invalid_context
,
343 .event_init
= perf_ibs_init
,
346 .start
= perf_ibs_start
,
347 .stop
= perf_ibs_stop
,
348 .read
= perf_ibs_read
,
350 .msr
= MSR_AMD64_IBSOPCTL
,
351 .config_mask
= IBS_OP_CONFIG_MASK
,
352 .cnt_mask
= IBS_OP_MAX_CNT
,
353 .enable_mask
= IBS_OP_ENABLE
,
354 .valid_mask
= IBS_OP_VAL
,
355 .max_period
= IBS_OP_MAX_CNT
<< 4,
356 .offset_mask
= { MSR_AMD64_IBSOP_REG_MASK
},
357 .offset_max
= MSR_AMD64_IBSOP_REG_COUNT
,
359 .get_count
= get_ibs_op_count
,
362 static int perf_ibs_handle_irq(struct perf_ibs
*perf_ibs
, struct pt_regs
*iregs
)
364 struct cpu_perf_ibs
*pcpu
= this_cpu_ptr(perf_ibs
->pcpu
);
365 struct perf_event
*event
= pcpu
->event
;
366 struct hw_perf_event
*hwc
= &event
->hw
;
367 struct perf_sample_data data
;
368 struct perf_raw_record raw
;
370 struct perf_ibs_data ibs_data
;
371 int offset
, size
, overflow
, reenable
;
375 if (!test_bit(IBS_STARTED
, pcpu
->state
)) {
376 /* Catch spurious interrupts after stopping IBS: */
377 if (!test_and_clear_bit(IBS_STOPPING
, pcpu
->state
))
379 rdmsrl(perf_ibs
->msr
, *ibs_data
.regs
);
380 return (*ibs_data
.regs
& perf_ibs
->valid_mask
) ? 1 : 0;
383 msr
= hwc
->config_base
;
386 if (!(*buf
++ & perf_ibs
->valid_mask
))
389 perf_sample_data_init(&data
, 0);
390 if (event
->attr
.sample_type
& PERF_SAMPLE_RAW
) {
391 ibs_data
.caps
= ibs_caps
;
395 rdmsrl(msr
+ offset
, *buf
++);
397 offset
= find_next_bit(perf_ibs
->offset_mask
,
398 perf_ibs
->offset_max
,
400 } while (offset
< perf_ibs
->offset_max
);
401 raw
.size
= sizeof(u32
) + sizeof(u64
) * size
;
402 raw
.data
= ibs_data
.data
;
406 regs
= *iregs
; /* XXX: update ip from ibs sample */
409 * Emulate IbsOpCurCnt in MSRC001_1033 (IbsOpCtl), not
410 * supported in all cpus. As this triggered an interrupt, we
411 * set the current count to the max count.
413 config
= ibs_data
.regs
[0];
414 if (perf_ibs
== &perf_ibs_op
&& !(ibs_caps
& IBS_CAPS_RDWROPCNT
)) {
415 config
&= ~IBS_OP_CUR_CNT
;
416 config
|= (config
& IBS_OP_MAX_CNT
) << 36;
419 perf_ibs_event_update(perf_ibs
, event
, config
);
421 overflow
= perf_ibs_set_period(perf_ibs
, hwc
, &config
);
422 reenable
= !(overflow
&& perf_event_overflow(event
, &data
, ®s
));
423 config
= (config
>> 4) | (reenable
? perf_ibs
->enable_mask
: 0);
424 perf_ibs_enable_event(hwc
, config
);
426 perf_event_update_userpage(event
);
432 perf_ibs_nmi_handler(unsigned int cmd
, struct pt_regs
*regs
)
436 handled
+= perf_ibs_handle_irq(&perf_ibs_fetch
, regs
);
437 handled
+= perf_ibs_handle_irq(&perf_ibs_op
, regs
);
440 inc_irq_stat(apic_perf_irqs
);
445 static __init
int perf_ibs_pmu_init(struct perf_ibs
*perf_ibs
, char *name
)
447 struct cpu_perf_ibs __percpu
*pcpu
;
450 pcpu
= alloc_percpu(struct cpu_perf_ibs
);
454 perf_ibs
->pcpu
= pcpu
;
456 ret
= perf_pmu_register(&perf_ibs
->pmu
, name
, -1);
458 perf_ibs
->pcpu
= NULL
;
465 static __init
int perf_event_ibs_init(void)
468 return -ENODEV
; /* ibs not supported by the cpu */
470 perf_ibs_pmu_init(&perf_ibs_fetch
, "ibs_fetch");
471 perf_ibs_pmu_init(&perf_ibs_op
, "ibs_op");
472 register_nmi_handler(NMI_LOCAL
, &perf_ibs_nmi_handler
, 0, "perf_ibs");
473 printk(KERN_INFO
"perf: AMD IBS detected (0x%08x)\n", ibs_caps
);
478 #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
480 static __init
int perf_event_ibs_init(void) { return 0; }
484 /* IBS - apic initialization, for perf and oprofile */
486 static __init u32
__get_ibs_caps(void)
489 unsigned int max_level
;
491 if (!boot_cpu_has(X86_FEATURE_IBS
))
494 /* check IBS cpuid feature flags */
495 max_level
= cpuid_eax(0x80000000);
496 if (max_level
< IBS_CPUID_FEATURES
)
497 return IBS_CAPS_DEFAULT
;
499 caps
= cpuid_eax(IBS_CPUID_FEATURES
);
500 if (!(caps
& IBS_CAPS_AVAIL
))
501 /* cpuid flags not valid */
502 return IBS_CAPS_DEFAULT
;
507 u32
get_ibs_caps(void)
512 EXPORT_SYMBOL(get_ibs_caps
);
514 static inline int get_eilvt(int offset
)
516 return !setup_APIC_eilvt(offset
, 0, APIC_EILVT_MSG_NMI
, 1);
/* Release a previously reserved APIC EILVT entry; 1 on success. */
static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
525 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
527 static inline int ibs_eilvt_valid(void)
535 rdmsrl(MSR_AMD64_IBSCTL
, val
);
536 offset
= val
& IBSCTL_LVT_OFFSET_MASK
;
538 if (!(val
& IBSCTL_LVT_OFFSET_VALID
)) {
539 pr_err(FW_BUG
"cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
540 smp_processor_id(), offset
, MSR_AMD64_IBSCTL
, val
);
544 if (!get_eilvt(offset
)) {
545 pr_err(FW_BUG
"cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
546 smp_processor_id(), offset
, MSR_AMD64_IBSCTL
, val
);
557 static int setup_ibs_ctl(int ibs_eilvt_off
)
559 struct pci_dev
*cpu_cfg
;
566 cpu_cfg
= pci_get_device(PCI_VENDOR_ID_AMD
,
567 PCI_DEVICE_ID_AMD_10H_NB_MISC
,
572 pci_write_config_dword(cpu_cfg
, IBSCTL
, ibs_eilvt_off
573 | IBSCTL_LVT_OFFSET_VALID
);
574 pci_read_config_dword(cpu_cfg
, IBSCTL
, &value
);
575 if (value
!= (ibs_eilvt_off
| IBSCTL_LVT_OFFSET_VALID
)) {
576 pci_dev_put(cpu_cfg
);
577 printk(KERN_DEBUG
"Failed to setup IBS LVT offset, "
578 "IBSCTL = 0x%08x\n", value
);
584 printk(KERN_DEBUG
"No CPU node configured for IBS\n");
592 * This runs only on the current cpu. We try to find an LVT offset and
593 * setup the local APIC. For this we must disable preemption. On
594 * success we initialize all nodes with this offset. This updates then
595 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
596 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
597 * is using the new offset.
599 static int force_ibs_eilvt_setup(void)
605 /* find the next free available EILVT entry, skip offset 0 */
606 for (offset
= 1; offset
< APIC_EILVT_NR_MAX
; offset
++) {
607 if (get_eilvt(offset
))
612 if (offset
== APIC_EILVT_NR_MAX
) {
613 printk(KERN_DEBUG
"No EILVT entry available\n");
617 ret
= setup_ibs_ctl(offset
);
621 if (!ibs_eilvt_valid()) {
626 pr_info("IBS: LVT offset %d assigned\n", offset
);
636 static inline int get_ibs_lvt_offset(void)
640 rdmsrl(MSR_AMD64_IBSCTL
, val
);
641 if (!(val
& IBSCTL_LVT_OFFSET_VALID
))
644 return val
& IBSCTL_LVT_OFFSET_MASK
;
647 static void setup_APIC_ibs(void *dummy
)
651 offset
= get_ibs_lvt_offset();
655 if (!setup_APIC_eilvt(offset
, 0, APIC_EILVT_MSG_NMI
, 0))
658 pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
662 static void clear_APIC_ibs(void *dummy
)
666 offset
= get_ibs_lvt_offset();
668 setup_APIC_eilvt(offset
, 0, APIC_EILVT_MSG_FIX
, 1);
672 perf_ibs_cpu_notifier(struct notifier_block
*self
, unsigned long action
, void *hcpu
)
674 switch (action
& ~CPU_TASKS_FROZEN
) {
676 setup_APIC_ibs(NULL
);
679 clear_APIC_ibs(NULL
);
688 static __init
int amd_ibs_init(void)
693 caps
= __get_ibs_caps();
695 return -ENODEV
; /* ibs not supported by the cpu */
698 * Force LVT offset assignment for family 10h: The offsets are
699 * not assigned by the BIOS for this family, so the OS is
700 * responsible for doing it. If the OS assignment fails, fall
701 * back to BIOS settings and try to setup this.
703 if (boot_cpu_data
.x86
== 0x10)
704 force_ibs_eilvt_setup();
706 if (!ibs_eilvt_valid())
711 /* make ibs_caps visible to other cpus: */
713 perf_cpu_notifier(perf_ibs_cpu_notifier
);
714 smp_call_function(setup_APIC_ibs
, NULL
, 1);
717 ret
= perf_event_ibs_init();
720 pr_err("Failed to setup IBS, %d\n", ret
);
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);