#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/apicdef.h>

#include "../perf_event.h"

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher: attempts */
		[ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher: cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions: Load */
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache: IC+DC */
		[ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses: IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS) ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
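
/*
 * Note on the table above (added for clarity): a value of 0 means "no
 * suitable hardware event for this op/result combination", which makes
 * event creation fail with -ENOENT, while -1 marks a combination that
 * makes no sense for this cache, such as writes to the instruction
 * cache, and fails with -EINVAL.
 */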

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
	[PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

/*
 * Previously calculated offsets
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;

/*
 * Legacy CPUs:
 *   4 counters starting at 0xc0010000 each offset by 1
 *
 * CPUs with core performance counter extensions:
 *   6 counters starting at 0xc0010200 each offset by 2
 */
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
	int offset;

	if (!index)
		return index;

	if (eventsel)
		offset = event_offsets[index];
	else
		offset = count_offsets[index];

	if (offset)
		return offset;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		offset = index;
	else
		offset = index << 1;

	if (eventsel)
		event_offsets[index] = offset;
	else
		count_offsets[index] = offset;

	return offset;
}
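
/*
 * Worked example (illustrative only): with the core extension, index 3
 * yields offset 3 << 1 = 6, so the event select lands at
 * MSR_F15H_PERF_CTL + 6 = 0xc0010206 and the counter at
 * MSR_F15H_PERF_CTR + 6 = 0xc0010207. On a legacy CPU the same index
 * yields offset 3, i.e. MSR_K7_EVNTSEL0 + 3 = 0xc0010003.
 */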

static int amd_core_hw_config(struct perf_event *event)
{
	if (event->attr.exclude_host && event->attr.exclude_guest)
		/*
		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
		 * and will count in both modes. We don't want to count in that
		 * case so we emulate no-counting by setting US = OS = 0.
		 */
		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
				      ARCH_PERFMON_EVENTSEL_OS);
	else if (event->attr.exclude_host)
		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
	else if (event->attr.exclude_guest)
		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;

	return 0;
}
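
/*
 * For reference (illustrative): AMD64_EVENTSEL_GUESTONLY and
 * AMD64_EVENTSEL_HOSTONLY are the GO/HO bits (bits 40 and 41) of the
 * event select, so an event created with e.g. attr.exclude_guest = 1
 * is restricted above to counting only while the CPU is not running
 * a guest.
 */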

/*
 * AMD64 events are detected based on their event codes.
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}
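
/*
 * Example (illustrative): an extended event code such as 0x1E0 is
 * encoded with its low byte in config bits 7:0 and its high nibble in
 * config bits 35:32, so for hwc->config = (1ULL << 32) | 0xE0 the
 * function returns (0x100 & 0x0f00) | 0xE0 = 0x1E0.
 */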

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret;

	/* pass precise event sampling to ibs: */
	if (event->attr.precise_ip && get_ibs_caps())
		return -ENOENT;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	ret = x86_pmu_hw_config(event);
	if (ret)
		return ret;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return amd_core_hw_config(event);
}

static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (cmpxchg(nb->owners + i, event, NULL) == event)
			break;
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache, HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling __amd_put_nb_event_constraints()
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old;
	int idx, new = -1;

	if (!c)
		c = &unconstrained;

	if (cpuc->is_fake)
		return c;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
		else if (nb->owners[idx] == event)
			/* event already present */
			old = event;
		else
			continue;

		if (old && old != event)
			continue;

		/* reassign to this slot */
		if (new != -1)
			cmpxchg(nb->owners + new, event, NULL);
		new = idx;

		/* already present, reuse */
		if (old == event)
			break;
	}

	if (new == -1)
		return &emptyconstraint;

	return &nb->event_constraints[new];
}
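
/*
 * Example walk-through (illustrative): suppose the event already owns
 * slot 2 from an earlier scheduling pass but arrives here with
 * hwc->idx == -1. The loop first claims a free slot via cmpxchg();
 * on reaching slot 2 it finds old == event, releases the freshly
 * claimed slot ("reassign to this slot") and reuses slot 2, so the
 * event never ends up owning two slots at once.
 */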

static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (!x86_pmu.amd_nb_constraints)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
	struct amd_nb *nb;
	int i, nb_id;

	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	if (!x86_pmu.amd_nb_constraints)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			*onln = cpuc->amd_nb;
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (!x86_pmu.amd_nb_constraints)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}

static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
		return &unconstrained;

	return __amd_get_nb_event_constraints(cpuc, event, NULL);
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
		__amd_put_nb_event_constraints(cpuc, event);
}

PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15"	);
PMU_FORMAT_ATTR(edge,	"config:18"	);
PMU_FORMAT_ATTR(inv,	"config:23"	);
PMU_FORMAT_ATTR(cmask,	"config:24-31"	);

static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};
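
/*
 * These format attributes are exported via sysfs, so that (using the
 * generic perf tool syntax, shown here purely as an illustration) a
 * raw event can be specified as e.g.:
 *
 *	perf stat -e cpu/event=0x76,umask=0x00/ -a sleep 1
 *
 * which programs event 0x076, CPU cycles per the map above.
 */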

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
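
/*
 * Note: the "..." in the definitions above is the GCC case-range
 * extension, so each macro expands to "low ... high" and a single
 * case label in amd_get_event_constraints_f15h() below covers the
 * whole range of event type codes.
 */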

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x031	LS	PERF_CTL[2:0] (**)
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1C0	EX	PERF_CTL[5:3]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */

static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
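
/*
 * The counter masks above read as bitmaps over PERF_CTL[5:0]:
 * 0x01 = PMC0, 0x07 = PMC[2:0], 0x08 = PMC3, 0x3F = PMC[5:0] and
 * 0x38 = PMC[5:3]. amd_f15_PMC30 (0x09 = PMC3 and PMC0) overlaps
 * with other constraints, hence EVENT_CONSTRAINT_OVERLAP.
 */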

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
			       struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int event_code = amd_get_event_code(hwc);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		switch (event_code) {
		case 0x000:
			if (!(hwc->config & 0x0000F000ULL))
				break;
			if (!(hwc->config & 0x00000F00ULL))
				break;
			return &amd_f15_PMC3;
		case 0x004:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				break;
			return &amd_f15_PMC3;
		case 0x003:
		case 0x00B:
		case 0x00D:
			return &amd_f15_PMC3;
		}
		return &amd_f15_PMC53;
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		switch (event_code) {
		case 0x023:
		case 0x043:
		case 0x045:
		case 0x046:
		case 0x054:
		case 0x055:
			return &amd_f15_PMC20;
		case 0x02D:
			return &amd_f15_PMC3;
		case 0x02E:
			return &amd_f15_PMC30;
		case 0x031:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				return &amd_f15_PMC20;
			return &emptyconstraint;
		case 0x1C0:
			return &amd_f15_PMC53;
		default:
			return &amd_f15_PMC50;
		}
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		switch (event_code) {
		case 0x08F:
		case 0x187:
		case 0x188:
			return &amd_f15_PMC0;
		case 0x0DB ... 0x0DF:
		case 0x1D6:
		case 0x1D8:
			return &amd_f15_PMC50;
		default:
			return &amd_f15_PMC20;
		}
	case AMD_EVENT_NB:
		/* moved to perf_event_amd_uncore.c */
		return &emptyconstraint;
	default:
		return &emptyconstraint;
	}
}

static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
		    (config & AMD64_EVENTSEL_EVENT) >> 24;

	return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.addr_offset		= amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= AMD64_NUM_COUNTERS,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.format_attrs		= amd_format_attr,
	.events_sysfs_show	= amd_event_sysfs_show,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,

	.amd_nb_constraints	= 1,
};

static int __init amd_core_pmu_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		return 0;

	switch (boot_cpu_data.x86) {
	case 0x15:
		pr_cont("Fam15h ");
		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
		break;

	default:
		pr_err("core perfctr but no constraints; unknown hardware!\n");
		return -ENODEV;
	}

	/*
	 * If core performance counter extensions exist, we must use
	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR MSRs. See also
	 * amd_pmu_addr_offset().
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
	/*
	 * AMD Core perfctr has separate MSRs for the NB events, see
	 * the amd/uncore.c driver.
	 */
	x86_pmu.amd_nb_constraints = 0;

	pr_cont("core perfctr, ");
	return 0;
}

__init int amd_pmu_init(void)
{
	int ret;

	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	ret = amd_core_pmu_init();
	if (ret)
		return ret;

	if (num_possible_cpus() == 1) {
		/*
		 * No point in allocating data structures to serialize
		 * against other CPUs, when there is only the one CPU.
		 */
		x86_pmu.amd_nb_constraints = 0;
	}

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

void amd_pmu_enable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	cpuc->perf_ctr_virt_mask = 0;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);

void amd_pmu_disable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * We only mask out the Host-only bit so that host-only counting works
	 * when SVM is disabled. If someone sets up a guest-only counter when
	 * SVM is disabled, the Guest-only bit still gets set and the counter
	 * will not count anything.
	 */
	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);