| 1 | /* |
 * Per-core/CPU state
| 3 | * |
| 4 | * Used to coordinate shared registers between HT threads or |
| 5 | * among events on a single PMU. |
| 6 | */ |
| 7 | |
| 8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
| 9 | |
| 10 | #include <linux/stddef.h> |
| 11 | #include <linux/types.h> |
| 12 | #include <linux/init.h> |
| 13 | #include <linux/slab.h> |
| 14 | #include <linux/export.h> |
| 15 | #include <linux/nmi.h> |
| 16 | |
| 17 | #include <asm/cpufeature.h> |
| 18 | #include <asm/hardirq.h> |
| 19 | #include <asm/apic.h> |
| 20 | |
| 21 | #include "../perf_event.h" |
| 22 | |
| 23 | /* |
| 24 | * Intel PerfMon, used on Core and later. |
| 25 | */ |
| 26 | static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = |
| 27 | { |
| 28 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, |
| 29 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, |
| 30 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, |
| 31 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, |
| 32 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, |
| 33 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, |
| 34 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, |
| 35 | [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x0300, /* pseudo-encoding */ |
| 36 | }; |
| 37 | |
| 38 | static struct event_constraint intel_core_event_constraints[] __read_mostly = |
| 39 | { |
| 40 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| 41 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| 42 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| 43 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| 44 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| 45 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ |
| 46 | EVENT_CONSTRAINT_END |
| 47 | }; |
| 48 | |
| 49 | static struct event_constraint intel_core2_event_constraints[] __read_mostly = |
| 50 | { |
| 51 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 52 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 53 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 54 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ |
| 55 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ |
| 56 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ |
| 57 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ |
| 58 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ |
| 59 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ |
| 60 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ |
| 61 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ |
| 62 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ |
| 63 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ |
| 64 | EVENT_CONSTRAINT_END |
| 65 | }; |
| 66 | |
| 67 | static struct event_constraint intel_nehalem_event_constraints[] __read_mostly = |
| 68 | { |
| 69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 71 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 72 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ |
| 73 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ |
| 74 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ |
| 75 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ |
| 76 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ |
| 77 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ |
| 78 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| 79 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| 80 | EVENT_CONSTRAINT_END |
| 81 | }; |
| 82 | |
| 83 | static struct extra_reg intel_nehalem_extra_regs[] __read_mostly = |
| 84 | { |
| 85 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 86 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
| 87 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), |
| 88 | EVENT_EXTRA_END |
| 89 | }; |
| 90 | |
| 91 | static struct event_constraint intel_westmere_event_constraints[] __read_mostly = |
| 92 | { |
| 93 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 94 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 95 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 96 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ |
| 97 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ |
| 98 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ |
| 99 | INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */ |
| 100 | EVENT_CONSTRAINT_END |
| 101 | }; |
| 102 | |
| 103 | static struct event_constraint intel_snb_event_constraints[] __read_mostly = |
| 104 | { |
| 105 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 106 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 107 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 108 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ |
| 109 | INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ |
| 110 | INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 111 | INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 112 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 113 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 114 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
| 115 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ |
| 116 | INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 117 | |
| 118 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 119 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 120 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 121 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 122 | |
| 123 | EVENT_CONSTRAINT_END |
| 124 | }; |
| 125 | |
| 126 | static struct event_constraint intel_ivb_event_constraints[] __read_mostly = |
| 127 | { |
| 128 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 129 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 130 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 131 | INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ |
	INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
| 133 | INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ |
| 134 | INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */ |
| 135 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ |
| 136 | INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ |
| 137 | INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ |
| 138 | INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 139 | INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 140 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 141 | |
| 142 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 143 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 144 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 145 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 146 | |
| 147 | EVENT_CONSTRAINT_END |
| 148 | }; |
| 149 | |
| 150 | static struct extra_reg intel_westmere_extra_regs[] __read_mostly = |
| 151 | { |
| 152 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 153 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), |
| 154 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1), |
| 155 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b), |
| 156 | EVENT_EXTRA_END |
| 157 | }; |
| 158 | |
| 159 | static struct event_constraint intel_v1_event_constraints[] __read_mostly = |
| 160 | { |
| 161 | EVENT_CONSTRAINT_END |
| 162 | }; |
| 163 | |
| 164 | static struct event_constraint intel_gen_event_constraints[] __read_mostly = |
| 165 | { |
| 166 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 167 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 168 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 169 | EVENT_CONSTRAINT_END |
| 170 | }; |
| 171 | |
| 172 | static struct event_constraint intel_slm_event_constraints[] __read_mostly = |
| 173 | { |
| 174 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 175 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 176 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ |
| 177 | EVENT_CONSTRAINT_END |
| 178 | }; |
| 179 | |
| 180 | struct event_constraint intel_skl_event_constraints[] = { |
| 181 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 182 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 183 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 184 | INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 185 | EVENT_CONSTRAINT_END |
| 186 | }; |
| 187 | |
| 188 | static struct extra_reg intel_knl_extra_regs[] __read_mostly = { |
| 189 | INTEL_UEVENT_EXTRA_REG(0x01b7, |
| 190 | MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), |
| 191 | INTEL_UEVENT_EXTRA_REG(0x02b7, |
| 192 | MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), |
| 193 | EVENT_EXTRA_END |
| 194 | }; |
| 195 | |
| 196 | static struct extra_reg intel_snb_extra_regs[] __read_mostly = { |
| 197 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 198 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0), |
| 199 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1), |
| 200 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
| 201 | EVENT_EXTRA_END |
| 202 | }; |
| 203 | |
| 204 | static struct extra_reg intel_snbep_extra_regs[] __read_mostly = { |
| 205 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 206 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
| 207 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), |
| 208 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
| 209 | EVENT_EXTRA_END |
| 210 | }; |
| 211 | |
| 212 | static struct extra_reg intel_skl_extra_regs[] __read_mostly = { |
| 213 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), |
| 214 | INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), |
| 215 | INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), |
| 216 | /* |
 * Note: the low 8 bits of the eventsel code do not form a contiguous
 * field; some of the bits #GP when set, so they are masked out.
| 219 | */ |
| 220 | INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), |
| 221 | EVENT_EXTRA_END |
| 222 | }; |
| 223 | |
| 224 | EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); |
| 225 | EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); |
| 226 | EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); |
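
/*
 * EVENT_ATTR_STR() generates sysfs event aliases: the strings above
 * become the "mem-loads" and "mem-stores" events exported under the
 * cpu PMU's events directory, giving the perf tool the raw event,
 * umask and (for loads) the PEBS load-latency threshold to use.  They
 * are collected into the per-generation attribute arrays below and
 * wired up by the model-specific init code.
 */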
| 227 | |
| 228 | struct attribute *nhm_events_attrs[] = { |
| 229 | EVENT_PTR(mem_ld_nhm), |
| 230 | NULL, |
| 231 | }; |
| 232 | |
| 233 | struct attribute *snb_events_attrs[] = { |
| 234 | EVENT_PTR(mem_ld_snb), |
| 235 | EVENT_PTR(mem_st_snb), |
| 236 | NULL, |
| 237 | }; |
| 238 | |
| 239 | static struct event_constraint intel_hsw_event_constraints[] = { |
| 240 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 241 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 242 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 243 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 244 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ |
| 245 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ |
| 246 | /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ |
| 247 | INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), |
| 248 | /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ |
| 249 | INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), |
| 250 | /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ |
| 251 | INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), |
| 252 | |
| 253 | INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ |
| 254 | INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ |
| 255 | INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ |
| 256 | INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ |
| 257 | |
| 258 | EVENT_CONSTRAINT_END |
| 259 | }; |
| 260 | |
| 261 | struct event_constraint intel_bdw_event_constraints[] = { |
| 262 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
| 263 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ |
| 264 | FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ |
| 265 | INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ |
| 266 | INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ |
| 267 | EVENT_CONSTRAINT_END |
| 268 | }; |
| 269 | |
| 270 | static u64 intel_pmu_event_map(int hw_event) |
| 271 | { |
| 272 | return intel_perfmon_event_map[hw_event]; |
| 273 | } |
| 274 | |
| 275 | /* |
| 276 | * Notes on the events: |
| 277 | * - data reads do not include code reads (comparable to earlier tables) |
| 278 | * - data counts include speculative execution (except L1 write, dtlb, bpu) |
| 279 | * - remote node access includes remote memory, remote cache, remote mmio. |
| 280 | * - prefetches are not included in the counts. |
| 281 | * - icache miss does not include decoded icache |
| 282 | */ |
| 283 | |
| 284 | #define SKL_DEMAND_DATA_RD BIT_ULL(0) |
| 285 | #define SKL_DEMAND_RFO BIT_ULL(1) |
| 286 | #define SKL_ANY_RESPONSE BIT_ULL(16) |
| 287 | #define SKL_SUPPLIER_NONE BIT_ULL(17) |
| 288 | #define SKL_L3_MISS_LOCAL_DRAM BIT_ULL(26) |
| 289 | #define SKL_L3_MISS_REMOTE_HOP0_DRAM BIT_ULL(27) |
| 290 | #define SKL_L3_MISS_REMOTE_HOP1_DRAM BIT_ULL(28) |
| 291 | #define SKL_L3_MISS_REMOTE_HOP2P_DRAM BIT_ULL(29) |
| 292 | #define SKL_L3_MISS (SKL_L3_MISS_LOCAL_DRAM| \ |
| 293 | SKL_L3_MISS_REMOTE_HOP0_DRAM| \ |
| 294 | SKL_L3_MISS_REMOTE_HOP1_DRAM| \ |
| 295 | SKL_L3_MISS_REMOTE_HOP2P_DRAM) |
| 296 | #define SKL_SPL_HIT BIT_ULL(30) |
| 297 | #define SKL_SNOOP_NONE BIT_ULL(31) |
| 298 | #define SKL_SNOOP_NOT_NEEDED BIT_ULL(32) |
| 299 | #define SKL_SNOOP_MISS BIT_ULL(33) |
| 300 | #define SKL_SNOOP_HIT_NO_FWD BIT_ULL(34) |
| 301 | #define SKL_SNOOP_HIT_WITH_FWD BIT_ULL(35) |
| 302 | #define SKL_SNOOP_HITM BIT_ULL(36) |
| 303 | #define SKL_SNOOP_NON_DRAM BIT_ULL(37) |
| 304 | #define SKL_ANY_SNOOP (SKL_SPL_HIT|SKL_SNOOP_NONE| \ |
| 305 | SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ |
| 306 | SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ |
| 307 | SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM) |
| 308 | #define SKL_DEMAND_READ SKL_DEMAND_DATA_RD |
| 309 | #define SKL_SNOOP_DRAM (SKL_SNOOP_NONE| \ |
| 310 | SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \ |
| 311 | SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \ |
| 312 | SKL_SNOOP_HITM|SKL_SPL_HIT) |
| 313 | #define SKL_DEMAND_WRITE SKL_DEMAND_RFO |
| 314 | #define SKL_LLC_ACCESS SKL_ANY_RESPONSE |
| 315 | #define SKL_L3_MISS_REMOTE (SKL_L3_MISS_REMOTE_HOP0_DRAM| \ |
| 316 | SKL_L3_MISS_REMOTE_HOP1_DRAM| \ |
| 317 | SKL_L3_MISS_REMOTE_HOP2P_DRAM) |
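
/*
 * These bits are OR'ed together to form the MSR_OFFCORE_RSP_x values
 * in the Skylake extra-regs cache table below: an LL read "access" is
 * a demand data read with any response and any snoop outcome, while a
 * "miss" selects the L3_MISS supplier bits (plus SUPPLIER_NONE)
 * instead of ANY_RESPONSE.
 */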
| 318 | |
| 319 | static __initconst const u64 skl_hw_cache_event_ids |
| 320 | [PERF_COUNT_HW_CACHE_MAX] |
| 321 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 322 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 323 | { |
| 324 | [ C(L1D ) ] = { |
| 325 | [ C(OP_READ) ] = { |
| 326 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ |
| 327 | [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ |
| 328 | }, |
| 329 | [ C(OP_WRITE) ] = { |
| 330 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ |
| 331 | [ C(RESULT_MISS) ] = 0x0, |
| 332 | }, |
| 333 | [ C(OP_PREFETCH) ] = { |
| 334 | [ C(RESULT_ACCESS) ] = 0x0, |
| 335 | [ C(RESULT_MISS) ] = 0x0, |
| 336 | }, |
| 337 | }, |
| 338 | [ C(L1I ) ] = { |
| 339 | [ C(OP_READ) ] = { |
| 340 | [ C(RESULT_ACCESS) ] = 0x0, |
| 341 | [ C(RESULT_MISS) ] = 0x283, /* ICACHE_64B.MISS */ |
| 342 | }, |
| 343 | [ C(OP_WRITE) ] = { |
| 344 | [ C(RESULT_ACCESS) ] = -1, |
| 345 | [ C(RESULT_MISS) ] = -1, |
| 346 | }, |
| 347 | [ C(OP_PREFETCH) ] = { |
| 348 | [ C(RESULT_ACCESS) ] = 0x0, |
| 349 | [ C(RESULT_MISS) ] = 0x0, |
| 350 | }, |
| 351 | }, |
| 352 | [ C(LL ) ] = { |
| 353 | [ C(OP_READ) ] = { |
| 354 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 355 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 356 | }, |
| 357 | [ C(OP_WRITE) ] = { |
| 358 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 359 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 360 | }, |
| 361 | [ C(OP_PREFETCH) ] = { |
| 362 | [ C(RESULT_ACCESS) ] = 0x0, |
| 363 | [ C(RESULT_MISS) ] = 0x0, |
| 364 | }, |
| 365 | }, |
| 366 | [ C(DTLB) ] = { |
| 367 | [ C(OP_READ) ] = { |
| 368 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ |
| 369 | [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ |
| 370 | }, |
| 371 | [ C(OP_WRITE) ] = { |
| 372 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ |
| 373 | [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ |
| 374 | }, |
| 375 | [ C(OP_PREFETCH) ] = { |
| 376 | [ C(RESULT_ACCESS) ] = 0x0, |
| 377 | [ C(RESULT_MISS) ] = 0x0, |
| 378 | }, |
| 379 | }, |
| 380 | [ C(ITLB) ] = { |
| 381 | [ C(OP_READ) ] = { |
| 382 | [ C(RESULT_ACCESS) ] = 0x2085, /* ITLB_MISSES.STLB_HIT */ |
| 383 | [ C(RESULT_MISS) ] = 0xe85, /* ITLB_MISSES.WALK_COMPLETED */ |
| 384 | }, |
| 385 | [ C(OP_WRITE) ] = { |
| 386 | [ C(RESULT_ACCESS) ] = -1, |
| 387 | [ C(RESULT_MISS) ] = -1, |
| 388 | }, |
| 389 | [ C(OP_PREFETCH) ] = { |
| 390 | [ C(RESULT_ACCESS) ] = -1, |
| 391 | [ C(RESULT_MISS) ] = -1, |
| 392 | }, |
| 393 | }, |
| 394 | [ C(BPU ) ] = { |
| 395 | [ C(OP_READ) ] = { |
| 396 | [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 397 | [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ |
| 398 | }, |
| 399 | [ C(OP_WRITE) ] = { |
| 400 | [ C(RESULT_ACCESS) ] = -1, |
| 401 | [ C(RESULT_MISS) ] = -1, |
| 402 | }, |
| 403 | [ C(OP_PREFETCH) ] = { |
| 404 | [ C(RESULT_ACCESS) ] = -1, |
| 405 | [ C(RESULT_MISS) ] = -1, |
| 406 | }, |
| 407 | }, |
| 408 | [ C(NODE) ] = { |
| 409 | [ C(OP_READ) ] = { |
| 410 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 411 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 412 | }, |
| 413 | [ C(OP_WRITE) ] = { |
| 414 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 415 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 416 | }, |
| 417 | [ C(OP_PREFETCH) ] = { |
| 418 | [ C(RESULT_ACCESS) ] = 0x0, |
| 419 | [ C(RESULT_MISS) ] = 0x0, |
| 420 | }, |
| 421 | }, |
| 422 | }; |
| 423 | |
| 424 | static __initconst const u64 skl_hw_cache_extra_regs |
| 425 | [PERF_COUNT_HW_CACHE_MAX] |
| 426 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 427 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 428 | { |
| 429 | [ C(LL ) ] = { |
| 430 | [ C(OP_READ) ] = { |
| 431 | [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| |
| 432 | SKL_LLC_ACCESS|SKL_ANY_SNOOP, |
| 433 | [ C(RESULT_MISS) ] = SKL_DEMAND_READ| |
| 434 | SKL_L3_MISS|SKL_ANY_SNOOP| |
| 435 | SKL_SUPPLIER_NONE, |
| 436 | }, |
| 437 | [ C(OP_WRITE) ] = { |
| 438 | [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| |
| 439 | SKL_LLC_ACCESS|SKL_ANY_SNOOP, |
| 440 | [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| |
| 441 | SKL_L3_MISS|SKL_ANY_SNOOP| |
| 442 | SKL_SUPPLIER_NONE, |
| 443 | }, |
| 444 | [ C(OP_PREFETCH) ] = { |
| 445 | [ C(RESULT_ACCESS) ] = 0x0, |
| 446 | [ C(RESULT_MISS) ] = 0x0, |
| 447 | }, |
| 448 | }, |
| 449 | [ C(NODE) ] = { |
| 450 | [ C(OP_READ) ] = { |
| 451 | [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ| |
| 452 | SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, |
| 453 | [ C(RESULT_MISS) ] = SKL_DEMAND_READ| |
| 454 | SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, |
| 455 | }, |
| 456 | [ C(OP_WRITE) ] = { |
| 457 | [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE| |
| 458 | SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM, |
| 459 | [ C(RESULT_MISS) ] = SKL_DEMAND_WRITE| |
| 460 | SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM, |
| 461 | }, |
| 462 | [ C(OP_PREFETCH) ] = { |
| 463 | [ C(RESULT_ACCESS) ] = 0x0, |
| 464 | [ C(RESULT_MISS) ] = 0x0, |
| 465 | }, |
| 466 | }, |
| 467 | }; |
| 468 | |
| 469 | #define SNB_DMND_DATA_RD (1ULL << 0) |
| 470 | #define SNB_DMND_RFO (1ULL << 1) |
| 471 | #define SNB_DMND_IFETCH (1ULL << 2) |
| 472 | #define SNB_DMND_WB (1ULL << 3) |
| 473 | #define SNB_PF_DATA_RD (1ULL << 4) |
| 474 | #define SNB_PF_RFO (1ULL << 5) |
| 475 | #define SNB_PF_IFETCH (1ULL << 6) |
| 476 | #define SNB_LLC_DATA_RD (1ULL << 7) |
| 477 | #define SNB_LLC_RFO (1ULL << 8) |
| 478 | #define SNB_LLC_IFETCH (1ULL << 9) |
| 479 | #define SNB_BUS_LOCKS (1ULL << 10) |
| 480 | #define SNB_STRM_ST (1ULL << 11) |
| 481 | #define SNB_OTHER (1ULL << 15) |
| 482 | #define SNB_RESP_ANY (1ULL << 16) |
| 483 | #define SNB_NO_SUPP (1ULL << 17) |
| 484 | #define SNB_LLC_HITM (1ULL << 18) |
| 485 | #define SNB_LLC_HITE (1ULL << 19) |
| 486 | #define SNB_LLC_HITS (1ULL << 20) |
| 487 | #define SNB_LLC_HITF (1ULL << 21) |
| 488 | #define SNB_LOCAL (1ULL << 22) |
| 489 | #define SNB_REMOTE (0xffULL << 23) |
| 490 | #define SNB_SNP_NONE (1ULL << 31) |
| 491 | #define SNB_SNP_NOT_NEEDED (1ULL << 32) |
| 492 | #define SNB_SNP_MISS (1ULL << 33) |
| 493 | #define SNB_NO_FWD (1ULL << 34) |
| 494 | #define SNB_SNP_FWD (1ULL << 35) |
| 495 | #define SNB_HITM (1ULL << 36) |
| 496 | #define SNB_NON_DRAM (1ULL << 37) |
| 497 | |
| 498 | #define SNB_DMND_READ (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD) |
| 499 | #define SNB_DMND_WRITE (SNB_DMND_RFO|SNB_LLC_RFO) |
| 500 | #define SNB_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) |
| 501 | |
| 502 | #define SNB_SNP_ANY (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \ |
| 503 | SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \ |
| 504 | SNB_HITM) |
| 505 | |
| 506 | #define SNB_DRAM_ANY (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY) |
| 507 | #define SNB_DRAM_REMOTE (SNB_REMOTE|SNB_SNP_ANY) |
| 508 | |
| 509 | #define SNB_L3_ACCESS SNB_RESP_ANY |
| 510 | #define SNB_L3_MISS (SNB_DRAM_ANY|SNB_NON_DRAM) |
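
/*
 * With the composition above, an LLC miss is anything satisfied from
 * local or remote DRAM (whatever the snoop outcome) or from a
 * non-DRAM source such as MMIO, while an LLC access simply asks for
 * any response.
 */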
| 511 | |
| 512 | static __initconst const u64 snb_hw_cache_extra_regs |
| 513 | [PERF_COUNT_HW_CACHE_MAX] |
| 514 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 515 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 516 | { |
| 517 | [ C(LL ) ] = { |
| 518 | [ C(OP_READ) ] = { |
| 519 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS, |
| 520 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_L3_MISS, |
| 521 | }, |
| 522 | [ C(OP_WRITE) ] = { |
| 523 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS, |
| 524 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_L3_MISS, |
| 525 | }, |
| 526 | [ C(OP_PREFETCH) ] = { |
| 527 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS, |
| 528 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_L3_MISS, |
| 529 | }, |
| 530 | }, |
| 531 | [ C(NODE) ] = { |
| 532 | [ C(OP_READ) ] = { |
| 533 | [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY, |
| 534 | [ C(RESULT_MISS) ] = SNB_DMND_READ|SNB_DRAM_REMOTE, |
| 535 | }, |
| 536 | [ C(OP_WRITE) ] = { |
| 537 | [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY, |
| 538 | [ C(RESULT_MISS) ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE, |
| 539 | }, |
| 540 | [ C(OP_PREFETCH) ] = { |
| 541 | [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY, |
| 542 | [ C(RESULT_MISS) ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE, |
| 543 | }, |
| 544 | }, |
| 545 | }; |
| 546 | |
| 547 | static __initconst const u64 snb_hw_cache_event_ids |
| 548 | [PERF_COUNT_HW_CACHE_MAX] |
| 549 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 550 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 551 | { |
| 552 | [ C(L1D) ] = { |
| 553 | [ C(OP_READ) ] = { |
| 554 | [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ |
| 555 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ |
| 556 | }, |
| 557 | [ C(OP_WRITE) ] = { |
| 558 | [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ |
| 559 | [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ |
| 560 | }, |
| 561 | [ C(OP_PREFETCH) ] = { |
| 562 | [ C(RESULT_ACCESS) ] = 0x0, |
| 563 | [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ |
| 564 | }, |
| 565 | }, |
| 566 | [ C(L1I ) ] = { |
| 567 | [ C(OP_READ) ] = { |
| 568 | [ C(RESULT_ACCESS) ] = 0x0, |
| 569 | [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ |
| 570 | }, |
| 571 | [ C(OP_WRITE) ] = { |
| 572 | [ C(RESULT_ACCESS) ] = -1, |
| 573 | [ C(RESULT_MISS) ] = -1, |
| 574 | }, |
| 575 | [ C(OP_PREFETCH) ] = { |
| 576 | [ C(RESULT_ACCESS) ] = 0x0, |
| 577 | [ C(RESULT_MISS) ] = 0x0, |
| 578 | }, |
| 579 | }, |
| 580 | [ C(LL ) ] = { |
| 581 | [ C(OP_READ) ] = { |
| 582 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 583 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 584 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
| 585 | [ C(RESULT_MISS) ] = 0x01b7, |
| 586 | }, |
| 587 | [ C(OP_WRITE) ] = { |
| 588 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 589 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 590 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 591 | [ C(RESULT_MISS) ] = 0x01b7, |
| 592 | }, |
| 593 | [ C(OP_PREFETCH) ] = { |
| 594 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 595 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 596 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 597 | [ C(RESULT_MISS) ] = 0x01b7, |
| 598 | }, |
| 599 | }, |
| 600 | [ C(DTLB) ] = { |
| 601 | [ C(OP_READ) ] = { |
| 602 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ |
| 603 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ |
| 604 | }, |
| 605 | [ C(OP_WRITE) ] = { |
| 606 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ |
| 607 | [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ |
| 608 | }, |
| 609 | [ C(OP_PREFETCH) ] = { |
| 610 | [ C(RESULT_ACCESS) ] = 0x0, |
| 611 | [ C(RESULT_MISS) ] = 0x0, |
| 612 | }, |
| 613 | }, |
| 614 | [ C(ITLB) ] = { |
| 615 | [ C(OP_READ) ] = { |
| 616 | [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ |
| 617 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ |
| 618 | }, |
| 619 | [ C(OP_WRITE) ] = { |
| 620 | [ C(RESULT_ACCESS) ] = -1, |
| 621 | [ C(RESULT_MISS) ] = -1, |
| 622 | }, |
| 623 | [ C(OP_PREFETCH) ] = { |
| 624 | [ C(RESULT_ACCESS) ] = -1, |
| 625 | [ C(RESULT_MISS) ] = -1, |
| 626 | }, |
| 627 | }, |
| 628 | [ C(BPU ) ] = { |
| 629 | [ C(OP_READ) ] = { |
| 630 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 631 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ |
| 632 | }, |
| 633 | [ C(OP_WRITE) ] = { |
| 634 | [ C(RESULT_ACCESS) ] = -1, |
| 635 | [ C(RESULT_MISS) ] = -1, |
| 636 | }, |
| 637 | [ C(OP_PREFETCH) ] = { |
| 638 | [ C(RESULT_ACCESS) ] = -1, |
| 639 | [ C(RESULT_MISS) ] = -1, |
| 640 | }, |
| 641 | }, |
| 642 | [ C(NODE) ] = { |
| 643 | [ C(OP_READ) ] = { |
| 644 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 645 | [ C(RESULT_MISS) ] = 0x01b7, |
| 646 | }, |
| 647 | [ C(OP_WRITE) ] = { |
| 648 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 649 | [ C(RESULT_MISS) ] = 0x01b7, |
| 650 | }, |
| 651 | [ C(OP_PREFETCH) ] = { |
| 652 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 653 | [ C(RESULT_MISS) ] = 0x01b7, |
| 654 | }, |
| 655 | }, |
| 656 | |
| 657 | }; |
| 658 | |
| 659 | /* |
| 660 | * Notes on the events: |
| 661 | * - data reads do not include code reads (comparable to earlier tables) |
| 662 | * - data counts include speculative execution (except L1 write, dtlb, bpu) |
| 663 | * - remote node access includes remote memory, remote cache, remote mmio. |
| 664 | * - prefetches are not included in the counts because they are not |
| 665 | * reliably counted. |
| 666 | */ |
| 667 | |
| 668 | #define HSW_DEMAND_DATA_RD BIT_ULL(0) |
| 669 | #define HSW_DEMAND_RFO BIT_ULL(1) |
| 670 | #define HSW_ANY_RESPONSE BIT_ULL(16) |
| 671 | #define HSW_SUPPLIER_NONE BIT_ULL(17) |
| 672 | #define HSW_L3_MISS_LOCAL_DRAM BIT_ULL(22) |
| 673 | #define HSW_L3_MISS_REMOTE_HOP0 BIT_ULL(27) |
| 674 | #define HSW_L3_MISS_REMOTE_HOP1 BIT_ULL(28) |
| 675 | #define HSW_L3_MISS_REMOTE_HOP2P BIT_ULL(29) |
| 676 | #define HSW_L3_MISS (HSW_L3_MISS_LOCAL_DRAM| \ |
| 677 | HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ |
| 678 | HSW_L3_MISS_REMOTE_HOP2P) |
| 679 | #define HSW_SNOOP_NONE BIT_ULL(31) |
| 680 | #define HSW_SNOOP_NOT_NEEDED BIT_ULL(32) |
| 681 | #define HSW_SNOOP_MISS BIT_ULL(33) |
| 682 | #define HSW_SNOOP_HIT_NO_FWD BIT_ULL(34) |
| 683 | #define HSW_SNOOP_HIT_WITH_FWD BIT_ULL(35) |
| 684 | #define HSW_SNOOP_HITM BIT_ULL(36) |
| 685 | #define HSW_SNOOP_NON_DRAM BIT_ULL(37) |
| 686 | #define HSW_ANY_SNOOP (HSW_SNOOP_NONE| \ |
| 687 | HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \ |
| 688 | HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \ |
| 689 | HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM) |
| 690 | #define HSW_SNOOP_DRAM (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM) |
| 691 | #define HSW_DEMAND_READ HSW_DEMAND_DATA_RD |
| 692 | #define HSW_DEMAND_WRITE HSW_DEMAND_RFO |
| 693 | #define HSW_L3_MISS_REMOTE (HSW_L3_MISS_REMOTE_HOP0|\ |
| 694 | HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P) |
| 695 | #define HSW_LLC_ACCESS HSW_ANY_RESPONSE |
| 696 | |
| 697 | #define BDW_L3_MISS_LOCAL BIT(26) |
| 698 | #define BDW_L3_MISS (BDW_L3_MISS_LOCAL| \ |
| 699 | HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \ |
| 700 | HSW_L3_MISS_REMOTE_HOP2P) |
| 701 | |
| 702 | |
| 703 | static __initconst const u64 hsw_hw_cache_event_ids |
| 704 | [PERF_COUNT_HW_CACHE_MAX] |
| 705 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 706 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 707 | { |
| 708 | [ C(L1D ) ] = { |
| 709 | [ C(OP_READ) ] = { |
| 710 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ |
| 711 | [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */ |
| 712 | }, |
| 713 | [ C(OP_WRITE) ] = { |
| 714 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ |
| 715 | [ C(RESULT_MISS) ] = 0x0, |
| 716 | }, |
| 717 | [ C(OP_PREFETCH) ] = { |
| 718 | [ C(RESULT_ACCESS) ] = 0x0, |
| 719 | [ C(RESULT_MISS) ] = 0x0, |
| 720 | }, |
| 721 | }, |
| 722 | [ C(L1I ) ] = { |
| 723 | [ C(OP_READ) ] = { |
| 724 | [ C(RESULT_ACCESS) ] = 0x0, |
| 725 | [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */ |
| 726 | }, |
| 727 | [ C(OP_WRITE) ] = { |
| 728 | [ C(RESULT_ACCESS) ] = -1, |
| 729 | [ C(RESULT_MISS) ] = -1, |
| 730 | }, |
| 731 | [ C(OP_PREFETCH) ] = { |
| 732 | [ C(RESULT_ACCESS) ] = 0x0, |
| 733 | [ C(RESULT_MISS) ] = 0x0, |
| 734 | }, |
| 735 | }, |
| 736 | [ C(LL ) ] = { |
| 737 | [ C(OP_READ) ] = { |
| 738 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 739 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 740 | }, |
| 741 | [ C(OP_WRITE) ] = { |
| 742 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 743 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 744 | }, |
| 745 | [ C(OP_PREFETCH) ] = { |
| 746 | [ C(RESULT_ACCESS) ] = 0x0, |
| 747 | [ C(RESULT_MISS) ] = 0x0, |
| 748 | }, |
| 749 | }, |
| 750 | [ C(DTLB) ] = { |
| 751 | [ C(OP_READ) ] = { |
| 752 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */ |
| 753 | [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */ |
| 754 | }, |
| 755 | [ C(OP_WRITE) ] = { |
| 756 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */ |
| 757 | [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ |
| 758 | }, |
| 759 | [ C(OP_PREFETCH) ] = { |
| 760 | [ C(RESULT_ACCESS) ] = 0x0, |
| 761 | [ C(RESULT_MISS) ] = 0x0, |
| 762 | }, |
| 763 | }, |
| 764 | [ C(ITLB) ] = { |
| 765 | [ C(OP_READ) ] = { |
| 766 | [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */ |
| 767 | [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */ |
| 768 | }, |
| 769 | [ C(OP_WRITE) ] = { |
| 770 | [ C(RESULT_ACCESS) ] = -1, |
| 771 | [ C(RESULT_MISS) ] = -1, |
| 772 | }, |
| 773 | [ C(OP_PREFETCH) ] = { |
| 774 | [ C(RESULT_ACCESS) ] = -1, |
| 775 | [ C(RESULT_MISS) ] = -1, |
| 776 | }, |
| 777 | }, |
| 778 | [ C(BPU ) ] = { |
| 779 | [ C(OP_READ) ] = { |
| 780 | [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 781 | [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */ |
| 782 | }, |
| 783 | [ C(OP_WRITE) ] = { |
| 784 | [ C(RESULT_ACCESS) ] = -1, |
| 785 | [ C(RESULT_MISS) ] = -1, |
| 786 | }, |
| 787 | [ C(OP_PREFETCH) ] = { |
| 788 | [ C(RESULT_ACCESS) ] = -1, |
| 789 | [ C(RESULT_MISS) ] = -1, |
| 790 | }, |
| 791 | }, |
| 792 | [ C(NODE) ] = { |
| 793 | [ C(OP_READ) ] = { |
| 794 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 795 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 796 | }, |
| 797 | [ C(OP_WRITE) ] = { |
| 798 | [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 799 | [ C(RESULT_MISS) ] = 0x1b7, /* OFFCORE_RESPONSE */ |
| 800 | }, |
| 801 | [ C(OP_PREFETCH) ] = { |
| 802 | [ C(RESULT_ACCESS) ] = 0x0, |
| 803 | [ C(RESULT_MISS) ] = 0x0, |
| 804 | }, |
| 805 | }, |
| 806 | }; |
| 807 | |
| 808 | static __initconst const u64 hsw_hw_cache_extra_regs |
| 809 | [PERF_COUNT_HW_CACHE_MAX] |
| 810 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 811 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 812 | { |
| 813 | [ C(LL ) ] = { |
| 814 | [ C(OP_READ) ] = { |
| 815 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| |
| 816 | HSW_LLC_ACCESS, |
| 817 | [ C(RESULT_MISS) ] = HSW_DEMAND_READ| |
| 818 | HSW_L3_MISS|HSW_ANY_SNOOP, |
| 819 | }, |
| 820 | [ C(OP_WRITE) ] = { |
| 821 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| |
| 822 | HSW_LLC_ACCESS, |
| 823 | [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| |
| 824 | HSW_L3_MISS|HSW_ANY_SNOOP, |
| 825 | }, |
| 826 | [ C(OP_PREFETCH) ] = { |
| 827 | [ C(RESULT_ACCESS) ] = 0x0, |
| 828 | [ C(RESULT_MISS) ] = 0x0, |
| 829 | }, |
| 830 | }, |
| 831 | [ C(NODE) ] = { |
| 832 | [ C(OP_READ) ] = { |
| 833 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ| |
| 834 | HSW_L3_MISS_LOCAL_DRAM| |
| 835 | HSW_SNOOP_DRAM, |
| 836 | [ C(RESULT_MISS) ] = HSW_DEMAND_READ| |
| 837 | HSW_L3_MISS_REMOTE| |
| 838 | HSW_SNOOP_DRAM, |
| 839 | }, |
| 840 | [ C(OP_WRITE) ] = { |
| 841 | [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE| |
| 842 | HSW_L3_MISS_LOCAL_DRAM| |
| 843 | HSW_SNOOP_DRAM, |
| 844 | [ C(RESULT_MISS) ] = HSW_DEMAND_WRITE| |
| 845 | HSW_L3_MISS_REMOTE| |
| 846 | HSW_SNOOP_DRAM, |
| 847 | }, |
| 848 | [ C(OP_PREFETCH) ] = { |
| 849 | [ C(RESULT_ACCESS) ] = 0x0, |
| 850 | [ C(RESULT_MISS) ] = 0x0, |
| 851 | }, |
| 852 | }, |
| 853 | }; |
| 854 | |
| 855 | static __initconst const u64 westmere_hw_cache_event_ids |
| 856 | [PERF_COUNT_HW_CACHE_MAX] |
| 857 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 858 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 859 | { |
| 860 | [ C(L1D) ] = { |
| 861 | [ C(OP_READ) ] = { |
| 862 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| 863 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ |
| 864 | }, |
| 865 | [ C(OP_WRITE) ] = { |
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
| 867 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ |
| 868 | }, |
| 869 | [ C(OP_PREFETCH) ] = { |
| 870 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
| 871 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ |
| 872 | }, |
| 873 | }, |
| 874 | [ C(L1I ) ] = { |
| 875 | [ C(OP_READ) ] = { |
| 876 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| 877 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| 878 | }, |
| 879 | [ C(OP_WRITE) ] = { |
| 880 | [ C(RESULT_ACCESS) ] = -1, |
| 881 | [ C(RESULT_MISS) ] = -1, |
| 882 | }, |
| 883 | [ C(OP_PREFETCH) ] = { |
| 884 | [ C(RESULT_ACCESS) ] = 0x0, |
| 885 | [ C(RESULT_MISS) ] = 0x0, |
| 886 | }, |
| 887 | }, |
| 888 | [ C(LL ) ] = { |
| 889 | [ C(OP_READ) ] = { |
| 890 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 891 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 892 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
| 893 | [ C(RESULT_MISS) ] = 0x01b7, |
| 894 | }, |
| 895 | /* |
| 896 | * Use RFO, not WRITEBACK, because a write miss would typically occur |
| 897 | * on RFO. |
| 898 | */ |
| 899 | [ C(OP_WRITE) ] = { |
| 900 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 901 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 902 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 903 | [ C(RESULT_MISS) ] = 0x01b7, |
| 904 | }, |
| 905 | [ C(OP_PREFETCH) ] = { |
| 906 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 907 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 908 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 909 | [ C(RESULT_MISS) ] = 0x01b7, |
| 910 | }, |
| 911 | }, |
| 912 | [ C(DTLB) ] = { |
| 913 | [ C(OP_READ) ] = { |
| 914 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| 915 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ |
| 916 | }, |
| 917 | [ C(OP_WRITE) ] = { |
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
| 919 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ |
| 920 | }, |
| 921 | [ C(OP_PREFETCH) ] = { |
| 922 | [ C(RESULT_ACCESS) ] = 0x0, |
| 923 | [ C(RESULT_MISS) ] = 0x0, |
| 924 | }, |
| 925 | }, |
| 926 | [ C(ITLB) ] = { |
| 927 | [ C(OP_READ) ] = { |
| 928 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ |
| 929 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ |
| 930 | }, |
| 931 | [ C(OP_WRITE) ] = { |
| 932 | [ C(RESULT_ACCESS) ] = -1, |
| 933 | [ C(RESULT_MISS) ] = -1, |
| 934 | }, |
| 935 | [ C(OP_PREFETCH) ] = { |
| 936 | [ C(RESULT_ACCESS) ] = -1, |
| 937 | [ C(RESULT_MISS) ] = -1, |
| 938 | }, |
| 939 | }, |
| 940 | [ C(BPU ) ] = { |
| 941 | [ C(OP_READ) ] = { |
| 942 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 943 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ |
| 944 | }, |
| 945 | [ C(OP_WRITE) ] = { |
| 946 | [ C(RESULT_ACCESS) ] = -1, |
| 947 | [ C(RESULT_MISS) ] = -1, |
| 948 | }, |
| 949 | [ C(OP_PREFETCH) ] = { |
| 950 | [ C(RESULT_ACCESS) ] = -1, |
| 951 | [ C(RESULT_MISS) ] = -1, |
| 952 | }, |
| 953 | }, |
| 954 | [ C(NODE) ] = { |
| 955 | [ C(OP_READ) ] = { |
| 956 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 957 | [ C(RESULT_MISS) ] = 0x01b7, |
| 958 | }, |
| 959 | [ C(OP_WRITE) ] = { |
| 960 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 961 | [ C(RESULT_MISS) ] = 0x01b7, |
| 962 | }, |
| 963 | [ C(OP_PREFETCH) ] = { |
| 964 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 965 | [ C(RESULT_MISS) ] = 0x01b7, |
| 966 | }, |
| 967 | }, |
| 968 | }; |
| 969 | |
| 970 | /* |
| 971 | * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits; |
| 972 | * See IA32 SDM Vol 3B 30.6.1.3 |
| 973 | */ |
| 974 | |
| 975 | #define NHM_DMND_DATA_RD (1 << 0) |
| 976 | #define NHM_DMND_RFO (1 << 1) |
| 977 | #define NHM_DMND_IFETCH (1 << 2) |
| 978 | #define NHM_DMND_WB (1 << 3) |
| 979 | #define NHM_PF_DATA_RD (1 << 4) |
| 980 | #define NHM_PF_DATA_RFO (1 << 5) |
| 981 | #define NHM_PF_IFETCH (1 << 6) |
| 982 | #define NHM_OFFCORE_OTHER (1 << 7) |
| 983 | #define NHM_UNCORE_HIT (1 << 8) |
| 984 | #define NHM_OTHER_CORE_HIT_SNP (1 << 9) |
| 985 | #define NHM_OTHER_CORE_HITM (1 << 10) |
| 986 | /* reserved */ |
| 987 | #define NHM_REMOTE_CACHE_FWD (1 << 12) |
| 988 | #define NHM_REMOTE_DRAM (1 << 13) |
| 989 | #define NHM_LOCAL_DRAM (1 << 14) |
| 990 | #define NHM_NON_DRAM (1 << 15) |
| 991 | |
| 992 | #define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD) |
| 993 | #define NHM_REMOTE (NHM_REMOTE_DRAM) |
| 994 | |
| 995 | #define NHM_DMND_READ (NHM_DMND_DATA_RD) |
| 996 | #define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB) |
| 997 | #define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO) |
| 998 | |
| 999 | #define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM) |
| 1000 | #define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD) |
| 1001 | #define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS) |
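
/*
 * On Nehalem an L3 hit is a hit in the uncore LLC or in another
 * core's cache on the same package, an L3 miss is anything served
 * from local or remote DRAM, a remote cache forward, or a non-DRAM
 * source, and an L3 access is simply the union of the two.
 */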
| 1002 | |
| 1003 | static __initconst const u64 nehalem_hw_cache_extra_regs |
| 1004 | [PERF_COUNT_HW_CACHE_MAX] |
| 1005 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1006 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1007 | { |
| 1008 | [ C(LL ) ] = { |
| 1009 | [ C(OP_READ) ] = { |
| 1010 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS, |
| 1011 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS, |
| 1012 | }, |
| 1013 | [ C(OP_WRITE) ] = { |
| 1014 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS, |
| 1015 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS, |
| 1016 | }, |
| 1017 | [ C(OP_PREFETCH) ] = { |
| 1018 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS, |
| 1019 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS, |
| 1020 | }, |
| 1021 | }, |
| 1022 | [ C(NODE) ] = { |
| 1023 | [ C(OP_READ) ] = { |
| 1024 | [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE, |
| 1025 | [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE, |
| 1026 | }, |
| 1027 | [ C(OP_WRITE) ] = { |
| 1028 | [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE, |
| 1029 | [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE, |
| 1030 | }, |
| 1031 | [ C(OP_PREFETCH) ] = { |
| 1032 | [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE, |
| 1033 | [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE, |
| 1034 | }, |
| 1035 | }, |
| 1036 | }; |
| 1037 | |
| 1038 | static __initconst const u64 nehalem_hw_cache_event_ids |
| 1039 | [PERF_COUNT_HW_CACHE_MAX] |
| 1040 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1041 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1042 | { |
| 1043 | [ C(L1D) ] = { |
| 1044 | [ C(OP_READ) ] = { |
| 1045 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ |
| 1046 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ |
| 1047 | }, |
| 1048 | [ C(OP_WRITE) ] = { |
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
| 1050 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ |
| 1051 | }, |
| 1052 | [ C(OP_PREFETCH) ] = { |
| 1053 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ |
| 1054 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ |
| 1055 | }, |
| 1056 | }, |
| 1057 | [ C(L1I ) ] = { |
| 1058 | [ C(OP_READ) ] = { |
| 1059 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| 1060 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| 1061 | }, |
| 1062 | [ C(OP_WRITE) ] = { |
| 1063 | [ C(RESULT_ACCESS) ] = -1, |
| 1064 | [ C(RESULT_MISS) ] = -1, |
| 1065 | }, |
| 1066 | [ C(OP_PREFETCH) ] = { |
| 1067 | [ C(RESULT_ACCESS) ] = 0x0, |
| 1068 | [ C(RESULT_MISS) ] = 0x0, |
| 1069 | }, |
| 1070 | }, |
| 1071 | [ C(LL ) ] = { |
| 1072 | [ C(OP_READ) ] = { |
| 1073 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 1074 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1075 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ |
| 1076 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1077 | }, |
| 1078 | /* |
| 1079 | * Use RFO, not WRITEBACK, because a write miss would typically occur |
| 1080 | * on RFO. |
| 1081 | */ |
| 1082 | [ C(OP_WRITE) ] = { |
| 1083 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 1084 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1085 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 1086 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1087 | }, |
| 1088 | [ C(OP_PREFETCH) ] = { |
| 1089 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 1090 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1091 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 1092 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1093 | }, |
| 1094 | }, |
| 1095 | [ C(DTLB) ] = { |
| 1096 | [ C(OP_READ) ] = { |
| 1097 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ |
| 1098 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ |
| 1099 | }, |
| 1100 | [ C(OP_WRITE) ] = { |
| 1101 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ |
| 1102 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ |
| 1103 | }, |
| 1104 | [ C(OP_PREFETCH) ] = { |
| 1105 | [ C(RESULT_ACCESS) ] = 0x0, |
| 1106 | [ C(RESULT_MISS) ] = 0x0, |
| 1107 | }, |
| 1108 | }, |
| 1109 | [ C(ITLB) ] = { |
| 1110 | [ C(OP_READ) ] = { |
| 1111 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ |
| 1112 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ |
| 1113 | }, |
| 1114 | [ C(OP_WRITE) ] = { |
| 1115 | [ C(RESULT_ACCESS) ] = -1, |
| 1116 | [ C(RESULT_MISS) ] = -1, |
| 1117 | }, |
| 1118 | [ C(OP_PREFETCH) ] = { |
| 1119 | [ C(RESULT_ACCESS) ] = -1, |
| 1120 | [ C(RESULT_MISS) ] = -1, |
| 1121 | }, |
| 1122 | }, |
| 1123 | [ C(BPU ) ] = { |
| 1124 | [ C(OP_READ) ] = { |
| 1125 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ |
| 1126 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ |
| 1127 | }, |
| 1128 | [ C(OP_WRITE) ] = { |
| 1129 | [ C(RESULT_ACCESS) ] = -1, |
| 1130 | [ C(RESULT_MISS) ] = -1, |
| 1131 | }, |
| 1132 | [ C(OP_PREFETCH) ] = { |
| 1133 | [ C(RESULT_ACCESS) ] = -1, |
| 1134 | [ C(RESULT_MISS) ] = -1, |
| 1135 | }, |
| 1136 | }, |
| 1137 | [ C(NODE) ] = { |
| 1138 | [ C(OP_READ) ] = { |
| 1139 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1140 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1141 | }, |
| 1142 | [ C(OP_WRITE) ] = { |
| 1143 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1144 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1145 | }, |
| 1146 | [ C(OP_PREFETCH) ] = { |
| 1147 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1148 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1149 | }, |
| 1150 | }, |
| 1151 | }; |
| 1152 | |
| 1153 | static __initconst const u64 core2_hw_cache_event_ids |
| 1154 | [PERF_COUNT_HW_CACHE_MAX] |
| 1155 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1156 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1157 | { |
| 1158 | [ C(L1D) ] = { |
| 1159 | [ C(OP_READ) ] = { |
| 1160 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ |
| 1161 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ |
| 1162 | }, |
| 1163 | [ C(OP_WRITE) ] = { |
| 1164 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ |
| 1165 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ |
| 1166 | }, |
| 1167 | [ C(OP_PREFETCH) ] = { |
| 1168 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ |
| 1169 | [ C(RESULT_MISS) ] = 0, |
| 1170 | }, |
| 1171 | }, |
| 1172 | [ C(L1I ) ] = { |
| 1173 | [ C(OP_READ) ] = { |
| 1174 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ |
| 1175 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ |
| 1176 | }, |
| 1177 | [ C(OP_WRITE) ] = { |
| 1178 | [ C(RESULT_ACCESS) ] = -1, |
| 1179 | [ C(RESULT_MISS) ] = -1, |
| 1180 | }, |
| 1181 | [ C(OP_PREFETCH) ] = { |
| 1182 | [ C(RESULT_ACCESS) ] = 0, |
| 1183 | [ C(RESULT_MISS) ] = 0, |
| 1184 | }, |
| 1185 | }, |
| 1186 | [ C(LL ) ] = { |
| 1187 | [ C(OP_READ) ] = { |
| 1188 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ |
| 1189 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ |
| 1190 | }, |
| 1191 | [ C(OP_WRITE) ] = { |
| 1192 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ |
| 1193 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ |
| 1194 | }, |
| 1195 | [ C(OP_PREFETCH) ] = { |
| 1196 | [ C(RESULT_ACCESS) ] = 0, |
| 1197 | [ C(RESULT_MISS) ] = 0, |
| 1198 | }, |
| 1199 | }, |
| 1200 | [ C(DTLB) ] = { |
| 1201 | [ C(OP_READ) ] = { |
| 1202 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ |
| 1203 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ |
| 1204 | }, |
| 1205 | [ C(OP_WRITE) ] = { |
| 1206 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ |
| 1207 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ |
| 1208 | }, |
| 1209 | [ C(OP_PREFETCH) ] = { |
| 1210 | [ C(RESULT_ACCESS) ] = 0, |
| 1211 | [ C(RESULT_MISS) ] = 0, |
| 1212 | }, |
| 1213 | }, |
| 1214 | [ C(ITLB) ] = { |
| 1215 | [ C(OP_READ) ] = { |
| 1216 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ |
| 1217 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ |
| 1218 | }, |
| 1219 | [ C(OP_WRITE) ] = { |
| 1220 | [ C(RESULT_ACCESS) ] = -1, |
| 1221 | [ C(RESULT_MISS) ] = -1, |
| 1222 | }, |
| 1223 | [ C(OP_PREFETCH) ] = { |
| 1224 | [ C(RESULT_ACCESS) ] = -1, |
| 1225 | [ C(RESULT_MISS) ] = -1, |
| 1226 | }, |
| 1227 | }, |
| 1228 | [ C(BPU ) ] = { |
| 1229 | [ C(OP_READ) ] = { |
| 1230 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ |
| 1231 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ |
| 1232 | }, |
| 1233 | [ C(OP_WRITE) ] = { |
| 1234 | [ C(RESULT_ACCESS) ] = -1, |
| 1235 | [ C(RESULT_MISS) ] = -1, |
| 1236 | }, |
| 1237 | [ C(OP_PREFETCH) ] = { |
| 1238 | [ C(RESULT_ACCESS) ] = -1, |
| 1239 | [ C(RESULT_MISS) ] = -1, |
| 1240 | }, |
| 1241 | }, |
| 1242 | }; |
| 1243 | |
| 1244 | static __initconst const u64 atom_hw_cache_event_ids |
| 1245 | [PERF_COUNT_HW_CACHE_MAX] |
| 1246 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1247 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1248 | { |
| 1249 | [ C(L1D) ] = { |
| 1250 | [ C(OP_READ) ] = { |
| 1251 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ |
| 1252 | [ C(RESULT_MISS) ] = 0, |
| 1253 | }, |
| 1254 | [ C(OP_WRITE) ] = { |
| 1255 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ |
| 1256 | [ C(RESULT_MISS) ] = 0, |
| 1257 | }, |
| 1258 | [ C(OP_PREFETCH) ] = { |
| 1259 | [ C(RESULT_ACCESS) ] = 0x0, |
| 1260 | [ C(RESULT_MISS) ] = 0, |
| 1261 | }, |
| 1262 | }, |
| 1263 | [ C(L1I ) ] = { |
| 1264 | [ C(OP_READ) ] = { |
| 1265 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ |
| 1266 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ |
| 1267 | }, |
| 1268 | [ C(OP_WRITE) ] = { |
| 1269 | [ C(RESULT_ACCESS) ] = -1, |
| 1270 | [ C(RESULT_MISS) ] = -1, |
| 1271 | }, |
| 1272 | [ C(OP_PREFETCH) ] = { |
| 1273 | [ C(RESULT_ACCESS) ] = 0, |
| 1274 | [ C(RESULT_MISS) ] = 0, |
| 1275 | }, |
| 1276 | }, |
| 1277 | [ C(LL ) ] = { |
| 1278 | [ C(OP_READ) ] = { |
| 1279 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ |
| 1280 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ |
| 1281 | }, |
| 1282 | [ C(OP_WRITE) ] = { |
| 1283 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ |
| 1284 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ |
| 1285 | }, |
| 1286 | [ C(OP_PREFETCH) ] = { |
| 1287 | [ C(RESULT_ACCESS) ] = 0, |
| 1288 | [ C(RESULT_MISS) ] = 0, |
| 1289 | }, |
| 1290 | }, |
| 1291 | [ C(DTLB) ] = { |
| 1292 | [ C(OP_READ) ] = { |
| 1293 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ |
| 1294 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ |
| 1295 | }, |
| 1296 | [ C(OP_WRITE) ] = { |
| 1297 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ |
| 1298 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ |
| 1299 | }, |
| 1300 | [ C(OP_PREFETCH) ] = { |
| 1301 | [ C(RESULT_ACCESS) ] = 0, |
| 1302 | [ C(RESULT_MISS) ] = 0, |
| 1303 | }, |
| 1304 | }, |
| 1305 | [ C(ITLB) ] = { |
| 1306 | [ C(OP_READ) ] = { |
| 1307 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ |
| 1308 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ |
| 1309 | }, |
| 1310 | [ C(OP_WRITE) ] = { |
| 1311 | [ C(RESULT_ACCESS) ] = -1, |
| 1312 | [ C(RESULT_MISS) ] = -1, |
| 1313 | }, |
| 1314 | [ C(OP_PREFETCH) ] = { |
| 1315 | [ C(RESULT_ACCESS) ] = -1, |
| 1316 | [ C(RESULT_MISS) ] = -1, |
| 1317 | }, |
| 1318 | }, |
| 1319 | [ C(BPU ) ] = { |
| 1320 | [ C(OP_READ) ] = { |
| 1321 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ |
| 1322 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ |
| 1323 | }, |
| 1324 | [ C(OP_WRITE) ] = { |
| 1325 | [ C(RESULT_ACCESS) ] = -1, |
| 1326 | [ C(RESULT_MISS) ] = -1, |
| 1327 | }, |
| 1328 | [ C(OP_PREFETCH) ] = { |
| 1329 | [ C(RESULT_ACCESS) ] = -1, |
| 1330 | [ C(RESULT_MISS) ] = -1, |
| 1331 | }, |
| 1332 | }, |
| 1333 | }; |
| 1334 | |
| 1335 | static struct extra_reg intel_slm_extra_regs[] __read_mostly = |
| 1336 | { |
| 1337 | /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ |
| 1338 | INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0), |
| 1339 | INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1), |
| 1340 | EVENT_EXTRA_END |
| 1341 | }; |
| 1342 | |
| 1343 | #define SLM_DMND_READ SNB_DMND_DATA_RD |
| 1344 | #define SLM_DMND_WRITE SNB_DMND_RFO |
| 1345 | #define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO) |
| 1346 | |
| 1347 | #define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM) |
| 1348 | #define SLM_LLC_ACCESS SNB_RESP_ANY |
| 1349 | #define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM) |
| 1350 | |
| 1351 | static __initconst const u64 slm_hw_cache_extra_regs |
| 1352 | [PERF_COUNT_HW_CACHE_MAX] |
| 1353 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1354 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1355 | { |
| 1356 | [ C(LL ) ] = { |
| 1357 | [ C(OP_READ) ] = { |
| 1358 | [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, |
| 1359 | [ C(RESULT_MISS) ] = 0, |
| 1360 | }, |
| 1361 | [ C(OP_WRITE) ] = { |
| 1362 | [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, |
| 1363 | [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS, |
| 1364 | }, |
| 1365 | [ C(OP_PREFETCH) ] = { |
| 1366 | [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS, |
| 1367 | [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS, |
| 1368 | }, |
| 1369 | }, |
| 1370 | }; |
| 1371 | |
| 1372 | static __initconst const u64 slm_hw_cache_event_ids |
| 1373 | [PERF_COUNT_HW_CACHE_MAX] |
| 1374 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1375 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
| 1376 | { |
| 1377 | [ C(L1D) ] = { |
| 1378 | [ C(OP_READ) ] = { |
| 1379 | [ C(RESULT_ACCESS) ] = 0, |
| 1380 | [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */ |
| 1381 | }, |
| 1382 | [ C(OP_WRITE) ] = { |
| 1383 | [ C(RESULT_ACCESS) ] = 0, |
| 1384 | [ C(RESULT_MISS) ] = 0, |
| 1385 | }, |
| 1386 | [ C(OP_PREFETCH) ] = { |
| 1387 | [ C(RESULT_ACCESS) ] = 0, |
| 1388 | [ C(RESULT_MISS) ] = 0, |
| 1389 | }, |
| 1390 | }, |
| 1391 | [ C(L1I ) ] = { |
| 1392 | [ C(OP_READ) ] = { |
| 1393 | [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */ |
		[ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
| 1395 | }, |
| 1396 | [ C(OP_WRITE) ] = { |
| 1397 | [ C(RESULT_ACCESS) ] = -1, |
| 1398 | [ C(RESULT_MISS) ] = -1, |
| 1399 | }, |
| 1400 | [ C(OP_PREFETCH) ] = { |
| 1401 | [ C(RESULT_ACCESS) ] = 0, |
| 1402 | [ C(RESULT_MISS) ] = 0, |
| 1403 | }, |
| 1404 | }, |
| 1405 | [ C(LL ) ] = { |
| 1406 | [ C(OP_READ) ] = { |
| 1407 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
| 1408 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1409 | [ C(RESULT_MISS) ] = 0, |
| 1410 | }, |
| 1411 | [ C(OP_WRITE) ] = { |
| 1412 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
| 1413 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1414 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ |
| 1415 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1416 | }, |
| 1417 | [ C(OP_PREFETCH) ] = { |
| 1418 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
| 1419 | [ C(RESULT_ACCESS) ] = 0x01b7, |
| 1420 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ |
| 1421 | [ C(RESULT_MISS) ] = 0x01b7, |
| 1422 | }, |
| 1423 | }, |
| 1424 | [ C(DTLB) ] = { |
| 1425 | [ C(OP_READ) ] = { |
| 1426 | [ C(RESULT_ACCESS) ] = 0, |
| 1427 | [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */ |
| 1428 | }, |
| 1429 | [ C(OP_WRITE) ] = { |
| 1430 | [ C(RESULT_ACCESS) ] = 0, |
| 1431 | [ C(RESULT_MISS) ] = 0, |
| 1432 | }, |
| 1433 | [ C(OP_PREFETCH) ] = { |
| 1434 | [ C(RESULT_ACCESS) ] = 0, |
| 1435 | [ C(RESULT_MISS) ] = 0, |
| 1436 | }, |
| 1437 | }, |
| 1438 | [ C(ITLB) ] = { |
| 1439 | [ C(OP_READ) ] = { |
| 1440 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ |
| 1441 | [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ |
| 1442 | }, |
| 1443 | [ C(OP_WRITE) ] = { |
| 1444 | [ C(RESULT_ACCESS) ] = -1, |
| 1445 | [ C(RESULT_MISS) ] = -1, |
| 1446 | }, |
| 1447 | [ C(OP_PREFETCH) ] = { |
| 1448 | [ C(RESULT_ACCESS) ] = -1, |
| 1449 | [ C(RESULT_MISS) ] = -1, |
| 1450 | }, |
| 1451 | }, |
| 1452 | [ C(BPU ) ] = { |
| 1453 | [ C(OP_READ) ] = { |
| 1454 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ |
| 1455 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ |
| 1456 | }, |
| 1457 | [ C(OP_WRITE) ] = { |
| 1458 | [ C(RESULT_ACCESS) ] = -1, |
| 1459 | [ C(RESULT_MISS) ] = -1, |
| 1460 | }, |
| 1461 | [ C(OP_PREFETCH) ] = { |
| 1462 | [ C(RESULT_ACCESS) ] = -1, |
| 1463 | [ C(RESULT_MISS) ] = -1, |
| 1464 | }, |
| 1465 | }, |
| 1466 | }; |
| 1467 | |
| 1468 | #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ |
| 1469 | #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ |
| 1470 | #define KNL_MCDRAM_LOCAL BIT_ULL(21) |
| 1471 | #define KNL_MCDRAM_FAR BIT_ULL(22) |
| 1472 | #define KNL_DDR_LOCAL BIT_ULL(23) |
| 1473 | #define KNL_DDR_FAR BIT_ULL(24) |
| 1474 | #define KNL_DRAM_ANY (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \ |
| 1475 | KNL_DDR_LOCAL | KNL_DDR_FAR) |
| 1476 | #define KNL_L2_READ SLM_DMND_READ |
| 1477 | #define KNL_L2_WRITE SLM_DMND_WRITE |
| 1478 | #define KNL_L2_PREFETCH SLM_DMND_PREFETCH |
| 1479 | #define KNL_L2_ACCESS SLM_LLC_ACCESS |
| 1480 | #define KNL_L2_MISS (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \ |
| 1481 | KNL_DRAM_ANY | SNB_SNP_ANY | \ |
| 1482 | SNB_NON_DRAM) |
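
/*
 * Knights Landing has no shared L3: the "LL" cache slot is mapped
 * onto the per-tile L2, so an "L2 miss" here is a request satisfied
 * by another tile's L2, by MCDRAM or DDR (near or far), or by a
 * non-DRAM source.
 */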
| 1483 | |
| 1484 | static __initconst const u64 knl_hw_cache_extra_regs |
| 1485 | [PERF_COUNT_HW_CACHE_MAX] |
| 1486 | [PERF_COUNT_HW_CACHE_OP_MAX] |
| 1487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = { |
| 1488 | [C(LL)] = { |
| 1489 | [C(OP_READ)] = { |
| 1490 | [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS, |
| 1491 | [C(RESULT_MISS)] = 0, |
| 1492 | }, |
| 1493 | [C(OP_WRITE)] = { |
| 1494 | [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS, |
| 1495 | [C(RESULT_MISS)] = KNL_L2_WRITE | KNL_L2_MISS, |
| 1496 | }, |
| 1497 | [C(OP_PREFETCH)] = { |
| 1498 | [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS, |
| 1499 | [C(RESULT_MISS)] = KNL_L2_PREFETCH | KNL_L2_MISS, |
| 1500 | }, |
| 1501 | }, |
| 1502 | }; |
| 1503 | |
| 1504 | /* |
| 1505 | * Used from PMIs where the LBRs are already disabled. |
| 1506 | * |
| 1507 | * This function may be called consecutively; the PMU must remain in the
| 1508 | * disabled state across such consecutive calls.
| 1509 | *
| 1510 | * During consecutive calls, the same disable value is written to the related
| 1511 | * registers, so the PMU state remains unchanged. hw.state in
| 1512 | * intel_bts_disable_local() also remains PERF_HES_STOPPED across consecutive
| 1513 | * calls.
| 1514 | */ |
| 1515 | static void __intel_pmu_disable_all(void) |
| 1516 | { |
| 1517 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1518 | |
| 1519 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| 1520 | |
| 1521 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
| 1522 | intel_pmu_disable_bts(); |
| 1523 | else |
| 1524 | intel_bts_disable_local(); |
| 1525 | |
| 1526 | intel_pmu_pebs_disable_all(); |
| 1527 | } |
| 1528 | |
| 1529 | static void intel_pmu_disable_all(void) |
| 1530 | { |
| 1531 | __intel_pmu_disable_all(); |
| 1532 | intel_pmu_lbr_disable_all(); |
| 1533 | } |
| 1534 | |
| 1535 | static void __intel_pmu_enable_all(int added, bool pmi) |
| 1536 | { |
| 1537 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1538 | |
| 1539 | intel_pmu_pebs_enable_all(); |
| 1540 | intel_pmu_lbr_enable_all(pmi); |
| 1541 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, |
| 1542 | x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); |
| 1543 | |
| 1544 | if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
| 1545 | struct perf_event *event = |
| 1546 | cpuc->events[INTEL_PMC_IDX_FIXED_BTS]; |
| 1547 | |
| 1548 | if (WARN_ON_ONCE(!event)) |
| 1549 | return; |
| 1550 | |
| 1551 | intel_pmu_enable_bts(event->hw.config); |
| 1552 | } else |
| 1553 | intel_bts_enable_local(); |
| 1554 | } |
| 1555 | |
| 1556 | static void intel_pmu_enable_all(int added) |
| 1557 | { |
| 1558 | __intel_pmu_enable_all(added, false); |
| 1559 | } |
| 1560 | |
| 1561 | /* |
| 1562 | * Workaround for: |
| 1563 | * Intel Errata AAK100 (model 26) |
| 1564 | * Intel Errata AAP53 (model 30) |
| 1565 | * Intel Errata BD53 (model 44) |
| 1566 | * |
| 1567 | * The official story: |
| 1568 | * These chips need to be 'reset' when adding counters by programming the |
| 1569 | * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either |
| 1570 | * in sequence on the same PMC or on different PMCs. |
| 1571 | * |
| 1572 | * In practice it appears some of these events do in fact count, and
| 1573 | * we need to program all 4 events.
| 1574 | */ |
| 1575 | static void intel_pmu_nhm_workaround(void) |
| 1576 | { |
| 1577 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1578 | static const unsigned long nhm_magic[4] = { |
| 1579 | 0x4300B5, |
| 1580 | 0x4300D2, |
| 1581 | 0x4300B1, |
| 1582 | 0x4300B1 |
| 1583 | }; |
| 1584 | struct perf_event *event; |
| 1585 | int i; |
| 1586 | |
| 1587 | /* |
| 1588 | * The errata require the following steps:
| 1589 | * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
| 1590 | * 2) Configure 4 PERFEVTSELx with the magic events and clear
| 1591 | * the corresponding PMCx;
| 1592 | * 3) Set bits 0-3 of MSR_CORE_PERF_GLOBAL_CTRL;
| 1593 | * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
| 1594 | * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
| 1595 | */ |
| 1596 | |
| 1597 | /* |
| 1598 | * The real steps we take differ slightly from the above:
| 1599 | * A) To reduce MSR operations, we skip step 1) since those MSRs
| 1600 | * are already cleared before this function is called;
| 1601 | * B) Call x86_perf_event_update to save PMCx before configuring
| 1602 | * PERFEVTSELx with the magic number;
| 1603 | * C) With step 5), we only clear a PERFEVTSELx when it is not
| 1604 | * currently in use;
| 1605 | * D) Call x86_perf_event_set_period to restore PMCx;
| 1606 | */ |
| 1607 | |
| 1608 | /* We always operate on 4 pairs of performance counters */
| 1609 | for (i = 0; i < 4; i++) { |
| 1610 | event = cpuc->events[i]; |
| 1611 | if (event) |
| 1612 | x86_perf_event_update(event); |
| 1613 | } |
| 1614 | |
| 1615 | for (i = 0; i < 4; i++) { |
| 1616 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); |
| 1617 | wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); |
| 1618 | } |
| 1619 | |
| 1620 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); |
| 1621 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); |
| 1622 | |
| 1623 | for (i = 0; i < 4; i++) { |
| 1624 | event = cpuc->events[i]; |
| 1625 | |
| 1626 | if (event) { |
| 1627 | x86_perf_event_set_period(event); |
| 1628 | __x86_pmu_enable_event(&event->hw, |
| 1629 | ARCH_PERFMON_EVENTSEL_ENABLE); |
| 1630 | } else |
| 1631 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); |
| 1632 | } |
| 1633 | } |
| 1634 | |
| 1635 | static void intel_pmu_nhm_enable_all(int added) |
| 1636 | { |
| 1637 | if (added) |
| 1638 | intel_pmu_nhm_workaround(); |
| 1639 | intel_pmu_enable_all(added); |
| 1640 | } |
| 1641 | |
| 1642 | static inline u64 intel_pmu_get_status(void) |
| 1643 | { |
| 1644 | u64 status; |
| 1645 | |
| 1646 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
| 1647 | |
| 1648 | return status; |
| 1649 | } |
| 1650 | |
| 1651 | static inline void intel_pmu_ack_status(u64 ack) |
| 1652 | { |
| 1653 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
| 1654 | } |
| 1655 | |
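| | /*
| | * Fixed counters are controlled by 4-bit fields in
| | * MSR_ARCH_PERFMON_FIXED_CTR_CTRL, one field per counter; disabling a
| | * fixed counter simply clears its field.
| | */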
| 1656 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
| 1657 | { |
| 1658 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
| 1659 | u64 ctrl_val, mask; |
| 1660 | |
| 1661 | mask = 0xfULL << (idx * 4); |
| 1662 | |
| 1663 | rdmsrl(hwc->config_base, ctrl_val); |
| 1664 | ctrl_val &= ~mask; |
| 1665 | wrmsrl(hwc->config_base, ctrl_val); |
| 1666 | } |
| 1667 | |
| 1668 | static inline bool event_is_checkpointed(struct perf_event *event) |
| 1669 | { |
| 1670 | return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0; |
| 1671 | } |
| 1672 | |
| 1673 | static void intel_pmu_disable_event(struct perf_event *event) |
| 1674 | { |
| 1675 | struct hw_perf_event *hwc = &event->hw; |
| 1676 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1677 | |
| 1678 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
| 1679 | intel_pmu_disable_bts(); |
| 1680 | intel_pmu_drain_bts_buffer(); |
| 1681 | return; |
| 1682 | } |
| 1683 | |
| 1684 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
| 1685 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
| 1686 | cpuc->intel_cp_status &= ~(1ull << hwc->idx); |
| 1687 | |
| 1688 | /* |
| 1689 | * LBR must be disabled before disabling any actual event,
| 1690 | * because any event may be combined with LBR
| 1691 | */ |
| 1692 | if (needs_branch_stack(event)) |
| 1693 | intel_pmu_lbr_disable(event); |
| 1694 | |
| 1695 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 1696 | intel_pmu_disable_fixed(hwc); |
| 1697 | return; |
| 1698 | } |
| 1699 | |
| 1700 | x86_pmu_disable_event(event); |
| 1701 | |
| 1702 | if (unlikely(event->attr.precise_ip)) |
| 1703 | intel_pmu_pebs_disable(event); |
| 1704 | } |
| 1705 | |
| 1706 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
| 1707 | { |
| 1708 | int idx = hwc->idx - INTEL_PMC_IDX_FIXED; |
| 1709 | u64 ctrl_val, bits, mask; |
| 1710 | |
| 1711 | /* |
| 1712 | * Enable IRQ generation (0x8), |
| 1713 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) |
| 1714 | * if requested: |
| 1715 | */ |
| 1716 | bits = 0x8ULL; |
| 1717 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) |
| 1718 | bits |= 0x2; |
| 1719 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) |
| 1720 | bits |= 0x1; |
| 1721 | |
| 1722 | /* |
| 1723 | * ANY bit is supported in v3 and up |
| 1724 | */ |
| 1725 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) |
| 1726 | bits |= 0x4; |
| 1727 | |
| 1728 | bits <<= (idx * 4); |
| 1729 | mask = 0xfULL << (idx * 4); |
| 1730 | |
| 1731 | rdmsrl(hwc->config_base, ctrl_val); |
| 1732 | ctrl_val &= ~mask; |
| 1733 | ctrl_val |= bits; |
| 1734 | wrmsrl(hwc->config_base, ctrl_val); |
| 1735 | } |
| 1736 | |
| 1737 | static void intel_pmu_enable_event(struct perf_event *event) |
| 1738 | { |
| 1739 | struct hw_perf_event *hwc = &event->hw; |
| 1740 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1741 | |
| 1742 | if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) { |
| 1743 | if (!__this_cpu_read(cpu_hw_events.enabled)) |
| 1744 | return; |
| 1745 | |
| 1746 | intel_pmu_enable_bts(hwc->config); |
| 1747 | return; |
| 1748 | } |
| 1749 | /* |
| 1750 | * LBR must be enabled before enabling any actual event,
| 1751 | * because any event may be combined with LBR
| 1752 | */ |
| 1753 | if (needs_branch_stack(event)) |
| 1754 | intel_pmu_lbr_enable(event); |
| 1755 | |
| 1756 | if (event->attr.exclude_host) |
| 1757 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); |
| 1758 | if (event->attr.exclude_guest) |
| 1759 | cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx); |
| 1760 | |
| 1761 | if (unlikely(event_is_checkpointed(event))) |
| 1762 | cpuc->intel_cp_status |= (1ull << hwc->idx); |
| 1763 | |
| 1764 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
| 1765 | intel_pmu_enable_fixed(hwc); |
| 1766 | return; |
| 1767 | } |
| 1768 | |
| 1769 | if (unlikely(event->attr.precise_ip)) |
| 1770 | intel_pmu_pebs_enable(event); |
| 1771 | |
| 1772 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); |
| 1773 | } |
| 1774 | |
| 1775 | /* |
| 1776 | * Save and restart an expired event. Called by NMI contexts, |
| 1777 | * so it has to be careful about preempting normal event ops: |
| 1778 | */ |
| 1779 | int intel_pmu_save_and_restart(struct perf_event *event) |
| 1780 | { |
| 1781 | x86_perf_event_update(event); |
| 1782 | /* |
| 1783 | * For a checkpointed counter always reset back to 0. This |
| 1784 | * avoids a situation where the counter overflows, aborts the |
| 1785 | * transaction and is then set back to shortly before the |
| 1786 | * overflow, and overflows and aborts again. |
| 1787 | */ |
| 1788 | if (unlikely(event_is_checkpointed(event))) { |
| 1789 | /* No race with NMIs because the counter should not be armed */ |
| 1790 | wrmsrl(event->hw.event_base, 0); |
| 1791 | local64_set(&event->hw.prev_count, 0); |
| 1792 | } |
| 1793 | return x86_perf_event_set_period(event); |
| 1794 | } |
| 1795 | |
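| | /*
| | * Last-resort recovery: clear all generic and fixed counters, the BTS
| | * buffer index and the LBR/freeze debugctl bits. Used by the PMI
| | * handler when it appears to be stuck in a loop.
| | */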
| 1796 | static void intel_pmu_reset(void) |
| 1797 | { |
| 1798 | struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); |
| 1799 | unsigned long flags; |
| 1800 | int idx; |
| 1801 | |
| 1802 | if (!x86_pmu.num_counters) |
| 1803 | return; |
| 1804 | |
| 1805 | local_irq_save(flags); |
| 1806 | |
| 1807 | pr_info("clearing PMU state on CPU#%d\n", smp_processor_id()); |
| 1808 | |
| 1809 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 1810 | wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); |
| 1811 | wrmsrl_safe(x86_pmu_event_addr(idx), 0ull); |
| 1812 | } |
| 1813 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
| 1814 | wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
| 1815 | |
| 1816 | if (ds) |
| 1817 | ds->bts_index = ds->bts_buffer_base; |
| 1818 | |
| 1819 | /* Ack all overflows and disable fixed counters */ |
| 1820 | if (x86_pmu.version >= 2) { |
| 1821 | intel_pmu_ack_status(intel_pmu_get_status()); |
| 1822 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| 1823 | } |
| 1824 | |
| 1825 | /* Reset LBRs and LBR freezing */ |
| 1826 | if (x86_pmu.lbr_nr) { |
| 1827 | update_debugctlmsr(get_debugctlmsr() & |
| 1828 | ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR)); |
| 1829 | } |
| 1830 | |
| 1831 | local_irq_restore(flags); |
| 1832 | } |
| 1833 | |
| 1834 | /* |
| 1835 | * This handler is triggered by the local APIC, so the APIC IRQ handling |
| 1836 | * rules apply: |
| 1837 | */ |
| 1838 | static int intel_pmu_handle_irq(struct pt_regs *regs) |
| 1839 | { |
| 1840 | struct perf_sample_data data; |
| 1841 | struct cpu_hw_events *cpuc; |
| 1842 | int bit, loops; |
| 1843 | u64 status; |
| 1844 | int handled; |
| 1845 | |
| 1846 | cpuc = this_cpu_ptr(&cpu_hw_events); |
| 1847 | |
| 1848 | /* |
| 1849 | * There is no known reason not to always do the late ACK,
| 1850 | * but just in case make it opt-in.
| 1851 | */ |
| 1852 | if (!x86_pmu.late_ack) |
| 1853 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1854 | __intel_pmu_disable_all(); |
| 1855 | handled = intel_pmu_drain_bts_buffer(); |
| 1856 | handled += intel_bts_interrupt(); |
| 1857 | status = intel_pmu_get_status(); |
| 1858 | if (!status) |
| 1859 | goto done; |
| 1860 | |
| 1861 | loops = 0; |
| 1862 | again: |
| 1863 | intel_pmu_lbr_read(); |
| 1864 | intel_pmu_ack_status(status); |
| 1865 | if (++loops > 100) { |
| 1866 | static bool warned = false; |
| 1867 | if (!warned) { |
| 1868 | WARN(1, "perfevents: irq loop stuck!\n"); |
| 1869 | perf_event_print_debug(); |
| 1870 | warned = true; |
| 1871 | } |
| 1872 | intel_pmu_reset(); |
| 1873 | goto done; |
| 1874 | } |
| 1875 | |
| 1876 | inc_irq_stat(apic_perf_irqs); |
| 1877 | |
| 1878 | |
| 1879 | /* |
| 1880 | * Ignore a range of extra bits in status that do not indicate |
| 1881 | * overflow by themselves. |
| 1882 | */ |
| 1883 | status &= ~(GLOBAL_STATUS_COND_CHG | |
| 1884 | GLOBAL_STATUS_ASIF | |
| 1885 | GLOBAL_STATUS_LBRS_FROZEN); |
| 1886 | if (!status) |
| 1887 | goto done; |
| 1888 | |
| 1889 | /* |
| 1890 | * PEBS overflow sets bit 62 in the global status register |
| 1891 | */ |
| 1892 | if (__test_and_clear_bit(62, (unsigned long *)&status)) { |
| 1893 | handled++; |
| 1894 | x86_pmu.drain_pebs(regs); |
| 1895 | /* |
| 1896 | * There are cases where, even though the PEBS ovfl bit is set
| 1897 | * in GLOBAL_OVF_STATUS, the PEBS events may also have their
| 1898 | * overflow bits set for their counters. We must clear them
| 1899 | * here because they have been processed as exact samples in
| 1900 | * the drain_pebs() routine. They must not be processed again
| 1901 | * in the for_each_set_bit() loop for regular samples below.
| 1902 | */ |
| 1903 | status &= ~cpuc->pebs_enabled; |
| 1904 | status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; |
| 1905 | } |
| 1906 | |
| 1907 | /* |
| 1908 | * Intel PT |
| 1909 | */ |
| 1910 | if (__test_and_clear_bit(55, (unsigned long *)&status)) { |
| 1911 | handled++; |
| 1912 | intel_pt_interrupt(); |
| 1913 | } |
| 1914 | |
| 1915 | /* |
| 1916 | * Checkpointed counters can lead to 'spurious' PMIs because the |
| 1917 | * rollback caused by the PMI will have cleared the overflow status |
| 1918 | * bit. Therefore always force probe these counters. |
| 1919 | */ |
| 1920 | status |= cpuc->intel_cp_status; |
| 1921 | |
| 1922 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
| 1923 | struct perf_event *event = cpuc->events[bit]; |
| 1924 | |
| 1925 | handled++; |
| 1926 | |
| 1927 | if (!test_bit(bit, cpuc->active_mask)) |
| 1928 | continue; |
| 1929 | |
| 1930 | if (!intel_pmu_save_and_restart(event)) |
| 1931 | continue; |
| 1932 | |
| 1933 | perf_sample_data_init(&data, 0, event->hw.last_period); |
| 1934 | |
| 1935 | if (has_branch_stack(event)) |
| 1936 | data.br_stack = &cpuc->lbr_stack; |
| 1937 | |
| 1938 | if (perf_event_overflow(event, &data, regs)) |
| 1939 | x86_pmu_stop(event, 0); |
| 1940 | } |
| 1941 | |
| 1942 | /* |
| 1943 | * Repeat if there is more work to be done: |
| 1944 | */ |
| 1945 | status = intel_pmu_get_status(); |
| 1946 | if (status) |
| 1947 | goto again; |
| 1948 | |
| 1949 | done: |
| 1950 | /* Only restore PMU state when it's active. See x86_pmu_disable(). */ |
| 1951 | if (cpuc->enabled) |
| 1952 | __intel_pmu_enable_all(0, true); |
| 1953 | |
| 1954 | /* |
| 1955 | * Only unmask the NMI after the overflow counters |
| 1956 | * have been reset. This avoids spurious NMIs on |
| 1957 | * Haswell CPUs. |
| 1958 | */ |
| 1959 | if (x86_pmu.late_ack) |
| 1960 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 1961 | return handled; |
| 1962 | } |
| 1963 | |
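| | /*
| | * BTS is only used for a branch-instructions event with a fixed sample
| | * period of 1; frequency-based sampling varies the period and therefore
| | * cannot be redirected to BTS.
| | */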
| 1964 | static struct event_constraint * |
| 1965 | intel_bts_constraints(struct perf_event *event) |
| 1966 | { |
| 1967 | struct hw_perf_event *hwc = &event->hw; |
| 1968 | unsigned int hw_event, bts_event; |
| 1969 | |
| 1970 | if (event->attr.freq) |
| 1971 | return NULL; |
| 1972 | |
| 1973 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
| 1974 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
| 1975 | |
| 1976 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) |
| 1977 | return &bts_constraint; |
| 1978 | |
| 1979 | return NULL; |
| 1980 | } |
| 1981 | |
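| | /*
| | * On PMUs with a second OFFCORE_RESPONSE MSR (PMU_FL_HAS_RSP_1), try
| | * the alternate extra register when the requested one is busy, provided
| | * the config is also valid for the alternative.
| | */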
| 1982 | static int intel_alt_er(int idx, u64 config) |
| 1983 | { |
| 1984 | int alt_idx = idx; |
| 1985 | |
| 1986 | if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1)) |
| 1987 | return idx; |
| 1988 | |
| 1989 | if (idx == EXTRA_REG_RSP_0) |
| 1990 | alt_idx = EXTRA_REG_RSP_1; |
| 1991 | |
| 1992 | if (idx == EXTRA_REG_RSP_1) |
| 1993 | alt_idx = EXTRA_REG_RSP_0; |
| 1994 | |
| 1995 | if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask) |
| 1996 | return idx; |
| 1997 | |
| 1998 | return alt_idx; |
| 1999 | } |
| 2000 | |
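| | /*
| | * Rewrite the event to use the OFFCORE_RSP MSR chosen by intel_alt_er():
| | * both the event encoding and the extra_reg MSR address are updated
| | * consistently.
| | */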
| 2001 | static void intel_fixup_er(struct perf_event *event, int idx) |
| 2002 | { |
| 2003 | event->hw.extra_reg.idx = idx; |
| 2004 | |
| 2005 | if (idx == EXTRA_REG_RSP_0) { |
| 2006 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
| 2007 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event; |
| 2008 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; |
| 2009 | } else if (idx == EXTRA_REG_RSP_1) { |
| 2010 | event->hw.config &= ~INTEL_ARCH_EVENT_MASK; |
| 2011 | event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event; |
| 2012 | event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; |
| 2013 | } |
| 2014 | } |
| 2015 | |
| 2016 | /* |
| 2017 | * manage allocation of shared extra msr for certain events |
| 2018 | * |
| 2019 | * sharing can be: |
| 2020 | * per-cpu: to be shared between the various events on a single PMU |
| 2021 | * per-core: per-cpu + shared by HT threads |
| 2022 | */ |
| 2023 | static struct event_constraint * |
| 2024 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
| 2025 | struct perf_event *event, |
| 2026 | struct hw_perf_event_extra *reg) |
| 2027 | { |
| 2028 | struct event_constraint *c = &emptyconstraint; |
| 2029 | struct er_account *era; |
| 2030 | unsigned long flags; |
| 2031 | int idx = reg->idx; |
| 2032 | |
| 2033 | /* |
| 2034 | * reg->alloc can be set due to existing state, so for fake cpuc we |
| 2035 | * need to ignore this, otherwise we might fail to allocate proper fake |
| 2036 | * state for this extra reg constraint. Also see the comment below. |
| 2037 | */ |
| 2038 | if (reg->alloc && !cpuc->is_fake) |
| 2039 | return NULL; /* call x86_get_event_constraint() */ |
| 2040 | |
| 2041 | again: |
| 2042 | era = &cpuc->shared_regs->regs[idx]; |
| 2043 | /* |
| 2044 | * we use raw_spin_lock_irqsave() to avoid lockdep issues when
| 2045 | * passing a fake cpuc
| 2046 | */ |
| 2047 | raw_spin_lock_irqsave(&era->lock, flags); |
| 2048 | |
| 2049 | if (!atomic_read(&era->ref) || era->config == reg->config) { |
| 2050 | |
| 2051 | /* |
| 2052 | * If it's a fake cpuc -- as per validate_{group,event}() we
| 2053 | * shouldn't touch event state and we can avoid doing so |
| 2054 | * since both will only call get_event_constraints() once |
| 2055 | * on each event, this avoids the need for reg->alloc. |
| 2056 | * |
| 2057 | * Not doing the ER fixup will only result in era->reg being |
| 2058 | * wrong, but since we won't actually try and program hardware |
| 2059 | * this isn't a problem either. |
| 2060 | */ |
| 2061 | if (!cpuc->is_fake) { |
| 2062 | if (idx != reg->idx) |
| 2063 | intel_fixup_er(event, idx); |
| 2064 | |
| 2065 | /* |
| 2066 | * x86_schedule_events() can call get_event_constraints() |
| 2067 | * multiple times on events in the case of incremental |
| 2068 | * scheduling. reg->alloc ensures we only do the ER
| 2069 | * allocation once. |
| 2070 | */ |
| 2071 | reg->alloc = 1; |
| 2072 | } |
| 2073 | |
| 2074 | /* lock in msr value */ |
| 2075 | era->config = reg->config; |
| 2076 | era->reg = reg->reg; |
| 2077 | |
| 2078 | /* one more user */ |
| 2079 | atomic_inc(&era->ref); |
| 2080 | |
| 2081 | /* |
| 2082 | * need to call x86_get_event_constraint() |
| 2083 | * to check if associated event has constraints |
| 2084 | */ |
| 2085 | c = NULL; |
| 2086 | } else { |
| 2087 | idx = intel_alt_er(idx, reg->config); |
| 2088 | if (idx != reg->idx) { |
| 2089 | raw_spin_unlock_irqrestore(&era->lock, flags); |
| 2090 | goto again; |
| 2091 | } |
| 2092 | } |
| 2093 | raw_spin_unlock_irqrestore(&era->lock, flags); |
| 2094 | |
| 2095 | return c; |
| 2096 | } |
| 2097 | |
| 2098 | static void |
| 2099 | __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, |
| 2100 | struct hw_perf_event_extra *reg) |
| 2101 | { |
| 2102 | struct er_account *era; |
| 2103 | |
| 2104 | /* |
| 2105 | * Only put the constraint if the extra reg was actually allocated. Also
| 2106 | * takes care of events which do not use an extra shared reg.
| 2107 | * |
| 2108 | * Also, if this is a fake cpuc we shouldn't touch any event state |
| 2109 | * (reg->alloc) and we don't care about leaving inconsistent cpuc state |
| 2110 | * either since it'll be thrown out. |
| 2111 | */ |
| 2112 | if (!reg->alloc || cpuc->is_fake) |
| 2113 | return; |
| 2114 | |
| 2115 | era = &cpuc->shared_regs->regs[reg->idx]; |
| 2116 | |
| 2117 | /* one fewer user */ |
| 2118 | atomic_dec(&era->ref); |
| 2119 | |
| 2120 | /* allocate again next time */ |
| 2121 | reg->alloc = 0; |
| 2122 | } |
| 2123 | |
| 2124 | static struct event_constraint * |
| 2125 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
| 2126 | struct perf_event *event) |
| 2127 | { |
| 2128 | struct event_constraint *c = NULL, *d; |
| 2129 | struct hw_perf_event_extra *xreg, *breg; |
| 2130 | |
| 2131 | xreg = &event->hw.extra_reg; |
| 2132 | if (xreg->idx != EXTRA_REG_NONE) { |
| 2133 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); |
| 2134 | if (c == &emptyconstraint) |
| 2135 | return c; |
| 2136 | } |
| 2137 | breg = &event->hw.branch_reg; |
| 2138 | if (breg->idx != EXTRA_REG_NONE) { |
| 2139 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); |
| 2140 | if (d == &emptyconstraint) { |
| 2141 | __intel_shared_reg_put_constraints(cpuc, xreg); |
| 2142 | c = d; |
| 2143 | } |
| 2144 | } |
| 2145 | return c; |
| 2146 | } |
| 2147 | |
| 2148 | struct event_constraint * |
| 2149 | x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2150 | struct perf_event *event) |
| 2151 | { |
| 2152 | struct event_constraint *c; |
| 2153 | |
| 2154 | if (x86_pmu.event_constraints) { |
| 2155 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 2156 | if ((event->hw.config & c->cmask) == c->code) { |
| 2157 | event->hw.flags |= c->flags; |
| 2158 | return c; |
| 2159 | } |
| 2160 | } |
| 2161 | } |
| 2162 | |
| 2163 | return &unconstrained; |
| 2164 | } |
| 2165 | |
| 2166 | static struct event_constraint * |
| 2167 | __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2168 | struct perf_event *event) |
| 2169 | { |
| 2170 | struct event_constraint *c; |
| 2171 | |
| 2172 | c = intel_bts_constraints(event); |
| 2173 | if (c) |
| 2174 | return c; |
| 2175 | |
| 2176 | c = intel_shared_regs_constraints(cpuc, event); |
| 2177 | if (c) |
| 2178 | return c; |
| 2179 | |
| 2180 | c = intel_pebs_constraints(event); |
| 2181 | if (c) |
| 2182 | return c; |
| 2183 | |
| 2184 | return x86_get_event_constraints(cpuc, idx, event); |
| 2185 | } |
| 2186 | |
| 2187 | static void |
| 2188 | intel_start_scheduling(struct cpu_hw_events *cpuc) |
| 2189 | { |
| 2190 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2191 | struct intel_excl_states *xl; |
| 2192 | int tid = cpuc->excl_thread_id; |
| 2193 | |
| 2194 | /* |
| 2195 | * nothing needed if in group validation mode |
| 2196 | */ |
| 2197 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 2198 | return; |
| 2199 | |
| 2200 | /* |
| 2201 | * no exclusion needed |
| 2202 | */ |
| 2203 | if (WARN_ON_ONCE(!excl_cntrs)) |
| 2204 | return; |
| 2205 | |
| 2206 | xl = &excl_cntrs->states[tid]; |
| 2207 | |
| 2208 | xl->sched_started = true; |
| 2209 | /* |
| 2210 | * Lock shared state until we are done scheduling,
| 2211 | * in intel_stop_scheduling(); this makes scheduling
| 2212 | * appear as a transaction.
| 2213 | */ |
| 2214 | raw_spin_lock(&excl_cntrs->lock); |
| 2215 | } |
| 2216 | |
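| | /*
| | * For the HT exclusion workaround: record whether the counter just
| | * assigned on this thread holds an exclusive or a shared event, so the
| | * sibling thread can take that into account when scheduling.
| | */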
| 2217 | static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr) |
| 2218 | { |
| 2219 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2220 | struct event_constraint *c = cpuc->event_constraint[idx]; |
| 2221 | struct intel_excl_states *xl; |
| 2222 | int tid = cpuc->excl_thread_id; |
| 2223 | |
| 2224 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 2225 | return; |
| 2226 | |
| 2227 | if (WARN_ON_ONCE(!excl_cntrs)) |
| 2228 | return; |
| 2229 | |
| 2230 | if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) |
| 2231 | return; |
| 2232 | |
| 2233 | xl = &excl_cntrs->states[tid]; |
| 2234 | |
| 2235 | lockdep_assert_held(&excl_cntrs->lock); |
| 2236 | |
| 2237 | if (c->flags & PERF_X86_EVENT_EXCL) |
| 2238 | xl->state[cntr] = INTEL_EXCL_EXCLUSIVE; |
| 2239 | else |
| 2240 | xl->state[cntr] = INTEL_EXCL_SHARED; |
| 2241 | } |
| 2242 | |
| 2243 | static void |
| 2244 | intel_stop_scheduling(struct cpu_hw_events *cpuc) |
| 2245 | { |
| 2246 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2247 | struct intel_excl_states *xl; |
| 2248 | int tid = cpuc->excl_thread_id; |
| 2249 | |
| 2250 | /* |
| 2251 | * nothing needed if in group validation mode |
| 2252 | */ |
| 2253 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 2254 | return; |
| 2255 | /* |
| 2256 | * no exclusion needed |
| 2257 | */ |
| 2258 | if (WARN_ON_ONCE(!excl_cntrs)) |
| 2259 | return; |
| 2260 | |
| 2261 | xl = &excl_cntrs->states[tid]; |
| 2262 | |
| 2263 | xl->sched_started = false; |
| 2264 | /* |
| 2265 | * release shared state lock (acquired in intel_start_scheduling()) |
| 2266 | */ |
| 2267 | raw_spin_unlock(&excl_cntrs->lock); |
| 2268 | } |
| 2269 | |
| 2270 | static struct event_constraint * |
| 2271 | intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, |
| 2272 | int idx, struct event_constraint *c) |
| 2273 | { |
| 2274 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2275 | struct intel_excl_states *xlo; |
| 2276 | int tid = cpuc->excl_thread_id; |
| 2277 | int is_excl, i; |
| 2278 | |
| 2279 | /* |
| 2280 | * validating a group does not require |
| 2281 | * enforcing cross-thread exclusion |
| 2282 | */ |
| 2283 | if (cpuc->is_fake || !is_ht_workaround_enabled()) |
| 2284 | return c; |
| 2285 | |
| 2286 | /* |
| 2287 | * no exclusion needed |
| 2288 | */ |
| 2289 | if (WARN_ON_ONCE(!excl_cntrs)) |
| 2290 | return c; |
| 2291 | |
| 2292 | /* |
| 2293 | * because we modify the constraint, we need |
| 2294 | * to make a copy. Static constraints come |
| 2295 | * from static const tables. |
| 2296 | * |
| 2297 | * only needed when constraint has not yet |
| 2298 | * been cloned (marked dynamic) |
| 2299 | */ |
| 2300 | if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) { |
| 2301 | struct event_constraint *cx; |
| 2302 | |
| 2303 | /* |
| 2304 | * grab pre-allocated constraint entry |
| 2305 | */ |
| 2306 | cx = &cpuc->constraint_list[idx]; |
| 2307 | |
| 2308 | /* |
| 2309 | * initialize dynamic constraint |
| 2310 | * with static constraint |
| 2311 | */ |
| 2312 | *cx = *c; |
| 2313 | |
| 2314 | /* |
| 2315 | * mark constraint as dynamic, so we |
| 2316 | * can free it later on |
| 2317 | */ |
| 2318 | cx->flags |= PERF_X86_EVENT_DYNAMIC; |
| 2319 | c = cx; |
| 2320 | } |
| 2321 | |
| 2322 | /* |
| 2323 | * From here on, the constraint is dynamic. |
| 2324 | * Either it was just allocated above, or it |
| 2325 | * was allocated during an earlier invocation
| 2326 | * of this function |
| 2327 | */ |
| 2328 | |
| 2329 | /* |
| 2330 | * state of sibling HT |
| 2331 | */ |
| 2332 | xlo = &excl_cntrs->states[tid ^ 1]; |
| 2333 | |
| 2334 | /* |
| 2335 | * event requires exclusive counter access |
| 2336 | * across HT threads |
| 2337 | */ |
| 2338 | is_excl = c->flags & PERF_X86_EVENT_EXCL; |
| 2339 | if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { |
| 2340 | event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; |
| 2341 | if (!cpuc->n_excl++) |
| 2342 | WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); |
| 2343 | } |
| 2344 | |
| 2345 | /* |
| 2346 | * Modify static constraint with current dynamic |
| 2347 | * state of thread |
| 2348 | * |
| 2349 | * EXCLUSIVE: sibling counter measuring exclusive event |
| 2350 | * SHARED : sibling counter measuring non-exclusive event |
| 2351 | * UNUSED : sibling counter unused |
| 2352 | */ |
| 2353 | for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) { |
| 2354 | /* |
| 2355 | * exclusive event in sibling counter |
| 2356 | * our corresponding counter cannot be used |
| 2357 | * regardless of our event |
| 2358 | */ |
| 2359 | if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) |
| 2360 | __clear_bit(i, c->idxmsk); |
| 2361 | /* |
| 2362 | * if measuring an exclusive event, sibling |
| 2363 | * measuring non-exclusive, then counter cannot |
| 2364 | * be used |
| 2365 | */ |
| 2366 | if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) |
| 2367 | __clear_bit(i, c->idxmsk); |
| 2368 | } |
| 2369 | |
| 2370 | /* |
| 2371 | * recompute actual bit weight for scheduling algorithm |
| 2372 | */ |
| 2373 | c->weight = hweight64(c->idxmsk64); |
| 2374 | |
| 2375 | /* |
| 2376 | * if we return an empty mask, then switch |
| 2377 | * back to static empty constraint to avoid |
| 2378 | * the cost of freeing later on |
| 2379 | */ |
| 2380 | if (c->weight == 0) |
| 2381 | c = &emptyconstraint; |
| 2382 | |
| 2383 | return c; |
| 2384 | } |
| 2385 | |
| 2386 | static struct event_constraint * |
| 2387 | intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2388 | struct perf_event *event) |
| 2389 | { |
| 2390 | struct event_constraint *c1 = NULL; |
| 2391 | struct event_constraint *c2; |
| 2392 | |
| 2393 | if (idx >= 0) /* fake does < 0 */ |
| 2394 | c1 = cpuc->event_constraint[idx]; |
| 2395 | |
| 2396 | /* |
| 2397 | * first time only |
| 2398 | * - static constraint: no change across incremental scheduling calls |
| 2399 | * - dynamic constraint: handled by intel_get_excl_constraints() |
| 2400 | */ |
| 2401 | c2 = __intel_get_event_constraints(cpuc, idx, event); |
| 2402 | if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) { |
| 2403 | bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX); |
| 2404 | c1->weight = c2->weight; |
| 2405 | c2 = c1; |
| 2406 | } |
| 2407 | |
| 2408 | if (cpuc->excl_cntrs) |
| 2409 | return intel_get_excl_constraints(cpuc, event, idx, c2); |
| 2410 | |
| 2411 | return c2; |
| 2412 | } |
| 2413 | |
| 2414 | static void intel_put_excl_constraints(struct cpu_hw_events *cpuc, |
| 2415 | struct perf_event *event) |
| 2416 | { |
| 2417 | struct hw_perf_event *hwc = &event->hw; |
| 2418 | struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; |
| 2419 | int tid = cpuc->excl_thread_id; |
| 2420 | struct intel_excl_states *xl; |
| 2421 | |
| 2422 | /* |
| 2423 | * nothing needed if in group validation mode |
| 2424 | */ |
| 2425 | if (cpuc->is_fake) |
| 2426 | return; |
| 2427 | |
| 2428 | if (WARN_ON_ONCE(!excl_cntrs)) |
| 2429 | return; |
| 2430 | |
| 2431 | if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { |
| 2432 | hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; |
| 2433 | if (!--cpuc->n_excl) |
| 2434 | WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); |
| 2435 | } |
| 2436 | |
| 2437 | /* |
| 2438 | * If event was actually assigned, then mark the counter state as |
| 2439 | * unused now. |
| 2440 | */ |
| 2441 | if (hwc->idx >= 0) { |
| 2442 | xl = &excl_cntrs->states[tid]; |
| 2443 | |
| 2444 | /* |
| 2445 | * put_constraint() may be called from x86_schedule_events(),
| 2446 | * which already has the lock held, so make locking
| 2447 | * conditional here.
| 2448 | */ |
| 2449 | if (!xl->sched_started) |
| 2450 | raw_spin_lock(&excl_cntrs->lock); |
| 2451 | |
| 2452 | xl->state[hwc->idx] = INTEL_EXCL_UNUSED; |
| 2453 | |
| 2454 | if (!xl->sched_started) |
| 2455 | raw_spin_unlock(&excl_cntrs->lock); |
| 2456 | } |
| 2457 | } |
| 2458 | |
| 2459 | static void |
| 2460 | intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, |
| 2461 | struct perf_event *event) |
| 2462 | { |
| 2463 | struct hw_perf_event_extra *reg; |
| 2464 | |
| 2465 | reg = &event->hw.extra_reg; |
| 2466 | if (reg->idx != EXTRA_REG_NONE) |
| 2467 | __intel_shared_reg_put_constraints(cpuc, reg); |
| 2468 | |
| 2469 | reg = &event->hw.branch_reg; |
| 2470 | if (reg->idx != EXTRA_REG_NONE) |
| 2471 | __intel_shared_reg_put_constraints(cpuc, reg); |
| 2472 | } |
| 2473 | |
| 2474 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
| 2475 | struct perf_event *event) |
| 2476 | { |
| 2477 | intel_put_shared_regs_event_constraints(cpuc, event); |
| 2478 | |
| 2479 | /* |
| 2480 | * If the PMU has exclusive counter restrictions, then
| 2481 | * all events are subject to them and must call the
| 2482 | * put_excl_constraints() routine
| 2483 | */ |
| 2484 | if (cpuc->excl_cntrs) |
| 2485 | intel_put_excl_constraints(cpuc, event); |
| 2486 | } |
| 2487 | |
| 2488 | static void intel_pebs_aliases_core2(struct perf_event *event) |
| 2489 | { |
| 2490 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
| 2491 | /* |
| 2492 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
| 2493 | * (0x003c) so that we can use it with PEBS. |
| 2494 | * |
| 2495 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't |
| 2496 | * PEBS capable. However we can use INST_RETIRED.ANY_P |
| 2497 | * (0x00c0), which is a PEBS capable event, to get the same |
| 2498 | * count. |
| 2499 | * |
| 2500 | * INST_RETIRED.ANY_P counts the number of cycles that retire
| 2501 | * CNTMASK instructions. By setting CNTMASK to a value (16)
| 2502 | * larger than the maximum number of instructions that can be
| 2503 | * retired per cycle (4) and then inverting the condition, we
| 2504 | * count all cycles that retire fewer than 16 instructions,
| 2505 | * which is every cycle.
| 2506 | * |
| 2507 | * Thereby we gain a PEBS capable cycle counter. |
| 2508 | */ |
| 2509 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
| 2510 | |
| 2511 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 2512 | event->hw.config = alt_config; |
| 2513 | } |
| 2514 | } |
| 2515 | |
| 2516 | static void intel_pebs_aliases_snb(struct perf_event *event) |
| 2517 | { |
| 2518 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
| 2519 | /* |
| 2520 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
| 2521 | * (0x003c) so that we can use it with PEBS. |
| 2522 | * |
| 2523 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't |
| 2524 | * PEBS capable. However we can use UOPS_RETIRED.ALL |
| 2525 | * (0x01c2), which is a PEBS capable event, to get the same |
| 2526 | * count. |
| 2527 | * |
| 2528 | * UOPS_RETIRED.ALL counts the number of cycles that retire
| 2529 | * CNTMASK micro-ops. By setting CNTMASK to a value (16)
| 2530 | * larger than the maximum number of micro-ops that can be
| 2531 | * retired per cycle (4) and then inverting the condition, we
| 2532 | * count all cycles that retire fewer than 16 micro-ops,
| 2533 | * which is every cycle.
| 2534 | * |
| 2535 | * Thereby we gain a PEBS capable cycle counter. |
| 2536 | */ |
| 2537 | u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); |
| 2538 | |
| 2539 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 2540 | event->hw.config = alt_config; |
| 2541 | } |
| 2542 | } |
| 2543 | |
| 2544 | static void intel_pebs_aliases_precdist(struct perf_event *event) |
| 2545 | { |
| 2546 | if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { |
| 2547 | /* |
| 2548 | * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P |
| 2549 | * (0x003c) so that we can use it with PEBS. |
| 2550 | * |
| 2551 | * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't |
| 2552 | * PEBS capable. However we can use INST_RETIRED.PREC_DIST |
| 2553 | * (0x01c0), which is a PEBS capable event, to get the same |
| 2554 | * count. |
| 2555 | * |
| 2556 | * The PREC_DIST event has special support to minimize sample |
| 2557 | * shadowing effects. One drawback is that it can only be
| 2558 | * programmed on counter 1, but that seems like an
| 2559 | * acceptable trade off. |
| 2560 | */ |
| 2561 | u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16); |
| 2562 | |
| 2563 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
| 2564 | event->hw.config = alt_config; |
| 2565 | } |
| 2566 | } |
| 2567 | |
| 2568 | static void intel_pebs_aliases_ivb(struct perf_event *event) |
| 2569 | { |
| 2570 | if (event->attr.precise_ip < 3) |
| 2571 | return intel_pebs_aliases_snb(event); |
| 2572 | return intel_pebs_aliases_precdist(event); |
| 2573 | } |
| 2574 | |
| 2575 | static void intel_pebs_aliases_skl(struct perf_event *event) |
| 2576 | { |
| 2577 | if (event->attr.precise_ip < 3) |
| 2578 | return intel_pebs_aliases_core2(event); |
| 2579 | return intel_pebs_aliases_precdist(event); |
| 2580 | } |
| 2581 | |
| 2582 | static unsigned long intel_pmu_free_running_flags(struct perf_event *event) |
| 2583 | { |
| 2584 | unsigned long flags = x86_pmu.free_running_flags; |
| 2585 | |
| 2586 | if (event->attr.use_clockid) |
| 2587 | flags &= ~PERF_SAMPLE_TIME; |
| 2588 | return flags; |
| 2589 | } |
| 2590 | |
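| | /*
| | * Intel-specific event setup: apply PEBS aliases and auto-reload for
| | * precise events, set up the LBR filter when a branch stack is
| | * requested, and validate use of the ANY-thread bit on raw events.
| | */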
| 2591 | static int intel_pmu_hw_config(struct perf_event *event) |
| 2592 | { |
| 2593 | int ret = x86_pmu_hw_config(event); |
| 2594 | |
| 2595 | if (ret) |
| 2596 | return ret; |
| 2597 | |
| 2598 | if (event->attr.precise_ip) { |
| 2599 | if (!event->attr.freq) { |
| 2600 | event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; |
| 2601 | if (!(event->attr.sample_type & |
| 2602 | ~intel_pmu_free_running_flags(event))) |
| 2603 | event->hw.flags |= PERF_X86_EVENT_FREERUNNING; |
| 2604 | } |
| 2605 | if (x86_pmu.pebs_aliases) |
| 2606 | x86_pmu.pebs_aliases(event); |
| 2607 | } |
| 2608 | |
| 2609 | if (needs_branch_stack(event)) { |
| 2610 | ret = intel_pmu_setup_lbr_filter(event); |
| 2611 | if (ret) |
| 2612 | return ret; |
| 2613 | |
| 2614 | /* |
| 2615 | * BTS is set up earlier in this path, so don't account twice |
| 2616 | */ |
| 2617 | if (!intel_pmu_has_bts(event)) { |
| 2618 | /* disallow lbr if conflicting events are present */ |
| 2619 | if (x86_add_exclusive(x86_lbr_exclusive_lbr)) |
| 2620 | return -EBUSY; |
| 2621 | |
| 2622 | event->destroy = hw_perf_lbr_event_destroy; |
| 2623 | } |
| 2624 | } |
| 2625 | |
| 2626 | if (event->attr.type != PERF_TYPE_RAW) |
| 2627 | return 0; |
| 2628 | |
| 2629 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) |
| 2630 | return 0; |
| 2631 | |
| 2632 | if (x86_pmu.version < 3) |
| 2633 | return -EINVAL; |
| 2634 | |
| 2635 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
| 2636 | return -EACCES; |
| 2637 | |
| 2638 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; |
| 2639 | |
| 2640 | return 0; |
| 2641 | } |
| 2642 | |
| 2643 | struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr) |
| 2644 | { |
| 2645 | if (x86_pmu.guest_get_msrs) |
| 2646 | return x86_pmu.guest_get_msrs(nr); |
| 2647 | *nr = 0; |
| 2648 | return NULL; |
| 2649 | } |
| 2650 | EXPORT_SYMBOL_GPL(perf_guest_get_msrs); |
| 2651 | |
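| | /*
| | * Build the MSR switch list used on guest entry/exit: GLOBAL_CTRL is
| | * split according to the per-event host/guest exclusion masks, and PEBS
| | * is disabled while the guest runs.
| | */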
| 2652 | static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr) |
| 2653 | { |
| 2654 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2655 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; |
| 2656 | |
| 2657 | arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL; |
| 2658 | arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask; |
| 2659 | arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask; |
| 2660 | /* |
| 2661 | * If a PMU counter has PEBS enabled, it is not enough to disable the
| 2662 | * counter on guest entry, since a PEBS memory write can overshoot guest
| 2663 | * entry and corrupt guest memory. Disabling PEBS solves the problem.
| 2664 | */ |
| 2665 | arr[1].msr = MSR_IA32_PEBS_ENABLE; |
| 2666 | arr[1].host = cpuc->pebs_enabled; |
| 2667 | arr[1].guest = 0; |
| 2668 | |
| 2669 | *nr = 2; |
| 2670 | return arr; |
| 2671 | } |
| 2672 | |
| 2673 | static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr) |
| 2674 | { |
| 2675 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2676 | struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs; |
| 2677 | int idx; |
| 2678 | |
| 2679 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 2680 | struct perf_event *event = cpuc->events[idx]; |
| 2681 | |
| 2682 | arr[idx].msr = x86_pmu_config_addr(idx); |
| 2683 | arr[idx].host = arr[idx].guest = 0; |
| 2684 | |
| 2685 | if (!test_bit(idx, cpuc->active_mask)) |
| 2686 | continue; |
| 2687 | |
| 2688 | arr[idx].host = arr[idx].guest = |
| 2689 | event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2690 | |
| 2691 | if (event->attr.exclude_host) |
| 2692 | arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2693 | else if (event->attr.exclude_guest) |
| 2694 | arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
| 2695 | } |
| 2696 | |
| 2697 | *nr = x86_pmu.num_counters; |
| 2698 | return arr; |
| 2699 | } |
| 2700 | |
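| | /*
| | * The core PMU lacks the global control based host/guest masking used
| | * by intel_pmu, so events that exclude the host are simply never
| | * enabled here; see core_guest_get_msrs() for the guest side.
| | */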
| 2701 | static void core_pmu_enable_event(struct perf_event *event) |
| 2702 | { |
| 2703 | if (!event->attr.exclude_host) |
| 2704 | x86_pmu_enable_event(event); |
| 2705 | } |
| 2706 | |
| 2707 | static void core_pmu_enable_all(int added) |
| 2708 | { |
| 2709 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
| 2710 | int idx; |
| 2711 | |
| 2712 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
| 2713 | struct hw_perf_event *hwc = &cpuc->events[idx]->hw; |
| 2714 | |
| 2715 | if (!test_bit(idx, cpuc->active_mask) || |
| 2716 | cpuc->events[idx]->attr.exclude_host) |
| 2717 | continue; |
| 2718 | |
| 2719 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); |
| 2720 | } |
| 2721 | } |
| 2722 | |
| 2723 | static int hsw_hw_config(struct perf_event *event) |
| 2724 | { |
| 2725 | int ret = intel_pmu_hw_config(event); |
| 2726 | |
| 2727 | if (ret) |
| 2728 | return ret; |
| 2729 | if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) |
| 2730 | return 0; |
| 2731 | event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); |
| 2732 | |
| 2733 | /* |
| 2734 | * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
| 2735 | * PEBS or in ANY thread mode. Since the results are nonsensical,
| 2736 | * forbid this combination.
| 2737 | */ |
| 2738 | if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && |
| 2739 | ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || |
| 2740 | event->attr.precise_ip > 0)) |
| 2741 | return -EOPNOTSUPP; |
| 2742 | |
| 2743 | if (event_is_checkpointed(event)) { |
| 2744 | /* |
| 2745 | * Sampling of checkpointed events can cause situations where |
| 2746 | * the CPU constantly aborts because of an overflow, which is
| 2747 | * then checkpointed back and ignored. Forbid checkpointing |
| 2748 | * for sampling. |
| 2749 | * |
| 2750 | * But still allow a long sampling period, so that perf stat |
| 2751 | * from KVM works. |
| 2752 | */ |
| 2753 | if (event->attr.sample_period > 0 && |
| 2754 | event->attr.sample_period < 0x7fffffff) |
| 2755 | return -EOPNOTSUPP; |
| 2756 | } |
| 2757 | return 0; |
| 2758 | } |
| 2759 | |
| 2760 | static struct event_constraint counter2_constraint = |
| 2761 | EVENT_CONSTRAINT(0, 0x4, 0); |
| 2762 | |
| 2763 | static struct event_constraint * |
| 2764 | hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx, |
| 2765 | struct perf_event *event) |
| 2766 | { |
| 2767 | struct event_constraint *c; |
| 2768 | |
| 2769 | c = intel_get_event_constraints(cpuc, idx, event); |
| 2770 | |
| 2771 | /* Handle special quirk on in_tx_checkpointed only in counter 2 */ |
| 2772 | if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { |
| 2773 | if (c->idxmsk64 & (1U << 2)) |
| 2774 | return &counter2_constraint; |
| 2775 | return &emptyconstraint; |
| 2776 | } |
| 2777 | |
| 2778 | return c; |
| 2779 | } |
| 2780 | |
| 2781 | /* |
| 2782 | * Broadwell: |
| 2783 | * |
| 2784 | * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared |
| 2785 | * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine |
| 2786 | * the two to enforce a minimum period of 128 (the smallest value that has bits |
| 2787 | * 0-5 cleared and >= 100). |
| 2788 | * |
| 2789 | * Because of how the code in x86_perf_event_set_period() works, the truncation |
| 2790 | * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period |
| 2791 | * to make up for the 'lost' events due to carrying the 'error' in period_left. |
| 2792 | * |
| 2793 | * Therefore the effective (average) period matches the requested period, |
| 2794 | * despite coarser hardware granularity. |
| 2795 | */ |
| 2796 | static unsigned bdw_limit_period(struct perf_event *event, unsigned left) |
| 2797 | { |
| 2798 | if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == |
| 2799 | X86_CONFIG(.event=0xc0, .umask=0x01)) { |
| 2800 | if (left < 128) |
| 2801 | left = 128; |
| 2802 | left &= ~0x3fu; |
| 2803 | } |
| 2804 | return left; |
| 2805 | } |
| 2806 | |
| 2807 | PMU_FORMAT_ATTR(event, "config:0-7" ); |
| 2808 | PMU_FORMAT_ATTR(umask, "config:8-15" ); |
| 2809 | PMU_FORMAT_ATTR(edge, "config:18" ); |
| 2810 | PMU_FORMAT_ATTR(pc, "config:19" ); |
| 2811 | PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ |
| 2812 | PMU_FORMAT_ATTR(inv, "config:23" ); |
| 2813 | PMU_FORMAT_ATTR(cmask, "config:24-31" ); |
| 2814 | PMU_FORMAT_ATTR(in_tx, "config:32"); |
| 2815 | PMU_FORMAT_ATTR(in_tx_cp, "config:33"); |
| 2816 | |
| 2817 | static struct attribute *intel_arch_formats_attr[] = { |
| 2818 | &format_attr_event.attr, |
| 2819 | &format_attr_umask.attr, |
| 2820 | &format_attr_edge.attr, |
| 2821 | &format_attr_pc.attr, |
| 2822 | &format_attr_inv.attr, |
| 2823 | &format_attr_cmask.attr, |
| 2824 | NULL, |
| 2825 | }; |
| 2826 | |
| 2827 | ssize_t intel_event_sysfs_show(char *page, u64 config) |
| 2828 | { |
| 2829 | u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); |
| 2830 | |
| 2831 | return x86_event_sysfs_show(page, config, event); |
| 2832 | } |
| 2833 | |
| 2834 | struct intel_shared_regs *allocate_shared_regs(int cpu) |
| 2835 | { |
| 2836 | struct intel_shared_regs *regs; |
| 2837 | int i; |
| 2838 | |
| 2839 | regs = kzalloc_node(sizeof(struct intel_shared_regs), |
| 2840 | GFP_KERNEL, cpu_to_node(cpu)); |
| 2841 | if (regs) { |
| 2842 | /* |
| 2843 | * initialize the locks to keep lockdep happy |
| 2844 | */ |
| 2845 | for (i = 0; i < EXTRA_REG_MAX; i++) |
| 2846 | raw_spin_lock_init(®s->regs[i].lock); |
| 2847 | |
| 2848 | regs->core_id = -1; |
| 2849 | } |
| 2850 | return regs; |
| 2851 | } |
| 2852 | |
| 2853 | static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu) |
| 2854 | { |
| 2855 | struct intel_excl_cntrs *c; |
| 2856 | |
| 2857 | c = kzalloc_node(sizeof(struct intel_excl_cntrs), |
| 2858 | GFP_KERNEL, cpu_to_node(cpu)); |
| 2859 | if (c) { |
| 2860 | raw_spin_lock_init(&c->lock); |
| 2861 | c->core_id = -1; |
| 2862 | } |
| 2863 | return c; |
| 2864 | } |
| 2865 | |
| 2866 | static int intel_pmu_cpu_prepare(int cpu) |
| 2867 | { |
| 2868 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2869 | |
| 2870 | if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { |
| 2871 | cpuc->shared_regs = allocate_shared_regs(cpu); |
| 2872 | if (!cpuc->shared_regs) |
| 2873 | goto err; |
| 2874 | } |
| 2875 | |
| 2876 | if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { |
| 2877 | size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); |
| 2878 | |
| 2879 | cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); |
| 2880 | if (!cpuc->constraint_list) |
| 2881 | goto err_shared_regs; |
| 2882 | |
| 2883 | cpuc->excl_cntrs = allocate_excl_cntrs(cpu); |
| 2884 | if (!cpuc->excl_cntrs) |
| 2885 | goto err_constraint_list; |
| 2886 | |
| 2887 | cpuc->excl_thread_id = 0; |
| 2888 | } |
| 2889 | |
| 2890 | return NOTIFY_OK; |
| 2891 | |
| 2892 | err_constraint_list: |
| 2893 | kfree(cpuc->constraint_list); |
| 2894 | cpuc->constraint_list = NULL; |
| 2895 | |
| 2896 | err_shared_regs: |
| 2897 | kfree(cpuc->shared_regs); |
| 2898 | cpuc->shared_regs = NULL; |
| 2899 | |
| 2900 | err: |
| 2901 | return NOTIFY_BAD; |
| 2902 | } |
| 2903 | |
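| | /*
| | * CPU hotplug: wire up the structures allocated in
| | * intel_pmu_cpu_prepare(), reusing a sibling's shared_regs/excl_cntrs
| | * when HT threads on the same core share them.
| | */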
| 2904 | static void intel_pmu_cpu_starting(int cpu) |
| 2905 | { |
| 2906 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2907 | int core_id = topology_core_id(cpu); |
| 2908 | int i; |
| 2909 | |
| 2910 | init_debug_store_on_cpu(cpu); |
| 2911 | /* |
| 2912 | * Deal with CPUs that don't clear their LBRs on power-up. |
| 2913 | */ |
| 2914 | intel_pmu_lbr_reset(); |
| 2915 | |
| 2916 | cpuc->lbr_sel = NULL; |
| 2917 | |
| 2918 | if (!cpuc->shared_regs) |
| 2919 | return; |
| 2920 | |
| 2921 | if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) { |
| 2922 | for_each_cpu(i, topology_sibling_cpumask(cpu)) { |
| 2923 | struct intel_shared_regs *pc; |
| 2924 | |
| 2925 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
| 2926 | if (pc && pc->core_id == core_id) { |
| 2927 | cpuc->kfree_on_online[0] = cpuc->shared_regs; |
| 2928 | cpuc->shared_regs = pc; |
| 2929 | break; |
| 2930 | } |
| 2931 | } |
| 2932 | cpuc->shared_regs->core_id = core_id; |
| 2933 | cpuc->shared_regs->refcnt++; |
| 2934 | } |
| 2935 | |
| 2936 | if (x86_pmu.lbr_sel_map) |
| 2937 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
| 2938 | |
| 2939 | if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { |
| 2940 | for_each_cpu(i, topology_sibling_cpumask(cpu)) { |
| 2941 | struct intel_excl_cntrs *c; |
| 2942 | |
| 2943 | c = per_cpu(cpu_hw_events, i).excl_cntrs; |
| 2944 | if (c && c->core_id == core_id) { |
| 2945 | cpuc->kfree_on_online[1] = cpuc->excl_cntrs; |
| 2946 | cpuc->excl_cntrs = c; |
| 2947 | cpuc->excl_thread_id = 1; |
| 2948 | break; |
| 2949 | } |
| 2950 | } |
| 2951 | cpuc->excl_cntrs->core_id = core_id; |
| 2952 | cpuc->excl_cntrs->refcnt++; |
| 2953 | } |
| 2954 | } |
| 2955 | |
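| | /*
| | * Drop this CPU's reference on the exclusive-counter state shared with
| | * its HT sibling (freeing it once the last user is gone) and release
| | * the per-CPU dynamic constraint list.
| | */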
| 2956 | static void free_excl_cntrs(int cpu) |
| 2957 | { |
| 2958 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2959 | struct intel_excl_cntrs *c; |
| 2960 | |
| 2961 | c = cpuc->excl_cntrs; |
| 2962 | if (c) { |
| 2963 | if (c->core_id == -1 || --c->refcnt == 0) |
| 2964 | kfree(c); |
| 2965 | cpuc->excl_cntrs = NULL; |
| 2966 | kfree(cpuc->constraint_list); |
| 2967 | cpuc->constraint_list = NULL; |
| 2968 | } |
| 2969 | } |
| 2970 | |
| 2971 | static void intel_pmu_cpu_dying(int cpu) |
| 2972 | { |
| 2973 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
| 2974 | struct intel_shared_regs *pc; |
| 2975 | |
| 2976 | pc = cpuc->shared_regs; |
| 2977 | if (pc) { |
| 2978 | if (pc->core_id == -1 || --pc->refcnt == 0) |
| 2979 | kfree(pc); |
| 2980 | cpuc->shared_regs = NULL; |
| 2981 | } |
| 2982 | |
| 2983 | free_excl_cntrs(cpu); |
| 2984 | |
| 2985 | fini_debug_store_on_cpu(cpu); |
| 2986 | } |
| 2987 | |
| 2988 | static void intel_pmu_sched_task(struct perf_event_context *ctx, |
| 2989 | bool sched_in) |
| 2990 | { |
| 2991 | if (x86_pmu.pebs_active) |
| 2992 | intel_pmu_pebs_sched_task(ctx, sched_in); |
| 2993 | if (x86_pmu.lbr_nr) |
| 2994 | intel_pmu_lbr_sched_task(ctx, sched_in); |
| 2995 | } |
| 2996 | |
| 2997 | PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); |
| 2998 | |
| 2999 | PMU_FORMAT_ATTR(ldlat, "config1:0-15"); |
| 3000 | |
| 3001 | PMU_FORMAT_ATTR(frontend, "config1:0-23"); |
| 3002 | |
| 3003 | static struct attribute *intel_arch3_formats_attr[] = { |
| 3004 | &format_attr_event.attr, |
| 3005 | &format_attr_umask.attr, |
| 3006 | &format_attr_edge.attr, |
| 3007 | &format_attr_pc.attr, |
| 3008 | &format_attr_any.attr, |
| 3009 | &format_attr_inv.attr, |
| 3010 | &format_attr_cmask.attr, |
| 3011 | &format_attr_in_tx.attr, |
| 3012 | &format_attr_in_tx_cp.attr, |
| 3013 | |
| 3014 | &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ |
| 3015 | &format_attr_ldlat.attr, /* PEBS load latency */ |
| 3016 | NULL, |
| 3017 | }; |
| 3018 | |
| 3019 | static struct attribute *skl_format_attr[] = { |
| 3020 | &format_attr_frontend.attr, |
| 3021 | NULL, |
| 3022 | }; |
| 3023 | |
| 3024 | static __initconst const struct x86_pmu core_pmu = { |
| 3025 | .name = "core", |
| 3026 | .handle_irq = x86_pmu_handle_irq, |
| 3027 | .disable_all = x86_pmu_disable_all, |
| 3028 | .enable_all = core_pmu_enable_all, |
| 3029 | .enable = core_pmu_enable_event, |
| 3030 | .disable = x86_pmu_disable_event, |
| 3031 | .hw_config = x86_pmu_hw_config, |
| 3032 | .schedule_events = x86_schedule_events, |
| 3033 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 3034 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 3035 | .event_map = intel_pmu_event_map, |
| 3036 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 3037 | .apic = 1, |
| 3038 | .free_running_flags = PEBS_FREERUNNING_FLAGS, |
| 3039 | |
| 3040 | /* |
| 3041 | * Intel PMCs cannot be accessed sanely above 32-bit width, |
| 3042 | * so we install an artificial 1<<31 period regardless of |
| 3043 | * the generic event period: |
| 3044 | */ |
| 3045 | .max_period = (1ULL<<31) - 1, |
| 3046 | .get_event_constraints = intel_get_event_constraints, |
| 3047 | .put_event_constraints = intel_put_event_constraints, |
| 3048 | .event_constraints = intel_core_event_constraints, |
| 3049 | .guest_get_msrs = core_guest_get_msrs, |
| 3050 | .format_attrs = intel_arch_formats_attr, |
| 3051 | .events_sysfs_show = intel_event_sysfs_show, |
| 3052 | |
| 3053 | /* |
| 3054 | * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
| 3055 | * together with PMU version 1 and thus use core_pmu with
| 3056 | * shared_regs. We need the following callbacks here to allocate
| 3057 | * it properly. |
| 3058 | */ |
| 3059 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 3060 | .cpu_starting = intel_pmu_cpu_starting, |
| 3061 | .cpu_dying = intel_pmu_cpu_dying, |
| 3062 | }; |
| 3063 | |
| 3064 | static __initconst const struct x86_pmu intel_pmu = { |
| 3065 | .name = "Intel", |
| 3066 | .handle_irq = intel_pmu_handle_irq, |
| 3067 | .disable_all = intel_pmu_disable_all, |
| 3068 | .enable_all = intel_pmu_enable_all, |
| 3069 | .enable = intel_pmu_enable_event, |
| 3070 | .disable = intel_pmu_disable_event, |
| 3071 | .hw_config = intel_pmu_hw_config, |
| 3072 | .schedule_events = x86_schedule_events, |
| 3073 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
| 3074 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
| 3075 | .event_map = intel_pmu_event_map, |
| 3076 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
| 3077 | .apic = 1, |
| 3078 | .free_running_flags = PEBS_FREERUNNING_FLAGS, |
| 3079 | /* |
| 3080 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
| 3081 | * so we install an artificial 1<<31 period regardless of |
| 3082 | * the generic event period: |
| 3083 | */ |
| 3084 | .max_period = (1ULL << 31) - 1, |
| 3085 | .get_event_constraints = intel_get_event_constraints, |
| 3086 | .put_event_constraints = intel_put_event_constraints, |
| 3087 | .pebs_aliases = intel_pebs_aliases_core2, |
| 3088 | |
| 3089 | .format_attrs = intel_arch3_formats_attr, |
| 3090 | .events_sysfs_show = intel_event_sysfs_show, |
| 3091 | |
| 3092 | .cpu_prepare = intel_pmu_cpu_prepare, |
| 3093 | .cpu_starting = intel_pmu_cpu_starting, |
| 3094 | .cpu_dying = intel_pmu_cpu_dying, |
| 3095 | .guest_get_msrs = intel_guest_get_msrs, |
| 3096 | .sched_task = intel_pmu_sched_task, |
| 3097 | }; |
| 3098 | |
| 3099 | static __init void intel_clovertown_quirk(void) |
| 3100 | { |
| 3101 | /* |
| 3102 | * PEBS is unreliable due to: |
| 3103 | * |
| 3104 | * AJ67 - PEBS may experience CPL leaks |
| 3105 | * AJ68 - PEBS PMI may be delayed by one event |
| 3106 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] |
| 3107 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS |
| 3108 | * |
| 3109 | * AJ67 could be worked around by restricting the OS/USR flags. |
| 3110 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. |
| 3111 | * |
| 3112 | * AJ106 could possibly be worked around by not allowing LBR |
| 3113 | * usage from PEBS, including the fixup. |
| 3114 | * AJ68 could possibly be worked around by always programming |
| 3115 | * a pebs_event_reset[0] value and coping with the lost events. |
| 3116 | * |
| 3117 | * But taken together it might just make sense to not enable PEBS on |
| 3118 | * these chips. |
| 3119 | */ |
| 3120 | pr_warn("PEBS disabled due to CPU errata\n"); |
| 3121 | x86_pmu.pebs = 0; |
| 3122 | x86_pmu.pebs_constraints = NULL; |
| 3123 | } |
| 3124 | |
| 3125 | static int intel_snb_pebs_broken(int cpu) |
| 3126 | { |
| 3127 | u32 rev = UINT_MAX; /* default to broken for unknown models */ |
| 3128 | |
| 3129 | switch (cpu_data(cpu).x86_model) { |
| 3130 | case 42: /* SNB */ |
| 3131 | rev = 0x28; |
| 3132 | break; |
| 3133 | |
| 3134 | case 45: /* SNB-EP */ |
| 3135 | switch (cpu_data(cpu).x86_mask) { |
| 3136 | case 6: rev = 0x618; break; |
| 3137 | case 7: rev = 0x70c; break; |
| 3138 | } |
| 3139 | } |
| 3140 | |
| 3141 | return (cpu_data(cpu).microcode < rev); |
| 3142 | } |
| 3143 | |
| 3144 | static void intel_snb_check_microcode(void) |
| 3145 | { |
| 3146 | int pebs_broken = 0; |
| 3147 | int cpu; |
| 3148 | |
| 3149 | get_online_cpus(); |
	for_each_online_cpu(cpu) {
		pebs_broken = intel_snb_pebs_broken(cpu);
		if (pebs_broken)
			break;
	}
| 3154 | put_online_cpus(); |
| 3155 | |
| 3156 | if (pebs_broken == x86_pmu.pebs_broken) |
| 3157 | return; |
| 3158 | |
| 3159 | /* |
 * Serialized by the microcode lock.
| 3161 | */ |
| 3162 | if (x86_pmu.pebs_broken) { |
| 3163 | pr_info("PEBS enabled due to microcode update\n"); |
| 3164 | x86_pmu.pebs_broken = 0; |
| 3165 | } else { |
| 3166 | pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); |
| 3167 | x86_pmu.pebs_broken = 1; |
| 3168 | } |
| 3169 | } |
| 3170 | |
| 3171 | /* |
 * Under certain circumstances, accessing certain MSRs may cause a #GP.
 * This function tests whether the given MSR can be safely accessed.
| 3174 | */ |
| 3175 | static bool check_msr(unsigned long msr, u64 mask) |
| 3176 | { |
| 3177 | u64 val_old, val_new, val_tmp; |
| 3178 | |
| 3179 | /* |
| 3180 | * Read the current value, change it and read it back to see if it |
	 * matches. This is needed to detect certain hardware emulators
	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
| 3183 | */ |
| 3184 | if (rdmsrl_safe(msr, &val_old)) |
| 3185 | return false; |
| 3186 | |
| 3187 | /* |
| 3188 | * Only change the bits which can be updated by wrmsrl. |
| 3189 | */ |
| 3190 | val_tmp = val_old ^ mask; |
| 3191 | if (wrmsrl_safe(msr, val_tmp) || |
| 3192 | rdmsrl_safe(msr, &val_new)) |
| 3193 | return false; |
| 3194 | |
| 3195 | if (val_new != val_tmp) |
| 3196 | return false; |
| 3197 | |
	/*
	 * At this point it is certain that the MSR can be safely accessed.
	 * Restore the old value and return.
	 */
| 3201 | wrmsrl(msr, val_old); |
| 3202 | |
| 3203 | return true; |
| 3204 | } |
| 3205 | |
| 3206 | static __init void intel_sandybridge_quirk(void) |
| 3207 | { |
| 3208 | x86_pmu.check_microcode = intel_snb_check_microcode; |
| 3209 | intel_snb_check_microcode(); |
| 3210 | } |
| 3211 | |
| 3212 | static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { |
| 3213 | { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, |
| 3214 | { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, |
| 3215 | { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, |
| 3216 | { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, |
| 3217 | { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, |
| 3218 | { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, |
| 3219 | { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, |
| 3220 | }; |
| 3221 | |
| 3222 | static __init void intel_arch_events_quirk(void) |
| 3223 | { |
| 3224 | int bit; |
| 3225 | |
	/* disable events that CPUID reports as not present */
| 3227 | for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) { |
| 3228 | intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; |
| 3229 | pr_warn("CPUID marked event: \'%s\' unavailable\n", |
| 3230 | intel_arch_events_map[bit].name); |
| 3231 | } |
| 3232 | } |
| 3233 | |
| 3234 | static __init void intel_nehalem_quirk(void) |
| 3235 | { |
| 3236 | union cpuid10_ebx ebx; |
| 3237 | |
| 3238 | ebx.full = x86_pmu.events_maskl; |
| 3239 | if (ebx.split.no_branch_misses_retired) { |
| 3240 | /* |
		 * Erratum AAJ80 detected; work around it by using
		 * the BR_MISP_EXEC.ANY event instead. This will over-count
		 * branch-misses, but it's still much better than the
		 * architectural event, which is often completely bogus:
| 3245 | */ |
| 3246 | intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89; |
| 3247 | ebx.split.no_branch_misses_retired = 0; |
| 3248 | x86_pmu.events_maskl = ebx.full; |
| 3249 | pr_info("CPU erratum AAJ80 worked around\n"); |
| 3250 | } |
| 3251 | } |
| 3252 | |
| 3253 | /* |
 * Enable the software workaround for errata:
 * SNB: BJ122
 * IVB: BV98
 * HSW: HSD29
 *
 * It is only needed when HT is enabled. However, detecting
 * whether HT is enabled is difficult (model specific). So instead,
 * we enable the workaround at early boot and verify whether
 * it is needed in a later initcall phase, once we have valid
 * topology information to check if HT is actually enabled.
| 3264 | */ |
| 3265 | static __init void intel_ht_bug(void) |
| 3266 | { |
| 3267 | x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED; |
| 3268 | |
| 3269 | x86_pmu.start_scheduling = intel_start_scheduling; |
| 3270 | x86_pmu.commit_scheduling = intel_commit_scheduling; |
| 3271 | x86_pmu.stop_scheduling = intel_stop_scheduling; |
| 3272 | } |
| 3273 | |
| 3274 | EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); |
EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82");
| 3276 | |
| 3277 | /* Haswell special events */ |
| 3278 | EVENT_ATTR_STR(tx-start, tx_start, "event=0xc9,umask=0x1"); |
| 3279 | EVENT_ATTR_STR(tx-commit, tx_commit, "event=0xc9,umask=0x2"); |
| 3280 | EVENT_ATTR_STR(tx-abort, tx_abort, "event=0xc9,umask=0x4"); |
| 3281 | EVENT_ATTR_STR(tx-capacity, tx_capacity, "event=0x54,umask=0x2"); |
| 3282 | EVENT_ATTR_STR(tx-conflict, tx_conflict, "event=0x54,umask=0x1"); |
| 3283 | EVENT_ATTR_STR(el-start, el_start, "event=0xc8,umask=0x1"); |
| 3284 | EVENT_ATTR_STR(el-commit, el_commit, "event=0xc8,umask=0x2"); |
| 3285 | EVENT_ATTR_STR(el-abort, el_abort, "event=0xc8,umask=0x4"); |
| 3286 | EVENT_ATTR_STR(el-capacity, el_capacity, "event=0x54,umask=0x2"); |
| 3287 | EVENT_ATTR_STR(el-conflict, el_conflict, "event=0x54,umask=0x1"); |
| 3288 | EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); |
| 3289 | EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); |
| 3290 | |
| 3291 | static struct attribute *hsw_events_attrs[] = { |
| 3292 | EVENT_PTR(tx_start), |
| 3293 | EVENT_PTR(tx_commit), |
| 3294 | EVENT_PTR(tx_abort), |
| 3295 | EVENT_PTR(tx_capacity), |
| 3296 | EVENT_PTR(tx_conflict), |
| 3297 | EVENT_PTR(el_start), |
| 3298 | EVENT_PTR(el_commit), |
| 3299 | EVENT_PTR(el_abort), |
| 3300 | EVENT_PTR(el_capacity), |
| 3301 | EVENT_PTR(el_conflict), |
| 3302 | EVENT_PTR(cycles_t), |
| 3303 | EVENT_PTR(cycles_ct), |
| 3304 | EVENT_PTR(mem_ld_hsw), |
| 3305 | EVENT_PTR(mem_st_hsw), |
| 3306 | NULL |
| 3307 | }; |
| 3308 | |
| 3309 | __init int intel_pmu_init(void) |
| 3310 | { |
| 3311 | union cpuid10_edx edx; |
| 3312 | union cpuid10_eax eax; |
| 3313 | union cpuid10_ebx ebx; |
| 3314 | struct event_constraint *c; |
| 3315 | unsigned int unused; |
| 3316 | struct extra_reg *er; |
| 3317 | int version, i; |
| 3318 | |
| 3319 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| 3320 | switch (boot_cpu_data.x86) { |
| 3321 | case 0x6: |
| 3322 | return p6_pmu_init(); |
| 3323 | case 0xb: |
| 3324 | return knc_pmu_init(); |
| 3325 | case 0xf: |
| 3326 | return p4_pmu_init(); |
| 3327 | } |
| 3328 | return -ENODEV; |
| 3329 | } |
| 3330 | |
| 3331 | /* |
| 3332 | * Check whether the Architectural PerfMon supports |
| 3333 | * Branch Misses Retired hw_event or not. |
| 3334 | */ |
| 3335 | cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); |
| 3336 | if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT) |
| 3337 | return -ENODEV; |
| 3338 | |
| 3339 | version = eax.split.version_id; |
| 3340 | if (version < 2) |
| 3341 | x86_pmu = core_pmu; |
| 3342 | else |
| 3343 | x86_pmu = intel_pmu; |
| 3344 | |
| 3345 | x86_pmu.version = version; |
| 3346 | x86_pmu.num_counters = eax.split.num_counters; |
| 3347 | x86_pmu.cntval_bits = eax.split.bit_width; |
| 3348 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
| 3349 | |
| 3350 | x86_pmu.events_maskl = ebx.full; |
| 3351 | x86_pmu.events_mask_len = eax.split.mask_length; |
| 3352 | |
| 3353 | x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); |
| 3354 | |
| 3355 | /* |
| 3356 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
| 3357 | * assume at least 3 events: |
| 3358 | */ |
| 3359 | if (version > 1) |
| 3360 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
| 3361 | |
| 3362 | if (boot_cpu_has(X86_FEATURE_PDCM)) { |
| 3363 | u64 capabilities; |
| 3364 | |
| 3365 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); |
| 3366 | x86_pmu.intel_cap.capabilities = capabilities; |
| 3367 | } |
| 3368 | |
| 3369 | intel_ds_init(); |
| 3370 | |
| 3371 | x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ |
| 3372 | |
| 3373 | /* |
| 3374 | * Install the hw-cache-events table: |
| 3375 | */ |
| 3376 | switch (boot_cpu_data.x86_model) { |
| 3377 | case 14: /* 65nm Core "Yonah" */ |
| 3378 | pr_cont("Core events, "); |
| 3379 | break; |
| 3380 | |
| 3381 | case 15: /* 65nm Core2 "Merom" */ |
| 3382 | x86_add_quirk(intel_clovertown_quirk); |
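		/* fall through */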
| 3383 | case 22: /* 65nm Core2 "Merom-L" */ |
| 3384 | case 23: /* 45nm Core2 "Penryn" */ |
	case 29: /* 45nm Core2 "Dunnington" (MP) */
| 3386 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
| 3387 | sizeof(hw_cache_event_ids)); |
| 3388 | |
| 3389 | intel_pmu_lbr_init_core(); |
| 3390 | |
| 3391 | x86_pmu.event_constraints = intel_core2_event_constraints; |
| 3392 | x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; |
| 3393 | pr_cont("Core2 events, "); |
| 3394 | break; |
| 3395 | |
| 3396 | case 30: /* 45nm Nehalem */ |
| 3397 | case 26: /* 45nm Nehalem-EP */ |
| 3398 | case 46: /* 45nm Nehalem-EX */ |
| 3399 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
| 3400 | sizeof(hw_cache_event_ids)); |
| 3401 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, |
| 3402 | sizeof(hw_cache_extra_regs)); |
| 3403 | |
| 3404 | intel_pmu_lbr_init_nhm(); |
| 3405 | |
| 3406 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
| 3407 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; |
| 3408 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 3409 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
| 3410 | |
| 3411 | x86_pmu.cpu_events = nhm_events_attrs; |
| 3412 | |
| 3413 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 3414 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3415 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3416 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 3417 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3418 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); |
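		/*
		 * Note: X86_CONFIG() builds a raw EVENTSEL encoding, so with
		 * cmask=1 and inv=1 the two pseudo events above count cycles
		 * in which fewer than one such uop is issued/executed, i.e.
		 * stall cycles.
		 */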
| 3419 | |
| 3420 | intel_pmu_pebs_data_source_nhm(); |
| 3421 | x86_add_quirk(intel_nehalem_quirk); |
| 3422 | |
| 3423 | pr_cont("Nehalem events, "); |
| 3424 | break; |
| 3425 | |
| 3426 | case 28: /* 45nm Atom "Pineview" */ |
| 3427 | case 38: /* 45nm Atom "Lincroft" */ |
| 3428 | case 39: /* 32nm Atom "Penwell" */ |
| 3429 | case 53: /* 32nm Atom "Cloverview" */ |
| 3430 | case 54: /* 32nm Atom "Cedarview" */ |
| 3431 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
| 3432 | sizeof(hw_cache_event_ids)); |
| 3433 | |
| 3434 | intel_pmu_lbr_init_atom(); |
| 3435 | |
| 3436 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 3437 | x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; |
| 3438 | x86_pmu.pebs_aliases = intel_pebs_aliases_core2; |
| 3439 | pr_cont("Atom events, "); |
| 3440 | break; |
| 3441 | |
| 3442 | case 55: /* 22nm Atom "Silvermont" */ |
| 3443 | case 76: /* 14nm Atom "Airmont" */ |
| 3444 | case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ |
| 3445 | memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, |
| 3446 | sizeof(hw_cache_event_ids)); |
| 3447 | memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, |
| 3448 | sizeof(hw_cache_extra_regs)); |
| 3449 | |
| 3450 | intel_pmu_lbr_init_atom(); |
| 3451 | |
| 3452 | x86_pmu.event_constraints = intel_slm_event_constraints; |
| 3453 | x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; |
| 3454 | x86_pmu.extra_regs = intel_slm_extra_regs; |
| 3455 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3456 | pr_cont("Silvermont events, "); |
| 3457 | break; |
| 3458 | |
| 3459 | case 37: /* 32nm Westmere */ |
| 3460 | case 44: /* 32nm Westmere-EP */ |
| 3461 | case 47: /* 32nm Westmere-EX */ |
| 3462 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
| 3463 | sizeof(hw_cache_event_ids)); |
| 3464 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, |
| 3465 | sizeof(hw_cache_extra_regs)); |
| 3466 | |
| 3467 | intel_pmu_lbr_init_nhm(); |
| 3468 | |
| 3469 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
| 3470 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
| 3471 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; |
| 3472 | x86_pmu.extra_regs = intel_westmere_extra_regs; |
| 3473 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3474 | |
| 3475 | x86_pmu.cpu_events = nhm_events_attrs; |
| 3476 | |
| 3477 | /* UOPS_ISSUED.STALLED_CYCLES */ |
| 3478 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3479 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3480 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
| 3481 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3482 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); |
| 3483 | |
| 3484 | intel_pmu_pebs_data_source_nhm(); |
| 3485 | pr_cont("Westmere events, "); |
| 3486 | break; |
| 3487 | |
| 3488 | case 42: /* 32nm SandyBridge */ |
| 3489 | case 45: /* 32nm SandyBridge-E/EN/EP */ |
| 3490 | x86_add_quirk(intel_sandybridge_quirk); |
| 3491 | x86_add_quirk(intel_ht_bug); |
| 3492 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 3493 | sizeof(hw_cache_event_ids)); |
| 3494 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
| 3495 | sizeof(hw_cache_extra_regs)); |
| 3496 | |
| 3497 | intel_pmu_lbr_init_snb(); |
| 3498 | |
| 3499 | x86_pmu.event_constraints = intel_snb_event_constraints; |
| 3500 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
| 3501 | x86_pmu.pebs_aliases = intel_pebs_aliases_snb; |
| 3502 | if (boot_cpu_data.x86_model == 45) |
| 3503 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3504 | else |
| 3505 | x86_pmu.extra_regs = intel_snb_extra_regs; |
| 3506 | |
| 3507 | |
| 3508 | /* all extra regs are per-cpu when HT is on */ |
| 3509 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3510 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3511 | |
| 3512 | x86_pmu.cpu_events = snb_events_attrs; |
| 3513 | |
| 3514 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
| 3515 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3516 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
		/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles */
| 3518 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
| 3519 | X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); |
| 3520 | |
| 3521 | pr_cont("SandyBridge events, "); |
| 3522 | break; |
| 3523 | |
| 3524 | case 58: /* 22nm IvyBridge */ |
| 3525 | case 62: /* 22nm IvyBridge-EP/EX */ |
| 3526 | x86_add_quirk(intel_ht_bug); |
| 3527 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
| 3528 | sizeof(hw_cache_event_ids)); |
		/* dTLB-load-misses on IVB is different from SNB */
| 3530 | hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ |
| 3531 | |
| 3532 | memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, |
| 3533 | sizeof(hw_cache_extra_regs)); |
| 3534 | |
| 3535 | intel_pmu_lbr_init_snb(); |
| 3536 | |
| 3537 | x86_pmu.event_constraints = intel_ivb_event_constraints; |
| 3538 | x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; |
| 3539 | x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; |
| 3540 | x86_pmu.pebs_prec_dist = true; |
| 3541 | if (boot_cpu_data.x86_model == 62) |
| 3542 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3543 | else |
| 3544 | x86_pmu.extra_regs = intel_snb_extra_regs; |
| 3545 | /* all extra regs are per-cpu when HT is on */ |
| 3546 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3547 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3548 | |
| 3549 | x86_pmu.cpu_events = snb_events_attrs; |
| 3550 | |
| 3551 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
| 3552 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
| 3553 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); |
| 3554 | |
| 3555 | pr_cont("IvyBridge events, "); |
| 3556 | break; |
| 3557 | |
| 3558 | |
| 3559 | case 60: /* 22nm Haswell Core */ |
| 3560 | case 63: /* 22nm Haswell Server */ |
| 3561 | case 69: /* 22nm Haswell ULT */ |
| 3562 | case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ |
| 3563 | x86_add_quirk(intel_ht_bug); |
| 3564 | x86_pmu.late_ack = true; |
| 3565 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3566 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3567 | |
| 3568 | intel_pmu_lbr_init_hsw(); |
| 3569 | |
| 3570 | x86_pmu.event_constraints = intel_hsw_event_constraints; |
| 3571 | x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; |
| 3572 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3573 | x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; |
| 3574 | x86_pmu.pebs_prec_dist = true; |
| 3575 | /* all extra regs are per-cpu when HT is on */ |
| 3576 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3577 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3578 | |
| 3579 | x86_pmu.hw_config = hsw_hw_config; |
| 3580 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
| 3581 | x86_pmu.cpu_events = hsw_events_attrs; |
| 3582 | x86_pmu.lbr_double_abort = true; |
| 3583 | pr_cont("Haswell events, "); |
| 3584 | break; |
| 3585 | |
| 3586 | case 61: /* 14nm Broadwell Core-M */ |
| 3587 | case 86: /* 14nm Broadwell Xeon D */ |
| 3588 | case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ |
| 3589 | case 79: /* 14nm Broadwell Server */ |
| 3590 | x86_pmu.late_ack = true; |
| 3591 | memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3592 | memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3593 | |
| 3594 | /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */ |
| 3595 | hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ | |
| 3596 | BDW_L3_MISS|HSW_SNOOP_DRAM; |
| 3597 | hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS| |
| 3598 | HSW_SNOOP_DRAM; |
| 3599 | hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ| |
| 3600 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
| 3601 | hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE| |
| 3602 | BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM; |
| 3603 | |
| 3604 | intel_pmu_lbr_init_hsw(); |
| 3605 | |
| 3606 | x86_pmu.event_constraints = intel_bdw_event_constraints; |
| 3607 | x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints; |
| 3608 | x86_pmu.extra_regs = intel_snbep_extra_regs; |
| 3609 | x86_pmu.pebs_aliases = intel_pebs_aliases_ivb; |
| 3610 | x86_pmu.pebs_prec_dist = true; |
| 3611 | /* all extra regs are per-cpu when HT is on */ |
| 3612 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3613 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3614 | |
| 3615 | x86_pmu.hw_config = hsw_hw_config; |
| 3616 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
| 3617 | x86_pmu.cpu_events = hsw_events_attrs; |
| 3618 | x86_pmu.limit_period = bdw_limit_period; |
| 3619 | pr_cont("Broadwell events, "); |
| 3620 | break; |
| 3621 | |
| 3622 | case 87: /* Knights Landing Xeon Phi */ |
| 3623 | memcpy(hw_cache_event_ids, |
| 3624 | slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3625 | memcpy(hw_cache_extra_regs, |
| 3626 | knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3627 | intel_pmu_lbr_init_knl(); |
| 3628 | |
| 3629 | x86_pmu.event_constraints = intel_slm_event_constraints; |
| 3630 | x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; |
| 3631 | x86_pmu.extra_regs = intel_knl_extra_regs; |
| 3632 | |
| 3633 | /* all extra regs are per-cpu when HT is on */ |
| 3634 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3635 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3636 | |
| 3637 | pr_cont("Knights Landing events, "); |
| 3638 | break; |
| 3639 | |
| 3640 | case 78: /* 14nm Skylake Mobile */ |
| 3641 | case 94: /* 14nm Skylake Desktop */ |
| 3642 | x86_pmu.late_ack = true; |
| 3643 | memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); |
| 3644 | memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); |
| 3645 | intel_pmu_lbr_init_skl(); |
| 3646 | |
| 3647 | x86_pmu.event_constraints = intel_skl_event_constraints; |
| 3648 | x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; |
| 3649 | x86_pmu.extra_regs = intel_skl_extra_regs; |
| 3650 | x86_pmu.pebs_aliases = intel_pebs_aliases_skl; |
| 3651 | x86_pmu.pebs_prec_dist = true; |
| 3652 | /* all extra regs are per-cpu when HT is on */ |
| 3653 | x86_pmu.flags |= PMU_FL_HAS_RSP_1; |
| 3654 | x86_pmu.flags |= PMU_FL_NO_HT_SHARING; |
| 3655 | |
| 3656 | x86_pmu.hw_config = hsw_hw_config; |
| 3657 | x86_pmu.get_event_constraints = hsw_get_event_constraints; |
| 3658 | x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, |
| 3659 | skl_format_attr); |
| 3660 | WARN_ON(!x86_pmu.format_attrs); |
| 3661 | x86_pmu.cpu_events = hsw_events_attrs; |
| 3662 | pr_cont("Skylake events, "); |
| 3663 | break; |
| 3664 | |
| 3665 | default: |
| 3666 | switch (x86_pmu.version) { |
| 3667 | case 1: |
| 3668 | x86_pmu.event_constraints = intel_v1_event_constraints; |
| 3669 | pr_cont("generic architected perfmon v1, "); |
| 3670 | break; |
| 3671 | default: |
| 3672 | /* |
| 3673 | * default constraints for v2 and up |
| 3674 | */ |
| 3675 | x86_pmu.event_constraints = intel_gen_event_constraints; |
| 3676 | pr_cont("generic architected perfmon, "); |
| 3677 | break; |
| 3678 | } |
| 3679 | } |
| 3680 | |
| 3681 | if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { |
| 3682 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
| 3683 | x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); |
| 3684 | x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; |
| 3685 | } |
| 3686 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
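	/*
	 * Example: with 4 generic counters the mask above is 0xf; the
	 * fixed-counter bits are OR'd in below, shifted up by
	 * INTEL_PMC_IDX_FIXED.
	 */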
| 3687 | |
| 3688 | if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { |
| 3689 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
| 3690 | x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); |
| 3691 | x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; |
| 3692 | } |
| 3693 | |
| 3694 | x86_pmu.intel_ctrl |= |
| 3695 | ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; |
| 3696 | |
| 3697 | if (x86_pmu.event_constraints) { |
| 3698 | /* |
		 * The event on fixed counter 2 (REF_CYCLES) only works on that
		 * counter, so do not extend its mask to the generic counters.
| 3701 | */ |
| 3702 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
| 3703 | if (c->cmask == FIXED_EVENT_FLAGS |
| 3704 | && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) { |
| 3705 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
| 3706 | } |
| 3707 | c->idxmsk64 &= |
| 3708 | ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); |
| 3709 | c->weight = hweight64(c->idxmsk64); |
| 3710 | } |
| 3711 | } |
| 3712 | |
| 3713 | /* |
	 * Accessing LBR MSRs may cause a #GP under certain circumstances.
	 * E.g. KVM doesn't support LBR MSRs.
	 * Check all LBR MSRs here.
	 * Disable LBR access if any LBR MSR can not be accessed.
| 3718 | */ |
| 3719 | if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) |
| 3720 | x86_pmu.lbr_nr = 0; |
| 3721 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
| 3722 | if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && |
| 3723 | check_msr(x86_pmu.lbr_to + i, 0xffffUL))) |
| 3724 | x86_pmu.lbr_nr = 0; |
| 3725 | } |
| 3726 | |
| 3727 | /* |
	 * Accessing extra MSRs may cause a #GP under certain circumstances.
	 * E.g. KVM doesn't support offcore events.
	 * Check all extra_regs here.
| 3731 | */ |
| 3732 | if (x86_pmu.extra_regs) { |
| 3733 | for (er = x86_pmu.extra_regs; er->msr; er++) { |
| 3734 | er->extra_msr_access = check_msr(er->msr, 0x11UL); |
| 3735 | /* Disable LBR select mapping */ |
| 3736 | if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) |
| 3737 | x86_pmu.lbr_sel_map = NULL; |
| 3738 | } |
| 3739 | } |
| 3740 | |
| 3741 | /* Support full width counters using alternative MSR range */ |
| 3742 | if (x86_pmu.intel_cap.full_width_write) { |
| 3743 | x86_pmu.max_period = x86_pmu.cntval_mask; |
| 3744 | x86_pmu.perfctr = MSR_IA32_PMC0; |
| 3745 | pr_cont("full-width counters, "); |
| 3746 | } |
| 3747 | |
| 3748 | return 0; |
| 3749 | } |
| 3750 | |
| 3751 | /* |
| 3752 | * HT bug: phase 2 init |
| 3753 | * Called once we have valid topology information to check |
 * whether or not HT is enabled.
 * If HT is off, then we disable the workaround.
| 3756 | */ |
| 3757 | static __init int fixup_ht_bug(void) |
| 3758 | { |
| 3759 | int cpu = smp_processor_id(); |
| 3760 | int w, c; |
| 3761 | /* |
| 3762 | * problem not present on this CPU model, nothing to do |
| 3763 | */ |
| 3764 | if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) |
| 3765 | return 0; |
| 3766 | |
| 3767 | w = cpumask_weight(topology_sibling_cpumask(cpu)); |
| 3768 | if (w > 1) { |
| 3769 | pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); |
| 3770 | return 0; |
| 3771 | } |
| 3772 | |
| 3773 | if (lockup_detector_suspend() != 0) { |
| 3774 | pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); |
| 3775 | return 0; |
| 3776 | } |
| 3777 | |
| 3778 | x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); |
| 3779 | |
| 3780 | x86_pmu.start_scheduling = NULL; |
| 3781 | x86_pmu.commit_scheduling = NULL; |
| 3782 | x86_pmu.stop_scheduling = NULL; |
| 3783 | |
| 3784 | lockup_detector_resume(); |
| 3785 | |
| 3786 | get_online_cpus(); |
| 3787 | |
| 3788 | for_each_online_cpu(c) { |
| 3789 | free_excl_cntrs(c); |
| 3790 | } |
| 3791 | |
| 3792 | put_online_cpus(); |
| 3793 | pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n"); |
| 3794 | return 0; |
| 3795 | } |
| 3796 | subsys_initcall(fixup_ht_bug) |