| 1 | /* |
| 2 | * Performance event support for the System z CPU-measurement Sampling Facility |
| 3 | * |
| 4 | * Copyright IBM Corp. 2013 |
| 5 | * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or modify |
| 8 | * it under the terms of the GNU General Public License (version 2 only) |
| 9 | * as published by the Free Software Foundation. |
| 10 | */ |
| 11 | #define KMSG_COMPONENT "cpum_sf" |
| 12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
| 13 | |
| 14 | #include <linux/kernel.h> |
| 15 | #include <linux/kernel_stat.h> |
| 16 | #include <linux/perf_event.h> |
| 17 | #include <linux/percpu.h> |
| 18 | #include <linux/notifier.h> |
| 19 | #include <linux/export.h> |
| 20 | #include <linux/slab.h> |
| 21 | #include <linux/mm.h> |
| 22 | #include <linux/moduleparam.h> |
| 23 | #include <asm/cpu_mf.h> |
| 24 | #include <asm/irq.h> |
| 25 | #include <asm/debug.h> |
| 26 | #include <asm/timex.h> |
| 27 | |
/*
 * Lower bound for the number of sample-data-block-tables (SDBT):
 * The sampling buffer structure needs at least one table, and a single
 * table holds up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1
| 33 | |
/*
 * Capacity of one sample-data-block-table (SDBT), counted in SDBs:
 * Every entry of a table is 8 bytes wide.  All entries but one are SDB
 * pointers; the remaining one is the table-link entry that refers to the
 * origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
| 39 | |
| 40 | /* Maximum page offset for an SDBT table-link entry: |
| 41 | * If this page offset is reached, a table-link entry to the next SDBT |
| 42 | * must be added. |
| 43 | */ |
| 44 | #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) |
| 45 | static inline int require_table_link(const void *sdbt) |
| 46 | { |
| 47 | return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; |
| 48 | } |
| 49 | |
| 50 | /* Minimum and maximum sampling buffer sizes: |
| 51 | * |
| 52 | * This number represents the maximum size of the sampling buffer taking |
| 53 | * the number of sample-data-block-tables into account. Note that these |
| 54 | * numbers apply to the basic-sampling function only. |
| 55 | * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if |
| 56 | * the diagnostic-sampling function is active. |
| 57 | * |
| 58 | * Sampling buffer size Buffer characteristics |
| 59 | * --------------------------------------------------- |
| 60 | * 64KB == 16 pages (4KB per page) |
| 61 | * 1 page for SDB-tables |
| 62 | * 15 pages for SDBs |
| 63 | * |
| 64 | * 32MB == 8192 pages (4KB per page) |
| 65 | * 16 pages for SDB-tables |
| 66 | * 8176 pages for SDBs |
| 67 | */ |
| 68 | static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; |
| 69 | static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; |
| 70 | static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; |
| 71 | |
/* Description of one sampling buffer (linked SDBTs and their SDBs) */
struct sf_buffer {
	/* Origin of the first sample-data-block-table */
	unsigned long *sdbt;

	/* Buffer characteristics (required for buffer increments) */

	/* Number of sample-data-blocks */
	unsigned long num_sdb;
	/* Number of sample-data-block-tables */
	unsigned long num_sdbt;
	/* Tail (last and table-link) entry in the last SDBT */
	unsigned long *tail;
};
| 79 | |
| 80 | struct cpu_hw_sf { |
| 81 | /* CPU-measurement sampling information block */ |
| 82 | struct hws_qsi_info_block qsi; |
| 83 | /* CPU-measurement sampling control block */ |
| 84 | struct hws_lsctl_request_block lsctl; |
| 85 | struct sf_buffer sfb; /* Sampling buffer */ |
| 86 | unsigned int flags; /* Status flags */ |
| 87 | struct perf_event *event; /* Scheduled perf event */ |
| 88 | }; |
| 89 | static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); |
| 90 | |
| 91 | /* Debug feature */ |
| 92 | static debug_info_t *sfdbg; |
| 93 | |
| 94 | /* |
| 95 | * sf_disable() - Switch off sampling facility |
| 96 | */ |
| 97 | static int sf_disable(void) |
| 98 | { |
| 99 | struct hws_lsctl_request_block sreq; |
| 100 | |
| 101 | memset(&sreq, 0, sizeof(sreq)); |
| 102 | return lsctl(&sreq); |
| 103 | } |
| 104 | |
| 105 | /* |
| 106 | * sf_buffer_available() - Check for an allocated sampling buffer |
| 107 | */ |
| 108 | static int sf_buffer_available(struct cpu_hw_sf *cpuhw) |
| 109 | { |
| 110 | return !!cpuhw->sfb.sdbt; |
| 111 | } |
| 112 | |
| 113 | /* |
| 114 | * deallocate sampling facility buffer |
| 115 | */ |
| 116 | static void free_sampling_buffer(struct sf_buffer *sfb) |
| 117 | { |
| 118 | unsigned long *sdbt, *curr; |
| 119 | |
| 120 | if (!sfb->sdbt) |
| 121 | return; |
| 122 | |
| 123 | sdbt = sfb->sdbt; |
| 124 | curr = sdbt; |
| 125 | |
| 126 | /* Free the SDBT after all SDBs are processed... */ |
| 127 | while (1) { |
| 128 | if (!*curr || !sdbt) |
| 129 | break; |
| 130 | |
| 131 | /* Process table-link entries */ |
| 132 | if (is_link_entry(curr)) { |
| 133 | curr = get_next_sdbt(curr); |
| 134 | if (sdbt) |
| 135 | free_page((unsigned long) sdbt); |
| 136 | |
| 137 | /* If the origin is reached, sampling buffer is freed */ |
| 138 | if (curr == sfb->sdbt) |
| 139 | break; |
| 140 | else |
| 141 | sdbt = curr; |
| 142 | } else { |
| 143 | /* Process SDB pointer */ |
| 144 | if (*curr) { |
| 145 | free_page(*curr); |
| 146 | curr++; |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | debug_sprintf_event(sfdbg, 5, |
| 152 | "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); |
| 153 | memset(sfb, 0, sizeof(*sfb)); |
| 154 | } |
| 155 | |
| 156 | static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) |
| 157 | { |
| 158 | unsigned long sdb, *trailer; |
| 159 | |
| 160 | /* Allocate and initialize sample-data-block */ |
| 161 | sdb = get_zeroed_page(gfp_flags); |
| 162 | if (!sdb) |
| 163 | return -ENOMEM; |
| 164 | trailer = trailer_entry_ptr(sdb); |
| 165 | *trailer = SDB_TE_ALERT_REQ_MASK; |
| 166 | |
| 167 | /* Link SDB into the sample-data-block-table */ |
| 168 | *sdbt = sdb; |
| 169 | |
| 170 | return 0; |
| 171 | } |
| 172 | |
| 173 | /* |
| 174 | * realloc_sampling_buffer() - extend sampler memory |
| 175 | * |
| 176 | * Allocates new sample-data-blocks and adds them to the specified sampling |
| 177 | * buffer memory. |
| 178 | * |
| 179 | * Important: This modifies the sampling buffer and must be called when the |
| 180 | * sampling facility is disabled. |
| 181 | * |
| 182 | * Returns zero on success, non-zero otherwise. |
| 183 | */ |
| 184 | static int realloc_sampling_buffer(struct sf_buffer *sfb, |
| 185 | unsigned long num_sdb, gfp_t gfp_flags) |
| 186 | { |
| 187 | int i, rc; |
| 188 | unsigned long *new, *tail; |
| 189 | |
| 190 | if (!sfb->sdbt || !sfb->tail) |
| 191 | return -EINVAL; |
| 192 | |
| 193 | if (!is_link_entry(sfb->tail)) |
| 194 | return -EINVAL; |
| 195 | |
| 196 | /* Append to the existing sampling buffer, overwriting the table-link |
| 197 | * register. |
| 198 | * The tail variables always points to the "tail" (last and table-link) |
| 199 | * entry in an SDB-table. |
| 200 | */ |
| 201 | tail = sfb->tail; |
| 202 | |
| 203 | /* Do a sanity check whether the table-link entry points to |
| 204 | * the sampling buffer origin. |
| 205 | */ |
| 206 | if (sfb->sdbt != get_next_sdbt(tail)) { |
| 207 | debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " |
| 208 | "sampling buffer is not linked: origin=%p" |
| 209 | "tail=%p\n", |
| 210 | (void *) sfb->sdbt, (void *) tail); |
| 211 | return -EINVAL; |
| 212 | } |
| 213 | |
| 214 | /* Allocate remaining SDBs */ |
| 215 | rc = 0; |
| 216 | for (i = 0; i < num_sdb; i++) { |
| 217 | /* Allocate a new SDB-table if it is full. */ |
| 218 | if (require_table_link(tail)) { |
| 219 | new = (unsigned long *) get_zeroed_page(gfp_flags); |
| 220 | if (!new) { |
| 221 | rc = -ENOMEM; |
| 222 | break; |
| 223 | } |
| 224 | sfb->num_sdbt++; |
| 225 | /* Link current page to tail of chain */ |
| 226 | *tail = (unsigned long)(void *) new + 1; |
| 227 | tail = new; |
| 228 | } |
| 229 | |
| 230 | /* Allocate a new sample-data-block. |
| 231 | * If there is not enough memory, stop the realloc process |
| 232 | * and simply use what was allocated. If this is a temporary |
| 233 | * issue, a new realloc call (if required) might succeed. |
| 234 | */ |
| 235 | rc = alloc_sample_data_block(tail, gfp_flags); |
| 236 | if (rc) |
| 237 | break; |
| 238 | sfb->num_sdb++; |
| 239 | tail++; |
| 240 | } |
| 241 | |
| 242 | /* Link sampling buffer to its origin */ |
| 243 | *tail = (unsigned long) sfb->sdbt + 1; |
| 244 | sfb->tail = tail; |
| 245 | |
| 246 | debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" |
| 247 | " settings: sdbt=%lu sdb=%lu\n", |
| 248 | sfb->num_sdbt, sfb->num_sdb); |
| 249 | return rc; |
| 250 | } |
| 251 | |
| 252 | /* |
| 253 | * allocate_sampling_buffer() - allocate sampler memory |
| 254 | * |
| 255 | * Allocates and initializes a sampling buffer structure using the |
| 256 | * specified number of sample-data-blocks (SDB). For each allocation, |
| 257 | * a 4K page is used. The number of sample-data-block-tables (SDBT) |
| 258 | * are calculated from SDBs. |
| 259 | * Also set the ALERT_REQ mask in each SDBs trailer. |
| 260 | * |
| 261 | * Returns zero on success, non-zero otherwise. |
| 262 | */ |
| 263 | static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) |
| 264 | { |
| 265 | int rc; |
| 266 | |
| 267 | if (sfb->sdbt) |
| 268 | return -EINVAL; |
| 269 | |
| 270 | /* Allocate the sample-data-block-table origin */ |
| 271 | sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); |
| 272 | if (!sfb->sdbt) |
| 273 | return -ENOMEM; |
| 274 | sfb->num_sdb = 0; |
| 275 | sfb->num_sdbt = 1; |
| 276 | |
| 277 | /* Link the table origin to point to itself to prepare for |
| 278 | * realloc_sampling_buffer() invocation. |
| 279 | */ |
| 280 | sfb->tail = sfb->sdbt; |
| 281 | *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; |
| 282 | |
| 283 | /* Allocate requested number of sample-data-blocks */ |
| 284 | rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); |
| 285 | if (rc) { |
| 286 | free_sampling_buffer(sfb); |
| 287 | debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " |
| 288 | "realloc_sampling_buffer failed with rc=%i\n", rc); |
| 289 | } else |
| 290 | debug_sprintf_event(sfdbg, 4, |
| 291 | "alloc_sampling_buffer: tear=%p dear=%p\n", |
| 292 | sfb->sdbt, (void *) *sfb->sdbt); |
| 293 | return rc; |
| 294 | } |
| 295 | |
| 296 | static void sfb_set_limits(unsigned long min, unsigned long max) |
| 297 | { |
| 298 | struct hws_qsi_info_block si; |
| 299 | |
| 300 | CPUM_SF_MIN_SDB = min; |
| 301 | CPUM_SF_MAX_SDB = max; |
| 302 | |
| 303 | memset(&si, 0, sizeof(si)); |
| 304 | if (!qsi(&si)) |
| 305 | CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); |
| 306 | } |
| 307 | |
| 308 | static unsigned long sfb_max_limit(struct hw_perf_event *hwc) |
| 309 | { |
| 310 | return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR |
| 311 | : CPUM_SF_MAX_SDB; |
| 312 | } |
| 313 | |
| 314 | static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, |
| 315 | struct hw_perf_event *hwc) |
| 316 | { |
| 317 | if (!sfb->sdbt) |
| 318 | return SFB_ALLOC_REG(hwc); |
| 319 | if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) |
| 320 | return SFB_ALLOC_REG(hwc) - sfb->num_sdb; |
| 321 | return 0; |
| 322 | } |
| 323 | |
/* Returns non-zero if at least one SDB allocation is pending for @hwc */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) != 0;
}
| 329 | |
| 330 | static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) |
| 331 | { |
| 332 | /* Limit the number of SDBs to not exceed the maximum */ |
| 333 | num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); |
| 334 | if (num) |
| 335 | SFB_ALLOC_REG(hwc) += num; |
| 336 | } |
| 337 | |
| 338 | static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) |
| 339 | { |
| 340 | SFB_ALLOC_REG(hwc) = 0; |
| 341 | sfb_account_allocs(num, hwc); |
| 342 | } |
| 343 | |
| 344 | static size_t event_sample_size(struct hw_perf_event *hwc) |
| 345 | { |
| 346 | struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); |
| 347 | size_t sample_size; |
| 348 | |
| 349 | /* The sample size depends on the sampling function: The basic-sampling |
| 350 | * function must be always enabled, diagnostic-sampling function is |
| 351 | * optional. |
| 352 | */ |
| 353 | sample_size = sfr->bsdes; |
| 354 | if (SAMPL_DIAG_MODE(hwc)) |
| 355 | sample_size += sfr->dsdes; |
| 356 | |
| 357 | return sample_size; |
| 358 | } |
| 359 | |
| 360 | static void deallocate_buffers(struct cpu_hw_sf *cpuhw) |
| 361 | { |
| 362 | if (cpuhw->sfb.sdbt) |
| 363 | free_sampling_buffer(&cpuhw->sfb); |
| 364 | } |
| 365 | |
| 366 | static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) |
| 367 | { |
| 368 | unsigned long n_sdb, freq, factor; |
| 369 | size_t sfr_size, sample_size; |
| 370 | struct sf_raw_sample *sfr; |
| 371 | |
| 372 | /* Allocate raw sample buffer |
| 373 | * |
| 374 | * The raw sample buffer is used to temporarily store sampling data |
| 375 | * entries for perf raw sample processing. The buffer size mainly |
| 376 | * depends on the size of diagnostic-sampling data entries which is |
| 377 | * machine-specific. The exact size calculation includes: |
| 378 | * 1. The first 4 bytes of diagnostic-sampling data entries are |
| 379 | * already reflected in the sf_raw_sample structure. Subtract |
| 380 | * these bytes. |
| 381 | * 2. The perf raw sample data must be 8-byte aligned (u64) and |
| 382 | * perf's internal data size must be considered too. So add |
| 383 | * an additional u32 for correct alignment and subtract before |
| 384 | * allocating the buffer. |
| 385 | * 3. Store the raw sample buffer pointer in the perf event |
| 386 | * hardware structure. |
| 387 | */ |
| 388 | sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + |
| 389 | sizeof(u32), sizeof(u64)); |
| 390 | sfr_size -= sizeof(u32); |
| 391 | sfr = kzalloc(sfr_size, GFP_KERNEL); |
| 392 | if (!sfr) |
| 393 | return -ENOMEM; |
| 394 | sfr->size = sfr_size; |
| 395 | sfr->bsdes = cpuhw->qsi.bsdes; |
| 396 | sfr->dsdes = cpuhw->qsi.dsdes; |
| 397 | RAWSAMPLE_REG(hwc) = (unsigned long) sfr; |
| 398 | |
| 399 | /* Calculate sampling buffers using 4K pages |
| 400 | * |
| 401 | * 1. Determine the sample data size which depends on the used |
| 402 | * sampling functions, for example, basic-sampling or |
| 403 | * basic-sampling with diagnostic-sampling. |
| 404 | * |
| 405 | * 2. Use the sampling frequency as input. The sampling buffer is |
| 406 | * designed for almost one second. This can be adjusted through |
| 407 | * the "factor" variable. |
| 408 | * In any case, alloc_sampling_buffer() sets the Alert Request |
| 409 | * Control indicator to trigger a measurement-alert to harvest |
| 410 | * sample-data-blocks (sdb). |
| 411 | * |
| 412 | * 3. Compute the number of sample-data-blocks and ensure a minimum |
| 413 | * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not |
| 414 | * exceed a "calculated" maximum. The symbolic maximum is |
| 415 | * designed for basic-sampling only and needs to be increased if |
| 416 | * diagnostic-sampling is active. |
| 417 | * See also the remarks for these symbolic constants. |
| 418 | * |
| 419 | * 4. Compute the number of sample-data-block-tables (SDBT) and |
| 420 | * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up |
| 421 | * to 511 SDBs). |
| 422 | */ |
| 423 | sample_size = event_sample_size(hwc); |
| 424 | freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); |
| 425 | factor = 1; |
| 426 | n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); |
| 427 | if (n_sdb < CPUM_SF_MIN_SDB) |
| 428 | n_sdb = CPUM_SF_MIN_SDB; |
| 429 | |
| 430 | /* If there is already a sampling buffer allocated, it is very likely |
| 431 | * that the sampling facility is enabled too. If the event to be |
| 432 | * initialized requires a greater sampling buffer, the allocation must |
| 433 | * be postponed. Changing the sampling buffer requires the sampling |
| 434 | * facility to be in the disabled state. So, account the number of |
| 435 | * required SDBs and let cpumsf_pmu_enable() resize the buffer just |
| 436 | * before the event is started. |
| 437 | */ |
| 438 | sfb_init_allocs(n_sdb, hwc); |
| 439 | if (sf_buffer_available(cpuhw)) |
| 440 | return 0; |
| 441 | |
| 442 | debug_sprintf_event(sfdbg, 3, |
| 443 | "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" |
| 444 | " sample_size=%lu cpuhw=%p\n", |
| 445 | SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), |
| 446 | sample_size, cpuhw); |
| 447 | |
| 448 | return alloc_sampling_buffer(&cpuhw->sfb, |
| 449 | sfb_pending_allocs(&cpuhw->sfb, hwc)); |
| 450 | } |
| 451 | |
| 452 | static unsigned long min_percent(unsigned int percent, unsigned long base, |
| 453 | unsigned long min) |
| 454 | { |
| 455 | return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); |
| 456 | } |
| 457 | |
/*
 * compute_sfb_extent() - Number of SDBs to add for a given loss ratio
 * @ratio:	Ratio of lost samples in percent
 * @base:	Current number of sample-data-blocks
 *
 * Uses a percentage-based approach to extend the sampling facility
 * buffer.  Up to 5% sample data loss is accepted and results in no
 * extent.  Above that, the extent is computed by min_percent() with
 * 1% to 5% of @base, depending on @ratio.
 */
static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	/* Steps in ascending order of the ratio they apply to */
	static const struct {
		unsigned long max_ratio;
		unsigned int percent;
		unsigned long limit;
	} steps[] = {
		{  50, 1, 1 },
		{  75, 2, 2 },
		{ 100, 3, 3 },
		{ 250, 4, 4 },
	};
	unsigned int i;

	if (ratio <= 5)
		return 0;

	for (i = 0; i < ARRAY_SIZE(steps); i++) {
		if (ratio <= steps[i].max_ratio)
			return min_percent(steps[i].percent, base,
					   steps[i].limit);
	}

	return min_percent(5, base, 8);
}
| 480 | |
| 481 | static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, |
| 482 | struct hw_perf_event *hwc) |
| 483 | { |
| 484 | unsigned long ratio, num; |
| 485 | |
| 486 | if (!OVERFLOW_REG(hwc)) |
| 487 | return; |
| 488 | |
| 489 | /* The sample_overflow contains the average number of sample data |
| 490 | * that has been lost because sample-data-blocks were full. |
| 491 | * |
| 492 | * Calculate the total number of sample data entries that has been |
| 493 | * discarded. Then calculate the ratio of lost samples to total samples |
| 494 | * per second in percent. |
| 495 | */ |
| 496 | ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, |
| 497 | sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); |
| 498 | |
| 499 | /* Compute number of sample-data-blocks */ |
| 500 | num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); |
| 501 | if (num) |
| 502 | sfb_account_allocs(num, hwc); |
| 503 | |
| 504 | debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" |
| 505 | " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); |
| 506 | OVERFLOW_REG(hwc) = 0; |
| 507 | } |
| 508 | |
| 509 | /* extend_sampling_buffer() - Extend sampling buffer |
| 510 | * @sfb: Sampling buffer structure (for local CPU) |
| 511 | * @hwc: Perf event hardware structure |
| 512 | * |
| 513 | * Use this function to extend the sampling buffer based on the overflow counter |
| 514 | * and postponed allocation extents stored in the specified Perf event hardware. |
| 515 | * |
| 516 | * Important: This function disables the sampling facility in order to safely |
| 517 | * change the sampling buffer structure. Do not call this function |
| 518 | * when the PMU is active. |
| 519 | */ |
| 520 | static void extend_sampling_buffer(struct sf_buffer *sfb, |
| 521 | struct hw_perf_event *hwc) |
| 522 | { |
| 523 | unsigned long num, num_old; |
| 524 | int rc; |
| 525 | |
| 526 | num = sfb_pending_allocs(sfb, hwc); |
| 527 | if (!num) |
| 528 | return; |
| 529 | num_old = sfb->num_sdb; |
| 530 | |
| 531 | /* Disable the sampling facility to reset any states and also |
| 532 | * clear pending measurement alerts. |
| 533 | */ |
| 534 | sf_disable(); |
| 535 | |
| 536 | /* Extend the sampling buffer. |
| 537 | * This memory allocation typically happens in an atomic context when |
| 538 | * called by perf. Because this is a reallocation, it is fine if the |
| 539 | * new SDB-request cannot be satisfied immediately. |
| 540 | */ |
| 541 | rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); |
| 542 | if (rc) |
| 543 | debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " |
| 544 | "failed with rc=%i\n", rc); |
| 545 | |
| 546 | if (sfb_has_pending_allocs(sfb, hwc)) |
| 547 | debug_sprintf_event(sfdbg, 5, "sfb: extend: " |
| 548 | "req=%lu alloc=%lu remaining=%lu\n", |
| 549 | num, sfb->num_sdb - num_old, |
| 550 | sfb_pending_allocs(sfb, hwc)); |
| 551 | } |
| 552 | |
| 553 | |
| 554 | /* Number of perf events counting hardware events */ |
| 555 | static atomic_t num_events; |
| 556 | /* Used to avoid races in calling reserve/release_cpumf_hardware */ |
| 557 | static DEFINE_MUTEX(pmc_reserve_mutex); |
| 558 | |
| 559 | #define PMC_INIT 0 |
| 560 | #define PMC_RELEASE 1 |
| 561 | #define PMC_FAILURE 2 |
| 562 | static void setup_pmc_cpu(void *flags) |
| 563 | { |
| 564 | int err; |
| 565 | struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); |
| 566 | |
| 567 | err = 0; |
| 568 | switch (*((int *) flags)) { |
| 569 | case PMC_INIT: |
| 570 | memset(cpusf, 0, sizeof(*cpusf)); |
| 571 | err = qsi(&cpusf->qsi); |
| 572 | if (err) |
| 573 | break; |
| 574 | cpusf->flags |= PMU_F_RESERVED; |
| 575 | err = sf_disable(); |
| 576 | if (err) |
| 577 | pr_err("Switching off the sampling facility failed " |
| 578 | "with rc=%i\n", err); |
| 579 | debug_sprintf_event(sfdbg, 5, |
| 580 | "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); |
| 581 | break; |
| 582 | case PMC_RELEASE: |
| 583 | cpusf->flags &= ~PMU_F_RESERVED; |
| 584 | err = sf_disable(); |
| 585 | if (err) { |
| 586 | pr_err("Switching off the sampling facility failed " |
| 587 | "with rc=%i\n", err); |
| 588 | } else |
| 589 | deallocate_buffers(cpusf); |
| 590 | debug_sprintf_event(sfdbg, 5, |
| 591 | "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); |
| 592 | break; |
| 593 | } |
| 594 | if (err) |
| 595 | *((int *) flags) |= PMC_FAILURE; |
| 596 | } |
| 597 | |
| 598 | static void release_pmc_hardware(void) |
| 599 | { |
| 600 | int flags = PMC_RELEASE; |
| 601 | |
| 602 | irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
| 603 | on_each_cpu(setup_pmc_cpu, &flags, 1); |
| 604 | perf_release_sampling(); |
| 605 | } |
| 606 | |
| 607 | static int reserve_pmc_hardware(void) |
| 608 | { |
| 609 | int flags = PMC_INIT; |
| 610 | int err; |
| 611 | |
| 612 | err = perf_reserve_sampling(); |
| 613 | if (err) |
| 614 | return err; |
| 615 | on_each_cpu(setup_pmc_cpu, &flags, 1); |
| 616 | if (flags & PMC_FAILURE) { |
| 617 | release_pmc_hardware(); |
| 618 | return -ENODEV; |
| 619 | } |
| 620 | irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
| 621 | |
| 622 | return 0; |
| 623 | } |
| 624 | |
| 625 | static void hw_perf_event_destroy(struct perf_event *event) |
| 626 | { |
| 627 | /* Free raw sample buffer */ |
| 628 | if (RAWSAMPLE_REG(&event->hw)) |
| 629 | kfree((void *) RAWSAMPLE_REG(&event->hw)); |
| 630 | |
| 631 | /* Release PMC if this is the last perf event */ |
| 632 | if (!atomic_add_unless(&num_events, -1, 1)) { |
| 633 | mutex_lock(&pmc_reserve_mutex); |
| 634 | if (atomic_dec_return(&num_events) == 0) |
| 635 | release_pmc_hardware(); |
| 636 | mutex_unlock(&pmc_reserve_mutex); |
| 637 | } |
| 638 | } |
| 639 | |
| 640 | static void hw_init_period(struct hw_perf_event *hwc, u64 period) |
| 641 | { |
| 642 | hwc->sample_period = period; |
| 643 | hwc->last_period = hwc->sample_period; |
| 644 | local64_set(&hwc->period_left, hwc->sample_period); |
| 645 | } |
| 646 | |
| 647 | static void hw_reset_registers(struct hw_perf_event *hwc, |
| 648 | unsigned long *sdbt_origin) |
| 649 | { |
| 650 | struct sf_raw_sample *sfr; |
| 651 | |
| 652 | /* (Re)set to first sample-data-block-table */ |
| 653 | TEAR_REG(hwc) = (unsigned long) sdbt_origin; |
| 654 | |
| 655 | /* (Re)set raw sampling buffer register */ |
| 656 | sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); |
| 657 | memset(&sfr->basic, 0, sizeof(sfr->basic)); |
| 658 | memset(&sfr->diag, 0, sfr->dsdes); |
| 659 | } |
| 660 | |
| 661 | static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, |
| 662 | unsigned long rate) |
| 663 | { |
| 664 | return clamp_t(unsigned long, rate, |
| 665 | si->min_sampl_rate, si->max_sampl_rate); |
| 666 | } |
| 667 | |
| 668 | static int __hw_perf_event_init(struct perf_event *event) |
| 669 | { |
| 670 | struct cpu_hw_sf *cpuhw; |
| 671 | struct hws_qsi_info_block si; |
| 672 | struct perf_event_attr *attr = &event->attr; |
| 673 | struct hw_perf_event *hwc = &event->hw; |
| 674 | unsigned long rate; |
| 675 | int cpu, err; |
| 676 | |
| 677 | /* Reserve CPU-measurement sampling facility */ |
| 678 | err = 0; |
| 679 | if (!atomic_inc_not_zero(&num_events)) { |
| 680 | mutex_lock(&pmc_reserve_mutex); |
| 681 | if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) |
| 682 | err = -EBUSY; |
| 683 | else |
| 684 | atomic_inc(&num_events); |
| 685 | mutex_unlock(&pmc_reserve_mutex); |
| 686 | } |
| 687 | event->destroy = hw_perf_event_destroy; |
| 688 | |
| 689 | if (err) |
| 690 | goto out; |
| 691 | |
| 692 | /* Access per-CPU sampling information (query sampling info) */ |
| 693 | /* |
| 694 | * The event->cpu value can be -1 to count on every CPU, for example, |
| 695 | * when attaching to a task. If this is specified, use the query |
| 696 | * sampling info from the current CPU, otherwise use event->cpu to |
| 697 | * retrieve the per-CPU information. |
| 698 | * Later, cpuhw indicates whether to allocate sampling buffers for a |
| 699 | * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). |
| 700 | */ |
| 701 | memset(&si, 0, sizeof(si)); |
| 702 | cpuhw = NULL; |
| 703 | if (event->cpu == -1) |
| 704 | qsi(&si); |
| 705 | else { |
| 706 | /* Event is pinned to a particular CPU, retrieve the per-CPU |
| 707 | * sampling structure for accessing the CPU-specific QSI. |
| 708 | */ |
| 709 | cpuhw = &per_cpu(cpu_hw_sf, event->cpu); |
| 710 | si = cpuhw->qsi; |
| 711 | } |
| 712 | |
| 713 | /* Check sampling facility authorization and, if not authorized, |
| 714 | * fall back to other PMUs. It is safe to check any CPU because |
| 715 | * the authorization is identical for all configured CPUs. |
| 716 | */ |
| 717 | if (!si.as) { |
| 718 | err = -ENOENT; |
| 719 | goto out; |
| 720 | } |
| 721 | |
| 722 | /* Always enable basic sampling */ |
| 723 | SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; |
| 724 | |
| 725 | /* Check if diagnostic sampling is requested. Deny if the required |
| 726 | * sampling authorization is missing. |
| 727 | */ |
| 728 | if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { |
| 729 | if (!si.ad) { |
| 730 | err = -EPERM; |
| 731 | goto out; |
| 732 | } |
| 733 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; |
| 734 | } |
| 735 | |
| 736 | /* Check and set other sampling flags */ |
| 737 | if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) |
| 738 | SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; |
| 739 | |
| 740 | /* The sampling information (si) contains information about the |
| 741 | * min/max sampling intervals and the CPU speed. So calculate the |
| 742 | * correct sampling interval and avoid the whole period adjust |
| 743 | * feedback loop. |
| 744 | */ |
| 745 | rate = 0; |
| 746 | if (attr->freq) { |
| 747 | rate = freq_to_sample_rate(&si, attr->sample_freq); |
| 748 | rate = hw_limit_rate(&si, rate); |
| 749 | attr->freq = 0; |
| 750 | attr->sample_period = rate; |
| 751 | } else { |
| 752 | /* The min/max sampling rates specifies the valid range |
| 753 | * of sample periods. If the specified sample period is |
| 754 | * out of range, limit the period to the range boundary. |
| 755 | */ |
| 756 | rate = hw_limit_rate(&si, hwc->sample_period); |
| 757 | |
| 758 | /* The perf core maintains a maximum sample rate that is |
| 759 | * configurable through the sysctl interface. Ensure the |
| 760 | * sampling rate does not exceed this value. This also helps |
| 761 | * to avoid throttling when pushing samples with |
| 762 | * perf_event_overflow(). |
| 763 | */ |
| 764 | if (sample_rate_to_freq(&si, rate) > |
| 765 | sysctl_perf_event_sample_rate) { |
| 766 | err = -EINVAL; |
| 767 | debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); |
| 768 | goto out; |
| 769 | } |
| 770 | } |
| 771 | SAMPL_RATE(hwc) = rate; |
| 772 | hw_init_period(hwc, SAMPL_RATE(hwc)); |
| 773 | |
| 774 | /* Initialize sample data overflow accounting */ |
| 775 | hwc->extra_reg.reg = REG_OVERFLOW; |
| 776 | OVERFLOW_REG(hwc) = 0; |
| 777 | |
| 778 | /* Allocate the per-CPU sampling buffer using the CPU information |
| 779 | * from the event. If the event is not pinned to a particular |
| 780 | * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling |
| 781 | * buffers for each online CPU. |
| 782 | */ |
| 783 | if (cpuhw) |
| 784 | /* Event is pinned to a particular CPU */ |
| 785 | err = allocate_buffers(cpuhw, hwc); |
| 786 | else { |
| 787 | /* Event is not pinned, allocate sampling buffer on |
| 788 | * each online CPU |
| 789 | */ |
| 790 | for_each_online_cpu(cpu) { |
| 791 | cpuhw = &per_cpu(cpu_hw_sf, cpu); |
| 792 | err = allocate_buffers(cpuhw, hwc); |
| 793 | if (err) |
| 794 | break; |
| 795 | } |
| 796 | } |
| 797 | out: |
| 798 | return err; |
| 799 | } |
| 800 | |
| 801 | static int cpumsf_pmu_event_init(struct perf_event *event) |
| 802 | { |
| 803 | int err; |
| 804 | |
| 805 | /* No support for taken branch sampling */ |
| 806 | if (has_branch_stack(event)) |
| 807 | return -EOPNOTSUPP; |
| 808 | |
| 809 | switch (event->attr.type) { |
| 810 | case PERF_TYPE_RAW: |
| 811 | if ((event->attr.config != PERF_EVENT_CPUM_SF) && |
| 812 | (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) |
| 813 | return -ENOENT; |
| 814 | break; |
| 815 | case PERF_TYPE_HARDWARE: |
| 816 | /* Support sampling of CPU cycles in addition to the |
| 817 | * counter facility. However, the counter facility |
| 818 | * is more precise and, hence, restrict this PMU to |
| 819 | * sampling events only. |
| 820 | */ |
| 821 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) |
| 822 | return -ENOENT; |
| 823 | if (!is_sampling_event(event)) |
| 824 | return -ENOENT; |
| 825 | break; |
| 826 | default: |
| 827 | return -ENOENT; |
| 828 | } |
| 829 | |
| 830 | /* Check online status of the CPU to which the event is pinned */ |
| 831 | if (event->cpu >= nr_cpumask_bits || |
| 832 | (event->cpu >= 0 && !cpu_online(event->cpu))) |
| 833 | return -ENODEV; |
| 834 | |
| 835 | /* Force reset of idle/hv excludes regardless of what the |
| 836 | * user requested. |
| 837 | */ |
| 838 | if (event->attr.exclude_hv) |
| 839 | event->attr.exclude_hv = 0; |
| 840 | if (event->attr.exclude_idle) |
| 841 | event->attr.exclude_idle = 0; |
| 842 | |
| 843 | err = __hw_perf_event_init(event); |
| 844 | if (unlikely(err)) |
| 845 | if (event->destroy) |
| 846 | event->destroy(event); |
| 847 | return err; |
| 848 | } |
| 849 | |
| 850 | static void cpumsf_pmu_enable(struct pmu *pmu) |
| 851 | { |
| 852 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 853 | struct hw_perf_event *hwc; |
| 854 | int err; |
| 855 | |
| 856 | if (cpuhw->flags & PMU_F_ENABLED) |
| 857 | return; |
| 858 | |
| 859 | if (cpuhw->flags & PMU_F_ERR_MASK) |
| 860 | return; |
| 861 | |
| 862 | /* Check whether to extent the sampling buffer. |
| 863 | * |
| 864 | * Two conditions trigger an increase of the sampling buffer for a |
| 865 | * perf event: |
| 866 | * 1. Postponed buffer allocations from the event initialization. |
| 867 | * 2. Sampling overflows that contribute to pending allocations. |
| 868 | * |
| 869 | * Note that the extend_sampling_buffer() function disables the sampling |
| 870 | * facility, but it can be fully re-enabled using sampling controls that |
| 871 | * have been saved in cpumsf_pmu_disable(). |
| 872 | */ |
| 873 | if (cpuhw->event) { |
| 874 | hwc = &cpuhw->event->hw; |
| 875 | /* Account number of overflow-designated buffer extents */ |
| 876 | sfb_account_overflows(cpuhw, hwc); |
| 877 | if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) |
| 878 | extend_sampling_buffer(&cpuhw->sfb, hwc); |
| 879 | } |
| 880 | |
| 881 | /* (Re)enable the PMU and sampling facility */ |
| 882 | cpuhw->flags |= PMU_F_ENABLED; |
| 883 | barrier(); |
| 884 | |
| 885 | err = lsctl(&cpuhw->lsctl); |
| 886 | if (err) { |
| 887 | cpuhw->flags &= ~PMU_F_ENABLED; |
| 888 | pr_err("Loading sampling controls failed: op=%i err=%i\n", |
| 889 | 1, err); |
| 890 | return; |
| 891 | } |
| 892 | |
| 893 | debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " |
| 894 | "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, |
| 895 | cpuhw->lsctl.ed, cpuhw->lsctl.cd, |
| 896 | (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); |
| 897 | } |
| 898 | |
| 899 | static void cpumsf_pmu_disable(struct pmu *pmu) |
| 900 | { |
| 901 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 902 | struct hws_lsctl_request_block inactive; |
| 903 | struct hws_qsi_info_block si; |
| 904 | int err; |
| 905 | |
| 906 | if (!(cpuhw->flags & PMU_F_ENABLED)) |
| 907 | return; |
| 908 | |
| 909 | if (cpuhw->flags & PMU_F_ERR_MASK) |
| 910 | return; |
| 911 | |
| 912 | /* Switch off sampling activation control */ |
| 913 | inactive = cpuhw->lsctl; |
| 914 | inactive.cs = 0; |
| 915 | inactive.cd = 0; |
| 916 | |
| 917 | err = lsctl(&inactive); |
| 918 | if (err) { |
| 919 | pr_err("Loading sampling controls failed: op=%i err=%i\n", |
| 920 | 2, err); |
| 921 | return; |
| 922 | } |
| 923 | |
| 924 | /* Save state of TEAR and DEAR register contents */ |
| 925 | if (!qsi(&si)) { |
| 926 | /* TEAR/DEAR values are valid only if the sampling facility is |
| 927 | * enabled. Note that cpumsf_pmu_disable() might be called even |
| 928 | * for a disabled sampling facility because cpumsf_pmu_enable() |
| 929 | * controls the enable/disable state. |
| 930 | */ |
| 931 | if (si.es) { |
| 932 | cpuhw->lsctl.tear = si.tear; |
| 933 | cpuhw->lsctl.dear = si.dear; |
| 934 | } |
| 935 | } else |
| 936 | debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " |
| 937 | "qsi() failed with err=%i\n", err); |
| 938 | |
| 939 | cpuhw->flags &= ~PMU_F_ENABLED; |
| 940 | } |
| 941 | |
| 942 | /* perf_exclude_event() - Filter event |
| 943 | * @event: The perf event |
| 944 | * @regs: pt_regs structure |
| 945 | * @sde_regs: Sample-data-entry (sde) regs structure |
| 946 | * |
| 947 | * Filter perf events according to their exclude specification. |
| 948 | * |
| 949 | * Return non-zero if the event shall be excluded. |
| 950 | */ |
| 951 | static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, |
| 952 | struct perf_sf_sde_regs *sde_regs) |
| 953 | { |
| 954 | if (event->attr.exclude_user && user_mode(regs)) |
| 955 | return 1; |
| 956 | if (event->attr.exclude_kernel && !user_mode(regs)) |
| 957 | return 1; |
| 958 | if (event->attr.exclude_guest && sde_regs->in_guest) |
| 959 | return 1; |
| 960 | if (event->attr.exclude_host && !sde_regs->in_guest) |
| 961 | return 1; |
| 962 | return 0; |
| 963 | } |
| 964 | |
| 965 | /* perf_push_sample() - Push samples to perf |
| 966 | * @event: The perf event |
| 967 | * @sample: Hardware sample data |
| 968 | * |
| 969 | * Use the hardware sample data to create perf event sample. The sample |
| 970 | * is the pushed to the event subsystem and the function checks for |
| 971 | * possible event overflows. If an event overflow occurs, the PMU is |
| 972 | * stopped. |
| 973 | * |
| 974 | * Return non-zero if an event overflow occurred. |
| 975 | */ |
| 976 | static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) |
| 977 | { |
| 978 | int overflow; |
| 979 | struct pt_regs regs; |
| 980 | struct perf_sf_sde_regs *sde_regs; |
| 981 | struct perf_sample_data data; |
| 982 | struct perf_raw_record raw; |
| 983 | |
| 984 | /* Setup perf sample */ |
| 985 | perf_sample_data_init(&data, 0, event->hw.last_period); |
| 986 | raw.size = sfr->size; |
| 987 | raw.data = sfr; |
| 988 | data.raw = &raw; |
| 989 | |
| 990 | /* Setup pt_regs to look like an CPU-measurement external interrupt |
| 991 | * using the Program Request Alert code. The regs.int_parm_long |
| 992 | * field which is unused contains additional sample-data-entry related |
| 993 | * indicators. |
| 994 | */ |
| 995 | memset(®s, 0, sizeof(regs)); |
| 996 | regs.int_code = 0x1407; |
| 997 | regs.int_parm = CPU_MF_INT_SF_PRA; |
| 998 | sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; |
| 999 | |
| 1000 | regs.psw.addr = sfr->basic.ia; |
| 1001 | if (sfr->basic.T) |
| 1002 | regs.psw.mask |= PSW_MASK_DAT; |
| 1003 | if (sfr->basic.W) |
| 1004 | regs.psw.mask |= PSW_MASK_WAIT; |
| 1005 | if (sfr->basic.P) |
| 1006 | regs.psw.mask |= PSW_MASK_PSTATE; |
| 1007 | switch (sfr->basic.AS) { |
| 1008 | case 0x0: |
| 1009 | regs.psw.mask |= PSW_ASC_PRIMARY; |
| 1010 | break; |
| 1011 | case 0x1: |
| 1012 | regs.psw.mask |= PSW_ASC_ACCREG; |
| 1013 | break; |
| 1014 | case 0x2: |
| 1015 | regs.psw.mask |= PSW_ASC_SECONDARY; |
| 1016 | break; |
| 1017 | case 0x3: |
| 1018 | regs.psw.mask |= PSW_ASC_HOME; |
| 1019 | break; |
| 1020 | } |
| 1021 | |
| 1022 | /* The host-program-parameter (hpp) contains the sie control |
| 1023 | * block that is set by sie64a() in entry64.S. Check if hpp |
| 1024 | * refers to a valid control block and set sde_regs flags |
| 1025 | * accordingly. This would allow to use hpp values for other |
| 1026 | * purposes too. |
| 1027 | * For now, simply use a non-zero value as guest indicator. |
| 1028 | */ |
| 1029 | if (sfr->basic.hpp) |
| 1030 | sde_regs->in_guest = 1; |
| 1031 | |
| 1032 | overflow = 0; |
| 1033 | if (perf_exclude_event(event, ®s, sde_regs)) |
| 1034 | goto out; |
| 1035 | if (perf_event_overflow(event, &data, ®s)) { |
| 1036 | overflow = 1; |
| 1037 | event->pmu->stop(event, 0); |
| 1038 | } |
| 1039 | perf_event_update_userpage(event); |
| 1040 | out: |
| 1041 | return overflow; |
| 1042 | } |
| 1043 | |
| 1044 | static void perf_event_count_update(struct perf_event *event, u64 count) |
| 1045 | { |
| 1046 | local64_add(count, &event->count); |
| 1047 | } |
| 1048 | |
| 1049 | static int sample_format_is_valid(struct hws_combined_entry *sample, |
| 1050 | unsigned int flags) |
| 1051 | { |
| 1052 | if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) |
| 1053 | /* Only basic-sampling data entries with data-entry-format |
| 1054 | * version of 0x0001 can be processed. |
| 1055 | */ |
| 1056 | if (sample->basic.def != 0x0001) |
| 1057 | return 0; |
| 1058 | if (flags & PERF_CPUM_SF_DIAG_MODE) |
| 1059 | /* The data-entry-format number of diagnostic-sampling data |
| 1060 | * entries can vary. Because diagnostic data is just passed |
| 1061 | * through, do only a sanity check on the DEF. |
| 1062 | */ |
| 1063 | if (sample->diag.def < 0x8001) |
| 1064 | return 0; |
| 1065 | return 1; |
| 1066 | } |
| 1067 | |
| 1068 | static int sample_is_consistent(struct hws_combined_entry *sample, |
| 1069 | unsigned long flags) |
| 1070 | { |
| 1071 | /* This check applies only to basic-sampling data entries of potentially |
| 1072 | * combined-sampling data entries. Invalid entries cannot be processed |
| 1073 | * by the PMU and, thus, do not deliver an associated |
| 1074 | * diagnostic-sampling data entry. |
| 1075 | */ |
| 1076 | if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) |
| 1077 | return 0; |
| 1078 | /* |
| 1079 | * Samples are skipped, if they are invalid or for which the |
| 1080 | * instruction address is not predictable, i.e., the wait-state bit is |
| 1081 | * set. |
| 1082 | */ |
| 1083 | if (sample->basic.I || sample->basic.W) |
| 1084 | return 0; |
| 1085 | return 1; |
| 1086 | } |
| 1087 | |
| 1088 | static void reset_sample_slot(struct hws_combined_entry *sample, |
| 1089 | unsigned long flags) |
| 1090 | { |
| 1091 | if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) |
| 1092 | sample->basic.def = 0; |
| 1093 | if (flags & PERF_CPUM_SF_DIAG_MODE) |
| 1094 | sample->diag.def = 0; |
| 1095 | } |
| 1096 | |
| 1097 | static void sfr_store_sample(struct sf_raw_sample *sfr, |
| 1098 | struct hws_combined_entry *sample) |
| 1099 | { |
| 1100 | if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) |
| 1101 | sfr->basic = sample->basic; |
| 1102 | if (sfr->format & PERF_CPUM_SF_DIAG_MODE) |
| 1103 | memcpy(&sfr->diag, &sample->diag, sfr->dsdes); |
| 1104 | } |
| 1105 | |
| 1106 | static void debug_sample_entry(struct hws_combined_entry *sample, |
| 1107 | struct hws_trailer_entry *te, |
| 1108 | unsigned long flags) |
| 1109 | { |
| 1110 | debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " |
| 1111 | "sampling data entry: te->f=%i basic.def=%04x (%p)" |
| 1112 | " diag.def=%04x (%p)\n", te->f, |
| 1113 | sample->basic.def, &sample->basic, |
| 1114 | (flags & PERF_CPUM_SF_DIAG_MODE) |
| 1115 | ? sample->diag.def : 0xFFFF, |
| 1116 | (flags & PERF_CPUM_SF_DIAG_MODE) |
| 1117 | ? &sample->diag : NULL); |
| 1118 | } |
| 1119 | |
| 1120 | /* hw_collect_samples() - Walk through a sample-data-block and collect samples |
| 1121 | * @event: The perf event |
| 1122 | * @sdbt: Sample-data-block table |
| 1123 | * @overflow: Event overflow counter |
| 1124 | * |
| 1125 | * Walks through a sample-data-block and collects sampling data entries that are |
| 1126 | * then pushed to the perf event subsystem. Depending on the sampling function, |
| 1127 | * there can be either basic-sampling or combined-sampling data entries. A |
| 1128 | * combined-sampling data entry consists of a basic- and a diagnostic-sampling |
| 1129 | * data entry. The sampling function is determined by the flags in the perf |
| 1130 | * event hardware structure. The function always works with a combined-sampling |
| 1131 | * data entry but ignores the the diagnostic portion if it is not available. |
| 1132 | * |
| 1133 | * Note that the implementation focuses on basic-sampling data entries and, if |
| 1134 | * such an entry is not valid, the entire combined-sampling data entry is |
| 1135 | * ignored. |
| 1136 | * |
| 1137 | * The overflow variables counts the number of samples that has been discarded |
| 1138 | * due to a perf event overflow. |
| 1139 | */ |
| 1140 | static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, |
| 1141 | unsigned long long *overflow) |
| 1142 | { |
| 1143 | unsigned long flags = SAMPL_FLAGS(&event->hw); |
| 1144 | struct hws_combined_entry *sample; |
| 1145 | struct hws_trailer_entry *te; |
| 1146 | struct sf_raw_sample *sfr; |
| 1147 | size_t sample_size; |
| 1148 | |
| 1149 | /* Prepare and initialize raw sample data */ |
| 1150 | sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); |
| 1151 | sfr->format = flags & PERF_CPUM_SF_MODE_MASK; |
| 1152 | |
| 1153 | sample_size = event_sample_size(&event->hw); |
| 1154 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); |
| 1155 | sample = (struct hws_combined_entry *) *sdbt; |
| 1156 | while ((unsigned long *) sample < (unsigned long *) te) { |
| 1157 | /* Check for an empty sample */ |
| 1158 | if (!sample->basic.def) |
| 1159 | break; |
| 1160 | |
| 1161 | /* Update perf event period */ |
| 1162 | perf_event_count_update(event, SAMPL_RATE(&event->hw)); |
| 1163 | |
| 1164 | /* Check sampling data entry */ |
| 1165 | if (sample_format_is_valid(sample, flags)) { |
| 1166 | /* If an event overflow occurred, the PMU is stopped to |
| 1167 | * throttle event delivery. Remaining sample data is |
| 1168 | * discarded. |
| 1169 | */ |
| 1170 | if (!*overflow) { |
| 1171 | if (sample_is_consistent(sample, flags)) { |
| 1172 | /* Deliver sample data to perf */ |
| 1173 | sfr_store_sample(sfr, sample); |
| 1174 | *overflow = perf_push_sample(event, sfr); |
| 1175 | } |
| 1176 | } else |
| 1177 | /* Count discarded samples */ |
| 1178 | *overflow += 1; |
| 1179 | } else { |
| 1180 | debug_sample_entry(sample, te, flags); |
| 1181 | /* Sample slot is not yet written or other record. |
| 1182 | * |
| 1183 | * This condition can occur if the buffer was reused |
| 1184 | * from a combined basic- and diagnostic-sampling. |
| 1185 | * If only basic-sampling is then active, entries are |
| 1186 | * written into the larger diagnostic entries. |
| 1187 | * This is typically the case for sample-data-blocks |
| 1188 | * that are not full. Stop processing if the first |
| 1189 | * invalid format was detected. |
| 1190 | */ |
| 1191 | if (!te->f) |
| 1192 | break; |
| 1193 | } |
| 1194 | |
| 1195 | /* Reset sample slot and advance to next sample */ |
| 1196 | reset_sample_slot(sample, flags); |
| 1197 | sample += sample_size; |
| 1198 | } |
| 1199 | } |
| 1200 | |
| 1201 | /* hw_perf_event_update() - Process sampling buffer |
| 1202 | * @event: The perf event |
| 1203 | * @flush_all: Flag to also flush partially filled sample-data-blocks |
| 1204 | * |
| 1205 | * Processes the sampling buffer and create perf event samples. |
| 1206 | * The sampling buffer position are retrieved and saved in the TEAR_REG |
| 1207 | * register of the specified perf event. |
| 1208 | * |
| 1209 | * Only full sample-data-blocks are processed. Specify the flash_all flag |
| 1210 | * to also walk through partially filled sample-data-blocks. It is ignored |
| 1211 | * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag |
| 1212 | * enforces the processing of full sample-data-blocks only (trailer entries |
| 1213 | * with the block-full-indicator bit set). |
| 1214 | */ |
| 1215 | static void hw_perf_event_update(struct perf_event *event, int flush_all) |
| 1216 | { |
| 1217 | struct hw_perf_event *hwc = &event->hw; |
| 1218 | struct hws_trailer_entry *te; |
| 1219 | unsigned long *sdbt; |
| 1220 | unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; |
| 1221 | int done; |
| 1222 | |
| 1223 | if (flush_all && SDB_FULL_BLOCKS(hwc)) |
| 1224 | flush_all = 0; |
| 1225 | |
| 1226 | sdbt = (unsigned long *) TEAR_REG(hwc); |
| 1227 | done = event_overflow = sampl_overflow = num_sdb = 0; |
| 1228 | while (!done) { |
| 1229 | /* Get the trailer entry of the sample-data-block */ |
| 1230 | te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); |
| 1231 | |
| 1232 | /* Leave loop if no more work to do (block full indicator) */ |
| 1233 | if (!te->f) { |
| 1234 | done = 1; |
| 1235 | if (!flush_all) |
| 1236 | break; |
| 1237 | } |
| 1238 | |
| 1239 | /* Check the sample overflow count */ |
| 1240 | if (te->overflow) |
| 1241 | /* Account sample overflows and, if a particular limit |
| 1242 | * is reached, extend the sampling buffer. |
| 1243 | * For details, see sfb_account_overflows(). |
| 1244 | */ |
| 1245 | sampl_overflow += te->overflow; |
| 1246 | |
| 1247 | /* Timestamps are valid for full sample-data-blocks only */ |
| 1248 | debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " |
| 1249 | "overflow=%llu timestamp=0x%llx\n", |
| 1250 | sdbt, te->overflow, |
| 1251 | (te->f) ? trailer_timestamp(te) : 0ULL); |
| 1252 | |
| 1253 | /* Collect all samples from a single sample-data-block and |
| 1254 | * flag if an (perf) event overflow happened. If so, the PMU |
| 1255 | * is stopped and remaining samples will be discarded. |
| 1256 | */ |
| 1257 | hw_collect_samples(event, sdbt, &event_overflow); |
| 1258 | num_sdb++; |
| 1259 | |
| 1260 | /* Reset trailer (using compare-double-and-swap) */ |
| 1261 | do { |
| 1262 | te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; |
| 1263 | te_flags |= SDB_TE_ALERT_REQ_MASK; |
| 1264 | } while (!cmpxchg_double(&te->flags, &te->overflow, |
| 1265 | te->flags, te->overflow, |
| 1266 | te_flags, 0ULL)); |
| 1267 | |
| 1268 | /* Advance to next sample-data-block */ |
| 1269 | sdbt++; |
| 1270 | if (is_link_entry(sdbt)) |
| 1271 | sdbt = get_next_sdbt(sdbt); |
| 1272 | |
| 1273 | /* Update event hardware registers */ |
| 1274 | TEAR_REG(hwc) = (unsigned long) sdbt; |
| 1275 | |
| 1276 | /* Stop processing sample-data if all samples of the current |
| 1277 | * sample-data-block were flushed even if it was not full. |
| 1278 | */ |
| 1279 | if (flush_all && done) |
| 1280 | break; |
| 1281 | |
| 1282 | /* If an event overflow happened, discard samples by |
| 1283 | * processing any remaining sample-data-blocks. |
| 1284 | */ |
| 1285 | if (event_overflow) |
| 1286 | flush_all = 1; |
| 1287 | } |
| 1288 | |
| 1289 | /* Account sample overflows in the event hardware structure */ |
| 1290 | if (sampl_overflow) |
| 1291 | OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + |
| 1292 | sampl_overflow, 1 + num_sdb); |
| 1293 | if (sampl_overflow || event_overflow) |
| 1294 | debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " |
| 1295 | "overflow stats: sample=%llu event=%llu\n", |
| 1296 | sampl_overflow, event_overflow); |
| 1297 | } |
| 1298 | |
/* cpumsf_pmu_read() - PMU read callback
 * @event:	The perf event (not referenced)
 *
 * Intentionally empty: updates are interrupt-driven.
 */
static void cpumsf_pmu_read(struct perf_event *event)
{
}
| 1303 | |
| 1304 | /* Activate sampling control. |
| 1305 | * Next call of pmu_enable() starts sampling. |
| 1306 | */ |
| 1307 | static void cpumsf_pmu_start(struct perf_event *event, int flags) |
| 1308 | { |
| 1309 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 1310 | |
| 1311 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) |
| 1312 | return; |
| 1313 | |
| 1314 | if (flags & PERF_EF_RELOAD) |
| 1315 | WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); |
| 1316 | |
| 1317 | perf_pmu_disable(event->pmu); |
| 1318 | event->hw.state = 0; |
| 1319 | cpuhw->lsctl.cs = 1; |
| 1320 | if (SAMPL_DIAG_MODE(&event->hw)) |
| 1321 | cpuhw->lsctl.cd = 1; |
| 1322 | perf_pmu_enable(event->pmu); |
| 1323 | } |
| 1324 | |
| 1325 | /* Deactivate sampling control. |
| 1326 | * Next call of pmu_enable() stops sampling. |
| 1327 | */ |
| 1328 | static void cpumsf_pmu_stop(struct perf_event *event, int flags) |
| 1329 | { |
| 1330 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 1331 | |
| 1332 | if (event->hw.state & PERF_HES_STOPPED) |
| 1333 | return; |
| 1334 | |
| 1335 | perf_pmu_disable(event->pmu); |
| 1336 | cpuhw->lsctl.cs = 0; |
| 1337 | cpuhw->lsctl.cd = 0; |
| 1338 | event->hw.state |= PERF_HES_STOPPED; |
| 1339 | |
| 1340 | if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { |
| 1341 | hw_perf_event_update(event, 1); |
| 1342 | event->hw.state |= PERF_HES_UPTODATE; |
| 1343 | } |
| 1344 | perf_pmu_enable(event->pmu); |
| 1345 | } |
| 1346 | |
| 1347 | static int cpumsf_pmu_add(struct perf_event *event, int flags) |
| 1348 | { |
| 1349 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 1350 | int err; |
| 1351 | |
| 1352 | if (cpuhw->flags & PMU_F_IN_USE) |
| 1353 | return -EAGAIN; |
| 1354 | |
| 1355 | if (!cpuhw->sfb.sdbt) |
| 1356 | return -EINVAL; |
| 1357 | |
| 1358 | err = 0; |
| 1359 | perf_pmu_disable(event->pmu); |
| 1360 | |
| 1361 | event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
| 1362 | |
| 1363 | /* Set up sampling controls. Always program the sampling register |
| 1364 | * using the SDB-table start. Reset TEAR_REG event hardware register |
| 1365 | * that is used by hw_perf_event_update() to store the sampling buffer |
| 1366 | * position after samples have been flushed. |
| 1367 | */ |
| 1368 | cpuhw->lsctl.s = 0; |
| 1369 | cpuhw->lsctl.h = 1; |
| 1370 | cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; |
| 1371 | cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; |
| 1372 | cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); |
| 1373 | hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); |
| 1374 | |
| 1375 | /* Ensure sampling functions are in the disabled state. If disabled, |
| 1376 | * switch on sampling enable control. */ |
| 1377 | if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { |
| 1378 | err = -EAGAIN; |
| 1379 | goto out; |
| 1380 | } |
| 1381 | cpuhw->lsctl.es = 1; |
| 1382 | if (SAMPL_DIAG_MODE(&event->hw)) |
| 1383 | cpuhw->lsctl.ed = 1; |
| 1384 | |
| 1385 | /* Set in_use flag and store event */ |
| 1386 | cpuhw->event = event; |
| 1387 | cpuhw->flags |= PMU_F_IN_USE; |
| 1388 | |
| 1389 | if (flags & PERF_EF_START) |
| 1390 | cpumsf_pmu_start(event, PERF_EF_RELOAD); |
| 1391 | out: |
| 1392 | perf_event_update_userpage(event); |
| 1393 | perf_pmu_enable(event->pmu); |
| 1394 | return err; |
| 1395 | } |
| 1396 | |
| 1397 | static void cpumsf_pmu_del(struct perf_event *event, int flags) |
| 1398 | { |
| 1399 | struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 1400 | |
| 1401 | perf_pmu_disable(event->pmu); |
| 1402 | cpumsf_pmu_stop(event, PERF_EF_UPDATE); |
| 1403 | |
| 1404 | cpuhw->lsctl.es = 0; |
| 1405 | cpuhw->lsctl.ed = 0; |
| 1406 | cpuhw->flags &= ~PMU_F_IN_USE; |
| 1407 | cpuhw->event = NULL; |
| 1408 | |
| 1409 | perf_event_update_userpage(event); |
| 1410 | perf_pmu_enable(event->pmu); |
| 1411 | } |
| 1412 | |
| 1413 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); |
| 1414 | CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); |
| 1415 | |
| 1416 | static struct attribute *cpumsf_pmu_events_attr[] = { |
| 1417 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), |
| 1418 | NULL, |
| 1419 | NULL, |
| 1420 | }; |
| 1421 | |
| 1422 | PMU_FORMAT_ATTR(event, "config:0-63"); |
| 1423 | |
| 1424 | static struct attribute *cpumsf_pmu_format_attr[] = { |
| 1425 | &format_attr_event.attr, |
| 1426 | NULL, |
| 1427 | }; |
| 1428 | |
| 1429 | static struct attribute_group cpumsf_pmu_events_group = { |
| 1430 | .name = "events", |
| 1431 | .attrs = cpumsf_pmu_events_attr, |
| 1432 | }; |
| 1433 | static struct attribute_group cpumsf_pmu_format_group = { |
| 1434 | .name = "format", |
| 1435 | .attrs = cpumsf_pmu_format_attr, |
| 1436 | }; |
| 1437 | static const struct attribute_group *cpumsf_pmu_attr_groups[] = { |
| 1438 | &cpumsf_pmu_events_group, |
| 1439 | &cpumsf_pmu_format_group, |
| 1440 | NULL, |
| 1441 | }; |
| 1442 | |
| 1443 | static struct pmu cpumf_sampling = { |
| 1444 | .pmu_enable = cpumsf_pmu_enable, |
| 1445 | .pmu_disable = cpumsf_pmu_disable, |
| 1446 | |
| 1447 | .event_init = cpumsf_pmu_event_init, |
| 1448 | .add = cpumsf_pmu_add, |
| 1449 | .del = cpumsf_pmu_del, |
| 1450 | |
| 1451 | .start = cpumsf_pmu_start, |
| 1452 | .stop = cpumsf_pmu_stop, |
| 1453 | .read = cpumsf_pmu_read, |
| 1454 | |
| 1455 | .attr_groups = cpumsf_pmu_attr_groups, |
| 1456 | }; |
| 1457 | |
| 1458 | static void cpumf_measurement_alert(struct ext_code ext_code, |
| 1459 | unsigned int alert, unsigned long unused) |
| 1460 | { |
| 1461 | struct cpu_hw_sf *cpuhw; |
| 1462 | |
| 1463 | if (!(alert & CPU_MF_INT_SF_MASK)) |
| 1464 | return; |
| 1465 | inc_irq_stat(IRQEXT_CMS); |
| 1466 | cpuhw = this_cpu_ptr(&cpu_hw_sf); |
| 1467 | |
| 1468 | /* Measurement alerts are shared and might happen when the PMU |
| 1469 | * is not reserved. Ignore these alerts in this case. */ |
| 1470 | if (!(cpuhw->flags & PMU_F_RESERVED)) |
| 1471 | return; |
| 1472 | |
| 1473 | /* The processing below must take care of multiple alert events that |
| 1474 | * might be indicated concurrently. */ |
| 1475 | |
| 1476 | /* Program alert request */ |
| 1477 | if (alert & CPU_MF_INT_SF_PRA) { |
| 1478 | if (cpuhw->flags & PMU_F_IN_USE) |
| 1479 | hw_perf_event_update(cpuhw->event, 0); |
| 1480 | else |
| 1481 | WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); |
| 1482 | } |
| 1483 | |
| 1484 | /* Report measurement alerts only for non-PRA codes */ |
| 1485 | if (alert != CPU_MF_INT_SF_PRA) |
| 1486 | debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); |
| 1487 | |
| 1488 | /* Sampling authorization change request */ |
| 1489 | if (alert & CPU_MF_INT_SF_SACA) |
| 1490 | qsi(&cpuhw->qsi); |
| 1491 | |
| 1492 | /* Loss of sample data due to high-priority machine activities */ |
| 1493 | if (alert & CPU_MF_INT_SF_LSDA) { |
| 1494 | pr_err("Sample data was lost\n"); |
| 1495 | cpuhw->flags |= PMU_F_ERR_LSDA; |
| 1496 | sf_disable(); |
| 1497 | } |
| 1498 | |
| 1499 | /* Invalid sampling buffer entry */ |
| 1500 | if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { |
| 1501 | pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", |
| 1502 | alert); |
| 1503 | cpuhw->flags |= PMU_F_ERR_IBE; |
| 1504 | sf_disable(); |
| 1505 | } |
| 1506 | } |
| 1507 | |
| 1508 | static int cpumf_pmu_notifier(struct notifier_block *self, |
| 1509 | unsigned long action, void *hcpu) |
| 1510 | { |
| 1511 | unsigned int cpu = (long) hcpu; |
| 1512 | int flags; |
| 1513 | |
| 1514 | /* Ignore the notification if no events are scheduled on the PMU. |
| 1515 | * This might be racy... |
| 1516 | */ |
| 1517 | if (!atomic_read(&num_events)) |
| 1518 | return NOTIFY_OK; |
| 1519 | |
| 1520 | switch (action & ~CPU_TASKS_FROZEN) { |
| 1521 | case CPU_ONLINE: |
| 1522 | case CPU_ONLINE_FROZEN: |
| 1523 | flags = PMC_INIT; |
| 1524 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); |
| 1525 | break; |
| 1526 | case CPU_DOWN_PREPARE: |
| 1527 | flags = PMC_RELEASE; |
| 1528 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); |
| 1529 | break; |
| 1530 | default: |
| 1531 | break; |
| 1532 | } |
| 1533 | |
| 1534 | return NOTIFY_OK; |
| 1535 | } |
| 1536 | |
| 1537 | static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) |
| 1538 | { |
| 1539 | if (!cpum_sf_avail()) |
| 1540 | return -ENODEV; |
| 1541 | return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); |
| 1542 | } |
| 1543 | |
| 1544 | static int param_set_sfb_size(const char *val, const struct kernel_param *kp) |
| 1545 | { |
| 1546 | int rc; |
| 1547 | unsigned long min, max; |
| 1548 | |
| 1549 | if (!cpum_sf_avail()) |
| 1550 | return -ENODEV; |
| 1551 | if (!val || !strlen(val)) |
| 1552 | return -EINVAL; |
| 1553 | |
| 1554 | /* Valid parameter values: "min,max" or "max" */ |
| 1555 | min = CPUM_SF_MIN_SDB; |
| 1556 | max = CPUM_SF_MAX_SDB; |
| 1557 | if (strchr(val, ',')) |
| 1558 | rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; |
| 1559 | else |
| 1560 | rc = kstrtoul(val, 10, &max); |
| 1561 | |
| 1562 | if (min < 2 || min >= max || max > get_num_physpages()) |
| 1563 | rc = -EINVAL; |
| 1564 | if (rc) |
| 1565 | return rc; |
| 1566 | |
| 1567 | sfb_set_limits(min, max); |
| 1568 | pr_info("The sampling buffer limits have changed to: " |
| 1569 | "min=%lu max=%lu (diag=x%lu)\n", |
| 1570 | CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); |
| 1571 | return 0; |
| 1572 | } |
| 1573 | |
| 1574 | #define param_check_sfb_size(name, p) __param_check(name, p, void) |
| 1575 | static struct kernel_param_ops param_ops_sfb_size = { |
| 1576 | .set = param_set_sfb_size, |
| 1577 | .get = param_get_sfb_size, |
| 1578 | }; |
| 1579 | |
| 1580 | #define RS_INIT_FAILURE_QSI 0x0001 |
| 1581 | #define RS_INIT_FAILURE_BSDES 0x0002 |
| 1582 | #define RS_INIT_FAILURE_ALRT 0x0003 |
| 1583 | #define RS_INIT_FAILURE_PERF 0x0004 |
| 1584 | static void __init pr_cpumsf_err(unsigned int reason) |
| 1585 | { |
| 1586 | pr_err("Sampling facility support for perf is not available: " |
| 1587 | "reason=%04x\n", reason); |
| 1588 | } |
| 1589 | |
| 1590 | static int __init init_cpum_sampling_pmu(void) |
| 1591 | { |
| 1592 | struct hws_qsi_info_block si; |
| 1593 | int err; |
| 1594 | |
| 1595 | if (!cpum_sf_avail()) |
| 1596 | return -ENODEV; |
| 1597 | |
| 1598 | memset(&si, 0, sizeof(si)); |
| 1599 | if (qsi(&si)) { |
| 1600 | pr_cpumsf_err(RS_INIT_FAILURE_QSI); |
| 1601 | return -ENODEV; |
| 1602 | } |
| 1603 | |
| 1604 | if (si.bsdes != sizeof(struct hws_basic_entry)) { |
| 1605 | pr_cpumsf_err(RS_INIT_FAILURE_BSDES); |
| 1606 | return -EINVAL; |
| 1607 | } |
| 1608 | |
| 1609 | if (si.ad) { |
| 1610 | sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); |
| 1611 | cpumsf_pmu_events_attr[1] = |
| 1612 | CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); |
| 1613 | } |
| 1614 | |
| 1615 | sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); |
| 1616 | if (!sfdbg) |
| 1617 | pr_err("Registering for s390dbf failed\n"); |
| 1618 | debug_register_view(sfdbg, &debug_sprintf_view); |
| 1619 | |
| 1620 | err = register_external_irq(EXT_IRQ_MEASURE_ALERT, |
| 1621 | cpumf_measurement_alert); |
| 1622 | if (err) { |
| 1623 | pr_cpumsf_err(RS_INIT_FAILURE_ALRT); |
| 1624 | goto out; |
| 1625 | } |
| 1626 | |
| 1627 | err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); |
| 1628 | if (err) { |
| 1629 | pr_cpumsf_err(RS_INIT_FAILURE_PERF); |
| 1630 | unregister_external_irq(EXT_IRQ_MEASURE_ALERT, |
| 1631 | cpumf_measurement_alert); |
| 1632 | goto out; |
| 1633 | } |
| 1634 | perf_cpu_notifier(cpumf_pmu_notifier); |
| 1635 | out: |
| 1636 | return err; |
| 1637 | } |
| 1638 | arch_initcall(init_cpum_sampling_pmu); |
| 1639 | core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); |