Commit | Line | Data |
---|---|---|
8c069ff4 HB |
1 | /* |
2 | * Performance event support for the System z CPU-measurement Sampling Facility | |
3 | * | |
4 | * Copyright IBM Corp. 2013 | |
5 | * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License (version 2 only) | |
9 | * as published by the Free Software Foundation. | |
10 | */ | |
11 | #define KMSG_COMPONENT "cpum_sf" | |
12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/kernel_stat.h> | |
16 | #include <linux/perf_event.h> | |
17 | #include <linux/percpu.h> | |
18 | #include <linux/notifier.h> | |
19 | #include <linux/export.h> | |
69f239ed HB |
20 | #include <linux/mm.h> |
21 | #include <linux/moduleparam.h> | |
8c069ff4 HB |
22 | #include <asm/cpu_mf.h> |
23 | #include <asm/irq.h> | |
24 | #include <asm/debug.h> | |
25 | #include <asm/timex.h> | |
26 | ||
/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table holds up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Number of sample-data-blocks per sample-data-block-table (SDBT):
 * A table consists of SDB pointers (8 bytes each) plus one table-link
 * entry (also 8 bytes) that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
69f239ed HB |
39 | /* Maximum page offset for an SDBT table-link entry: |
40 | * If this page offset is reached, a table-link entry to the next SDBT | |
41 | * must be added. | |
42 | */ | |
43 | #define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) | |
44 | static inline int require_table_link(const void *sdbt) | |
45 | { | |
46 | return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; | |
47 | } | |
48 | ||
49 | /* Minimum and maximum sampling buffer sizes: | |
50 | * | |
51 | * This number represents the maximum size of the sampling buffer | |
52 | * taking the number of sample-data-block-tables into account. | |
8c069ff4 | 53 | * |
69f239ed HB |
54 | * Sampling buffer size Buffer characteristics |
55 | * --------------------------------------------------- | |
56 | * 64KB == 16 pages (4KB per page) | |
57 | * 1 page for SDB-tables | |
58 | * 15 pages for SDBs | |
59 | * | |
60 | * 32MB == 8192 pages (4KB per page) | |
61 | * 16 pages for SDB-tables | |
62 | * 8176 pages for SDBs | |
8c069ff4 | 63 | */ |
69f239ed HB |
64 | static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; |
65 | static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; | |
8c069ff4 HB |
66 | |
67 | struct sf_buffer { | |
69f239ed | 68 | unsigned long *sdbt; /* Sample-data-block-table origin */ |
8c069ff4 | 69 | /* buffer characteristics (required for buffer increments) */ |
69f239ed HB |
70 | unsigned long num_sdb; /* Number of sample-data-blocks */ |
71 | unsigned long num_sdbt; /* Number of sample-data-block-tables */ | |
72 | unsigned long *tail; /* last sample-data-block-table */ | |
8c069ff4 HB |
73 | }; |
74 | ||
75 | struct cpu_hw_sf { | |
76 | /* CPU-measurement sampling information block */ | |
77 | struct hws_qsi_info_block qsi; | |
69f239ed | 78 | /* CPU-measurement sampling control block */ |
8c069ff4 HB |
79 | struct hws_lsctl_request_block lsctl; |
80 | struct sf_buffer sfb; /* Sampling buffer */ | |
81 | unsigned int flags; /* Status flags */ | |
82 | struct perf_event *event; /* Scheduled perf event */ | |
83 | }; | |
84 | static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); | |
85 | ||
86 | /* Debug feature */ | |
87 | static debug_info_t *sfdbg; | |
88 | ||
69f239ed HB |
89 | /* |
90 | * sf_disable() - Switch off sampling facility | |
91 | */ | |
92 | static int sf_disable(void) | |
93 | { | |
94 | struct hws_lsctl_request_block sreq; | |
95 | ||
96 | memset(&sreq, 0, sizeof(sreq)); | |
97 | return lsctl(&sreq); | |
98 | } | |
99 | ||
8c069ff4 HB |
100 | /* |
101 | * sf_buffer_available() - Check for an allocated sampling buffer | |
102 | */ | |
103 | static int sf_buffer_available(struct cpu_hw_sf *cpuhw) | |
104 | { | |
69f239ed | 105 | return !!cpuhw->sfb.sdbt; |
8c069ff4 HB |
106 | } |
107 | ||
108 | /* | |
109 | * deallocate sampling facility buffer | |
110 | */ | |
111 | static void free_sampling_buffer(struct sf_buffer *sfb) | |
112 | { | |
69f239ed | 113 | unsigned long *sdbt, *curr; |
8c069ff4 HB |
114 | |
115 | if (!sfb->sdbt) | |
116 | return; | |
117 | ||
118 | sdbt = sfb->sdbt; | |
69f239ed | 119 | curr = sdbt; |
8c069ff4 | 120 | |
69f239ed | 121 | /* Free the SDBT after all SDBs are processed... */ |
8c069ff4 HB |
122 | while (1) { |
123 | if (!*curr || !sdbt) | |
124 | break; | |
125 | ||
69f239ed | 126 | /* Process table-link entries */ |
8c069ff4 HB |
127 | if (is_link_entry(curr)) { |
128 | curr = get_next_sdbt(curr); | |
129 | if (sdbt) | |
69f239ed | 130 | free_page((unsigned long) sdbt); |
8c069ff4 | 131 | |
69f239ed HB |
132 | /* If the origin is reached, sampling buffer is freed */ |
133 | if (curr == sfb->sdbt) | |
8c069ff4 HB |
134 | break; |
135 | else | |
69f239ed | 136 | sdbt = curr; |
8c069ff4 | 137 | } else { |
69f239ed | 138 | /* Process SDB pointer */ |
8c069ff4 HB |
139 | if (*curr) { |
140 | free_page(*curr); | |
141 | curr++; | |
142 | } | |
143 | } | |
144 | } | |
145 | ||
146 | debug_sprintf_event(sfdbg, 5, | |
69f239ed | 147 | "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); |
8c069ff4 HB |
148 | memset(sfb, 0, sizeof(*sfb)); |
149 | } | |
150 | ||
69f239ed HB |
151 | static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) |
152 | { | |
153 | unsigned long sdb, *trailer; | |
154 | ||
155 | /* Allocate and initialize sample-data-block */ | |
156 | sdb = get_zeroed_page(gfp_flags); | |
157 | if (!sdb) | |
158 | return -ENOMEM; | |
159 | trailer = trailer_entry_ptr(sdb); | |
160 | *trailer = SDB_TE_ALERT_REQ_MASK; | |
161 | ||
162 | /* Link SDB into the sample-data-block-table */ | |
163 | *sdbt = sdb; | |
164 | ||
165 | return 0; | |
166 | } | |
167 | ||
168 | /* | |
169 | * realloc_sampling_buffer() - extend sampler memory | |
170 | * | |
171 | * Allocates new sample-data-blocks and adds them to the specified sampling | |
172 | * buffer memory. | |
173 | * | |
174 | * Important: This modifies the sampling buffer and must be called when the | |
175 | * sampling facility is disabled. | |
176 | * | |
177 | * Returns zero on success, non-zero otherwise. | |
178 | */ | |
179 | static int realloc_sampling_buffer(struct sf_buffer *sfb, | |
180 | unsigned long num_sdb, gfp_t gfp_flags) | |
181 | { | |
182 | int i, rc; | |
183 | unsigned long *new, *tail; | |
184 | ||
185 | if (!sfb->sdbt || !sfb->tail) | |
186 | return -EINVAL; | |
187 | ||
188 | if (!is_link_entry(sfb->tail)) | |
189 | return -EINVAL; | |
190 | ||
191 | /* Append to the existing sampling buffer, overwriting the table-link | |
192 | * register. | |
193 | * The tail variables always points to the "tail" (last and table-link) | |
194 | * entry in an SDB-table. | |
195 | */ | |
196 | tail = sfb->tail; | |
197 | ||
198 | /* Do a sanity check whether the table-link entry points to | |
199 | * the sampling buffer origin. | |
200 | */ | |
201 | if (sfb->sdbt != get_next_sdbt(tail)) { | |
202 | debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " | |
203 | "sampling buffer is not linked: origin=%p" | |
204 | "tail=%p\n", | |
205 | (void *) sfb->sdbt, (void *) tail); | |
206 | return -EINVAL; | |
207 | } | |
208 | ||
209 | /* Allocate remaining SDBs */ | |
210 | rc = 0; | |
211 | for (i = 0; i < num_sdb; i++) { | |
212 | /* Allocate a new SDB-table if it is full. */ | |
213 | if (require_table_link(tail)) { | |
214 | new = (unsigned long *) get_zeroed_page(gfp_flags); | |
215 | if (!new) { | |
216 | rc = -ENOMEM; | |
217 | break; | |
218 | } | |
219 | sfb->num_sdbt++; | |
220 | /* Link current page to tail of chain */ | |
221 | *tail = (unsigned long)(void *) new + 1; | |
222 | tail = new; | |
223 | } | |
224 | ||
225 | /* Allocate a new sample-data-block. | |
226 | * If there is not enough memory, stop the realloc process | |
227 | * and simply use what was allocated. If this is a temporary | |
228 | * issue, a new realloc call (if required) might succeed. | |
229 | */ | |
230 | rc = alloc_sample_data_block(tail, gfp_flags); | |
231 | if (rc) | |
232 | break; | |
233 | sfb->num_sdb++; | |
234 | tail++; | |
235 | } | |
236 | ||
237 | /* Link sampling buffer to its origin */ | |
238 | *tail = (unsigned long) sfb->sdbt + 1; | |
239 | sfb->tail = tail; | |
240 | ||
241 | debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" | |
242 | " settings: sdbt=%lu sdb=%lu\n", | |
243 | sfb->num_sdbt, sfb->num_sdb); | |
244 | return rc; | |
245 | } | |
246 | ||
8c069ff4 HB |
247 | /* |
248 | * allocate_sampling_buffer() - allocate sampler memory | |
249 | * | |
250 | * Allocates and initializes a sampling buffer structure using the | |
251 | * specified number of sample-data-blocks (SDB). For each allocation, | |
252 | * a 4K page is used. The number of sample-data-block-tables (SDBT) | |
253 | * are calculated from SDBs. | |
254 | * Also set the ALERT_REQ mask in each SDBs trailer. | |
255 | * | |
256 | * Returns zero on success, non-zero otherwise. | |
257 | */ | |
258 | static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) | |
259 | { | |
69f239ed | 260 | int rc; |
8c069ff4 HB |
261 | |
262 | if (sfb->sdbt) | |
263 | return -EINVAL; | |
69f239ed HB |
264 | |
265 | /* Allocate the sample-data-block-table origin */ | |
266 | sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); | |
267 | if (!sfb->sdbt) | |
268 | return -ENOMEM; | |
8c069ff4 | 269 | sfb->num_sdb = 0; |
69f239ed | 270 | sfb->num_sdbt = 1; |
8c069ff4 | 271 | |
69f239ed HB |
272 | /* Link the table origin to point to itself to prepare for |
273 | * realloc_sampling_buffer() invocation. | |
274 | */ | |
275 | sfb->tail = sfb->sdbt; | |
276 | *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; | |
8c069ff4 | 277 | |
69f239ed HB |
278 | /* Allocate requested number of sample-data-blocks */ |
279 | rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); | |
280 | if (rc) { | |
281 | free_sampling_buffer(sfb); | |
282 | debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " | |
283 | "realloc_sampling_buffer failed with rc=%i\n", rc); | |
284 | } else | |
285 | debug_sprintf_event(sfdbg, 4, | |
286 | "alloc_sampling_buffer: tear=%p dear=%p\n", | |
287 | sfb->sdbt, (void *) *sfb->sdbt); | |
288 | return rc; | |
289 | } | |
8c069ff4 | 290 | |
69f239ed HB |
291 | static void sfb_set_limits(unsigned long min, unsigned long max) |
292 | { | |
293 | CPUM_SF_MIN_SDB = min; | |
294 | CPUM_SF_MAX_SDB = max; | |
295 | } | |
8c069ff4 | 296 | |
69f239ed HB |
297 | static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, |
298 | struct hw_perf_event *hwc) | |
299 | { | |
300 | if (!sfb->sdbt) | |
301 | return SFB_ALLOC_REG(hwc); | |
302 | if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) | |
303 | return SFB_ALLOC_REG(hwc) - sfb->num_sdb; | |
304 | return 0; | |
305 | } | |
8c069ff4 | 306 | |
69f239ed HB |
/* Return non-zero if the sampling buffer still has allocations pending */
static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) > 0;
}
8c069ff4 | 312 | |
69f239ed HB |
313 | static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) |
314 | { | |
315 | /* Limit the number SDBs to not exceed the maximum */ | |
316 | num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); | |
317 | if (num) | |
318 | SFB_ALLOC_REG(hwc) += num; | |
8c069ff4 HB |
319 | } |
320 | ||
69f239ed HB |
321 | static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) |
322 | { | |
323 | SFB_ALLOC_REG(hwc) = 0; | |
324 | sfb_account_allocs(num, hwc); | |
325 | } | |
326 | ||
327 | static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) | |
8c069ff4 HB |
328 | { |
329 | unsigned long n_sdb, freq; | |
330 | unsigned long factor; | |
331 | ||
332 | /* Calculate sampling buffers using 4K pages | |
333 | * | |
334 | * 1. Use frequency as input. The samping buffer is designed for | |
335 | * a complete second. This can be adjusted through the "factor" | |
336 | * variable. | |
337 | * In any case, alloc_sampling_buffer() sets the Alert Request | |
338 | * Control indicator to trigger measurement-alert to harvest | |
339 | * sample-data-blocks (sdb). | |
340 | * | |
341 | * 2. Compute the number of sample-data-blocks and ensure a minimum | |
342 | * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not | |
343 | * exceed CPUM_SF_MAX_SDB. See also the remarks for these | |
344 | * symbolic constants. | |
345 | * | |
346 | * 3. Compute number of pages used for the sample-data-block-table | |
347 | * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table | |
348 | * to manage up to 511 sample-data-blocks). | |
349 | */ | |
350 | freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); | |
351 | factor = 1; | |
352 | n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); | |
353 | if (n_sdb < CPUM_SF_MIN_SDB) | |
354 | n_sdb = CPUM_SF_MIN_SDB; | |
355 | ||
69f239ed HB |
356 | /* If there is already a sampling buffer allocated, it is very likely |
357 | * that the sampling facility is enabled too. If the event to be | |
358 | * initialized requires a greater sampling buffer, the allocation must | |
359 | * be postponed. Changing the sampling buffer requires the sampling | |
360 | * facility to be in the disabled state. So, account the number of | |
361 | * required SDBs and let cpumsf_pmu_enable() resize the buffer just | |
362 | * before the event is started. | |
8c069ff4 | 363 | */ |
69f239ed | 364 | sfb_init_allocs(n_sdb, hwc); |
8c069ff4 HB |
365 | if (sf_buffer_available(cpuhw)) |
366 | return 0; | |
367 | ||
368 | debug_sprintf_event(sfdbg, 3, | |
69f239ed | 369 | "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", |
8c069ff4 HB |
370 | SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); |
371 | ||
372 | return alloc_sampling_buffer(&cpuhw->sfb, | |
69f239ed | 373 | sfb_pending_allocs(&cpuhw->sfb, hwc)); |
8c069ff4 HB |
374 | } |
375 | ||
69f239ed HB |
376 | static unsigned long min_percent(unsigned int percent, unsigned long base, |
377 | unsigned long min) | |
378 | { | |
379 | return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); | |
380 | } | |
8c069ff4 | 381 | |
69f239ed HB |
/* Map a sample-loss ratio (percent) to a buffer extension size.
 *
 * Use a percentage-based approach to extend the sampling facility
 * buffer.  Accept up to 5% sample data loss.
 * Vary the extents between 1% to 5% of the current number of
 * sample-data-blocks.
 */
static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	if (ratio <= 5)
		return 0;
	/* The former "<= 25" and "<= 50" branches were byte-identical
	 * duplicates; they are merged here (same behavior, less code).
	 */
	if (ratio <= 50)
		return min_percent(1, base, 1);
	if (ratio <= 75)
		return min_percent(2, base, 2);
	if (ratio <= 100)
		return min_percent(3, base, 3);
	if (ratio <= 250)
		return min_percent(4, base, 4);

	return min_percent(5, base, 8);
}
8c069ff4 | 404 | |
69f239ed HB |
405 | static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, |
406 | struct hw_perf_event *hwc) | |
407 | { | |
408 | unsigned long ratio, num; | |
409 | ||
410 | if (!OVERFLOW_REG(hwc)) | |
411 | return; | |
412 | ||
413 | /* The sample_overflow contains the average number of sample data | |
414 | * that has been lost because sample-data-blocks were full. | |
415 | * | |
416 | * Calculate the total number of sample data entries that has been | |
417 | * discarded. Then calculate the ratio of lost samples to total samples | |
418 | * per second in percent. | |
419 | */ | |
420 | ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, | |
421 | sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); | |
422 | ||
423 | /* Compute number of sample-data-blocks */ | |
424 | num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); | |
425 | if (num) | |
426 | sfb_account_allocs(num, hwc); | |
427 | ||
428 | debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" | |
429 | " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); | |
430 | OVERFLOW_REG(hwc) = 0; | |
431 | } | |
432 | ||
433 | /* extend_sampling_buffer() - Extend sampling buffer | |
434 | * @sfb: Sampling buffer structure (for local CPU) | |
435 | * @hwc: Perf event hardware structure | |
436 | * | |
437 | * Use this function to extend the sampling buffer based on the overflow counter | |
438 | * and postponed allocation extents stored in the specified Perf event hardware. | |
439 | * | |
440 | * Important: This function disables the sampling facility in order to safely | |
441 | * change the sampling buffer structure. Do not call this function | |
442 | * when the PMU is active. | |
8c069ff4 | 443 | */ |
69f239ed HB |
444 | static void extend_sampling_buffer(struct sf_buffer *sfb, |
445 | struct hw_perf_event *hwc) | |
8c069ff4 | 446 | { |
69f239ed HB |
447 | unsigned long num, num_old; |
448 | int rc; | |
8c069ff4 | 449 | |
69f239ed HB |
450 | num = sfb_pending_allocs(sfb, hwc); |
451 | if (!num) | |
452 | return; | |
453 | num_old = sfb->num_sdb; | |
454 | ||
455 | /* Disable the sampling facility to reset any states and also | |
456 | * clear pending measurement alerts. | |
457 | */ | |
458 | sf_disable(); | |
459 | ||
460 | /* Extend the sampling buffer. | |
461 | * This memory allocation typically happens in an atomic context when | |
462 | * called by perf. Because this is a reallocation, it is fine if the | |
463 | * new SDB-request cannot be satisfied immediately. | |
464 | */ | |
465 | rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); | |
466 | if (rc) | |
467 | debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " | |
468 | "failed with rc=%i\n", rc); | |
469 | ||
470 | if (sfb_has_pending_allocs(sfb, hwc)) | |
471 | debug_sprintf_event(sfdbg, 5, "sfb: extend: " | |
472 | "req=%lu alloc=%lu remaining=%lu\n", | |
473 | num, sfb->num_sdb - num_old, | |
474 | sfb_pending_allocs(sfb, hwc)); | |
8c069ff4 HB |
475 | } |
476 | ||
477 | ||
69f239ed HB |
478 | /* Number of perf events counting hardware events */ |
479 | static atomic_t num_events; | |
480 | /* Used to avoid races in calling reserve/release_cpumf_hardware */ | |
481 | static DEFINE_MUTEX(pmc_reserve_mutex); | |
482 | ||
8c069ff4 HB |
483 | #define PMC_INIT 0 |
484 | #define PMC_RELEASE 1 | |
e28bb79d | 485 | #define PMC_FAILURE 2 |
8c069ff4 HB |
486 | static void setup_pmc_cpu(void *flags) |
487 | { | |
488 | int err; | |
489 | struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); | |
490 | ||
8c069ff4 HB |
491 | err = 0; |
492 | switch (*((int *) flags)) { | |
493 | case PMC_INIT: | |
494 | memset(cpusf, 0, sizeof(*cpusf)); | |
495 | err = qsi(&cpusf->qsi); | |
496 | if (err) | |
497 | break; | |
498 | cpusf->flags |= PMU_F_RESERVED; | |
499 | err = sf_disable(); | |
500 | if (err) | |
501 | pr_err("Switching off the sampling facility failed " | |
502 | "with rc=%i\n", err); | |
503 | debug_sprintf_event(sfdbg, 5, | |
504 | "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); | |
505 | break; | |
506 | case PMC_RELEASE: | |
507 | cpusf->flags &= ~PMU_F_RESERVED; | |
508 | err = sf_disable(); | |
509 | if (err) { | |
510 | pr_err("Switching off the sampling facility failed " | |
511 | "with rc=%i\n", err); | |
512 | } else { | |
513 | if (cpusf->sfb.sdbt) | |
514 | free_sampling_buffer(&cpusf->sfb); | |
515 | } | |
516 | debug_sprintf_event(sfdbg, 5, | |
517 | "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); | |
518 | break; | |
519 | } | |
e28bb79d HB |
520 | if (err) |
521 | *((int *) flags) |= PMC_FAILURE; | |
8c069ff4 HB |
522 | } |
523 | ||
524 | static void release_pmc_hardware(void) | |
525 | { | |
526 | int flags = PMC_RELEASE; | |
527 | ||
528 | irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); | |
529 | on_each_cpu(setup_pmc_cpu, &flags, 1); | |
e28bb79d | 530 | perf_release_sampling(); |
8c069ff4 HB |
531 | } |
532 | ||
533 | static int reserve_pmc_hardware(void) | |
534 | { | |
535 | int flags = PMC_INIT; | |
e28bb79d | 536 | int err; |
8c069ff4 | 537 | |
e28bb79d HB |
538 | err = perf_reserve_sampling(); |
539 | if (err) | |
540 | return err; | |
8c069ff4 | 541 | on_each_cpu(setup_pmc_cpu, &flags, 1); |
e28bb79d HB |
542 | if (flags & PMC_FAILURE) { |
543 | release_pmc_hardware(); | |
544 | return -ENODEV; | |
545 | } | |
8c069ff4 HB |
546 | irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); |
547 | ||
548 | return 0; | |
549 | } | |
550 | ||
551 | static void hw_perf_event_destroy(struct perf_event *event) | |
552 | { | |
553 | /* Release PMC if this is the last perf event */ | |
554 | if (!atomic_add_unless(&num_events, -1, 1)) { | |
555 | mutex_lock(&pmc_reserve_mutex); | |
556 | if (atomic_dec_return(&num_events) == 0) | |
557 | release_pmc_hardware(); | |
558 | mutex_unlock(&pmc_reserve_mutex); | |
559 | } | |
560 | } | |
561 | ||
562 | static void hw_init_period(struct hw_perf_event *hwc, u64 period) | |
563 | { | |
564 | hwc->sample_period = period; | |
565 | hwc->last_period = hwc->sample_period; | |
566 | local64_set(&hwc->period_left, hwc->sample_period); | |
567 | } | |
568 | ||
569 | static void hw_reset_registers(struct hw_perf_event *hwc, | |
69f239ed | 570 | unsigned long *sdbt_origin) |
8c069ff4 | 571 | { |
69f239ed HB |
572 | /* (Re)set to first sample-data-block-table */ |
573 | TEAR_REG(hwc) = (unsigned long) sdbt_origin; | |
8c069ff4 HB |
574 | } |
575 | ||
576 | static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, | |
577 | unsigned long rate) | |
578 | { | |
69f239ed HB |
579 | return clamp_t(unsigned long, rate, |
580 | si->min_sampl_rate, si->max_sampl_rate); | |
8c069ff4 HB |
581 | } |
582 | ||
583 | static int __hw_perf_event_init(struct perf_event *event) | |
584 | { | |
585 | struct cpu_hw_sf *cpuhw; | |
586 | struct hws_qsi_info_block si; | |
587 | struct perf_event_attr *attr = &event->attr; | |
588 | struct hw_perf_event *hwc = &event->hw; | |
589 | unsigned long rate; | |
590 | int cpu, err; | |
591 | ||
592 | /* Reserve CPU-measurement sampling facility */ | |
593 | err = 0; | |
594 | if (!atomic_inc_not_zero(&num_events)) { | |
595 | mutex_lock(&pmc_reserve_mutex); | |
596 | if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) | |
597 | err = -EBUSY; | |
598 | else | |
599 | atomic_inc(&num_events); | |
600 | mutex_unlock(&pmc_reserve_mutex); | |
601 | } | |
602 | event->destroy = hw_perf_event_destroy; | |
603 | ||
604 | if (err) | |
605 | goto out; | |
606 | ||
607 | /* Access per-CPU sampling information (query sampling info) */ | |
608 | /* | |
609 | * The event->cpu value can be -1 to count on every CPU, for example, | |
610 | * when attaching to a task. If this is specified, use the query | |
611 | * sampling info from the current CPU, otherwise use event->cpu to | |
612 | * retrieve the per-CPU information. | |
613 | * Later, cpuhw indicates whether to allocate sampling buffers for a | |
614 | * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). | |
615 | */ | |
616 | memset(&si, 0, sizeof(si)); | |
617 | cpuhw = NULL; | |
618 | if (event->cpu == -1) | |
619 | qsi(&si); | |
620 | else { | |
621 | /* Event is pinned to a particular CPU, retrieve the per-CPU | |
622 | * sampling structure for accessing the CPU-specific QSI. | |
623 | */ | |
624 | cpuhw = &per_cpu(cpu_hw_sf, event->cpu); | |
625 | si = cpuhw->qsi; | |
626 | } | |
627 | ||
628 | /* Check sampling facility authorization and, if not authorized, | |
629 | * fall back to other PMUs. It is safe to check any CPU because | |
630 | * the authorization is identical for all configured CPUs. | |
631 | */ | |
632 | if (!si.as) { | |
633 | err = -ENOENT; | |
634 | goto out; | |
635 | } | |
636 | ||
637 | /* The sampling information (si) contains information about the | |
638 | * min/max sampling intervals and the CPU speed. So calculate the | |
639 | * correct sampling interval and avoid the whole period adjust | |
640 | * feedback loop. | |
641 | */ | |
642 | rate = 0; | |
643 | if (attr->freq) { | |
644 | rate = freq_to_sample_rate(&si, attr->sample_freq); | |
645 | rate = hw_limit_rate(&si, rate); | |
646 | attr->freq = 0; | |
647 | attr->sample_period = rate; | |
648 | } else { | |
649 | /* The min/max sampling rates specifies the valid range | |
650 | * of sample periods. If the specified sample period is | |
651 | * out of range, limit the period to the range boundary. | |
652 | */ | |
653 | rate = hw_limit_rate(&si, hwc->sample_period); | |
654 | ||
655 | /* The perf core maintains a maximum sample rate that is | |
656 | * configurable through the sysctl interface. Ensure the | |
657 | * sampling rate does not exceed this value. This also helps | |
658 | * to avoid throttling when pushing samples with | |
659 | * perf_event_overflow(). | |
660 | */ | |
661 | if (sample_rate_to_freq(&si, rate) > | |
662 | sysctl_perf_event_sample_rate) { | |
663 | err = -EINVAL; | |
664 | debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); | |
665 | goto out; | |
666 | } | |
667 | } | |
668 | SAMPL_RATE(hwc) = rate; | |
669 | hw_init_period(hwc, SAMPL_RATE(hwc)); | |
670 | ||
69f239ed HB |
671 | /* Initialize sample data overflow accounting */ |
672 | hwc->extra_reg.reg = REG_OVERFLOW; | |
673 | OVERFLOW_REG(hwc) = 0; | |
674 | ||
8c069ff4 HB |
675 | /* Allocate the per-CPU sampling buffer using the CPU information |
676 | * from the event. If the event is not pinned to a particular | |
677 | * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling | |
678 | * buffers for each online CPU. | |
679 | */ | |
680 | if (cpuhw) | |
681 | /* Event is pinned to a particular CPU */ | |
682 | err = allocate_sdbt(cpuhw, hwc); | |
683 | else { | |
684 | /* Event is not pinned, allocate sampling buffer on | |
685 | * each online CPU | |
686 | */ | |
687 | for_each_online_cpu(cpu) { | |
688 | cpuhw = &per_cpu(cpu_hw_sf, cpu); | |
689 | err = allocate_sdbt(cpuhw, hwc); | |
690 | if (err) | |
691 | break; | |
692 | } | |
693 | } | |
694 | out: | |
695 | return err; | |
696 | } | |
697 | ||
698 | static int cpumsf_pmu_event_init(struct perf_event *event) | |
699 | { | |
700 | int err; | |
701 | ||
55baa2f8 HB |
702 | /* No support for taken branch sampling */ |
703 | if (has_branch_stack(event)) | |
704 | return -EOPNOTSUPP; | |
705 | ||
706 | switch (event->attr.type) { | |
707 | case PERF_TYPE_RAW: | |
708 | if (event->attr.config != PERF_EVENT_CPUM_SF) | |
709 | return -ENOENT; | |
710 | break; | |
711 | case PERF_TYPE_HARDWARE: | |
712 | /* Support sampling of CPU cycles in addition to the | |
713 | * counter facility. However, the counter facility | |
714 | * is more precise and, hence, restrict this PMU to | |
715 | * sampling events only. | |
716 | */ | |
717 | if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) | |
718 | return -ENOENT; | |
719 | if (!is_sampling_event(event)) | |
720 | return -ENOENT; | |
721 | break; | |
722 | default: | |
8c069ff4 | 723 | return -ENOENT; |
55baa2f8 | 724 | } |
8c069ff4 | 725 | |
dd127b3b | 726 | /* Check online status of the CPU to which the event is pinned */ |
8c069ff4 HB |
727 | if (event->cpu >= nr_cpumask_bits || |
728 | (event->cpu >= 0 && !cpu_online(event->cpu))) | |
729 | return -ENODEV; | |
730 | ||
dd127b3b HB |
731 | /* Force reset of idle/hv excludes regardless of what the |
732 | * user requested. | |
733 | */ | |
734 | if (event->attr.exclude_hv) | |
735 | event->attr.exclude_hv = 0; | |
736 | if (event->attr.exclude_idle) | |
737 | event->attr.exclude_idle = 0; | |
738 | ||
8c069ff4 HB |
739 | err = __hw_perf_event_init(event); |
740 | if (unlikely(err)) | |
741 | if (event->destroy) | |
742 | event->destroy(event); | |
743 | return err; | |
744 | } | |
745 | ||
746 | static void cpumsf_pmu_enable(struct pmu *pmu) | |
747 | { | |
748 | struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); | |
69f239ed | 749 | struct hw_perf_event *hwc; |
8c069ff4 HB |
750 | int err; |
751 | ||
752 | if (cpuhw->flags & PMU_F_ENABLED) | |
753 | return; | |
754 | ||
755 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
756 | return; | |
757 | ||
69f239ed HB |
758 | /* Check whether to extent the sampling buffer. |
759 | * | |
760 | * Two conditions trigger an increase of the sampling buffer for a | |
761 | * perf event: | |
762 | * 1. Postponed buffer allocations from the event initialization. | |
763 | * 2. Sampling overflows that contribute to pending allocations. | |
764 | * | |
765 | * Note that the extend_sampling_buffer() function disables the sampling | |
766 | * facility, but it can be fully re-enabled using sampling controls that | |
767 | * have been saved in cpumsf_pmu_disable(). | |
768 | */ | |
769 | if (cpuhw->event) { | |
770 | hwc = &cpuhw->event->hw; | |
771 | /* Account number of overflow-designated buffer extents */ | |
772 | sfb_account_overflows(cpuhw, hwc); | |
773 | if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) | |
774 | extend_sampling_buffer(&cpuhw->sfb, hwc); | |
775 | } | |
776 | ||
777 | /* (Re)enable the PMU and sampling facility */ | |
8c069ff4 HB |
778 | cpuhw->flags |= PMU_F_ENABLED; |
779 | barrier(); | |
780 | ||
781 | err = lsctl(&cpuhw->lsctl); | |
782 | if (err) { | |
783 | cpuhw->flags &= ~PMU_F_ENABLED; | |
784 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
785 | 1, err); | |
786 | return; | |
787 | } | |
788 | ||
789 | debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", | |
790 | cpuhw->lsctl.es, cpuhw->lsctl.cs, | |
791 | (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); | |
792 | } | |
793 | ||
794 | static void cpumsf_pmu_disable(struct pmu *pmu) | |
795 | { | |
796 | struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); | |
797 | struct hws_lsctl_request_block inactive; | |
798 | struct hws_qsi_info_block si; | |
799 | int err; | |
800 | ||
801 | if (!(cpuhw->flags & PMU_F_ENABLED)) | |
802 | return; | |
803 | ||
804 | if (cpuhw->flags & PMU_F_ERR_MASK) | |
805 | return; | |
806 | ||
807 | /* Switch off sampling activation control */ | |
808 | inactive = cpuhw->lsctl; | |
809 | inactive.cs = 0; | |
810 | ||
811 | err = lsctl(&inactive); | |
812 | if (err) { | |
813 | pr_err("Loading sampling controls failed: op=%i err=%i\n", | |
814 | 2, err); | |
815 | return; | |
816 | } | |
817 | ||
818 | /* Save state of TEAR and DEAR register contents */ | |
819 | if (!qsi(&si)) { | |
820 | /* TEAR/DEAR values are valid only if the sampling facility is | |
821 | * enabled. Note that cpumsf_pmu_disable() might be called even | |
822 | * for a disabled sampling facility because cpumsf_pmu_enable() | |
823 | * controls the enable/disable state. | |
824 | */ | |
825 | if (si.es) { | |
826 | cpuhw->lsctl.tear = si.tear; | |
827 | cpuhw->lsctl.dear = si.dear; | |
828 | } | |
829 | } else | |
830 | debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " | |
831 | "qsi() failed with err=%i\n", err); | |
832 | ||
833 | cpuhw->flags &= ~PMU_F_ENABLED; | |
834 | } | |
835 | ||
dd127b3b HB |
836 | /* perf_exclude_event() - Filter event |
837 | * @event: The perf event | |
838 | * @regs: pt_regs structure | |
839 | * @sde_regs: Sample-data-entry (sde) regs structure | |
840 | * | |
841 | * Filter perf events according to their exclude specification. | |
842 | * | |
843 | * Return non-zero if the event shall be excluded. | |
844 | */ | |
845 | static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, | |
846 | struct perf_sf_sde_regs *sde_regs) | |
847 | { | |
848 | if (event->attr.exclude_user && user_mode(regs)) | |
849 | return 1; | |
850 | if (event->attr.exclude_kernel && !user_mode(regs)) | |
851 | return 1; | |
852 | if (event->attr.exclude_guest && sde_regs->in_guest) | |
853 | return 1; | |
854 | if (event->attr.exclude_host && !sde_regs->in_guest) | |
855 | return 1; | |
856 | return 0; | |
857 | } | |
858 | ||
/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sample:	Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample.  The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows.  If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
			    struct hws_data_entry *sample)
{
	int overflow;
	struct pt_regs regs;
	struct perf_sf_sde_regs *sde_regs;
	struct perf_sample_data data;

	/* Skip samples that are invalid or for which the instruction address
	 * is not predictable.  For the latter, the wait-state bit is set.
	 */
	if (sample->I || sample->W)
		return 0;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/* Set up pt_regs to look like a CPU-measurement external interrupt
	 * using the Program Request Alert code.  The regs.int_parm_long
	 * field which is unused contains additional sample-data-entry related
	 * indicators (overlaid below via sde_regs).
	 */
	memset(&regs, 0, sizeof(regs));
	regs.int_code = 0x1407;
	regs.int_parm = CPU_MF_INT_SF_PRA;
	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;

	/* Reconstruct the PSW from the sample data: instruction address,
	 * DAT, wait-state, and problem-state bits, plus the address-space
	 * control.  (W-flagged samples were already skipped above, so the
	 * PSW_MASK_WAIT branch never fires here.)
	 */
	regs.psw.addr = sample->ia;
	if (sample->T)
		regs.psw.mask |= PSW_MASK_DAT;
	if (sample->W)
		regs.psw.mask |= PSW_MASK_WAIT;
	if (sample->P)
		regs.psw.mask |= PSW_MASK_PSTATE;
	switch (sample->AS) {
	case 0x0:
		regs.psw.mask |= PSW_ASC_PRIMARY;
		break;
	case 0x1:
		regs.psw.mask |= PSW_ASC_ACCREG;
		break;
	case 0x2:
		regs.psw.mask |= PSW_ASC_SECONDARY;
		break;
	case 0x3:
		regs.psw.mask |= PSW_ASC_HOME;
		break;
	}

	/* The host-program-parameter (hpp) contains the sie control
	 * block that is set by sie64a() in entry64.S.  Check if hpp
	 * refers to a valid control block and set sde_regs flags
	 * accordingly.  This would allow to use hpp values for other
	 * purposes too.
	 * For now, simply use a non-zero value as guest indicator.
	 */
	if (sample->hpp)
		sde_regs->in_guest = 1;

	overflow = 0;
	if (perf_exclude_event(event, &regs, sde_regs))
		goto out;
	if (perf_event_overflow(event, &data, &regs)) {
		/* Overflow: stop the PMU to throttle event delivery */
		overflow = 1;
		event->pmu->stop(event, 0);
	}
	perf_event_update_userpage(event);
out:
	return overflow;
}
939 | ||
/* perf_event_count_update() - Add @count to the event's raw count
 * @event:	The perf event
 * @count:	Value (sampling period) to add atomically to event->count
 */
static void perf_event_count_update(struct perf_event *event, u64 count)
{
	local64_add(count, &event->count);
}
944 | ||
/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table (entry pointing to the current SDB)
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects hardware sample-data that is
 * pushed to the perf event subsystem.  The overflow reports the number of
 * samples that have been discarded due to an event overflow.
 *
 * Each consumed sample slot is zeroed (sample->def = 0) so the hardware can
 * reuse it.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
			       unsigned long long *overflow)
{
	struct hws_data_entry *sample;
	unsigned long *trailer;

	/* The trailer entry marks the end of the sample area in this SDB */
	trailer = trailer_entry_ptr(*sdbt);
	sample = (struct hws_data_entry *) *sdbt;
	while ((unsigned long *) sample < trailer) {
		/* Check for an empty sample (def == 0 means no more entries) */
		if (!sample->def)
			break;

		/* Update perf event period */
		perf_event_count_update(event, SAMPL_RATE(&event->hw));

		/* Check for basic sampling mode (data-entry format 0x0001) */
		if (sample->def == 0x0001) {
			/* If an event overflow occurred, the PMU is stopped to
			 * throttle event delivery.  Remaining sample data is
			 * discarded.
			 */
			if (!*overflow)
				*overflow = perf_push_sample(event, sample);
			else
				/* Count discarded samples */
				*overflow += 1;
		} else
			/* Sample slot is not yet written or other record */
			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
					    "Unknown sample data entry format:"
					    " %i\n", sample->def);

		/* Reset sample slot and advance to next sample */
		sample->def = 0;
		sample++;
	}
}
992 | ||
/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed.  Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
	struct hw_perf_event *hwc = &event->hw;
	struct hws_trailer_entry *te;
	unsigned long *sdbt;
	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
	int done;

	/* Resume at the SDBT entry saved by the previous invocation */
	sdbt = (unsigned long *) TEAR_REG(hwc);
	done = event_overflow = sampl_overflow = num_sdb = 0;
	while (!done) {
		/* Get the trailer entry of the sample-data-block */
		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

		/* Leave loop if no more work to do (block full indicator) */
		if (!te->f) {
			done = 1;
			if (!flush_all)
				break;
		}

		/* Check the sample overflow count */
		if (te->overflow)
			/* Account sample overflows and, if a particular limit
			 * is reached, extend the sampling buffer.
			 * For details, see sfb_account_overflows().
			 */
			sampl_overflow += te->overflow;

		/* Timestamps are valid for full sample-data-blocks only */
		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
				    "overflow=%llu timestamp=0x%llx\n",
				    sdbt, te->overflow,
				    (te->f) ? trailer_timestamp(te) : 0ULL);

		/* Collect all samples from a single sample-data-block and
		 * flag if an (perf) event overflow happened.  If so, the PMU
		 * is stopped and remaining samples will be discarded.
		 */
		hw_collect_samples(event, sdbt, &event_overflow);
		num_sdb++;

		/* Reset trailer (using compare-double-and-swap): clear the
		 * block-full indicator and re-arm the alert request, while
		 * atomically zeroing the overflow counter.  The hardware may
		 * update the trailer concurrently, hence the retry loop.
		 */
		do {
			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
			te_flags |= SDB_TE_ALERT_REQ_MASK;
		} while (!cmpxchg_double(&te->flags, &te->overflow,
					 te->flags, te->overflow,
					 te_flags, 0ULL));

		/* Advance to next sample-data-block, following table-link
		 * entries to the next SDBT where necessary */
		sdbt++;
		if (is_link_entry(sdbt))
			sdbt = get_next_sdbt(sdbt);

		/* Update event hardware registers */
		TEAR_REG(hwc) = (unsigned long) sdbt;

		/* Stop processing sample-data if all samples of the current
		 * sample-data-block were flushed even if it was not full.
		 */
		if (flush_all && done)
			break;

		/* If an event overflow happened, discard samples by
		 * processing any remaining sample-data-blocks.
		 */
		if (event_overflow)
			flush_all = 1;
	}

	/* Account sample overflows in the event hardware structure
	 * (running average of overflows per processed SDB) */
	if (sampl_overflow)
		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
						 sampl_overflow, 1 + num_sdb);
	if (sampl_overflow || event_overflow)
		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
				    "overflow stats: sample=%llu event=%llu\n",
				    sampl_overflow, event_overflow);
}
1085 | ||
/* Reading is a no-op: the event count is maintained from the
 * measurement-alert interrupt path (hw_perf_event_update()), not on demand.
 */
static void cpumsf_pmu_read(struct perf_event *event)
{
}
1090 | ||
/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 *
 * Sets the sampling activation control (lsctl.cs) in the per-cpu state;
 * the surrounding perf_pmu_disable()/perf_pmu_enable() pair applies the
 * change via the PMU enable callback.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	/* Starting an event that is not stopped is a caller bug */
	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	/* A reload requires the event count to be up to date */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	perf_pmu_disable(event->pmu);
	event->hw.state = 0;
	cpuhw->lsctl.cs = 1;
	perf_pmu_enable(event->pmu);
}
1109 | ||
/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 *
 * Clears the sampling activation control (lsctl.cs) and, on PERF_EF_UPDATE,
 * flushes the sampling buffer (including partially filled
 * sample-data-blocks) so the event count is up to date.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	perf_pmu_disable(event->pmu);
	cpuhw->lsctl.cs = 0;
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		/* Flush all sample-data-blocks, even partially filled ones */
		hw_perf_event_update(event, 1);
		event->hw.state |= PERF_HES_UPTODATE;
	}
	perf_pmu_enable(event->pmu);
}
1130 | ||
/* cpumsf_pmu_add() - Add a sampling event to this CPU's PMU
 * @event:	The perf event
 * @flags:	PERF_EF_* flags; PERF_EF_START also starts the event
 *
 * Programs the per-cpu sampling controls for @event and marks the PMU as
 * in use.  Only one sampling event per CPU is supported.
 *
 * Return 0 on success, -EAGAIN if the PMU is already in use or not in the
 * expected disabled state, -EINVAL if no sampling buffer is allocated.
 */
static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	int err;

	if (cpuhw->flags & PMU_F_IN_USE)
		return -EAGAIN;

	if (!cpuhw->sfb.sdbt)
		return -EINVAL;

	err = 0;
	perf_pmu_disable(event->pmu);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* Set up sampling controls.  Always program the sampling register
	 * using the SDB-table start.  Reset TEAR_REG event hardware register
	 * that is used by hw_perf_event_update() to store the sampling buffer
	 * position after samples have been flushed.
	 */
	cpuhw->lsctl.s = 0;
	cpuhw->lsctl.h = 1;
	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

	/* Ensure sampling functions are in the disabled state.  If disabled,
	 * switch on sampling enable control. */
	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
		err = -EAGAIN;
		goto out;
	}
	cpuhw->lsctl.es = 1;

	/* Set in_use flag and store event */
	event->hw.idx = 0;	  /* only one sampling event per CPU supported */
	cpuhw->event = event;
	cpuhw->flags |= PMU_F_IN_USE;

	if (flags & PERF_EF_START)
		cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	return err;
}
1179 | ||
/* cpumsf_pmu_del() - Remove the sampling event from this CPU's PMU
 * @event:	The perf event
 * @flags:	PERF_EF_* flags (unused here; stop is always done with UPDATE)
 *
 * Stops the event (flushing pending samples), clears the sampling enable
 * control, and releases the in-use state set by cpumsf_pmu_add().
 */
static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	perf_pmu_disable(event->pmu);
	cpumsf_pmu_stop(event, PERF_EF_UPDATE);

	cpuhw->lsctl.es = 0;
	cpuhw->flags &= ~PMU_F_IN_USE;
	cpuhw->event = NULL;

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
}
1194 | ||
/* Return the event index assigned in cpumsf_pmu_add() (always 0, since
 * only one sampling event per CPU is supported).
 */
static int cpumsf_pmu_event_idx(struct perf_event *event)
{
	return event->hw.idx;
}
1199 | ||
/* sysfs description of the raw sampling event SF_CYCLES_BASIC */
CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
	NULL,
};

/* "event" format: the full 64-bit attr.config value selects the event */
PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
	.name = "events",
	.attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
	.name = "format",
	.attrs = cpumsf_pmu_format_attr,
};
/* NULL-terminated list of sysfs attribute groups for the PMU device */
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
	&cpumsf_pmu_events_group,
	&cpumsf_pmu_format_group,
	NULL,
};
1227 | ||
/* Callback table for the CPU-measurement sampling facility PMU.
 * cpumsf_pmu_enable() and cpumsf_pmu_event_init() are defined earlier
 * in this file.
 */
static struct pmu cpumf_sampling = {
	.pmu_enable = cpumsf_pmu_enable,
	.pmu_disable = cpumsf_pmu_disable,

	.event_init = cpumsf_pmu_event_init,
	.add = cpumsf_pmu_add,
	.del = cpumsf_pmu_del,

	.start = cpumsf_pmu_start,
	.stop = cpumsf_pmu_stop,
	.read = cpumsf_pmu_read,

	.event_idx = cpumsf_pmu_event_idx,
	.attr_groups = cpumsf_pmu_attr_groups,
};
1243 | ||
/* cpumf_measurement_alert() - Handle CPU-measurement alert interrupts
 * @ext_code:	External interrupt code
 * @alert:	Alert subcode; a mask of CPU_MF_INT_SF_* reasons
 * @unused:	Unused interrupt parameter
 *
 * Dispatches the sampling-facility alert conditions: program request
 * alerts (buffer full), sampling authorization changes, loss of sample
 * data, and invalid sampling buffer entries.  Several conditions may be
 * indicated in one interrupt.
 */
static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_hw_sf *cpuhw;

	if (!(alert & CPU_MF_INT_SF_MASK))
		return;
	inc_irq_stat(IRQEXT_CMS);
	cpuhw = &__get_cpu_var(cpu_hw_sf);

	/* Measurement alerts are shared and might happen when the PMU
	 * is not reserved.  Ignore these alerts in this case. */
	if (!(cpuhw->flags & PMU_F_RESERVED))
		return;

	/* The processing below must take care of multiple alert events that
	 * might be indicated concurrently. */

	/* Program alert request: flush full sample-data-blocks */
	if (alert & CPU_MF_INT_SF_PRA) {
		if (cpuhw->flags & PMU_F_IN_USE)
			hw_perf_event_update(cpuhw->event, 0);
		else
			/* A PRA without an active event is unexpected */
			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
	}

	/* Report measurement alerts only for non-PRA codes */
	if (alert != CPU_MF_INT_SF_PRA)
		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

	/* Sampling authorization change request: refresh the QSI state */
	if (alert & CPU_MF_INT_SF_SACA)
		qsi(&cpuhw->qsi);

	/* Loss of sample data due to high-priority machine activities */
	if (alert & CPU_MF_INT_SF_LSDA) {
		pr_err("Sample data was lost\n");
		cpuhw->flags |= PMU_F_ERR_LSDA;
		sf_disable();
	}

	/* Invalid sampling buffer entry */
	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
		       alert);
		cpuhw->flags |= PMU_F_ERR_IBE;
		sf_disable();
	}
}
1293 | ||
1294 | static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, | |
1295 | unsigned long action, void *hcpu) | |
1296 | { | |
1297 | unsigned int cpu = (long) hcpu; | |
1298 | int flags; | |
1299 | ||
1300 | /* Ignore the notification if no events are scheduled on the PMU. | |
1301 | * This might be racy... | |
1302 | */ | |
1303 | if (!atomic_read(&num_events)) | |
1304 | return NOTIFY_OK; | |
1305 | ||
1306 | switch (action & ~CPU_TASKS_FROZEN) { | |
1307 | case CPU_ONLINE: | |
1308 | case CPU_ONLINE_FROZEN: | |
1309 | flags = PMC_INIT; | |
1310 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); | |
1311 | break; | |
1312 | case CPU_DOWN_PREPARE: | |
1313 | flags = PMC_RELEASE; | |
1314 | smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); | |
1315 | break; | |
1316 | default: | |
1317 | break; | |
1318 | } | |
1319 | ||
1320 | return NOTIFY_OK; | |
1321 | } | |
1322 | ||
69f239ed HB |
1323 | static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) |
1324 | { | |
1325 | if (!cpum_sf_avail()) | |
1326 | return -ENODEV; | |
1327 | return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); | |
1328 | } | |
1329 | ||
1330 | static int param_set_sfb_size(const char *val, const struct kernel_param *kp) | |
1331 | { | |
1332 | int rc; | |
1333 | unsigned long min, max; | |
1334 | ||
1335 | if (!cpum_sf_avail()) | |
1336 | return -ENODEV; | |
1337 | if (!val || !strlen(val)) | |
1338 | return -EINVAL; | |
1339 | ||
1340 | /* Valid parameter values: "min,max" or "max" */ | |
1341 | min = CPUM_SF_MIN_SDB; | |
1342 | max = CPUM_SF_MAX_SDB; | |
1343 | if (strchr(val, ',')) | |
1344 | rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; | |
1345 | else | |
1346 | rc = kstrtoul(val, 10, &max); | |
1347 | ||
1348 | if (min < 2 || min >= max || max > get_num_physpages()) | |
1349 | rc = -EINVAL; | |
1350 | if (rc) | |
1351 | return rc; | |
1352 | ||
1353 | sfb_set_limits(min, max); | |
1354 | pr_info("Changed sampling buffer settings: min=%lu max=%lu\n", | |
1355 | CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); | |
1356 | return 0; | |
1357 | } | |
1358 | ||
/* Hook the custom "sfb_size" parameter type into the moduleparam
 * machinery; __param_check() with "void" skips the usual type check
 * since set/get handle the string themselves. */
#define param_check_sfb_size(name, p) __param_check(name, p, void)
static struct kernel_param_ops param_ops_sfb_size = {
	.set = param_set_sfb_size,
	.get = param_get_sfb_size,
};
1364 | ||
8c069ff4 HB |
1365 | static int __init init_cpum_sampling_pmu(void) |
1366 | { | |
1367 | int err; | |
1368 | ||
1369 | if (!cpum_sf_avail()) | |
1370 | return -ENODEV; | |
1371 | ||
1372 | sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); | |
1373 | if (!sfdbg) | |
1374 | pr_err("Registering for s390dbf failed\n"); | |
1375 | debug_register_view(sfdbg, &debug_sprintf_view); | |
1376 | ||
1377 | err = register_external_interrupt(0x1407, cpumf_measurement_alert); | |
1378 | if (err) { | |
1379 | pr_err("Failed to register for CPU-measurement alerts\n"); | |
1380 | goto out; | |
1381 | } | |
1382 | ||
1383 | err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); | |
1384 | if (err) { | |
1385 | pr_err("Failed to register cpum_sf pmu\n"); | |
1386 | unregister_external_interrupt(0x1407, cpumf_measurement_alert); | |
1387 | goto out; | |
1388 | } | |
1389 | perf_cpu_notifier(cpumf_pmu_notifier); | |
1390 | out: | |
1391 | return err; | |
1392 | } | |
1393 | arch_initcall(init_cpum_sampling_pmu); | |
/* Expose the sampling-buffer size limits as the kernel parameter
 * "cpum_sfb_size" ("min,max" or "max"), mode 0640, handled by
 * param_ops_sfb_size above. */
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);