s390/cpum_sf: Filter perf events based on event->attr.exclude_* settings
arch/s390/kernel/perf_cpum_sf.c
/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT	"cpum_sf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Number of sample-data-blocks per sample-data-block-table (SDBT):
 * An SDBT holds SDB origin pointers (8 bytes each) plus one table-link
 * entry that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)
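/* For example, assuming a 4KB page size: (4096 - 8) / 8 = 511 SDB
 * pointers fit into one SDBT, matching the limit of 511 pointers
 * mentioned above; the remaining 8 bytes hold the table-link entry.
 */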

/* Maximum page offset for an SDBT table-link entry:
 * If this page offset is reached, a table-link entry to the next SDBT
 * must be added.
 */
#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
static inline int require_table_link(const void *sdbt)
{
        return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
}
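/* Illustration, again assuming 4KB pages: the table-link slot is at
 * page offset 511 * 8 = 4088, so require_table_link() reports true
 * once the next free SDBT entry is that final slot and a new SDBT
 * page must be chained in.
 */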

/* Minimum and maximum sampling buffer sizes:
 *
 * These values bound the size of the sampling buffer, taking the
 * number of sample-data-block-tables into account.
 *
 *      Sampling buffer size            Buffer characteristics
 *      ---------------------------------------------------
 *          64KB            ==      16 pages (4KB per page)
 *                                   1 page  for SDB-tables
 *                                  15 pages for SDBs
 *
 *          32MB            ==    8192 pages (4KB per page)
 *                                  16 pages for SDB-tables
 *                                8176 pages for SDBs
 */
static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;

struct sf_buffer {
        unsigned long    *sdbt;     /* Sample-data-block-table origin */
        /* buffer characteristics (required for buffer increments) */
        unsigned long  num_sdb;     /* Number of sample-data-blocks */
        unsigned long num_sdbt;     /* Number of sample-data-block-tables */
        unsigned long    *tail;     /* last sample-data-block-table */
};

struct cpu_hw_sf {
        /* CPU-measurement sampling information block */
        struct hws_qsi_info_block qsi;
        /* CPU-measurement sampling control block */
        struct hws_lsctl_request_block lsctl;
        struct sf_buffer sfb;       /* Sampling buffer */
        unsigned int flags;         /* Status flags */
        struct perf_event *event;   /* Scheduled perf event */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
        struct hws_lsctl_request_block sreq;

        memset(&sreq, 0, sizeof(sreq));
        return lsctl(&sreq);
}

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
        return !!cpuhw->sfb.sdbt;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
        unsigned long *sdbt, *curr;

        if (!sfb->sdbt)
                return;

        sdbt = sfb->sdbt;
        curr = sdbt;

        /* Free the SDBT after all SDBs are processed... */
        while (1) {
                if (!*curr || !sdbt)
                        break;

                /* Process table-link entries */
                if (is_link_entry(curr)) {
                        curr = get_next_sdbt(curr);
                        if (sdbt)
                                free_page((unsigned long) sdbt);

                        /* If the origin is reached, sampling buffer is freed */
                        if (curr == sfb->sdbt)
                                break;
                        else
                                sdbt = curr;
                } else {
                        /* Process SDB pointer */
                        if (*curr) {
                                free_page(*curr);
                                curr++;
                        }
                }
        }

        debug_sprintf_event(sfdbg, 5,
                            "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
        memset(sfb, 0, sizeof(*sfb));
}

static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
        unsigned long sdb, *trailer;

        /* Allocate and initialize sample-data-block */
        sdb = get_zeroed_page(gfp_flags);
        if (!sdb)
                return -ENOMEM;
        trailer = trailer_entry_ptr(sdb);
        *trailer = SDB_TE_ALERT_REQ_MASK;

        /* Link SDB into the sample-data-block-table */
        *sdbt = sdb;

        return 0;
}
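/* Note on SDB layout, as used by the sizing code in allocate_sdbt()
 * below: each SDB is one page of sample-data entries followed by a
 * trailer entry; the (PAGE_SIZE - 64) term there implies the trailer
 * occupies the last 64 bytes of the page, which trailer_entry_ptr()
 * locates. Setting SDB_TE_ALERT_REQ_MASK requests a measurement alert
 * when the block fills up.
 */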

/*
 * realloc_sampling_buffer() - extend sampler memory
 *
 * Allocates new sample-data-blocks and adds them to the specified sampling
 * buffer memory.
 *
 * Important: This modifies the sampling buffer and must be called when the
 * sampling facility is disabled.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int realloc_sampling_buffer(struct sf_buffer *sfb,
                                   unsigned long num_sdb, gfp_t gfp_flags)
{
        int i, rc;
        unsigned long *new, *tail;

        if (!sfb->sdbt || !sfb->tail)
                return -EINVAL;

        if (!is_link_entry(sfb->tail))
                return -EINVAL;

        /* Append to the existing sampling buffer, overwriting the table-link
         * entry. The tail variable always points to the "tail", that is, the
         * last (table-link) entry in an SDB-table.
         */
        tail = sfb->tail;

        /* Do a sanity check whether the table-link entry points to
         * the sampling buffer origin.
         */
        if (sfb->sdbt != get_next_sdbt(tail)) {
                debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
                                    "sampling buffer is not linked: origin=%p "
                                    "tail=%p\n",
                                    (void *) sfb->sdbt, (void *) tail);
                return -EINVAL;
        }

        /* Allocate remaining SDBs */
        rc = 0;
        for (i = 0; i < num_sdb; i++) {
                /* Allocate a new SDB-table if it is full. */
                if (require_table_link(tail)) {
                        new = (unsigned long *) get_zeroed_page(gfp_flags);
                        if (!new) {
                                rc = -ENOMEM;
                                break;
                        }
                        sfb->num_sdbt++;
                        /* Link current page to tail of chain; the low-order
                         * bit marks the entry as a table-link. */
                        *tail = (unsigned long)(void *) new + 1;
                        tail = new;
                }

                /* Allocate a new sample-data-block.
                 * If there is not enough memory, stop the realloc process
                 * and simply use what was allocated. If this is a temporary
                 * issue, a new realloc call (if required) might succeed.
                 */
                rc = alloc_sample_data_block(tail, gfp_flags);
                if (rc)
                        break;
                sfb->num_sdb++;
                tail++;
        }

        /* Link sampling buffer to its origin */
        *tail = (unsigned long) sfb->sdbt + 1;
        sfb->tail = tail;

        debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
                            " settings: sdbt=%lu sdb=%lu\n",
                            sfb->num_sdbt, sfb->num_sdb);
        return rc;
}
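/* Worked example of the resulting chain (illustrative, assuming 4KB
 * pages): requesting 1024 SDBs yields three SDBTs; the first two each
 * hold 511 SDB pointers plus a table-link, the third holds the
 * remaining 2 SDB pointers, and its table-link points back to the
 * SDBT origin, closing the ring.
 */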

/*
 * alloc_sampling_buffer() - allocate sampler memory
 *
 * Allocates and initializes a sampling buffer structure using the
 * specified number of sample-data-blocks (SDB). For each allocation,
 * a 4K page is used. The number of sample-data-block-tables (SDBT)
 * is calculated from the number of SDBs. Also set the ALERT_REQ mask
 * in each SDB's trailer.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
{
        int rc;

        if (sfb->sdbt)
                return -EINVAL;

        /* Allocate the sample-data-block-table origin */
        sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
        if (!sfb->sdbt)
                return -ENOMEM;
        sfb->num_sdb = 0;
        sfb->num_sdbt = 1;

        /* Link the table origin to point to itself to prepare for
         * realloc_sampling_buffer() invocation.
         */
        sfb->tail = sfb->sdbt;
        *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;

        /* Allocate requested number of sample-data-blocks */
        rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
        if (rc) {
                free_sampling_buffer(sfb);
                debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
                        "realloc_sampling_buffer failed with rc=%i\n", rc);
        } else
                debug_sprintf_event(sfdbg, 4,
                        "alloc_sampling_buffer: tear=%p dear=%p\n",
                        sfb->sdbt, (void *) *sfb->sdbt);
        return rc;
}
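/* Hypothetical usage sketch (not part of the driver flow; the values
 * are made up for illustration):
 *
 *      struct sf_buffer sfb = { };
 *
 *      if (!alloc_sampling_buffer(&sfb, 64)) {
 *              // sfb.sdbt is the SDBT origin; program it into the
 *              // TEAR/DEAR sampling controls as cpumsf_pmu_add() does.
 *              free_sampling_buffer(&sfb);
 *      }
 */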

static void sfb_set_limits(unsigned long min, unsigned long max)
{
        CPUM_SF_MIN_SDB = min;
        CPUM_SF_MAX_SDB = max;
}

static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
                                        struct hw_perf_event *hwc)
{
        if (!sfb->sdbt)
                return SFB_ALLOC_REG(hwc);
        if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
                return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
        return 0;
}
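/* Example: with SFB_ALLOC_REG(hwc) == 100 SDBs requested and
 * sfb->num_sdb == 64 already allocated, 36 allocations are still
 * pending; with no buffer allocated at all, the full request is
 * reported as pending.
 */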

static int sfb_has_pending_allocs(struct sf_buffer *sfb,
                                  struct hw_perf_event *hwc)
{
        return sfb_pending_allocs(sfb, hwc) > 0;
}

static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
        /* Limit the number of SDBs to not exceed the maximum */
        num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
        if (num)
                SFB_ALLOC_REG(hwc) += num;
}

static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
{
        SFB_ALLOC_REG(hwc) = 0;
        sfb_account_allocs(num, hwc);
}

static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
        unsigned long n_sdb, freq;
        unsigned long factor;

        /* Calculate sampling buffers using 4K pages
         *
         * 1. Use frequency as input. The sampling buffer is designed for
         *    a complete second. This can be adjusted through the "factor"
         *    variable.
         *    In any case, alloc_sampling_buffer() sets the Alert Request
         *    Control indicator to trigger a measurement-alert to harvest
         *    sample-data-blocks (SDB).
         *
         * 2. Compute the number of sample-data-blocks and ensure a minimum
         *    of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
         *    exceed CPUM_SF_MAX_SDB. See also the remarks for these
         *    symbolic constants.
         *
         * 3. Compute the number of pages used for the sample-data-block-table
         *    and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
         *    to manage up to 511 sample-data-blocks).
         */
        freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
        factor = 1;
        n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes));
        if (n_sdb < CPUM_SF_MIN_SDB)
                n_sdb = CPUM_SF_MIN_SDB;
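        /* Illustrative sizing (assumed numbers): with 4KB pages and a
         * basic-sampling data-entry size (qsi.bsdes) of, say, 32 bytes,
         * one SDB holds (4096 - 64) / 32 = 126 samples; a frequency of
         * 12600 samples/sec then asks for 100 SDBs to buffer one second.
         */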

        /* If there is already a sampling buffer allocated, it is very likely
         * that the sampling facility is enabled too. If the event to be
         * initialized requires a greater sampling buffer, the allocation must
         * be postponed. Changing the sampling buffer requires the sampling
         * facility to be in the disabled state. So, account the number of
         * required SDBs and let cpumsf_pmu_enable() resize the buffer just
         * before the event is started.
         */
        sfb_init_allocs(n_sdb, hwc);
        if (sf_buffer_available(cpuhw))
                return 0;

        debug_sprintf_event(sfdbg, 3,
                            "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
                            SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);

        return alloc_sampling_buffer(&cpuhw->sfb,
                                     sfb_pending_allocs(&cpuhw->sfb, hwc));
}

static unsigned long min_percent(unsigned int percent, unsigned long base,
                                 unsigned long min)
{
        return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
}

static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
        /* Use a percentage-based approach to extend the sampling facility
         * buffer. Accept up to 5% sample data loss.
         * Vary the extents between 1% to 5% of the current number of
         * sample-data-blocks.
         */
        if (ratio <= 5)
                return 0;
        if (ratio <= 25)
                return min_percent(1, base, 1);
        if (ratio <= 50)
                return min_percent(1, base, 1);
        if (ratio <= 75)
                return min_percent(2, base, 2);
        if (ratio <= 100)
                return min_percent(3, base, 3);
        if (ratio <= 250)
                return min_percent(4, base, 4);

        return min_percent(5, base, 8);
}
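/* Reading min_percent() as written: it returns the smaller of "min"
 * and ceil(percent * base / 100). For example, a loss ratio of 60%
 * with base = 1000 SDBs gives min(2, ceil(2% of 1000)) = 2 SDBs, so
 * for large buffers the extent degenerates to the small constants
 * passed as "min".
 */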

static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
                                  struct hw_perf_event *hwc)
{
        unsigned long ratio, num;

        if (!OVERFLOW_REG(hwc))
                return;

        /* The sample_overflow contains the average number of sample data
         * entries that have been lost because sample-data-blocks were full.
         *
         * Calculate the total number of sample data entries that have been
         * discarded. Then calculate the ratio of lost samples to total samples
         * per second in percent.
         */
        ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
                             sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));

        /* Compute number of sample-data-blocks */
        num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
        if (num)
                sfb_account_allocs(num, hwc);

        debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
                            " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
        OVERFLOW_REG(hwc) = 0;
}
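/* Numeric walk-through (assumed values): an average overflow of 5
 * samples per SDB across num_sdb = 100 SDBs at 10000 samples/sec
 * yields ratio = ceil(100 * 5 * 100 / 10000) = 5%, which is still
 * within the accepted loss and triggers no buffer extension.
 */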

/* extend_sampling_buffer() - Extend sampling buffer
 * @sfb:	Sampling buffer structure (for local CPU)
 * @hwc:	Perf event hardware structure
 *
 * Use this function to extend the sampling buffer based on the overflow counter
 * and postponed allocation extents stored in the specified Perf event hardware.
 *
 * Important: This function disables the sampling facility in order to safely
 *	      change the sampling buffer structure. Do not call this function
 *	      when the PMU is active.
 */
static void extend_sampling_buffer(struct sf_buffer *sfb,
                                   struct hw_perf_event *hwc)
{
        unsigned long num, num_old;
        int rc;

        num = sfb_pending_allocs(sfb, hwc);
        if (!num)
                return;
        num_old = sfb->num_sdb;

        /* Disable the sampling facility to reset any states and also
         * clear pending measurement alerts.
         */
        sf_disable();

        /* Extend the sampling buffer.
         * This memory allocation typically happens in an atomic context when
         * called by perf. Because this is a reallocation, it is fine if the
         * new SDB-request cannot be satisfied immediately.
         */
        rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
        if (rc)
                debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
                                    "failed with rc=%i\n", rc);

        if (sfb_has_pending_allocs(sfb, hwc))
                debug_sprintf_event(sfdbg, 5, "sfb: extend: "
                                    "req=%lu alloc=%lu remaining=%lu\n",
                                    num, sfb->num_sdb - num_old,
                                    sfb_pending_allocs(sfb, hwc));
}


/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

#define PMC_INIT      0
#define PMC_RELEASE   1
#define PMC_FAILURE   2
static void setup_pmc_cpu(void *flags)
{
        int err;
        struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);

        err = 0;
        switch (*((int *) flags)) {
        case PMC_INIT:
                memset(cpusf, 0, sizeof(*cpusf));
                err = qsi(&cpusf->qsi);
                if (err)
                        break;
                cpusf->flags |= PMU_F_RESERVED;
                err = sf_disable();
                if (err)
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
                break;
        case PMC_RELEASE:
                cpusf->flags &= ~PMU_F_RESERVED;
                err = sf_disable();
                if (err) {
                        pr_err("Switching off the sampling facility failed "
                               "with rc=%i\n", err);
                } else {
                        if (cpusf->sfb.sdbt)
                                free_sampling_buffer(&cpusf->sfb);
                }
                debug_sprintf_event(sfdbg, 5,
                                    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
                break;
        }
        if (err)
                *((int *) flags) |= PMC_FAILURE;
}

static void release_pmc_hardware(void)
{
        int flags = PMC_RELEASE;

        irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
        on_each_cpu(setup_pmc_cpu, &flags, 1);
        perf_release_sampling();
}

static int reserve_pmc_hardware(void)
{
        int flags = PMC_INIT;
        int err;

        err = perf_reserve_sampling();
        if (err)
                return err;
        on_each_cpu(setup_pmc_cpu, &flags, 1);
        if (flags & PMC_FAILURE) {
                release_pmc_hardware();
                return -ENODEV;
        }
        irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

        return 0;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
        /* Release PMC if this is the last perf event */
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        release_pmc_hardware();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
        hwc->sample_period = period;
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
                               unsigned long *sdbt_origin)
{
        /* (Re)set to first sample-data-block-table */
        TEAR_REG(hwc) = (unsigned long) sdbt_origin;
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                                   unsigned long rate)
{
        return clamp_t(unsigned long, rate,
                       si->min_sampl_rate, si->max_sampl_rate);
}

static int __hw_perf_event_init(struct perf_event *event)
{
        struct cpu_hw_sf *cpuhw;
        struct hws_qsi_info_block si;
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        unsigned long rate;
        int cpu, err;

        /* Reserve CPU-measurement sampling facility */
        err = 0;
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);
        }
        event->destroy = hw_perf_event_destroy;

        if (err)
                goto out;

        /* Access per-CPU sampling information (query sampling info)
         *
         * The event->cpu value can be -1 to count on every CPU, for example,
         * when attaching to a task. If this is specified, use the query
         * sampling info from the current CPU, otherwise use event->cpu to
         * retrieve the per-CPU information.
         * Later, cpuhw indicates whether to allocate sampling buffers for a
         * particular CPU (cpuhw != NULL) or each online CPU (cpuhw == NULL).
         */
        memset(&si, 0, sizeof(si));
        cpuhw = NULL;
        if (event->cpu == -1)
                qsi(&si);
        else {
                /* Event is pinned to a particular CPU, retrieve the per-CPU
                 * sampling structure for accessing the CPU-specific QSI.
                 */
                cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
                si = cpuhw->qsi;
        }

        /* Check sampling facility authorization and, if not authorized,
         * fall back to other PMUs. It is safe to check any CPU because
         * the authorization is identical for all configured CPUs.
         */
        if (!si.as) {
                err = -ENOENT;
                goto out;
        }

        /* The sampling information (si) contains information about the
         * min/max sampling intervals and the CPU speed. So calculate the
         * correct sampling interval and avoid the whole period adjust
         * feedback loop.
         */
        rate = 0;
        if (attr->freq) {
                rate = freq_to_sample_rate(&si, attr->sample_freq);
                rate = hw_limit_rate(&si, rate);
                attr->freq = 0;
                attr->sample_period = rate;
        } else {
                /* The min/max sampling rates specify the valid range
                 * of sample periods. If the specified sample period is
                 * out of range, limit the period to the range boundary.
                 */
                rate = hw_limit_rate(&si, hwc->sample_period);

                /* The perf core maintains a maximum sample rate that is
                 * configurable through the sysctl interface. Ensure the
                 * sampling rate does not exceed this value. This also helps
                 * to avoid throttling when pushing samples with
                 * perf_event_overflow().
                 */
                if (sample_rate_to_freq(&si, rate) >
                    sysctl_perf_event_sample_rate) {
                        err = -EINVAL;
                        debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
                        goto out;
                }
        }
        SAMPL_RATE(hwc) = rate;
        hw_init_period(hwc, SAMPL_RATE(hwc));

        /* Initialize sample data overflow accounting */
        hwc->extra_reg.reg = REG_OVERFLOW;
        OVERFLOW_REG(hwc) = 0;

        /* Allocate the per-CPU sampling buffer using the CPU information
         * from the event. If the event is not pinned to a particular
         * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
         * buffers for each online CPU.
         */
        if (cpuhw)
                /* Event is pinned to a particular CPU */
                err = allocate_sdbt(cpuhw, hwc);
        else {
                /* Event is not pinned, allocate sampling buffer on
                 * each online CPU
                 */
                for_each_online_cpu(cpu) {
                        cpuhw = &per_cpu(cpu_hw_sf, cpu);
                        err = allocate_sdbt(cpuhw, hwc);
                        if (err)
                                break;
                }
        }
out:
        return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
        int err;

        /* No support for taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        switch (event->attr.type) {
        case PERF_TYPE_RAW:
                if (event->attr.config != PERF_EVENT_CPUM_SF)
                        return -ENOENT;
                break;
        case PERF_TYPE_HARDWARE:
                /* Support sampling of CPU cycles in addition to the
                 * counter facility. However, the counter facility
                 * is more precise and, hence, this PMU is restricted
                 * to sampling events only.
                 */
                if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
                        return -ENOENT;
                if (!is_sampling_event(event))
                        return -ENOENT;
                break;
        default:
                return -ENOENT;
        }

        /* Check online status of the CPU to which the event is pinned */
        if (event->cpu >= nr_cpumask_bits ||
            (event->cpu >= 0 && !cpu_online(event->cpu)))
                return -ENODEV;

        /* Force reset of idle/hv excludes regardless of what the
         * user requested.
         */
        if (event->attr.exclude_hv)
                event->attr.exclude_hv = 0;
        if (event->attr.exclude_idle)
                event->attr.exclude_idle = 0;

        err = __hw_perf_event_init(event);
        if (unlikely(err))
                if (event->destroy)
                        event->destroy(event);
        return err;
}

static void cpumsf_pmu_enable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        struct hw_perf_event *hwc;
        int err;

        if (cpuhw->flags & PMU_F_ENABLED)
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        /* Check whether to extend the sampling buffer.
         *
         * Two conditions trigger an increase of the sampling buffer for a
         * perf event:
         *    1. Postponed buffer allocations from the event initialization.
         *    2. Sampling overflows that contribute to pending allocations.
         *
         * Note that the extend_sampling_buffer() function disables the sampling
         * facility, but it can be fully re-enabled using the sampling controls
         * that have been saved in cpumsf_pmu_disable().
         */
        if (cpuhw->event) {
                hwc = &cpuhw->event->hw;
                /* Account number of overflow-designated buffer extents */
                sfb_account_overflows(cpuhw, hwc);
                if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
                        extend_sampling_buffer(&cpuhw->sfb, hwc);
        }

        /* (Re)enable the PMU and sampling facility */
        cpuhw->flags |= PMU_F_ENABLED;
        barrier();

        err = lsctl(&cpuhw->lsctl);
        if (err) {
                cpuhw->flags &= ~PMU_F_ENABLED;
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       1, err);
                return;
        }

        debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
                            cpuhw->lsctl.es, cpuhw->lsctl.cs,
                            (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        struct hws_lsctl_request_block inactive;
        struct hws_qsi_info_block si;
        int err;

        if (!(cpuhw->flags & PMU_F_ENABLED))
                return;

        if (cpuhw->flags & PMU_F_ERR_MASK)
                return;

        /* Switch off sampling activation control */
        inactive = cpuhw->lsctl;
        inactive.cs = 0;

        err = lsctl(&inactive);
        if (err) {
                pr_err("Loading sampling controls failed: op=%i err=%i\n",
                       2, err);
                return;
        }

        /* Save state of TEAR and DEAR register contents */
        err = qsi(&si);
        if (!err) {
                /* TEAR/DEAR values are valid only if the sampling facility is
                 * enabled. Note that cpumsf_pmu_disable() might be called even
                 * for a disabled sampling facility because cpumsf_pmu_enable()
                 * controls the enable/disable state.
                 */
                if (si.es) {
                        cpuhw->lsctl.tear = si.tear;
                        cpuhw->lsctl.dear = si.dear;
                }
        } else
                debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
                                    "qsi() failed with err=%i\n", err);

        cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_exclude_event() - Filter event
 * @event:	The perf event
 * @regs:	pt_regs structure
 * @sde_regs:	Sample-data-entry (sde) regs structure
 *
 * Filter perf events according to their exclude specification.
 *
 * Return non-zero if the event shall be excluded.
 */
static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs,
                              struct perf_sf_sde_regs *sde_regs)
{
        if (event->attr.exclude_user && user_mode(regs))
                return 1;
        if (event->attr.exclude_kernel && !user_mode(regs))
                return 1;
        if (event->attr.exclude_guest && sde_regs->in_guest)
                return 1;
        if (event->attr.exclude_host && !sde_regs->in_guest)
                return 1;
        return 0;
}
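/* For example, an event opened with attr.exclude_kernel = 1 (the perf
 * tool sets this for the ":u" event modifier) drops every sample whose
 * reconstructed PSW indicates kernel mode; likewise, exclude_guest
 * drops samples that perf_push_sample() flagged as taken in guest
 * context via sde_regs->in_guest.
 */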

/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sample:	Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample. The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows. If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
                            struct hws_data_entry *sample)
{
        int overflow;
        struct pt_regs regs;
        struct perf_sf_sde_regs *sde_regs;
        struct perf_sample_data data;

        /* Skip samples that are invalid or for which the instruction address
         * is not predictable. For the latter, the wait-state bit is set.
         */
        if (sample->I || sample->W)
                return 0;

        perf_sample_data_init(&data, 0, event->hw.last_period);

        /* Set up pt_regs to look like a CPU-measurement external interrupt
         * using the Program Request Alert code. The regs.int_parm_long
         * field, which is unused, contains additional sample-data-entry
         * related indicators.
         */
        memset(&regs, 0, sizeof(regs));
        regs.int_code = 0x1407;
        regs.int_parm = CPU_MF_INT_SF_PRA;
        sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;

        regs.psw.addr = sample->ia;
        if (sample->T)
                regs.psw.mask |= PSW_MASK_DAT;
        if (sample->W)
                regs.psw.mask |= PSW_MASK_WAIT;
        if (sample->P)
                regs.psw.mask |= PSW_MASK_PSTATE;
        switch (sample->AS) {
        case 0x0:
                regs.psw.mask |= PSW_ASC_PRIMARY;
                break;
        case 0x1:
                regs.psw.mask |= PSW_ASC_ACCREG;
                break;
        case 0x2:
                regs.psw.mask |= PSW_ASC_SECONDARY;
                break;
        case 0x3:
                regs.psw.mask |= PSW_ASC_HOME;
                break;
        }

        /* The host-program-parameter (hpp) contains the sie control
         * block that is set by sie64a() in entry64.S. Check if hpp
         * refers to a valid control block and set sde_regs flags
         * accordingly. This would allow to use hpp values for other
         * purposes too.
         * For now, simply use a non-zero value as guest indicator.
         */
        if (sample->hpp)
                sde_regs->in_guest = 1;

        overflow = 0;
        if (perf_exclude_event(event, &regs, sde_regs))
                goto out;
        if (perf_event_overflow(event, &data, &regs)) {
                overflow = 1;
                event->pmu->stop(event, 0);
        }
        perf_event_update_userpage(event);
out:
        return overflow;
}

static void perf_event_count_update(struct perf_event *event, u64 count)
{
        local64_add(count, &event->count);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects hardware sample-data that is
 * pushed to the perf event subsystem. The overflow reports the number of
 * samples that have been discarded due to an event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
                               unsigned long long *overflow)
{
        struct hws_data_entry *sample;
        unsigned long *trailer;

        trailer = trailer_entry_ptr(*sdbt);
        sample = (struct hws_data_entry *) *sdbt;
        while ((unsigned long *) sample < trailer) {
                /* Check for an empty sample */
                if (!sample->def)
                        break;

                /* Update perf event period */
                perf_event_count_update(event, SAMPL_RATE(&event->hw));

                /* Check for basic sampling mode */
                if (sample->def == 0x0001) {
                        /* If an event overflow occurred, the PMU is stopped to
                         * throttle event delivery. Remaining sample data is
                         * discarded.
                         */
                        if (!*overflow)
                                *overflow = perf_push_sample(event, sample);
                        else
                                /* Count discarded samples */
                                *overflow += 1;
                } else
                        /* Sample slot is not yet written or other record */
                        debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
                                            "Unknown sample data entry format:"
                                            " %i\n", sample->def);

                /* Reset sample slot and advance to next sample */
                sample->def = 0;
                sample++;
        }
}

/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed. Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
        struct hw_perf_event *hwc = &event->hw;
        struct hws_trailer_entry *te;
        unsigned long *sdbt;
        unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
        int done;

        sdbt = (unsigned long *) TEAR_REG(hwc);
        done = event_overflow = sampl_overflow = num_sdb = 0;
        while (!done) {
                /* Get the trailer entry of the sample-data-block */
                te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

                /* Leave loop if no more work to do (block full indicator) */
                if (!te->f) {
                        done = 1;
                        if (!flush_all)
                                break;
                }

                /* Check the sample overflow count */
                if (te->overflow)
                        /* Account sample overflows and, if a particular limit
                         * is reached, extend the sampling buffer.
                         * For details, see sfb_account_overflows().
                         */
                        sampl_overflow += te->overflow;

                /* Timestamps are valid for full sample-data-blocks only */
                debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
                                    "overflow=%llu timestamp=0x%llx\n",
                                    sdbt, te->overflow,
                                    (te->f) ? trailer_timestamp(te) : 0ULL);

                /* Collect all samples from a single sample-data-block and
                 * flag if a (perf) event overflow happened. If so, the PMU
                 * is stopped and remaining samples will be discarded.
                 */
                hw_collect_samples(event, sdbt, &event_overflow);
                num_sdb++;

                /* Reset trailer (using compare-double-and-swap) */
                do {
                        te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
                        te_flags |= SDB_TE_ALERT_REQ_MASK;
                } while (!cmpxchg_double(&te->flags, &te->overflow,
                                         te->flags, te->overflow,
                                         te_flags, 0ULL));

                /* Advance to next sample-data-block */
                sdbt++;
                if (is_link_entry(sdbt))
                        sdbt = get_next_sdbt(sdbt);

                /* Update event hardware registers */
                TEAR_REG(hwc) = (unsigned long) sdbt;

                /* Stop processing sample-data if all samples of the current
                 * sample-data-block were flushed even if it was not full.
                 */
                if (flush_all && done)
                        break;

                /* If an event overflow happened, discard samples by
                 * processing any remaining sample-data-blocks.
                 */
                if (event_overflow)
                        flush_all = 1;
        }

        /* Account sample overflows in the event hardware structure */
        if (sampl_overflow)
                OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
                                                 sampl_overflow, 1 + num_sdb);
        if (sampl_overflow || event_overflow)
                debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
                                    "overflow stats: sample=%llu event=%llu\n",
                                    sampl_overflow, event_overflow);
}

static void cpumsf_pmu_read(struct perf_event *event)
{
        /* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

        perf_pmu_disable(event->pmu);
        event->hw.state = 0;
        cpuhw->lsctl.cs = 1;
        perf_pmu_enable(event->pmu);
}

/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        perf_pmu_disable(event->pmu);
        cpuhw->lsctl.cs = 0;
        event->hw.state |= PERF_HES_STOPPED;

        if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
                hw_perf_event_update(event, 1);
                event->hw.state |= PERF_HES_UPTODATE;
        }
        perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
        int err;

        if (cpuhw->flags & PMU_F_IN_USE)
                return -EAGAIN;

        if (!cpuhw->sfb.sdbt)
                return -EINVAL;

        err = 0;
        perf_pmu_disable(event->pmu);

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        /* Set up sampling controls. Always program the sampling register
         * using the SDB-table start. Reset the TEAR_REG event hardware register
         * that is used by hw_perf_event_update() to store the sampling buffer
         * position after samples have been flushed.
         */
        cpuhw->lsctl.s = 0;
        cpuhw->lsctl.h = 1;
        cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
        cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
        cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
        hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

        /* Ensure sampling functions are in the disabled state. If disabled,
         * switch on sampling enable control. */
        if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
                err = -EAGAIN;
                goto out;
        }
        cpuhw->lsctl.es = 1;

        /* Set in_use flag and store event */
        event->hw.idx = 0;        /* only one sampling event per CPU supported */
        cpuhw->event = event;
        cpuhw->flags |= PMU_F_IN_USE;

        if (flags & PERF_EF_START)
                cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
        return err;
}

static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

        perf_pmu_disable(event->pmu);
        cpumsf_pmu_stop(event, PERF_EF_UPDATE);

        cpuhw->lsctl.es = 0;
        cpuhw->flags &= ~PMU_F_IN_USE;
        cpuhw->event = NULL;

        perf_event_update_userpage(event);
        perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
        return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
        CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
        NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
        &format_attr_event.attr,
        NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
        .name = "events",
        .attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
        .name = "format",
        .attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
        &cpumsf_pmu_events_group,
        &cpumsf_pmu_format_group,
        NULL,
};

static struct pmu cpumf_sampling = {
        .pmu_enable   = cpumsf_pmu_enable,
        .pmu_disable  = cpumsf_pmu_disable,

        .event_init   = cpumsf_pmu_event_init,
        .add          = cpumsf_pmu_add,
        .del          = cpumsf_pmu_del,

        .start        = cpumsf_pmu_start,
        .stop         = cpumsf_pmu_stop,
        .read         = cpumsf_pmu_read,

        .event_idx    = cpumsf_pmu_event_idx,
        .attr_groups  = cpumsf_pmu_attr_groups,
};

static void cpumf_measurement_alert(struct ext_code ext_code,
                                    unsigned int alert, unsigned long unused)
{
        struct cpu_hw_sf *cpuhw;

        if (!(alert & CPU_MF_INT_SF_MASK))
                return;
        inc_irq_stat(IRQEXT_CMS);
        cpuhw = &__get_cpu_var(cpu_hw_sf);

        /* Measurement alerts are shared and might happen when the PMU
         * is not reserved. Ignore these alerts in this case. */
        if (!(cpuhw->flags & PMU_F_RESERVED))
                return;

        /* The processing below must take care of multiple alert events that
         * might be indicated concurrently. */

        /* Program alert request */
        if (alert & CPU_MF_INT_SF_PRA) {
                if (cpuhw->flags & PMU_F_IN_USE)
                        hw_perf_event_update(cpuhw->event, 0);
                else
                        WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
        }

        /* Report measurement alerts only for non-PRA codes */
        if (alert != CPU_MF_INT_SF_PRA)
                debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

        /* Sampling authorization change request */
        if (alert & CPU_MF_INT_SF_SACA)
                qsi(&cpuhw->qsi);

        /* Loss of sample data due to high-priority machine activities */
        if (alert & CPU_MF_INT_SF_LSDA) {
                pr_err("Sample data was lost\n");
                cpuhw->flags |= PMU_F_ERR_LSDA;
                sf_disable();
        }

        /* Invalid sampling buffer entry */
        if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
                pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
                       alert);
                cpuhw->flags |= PMU_F_ERR_IBE;
                sf_disable();
        }
}

static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
                                        unsigned long action, void *hcpu)
{
        unsigned int cpu = (long) hcpu;
        int flags;

        /* Ignore the notification if no events are scheduled on the PMU.
         * This might be racy...
         */
        if (!atomic_read(&num_events))
                return NOTIFY_OK;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                flags = PMC_INIT;
                smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
                break;
        case CPU_DOWN_PREPARE:
                flags = PMC_RELEASE;
                smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
{
        if (!cpum_sf_avail())
                return -ENODEV;
        return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
}

static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
{
        int rc;
        unsigned long min, max;

        if (!cpum_sf_avail())
                return -ENODEV;
        if (!val || !strlen(val))
                return -EINVAL;

        /* Valid parameter values: "min,max" or "max" */
        min = CPUM_SF_MIN_SDB;
        max = CPUM_SF_MAX_SDB;
        if (strchr(val, ','))
                rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
        else
                rc = kstrtoul(val, 10, &max);

        if (min < 2 || min >= max || max > get_num_physpages())
                rc = -EINVAL;
        if (rc)
                return rc;

        sfb_set_limits(min, max);
        pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
                CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
        return 0;
}
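/* With the core_param() registration at the end of this file, the
 * limits can be set on the kernel command line, for example:
 *
 *      cpum_sfb_size=128,2048    (min=128 and max=2048 SDBs)
 *      cpum_sfb_size=2048        (max=2048 SDBs, minimum unchanged)
 */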

#define param_check_sfb_size(name, p) __param_check(name, p, void)
static struct kernel_param_ops param_ops_sfb_size = {
        .set = param_set_sfb_size,
        .get = param_get_sfb_size,
};

static int __init init_cpum_sampling_pmu(void)
{
        int err;

        if (!cpum_sf_avail())
                return -ENODEV;

        sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
        if (!sfdbg)
                pr_err("Registering for s390dbf failed\n");
        debug_register_view(sfdbg, &debug_sprintf_view);

        err = register_external_interrupt(0x1407, cpumf_measurement_alert);
        if (err) {
                pr_err("Failed to register for CPU-measurement alerts\n");
                goto out;
        }

        err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
        if (err) {
                pr_err("Failed to register cpum_sf pmu\n");
                unregister_external_interrupt(0x1407, cpumf_measurement_alert);
                goto out;
        }
        perf_cpu_notifier(cpumf_pmu_notifier);
out:
        return err;
}
arch_initcall(init_cpum_sampling_pmu);
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);