s390/cpum_sf: Detect KVM guest samples
arch/s390/kernel/perf_cpum_sf.c
/*
 * Performance event support for the System z CPU-measurement Sampling Facility
 *
 * Copyright IBM Corp. 2013
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 */
#define KMSG_COMPONENT	"cpum_sf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>

/* Minimum number of sample-data-block-tables:
 * At least one table is required for the sampling buffer structure.
 * A single table contains up to 511 pointers to sample-data-blocks.
 */
#define CPUM_SF_MIN_SDBT	1

/* Number of sample-data-blocks per sample-data-block-table (SDBT):
 * A table contains SDB pointers (8 bytes each) and one table-link entry
 * that points to the origin of the next SDBT.
 */
#define CPUM_SF_SDB_PER_TABLE	((PAGE_SIZE - 8) / 8)

/* Maximum page offset for an SDBT table-link entry:
 * If this page offset is reached, a table-link entry to the next SDBT
 * must be added.
 */
#define CPUM_SF_SDBT_TL_OFFSET	(CPUM_SF_SDB_PER_TABLE * 8)
static inline int require_table_link(const void *sdbt)
{
	return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET;
}
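
/* Example: with a 4KB PAGE_SIZE, CPUM_SF_SDB_PER_TABLE is (4096 - 8) / 8
 * = 511, so CPUM_SF_SDBT_TL_OFFSET is 511 * 8 = 4088.  An entry pointer
 * with page offset 4088 addresses the last 8-byte slot of the SDBT page,
 * which is reserved for the table-link entry to the next SDBT.
 */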

/* Minimum and maximum sampling buffer sizes:
 *
 * This number represents the maximum size of the sampling buffer
 * taking the number of sample-data-block-tables into account.
 *
 *	Sampling buffer size	Buffer characteristics
 *	---------------------------------------------------
 *	64KB  ==   16 pages (4KB per page)
 *		    1 page  for SDB-tables
 *		   15 pages for SDBs
 *
 *	32MB  == 8192 pages (4KB per page)
 *		   16 pages for SDB-tables
 *		 8176 pages for SDBs
 */
static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15;
static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176;

struct sf_buffer {
	unsigned long	*sdbt;		/* Sample-data-block-table origin */
	/* buffer characteristics (required for buffer increments) */
	unsigned long	num_sdb;	/* Number of sample-data-blocks */
	unsigned long	num_sdbt;	/* Number of sample-data-block-tables */
	unsigned long	*tail;		/* last sample-data-block-table */
};

struct cpu_hw_sf {
	/* CPU-measurement sampling information block */
	struct hws_qsi_info_block qsi;
	/* CPU-measurement sampling control block */
	struct hws_lsctl_request_block lsctl;
	struct sf_buffer sfb;		/* Sampling buffer */
	unsigned int flags;		/* Status flags */
	struct perf_event *event;	/* Scheduled perf event */
};
static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf);

/* Debug feature */
static debug_info_t *sfdbg;

/*
 * sf_disable() - Switch off sampling facility
 */
static int sf_disable(void)
{
	struct hws_lsctl_request_block sreq;

	memset(&sreq, 0, sizeof(sreq));
	return lsctl(&sreq);
}

/*
 * sf_buffer_available() - Check for an allocated sampling buffer
 */
static int sf_buffer_available(struct cpu_hw_sf *cpuhw)
{
	return !!cpuhw->sfb.sdbt;
}

/*
 * deallocate sampling facility buffer
 */
static void free_sampling_buffer(struct sf_buffer *sfb)
{
	unsigned long *sdbt, *curr;

	if (!sfb->sdbt)
		return;

	sdbt = sfb->sdbt;
	curr = sdbt;

	/* Free the SDBT after all SDBs are processed... */
	while (1) {
		if (!*curr || !sdbt)
			break;

		/* Process table-link entries */
		if (is_link_entry(curr)) {
			curr = get_next_sdbt(curr);
			if (sdbt)
				free_page((unsigned long) sdbt);

			/* If the origin is reached, sampling buffer is freed */
			if (curr == sfb->sdbt)
				break;
			else
				sdbt = curr;
		} else {
			/* Process SDB pointer */
			if (*curr) {
				free_page(*curr);
				curr++;
			}
		}
	}

	debug_sprintf_event(sfdbg, 5,
			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
	memset(sfb, 0, sizeof(*sfb));
}

static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags)
{
	unsigned long sdb, *trailer;

	/* Allocate and initialize sample-data-block */
	sdb = get_zeroed_page(gfp_flags);
	if (!sdb)
		return -ENOMEM;
	trailer = trailer_entry_ptr(sdb);
	*trailer = SDB_TE_ALERT_REQ_MASK;

	/* Link SDB into the sample-data-block-table */
	*sdbt = sdb;

	return 0;
}

/*
 * realloc_sampling_buffer() - extend sampler memory
 *
 * Allocates new sample-data-blocks and adds them to the specified sampling
 * buffer memory.
 *
 * Important: This modifies the sampling buffer and must be called when the
 *	      sampling facility is disabled.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int realloc_sampling_buffer(struct sf_buffer *sfb,
				   unsigned long num_sdb, gfp_t gfp_flags)
{
	int i, rc;
	unsigned long *new, *tail;

	if (!sfb->sdbt || !sfb->tail)
		return -EINVAL;

	if (!is_link_entry(sfb->tail))
		return -EINVAL;

	/* Append to the existing sampling buffer, overwriting the table-link
	 * entry.  The tail variable always points to the "tail" (last and
	 * table-link) entry in an SDB-table.
	 */
	tail = sfb->tail;

	/* Do a sanity check whether the table-link entry points to
	 * the sampling buffer origin.
	 */
	if (sfb->sdbt != get_next_sdbt(tail)) {
		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
				    "sampling buffer is not linked: origin=%p "
				    "tail=%p\n",
				    (void *) sfb->sdbt, (void *) tail);
		return -EINVAL;
	}

	/* Allocate remaining SDBs */
	rc = 0;
	for (i = 0; i < num_sdb; i++) {
		/* Allocate a new SDB-table if it is full. */
		if (require_table_link(tail)) {
			new = (unsigned long *) get_zeroed_page(gfp_flags);
			if (!new) {
				rc = -ENOMEM;
				break;
			}
			sfb->num_sdbt++;
			/* Link current page to tail of chain */
			*tail = (unsigned long)(void *) new + 1;
			tail = new;
		}

		/* Allocate a new sample-data-block.
		 * If there is not enough memory, stop the realloc process
		 * and simply use what was allocated.  If this is a temporary
		 * issue, a new realloc call (if required) might succeed.
		 */
		rc = alloc_sample_data_block(tail, gfp_flags);
		if (rc)
			break;
		sfb->num_sdb++;
		tail++;
	}

	/* Link sampling buffer to its origin */
	*tail = (unsigned long) sfb->sdbt + 1;
	sfb->tail = tail;

	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
			    " settings: sdbt=%lu sdb=%lu\n",
			    sfb->num_sdbt, sfb->num_sdb);
	return rc;
}
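
/* Resulting layout (sketch): the SDB-tables form a singly linked ring.
 * Each SDBT holds up to 511 SDB origins; its last slot carries a
 * table-link entry (origin address + 1, i.e. the low-order bit set,
 * see is_link_entry()), and the last SDBT links back to the origin:
 *
 *	sfb->sdbt:  [SDB][SDB]...[link] --> [SDB][SDB]...[link] --+
 *	     ^                                                    |
 *	     +----------------------------------------------------+
 */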

/*
 * alloc_sampling_buffer() - allocate sampler memory
 *
 * Allocates and initializes a sampling buffer structure using the
 * specified number of sample-data-blocks (SDB).  For each allocation,
 * a 4K page is used.  The number of sample-data-block-tables (SDBT)
 * is derived from the number of SDBs.
 * Also set the ALERT_REQ mask in each SDB's trailer.
 *
 * Returns zero on success, non-zero otherwise.
 */
static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
{
	int rc;

	if (sfb->sdbt)
		return -EINVAL;

	/* Allocate the sample-data-block-table origin */
	sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL);
	if (!sfb->sdbt)
		return -ENOMEM;
	sfb->num_sdb = 0;
	sfb->num_sdbt = 1;

	/* Link the table origin to point to itself to prepare for
	 * realloc_sampling_buffer() invocation.
	 */
	sfb->tail = sfb->sdbt;
	*sfb->tail = (unsigned long)(void *) sfb->sdbt + 1;

	/* Allocate requested number of sample-data-blocks */
	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
	if (rc) {
		free_sampling_buffer(sfb);
		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
			"realloc_sampling_buffer failed with rc=%i\n", rc);
	} else
		debug_sprintf_event(sfdbg, 4,
			"alloc_sampling_buffer: tear=%p dear=%p\n",
			sfb->sdbt, (void *) *sfb->sdbt);
	return rc;
}
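
/* Illustrative usage sketch (not called anywhere in this file): allocate
 * a buffer backing 15 SDBs and release it again.
 *
 *	struct sf_buffer sfb;
 *
 *	memset(&sfb, 0, sizeof(sfb));
 *	if (!alloc_sampling_buffer(&sfb, 15))
 *		free_sampling_buffer(&sfb);
 */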

static void sfb_set_limits(unsigned long min, unsigned long max)
{
	CPUM_SF_MIN_SDB = min;
	CPUM_SF_MAX_SDB = max;
}

static unsigned long sfb_pending_allocs(struct sf_buffer *sfb,
					struct hw_perf_event *hwc)
{
	if (!sfb->sdbt)
		return SFB_ALLOC_REG(hwc);
	if (SFB_ALLOC_REG(hwc) > sfb->num_sdb)
		return SFB_ALLOC_REG(hwc) - sfb->num_sdb;
	return 0;
}

static int sfb_has_pending_allocs(struct sf_buffer *sfb,
				  struct hw_perf_event *hwc)
{
	return sfb_pending_allocs(sfb, hwc) > 0;
}

static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc)
{
	/* Limit the number of SDBs to not exceed the maximum */
	num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc));
	if (num)
		SFB_ALLOC_REG(hwc) += num;
}

static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc)
{
	SFB_ALLOC_REG(hwc) = 0;
	sfb_account_allocs(num, hwc);
}

static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
	unsigned long n_sdb, freq;
	unsigned long factor;

	/* Calculate sampling buffers using 4K pages
	 *
	 * 1. Use frequency as input.  The sampling buffer is designed for
	 *    a complete second.  This can be adjusted through the "factor"
	 *    variable.
	 *    In any case, alloc_sampling_buffer() sets the Alert Request
	 *    Control indicator to trigger a measurement-alert to harvest
	 *    sample-data-blocks (SDB).
	 *
	 * 2. Compute the number of sample-data-blocks and ensure a minimum
	 *    of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
	 *    exceed CPUM_SF_MAX_SDB.  See also the remarks for these
	 *    symbolic constants.
	 *
	 * 3. Compute the number of pages used for the sample-data-block-table
	 *    and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table
	 *    to manage up to 511 sample-data-blocks).
	 */
	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
	factor = 1;
	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE - 64) / cpuhw->qsi.bsdes));
	if (n_sdb < CPUM_SF_MIN_SDB)
		n_sdb = CPUM_SF_MIN_SDB;
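
	/* Worked example with illustrative numbers: a frequency of 4000
	 * samples/sec and a basic-sampling-data-entry size (qsi.bsdes) of
	 * 32 bytes yield (4096 - 64) / 32 = 126 samples per 4KB SDB, so
	 * n_sdb = DIV_ROUND_UP(4000, 126) = 32 SDBs buffer one second.
	 */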

	/* If there is already a sampling buffer allocated, it is very likely
	 * that the sampling facility is enabled too.  If the event to be
	 * initialized requires a greater sampling buffer, the allocation must
	 * be postponed.  Changing the sampling buffer requires the sampling
	 * facility to be in the disabled state.  So, account the number of
	 * required SDBs and let cpumsf_pmu_enable() resize the buffer just
	 * before the event is started.
	 */
	sfb_init_allocs(n_sdb, hwc);
	if (sf_buffer_available(cpuhw))
		return 0;

	debug_sprintf_event(sfdbg, 3,
			    "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n",
			    SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw);

	return alloc_sampling_buffer(&cpuhw->sfb,
				     sfb_pending_allocs(&cpuhw->sfb, hwc));
}

static unsigned long min_percent(unsigned int percent, unsigned long base,
				 unsigned long min)
{
	return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100));
}

static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base)
{
	/* Use a percentage-based approach to extend the sampling facility
	 * buffer.  Accept up to 5% sample data loss.
	 * Vary the extents between 1% to 5% of the current number of
	 * sample-data-blocks.
	 */
	if (ratio <= 5)
		return 0;
	if (ratio <= 25)
		return min_percent(1, base, 1);
	if (ratio <= 50)
		return min_percent(1, base, 1);
	if (ratio <= 75)
		return min_percent(2, base, 2);
	if (ratio <= 100)
		return min_percent(3, base, 3);
	if (ratio <= 250)
		return min_percent(4, base, 4);

	return min_percent(5, base, 8);
}
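
/* Example: a loss ratio of 80% falls into the "ratio <= 100" class and
 * yields min_percent(3, base, 3), that is, the smaller of 3 SDBs and 3%
 * of the current number of SDBs (rounded up).
 */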

static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
				  struct hw_perf_event *hwc)
{
	unsigned long ratio, num;

	if (!OVERFLOW_REG(hwc))
		return;

	/* The sample_overflow contains the average number of sample data
	 * entries that have been lost because sample-data-blocks were full.
	 *
	 * Calculate the total number of sample data entries that have been
	 * discarded.  Then calculate the ratio of lost samples to total
	 * samples per second in percent.
	 */
	ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb,
			     sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)));

	/* Compute number of sample-data-blocks */
	num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb);
	if (num)
		sfb_account_allocs(num, hwc);

	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
	OVERFLOW_REG(hwc) = 0;
}
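
/* Example with illustrative numbers: an average of 10 lost entries per
 * SDB across 100 allocated SDBs at 4000 samples/sec gives
 * ratio = DIV_ROUND_UP(100 * 10 * 100, 4000) = 25, i.e. roughly a
 * quarter of the samples were lost, and compute_sfb_extent() requests
 * a one-SDB extension.
 */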

/* extend_sampling_buffer() - Extend sampling buffer
 * @sfb:	Sampling buffer structure (for local CPU)
 * @hwc:	Perf event hardware structure
 *
 * Use this function to extend the sampling buffer based on the overflow counter
 * and postponed allocation extents stored in the specified Perf event hardware.
 *
 * Important: This function disables the sampling facility in order to safely
 *	      change the sampling buffer structure.  Do not call this function
 *	      when the PMU is active.
 */
static void extend_sampling_buffer(struct sf_buffer *sfb,
				   struct hw_perf_event *hwc)
{
	unsigned long num, num_old;
	int rc;

	num = sfb_pending_allocs(sfb, hwc);
	if (!num)
		return;
	num_old = sfb->num_sdb;

	/* Disable the sampling facility to reset any states and also
	 * clear pending measurement alerts.
	 */
	sf_disable();

	/* Extend the sampling buffer.
	 * This memory allocation typically happens in an atomic context when
	 * called by perf.  Because this is a reallocation, it is fine if the
	 * new SDB-request cannot be satisfied immediately.
	 */
	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
	if (rc)
		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
				    "failed with rc=%i\n", rc);

	if (sfb_has_pending_allocs(sfb, hwc))
		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
				    "req=%lu alloc=%lu remaining=%lu\n",
				    num, sfb->num_sdb - num_old,
				    sfb_pending_allocs(sfb, hwc));
}

/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

#define PMC_INIT      0
#define PMC_RELEASE   1
#define PMC_FAILURE   2
static void setup_pmc_cpu(void *flags)
{
	int err;
	struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);

	err = 0;
	switch (*((int *) flags)) {
	case PMC_INIT:
		memset(cpusf, 0, sizeof(*cpusf));
		err = qsi(&cpusf->qsi);
		if (err)
			break;
		cpusf->flags |= PMU_F_RESERVED;
		err = sf_disable();
		if (err)
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: initialized: cpuhw=%p\n",
				    cpusf);
		break;
	case PMC_RELEASE:
		cpusf->flags &= ~PMU_F_RESERVED;
		err = sf_disable();
		if (err) {
			pr_err("Switching off the sampling facility failed "
			       "with rc=%i\n", err);
		} else {
			if (cpusf->sfb.sdbt)
				free_sampling_buffer(&cpusf->sfb);
		}
		debug_sprintf_event(sfdbg, 5,
				    "setup_pmc_cpu: released: cpuhw=%p\n",
				    cpusf);
		break;
	}
	if (err)
		*((int *) flags) |= PMC_FAILURE;
}

static void release_pmc_hardware(void)
{
	int flags = PMC_RELEASE;

	irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	perf_release_sampling();
}

static int reserve_pmc_hardware(void)
{
	int flags = PMC_INIT;
	int err;

	err = perf_reserve_sampling();
	if (err)
		return err;
	on_each_cpu(setup_pmc_cpu, &flags, 1);
	if (flags & PMC_FAILURE) {
		release_pmc_hardware();
		return -ENODEV;
	}
	irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);

	return 0;
}
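
/* hw_perf_event_destroy() below relies on the following idiom:
 * atomic_add_unless(&num_events, -1, 1) decrements num_events only if it
 * is not already 1, so the mutex-protected slow path runs for the last
 * event only.  This serializes the final release_pmc_hardware() against
 * a concurrent reserve in __hw_perf_event_init().
 */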

static void hw_perf_event_destroy(struct perf_event *event)
{
	/* Release PMC if this is the last perf event */
	if (!atomic_add_unless(&num_events, -1, 1)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_dec_return(&num_events) == 0)
			release_pmc_hardware();
		mutex_unlock(&pmc_reserve_mutex);
	}
}

static void hw_init_period(struct hw_perf_event *hwc, u64 period)
{
	hwc->sample_period = period;
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);
}

static void hw_reset_registers(struct hw_perf_event *hwc,
			       unsigned long *sdbt_origin)
{
	/* (Re)set to first sample-data-block-table */
	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
}

static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
				   unsigned long rate)
{
	return clamp_t(unsigned long, rate,
		       si->min_sampl_rate, si->max_sampl_rate);
}

static int __hw_perf_event_init(struct perf_event *event)
{
	struct cpu_hw_sf *cpuhw;
	struct hws_qsi_info_block si;
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	unsigned long rate;
	int cpu, err;

	/* Reserve CPU-measurement sampling facility */
	err = 0;
	if (!atomic_inc_not_zero(&num_events)) {
		mutex_lock(&pmc_reserve_mutex);
		if (atomic_read(&num_events) == 0 && reserve_pmc_hardware())
			err = -EBUSY;
		else
			atomic_inc(&num_events);
		mutex_unlock(&pmc_reserve_mutex);
	}
	event->destroy = hw_perf_event_destroy;

	if (err)
		goto out;

	/* Access per-CPU sampling information (query sampling info).
	 *
	 * The event->cpu value can be -1 to count on every CPU, for example,
	 * when attaching to a task.  If this is specified, use the query
	 * sampling info from the current CPU, otherwise use event->cpu to
	 * retrieve the per-CPU information.
	 * Later, cpuhw indicates whether to allocate sampling buffers for a
	 * particular CPU (cpuhw != NULL) or each online CPU (cpuhw == NULL).
	 */
	memset(&si, 0, sizeof(si));
	cpuhw = NULL;
	if (event->cpu == -1)
		qsi(&si);
	else {
		/* Event is pinned to a particular CPU, retrieve the per-CPU
		 * sampling structure for accessing the CPU-specific QSI.
		 */
		cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
		si = cpuhw->qsi;
	}

	/* Check sampling facility authorization and, if not authorized,
	 * fall back to other PMUs.  It is safe to check any CPU because
	 * the authorization is identical for all configured CPUs.
	 */
	if (!si.as) {
		err = -ENOENT;
		goto out;
	}

	/* The sampling information (si) contains information about the
	 * min/max sampling intervals and the CPU speed.  So calculate the
	 * correct sampling interval and avoid the whole period adjust
	 * feedback loop.
	 */
	rate = 0;
	if (attr->freq) {
		rate = freq_to_sample_rate(&si, attr->sample_freq);
		rate = hw_limit_rate(&si, rate);
		attr->freq = 0;
		attr->sample_period = rate;
	} else {
		/* The min/max sampling rates specify the valid range
		 * of sample periods.  If the specified sample period is
		 * out of range, limit the period to the range boundary.
		 */
		rate = hw_limit_rate(&si, hwc->sample_period);

		/* The perf core maintains a maximum sample rate that is
		 * configurable through the sysctl interface.  Ensure the
		 * sampling rate does not exceed this value.  This also helps
		 * to avoid throttling when pushing samples with
		 * perf_event_overflow().
		 */
		if (sample_rate_to_freq(&si, rate) >
		    sysctl_perf_event_sample_rate) {
			err = -EINVAL;
			debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
			goto out;
		}
	}
	SAMPL_RATE(hwc) = rate;
	hw_init_period(hwc, SAMPL_RATE(hwc));

	/* Initialize sample data overflow accounting */
	hwc->extra_reg.reg = REG_OVERFLOW;
	OVERFLOW_REG(hwc) = 0;

	/* Allocate the per-CPU sampling buffer using the CPU information
	 * from the event.  If the event is not pinned to a particular
	 * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling
	 * buffers for each online CPU.
	 */
	if (cpuhw)
		/* Event is pinned to a particular CPU */
		err = allocate_sdbt(cpuhw, hwc);
	else {
		/* Event is not pinned, allocate sampling buffer on
		 * each online CPU
		 */
		for_each_online_cpu(cpu) {
			cpuhw = &per_cpu(cpu_hw_sf, cpu);
			err = allocate_sdbt(cpuhw, hwc);
			if (err)
				break;
		}
	}
out:
	return err;
}

static int cpumsf_pmu_event_init(struct perf_event *event)
{
	int err;

	/* No support for taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
		if (event->attr.config != PERF_EVENT_CPUM_SF)
			return -ENOENT;
		break;
	case PERF_TYPE_HARDWARE:
		/* Support sampling of CPU cycles in addition to the
		 * counter facility.  However, the counter facility
		 * is more precise and, hence, restrict this PMU to
		 * sampling events only.
		 */
		if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES)
			return -ENOENT;
		if (!is_sampling_event(event))
			return -ENOENT;
		break;
	default:
		return -ENOENT;
	}

	if (event->cpu >= nr_cpumask_bits ||
	    (event->cpu >= 0 && !cpu_online(event->cpu)))
		return -ENODEV;

	err = __hw_perf_event_init(event);
	if (unlikely(err))
		if (event->destroy)
			event->destroy(event);
	return err;
}

static void cpumsf_pmu_enable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	struct hw_perf_event *hwc;
	int err;

	if (cpuhw->flags & PMU_F_ENABLED)
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	/* Check whether to extend the sampling buffer.
	 *
	 * Two conditions trigger an increase of the sampling buffer for a
	 * perf event:
	 *    1. Postponed buffer allocations from the event initialization.
	 *    2. Sampling overflows that contribute to pending allocations.
	 *
	 * Note that the extend_sampling_buffer() function disables the sampling
	 * facility, but it can be fully re-enabled using sampling controls that
	 * have been saved in cpumsf_pmu_disable().
	 */
	if (cpuhw->event) {
		hwc = &cpuhw->event->hw;
		/* Account number of overflow-designated buffer extents */
		sfb_account_overflows(cpuhw, hwc);
		if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
			extend_sampling_buffer(&cpuhw->sfb, hwc);
	}

	/* (Re)enable the PMU and sampling facility */
	cpuhw->flags |= PMU_F_ENABLED;
	barrier();

	err = lsctl(&cpuhw->lsctl);
	if (err) {
		cpuhw->flags &= ~PMU_F_ENABLED;
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       1, err);
		return;
	}

	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n",
			    cpuhw->lsctl.es, cpuhw->lsctl.cs,
			    (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
}

static void cpumsf_pmu_disable(struct pmu *pmu)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	struct hws_lsctl_request_block inactive;
	struct hws_qsi_info_block si;
	int err;

	if (!(cpuhw->flags & PMU_F_ENABLED))
		return;

	if (cpuhw->flags & PMU_F_ERR_MASK)
		return;

	/* Switch off sampling activation control */
	inactive = cpuhw->lsctl;
	inactive.cs = 0;

	err = lsctl(&inactive);
	if (err) {
		pr_err("Loading sampling controls failed: op=%i err=%i\n",
		       2, err);
		return;
	}

	/* Save state of TEAR and DEAR register contents */
	if (!qsi(&si)) {
		/* TEAR/DEAR values are valid only if the sampling facility is
		 * enabled.  Note that cpumsf_pmu_disable() might be called even
		 * for a disabled sampling facility because cpumsf_pmu_enable()
		 * controls the enable/disable state.
		 */
		if (si.es) {
			cpuhw->lsctl.tear = si.tear;
			cpuhw->lsctl.dear = si.dear;
		}
	} else
		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
				    "qsi() failed with err=%i\n", err);

	cpuhw->flags &= ~PMU_F_ENABLED;
}

/* perf_push_sample() - Push samples to perf
 * @event:	The perf event
 * @sample:	Hardware sample data
 *
 * Use the hardware sample data to create a perf event sample.  The sample
 * is then pushed to the event subsystem and the function checks for
 * possible event overflows.  If an event overflow occurs, the PMU is
 * stopped.
 *
 * Return non-zero if an event overflow occurred.
 */
static int perf_push_sample(struct perf_event *event,
			    struct hws_data_entry *sample)
{
	int overflow;
	struct pt_regs regs;
	struct perf_sf_sde_regs *sde_regs;
	struct perf_sample_data data;

	/* Skip samples that are invalid or for which the instruction address
	 * is not predictable.  For the latter, the wait-state bit is set.
	 */
	if (sample->I || sample->W)
		return 0;

	perf_sample_data_init(&data, 0, event->hw.last_period);

	/* Set up pt_regs to look like a CPU-measurement external interrupt
	 * using the Program Request Alert code.  The regs.int_parm_long
	 * field, which is unused, contains additional sample-data-entry
	 * related indicators.
	 */
	memset(&regs, 0, sizeof(regs));
	regs.int_code = 0x1407;
	regs.int_parm = CPU_MF_INT_SF_PRA;
	sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;

	regs.psw.addr = sample->ia;
	if (sample->T)
		regs.psw.mask |= PSW_MASK_DAT;
	if (sample->W)
		regs.psw.mask |= PSW_MASK_WAIT;
	if (sample->P)
		regs.psw.mask |= PSW_MASK_PSTATE;
	switch (sample->AS) {
	case 0x0:
		regs.psw.mask |= PSW_ASC_PRIMARY;
		break;
	case 0x1:
		regs.psw.mask |= PSW_ASC_ACCREG;
		break;
	case 0x2:
		regs.psw.mask |= PSW_ASC_SECONDARY;
		break;
	case 0x3:
		regs.psw.mask |= PSW_ASC_HOME;
		break;
	}

	/* The host-program-parameter (hpp) contains the SIE control
	 * block that is set by sie64a() in entry64.S.  Check if hpp
	 * refers to a valid control block and set sde_regs flags
	 * accordingly.  This would allow hpp values to be used for
	 * other purposes too.
	 * For now, simply use a non-zero value as guest indicator.
	 */
	if (sample->hpp)
		sde_regs->in_guest = 1;

	overflow = 0;
	if (perf_event_overflow(event, &data, &regs)) {
		overflow = 1;
		event->pmu->stop(event, 0);
	}
	perf_event_update_userpage(event);

	return overflow;
}
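
/* Note on guest detection: the pt_regs constructed above mimic a
 * CPU-measurement alert external interrupt (int_code 0x1407 with the
 * CPU_MF_INT_SF_PRA parameter), and the in_guest indicator travels in
 * regs.int_parm_long.  Perf callbacks that classify samples can inspect
 * this flag to attribute the sample to a KVM guest rather than to the
 * host, which is the purpose of this commit.
 */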

static void perf_event_count_update(struct perf_event *event, u64 count)
{
	local64_add(count, &event->count);
}

/* hw_collect_samples() - Walk through a sample-data-block and collect samples
 * @event:	The perf event
 * @sdbt:	Sample-data-block table
 * @overflow:	Event overflow counter
 *
 * Walks through a sample-data-block and collects hardware sample-data that is
 * pushed to the perf event subsystem.  The overflow reports the number of
 * samples that have been discarded due to an event overflow.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
			       unsigned long long *overflow)
{
	struct hws_data_entry *sample;
	unsigned long *trailer;

	trailer = trailer_entry_ptr(*sdbt);
	sample = (struct hws_data_entry *) *sdbt;
	while ((unsigned long *) sample < trailer) {
		/* Check for an empty sample */
		if (!sample->def)
			break;

		/* Update perf event period */
		perf_event_count_update(event, SAMPL_RATE(&event->hw));

		/* Check for basic sampling mode */
		if (sample->def == 0x0001) {
			/* If an event overflow occurred, the PMU is stopped to
			 * throttle event delivery.  Remaining sample data is
			 * discarded.
			 */
			if (!*overflow)
				*overflow = perf_push_sample(event, sample);
			else
				/* Count discarded samples */
				*overflow += 1;
		} else
			/* Sample slot is not yet written or other record */
			debug_sprintf_event(sfdbg, 5, "hw_collect_samples: "
					    "Unknown sample data entry format:"
					    " %i\n", sample->def);

		/* Reset sample slot and advance to next sample */
		sample->def = 0;
		sample++;
	}
}

/* hw_perf_event_update() - Process sampling buffer
 * @event:	The perf event
 * @flush_all:	Flag to also flush partially filled sample-data-blocks
 *
 * Processes the sampling buffer and creates perf event samples.
 * The sampling buffer position is retrieved and saved in the TEAR_REG
 * register of the specified perf event.
 *
 * Only full sample-data-blocks are processed.  Specify the flush_all flag
 * to also walk through partially filled sample-data-blocks.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
	struct hw_perf_event *hwc = &event->hw;
	struct hws_trailer_entry *te;
	unsigned long *sdbt;
	unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags;
	int done;

	sdbt = (unsigned long *) TEAR_REG(hwc);
	done = event_overflow = sampl_overflow = num_sdb = 0;
	while (!done) {
		/* Get the trailer entry of the sample-data-block */
		te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt);

		/* Leave loop if no more work to do (block full indicator) */
		if (!te->f) {
			done = 1;
			if (!flush_all)
				break;
		}

		/* Check the sample overflow count */
		if (te->overflow)
			/* Account sample overflows and, if a particular limit
			 * is reached, extend the sampling buffer.
			 * For details, see sfb_account_overflows().
			 */
			sampl_overflow += te->overflow;

		/* Timestamps are valid for full sample-data-blocks only */
		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
				    "overflow=%llu timestamp=0x%llx\n",
				    sdbt, te->overflow,
				    (te->f) ? trailer_timestamp(te) : 0ULL);

		/* Collect all samples from a single sample-data-block and
		 * flag if a (perf) event overflow happened.  If so, the PMU
		 * is stopped and remaining samples will be discarded.
		 */
		hw_collect_samples(event, sdbt, &event_overflow);
		num_sdb++;

		/* Reset trailer (using compare-double-and-swap) */
		do {
			te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
			te_flags |= SDB_TE_ALERT_REQ_MASK;
		} while (!cmpxchg_double(&te->flags, &te->overflow,
					 te->flags, te->overflow,
					 te_flags, 0ULL));
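
		/* The double-word swap is needed because the sampling
		 * hardware may update the trailer's flags and overflow
		 * counter concurrently: in one compare-double-and-swap,
		 * the block-full indicator is cleared, the alert request
		 * is re-armed, and the overflow counter is reset to zero.
		 */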

		/* Advance to next sample-data-block */
		sdbt++;
		if (is_link_entry(sdbt))
			sdbt = get_next_sdbt(sdbt);

		/* Update event hardware registers */
		TEAR_REG(hwc) = (unsigned long) sdbt;

		/* Stop processing sample-data if all samples of the current
		 * sample-data-block were flushed even if it was not full.
		 */
		if (flush_all && done)
			break;

		/* If an event overflow happened, discard samples by
		 * processing any remaining sample-data-blocks.
		 */
		if (event_overflow)
			flush_all = 1;
	}

	/* Account sample overflows in the event hardware structure */
	if (sampl_overflow)
		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
						 sampl_overflow, 1 + num_sdb);
	if (sampl_overflow || event_overflow)
		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
				    "overflow stats: sample=%llu event=%llu\n",
				    sampl_overflow, event_overflow);
}

static void cpumsf_pmu_read(struct perf_event *event)
{
	/* Nothing to do ... updates are interrupt-driven */
}

/* Activate sampling control.
 * Next call of pmu_enable() starts sampling.
 */
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	perf_pmu_disable(event->pmu);
	event->hw.state = 0;
	cpuhw->lsctl.cs = 1;
	perf_pmu_enable(event->pmu);
}

/* Deactivate sampling control.
 * Next call of pmu_enable() stops sampling.
 */
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	perf_pmu_disable(event->pmu);
	cpuhw->lsctl.cs = 0;
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		hw_perf_event_update(event, 1);
		event->hw.state |= PERF_HES_UPTODATE;
	}
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
	int err;

	if (cpuhw->flags & PMU_F_IN_USE)
		return -EAGAIN;

	if (!cpuhw->sfb.sdbt)
		return -EINVAL;

	err = 0;
	perf_pmu_disable(event->pmu);

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* Set up sampling controls.  Always program the sampling register
	 * using the SDB-table start.  Reset TEAR_REG event hardware register
	 * that is used by hw_perf_event_update() to store the sampling buffer
	 * position after samples have been flushed.
	 */
	cpuhw->lsctl.s = 0;
	cpuhw->lsctl.h = 1;
	cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
	cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
	cpuhw->lsctl.interval = SAMPL_RATE(&event->hw);
	hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);

	/* Ensure sampling functions are in the disabled state.  If disabled,
	 * switch on sampling enable control. */
	if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) {
		err = -EAGAIN;
		goto out;
	}
	cpuhw->lsctl.es = 1;

	/* Set in_use flag and store event */
	event->hw.idx = 0;	/* only one sampling event per CPU supported */
	cpuhw->event = event;
	cpuhw->flags |= PMU_F_IN_USE;

	if (flags & PERF_EF_START)
		cpumsf_pmu_start(event, PERF_EF_RELOAD);
out:
	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	return err;
}

static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);

	perf_pmu_disable(event->pmu);
	cpumsf_pmu_stop(event, PERF_EF_UPDATE);

	cpuhw->lsctl.es = 0;
	cpuhw->flags &= ~PMU_F_IN_USE;
	cpuhw->event = NULL;

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
}

static int cpumsf_pmu_event_idx(struct perf_event *event)
{
	return event->hw.idx;
}

CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);

static struct attribute *cpumsf_pmu_events_attr[] = {
	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
	NULL,
};

PMU_FORMAT_ATTR(event, "config:0-63");

static struct attribute *cpumsf_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group cpumsf_pmu_events_group = {
	.name = "events",
	.attrs = cpumsf_pmu_events_attr,
};
static struct attribute_group cpumsf_pmu_format_group = {
	.name = "format",
	.attrs = cpumsf_pmu_format_attr,
};
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
	&cpumsf_pmu_events_group,
	&cpumsf_pmu_format_group,
	NULL,
};

static struct pmu cpumf_sampling = {
	.pmu_enable   = cpumsf_pmu_enable,
	.pmu_disable  = cpumsf_pmu_disable,

	.event_init   = cpumsf_pmu_event_init,
	.add	      = cpumsf_pmu_add,
	.del	      = cpumsf_pmu_del,

	.start	      = cpumsf_pmu_start,
	.stop	      = cpumsf_pmu_stop,
	.read	      = cpumsf_pmu_read,

	.event_idx    = cpumsf_pmu_event_idx,
	.attr_groups  = cpumsf_pmu_attr_groups,
};

static void cpumf_measurement_alert(struct ext_code ext_code,
				    unsigned int alert, unsigned long unused)
{
	struct cpu_hw_sf *cpuhw;

	if (!(alert & CPU_MF_INT_SF_MASK))
		return;
	inc_irq_stat(IRQEXT_CMS);
	cpuhw = &__get_cpu_var(cpu_hw_sf);

	/* Measurement alerts are shared and might happen when the PMU
	 * is not reserved.  Ignore these alerts in this case. */
	if (!(cpuhw->flags & PMU_F_RESERVED))
		return;

	/* The processing below must take care of multiple alert events that
	 * might be indicated concurrently. */

	/* Program alert request */
	if (alert & CPU_MF_INT_SF_PRA) {
		if (cpuhw->flags & PMU_F_IN_USE)
			hw_perf_event_update(cpuhw->event, 0);
		else
			WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE));
	}

	/* Report measurement alerts only for non-PRA codes */
	if (alert != CPU_MF_INT_SF_PRA)
		debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);

	/* Sampling authorization change request */
	if (alert & CPU_MF_INT_SF_SACA)
		qsi(&cpuhw->qsi);

	/* Loss of sample data due to high-priority machine activities */
	if (alert & CPU_MF_INT_SF_LSDA) {
		pr_err("Sample data was lost\n");
		cpuhw->flags |= PMU_F_ERR_LSDA;
		sf_disable();
	}

	/* Invalid sampling buffer entry */
	if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) {
		pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n",
		       alert);
		cpuhw->flags |= PMU_F_ERR_IBE;
		sf_disable();
	}
}

static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self,
					unsigned long action, void *hcpu)
{
	unsigned int cpu = (long) hcpu;
	int flags;

	/* Ignore the notification if no events are scheduled on the PMU.
	 * This might be racy...
	 */
	if (!atomic_read(&num_events))
		return NOTIFY_OK;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		flags = PMC_INIT;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	case CPU_DOWN_PREPARE:
		flags = PMC_RELEASE;
		smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static int param_get_sfb_size(char *buffer, const struct kernel_param *kp)
{
	if (!cpum_sf_avail())
		return -ENODEV;
	return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
}

static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
{
	int rc;
	unsigned long min, max;

	if (!cpum_sf_avail())
		return -ENODEV;
	if (!val || !strlen(val))
		return -EINVAL;

	/* Valid parameter values: "min,max" or "max" */
	min = CPUM_SF_MIN_SDB;
	max = CPUM_SF_MAX_SDB;
	if (strchr(val, ','))
		rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL;
	else
		rc = kstrtoul(val, 10, &max);

	if (min < 2 || min >= max || max > get_num_physpages())
		rc = -EINVAL;
	if (rc)
		return rc;

	sfb_set_limits(min, max);
	pr_info("Changed sampling buffer settings: min=%lu max=%lu\n",
		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
	return 0;
}

#define param_check_sfb_size(name, p) __param_check(name, p, void)
static struct kernel_param_ops param_ops_sfb_size = {
	.set = param_set_sfb_size,
	.get = param_get_sfb_size,
};
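
/* Illustrative usage (values are examples only): the limits accept the
 * formats "min,max" or "max" and can be set at boot time via the kernel
 * command line, e.g.:
 *
 *	cpum_sfb_size=64,1024
 *
 * The parameter is defined by the core_param() statement at the end of
 * this file; core parameters typically also surface under
 * /sys/module/kernel/parameters/.
 */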

static int __init init_cpum_sampling_pmu(void)
{
	int err;

	if (!cpum_sf_avail())
		return -ENODEV;

	sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
	if (!sfdbg)
		pr_err("Registering for s390dbf failed\n");
	debug_register_view(sfdbg, &debug_sprintf_view);

	err = register_external_interrupt(0x1407, cpumf_measurement_alert);
	if (err) {
		pr_err("Failed to register for CPU-measurement alerts\n");
		goto out;
	}

	err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW);
	if (err) {
		pr_err("Failed to register cpum_sf pmu\n");
		unregister_external_interrupt(0x1407, cpumf_measurement_alert);
		goto out;
	}
	perf_cpu_notifier(cpumf_pmu_notifier);
out:
	return err;
}
arch_initcall(init_cpum_sampling_pmu);
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);