arch/x86/kernel/cpu/perf_event.c
1 /*
2 * Performance events x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 * Copyright (C) 2009 Google, Inc., Stephane Eranian
11 *
12 * For licensing details see kernel-base/COPYING
13 */
14
15 #include <linux/perf_event.h>
16 #include <linux/capability.h>
17 #include <linux/notifier.h>
18 #include <linux/hardirq.h>
19 #include <linux/kprobes.h>
20 #include <linux/module.h>
21 #include <linux/kdebug.h>
22 #include <linux/sched.h>
23 #include <linux/uaccess.h>
24 #include <linux/highmem.h>
25 #include <linux/cpu.h>
26 #include <linux/bitops.h>
27
28 #include <asm/apic.h>
29 #include <asm/stacktrace.h>
30 #include <asm/nmi.h>
31
32 static u64 perf_event_mask __read_mostly;
33
34 struct event_constraint {
35 union {
36 unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
37 u64 idxmsk64;
38 };
39 u64 code;
40 u64 cmask;
41 int weight;
42 };
43
44 struct amd_nb {
45 int nb_id; /* NorthBridge id */
46 int refcnt; /* reference count */
47 struct perf_event *owners[X86_PMC_IDX_MAX];
48 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
49 };
50
51 #define MAX_LBR_ENTRIES 16
52
53 struct cpu_hw_events {
54 /*
55 * Generic x86 PMC bits
56 */
57 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
58 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
59 unsigned long interrupts;
60 int enabled;
61
62 int n_events;
63 int n_added;
64 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
65 u64 tags[X86_PMC_IDX_MAX];
66 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
67
68 /*
69 * Intel DebugStore bits
70 */
71 struct debug_store *ds;
72 u64 pebs_enabled;
73
74 /*
75 * Intel LBR bits
76 */
77 int lbr_users;
78 void *lbr_context;
79 struct perf_branch_stack lbr_stack;
80 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
81
82 /*
83 * AMD specific bits
84 */
85 struct amd_nb *amd_nb;
86 };
87
88 #define __EVENT_CONSTRAINT(c, n, m, w) {\
89 { .idxmsk64 = (n) }, \
90 .code = (c), \
91 .cmask = (m), \
92 .weight = (w), \
93 }
94
95 #define EVENT_CONSTRAINT(c, n, m) \
96 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
97
98 /*
99 * Constraint on the Event code.
100 */
101 #define INTEL_EVENT_CONSTRAINT(c, n) \
102 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
103
104 /*
105 * Constraint on the Event code + UMask + fixed-mask
106 */
107 #define FIXED_EVENT_CONSTRAINT(c, n) \
108 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
109
110 /*
111 * Constraint on the Event code + UMask
112 */
113 #define PEBS_EVENT_CONSTRAINT(c, n) \
114 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
115
116 #define EVENT_CONSTRAINT_END \
117 EVENT_CONSTRAINT(0, 0, 0)
118
119 #define for_each_event_constraint(e, c) \
120 for ((e) = (c); (e)->cmask; (e)++)
121
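/*
 * Illustration (not part of the original file): a hypothetical
 * model-specific constraint table built with the macros above.  The
 * second argument is the bitmask of counters the event may run on, so
 * INTEL_EVENT_CONSTRAINT(0x3c, 0x3) would restrict event 0x3c to generic
 * counters 0 and 1, and FIXED_EVENT_CONSTRAINT(0xc0, 0) maps event 0xc0
 * onto fixed counter 0 (bit 32 + 0 of the index mask):
 *
 *	static struct event_constraint example_constraints[] = {
 *		INTEL_EVENT_CONSTRAINT(0x3c, 0x3),
 *		FIXED_EVENT_CONSTRAINT(0xc0, 0),
 *		EVENT_CONSTRAINT_END
 *	};
 *
 * Such a table is terminated by EVENT_CONSTRAINT_END (cmask == 0) and
 * walked with for_each_event_constraint().  The event codes here are
 * purely illustrative.
 */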
122 /*
123 * struct x86_pmu - generic x86 pmu
124 */
125 struct x86_pmu {
126 /*
127 * Generic x86 PMC bits
128 */
129 const char *name;
130 int version;
131 int (*handle_irq)(struct pt_regs *);
132 void (*disable_all)(void);
133 void (*enable_all)(void);
134 void (*enable)(struct perf_event *);
135 void (*disable)(struct perf_event *);
136 unsigned eventsel;
137 unsigned perfctr;
138 u64 (*event_map)(int);
139 u64 (*raw_event)(u64);
140 int max_events;
141 int num_events;
142 int num_events_fixed;
143 int event_bits;
144 u64 event_mask;
145 int apic;
146 u64 max_period;
147 struct event_constraint *
148 (*get_event_constraints)(struct cpu_hw_events *cpuc,
149 struct perf_event *event);
150
151 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
152 struct perf_event *event);
153 struct event_constraint *event_constraints;
154
155 void (*cpu_prepare)(int cpu);
156 void (*cpu_starting)(int cpu);
157 void (*cpu_dying)(int cpu);
158 void (*cpu_dead)(int cpu);
159
160 /*
161 * Intel Arch Perfmon v2+
162 */
163 u64 intel_ctrl;
164
165 /*
166 * Intel DebugStore bits
167 */
168 int bts, pebs;
169 int pebs_record_size;
170 void (*drain_pebs)(struct pt_regs *regs);
171 struct event_constraint *pebs_constraints;
172
173 /*
174 * Intel LBR
175 */
176 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
177 int lbr_nr; /* hardware stack size */
178 int lbr_format; /* hardware format */
179 };
180
181 static struct x86_pmu x86_pmu __read_mostly;
182
183 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
184 .enabled = 1,
185 };
186
187 static int x86_perf_event_set_period(struct perf_event *event);
188
189 /*
190 * Generalized hw caching related hw_event table, filled
191 * in on a per model basis. A value of 0 means
192 * 'not supported', -1 means 'hw_event makes no sense on
193 * this CPU', any other value means the raw hw_event
194 * ID.
195 */
196
197 #define C(x) PERF_COUNT_HW_CACHE_##x
198
199 static u64 __read_mostly hw_cache_event_ids
200 [PERF_COUNT_HW_CACHE_MAX]
201 [PERF_COUNT_HW_CACHE_OP_MAX]
202 [PERF_COUNT_HW_CACHE_RESULT_MAX];
203
204 /*
205 * Propagate event elapsed time into the generic event.
206 * Can only be executed on the CPU where the event is active.
207 * Returns the delta events processed.
208 */
209 static u64
210 x86_perf_event_update(struct perf_event *event)
211 {
212 struct hw_perf_event *hwc = &event->hw;
213 int shift = 64 - x86_pmu.event_bits;
214 u64 prev_raw_count, new_raw_count;
215 int idx = hwc->idx;
216 s64 delta;
217
218 if (idx == X86_PMC_IDX_FIXED_BTS)
219 return 0;
220
221 /*
222 * Careful: an NMI might modify the previous event value.
223 *
224 * Our tactic to handle this is to first atomically read and
225 * exchange a new raw count - then add that new-prev delta
226 * count to the generic event atomically:
227 */
228 again:
229 prev_raw_count = atomic64_read(&hwc->prev_count);
230 rdmsrl(hwc->event_base + idx, new_raw_count);
231
232 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
233 new_raw_count) != prev_raw_count)
234 goto again;
235
236 /*
237 * Now we have the new raw value and have updated the prev
238 * timestamp already. We can now calculate the elapsed delta
239 * (event-)time and add that to the generic event.
240 *
241 * Careful, not all hw sign-extends above the physical width
242 * of the count.
243 */
244 delta = (new_raw_count << shift) - (prev_raw_count << shift);
245 delta >>= shift;
246
247 atomic64_add(delta, &event->count);
248 atomic64_sub(delta, &hwc->period_left);
249
250 return new_raw_count;
251 }
252
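/*
 * Note on the shift arithmetic above (illustrative numbers): with, say,
 * 48-bit counters, event_bits == 48 and shift == 16.  Shifting both raw
 * values left by 16 discards whatever the hardware did (or did not do)
 * above bit 47, and shifting the signed difference back right by 16
 * yields the delta modulo the real counter width, so a counter that
 * wrapped around still produces a small, correct increment rather than
 * a huge bogus one.
 */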
253 static atomic_t active_events;
254 static DEFINE_MUTEX(pmc_reserve_mutex);
255
256 static bool reserve_pmc_hardware(void)
257 {
258 #ifdef CONFIG_X86_LOCAL_APIC
259 int i;
260
261 if (nmi_watchdog == NMI_LOCAL_APIC)
262 disable_lapic_nmi_watchdog();
263
264 for (i = 0; i < x86_pmu.num_events; i++) {
265 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
266 goto perfctr_fail;
267 }
268
269 for (i = 0; i < x86_pmu.num_events; i++) {
270 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
271 goto eventsel_fail;
272 }
273 #endif
274
275 return true;
276
277 #ifdef CONFIG_X86_LOCAL_APIC
278 eventsel_fail:
279 for (i--; i >= 0; i--)
280 release_evntsel_nmi(x86_pmu.eventsel + i);
281
282 i = x86_pmu.num_events;
283
284 perfctr_fail:
285 for (i--; i >= 0; i--)
286 release_perfctr_nmi(x86_pmu.perfctr + i);
287
288 if (nmi_watchdog == NMI_LOCAL_APIC)
289 enable_lapic_nmi_watchdog();
290
291 return false;
292 #endif
293 }
294
295 static void release_pmc_hardware(void)
296 {
297 #ifdef CONFIG_X86_LOCAL_APIC
298 int i;
299
300 for (i = 0; i < x86_pmu.num_events; i++) {
301 release_perfctr_nmi(x86_pmu.perfctr + i);
302 release_evntsel_nmi(x86_pmu.eventsel + i);
303 }
304
305 if (nmi_watchdog == NMI_LOCAL_APIC)
306 enable_lapic_nmi_watchdog();
307 #endif
308 }
309
310 static int reserve_ds_buffers(void);
311 static void release_ds_buffers(void);
312
313 static void hw_perf_event_destroy(struct perf_event *event)
314 {
315 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
316 release_pmc_hardware();
317 release_ds_buffers();
318 mutex_unlock(&pmc_reserve_mutex);
319 }
320 }
321
322 static inline int x86_pmu_initialized(void)
323 {
324 return x86_pmu.handle_irq != NULL;
325 }
326
327 static inline int
328 set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
329 {
330 unsigned int cache_type, cache_op, cache_result;
331 u64 config, val;
332
333 config = attr->config;
334
335 cache_type = (config >> 0) & 0xff;
336 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
337 return -EINVAL;
338
339 cache_op = (config >> 8) & 0xff;
340 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
341 return -EINVAL;
342
343 cache_result = (config >> 16) & 0xff;
344 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
345 return -EINVAL;
346
347 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
348
349 if (val == 0)
350 return -ENOENT;
351
352 if (val == -1)
353 return -EINVAL;
354
355 hwc->config |= val;
356
357 return 0;
358 }
359
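/*
 * Worked example of the attr->config layout decoded above (illustration
 * only):
 *
 *	bits  0-7 : cache type    e.g. PERF_COUNT_HW_CACHE_L1D         (0)
 *	bits  8-15: cache op      e.g. PERF_COUNT_HW_CACHE_OP_READ     (0)
 *	bits 16-23: cache result  e.g. PERF_COUNT_HW_CACHE_RESULT_MISS (1)
 *
 * so attr->config == 0x10000 requests L1D read misses and the raw event
 * selector comes from hw_cache_event_ids[0][0][1] for the current model.
 */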
360 /*
361 * Setup the hardware configuration for a given attr_type
362 */
363 static int __hw_perf_event_init(struct perf_event *event)
364 {
365 struct perf_event_attr *attr = &event->attr;
366 struct hw_perf_event *hwc = &event->hw;
367 u64 config;
368 int err;
369
370 if (!x86_pmu_initialized())
371 return -ENODEV;
372
373 err = 0;
374 if (!atomic_inc_not_zero(&active_events)) {
375 mutex_lock(&pmc_reserve_mutex);
376 if (atomic_read(&active_events) == 0) {
377 if (!reserve_pmc_hardware())
378 err = -EBUSY;
379 else
380 err = reserve_ds_buffers();
381 }
382 if (!err)
383 atomic_inc(&active_events);
384 mutex_unlock(&pmc_reserve_mutex);
385 }
386 if (err)
387 return err;
388
389 event->destroy = hw_perf_event_destroy;
390
391 /*
392 * Generate PMC IRQs:
393 * (keep 'enabled' bit clear for now)
394 */
395 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
396
397 hwc->idx = -1;
398 hwc->last_cpu = -1;
399 hwc->last_tag = ~0ULL;
400
401 /*
402 * Count user and OS events unless requested not to.
403 */
404 if (!attr->exclude_user)
405 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
406 if (!attr->exclude_kernel)
407 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
408
409 if (!hwc->sample_period) {
410 hwc->sample_period = x86_pmu.max_period;
411 hwc->last_period = hwc->sample_period;
412 atomic64_set(&hwc->period_left, hwc->sample_period);
413 } else {
414 /*
415 * If we have a PMU initialized but no APIC
416 * interrupts, we cannot sample hardware
417 * events (user-space has to fall back and
418 * sample via a hrtimer based software event):
419 */
420 if (!x86_pmu.apic)
421 return -EOPNOTSUPP;
422 }
423
424 /*
425 * Raw hw_event types provide the config in the hw_event structure
426 */
427 if (attr->type == PERF_TYPE_RAW) {
428 hwc->config |= x86_pmu.raw_event(attr->config);
429 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
430 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
431 return -EACCES;
432 return 0;
433 }
434
435 if (attr->type == PERF_TYPE_HW_CACHE)
436 return set_ext_hw_attr(hwc, attr);
437
438 if (attr->config >= x86_pmu.max_events)
439 return -EINVAL;
440
441 /*
442 * The generic map:
443 */
444 config = x86_pmu.event_map(attr->config);
445
446 if (config == 0)
447 return -ENOENT;
448
449 if (config == -1LL)
450 return -EINVAL;
451
452 /*
453 * Branch tracing:
454 */
455 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
456 (hwc->sample_period == 1)) {
457 /* BTS is not supported by this architecture. */
458 if (!x86_pmu.bts)
459 return -EOPNOTSUPP;
460
461 /* BTS is currently only allowed for user-mode. */
462 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
463 return -EOPNOTSUPP;
464 }
465
466 hwc->config |= config;
467
468 return 0;
469 }
470
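/*
 * For orientation (an illustrative userspace-style attribute, not code
 * from this file): the branch-tracing/BTS path above is selected by
 * something like
 *
 *	struct perf_event_attr attr = {
 *		.type           = PERF_TYPE_HARDWARE,
 *		.config         = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
 *		.sample_period  = 1,
 *		.exclude_kernel = 1,
 *	};
 *
 * i.e. a period-1 branch event, which only BTS can service; setting
 * exclude_kernel keeps ARCH_PERFMON_EVENTSEL_OS clear, which the code
 * above requires since BTS is allowed for user mode only here.
 */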
471 static void x86_pmu_disable_all(void)
472 {
473 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
474 int idx;
475
476 for (idx = 0; idx < x86_pmu.num_events; idx++) {
477 u64 val;
478
479 if (!test_bit(idx, cpuc->active_mask))
480 continue;
481 rdmsrl(x86_pmu.eventsel + idx, val);
482 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
483 continue;
484 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
485 wrmsrl(x86_pmu.eventsel + idx, val);
486 }
487 }
488
489 void hw_perf_disable(void)
490 {
491 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
492
493 if (!x86_pmu_initialized())
494 return;
495
496 if (!cpuc->enabled)
497 return;
498
499 cpuc->n_added = 0;
500 cpuc->enabled = 0;
501 barrier();
502
503 x86_pmu.disable_all();
504 }
505
506 static void x86_pmu_enable_all(void)
507 {
508 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
509 int idx;
510
511 for (idx = 0; idx < x86_pmu.num_events; idx++) {
512 struct perf_event *event = cpuc->events[idx];
513 u64 val;
514
515 if (!test_bit(idx, cpuc->active_mask))
516 continue;
517
518 val = event->hw.config;
519 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
520 wrmsrl(x86_pmu.eventsel + idx, val);
521 }
522 }
523
524 static const struct pmu pmu;
525
526 static inline int is_x86_event(struct perf_event *event)
527 {
528 return event->pmu == &pmu;
529 }
530
531 static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
532 {
533 struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
534 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
535 int i, j, w, wmax, num = 0;
536 struct hw_perf_event *hwc;
537
538 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
539
540 for (i = 0; i < n; i++) {
541 c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
542 constraints[i] = c;
543 }
544
545 /*
546 * fastpath, try to reuse previous register
547 */
548 for (i = 0; i < n; i++) {
549 hwc = &cpuc->event_list[i]->hw;
550 c = constraints[i];
551
552 /* never assigned */
553 if (hwc->idx == -1)
554 break;
555
556 /* constraint still honored */
557 if (!test_bit(hwc->idx, c->idxmsk))
558 break;
559
560 /* not already used */
561 if (test_bit(hwc->idx, used_mask))
562 break;
563
564 __set_bit(hwc->idx, used_mask);
565 if (assign)
566 assign[i] = hwc->idx;
567 }
568 if (i == n)
569 goto done;
570
571 /*
572 * begin slow path
573 */
574
575 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
576
577 /*
578 * weight = number of possible counters
579 *
580 * 1 = most constrained, only works on one counter
581 * wmax = least constrained, works on any counter
582 *
583 * assign events to counters starting with most
584 * constrained events.
585 */
586 wmax = x86_pmu.num_events;
587
588 /*
589 * when fixed event counters are present,
590 * wmax is incremented by 1 to account
591 * for one more choice
592 */
593 if (x86_pmu.num_events_fixed)
594 wmax++;
595
596 for (w = 1, num = n; num && w <= wmax; w++) {
597 /* for each event */
598 for (i = 0; num && i < n; i++) {
599 c = constraints[i];
600 hwc = &cpuc->event_list[i]->hw;
601
602 if (c->weight != w)
603 continue;
604
605 for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
606 if (!test_bit(j, used_mask))
607 break;
608 }
609
610 if (j == X86_PMC_IDX_MAX)
611 break;
612
613 __set_bit(j, used_mask);
614
615 if (assign)
616 assign[i] = j;
617 num--;
618 }
619 }
620 done:
621 /*
622 * scheduling failed or is just a simulation,
623 * free resources if necessary
624 */
625 if (!assign || num) {
626 for (i = 0; i < n; i++) {
627 if (x86_pmu.put_event_constraints)
628 x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
629 }
630 }
631 return num ? -ENOSPC : 0;
632 }
633
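/*
 * Worked example for the slow path above (illustrative): take three
 * events with constraints e0: {0} (weight 1), e1: {0,1} (weight 2),
 * e2: {0,1,2,3} (weight 4).  The w == 1 pass puts e0 on counter 0, the
 * w == 2 pass puts e1 on counter 1 and the w == 4 pass puts e2 on
 * counter 2.  Assigning in list order instead could have handed counter 0
 * to e1 or e2 first and left e0 unschedulable, which is why the most
 * constrained (lowest weight) events are placed first.
 */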
634 /*
635 * dogrp: true if sibling events (the whole group) must be collected
636 * returns total number of events and error code
637 */
638 static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp)
639 {
640 struct perf_event *event;
641 int n, max_count;
642
643 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
644
645 /* current number of events already accepted */
646 n = cpuc->n_events;
647
648 if (is_x86_event(leader)) {
649 if (n >= max_count)
650 return -ENOSPC;
651 cpuc->event_list[n] = leader;
652 n++;
653 }
654 if (!dogrp)
655 return n;
656
657 list_for_each_entry(event, &leader->sibling_list, group_entry) {
658 if (!is_x86_event(event) ||
659 event->state <= PERF_EVENT_STATE_OFF)
660 continue;
661
662 if (n >= max_count)
663 return -ENOSPC;
664
665 cpuc->event_list[n] = event;
666 n++;
667 }
668 return n;
669 }
670
671 static inline void x86_assign_hw_event(struct perf_event *event,
672 struct cpu_hw_events *cpuc, int i)
673 {
674 struct hw_perf_event *hwc = &event->hw;
675
676 hwc->idx = cpuc->assign[i];
677 hwc->last_cpu = smp_processor_id();
678 hwc->last_tag = ++cpuc->tags[i];
679
680 if (hwc->idx == X86_PMC_IDX_FIXED_BTS) {
681 hwc->config_base = 0;
682 hwc->event_base = 0;
683 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
684 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
685 /*
686 * We set it so that event_base + idx in wrmsr/rdmsr maps to
687 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
688 */
689 hwc->event_base =
690 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
691 } else {
692 hwc->config_base = x86_pmu.eventsel;
693 hwc->event_base = x86_pmu.perfctr;
694 }
695 }
696
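/*
 * Concrete example of the fixed-counter mapping above (illustrative
 * values): X86_PMC_IDX_FIXED is 32 and MSR_ARCH_PERFMON_FIXED_CTR0 is
 * 0x309, so event_base + idx evaluates to 0x309 for idx 32, 0x30a for
 * idx 33 and 0x30b for idx 34, i.e. FIXED_CTR0..FIXED_CTR2, while all
 * fixed counters share the single control MSR at config_base
 * (MSR_ARCH_PERFMON_FIXED_CTR_CTRL, 0x38d).
 */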
697 static inline int match_prev_assignment(struct hw_perf_event *hwc,
698 struct cpu_hw_events *cpuc,
699 int i)
700 {
701 return hwc->idx == cpuc->assign[i] &&
702 hwc->last_cpu == smp_processor_id() &&
703 hwc->last_tag == cpuc->tags[i];
704 }
705
706 static int x86_pmu_start(struct perf_event *event);
707 static void x86_pmu_stop(struct perf_event *event);
708
709 void hw_perf_enable(void)
710 {
711 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
712 struct perf_event *event;
713 struct hw_perf_event *hwc;
714 int i;
715
716 if (!x86_pmu_initialized())
717 return;
718
719 if (cpuc->enabled)
720 return;
721
722 if (cpuc->n_added) {
723 int n_running = cpuc->n_events - cpuc->n_added;
724 /*
725 * apply assignment obtained either from
726 * hw_perf_group_sched_in() or x86_pmu_enable()
727 *
728 * step1: save events moving to new counters
729 * step2: reprogram moved events into new counters
730 */
731 for (i = 0; i < n_running; i++) {
732
733 event = cpuc->event_list[i];
734 hwc = &event->hw;
735
736 /*
737 * we can avoid reprogramming counter if:
738 * - assigned same counter as last time
739 * - running on same CPU as last time
740 * - no other event has used the counter since
741 */
742 if (hwc->idx == -1 ||
743 match_prev_assignment(hwc, cpuc, i))
744 continue;
745
746 x86_pmu_stop(event);
747
748 hwc->idx = -1;
749 }
750
751 for (i = 0; i < cpuc->n_events; i++) {
752
753 event = cpuc->event_list[i];
754 hwc = &event->hw;
755
756 if (i < n_running &&
757 match_prev_assignment(hwc, cpuc, i))
758 continue;
759
760 if (hwc->idx == -1)
761 x86_assign_hw_event(event, cpuc, i);
762
763 x86_pmu_start(event);
764 }
765 cpuc->n_added = 0;
766 perf_events_lapic_init();
767 }
768
769 cpuc->enabled = 1;
770 barrier();
771
772 x86_pmu.enable_all();
773 }
774
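/*
 * Example of the two-step reprogramming above (illustrative): suppose two
 * events were running on counters 0 and 1 and a newly added, more
 * constrained event now needs counter 0, so the scheduler produced the
 * assignment {2, 1, 0}.  Step 1 stops the running event that has to move
 * away from counter 0; step 2 then (re)starts every event whose counter
 * changed plus the new event, while the event that kept counter 1 is
 * left completely untouched.
 */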
775 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
776 {
777 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
778 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
779 }
780
781 static inline void x86_pmu_disable_event(struct perf_event *event)
782 {
783 struct hw_perf_event *hwc = &event->hw;
784 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
785 }
786
787 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
788
789 /*
790 * Set the next IRQ period, based on the hwc->period_left value.
791 * To be called with the event disabled in hw:
792 */
793 static int
794 x86_perf_event_set_period(struct perf_event *event)
795 {
796 struct hw_perf_event *hwc = &event->hw;
797 s64 left = atomic64_read(&hwc->period_left);
798 s64 period = hwc->sample_period;
799 int err, ret = 0, idx = hwc->idx;
800
801 if (idx == X86_PMC_IDX_FIXED_BTS)
802 return 0;
803
804 /*
805 * If we are way outside a reasonable range then just skip forward:
806 */
807 if (unlikely(left <= -period)) {
808 left = period;
809 atomic64_set(&hwc->period_left, left);
810 hwc->last_period = period;
811 ret = 1;
812 }
813
814 if (unlikely(left <= 0)) {
815 left += period;
816 atomic64_set(&hwc->period_left, left);
817 hwc->last_period = period;
818 ret = 1;
819 }
820 /*
821 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
822 */
823 if (unlikely(left < 2))
824 left = 2;
825
826 if (left > x86_pmu.max_period)
827 left = x86_pmu.max_period;
828
829 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
830
831 /*
832 * The hw event starts counting from this event offset,
833 * mark it to be able to extract future deltas:
834 */
835 atomic64_set(&hwc->prev_count, (u64)-left);
836
837 err = checking_wrmsrl(hwc->event_base + idx,
838 (u64)(-left) & x86_pmu.event_mask);
839
840 perf_event_update_userpage(event);
841
842 return ret;
843 }
844
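/*
 * Why the counter is programmed with -left (illustrative numbers): the
 * PMCs count upwards and raise the PMI on overflow, so writing
 * (u64)-left, masked to the counter width, makes the interrupt arrive
 * after exactly 'left' further increments.  E.g. left == 100000 on a
 * 48-bit counter writes 0xfffffffe7960 (2^48 - 100000) into the MSR.
 */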
845 static void x86_pmu_enable_event(struct perf_event *event)
846 {
847 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
848 if (cpuc->enabled)
849 __x86_pmu_enable_event(&event->hw);
850 }
851
852 /*
853 * activate a single event
854 *
855 * The event is added to the group of enabled events
856 * but only if it can be scheduled with existing events.
857 *
858 * Called with the PMU disabled; if successful, the caller is then
859 * guaranteed to call perf_enable() and hw_perf_enable().
860 */
861 static int x86_pmu_enable(struct perf_event *event)
862 {
863 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
864 struct hw_perf_event *hwc;
865 int assign[X86_PMC_IDX_MAX];
866 int n, n0, ret;
867
868 hwc = &event->hw;
869
870 n0 = cpuc->n_events;
871 n = collect_events(cpuc, event, false);
872 if (n < 0)
873 return n;
874
875 ret = x86_schedule_events(cpuc, n, assign);
876 if (ret)
877 return ret;
878 /*
879 * copy the new assignment now that we know it is possible;
880 * it will be used by hw_perf_enable()
881 */
882 memcpy(cpuc->assign, assign, n*sizeof(int));
883
884 cpuc->n_events = n;
885 cpuc->n_added += n - n0;
886
887 return 0;
888 }
889
890 static int x86_pmu_start(struct perf_event *event)
891 {
892 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
893 int idx = event->hw.idx;
894
895 if (idx == -1)
896 return -EAGAIN;
897
898 x86_perf_event_set_period(event);
899 cpuc->events[idx] = event;
900 __set_bit(idx, cpuc->active_mask);
901 x86_pmu.enable(event);
902 perf_event_update_userpage(event);
903
904 return 0;
905 }
906
907 static void x86_pmu_unthrottle(struct perf_event *event)
908 {
909 int ret = x86_pmu_start(event);
910 WARN_ON_ONCE(ret);
911 }
912
913 void perf_event_print_debug(void)
914 {
915 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
916 u64 pebs;
917 struct cpu_hw_events *cpuc;
918 unsigned long flags;
919 int cpu, idx;
920
921 if (!x86_pmu.num_events)
922 return;
923
924 local_irq_save(flags);
925
926 cpu = smp_processor_id();
927 cpuc = &per_cpu(cpu_hw_events, cpu);
928
929 if (x86_pmu.version >= 2) {
930 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
931 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
932 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
933 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
934 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
935
936 pr_info("\n");
937 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
938 pr_info("CPU#%d: status: %016llx\n", cpu, status);
939 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
940 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
941 pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
942 }
943 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
944
945 for (idx = 0; idx < x86_pmu.num_events; idx++) {
946 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
947 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
948
949 prev_left = per_cpu(pmc_prev_left[idx], cpu);
950
951 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
952 cpu, idx, pmc_ctrl);
953 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
954 cpu, idx, pmc_count);
955 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
956 cpu, idx, prev_left);
957 }
958 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
959 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
960
961 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
962 cpu, idx, pmc_count);
963 }
964 local_irq_restore(flags);
965 }
966
967 static void x86_pmu_stop(struct perf_event *event)
968 {
969 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
970 struct hw_perf_event *hwc = &event->hw;
971 int idx = hwc->idx;
972
973 if (!__test_and_clear_bit(idx, cpuc->active_mask))
974 return;
975
976 x86_pmu.disable(event);
977
978 /*
979 * Drain the remaining delta count out of an event
980 * that we are disabling:
981 */
982 x86_perf_event_update(event);
983
984 cpuc->events[idx] = NULL;
985 }
986
987 static void x86_pmu_disable(struct perf_event *event)
988 {
989 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
990 int i;
991
992 x86_pmu_stop(event);
993
994 for (i = 0; i < cpuc->n_events; i++) {
995 if (event == cpuc->event_list[i]) {
996
997 if (x86_pmu.put_event_constraints)
998 x86_pmu.put_event_constraints(cpuc, event);
999
1000 while (++i < cpuc->n_events)
1001 cpuc->event_list[i-1] = cpuc->event_list[i];
1002
1003 --cpuc->n_events;
1004 break;
1005 }
1006 }
1007 perf_event_update_userpage(event);
1008 }
1009
1010 static int x86_pmu_handle_irq(struct pt_regs *regs)
1011 {
1012 struct perf_sample_data data;
1013 struct cpu_hw_events *cpuc;
1014 struct perf_event *event;
1015 struct hw_perf_event *hwc;
1016 int idx, handled = 0;
1017 u64 val;
1018
1019 perf_sample_data_init(&data, 0);
1020
1021 cpuc = &__get_cpu_var(cpu_hw_events);
1022
1023 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1024 if (!test_bit(idx, cpuc->active_mask))
1025 continue;
1026
1027 event = cpuc->events[idx];
1028 hwc = &event->hw;
1029
1030 val = x86_perf_event_update(event);
1031 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1032 continue;
1033
1034 /*
1035 * event overflow
1036 */
1037 handled = 1;
1038 data.period = event->hw.last_period;
1039
1040 if (!x86_perf_event_set_period(event))
1041 continue;
1042
1043 if (perf_event_overflow(event, 1, &data, regs))
1044 x86_pmu_stop(event);
1045 }
1046
1047 if (handled)
1048 inc_irq_stat(apic_perf_irqs);
1049
1050 return handled;
1051 }
1052
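/*
 * Note on the overflow test above (a description of the existing logic):
 * each active counter was programmed with a negative value, so while bit
 * (event_bits - 1) of the freshly read count is still set the counter
 * has not yet crossed zero and cannot be the source of this NMI; only
 * counters whose value has gone "positive" are treated as overflowed,
 * get a sample recorded and are re-armed via x86_perf_event_set_period().
 */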
1053 void smp_perf_pending_interrupt(struct pt_regs *regs)
1054 {
1055 irq_enter();
1056 ack_APIC_irq();
1057 inc_irq_stat(apic_pending_irqs);
1058 perf_event_do_pending();
1059 irq_exit();
1060 }
1061
1062 void set_perf_event_pending(void)
1063 {
1064 #ifdef CONFIG_X86_LOCAL_APIC
1065 if (!x86_pmu.apic || !x86_pmu_initialized())
1066 return;
1067
1068 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1069 #endif
1070 }
1071
1072 void perf_events_lapic_init(void)
1073 {
1074 #ifdef CONFIG_X86_LOCAL_APIC
1075 if (!x86_pmu.apic || !x86_pmu_initialized())
1076 return;
1077
1078 /*
1079 * Always use NMI for PMU
1080 */
1081 apic_write(APIC_LVTPC, APIC_DM_NMI);
1082 #endif
1083 }
1084
1085 static int __kprobes
1086 perf_event_nmi_handler(struct notifier_block *self,
1087 unsigned long cmd, void *__args)
1088 {
1089 struct die_args *args = __args;
1090 struct pt_regs *regs;
1091
1092 if (!atomic_read(&active_events))
1093 return NOTIFY_DONE;
1094
1095 switch (cmd) {
1096 case DIE_NMI:
1097 case DIE_NMI_IPI:
1098 break;
1099
1100 default:
1101 return NOTIFY_DONE;
1102 }
1103
1104 regs = args->regs;
1105
1106 #ifdef CONFIG_X86_LOCAL_APIC
1107 apic_write(APIC_LVTPC, APIC_DM_NMI);
1108 #endif
1109 /*
1110 * Can't rely on the handled return value to say it was our NMI, two
1111 * events could trigger 'simultaneously' raising two back-to-back NMIs.
1112 *
1113 * If the first NMI handles both, the latter will be empty and daze
1114 * the CPU.
1115 */
1116 x86_pmu.handle_irq(regs);
1117
1118 return NOTIFY_STOP;
1119 }
1120
1121 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1122 .notifier_call = perf_event_nmi_handler,
1123 .next = NULL,
1124 .priority = 1
1125 };
1126
1127 static struct event_constraint unconstrained;
1128 static struct event_constraint emptyconstraint;
1129
1130 static struct event_constraint *
1131 x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
1132 {
1133 struct event_constraint *c;
1134
1135 if (x86_pmu.event_constraints) {
1136 for_each_event_constraint(c, x86_pmu.event_constraints) {
1137 if ((event->hw.config & c->cmask) == c->code)
1138 return c;
1139 }
1140 }
1141
1142 return &unconstrained;
1143 }
1144
1145 static int x86_event_sched_in(struct perf_event *event,
1146 struct perf_cpu_context *cpuctx)
1147 {
1148 int ret = 0;
1149
1150 event->state = PERF_EVENT_STATE_ACTIVE;
1151 event->oncpu = smp_processor_id();
1152 event->tstamp_running += event->ctx->time - event->tstamp_stopped;
1153
1154 if (!is_x86_event(event))
1155 ret = event->pmu->enable(event);
1156
1157 if (!ret && !is_software_event(event))
1158 cpuctx->active_oncpu++;
1159
1160 if (!ret && event->attr.exclusive)
1161 cpuctx->exclusive = 1;
1162
1163 return ret;
1164 }
1165
1166 static void x86_event_sched_out(struct perf_event *event,
1167 struct perf_cpu_context *cpuctx)
1168 {
1169 event->state = PERF_EVENT_STATE_INACTIVE;
1170 event->oncpu = -1;
1171
1172 if (!is_x86_event(event))
1173 event->pmu->disable(event);
1174
1175 event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
1176
1177 if (!is_software_event(event))
1178 cpuctx->active_oncpu--;
1179
1180 if (event->attr.exclusive || !cpuctx->active_oncpu)
1181 cpuctx->exclusive = 0;
1182 }
1183
1184 /*
1185 * Called to enable a whole group of events.
1186 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
1187 * Assumes the caller has disabled interrupts and has
1188 * frozen the PMU with hw_perf_save_disable.
1189 *
1190 * Called with the PMU disabled; if this returns 1, the caller is then
1191 * guaranteed to call perf_enable() and hw_perf_enable().
1192 */
1193 int hw_perf_group_sched_in(struct perf_event *leader,
1194 struct perf_cpu_context *cpuctx,
1195 struct perf_event_context *ctx)
1196 {
1197 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1198 struct perf_event *sub;
1199 int assign[X86_PMC_IDX_MAX];
1200 int n0, n1, ret;
1201
1202 /* n0 = total number of events */
1203 n0 = collect_events(cpuc, leader, true);
1204 if (n0 < 0)
1205 return n0;
1206
1207 ret = x86_schedule_events(cpuc, n0, assign);
1208 if (ret)
1209 return ret;
1210
1211 ret = x86_event_sched_in(leader, cpuctx);
1212 if (ret)
1213 return ret;
1214
1215 n1 = 1;
1216 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1217 if (sub->state > PERF_EVENT_STATE_OFF) {
1218 ret = x86_event_sched_in(sub, cpuctx);
1219 if (ret)
1220 goto undo;
1221 ++n1;
1222 }
1223 }
1224 /*
1225 * copy the new assignment now that we know it is possible;
1226 * it will be used by hw_perf_enable()
1227 */
1228 memcpy(cpuc->assign, assign, n0*sizeof(int));
1229
1230 cpuc->n_events = n0;
1231 cpuc->n_added += n1;
1232 ctx->nr_active += n1;
1233
1234 /*
1235 * 1 means successful and events are active
1236 * This is not quite true because we defer
1237 * actual activation until hw_perf_enable(), but
1238 * this way we ensure the caller won't try to enable
1239 * individual events
1240 */
1241 return 1;
1242 undo:
1243 x86_event_sched_out(leader, cpuctx);
1244 n0 = 1;
1245 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
1246 if (sub->state == PERF_EVENT_STATE_ACTIVE) {
1247 x86_event_sched_out(sub, cpuctx);
1248 if (++n0 == n1)
1249 break;
1250 }
1251 }
1252 return ret;
1253 }
1254
1255 #include "perf_event_amd.c"
1256 #include "perf_event_p6.c"
1257 #include "perf_event_intel_lbr.c"
1258 #include "perf_event_intel_ds.c"
1259 #include "perf_event_intel.c"
1260
1261 static int __cpuinit
1262 x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1263 {
1264 unsigned int cpu = (long)hcpu;
1265
1266 switch (action & ~CPU_TASKS_FROZEN) {
1267 case CPU_UP_PREPARE:
1268 if (x86_pmu.cpu_prepare)
1269 x86_pmu.cpu_prepare(cpu);
1270 break;
1271
1272 case CPU_STARTING:
1273 if (x86_pmu.cpu_starting)
1274 x86_pmu.cpu_starting(cpu);
1275 break;
1276
1277 case CPU_DYING:
1278 if (x86_pmu.cpu_dying)
1279 x86_pmu.cpu_dying(cpu);
1280 break;
1281
1282 case CPU_DEAD:
1283 if (x86_pmu.cpu_dead)
1284 x86_pmu.cpu_dead(cpu);
1285 break;
1286
1287 default:
1288 break;
1289 }
1290
1291 return NOTIFY_OK;
1292 }
1293
1294 static void __init pmu_check_apic(void)
1295 {
1296 if (cpu_has_apic)
1297 return;
1298
1299 x86_pmu.apic = 0;
1300 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1301 pr_info("no hardware sampling interrupt available.\n");
1302 }
1303
1304 void __init init_hw_perf_events(void)
1305 {
1306 struct event_constraint *c;
1307 int err;
1308
1309 pr_info("Performance Events: ");
1310
1311 switch (boot_cpu_data.x86_vendor) {
1312 case X86_VENDOR_INTEL:
1313 err = intel_pmu_init();
1314 break;
1315 case X86_VENDOR_AMD:
1316 err = amd_pmu_init();
1317 break;
1318 default:
1319 return;
1320 }
1321 if (err != 0) {
1322 pr_cont("no PMU driver, software events only.\n");
1323 return;
1324 }
1325
1326 pmu_check_apic();
1327
1328 pr_cont("%s PMU driver.\n", x86_pmu.name);
1329
1330 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
1331 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
1332 x86_pmu.num_events, X86_PMC_MAX_GENERIC);
1333 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
1334 }
1335 perf_event_mask = (1 << x86_pmu.num_events) - 1;
1336 perf_max_events = x86_pmu.num_events;
1337
1338 if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
1339 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
1340 x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
1341 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
1342 }
1343
1344 perf_event_mask |=
1345 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
1346 x86_pmu.intel_ctrl = perf_event_mask;
1347
1348 perf_events_lapic_init();
1349 register_die_notifier(&perf_event_nmi_notifier);
1350
1351 unconstrained = (struct event_constraint)
1352 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
1353 0, x86_pmu.num_events);
1354
1355 if (x86_pmu.event_constraints) {
1356 for_each_event_constraint(c, x86_pmu.event_constraints) {
1357 if (c->cmask != INTEL_ARCH_FIXED_MASK)
1358 continue;
1359
1360 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
1361 c->weight += x86_pmu.num_events;
1362 }
1363 }
1364
1365 pr_info("... version: %d\n", x86_pmu.version);
1366 pr_info("... bit width: %d\n", x86_pmu.event_bits);
1367 pr_info("... generic registers: %d\n", x86_pmu.num_events);
1368 pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
1369 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1370 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
1371 pr_info("... event mask: %016Lx\n", perf_event_mask);
1372
1373 perf_cpu_notifier(x86_pmu_notifier);
1374 }
1375
1376 static inline void x86_pmu_read(struct perf_event *event)
1377 {
1378 x86_perf_event_update(event);
1379 }
1380
1381 static const struct pmu pmu = {
1382 .enable = x86_pmu_enable,
1383 .disable = x86_pmu_disable,
1384 .start = x86_pmu_start,
1385 .stop = x86_pmu_stop,
1386 .read = x86_pmu_read,
1387 .unthrottle = x86_pmu_unthrottle,
1388 };
1389
1390 /*
1391 * validate that we can schedule this event
1392 */
1393 static int validate_event(struct perf_event *event)
1394 {
1395 struct cpu_hw_events *fake_cpuc;
1396 struct event_constraint *c;
1397 int ret = 0;
1398
1399 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1400 if (!fake_cpuc)
1401 return -ENOMEM;
1402
1403 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1404
1405 if (!c || !c->weight)
1406 ret = -ENOSPC;
1407
1408 if (x86_pmu.put_event_constraints)
1409 x86_pmu.put_event_constraints(fake_cpuc, event);
1410
1411 kfree(fake_cpuc);
1412
1413 return ret;
1414 }
1415
1416 /*
1417 * validate a single event group
1418 *
1419 * validation includes:
1420 * - check events are compatible with each other
1421 * - events do not compete for the same counter
1422 * - number of events <= number of counters
1423 *
1424 * validation ensures the group can be loaded onto the
1425 * PMU if it was the only group available.
1426 */
1427 static int validate_group(struct perf_event *event)
1428 {
1429 struct perf_event *leader = event->group_leader;
1430 struct cpu_hw_events *fake_cpuc;
1431 int ret, n;
1432
1433 ret = -ENOMEM;
1434 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1435 if (!fake_cpuc)
1436 goto out;
1437
1438 /*
1439 * the event is not yet connected with its
1440 * siblings; therefore we must first collect
1441 * existing siblings, then add the new event
1442 * before we can simulate the scheduling
1443 */
1444 ret = -ENOSPC;
1445 n = collect_events(fake_cpuc, leader, true);
1446 if (n < 0)
1447 goto out_free;
1448
1449 fake_cpuc->n_events = n;
1450 n = collect_events(fake_cpuc, event, false);
1451 if (n < 0)
1452 goto out_free;
1453
1454 fake_cpuc->n_events = n;
1455
1456 ret = x86_schedule_events(fake_cpuc, n, NULL);
1457
1458 out_free:
1459 kfree(fake_cpuc);
1460 out:
1461 return ret;
1462 }
1463
1464 const struct pmu *hw_perf_event_init(struct perf_event *event)
1465 {
1466 const struct pmu *tmp;
1467 int err;
1468
1469 err = __hw_perf_event_init(event);
1470 if (!err) {
1471 /*
1472 * we temporarily connect event to its pmu
1473 * such that validate_group() can classify
1474 * it as an x86 event using is_x86_event()
1475 */
1476 tmp = event->pmu;
1477 event->pmu = &pmu;
1478
1479 if (event->group_leader != event)
1480 err = validate_group(event);
1481 else
1482 err = validate_event(event);
1483
1484 event->pmu = tmp;
1485 }
1486 if (err) {
1487 if (event->destroy)
1488 event->destroy(event);
1489 return ERR_PTR(err);
1490 }
1491
1492 return &pmu;
1493 }
1494
1495 /*
1496 * callchain support
1497 */
1498
1499 static inline
1500 void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1501 {
1502 if (entry->nr < PERF_MAX_STACK_DEPTH)
1503 entry->ip[entry->nr++] = ip;
1504 }
1505
1506 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
1507 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
1508
1509
1510 static void
1511 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
1512 {
1513 /* Ignore warnings */
1514 }
1515
1516 static void backtrace_warning(void *data, char *msg)
1517 {
1518 /* Ignore warnings */
1519 }
1520
1521 static int backtrace_stack(void *data, char *name)
1522 {
1523 return 0;
1524 }
1525
1526 static void backtrace_address(void *data, unsigned long addr, int reliable)
1527 {
1528 struct perf_callchain_entry *entry = data;
1529
1530 if (reliable)
1531 callchain_store(entry, addr);
1532 }
1533
1534 static const struct stacktrace_ops backtrace_ops = {
1535 .warning = backtrace_warning,
1536 .warning_symbol = backtrace_warning_symbol,
1537 .stack = backtrace_stack,
1538 .address = backtrace_address,
1539 .walk_stack = print_context_stack_bp,
1540 };
1541
1542 #include "../dumpstack.h"
1543
1544 static void
1545 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1546 {
1547 callchain_store(entry, PERF_CONTEXT_KERNEL);
1548 callchain_store(entry, regs->ip);
1549
1550 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1551 }
1552
1553 /*
1554 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1555 */
1556 static unsigned long
1557 copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1558 {
1559 unsigned long offset, addr = (unsigned long)from;
1560 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1561 unsigned long size, len = 0;
1562 struct page *page;
1563 void *map;
1564 int ret;
1565
1566 do {
1567 ret = __get_user_pages_fast(addr, 1, 0, &page);
1568 if (!ret)
1569 break;
1570
1571 offset = addr & (PAGE_SIZE - 1);
1572 size = min(PAGE_SIZE - offset, n - len);
1573
1574 map = kmap_atomic(page, type);
1575 memcpy(to, map+offset, size);
1576 kunmap_atomic(map, type);
1577 put_page(page);
1578
1579 len += size;
1580 to += size;
1581 addr += size;
1582
1583 } while (len < n);
1584
1585 return len;
1586 }
1587
1588 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
1589 {
1590 unsigned long bytes;
1591
1592 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
1593
1594 return bytes == sizeof(*frame);
1595 }
1596
1597 static void
1598 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1599 {
1600 struct stack_frame frame;
1601 const void __user *fp;
1602
1603 if (!user_mode(regs))
1604 regs = task_pt_regs(current);
1605
1606 fp = (void __user *)regs->bp;
1607
1608 callchain_store(entry, PERF_CONTEXT_USER);
1609 callchain_store(entry, regs->ip);
1610
1611 while (entry->nr < PERF_MAX_STACK_DEPTH) {
1612 frame.next_frame = NULL;
1613 frame.return_address = 0;
1614
1615 if (!copy_stack_frame(fp, &frame))
1616 break;
1617
1618 if ((unsigned long)fp < regs->sp)
1619 break;
1620
1621 callchain_store(entry, frame.return_address);
1622 fp = frame.next_frame;
1623 }
1624 }
1625
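/*
 * The user-space unwind above follows classic frame pointers (a
 * description, assuming the usual struct stack_frame layout of a saved
 * next-frame pointer followed by the return address): starting from
 * regs->bp, each iteration copies one frame with copy_from_user_nmi(),
 * records frame.return_address and hops to frame.next_frame, stopping
 * when the copy fails, the chain walks below regs->sp, or
 * PERF_MAX_STACK_DEPTH entries have been collected.
 */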
1626 static void
1627 perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1628 {
1629 int is_user;
1630
1631 if (!regs)
1632 return;
1633
1634 is_user = user_mode(regs);
1635
1636 if (is_user && current->state != TASK_RUNNING)
1637 return;
1638
1639 if (!is_user)
1640 perf_callchain_kernel(regs, entry);
1641
1642 if (current->mm)
1643 perf_callchain_user(regs, entry);
1644 }
1645
1646 struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1647 {
1648 struct perf_callchain_entry *entry;
1649
1650 if (in_nmi())
1651 entry = &__get_cpu_var(pmc_nmi_entry);
1652 else
1653 entry = &__get_cpu_var(pmc_irq_entry);
1654
1655 entry->nr = 0;
1656
1657 perf_do_callchain(regs, entry);
1658
1659 return entry;
1660 }