Commit | Line | Data |
---|---|---|
0793a61d TG |
1 | /* |
2 | * Performance counters: | |
3 | * | |
4 | * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de> | |
5 | * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar | |
6 | * | |
7 | * Data type definitions, declarations, prototypes. | |
8 | * | |
9 | * Started by: Thomas Gleixner and Ingo Molnar | |
10 | * | |
11 | * For licencing details see kernel-base/COPYING | |
12 | */ | |
13 | #ifndef _LINUX_PERF_COUNTER_H | |
14 | #define _LINUX_PERF_COUNTER_H | |
15 | ||
f3dfd265 PM |
16 | #include <linux/types.h> |
17 | #include <linux/ioctl.h> | |
9aaa131a | 18 | #include <asm/byteorder.h> |
0793a61d TG |
19 | |
20 | /* | |
9f66a381 IM |
21 | * User-space ABI bits: |
22 | */ | |
23 | ||
24 | /* | |
b8e83514 | 25 | * Event type, held in the type bits of hw_event.config
0793a61d | 26 | */ |
b8e83514 PZ |
27 | enum perf_event_types { |
28 | PERF_TYPE_HARDWARE = 0, | |
29 | PERF_TYPE_SOFTWARE = 1, | |
30 | PERF_TYPE_TRACEPOINT = 2, | |
31 | ||
0793a61d | 32 | /* |
b8e83514 | 33 | * available TYPE space, raw is the max value. |
0793a61d | 34 | */ |
9f66a381 | 35 | |
b8e83514 PZ |
36 | PERF_TYPE_RAW = 128, |
37 | }; | |
6c594c21 | 38 | |
b8e83514 PZ |
39 | /* |
40 | * Generalized performance counter event types, carried in the event | |
41 | * identifier bits of hw_event.config for the sys_perf_counter_open() syscall: | |
42 | */ | |
43 | enum hw_event_ids { | |
9f66a381 | 44 | /* |
b8e83514 | 45 | * Common hardware events, generalized by the kernel: |
9f66a381 | 46 | */ |
b8e83514 PZ |
47 | PERF_COUNT_CPU_CYCLES = 0, |
48 | PERF_COUNT_INSTRUCTIONS = 1, | |
49 | PERF_COUNT_CACHE_REFERENCES = 2, | |
50 | PERF_COUNT_CACHE_MISSES = 3, | |
51 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | |
52 | PERF_COUNT_BRANCH_MISSES = 5, | |
53 | PERF_COUNT_BUS_CYCLES = 6, | |
54 | ||
55 | PERF_HW_EVENTS_MAX = 7, | |
56 | }; | |
e077df4f | 57 | |
b8e83514 PZ |
58 | /* |
59 | * Special "software" counters provided by the kernel, even if the hardware | |
60 | * does not support performance counters. These counters measure various | |
61 | * physical and sw events of the kernel (and allow the profiling of them as | |
62 | * well): | |
63 | */ | |
64 | enum sw_event_ids { | |
65 | PERF_COUNT_CPU_CLOCK = 0, | |
66 | PERF_COUNT_TASK_CLOCK = 1, | |
67 | PERF_COUNT_PAGE_FAULTS = 2, | |
68 | PERF_COUNT_CONTEXT_SWITCHES = 3, | |
69 | PERF_COUNT_CPU_MIGRATIONS = 4, | |
70 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | |
71 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | |
72 | ||
73 | PERF_SW_EVENTS_MAX = 7, | |
0793a61d TG |
74 | }; |
75 | ||
76 | /* | |
77 | * IRQ-notification data record type: | |
78 | */ | |
9f66a381 | 79 | enum perf_counter_record_type { |
b8e83514 PZ |
80 | PERF_RECORD_SIMPLE = 0, |
81 | PERF_RECORD_IRQ = 1, | |
82 | PERF_RECORD_GROUP = 2, | |
0793a61d TG |
83 | }; |
84 | ||
f4a2deb4 PZ |
85 | #define __PERF_COUNTER_MASK(name) \ |
86 | (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ | |
87 | PERF_COUNTER_##name##_SHIFT) | |
88 | ||
89 | #define PERF_COUNTER_RAW_BITS 1 | |
90 | #define PERF_COUNTER_RAW_SHIFT 63 | |
91 | #define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) | |
92 | ||
93 | #define PERF_COUNTER_CONFIG_BITS 63 | |
94 | #define PERF_COUNTER_CONFIG_SHIFT 0 | |
95 | #define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) | |
96 | ||
97 | #define PERF_COUNTER_TYPE_BITS 7 | |
98 | #define PERF_COUNTER_TYPE_SHIFT 56 | |
99 | #define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) | |
100 | ||
101 | #define PERF_COUNTER_EVENT_BITS 56 | |
102 | #define PERF_COUNTER_EVENT_SHIFT 0 | |
103 | #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) | |
104 | ||
53cfbf59 PM |
105 | /* |
106 | * Bits that can be set in hw_event.read_format to request that | |
107 | * reads on the counter should return the indicated quantities, | |
108 | * in increasing order of bit value, after the counter value. | |
109 | */ | |
110 | enum perf_counter_read_format { | |
111 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | |
112 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | |
113 | }; | |
114 | ||
9f66a381 IM |
115 | /* |
116 | * Hardware event to monitor via a performance monitoring counter: | |
117 | */ | |
118 | struct perf_counter_hw_event { | |
f4a2deb4 PZ |
119 | /* |
120 | * The MSB of the config word signifies if the rest contains cpu | |
121 | * specific (raw) counter configuration data, if unset, the next | |
122 | * 7 bits are an event type and the rest of the bits are the event | |
123 | * identifier. | |
124 | */ | |
125 | __u64 config; | |
9f66a381 | 126 | |
f3dfd265 | 127 | __u64 irq_period; |
2743a5b0 PM |
128 | __u64 record_type; |
129 | __u64 read_format; | |
9f66a381 | 130 | |
2743a5b0 | 131 | __u64 disabled : 1, /* off by default */ |
0475f9ea | 132 | nmi : 1, /* NMI sampling */ |
0475f9ea PM |
133 | inherit : 1, /* children inherit it */ |
134 | pinned : 1, /* must always be on PMU */ | |
135 | exclusive : 1, /* only group on PMU */ | |
136 | exclude_user : 1, /* don't count user */ | |
137 | exclude_kernel : 1, /* ditto kernel */ | |
138 | exclude_hv : 1, /* ditto hypervisor */ | |
2743a5b0 | 139 | exclude_idle : 1, /* don't count when idle */ |
ea5d20cf | 140 | include_tid : 1, /* include the tid */ |
0475f9ea | 141 | |
ea5d20cf | 142 | __reserved_1 : 54; |
2743a5b0 PM |
143 | |
144 | __u32 extra_config_len; | |
145 | __u32 __reserved_4; | |
9f66a381 | 146 | |
f3dfd265 | 147 | __u64 __reserved_2; |
2743a5b0 | 148 | __u64 __reserved_3; |
eab656ae TG |
149 | }; |
150 | ||
d859e29f PM |
151 | /* |
152 | * Ioctls that can be done on a perf counter fd: | |
153 | */ | |
154 | #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) | |
155 | #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) | |
156 | ||
37d81828 PM |
157 | /* |
158 | * Structure of the page that can be mapped via mmap | |
159 | */ | |
160 | struct perf_counter_mmap_page { | |
161 | __u32 version; /* version number of this structure */ | |
162 | __u32 compat_version; /* lowest version this is compat with */ | |
38ff667b PZ |
163 | |
164 | /* | |
165 | * Bits needed to read the hw counters in user-space. | |
166 | * | |
167 | * The index and offset should be read atomically using the seqlock: | |
168 | * | |
169 | * __u32 seq, index; | |
170 | * __s64 offset; | |
171 | * | |
172 | * again: | |
173 | * rmb(); | |
174 | * seq = pc->lock; | |
175 | * | |
176 | * if (unlikely(seq & 1)) { | |
177 | * cpu_relax(); | |
178 | * goto again; | |
179 | * } | |
180 | * | |
181 | * index = pc->index; | |
182 | * offset = pc->offset; | |
183 | * | |
184 | * rmb(); | |
185 | * if (pc->lock != seq) | |
186 | * goto again; | |
187 | * | |
188 | * After this, index contains the architecture specific counter index + 1, | |
189 | * so that 0 means unavailable; offset contains the value to be added | |
190 | * to the result of the raw hardware counter read to obtain this counter's value. | |
191 | */ | |
37d81828 PM |
192 | __u32 lock; /* seqlock for synchronization */ |
193 | __u32 index; /* hardware counter identifier */ | |
194 | __s64 offset; /* add to hardware counter value */ | |
7b732a75 | 195 | |
38ff667b PZ |
196 | /* |
197 | * Control data for the mmap() data buffer. | |
198 | * | |
199 | * User-space should issue an rmb(), on SMP capable platforms, after | |
200 | * reading data_head -- see perf_counter_wakeup(). | |
201 | */ | |
7b732a75 | 202 | __u32 data_head; /* head in the data section */ |
37d81828 PM |
203 | }; |
204 | ||
5c148194 PZ |
205 | struct perf_event_header { |
206 | __u32 type; | |
207 | __u32 size; | |
208 | }; | |
209 | ||
210 | enum perf_event_type { | |
211 | PERF_EVENT_IP = 0, | |
212 | PERF_EVENT_GROUP = 1, | |
ea5d20cf PZ |
213 | |
214 | __PERF_EVENT_TID = 0x100, | |
5c148194 PZ |
215 | }; |
216 | ||
f3dfd265 | 217 | #ifdef __KERNEL__ |
9f66a381 | 218 | /* |
f3dfd265 | 219 | * Kernel-internal data types and definitions: |
9f66a381 IM |
220 | */ |
221 | ||
f3dfd265 PM |
222 | #ifdef CONFIG_PERF_COUNTERS |
223 | # include <asm/perf_counter.h> | |
224 | #endif | |
225 | ||
226 | #include <linux/list.h> | |
227 | #include <linux/mutex.h> | |
228 | #include <linux/rculist.h> | |
229 | #include <linux/rcupdate.h> | |
230 | #include <linux/spinlock.h> | |
d6d020e9 | 231 | #include <linux/hrtimer.h> |
f3dfd265 PM |
232 | #include <asm/atomic.h> |
233 | ||
234 | struct task_struct; | |
235 | ||
f4a2deb4 PZ |
236 | static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) |
237 | { | |
238 | return hw_event->config & PERF_COUNTER_RAW_MASK; | |
239 | } | |
240 | ||
241 | static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) | |
242 | { | |
243 | return hw_event->config & PERF_COUNTER_CONFIG_MASK; | |
244 | } | |
245 | ||
246 | static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) | |
247 | { | |
248 | return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> | |
249 | PERF_COUNTER_TYPE_SHIFT; | |
250 | } | |
251 | ||
252 | static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) | |
253 | { | |
254 | return hw_event->config & PERF_COUNTER_EVENT_MASK; | |
255 | } | |
256 | ||
0793a61d | 257 | /** |
9f66a381 | 258 | * struct hw_perf_counter - performance counter hardware details: |
0793a61d TG |
259 | */ |
260 | struct hw_perf_counter { | |
ee06094f | 261 | #ifdef CONFIG_PERF_COUNTERS |
d6d020e9 PZ |
262 | union { |
263 | struct { /* hardware */ | |
264 | u64 config; | |
265 | unsigned long config_base; | |
266 | unsigned long counter_base; | |
267 | int nmi; | |
268 | unsigned int idx; | |
269 | }; | |
270 | union { /* software */ | |
271 | atomic64_t count; | |
272 | struct hrtimer hrtimer; | |
273 | }; | |
274 | }; | |
ee06094f | 275 | atomic64_t prev_count; |
9f66a381 | 276 | u64 irq_period; |
ee06094f IM |
277 | atomic64_t period_left; |
278 | #endif | |
0793a61d TG |
279 | }; |
280 | ||
621a01ea IM |
281 | struct perf_counter; |
282 | ||
283 | /** | |
284 | * struct hw_perf_counter_ops - performance counter hw ops | |
285 | */ | |
286 | struct hw_perf_counter_ops { | |
95cdd2e7 | 287 | int (*enable) (struct perf_counter *counter); |
7671581f IM |
288 | void (*disable) (struct perf_counter *counter); |
289 | void (*read) (struct perf_counter *counter); | |
621a01ea IM |
290 | }; |
291 | ||
6a930700 IM |
292 | /** |
293 | * enum perf_counter_active_state - the states of a counter | |
294 | */ | |
295 | enum perf_counter_active_state { | |
3b6f9e5c | 296 | PERF_COUNTER_STATE_ERROR = -2, |
6a930700 IM |
297 | PERF_COUNTER_STATE_OFF = -1, |
298 | PERF_COUNTER_STATE_INACTIVE = 0, | |
299 | PERF_COUNTER_STATE_ACTIVE = 1, | |
300 | }; | |
301 | ||
9b51f66d IM |
302 | struct file; |
303 | ||
7b732a75 PZ |
304 | struct perf_mmap_data { |
305 | struct rcu_head rcu_head; | |
306 | int nr_pages; | |
c7138f37 | 307 | atomic_t wakeup; |
7b732a75 PZ |
308 | atomic_t head; |
309 | struct perf_counter_mmap_page *user_page; | |
310 | void *data_pages[0]; | |
311 | }; | |
312 | ||
925d519a PZ |
313 | struct perf_wakeup_entry { |
314 | struct perf_wakeup_entry *next; | |
315 | }; | |
316 | ||
0793a61d TG |
317 | /** |
318 | * struct perf_counter - performance counter kernel representation: | |
319 | */ | |
320 | struct perf_counter { | |
ee06094f | 321 | #ifdef CONFIG_PERF_COUNTERS |
04289bb9 | 322 | struct list_head list_entry; |
592903cd | 323 | struct list_head event_entry; |
04289bb9 | 324 | struct list_head sibling_list; |
5c148194 | 325 | int nr_siblings; |
04289bb9 | 326 | struct perf_counter *group_leader; |
5c92d124 | 327 | const struct hw_perf_counter_ops *hw_ops; |
04289bb9 | 328 | |
6a930700 | 329 | enum perf_counter_active_state state; |
c07c99b6 | 330 | enum perf_counter_active_state prev_state; |
0793a61d | 331 | atomic64_t count; |
ee06094f | 332 | |
53cfbf59 PM |
333 | /* |
334 | * These are the total time in nanoseconds that the counter | |
335 | * has been enabled (i.e. eligible to run, and the task has | |
336 | * been scheduled in, if this is a per-task counter) | |
337 | * and running (scheduled onto the CPU), respectively. | |
338 | * | |
339 | * They are computed from tstamp_enabled, tstamp_running and | |
340 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | |
341 | */ | |
342 | u64 total_time_enabled; | |
343 | u64 total_time_running; | |
344 | ||
345 | /* | |
346 | * These are timestamps used for computing total_time_enabled | |
347 | * and total_time_running when the counter is in INACTIVE or | |
348 | * ACTIVE state, measured in nanoseconds from an arbitrary point | |
349 | * in time. | |
350 | * tstamp_enabled: the notional time when the counter was enabled | |
351 | * tstamp_running: the notional time when the counter was scheduled on | |
352 | * tstamp_stopped: in INACTIVE state, the notional time when the | |
353 | * counter was scheduled off. | |
354 | */ | |
355 | u64 tstamp_enabled; | |
356 | u64 tstamp_running; | |
357 | u64 tstamp_stopped; | |
358 | ||
9f66a381 | 359 | struct perf_counter_hw_event hw_event; |
0793a61d TG |
360 | struct hw_perf_counter hw; |
361 | ||
362 | struct perf_counter_context *ctx; | |
363 | struct task_struct *task; | |
9b51f66d | 364 | struct file *filp; |
0793a61d | 365 | |
9b51f66d | 366 | struct perf_counter *parent; |
d859e29f PM |
367 | struct list_head child_list; |
368 | ||
53cfbf59 PM |
369 | /* |
370 | * These accumulate total time (in nanoseconds) that children | |
371 | * counters have been enabled and running, respectively. | |
372 | */ | |
373 | atomic64_t child_total_time_enabled; | |
374 | atomic64_t child_total_time_running; | |
375 | ||
0793a61d | 376 | /* |
d859e29f | 377 | * Protect attach/detach and child_list: |
0793a61d TG |
378 | */ |
379 | struct mutex mutex; | |
380 | ||
381 | int oncpu; | |
382 | int cpu; | |
383 | ||
7b732a75 PZ |
384 | /* mmap bits */ |
385 | struct mutex mmap_mutex; | |
386 | atomic_t mmap_count; | |
387 | struct perf_mmap_data *data; | |
37d81828 | 388 | |
7b732a75 | 389 | /* poll related */ |
0793a61d TG |
390 | wait_queue_head_t waitq; |
391 | /* optional: for NMIs */ | |
925d519a | 392 | struct perf_wakeup_entry wakeup; |
592903cd | 393 | |
e077df4f | 394 | void (*destroy)(struct perf_counter *); |
592903cd | 395 | struct rcu_head rcu_head; |
ee06094f | 396 | #endif |
0793a61d TG |
397 | }; |
398 | ||
399 | /** | |
400 | * struct perf_counter_context - counter context structure | |
401 | * | |
402 | * Used as a container for task counters and CPU counters as well: | |
403 | */ | |
404 | struct perf_counter_context { | |
405 | #ifdef CONFIG_PERF_COUNTERS | |
406 | /* | |
d859e29f PM |
407 | * Protect the states of the counters in the list, |
408 | * nr_active, and the list: | |
0793a61d TG |
409 | */ |
410 | spinlock_t lock; | |
d859e29f PM |
411 | /* |
412 | * Protect the list of counters. Locking either mutex or lock | |
413 | * is sufficient to ensure the list doesn't change; to change | |
414 | * the list you need to lock both the mutex and the spinlock. | |
415 | */ | |
416 | struct mutex mutex; | |
04289bb9 IM |
417 | |
418 | struct list_head counter_list; | |
592903cd | 419 | struct list_head event_list; |
0793a61d TG |
420 | int nr_counters; |
421 | int nr_active; | |
d859e29f | 422 | int is_active; |
0793a61d | 423 | struct task_struct *task; |
53cfbf59 PM |
424 | |
425 | /* | |
426 | * time_now is the current time in nanoseconds since an arbitrary | |
427 | * point in the past. For per-task counters, this is based on the | |
428 | * task clock, and for per-cpu counters it is based on the cpu clock. | |
429 | * time_lost is an offset from the task/cpu clock, used to make it | |
430 | * appear that time only passes while the context is scheduled in. | |
431 | */ | |
432 | u64 time_now; | |
433 | u64 time_lost; | |
0793a61d TG |
434 | #endif |
435 | }; | |
436 | ||
437 | /** | |
438 | * struct perf_cpu_context - per cpu counter context structure | |
439 | */ | |
440 | struct perf_cpu_context { | |
441 | struct perf_counter_context ctx; | |
442 | struct perf_counter_context *task_ctx; | |
443 | int active_oncpu; | |
444 | int max_pertask; | |
3b6f9e5c | 445 | int exclusive; |
96f6d444 PZ |
446 | |
447 | /* | |
448 | * Recursion avoidance: | |
449 | * | |
450 | * task, softirq, irq, nmi context | |
451 | */ | |
452 | int recursion[4]; | |
0793a61d TG |
453 | }; |
454 | ||
455 | /* | |
456 | * Set by architecture code: | |
457 | */ | |
458 | extern int perf_max_counters; | |
459 | ||
460 | #ifdef CONFIG_PERF_COUNTERS | |
5c92d124 | 461 | extern const struct hw_perf_counter_ops * |
621a01ea IM |
462 | hw_perf_counter_init(struct perf_counter *counter); |
463 | ||
0793a61d TG |
464 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); |
465 | extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); | |
466 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | |
9b51f66d IM |
467 | extern void perf_counter_init_task(struct task_struct *child); |
468 | extern void perf_counter_exit_task(struct task_struct *child); | |
925d519a | 469 | extern void perf_counter_do_pending(void); |
0793a61d | 470 | extern void perf_counter_print_debug(void); |
1b023a96 | 471 | extern void perf_counter_unthrottle(void); |
01b2838c IM |
472 | extern u64 hw_perf_save_disable(void); |
473 | extern void hw_perf_restore(u64 ctrl); | |
1d1c7ddb IM |
474 | extern int perf_counter_task_disable(void); |
475 | extern int perf_counter_task_enable(void); | |
3cbed429 PM |
476 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, |
477 | struct perf_cpu_context *cpuctx, | |
478 | struct perf_counter_context *ctx, int cpu); | |
37d81828 | 479 | extern void perf_counter_update_userpage(struct perf_counter *counter); |
5c92d124 | 480 | |
0322cd6e PZ |
481 | extern void perf_counter_output(struct perf_counter *counter, |
482 | int nmi, struct pt_regs *regs); | |
3b6f9e5c PM |
483 | /* |
484 | * Return 1 for a software counter, 0 for a hardware or raw counter | |
485 | */ | |
486 | static inline int is_software_counter(struct perf_counter *counter) | |
487 | { | |
f4a2deb4 PZ |
488 | return !perf_event_raw(&counter->hw_event) && |
489 | perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; | |
3b6f9e5c PM |
490 | } |
491 | ||
b8e83514 | 492 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); |
15dbf27c | 493 | |
0793a61d TG |
494 | #else |
495 | static inline void | |
496 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | |
497 | static inline void | |
498 | perf_counter_task_sched_out(struct task_struct *task, int cpu) { } | |
499 | static inline void | |
500 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | |
9b51f66d IM |
501 | static inline void perf_counter_init_task(struct task_struct *child) { } |
502 | static inline void perf_counter_exit_task(struct task_struct *child) { } | |
925d519a | 503 | static inline void perf_counter_do_pending(void) { } |
0793a61d | 504 | static inline void perf_counter_print_debug(void) { } |
1b023a96 | 505 | static inline void perf_counter_unthrottle(void) { } |
15dbf27c | 506 | static inline void hw_perf_restore(u64 ctrl) { } |
01b2838c | 507 | static inline u64 hw_perf_save_disable(void) { return 0; } |
1d1c7ddb IM |
508 | static inline int perf_counter_task_disable(void) { return -EINVAL; } |
509 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | |
15dbf27c | 510 | |
925d519a PZ |
511 | static inline void |
512 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } | |
513 | ||
0793a61d TG |
514 | #endif |
515 | ||
f3dfd265 | 516 | #endif /* __KERNEL__ */ |
0793a61d | 517 | #endif /* _LINUX_PERF_COUNTER_H */ |