Commit | Line | Data |
---|---|---|
0793a61d TG |
1 | /* |
2 | * Performance counters: | |
3 | * | |
4 | * Copyright(C) 2008, Thomas Gleixner <tglx@linutronix.de> | |
5 | * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar | |
6 | * | |
7 | * Data type definitions, declarations, prototypes. | |
8 | * | |
9 | * Started by: Thomas Gleixner and Ingo Molnar | |
10 | * | |
11 | * For licencing details see kernel-base/COPYING | |
12 | */ | |
13 | #ifndef _LINUX_PERF_COUNTER_H | |
14 | #define _LINUX_PERF_COUNTER_H | |
15 | ||
f3dfd265 PM |
16 | #include <linux/types.h> |
17 | #include <linux/ioctl.h> | |
9aaa131a | 18 | #include <asm/byteorder.h> |
0793a61d TG |
19 | |
20 | /* | |
9f66a381 IM |
21 | * User-space ABI bits: |
22 | */ | |
23 | ||
24 | /* | |
b8e83514 | 25 | * Event type, encoded in the type bits of hw_event.config |
0793a61d | 26 | */ |
b8e83514 PZ |
27 | enum perf_event_types { |
28 | PERF_TYPE_HARDWARE = 0, | |
29 | PERF_TYPE_SOFTWARE = 1, | |
30 | PERF_TYPE_TRACEPOINT = 2, | |
31 | ||
0793a61d | 32 | /* |
b8e83514 | 33 | * available TYPE space, raw is the max value. |
0793a61d | 34 | */ |
9f66a381 | 35 | |
b8e83514 PZ |
36 | PERF_TYPE_RAW = 128, |
37 | }; | |
6c594c21 | 38 | |
b8e83514 PZ |
39 | /* |
40 | * Generalized performance counter event types, carried in the event-id bits | |
41 | * of the hw_event.config passed to the sys_perf_counter_open() syscall: | |
42 | */ | |
43 | enum hw_event_ids { | |
9f66a381 | 44 | /* |
b8e83514 | 45 | * Common hardware events, generalized by the kernel: |
9f66a381 | 46 | */ |
b8e83514 PZ |
47 | PERF_COUNT_CPU_CYCLES = 0, |
48 | PERF_COUNT_INSTRUCTIONS = 1, | |
49 | PERF_COUNT_CACHE_REFERENCES = 2, | |
50 | PERF_COUNT_CACHE_MISSES = 3, | |
51 | PERF_COUNT_BRANCH_INSTRUCTIONS = 4, | |
52 | PERF_COUNT_BRANCH_MISSES = 5, | |
53 | PERF_COUNT_BUS_CYCLES = 6, | |
54 | ||
55 | PERF_HW_EVENTS_MAX = 7, | |
56 | }; | |
e077df4f | 57 | |
b8e83514 PZ |
58 | /* |
59 | * Special "software" counters provided by the kernel, even if the hardware | |
60 | * does not support performance counters. These counters measure various | |
61 | * physical and sw events of the kernel (and allow the profiling of them as | |
62 | * well): | |
63 | */ | |
64 | enum sw_event_ids { | |
65 | PERF_COUNT_CPU_CLOCK = 0, | |
66 | PERF_COUNT_TASK_CLOCK = 1, | |
67 | PERF_COUNT_PAGE_FAULTS = 2, | |
68 | PERF_COUNT_CONTEXT_SWITCHES = 3, | |
69 | PERF_COUNT_CPU_MIGRATIONS = 4, | |
70 | PERF_COUNT_PAGE_FAULTS_MIN = 5, | |
71 | PERF_COUNT_PAGE_FAULTS_MAJ = 6, | |
72 | ||
73 | PERF_SW_EVENTS_MAX = 7, | |
0793a61d TG |
74 | }; |
75 | ||
76 | /* | |
77 | * IRQ-notification data record type: | |
78 | */ | |
9f66a381 | 79 | enum perf_counter_record_type { |
b8e83514 PZ |
80 | PERF_RECORD_SIMPLE = 0, |
81 | PERF_RECORD_IRQ = 1, | |
82 | PERF_RECORD_GROUP = 2, | |
0793a61d TG |
83 | }; |
84 | ||
f4a2deb4 PZ |
85 | #define __PERF_COUNTER_MASK(name) \ |
86 | (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ | |
87 | PERF_COUNTER_##name##_SHIFT) | |
88 | ||
89 | #define PERF_COUNTER_RAW_BITS 1 | |
90 | #define PERF_COUNTER_RAW_SHIFT 63 | |
91 | #define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) | |
92 | ||
93 | #define PERF_COUNTER_CONFIG_BITS 63 | |
94 | #define PERF_COUNTER_CONFIG_SHIFT 0 | |
95 | #define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) | |
96 | ||
97 | #define PERF_COUNTER_TYPE_BITS 7 | |
98 | #define PERF_COUNTER_TYPE_SHIFT 56 | |
99 | #define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) | |
100 | ||
101 | #define PERF_COUNTER_EVENT_BITS 56 | |
102 | #define PERF_COUNTER_EVENT_SHIFT 0 | |
103 | #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) | |
104 | ||
53cfbf59 PM |
105 | /* |
106 | * Bits that can be set in hw_event.read_format to request that | |
107 | * reads on the counter should return the indicated quantities, | |
108 | * in increasing order of bit value, after the counter value. | |
109 | */ | |
110 | enum perf_counter_read_format { | |
111 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | |
112 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | |
113 | }; | |
114 | ||
9f66a381 IM |
115 | /* |
116 | * Hardware event to monitor via a performance monitoring counter: | |
117 | */ | |
118 | struct perf_counter_hw_event { | |
f4a2deb4 PZ |
119 | /* |
120 | * The MSB of the config word signifies whether the rest contains cpu | |
121 | * specific (raw) counter configuration data. If it is unset, the next | |
122 | * 7 bits are an event type and the remaining bits are the event | |
123 | * identifier. | |
124 | */ | |
125 | __u64 config; | |
9f66a381 | 126 | |
f3dfd265 | 127 | __u64 irq_period; |
2743a5b0 PM |
128 | __u64 record_type; |
129 | __u64 read_format; | |
9f66a381 | 130 | |
2743a5b0 | 131 | __u64 disabled : 1, /* off by default */ |
0475f9ea | 132 | nmi : 1, /* NMI sampling */ |
0475f9ea PM |
133 | inherit : 1, /* children inherit it */ |
134 | pinned : 1, /* must always be on PMU */ | |
135 | exclusive : 1, /* only group on PMU */ | |
136 | exclude_user : 1, /* don't count user */ | |
137 | exclude_kernel : 1, /* don't count kernel */ | |
138 | exclude_hv : 1, /* don't count hypervisor */ | |
2743a5b0 | 139 | exclude_idle : 1, /* don't count when idle */ |
ea5d20cf | 140 | include_tid : 1, /* include the tid */ |
0475f9ea | 141 | |
ea5d20cf | 142 | __reserved_1 : 54; |
2743a5b0 PM |
143 | |
144 | __u32 extra_config_len; | |
145 | __u32 __reserved_4; | |
9f66a381 | 146 | |
f3dfd265 | 147 | __u64 __reserved_2; |
2743a5b0 | 148 | __u64 __reserved_3; |
eab656ae TG |
149 | }; |
150 | ||
d859e29f PM |
151 | /* |
152 | * Ioctls that can be done on a perf counter fd: | |
153 | */ | |
154 | #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) | |
155 | #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) | |
156 | ||
37d81828 PM |
157 | /* |
158 | * Structure of the page that can be mapped via mmap | |
159 | */ | |
160 | struct perf_counter_mmap_page { | |
161 | __u32 version; /* version number of this structure */ | |
162 | __u32 compat_version; /* lowest version this is compat with */ | |
163 | __u32 lock; /* seqlock for synchronization */ | |
164 | __u32 index; /* hardware counter identifier */ | |
165 | __s64 offset; /* add to hardware counter value */ | |
7b732a75 PZ |
166 | |
167 | __u32 data_head; /* head in the data section */ | |
37d81828 PM |
168 | }; |
169 | ||
5c148194 PZ |
170 | struct perf_event_header { |
171 | __u32 type; | |
172 | __u32 size; | |
173 | }; | |
174 | ||
175 | enum perf_event_type { | |
176 | PERF_EVENT_IP = 0, | |
177 | PERF_EVENT_GROUP = 1, | |
ea5d20cf PZ |
178 | |
179 | __PERF_EVENT_TID = 0x100, | |
5c148194 PZ |
180 | }; |
181 | ||
f3dfd265 | 182 | #ifdef __KERNEL__ |
9f66a381 | 183 | /* |
f3dfd265 | 184 | * Kernel-internal data types and definitions: |
9f66a381 IM |
185 | */ |
186 | ||
f3dfd265 PM |
187 | #ifdef CONFIG_PERF_COUNTERS |
188 | # include <asm/perf_counter.h> | |
189 | #endif | |
190 | ||
191 | #include <linux/list.h> | |
192 | #include <linux/mutex.h> | |
193 | #include <linux/rculist.h> | |
194 | #include <linux/rcupdate.h> | |
195 | #include <linux/spinlock.h> | |
d6d020e9 | 196 | #include <linux/hrtimer.h> |
f3dfd265 PM |
197 | #include <asm/atomic.h> |
198 | ||
199 | struct task_struct; | |
200 | ||
f4a2deb4 PZ |
201 | static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) |
202 | { | |
203 | return hw_event->config & PERF_COUNTER_RAW_MASK; | |
204 | } | |
205 | ||
206 | static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) | |
207 | { | |
208 | return hw_event->config & PERF_COUNTER_CONFIG_MASK; | |
209 | } | |
210 | ||
211 | static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) | |
212 | { | |
213 | return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> | |
214 | PERF_COUNTER_TYPE_SHIFT; | |
215 | } | |
216 | ||
217 | static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) | |
218 | { | |
219 | return hw_event->config & PERF_COUNTER_EVENT_MASK; | |
220 | } | |
221 | ||
0793a61d | 222 | /** |
9f66a381 | 223 | * struct hw_perf_counter - performance counter hardware details: |
0793a61d TG |
224 | */ |
225 | struct hw_perf_counter { | |
ee06094f | 226 | #ifdef CONFIG_PERF_COUNTERS |
d6d020e9 PZ |
227 | union { |
228 | struct { /* hardware */ | |
229 | u64 config; | |
230 | unsigned long config_base; | |
231 | unsigned long counter_base; | |
232 | int nmi; | |
233 | unsigned int idx; | |
234 | }; | |
235 | union { /* software */ | |
236 | atomic64_t count; | |
237 | struct hrtimer hrtimer; | |
238 | }; | |
239 | }; | |
ee06094f | 240 | atomic64_t prev_count; |
9f66a381 | 241 | u64 irq_period; |
ee06094f IM |
242 | atomic64_t period_left; |
243 | #endif | |
0793a61d TG |
244 | }; |
245 | ||
621a01ea IM |
246 | struct perf_counter; |
247 | ||
248 | /** | |
249 | * struct hw_perf_counter_ops - performance counter hw ops | |
250 | */ | |
251 | struct hw_perf_counter_ops { | |
95cdd2e7 | 252 | int (*enable) (struct perf_counter *counter); |
7671581f IM |
253 | void (*disable) (struct perf_counter *counter); |
254 | void (*read) (struct perf_counter *counter); | |
621a01ea IM |
255 | }; |
256 | ||
6a930700 IM |
257 | /** |
258 | * enum perf_counter_active_state - the states of a counter | |
259 | */ | |
260 | enum perf_counter_active_state { | |
3b6f9e5c | 261 | PERF_COUNTER_STATE_ERROR = -2, |
6a930700 IM |
262 | PERF_COUNTER_STATE_OFF = -1, |
263 | PERF_COUNTER_STATE_INACTIVE = 0, | |
264 | PERF_COUNTER_STATE_ACTIVE = 1, | |
265 | }; | |
266 | ||
9b51f66d IM |
267 | struct file; |
268 | ||
7b732a75 PZ |
269 | struct perf_mmap_data { |
270 | struct rcu_head rcu_head; | |
271 | int nr_pages; | |
c7138f37 | 272 | atomic_t wakeup; |
7b732a75 PZ |
273 | atomic_t head; |
274 | struct perf_counter_mmap_page *user_page; | |
275 | void *data_pages[0]; | |
276 | }; | |
277 | ||
0793a61d TG |
278 | /** |
279 | * struct perf_counter - performance counter kernel representation: | |
280 | */ | |
281 | struct perf_counter { | |
ee06094f | 282 | #ifdef CONFIG_PERF_COUNTERS |
04289bb9 | 283 | struct list_head list_entry; |
592903cd | 284 | struct list_head event_entry; |
04289bb9 | 285 | struct list_head sibling_list; |
5c148194 | 286 | int nr_siblings; |
04289bb9 | 287 | struct perf_counter *group_leader; |
5c92d124 | 288 | const struct hw_perf_counter_ops *hw_ops; |
04289bb9 | 289 | |
6a930700 | 290 | enum perf_counter_active_state state; |
c07c99b6 | 291 | enum perf_counter_active_state prev_state; |
0793a61d | 292 | atomic64_t count; |
ee06094f | 293 | |
53cfbf59 PM |
294 | /* |
295 | * These are the total time in nanoseconds that the counter | |
296 | * has been enabled (i.e. eligible to run, and the task has | |
297 | * been scheduled in, if this is a per-task counter) | |
298 | * and running (scheduled onto the CPU), respectively. | |
299 | * | |
300 | * They are computed from tstamp_enabled, tstamp_running and | |
301 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | |
302 | */ | |
303 | u64 total_time_enabled; | |
304 | u64 total_time_running; | |
305 | ||
306 | /* | |
307 | * These are timestamps used for computing total_time_enabled | |
308 | * and total_time_running when the counter is in INACTIVE or | |
309 | * ACTIVE state, measured in nanoseconds from an arbitrary point | |
310 | * in time. | |
311 | * tstamp_enabled: the notional time when the counter was enabled | |
312 | * tstamp_running: the notional time when the counter was scheduled on | |
313 | * tstamp_stopped: in INACTIVE state, the notional time when the | |
314 | * counter was scheduled off. | |
315 | */ | |
316 | u64 tstamp_enabled; | |
317 | u64 tstamp_running; | |
318 | u64 tstamp_stopped; | |
319 | ||
9f66a381 | 320 | struct perf_counter_hw_event hw_event; |
0793a61d TG |
321 | struct hw_perf_counter hw; |
322 | ||
323 | struct perf_counter_context *ctx; | |
324 | struct task_struct *task; | |
9b51f66d | 325 | struct file *filp; |
0793a61d | 326 | |
9b51f66d | 327 | struct perf_counter *parent; |
d859e29f PM |
328 | struct list_head child_list; |
329 | ||
53cfbf59 PM |
330 | /* |
331 | * These accumulate total time (in nanoseconds) that children | |
332 | * counters have been enabled and running, respectively. | |
333 | */ | |
334 | atomic64_t child_total_time_enabled; | |
335 | atomic64_t child_total_time_running; | |
336 | ||
0793a61d | 337 | /* |
d859e29f | 338 | * Protect attach/detach and child_list: |
0793a61d TG |
339 | */ |
340 | struct mutex mutex; | |
341 | ||
342 | int oncpu; | |
343 | int cpu; | |
344 | ||
7b732a75 PZ |
345 | /* mmap bits */ |
346 | struct mutex mmap_mutex; | |
347 | atomic_t mmap_count; | |
348 | struct perf_mmap_data *data; | |
37d81828 | 349 | |
7b732a75 | 350 | /* poll related */ |
0793a61d TG |
351 | wait_queue_head_t waitq; |
352 | /* optional: for NMIs */ | |
353 | int wakeup_pending; | |
592903cd | 354 | |
e077df4f | 355 | void (*destroy)(struct perf_counter *); |
592903cd | 356 | struct rcu_head rcu_head; |
ee06094f | 357 | #endif |
0793a61d TG |
358 | }; |
359 | ||
360 | /** | |
361 | * struct perf_counter_context - counter context structure | |
362 | * | |
363 | * Used as a container for task counters and CPU counters as well: | |
364 | */ | |
365 | struct perf_counter_context { | |
366 | #ifdef CONFIG_PERF_COUNTERS | |
367 | /* | |
d859e29f PM |
368 | * Protect the states of the counters in the list, |
369 | * nr_active, and the list: | |
0793a61d TG |
370 | */ |
371 | spinlock_t lock; | |
d859e29f PM |
372 | /* |
373 | * Protect the list of counters. Locking either mutex or lock | |
374 | * is sufficient to ensure the list doesn't change; to change | |
375 | * the list you need to lock both the mutex and the spinlock. | |
376 | */ | |
377 | struct mutex mutex; | |
04289bb9 IM |
378 | |
379 | struct list_head counter_list; | |
592903cd | 380 | struct list_head event_list; |
0793a61d TG |
381 | int nr_counters; |
382 | int nr_active; | |
d859e29f | 383 | int is_active; |
0793a61d | 384 | struct task_struct *task; |
53cfbf59 PM |
385 | |
386 | /* | |
387 | * time_now is the current time in nanoseconds since an arbitrary | |
388 | * point in the past. For per-task counters, this is based on the | |
389 | * task clock, and for per-cpu counters it is based on the cpu clock. | |
390 | * time_lost is an offset from the task/cpu clock, used to make it | |
391 | * appear that time only passes while the context is scheduled in. | |
392 | */ | |
393 | u64 time_now; | |
394 | u64 time_lost; | |
0793a61d TG |
395 | #endif |
396 | }; | |
397 | ||
398 | /** | |
399 | * struct perf_cpu_context - per cpu counter context structure | |
400 | */ | |
401 | struct perf_cpu_context { | |
402 | struct perf_counter_context ctx; | |
403 | struct perf_counter_context *task_ctx; | |
404 | int active_oncpu; | |
405 | int max_pertask; | |
3b6f9e5c | 406 | int exclusive; |
96f6d444 PZ |
407 | |
408 | /* | |
409 | * Recursion avoidance: | |
410 | * | |
411 | * one slot each for task, softirq, irq and nmi context | |
412 | */ | |
413 | int recursion[4]; | |
0793a61d TG |
414 | }; |
415 | ||
416 | /* | |
417 | * Set by architecture code: | |
418 | */ | |
419 | extern int perf_max_counters; | |
420 | ||
421 | #ifdef CONFIG_PERF_COUNTERS | |
5c92d124 | 422 | extern const struct hw_perf_counter_ops * |
621a01ea IM |
423 | hw_perf_counter_init(struct perf_counter *counter); |
424 | ||
0793a61d TG |
425 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); |
426 | extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); | |
427 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | |
9b51f66d IM |
428 | extern void perf_counter_init_task(struct task_struct *child); |
429 | extern void perf_counter_exit_task(struct task_struct *child); | |
0793a61d TG |
430 | extern void perf_counter_notify(struct pt_regs *regs); |
431 | extern void perf_counter_print_debug(void); | |
1b023a96 | 432 | extern void perf_counter_unthrottle(void); |
01b2838c IM |
433 | extern u64 hw_perf_save_disable(void); |
434 | extern void hw_perf_restore(u64 ctrl); | |
1d1c7ddb IM |
435 | extern int perf_counter_task_disable(void); |
436 | extern int perf_counter_task_enable(void); | |
3cbed429 PM |
437 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, |
438 | struct perf_cpu_context *cpuctx, | |
439 | struct perf_counter_context *ctx, int cpu); | |
37d81828 | 440 | extern void perf_counter_update_userpage(struct perf_counter *counter); |
5c92d124 | 441 | |
0322cd6e PZ |
442 | extern void perf_counter_output(struct perf_counter *counter, |
443 | int nmi, struct pt_regs *regs); | |
3b6f9e5c PM |
444 | /* |
445 | * Return 1 for a software counter, 0 for a hardware or raw counter | |
446 | */ | |
447 | static inline int is_software_counter(struct perf_counter *counter) | |
448 | { | |
f4a2deb4 PZ |
449 | return !perf_event_raw(&counter->hw_event) && |
450 | perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; | |
3b6f9e5c PM |
451 | } |
452 | ||
b8e83514 | 453 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); |
15dbf27c | 454 | |
0793a61d TG |
455 | #else |
456 | static inline void | |
457 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | |
458 | static inline void | |
459 | perf_counter_task_sched_out(struct task_struct *task, int cpu) { } | |
460 | static inline void | |
461 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | |
9b51f66d IM |
462 | static inline void perf_counter_init_task(struct task_struct *child) { } |
463 | static inline void perf_counter_exit_task(struct task_struct *child) { } | |
0793a61d TG |
464 | static inline void perf_counter_notify(struct pt_regs *regs) { } |
465 | static inline void perf_counter_print_debug(void) { } | |
1b023a96 | 466 | static inline void perf_counter_unthrottle(void) { } |
15dbf27c | 467 | static inline void hw_perf_restore(u64 ctrl) { } |
01b2838c | 468 | static inline u64 hw_perf_save_disable(void) { return 0; } |
1d1c7ddb IM |
469 | static inline int perf_counter_task_disable(void) { return -EINVAL; } |
470 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | |
15dbf27c | 471 | |
b8e83514 | 472 | static inline void perf_swcounter_event(u32 event, u64 nr, |
15dbf27c | 473 | int nmi, struct pt_regs *regs) { } |
0793a61d TG |
474 | #endif |
475 | ||
f3dfd265 | 476 | #endif /* __KERNEL__ */ |
0793a61d | 477 | #endif /* _LINUX_PERF_COUNTER_H */ |