| 1 | /* |
| 2 | * Performance events: |
| 3 | * |
| 4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> |
| 5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar |
| 6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra |
| 7 | * |
| 8 | * Data type definitions, declarations, prototypes. |
| 9 | * |
| 10 | * Started by: Thomas Gleixner and Ingo Molnar |
| 11 | * |
| 12 | * For licencing details see kernel-base/COPYING |
| 13 | */ |
| 14 | #ifndef _LINUX_PERF_EVENT_H |
| 15 | #define _LINUX_PERF_EVENT_H |
| 16 | |
| 17 | #include <linux/types.h> |
| 18 | #include <linux/ioctl.h> |
| 19 | #include <asm/byteorder.h> |
| 20 | |
| 21 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
| 22 | #include <asm/hw_breakpoint.h> |
| 23 | #endif |
| 24 | |
| 25 | /* |
| 26 | * User-space ABI bits: |
| 27 | */ |
| 28 | |
/*
 * attr.type
 *
 * Major event class, stored in perf_event_attr.type.  The explicit
 * values are ABI and must never be renumbered.
 */
enum perf_type_id {
	PERF_TYPE_HARDWARE			= 0,
	PERF_TYPE_SOFTWARE			= 1,
	PERF_TYPE_TRACEPOINT			= 2,
	PERF_TYPE_HW_CACHE			= 3,
	PERF_TYPE_RAW				= 4,
	PERF_TYPE_BREAKPOINT			= 5,

	/* One past the last type; grows when a type is added. */
	PERF_TYPE_MAX,				/* non-ABI */
};
| 42 | |
/*
 * Generalized performance event event_id types, used by the
 * attr.event_id parameter of the sys_perf_event_open()
 * syscall:
 *
 * NOTE(review): struct perf_event_attr as declared in this header has no
 * member called event_id; the type-specific selector is attr.config.
 * Presumably that is the field meant here -- confirm.
 */
enum perf_hw_id {
	/*
	 * Common hardware events, generalized by the kernel:
	 */
	PERF_COUNT_HW_CPU_CYCLES		= 0,
	PERF_COUNT_HW_INSTRUCTIONS		= 1,
	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
	PERF_COUNT_HW_CACHE_MISSES		= 3,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
	PERF_COUNT_HW_BRANCH_MISSES		= 5,
	PERF_COUNT_HW_BUS_CYCLES		= 6,

	/* One past the last event; grows when an event is added. */
	PERF_COUNT_HW_MAX,			/* non-ABI */
};
| 62 | |
/*
 * Generalized hardware cache events:
 *
 *       { L1-D, L1-I, LLC, DTLB, ITLB, BPU } x
 *       { read, write, prefetch } x
 *       { accesses, misses }
 *
 * The three sets correspond, in order, to enum perf_hw_cache_id,
 * enum perf_hw_cache_op_id and enum perf_hw_cache_op_result_id.
 */
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D			= 0,
	PERF_COUNT_HW_CACHE_L1I			= 1,
	PERF_COUNT_HW_CACHE_LL			= 2,
	PERF_COUNT_HW_CACHE_DTLB		= 3,
	PERF_COUNT_HW_CACHE_ITLB		= 4,
	PERF_COUNT_HW_CACHE_BPU			= 5,

	/* Number of cache ids defined above. */
	PERF_COUNT_HW_CACHE_MAX,		/* non-ABI */
};
| 80 | |
/*
 * Operation part of a generalized hardware cache event
 * (read, write or prefetch).
 */
enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ		= 0,
	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,

	/* Number of operations defined above. */
	PERF_COUNT_HW_CACHE_OP_MAX,		/* non-ABI */
};
| 88 | |
/*
 * Result part of a generalized hardware cache event
 * (count accesses or count misses).
 */
enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,

	/* Number of results defined above. */
	PERF_COUNT_HW_CACHE_RESULT_MAX,		/* non-ABI */
};
| 95 | |
/*
 * Special "software" events provided by the kernel, even if the hardware
 * does not support performance events. These events measure various
 * physical and sw events of the kernel (and allow the profiling of them as
 * well):
 *
 * PERF_COUNT_SW_MAX also sizes the perf_swevent_enabled[] array declared
 * in the kernel-internal part of this header, which perf_sw_event()
 * indexes by event id.
 */
enum perf_sw_ids {
	PERF_COUNT_SW_CPU_CLOCK			= 0,
	PERF_COUNT_SW_TASK_CLOCK		= 1,
	PERF_COUNT_SW_PAGE_FAULTS		= 2,
	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,

	PERF_COUNT_SW_MAX,			/* non-ABI */
};
| 115 | |
/*
 * Bits that can be set in attr.sample_type to request information
 * in the overflow packets.
 *
 * Each bit selects one optional field of the PERF_RECORD_SAMPLE record;
 * the field belonging to each bit is listed in the layout comment on
 * PERF_RECORD_SAMPLE in enum perf_event_type.
 */
enum perf_event_sample_format {
	PERF_SAMPLE_IP				= 1U << 0,
	PERF_SAMPLE_TID				= 1U << 1,
	PERF_SAMPLE_TIME			= 1U << 2,
	PERF_SAMPLE_ADDR			= 1U << 3,
	PERF_SAMPLE_READ			= 1U << 4,
	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_PERIOD			= 1U << 8,
	PERF_SAMPLE_STREAM_ID			= 1U << 9,
	PERF_SAMPLE_RAW				= 1U << 10,

	/* First unused bit: every defined flag is below this value. */
	PERF_SAMPLE_MAX = 1U << 11,		/* non-ABI */
};
| 135 | |
/*
 * The format of the data returned by read() on a perf event fd,
 * as specified by attr.read_format:
 *
 * struct read_format {
 *	{ u64		value;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
 *
 * "{ ... } && FLAG" means the fields are present only when FLAG is set
 * in attr.read_format.
 */
enum perf_event_read_format {
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_ID				= 1U << 2,
	PERF_FORMAT_GROUP			= 1U << 3,

	/* First unused bit: every defined flag is below this value. */
	PERF_FORMAT_MAX = 1U << 4,		/* non-ABI */
};
| 164 | |
| 165 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
| 166 | |
| 167 | /* |
| 168 | * Hardware event_id to monitor via a performance monitoring event: |
| 169 | */ |
| 170 | struct perf_event_attr { |
| 171 | |
| 172 | /* |
| 173 | * Major type: hardware/software/tracepoint/etc. |
| 174 | */ |
| 175 | __u32 type; |
| 176 | |
| 177 | /* |
| 178 | * Size of the attr structure, for fwd/bwd compat. |
| 179 | */ |
| 180 | __u32 size; |
| 181 | |
| 182 | /* |
| 183 | * Type specific configuration information. |
| 184 | */ |
| 185 | __u64 config; |
| 186 | |
| 187 | union { |
| 188 | __u64 sample_period; |
| 189 | __u64 sample_freq; |
| 190 | }; |
| 191 | |
| 192 | __u64 sample_type; |
| 193 | __u64 read_format; |
| 194 | |
| 195 | __u64 disabled : 1, /* off by default */ |
| 196 | inherit : 1, /* children inherit it */ |
| 197 | pinned : 1, /* must always be on PMU */ |
| 198 | exclusive : 1, /* only group on PMU */ |
| 199 | exclude_user : 1, /* don't count user */ |
| 200 | exclude_kernel : 1, /* ditto kernel */ |
| 201 | exclude_hv : 1, /* ditto hypervisor */ |
| 202 | exclude_idle : 1, /* don't count when idle */ |
| 203 | mmap : 1, /* include mmap data */ |
| 204 | comm : 1, /* include comm data */ |
| 205 | freq : 1, /* use freq, not period */ |
| 206 | inherit_stat : 1, /* per task counts */ |
| 207 | enable_on_exec : 1, /* next exec enables */ |
| 208 | task : 1, /* trace fork/exit */ |
| 209 | watermark : 1, /* wakeup_watermark */ |
| 210 | |
| 211 | __reserved_1 : 49; |
| 212 | |
| 213 | union { |
| 214 | __u32 wakeup_events; /* wakeup every n events */ |
| 215 | __u32 wakeup_watermark; /* bytes before wakeup */ |
| 216 | }; |
| 217 | |
| 218 | struct { /* Hardware breakpoint info */ |
| 219 | __u64 bp_addr; |
| 220 | __u32 bp_type; |
| 221 | __u32 bp_len; |
| 222 | __u64 __bp_reserved_1; |
| 223 | __u64 __bp_reserved_2; |
| 224 | }; |
| 225 | |
| 226 | __u32 __reserved_2; |
| 227 | |
| 228 | __u64 __reserved_3; |
| 229 | }; |
| 230 | |
/*
 * Ioctls that can be done on a perf event fd:
 *
 * All requests use the '$' ioctl type with numbers 0..6.  Per the
 * encoding macros used below, PERF_EVENT_IOC_PERIOD is declared with a
 * __u64 argument and PERF_EVENT_IOC_SET_FILTER with a char * argument;
 * the remaining requests are declared with _IO (no argument type
 * encoded).
 */
#define PERF_EVENT_IOC_ENABLE		_IO ('$', 0)
#define PERF_EVENT_IOC_DISABLE		_IO ('$', 1)
#define PERF_EVENT_IOC_REFRESH		_IO ('$', 2)
#define PERF_EVENT_IOC_RESET		_IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
| 241 | |
/*
 * Flag bits for the perf event ioctls.
 * NOTE(review): presumably passed as the ioctl argument to make the
 * request act on the whole event group -- confirm against the ioctl
 * implementation, which is not part of this header.
 */
enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
};
| 245 | |
/*
 * Structure of the page that can be mapped via mmap
 *
 * The members before data_head add up to exactly 1024 bytes
 * (40 bytes of fields + 123 * 8 bytes reserved), so data_head sits at
 * offset 1024.
 */
struct perf_event_mmap_page {
	__u32	version;		/* version number of this structure */
	__u32	compat_version;		/* lowest version this is compat with */

	/*
	 * Bits needed to read the hw events in user-space.
	 *
	 *   u32 seq;
	 *   s64 count;
	 *
	 *   do {
	 *     seq = pc->lock;
	 *
	 *     barrier();
	 *     if (pc->index) {
	 *       count = pmc_read(pc->index - 1);
	 *       count += pc->offset;
	 *     } else
	 *       goto regular_read;
	 *
	 *     barrier();
	 *   } while (pc->lock != seq);
	 *
	 * NOTE: for obvious reason this only works on self-monitoring
	 *       processes.
	 */
	__u32	lock;			/* seqlock for synchronization */
	__u32	index;			/* hardware event identifier */
	__s64	offset;			/* add to hardware event value */
	__u64	time_enabled;		/* time event active */
	__u64	time_running;		/* time event on cpu */

		/*
		 * Hole for extension of the self monitor capabilities
		 */

	__u64	__reserved[123];	/* align to 1k */

	/*
	 * Control data for the mmap() data buffer.
	 *
	 * User-space reading the @data_head value should issue an rmb(), on
	 * SMP capable platforms, after reading this value -- see
	 * perf_event_wakeup().
	 *
	 * When the mapping is PROT_WRITE the @data_tail value should be
	 * written by userspace to reflect the last read data. In this case
	 * the kernel will not over-write unread data.
	 */
	__u64   data_head;		/* head in the data section */
	__u64	data_tail;		/* user-space written tail */
};
| 301 | |
/*
 * CPU mode encoded in the two lowest bits of a record's misc value:
 * PERF_RECORD_MISC_CPUMODE_MASK selects the field, the other four
 * constants are its possible values.  The default perf_misc_flags()
 * in this header yields PERF_RECORD_MISC_USER or PERF_RECORD_MISC_KERNEL.
 * NOTE(review): presumably stored in perf_event_header.misc -- confirm.
 */
#define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
#define PERF_RECORD_MISC_KERNEL			(1 << 0)
#define PERF_RECORD_MISC_USER			(2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
| 307 | |
/*
 * Common 8-byte header that starts every record described in
 * enum perf_event_type.
 */
struct perf_event_header {
	__u32	type;	/* NOTE(review): presumably an enum perf_event_type value */
	__u16	misc;	/* NOTE(review): presumably PERF_RECORD_MISC_* bits */
	__u16	size;	/* NOTE(review): presumably the record size in bytes */
};
| 313 | |
/*
 * Record types.  Numbering starts at 1; the comment above each value
 * gives the layout of the record, which always begins with a
 * struct perf_event_header.
 */
enum perf_event_type {

	/*
	 * The MMAP events record the PROT_EXEC mappings so that we can
	 * correlate userspace IPs to code. They have the following structure:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	char				filename[];
	 * };
	 */
	PERF_RECORD_MMAP			= 1,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				id;
	 *	u64				lost;
	 * };
	 */
	PERF_RECORD_LOST			= 2,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	char				comm[];
	 * };
	 */
	PERF_RECORD_COMM			= 3,

	/*
	 * Same layout as PERF_RECORD_FORK:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_EXIT			= 4,

	/*
	 * Shared by PERF_RECORD_THROTTLE and PERF_RECORD_UNTHROTTLE:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				time;
	 *	u64				id;
	 *	u64				stream_id;
	 * };
	 */
	PERF_RECORD_THROTTLE			= 5,
	PERF_RECORD_UNTHROTTLE			= 6,

	/*
	 * Same layout as PERF_RECORD_EXIT:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_FORK			= 7,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, tid;
	 *
	 *	struct read_format		values;
	 * };
	 */
	PERF_RECORD_READ			= 8,

	/*
	 * Each "{ ... } && FLAG" group is present only when FLAG is set in
	 * attr.sample_type:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 *	{ u64			time;     } && PERF_SAMPLE_TIME
	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
	 *	{ u64			id;	  } && PERF_SAMPLE_ID
	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
	 *
	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
	 *
	 *	{ u64			nr;
	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
	 *
	 *	#
	 *	# The RAW record below is opaque data wrt the ABI
	 *	#
	 *	# That is, the ABI doesn't make any promises wrt
	 *	# the stability of its content, it may vary depending
	 *	# on event, hardware, kernel version and phase of
	 *	# the moon.
	 *	#
	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
	 *	#
	 *
	 *	{ u32			size;
	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
	 * };
	 */
	PERF_RECORD_SAMPLE			= 9,

	/* One past the last record type; grows when a type is added. */
	PERF_RECORD_MAX,			/* non-ABI */
};
| 429 | |
/*
 * Callchain context markers.  Each value is a small negative number
 * converted to __u64, i.e. a value near the top of the 64-bit range;
 * PERF_CONTEXT_MAX, (__u64)-4095, is the smallest of them.
 * NOTE(review): presumably stored in the callchain ip[] array to mark
 * the context of the entries that follow -- confirm against the
 * callchain producers, which are not part of this header.
 */
enum perf_callchain_context {
	PERF_CONTEXT_HV			= (__u64)-32,
	PERF_CONTEXT_KERNEL		= (__u64)-128,
	PERF_CONTEXT_USER		= (__u64)-512,

	PERF_CONTEXT_GUEST		= (__u64)-2048,
	PERF_CONTEXT_GUEST_KERNEL	= (__u64)-2176,
	PERF_CONTEXT_GUEST_USER		= (__u64)-2560,

	PERF_CONTEXT_MAX		= (__u64)-4095,
};
| 441 | |
/*
 * Flag bits.
 * NOTE(review): presumably for the flags argument of
 * sys_perf_event_open() -- confirm; the syscall is not declared here.
 */
#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
#define PERF_FLAG_FD_OUTPUT	(1U << 1)
| 444 | |
| 445 | #ifdef __KERNEL__ |
| 446 | /* |
| 447 | * Kernel-internal data types and definitions: |
| 448 | */ |
| 449 | |
| 450 | #ifdef CONFIG_PERF_EVENTS |
| 451 | # include <asm/perf_event.h> |
| 452 | #endif |
| 453 | |
| 454 | #include <linux/list.h> |
| 455 | #include <linux/mutex.h> |
| 456 | #include <linux/rculist.h> |
| 457 | #include <linux/rcupdate.h> |
| 458 | #include <linux/spinlock.h> |
| 459 | #include <linux/hrtimer.h> |
| 460 | #include <linux/fs.h> |
| 461 | #include <linux/pid_namespace.h> |
| 462 | #include <linux/workqueue.h> |
| 463 | #include <asm/atomic.h> |
| 464 | |
/* Capacity of perf_callchain_entry.ip[]. */
#define PERF_MAX_STACK_DEPTH		255

/*
 * Fixed-capacity callchain buffer: a count followed by up to
 * PERF_MAX_STACK_DEPTH entries (256 u64 words in total).
 */
struct perf_callchain_entry {
	__u64				nr;	/* NOTE(review): presumably the number of valid ip[] entries */
	__u64				ip[PERF_MAX_STACK_DEPTH];
};
| 471 | |
/*
 * Raw payload referenced from struct perf_sample_data (raw member):
 * a size and a pointer to the data.
 */
struct perf_raw_record {
	u32				size;
	void				*data;
};
| 476 | |
struct task_struct;

/**
 * struct hw_perf_event - performance event hardware details:
 *
 * The leading anonymous union overlays the hardware, software and
 * (with CONFIG_HAVE_HW_BREAKPOINT) breakpoint specific state; the
 * members after it are common to all of them.  Without
 * CONFIG_PERF_EVENTS the structure has no members at all.
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		struct { /* hardware */
			u64		config;
			unsigned long	config_base;
			unsigned long	event_base;
			int		idx;
		};
		struct { /* software */
			s64		remaining;
			struct hrtimer	hrtimer;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		union { /* breakpoint */
			struct arch_hw_breakpoint	info;
		};
#endif
	};
	atomic64_t			prev_count;
	u64				sample_period;
	u64				last_period;
	atomic64_t			period_left;
	u64				interrupts;

	u64				freq_count;
	u64				freq_interrupts;
	u64				freq_stamp;
#endif
};
| 512 | |
struct perf_event;

/**
 * struct pmu - generic performance monitoring unit
 *
 * Table of callbacks, each operating on a single event.  Only @enable
 * returns a value.
 * NOTE(review): presumably 0 on success and a negative errno on
 * failure -- confirm against the implementations.
 */
struct pmu {
	int (*enable)			(struct perf_event *event);
	void (*disable)			(struct perf_event *event);
	void (*read)			(struct perf_event *event);
	void (*unthrottle)		(struct perf_event *event);
};
| 524 | |
/**
 * enum perf_event_active_state - the states of an event
 *
 * ERROR and OFF are negative, INACTIVE is zero and ACTIVE is positive.
 * Per the comments in struct perf_event, the time accounting fields are
 * computed while an event is in the INACTIVE or ACTIVE state.
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};
| 534 | |
struct file;

/*
 * Kernel-side state of an event's mmap()ed buffer: bookkeeping fields,
 * a pointer to the user-visible control page and a trailing,
 * variable-length array of data page pointers.
 */
struct perf_mmap_data {
	struct rcu_head			rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
	struct work_struct		work;
#endif
	int				data_order;
	int				nr_pages;	/* nr of data pages  */
	int				writable;	/* are we writable   */
	int				nr_locked;	/* nr pages mlocked  */

	atomic_t			poll;		/* POLL_ for wakeups */
	atomic_t			events;		/* event_id limit    */

	atomic_long_t			head;		/* write position    */
	atomic_long_t			done_head;	/* completed head    */

	atomic_t			lock;		/* concurrent writes */
	atomic_t			wakeup;		/* needs a wakeup    */
	atomic_t			lost;		/* nr records lost   */

	long				watermark;	/* wakeup watermark  */

	struct perf_event_mmap_page	*user_page;
	/* Zero-length trailing array: must remain the last member. */
	void				*data_pages[0];
};
| 562 | |
/*
 * Node of a singly linked list of deferred callbacks; @func receives a
 * pointer to the entry itself.  struct perf_event embeds one as its
 * 'pending' member ("delayed work for NMIs and such").
 */
struct perf_pending_entry {
	struct perf_pending_entry *next;
	void (*func)(struct perf_pending_entry *);
};
| 567 | |
struct perf_sample_data;

/*
 * Callback type stored in perf_event.overflow_handler and accepted by
 * perf_event_create_kernel_counter().  Its parameters line up with
 * those of perf_event_overflow(): event, nmi, sample data, registers.
 */
typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
					struct perf_sample_data *,
					struct pt_regs *regs);
| 573 | |
/**
 * struct perf_event - performance event kernel representation:
 *
 * Without CONFIG_PERF_EVENTS the structure has no members at all.
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/* List linkage and grouping */
	struct list_head		group_entry;
	struct list_head		event_entry;
	struct list_head		sibling_list;
	int				nr_siblings;
	struct perf_event		*group_leader;
	struct perf_event		*output;
	const struct pmu		*pmu;

	enum perf_event_active_state	state;
	atomic64_t			count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	/* User-supplied attributes and per-event hardware state */
	struct perf_event_attr		attr;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	struct file			*filp;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;
	struct perf_mmap_data		*data;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct perf_pending_entry	pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	perf_overflow_handler_t		overflow_handler;

#ifdef CONFIG_EVENT_PROFILE
	struct event_filter		*filter;
#endif

#endif /* CONFIG_PERF_EVENTS */
};
| 673 | |
/**
 * struct perf_event_context - event context structure
 *
 * Used as a container for task events and CPU events as well:
 *
 * Locking: reading the event lists requires either @lock or @mutex;
 * changing them requires both (see the member comments below).
 */
struct perf_event_context {
	/*
	 * Protect the states of the events in the list,
	 * nr_active, and the list:
	 */
	spinlock_t			lock;
	/*
	 * Protect the list of events.  Locking either mutex or lock
	 * is sufficient to ensure the list doesn't change; to change
	 * the list you need to lock both the mutex and the spinlock.
	 */
	struct mutex			mutex;

	struct list_head		group_list;
	struct list_head		event_list;
	int				nr_events;
	int				nr_active;
	int				is_active;
	int				nr_stat;
	atomic_t			refcount;
	struct task_struct		*task;

	/*
	 * Context clock, runs when context enabled.
	 */
	u64				time;
	u64				timestamp;

	/*
	 * These fields let us detect when two contexts have both
	 * been cloned (inherited) from a common ancestor.
	 */
	struct perf_event_context	*parent_ctx;
	u64				parent_gen;
	u64				generation;
	int				pin_count;
	struct rcu_head			rcu_head;
};
| 717 | |
/**
 * struct perf_cpu_context - per cpu event context structure
 *
 * Embeds a struct perf_event_context of its own and additionally holds
 * a pointer to a task context.
 */
struct perf_cpu_context {
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;
	int				active_oncpu;
	int				max_pertask;
	int				exclusive;

	/*
	 * Recursion avoidance:
	 *
	 * task, softirq, irq, nmi context
	 *
	 * One slot per context level listed above, four in total.
	 */
	int				recursion[4];
};
| 735 | |
/*
 * Handle passed to perf_output_begin(), perf_output_copy(),
 * perf_output_sample() and perf_output_end() while a record is being
 * written.  @nmi and @sample have the same names as the last two
 * arguments of perf_output_begin().
 */
struct perf_output_handle {
	struct perf_event		*event;
	struct perf_mmap_data		*data;
	unsigned long			head;
	unsigned long			offset;
	int				nmi;
	int				sample;
	int				locked;
};
| 745 | |
| 746 | #ifdef CONFIG_PERF_EVENTS |
| 747 | |
/*
 * Set by architecture code:
 */
extern int perf_max_events;

extern const struct pmu *hw_perf_event_init(struct perf_event *event);

/*
 * Prototypes available with CONFIG_PERF_EVENTS.  Several of them have
 * empty inline replacements in the #else branch further down; the
 * others (e.g. set_perf_event_pending(), __perf_disable(),
 * __perf_enable(), hw_perf_group_sched_in()) have none in this header.
 */
extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
extern void perf_event_task_sched_out(struct task_struct *task,
					struct task_struct *next, int cpu);
extern void perf_event_task_tick(struct task_struct *task, int cpu);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void set_perf_event_pending(void);
extern void perf_event_do_pending(void);
extern void perf_event_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
extern void perf_disable(void);
extern void perf_enable(void);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern int hw_perf_group_sched_in(struct perf_event *group_leader,
	       struct perf_cpu_context *cpuctx,
	       struct perf_event_context *ctx, int cpu);
extern void perf_event_update_userpage(struct perf_event *event);

/* In-kernel event creation, release and reading. */
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
				int cpu,
				pid_t pid,
				perf_overflow_handler_t callback);
extern u64 perf_event_read_value(struct perf_event *event,
				 u64 *enabled, u64 *running);
| 783 | |
/*
 * Kernel-side values for one sample.  The members follow the optional
 * fields of the PERF_RECORD_SAMPLE layout; @callchain and @raw are
 * pointers to separately stored data.
 */
struct perf_sample_data {
	u64				type;

	u64				ip;
	struct {
		u32	pid;
		u32	tid;
	}				tid_entry;
	u64				time;
	u64				addr;
	u64				id;
	u64				stream_id;
	struct {
		u32	cpu;
		u32	reserved;
	}				cpu_entry;
	u64				period;
	struct perf_callchain_entry	*callchain;
	struct perf_raw_record		*raw;
};
| 804 | |
/*
 * Sample preparation, output and overflow entry points.
 * perf_event_overflow() takes the same arguments as a
 * perf_overflow_handler_t callback and additionally returns an int.
 */
extern void perf_output_sample(struct perf_output_handle *handle,
			       struct perf_event_header *header,
			       struct perf_sample_data *data,
			       struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
				struct perf_sample_data *data,
				struct perf_event *event,
				struct pt_regs *regs);

extern int perf_event_overflow(struct perf_event *event, int nmi,
				 struct perf_sample_data *data,
				 struct pt_regs *regs);
| 817 | |
| 818 | /* |
| 819 | * Return 1 for a software event, 0 for a hardware event |
| 820 | */ |
| 821 | static inline int is_software_event(struct perf_event *event) |
| 822 | { |
| 823 | return (event->attr.type != PERF_TYPE_RAW) && |
| 824 | (event->attr.type != PERF_TYPE_HARDWARE) && |
| 825 | (event->attr.type != PERF_TYPE_HW_CACHE); |
| 826 | } |
| 827 | |
| 828 | extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; |
| 829 | |
| 830 | extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); |
| 831 | |
| 832 | static inline void |
| 833 | perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) |
| 834 | { |
| 835 | if (atomic_read(&perf_swevent_enabled[event_id])) |
| 836 | __perf_sw_event(event_id, nr, nmi, regs, addr); |
| 837 | } |
| 838 | |
| 839 | extern void __perf_event_mmap(struct vm_area_struct *vma); |
| 840 | |
| 841 | static inline void perf_event_mmap(struct vm_area_struct *vma) |
| 842 | { |
| 843 | if (vma->vm_flags & VM_EXEC) |
| 844 | __perf_event_mmap(vma); |
| 845 | } |
| 846 | |
/* Task hooks; empty inline versions exist for !CONFIG_PERF_EVENTS. */
extern void perf_event_comm(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);

/* Tunables (named sysctl_*; the sysctl table itself is not in this header). */
extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;

extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count,
				 void *record, int entry_size);
extern void perf_bp_event(struct perf_event *event, void *data);
| 860 | |
/*
 * Default definitions, used only when perf_misc_flags is not already
 * defined as a macro (NOTE(review): presumably by <asm/perf_event.h>,
 * which is included above under CONFIG_PERF_EVENTS -- confirm).
 * The single #ifndef guards both macros, so whoever defines
 * perf_misc_flags must define perf_instruction_pointer as well.
 */
#ifndef perf_misc_flags
#define perf_misc_flags(regs)	(user_mode(regs) ? PERF_RECORD_MISC_USER : \
				 PERF_RECORD_MISC_KERNEL)
#define perf_instruction_pointer(regs)	instruction_pointer(regs)
#endif
| 866 | |
/*
 * Output buffer interface, operating on a struct perf_output_handle.
 * perf_output_copy() is also what the perf_output_put() macro expands to.
 */
extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_event *event, unsigned int size,
			     int nmi, int sample);
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
			     const void *buf, unsigned int len);

/*
 * Software event recursion context: get returns an int that is handed
 * back to put (the !CONFIG_PERF_EVENTS version of get returns -1).
 */
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
| 875 | #else |
/*
 * !CONFIG_PERF_EVENTS: inline replacements so that callers compile
 * unchanged.  The void functions do nothing; perf_event_init_task()
 * returns 0, perf_event_task_disable()/perf_event_task_enable() return
 * -EINVAL and perf_swevent_get_recursion_context() returns -1.
 */
static inline void
perf_event_task_sched_in(struct task_struct *task, int cpu)		{ }
static inline void
perf_event_task_sched_out(struct task_struct *task,
			    struct task_struct *next, int cpu)		{ }
static inline void
perf_event_task_tick(struct task_struct *task, int cpu)			{ }
static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_do_pending(void)				{ }
static inline void perf_event_print_debug(void)				{ }
static inline void perf_disable(void)					{ }
static inline void perf_enable(void)					{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }

static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
		     struct pt_regs *regs, u64 addr)			{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
static inline void perf_event_comm(struct task_struct *tsk)		{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_init(void)				{ }
static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
| 905 | |
| 906 | #endif |
| 907 | |
/*
 * Copy the object @x into the output buffer of @handle by passing its
 * address and sizeof(x) to perf_output_copy().  @x must be an lvalue,
 * since its address is taken.
 *
 * The macro is defined regardless of CONFIG_PERF_EVENTS, whereas
 * perf_output_copy() is only declared when that option is enabled.
 */
#define perf_output_put(handle, x) \
	perf_output_copy((handle), &(x), sizeof(x))
| 910 | |
| 911 | #endif /* __KERNEL__ */ |
| 912 | #endif /* _LINUX_PERF_EVENT_H */ |