| 1 | /* |
| 2 | * Performance events: |
| 3 | * |
| 4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> |
| 5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar |
| 6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra |
| 7 | * |
| 8 | * Data type definitions, declarations, prototypes. |
| 9 | * |
| 10 | * Started by: Thomas Gleixner and Ingo Molnar |
| 11 | * |
| 12 | * For licencing details see kernel-base/COPYING |
| 13 | */ |
| 14 | #ifndef _LINUX_PERF_EVENT_H |
| 15 | #define _LINUX_PERF_EVENT_H |
| 16 | |
| 17 | #include <linux/types.h> |
| 18 | #include <linux/ioctl.h> |
| 19 | #include <asm/byteorder.h> |
| 20 | |
| 21 | /* |
| 22 | * User-space ABI bits: |
| 23 | */ |
| 24 | |
/*
 * attr.type: the major class of an event.
 */
enum perf_type_id {
	PERF_TYPE_HARDWARE		= 0,
	PERF_TYPE_SOFTWARE		= 1,
	PERF_TYPE_TRACEPOINT		= 2,
	PERF_TYPE_HW_CACHE		= 3,
	PERF_TYPE_RAW			= 4,
	PERF_TYPE_BREAKPOINT		= 5,

	/* One past the last valid type; not part of the ABI. */
	PERF_TYPE_MAX,
};
| 38 | |
/*
 * Generalized hardware event ids, handed to the sys_perf_event_open()
 * syscall through the event attribute.
 *
 * NOTE(review): older text calls the carrier "attr.event_id", but struct
 * perf_event_attr has no such member; presumably attr.config is meant --
 * confirm.
 */
enum perf_hw_id {
	/* Common hardware events, generalized by the kernel: */
	PERF_COUNT_HW_CPU_CYCLES		= 0,
	PERF_COUNT_HW_INSTRUCTIONS		= 1,
	PERF_COUNT_HW_CACHE_REFERENCES		= 2,
	PERF_COUNT_HW_CACHE_MISSES		= 3,
	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
	PERF_COUNT_HW_BRANCH_MISSES		= 5,
	PERF_COUNT_HW_BUS_CYCLES		= 6,

	/* One past the last valid id; not part of the ABI. */
	PERF_COUNT_HW_MAX,
};
| 58 | |
/*
 * Generalized hardware cache events are the cross product of:
 *
 *   { L1-D, L1-I, LLC, DTLB, ITLB, BPU } x
 *   { read, write, prefetch } x
 *   { accesses, misses }
 *
 * This enum names the first factor (which cache).
 */
enum perf_hw_cache_id {
	PERF_COUNT_HW_CACHE_L1D			= 0,
	PERF_COUNT_HW_CACHE_L1I			= 1,
	PERF_COUNT_HW_CACHE_LL			= 2,
	PERF_COUNT_HW_CACHE_DTLB		= 3,
	PERF_COUNT_HW_CACHE_ITLB		= 4,
	PERF_COUNT_HW_CACHE_BPU			= 5,

	/* One past the last valid id; not part of the ABI. */
	PERF_COUNT_HW_CACHE_MAX,
};
| 76 | |
/* Operation selector for generalized hardware cache events. */
enum perf_hw_cache_op_id {
	PERF_COUNT_HW_CACHE_OP_READ		= 0,
	PERF_COUNT_HW_CACHE_OP_WRITE		= 1,
	PERF_COUNT_HW_CACHE_OP_PREFETCH		= 2,

	/* One past the last valid op; not part of the ABI. */
	PERF_COUNT_HW_CACHE_OP_MAX,
};
| 84 | |
/* Result selector for generalized hardware cache events. */
enum perf_hw_cache_op_result_id {
	PERF_COUNT_HW_CACHE_RESULT_ACCESS	= 0,
	PERF_COUNT_HW_CACHE_RESULT_MISS		= 1,

	/* One past the last valid result; not part of the ABI. */
	PERF_COUNT_HW_CACHE_RESULT_MAX,
};
| 91 | |
/*
 * "Software" events: provided by the kernel even when the hardware has
 * no support for performance events.  They measure various physical and
 * sw events of the kernel (and allow the profiling of them as well).
 */
enum perf_sw_ids {
	PERF_COUNT_SW_CPU_CLOCK			= 0,
	PERF_COUNT_SW_TASK_CLOCK		= 1,
	PERF_COUNT_SW_PAGE_FAULTS		= 2,
	PERF_COUNT_SW_CONTEXT_SWITCHES		= 3,
	PERF_COUNT_SW_CPU_MIGRATIONS		= 4,
	PERF_COUNT_SW_PAGE_FAULTS_MIN		= 5,
	PERF_COUNT_SW_PAGE_FAULTS_MAJ		= 6,
	PERF_COUNT_SW_ALIGNMENT_FAULTS		= 7,
	PERF_COUNT_SW_EMULATION_FAULTS		= 8,

	/* One past the last valid id; not part of the ABI. */
	PERF_COUNT_SW_MAX,
};
| 111 | |
/*
 * attr.sample_type bits: each one requests a piece of information in
 * the overflow packets.
 */
enum perf_event_sample_format {
	PERF_SAMPLE_IP				= 1U << 0,
	PERF_SAMPLE_TID				= 1U << 1,
	PERF_SAMPLE_TIME			= 1U << 2,
	PERF_SAMPLE_ADDR			= 1U << 3,
	PERF_SAMPLE_READ			= 1U << 4,
	PERF_SAMPLE_CALLCHAIN			= 1U << 5,
	PERF_SAMPLE_ID				= 1U << 6,
	PERF_SAMPLE_CPU				= 1U << 7,
	PERF_SAMPLE_PERIOD			= 1U << 8,
	PERF_SAMPLE_STREAM_ID			= 1U << 9,
	PERF_SAMPLE_RAW				= 1U << 10,

	/* First unused bit; not part of the ABI. */
	PERF_SAMPLE_MAX				= 1U << 11,
};
| 131 | |
/*
 * Layout of the data returned by read() on a perf event fd, selected
 * by attr.read_format:
 *
 * struct read_format {
 *	{ u64		value;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		id;           } && PERF_FORMAT_ID
 *	} && !PERF_FORMAT_GROUP
 *
 *	{ u64		nr;
 *	  { u64		time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
 *	  { u64		time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
 *	  { u64		value;
 *	    { u64	id;           } && PERF_FORMAT_ID
 *	  }		cntr[nr];
 *	} && PERF_FORMAT_GROUP
 * };
 */
enum perf_event_read_format {
	PERF_FORMAT_TOTAL_TIME_ENABLED		= 1U << 0,
	PERF_FORMAT_TOTAL_TIME_RUNNING		= 1U << 1,
	PERF_FORMAT_ID				= 1U << 2,
	PERF_FORMAT_GROUP			= 1U << 3,

	/* First unused bit; not part of the ABI. */
	PERF_FORMAT_MAX				= 1U << 4,
};
| 160 | |
| 161 | #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ |
| 162 | |
| 163 | /* |
| 164 | * Hardware event_id to monitor via a performance monitoring event: |
| 165 | */ |
| 166 | struct perf_event_attr { |
| 167 | |
| 168 | /* |
| 169 | * Major type: hardware/software/tracepoint/etc. |
| 170 | */ |
| 171 | __u32 type; |
| 172 | |
| 173 | /* |
| 174 | * Size of the attr structure, for fwd/bwd compat. |
| 175 | */ |
| 176 | __u32 size; |
| 177 | |
| 178 | /* |
| 179 | * Type specific configuration information. |
| 180 | */ |
| 181 | __u64 config; |
| 182 | |
| 183 | union { |
| 184 | __u64 sample_period; |
| 185 | __u64 sample_freq; |
| 186 | }; |
| 187 | |
| 188 | __u64 sample_type; |
| 189 | __u64 read_format; |
| 190 | |
| 191 | __u64 disabled : 1, /* off by default */ |
| 192 | inherit : 1, /* children inherit it */ |
| 193 | pinned : 1, /* must always be on PMU */ |
| 194 | exclusive : 1, /* only group on PMU */ |
| 195 | exclude_user : 1, /* don't count user */ |
| 196 | exclude_kernel : 1, /* ditto kernel */ |
| 197 | exclude_hv : 1, /* ditto hypervisor */ |
| 198 | exclude_idle : 1, /* don't count when idle */ |
| 199 | mmap : 1, /* include mmap data */ |
| 200 | comm : 1, /* include comm data */ |
| 201 | freq : 1, /* use freq, not period */ |
| 202 | inherit_stat : 1, /* per task counts */ |
| 203 | enable_on_exec : 1, /* next exec enables */ |
| 204 | task : 1, /* trace fork/exit */ |
| 205 | watermark : 1, /* wakeup_watermark */ |
| 206 | /* |
| 207 | * precise_ip: |
| 208 | * |
| 209 | * 0 - SAMPLE_IP can have arbitrary skid |
| 210 | * 1 - SAMPLE_IP must have constant skid |
| 211 | * 2 - SAMPLE_IP requested to have 0 skid |
| 212 | * 3 - SAMPLE_IP must have 0 skid |
| 213 | * |
| 214 | * See also PERF_RECORD_MISC_EXACT_IP |
| 215 | */ |
| 216 | precise_ip : 2, /* skid constraint */ |
| 217 | |
| 218 | __reserved_1 : 47; |
| 219 | |
| 220 | union { |
| 221 | __u32 wakeup_events; /* wakeup every n events */ |
| 222 | __u32 wakeup_watermark; /* bytes before wakeup */ |
| 223 | }; |
| 224 | |
| 225 | __u32 bp_type; |
| 226 | __u64 bp_addr; |
| 227 | __u64 bp_len; |
| 228 | }; |
| 229 | |
/*
 * Ioctls that can be done on a perf event fd.  All use '$' as the
 * ioctl type character; numbers 0..6 are taken.
 */
#define PERF_EVENT_IOC_ENABLE		_IO('$', 0)
#define PERF_EVENT_IOC_DISABLE		_IO('$', 1)
#define PERF_EVENT_IOC_REFRESH		_IO('$', 2)
#define PERF_EVENT_IOC_RESET		_IO('$', 3)
#define PERF_EVENT_IOC_PERIOD		_IOW('$', 4, __u64)
#define PERF_EVENT_IOC_SET_OUTPUT	_IO('$', 5)
#define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
| 240 | |
/* Flag bits for the perf event ioctls. */
enum perf_event_ioc_flags {
	PERF_IOC_FLAG_GROUP		= 1U << 0,
};
| 244 | |
| 245 | /* |
| 246 | * Structure of the page that can be mapped via mmap |
| 247 | */ |
| 248 | struct perf_event_mmap_page { |
| 249 | __u32 version; /* version number of this structure */ |
| 250 | __u32 compat_version; /* lowest version this is compat with */ |
| 251 | |
| 252 | /* |
| 253 | * Bits needed to read the hw events in user-space. |
| 254 | * |
| 255 | * u32 seq; |
| 256 | * s64 count; |
| 257 | * |
| 258 | * do { |
| 259 | * seq = pc->lock; |
| 260 | * |
| 261 | * barrier() |
| 262 | * if (pc->index) { |
| 263 | * count = pmc_read(pc->index - 1); |
| 264 | * count += pc->offset; |
| 265 | * } else |
| 266 | * goto regular_read; |
| 267 | * |
| 268 | * barrier(); |
| 269 | * } while (pc->lock != seq); |
| 270 | * |
| 271 | * NOTE: for obvious reason this only works on self-monitoring |
| 272 | * processes. |
| 273 | */ |
| 274 | __u32 lock; /* seqlock for synchronization */ |
| 275 | __u32 index; /* hardware event identifier */ |
| 276 | __s64 offset; /* add to hardware event value */ |
| 277 | __u64 time_enabled; /* time event active */ |
| 278 | __u64 time_running; /* time event on cpu */ |
| 279 | |
| 280 | /* |
| 281 | * Hole for extension of the self monitor capabilities |
| 282 | */ |
| 283 | |
| 284 | __u64 __reserved[123]; /* align to 1k */ |
| 285 | |
| 286 | /* |
| 287 | * Control data for the mmap() data buffer. |
| 288 | * |
| 289 | * User-space reading the @data_head value should issue an rmb(), on |
| 290 | * SMP capable platforms, after reading this value -- see |
| 291 | * perf_event_wakeup(). |
| 292 | * |
| 293 | * When the mapping is PROT_WRITE the @data_tail value should be |
| 294 | * written by userspace to reflect the last read data. In this case |
| 295 | * the kernel will not over-write unread data. |
| 296 | */ |
| 297 | __u64 data_head; /* head in the data section */ |
| 298 | __u64 data_tail; /* user-space written tail */ |
| 299 | }; |
| 300 | |
/* CPU mode: a 3-bit field in the low bits. */
#define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
#define PERF_RECORD_MISC_KERNEL			(1 << 0)
#define PERF_RECORD_MISC_USER			(2 << 0)
#define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
#define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
#define PERF_RECORD_MISC_GUEST_USER		(5 << 0)

/*
 * Indicates that the content of PERF_SAMPLE_IP points to
 * the actual instruction that triggered the event. See also
 * perf_event_attr::precise_ip.
 */
#define PERF_RECORD_MISC_EXACT_IP		(1 << 14)

/*
 * Reserve the last bit to indicate some extended misc field
 */
#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
| 319 | |
| 320 | struct perf_event_header { |
| 321 | __u32 type; |
| 322 | __u16 misc; |
| 323 | __u16 size; |
| 324 | }; |
| 325 | |
/*
 * Record types.  Each enumerator is preceded by the layout of the
 * record it identifies.
 */
enum perf_event_type {

	/*
	 * The MMAP events record the PROT_EXEC mappings so that we can
	 * correlate userspace IPs to code. They have the following structure:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	u64				addr;
	 *	u64				len;
	 *	u64				pgoff;
	 *	char				filename[];
	 * };
	 */
	PERF_RECORD_MMAP			= 1,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				id;
	 *	u64				lost;
	 * };
	 */
	PERF_RECORD_LOST			= 2,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	u32				pid, tid;
	 *	char				comm[];
	 * };
	 */
	PERF_RECORD_COMM			= 3,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_EXIT			= 4,

	/*
	 * Shared by THROTTLE and UNTHROTTLE:
	 *
	 * struct {
	 *	struct perf_event_header	header;
	 *	u64				time;
	 *	u64				id;
	 *	u64				stream_id;
	 * };
	 */
	PERF_RECORD_THROTTLE			= 5,
	PERF_RECORD_UNTHROTTLE			= 6,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, ppid;
	 *	u32				tid, ptid;
	 *	u64				time;
	 * };
	 */
	PERF_RECORD_FORK			= 7,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *	u32				pid, tid;
	 *
	 *	struct read_format		values;
	 * };
	 */
	PERF_RECORD_READ			= 8,

	/*
	 * struct {
	 *	struct perf_event_header	header;
	 *
	 *	{ u64			ip;	  } && PERF_SAMPLE_IP
	 *	{ u32			pid, tid; } && PERF_SAMPLE_TID
	 *	{ u64			time;     } && PERF_SAMPLE_TIME
	 *	{ u64			addr;     } && PERF_SAMPLE_ADDR
	 *	{ u64			id;	  } && PERF_SAMPLE_ID
	 *	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
	 *	{ u32			cpu, res; } && PERF_SAMPLE_CPU
	 *	{ u64			period;   } && PERF_SAMPLE_PERIOD
	 *
	 *	{ struct read_format	values;	  } && PERF_SAMPLE_READ
	 *
	 *	{ u64			nr,
	 *	  u64			ips[nr];  } && PERF_SAMPLE_CALLCHAIN
	 *
	 *	#
	 *	# The RAW record below is opaque data wrt the ABI
	 *	#
	 *	# That is, the ABI doesn't make any promises wrt
	 *	# the stability of its content, it may vary depending
	 *	# on event, hardware, kernel version and phase of
	 *	# the moon.
	 *	#
	 *	# In other words, PERF_SAMPLE_RAW contents are not an ABI.
	 *	#
	 *
	 *	{ u32			size;
	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
	 * };
	 */
	PERF_RECORD_SAMPLE			= 9,

	/* One past the last valid record type; not part of the ABI. */
	PERF_RECORD_MAX,
};
| 441 | |
| 442 | enum perf_callchain_context { |
| 443 | PERF_CONTEXT_HV = (__u64)-32, |
| 444 | PERF_CONTEXT_KERNEL = (__u64)-128, |
| 445 | PERF_CONTEXT_USER = (__u64)-512, |
| 446 | |
| 447 | PERF_CONTEXT_GUEST = (__u64)-2048, |
| 448 | PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, |
| 449 | PERF_CONTEXT_GUEST_USER = (__u64)-2560, |
| 450 | |
| 451 | PERF_CONTEXT_MAX = (__u64)-4095, |
| 452 | }; |
| 453 | |
/* PERF_FLAG_* bit values. */
#define PERF_FLAG_FD_NO_GROUP	(1U << 0)
#define PERF_FLAG_FD_OUTPUT	(1U << 1)
| 456 | |
| 457 | #ifdef __KERNEL__ |
| 458 | /* |
| 459 | * Kernel-internal data types and definitions: |
| 460 | */ |
| 461 | |
| 462 | #ifdef CONFIG_PERF_EVENTS |
| 463 | # include <asm/perf_event.h> |
| 464 | #endif |
| 465 | |
/*
 * Callback table handed to perf_register_guest_info_callbacks() /
 * perf_unregister_guest_info_callbacks().  All three hooks take no
 * arguments.
 */
struct perf_guest_info_callbacks {
	int		(*is_in_guest)(void);
	int		(*is_user_mode)(void);
	unsigned long	(*get_guest_ip)(void);
};
| 471 | |
| 472 | #ifdef CONFIG_HAVE_HW_BREAKPOINT |
| 473 | #include <asm/hw_breakpoint.h> |
| 474 | #endif |
| 475 | |
| 476 | #include <linux/list.h> |
| 477 | #include <linux/mutex.h> |
| 478 | #include <linux/rculist.h> |
| 479 | #include <linux/rcupdate.h> |
| 480 | #include <linux/spinlock.h> |
| 481 | #include <linux/hrtimer.h> |
| 482 | #include <linux/fs.h> |
| 483 | #include <linux/pid_namespace.h> |
| 484 | #include <linux/workqueue.h> |
| 485 | #include <linux/ftrace.h> |
| 486 | #include <linux/cpu.h> |
| 487 | #include <asm/atomic.h> |
| 488 | #include <asm/local.h> |
| 489 | |
| 490 | #define PERF_MAX_STACK_DEPTH 255 |
| 491 | |
| 492 | struct perf_callchain_entry { |
| 493 | __u64 nr; |
| 494 | __u64 ip[PERF_MAX_STACK_DEPTH]; |
| 495 | }; |
| 496 | |
| 497 | struct perf_raw_record { |
| 498 | u32 size; |
| 499 | void *data; |
| 500 | }; |
| 501 | |
| 502 | struct perf_branch_entry { |
| 503 | __u64 from; |
| 504 | __u64 to; |
| 505 | __u64 flags; |
| 506 | }; |
| 507 | |
| 508 | struct perf_branch_stack { |
| 509 | __u64 nr; |
| 510 | struct perf_branch_entry entries[0]; |
| 511 | }; |
| 512 | |
| 513 | struct task_struct; |
| 514 | |
/**
 * struct hw_perf_event - performance event hardware details:
 *
 * The leading anonymous union overlays the hardware, software and
 * (when CONFIG_HAVE_HW_BREAKPOINT is set) breakpoint state.  The
 * struct is empty unless CONFIG_PERF_EVENTS is set.
 */
struct hw_perf_event {
#ifdef CONFIG_PERF_EVENTS
	union {
		/* hardware */
		struct {
			u64		config;
			u64		last_tag;
			unsigned long	config_base;
			unsigned long	event_base;
			int		idx;
			int		last_cpu;
		};
		/* software */
		struct {
			s64		remaining;
			struct hrtimer	hrtimer;
		};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
		/* breakpoint */
		struct arch_hw_breakpoint	info;
#endif
	};
	atomic64_t			prev_count;
	u64				sample_period;
	u64				last_period;
	atomic64_t			period_left;
	u64				interrupts;

	u64				freq_time_stamp;
	u64				freq_count_stamp;
#endif
};
| 548 | |
| 549 | struct perf_event; |
| 550 | |
#define PERF_EVENT_TXN_STARTED		1

/**
 * struct pmu - generic performance monitoring unit
 *
 * A table of operations; each per-event hook receives the event it
 * acts on.
 */
struct pmu {
	int	(*enable)(struct perf_event *event);
	void	(*disable)(struct perf_event *event);
	int	(*start)(struct perf_event *event);
	void	(*stop)(struct perf_event *event);
	void	(*read)(struct perf_event *event);
	void	(*unthrottle)(struct perf_event *event);

	/*
	 * Group events scheduling is treated as a transaction:
	 * add group events as a whole and perform one schedulability test.
	 * If the test fails, roll back the whole group.
	 */
	void	(*start_txn)(const struct pmu *pmu);
	void	(*cancel_txn)(const struct pmu *pmu);
	int	(*commit_txn)(const struct pmu *pmu);
};
| 574 | |
/**
 * enum perf_event_active_state - the states of an event
 */
enum perf_event_active_state {
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};
| 584 | |
| 585 | struct file; |
| 586 | |
| 587 | struct perf_mmap_data { |
| 588 | struct rcu_head rcu_head; |
| 589 | #ifdef CONFIG_PERF_USE_VMALLOC |
| 590 | struct work_struct work; |
| 591 | #endif |
| 592 | int data_order; /* allocation order */ |
| 593 | int nr_pages; /* nr of data pages */ |
| 594 | int writable; /* are we writable */ |
| 595 | int nr_locked; /* nr pages mlocked */ |
| 596 | |
| 597 | atomic_t poll; /* POLL_ for wakeups */ |
| 598 | |
| 599 | local_t head; /* write position */ |
| 600 | local_t nest; /* nested writers */ |
| 601 | local_t events; /* event limit */ |
| 602 | local_t wakeup; /* needs a wakeup */ |
| 603 | local_t lost; /* nr records lost */ |
| 604 | |
| 605 | long watermark; /* wakeup watermark */ |
| 606 | |
| 607 | struct perf_event_mmap_page *user_page; |
| 608 | void *data_pages[0]; |
| 609 | }; |
| 610 | |
/*
 * Node of a singly linked list of deferred callbacks; @func takes the
 * entry itself as its only argument.
 */
struct perf_pending_entry {
	struct perf_pending_entry	*next;
	void				(*func)(struct perf_pending_entry *);
};
| 615 | |
| 616 | struct perf_sample_data; |
| 617 | |
/*
 * Overflow callback type (see perf_event::overflow_handler).
 * NOTE(review): the unnamed int presumably mirrors the 'nmi' argument
 * of perf_event_overflow() -- confirm.
 */
typedef void (*perf_overflow_handler_t)(struct perf_event *,
					int,
					struct perf_sample_data *,
					struct pt_regs *regs);
| 621 | |
/*
 * Group flag bits.
 * NOTE(review): presumably stored in perf_event::group_flags -- confirm.
 */
enum perf_group_flag {
	PERF_GROUP_SOFTWARE	= 0x1,
};
| 625 | |
| 626 | #define SWEVENT_HLIST_BITS 8 |
| 627 | #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS) |
| 628 | |
| 629 | struct swevent_hlist { |
| 630 | struct hlist_head heads[SWEVENT_HLIST_SIZE]; |
| 631 | struct rcu_head rcu_head; |
| 632 | }; |
| 633 | |
/**
 * struct perf_event - performance event kernel representation:
 *
 * The struct is empty unless CONFIG_PERF_EVENTS is set.
 */
struct perf_event {
#ifdef CONFIG_PERF_EVENTS
	/* list linkage and grouping */
	struct list_head		group_entry;
	struct list_head		event_entry;
	struct list_head		sibling_list;
	struct hlist_node		hlist_entry;
	int				nr_siblings;
	int				group_flags;
	struct perf_event		*group_leader;
	struct perf_event		*output;
	const struct pmu		*pmu;

	enum perf_event_active_state	state;
	atomic64_t			count;

	/*
	 * These are the total time in nanoseconds that the event
	 * has been enabled (i.e. eligible to run, and the task has
	 * been scheduled in, if this is a per-task event)
	 * and running (scheduled onto the CPU), respectively.
	 *
	 * They are computed from tstamp_enabled, tstamp_running and
	 * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
	 */
	u64				total_time_enabled;
	u64				total_time_running;

	/*
	 * These are timestamps used for computing total_time_enabled
	 * and total_time_running when the event is in INACTIVE or
	 * ACTIVE state, measured in nanoseconds from an arbitrary point
	 * in time.
	 * tstamp_enabled: the notional time when the event was enabled
	 * tstamp_running: the notional time when the event was scheduled on
	 * tstamp_stopped: in INACTIVE state, the notional time when the
	 *	event was scheduled off.
	 */
	u64				tstamp_enabled;
	u64				tstamp_running;
	u64				tstamp_stopped;

	struct perf_event_attr		attr;
	struct hw_perf_event		hw;

	struct perf_event_context	*ctx;
	struct file			*filp;

	/*
	 * These accumulate total time (in nanoseconds) that children
	 * events have been enabled and running, respectively.
	 */
	atomic64_t			child_total_time_enabled;
	atomic64_t			child_total_time_running;

	/*
	 * Protect attach/detach and child_list:
	 */
	struct mutex			child_mutex;
	struct list_head		child_list;
	struct perf_event		*parent;

	int				oncpu;
	int				cpu;

	struct list_head		owner_entry;
	struct task_struct		*owner;

	/* mmap bits */
	struct mutex			mmap_mutex;
	atomic_t			mmap_count;
	struct perf_mmap_data		*data;

	/* poll related */
	wait_queue_head_t		waitq;
	struct fasync_struct		*fasync;

	/* delayed work for NMIs and such */
	int				pending_wakeup;
	int				pending_kill;
	int				pending_disable;
	struct perf_pending_entry	pending;

	atomic_t			event_limit;

	void (*destroy)(struct perf_event *);
	struct rcu_head			rcu_head;

	struct pid_namespace		*ns;
	u64				id;

	perf_overflow_handler_t		overflow_handler;

#ifdef CONFIG_EVENT_TRACING
	struct event_filter		*filter;
#endif

#endif /* CONFIG_PERF_EVENTS */
};
| 735 | |
| 736 | /** |
| 737 | * struct perf_event_context - event context structure |
| 738 | * |
| 739 | * Used as a container for task events and CPU events as well: |
| 740 | */ |
| 741 | struct perf_event_context { |
| 742 | /* |
| 743 | * Protect the states of the events in the list, |
| 744 | * nr_active, and the list: |
| 745 | */ |
| 746 | raw_spinlock_t lock; |
| 747 | /* |
| 748 | * Protect the list of events. Locking either mutex or lock |
| 749 | * is sufficient to ensure the list doesn't change; to change |
| 750 | * the list you need to lock both the mutex and the spinlock. |
| 751 | */ |
| 752 | struct mutex mutex; |
| 753 | |
| 754 | struct list_head pinned_groups; |
| 755 | struct list_head flexible_groups; |
| 756 | struct list_head event_list; |
| 757 | int nr_events; |
| 758 | int nr_active; |
| 759 | int is_active; |
| 760 | int nr_stat; |
| 761 | atomic_t refcount; |
| 762 | struct task_struct *task; |
| 763 | |
| 764 | /* |
| 765 | * Context clock, runs when context enabled. |
| 766 | */ |
| 767 | u64 time; |
| 768 | u64 timestamp; |
| 769 | |
| 770 | /* |
| 771 | * These fields let us detect when two contexts have both |
| 772 | * been cloned (inherited) from a common ancestor. |
| 773 | */ |
| 774 | struct perf_event_context *parent_ctx; |
| 775 | u64 parent_gen; |
| 776 | u64 generation; |
| 777 | int pin_count; |
| 778 | struct rcu_head rcu_head; |
| 779 | }; |
| 780 | |
| 781 | /** |
| 782 | * struct perf_event_cpu_context - per cpu event context structure |
| 783 | */ |
| 784 | struct perf_cpu_context { |
| 785 | struct perf_event_context ctx; |
| 786 | struct perf_event_context *task_ctx; |
| 787 | int active_oncpu; |
| 788 | int max_pertask; |
| 789 | int exclusive; |
| 790 | struct swevent_hlist *swevent_hlist; |
| 791 | struct mutex hlist_mutex; |
| 792 | int hlist_refcount; |
| 793 | |
| 794 | /* |
| 795 | * Recursion avoidance: |
| 796 | * |
| 797 | * task, softirq, irq, nmi context |
| 798 | */ |
| 799 | int recursion[4]; |
| 800 | }; |
| 801 | |
/*
 * State passed between perf_output_begin(), perf_output_copy() and
 * perf_output_end().
 */
struct perf_output_handle {
	struct perf_event	*event;
	struct perf_mmap_data	*data;
	unsigned long		head;
	unsigned long		offset;
	int			nmi;
	int			sample;
};
| 810 | |
| 811 | #ifdef CONFIG_PERF_EVENTS |
| 812 | |
/*
 * Set by architecture code:
 */
extern int perf_max_events;

/* NOTE(review): presumably implemented per architecture -- confirm. */
extern const struct pmu *hw_perf_event_init(struct perf_event *event);

/*
 * Per-task hooks.  Several of these have empty inline stubs further
 * down for builds without CONFIG_PERF_EVENTS.
 */
extern void perf_event_task_sched_in(struct task_struct *task);
extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
extern void perf_event_task_tick(struct task_struct *task);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void set_perf_event_pending(void);
extern void perf_event_do_pending(void);
extern void perf_event_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
extern void perf_disable(void);
extern void perf_enable(void);
extern int perf_event_task_disable(void);
extern int perf_event_task_enable(void);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
/* In-kernel counter creation; @callback has the overflow-handler type. */
extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu,
pid_t pid,
perf_overflow_handler_t callback);
/* Returns a u64 value; @enabled and @running are u64 out-pointers. */
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
| 844 | |
| 845 | struct perf_sample_data { |
| 846 | u64 type; |
| 847 | |
| 848 | u64 ip; |
| 849 | struct { |
| 850 | u32 pid; |
| 851 | u32 tid; |
| 852 | } tid_entry; |
| 853 | u64 time; |
| 854 | u64 addr; |
| 855 | u64 id; |
| 856 | u64 stream_id; |
| 857 | struct { |
| 858 | u32 cpu; |
| 859 | u32 reserved; |
| 860 | } cpu_entry; |
| 861 | u64 period; |
| 862 | struct perf_callchain_entry *callchain; |
| 863 | struct perf_raw_record *raw; |
| 864 | }; |
| 865 | |
| 866 | static inline |
| 867 | void perf_sample_data_init(struct perf_sample_data *data, u64 addr) |
| 868 | { |
| 869 | data->addr = addr; |
| 870 | data->raw = NULL; |
| 871 | } |
| 872 | |
/* Sample output path: prepare, emit, and overflow entry points. */
extern void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs);

/* Same argument list as perf_overflow_handler_t, but returns int. */
extern int perf_event_overflow(struct perf_event *event, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs);
| 885 | |
| 886 | /* |
| 887 | * Return 1 for a software event, 0 for a hardware event |
| 888 | */ |
| 889 | static inline int is_software_event(struct perf_event *event) |
| 890 | { |
| 891 | switch (event->attr.type) { |
| 892 | case PERF_TYPE_SOFTWARE: |
| 893 | case PERF_TYPE_TRACEPOINT: |
| 894 | /* for now the breakpoint stuff also works as software event */ |
| 895 | case PERF_TYPE_BREAKPOINT: |
| 896 | return 1; |
| 897 | } |
| 898 | return 0; |
| 899 | } |
| 900 | |
/* One counter per software event id; checked by perf_sw_event(). */
extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];

/* Out-of-line part of perf_sw_event(); same argument order. */
extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);

/* Called by perf_fetch_caller_regs() with the resolved caller ip. */
extern void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
| 907 | |
| 908 | /* |
| 909 | * Take a snapshot of the regs. Skip ip and frame pointer to |
| 910 | * the nth caller. We only need a few of the regs: |
| 911 | * - ip for PERF_SAMPLE_IP |
| 912 | * - cs for user_mode() tests |
| 913 | * - bp for callchains |
| 914 | * - eflags, for future purposes, just in case |
| 915 | */ |
| 916 | static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) |
| 917 | { |
| 918 | unsigned long ip; |
| 919 | |
| 920 | memset(regs, 0, sizeof(*regs)); |
| 921 | |
| 922 | switch (skip) { |
| 923 | case 1 : |
| 924 | ip = CALLER_ADDR0; |
| 925 | break; |
| 926 | case 2 : |
| 927 | ip = CALLER_ADDR1; |
| 928 | break; |
| 929 | case 3 : |
| 930 | ip = CALLER_ADDR2; |
| 931 | break; |
| 932 | case 4: |
| 933 | ip = CALLER_ADDR3; |
| 934 | break; |
| 935 | /* No need to support further for now */ |
| 936 | default: |
| 937 | ip = 0; |
| 938 | } |
| 939 | |
| 940 | return perf_arch_fetch_caller_regs(regs, ip, skip); |
| 941 | } |
| 942 | |
| 943 | static inline void |
| 944 | perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) |
| 945 | { |
| 946 | if (atomic_read(&perf_swevent_enabled[event_id])) { |
| 947 | struct pt_regs hot_regs; |
| 948 | |
| 949 | if (!regs) { |
| 950 | perf_fetch_caller_regs(&hot_regs, 1); |
| 951 | regs = &hot_regs; |
| 952 | } |
| 953 | __perf_sw_event(event_id, nr, nmi, regs, addr); |
| 954 | } |
| 955 | } |
| 956 | |
| 957 | extern void __perf_event_mmap(struct vm_area_struct *vma); |
| 958 | |
| 959 | static inline void perf_event_mmap(struct vm_area_struct *vma) |
| 960 | { |
| 961 | if (vma->vm_flags & VM_EXEC) |
| 962 | __perf_event_mmap(vma); |
| 963 | } |
| 964 | |
/* Guest-info callback table and its register/unregister entry points. */
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);

extern void perf_event_comm(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);

/* Tunables; the perf_paranoid_*() helpers below test the first one. */
extern int sysctl_perf_event_paranoid;
extern int sysctl_perf_event_mlock;
extern int sysctl_perf_event_sample_rate;
| 977 | |
| 978 | static inline bool perf_paranoid_tracepoint_raw(void) |
| 979 | { |
| 980 | return sysctl_perf_event_paranoid > -1; |
| 981 | } |
| 982 | |
| 983 | static inline bool perf_paranoid_cpu(void) |
| 984 | { |
| 985 | return sysctl_perf_event_paranoid > 0; |
| 986 | } |
| 987 | |
| 988 | static inline bool perf_paranoid_kernel(void) |
| 989 | { |
| 990 | return sysctl_perf_event_paranoid > 1; |
| 991 | } |
| 992 | |
extern void perf_event_init(void);
/* NOTE(review): presumably the tracepoint entry point -- confirm. */
extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size, struct pt_regs *regs, void *event);
/* NOTE(review): presumably the breakpoint entry point -- confirm. */
extern void perf_bp_event(struct perf_event *event, void *data);
| 997 | |
/*
 * Generic fallbacks, used only when perf_misc_flags has not already
 * been defined (NOTE(review): presumably by <asm/perf_event.h> --
 * confirm).  perf_misc_flags() maps user_mode(regs) to
 * PERF_RECORD_MISC_USER, otherwise PERF_RECORD_MISC_KERNEL;
 * perf_instruction_pointer() forwards to instruction_pointer().
 */
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
PERF_RECORD_MISC_KERNEL)
#define perf_instruction_pointer(regs) instruction_pointer(regs)
#endif
| 1003 | |
/* Output-buffer API built around struct perf_output_handle. */
extern int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample);
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
/* get returns an int that is later handed back to put as @rctx. */
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
| 1014 | #else |
/*
 * !CONFIG_PERF_EVENTS stubs: the hooks do nothing,
 * perf_event_init_task() reports success, and the task enable/disable
 * calls fail with -EINVAL.
 */
static inline void perf_event_task_sched_in(struct task_struct *task)
{
}

static inline void perf_event_task_sched_out(struct task_struct *task,
					     struct task_struct *next)
{
}

static inline void perf_event_task_tick(struct task_struct *task)
{
}

static inline int perf_event_init_task(struct task_struct *child)
{
	return 0;
}

static inline void perf_event_exit_task(struct task_struct *child)
{
}

static inline void perf_event_free_task(struct task_struct *task)
{
}

static inline void perf_event_do_pending(void)
{
}

static inline void perf_event_print_debug(void)
{
}

static inline void perf_disable(void)
{
}

static inline void perf_enable(void)
{
}

static inline int perf_event_task_disable(void)
{
	return -EINVAL;
}

static inline int perf_event_task_enable(void)
{
	return -EINVAL;
}
| 1031 | |
| 1032 | static inline void |
| 1033 | perf_sw_event(u32 event_id, u64 nr, int nmi, |
| 1034 | struct pt_regs *regs, u64 addr) { } |
| 1035 | static inline void |
| 1036 | perf_bp_event(struct perf_event *event, void *data) { } |
| 1037 | |
/*
 * !CONFIG_PERF_EVENTS stubs: registering and unregistering guest
 * callbacks both report success without doing anything.
 */
static inline int
perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks)
{
	return 0;
}

static inline int
perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks)
{
	return 0;
}
| 1042 | |
/*
 * !CONFIG_PERF_EVENTS stubs: all no-ops, except that
 * perf_swevent_get_recursion_context() always returns -1.
 */
static inline void perf_event_mmap(struct vm_area_struct *vma)
{
}

static inline void perf_event_comm(struct task_struct *tsk)
{
}

static inline void perf_event_fork(struct task_struct *tsk)
{
}

static inline void perf_event_init(void)
{
}

static inline int perf_swevent_get_recursion_context(void)
{
	return -1;
}

static inline void perf_swevent_put_recursion_context(int rctx)
{
}

static inline void perf_event_enable(struct perf_event *event)
{
}

static inline void perf_event_disable(struct perf_event *event)
{
}
| 1051 | #endif |
| 1052 | |
/*
 * Copy the object @x through @handle: passes the address of @x and
 * sizeof(x) to perf_output_copy(), so @x must be an lvalue.
 */
#define perf_output_put(handle, x) \
perf_output_copy((handle), &(x), sizeof(x))
| 1055 | |
/*
 * This has to have a higher priority than migration_notifier in sched.c.
 *
 * Defines a static notifier_block named fn##_nb (priority 20) that
 * calls @fn, invokes @fn directly for CPU_UP_PREPARE, CPU_STARTING and
 * CPU_ONLINE with the current processor id as the data argument, and
 * only then registers the block with register_cpu_notifier().
 */
#define perf_cpu_notifier(fn) \
do { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = 20 }; \
fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_STARTING, \
(void *)(unsigned long)smp_processor_id()); \
fn(&fn##_nb, (unsigned long)CPU_ONLINE, \
(void *)(unsigned long)smp_processor_id()); \
register_cpu_notifier(&fn##_nb); \
} while (0)
| 1071 | |
| 1072 | #endif /* __KERNEL__ */ |
| 1073 | #endif /* _LINUX_PERF_EVENT_H */ |