#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>

#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>

#include "session.h"
#include "sort.h"
#include "util.h"
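/*
 * Open the input file named in self->filename: "-" selects stdin (pipe
 * mode), anything else is opened read-only, sanity-checked for ownership
 * and size, and validated against the perf file header.
 */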
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd_pipe = true;
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		int err = errno;

		pr_err("failed to open file: %s", self->filename);
		if (!strcmp(self->filename, "perf.data"))
			pr_err(" (try 'perf record' first)");
		pr_err("\n");
		return -err;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	self->fd = -1;
	return -1;
}
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
}
int perf_session__create_kernel_maps(struct perf_session *self)
{
	int ret = machine__create_kernel_maps(&self->host_machine);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(&self->machines);
	return ret;
}
struct perf_session *perf_session__new(const char *filename, int mode,
				       bool force, bool repipe)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
		goto out_free;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	self->hists_tree = RB_ROOT;
	self->last_match = NULL;
	self->mmap_window = 32;
	self->machines = RB_ROOT;
	self->repipe = repipe;
	INIT_LIST_HEAD(&self->ordered_samples.samples_head);
	machine__init(&self->host_machine, "", HOST_KERNEL_ID);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	perf_session__update_sample_type(self);
out:
	return self;
out_free:
	free(self);
	return NULL;
out_delete:
	perf_session__delete(self);
	return NULL;
}
void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	free(self->cwd);
	free(self);
}
static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}
struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
						   struct thread *thread,
						   struct ip_callchain *chain,
						   struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

	if (!syms)
		return NULL;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;	break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;	break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;	break;
			default:
				break;
			}
			continue;
		}

		thread__find_addr_location(thread, self, cpumode,
					   MAP__FUNCTION, thread->pid, ip, &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
			syms[i].map = al.map;
			syms[i].sym = al.sym;
		}
	}

	return syms;
}
static int process_event_stub(event_t *event __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round_stub(event_t *event __used,
				       struct perf_session *session __used,
				       struct perf_event_ops *ops __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}

static int process_finished_round(event_t *event,
				  struct perf_session *session,
				  struct perf_event_ops *ops);
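/*
 * Point every handler the caller left NULL at a stub, so the dispatch in
 * perf_session__process_event() can call through ops without NULL checks.
 * finished_round gets the real ordering-aware handler only when the caller
 * asked for ordered samples.
 */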
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = process_event_stub;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_stub;
	if (handler->finished_round == NULL) {
		if (handler->ordered_samples)
			handler->finished_round = process_finished_round;
		else
			handler->finished_round = process_finished_round_stub;
	}
}
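/* Byte-swap a buffer of 64-bit words in place (for cross-endian files). */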
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}
static void event__all64_swap(event_t *self)
{
	struct perf_event_header *hdr = &self->header;
	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}
static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}
static void event__mmap_swap(event_t *self)
{
	self->mmap.pid	 = bswap_32(self->mmap.pid);
	self->mmap.tid	 = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len	 = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}
static void event__task_swap(event_t *self)
{
	self->fork.pid	= bswap_32(self->fork.pid);
	self->fork.tid	= bswap_32(self->fork.tid);
	self->fork.ppid	= bswap_32(self->fork.ppid);
	self->fork.ptid	= bswap_32(self->fork.ptid);
	self->fork.time	= bswap_64(self->fork.time);
}
static void event__read_swap(event_t *self)
{
	self->read.pid		= bswap_32(self->read.pid);
	self->read.tid		= bswap_32(self->read.tid);
	self->read.value	= bswap_64(self->read.value);
	self->read.time_enabled	= bswap_64(self->read.time_enabled);
	self->read.time_running	= bswap_64(self->read.time_running);
	self->read.id		= bswap_64(self->read.id);
}
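/*
 * Swap the fixed-layout fields of the attr event, then byte-swap the
 * trailing array of 64-bit ids that follows them.
 */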
static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type		= bswap_32(self->attr.attr.type);
	self->attr.attr.size		= bswap_32(self->attr.attr.size);
	self->attr.attr.config		= bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len);

	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}
static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}
static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}
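/*
 * Table of byte-swap handlers, indexed by record type. A NULL entry means
 * the record needs no swapping beyond its header.
 */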
typedef void (*event__swap_op)(event_t *self);

static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]			= event__mmap_swap,
	[PERF_RECORD_COMM]			= event__comm_swap,
	[PERF_RECORD_FORK]			= event__task_swap,
	[PERF_RECORD_EXIT]			= event__task_swap,
	[PERF_RECORD_LOST]			= event__all64_swap,
	[PERF_RECORD_READ]			= event__read_swap,
	[PERF_RECORD_SAMPLE]			= event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]		= event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]		= event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA]	= event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]		= NULL,
	[PERF_RECORD_HEADER_MAX]		= NULL,
};
struct sample_queue {
	u64			timestamp;
	struct sample_event	*event;
	struct list_head	list;
};
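/*
 * Deliver every queued sample with a timestamp at or below next_flush to
 * ops->sample(), in timestamp order, and drop it from the queue.
 */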
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct list_head *head = &s->ordered_samples.samples_head;
	u64 limit = s->ordered_samples.next_flush;
	struct sample_queue *tmp, *iter;

	if (!ops->ordered_samples || !limit)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			return;

		if (iter == s->ordered_samples.last_inserted)
			s->ordered_samples.last_inserted = NULL;

		ops->sample((event_t *)iter->event, s);

		s->ordered_samples.last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->event);
		free(iter);
	}
}
/*
 * When perf record finishes a pass over every buffer, it records this pseudo
 * event.
 * We record the max timestamp t found in pass n.
 * Assuming these timestamps are monotonic across cpus, we know that if
 * a buffer still has events with timestamps below t, they will all be
 * available and then read in pass n + 1.
 * Hence when we start to read pass n + 2, we can safely flush every
 * event with a timestamp below t.
 *
 *    ============ PASS n =================
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          1          |         2
 *          2          |         3
 *          -          |         4  <--- max recorded
 *
 *    ============ PASS n + 1 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          3          |         5
 *          4          |         6
 *          5          |         7 <---- max recorded
 *
 *      Flush every event below timestamp 4
 *
 *    ============ PASS n + 2 ==============
 *       CPU 0         |   CPU 1
 *                     |
 *    cnt1 timestamps  |   cnt2 timestamps
 *          6          |         8
 *          7          |         9
 *          -          |         10
 *
 *      Flush every event below timestamp 7
 *      etc...
 */
static int process_finished_round(event_t *event __used,
				  struct perf_session *session,
				  struct perf_event_ops *ops)
{
	flush_sample_queue(session, ops);
	session->ordered_samples.next_flush = session->ordered_samples.max_timestamp;

	return 0;
}
static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
{
	struct sample_queue *iter;

	list_for_each_entry_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_before(struct sample_queue *new,
				  struct sample_queue *iter,
				  struct list_head *head)
{
	list_for_each_entry_continue_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_after(struct sample_queue *new,
				 struct sample_queue *iter,
				 struct list_head *head)
{
	list_for_each_entry_continue(iter, head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}

	list_add_tail(&new->list, head);
}
/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
				 struct perf_session *s)
{
	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
	struct list_head *head = &s->ordered_samples.samples_head;

	if (!last_inserted) {
		__queue_sample_end(new, head);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp
	 * very close to the last event inserted, unless we just switched
	 * to another event buffer. Having a sorting based on a list and
	 * on the last inserted event that is close to the current one is
	 * probably more efficient than an rbtree based sorting.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_sample_before(new, last_inserted, head);
	else
		__queue_sample_after(new, last_inserted, head);
}
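/*
 * Copy the event and insert the copy into the timestamp-ordered queue,
 * remembering it as the hint for the next insertion.
 */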
static int queue_sample_event(event_t *event, struct sample_data *data,
			      struct perf_session *s)
{
	u64 timestamp = data->time;
	struct sample_queue *new;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	new = malloc(sizeof(*new));
	if (!new)
		return -ENOMEM;

	new->timestamp = timestamp;

	new->event = malloc(event->header.size);
	if (!new->event) {
		free(new);
		return -ENOMEM;
	}

	memcpy(new->event, event, event->header.size);

	__queue_sample_event(new, s);
	s->ordered_samples.last_inserted = new;

	if (new->timestamp > s->ordered_samples.max_timestamp)
		s->ordered_samples.max_timestamp = new->timestamp;

	return 0;
}
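/*
 * Entry point for samples: deliver immediately when ordering is off,
 * otherwise parse the timestamp and queue for an ordered flush.
 */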
static int perf_session__process_sample(event_t *event, struct perf_session *s,
					struct perf_event_ops *ops)
{
	struct sample_data data;

	if (!ops->ordered_samples)
		return ops->sample(event, s);

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, s->sample_type, &data);

	queue_sample_event(event, &data, s);

	return 0;
}
static int perf_session__process_event(struct perf_session *self,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 offset, u64 head)
{
	if (event->header.type < PERF_RECORD_HEADER_MAX) {
		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
			    offset + head, event->header.size,
			    event__name[event->header.type]);
		hists__inc_nr_events(&self->hists, event->header.type);
	}

	if (self->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		return perf_session__process_sample(event, self, ops);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, self);
	case PERF_RECORD_COMM:
		return ops->comm(event, self);
	case PERF_RECORD_FORK:
		return ops->fork(event, self);
	case PERF_RECORD_EXIT:
		return ops->exit(event, self);
	case PERF_RECORD_LOST:
		return ops->lost(event, self);
	case PERF_RECORD_READ:
		return ops->read(event, self);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, self);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, self);
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, self);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, self);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(self->fd, offset + head, SEEK_SET);
		return ops->tracing_data(event, self);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, self);
	case PERF_RECORD_FINISHED_ROUND:
		return ops->finished_round(event, self, ops);
	default:
		++self->hists.stats.nr_unknown_events;
		return -1;
	}
}
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}
int do_read(int fd, void *buf, size_t size)
{
	void *buf_start = buf;

	while (size) {
		int ret = read(fd, buf, size);

		if (ret <= 0)
			return ret;

		size -= ret;
		buf += ret;
	}

	return buf - buf_start;
}
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;
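/*
 * Event loop for pipe input: read one event header at a time, swap if
 * needed, pull in the payload and dispatch it, until the stream ends or
 * session_done() is set.
 */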
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head;
	int err;
	void *p;

	perf_event_ops__fill_defaults(ops);

	head = 0;
more:
	err = do_read(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	if (size - sizeof(struct perf_event_header)) {
		err = do_read(self->fd, p,
			      size - sizeof(struct perf_event_header));
		if (err <= 0) {
			if (err == 0) {
				pr_err("unexpected end of event stream\n");
				goto done;
			}

			pr_err("failed to read event data\n");
			goto out_err;
		}
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops,
						0, head)) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    head, event.header.size, event.header.type);

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	return err;
}
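/*
 * Event loop for file input: mmap a sliding window of mmap_window pages
 * over the data section, dispatch events in place, remap as the head
 * crosses the window, and finish with a flush of any ordered samples.
 */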
int __perf_session__process_events(struct perf_session *self,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	int err, mmap_prot, mmap_flags;
	u64 head, shift;
	u64 offset = 0;
	size_t page_size;
	event_t *event;
	uint32_t size;
	char *buf;
	struct ui_progress *progress = ui_progress__new("Processing events...",
							self->size);
	if (progress == NULL)
		return -1;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	head = data_offset;
	shift = page_size * (head / page_size);
	offset += shift;
	head -= shift;

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (self->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
		   mmap_flags, self->fd, offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}

more:
	event = (event_t *)(buf + head);
	ui_progress__update(progress, offset);

	if (self->header.needs_swap)
		perf_event_header__bswap(&event->header);
	size = event->header.size;
	if (size == 0)
		size = 8;

	if (head + event->header.size >= page_size * self->mmap_window) {
		int munmap_ret;

		shift = page_size * (head / page_size);

		munmap_ret = munmap(buf, page_size * self->mmap_window);
		assert(munmap_ret == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    offset + head, event->header.size, event->header.type);

	if (size == 0 ||
	    perf_session__process_event(self, event, ops, offset, head) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head >= data_offset + data_size)
		goto done;

	if (offset + head < file_size)
		goto more;
done:
	err = 0;
	/* do the final flush for ordered samples */
	self->ordered_samples.next_flush = ULLONG_MAX;
	flush_sample_queue(self, ops);
out_err:
	ui_progress__delete(progress);
	return err;
}
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!symbol_conf.full_paths) {
		char bf[PATH_MAX];

		if (getcwd(bf, sizeof(bf)) == NULL) {
			err = -errno;
			pr_err("failed to get the current directory\n");
			goto out_err;
		}
		self->cwd = strdup(bf);
		if (self->cwd == NULL) {
			err = -ENOMEM;
			goto out_err;
		}
		self->cwdlen = strlen(self->cwd);
	}

	if (!self->fd_pipe)
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
out_err:
	return err;
}
bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);
		kmap->ref_reloc_sym = ref;
	}

	return 0;
}
size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp)
{
	return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) +
	       __dsos__fprintf(&self->host_machine.user_dsos, fp) +
	       machines__fprintf_dsos(&self->machines, fp);
}
size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp,
					  bool with_hits)
{
	size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits);

	return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits);
}