Merge branch 'kbuild' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
[deliverable/linux.git] / tools / perf / builtin-record.c
1 /*
2 * builtin-record.c
3 *
4 * Builtin record command: Record the profile of a workload
5 * (or a CPU, or a PID) into the perf.data output file - for
6 * later analysis via perf report.
7 */
8 #define _FILE_OFFSET_BITS 64
9
10 #include "builtin.h"
11
12 #include "perf.h"
13
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
/*
 * Fallback on_exit() for libcs that lack it: callbacks are replayed
 * from a single atexit() hook, and the exit status is captured by
 * wrapping exit() so the callbacks can see it.
 */
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
/* Capture the status passed to exit() for the callbacks above. */
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	/* Refuse registration once the fixed-size table is full. */
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		/* First registration arms the atexit() trampoline. */
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	/* Run callbacks in registration order with the captured status. */
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif
65
/* What to do when the output file already exists. */
enum write_mode_t {
	WRITE_FORCE,	/* overwrite, renaming the old file to *.old */
	WRITE_APPEND	/* append to the existing perf.data */
};

/* All state for one 'perf record' session, threaded through as one object. */
struct perf_record {
	struct perf_tool	tool;		/* event-processing callbacks */
	struct perf_record_opts	opts;
	u64			bytes_written;	/* payload bytes written so far */
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;		/* output file descriptor */
	unsigned int		page_size;
	int			realtime_prio;
	enum write_mode_t	write_mode;
	bool			no_buildid;
	bool			no_buildid_cache;
	bool			force;
	bool			file_new;
	bool			append_file;
	long			samples;	/* mmap reads since last poll */
	off_t			post_processing_offset;	/* where event data starts */
};
91
92 static void advance_output(struct perf_record *rec, size_t size)
93 {
94 rec->bytes_written += size;
95 }
96
97 static int write_output(struct perf_record *rec, void *buf, size_t size)
98 {
99 while (size) {
100 int ret = write(rec->output, buf, size);
101
102 if (ret < 0) {
103 pr_err("failed to write\n");
104 return -1;
105 }
106
107 size -= ret;
108 buf += ret;
109
110 rec->bytes_written += ret;
111 }
112
113 return 0;
114 }
115
116 static int process_synthesized_event(struct perf_tool *tool,
117 union perf_event *event,
118 struct perf_sample *sample __maybe_unused,
119 struct machine *machine __maybe_unused)
120 {
121 struct perf_record *rec = container_of(tool, struct perf_record, tool);
122 if (write_output(rec, event, event->header.size) < 0)
123 return -1;
124
125 return 0;
126 }
127
/*
 * Copy all new data between the consumed tail (md->prev) and the
 * kernel's head pointer out of one mmap'ed ring buffer into the output
 * file, then advance the tail so the kernel can reuse the space.
 * Returns 0 on success, -1 on write failure.
 */
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	/* Data starts one page past the base: page 0 is the control page. */
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;	/* nothing new */

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		/* New data wraps around the buffer: write the tail chunk first. */
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	/* Write the (remaining) contiguous chunk. */
	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	/* Only move the tail once the data has safely been written out. */
	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}
171
static volatile int done = 0;		/* set by signals: main loop should stop */
static volatile int signr = -1;		/* which signal stopped us, for re-raise */
static volatile int child_finished = 0;	/* workload child has already exited */

/*
 * SIGCHLD/SIGINT/SIGUSR1 handler: remember which signal fired and ask
 * the record loop to wind down.
 */
static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}
184
/*
 * on_exit() hook: reap the forked workload (terminating it if it is
 * still running), then, if we are exiting because of a fatal signal
 * (other than SIGUSR1), re-raise it with the default disposition so
 * our parent sees the true exit reason.
 */
static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	/* Normal exit, or SIGUSR1 (used to request a dump): don't re-raise. */
	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
	kill(getpid(), signr);
}
205
206 static bool perf_evlist__equal(struct perf_evlist *evlist,
207 struct perf_evlist *other)
208 {
209 struct perf_evsel *pos, *pair;
210
211 if (evlist->nr_entries != other->nr_entries)
212 return false;
213
214 pair = perf_evlist__first(other);
215
216 list_for_each_entry(pos, &evlist->entries, node) {
217 if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
218 return false;
219 pair = perf_evsel__next(pair);
220 }
221
222 return true;
223 }
224
/*
 * Open a counter fd for every evsel on every cpu/thread, applying a
 * ladder of fallbacks for older kernels (no exclude_guest, no
 * sample_id_all, no hardware cycles event), then apply event filters
 * and mmap the ring buffers.  Returns 0 on success, negative errno
 * style value on failure.
 */
static int perf_record__open(struct perf_record *rec)
{
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	/*
	 * Set the evsel leader links before we configure attributes,
	 * since some might depend on this info.
	 */
	if (opts->group)
		perf_evlist__set_leader(evlist);

	perf_evlist__config_attrs(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
		struct perf_event_attr *attr = &pos->attr;
		/*
		 * Check if parse_single_tracepoint_event has already asked for
		 * PERF_SAMPLE_TIME.
		 *
		 * XXX this is kludgy but short term fix for problems introduced by
		 * eac23d1c that broke 'perf script' by having different sample_types
		 * when using multiple tracepoint events when we use a perf binary
		 * that tries to use sample_id_all on an older kernel.
		 *
		 * We need to move counter creation to perf_session, support
		 * different sample_types, etc.
		 */
		bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

fallback_missing_features:
		if (opts->exclude_guest_missing)
			attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
		attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			int err = errno;

			if (err == EPERM || err == EACCES) {
				ui__error_paranoid();
				rc = -err;
				goto out;
			} else if (err == ENODEV && opts->target.cpu_list) {
				pr_err("No such device - did you specify"
				       " an out-of-range profile CPU?\n");
				rc = -err;
				goto out;
			} else if (err == EINVAL) {
				/* Old-kernel fallbacks: retry with features stripped. */
				if (!opts->exclude_guest_missing &&
				    (attr->exclude_guest || attr->exclude_host)) {
					pr_debug("Old kernel, cannot exclude "
						 "guest or host samples.\n");
					opts->exclude_guest_missing = true;
					goto fallback_missing_features;
				} else if (!opts->sample_id_all_missing) {
					/*
					 * Old kernel, no attr->sample_id_type_all field
					 */
					opts->sample_id_all_missing = true;
					if (!opts->sample_time && !opts->raw_samples && !time_needed)
						attr->sample_type &= ~PERF_SAMPLE_TIME;

					goto retry_sample_id;
				}
			}

			/*
			 * If it's cycles then fall back to hrtimer
			 * based cpu-clock-tick sw counter, which
			 * is always available even if no PMU support.
			 *
			 * PPC returns ENXIO until 2.6.37 (behavior changed
			 * with commit b0a873e).
			 */
			if ((err == ENOENT || err == ENXIO)
			    && attr->type == PERF_TYPE_HARDWARE
			    && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

				if (verbose)
					ui__warning("The cycles event is not supported, "
						    "trying to fall back to cpu-clock-ticks\n");
				attr->type = PERF_TYPE_SOFTWARE;
				attr->config = PERF_COUNT_SW_CPU_CLOCK;
				if (pos->name) {
					free(pos->name);
					pos->name = NULL;
				}
				goto try_again;
			}

			if (err == ENOENT) {
				ui__error("The %s event is not supported.\n",
					  perf_evsel__name(pos));
				rc = -err;
				goto out;
			} else if ((err == EOPNOTSUPP) && (attr->precise_ip)) {
				ui__error("\'precise\' request may not be supported. "
					  "Try removing 'p' modifier\n");
				rc = -err;
				goto out;
			}

			printf("\n");
			error("sys_perf_event_open() syscall returned with %d "
			      "(%s) for event %s. /bin/dmesg may provide "
			      "additional information.\n",
			      err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
			if (attr->type == PERF_TYPE_HARDWARE &&
			    err == EOPNOTSUPP) {
				pr_err("No hardware sampling interrupt available."
				       " No APIC? If so then you can boot the kernel"
				       " with the \"lapic\" boot parameter to"
				       " force-enable it.\n");
				rc = -err;
				goto out;
			}
#endif

			pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
			rc = -err;
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
		      strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	/* When appending, the on-disk evlist must match what we just opened. */
	if (rec->file_new)
		session->evlist = evlist;
	else {
		if (!perf_evlist__equal(session->evlist, evlist)) {
			fprintf(stderr, "incompatible append\n");
			rc = -1;
			goto out;
		}
	}

	perf_session__set_id_hdr_size(session);
out:
	return rc;
}
395
396 static int process_buildids(struct perf_record *rec)
397 {
398 u64 size = lseek(rec->output, 0, SEEK_CUR);
399
400 if (size == 0)
401 return 0;
402
403 rec->session->fd = rec->output;
404 return __perf_session__process_events(rec->session, rec->post_processing_offset,
405 size - rec->post_processing_offset,
406 size, &build_id__mark_dso_hit_ops);
407 }
408
/*
 * on_exit() hook for a successful run: finalize the on-disk header
 * (data size, build-ids) and tear the session down.  Does nothing on
 * a non-zero exit status or when piping to stdout (no header there).
 */
static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}
428
/*
 * Per-machine callback used with perf_session__process_machines():
 * synthesize module and kernel mmap events for each guest machine.
 * The host is handled separately in __cmd_record().
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;

	if (machine__is_host(machine))
		return;

	/*
	 * As for guest kernel when processing subcommand record&report,
	 * we arrange module mmap prior to guest kernel mmap and trigger
	 * a preload dso because default guest module symbols are loaded
	 * from guest kallsyms instead of /lib/modules/XXX/XXX. This
	 * method is used to avoid symbol missing when the first addr is
	 * in module instead of in guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
	 * have no _text sometimes.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}
464
/*
 * Marker flushed after each full pass over the mmaps when tracing data
 * is present: tells 'perf report' it may safely process everything
 * queued for reordering so far.
 */
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};
469
470 static int perf_record__mmap_read_all(struct perf_record *rec)
471 {
472 int i;
473 int rc = 0;
474
475 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
476 if (rec->evlist->mmap[i].base) {
477 if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
478 rc = -1;
479 goto out;
480 }
481 }
482 }
483
484 if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
485 rc = write_output(rec, &finished_round_event,
486 sizeof(finished_round_event));
487
488 out:
489 return rc;
490 }
491
/*
 * The record main loop: set up the output file and session, synthesize
 * the initial metadata events (attrs, kernel/module mmaps, threads,
 * guest info), optionally start the traced workload, then drain the
 * ring buffers until told to stop.  Returns 0 on success, negative on
 * error.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);

	/* Default to perf.data, or pipe mode when stdout is a fifo. */
	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			/* Preserve the previous non-empty file as *.old. */
			if (rec->write_mode == WRITE_FORCE) {
				char oldname[PATH_MAX];
				snprintf(oldname, sizeof(oldname), "%s.old",
					 output_name);
				unlink(oldname);
				rename(output_name, oldname);
			}
		} else if (rec->write_mode == WRITE_APPEND) {
			/* Nothing to append to: fall back to a fresh file. */
			rec->write_mode = WRITE_FORCE;
		}
	}

	flags = O_CREAT|O_RDWR;
	if (rec->write_mode == WRITE_APPEND)
		rec->file_new = 0;
	else
		flags |= O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    rec->write_mode == WRITE_FORCE, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	/* Start with every header feature on, then clear the inapplicable. */
	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (!rec->file_new) {
		err = perf_session__read_header(session, output);
		if (err < 0)
			goto out_delete_session;
	}

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, opts, argv);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else if (rec->file_new) {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	/* Everything after this offset is event data: build-id pass starts here. */
	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = perf_session__find_host_machine(session);
	if (!machine) {
		pr_err("Couldn't find native kernel information.\n");
		err = -1;
		goto out_delete_session;
	}

	if (opts->pipe_output) {
		/*
		 * In pipe mode there is no file header for the consumer to
		 * read, so the metadata must be synthesized as in-stream
		 * events.
		 */
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		err = perf_event__synthesize_event_types(tool, process_synthesized_event,
							 machine);
		if (err < 0) {
			pr_err("Couldn't synthesize event_types.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME err <= 0 here actually means that
			 * there were no tracepoints so its not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate errors that now are calling die()
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			/* Written straight to the fd: account the bytes. */
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest)
		perf_session__process_machines(session, tool,
					       perf_event__synthesize_guest_os);

	if (!opts->target.system_wide)
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
							process_synthesized_event,
							machine);
	else
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
						     machine);

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			/* NOTE(review): poll() errors (e.g. EINTR) are ignored here. */
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}
765
/* Build one entry of the name -> PERF_SAMPLE_BRANCH_* mapping below. */
#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }	/* table terminator */

struct branch_mode {
	const char *name;	/* user-visible filter name */
	int mode;		/* corresponding PERF_SAMPLE_BRANCH_* flag */
};

/* All branch stack filter modes accepted by -j/--branch-filter. */
static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};
786
/*
 * Parse a comma separated branch filter list ("u,k,any_call", ...) into
 * a PERF_SAMPLE_BRANCH_* bitmask.  When only privilege-level bits end
 * up set (or no argument was given at all, as with bare -b), default
 * to sampling any taken branch.  Returns 0 on success, -1 on error.
 */
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
/* The privilege-level-only bits: u, k, hv. */
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER |\
	 PERF_SAMPLE_BRANCH_KERNEL |\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			/* Split on commas in place. */
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}
849
850 #ifdef LIBUNWIND_SUPPORT
851 static int get_stack_size(char *str, unsigned long *_size)
852 {
853 char *endptr;
854 unsigned long size;
855 unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
856
857 size = strtoul(str, &endptr, 0);
858
859 do {
860 if (*endptr)
861 break;
862
863 size = round_up(size, sizeof(u64));
864 if (!size || size > max_size)
865 break;
866
867 *_size = size;
868 return 0;
869
870 } while (0);
871
872 pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
873 max_size, str);
874 return -1;
875 }
876 #endif /* LIBUNWIND_SUPPORT */
877
878 static int
879 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
880 int unset)
881 {
882 struct perf_record *rec = (struct perf_record *)opt->value;
883 char *tok, *name, *saveptr = NULL;
884 char *buf;
885 int ret = -1;
886
887 /* --no-call-graph */
888 if (unset)
889 return 0;
890
891 /* We specified default option if none is provided. */
892 BUG_ON(!arg);
893
894 /* We need buffer that we know we can write to. */
895 buf = malloc(strlen(arg) + 1);
896 if (!buf)
897 return -ENOMEM;
898
899 strcpy(buf, arg);
900
901 tok = strtok_r((char *)buf, ",", &saveptr);
902 name = tok ? : (char *)buf;
903
904 do {
905 /* Framepointer style */
906 if (!strncmp(name, "fp", sizeof("fp"))) {
907 if (!strtok_r(NULL, ",", &saveptr)) {
908 rec->opts.call_graph = CALLCHAIN_FP;
909 ret = 0;
910 } else
911 pr_err("callchain: No more arguments "
912 "needed for -g fp\n");
913 break;
914
915 #ifdef LIBUNWIND_SUPPORT
916 /* Dwarf style */
917 } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
918 const unsigned long default_stack_dump_size = 8192;
919
920 ret = 0;
921 rec->opts.call_graph = CALLCHAIN_DWARF;
922 rec->opts.stack_dump_size = default_stack_dump_size;
923
924 tok = strtok_r(NULL, ",", &saveptr);
925 if (tok) {
926 unsigned long size = 0;
927
928 ret = get_stack_size(tok, &size);
929 rec->opts.stack_dump_size = size;
930 }
931
932 if (!ret)
933 pr_debug("callchain: stack dump size %d\n",
934 rec->opts.stack_dump_size);
935 #endif /* LIBUNWIND_SUPPORT */
936 } else {
937 pr_err("callchain: Unknown -g option "
938 "value: %s\n", arg);
939 break;
940 }
941
942 } while (0);
943
944 free(buf);
945
946 if (!ret)
947 pr_debug("callchain: type %d\n", rec->opts.call_graph);
948
949 return ret;
950 }
951
/* Usage lines shown by -h and usage_with_options(). */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/*
 * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
 * because we need to have access to it in perf_record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't ouch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		/* UINT_MAX/ULLONG_MAX mean "not set by the user" below. */
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
	.write_mode = WRITE_FORCE,
	.file_new   = true,
};
981
/* Common prefix of the -g help text; the suffix depends on unwind support. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		   "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		   "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
		    "system-wide collection from all CPUs"),
	OPT_BOOLEAN('A', "append", &record.append_file,
		    "append to the output file to do incremental profiling"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		   "list of cpus to monitor"),
	OPT_BOOLEAN('f', "force", &record.force,
		    "overwrite existing data file (deprecated)"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		   "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
			     callchain_help, &parse_callchain_opt,
			     "fp"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	/* -b and -j share the parser: -b takes no arg, -j takes a mask. */
	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
			   "branch any", "sample any taken branches",
			   parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_END()
};
1064
/*
 * Entry point for 'perf record': parse the command line, validate the
 * target spec, build the evsel/cpu/thread maps, reconcile period vs
 * frequency defaults and hand off to __cmd_record().
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evsel *pos;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new(NULL, NULL);
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	/* Neither a workload to fork nor a target to attach to: bail out. */
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (rec->force && rec->append_file) {
		ui__error("Can't overwrite and append at the same time."
			  " You need to choose between -f and -A");
		usage_with_options(record_usage, record_options);
	} else if (rec->append_file) {
		rec->write_mode = WRITE_APPEND;
	} else {
		rec->write_mode = WRITE_FORCE;
	}

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	/* No events given on the command line: add the default event. */
	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		/* An inconsistent target spec is only a warning: keep going. */
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_free_fd;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	list_for_each_entry(pos, &evsel_list->entries, node) {
		if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
			goto out_free_fd;
	}

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}
This page took 0.057703 seconds and 5 git commands to generate.