6 #include "util/parse-options.h"
7 #include "util/parse-events.h"
11 #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
12 #define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
14 static int default_interval
= 100000;
15 static int event_count
[MAX_COUNTERS
];
17 static int fd
[MAX_NR_CPUS
][MAX_COUNTERS
];
18 static int nr_cpus
= 0;
19 static unsigned int page_size
;
20 static unsigned int mmap_pages
= 16;
22 static const char *output_name
= "perf.data";
24 static unsigned int realtime_prio
= 0;
25 static int system_wide
= 0;
26 static pid_t target_pid
= -1;
27 static int inherit
= 1;
30 const unsigned int default_count
[] = {
46 static unsigned int mmap_read_head(struct mmap_data
*md
)
48 struct perf_counter_mmap_page
*pc
= md
->base
;
58 static struct timeval last_read
, this_read
;
60 static void mmap_read(struct mmap_data
*md
)
62 unsigned int head
= mmap_read_head(md
);
63 unsigned int old
= md
->prev
;
64 unsigned char *data
= md
->base
+ page_size
;
69 gettimeofday(&this_read
, NULL
);
72 * If we're further behind than half the buffer, there's a chance
73 * the writer will bite our tail and screw up the events under us.
75 * If we somehow ended up ahead of the head, we got messed up.
77 * In either case, truncate and restart at head.
80 if (diff
> md
->mask
/ 2 || diff
< 0) {
84 timersub(&this_read
, &last_read
, &iv
);
85 msecs
= iv
.tv_sec
*1000 + iv
.tv_usec
/1000;
87 fprintf(stderr
, "WARNING: failed to keep up with mmap data."
88 " Last read %lu msecs ago.\n", msecs
);
91 * head points to a known good entry, start there.
96 last_read
= this_read
;
103 if ((old
& md
->mask
) + size
!= (head
& md
->mask
)) {
104 buf
= &data
[old
& md
->mask
];
105 size
= md
->mask
+ 1 - (old
& md
->mask
);
108 int ret
= write(output
, buf
, size
);
110 perror("failed to write");
118 buf
= &data
[old
& md
->mask
];
122 int ret
= write(output
, buf
, size
);
124 perror("failed to write");
134 static volatile int done
= 0;
136 static void sig_handler(int sig
)
141 static struct pollfd event_array
[MAX_NR_CPUS
* MAX_COUNTERS
];
142 static struct mmap_data mmap_array
[MAX_NR_CPUS
][MAX_COUNTERS
];
148 struct perf_event_header header
;
154 char filename
[PATH_MAX
];
158 struct perf_event_header header
;
164 static pid_t
pid_synthesize_comm_event(pid_t pid
)
166 struct comm_event comm_ev
;
167 char filename
[PATH_MAX
];
175 snprintf(filename
, sizeof(filename
), "/proc/%d/stat", pid
);
177 fd
= open(filename
, O_RDONLY
);
179 fprintf(stderr
, "couldn't open %s\n", filename
);
182 if (read(fd
, bf
, sizeof(bf
)) < 0) {
183 fprintf(stderr
, "couldn't read %s\n", filename
);
188 memset(&comm_ev
, 0, sizeof(comm_ev
));
189 nr
= sscanf(bf
, "%d %s %c %d %d ",
190 &spid
, comm
, &state
, &ppid
, &comm_ev
.pid
);
192 fprintf(stderr
, "couldn't get COMM and pgid, malformed %s\n",
196 comm_ev
.header
.type
= PERF_EVENT_COMM
;
199 comm
[--size
] = '\0'; /* Remove the ')' at the end */
200 --size
; /* Remove the '(' at the begin */
201 memcpy(comm_ev
.comm
, comm
+ 1, size
);
202 size
= ALIGN(size
, sizeof(uint64_t));
203 comm_ev
.header
.size
= sizeof(comm_ev
) - (sizeof(comm_ev
.comm
) - size
);
205 ret
= write(output
, &comm_ev
, comm_ev
.header
.size
);
207 perror("failed to write");
213 static void pid_synthesize_mmap_events(pid_t pid
, pid_t pgid
)
215 char filename
[PATH_MAX
];
218 snprintf(filename
, sizeof(filename
), "/proc/%d/maps", pid
);
220 fp
= fopen(filename
, "r");
222 fprintf(stderr
, "couldn't open %s\n", filename
);
227 unsigned char vm_read
, vm_write
, vm_exec
, vm_mayshare
;
228 struct mmap_event mmap_ev
= {
229 .header
.type
= PERF_EVENT_MMAP
,
234 if (fgets(bf
, sizeof(bf
), fp
) == NULL
)
237 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
238 sscanf(bf
, "%llx-%llx %c%c%c%c %llx %x:%x %lu",
239 &mmap_ev
.start
, &mmap_ev
.len
,
240 &vm_read
, &vm_write
, &vm_exec
, &vm_mayshare
,
241 &mmap_ev
.pgoff
, &major
, &minor
, &ino
);
242 if (vm_exec
== 'x') {
243 char *execname
= strrchr(bf
, ' ');
245 if (execname
== NULL
|| execname
[1] != '/')
249 size
= strlen(execname
);
250 execname
[size
- 1] = '\0'; /* Remove \n */
251 memcpy(mmap_ev
.filename
, execname
, size
);
252 size
= ALIGN(size
, sizeof(uint64_t));
253 mmap_ev
.len
-= mmap_ev
.start
;
254 mmap_ev
.header
.size
= (sizeof(mmap_ev
) -
255 (sizeof(mmap_ev
.filename
) - size
));
259 if (write(output
, &mmap_ev
, mmap_ev
.header
.size
) < 0) {
260 perror("failed to write");
269 static void open_counters(int cpu
, pid_t pid
)
271 struct perf_counter_hw_event hw_event
;
272 int counter
, group_fd
;
276 pid_t pgid
= pid_synthesize_comm_event(pid
);
277 pid_synthesize_mmap_events(pid
, pgid
);
281 for (counter
= 0; counter
< nr_counters
; counter
++) {
283 memset(&hw_event
, 0, sizeof(hw_event
));
284 hw_event
.config
= event_id
[counter
];
285 hw_event
.irq_period
= event_count
[counter
];
286 hw_event
.record_type
= PERF_RECORD_IP
| PERF_RECORD_TID
;
288 hw_event
.mmap
= track
;
289 hw_event
.comm
= track
;
290 hw_event
.inherit
= (cpu
< 0) && inherit
;
292 track
= 0; // only the first counter needs these
294 fd
[nr_cpu
][counter
] =
295 sys_perf_counter_open(&hw_event
, pid
, cpu
, group_fd
, 0);
297 if (fd
[nr_cpu
][counter
] < 0) {
299 printf("kerneltop error: syscall returned with %d (%s)\n",
300 fd
[nr_cpu
][counter
], strerror(err
));
302 printf("Are you root?\n");
305 assert(fd
[nr_cpu
][counter
] >= 0);
306 fcntl(fd
[nr_cpu
][counter
], F_SETFL
, O_NONBLOCK
);
309 * First counter acts as the group leader:
311 if (group
&& group_fd
== -1)
312 group_fd
= fd
[nr_cpu
][counter
];
314 event_array
[nr_poll
].fd
= fd
[nr_cpu
][counter
];
315 event_array
[nr_poll
].events
= POLLIN
;
318 mmap_array
[nr_cpu
][counter
].counter
= counter
;
319 mmap_array
[nr_cpu
][counter
].prev
= 0;
320 mmap_array
[nr_cpu
][counter
].mask
= mmap_pages
*page_size
- 1;
321 mmap_array
[nr_cpu
][counter
].base
= mmap(NULL
, (mmap_pages
+1)*page_size
,
322 PROT_READ
, MAP_SHARED
, fd
[nr_cpu
][counter
], 0);
323 if (mmap_array
[nr_cpu
][counter
].base
== MAP_FAILED
) {
324 printf("kerneltop error: failed to mmap with %d (%s)\n",
325 errno
, strerror(errno
));
332 static int __cmd_record(int argc
, const char **argv
)
338 page_size
= sysconf(_SC_PAGE_SIZE
);
339 nr_cpus
= sysconf(_SC_NPROCESSORS_ONLN
);
340 assert(nr_cpus
<= MAX_NR_CPUS
);
341 assert(nr_cpus
>= 0);
343 output
= open(output_name
, O_CREAT
|O_RDWR
, S_IRWXU
);
345 perror("failed to create output file");
350 open_counters(-1, target_pid
!= -1 ? target_pid
: 0);
351 } else for (i
= 0; i
< nr_cpus
; i
++)
352 open_counters(i
, target_pid
);
354 signal(SIGCHLD
, sig_handler
);
355 signal(SIGINT
, sig_handler
);
357 if (target_pid
== -1 && argc
) {
360 perror("failed to fork");
363 if (execvp(argv
[0], (char **)argv
)) {
371 struct sched_param param
;
373 param
.sched_priority
= realtime_prio
;
374 if (sched_setscheduler(0, SCHED_FIFO
, ¶m
)) {
375 printf("Could not set realtime priority.\n");
381 * TODO: store the current /proc/$/maps information somewhere
387 for (i
= 0; i
< nr_cpu
; i
++) {
388 for (counter
= 0; counter
< nr_counters
; counter
++)
389 mmap_read(&mmap_array
[i
][counter
]);
393 ret
= poll(event_array
, nr_poll
, 100);
399 static const char * const record_usage
[] = {
400 "perf record [<options>] <command>",
404 static char events_help_msg
[EVENTS_HELP_MAX
];
406 static const struct option options
[] = {
407 OPT_CALLBACK('e', "event", NULL
, "event",
408 events_help_msg
, parse_events
),
409 OPT_INTEGER('c', "count", &default_interval
,
410 "event period to sample"),
411 OPT_INTEGER('m', "mmap-pages", &mmap_pages
,
412 "number of mmap data pages"),
413 OPT_STRING('o', "output", &output_name
, "file",
415 OPT_BOOLEAN('i', "inherit", &inherit
,
416 "child tasks inherit counters"),
417 OPT_INTEGER('p', "pid", &target_pid
,
418 "record events on existing pid"),
419 OPT_INTEGER('r', "realtime", &realtime_prio
,
420 "collect data with this RT SCHED_FIFO priority"),
421 OPT_BOOLEAN('a', "all-cpus", &system_wide
,
422 "system-wide collection from all CPUs"),
426 int cmd_record(int argc
, const char **argv
, const char *prefix
)
430 create_events_help(events_help_msg
);
432 argc
= parse_options(argc
, argv
, options
, record_usage
, 0);
433 if (!argc
&& target_pid
== -1 && !system_wide
)
434 usage_with_options(record_usage
, options
);
441 for (counter
= 0; counter
< nr_counters
; counter
++) {
442 if (event_count
[counter
])
445 event_count
[counter
] = default_interval
;
448 return __cmd_record(argc
, argv
);