From: Ingo Molnar Date: Mon, 20 Apr 2009 13:58:01 +0000 (+0200) Subject: perf_counter tools: add 'perf record' command X-Git-Url: http://drtracing.org/?a=commitdiff_plain;h=e33e0a43736307512422e41aee6e24d5a8c39181;p=deliverable%2Flinux.git perf_counter tools: add 'perf record' command Move perf-record.c into the perf suite of commands. Signed-off-by: Ingo Molnar --- diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt new file mode 100644 index 000000000000..d07700e35eb2 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-record.txt @@ -0,0 +1,63 @@ +perf-record(1) +========== + +NAME +---- +perf-record - Run a command and record its profile into output.perf + +SYNOPSIS +-------- +[verse] +'perf record' [-e | --event=EVENT] [-l] [-a] + +DESCRIPTION +----------- +This command runs a command and gathers a performance counter profile +from it, into output.perf - without displaying anything. + +This file can then be inspected later on, using 'perf report'. + + +OPTIONS +------- +...:: + Any command you can specify in a shell. + +-e:: +--event=:: + 0:0: cpu-cycles + 0:0: cycles + 0:1: instructions + 0:2: cache-references + 0:3: cache-misses + 0:4: branch-instructions + 0:4: branches + 0:5: branch-misses + 0:6: bus-cycles + 1:0: cpu-clock + 1:1: task-clock + 1:2: page-faults + 1:2: faults + 1:5: minor-faults + 1:6: major-faults + 1:3: context-switches + 1:3: cs + 1:4: cpu-migrations + 1:4: migrations + rNNN: raw PMU events (eventsel+umask) + +-a:: + system-wide collection + +-l:: + scale counter values + +Configuration +------------- + +EXAMPLES +-------- + +SEE ALSO +-------- +linkperf:git-stat[1] diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index fb8b71744e59..b6c665eb22e4 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -309,6 +309,7 @@ LIB_OBJS += usage.o LIB_OBJS += wrapper.o BUILTIN_OBJS += builtin-help.o +BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c new file mode 100644 index 000000000000..4a50abf843ee --- /dev/null +++ b/Documentation/perf_counter/builtin-record.c @@ -0,0 +1,506 @@ + + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +#ifdef __x86_64__ +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __i386__ +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); +#endif + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +extern asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags); + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) + +static int nr_counters = 0; +static __u64 event_id[MAX_COUNTERS] = { }; +static int default_interval = 100000; +static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int nr_cpus = 0; +static unsigned int page_size; +static unsigned int mmap_pages = 16; +static int output; +static char *output_name = "output.perf"; +static int group = 0; +static unsigned int realtime_prio = 0; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +struct event_symbol { + __u64 event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, +}; + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static __u64 match_event_symbols(char *str) +{ + __u64 config, id; + int type; + unsigned int i; + + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return ~0ULL; +} + +static int parse_events(char *str) +{ + __u64 config; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; + + event_id[nr_counters] = config; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + +static void display_events_help(void) +{ + unsigned int i; + __u64 e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_help(void) +{ + printf( + "Usage: perf-record []\n" + "perf-record Options (up to %d event types can be specified at once):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -c CNT --count=CNT # event period to sample\n" + " -m pages --mmap_pages= # number of mmap data pages\n" + " -o file --output= # output file\n" + " -r prio --realtime= # use RT prio\n" + ); + + exit(0); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"event", required_argument, NULL, 'e'}, + {"mmap_pages", required_argument, NULL, 'm'}, + {"output", required_argument, NULL, 'o'}, + {"realtime", required_argument, NULL, 'r'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:c:e:m:o:r:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'c': default_interval = atoi(optarg); break; + case 'e': error = parse_events(optarg); break; + case 'm': mmap_pages = atoi(optarg); break; + case 'o': output_name = strdup(optarg); break; + case 'r': realtime_prio = atoi(optarg); break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + nr_counters = 1; + event_id[0] = 0; + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + event_count[counter] = default_interval; + } +} + +struct mmap_data { + int counter; + void *base; + unsigned int mask; + unsigned int prev; +}; + +static unsigned int mmap_read_head(struct mmap_data *md) +{ + struct perf_counter_mmap_page *pc = md->base; + int head; + + head = pc->data_head; + rmb(); + + return head; +} + +static long events; +static struct timeval last_read, this_read; + +static void mmap_read(struct mmap_data *md) +{ + unsigned int head = mmap_read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int diff; + + gettimeofday(&this_read, NULL); + + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and screw up the events under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + struct timeval iv; + unsigned long msecs; + + timersub(&this_read, &last_read, &iv); + msecs = iv.tv_sec*1000 + iv.tv_usec/1000; + + fprintf(stderr, "WARNING: failed to keep up with mmap data." + " Last read %lu msecs ago.\n", msecs); + + /* + * head points to a known good entry, start there. + */ + old = head; + } + + last_read = this_read; + + if (old != head) + events++; + + size = head - old; + + if ((old & md->mask) + size != (head & md->mask)) { + buf = &data[old & md->mask]; + size = md->mask + 1 - (old & md->mask); + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + } + + buf = &data[old & md->mask]; + size = head - old; + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + + md->prev = old; +} + +static volatile int done = 0; + +static void sigchld_handler(int sig) +{ + if (sig == SIGCHLD) + done = 1; +} + +int cmd_record(int argc, const char **argv) +{ + struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; + struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct perf_counter_hw_event hw_event; + int i, counter, group_fd, nr_poll = 0; + pid_t pid; + int ret; + + page_size = sysconf(_SC_PAGE_SIZE); + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); + if (output < 0) { + perror("failed to create output file"); + exit(-1); + } + + argc -= optind; + argv += optind; + + for (i = 0; i < nr_cpus; i++) { + group_fd = -1; + for (counter = 0; counter < nr_counters; counter++) { + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.irq_period = event_count[counter]; + hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; + hw_event.nmi = 1; + hw_event.mmap = 1; + hw_event.comm = 1; + + fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0); + if (fd[i][counter] < 0) { + int err = errno; + printf("kerneltop error: syscall returned with %d (%s)\n", + fd[i][counter], strerror(err)); + if (err == EPERM) + printf("Are you root?\n"); + exit(-1); + } + assert(fd[i][counter] >= 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); + + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[i][counter]; + + event_array[nr_poll].fd = fd[i][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[i][counter].counter = counter; + mmap_array[i][counter].prev = 0; + mmap_array[i][counter].mask = mmap_pages*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, fd[i][counter], 0); + if (mmap_array[i][counter].base == MAP_FAILED) { + printf("kerneltop error: failed to mmap with %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } + } + } + + signal(SIGCHLD, sigchld_handler); + + pid = fork(); + if (pid < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + + if (realtime_prio) { + struct sched_param param; + + param.sched_priority = realtime_prio; + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + printf("Could not set realtime priority.\n"); + exit(-1); + } + } + + /* + * TODO: store the current /proc/$/maps information somewhere + */ + + while (!done) { + int hits = events; + + for (i = 0; i < nr_cpus; i++) { + for (counter = 0; counter < nr_counters; counter++) + mmap_read(&mmap_array[i][counter]); + } + + if (hits == events) + ret = poll(event_array, nr_poll, 100); + } + + return 0; +} diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index 605323c691f1..5854b1715f54 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -14,6 +14,7 @@ extern void prune_packed_objects(int); extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); -extern int cmd_top(int argc, const char **argv, const char *prefix); +extern int cmd_record(int argc, const char **argv, const char *prefix); extern int cmd_stat(int argc, const char **argv, const char *prefix); +extern int cmd_top(int argc, const char **argv, const char *prefix); #endif diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt index 52455d46bfb5..d15210aa0cae 100644 --- a/Documentation/perf_counter/command-list.txt +++ b/Documentation/perf_counter/command-list.txt @@ -1,5 +1,6 @@ # List of known perf commands. # command name category [deprecated] [common] -perf-top mainporcelain common +perf-record mainporcelain common perf-stat mainporcelain common +perf-top mainporcelain common diff --git a/Documentation/perf_counter/perf-record.c b/Documentation/perf_counter/perf-record.c deleted file mode 100644 index 614de7c468b2..000000000000 --- a/Documentation/perf_counter/perf-record.c +++ /dev/null @@ -1,530 +0,0 @@ - - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "../../include/linux/perf_counter.h" - - -/* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define rdclock() \ -({ \ - struct timespec ts; \ - \ - clock_gettime(CLOCK_MONOTONIC, &ts); \ - ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ -}) - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -#ifdef __x86_64__ -#define __NR_perf_counter_open 295 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __i386__ -#define __NR_perf_counter_open 333 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __powerpc__ -#define __NR_perf_counter_open 319 -#define rmb() asm volatile ("sync" ::: "memory") -#define cpu_relax() asm volatile ("" ::: "memory"); -#endif - -#define unlikely(x) __builtin_expect(!!(x), 0) -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -asmlinkage int sys_perf_counter_open( - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - return syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -} - -#define MAX_COUNTERS 64 -#define MAX_NR_CPUS 256 - -#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) - -static int nr_counters = 0; -static __u64 event_id[MAX_COUNTERS] = { }; -static int default_interval = 100000; -static int event_count[MAX_COUNTERS]; -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int nr_cpus = 0; -static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int output; -static char *output_name = "output.perf"; -static int group = 0; -static unsigned int realtime_prio = 0; - -const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -static char *hw_event_names[] = { - "CPU cycles", - "instructions", - "cache references", - "cache misses", - "branches", - "branch misses", - "bus cycles", -}; - -static char *sw_event_names[] = { - "cpu clock ticks", - "task clock ticks", - "pagefaults", - "context switches", - "CPU migrations", - "minor faults", - "major faults", -}; - -struct event_symbol { - __u64 event; - char *symbol; -}; - -static struct event_symbol event_symbols[] = { - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, - - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, -}; - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static __u64 match_event_symbols(char *str) -{ - __u64 config, id; - int type; - unsigned int i; - - if (sscanf(str, "r%llx", &config) == 1) - return config | PERF_COUNTER_RAW_MASK; - - if (sscanf(str, "%d:%llu", &type, &id) == 2) - return EID(type, id); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; - } - - return ~0ULL; -} - -static int parse_events(char *str) -{ - __u64 config; - -again: - if (nr_counters == MAX_COUNTERS) - return -1; - - config = match_event_symbols(str); - if (config == ~0ULL) - return -1; - - event_id[nr_counters] = config; - nr_counters++; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } - - return 0; -} - -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) - -static void display_events_help(void) -{ - unsigned int i; - __u64 e; - - printf( - " -e EVENT --event=EVENT # symbolic-name abbreviations"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - int type, id; - - e = event_symbols[i].event; - type = PERF_COUNTER_TYPE(e); - id = PERF_COUNTER_ID(e); - - printf("\n %d:%d: %-20s", - type, id, event_symbols[i].symbol); - } - - printf("\n" - " rNNN: raw PMU events (eventsel+umask)\n\n"); -} - -static void display_help(void) -{ - printf( - "Usage: perf-record []\n" - "perf-record Options (up to %d event types can be specified at once):\n\n", - MAX_COUNTERS); - - display_events_help(); - - printf( - " -c CNT --count=CNT # event period to sample\n" - " -m pages --mmap_pages= # number of mmap data pages\n" - " -o file --output= # output file\n" - " -r prio --realtime= # use RT prio\n" - ); - - exit(0); -} - -static void process_options(int argc, char *argv[]) -{ - int error = 0, counter; - - for (;;) { - int option_index = 0; - /** Options for getopt */ - static struct option long_options[] = { - {"count", required_argument, NULL, 'c'}, - {"event", required_argument, NULL, 'e'}, - {"mmap_pages", required_argument, NULL, 'm'}, - {"output", required_argument, NULL, 'o'}, - {"realtime", required_argument, NULL, 'r'}, - {NULL, 0, NULL, 0 } - }; - int c = getopt_long(argc, argv, "+:c:e:m:o:r:", - long_options, &option_index); - if (c == -1) - break; - - switch (c) { - case 'c': default_interval = atoi(optarg); break; - case 'e': error = parse_events(optarg); break; - case 'm': mmap_pages = atoi(optarg); break; - case 'o': output_name = strdup(optarg); break; - case 'r': realtime_prio = atoi(optarg); break; - default: error = 1; break; - } - } - if (error) - display_help(); - - if (!nr_counters) { - nr_counters = 1; - event_id[0] = 0; - } - - for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) - continue; - - event_count[counter] = default_interval; - } -} - -struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; -}; - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_counter_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - rmb(); - - return head; -} - -static long events; -static struct timeval last_read, this_read; - -static void mmap_read(struct mmap_data *md) -{ - unsigned int head = mmap_read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; - unsigned long size; - void *buf; - int diff; - - gettimeofday(&this_read, NULL); - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and screw up the events under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - last_read = this_read; - - if (old != head) - events++; - - size = head - old; - - if ((old & md->mask) + size != (head & md->mask)) { - buf = &data[old & md->mask]; - size = md->mask + 1 - (old & md->mask); - old += size; - while (size) { - int ret = write(output, buf, size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - size -= ret; - buf += ret; - } - } - - buf = &data[old & md->mask]; - size = head - old; - old += size; - while (size) { - int ret = write(output, buf, size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - size -= ret; - buf += ret; - } - - md->prev = old; -} - -static volatile int done = 0; - -static void sigchld_handler(int sig) -{ - if (sig == SIGCHLD) - done = 1; -} - -int main(int argc, char *argv[]) -{ - struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; - struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - struct perf_counter_hw_event hw_event; - int i, counter, group_fd, nr_poll = 0; - pid_t pid; - int ret; - - page_size = sysconf(_SC_PAGE_SIZE); - - process_options(argc, argv); - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); - if (output < 0) { - perror("failed to create output file"); - exit(-1); - } - - argc -= optind; - argv += optind; - - for (i = 0; i < nr_cpus; i++) { - group_fd = -1; - for (counter = 0; counter < nr_counters; counter++) { - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.config = event_id[counter]; - hw_event.irq_period = event_count[counter]; - hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; - hw_event.nmi = 1; - hw_event.mmap = 1; - hw_event.comm = 1; - - fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0); - if (fd[i][counter] < 0) { - int err = errno; - printf("kerneltop error: syscall returned with %d (%s)\n", - fd[i][counter], strerror(err)); - if (err == EPERM) - printf("Are you root?\n"); - exit(-1); - } - assert(fd[i][counter] >= 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[i][counter]; - - event_array[nr_poll].fd = fd[i][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[i][counter].counter = counter; - mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = mmap_pages*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[i][counter], 0); - if (mmap_array[i][counter].base == MAP_FAILED) { - printf("kerneltop error: failed to mmap with %d (%s)\n", - errno, strerror(errno)); - exit(-1); - } - } - } - - signal(SIGCHLD, sigchld_handler); - - pid = fork(); - if (pid < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], argv)) { - perror(argv[0]); - exit(-1); - } - } - - if (realtime_prio) { - struct sched_param param; - - param.sched_priority = realtime_prio; - if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); - exit(-1); - } - } - - /* - * TODO: store the current /proc/$/maps information somewhere - */ - - while (!done) { - int hits = events; - - for (i = 0; i < nr_cpus; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); - } - - if (hits == events) - ret = poll(event_array, nr_poll, 100); - } - - return 0; -} diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index ff8658f2a2f1..e849dd66b5ef 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -248,8 +248,9 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { - { "top", cmd_top, 0 }, + { "record", cmd_record, 0 }, { "stat", cmd_stat, 0 }, + { "top", cmd_top, 0 }, }; int i; static const char ext[] = STRIP_EXTENSION;