5 #include "util/cache.h"
6 #include "util/symbol.h"
7 #include "util/thread.h"
8 #include "util/header.h"
10 #include "util/parse-options.h"
11 #include "util/trace-event.h"
13 #include "util/debug.h"
14 #include "util/data_map.h"
16 #include <linux/rbtree.h>
19 typedef int (*sort_fn_t
)(struct alloc_stat
*, struct alloc_stat
*);
21 static char const *input_name
= "perf.data";
23 static struct perf_header
*header
;
24 static u64 sample_type
;
26 static int alloc_flag
;
27 static int caller_flag
;
29 sort_fn_t alloc_sort_fn
;
30 sort_fn_t caller_sort_fn
;
32 static int alloc_lines
= -1;
33 static int caller_lines
= -1;
53 static struct rb_root root_alloc_stat
;
54 static struct rb_root root_alloc_sorted
;
55 static struct rb_root root_caller_stat
;
56 static struct rb_root root_caller_sorted
;
58 static unsigned long total_requested
, total_allocated
;
60 struct raw_event_sample
{
66 process_comm_event(event_t
*event
, unsigned long offset
, unsigned long head
)
68 struct thread
*thread
= threads__findnew(event
->comm
.pid
);
70 dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
71 (void *)(offset
+ head
),
72 (void *)(long)(event
->header
.size
),
73 event
->comm
.comm
, event
->comm
.pid
);
76 thread__set_comm(thread
, event
->comm
.comm
)) {
77 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
84 static void insert_alloc_stat(unsigned long ptr
,
85 int bytes_req
, int bytes_alloc
)
87 struct rb_node
**node
= &root_alloc_stat
.rb_node
;
88 struct rb_node
*parent
= NULL
;
89 struct alloc_stat
*data
= NULL
;
96 data
= rb_entry(*node
, struct alloc_stat
, node
);
99 node
= &(*node
)->rb_right
;
100 else if (ptr
< data
->ptr
)
101 node
= &(*node
)->rb_left
;
106 if (data
&& data
->ptr
== ptr
) {
108 data
->bytes_req
+= bytes_req
;
109 data
->bytes_alloc
+= bytes_req
;
111 data
= malloc(sizeof(*data
));
114 data
->bytes_req
= bytes_req
;
115 data
->bytes_alloc
= bytes_alloc
;
117 rb_link_node(&data
->node
, parent
, node
);
118 rb_insert_color(&data
->node
, &root_alloc_stat
);
122 static void insert_caller_stat(unsigned long call_site
,
123 int bytes_req
, int bytes_alloc
)
125 struct rb_node
**node
= &root_caller_stat
.rb_node
;
126 struct rb_node
*parent
= NULL
;
127 struct alloc_stat
*data
= NULL
;
134 data
= rb_entry(*node
, struct alloc_stat
, node
);
136 if (call_site
> data
->call_site
)
137 node
= &(*node
)->rb_right
;
138 else if (call_site
< data
->call_site
)
139 node
= &(*node
)->rb_left
;
144 if (data
&& data
->call_site
== call_site
) {
146 data
->bytes_req
+= bytes_req
;
147 data
->bytes_alloc
+= bytes_req
;
149 data
= malloc(sizeof(*data
));
150 data
->call_site
= call_site
;
152 data
->bytes_req
= bytes_req
;
153 data
->bytes_alloc
= bytes_alloc
;
155 rb_link_node(&data
->node
, parent
, node
);
156 rb_insert_color(&data
->node
, &root_caller_stat
);
160 static void process_alloc_event(struct raw_event_sample
*raw
,
163 u64 timestamp __used
,
164 struct thread
*thread __used
,
167 unsigned long call_site
;
172 ptr
= raw_field_value(event
, "ptr", raw
->data
);
173 call_site
= raw_field_value(event
, "call_site", raw
->data
);
174 bytes_req
= raw_field_value(event
, "bytes_req", raw
->data
);
175 bytes_alloc
= raw_field_value(event
, "bytes_alloc", raw
->data
);
177 insert_alloc_stat(ptr
, bytes_req
, bytes_alloc
);
178 insert_caller_stat(call_site
, bytes_req
, bytes_alloc
);
180 total_requested
+= bytes_req
;
181 total_allocated
+= bytes_alloc
;
184 static void process_free_event(struct raw_event_sample
*raw __used
,
185 struct event
*event __used
,
187 u64 timestamp __used
,
188 struct thread
*thread __used
)
193 process_raw_event(event_t
*raw_event __used
, void *more_data
,
194 int cpu
, u64 timestamp
, struct thread
*thread
)
196 struct raw_event_sample
*raw
= more_data
;
200 type
= trace_parse_common_type(raw
->data
);
201 event
= trace_find_event(type
);
203 if (!strcmp(event
->name
, "kmalloc") ||
204 !strcmp(event
->name
, "kmem_cache_alloc")) {
205 process_alloc_event(raw
, event
, cpu
, timestamp
, thread
, 0);
209 if (!strcmp(event
->name
, "kmalloc_node") ||
210 !strcmp(event
->name
, "kmem_cache_alloc_node")) {
211 process_alloc_event(raw
, event
, cpu
, timestamp
, thread
, 1);
215 if (!strcmp(event
->name
, "kfree") ||
216 !strcmp(event
->name
, "kmem_cache_free")) {
217 process_free_event(raw
, event
, cpu
, timestamp
, thread
);
223 process_sample_event(event_t
*event
, unsigned long offset
, unsigned long head
)
225 u64 ip
= event
->ip
.ip
;
229 void *more_data
= event
->ip
.__more_data
;
230 struct thread
*thread
= threads__findnew(event
->ip
.pid
);
232 if (sample_type
& PERF_SAMPLE_TIME
) {
233 timestamp
= *(u64
*)more_data
;
234 more_data
+= sizeof(u64
);
237 if (sample_type
& PERF_SAMPLE_CPU
) {
238 cpu
= *(u32
*)more_data
;
239 more_data
+= sizeof(u32
);
240 more_data
+= sizeof(u32
); /* reserved */
243 if (sample_type
& PERF_SAMPLE_PERIOD
) {
244 period
= *(u64
*)more_data
;
245 more_data
+= sizeof(u64
);
248 dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
249 (void *)(offset
+ head
),
250 (void *)(long)(event
->header
.size
),
252 event
->ip
.pid
, event
->ip
.tid
,
256 if (thread
== NULL
) {
257 pr_debug("problem processing %d event, skipping it.\n",
262 dump_printf(" ... thread: %s:%d\n", thread
->comm
, thread
->pid
);
264 process_raw_event(event
, more_data
, cpu
, timestamp
, thread
);
269 static int sample_type_check(u64 type
)
273 if (!(sample_type
& PERF_SAMPLE_RAW
)) {
275 "No trace sample to read. Did you call perf record "
283 static struct perf_file_handler file_handler
= {
284 .process_sample_event
= process_sample_event
,
285 .process_comm_event
= process_comm_event
,
286 .sample_type_check
= sample_type_check
,
289 static int read_events(void)
291 register_idle_thread();
292 register_perf_file_handler(&file_handler
);
294 return mmap_dispatch_perf_file(&header
, input_name
, NULL
, false, 0, 0,
/*
 * Percentage of allocated bytes that were not requested (slab padding).
 * Guards against n_alloc == 0 to avoid division by zero.
 */
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	return 100.0 - (100.0 * n_req / n_alloc);
}
306 static void __print_result(struct rb_root
*root
, int n_lines
, int is_caller
)
308 struct rb_node
*next
;
310 printf("%.78s\n", graph_dotted_line
);
311 printf("%-28s|", is_caller
? "Callsite": "Alloc Ptr");
312 printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n");
313 printf("%.78s\n", graph_dotted_line
);
315 next
= rb_first(root
);
317 while (next
&& n_lines
--) {
318 struct alloc_stat
*data
= rb_entry(next
, struct alloc_stat
,
320 struct symbol
*sym
= NULL
;
325 addr
= data
->call_site
;
326 sym
= kernel_maps__find_symbol(addr
, NULL
, NULL
);
331 snprintf(bf
, sizeof(bf
), "%s/%Lx", sym
->name
,
334 snprintf(bf
, sizeof(bf
), "%#Lx", addr
);
336 printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n",
337 bf
, (unsigned long long)data
->bytes_alloc
,
338 (unsigned long)data
->bytes_alloc
/ data
->hit
,
339 (unsigned long long)data
->bytes_req
,
340 (unsigned long)data
->bytes_req
/ data
->hit
,
341 (unsigned long)data
->hit
,
342 fragmentation(data
->bytes_req
, data
->bytes_alloc
));
344 next
= rb_next(next
);
348 printf(" ... | ... | ... | ... | ... \n");
350 printf(" ------------------------------------------------------------------------------\n");
353 static void print_summary(void)
355 printf("\nSUMMARY\n=======\n");
356 printf("Total bytes requested: %lu\n", total_requested
);
357 printf("Total bytes allocated: %lu\n", total_allocated
);
358 printf("Total bytes wasted on internal fragmentation: %lu\n",
359 total_allocated
- total_requested
);
360 printf("Internal fragmentation: %f%%\n",
361 fragmentation(total_requested
, total_allocated
));
364 static void print_result(void)
367 __print_result(&root_caller_sorted
, caller_lines
, 1);
369 __print_result(&root_alloc_sorted
, alloc_lines
, 0);
373 static void sort_insert(struct rb_root
*root
, struct alloc_stat
*data
,
376 struct rb_node
**new = &(root
->rb_node
);
377 struct rb_node
*parent
= NULL
;
380 struct alloc_stat
*this;
383 this = rb_entry(*new, struct alloc_stat
, node
);
386 cmp
= sort_fn(data
, this);
389 new = &((*new)->rb_left
);
391 new = &((*new)->rb_right
);
394 rb_link_node(&data
->node
, parent
, new);
395 rb_insert_color(&data
->node
, root
);
398 static void __sort_result(struct rb_root
*root
, struct rb_root
*root_sorted
,
401 struct rb_node
*node
;
402 struct alloc_stat
*data
;
405 node
= rb_first(root
);
409 rb_erase(node
, root
);
410 data
= rb_entry(node
, struct alloc_stat
, node
);
411 sort_insert(root_sorted
, data
, sort_fn
);
415 static void sort_result(void)
417 __sort_result(&root_alloc_stat
, &root_alloc_sorted
, alloc_sort_fn
);
418 __sort_result(&root_caller_stat
, &root_caller_sorted
, caller_sort_fn
);
/*
 * Analysis mode: read events, sort, print.
 * NOTE(review): body reconstructed from the helpers defined above —
 * confirm against the original.
 */
static int __cmd_kmem(void)
{
	setup_pager();
	read_events();
	sort_result();
	print_result();

	return 0;
}
431 static const char * const kmem_usage
[] = {
432 "perf kmem [<options>] {record}",
437 static int ptr_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
441 else if (l
->ptr
> r
->ptr
)
446 static int callsite_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
448 if (l
->call_site
< r
->call_site
)
450 else if (l
->call_site
> r
->call_site
)
455 static int hit_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
459 else if (l
->hit
> r
->hit
)
464 static int bytes_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
466 if (l
->bytes_alloc
< r
->bytes_alloc
)
468 else if (l
->bytes_alloc
> r
->bytes_alloc
)
473 static int frag_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
477 x
= fragmentation(l
->bytes_req
, l
->bytes_alloc
);
478 y
= fragmentation(r
->bytes_req
, r
->bytes_alloc
);
487 static int parse_sort_opt(const struct option
*opt __used
,
488 const char *arg
, int unset __used
)
495 if (strcmp(arg
, "ptr") == 0)
497 else if (strcmp(arg
, "call_site") == 0)
498 sort_fn
= callsite_cmp
;
499 else if (strcmp(arg
, "hit") == 0)
501 else if (strcmp(arg
, "bytes") == 0)
503 else if (strcmp(arg
, "frag") == 0)
508 if (caller_flag
> alloc_flag
)
509 caller_sort_fn
= sort_fn
;
511 alloc_sort_fn
= sort_fn
;
516 static int parse_stat_opt(const struct option
*opt __used
,
517 const char *arg
, int unset __used
)
522 if (strcmp(arg
, "alloc") == 0)
523 alloc_flag
= (caller_flag
+ 1);
524 else if (strcmp(arg
, "caller") == 0)
525 caller_flag
= (alloc_flag
+ 1);
531 static int parse_line_opt(const struct option
*opt __used
,
532 const char *arg
, int unset __used
)
539 lines
= strtoul(arg
, NULL
, 10);
541 if (caller_flag
> alloc_flag
)
542 caller_lines
= lines
;
549 static const struct option kmem_options
[] = {
550 OPT_STRING('i', "input", &input_name
, "file",
552 OPT_CALLBACK(0, "stat", NULL
, "<alloc>|<caller>",
553 "stat selector, Pass 'alloc' or 'caller'.",
555 OPT_CALLBACK('s', "sort", NULL
, "key",
556 "sort by key: ptr, call_site, hit, bytes, frag",
558 OPT_CALLBACK('l', "line", NULL
, "num",
/* Fixed argv prefix used when re-execing 'perf record' for kmem events. */
static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-c", "1",
	"-e", "kmem:kmalloc",
	"-e", "kmem:kmalloc_node",
	"-e", "kmem:kfree",
	"-e", "kmem:kmem_cache_alloc",
	"-e", "kmem:kmem_cache_alloc_node",
	"-e", "kmem:kmem_cache_free",
};
579 static int __cmd_record(int argc
, const char **argv
)
581 unsigned int rec_argc
, i
, j
;
582 const char **rec_argv
;
584 rec_argc
= ARRAY_SIZE(record_args
) + argc
- 1;
585 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
587 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
588 rec_argv
[i
] = strdup(record_args
[i
]);
590 for (j
= 1; j
< (unsigned int)argc
; j
++, i
++)
591 rec_argv
[i
] = argv
[j
];
593 return cmd_record(i
, rec_argv
, NULL
);
596 int cmd_kmem(int argc
, const char **argv
, const char *prefix __used
)
600 argc
= parse_options(argc
, argv
, kmem_options
, kmem_usage
, 0);
602 if (argc
&& !strncmp(argv
[0], "rec", 3))
603 return __cmd_record(argc
, argv
);
605 usage_with_options(kmem_usage
, kmem_options
);
608 alloc_sort_fn
= bytes_cmp
;
610 caller_sort_fn
= bytes_cmp
;