Commit | Line | Data |
---|---|---|
c9d05cfc MG |
1 | #!/usr/bin/perl |
2 | # This is a POC (proof of concept or piece of crap, take your pick) for reading the | |
3 | # text representation of trace output related to page allocation. It makes an attempt | |
4 | # to extract some high-level information on what is going on. The accuracy of the parser | |
5 | # may vary considerably | |
6 | # | |
7 | # Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe | |
8 | # other options | |
9 | # --prepend-parent Report on the parent proc and PID | |
10 | # --read-procstat If the trace lacks process info, get it from /proc | |
11 | # --ignore-pid Aggregate processes of the same name together | |
12 | # | |
13 | # Copyright (c) IBM Corporation 2009 | |
14 | # Author: Mel Gorman <mel@csn.ul.ie> | |
15 | use strict; | |
16 | use Getopt::Long; | |
17 | ||
18 | # Tracepoint events | |
19 | use constant MM_PAGE_ALLOC => 1; | |
b413d48a KK |
20 | use constant MM_PAGE_FREE => 2; |
21 | use constant MM_PAGE_FREE_BATCHED => 3; | |
c9d05cfc MG |
22 | use constant MM_PAGE_PCPU_DRAIN => 4; |
23 | use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5; | |
24 | use constant MM_PAGE_ALLOC_EXTFRAG => 6; | |
25 | use constant EVENT_UNKNOWN => 7; | |
26 | ||
27 | # Constants used to track state | |
28 | use constant STATE_PCPU_PAGES_DRAINED => 8; | |
29 | use constant STATE_PCPU_PAGES_REFILLED => 9; | |
30 | ||
31 | # High-level events extrapolated from tracepoints | |
32 | use constant HIGH_PCPU_DRAINS => 10; | |
33 | use constant HIGH_PCPU_REFILLS => 11; | |
34 | use constant HIGH_EXT_FRAGMENT => 12; | |
35 | use constant HIGH_EXT_FRAGMENT_SEVERE => 13; | |
36 | use constant HIGH_EXT_FRAGMENT_MODERATE => 14; | |
37 | use constant HIGH_EXT_FRAGMENT_CHANGED => 15; | |
38 | ||
39 | my %perprocesspid; | |
40 | my %perprocess; | |
41 | my $opt_ignorepid; | |
42 | my $opt_read_procstat; | |
43 | my $opt_prepend_parent; | |
44 | ||
45 | # Catch sigint and exit on request | |
46 | my $sigint_report = 0; | |
47 | my $sigint_exit = 0; | |
48 | my $sigint_pending = 0; | |
49 | my $sigint_received = 0; | |
50 | sub sigint_handler { | |
51 | my $current_time = time; | |
52 | if ($current_time - 2 > $sigint_received) { | |
53 | print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; | |
54 | $sigint_report = 1; | |
55 | } else { | |
56 | if (!$sigint_exit) { | |
57 | print "Second SIGINT received quickly, exiting\n"; | |
58 | } | |
59 | $sigint_exit++; | |
60 | } | |
61 | ||
62 | if ($sigint_exit > 3) { | |
63 | print "Many SIGINTs received, exiting now without report\n"; | |
64 | exit; | |
65 | } | |
66 | ||
67 | $sigint_received = $current_time; | |
68 | $sigint_pending = 1; | |
69 | } | |
70 | $SIG{INT} = "sigint_handler"; | |
71 | ||
72 | # Parse command line options | |
73 | GetOptions( | |
74 | 'ignore-pid' => \$opt_ignorepid, | |
75 | 'read-procstat' => \$opt_read_procstat, | |
76 | 'prepend-parent' => \$opt_prepend_parent, | |
77 | ); | |
78 | ||
79 | # Defaults for dynamically discovered regex's | |
80 | my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])'; | |
81 | ||
82 | # Dyanically discovered regex | |
83 | my $regex_fragdetails; | |
84 | ||
85 | # Static regex used. Specified like this for readability and for use with /o | |
86 | # (process_pid) (cpus ) ( time ) (tpoint ) (details) | |
87 | my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; | |
88 | my $regex_statname = '[-0-9]*\s\((.*)\).*'; | |
89 | my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; | |
90 | ||
91 | sub generate_traceevent_regex { | |
92 | my $event = shift; | |
93 | my $default = shift; | |
94 | my $regex; | |
95 | ||
96 | # Read the event format or use the default | |
97 | if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { | |
98 | $regex = $default; | |
99 | } else { | |
100 | my $line; | |
101 | while (!eof(FORMAT)) { | |
102 | $line = <FORMAT>; | |
103 | if ($line =~ /^print fmt:\s"(.*)",.*/) { | |
104 | $regex = $1; | |
105 | $regex =~ s/%p/\([0-9a-f]*\)/g; | |
106 | $regex =~ s/%d/\([-0-9]*\)/g; | |
107 | $regex =~ s/%lu/\([0-9]*\)/g; | |
108 | } | |
109 | } | |
110 | } | |
111 | ||
112 | # Verify fields are in the right order | |
113 | my $tuple; | |
114 | foreach $tuple (split /\s/, $regex) { | |
115 | my ($key, $value) = split(/=/, $tuple); | |
116 | my $expected = shift; | |
117 | if ($key ne $expected) { | |
118 | print("WARNING: Format not as expected '$key' != '$expected'"); | |
119 | $regex =~ s/$key=\((.*)\)/$key=$1/; | |
120 | } | |
121 | } | |
122 | ||
123 | if (defined shift) { | |
124 | die("Fewer fields than expected in format"); | |
125 | } | |
126 | ||
127 | return $regex; | |
128 | } | |
129 | $regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag", | |
130 | $regex_fragdetails_default, | |
131 | "page", "pfn", | |
132 | "alloc_order", "fallback_order", "pageblock_order", | |
133 | "alloc_migratetype", "fallback_migratetype", | |
134 | "fragmenting", "change_ownership"); | |
135 | ||
136 | sub read_statline($) { | |
137 | my $pid = $_[0]; | |
138 | my $statline; | |
139 | ||
140 | if (open(STAT, "/proc/$pid/stat")) { | |
141 | $statline = <STAT>; | |
142 | close(STAT); | |
143 | } | |
144 | ||
145 | if ($statline eq '') { | |
146 | $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; | |
147 | } | |
148 | ||
149 | return $statline; | |
150 | } | |
151 | ||
152 | sub guess_process_pid($$) { | |
153 | my $pid = $_[0]; | |
154 | my $statline = $_[1]; | |
155 | ||
156 | if ($pid == 0) { | |
157 | return "swapper-0"; | |
158 | } | |
159 | ||
160 | if ($statline !~ /$regex_statname/o) { | |
161 | die("Failed to math stat line for process name :: $statline"); | |
162 | } | |
163 | return "$1-$pid"; | |
164 | } | |
165 | ||
166 | sub parent_info($$) { | |
167 | my $pid = $_[0]; | |
168 | my $statline = $_[1]; | |
169 | my $ppid; | |
170 | ||
171 | if ($pid == 0) { | |
172 | return "NOPARENT-0"; | |
173 | } | |
174 | ||
175 | if ($statline !~ /$regex_statppid/o) { | |
176 | die("Failed to match stat line process ppid:: $statline"); | |
177 | } | |
178 | ||
179 | # Read the ppid stat line | |
180 | $ppid = $1; | |
181 | return guess_process_pid($ppid, read_statline($ppid)); | |
182 | } | |
183 | ||
184 | sub process_events { | |
185 | my $traceevent; | |
186 | my $process_pid; | |
187 | my $cpus; | |
188 | my $timestamp; | |
189 | my $tracepoint; | |
190 | my $details; | |
191 | my $statline; | |
192 | ||
193 | # Read each line of the event log | |
194 | EVENT_PROCESS: | |
195 | while ($traceevent = <STDIN>) { | |
196 | if ($traceevent =~ /$regex_traceevent/o) { | |
197 | $process_pid = $1; | |
198 | $tracepoint = $4; | |
199 | ||
200 | if ($opt_read_procstat || $opt_prepend_parent) { | |
201 | $process_pid =~ /(.*)-([0-9]*)$/; | |
202 | my $process = $1; | |
203 | my $pid = $2; | |
204 | ||
205 | $statline = read_statline($pid); | |
206 | ||
207 | if ($opt_read_procstat && $process eq '') { | |
208 | $process_pid = guess_process_pid($pid, $statline); | |
209 | } | |
210 | ||
211 | if ($opt_prepend_parent) { | |
212 | $process_pid = parent_info($pid, $statline) . " :: $process_pid"; | |
213 | } | |
214 | } | |
215 | ||
216 | # Unnecessary in this script. Uncomment if required | |
217 | # $cpus = $2; | |
218 | # $timestamp = $3; | |
219 | } else { | |
220 | next; | |
221 | } | |
222 | ||
223 | # Perl Switch() sucks majorly | |
224 | if ($tracepoint eq "mm_page_alloc") { | |
225 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++; | |
b413d48a KK |
226 | } elsif ($tracepoint eq "mm_page_free") { |
227 | $perprocesspid{$process_pid}->{MM_PAGE_FREE}++ | |
228 | } elsif ($tracepoint eq "mm_page_free_batched") { | |
229 | $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++; | |
c9d05cfc MG |
230 | } elsif ($tracepoint eq "mm_page_pcpu_drain") { |
231 | $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++; | |
232 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++; | |
233 | } elsif ($tracepoint eq "mm_page_alloc_zone_locked") { | |
234 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++; | |
235 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++; | |
236 | } elsif ($tracepoint eq "mm_page_alloc_extfrag") { | |
237 | ||
238 | # Extract the details of the event now | |
239 | $details = $5; | |
240 | ||
241 | my ($page, $pfn); | |
242 | my ($alloc_order, $fallback_order, $pageblock_order); | |
243 | my ($alloc_migratetype, $fallback_migratetype); | |
244 | my ($fragmenting, $change_ownership); | |
245 | ||
246 | if ($details !~ /$regex_fragdetails/o) { | |
247 | print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n"; | |
248 | next; | |
249 | } | |
250 | ||
251 | $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++; | |
252 | $page = $1; | |
253 | $pfn = $2; | |
254 | $alloc_order = $3; | |
255 | $fallback_order = $4; | |
256 | $pageblock_order = $5; | |
257 | $alloc_migratetype = $6; | |
258 | $fallback_migratetype = $7; | |
259 | $fragmenting = $8; | |
260 | $change_ownership = $9; | |
261 | ||
262 | if ($fragmenting) { | |
263 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++; | |
264 | if ($fallback_order <= 3) { | |
265 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++; | |
266 | } else { | |
267 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++; | |
268 | } | |
269 | } | |
270 | if ($change_ownership) { | |
271 | $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++; | |
272 | } | |
273 | } else { | |
274 | $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; | |
275 | } | |
276 | ||
277 | # Catch a full pcpu drain event | |
278 | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} && | |
279 | $tracepoint ne "mm_page_pcpu_drain") { | |
280 | ||
281 | $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++; | |
282 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | |
283 | } | |
284 | ||
285 | # Catch a full pcpu refill event | |
286 | if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} && | |
287 | $tracepoint ne "mm_page_alloc_zone_locked") { | |
288 | $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++; | |
289 | $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | |
290 | } | |
291 | ||
292 | if ($sigint_pending) { | |
293 | last EVENT_PROCESS; | |
294 | } | |
295 | } | |
296 | } | |
297 | ||
298 | sub dump_stats { | |
299 | my $hashref = shift; | |
300 | my %stats = %$hashref; | |
301 | ||
302 | # Dump per-process stats | |
303 | my $process_pid; | |
304 | my $max_strlen = 0; | |
305 | ||
306 | # Get the maximum process name | |
307 | foreach $process_pid (keys %perprocesspid) { | |
308 | my $len = length($process_pid); | |
309 | if ($len > $max_strlen) { | |
310 | $max_strlen = $len; | |
311 | } | |
312 | } | |
313 | $max_strlen += 2; | |
314 | ||
315 | printf("\n"); | |
316 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", | |
317 | "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown"); | |
318 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", | |
319 | "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", ""); | |
320 | ||
321 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n", | |
322 | "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", ""); | |
323 | ||
324 | foreach $process_pid (keys %stats) { | |
325 | # Dump final aggregates | |
326 | if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) { | |
327 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}++; | |
328 | $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0; | |
329 | } | |
330 | if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) { | |
331 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}++; | |
332 | $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0; | |
333 | } | |
334 | ||
335 | printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n", | |
336 | $process_pid, | |
337 | $stats{$process_pid}->{MM_PAGE_ALLOC}, | |
338 | $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}, | |
b413d48a KK |
339 | $stats{$process_pid}->{MM_PAGE_FREE}, |
340 | $stats{$process_pid}->{MM_PAGE_FREE_BATCHED}, | |
c9d05cfc MG |
341 | $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN}, |
342 | $stats{$process_pid}->{HIGH_PCPU_DRAINS}, | |
343 | $stats{$process_pid}->{HIGH_PCPU_REFILLS}, | |
344 | $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}, | |
345 | $stats{$process_pid}->{HIGH_EXT_FRAG}, | |
346 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}, | |
347 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}, | |
348 | $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}, | |
349 | $stats{$process_pid}->{EVENT_UNKNOWN}); | |
350 | } | |
351 | } | |
352 | ||
353 | sub aggregate_perprocesspid() { | |
354 | my $process_pid; | |
355 | my $process; | |
356 | undef %perprocess; | |
357 | ||
358 | foreach $process_pid (keys %perprocesspid) { | |
359 | $process = $process_pid; | |
360 | $process =~ s/-([0-9])*$//; | |
361 | if ($process eq '') { | |
362 | $process = "NO_PROCESS_NAME"; | |
363 | } | |
364 | ||
365 | $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}; | |
366 | $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}; | |
b413d48a KK |
367 | $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE}; |
368 | $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}; | |
c9d05cfc MG |
369 | $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}; |
370 | $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}; | |
371 | $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}; | |
372 | $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}; | |
373 | $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}; | |
374 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}; | |
375 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}; | |
376 | $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}; | |
377 | $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN}; | |
378 | } | |
379 | } | |
380 | ||
381 | sub report() { | |
382 | if (!$opt_ignorepid) { | |
383 | dump_stats(\%perprocesspid); | |
384 | } else { | |
385 | aggregate_perprocesspid(); | |
386 | dump_stats(\%perprocess); | |
387 | } | |
388 | } | |
389 | ||
390 | # Process events or signals until neither is available | |
391 | sub signal_loop() { | |
392 | my $sigint_processed; | |
393 | do { | |
394 | $sigint_processed = 0; | |
395 | process_events(); | |
396 | ||
397 | # Handle pending signals if any | |
398 | if ($sigint_pending) { | |
399 | my $current_time = time; | |
400 | ||
401 | if ($sigint_exit) { | |
402 | print "Received exit signal\n"; | |
403 | $sigint_pending = 0; | |
404 | } | |
405 | if ($sigint_report) { | |
406 | if ($current_time >= $sigint_received + 2) { | |
407 | report(); | |
408 | $sigint_report = 0; | |
409 | $sigint_pending = 0; | |
410 | $sigint_processed = 1; | |
411 | } | |
412 | } | |
413 | } | |
414 | } while ($sigint_pending || $sigint_processed); | |
415 | } | |
416 | ||
417 | signal_loop(); | |
418 | report(); |