2003-04-08 Andrew Cagney <cagney@redhat.com>
[deliverable/binutils-gdb.git] / gprof / cg_print.c
CommitLineData
ef368dac
NC
1/* cg_print.c - Print routines for displaying call graphs.
2
af6dfb5b 3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
ef368dac
NC
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21\f
252b5132 22#include "libiberty.h"
6d9c411a
AM
23#include "gprof.h"
24#include "search_list.h"
25#include "source.h"
26#include "symtab.h"
252b5132
RH
27#include "cg_arcs.h"
28#include "cg_print.h"
29#include "hist.h"
30#include "utils.h"
1355568a 31#include "corefile.h"
252b5132 32
ef368dac 33/* Return value of comparison functions used to sort tables. */
252b5132
RH
34#define LESSTHAN -1
35#define EQUALTO 0
36#define GREATERTHAN 1
37
1355568a
AM
38static void print_header PARAMS ((void));
39static void print_cycle PARAMS ((Sym *));
40static int cmp_member PARAMS ((Sym *, Sym *));
41static void sort_members PARAMS ((Sym *));
42static void print_members PARAMS ((Sym *));
43static int cmp_arc PARAMS ((Arc *, Arc *));
44static void sort_parents PARAMS ((Sym *));
45static void print_parents PARAMS ((Sym *));
46static void sort_children PARAMS ((Sym *));
47static void print_children PARAMS ((Sym *));
48static void print_line PARAMS ((Sym *));
49static int cmp_name PARAMS ((const PTR, const PTR));
50static int cmp_arc_count PARAMS ((const PTR, const PTR));
51static int cmp_fun_nuses PARAMS ((const PTR, const PTR));
52static void order_and_dump_functions_by_arcs
53 PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *));
54
ef368dac 55/* Declarations of automatically generated functions to output blurbs. */
252b5132
RH
56extern void bsd_callg_blurb PARAMS ((FILE * fp));
57extern void fsf_callg_blurb PARAMS ((FILE * fp));
58
59double print_time = 0.0;
60
61
62static void
1355568a 63print_header ()
252b5132
RH
64{
65 if (first_output)
b34976b6 66 first_output = FALSE;
252b5132 67 else
ef368dac 68 printf ("\f\n");
0eee5820 69
252b5132
RH
70 if (!bsd_style_output)
71 {
72 if (print_descriptions)
ef368dac 73 printf (_("\t\t Call graph (explanation follows)\n\n"));
252b5132 74 else
ef368dac 75 printf (_("\t\t\tCall graph\n\n"));
252b5132 76 }
0eee5820 77
252b5132
RH
78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 (long) hist_scale * sizeof (UNIT));
0eee5820 80
252b5132 81 if (print_time > 0.0)
ef368dac
NC
82 printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 100.0 / print_time, print_time / hz);
252b5132
RH
84 else
85 {
86 printf (_(" no time propagated\n\n"));
0eee5820 87
ef368dac 88 /* This doesn't hurt, since all the numerators will be 0.0. */
252b5132
RH
89 print_time = 1.0;
90 }
0eee5820 91
252b5132
RH
92 if (bsd_style_output)
93 {
94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
95 "", "", "", "", _("called"), _("total"), _("parents"));
96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
af6dfb5b 97 _("index"), _("%time"), _("self"), _("descendants"),
252b5132
RH
98 _("called"), _("self"), _("name"), _("index"));
99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
100 "", "", "", "", _("called"), _("total"), _("children"));
101 printf ("\n");
102 }
103 else
104 {
105 printf (_("index %% time self children called name\n"));
106 }
107}
108
ef368dac 109/* Print a cycle header. */
252b5132 110
252b5132 111static void
1355568a
AM
112print_cycle (cyc)
113 Sym *cyc;
252b5132
RH
114{
115 char buf[BUFSIZ];
116
117 sprintf (buf, "[%d]", cyc->cg.index);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
0eee5820 123
252b5132 124 if (cyc->cg.self_calls != 0)
ef368dac 125 printf ("+%-7lu", cyc->cg.self_calls);
252b5132 126 else
ef368dac
NC
127 printf (" %7.7s", "");
128
252b5132
RH
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
130}
131
ef368dac
NC
132/* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
252b5132 134
252b5132 135static int
1355568a
AM
136cmp_member (left, right)
137 Sym *left;
138 Sym *right;
252b5132
RH
139{
140 double left_time = left->cg.prop.self + left->cg.prop.child;
141 double right_time = right->cg.prop.self + right->cg.prop.child;
142 unsigned long left_calls = left->ncalls + left->cg.self_calls;
143 unsigned long right_calls = right->ncalls + right->cg.self_calls;
144
145 if (left_time > right_time)
ef368dac
NC
146 return GREATERTHAN;
147
252b5132 148 if (left_time < right_time)
ef368dac 149 return LESSTHAN;
252b5132
RH
150
151 if (left_calls > right_calls)
ef368dac
NC
152 return GREATERTHAN;
153
252b5132 154 if (left_calls < right_calls)
ef368dac
NC
155 return LESSTHAN;
156
252b5132
RH
157 return EQUALTO;
158}
159
ef368dac 160/* Sort members of a cycle. */
252b5132 161
252b5132 162static void
1355568a
AM
163sort_members (cyc)
164 Sym *cyc;
252b5132
RH
165{
166 Sym *todo, *doing, *prev;
0eee5820 167
ef368dac
NC
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
252b5132
RH
170 todo = cyc->cg.cyc.next;
171 cyc->cg.cyc.next = 0;
0eee5820 172
252b5132
RH
173 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
174 {
175 todo = doing->cg.cyc.next;
0eee5820 176
252b5132
RH
177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178 {
179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
ef368dac 180 break;
252b5132 181 }
0eee5820 182
252b5132
RH
183 doing->cg.cyc.next = prev->cg.cyc.next;
184 prev->cg.cyc.next = doing;
185 }
186}
187
ef368dac 188/* Print the members of a cycle. */
252b5132 189
252b5132 190static void
1355568a
AM
191print_members (cyc)
192 Sym *cyc;
252b5132
RH
193{
194 Sym *member;
195
196 sort_members (cyc);
0eee5820 197
252b5132
RH
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199 {
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
0eee5820 205
252b5132 206 if (member->cg.self_calls != 0)
ef368dac 207 printf ("+%-7lu", member->cg.self_calls);
252b5132 208 else
ef368dac
NC
209 printf (" %7.7s", "");
210
252b5132
RH
211 printf (" ");
212 print_name (member);
213 printf ("\n");
214 }
215}
216
ef368dac 217/* Compare two arcs to/from the same child/parent.
0eee5820
AM
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
252b5132 224
252b5132 225static int
1355568a
AM
226cmp_arc (left, right)
227 Arc *left;
228 Arc *right;
252b5132
RH
229{
230 Sym *left_parent = left->parent;
231 Sym *left_child = left->child;
232 Sym *right_parent = right->parent;
233 Sym *right_child = right->child;
234 double left_time, right_time;
235
236 DBG (TIMEDEBUG,
237 printf ("[cmp_arc] ");
238 print_name (left_parent);
239 printf (" calls ");
240 print_name (left_child);
241 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
242 left->count, left_child->ncalls);
243 printf ("[cmp_arc] ");
244 print_name (right_parent);
245 printf (" calls ");
246 print_name (right_child);
247 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
248 right->count, right_child->ncalls);
249 printf ("\n");
250 );
0eee5820 251
252b5132 252 if (left_parent == left_child)
ef368dac
NC
253 return LESSTHAN; /* Left is a self call. */
254
252b5132 255 if (right_parent == right_child)
ef368dac 256 return GREATERTHAN; /* Right is a self call. */
252b5132
RH
257
258 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
259 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
260 {
ef368dac 261 /* Left is a call within a cycle. */
252b5132
RH
262 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
263 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
264 {
ef368dac 265 /* Right is a call within the cycle, too. */
252b5132 266 if (left->count < right->count)
ef368dac
NC
267 return LESSTHAN;
268
252b5132 269 if (left->count > right->count)
ef368dac
NC
270 return GREATERTHAN;
271
252b5132
RH
272 return EQUALTO;
273 }
274 else
275 {
ef368dac 276 /* Right isn't a call within the cycle. */
252b5132
RH
277 return LESSTHAN;
278 }
279 }
280 else
281 {
ef368dac 282 /* Left isn't a call within a cycle. */
252b5132
RH
283 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
284 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
285 {
ef368dac 286 /* Right is a call within a cycle. */
252b5132
RH
287 return GREATERTHAN;
288 }
289 else
290 {
ef368dac 291 /* Neither is a call within a cycle. */
252b5132
RH
292 left_time = left->time + left->child_time;
293 right_time = right->time + right->child_time;
0eee5820 294
252b5132 295 if (left_time < right_time)
ef368dac
NC
296 return LESSTHAN;
297
252b5132 298 if (left_time > right_time)
ef368dac
NC
299 return GREATERTHAN;
300
252b5132 301 if (left->count < right->count)
ef368dac
NC
302 return LESSTHAN;
303
252b5132 304 if (left->count > right->count)
ef368dac
NC
305 return GREATERTHAN;
306
252b5132
RH
307 return EQUALTO;
308 }
309 }
310}
311
312
313static void
1355568a
AM
314sort_parents (child)
315 Sym * child;
252b5132
RH
316{
317 Arc *arc, *detached, sorted, *prev;
318
ef368dac
NC
319 /* Unlink parents from child, then insertion sort back on to
320 sorted's parents.
0eee5820
AM
321 *arc the arc you have detached and are inserting.
322 *detached the rest of the arcs to be sorted.
323 sorted arc list onto which you insertion sort.
324 *prev arc before the arc you are comparing. */
252b5132 325 sorted.next_parent = 0;
0eee5820 326
252b5132
RH
327 for (arc = child->cg.parents; arc; arc = detached)
328 {
329 detached = arc->next_parent;
330
ef368dac 331 /* Consider *arc as disconnected; insert it into sorted. */
252b5132
RH
332 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
333 {
334 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
ef368dac 335 break;
252b5132 336 }
0eee5820 337
252b5132
RH
338 arc->next_parent = prev->next_parent;
339 prev->next_parent = arc;
340 }
341
ef368dac 342 /* Reattach sorted arcs to child. */
252b5132
RH
343 child->cg.parents = sorted.next_parent;
344}
345
346
347static void
1355568a
AM
348print_parents (child)
349 Sym *child;
252b5132
RH
350{
351 Sym *parent;
352 Arc *arc;
353 Sym *cycle_head;
354
355 if (child->cg.cyc.head != 0)
ef368dac 356 cycle_head = child->cg.cyc.head;
252b5132 357 else
ef368dac 358 cycle_head = child;
0eee5820 359
252b5132
RH
360 if (!child->cg.parents)
361 {
362 printf (bsd_style_output
363 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
364 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
365 "", "", "", "", "", "");
366 return;
367 }
0eee5820 368
252b5132 369 sort_parents (child);
0eee5820 370
252b5132
RH
371 for (arc = child->cg.parents; arc; arc = arc->next_parent)
372 {
373 parent = arc->parent;
374 if (child == parent || (child->cg.cyc.num != 0
375 && parent->cg.cyc.num == child->cg.cyc.num))
376 {
ef368dac 377 /* Selfcall or call among siblings. */
252b5132
RH
378 printf (bsd_style_output
379 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
380 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
381 "", "", "", "",
382 arc->count, "");
383 print_name (parent);
384 printf ("\n");
385 }
386 else
387 {
ef368dac 388 /* Regular parent of child. */
252b5132
RH
389 printf (bsd_style_output
390 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
391 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
392 "", "",
393 arc->time / hz, arc->child_time / hz,
394 arc->count, cycle_head->ncalls);
395 print_name (parent);
396 printf ("\n");
397 }
398 }
399}
400
401
402static void
1355568a
AM
403sort_children (parent)
404 Sym *parent;
252b5132
RH
405{
406 Arc *arc, *detached, sorted, *prev;
0eee5820 407
ef368dac
NC
408 /* Unlink children from parent, then insertion sort back on to
409 sorted's children.
0eee5820
AM
410 *arc the arc you have detached and are inserting.
411 *detached the rest of the arcs to be sorted.
412 sorted arc list onto which you insertion sort.
413 *prev arc before the arc you are comparing. */
252b5132 414 sorted.next_child = 0;
0eee5820 415
252b5132
RH
416 for (arc = parent->cg.children; arc; arc = detached)
417 {
418 detached = arc->next_child;
419
ef368dac 420 /* Consider *arc as disconnected; insert it into sorted. */
252b5132
RH
421 for (prev = &sorted; prev->next_child; prev = prev->next_child)
422 {
423 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
ef368dac 424 break;
252b5132 425 }
0eee5820 426
252b5132
RH
427 arc->next_child = prev->next_child;
428 prev->next_child = arc;
429 }
430
ef368dac 431 /* Reattach sorted children to parent. */
252b5132
RH
432 parent->cg.children = sorted.next_child;
433}
434
435
436static void
1355568a
AM
437print_children (parent)
438 Sym *parent;
252b5132
RH
439{
440 Sym *child;
441 Arc *arc;
442
443 sort_children (parent);
444 arc = parent->cg.children;
0eee5820 445
252b5132
RH
446 for (arc = parent->cg.children; arc; arc = arc->next_child)
447 {
448 child = arc->child;
449 if (child == parent || (child->cg.cyc.num != 0
450 && child->cg.cyc.num == parent->cg.cyc.num))
451 {
ef368dac 452 /* Self call or call to sibling. */
252b5132
RH
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
455 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
456 "", "", "", "", arc->count, "");
457 print_name (child);
458 printf ("\n");
459 }
460 else
461 {
ef368dac 462 /* Regular child of parent. */
252b5132
RH
463 printf (bsd_style_output
464 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
465 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
466 "", "",
467 arc->time / hz, arc->child_time / hz,
468 arc->count, child->cg.cyc.head->ncalls);
469 print_name (child);
470 printf ("\n");
471 }
472 }
473}
474
475
476static void
1355568a
AM
477print_line (np)
478 Sym *np;
252b5132
RH
479{
480 char buf[BUFSIZ];
481
482 sprintf (buf, "[%d]", np->cg.index);
483 printf (bsd_style_output
484 ? "%-6.6s %5.1f %7.2f %11.2f"
485 : "%-6.6s %5.1f %7.2f %7.2f", buf,
486 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
487 np->cg.prop.self / hz, np->cg.prop.child / hz);
0eee5820 488
252b5132
RH
489 if ((np->ncalls + np->cg.self_calls) != 0)
490 {
491 printf (" %7lu", np->ncalls);
0eee5820 492
252b5132 493 if (np->cg.self_calls != 0)
252b5132 494 printf ("+%-7lu ", np->cg.self_calls);
252b5132 495 else
252b5132 496 printf (" %7.7s ", "");
252b5132
RH
497 }
498 else
499 {
500 printf (" %7.7s %7.7s ", "", "");
501 }
0eee5820 502
252b5132
RH
503 print_name (np);
504 printf ("\n");
505}
506
507
ef368dac
NC
508/* Print dynamic call graph. */
509
252b5132 510void
1355568a
AM
511cg_print (timesortsym)
512 Sym ** timesortsym;
252b5132
RH
513{
514 unsigned int index;
515 Sym *parent;
516
517 if (print_descriptions && bsd_style_output)
ef368dac 518 bsd_callg_blurb (stdout);
252b5132
RH
519
520 print_header ();
521
522 for (index = 0; index < symtab.len + num_cycles; ++index)
523 {
524 parent = timesortsym[index];
0eee5820 525
252b5132
RH
526 if ((ignore_zeros && parent->ncalls == 0
527 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
528 && parent->cg.prop.child == 0)
529 || !parent->cg.print_flag
530 || (line_granularity && ! parent->is_func))
ef368dac 531 continue;
0eee5820 532
252b5132
RH
533 if (!parent->name && parent->cg.cyc.num != 0)
534 {
ef368dac 535 /* Cycle header. */
252b5132
RH
536 print_cycle (parent);
537 print_members (parent);
538 }
539 else
540 {
541 print_parents (parent);
542 print_line (parent);
543 print_children (parent);
544 }
0eee5820 545
252b5132
RH
546 if (bsd_style_output)
547 printf ("\n");
0eee5820 548
252b5132 549 printf ("-----------------------------------------------\n");
0eee5820 550
252b5132
RH
551 if (bsd_style_output)
552 printf ("\n");
553 }
0eee5820 554
252b5132 555 free (timesortsym);
0eee5820 556
252b5132 557 if (print_descriptions && !bsd_style_output)
ef368dac 558 fsf_callg_blurb (stdout);
252b5132
RH
559}
560
561
562static int
1355568a
AM
563cmp_name (left, right)
564 const PTR left;
565 const PTR right;
252b5132
RH
566{
567 const Sym **npp1 = (const Sym **) left;
568 const Sym **npp2 = (const Sym **) right;
569
570 return strcmp ((*npp1)->name, (*npp2)->name);
571}
572
573
574void
1355568a 575cg_print_index ()
252b5132
RH
576{
577 unsigned int index;
578 unsigned int nnames, todo, i, j;
579 int col, starting_col;
580 Sym **name_sorted_syms, *sym;
581 const char *filename;
582 char buf[20];
ef368dac 583 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
0eee5820 584
ef368dac
NC
585 /* Now, sort regular function name
586 alphabetically to create an index. */
252b5132 587 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
0eee5820 588
252b5132
RH
589 for (index = 0, nnames = 0; index < symtab.len; index++)
590 {
591 if (ignore_zeros && symtab.base[index].ncalls == 0
592 && symtab.base[index].hist.time == 0)
ef368dac
NC
593 continue;
594
252b5132
RH
595 name_sorted_syms[nnames++] = &symtab.base[index];
596 }
0eee5820 597
252b5132 598 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
0eee5820 599
252b5132 600 for (index = 1, todo = nnames; index <= num_cycles; index++)
ef368dac
NC
601 name_sorted_syms[todo++] = &cycle_header[index];
602
252b5132
RH
603 printf ("\f\n");
604 printf (_("Index by function name\n\n"));
605 index = (todo + 2) / 3;
0eee5820 606
252b5132
RH
607 for (i = 0; i < index; i++)
608 {
609 col = 0;
610 starting_col = 0;
0eee5820 611
252b5132
RH
612 for (j = i; j < todo; j += index)
613 {
614 sym = name_sorted_syms[j];
0eee5820 615
252b5132 616 if (sym->cg.print_flag)
ef368dac 617 sprintf (buf, "[%d]", sym->cg.index);
252b5132 618 else
ef368dac
NC
619 sprintf (buf, "(%d)", sym->cg.index);
620
252b5132
RH
621 if (j < nnames)
622 {
623 if (bsd_style_output)
624 {
625 printf ("%6.6s %-19.19s", buf, sym->name);
626 }
627 else
628 {
629 col += strlen (buf);
0eee5820 630
252b5132 631 for (; col < starting_col + 5; ++col)
ef368dac
NC
632 putchar (' ');
633
252b5132
RH
634 printf (" %s ", buf);
635 col += print_name_only (sym);
0eee5820 636
252b5132
RH
637 if (!line_granularity && sym->is_static && sym->file)
638 {
639 filename = sym->file->name;
0eee5820 640
252b5132
RH
641 if (!print_path)
642 {
643 filename = strrchr (filename, '/');
0eee5820 644
252b5132 645 if (filename)
ef368dac 646 ++filename;
252b5132 647 else
ef368dac 648 filename = sym->file->name;
252b5132 649 }
0eee5820 650
252b5132
RH
651 printf (" (%s)", filename);
652 col += strlen (filename) + 3;
653 }
654 }
655 }
656 else
657 {
658 if (bsd_style_output)
659 {
660 printf ("%6.6s ", buf);
661 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
662 printf ("%-19.19s", buf);
663 }
664 else
665 {
666 col += strlen (buf);
667 for (; col < starting_col + 5; ++col)
668 putchar (' ');
669 printf (" %s ", buf);
670 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
671 printf ("%s", buf);
672 col += strlen (buf);
673 }
674 }
0eee5820 675
252b5132
RH
676 starting_col += column_width;
677 }
0eee5820 678
252b5132
RH
679 printf ("\n");
680 }
0eee5820 681
252b5132
RH
682 free (name_sorted_syms);
683}
684
ef368dac
NC
685/* Compare two arcs based on their usage counts.
686 We want to sort in descending order. */
687
252b5132 688static int
1355568a
AM
689cmp_arc_count (left, right)
690 const PTR left;
691 const PTR right;
252b5132
RH
692{
693 const Arc **npp1 = (const Arc **) left;
694 const Arc **npp2 = (const Arc **) right;
695
696 if ((*npp1)->count > (*npp2)->count)
697 return -1;
698 else if ((*npp1)->count < (*npp2)->count)
699 return 1;
700 else
701 return 0;
702}
703
ef368dac
NC
704/* Compare two funtions based on their usage counts.
705 We want to sort in descending order. */
706
252b5132 707static int
1355568a
AM
708cmp_fun_nuses (left, right)
709 const PTR left;
710 const PTR right;
252b5132
RH
711{
712 const Sym **npp1 = (const Sym **) left;
713 const Sym **npp2 = (const Sym **) right;
714
715 if ((*npp1)->nuses > (*npp2)->nuses)
716 return -1;
717 else if ((*npp1)->nuses < (*npp2)->nuses)
718 return 1;
719 else
720 return 0;
721}
722
723/* Print a suggested function ordering based on the profiling data.
724
725 We perform 4 major steps when ordering functions:
726
727 * Group unused functions together and place them at the
728 end of the function order.
729
730 * Search the highest use arcs (those which account for 90% of
731 the total arc count) for functions which have several parents.
732
733 Group those with the most call sites together (currently the
734 top 1.25% which have at least five different call sites).
735
736 These are emitted at the start of the function order.
737
738 * Use a greedy placement algorithm to place functions which
739 occur in the top 99% of the arcs in the profile. Some provisions
740 are made to handle high usage arcs where the parent and/or
741 child has already been placed.
742
743 * Run the same greedy placement algorithm on the remaining
744 arcs to place the leftover functions.
745
746
747 The various "magic numbers" should (one day) be tuneable by command
748 line options. They were arrived at by benchmarking a few applications
749 with various values to see which values produced better overall function
750 orderings.
751
752 Of course, profiling errors, machine limitations (PA long calls), and
753 poor cutoff values for the placement algorithm may limit the usefullness
754 of the resulting function order. Improvements would be greatly appreciated.
0eee5820 755
252b5132
RH
756 Suggestions:
757
758 * Place the functions with many callers near the middle of the
759 list to reduce long calls.
760
761 * Propagate arc usage changes as functions are placed. Ie if
762 func1 and func2 are placed together, arcs to/from those arcs
763 to the same parent/child should be combined, then resort the
764 arcs to choose the next one.
765
766 * Implement some global positioning algorithm to place the
767 chains made by the greedy local positioning algorithm. Probably
768 by examining arcs which haven't been placed yet to tie two
769 chains together.
770
771 * Take a function's size and time into account in the algorithm;
772 size in particular is important on the PA (long calls). Placing
773 many small functions onto their own page may be wise.
774
775 * Use better profiling information; many published algorithms
776 are based on call sequences through time, rather than just
777 arc counts.
778
779 * Prodecure cloning could improve performance when a small number
780 of arcs account for most of the calls to a particular function.
781
782 * Use relocation information to avoid moving unused functions
783 completely out of the code stream; this would avoid severe lossage
784 when the profile data bears little resemblance to actual runs.
785
786 * Propagation of arc usages should also improve .o link line
787 ordering which shares the same arc placement algorithm with
788 the function ordering code (in fact it is a degenerate case
789 of function ordering). */
0eee5820 790
252b5132 791void
1355568a 792cg_print_function_ordering ()
252b5132
RH
793{
794 unsigned long index, used, unused, scratch_index;
795 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
796#ifdef __GNUC__
797 unsigned long long total_arcs, tmp_arcs_count;
798#else
799 unsigned long total_arcs, tmp_arcs_count;
800#endif
801 Sym **unused_syms, **used_syms, **scratch_syms;
802 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
803
804 index = 0;
805 used = 0;
806 unused = 0;
807 scratch_index = 0;
808 unplaced_arc_count = 0;
809 high_arc_count = 0;
810 scratch_arc_count = 0;
811
812 /* First group all the unused functions together. */
813 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
814 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
815 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
816 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
817 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
818 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
819
820 /* Walk through all the functions; mark those which are never
821 called as placed (we'll emit them as a group later). */
822 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
823 {
824 if (symtab.base[index].ncalls == 0)
825 {
826 /* Filter out gprof generated names. */
827 if (strcmp (symtab.base[index].name, "<locore>")
828 && strcmp (symtab.base[index].name, "<hicore>"))
829 {
830 unused_syms[unused++] = &symtab.base[index];
831 symtab.base[index].has_been_placed = 1;
832 }
833 }
834 else
835 {
836 used_syms[used++] = &symtab.base[index];
837 symtab.base[index].has_been_placed = 0;
838 symtab.base[index].next = 0;
839 symtab.base[index].prev = 0;
840 symtab.base[index].nuses = 0;
841 }
842 }
843
844 /* Sort the arcs from most used to least used. */
845 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
846
847 /* Compute the total arc count. Also mark arcs as unplaced.
848
849 Note we don't compensate for overflow if that happens!
850 Overflow is much less likely when this file is compiled
851 with GCC as it can double-wide integers via long long. */
852 total_arcs = 0;
853 for (index = 0; index < numarcs; index++)
854 {
855 total_arcs += arcs[index]->count;
856 arcs[index]->has_been_placed = 0;
857 }
858
859 /* We want to pull out those functions which are referenced
860 by many highly used arcs and emit them as a group. This
861 could probably use some tuning. */
862 tmp_arcs_count = 0;
863 for (index = 0; index < numarcs; index++)
864 {
865 tmp_arcs_count += arcs[index]->count;
866
867 /* Count how many times each parent and child are used up
868 to our threshhold of arcs (90%). */
869 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
870 break;
871
872 arcs[index]->child->nuses++;
873 }
874
875 /* Now sort a temporary symbol table based on the number of
876 times each function was used in the highest used arcs. */
877 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
878 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
879
880 /* Now pick out those symbols we're going to emit as
881 a group. We take up to 1.25% of the used symbols. */
882 for (index = 0; index < used / 80; index++)
883 {
884 Sym *sym = scratch_syms[index];
885 Arc *arc;
886
887 /* If we hit symbols that aren't used from many call sites,
888 then we can quit. We choose five as the low limit for
889 no particular reason. */
890 if (sym->nuses == 5)
891 break;
892
893 /* We're going to need the arcs between these functions.
894 Unfortunately, we don't know all these functions
895 until we're done. So we keep track of all the arcs
896 to the functions we care about, then prune out those
0eee5820 897 which are uninteresting.
252b5132
RH
898
899 An interesting variation would be to quit when we found
900 multi-call site functions which account for some percentage
901 of the arcs. */
252b5132 902 arc = sym->cg.children;
0eee5820 903
252b5132
RH
904 while (arc)
905 {
906 if (arc->parent != arc->child)
907 scratch_arcs[scratch_arc_count++] = arc;
908 arc->has_been_placed = 1;
909 arc = arc->next_child;
910 }
911
912 arc = sym->cg.parents;
0eee5820 913
252b5132
RH
914 while (arc)
915 {
916 if (arc->parent != arc->child)
917 scratch_arcs[scratch_arc_count++] = arc;
918 arc->has_been_placed = 1;
919 arc = arc->next_parent;
920 }
921
922 /* Keep track of how many symbols we're going to place. */
923 scratch_index = index;
924
ef368dac
NC
925 /* A lie, but it makes identifying
926 these functions easier later. */
252b5132
RH
927 sym->has_been_placed = 1;
928 }
929
ef368dac
NC
930 /* Now walk through the temporary arcs and copy
931 those we care about into the high arcs array. */
252b5132
RH
932 for (index = 0; index < scratch_arc_count; index++)
933 {
934 Arc *arc = scratch_arcs[index];
935
936 /* If this arc refers to highly used functions, then
937 then we want to keep it. */
938 if (arc->child->has_been_placed
939 && arc->parent->has_been_placed)
940 {
941 high_arcs[high_arc_count++] = scratch_arcs[index];
942
943 /* We need to turn of has_been_placed since we're going to
944 use the main arc placement algorithm on these arcs. */
945 arc->child->has_been_placed = 0;
946 arc->parent->has_been_placed = 0;
947 }
948 }
949
ef368dac
NC
950 /* Dump the multi-site high usage functions which are not
951 going to be ordered by the main ordering algorithm. */
252b5132
RH
952 for (index = 0; index < scratch_index; index++)
953 {
954 if (scratch_syms[index]->has_been_placed)
955 printf ("%s\n", scratch_syms[index]->name);
956 }
957
ef368dac
NC
958 /* Now we can order the multi-site high use
959 functions based on the arcs between them. */
252b5132
RH
960 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
961 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
962 unplaced_arcs, &unplaced_arc_count);
963
ef368dac
NC
964 /* Order and dump the high use functions left,
965 these typically have only a few call sites. */
252b5132
RH
966 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
967 unplaced_arcs, &unplaced_arc_count);
968
969 /* Now place the rarely used functions. */
970 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
971 scratch_arcs, &scratch_arc_count);
972
973 /* Output any functions not emitted by the order_and_dump calls. */
974 for (index = 0; index < used; index++)
975 if (used_syms[index]->has_been_placed == 0)
976 printf("%s\n", used_syms[index]->name);
977
978 /* Output the unused functions. */
979 for (index = 0; index < unused; index++)
980 printf("%s\n", unused_syms[index]->name);
981
982 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
983 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
984 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
985 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
986 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
987 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
988
989 free (unused_syms);
990 free (used_syms);
991 free (scratch_syms);
992 free (high_arcs);
993 free (scratch_arcs);
994 free (unplaced_arcs);
995}
996
1355568a 997/* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
252b5132
RH
998 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
999
1355568a
AM
1000 If ALL is nonzero, then place all functions referenced by THE_ARCS,
1001 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
252b5132
RH
1002
1003#define MOST 0.99
1004static void
1355568a 1005order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
252b5132 1006 unplaced_arcs, unplaced_arc_count)
1355568a
AM
1007 Arc **the_arcs;
1008 unsigned long arc_count;
252b5132
RH
1009 int all;
1010 Arc **unplaced_arcs;
1011 unsigned long *unplaced_arc_count;
1012{
1013#ifdef __GNUC__
1014 unsigned long long tmp_arcs, total_arcs;
1015#else
1016 unsigned long tmp_arcs, total_arcs;
1017#endif
1018 unsigned int index;
1019
1020 /* If needed, compute the total arc count.
1021
1022 Note we don't compensate for overflow if that happens! */
1023 if (! all)
1024 {
1025 total_arcs = 0;
1355568a
AM
1026 for (index = 0; index < arc_count; index++)
1027 total_arcs += the_arcs[index]->count;
252b5132
RH
1028 }
1029 else
1030 total_arcs = 0;
1031
1032 tmp_arcs = 0;
0eee5820 1033
1355568a 1034 for (index = 0; index < arc_count; index++)
252b5132
RH
1035 {
1036 Sym *sym1, *sym2;
1037 Sym *child, *parent;
1038
1355568a 1039 tmp_arcs += the_arcs[index]->count;
252b5132
RH
1040
1041 /* Ignore this arc if it's already been placed. */
1355568a 1042 if (the_arcs[index]->has_been_placed)
252b5132
RH
1043 continue;
1044
1355568a
AM
1045 child = the_arcs[index]->child;
1046 parent = the_arcs[index]->parent;
252b5132
RH
1047
1048 /* If we're not using all arcs, and this is a rarely used
1049 arc, then put it on the unplaced_arc list. Similarly
1050 if both the parent and child of this arc have been placed. */
1051 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1052 || child->has_been_placed || parent->has_been_placed)
1053 {
1355568a 1054 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
252b5132
RH
1055 continue;
1056 }
1057
1058 /* If all slots in the parent and child are full, then there isn't
1059 anything we can do right now. We'll place this arc on the
1060 unplaced arc list in the hope that a global positioning
1061 algorithm can use it to place function chains. */
1062 if (parent->next && parent->prev && child->next && child->prev)
1063 {
1355568a 1064 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
252b5132
RH
1065 continue;
1066 }
1067
1068 /* If the parent is unattached, then find the closest
1069 place to attach it onto child's chain. Similarly
1070 for the opposite case. */
1071 if (!parent->next && !parent->prev)
1072 {
1073 int next_count = 0;
1074 int prev_count = 0;
1075 Sym *prev = child;
1076 Sym *next = child;
1077
1078 /* Walk to the beginning and end of the child's chain. */
1079 while (next->next)
1080 {
1081 next = next->next;
1082 next_count++;
1083 }
1084
1085 while (prev->prev)
1086 {
1087 prev = prev->prev;
1088 prev_count++;
1089 }
0eee5820 1090
252b5132
RH
1091 /* Choose the closest. */
1092 child = next_count < prev_count ? next : prev;
ef368dac 1093 }
252b5132
RH
1094 else if (! child->next && !child->prev)
1095 {
1096 int next_count = 0;
1097 int prev_count = 0;
1098 Sym *prev = parent;
1099 Sym *next = parent;
1100
1101 while (next->next)
1102 {
1103 next = next->next;
1104 next_count++;
1105 }
1106
1107 while (prev->prev)
1108 {
1109 prev = prev->prev;
1110 prev_count++;
1111 }
1112
1113 parent = prev_count < next_count ? prev : next;
1114 }
1115 else
1116 {
1117 /* Couldn't find anywhere to attach the functions,
1118 put the arc on the unplaced arc list. */
1355568a 1119 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
252b5132
RH
1120 continue;
1121 }
1122
1123 /* Make sure we don't tie two ends together. */
1124 sym1 = parent;
1125 if (sym1->next)
1126 while (sym1->next)
1127 sym1 = sym1->next;
1128 else
1129 while (sym1->prev)
1130 sym1 = sym1->prev;
1131
1132 sym2 = child;
1133 if (sym2->next)
1134 while (sym2->next)
1135 sym2 = sym2->next;
1136 else
1137 while (sym2->prev)
1138 sym2 = sym2->prev;
1139
1140 if (sym1 == child
1141 && sym2 == parent)
1142 {
1143 /* This would tie two ends together. */
1355568a 1144 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
252b5132
RH
1145 continue;
1146 }
1147
1148 if (parent->next)
1149 {
1150 /* Must attach to the parent's prev field. */
1151 if (! child->next)
1152 {
1153 /* parent-prev and child-next */
1154 parent->prev = child;
1155 child->next = parent;
1355568a 1156 the_arcs[index]->has_been_placed = 1;
252b5132
RH
1157 }
1158 }
1159 else if (parent->prev)
1160 {
1161 /* Must attach to the parent's next field. */
1162 if (! child->prev)
1163 {
1164 /* parent-next and child-prev */
1165 parent->next = child;
1166 child->prev = parent;
1355568a 1167 the_arcs[index]->has_been_placed = 1;
252b5132
RH
1168 }
1169 }
1170 else
1171 {
1172 /* Can attach to either field in the parent, depends
1173 on where we've got space in the child. */
1174 if (child->prev)
1175 {
ef368dac 1176 /* parent-prev and child-next. */
252b5132
RH
1177 parent->prev = child;
1178 child->next = parent;
1355568a 1179 the_arcs[index]->has_been_placed = 1;
252b5132
RH
1180 }
1181 else
1182 {
ef368dac 1183 /* parent-next and child-prev. */
252b5132
RH
1184 parent->next = child;
1185 child->prev = parent;
1355568a 1186 the_arcs[index]->has_been_placed = 1;
252b5132
RH
1187 }
1188 }
1189 }
1190
1191 /* Dump the chains of functions we've made. */
1355568a 1192 for (index = 0; index < arc_count; index++)
252b5132
RH
1193 {
1194 Sym *sym;
1355568a
AM
1195 if (the_arcs[index]->parent->has_been_placed
1196 || the_arcs[index]->child->has_been_placed)
252b5132
RH
1197 continue;
1198
1355568a 1199 sym = the_arcs[index]->parent;
252b5132
RH
1200
1201 /* If this symbol isn't attached to any other
1202 symbols, then we've got a rarely used arc.
1203
1204 Skip it for now, we'll deal with them later. */
1205 if (sym->next == NULL
1206 && sym->prev == NULL)
1207 continue;
1208
1209 /* Get to the start of this chain. */
1210 while (sym->prev)
1211 sym = sym->prev;
1212
1213 while (sym)
1214 {
1215 /* Mark it as placed. */
1216 sym->has_been_placed = 1;
1217 printf ("%s\n", sym->name);
1218 sym = sym->next;
1219 }
1220 }
1221
ef368dac
NC
1222 /* If we want to place all the arcs, then output
1223 those which weren't placed by the main algorithm. */
252b5132 1224 if (all)
1355568a 1225 for (index = 0; index < arc_count; index++)
252b5132
RH
1226 {
1227 Sym *sym;
1355568a
AM
1228 if (the_arcs[index]->parent->has_been_placed
1229 || the_arcs[index]->child->has_been_placed)
252b5132
RH
1230 continue;
1231
1355568a 1232 sym = the_arcs[index]->parent;
252b5132
RH
1233
1234 sym->has_been_placed = 1;
1235 printf ("%s\n", sym->name);
1236 }
1237}
1238
1239/* Print a suggested .o ordering for files on a link line based
1240 on profiling information. This uses the function placement
1241 code for the bulk of its work. */
1242
252b5132 1243void
1355568a 1244cg_print_file_ordering ()
252b5132
RH
1245{
1246 unsigned long scratch_arc_count, index;
1247 Arc **scratch_arcs;
252b5132
RH
1248 char *last;
1249
1250 scratch_arc_count = 0;
1251
1252 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1253 for (index = 0; index < numarcs; index++)
1254 {
1255 if (! arcs[index]->parent->mapped
1256 || ! arcs[index]->child->mapped)
1257 arcs[index]->has_been_placed = 1;
1258 }
1259
1260 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1261 scratch_arcs, &scratch_arc_count);
1262
1263 /* Output .o's not handled by the main placement algorithm. */
1264 for (index = 0; index < symtab.len; index++)
1265 {
1266 if (symtab.base[index].mapped
1267 && ! symtab.base[index].has_been_placed)
1268 printf ("%s\n", symtab.base[index].name);
1269 }
1270
1271 /* Now output any .o's that didn't have any text symbols. */
1272 last = NULL;
1273 for (index = 0; index < symbol_map_count; index++)
1274 {
1275 unsigned int index2;
1276
ef368dac
NC
1277 /* Don't bother searching if this symbol
1278 is the same as the previous one. */
252b5132
RH
1279 if (last && !strcmp (last, symbol_map[index].file_name))
1280 continue;
1281
1282 for (index2 = 0; index2 < symtab.len; index2++)
1283 {
1284 if (! symtab.base[index2].mapped)
1285 continue;
1286
1287 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1288 break;
1289 }
1290
ef368dac
NC
1291 /* If we didn't find it in the symbol table, then it must
1292 be a .o with no text symbols. Output it last. */
252b5132
RH
1293 if (index2 == symtab.len)
1294 printf ("%s\n", symbol_map[index].file_name);
1295 last = symbol_map[index].file_name;
0eee5820 1296 }
252b5132 1297}
This page took 0.212913 seconds and 4 git commands to generate.