2011-03-11 Michael Snyder <msnyder@vmware.com>
[deliverable/binutils-gdb.git] / gprof / cg_print.c
... / ...
CommitLineData
1/* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22\f
23#include "gprof.h"
24#include "libiberty.h"
25#include "filenames.h"
26#include "search_list.h"
27#include "source.h"
28#include "symtab.h"
29#include "cg_arcs.h"
30#include "cg_print.h"
31#include "hist.h"
32#include "utils.h"
33#include "corefile.h"
34
35/* Return value of comparison functions used to sort tables. */
36#define LESSTHAN -1
37#define EQUALTO 0
38#define GREATERTHAN 1
39
40static void print_header (void);
41static void print_cycle (Sym *);
42static int cmp_member (Sym *, Sym *);
43static void sort_members (Sym *);
44static void print_members (Sym *);
45static int cmp_arc (Arc *, Arc *);
46static void sort_parents (Sym *);
47static void print_parents (Sym *);
48static void sort_children (Sym *);
49static void print_children (Sym *);
50static void print_line (Sym *);
51static int cmp_name (const PTR, const PTR);
52static int cmp_arc_count (const PTR, const PTR);
53static int cmp_fun_nuses (const PTR, const PTR);
54static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
56
57/* Declarations of automatically generated functions to output blurbs. */
58extern void bsd_callg_blurb (FILE * fp);
59extern void fsf_callg_blurb (FILE * fp);
60
61double print_time = 0.0;
62
63
64static void
65print_header ()
66{
67 if (first_output)
68 first_output = FALSE;
69 else
70 printf ("\f\n");
71
72 if (!bsd_style_output)
73 {
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 else
77 printf (_("\t\t\tCall graph\n\n"));
78 }
79
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
82
83 if (print_time > 0.0)
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
86 else
87 {
88 printf (_(" no time propagated\n\n"));
89
90 /* This doesn't hurt, since all the numerators will be 0.0. */
91 print_time = 1.0;
92 }
93
94 if (bsd_style_output)
95 {
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 _("index"), _("%time"), _("self"), _("descendants"),
100 _("called"), _("self"), _("name"), _("index"));
101 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
102 "", "", "", "", _("called"), _("total"), _("children"));
103 printf ("\n");
104 }
105 else
106 {
107 printf (_("index %% time self children called name\n"));
108 }
109}
110
111/* Print a cycle header. */
112
113static void
114print_cycle (Sym *cyc)
115{
116 char buf[BUFSIZ];
117
118 sprintf (buf, "[%d]", cyc->cg.index);
119 printf (bsd_style_output
120 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
121 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
122 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
123 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
124
125 if (cyc->cg.self_calls != 0)
126 printf ("+%-7lu", cyc->cg.self_calls);
127 else
128 printf (" %7.7s", "");
129
130 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
131}
132
133/* Compare LEFT and RIGHT membmer. Major comparison key is
134 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
135
136static int
137cmp_member (Sym *left, Sym *right)
138{
139 double left_time = left->cg.prop.self + left->cg.prop.child;
140 double right_time = right->cg.prop.self + right->cg.prop.child;
141 unsigned long left_calls = left->ncalls + left->cg.self_calls;
142 unsigned long right_calls = right->ncalls + right->cg.self_calls;
143
144 if (left_time > right_time)
145 return GREATERTHAN;
146
147 if (left_time < right_time)
148 return LESSTHAN;
149
150 if (left_calls > right_calls)
151 return GREATERTHAN;
152
153 if (left_calls < right_calls)
154 return LESSTHAN;
155
156 return EQUALTO;
157}
158
159/* Sort members of a cycle. */
160
161static void
162sort_members (Sym *cyc)
163{
164 Sym *todo, *doing, *prev;
165
166 /* Detach cycle members from cyclehead,
167 and insertion sort them back on. */
168 todo = cyc->cg.cyc.next;
169 cyc->cg.cyc.next = 0;
170
171 for (doing = todo; doing != NULL; doing = todo)
172 {
173 todo = doing->cg.cyc.next;
174
175 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
176 {
177 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
178 break;
179 }
180
181 doing->cg.cyc.next = prev->cg.cyc.next;
182 prev->cg.cyc.next = doing;
183 }
184}
185
186/* Print the members of a cycle. */
187
188static void
189print_members (Sym *cyc)
190{
191 Sym *member;
192
193 sort_members (cyc);
194
195 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
196 {
197 printf (bsd_style_output
198 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
199 : "%6.6s %5.5s %7.2f %7.2f %7lu",
200 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
201 member->ncalls);
202
203 if (member->cg.self_calls != 0)
204 printf ("+%-7lu", member->cg.self_calls);
205 else
206 printf (" %7.7s", "");
207
208 printf (" ");
209 print_name (member);
210 printf ("\n");
211 }
212}
213
214/* Compare two arcs to/from the same child/parent.
215 - if one arc is a self arc, it's least.
216 - if one arc is within a cycle, it's less than.
217 - if both arcs are within a cycle, compare arc counts.
218 - if neither arc is within a cycle, compare with
219 time + child_time as major key
220 arc count as minor key. */
221
222static int
223cmp_arc (Arc *left, Arc *right)
224{
225 Sym *left_parent = left->parent;
226 Sym *left_child = left->child;
227 Sym *right_parent = right->parent;
228 Sym *right_child = right->child;
229 double left_time, right_time;
230
231 DBG (TIMEDEBUG,
232 printf ("[cmp_arc] ");
233 print_name (left_parent);
234 printf (" calls ");
235 print_name (left_child);
236 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
237 left->count, left_child->ncalls);
238 printf ("[cmp_arc] ");
239 print_name (right_parent);
240 printf (" calls ");
241 print_name (right_child);
242 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
243 right->count, right_child->ncalls);
244 printf ("\n");
245 );
246
247 if (left_parent == left_child)
248 return LESSTHAN; /* Left is a self call. */
249
250 if (right_parent == right_child)
251 return GREATERTHAN; /* Right is a self call. */
252
253 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
254 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
255 {
256 /* Left is a call within a cycle. */
257 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
258 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
259 {
260 /* Right is a call within the cycle, too. */
261 if (left->count < right->count)
262 return LESSTHAN;
263
264 if (left->count > right->count)
265 return GREATERTHAN;
266
267 return EQUALTO;
268 }
269 else
270 {
271 /* Right isn't a call within the cycle. */
272 return LESSTHAN;
273 }
274 }
275 else
276 {
277 /* Left isn't a call within a cycle. */
278 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
279 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
280 {
281 /* Right is a call within a cycle. */
282 return GREATERTHAN;
283 }
284 else
285 {
286 /* Neither is a call within a cycle. */
287 left_time = left->time + left->child_time;
288 right_time = right->time + right->child_time;
289
290 if (left_time < right_time)
291 return LESSTHAN;
292
293 if (left_time > right_time)
294 return GREATERTHAN;
295
296 if (left->count < right->count)
297 return LESSTHAN;
298
299 if (left->count > right->count)
300 return GREATERTHAN;
301
302 return EQUALTO;
303 }
304 }
305}
306
307
308static void
309sort_parents (Sym * child)
310{
311 Arc *arc, *detached, sorted, *prev;
312
313 /* Unlink parents from child, then insertion sort back on to
314 sorted's parents.
315 *arc the arc you have detached and are inserting.
316 *detached the rest of the arcs to be sorted.
317 sorted arc list onto which you insertion sort.
318 *prev arc before the arc you are comparing. */
319 sorted.next_parent = 0;
320
321 for (arc = child->cg.parents; arc; arc = detached)
322 {
323 detached = arc->next_parent;
324
325 /* Consider *arc as disconnected; insert it into sorted. */
326 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
327 {
328 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
329 break;
330 }
331
332 arc->next_parent = prev->next_parent;
333 prev->next_parent = arc;
334 }
335
336 /* Reattach sorted arcs to child. */
337 child->cg.parents = sorted.next_parent;
338}
339
340
341static void
342print_parents (Sym *child)
343{
344 Sym *parent;
345 Arc *arc;
346 Sym *cycle_head;
347
348 if (child->cg.cyc.head != 0)
349 cycle_head = child->cg.cyc.head;
350 else
351 cycle_head = child;
352
353 if (!child->cg.parents)
354 {
355 printf (bsd_style_output
356 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
357 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
358 "", "", "", "", "", "");
359 return;
360 }
361
362 sort_parents (child);
363
364 for (arc = child->cg.parents; arc; arc = arc->next_parent)
365 {
366 parent = arc->parent;
367 if (child == parent || (child->cg.cyc.num != 0
368 && parent->cg.cyc.num == child->cg.cyc.num))
369 {
370 /* Selfcall or call among siblings. */
371 printf (bsd_style_output
372 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
373 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
374 "", "", "", "",
375 arc->count, "");
376 print_name (parent);
377 printf ("\n");
378 }
379 else
380 {
381 /* Regular parent of child. */
382 printf (bsd_style_output
383 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
384 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
385 "", "",
386 arc->time / hz, arc->child_time / hz,
387 arc->count, cycle_head->ncalls);
388 print_name (parent);
389 printf ("\n");
390 }
391 }
392}
393
394
395static void
396sort_children (Sym *parent)
397{
398 Arc *arc, *detached, sorted, *prev;
399
400 /* Unlink children from parent, then insertion sort back on to
401 sorted's children.
402 *arc the arc you have detached and are inserting.
403 *detached the rest of the arcs to be sorted.
404 sorted arc list onto which you insertion sort.
405 *prev arc before the arc you are comparing. */
406 sorted.next_child = 0;
407
408 for (arc = parent->cg.children; arc; arc = detached)
409 {
410 detached = arc->next_child;
411
412 /* Consider *arc as disconnected; insert it into sorted. */
413 for (prev = &sorted; prev->next_child; prev = prev->next_child)
414 {
415 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
416 break;
417 }
418
419 arc->next_child = prev->next_child;
420 prev->next_child = arc;
421 }
422
423 /* Reattach sorted children to parent. */
424 parent->cg.children = sorted.next_child;
425}
426
427
428static void
429print_children (Sym *parent)
430{
431 Sym *child;
432 Arc *arc;
433
434 sort_children (parent);
435 arc = parent->cg.children;
436
437 for (arc = parent->cg.children; arc; arc = arc->next_child)
438 {
439 child = arc->child;
440 if (child == parent || (child->cg.cyc.num != 0
441 && child->cg.cyc.num == parent->cg.cyc.num))
442 {
443 /* Self call or call to sibling. */
444 printf (bsd_style_output
445 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
446 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
447 "", "", "", "", arc->count, "");
448 print_name (child);
449 printf ("\n");
450 }
451 else
452 {
453 /* Regular child of parent. */
454 printf (bsd_style_output
455 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
456 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
457 "", "",
458 arc->time / hz, arc->child_time / hz,
459 arc->count, child->cg.cyc.head->ncalls);
460 print_name (child);
461 printf ("\n");
462 }
463 }
464}
465
466
467static void
468print_line (Sym *np)
469{
470 char buf[BUFSIZ];
471
472 sprintf (buf, "[%d]", np->cg.index);
473 printf (bsd_style_output
474 ? "%-6.6s %5.1f %7.2f %11.2f"
475 : "%-6.6s %5.1f %7.2f %7.2f", buf,
476 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
477 np->cg.prop.self / hz, np->cg.prop.child / hz);
478
479 if ((np->ncalls + np->cg.self_calls) != 0)
480 {
481 printf (" %7lu", np->ncalls);
482
483 if (np->cg.self_calls != 0)
484 printf ("+%-7lu ", np->cg.self_calls);
485 else
486 printf (" %7.7s ", "");
487 }
488 else
489 {
490 printf (" %7.7s %7.7s ", "", "");
491 }
492
493 print_name (np);
494 printf ("\n");
495}
496
497
498/* Print dynamic call graph. */
499
500void
501cg_print (Sym ** timesortsym)
502{
503 unsigned int sym_index;
504 Sym *parent;
505
506 if (print_descriptions && bsd_style_output)
507 bsd_callg_blurb (stdout);
508
509 print_header ();
510
511 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
512 {
513 parent = timesortsym[sym_index];
514
515 if ((ignore_zeros && parent->ncalls == 0
516 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
517 && parent->cg.prop.child == 0)
518 || !parent->cg.print_flag
519 || (line_granularity && ! parent->is_func))
520 continue;
521
522 if (!parent->name && parent->cg.cyc.num != 0)
523 {
524 /* Cycle header. */
525 print_cycle (parent);
526 print_members (parent);
527 }
528 else
529 {
530 print_parents (parent);
531 print_line (parent);
532 print_children (parent);
533 }
534
535 if (bsd_style_output)
536 printf ("\n");
537
538 printf ("-----------------------------------------------\n");
539
540 if (bsd_style_output)
541 printf ("\n");
542 }
543
544 free (timesortsym);
545
546 if (print_descriptions && !bsd_style_output)
547 fsf_callg_blurb (stdout);
548}
549
550
551static int
552cmp_name (const PTR left, const PTR right)
553{
554 const Sym **npp1 = (const Sym **) left;
555 const Sym **npp2 = (const Sym **) right;
556
557 return strcmp ((*npp1)->name, (*npp2)->name);
558}
559
560
561void
562cg_print_index ()
563{
564 unsigned int sym_index;
565 unsigned int nnames, todo, i, j;
566 int col, starting_col;
567 Sym **name_sorted_syms, *sym;
568 const char *filename;
569 char buf[20];
570 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
571
572 /* Now, sort regular function name
573 alphabetically to create an index. */
574 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
575
576 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
577 {
578 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
579 && symtab.base[sym_index].hist.time == 0)
580 continue;
581
582 name_sorted_syms[nnames++] = &symtab.base[sym_index];
583 }
584
585 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
586
587 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
588 name_sorted_syms[todo++] = &cycle_header[sym_index];
589
590 printf ("\f\n");
591 printf (_("Index by function name\n\n"));
592 sym_index = (todo + 2) / 3;
593
594 for (i = 0; i < sym_index; i++)
595 {
596 col = 0;
597 starting_col = 0;
598
599 for (j = i; j < todo; j += sym_index)
600 {
601 sym = name_sorted_syms[j];
602
603 if (sym->cg.print_flag)
604 sprintf (buf, "[%d]", sym->cg.index);
605 else
606 sprintf (buf, "(%d)", sym->cg.index);
607
608 if (j < nnames)
609 {
610 if (bsd_style_output)
611 {
612 printf ("%6.6s %-19.19s", buf, sym->name);
613 }
614 else
615 {
616 col += strlen (buf);
617
618 for (; col < starting_col + 5; ++col)
619 putchar (' ');
620
621 printf (" %s ", buf);
622 col += print_name_only (sym);
623
624 if (!line_granularity && sym->is_static && sym->file)
625 {
626 filename = sym->file->name;
627
628 if (!print_path)
629 {
630 filename = strrchr (filename, '/');
631
632 if (filename)
633 ++filename;
634 else
635 filename = sym->file->name;
636 }
637
638 printf (" (%s)", filename);
639 col += strlen (filename) + 3;
640 }
641 }
642 }
643 else
644 {
645 if (bsd_style_output)
646 {
647 printf ("%6.6s ", buf);
648 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
649 printf ("%-19.19s", buf);
650 }
651 else
652 {
653 col += strlen (buf);
654 for (; col < starting_col + 5; ++col)
655 putchar (' ');
656 printf (" %s ", buf);
657 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
658 printf ("%s", buf);
659 col += strlen (buf);
660 }
661 }
662
663 starting_col += column_width;
664 }
665
666 printf ("\n");
667 }
668
669 free (name_sorted_syms);
670}
671
672/* Compare two arcs based on their usage counts.
673 We want to sort in descending order. */
674
675static int
676cmp_arc_count (const PTR left, const PTR right)
677{
678 const Arc **npp1 = (const Arc **) left;
679 const Arc **npp2 = (const Arc **) right;
680
681 if ((*npp1)->count > (*npp2)->count)
682 return -1;
683 else if ((*npp1)->count < (*npp2)->count)
684 return 1;
685 else
686 return 0;
687}
688
689/* Compare two funtions based on their usage counts.
690 We want to sort in descending order. */
691
692static int
693cmp_fun_nuses (const PTR left, const PTR right)
694{
695 const Sym **npp1 = (const Sym **) left;
696 const Sym **npp2 = (const Sym **) right;
697
698 if ((*npp1)->nuses > (*npp2)->nuses)
699 return -1;
700 else if ((*npp1)->nuses < (*npp2)->nuses)
701 return 1;
702 else
703 return 0;
704}
705
706/* Print a suggested function ordering based on the profiling data.
707
708 We perform 4 major steps when ordering functions:
709
710 * Group unused functions together and place them at the
711 end of the function order.
712
713 * Search the highest use arcs (those which account for 90% of
714 the total arc count) for functions which have several parents.
715
716 Group those with the most call sites together (currently the
717 top 1.25% which have at least five different call sites).
718
719 These are emitted at the start of the function order.
720
721 * Use a greedy placement algorithm to place functions which
722 occur in the top 99% of the arcs in the profile. Some provisions
723 are made to handle high usage arcs where the parent and/or
724 child has already been placed.
725
726 * Run the same greedy placement algorithm on the remaining
727 arcs to place the leftover functions.
728
729
730 The various "magic numbers" should (one day) be tuneable by command
731 line options. They were arrived at by benchmarking a few applications
732 with various values to see which values produced better overall function
733 orderings.
734
735 Of course, profiling errors, machine limitations (PA long calls), and
736 poor cutoff values for the placement algorithm may limit the usefullness
737 of the resulting function order. Improvements would be greatly appreciated.
738
739 Suggestions:
740
741 * Place the functions with many callers near the middle of the
742 list to reduce long calls.
743
744 * Propagate arc usage changes as functions are placed. Ie if
745 func1 and func2 are placed together, arcs to/from those arcs
746 to the same parent/child should be combined, then resort the
747 arcs to choose the next one.
748
749 * Implement some global positioning algorithm to place the
750 chains made by the greedy local positioning algorithm. Probably
751 by examining arcs which haven't been placed yet to tie two
752 chains together.
753
754 * Take a function's size and time into account in the algorithm;
755 size in particular is important on the PA (long calls). Placing
756 many small functions onto their own page may be wise.
757
758 * Use better profiling information; many published algorithms
759 are based on call sequences through time, rather than just
760 arc counts.
761
762 * Prodecure cloning could improve performance when a small number
763 of arcs account for most of the calls to a particular function.
764
765 * Use relocation information to avoid moving unused functions
766 completely out of the code stream; this would avoid severe lossage
767 when the profile data bears little resemblance to actual runs.
768
769 * Propagation of arc usages should also improve .o link line
770 ordering which shares the same arc placement algorithm with
771 the function ordering code (in fact it is a degenerate case
772 of function ordering). */
773
774void
775cg_print_function_ordering (void)
776{
777 unsigned long sym_index;
778 unsigned long arc_index;
779 unsigned long used, unused, scratch_index;
780 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
781#ifdef __GNUC__
782 unsigned long long total_arcs, tmp_arcs_count;
783#else
784 unsigned long total_arcs, tmp_arcs_count;
785#endif
786 Sym **unused_syms, **used_syms, **scratch_syms;
787 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
788
789 sym_index = 0;
790 used = 0;
791 unused = 0;
792 scratch_index = 0;
793 unplaced_arc_count = 0;
794 high_arc_count = 0;
795 scratch_arc_count = 0;
796
797 /* First group all the unused functions together. */
798 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
799 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
800 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
802 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
803 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804
805 /* Walk through all the functions; mark those which are never
806 called as placed (we'll emit them as a group later). */
807 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
808 {
809 if (symtab.base[sym_index].ncalls == 0)
810 {
811 unused_syms[unused++] = &symtab.base[sym_index];
812 symtab.base[sym_index].has_been_placed = 1;
813 }
814 else
815 {
816 used_syms[used++] = &symtab.base[sym_index];
817 symtab.base[sym_index].has_been_placed = 0;
818 symtab.base[sym_index].next = 0;
819 symtab.base[sym_index].prev = 0;
820 symtab.base[sym_index].nuses = 0;
821 }
822 }
823
824 /* Sort the arcs from most used to least used. */
825 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
826
827 /* Compute the total arc count. Also mark arcs as unplaced.
828
829 Note we don't compensate for overflow if that happens!
830 Overflow is much less likely when this file is compiled
831 with GCC as it can double-wide integers via long long. */
832 total_arcs = 0;
833 for (arc_index = 0; arc_index < numarcs; arc_index++)
834 {
835 total_arcs += arcs[arc_index]->count;
836 arcs[arc_index]->has_been_placed = 0;
837 }
838
839 /* We want to pull out those functions which are referenced
840 by many highly used arcs and emit them as a group. This
841 could probably use some tuning. */
842 tmp_arcs_count = 0;
843 for (arc_index = 0; arc_index < numarcs; arc_index++)
844 {
845 tmp_arcs_count += arcs[arc_index]->count;
846
847 /* Count how many times each parent and child are used up
848 to our threshhold of arcs (90%). */
849 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
850 break;
851
852 arcs[arc_index]->child->nuses++;
853 }
854
855 /* Now sort a temporary symbol table based on the number of
856 times each function was used in the highest used arcs. */
857 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
858 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
859
860 /* Now pick out those symbols we're going to emit as
861 a group. We take up to 1.25% of the used symbols. */
862 for (sym_index = 0; sym_index < used / 80; sym_index++)
863 {
864 Sym *sym = scratch_syms[sym_index];
865 Arc *arc;
866
867 /* If we hit symbols that aren't used from many call sites,
868 then we can quit. We choose five as the low limit for
869 no particular reason. */
870 if (sym->nuses == 5)
871 break;
872
873 /* We're going to need the arcs between these functions.
874 Unfortunately, we don't know all these functions
875 until we're done. So we keep track of all the arcs
876 to the functions we care about, then prune out those
877 which are uninteresting.
878
879 An interesting variation would be to quit when we found
880 multi-call site functions which account for some percentage
881 of the arcs. */
882 arc = sym->cg.children;
883
884 while (arc)
885 {
886 if (arc->parent != arc->child)
887 scratch_arcs[scratch_arc_count++] = arc;
888 arc->has_been_placed = 1;
889 arc = arc->next_child;
890 }
891
892 arc = sym->cg.parents;
893
894 while (arc)
895 {
896 if (arc->parent != arc->child)
897 scratch_arcs[scratch_arc_count++] = arc;
898 arc->has_been_placed = 1;
899 arc = arc->next_parent;
900 }
901
902 /* Keep track of how many symbols we're going to place. */
903 scratch_index = sym_index;
904
905 /* A lie, but it makes identifying
906 these functions easier later. */
907 sym->has_been_placed = 1;
908 }
909
910 /* Now walk through the temporary arcs and copy
911 those we care about into the high arcs array. */
912 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
913 {
914 Arc *arc = scratch_arcs[arc_index];
915
916 /* If this arc refers to highly used functions, then
917 then we want to keep it. */
918 if (arc->child->has_been_placed
919 && arc->parent->has_been_placed)
920 {
921 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
922
923 /* We need to turn of has_been_placed since we're going to
924 use the main arc placement algorithm on these arcs. */
925 arc->child->has_been_placed = 0;
926 arc->parent->has_been_placed = 0;
927 }
928 }
929
930 /* Dump the multi-site high usage functions which are not
931 going to be ordered by the main ordering algorithm. */
932 for (sym_index = 0; sym_index < scratch_index; sym_index++)
933 {
934 if (scratch_syms[sym_index]->has_been_placed)
935 printf ("%s\n", scratch_syms[sym_index]->name);
936 }
937
938 /* Now we can order the multi-site high use
939 functions based on the arcs between them. */
940 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
941 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
942 unplaced_arcs, &unplaced_arc_count);
943
944 /* Order and dump the high use functions left,
945 these typically have only a few call sites. */
946 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
947 unplaced_arcs, &unplaced_arc_count);
948
949 /* Now place the rarely used functions. */
950 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
951 scratch_arcs, &scratch_arc_count);
952
953 /* Output any functions not emitted by the order_and_dump calls. */
954 for (sym_index = 0; sym_index < used; sym_index++)
955 if (used_syms[sym_index]->has_been_placed == 0)
956 printf("%s\n", used_syms[sym_index]->name);
957
958 /* Output the unused functions. */
959 for (sym_index = 0; sym_index < unused; sym_index++)
960 printf("%s\n", unused_syms[sym_index]->name);
961
962 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
963 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
964 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
966 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
967 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968
969 free (unused_syms);
970 free (used_syms);
971 free (scratch_syms);
972 free (high_arcs);
973 free (scratch_arcs);
974 free (unplaced_arcs);
975}
976
977/* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
978 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
979
980 If ALL is nonzero, then place all functions referenced by THE_ARCS,
981 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
982
983#define MOST 0.99
984static void
985order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
986 unplaced_arcs, unplaced_arc_count)
987 Arc **the_arcs;
988 unsigned long arc_count;
989 int all;
990 Arc **unplaced_arcs;
991 unsigned long *unplaced_arc_count;
992{
993#ifdef __GNUC__
994 unsigned long long tmp_arcs, total_arcs;
995#else
996 unsigned long tmp_arcs, total_arcs;
997#endif
998 unsigned int arc_index;
999
1000 /* If needed, compute the total arc count.
1001
1002 Note we don't compensate for overflow if that happens! */
1003 if (! all)
1004 {
1005 total_arcs = 0;
1006 for (arc_index = 0; arc_index < arc_count; arc_index++)
1007 total_arcs += the_arcs[arc_index]->count;
1008 }
1009 else
1010 total_arcs = 0;
1011
1012 tmp_arcs = 0;
1013
1014 for (arc_index = 0; arc_index < arc_count; arc_index++)
1015 {
1016 Sym *sym1, *sym2;
1017 Sym *child, *parent;
1018
1019 tmp_arcs += the_arcs[arc_index]->count;
1020
1021 /* Ignore this arc if it's already been placed. */
1022 if (the_arcs[arc_index]->has_been_placed)
1023 continue;
1024
1025 child = the_arcs[arc_index]->child;
1026 parent = the_arcs[arc_index]->parent;
1027
1028 /* If we're not using all arcs, and this is a rarely used
1029 arc, then put it on the unplaced_arc list. Similarly
1030 if both the parent and child of this arc have been placed. */
1031 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1032 || child->has_been_placed || parent->has_been_placed)
1033 {
1034 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1035 continue;
1036 }
1037
1038 /* If all slots in the parent and child are full, then there isn't
1039 anything we can do right now. We'll place this arc on the
1040 unplaced arc list in the hope that a global positioning
1041 algorithm can use it to place function chains. */
1042 if (parent->next && parent->prev && child->next && child->prev)
1043 {
1044 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1045 continue;
1046 }
1047
1048 /* If the parent is unattached, then find the closest
1049 place to attach it onto child's chain. Similarly
1050 for the opposite case. */
1051 if (!parent->next && !parent->prev)
1052 {
1053 int next_count = 0;
1054 int prev_count = 0;
1055 Sym *prev = child;
1056 Sym *next = child;
1057
1058 /* Walk to the beginning and end of the child's chain. */
1059 while (next->next)
1060 {
1061 next = next->next;
1062 next_count++;
1063 }
1064
1065 while (prev->prev)
1066 {
1067 prev = prev->prev;
1068 prev_count++;
1069 }
1070
1071 /* Choose the closest. */
1072 child = next_count < prev_count ? next : prev;
1073 }
1074 else if (! child->next && !child->prev)
1075 {
1076 int next_count = 0;
1077 int prev_count = 0;
1078 Sym *prev = parent;
1079 Sym *next = parent;
1080
1081 while (next->next)
1082 {
1083 next = next->next;
1084 next_count++;
1085 }
1086
1087 while (prev->prev)
1088 {
1089 prev = prev->prev;
1090 prev_count++;
1091 }
1092
1093 parent = prev_count < next_count ? prev : next;
1094 }
1095 else
1096 {
1097 /* Couldn't find anywhere to attach the functions,
1098 put the arc on the unplaced arc list. */
1099 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1100 continue;
1101 }
1102
1103 /* Make sure we don't tie two ends together. */
1104 sym1 = parent;
1105 if (sym1->next)
1106 while (sym1->next)
1107 sym1 = sym1->next;
1108 else
1109 while (sym1->prev)
1110 sym1 = sym1->prev;
1111
1112 sym2 = child;
1113 if (sym2->next)
1114 while (sym2->next)
1115 sym2 = sym2->next;
1116 else
1117 while (sym2->prev)
1118 sym2 = sym2->prev;
1119
1120 if (sym1 == child
1121 && sym2 == parent)
1122 {
1123 /* This would tie two ends together. */
1124 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1125 continue;
1126 }
1127
1128 if (parent->next)
1129 {
1130 /* Must attach to the parent's prev field. */
1131 if (! child->next)
1132 {
1133 /* parent-prev and child-next */
1134 parent->prev = child;
1135 child->next = parent;
1136 the_arcs[arc_index]->has_been_placed = 1;
1137 }
1138 }
1139 else if (parent->prev)
1140 {
1141 /* Must attach to the parent's next field. */
1142 if (! child->prev)
1143 {
1144 /* parent-next and child-prev */
1145 parent->next = child;
1146 child->prev = parent;
1147 the_arcs[arc_index]->has_been_placed = 1;
1148 }
1149 }
1150 else
1151 {
1152 /* Can attach to either field in the parent, depends
1153 on where we've got space in the child. */
1154 if (child->prev)
1155 {
1156 /* parent-prev and child-next. */
1157 parent->prev = child;
1158 child->next = parent;
1159 the_arcs[arc_index]->has_been_placed = 1;
1160 }
1161 else
1162 {
1163 /* parent-next and child-prev. */
1164 parent->next = child;
1165 child->prev = parent;
1166 the_arcs[arc_index]->has_been_placed = 1;
1167 }
1168 }
1169 }
1170
1171 /* Dump the chains of functions we've made. */
1172 for (arc_index = 0; arc_index < arc_count; arc_index++)
1173 {
1174 Sym *sym;
1175 if (the_arcs[arc_index]->parent->has_been_placed
1176 || the_arcs[arc_index]->child->has_been_placed)
1177 continue;
1178
1179 sym = the_arcs[arc_index]->parent;
1180
1181 /* If this symbol isn't attached to any other
1182 symbols, then we've got a rarely used arc.
1183
1184 Skip it for now, we'll deal with them later. */
1185 if (sym->next == NULL
1186 && sym->prev == NULL)
1187 continue;
1188
1189 /* Get to the start of this chain. */
1190 while (sym->prev)
1191 sym = sym->prev;
1192
1193 while (sym)
1194 {
1195 /* Mark it as placed. */
1196 sym->has_been_placed = 1;
1197 printf ("%s\n", sym->name);
1198 sym = sym->next;
1199 }
1200 }
1201
1202 /* If we want to place all the arcs, then output
1203 those which weren't placed by the main algorithm. */
1204 if (all)
1205 for (arc_index = 0; arc_index < arc_count; arc_index++)
1206 {
1207 Sym *sym;
1208 if (the_arcs[arc_index]->parent->has_been_placed
1209 || the_arcs[arc_index]->child->has_been_placed)
1210 continue;
1211
1212 sym = the_arcs[arc_index]->parent;
1213
1214 sym->has_been_placed = 1;
1215 printf ("%s\n", sym->name);
1216 }
1217}
1218
1219/* Compare two function_map structs based on file name.
1220 We want to sort in ascending order. */
1221
1222static int
1223cmp_symbol_map (const void * l, const void * r)
1224{
1225 return filename_cmp (((struct function_map *) l)->file_name,
1226 ((struct function_map *) r)->file_name);
1227}
1228
1229/* Print a suggested .o ordering for files on a link line based
1230 on profiling information. This uses the function placement
1231 code for the bulk of its work. */
1232
1233void
1234cg_print_file_ordering (void)
1235{
1236 unsigned long scratch_arc_count;
1237 unsigned long arc_index;
1238 unsigned long sym_index;
1239 Arc **scratch_arcs;
1240 char *last;
1241
1242 scratch_arc_count = 0;
1243
1244 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1245 for (arc_index = 0; arc_index < numarcs; arc_index++)
1246 {
1247 if (! arcs[arc_index]->parent->mapped
1248 || ! arcs[arc_index]->child->mapped)
1249 arcs[arc_index]->has_been_placed = 1;
1250 }
1251
1252 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1253 scratch_arcs, &scratch_arc_count);
1254
1255 /* Output .o's not handled by the main placement algorithm. */
1256 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1257 {
1258 if (symtab.base[sym_index].mapped
1259 && ! symtab.base[sym_index].has_been_placed)
1260 printf ("%s\n", symtab.base[sym_index].name);
1261 }
1262
1263 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1264
1265 /* Now output any .o's that didn't have any text symbols. */
1266 last = NULL;
1267 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1268 {
1269 unsigned int index2;
1270
1271 /* Don't bother searching if this symbol
1272 is the same as the previous one. */
1273 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1274 continue;
1275
1276 for (index2 = 0; index2 < symtab.len; index2++)
1277 {
1278 if (! symtab.base[index2].mapped)
1279 continue;
1280
1281 if (!filename_cmp (symtab.base[index2].name,
1282 symbol_map[sym_index].file_name))
1283 break;
1284 }
1285
1286 /* If we didn't find it in the symbol table, then it must
1287 be a .o with no text symbols. Output it last. */
1288 if (index2 == symtab.len)
1289 printf ("%s\n", symbol_map[sym_index].file_name);
1290 last = symbol_map[sym_index].file_name;
1291 }
1292}
This page took 0.045677 seconds and 4 git commands to generate.