tweak
[deliverable/binutils-gdb.git] / gprof / cg_print.c
CommitLineData
5489fcc3
KR
1#include "libiberty.h"
2#include "cg_arcs.h"
3#include "cg_print.h"
4#include "hist.h"
5#include "utils.h"
6
7/*
8 * Return value of comparison functions used to sort tables:
9 */
10#define LESSTHAN -1
11#define EQUALTO 0
12#define GREATERTHAN 1
13
64c50fc5
JL
14static void order_and_dump_functions_by_arcs PARAMS ((Arc **, unsigned long,
15 int, Arc **,
16 unsigned long *));
5489fcc3 17/* declarations of automatically generated functions to output blurbs: */
12516a37
KR
18extern void bsd_callg_blurb PARAMS ((FILE * fp));
19extern void fsf_callg_blurb PARAMS ((FILE * fp));
5489fcc3
KR
20
21double print_time = 0.0;
22
23
24static void
12516a37 25DEFUN_VOID (print_header)
5489fcc3 26{
12516a37
KR
27 if (first_output)
28 {
29 first_output = FALSE;
30 }
31 else
32 {
33 printf ("\f\n");
03c35bcb 34 }
12516a37
KR
35 if (!bsd_style_output)
36 {
37 if (print_descriptions)
38 {
39 printf ("\t\t Call graph (explanation follows)\n\n");
40 }
41 else
42 {
43 printf ("\t\t\tCall graph\n\n");
03c35bcb
KR
44 }
45 }
12516a37
KR
46 printf ("\ngranularity: each sample hit covers %ld byte(s)",
47 (long) hist_scale * sizeof (UNIT));
48 if (print_time > 0.0)
49 {
50 printf (" for %.2f%% of %.2f seconds\n\n",
51 100.0 / print_time, print_time / hz);
52 }
53 else
54 {
55 printf (" no time propagated\n\n");
56 /*
57 * This doesn't hurt, since all the numerators will be 0.0:
58 */
59 print_time = 1.0;
03c35bcb 60 }
12516a37
KR
61 if (bsd_style_output)
62 {
63 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
64 "", "", "", "", "called", "total", "parents");
65 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
66 "index", "%time", "self", "descendents",
67 "called", "self", "name", "index");
68 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
69 "", "", "", "", "called", "total", "children");
70 printf ("\n");
71 }
72 else
73 {
74 printf ("index %% time self children called name\n");
03c35bcb
KR
75 }
76}
5489fcc3
KR
77
78
79/*
80 * Print a cycle header.
81 */
82static void
12516a37 83DEFUN (print_cycle, (cyc), Sym * cyc)
5489fcc3 84{
12516a37 85 char buf[BUFSIZ];
5489fcc3 86
12516a37 87 sprintf (buf, "[%d]", cyc->cg.index);
869b94c5
KR
88 printf (bsd_style_output
89 ? "%-6.6s %5.1f %7.2f %11.2f %7d"
90 : "%-6.6s %5.1f %7.2f %7.2f %7d", buf,
12516a37
KR
91 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
92 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
93 if (cyc->cg.self_calls != 0)
94 {
95 printf ("+%-7d", cyc->cg.self_calls);
96 }
97 else
98 {
99 printf (" %7.7s", "");
03c35bcb 100 }
869b94c5 101 printf (" <cycle %d as a whole> [%d]\n", cyc->cg.cyc.num, cyc->cg.index);
03c35bcb 102}
5489fcc3
KR
103
104
105/*
106 * Compare LEFT and RIGHT membmer. Major comparison key is
107 * CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.
108 */
109static int
12516a37 110DEFUN (cmp_member, (left, right), Sym * left AND Sym * right)
5489fcc3 111{
12516a37
KR
112 double left_time = left->cg.prop.self + left->cg.prop.child;
113 double right_time = right->cg.prop.self + right->cg.prop.child;
114 long left_calls = left->ncalls + left->cg.self_calls;
115 long right_calls = right->ncalls + right->cg.self_calls;
116
117 if (left_time > right_time)
118 {
119 return GREATERTHAN;
03c35bcb 120 }
12516a37
KR
121 if (left_time < right_time)
122 {
123 return LESSTHAN;
03c35bcb 124 }
12516a37
KR
125
126 if (left_calls > right_calls)
127 {
128 return GREATERTHAN;
03c35bcb 129 }
12516a37
KR
130 if (left_calls < right_calls)
131 {
132 return LESSTHAN;
03c35bcb 133 }
12516a37 134 return EQUALTO;
03c35bcb 135}
5489fcc3
KR
136
137
138/*
139 * Sort members of a cycle.
140 */
141static void
12516a37 142DEFUN (sort_members, (cyc), Sym * cyc)
5489fcc3 143{
12516a37
KR
144 Sym *todo, *doing, *prev;
145 /*
146 * Detach cycle members from cyclehead, and insertion sort them
147 * back on.
148 */
149 todo = cyc->cg.cyc.next;
150 cyc->cg.cyc.next = 0;
151 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
152 {
153 todo = doing->cg.cyc.next;
154 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
155 {
156 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
157 {
158 break;
03c35bcb
KR
159 }
160 }
12516a37
KR
161 doing->cg.cyc.next = prev->cg.cyc.next;
162 prev->cg.cyc.next = doing;
03c35bcb
KR
163 }
164}
5489fcc3
KR
165
166
167/*
168 * Print the members of a cycle.
169 */
170static void
12516a37 171DEFUN (print_members, (cyc), Sym * cyc)
5489fcc3 172{
12516a37
KR
173 Sym *member;
174
175 sort_members (cyc);
176 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
177 {
869b94c5
KR
178 printf (bsd_style_output
179 ? "%6.6s %5.5s %7.2f %11.2f %7d"
180 : "%6.6s %5.5s %7.2f %7.2f %7d",
12516a37
KR
181 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
182 member->ncalls);
183 if (member->cg.self_calls != 0)
184 {
185 printf ("+%-7d", member->cg.self_calls);
186 }
187 else
188 {
189 printf (" %7.7s", "");
03c35bcb 190 }
12516a37
KR
191 printf (" ");
192 print_name (member);
193 printf ("\n");
03c35bcb
KR
194 }
195}
5489fcc3
KR
196
197
198/*
199 * Compare two arcs to/from the same child/parent.
12516a37
KR
200 * - if one arc is a self arc, it's least.
201 * - if one arc is within a cycle, it's less than.
202 * - if both arcs are within a cycle, compare arc counts.
203 * - if neither arc is within a cycle, compare with
204 * time + child_time as major key
205 * arc count as minor key
5489fcc3
KR
206 */
207static int
12516a37 208DEFUN (cmp_arc, (left, right), Arc * left AND Arc * right)
5489fcc3 209{
12516a37
KR
210 Sym *left_parent = left->parent;
211 Sym *left_child = left->child;
212 Sym *right_parent = right->parent;
213 Sym *right_child = right->child;
214 double left_time, right_time;
215
216 DBG (TIMEDEBUG,
217 printf ("[cmp_arc] ");
218 print_name (left_parent);
219 printf (" calls ");
220 print_name (left_child);
221 printf (" %f + %f %d/%d\n", left->time, left->child_time,
5489fcc3 222 left->count, left_child->ncalls);
12516a37
KR
223 printf ("[cmp_arc] ");
224 print_name (right_parent);
225 printf (" calls ");
226 print_name (right_child);
227 printf (" %f + %f %d/%d\n", right->time, right->child_time,
5489fcc3 228 right->count, right_child->ncalls);
12516a37
KR
229 printf ("\n");
230 );
231 if (left_parent == left_child)
232 {
233 return LESSTHAN; /* left is a self call */
03c35bcb 234 }
12516a37
KR
235 if (right_parent == right_child)
236 {
237 return GREATERTHAN; /* right is a self call */
03c35bcb 238 }
12516a37
KR
239
240 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
241 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
242 {
243 /* left is a call within a cycle */
244 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
245 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
246 {
247 /* right is a call within the cycle, too */
248 if (left->count < right->count)
249 {
250 return LESSTHAN;
03c35bcb 251 }
12516a37
KR
252 if (left->count > right->count)
253 {
254 return GREATERTHAN;
03c35bcb 255 }
12516a37
KR
256 return EQUALTO;
257 }
258 else
5489fcc3 259 {
12516a37
KR
260 /* right isn't a call within the cycle */
261 return LESSTHAN;
03c35bcb 262 }
12516a37
KR
263 }
264 else
265 {
266 /* left isn't a call within a cycle */
267 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
268 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
5489fcc3 269 {
12516a37
KR
270 /* right is a call within a cycle */
271 return GREATERTHAN;
272 }
273 else
274 {
275 /* neither is a call within a cycle */
276 left_time = left->time + left->child_time;
277 right_time = right->time + right->child_time;
278 if (left_time < right_time)
279 {
280 return LESSTHAN;
03c35bcb 281 }
12516a37
KR
282 if (left_time > right_time)
283 {
284 return GREATERTHAN;
03c35bcb 285 }
12516a37
KR
286 if (left->count < right->count)
287 {
288 return LESSTHAN;
03c35bcb 289 }
12516a37
KR
290 if (left->count > right->count)
291 {
292 return GREATERTHAN;
03c35bcb 293 }
12516a37 294 return EQUALTO;
03c35bcb
KR
295 }
296 }
297}
5489fcc3
KR
298
299
300static void
12516a37 301DEFUN (sort_parents, (child), Sym * child)
5489fcc3 302{
12516a37
KR
303 Arc *arc, *detached, sorted, *prev;
304
305 /*
306 * Unlink parents from child, then insertion sort back on to
307 * sorted's parents.
308 * *arc the arc you have detached and are inserting.
309 * *detached the rest of the arcs to be sorted.
310 * sorted arc list onto which you insertion sort.
311 * *prev arc before the arc you are comparing.
312 */
313 sorted.next_parent = 0;
314 for (arc = child->cg.parents; arc; arc = detached)
315 {
316 detached = arc->next_parent;
317
318 /* consider *arc as disconnected; insert it into sorted: */
319 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
320 {
321 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
322 {
323 break;
03c35bcb
KR
324 }
325 }
12516a37
KR
326 arc->next_parent = prev->next_parent;
327 prev->next_parent = arc;
03c35bcb 328 }
12516a37
KR
329
330 /* reattach sorted arcs to child: */
331 child->cg.parents = sorted.next_parent;
03c35bcb 332}
5489fcc3
KR
333
334
335static void
12516a37 336DEFUN (print_parents, (child), Sym * child)
5489fcc3 337{
12516a37
KR
338 Sym *parent;
339 Arc *arc;
340 Sym *cycle_head;
341
342 if (child->cg.cyc.head != 0)
343 {
344 cycle_head = child->cg.cyc.head;
345 }
346 else
347 {
348 cycle_head = child;
03c35bcb 349 }
12516a37
KR
350 if (!child->cg.parents)
351 {
352 printf (bsd_style_output
353 ? "%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n"
354 : "%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n",
355 "", "", "", "", "", "");
356 return;
03c35bcb 357 }
12516a37
KR
358 sort_parents (child);
359 for (arc = child->cg.parents; arc; arc = arc->next_parent)
360 {
361 parent = arc->parent;
362 if (child == parent || (child->cg.cyc.num != 0
363 && parent->cg.cyc.num == child->cg.cyc.num))
5489fcc3 364 {
12516a37
KR
365 /* selfcall or call among siblings: */
366 printf (bsd_style_output
367 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
368 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
369 "", "", "", "",
370 arc->count, "");
371 print_name (parent);
372 printf ("\n");
373 }
374 else
375 {
376 /* regular parent of child: */
377 printf (bsd_style_output
378 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
379 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
380 "", "",
381 arc->time / hz, arc->child_time / hz,
382 arc->count, cycle_head->ncalls);
383 print_name (parent);
384 printf ("\n");
03c35bcb
KR
385 }
386 }
387}
5489fcc3
KR
388
389
390static void
12516a37 391DEFUN (sort_children, (parent), Sym * parent)
5489fcc3 392{
12516a37
KR
393 Arc *arc, *detached, sorted, *prev;
394 /*
395 * Unlink children from parent, then insertion sort back on to
396 * sorted's children.
397 * *arc the arc you have detached and are inserting.
398 * *detached the rest of the arcs to be sorted.
399 * sorted arc list onto which you insertion sort.
400 * *prev arc before the arc you are comparing.
401 */
402 sorted.next_child = 0;
403 for (arc = parent->cg.children; arc; arc = detached)
404 {
405 detached = arc->next_child;
406
407 /* consider *arc as disconnected; insert it into sorted: */
408 for (prev = &sorted; prev->next_child; prev = prev->next_child)
409 {
410 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
411 {
412 break;
03c35bcb
KR
413 }
414 }
12516a37
KR
415 arc->next_child = prev->next_child;
416 prev->next_child = arc;
03c35bcb 417 }
12516a37
KR
418
419 /* reattach sorted children to parent: */
420 parent->cg.children = sorted.next_child;
03c35bcb 421}
5489fcc3
KR
422
423
424static void
12516a37 425DEFUN (print_children, (parent), Sym * parent)
5489fcc3 426{
12516a37
KR
427 Sym *child;
428 Arc *arc;
429
430 sort_children (parent);
431 arc = parent->cg.children;
432 for (arc = parent->cg.children; arc; arc = arc->next_child)
433 {
434 child = arc->child;
435 if (child == parent || (child->cg.cyc.num != 0
436 && child->cg.cyc.num == parent->cg.cyc.num))
437 {
438 /* self call or call to sibling: */
439 printf (bsd_style_output
440 ? "%6.6s %5.5s %7.7s %11.11s %7d %7.7s "
441 : "%6.6s %5.5s %7.7s %7.7s %7d %7.7s ",
442 "", "", "", "", arc->count, "");
443 print_name (child);
444 printf ("\n");
445 }
446 else
5489fcc3 447 {
12516a37
KR
448 /* regular child of parent: */
449 printf (bsd_style_output
450 ? "%6.6s %5.5s %7.2f %11.2f %7d/%-7d "
451 : "%6.6s %5.5s %7.2f %7.2f %7d/%-7d ",
452 "", "",
453 arc->time / hz, arc->child_time / hz,
454 arc->count, child->cg.cyc.head->ncalls);
455 print_name (child);
456 printf ("\n");
03c35bcb
KR
457 }
458 }
459}
5489fcc3
KR
460
461
462static void
12516a37 463DEFUN (print_line, (np), Sym * np)
5489fcc3 464{
12516a37
KR
465 char buf[BUFSIZ];
466
467 sprintf (buf, "[%d]", np->cg.index);
468 printf (bsd_style_output
469 ? "%-6.6s %5.1f %7.2f %11.2f"
470 : "%-6.6s %5.1f %7.2f %7.2f", buf,
471 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
472 np->cg.prop.self / hz, np->cg.prop.child / hz);
473 if ((np->ncalls + np->cg.self_calls) != 0)
474 {
475 printf (" %7d", np->ncalls);
476 if (np->cg.self_calls != 0)
477 {
478 printf ("+%-7d ", np->cg.self_calls);
479 }
480 else
481 {
482 printf (" %7.7s ", "");
03c35bcb 483 }
12516a37
KR
484 }
485 else
486 {
487 printf (" %7.7s %7.7s ", "", "");
03c35bcb 488 }
12516a37
KR
489 print_name (np);
490 printf ("\n");
03c35bcb 491}
5489fcc3
KR
492
493
494/*
495 * Print dynamic call graph.
496 */
497void
12516a37 498DEFUN (cg_print, (timesortsym), Sym ** timesortsym)
5489fcc3 499{
6b84886a 500 unsigned int index;
12516a37 501 Sym *parent;
5489fcc3 502
12516a37
KR
503 if (print_descriptions && bsd_style_output)
504 {
505 bsd_callg_blurb (stdout);
03c35bcb 506 }
5489fcc3 507
12516a37 508 print_header ();
5489fcc3 509
12516a37
KR
510 for (index = 0; index < symtab.len + num_cycles; ++index)
511 {
512 parent = timesortsym[index];
513 if ((ignore_zeros && parent->ncalls == 0
514 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
515 && parent->cg.prop.child == 0)
7862d7d0
ILT
516 || !parent->cg.print_flag
517 || (line_granularity && ! parent->is_func))
12516a37
KR
518 {
519 continue;
03c35bcb 520 }
12516a37 521 if (!parent->name && parent->cg.cyc.num != 0)
5489fcc3 522 {
12516a37
KR
523 /* cycle header: */
524 print_cycle (parent);
525 print_members (parent);
526 }
527 else
528 {
529 print_parents (parent);
530 print_line (parent);
531 print_children (parent);
03c35bcb 532 }
12516a37
KR
533 if (bsd_style_output)
534 printf ("\n");
535 printf ("-----------------------------------------------\n");
536 if (bsd_style_output)
537 printf ("\n");
5489fcc3 538 }
12516a37
KR
539 free (timesortsym);
540 if (print_descriptions && !bsd_style_output)
541 {
542 fsf_callg_blurb (stdout);
5489fcc3 543 }
03c35bcb 544}
5489fcc3
KR
545
546
547static int
12516a37 548DEFUN (cmp_name, (left, right), const PTR left AND const PTR right)
5489fcc3 549{
12516a37
KR
550 const Sym **npp1 = (const Sym **) left;
551 const Sym **npp2 = (const Sym **) right;
5489fcc3 552
12516a37 553 return strcmp ((*npp1)->name, (*npp2)->name);
03c35bcb 554}
5489fcc3
KR
555
556
557void
12516a37 558DEFUN_VOID (cg_print_index)
5489fcc3 559{
6b84886a
ILT
560 unsigned int index;
561 unsigned int nnames, todo, i, j;
562 int col, starting_col;
12516a37
KR
563 Sym **name_sorted_syms, *sym;
564 const char *filename;
565 char buf[20];
566 int column_width = (output_width - 1) / 3; /* don't write in last col! */
567 /*
568 * Now, sort regular function name alphabetically to create an
569 * index:
570 */
571 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
572 for (index = 0, nnames = 0; index < symtab.len; index++)
573 {
574 if (ignore_zeros && symtab.base[index].ncalls == 0
575 && symtab.base[index].hist.time == 0)
576 {
577 continue;
03c35bcb 578 }
12516a37 579 name_sorted_syms[nnames++] = &symtab.base[index];
03c35bcb 580 }
12516a37
KR
581 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
582 for (index = 1, todo = nnames; index <= num_cycles; index++)
583 {
584 name_sorted_syms[todo++] = &cycle_header[index];
03c35bcb 585 }
12516a37
KR
586 printf ("\f\nIndex by function name\n\n");
587 index = (todo + 2) / 3;
588 for (i = 0; i < index; i++)
589 {
590 col = 0;
591 starting_col = 0;
592 for (j = i; j < todo; j += index)
5489fcc3 593 {
12516a37
KR
594 sym = name_sorted_syms[j];
595 if (sym->cg.print_flag)
596 {
597 sprintf (buf, "[%d]", sym->cg.index);
598 }
599 else
600 {
601 sprintf (buf, "(%d)", sym->cg.index);
03c35bcb 602 }
12516a37
KR
603 if (j < nnames)
604 {
605 if (bsd_style_output)
606 {
607 printf ("%6.6s %-19.19s", buf, sym->name);
608 }
609 else
610 {
611 col += strlen (buf);
612 for (; col < starting_col + 5; ++col)
613 {
614 putchar (' ');
03c35bcb 615 }
12516a37
KR
616 printf (" %s ", buf);
617 col += print_name_only (sym);
618 if (!line_granularity && sym->is_static && sym->file)
619 {
620 filename = sym->file->name;
621 if (!print_path)
622 {
623 filename = strrchr (filename, '/');
624 if (filename)
625 {
626 ++filename;
627 }
628 else
629 {
630 filename = sym->file->name;
03c35bcb
KR
631 }
632 }
12516a37
KR
633 printf (" (%s)", filename);
634 col += strlen (filename) + 3;
03c35bcb
KR
635 }
636 }
12516a37
KR
637 }
638 else
639 {
869b94c5
KR
640 if (bsd_style_output)
641 {
642 printf ("%6.6s ", buf);
643 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
644 printf ("%-19.19s", buf);
645 }
646 else
647 {
648 col += strlen (buf);
649 for (; col < starting_col + 5; ++col)
650 putchar (' ');
651 printf (" %s ", buf);
652 sprintf (buf, "<cycle %d>", sym->cg.cyc.num);
653 printf ("%s", buf);
654 col += strlen (buf);
655 }
03c35bcb 656 }
12516a37 657 starting_col += column_width;
03c35bcb 658 }
12516a37 659 printf ("\n");
03c35bcb 660 }
12516a37 661 free (name_sorted_syms);
03c35bcb 662}
64c50fc5
JL
663
664/* Compare two arcs based on their usage counts. We want to sort
665 in descending order. */
666static int
667DEFUN (cmp_arc_count, (left, right), const PTR left AND const PTR right)
668{
669 const Arc **npp1 = (const Arc **) left;
670 const Arc **npp2 = (const Arc **) right;
671
672 if ((*npp1)->count > (*npp2)->count)
673 return -1;
674 else if ((*npp1)->count < (*npp2)->count)
675 return 1;
676 else
677 return 0;
678}
679
680/* Compare two funtions based on their usage counts. We want to sort
681 in descending order. */
682static int
683DEFUN (cmp_fun_nuses, (left, right), const PTR left AND const PTR right)
684{
685 const Sym **npp1 = (const Sym **) left;
686 const Sym **npp2 = (const Sym **) right;
687
688 if ((*npp1)->nuses > (*npp2)->nuses)
689 return -1;
690 else if ((*npp1)->nuses < (*npp2)->nuses)
691 return 1;
692 else
693 return 0;
694}
695
696/* Print a suggested function ordering based on the profiling data.
697
698 We perform 4 major steps when ordering functions:
699
700 * Group unused functions together and place them at the
701 end of the function order.
702
703 * Search the highest use arcs (those which account for 90% of
704 the total arc count) for functions which have several parents.
705
706 Group those with the most call sites together (currently the
707 top 1.25% which have at least five different call sites).
708
709 These are emitted at the start of the function order.
710
711 * Use a greedy placement algorithm to place functions which
712 occur in the top 99% of the arcs in the profile. Some provisions
713 are made to handle high usage arcs where the parent and/or
714 child has already been placed.
715
716 * Run the same greedy placement algorithm on the remaining
717 arcs to place the leftover functions.
718
719
720 The various "magic numbers" should (one day) be tuneable by command
721 line options. They were arrived at by benchmarking a few applications
722 with various values to see which values produced better overall function
723 orderings.
724
725 Of course, profiling errors, machine limitations (PA long calls), and
726 poor cutoff values for the placement algorithm may limit the usefullness
727 of the resulting function order. Improvements would be greatly appreciated.
728
729 Suggestions:
730
731 * Place the functions with many callers near the middle of the
732 list to reduce long calls.
733
734 * Propagate arc usage changes as functions are placed. Ie if
735 func1 and func2 are placed together, arcs to/from those arcs
736 to the same parent/child should be combined, then resort the
737 arcs to choose the next one.
738
739 * Implement some global positioning algorithm to place the
740 chains made by the greedy local positioning algorithm. Probably
741 by examining arcs which haven't been placed yet to tie two
742 chains together.
743
744 * Take a function's size and time into account in the algorithm;
745 size in particular is important on the PA (long calls). Placing
746 many small functions onto their own page may be wise.
747
748 * Use better profiling information; many published algorithms
749 are based on call sequences through time, rather than just
750 arc counts.
751
752 * Prodecure cloning could improve performance when a small number
753 of arcs account for most of the calls to a particular function.
754
755 * Use relocation information to avoid moving unused functions
756 completely out of the code stream; this would avoid severe lossage
757 when the profile data bears little resemblance to actual runs.
758
759 * Propagation of arc usages should also improve .o link line
760 ordering which shares the same arc placement algorithm with
761 the function ordering code (in fact it is a degenerate case
762 of function ordering). */
763
764void
765DEFUN_VOID (cg_print_function_ordering)
766{
767 unsigned long index, used, unused, scratch_index;
768 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
7a542ed9 769#ifdef __GNUC__
64c50fc5
JL
770 unsigned long long total_arcs, tmp_arcs_count;
771#else
772 unsigned long total_arcs, tmp_arcs_count;
773#endif
774 Sym **unused_syms, **used_syms, **scratch_syms;
775 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
776
777 index = 0;
778 used = 0;
779 unused = 0;
780 scratch_index = 0;
781 unplaced_arc_count = 0;
782 high_arc_count = 0;
783 scratch_arc_count = 0;
784
785 /* First group all the unused functions together. */
786 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
787 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
788 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
789 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
790 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
791 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
792
793 /* Walk through all the functions; mark those which are never
794 called as placed (we'll emit them as a group later). */
795 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
796 {
797 if (symtab.base[index].ncalls == 0)
798 {
799 /* Filter out gprof generated names. */
800 if (strcmp (symtab.base[index].name, "<locore>")
801 && strcmp (symtab.base[index].name, "<hicore>"))
802 {
803 unused_syms[unused++] = &symtab.base[index];
804 symtab.base[index].has_been_placed = 1;
805 }
806 }
807 else
808 {
809 used_syms[used++] = &symtab.base[index];
810 symtab.base[index].has_been_placed = 0;
811 symtab.base[index].next = 0;
812 symtab.base[index].prev = 0;
813 symtab.base[index].nuses = 0;
814 }
815 }
816
817 /* Sort the arcs from most used to least used. */
818 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
819
820 /* Compute the total arc count. Also mark arcs as unplaced.
821
822 Note we don't compensate for overflow if that happens!
823 Overflow is much less likely when this file is compiled
824 with GCC as it can double-wide integers via long long. */
825 total_arcs = 0;
826 for (index = 0; index < numarcs; index++)
827 {
828 total_arcs += arcs[index]->count;
829 arcs[index]->has_been_placed = 0;
830 }
831
832 /* We want to pull out those functions which are referenced
833 by many highly used arcs and emit them as a group. This
834 could probably use some tuning. */
835 tmp_arcs_count = 0;
836 for (index = 0; index < numarcs; index++)
837 {
838 tmp_arcs_count += arcs[index]->count;
839
840 /* Count how many times each parent and child are used up
841 to our threshhold of arcs (90%). */
842 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
843 break;
844
845 arcs[index]->child->nuses++;
846 }
847
848 /* Now sort a temporary symbol table based on the number of
849 times each function was used in the highest used arcs. */
df928c8f 850 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
64c50fc5
JL
851 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
852
853 /* Now pick out those symbols we're going to emit as
854 a group. We take up to 1.25% of the used symbols. */
855 for (index = 0; index < used / 80; index++)
856 {
857 Sym *sym = scratch_syms[index];
858 Arc *arc;
859
860 /* If we hit symbols that aren't used from many call sites,
861 then we can quit. We choose five as the low limit for
862 no particular reason. */
863 if (sym->nuses == 5)
864 break;
865
866 /* We're going to need the arcs between these functions.
867 Unfortunately, we don't know all these functions
868 until we're done. So we keep track of all the arcs
869 to the functions we care about, then prune out those
870 which are uninteresting.
871
872 An interesting variation would be to quit when we found
873 multi-call site functions which account for some percentage
874 of the arcs. */
875
876 arc = sym->cg.children;
877 while (arc)
878 {
879 if (arc->parent != arc->child)
880 scratch_arcs[scratch_arc_count++] = arc;
881 arc->has_been_placed = 1;
882 arc = arc->next_child;
883 }
884
885 arc = sym->cg.parents;
886 while (arc)
887 {
888 if (arc->parent != arc->child)
889 scratch_arcs[scratch_arc_count++] = arc;
890 arc->has_been_placed = 1;
891 arc = arc->next_parent;
892 }
893
894 /* Keep track of how many symbols we're going to place. */
895 scratch_index = index;
896
897 /* A lie, but it makes identifying these functions easier
898 later. */
899 sym->has_been_placed = 1;
900 }
901
902 /* Now walk through the temporary arcs and copy those we care about
903 into the high arcs array. */
904 for (index = 0; index < scratch_arc_count; index++)
905 {
906 Arc *arc = scratch_arcs[index];
907
908 /* If this arc refers to highly used functions, then
909 then we want to keep it. */
910 if (arc->child->has_been_placed
911 && arc->parent->has_been_placed)
912 {
913 high_arcs[high_arc_count++] = scratch_arcs[index];
914
915 /* We need to turn of has_been_placed since we're going to
916 use the main arc placement algorithm on these arcs. */
917 arc->child->has_been_placed = 0;
918 arc->parent->has_been_placed = 0;
919 }
920 }
921
922 /* Dump the multi-site high usage functions which are not going
923 to be ordered by the main ordering algorithm. */
924 for (index = 0; index < scratch_index; index++)
925 {
926 if (scratch_syms[index]->has_been_placed)
927 printf ("%s\n", scratch_syms[index]->name);
928 }
929
930 /* Now we can order the multi-site high use functions based on the
931 arcs between them. */
932 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
933 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
934 unplaced_arcs, &unplaced_arc_count);
935
936 /* Order and dump the high use functions left, these typically
937 have only a few call sites. */
938 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
939 unplaced_arcs, &unplaced_arc_count);
940
941 /* Now place the rarely used functions. */
942 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
943 scratch_arcs, &scratch_arc_count);
944
945 /* Output any functions not emitted by the order_and_dump calls. */
946 for (index = 0; index < used; index++)
947 if (used_syms[index]->has_been_placed == 0)
948 printf("%s\n", used_syms[index]->name);
949
950 /* Output the unused functions. */
951 for (index = 0; index < unused; index++)
952 printf("%s\n", unused_syms[index]->name);
953
954 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
955 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
956 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
957 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
958 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
959 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
960
961 free (unused_syms);
962 free (used_syms);
963 free (scratch_syms);
964 free (high_arcs);
965 free (scratch_arcs);
966 free (unplaced_arcs);
967}
968
969/* Place functions based on the arcs in ARCS with NUMARCS entries;
970 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
971
972 If ALL is nonzero, then place all functions referenced by ARCS,
973 else only place those referenced in the top 99% of the arcs in ARCS. */
974
975#define MOST 0.99
976static void
977order_and_dump_functions_by_arcs (arcs, numarcs, all,
978 unplaced_arcs, unplaced_arc_count)
979 Arc **arcs;
980 unsigned long numarcs;
981 int all;
982 Arc **unplaced_arcs;
983 unsigned long *unplaced_arc_count;
984{
7a542ed9 985#ifdef __GNUC__
64c50fc5
JL
986 unsigned long long tmp_arcs, total_arcs;
987#else
988 unsigned long tmp_arcs, total_arcs;
989#endif
990 unsigned int index;
991
992 /* If needed, compute the total arc count.
993
994 Note we don't compensate for overflow if that happens! */
995 if (! all)
996 {
997 total_arcs = 0;
998 for (index = 0; index < numarcs; index++)
999 total_arcs += arcs[index]->count;
1000 }
1001 else
1002 total_arcs = 0;
1003
1004 tmp_arcs = 0;
1005 for (index = 0; index < numarcs; index++)
1006 {
1007 Sym *sym1, *sym2;
1008 Sym *child, *parent;
1009
1010 tmp_arcs += arcs[index]->count;
1011
1012 /* Ignore this arc if it's already been placed. */
1013 if (arcs[index]->has_been_placed)
1014 continue;
1015
1016 child = arcs[index]->child;
1017 parent = arcs[index]->parent;
1018
1019 /* If we're not using all arcs, and this is a rarely used
1020 arc, then put it on the unplaced_arc list. Similarly
1021 if both the parent and child of this arc have been placed. */
1022 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1023 || child->has_been_placed || parent->has_been_placed)
1024 {
1025 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1026 continue;
1027 }
1028
1029 /* If all slots in the parent and child are full, then there isn't
1030 anything we can do right now. We'll place this arc on the
1031 unplaced arc list in the hope that a global positioning
1032 algorithm can use it to place function chains. */
1033 if (parent->next && parent->prev && child->next && child->prev)
1034 {
1035 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1036 continue;
1037 }
1038
1039 /* If the parent is unattached, then find the closest
1040 place to attach it onto child's chain. Similarly
1041 for the opposite case. */
1042 if (!parent->next && !parent->prev)
1043 {
1044 int next_count = 0;
1045 int prev_count = 0;
1046 Sym *prev = child;
1047 Sym *next = child;
1048
1049 /* Walk to the beginning and end of the child's chain. */
1050 while (next->next)
1051 {
1052 next = next->next;
1053 next_count++;
1054 }
1055
1056 while (prev->prev)
1057 {
1058 prev = prev->prev;
1059 prev_count++;
1060 }
1061
1062 /* Choose the closest. */
1063 child = next_count < prev_count ? next : prev;
1064 }
1065 else if (! child->next && !child->prev)
1066 {
1067 int next_count = 0;
1068 int prev_count = 0;
1069 Sym *prev = parent;
1070 Sym *next = parent;
1071
1072 while (next->next)
1073 {
1074 next = next->next;
1075 next_count++;
1076 }
1077
1078 while (prev->prev)
1079 {
1080 prev = prev->prev;
1081 prev_count++;
1082 }
1083
1084 parent = prev_count < next_count ? prev : next;
1085 }
1086 else
1087 {
1088 /* Couldn't find anywhere to attach the functions,
1089 put the arc on the unplaced arc list. */
1090 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1091 continue;
1092 }
1093
1094 /* Make sure we don't tie two ends together. */
1095 sym1 = parent;
1096 if (sym1->next)
1097 while (sym1->next)
1098 sym1 = sym1->next;
1099 else
1100 while (sym1->prev)
1101 sym1 = sym1->prev;
1102
1103 sym2 = child;
1104 if (sym2->next)
1105 while (sym2->next)
1106 sym2 = sym2->next;
1107 else
1108 while (sym2->prev)
1109 sym2 = sym2->prev;
1110
1111 if (sym1 == child
1112 && sym2 == parent)
1113 {
1114 /* This would tie two ends together. */
1115 unplaced_arcs[(*unplaced_arc_count)++] = arcs[index];
1116 continue;
1117 }
1118
1119 if (parent->next)
1120 {
1121 /* Must attach to the parent's prev field. */
1122 if (! child->next)
1123 {
1124 /* parent-prev and child-next */
1125 parent->prev = child;
1126 child->next = parent;
1127 arcs[index]->has_been_placed = 1;
1128 }
1129 }
1130 else if (parent->prev)
1131 {
1132 /* Must attach to the parent's next field. */
1133 if (! child->prev)
1134 {
1135 /* parent-next and child-prev */
1136 parent->next = child;
1137 child->prev = parent;
1138 arcs[index]->has_been_placed = 1;
1139 }
1140 }
1141 else
1142 {
1143 /* Can attach to either field in the parent, depends
1144 on where we've got space in the child. */
1145 if (child->prev)
1146 {
1147 /* parent-prev and child-next */
1148 parent->prev = child;
1149 child->next = parent;
1150 arcs[index]->has_been_placed = 1;
1151 }
1152 else
1153 {
1154 /* parent-next and child-prev */
1155 parent->next = child;
1156 child->prev = parent;
1157 arcs[index]->has_been_placed = 1;
1158 }
1159 }
1160 }
1161
1162 /* Dump the chains of functions we've made. */
1163 for (index = 0; index < numarcs; index++)
1164 {
1165 Sym *sym;
1166 if (arcs[index]->parent->has_been_placed
1167 || arcs[index]->child->has_been_placed)
1168 continue;
1169
1170 sym = arcs[index]->parent;
1171
1172 /* If this symbol isn't attached to any other
1173 symbols, then we've got a rarely used arc.
1174
1175 Skip it for now, we'll deal with them later. */
1176 if (sym->next == NULL
1177 && sym->prev == NULL)
1178 continue;
1179
1180 /* Get to the start of this chain. */
1181 while (sym->prev)
1182 sym = sym->prev;
1183
1184 while (sym)
1185 {
1186 /* Mark it as placed. */
1187 sym->has_been_placed = 1;
1188 printf ("%s\n", sym->name);
1189 sym = sym->next;
1190 }
1191 }
1192
1193 /* If we want to place all the arcs, then output those which weren't
1194 placed by the main algorithm. */
1195 if (all)
1196 for (index = 0; index < numarcs; index++)
1197 {
1198 Sym *sym;
1199 if (arcs[index]->parent->has_been_placed
1200 || arcs[index]->child->has_been_placed)
1201 continue;
1202
1203 sym = arcs[index]->parent;
1204
1205 sym->has_been_placed = 1;
1206 printf ("%s\n", sym->name);
1207 }
1208}
1209
1210/* Print a suggested .o ordering for files on a link line based
1211 on profiling information. This uses the function placement
1212 code for the bulk of its work. */
1213
1214struct function_map {
1215 char *function_name;
1216 char *file_name;
1217};
1218
1219void
1220DEFUN_VOID (cg_print_file_ordering)
1221{
1222 unsigned long scratch_arc_count, index;
1223 Arc **scratch_arcs;
1224 extern struct function_map *symbol_map;
6b84886a 1225 extern unsigned int symbol_map_count;
64c50fc5
JL
1226 char *last;
1227
1228 scratch_arc_count = 0;
1229
1230 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1231 for (index = 0; index < numarcs; index++)
1232 {
1233 if (! arcs[index]->parent->mapped
1234 || ! arcs[index]->child->mapped)
1235 arcs[index]->has_been_placed = 1;
1236 }
1237
1238 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1239 scratch_arcs, &scratch_arc_count);
1240
1241 /* Output .o's not handled by the main placement algorithm. */
1242 for (index = 0; index < symtab.len; index++)
1243 {
1244 if (symtab.base[index].mapped
1245 && ! symtab.base[index].has_been_placed)
1246 printf ("%s\n", symtab.base[index].name);
1247 }
1248
1249 /* Now output any .o's that didn't have any text symbols. */
1250 last = NULL;
1251 for (index = 0; index < symbol_map_count; index++)
1252 {
6b84886a 1253 unsigned int index2;
64c50fc5
JL
1254
1255 /* Don't bother searching if this symbol is the
1256 same as the previous one. */
1257 if (last && !strcmp (last, symbol_map[index].file_name))
1258 continue;
1259
1260 for (index2 = 0; index2 < symtab.len; index2++)
1261 {
1262 if (! symtab.base[index2].mapped)
1263 continue;
1264
1265 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1266 break;
1267 }
1268
1269 /* If we didn't find it in the symbol table, then it must be a .o
1270 with no text symbols. Output it last. */
1271 if (index2 == symtab.len)
1272 printf ("%s\n", symbol_map[index].file_name);
1273 last = symbol_map[index].file_name;
1274 }
1275}
This page took 0.15643 seconds and 4 git commands to generate.