1 /* CTF type deduplication.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This file is part of libctf.
6 libctf is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; see the file COPYING. If not see
18 <http://www.gnu.org/licenses/>. */
26 /* (In the below, relevant functions are named in square brackets.) */
28 /* Type deduplication is a three-phase process:
30 [ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type]
31 1) come up with unambiguous hash values for all types: no two types may have
32 the same hash value, and any given type should have only one hash value
33 (for optimal deduplication).
35 [ctf_dedup, ctf_dedup_detect_name_ambiguity,
36 ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash]
37 2) mark those distinct types with names that collide (and thus cannot be
38 declared simultaneously in the same translation unit) as conflicting, and
39 recursively mark all types that cite one of those types as conflicting as
40 well. Possibly mark all types cited in only one TU as conflicting, if
41 the CTF_LINK_SHARE_DUPLICATED link mode is active.
43 [ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target]
44 3) emit all the types, one hash value at a time. Types not marked
45 conflicting are emitted once, into the shared dictionary: types marked
46 conflicting are emitted once per TU into a dictionary corresponding to
47 each TU in which they appear. Structs marked conflicting get at the very
48 least a forward emitted into the shared dict so that other dicts can cite
52 This all works over an array of inputs (usually in the same order as the
53 inputs on the link line). We don't use the ctf_link_inputs hash directly
54 because it is convenient to be able to address specific input types as a
55 *global type ID* or 'GID', a pair of an array offset and a ctf_id_t. Since
56 both are already 32 bits or less or can easily be constrained to that range,
57 we can pack them both into a single 64-bit hash word for easy lookups, which
58 would be much more annoying to do with a ctf_file_t * and a ctf_id_t. (On
59 32-bit platforms, we must do that anyway, since pointers, and thus hash keys
60 and values, are only 32 bits wide). We track which inputs are parents of
61 which other inputs so that we can correctly recognize that types we have
62 traversed in children may cite types in parents, and so that we can process
65 Note that thanks to ld -r, the deduplicator can be fed its own output, so the
66 inputs may themselves have child dicts. Since we need to support this usage
67 anyway, we can use it in one other place. If the caller finds translation
68 units to be too small a unit ambiguous types, links can be 'cu-mapped', where
69 the caller provides a mapping of input TU names to output child dict names.
70 This mapping can fuse many child TUs into one potential child dict, so that
71 ambiguous types in any of those input TUs go into the same child dict.
72 When a many:1 cu-mapping is detected, the ctf_dedup machinery is called
73 repeatedly, once for every output name that has more than one input, to fuse
74 all the input TUs associated with a given output dict into one, and once again
75 as normal to deduplicate all those intermediate outputs (and any 1:1 inputs)
76 together. This has much higher memory usage than otherwise, because in the
77 intermediate state, all the output TUs are in memory at once and cannot be
78 lazily opened. It also has implications for the emission code: if types
79 appear ambiguously in multiple input TUs that are all mapped to the same
80 child dict, we cannot put them in children in the cu-mapping link phase
81 because this output is meant to *become* a child in the next link stage and
82 parent/child relationships are only one level deep: so instead, we just hide
83 all but one of the ambiguous types.
85 There are a few other subtleties here that make this more complex than it
86 seems. Let's go over the steps above in more detail.
90 [ctf_dedup_hash_type, ctf_dedup_rhash_type]
91 Hashing proceeds recursively, mixing in the properties of each input type
92 (including its name, if any), and then adding the hash values of every type
93 cited by that type. The result is stashed in the cd_type_hashes so other
94 phases can find the hash values of input types given their IDs, and so that
95 if we encounter this type again while hashing we can just return its hash
96 value: it is also stashed in the *output mapping*, a mapping from hash value
97 to the set of GIDs corresponding to that type in all inputs. We also keep
98 track of the GID of the first appearance of the type in any input (in
99 cd_output_first_gid), and the GID of structs, unions, and forwards that only
100 appear in one TU (in cd_struct_origin). See below for where these things are
103 Everything in this phase is time-critical, because it is operating over
104 non-deduplicated types and so may have hundreds or thousands of times the
105 data volume to deal with than later phases. Trace output is hidden behind
106 ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to
107 ctf_dprintf from slowing things down (tenfold slowdowns are observed purely
108 from the calls to ctf_dprintf(), even with debugging switched off), and keep
109 down the volume of output (hundreds of gigabytes of debug output are not
110 uncommon on larger links).
112 We have to do *something* about potential cycles in the type graph. We'd
113 like to avoid emitting forwards in the final output if possible, because
114 forwards aren't much use: they have no members. We are mostly saved from
115 needing to worry about this at emission time by ctf_add_struct*()
116 automatically replacing newly-created forwards when the real struct/union
117 comes along. So we only have to avoid getting stuck in cycles during the
118 hashing phase, while also not confusing types that cite members that are
119 structs with each other. It is easiest to solve this problem by noting two
122 - all cycles in C depend on the presence of tagged structs/unions
123 - all tagged structs/unions have a unique name they can be disambiguated by
126 This means that we can break all cycles by ceasing to hash in cited types at
127 every tagged struct/union and instead hashing in a stub consisting of the
128 struct/union's *decorated name*, which is the name preceded by "s " or "u "
129 depending on the namespace (cached in cd_decorated_names). Forwards are
130 decorated identically (so a forward to "struct foo" would be represented as
131 "s foo"): this means that a citation of a forward to a type and a citation of
132 a concrete definition of a type with the same name ends up getting the same
135 Of course, it is quite possible to have two TUs with structs with the same
136 name and different definitions, but that's OK because when we scan for types
137 with ambiguous names we will identify these and mark them conflicting.
139 We populate one thing to help conflictedness marking. No unconflicted type
140 may cite a conflicted one, but this means that conflictedness marking must
141 walk from types to the types that cite them, which is the opposite of the
142 usual order. We can make this easier to do by constructing a *citers* graph
143 in cd_citers, which points from types to the types that cite them: because we
144 emit forwards corresponding to every conflicted struct/union, we don't need
145 to do this for citations of structs/unions by other types. This is very
146 convenient for us, because that's the only type we don't traverse
147 recursively: so we can construct the citers graph at the same time as we
148 hash, rather than needing to add an extra pass. (This graph is a dynhash of
149 *type hash values*, so it's small: in effect it is automatically
152 2) COLLISIONAL MARKING.
154 [ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash]
155 We identify types whose names collide during the hashing process, and count
156 the rough number of uses of each name (caching may throw it off a bit: this
157 doesn't need to be accurate). We then mark the less-frequently-cited types
158 with each names conflicting: the most-frequently-cited one goes into the
159 shared type dictionary, while all others are duplicated into per-TU
160 dictionaries, named after the input TU, that have the shared dictionary as a
161 parent. For structures and unions this is not quite good enough: we'd like
162 to have citations of forwards to ambiguously named structures and unions
163 *stay* as citations of forwards, so that the user can tell that the caller
164 didn't actually know which structure definition was meant: but if we put one
165 of those structures into the shared dictionary, it would supplant and replace
166 the forward, leaving no sign. So structures and unions do not take part in
167 this popularity contest: if their names are ambiguous, they are just
168 duplicated, and only a forward appears in the shared dict.
170 [ctf_dedup_propagate_conflictedness]
171 The process of marking types conflicted is itself recursive: we recursively
172 traverse the cd_citers graph populated in the hashing pass above and mark
173 everything that we encounter conflicted (without wasting time re-marking
174 anything that is already marked). This naturally terminates just where we
175 want it to (at types that are cited by no other types, and at structures and
176 unions) and suffices to ensure that types that cite conflicted types are
177 always marked conflicted.
179 [ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts]
180 When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that
181 are used in only one TU to end up in a per-CU dict. The easiest way to do
182 that is to mark them conflicted. ctf_dedup_conflictify_unshared does this,
183 traversing the output mapping and using ctf_dedup_multiple_input_dicts to
184 check the number of input dicts each distinct type hash value came from:
185 types that only came from one get marked conflicted. One caveat here is that
186 we need to consider both structs and forwards to them: a struct that appears
187 in one TU and has a dozen citations to an opaque forward in other TUs should
188 *not* be considered to be used in only one TU, because users would find it
189 useful to be able to traverse into opaque structures of that sort: so we use
190 cd_struct_origin to check both structs/unions and the forwards corresponding
195 [ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping,
196 ctf_dedup_rwalk_one_output_mapping]
197 Emission involves another walk of the entire output mapping, this time
198 traversing everything other than struct members, recursively. Types are
199 emitted from leaves to trunk, emitting all types a type cites before emitting
200 the type itself. We sort the output mapping before traversing it, for
201 reproducibility and also correctness: the input dicts may have parent/child
202 relationships, so we simply sort all types that first appear in parents
203 before all children, then sort types that first appear in dicts appearing
204 earlier on the linker command line before those that appear later, then sort
205 by input ctf_id_t. (This is where we use cd_output_first_gid, collected
208 The walking is done using a recursive traverser which arranges to not revisit
209 any type already visited and to call its callback once per input GID for
210 input GIDs corresponding to conflicted output types. The traverser only
211 finds input types and calls a callback for them as many times as the output
212 needs to appear: it doesn't try to figure out anything about where the output
213 might go. That's done by the callback based on whether the type is
214 marked conflicted or not.
216 [ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward]
217 ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping.
218 Conflicted types have all necessary dictionaries created, and then we emit
219 the type into each dictionary in turn, working over each input CTF type
220 corresponding to each hash value and using ctf_dedup_id_to_target to map each
221 input ctf_id_t into the corresponding type in the output (dealing with input
222 ctf_id_t's with parents in the process by simply chasing to the parent dict
223 if the type we're looking up is in there). Emitting structures involves
224 simply noting that the members of this structure need emission later on:
225 because you cannot cite a single structure member from another type, we avoid
226 emitting the members at this stage to keep recursion depths down a bit.
228 At this point, if we have by some mischance decided that two different types
229 with child types that hash to different values have in fact got the same hash
230 value themselves and *not* marked it conflicting, the type walk will walk
231 only *one* of them and in all likelihood we'll find that we are trying to
232 emit a type into some child dictionary that references a type that was never
233 emitted into that dictionary and assertion-fail. This always indicates a bug
234 in the conflictedness marking machinery or the hashing code, or both.
236 ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra
237 thing, alluded to above: if this is a conflicted tagged structure or union,
238 and the target is the shared dict (i.e., the type we're being asked to emit
239 is not itself conflicted so can't just point straight at the conflicted
240 type), we instead synthesise a forward with the same name, emit it into the
241 shared dict, record it in cd_output_emission_conflicted_forwards so that we
242 don't re-emit it, and return it. This means that cycles that contain
243 conflicts do not cause the entire cycle to be replicated in every child: only
244 that piece of the cycle which takes you back as far as the closest tagged
245 struct/union needs to be replicated. This trick means that no part of the
246 deduplicator needs a cycle detector: every recursive walk can stop at tagged
249 [ctf_dedup_emit_struct_members]
250 The final stage of emission is to walk over all structures with members
251 that need emission and emit all of them. Every type has been emitted at
252 this stage, so emission cannot fail.
254 [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping]
255 Finally, we update the input -> output type ID mappings used by the ctf-link
256 machinery to update all the other sections. This is surprisingly expensive
257 and may be replaced with a scheme which lets the ctf-link machinery extract
258 the needed info directly from the deduplicator. */
260 /* Possible future optimizations are flagged with 'optimization opportunity'
263 /* Global optimization opportunity: a GC pass, eliminating types with no direct
264 or indirect citations from the other sections in the dictionary. */
266 /* Internal flag values for ctf_dedup_hash_type. */
268 /* Child call: consider forwardable types equivalent to forwards or stubs below
270 #define CTF_DEDUP_HASH_INTERNAL_CHILD 0x01
272 /* Transform references to single ctf_id_ts in passed-in inputs into a number
273 that will fit in a uint64_t. Needs rethinking if CTF_MAX_TYPE is boosted.
275 On 32-bit platforms, we pack things together differently: see the note
278 #if UINTPTR_MAX < UINT64_MAX
279 # define IDS_NEED_ALLOCATION 1
280 # define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type)
281 # define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id)
282 # define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id)
284 # define CTF_DEDUP_GID(fp, input, type) \
285 (void *) (((uint64_t) input) << 32 | (type))
286 # define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32))
287 # define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL))
290 #ifdef IDS_NEED_ALLOCATION
292 /* This is the 32-bit path, which stores GIDs in a pool and returns a pointer
293 into the pool. It is notably less efficient than the 64-bit direct storage
294 approach, but with a smaller key, this is all we can do. */
297 id_to_packed_id (ctf_file_t
*fp
, int input_num
, ctf_id_t type
)
300 ctf_type_id_key_t
*dynkey
= NULL
;
301 ctf_type_id_key_t key
= { input_num
, type
};
303 if (!ctf_dynhash_lookup_kv (fp
->ctf_dedup
.cd_id_to_file_t
,
304 &key
, &lookup
, NULL
))
306 if ((dynkey
= malloc (sizeof (ctf_type_id_key_t
))) == NULL
)
308 memcpy (dynkey
, &key
, sizeof (ctf_type_id_key_t
));
310 if (ctf_dynhash_insert (fp
->ctf_dedup
.cd_id_to_file_t
, dynkey
, NULL
) < 0)
313 ctf_dynhash_lookup_kv (fp
->ctf_dedup
.cd_id_to_file_t
,
314 dynkey
, &lookup
, NULL
);
316 /* We use a raw assert() here because there isn't really a way to get any sort
317 of error back from this routine without vastly complicating things for the
318 much more common case of !IDS_NEED_ALLOCATION. */
320 return (void *) lookup
;
324 ctf_set_errno (fp
, ENOMEM
);
329 packed_id_to_input (const void *id
)
331 const ctf_type_id_key_t
*key
= (ctf_type_id_key_t
*) id
;
333 return key
->ctii_input_num
;
337 packed_id_to_type (const void *id
)
339 const ctf_type_id_key_t
*key
= (ctf_type_id_key_t
*) id
;
341 return key
->ctii_type
;
345 /* Make an element in a dynhash-of-dynsets, or return it if already present. */
347 static ctf_dynset_t
*
348 make_set_element (ctf_dynhash_t
*set
, const void *key
)
350 ctf_dynset_t
*element
;
352 if ((element
= ctf_dynhash_lookup (set
, key
)) == NULL
)
354 if ((element
= ctf_dynset_create (htab_hash_string
,
355 ctf_dynset_eq_string
,
359 if (ctf_dynhash_insert (set
, (void *) key
, element
) < 0)
361 ctf_dynset_destroy (element
);
369 /* Initialize the dedup atoms table. */
371 ctf_dedup_atoms_init (ctf_file_t
*fp
)
373 if (fp
->ctf_dedup_atoms
)
376 if (!fp
->ctf_dedup_atoms_alloc
)
378 if ((fp
->ctf_dedup_atoms_alloc
379 = ctf_dynset_create (htab_hash_string
, ctf_dynset_eq_string
,
381 return ctf_set_errno (fp
, ENOMEM
);
383 fp
->ctf_dedup_atoms
= fp
->ctf_dedup_atoms_alloc
;
387 /* Intern things in the dedup atoms table. */
390 intern (ctf_file_t
*fp
, char *atom
)
397 if (!ctf_dynset_exists (fp
->ctf_dedup_atoms
, atom
, &foo
))
399 if (ctf_dynset_insert (fp
->ctf_dedup_atoms
, atom
) < 0)
401 ctf_set_errno (fp
, ENOMEM
);
409 return (const char *) foo
;
412 /* Add an indication of the namespace to a type name in a way that is not valid
413 for C identifiers. Used to maintain hashes of type names to other things
414 while allowing for the four C namespaces (normal, struct, union, enum).
415 Return a new dynamically-allocated string. */
417 ctf_decorate_type_name (ctf_file_t
*fp
, const char *name
, int kind
)
419 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
444 if ((ret
= ctf_dynhash_lookup (d
->cd_decorated_names
[i
], name
)) == NULL
)
448 if ((str
= malloc (strlen (name
) + strlen (k
) + 1)) == NULL
)
453 ret
= intern (fp
, str
);
457 if (ctf_dynhash_cinsert (d
->cd_decorated_names
[i
], name
, ret
) < 0)
464 ctf_set_errno (fp
, ENOMEM
);
468 /* Hash a type, possibly debugging-dumping something about it as well. */
470 ctf_dedup_sha1_add (ctf_sha1_t
*sha1
, const void *buf
, size_t len
,
471 const char *description _libctf_unused_
,
472 unsigned long depth _libctf_unused_
)
474 ctf_sha1_add (sha1
, buf
, len
);
476 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
478 char tmp_hval
[CTF_SHA1_SIZE
];
480 ctf_sha1_fini (&tmp
, tmp_hval
);
481 ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth
, description
,
487 ctf_dedup_hash_type (ctf_file_t
*fp
, ctf_file_t
*input
,
488 ctf_file_t
**inputs
, uint32_t *parents
,
489 int input_num
, ctf_id_t type
, int flags
,
491 int (*populate_fun
) (ctf_file_t
*fp
,
497 const char *decorated_name
,
500 /* Determine whether this type is being hashed as a stub (in which case it is
501 unsafe to cache it). */
503 ctf_dedup_is_stub (const char *name
, int kind
, int fwdkind
, int flags
)
505 /* We can cache all types unless we are recursing to children and are hashing
506 in a tagged struct, union or forward, all of which are replaced with their
507 decorated name as a stub and will have different hash values when hashed at
510 return ((flags
& CTF_DEDUP_HASH_INTERNAL_CHILD
) && name
511 && (kind
== CTF_K_STRUCT
|| kind
== CTF_K_UNION
512 || (kind
== CTF_K_FORWARD
&& (fwdkind
== CTF_K_STRUCT
513 || fwdkind
== CTF_K_UNION
))));
516 /* Populate struct_origin if need be (not already populated, or populated with
517 a different origin), in which case it must go to -1, "shared".)
519 Only called for forwards or forwardable types with names, when the link mode
520 is CTF_LINK_SHARE_DUPLICATED. */
522 ctf_dedup_record_origin (ctf_file_t
*fp
, int input_num
, const char *decorated
,
525 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
527 int populate_origin
= 0;
529 if (ctf_dynhash_lookup_kv (d
->cd_struct_origin
, decorated
, NULL
, &origin
))
531 if (CTF_DEDUP_GID_TO_INPUT (origin
) != input_num
532 && CTF_DEDUP_GID_TO_INPUT (origin
) != -1)
535 origin
= CTF_DEDUP_GID (fp
, -1, -1);
545 if (ctf_dynhash_cinsert (d
->cd_struct_origin
, decorated
, origin
) < 0)
546 return ctf_set_errno (fp
, errno
);
550 /* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it
551 calls, recursively). */
554 ctf_dedup_rhash_type (ctf_file_t
*fp
, ctf_file_t
*input
, ctf_file_t
**inputs
,
555 uint32_t *parents
, int input_num
, ctf_id_t type
,
556 void *type_id
, const ctf_type_t
*tp
, const char *name
,
557 const char *decorated
, int kind
, int flags
,
559 int (*populate_fun
) (ctf_file_t
*fp
,
565 const char *decorated_name
,
568 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
569 ctf_next_t
*i
= NULL
;
572 char hashbuf
[CTF_SHA1_SIZE
];
573 const char *hval
= NULL
;
577 const char *citer
= NULL
;
578 ctf_dynset_t
*citers
= NULL
;
580 /* Add a citer to the citers set. */
581 #define ADD_CITER(citers, hval) \
584 whaterr = "updating citers"; \
586 if ((citers = ctf_dynset_create (htab_hash_string, \
587 ctf_dynset_eq_string, \
590 if (ctf_dynset_cinsert (citers, hval) < 0) \
594 /* If this is a named struct or union or a forward to one, and this is a child
595 traversal, treat this type as if it were a forward -- do not recurse to
596 children, ignore all content not already hashed in, and hash in the
597 decorated name of the type instead. */
599 if (ctf_dedup_is_stub (name
, kind
, tp
->ctt_type
, flags
))
601 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
602 ctf_dprintf ("Struct/union/forward citation: substituting forwarding "
603 "stub with decorated name %s\n", decorated
);
606 ctf_sha1_init (&hash
);
607 ctf_dedup_sha1_add (&hash
, decorated
, strlen (decorated
) + 1,
608 "decorated struct/union/forward name", depth
);
609 ctf_sha1_fini (&hash
, hashbuf
);
611 if ((hval
= intern (fp
, strdup (hashbuf
))) == NULL
)
613 ctf_err_warn (fp
, 0, "%s (%i): out of memory during forwarding-stub "
614 "hashing for type with GID %p; errno: %s",
615 ctf_link_input_name (input
), input_num
, type_id
,
617 return NULL
; /* errno is set for us. */
620 /* In share--duplicated link mode, make sure the origin of this type is
621 recorded, even if this is a type in a parent dict which will not be
622 directly traversed. */
623 if (d
->cd_link_flags
& CTF_LINK_SHARE_DUPLICATED
624 && ctf_dedup_record_origin (fp
, input_num
, decorated
, type_id
) < 0)
625 return NULL
; /* errno is set for us. */
630 /* Now ensure that subsequent recursive calls (but *not* the top-level call)
631 get this treatment. */
632 flags
|= CTF_DEDUP_HASH_INTERNAL_CHILD
;
634 /* If this is a struct, union, or forward with a name, record the unique
635 originating input TU, if there is one. */
637 if (decorated
&& (ctf_forwardable_kind (kind
) || kind
!= CTF_K_FORWARD
))
638 if (d
->cd_link_flags
& CTF_LINK_SHARE_DUPLICATED
639 && ctf_dedup_record_origin (fp
, input_num
, decorated
, type_id
) < 0)
640 return NULL
; /* errno is set for us. */
642 /* Mix in invariant stuff, transforming the type kind if needed. Note that
643 the vlen is *not* hashed in: the actual variable-length info is hashed in
644 instead, piecewise. The vlen is not part of the type, only the
645 variable-length data is: identical types with distinct vlens are quite
646 possible. Equally, we do not want to hash in the isroot flag: both the
647 compiler and the deduplicator set the nonroot flag to indicate clashes with
648 *other types in the same TU* with the same name: so two types can easily
649 have distinct nonroot flags, yet be exactly the same type.*/
651 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
652 ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n",
653 depth
, input_num
, type
, kind
, name
? name
: "");
656 ctf_sha1_init (&hash
);
658 ctf_dedup_sha1_add (&hash
, name
, strlen (name
) + 1, "name", depth
);
659 ctf_dedup_sha1_add (&hash
, &kind
, sizeof (uint32_t), "kind", depth
);
661 /* Hash content of this type. */
665 /* No extra state. */
669 /* Add the forwarded kind, stored in the ctt_type. */
670 ctf_dedup_sha1_add (&hash
, &tp
->ctt_type
, sizeof (tp
->ctt_type
),
671 "forwarded kind", depth
);
677 memset (&ep
, 0, sizeof (ctf_encoding_t
));
679 ctf_dedup_sha1_add (&hash
, &tp
->ctt_size
, sizeof (uint32_t), "size",
681 if (ctf_type_encoding (input
, type
, &ep
) < 0)
683 whaterr
= "encoding";
686 ctf_dedup_sha1_add (&hash
, &ep
, sizeof (ctf_encoding_t
), "encoding",
690 /* Types that reference other types. */
696 /* Hash the referenced type, if not already hashed, and mix it in. */
697 child_type
= ctf_type_reference (input
, type
);
698 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
, input_num
,
699 child_type
, flags
, depth
,
700 populate_fun
)) == NULL
)
702 whaterr
= "referenced type hashing";
705 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "referenced type",
711 /* The slices of two types hash identically only if the type they overlay
712 also has the same encoding. This is not ideal, but in practice will work
713 well enough. We work directly rather than using the CTF API because
714 we do not want the slice's normal automatically-shine-through
715 semantics to kick in here. */
718 const ctf_slice_t
*slice
;
719 const ctf_dtdef_t
*dtd
;
723 child_type
= ctf_type_reference (input
, type
);
724 ctf_get_ctt_size (input
, tp
, &size
, &increment
);
725 ctf_dedup_sha1_add (&hash
, &size
, sizeof (ssize_t
), "size", depth
);
727 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
, input_num
,
728 child_type
, flags
, depth
,
729 populate_fun
)) == NULL
)
731 whaterr
= "slice-referenced type hashing";
734 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "sliced type",
738 if ((dtd
= ctf_dynamic_type (input
, type
)) != NULL
)
739 slice
= &dtd
->dtd_u
.dtu_slice
;
741 slice
= (ctf_slice_t
*) ((uintptr_t) tp
+ increment
);
743 ctf_dedup_sha1_add (&hash
, &slice
->cts_offset
,
744 sizeof (slice
->cts_offset
), "slice offset", depth
);
745 ctf_dedup_sha1_add (&hash
, &slice
->cts_bits
,
746 sizeof (slice
->cts_bits
), "slice bits", depth
);
754 if (ctf_array_info (input
, type
, &ar
) < 0)
756 whaterr
= "array info";
760 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
, input_num
,
761 ar
.ctr_contents
, flags
, depth
,
762 populate_fun
)) == NULL
)
764 whaterr
= "array contents type hashing";
767 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "array contents",
769 ADD_CITER (citers
, hval
);
771 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
, input_num
,
772 ar
.ctr_index
, flags
, depth
,
773 populate_fun
)) == NULL
)
775 whaterr
= "array index type hashing";
778 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "array index",
780 ctf_dedup_sha1_add (&hash
, &ar
.ctr_nelems
, sizeof (ar
.ctr_nelems
),
781 "element count", depth
);
782 ADD_CITER (citers
, hval
);
792 if (ctf_func_type_info (input
, type
, &fi
) < 0)
794 whaterr
= "func type info";
798 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
, input_num
,
799 fi
.ctc_return
, flags
, depth
,
800 populate_fun
)) == NULL
)
802 whaterr
= "func return type";
805 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "func return",
807 ctf_dedup_sha1_add (&hash
, &fi
.ctc_argc
, sizeof (fi
.ctc_argc
),
809 ctf_dedup_sha1_add (&hash
, &fi
.ctc_flags
, sizeof (fi
.ctc_flags
),
810 "func flags", depth
);
811 ADD_CITER (citers
, hval
);
813 if ((args
= calloc (fi
.ctc_argc
, sizeof (ctf_id_t
))) == NULL
)
815 whaterr
= "memory allocation";
819 if (ctf_func_type_args (input
, type
, fi
.ctc_argc
, args
) < 0)
822 whaterr
= "func arg type";
825 for (j
= 0; j
< fi
.ctc_argc
; j
++)
827 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
,
828 input_num
, args
[j
], flags
, depth
,
829 populate_fun
)) == NULL
)
832 whaterr
= "func arg type hashing";
835 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "func arg type",
837 ADD_CITER (citers
, hval
);
847 ctf_dedup_sha1_add (&hash
, &tp
->ctt_size
, sizeof (uint32_t),
849 while ((ename
= ctf_enum_next (input
, type
, &i
, &val
)) != NULL
)
851 ctf_dedup_sha1_add (&hash
, ename
, strlen (ename
) + 1, "enumerator",
853 ctf_dedup_sha1_add (&hash
, &val
, sizeof (val
), "enumerand", depth
);
855 if (ctf_errno (input
) != ECTF_NEXT_END
)
857 whaterr
= "enum member iteration";
862 /* Top-level only. */
871 ctf_get_ctt_size (input
, tp
, &size
, NULL
);
872 ctf_dedup_sha1_add (&hash
, &size
, sizeof (ssize_t
), "struct size",
875 while ((offset
= ctf_member_next (input
, type
, &i
, &mname
,
880 ctf_dedup_sha1_add (&hash
, mname
, strlen (mname
) + 1,
881 "member name", depth
);
883 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
884 ctf_dprintf ("%lu: Traversing to member %s\n", depth
, mname
);
886 if ((hval
= ctf_dedup_hash_type (fp
, input
, inputs
, parents
,
887 input_num
, membtype
, flags
, depth
,
888 populate_fun
)) == NULL
)
890 whaterr
= "struct/union member type hashing";
894 ctf_dedup_sha1_add (&hash
, hval
, strlen (hval
) + 1, "member hash",
896 ctf_dedup_sha1_add (&hash
, &offset
, sizeof (offset
), "member offset",
898 ADD_CITER (citers
, hval
);
900 if (ctf_errno (input
) != ECTF_NEXT_END
)
902 whaterr
= "struct/union member iteration";
908 whaterr
= "unknown type kind";
911 ctf_sha1_fini (&hash
, hashbuf
);
913 if ((hval
= intern (fp
, strdup (hashbuf
))) == NULL
)
915 whaterr
= "hash internment";
919 /* Populate the citers for this type's subtypes, now the hash for the type
921 whaterr
= "citer tracking";
925 ctf_dynset_t
*citer_hashes
;
927 if ((citer_hashes
= make_set_element (d
->cd_citers
, citer
)) == NULL
)
929 if (ctf_dynset_cinsert (citer_hashes
, hval
) < 0)
936 while ((err
= ctf_dynset_cnext (citers
, &i
, &k
)) == 0)
938 ctf_dynset_t
*citer_hashes
;
939 citer
= (const char *) k
;
941 if ((citer_hashes
= make_set_element (d
->cd_citers
, citer
)) == NULL
)
944 if (ctf_dynset_exists (citer_hashes
, hval
, NULL
))
946 if (ctf_dynset_cinsert (citer_hashes
, hval
) < 0)
949 if (err
!= ECTF_NEXT_END
)
951 ctf_dynset_destroy (citers
);
957 ctf_next_destroy (i
);
959 ctf_sha1_fini (&hash
, NULL
);
960 ctf_err_warn (fp
, 0, "%s (%i): %s error during type hashing for "
961 "type %lx, kind %i: CTF error: %s; errno: %s",
962 ctf_link_input_name (input
), input_num
, whaterr
, type
,
963 kind
, ctf_errmsg (ctf_errno (fp
)), strerror (errno
));
966 ctf_set_errno (fp
, errno
);
967 ctf_err_warn (fp
, 0, "%s (%i): %s error during type hashing for "
968 "type %lx, kind %i: CTF error: %s; errno: %s",
969 ctf_link_input_name (input
), input_num
, whaterr
, type
,
970 kind
, ctf_errmsg (ctf_errno (fp
)), strerror (errno
));
974 /* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup
975 state is stored. INPUT_NUM is the number of this input in the set of inputs.
976 Record its hash in FP's cd_type_hashes once it is known. PARENTS is
977 described in the comment above ctf_dedup.
979 (The flags argument currently accepts only the flag
980 CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent
981 struct/union hashing in recursive traversals below the TYPE.)
983 We use the CTF API rather than direct access wherever possible, because types
984 that appear identical through the API should be considered identical, with
985 one exception: slices should only be considered identical to other slices,
986 not to the corresponding unsliced type.
988 The POPULATE_FUN is a mandatory hook that populates other mappings with each
989 type we see (excepting types that are recursively hashed as stubs). The
990 caller should not rely on the order of calls to this hook, though it will be
991 called at least once for every non-stub reference to every type.
993 Returns a hash value (an atom), or NULL on error. */
996 ctf_dedup_hash_type (ctf_file_t
*fp
, ctf_file_t
*input
,
997 ctf_file_t
**inputs
, uint32_t *parents
,
998 int input_num
, ctf_id_t type
, int flags
,
1000 int (*populate_fun
) (ctf_file_t
*fp
,
1002 ctf_file_t
**inputs
,
1006 const char *decorated_name
,
1009 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1010 const ctf_type_t
*tp
;
1012 const char *hval
= NULL
;
1014 const char *whaterr
;
1015 const char *decorated
= NULL
;
1016 uint32_t kind
, fwdkind
;
1020 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1021 ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth
, input_num
, type
, flags
);
1024 /* The unimplemented type doesn't really exist, but must be noted in parent
1025 hashes: so it gets a fixed, arbitrary hash. */
1027 return "00000000000000000000";
1029 /* Possible optimization: if the input type is in the parent type space, just
1030 copy recursively-cited hashes from the parent's types into the output
1031 mapping rather than rehashing them. */
1033 type_id
= CTF_DEDUP_GID (fp
, input_num
, type
);
1035 if ((tp
= ctf_lookup_by_id (&input
, type
)) == NULL
)
1037 ctf_err_warn (fp
, 0, "%s (%i): lookup failure for type %lx: flags %x",
1038 ctf_link_input_name (input
), input_num
, type
, flags
);
1039 return NULL
; /* errno is set for us. */
1042 kind
= LCTF_INFO_KIND (input
, tp
->ctt_info
);
1043 name
= ctf_strraw (input
, tp
->ctt_name
);
1045 if (tp
->ctt_name
== 0 || !name
|| name
[0] == '\0')
1048 /* Treat the unknown kind just like the unimplemented type. */
1049 if (kind
== CTF_K_UNKNOWN
)
1050 return "00000000000000000000";
1052 /* Decorate the name appropriately for the namespace it appears in: forwards
1053 appear in the namespace of their referent. */
1058 if (kind
== CTF_K_FORWARD
)
1059 fwdkind
= tp
->ctt_type
;
1061 if ((decorated
= ctf_decorate_type_name (fp
, name
, fwdkind
)) == NULL
)
1062 return NULL
; /* errno is set for us. */
1065 /* If not hashing a stub, we can rely on various sorts of caches.
1067 Optimization opportunity: we may be able to avoid calling the populate_fun
1070 if (!ctf_dedup_is_stub (name
, kind
, fwdkind
, flags
))
1072 if ((hval
= ctf_dynhash_lookup (d
->cd_type_hashes
, type_id
)) != NULL
)
1074 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1075 ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth
, input_num
,
1078 populate_fun (fp
, input
, inputs
, input_num
, type
, type_id
,
1085 /* We have never seen this type before, and must figure out its hash and the
1086 hashes of the types it cites.
1088 Hash this type, and call ourselves recursively. (The hashing part is
1089 optional, and is disabled if overidden_hval is set.) */
1091 if ((hval
= ctf_dedup_rhash_type (fp
, input
, inputs
, parents
, input_num
,
1092 type
, type_id
, tp
, name
, decorated
,
1093 kind
, flags
, depth
, populate_fun
)) == NULL
)
1094 return NULL
; /* errno is set for us. */
1096 /* The hash of this type is now known: record it unless caching is unsafe
1097 because the hash value will change later. This will be the final storage
1098 of this type's hash, so we call the population function on it. */
1100 if (!ctf_dedup_is_stub (name
, kind
, fwdkind
, flags
))
1102 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1103 ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type
,
1104 type_id
, name
? name
: "", hval
);
1107 if (ctf_dynhash_cinsert (d
->cd_type_hashes
, type_id
, hval
) < 0)
1109 whaterr
= "hash caching";
1113 if (populate_fun (fp
, input
, inputs
, input_num
, type
, type_id
,
1114 decorated
, hval
) < 0)
1116 whaterr
= "population function";
1117 goto err
; /* errno is set for us. */
1121 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1122 ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth
,
1123 input_num
, type
, hval
);
1128 ctf_set_errno (fp
, errno
);
1130 ctf_err_warn (fp
, 0, "%s (%i): %s error during type hashing, type %lx, "
1131 "kind %i: CTF errno: %s; errno: %s",
1132 ctf_link_input_name (input
), input_num
, whaterr
, type
,
1133 kind
, ctf_errmsg (ctf_errno (fp
)),
1138 /* Populate a number of useful mappings not directly used by the hashing
1139 machinery: the output mapping, the cd_name_counts mapping from name -> hash
1140 -> count of hashval deduplication state for a given hashed type, and the
1141 cd_output_first_tu mapping. */
1144 ctf_dedup_populate_mappings (ctf_file_t
*fp
, ctf_file_t
*input _libctf_unused_
,
1145 ctf_file_t
**inputs _libctf_unused_
,
1146 int input_num _libctf_unused_
,
1147 ctf_id_t type _libctf_unused_
, void *id
,
1148 const char *decorated_name
,
1151 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1152 ctf_dynset_t
*type_ids
;
1153 ctf_dynhash_t
*name_counts
;
1156 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1157 ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n",
1158 hval
, decorated_name
? decorated_name
: "(unnamed)",
1159 input_num
, type
, ctf_link_input_name (input
));
1161 const char *orig_hval
;
1163 /* Make sure we never map a single GID to multiple hash values. */
1165 if ((orig_hval
= ctf_dynhash_lookup (d
->cd_output_mapping_guard
, id
)) != NULL
)
1167 /* We can rely on pointer identity here, since all hashes are
1169 if (!ctf_assert (fp
, orig_hval
== hval
))
1173 if (ctf_dynhash_cinsert (d
->cd_output_mapping_guard
, id
, hval
) < 0)
1174 return ctf_set_errno (fp
, errno
);
1177 /* Record the type in the output mapping: if this is the first time this type
1178 has been seen, also record it in the cd_output_first_gid. Because we
1179 traverse types in TU order and we do not merge types after the hashing
1180 phase, this will be the lowest TU this type ever appears in. */
1182 if ((type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
,
1185 if (ctf_dynhash_cinsert (d
->cd_output_first_gid
, hval
, id
) < 0)
1186 return ctf_set_errno (fp
, errno
);
1188 if ((type_ids
= ctf_dynset_create (htab_hash_pointer
,
1191 return ctf_set_errno (fp
, errno
);
1192 if (ctf_dynhash_insert (d
->cd_output_mapping
, (void *) hval
,
1195 ctf_dynset_destroy (type_ids
);
1196 return ctf_set_errno (fp
, errno
);
1199 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1201 /* Verify that all types with this hash are of the same kind, and that the
1202 first TU a type was seen in never falls. */
1206 ctf_next_t
*i
= NULL
;
1207 int orig_kind
= ctf_type_kind_unsliced (input
, type
);
1210 orig_first_tu
= CTF_DEDUP_GID_TO_INPUT
1211 (ctf_dynhash_lookup (d
->cd_output_first_gid
, hval
));
1212 if (!ctf_assert (fp
, orig_first_tu
<= CTF_DEDUP_GID_TO_INPUT (id
)))
1215 while ((err
= ctf_dynset_cnext (type_ids
, &i
, &one_id
)) == 0)
1217 ctf_file_t
*foo
= inputs
[CTF_DEDUP_GID_TO_INPUT (one_id
)];
1218 ctf_id_t bar
= CTF_DEDUP_GID_TO_TYPE (one_id
);
1219 if (ctf_type_kind_unsliced (foo
, bar
) != orig_kind
)
1221 ctf_err_warn (fp
, 1, "added wrong kind to output mapping "
1222 "for hash %s named %s: %p/%lx from %s is "
1223 "kind %i, but newly-added %p/%lx from %s is "
1225 decorated_name
? decorated_name
: "(unnamed)",
1227 ctf_link_input_name (foo
),
1228 ctf_type_kind_unsliced (foo
, bar
),
1229 (void *) input
, type
,
1230 ctf_link_input_name (input
), orig_kind
);
1231 if (!ctf_assert (fp
, ctf_type_kind_unsliced (foo
, bar
)
1236 if (err
!= ECTF_NEXT_END
)
1237 return ctf_set_errno (fp
, err
);
1241 /* This function will be repeatedly called for the same types many times:
1242 don't waste time reinserting the same keys in that case. */
1243 if (!ctf_dynset_exists (type_ids
, id
, NULL
)
1244 && ctf_dynset_insert (type_ids
, id
) < 0)
1245 return ctf_set_errno (fp
, errno
);
1247 /* The rest only needs to happen for types with names. */
1248 if (!decorated_name
)
1251 /* Count the number of occurrences of the hash value for this GID. */
1253 hval
= ctf_dynhash_lookup (d
->cd_type_hashes
, id
);
1255 /* Mapping from name -> hash(hashval, count) not already present? */
1256 if ((name_counts
= ctf_dynhash_lookup (d
->cd_name_counts
,
1257 decorated_name
)) == NULL
)
1259 if ((name_counts
= ctf_dynhash_create (ctf_hash_string
,
1261 NULL
, NULL
)) == NULL
)
1262 return ctf_set_errno (fp
, errno
);
1263 if (ctf_dynhash_cinsert (d
->cd_name_counts
, decorated_name
,
1266 ctf_dynhash_destroy (name_counts
);
1267 return ctf_set_errno (fp
, errno
);
1271 /* This will, conveniently, return NULL (i.e. 0) for a new entry. */
1272 count
= (long int) (uintptr_t) ctf_dynhash_lookup (name_counts
, hval
);
1274 if (ctf_dynhash_cinsert (name_counts
, hval
,
1275 (const void *) (uintptr_t) (count
+ 1)) < 0)
1276 return ctf_set_errno (fp
, errno
);
1281 /* Mark a single hash as corresponding to a conflicting type. Mark all types
1282 that cite it as conflicting as well, terminating the recursive walk only when
1283 types that are already conflicted or types do not cite other types are seen.
1284 (Tagged structures and unions do not appear in the cd_citers graph, so the
1285 walk also terminates there, since any reference to a conflicting structure is
1286 just going to reference an unconflicting forward instead: see
1287 ctf_dedup_maybe_synthesize_forward.) */
1290 ctf_dedup_mark_conflicting_hash (ctf_file_t
*fp
, const char *hval
)
1292 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1293 ctf_next_t
*i
= NULL
;
1296 ctf_dynset_t
*citers
;
1298 /* Mark conflicted if not already so marked. */
1299 if (ctf_dynset_exists (d
->cd_conflicting_types
, hval
, NULL
))
1302 ctf_dprintf ("Marking %s as conflicted\n", hval
);
1304 if (ctf_dynset_cinsert (d
->cd_conflicting_types
, hval
) < 0)
1306 ctf_dprintf ("Out of memory marking %s as conflicted\n", hval
);
1307 ctf_set_errno (fp
, errno
);
1311 /* If any types cite this type, mark them conflicted too. */
1312 if ((citers
= ctf_dynhash_lookup (d
->cd_citers
, hval
)) == NULL
)
1315 while ((err
= ctf_dynset_cnext (citers
, &i
, &k
)) == 0)
1317 const char *hv
= (const char *) k
;
1319 if (ctf_dynset_exists (d
->cd_conflicting_types
, hv
, NULL
))
1322 if (ctf_dedup_mark_conflicting_hash (fp
, hv
) < 0)
1324 ctf_next_destroy (i
);
1325 return -1; /* errno is set for us. */
1328 if (err
!= ECTF_NEXT_END
)
1329 return ctf_set_errno (fp
, err
);
1334 /* Look up a type kind from the output mapping, given a type hash value. */
1336 ctf_dedup_hash_kind (ctf_file_t
*fp
, ctf_file_t
**inputs
, const char *hash
)
1338 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1340 ctf_dynset_t
*type_ids
;
1342 /* Precondition: the output mapping is populated. */
1343 if (!ctf_assert (fp
, ctf_dynhash_elements (d
->cd_output_mapping
) > 0))
1346 /* Look up some GID from the output hash for this type. (They are all
1347 identical, so we can pick any). Don't assert if someone calls this
1348 function wrongly, but do assert if the output mapping knows about the hash,
1349 but has nothing associated with it. */
1351 type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
, hash
);
1354 ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash
);
1355 return ctf_set_errno (fp
, ECTF_INTERNAL
);
1357 id
= ctf_dynset_lookup_any (type_ids
);
1358 if (!ctf_assert (fp
, id
))
1361 return ctf_type_kind_unsliced (inputs
[CTF_DEDUP_GID_TO_INPUT (id
)],
1362 CTF_DEDUP_GID_TO_TYPE (id
));
1365 /* Used to keep a count of types: i.e. distinct type hash values. */
1366 typedef struct ctf_dedup_type_counter
1369 ctf_file_t
**inputs
;
1370 int num_non_forwards
;
1371 } ctf_dedup_type_counter_t
;
1373 /* Add to the type counter for one name entry from the cd_name_counts. */
1375 ctf_dedup_count_types (void *key_
, void *value _libctf_unused_
, void *arg_
)
1377 const char *hval
= (const char *) key_
;
1379 ctf_dedup_type_counter_t
*arg
= (ctf_dedup_type_counter_t
*) arg_
;
1381 kind
= ctf_dedup_hash_kind (arg
->fp
, arg
->inputs
, hval
);
1383 /* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to
1384 smuggle errors out of here. */
1386 if (kind
!= CTF_K_FORWARD
)
1388 arg
->num_non_forwards
++;
1389 ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n",
1390 hval
, kind
, arg
->num_non_forwards
);
1393 /* We only need to know if there is more than one non-forward (an ambiguous
1394 type): don't waste time iterating any more than needed to figure that
1397 if (arg
->num_non_forwards
> 1)
1403 /* Detect name ambiguity and mark ambiguous names as conflicting, other than the
1406 ctf_dedup_detect_name_ambiguity (ctf_file_t
*fp
, ctf_file_t
**inputs
)
1408 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1409 ctf_next_t
*i
= NULL
;
1415 /* Go through cd_name_counts for all CTF namespaces in turn. */
1417 while ((err
= ctf_dynhash_next (d
->cd_name_counts
, &i
, &k
, &v
)) == 0)
1419 const char *decorated
= (const char *) k
;
1420 ctf_dynhash_t
*name_counts
= (ctf_dynhash_t
*) v
;
1421 ctf_next_t
*j
= NULL
;
1423 /* If this is a forwardable kind or a forward (which we can tell without
1424 consulting the type because its decorated name has a space as its
1425 second character: see ctf_decorate_type_name), we are only interested
1426 in whether this name has many hashes associated with it: any such name
1427 is necessarily ambiguous, and types with that name are conflicting.
1428 Once we know whether this is true, we can skip to the next name: so use
1429 ctf_dynhash_iter_find for efficiency. */
1431 if (decorated
[0] != '\0' && decorated
[1] == ' ')
1433 ctf_dedup_type_counter_t counters
= { fp
, inputs
, 0 };
1434 ctf_dynhash_t
*counts
= (ctf_dynhash_t
*) v
;
1436 ctf_dynhash_iter_find (counts
, ctf_dedup_count_types
, &counters
);
1438 /* Check for assertion failure and pass it up. */
1439 if (ctf_errno (fp
) == ECTF_INTERNAL
)
1442 if (counters
.num_non_forwards
> 1)
1446 while ((err
= ctf_dynhash_cnext (counts
, &j
, &hval_
, NULL
)) == 0)
1448 const char *hval
= (const char *) hval_
;
1449 ctf_dynset_t
*type_ids
;
1453 /* Dig through the types in this hash to find the non-forwards
1454 and mark them ambiguous. */
1456 type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
, hval
);
1458 /* Nonexistent? Must be a forward with no referent. */
1462 id
= ctf_dynset_lookup_any (type_ids
);
1464 kind
= ctf_type_kind (inputs
[CTF_DEDUP_GID_TO_INPUT (id
)],
1465 CTF_DEDUP_GID_TO_TYPE (id
));
1467 if (kind
!= CTF_K_FORWARD
)
1469 ctf_dprintf ("Marking %p, with hash %s, conflicting: one "
1470 "of many non-forward GIDs for %s\n", id
,
1472 ctf_dedup_mark_conflicting_hash (fp
, hval
);
1475 if (err
!= ECTF_NEXT_END
)
1477 erm
= "marking conflicting structs/unions";
1484 /* This is an ordinary type. Find the most common type with this
1485 name, and mark it unconflicting: all others are conflicting. (We
1486 cannot do this sort of popularity contest with forwardable types
1487 because any forwards to that type would be immediately unified with
1488 the most-popular type on insertion, and we want conflicting structs
1489 et al to have all forwards left intact, so the user is notified
1490 that this type is conflicting. TODO: improve this in future by
1491 setting such forwards non-root-visible.) */
1496 long max_hcount
= -1;
1497 const char *max_hval
= NULL
;
1499 if (ctf_dynhash_elements (name_counts
) <= 1)
1502 /* First find the most common. */
1503 while ((err
= ctf_dynhash_cnext (name_counts
, &j
, &key
, &count
)) == 0)
1505 hval
= (const char *) key
;
1506 if ((long int) (uintptr_t) count
> max_hcount
)
1508 max_hcount
= (long int) (uintptr_t) count
;
1512 if (err
!= ECTF_NEXT_END
)
1514 erm
= "finding commonest conflicting type";
1518 /* Mark all the others as conflicting. */
1519 while ((err
= ctf_dynhash_cnext (name_counts
, &j
, &key
, NULL
)) == 0)
1521 hval
= (const char *) key
;
1522 if (strcmp (max_hval
, hval
) == 0)
1525 ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n",
1526 hval
, (const char *) k
);
1527 if (ctf_dedup_mark_conflicting_hash (fp
, hval
) < 0)
1529 erm
= "marking hashes as conflicting";
1533 if (err
!= ECTF_NEXT_END
)
1535 erm
= "marking uncommon conflicting types";
1540 if (err
!= ECTF_NEXT_END
)
1542 erm
= "scanning for ambiguous names";
1549 ctf_next_destroy (i
);
1550 ctf_err_warn (fp
, 0, "%s: %s", erm
, ctf_errmsg (ctf_errno (fp
)));
1554 ctf_err_warn (fp
, 0, "iteration failed %s: %s", erm
, ctf_errmsg (err
));
1555 return ctf_set_errno (fp
, err
);
1558 ctf_next_destroy (i
);
1559 return -1; /* errno is set for us. */
1562 /* Initialize the deduplication machinery. */
1565 ctf_dedup_init (ctf_file_t
*fp
)
1567 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1570 if (ctf_dedup_atoms_init (fp
) < 0)
1573 #if IDS_NEED_ALLOCATION
1574 if ((d
->cd_id_to_file_t
= ctf_dynhash_create (ctf_hash_type_id_key
,
1575 ctf_hash_eq_type_id_key
,
1576 free
, NULL
)) == NULL
)
1580 for (i
= 0; i
< 4; i
++)
1582 if ((d
->cd_decorated_names
[i
] = ctf_dynhash_create (ctf_hash_string
,
1584 NULL
, NULL
)) == NULL
)
1588 if ((d
->cd_name_counts
1589 = ctf_dynhash_create (ctf_hash_string
,
1590 ctf_hash_eq_string
, NULL
,
1591 (ctf_hash_free_fun
) ctf_dynhash_destroy
)) == NULL
)
1594 if ((d
->cd_type_hashes
1595 = ctf_dynhash_create (ctf_hash_integer
,
1596 ctf_hash_eq_integer
,
1597 NULL
, NULL
)) == NULL
)
1600 if ((d
->cd_struct_origin
1601 = ctf_dynhash_create (ctf_hash_string
,
1603 NULL
, NULL
)) == NULL
)
1607 = ctf_dynhash_create (ctf_hash_string
,
1608 ctf_hash_eq_string
, NULL
,
1609 (ctf_hash_free_fun
) ctf_dynset_destroy
)) == NULL
)
1612 if ((d
->cd_output_mapping
1613 = ctf_dynhash_create (ctf_hash_string
,
1614 ctf_hash_eq_string
, NULL
,
1615 (ctf_hash_free_fun
) ctf_dynset_destroy
)) == NULL
)
1618 if ((d
->cd_output_first_gid
1619 = ctf_dynhash_create (ctf_hash_string
,
1621 NULL
, NULL
)) == NULL
)
1624 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1625 if ((d
->cd_output_mapping_guard
1626 = ctf_dynhash_create (ctf_hash_integer
,
1627 ctf_hash_eq_integer
, NULL
, NULL
)) == NULL
)
1631 if ((d
->cd_emission_struct_members
1632 = ctf_dynhash_create (ctf_hash_integer
,
1633 ctf_hash_eq_integer
,
1634 NULL
, NULL
)) == NULL
)
1637 if ((d
->cd_conflicting_types
1638 = ctf_dynset_create (htab_hash_string
,
1639 ctf_dynset_eq_string
, NULL
)) == NULL
)
1645 ctf_err_warn (fp
, 0, "ctf_dedup_init: cannot initialize: "
1647 return ctf_set_errno (fp
, ENOMEM
);
1651 ctf_dedup_fini (ctf_file_t
*fp
, ctf_file_t
**outputs
, uint32_t noutputs
)
1653 ctf_dedup_t
*d
= &fp
->ctf_dedup
;
1656 /* ctf_dedup_atoms is kept across links. */
1657 #if IDS_NEED_ALLOCATION
1658 ctf_dynhash_destroy (d
->cd_id_to_file_t
);
1660 for (i
= 0; i
< 4; i
++)
1661 ctf_dynhash_destroy (d
->cd_decorated_names
[i
]);
1662 ctf_dynhash_destroy (d
->cd_name_counts
);
1663 ctf_dynhash_destroy (d
->cd_type_hashes
);
1664 ctf_dynhash_destroy (d
->cd_struct_origin
);
1665 ctf_dynhash_destroy (d
->cd_citers
);
1666 ctf_dynhash_destroy (d
->cd_output_mapping
);
1667 ctf_dynhash_destroy (d
->cd_output_first_gid
);
1668 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
1669 ctf_dynhash_destroy (d
->cd_output_mapping_guard
);
1671 ctf_dynhash_destroy (d
->cd_emission_struct_members
);
1672 ctf_dynset_destroy (d
->cd_conflicting_types
);
1674 /* Free the per-output state. */
1677 for (i
= 0; i
< noutputs
; i
++)
1679 ctf_dedup_t
*od
= &outputs
[i
]->ctf_dedup
;
1680 ctf_dynhash_destroy (od
->cd_output_emission_hashes
);
1681 ctf_dynhash_destroy (od
->cd_output_emission_conflicted_forwards
);
1682 ctf_file_close (od
->cd_output
);
1685 memset (d
, 0, sizeof (ctf_dedup_t
));
1688 /* Return 1 if this type is cited by multiple input dictionaries. */
1691 ctf_dedup_multiple_input_dicts (ctf_file_t
*output
, ctf_file_t
**inputs
,
1694 ctf_dedup_t
*d
= &output
->ctf_dedup
;
1695 ctf_dynset_t
*type_ids
;
1696 ctf_next_t
*i
= NULL
;
1698 ctf_file_t
*found
= NULL
, *relative_found
= NULL
;
1699 const char *type_id
;
1700 ctf_file_t
*input_fp
;
1703 const char *decorated
;
1708 type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
, hval
);
1709 if (!ctf_assert (output
, type_ids
))
1712 /* Scan across the IDs until we find proof that two disjoint dictionaries
1713 are referenced. Exit as soon as possible. Optimization opportunity, but
1714 possibly not worth it, given that this is only executed in
1715 CTF_LINK_SHARE_DUPLICATED mode. */
1717 while ((err
= ctf_dynset_next (type_ids
, &i
, &id
)) == 0)
1719 ctf_file_t
*fp
= inputs
[CTF_DEDUP_GID_TO_INPUT (id
)];
1721 if (fp
== found
|| fp
== relative_found
)
1731 && (fp
->ctf_parent
== found
|| found
->ctf_parent
== fp
))
1733 relative_found
= fp
;
1738 ctf_next_destroy (i
);
1741 if ((err
!= ECTF_NEXT_END
) && (err
!= 0))
1743 ctf_err_warn (output
, 0, "propagating conflictedness: %s",
1745 ctf_set_errno (output
, err
);
1752 /* This type itself does not appear in multiple input dicts: how about another
1753 related type with the same name (e.g. a forward if this is a struct,
1756 type_id
= ctf_dynset_lookup_any (type_ids
);
1757 if (!ctf_assert (output
, type_id
))
1760 input_fp
= inputs
[CTF_DEDUP_GID_TO_INPUT (type_id
)];
1761 input_id
= CTF_DEDUP_GID_TO_TYPE (type_id
);
1762 fwdkind
= ctf_type_kind_forwarded (input_fp
, input_id
);
1763 name
= ctf_type_name_raw (input_fp
, input_id
);
1765 if ((fwdkind
== CTF_K_STRUCT
|| fwdkind
== CTF_K_UNION
)
1766 && name
&& name
[0] != '\0')
1770 if ((decorated
= ctf_decorate_type_name (output
, name
,
1772 return -1; /* errno is set for us. */
1774 origin
= ctf_dynhash_lookup (d
->cd_struct_origin
, decorated
);
1775 if ((origin
!= NULL
) && (CTF_DEDUP_GID_TO_INPUT (origin
) < 0))
1782 /* Demote unconflicting types which reference only one input, or which reference
1783 two inputs where one input is the parent of the other, into conflicting
1784 types. Only used if the link mode is CTF_LINK_SHARE_DUPLICATED. */
1787 ctf_dedup_conflictify_unshared (ctf_file_t
*output
, ctf_file_t
**inputs
)
1789 ctf_dedup_t
*d
= &output
->ctf_dedup
;
1790 ctf_next_t
*i
= NULL
;
1793 ctf_dynset_t
*to_mark
= NULL
;
1795 if ((to_mark
= ctf_dynset_create (htab_hash_string
, ctf_dynset_eq_string
,
1799 while ((err
= ctf_dynhash_cnext (d
->cd_output_mapping
, &i
, &k
, NULL
)) == 0)
1801 const char *hval
= (const char *) k
;
1804 /* Types referenced by only one dict, with no type appearing under that
1805 name elsewhere, are marked conflicting. */
1807 conflicting
= !ctf_dedup_multiple_input_dicts (output
, inputs
, hval
);
1809 if (conflicting
< 0)
1810 goto err
; /* errno is set for us. */
1813 if (ctf_dynset_cinsert (to_mark
, hval
) < 0)
1816 if (err
!= ECTF_NEXT_END
)
1819 while ((err
= ctf_dynset_cnext (to_mark
, &i
, &k
)) == 0)
1821 const char *hval
= (const char *) k
;
1823 if (ctf_dedup_mark_conflicting_hash (output
, hval
) < 0)
1826 if (err
!= ECTF_NEXT_END
)
1829 ctf_dynset_destroy (to_mark
);
1834 ctf_set_errno (output
, errno
);
1836 err
= ctf_errno (output
);
1837 ctf_next_destroy (i
);
1839 ctf_set_errno (output
, err
);
1840 ctf_dynset_destroy (to_mark
);
1841 ctf_err_warn (output
, 0, "conflictifying unshared types: %s",
1842 ctf_errmsg (ctf_errno (output
)));
1846 /* The core deduplicator. Populate cd_output_mapping in the output ctf_dedup
1847 with a mapping of all types that belong in this dictionary and where they
1848 come from, and cd_conflicting_types with an indication of whether each type
1849 is conflicted or not. OUTPUT is the top-level output: INPUTS is the array of
1850 input dicts; NINPUTS is the size of that array; PARENTS is an NINPUTS-element
1851 array with each element corresponding to a input which is a child dict set to
1852 the number in the INPUTS array of that input's parent.
1854 If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
1855 mapping: only one output will result.
1857 Only deduplicates: does not emit the types into the output. Call
1858 ctf_dedup_emit afterwards to do that. */
1861 ctf_dedup (ctf_file_t
*output
, ctf_file_t
**inputs
, uint32_t ninputs
,
1862 uint32_t *parents
, int cu_mapped
)
1864 ctf_dedup_t
*d
= &output
->ctf_dedup
;
1866 ctf_next_t
*it
= NULL
;
1868 for (i
= 0; i
< ninputs
; i
++)
1869 ctf_dprintf ("Input %i: %s\n", (int) i
, ctf_link_input_name (inputs
[i
]));
1871 if (ctf_dedup_init (output
) < 0)
1872 return -1; /* errno is set for us. */
1874 /* Some flags do not apply when CU-mapping: this is not a duplicated link,
1875 because there is only one output and we really don't want to end up marking
1876 all nonconflicting but appears-only-once types as conflicting (which in the
1877 CU-mapped link means we'd mark them all as non-root-visible!). */
1878 d
->cd_link_flags
= output
->ctf_link_flags
;
1880 d
->cd_link_flags
&= ~(CTF_LINK_SHARE_DUPLICATED
);
1882 /* Compute hash values for all types, recursively, treating child structures
1883 and unions equivalent to forwards, and hashing in the name of the referent
1884 of each such type into structures, unions, and non-opaque forwards.
1885 Populate a mapping from decorated name (including an indication of
1886 struct/union/enum namespace) to count of type hash values in
1887 cd_name_counts, a mapping from and a mapping from hash values to input type
1888 IDs in cd_output_mapping. */
1890 ctf_dprintf ("Computing type hashes\n");
1891 for (i
= 0; i
< ninputs
; i
++)
1895 while ((id
= ctf_type_next (inputs
[i
], &it
, NULL
, 1)) != CTF_ERR
)
1897 ctf_dedup_hash_type (output
, inputs
[i
], inputs
, parents
,
1898 i
, id
, 0, 0, ctf_dedup_populate_mappings
);
1900 if (ctf_errno (inputs
[i
]) != ECTF_NEXT_END
)
1902 ctf_err_warn (output
, 0, "iteration failure computing type "
1903 "hashes: %s", ctf_errmsg (ctf_errno (inputs
[i
])));
1904 return ctf_set_errno (output
, ctf_errno (inputs
[i
]));
1908 /* Go through the cd_name_counts name->hash->count mapping for all CTF
1909 namespaces: any name with many hashes associated with it at this stage is
1910 necessarily ambiguous. Mark all the hashes except the most common as
1911 conflicting in the output. */
1913 ctf_dprintf ("Detecting type name ambiguity\n");
1914 if (ctf_dedup_detect_name_ambiguity (output
, inputs
) < 0)
1915 return -1; /* errno is set for us. */
1917 /* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting
1918 types whose output mapping references only one input dict into a
1919 conflicting type, so that they end up in the per-CU dictionaries. */
1921 if (d
->cd_link_flags
& CTF_LINK_SHARE_DUPLICATED
)
1923 ctf_dprintf ("Conflictifying unshared types\n");
1924 if (ctf_dedup_conflictify_unshared (output
, inputs
) < 0)
1925 return -1; /* errno is set for us. */
1931 ctf_dedup_rwalk_output_mapping (ctf_file_t
*output
, ctf_file_t
**inputs
,
1932 uint32_t ninputs
, uint32_t *parents
,
1933 ctf_dynset_t
*already_visited
,
1935 int (*visit_fun
) (const char *hval
,
1937 ctf_file_t
**inputs
,
1940 int already_visited
,
1946 void *arg
, unsigned long depth
);
1948 /* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target
1949 type and visits it. */
1951 ctf_dedup_rwalk_one_output_mapping (ctf_file_t
*output
,
1952 ctf_file_t
**inputs
, uint32_t ninputs
,
1954 ctf_dynset_t
*already_visited
,
1955 int visited
, void *type_id
,
1957 int (*visit_fun
) (const char *hval
,
1959 ctf_file_t
**inputs
,
1962 int already_visited
,
1968 void *arg
, unsigned long depth
)
1970 ctf_dedup_t
*d
= &output
->ctf_dedup
;
1975 const char *whaterr
;
1977 input_num
= CTF_DEDUP_GID_TO_INPUT (type_id
);
1978 fp
= inputs
[input_num
];
1979 type
= CTF_DEDUP_GID_TO_TYPE (type_id
);
1981 ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, "
1982 "kind %i\n", depth
, hval
, input_num
, type
, (void *) fp
,
1983 ctf_link_input_name (fp
), ctf_type_kind_unsliced (fp
, type
));
1985 /* Get the single call we do if this type has already been visited out of the
1988 return visit_fun (hval
, output
, inputs
, ninputs
, parents
, visited
, fp
,
1989 type
, type_id
, depth
, arg
);
1991 /* This macro is really ugly, but the alternative is repeating this code many
1992 times, which is worse. */
1994 #define CTF_TYPE_WALK(type, errlabel, errmsg) \
1997 const char *hashval; \
1998 int cited_type_input_num = input_num; \
2000 if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \
2001 cited_type_input_num = parents[input_num]; \
2003 type_id = CTF_DEDUP_GID (output, cited_type_input_num, type); \
2007 ctf_dprintf ("Walking: unimplemented type\n"); \
2011 ctf_dprintf ("Looking up ID %i/%lx in type hashes\n", \
2012 cited_type_input_num, type); \
2013 hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id); \
2014 if (!ctf_assert (output, hashval)) \
2016 whaterr = "looking up ID in type hashes"; \
2019 ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \
2022 ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \
2023 already_visited, hashval, \
2024 visit_fun, arg, depth); \
2032 switch (ctf_type_kind_unsliced (fp
, type
))
2035 /* Just skip things of unknown kind. */
2041 /* No types referenced. */
2045 case CTF_K_VOLATILE
:
2047 case CTF_K_RESTRICT
:
2050 CTF_TYPE_WALK (ctf_type_reference (fp
, type
), err
,
2051 "Referenced type walk");
2058 if (ctf_array_info (fp
, type
, &ar
) < 0)
2060 whaterr
= "array info lookup";
2064 CTF_TYPE_WALK (ar
.ctr_contents
, err
, "Array contents type walk");
2065 CTF_TYPE_WALK (ar
.ctr_index
, err
, "Array index type walk");
2069 case CTF_K_FUNCTION
:
2075 if (ctf_func_type_info (fp
, type
, &fi
) < 0)
2077 whaterr
= "func type info lookup";
2081 CTF_TYPE_WALK (fi
.ctc_return
, err
, "Func return type walk");
2083 if ((args
= calloc (fi
.ctc_argc
, sizeof (ctf_id_t
))) == NULL
)
2085 whaterr
= "memory allocation";
2089 if (ctf_func_type_args (fp
, type
, fi
.ctc_argc
, args
) < 0)
2091 whaterr
= "func arg type lookup";
2096 for (j
= 0; j
< fi
.ctc_argc
; j
++)
2097 CTF_TYPE_WALK (args
[j
], err_free_args
, "Func arg type walk");
2107 /* We do not recursively traverse the members of structures: they are
2108 emitted later, in a separate pass. */
2111 whaterr
= "unknown type kind";
2115 return visit_fun (hval
, output
, inputs
, ninputs
, parents
, visited
, fp
, type
,
2116 type_id
, depth
, arg
);
2119 ctf_set_errno (output
, ctf_errno (fp
));
2120 ctf_err_warn (fp
, 0, "%s during type walking in %s at ID %lx: %s", whaterr
,
2121 ctf_link_input_name (fp
), type
, ctf_errmsg (ctf_errno (fp
)));
2125 /* Recursively traverse the output mapping, and do something with each type
2126 visited, from leaves to root. VISIT_FUN, called as recursion unwinds,
2127 returns a negative error code or zero. Type hashes may be visited more than
2128 once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether
2129 types have already been visited. */
2131 ctf_dedup_rwalk_output_mapping (ctf_file_t
*output
, ctf_file_t
**inputs
,
2132 uint32_t ninputs
, uint32_t *parents
,
2133 ctf_dynset_t
*already_visited
,
2135 int (*visit_fun
) (const char *hval
,
2137 ctf_file_t
**inputs
,
2140 int already_visited
,
2146 void *arg
, unsigned long depth
)
2148 ctf_dedup_t
*d
= &output
->ctf_dedup
;
2149 ctf_next_t
*i
= NULL
;
2152 ctf_dynset_t
*type_ids
;
2157 type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
, hval
);
2160 ctf_err_warn (output
, 0, "looked up type kind by nonexistent "
2162 return ctf_set_errno (output
, ECTF_INTERNAL
);
2165 /* Have we seen this type before? */
2167 if (!ctf_dynset_exists (already_visited
, hval
, NULL
))
2169 /* Mark as already-visited immediately, to eliminate the possibility of
2170 cycles: but remember we have not actually visited it yet for the
2171 upcoming call to the visit_fun. (All our callers handle cycles
2172 properly themselves, so we can just abort them aggressively as soon as
2173 we find ourselves in one.) */
2176 if (ctf_dynset_cinsert (already_visited
, hval
) < 0)
2178 ctf_err_warn (output
, 0, "out of memory tracking already-visited "
2180 return ctf_set_errno (output
, ENOMEM
);
2184 /* If this type is marked conflicted, traverse members and call
2185 ctf_dedup_rwalk_output_mapping_once on all the unique ones: otherwise, just
2186 pick a random one and use it. */
2188 if (!ctf_dynset_exists (d
->cd_conflicting_types
, hval
, NULL
))
2190 id
= ctf_dynset_lookup_any (type_ids
);
2191 if (!ctf_assert (output
, id
))
2194 return ctf_dedup_rwalk_one_output_mapping (output
, inputs
, ninputs
,
2195 parents
, already_visited
,
2196 visited
, id
, hval
, visit_fun
,
2200 while ((err
= ctf_dynset_next (type_ids
, &i
, &id
)) == 0)
2204 ret
= ctf_dedup_rwalk_one_output_mapping (output
, inputs
, ninputs
,
2205 parents
, already_visited
,
2207 visit_fun
, arg
, depth
);
2210 ctf_next_destroy (i
);
2211 return ret
; /* errno is set for us. */
2214 if (err
!= ECTF_NEXT_END
)
2216 ctf_err_warn (output
, 0, "walking many types with one hash: %s",
2218 return ctf_set_errno (output
, err
);
2224 typedef struct ctf_sort_om_cb_arg
2226 ctf_file_t
**inputs
;
2229 } ctf_sort_om_cb_arg_t
;
2231 /* Sort the output mapping into order: types first appearing in earlier inputs
2232 first, parents preceding children: if types first appear in the same input,
2233 sort those with earlier ctf_id_t's first. */
2235 sort_output_mapping (const ctf_next_hkv_t
*one
, const ctf_next_hkv_t
*two
,
2238 ctf_sort_om_cb_arg_t
*arg
= (ctf_sort_om_cb_arg_t
*) arg_
;
2239 ctf_dedup_t
*d
= arg
->d
;
2240 const char *one_hval
= (const char *) one
->hkv_key
;
2241 const char *two_hval
= (const char *) two
->hkv_key
;
2242 void *one_gid
, *two_gid
;
2243 uint32_t one_ninput
;
2244 uint32_t two_ninput
;
2250 one_gid
= ctf_dynhash_lookup (d
->cd_output_first_gid
, one_hval
);
2251 two_gid
= ctf_dynhash_lookup (d
->cd_output_first_gid
, two_hval
);
2253 one_ninput
= CTF_DEDUP_GID_TO_INPUT (one_gid
);
2254 two_ninput
= CTF_DEDUP_GID_TO_INPUT (two_gid
);
2256 one_type
= CTF_DEDUP_GID_TO_TYPE (one_gid
);
2257 two_type
= CTF_DEDUP_GID_TO_TYPE (two_gid
);
2259 /* It's kind of hard to smuggle an assertion failure out of here. */
2260 assert (one_ninput
< arg
->ninputs
&& two_ninput
< arg
->ninputs
);
2262 one_fp
= arg
->inputs
[one_ninput
];
2263 two_fp
= arg
->inputs
[two_ninput
];
2265 /* Parents before children. */
2267 if (!(one_fp
->ctf_flags
& LCTF_CHILD
)
2268 && (two_fp
->ctf_flags
& LCTF_CHILD
))
2270 else if ((one_fp
->ctf_flags
& LCTF_CHILD
)
2271 && !(two_fp
->ctf_flags
& LCTF_CHILD
))
2274 /* ninput order, types appearing in earlier TUs first. */
2276 if (one_ninput
< two_ninput
)
2278 else if (two_ninput
< one_ninput
)
2281 /* Same TU. Earliest ctf_id_t first. They cannot be the same. */
2283 assert (one_type
!= two_type
);
2284 if (one_type
< two_type
)
2290 /* The public entry point to ctf_dedup_rwalk_output_mapping, above. */
2292 ctf_dedup_walk_output_mapping (ctf_file_t
*output
, ctf_file_t
**inputs
,
2293 uint32_t ninputs
, uint32_t *parents
,
2294 int (*visit_fun
) (const char *hval
,
2296 ctf_file_t
**inputs
,
2299 int already_visited
,
2307 ctf_dynset_t
*already_visited
;
2308 ctf_next_t
*i
= NULL
;
2309 ctf_sort_om_cb_arg_t sort_arg
;
2313 if ((already_visited
= ctf_dynset_create (htab_hash_string
,
2314 ctf_dynset_eq_string
,
2316 return ctf_set_errno (output
, ENOMEM
);
2318 sort_arg
.inputs
= inputs
;
2319 sort_arg
.ninputs
= ninputs
;
2320 sort_arg
.d
= &output
->ctf_dedup
;
2322 while ((err
= ctf_dynhash_next_sorted (output
->ctf_dedup
.cd_output_mapping
,
2323 &i
, &k
, NULL
, sort_output_mapping
,
2326 const char *hval
= (const char *) k
;
2328 err
= ctf_dedup_rwalk_output_mapping (output
, inputs
, ninputs
, parents
,
2329 already_visited
, hval
, visit_fun
,
2333 ctf_next_destroy (i
);
2334 goto err
; /* errno is set for us. */
2337 if (err
!= ECTF_NEXT_END
)
2339 ctf_err_warn (output
, 0, "recursing over output mapping: %s",
2341 ctf_set_errno (output
, err
);
2344 ctf_dynset_destroy (already_visited
);
2348 ctf_dynset_destroy (already_visited
);
2352 /* Possibly synthesise a synthetic forward in TARGET to subsitute for a
2353 conflicted per-TU type ID in INPUT with hash HVAL. Return its CTF ID, or 0
2354 if none was needed. */
2356 ctf_dedup_maybe_synthesize_forward (ctf_file_t
*output
, ctf_file_t
*target
,
2357 ctf_file_t
*input
, ctf_id_t id
,
2360 ctf_dedup_t
*od
= &output
->ctf_dedup
;
2361 ctf_dedup_t
*td
= &target
->ctf_dedup
;
2365 const char *decorated
;
2367 ctf_id_t emitted_forward
;
2369 if (!ctf_dynset_exists (od
->cd_conflicting_types
, hval
, NULL
)
2370 || target
->ctf_flags
& LCTF_CHILD
2371 || !ctf_type_name_raw (input
, id
)
2372 || (((kind
= ctf_type_kind_unsliced (input
, id
)) != CTF_K_STRUCT
2373 && kind
!= CTF_K_UNION
&& kind
!= CTF_K_FORWARD
)))
2376 fwdkind
= ctf_type_kind_forwarded (input
, id
);
2377 name
= ctf_type_name_raw (input
, id
);
2379 ctf_dprintf ("Using synthetic forward for conflicted struct/union with "
2382 if (!ctf_assert (output
, name
))
2385 if ((decorated
= ctf_decorate_type_name (output
, name
, fwdkind
)) == NULL
)
2388 if (!ctf_dynhash_lookup_kv (td
->cd_output_emission_conflicted_forwards
,
2389 decorated
, NULL
, &v
))
2391 if ((emitted_forward
= ctf_add_forward (target
, CTF_ADD_ROOT
, name
,
2392 fwdkind
)) == CTF_ERR
)
2394 ctf_set_errno (output
, ctf_errno (target
));
2398 if (ctf_dynhash_cinsert (td
->cd_output_emission_conflicted_forwards
,
2399 decorated
, (void *) (uintptr_t)
2400 emitted_forward
) < 0)
2402 ctf_set_errno (output
, ENOMEM
);
2407 emitted_forward
= (ctf_id_t
) (uintptr_t) v
;
2409 ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n",
2412 return emitted_forward
;
2415 /* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t,
2416 into a GID in a target output dict. If it returns 0, this is the
2417 unimplemented type, and the input type must have been 0. The OUTPUT dict is
2418 assumed to be the parent of the TARGET, if it is not the TARGET itself.
2420 Returns CTF_ERR on failure. Responds to an incoming CTF_ERR as an 'id' by
2421 returning CTF_ERR, to simplify callers. Errors are always propagated to the
2422 input, even if they relate to the target, for the same reason. (Target
2423 errors are expected to be very rare.)
2425 If the type in question is a citation of a conflicted type in a different TU,
2426 emit a forward of the right type in its place (if not already emitted), and
2427 record that forward in cd_output_emission_conflicted_forwards. This avoids
2428 the need to replicate the entire type graph below this point in the current
2429 TU (an appalling waste of space).
2431 TODO: maybe replace forwards in the same TU with their referents? Might
2432 make usability a bit better. */
2435 ctf_dedup_id_to_target (ctf_file_t
*output
, ctf_file_t
*target
,
2436 ctf_file_t
**inputs
, uint32_t ninputs
,
2437 uint32_t *parents
, ctf_file_t
*input
, int input_num
,
2440 ctf_dedup_t
*od
= &output
->ctf_dedup
;
2441 ctf_dedup_t
*td
= &target
->ctf_dedup
;
2442 ctf_file_t
*err_fp
= input
;
2445 ctf_id_t emitted_forward
;
2447 /* The target type of an error is an error. */
2451 /* The unimplemented type's ID never changes. */
2454 ctf_dprintf ("%i/%lx: unimplemented type\n", input_num
, id
);
2458 ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num
,
2459 id
, (void *) target
, ctf_link_input_name (target
));
2461 /* If the input type is in the parent type space, and this is a child, reset
2462 the input to the parent (which must already have been emitted, since
2463 emission of parent dicts happens before children). */
2464 if ((input
->ctf_flags
& LCTF_CHILD
) && (LCTF_TYPE_ISPARENT (input
, id
)))
2466 if (!ctf_assert (output
, parents
[input_num
] <= ninputs
))
2468 input
= inputs
[parents
[input_num
]];
2469 input_num
= parents
[input_num
];
2472 hval
= ctf_dynhash_lookup (od
->cd_type_hashes
,
2473 CTF_DEDUP_GID (output
, input_num
, id
));
2475 if (!ctf_assert (output
, hval
&& td
->cd_output_emission_hashes
))
2478 /* If this type is a conflicted tagged structure, union, or forward,
2479 substitute a synthetic forward instead, emitting it if need be. Only do
2480 this if the target is in the parent dict: if it's in the child dict, we can
2481 just point straight at the thing itself. Of course, we might be looking in
2482 the child dict right now and not find it and have to look in the parent, so
2483 we have to do this check twice. */
2485 emitted_forward
= ctf_dedup_maybe_synthesize_forward (output
, target
,
2487 switch (emitted_forward
)
2489 case 0: /* No forward needed. */
2492 ctf_set_errno (err_fp
, ctf_errno (output
));
2493 ctf_err_warn (err_fp
, 0, "adding synthetic forward for type %i/%lx: "
2494 "%s", input_num
, id
, ctf_errmsg (ctf_errno (err_fp
)));
2497 return emitted_forward
;
2500 ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num
, id
, hval
);
2502 target_id
= ctf_dynhash_lookup (td
->cd_output_emission_hashes
, hval
);
2505 /* Must be in the parent, so this must be a child, and they must not be
2507 ctf_dprintf ("Checking shared parent for target\n");
2508 if (!ctf_assert (output
, (target
!= output
)
2509 && (target
->ctf_flags
& LCTF_CHILD
)))
2512 target_id
= ctf_dynhash_lookup (od
->cd_output_emission_hashes
, hval
);
2514 emitted_forward
= ctf_dedup_maybe_synthesize_forward (output
, output
,
2516 switch (emitted_forward
)
2518 case 0: /* No forward needed. */
2521 ctf_set_errno (err_fp
, ctf_errno (output
));
2522 ctf_err_warn (err_fp
, 0, "adding synthetic forward for type %i/%lx: "
2523 "%s", input_num
, id
, ctf_errmsg (ctf_errno (err_fp
)));
2526 return emitted_forward
;
2529 if (!ctf_assert (output
, target_id
))
2531 return (ctf_id_t
) (uintptr_t) target_id
;
2534 /* Emit a single deduplicated TYPE with the given HVAL, located in a given
2535 INPUT, with the given (G)ID, into the shared OUTPUT or a
2536 possibly-newly-created per-CU dict. All the types this type depends upon
2537 have already been emitted. (This type itself may also have been emitted.)
2539 If the ARG is 1, this is a CU-mapped deduplication round mapping many
2540 ctf_file_t's into precisely one: conflicting types should be marked
2541 non-root-visible. If the ARG is 0, conflicting types go into per-CU
2542 dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything
2543 is emitted directly into the output. No struct/union members are emitted.
2545 Optimization opportunity: trace the ancestry of non-root-visible types and
2546 elide all that neither have a root-visible type somewhere towards their root,
2547 nor have the type visible via any other route (the function info section,
2548 data object section, backtrace section etc). */
2551 ctf_dedup_emit_type (const char *hval
, ctf_file_t
*output
, ctf_file_t
**inputs
,
2552 uint32_t ninputs
, uint32_t *parents
, int already_visited
,
2553 ctf_file_t
*input
, ctf_id_t type
, void *id
, int depth
,
2556 ctf_dedup_t
*d
= &output
->ctf_dedup
;
2557 int kind
= ctf_type_kind_unsliced (input
, type
);
2559 ctf_file_t
*target
= output
;
2560 ctf_file_t
*real_input
;
2561 const ctf_type_t
*tp
;
2562 int input_num
= CTF_DEDUP_GID_TO_INPUT (id
);
2563 int output_num
= (uint32_t) -1; /* 'shared' */
2564 int cu_mapped
= *(int *)arg
;
2568 ctf_next_t
*i
= NULL
;
2571 ctf_id_t maybe_dup
= 0;
2574 int emission_hashed
= 0;
2576 /* We don't want to re-emit something we've already emitted. */
2578 if (already_visited
)
2581 ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n",
2582 depth
, hval
, ctf_link_input_name (input
));
2584 /* Conflicting types go into a per-CU output dictionary, unless this is a
2585 CU-mapped run. The import is not refcounted, since it goes into the
2586 ctf_link_outputs dict of the output that is its parent. */
2587 is_conflicting
= ctf_dynset_exists (d
->cd_conflicting_types
, hval
, NULL
);
2589 if (is_conflicting
&& !cu_mapped
)
2591 ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: "
2592 "inserting into per-CU target.\n",
2593 depth
, hval
, input_num
, type
);
2595 if (input
->ctf_dedup
.cd_output
)
2596 target
= input
->ctf_dedup
.cd_output
;
2601 if ((target
= ctf_create (&err
)) == NULL
)
2603 ctf_err_warn (output
, 0, "cannot create per-CU CTF archive "
2604 "for CU %s: %s", ctf_link_input_name (input
),
2605 ctf_errmsg (err
)); ctf_set_errno (output
, err
);
2609 ctf_import_unref (target
, output
);
2610 if (ctf_cuname (input
) != NULL
)
2611 ctf_cuname_set (target
, ctf_cuname (input
));
2613 ctf_cuname_set (target
, "unnamed-CU");
2614 ctf_parent_name_set (target
, _CTF_SECTION
);
2616 input
->ctf_dedup
.cd_output
= target
;
2618 output_num
= input_num
;
2622 if ((tp
= ctf_lookup_by_id (&real_input
, type
)) == NULL
)
2624 ctf_err_warn (output
, 0, "%s: lookup failure for type %lx: %s",
2625 ctf_link_input_name (real_input
), type
,
2626 ctf_errmsg (ctf_errno (input
)));
2627 ctf_set_errno (output
, ctf_errno (input
));
2628 return -1; /* errno is set for us. */
2631 name
= ctf_strraw (real_input
, tp
->ctt_name
);
2633 /* Hide conflicting types, if we were asked to: also hide if a type with this
2634 name already exists and is not a forward. */
2635 if (cu_mapped
&& is_conflicting
)
2638 && (maybe_dup
= ctf_lookup_by_rawname (target
, kind
, name
)) != 0)
2640 if (ctf_type_kind (target
, maybe_dup
) != CTF_K_FORWARD
)
2644 ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n",
2645 depth
, hval
, name
? name
: "", input_num
, (void *) target
);
2647 if (!target
->ctf_dedup
.cd_output_emission_hashes
)
2648 if ((target
->ctf_dedup
.cd_output_emission_hashes
2649 = ctf_dynhash_create (ctf_hash_string
, ctf_hash_eq_string
,
2650 NULL
, NULL
)) == NULL
)
2653 if (!target
->ctf_dedup
.cd_output_emission_conflicted_forwards
)
2654 if ((target
->ctf_dedup
.cd_output_emission_conflicted_forwards
2655 = ctf_dynhash_create (ctf_hash_string
, ctf_hash_eq_string
,
2656 NULL
, NULL
)) == NULL
)
2662 /* These are types that CTF cannot encode, marked as such by the compile.
2663 We intentionally do not re-emit these. */
2667 /* This will do nothing if the type to which this forwards already exists,
2668 and will be replaced with such a type if it appears later. */
2671 if ((new_type
= ctf_add_forward (target
, isroot
, name
,
2672 ctf_type_kind_forwarded (input
, type
)))
2680 if (ctf_type_encoding (input
, type
, &ep
) < 0)
2681 goto err_input
; /* errno is set for us. */
2682 if ((new_type
= ctf_add_encoded (target
, isroot
, name
, &ep
, kind
))
2691 if ((new_type
= ctf_add_enum (target
, isroot
, name
)) == CTF_ERR
)
2692 goto err_input
; /* errno is set for us. */
2694 while ((name
= ctf_enum_next (input
, type
, &i
, &val
)) != NULL
)
2696 if (ctf_add_enumerator (target
, new_type
, name
, val
) < 0)
2698 ctf_err_warn (target
, 0, "%s (%i): cannot add enumeration "
2699 "value %s from input type %lx: %s",
2700 ctf_link_input_name (input
), input_num
, name
,
2701 type
, ctf_errmsg (ctf_errno (target
)));
2702 ctf_next_destroy (i
);
2703 return ctf_set_errno (output
, ctf_errno (target
));
2706 if (ctf_errno (input
) != ECTF_NEXT_END
)
2714 ref
= ctf_type_reference (input
, type
);
2715 if ((ref
= ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2716 parents
, input
, input_num
,
2718 goto err_input
; /* errno is set for us. */
2720 if ((new_type
= ctf_add_typedef (target
, isroot
, name
, ref
)) == CTF_ERR
)
2721 goto err_target
; /* errno is set for us. */
2724 case CTF_K_VOLATILE
:
2726 case CTF_K_RESTRICT
:
2728 erm
= "pointer or cvr-qual";
2730 ref
= ctf_type_reference (input
, type
);
2731 if ((ref
= ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2732 parents
, input
, input_num
,
2734 goto err_input
; /* errno is set for us. */
2736 if ((new_type
= ctf_add_reftype (target
, isroot
, ref
, kind
)) == CTF_ERR
)
2737 goto err_target
; /* errno is set for us. */
2743 if (ctf_type_encoding (input
, type
, &ep
) < 0)
2744 goto err_input
; /* errno is set for us. */
2746 ref
= ctf_type_reference (input
, type
);
2747 if ((ref
= ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2748 parents
, input
, input_num
,
2752 if ((new_type
= ctf_add_slice (target
, isroot
, ref
, &ep
)) == CTF_ERR
)
2761 if (ctf_array_info (input
, type
, &ar
) < 0)
2764 ar
.ctr_contents
= ctf_dedup_id_to_target (output
, target
, inputs
,
2765 ninputs
, parents
, input
,
2766 input_num
, ar
.ctr_contents
);
2767 ar
.ctr_index
= ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2768 parents
, input
, input_num
,
2771 if (ar
.ctr_contents
== CTF_ERR
|| ar
.ctr_index
== CTF_ERR
)
2774 if ((new_type
= ctf_add_array (target
, isroot
, &ar
)) == CTF_ERR
)
2780 case CTF_K_FUNCTION
:
2787 if (ctf_func_type_info (input
, type
, &fi
) < 0)
2790 fi
.ctc_return
= ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2791 parents
, input
, input_num
,
2793 if (fi
.ctc_return
== CTF_ERR
)
2796 if ((args
= calloc (fi
.ctc_argc
, sizeof (ctf_id_t
))) == NULL
)
2798 ctf_set_errno (input
, ENOMEM
);
2802 erm
= "function args";
2803 if (ctf_func_type_args (input
, type
, fi
.ctc_argc
, args
) < 0)
2809 for (j
= 0; j
< fi
.ctc_argc
; j
++)
2811 args
[j
] = ctf_dedup_id_to_target (output
, target
, inputs
, ninputs
,
2812 parents
, input
, input_num
,
2814 if (args
[j
] == CTF_ERR
)
2818 if ((new_type
= ctf_add_function (target
, isroot
,
2819 &fi
, args
)) == CTF_ERR
)
2831 size_t size
= ctf_type_size (input
, type
);
2833 /* Insert the structure itself, so other types can refer to it. */
2835 erm
= "structure/union";
2836 if (kind
== CTF_K_STRUCT
)
2837 new_type
= ctf_add_struct_sized (target
, isroot
, name
, size
);
2839 new_type
= ctf_add_union_sized (target
, isroot
, name
, size
);
2841 if (new_type
== CTF_ERR
)
2844 out_id
= CTF_DEDUP_GID (output
, output_num
, new_type
);
2845 ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth
,
2847 /* Record the need to emit the members of this structure later. */
2848 if (ctf_dynhash_insert (d
->cd_emission_struct_members
, id
, out_id
) < 0)
2853 ctf_err_warn (output
, 0, "%s: unknown type kind for input type %lx",
2854 ctf_link_input_name (input
), type
);
2858 if (!emission_hashed
2860 && ctf_dynhash_cinsert (target
->ctf_dedup
.cd_output_emission_hashes
,
2861 hval
, (void *) (uintptr_t) new_type
) < 0)
2863 ctf_err_warn (output
, 0, "out of memory tracking deduplicated "
2865 return ctf_set_errno (output
, ENOMEM
);
2868 if (!emission_hashed
&& new_type
!= 0)
2869 ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for "
2870 "target %p (%s)\n", depth
, hval
, input_num
, type
, new_type
,
2871 (void *) target
, ctf_link_input_name (target
));
2876 ctf_err_warn (output
, 0, "out of memory creating emission-tracking hashes");
2877 return ctf_set_errno (output
, ENOMEM
);
2880 ctf_err_warn (output
, 0, "%s (%i): while emitting deduplicated %s, error "
2881 "getting input type %lx: %s", ctf_link_input_name (input
),
2882 input_num
,erm
, type
, ctf_errmsg (ctf_errno (input
)));
2883 ctf_set_errno (output
, ctf_errno (input
));
2886 ctf_err_warn (output
, 0, "%s (%i): while emitting deduplicated %s, error "
2887 "emitting target type from input type %lx: %s",
2888 ctf_link_input_name (input
), input_num
, erm
, type
,
2889 ctf_errmsg (ctf_errno (target
)));
2890 ctf_set_errno (output
, ctf_errno (target
));
2894 /* Traverse the cd_emission_struct_members and emit the members of all
2895 structures and unions. All other types are emitted and complete by this
2899 ctf_dedup_emit_struct_members (ctf_file_t
*output
, ctf_file_t
**inputs
,
2900 uint32_t ninputs
, uint32_t *parents
)
2902 ctf_dedup_t
*d
= &output
->ctf_dedup
;
2903 ctf_next_t
*i
= NULL
;
2904 void *input_id
, *target_id
;
2906 ctf_file_t
*err_fp
, *input_fp
;
2910 while ((err
= ctf_dynhash_next (d
->cd_emission_struct_members
, &i
,
2911 &input_id
, &target_id
)) == 0)
2913 ctf_next_t
*j
= NULL
;
2915 uint32_t target_num
;
2916 ctf_id_t input_type
, target_type
;
2921 input_num
= CTF_DEDUP_GID_TO_INPUT (input_id
);
2922 input_fp
= inputs
[input_num
];
2923 input_type
= CTF_DEDUP_GID_TO_TYPE (input_id
);
2925 /* The output is either -1 (for the shared, parent output dict) or the
2926 number of the corresponding input. */
2927 target_num
= CTF_DEDUP_GID_TO_INPUT (target_id
);
2928 if (target_num
== (uint32_t) -1)
2932 target
= inputs
[target_num
]->ctf_dedup
.cd_output
;
2933 if (!ctf_assert (output
, target
))
2936 err_type
= input_type
;
2940 target_type
= CTF_DEDUP_GID_TO_TYPE (target_id
);
2942 while ((offset
= ctf_member_next (input_fp
, input_type
, &j
, &name
,
2946 err_type
= target_type
;
2947 if ((membtype
= ctf_dedup_id_to_target (output
, target
, inputs
,
2948 ninputs
, parents
, input_fp
,
2950 membtype
)) == CTF_ERR
)
2952 ctf_next_destroy (j
);
2958 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
2959 ctf_dprintf ("Emitting %s, offset %zi\n", name
, offset
);
2961 if (ctf_add_member_offset (target
, target_type
, name
,
2962 membtype
, offset
) < 0)
2964 ctf_next_destroy (j
);
2968 if (ctf_errno (input_fp
) != ECTF_NEXT_END
)
2970 err
= ctf_errno (input_fp
);
2971 ctf_next_destroy (i
);
2975 if (err
!= ECTF_NEXT_END
)
2980 ctf_next_destroy (i
);
2981 ctf_err_warn (output
, 0, "%s (%i): error emitting members for structure "
2982 "type %lx: %s", ctf_link_input_name (input_fp
), input_num
,
2983 err_type
, ctf_errmsg (ctf_errno (err_fp
)));
2984 ctf_set_errno (output
, ctf_errno (err_fp
));
2987 ctf_err_warn (output
, 0, "iteration failure emitting structure members: %s",
2989 ctf_set_errno (output
, err
);
2993 /* Populate the type mapping used by the types in one FP (which must be an input
2994 dict containing a non-null cd_output resulting from a ctf_dedup_emit_type
2997 ctf_dedup_populate_type_mapping (ctf_file_t
*shared
, ctf_file_t
*fp
,
2998 ctf_file_t
**inputs
)
3000 ctf_dedup_t
*d
= &shared
->ctf_dedup
;
3001 ctf_file_t
*output
= fp
->ctf_dedup
.cd_output
;
3003 ctf_next_t
*i
= NULL
;
3006 /* The shared dict (the output) stores its types in the fp itself, not in a
3007 separate cd_output dict. */
3011 /* There may be no types to emit at all, or all the types in this TU may be
3013 if (!output
|| !output
->ctf_dedup
.cd_output_emission_hashes
)
3016 while ((err
= ctf_dynhash_cnext (output
->ctf_dedup
.cd_output_emission_hashes
,
3019 const char *hval
= (const char *) k
;
3020 ctf_id_t id_out
= (ctf_id_t
) (uintptr_t) v
;
3021 ctf_next_t
*j
= NULL
;
3022 ctf_dynset_t
*type_ids
;
3025 type_ids
= ctf_dynhash_lookup (d
->cd_output_mapping
, hval
);
3026 if (!ctf_assert (shared
, type_ids
))
3028 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
3029 ctf_dprintf ("Traversing emission hash: hval %s\n", hval
);
3032 while ((err
= ctf_dynset_cnext (type_ids
, &j
, &id
)) == 0)
3034 ctf_file_t
*input
= inputs
[CTF_DEDUP_GID_TO_INPUT (id
)];
3035 ctf_id_t id_in
= CTF_DEDUP_GID_TO_TYPE (id
);
3037 #ifdef ENABLE_LIBCTF_HASH_DEBUGGING
3038 ctf_dprintf ("Adding mapping from %i/%lx to %lx\n",
3039 CTF_DEDUP_GID_TO_INPUT (id
), id_in
, id_out
);
3041 ctf_add_type_mapping (input
, id_in
, output
, id_out
);
3043 if (err
!= ECTF_NEXT_END
)
3045 ctf_next_destroy (i
);
3049 if (err
!= ECTF_NEXT_END
)
3055 ctf_err_warn (shared
, 0, "iteration error populating the type mapping: %s",
3057 return ctf_set_errno (shared
, err
);
3060 /* Populate the type mapping machinery used by the rest of the linker,
3061 by ctf_add_type, etc. */
3063 ctf_dedup_populate_type_mappings (ctf_file_t
*output
, ctf_file_t
**inputs
,
3068 if (ctf_dedup_populate_type_mapping (output
, output
, inputs
) < 0)
3070 ctf_err_warn (output
, 0, "cannot populate type mappings for shared "
3071 "CTF dict: %s", ctf_errmsg (ctf_errno (output
)));
3072 return -1; /* errno is set for us. */
3075 for (i
= 0; i
< ninputs
; i
++)
3077 if (ctf_dedup_populate_type_mapping (output
, inputs
[i
], inputs
) < 0)
3079 ctf_err_warn (output
, 0, "cannot populate type mappings for per-CU "
3080 "CTF dict: %s", ctf_errmsg (ctf_errno (inputs
[i
])));
3081 return ctf_set_errno (output
, ctf_errno (inputs
[i
]));
3088 /* Emit deduplicated types into the outputs. The shared type repository is
3089 OUTPUT, on which the ctf_dedup function must have already been called. The
3090 PARENTS array contains the INPUTS index of the parent dict for every child
3091 dict at the corresponding index in the INPUTS (for non-child dicts, the value
3094 Return an array of fps with content emitted into them (starting with OUTPUT,
3095 which is the parent of all others, then all the newly-generated outputs).
3097 If CU_MAPPED is set, this is a first pass for a link with a non-empty CU
3098 mapping: only one output will result. */
3101 ctf_dedup_emit (ctf_file_t
*output
, ctf_file_t
**inputs
, uint32_t ninputs
,
3102 uint32_t *parents
, uint32_t *noutputs
, int cu_mapped
)
3104 size_t num_outputs
= 1; /* Always at least one output: us. */
3105 ctf_file_t
**outputs
;
3109 ctf_dprintf ("Triggering emission.\n");
3110 if (ctf_dedup_walk_output_mapping (output
, inputs
, ninputs
, parents
,
3111 ctf_dedup_emit_type
, &cu_mapped
) < 0)
3112 return NULL
; /* errno is set for us. */
3114 ctf_dprintf ("Populating struct members.\n");
3115 if (ctf_dedup_emit_struct_members (output
, inputs
, ninputs
, parents
) < 0)
3116 return NULL
; /* errno is set for us. */
3118 if (ctf_dedup_populate_type_mappings (output
, inputs
, ninputs
) < 0)
3119 return NULL
; /* errno is set for us. */
3121 for (i
= 0; i
< ninputs
; i
++)
3123 if (inputs
[i
]->ctf_dedup
.cd_output
)
3127 if (!ctf_assert (output
, !cu_mapped
|| (cu_mapped
&& num_outputs
== 1)))
3130 if ((outputs
= calloc (num_outputs
, sizeof (ctf_file_t
*))) == NULL
)
3132 ctf_err_warn (output
, 0, "out of memory allocating link outputs array");
3133 ctf_set_errno (output
, ENOMEM
);
3136 *noutputs
= num_outputs
;
3140 output
->ctf_refcnt
++;
3143 for (i
= 0; i
< ninputs
; i
++)
3145 if (inputs
[i
]->ctf_dedup
.cd_output
)
3147 *walk
= inputs
[i
]->ctf_dedup
.cd_output
;
3148 inputs
[i
]->ctf_dedup
.cd_output
= NULL
;
3153 ctf_dedup_fini (output
, outputs
, num_outputs
);