Commit | Line | Data |
---|---|---|
0f0c11f7 NA |
1 | /* CTF type deduplication. |
2 | Copyright (C) 2019 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of libctf. | |
5 | ||
6 | libctf is free software; you can redistribute it and/or modify it under | |
7 | the terms of the GNU General Public License as published by the Free | |
8 | Software Foundation; either version 3, or (at your option) any later | |
9 | version. | |
10 | ||
11 | This program is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
14 | See the GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with this program; see the file COPYING. If not see | |
18 | <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #include <ctf-impl.h> | |
21 | #include <string.h> | |
22 | #include <errno.h> | |
23 | #include <assert.h> | |
24 | #include "hashtab.h" | |
25 | ||
26 | /* (In the below, relevant functions are named in square brackets.) */ | |
27 | ||
28 | /* Type deduplication is a three-phase process: | |
29 | ||
30 | [ctf_dedup, ctf_dedup_hash_type, ctf_dedup_rhash_type] | |
31 | 1) come up with unambiguous hash values for all types: no two types may have | |
32 | the same hash value, and any given type should have only one hash value | |
33 | (for optimal deduplication). | |
34 | ||
35 | [ctf_dedup, ctf_dedup_detect_name_ambiguity, | |
36 | ctf_dedup_conflictify_unshared, ctf_dedup_mark_conflicting_hash] | |
37 | 2) mark those distinct types with names that collide (and thus cannot be | |
38 | declared simultaneously in the same translation unit) as conflicting, and | |
39 | recursively mark all types that cite one of those types as conflicting as | |
40 | well. Possibly mark all types cited in only one TU as conflicting, if | |
41 | the CTF_LINK_SHARE_DUPLICATED link mode is active. | |
42 | ||
43 | [ctf_dedup_emit, ctf_dedup_emit_struct_members, ctf_dedup_id_to_target] | |
44 | 3) emit all the types, one hash value at a time. Types not marked | |
45 | conflicting are emitted once, into the shared dictionary: types marked | |
46 | conflicting are emitted once per TU into a dictionary corresponding to | |
47 | each TU in which they appear. Structs marked conflicting get at the very | |
48 | least a forward emitted into the shared dict so that other dicts can cite | |
49 | it if needed. | |
50 | ||
51 | [id_to_packed_id] | |
52 | This all works over an array of inputs (usually in the same order as the | |
53 | inputs on the link line). We don't use the ctf_link_inputs hash directly | |
54 | because it is convenient to be able to address specific input types as a | |
55 | *global type ID* or 'GID', a pair of an array offset and a ctf_id_t. Since | |
56 | both are already 32 bits or less or can easily be constrained to that range, | |
57 | we can pack them both into a single 64-bit hash word for easy lookups, which | |
139633c3 | 58 | would be much more annoying to do with a ctf_dict_t * and a ctf_id_t. (On |
0f0c11f7 NA |
59 | 32-bit platforms, we must do that anyway, since pointers, and thus hash keys |
60 | and values, are only 32 bits wide). We track which inputs are parents of | |
61 | which other inputs so that we can correctly recognize that types we have | |
62 | traversed in children may cite types in parents, and so that we can process | |
63 | the parents first.) | |
64 | ||
65 | Note that thanks to ld -r, the deduplicator can be fed its own output, so the | |
66 | inputs may themselves have child dicts. Since we need to support this usage | |
67 | anyway, we can use it in one other place. If the caller finds translation | |
68 | units to be too small a unit ambiguous types, links can be 'cu-mapped', where | |
69 | the caller provides a mapping of input TU names to output child dict names. | |
70 | This mapping can fuse many child TUs into one potential child dict, so that | |
71 | ambiguous types in any of those input TUs go into the same child dict. | |
72 | When a many:1 cu-mapping is detected, the ctf_dedup machinery is called | |
73 | repeatedly, once for every output name that has more than one input, to fuse | |
74 | all the input TUs associated with a given output dict into one, and once again | |
75 | as normal to deduplicate all those intermediate outputs (and any 1:1 inputs) | |
76 | together. This has much higher memory usage than otherwise, because in the | |
77 | intermediate state, all the output TUs are in memory at once and cannot be | |
78 | lazily opened. It also has implications for the emission code: if types | |
79 | appear ambiguously in multiple input TUs that are all mapped to the same | |
80 | child dict, we cannot put them in children in the cu-mapping link phase | |
81 | because this output is meant to *become* a child in the next link stage and | |
82 | parent/child relationships are only one level deep: so instead, we just hide | |
83 | all but one of the ambiguous types. | |
84 | ||
85 | There are a few other subtleties here that make this more complex than it | |
86 | seems. Let's go over the steps above in more detail. | |
87 | ||
88 | 1) HASHING. | |
89 | ||
90 | [ctf_dedup_hash_type, ctf_dedup_rhash_type] | |
91 | Hashing proceeds recursively, mixing in the properties of each input type | |
92 | (including its name, if any), and then adding the hash values of every type | |
93 | cited by that type. The result is stashed in the cd_type_hashes so other | |
94 | phases can find the hash values of input types given their IDs, and so that | |
95 | if we encounter this type again while hashing we can just return its hash | |
96 | value: it is also stashed in the *output mapping*, a mapping from hash value | |
97 | to the set of GIDs corresponding to that type in all inputs. We also keep | |
98 | track of the GID of the first appearance of the type in any input (in | |
99 | cd_output_first_gid), and the GID of structs, unions, and forwards that only | |
100 | appear in one TU (in cd_struct_origin). See below for where these things are | |
101 | used. | |
102 | ||
103 | Everything in this phase is time-critical, because it is operating over | |
104 | non-deduplicated types and so may have hundreds or thousands of times the | |
105 | data volume to deal with than later phases. Trace output is hidden behind | |
106 | ENABLE_LIBCTF_HASH_DEBUGGING to prevent the sheer number of calls to | |
107 | ctf_dprintf from slowing things down (tenfold slowdowns are observed purely | |
108 | from the calls to ctf_dprintf(), even with debugging switched off), and keep | |
109 | down the volume of output (hundreds of gigabytes of debug output are not | |
110 | uncommon on larger links). | |
111 | ||
112 | We have to do *something* about potential cycles in the type graph. We'd | |
113 | like to avoid emitting forwards in the final output if possible, because | |
114 | forwards aren't much use: they have no members. We are mostly saved from | |
115 | needing to worry about this at emission time by ctf_add_struct*() | |
116 | automatically replacing newly-created forwards when the real struct/union | |
117 | comes along. So we only have to avoid getting stuck in cycles during the | |
118 | hashing phase, while also not confusing types that cite members that are | |
119 | structs with each other. It is easiest to solve this problem by noting two | |
120 | things: | |
121 | ||
122 | - all cycles in C depend on the presence of tagged structs/unions | |
123 | - all tagged structs/unions have a unique name they can be disambiguated by | |
124 | ||
125 | [ctf_dedup_is_stub] | |
126 | This means that we can break all cycles by ceasing to hash in cited types at | |
127 | every tagged struct/union and instead hashing in a stub consisting of the | |
128 | struct/union's *decorated name*, which is the name preceded by "s " or "u " | |
129 | depending on the namespace (cached in cd_decorated_names). Forwards are | |
130 | decorated identically (so a forward to "struct foo" would be represented as | |
131 | "s foo"): this means that a citation of a forward to a type and a citation of | |
132 | a concrete definition of a type with the same name ends up getting the same | |
133 | hash value. | |
134 | ||
135 | Of course, it is quite possible to have two TUs with structs with the same | |
136 | name and different definitions, but that's OK because when we scan for types | |
137 | with ambiguous names we will identify these and mark them conflicting. | |
138 | ||
139 | We populate one thing to help conflictedness marking. No unconflicted type | |
140 | may cite a conflicted one, but this means that conflictedness marking must | |
141 | walk from types to the types that cite them, which is the opposite of the | |
142 | usual order. We can make this easier to do by constructing a *citers* graph | |
143 | in cd_citers, which points from types to the types that cite them: because we | |
144 | emit forwards corresponding to every conflicted struct/union, we don't need | |
145 | to do this for citations of structs/unions by other types. This is very | |
146 | convenient for us, because that's the only type we don't traverse | |
147 | recursively: so we can construct the citers graph at the same time as we | |
148 | hash, rather than needing to add an extra pass. (This graph is a dynhash of | |
149 | *type hash values*, so it's small: in effect it is automatically | |
150 | deduplicated.) | |
151 | ||
152 | 2) COLLISIONAL MARKING. | |
153 | ||
154 | [ctf_dedup_detect_name_ambiguity, ctf_dedup_mark_conflicting_hash] | |
155 | We identify types whose names collide during the hashing process, and count | |
156 | the rough number of uses of each name (caching may throw it off a bit: this | |
157 | doesn't need to be accurate). We then mark the less-frequently-cited types | |
158 | with each names conflicting: the most-frequently-cited one goes into the | |
159 | shared type dictionary, while all others are duplicated into per-TU | |
160 | dictionaries, named after the input TU, that have the shared dictionary as a | |
161 | parent. For structures and unions this is not quite good enough: we'd like | |
162 | to have citations of forwards to ambiguously named structures and unions | |
163 | *stay* as citations of forwards, so that the user can tell that the caller | |
164 | didn't actually know which structure definition was meant: but if we put one | |
165 | of those structures into the shared dictionary, it would supplant and replace | |
166 | the forward, leaving no sign. So structures and unions do not take part in | |
167 | this popularity contest: if their names are ambiguous, they are just | |
168 | duplicated, and only a forward appears in the shared dict. | |
169 | ||
170 | [ctf_dedup_propagate_conflictedness] | |
171 | The process of marking types conflicted is itself recursive: we recursively | |
172 | traverse the cd_citers graph populated in the hashing pass above and mark | |
173 | everything that we encounter conflicted (without wasting time re-marking | |
174 | anything that is already marked). This naturally terminates just where we | |
175 | want it to (at types that are cited by no other types, and at structures and | |
176 | unions) and suffices to ensure that types that cite conflicted types are | |
177 | always marked conflicted. | |
178 | ||
179 | [ctf_dedup_conflictify_unshared, ctf_dedup_multiple_input_dicts] | |
180 | When linking in CTF_LINK_SHARE_DUPLICATED mode, we would like all types that | |
181 | are used in only one TU to end up in a per-CU dict. The easiest way to do | |
182 | that is to mark them conflicted. ctf_dedup_conflictify_unshared does this, | |
183 | traversing the output mapping and using ctf_dedup_multiple_input_dicts to | |
184 | check the number of input dicts each distinct type hash value came from: | |
185 | types that only came from one get marked conflicted. One caveat here is that | |
186 | we need to consider both structs and forwards to them: a struct that appears | |
187 | in one TU and has a dozen citations to an opaque forward in other TUs should | |
188 | *not* be considered to be used in only one TU, because users would find it | |
189 | useful to be able to traverse into opaque structures of that sort: so we use | |
190 | cd_struct_origin to check both structs/unions and the forwards corresponding | |
191 | to them. | |
192 | ||
193 | 3) EMISSION. | |
194 | ||
195 | [ctf_dedup_walk_output_mapping, ctf_dedup_rwalk_output_mapping, | |
196 | ctf_dedup_rwalk_one_output_mapping] | |
197 | Emission involves another walk of the entire output mapping, this time | |
198 | traversing everything other than struct members, recursively. Types are | |
199 | emitted from leaves to trunk, emitting all types a type cites before emitting | |
200 | the type itself. We sort the output mapping before traversing it, for | |
201 | reproducibility and also correctness: the input dicts may have parent/child | |
202 | relationships, so we simply sort all types that first appear in parents | |
203 | before all children, then sort types that first appear in dicts appearing | |
204 | earlier on the linker command line before those that appear later, then sort | |
205 | by input ctf_id_t. (This is where we use cd_output_first_gid, collected | |
206 | above.) | |
207 | ||
208 | The walking is done using a recursive traverser which arranges to not revisit | |
209 | any type already visited and to call its callback once per input GID for | |
210 | input GIDs corresponding to conflicted output types. The traverser only | |
211 | finds input types and calls a callback for them as many times as the output | |
212 | needs to appear: it doesn't try to figure out anything about where the output | |
213 | might go. That's done by the callback based on whether the type is | |
214 | marked conflicted or not. | |
215 | ||
216 | [ctf_dedup_emit_type, ctf_dedup_id_to_target, ctf_dedup_synthesize_forward] | |
217 | ctf_dedup_emit_type is the (sole) callback for ctf_dedup_walk_output_mapping. | |
218 | Conflicted types have all necessary dictionaries created, and then we emit | |
219 | the type into each dictionary in turn, working over each input CTF type | |
220 | corresponding to each hash value and using ctf_dedup_id_to_target to map each | |
221 | input ctf_id_t into the corresponding type in the output (dealing with input | |
222 | ctf_id_t's with parents in the process by simply chasing to the parent dict | |
223 | if the type we're looking up is in there). Emitting structures involves | |
224 | simply noting that the members of this structure need emission later on: | |
225 | because you cannot cite a single structure member from another type, we avoid | |
226 | emitting the members at this stage to keep recursion depths down a bit. | |
227 | ||
228 | At this point, if we have by some mischance decided that two different types | |
229 | with child types that hash to different values have in fact got the same hash | |
230 | value themselves and *not* marked it conflicting, the type walk will walk | |
231 | only *one* of them and in all likelihood we'll find that we are trying to | |
232 | emit a type into some child dictionary that references a type that was never | |
233 | emitted into that dictionary and assertion-fail. This always indicates a bug | |
234 | in the conflictedness marking machinery or the hashing code, or both. | |
235 | ||
236 | ctf_dedup_id_to_target calls ctf_dedup_synthesize_forward to do one extra | |
237 | thing, alluded to above: if this is a conflicted tagged structure or union, | |
238 | and the target is the shared dict (i.e., the type we're being asked to emit | |
239 | is not itself conflicted so can't just point straight at the conflicted | |
240 | type), we instead synthesise a forward with the same name, emit it into the | |
241 | shared dict, record it in cd_output_emission_conflicted_forwards so that we | |
242 | don't re-emit it, and return it. This means that cycles that contain | |
243 | conflicts do not cause the entire cycle to be replicated in every child: only | |
244 | that piece of the cycle which takes you back as far as the closest tagged | |
245 | struct/union needs to be replicated. This trick means that no part of the | |
246 | deduplicator needs a cycle detector: every recursive walk can stop at tagged | |
247 | structures. | |
248 | ||
249 | [ctf_dedup_emit_struct_members] | |
250 | The final stage of emission is to walk over all structures with members | |
251 | that need emission and emit all of them. Every type has been emitted at | |
252 | this stage, so emission cannot fail. | |
253 | ||
254 | [ctf_dedup_populate_type_mappings, ctf_dedup_populate_type_mapping] | |
255 | Finally, we update the input -> output type ID mappings used by the ctf-link | |
256 | machinery to update all the other sections. This is surprisingly expensive | |
257 | and may be replaced with a scheme which lets the ctf-link machinery extract | |
258 | the needed info directly from the deduplicator. */ | |
259 | ||
260 | /* Possible future optimizations are flagged with 'optimization opportunity' | |
261 | below. */ | |
262 | ||
263 | /* Global optimization opportunity: a GC pass, eliminating types with no direct | |
264 | or indirect citations from the other sections in the dictionary. */ | |
265 | ||
266 | /* Internal flag values for ctf_dedup_hash_type. */ | |
267 | ||
268 | /* Child call: consider forwardable types equivalent to forwards or stubs below | |
269 | this point. */ | |
270 | #define CTF_DEDUP_HASH_INTERNAL_CHILD 0x01 | |
271 | ||
272 | /* Transform references to single ctf_id_ts in passed-in inputs into a number | |
273 | that will fit in a uint64_t. Needs rethinking if CTF_MAX_TYPE is boosted. | |
274 | ||
275 | On 32-bit platforms, we pack things together differently: see the note | |
276 | above. */ | |
277 | ||
278 | #if UINTPTR_MAX < UINT64_MAX | |
279 | # define IDS_NEED_ALLOCATION 1 | |
280 | # define CTF_DEDUP_GID(fp, input, type) id_to_packed_id (fp, input, type) | |
281 | # define CTF_DEDUP_GID_TO_INPUT(id) packed_id_to_input (id) | |
282 | # define CTF_DEDUP_GID_TO_TYPE(id) packed_id_to_type (id) | |
283 | #else | |
284 | # define CTF_DEDUP_GID(fp, input, type) \ | |
285 | (void *) (((uint64_t) input) << 32 | (type)) | |
286 | # define CTF_DEDUP_GID_TO_INPUT(id) ((int) (((uint64_t) id) >> 32)) | |
287 | # define CTF_DEDUP_GID_TO_TYPE(id) (ctf_id_t) (((uint64_t) id) & ~(0xffffffff00000000ULL)) | |
288 | #endif | |
289 | ||
290 | #ifdef IDS_NEED_ALLOCATION | |
291 | ||
292 | /* This is the 32-bit path, which stores GIDs in a pool and returns a pointer | |
293 | into the pool. It is notably less efficient than the 64-bit direct storage | |
294 | approach, but with a smaller key, this is all we can do. */ | |
295 | ||
296 | static void * | |
139633c3 | 297 | id_to_packed_id (ctf_dict_t *fp, int input_num, ctf_id_t type) |
0f0c11f7 NA |
298 | { |
299 | const void *lookup; | |
300 | ctf_type_id_key_t *dynkey = NULL; | |
301 | ctf_type_id_key_t key = { input_num, type }; | |
302 | ||
139633c3 | 303 | if (!ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t, |
0f0c11f7 NA |
304 | &key, &lookup, NULL)) |
305 | { | |
306 | if ((dynkey = malloc (sizeof (ctf_type_id_key_t))) == NULL) | |
307 | goto oom; | |
308 | memcpy (dynkey, &key, sizeof (ctf_type_id_key_t)); | |
309 | ||
139633c3 | 310 | if (ctf_dynhash_insert (fp->ctf_dedup.cd_id_to_dict_t, dynkey, NULL) < 0) |
0f0c11f7 NA |
311 | goto oom; |
312 | ||
139633c3 | 313 | ctf_dynhash_lookup_kv (fp->ctf_dedup.cd_id_to_dict_t, |
0f0c11f7 NA |
314 | dynkey, &lookup, NULL); |
315 | } | |
316 | /* We use a raw assert() here because there isn't really a way to get any sort | |
317 | of error back from this routine without vastly complicating things for the | |
318 | much more common case of !IDS_NEED_ALLOCATION. */ | |
319 | assert (lookup); | |
320 | return (void *) lookup; | |
321 | ||
322 | oom: | |
323 | free (dynkey); | |
324 | ctf_set_errno (fp, ENOMEM); | |
325 | return NULL; | |
326 | } | |
327 | ||
328 | static int | |
329 | packed_id_to_input (const void *id) | |
330 | { | |
331 | const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; | |
332 | ||
333 | return key->ctii_input_num; | |
334 | } | |
335 | ||
336 | static ctf_id_t | |
337 | packed_id_to_type (const void *id) | |
338 | { | |
339 | const ctf_type_id_key_t *key = (ctf_type_id_key_t *) id; | |
340 | ||
341 | return key->ctii_type; | |
342 | } | |
343 | #endif | |
344 | ||
345 | /* Make an element in a dynhash-of-dynsets, or return it if already present. */ | |
346 | ||
347 | static ctf_dynset_t * | |
348 | make_set_element (ctf_dynhash_t *set, const void *key) | |
349 | { | |
350 | ctf_dynset_t *element; | |
351 | ||
352 | if ((element = ctf_dynhash_lookup (set, key)) == NULL) | |
353 | { | |
354 | if ((element = ctf_dynset_create (htab_hash_string, | |
355 | ctf_dynset_eq_string, | |
356 | NULL)) == NULL) | |
357 | return NULL; | |
358 | ||
359 | if (ctf_dynhash_insert (set, (void *) key, element) < 0) | |
360 | { | |
361 | ctf_dynset_destroy (element); | |
362 | return NULL; | |
363 | } | |
364 | } | |
365 | ||
366 | return element; | |
367 | } | |
368 | ||
369 | /* Initialize the dedup atoms table. */ | |
370 | int | |
139633c3 | 371 | ctf_dedup_atoms_init (ctf_dict_t *fp) |
0f0c11f7 NA |
372 | { |
373 | if (fp->ctf_dedup_atoms) | |
374 | return 0; | |
375 | ||
376 | if (!fp->ctf_dedup_atoms_alloc) | |
377 | { | |
378 | if ((fp->ctf_dedup_atoms_alloc | |
379 | = ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string, | |
380 | free)) == NULL) | |
381 | return ctf_set_errno (fp, ENOMEM); | |
382 | } | |
383 | fp->ctf_dedup_atoms = fp->ctf_dedup_atoms_alloc; | |
384 | return 0; | |
385 | } | |
386 | ||
387 | /* Intern things in the dedup atoms table. */ | |
388 | ||
389 | static const char * | |
139633c3 | 390 | intern (ctf_dict_t *fp, char *atom) |
0f0c11f7 NA |
391 | { |
392 | const void *foo; | |
393 | ||
394 | if (atom == NULL) | |
395 | return NULL; | |
396 | ||
397 | if (!ctf_dynset_exists (fp->ctf_dedup_atoms, atom, &foo)) | |
398 | { | |
399 | if (ctf_dynset_insert (fp->ctf_dedup_atoms, atom) < 0) | |
400 | { | |
401 | ctf_set_errno (fp, ENOMEM); | |
402 | return NULL; | |
403 | } | |
404 | foo = atom; | |
405 | } | |
406 | else | |
407 | free (atom); | |
408 | ||
409 | return (const char *) foo; | |
410 | } | |
411 | ||
412 | /* Add an indication of the namespace to a type name in a way that is not valid | |
413 | for C identifiers. Used to maintain hashes of type names to other things | |
414 | while allowing for the four C namespaces (normal, struct, union, enum). | |
415 | Return a new dynamically-allocated string. */ | |
416 | static const char * | |
139633c3 | 417 | ctf_decorate_type_name (ctf_dict_t *fp, const char *name, int kind) |
0f0c11f7 NA |
418 | { |
419 | ctf_dedup_t *d = &fp->ctf_dedup; | |
420 | const char *ret; | |
421 | const char *k; | |
422 | char *p; | |
423 | size_t i; | |
424 | ||
425 | switch (kind) | |
426 | { | |
427 | case CTF_K_STRUCT: | |
428 | k = "s "; | |
429 | i = 0; | |
430 | break; | |
431 | case CTF_K_UNION: | |
432 | k = "u "; | |
433 | i = 1; | |
434 | break; | |
435 | case CTF_K_ENUM: | |
436 | k = "e "; | |
437 | i = 2; | |
438 | break; | |
439 | default: | |
440 | k = ""; | |
441 | i = 3; | |
442 | } | |
443 | ||
444 | if ((ret = ctf_dynhash_lookup (d->cd_decorated_names[i], name)) == NULL) | |
445 | { | |
446 | char *str; | |
447 | ||
448 | if ((str = malloc (strlen (name) + strlen (k) + 1)) == NULL) | |
449 | goto oom; | |
450 | ||
451 | p = stpcpy (str, k); | |
452 | strcpy (p, name); | |
453 | ret = intern (fp, str); | |
454 | if (!ret) | |
455 | goto oom; | |
456 | ||
457 | if (ctf_dynhash_cinsert (d->cd_decorated_names[i], name, ret) < 0) | |
458 | goto oom; | |
459 | } | |
460 | ||
461 | return ret; | |
462 | ||
463 | oom: | |
464 | ctf_set_errno (fp, ENOMEM); | |
465 | return NULL; | |
466 | } | |
467 | ||
468 | /* Hash a type, possibly debugging-dumping something about it as well. */ | |
469 | static inline void | |
470 | ctf_dedup_sha1_add (ctf_sha1_t *sha1, const void *buf, size_t len, | |
471 | const char *description _libctf_unused_, | |
472 | unsigned long depth _libctf_unused_) | |
473 | { | |
474 | ctf_sha1_add (sha1, buf, len); | |
475 | ||
476 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
477 | ctf_sha1_t tmp; | |
478 | char tmp_hval[CTF_SHA1_SIZE]; | |
479 | tmp = *sha1; | |
480 | ctf_sha1_fini (&tmp, tmp_hval); | |
481 | ctf_dprintf ("%lu: after hash addition of %s: %s\n", depth, description, | |
482 | tmp_hval); | |
483 | #endif | |
484 | } | |
485 | ||
486 | static const char * | |
139633c3 NA |
487 | ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input, |
488 | ctf_dict_t **inputs, uint32_t *parents, | |
0f0c11f7 NA |
489 | int input_num, ctf_id_t type, int flags, |
490 | unsigned long depth, | |
139633c3 NA |
491 | int (*populate_fun) (ctf_dict_t *fp, |
492 | ctf_dict_t *input, | |
493 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
494 | int input_num, |
495 | ctf_id_t type, | |
496 | void *id, | |
497 | const char *decorated_name, | |
498 | const char *hash)); | |
499 | ||
500 | /* Determine whether this type is being hashed as a stub (in which case it is | |
501 | unsafe to cache it). */ | |
502 | static int | |
503 | ctf_dedup_is_stub (const char *name, int kind, int fwdkind, int flags) | |
504 | { | |
505 | /* We can cache all types unless we are recursing to children and are hashing | |
506 | in a tagged struct, union or forward, all of which are replaced with their | |
507 | decorated name as a stub and will have different hash values when hashed at | |
508 | the top level. */ | |
509 | ||
510 | return ((flags & CTF_DEDUP_HASH_INTERNAL_CHILD) && name | |
511 | && (kind == CTF_K_STRUCT || kind == CTF_K_UNION | |
512 | || (kind == CTF_K_FORWARD && (fwdkind == CTF_K_STRUCT | |
513 | || fwdkind == CTF_K_UNION)))); | |
514 | } | |
515 | ||
516 | /* Populate struct_origin if need be (not already populated, or populated with | |
517 | a different origin), in which case it must go to -1, "shared".) | |
518 | ||
519 | Only called for forwards or forwardable types with names, when the link mode | |
520 | is CTF_LINK_SHARE_DUPLICATED. */ | |
521 | static int | |
139633c3 | 522 | ctf_dedup_record_origin (ctf_dict_t *fp, int input_num, const char *decorated, |
0f0c11f7 NA |
523 | void *id) |
524 | { | |
525 | ctf_dedup_t *d = &fp->ctf_dedup; | |
526 | void *origin; | |
527 | int populate_origin = 0; | |
528 | ||
529 | if (ctf_dynhash_lookup_kv (d->cd_struct_origin, decorated, NULL, &origin)) | |
530 | { | |
531 | if (CTF_DEDUP_GID_TO_INPUT (origin) != input_num | |
532 | && CTF_DEDUP_GID_TO_INPUT (origin) != -1) | |
533 | { | |
534 | populate_origin = 1; | |
535 | origin = CTF_DEDUP_GID (fp, -1, -1); | |
536 | } | |
537 | } | |
538 | else | |
539 | { | |
540 | populate_origin = 1; | |
541 | origin = id; | |
542 | } | |
543 | ||
544 | if (populate_origin) | |
545 | if (ctf_dynhash_cinsert (d->cd_struct_origin, decorated, origin) < 0) | |
546 | return ctf_set_errno (fp, errno); | |
547 | return 0; | |
548 | } | |
549 | ||
550 | /* Do the underlying hashing and recursion for ctf_dedup_hash_type (which it | |
551 | calls, recursively). */ | |
552 | ||
553 | static const char * | |
139633c3 | 554 | ctf_dedup_rhash_type (ctf_dict_t *fp, ctf_dict_t *input, ctf_dict_t **inputs, |
0f0c11f7 NA |
555 | uint32_t *parents, int input_num, ctf_id_t type, |
556 | void *type_id, const ctf_type_t *tp, const char *name, | |
557 | const char *decorated, int kind, int flags, | |
558 | unsigned long depth, | |
139633c3 NA |
559 | int (*populate_fun) (ctf_dict_t *fp, |
560 | ctf_dict_t *input, | |
561 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
562 | int input_num, |
563 | ctf_id_t type, | |
564 | void *id, | |
565 | const char *decorated_name, | |
566 | const char *hash)) | |
567 | { | |
568 | ctf_dedup_t *d = &fp->ctf_dedup; | |
569 | ctf_next_t *i = NULL; | |
570 | ctf_sha1_t hash; | |
571 | ctf_id_t child_type; | |
572 | char hashbuf[CTF_SHA1_SIZE]; | |
573 | const char *hval = NULL; | |
574 | const char *whaterr; | |
575 | int err; | |
576 | ||
577 | const char *citer = NULL; | |
578 | ctf_dynset_t *citers = NULL; | |
579 | ||
580 | /* Add a citer to the citers set. */ | |
581 | #define ADD_CITER(citers, hval) \ | |
582 | do \ | |
583 | { \ | |
926c9e76 | 584 | whaterr = N_("error updating citers"); \ |
0f0c11f7 NA |
585 | if (!citers) \ |
586 | if ((citers = ctf_dynset_create (htab_hash_string, \ | |
587 | ctf_dynset_eq_string, \ | |
588 | NULL)) == NULL) \ | |
589 | goto oom; \ | |
590 | if (ctf_dynset_cinsert (citers, hval) < 0) \ | |
591 | goto oom; \ | |
592 | } while (0) | |
593 | ||
594 | /* If this is a named struct or union or a forward to one, and this is a child | |
595 | traversal, treat this type as if it were a forward -- do not recurse to | |
596 | children, ignore all content not already hashed in, and hash in the | |
597 | decorated name of the type instead. */ | |
598 | ||
599 | if (ctf_dedup_is_stub (name, kind, tp->ctt_type, flags)) | |
600 | { | |
601 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
602 | ctf_dprintf ("Struct/union/forward citation: substituting forwarding " | |
603 | "stub with decorated name %s\n", decorated); | |
604 | ||
605 | #endif | |
606 | ctf_sha1_init (&hash); | |
607 | ctf_dedup_sha1_add (&hash, decorated, strlen (decorated) + 1, | |
608 | "decorated struct/union/forward name", depth); | |
609 | ctf_sha1_fini (&hash, hashbuf); | |
610 | ||
611 | if ((hval = intern (fp, strdup (hashbuf))) == NULL) | |
612 | { | |
926c9e76 NA |
613 | ctf_err_warn (fp, 0, 0, _("%s (%i): out of memory during forwarding-" |
614 | "stub hashing for type with GID %p"), | |
615 | ctf_link_input_name (input), input_num, type_id); | |
0f0c11f7 NA |
616 | return NULL; /* errno is set for us. */ |
617 | } | |
618 | ||
926c9e76 | 619 | /* In share-duplicated link mode, make sure the origin of this type is |
0f0c11f7 NA |
620 | recorded, even if this is a type in a parent dict which will not be |
621 | directly traversed. */ | |
622 | if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED | |
623 | && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) | |
624 | return NULL; /* errno is set for us. */ | |
625 | ||
626 | return hval; | |
627 | } | |
628 | ||
629 | /* Now ensure that subsequent recursive calls (but *not* the top-level call) | |
630 | get this treatment. */ | |
631 | flags |= CTF_DEDUP_HASH_INTERNAL_CHILD; | |
632 | ||
633 | /* If this is a struct, union, or forward with a name, record the unique | |
634 | originating input TU, if there is one. */ | |
635 | ||
636 | if (decorated && (ctf_forwardable_kind (kind) || kind != CTF_K_FORWARD)) | |
637 | if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED | |
638 | && ctf_dedup_record_origin (fp, input_num, decorated, type_id) < 0) | |
639 | return NULL; /* errno is set for us. */ | |
640 | ||
0e28ade4 NA |
641 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING |
642 | ctf_dprintf ("%lu: hashing thing with ID %i/%lx (kind %i): %s.\n", | |
643 | depth, input_num, type, kind, name ? name : ""); | |
644 | #endif | |
645 | ||
646 | /* Some type kinds don't have names: the API provides no way to set the name, | |
647 | so the type the deduplicator outputs will be nameless even if the input | |
648 | somehow has a name, and the name should not be mixed into the hash. */ | |
649 | ||
650 | switch (kind) | |
651 | { | |
652 | case CTF_K_POINTER: | |
653 | case CTF_K_ARRAY: | |
654 | case CTF_K_FUNCTION: | |
655 | case CTF_K_VOLATILE: | |
656 | case CTF_K_CONST: | |
657 | case CTF_K_RESTRICT: | |
658 | case CTF_K_SLICE: | |
659 | name = NULL; | |
660 | } | |
661 | ||
0f0c11f7 NA |
662 | /* Mix in invariant stuff, transforming the type kind if needed. Note that |
663 | the vlen is *not* hashed in: the actual variable-length info is hashed in | |
664 | instead, piecewise. The vlen is not part of the type, only the | |
665 | variable-length data is: identical types with distinct vlens are quite | |
666 | possible. Equally, we do not want to hash in the isroot flag: both the | |
667 | compiler and the deduplicator set the nonroot flag to indicate clashes with | |
668 | *other types in the same TU* with the same name: so two types can easily | |
669 | have distinct nonroot flags, yet be exactly the same type.*/ | |
670 | ||
0f0c11f7 NA |
671 | ctf_sha1_init (&hash); |
672 | if (name) | |
673 | ctf_dedup_sha1_add (&hash, name, strlen (name) + 1, "name", depth); | |
674 | ctf_dedup_sha1_add (&hash, &kind, sizeof (uint32_t), "kind", depth); | |
675 | ||
676 | /* Hash content of this type. */ | |
677 | switch (kind) | |
678 | { | |
679 | case CTF_K_UNKNOWN: | |
680 | /* No extra state. */ | |
681 | break; | |
682 | case CTF_K_FORWARD: | |
683 | ||
684 | /* Add the forwarded kind, stored in the ctt_type. */ | |
685 | ctf_dedup_sha1_add (&hash, &tp->ctt_type, sizeof (tp->ctt_type), | |
686 | "forwarded kind", depth); | |
687 | break; | |
688 | case CTF_K_INTEGER: | |
689 | case CTF_K_FLOAT: | |
690 | { | |
691 | ctf_encoding_t ep; | |
692 | memset (&ep, 0, sizeof (ctf_encoding_t)); | |
693 | ||
694 | ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), "size", | |
695 | depth); | |
696 | if (ctf_type_encoding (input, type, &ep) < 0) | |
697 | { | |
926c9e76 | 698 | whaterr = N_("error getting encoding"); |
0f0c11f7 NA |
699 | goto err; |
700 | } | |
701 | ctf_dedup_sha1_add (&hash, &ep, sizeof (ctf_encoding_t), "encoding", | |
702 | depth); | |
703 | break; | |
704 | } | |
705 | /* Types that reference other types. */ | |
706 | case CTF_K_TYPEDEF: | |
707 | case CTF_K_VOLATILE: | |
708 | case CTF_K_CONST: | |
709 | case CTF_K_RESTRICT: | |
710 | case CTF_K_POINTER: | |
711 | /* Hash the referenced type, if not already hashed, and mix it in. */ | |
712 | child_type = ctf_type_reference (input, type); | |
713 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, | |
714 | child_type, flags, depth, | |
715 | populate_fun)) == NULL) | |
716 | { | |
926c9e76 | 717 | whaterr = N_("error doing referenced type hashing"); |
0f0c11f7 NA |
718 | goto err; |
719 | } | |
720 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "referenced type", | |
721 | depth); | |
722 | citer = hval; | |
723 | ||
724 | break; | |
725 | ||
726 | /* The slices of two types hash identically only if the type they overlay | |
727 | also has the same encoding. This is not ideal, but in practice will work | |
728 | well enough. We work directly rather than using the CTF API because | |
729 | we do not want the slice's normal automatically-shine-through | |
730 | semantics to kick in here. */ | |
731 | case CTF_K_SLICE: | |
732 | { | |
733 | const ctf_slice_t *slice; | |
734 | const ctf_dtdef_t *dtd; | |
735 | ssize_t size; | |
736 | ssize_t increment; | |
737 | ||
738 | child_type = ctf_type_reference (input, type); | |
739 | ctf_get_ctt_size (input, tp, &size, &increment); | |
740 | ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "size", depth); | |
741 | ||
742 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, | |
743 | child_type, flags, depth, | |
744 | populate_fun)) == NULL) | |
745 | { | |
926c9e76 | 746 | whaterr = N_("error doing slice-referenced type hashing"); |
0f0c11f7 NA |
747 | goto err; |
748 | } | |
749 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "sliced type", | |
750 | depth); | |
751 | citer = hval; | |
752 | ||
753 | if ((dtd = ctf_dynamic_type (input, type)) != NULL) | |
754 | slice = &dtd->dtd_u.dtu_slice; | |
755 | else | |
756 | slice = (ctf_slice_t *) ((uintptr_t) tp + increment); | |
757 | ||
758 | ctf_dedup_sha1_add (&hash, &slice->cts_offset, | |
759 | sizeof (slice->cts_offset), "slice offset", depth); | |
760 | ctf_dedup_sha1_add (&hash, &slice->cts_bits, | |
761 | sizeof (slice->cts_bits), "slice bits", depth); | |
762 | break; | |
763 | } | |
764 | ||
765 | case CTF_K_ARRAY: | |
766 | { | |
767 | ctf_arinfo_t ar; | |
768 | ||
769 | if (ctf_array_info (input, type, &ar) < 0) | |
770 | { | |
926c9e76 | 771 | whaterr = N_("error getting array info"); |
0f0c11f7 NA |
772 | goto err; |
773 | } | |
774 | ||
775 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, | |
776 | ar.ctr_contents, flags, depth, | |
777 | populate_fun)) == NULL) | |
778 | { | |
926c9e76 | 779 | whaterr = N_("error doing array contents type hashing"); |
0f0c11f7 NA |
780 | goto err; |
781 | } | |
782 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array contents", | |
783 | depth); | |
784 | ADD_CITER (citers, hval); | |
785 | ||
786 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, | |
787 | ar.ctr_index, flags, depth, | |
788 | populate_fun)) == NULL) | |
789 | { | |
926c9e76 | 790 | whaterr = N_("error doing array index type hashing"); |
0f0c11f7 NA |
791 | goto err; |
792 | } | |
793 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "array index", | |
794 | depth); | |
795 | ctf_dedup_sha1_add (&hash, &ar.ctr_nelems, sizeof (ar.ctr_nelems), | |
796 | "element count", depth); | |
797 | ADD_CITER (citers, hval); | |
798 | ||
799 | break; | |
800 | } | |
801 | case CTF_K_FUNCTION: | |
802 | { | |
803 | ctf_funcinfo_t fi; | |
804 | ctf_id_t *args; | |
805 | uint32_t j; | |
806 | ||
807 | if (ctf_func_type_info (input, type, &fi) < 0) | |
808 | { | |
926c9e76 | 809 | whaterr = N_("error getting func type info"); |
0f0c11f7 NA |
810 | goto err; |
811 | } | |
812 | ||
813 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, input_num, | |
814 | fi.ctc_return, flags, depth, | |
815 | populate_fun)) == NULL) | |
816 | { | |
926c9e76 | 817 | whaterr = N_("error getting func return type"); |
0f0c11f7 NA |
818 | goto err; |
819 | } | |
820 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func return", | |
821 | depth); | |
822 | ctf_dedup_sha1_add (&hash, &fi.ctc_argc, sizeof (fi.ctc_argc), | |
823 | "func argc", depth); | |
824 | ctf_dedup_sha1_add (&hash, &fi.ctc_flags, sizeof (fi.ctc_flags), | |
825 | "func flags", depth); | |
826 | ADD_CITER (citers, hval); | |
827 | ||
828 | if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) | |
829 | { | |
926c9e76 | 830 | whaterr = N_("error doing memory allocation"); |
0f0c11f7 NA |
831 | goto err; |
832 | } | |
833 | ||
834 | if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) | |
835 | { | |
836 | free (args); | |
926c9e76 | 837 | whaterr = N_("error getting func arg type"); |
0f0c11f7 NA |
838 | goto err; |
839 | } | |
840 | for (j = 0; j < fi.ctc_argc; j++) | |
841 | { | |
842 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, | |
843 | input_num, args[j], flags, depth, | |
844 | populate_fun)) == NULL) | |
845 | { | |
846 | free (args); | |
926c9e76 | 847 | whaterr = N_("error doing func arg type hashing"); |
0f0c11f7 NA |
848 | goto err; |
849 | } | |
850 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "func arg type", | |
851 | depth); | |
852 | ADD_CITER (citers, hval); | |
853 | } | |
854 | free (args); | |
855 | break; | |
856 | } | |
857 | case CTF_K_ENUM: | |
858 | { | |
859 | int val; | |
860 | const char *ename; | |
861 | ||
862 | ctf_dedup_sha1_add (&hash, &tp->ctt_size, sizeof (uint32_t), | |
863 | "enum size", depth); | |
864 | while ((ename = ctf_enum_next (input, type, &i, &val)) != NULL) | |
865 | { | |
866 | ctf_dedup_sha1_add (&hash, ename, strlen (ename) + 1, "enumerator", | |
867 | depth); | |
868 | ctf_dedup_sha1_add (&hash, &val, sizeof (val), "enumerand", depth); | |
869 | } | |
870 | if (ctf_errno (input) != ECTF_NEXT_END) | |
871 | { | |
926c9e76 | 872 | whaterr = N_("error doing enum member iteration"); |
0f0c11f7 NA |
873 | goto err; |
874 | } | |
875 | break; | |
876 | } | |
877 | /* Top-level only. */ | |
878 | case CTF_K_STRUCT: | |
879 | case CTF_K_UNION: | |
880 | { | |
881 | ssize_t offset; | |
882 | const char *mname; | |
883 | ctf_id_t membtype; | |
884 | ssize_t size; | |
885 | ||
886 | ctf_get_ctt_size (input, tp, &size, NULL); | |
887 | ctf_dedup_sha1_add (&hash, &size, sizeof (ssize_t), "struct size", | |
888 | depth); | |
889 | ||
890 | while ((offset = ctf_member_next (input, type, &i, &mname, | |
891 | &membtype)) >= 0) | |
892 | { | |
893 | if (mname == NULL) | |
894 | mname = ""; | |
895 | ctf_dedup_sha1_add (&hash, mname, strlen (mname) + 1, | |
896 | "member name", depth); | |
897 | ||
898 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
899 | ctf_dprintf ("%lu: Traversing to member %s\n", depth, mname); | |
900 | #endif | |
901 | if ((hval = ctf_dedup_hash_type (fp, input, inputs, parents, | |
902 | input_num, membtype, flags, depth, | |
903 | populate_fun)) == NULL) | |
904 | { | |
926c9e76 | 905 | whaterr = N_("error doing struct/union member type hashing"); |
0f0c11f7 NA |
906 | goto iterr; |
907 | } | |
908 | ||
909 | ctf_dedup_sha1_add (&hash, hval, strlen (hval) + 1, "member hash", | |
910 | depth); | |
911 | ctf_dedup_sha1_add (&hash, &offset, sizeof (offset), "member offset", | |
912 | depth); | |
913 | ADD_CITER (citers, hval); | |
914 | } | |
915 | if (ctf_errno (input) != ECTF_NEXT_END) | |
916 | { | |
926c9e76 | 917 | whaterr = N_("error doing struct/union member iteration"); |
0f0c11f7 NA |
918 | goto err; |
919 | } | |
920 | break; | |
921 | } | |
922 | default: | |
926c9e76 | 923 | whaterr = N_("error: unknown type kind"); |
0f0c11f7 NA |
924 | goto err; |
925 | } | |
926 | ctf_sha1_fini (&hash, hashbuf); | |
927 | ||
928 | if ((hval = intern (fp, strdup (hashbuf))) == NULL) | |
929 | { | |
926c9e76 | 930 | whaterr = N_("cannot intern hash"); |
0f0c11f7 NA |
931 | goto oom; |
932 | } | |
933 | ||
934 | /* Populate the citers for this type's subtypes, now the hash for the type | |
935 | itself is known. */ | |
926c9e76 | 936 | whaterr = N_("error tracking citers"); |
0f0c11f7 NA |
937 | |
938 | if (citer) | |
939 | { | |
940 | ctf_dynset_t *citer_hashes; | |
941 | ||
942 | if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) | |
943 | goto oom; | |
944 | if (ctf_dynset_cinsert (citer_hashes, hval) < 0) | |
945 | goto oom; | |
946 | } | |
947 | else if (citers) | |
948 | { | |
949 | const void *k; | |
950 | ||
951 | while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) | |
952 | { | |
953 | ctf_dynset_t *citer_hashes; | |
954 | citer = (const char *) k; | |
955 | ||
956 | if ((citer_hashes = make_set_element (d->cd_citers, citer)) == NULL) | |
957 | goto oom; | |
958 | ||
959 | if (ctf_dynset_exists (citer_hashes, hval, NULL)) | |
960 | continue; | |
961 | if (ctf_dynset_cinsert (citer_hashes, hval) < 0) | |
962 | goto oom; | |
963 | } | |
964 | if (err != ECTF_NEXT_END) | |
965 | goto err; | |
966 | ctf_dynset_destroy (citers); | |
967 | } | |
968 | ||
969 | return hval; | |
970 | ||
971 | iterr: | |
972 | ctf_next_destroy (i); | |
973 | err: | |
974 | ctf_sha1_fini (&hash, NULL); | |
926c9e76 NA |
975 | ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing for type %lx, " |
976 | "kind %i"), ctf_link_input_name (input), | |
977 | input_num, gettext (whaterr), type, kind); | |
0f0c11f7 NA |
978 | return NULL; |
979 | oom: | |
980 | ctf_set_errno (fp, errno); | |
926c9e76 NA |
981 | ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing for type %lx, " |
982 | "kind %i"), ctf_link_input_name (input), | |
983 | input_num, gettext (whaterr), type, kind); | |
0f0c11f7 NA |
984 | return NULL; |
985 | } | |
986 | ||
987 | /* Hash a TYPE in the INPUT: FP is the eventual output, where the ctf_dedup | |
988 | state is stored. INPUT_NUM is the number of this input in the set of inputs. | |
989 | Record its hash in FP's cd_type_hashes once it is known. PARENTS is | |
990 | described in the comment above ctf_dedup. | |
991 | ||
992 | (The flags argument currently accepts only the flag | |
993 | CTF_DEDUP_HASH_INTERNAL_CHILD, an implementation detail used to prevent | |
994 | struct/union hashing in recursive traversals below the TYPE.) | |
995 | ||
996 | We use the CTF API rather than direct access wherever possible, because types | |
997 | that appear identical through the API should be considered identical, with | |
998 | one exception: slices should only be considered identical to other slices, | |
999 | not to the corresponding unsliced type. | |
1000 | ||
1001 | The POPULATE_FUN is a mandatory hook that populates other mappings with each | |
1002 | type we see (excepting types that are recursively hashed as stubs). The | |
1003 | caller should not rely on the order of calls to this hook, though it will be | |
1004 | called at least once for every non-stub reference to every type. | |
1005 | ||
1006 | Returns a hash value (an atom), or NULL on error. */ | |
1007 | ||
1008 | static const char * | |
139633c3 NA |
1009 | ctf_dedup_hash_type (ctf_dict_t *fp, ctf_dict_t *input, |
1010 | ctf_dict_t **inputs, uint32_t *parents, | |
0f0c11f7 NA |
1011 | int input_num, ctf_id_t type, int flags, |
1012 | unsigned long depth, | |
139633c3 NA |
1013 | int (*populate_fun) (ctf_dict_t *fp, |
1014 | ctf_dict_t *input, | |
1015 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
1016 | int input_num, |
1017 | ctf_id_t type, | |
1018 | void *id, | |
1019 | const char *decorated_name, | |
1020 | const char *hash)) | |
1021 | { | |
1022 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1023 | const ctf_type_t *tp; | |
1024 | void *type_id; | |
1025 | const char *hval = NULL; | |
1026 | const char *name; | |
1027 | const char *whaterr; | |
1028 | const char *decorated = NULL; | |
1029 | uint32_t kind, fwdkind; | |
1030 | ||
1031 | depth++; | |
1032 | ||
1033 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1034 | ctf_dprintf ("%lu: ctf_dedup_hash_type (%i, %lx, flags %x)\n", depth, input_num, type, flags); | |
1035 | #endif | |
1036 | ||
1037 | /* The unimplemented type doesn't really exist, but must be noted in parent | |
1038 | hashes: so it gets a fixed, arbitrary hash. */ | |
1039 | if (type == 0) | |
1040 | return "00000000000000000000"; | |
1041 | ||
1042 | /* Possible optimization: if the input type is in the parent type space, just | |
1043 | copy recursively-cited hashes from the parent's types into the output | |
1044 | mapping rather than rehashing them. */ | |
1045 | ||
1046 | type_id = CTF_DEDUP_GID (fp, input_num, type); | |
1047 | ||
1048 | if ((tp = ctf_lookup_by_id (&input, type)) == NULL) | |
1049 | { | |
926c9e76 NA |
1050 | ctf_set_errno (fp, ctf_errno (input)); |
1051 | ctf_err_warn (fp, 0, 0, _("%s (%i): lookup failure for type %lx: " | |
1052 | "flags %x"), ctf_link_input_name (input), | |
1053 | input_num, type, flags); | |
0f0c11f7 NA |
1054 | return NULL; /* errno is set for us. */ |
1055 | } | |
1056 | ||
1057 | kind = LCTF_INFO_KIND (input, tp->ctt_info); | |
1058 | name = ctf_strraw (input, tp->ctt_name); | |
1059 | ||
1060 | if (tp->ctt_name == 0 || !name || name[0] == '\0') | |
1061 | name = NULL; | |
1062 | ||
1063 | /* Treat the unknown kind just like the unimplemented type. */ | |
1064 | if (kind == CTF_K_UNKNOWN) | |
1065 | return "00000000000000000000"; | |
1066 | ||
1067 | /* Decorate the name appropriately for the namespace it appears in: forwards | |
1068 | appear in the namespace of their referent. */ | |
1069 | ||
1070 | fwdkind = kind; | |
1071 | if (name) | |
1072 | { | |
1073 | if (kind == CTF_K_FORWARD) | |
1074 | fwdkind = tp->ctt_type; | |
1075 | ||
1076 | if ((decorated = ctf_decorate_type_name (fp, name, fwdkind)) == NULL) | |
1077 | return NULL; /* errno is set for us. */ | |
1078 | } | |
1079 | ||
1080 | /* If not hashing a stub, we can rely on various sorts of caches. | |
1081 | ||
1082 | Optimization opportunity: we may be able to avoid calling the populate_fun | |
1083 | sometimes here. */ | |
1084 | ||
1085 | if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) | |
1086 | { | |
1087 | if ((hval = ctf_dynhash_lookup (d->cd_type_hashes, type_id)) != NULL) | |
1088 | { | |
1089 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1090 | ctf_dprintf ("%lu: Known hash for ID %i/%lx: %s\n", depth, input_num, | |
1091 | type, hval); | |
1092 | #endif | |
1093 | populate_fun (fp, input, inputs, input_num, type, type_id, | |
1094 | decorated, hval); | |
1095 | ||
1096 | return hval; | |
1097 | } | |
1098 | } | |
1099 | ||
1100 | /* We have never seen this type before, and must figure out its hash and the | |
1101 | hashes of the types it cites. | |
1102 | ||
1103 | Hash this type, and call ourselves recursively. (The hashing part is | |
1104 | optional, and is disabled if overidden_hval is set.) */ | |
1105 | ||
1106 | if ((hval = ctf_dedup_rhash_type (fp, input, inputs, parents, input_num, | |
1107 | type, type_id, tp, name, decorated, | |
1108 | kind, flags, depth, populate_fun)) == NULL) | |
1109 | return NULL; /* errno is set for us. */ | |
1110 | ||
1111 | /* The hash of this type is now known: record it unless caching is unsafe | |
1112 | because the hash value will change later. This will be the final storage | |
1113 | of this type's hash, so we call the population function on it. */ | |
1114 | ||
1115 | if (!ctf_dedup_is_stub (name, kind, fwdkind, flags)) | |
1116 | { | |
1117 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1118 | ctf_dprintf ("Caching %lx, ID %p (%s), %s in final location\n", type, | |
1119 | type_id, name ? name : "", hval); | |
1120 | #endif | |
1121 | ||
1122 | if (ctf_dynhash_cinsert (d->cd_type_hashes, type_id, hval) < 0) | |
1123 | { | |
926c9e76 | 1124 | whaterr = N_("error hash caching"); |
0f0c11f7 NA |
1125 | goto oom; |
1126 | } | |
1127 | ||
1128 | if (populate_fun (fp, input, inputs, input_num, type, type_id, | |
1129 | decorated, hval) < 0) | |
1130 | { | |
926c9e76 | 1131 | whaterr = N_("error calling population function"); |
0f0c11f7 NA |
1132 | goto err; /* errno is set for us. */ |
1133 | } | |
1134 | } | |
1135 | ||
1136 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1137 | ctf_dprintf ("%lu: Returning final hash for ID %i/%lx: %s\n", depth, | |
1138 | input_num, type, hval); | |
1139 | #endif | |
1140 | return hval; | |
1141 | ||
1142 | oom: | |
1143 | ctf_set_errno (fp, errno); | |
1144 | err: | |
926c9e76 NA |
1145 | ctf_err_warn (fp, 0, 0, _("%s (%i): %s: during type hashing, " |
1146 | "type %lx, kind %i"), | |
1147 | ctf_link_input_name (input), input_num, | |
1148 | gettext (whaterr), type, kind); | |
0f0c11f7 NA |
1149 | return NULL; |
1150 | } | |
1151 | ||
1152 | /* Populate a number of useful mappings not directly used by the hashing | |
1153 | machinery: the output mapping, the cd_name_counts mapping from name -> hash | |
1154 | -> count of hashval deduplication state for a given hashed type, and the | |
1155 | cd_output_first_tu mapping. */ | |
1156 | ||
1157 | static int | |
139633c3 NA |
1158 | ctf_dedup_populate_mappings (ctf_dict_t *fp, ctf_dict_t *input _libctf_unused_, |
1159 | ctf_dict_t **inputs _libctf_unused_, | |
0f0c11f7 NA |
1160 | int input_num _libctf_unused_, |
1161 | ctf_id_t type _libctf_unused_, void *id, | |
1162 | const char *decorated_name, | |
1163 | const char *hval) | |
1164 | { | |
1165 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1166 | ctf_dynset_t *type_ids; | |
1167 | ctf_dynhash_t *name_counts; | |
1168 | long int count; | |
1169 | ||
1170 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1171 | ctf_dprintf ("Hash %s, %s, into output mapping for %i/%lx @ %s\n", | |
1172 | hval, decorated_name ? decorated_name : "(unnamed)", | |
1173 | input_num, type, ctf_link_input_name (input)); | |
1174 | ||
1175 | const char *orig_hval; | |
1176 | ||
1177 | /* Make sure we never map a single GID to multiple hash values. */ | |
1178 | ||
1179 | if ((orig_hval = ctf_dynhash_lookup (d->cd_output_mapping_guard, id)) != NULL) | |
1180 | { | |
1181 | /* We can rely on pointer identity here, since all hashes are | |
1182 | interned. */ | |
1183 | if (!ctf_assert (fp, orig_hval == hval)) | |
1184 | return -1; | |
1185 | } | |
1186 | else | |
1187 | if (ctf_dynhash_cinsert (d->cd_output_mapping_guard, id, hval) < 0) | |
1188 | return ctf_set_errno (fp, errno); | |
1189 | #endif | |
1190 | ||
1191 | /* Record the type in the output mapping: if this is the first time this type | |
1192 | has been seen, also record it in the cd_output_first_gid. Because we | |
1193 | traverse types in TU order and we do not merge types after the hashing | |
1194 | phase, this will be the lowest TU this type ever appears in. */ | |
1195 | ||
1196 | if ((type_ids = ctf_dynhash_lookup (d->cd_output_mapping, | |
1197 | hval)) == NULL) | |
1198 | { | |
1199 | if (ctf_dynhash_cinsert (d->cd_output_first_gid, hval, id) < 0) | |
1200 | return ctf_set_errno (fp, errno); | |
1201 | ||
1202 | if ((type_ids = ctf_dynset_create (htab_hash_pointer, | |
1203 | htab_eq_pointer, | |
1204 | NULL)) == NULL) | |
1205 | return ctf_set_errno (fp, errno); | |
1206 | if (ctf_dynhash_insert (d->cd_output_mapping, (void *) hval, | |
1207 | type_ids) < 0) | |
1208 | { | |
1209 | ctf_dynset_destroy (type_ids); | |
1210 | return ctf_set_errno (fp, errno); | |
1211 | } | |
1212 | } | |
1213 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1214 | { | |
1215 | /* Verify that all types with this hash are of the same kind, and that the | |
1216 | first TU a type was seen in never falls. */ | |
1217 | ||
1218 | int err; | |
1219 | const void *one_id; | |
1220 | ctf_next_t *i = NULL; | |
1221 | int orig_kind = ctf_type_kind_unsliced (input, type); | |
1222 | int orig_first_tu; | |
1223 | ||
1224 | orig_first_tu = CTF_DEDUP_GID_TO_INPUT | |
1225 | (ctf_dynhash_lookup (d->cd_output_first_gid, hval)); | |
1226 | if (!ctf_assert (fp, orig_first_tu <= CTF_DEDUP_GID_TO_INPUT (id))) | |
1227 | return -1; | |
1228 | ||
1229 | while ((err = ctf_dynset_cnext (type_ids, &i, &one_id)) == 0) | |
1230 | { | |
139633c3 | 1231 | ctf_dict_t *foo = inputs[CTF_DEDUP_GID_TO_INPUT (one_id)]; |
0f0c11f7 NA |
1232 | ctf_id_t bar = CTF_DEDUP_GID_TO_TYPE (one_id); |
1233 | if (ctf_type_kind_unsliced (foo, bar) != orig_kind) | |
1234 | { | |
926c9e76 | 1235 | ctf_err_warn (fp, 1, 0, "added wrong kind to output mapping " |
0f0c11f7 NA |
1236 | "for hash %s named %s: %p/%lx from %s is " |
1237 | "kind %i, but newly-added %p/%lx from %s is " | |
1238 | "kind %i", hval, | |
1239 | decorated_name ? decorated_name : "(unnamed)", | |
1240 | (void *) foo, bar, | |
1241 | ctf_link_input_name (foo), | |
1242 | ctf_type_kind_unsliced (foo, bar), | |
1243 | (void *) input, type, | |
1244 | ctf_link_input_name (input), orig_kind); | |
1245 | if (!ctf_assert (fp, ctf_type_kind_unsliced (foo, bar) | |
1246 | == orig_kind)) | |
1247 | return -1; | |
1248 | } | |
1249 | } | |
1250 | if (err != ECTF_NEXT_END) | |
1251 | return ctf_set_errno (fp, err); | |
1252 | } | |
1253 | #endif | |
1254 | ||
1255 | /* This function will be repeatedly called for the same types many times: | |
1256 | don't waste time reinserting the same keys in that case. */ | |
1257 | if (!ctf_dynset_exists (type_ids, id, NULL) | |
1258 | && ctf_dynset_insert (type_ids, id) < 0) | |
1259 | return ctf_set_errno (fp, errno); | |
1260 | ||
1261 | /* The rest only needs to happen for types with names. */ | |
1262 | if (!decorated_name) | |
1263 | return 0; | |
1264 | ||
1265 | /* Count the number of occurrences of the hash value for this GID. */ | |
1266 | ||
1267 | hval = ctf_dynhash_lookup (d->cd_type_hashes, id); | |
1268 | ||
1269 | /* Mapping from name -> hash(hashval, count) not already present? */ | |
1270 | if ((name_counts = ctf_dynhash_lookup (d->cd_name_counts, | |
1271 | decorated_name)) == NULL) | |
1272 | { | |
1273 | if ((name_counts = ctf_dynhash_create (ctf_hash_string, | |
1274 | ctf_hash_eq_string, | |
1275 | NULL, NULL)) == NULL) | |
1276 | return ctf_set_errno (fp, errno); | |
1277 | if (ctf_dynhash_cinsert (d->cd_name_counts, decorated_name, | |
1278 | name_counts) < 0) | |
1279 | { | |
1280 | ctf_dynhash_destroy (name_counts); | |
1281 | return ctf_set_errno (fp, errno); | |
1282 | } | |
1283 | } | |
1284 | ||
1285 | /* This will, conveniently, return NULL (i.e. 0) for a new entry. */ | |
1286 | count = (long int) (uintptr_t) ctf_dynhash_lookup (name_counts, hval); | |
1287 | ||
1288 | if (ctf_dynhash_cinsert (name_counts, hval, | |
1289 | (const void *) (uintptr_t) (count + 1)) < 0) | |
1290 | return ctf_set_errno (fp, errno); | |
1291 | ||
1292 | return 0; | |
1293 | } | |
1294 | ||
1295 | /* Mark a single hash as corresponding to a conflicting type. Mark all types | |
1296 | that cite it as conflicting as well, terminating the recursive walk only when | |
1297 | types that are already conflicted or types do not cite other types are seen. | |
1298 | (Tagged structures and unions do not appear in the cd_citers graph, so the | |
1299 | walk also terminates there, since any reference to a conflicting structure is | |
1300 | just going to reference an unconflicting forward instead: see | |
1301 | ctf_dedup_maybe_synthesize_forward.) */ | |
1302 | ||
1303 | static int | |
139633c3 | 1304 | ctf_dedup_mark_conflicting_hash (ctf_dict_t *fp, const char *hval) |
0f0c11f7 NA |
1305 | { |
1306 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1307 | ctf_next_t *i = NULL; | |
1308 | int err; | |
1309 | const void *k; | |
1310 | ctf_dynset_t *citers; | |
1311 | ||
1312 | /* Mark conflicted if not already so marked. */ | |
1313 | if (ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) | |
1314 | return 0; | |
1315 | ||
1316 | ctf_dprintf ("Marking %s as conflicted\n", hval); | |
1317 | ||
1318 | if (ctf_dynset_cinsert (d->cd_conflicting_types, hval) < 0) | |
1319 | { | |
1320 | ctf_dprintf ("Out of memory marking %s as conflicted\n", hval); | |
1321 | ctf_set_errno (fp, errno); | |
1322 | return -1; | |
1323 | } | |
1324 | ||
1325 | /* If any types cite this type, mark them conflicted too. */ | |
1326 | if ((citers = ctf_dynhash_lookup (d->cd_citers, hval)) == NULL) | |
1327 | return 0; | |
1328 | ||
1329 | while ((err = ctf_dynset_cnext (citers, &i, &k)) == 0) | |
1330 | { | |
1331 | const char *hv = (const char *) k; | |
1332 | ||
1333 | if (ctf_dynset_exists (d->cd_conflicting_types, hv, NULL)) | |
1334 | continue; | |
1335 | ||
1336 | if (ctf_dedup_mark_conflicting_hash (fp, hv) < 0) | |
1337 | { | |
1338 | ctf_next_destroy (i); | |
1339 | return -1; /* errno is set for us. */ | |
1340 | } | |
1341 | } | |
1342 | if (err != ECTF_NEXT_END) | |
1343 | return ctf_set_errno (fp, err); | |
1344 | ||
1345 | return 0; | |
1346 | } | |
1347 | ||
1348 | /* Look up a type kind from the output mapping, given a type hash value. */ | |
1349 | static int | |
139633c3 | 1350 | ctf_dedup_hash_kind (ctf_dict_t *fp, ctf_dict_t **inputs, const char *hash) |
0f0c11f7 NA |
1351 | { |
1352 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1353 | void *id; | |
1354 | ctf_dynset_t *type_ids; | |
1355 | ||
1356 | /* Precondition: the output mapping is populated. */ | |
1357 | if (!ctf_assert (fp, ctf_dynhash_elements (d->cd_output_mapping) > 0)) | |
1358 | return -1; | |
1359 | ||
1360 | /* Look up some GID from the output hash for this type. (They are all | |
1361 | identical, so we can pick any). Don't assert if someone calls this | |
1362 | function wrongly, but do assert if the output mapping knows about the hash, | |
1363 | but has nothing associated with it. */ | |
1364 | ||
1365 | type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hash); | |
1366 | if (!type_ids) | |
1367 | { | |
1368 | ctf_dprintf ("Looked up type kind by nonexistent hash %s.\n", hash); | |
1369 | return ctf_set_errno (fp, ECTF_INTERNAL); | |
1370 | } | |
1371 | id = ctf_dynset_lookup_any (type_ids); | |
1372 | if (!ctf_assert (fp, id)) | |
1373 | return -1; | |
1374 | ||
1375 | return ctf_type_kind_unsliced (inputs[CTF_DEDUP_GID_TO_INPUT (id)], | |
1376 | CTF_DEDUP_GID_TO_TYPE (id)); | |
1377 | } | |
1378 | ||
1379 | /* Used to keep a count of types: i.e. distinct type hash values. */ | |
1380 | typedef struct ctf_dedup_type_counter | |
1381 | { | |
139633c3 NA |
1382 | ctf_dict_t *fp; |
1383 | ctf_dict_t **inputs; | |
0f0c11f7 NA |
1384 | int num_non_forwards; |
1385 | } ctf_dedup_type_counter_t; | |
1386 | ||
1387 | /* Add to the type counter for one name entry from the cd_name_counts. */ | |
1388 | static int | |
1389 | ctf_dedup_count_types (void *key_, void *value _libctf_unused_, void *arg_) | |
1390 | { | |
1391 | const char *hval = (const char *) key_; | |
1392 | int kind; | |
1393 | ctf_dedup_type_counter_t *arg = (ctf_dedup_type_counter_t *) arg_; | |
1394 | ||
1395 | kind = ctf_dedup_hash_kind (arg->fp, arg->inputs, hval); | |
1396 | ||
1397 | /* We rely on ctf_dedup_hash_kind setting the fp to -ECTF_INTERNAL on error to | |
1398 | smuggle errors out of here. */ | |
1399 | ||
1400 | if (kind != CTF_K_FORWARD) | |
1401 | { | |
1402 | arg->num_non_forwards++; | |
1403 | ctf_dprintf ("Counting hash %s: kind %i: num_non_forwards is %i\n", | |
1404 | hval, kind, arg->num_non_forwards); | |
1405 | } | |
1406 | ||
1407 | /* We only need to know if there is more than one non-forward (an ambiguous | |
1408 | type): don't waste time iterating any more than needed to figure that | |
1409 | out. */ | |
1410 | ||
1411 | if (arg->num_non_forwards > 1) | |
1412 | return 1; | |
1413 | ||
1414 | return 0; | |
1415 | } | |
1416 | ||
1417 | /* Detect name ambiguity and mark ambiguous names as conflicting, other than the | |
1418 | most common. */ | |
1419 | static int | |
139633c3 | 1420 | ctf_dedup_detect_name_ambiguity (ctf_dict_t *fp, ctf_dict_t **inputs) |
0f0c11f7 NA |
1421 | { |
1422 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1423 | ctf_next_t *i = NULL; | |
1424 | void *k; | |
1425 | void *v; | |
1426 | int err; | |
926c9e76 | 1427 | const char *whaterr; |
0f0c11f7 NA |
1428 | |
1429 | /* Go through cd_name_counts for all CTF namespaces in turn. */ | |
1430 | ||
1431 | while ((err = ctf_dynhash_next (d->cd_name_counts, &i, &k, &v)) == 0) | |
1432 | { | |
1433 | const char *decorated = (const char *) k; | |
1434 | ctf_dynhash_t *name_counts = (ctf_dynhash_t *) v; | |
1435 | ctf_next_t *j = NULL; | |
1436 | ||
1437 | /* If this is a forwardable kind or a forward (which we can tell without | |
1438 | consulting the type because its decorated name has a space as its | |
1439 | second character: see ctf_decorate_type_name), we are only interested | |
1440 | in whether this name has many hashes associated with it: any such name | |
1441 | is necessarily ambiguous, and types with that name are conflicting. | |
1442 | Once we know whether this is true, we can skip to the next name: so use | |
1443 | ctf_dynhash_iter_find for efficiency. */ | |
1444 | ||
1445 | if (decorated[0] != '\0' && decorated[1] == ' ') | |
1446 | { | |
1447 | ctf_dedup_type_counter_t counters = { fp, inputs, 0 }; | |
1448 | ctf_dynhash_t *counts = (ctf_dynhash_t *) v; | |
1449 | ||
1450 | ctf_dynhash_iter_find (counts, ctf_dedup_count_types, &counters); | |
1451 | ||
1452 | /* Check for assertion failure and pass it up. */ | |
1453 | if (ctf_errno (fp) == ECTF_INTERNAL) | |
1454 | goto assert_err; | |
1455 | ||
1456 | if (counters.num_non_forwards > 1) | |
1457 | { | |
1458 | const void *hval_; | |
1459 | ||
1460 | while ((err = ctf_dynhash_cnext (counts, &j, &hval_, NULL)) == 0) | |
1461 | { | |
1462 | const char *hval = (const char *) hval_; | |
1463 | ctf_dynset_t *type_ids; | |
1464 | void *id; | |
1465 | int kind; | |
1466 | ||
1467 | /* Dig through the types in this hash to find the non-forwards | |
1468 | and mark them ambiguous. */ | |
1469 | ||
1470 | type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); | |
1471 | ||
1472 | /* Nonexistent? Must be a forward with no referent. */ | |
1473 | if (!type_ids) | |
1474 | continue; | |
1475 | ||
1476 | id = ctf_dynset_lookup_any (type_ids); | |
1477 | ||
1478 | kind = ctf_type_kind (inputs[CTF_DEDUP_GID_TO_INPUT (id)], | |
1479 | CTF_DEDUP_GID_TO_TYPE (id)); | |
1480 | ||
1481 | if (kind != CTF_K_FORWARD) | |
1482 | { | |
1483 | ctf_dprintf ("Marking %p, with hash %s, conflicting: one " | |
1484 | "of many non-forward GIDs for %s\n", id, | |
1485 | hval, (char *) k); | |
1486 | ctf_dedup_mark_conflicting_hash (fp, hval); | |
1487 | } | |
1488 | } | |
1489 | if (err != ECTF_NEXT_END) | |
1490 | { | |
926c9e76 | 1491 | whaterr = N_("error marking conflicting structs/unions"); |
0f0c11f7 NA |
1492 | goto iterr; |
1493 | } | |
1494 | } | |
1495 | } | |
1496 | else | |
1497 | { | |
1498 | /* This is an ordinary type. Find the most common type with this | |
1499 | name, and mark it unconflicting: all others are conflicting. (We | |
1500 | cannot do this sort of popularity contest with forwardable types | |
1501 | because any forwards to that type would be immediately unified with | |
1502 | the most-popular type on insertion, and we want conflicting structs | |
1503 | et al to have all forwards left intact, so the user is notified | |
1504 | that this type is conflicting. TODO: improve this in future by | |
1505 | setting such forwards non-root-visible.) */ | |
1506 | ||
1507 | const void *key; | |
1508 | const void *count; | |
1509 | const char *hval; | |
1510 | long max_hcount = -1; | |
1511 | const char *max_hval = NULL; | |
1512 | ||
1513 | if (ctf_dynhash_elements (name_counts) <= 1) | |
1514 | continue; | |
1515 | ||
1516 | /* First find the most common. */ | |
1517 | while ((err = ctf_dynhash_cnext (name_counts, &j, &key, &count)) == 0) | |
1518 | { | |
1519 | hval = (const char *) key; | |
1520 | if ((long int) (uintptr_t) count > max_hcount) | |
1521 | { | |
1522 | max_hcount = (long int) (uintptr_t) count; | |
1523 | max_hval = hval; | |
1524 | } | |
1525 | } | |
1526 | if (err != ECTF_NEXT_END) | |
1527 | { | |
926c9e76 | 1528 | whaterr = N_("error finding commonest conflicting type"); |
0f0c11f7 NA |
1529 | goto iterr; |
1530 | } | |
1531 | ||
1532 | /* Mark all the others as conflicting. */ | |
1533 | while ((err = ctf_dynhash_cnext (name_counts, &j, &key, NULL)) == 0) | |
1534 | { | |
1535 | hval = (const char *) key; | |
1536 | if (strcmp (max_hval, hval) == 0) | |
1537 | continue; | |
1538 | ||
1539 | ctf_dprintf ("Marking %s, an uncommon hash for %s, conflicting\n", | |
1540 | hval, (const char *) k); | |
1541 | if (ctf_dedup_mark_conflicting_hash (fp, hval) < 0) | |
1542 | { | |
926c9e76 | 1543 | whaterr = N_("error marking hashes as conflicting"); |
0f0c11f7 NA |
1544 | goto err; |
1545 | } | |
1546 | } | |
1547 | if (err != ECTF_NEXT_END) | |
1548 | { | |
926c9e76 | 1549 | whaterr = N_("marking uncommon conflicting types"); |
0f0c11f7 NA |
1550 | goto iterr; |
1551 | } | |
1552 | } | |
1553 | } | |
1554 | if (err != ECTF_NEXT_END) | |
1555 | { | |
926c9e76 | 1556 | whaterr = N_("scanning for ambiguous names"); |
0f0c11f7 NA |
1557 | goto iterr; |
1558 | } | |
1559 | ||
1560 | return 0; | |
1561 | ||
1562 | err: | |
1563 | ctf_next_destroy (i); | |
926c9e76 NA |
1564 | ctf_err_warn (fp, 0, 0, "%s", gettext (whaterr)); |
1565 | return -1; /* errno is set for us. */ | |
0f0c11f7 NA |
1566 | |
1567 | iterr: | |
926c9e76 | 1568 | ctf_err_warn (fp, 0, err, _("iteration failed: %s"), gettext (whaterr)); |
0f0c11f7 NA |
1569 | return ctf_set_errno (fp, err); |
1570 | ||
1571 | assert_err: | |
1572 | ctf_next_destroy (i); | |
1573 | return -1; /* errno is set for us. */ | |
1574 | } | |
1575 | ||
1576 | /* Initialize the deduplication machinery. */ | |
1577 | ||
1578 | static int | |
139633c3 | 1579 | ctf_dedup_init (ctf_dict_t *fp) |
0f0c11f7 NA |
1580 | { |
1581 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1582 | size_t i; | |
1583 | ||
1584 | if (ctf_dedup_atoms_init (fp) < 0) | |
1585 | goto oom; | |
1586 | ||
1587 | #if IDS_NEED_ALLOCATION | |
139633c3 | 1588 | if ((d->cd_id_to_dict_t = ctf_dynhash_create (ctf_hash_type_id_key, |
0f0c11f7 NA |
1589 | ctf_hash_eq_type_id_key, |
1590 | free, NULL)) == NULL) | |
1591 | goto oom; | |
1592 | #endif | |
1593 | ||
1594 | for (i = 0; i < 4; i++) | |
1595 | { | |
1596 | if ((d->cd_decorated_names[i] = ctf_dynhash_create (ctf_hash_string, | |
1597 | ctf_hash_eq_string, | |
1598 | NULL, NULL)) == NULL) | |
1599 | goto oom; | |
1600 | } | |
1601 | ||
1602 | if ((d->cd_name_counts | |
1603 | = ctf_dynhash_create (ctf_hash_string, | |
1604 | ctf_hash_eq_string, NULL, | |
1605 | (ctf_hash_free_fun) ctf_dynhash_destroy)) == NULL) | |
1606 | goto oom; | |
1607 | ||
1608 | if ((d->cd_type_hashes | |
1609 | = ctf_dynhash_create (ctf_hash_integer, | |
1610 | ctf_hash_eq_integer, | |
1611 | NULL, NULL)) == NULL) | |
1612 | goto oom; | |
1613 | ||
1614 | if ((d->cd_struct_origin | |
1615 | = ctf_dynhash_create (ctf_hash_string, | |
1616 | ctf_hash_eq_string, | |
1617 | NULL, NULL)) == NULL) | |
1618 | goto oom; | |
1619 | ||
1620 | if ((d->cd_citers | |
1621 | = ctf_dynhash_create (ctf_hash_string, | |
1622 | ctf_hash_eq_string, NULL, | |
1623 | (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) | |
1624 | goto oom; | |
1625 | ||
1626 | if ((d->cd_output_mapping | |
1627 | = ctf_dynhash_create (ctf_hash_string, | |
1628 | ctf_hash_eq_string, NULL, | |
1629 | (ctf_hash_free_fun) ctf_dynset_destroy)) == NULL) | |
1630 | goto oom; | |
1631 | ||
1632 | if ((d->cd_output_first_gid | |
1633 | = ctf_dynhash_create (ctf_hash_string, | |
1634 | ctf_hash_eq_string, | |
1635 | NULL, NULL)) == NULL) | |
1636 | goto oom; | |
1637 | ||
1638 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1639 | if ((d->cd_output_mapping_guard | |
1640 | = ctf_dynhash_create (ctf_hash_integer, | |
1641 | ctf_hash_eq_integer, NULL, NULL)) == NULL) | |
1642 | goto oom; | |
1643 | #endif | |
1644 | ||
1645 | if ((d->cd_emission_struct_members | |
1646 | = ctf_dynhash_create (ctf_hash_integer, | |
1647 | ctf_hash_eq_integer, | |
1648 | NULL, NULL)) == NULL) | |
1649 | goto oom; | |
1650 | ||
1651 | if ((d->cd_conflicting_types | |
1652 | = ctf_dynset_create (htab_hash_string, | |
1653 | ctf_dynset_eq_string, NULL)) == NULL) | |
1654 | goto oom; | |
1655 | ||
1656 | return 0; | |
1657 | ||
1658 | oom: | |
926c9e76 NA |
1659 | ctf_err_warn (fp, 0, ENOMEM, _("ctf_dedup_init: cannot initialize: " |
1660 | "out of memory")); | |
0f0c11f7 NA |
1661 | return ctf_set_errno (fp, ENOMEM); |
1662 | } | |
1663 | ||
1664 | void | |
139633c3 | 1665 | ctf_dedup_fini (ctf_dict_t *fp, ctf_dict_t **outputs, uint32_t noutputs) |
0f0c11f7 NA |
1666 | { |
1667 | ctf_dedup_t *d = &fp->ctf_dedup; | |
1668 | size_t i; | |
1669 | ||
1670 | /* ctf_dedup_atoms is kept across links. */ | |
1671 | #if IDS_NEED_ALLOCATION | |
139633c3 | 1672 | ctf_dynhash_destroy (d->cd_id_to_dict_t); |
0f0c11f7 NA |
1673 | #endif |
1674 | for (i = 0; i < 4; i++) | |
1675 | ctf_dynhash_destroy (d->cd_decorated_names[i]); | |
1676 | ctf_dynhash_destroy (d->cd_name_counts); | |
1677 | ctf_dynhash_destroy (d->cd_type_hashes); | |
1678 | ctf_dynhash_destroy (d->cd_struct_origin); | |
1679 | ctf_dynhash_destroy (d->cd_citers); | |
1680 | ctf_dynhash_destroy (d->cd_output_mapping); | |
1681 | ctf_dynhash_destroy (d->cd_output_first_gid); | |
1682 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
1683 | ctf_dynhash_destroy (d->cd_output_mapping_guard); | |
1684 | #endif | |
1685 | ctf_dynhash_destroy (d->cd_emission_struct_members); | |
1686 | ctf_dynset_destroy (d->cd_conflicting_types); | |
1687 | ||
1688 | /* Free the per-output state. */ | |
1689 | if (outputs) | |
1690 | { | |
1691 | for (i = 0; i < noutputs; i++) | |
1692 | { | |
1693 | ctf_dedup_t *od = &outputs[i]->ctf_dedup; | |
1694 | ctf_dynhash_destroy (od->cd_output_emission_hashes); | |
1695 | ctf_dynhash_destroy (od->cd_output_emission_conflicted_forwards); | |
139633c3 | 1696 | ctf_dict_close (od->cd_output); |
0f0c11f7 NA |
1697 | } |
1698 | } | |
1699 | memset (d, 0, sizeof (ctf_dedup_t)); | |
1700 | } | |
1701 | ||
1702 | /* Return 1 if this type is cited by multiple input dictionaries. */ | |
1703 | ||
1704 | static int | |
139633c3 | 1705 | ctf_dedup_multiple_input_dicts (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
1706 | const char *hval) |
1707 | { | |
1708 | ctf_dedup_t *d = &output->ctf_dedup; | |
1709 | ctf_dynset_t *type_ids; | |
1710 | ctf_next_t *i = NULL; | |
1711 | void *id; | |
139633c3 | 1712 | ctf_dict_t *found = NULL, *relative_found = NULL; |
0f0c11f7 | 1713 | const char *type_id; |
139633c3 | 1714 | ctf_dict_t *input_fp; |
0f0c11f7 NA |
1715 | ctf_id_t input_id; |
1716 | const char *name; | |
1717 | const char *decorated; | |
1718 | int fwdkind; | |
1719 | int multiple = 0; | |
1720 | int err; | |
1721 | ||
1722 | type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); | |
1723 | if (!ctf_assert (output, type_ids)) | |
1724 | return -1; | |
1725 | ||
1726 | /* Scan across the IDs until we find proof that two disjoint dictionaries | |
1727 | are referenced. Exit as soon as possible. Optimization opportunity, but | |
1728 | possibly not worth it, given that this is only executed in | |
1729 | CTF_LINK_SHARE_DUPLICATED mode. */ | |
1730 | ||
1731 | while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) | |
1732 | { | |
139633c3 | 1733 | ctf_dict_t *fp = inputs[CTF_DEDUP_GID_TO_INPUT (id)]; |
0f0c11f7 NA |
1734 | |
1735 | if (fp == found || fp == relative_found) | |
1736 | continue; | |
1737 | ||
1738 | if (!found) | |
1739 | { | |
1740 | found = fp; | |
1741 | continue; | |
1742 | } | |
1743 | ||
1744 | if (!relative_found | |
1745 | && (fp->ctf_parent == found || found->ctf_parent == fp)) | |
1746 | { | |
1747 | relative_found = fp; | |
1748 | continue; | |
1749 | } | |
1750 | ||
1751 | multiple = 1; | |
1752 | ctf_next_destroy (i); | |
1753 | break; | |
1754 | } | |
1755 | if ((err != ECTF_NEXT_END) && (err != 0)) | |
1756 | { | |
926c9e76 NA |
1757 | ctf_err_warn (output, 0, err, _("iteration error " |
1758 | "propagating conflictedness")); | |
1759 | return ctf_set_errno (output, err); | |
0f0c11f7 NA |
1760 | } |
1761 | ||
1762 | if (multiple) | |
1763 | return multiple; | |
1764 | ||
1765 | /* This type itself does not appear in multiple input dicts: how about another | |
1766 | related type with the same name (e.g. a forward if this is a struct, | |
1767 | etc). */ | |
1768 | ||
1769 | type_id = ctf_dynset_lookup_any (type_ids); | |
1770 | if (!ctf_assert (output, type_id)) | |
1771 | return -1; | |
1772 | ||
1773 | input_fp = inputs[CTF_DEDUP_GID_TO_INPUT (type_id)]; | |
1774 | input_id = CTF_DEDUP_GID_TO_TYPE (type_id); | |
1775 | fwdkind = ctf_type_kind_forwarded (input_fp, input_id); | |
1776 | name = ctf_type_name_raw (input_fp, input_id); | |
1777 | ||
1778 | if ((fwdkind == CTF_K_STRUCT || fwdkind == CTF_K_UNION) | |
1779 | && name && name[0] != '\0') | |
1780 | { | |
1781 | const void *origin; | |
1782 | ||
1783 | if ((decorated = ctf_decorate_type_name (output, name, | |
1784 | fwdkind)) == NULL) | |
1785 | return -1; /* errno is set for us. */ | |
1786 | ||
1787 | origin = ctf_dynhash_lookup (d->cd_struct_origin, decorated); | |
1788 | if ((origin != NULL) && (CTF_DEDUP_GID_TO_INPUT (origin) < 0)) | |
1789 | multiple = 1; | |
1790 | } | |
1791 | ||
1792 | return multiple; | |
1793 | } | |
1794 | ||
1795 | /* Demote unconflicting types which reference only one input, or which reference | |
1796 | two inputs where one input is the parent of the other, into conflicting | |
1797 | types. Only used if the link mode is CTF_LINK_SHARE_DUPLICATED. */ | |
1798 | ||
1799 | static int | |
139633c3 | 1800 | ctf_dedup_conflictify_unshared (ctf_dict_t *output, ctf_dict_t **inputs) |
0f0c11f7 NA |
1801 | { |
1802 | ctf_dedup_t *d = &output->ctf_dedup; | |
1803 | ctf_next_t *i = NULL; | |
1804 | int err; | |
1805 | const void *k; | |
1806 | ctf_dynset_t *to_mark = NULL; | |
1807 | ||
1808 | if ((to_mark = ctf_dynset_create (htab_hash_string, ctf_dynset_eq_string, | |
1809 | NULL)) == NULL) | |
1810 | goto err_no; | |
1811 | ||
1812 | while ((err = ctf_dynhash_cnext (d->cd_output_mapping, &i, &k, NULL)) == 0) | |
1813 | { | |
1814 | const char *hval = (const char *) k; | |
1815 | int conflicting; | |
1816 | ||
1817 | /* Types referenced by only one dict, with no type appearing under that | |
1818 | name elsewhere, are marked conflicting. */ | |
1819 | ||
1820 | conflicting = !ctf_dedup_multiple_input_dicts (output, inputs, hval); | |
1821 | ||
1822 | if (conflicting < 0) | |
1823 | goto err; /* errno is set for us. */ | |
1824 | ||
1825 | if (conflicting) | |
1826 | if (ctf_dynset_cinsert (to_mark, hval) < 0) | |
1827 | goto err; | |
1828 | } | |
1829 | if (err != ECTF_NEXT_END) | |
1830 | goto iterr; | |
1831 | ||
1832 | while ((err = ctf_dynset_cnext (to_mark, &i, &k)) == 0) | |
1833 | { | |
1834 | const char *hval = (const char *) k; | |
1835 | ||
1836 | if (ctf_dedup_mark_conflicting_hash (output, hval) < 0) | |
1837 | goto err; | |
1838 | } | |
1839 | if (err != ECTF_NEXT_END) | |
1840 | goto iterr; | |
1841 | ||
1842 | ctf_dynset_destroy (to_mark); | |
1843 | ||
1844 | return 0; | |
1845 | ||
1846 | err_no: | |
1847 | ctf_set_errno (output, errno); | |
1848 | err: | |
1849 | err = ctf_errno (output); | |
1850 | ctf_next_destroy (i); | |
1851 | iterr: | |
0f0c11f7 | 1852 | ctf_dynset_destroy (to_mark); |
926c9e76 NA |
1853 | ctf_err_warn (output, 0, err, _("conflictifying unshared types")); |
1854 | return ctf_set_errno (output, err); | |
0f0c11f7 NA |
1855 | } |
1856 | ||
1857 | /* The core deduplicator. Populate cd_output_mapping in the output ctf_dedup | |
1858 | with a mapping of all types that belong in this dictionary and where they | |
1859 | come from, and cd_conflicting_types with an indication of whether each type | |
1860 | is conflicted or not. OUTPUT is the top-level output: INPUTS is the array of | |
1861 | input dicts; NINPUTS is the size of that array; PARENTS is an NINPUTS-element | |
1862 | array with each element corresponding to a input which is a child dict set to | |
1863 | the number in the INPUTS array of that input's parent. | |
1864 | ||
1865 | If CU_MAPPED is set, this is a first pass for a link with a non-empty CU | |
1866 | mapping: only one output will result. | |
1867 | ||
1868 | Only deduplicates: does not emit the types into the output. Call | |
1869 | ctf_dedup_emit afterwards to do that. */ | |
1870 | ||
1871 | int | |
139633c3 | 1872 | ctf_dedup (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs, |
0f0c11f7 NA |
1873 | uint32_t *parents, int cu_mapped) |
1874 | { | |
1875 | ctf_dedup_t *d = &output->ctf_dedup; | |
1876 | size_t i; | |
1877 | ctf_next_t *it = NULL; | |
1878 | ||
1879 | for (i = 0; i < ninputs; i++) | |
1880 | ctf_dprintf ("Input %i: %s\n", (int) i, ctf_link_input_name (inputs[i])); | |
1881 | ||
1882 | if (ctf_dedup_init (output) < 0) | |
1883 | return -1; /* errno is set for us. */ | |
1884 | ||
1885 | /* Some flags do not apply when CU-mapping: this is not a duplicated link, | |
1886 | because there is only one output and we really don't want to end up marking | |
1887 | all nonconflicting but appears-only-once types as conflicting (which in the | |
1888 | CU-mapped link means we'd mark them all as non-root-visible!). */ | |
1889 | d->cd_link_flags = output->ctf_link_flags; | |
1890 | if (cu_mapped) | |
1891 | d->cd_link_flags &= ~(CTF_LINK_SHARE_DUPLICATED); | |
1892 | ||
1893 | /* Compute hash values for all types, recursively, treating child structures | |
1894 | and unions equivalent to forwards, and hashing in the name of the referent | |
1895 | of each such type into structures, unions, and non-opaque forwards. | |
1896 | Populate a mapping from decorated name (including an indication of | |
1897 | struct/union/enum namespace) to count of type hash values in | |
1898 | cd_name_counts, a mapping from and a mapping from hash values to input type | |
1899 | IDs in cd_output_mapping. */ | |
1900 | ||
1901 | ctf_dprintf ("Computing type hashes\n"); | |
1902 | for (i = 0; i < ninputs; i++) | |
1903 | { | |
1904 | ctf_id_t id; | |
1905 | ||
1906 | while ((id = ctf_type_next (inputs[i], &it, NULL, 1)) != CTF_ERR) | |
1907 | { | |
1908 | ctf_dedup_hash_type (output, inputs[i], inputs, parents, | |
1909 | i, id, 0, 0, ctf_dedup_populate_mappings); | |
1910 | } | |
1911 | if (ctf_errno (inputs[i]) != ECTF_NEXT_END) | |
1912 | { | |
926c9e76 NA |
1913 | ctf_set_errno (output, ctf_errno (inputs[i])); |
1914 | ctf_err_warn (output, 0, 0, _("iteration failure " | |
1915 | "computing type hashes")); | |
1916 | return -1; | |
0f0c11f7 NA |
1917 | } |
1918 | } | |
1919 | ||
1920 | /* Go through the cd_name_counts name->hash->count mapping for all CTF | |
1921 | namespaces: any name with many hashes associated with it at this stage is | |
1922 | necessarily ambiguous. Mark all the hashes except the most common as | |
1923 | conflicting in the output. */ | |
1924 | ||
1925 | ctf_dprintf ("Detecting type name ambiguity\n"); | |
1926 | if (ctf_dedup_detect_name_ambiguity (output, inputs) < 0) | |
1927 | return -1; /* errno is set for us. */ | |
1928 | ||
1929 | /* If the link mode is CTF_LINK_SHARE_DUPLICATED, we change any unconflicting | |
1930 | types whose output mapping references only one input dict into a | |
1931 | conflicting type, so that they end up in the per-CU dictionaries. */ | |
1932 | ||
1933 | if (d->cd_link_flags & CTF_LINK_SHARE_DUPLICATED) | |
1934 | { | |
1935 | ctf_dprintf ("Conflictifying unshared types\n"); | |
1936 | if (ctf_dedup_conflictify_unshared (output, inputs) < 0) | |
1937 | return -1; /* errno is set for us. */ | |
1938 | } | |
1939 | return 0; | |
1940 | } | |
1941 | ||
1942 | static int | |
139633c3 | 1943 | ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
1944 | uint32_t ninputs, uint32_t *parents, |
1945 | ctf_dynset_t *already_visited, | |
1946 | const char *hval, | |
1947 | int (*visit_fun) (const char *hval, | |
139633c3 NA |
1948 | ctf_dict_t *output, |
1949 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
1950 | uint32_t ninputs, |
1951 | uint32_t *parents, | |
1952 | int already_visited, | |
139633c3 | 1953 | ctf_dict_t *input, |
0f0c11f7 NA |
1954 | ctf_id_t type, |
1955 | void *id, | |
1956 | int depth, | |
1957 | void *arg), | |
1958 | void *arg, unsigned long depth); | |
1959 | ||
1960 | /* Like ctf_dedup_rwalk_output_mapping (which see), only takes a single target | |
1961 | type and visits it. */ | |
1962 | static int | |
139633c3 NA |
1963 | ctf_dedup_rwalk_one_output_mapping (ctf_dict_t *output, |
1964 | ctf_dict_t **inputs, uint32_t ninputs, | |
0f0c11f7 NA |
1965 | uint32_t *parents, |
1966 | ctf_dynset_t *already_visited, | |
1967 | int visited, void *type_id, | |
1968 | const char *hval, | |
1969 | int (*visit_fun) (const char *hval, | |
139633c3 NA |
1970 | ctf_dict_t *output, |
1971 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
1972 | uint32_t ninputs, |
1973 | uint32_t *parents, | |
1974 | int already_visited, | |
139633c3 | 1975 | ctf_dict_t *input, |
0f0c11f7 NA |
1976 | ctf_id_t type, |
1977 | void *id, | |
1978 | int depth, | |
1979 | void *arg), | |
1980 | void *arg, unsigned long depth) | |
1981 | { | |
1982 | ctf_dedup_t *d = &output->ctf_dedup; | |
139633c3 | 1983 | ctf_dict_t *fp; |
0f0c11f7 NA |
1984 | int input_num; |
1985 | ctf_id_t type; | |
1986 | int ret; | |
1987 | const char *whaterr; | |
1988 | ||
1989 | input_num = CTF_DEDUP_GID_TO_INPUT (type_id); | |
1990 | fp = inputs[input_num]; | |
1991 | type = CTF_DEDUP_GID_TO_TYPE (type_id); | |
1992 | ||
1993 | ctf_dprintf ("%lu: Starting walk over type %s, %i/%lx (%p), from %s, " | |
1994 | "kind %i\n", depth, hval, input_num, type, (void *) fp, | |
1995 | ctf_link_input_name (fp), ctf_type_kind_unsliced (fp, type)); | |
1996 | ||
1997 | /* Get the single call we do if this type has already been visited out of the | |
1998 | way. */ | |
1999 | if (visited) | |
2000 | return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, | |
2001 | type, type_id, depth, arg); | |
2002 | ||
2003 | /* This macro is really ugly, but the alternative is repeating this code many | |
2004 | times, which is worse. */ | |
2005 | ||
2006 | #define CTF_TYPE_WALK(type, errlabel, errmsg) \ | |
2007 | do { \ | |
2008 | void *type_id; \ | |
2009 | const char *hashval; \ | |
2010 | int cited_type_input_num = input_num; \ | |
2011 | \ | |
2012 | if ((fp->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (fp, type))) \ | |
2013 | cited_type_input_num = parents[input_num]; \ | |
2014 | \ | |
2015 | type_id = CTF_DEDUP_GID (output, cited_type_input_num, type); \ | |
2016 | \ | |
2017 | if (type == 0) \ | |
2018 | { \ | |
2019 | ctf_dprintf ("Walking: unimplemented type\n"); \ | |
2020 | break; \ | |
2021 | } \ | |
2022 | \ | |
2023 | ctf_dprintf ("Looking up ID %i/%lx in type hashes\n", \ | |
2024 | cited_type_input_num, type); \ | |
2025 | hashval = ctf_dynhash_lookup (d->cd_type_hashes, type_id); \ | |
2026 | if (!ctf_assert (output, hashval)) \ | |
2027 | { \ | |
926c9e76 | 2028 | whaterr = N_("error looking up ID in type hashes"); \ |
0f0c11f7 NA |
2029 | goto errlabel; \ |
2030 | } \ | |
2031 | ctf_dprintf ("ID %i/%lx has hash %s\n", cited_type_input_num, type, \ | |
2032 | hashval); \ | |
2033 | \ | |
2034 | ret = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, \ | |
2035 | already_visited, hashval, \ | |
2036 | visit_fun, arg, depth); \ | |
2037 | if (ret < 0) \ | |
2038 | { \ | |
2039 | whaterr = errmsg; \ | |
2040 | goto errlabel; \ | |
2041 | } \ | |
2042 | } while (0) | |
2043 | ||
2044 | switch (ctf_type_kind_unsliced (fp, type)) | |
2045 | { | |
2046 | case CTF_K_UNKNOWN: | |
2047 | /* Just skip things of unknown kind. */ | |
2048 | return 0; | |
2049 | case CTF_K_FORWARD: | |
2050 | case CTF_K_INTEGER: | |
2051 | case CTF_K_FLOAT: | |
2052 | case CTF_K_ENUM: | |
2053 | /* No types referenced. */ | |
2054 | break; | |
2055 | ||
2056 | case CTF_K_TYPEDEF: | |
2057 | case CTF_K_VOLATILE: | |
2058 | case CTF_K_CONST: | |
2059 | case CTF_K_RESTRICT: | |
2060 | case CTF_K_POINTER: | |
2061 | case CTF_K_SLICE: | |
2062 | CTF_TYPE_WALK (ctf_type_reference (fp, type), err, | |
926c9e76 | 2063 | N_("error during referenced type walk")); |
0f0c11f7 NA |
2064 | break; |
2065 | ||
2066 | case CTF_K_ARRAY: | |
2067 | { | |
2068 | ctf_arinfo_t ar; | |
2069 | ||
2070 | if (ctf_array_info (fp, type, &ar) < 0) | |
2071 | { | |
926c9e76 | 2072 | whaterr = N_("error during array info lookup"); |
0f0c11f7 NA |
2073 | goto err_msg; |
2074 | } | |
2075 | ||
926c9e76 NA |
2076 | CTF_TYPE_WALK (ar.ctr_contents, err, |
2077 | N_("error during array contents type walk")); | |
2078 | CTF_TYPE_WALK (ar.ctr_index, err, | |
2079 | N_("error during array index type walk")); | |
0f0c11f7 NA |
2080 | break; |
2081 | } | |
2082 | ||
2083 | case CTF_K_FUNCTION: | |
2084 | { | |
2085 | ctf_funcinfo_t fi; | |
2086 | ctf_id_t *args; | |
2087 | uint32_t j; | |
2088 | ||
2089 | if (ctf_func_type_info (fp, type, &fi) < 0) | |
2090 | { | |
926c9e76 | 2091 | whaterr = N_("error during func type info lookup"); |
0f0c11f7 NA |
2092 | goto err_msg; |
2093 | } | |
2094 | ||
926c9e76 NA |
2095 | CTF_TYPE_WALK (fi.ctc_return, err, |
2096 | N_("error during func return type walk")); | |
0f0c11f7 NA |
2097 | |
2098 | if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) | |
2099 | { | |
926c9e76 | 2100 | whaterr = N_("error doing memory allocation"); |
0f0c11f7 NA |
2101 | goto err_msg; |
2102 | } | |
2103 | ||
2104 | if (ctf_func_type_args (fp, type, fi.ctc_argc, args) < 0) | |
2105 | { | |
926c9e76 | 2106 | whaterr = N_("error doing func arg type lookup"); |
0f0c11f7 NA |
2107 | free (args); |
2108 | goto err_msg; | |
2109 | } | |
2110 | ||
2111 | for (j = 0; j < fi.ctc_argc; j++) | |
926c9e76 NA |
2112 | CTF_TYPE_WALK (args[j], err_free_args, |
2113 | N_("error during Func arg type walk")); | |
0f0c11f7 NA |
2114 | free (args); |
2115 | break; | |
2116 | ||
2117 | err_free_args: | |
2118 | free (args); | |
2119 | goto err; | |
2120 | } | |
2121 | case CTF_K_STRUCT: | |
2122 | case CTF_K_UNION: | |
2123 | /* We do not recursively traverse the members of structures: they are | |
2124 | emitted later, in a separate pass. */ | |
2125 | break; | |
2126 | default: | |
926c9e76 NA |
2127 | whaterr = N_("CTF dict corruption: unknown type kind"); |
2128 | goto err_msg; | |
0f0c11f7 NA |
2129 | } |
2130 | ||
2131 | return visit_fun (hval, output, inputs, ninputs, parents, visited, fp, type, | |
2132 | type_id, depth, arg); | |
2133 | ||
2134 | err_msg: | |
2135 | ctf_set_errno (output, ctf_errno (fp)); | |
926c9e76 NA |
2136 | ctf_err_warn (output, 0, 0, _("%s in input file %s at type ID %lx"), |
2137 | gettext (whaterr), ctf_link_input_name (fp), type); | |
0f0c11f7 NA |
2138 | err: |
2139 | return -1; | |
2140 | } | |
2141 | /* Recursively traverse the output mapping, and do something with each type | |
2142 | visited, from leaves to root. VISIT_FUN, called as recursion unwinds, | |
2143 | returns a negative error code or zero. Type hashes may be visited more than | |
2144 | once, but are not recursed through repeatedly: ALREADY_VISITED tracks whether | |
2145 | types have already been visited. */ | |
2146 | static int | |
139633c3 | 2147 | ctf_dedup_rwalk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
2148 | uint32_t ninputs, uint32_t *parents, |
2149 | ctf_dynset_t *already_visited, | |
2150 | const char *hval, | |
2151 | int (*visit_fun) (const char *hval, | |
139633c3 NA |
2152 | ctf_dict_t *output, |
2153 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
2154 | uint32_t ninputs, |
2155 | uint32_t *parents, | |
2156 | int already_visited, | |
139633c3 | 2157 | ctf_dict_t *input, |
0f0c11f7 NA |
2158 | ctf_id_t type, |
2159 | void *id, | |
2160 | int depth, | |
2161 | void *arg), | |
2162 | void *arg, unsigned long depth) | |
2163 | { | |
2164 | ctf_dedup_t *d = &output->ctf_dedup; | |
2165 | ctf_next_t *i = NULL; | |
2166 | int err; | |
2167 | int visited = 1; | |
2168 | ctf_dynset_t *type_ids; | |
2169 | void *id; | |
2170 | ||
2171 | depth++; | |
2172 | ||
2173 | type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); | |
2174 | if (!type_ids) | |
2175 | { | |
926c9e76 NA |
2176 | ctf_err_warn (output, 0, ECTF_INTERNAL, |
2177 | _("looked up type kind by nonexistent hash %s"), hval); | |
0f0c11f7 NA |
2178 | return ctf_set_errno (output, ECTF_INTERNAL); |
2179 | } | |
2180 | ||
2181 | /* Have we seen this type before? */ | |
2182 | ||
2183 | if (!ctf_dynset_exists (already_visited, hval, NULL)) | |
2184 | { | |
2185 | /* Mark as already-visited immediately, to eliminate the possibility of | |
2186 | cycles: but remember we have not actually visited it yet for the | |
2187 | upcoming call to the visit_fun. (All our callers handle cycles | |
2188 | properly themselves, so we can just abort them aggressively as soon as | |
2189 | we find ourselves in one.) */ | |
2190 | ||
2191 | visited = 0; | |
2192 | if (ctf_dynset_cinsert (already_visited, hval) < 0) | |
2193 | { | |
926c9e76 NA |
2194 | ctf_err_warn (output, 0, ENOMEM, |
2195 | _("out of memory tracking already-visited types")); | |
0f0c11f7 NA |
2196 | return ctf_set_errno (output, ENOMEM); |
2197 | } | |
2198 | } | |
2199 | ||
2200 | /* If this type is marked conflicted, traverse members and call | |
2201 | ctf_dedup_rwalk_output_mapping_once on all the unique ones: otherwise, just | |
2202 | pick a random one and use it. */ | |
2203 | ||
2204 | if (!ctf_dynset_exists (d->cd_conflicting_types, hval, NULL)) | |
2205 | { | |
2206 | id = ctf_dynset_lookup_any (type_ids); | |
2207 | if (!ctf_assert (output, id)) | |
2208 | return -1; | |
2209 | ||
2210 | return ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, | |
2211 | parents, already_visited, | |
2212 | visited, id, hval, visit_fun, | |
2213 | arg, depth); | |
2214 | } | |
2215 | ||
2216 | while ((err = ctf_dynset_next (type_ids, &i, &id)) == 0) | |
2217 | { | |
2218 | int ret; | |
2219 | ||
2220 | ret = ctf_dedup_rwalk_one_output_mapping (output, inputs, ninputs, | |
2221 | parents, already_visited, | |
2222 | visited, id, hval, | |
2223 | visit_fun, arg, depth); | |
2224 | if (ret < 0) | |
2225 | { | |
2226 | ctf_next_destroy (i); | |
2227 | return ret; /* errno is set for us. */ | |
2228 | } | |
2229 | } | |
2230 | if (err != ECTF_NEXT_END) | |
2231 | { | |
926c9e76 | 2232 | ctf_err_warn (output, 0, err, _("cannot walk conflicted type")); |
0f0c11f7 NA |
2233 | return ctf_set_errno (output, err); |
2234 | } | |
2235 | ||
2236 | return 0; | |
2237 | } | |
2238 | ||
2239 | typedef struct ctf_sort_om_cb_arg | |
2240 | { | |
139633c3 | 2241 | ctf_dict_t **inputs; |
0f0c11f7 NA |
2242 | uint32_t ninputs; |
2243 | ctf_dedup_t *d; | |
2244 | } ctf_sort_om_cb_arg_t; | |
2245 | ||
2246 | /* Sort the output mapping into order: types first appearing in earlier inputs | |
2247 | first, parents preceding children: if types first appear in the same input, | |
2248 | sort those with earlier ctf_id_t's first. */ | |
2249 | static int | |
2250 | sort_output_mapping (const ctf_next_hkv_t *one, const ctf_next_hkv_t *two, | |
2251 | void *arg_) | |
2252 | { | |
2253 | ctf_sort_om_cb_arg_t *arg = (ctf_sort_om_cb_arg_t *) arg_; | |
2254 | ctf_dedup_t *d = arg->d; | |
2255 | const char *one_hval = (const char *) one->hkv_key; | |
2256 | const char *two_hval = (const char *) two->hkv_key; | |
2257 | void *one_gid, *two_gid; | |
2258 | uint32_t one_ninput; | |
2259 | uint32_t two_ninput; | |
139633c3 NA |
2260 | ctf_dict_t *one_fp; |
2261 | ctf_dict_t *two_fp; | |
0f0c11f7 NA |
2262 | ctf_id_t one_type; |
2263 | ctf_id_t two_type; | |
2264 | ||
2265 | one_gid = ctf_dynhash_lookup (d->cd_output_first_gid, one_hval); | |
2266 | two_gid = ctf_dynhash_lookup (d->cd_output_first_gid, two_hval); | |
2267 | ||
2268 | one_ninput = CTF_DEDUP_GID_TO_INPUT (one_gid); | |
2269 | two_ninput = CTF_DEDUP_GID_TO_INPUT (two_gid); | |
2270 | ||
2271 | one_type = CTF_DEDUP_GID_TO_TYPE (one_gid); | |
2272 | two_type = CTF_DEDUP_GID_TO_TYPE (two_gid); | |
2273 | ||
2274 | /* It's kind of hard to smuggle an assertion failure out of here. */ | |
2275 | assert (one_ninput < arg->ninputs && two_ninput < arg->ninputs); | |
2276 | ||
2277 | one_fp = arg->inputs[one_ninput]; | |
2278 | two_fp = arg->inputs[two_ninput]; | |
2279 | ||
2280 | /* Parents before children. */ | |
2281 | ||
2282 | if (!(one_fp->ctf_flags & LCTF_CHILD) | |
2283 | && (two_fp->ctf_flags & LCTF_CHILD)) | |
2284 | return -1; | |
2285 | else if ((one_fp->ctf_flags & LCTF_CHILD) | |
2286 | && !(two_fp->ctf_flags & LCTF_CHILD)) | |
2287 | return 1; | |
2288 | ||
2289 | /* ninput order, types appearing in earlier TUs first. */ | |
2290 | ||
2291 | if (one_ninput < two_ninput) | |
2292 | return -1; | |
2293 | else if (two_ninput < one_ninput) | |
2294 | return 1; | |
2295 | ||
2296 | /* Same TU. Earliest ctf_id_t first. They cannot be the same. */ | |
2297 | ||
2298 | assert (one_type != two_type); | |
2299 | if (one_type < two_type) | |
2300 | return -1; | |
2301 | else | |
2302 | return 1; | |
2303 | } | |
2304 | ||
2305 | /* The public entry point to ctf_dedup_rwalk_output_mapping, above. */ | |
2306 | static int | |
139633c3 | 2307 | ctf_dedup_walk_output_mapping (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
2308 | uint32_t ninputs, uint32_t *parents, |
2309 | int (*visit_fun) (const char *hval, | |
139633c3 NA |
2310 | ctf_dict_t *output, |
2311 | ctf_dict_t **inputs, | |
0f0c11f7 NA |
2312 | uint32_t ninputs, |
2313 | uint32_t *parents, | |
2314 | int already_visited, | |
139633c3 | 2315 | ctf_dict_t *input, |
0f0c11f7 NA |
2316 | ctf_id_t type, |
2317 | void *id, | |
2318 | int depth, | |
2319 | void *arg), | |
2320 | void *arg) | |
2321 | { | |
2322 | ctf_dynset_t *already_visited; | |
2323 | ctf_next_t *i = NULL; | |
2324 | ctf_sort_om_cb_arg_t sort_arg; | |
2325 | int err; | |
2326 | void *k; | |
2327 | ||
2328 | if ((already_visited = ctf_dynset_create (htab_hash_string, | |
2329 | ctf_dynset_eq_string, | |
2330 | NULL)) == NULL) | |
2331 | return ctf_set_errno (output, ENOMEM); | |
2332 | ||
2333 | sort_arg.inputs = inputs; | |
2334 | sort_arg.ninputs = ninputs; | |
2335 | sort_arg.d = &output->ctf_dedup; | |
2336 | ||
2337 | while ((err = ctf_dynhash_next_sorted (output->ctf_dedup.cd_output_mapping, | |
2338 | &i, &k, NULL, sort_output_mapping, | |
2339 | &sort_arg)) == 0) | |
2340 | { | |
2341 | const char *hval = (const char *) k; | |
2342 | ||
2343 | err = ctf_dedup_rwalk_output_mapping (output, inputs, ninputs, parents, | |
2344 | already_visited, hval, visit_fun, | |
2345 | arg, 0); | |
2346 | if (err < 0) | |
2347 | { | |
2348 | ctf_next_destroy (i); | |
2349 | goto err; /* errno is set for us. */ | |
2350 | } | |
2351 | } | |
2352 | if (err != ECTF_NEXT_END) | |
2353 | { | |
926c9e76 | 2354 | ctf_err_warn (output, 0, err, _("cannot recurse over output mapping")); |
0f0c11f7 NA |
2355 | ctf_set_errno (output, err); |
2356 | goto err; | |
2357 | } | |
2358 | ctf_dynset_destroy (already_visited); | |
2359 | ||
2360 | return 0; | |
2361 | err: | |
2362 | ctf_dynset_destroy (already_visited); | |
2363 | return -1; | |
2364 | } | |
2365 | ||
2366 | /* Possibly synthesise a synthetic forward in TARGET to subsitute for a | |
2367 | conflicted per-TU type ID in INPUT with hash HVAL. Return its CTF ID, or 0 | |
2368 | if none was needed. */ | |
2369 | static ctf_id_t | |
139633c3 NA |
2370 | ctf_dedup_maybe_synthesize_forward (ctf_dict_t *output, ctf_dict_t *target, |
2371 | ctf_dict_t *input, ctf_id_t id, | |
0f0c11f7 NA |
2372 | const char *hval) |
2373 | { | |
2374 | ctf_dedup_t *od = &output->ctf_dedup; | |
2375 | ctf_dedup_t *td = &target->ctf_dedup; | |
2376 | int kind; | |
2377 | int fwdkind; | |
2378 | const char *name; | |
2379 | const char *decorated; | |
2380 | void *v; | |
2381 | ctf_id_t emitted_forward; | |
2382 | ||
2383 | if (!ctf_dynset_exists (od->cd_conflicting_types, hval, NULL) | |
2384 | || target->ctf_flags & LCTF_CHILD | |
2385 | || !ctf_type_name_raw (input, id) | |
2386 | || (((kind = ctf_type_kind_unsliced (input, id)) != CTF_K_STRUCT | |
2387 | && kind != CTF_K_UNION && kind != CTF_K_FORWARD))) | |
2388 | return 0; | |
2389 | ||
2390 | fwdkind = ctf_type_kind_forwarded (input, id); | |
2391 | name = ctf_type_name_raw (input, id); | |
2392 | ||
2393 | ctf_dprintf ("Using synthetic forward for conflicted struct/union with " | |
2394 | "hval %s\n", hval); | |
2395 | ||
2396 | if (!ctf_assert (output, name)) | |
2397 | return CTF_ERR; | |
2398 | ||
2399 | if ((decorated = ctf_decorate_type_name (output, name, fwdkind)) == NULL) | |
2400 | return CTF_ERR; | |
2401 | ||
2402 | if (!ctf_dynhash_lookup_kv (td->cd_output_emission_conflicted_forwards, | |
2403 | decorated, NULL, &v)) | |
2404 | { | |
2405 | if ((emitted_forward = ctf_add_forward (target, CTF_ADD_ROOT, name, | |
2406 | fwdkind)) == CTF_ERR) | |
2407 | { | |
2408 | ctf_set_errno (output, ctf_errno (target)); | |
2409 | return CTF_ERR; | |
2410 | } | |
2411 | ||
2412 | if (ctf_dynhash_cinsert (td->cd_output_emission_conflicted_forwards, | |
2413 | decorated, (void *) (uintptr_t) | |
2414 | emitted_forward) < 0) | |
2415 | { | |
2416 | ctf_set_errno (output, ENOMEM); | |
2417 | return CTF_ERR; | |
2418 | } | |
2419 | } | |
2420 | else | |
2421 | emitted_forward = (ctf_id_t) (uintptr_t) v; | |
2422 | ||
2423 | ctf_dprintf ("Cross-TU conflicted struct: passing back forward, %lx\n", | |
2424 | emitted_forward); | |
2425 | ||
2426 | return emitted_forward; | |
2427 | } | |
2428 | ||
2429 | /* Map a GID in some INPUT dict, in the form of an input number and a ctf_id_t, | |
2430 | into a GID in a target output dict. If it returns 0, this is the | |
2431 | unimplemented type, and the input type must have been 0. The OUTPUT dict is | |
2432 | assumed to be the parent of the TARGET, if it is not the TARGET itself. | |
2433 | ||
2434 | Returns CTF_ERR on failure. Responds to an incoming CTF_ERR as an 'id' by | |
2435 | returning CTF_ERR, to simplify callers. Errors are always propagated to the | |
2436 | input, even if they relate to the target, for the same reason. (Target | |
2437 | errors are expected to be very rare.) | |
2438 | ||
2439 | If the type in question is a citation of a conflicted type in a different TU, | |
2440 | emit a forward of the right type in its place (if not already emitted), and | |
2441 | record that forward in cd_output_emission_conflicted_forwards. This avoids | |
2442 | the need to replicate the entire type graph below this point in the current | |
2443 | TU (an appalling waste of space). | |
2444 | ||
2445 | TODO: maybe replace forwards in the same TU with their referents? Might | |
2446 | make usability a bit better. */ | |
2447 | ||
2448 | static ctf_id_t | |
139633c3 NA |
2449 | ctf_dedup_id_to_target (ctf_dict_t *output, ctf_dict_t *target, |
2450 | ctf_dict_t **inputs, uint32_t ninputs, | |
2451 | uint32_t *parents, ctf_dict_t *input, int input_num, | |
0f0c11f7 NA |
2452 | ctf_id_t id) |
2453 | { | |
2454 | ctf_dedup_t *od = &output->ctf_dedup; | |
2455 | ctf_dedup_t *td = &target->ctf_dedup; | |
139633c3 | 2456 | ctf_dict_t *err_fp = input; |
0f0c11f7 NA |
2457 | const char *hval; |
2458 | void *target_id; | |
2459 | ctf_id_t emitted_forward; | |
2460 | ||
2461 | /* The target type of an error is an error. */ | |
2462 | if (id == CTF_ERR) | |
2463 | return CTF_ERR; | |
2464 | ||
2465 | /* The unimplemented type's ID never changes. */ | |
2466 | if (!id) | |
2467 | { | |
2468 | ctf_dprintf ("%i/%lx: unimplemented type\n", input_num, id); | |
2469 | return 0; | |
2470 | } | |
2471 | ||
2472 | ctf_dprintf ("Mapping %i/%lx to target %p (%s)\n", input_num, | |
2473 | id, (void *) target, ctf_link_input_name (target)); | |
2474 | ||
2475 | /* If the input type is in the parent type space, and this is a child, reset | |
2476 | the input to the parent (which must already have been emitted, since | |
2477 | emission of parent dicts happens before children). */ | |
2478 | if ((input->ctf_flags & LCTF_CHILD) && (LCTF_TYPE_ISPARENT (input, id))) | |
2479 | { | |
2480 | if (!ctf_assert (output, parents[input_num] <= ninputs)) | |
2481 | return -1; | |
2482 | input = inputs[parents[input_num]]; | |
2483 | input_num = parents[input_num]; | |
2484 | } | |
2485 | ||
2486 | hval = ctf_dynhash_lookup (od->cd_type_hashes, | |
2487 | CTF_DEDUP_GID (output, input_num, id)); | |
2488 | ||
2489 | if (!ctf_assert (output, hval && td->cd_output_emission_hashes)) | |
2490 | return -1; | |
2491 | ||
2492 | /* If this type is a conflicted tagged structure, union, or forward, | |
2493 | substitute a synthetic forward instead, emitting it if need be. Only do | |
2494 | this if the target is in the parent dict: if it's in the child dict, we can | |
2495 | just point straight at the thing itself. Of course, we might be looking in | |
2496 | the child dict right now and not find it and have to look in the parent, so | |
2497 | we have to do this check twice. */ | |
2498 | ||
2499 | emitted_forward = ctf_dedup_maybe_synthesize_forward (output, target, | |
2500 | input, id, hval); | |
2501 | switch (emitted_forward) | |
2502 | { | |
2503 | case 0: /* No forward needed. */ | |
2504 | break; | |
2505 | case -1: | |
2506 | ctf_set_errno (err_fp, ctf_errno (output)); | |
926c9e76 NA |
2507 | ctf_err_warn (err_fp, 0, 0, _("cannot add synthetic forward for type " |
2508 | "%i/%lx"), input_num, id); | |
0f0c11f7 NA |
2509 | return -1; |
2510 | default: | |
2511 | return emitted_forward; | |
2512 | } | |
2513 | ||
2514 | ctf_dprintf ("Looking up %i/%lx, hash %s, in target\n", input_num, id, hval); | |
2515 | ||
2516 | target_id = ctf_dynhash_lookup (td->cd_output_emission_hashes, hval); | |
2517 | if (!target_id) | |
2518 | { | |
2519 | /* Must be in the parent, so this must be a child, and they must not be | |
2520 | the same dict. */ | |
2521 | ctf_dprintf ("Checking shared parent for target\n"); | |
2522 | if (!ctf_assert (output, (target != output) | |
2523 | && (target->ctf_flags & LCTF_CHILD))) | |
2524 | return -1; | |
2525 | ||
2526 | target_id = ctf_dynhash_lookup (od->cd_output_emission_hashes, hval); | |
2527 | ||
2528 | emitted_forward = ctf_dedup_maybe_synthesize_forward (output, output, | |
2529 | input, id, hval); | |
2530 | switch (emitted_forward) | |
2531 | { | |
2532 | case 0: /* No forward needed. */ | |
2533 | break; | |
2534 | case -1: | |
926c9e76 NA |
2535 | ctf_err_warn (err_fp, 0, ctf_errno (output), |
2536 | _("cannot add synthetic forward for type %i/%lx"), | |
2537 | input_num, id); | |
2538 | return ctf_set_errno (err_fp, ctf_errno (output)); | |
0f0c11f7 NA |
2539 | default: |
2540 | return emitted_forward; | |
2541 | } | |
2542 | } | |
2543 | if (!ctf_assert (output, target_id)) | |
2544 | return -1; | |
2545 | return (ctf_id_t) (uintptr_t) target_id; | |
2546 | } | |
2547 | ||
2548 | /* Emit a single deduplicated TYPE with the given HVAL, located in a given | |
2549 | INPUT, with the given (G)ID, into the shared OUTPUT or a | |
2550 | possibly-newly-created per-CU dict. All the types this type depends upon | |
2551 | have already been emitted. (This type itself may also have been emitted.) | |
2552 | ||
2553 | If the ARG is 1, this is a CU-mapped deduplication round mapping many | |
139633c3 | 2554 | ctf_dict_t's into precisely one: conflicting types should be marked |
0f0c11f7 NA |
2555 | non-root-visible. If the ARG is 0, conflicting types go into per-CU |
2556 | dictionaries stored in the input's ctf_dedup.cd_output: otherwise, everything | |
2557 | is emitted directly into the output. No struct/union members are emitted. | |
2558 | ||
2559 | Optimization opportunity: trace the ancestry of non-root-visible types and | |
2560 | elide all that neither have a root-visible type somewhere towards their root, | |
2561 | nor have the type visible via any other route (the function info section, | |
2562 | data object section, backtrace section etc). */ | |
2563 | ||
2564 | static int | |
139633c3 | 2565 | ctf_dedup_emit_type (const char *hval, ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 | 2566 | uint32_t ninputs, uint32_t *parents, int already_visited, |
139633c3 | 2567 | ctf_dict_t *input, ctf_id_t type, void *id, int depth, |
0f0c11f7 NA |
2568 | void *arg) |
2569 | { | |
2570 | ctf_dedup_t *d = &output->ctf_dedup; | |
2571 | int kind = ctf_type_kind_unsliced (input, type); | |
2572 | const char *name; | |
139633c3 NA |
2573 | ctf_dict_t *target = output; |
2574 | ctf_dict_t *real_input; | |
0f0c11f7 NA |
2575 | const ctf_type_t *tp; |
2576 | int input_num = CTF_DEDUP_GID_TO_INPUT (id); | |
2577 | int output_num = (uint32_t) -1; /* 'shared' */ | |
2578 | int cu_mapped = *(int *)arg; | |
2579 | int isroot = 1; | |
2580 | int is_conflicting; | |
2581 | ||
2582 | ctf_next_t *i = NULL; | |
2583 | ctf_id_t new_type; | |
2584 | ctf_id_t ref; | |
2585 | ctf_id_t maybe_dup = 0; | |
2586 | ctf_encoding_t ep; | |
926c9e76 | 2587 | const char *errtype; |
0f0c11f7 NA |
2588 | int emission_hashed = 0; |
2589 | ||
2590 | /* We don't want to re-emit something we've already emitted. */ | |
2591 | ||
2592 | if (already_visited) | |
2593 | return 0; | |
2594 | ||
2595 | ctf_dprintf ("%i: Emitting type with hash %s from %s: determining target\n", | |
2596 | depth, hval, ctf_link_input_name (input)); | |
2597 | ||
2598 | /* Conflicting types go into a per-CU output dictionary, unless this is a | |
2599 | CU-mapped run. The import is not refcounted, since it goes into the | |
2600 | ctf_link_outputs dict of the output that is its parent. */ | |
2601 | is_conflicting = ctf_dynset_exists (d->cd_conflicting_types, hval, NULL); | |
2602 | ||
2603 | if (is_conflicting && !cu_mapped) | |
2604 | { | |
2605 | ctf_dprintf ("%i: Type %s in %i/%lx is conflicted: " | |
2606 | "inserting into per-CU target.\n", | |
2607 | depth, hval, input_num, type); | |
2608 | ||
2609 | if (input->ctf_dedup.cd_output) | |
2610 | target = input->ctf_dedup.cd_output; | |
2611 | else | |
2612 | { | |
2613 | int err; | |
2614 | ||
2615 | if ((target = ctf_create (&err)) == NULL) | |
2616 | { | |
926c9e76 NA |
2617 | ctf_err_warn (output, 0, err, |
2618 | _("cannot create per-CU CTF archive for CU %s"), | |
2619 | ctf_link_input_name (input)); | |
2620 | return ctf_set_errno (output, err); | |
0f0c11f7 NA |
2621 | } |
2622 | ||
2623 | ctf_import_unref (target, output); | |
2624 | if (ctf_cuname (input) != NULL) | |
2625 | ctf_cuname_set (target, ctf_cuname (input)); | |
2626 | else | |
2627 | ctf_cuname_set (target, "unnamed-CU"); | |
2628 | ctf_parent_name_set (target, _CTF_SECTION); | |
2629 | ||
2630 | input->ctf_dedup.cd_output = target; | |
2631 | } | |
2632 | output_num = input_num; | |
2633 | } | |
2634 | ||
2635 | real_input = input; | |
2636 | if ((tp = ctf_lookup_by_id (&real_input, type)) == NULL) | |
2637 | { | |
926c9e76 NA |
2638 | ctf_err_warn (output, 0, ctf_errno (input), |
2639 | _("%s: lookup failure for type %lx"), | |
2640 | ctf_link_input_name (real_input), type); | |
2641 | return ctf_set_errno (output, ctf_errno (input)); | |
0f0c11f7 NA |
2642 | } |
2643 | ||
2644 | name = ctf_strraw (real_input, tp->ctt_name); | |
2645 | ||
2646 | /* Hide conflicting types, if we were asked to: also hide if a type with this | |
2647 | name already exists and is not a forward. */ | |
2648 | if (cu_mapped && is_conflicting) | |
2649 | isroot = 0; | |
2650 | else if (name | |
2651 | && (maybe_dup = ctf_lookup_by_rawname (target, kind, name)) != 0) | |
2652 | { | |
2653 | if (ctf_type_kind (target, maybe_dup) != CTF_K_FORWARD) | |
2654 | isroot = 0; | |
2655 | } | |
2656 | ||
2657 | ctf_dprintf ("%i: Emitting type with hash %s (%s), into target %i/%p\n", | |
2658 | depth, hval, name ? name : "", input_num, (void *) target); | |
2659 | ||
2660 | if (!target->ctf_dedup.cd_output_emission_hashes) | |
2661 | if ((target->ctf_dedup.cd_output_emission_hashes | |
2662 | = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, | |
2663 | NULL, NULL)) == NULL) | |
2664 | goto oom_hash; | |
2665 | ||
2666 | if (!target->ctf_dedup.cd_output_emission_conflicted_forwards) | |
2667 | if ((target->ctf_dedup.cd_output_emission_conflicted_forwards | |
2668 | = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string, | |
2669 | NULL, NULL)) == NULL) | |
2670 | goto oom_hash; | |
2671 | ||
2672 | switch (kind) | |
2673 | { | |
2674 | case CTF_K_UNKNOWN: | |
2675 | /* These are types that CTF cannot encode, marked as such by the compile. | |
2676 | We intentionally do not re-emit these. */ | |
2677 | new_type = 0; | |
2678 | break; | |
2679 | case CTF_K_FORWARD: | |
2680 | /* This will do nothing if the type to which this forwards already exists, | |
2681 | and will be replaced with such a type if it appears later. */ | |
2682 | ||
926c9e76 | 2683 | errtype = _("forward"); |
0f0c11f7 NA |
2684 | if ((new_type = ctf_add_forward (target, isroot, name, |
2685 | ctf_type_kind_forwarded (input, type))) | |
2686 | == CTF_ERR) | |
2687 | goto err_target; | |
2688 | break; | |
2689 | ||
2690 | case CTF_K_FLOAT: | |
2691 | case CTF_K_INTEGER: | |
926c9e76 | 2692 | errtype = _("float/int"); |
0f0c11f7 NA |
2693 | if (ctf_type_encoding (input, type, &ep) < 0) |
2694 | goto err_input; /* errno is set for us. */ | |
2695 | if ((new_type = ctf_add_encoded (target, isroot, name, &ep, kind)) | |
2696 | == CTF_ERR) | |
2697 | goto err_target; | |
2698 | break; | |
2699 | ||
2700 | case CTF_K_ENUM: | |
2701 | { | |
2702 | int val; | |
926c9e76 | 2703 | errtype = _("enum"); |
0f0c11f7 NA |
2704 | if ((new_type = ctf_add_enum (target, isroot, name)) == CTF_ERR) |
2705 | goto err_input; /* errno is set for us. */ | |
2706 | ||
2707 | while ((name = ctf_enum_next (input, type, &i, &val)) != NULL) | |
2708 | { | |
2709 | if (ctf_add_enumerator (target, new_type, name, val) < 0) | |
2710 | { | |
926c9e76 NA |
2711 | ctf_err_warn (target, 0, ctf_errno (target), |
2712 | _("%s (%i): cannot add enumeration value %s " | |
2713 | "from input type %lx"), | |
0f0c11f7 | 2714 | ctf_link_input_name (input), input_num, name, |
926c9e76 | 2715 | type); |
0f0c11f7 NA |
2716 | ctf_next_destroy (i); |
2717 | return ctf_set_errno (output, ctf_errno (target)); | |
2718 | } | |
2719 | } | |
2720 | if (ctf_errno (input) != ECTF_NEXT_END) | |
2721 | goto err_input; | |
2722 | break; | |
2723 | } | |
2724 | ||
2725 | case CTF_K_TYPEDEF: | |
926c9e76 | 2726 | errtype = _("typedef"); |
0f0c11f7 NA |
2727 | |
2728 | ref = ctf_type_reference (input, type); | |
2729 | if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2730 | parents, input, input_num, | |
2731 | ref)) == CTF_ERR) | |
2732 | goto err_input; /* errno is set for us. */ | |
2733 | ||
2734 | if ((new_type = ctf_add_typedef (target, isroot, name, ref)) == CTF_ERR) | |
2735 | goto err_target; /* errno is set for us. */ | |
2736 | break; | |
2737 | ||
2738 | case CTF_K_VOLATILE: | |
2739 | case CTF_K_CONST: | |
2740 | case CTF_K_RESTRICT: | |
2741 | case CTF_K_POINTER: | |
926c9e76 | 2742 | errtype = _("pointer or cvr-qual"); |
0f0c11f7 NA |
2743 | |
2744 | ref = ctf_type_reference (input, type); | |
2745 | if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2746 | parents, input, input_num, | |
2747 | ref)) == CTF_ERR) | |
2748 | goto err_input; /* errno is set for us. */ | |
2749 | ||
2750 | if ((new_type = ctf_add_reftype (target, isroot, ref, kind)) == CTF_ERR) | |
2751 | goto err_target; /* errno is set for us. */ | |
2752 | break; | |
2753 | ||
2754 | case CTF_K_SLICE: | |
926c9e76 | 2755 | errtype = _("slice"); |
0f0c11f7 NA |
2756 | |
2757 | if (ctf_type_encoding (input, type, &ep) < 0) | |
2758 | goto err_input; /* errno is set for us. */ | |
2759 | ||
2760 | ref = ctf_type_reference (input, type); | |
2761 | if ((ref = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2762 | parents, input, input_num, | |
2763 | ref)) == CTF_ERR) | |
2764 | goto err_input; | |
2765 | ||
2766 | if ((new_type = ctf_add_slice (target, isroot, ref, &ep)) == CTF_ERR) | |
2767 | goto err_target; | |
2768 | break; | |
2769 | ||
2770 | case CTF_K_ARRAY: | |
2771 | { | |
2772 | ctf_arinfo_t ar; | |
2773 | ||
926c9e76 | 2774 | errtype = _("array info"); |
0f0c11f7 NA |
2775 | if (ctf_array_info (input, type, &ar) < 0) |
2776 | goto err_input; | |
2777 | ||
2778 | ar.ctr_contents = ctf_dedup_id_to_target (output, target, inputs, | |
2779 | ninputs, parents, input, | |
2780 | input_num, ar.ctr_contents); | |
2781 | ar.ctr_index = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2782 | parents, input, input_num, | |
2783 | ar.ctr_index); | |
2784 | ||
2785 | if (ar.ctr_contents == CTF_ERR || ar.ctr_index == CTF_ERR) | |
2786 | goto err_input; | |
2787 | ||
2788 | if ((new_type = ctf_add_array (target, isroot, &ar)) == CTF_ERR) | |
2789 | goto err_target; | |
2790 | ||
2791 | break; | |
2792 | } | |
2793 | ||
2794 | case CTF_K_FUNCTION: | |
2795 | { | |
2796 | ctf_funcinfo_t fi; | |
2797 | ctf_id_t *args; | |
2798 | uint32_t j; | |
2799 | ||
926c9e76 | 2800 | errtype = _("function"); |
0f0c11f7 NA |
2801 | if (ctf_func_type_info (input, type, &fi) < 0) |
2802 | goto err_input; | |
2803 | ||
2804 | fi.ctc_return = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2805 | parents, input, input_num, | |
2806 | fi.ctc_return); | |
2807 | if (fi.ctc_return == CTF_ERR) | |
2808 | goto err_input; | |
2809 | ||
2810 | if ((args = calloc (fi.ctc_argc, sizeof (ctf_id_t))) == NULL) | |
2811 | { | |
2812 | ctf_set_errno (input, ENOMEM); | |
2813 | goto err_input; | |
2814 | } | |
2815 | ||
926c9e76 | 2816 | errtype = _("function args"); |
0f0c11f7 NA |
2817 | if (ctf_func_type_args (input, type, fi.ctc_argc, args) < 0) |
2818 | { | |
2819 | free (args); | |
2820 | goto err_input; | |
2821 | } | |
2822 | ||
2823 | for (j = 0; j < fi.ctc_argc; j++) | |
2824 | { | |
2825 | args[j] = ctf_dedup_id_to_target (output, target, inputs, ninputs, | |
2826 | parents, input, input_num, | |
2827 | args[j]); | |
2828 | if (args[j] == CTF_ERR) | |
2829 | goto err_input; | |
2830 | } | |
2831 | ||
2832 | if ((new_type = ctf_add_function (target, isroot, | |
2833 | &fi, args)) == CTF_ERR) | |
2834 | { | |
2835 | free (args); | |
2836 | goto err_target; | |
2837 | } | |
2838 | free (args); | |
2839 | break; | |
2840 | } | |
2841 | ||
2842 | case CTF_K_STRUCT: | |
2843 | case CTF_K_UNION: | |
2844 | { | |
2845 | size_t size = ctf_type_size (input, type); | |
2846 | void *out_id; | |
2847 | /* Insert the structure itself, so other types can refer to it. */ | |
2848 | ||
926c9e76 | 2849 | errtype = _("structure/union"); |
0f0c11f7 NA |
2850 | if (kind == CTF_K_STRUCT) |
2851 | new_type = ctf_add_struct_sized (target, isroot, name, size); | |
2852 | else | |
2853 | new_type = ctf_add_union_sized (target, isroot, name, size); | |
2854 | ||
2855 | if (new_type == CTF_ERR) | |
2856 | goto err_target; | |
2857 | ||
2858 | out_id = CTF_DEDUP_GID (output, output_num, new_type); | |
2859 | ctf_dprintf ("%i: Noting need to emit members of %p -> %p\n", depth, | |
2860 | id, out_id); | |
2861 | /* Record the need to emit the members of this structure later. */ | |
2862 | if (ctf_dynhash_insert (d->cd_emission_struct_members, id, out_id) < 0) | |
2863 | goto err_target; | |
2864 | break; | |
2865 | } | |
2866 | default: | |
926c9e76 NA |
2867 | ctf_err_warn (output, 0, ECTF_CORRUPT, _("%s: unknown type kind for " |
2868 | "input type %lx"), | |
0f0c11f7 | 2869 | ctf_link_input_name (input), type); |
926c9e76 | 2870 | return ctf_set_errno (output, ECTF_CORRUPT); |
0f0c11f7 NA |
2871 | } |
2872 | ||
2873 | if (!emission_hashed | |
2874 | && new_type != 0 | |
2875 | && ctf_dynhash_cinsert (target->ctf_dedup.cd_output_emission_hashes, | |
2876 | hval, (void *) (uintptr_t) new_type) < 0) | |
2877 | { | |
926c9e76 NA |
2878 | ctf_err_warn (output, 0, ENOMEM, _("out of memory tracking deduplicated " |
2879 | "global type IDs")); | |
0f0c11f7 NA |
2880 | return ctf_set_errno (output, ENOMEM); |
2881 | } | |
2882 | ||
2883 | if (!emission_hashed && new_type != 0) | |
2884 | ctf_dprintf ("%i: Inserted %s, %i/%lx -> %lx into emission hash for " | |
2885 | "target %p (%s)\n", depth, hval, input_num, type, new_type, | |
2886 | (void *) target, ctf_link_input_name (target)); | |
2887 | ||
2888 | return 0; | |
2889 | ||
2890 | oom_hash: | |
926c9e76 NA |
2891 | ctf_err_warn (output, 0, ENOMEM, _("out of memory creating emission-tracking " |
2892 | "hashes")); | |
0f0c11f7 NA |
2893 | return ctf_set_errno (output, ENOMEM); |
2894 | ||
2895 | err_input: | |
926c9e76 NA |
2896 | ctf_err_warn (output, 0, ctf_errno (input), |
2897 | _("%s (%i): while emitting deduplicated %s, error getting " | |
2898 | "input type %lx"), ctf_link_input_name (input), | |
2899 | input_num, errtype, type); | |
2900 | return ctf_set_errno (output, ctf_errno (input)); | |
0f0c11f7 | 2901 | err_target: |
926c9e76 NA |
2902 | ctf_err_warn (output, 0, ctf_errno (target), |
2903 | _("%s (%i): while emitting deduplicated %s, error emitting " | |
2904 | "target type from input type %lx"), | |
2905 | ctf_link_input_name (input), input_num, | |
2906 | errtype, type); | |
2907 | return ctf_set_errno (output, ctf_errno (target)); | |
0f0c11f7 NA |
2908 | } |
2909 | ||
2910 | /* Traverse the cd_emission_struct_members and emit the members of all | |
2911 | structures and unions. All other types are emitted and complete by this | |
2912 | point. */ | |
2913 | ||
2914 | static int | |
139633c3 | 2915 | ctf_dedup_emit_struct_members (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
2916 | uint32_t ninputs, uint32_t *parents) |
2917 | { | |
2918 | ctf_dedup_t *d = &output->ctf_dedup; | |
2919 | ctf_next_t *i = NULL; | |
2920 | void *input_id, *target_id; | |
2921 | int err; | |
139633c3 | 2922 | ctf_dict_t *err_fp, *input_fp; |
0f0c11f7 NA |
2923 | int input_num; |
2924 | ctf_id_t err_type; | |
2925 | ||
2926 | while ((err = ctf_dynhash_next (d->cd_emission_struct_members, &i, | |
2927 | &input_id, &target_id)) == 0) | |
2928 | { | |
2929 | ctf_next_t *j = NULL; | |
139633c3 | 2930 | ctf_dict_t *target; |
0f0c11f7 NA |
2931 | uint32_t target_num; |
2932 | ctf_id_t input_type, target_type; | |
2933 | ssize_t offset; | |
2934 | ctf_id_t membtype; | |
2935 | const char *name; | |
2936 | ||
2937 | input_num = CTF_DEDUP_GID_TO_INPUT (input_id); | |
2938 | input_fp = inputs[input_num]; | |
2939 | input_type = CTF_DEDUP_GID_TO_TYPE (input_id); | |
2940 | ||
2941 | /* The output is either -1 (for the shared, parent output dict) or the | |
2942 | number of the corresponding input. */ | |
2943 | target_num = CTF_DEDUP_GID_TO_INPUT (target_id); | |
2944 | if (target_num == (uint32_t) -1) | |
2945 | target = output; | |
2946 | else | |
2947 | { | |
2948 | target = inputs[target_num]->ctf_dedup.cd_output; | |
2949 | if (!ctf_assert (output, target)) | |
2950 | { | |
2951 | err_fp = output; | |
2952 | err_type = input_type; | |
2953 | goto err_target; | |
2954 | } | |
2955 | } | |
2956 | target_type = CTF_DEDUP_GID_TO_TYPE (target_id); | |
2957 | ||
2958 | while ((offset = ctf_member_next (input_fp, input_type, &j, &name, | |
2959 | &membtype)) >= 0) | |
2960 | { | |
2961 | err_fp = target; | |
2962 | err_type = target_type; | |
2963 | if ((membtype = ctf_dedup_id_to_target (output, target, inputs, | |
2964 | ninputs, parents, input_fp, | |
2965 | input_num, | |
2966 | membtype)) == CTF_ERR) | |
2967 | { | |
2968 | ctf_next_destroy (j); | |
2969 | goto err_target; | |
2970 | } | |
2971 | ||
2972 | if (name == NULL) | |
2973 | name = ""; | |
2974 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
2975 | ctf_dprintf ("Emitting %s, offset %zi\n", name, offset); | |
2976 | #endif | |
2977 | if (ctf_add_member_offset (target, target_type, name, | |
2978 | membtype, offset) < 0) | |
2979 | { | |
2980 | ctf_next_destroy (j); | |
2981 | goto err_target; | |
2982 | } | |
2983 | } | |
2984 | if (ctf_errno (input_fp) != ECTF_NEXT_END) | |
2985 | { | |
2986 | err = ctf_errno (input_fp); | |
2987 | ctf_next_destroy (i); | |
2988 | goto iterr; | |
2989 | } | |
2990 | } | |
2991 | if (err != ECTF_NEXT_END) | |
2992 | goto iterr; | |
2993 | ||
2994 | return 0; | |
2995 | err_target: | |
2996 | ctf_next_destroy (i); | |
926c9e76 NA |
2997 | ctf_err_warn (output, 0, ctf_errno (err_fp), |
2998 | _("%s (%i): error emitting members for structure type %lx"), | |
2999 | ctf_link_input_name (input_fp), input_num, err_type); | |
3000 | return ctf_set_errno (output, ctf_errno (err_fp)); | |
0f0c11f7 | 3001 | iterr: |
926c9e76 NA |
3002 | ctf_err_warn (output, 0, err, _("iteration failure emitting " |
3003 | "structure members")); | |
3004 | return ctf_set_errno (output, err); | |
0f0c11f7 NA |
3005 | } |
3006 | ||
3007 | /* Populate the type mapping used by the types in one FP (which must be an input | |
3008 | dict containing a non-null cd_output resulting from a ctf_dedup_emit_type | |
3009 | walk). */ | |
3010 | static int | |
139633c3 NA |
3011 | ctf_dedup_populate_type_mapping (ctf_dict_t *shared, ctf_dict_t *fp, |
3012 | ctf_dict_t **inputs) | |
0f0c11f7 NA |
3013 | { |
3014 | ctf_dedup_t *d = &shared->ctf_dedup; | |
139633c3 | 3015 | ctf_dict_t *output = fp->ctf_dedup.cd_output; |
0f0c11f7 NA |
3016 | const void *k, *v; |
3017 | ctf_next_t *i = NULL; | |
3018 | int err; | |
3019 | ||
3020 | /* The shared dict (the output) stores its types in the fp itself, not in a | |
3021 | separate cd_output dict. */ | |
3022 | if (shared == fp) | |
3023 | output = fp; | |
3024 | ||
3025 | /* There may be no types to emit at all, or all the types in this TU may be | |
3026 | shared. */ | |
3027 | if (!output || !output->ctf_dedup.cd_output_emission_hashes) | |
3028 | return 0; | |
3029 | ||
3030 | while ((err = ctf_dynhash_cnext (output->ctf_dedup.cd_output_emission_hashes, | |
3031 | &i, &k, &v)) == 0) | |
3032 | { | |
3033 | const char *hval = (const char *) k; | |
3034 | ctf_id_t id_out = (ctf_id_t) (uintptr_t) v; | |
3035 | ctf_next_t *j = NULL; | |
3036 | ctf_dynset_t *type_ids; | |
3037 | const void *id; | |
3038 | ||
3039 | type_ids = ctf_dynhash_lookup (d->cd_output_mapping, hval); | |
3040 | if (!ctf_assert (shared, type_ids)) | |
3041 | return -1; | |
3042 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
3043 | ctf_dprintf ("Traversing emission hash: hval %s\n", hval); | |
3044 | #endif | |
3045 | ||
3046 | while ((err = ctf_dynset_cnext (type_ids, &j, &id)) == 0) | |
3047 | { | |
139633c3 | 3048 | ctf_dict_t *input = inputs[CTF_DEDUP_GID_TO_INPUT (id)]; |
0f0c11f7 NA |
3049 | ctf_id_t id_in = CTF_DEDUP_GID_TO_TYPE (id); |
3050 | ||
3051 | #ifdef ENABLE_LIBCTF_HASH_DEBUGGING | |
3052 | ctf_dprintf ("Adding mapping from %i/%lx to %lx\n", | |
3053 | CTF_DEDUP_GID_TO_INPUT (id), id_in, id_out); | |
3054 | #endif | |
3055 | ctf_add_type_mapping (input, id_in, output, id_out); | |
3056 | } | |
3057 | if (err != ECTF_NEXT_END) | |
3058 | { | |
3059 | ctf_next_destroy (i); | |
3060 | goto err; | |
3061 | } | |
3062 | } | |
3063 | if (err != ECTF_NEXT_END) | |
3064 | goto err; | |
3065 | ||
3066 | return 0; | |
3067 | ||
3068 | err: | |
926c9e76 | 3069 | ctf_err_warn (shared, 0, err, _("iteration error populating the type mapping")); |
0f0c11f7 NA |
3070 | return ctf_set_errno (shared, err); |
3071 | } | |
3072 | ||
3073 | /* Populate the type mapping machinery used by the rest of the linker, | |
3074 | by ctf_add_type, etc. */ | |
3075 | static int | |
139633c3 | 3076 | ctf_dedup_populate_type_mappings (ctf_dict_t *output, ctf_dict_t **inputs, |
0f0c11f7 NA |
3077 | uint32_t ninputs) |
3078 | { | |
3079 | size_t i; | |
3080 | ||
3081 | if (ctf_dedup_populate_type_mapping (output, output, inputs) < 0) | |
3082 | { | |
926c9e76 NA |
3083 | ctf_err_warn (output, 0, 0, _("cannot populate type mappings for shared " |
3084 | "CTF dict")); | |
0f0c11f7 NA |
3085 | return -1; /* errno is set for us. */ |
3086 | } | |
3087 | ||
3088 | for (i = 0; i < ninputs; i++) | |
3089 | { | |
3090 | if (ctf_dedup_populate_type_mapping (output, inputs[i], inputs) < 0) | |
3091 | { | |
926c9e76 NA |
3092 | ctf_err_warn (output, 0, ctf_errno (inputs[i]), |
3093 | _("cannot populate type mappings for per-CU CTF dict")); | |
0f0c11f7 NA |
3094 | return ctf_set_errno (output, ctf_errno (inputs[i])); |
3095 | } | |
3096 | } | |
3097 | ||
3098 | return 0; | |
3099 | } | |
3100 | ||
3101 | /* Emit deduplicated types into the outputs. The shared type repository is | |
3102 | OUTPUT, on which the ctf_dedup function must have already been called. The | |
3103 | PARENTS array contains the INPUTS index of the parent dict for every child | |
3104 | dict at the corresponding index in the INPUTS (for non-child dicts, the value | |
3105 | is undefined). | |
3106 | ||
3107 | Return an array of fps with content emitted into them (starting with OUTPUT, | |
3108 | which is the parent of all others, then all the newly-generated outputs). | |
3109 | ||
3110 | If CU_MAPPED is set, this is a first pass for a link with a non-empty CU | |
3111 | mapping: only one output will result. */ | |
3112 | ||
139633c3 NA |
3113 | ctf_dict_t ** |
3114 | ctf_dedup_emit (ctf_dict_t *output, ctf_dict_t **inputs, uint32_t ninputs, | |
0f0c11f7 NA |
3115 | uint32_t *parents, uint32_t *noutputs, int cu_mapped) |
3116 | { | |
3117 | size_t num_outputs = 1; /* Always at least one output: us. */ | |
139633c3 NA |
3118 | ctf_dict_t **outputs; |
3119 | ctf_dict_t **walk; | |
0f0c11f7 NA |
3120 | size_t i; |
3121 | ||
3122 | ctf_dprintf ("Triggering emission.\n"); | |
3123 | if (ctf_dedup_walk_output_mapping (output, inputs, ninputs, parents, | |
3124 | ctf_dedup_emit_type, &cu_mapped) < 0) | |
3125 | return NULL; /* errno is set for us. */ | |
3126 | ||
3127 | ctf_dprintf ("Populating struct members.\n"); | |
3128 | if (ctf_dedup_emit_struct_members (output, inputs, ninputs, parents) < 0) | |
3129 | return NULL; /* errno is set for us. */ | |
3130 | ||
3131 | if (ctf_dedup_populate_type_mappings (output, inputs, ninputs) < 0) | |
3132 | return NULL; /* errno is set for us. */ | |
3133 | ||
3134 | for (i = 0; i < ninputs; i++) | |
3135 | { | |
3136 | if (inputs[i]->ctf_dedup.cd_output) | |
3137 | num_outputs++; | |
3138 | } | |
3139 | ||
3140 | if (!ctf_assert (output, !cu_mapped || (cu_mapped && num_outputs == 1))) | |
3141 | return NULL; | |
3142 | ||
139633c3 | 3143 | if ((outputs = calloc (num_outputs, sizeof (ctf_dict_t *))) == NULL) |
0f0c11f7 | 3144 | { |
926c9e76 NA |
3145 | ctf_err_warn (output, 0, ENOMEM, |
3146 | _("out of memory allocating link outputs array")); | |
0f0c11f7 NA |
3147 | ctf_set_errno (output, ENOMEM); |
3148 | return NULL; | |
3149 | } | |
3150 | *noutputs = num_outputs; | |
3151 | ||
3152 | walk = outputs; | |
3153 | *walk = output; | |
3154 | output->ctf_refcnt++; | |
3155 | walk++; | |
3156 | ||
3157 | for (i = 0; i < ninputs; i++) | |
3158 | { | |
3159 | if (inputs[i]->ctf_dedup.cd_output) | |
3160 | { | |
3161 | *walk = inputs[i]->ctf_dedup.cd_output; | |
3162 | inputs[i]->ctf_dedup.cd_output = NULL; | |
3163 | walk++; | |
3164 | } | |
3165 | } | |
3166 | ||
3167 | ctf_dedup_fini (output, outputs, num_outputs); | |
3168 | return outputs; | |
3169 | } |