Commit | Line | Data |
---|---|---|
dbe717ef ILT |
1 | // script.cc -- handle linker scripts for gold. |
2 | ||
3 | #include "gold.h" | |
4 | ||
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
9 | ||
10 | #include "options.h" | |
11 | #include "fileread.h" | |
12 | #include "workqueue.h" | |
13 | #include "readsyms.h" | |
14 | #include "yyscript.h" | |
15 | #include "script.h" | |
16 | #include "script-c.h" | |
17 | ||
18 | namespace gold | |
19 | { | |
20 | ||
21 | // A token read from a script file. We don't implement keywords here; | |
22 | // all keywords are simply represented as a string. | |
23 | ||
class Token
{
 public:
  // Token classification.
  enum Classification
  {
    // Token is invalid.
    TOKEN_INVALID,
    // Token indicates end of input.
    TOKEN_EOF,
    // Token is a string of characters.
    TOKEN_STRING,
    // Token is an operator.
    TOKEN_OPERATOR,
    // Token is a number (an integer).
    TOKEN_INTEGER
  };

  // We need an empty constructor so that we can put this in STL
  // containers.
  Token()
    : classification_(TOKEN_INVALID), value_(), opcode_(0),
      lineno_(0), charpos_(0)
  { }

  // A general token with no value.  Only the INVALID and EOF
  // classifications carry no data.
  Token(Classification classification, int lineno, int charpos)
    : classification_(classification), value_(), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    gold_assert(classification == TOKEN_INVALID
		|| classification == TOKEN_EOF);
  }

  // A general token with a value.
  Token(Classification classification, const std::string& value,
	int lineno, int charpos)
    : classification_(classification), value_(value), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    gold_assert(classification != TOKEN_INVALID
		&& classification != TOKEN_EOF);
  }

  // A token representing a string of characters.
  Token(const std::string& s, int lineno, int charpos)
    : classification_(TOKEN_STRING), value_(s), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  { }

  // A token representing an operator.
  Token(int opcode, int lineno, int charpos)
    : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
      lineno_(lineno), charpos_(charpos)
  { }

  // Return whether the token is invalid.
  bool
  is_invalid() const
  { return this->classification_ == TOKEN_INVALID; }

  // Return whether this is an EOF token.
  bool
  is_eof() const
  { return this->classification_ == TOKEN_EOF; }

  // Return the token classification.
  Classification
  classification() const
  { return this->classification_; }

  // Return the line number at which the token starts.
  int
  lineno() const
  { return this->lineno_; }

  // Return the character position at which the token starts.
  int
  charpos() const
  { return this->charpos_; }

  // Get the value of a token.

  const std::string&
  string_value() const
  {
    gold_assert(this->classification_ == TOKEN_STRING);
    return this->value_;
  }

  int
  operator_value() const
  {
    gold_assert(this->classification_ == TOKEN_OPERATOR);
    return this->opcode_;
  }

  // Parse the stored text as an integer.  The value is kept as a
  // string and converted on each call; base 0 lets strtoll accept
  // decimal, hex (0x...) and octal (leading 0) forms.
  int64_t
  integer_value() const
  {
    gold_assert(this->classification_ == TOKEN_INTEGER);
    return strtoll(this->value_.c_str(), NULL, 0);
  }

 private:
  // The token classification.
  Classification classification_;
  // The token value, for TOKEN_STRING or TOKEN_INTEGER.
  std::string value_;
  // The token value, for TOKEN_OPERATOR.
  int opcode_;
  // The line number where this token started (one based).
  int lineno_;
  // The character position within the line where this token started
  // (one based).
  int charpos_;
};
140 | ||
141 | // This class handles lexing a file into a sequence of tokens. We | |
142 | // don't expect linker scripts to be large, so we just read them and | |
143 | // tokenize them all at once. | |
144 | ||
class Lex
{
 public:
  // INPUT_FILE is the already-opened script file; its contents are
  // read when tokenize() is called.
  Lex(Input_file* input_file)
    : input_file_(input_file), tokens_()
  { }

  // Tokenize the file.  Return the final token, which will be either
  // an invalid token or an EOF token.  An invalid token indicates
  // that tokenization failed.
  Token
  tokenize();

  // A token sequence.
  typedef std::vector<Token> Token_sequence;

  // Return the tokens.
  const Token_sequence&
  tokens() const
  { return this->tokens_; }

 private:
  // Not copyable: declared but never defined.
  Lex(const Lex&);
  Lex& operator=(const Lex&);

  // Read the file into a string buffer.
  void
  read_file(std::string*);

  // Make a general token with no value at the current location.
  Token
  make_token(Token::Classification c, const char* p) const
  { return Token(c, this->lineno_, p - this->linestart_ + 1); }

  // Make a general token with a value at the current location.
  Token
  make_token(Token::Classification c, const std::string& v, const char* p)
    const
  { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }

  // Make an operator token at the current location.
  Token
  make_token(int opcode, const char* p) const
  { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }

  // Make an invalid token at the current location.
  Token
  make_invalid_token(const char* p)
  { return this->make_token(Token::TOKEN_INVALID, p); }

  // Make an EOF token at the current location.
  Token
  make_eof_token(const char* p)
  { return this->make_token(Token::TOKEN_EOF, p); }

  // Return whether C can be the first character in a name.  C2 is the
  // next character, since we sometimes need that.
  static inline bool
  can_start_name(char c, char c2);

  // Return whether C can appear in a name which has already started.
  static inline bool
  can_continue_name(char c);

  // Return whether C, C2, C3 can start a hex number.
  static inline bool
  can_start_hex(char c, char c2, char c3);

  // Return whether C can appear in a hex number.
  static inline bool
  can_continue_hex(char c);

  // Return whether C can start a non-hex number.
  static inline bool
  can_start_number(char c);

  // Return whether C can appear in a non-hex number.  The characters
  // are the same as the starting characters.
  static inline bool
  can_continue_number(char c)
  { return Lex::can_start_number(c); }

  // If C1 C2 C3 form a valid three character operator, return the
  // opcode.  Otherwise return 0.
  static inline int
  three_char_operator(char c1, char c2, char c3);

  // If C1 C2 form a valid two character operator, return the opcode.
  // Otherwise return 0.
  static inline int
  two_char_operator(char c1, char c2);

  // If C1 is a valid one character operator, return the opcode.
  // Otherwise return 0.
  static inline int
  one_char_operator(char c1);

  // Read the next token.
  Token
  get_token(const char**);

  // Skip a C style /* */ comment.  Return false if the comment did
  // not end.
  bool
  skip_c_comment(const char**);

  // Skip a line # comment.  Return false if there was no newline.
  bool
  skip_line_comment(const char**);

  // Build a token CLASSIFICATION from all characters that match
  // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
  // MATCH.  Set *PP to the character following the token.
  inline Token
  gather_token(Token::Classification, bool (*can_continue_fn)(char),
	       const char* start, const char* match, const char** pp);

  // Build a token from a quoted string.
  Token
  gather_quoted_string(const char** pp);

  // The file we are reading.
  Input_file* input_file_;
  // The token sequence we create.
  Token_sequence tokens_;
  // The current line number.  Not set by the constructor; tokenize()
  // initializes it before any token is made.
  int lineno_;
  // The start of the current line in the buffer.
  const char* linestart_;
};
274 | ||
275 | // Read the whole file into memory. We don't expect linker scripts to | |
276 | // be large, so we just use a std::string as a buffer. We ignore the | |
277 | // data we've already read, so that we read aligned buffers. | |
278 | ||
279 | void | |
280 | Lex::read_file(std::string* contents) | |
281 | { | |
282 | contents->clear(); | |
283 | off_t off = 0; | |
284 | off_t got; | |
285 | unsigned char buf[BUFSIZ]; | |
286 | do | |
287 | { | |
288 | this->input_file_->file().read(off, sizeof buf, buf, &got); | |
289 | contents->append(reinterpret_cast<char*>(&buf[0]), got); | |
290 | } | |
291 | while (got == sizeof buf); | |
292 | } | |
293 | ||
// Return whether C can be the start of a name, if the next character
// is C2.  A name can begin with a letter, underscore, period, or
// dollar sign.  Because a name can be a file name, we also permit
// forward slash, backslash, and tilde.  Tilde is the tricky case
// here; GNU ld also uses it as a bitwise not operator.  It is only
// recognized as the operator if it is not immediately followed by
// some character which can appear in a symbol.  That is, "~0" is a
// symbol name, and "~ 0" is an expression using bitwise not.  We are
// compatible.
303 | ||
304 | inline bool | |
305 | Lex::can_start_name(char c, char c2) | |
306 | { | |
307 | switch (c) | |
308 | { | |
309 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
310 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
311 | case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R': | |
312 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
313 | case 'Y': case 'Z': | |
314 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
315 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
316 | case 'm': case 'n': case 'o': case 'q': case 'p': case 'r': | |
317 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
318 | case 'y': case 'z': | |
319 | case '_': case '.': case '$': case '/': case '\\': | |
320 | return true; | |
321 | ||
322 | case '~': | |
323 | return can_continue_name(c2); | |
324 | ||
325 | default: | |
326 | return false; | |
327 | } | |
328 | } | |
329 | ||
330 | // Return whether C can continue a name which has already started. | |
331 | // Subsequent characters in a name are the same as the leading | |
332 | // characters, plus digits and "=+-:[],?*". So in general the linker | |
333 | // script language requires spaces around operators. | |
334 | ||
335 | inline bool | |
336 | Lex::can_continue_name(char c) | |
337 | { | |
338 | switch (c) | |
339 | { | |
340 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
341 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
342 | case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R': | |
343 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
344 | case 'Y': case 'Z': | |
345 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
346 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
347 | case 'm': case 'n': case 'o': case 'q': case 'p': case 'r': | |
348 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
349 | case 'y': case 'z': | |
350 | case '_': case '.': case '$': case '/': case '\\': | |
351 | case '~': | |
352 | case '0': case '1': case '2': case '3': case '4': | |
353 | case '5': case '6': case '7': case '8': case '9': | |
354 | case '=': case '+': case '-': case ':': case '[': case ']': | |
355 | case ',': case '?': case '*': | |
356 | return true; | |
357 | ||
358 | default: | |
359 | return false; | |
360 | } | |
361 | } | |
362 | ||
363 | // For a number we accept 0x followed by hex digits, or any sequence | |
364 | // of digits. The old linker accepts leading '$' for hex, and | |
365 | // trailing HXBOD. Those are for MRI compatibility and we don't | |
366 | // accept them. The old linker also accepts trailing MK for mega or | |
367 | // kilo. Those are mentioned in the documentation, and we accept | |
368 | // them. | |
369 | ||
370 | // Return whether C1 C2 C3 can start a hex number. | |
371 | ||
372 | inline bool | |
373 | Lex::can_start_hex(char c1, char c2, char c3) | |
374 | { | |
375 | if (c1 == '0' && (c2 == 'x' || c2 == 'X')) | |
376 | return Lex::can_continue_hex(c3); | |
377 | return false; | |
378 | } | |
379 | ||
380 | // Return whether C can appear in a hex number. | |
381 | ||
382 | inline bool | |
383 | Lex::can_continue_hex(char c) | |
384 | { | |
385 | switch (c) | |
386 | { | |
387 | case '0': case '1': case '2': case '3': case '4': | |
388 | case '5': case '6': case '7': case '8': case '9': | |
389 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
390 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
391 | return true; | |
392 | ||
393 | default: | |
394 | return false; | |
395 | } | |
396 | } | |
397 | ||
398 | // Return whether C can start a non-hex number. | |
399 | ||
400 | inline bool | |
401 | Lex::can_start_number(char c) | |
402 | { | |
403 | switch (c) | |
404 | { | |
405 | case '0': case '1': case '2': case '3': case '4': | |
406 | case '5': case '6': case '7': case '8': case '9': | |
407 | return true; | |
408 | ||
409 | default: | |
410 | return false; | |
411 | } | |
412 | } | |
413 | ||
414 | // If C1 C2 C3 form a valid three character operator, return the | |
415 | // opcode (defined in the yyscript.h file generated from yyscript.y). | |
416 | // Otherwise return 0. | |
417 | ||
418 | inline int | |
419 | Lex::three_char_operator(char c1, char c2, char c3) | |
420 | { | |
421 | switch (c1) | |
422 | { | |
423 | case '<': | |
424 | if (c2 == '<' && c3 == '=') | |
425 | return LSHIFTEQ; | |
426 | break; | |
427 | case '>': | |
428 | if (c2 == '>' && c3 == '=') | |
429 | return RSHIFTEQ; | |
430 | break; | |
431 | default: | |
432 | break; | |
433 | } | |
434 | return 0; | |
435 | } | |
436 | ||
437 | // If C1 C2 form a valid two character operator, return the opcode | |
438 | // (defined in the yyscript.h file generated from yyscript.y). | |
439 | // Otherwise return 0. | |
440 | ||
441 | inline int | |
442 | Lex::two_char_operator(char c1, char c2) | |
443 | { | |
444 | switch (c1) | |
445 | { | |
446 | case '=': | |
447 | if (c2 == '=') | |
448 | return EQ; | |
449 | break; | |
450 | case '!': | |
451 | if (c2 == '=') | |
452 | return NE; | |
453 | break; | |
454 | case '+': | |
455 | if (c2 == '=') | |
456 | return PLUSEQ; | |
457 | break; | |
458 | case '-': | |
459 | if (c2 == '=') | |
460 | return MINUSEQ; | |
461 | break; | |
462 | case '*': | |
463 | if (c2 == '=') | |
464 | return MULTEQ; | |
465 | break; | |
466 | case '/': | |
467 | if (c2 == '=') | |
468 | return DIVEQ; | |
469 | break; | |
470 | case '|': | |
471 | if (c2 == '=') | |
472 | return OREQ; | |
473 | if (c2 == '|') | |
474 | return OROR; | |
475 | break; | |
476 | case '&': | |
477 | if (c2 == '=') | |
478 | return ANDEQ; | |
479 | if (c2 == '&') | |
480 | return ANDAND; | |
481 | break; | |
482 | case '>': | |
483 | if (c2 == '=') | |
484 | return GE; | |
485 | if (c2 == '>') | |
486 | return RSHIFT; | |
487 | break; | |
488 | case '<': | |
489 | if (c2 == '=') | |
490 | return LE; | |
491 | if (c2 == '<') | |
492 | return LSHIFT; | |
493 | break; | |
494 | default: | |
495 | break; | |
496 | } | |
497 | return 0; | |
498 | } | |
499 | ||
500 | // If C1 is a valid operator, return the opcode. Otherwise return 0. | |
501 | ||
502 | inline int | |
503 | Lex::one_char_operator(char c1) | |
504 | { | |
505 | switch (c1) | |
506 | { | |
507 | case '+': | |
508 | case '-': | |
509 | case '*': | |
510 | case '/': | |
511 | case '%': | |
512 | case '!': | |
513 | case '&': | |
514 | case '|': | |
515 | case '^': | |
516 | case '~': | |
517 | case '<': | |
518 | case '>': | |
519 | case '=': | |
520 | case '?': | |
521 | case ',': | |
522 | case '(': | |
523 | case ')': | |
524 | case '{': | |
525 | case '}': | |
526 | case '[': | |
527 | case ']': | |
528 | case ':': | |
529 | case ';': | |
530 | return c1; | |
531 | default: | |
532 | return 0; | |
533 | } | |
534 | } | |
535 | ||
536 | // Skip a C style comment. *PP points to just after the "/*". Return | |
537 | // false if the comment did not end. | |
538 | ||
bool
Lex::skip_c_comment(const char** pp)
{
  const char* p = *pp;
  // Scan forward until the closing "*/".
  while (p[0] != '*' || p[1] != '/')
    {
      // A NUL marks the end of the buffer: the comment never ended.
      if (*p == '\0')
	{
	  *pp = p;
	  return false;
	}

      // Keep the line bookkeeping current so tokens after the comment
      // get correct line/column positions.
      if (*p == '\n')
	{
	  ++this->lineno_;
	  this->linestart_ = p + 1;
	}
      ++p;
    }

  // Step past the two characters of the closing "*/".
  *pp = p + 2;
  return true;
}
562 | ||
563 | // Skip a line # comment. Return false if there was no newline. | |
564 | ||
bool
Lex::skip_line_comment(const char** pp)
{
  const char* p = *pp;
  // Find the next newline; strcspn stops at the terminating NUL if
  // there is none.
  size_t skip = strcspn(p, "\n");
  if (p[skip] == '\0')
    {
      // The comment ran to the end of the buffer with no newline.
      *pp = p + skip;
      return false;
    }

  // Step past the newline and update the line bookkeeping.
  p += skip + 1;
  ++this->lineno_;
  this->linestart_ = p;
  *pp = p;

  return true;
}
583 | ||
584 | // Build a token CLASSIFICATION from all characters that match | |
585 | // CAN_CONTINUE_FN. Update *PP. | |
586 | ||
587 | inline Token | |
588 | Lex::gather_token(Token::Classification classification, | |
589 | bool (*can_continue_fn)(char), | |
590 | const char* start, | |
591 | const char* match, | |
592 | const char **pp) | |
593 | { | |
594 | while ((*can_continue_fn)(*match)) | |
595 | ++match; | |
596 | *pp = match; | |
597 | return this->make_token(classification, | |
598 | std::string(start, match - start), | |
599 | start); | |
600 | } | |
601 | ||
602 | // Build a token from a quoted string. | |
603 | ||
Token
Lex::gather_quoted_string(const char** pp)
{
  const char* start = *pp;
  const char* p = start;
  // Step past the opening double quote.
  ++p;
  // A quoted string may not cross a line boundary; stop at the first
  // closing quote, newline, or (via strcspn) terminating NUL.
  size_t skip = strcspn(p, "\"\n");
  if (p[skip] != '"')
    return this->make_invalid_token(start);
  // Consume through the closing quote.  The quotes themselves are not
  // part of the token's value.
  *pp = p + skip + 1;
  return this->make_token(Token::TOKEN_STRING,
			  std::string(p, skip),
			  start);
}
618 | ||
619 | // Return the next token at *PP. Update *PP. General guideline: we | |
620 | // require linker scripts to be simple ASCII. No unicode linker | |
621 | // scripts. In particular we can assume that any '\0' is the end of | |
622 | // the input. | |
623 | ||
624 | Token | |
625 | Lex::get_token(const char** pp) | |
626 | { | |
627 | const char* p = *pp; | |
628 | ||
629 | while (true) | |
630 | { | |
631 | if (*p == '\0') | |
632 | { | |
633 | *pp = p; | |
634 | return this->make_eof_token(p); | |
635 | } | |
636 | ||
637 | // Skip whitespace quickly. | |
638 | while (*p == ' ' || *p == '\t') | |
639 | ++p; | |
640 | ||
641 | if (*p == '\n') | |
642 | { | |
643 | ++p; | |
644 | ++this->lineno_; | |
645 | this->linestart_ = p; | |
646 | continue; | |
647 | } | |
648 | ||
649 | // Skip C style comments. | |
650 | if (p[0] == '/' && p[1] == '*') | |
651 | { | |
652 | int lineno = this->lineno_; | |
653 | int charpos = p - this->linestart_ + 1; | |
654 | ||
655 | *pp = p + 2; | |
656 | if (!this->skip_c_comment(pp)) | |
657 | return Token(Token::TOKEN_INVALID, lineno, charpos); | |
658 | p = *pp; | |
659 | ||
660 | continue; | |
661 | } | |
662 | ||
663 | // Skip line comments. | |
664 | if (*p == '#') | |
665 | { | |
666 | *pp = p + 1; | |
667 | if (!this->skip_line_comment(pp)) | |
668 | return this->make_eof_token(p); | |
669 | p = *pp; | |
670 | continue; | |
671 | } | |
672 | ||
673 | // Check for a name. | |
674 | if (Lex::can_start_name(p[0], p[1])) | |
675 | return this->gather_token(Token::TOKEN_STRING, | |
676 | Lex::can_continue_name, | |
677 | p, p + 2, pp); | |
678 | ||
679 | // We accept any arbitrary name in double quotes, as long as it | |
680 | // does not cross a line boundary. | |
681 | if (*p == '"') | |
682 | { | |
683 | *pp = p; | |
684 | return this->gather_quoted_string(pp); | |
685 | } | |
686 | ||
687 | // Check for a number. | |
688 | ||
689 | if (Lex::can_start_hex(p[0], p[1], p[2])) | |
690 | return this->gather_token(Token::TOKEN_INTEGER, | |
691 | Lex::can_continue_hex, | |
692 | p, p + 3, pp); | |
693 | ||
694 | if (Lex::can_start_number(p[0])) | |
695 | return this->gather_token(Token::TOKEN_INTEGER, | |
696 | Lex::can_continue_number, | |
697 | p, p + 1, pp); | |
698 | ||
699 | // Check for operators. | |
700 | ||
701 | int opcode = Lex::three_char_operator(p[0], p[1], p[2]); | |
702 | if (opcode != 0) | |
703 | { | |
704 | *pp = p + 3; | |
705 | return this->make_token(opcode, p); | |
706 | } | |
707 | ||
708 | opcode = Lex::two_char_operator(p[0], p[1]); | |
709 | if (opcode != 0) | |
710 | { | |
711 | *pp = p + 2; | |
712 | return this->make_token(opcode, p); | |
713 | } | |
714 | ||
715 | opcode = Lex::one_char_operator(p[0]); | |
716 | if (opcode != 0) | |
717 | { | |
718 | *pp = p + 1; | |
719 | return this->make_token(opcode, p); | |
720 | } | |
721 | ||
722 | return this->make_token(Token::TOKEN_INVALID, p); | |
723 | } | |
724 | } | |
725 | ||
726 | // Tokenize the file. Return the final token. | |
727 | ||
Token
Lex::tokenize()
{
  // Read the whole script into memory; linker scripts are small.
  std::string contents;
  this->read_file(&contents);

  const char* p = contents.c_str();

  // Positions are one based; get_token and the skip helpers keep
  // these up to date as they advance.
  this->lineno_ = 1;
  this->linestart_ = p;

  while (true)
    {
      Token t(this->get_token(&p));

      // Don't let an early null byte fool us into thinking that we've
      // reached the end of the file.
      if (t.is_eof()
	  && static_cast<size_t>(p - contents.c_str()) < contents.length())
	t = this->make_invalid_token(p);

      // Both terminators are returned to the caller; only real tokens
      // are accumulated.
      if (t.is_invalid() || t.is_eof())
	return t;

      this->tokens_.push_back(t);
    }
}
755 | ||
756 | // A trivial task which waits for THIS_BLOCKER to be clear and then | |
757 | // clears NEXT_BLOCKER. THIS_BLOCKER may be NULL. | |
758 | ||
class Script_unblock : public Task
{
 public:
  Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
    : this_blocker_(this_blocker), next_blocker_(next_blocker)
  { }

  // This task owns THIS_BLOCKER (when non-NULL) and is responsible
  // for deleting it.
  ~Script_unblock()
  {
    if (this->this_blocker_ != NULL)
      delete this->this_blocker_;
  }

  // Runnable only once THIS_BLOCKER (if any) has been cleared.
  Is_runnable_type
  is_runnable(Workqueue*)
  {
    if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
      return IS_BLOCKED;
    return IS_RUNNABLE;
  }

  // Holding NEXT_BLOCKER as a lock means it is cleared when this task
  // completes; that is the task's entire effect.
  Task_locker*
  locks(Workqueue* workqueue)
  {
    return new Task_locker_block(*this->next_blocker_, workqueue);
  }

  // Nothing to do; the work is all in the blocker handling above.
  void
  run(Workqueue*)
  { }

 private:
  // Token this task waits on; owned and deleted by this task.
  Task_token* this_blocker_;
  // Token cleared when this task finishes; not owned.
  Task_token* next_blocker_;
};
794 | ||
795 | // This class holds data passed through the parser to the lexer and to | |
796 | // the parser support functions. This avoids global variables. We | |
797 | // can't use global variables because we need not be called in the | |
798 | // main thread. | |
799 | ||
class Parser_closure
{
 public:
  Parser_closure(const char* filename,
		 const Position_dependent_options& posdep_options,
		 bool in_group,
		 const Lex::Token_sequence* tokens)
    : filename_(filename), posdep_options_(posdep_options),
      in_group_(in_group), tokens_(tokens),
      next_token_index_(0), inputs_(NULL)
  { }

  // Return the file name.
  const char*
  filename() const
  { return this->filename_; }

  // Return the position dependent options.  The caller may modify
  // this.
  Position_dependent_options&
  position_dependent_options()
  { return this->posdep_options_; }

  // Return whether this script is being run in a group.
  bool
  in_group() const
  { return this->in_group_; }

  // Whether we are at the end of the token list.
  bool
  at_eof() const
  { return this->next_token_index_ >= this->tokens_->size(); }

  // Return the next token and advance past it.  There is no bounds
  // check here; the caller is expected to test at_eof() first.
  const Token*
  next_token()
  {
    const Token* ret = &(*this->tokens_)[this->next_token_index_];
    ++this->next_token_index_;
    return ret;
  }

  // Return the list of input files, creating it if necessary.  This
  // is a space leak--we never free the INPUTS_ pointer.
  Input_arguments*
  inputs()
  {
    if (this->inputs_ == NULL)
      this->inputs_ = new Input_arguments();
    return this->inputs_;
  }

  // Return whether we saw any input files.
  bool
  saw_inputs() const
  { return this->inputs_ != NULL && !this->inputs_->empty(); }

 private:
  // The name of the file we are reading.
  const char* filename_;
  // The position dependent options.
  Position_dependent_options posdep_options_;
  // Whether we are currently in a --start-group/--end-group.
  bool in_group_;

  // The tokens to be returned by the lexer.
  const Lex::Token_sequence* tokens_;
  // The index of the next token to return.
  unsigned int next_token_index_;
  // New input files found to add to the link; NULL until first use.
  Input_arguments* inputs_;
};
872 | ||
873 | // FILE was found as an argument on the command line. Try to read it | |
874 | // as a script. We've already read BYTES of data into P, but we | |
875 | // ignore that. Return true if the file was handled. | |
876 | ||
bool
read_input_script(Workqueue* workqueue, const General_options& options,
		  Symbol_table* symtab, Layout* layout,
		  const Dirsearch& dirsearch, Input_objects* input_objects,
		  Input_group* input_group,
		  const Input_argument* input_argument,
		  Input_file* input_file, const unsigned char*, off_t,
		  Task_token* this_blocker, Task_token* next_blocker)
{
  // Tokenize the whole file up front; a lexical error means this is
  // not a linker script.
  Lex lex(input_file);
  if (lex.tokenize().is_invalid())
    return false;

  // The closure carries per-parse state (file name, options, token
  // stream, discovered input files) so the bison parser needs no
  // globals.
  Parser_closure closure(input_file->filename().c_str(),
			 input_argument->file().options(),
			 input_group != NULL,
			 &lex.tokens());

  if (yyparse(&closure) != 0)
    return false;

  // THIS_BLOCKER must be clear before we may add anything to the
  // symbol table.  We are responsible for unblocking NEXT_BLOCKER
  // when we are done.  We are responsible for deleting THIS_BLOCKER
  // when it is unblocked.

  if (!closure.saw_inputs())
    {
      // The script did not add any files to read.  Note that we are
      // not permitted to call NEXT_BLOCKER->unblock() here even if
      // THIS_BLOCKER is NULL, as we are not in the main thread.
      workqueue->queue(new Script_unblock(this_blocker, next_blocker));
      return true;
    }

  // Queue a Read_symbols task per input file the script named,
  // chaining fresh blocker tokens between them so the files are
  // processed in order; the last task unblocks NEXT_BLOCKER.
  for (Input_arguments::const_iterator p = closure.inputs()->begin();
       p != closure.inputs()->end();
       ++p)
    {
      Task_token* nb;
      if (p + 1 == closure.inputs()->end())
	nb = next_blocker;
      else
	{
	  nb = new Task_token();
	  nb->add_blocker();
	}
      workqueue->queue(new Read_symbols(options, input_objects, symtab,
					layout, dirsearch, &*p,
					input_group, this_blocker, nb));
      this_blocker = nb;
    }

  return true;
}
932 | ||
933 | // Manage mapping from keywords to the codes expected by the bison | |
934 | // parser. | |
935 | ||
class Keyword_to_parsecode
{
 public:
  // The structure which maps keywords to parsecodes.
  struct Keyword_parsecode
  {
    // Keyword.
    const char* keyword;
    // Corresponding parsecode.
    int parsecode;
  };

  // Return the parsecode corresponding to KEYWORD, or 0 if it is not
  // a keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The array of all keywords.  Must stay sorted by keyword name,
  // since lookup uses bsearch.
  static const Keyword_parsecode keyword_parsecodes_[];

  // The number of keywords.
  static const int keyword_count;
};
960 | ||
961 | // Mapping from keyword string to keyword parsecode. This array must | |
962 | // be kept in sorted order. Parsecodes are looked up using bsearch. | |
963 | // This array must correspond to the list of parsecodes in yyscript.y. | |
964 | ||
const Keyword_to_parsecode::Keyword_parsecode
Keyword_to_parsecode::keyword_parsecodes_[] =
{
  { "ABSOLUTE", ABSOLUTE },
  { "ADDR", ADDR },
  { "ALIGN", ALIGN_K },
  { "ASSERT", ASSERT_K },
  { "AS_NEEDED", AS_NEEDED },
  { "AT", AT },
  { "BIND", BIND },
  { "BLOCK", BLOCK },
  { "BYTE", BYTE },
  { "CONSTANT", CONSTANT },
  { "CONSTRUCTORS", CONSTRUCTORS },
  { "COPY", COPY },
  { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
  { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
  { "DATA_SEGMENT_END", DATA_SEGMENT_END },
  { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
  { "DEFINED", DEFINED },
  { "DSECT", DSECT },
  { "ENTRY", ENTRY },
  { "EXCLUDE_FILE", EXCLUDE_FILE },
  { "EXTERN", EXTERN },
  { "FILL", FILL },
  { "FLOAT", FLOAT },
  { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
  { "GROUP", GROUP },
  { "HLL", HLL },
  { "INCLUDE", INCLUDE },
  { "INFO", INFO },
  { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
  { "INPUT", INPUT },
  { "KEEP", KEEP },
  { "LENGTH", LENGTH },
  { "LOADADDR", LOADADDR },
  { "LONG", LONG },
  { "MAP", MAP },
  { "MAX", MAX_K },
  { "MEMORY", MEMORY },
  { "MIN", MIN_K },
  { "NEXT", NEXT },
  { "NOCROSSREFS", NOCROSSREFS },
  { "NOFLOAT", NOFLOAT },
  { "NOLOAD", NOLOAD },
  { "ONLY_IF_RO", ONLY_IF_RO },
  { "ONLY_IF_RW", ONLY_IF_RW },
  { "ORIGIN", ORIGIN },
  { "OUTPUT", OUTPUT },
  { "OUTPUT_ARCH", OUTPUT_ARCH },
  { "OUTPUT_FORMAT", OUTPUT_FORMAT },
  { "OVERLAY", OVERLAY },
  { "PHDRS", PHDRS },
  { "PROVIDE", PROVIDE },
  { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
  { "QUAD", QUAD },
  { "SEARCH_DIR", SEARCH_DIR },
  { "SECTIONS", SECTIONS },
  { "SEGMENT_START", SEGMENT_START },
  { "SHORT", SHORT },
  { "SIZEOF", SIZEOF },
  { "SIZEOF_HEADERS", SIZEOF_HEADERS },
  { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
  { "SORT_BY_NAME", SORT_BY_NAME },
  { "SPECIAL", SPECIAL },
  { "SQUAD", SQUAD },
  { "STARTUP", STARTUP },
  { "SUBALIGN", SUBALIGN },
  { "SYSLIB", SYSLIB },
  { "TARGET", TARGET_K },
  { "TRUNCATE", TRUNCATE },
  { "VERSION", VERSIONK },
  // MRI-style lower case aliases; these sort after the upper case
  // entries in ASCII order, as bsearch requires.
  { "global", GLOBAL },
  { "l", LENGTH },
  { "len", LENGTH },
  { "local", LOCAL },
  { "o", ORIGIN },
  { "org", ORIGIN },
  { "sizeof_headers", SIZEOF_HEADERS },
};

// The number of entries, computed from the array itself so the two
// can never get out of sync.
const int Keyword_to_parsecode::keyword_count =
  (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
   / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1049 | ||
1050 | // Comparison function passed to bsearch. | |
1051 | ||
1052 | extern "C" | |
1053 | { | |
1054 | ||
1055 | static int | |
1056 | ktt_compare(const void* keyv, const void* kttv) | |
1057 | { | |
1058 | const char* key = static_cast<const char*>(keyv); | |
1059 | const Keyword_to_parsecode::Keyword_parsecode* ktt = | |
1060 | static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv); | |
1061 | return strcmp(key, ktt->keyword); | |
1062 | } | |
1063 | ||
1064 | } // End extern "C". | |
1065 | ||
1066 | int | |
1067 | Keyword_to_parsecode::keyword_to_parsecode(const char* keyword) | |
1068 | { | |
1069 | void* kttv = bsearch(keyword, | |
1070 | Keyword_to_parsecode::keyword_parsecodes_, | |
1071 | Keyword_to_parsecode::keyword_count, | |
1072 | sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]), | |
1073 | ktt_compare); | |
1074 | if (kttv == NULL) | |
1075 | return 0; | |
1076 | Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv); | |
1077 | return ktt->parsecode; | |
1078 | } | |
1079 | ||
1080 | } // End namespace gold. | |
1081 | ||
1082 | // The remaining functions are extern "C", so it's clearer to not put | |
1083 | // them in namespace gold. | |
1084 | ||
1085 | using namespace gold; | |
1086 | ||
1087 | // This function is called by the bison parser to return the next | |
1088 | // token. | |
1089 | ||
1090 | extern "C" int | |
1091 | yylex(YYSTYPE* lvalp, void* closurev) | |
1092 | { | |
1093 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1094 | ||
1095 | if (closure->at_eof()) | |
1096 | return 0; | |
1097 | ||
1098 | const Token* token = closure->next_token(); | |
1099 | ||
1100 | switch (token->classification()) | |
1101 | { | |
1102 | default: | |
1103 | case Token::TOKEN_INVALID: | |
1104 | case Token::TOKEN_EOF: | |
a3ad94ed | 1105 | gold_unreachable(); |
dbe717ef ILT |
1106 | |
1107 | case Token::TOKEN_STRING: | |
1108 | { | |
1109 | const char* str = token->string_value().c_str(); | |
1110 | int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str); | |
1111 | if (parsecode != 0) | |
1112 | return parsecode; | |
1113 | lvalp->string = str; | |
1114 | return STRING; | |
1115 | } | |
1116 | ||
1117 | case Token::TOKEN_OPERATOR: | |
1118 | return token->operator_value(); | |
1119 | ||
1120 | case Token::TOKEN_INTEGER: | |
1121 | lvalp->integer = token->integer_value(); | |
1122 | return INTEGER; | |
1123 | } | |
1124 | } | |
1125 | ||
1126 | // This function is called by the bison parser to report an error. | |
1127 | ||
1128 | extern "C" void | |
1129 | yyerror(void* closurev, const char* message) | |
1130 | { | |
1131 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1132 | ||
1133 | fprintf(stderr, _("%s: %s: %s\n"), | |
1134 | program_name, closure->filename(), message); | |
1135 | gold_exit(false); | |
1136 | } | |
1137 | ||
1138 | // Called by the bison parser to add a file to the link. | |
1139 | ||
1140 | extern "C" void | |
1141 | script_add_file(void* closurev, const char* name) | |
1142 | { | |
1143 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1144 | Input_file_argument file(name, false, closure->position_dependent_options()); | |
1145 | closure->inputs()->add_file(file); | |
1146 | } | |
1147 | ||
1148 | // Called by the bison parser to start a group. If we are already in | |
1149 | // a group, that means that this script was invoked within a | |
1150 | // --start-group --end-group sequence on the command line, or that | |
1151 | // this script was found in a GROUP of another script. In that case, | |
1152 | // we simply continue the existing group, rather than starting a new | |
1153 | // one. It is possible to construct a case in which this will do | |
1154 | // something other than what would happen if we did a recursive group, | |
1155 | // but it's hard to imagine why the different behaviour would be | |
1156 | // useful for a real program. Avoiding recursive groups is simpler | |
1157 | // and more efficient. | |
1158 | ||
1159 | extern "C" void | |
1160 | script_start_group(void* closurev) | |
1161 | { | |
1162 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1163 | if (!closure->in_group()) | |
1164 | closure->inputs()->start_group(); | |
1165 | } | |
1166 | ||
1167 | // Called by the bison parser at the end of a group. | |
1168 | ||
1169 | extern "C" void | |
1170 | script_end_group(void* closurev) | |
1171 | { | |
1172 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1173 | if (!closure->in_group()) | |
1174 | closure->inputs()->end_group(); | |
1175 | } | |
1176 | ||
1177 | // Called by the bison parser to start an AS_NEEDED list. | |
1178 | ||
1179 | extern "C" void | |
1180 | script_start_as_needed(void* closurev) | |
1181 | { | |
1182 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1183 | closure->position_dependent_options().set_as_needed(); | |
1184 | } | |
1185 | ||
1186 | // Called by the bison parser at the end of an AS_NEEDED list. | |
1187 | ||
1188 | extern "C" void | |
1189 | script_end_as_needed(void* closurev) | |
1190 | { | |
1191 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1192 | closure->position_dependent_options().clear_as_needed(); | |
1193 | } |