Make power8 the default cpu when assembling for 64-bit little endian targets.
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987-2018 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22 /* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
26
27 #include "as.h"
28
/* Compilers that do not claim ISO C conformance get `const' defined
   away, unless something has already defined it.  */
#if (__STDC__ != 1) && ! defined (const)
#define const /* empty */
#endif

#ifdef H_TICK_HEX
/* Nonzero to have the scrubber classify 'h'/'H' specially so that
   H'<hex digits> can be rewritten as 0x<hex digits>.  */
int enable_h_tick_hex = 0;
#endif

#ifdef TC_M68K

/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  The trailing
   '0' is also allowed to match a '1'; see the comment in
   do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#else /* ! TC_M68K */

/* Without m68k support the scrubber is never in MRI mode.  */
#define scrub_m68k_mri 0

#endif /* ! TC_M68K */

#if defined (TC_ARM) && defined (OBJ_ELF)
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress through symver_pseudo while matching it against the
   input, or NULL when no match is in progress.  */
static const char * symver_state;
#endif

#ifdef TC_ARM
/* Stands in for the previously output character when do_scrub_chars
   checks for `\@' and its output buffer is still empty.  */
static char last_char;
#endif
61
/* Lexical class of each input byte, indexed by the byte's value as an
   unsigned char.  Filled in by do_scrub_begin, which treats a zero
   entry as "no class assigned".  */
static char lex[256];

/* Characters that do_scrub_begin marks as symbol components before
   any target-specific overrides are applied.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The lexical classes stored in lex[].  The value 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

#if defined (TC_V850)
#define LEX_IS_DOUBLEDASH_1ST           12
#endif

/* The M32R always uses "||" as a parallel separator.  */
#if defined (TC_M32R)
#define DOUBLEBAR_PARALLEL
#endif

#if defined (DOUBLEBAR_PARALLEL)
#define LEX_IS_DOUBLEBAR_1ST            13
#endif

#define LEX_IS_PARALLEL_SEPARATOR       14

#if defined (H_TICK_HEX)
#define LEX_IS_H                        15
#endif

/* Predicates testing the lexical class of character C.  C is used
   directly as an index into lex[].  */
#define IS_SYMBOL_COMPONENT(c)   (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)         (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)     (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c)            (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)       (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)            (lex[c] == LEX_IS_NEWLINE)

static int process_escape (int);
98
99 /* FIXME-soon: The entire lexer/parser thingy should be
100 built statically at compile time rather than dynamically
101 each and every time the assembler is run. xoxorich. */
102
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105 {
106 const char *p;
107 int c;
108
109 lex[' '] = LEX_IS_WHITESPACE;
110 lex['\t'] = LEX_IS_WHITESPACE;
111 lex['\r'] = LEX_IS_WHITESPACE;
112 lex['\n'] = LEX_IS_NEWLINE;
113 lex[':'] = LEX_IS_COLON;
114
115 #ifdef TC_M68K
116 scrub_m68k_mri = m68k_mri;
117
118 if (! m68k_mri)
119 #endif
120 {
121 lex['"'] = LEX_IS_STRINGQUOTE;
122
123 #if ! defined (TC_HPPA) && ! defined (TC_I370)
124 /* I370 uses single-quotes to delimit integer, float constants. */
125 lex['\''] = LEX_IS_ONECHAR_QUOTE;
126 #endif
127
128 #ifdef SINGLE_QUOTE_STRINGS
129 lex['\''] = LEX_IS_STRINGQUOTE;
130 #endif
131 }
132
133 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
134 in state 5 of do_scrub_chars must be changed. */
135
136 /* Note that these override the previous defaults, e.g. if ';' is a
137 comment char, then it isn't a line separator. */
138 for (p = symbol_chars; *p; ++p)
139 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
140
141 for (c = 128; c < 256; ++c)
142 lex[c] = LEX_IS_SYMBOL_COMPONENT;
143
144 #ifdef tc_symbol_chars
145 /* This macro permits the processor to specify all characters which
146 may appears in an operand. This will prevent the scrubber from
147 discarding meaningful whitespace in certain cases. The i386
148 backend uses this to support prefixes, which can confuse the
149 scrubber as to whether it is parsing operands or opcodes. */
150 for (p = tc_symbol_chars; *p; ++p)
151 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
152 #endif
153
154 /* The m68k backend wants to be able to change comment_chars. */
155 #ifndef tc_comment_chars
156 #define tc_comment_chars comment_chars
157 #endif
158 for (p = tc_comment_chars; *p; p++)
159 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
160
161 for (p = line_comment_chars; *p; p++)
162 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
163
164 #ifndef tc_line_separator_chars
165 #define tc_line_separator_chars line_separator_chars
166 #endif
167 for (p = tc_line_separator_chars; *p; p++)
168 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
169
170 #ifdef tc_parallel_separator_chars
171 /* This macro permits the processor to specify all characters which
172 separate parallel insns on the same line. */
173 for (p = tc_parallel_separator_chars; *p; p++)
174 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
175 #endif
176
177 /* Only allow slash-star comments if slash is not in use.
178 FIXME: This isn't right. We should always permit them. */
179 if (lex['/'] == 0)
180 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
181
182 #ifdef TC_M68K
183 if (m68k_mri)
184 {
185 lex['\''] = LEX_IS_STRINGQUOTE;
186 lex[';'] = LEX_IS_COMMENT_START;
187 lex['*'] = LEX_IS_LINE_COMMENT_START;
188 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
189 then it can't be used in an expression. */
190 lex['!'] = LEX_IS_LINE_COMMENT_START;
191 }
192 #endif
193
194 #ifdef TC_V850
195 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
196 #endif
197 #ifdef DOUBLEBAR_PARALLEL
198 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
199 #endif
200 #ifdef TC_D30V
201 /* Must do this is we want VLIW instruction with "->" or "<-". */
202 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
203 #endif
204
205 #ifdef H_TICK_HEX
206 if (enable_h_tick_hex)
207 {
208 lex['h'] = LEX_IS_H;
209 lex['H'] = LEX_IS_H;
210 }
211 #endif
212 }
213
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point, so everything it needs in order to resume lives here.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;

/* State to return to once a temporary state (-1, -2, 5) is done.  */
static int old_state;

/* Remaining text to emit while in state -1.  */
static const char *out_string;

/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];

/* Number of newlines seen inside comments and continued strings that
   still have to be written to the output.  */
static int add_newlines;

/* If not NULL, the SAVED_INPUT_LEN bytes of input that do_scrub_chars
   reads before asking its GET callback for more.  */
static char *saved_input;
static size_t saved_input_len;

/* Buffer that the GET callback of do_scrub_chars fills.  */
static char input_buffer[32 * 1024];

/* Progress through mri_pseudo while matching a ".mri" pseudo-op, or
   NULL when no match is in progress.  */
static const char *mri_state;

/* The most recent character accepted while matching mri_pseudo.  */
static char mri_last_ch;
225
226 /* Data structure for saving the state of app across #include's. Note that
227 app is called asynchronously to the parsing of the .include's, so our
228 state at the time .include is interpreted is completely unrelated.
229 That's why we have to save it all. */
230
231 struct app_save
232 {
233 int state;
234 int old_state;
235 const char * out_string;
236 char out_buf[sizeof (out_buf)];
237 int add_newlines;
238 char * saved_input;
239 size_t saved_input_len;
240 #ifdef TC_M68K
241 int scrub_m68k_mri;
242 #endif
243 const char * mri_state;
244 char mri_last_ch;
245 #if defined TC_ARM && defined OBJ_ELF
246 const char * symver_state;
247 #endif
248 #ifdef TC_ARM
249 char last_char;
250 #endif
251 };
252
253 char *
254 app_push (void)
255 {
256 struct app_save *saved;
257
258 saved = XNEW (struct app_save);
259 saved->state = state;
260 saved->old_state = old_state;
261 saved->out_string = out_string;
262 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
263 saved->add_newlines = add_newlines;
264 if (saved_input == NULL)
265 saved->saved_input = NULL;
266 else
267 {
268 saved->saved_input = XNEWVEC (char, saved_input_len);
269 memcpy (saved->saved_input, saved_input, saved_input_len);
270 saved->saved_input_len = saved_input_len;
271 }
272 #ifdef TC_M68K
273 saved->scrub_m68k_mri = scrub_m68k_mri;
274 #endif
275 saved->mri_state = mri_state;
276 saved->mri_last_ch = mri_last_ch;
277 #if defined TC_ARM && defined OBJ_ELF
278 saved->symver_state = symver_state;
279 #endif
280 #ifdef TC_ARM
281 saved->last_char = last_char;
282 #endif
283
284 /* do_scrub_begin() is not useful, just wastes time. */
285
286 state = 0;
287 saved_input = NULL;
288 add_newlines = 0;
289
290 return (char *) saved;
291 }
292
293 void
294 app_pop (char *arg)
295 {
296 struct app_save *saved = (struct app_save *) arg;
297
298 /* There is no do_scrub_end (). */
299 state = saved->state;
300 old_state = saved->old_state;
301 out_string = saved->out_string;
302 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
303 add_newlines = saved->add_newlines;
304 if (saved->saved_input == NULL)
305 saved_input = NULL;
306 else
307 {
308 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
309 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
310 saved_input = input_buffer;
311 saved_input_len = saved->saved_input_len;
312 free (saved->saved_input);
313 }
314 #ifdef TC_M68K
315 scrub_m68k_mri = saved->scrub_m68k_mri;
316 #endif
317 mri_state = saved->mri_state;
318 mri_last_ch = saved->mri_last_ch;
319 #if defined TC_ARM && defined OBJ_ELF
320 symver_state = saved->symver_state;
321 #endif
322 #ifdef TC_ARM
323 last_char = saved->last_char;
324 #endif
325
326 free (arg);
327 }
328
/* Return the character denoted by a backslash followed by CH.  Only
   b, f, n, r, t, ' and " are translated; any other value of CH is
   returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */

static int
process_escape (int ch)
{
  /* Each escape letter paired with the character it stands for.  */
  static const struct
  {
    char letter;
    char value;
  } escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  size_t i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  /* Anything else stands for itself.  */
  return ch;
}
355
356 /* This function is called to process input characters. The GET
357 parameter is used to retrieve more input characters. GET should
358 set its parameter to point to a buffer, and return the length of
359 the buffer; it should return 0 at end of file. The scrubbed output
360 characters are put into the buffer starting at TOSTART; the TOSTART
361 buffer is TOLEN bytes in length. The function returns the number
362 of scrubbed characters put into TOSTART. This will be TOLEN unless
363 end of file was seen. This function is arranged as a state
364 machine, and saves its state so that it may return at any point.
365 This is the way the old code used to work. */
366
367 size_t
368 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
369 {
370 char *to = tostart;
371 char *toend = tostart + tolen;
372 char *from;
373 char *fromend;
374 size_t fromlen;
375 int ch, ch2 = 0;
376 /* Character that started the string we're working on. */
377 static char quotechar;
378
379 /*State 0: beginning of normal line
380 1: After first whitespace on line (flush more white)
381 2: After first non-white (opcode) on line (keep 1white)
382 3: after second white on line (into operands) (flush white)
383 4: after putting out a .linefile, put out digits
384 5: parsing a string, then go to old-state
385 6: putting out \ escape in a "d string.
386 7: no longer used
387 8: no longer used
388 9: After seeing symbol char in state 3 (keep 1white after symchar)
389 10: After seeing whitespace in state 9 (keep white before symchar)
390 11: After seeing a symbol character in state 0 (eg a label definition)
391 -1: output string in out_string and go to the state in old_state
392 -2: flush text until a '*' '/' is seen, then go to state old_state
393 #ifdef TC_V850
394 12: After seeing a dash, looking for a second dash as a start
395 of comment.
396 #endif
397 #ifdef DOUBLEBAR_PARALLEL
398 13: After seeing a vertical bar, looking for a second
399 vertical bar as a parallel expression separator.
400 #endif
401 #ifdef TC_PREDICATE_START_CHAR
402 14: After seeing a predicate start character at state 0, looking
403 for a predicate end character as predicate.
404 15: After seeing a predicate start character at state 1, looking
405 for a predicate end character as predicate.
406 #endif
407 #ifdef TC_Z80
408 16: After seeing an 'a' or an 'A' at the start of a symbol
409 17: After seeing an 'f' or an 'F' in state 16
410 #endif
411 */
412
413 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
414 constructs like ``.loc 1 20''. This was turning into ``.loc
415 120''. States 9 and 10 ensure that a space is never dropped in
416 between characters which could appear in an identifier. Ian
417 Taylor, ian@cygnus.com.
418
419 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
420 correctly on the PA (and any other target where colons are optional).
421 Jeff Law, law@cs.utah.edu.
422
423 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
424 get squashed into "cmp r1,r2||trap#1", with the all important space
425 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
426
427 /* This macro gets the next input character. */
428
429 #define GET() \
430 (from < fromend \
431 ? * (unsigned char *) (from++) \
432 : (saved_input = NULL, \
433 fromlen = (*get) (input_buffer, sizeof input_buffer), \
434 from = input_buffer, \
435 fromend = from + fromlen, \
436 (fromlen == 0 \
437 ? EOF \
438 : * (unsigned char *) (from++))))
439
440 /* This macro pushes a character back on the input stream. */
441
442 #define UNGET(uch) (*--from = (uch))
443
444 /* This macro puts a character into the output buffer. If this
445 character fills the output buffer, this macro jumps to the label
446 TOFULL. We use this rather ugly approach because we need to
447 handle two different termination conditions: EOF on the input
448 stream, and a full output buffer. It would be simpler if we
449 always read in the entire input stream before processing it, but
450 I don't want to make such a significant change to the assembler's
451 memory usage. */
452
453 #define PUT(pch) \
454 do \
455 { \
456 *to++ = (pch); \
457 if (to >= toend) \
458 goto tofull; \
459 } \
460 while (0)
461
462 if (saved_input != NULL)
463 {
464 from = saved_input;
465 fromend = from + saved_input_len;
466 }
467 else
468 {
469 fromlen = (*get) (input_buffer, sizeof input_buffer);
470 if (fromlen == 0)
471 return 0;
472 from = input_buffer;
473 fromend = from + fromlen;
474 }
475
476 while (1)
477 {
478 /* The cases in this switch end with continue, in order to
479 branch back to the top of this while loop and generate the
480 next output character in the appropriate state. */
481 switch (state)
482 {
483 case -1:
484 ch = *out_string++;
485 if (*out_string == '\0')
486 {
487 state = old_state;
488 old_state = 3;
489 }
490 PUT (ch);
491 continue;
492
493 case -2:
494 for (;;)
495 {
496 do
497 {
498 ch = GET ();
499
500 if (ch == EOF)
501 {
502 as_warn (_("end of file in comment"));
503 goto fromeof;
504 }
505
506 if (ch == '\n')
507 PUT ('\n');
508 }
509 while (ch != '*');
510
511 while ((ch = GET ()) == '*')
512 ;
513
514 if (ch == EOF)
515 {
516 as_warn (_("end of file in comment"));
517 goto fromeof;
518 }
519
520 if (ch == '/')
521 break;
522
523 UNGET (ch);
524 }
525
526 state = old_state;
527 UNGET (' ');
528 continue;
529
530 case 4:
531 ch = GET ();
532 if (ch == EOF)
533 goto fromeof;
534 else if (ch >= '0' && ch <= '9')
535 PUT (ch);
536 else
537 {
538 while (ch != EOF && IS_WHITESPACE (ch))
539 ch = GET ();
540 if (ch == '"')
541 {
542 quotechar = ch;
543 state = 5;
544 old_state = 3;
545 PUT (ch);
546 }
547 else
548 {
549 while (ch != EOF && ch != '\n')
550 ch = GET ();
551 state = 0;
552 PUT (ch);
553 }
554 }
555 continue;
556
557 case 5:
558 /* We are going to copy everything up to a quote character,
559 with special handling for a backslash. We try to
560 optimize the copying in the simple case without using the
561 GET and PUT macros. */
562 {
563 char *s;
564 ptrdiff_t len;
565
566 for (s = from; s < fromend; s++)
567 {
568 ch = *s;
569 if (ch == '\\'
570 || ch == quotechar
571 || ch == '\n')
572 break;
573 }
574 len = s - from;
575 if (len > toend - to)
576 len = toend - to;
577 if (len > 0)
578 {
579 memcpy (to, from, len);
580 to += len;
581 from += len;
582 if (to >= toend)
583 goto tofull;
584 }
585 }
586
587 ch = GET ();
588 if (ch == EOF)
589 {
590 /* This buffer is here specifically so
591 that the UNGET below will work. */
592 static char one_char_buf[1];
593
594 as_warn (_("end of file in string; '%c' inserted"), quotechar);
595 state = old_state;
596 from = fromend = one_char_buf + 1;
597 fromlen = 1;
598 UNGET ('\n');
599 PUT (quotechar);
600 }
601 else if (ch == quotechar)
602 {
603 state = old_state;
604 PUT (ch);
605 }
606 #ifndef NO_STRING_ESCAPES
607 else if (ch == '\\')
608 {
609 state = 6;
610 PUT (ch);
611 }
612 #endif
613 else if (scrub_m68k_mri && ch == '\n')
614 {
615 /* Just quietly terminate the string. This permits lines like
616 bne label loop if we haven't reach end yet. */
617 state = old_state;
618 UNGET (ch);
619 PUT ('\'');
620 }
621 else
622 {
623 PUT (ch);
624 }
625 continue;
626
627 case 6:
628 state = 5;
629 ch = GET ();
630 switch (ch)
631 {
632 /* Handle strings broken across lines, by turning '\n' into
633 '\\' and 'n'. */
634 case '\n':
635 UNGET ('n');
636 add_newlines++;
637 PUT ('\\');
638 continue;
639
640 case EOF:
641 as_warn (_("end of file in string; '%c' inserted"), quotechar);
642 PUT (quotechar);
643 continue;
644
645 case '"':
646 case '\\':
647 case 'b':
648 case 'f':
649 case 'n':
650 case 'r':
651 case 't':
652 case 'v':
653 case 'x':
654 case 'X':
655 case '0':
656 case '1':
657 case '2':
658 case '3':
659 case '4':
660 case '5':
661 case '6':
662 case '7':
663 break;
664
665 default:
666 #ifdef ONLY_STANDARD_ESCAPES
667 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
668 #endif
669 break;
670 }
671 PUT (ch);
672 continue;
673
674 #ifdef DOUBLEBAR_PARALLEL
675 case 13:
676 ch = GET ();
677 if (ch != '|')
678 abort ();
679
680 /* Reset back to state 1 and pretend that we are parsing a
681 line from just after the first white space. */
682 state = 1;
683 PUT ('|');
684 #ifdef TC_TIC6X
685 /* "||^" is used for SPMASKed instructions. */
686 ch = GET ();
687 if (ch == EOF)
688 goto fromeof;
689 else if (ch == '^')
690 PUT ('^');
691 else
692 UNGET (ch);
693 #endif
694 continue;
695 #endif
696 #ifdef TC_Z80
697 case 16:
698 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
699 ch = GET ();
700 if (ch == 'f' || ch == 'F')
701 {
702 state = 17;
703 PUT (ch);
704 }
705 else
706 {
707 state = 9;
708 break;
709 }
710 /* Fall through. */
711 case 17:
712 /* We have seen "af" at the start of a symbol,
713 a ' here is a part of that symbol. */
714 ch = GET ();
715 state = 9;
716 if (ch == '\'')
717 /* Change to avoid warning about unclosed string. */
718 PUT ('`');
719 else if (ch != EOF)
720 UNGET (ch);
721 break;
722 #endif
723 }
724
725 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
726
727 /* flushchar: */
728 ch = GET ();
729
730 #ifdef TC_PREDICATE_START_CHAR
731 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
732 {
733 state += 14;
734 PUT (ch);
735 continue;
736 }
737 else if (state == 14 || state == 15)
738 {
739 if (ch == TC_PREDICATE_END_CHAR)
740 {
741 state -= 14;
742 PUT (ch);
743 ch = GET ();
744 }
745 else
746 {
747 PUT (ch);
748 continue;
749 }
750 }
751 #endif
752
753 recycle:
754
755 #if defined TC_ARM && defined OBJ_ELF
756 /* We need to watch out for .symver directives. See the comment later
757 in this function. */
758 if (symver_state == NULL)
759 {
760 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
761 symver_state = symver_pseudo + 1;
762 }
763 else
764 {
765 /* We advance to the next state if we find the right
766 character. */
767 if (ch != '\0' && (*symver_state == ch))
768 ++symver_state;
769 else if (*symver_state != '\0')
770 /* We did not get the expected character, or we didn't
771 get a valid terminating character after seeing the
772 entire pseudo-op, so we must go back to the beginning. */
773 symver_state = NULL;
774 else
775 {
776 /* We've read the entire pseudo-op. If this is the end
777 of the line, go back to the beginning. */
778 if (IS_NEWLINE (ch))
779 symver_state = NULL;
780 }
781 }
782 #endif /* TC_ARM && OBJ_ELF */
783
784 #ifdef TC_M68K
785 /* We want to have pseudo-ops which control whether we are in
786 MRI mode or not. Unfortunately, since m68k MRI mode affects
787 the scrubber, that means that we need a special purpose
788 recognizer here. */
789 if (mri_state == NULL)
790 {
791 if ((state == 0 || state == 1)
792 && ch == mri_pseudo[0])
793 mri_state = mri_pseudo + 1;
794 }
795 else
796 {
797 /* We advance to the next state if we find the right
798 character, or if we need a space character and we get any
799 whitespace character, or if we need a '0' and we get a
800 '1' (this is so that we only need one state to handle
801 ``.mri 0'' and ``.mri 1''). */
802 if (ch != '\0'
803 && (*mri_state == ch
804 || (*mri_state == ' '
805 && lex[ch] == LEX_IS_WHITESPACE)
806 || (*mri_state == '0'
807 && ch == '1')))
808 {
809 mri_last_ch = ch;
810 ++mri_state;
811 }
812 else if (*mri_state != '\0'
813 || (lex[ch] != LEX_IS_WHITESPACE
814 && lex[ch] != LEX_IS_NEWLINE))
815 {
816 /* We did not get the expected character, or we didn't
817 get a valid terminating character after seeing the
818 entire pseudo-op, so we must go back to the
819 beginning. */
820 mri_state = NULL;
821 }
822 else
823 {
824 /* We've read the entire pseudo-op. mri_last_ch is
825 either '0' or '1' indicating whether to enter or
826 leave MRI mode. */
827 do_scrub_begin (mri_last_ch == '1');
828 mri_state = NULL;
829
830 /* We continue handling the character as usual. The
831 main gas reader must also handle the .mri pseudo-op
832 to control expression parsing and the like. */
833 }
834 }
835 #endif
836
837 if (ch == EOF)
838 {
839 if (state != 0)
840 {
841 as_warn (_("end of file not at end of a line; newline inserted"));
842 state = 0;
843 PUT ('\n');
844 }
845 goto fromeof;
846 }
847
848 switch (lex[ch])
849 {
850 case LEX_IS_WHITESPACE:
851 do
852 {
853 ch = GET ();
854 }
855 while (ch != EOF && IS_WHITESPACE (ch));
856 if (ch == EOF)
857 goto fromeof;
858
859 if (state == 0)
860 {
861 /* Preserve a single whitespace character at the
862 beginning of a line. */
863 state = 1;
864 UNGET (ch);
865 PUT (' ');
866 break;
867 }
868
869 #ifdef KEEP_WHITE_AROUND_COLON
870 if (lex[ch] == LEX_IS_COLON)
871 {
872 /* Only keep this white if there's no white *after* the
873 colon. */
874 ch2 = GET ();
875 if (ch2 != EOF)
876 UNGET (ch2);
877 if (!IS_WHITESPACE (ch2))
878 {
879 state = 9;
880 UNGET (ch);
881 PUT (' ');
882 break;
883 }
884 }
885 #endif
886 if (IS_COMMENT (ch)
887 || ch == '/'
888 || IS_LINE_SEPARATOR (ch)
889 || IS_PARALLEL_SEPARATOR (ch))
890 {
891 if (scrub_m68k_mri)
892 {
893 /* In MRI mode, we keep these spaces. */
894 UNGET (ch);
895 PUT (' ');
896 break;
897 }
898 goto recycle;
899 }
900
901 /* If we're in state 2 or 11, we've seen a non-white
902 character followed by whitespace. If the next character
903 is ':', this is whitespace after a label name which we
904 normally must ignore. In MRI mode, though, spaces are
905 not permitted between the label and the colon. */
906 if ((state == 2 || state == 11)
907 && lex[ch] == LEX_IS_COLON
908 && ! scrub_m68k_mri)
909 {
910 state = 1;
911 PUT (ch);
912 break;
913 }
914
915 switch (state)
916 {
917 case 1:
918 /* We can arrive here if we leave a leading whitespace
919 character at the beginning of a line. */
920 goto recycle;
921 case 2:
922 state = 3;
923 if (to + 1 < toend)
924 {
925 /* Optimize common case by skipping UNGET/GET. */
926 PUT (' '); /* Sp after opco */
927 goto recycle;
928 }
929 UNGET (ch);
930 PUT (' ');
931 break;
932 case 3:
933 #ifndef TC_KEEP_OPERAND_SPACES
934 /* For TI C6X, we keep these spaces as they may separate
935 functional unit specifiers from operands. */
936 if (scrub_m68k_mri)
937 #endif
938 {
939 /* In MRI mode, we keep these spaces. */
940 UNGET (ch);
941 PUT (' ');
942 break;
943 }
944 goto recycle; /* Sp in operands */
945 case 9:
946 case 10:
947 #ifndef TC_KEEP_OPERAND_SPACES
948 if (scrub_m68k_mri)
949 #endif
950 {
951 /* In MRI mode, we keep these spaces. */
952 state = 3;
953 UNGET (ch);
954 PUT (' ');
955 break;
956 }
957 state = 10; /* Sp after symbol char */
958 goto recycle;
959 case 11:
960 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
961 state = 1;
962 else
963 {
964 /* We know that ch is not ':', since we tested that
965 case above. Therefore this is not a label, so it
966 must be the opcode, and we've just seen the
967 whitespace after it. */
968 state = 3;
969 }
970 UNGET (ch);
971 PUT (' '); /* Sp after label definition. */
972 break;
973 default:
974 BAD_CASE (state);
975 }
976 break;
977
978 case LEX_IS_TWOCHAR_COMMENT_1ST:
979 ch2 = GET ();
980 if (ch2 == '*')
981 {
982 for (;;)
983 {
984 do
985 {
986 ch2 = GET ();
987 if (ch2 != EOF && IS_NEWLINE (ch2))
988 add_newlines++;
989 }
990 while (ch2 != EOF && ch2 != '*');
991
992 while (ch2 == '*')
993 ch2 = GET ();
994
995 if (ch2 == EOF || ch2 == '/')
996 break;
997
998 /* This UNGET will ensure that we count newlines
999 correctly. */
1000 UNGET (ch2);
1001 }
1002
1003 if (ch2 == EOF)
1004 as_warn (_("end of file in multiline comment"));
1005
1006 ch = ' ';
1007 goto recycle;
1008 }
1009 #ifdef DOUBLESLASH_LINE_COMMENTS
1010 else if (ch2 == '/')
1011 {
1012 do
1013 {
1014 ch = GET ();
1015 }
1016 while (ch != EOF && !IS_NEWLINE (ch));
1017 if (ch == EOF)
1018 as_warn ("end of file in comment; newline inserted");
1019 state = 0;
1020 PUT ('\n');
1021 break;
1022 }
1023 #endif
1024 else
1025 {
1026 if (ch2 != EOF)
1027 UNGET (ch2);
1028 if (state == 9 || state == 10)
1029 state = 3;
1030 PUT (ch);
1031 }
1032 break;
1033
1034 case LEX_IS_STRINGQUOTE:
1035 quotechar = ch;
1036 if (state == 10)
1037 {
1038 /* Preserve the whitespace in foo "bar". */
1039 UNGET (ch);
1040 state = 3;
1041 PUT (' ');
1042
1043 /* PUT didn't jump out. We could just break, but we
1044 know what will happen, so optimize a bit. */
1045 ch = GET ();
1046 old_state = 3;
1047 }
1048 else if (state == 9)
1049 old_state = 3;
1050 else
1051 old_state = state;
1052 state = 5;
1053 PUT (ch);
1054 break;
1055
1056 #ifndef IEEE_STYLE
1057 case LEX_IS_ONECHAR_QUOTE:
1058 #ifdef H_TICK_HEX
1059 if (state == 9 && enable_h_tick_hex)
1060 {
1061 char c;
1062
1063 c = GET ();
1064 as_warn ("'%c found after symbol", c);
1065 UNGET (c);
1066 }
1067 #endif
1068 if (state == 10)
1069 {
1070 /* Preserve the whitespace in foo 'b'. */
1071 UNGET (ch);
1072 state = 3;
1073 PUT (' ');
1074 break;
1075 }
1076 ch = GET ();
1077 if (ch == EOF)
1078 {
1079 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1080 ch = 0;
1081 }
1082 if (ch == '\\')
1083 {
1084 ch = GET ();
1085 if (ch == EOF)
1086 {
1087 as_warn (_("end of file in escape character"));
1088 ch = '\\';
1089 }
1090 else
1091 ch = process_escape (ch);
1092 }
1093 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1094
1095 /* None of these 'x constants for us. We want 'x'. */
1096 if ((ch = GET ()) != '\'')
1097 {
1098 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1099 as_warn (_("missing close quote; (assumed)"));
1100 #else
1101 if (ch != EOF)
1102 UNGET (ch);
1103 #endif
1104 }
1105 if (strlen (out_buf) == 1)
1106 {
1107 PUT (out_buf[0]);
1108 break;
1109 }
1110 if (state == 9)
1111 old_state = 3;
1112 else
1113 old_state = state;
1114 state = -1;
1115 out_string = out_buf;
1116 PUT (*out_string++);
1117 break;
1118 #endif
1119
1120 case LEX_IS_COLON:
1121 #ifdef KEEP_WHITE_AROUND_COLON
1122 state = 9;
1123 #else
1124 if (state == 9 || state == 10)
1125 state = 3;
1126 else if (state != 3)
1127 state = 1;
1128 #endif
1129 PUT (ch);
1130 break;
1131
1132 case LEX_IS_NEWLINE:
1133 /* Roll out a bunch of newlines from inside comments, etc. */
1134 if (add_newlines)
1135 {
1136 --add_newlines;
1137 UNGET (ch);
1138 }
1139 /* Fall through. */
1140
1141 case LEX_IS_LINE_SEPARATOR:
1142 state = 0;
1143 PUT (ch);
1144 break;
1145
1146 case LEX_IS_PARALLEL_SEPARATOR:
1147 state = 1;
1148 PUT (ch);
1149 break;
1150
1151 #ifdef TC_V850
1152 case LEX_IS_DOUBLEDASH_1ST:
1153 ch2 = GET ();
1154 if (ch2 != '-')
1155 {
1156 if (ch2 != EOF)
1157 UNGET (ch2);
1158 goto de_fault;
1159 }
1160 /* Read and skip to end of line. */
1161 do
1162 {
1163 ch = GET ();
1164 }
1165 while (ch != EOF && ch != '\n');
1166
1167 if (ch == EOF)
1168 as_warn (_("end of file in comment; newline inserted"));
1169
1170 state = 0;
1171 PUT ('\n');
1172 break;
1173 #endif
1174 #ifdef DOUBLEBAR_PARALLEL
1175 case LEX_IS_DOUBLEBAR_1ST:
1176 ch2 = GET ();
1177 if (ch2 != EOF)
1178 UNGET (ch2);
1179 if (ch2 != '|')
1180 goto de_fault;
1181
1182 /* Handle '||' in two states as invoking PUT twice might
1183 result in the first one jumping out of this loop. We'd
1184 then lose track of the state and one '|' char. */
1185 state = 13;
1186 PUT ('|');
1187 break;
1188 #endif
1189 case LEX_IS_LINE_COMMENT_START:
1190 /* FIXME-someday: The two character comment stuff was badly
1191 thought out. On i386, we want '/' as line comment start
1192 AND we want C style comments. hence this hack. The
1193 whole lexical process should be reworked. xoxorich. */
1194 if (ch == '/')
1195 {
1196 ch2 = GET ();
1197 if (ch2 == '*')
1198 {
1199 old_state = 3;
1200 state = -2;
1201 break;
1202 }
1203 else if (ch2 != EOF)
1204 {
1205 UNGET (ch2);
1206 }
1207 }
1208
1209 if (state == 0 || state == 1) /* Only comment at start of line. */
1210 {
1211 int startch;
1212
1213 startch = ch;
1214
1215 do
1216 {
1217 ch = GET ();
1218 }
1219 while (ch != EOF && IS_WHITESPACE (ch));
1220
1221 if (ch == EOF)
1222 {
1223 as_warn (_("end of file in comment; newline inserted"));
1224 PUT ('\n');
1225 break;
1226 }
1227
1228 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1229 {
1230 /* Not a cpp line. */
1231 while (ch != EOF && !IS_NEWLINE (ch))
1232 ch = GET ();
1233 if (ch == EOF)
1234 {
1235 as_warn (_("end of file in comment; newline inserted"));
1236 PUT ('\n');
1237 }
1238 else /* IS_NEWLINE (ch) */
1239 {
1240 /* To process non-zero add_newlines. */
1241 UNGET (ch);
1242 }
1243 state = 0;
1244 break;
1245 }
1246 /* Looks like `# 123 "filename"' from cpp. */
1247 UNGET (ch);
1248 old_state = 4;
1249 state = -1;
1250 if (scrub_m68k_mri)
1251 out_string = "\tlinefile ";
1252 else
1253 out_string = "\t.linefile ";
1254 PUT (*out_string++);
1255 break;
1256 }
1257
1258 #ifdef TC_D10V
1259 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1260 Trap is the only short insn that has a first operand that is
1261 neither register nor label.
1262 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1263 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1264 already LEX_IS_LINE_COMMENT_START. However, it is the
1265 only character in line_comment_chars for d10v, hence we
1266 can recognize it as such. */
1267 /* An alternative approach would be to reset the state to 1 when
1268 we see '||', '<'- or '->', but that seems to be overkill. */
1269 if (state == 10)
1270 PUT (' ');
1271 #endif
1272 /* We have a line comment character which is not at the
1273 start of a line. If this is also a normal comment
1274 character, fall through. Otherwise treat it as a default
1275 character. */
1276 if (strchr (tc_comment_chars, ch) == NULL
1277 && (! scrub_m68k_mri
1278 || (ch != '!' && ch != '*')))
1279 goto de_fault;
1280 if (scrub_m68k_mri
1281 && (ch == '!' || ch == '*' || ch == '#')
1282 && state != 1
1283 && state != 10)
1284 goto de_fault;
1285 /* Fall through. */
1286 case LEX_IS_COMMENT_START:
1287 #if defined TC_ARM && defined OBJ_ELF
1288 /* On the ARM, `@' is the comment character.
1289 Unfortunately this is also a special character in ELF .symver
1290 directives (and .type, though we deal with those another way).
1291 So we check if this line is such a directive, and treat
1292 the character as default if so. This is a hack. */
1293 if ((symver_state != NULL) && (*symver_state == 0))
1294 goto de_fault;
1295 #endif
1296
1297 #ifdef TC_ARM
1298 /* For the ARM, care is needed not to damage occurrences of \@
1299 by stripping the @ onwards. Yuck. */
1300 if ((to > tostart ? to[-1] : last_char) == '\\')
1301 /* Do not treat the @ as a start-of-comment. */
1302 goto de_fault;
1303 #endif
1304
1305 #ifdef WARN_COMMENTS
1306 if (!found_comment)
1307 found_comment_file = as_where (&found_comment);
1308 #endif
1309 do
1310 {
1311 ch = GET ();
1312 }
1313 while (ch != EOF && !IS_NEWLINE (ch));
1314 if (ch == EOF)
1315 as_warn (_("end of file in comment; newline inserted"));
1316 state = 0;
1317 PUT ('\n');
1318 break;
1319
1320 #ifdef H_TICK_HEX
1321 case LEX_IS_H:
1322 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1323 the H' with 0x to make them gas-style hex characters. */
1324 if (enable_h_tick_hex)
1325 {
1326 char quot;
1327
1328 quot = GET ();
1329 if (quot == '\'')
1330 {
1331 UNGET ('x');
1332 ch = '0';
1333 }
1334 else
1335 UNGET (quot);
1336 }
1337 #endif
1338 /* Fall through. */
1339
1340 case LEX_IS_SYMBOL_COMPONENT:
1341 if (state == 10)
1342 {
1343 /* This is a symbol character following another symbol
1344 character, with whitespace in between. We skipped
1345 the whitespace earlier, so output it now. */
1346 UNGET (ch);
1347 state = 3;
1348 PUT (' ');
1349 break;
1350 }
1351
1352 #ifdef TC_Z80
1353 /* "af'" is a symbol containing '\''. */
1354 if (state == 3 && (ch == 'a' || ch == 'A'))
1355 {
1356 state = 16;
1357 PUT (ch);
1358 ch = GET ();
1359 if (ch == 'f' || ch == 'F')
1360 {
1361 state = 17;
1362 PUT (ch);
1363 break;
1364 }
1365 else
1366 {
1367 state = 9;
1368 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1369 {
1370 if (ch != EOF)
1371 UNGET (ch);
1372 break;
1373 }
1374 }
1375 }
1376 #endif
1377 if (state == 3)
1378 state = 9;
1379
1380 /* This is a common case. Quickly copy CH and all the
1381 following symbol component or normal characters. */
1382 if (to + 1 < toend
1383 && mri_state == NULL
1384 #if defined TC_ARM && defined OBJ_ELF
1385 && symver_state == NULL
1386 #endif
1387 )
1388 {
1389 char *s;
1390 ptrdiff_t len;
1391
1392 for (s = from; s < fromend; s++)
1393 {
1394 int type;
1395
1396 ch2 = *(unsigned char *) s;
1397 type = lex[ch2];
1398 if (type != 0
1399 && type != LEX_IS_SYMBOL_COMPONENT)
1400 break;
1401 }
1402
1403 if (s > from)
1404 /* Handle the last character normally, for
1405 simplicity. */
1406 --s;
1407
1408 len = s - from;
1409
1410 if (len > (toend - to) - 1)
1411 len = (toend - to) - 1;
1412
1413 if (len > 0)
1414 {
1415 PUT (ch);
1416 memcpy (to, from, len);
1417 to += len;
1418 from += len;
1419 if (to >= toend)
1420 goto tofull;
1421 ch = GET ();
1422 }
1423 }
1424
1425 /* Fall through. */
1426 default:
1427 de_fault:
1428 /* Some relatively `normal' character. */
1429 if (state == 0)
1430 {
1431 state = 11; /* Now seeing label definition. */
1432 }
1433 else if (state == 1)
1434 {
1435 state = 2; /* Ditto. */
1436 }
1437 else if (state == 9)
1438 {
1439 if (!IS_SYMBOL_COMPONENT (ch))
1440 state = 3;
1441 }
1442 else if (state == 10)
1443 {
1444 if (ch == '\\')
1445 {
1446 /* Special handling for backslash: a backslash may
1447 be the beginning of a formal parameter (of a
1448 macro) following another symbol character, with
1449 whitespace in between. If that is the case, we
1450 output a space before the parameter. Strictly
1451 speaking, correct handling depends upon what the
1452 macro parameter expands into; if the parameter
1453 expands into something which does not start with
1454 an operand character, then we don't want to keep
1455 the space. We don't have enough information to
1456 make the right choice, so here we are making the
1457 choice which is more likely to be correct. */
1458 if (to + 1 >= toend)
1459 {
1460 /* If we're near the end of the buffer, save the
1461 character for the next time round. Otherwise
1462 we'll lose our state. */
1463 UNGET (ch);
1464 goto tofull;
1465 }
1466 *to++ = ' ';
1467 }
1468
1469 state = 3;
1470 }
1471 PUT (ch);
1472 break;
1473 }
1474 }
1475
1476 /*NOTREACHED*/
1477
1478 fromeof:
1479 /* We have reached the end of the input. */
1480 #ifdef TC_ARM
1481 if (to > tostart)
1482 last_char = to[-1];
1483 #endif
1484 return to - tostart;
1485
1486 tofull:
1487 /* The output buffer is full. Save any input we have not yet
1488 processed. */
1489 if (fromend > from)
1490 {
1491 saved_input = from;
1492 saved_input_len = fromend - from;
1493 }
1494 else
1495 saved_input = NULL;
1496
1497 #ifdef TC_ARM
1498 if (to > tostart)
1499 last_char = to[-1];
1500 #endif
1501 return to - tostart;
1502 }
This page took 0.061621 seconds and 4 git commands to generate.