4699a28936e83561392ae70583e45d6cce3cb2ce
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.15.0"

# Public names of this module (what `from normand import *` provides).
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47 import re
48 import abc
49 import ast
50 import sys
51 import copy
52 import enum
53 import math
54 import struct
55 import typing
56 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
57
58
# Location within the source text: a line number and a column number.
#
# Instances are only built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, line_no: int, col_no: int):
        # Bypass __init__(), which always raises
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({0._line_no}, {0._col_no})".format(self)
86
87
# Base of all items: remembers where the item is located in the source
# text.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Location of this item within the source text.
    @property
    def text_loc(self):
        return self._text_loc
97
98
# Scalar item: an item which exposes its size through the `size`
# property.
class _ScalarItem(_Item):
    # Size of this item (bytes); concrete scalar items provide it.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
106
107
108 # A repeatable item.
109 class _RepableItem:
110 pass
111
112
# Item representing exactly one byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # A byte item is always one byte long.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
130
131
# String item: holds the already encoded bytes of a literal string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Size is the number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
149
150
# Byte order; each member has a unique string value.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian (value `be`).
    BE = "be"

    # Little endian (value `le`).
    LE = "le"
159
160
# Item which sets the current byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
173
174
# Label item: a name attached to a location of the source text.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
188
189
# Item which sets the current offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
# Item which aligns the current offset, with a given padding byte value.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_AlignOffset({!r}, {!r}, {!r})".format(
            self._val, self._pad_val, self._text_loc
        )
226
227
228 # Mixin of containing an AST expression and its string.
229 class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
# Item which fills with a padding byte value until some offset given
# by an expression.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        return "_FillUntil({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._pad_val, self._text_loc
        )
266
267
# Variable assignment item: a variable name and the expression of its
# value.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases are initialized explicitly: _Item.__init__() doesn't
        # forward to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_VarAssign({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._expr_str, self._expr, self._text_loc
        )
289
290
# Fixed-length number item (possibly more than one byte): an expression
# and a length in bits.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    # Length (bits).
    @property
    def len(self):
        return self._len

    def __repr__(self):
        return "_FlNum({!r}, {!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._len, self._text_loc
        )
316
317
# LEB128 integer item (common base of the unsigned and signed variants).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Uses the name of the concrete class so that subclasses don't
        # need their own __repr__()
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
# Unsigned LEB128 integer item; everything is inherited from
# `_Leb128Int`.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
336
337
# Signed LEB128 integer item; everything is inherited from
# `_Leb128Int`.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Group item: a list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items contained in this group.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
356
357
# Repetition item: an item to repeat and the expression of the
# repetition.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        return "_Rep({!r}, {!r}, {!r}, {!r})".format(
            self._item, self._expr_str, self._expr, self._text_loc
        )
379
380
# Conditional item: a condition expression with one item for the true
# case and one item for the false case.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item, self._false_item = true_item, false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        return "_Cond({!r}, {!r}, {!r}, {!r}, {!r})".format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
414
415
# Macro definition item: a name, parameter names, and the group of
# contained items.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    # Group of contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        return "_MacroDef({!r}, {!r}, {!r}, {!r})".format(
            self._name, self._param_names, self._group, self._text_loc
        )
448
449
# Macro expansion parameter: an expression (string and AST node) with
# its source text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr = expr_str, expr
        self._text_loc = text_loc

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression node.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        return "_MacroExpParam({!r}, {!r}, {!r})".format(
            self._expr_str, self._expr, self._text_loc
        )
476
477
# Macro expansion item: the name of the macro to expand and the
# expansion parameters.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Expansion parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        return "_MacroExp({!r}, {!r}, {!r})".format(
            self._name, self._params, self._text_loc
        )
506
507
# A parsing error message: a text and a source text location.
#
# Instances are only built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseErrorMessage:
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        # Bypass __init__(), which always raises
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def _init(self, text: str, text_loc: TextLocation):
        self._text, self._text_loc = text, text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
532
533
# A parsing error holding one or more messages (`ParseErrorMessage`).
#
# Instances are only built through the private _create() class method;
# calling the constructor directly raises `NotImplementedError`.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        # Bypass __init__(), which always raises
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def _init(self, msg: str, text_loc: TextLocation):
        # The initial message is also the message of the base exception
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message with the text `msg` and the location `text_loc`.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
563
564
# Creates a parsing error from the message `msg` and the text location
# `text_loc`, and raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
# Adds a message (text `msg`, location `text_loc`) to the existing
# parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
573
574
# Adds a message (text `msg`, location `text_loc`) to the parsing error
# `exc`, and then raises `exc` again.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    _add_error_msg(exc, msg, text_loc)
    raise exc
579
580
# Type of a variables dictionary (for type hints): maps a variable name
# to its integer or floating point number value.
VariablesT = Dict[str, Union[int, float]]


# Type of a labels dictionary (for type hints): maps a label name to an
# integer.
LabelsT = Dict[str, int]


# Pattern of a Python name.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Type of a macro definition dictionary: maps a macro name to its
# definition item.
_MacroDefsT = Dict[str, _MacroDef]
595
596
597 # Normand parser.
598 #
599 # The constructor accepts a Normand input. After building, use the `res`
600 # property to get the resulting main group.
601 class _Parser:
# Builds a parser for the Normand input `normand` and parses it
# immediately.
#
# The keys of `variables` and `labels` are the initially known
# variable and label names.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input and current position within it
    self._normand = normand
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Known names so far
    self._label_names = set(labels)
    self._var_names = set(variables)
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse now
    self._parse()
613
# Parsing result (main group), as stored in `self._res`.
@property
def res(self):
    return self._res
618
# Macro definitions dictionary of this parser.
@property
def macro_defs(self):
    return self._macro_defs
623
# Current text location: a new `TextLocation` object built from the
# current line and column numbers.
@property
def _text_loc(self):
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(  # pyright: ignore[reportPrivateUsage]
        line_no, col_no
    )
630
631 # Returns `True` if this parser is done parsing.
632 def _is_done(self):
633 return self._at == len(self._normand)
634
635 # Returns `True` if this parser isn't done parsing.
636 def _isnt_done(self):
637 return not self._is_done()
638
# Raises a parse error having the message `msg` and the current text
# location.
def _raise_error(self, msg: str) -> NoReturn:
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
643
644 # Tries to make the pattern `pat` match the current substring,
645 # returning the match object and updating `self._at`,
646 # `self._line_no`, and `self._col_no` on success.
647 def _try_parse_pat(self, pat: Pattern[str]):
648 m = pat.match(self._normand, self._at)
649
650 if m is None:
651 return
652
653 # Skip matched string
654 self._at += len(m.group(0))
655
656 # Update line number
657 self._line_no += m.group(0).count("\n")
658
659 # Update column number
660 for i in reversed(range(self._at)):
661 if self._normand[i] == "\n" or i == 0:
662 if i == 0:
663 self._col_no = self._at + 1
664 else:
665 self._col_no = self._at - i
666
667 break
668
669 # Return match object
670 return m
671
672 # Expects the pattern `pat` to match the current substring,
673 # returning the match object and updating `self._at`,
674 # `self._line_no`, and `self._col_no` on success, or raising a parse
675 # error with the message `error_msg` on error.
676 def _expect_pat(self, pat: Pattern[str], error_msg: str):
677 # Match
678 m = self._try_parse_pat(pat)
679
680 if m is None:
681 # No match: error
682 self._raise_error(error_msg)
683
684 # Return match object
685 return m
686
687 # Pattern for _skip_ws_and_comments()
688 _ws_or_syms_or_comments_pat = re.compile(
689 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
690 )
691
692 # Skips as many whitespaces, insignificant symbol characters, and
693 # comments as possible.
694 def _skip_ws_and_comments(self):
695 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
696
697 # Pattern for _skip_ws()
698 _ws_pat = re.compile(r"\s*")
699
700 # Skips as many whitespaces as possible.
701 def _skip_ws(self):
702 self._try_parse_pat(self._ws_pat)
703
# Pattern for _try_parse_hex_byte(): a single hexadecimal nibble
_nibble_pat = re.compile(r"[A-Fa-f0-9]")

# Tries to parse a hexadecimal byte (two nibbles, possibly separated by
# whitespaces, symbols, and comments), returning a byte item on
# success.
#
# Raises a parse error if the second nibble is missing.
def _try_parse_hex_byte(self):
    begin_text_loc = self._text_loc
    high_m = self._try_parse_pat(self._nibble_pat)

    if high_m is None:
        # Not a hexadecimal byte
        return

    # The low nibble is mandatory at this point
    self._skip_ws_and_comments()
    low_m = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    val = (int(high_m.group(0), 16) << 4) | int(low_m.group(0), 16)
    return _Byte(val, begin_text_loc)
727
# Patterns for _try_parse_bin_byte(): a single bit and the `%` prefix
_bin_byte_bit_pat = re.compile(r"[01]")
_bin_byte_prefix_pat = re.compile(r"%")

# Tries to parse a binary byte (`%` followed with eight bits, possibly
# separated by whitespaces, symbols, and comments), returning a byte
# item on success.
#
# Raises a parse error if any of the eight bits is missing.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        # Not a binary byte
        return

    # Accumulate the eight mandatory bits, most significant first
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(
            self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
        )
        val = (val << 1) | int(bit_m.group(0))

    return _Byte(val, begin_text_loc)
751
# Patterns for _try_parse_dec_byte(): the `$` prefix and a possibly
# negative decimal value
_dec_byte_prefix_pat = re.compile(r"\$")
_dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")

# Tries to parse a decimal byte (`$` followed with a decimal constant
# from -128 to 255), returning a byte item on success.
#
# Raises a parse error if the constant is missing or out of range.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # Not a decimal byte
        return

    # The value is mandatory at this point
    self._skip_ws()
    val_m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(val_m.group("val"))

    if val_m.group("neg") == "-":
        val = -val

    # Validate range
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Negative values become their two's complement
    return _Byte(val % 256, begin_text_loc)
781
782 # Tries to parse a byte, returning a byte item on success.
783 def _try_parse_byte(self):
784 # Hexadecimal
785 item = self._try_parse_hex_byte()
786
787 if item is not None:
788 return item
789
790 # Binary
791 item = self._try_parse_bin_byte()
792
793 if item is not None:
794 return item
795
796 # Decimal
797 item = self._try_parse_dec_byte()
798
799 if item is not None:
800 return item
801
# Patterns for _try_parse_str(): optional encoding prefix and opening
# `"`, closing `"`, string contents (escaped characters included), and
# a single escape sequence
_str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
_str_suffix_pat = re.compile(r'"')
_str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
_str_escape_seq_pat = re.compile(r"\\(.)")

# Strings corresponding to escape sequence characters
_str_escape_seq_strs = {
    "0": "\0",
    "a": "\a",
    "b": "\b",
    "e": "\x1b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "v": "\v",
    "\\": "\\",
    '"': '"',
}

# Tries to parse a literal string, returning a string item (containing
# the encoded bytes) on success.
#
# The encoding is UTF-8 by default, or UTF-16/UTF-32 with a specific
# byte order when the `u16be`, `u16le`, `u32be`, or `u32le` prefix is
# present.
#
# Raises a parse error if the string isn't terminated.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Replacing one kind of sequence at a time is incorrect: with
    # `\\n` (escaped backslash followed with `n`), the `\n` part would
    # be seen as a newline escape sequence. Unknown escape sequences
    # remain as is.
    escape_seq_strs = self._str_escape_seq_strs

    def unescape(esc_m):
        return escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

    val = self._str_escape_seq_pat.sub(unescape, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
856
# Common right parenthesis pattern
_right_paren_pat = re.compile(r"\)")

# Pattern for _try_parse_group(): `(`, `!g`, or `!group`
_group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")

# Tries to parse a group, returning a group item on success.
#
# A group beginning with `(` must end with `)`; any other group must
# end with the block end pattern (`!end`). Raises a parse error if the
# expected end is missing.
def _try_parse_group(self):
    begin_text_loc = self._text_loc
    open_m = self._try_parse_pat(self._group_prefix_pat)

    if open_m is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The end of the group depends on how it was opened
    self._skip_ws_and_comments()
    opened_with_paren = open_m.group(0) == "("
    end_pat = self._right_paren_pat if opened_with_paren else self._block_end_pat
    end_str = ")" if opened_with_paren else "!end"
    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
891
# Returns a tuple of the normalized version of the expression string
# `expr_str` (stripped, newlines replaced with spaces) and the
# corresponding AST expression node.
#
# Raises a parse error located at `text_loc` if the expression has an
# invalid syntax.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    norm_expr_str = " ".join(expr_str.strip().split("\n"))

    try:
        return norm_expr_str, ast.parse(norm_expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(norm_expr_str),
            text_loc,
        )
907
# Patterns for _try_parse_num_and_attr(): expression up to `:`, length
# attribute (bits), and LEB128 attribute
_val_expr_pat = re.compile(r"([^}:]+):\s*")
_fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
_leb128_int_attr_pat = re.compile(r"(u|s)leb128")

# Tries to parse a value expression followed with `:` and an attribute
# (fixed length in bits, `uleb128`, or `sleb128`), returning a
# fixed-length number item or an LEB128 integer item on success.
#
# Raises a parse error if the expression or the attribute is invalid.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc
    expr_m = self._try_parse_pat(self._val_expr_pat)

    if expr_m is None:
        # Not a value with an attribute
        return

    # Expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(expr_m.group(1), begin_text_loc)

    # Fixed length?
    len_m = self._try_parse_pat(self._fl_num_len_attr_pat)

    if len_m is not None:
        return _FlNum(expr_str, expr, int(len_m.group(0)), begin_text_loc)

    # LEB128?
    leb128_m = self._try_parse_pat(self._leb128_int_attr_pat)

    if leb128_m is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    if leb128_m.group(1) == "u":
        return _ULeb128Int(expr_str, expr, begin_text_loc)

    return _SLeb128Int(expr_str, expr, begin_text_loc)
952
# Patterns for _try_parse_var_assign(): name followed with `=`, and
# expression up to `}`
_var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
_var_assign_expr_pat = re.compile(r"[^}]+")

# Tries to parse a variable assignment (name, `=`, expression),
# returning a variable assignment item on success and recording the
# name as a known variable name.
#
# Raises a parse error if the name is reserved, if a label has the same
# name, or if the expression is missing or invalid.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(self._var_assign_name_equal_pat)

    if name_m is None:
        # Not a variable assignment
        return

    var_name = name_m.group(1)

    # Validate name
    if var_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if var_name in self._label_names:
        _raise_error("Existing label named `{}`".format(var_name), begin_text_loc)

    # The expression is mandatory at this point
    self._skip_ws()
    expr_m = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(expr_m.group(0), begin_text_loc)

    # This is now a known variable name
    self._var_names.add(var_name)
    return _VarAssign(var_name, expr_str, expr, begin_text_loc)
997
# Pattern for _try_parse_set_bo(): `be` or `le`
_bo_pat = re.compile(r"[bl]e")

# Tries to parse a byte order name (`be` or `le`), returning the
# corresponding byte order setting item on success.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    bo_m = self._try_parse_pat(self._bo_pat)

    if bo_m is None:
        # Not a byte order name
        return

    bo_name = bo_m.group(0)

    if bo_name == "be":
        bo = ByteOrder.BE
    else:
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        bo = ByteOrder.LE

    return _SetBo(bo, begin_text_loc)
1019
1020 # Patterns for _try_parse_val_or_bo()
1021 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1022 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
1023
1024 # Tries to parse a value, a variable assignment, or a byte order
1025 # setting, returning an item on success.
1026 def _try_parse_val_or_var_assign_or_set_bo(self):
1027 # Match prefix
1028 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
1029 # No match
1030 return
1031
1032 self._skip_ws()
1033
1034 # Variable assignment item?
1035 item = self._try_parse_var_assign()
1036
1037 if item is None:
1038 # Number item?
1039 item = self._try_parse_num_and_attr()
1040
1041 if item is None:
1042 # Byte order setting item?
1043 item = self._try_parse_set_bo()
1044
1045 if item is None:
1046 # At this point it's invalid
1047 self._raise_error(
1048 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
1049 )
1050
1051 # Expect suffix
1052 self._skip_ws()
1053 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
1054 return item
1055
1056 # Returns a normalized version (so as to be parseable by int()) of
1057 # the constant integer string `s`, possibly negative, dealing with
1058 # any radix suffix.
1059 @staticmethod
1060 def _norm_const_int(s: str):
1061 neg = ""
1062 pos = s
1063
1064 if s.startswith("-"):
1065 neg = "-"
1066 pos = s[1:]
1067
1068 for r in "xXoObB":
1069 if pos.startswith("0" + r):
1070 # Already correct
1071 return s
1072
1073 # Try suffix
1074 asm_suf_base = {
1075 "h": "x",
1076 "H": "x",
1077 "q": "o",
1078 "Q": "o",
1079 "o": "o",
1080 "O": "o",
1081 "b": "b",
1082 "B": "B",
1083 }
1084
1085 for suf in asm_suf_base:
1086 if pos[-1] == suf:
1087 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1088
1089 return s
1090
1091 # Common constant integer patterns
1092 _pos_const_int_pat = re.compile(
1093 r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
1094 )
1095 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
1096
# Tries to parse an offset setting value (a positive constant integer
# coming after the initial `<`), returning an offset setting item on
# success.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc
    val_m = self._try_parse_pat(self._pos_const_int_pat)

    if val_m is None:
        # Not an offset setting value
        return

    # Normalize any radix suffix so that int() can guess the base
    offset = int(self._norm_const_int(val_m.group(0)), 0)
    return _SetOffset(offset, begin_text_loc)
1111
# Tries to parse a label name (coming after the initial `<`), returning
# a label item on success and recording the name as a known label name.
#
# Raises a parse error if the name is reserved, or if a label or a
# variable already has this name.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    name_m = self._try_parse_pat(_py_name_pat)

    if name_m is None:
        # Not a label name
        return

    label_name = name_m.group(0)

    # Validate
    if label_name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    if label_name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(label_name), begin_text_loc)

    if label_name in self._var_names:
        _raise_error(
            "Existing variable named `{}`".format(label_name), begin_text_loc
        )

    # This is now a known label name
    self._label_names.add(label_name)
    return _Label(label_name, begin_text_loc)
1143
1144 # Patterns for _try_parse_label_or_set_offset()
1145 _label_set_offset_prefix_pat = re.compile(r"<")
1146 _label_set_offset_suffix_pat = re.compile(r">")
1147
1148 # Tries to parse a label or an offset setting, returning an item on
1149 # success.
1150 def _try_parse_label_or_set_offset(self):
1151 # Match prefix
1152 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
1153 # No match
1154 return
1155
1156 # Offset setting item?
1157 self._skip_ws()
1158 item = self._try_parse_set_offset_val()
1159
1160 if item is None:
1161 # Label item?
1162 item = self._try_parse_label_name()
1163
1164 if item is None:
1165 # At this point it's invalid
1166 self._raise_error("Expecting a label name or an offset setting value")
1167
1168 # Expect suffix
1169 self._skip_ws()
1170 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
1171 return item
1172
# Pattern for _parse_pad_val(): the `~` prefix
_pad_val_prefix_pat = re.compile(r"~")

# Parses an optional padding value (`~` followed with a positive
# constant integer), returning the padding byte value, or 0 if there's
# no `~`.
#
# Raises a parse error if the constant is missing after `~` or if it's
# greater than 255.
def _parse_pad_val(self):
    self._skip_ws()

    if self._try_parse_pat(self._pad_val_prefix_pat) is None:
        # No padding value: default
        return 0

    # The value is mandatory at this point
    self._skip_ws()
    pad_val_text_loc = self._text_loc
    val_m = self._expect_pat(
        self._pos_const_int_pat,
        "Expecting a positive constant integer (byte value)",
    )
    pad_val = int(self._norm_const_int(val_m.group(0)), 0)

    # Validate
    if pad_val > 255:
        _raise_error(
            "Invalid padding byte value {}".format(pad_val),
            pad_val_text_loc,
        )

    return pad_val
1201
# Patterns for _try_parse_align_offset(): the `@` prefix and the
# decimal alignment value
_align_offset_prefix_pat = re.compile(r"@")
_align_offset_val_pat = re.compile(r"\d+")

# Tries to parse an offset alignment (`@`, an alignment in bits, and an
# optional padding value), returning an offset alignment item on
# success.
#
# Raises a parse error if the alignment is missing or isn't a positive
# multiple of eight.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # Not an offset alignment
        return

    # The alignment is mandatory at this point
    self._skip_ws()
    align_text_loc = self._text_loc
    align_m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align = int(align_m.group(0))

    # Validate alignment
    if align < 1 or align % 8 != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align
            ),
            align_text_loc,
        )

    # Optional padding value, then the item
    return _AlignOffset(align, self._parse_pad_val(), begin_text_loc)
1240
1241 # Patterns for _try_parse_fill_until()
1242 _fill_until_prefix_pat = re.compile(r"\+")
1243 _fill_until_pad_val_prefix_pat = re.compile(r"~")
1244
1245 # Tries to parse a filling, returning a filling item on success.
1246 def _try_parse_fill_until(self):
1247 begin_text_loc = self._text_loc
1248
1249 # Match prefix
1250 if self._try_parse_pat(self._fill_until_prefix_pat) is None:
1251 # No match
1252 return
1253
1254 # Expect expression
1255 self._skip_ws()
1256 expr_str, expr = self._expect_const_int_name_expr(True)
1257
1258 # Padding value
1259 pad_val = self._parse_pad_val()
1260
1261 # Return item
1262 return _FillUntil(expr_str, expr, pad_val, begin_text_loc)
1263
# Patterns for _expect_const_int_name_expr()
1265 _inner_expr_prefix_pat = re.compile(r"\{")
1266 _inner_expr_pat = re.compile(r"[^}]+")
1267 _inner_expr_suffix_pat = re.compile(r"\}")
1268
1269 # Parses a constant integer if `accept_const_int` is `True`
1270 # (possibly negative if `allow_neg` is `True`), a name, or an
1271 # expression within `{` and `}`.
1272 def _expect_const_int_name_expr(
1273 self, accept_const_int: bool, allow_neg: bool = False
1274 ):
1275 expr_text_loc = self._text_loc
1276
1277 # Constant integer?
1278 m = None
1279
1280 if accept_const_int:
1281 m = self._try_parse_pat(self._const_int_pat)
1282
1283 if m is None:
1284 # Name?
1285 m = self._try_parse_pat(_py_name_pat)
1286
1287 if m is None:
1288 # Expression?
1289 if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
1290 pos_msg = "" if allow_neg else "positive "
1291
1292 if accept_const_int:
1293 mid_msg = "a {}constant integer, a name, or `{{`".format(
1294 pos_msg
1295 )
1296 else:
1297 mid_msg = "a name or `{`"
1298
1299 # At this point it's invalid
1300 self._raise_error("Expecting {}".format(mid_msg))
1301
1302 # Expect an expression
1303 self._skip_ws()
1304 expr_text_loc = self._text_loc
1305 m = self._expect_pat(self._inner_expr_pat, "Expecting an expression")
1306 expr_str = m.group(0)
1307
1308 # Expect `}`
1309 self._skip_ws()
1310 self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
1311 else:
1312 expr_str = m.group(0)
1313 else:
1314 if m.group("neg") == "-" and not allow_neg:
1315 _raise_error("Expecting a positive constant integer", expr_text_loc)
1316
1317 expr_str = self._norm_const_int(m.group(0))
1318
1319 return self._ast_expr_from_str(expr_str, expr_text_loc)
1320
1321 # Parses the multiplier expression of a repetition (block or
1322 # post-item) and returns the expression string and AST node.
1323 def _expect_rep_mul_expr(self):
1324 return self._expect_const_int_name_expr(True)
1325
1326 # Common block end pattern
1327 _block_end_pat = re.compile(r"!end\b")
1328
1329 # Pattern for _try_parse_rep_block()
1330 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
1331
1332 # Tries to parse a repetition block, returning a repetition item on
1333 # success.
1334 def _try_parse_rep_block(self):
1335 begin_text_loc = self._text_loc
1336
1337 # Match prefix
1338 if self._try_parse_pat(self._rep_block_prefix_pat) is None:
1339 # No match
1340 return
1341
1342 # Expect expression
1343 self._skip_ws_and_comments()
1344 expr_str, expr = self._expect_rep_mul_expr()
1345
1346 # Parse items
1347 self._skip_ws_and_comments()
1348 items_text_loc = self._text_loc
1349 items = self._parse_items()
1350
1351 # Expect end of block
1352 self._skip_ws_and_comments()
1353 self._expect_pat(
1354 self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
1355 )
1356
1357 # Return item
1358 return _Rep(_Group(items, items_text_loc), expr_str, expr, begin_text_loc)
1359
1360 # Pattern for _try_parse_cond_block()
1361 _cond_block_prefix_pat = re.compile(r"!if\b")
1362 _cond_block_else_pat = re.compile(r"!else\b")
1363
1364 # Tries to parse a conditional block, returning a conditional item
1365 # on success.
1366 def _try_parse_cond_block(self):
1367 begin_text_loc = self._text_loc
1368
1369 # Match prefix
1370 if self._try_parse_pat(self._cond_block_prefix_pat) is None:
1371 # No match
1372 return
1373
1374 # Expect expression
1375 self._skip_ws_and_comments()
1376 expr_str, expr = self._expect_const_int_name_expr(False)
1377
1378 # Parse "true" items
1379 self._skip_ws_and_comments()
1380 true_items_text_loc = self._text_loc
1381 true_items = self._parse_items()
1382 false_items = [] # type: List[_Item]
1383 false_items_text_loc = begin_text_loc
1384
1385 # `!else`?
1386 self._skip_ws_and_comments()
1387
1388 if self._try_parse_pat(self._cond_block_else_pat) is not None:
1389 # Parse "false" items
1390 self._skip_ws_and_comments()
1391 false_items_text_loc = self._text_loc
1392 false_items = self._parse_items()
1393
1394 # Expect end of block
1395 self._expect_pat(
1396 self._block_end_pat,
1397 "Expecting an item, `!else`, or `!end` (end of conditional block)",
1398 )
1399
1400 # Return item
1401 return _Cond(
1402 _Group(true_items, true_items_text_loc),
1403 _Group(false_items, false_items_text_loc),
1404 expr_str,
1405 expr,
1406 begin_text_loc,
1407 )
1408
1409 # Common left parenthesis pattern
1410 _left_paren_pat = re.compile(r"\(")
1411
1412 # Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
1413 _macro_params_comma_pat = re.compile(",")
1414
1415 # Patterns for _try_parse_macro_def()
1416 _macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")
1417
1418 # Tries to parse a macro definition, adding it to `self._macro_defs`
1419 # and returning `True` on success.
1420 def _try_parse_macro_def(self):
1421 begin_text_loc = self._text_loc
1422
1423 # Match prefix
1424 if self._try_parse_pat(self._macro_def_prefix_pat) is None:
1425 # No match
1426 return False
1427
1428 # Expect a name
1429 self._skip_ws()
1430 name_text_loc = self._text_loc
1431 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1432
1433 # Validate name
1434 name = m.group(0)
1435
1436 if name in self._macro_defs:
1437 _raise_error("Duplicate macro named `{}`".format(name), name_text_loc)
1438
1439 # Expect `(`
1440 self._skip_ws()
1441 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1442
1443 # Try to parse comma-separated parameter names
1444 param_names = [] # type: List[str]
1445 expect_comma = False
1446
1447 while True:
1448 self._skip_ws()
1449
1450 # End?
1451 if self._try_parse_pat(self._right_paren_pat) is not None:
1452 # End
1453 break
1454
1455 # Comma?
1456 if expect_comma:
1457 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1458
1459 # Expect parameter name
1460 self._skip_ws()
1461 param_text_loc = self._text_loc
1462 m = self._expect_pat(_py_name_pat, "Expecting valid parameter name")
1463
1464 if m.group(0) in param_names:
1465 _raise_error(
1466 "Duplicate macro parameter named `{}`".format(m.group(0)),
1467 param_text_loc,
1468 )
1469
1470 param_names.append(m.group(0))
1471 expect_comma = True
1472
1473 # Expect items
1474 self._skip_ws_and_comments()
1475 items_text_loc = self._text_loc
1476 old_var_names = self._var_names.copy()
1477 old_label_names = self._label_names.copy()
1478 self._var_names = set() # type: Set[str]
1479 self._label_names = set() # type: Set[str]
1480 items = self._parse_items()
1481 self._var_names = old_var_names
1482 self._label_names = old_label_names
1483
1484 # Expect suffix
1485 self._expect_pat(
1486 self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
1487 )
1488
1489 # Register macro
1490 self._macro_defs[name] = _MacroDef(
1491 name, param_names, _Group(items, items_text_loc), begin_text_loc
1492 )
1493
1494 return True
1495
1496 # Patterns for _try_parse_macro_exp()
1497 _macro_exp_prefix_pat = re.compile(r"m\b")
1498 _macro_exp_colon_pat = re.compile(r":")
1499
1500 # Tries to parse a macro expansion, returning a macro expansion item
1501 # on success.
1502 def _try_parse_macro_exp(self):
1503 begin_text_loc = self._text_loc
1504
1505 # Match prefix
1506 if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
1507 # No match
1508 return
1509
1510 # Expect `:`
1511 self._skip_ws()
1512 self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")
1513
1514 # Expect a macro name
1515 self._skip_ws()
1516 name_text_loc = self._text_loc
1517 m = self._expect_pat(_py_name_pat, "Expecting a valid macro name")
1518
1519 # Validate name
1520 name = m.group(0)
1521 macro_def = self._macro_defs.get(name)
1522
1523 if macro_def is None:
1524 _raise_error("Unknown macro name `{}`".format(name), name_text_loc)
1525
1526 # Expect `(`
1527 self._skip_ws()
1528 self._expect_pat(self._left_paren_pat, "Expecting `(`")
1529
1530 # Try to parse comma-separated parameter values
1531 params_text_loc = self._text_loc
1532 params = [] # type: List[_MacroExpParam]
1533 expect_comma = False
1534
1535 while True:
1536 self._skip_ws()
1537
1538 # End?
1539 if self._try_parse_pat(self._right_paren_pat) is not None:
1540 # End
1541 break
1542
1543 # Expect a Value
1544 if expect_comma:
1545 self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")
1546
1547 self._skip_ws()
1548 param_text_loc = self._text_loc
1549 params.append(
1550 _MacroExpParam(
1551 *self._expect_const_int_name_expr(True, True), param_text_loc
1552 )
1553 )
1554 expect_comma = True
1555
1556 # Validate parameter values
1557 if len(params) != len(macro_def.param_names):
1558 sing_plur = "" if len(params) == 1 else "s"
1559 _raise_error(
1560 "Macro expansion passes {} parameter{} while the definition expects {}".format(
1561 len(params), sing_plur, len(macro_def.param_names)
1562 ),
1563 params_text_loc,
1564 )
1565
1566 # Return item
1567 return _MacroExp(name, params, begin_text_loc)
1568
1569 # Tries to parse a base item (anything except a repetition),
1570 # returning it on success.
1571 def _try_parse_base_item(self):
1572 # Byte item?
1573 item = self._try_parse_byte()
1574
1575 if item is not None:
1576 return item
1577
1578 # String item?
1579 item = self._try_parse_str()
1580
1581 if item is not None:
1582 return item
1583
1584 # Value, variable assignment, or byte order setting item?
1585 item = self._try_parse_val_or_var_assign_or_set_bo()
1586
1587 if item is not None:
1588 return item
1589
1590 # Label or offset setting item?
1591 item = self._try_parse_label_or_set_offset()
1592
1593 if item is not None:
1594 return item
1595
1596 # Offset alignment item?
1597 item = self._try_parse_align_offset()
1598
1599 if item is not None:
1600 return item
1601
1602 # Filling item?
1603 item = self._try_parse_fill_until()
1604
1605 if item is not None:
1606 return item
1607
1608 # Group item?
1609 item = self._try_parse_group()
1610
1611 if item is not None:
1612 return item
1613
1614 # Repetition block item?
1615 item = self._try_parse_rep_block()
1616
1617 if item is not None:
1618 return item
1619
1620 # Conditional block item?
1621 item = self._try_parse_cond_block()
1622
1623 if item is not None:
1624 return item
1625
1626 # Macro expansion?
1627 item = self._try_parse_macro_exp()
1628
1629 if item is not None:
1630 return item
1631
1632 # Pattern for _try_parse_rep_post()
1633 _rep_post_prefix_pat = re.compile(r"\*")
1634
1635 # Tries to parse a post-item repetition, returning the expression
1636 # string and AST expression node on success.
1637 def _try_parse_rep_post(self):
1638 # Match prefix
1639 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1640 # No match
1641 return
1642
1643 # Return expression string and AST expression
1644 self._skip_ws_and_comments()
1645 return self._expect_rep_mul_expr()
1646
1647 # Tries to parse an item, possibly followed by a repetition,
1648 # returning `True` on success.
1649 #
1650 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    """Try to parse one item, with an optional post-item repetition.

    Appends the parsed item to `items` and returns `True` on success;
    returns `None` when no base item could be parsed. A repeatable item
    (`_RepableItem` instance) followed by a post-item repetition is
    wrapped in a `_Rep` item before being appended.
    """
    self._skip_ws_and_comments()
    parsed = self._try_parse_base_item()

    if parsed is None:
        return None

    if isinstance(parsed, _RepableItem):
        # Look for a post-item repetition
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_expr = self._try_parse_rep_post()

        if rep_expr is not None:
            parsed = _Rep(parsed, *rep_expr, rep_text_loc)

    items.append(parsed)
    return True
1671
1672 # Parses and returns items, skipping whitespaces, insignificant
1673 # symbols, and comments when allowed, and stopping at the first
1674 # unknown character.
1675 #
1676 # Accepts and registers macro definitions if `accept_macro_defs`
1677 # is `True`.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    """Parse consecutive items and return them as a list.

    Stops when the input is done or when nothing can be parsed at the
    current position. Macro definitions are only tried (and registered)
    when `accept_macro_defs` is `True`; they don't add to the returned
    list.
    """
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: keep going
            continue

        if not (accept_macro_defs and self._try_parse_macro_def()):
            # Neither an item nor an acceptable macro definition
            break

    return parsed_items
1691
1692 # Parses the whole Normand input, setting `self._res` to the main
1693 # group item on success.
def _parse(self):
    """Parse the whole input and store the main group in `self._res`.

    Reports an error through `self._raise_error()` when something
    remains after the first level items.
    """
    if not self._normand.strip():
        # Nothing but whitespace: the result is an empty main group
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Anything left over at this point is unexpected
    self._skip_ws_and_comments()

    if self._isnt_done():
        self._raise_error(
            "Unexpected character `{}`".format(self._normand[self._at])
        )

    self._res = _Group(top_items, self._text_loc)
1713
1714
1715 # The return type of parse().
class ParseResult:
    """Return type of parse().

    Not meant to be instantiated directly: `__init__()` always raises
    `NotImplementedError`; instances come from the private `_create()`
    class method.
    """

    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which is disabled on purpose
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Keep the arguments as is (no copy)
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    @property
    def data(self):
        """Generated data."""
        return self._data

    @property
    def variables(self):
        """Dictionary of updated variable names to their last computed value."""
        return self._vars

    @property
    def labels(self):
        """Dictionary of updated main group label names to their computed value."""
        return self._labels

    @property
    def offset(self):
        """Updated offset."""
        return self._offset

    @property
    def byte_order(self):
        """Updated byte order."""
        return self._bo
1772
1773
1774 # Raises a parse error for the item `item`, creating it using the
1775 # message `msg`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    """Raise a parse error with the message `msg` at the location of `item`."""
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1778
1779
1780 # The `ICITTE` reserved name.
1781 _icitte_name = "ICITTE"
1782
1783
1784 # Base node visitor.
1785 #
1786 # Calls the _visit_name() method for each name node which isn't the name
1787 # of a call.
1788 class _NodeVisitor(ast.NodeVisitor):
1789 def __init__(self):
1790 self._parent_is_call = False
1791
1792 def generic_visit(self, node: ast.AST):
1793 if type(node) is ast.Call:
1794 self._parent_is_call = True
1795 elif type(node) is ast.Name and not self._parent_is_call:
1796 self._visit_name(node.id)
1797
1798 super().generic_visit(node)
1799 self._parent_is_call = False
1800
1801 @abc.abstractmethod
1802 def _visit_name(self, name: str):
1803 ...
1804
1805
1806 # Expression validator: validates that all the names within the
1807 # expression are allowed.
class _ExprValidator(_NodeVisitor):
    """Expression validator: checks that every visited name is allowed.

    A name is allowed when it's the reserved `_icitte_name` or when it's
    part of `allowed_names`. Any other name is reported through
    `_raise_error()` at `text_loc`, with the sorted list of legal names
    appended to the message.
    """

    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # The reserved name is always legal, so this set is never empty
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)

        _raise_error(
            msg,
            self._text_loc,
        )
1836
1837
1838 # Generator state.
1839 class _GenState:
1840 def __init__(
1841 self,
1842 variables: VariablesT,
1843 labels: LabelsT,
1844 offset: int,
1845 bo: Optional[ByteOrder],
1846 ):
1847 self.variables = variables.copy()
1848 self.labels = labels.copy()
1849 self.offset = offset
1850 self.bo = bo
1851
1852 def __repr__(self):
1853 return "_GenState({}, {}, {}, {})".format(
1854 repr(self.variables), repr(self.labels), repr(self.offset), repr(self.bo)
1855 )
1856
1857
1858 # Fixed-length number item instance.
1859 class _FlNumItemInst:
1860 def __init__(
1861 self,
1862 item: _FlNum,
1863 offset_in_data: int,
1864 state: _GenState,
1865 parse_error_msgs: List[ParseErrorMessage],
1866 ):
1867 self._item = item
1868 self._offset_in_data = offset_in_data
1869 self._state = state
1870 self._parse_error_msgs = parse_error_msgs
1871
1872 @property
1873 def item(self):
1874 return self._item
1875
1876 @property
1877 def offset_in_data(self):
1878 return self._offset_in_data
1879
1880 @property
1881 def state(self):
1882 return self._state
1883
1884 @property
1885 def parse_error_msgs(self):
1886 return self._parse_error_msgs
1887
1888
1889 # Generator of data and final state from a group item.
1890 #
1891 # Generation happens in memory at construction time. After building, use
1892 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1893 # get the resulting context.
1894 #
1895 # The steps of generation are:
1896 #
1897 # 1. Handle each item in prefix order.
1898 #
1899 # The handlers append bytes to `self._data` and update some current
1900 # state object (`_GenState` instance).
1901 #
1902 # When handling a fixed-length number item, try to evaluate its
1903 # expression using the current state. If this fails, then it might be
1904 # because the expression refers to a "future" label: save the current
1905 # offset in `self._data` (generated data) and a snapshot of the
1906 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1907 # object). _gen_fl_num_item_insts() will deal with this later. A
1908 # `_FlNumItemInst` instance also contains a snapshot of the current
1909 # parsing error messages (`self._parse_error_msgs`) which need to be
1910 # taken into account when handling the instance later.
1911 #
1912 # When handling the items of a group, keep a map of immediate label
1913 # names to their offset. Then, after having processed all the items,
1914 # update the relevant saved state snapshots in
1915 # `self._fl_num_item_insts` with those immediate label values.
1916 # _gen_fl_num_item_insts() will deal with this later.
1917 #
1918 # 2. Handle all the fixed-length number item instances of which the
1919 # expression evaluation failed before.
1920 #
1921 # At this point, `self._fl_num_item_insts` contains everything that's
1922 # needed to evaluate the expressions, including the values of
1923 # "future" labels from the point of view of some fixed-length number
1924 # item instance.
1925 #
1926 # If an evaluation fails at this point, then it's a user error. Add
1927 # to the parsing error all the saved parsing error messages of the
1928 # instance. Those additional messages add precious context to the
1929 # error.
1930 class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    """Generate from `group` immediately, at construction time.

    The initial generation state is a new `_GenState` built from
    `variables`, `labels`, `offset`, and `bo`.
    """
    self._macro_defs = macro_defs

    # Fixed-length number item instances to generate later
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Stack of contextual parse error messages
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
1944
1945 # Generated bytes.
@property
def data(self):
    """Generated bytes (`self._data`)."""
    generated = self._data
    return generated
1949
1950 # Updated variables.
@property
def variables(self):
    """Variables of the final generation state."""
    final_state = self._final_state
    return final_state.variables
1954
1955 # Updated main group labels.
@property
def labels(self):
    """Labels of the final generation state."""
    final_state = self._final_state
    return final_state.labels
1959
1960 # Updated offset.
@property
def offset(self):
    """Offset of the final generation state."""
    final_state = self._final_state
    return final_state.offset
1964
1965 # Updated byte order.
@property
def bo(self):
    """Byte order of the final generation state."""
    final_state = self._final_state
    return final_state.bo
1969
1970 # Evaluates the expression `expr` of which the original string is
1971 # `expr_str` at the location `text_loc` considering the current
1972 # generation state `state`.
1973 #
1974 # If `allow_float` is `True`, then the type of the result may be
1975 # `float` too.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    """Evaluate `expr` (original text `expr_str`) against `state`.

    The available symbols are the labels of `state`, then the reserved
    `_icitte_name` bound to the current offset, then the variables of
    `state`; a later entry replaces an earlier one having the same name.

    Returns an `int`, or possibly a `float` when `allow_float` is
    `True`; a `bool` result is converted to `int`. Any evaluation
    failure or unexpected result type is reported through
    `_raise_error()` at `text_loc`.
    """
    # Symbols, in increasing order of precedence
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Every name (except names of calls) must be one of the symbols
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # NOTE(review): eval() receives `None` as its globals; names of
    # calls aren't checked by the validator above, so the expression
    # should come from a trusted source.
    try:
        val = eval(compile(expr, "", "eval"), None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize `bool` to `int`
    if type(val) is bool:
        val = int(val)

    # Exact type check (no subclass accepted)
    expected_types = {int}  # type: Set[type]
    type_msg = "`int`"

    if allow_float:
        expected_types.add(float)
        type_msg += " or `float`"

    if type(val) in expected_types:
        return val

    _raise_error(
        "Invalid expression `{}`: expecting result type {}, not `{}`".format(
            expr_str, type_msg, type(val).__name__
        ),
        text_loc,
    )

    return val
2026
2027 # Evaluates the expression of `item` considering the current
2028 # generation state `state`.
2029 #
2030 # If `allow_float` is `True`, then the type of the result may be
2031 # `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    """Evaluate the expression of `item` against `state`.

    Forwards the `expr_str`, `expr`, and `text_loc` attributes of `item`
    to `_Gen._eval_expr()` and returns its result.
    """
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2041
2042 # Handles the byte item `item`.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    """Append the value of the byte item `item` and advance the offset."""
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
2046
2047 # Handles the string item `item`.
def _handle_str_item(self, item: _Str, state: _GenState):
    """Append the data of the string item `item` and advance the offset."""
    str_data = item.data
    self._data += str_data
    state.offset += item.size
2051
2052 # Handles the byte order setting item `item`.
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    """Make the byte order of `item` the current byte order of `state`."""
    new_bo = item.bo
    state.bo = new_bo
2056
2057 # Handles the variable assignment item `item`.
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    """Evaluate the expression of `item` and store it as a variable.

    The result may be an `int` or a `float`.
    """
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
2061
2062 # Handles the fixed-length number item `item`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    """Append the bytes of the fixed-length number item `item`.

    When the data can't be generated right away, records a
    `_FlNumItemInst` (with deep copies of `state` and of the current
    parse error messages) and appends zero bytes as a placeholder of the
    same length.
    """
    # More than 8 bits to encode requires a defined byte order
    if state.bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate generation
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Keep everything needed to try again later
        pending_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending_inst)

        # Placeholder bytes for this instance
        encoded = b"\x00" * (item.len // 8)

    self._data += encoded
    state.offset += len(encoded)
2095
2096 # Returns the size, in bytes, required to encode the value `val`
2097 # with LEB128 (signed version if `is_signed` is `True`).
2098 @staticmethod
2099 def _leb128_size_for_val(val: int, is_signed: bool):
2100 if val < 0:
2101 # Equivalent upper bound.
2102 #
2103 # For example, if `val` is -128, then the full integer for
2104 # this number of bits would be [-128, 127].
2105 val = -val - 1
2106
2107 # Number of bits (add one for the sign if needed)
2108 bits = val.bit_length() + int(is_signed)
2109
2110 if bits == 0:
2111 bits = 1
2112
2113 # Seven bits per byte
2114 return math.ceil(bits / 7)
2115
2116 # Handles the LEB128 integer item `item`.
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    """Encode the value of the LEB128 integer item `item` and append it.

    The signed encoding is used when `item` is exactly an `_SLeb128Int`;
    otherwise the unsigned one is used. Advances the offset of `state`
    by the number of appended bytes.

    Reports an error through `_raise_error_for_item()` when the
    expression of an unsigned item evaluates to a negative value.
    """
    # Compute value (integer only)
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # The unsigned encoding has no representation for a negative value:
    # encoding one anyway would silently produce bytes which decode to
    # another number (-1 would become 0x7f, that is, 127).
    if not is_signed and val < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte: seven LSBs, MSB of the byte set (continue)
    for _ in range(size):
        self._data.append((val & 0x7F) | 0x80)
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= ~0x80

    # Update offset
    state.offset += size
2135
2136 # Handles the group item `item`, removing the immediate labels from
2137 # `state` at the end if `remove_immediate_labels` is `True`.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    """Handle each item of the group item `item`, in order.

    The immediate labels of the group (its direct `_Label` items) are
    removed from `state` afterwards when `remove_immediate_labels` is
    `True`. In all cases they are added to the state snapshot of every
    fixed-length number item instance recorded while handling this
    group.
    """
    # Instances recorded from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)
    group_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Immediate label: remember its offset
            group_labels[child.name] = state.offset

        self._handle_item(child, state)

    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Make the immediate labels visible to the pending instances
    for pending_inst in self._fl_num_item_insts[first_inst_index:]:
        pending_inst.state.labels.update(group_labels)
2161
2162 # Handles the repetition item `item`.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    """Handle the inner item of the repetition item `item` N times.

    N is the result of the expression of `item`; a negative result is
    reported through `_raise_error_for_item()`.
    """
    count = _Gen._eval_item_expr(item, state)

    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    remaining = count

    while remaining > 0:
        self._handle_item(item.item, state)
        remaining -= 1
2179
2180 # Handles the conditional item `item`.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    """Handle the "true" or the "false" item of the conditional item `item`.

    The "true" item is selected when the expression of `item` evaluates
    to a truthy value.
    """
    cond_val = _Gen._eval_item_expr(item, state)
    selected = item.true_item if cond_val else item.false_item
    self._handle_item(selected, state)
2190
2191 # Evaluates the parameters of the macro expansion item `item`
2192 # considering the initial state `init_state` and returns a new state
2193 # to handle the items of the macro.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    """Build the state used to handle the items of an expanded macro.

    The returned `_GenState` starts with no label, the offset and byte
    order of `init_state`, and one variable per macro parameter name
    holding the corresponding parameter expression evaluated against
    `init_state` (`int` or `float`).
    """
    macro_def = self._macro_defs[item.name]
    macro_state = _GenState({}, {}, init_state.offset, init_state.bo)
    macro_vars = macro_state.variables

    for name, param in zip(macro_def.param_names, item.params):
        macro_vars[name] = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )

    return macro_state
2207
2208 # Handles the macro expansion item `item`.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    """Handle the group of the macro which `item` expands.

    The group is handled with its own state (see
    `_eval_macro_exp_params()`); the offset of `state` then advances by
    the number of bytes appended meanwhile. A contextual message is on
    the `self._parse_error_msgs` stack while the group is handled, and
    any `ParseError` is passed to `_augment_error()` with the same text.
    """
    ctx_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # State of the expansion
        macro_state = self._eval_macro_exp_params(item, state)

        # Handle the macro group with the contextual message pushed
        size_before = len(self._data)
        ctx_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                ctx_text, item.text_loc
            )
        )
        self._parse_error_msgs.append(ctx_msg)
        self._handle_item(self._macro_defs[item.name].group, macro_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        _augment_error(exc, ctx_text, item.text_loc)

    # Account for the bytes appended by the expansion
    appended_size = len(self._data) - size_before
    state.offset += appended_size
2231
2232 # Handles the offset setting item `item`.
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    """Set the current offset of `state` to the value of `item`."""
    new_offset = item.val
    state.offset = new_offset
2235
2236 # Handles the offset alignment item `item` (adds padding).
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    """Align the current offset as `item` requires, appending padding.

    `item.val` is divided by eight to get the alignment in bytes. The
    offset moves up to the next multiple of it (or stays put when
    already aligned) and one `item.pad_val` byte is appended per
    skipped byte.
    """
    start_offset = state.offset
    step = item.val // 8

    # Round up to the next multiple of `step`
    state.offset = (start_offset + step - 1) // step * step

    pad_size = state.offset - start_offset
    self._data += bytes([item.pad_val] * pad_size)
2242
2243 # Handles the filling item `item` (adds padding).
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    """Append padding until the offset reaches the value of `item`.

    The target offset is the result of the expression of `item`; a
    target below the current offset is reported through
    `_raise_error_for_item()`.
    """
    target_offset = _Gen._eval_item_expr(item, state)
    cur_offset = state.offset

    if target_offset < cur_offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, cur_offset
            ),
            item,
        )

    # One padding byte per byte between the two offsets
    self._data += bytes([item.pad_val] * (target_offset - cur_offset))
    state.offset = target_offset
2262
2263 # Handles the label item `item`.
def _handle_label_item(self, item: _Label, state: _GenState):
    """Bind the name of the label item `item` to the current offset."""
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2266
# Handles the item `item` by calling the handler registered for its
# exact type in `self._item_handlers`, returning what the handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    """Dispatch `item` to the handler registered for its exact type.

    Returns what the handler found in `self._item_handlers` returns.
    """
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2271
2272 # Generates the data for a fixed-length integer item instance having
2273 # the value `val` and returns it.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    """Return the bytes encoding the integer `val` on `item.len` bits.

    Accepts any value from the lowest signed value to the highest
    unsigned value of that length; anything else is reported through
    `_raise_error_for_item()`. An undefined byte order (`None`) is
    treated like big endian.
    """
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not (lowest <= val <= highest):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    is_big_endian = state.bo in (None, ByteOrder.BE)

    # Always encode on 64 bits first so that the sign bit is extended
    # whatever the requested length.
    packed = struct.pack(
        "{}{}".format(
            ">" if is_big_endian else "<",
            "Q" if val >= 0 else "q",
        ),
        val,
    )

    # Keep the least significant `byte_count` bytes
    byte_count = item.len // 8

    if is_big_endian:
        # They are the last ones
        return packed[-byte_count:]

    # They are the first ones
    assert state.bo == ByteOrder.LE
    return packed[:byte_count]
2307
2308 # Generates the data for a fixed-length floating point number item
2309 # instance having the value `val` and returns it.
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Encodes the floating point number `val` on `item.len` bits
    # (single precision for 32, double precision for 64) using the
    # byte order of `state` and returns the resulting `bytes` object.

    # Only the two lengths which `struct` can encode are valid
    if item.len not in (32, 64):
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # `struct` format: byte order character (no byte order at all is
    # treated like big endian) followed with the type character.
    bo_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"
    fmt = "{}{}".format(bo_char, type_char)

    # Encode
    return struct.pack(fmt, val)
2328
2329 # Generates the data for a fixed-length number item instance and
2330 # returns it.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    # Evaluate the expression of `item` (the `True` argument
    # presumably allows a floating point result; confirm against
    # _eval_item_expr()).
    value = self._eval_item_expr(item, state, True)

    # Anything which isn't exactly an `int` must be exactly a `float`
    if type(value) is not int:
        assert type(value) is float
        return self._gen_fl_float_item_inst_data(value, item, state)

    # Integer
    return self._gen_fl_int_item_inst_data(value, item, state)
2341
2342 # Generates the data for all the fixed-length number item instances
2343 # and writes it at the correct offset within `self._data`.
def _gen_fl_num_item_insts(self):
    # For each recorded fixed-length number item instance, encode the
    # number with the state saved for this instance and overwrite the
    # bytes of `self._data` starting at the offset saved for it.
    for fl_num_inst in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(
                fl_num_inst.item, fl_num_inst.state
            )
        except ParseError as exc:
            # Append the parse error messages saved for this instance,
            # last saved one first, before letting the error
            # propagate.
            for saved_msg in reversed(fl_num_inst.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise
        else:
            # Replace exactly `len(encoded)` bytes of `self._data`
            begin = fl_num_inst.offset_in_data
            self._data[begin : begin + len(encoded)] = encoded
2359
2360 # Generates the data (`self._data`) and final state
2361 # (`self._final_state`) from `group` and the initial state `state`.
def _gen(self, group: _Group, state: _GenState):
    # Dispatch table: exact item type to the bound method handling
    # such an item (both LEB128 integer types share one handler).
    self._item_handlers = {
        # Data-producing items
        _Byte: self._handle_byte_item,
        _Str: self._handle_str_item,
        _FlNum: self._handle_fl_num_item,
        _SLeb128Int: self._handle_leb128_int_item,
        _ULeb128Int: self._handle_leb128_int_item,
        # Offset and padding items
        _AlignOffset: self._handle_align_offset_item,
        _FillUntil: self._handle_fill_until_item,
        _SetOffset: self._handle_set_offset_item,
        # State items
        _Label: self._handle_label_item,
        _SetBo: self._handle_set_bo_item,
        _VarAssign: self._handle_var_assign_item,
        # Compound items
        _Cond: self._handle_cond_item,
        _Group: self._handle_group_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]

    # Start from empty data
    self._data = bytearray()

    # Handle the root group. The `False` argument means the immediate
    # labels of this group are _not_ removed, because the `labels`
    # property offers them.
    self._handle_group_item(group, state, False)

    # `state` was updated in place: it's the final state now
    self._final_state = state

    # The fixed-length numbers are encoded last, now that their full
    # state is known.
    self._gen_fl_num_item_insts()
2395
2396
2397 # Returns a `ParseResult` instance containing the bytes encoded by the
2398 # input string `normand`.
2399 #
2400 # `init_variables` is a dictionary of initial variable names (valid
2401 # Python names) to integral values. A variable name must not be the
2402 # reserved name `ICITTE`.
2403 #
2404 # `init_labels` is a dictionary of initial label names (valid Python
2405 # names) to integral values. A label name must not be the reserved name
2406 # `ICITTE`.
2407 #
2408 # `init_offset` is the initial offset.
2409 #
2410 # `init_byte_order` is the initial byte order.
2411 #
2412 # Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # A missing dictionary means "no initial entries". A dictionary
    # which the caller provides is used as is, even when empty.
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from what the parser
    # found (items and macro definitions) and the initial state.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the generated data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2438
2439
2440 # Raises a command-line error with the message `msg`.
def _raise_cli_error(msg: str) -> NoReturn:
    # Always raises `RuntimeError`: the message is `msg` behind a
    # fixed prefix identifying a command-line error.
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2443
2444
2445 # Returns a dictionary of string to integers from the list of strings
2446 # `args` containing `NAME=VAL` entries.
def _dict_from_arg(args: Optional[List[str]]):
    # Converts `NAME=VAL` strings to a dictionary mapping each `NAME`
    # to the integer `VAL`; a later entry replaces an earlier one
    # having the same name.
    #
    # Reports a command-line error for any entry which isn't a valid
    # name, `=`, and decimal digits.
    assignments = {}  # type: LabelsT

    # No entries (`None` or empty list): nothing to convert
    if not args:
        return assignments

    # All the entries must match the same pattern
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for entry in args:
        entry_m = assign_pat.match(entry)

        if entry_m is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        assignments[entry_m.group(1)] = int(entry_m.group(2))

    return assignments
2462
2463
2464 # Parses the command-line arguments and returns, in this order:
2465 #
2466 # 1. The input file path, or `None` if none.
2467 # 2. The Normand input text.
2468 # 3. The initial offset.
2469 # 4. The initial byte order.
2470 # 5. The initial variables.
2471 # 6. The initial labels.
def _parse_cli_args():
    # Parses the command-line arguments of the process, reads the
    # Normand input text (from the file at the `PATH` argument, or
    # from the standard input without one), and returns the tuple
    # which the comment above this function describes.
    #
    # Reports a command-line error for an invalid `NAME=VAL` entry or
    # for a negative offset.
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels (`None` when the option is never given,
    # which _dict_from_arg() accepts)
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset, naming the offending value in the message
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order (`None` without the option)
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # The `choices` of the option only leave this value
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, labels
2547
2548
2549 # CLI entry point without exception handling.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    # Parse `normand` with the given initial state and write the
    # resulting bytes, as is, to the binary layer of the standard
    # output. Any exception which parse() raises propagates.
    out = sys.stdout.buffer
    out.write(parse(normand, variables, labels, offset, bo).data)
2558
2559
2560 # Prints the exception message `msg` and exits with status 1.
def _fail(msg: str) -> NoReturn:
    # Make sure the message ends with a period. This happens _before_
    # stripping, so a message ending with whitespace still gets one.
    text = msg if msg.endswith(".") else msg + "."

    # Report on the standard error stream and terminate
    print(text.strip(), file=sys.stderr)
    sys.exit(1)
2567
2568
2569 # CLI entry point.
def _run_cli():
    # Command-line arguments: any exception here is reported with its
    # message, and the process exits with status 1.
    try:
        cli_args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    # The first element is the input path (or `None`); the remaining
    # ones are the arguments of _run_cli_with_args(), in order.
    input_path = cli_args[0]

    try:
        _run_cli_with_args(*cli_args[1:])
    except ParseError as exc:
        import os.path

        # Location prefix: absolute input path, if any
        if input_path is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(input_path))

        # One line per parse error message, in reverse order, each
        # ending with some punctuation.
        lines = []

        for err_msg in reversed(exc.messages):
            line = "{}{}:{} - {}".format(
                prefix,
                err_msg.text_location.line_no,
                err_msg.text_location.col_no,
                err_msg.text,
            )

            if line[-1] not in ".:;":
                line += "."

            lines.append(line + "\n")

        _fail("".join(lines).strip())
    except Exception as exc:
        # Any other error: report its message only
        _fail(str(exc))
2600
2601
# Run the command-line tool only when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.0794 seconds and 3 git commands to generate.