Make it possible to specify more than one byte with `%`
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Module metadata.
__author__ = "Philippe Proulx"
__version__ = "0.16.0"

# Names which `from <this module> import *` exports.
__all__ = [
    "__author__",
    "__version__",
    "ByteOrder",
    "LabelsT",
    "parse",
    "ParseError",
    "ParseErrorMessage",
    "ParseResult",
    "TextLocation",
    "VariablesT",
]
46
47 import re
48 import abc
49 import ast
50 import sys
51 import copy
52 import enum
53 import math
54 import struct
55 import typing
56 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
57
58
# Text location (line and column numbers).
#
# Build an instance with `_create()`: calling the constructor directly
# raises `NotImplementedError`.
class TextLocation:
    # Creates and returns a text location having the line number
    # `line_no` and the column number `col_no`, bypassing `__init__()`.
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        self = cls.__new__(cls)
        self._init(line_no, col_no)
        return self

    # Direct construction isn't supported (see `_create()`).
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, line_no: int, col_no: int):
        self._line_no = line_no
        self._col_no = col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        return "TextLocation({}, {})".format(self._line_no, self._col_no)
86
87
# Any item: keeps the source text location it's built with.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location (as given at construction time).
    @property
    def text_loc(self):
        return self._text_loc
97
98
# Scalar item: an item which reports its size.
class _ScalarItem(_Item):
    # Size of this item (bytes).
    #
    # Declared abstract: the scalar item classes of this module
    # provide their own `size` property.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
106
107
108 # A repeatable item.
109 class _RepableItem:
110 pass
111
112
# Single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # Size (bytes): always one.
    @property
    def size(self):
        return 1

    def __repr__(self):
        # The value is shown in hexadecimal
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
130
131
# String.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # Size (bytes): the number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
149
150
# Byte order.
#
# The value of each member is its lowercase name.
@enum.unique
class ByteOrder(enum.Enum):
    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
159
160
# Byte order setting.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
173
174
# Label.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
188
189
# Offset setting.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
203
204
# Offset alignment.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val
        self._pad_val = pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fmt = "_AlignOffset({!r}, {!r}, {!r})"
        return fmt.format(self._val, self._pad_val, self._text_loc)
226
227
228 # Mixin of containing an AST expression and its string.
229 class _ExprMixin:
230 def __init__(self, expr_str: str, expr: ast.Expression):
231 self._expr_str = expr_str
232 self._expr = expr
233
234 # Expression string.
235 @property
236 def expr_str(self):
237 return self._expr_str
238
239 # Expression node to evaluate.
240 @property
241 def expr(self):
242 return self._expr
243
244
# Fill until some offset.
class _FillUntil(_Item, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, pad_val: int, text_loc: TextLocation
    ):
        # Initialize both bases explicitly: `_Item.__init__()` doesn't
        # chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._pad_val = pad_val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fmt = "_FillUntil({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._pad_val, self._text_loc)
266
267
# Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Initialize both bases explicitly: `_Item.__init__()` doesn't
        # chain to the mixin.
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable to assign.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fmt = "_VarAssign({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._name, self._expr_str, self._expr, self._text_loc)
289
290
# Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor
    # division).
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        fmt = "_FlNum({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._len, self._text_loc)
316
317
# LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    def __repr__(self):
        # Use the name of the actual class so that subclasses don't
        # need their own `__repr__()`
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self._expr_str, self._expr, self._text_loc
        )
331
332
# Unsigned LEB128 integer (adds nothing to `_Leb128Int`; the class
# itself identifies the kind).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
336
337
# Signed LEB128 integer (adds nothing to `_Leb128Int`; the class
# itself identifies the kind).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
341
342
# Group of items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
356
357
# Repetition item.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fmt = "_Rep({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._item, self._expr_str, self._expr, self._text_loc)
379
380
# Conditional item.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self,
        true_item: _Item,
        false_item: _Item,
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._true_item = true_item
        self._false_item = false_item

    # Item when condition is true.
    @property
    def true_item(self):
        return self._true_item

    # Item when condition is false.
    @property
    def false_item(self):
        return self._false_item

    def __repr__(self):
        fmt = "_Cond({!r}, {!r}, {!r}, {!r}, {!r})"
        return fmt.format(
            self._true_item,
            self._false_item,
            self._expr_str,
            self._expr,
            self._text_loc,
        )
414
415
# Macro definition item.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name = name
        self._param_names = param_names
        self._group = group

    # Name.
    @property
    def name(self):
        return self._name

    # Parameter names.
    @property
    def param_names(self):
        return self._param_names

    # Group of contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fmt = "_MacroDef({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._name, self._param_names, self._group, self._text_loc)
448
449
# Macro expansion parameter: an expression (string and AST node) with
# its source text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._text_loc = text_loc
        self._expr_str = expr_str
        self._expr = expr

    # Expression string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression.
    @property
    def expr(self):
        return self._expr

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fmt = "_MacroExpParam({!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._text_loc)
476
477
# Macro expansion item.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self,
        name: str,
        params: List[_MacroExpParam],
        text_loc: TextLocation,
    ):
        super().__init__(text_loc)
        self._name = name
        self._params = params

    # Name.
    @property
    def name(self):
        return self._name

    # Parameters.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fmt = "_MacroExp({!r}, {!r}, {!r})"
        return fmt.format(self._name, self._params, self._text_loc)
506
507
# A parsing error message: a string and a text location.
#
# Build an instance with `_create()`: calling the constructor directly
# raises `NotImplementedError`.
class ParseErrorMessage:
    # Creates and returns a message having the text `text` and the
    # source text location `text_loc`, bypassing `__init__()`.
    @classmethod
    def _create(cls, text: str, text_loc: TextLocation):
        msg = cls.__new__(cls)
        msg._init(text, text_loc)
        return msg

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`.
    def _init(self, text: str, text_loc: TextLocation):
        self._text = text
        self._text_loc = text_loc

    # Message text.
    @property
    def text(self):
        return self._text

    # Source text location.
    @property
    def text_location(self):
        return self._text_loc
532
533
# A parsing error containing one or more messages (`ParseErrorMessage`).
#
# Build an instance with `_create()`: calling the constructor directly
# raises `NotImplementedError`.
class ParseError(RuntimeError):
    # Creates and returns a parsing error having a single initial
    # message (`msg` at `text_loc`), bypassing `__init__()`.
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        exc = cls.__new__(cls)
        exc._init(msg, text_loc)
        return exc

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by `_create()`: `msg` also becomes
    # the message of the underlying `RuntimeError`.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._msgs = []  # type: List[ParseErrorMessage]
        self._add_msg(msg, text_loc)

    # Appends a message (`msg` at `text_loc`) to this error.
    def _add_msg(self, msg: str, text_loc: TextLocation):
        new_msg = ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
            msg, text_loc
        )
        self._msgs.append(new_msg)

    # Parsing error messages.
    #
    # The first message is the most specific one.
    @property
    def messages(self):
        return self._msgs
563
564
# Raises a new parsing error having the initial message `msg` at the
# text location `text_loc`.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    exc = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
568
569
# Appends the message `msg`, located at `text_loc`, to the messages of
# the parsing error `exc`.
def _add_error_msg(exc: ParseError, msg: str, text_loc: TextLocation):
    add_msg = exc._add_msg  # pyright: ignore[reportPrivateUsage]
    add_msg(msg, text_loc)
573
574
# Appends the message `msg`, located at `text_loc`, to the parsing
# error `exc`, and then reraises `exc`.
def _augment_error(exc: ParseError, msg: str, text_loc: TextLocation) -> NoReturn:
    exc._add_msg(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise exc
579
580
# Variables dictionary type (for type hints): string keys, integer or
# floating point number values.
VariablesT = Dict[str, Union[int, float]]


# Labels dictionary type (for type hints): string keys, integer values.
LabelsT = Dict[str, int]


# Python name pattern.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Macro definition dictionary type: string keys, `_MacroDef` values.
_MacroDefsT = Dict[str, _MacroDef]
595
596
597 # Normand parser.
598 #
599 # The constructor accepts a Normand input. After building, use the `res`
600 # property to get the resulting main group.
601 class _Parser:
602 # Builds a parser to parse the Normand input `normand`, parsing
603 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and current index within it
    self._normand = normand
    self._at = 0

    # Current line and column numbers (both start at 1)
    self._line_no = 1
    self._col_no = 1

    # Known names, initially the keys of `labels` and `variables`
    self._label_names = set(labels)
    self._var_names = set(variables)

    # Macro definitions (none yet)
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
613
614 # Result (main group).
615 @property
616 def res(self):
617 return self._res
618
619 # Macro definitions.
620 @property
621 def macro_defs(self):
622 return self._macro_defs
623
624 # Current text location.
@property
def _text_loc(self):
    # New text location object from the current line and column
    # numbers
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
630
631 # Returns `True` if this parser is done parsing.
632 def _is_done(self):
633 return self._at == len(self._normand)
634
635 # Returns `True` if this parser isn't done parsing.
636 def _isnt_done(self):
637 return not self._is_done()
638
639 # Raises a parse error, creating it using the message `msg` and the
640 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level function of the same name, using
    # the current text location
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
643
644 # Tries to make the pattern `pat` match the current substring,
645 # returning the match object and updating `self._at`,
646 # `self._line_no`, and `self._col_no` on success.
647 def _try_parse_pat(self, pat: Pattern[str]):
648 m = pat.match(self._normand, self._at)
649
650 if m is None:
651 return
652
653 # Skip matched string
654 self._at += len(m.group(0))
655
656 # Update line number
657 self._line_no += m.group(0).count("\n")
658
659 # Update column number
660 for i in reversed(range(self._at)):
661 if self._normand[i] == "\n" or i == 0:
662 if i == 0:
663 self._col_no = self._at + 1
664 else:
665 self._col_no = self._at - i
666
667 break
668
669 # Return match object
670 return m
671
672 # Expects the pattern `pat` to match the current substring,
673 # returning the match object and updating `self._at`,
674 # `self._line_no`, and `self._col_no` on success, or raising a parse
675 # error with the message `error_msg` on error.
676 def _expect_pat(self, pat: Pattern[str], error_msg: str):
677 # Match
678 m = self._try_parse_pat(pat)
679
680 if m is None:
681 # No match: error
682 self._raise_error(error_msg)
683
684 # Return match object
685 return m
686
687 # Pattern for _skip_ws_and_comments()
688 _ws_or_syms_or_comments_pat = re.compile(
689 r"(?:[\s/\\?&:;.,[\]_=|-]|#[^#]*?(?:\n|#))*"
690 )
691
692 # Skips as many whitespaces, insignificant symbol characters, and
693 # comments as possible.
694 def _skip_ws_and_comments(self):
695 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
696
697 # Pattern for _skip_ws()
698 _ws_pat = re.compile(r"\s*")
699
700 # Skips as many whitespaces as possible.
701 def _skip_ws(self):
702 self._try_parse_pat(self._ws_pat)
703
704 # Pattern for _try_parse_hex_byte()
705 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
706
707 # Tries to parse a hexadecimal byte, returning a byte item on
708 # success.
def _try_parse_hex_byte(self):
    # The item is located where its first nibble starts
    begin_text_loc = self._text_loc

    # High nibble
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        # Not a hexadecimal byte
        return

    # Low nibble: mandatory, possibly after whitespaces, insignificant
    # symbols, and comments
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    val = (int(high.group(0), 16) << 4) | int(low.group(0), 16)
    return _Byte(val, begin_text_loc)
727
728 # Patterns for _try_parse_bin_byte()
729 _bin_byte_bit_pat = re.compile(r"[01]")
730 _bin_byte_prefix_pat = re.compile(r"%+")
731
732 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    begin_text_loc = self._text_loc

    # Prefix: one or more `%`
    m_prefix = self._try_parse_pat(self._bin_byte_prefix_pat)

    if m_prefix is None:
        # Not a binary byte
        return

    # One byte to expect per `%` character
    byte_count = len(m_prefix.group(0))
    byte_items = []  # type: List[_Item]

    while len(byte_items) < byte_count:
        self._skip_ws_and_comments()
        byte_text_loc = self._text_loc
        val = 0

        # Expect eight bits, most significant first
        for _ in range(8):
            self._skip_ws_and_comments()
            m_bit = self._expect_pat(
                self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)"
            )
            val = (val << 1) | int(m_bit.group(0))

        byte_items.append(_Byte(val, byte_text_loc))

    # A single byte is returned as is, more than one as a group
    if len(byte_items) == 1:
        return byte_items[0]

    return _Group(byte_items, begin_text_loc)
767
768 # Patterns for _try_parse_dec_byte()
769 _dec_byte_prefix_pat = re.compile(r"\$")
770 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
771
772 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        # No `$` prefix: not a decimal byte
        return

    # Expect the value
    self._skip_ws()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")

    # Signed value
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Accept the union of the signed and unsigned byte ranges
    if not (-128 <= val <= 255):
        _raise_error("Invalid decimal byte value {}".format(val), begin_text_loc)

    # Two's complement for a negative value
    return _Byte(val % 256, begin_text_loc)
797
798 # Tries to parse a byte, returning a byte item on success.
799 def _try_parse_byte(self):
800 # Hexadecimal
801 item = self._try_parse_hex_byte()
802
803 if item is not None:
804 return item
805
806 # Binary
807 item = self._try_parse_bin_byte()
808
809 if item is not None:
810 return item
811
812 # Decimal
813 item = self._try_parse_dec_byte()
814
815 if item is not None:
816 return item
817
818 # Patterns for _try_parse_str()
819 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
820 _str_suffix_pat = re.compile(r'"')
821 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
822
823 # Strings corresponding to escape sequence characters
824 _str_escape_seq_strs = {
825 "0": "\0",
826 "a": "\a",
827 "b": "\b",
828 "e": "\x1b",
829 "f": "\f",
830 "n": "\n",
831 "r": "\r",
832 "t": "\t",
833 "v": "\v",
834 "\\": "\\",
835 '"': '"',
836 }
837
838 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    begin_text_loc = self._text_loc

    # Match prefix: optional `u16`/`u32` with a byte order, then `"`
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding (UTF-8 without an explicit prefix)
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Chaining one str.replace() call per escape sequence is wrong
    # because a later call can reinterpret the output of the input
    # of an earlier one: with an escaped backslash followed by `n`,
    # the second backslash and the `n` would be seen as a newline
    # escape sequence.
    #
    # An unknown escape sequence is kept as is.
    escape_seq_strs = self._str_escape_seq_strs

    def replace_escape_seq(esc_m):
        return escape_seq_strs.get(esc_m.group(1), esc_m.group(0))

    val = re.sub(r"\\(.)", replace_escape_seq, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
872
873 # Common right parenthesis pattern
874 _right_paren_pat = re.compile(r"\)")
875
876 # Patterns for _try_parse_group()
877 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
878
879 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    begin_text_loc = self._text_loc

    # Prefix: `(`, `!g`, or `!group`
    m_open = self._try_parse_pat(self._group_prefix_pat)

    if m_open is None:
        # Not a group
        return

    # Contained items
    items = self._parse_items()

    # The expected terminator depends on how the group was opened
    self._skip_ws_and_comments()

    if m_open.group(0) == "(":
        end_pat, end_str = self._right_paren_pat, ")"
    else:
        end_pat, end_str = self._block_end_pat, "!end"

    self._expect_pat(
        end_pat, "Expecting an item or `{}` (end of group)".format(end_str)
    )
    return _Group(items, begin_text_loc)
907
908 # Returns a stripped expression string and an AST expression node
909 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize: no surrounding whitespaces, and a space instead of
    # each newline
    expr_str = expr_str.strip().replace("\n", " ")

    try:
        # Parse as a single expression
        return expr_str, ast.parse(expr_str, mode="eval")
    except SyntaxError:
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(expr_str),
            text_loc,
        )
923
924 # Patterns for _try_parse_num_and_attr()
925 _val_expr_pat = re.compile(r"([^}:]+):\s*")
926 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
927 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
928
929 # Tries to parse a value and attribute (fixed length in bits or
930 # `leb128`), returning a value item on success.
def _try_parse_num_and_attr(self):
    begin_text_loc = self._text_loc

    # Expression part, up to and including `:`
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        # No match
        return

    # Create an expression node from the expression string
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), begin_text_loc)

    # Fixed length (bits)?
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), begin_text_loc)

    # LEB128?
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # `u` selects the unsigned class, `s` the signed one
    leb128_cls = _ULeb128Int if m_leb128.group(1) == "u" else _SLeb128Int
    return leb128_cls(expr_str, expr, begin_text_loc)
968
969 # Patterns for _try_parse_var_assign()
970 _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
971 _var_assign_expr_pat = re.compile(r"[^}]+")
972
973 # Tries to parse a variable assignment, returning a variable
974 # assignment item on success.
def _try_parse_var_assign(self):
    begin_text_loc = self._text_loc

    # Name followed by `=`
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        # Not a variable assignment
        return

    name = m_name.group(1)

    # The name must be neither the reserved name (`_icitte_name` is
    # defined elsewhere in this module) nor the name of a known label
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), begin_text_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), begin_text_loc)

    # Expression: everything up to (excluding) the next `}`
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), begin_text_loc)

    # Now a known variable name
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, begin_text_loc)
1013
1014 # Pattern for _try_parse_set_bo()
1015 _bo_pat = re.compile(r"[bl]e")
1016
1017 # Tries to parse a byte order name, returning a byte order setting
1018 # item on success.
def _try_parse_set_bo(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        # Not a byte order name
        return

    # The pattern only matches `be` or `le`, which are exactly the
    # values of the `ByteOrder` members: look up by value.
    return _SetBo(ByteOrder(m.group(0)), begin_text_loc)
1035
1036 # Patterns for _try_parse_val_or_bo()
1037 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
1038 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
1039
1040 # Tries to parse a value, a variable assignment, or a byte order
1041 # setting, returning an item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        # No `{` prefix
        return

    self._skip_ws()

    # Try, in this order: variable assignment, number, byte order
    # setting; keep the first item found
    for try_parse in (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    ):
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Expect suffix
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
1071
1072 # Returns a normalized version (so as to be parseable by int()) of
1073 # the constant integer string `s`, possibly negative, dealing with
1074 # any radix suffix.
1075 @staticmethod
1076 def _norm_const_int(s: str):
1077 neg = ""
1078 pos = s
1079
1080 if s.startswith("-"):
1081 neg = "-"
1082 pos = s[1:]
1083
1084 for r in "xXoObB":
1085 if pos.startswith("0" + r):
1086 # Already correct
1087 return s
1088
1089 # Try suffix
1090 asm_suf_base = {
1091 "h": "x",
1092 "H": "x",
1093 "q": "o",
1094 "Q": "o",
1095 "o": "o",
1096 "O": "o",
1097 "b": "b",
1098 "B": "B",
1099 }
1100
1101 for suf in asm_suf_base:
1102 if pos[-1] == suf:
1103 s = "{}0{}{}".format(neg, asm_suf_base[suf], pos.rstrip(suf))
1104
1105 return s
1106
1107 # Common constant integer patterns
1108 _pos_const_int_pat = re.compile(
1109 r"0[Xx][A-Fa-f0-9]+|0[Oo][0-7]+|0[Bb][01]+|[A-Fa-f0-9]+[hH]|[0-7]+[qQoO]|[01]+[bB]|\d+"
1110 )
1111 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
1112
1113 # Tries to parse an offset setting value (after the initial `<`),
1114 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    begin_text_loc = self._text_loc

    # Positive constant integer
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # Not an offset setting value
        return

    # Normalize, and then let int() infer the radix from the prefix
    norm_str = self._norm_const_int(m.group(0))
    return _SetOffset(int(norm_str, 0), begin_text_loc)
1127
1128 # Tries to parse a label name (after the initial `<`), returning a
1129 # label item on success.
def _try_parse_label_name(self):
    begin_text_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        # Not a label name
        return

    name = m.group(0)

    # The name must be neither the reserved name (`_icitte_name` is
    # defined elsewhere in this module), nor the name of a known label,
    # nor the name of a known variable
    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), begin_text_loc
        )

    if name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(name), begin_text_loc)

    if name in self._var_names:
        _raise_error("Existing variable named `{}`".format(name), begin_text_loc)

    # Now a known label name
    self._label_names.add(name)
    return _Label(name, begin_text_loc)
1159
1160 # Patterns for _try_parse_label_or_set_offset()
1161 _label_set_offset_prefix_pat = re.compile(r"<")
1162 _label_set_offset_suffix_pat = re.compile(r">")
1163
1164 # Tries to parse a label or an offset setting, returning an item on
1165 # success.
def _try_parse_label_or_set_offset(self):
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        # No `<` prefix
        return

    self._skip_ws()

    # Offset setting value first, and then label name
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Expect suffix
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1188
1189 # Pattern for _parse_pad_val()
1190 _pad_val_prefix_pat = re.compile(r"~")
1191
1192 # Tries to parse a padding value, returning the padding value, or 0
1193 # if none.
1194 def _parse_pad_val(self):
1195 # Padding value?
1196 self._skip_ws()
1197 pad_val = 0
1198
1199 if self._try_parse_pat(self._pad_val_prefix_pat) is not None:
1200 self._skip_ws()
1201 pad_val_text_loc = self._text_loc
1202 m = self._expect_pat(
1203 self._pos_const_int_pat,
1204 "Expecting a positive constant integer (byte value)",
1205 )
1206
1207 # Validate
1208 pad_val = int(self._norm_const_int(m.group(0)), 0)
1209
1210 if pad_val > 255:
1211 _raise_error(
1212 "Invalid padding byte value {}".format(pad_val),
1213 pad_val_text_loc,
1214 )
1215
1216 return pad_val
1217
1218 # Patterns for _try_parse_align_offset()
1219 _align_offset_prefix_pat = re.compile(r"@")
1220 _align_offset_val_pat = re.compile(r"\d+")
1221
1222 # Tries to parse an offset alignment, returning an offset alignment
1223 # item on success.
def _try_parse_align_offset(self):
    begin_text_loc = self._text_loc

    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No `@` prefix: not an offset alignment
        return

    # Expect an alignment
    self._skip_ws()
    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )
    align_val = int(m.group(0))

    # Only a positive multiple of eight is valid
    if align_val <= 0 or (align_val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                align_val
            ),
            align_text_loc,
        )

    # Optional padding value (0 if none)
    return _AlignOffset(align_val, self._parse_pad_val(), begin_text_loc)
1256
# Patterns for _try_parse_fill_until()
_fill_until_prefix_pat = re.compile(r"\+")

# NOTE(review): _try_parse_fill_until() below doesn't reference this
# pattern (it relies on _parse_pad_val() for the `~` part); kept in
# case other code uses it.
_fill_until_pad_val_prefix_pat = re.compile(r"~")

# Tries to parse a filling (`+`, a target offset expression, and an
# optional padding value).
#
# Returns a filling item, or `None` when the current position doesn't
# start with `+`.
def _try_parse_fill_until(self):
    item_text_loc = self._text_loc

    if self._try_parse_pat(self._fill_until_prefix_pat) is None:
        return None

    # Target offset: constant integer, name, or `{...}` expression
    self._skip_ws()
    target_str, target_expr = self._expect_const_int_name_expr(True)

    # Optional `~` padding value (0 when absent)
    padding = self._parse_pad_val()
    return _FillUntil(target_str, target_expr, padding, item_text_loc)
1279
# Patterns for _expect_const_int_name_expr()
_inner_expr_prefix_pat = re.compile(r"\{")
_inner_expr_pat = re.compile(r"[^}]+")
_inner_expr_suffix_pat = re.compile(r"\}")

# Parses, in this order of preference:
#
# 1. A constant integer, only if `accept_const_int` is `True` (a
#    negative one is rejected unless `allow_neg` is `True`).
# 2. A name.
# 3. An expression within `{` and `}`.
#
# Returns what self._ast_expr_from_str() returns for the parsed text
# (the callers unpack it as an expression string and an AST node).
def _expect_const_int_name_expr(
    self, accept_const_int: bool, allow_neg: bool = False
):
    start_text_loc = self._text_loc

    # Constant integer?
    if accept_const_int:
        int_match = self._try_parse_pat(self._const_int_pat)

        if int_match is not None:
            if int_match.group("neg") == "-" and not allow_neg:
                _raise_error("Expecting a positive constant integer", start_text_loc)

            return self._ast_expr_from_str(
                self._norm_const_int(int_match.group(0)), start_text_loc
            )

    # Name?
    name_match = self._try_parse_pat(_py_name_pat)

    if name_match is not None:
        return self._ast_expr_from_str(name_match.group(0), start_text_loc)

    # Last chance: a `{...}` expression
    if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
        if accept_const_int:
            expected = "a {}constant integer, a name, or `{{`".format(
                "" if allow_neg else "positive "
            )
        else:
            expected = "a name or `{`"

        # At this point it's invalid
        self._raise_error("Expecting {}".format(expected))

    # The reported location is the beginning of the inner expression,
    # not the `{`
    self._skip_ws()
    inner_text_loc = self._text_loc
    inner_str = self._expect_pat(
        self._inner_expr_pat, "Expecting an expression"
    ).group(0)

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(inner_str, inner_text_loc)
1336
# Parses the multiplier expression of a repetition (block or
# post-item): a constant integer, a name, or a `{...}` expression.
#
# Returns whatever _expect_const_int_name_expr() returns.
def _expect_rep_mul_expr(self):
    mul_expr = self._expect_const_int_name_expr(True)
    return mul_expr
1341
# Common block end pattern
_block_end_pat = re.compile(r"!end\b")

# Pattern for _try_parse_rep_block()
_rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")

# Tries to parse a repetition block (`!repeat`/`!r`, a multiplier
# expression, items, and `!end`).
#
# Returns a repetition item wrapping a group of the parsed items, or
# `None` when the current position doesn't start a repetition block.
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return None

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_str, mul_expr = self._expect_rep_mul_expr()

    # Repeated items
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # Mandatory `!end`
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_str, mul_expr, block_text_loc)
1375
# Patterns for _try_parse_cond_block()
_cond_block_prefix_pat = re.compile(r"!if\b")
_cond_block_else_pat = re.compile(r"!else\b")

# Tries to parse a conditional block (`!if`, a name or `{...}`
# expression, "true" items, an optional `!else` with "false" items,
# and `!end`).
#
# Returns a conditional item, or `None` when the current position
# doesn't start with `!if`.
def _try_parse_cond_block(self):
    block_text_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return None

    # Condition (a constant integer isn't accepted here)
    self._skip_ws_and_comments()
    cond_str, cond_expr = self._expect_const_int_name_expr(False)

    # "True" branch
    self._skip_ws_and_comments()
    then_text_loc = self._text_loc
    then_items = self._parse_items()

    # "False" branch: empty and located at the block itself unless
    # there's an `!else`
    else_items = []  # type: List[_Item]
    else_text_loc = block_text_loc
    self._skip_ws_and_comments()

    if self._try_parse_pat(self._cond_block_else_pat) is not None:
        self._skip_ws_and_comments()
        else_text_loc = self._text_loc
        else_items = self._parse_items()

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item, `!else`, or `!end` (end of conditional block)",
    )

    then_group = _Group(then_items, then_text_loc)
    else_group = _Group(else_items, else_text_loc)
    return _Cond(then_group, else_group, cond_str, cond_expr, block_text_loc)
1424
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition (`!macro`/`!m`, a name, a
# parenthesized list of parameter names, items, and `!end`).
#
# On success, registers the definition in `self._macro_defs` and
# returns `True`; returns `False` when the current position doesn't
# start a macro definition.
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        return False

    # Macro name, which must be new
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), name_text_loc)

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names up to `)`
    param_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma separates this parameter from any previous one
        if param_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        param_text_loc = self._text_loc
        param_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if param_name in param_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(param_name),
                param_text_loc,
            )

        param_names.append(param_name)

    # Parse the macro items with fresh sets of known variable and
    # label names, restoring the outer ones afterwards
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register macro
    body = _Group(body_items, body_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, param_names, body, def_text_loc
    )
    return True
1511
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m`, `:`, the name of an already
# defined macro, and a parenthesized list of parameter values).
#
# Returns a macro expansion item, or `None` when the current position
# doesn't start with `m`.
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        return None

    # Expect `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Macro name, which must refer to a known definition
    self._skip_ws()
    name_text_loc = self._text_loc
    macro_name = self._expect_pat(
        _py_name_pat, "Expecting a valid macro name"
    ).group(0)
    macro_def = self._macro_defs.get(macro_name)

    if macro_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), name_text_loc)

    # Expect `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values up to `)`
    params_text_loc = self._text_loc
    params = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma separates this value from any previous one
        if params:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        # Value: possibly negative constant integer, name, or
        # `{...}` expression
        self._skip_ws()
        param_text_loc = self._text_loc
        params.append(
            _MacroExpParam(
                *self._expect_const_int_name_expr(True, True),
                text_loc=param_text_loc
            )
        )

    # The expansion must pass exactly as many values as the
    # definition has parameters
    passed_count = len(params)
    expected_count = len(macro_def.param_names)

    if passed_count != expected_count:
        sing_plur = "" if passed_count == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                passed_count, sing_plur, expected_count
            ),
            params_text_loc,
        )

    return _MacroExp(macro_name, params, exp_text_loc)
1585
# Tries to parse a base item (anything which isn't a post-item
# repetition), returning it on success and `None` otherwise.
#
# The item parsers are tried in a fixed order; the first one which
# returns an item wins.
def _try_parse_base_item(self):
    item_parsers = (
        # Byte item
        self._try_parse_byte,
        # String item
        self._try_parse_str,
        # Value, variable assignment, or byte order setting item
        self._try_parse_val_or_var_assign_or_set_bo,
        # Label or offset setting item
        self._try_parse_label_or_set_offset,
        # Offset alignment item
        self._try_parse_align_offset,
        # Filling item
        self._try_parse_fill_until,
        # Group item
        self._try_parse_group,
        # Repetition block item
        self._try_parse_rep_block,
        # Conditional block item
        self._try_parse_cond_block,
        # Macro expansion
        self._try_parse_macro_exp,
    )

    for try_parse in item_parsers:
        parsed = try_parse()

        if parsed is not None:
            return parsed

    # Nothing matched
    return None
1648
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition (`*` followed by a multiplier
# expression).
#
# Returns what _expect_rep_mul_expr() returns (expression string and
# AST expression node), or `None` when the current position doesn't
# start with `*`.
def _try_parse_rep_post(self):
    if self._try_parse_pat(self._rep_post_prefix_pat) is None:
        # Not a post-item repetition
        return None

    # The multiplier is mandatory after `*`
    self._skip_ws_and_comments()
    mul_expr = self._expect_rep_mul_expr()
    return mul_expr
1663
# Tries to parse an item, possibly followed by a post-item
# repetition, and appends it to `items`.
#
# Returns `True` if an item was appended, or `None` if no base item
# could be parsed at the current position (after skipping whitespaces
# and comments).
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    parsed = self._try_parse_base_item()

    if parsed is None:
        return None

    # Only repeatable items may be followed by `*`
    if isinstance(parsed, _RepableItem):
        self._skip_ws_and_comments()
        rep_text_loc = self._text_loc
        rep_ret = self._try_parse_rep_post()

        if rep_ret is not None:
            # Wrap the base item within a repetition item
            parsed = _Rep(parsed, *rep_ret, text_loc=rep_text_loc)

    items.append(parsed)
    return True
1688
# Parses and returns items until the end of the input or until the
# first position at which nothing known starts.
#
# If `accept_macro_defs` is `True`, then macro definitions are also
# accepted between items (they're registered, not returned).
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item: go on with the next one
            continue

        # Not an item: stop unless it's an acceptable macro definition
        if not (accept_macro_defs and self._try_parse_macro_def()):
            break

    return parsed_items
1708
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Anything left once the first level items (and macro definitions)
# are parsed is reported as an unexpected character.
def _parse(self):
    # Special case: nothing but whitespaces means an empty main group
    if not self._normand.strip():
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Make sure there's nothing left
    self._skip_ws_and_comments()

    if self._isnt_done():
        unexpected = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(unexpected))

    # Set main group item
    self._res = _Group(top_items, self._text_loc)
1730
1731
# The return type of parse().
#
# Read-only view of the generated data and of the updated context
# (variables, labels, offset, and byte order).
#
# Calling the constructor raises `NotImplementedError`: instances
# come from the private _create() class method.
class ParseResult:
    # Builds an instance without going through __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initialization, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._vars, self._labels = data, variables, labels
        self._offset, self._bo = offset, bo

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1789
1790
# Raises a parse error with the message `msg` at the text location of
# the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1795
1796
# The `ICITTE` reserved name.
#
# _Gen._eval_expr() binds this name to the current offset when
# evaluating an expression, and _ExprValidator always accepts it.
_icitte_name = "ICITTE"
1799
1800
1801 # Base node visitor.
1802 #
1803 # Calls the _visit_name() method for each name node which isn't the name
1804 # of a call.
1805 class _NodeVisitor(ast.NodeVisitor):
1806 def __init__(self):
1807 self._parent_is_call = False
1808
1809 def generic_visit(self, node: ast.AST):
1810 if type(node) is ast.Call:
1811 self._parent_is_call = True
1812 elif type(node) is ast.Name and not self._parent_is_call:
1813 self._visit_name(node.id)
1814
1815 super().generic_visit(node)
1816 self._parent_is_call = False
1817
1818 @abc.abstractmethod
1819 def _visit_name(self, name: str):
1820 ...
1821
1822
# Expression validator: validates that all the names within the
# expression are allowed.
#
# The `ICITTE` reserved name is always allowed. Any other name must be
# part of `allowed_names`, otherwise visiting raises a parse error at
# `text_loc` which lists the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._expr_str = expr_str
        self._text_loc = text_loc
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        # Known and reachable variable/label name: nothing to report
        if name == _icitte_name or name in self._allowed_names:
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )

        # Work on a copy so as not to alter the set of the caller
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)

        if legal_names:
            quoted_names = sorted(
                ["`{}`".format(legal_name) for legal_name in legal_names]
            )
            msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))

        _raise_error(
            msg,
            self._text_loc,
        )
1853
1854
# Generator state.
#
# Holds the current variables, labels, offset, and byte order. The
# variable and label dictionaries are shallow copies of the ones
# passed at construction time.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Copy so that updating this state doesn't alter the
        # dictionaries of the caller
        self.variables = variables.copy()
        self.labels = labels.copy()
        self.offset = offset
        self.bo = bo

    def __repr__(self):
        fields = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*map(repr, fields))
1873
1874
# Fixed-length number item instance.
#
# Read-only record of a fixed-length number item together with the
# offset of its bytes within the generated data, a generation state,
# and a list of parse error messages.
class _FlNumItemInst:
    def __init__(
        self,
        item: _FlNum,
        offset_in_data: int,
        state: _GenState,
        parse_error_msgs: List[ParseErrorMessage],
    ):
        self._item, self._offset_in_data = item, offset_in_data
        self._state, self._parse_error_msgs = state, parse_error_msgs

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the bytes of this instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state to use to evaluate the expression of the item.
    @property
    def state(self):
        return self._state

    # Parse error messages recorded with this instance.
    @property
    def parse_error_msgs(self):
        return self._parse_error_msgs
1904
1905
1906 # Generator of data and final state from a group item.
1907 #
1908 # Generation happens in memory at construction time. After building, use
1909 # the `data`, `variables`, `labels`, `offset`, and `bo` properties to
1910 # get the resulting context.
1911 #
1912 # The steps of generation are:
1913 #
1914 # 1. Handle each item in prefix order.
1915 #
1916 # The handlers append bytes to `self._data` and update some current
1917 # state object (`_GenState` instance).
1918 #
1919 # When handling a fixed-length number item, try to evaluate its
1920 # expression using the current state. If this fails, then it might be
1921 # because the expression refers to a "future" label: save the current
1922 # offset in `self._data` (generated data) and a snapshot of the
1923 # current state within `self._fl_num_item_insts` (`_FlNumItemInst`
1924 # object). _gen_fl_num_item_insts() will deal with this later. A
1925 # `_FlNumItemInst` instance also contains a snapshot of the current
1926 # parsing error messages (`self._parse_error_msgs`) which need to be
1927 # taken into account when handling the instance later.
1928 #
1929 # When handling the items of a group, keep a map of immediate label
1930 # names to their offset. Then, after having processed all the items,
1931 # update the relevant saved state snapshots in
1932 # `self._fl_num_item_insts` with those immediate label values.
1933 # _gen_fl_num_item_insts() will deal with this later.
1934 #
1935 # 2. Handle all the fixed-length number item instances of which the
1936 # expression evaluation failed before.
1937 #
1938 # At this point, `self._fl_num_item_insts` contains everything that's
1939 # needed to evaluate the expressions, including the values of
1940 # "future" labels from the point of view of some fixed-length number
1941 # item instance.
1942 #
1943 # If an evaluation fails at this point, then it's a user error. Add
1944 # to the parsing error all the saved parsing error messages of the
1945 # instance. Those additional messages add precious context to the
1946 # error.
1947 class _Gen:
def __init__(
    self,
    group: _Group,
    macro_defs: _MacroDefsT,
    variables: VariablesT,
    labels: LabelsT,
    offset: int,
    bo: Optional[ByteOrder],
):
    # Known macro definitions (looked up by name when expanding)
    self._macro_defs = macro_defs

    # Fixed-length number item instances to handle after the first pass
    self._fl_num_item_insts = []  # type: List[_FlNumItemInst]

    # Stack of parse error messages providing context
    self._parse_error_msgs = []  # type: List[ParseErrorMessage]

    # Generation happens right away, from an initial state built from
    # the parameters
    init_state = _GenState(variables, labels, offset, bo)
    self._gen(group, init_state)
1961
# Generated bytes.
@property
def data(self):
    return self._data

# Updated variables (from the final state).
@property
def variables(self):
    final_state = self._final_state
    return final_state.variables

# Updated main group labels (from the final state).
@property
def labels(self):
    final_state = self._final_state
    return final_state.labels

# Updated offset (from the final state).
@property
def offset(self):
    final_state = self._final_state
    return final_state.offset

# Updated byte order (from the final state).
@property
def bo(self):
    final_state = self._final_state
    return final_state.bo
1986
# Evaluates the expression `expr` of which the original string is
# `expr_str` at the location `text_loc` considering the current
# generation state `state`.
#
# The available names are the labels of `state`, then `ICITTE` (the
# current offset), then the variables of `state`, each one overriding
# any previous name.
#
# Returns an `int` (a `bool` result is converted), or possibly a
# `float` if `allow_float` is `True`; raises a parse error for any
# illegal name, evaluation failure, or other result type.
#
# NOTE(review): this calls eval() on an expression which comes from
# the Normand text; only the names are validated beforehand, so don't
# feed untrusted input without reviewing this.
@staticmethod
def _eval_expr(
    expr_str: str,
    expr: ast.Expression,
    text_loc: TextLocation,
    state: _GenState,
    allow_float: bool = False,
):
    # Symbols: labels first, then `ICITTE`, then variables
    syms = dict(state.labels)  # type: VariablesT
    syms[_icitte_name] = state.offset
    syms.update(state.variables)

    # Validate the node and its children
    _ExprValidator(expr_str, text_loc, set(syms)).visit(expr)

    # Compile and evaluate expression node
    try:
        code = compile(expr, "", "eval")
        val = eval(code, None, syms)
    except Exception as exc:
        _raise_error(
            "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
            text_loc,
        )

    # Normalize a `bool` result to `int`
    if type(val) is bool:
        val = int(val)

    # Validate result type (exact type match, not isinstance())
    if allow_float:
        valid_types = (int, float)  # type: Tuple[type, ...]
        type_msg = "`int`" + " or `float`"
    else:
        valid_types = (int,)
        type_msg = "`int`"

    if type(val) not in valid_types:
        _raise_error(
            "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                expr_str, type_msg, type(val).__name__
            ),
            text_loc,
        )

    return val
2043
# Evaluates the expression of `item` (its `expr_str`, `expr`, and
# `text_loc` properties) considering the current generation state
# `state`.
#
# If `allow_float` is `True`, then the type of the result may be
# `float` too.
@staticmethod
def _eval_item_expr(
    item: Union[_FlNum, _Leb128Int, _FillUntil, _VarAssign, _Rep, _Cond],
    state: _GenState,
    allow_float: bool = False,
):
    expr_str, expr, text_loc = item.expr_str, item.expr, item.text_loc
    return _Gen._eval_expr(expr_str, expr, text_loc, state, allow_float)
2058
# Handles the byte item `item`: appends its value to the generated
# data and advances the offset by its size.
def _handle_byte_item(self, item: _Byte, state: _GenState):
    byte_val = item.val
    self._data.append(byte_val)
    state.offset += item.size
2063
# Handles the string item `item`: appends its data to the generated
# data and advances the offset by its size.
def _handle_str_item(self, item: _Str, state: _GenState):
    str_data = item.data
    self._data += str_data
    state.offset += item.size
2068
# Handles the byte order setting item `item`: makes its byte order
# the current one (no data is generated).
def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
    new_bo = item.bo
    state.bo = new_bo
2073
# Handles the variable assignment item `item`: evaluates its
# expression (`int` or `float` result) and stores the result under
# its name (no data is generated).
def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
    new_val = self._eval_item_expr(item, state, True)
    state.variables[item.name] = new_val
2078
# Handles the fixed-length number item `item`.
#
# If the bytes of the item can't be generated right away, then zeros
# are appended instead and everything needed to try again later is
# saved in `self._fl_num_item_insts`.
def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
    # More than one byte to encode requires a defined byte order
    if state.bo is None and item.len > 8:
        _raise_error_for_item(
            "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                item.expr_str
            ),
            item,
        )

    try:
        # Immediate evaluation
        encoded = self._gen_fl_num_item_inst_data(item, state)
    except Exception:
        # Deliberately broad: any failure means "try again later" with
        # snapshots of the current state and parse error messages
        pending_inst = _FlNumItemInst(
            item,
            len(self._data),
            copy.deepcopy(state),
            copy.deepcopy(self._parse_error_msgs),
        )
        self._fl_num_item_insts.append(pending_inst)

        # Reserve space in `self._data` for this instance
        encoded = b"\x00" * (item.len // 8)

    # Append bytes and update offset
    self._data += encoded
    state.offset += len(encoded)
2112
2113 # Returns the size, in bytes, required to encode the value `val`
2114 # with LEB128 (signed version if `is_signed` is `True`).
2115 @staticmethod
2116 def _leb128_size_for_val(val: int, is_signed: bool):
2117 if val < 0:
2118 # Equivalent upper bound.
2119 #
2120 # For example, if `val` is -128, then the full integer for
2121 # this number of bits would be [-128, 127].
2122 val = -val - 1
2123
2124 # Number of bits (add one for the sign if needed)
2125 bits = val.bit_length() + int(is_signed)
2126
2127 if bits == 0:
2128 bits = 1
2129
2130 # Seven bits per byte
2131 return math.ceil(bits / 7)
2132
# Handles the LEB128 integer item `item`: evaluates its expression
# (integer result only), appends the LEB128 encoding of the result to
# the generated data, and advances the offset by the encoded size.
#
# The encoding is the signed one if `item` is an `_SLeb128Int`, and
# the unsigned one otherwise.
#
# Raises a parse error if the result is negative while the encoding
# is unsigned: there's no valid unsigned encoding of a negative value
# (the bytes would silently decode to some unrelated positive value).
def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
    # Compute value
    val = self._eval_item_expr(item, state, False)
    is_signed = type(item) is _SLeb128Int

    # Validate sign
    if val < 0 and not is_signed:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,} for an unsigned LEB128 integer".format(
                item.expr_str, val
            ),
            item,
        )

    # Size in bytes
    size = self._leb128_size_for_val(val, is_signed)

    # For each byte
    for _ in range(size):
        # Seven LSBs, MSB of the byte set (continue)
        self._data.append((val & 0x7F) | 0x80)

        # Arithmetic shift: keeps the sign of a negative value
        val >>= 7

    # Clear MSB of last byte (stop)
    self._data[-1] &= 0x7F

    # Update offset
    state.offset += size
2152
# Handles the group item `item`, removing the immediate labels from
# `state` at the end if `remove_immediate_labels` is `True`.
#
# The immediate labels (label items found directly within the group)
# are also added to the state snapshot of each fixed-length number
# item instance saved while handling this group.
def _handle_group_item(
    self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
):
    # Instances saved from this index on belong to this group
    first_inst_index = len(self._fl_num_item_insts)
    group_labels = {}  # type: LabelsT

    for child in item.items:
        if type(child) is _Label:
            # Immediate label: remember its offset
            group_labels[child.name] = state.offset

        self._handle_item(child, state)

    # Remove immediate labels from current state if needed
    if remove_immediate_labels:
        for label_name in group_labels:
            del state.labels[label_name]

    # Make the immediate labels available to the pending instances of
    # this group
    for pending_inst in self._fl_num_item_insts[first_inst_index:]:
        pending_inst.state.labels.update(group_labels)
2178
# Handles the repetition item `item`: evaluates its multiplier
# expression, then handles the repeated item that many times.
#
# Raises a parse error if the multiplier is negative.
def _handle_rep_item(self, item: _Rep, state: _GenState):
    # Compute the repetition count
    count = _Gen._eval_item_expr(item, state)

    # Validate result
    if count < 0:
        _raise_error_for_item(
            "Invalid expression `{}`: unexpected negative result {:,}".format(
                item.expr_str, count
            ),
            item,
        )

    # Generate item data `count` times
    remaining = count

    while remaining > 0:
        self._handle_item(item.item, state)
        remaining -= 1
2196
# Handles the conditional item `item`: evaluates its expression, then
# handles its "true" item if the result is truthy, or its "false"
# item otherwise.
def _handle_cond_item(self, item: _Cond, state: _GenState):
    # Compute the conditional value
    cond_val = _Gen._eval_item_expr(item, state)

    # Select and handle the corresponding branch
    branch = item.true_item if cond_val else item.false_item
    self._handle_item(branch, state)
2207
# Evaluates the parameters of the macro expansion item `item`
# considering the initial state `init_state` and returns a new state
# to handle the items of the macro.
#
# The new state has the offset and byte order of `init_state`, no
# labels, and only the macro parameters as variables.
def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
    macro_def = self._macro_defs[item.name]

    # Evaluate each parameter expression (`int` or `float` result)
    # within the state of the expansion site
    param_vals = {}  # type: VariablesT

    for param_name, param in zip(macro_def.param_names, item.params):
        param_vals[param_name] = _Gen._eval_expr(
            param.expr_str, param.expr, param.text_loc, init_state, True
        )

    # New state
    return _GenState(param_vals, {}, init_state.offset, init_state.bo)
2224
# Handles the macro expansion item `item`: handles the group of the
# macro within a dedicated state, then advances the offset of `state`
# by the size of the data generated meanwhile.
#
# While the group is being handled, a contextual message is pushed
# onto `self._parse_error_msgs`; any parse error is passed to
# _augment_error() with the same message.
def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
    context_text = "While expanding the macro `{}`:".format(item.name)

    try:
        # Dedicated state holding the evaluated parameters
        macro_state = self._eval_macro_exp_params(item, state)

        # Size of the data before the expansion
        init_data_size = len(self._data)

        # Handle the group of the macro with the contextual message
        # on the stack
        context_msg = (
            ParseErrorMessage._create(  # pyright: ignore[reportPrivateUsage]
                context_text, item.text_loc
            )
        )
        self._parse_error_msgs.append(context_msg)
        macro_group = self._macro_defs[item.name].group
        self._handle_item(macro_group, macro_state)
        self._parse_error_msgs.pop()
    except ParseError as exc:
        # NOTE(review): presumably never returns (otherwise
        # `init_data_size` could be unbound below) -- confirm
        _augment_error(exc, context_text, item.text_loc)

    # Update state offset
    state.offset += len(self._data) - init_data_size
2248
# Handles the offset setting item `item`: makes its value the current
# offset (no data is generated).
def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
    new_offset = item.val
    state.offset = new_offset
2252
# Handles the offset alignment item `item`: rounds the current offset
# up to the next multiple of the alignment (given in bits by
# `item.val`) and appends that many padding bytes.
def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
    align_bytes = item.val // 8
    offset_before = state.offset

    # Round up to a multiple of `align_bytes`
    block_count = (offset_before + align_bytes - 1) // align_bytes
    state.offset = block_count * align_bytes

    # Padding
    pad_size = state.offset - offset_before
    self._data += bytes([item.pad_val] * pad_size)
2259
# Handles the filling item `item`: evaluates its expression to get
# the target offset, appends padding bytes up to it, and makes it the
# current offset.
#
# Raises a parse error if the target offset is less than the current
# one.
def _handle_fill_until_item(self, item: _FillUntil, state: _GenState):
    # Compute the target offset
    target_offset = _Gen._eval_item_expr(item, state)
    cur_offset = state.offset

    # Going backwards isn't possible
    if target_offset < cur_offset:
        _raise_error_for_item(
            "Invalid expression `{}`: new offset {:,} is less than current offset {:,}".format(
                item.expr_str, target_offset, cur_offset
            ),
            item,
        )

    # Fill
    pad_size = target_offset - cur_offset
    self._data += bytes([item.pad_val] * pad_size)

    # Update offset
    state.offset = target_offset
2279
# Handles the label item `item`: binds its name to the current offset
# within the labels of `state` (no data is generated).
def _handle_label_item(self, item: _Label, state: _GenState):
    cur_offset = state.offset
    state.labels[item.name] = cur_offset
2283
# Handles the item `item` by dispatching to the handler registered
# for its exact type in `self._item_handlers`.
#
# Returns whatever the selected handler returns.
def _handle_item(self, item: _Item, state: _GenState):
    handler = self._item_handlers[type(item)]
    return handler(item, state)
2288
# Generates the data for a fixed-length integer item instance having
# the value `val` and returns it.
#
# The accepted range is the union of the signed and unsigned ranges
# for `item.len` bits; a parse error is raised outside of it. An
# undefined byte order (`None`) is handled like big endian.
def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
    # Validate range
    lowest = -(2 ** (item.len - 1))
    highest = 2**item.len - 1

    if not (lowest <= val <= highest):
        _raise_error_for_item(
            "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                val, item.len, item.expr_str
            ),
            item,
        )

    # Encode result on 64 bits (to extend the sign bit whatever the
    # value of `item.len`).
    is_big_endian = state.bo in (None, ByteOrder.BE)
    pack_fmt = "{}{}".format(
        ">" if is_big_endian else "<",
        "Q" if val >= 0 else "q",
    )
    encoded = struct.pack(pack_fmt, val)

    # Keep only the requested length
    len_bytes = item.len // 8

    if is_big_endian:
        # Big endian: the least significant bytes are the last ones
        return encoded[-len_bytes:]

    # Little endian: the least significant bytes are the first ones
    assert state.bo == ByteOrder.LE
    return encoded[:len_bytes]
2324
# Generates and returns the data for a fixed-length floating point
# number item instance having the value `val`.
#
# Only the 32-bit and 64-bit lengths are accepted: an error is reported
# for `item` with any other `item.len`.
#
# `state.bo` selects the byte order (no byte order means big endian
# here).
def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
    # Check the length
    if item.len != 32 and item.len != 64:
        _raise_error_for_item(
            "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                item.len, val
            ),
            item,
        )

    # Build the `struct` format: byte order, then single or double
    # precision.
    order_char = ">" if state.bo in (None, ByteOrder.BE) else "<"
    type_char = "f" if item.len == 32 else "d"

    # Encode
    return struct.pack(order_char + type_char, val)
2345
# Generates and returns the data for a fixed-length number item
# instance.
#
# Evaluates the expression of `item` with `state`, and then delegates
# the encoding to the integer or the floating point number generator
# depending on the exact type of the result.
def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
    val = self._eval_item_expr(item, state, True)

    if type(val) is not int:
        # The only other expected type
        assert type(val) is float
        return self._gen_fl_float_item_inst_data(val, item, state)

    return self._gen_fl_int_item_inst_data(val, item, state)
2358
# Generates the data for all the fixed-length number item instances
# (`self._fl_num_item_insts`) and writes each result over the bytes of
# `self._data` starting at the recorded offset of the instance.
#
# When generating the data of an instance raises `ParseError`, adds the
# parse error messages saved for this instance to the exception, in
# reverse order, and then raises it again.
def _gen_fl_num_item_insts(self):
    for inst in self._fl_num_item_insts:
        try:
            encoded = self._gen_fl_num_item_inst_data(inst.item, inst.state)
        except ParseError as exc:
            # Add the saved context of this instance
            for saved_msg in reversed(inst.parse_error_msgs):
                _add_error_msg(exc, saved_msg.text, saved_msg.text_location)

            raise

        # Write the bytes at their place within `self._data`
        begin = inst.offset_in_data
        end = begin + len(encoded)
        self._data[begin:end] = encoded
2376
# Generates the data (`self._data`) and the final state
# (`self._final_state`) from `group` and the initial state `state`.
#
# This happens in two passes: handling the items of `group` first, and
# then generating the bytes of the fixed-length number item instances,
# which is only possible once the whole state is known.
def _gen(self, group: _Group, state: _GenState):
    # Start with no data
    self._data = bytearray()

    # Both LEB128 integer item types share the same handler
    handle_leb128_int_item = self._handle_leb128_int_item

    # Item type to handler
    handlers = {
        _AlignOffset: self._handle_align_offset_item,
        _Byte: self._handle_byte_item,
        _Cond: self._handle_cond_item,
        _FillUntil: self._handle_fill_until_item,
        _FlNum: self._handle_fl_num_item,
        _Group: self._handle_group_item,
        _Label: self._handle_label_item,
        _MacroExp: self._handle_macro_exp_item,
        _Rep: self._handle_rep_item,
        _SetBo: self._handle_set_bo_item,
        _SetOffset: self._handle_set_offset_item,
        _SLeb128Int: handle_leb128_int_item,
        _Str: self._handle_str_item,
        _ULeb128Int: handle_leb128_int_item,
        _VarAssign: self._handle_var_assign_item,
    }  # type: Dict[type, Callable[[Any, _GenState], None]]
    self._item_handlers = handlers

    # Handle the root group, keeping its immediate labels (`False`)
    # because the `labels` property offers them.
    self._handle_group_item(group, state, False)

    # `state` was updated in place: it's now the final state
    self._final_state = state

    # Second pass: the full state is known at this point, so generate
    # all the fixed-length number bytes.
    self._gen_fl_num_item_insts()
2412
2413
# Parses the Normand input string `normand` and returns a `ParseResult`
# instance containing the encoded bytes.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Fresh dictionaries when the caller provides none
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse the text, and then generate the data from the resulting
    # items and macro definitions.
    parser = _Parser(normand, variables, labels)
    gen = _Gen(
        parser.res,
        parser.macro_defs,
        variables,
        labels,
        init_offset,
        init_byte_order,
    )

    # Wrap the data and the final state
    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2455
2456
# Raises a command-line error: a `RuntimeError` of which the message is
# `msg` with a `Command-line error: ` prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2460
2461
# Returns a dictionary of names to integers built from the list of
# strings `args`, each one being a `NAME=VAL` entry where `NAME` matches
# `_py_name_pat` and `VAL` is a sequence of decimal digits.
#
# Returns an empty dictionary when `args` is `None`.
#
# Raises a command-line error on the first entry which doesn't have the
# expected form.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: LabelsT

    if args is None:
        return result

    # The same pattern applies to all the entries
    entry_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for entry in args:
        m = entry_pat.match(entry)

        if m is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        name = m.group(1)
        result[name] = int(m.group(2))

    return result
2479
2480
# Parses the command-line arguments and returns, in this order:
#
# 1. The input file path, or `None` if none.
# 2. The Normand input text.
# 3. The initial offset.
# 4. The initial byte order.
# 5. The initial variables.
# 6. The initial labels.
#
# The Normand input text comes from the file at the input path, or from
# the standard input when there's no input path.
#
# Raises a command-line error when an initial variable or label isn't a
# valid `NAME=VAL` entry, or when the initial offset is negative.
def _parse_cli_args():
    import argparse

    # Build parser
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )
    ap.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )
    ap.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    ap.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )
    ap.add_argument(
        "--version", action="version", version="Normand {}".format(__version__)
    )
    ap.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    # Parse
    args = ap.parse_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels (`--var` and `--label` are `None` when not
    # provided, which `_dict_from_arg()` accepts)
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset, reporting the offending value
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            # `choices` above only allows `be` and `le`
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Return input and initial state
    return args.path, normand, args.offset, bo, variables, labels
2564
2565
# CLI entry point without exception handling: parses the Normand input
# text `normand` with the initial offset `offset`, byte order `bo`,
# variables `variables`, and labels `labels`, and writes the resulting
# data to the standard output as binary data.
def _run_cli_with_args(
    normand: str,
    offset: int,
    bo: Optional[ByteOrder],
    variables: VariablesT,
    labels: LabelsT,
):
    # Raw (binary) standard output writer
    write = sys.stdout.buffer.write

    res = parse(normand, variables, labels, offset, bo)
    write(res.data)
2575
2576
# Prints the exception message `msg` to the standard error, adding a
# final period if `msg` doesn't already end with one and stripping the
# surrounding whitespace, and then exits with status 1.
def _fail(msg: str) -> NoReturn:
    text = msg if msg.endswith(".") else msg + "."
    print(text.strip(), file=sys.stderr)
    sys.exit(1)
2584
2585
# CLI entry point.
#
# Parses the command-line arguments, runs the tool, and turns any
# exception into a message printed by `_fail()`, which exits with
# status 1.
#
# For a `ParseError`, prints one line per message, from the last one to
# the first one, each having the form `PATH:LINE:COL - TEXT` (without
# `PATH:` when reading the standard input), adding a final period when
# the line doesn't already end with `.`, `:`, or `;`.
def _run_cli():
    try:
        args = _parse_cli_args()
    except Exception as exc:
        _fail(str(exc))

    try:
        _run_cli_with_args(*args[1:])
    except ParseError as exc:
        import os.path

        # Absolute input path prefix, if any
        if args[0] is None:
            prefix = ""
        else:
            prefix = "{}:".format(os.path.abspath(args[0]))

        # One line per parse error message
        lines = []  # type: List[str]

        for msg in reversed(exc.messages):
            loc = msg.text_location
            line = "{}{}:{} - {}".format(prefix, loc.line_no, loc.col_no, msg.text)

            if line[-1] not in ".:;":
                line += "."

            lines.append(line + "\n")

        _fail("".join(lines).strip())
    except Exception as exc:
        _fail(str(exc))
2617
2618
# Run the command-line tool only when this module is executed as a
# script, not when it's imported.
if __name__ == "__main__":
    _run_cli()
This page took 0.084457 seconds and 4 git commands to generate.