Add macro support
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
32 __author__ = "Philippe Proulx"
33 __version__ = "0.11.0"
34 __all__ = [
35 "__author__",
36 "__version__",
37 "ByteOrder",
38 "LabelsT",
39 "parse",
40 "ParseError",
41 "ParseResult",
42 "TextLocation",
43 "VariablesT",
44 ]
45
46 import re
47 import abc
48 import ast
49 import sys
50 import copy
51 import enum
52 import math
53 import struct
54 import typing
55 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
56
57
# Location (line and column numbers) within some text.
#
# Calling the constructor directly raises `NotImplementedError`: the
# _create() class method is the only way to get an instance.
class TextLocation:
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location for the line number `line_no`
    # and the column number `col_no`, bypassing __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    # Actual initialization.
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        numbers = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*numbers)
85
86
# Base of any item: only keeps the source text location of the item.
class _Item:
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Text location of this item within the source.
    @property
    def text_loc(self):
        loc = self._text_loc
        return loc
96
97
# Scalar item: an item which exposes its size, in bytes, through the
# `size` property.
class _ScalarItem(_Item):
    # Size of this item (bytes); to be provided by each subclass.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        pass
105
106
107 # A repeatable item.
108 class _RepableItem:
109 pass
110
111
# Item holding the value of a single byte.
class _Byte(_ScalarItem, _RepableItem):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Size of this item: always one byte.
    @property
    def size(self):
        return 1

    # Value of the byte.
    @property
    def val(self):
        return self._val

    # The byte value is shown in hexadecimal.
    def __repr__(self):
        return "_Byte({}, {!r})".format(hex(self._val), self._text_loc)
129
130
# String item: holds the already encoded bytes of a string.
class _Str(_ScalarItem, _RepableItem):
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Size of this item: the number of encoded bytes.
    @property
    def size(self):
        return len(self._data)

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    def __repr__(self):
        return "_Str({!r}, {!r})".format(self._data, self._text_loc)
148
149
# Byte order: either big endian (`BE`) or little endian (`LE`).
@enum.unique
class ByteOrder(enum.Enum):
    BE = "be"  # Big endian
    LE = "le"  # Little endian
158
159
# Item which sets the byte order.
class _SetBo(_Item):
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        return "_SetBo({!r}, {!r})".format(self._bo, self._text_loc)
172
173
# Label item: a name attached to a source text location.
class _Label(_Item):
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Name of this label.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        return "_Label({!r}, {!r})".format(self._name, self._text_loc)
187
188
# Item which sets the offset.
class _SetOffset(_Item):
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset to set (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        return "_SetOffset({!r}, {!r})".format(self._val, self._text_loc)
202
203
# Item which aligns the offset, holding the alignment and the value
# of the padding bytes.
class _AlignOffset(_Item):
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment (bits).
    @property
    def val(self):
        return self._val

    # Value of a padding byte.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        fmt = "_AlignOffset({!r}, {!r}, {!r})"
        return fmt.format(self._val, self._pad_val, self._text_loc)
225
226
227 # Mixin of containing an AST expression and its string.
228 class _ExprMixin:
229 def __init__(self, expr_str: str, expr: ast.Expression):
230 self._expr_str = expr_str
231 self._expr = expr
232
233 # Expression string.
234 @property
235 def expr_str(self):
236 return self._expr_str
237
238 # Expression node to evaluate.
239 @property
240 def expr(self):
241 return self._expr
242
243
# Variable assignment item: the name of the variable to set and the
# expression of its value.
class _VarAssign(_Item, _ExprMixin):
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name of the variable.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fmt = "_VarAssign({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._name, self._expr_str, self._expr, self._text_loc)
265
266
# Fixed-length number item: an expression and the length, in bits, of
# the number (possibly more than one byte).
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight.
    @property
    def size(self):
        return self._len // 8

    def __repr__(self):
        fmt = "_FlNum({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._len, self._text_loc)
292
293
# LEB128 integer item (common base of the unsigned and signed ones).
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the actual class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        cls_name = type(self).__name__
        return "{}({!r}, {!r}, {!r})".format(
            cls_name, self._expr_str, self._expr, self._text_loc
        )
307
308
# LEB128 integer item, unsigned flavor (adds nothing to its base).
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
312
313
# LEB128 integer item, signed flavor (adds nothing to its base).
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    pass
317
318
# Group item: a list of contained items.
class _Group(_Item, _RepableItem):
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Items which this group contains.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        return "_Group({!r}, {!r})".format(self._items, self._text_loc)
332
333
# Repetition item: an item to repeat and the expression of the
# multiplier.
class _Rep(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fmt = "_Rep({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._item, self._expr_str, self._expr, self._text_loc)
355
356
# Conditional item: an item and the expression of its condition.
class _Cond(_Item, _ExprMixin):
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        # Both bases need their own initialization
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item which is subject to the condition.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fmt = "_Cond({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._item, self._expr_str, self._expr, self._text_loc)
378
379
# Macro definition item: the name of the macro, the names of its
# parameters, and the group of its contained items.
class _MacroDef(_Item):
    def __init__(
        self, name: str, param_names: List[str], group: _Group, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._param_names, self._group = name, param_names, group

    # Name of the macro.
    @property
    def name(self):
        return self._name

    # Names of the parameters.
    @property
    def param_names(self):
        return self._param_names

    # Group of the contained items.
    @property
    def group(self):
        return self._group

    def __repr__(self):
        fmt = "_MacroDef({!r}, {!r}, {!r}, {!r})"
        return fmt.format(self._name, self._param_names, self._group, self._text_loc)
412
413
# Parameter of a macro expansion: an expression (string and AST node)
# with its own source text location.
class _MacroExpParam:
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        self._expr_str, self._expr, self._text_loc = expr_str, expr, text_loc

    # Expression as a string.
    @property
    def expr_str(self):
        return self._expr_str

    # Expression as an AST node.
    @property
    def expr(self):
        return self._expr

    # Text location of this parameter within the source.
    @property
    def text_loc(self):
        return self._text_loc

    def __repr__(self):
        fmt = "_MacroExpParam({!r}, {!r}, {!r})"
        return fmt.format(self._expr_str, self._expr, self._text_loc)
440
441
# Macro expansion item: the name of the macro to expand and the
# parameters of the expansion.
class _MacroExp(_Item, _RepableItem):
    def __init__(
        self, name: str, params: List[_MacroExpParam], text_loc: TextLocation
    ):
        super().__init__(text_loc)
        self._name, self._params = name, params

    # Name of the macro to expand.
    @property
    def name(self):
        return self._name

    # Parameters of the expansion.
    @property
    def params(self):
        return self._params

    def __repr__(self):
        fmt = "_MacroExp({!r}, {!r}, {!r})"
        return fmt.format(self._name, self._params, self._text_loc)
470
471
# Parsing error: a message (the exception message) and the source
# text location of the error.
#
# Calling the constructor directly raises `NotImplementedError`: the
# _create() class method is the only way to get an instance.
class ParseError(RuntimeError):
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having the message `msg` and
    # the text location `text_loc`, bypassing __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initialization: the base exception gets the message.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Text location of the error within the source.
    @property
    def text_loc(self):
        return self._text_loc
491
492
# Creates a parsing error having the message `msg` and the text
# location `text_loc`, and then raises it.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
496
497
# Type of a dictionary of variables, from name to integer or
# floating point number value (for type hints).
VariablesT = Dict[str, Union[int, float]]


# Type of a dictionary of labels, from name to integer (for type
# hints).
LabelsT = Dict[str, int]


# Pattern of a Python name: a letter or `_` followed with any number
# of letters, digits, and `_`.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")


# Type of a dictionary of macro definitions, from name to definition
# item.
_MacroDefsT = Dict[str, _MacroDef]
512
513
514 # Normand parser.
515 #
516 # The constructor accepts a Normand input. After building, use the `res`
517 # property to get the resulting main group.
518 class _Parser:
519 # Builds a parser to parse the Normand input `normand`, parsing
520 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text and current position within it
    self._normand = normand
    self._at = 0

    # Current text location
    self._line_no, self._col_no = 1, 1

    # Names known so far: initially the keys of `labels` and
    # `variables`
    self._label_names = set(labels)
    self._var_names = set(variables)

    # No macro definition yet
    self._macro_defs = {}  # type: _MacroDefsT

    # Parse immediately
    self._parse()
530
531 # Result (main group).
532 @property
533 def res(self):
534 return self._res
535
536 # Macro definitions.
537 @property
538 def macro_defs(self):
539 return self._macro_defs
540
541 # Current text location.
@property
def _text_loc(self):
    # A new text location object is created on each access from the
    # current line and column numbers.
    create_loc = TextLocation._create  # pyright: ignore[reportPrivateUsage]
    return create_loc(self._line_no, self._col_no)
547
548 # Returns `True` if this parser is done parsing.
549 def _is_done(self):
550 return self._at == len(self._normand)
551
552 # Returns `True` if this parser isn't done parsing.
553 def _isnt_done(self):
554 return not self._is_done()
555
556 # Raises a parse error, creating it using the message `msg` and the
557 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Delegate to the module-level function having the same name,
    # adding the current text location.
    here = self._text_loc
    _raise_error(msg, here)
560
561 # Tries to make the pattern `pat` match the current substring,
562 # returning the match object and updating `self._at`,
563 # `self._line_no`, and `self._col_no` on success.
564 def _try_parse_pat(self, pat: Pattern[str]):
565 m = pat.match(self._normand, self._at)
566
567 if m is None:
568 return
569
570 # Skip matched string
571 self._at += len(m.group(0))
572
573 # Update line number
574 self._line_no += m.group(0).count("\n")
575
576 # Update column number
577 for i in reversed(range(self._at)):
578 if self._normand[i] == "\n" or i == 0:
579 if i == 0:
580 self._col_no = self._at + 1
581 else:
582 self._col_no = self._at - i
583
584 break
585
586 # Return match object
587 return m
588
589 # Expects the pattern `pat` to match the current substring,
590 # returning the match object and updating `self._at`,
591 # `self._line_no`, and `self._col_no` on success, or raising a parse
592 # error with the message `error_msg` on error.
593 def _expect_pat(self, pat: Pattern[str], error_msg: str):
594 # Match
595 m = self._try_parse_pat(pat)
596
597 if m is None:
598 # No match: error
599 self._raise_error(error_msg)
600
601 # Return match object
602 return m
603
604 # Pattern for _skip_ws_and_comments()
605 _ws_or_syms_or_comments_pat = re.compile(
606 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
607 )
608
609 # Skips as many whitespaces, insignificant symbol characters, and
610 # comments as possible.
611 def _skip_ws_and_comments(self):
612 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
613
614 # Pattern for _skip_ws()
615 _ws_pat = re.compile(r"\s*")
616
617 # Skips as many whitespaces as possible.
618 def _skip_ws(self):
619 self._try_parse_pat(self._ws_pat)
620
621 # Pattern for _try_parse_hex_byte()
622 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
623
624 # Tries to parse a hexadecimal byte, returning a byte item on
625 # success.
def _try_parse_hex_byte(self):
    # Returns `None` without a first nibble; once there's one, a
    # missing second nibble is a parse error.
    start_loc = self._text_loc

    # High nibble
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        return

    # Low nibble: whitespaces, symbols, and comments may sit between
    # both nibbles
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Build the byte item from both digits
    digits = high.group(0) + low.group(0)
    return _Byte(int(digits, 16), start_loc)
644
645 # Patterns for _try_parse_bin_byte()
646 _bin_byte_bit_pat = re.compile(r"[01]")
647 _bin_byte_prefix_pat = re.compile(r"%")
648
649 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    # Returns `None` without the `%` prefix; once there's one, any
    # missing bit is a parse error.
    start_loc = self._text_loc

    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        return

    # Accumulate exactly eight bits, most significant first;
    # whitespaces, symbols, and comments may sit before each bit
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(bit.group(0))

    return _Byte(val, start_loc)
668
669 # Patterns for _try_parse_dec_byte()
670 _dec_byte_prefix_pat = re.compile(r"\$")
671 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
672
673 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    # Returns `None` without the `$` prefix; once there's one, a
    # missing or out-of-range value is a parse error.
    start_loc = self._text_loc

    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        return

    # Value, possibly negative
    self._skip_ws()
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    magnitude = int(m.group("val"))
    val = -magnitude if m.group("neg") == "-" else magnitude

    # Accept both the signed and unsigned ranges of a byte
    if not -128 <= val <= 255:
        _raise_error("Invalid decimal byte value {}".format(val), start_loc)

    # Modulo makes a negative value its two's complement
    return _Byte(val % 256, start_loc)
698
699 # Tries to parse a byte, returning a byte item on success.
def _try_parse_byte(self):
    # Try the hexadecimal, binary, and decimal forms, in this order:
    # the first one to produce an item wins. Returns `None` if none
    # of them does.
    try_parse_funcs = (
        self._try_parse_hex_byte,
        self._try_parse_bin_byte,
        self._try_parse_dec_byte,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            return item
718
719 # Patterns for _try_parse_str()
720 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
721 _str_suffix_pat = re.compile(r'"')
722 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
723
724 # Strings corresponding to escape sequence characters
725 _str_escape_seq_strs = {
726 "0": "\0",
727 "a": "\a",
728 "b": "\b",
729 "e": "\x1b",
730 "f": "\f",
731 "n": "\n",
732 "r": "\r",
733 "t": "\t",
734 "v": "\v",
735 "\\": "\\",
736 '"': '"',
737 }
738
739 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    # Returns `None` without a string prefix (optional `u16`/`u32`
    # followed with `be`/`le`, then `"`); once there's one, a missing
    # end of string is a parse error.
    #
    # The returned string item holds the string encoded as UTF-8 by
    # default, or as UTF-16/UTF-32 with the byte order of the prefix.
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Doing one str.replace() call per escape character instead is
    # wrong when an escaped backslash comes before such a character:
    # the two characters `\\` followed with `n` would have their
    # second backslash and the `n` taken as a newline escape.
    #
    # An unknown escape sequence stays as is.
    escape_seq_strs = self._str_escape_seq_strs
    val = re.sub(
        r"\\(.)",
        lambda esc: escape_seq_strs.get(esc.group(1), esc.group(0)),
        m.group(0),
    )

    # Encode and return item
    return _Str(val.encode(encoding), begin_text_loc)
773
774 # Common right parenthesis pattern
775 _right_paren_pat = re.compile(r"\)")
776
777 # Patterns for _try_parse_group()
778 _group_prefix_pat = re.compile(r"\(|!g(?:roup)?\b")
779
780 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    # Returns `None` without a group prefix (`(`, `!g`, or `!group`);
    # once there's one, a missing end of group is a parse error.
    start_loc = self._text_loc
    opening = self._try_parse_pat(self._group_prefix_pat)

    if opening is None:
        return

    # Contained items
    items = self._parse_items()

    # The expected end depends on how the group started
    self._skip_ws_and_comments()

    if opening.group(0) == "(":
        closing_pat, closing_str = self._right_paren_pat, ")"
    else:
        closing_pat, closing_str = self._block_end_pat, "!end"

    self._expect_pat(
        closing_pat, "Expecting an item or `{}` (end of group)".format(closing_str)
    )
    return _Group(items, start_loc)
808
809 # Returns a stripped expression string and an AST expression node
810 # from the expression string `expr_str` at text location `text_loc`.
811 def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
812 # Create an expression node from the expression string
813 expr_str = expr_str.strip().replace("\n", " ")
814
815 try:
816 expr = ast.parse(expr_str, mode="eval")
817 except SyntaxError:
818 _raise_error(
819 "Invalid expression `{}`: invalid syntax".format(expr_str),
820 text_loc,
821 )
822
823 return expr_str, expr
824
825 # Patterns for _try_parse_num_and_attr()
826 _val_expr_pat = re.compile(r"([^}:]+):\s*")
827 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
828 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
829
830 # Tries to parse a value and attribute (fixed length in bits or
831 # `leb128`), returning a value item on success.
def _try_parse_num_and_attr(self):
    # Returns `None` without an expression followed with `:`; once
    # there's one, a missing attribute is a parse error.
    start_loc = self._text_loc
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        return

    # Expression node from the part before `:`
    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), start_loc)

    # Length attribute: fixed-length number item
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), start_loc)

    # LEB128 attribute: LEB128 integer item
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        # At this point it's invalid
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    if m_leb128.group(1) == "u":
        return _ULeb128Int(expr_str, expr, start_loc)

    return _SLeb128Int(expr_str, expr, start_loc)
869
870 # Patterns for _try_parse_var_assign()
871 _var_assign_name_equal_pat = re.compile(r"({})\s*=".format(_py_name_pat.pattern))
872 _var_assign_expr_pat = re.compile(r"[^}]+")
873
874 # Tries to parse a variable assignment, returning a variable
875 # assignment item on success.
def _try_parse_var_assign(self):
    # Returns `None` without a name followed with `=`; once there's
    # one, an invalid name or a missing expression is a parse error.
    start_loc = self._text_loc
    m_name = self._try_parse_pat(self._var_assign_name_equal_pat)

    if m_name is None:
        return

    # The name must be neither the reserved one nor the one of a
    # known label
    name = m_name.group(1)

    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved variable name".format(_icitte_name), start_loc
        )

    if name in self._label_names:
        _raise_error("Existing label named `{}`".format(name), start_loc)

    # Expression of the value
    self._skip_ws()
    m_expr = self._expect_pat(self._var_assign_expr_pat, "Expecting an expression")
    expr_str, expr = self._ast_expr_from_str(m_expr.group(0), start_loc)

    # The name is now a known variable name
    self._var_names.add(name)
    return _VarAssign(name, expr_str, expr, start_loc)
914
915 # Pattern for _try_parse_set_bo()
916 _bo_pat = re.compile(r"[bl]e")
917
918 # Tries to parse a byte order name, returning a byte order setting
919 # item on success.
def _try_parse_set_bo(self):
    # Returns `None` without a byte order name (`be` or `le`).
    start_loc = self._text_loc
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        return

    bo_str = m.group(0)

    if bo_str == "be":
        return _SetBo(ByteOrder.BE, start_loc)

    # The pattern only leaves this possibility
    assert bo_str == "le"
    return _SetBo(ByteOrder.LE, start_loc)
936
937 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
938 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{")
939 _val_var_assign_set_bo_suffix_pat = re.compile(r"\}")
940
941 # Tries to parse a value, a variable assignment, or a byte order
942 # setting, returning an item on success.
def _try_parse_val_or_var_assign_or_set_bo(self):
    # Returns `None` without the `{` prefix; once there's one,
    # anything else than a valid item followed with `}` is a parse
    # error.
    if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
        return

    self._skip_ws()

    # Try a variable assignment, then a number, and then a byte order
    # setting: the first one to produce an item wins
    try_parse_funcs = (
        self._try_parse_var_assign,
        self._try_parse_num_and_attr,
        self._try_parse_set_bo,
    )

    for try_parse in try_parse_funcs:
        item = try_parse()

        if item is not None:
            break
    else:
        # At this point it's invalid
        self._raise_error(
            "Expecting a fixed-length number, a variable assignment, or a byte order setting"
        )

    # Expect suffix
    self._skip_ws()
    self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
    return item
972
973 # Common constant integer patterns
974 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
975 _const_int_pat = re.compile(r"(?P<neg>-)?(?:{})".format(_pos_const_int_pat.pattern))
976
977 # Tries to parse an offset setting value (after the initial `<`),
978 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    # Returns `None` without a positive constant integer (decimal, or
    # hexadecimal with the `0x`/`0X` prefix).
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Convert.
    #
    # With base 0, int() follows the rules of Python integer
    # literals, which refuse a nonzero decimal with leading zeros
    # (`010`, for example) even though the pattern accepts it: make
    # this a parse error instead of letting `ValueError` escape.
    val_str = m.group(0)

    try:
        val = int(val_str, 0)
    except ValueError:
        _raise_error(
            "Invalid offset setting value `{}`".format(val_str), begin_text_loc
        )

    # Return item
    return _SetOffset(val, begin_text_loc)
991
992 # Tries to parse a label name (after the initial `<`), returning a
993 # label item on success.
def _try_parse_label_name(self):
    # Returns `None` without a name; once there's one, an invalid
    # name is a parse error.
    start_loc = self._text_loc
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        return

    # The name must be neither the reserved one, nor the one of a
    # known label, nor the one of a known variable
    name = m.group(0)

    if name == _icitte_name:
        _raise_error(
            "`{}` is a reserved label name".format(_icitte_name), start_loc
        )

    if name in self._label_names:
        _raise_error("Duplicate label name `{}`".format(name), start_loc)

    if name in self._var_names:
        _raise_error("Existing variable named `{}`".format(name), start_loc)

    # The name is now a known label name
    self._label_names.add(name)
    return _Label(name, start_loc)
1023
1024 # Patterns for _try_parse_label_or_set_offset()
1025 _label_set_offset_prefix_pat = re.compile(r"<")
1026 _label_set_offset_suffix_pat = re.compile(r">")
1027
1028 # Tries to parse a label or an offset setting, returning an item on
1029 # success.
def _try_parse_label_or_set_offset(self):
    # Returns `None` without the `<` prefix; once there's one,
    # anything else than a valid item followed with `>` is a parse
    # error.
    if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
        return

    # An offset setting value has precedence over a label name
    self._skip_ws()
    item = self._try_parse_set_offset_val()

    if item is None:
        item = self._try_parse_label_name()

    if item is None:
        # At this point it's invalid
        self._raise_error("Expecting a label name or an offset setting value")

    # Expect suffix
    self._skip_ws()
    self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
    return item
1052
1053 # Patterns for _try_parse_align_offset()
1054 _align_offset_prefix_pat = re.compile(r"@")
1055 _align_offset_val_pat = re.compile(r"\d+")
1056 _align_offset_pad_val_prefix_pat = re.compile(r"~")
1057
1058 # Tries to parse an offset alignment, returning an offset alignment
1059 # item on success.
def _try_parse_align_offset(self):
    # Returns `None` without the `@` prefix; once there's one, a
    # missing or invalid alignment, or an invalid padding byte value
    # after `~`, is a parse error.
    #
    # Without `~`, the padding byte value of the returned item is 0.
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    self._skip_ws()

    # Expect an alignment
    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(0))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    self._skip_ws()
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        self._skip_ws()
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Convert.
        #
        # With base 0, int() follows the rules of Python integer
        # literals, which refuse a nonzero decimal with leading zeros
        # (`010`, for example) even though the pattern accepts it:
        # make this a parse error instead of letting `ValueError`
        # escape.
        try:
            pad_val = int(m.group(0), 0)
        except ValueError:
            _raise_error(
                "Invalid padding byte value {}".format(m.group(0)),
                pad_val_text_loc,
            )

        # Validate (the pattern doesn't accept a negative value)
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
1108
1109 # Patterns for _expect_const_int_name_expr()
1110 _inner_expr_prefix_pat = re.compile(r"\{")
1111 _inner_expr_pat = re.compile(r"[^}]+")
1112 _inner_expr_suffix_pat = re.compile(r"\}")
1113
1114 # Parses a constant integer if `accept_const_int` is `True`
1115 # (possibly negative if `allow_neg` is `True`), a name, or an
1116 # expression within `{` and `}`.
def _expect_const_int_name_expr(
    self, accept_const_int: bool, allow_neg: bool = False
):
    # Returns the expression string and the AST expression node of
    # what's parsed; anything else than the accepted forms is a parse
    # error.
    start_loc = self._text_loc

    # Constant integer?
    if accept_const_int:
        m_int = self._try_parse_pat(self._const_int_pat)

        if m_int is not None:
            if not allow_neg and m_int.group("neg") == "-":
                _raise_error("Expecting a positive constant integer", start_loc)

            return self._ast_expr_from_str(m_int.group(0), start_loc)

    # Name?
    m_name = self._try_parse_pat(_py_name_pat)

    if m_name is not None:
        return self._ast_expr_from_str(m_name.group(0), start_loc)

    # Expression within `{` and `}`?
    if self._try_parse_pat(self._inner_expr_prefix_pat) is None:
        # At this point it's invalid
        if accept_const_int:
            expected = "a {}constant integer, a name, or `{{`".format(
                "" if allow_neg else "positive "
            )
        else:
            expected = "a name or `{`"

        self._raise_error("Expecting {}".format(expected))

    # The location of the expression is where its text starts, not
    # where `{` is
    self._skip_ws()
    inner_loc = self._text_loc
    m_expr = self._expect_pat(self._inner_expr_pat, "Expecting an expression")

    # Expect `}`
    self._skip_ws()
    self._expect_pat(self._inner_expr_suffix_pat, "Expecting `}`")
    return self._ast_expr_from_str(m_expr.group(0), inner_loc)
1165
1166 # Parses the multiplier expression of a repetition (block or
1167 # post-item) and returns the expression string and AST node.
def _expect_rep_mul_expr(self):
    # A multiplier may be a constant integer (negative ones remain
    # refused, as per the default), a name, or an expression within
    # `{` and `}`.
    accept_const_int = True
    return self._expect_const_int_name_expr(accept_const_int)
1170
1171 # Common block end pattern
1172 _block_end_pat = re.compile(r"!end\b")
1173
1174 # Pattern for _try_parse_rep_block()
1175 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b")
1176
1177 # Tries to parse a repetition block, returning a repetition item on
1178 # success.
def _try_parse_rep_block(self):
    # Returns `None` without a repetition block prefix (`!r` or
    # `!repeat`); once there's one, a missing multiplier or end of
    # block is a parse error.
    start_loc = self._text_loc

    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat, as a group located where they start
    self._skip_ws_and_comments()
    group_loc = self._text_loc
    group = _Group(self._parse_items(), group_loc)

    # Expect end of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )
    return _Rep(group, mul_expr_str, mul_expr, start_loc)
1204
1205 # Pattern for _try_parse_cond_block()
1206 _cond_block_prefix_pat = re.compile(r"!if\b")
1207
1208 # Tries to parse a conditional block, returning a conditional item
1209 # on success.
def _try_parse_cond_block(self):
    # Returns `None` without the `!if` prefix; once there's one, a
    # missing condition or end of block is a parse error.
    start_loc = self._text_loc

    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return

    # Condition expression: a name or an expression within `{` and
    # `}` (no constant integer)
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_const_int_name_expr(False)

    # Conditional items, as a group located where they start
    self._skip_ws_and_comments()
    group_loc = self._text_loc
    group = _Group(self._parse_items(), group_loc)

    # Expect end of block
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )
    return _Cond(group, cond_expr_str, cond_expr, start_loc)
1236
# Common left parenthesis pattern
_left_paren_pat = re.compile(r"\(")

# Patterns for _try_parse_macro_def() and _try_parse_macro_exp()
_macro_params_comma_pat = re.compile(",")

# Patterns for _try_parse_macro_def()
_macro_def_prefix_pat = re.compile(r"!m(?:acro)?\b")

# Tries to parse a macro definition.
#
# On success, registers the definition in `self._macro_defs` under
# its name and returns `True`. Returns `False` if the text at the
# current position doesn't start with the macro definition prefix.
#
# The items of the macro are parsed with empty sets of known variable
# and label names; the previous sets are put back afterwards.
def _try_parse_macro_def(self):
    def_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_def_prefix_pat) is None:
        # Not a macro definition
        return False

    # Macro name, which must not be already taken
    self._skip_ws()
    macro_name_text_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)

    if macro_name in self._macro_defs:
        _raise_error("Duplicate macro named `{}`".format(macro_name), macro_name_text_loc)

    # Opening `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter names up to the closing `)`
    formal_names = []  # type: List[str]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma is required before any parameter but the first
        if formal_names:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        formal_text_loc = self._text_loc
        formal_name = self._expect_pat(
            _py_name_pat, "Expecting valid parameter name"
        ).group(0)

        if formal_name in formal_names:
            _raise_error(
                "Duplicate macro parameter named `{}`".format(formal_name),
                formal_text_loc,
            )

        formal_names.append(formal_name)

    # Macro items, parsed with fresh variable and label name sets
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    saved_var_names = self._var_names.copy()
    saved_label_names = self._label_names.copy()
    self._var_names = set()  # type: Set[str]
    self._label_names = set()  # type: Set[str]
    body_items = self._parse_items()
    self._var_names = saved_var_names
    self._label_names = saved_label_names

    # Mandatory `!end`
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of macro block)"
    )

    # Register the new macro definition
    body_group = _Group(body_items, body_text_loc)
    self._macro_defs[macro_name] = _MacroDef(
        macro_name, formal_names, body_group, def_text_loc
    )
    return True
1323
# Patterns for _try_parse_macro_exp()
_macro_exp_prefix_pat = re.compile(r"m\b")
_macro_exp_colon_pat = re.compile(r":")

# Tries to parse a macro expansion (`m:NAME(...)`).
#
# Returns a macro expansion item (`_MacroExp`) on success, or `None`
# if the text at the current position doesn't start with the macro
# expansion prefix.
#
# Raises a parse error if the macro name is unknown or if the number
# of passed parameters differs from the number of parameters of the
# macro definition.
def _try_parse_macro_exp(self):
    exp_text_loc = self._text_loc

    if self._try_parse_pat(self._macro_exp_prefix_pat) is None:
        # Not a macro expansion
        return None

    # `:`
    self._skip_ws()
    self._expect_pat(self._macro_exp_colon_pat, "Expecting `:`")

    # Name of an existing macro
    self._skip_ws()
    macro_name_text_loc = self._text_loc
    macro_name = self._expect_pat(_py_name_pat, "Expecting a valid macro name").group(0)
    target_def = self._macro_defs.get(macro_name)

    if target_def is None:
        _raise_error("Unknown macro name `{}`".format(macro_name), macro_name_text_loc)

    # Opening `(`
    self._skip_ws()
    self._expect_pat(self._left_paren_pat, "Expecting `(`")

    # Comma-separated parameter values up to the closing `)`
    actuals_text_loc = self._text_loc
    actuals = []  # type: List[_MacroExpParam]

    while True:
        self._skip_ws()

        if self._try_parse_pat(self._right_paren_pat) is not None:
            # End of parameter list
            break

        # A comma is required before any value but the first
        if actuals:
            self._expect_pat(self._macro_params_comma_pat, "Expecting `,`")

        self._skip_ws()
        actual_text_loc = self._text_loc
        actual_expr_str, actual_expr = self._expect_const_int_name_expr(True, True)
        actuals.append(_MacroExpParam(actual_expr_str, actual_expr, actual_text_loc))

    # The expansion must pass exactly what the definition expects
    expected_count = len(target_def.param_names)

    if len(actuals) != expected_count:
        plural_suffix = "" if len(actuals) == 1 else "s"
        _raise_error(
            "Macro expansion passes {} parameter{} while the definition expects {}".format(
                len(actuals), plural_suffix, expected_count
            ),
            actuals_text_loc,
        )

    return _MacroExp(macro_name, actuals, exp_text_loc)
1396
# Tries to parse a base item, that is, an item without any post-item
# repetition.
#
# Tries each specific item parser below in order and returns the
# first item which one of them produces, or `None` if none matches.
def _try_parse_base_item(self):
    # The order matters: it's the order in which the parsers are tried
    sub_parsers = (
        # Byte item
        self._try_parse_byte,
        # String item
        self._try_parse_str,
        # Value, variable assignment, or byte order setting item
        self._try_parse_val_or_var_assign_or_set_bo,
        # Label or offset setting item
        self._try_parse_label_or_set_offset,
        # Offset alignment item
        self._try_parse_align_offset,
        # Group item
        self._try_parse_group,
        # Repetition block item
        self._try_parse_rep_block,
        # Conditional block item
        self._try_parse_cond_block,
        # Macro expansion item
        self._try_parse_macro_exp,
    )

    for sub_parser in sub_parsers:
        parsed_item = sub_parser()

        if parsed_item is not None:
            return parsed_item

    return None
1453
# Pattern for _try_parse_rep_post()
_rep_post_prefix_pat = re.compile(r"\*")

# Tries to parse a post-item repetition (`*` followed by a multiplier
# expression).
#
# Returns what _expect_rep_mul_expr() returns on success, or `None`
# if there's no `*` at the current position.
def _try_parse_rep_post(self):
    star_m = self._try_parse_pat(self._rep_post_prefix_pat)

    if star_m is None:
        # Not a post-item repetition
        return None

    # The multiplier expression is mandatory after `*`
    self._skip_ws_and_comments()
    return self._expect_rep_mul_expr()
1468
# Tries to parse an item, possibly followed by a post-item
# repetition, appending it to `items`.
#
# Returns `True` if an item was appended, or `None` if there's no
# item at the current position (after skipping whitespaces and
# comments).
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    new_item = self._try_parse_base_item()

    if new_item is None:
        # Nothing to append
        return

    if isinstance(new_item, _RepableItem):
        # A repeatable item may be followed by `*` and a multiplier
        self._skip_ws_and_comments()
        post_rep_text_loc = self._text_loc
        post_rep = self._try_parse_rep_post()

        if post_rep is not None:
            mul_expr_str, mul_expr = post_rep
            new_item = _Rep(new_item, mul_expr_str, mul_expr, post_rep_text_loc)

    items.append(new_item)
    return True
1493
# Parses items until the end of the input or until reaching
# something which isn't an item, and returns them.
#
# If `accept_macro_defs` is `True`, then macro definitions are also
# accepted between items: they're registered, but not added to the
# returned list.
def _parse_items(self, accept_macro_defs: bool = False) -> List[_Item]:
    parsed_items = []  # type: List[_Item]

    while self._isnt_done():
        if self._try_append_item(parsed_items):
            # Got an item
            continue

        if accept_macro_defs and self._try_parse_macro_def():
            # Got a macro definition
            continue

        # Neither an item nor an accepted macro definition: stop here
        break

    return parsed_items
1513
# Parses the whole Normand input, setting `self._res` to the main
# group item on success.
#
# Raises a parse error if anything remains after the first level
# items (and macro definitions).
def _parse(self):
    if not self._normand.strip():
        # Nothing but whitespaces: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    # First level items; macro definitions are accepted here
    top_items = self._parse_items(True)

    # Whatever remains now is unexpected
    self._skip_ws_and_comments()

    if self._isnt_done():
        bad_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(bad_char))

    self._res = _Group(top_items, self._text_loc)
1535
1536
# The return type of parse().
#
# Not meant to be instantiated by the user: calling the constructor
# raises `NotImplementedError`. The module builds instances through
# the private _create() class method.
class ParseResult:
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        # Bypass __init__(), which always raises
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, only called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._bo = bo
        self._offset = offset
        self._labels = labels
        self._vars = variables
        self._data = data

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1594
1595
# Raises a parse error having the message `msg` at the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    item_text_loc = item.text_loc
    _raise_error(msg, item_text_loc)
1600
1601
# The `ICITTE` reserved name: always legal within an expression, where
# it evaluates to the current offset.
_icitte_name = "ICITTE"
1604
1605
# Base node visitor.
#
# Calls the _visit_name() method for each name node which isn't the name
# of a call.
#
# This works with a single flag: visiting a call node raises it, and
# finishing the visit of any node (its children included) lowers it. A
# name node visited while the flag is raised is skipped.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name:
            if not self._parent_is_call:
                self._visit_name(node.id)

        # Visit the children, then lower the flag unconditionally
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        ...
1626
1627
# Expression validator: validates that all the names within the
# expression are allowed.
#
# A name is allowed if it's the reserved `ICITTE` name or a member of
# `allowed_names`. Any other reported name raises a parse error at
# `text_loc` which also lists the legal names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, expr_str: str, text_loc: TextLocation, allowed_names: Set[str]):
        super().__init__()
        self._allowed_names = allowed_names
        self._text_loc = text_loc
        self._expr_str = expr_str

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        # Legal names: the allowed ones and `ICITTE`, so never empty
        legal_names = self._allowed_names.copy()
        legal_names.add(_icitte_name)
        legal_names_str = ", ".join(
            sorted("`{}`".format(legal_name) for legal_name in legal_names)
        )
        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._expr_str
        )
        msg += "; the legal names are {{{}}}".format(legal_names_str)
        _raise_error(msg, self._text_loc)
1658
1659
# Generator state: current variables, labels, offset, and byte order.
#
# The state keeps its own shallow copies of the `variables` and
# `labels` dictionaries given at construction time.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.bo = bo
        self.offset = offset
        self.labels = labels.copy()
        self.variables = variables.copy()

    def __repr__(self):
        parts = (self.variables, self.labels, self.offset, self.bo)
        return "_GenState({}, {}, {}, {})".format(*[repr(part) for part in parts])
1678
1679
# Fixed-length number item instance: a fixed-length number item, the
# offset of its bytes within the generated data, and the generation
# state with which to evaluate its expression.
class _FlNumItemInst:
    def __init__(self, item: _FlNum, offset_in_data: int, state: _GenState):
        self._state = state
        self._offset_in_data = offset_in_data
        self._item = item

    # Fixed-length number item.
    @property
    def item(self):
        return self._item

    # Offset of the bytes of this instance within the generated data.
    @property
    def offset_in_data(self):
        return self._offset_in_data

    # Generation state of this instance.
    @property
    def state(self):
        return self._state
1698
1699
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Handle each item in prefix order.
#
#    The handlers append bytes to `self._data` and update some current
#    state object (`_GenState` instance).
#
#    When handling a fixed-length number item, try to evaluate its
#    expression using the current state. If this fails, then it might be
#    because the expression refers to a "future" label: save the current
#    offset in `self._data` (generated data) and a snapshot of the
#    current state within `self._fl_num_item_insts` (`_FlNumItemInst`
#    object). _gen_fl_num_item_insts() will deal with this later.
#
#    When handling the items of a group, keep a map of immediate label
#    names to their offset. Then, after having processed all the items,
#    update the relevant saved state snapshots in
#    `self._fl_num_item_insts` with those immediate label values.
#    _gen_fl_num_item_insts() will deal with this later.
#
# 2. Handle all the fixed-length number item instances of which the
#    expression evaluation failed before.
#
#    At this point, `self._fl_num_item_insts` contains everything that's
#    needed to evaluate the expressions, including the values of
#    "future" labels from the point of view of some fixed-length number
#    item instance.
#
#    If an evaluation fails at this point, then it's a user error.
class _Gen:
    def __init__(
        self,
        group: _Group,
        macro_defs: _MacroDefsT,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._macro_defs = macro_defs
        self._fl_num_item_insts = []  # type: List[_FlNumItemInst]
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Evaluates the expression `expr` of which the original string is
    # `expr_str` at the location `text_loc` considering the current
    # generation state `state`.
    #
    # The available names are the labels and the variables of `state`
    # (a variable wins over a label or `ICITTE` having the same name)
    # as well as `ICITTE` (current offset).
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too. A `bool` result is converted to `int`.
    #
    # Raises a parse error if the expression uses an illegal name, if
    # its evaluation fails, or if the result has an unexpected type.
    @staticmethod
    def _eval_expr(
        expr_str: str,
        expr: ast.Expression,
        text_loc: TextLocation,
        state: _GenState,
        allow_float: bool = False,
    ):
        syms = {}  # type: VariablesT
        syms.update(state.labels)

        # Set the `ICITTE` name to the current offset
        syms[_icitte_name] = state.offset

        # Add the current variables
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(expr_str, text_loc, set(syms.keys())).visit(expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): this runs eval() on an expression which comes
        # from the Normand input; the validation above only covers the
        # names which the visitor reports.
        try:
            val = eval(compile(expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error(
                "Failed to evaluate expression `{}`: {}".format(expr_str, exc),
                text_loc,
            )

        # Convert `bool` result type to `int` to normalize
        if type(val) is bool:
            val = int(val)

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        if type(val) not in expected_types:
            _raise_error(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    expr_str, type_msg, type(val).__name__
                ),
                text_loc,
            )

        return val

    # Evaluates the expression of `item` considering the current
    # generation state `state`.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    @staticmethod
    def _eval_item_expr(
        item: Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond],
        state: _GenState,
        allow_float: bool = False,
    ):
        return _Gen._eval_expr(
            item.expr_str, item.expr, item.text_loc, state, allow_float
        )

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState):
        self._data.append(item.val)
        state.offset += item.size

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState):
        self._data += item.data
        state.offset += item.size

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(self, item: _SetBo, state: _GenState):
        # Update current byte order
        state.bo = item.bo

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(self, item: _VarAssign, state: _GenState):
        # Update variable (the value may be a `float`)
        state.variables[item.name] = self._eval_item_expr(item, state, True)

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(self, item: _FlNum, state: _GenState):
        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Try an immediate evaluation. If it fails, then keep everything
        # needed to (try to) generate the bytes of this item later.
        try:
            data = self._gen_fl_num_item_inst_data(item, state)
        except Exception:
            # Deliberately broad: any failure is retried (and reported
            # if it fails again) by _gen_fl_num_item_insts().
            self._fl_num_item_insts.append(
                _FlNumItemInst(item, len(self._data), copy.deepcopy(state))
            )

            # Reserve space in `self._data` for this instance
            data = bytes([0] * (item.len // 8))

        # Append bytes
        self._data += data

        # Update offset
        state.offset += len(data)

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        if bits == 0:
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Handles the LEB128 integer item `item`.
    #
    # Raises a parse error if the value to encode is negative while
    # `item` is an unsigned LEB128 integer item.
    def _handle_leb128_int_item(self, item: _Leb128Int, state: _GenState):
        # Compute value
        val = self._eval_item_expr(item, state, False)
        is_signed = type(item) is _SLeb128Int

        # ULEB128 can only represent non-negative integers: encoding a
        # negative value would silently produce the bytes of some
        # other, positive value.
        if not is_signed and val < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                    item.expr_str, val
                ),
                item,
            )

        # Size in bytes
        size = self._leb128_size_for_val(val, is_signed)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset
        state.offset += size

    # Handles the group item `item`, removing the immediate labels from
    # `state` at the end if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self, item: _Group, state: _GenState, remove_immediate_labels: bool = True
    ):
        first_fl_num_item_inst_index = len(self._fl_num_item_insts)
        immediate_labels = {}  # type: LabelsT

        # Handle each item
        for subitem in item.items:
            if type(subitem) is _Label:
                # Add to local immediate labels
                immediate_labels[subitem.name] = state.offset

            self._handle_item(subitem, state)

        # Remove immediate labels from current state if needed
        if remove_immediate_labels:
            for name in immediate_labels:
                del state.labels[name]

        # Add all immediate labels to all state snapshots since
        # `first_fl_num_item_inst_index`.
        for inst in self._fl_num_item_insts[first_fl_num_item_inst_index:]:
            inst.state.labels.update(immediate_labels)

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState):
        # Compute the repetition count
        mul = _Gen._eval_item_expr(item, state)

        # Validate result
        if mul < 0:
            _raise_error_for_item(
                "Invalid expression `{}`: unexpected negative result {:,}".format(
                    item.expr_str, mul
                ),
                item,
            )

        # Generate item data `mul` times
        for _ in range(mul):
            self._handle_item(item.item, state)

    # Handles the conditional item `item`.
    def _handle_cond_item(self, item: _Cond, state: _GenState):
        # Compute the conditional value
        val = _Gen._eval_item_expr(item, state)

        # Generate item data if needed
        if val:
            self._handle_item(item.item, state)

    # Evaluates the parameters of the macro expansion item `item`
    # considering the initial state `init_state` and returns a new state
    # to handle the items of the macro.
    #
    # The new state has the offset and byte order of `init_state`, no
    # labels, and only the evaluated parameters as variables.
    def _eval_macro_exp_params(self, item: _MacroExp, init_state: _GenState):
        # New state
        exp_state = _GenState({}, {}, init_state.offset, init_state.bo)

        # Evaluate the parameter expressions
        macro_def = self._macro_defs[item.name]

        for param_name, param in zip(macro_def.param_names, item.params):
            exp_state.variables[param_name] = _Gen._eval_expr(
                param.expr_str, param.expr, param.text_loc, init_state, True
            )

        return exp_state

    # Handles the macro expansion item `item`.
    def _handle_macro_exp_item(self, item: _MacroExp, state: _GenState):
        # New state
        exp_state = self._eval_macro_exp_params(item, state)

        # Process the contained group
        init_data_size = len(self._data)
        self._handle_item(self._macro_defs[item.name].group, exp_state)

        # Update state offset with the size of the data which the
        # expansion generated
        state.offset += len(self._data) - init_data_size

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(self, item: _SetOffset, state: _GenState):
        state.offset = item.val

    # Handles offset alignment item `item` (adds padding).
    def _handle_align_offset_item(self, item: _AlignOffset, state: _GenState):
        init_offset = state.offset

        # `item.val` is in bits
        align_bytes = item.val // 8

        # Round the offset up to the next multiple of `align_bytes`
        state.offset = (state.offset + align_bytes - 1) // align_bytes * align_bytes
        self._data += bytes([item.pad_val] * (state.offset - init_offset))

    # Handles the label item `item`.
    def _handle_label_item(self, item: _Label, state: _GenState):
        state.labels[item.name] = state.offset

    # Handles the item `item` by calling the handler registered for
    # its exact type in `self._item_handlers`, returning what this
    # handler returns.
    def _handle_item(self, item: _Item, state: _GenState):
        return self._item_handlers[type(item)](item, state)

    # Generates the data for a fixed-length integer item instance having
    # the value `val` and returns it.
    def _gen_fl_int_item_inst_data(self, val: int, item: _FlNum, state: _GenState):
        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}`".format(
                    val, item.len, item.expr_str
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Return data
        return data

    # Generates the data for a fixed-length floating point number item
    # instance having the value `val` and returns it.
    def _gen_fl_float_item_inst_data(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode and return result
        return struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Generates the data for a fixed-length number item instance and
    # returns it.
    def _gen_fl_num_item_inst_data(self, item: _FlNum, state: _GenState):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Handle depending on type
        if type(val) is int:
            return self._gen_fl_int_item_inst_data(val, item, state)
        else:
            assert type(val) is float
            return self._gen_fl_float_item_inst_data(val, item, state)

    # Generates the data for all the fixed-length number item instances
    # and writes it at the correct offset within `self._data`.
    def _gen_fl_num_item_insts(self):
        for inst in self._fl_num_item_insts:
            # Generate bytes
            data = self._gen_fl_num_item_inst_data(inst.item, inst.state)

            # Insert bytes into `self._data`
            self._data[inst.offset_in_data : inst.offset_in_data + len(data)] = data

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _AlignOffset: self._handle_align_offset_item,
            _Byte: self._handle_byte_item,
            _Cond: self._handle_cond_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _MacroExp: self._handle_macro_exp_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState], None]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, False)

        # This is actually the final state
        self._final_state = state

        # Generate all the fixed-length number bytes now that we know
        # their full state
        self._gen_fl_num_item_insts()
2151
2152
# Returns a `ParseResult` instance containing the bytes encoded by the
# input string `normand`.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    # Default to empty initial dictionaries
    init_variables = {} if init_variables is None else init_variables
    init_labels = {} if init_labels is None else init_labels

    # Parse the text into a main group item and macro definitions
    parser = _Parser(normand, init_variables, init_labels)

    # Generate the data and final state from the main group item
    generator = _Gen(
        parser.res,
        parser.macro_defs,
        init_variables,
        init_labels,
        init_offset,
        init_byte_order,
    )

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        generator.data,
        generator.variables,
        generator.labels,
        generator.offset,
        generator.bo,
    )
2194
2195
# Parses the command-line arguments (`sys.argv`) and returns the
# resulting `argparse` namespace having the `offset`, `byte_order`,
# `var`, `label`, and `path` attributes.
def _parse_cli_args():
    import argparse

    arg_parser = argparse.ArgumentParser()

    # Initial offset
    arg_parser.add_argument(
        "--offset",
        metavar="OFFSET",
        action="store",
        type=int,
        default=0,
        help="initial offset (positive)",
    )

    # Initial byte order
    arg_parser.add_argument(
        "-b",
        "--byte-order",
        metavar="BO",
        choices=["be", "le"],
        type=str,
        help="initial byte order (`be` or `le`)",
    )

    # Initial variables and labels (repeatable `NAME=VAL` options)
    arg_parser.add_argument(
        "--var",
        metavar="NAME=VAL",
        action="append",
        help="add an initial variable (may be repeated)",
    )
    arg_parser.add_argument(
        "-l",
        "--label",
        metavar="NAME=VAL",
        action="append",
        help="add an initial label (may be repeated)",
    )

    # Version
    version_str = "Normand {}".format(__version__)
    arg_parser.add_argument("--version", action="version", version=version_str)

    # Optional input path
    arg_parser.add_argument(
        "path",
        metavar="PATH",
        action="store",
        nargs="?",
        help="input path (none means standard input)",
    )

    return arg_parser.parse_args()
2244
2245
# Raises a command-line error (`RuntimeError`) of which the message is
# `msg` with a `Command-line error: ` prefix.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2249
2250
# Returns a dictionary of names to integers from the list of strings
# `args` containing `NAME=VAL` entries, where `NAME` matches
# `_py_name_pat` and `VAL` only contains decimal digits.
#
# Returns an empty dictionary if `args` is `None`.
#
# Raises a command-line error if an entry is malformed.
def _dict_from_arg(args: Optional[List[str]]):
    res = {}  # type: LabelsT

    if args is None:
        return res

    # Same pattern for all the entries
    assign_pat_str = r"({})=(\d+)$".format(_py_name_pat.pattern)

    for entry in args:
        assign_m = re.match(assign_pat_str, entry)

        if assign_m is None:
            _raise_cli_error("Invalid assignment {}".format(entry))

        res[assign_m.group(1)] = int(assign_m.group(2))

    return res
2268
2269
# CLI entry point without exception handling.
#
# Reads the Normand input from the path given on the command line (or
# from the standard input if there's none), parses it with the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the generated data to the standard output.
#
# Raises a command-line error if the initial offset is negative. On
# parse error, prints the location and message of the error and exits
# through _fail().
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (the message includes the offending value)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
2319
2320
# Prints the message `msg` to the standard error, adding a final
# period if it's missing, and exits with status 1.
def _fail(msg: str) -> NoReturn:
    full_msg = msg if msg.endswith(".") else msg + "."
    print(full_msg, file=sys.stderr)
    sys.exit(1)
2328
2329
# CLI entry point: runs _try_run_cli() and reports any exception
# through _fail() (message to the standard error, exit status 1).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as cli_exc:
        _fail(str(cli_exc))
2336
2337
# Run the command-line tool when executing the module itself
if __name__ == "__main__":
    _run_cli()
This page took 0.077484 seconds and 4 git commands to generate.