b2f3069f8b8e55aee7f8a608741df75fc56b4660
[normand.git] / normand / normand.py
1 # The MIT License (MIT)
2 #
3 # Copyright (c) 2023 Philippe Proulx <eeppeliteloop@gmail.com>
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24 # This module is the portable Normand processor. It offers both the
25 # parse() function and the command-line tool (run the module itself)
26 # without external dependencies except a `typing` module for Python 3.4.
27 #
28 # Feel free to copy this module file to your own project to use Normand.
29 #
30 # Upstream repository: <https://github.com/efficios/normand>.
31
# Author and version of this module.
__author__ = "Philippe Proulx"
__version__ = "0.9.0"
# Public names of this module (what `from ... import *` exports).
__all__ = [
    "ByteOrder",
    "parse",
    "ParseError",
    "ParseResult",
    "TextLocation",
    "LabelsT",
    "VariablesT",
    "__author__",
    "__version__",
]
45
46 import re
47 import abc
48 import ast
49 import sys
50 import enum
51 import math
52 import struct
53 import typing
54 from typing import Any, Set, Dict, List, Union, Pattern, Callable, NoReturn, Optional
55
56
57 # Text location (line and column numbers).
class TextLocation:
    # The public constructor is disabled (it always raises): instances
    # come from the private _create() class method instead.
    def __init__(*args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a text location at line `line_no` and column
    # `col_no` without calling __init__().
    @classmethod
    def _create(cls, line_no: int, col_no: int):
        loc = cls.__new__(cls)
        loc._init(line_no, col_no)
        return loc

    # Actual initializer, called by _create().
    def _init(self, line_no: int, col_no: int):
        self._line_no, self._col_no = line_no, col_no

    # Line number.
    @property
    def line_no(self):
        return self._line_no

    # Column number.
    @property
    def col_no(self):
        return self._col_no

    def __repr__(self):
        coords = (self._line_no, self._col_no)
        return "TextLocation({}, {})".format(*coords)
84
85
86 # Any item.
class _Item:
    # Builds an item found at the source text location `text_loc`.
    def __init__(self, text_loc: TextLocation):
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self) -> TextLocation:
        return self._text_loc
95
96
97 # Scalar item.
class _ScalarItem(_Item):
    # Returns the size, in bytes, of this item.
    #
    # NOTE(review): this class doesn't visibly use `abc.ABCMeta`, so
    # `abc.abstractmethod` alone presumably doesn't prevent
    # instantiation; confirm whether enforcement is intended.
    @property
    @abc.abstractmethod
    def size(self) -> int:
        ...
104
105
106 # A repeatable item.
class _RepableItem:
    # Marker class only (no attribute, no method): the parser checks
    # it with isinstance() to decide whether or not a post-item
    # repetition may follow an item.
    pass
109
110
111 # Single byte.
class _Byte(_ScalarItem, _RepableItem):
    # Builds a byte item having the value `val` and found at the source
    # text location `text_loc`.
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Byte value.
    @property
    def val(self):
        return self._val

    # A byte item always occupies a single byte.
    @property
    def size(self):
        return 1

    def __repr__(self):
        hex_val = hex(self._val)
        text_loc_repr = repr(self._text_loc)
        return "_Byte({}, {})".format(hex_val, text_loc_repr)
128
129
130 # String.
class _Str(_ScalarItem, _RepableItem):
    # Builds a string item from its already encoded bytes `data`, found
    # at the source text location `text_loc`.
    def __init__(self, data: bytes, text_loc: TextLocation):
        super().__init__(text_loc)
        self._data = data

    # Encoded bytes.
    @property
    def data(self):
        return self._data

    # The size of a string item is its number of encoded bytes.
    @property
    def size(self):
        encoded = self._data
        return len(encoded)

    def __repr__(self):
        parts = [repr(part) for part in (self._data, self._text_loc)]
        return "_Str({}, {})".format(*parts)
147
148
149 # Byte order.
@enum.unique
class ByteOrder(enum.Enum):
    # `enum.unique` guarantees that no two members share a value.

    # Big endian.
    BE = "be"

    # Little endian.
    LE = "le"
157
158
159 # Byte order setting.
class _SetBo(_Item):
    # Builds a byte order setting item selecting the byte order `bo`.
    def __init__(self, bo: ByteOrder, text_loc: TextLocation):
        super().__init__(text_loc)
        self._bo = bo

    # Byte order to set.
    @property
    def bo(self):
        return self._bo

    def __repr__(self):
        parts = [repr(part) for part in (self._bo, self._text_loc)]
        return "_SetBo({}, {})".format(*parts)
171
172
173 # Label.
class _Label(_Item):
    # Builds a label item named `name`.
    def __init__(self, name: str, text_loc: TextLocation):
        super().__init__(text_loc)
        self._name = name

    # Label name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        parts = [repr(part) for part in (self._name, self._text_loc)]
        return "_Label({}, {})".format(*parts)
186
187
188 # Offset setting.
class _SetOffset(_Item):
    # Builds an offset setting item; `val` is the new offset (bytes).
    def __init__(self, val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val = val

    # Offset value (bytes).
    @property
    def val(self):
        return self._val

    def __repr__(self):
        parts = [repr(part) for part in (self._val, self._text_loc)]
        return "_SetOffset({}, {})".format(*parts)
201
202
203 # Offset alignment.
class _AlignOffset(_Item):
    # Builds an offset alignment item: `val` is the alignment (bits)
    # and `pad_val` is the padding byte value.
    def __init__(self, val: int, pad_val: int, text_loc: TextLocation):
        super().__init__(text_loc)
        self._val, self._pad_val = val, pad_val

    # Alignment value (bits).
    @property
    def val(self):
        return self._val

    # Padding byte value.
    @property
    def pad_val(self):
        return self._pad_val

    def __repr__(self):
        parts = [repr(part) for part in (self._val, self._pad_val, self._text_loc)]
        return "_AlignOffset({}, {}, {})".format(*parts)
224
225
226 # Mixin containing an AST expression and its string.
class _ExprMixin:
    # Keeps the expression string `expr_str` and its corresponding AST
    # expression node `expr`.
    #
    # The classes using this mixin call this initializer explicitly
    # (`_ExprMixin.__init__(self, ...)`).
    def __init__(self, expr_str: str, expr: ast.Expression):
        self._expr_str = expr_str
        self._expr = expr

    # Expression string.
    @property
    def expr_str(self) -> str:
        return self._expr_str

    # Expression node to evaluate.
    @property
    def expr(self) -> ast.Expression:
        return self._expr
241
242
243 # Variable assignment.
class _VarAssign(_Item, _ExprMixin):
    # Builds a variable assignment item: the variable named `name`
    # receives the value of the expression `expr` (of which the
    # string is `expr_str`).
    def __init__(
        self, name: str, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._name = name

    # Name.
    @property
    def name(self):
        return self._name

    def __repr__(self):
        fields = (self._name, self._expr_str, self._expr, self._text_loc)
        parts = [repr(field) for field in fields]
        return "_VarAssign({}, {}, {}, {})".format(*parts)
264
265
266 # Fixed-length number, possibly needing more than one byte.
class _FlNum(_ScalarItem, _RepableItem, _ExprMixin):
    # Builds a fixed-length number item: the value of the expression
    # `expr` (of which the string is `expr_str`) encoded on `len` bits.
    def __init__(
        self, expr_str: str, expr: ast.Expression, len: int, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._len = len

    # Length (bits).
    @property
    def len(self):
        return self._len

    # Size (bytes): the length in bits divided by eight (floor).
    @property
    def size(self):
        bit_len = self._len
        return bit_len // 8

    def __repr__(self):
        fields = (self._expr_str, self._expr, self._len, self._text_loc)
        parts = [repr(field) for field in fields]
        return "_FlNum({}, {}, {}, {})".format(*parts)
291
292
293 # LEB128 integer.
class _Leb128Int(_Item, _RepableItem, _ExprMixin):
    # Builds an LEB128 integer item of which the value is the one of
    # the expression `expr` (of which the string is `expr_str`).
    def __init__(self, expr_str: str, expr: ast.Expression, text_loc: TextLocation):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)

    # Uses the name of the concrete class so that subclasses don't need
    # their own __repr__().
    def __repr__(self):
        cls_name = type(self).__name__
        fields = (self._expr_str, self._expr, self._text_loc)
        return "{}({}, {}, {})".format(cls_name, *[repr(field) for field in fields])
306
307
308 # Unsigned LEB128 integer.
class _ULeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is what marks the
    # integer as unsigned.
    pass
311
312
313 # Signed LEB128 integer.
class _SLeb128Int(_Leb128Int, _RepableItem, _ExprMixin):
    # Adds nothing to `_Leb128Int`: the class itself is what marks the
    # integer as signed.
    pass
316
317
318 # Group of items.
class _Group(_Item, _RepableItem):
    # Builds a group item containing the items `items`.
    def __init__(self, items: List[_Item], text_loc: TextLocation):
        super().__init__(text_loc)
        self._items = items

    # Contained items.
    @property
    def items(self):
        return self._items

    def __repr__(self):
        parts = [repr(part) for part in (self._items, self._text_loc)]
        return "_Group({}, {})".format(*parts)
331
332
333 # Repetition item.
class _Rep(_Item, _ExprMixin):
    # Builds a repetition item: `item` is the item to repeat and
    # `expr` (of which the string is `expr_str`) is the expression
    # attached to the repetition.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item to repeat.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        parts = [repr(field) for field in fields]
        return "_Rep({}, {}, {}, {})".format(*parts)
354
355
356 # Conditional item.
class _Cond(_Item, _ExprMixin):
    # Builds a conditional item: `item` is the item which depends on
    # the condition and `expr` (of which the string is `expr_str`) is
    # the expression attached to the condition.
    def __init__(
        self, item: _Item, expr_str: str, expr: ast.Expression, text_loc: TextLocation
    ):
        super().__init__(text_loc)
        _ExprMixin.__init__(self, expr_str, expr)
        self._item = item

    # Item which depends on the condition.
    @property
    def item(self):
        return self._item

    def __repr__(self):
        fields = (self._item, self._expr_str, self._expr, self._text_loc)
        parts = [repr(field) for field in fields]
        return "_Cond({}, {}, {}, {})".format(*parts)
377
378
# Expression item type (for type hints): any item class which also
# uses the `_ExprMixin` mixin.
_ExprItemT = Union[_FlNum, _Leb128Int, _VarAssign, _Rep, _Cond]
381
382
383 # A parsing error containing a message and a text location.
class ParseError(RuntimeError):
    # The public constructor is disabled (it always raises): instances
    # come from the private _create() class method instead.
    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Builds and returns a parsing error having the message `msg` and
    # the source text location `text_loc` without calling __init__().
    @classmethod
    def _create(cls, msg: str, text_loc: TextLocation):
        error = cls.__new__(cls)
        error._init(msg, text_loc)
        return error

    # Actual initializer, called by _create(): `msg` becomes the
    # message of the underlying `RuntimeError`.
    def _init(self, msg: str, text_loc: TextLocation):
        super().__init__(msg)
        self._text_loc = text_loc

    # Source text location.
    @property
    def text_loc(self):
        return self._text_loc
402
403
404 # Raises a parsing error, forwarding the parameters to the constructor.
def _raise_error(msg: str, text_loc: TextLocation) -> NoReturn:
    # ParseError() itself always raises `NotImplementedError`: build
    # the error through the private class method.
    error = ParseError._create(msg, text_loc)  # pyright: ignore[reportPrivateUsage]
    raise error
407
408
# Variables dictionary type (for type hints): variable names to their
# integral or floating point number values.
VariablesT = Dict[str, Union[int, float]]


# Labels dictionary type (for type hints): label names to integers.
LabelsT = Dict[str, int]


# Python name pattern: an ASCII letter or `_` followed by zero or more
# ASCII letters, digits, or `_` characters.
_py_name_pat = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
419
420
421 # Normand parser.
422 #
423 # The constructor accepts a Normand input. After building, use the `res`
424 # property to get the resulting main group.
425 class _Parser:
426 # Builds a parser to parse the Normand input `normand`, parsing
427 # immediately.
def __init__(self, normand: str, variables: VariablesT, labels: LabelsT):
    # Input text
    self._normand = normand

    # Cursor: offset within the input text, then the current line and
    # column numbers (both start at 1)
    self._at = 0
    self._line_no, self._col_no = 1, 1

    # Names which are already taken, to detect conflicts while parsing
    self._label_names = set(labels)
    self._var_names = set(variables)

    # Parse immediately
    self._parse()
436
437 # Result (main group).
@property
def res(self) -> _Group:
    # `self._res` is set by _parse(), which __init__() calls.
    return self._res
441
442 # Current text location.
@property
def _text_loc(self):
    # A new text location object is built on each access from the
    # current line and column numbers.
    line_no, col_no = self._line_no, self._col_no
    return TextLocation._create(line_no, col_no)  # pyright: ignore[reportPrivateUsage]
448
449 # Returns `True` if this parser is done parsing.
450 def _is_done(self):
451 return self._at == len(self._normand)
452
453 # Returns `True` if this parser isn't done parsing.
454 def _isnt_done(self):
455 return not self._is_done()
456
457 # Raises a parse error, creating it using the message `msg` and the
458 # current text location.
def _raise_error(self, msg: str) -> NoReturn:
    # Within a method, `_raise_error` is the module-level function,
    # not this method.
    cur_text_loc = self._text_loc
    _raise_error(msg, cur_text_loc)
461
462 # Tries to make the pattern `pat` match the current substring,
463 # returning the match object and updating `self._at`,
464 # `self._line_no`, and `self._col_no` on success.
465 def _try_parse_pat(self, pat: Pattern[str]):
466 m = pat.match(self._normand, self._at)
467
468 if m is None:
469 return
470
471 # Skip matched string
472 self._at += len(m.group(0))
473
474 # Update line number
475 self._line_no += m.group(0).count("\n")
476
477 # Update column number
478 for i in reversed(range(self._at)):
479 if self._normand[i] == "\n" or i == 0:
480 if i == 0:
481 self._col_no = self._at + 1
482 else:
483 self._col_no = self._at - i
484
485 break
486
487 # Return match object
488 return m
489
490 # Expects the pattern `pat` to match the current substring,
491 # returning the match object and updating `self._at`,
492 # `self._line_no`, and `self._col_no` on success, or raising a parse
493 # error with the message `error_msg` on error.
494 def _expect_pat(self, pat: Pattern[str], error_msg: str):
495 # Match
496 m = self._try_parse_pat(pat)
497
498 if m is None:
499 # No match: error
500 self._raise_error(error_msg)
501
502 # Return match object
503 return m
504
505 # Pattern for _skip_ws_and_comments()
506 _ws_or_syms_or_comments_pat = re.compile(
507 r"(?:[\s/\\?&:;.,+[\]_=|-]|#[^#]*?(?:\n|#))*"
508 )
509
510 # Skips as many whitespaces, insignificant symbol characters, and
511 # comments as possible.
512 def _skip_ws_and_comments(self):
513 self._try_parse_pat(self._ws_or_syms_or_comments_pat)
514
515 # Pattern for _try_parse_hex_byte()
516 _nibble_pat = re.compile(r"[A-Fa-f0-9]")
517
518 # Tries to parse a hexadecimal byte, returning a byte item on
519 # success.
def _try_parse_hex_byte(self):
    byte_text_loc = self._text_loc

    # High nibble: without it, this isn't a hexadecimal byte
    high = self._try_parse_pat(self._nibble_pat)

    if high is None:
        return

    # Low nibble: mandatory, possibly after whitespaces, insignificant
    # symbols, and comments
    self._skip_ws_and_comments()
    low = self._expect_pat(self._nibble_pat, "Expecting another hexadecimal nibble")

    # Combine both nibbles
    digits = high.group(0) + low.group(0)
    return _Byte(int(digits, 16), byte_text_loc)
538
539 # Patterns for _try_parse_bin_byte()
540 _bin_byte_bit_pat = re.compile(r"[01]")
541 _bin_byte_prefix_pat = re.compile(r"%")
542
543 # Tries to parse a binary byte, returning a byte item on success.
def _try_parse_bin_byte(self):
    byte_text_loc = self._text_loc

    # Without the `%` prefix, this isn't a binary byte
    if self._try_parse_pat(self._bin_byte_prefix_pat) is None:
        return

    # Exactly eight bits must follow, most significant first, each one
    # possibly after whitespaces, insignificant symbols, and comments
    val = 0

    for _ in range(8):
        self._skip_ws_and_comments()
        bit_m = self._expect_pat(self._bin_byte_bit_pat, "Expecting a bit (`0` or `1`)")
        val = (val << 1) | int(bit_m.group(0))

    return _Byte(val, byte_text_loc)
562
563 # Patterns for _try_parse_dec_byte()
564 _dec_byte_prefix_pat = re.compile(r"\$\s*")
565 _dec_byte_val_pat = re.compile(r"(?P<neg>-?)(?P<val>\d+)")
566
567 # Tries to parse a decimal byte, returning a byte item on success.
def _try_parse_dec_byte(self):
    byte_text_loc = self._text_loc

    # Without the `$` prefix, this isn't a decimal byte
    if self._try_parse_pat(self._dec_byte_prefix_pat) is None:
        return

    # The decimal constant itself is mandatory
    m = self._expect_pat(self._dec_byte_val_pat, "Expecting a decimal constant")
    val = int(m.group("val"))

    if m.group("neg") == "-":
        val = -val

    # Both the signed and the unsigned ranges of a byte are accepted
    if not (-128 <= val <= 255):
        _raise_error("Invalid decimal byte value {}".format(val), byte_text_loc)

    # Negative values become their two's complement
    return _Byte(val % 256, byte_text_loc)
591
592 # Tries to parse a byte, returning a byte item on success.
593 def _try_parse_byte(self):
594 # Hexadecimal
595 item = self._try_parse_hex_byte()
596
597 if item is not None:
598 return item
599
600 # Binary
601 item = self._try_parse_bin_byte()
602
603 if item is not None:
604 return item
605
606 # Decimal
607 item = self._try_parse_dec_byte()
608
609 if item is not None:
610 return item
611
612 # Patterns for _try_parse_str()
613 _str_prefix_pat = re.compile(r'(?:u(?P<len>16|32)(?P<bo>be|le))?\s*"')
614 _str_suffix_pat = re.compile(r'"')
615 _str_str_pat = re.compile(r'(?:(?:\\.)|[^"])*')
616
617 # Strings corresponding to escape sequence characters
618 _str_escape_seq_strs = {
619 "0": "\0",
620 "a": "\a",
621 "b": "\b",
622 "e": "\x1b",
623 "f": "\f",
624 "n": "\n",
625 "r": "\r",
626 "t": "\t",
627 "v": "\v",
628 "\\": "\\",
629 '"': '"',
630 }
631
632 # Tries to parse a string, returning a string item on success.
def _try_parse_str(self):
    # Tries to parse a literal string (optional `u16be`, `u16le`,
    # `u32be`, or `u32le` prefix, then a double-quoted string),
    # returning a string item containing the encoded bytes on success
    # or `None` if there's no string prefix at the current location.
    #
    # Raises a parsing error if the end of the literal string is
    # missing.
    begin_text_loc = self._text_loc

    # Match prefix
    m = self._try_parse_pat(self._str_prefix_pat)

    if m is None:
        # No match
        return

    # Get encoding
    encoding = "utf8"

    if m.group("len") is not None:
        encoding = "utf_{}_{}".format(m.group("len"), m.group("bo"))

    # Actual string
    m = self._expect_pat(self._str_str_pat, "Expecting a literal string")

    # Expect end of string
    self._expect_pat(self._str_suffix_pat, 'Expecting `"` (end of literal string)')

    # Replace escape sequences in a single left-to-right pass.
    #
    # Successive str.replace() calls (one per escape character) would
    # corrupt an escaped backslash followed by an escape character:
    # for example, `\\n` would become a backslash and a newline
    # instead of a backslash and `n`.
    #
    # Unknown escape sequences remain as is.
    escape_seq_strs = self._str_escape_seq_strs

    def replace_escape_seq(escape_m):
        return escape_seq_strs.get(escape_m.group(1), escape_m.group(0))

    val = re.sub(r"\\(.)", replace_escape_seq, m.group(0))

    # Encode
    data = val.encode(encoding)

    # Return item
    return _Str(data, begin_text_loc)
666
667 # Patterns for _try_parse_group()
668 _group_prefix_pat = re.compile(r"\(")
669 _group_suffix_pat = re.compile(r"\)")
670
671 # Tries to parse a group, returning a group item on success.
def _try_parse_group(self):
    group_text_loc = self._text_loc

    # Without the `(` prefix, this isn't a group
    opening = self._try_parse_pat(self._group_prefix_pat)

    if opening is None:
        return

    # Everything up to the first unknown character belongs to the group
    contained = self._parse_items()

    # Only `)` may follow the contained items
    self._skip_ws_and_comments()
    self._expect_pat(
        self._group_suffix_pat, "Expecting an item or `)` (end of group)"
    )
    return _Group(contained, group_text_loc)
691
692 # Returns a stripped expression string and an AST expression node
693 # from the expression string `expr_str` at text location `text_loc`.
def _ast_expr_from_str(self, expr_str: str, text_loc: TextLocation):
    # Normalize: no surrounding whitespaces and a single line
    one_line = expr_str.strip().replace("\n", " ")

    try:
        expr = ast.parse(one_line, mode="eval")
    except SyntaxError:
        # Report as a parsing error at `text_loc`
        _raise_error(
            "Invalid expression `{}`: invalid syntax".format(one_line),
            text_loc,
        )
    else:
        return one_line, expr
707
708 # Patterns for _try_parse_num_and_attr()
709 _val_expr_pat = re.compile(r"([^}:]+):\s*")
710 _fl_num_len_attr_pat = re.compile(r"8|16|24|32|40|48|56|64")
711 _leb128_int_attr_pat = re.compile(r"(u|s)leb128")
712
713 # Tries to parse a value and attribute (fixed length in bits or
714 # `leb128`), returning a value item on success.
def _try_parse_num_and_attr(self):
    num_text_loc = self._text_loc

    # Value expression followed by `:`: without it, this isn't a number
    m_expr = self._try_parse_pat(self._val_expr_pat)

    if m_expr is None:
        return

    expr_str, expr = self._ast_expr_from_str(m_expr.group(1), num_text_loc)

    # Length attribute: fixed-length number item
    m_len = self._try_parse_pat(self._fl_num_len_attr_pat)

    if m_len is not None:
        return _FlNum(expr_str, expr, int(m_len.group(0)), num_text_loc)

    # Otherwise the attribute must be `uleb128` or `sleb128`
    m_leb128 = self._try_parse_pat(self._leb128_int_attr_pat)

    if m_leb128 is None:
        self._raise_error(
            "Expecting a length (multiple of eight bits), `uleb128`, or `sleb128`"
        )

    # LEB128 integer item: the first letter selects the signedness
    if m_leb128.group(1) == "u":
        return _ULeb128Int(expr_str, expr, num_text_loc)

    return _SLeb128Int(expr_str, expr, num_text_loc)
752
753 # Pattern for _try_parse_var_assign()
754 _var_assign_pat = re.compile(
755 r"(?P<name>{})\s*=\s*(?P<expr>[^}}]+)".format(_py_name_pat.pattern)
756 )
757
758 # Tries to parse a variable assignment, returning a variable
759 # assignment item on success.
def _try_parse_var_assign(self):
    assign_text_loc = self._text_loc

    # `NAME = EXPR`: without it, this isn't a variable assignment
    m = self._try_parse_pat(self._var_assign_pat)

    if m is None:
        return

    # The name must be neither reserved nor the one of a known label
    name = m.group("name")

    if name == _icitte_name:
        error_msg = "`{}` is a reserved variable name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Existing label named `{}`".format(name)
    else:
        error_msg = None

    if error_msg is not None:
        _raise_error(error_msg, assign_text_loc)

    # The name is now a known variable name
    self._var_names.add(name)

    # Build the item from the name and the expression
    expr_str, expr = self._ast_expr_from_str(m.group("expr"), assign_text_loc)
    return _VarAssign(name, expr_str, expr, assign_text_loc)
794
795 # Pattern for _try_parse_set_bo()
796 _bo_pat = re.compile(r"[bl]e")
797
798 # Tries to parse a byte order name, returning a byte order setting
799 # item on success.
def _try_parse_set_bo(self):
    bo_text_loc = self._text_loc

    # `be` or `le`: without it, this isn't a byte order setting
    m = self._try_parse_pat(self._bo_pat)

    if m is None:
        return

    bo_name = m.group(0)

    if bo_name != "be":
        # The pattern only accepts `be` and `le`
        assert bo_name == "le"
        return _SetBo(ByteOrder.LE, bo_text_loc)

    return _SetBo(ByteOrder.BE, bo_text_loc)
816
817 # Patterns for _try_parse_val_or_var_assign_or_set_bo()
818 _val_var_assign_set_bo_prefix_pat = re.compile(r"\{\s*")
819 _val_var_assign_set_bo_suffix_pat = re.compile(r"\s*}")
820
821 # Tries to parse a value, a variable assignment, or a byte order
822 # setting, returning an item on success.
823 def _try_parse_val_or_var_assign_or_set_bo(self):
824 # Match prefix
825 if self._try_parse_pat(self._val_var_assign_set_bo_prefix_pat) is None:
826 # No match
827 return
828
829 # Variable assignment item?
830 item = self._try_parse_var_assign()
831
832 if item is None:
833 # Number item?
834 item = self._try_parse_num_and_attr()
835
836 if item is None:
837 # Byte order setting item?
838 item = self._try_parse_set_bo()
839
840 if item is None:
841 # At this point it's invalid
842 self._raise_error(
843 "Expecting a fixed-length number, a variable assignment, or a byte order setting"
844 )
845
846 # Expect suffix
847 self._expect_pat(self._val_var_assign_set_bo_suffix_pat, "Expecting `}`")
848 return item
849
850 # Common positive constant integer pattern
851 _pos_const_int_pat = re.compile(r"0[Xx][A-Fa-f0-9]+|\d+")
852
853 # Tries to parse an offset setting value (after the initial `<`),
854 # returning an offset item on success.
def _try_parse_set_offset_val(self):
    # Tries to parse a positive constant integer (decimal or
    # hexadecimal with a `0x`/`0X` prefix), returning an offset
    # setting item on success or `None` if there's no such constant at
    # the current location.
    #
    # Raises a parsing error if Python cannot decode the matched
    # constant.
    begin_text_loc = self._text_loc

    # Match
    m = self._try_parse_pat(self._pos_const_int_pat)

    if m is None:
        # No match
        return

    # Decode.
    #
    # With base 0, int() follows the Python integer literal rules and
    # therefore raises `ValueError` for a nonzero decimal constant
    # having leading zeros (for example `010`), which the pattern
    # accepts: report this as a parsing error instead of letting the
    # `ValueError` escape.
    try:
        val = int(m.group(0), 0)
    except ValueError:
        _raise_error(
            "Invalid offset setting value `{}`".format(m.group(0)), begin_text_loc
        )

    # Return item
    return _SetOffset(val, begin_text_loc)
867
868 # Tries to parse a label name (after the initial `<`), returning a
869 # label item on success.
def _try_parse_label_name(self):
    label_text_loc = self._text_loc

    # Without a Python name, this isn't a label
    m = self._try_parse_pat(_py_name_pat)

    if m is None:
        return

    # The name must be neither reserved, nor the one of a known label,
    # nor the one of a known variable
    name = m.group(0)

    if name == _icitte_name:
        error_msg = "`{}` is a reserved label name".format(_icitte_name)
    elif name in self._label_names:
        error_msg = "Duplicate label name `{}`".format(name)
    elif name in self._var_names:
        error_msg = "Existing variable named `{}`".format(name)
    else:
        error_msg = None

    if error_msg is not None:
        _raise_error(error_msg, label_text_loc)

    # The name is now a known label name
    self._label_names.add(name)
    return _Label(name, label_text_loc)
899
900 # Patterns for _try_parse_label_or_set_offset()
901 _label_set_offset_prefix_pat = re.compile(r"<\s*")
902 _label_set_offset_suffix_pat = re.compile(r"\s*>")
903
904 # Tries to parse a label or an offset setting, returning an item on
905 # success.
906 def _try_parse_label_or_set_offset(self):
907 # Match prefix
908 if self._try_parse_pat(self._label_set_offset_prefix_pat) is None:
909 # No match
910 return
911
912 # Offset setting item?
913 item = self._try_parse_set_offset_val()
914
915 if item is None:
916 # Label item?
917 item = self._try_parse_label_name()
918
919 if item is None:
920 # At this point it's invalid
921 self._raise_error("Expecting a label name or an offset setting value")
922
923 # Expect suffix
924 self._expect_pat(self._label_set_offset_suffix_pat, "Expecting `>`")
925 return item
926
927 # Patterns for _try_parse_align_offset()
928 _align_offset_prefix_pat = re.compile(r"@\s*")
929 _align_offset_val_pat = re.compile(r"(\d+)\s*")
930 _align_offset_pad_val_prefix_pat = re.compile(r"~\s*")
931
932 # Tries to parse an offset alignment, returning an offset alignment
933 # item on success.
def _try_parse_align_offset(self):
    # Tries to parse an offset alignment (`@`, an alignment in bits,
    # and then an optional `~` followed by a padding byte value),
    # returning an offset alignment item on success or `None` if
    # there's no `@` prefix at the current location.
    #
    # The padding byte value is 0 when not specified.
    #
    # Raises a parsing error if the alignment isn't a positive
    # multiple of eight, or if the padding byte value is invalid or
    # greater than 255.
    begin_text_loc = self._text_loc

    # Match prefix
    if self._try_parse_pat(self._align_offset_prefix_pat) is None:
        # No match
        return

    align_text_loc = self._text_loc
    m = self._expect_pat(
        self._align_offset_val_pat,
        "Expecting an alignment (positive multiple of eight bits)",
    )

    # Validate alignment
    val = int(m.group(1))

    if val <= 0 or (val % 8) != 0:
        _raise_error(
            "Invalid alignment value {} (not a positive multiple of eight)".format(
                val
            ),
            align_text_loc,
        )

    # Padding value?
    pad_val = 0

    if self._try_parse_pat(self._align_offset_pad_val_prefix_pat) is not None:
        pad_val_text_loc = self._text_loc
        m = self._expect_pat(self._pos_const_int_pat, "Expecting a byte value")

        # Decode.
        #
        # With base 0, int() follows the Python integer literal rules
        # and therefore raises `ValueError` for a nonzero decimal
        # constant having leading zeros (for example `010`), which the
        # pattern accepts: report this as a parsing error instead of
        # letting the `ValueError` escape.
        try:
            pad_val = int(m.group(0), 0)
        except ValueError:
            _raise_error(
                "Invalid padding byte value `{}`".format(m.group(0)),
                pad_val_text_loc,
            )

        # Validate
        if pad_val > 255:
            _raise_error(
                "Invalid padding byte value {}".format(pad_val),
                pad_val_text_loc,
            )

    # Return item
    return _AlignOffset(val, pad_val, begin_text_loc)
977
978 # Patterns for _expect_rep_cond_expr()
979 _rep_cond_expr_prefix_pat = re.compile(r"\{")
980 _rep_cond_expr_pat = re.compile(r"[^}]+")
981 _rep_cond_expr_suffix_pat = re.compile(r"\}")
982
983 # Parses the expression of a conditional block or of a repetition
984 # (block or post-item) and returns the expression string and AST
985 # node.
986 def _expect_rep_cond_expr(self, accept_int: bool):
987 expr_text_loc = self._text_loc
988
989 # Constant integer?
990 m = None
991
992 if accept_int:
993 m = self._try_parse_pat(self._pos_const_int_pat)
994
995 if m is None:
996 # Name?
997 m = self._try_parse_pat(_py_name_pat)
998
999 if m is None:
1000 # Expression?
1001 if self._try_parse_pat(self._rep_cond_expr_prefix_pat) is None:
1002 if accept_int:
1003 mid_msg = "a positive constant integer, a name, or `{`"
1004 else:
1005 mid_msg = "a name or `{`"
1006
1007 # At this point it's invalid
1008 self._raise_error("Expecting {}".format(mid_msg))
1009
1010 # Expect an expression
1011 expr_text_loc = self._text_loc
1012 m = self._expect_pat(self._rep_cond_expr_pat, "Expecting an expression")
1013 expr_str = m.group(0)
1014
1015 # Expect `}`
1016 self._expect_pat(self._rep_cond_expr_suffix_pat, "Expecting `}`")
1017 else:
1018 expr_str = m.group(0)
1019 else:
1020 expr_str = m.group(0)
1021
1022 return self._ast_expr_from_str(expr_str, expr_text_loc)
1023
1024 # Parses the multiplier expression of a repetition (block or
1025 # post-item) and returns the expression string and AST node.
1026 def _expect_rep_mul_expr(self):
1027 return self._expect_rep_cond_expr(True)
1028
1029 # Common block end pattern
1030 _block_end_pat = re.compile(r"!end\b\s*")
1031
1032 # Pattern for _try_parse_rep_block()
1033 _rep_block_prefix_pat = re.compile(r"!r(?:epeat)?\b\s*")
1034
1035 # Tries to parse a repetition block, returning a repetition item on
1036 # success.
def _try_parse_rep_block(self):
    block_text_loc = self._text_loc

    # Without the `!repeat`/`!r` prefix, this isn't a repetition block
    if self._try_parse_pat(self._rep_block_prefix_pat) is None:
        return

    # Multiplier expression
    self._skip_ws_and_comments()
    mul_expr_str, mul_expr = self._expect_rep_mul_expr()

    # Items to repeat; their group is located where they begin
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # Only `!end` may follow the items
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat, "Expecting an item or `!end` (end of repetition block)"
    )

    # The repetition item repeats a single group containing the items
    body = _Group(body_items, body_text_loc)
    return _Rep(body, mul_expr_str, mul_expr, block_text_loc)
1062
1063 # Pattern for _try_parse_cond_block()
1064 _cond_block_prefix_pat = re.compile(r"!if\b\s*")
1065
1066 # Tries to parse a conditional block, returning a conditional item
1067 # on success.
def _try_parse_cond_block(self):
    block_text_loc = self._text_loc

    # Without the `!if` prefix, this isn't a conditional block
    if self._try_parse_pat(self._cond_block_prefix_pat) is None:
        return

    # Condition expression (a constant integer isn't accepted here)
    self._skip_ws_and_comments()
    cond_expr_str, cond_expr = self._expect_rep_cond_expr(False)

    # Conditional items; their group is located where they begin
    self._skip_ws_and_comments()
    body_text_loc = self._text_loc
    body_items = self._parse_items()

    # Only `!end` may follow the items
    self._skip_ws_and_comments()
    self._expect_pat(
        self._block_end_pat,
        "Expecting an item or `!end` (end of conditional block)",
    )

    # The conditional item holds a single group containing the items
    body = _Group(body_items, body_text_loc)
    return _Cond(body, cond_expr_str, cond_expr, block_text_loc)
1094
1095 # Tries to parse a base item (anything except a post-item
1096 # repetition), returning it on success.
1097 def _try_parse_base_item(self):
1098 # Byte item?
1099 item = self._try_parse_byte()
1100
1101 if item is not None:
1102 return item
1103
1104 # String item?
1105 item = self._try_parse_str()
1106
1107 if item is not None:
1108 return item
1109
1110 # Value, variable assignment, or byte order setting item?
1111 item = self._try_parse_val_or_var_assign_or_set_bo()
1112
1113 if item is not None:
1114 return item
1115
1116 # Label or offset setting item?
1117 item = self._try_parse_label_or_set_offset()
1118
1119 if item is not None:
1120 return item
1121
1122 # Offset alignment item?
1123 item = self._try_parse_align_offset()
1124
1125 if item is not None:
1126 return item
1127
1128 # Group item?
1129 item = self._try_parse_group()
1130
1131 if item is not None:
1132 return item
1133
1134 # Repetition (block) item?
1135 item = self._try_parse_rep_block()
1136
1137 if item is not None:
1138 return item
1139
1140 # Conditional block item?
1141 item = self._try_parse_cond_block()
1142
1143 if item is not None:
1144 return item
1145
1146 # Pattern for _try_parse_rep_post()
1147 _rep_post_prefix_pat = re.compile(r"\*")
1148
1149 # Tries to parse a post-item repetition, returning the expression
1150 # string and AST expression node on success.
1151 def _try_parse_rep_post(self):
1152 # Match prefix
1153 if self._try_parse_pat(self._rep_post_prefix_pat) is None:
1154 # No match
1155 return
1156
1157 # Return expression string and AST expression
1158 self._skip_ws_and_comments()
1159 return self._expect_rep_mul_expr()
1160
1161 # Tries to parse an item, possibly followed by a repetition,
1162 # returning `True` on success.
1163 #
1164 # Appends any parsed item to `items`.
def _try_append_item(self, items: List[_Item]):
    self._skip_ws_and_comments()
    base_item = self._try_parse_base_item()

    if base_item is None:
        # No item
        return False

    if not isinstance(base_item, _RepableItem):
        # Not repeatable: append as is
        items.append(base_item)
        return True

    # Repeatable: a post-item repetition may follow
    self._skip_ws_and_comments()
    rep_text_loc = self._text_loc
    rep_ret = self._try_parse_rep_post()

    if rep_ret is None:
        items.append(base_item)
    else:
        # Wrap the base item within a repetition item
        expr_str, expr = rep_ret
        items.append(_Rep(base_item, expr_str, expr, rep_text_loc))

    return True
1186
1187 # Parses and returns items, skipping whitespaces, insignificant
1188 # symbols, and comments when allowed, and stopping at the first
1189 # unknown character.
def _parse_items(self) -> List[_Item]:
    items = []  # type: List[_Item]

    # Keep appending items until the end of the input or until
    # whatever comes next isn't an item
    while self._isnt_done() and self._try_append_item(items):
        pass

    return items
1200
1201 # Parses the whole Normand input, setting `self._res` to the main
1202 # group item on success.
def _parse(self):
    # Parses the whole Normand input and stores the resulting main
    # group item in `self._res`.
    #
    # Reports an error through self._raise_error() if something
    # remains after the first-level items.
    if not self._normand.strip():
        # Nothing but whitespaces: the main group is empty
        self._res = _Group([], self._text_loc)
        return

    top_level_items = self._parse_items()

    # Whatever remains past trailing whitespaces and comments is
    # something _parse_items() didn't recognize.
    self._skip_ws_and_comments()

    if self._isnt_done():
        bad_char = self._normand[self._at]
        self._raise_error("Unexpected character `{}`".format(bad_char))

    self._res = _Group(top_level_items, self._text_loc)
1222
1223
# The return type of parse().
#
# Instances are built with the private _create() class method only:
# calling the class directly always raises `NotImplementedError`.
class ParseResult:
    # Creates and returns an instance holding the given generation
    # results, bypassing __init__().
    @classmethod
    def _create(
        cls,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        res = cls.__new__(cls)
        res._init(data, variables, labels, offset, bo)
        return res

    def __init__(self, *args, **kwargs):  # type: ignore
        raise NotImplementedError

    # Actual initializer, called by _create().
    def _init(
        self,
        data: bytearray,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._data, self._offset, self._bo = data, offset, bo
        self._vars, self._labels = variables, labels

    # Generated data.
    @property
    def data(self):
        return self._data

    # Dictionary of updated variable names to their last computed value.
    @property
    def variables(self):
        return self._vars

    # Dictionary of updated main group label names to their computed
    # value.
    @property
    def labels(self):
        return self._labels

    # Updated offset.
    @property
    def offset(self):
        return self._offset

    # Updated byte order.
    @property
    def byte_order(self):
        return self._bo
1281
1282
# Raises a parse error having the message `msg` and the text location
# of the item `item`.
def _raise_error_for_item(msg: str, item: _Item) -> NoReturn:
    text_loc = item.text_loc
    _raise_error(msg, text_loc)
1287
1288
# The reserved name `ICITTE`: always a legal name within an expression,
# where it evaluates to the current offset.
_icitte_name = "ICITTE"
1291
1292
# Base AST node visitor.
#
# Calls the _visit_name() method, which subclasses provide, for each
# name node which isn't the name of a call.
class _NodeVisitor(ast.NodeVisitor):
    def __init__(self):
        # Set while visiting a call node so that the name visited next
        # (the called one) is skipped; reset once the children of any
        # node are visited.
        self._parent_is_call = False

    def generic_visit(self, node: ast.AST):
        node_type = type(node)

        if node_type is ast.Call:
            self._parent_is_call = True
        elif node_type is ast.Name and not self._parent_is_call:
            self._visit_name(node.id)

        # Visit the children, and then leave the "call" context
        super().generic_visit(node)
        self._parent_is_call = False

    # Called with the identifier of each reported name node.
    @abc.abstractmethod
    def _visit_name(self, name: str):
        ...
1313
1314
# Expression validator: reports a parse error (located at the item
# owning the expression) for the first visited name which is neither
# `ICITTE` nor one of the allowed names.
class _ExprValidator(_NodeVisitor):
    def __init__(self, item: _ExprItemT, allowed_names: Set[str]):
        super().__init__()
        self._item = item
        self._allowed_names = allowed_names

    def _visit_name(self, name: str):
        if name == _icitte_name or name in self._allowed_names:
            # Known and reachable variable/label name
            return

        msg = "Illegal (unknown or unreachable) variable/label name `{}` in expression `{}`".format(
            name, self._item.expr_str
        )

        # List what's legal to help the user: this is never empty
        # because `ICITTE` is always legal.
        legal_names = self._allowed_names | {_icitte_name}
        quoted_names = sorted("`{}`".format(legal) for legal in legal_names)
        msg += "; the legal names are {{{}}}".format(", ".join(quoted_names))
        _raise_error(msg, self._item.text_loc)
1344
1345
# Expression visitor collecting all the contained names (except the
# names of calls, which the base visitor doesn't report).
class _ExprNamesVisitor(_NodeVisitor):
    def __init__(self):
        # Let the base class set up its own state instead of
        # duplicating it here.
        super().__init__()
        self._names = set()  # type: Set[str]

    # Names collected so far.
    @property
    def names(self):
        return self._names

    def _visit_name(self, name: str):
        self._names.add(name)
1358
1359
# Generator state: current variables, labels, offset, and byte order.
#
# The state keeps its own copies of `variables` and `labels` so that
# updating it never modifies the dictionaries of the caller.
class _GenState:
    def __init__(
        self,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self.offset = offset
        self.bo = bo
        self.variables = variables.copy()
        self.labels = labels.copy()
1373
1374
# Generator of data and final state from a group item.
#
# Generation happens in memory at construction time. After building, use
# the `data`, `variables`, `labels`, `offset`, and `bo` properties to
# get the resulting context.
#
# The steps of generation are:
#
# 1. Validate that each repetition, conditional, and LEB128 integer
#    expression uses only reachable names.
#
# 2. Compute and keep the effective repetition count, conditional value,
#    and LEB128 integer value for each repetition, conditional, and
#    LEB128 integer instance.
#
# 3. Generate bytes, updating the initial state as it goes which becomes
#    the final state after the operation.
#
#    During the generation, when handling a `_Rep`, `_Cond`, or
#    `_Leb128Int` item, we already have the effective repetition count,
#    conditional value, or value of the instance.
#
#    When handling a `_Group` item, first update the current labels with
#    all the immediate (not nested) labels, and then handle each
#    contained item. This gives contained item access to "future" outer
#    labels. Then remove the immediate labels from the state so that
#    outer items don't have access to inner labels.
class _Gen:
    # Runs the three generation steps for `group`, starting from the
    # initial state made of `variables`, `labels`, `offset`, and `bo`.
    #
    # Steps 2 and 3 each get their own fresh `_GenState` (which copies
    # `variables` and `labels`), so they don't influence each other.
    def __init__(
        self,
        group: _Group,
        variables: VariablesT,
        labels: LabelsT,
        offset: int,
        bo: Optional[ByteOrder],
    ):
        self._validate_vl_exprs(group, set(variables.keys()), set(labels.keys()))
        self._vl_instance_vals = self._compute_vl_instance_vals(
            group, _GenState(variables, labels, offset, bo)
        )
        self._gen(group, _GenState(variables, labels, offset, bo))

    # Generated bytes.
    @property
    def data(self):
        return self._data

    # Updated variables.
    @property
    def variables(self):
        return self._final_state.variables

    # Updated main group labels.
    @property
    def labels(self):
        return self._final_state.labels

    # Updated offset.
    @property
    def offset(self):
        return self._final_state.offset

    # Updated byte order.
    @property
    def bo(self):
        return self._final_state.bo

    # Returns the set of used, non-called names within the AST
    # expression `expr`.
    @staticmethod
    def _names_of_expr(expr: ast.Expression):
        visitor = _ExprNamesVisitor()
        visitor.visit(expr)
        return visitor.names

    # Validates that all the repetition, conditional, and LEB128 integer
    # expressions within `group` don't refer, directly or indirectly, to
    # subsequent labels.
    #
    # The strategy here is to keep a set of allowed label names, per
    # group, initialized to `allowed_label_names`, and a set of allowed
    # variable names initialized to `allowed_variable_names`.
    #
    # Then, depending on the type of `item`:
    #
    # `_Label`:
    #     Add its name to the local allowed label names: a label
    #     occurring before a repetition, and not within a nested group,
    #     is always reachable.
    #
    # `_VarAssign`:
    #     If all the names within its expression are allowed, then add
    #     its name to the allowed variable names.
    #
    #     Otherwise, remove its name from the allowed variable names (if
    #     it's in there): a variable which refers to an unreachable name
    #     is unreachable itself.
    #
    # `_Rep`, `_Cond`, and `_Leb128`:
    #     Make sure all the names within its expression are allowed.
    #
    # `_Group`:
    #     Call this function for each contained item with a _copy_ of
    #     the current allowed label names and the same current allowed
    #     variable names.
    #
    # Both `allowed_variable_names` and `allowed_label_names` are
    # modified in place.
    @staticmethod
    def _validate_vl_exprs(
        item: _Item, allowed_variable_names: Set[str], allowed_label_names: Set[str]
    ):
        if type(item) is _Label:
            allowed_label_names.add(item.name)
        elif type(item) is _VarAssign:
            # This variable name is allowed if all the names of its
            # expression are reachable at this point.
            reachable_names = (
                allowed_label_names | allowed_variable_names | {_icitte_name}
            )

            if _Gen._names_of_expr(item.expr).issubset(reachable_names):
                allowed_variable_names.add(item.name)
            else:
                # Not allowed (anymore)
                allowed_variable_names.discard(item.name)
        elif isinstance(item, _Leb128Int):
            # Validate the expression
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )
        elif type(item) is _Rep or type(item) is _Cond:
            # Validate the expression first
            _ExprValidator(item, allowed_label_names | allowed_variable_names).visit(
                item.expr
            )

            # Validate inner item
            _Gen._validate_vl_exprs(
                item.item, allowed_variable_names, allowed_label_names
            )
        elif type(item) is _Group:
            # Copy `allowed_label_names` so that this frame cannot
            # access the nested label names.
            group_allowed_label_names = allowed_label_names.copy()

            for subitem in item.items:
                _Gen._validate_vl_exprs(
                    subitem, allowed_variable_names, group_allowed_label_names
                )

    # Evaluates the expression of `item` considering the current
    # generation state `state`, and returns the result.
    #
    # A `bool` result is converted to `int`.
    #
    # If `allow_float` is `True`, then the type of the result may be
    # `float` too.
    #
    # Raises a parse error for `item` if the expression uses an illegal
    # name, fails to evaluate, or has an unexpected result type.
    @staticmethod
    def _eval_item_expr(
        item: _ExprItemT,
        state: _GenState,
        allow_float: bool = False,
    ):
        syms = {}  # type: VariablesT
        syms.update(state.labels)

        # Set the `ICITTE` name to the current offset
        syms[_icitte_name] = state.offset

        # Add the current variables (they have priority over the
        # labels and `ICITTE` having the same name)
        syms.update(state.variables)

        # Validate the node and its children
        _ExprValidator(item, set(syms.keys())).visit(item.expr)

        # Compile and evaluate expression node.
        #
        # NOTE(review): eval() gets `None` as its globals here, so it
        # falls back to the globals of this module, and the names of
        # calls aren't validated above: only process trusted Normand
        # input.
        try:
            val = eval(compile(item.expr, "", "eval"), None, syms)
        except Exception as exc:
            _raise_error_for_item(
                "Failed to evaluate expression `{}`: {}".format(item.expr_str, exc),
                item,
            )

        # Convert `bool` result type to `int` to normalize
        if type(val) is bool:
            val = int(val)

        # Validate result type
        expected_types = {int}  # type: Set[type]
        type_msg = "`int`"

        if allow_float:
            expected_types.add(float)
            type_msg += " or `float`"

        if type(val) not in expected_types:
            _raise_error_for_item(
                "Invalid expression `{}`: expecting result type {}, not `{}`".format(
                    item.expr_str, type_msg, type(val).__name__
                ),
                item,
            )

        return val

    # Returns the size, in bytes, required to encode the value `val`
    # with LEB128 (signed version if `is_signed` is `True`).
    @staticmethod
    def _leb128_size_for_val(val: int, is_signed: bool):
        if val < 0:
            # Equivalent upper bound.
            #
            # For example, if `val` is -128, then the full integer for
            # this number of bits would be [-128, 127].
            val = -val - 1

        # Number of bits (add one for the sign if needed)
        bits = val.bit_length() + int(is_signed)

        if bits == 0:
            # Zero still needs one byte
            bits = 1

        # Seven bits per byte
        return math.ceil(bits / 7)

    # Returns the offset `offset` aligned according to `item` (the
    # alignment value of `item` is in bits).
    @staticmethod
    def _align_offset(offset: int, item: _AlignOffset):
        align_bytes = item.val // 8
        return (offset + align_bytes - 1) // align_bytes * align_bytes

    # Computes the effective value for each repetition, conditional, and
    # LEB128 integer instance, filling `instance_vals` (if not `None`)
    # and returning `instance_vals`.
    #
    # At this point it must be known that, for a given variable-length
    # item, its expression only contains reachable names.
    #
    # When handling a `_Rep` or `_Cond` item, this function appends its
    # effective multiplier/value to `instance_vals` _before_ handling
    # its repeated/conditional item.
    #
    # When handling a `_VarAssign` item, this function only evaluates it
    # if all its names are reachable.
    #
    # This function updates `state` as it goes.
    @staticmethod
    def _compute_vl_instance_vals(
        item: _Item, state: _GenState, instance_vals: Optional[List[int]] = None
    ):
        if instance_vals is None:
            instance_vals = []

        if isinstance(item, _ScalarItem):
            state.offset += item.size
        elif type(item) is _Label:
            state.labels[item.name] = state.offset
        elif type(item) is _VarAssign:
            # Check if all the names are reachable
            do_eval = True

            for name in _Gen._names_of_expr(item.expr):
                if (
                    name != _icitte_name
                    and name not in state.variables
                    and name not in state.labels
                ):
                    # A name is unknown: cannot evaluate
                    do_eval = False
                    break

            if do_eval:
                # Evaluate the expression and keep the result
                state.variables[item.name] = _Gen._eval_item_expr(item, state, True)
        elif type(item) is _SetOffset:
            state.offset = item.val
        elif type(item) is _AlignOffset:
            state.offset = _Gen._align_offset(state.offset, item)
        elif isinstance(item, _Leb128Int):
            # Evaluate the expression
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if type(item) is _ULeb128Int and val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,} for a ULEB128 encoding".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add the evaluation result to the variable-length item
            # instance values.
            instance_vals.append(val)

            # Update offset
            state.offset += _Gen._leb128_size_for_val(val, type(item) is _SLeb128Int)
        elif type(item) is _Rep:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state)

            # Validate result
            if val < 0:
                _raise_error_for_item(
                    "Invalid expression `{}`: unexpected negative result {:,}".format(
                        item.expr_str, val
                    ),
                    item,
                )

            # Add to variable-length item instance values
            instance_vals.append(val)

            # Process the repeated item `val` times
            for _ in range(val):
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Cond:
            # Evaluate the expression and keep the result
            val = _Gen._eval_item_expr(item, state)

            # Add to variable-length item instance values
            instance_vals.append(val)

            # Process the conditional item if needed
            if val:
                _Gen._compute_vl_instance_vals(item.item, state, instance_vals)
        elif type(item) is _Group:
            # Labels of this group must not leak to the outer items
            prev_labels = state.labels.copy()

            # Process each item
            for subitem in item.items:
                _Gen._compute_vl_instance_vals(subitem, state, instance_vals)

            state.labels = prev_labels

        return instance_vals

    # Dry handler of the items which have no effect on the offset.
    def _update_offset_noop(self, item: _Item, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Dry handler of a scalar item: only adds its size to the offset.
    def _dry_handle_scalar_item(
        self, item: _ScalarItem, state: _GenState, next_vl_instance: int
    ):
        state.offset += item.size
        return next_vl_instance

    # Dry handler of a LEB128 integer item: only adds the size of the
    # precomputed value to the offset.
    def _dry_handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        state.offset += self._leb128_size_for_val(
            self._vl_instance_vals[next_vl_instance], type(item) is _SLeb128Int
        )

        return next_vl_instance + 1

    # Dry handler of a group item: dry-handles each contained item.
    def _dry_handle_group_item(
        self, item: _Group, state: _GenState, next_vl_instance: int
    ):
        for subitem in item.items:
            next_vl_instance = self._dry_handle_item(subitem, state, next_vl_instance)

        return next_vl_instance

    # Dry handler of a repetition item: dry-handles the repeated item
    # as many times as the precomputed multiplier.
    def _dry_handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        mul = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler of a conditional item: dry-handles the conditional
    # item if the precomputed value is true.
    def _dry_handle_cond_item(
        self, item: _Cond, state: _GenState, next_vl_instance: int
    ):
        # Get the value from `self._vl_instance_vals` _before_
        # incrementing `next_vl_instance` to honor the order of
        # _compute_vl_instance_vals().
        val = self._vl_instance_vals[next_vl_instance]
        next_vl_instance += 1

        if val:
            next_vl_instance = self._dry_handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Dry handler of an offset alignment item: only aligns the offset.
    def _dry_handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = self._align_offset(state.offset, item)
        return next_vl_instance

    # Dry handler of an offset setting item: only sets the offset.
    def _dry_handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Updates `state.offset` considering the generated data of `item`,
    # without generating any, and returns the updated next
    # variable-length item instance.
    def _dry_handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._dry_handle_item_funcs[type(item)](item, state, next_vl_instance)

    # Handles the byte item `item`.
    def _handle_byte_item(self, item: _Byte, state: _GenState, next_vl_instance: int):
        self._data.append(item.val)
        state.offset += item.size
        return next_vl_instance

    # Handles the string item `item`.
    def _handle_str_item(self, item: _Str, state: _GenState, next_vl_instance: int):
        self._data += item.data
        state.offset += item.size
        return next_vl_instance

    # Handles the byte order setting item `item`.
    def _handle_set_bo_item(
        self, item: _SetBo, state: _GenState, next_vl_instance: int
    ):
        # Update current byte order
        state.bo = item.bo
        return next_vl_instance

    # Handles the variable assignment item `item`.
    def _handle_var_assign_item(
        self, item: _VarAssign, state: _GenState, next_vl_instance: int
    ):
        # Update variable
        state.variables[item.name] = self._eval_item_expr(item, state, True)
        return next_vl_instance

    # Handles the fixed-length integer item `item` having the value
    # `val`: appends the encoded bytes without updating the offset
    # (_handle_fl_num_item() does it).
    def _handle_fl_int_item(self, val: int, item: _FlNum, state: _GenState):
        # Validate range
        if val < -(2 ** (item.len - 1)) or val > 2**item.len - 1:
            _raise_error_for_item(
                "Value {:,} is outside the {}-bit range when evaluating expression `{}` at byte offset {:,}".format(
                    val, item.len, item.expr_str, state.offset
                ),
                item,
            )

        # Encode result on 64 bits (to extend the sign bit whatever the
        # value of `item.len`).
        data = struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "Q" if val >= 0 else "q",
            ),
            val,
        )

        # Keep only the requested length
        len_bytes = item.len // 8

        if state.bo in (None, ByteOrder.BE):
            # Big endian: keep last bytes
            data = data[-len_bytes:]
        else:
            # Little endian: keep first bytes
            assert state.bo == ByteOrder.LE
            data = data[:len_bytes]

        # Append to current bytes (the caller updates the offset)
        self._data += data

    # Handles the fixed-length floating point number item `item` having
    # the value `val`: appends the encoded bytes without updating the
    # offset (_handle_fl_num_item() does it).
    def _handle_fl_float_item(self, val: float, item: _FlNum, state: _GenState):
        # Validate length
        if item.len not in (32, 64):
            _raise_error_for_item(
                "Invalid {}-bit length for a fixed-length floating point number (value {:,})".format(
                    item.len, val
                ),
                item,
            )

        # Encode result
        self._data += struct.pack(
            "{}{}".format(
                ">" if state.bo in (None, ByteOrder.BE) else "<",
                "f" if item.len == 32 else "d",
            ),
            val,
        )

    # Handles the fixed-length number item `item`.
    def _handle_fl_num_item(
        self, item: _FlNum, state: _GenState, next_vl_instance: int
    ):
        # Compute value
        val = self._eval_item_expr(item, state, True)

        # Validate current byte order
        if state.bo is None and item.len > 8:
            _raise_error_for_item(
                "Current byte order isn't defined at first fixed-length number (`{}`) to encode on more than 8 bits".format(
                    item.expr_str
                ),
                item,
            )

        # Handle depending on type
        if type(val) is int:
            self._handle_fl_int_item(val, item, state)
        else:
            assert type(val) is float
            self._handle_fl_float_item(val, item, state)

        # Update offset
        state.offset += item.size

        return next_vl_instance

    # Handles the LEB128 integer item `item`.
    def _handle_leb128_int_item(
        self, item: _Leb128Int, state: _GenState, next_vl_instance: int
    ):
        # Get the precomputed value
        val = self._vl_instance_vals[next_vl_instance]

        # Size in bytes
        size = self._leb128_size_for_val(val, type(item) is _SLeb128Int)

        # For each byte
        for _ in range(size):
            # Seven LSBs, MSB of the byte set (continue)
            self._data.append((val & 0x7F) | 0x80)
            val >>= 7

        # Clear MSB of last byte (stop)
        self._data[-1] &= ~0x80

        # Update offset, like the dry handler and
        # _compute_vl_instance_vals() do: otherwise `ICITTE`, any
        # following alignment, and the final offset would ignore the
        # bytes appended above.
        state.offset += size

        # Consumed this instance
        return next_vl_instance + 1

    # Handles the group item `item`, only removing the immediate labels
    # from `state.labels` if `remove_immediate_labels` is `True`.
    def _handle_group_item(
        self,
        item: _Group,
        state: _GenState,
        next_vl_instance: int,
        remove_immediate_labels: bool = True,
    ):
        # Compute the values of the immediate (not nested) labels. Those
        # labels are reachable by any expression within the group.
        #
        # This is a dry run on a temporary state: only the offset
        # matters.
        tmp_state = _GenState({}, {}, state.offset, None)
        immediate_label_names = set()  # type: Set[str]
        tmp_next_vl_instance = next_vl_instance

        for subitem in item.items:
            if type(subitem) is _Label:
                # New immediate label
                state.labels[subitem.name] = tmp_state.offset
                immediate_label_names.add(subitem.name)

            tmp_next_vl_instance = self._dry_handle_item(
                subitem, tmp_state, tmp_next_vl_instance
            )

        # Handle each item now with the actual state
        for subitem in item.items:
            next_vl_instance = self._handle_item(subitem, state, next_vl_instance)

        # Remove immediate labels if required so that outer items won't
        # reach inner labels.
        if remove_immediate_labels:
            for name in immediate_label_names:
                del state.labels[name]

        return next_vl_instance

    # Handles the repetition item `item`.
    def _handle_rep_item(self, item: _Rep, state: _GenState, next_vl_instance: int):
        # Get the precomputed repetition count
        mul = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        for _ in range(mul):
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the conditional item `item`.
    def _handle_cond_item(self, item: _Cond, state: _GenState, next_vl_instance: int):
        # Get the precomputed conditional value
        val = self._vl_instance_vals[next_vl_instance]

        # Consumed this instance
        next_vl_instance += 1

        if val:
            next_vl_instance = self._handle_item(item.item, state, next_vl_instance)

        return next_vl_instance

    # Handles the offset setting item `item`.
    def _handle_set_offset_item(
        self, item: _SetOffset, state: _GenState, next_vl_instance: int
    ):
        state.offset = item.val
        return next_vl_instance

    # Handles offset alignment item `item` (adds padding).
    def _handle_align_offset_item(
        self, item: _AlignOffset, state: _GenState, next_vl_instance: int
    ):
        init_offset = state.offset
        state.offset = self._align_offset(state.offset, item)
        self._data += bytes([item.pad_val] * (state.offset - init_offset))
        return next_vl_instance

    # Handles the label item `item` (nothing to do: the containing
    # group already set its value).
    def _handle_label_item(self, item: _Label, state: _GenState, next_vl_instance: int):
        return next_vl_instance

    # Handles the item `item`, returning the updated next
    # variable-length item instance.
    def _handle_item(self, item: _Item, state: _GenState, next_vl_instance: int):
        return self._item_handlers[type(item)](item, state, next_vl_instance)

    # Generates the data (`self._data`) and final state
    # (`self._final_state`) from `group` and the initial state `state`.
    def _gen(self, group: _Group, state: _GenState):
        # Initial state
        self._data = bytearray()

        # Item handlers
        self._item_handlers = {
            _AlignOffset: self._handle_align_offset_item,
            _Byte: self._handle_byte_item,
            _Cond: self._handle_cond_item,
            _FlNum: self._handle_fl_num_item,
            _Group: self._handle_group_item,
            _Label: self._handle_label_item,
            _Rep: self._handle_rep_item,
            _SetBo: self._handle_set_bo_item,
            _SetOffset: self._handle_set_offset_item,
            _SLeb128Int: self._handle_leb128_int_item,
            _Str: self._handle_str_item,
            _ULeb128Int: self._handle_leb128_int_item,
            _VarAssign: self._handle_var_assign_item,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Dry item handlers (only updates the state offset)
        self._dry_handle_item_funcs = {
            _AlignOffset: self._dry_handle_align_offset_item,
            _Byte: self._dry_handle_scalar_item,
            _Cond: self._dry_handle_cond_item,
            _FlNum: self._dry_handle_scalar_item,
            _Group: self._dry_handle_group_item,
            _Label: self._update_offset_noop,
            _Rep: self._dry_handle_rep_item,
            _SetBo: self._update_offset_noop,
            _SetOffset: self._dry_handle_set_offset_item,
            _SLeb128Int: self._dry_handle_leb128_int_item,
            _Str: self._dry_handle_scalar_item,
            _ULeb128Int: self._dry_handle_leb128_int_item,
            _VarAssign: self._update_offset_noop,
        }  # type: Dict[type, Callable[[Any, _GenState, int], int]]

        # Handle the group item, _not_ removing the immediate labels
        # because the `labels` property offers them.
        self._handle_group_item(group, state, 0, False)

        # This is actually the final state
        self._final_state = state
2051
2052
# Parses the Normand input string `normand`, generates the bytes it
# encodes, and returns a corresponding `ParseResult` instance.
#
# `init_variables` is a dictionary of initial variable names (valid
# Python names) to integral values. A variable name must not be the
# reserved name `ICITTE`. `None` means no initial variables.
#
# `init_labels` is a dictionary of initial label names (valid Python
# names) to integral values. A label name must not be the reserved name
# `ICITTE`. `None` means no initial labels.
#
# `init_offset` is the initial offset.
#
# `init_byte_order` is the initial byte order.
#
# Raises `ParseError` on any parsing error.
def parse(
    normand: str,
    init_variables: Optional[VariablesT] = None,
    init_labels: Optional[LabelsT] = None,
    init_offset: int = 0,
    init_byte_order: Optional[ByteOrder] = None,
):
    variables = {} if init_variables is None else init_variables
    labels = {} if init_labels is None else init_labels

    # Parse first, and then generate from the resulting main group
    main_group = _Parser(normand, variables, labels).res
    gen = _Gen(main_group, variables, labels, init_offset, init_byte_order)

    return ParseResult._create(  # pyright: ignore[reportPrivateUsage]
        gen.data, gen.variables, gen.labels, gen.offset, gen.bo
    )
2092
2093
# Parses the command-line arguments (from `sys.argv`) and returns the
# resulting `argparse` namespace.
def _parse_cli_args():
    import argparse

    # Flags and options of each argument, in help order
    arg_specs = [
        (
            ("--offset",),
            {
                "metavar": "OFFSET",
                "action": "store",
                "type": int,
                "default": 0,
                "help": "initial offset (positive)",
            },
        ),
        (
            ("-b", "--byte-order"),
            {
                "metavar": "BO",
                "choices": ["be", "le"],
                "type": str,
                "help": "initial byte order (`be` or `le`)",
            },
        ),
        (
            ("--var",),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial variable (may be repeated)",
            },
        ),
        (
            ("-l", "--label"),
            {
                "metavar": "NAME=VAL",
                "action": "append",
                "help": "add an initial label (may be repeated)",
            },
        ),
        (
            ("--version",),
            {
                "action": "version",
                "version": "Normand {}".format(__version__),
            },
        ),
        (
            ("path",),
            {
                "metavar": "PATH",
                "action": "store",
                "nargs": "?",
                "help": "input path (none means standard input)",
            },
        ),
    ]

    # Build parser
    parser = argparse.ArgumentParser()

    for flags, options in arg_specs:
        parser.add_argument(*flags, **options)

    # Parse
    return parser.parse_args()
2142
2143
# Raises a command-line error (a `RuntimeError`) with the message `msg`.
def _raise_cli_error(msg: str) -> NoReturn:
    full_msg = "Command-line error: {}".format(msg)
    raise RuntimeError(full_msg)
2147
2148
# Returns a dictionary of names to integers built from the list of
# strings `args` containing `NAME=VAL` entries (`VAL` being decimal
# digits only).
#
# Returns an empty dictionary if `args` is `None`. When a name is
# repeated, its last value wins.
#
# Raises a command-line error on any malformed entry.
def _dict_from_arg(args: Optional[List[str]]):
    result = {}  # type: LabelsT

    if args is None:
        return result

    # Same pattern for all the entries: build it once
    assign_pat = re.compile(r"({})=(\d+)$".format(_py_name_pat.pattern))

    for arg in args:
        match = assign_pat.match(arg)

        if match is None:
            _raise_cli_error("Invalid assignment {}".format(arg))

        name = match.group(1)
        result[name] = int(match.group(2))

    return result
2166
2167
# CLI entry point without exception handling.
#
# Reads the Normand input (from the path given on the command line, or
# from the standard input if none), parses it with the initial
# variables, labels, offset, and byte order given on the command line,
# and writes the generated bytes to the standard output.
#
# Raises a command-line error (see _raise_cli_error()) on an invalid
# argument. On a parse error, reports it with _fail(), which exits.
def _try_run_cli():
    import os.path

    # Parse arguments
    args = _parse_cli_args()

    # Read input
    if args.path is None:
        normand = sys.stdin.read()
    else:
        with open(args.path) as f:
            normand = f.read()

    # Variables and labels
    variables = typing.cast(VariablesT, _dict_from_arg(args.var))
    labels = _dict_from_arg(args.label)

    # Validate offset (include the offending value in the message)
    if args.offset < 0:
        _raise_cli_error("Invalid negative offset {}".format(args.offset))

    # Validate and set byte order
    bo = None  # type: Optional[ByteOrder]

    if args.byte_order is not None:
        if args.byte_order == "be":
            bo = ByteOrder.BE
        else:
            assert args.byte_order == "le"
            bo = ByteOrder.LE

    # Parse
    try:
        res = parse(normand, variables, labels, args.offset, bo)
    except ParseError as exc:
        # Prefix the location with the absolute input path, if any
        prefix = ""

        if args.path is not None:
            prefix = "{}:".format(os.path.abspath(args.path))

        _fail(
            "{}{}:{} - {}".format(
                prefix, exc.text_loc.line_no, exc.text_loc.col_no, str(exc)
            )
        )

    # Print
    sys.stdout.buffer.write(res.data)
2217
2218
# Prints the message `msg` to the standard error, making sure it ends
# with a period, and exits with status 1.
def _fail(msg: str) -> NoReturn:
    full_msg = msg if msg.endswith(".") else msg + "."
    print(full_msg, file=sys.stderr)
    sys.exit(1)
2226
2227
# CLI entry point: runs _try_run_cli() and reports any exception it
# raises through _fail() (message on the standard error, exit status 1).
def _run_cli():
    try:
        _try_run_cli()
    except Exception as err:
        # Show the message only, not a traceback
        _fail(str(err))


# Run the command-line tool when this module is executed directly
if __name__ == "__main__":
    _run_cli()
This page took 0.070262 seconds and 3 git commands to generate.